Merge branch 'hwmon-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jdelv...
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 23 Mar 2012 21:37:52 +0000 (14:37 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 23 Mar 2012 21:37:52 +0000 (14:37 -0700)
Pull hwmon updates from Jean Delvare:
 "We have support for the MCP3021, MC13892 and GMT G781, automatic fan
  speed control for LM63/LM64 chips, and a few clean-ups."

* 'hwmon-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jdelvare/staging:
  hwmon: Add MCP3021 ADC driver
  hwmon: (mc13783-adc) Add support for the MC13892 PMIC
  hwmon: (mc13783-adc) Remove space before tab
  hwmon: (lm63) Let the user adjust the lookup table
  hwmon: (lm63) Make fan speed control strategy changeable
  hwmon: (lm63) Reorganize the code
  hwmon: (lm90) Restore original configuration if probe function fails
  hwmon: (lm90) Add support for GMT G781
  hwmon: (lm90) Fix multi-line comments
  hwmon: (w83795) Fix multi-line comments
  hwmon: (w83795) Unconditionally support manual fan speed control
  hwmon: (fam15h_power) Increase output resolution
  hwmon: (fam15h_power) Correct sign extension of running_avg_capture

376 files changed:
Documentation/DocBook/kgdb.tmpl
Documentation/devicetree/bindings/i2c/sirf-i2c.txt [new file with mode: 0644]
Documentation/filesystems/nfs/idmapper.txt
Documentation/filesystems/nfs/pnfs.txt
Documentation/kernel-parameters.txt
MAINTAINERS
arch/alpha/include/asm/pci.h
arch/alpha/kernel/pci.c
arch/alpha/kernel/pci_impl.h
arch/alpha/kernel/sys_marvel.c
arch/alpha/kernel/sys_titan.c
arch/arm/common/it8152.c
arch/arm/include/asm/pci.h
arch/arm/kernel/bios32.c
arch/arm/mach-cns3xxx/pcie.c
arch/arm/mach-dove/pcie.c
arch/arm/mach-footbridge/dc21285.c
arch/arm/mach-integrator/pci_v3.c
arch/arm/mach-iop13xx/pci.c
arch/arm/mach-ixp2000/ixdp2400.c
arch/arm/mach-ixp2000/ixdp2800.c
arch/arm/mach-ixp2000/ixdp2x00.c
arch/arm/mach-ixp2000/pci.c
arch/arm/mach-ixp23xx/pci.c
arch/arm/mach-ixp4xx/common-pci.c
arch/arm/mach-kirkwood/pcie.c
arch/arm/mach-ks8695/pci.c
arch/arm/mach-mv78xx0/pcie.c
arch/arm/mach-orion5x/pci.c
arch/arm/mach-sa1100/pci-nanoengine.c
arch/arm/mach-tegra/include/mach/smmu.h [new file with mode: 0644]
arch/arm/mach-tegra/pcie.c
arch/arm/mach-versatile/pci.c
arch/arm/mm/iomap.c
arch/arm/plat-iop/pci.c
arch/ia64/include/asm/pci.h
arch/ia64/pci/pci.c
arch/ia64/sn/kernel/io_init.c
arch/microblaze/include/asm/pci-bridge.h
arch/microblaze/include/asm/pci.h
arch/microblaze/pci/pci-common.c
arch/mips/include/asm/pci.h
arch/mips/pci/fixup-cobalt.c
arch/mips/pci/pci-bcm1480.c
arch/mips/pci/pci-ip27.c
arch/mips/pci/pci-lantiq.c
arch/mips/pci/pci-sb1250.c
arch/mips/pci/pci-xlr.c
arch/mips/pci/pci.c
arch/mn10300/include/asm/pci.h
arch/mn10300/unit-asb2305/pci.c
arch/parisc/include/asm/pci.h
arch/parisc/kernel/pci.c
arch/powerpc/include/asm/pci.h
arch/powerpc/include/asm/ppc-pci.h
arch/powerpc/kernel/pci-common.c
arch/powerpc/kernel/pci_32.c
arch/powerpc/kernel/pci_64.c
arch/powerpc/kernel/pci_of_scan.c
arch/powerpc/kernel/rtas_pci.c
arch/powerpc/platforms/maple/pci.c
arch/powerpc/platforms/pasemi/pci.c
arch/powerpc/platforms/powermac/pci.c
arch/powerpc/platforms/powernv/pci-ioda.c
arch/powerpc/platforms/powernv/pci.c
arch/powerpc/platforms/pseries/pci_dlpar.c
arch/powerpc/platforms/pseries/setup.c
arch/powerpc/platforms/wsp/wsp_pci.c
arch/sh/drivers/pci/pci.c
arch/sh/include/asm/pci.h
arch/sparc/include/asm/pci_32.h
arch/sparc/include/asm/pci_64.h
arch/sparc/kernel/leon_pci.c
arch/sparc/kernel/pci.c
arch/unicore32/include/asm/pci.h
arch/unicore32/kernel/pci.c
arch/x86/include/asm/kgdb.h
arch/x86/kernel/kgdb.c
arch/x86/kernel/pci-dma.c
arch/x86/pci/acpi.c
arch/x86/pci/fixup.c
arch/x86/pci/i386.c
arch/x86/pci/mrst.c
arch/xtensa/kernel/pci.c
drivers/i2c/busses/Kconfig
drivers/i2c/busses/Makefile
drivers/i2c/busses/i2c-designware-platdrv.c
drivers/i2c/busses/i2c-eg20t.c
drivers/i2c/busses/i2c-imx.c
drivers/i2c/busses/i2c-mpc.c
drivers/i2c/busses/i2c-s3c2410.c
drivers/i2c/busses/i2c-sirf.c [new file with mode: 0644]
drivers/i2c/busses/i2c-tegra.c
drivers/i2c/busses/i2c-versatile.c
drivers/i2c/busses/i2c-xlr.c [new file with mode: 0644]
drivers/iommu/Kconfig
drivers/iommu/Makefile
drivers/iommu/amd_iommu_init.c
drivers/iommu/amd_iommu_v2.c
drivers/iommu/tegra-gart.c [new file with mode: 0644]
drivers/iommu/tegra-smmu.c [new file with mode: 0644]
drivers/message/fusion/mptbase.c
drivers/mtd/ubi/build.c
drivers/mtd/ubi/eba.c
drivers/mtd/ubi/io.c
drivers/mtd/ubi/scan.c
drivers/mtd/ubi/ubi.h
drivers/mtd/ubi/wl.c
drivers/net/bonding/bond_main.c
drivers/net/bonding/bonding.h
drivers/net/ethernet/broadcom/cnic.c
drivers/net/ethernet/broadcom/cnic_defs.h
drivers/net/ethernet/broadcom/cnic_if.h
drivers/net/ethernet/broadcom/tg3.c
drivers/net/ethernet/broadcom/tg3.h
drivers/net/ethernet/marvell/sky2.c
drivers/net/usb/usbnet.c
drivers/parisc/dino.c
drivers/parisc/lba_pci.c
drivers/pci/Kconfig
drivers/pci/bus.c
drivers/pci/hotplug/acpiphp_glue.c
drivers/pci/hotplug/cpci_hotplug_pci.c
drivers/pci/hotplug/cpcihp_generic.c
drivers/pci/hotplug/cpqphp_pci.c
drivers/pci/hotplug/fakephp.c
drivers/pci/hotplug/ibmphp_core.c
drivers/pci/hotplug/ibmphp_ebda.c
drivers/pci/hotplug/pciehp_hpc.c
drivers/pci/hotplug/pciehp_pci.c
drivers/pci/hotplug/rpadlpar_core.c
drivers/pci/hotplug/sgi_hotplug.c
drivers/pci/hotplug/shpchp_pci.c
drivers/pci/iov.c
drivers/pci/pci-driver.c
drivers/pci/pci-sysfs.c
drivers/pci/pci.c
drivers/pci/pci.h
drivers/pci/pcie/Kconfig
drivers/pci/pcie/aspm.c
drivers/pci/pcie/portdrv.h
drivers/pci/pcie/portdrv_core.c
drivers/pci/probe.c
drivers/pci/quirks.c
drivers/pci/remove.c
drivers/pci/setup-bus.c
drivers/pci/setup-res.c
drivers/pci/xen-pcifront.c
drivers/pcmcia/cardbus.c
drivers/platform/x86/asus-wmi.c
drivers/platform/x86/eeepc-laptop.c
drivers/scsi/bnx2fc/bnx2fc_constants.h
drivers/scsi/bnx2i/57xx_iscsi_constants.h
drivers/scsi/mpt2sas/mpt2sas_base.c
drivers/usb/host/pci-quirks.c
fs/cifs/README
fs/cifs/cifs_debug.c
fs/cifs/cifsfs.c
fs/cifs/cifsglob.h
fs/cifs/cifsproto.h
fs/cifs/cifssmb.c
fs/cifs/connect.c
fs/cifs/dir.c
fs/cifs/file.c
fs/cifs/misc.c
fs/cifs/transport.c
fs/lockd/clnt4xdr.c
fs/lockd/clntlock.c
fs/lockd/clntxdr.c
fs/lockd/host.c
fs/lockd/mon.c
fs/lockd/netns.h [new file with mode: 0644]
fs/lockd/svc.c
fs/lockd/svclock.c
fs/nfs/Kconfig
fs/nfs/blocklayout/blocklayout.c
fs/nfs/blocklayout/blocklayout.h
fs/nfs/blocklayout/blocklayoutdev.c
fs/nfs/blocklayout/blocklayoutdm.c
fs/nfs/blocklayout/extents.c
fs/nfs/cache_lib.c
fs/nfs/cache_lib.h
fs/nfs/callback.c
fs/nfs/callback.h
fs/nfs/callback_proc.c
fs/nfs/callback_xdr.c
fs/nfs/client.c
fs/nfs/delegation.c
fs/nfs/delegation.h
fs/nfs/dir.c
fs/nfs/direct.c
fs/nfs/dns_resolve.c
fs/nfs/dns_resolve.h
fs/nfs/file.c
fs/nfs/fscache.c
fs/nfs/idmap.c
fs/nfs/inode.c
fs/nfs/internal.h
fs/nfs/mount_clnt.c
fs/nfs/namespace.c
fs/nfs/netns.h [new file with mode: 0644]
fs/nfs/nfs2xdr.c
fs/nfs/nfs3acl.c
fs/nfs/nfs3proc.c
fs/nfs/nfs3xdr.c
fs/nfs/nfs4_fs.h
fs/nfs/nfs4filelayout.c
fs/nfs/nfs4filelayout.h
fs/nfs/nfs4filelayoutdev.c
fs/nfs/nfs4namespace.c
fs/nfs/nfs4proc.c
fs/nfs/nfs4state.c
fs/nfs/nfs4xdr.c
fs/nfs/nfsroot.c
fs/nfs/objlayout/objio_osd.c
fs/nfs/objlayout/objlayout.c
fs/nfs/objlayout/objlayout.h
fs/nfs/pagelist.c
fs/nfs/pnfs.c
fs/nfs/pnfs.h
fs/nfs/pnfs_dev.c
fs/nfs/proc.c
fs/nfs/read.c
fs/nfs/super.c
fs/nfs/sysctl.c
fs/nfs/unlink.c
fs/nfs/write.c
fs/nfsd/nfs4callback.c
fs/nfsd/nfs4state.c
fs/nfsd/nfsctl.c
fs/nfsd/nfssvc.c
fs/nfsd/stats.c
fs/pstore/platform.c
fs/quota/quota.c
fs/ubifs/debug.c
fs/ubifs/debug.h
fs/ubifs/dir.c
fs/ubifs/recovery.c
fs/ubifs/sb.c
fs/ubifs/ubifs.h
fs/xfs/Makefile
fs/xfs/xfs_aops.c
fs/xfs/xfs_aops.h
fs/xfs/xfs_bmap.c
fs/xfs/xfs_buf.c
fs/xfs/xfs_dfrag.c
fs/xfs/xfs_dir2_block.c
fs/xfs/xfs_dquot.c
fs/xfs/xfs_dquot.h
fs/xfs/xfs_file.c
fs/xfs/xfs_iget.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode.h
fs/xfs/xfs_inode_item.c
fs/xfs/xfs_inode_item.h
fs/xfs/xfs_ioctl.c
fs/xfs/xfs_ioctl32.c
fs/xfs/xfs_iomap.c
fs/xfs/xfs_iops.c
fs/xfs/xfs_itable.c
fs/xfs/xfs_log.c
fs/xfs/xfs_log.h
fs/xfs/xfs_log_priv.h
fs/xfs/xfs_log_recover.c
fs/xfs/xfs_mount.c
fs/xfs/xfs_mount.h
fs/xfs/xfs_qm.c
fs/xfs/xfs_qm.h
fs/xfs/xfs_qm_bhv.c
fs/xfs/xfs_qm_stats.c [deleted file]
fs/xfs/xfs_qm_stats.h [deleted file]
fs/xfs/xfs_qm_syscalls.c
fs/xfs/xfs_quota.h
fs/xfs/xfs_quota_priv.h
fs/xfs/xfs_sb.h
fs/xfs/xfs_stats.c
fs/xfs/xfs_stats.h
fs/xfs/xfs_super.c
fs/xfs/xfs_super.h
fs/xfs/xfs_sync.c
fs/xfs/xfs_sync.h
fs/xfs/xfs_trace.h
fs/xfs/xfs_trans.c
fs/xfs/xfs_trans_ail.c
fs/xfs/xfs_trans_buf.c
fs/xfs/xfs_trans_dquot.c
fs/xfs/xfs_trans_inode.c
fs/xfs/xfs_trans_priv.h
fs/xfs/xfs_vnode.h
fs/xfs/xfs_vnodeops.h
include/asm-generic/pci-bridge.h
include/asm-generic/pci.h
include/linux/amd-iommu.h
include/linux/i2c/at24.h
include/linux/ioport.h
include/linux/key.h
include/linux/lockd/bind.h
include/linux/lockd/lockd.h
include/linux/lockd/xdr4.h
include/linux/nfs.h
include/linux/nfs4.h
include/linux/nfs_fs.h
include/linux/nfs_fs_i.h
include/linux/nfs_fs_sb.h
include/linux/nfs_idmap.h
include/linux/nfs_iostat.h
include/linux/nfs_page.h
include/linux/nfs_xdr.h
include/linux/pci.h
include/linux/pci_regs.h
include/linux/sunrpc/auth.h
include/linux/sunrpc/bc_xprt.h
include/linux/sunrpc/cache.h
include/linux/sunrpc/clnt.h
include/linux/sunrpc/debug.h
include/linux/sunrpc/metrics.h
include/linux/sunrpc/rpc_pipe_fs.h
include/linux/sunrpc/sched.h
include/linux/sunrpc/stats.h
include/linux/sunrpc/svc.h
include/linux/sunrpc/svc_xprt.h
include/linux/sunrpc/svcauth.h
include/linux/sunrpc/svcauth_gss.h
include/linux/sunrpc/svcsock.h
include/linux/sunrpc/xprt.h
include/linux/sunrpc/xprtsock.h
include/trace/events/sunrpc.h [new file with mode: 0644]
kernel/debug/debug_core.c
kernel/debug/gdbstub.c
kernel/debug/kdb/kdb_bp.c
kernel/debug/kdb/kdb_io.c
kernel/debug/kdb/kdb_keyboard.c
kernel/debug/kdb/kdb_main.c
kernel/debug/kdb/kdb_private.h
mm/vmscan.c
net/core/dev.c
net/ipv4/devinet.c
net/ipv4/netfilter/iptable_filter.c
net/ipv6/netfilter/ip6table_filter.c
net/l2tp/l2tp_ppp.c
net/netlabel/netlabel_kapi.c
net/rds/ib_cm.c
net/rds/iw_cm.c
net/rds/loop.c
net/sunrpc/Kconfig
net/sunrpc/addr.c
net/sunrpc/auth_gss/auth_gss.c
net/sunrpc/auth_gss/gss_krb5_crypto.c
net/sunrpc/auth_gss/gss_krb5_mech.c
net/sunrpc/auth_gss/gss_krb5_seal.c
net/sunrpc/auth_gss/svcauth_gss.c
net/sunrpc/backchannel_rqst.c
net/sunrpc/cache.c
net/sunrpc/clnt.c
net/sunrpc/netns.h
net/sunrpc/rpc_pipe.c
net/sunrpc/rpcb_clnt.c
net/sunrpc/sched.c
net/sunrpc/stats.c
net/sunrpc/sunrpc.h
net/sunrpc/sunrpc_syms.c
net/sunrpc/svc.c
net/sunrpc/svc_xprt.c
net/sunrpc/svcauth_unix.c
net/sunrpc/svcsock.c
net/sunrpc/sysctl.c
net/sunrpc/xprt.c
net/sunrpc/xprtrdma/rpc_rdma.c
net/sunrpc/xprtrdma/verbs.c
net/sunrpc/xprtsock.c
net/xfrm/xfrm_output.c
net/xfrm/xfrm_replay.c
security/keys/key.c
security/keys/keyctl.c
tools/testing/ktest/ktest.pl
tools/testing/ktest/sample.conf

index d71b57fcf116bcb263b8a761d2ebf87b3231f56a..4ee4ba3509fca70c9179487dc17ce436068962a6 100644 (file)
    <para>It is possible to use this option with kgdboc on a tty that is not a system console.
    </para>
   </para>
+  </sect1>
+   <sect1 id="kgdbreboot">
+   <title>Run time parameter: kgdbreboot</title>
+   <para> The kgdbreboot feature allows you to change how the debugger
+   deals with the reboot notification.  You have 3 choices for the
+   behavior.  The default behavior is always set to 0.</para>
+   <orderedlist>
+   <listitem><para>echo -1 > /sys/module/debug_core/parameters/kgdbreboot</para>
+   <para>Ignore the reboot notification entirely.</para>
+   </listitem>
+   <listitem><para>echo 0 > /sys/module/debug_core/parameters/kgdbreboot</para>
+   <para>Send the detach message to any attached debugger client.</para>
+   </listitem>
+   <listitem><para>echo 1 > /sys/module/debug_core/parameters/kgdbreboot</para>
+   <para>Enter the debugger on reboot notify.</para>
+   </listitem>
+   </orderedlist>
   </sect1>
   </chapter>
   <chapter id="usingKDB">
diff --git a/Documentation/devicetree/bindings/i2c/sirf-i2c.txt b/Documentation/devicetree/bindings/i2c/sirf-i2c.txt
new file mode 100644 (file)
index 0000000..7baf9e1
--- /dev/null
@@ -0,0 +1,19 @@
+I2C for SiRFprimaII platforms
+
+Required properties :
+- compatible : Must be "sirf,prima2-i2c"
+- reg: physical base address of the controller and length of memory mapped
+     region.
+- interrupts: interrupt number to the cpu.
+
+Optional properties:
+- clock-frequency : Contains desired I2C/HS-I2C bus clock frequency in Hz.
+  The absence of the property indicates the default frequency 100 kHz.
+
+Examples :
+
+i2c0: i2c@b00e0000 {
+    compatible = "sirf,prima2-i2c";
+    reg = <0xb00e0000 0x10000>;
+    interrupts = <24>;
+};
index 120fd3cf7fd92b666cfcf7ea8eda236e01e22282..fe03d10bb79a36055401b8b3475f57bf57ea38c3 100644 (file)
@@ -4,13 +4,21 @@ ID Mapper
 =========
 Id mapper is used by NFS to translate user and group ids into names, and to
 translate user and group names into ids.  Part of this translation involves
-performing an upcall to userspace to request the information.  Id mapper will
-user request-key to perform this upcall and cache the result.  The program
-/usr/sbin/nfs.idmap should be called by request-key, and will perform the
-translation and initialize a key with the resulting information.
+performing an upcall to userspace to request the information.  There are two
+ways NFS could obtain this information: placing a call to /sbin/request-key
+or by placing a call to the rpc.idmap daemon.
+
+NFS will attempt to call /sbin/request-key first.  If this succeeds, the
+result will be cached using the generic request-key cache.  This call should
+only fail if /etc/request-key.conf is not configured for the id_resolver key
+type, see the "Configuring" section below if you wish to use the request-key
+method.
+
+If the call to /sbin/request-key fails (if /etc/request-key.conf is not
+configured with the id_resolver key type), then the idmapper will ask the
+legacy rpc.idmap daemon for the id mapping.  This result will be stored
+in a custom NFS idmap cache.
 
- NFS_USE_NEW_IDMAPPER must be selected when configuring the kernel to use this
- feature.
 
 ===========
 Configuring
index 983e14abe7e9d282a9ae017ac34f8bd51749c3a4..c7919c6e3beabf9390714175cbc22d047b05025f 100644 (file)
@@ -53,3 +53,57 @@ lseg maintains an extra reference corresponding to the NFS_LSEG_VALID
 bit which holds it in the pnfs_layout_hdr's list.  When the final lseg
 is removed from the pnfs_layout_hdr's list, the NFS_LAYOUT_DESTROYED
 bit is set, preventing any new lsegs from being added.
+
+layout drivers
+--------------
+
+PNFS utilizes what is called layout drivers. The STD defines 3 basic
+layout types: "files" "objects" and "blocks". For each of these types
+there is a layout-driver with a common function-vectors table which
+are called by the nfs-client pnfs-core to implement the different layout
+types.
+
+Files-layout-driver code is in: fs/nfs/nfs4filelayout.c && nfs4filelayoutdev.c
+Objects-layout-driver code is in: fs/nfs/objlayout/.. directory
+Blocks-layout-driver code is in: fs/nfs/blocklayout/.. directory
+
+objects-layout setup
+--------------------
+
+As part of the full STD implementation the objlayoutdriver.ko needs, at times,
+to automatically login to yet undiscovered iscsi/osd devices. For this the
+driver makes up-calls to a user-mode script called *osd_login*
+
+The path_name of the script to use is by default:
+       /sbin/osd_login.
+This name can be overridden by the Kernel module parameter:
+       objlayoutdriver.osd_login_prog
+
+If the Kernel does not find the osd_login_prog path it will zero it out
+and will not attempt further logins. An admin can then write a new value
+to the objlayoutdriver.osd_login_prog Kernel parameter to re-enable it.
+
+The /sbin/osd_login is part of the nfs-utils package, and should usually
+be installed on distributions that support this Kernel version.
+
+The API to the login script is as follows:
+       Usage: $0 -u <URI> -o <OSDNAME> -s <SYSTEMID>
+       Options:
+               -u              target uri e.g. iscsi://<ip>:<port>
+                               (always exists)
+                               (More protocols can be defined in the future.
+                                The client does not interpret this string it is
+                                passed unchanged as received from the Server)
+               -o              osdname of the requested target OSD
+                               (Might be empty)
+                               (A string which denotes the OSD name, there is a
+                                limit of 64 chars on this string)
+               -s              systemid of the requested target OSD
+                               (Might be empty)
+                               (This string, if not empty, is always a hex
+                                representation of the 20 bytes osd_system_id)
+
+blocks-layout setup
+-------------------
+
+TODO: Document the setup needs of the blocks layout driver
index 247dcfd62034612e09dce56d6d6fffff4f0c957a..1c9a348548dcf70fb70b2ca8d2bb650f02267f7a 100644 (file)
@@ -1672,6 +1672,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        of returning the full 64-bit number.
                        The default is to return 64-bit inode numbers.
 
+       nfs.max_session_slots=
+                       [NFSv4.1] Sets the maximum number of session slots
+                       the client will attempt to negotiate with the server.
+                       This limits the number of simultaneous RPC requests
+                       that the client can send to the NFSv4.1 server.
+                       Note that there is little point in setting this
+                       value higher than the max_tcp_slot_table_limit.
+
        nfs.nfs4_disable_idmapping=
                        [NFSv4] When set to the default of '1', this option
                        ensures that both the RPC level authentication
@@ -1685,6 +1693,21 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        back to using the idmapper.
                        To turn off this behaviour, set the value to '0'.
 
+       nfs.send_implementation_id =
+                       [NFSv4.1] Send client implementation identification
+                       information in exchange_id requests.
+                       If zero, no implementation identification information
+                       will be sent.
+                       The default is to send the implementation identification
+                       information.
+
+
+       objlayoutdriver.osd_login_prog=
+                       [NFS] [OBJLAYOUT] sets the pathname to the program which
+                       is used to automatically discover and login into new
+                       osd-targets. Please see:
+                       Documentation/filesystems/nfs/pnfs.txt for more explanations
+
        nmi_debug=      [KNL,AVR32,SH] Specify one or more actions to take
                        when a NMI is triggered.
                        Format: [state][,regs][,debounce][,die]
@@ -2124,8 +2147,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                                the default.
                                off: Turn ECRC off
                                on: Turn ECRC on.
-               realloc         reallocate PCI resources if allocations done by BIOS
-                               are erroneous.
+               realloc=        Enable/disable reallocating PCI bridge resources
+                               if allocations done by BIOS are too small to
+                               accommodate resources required by all child
+                               devices.
+                               off: Turn realloc off
+                               on: Turn realloc on
+               realloc         same as realloc=on
+               noari           do not use PCIe ARI.
 
        pcie_aspm=      [PCIE] Forcibly enable or disable PCIe Active State Power
                        Management.
@@ -2133,6 +2162,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                force   Enable ASPM even on devices that claim not to support it.
                        WARNING: Forcing ASPM on may cause system lockups.
 
+       pcie_hp=        [PCIE] PCI Express Hotplug driver options:
+               nomsi   Do not use MSI for PCI Express Native Hotplug (this
+                       makes all PCIe ports use INTx for hotplug services).
+
        pcie_ports=     [PCIE] PCIe ports handling:
                auto    Ask the BIOS whether or not to use native PCIe services
                        associated with PCIe ports (PME, hot-plug, AER).  Use
index 95eba3135018ca685b58a35fd06541e180706328..ec9bcb17c57277e1734f8f8baff6014030ee2e03 100644 (file)
@@ -503,7 +503,7 @@ F:  arch/x86/include/asm/geode.h
 AMD IOMMU (AMD-VI)
 M:     Joerg Roedel <joerg.roedel@amd.com>
 L:     iommu@lists.linux-foundation.org
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/joro/linux-2.6-iommu.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git
 S:     Supported
 F:     drivers/iommu/amd_iommu*.[ch]
 F:     include/linux/amd-iommu.h
@@ -5120,7 +5120,7 @@ F:        Documentation/PCI/pci-error-recovery.txt
 F:     Documentation/powerpc/eeh-pci-error-recovery.txt
 
 PCI SUBSYSTEM
-M:     Jesse Barnes <jbarnes@virtuousgeek.org>
+M:     Bjorn Helgaas <bhelgaas@google.com>
 L:     linux-pci@vger.kernel.org
 Q:     http://patchwork.kernel.org/project/linux-pci/list/
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/jbarnes/pci-2.6.git
@@ -5130,7 +5130,7 @@ F:        drivers/pci/
 F:     include/linux/pci*
 
 PCI HOTPLUG
-M:     Jesse Barnes <jbarnes@virtuousgeek.org>
+M:     Bjorn Helgaas <bhelgaas@google.com>
 L:     linux-pci@vger.kernel.org
 S:     Supported
 F:     drivers/pci/hotplug
index 28d0497fd3c7b6b95b48571fff3f6028534de145..d01afb78919c076c9a064c30dd29af8deb9a8bfc 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/dma-mapping.h>
 #include <asm/scatterlist.h>
 #include <asm/machvec.h>
+#include <asm-generic/pci-bridge.h>
 
 /*
  * The following structure is used to manage multiple PCI busses.
@@ -99,12 +100,6 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
        return channel ? 15 : 14;
 }
 
-extern void pcibios_resource_to_bus(struct pci_dev *, struct pci_bus_region *,
-                                   struct resource *);
-
-extern void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
-                                   struct pci_bus_region *region);
-
 #define pci_domain_nr(bus) ((struct pci_controller *)(bus)->sysdata)->index
 
 static inline int pci_proc_domain(struct pci_bus *bus)
index 8c723c1b086a03fd26ca69beabd794da836cc41b..1a629636cc16ae8e17ec16a973d846725eb9c807 100644 (file)
@@ -43,12 +43,10 @@ const char *const pci_mem_names[] = {
 
 const char pci_hae0_name[] = "HAE0";
 
-/* Indicate whether we respect the PCI setup left by console. */
 /*
- * Make this long-lived  so that we know when shutting down
- * whether we probed only or not.
+ * If PCI_PROBE_ONLY in pci_flags is set, we don't change any PCI resource
+ * assignments.
  */
-int pci_probe_only;
 
 /*
  * The PCI controller list.
@@ -215,7 +213,7 @@ pdev_save_srm_config(struct pci_dev *dev)
        struct pdev_srm_saved_conf *tmp;
        static int printed = 0;
 
-       if (!alpha_using_srm || pci_probe_only)
+       if (!alpha_using_srm || pci_has_flag(PCI_PROBE_ONLY))
                return;
 
        if (!printed) {
@@ -242,7 +240,7 @@ pci_restore_srm_config(void)
        struct pdev_srm_saved_conf *tmp;
 
        /* No need to restore if probed only. */
-       if (pci_probe_only)
+       if (pci_has_flag(PCI_PROBE_ONLY))
                return;
 
        /* Restore SRM config. */
@@ -252,47 +250,18 @@ pci_restore_srm_config(void)
 }
 #endif
 
-void __devinit
-pcibios_fixup_resource(struct resource *res, struct resource *root)
-{
-       res->start += root->start;
-       res->end += root->start;
-}
-
-void __devinit
-pcibios_fixup_device_resources(struct pci_dev *dev, struct pci_bus *bus)
-{
-       /* Update device resources.  */
-       struct pci_controller *hose = (struct pci_controller *)bus->sysdata;
-       int i;
-
-       for (i = 0; i < PCI_NUM_RESOURCES; i++) {
-               if (!dev->resource[i].start)
-                       continue;
-               if (dev->resource[i].flags & IORESOURCE_IO)
-                       pcibios_fixup_resource(&dev->resource[i],
-                                              hose->io_space);
-               else if (dev->resource[i].flags & IORESOURCE_MEM)
-                       pcibios_fixup_resource(&dev->resource[i],
-                                              hose->mem_space);
-       }
-}
-
 void __devinit
 pcibios_fixup_bus(struct pci_bus *bus)
 {
        struct pci_dev *dev = bus->self;
 
-       if (pci_probe_only && dev &&
+       if (pci_has_flag(PCI_PROBE_ONLY) && dev &&
                   (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
                pci_read_bridge_bases(bus);
-               pcibios_fixup_device_resources(dev, bus);
        } 
 
        list_for_each_entry(dev, &bus->devices, bus_list) {
                pdev_save_srm_config(dev);
-               if ((dev->class >> 8) != PCI_CLASS_BRIDGE_PCI)
-                       pcibios_fixup_device_resources(dev, bus);
        }
 }
 
@@ -302,42 +271,6 @@ pcibios_update_irq(struct pci_dev *dev, int irq)
        pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
 }
 
-void
-pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
-                        struct resource *res)
-{
-       struct pci_controller *hose = (struct pci_controller *)dev->sysdata;
-       unsigned long offset = 0;
-
-       if (res->flags & IORESOURCE_IO)
-               offset = hose->io_space->start;
-       else if (res->flags & IORESOURCE_MEM)
-               offset = hose->mem_space->start;
-
-       region->start = res->start - offset;
-       region->end = res->end - offset;
-}
-
-void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
-                            struct pci_bus_region *region)
-{
-       struct pci_controller *hose = (struct pci_controller *)dev->sysdata;
-       unsigned long offset = 0;
-
-       if (res->flags & IORESOURCE_IO)
-               offset = hose->io_space->start;
-       else if (res->flags & IORESOURCE_MEM)
-               offset = hose->mem_space->start;
-
-       res->start = region->start + offset;
-       res->end = region->end + offset;
-}
-
-#ifdef CONFIG_HOTPLUG
-EXPORT_SYMBOL(pcibios_resource_to_bus);
-EXPORT_SYMBOL(pcibios_bus_to_resource);
-#endif
-
 int
 pcibios_enable_device(struct pci_dev *dev, int mask)
 {
@@ -374,7 +307,8 @@ pcibios_claim_one_bus(struct pci_bus *b)
 
                        if (r->parent || !r->start || !r->flags)
                                continue;
-                       if (pci_probe_only || (r->flags & IORESOURCE_PCI_FIXED))
+                       if (pci_has_flag(PCI_PROBE_ONLY) ||
+                           (r->flags & IORESOURCE_PCI_FIXED))
                                pci_claim_resource(dev, i);
                }
        }
@@ -416,8 +350,10 @@ common_init_pci(void)
                        hose->mem_space->end = end;
 
                INIT_LIST_HEAD(&resources);
-               pci_add_resource(&resources, hose->io_space);
-               pci_add_resource(&resources, hose->mem_space);
+               pci_add_resource_offset(&resources, hose->io_space,
+                                       hose->io_space->start);
+               pci_add_resource_offset(&resources, hose->mem_space,
+                                       hose->mem_space->start);
 
                bus = pci_scan_root_bus(NULL, next_busno, alpha_mv.pci_ops,
                                        hose, &resources);
index 85457b2d4516dc009ad0407c6b01731add69081b..2b0ac429f5ebc4912e34b41029810fa914124479 100644 (file)
@@ -173,9 +173,6 @@ extern void pci_restore_srm_config(void);
 extern struct pci_controller *hose_head, **hose_tail;
 extern struct pci_controller *pci_isa_hose;
 
-/* Indicate that we trust the console to configure things properly.  */
-extern int pci_probe_only;
-
 extern unsigned long alpha_agpgart_size;
 
 extern void common_init_pci(void);
index 95cfc83ece8f7e771fc62f6567d1a2d0f948fffc..fc8b125086115f7bd2e66e210dfb24753d5d2f13 100644 (file)
@@ -384,7 +384,8 @@ marvel_init_pci(void)
 
        marvel_register_error_handlers();
 
-       pci_probe_only = 1;
+       /* Indicate that we trust the console to configure things properly */
+       pci_set_flags(PCI_PROBE_ONLY);
        common_init_pci();
        locate_and_init_vga(NULL);
 
index f47b30a2a117036db3cd877db99fcc72ca26203d..b8eafa053539005d7ddd0bffb4ee293cd5910891 100644 (file)
@@ -331,7 +331,8 @@ titan_init_pci(void)
         */
        titan_late_init();
  
-       pci_probe_only = 1;
+       /* Indicate that we trust the console to configure things properly */
+       pci_set_flags(PCI_PROBE_ONLY);
        common_init_pci();
        SMC669_Init(0);
        locate_and_init_vga(NULL);
index fb1f1cfce60c098f94966663e0b6d06dc88496cd..dcb13494ca0d6a72bbdc5b92ac4cfde178225bdd 100644 (file)
@@ -299,8 +299,8 @@ int __init it8152_pci_setup(int nr, struct pci_sys_data *sys)
                goto err1;
        }
 
-       pci_add_resource(&sys->resources, &it8152_io);
-       pci_add_resource(&sys->resources, &it8152_mem);
+       pci_add_resource_offset(&sys->resources, &it8152_io, sys->io_offset);
+       pci_add_resource_offset(&sys->resources, &it8152_mem, sys->mem_offset);
 
        if (platform_notify || platform_notify_remove) {
                printk(KERN_ERR "PCI: Can't use platform_notify\n");
index da337ba57ffd786ebcdd20efd60d98d22976357c..a98a2e112fae0ef9b33ac8319398dde8418b0b09 100644 (file)
@@ -57,14 +57,6 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
                                enum pci_mmap_state mmap_state, int write_combine);
 
-extern void
-pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
-                        struct resource *res);
-
-extern void
-pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
-                       struct pci_bus_region *region);
-
 /*
  * Dummy implementation; always return 0.
  */
index f58ba3589908ce541a7b8978712c19ed14bc5a00..632df9a66f8c1e989a3d369457579b5613c65aab 100644 (file)
@@ -16,7 +16,6 @@
 #include <asm/mach/pci.h>
 
 static int debug_pci;
-static int use_firmware;
 
 /*
  * We can't use pci_find_device() here since we are
@@ -294,28 +293,6 @@ static inline int pdev_bad_for_parity(struct pci_dev *dev)
 
 }
 
-/*
- * Adjust the device resources from bus-centric to Linux-centric.
- */
-static void __devinit
-pdev_fixup_device_resources(struct pci_sys_data *root, struct pci_dev *dev)
-{
-       resource_size_t offset;
-       int i;
-
-       for (i = 0; i < PCI_NUM_RESOURCES; i++) {
-               if (dev->resource[i].start == 0)
-                       continue;
-               if (dev->resource[i].flags & IORESOURCE_MEM)
-                       offset = root->mem_offset;
-               else
-                       offset = root->io_offset;
-
-               dev->resource[i].start += offset;
-               dev->resource[i].end   += offset;
-       }
-}
-
 /*
  * pcibios_fixup_bus - Called after each bus is probed,
  * but before its children are examined.
@@ -333,8 +310,6 @@ void pcibios_fixup_bus(struct pci_bus *bus)
        list_for_each_entry(dev, &bus->devices, bus_list) {
                u16 status;
 
-               pdev_fixup_device_resources(root, dev);
-
                pci_read_config_word(dev, PCI_STATUS, &status);
 
                /*
@@ -399,43 +374,6 @@ void pcibios_fixup_bus(struct pci_bus *bus)
 EXPORT_SYMBOL(pcibios_fixup_bus);
 #endif
 
-/*
- * Convert from Linux-centric to bus-centric addresses for bridge devices.
- */
-void
-pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
-                        struct resource *res)
-{
-       struct pci_sys_data *root = dev->sysdata;
-       unsigned long offset = 0;
-
-       if (res->flags & IORESOURCE_IO)
-               offset = root->io_offset;
-       if (res->flags & IORESOURCE_MEM)
-               offset = root->mem_offset;
-
-       region->start = res->start - offset;
-       region->end   = res->end - offset;
-}
-EXPORT_SYMBOL(pcibios_resource_to_bus);
-
-void __devinit
-pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
-                       struct pci_bus_region *region)
-{
-       struct pci_sys_data *root = dev->sysdata;
-       unsigned long offset = 0;
-
-       if (res->flags & IORESOURCE_IO)
-               offset = root->io_offset;
-       if (res->flags & IORESOURCE_MEM)
-               offset = root->mem_offset;
-
-       res->start = region->start + offset;
-       res->end   = region->end + offset;
-}
-EXPORT_SYMBOL(pcibios_bus_to_resource);
-
 /*
  * Swizzle the device pin each time we cross a bridge.
  * This might update pin and returns the slot number.
@@ -497,10 +435,10 @@ static void __init pcibios_init_hw(struct hw_pci *hw)
 
                if (ret > 0) {
                        if (list_empty(&sys->resources)) {
-                               pci_add_resource(&sys->resources,
-                                                &ioport_resource);
-                               pci_add_resource(&sys->resources,
-                                                &iomem_resource);
+                               pci_add_resource_offset(&sys->resources,
+                                        &ioport_resource, sys->io_offset);
+                               pci_add_resource_offset(&sys->resources,
+                                        &iomem_resource, sys->mem_offset);
                        }
 
                        sys->bus = hw->scan(nr, sys);
@@ -525,6 +463,7 @@ void __init pci_common_init(struct hw_pci *hw)
 
        INIT_LIST_HEAD(&hw->buses);
 
+       pci_add_flags(PCI_REASSIGN_ALL_RSRC);
        if (hw->preinit)
                hw->preinit();
        pcibios_init_hw(hw);
@@ -536,7 +475,7 @@ void __init pci_common_init(struct hw_pci *hw)
        list_for_each_entry(sys, &hw->buses, node) {
                struct pci_bus *bus = sys->bus;
 
-               if (!use_firmware) {
+               if (!pci_has_flag(PCI_PROBE_ONLY)) {
                        /*
                         * Size the bridge windows.
                         */
@@ -573,7 +512,7 @@ char * __init pcibios_setup(char *str)
                debug_pci = 1;
                return NULL;
        } else if (!strcmp(str, "firmware")) {
-               use_firmware = 1;
+               pci_add_flags(PCI_PROBE_ONLY);
                return NULL;
        }
        return str;
index e159d69967c91b562ff6590d59141198447c534f..79d001f831e056046acb1780d8ba9ccf64830ad4 100644 (file)
@@ -155,8 +155,8 @@ static int cns3xxx_pci_setup(int nr, struct pci_sys_data *sys)
        BUG_ON(request_resource(&iomem_resource, res_io) ||
               request_resource(&iomem_resource, res_mem));
 
-       pci_add_resource(&sys->resources, res_io);
-       pci_add_resource(&sys->resources, res_mem);
+       pci_add_resource_offset(&sys->resources, res_io, sys->io_offset);
+       pci_add_resource_offset(&sys->resources, res_mem, sys->mem_offset);
 
        return 1;
 }
index 52e96d397ba8cfffe68b1085f3cdbf5eee2bc463..48a032005ea36c04e55c368f8bfe813ab45ec404 100644 (file)
@@ -69,7 +69,7 @@ static int __init dove_pcie_setup(int nr, struct pci_sys_data *sys)
        pp->res[0].flags = IORESOURCE_IO;
        if (request_resource(&ioport_resource, &pp->res[0]))
                panic("Request PCIe IO resource failed\n");
-       pci_add_resource(&sys->resources, &pp->res[0]);
+       pci_add_resource_offset(&sys->resources, &pp->res[0], sys->io_offset);
 
        /*
         * IORESOURCE_MEM
@@ -88,7 +88,7 @@ static int __init dove_pcie_setup(int nr, struct pci_sys_data *sys)
        pp->res[1].flags = IORESOURCE_MEM;
        if (request_resource(&iomem_resource, &pp->res[1]))
                panic("Request PCIe Memory resource failed\n");
-       pci_add_resource(&sys->resources, &pp->res[1]);
+       pci_add_resource_offset(&sys->resources, &pp->res[1], sys->mem_offset);
 
        return 1;
 }
index f685650c25d7b00be7ca14939c41ce0cde81d812..3194d3f73503fe32efaf86cdecd46442e6afbcf9 100644 (file)
@@ -275,11 +275,13 @@ int __init dc21285_setup(int nr, struct pci_sys_data *sys)
        allocate_resource(&iomem_resource, &res[0], 0x40000000,
                          0x80000000, 0xffffffff, 0x40000000, NULL, NULL);
 
-       pci_add_resource(&sys->resources, &ioport_resource);
-       pci_add_resource(&sys->resources, &res[0]);
-       pci_add_resource(&sys->resources, &res[1]);
        sys->mem_offset  = DC21285_PCI_MEM;
 
+       pci_add_resource_offset(&sys->resources,
+                               &ioport_resource, sys->io_offset);
+       pci_add_resource_offset(&sys->resources, &res[0], sys->mem_offset);
+       pci_add_resource_offset(&sys->resources, &res[1], sys->mem_offset);
+
        return 1;
 }
 
index 3c82566acece2d8296d343eceddc8dad389a93c3..015be770c1d87e04261e0d62ebec3a07ec548b40 100644 (file)
@@ -378,9 +378,10 @@ static int __init pci_v3_setup_resources(struct pci_sys_data *sys)
         * the mem resource for this bus
         * the prefetch mem resource for this bus
         */
-       pci_add_resource(&sys->resources, &ioport_resource);
-       pci_add_resource(&sys->resources, &non_mem);
-       pci_add_resource(&sys->resources, &pre_mem);
+       pci_add_resource_offset(&sys->resources,
+                               &ioport_resource, sys->io_offset);
+       pci_add_resource_offset(&sys->resources, &non_mem, sys->mem_offset);
+       pci_add_resource_offset(&sys->resources, &pre_mem, sys->mem_offset);
 
        return 1;
 }
index b8f5a87365112d0622b017dcbd2c7507e1e2934c..861cb12ef4363b69e78e83c34b4bc0b3c1c5ccbd 100644 (file)
@@ -1084,8 +1084,8 @@ int iop13xx_pci_setup(int nr, struct pci_sys_data *sys)
        request_resource(&ioport_resource, &res[0]);
        request_resource(&iomem_resource, &res[1]);
 
-       pci_add_resource(&sys->resources, &res[0]);
-       pci_add_resource(&sys->resources, &res[1]);
+       pci_add_resource_offset(&sys->resources, &res[0], sys->io_offset);
+       pci_add_resource_offset(&sys->resources, &res[1], sys->mem_offset);
 
        return 1;
 }
index f53e911ec94a9f475d3d65842cdb22d66e0b0f67..d519944653adb32c544874c0266049851abfb8db 100644 (file)
@@ -134,11 +134,11 @@ static void ixdp2400_pci_postinit(void)
 
        if (ixdp2x00_master_npu()) {
                dev = pci_get_bus_and_slot(1, IXDP2400_SLAVE_ENET_DEVFN);
-               pci_remove_bus_device(dev);
+               pci_stop_and_remove_bus_device(dev);
                pci_dev_put(dev);
        } else {
                dev = pci_get_bus_and_slot(1, IXDP2400_MASTER_ENET_DEVFN);
-               pci_remove_bus_device(dev);
+               pci_stop_and_remove_bus_device(dev);
                pci_dev_put(dev);
 
                ixdp2x00_slave_pci_postinit();
index a2e7c393e74fb198d3d3aad258c88bd17ce2cc08..b415febd2025fcf0022547d8bbea57b388b06bfc 100644 (file)
@@ -262,14 +262,14 @@ int __init ixdp2800_pci_init(void)
                pci_common_init(&ixdp2800_pci);
                if (ixdp2x00_master_npu()) {
                        dev = pci_get_bus_and_slot(1, IXDP2800_SLAVE_ENET_DEVFN);
-                       pci_remove_bus_device(dev);
+                       pci_stop_and_remove_bus_device(dev);
                        pci_dev_put(dev);
 
                        ixdp2800_master_enable_slave();
                        ixdp2800_master_wait_for_slave_bus_scan();
                } else {
                        dev = pci_get_bus_and_slot(1, IXDP2800_MASTER_ENET_DEVFN);
-                       pci_remove_bus_device(dev);
+                       pci_stop_and_remove_bus_device(dev);
                        pci_dev_put(dev);
                }
        }
index 634b6c852f68f58d0d8cf368e45cf0946b0987a7..dd9838299068266e3315c5fbd21c302834fabf0d 100644 (file)
@@ -239,12 +239,12 @@ void ixdp2x00_slave_pci_postinit(void)
         * Remove PMC device is there is one
         */
        if((dev = pci_get_bus_and_slot(1, IXDP2X00_PMC_DEVFN))) {
-               pci_remove_bus_device(dev);
+               pci_stop_and_remove_bus_device(dev);
                pci_dev_put(dev);
        }
 
        dev = pci_get_bus_and_slot(0, IXDP2X00_21555_DEVFN);
-       pci_remove_bus_device(dev);
+       pci_stop_and_remove_bus_device(dev);
        pci_dev_put(dev);
 }
 
index 626fda435aa921b8ecaa91d4e7e25da9abc31a01..49c36f3cd602c551be909d48a1b3f78a11616dfa 100644 (file)
@@ -243,8 +243,10 @@ int ixp2000_pci_setup(int nr, struct pci_sys_data *sys)
        if (nr >= 1)
                return 0;
 
-       pci_add_resource(&sys->resources, &ixp2000_pci_io_space);
-       pci_add_resource(&sys->resources, &ixp2000_pci_mem_space);
+       pci_add_resource_offset(&sys->resources,
+                               &ixp2000_pci_io_space, sys->io_offset);
+       pci_add_resource_offset(&sys->resources,
+                               &ixp2000_pci_mem_space, sys->mem_offset);
 
        return 1;
 }
index 25b5c462cea2015703924bf74bf591d3ababb2c5..3cbbd3208fa8994dafc8c5247d49924921aa36a7 100644 (file)
@@ -281,8 +281,10 @@ int ixp23xx_pci_setup(int nr, struct pci_sys_data *sys)
        if (nr >= 1)
                return 0;
 
-       pci_add_resource(&sys->resources, &ixp23xx_pci_io_space);
-       pci_add_resource(&sys->resources, &ixp23xx_pci_mem_space);
+       pci_add_resource_offset(&sys->resources,
+                               &ixp23xx_pci_io_space, sys->io_offset);
+       pci_add_resource_offset(&sys->resources,
+                               &ixp23xx_pci_mem_space, sys->mem_offset);
 
        return 1;
 }
index 5eff15f24bc27e5a4f12dd56dab5eaea8cf4c118..8508882b13f0303c36d2f39edf043097c05c1961 100644 (file)
@@ -472,8 +472,8 @@ int ixp4xx_setup(int nr, struct pci_sys_data *sys)
        request_resource(&ioport_resource, &res[0]);
        request_resource(&iomem_resource, &res[1]);
 
-       pci_add_resource(&sys->resources, &res[0]);
-       pci_add_resource(&sys->resources, &res[1]);
+       pci_add_resource_offset(&sys->resources, &res[0], sys->io_offset);
+       pci_add_resource_offset(&sys->resources, &res[1], sys->mem_offset);
 
        platform_notify = ixp4xx_pci_platform_notify;
        platform_notify_remove = ixp4xx_pci_platform_notify_remove;
index a066a6d8d9d2126bc577401b8710e16a1b40d133..f56a0118c1bb1f2fc42b09f19917706e6b1182d2 100644 (file)
@@ -198,9 +198,9 @@ static int __init kirkwood_pcie_setup(int nr, struct pci_sys_data *sys)
        if (request_resource(&iomem_resource, &pp->res[1]))
                panic("Request PCIe%d Memory resource failed\n", index);
 
-       pci_add_resource(&sys->resources, &pp->res[0]);
-       pci_add_resource(&sys->resources, &pp->res[1]);
        sys->io_offset = 0;
+       pci_add_resource_offset(&sys->resources, &pp->res[0], sys->io_offset);
+       pci_add_resource_offset(&sys->resources, &pp->res[1], sys->mem_offset);
 
        /*
         * Generic PCIe unit setup.
index b26f992071df4553df94bf2da1d8356af03cc56f..acc7014358172dc5987b7aff9c52518b9346f311 100644 (file)
@@ -169,8 +169,8 @@ static int __init ks8695_pci_setup(int nr, struct pci_sys_data *sys)
        request_resource(&iomem_resource, &pci_mem);
        request_resource(&ioport_resource, &pci_io);
 
-       pci_add_resource(&sys->resources, &pci_io);
-       pci_add_resource(&sys->resources, &pci_mem);
+       pci_add_resource_offset(&sys->resources, &pci_io, sys->io_offset);
+       pci_add_resource_offset(&sys->resources, &pci_mem, sys->mem_offset);
 
        /* Assign and enable processor bridge */
        ks8695_local_writeconfig(PCI_BASE_ADDRESS_0, KS8695_PCIMEM_PA);
index 8459f6d7d8caff6091fe05d2862895ba94460626..df3e38055a246380a6e631de74e5aff3d3d887d8 100644 (file)
@@ -155,8 +155,8 @@ static int __init mv78xx0_pcie_setup(int nr, struct pci_sys_data *sys)
        orion_pcie_set_local_bus_nr(pp->base, sys->busnr);
        orion_pcie_setup(pp->base);
 
-       pci_add_resource(&sys->resources, &pp->res[0]);
-       pci_add_resource(&sys->resources, &pp->res[1]);
+       pci_add_resource_offset(&sys->resources, &pp->res[0], sys->io_offset);
+       pci_add_resource_offset(&sys->resources, &pp->res[1], sys->mem_offset);
 
        return 1;
 }
index 09a045f0c406c50b64275f450993874b1af18f02..d6a91948e4dc58bea1124691eb336ddfd543e4ac 100644 (file)
@@ -171,13 +171,14 @@ static int __init pcie_setup(struct pci_sys_data *sys)
        /*
         * IORESOURCE_IO
         */
+       sys->io_offset = 0;
        res[0].name = "PCIe I/O Space";
        res[0].flags = IORESOURCE_IO;
        res[0].start = ORION5X_PCIE_IO_BUS_BASE;
        res[0].end = res[0].start + ORION5X_PCIE_IO_SIZE - 1;
        if (request_resource(&ioport_resource, &res[0]))
                panic("Request PCIe IO resource failed\n");
-       pci_add_resource(&sys->resources, &res[0]);
+       pci_add_resource_offset(&sys->resources, &res[0], sys->io_offset);
 
        /*
         * IORESOURCE_MEM
@@ -188,9 +189,7 @@ static int __init pcie_setup(struct pci_sys_data *sys)
        res[1].end = res[1].start + ORION5X_PCIE_MEM_SIZE - 1;
        if (request_resource(&iomem_resource, &res[1]))
                panic("Request PCIe Memory resource failed\n");
-       pci_add_resource(&sys->resources, &res[1]);
-
-       sys->io_offset = 0;
+       pci_add_resource_offset(&sys->resources, &res[1], sys->mem_offset);
 
        return 1;
 }
@@ -499,13 +498,14 @@ static int __init pci_setup(struct pci_sys_data *sys)
        /*
         * IORESOURCE_IO
         */
+       sys->io_offset = 0;
        res[0].name = "PCI I/O Space";
        res[0].flags = IORESOURCE_IO;
        res[0].start = ORION5X_PCI_IO_BUS_BASE;
        res[0].end = res[0].start + ORION5X_PCI_IO_SIZE - 1;
        if (request_resource(&ioport_resource, &res[0]))
                panic("Request PCI IO resource failed\n");
-       pci_add_resource(&sys->resources, &res[0]);
+       pci_add_resource_offset(&sys->resources, &res[0], sys->io_offset);
 
        /*
         * IORESOURCE_MEM
@@ -516,9 +516,7 @@ static int __init pci_setup(struct pci_sys_data *sys)
        res[1].end = res[1].start + ORION5X_PCI_MEM_SIZE - 1;
        if (request_resource(&iomem_resource, &res[1]))
                panic("Request PCI Memory resource failed\n");
-       pci_add_resource(&sys->resources, &res[1]);
-
-       sys->io_offset = 0;
+       pci_add_resource_offset(&sys->resources, &res[1], sys->mem_offset);
 
        return 1;
 }
index 0d01ca788922931f854846c03203089b8ae33648..b466bca9c651a1dfed4d5ddb20850bba6bf5ae8b 100644 (file)
@@ -244,9 +244,11 @@ static int __init pci_nanoengine_setup_resources(struct pci_sys_data *sys)
                printk(KERN_ERR "PCI: unable to allocate prefetchable\n");
                return -EBUSY;
        }
-       pci_add_resource(&sys->resources, &pci_io_ports);
-       pci_add_resource(&sys->resources, &pci_non_prefetchable_memory);
-       pci_add_resource(&sys->resources, &pci_prefetchable_memory);
+       pci_add_resource_offset(&sys->resources, &pci_io_ports, sys->io_offset);
+       pci_add_resource_offset(&sys->resources,
+                               &pci_non_prefetchable_memory, sys->mem_offset);
+       pci_add_resource_offset(&sys->resources,
+                               &pci_prefetchable_memory, sys->mem_offset);
 
        return 1;
 }
diff --git a/arch/arm/mach-tegra/include/mach/smmu.h b/arch/arm/mach-tegra/include/mach/smmu.h
new file mode 100644 (file)
index 0000000..dad403a
--- /dev/null
@@ -0,0 +1,63 @@
+/*
+ * IOMMU API for SMMU in Tegra30
+ *
+ * Copyright (c) 2012, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef        MACH_SMMU_H
+#define        MACH_SMMU_H
+
+enum smmu_hwgrp {
+       HWGRP_AFI,
+       HWGRP_AVPC,
+       HWGRP_DC,
+       HWGRP_DCB,
+       HWGRP_EPP,
+       HWGRP_G2,
+       HWGRP_HC,
+       HWGRP_HDA,
+       HWGRP_ISP,
+       HWGRP_MPE,
+       HWGRP_NV,
+       HWGRP_NV2,
+       HWGRP_PPCS,
+       HWGRP_SATA,
+       HWGRP_VDE,
+       HWGRP_VI,
+
+       HWGRP_COUNT,
+
+       HWGRP_END = ~0,
+};
+
+#define HWG_AFI                (1 << HWGRP_AFI)
+#define HWG_AVPC       (1 << HWGRP_AVPC)
+#define HWG_DC         (1 << HWGRP_DC)
+#define HWG_DCB                (1 << HWGRP_DCB)
+#define HWG_EPP                (1 << HWGRP_EPP)
+#define HWG_G2         (1 << HWGRP_G2)
+#define HWG_HC         (1 << HWGRP_HC)
+#define HWG_HDA                (1 << HWGRP_HDA)
+#define HWG_ISP                (1 << HWGRP_ISP)
+#define HWG_MPE                (1 << HWGRP_MPE)
+#define HWG_NV         (1 << HWGRP_NV)
+#define HWG_NV2                (1 << HWGRP_NV2)
+#define HWG_PPCS       (1 << HWGRP_PPCS)
+#define HWG_SATA       (1 << HWGRP_SATA)
+#define HWG_VDE                (1 << HWGRP_VDE)
+#define HWG_VI         (1 << HWGRP_VI)
+
+#endif /* MACH_SMMU_H */
index af8b634357278b5682955d7eca3f4a3f946e0895..14b29ab5d8f0d006ae4230088d9071b9ad41d234 100644 (file)
@@ -408,7 +408,7 @@ static int tegra_pcie_setup(int nr, struct pci_sys_data *sys)
        pp->res[0].flags = IORESOURCE_IO;
        if (request_resource(&ioport_resource, &pp->res[0]))
                panic("Request PCIe IO resource failed\n");
-       pci_add_resource(&sys->resources, &pp->res[0]);
+       pci_add_resource_offset(&sys->resources, &pp->res[0], sys->io_offset);
 
        /*
         * IORESOURCE_MEM
@@ -427,7 +427,7 @@ static int tegra_pcie_setup(int nr, struct pci_sys_data *sys)
        pp->res[1].flags = IORESOURCE_MEM;
        if (request_resource(&iomem_resource, &pp->res[1]))
                panic("Request PCIe Memory resource failed\n");
-       pci_add_resource(&sys->resources, &pp->res[1]);
+       pci_add_resource_offset(&sys->resources, &pp->res[1], sys->mem_offset);
 
        /*
         * IORESOURCE_MEM | IORESOURCE_PREFETCH
@@ -446,7 +446,7 @@ static int tegra_pcie_setup(int nr, struct pci_sys_data *sys)
        pp->res[2].flags = IORESOURCE_MEM | IORESOURCE_PREFETCH;
        if (request_resource(&iomem_resource, &pp->res[2]))
                panic("Request PCIe Prefetch Memory resource failed\n");
-       pci_add_resource(&sys->resources, &pp->res[2]);
+       pci_add_resource_offset(&sys->resources, &pp->res[2], sys->mem_offset);
 
        return 1;
 }
index 90069bce23bc3c4b2b1d88096e0eca5e8dde9841..51733b022d0433b2f1704f720fd93ed3a6a9af03 100644 (file)
@@ -219,9 +219,9 @@ static int __init pci_versatile_setup_resources(struct list_head *resources)
         * the mem resource for this bus
         * the prefetch mem resource for this bus
         */
-       pci_add_resource(resources, &io_mem);
-       pci_add_resource(resources, &non_mem);
-       pci_add_resource(resources, &pre_mem);
+       pci_add_resource_offset(resources, &io_mem, sys->io_offset);
+       pci_add_resource_offset(resources, &non_mem, sys->mem_offset);
+       pci_add_resource_offset(resources, &pre_mem, sys->mem_offset);
 
        goto out;
 
index e62956e1203094bc6daa022486b93bd490c70057..4614208369f1edc27a4809859dd5bd0e13276908 100644 (file)
@@ -32,9 +32,6 @@ EXPORT_SYMBOL(pcibios_min_io);
 unsigned long pcibios_min_mem = 0x01000000;
 EXPORT_SYMBOL(pcibios_min_mem);
 
-unsigned int pci_flags = PCI_REASSIGN_ALL_RSRC;
-EXPORT_SYMBOL(pci_flags);
-
 void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
 {
        if ((unsigned long)addr >= VMALLOC_START &&
index f4d40a27111e57328739f9ac3111564dbcda8395..72768356447ae9b8b3aa6b56314cc172708d998d 100644 (file)
@@ -215,8 +215,8 @@ int iop3xx_pci_setup(int nr, struct pci_sys_data *sys)
        sys->mem_offset = IOP3XX_PCI_LOWER_MEM_PA - *IOP3XX_OMWTVR0;
        sys->io_offset  = IOP3XX_PCI_LOWER_IO_PA - *IOP3XX_OIOWTVR;
 
-       pci_add_resource(&sys->resources, &res[0]);
-       pci_add_resource(&sys->resources, &res[1]);
+       pci_add_resource_offset(&sys->resources, &res[0], sys->io_offset);
+       pci_add_resource_offset(&sys->resources, &res[1], sys->mem_offset);
 
        return 1;
 }
index 279b38ae74aab41bf0e5ed73fcdd2eb1b7a295c1..b22e5f5fa5939ea0ab41ba929b7bfddf0d9c122e 100644 (file)
@@ -108,12 +108,6 @@ static inline int pci_proc_domain(struct pci_bus *bus)
        return (pci_domain_nr(bus) != 0);
 }
 
-extern void pcibios_resource_to_bus(struct pci_dev *dev,
-               struct pci_bus_region *region, struct resource *res);
-
-extern void pcibios_bus_to_resource(struct pci_dev *dev,
-               struct resource *res, struct pci_bus_region *region);
-
 static inline struct resource *
 pcibios_select_root(struct pci_dev *pdev, struct resource *res)
 {
index f82f5d4b65fdace5e6ebdbff09a85ea02857a57a..d1ce3200147c9465c2af3d69a79ba0cae20036c0 100644 (file)
@@ -320,7 +320,8 @@ static __devinit acpi_status add_window(struct acpi_resource *res, void *data)
         * Ignore these tiny memory ranges */
        if (!((window->resource.flags & IORESOURCE_MEM) &&
              (window->resource.end - window->resource.start < 16)))
-               pci_add_resource(&info->resources, &window->resource);
+               pci_add_resource_offset(&info->resources, &window->resource,
+                                       window->offset);
 
        return AE_OK;
 }
@@ -395,54 +396,6 @@ out1:
        return NULL;
 }
 
-void pcibios_resource_to_bus(struct pci_dev *dev,
-               struct pci_bus_region *region, struct resource *res)
-{
-       struct pci_controller *controller = PCI_CONTROLLER(dev);
-       unsigned long offset = 0;
-       int i;
-
-       for (i = 0; i < controller->windows; i++) {
-               struct pci_window *window = &controller->window[i];
-               if (!(window->resource.flags & res->flags))
-                       continue;
-               if (window->resource.start > res->start)
-                       continue;
-               if (window->resource.end < res->end)
-                       continue;
-               offset = window->offset;
-               break;
-       }
-
-       region->start = res->start - offset;
-       region->end = res->end - offset;
-}
-EXPORT_SYMBOL(pcibios_resource_to_bus);
-
-void pcibios_bus_to_resource(struct pci_dev *dev,
-               struct resource *res, struct pci_bus_region *region)
-{
-       struct pci_controller *controller = PCI_CONTROLLER(dev);
-       unsigned long offset = 0;
-       int i;
-
-       for (i = 0; i < controller->windows; i++) {
-               struct pci_window *window = &controller->window[i];
-               if (!(window->resource.flags & res->flags))
-                       continue;
-               if (window->resource.start - window->offset > region->start)
-                       continue;
-               if (window->resource.end - window->offset < region->end)
-                       continue;
-               offset = window->offset;
-               break;
-       }
-
-       res->start = region->start + offset;
-       res->end = region->end + offset;
-}
-EXPORT_SYMBOL(pcibios_bus_to_resource);
-
 static int __devinit is_valid_resource(struct pci_dev *dev, int idx)
 {
        unsigned int i, type_mask = IORESOURCE_IO | IORESOURCE_MEM;
@@ -464,15 +417,11 @@ static int __devinit is_valid_resource(struct pci_dev *dev, int idx)
 static void __devinit
 pcibios_fixup_resources(struct pci_dev *dev, int start, int limit)
 {
-       struct pci_bus_region region;
        int i;
 
        for (i = start; i < limit; i++) {
                if (!dev->resource[i].flags)
                        continue;
-               region.start = dev->resource[i].start;
-               region.end = dev->resource[i].end;
-               pcibios_bus_to_resource(dev, &dev->resource[i], &region);
                if ((is_valid_resource(dev, i)))
                        pci_claim_resource(dev, i);
        }
index 0a36f082eaf1d9e10ab95484c9e1118714d3c44a..238e2c511d94e735fb823cb612d30a7333a28510 100644 (file)
@@ -297,7 +297,8 @@ sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus)
        s64 status = 0;
        struct pci_controller *controller;
        struct pcibus_bussoft *prom_bussoft_ptr;
-
+       LIST_HEAD(resources);
+       int i;
 
        status = sal_get_pcibus_info((u64) segment, (u64) busnum,
                                     (u64) ia64_tpa(&prom_bussoft_ptr));
@@ -315,7 +316,15 @@ sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus)
         */
        controller->platform_data = prom_bussoft_ptr;
 
-       bus = pci_scan_bus(busnum, &pci_root_ops, controller);
+       sn_legacy_pci_window_fixup(controller,
+                                  prom_bussoft_ptr->bs_legacy_io,
+                                  prom_bussoft_ptr->bs_legacy_mem);
+       for (i = 0; i < controller->windows; i++)
+               pci_add_resource_offset(&resources,
+                                       &controller->window[i].resource,
+                                       controller->window[i].offset);
+       bus = pci_scan_root_bus(NULL, busnum, &pci_root_ops, controller,
+                               &resources);
        if (bus == NULL)
                goto error_return; /* error, or bus already scanned */
 
@@ -348,9 +357,6 @@ sn_bus_fixup(struct pci_bus *bus)
                        return;
                }
                sn_common_bus_fixup(bus, prom_bussoft_ptr);
-               sn_legacy_pci_window_fixup(PCI_CONTROLLER(bus),
-                                          prom_bussoft_ptr->bs_legacy_io,
-                                          prom_bussoft_ptr->bs_legacy_mem);
         }
         list_for_each_entry(pci_dev, &bus->devices, bus_list) {
                 sn_io_slot_fixup(pci_dev);
index e9834b2991d07fbca13aa127cacdd59bd89a1ee4..cb5d39794800fdc0141c5e69bd63e985862e386d 100644 (file)
@@ -10,7 +10,6 @@
 #include <linux/pci.h>
 #include <linux/list.h>
 #include <linux/ioport.h>
-#include <asm-generic/pci-bridge.h>
 
 struct device_node;
 
index 033137628e8a0aea3681e5ce7cc204b0ce9f2d66..a0da88bf70c575ab9000fb2d2cae86a01fd055f6 100644 (file)
@@ -94,14 +94,6 @@ extern int pci_mmap_legacy_page_range(struct pci_bus *bus,
  */
 #define PCI_DMA_BUS_IS_PHYS     (1)
 
-extern void pcibios_resource_to_bus(struct pci_dev *dev,
-                       struct pci_bus_region *region,
-                       struct resource *res);
-
-extern void pcibios_bus_to_resource(struct pci_dev *dev,
-                       struct resource *res,
-                       struct pci_bus_region *region);
-
 static inline struct resource *pcibios_select_root(struct pci_dev *pdev,
                        struct resource *res)
 {
index 85f2ac1230a8f5e89d78955f67819999f69367c3..d10403dadd2b3a2021ded3f26ad052640a457f75 100644 (file)
@@ -46,9 +46,6 @@ static int global_phb_number;         /* Global phb counter */
 /* ISA Memory physical address */
 resource_size_t isa_mem_base;
 
-/* Default PCI flags is 0 on ppc32, modified at boot on ppc64 */
-unsigned int pci_flags;
-
 static struct dma_map_ops *pci_dma_ops = &dma_direct_ops;
 
 unsigned long isa_io_base;
@@ -833,64 +830,7 @@ int pci_proc_domain(struct pci_bus *bus)
 {
        struct pci_controller *hose = pci_bus_to_host(bus);
 
-       if (!(pci_flags & PCI_ENABLE_PROC_DOMAINS))
-               return 0;
-       if (pci_flags & PCI_COMPAT_DOMAIN_0)
-               return hose->global_number != 0;
-       return 1;
-}
-
-void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
-                            struct resource *res)
-{
-       resource_size_t offset = 0, mask = (resource_size_t)-1;
-       struct pci_controller *hose = pci_bus_to_host(dev->bus);
-
-       if (!hose)
-               return;
-       if (res->flags & IORESOURCE_IO) {
-               offset = (unsigned long)hose->io_base_virt - _IO_BASE;
-               mask = 0xffffffffu;
-       } else if (res->flags & IORESOURCE_MEM)
-               offset = hose->pci_mem_offset;
-
-       region->start = (res->start - offset) & mask;
-       region->end = (res->end - offset) & mask;
-}
-EXPORT_SYMBOL(pcibios_resource_to_bus);
-
-void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
-                            struct pci_bus_region *region)
-{
-       resource_size_t offset = 0, mask = (resource_size_t)-1;
-       struct pci_controller *hose = pci_bus_to_host(dev->bus);
-
-       if (!hose)
-               return;
-       if (res->flags & IORESOURCE_IO) {
-               offset = (unsigned long)hose->io_base_virt - _IO_BASE;
-               mask = 0xffffffffu;
-       } else if (res->flags & IORESOURCE_MEM)
-               offset = hose->pci_mem_offset;
-       res->start = (region->start + offset) & mask;
-       res->end = (region->end + offset) & mask;
-}
-EXPORT_SYMBOL(pcibios_bus_to_resource);
-
-/* Fixup a bus resource into a linux resource */
-static void __devinit fixup_resource(struct resource *res, struct pci_dev *dev)
-{
-       struct pci_controller *hose = pci_bus_to_host(dev->bus);
-       resource_size_t offset = 0, mask = (resource_size_t)-1;
-
-       if (res->flags & IORESOURCE_IO) {
-               offset = (unsigned long)hose->io_base_virt - _IO_BASE;
-               mask = 0xffffffffu;
-       } else if (res->flags & IORESOURCE_MEM)
-               offset = hose->pci_mem_offset;
-
-       res->start = (res->start + offset) & mask;
-       res->end = (res->end + offset) & mask;
+       return 0;
 }
 
 /* This header fixup will do the resource fixup for all devices as they are
@@ -910,13 +850,7 @@ static void __devinit pcibios_fixup_resources(struct pci_dev *dev)
                struct resource *res = dev->resource + i;
                if (!res->flags)
                        continue;
-               /* On platforms that have PCI_PROBE_ONLY set, we don't
-                * consider 0 as an unassigned BAR value. It's technically
-                * a valid value, but linux doesn't like it... so when we can
-                * re-assign things, we do so, but if we can't, we keep it
-                * around and hope for the best...
-                */
-               if (res->start == 0 && !(pci_flags & PCI_PROBE_ONLY)) {
+               if (res->start == 0) {
                        pr_debug("PCI:%s Resource %d %016llx-%016llx [%x]" \
                                                        "is unassigned\n",
                                 pci_name(dev), i,
@@ -929,18 +863,11 @@ static void __devinit pcibios_fixup_resources(struct pci_dev *dev)
                        continue;
                }
 
-               pr_debug("PCI:%s Resource %d %016llx-%016llx [%x] fixup...\n",
+               pr_debug("PCI:%s Resource %d %016llx-%016llx [%x]\n",
                         pci_name(dev), i,
                         (unsigned long long)res->start,\
                         (unsigned long long)res->end,
                         (unsigned int)res->flags);
-
-               fixup_resource(res, dev);
-
-               pr_debug("PCI:%s            %016llx-%016llx\n",
-                        pci_name(dev),
-                        (unsigned long long)res->start,
-                        (unsigned long long)res->end);
        }
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_resources);
@@ -959,10 +886,6 @@ static int __devinit pcibios_uninitialized_bridge_resource(struct pci_bus *bus,
        u16 command;
        int i;
 
-       /* We don't do anything if PCI_PROBE_ONLY is set */
-       if (pci_flags & PCI_PROBE_ONLY)
-               return 0;
-
        /* Job is a bit different between memory and IO */
        if (res->flags & IORESOURCE_MEM) {
                /* If the BAR is non-0 (res != pci_mem_offset) then it's
@@ -1037,9 +960,6 @@ static void __devinit pcibios_fixup_bridge(struct pci_bus *bus)
                         (unsigned long long)res->end,
                         (unsigned int)res->flags);
 
-               /* Perform fixup */
-               fixup_resource(res, dev);
-
                /* Try to detect uninitialized P2P bridge resources,
                 * and clear them out so they get re-assigned later
                 */
@@ -1107,9 +1027,6 @@ EXPORT_SYMBOL(pcibios_fixup_bus);
 
 static int skip_isa_ioresource_align(struct pci_dev *dev)
 {
-       if ((pci_flags & PCI_CAN_SKIP_ISA_ALIGN) &&
-           !(dev->bus->bridge_ctl & PCI_BRIDGE_CTL_ISA))
-               return 1;
        return 0;
 }
 
@@ -1236,8 +1153,6 @@ void pcibios_allocate_bus_resources(struct pci_bus *bus)
                         * and as such ensure proper re-allocation
                         * later.
                         */
-                       if (pci_flags & PCI_REASSIGN_ALL_RSRC)
-                               goto clear_resource;
                        pr = pci_find_parent_resource(bus->self, res);
                        if (pr == res) {
                                /* this happens when the generic PCI
@@ -1422,27 +1337,19 @@ void __init pcibios_resource_survey(void)
        list_for_each_entry(b, &pci_root_buses, node)
                pcibios_allocate_bus_resources(b);
 
-       if (!(pci_flags & PCI_REASSIGN_ALL_RSRC)) {
-               pcibios_allocate_resources(0);
-               pcibios_allocate_resources(1);
-       }
+       pcibios_allocate_resources(0);
+       pcibios_allocate_resources(1);
 
        /* Before we start assigning unassigned resource, we try to reserve
         * the low IO area and the VGA memory area if they intersect the
         * bus available resources to avoid allocating things on top of them
         */
-       if (!(pci_flags & PCI_PROBE_ONLY)) {
-               list_for_each_entry(b, &pci_root_buses, node)
-                       pcibios_reserve_legacy_regions(b);
-       }
+       list_for_each_entry(b, &pci_root_buses, node)
+               pcibios_reserve_legacy_regions(b);
 
-       /* Now, if the platform didn't decide to blindly trust the firmware,
-        * we proceed to assigning things that were left unassigned
-        */
-       if (!(pci_flags & PCI_PROBE_ONLY)) {
-               pr_debug("PCI: Assigning unassigned resources...\n");
-               pci_assign_unassigned_resources();
-       }
+       /* Now proceed to assigning things that were left unassigned */
+       pr_debug("PCI: Assigning unassigned resources...\n");
+       pci_assign_unassigned_resources();
 }
 
 #ifdef CONFIG_HOTPLUG
@@ -1535,7 +1442,7 @@ static void __devinit pcibios_setup_phb_resources(struct pci_controller *hose, s
                res->end = res->start + IO_SPACE_LIMIT;
                res->flags = IORESOURCE_IO;
        }
-       pci_add_resource(resources, res);
+       pci_add_resource_offset(resources, res, hose->io_base_virt - _IO_BASE);
 
        pr_debug("PCI: PHB IO resource    = %016llx-%016llx [%lx]\n",
                 (unsigned long long)res->start,
@@ -1558,7 +1465,7 @@ static void __devinit pcibios_setup_phb_resources(struct pci_controller *hose, s
                        res->flags = IORESOURCE_MEM;
 
                }
-               pci_add_resource(resources, res);
+               pci_add_resource_offset(resources, res, hose->pci_mem_offset);
 
                pr_debug("PCI: PHB MEM resource %d = %016llx-%016llx [%lx]\n",
                        i, (unsigned long long)res->start,
index 576397c699208160f60e13f5aa5f95c7bd1b4bb5..fcd4060f642196b2e248fac9e3eca2d8cb3bb6d4 100644 (file)
@@ -92,6 +92,7 @@ extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
 #include <asm/scatterlist.h>
 #include <linux/string.h>
 #include <asm/io.h>
+#include <asm-generic/pci-bridge.h>
 
 struct pci_dev;
 
@@ -112,12 +113,6 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 }
 #endif
 
-extern void pcibios_resource_to_bus(struct pci_dev *dev,
-       struct pci_bus_region *region, struct resource *res);
-
-extern void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
-                                   struct pci_bus_region *region);
-
 #define pci_domain_nr(bus) ((struct pci_controller *)(bus)->sysdata)->index
 
 static inline int pci_proc_domain(struct pci_bus *bus)
@@ -145,8 +140,6 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
 #define arch_setup_msi_irqs arch_setup_msi_irqs
 #endif
 
-extern int pci_probe_only;
-
 extern char * (*pcibios_plat_setup)(char *str);
 
 #endif /* _ASM_PCI_H */
index acacd1407c63e1eb5aa800bffc8a0941b3bc849b..9553b14002dda51a757cf06ccb0bca5483b86c1a 100644 (file)
@@ -51,67 +51,6 @@ static void qube_raq_galileo_early_fixup(struct pci_dev *dev)
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_MARVELL, PCI_DEVICE_ID_MARVELL_GT64111,
         qube_raq_galileo_early_fixup);
 
-static void __devinit cobalt_legacy_ide_resource_fixup(struct pci_dev *dev,
-                                                      struct resource *res)
-{
-       struct pci_controller *hose = (struct pci_controller *)dev->sysdata;
-       unsigned long offset = hose->io_offset;
-       struct resource orig = *res;
-
-       if (!(res->flags & IORESOURCE_IO) ||
-           !(res->flags & IORESOURCE_PCI_FIXED))
-               return;
-
-       res->start -= offset;
-       res->end -= offset;
-       dev_printk(KERN_DEBUG, &dev->dev, "converted legacy %pR to bus %pR\n",
-                  &orig, res);
-}
-
-static void __devinit cobalt_legacy_ide_fixup(struct pci_dev *dev)
-{
-       u32 class;
-       u8 progif;
-
-       /*
-        * If the IDE controller is in legacy mode, pci_setup_device() fills in
-        * the resources with the legacy addresses that normally appear on the
-        * PCI bus, just as if we had read them from a BAR.
-        *
-        * However, with the GT-64111, those legacy addresses, e.g., 0x1f0,
-        * will never appear on the PCI bus because it converts memory accesses
-        * in the PCI I/O region (which is never at address zero) into I/O port
-        * accesses with no address translation.
-        *
-        * For example, if GT_DEF_PCI0_IO_BASE is 0x10000000, a load or store
-        * to physical address 0x100001f0 will become a PCI access to I/O port
-        * 0x100001f0.  There's no way to generate an access to I/O port 0x1f0,
-        * but the VT82C586 IDE controller does respond at 0x100001f0 because
-        * it only decodes the low 24 bits of the address.
-        *
-        * When this quirk runs, the pci_dev resources should contain bus
-        * addresses, not Linux I/O port numbers, so convert legacy addresses
-        * like 0x1f0 to bus addresses like 0x100001f0.  Later, we'll convert
-        * them back with pcibios_fixup_bus() or pcibios_bus_to_resource().
-        */
-       class = dev->class >> 8;
-       if (class != PCI_CLASS_STORAGE_IDE)
-               return;
-
-       pci_read_config_byte(dev, PCI_CLASS_PROG, &progif);
-       if ((progif & 1) == 0) {
-               cobalt_legacy_ide_resource_fixup(dev, &dev->resource[0]);
-               cobalt_legacy_ide_resource_fixup(dev, &dev->resource[1]);
-       }
-       if ((progif & 4) == 0) {
-               cobalt_legacy_ide_resource_fixup(dev, &dev->resource[2]);
-               cobalt_legacy_ide_resource_fixup(dev, &dev->resource[3]);
-       }
-}
-
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_1,
-         cobalt_legacy_ide_fixup);
-
 static void qube_raq_via_bmIDE_fixup(struct pci_dev *dev)
 {
        unsigned short cfgword;
index af8c319969656e9e77e7337b48b28d9f4c82b56c..37b52dc3d27e808cfad0498904a98b729755d4ed 100644 (file)
@@ -204,7 +204,7 @@ static int __init bcm1480_pcibios_init(void)
        uint64_t reg;
 
        /* CFE will assign PCI resources */
-       pci_probe_only = 1;
+       pci_set_flags(PCI_PROBE_ONLY);
 
        /* Avoid ISA compat ranges.  */
        PCIBIOS_MIN_IO = 0x00008000UL;
index 193e9494f98e060bfa66e2bcd0105685fd90f4ff..0fbe4c0c170a25f5af0bd0da083a14c47e82e5f7 100644 (file)
@@ -50,7 +50,7 @@ int __cpuinit bridge_probe(nasid_t nasid, int widget_id, int masterwid)
        bridge_t *bridge;
        int slot;
 
-       pci_probe_only = 1;
+       pci_set_flags(PCI_PROBE_ONLY);
 
        printk("a bridge\n");
 
index be1e1afe12c3ce3aa527b4142d8f8b7aee17143e..030c77e7926e5395111913da53ef884e5ac64899 100644 (file)
@@ -270,7 +270,8 @@ static int __devinit ltq_pci_probe(struct platform_device *pdev)
 {
        struct ltq_pci_data *ltq_pci_data =
                (struct ltq_pci_data *) pdev->dev.platform_data;
-       pci_probe_only = 0;
+
+       pci_clear_flags(PCI_PROBE_ONLY);
        ltq_pci_irq_map = ltq_pci_data->irq;
        ltq_pci_membase = ioremap_nocache(PCI_CR_BASE_ADDR, PCI_CR_SIZE);
        ltq_pci_mapped_cfg =
index 1711e8e101bc8e0348bb840d2b8973cd7c73f39f..dd97f3a83baa26f0c42daddb9a70dfad278f30fd 100644 (file)
@@ -213,7 +213,7 @@ static int __init sb1250_pcibios_init(void)
        uint64_t reg;
 
        /* CFE will assign PCI resources */
-       pci_probe_only = 1;
+       pci_set_flags(PCI_PROBE_ONLY);
 
        /* Avoid ISA compat ranges.  */
        PCIBIOS_MIN_IO = 0x00008000UL;
index 3d701a962ef409928e3c149f00e6b4ad7b6739e2..1644805a6730db188bf9f26245c947551e8a4b5b 100644 (file)
@@ -292,7 +292,7 @@ int pcibios_plat_dev_init(struct pci_dev *dev)
 static int __init pcibios_init(void)
 {
        /* PSB assigns PCI resources */
-       pci_probe_only = 1;
+       pci_set_flags(PCI_PROBE_ONLY);
        pci_config_base = ioremap(DEFAULT_PCI_CONFIG_BASE, 16 << 20);
 
        /* Extend IO port for memory mapped io */
index 15521505ebe80a4a6975f11905e4694111474a08..0514866fa9255f13f8cc2578c840b22ad7626482 100644 (file)
 #include <asm/cpu-info.h>
 
 /*
- * Indicate whether we respect the PCI setup left by the firmware.
- *
- * Make this long-lived  so that we know when shutting down
- * whether we probed only or not.
+ * If PCI_PROBE_ONLY in pci_flags is set, we don't change any PCI resource
+ * assignments.
  */
-int pci_probe_only;
-
-#define PCI_ASSIGN_ALL_BUSSES  1
-
-unsigned int pci_probe = PCI_ASSIGN_ALL_BUSSES;
 
 /*
  * The PCI controller list.
@@ -92,11 +85,12 @@ static void __devinit pcibios_scanbus(struct pci_controller *hose)
        if (!hose->iommu)
                PCI_DMA_BUS_IS_PHYS = 1;
 
-       if (hose->get_busno && pci_probe_only)
+       if (hose->get_busno && pci_has_flag(PCI_PROBE_ONLY))
                next_busno = (*hose->get_busno)();
 
-       pci_add_resource(&resources, hose->mem_resource);
-       pci_add_resource(&resources, hose->io_resource);
+       pci_add_resource_offset(&resources,
+                               hose->mem_resource, hose->mem_offset);
+       pci_add_resource_offset(&resources, hose->io_resource, hose->io_offset);
        bus = pci_scan_root_bus(NULL, next_busno, hose->pci_ops, hose,
                                &resources);
        if (!bus)
@@ -115,7 +109,7 @@ static void __devinit pcibios_scanbus(struct pci_controller *hose)
                        need_domain_info = 1;
                }
 
-               if (!pci_probe_only) {
+               if (!pci_has_flag(PCI_PROBE_ONLY)) {
                        pci_bus_size_bridges(bus);
                        pci_bus_assign_resources(bus);
                        pci_enable_bridges(bus);
@@ -241,7 +235,7 @@ static int pcibios_enable_resources(struct pci_dev *dev, int mask)
 
 unsigned int pcibios_assign_all_busses(void)
 {
-       return (pci_probe & PCI_ASSIGN_ALL_BUSSES) ? 1 : 0;
+       return 1;
 }
 
 int pcibios_enable_device(struct pci_dev *dev, int mask)
@@ -254,42 +248,13 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
        return pcibios_plat_dev_init(dev);
 }
 
-static void pcibios_fixup_device_resources(struct pci_dev *dev,
-       struct pci_bus *bus)
-{
-       /* Update device resources.  */
-       struct pci_controller *hose = (struct pci_controller *)bus->sysdata;
-       unsigned long offset = 0;
-       int i;
-
-       for (i = 0; i < PCI_NUM_RESOURCES; i++) {
-               if (!dev->resource[i].start)
-                       continue;
-               if (dev->resource[i].flags & IORESOURCE_IO)
-                       offset = hose->io_offset;
-               else if (dev->resource[i].flags & IORESOURCE_MEM)
-                       offset = hose->mem_offset;
-
-               dev->resource[i].start += offset;
-               dev->resource[i].end += offset;
-       }
-}
-
 void __devinit pcibios_fixup_bus(struct pci_bus *bus)
 {
-       /* Propagate hose info into the subordinate devices.  */
-
        struct pci_dev *dev = bus->self;
 
-       if (pci_probe_only && dev &&
+       if (pci_has_flag(PCI_PROBE_ONLY) && dev &&
            (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
                pci_read_bridge_bases(bus);
-               pcibios_fixup_device_resources(dev, bus);
-       }
-
-       list_for_each_entry(dev, &bus->devices, bus_list) {
-               if ((dev->class >> 8) != PCI_CLASS_BRIDGE_PCI)
-                       pcibios_fixup_device_resources(dev, bus);
        }
 }
 
@@ -299,40 +264,7 @@ pcibios_update_irq(struct pci_dev *dev, int irq)
        pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
 }
 
-void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
-                        struct resource *res)
-{
-       struct pci_controller *hose = (struct pci_controller *)dev->sysdata;
-       unsigned long offset = 0;
-
-       if (res->flags & IORESOURCE_IO)
-               offset = hose->io_offset;
-       else if (res->flags & IORESOURCE_MEM)
-               offset = hose->mem_offset;
-
-       region->start = res->start - offset;
-       region->end = res->end - offset;
-}
-
-void __devinit
-pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
-                       struct pci_bus_region *region)
-{
-       struct pci_controller *hose = (struct pci_controller *)dev->sysdata;
-       unsigned long offset = 0;
-
-       if (res->flags & IORESOURCE_IO)
-               offset = hose->io_offset;
-       else if (res->flags & IORESOURCE_MEM)
-               offset = hose->mem_offset;
-
-       res->start = region->start + offset;
-       res->end = region->end + offset;
-}
-
 #ifdef CONFIG_HOTPLUG
-EXPORT_SYMBOL(pcibios_resource_to_bus);
-EXPORT_SYMBOL(pcibios_bus_to_resource);
 EXPORT_SYMBOL(PCIBIOS_MIN_IO);
 EXPORT_SYMBOL(PCIBIOS_MIN_MEM);
 #endif
index 6095a28561ddc483e5c054912520191a265b9734..8137c25c4e15912841f702a655b05677d0c90a04 100644 (file)
@@ -85,22 +85,6 @@ extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
 /* implement the pci_ DMA API in terms of the generic device dma_ one */
 #include <asm-generic/pci-dma-compat.h>
 
-/**
- * pcibios_resource_to_bus - convert resource to PCI bus address
- * @dev: device which owns this resource
- * @region: converted bus-centric region (start,end)
- * @res: resource to convert
- *
- * Convert a resource to a PCI device bus address or bus window.
- */
-extern void pcibios_resource_to_bus(struct pci_dev *dev,
-                                   struct pci_bus_region *region,
-                                   struct resource *res);
-
-extern void pcibios_bus_to_resource(struct pci_dev *dev,
-                                   struct resource *res,
-                                   struct pci_bus_region *region);
-
 static inline struct resource *
 pcibios_select_root(struct pci_dev *pdev, struct resource *res)
 {
index a7c5f08ca9f5ecfb1aee4930552e89b317e9e8e5..6dce9fc2cf3c131ce6349a291775441e0a3f5c43 100644 (file)
@@ -32,8 +32,7 @@ struct pci_ops *pci_root_ops;
  * insert specific PCI bus resources instead of using the platform-level bus
  * resources directly for the PCI root bus.
  *
- * These are configured and inserted by pcibios_init() and are attached to the
- * root bus by pcibios_fixup_bus().
+ * These are configured and inserted by pcibios_init().
  */
 static struct resource pci_ioport_resource = {
        .name   = "PCI IO",
@@ -77,52 +76,6 @@ static inline int __query(const struct pci_bus *bus, unsigned int devfn)
        return 1;
 }
 
-/*
- * translate Linuxcentric addresses to PCI bus addresses
- */
-void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
-                            struct resource *res)
-{
-       if (res->flags & IORESOURCE_IO) {
-               region->start = (res->start & 0x00ffffff);
-               region->end   = (res->end   & 0x00ffffff);
-       }
-
-       if (res->flags & IORESOURCE_MEM) {
-               region->start = (res->start & 0x03ffffff) | MEM_PAGING_REG;
-               region->end   = (res->end   & 0x03ffffff) | MEM_PAGING_REG;
-       }
-
-#if 0
-       printk(KERN_DEBUG "RES->BUS: %lx-%lx => %lx-%lx\n",
-              res->start, res->end, region->start, region->end);
-#endif
-}
-EXPORT_SYMBOL(pcibios_resource_to_bus);
-
-/*
- * translate PCI bus addresses to Linuxcentric addresses
- */
-void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
-                            struct pci_bus_region *region)
-{
-       if (res->flags & IORESOURCE_IO) {
-               res->start = (region->start & 0x00ffffff) | 0xbe000000;
-               res->end   = (region->end   & 0x00ffffff) | 0xbe000000;
-       }
-
-       if (res->flags & IORESOURCE_MEM) {
-               res->start = (region->start & 0x03ffffff) | 0xb8000000;
-               res->end   = (region->end   & 0x03ffffff) | 0xb8000000;
-       }
-
-#if 0
-       printk(KERN_INFO "BUS->RES: %lx-%lx => %lx-%lx\n",
-              region->start, region->end, res->start, res->end);
-#endif
-}
-EXPORT_SYMBOL(pcibios_bus_to_resource);
-
 /*
  *
  */
@@ -364,9 +317,6 @@ static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev)
                if (!dev->resource[i].flags)
                        continue;
 
-               region.start = dev->resource[i].start;
-               region.end = dev->resource[i].end;
-               pcibios_bus_to_resource(dev, &dev->resource[i], &region);
                if (is_valid_resource(dev, i))
                        pci_claim_resource(dev, i);
        }
@@ -397,6 +347,7 @@ void __devinit pcibios_fixup_bus(struct pci_bus *bus)
  */
 static int __init pcibios_init(void)
 {
+       resource_size_t io_offset, mem_offset;
        LIST_HEAD(resources);
 
        ioport_resource.start   = 0xA0000000;
@@ -420,8 +371,13 @@ static int __init pcibios_init(void)
        printk(KERN_INFO "PCI: Probing PCI hardware [mempage %08x]\n",
               MEM_PAGING_REG);
 
-       pci_add_resource(&resources, &pci_ioport_resource);
-       pci_add_resource(&resources, &pci_iomem_resource);
+       io_offset = pci_ioport_resource.start -
+           (pci_ioport_resource.start & 0x00ffffff);
+       mem_offset = pci_iomem_resource.start -
+           ((pci_iomem_resource.start & 0x03ffffff) | MEM_PAGING_REG);
+
+       pci_add_resource_offset(&resources, &pci_ioport_resource, io_offset);
+       pci_add_resource_offset(&resources, &pci_iomem_resource, mem_offset);
        pci_root_bus = pci_scan_root_bus(NULL, 0, &pci_direct_ampci, NULL,
                                         &resources);
 
index 2242a5c636c231ba4767c9c6988ca1048163b46e..3234f492d5754b3935d03f73e3ead2bb940747c5 100644 (file)
@@ -82,38 +82,8 @@ struct pci_hba_data {
 
 #ifdef CONFIG_64BIT
 #define PCI_F_EXTEND           0xffffffff00000000UL
-#define PCI_IS_LMMIO(hba,a)    pci_is_lmmio(hba,a)
-
-/* We need to know if an address is LMMMIO or GMMIO.
- * LMMIO requires mangling and GMMIO we must use as-is.
- */
-static __inline__  int pci_is_lmmio(struct pci_hba_data *hba, unsigned long a)
-{
-       return(((a) & PCI_F_EXTEND) == PCI_F_EXTEND);
-}
-
-/*
-** Convert between PCI (IO_VIEW) addresses and processor (PA_VIEW) addresses.
-** See pci.c for more conversions used by Generic PCI code.
-**
-** Platform characteristics/firmware guarantee that
-**     (1) PA_VIEW - IO_VIEW = lmmio_offset for both LMMIO and ELMMIO
-**     (2) PA_VIEW == IO_VIEW for GMMIO
-*/
-#define PCI_BUS_ADDR(hba,a)    (PCI_IS_LMMIO(hba,a)    \
-               ?  ((a) - hba->lmmio_space_offset)      /* mangle LMMIO */ \
-               : (a))                                  /* GMMIO */
-#define PCI_HOST_ADDR(hba,a)   (((a) & PCI_F_EXTEND) == 0 \
-               ? (a) + hba->lmmio_space_offset \
-               : (a))
-
 #else  /* !CONFIG_64BIT */
-
-#define PCI_BUS_ADDR(hba,a)    (a)
-#define PCI_HOST_ADDR(hba,a)   (a)
 #define PCI_F_EXTEND           0UL
-#define PCI_IS_LMMIO(hba,a)    (1)     /* 32-bit doesn't support GMMIO */
-
 #endif /* !CONFIG_64BIT */
 
 /*
@@ -245,14 +215,6 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 }
 #endif
 
-extern void
-pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
-                        struct resource *res);
-
-extern void
-pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
-                       struct pci_bus_region *region);
-
 static inline void pcibios_penalize_isa_irq(int irq, int active)
 {
        /* We don't need to penalize isa irq's */
index 9efd97405317726369599d9f11e797c5a48d687c..74d544b1cd224f1fc1967b0ca84f25b49dcc380f 100644 (file)
@@ -195,58 +195,6 @@ void __init pcibios_init_bus(struct pci_bus *bus)
        pci_write_config_word(dev, PCI_BRIDGE_CONTROL, bridge_ctl);
 }
 
-/* called by drivers/pci/setup-bus.c:pci_setup_bridge().  */
-void __devinit pcibios_resource_to_bus(struct pci_dev *dev,
-               struct pci_bus_region *region, struct resource *res)
-{
-#ifdef CONFIG_64BIT
-       struct pci_hba_data *hba = HBA_DATA(dev->bus->bridge->platform_data);
-#endif
-
-       if (res->flags & IORESOURCE_IO) {
-               /*
-               ** I/O space may see busnumbers here. Something
-               ** in the form of 0xbbxxxx where bb is the bus num
-               ** and xxxx is the I/O port space address.
-               ** Remaining address translation are done in the
-               ** PCI Host adapter specific code - ie dino_out8.
-               */
-               region->start = PCI_PORT_ADDR(res->start);
-               region->end   = PCI_PORT_ADDR(res->end);
-       } else if (res->flags & IORESOURCE_MEM) {
-               /* Convert MMIO addr to PCI addr (undo global virtualization) */
-               region->start = PCI_BUS_ADDR(hba, res->start);
-               region->end   = PCI_BUS_ADDR(hba, res->end);
-       }
-
-       DBG_RES("pcibios_resource_to_bus(%02x %s [%lx,%lx])\n",
-               dev->bus->number, res->flags & IORESOURCE_IO ? "IO" : "MEM",
-               region->start, region->end);
-}
-
-void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
-                             struct pci_bus_region *region)
-{
-#ifdef CONFIG_64BIT
-       struct pci_hba_data *hba = HBA_DATA(dev->bus->bridge->platform_data);
-#endif
-
-       if (res->flags & IORESOURCE_MEM) {
-               res->start = PCI_HOST_ADDR(hba, region->start);
-               res->end = PCI_HOST_ADDR(hba, region->end);
-       }
-
-       if (res->flags & IORESOURCE_IO) {
-               res->start = region->start;
-               res->end = region->end;
-       }
-}
-
-#ifdef CONFIG_HOTPLUG
-EXPORT_SYMBOL(pcibios_resource_to_bus);
-EXPORT_SYMBOL(pcibios_bus_to_resource);
-#endif
-
 /*
  * pcibios align resources() is called every time generic PCI code
  * wants to generate a new address. The process of looking for
index f54b3d26ce9d80e1f361d33d451253b048c0521b..6653f2743c4e60f12fd57e06a75db536d393711a 100644 (file)
@@ -154,14 +154,6 @@ extern int pci_mmap_legacy_page_range(struct pci_bus *bus,
 
 #endif /* CONFIG_PPC64 */
 
-extern void pcibios_resource_to_bus(struct pci_dev *dev,
-                       struct pci_bus_region *region,
-                       struct resource *res);
-
-extern void pcibios_bus_to_resource(struct pci_dev *dev,
-                       struct resource *res,
-                       struct pci_bus_region *region);
-
 extern void pcibios_claim_one_bus(struct pci_bus *b);
 
 extern void pcibios_finish_adding_to_bus(struct pci_bus *bus);
@@ -190,6 +182,7 @@ extern void pci_resource_to_user(const struct pci_dev *dev, int bar,
                                 const struct resource *rsrc,
                                 resource_size_t *start, resource_size_t *end);
 
+extern resource_size_t pcibios_io_space_offset(struct pci_controller *hose);
 extern void pcibios_setup_bus_devices(struct pci_bus *bus);
 extern void pcibios_setup_bus_self(struct pci_bus *bus);
 extern void pcibios_setup_phb_io_space(struct pci_controller *hose);
index e660b37aa7d064d77c959c4ce4dd1a36a9316ab1..80fa704d410fe5105bd7776776a5af9a257147ac 100644 (file)
@@ -45,8 +45,6 @@ extern void init_pci_config_tokens (void);
 extern unsigned long get_phb_buid (struct device_node *);
 extern int rtas_setup_phb(struct pci_controller *phb);
 
-extern unsigned long pci_probe_only;
-
 #ifdef CONFIG_EEH
 
 void pci_addr_cache_build(void);
index d0373bcb7c9dcebd42e0b8baddf48c881b375018..8e78e93c818536b4a33a1ae29f10b3d157b29e63 100644 (file)
@@ -49,9 +49,6 @@ static int global_phb_number;         /* Global phb counter */
 /* ISA Memory physical address */
 resource_size_t isa_mem_base;
 
-/* Default PCI flags is 0 on ppc32, modified at boot on ppc64 */
-unsigned int pci_flags = 0;
-
 
 static struct dma_map_ops *pci_dma_ops = &dma_direct_ops;
 
@@ -834,60 +831,6 @@ int pci_proc_domain(struct pci_bus *bus)
        return 1;
 }
 
-void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
-                            struct resource *res)
-{
-       resource_size_t offset = 0, mask = (resource_size_t)-1;
-       struct pci_controller *hose = pci_bus_to_host(dev->bus);
-
-       if (!hose)
-               return;
-       if (res->flags & IORESOURCE_IO) {
-               offset = (unsigned long)hose->io_base_virt - _IO_BASE;
-               mask = 0xffffffffu;
-       } else if (res->flags & IORESOURCE_MEM)
-               offset = hose->pci_mem_offset;
-
-       region->start = (res->start - offset) & mask;
-       region->end = (res->end - offset) & mask;
-}
-EXPORT_SYMBOL(pcibios_resource_to_bus);
-
-void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
-                            struct pci_bus_region *region)
-{
-       resource_size_t offset = 0, mask = (resource_size_t)-1;
-       struct pci_controller *hose = pci_bus_to_host(dev->bus);
-
-       if (!hose)
-               return;
-       if (res->flags & IORESOURCE_IO) {
-               offset = (unsigned long)hose->io_base_virt - _IO_BASE;
-               mask = 0xffffffffu;
-       } else if (res->flags & IORESOURCE_MEM)
-               offset = hose->pci_mem_offset;
-       res->start = (region->start + offset) & mask;
-       res->end = (region->end + offset) & mask;
-}
-EXPORT_SYMBOL(pcibios_bus_to_resource);
-
-/* Fixup a bus resource into a linux resource */
-static void __devinit fixup_resource(struct resource *res, struct pci_dev *dev)
-{
-       struct pci_controller *hose = pci_bus_to_host(dev->bus);
-       resource_size_t offset = 0, mask = (resource_size_t)-1;
-
-       if (res->flags & IORESOURCE_IO) {
-               offset = (unsigned long)hose->io_base_virt - _IO_BASE;
-               mask = 0xffffffffu;
-       } else if (res->flags & IORESOURCE_MEM)
-               offset = hose->pci_mem_offset;
-
-       res->start = (res->start + offset) & mask;
-       res->end = (res->end + offset) & mask;
-}
-
-
 /* This header fixup will do the resource fixup for all devices as they are
  * probed, but not for bridge ranges
  */
@@ -927,18 +870,11 @@ static void __devinit pcibios_fixup_resources(struct pci_dev *dev)
                        continue;
                }
 
-               pr_debug("PCI:%s Resource %d %016llx-%016llx [%x] fixup...\n",
+               pr_debug("PCI:%s Resource %d %016llx-%016llx [%x]\n",
                         pci_name(dev), i,
                         (unsigned long long)res->start,\
                         (unsigned long long)res->end,
                         (unsigned int)res->flags);
-
-               fixup_resource(res, dev);
-
-               pr_debug("PCI:%s            %016llx-%016llx\n",
-                        pci_name(dev),
-                        (unsigned long long)res->start,
-                        (unsigned long long)res->end);
        }
 
        /* Call machine specific resource fixup */
@@ -1040,27 +976,18 @@ static void __devinit pcibios_fixup_bridge(struct pci_bus *bus)
                        continue;
                }
 
-               pr_debug("PCI:%s Bus rsrc %d %016llx-%016llx [%x] fixup...\n",
+               pr_debug("PCI:%s Bus rsrc %d %016llx-%016llx [%x]\n",
                         pci_name(dev), i,
                         (unsigned long long)res->start,\
                         (unsigned long long)res->end,
                         (unsigned int)res->flags);
 
-               /* Perform fixup */
-               fixup_resource(res, dev);
-
                /* Try to detect uninitialized P2P bridge resources,
                 * and clear them out so they get re-assigned later
                 */
                if (pcibios_uninitialized_bridge_resource(bus, res)) {
                        res->flags = 0;
                        pr_debug("PCI:%s            (unassigned)\n", pci_name(dev));
-               } else {
-
-                       pr_debug("PCI:%s            %016llx-%016llx\n",
-                                pci_name(dev),
-                                (unsigned long long)res->start,
-                                (unsigned long long)res->end);
                }
        }
 }
@@ -1550,6 +1477,11 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
        return pci_enable_resources(dev, mask);
 }
 
+resource_size_t pcibios_io_space_offset(struct pci_controller *hose)
+{
+       return (unsigned long) hose->io_base_virt - _IO_BASE;
+}
+
 static void __devinit pcibios_setup_phb_resources(struct pci_controller *hose, struct list_head *resources)
 {
        struct resource *res;
@@ -1574,7 +1506,7 @@ static void __devinit pcibios_setup_phb_resources(struct pci_controller *hose, s
                 (unsigned long long)res->start,
                 (unsigned long long)res->end,
                 (unsigned long)res->flags);
-       pci_add_resource(resources, res);
+       pci_add_resource_offset(resources, res, pcibios_io_space_offset(hose));
 
        /* Hookup PHB Memory resources */
        for (i = 0; i < 3; ++i) {
@@ -1597,7 +1529,7 @@ static void __devinit pcibios_setup_phb_resources(struct pci_controller *hose, s
                         (unsigned long long)res->start,
                         (unsigned long long)res->end,
                         (unsigned long)res->flags);
-               pci_add_resource(resources, res);
+               pci_add_resource_offset(resources, res, hose->pci_mem_offset);
        }
 
        pr_debug("PCI: PHB MEM offset     = %016llx\n",
index fdd1a3d951dcaf50b5e63b72901ef507b9f19a90..4b06ec5a502e2ce47a31d7a6f3d9fa4b1d06bf72 100644 (file)
@@ -219,9 +219,9 @@ void __devinit pcibios_setup_phb_io_space(struct pci_controller *hose)
        struct resource *res = &hose->io_resource;
 
        /* Fixup IO space offset */
-       io_offset = (unsigned long)hose->io_base_virt - isa_io_base;
-       res->start = (res->start + io_offset) & 0xffffffffu;
-       res->end = (res->end + io_offset) & 0xffffffffu;
+       io_offset = pcibios_io_space_offset(hose);
+       res->start += io_offset;
+       res->end += io_offset;
 }
 
 static int __init pcibios_init(void)
index 3318d39b7d4c34b8c8fe3815eed7aa1fb5a0e09c..94a54f61d341d3363d8bd9b9365964e73aee1346 100644 (file)
@@ -33,8 +33,6 @@
 #include <asm/machdep.h>
 #include <asm/ppc-pci.h>
 
-unsigned long pci_probe_only = 1;
-
 /* pci_io_base -- the base address from which io bars are offsets.
  * This is the lowest I/O base address (so bar values are always positive),
  * and it *must* be the start of ISA space if an ISA bus exists because
@@ -55,9 +53,6 @@ static int __init pcibios_init(void)
         */
        ppc_md.phys_mem_access_prot = pci_phys_mem_access_prot;
 
-       if (pci_probe_only)
-               pci_add_flags(PCI_PROBE_ONLY);
-
        /* On ppc64, we always enable PCI domains and we keep domain 0
         * backward compatible in /proc for video cards
         */
@@ -173,7 +168,7 @@ static int __devinit pcibios_map_phb_io_space(struct pci_controller *hose)
                return -ENOMEM;
 
        /* Fixup hose IO resource */
-       io_virt_offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+       io_virt_offset = pcibios_io_space_offset(hose);
        hose->io_resource.start += io_virt_offset;
        hose->io_resource.end += io_virt_offset;
 
index b37d0b5a796e941fb079a737936dbb1fa29e1179..89dde171a6fabfafc23d1e29f3cba0bf00350bbe 100644 (file)
@@ -75,6 +75,7 @@ static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev)
 {
        u64 base, size;
        unsigned int flags;
+       struct pci_bus_region region;
        struct resource *res;
        const u32 *addrs;
        u32 i;
@@ -106,10 +107,11 @@ static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev)
                        printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i);
                        continue;
                }
-               res->start = base;
-               res->end = base + size - 1;
                res->flags = flags;
                res->name = pci_name(dev);
+               region.start = base;
+               region.end = base + size - 1;
+               pcibios_bus_to_resource(dev, res, &region);
        }
 }
 
@@ -209,6 +211,7 @@ void __devinit of_scan_pci_bridge(struct pci_dev *dev)
        struct pci_bus *bus;
        const u32 *busrange, *ranges;
        int len, i, mode;
+       struct pci_bus_region region;
        struct resource *res;
        unsigned int flags;
        u64 size;
@@ -270,9 +273,10 @@ void __devinit of_scan_pci_bridge(struct pci_dev *dev)
                        res = bus->resource[i];
                        ++i;
                }
-               res->start = of_read_number(&ranges[1], 2);
-               res->end = res->start + size - 1;
                res->flags = flags;
+               region.start = of_read_number(&ranges[1], 2);
+               region.end = region.start + size - 1;
+               pcibios_bus_to_resource(dev, res, &region);
        }
        sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus),
                bus->number);
index 517bd86bc3f020537617710daeb0e9e34a07da5b..179af906dcda5685d77a118f0b5afe99db67286e 100644 (file)
@@ -279,7 +279,7 @@ void __init find_and_init_phbs(void)
        eeh_dev_phb_init();
 
        /*
-        * pci_probe_only and pci_assign_all_buses can be set via properties
+        * PCI_PROBE_ONLY and PCI_REASSIGN_ALL_BUS can be set via properties
         * in chosen.
         */
        if (of_chosen) {
@@ -287,8 +287,12 @@ void __init find_and_init_phbs(void)
 
                prop = of_get_property(of_chosen,
                                "linux,pci-probe-only", NULL);
-               if (prop)
-                       pci_probe_only = *prop;
+               if (prop) {
+                       if (*prop)
+                               pci_add_flags(PCI_PROBE_ONLY);
+                       else
+                               pci_clear_flags(PCI_PROBE_ONLY);
+               }
 
 #ifdef CONFIG_PPC32 /* Will be made generic soon */
                prop = of_get_property(of_chosen,
index 401e3f3f74c8f4e3774cb97f6e99f7aa37fa9e2a..465ee8f5c0868070d9ea8364a340a5aa1a5b6bda 100644 (file)
@@ -620,7 +620,7 @@ void __init maple_pci_init(void)
        }
 
        /* Tell pci.c to not change any resource allocations.  */
-       pci_probe_only = 1;
+       pci_add_flags(PCI_PROBE_ONLY);
 }
 
 int maple_pci_get_legacy_ide_irq(struct pci_dev *pdev, int channel)
index b6a0ec45c69554c7558ec45f478ce40b5cf1411c..aa862713258c739b5b9ad52cb2e3973bff5b7bee 100644 (file)
@@ -229,9 +229,6 @@ void __init pas_pci_init(void)
 
        /* Setup the linkage between OF nodes and PHBs */
        pci_devs_phb_init();
-
-       /* Use the common resource allocation mechanism */
-       pci_probe_only = 1;
 }
 
 void __iomem *pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset)
index 31a7d3a7ce25b8c1f94d8cc9e9742d16b0c4ccf2..43bbe1bda9391ad5801112bd4135b6dd813e69c1 100644 (file)
@@ -1059,9 +1059,6 @@ void __init pmac_pci_init(void)
        }
        /* pmac_check_ht_link(); */
 
-       /* We can allocate missing resources if any */
-       pci_probe_only = 0;
-
 #else /* CONFIG_PPC64 */
        init_p2pbridge();
        init_second_ohare();
index 5e155dfc4320ab011589d678408002dd05503a96..fbdd74dac3ac6573277ba4e6e853c5311ff8e0df 100644 (file)
@@ -1299,15 +1299,14 @@ void __init pnv_pci_init_ioda1_phb(struct device_node *np)
        /* Setup MSI support */
        pnv_pci_init_ioda_msis(phb);
 
-       /* We set both probe_only and PCI_REASSIGN_ALL_RSRC. This is an
+       /* We set both PCI_PROBE_ONLY and PCI_REASSIGN_ALL_RSRC. This is an
         * odd combination which essentially means that we skip all resource
         * fixups and assignments in the generic code, and do it all
         * ourselves here
         */
-       pci_probe_only = 1;
        ppc_md.pcibios_fixup_phb = pnv_pci_ioda_fixup_phb;
        ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook;
-       pci_add_flags(PCI_REASSIGN_ALL_RSRC);
+       pci_add_flags(PCI_PROBE_ONLY | PCI_REASSIGN_ALL_RSRC);
 
        /* Reset IODA tables to a clean state */
        rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET);
index 214478d781ae72a25fb83fb02eb5750a93f42eb2..be3cfc5ceabbfb316a55c34a4fb5da18e41dd66e 100644 (file)
@@ -562,10 +562,7 @@ void __init pnv_pci_init(void)
 {
        struct device_node *np;
 
-       pci_set_flags(PCI_CAN_SKIP_ISA_ALIGN);
-
-       /* We do not want to just probe */
-       pci_probe_only = 0;
+       pci_add_flags(PCI_CAN_SKIP_ISA_ALIGN);
 
        /* OPAL absent, try POPAL first then RTAS detection of PHBs */
        if (!firmware_has_feature(FW_FEATURE_OPAL)) {
index fbb21fc3080b4e130142b03ff2519d79e7a5f545..8b7bafa489c27235d4c65631681fb29cfa71ef8c 100644 (file)
@@ -84,7 +84,7 @@ void pcibios_remove_pci_devices(struct pci_bus *bus)
        list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) {
                pr_debug("     * Removing %s...\n", pci_name(dev));
                eeh_remove_bus_device(dev);
-               pci_remove_bus_device(dev);
+               pci_stop_and_remove_bus_device(dev);
        }
 }
 EXPORT_SYMBOL_GPL(pcibios_remove_pci_devices);
index 8f137af616afcbbe3f223d471f87c71c17ef3fae..51ecac920dd8e4cb74e17a9086cb893ad57dc6d6 100644 (file)
@@ -383,6 +383,9 @@ static void __init pSeries_setup_arch(void)
 
        fwnmi_init();
 
+       /* By default, only probe PCI (can be overridden by rtas_pci) */
+       pci_add_flags(PCI_PROBE_ONLY);
+
        /* Find and initialize PCI host bridges */
        init_pci_config_tokens();
        eeh_pseries_init();
index d24b3acf858eaf79f01a1a7662f0f53ef815595f..763014cd1e622783bbc5e4410d89ce7525c9cc60 100644 (file)
@@ -682,7 +682,6 @@ static int __init wsp_setup_one_phb(struct device_node *np)
        /* XXX Force re-assigning of everything for now */
        pci_add_flags(PCI_REASSIGN_ALL_BUS | PCI_REASSIGN_ALL_RSRC |
                      PCI_ENABLE_PROC_DOMAINS);
-       pci_probe_only = 0;
 
        /* Calculate how the TCE space is divided */
        phb->dma32_base         = 0;
index 1e7b0e2e764d1ae1319dd5a63ec563c9af9f1120..9d10a3cb879740ed175d4aede3f6108c58e0f096 100644 (file)
@@ -37,11 +37,20 @@ static void __devinit pcibios_scanbus(struct pci_channel *hose)
        static int next_busno;
        static int need_domain_info;
        LIST_HEAD(resources);
+       struct resource *res;
+       resource_size_t offset;
        int i;
        struct pci_bus *bus;
 
-       for (i = 0; i < hose->nr_resources; i++)
-               pci_add_resource(&resources, hose->resources + i);
+       for (i = 0; i < hose->nr_resources; i++) {
+               res = hose->resources + i;
+               offset = 0;
+               if (res->flags & IORESOURCE_IO)
+                       offset = hose->io_offset;
+               else if (res->flags & IORESOURCE_MEM)
+                       offset = hose->mem_offset;
+               pci_add_resource_offset(&resources, res, offset);
+       }
 
        bus = pci_scan_root_bus(NULL, next_busno, hose->pci_ops, hose,
                                &resources);
@@ -143,42 +152,12 @@ static int __init pcibios_init(void)
 }
 subsys_initcall(pcibios_init);
 
-static void pcibios_fixup_device_resources(struct pci_dev *dev,
-       struct pci_bus *bus)
-{
-       /* Update device resources.  */
-       struct pci_channel *hose = bus->sysdata;
-       unsigned long offset = 0;
-       int i;
-
-       for (i = 0; i < PCI_NUM_RESOURCES; i++) {
-               if (!dev->resource[i].start)
-                       continue;
-               if (dev->resource[i].flags & IORESOURCE_IO)
-                       offset = hose->io_offset;
-               else if (dev->resource[i].flags & IORESOURCE_MEM)
-                       offset = hose->mem_offset;
-
-               dev->resource[i].start += offset;
-               dev->resource[i].end += offset;
-       }
-}
-
 /*
  *  Called after each bus is probed, but before its children
  *  are examined.
  */
 void __devinit pcibios_fixup_bus(struct pci_bus *bus)
 {
-       struct pci_dev *dev;
-       struct list_head *ln;
-
-       for (ln = bus->devices.next; ln != &bus->devices; ln = ln->next) {
-               dev = pci_dev_b(ln);
-
-               if ((dev->class >> 8) != PCI_CLASS_BRIDGE_PCI)
-                       pcibios_fixup_device_resources(dev, bus);
-       }
 }
 
 /*
@@ -208,36 +187,6 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res,
        return start;
 }
 
-void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
-                            struct resource *res)
-{
-       struct pci_channel *hose = dev->sysdata;
-       unsigned long offset = 0;
-
-       if (res->flags & IORESOURCE_IO)
-               offset = hose->io_offset;
-       else if (res->flags & IORESOURCE_MEM)
-               offset = hose->mem_offset;
-
-       region->start = res->start - offset;
-       region->end = res->end - offset;
-}
-
-void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
-                            struct pci_bus_region *region)
-{
-       struct pci_channel *hose = dev->sysdata;
-       unsigned long offset = 0;
-
-       if (res->flags & IORESOURCE_IO)
-               offset = hose->io_offset;
-       else if (res->flags & IORESOURCE_MEM)
-               offset = hose->mem_offset;
-
-       res->start = region->start + offset;
-       res->end = region->end + offset;
-}
-
 int pcibios_enable_device(struct pci_dev *dev, int mask)
 {
        return pci_enable_resources(dev, mask);
@@ -381,8 +330,6 @@ EXPORT_SYMBOL(pci_iounmap);
 #endif /* CONFIG_GENERIC_IOMAP */
 
 #ifdef CONFIG_HOTPLUG
-EXPORT_SYMBOL(pcibios_resource_to_bus);
-EXPORT_SYMBOL(pcibios_bus_to_resource);
 EXPORT_SYMBOL(PCIBIOS_MIN_IO);
 EXPORT_SYMBOL(PCIBIOS_MIN_MEM);
 #endif
index cb21e2399dc1829a13045531711a91a77eefc311..bff96c2e7d254f520aa3c089a8bded0dc2ea2ed6 100644 (file)
@@ -114,12 +114,6 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 /* Board-specific fixup routines. */
 int pcibios_map_platform_irq(const struct pci_dev *dev, u8 slot, u8 pin);
 
-extern void pcibios_resource_to_bus(struct pci_dev *dev,
-       struct pci_bus_region *region, struct resource *res);
-
-extern void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
-                                   struct pci_bus_region *region);
-
 #define pci_domain_nr(bus) ((struct pci_channel *)(bus)->sysdata)->index
 
 static inline int pci_proc_domain(struct pci_bus *bus)
index 6de7f7bf956abb8ec48dd6eebe8863443feb5393..dc503297481f4673874b973719c9afa79f004faa 100644 (file)
@@ -52,14 +52,6 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev,
  * 64Kbytes by the Host controller.
  */
 
-extern void
-pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
-                       struct resource *res);
-
-extern void
-pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
-                       struct pci_bus_region *region);
-
 static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
 {
        return PCI_IRQ_NONE;
index 755a4bb6bcd32278e62fbe4afc2977946495231d..1633b718d3bc3c608ef59d4a7068108e815d1029 100644 (file)
@@ -73,14 +73,6 @@ extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
                               enum pci_mmap_state mmap_state,
                               int write_combine);
 
-extern void
-pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
-                       struct resource *res);
-
-extern void
-pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
-                       struct pci_bus_region *region);
-
 static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
 {
        return PCI_IRQ_NONE;
index c7bec25fdb1c1b5d18d67c6882437c5475354d31..aba6b958b2a5da25a49adbe76783eca1e5570dc9 100644 (file)
 
 /* The LEON architecture does not rely on a BIOS or bootloader to setup
  * PCI for us. The Linux generic routines are used to setup resources,
- * reset values of confuration-space registers settings ae preseved.
+ * reset values of configuration-space register settings are preserved.
+ *
+ * PCI Memory and Prefetchable Memory is direct-mapped. However I/O Space is
+ * accessed through a Window which is translated to low 64KB in PCI space, the
+ * first 4KB is not used so 60KB is available.
  */
 void leon_pci_init(struct platform_device *ofdev, struct leon_pci_info *info)
 {
        LIST_HEAD(resources);
        struct pci_bus *root_bus;
 
-       pci_add_resource(&resources, &info->io_space);
+       pci_add_resource_offset(&resources, &info->io_space,
+                               info->io_space.start - 0x1000);
        pci_add_resource(&resources, &info->mem_space);
 
        root_bus = pci_scan_root_bus(&ofdev->dev, 0, info->ops, info,
@@ -38,44 +43,6 @@ void leon_pci_init(struct platform_device *ofdev, struct leon_pci_info *info)
        }
 }
 
-/* PCI Memory and Prefetchable Memory is direct-mapped. However I/O Space is
- * accessed through a Window which is translated to low 64KB in PCI space, the
- * first 4KB is not used so 60KB is available.
- *
- * This function is used by generic code to translate resource addresses into
- * PCI addresses.
- */
-void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
-                            struct resource *res)
-{
-       struct leon_pci_info *info = dev->bus->sysdata;
-
-       region->start = res->start;
-       region->end = res->end;
-
-       if (res->flags & IORESOURCE_IO) {
-               region->start -= (info->io_space.start - 0x1000);
-               region->end -= (info->io_space.start - 0x1000);
-       }
-}
-EXPORT_SYMBOL(pcibios_resource_to_bus);
-
-/* see pcibios_resource_to_bus() comment */
-void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
-                            struct pci_bus_region *region)
-{
-       struct leon_pci_info *info = dev->bus->sysdata;
-
-       res->start = region->start;
-       res->end = region->end;
-
-       if (res->flags & IORESOURCE_IO) {
-               res->start += (info->io_space.start - 0x1000);
-               res->end += (info->io_space.start - 0x1000);
-       }
-}
-EXPORT_SYMBOL(pcibios_bus_to_resource);
-
 void __devinit pcibios_fixup_bus(struct pci_bus *pbus)
 {
        struct leon_pci_info *info = pbus->sysdata;
index bb8bc2e519ac03735f79361d7b400eccee08b8ba..fdaf218116709e3c8b7ce2a29db76862c1800604 100644 (file)
@@ -375,13 +375,6 @@ static void __devinit apb_calc_first_last(u8 map, u32 *first_p, u32 *last_p)
        *last_p = last;
 }
 
-static void pci_resource_adjust(struct resource *res,
-                               struct resource *root)
-{
-       res->start += root->start;
-       res->end += root->start;
-}
-
 /* For PCI bus devices which lack a 'ranges' property we interrogate
  * the config space values to set the resources, just like the generic
  * Linux PCI probing code does.
@@ -390,7 +383,8 @@ static void __devinit pci_cfg_fake_ranges(struct pci_dev *dev,
                                          struct pci_bus *bus,
                                          struct pci_pbm_info *pbm)
 {
-       struct resource *res;
+       struct pci_bus_region region;
+       struct resource *res, res2;
        u8 io_base_lo, io_limit_lo;
        u16 mem_base_lo, mem_limit_lo;
        unsigned long base, limit;
@@ -412,11 +406,14 @@ static void __devinit pci_cfg_fake_ranges(struct pci_dev *dev,
        res = bus->resource[0];
        if (base <= limit) {
                res->flags = (io_base_lo & PCI_IO_RANGE_TYPE_MASK) | IORESOURCE_IO;
+               res2.flags = res->flags;
+               region.start = base;
+               region.end = limit + 0xfff;
+               pcibios_bus_to_resource(dev, &res2, &region);
                if (!res->start)
-                       res->start = base;
+                       res->start = res2.start;
                if (!res->end)
-                       res->end = limit + 0xfff;
-               pci_resource_adjust(res, &pbm->io_space);
+                       res->end = res2.end;
        }
 
        pci_read_config_word(dev, PCI_MEMORY_BASE, &mem_base_lo);
@@ -428,9 +425,9 @@ static void __devinit pci_cfg_fake_ranges(struct pci_dev *dev,
        if (base <= limit) {
                res->flags = ((mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) |
                              IORESOURCE_MEM);
-               res->start = base;
-               res->end = limit + 0xfffff;
-               pci_resource_adjust(res, &pbm->mem_space);
+               region.start = base;
+               region.end = limit + 0xfffff;
+               pcibios_bus_to_resource(dev, res, &region);
        }
 
        pci_read_config_word(dev, PCI_PREF_MEMORY_BASE, &mem_base_lo);
@@ -459,9 +456,9 @@ static void __devinit pci_cfg_fake_ranges(struct pci_dev *dev,
        if (base <= limit) {
                res->flags = ((mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) |
                              IORESOURCE_MEM | IORESOURCE_PREFETCH);
-               res->start = base;
-               res->end = limit + 0xfffff;
-               pci_resource_adjust(res, &pbm->mem_space);
+               region.start = base;
+               region.end = limit + 0xfffff;
+               pcibios_bus_to_resource(dev, res, &region);
        }
 }
 
@@ -472,6 +469,7 @@ static void __devinit apb_fake_ranges(struct pci_dev *dev,
                                      struct pci_bus *bus,
                                      struct pci_pbm_info *pbm)
 {
+       struct pci_bus_region region;
        struct resource *res;
        u32 first, last;
        u8 map;
@@ -479,18 +477,18 @@ static void __devinit apb_fake_ranges(struct pci_dev *dev,
        pci_read_config_byte(dev, APB_IO_ADDRESS_MAP, &map);
        apb_calc_first_last(map, &first, &last);
        res = bus->resource[0];
-       res->start = (first << 21);
-       res->end = (last << 21) + ((1 << 21) - 1);
        res->flags = IORESOURCE_IO;
-       pci_resource_adjust(res, &pbm->io_space);
+       region.start = (first << 21);
+       region.end = (last << 21) + ((1 << 21) - 1);
+       pcibios_bus_to_resource(dev, res, &region);
 
        pci_read_config_byte(dev, APB_MEM_ADDRESS_MAP, &map);
        apb_calc_first_last(map, &first, &last);
        res = bus->resource[1];
-       res->start = (first << 21);
-       res->end = (last << 21) + ((1 << 21) - 1);
        res->flags = IORESOURCE_MEM;
-       pci_resource_adjust(res, &pbm->mem_space);
+       region.start = (first << 21);
+       region.end = (last << 21) + ((1 << 21) - 1);
+       pcibios_bus_to_resource(dev, res, &region);
 }
 
 static void __devinit pci_of_scan_bus(struct pci_pbm_info *pbm,
@@ -506,6 +504,7 @@ static void __devinit of_scan_pci_bridge(struct pci_pbm_info *pbm,
        struct pci_bus *bus;
        const u32 *busrange, *ranges;
        int len, i, simba;
+       struct pci_bus_region region;
        struct resource *res;
        unsigned int flags;
        u64 size;
@@ -556,8 +555,6 @@ static void __devinit of_scan_pci_bridge(struct pci_pbm_info *pbm,
        }
        i = 1;
        for (; len >= 32; len -= 32, ranges += 8) {
-               struct resource *root;
-
                flags = pci_parse_of_flags(ranges[0]);
                size = GET_64BIT(ranges, 6);
                if (flags == 0 || size == 0)
@@ -569,7 +566,6 @@ static void __devinit of_scan_pci_bridge(struct pci_pbm_info *pbm,
                                       " for bridge %s\n", node->full_name);
                                continue;
                        }
-                       root = &pbm->io_space;
                } else {
                        if (i >= PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES) {
                                printk(KERN_ERR "PCI: too many memory ranges"
@@ -578,18 +574,12 @@ static void __devinit of_scan_pci_bridge(struct pci_pbm_info *pbm,
                        }
                        res = bus->resource[i];
                        ++i;
-                       root = &pbm->mem_space;
                }
 
-               res->start = GET_64BIT(ranges, 1);
-               res->end = res->start + size - 1;
                res->flags = flags;
-
-               /* Another way to implement this would be to add an of_device
-                * layer routine that can calculate a resource for a given
-                * range property value in a PCI device.
-                */
-               pci_resource_adjust(res, root);
+               region.start = GET_64BIT(ranges, 1);
+               region.end = region.start + size - 1;
+               pcibios_bus_to_resource(dev, res, &region);
        }
 after_ranges:
        sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus),
@@ -691,8 +681,10 @@ struct pci_bus * __devinit pci_scan_one_pbm(struct pci_pbm_info *pbm,
 
        printk("PCI: Scanning PBM %s\n", node->full_name);
 
-       pci_add_resource(&resources, &pbm->io_space);
-       pci_add_resource(&resources, &pbm->mem_space);
+       pci_add_resource_offset(&resources, &pbm->io_space,
+                               pbm->io_space.start);
+       pci_add_resource_offset(&resources, &pbm->mem_space,
+                               pbm->mem_space.start);
        bus = pci_create_root_bus(parent, pbm->pci_first_busno, pbm->pci_ops,
                                  pbm, &resources);
        if (!bus) {
@@ -755,46 +747,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
        return 0;
 }
 
-void pcibios_resource_to_bus(struct pci_dev *pdev, struct pci_bus_region *region,
-                            struct resource *res)
-{
-       struct pci_pbm_info *pbm = pdev->bus->sysdata;
-       struct resource zero_res, *root;
-
-       zero_res.start = 0;
-       zero_res.end = 0;
-       zero_res.flags = res->flags;
-
-       if (res->flags & IORESOURCE_IO)
-               root = &pbm->io_space;
-       else
-               root = &pbm->mem_space;
-
-       pci_resource_adjust(&zero_res, root);
-
-       region->start = res->start - zero_res.start;
-       region->end = res->end - zero_res.start;
-}
-EXPORT_SYMBOL(pcibios_resource_to_bus);
-
-void pcibios_bus_to_resource(struct pci_dev *pdev, struct resource *res,
-                            struct pci_bus_region *region)
-{
-       struct pci_pbm_info *pbm = pdev->bus->sysdata;
-       struct resource *root;
-
-       res->start = region->start;
-       res->end = region->end;
-
-       if (res->flags & IORESOURCE_IO)
-               root = &pbm->io_space;
-       else
-               root = &pbm->mem_space;
-
-       pci_resource_adjust(res, root);
-}
-EXPORT_SYMBOL(pcibios_bus_to_resource);
-
 char * __devinit pcibios_setup(char *str)
 {
        return str;
index dd3867727c3521138436c1a05bf8bd4b4a976ee4..f5e108f4a151a9bbb2d72865bd0c83519dee9707 100644 (file)
@@ -14,6 +14,7 @@
 
 #ifdef __KERNEL__
 #include <asm-generic/pci-dma-compat.h>
+#include <asm-generic/pci-bridge.h>
 #include <asm-generic/pci.h>
 #include <mach/hardware.h> /* for PCIBIOS_MIN_* */
 
index a8f07fe10cad0e6284c99a6a9155629bf7da9b91..2fc2b1ba825e6f49701ac0659ea13f5e3c77c021 100644 (file)
@@ -21,7 +21,6 @@
 #include <linux/io.h>
 
 static int debug_pci;
-static int use_firmware;
 
 #define CONFIG_CMD(bus, devfn, where)  \
        (0x80000000 | (bus->number << 16) | (devfn << 8) | (where & ~3))
@@ -276,7 +275,7 @@ static int __init pci_common_init(void)
 
        pci_fixup_irqs(pci_common_swizzle, pci_puv3_map_irq);
 
-       if (!use_firmware) {
+       if (!pci_has_flag(PCI_PROBE_ONLY)) {
                /*
                 * Size the bridge windows.
                 */
@@ -303,7 +302,7 @@ char * __devinit pcibios_setup(char *str)
                debug_pci = 1;
                return NULL;
        } else if (!strcmp(str, "firmware")) {
-               use_firmware = 1;
+               pci_add_flags(PCI_PROBE_ONLY);
                return NULL;
        }
        return str;
index 77e95f54570a9728816055df2e6c562e1a608ccc..332f98c9111f41d92def3e02f0771783c8dce100 100644 (file)
@@ -64,11 +64,15 @@ enum regnames {
        GDB_PS,                 /* 17 */
        GDB_CS,                 /* 18 */
        GDB_SS,                 /* 19 */
+       GDB_DS,                 /* 20 */
+       GDB_ES,                 /* 21 */
+       GDB_FS,                 /* 22 */
+       GDB_GS,                 /* 23 */
 };
 #define GDB_ORIG_AX            57
-#define DBG_MAX_REG_NUM                20
-/* 17 64 bit regs and 3 32 bit regs */
-#define NUMREGBYTES            ((17 * 8) + (3 * 4))
+#define DBG_MAX_REG_NUM                24
+/* 17 64 bit regs and 5 32 bit regs */
+#define NUMREGBYTES            ((17 * 8) + (5 * 4))
 #endif /* ! CONFIG_X86_32 */
 
 static inline void arch_kgdb_breakpoint(void)
index faba5771acad05c26bf01423dfa9917a9f38a662..fdc37b3d0ce35ab379b09eed4d2811fdf95fc511 100644 (file)
@@ -67,8 +67,6 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
        { "ss", 4, offsetof(struct pt_regs, ss) },
        { "ds", 4, offsetof(struct pt_regs, ds) },
        { "es", 4, offsetof(struct pt_regs, es) },
-       { "fs", 4, -1 },
-       { "gs", 4, -1 },
 #else
        { "ax", 8, offsetof(struct pt_regs, ax) },
        { "bx", 8, offsetof(struct pt_regs, bx) },
@@ -90,7 +88,11 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
        { "flags", 4, offsetof(struct pt_regs, flags) },
        { "cs", 4, offsetof(struct pt_regs, cs) },
        { "ss", 4, offsetof(struct pt_regs, ss) },
+       { "ds", 4, -1 },
+       { "es", 4, -1 },
 #endif
+       { "fs", 4, -1 },
+       { "gs", 4, -1 },
 };
 
 int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
index 1c4d769e21ea07053f81f75b7aed2b21ab1f397f..28e5e06fcba484c7520c718fb3cd9d961a103988 100644 (file)
@@ -262,10 +262,11 @@ rootfs_initcall(pci_iommu_init);
 
 static __devinit void via_no_dac(struct pci_dev *dev)
 {
-       if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
+       if (forbid_dac == 0) {
                dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n");
                forbid_dac = 1;
        }
 }
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac);
+DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID,
+                               PCI_CLASS_BRIDGE_PCI, 8, via_no_dac);
 #endif
index 49a5cb55429b6ae51696f43c5a05c63a07df5566..ed2835e148b50442fe258ec8adb2c713b1e8ca67 100644 (file)
@@ -416,7 +416,12 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root)
                kfree(sd);
        } else {
                get_current_resources(device, busnum, domain, &resources);
-               if (list_empty(&resources))
+
+               /*
+                * _CRS with no apertures is normal, so only fall back to
+                * defaults or native bridge info if we're ignoring _CRS.
+                */
+               if (!pci_use_crs)
                        x86_pci_root_bus_resources(busnum, &resources);
                bus = pci_create_root_bus(NULL, busnum, &pci_root_ops, sd,
                                          &resources);
index 6dd89555fbfa9bdddd8f797f6fe48231ef773be9..d0e6e403b4f671d0ba3b9b1456fcc8d68c3ff24b 100644 (file)
@@ -164,11 +164,11 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8367_0, pci_fixup_
  */
 static void __devinit pci_fixup_transparent_bridge(struct pci_dev *dev)
 {
-       if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI &&
-           (dev->device & 0xff00) == 0x2400)
+       if ((dev->device & 0xff00) == 0x2400)
                dev->transparent = 1;
 }
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_fixup_transparent_bridge);
+DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID,
+                        PCI_CLASS_BRIDGE_PCI, 8, pci_fixup_transparent_bridge);
 
 /*
  * Fixup for C1 Halt Disconnect problem on nForce2 systems.
@@ -322,9 +322,6 @@ static void __devinit pci_fixup_video(struct pci_dev *pdev)
        struct pci_bus *bus;
        u16 config;
 
-       if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA)
-               return;
-
        /* Is VGA routed to us? */
        bus = pdev->bus;
        while (bus) {
@@ -353,7 +350,8 @@ static void __devinit pci_fixup_video(struct pci_dev *pdev)
                dev_printk(KERN_DEBUG, &pdev->dev, "Boot video device\n");
        }
 }
-DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_video);
+DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID,
+                               PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video);
 
 
 static const struct dmi_system_id __devinitconst msi_k8t_dmi_table[] = {
index 91821a1a0c3a7e1a7206eba6867dbe5dd8da769d..831971e731f7a0379d732ec040e683f490845aa0 100644 (file)
 #include <asm/io_apic.h>
 
 
+/*
+ * This list of dynamic mappings is for temporarily maintaining
+ * original BIOS BAR addresses for possible reinstatement.
+ *
+ * One entry is kept per PCI device; entries are linked on the
+ * pcibios_fwaddrmappings list and are found by comparing device pointers.
+ */
+struct pcibios_fwaddrmap {
+       struct list_head list;  /* link in pcibios_fwaddrmappings */
+       struct pci_dev *dev;    /* device the saved addresses belong to */
+       resource_size_t fw_addr[DEVICE_COUNT_RESOURCE]; /* saved address, indexed by resource number */
+};
+
+static LIST_HEAD(pcibios_fwaddrmappings);
+static DEFINE_SPINLOCK(pcibios_fwaddrmap_lock);
+
+/*
+ * Find the saved-address entry for @dev.
+ *
+ * Returns the matching entry, or NULL if no entry exists for the device.
+ * Must be called with 'pcibios_fwaddrmap_lock' lock held.
+ */
+static struct pcibios_fwaddrmap *pcibios_fwaddrmap_lookup(struct pci_dev *dev)
+{
+       struct pcibios_fwaddrmap *map;
+
+       /*
+        * spin_is_locked() reports "not locked" on uniprocessor builds
+        * without spinlock debugging, so a plain WARN_ON() would fire on
+        * every call there.  Only perform the check on SMP kernels.
+        */
+       WARN_ON_SMP(!spin_is_locked(&pcibios_fwaddrmap_lock));
+
+       list_for_each_entry(map, &pcibios_fwaddrmappings, list)
+               if (map->dev == dev)
+                       return map;
+
+       return NULL;
+}
+
+/*
+ * Remember the firmware-assigned address @fw_addr of resource @idx on @dev.
+ *
+ * An existing entry for the device is updated in place; otherwise a new
+ * entry is allocated and appended to the list.  If the allocation fails
+ * the address is silently not recorded.
+ */
+static void
+pcibios_save_fw_addr(struct pci_dev *dev, int idx, resource_size_t fw_addr)
+{
+       struct pcibios_fwaddrmap *entry;
+       unsigned long irqflags;
+
+       spin_lock_irqsave(&pcibios_fwaddrmap_lock, irqflags);
+       entry = pcibios_fwaddrmap_lookup(dev);
+       if (entry) {
+               /* Device already tracked: just record this resource. */
+               entry->fw_addr[idx] = fw_addr;
+               spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, irqflags);
+               return;
+       }
+       /* The lock is dropped while the new entry is allocated. */
+       spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, irqflags);
+
+       entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+       if (!entry)
+               return;
+
+       /* The entry holds a device reference until the list is torn down. */
+       entry->dev = pci_dev_get(dev);
+       entry->fw_addr[idx] = fw_addr;
+       INIT_LIST_HEAD(&entry->list);
+
+       spin_lock_irqsave(&pcibios_fwaddrmap_lock, irqflags);
+       list_add_tail(&entry->list, &pcibios_fwaddrmappings);
+       spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, irqflags);
+}
+
+/*
+ * Return the firmware address saved for resource @idx of @dev, or 0 when
+ * no entry has been recorded for the device.
+ */
+resource_size_t pcibios_retrieve_fw_addr(struct pci_dev *dev, int idx)
+{
+       struct pcibios_fwaddrmap *entry;
+       resource_size_t saved;
+       unsigned long irqflags;
+
+       spin_lock_irqsave(&pcibios_fwaddrmap_lock, irqflags);
+       entry = pcibios_fwaddrmap_lookup(dev);
+       saved = entry ? entry->fw_addr[idx] : 0;
+       spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, irqflags);
+
+       return saved;
+}
+
+/*
+ * Tear down the whole saved-address list: unlink every entry, drop the
+ * device reference it holds and free it.
+ */
+static void pcibios_fw_addr_list_del(void)
+{
+       struct pcibios_fwaddrmap *victim;
+       unsigned long irqflags;
+
+       spin_lock_irqsave(&pcibios_fwaddrmap_lock, irqflags);
+       /* Drain from the head until nothing is left. */
+       while (!list_empty(&pcibios_fwaddrmappings)) {
+               victim = list_first_entry(&pcibios_fwaddrmappings,
+                                         struct pcibios_fwaddrmap, list);
+               list_del(&victim->list);
+               pci_dev_put(victim->dev);
+               kfree(victim);
+       }
+       spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, irqflags);
+}
+
 static int
 skip_isa_ioresource_align(struct pci_dev *dev) {
 
@@ -182,7 +263,8 @@ static void __init pcibios_allocate_resources(int pass)
                                        idx, r, disabled, pass);
                                if (pci_claim_resource(dev, idx) < 0) {
                                        /* We'll assign a new address later */
-                                       dev->fw_addr[idx] = r->start;
+                                       pcibios_save_fw_addr(dev,
+                                                       idx, r->start);
                                        r->end -= r->start;
                                        r->start = 0;
                                }
@@ -228,6 +310,7 @@ static int __init pcibios_assign_resources(void)
        }
 
        pci_assign_unassigned_resources();
+       pcibios_fw_addr_list_del();
 
        return 0;
 }
index cb29191cee5877824391a33de0cea9c5255f2c55..140942f66b314dbdf6907b7314ee4016751da528 100644 (file)
@@ -43,6 +43,8 @@
 #define PCI_FIXED_BAR_4_SIZE   0x14
 #define PCI_FIXED_BAR_5_SIZE   0x1c
 
+static int pci_soc_mode = 0;
+
 /**
  * fixed_bar_cap - return the offset of the fixed BAR cap if found
  * @bus: PCI bus
@@ -148,7 +150,9 @@ static bool type1_access_ok(unsigned int bus, unsigned int devfn, int reg)
         */
        if (reg >= 0x100 || reg == PCI_STATUS || reg == PCI_HEADER_TYPE)
                return 0;
-       if (bus == 0 && (devfn == PCI_DEVFN(2, 0) || devfn == PCI_DEVFN(0, 0)))
+       if (bus == 0 && (devfn == PCI_DEVFN(2, 0)
+                               || devfn == PCI_DEVFN(0, 0)
+                               || devfn == PCI_DEVFN(3, 0)))
                return 1;
        return 0; /* langwell on others */
 }
@@ -231,14 +235,43 @@ struct pci_ops pci_mrst_ops = {
  */
 int __init pci_mrst_init(void)
 {
-       printk(KERN_INFO "Moorestown platform detected, using MRST PCI ops\n");
+       printk(KERN_INFO "Intel MID platform detected, using MID PCI ops\n");
        pci_mmcfg_late_init();
        pcibios_enable_irq = mrst_pci_irq_enable;
        pci_root_ops = pci_mrst_ops;
+       pci_soc_mode = 1;
        /* Continue with standard init */
        return 1;
 }
 
+/*
+ * Langwell devices are not true PCI devices, so they are not subject to
+ * the 10 ms D3 to D0 delay required by the PCI spec.
+ */
+static void __devinit pci_d3delay_fixup(struct pci_dev *dev)
+{
+       /*
+        * PCI fixups are effectively decided at compile time, so on a dual
+        * SoC/non-SoC kernel only touch the delay when running in SoC mode.
+        * True PCI devices in Lincroft allow type 1 access and keep their
+        * delay; the rest are Langwell fake PCI devices.
+        */
+       if (pci_soc_mode &&
+           !type1_access_ok(dev->bus->number, dev->devfn, PCI_DEVICE_ID))
+               dev->d3_delay = 0;
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_d3delay_fixup);
+
+/*
+ * Request the D3cold power state for a device matched by one of the
+ * fixup entries registered below.
+ */
+static void __devinit mrst_power_off_unused_dev(struct pci_dev *dev)
+{
+       /*
+        * PCI fixups are compiled in unconditionally.  As with the other
+        * fixups in this file, do nothing unless the MID PCI ops were
+        * actually selected, so a dual SoC/non-SoC kernel leaves devices
+        * on other platforms alone.
+        */
+       if (!pci_soc_mode)
+               return;
+
+       pci_set_power_state(dev, PCI_D3cold);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0801, mrst_power_off_unused_dev);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0809, mrst_power_off_unused_dev);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x080C, mrst_power_off_unused_dev);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0812, mrst_power_off_unused_dev);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0815, mrst_power_off_unused_dev);
+
 /*
  * Langwell devices reside at fixed offsets, don't try to move them.
  */
@@ -248,6 +281,9 @@ static void __devinit pci_fixed_bar_fixup(struct pci_dev *dev)
        u32 size;
        int i;
 
+       if (!pci_soc_mode)
+               return;
+
        /* Must have extended configuration space */
        if (dev->cfg_size < PCIE_CAP_OFFSET + 4)
                return;
index 61045c192e886761055391ffffbf13a7e607e89f..eb30e356f5bec906d2c34faad04954659c653130 100644 (file)
@@ -153,7 +153,7 @@ static void __init pci_controller_apertures(struct pci_controller *pci_ctrl,
        }
        res->start += io_offset;
        res->end += io_offset;
-       pci_add_resource(resources, res);
+       pci_add_resource_offset(resources, res, io_offset);
 
        for (i = 0; i < 3; i++) {
                res = &pci_ctrl->mem_resources[i];
@@ -200,24 +200,9 @@ subsys_initcall(pcibios_init);
 
 void __init pcibios_fixup_bus(struct pci_bus *bus)
 {
-       struct pci_controller *pci_ctrl = bus->sysdata;
-       struct resource *res;
-       unsigned long io_offset;
-       int i;
-
-       io_offset = (unsigned long)pci_ctrl->io_space.base;
        if (bus->parent) {
                /* This is a subordinate bridge */
                pci_read_bridge_bases(bus);
-
-               for (i = 0; i < 4; i++) {
-                       if ((res = bus->resource[i]) == NULL || !res->flags)
-                               continue;
-                       if (io_offset && (res->flags & IORESOURCE_IO)) {
-                               res->start += io_offset;
-                               res->end += io_offset;
-                       }
-               }
        }
 }
 
index 3101dd59e3794c6d2d34c2ff4df65f1cece29a28..71c1b0a7535ccad12a2d95068890359ce74b7e3e 100644 (file)
@@ -369,6 +369,21 @@ config I2C_DESIGNWARE_PCI
          This driver can also be built as a module.  If so, the module
          will be called i2c-designware-pci.
 
+config I2C_EG20T
+       tristate "Intel EG20T PCH/LAPIS Semicon IOH(ML7213/ML7223/ML7831) I2C"
+       depends on PCI
+       help
+         This driver is for PCH(Platform controller Hub) I2C of EG20T which
+         is an IOH(Input/Output Hub) for x86 embedded processor.
+         This driver can access PCH I2C bus device.
+
+         This driver also can be used for LAPIS Semiconductor IOH(Input/
+         Output Hub), ML7213, ML7223 and ML7831.
+         ML7213 IOH is for IVI(In-Vehicle Infotainment) use, ML7223 IOH is
+         for MP(Media Phone) use and ML7831 IOH is for general purpose use.
+         ML7213/ML7223/ML7831 is companion chip for Intel Atom E6xx series.
+         ML7213/ML7223/ML7831 is completely compatible for Intel EG20T PCH.
+
 config I2C_GPIO
        tristate "GPIO-based bitbanging I2C"
        depends on GENERIC_GPIO
@@ -630,6 +645,16 @@ config I2C_SIMTEC
          This driver can also be built as a module. If so, the module
          will be called i2c-simtec.
 
+config I2C_SIRF
+       tristate "CSR SiRFprimaII I2C interface"
+       depends on ARCH_PRIMA2
+       help
+         If you say yes to this option, support will be included for the
+         CSR SiRFprimaII I2C interface.
+
+         This driver can also be built as a module.  If so, the module
+         will be called i2c-sirf.
+
 config I2C_STU300
        tristate "ST Microelectronics DDC I2C interface"
        depends on MACH_U300
@@ -681,20 +706,15 @@ config I2C_XILINX
          This driver can also be built as a module.  If so, the module
          will be called xilinx_i2c.
 
-config I2C_EG20T
-       tristate "Intel EG20T PCH/LAPIS Semicon IOH(ML7213/ML7223/ML7831) I2C"
-       depends on PCI
+config I2C_XLR
+       tristate "XLR I2C support"
+       depends on CPU_XLR
        help
-         This driver is for PCH(Platform controller Hub) I2C of EG20T which
-         is an IOH(Input/Output Hub) for x86 embedded processor.
-         This driver can access PCH I2C bus device.
+         This driver enables support for the on-chip I2C interface of
+         the Netlogic XLR/XLS MIPS processors.
 
-         This driver also can be used for LAPIS Semiconductor IOH(Input/
-         Output Hub), ML7213, ML7223 and ML7831.
-         ML7213 IOH is for IVI(In-Vehicle Infotainment) use, ML7223 IOH is
-         for MP(Media Phone) use and ML7831 IOH is for general purpose use.
-         ML7213/ML7223/ML7831 is companion chip for Intel Atom E6xx series.
-         ML7213/ML7223/ML7831 is completely compatible for Intel EG20T PCH.
+         This driver can also be built as a module.  If so, the module
+         will be called i2c-xlr.
 
 comment "External I2C/SMBus adapter drivers"
 
index fba6da60aa0e0220ff5cf8eb6a906456b7afead2..569567b0d02704653a8884757fa47aaba0fb453e 100644 (file)
@@ -37,6 +37,7 @@ obj-$(CONFIG_I2C_DESIGNWARE_PLATFORM) += i2c-designware-platform.o
 i2c-designware-platform-objs := i2c-designware-platdrv.o i2c-designware-core.o
 obj-$(CONFIG_I2C_DESIGNWARE_PCI)       += i2c-designware-pci.o
 i2c-designware-pci-objs := i2c-designware-pcidrv.o i2c-designware-core.o
+obj-$(CONFIG_I2C_EG20T)                += i2c-eg20t.o
 obj-$(CONFIG_I2C_GPIO)         += i2c-gpio.o
 obj-$(CONFIG_I2C_HIGHLANDER)   += i2c-highlander.o
 obj-$(CONFIG_I2C_IBM_IIC)      += i2c-ibm_iic.o
@@ -63,12 +64,13 @@ obj-$(CONFIG_I2C_S6000)             += i2c-s6000.o
 obj-$(CONFIG_I2C_SH7760)       += i2c-sh7760.o
 obj-$(CONFIG_I2C_SH_MOBILE)    += i2c-sh_mobile.o
 obj-$(CONFIG_I2C_SIMTEC)       += i2c-simtec.o
+obj-$(CONFIG_I2C_SIRF)         += i2c-sirf.o
 obj-$(CONFIG_I2C_STU300)       += i2c-stu300.o
 obj-$(CONFIG_I2C_TEGRA)                += i2c-tegra.o
 obj-$(CONFIG_I2C_VERSATILE)    += i2c-versatile.o
 obj-$(CONFIG_I2C_OCTEON)       += i2c-octeon.o
 obj-$(CONFIG_I2C_XILINX)       += i2c-xiic.o
-obj-$(CONFIG_I2C_EG20T)         += i2c-eg20t.o
+obj-$(CONFIG_I2C_XLR)          += i2c-xlr.o
 
 # External I2C/SMBus adapter drivers
 obj-$(CONFIG_I2C_DIOLAN_U2C)   += i2c-diolan-u2c.o
index 5244c4724df7a558e0e84459c7f5c89dacb29ef2..4ba589ab8614050d65ec1f45d918899a36e3ac0b 100644 (file)
@@ -214,7 +214,7 @@ static int __init dw_i2c_init_driver(void)
 {
        return platform_driver_probe(&dw_i2c_driver, dw_i2c_probe);
 }
-module_init(dw_i2c_init_driver);
+subsys_initcall(dw_i2c_init_driver);
 
 static void __exit dw_i2c_exit_driver(void)
 {
index ca8877641040284e4eb7964c9abe63a7e2d4a11d..f086131cb1c70372384d1af7be50414ce73b4356 100644 (file)
@@ -271,30 +271,36 @@ static inline bool ktime_lt(const ktime_t cmp1, const ktime_t cmp2)
 /**
  * pch_i2c_wait_for_bus_idle() - check the status of bus.
  * @adap:      Pointer to struct i2c_algo_pch_data.
- * @timeout:   waiting time counter (us).
+ * @timeout:   waiting time counter (ms).
  */
 static s32 pch_i2c_wait_for_bus_idle(struct i2c_algo_pch_data *adap,
                                     s32 timeout)
 {
        void __iomem *p = adap->pch_base_address;
-       ktime_t ns_val;
+       int schedule = 0;
+       unsigned long end = jiffies + msecs_to_jiffies(timeout);
+
+       while (ioread32(p + PCH_I2CSR) & I2CMBB_BIT) {
+               if (time_after(jiffies, end)) {
+                       pch_dbg(adap, "I2CSR = %x\n", ioread32(p + PCH_I2CSR));
+                       pch_err(adap, "%s: Timeout Error.return%d\n",
+                                       __func__, -ETIME);
+                       pch_i2c_init(adap);
 
-       if ((ioread32(p + PCH_I2CSR) & I2CMBB_BIT) == 0)
-               return 0;
+                       return -ETIME;
+               }
 
-       /* MAX timeout value is timeout*1000*1000nsec */
-       ns_val = ktime_add_ns(ktime_get(), timeout*1000*1000);
-       do {
-               msleep(20);
-               if ((ioread32(p + PCH_I2CSR) & I2CMBB_BIT) == 0)
-                       return 0;
-       } while (ktime_lt(ktime_get(), ns_val));
+               if (!schedule)
+                       /* Retry after some usecs */
+                       udelay(5);
+               else
+                       /* Wait a bit more without consuming CPU */
+                       usleep_range(20, 1000);
 
-       pch_dbg(adap, "I2CSR = %x\n", ioread32(p + PCH_I2CSR));
-       pch_err(adap, "%s: Timeout Error.return%d\n", __func__, -ETIME);
-       pch_i2c_init(adap);
+               schedule = 1;
+       }
 
-       return -ETIME;
+       return 0;
 }
 
 /**
@@ -778,8 +784,6 @@ static s32 pch_i2c_xfer(struct i2c_adapter *i2c_adap,
        struct i2c_msg *pmsg;
        u32 i = 0;
        u32 status;
-       u32 msglen;
-       u32 subaddrlen;
        s32 ret;
 
        struct i2c_algo_pch_data *adap = i2c_adap->algo_data;
@@ -804,12 +808,6 @@ static s32 pch_i2c_xfer(struct i2c_adapter *i2c_adap,
                status = pmsg->flags;
                pch_dbg(adap,
                        "After invoking I2C_MODE_SEL :flag= 0x%x\n", status);
-               /* calculate sub address length and message length */
-               /* these are applicable only for buffer mode */
-               subaddrlen = pmsg->buf[0];
-               /* calculate actual message length excluding
-                * the sub address fields */
-               msglen = (pmsg->len) - (subaddrlen + 1);
 
                if ((status & (I2C_M_RD)) != false) {
                        ret = pch_i2c_readbytes(i2c_adap, pmsg, (i + 1 == num),
index 58832e578fff24dbbff08b38eb82e093a2cf36df..124d9c594f40bc0052b8d0e1cc4861d0024fdfa0 100644 (file)
@@ -149,11 +149,6 @@ static int i2c_imx_bus_busy(struct imx_i2c_struct *i2c_imx, int for_busy)
                        break;
                if (!for_busy && !(temp & I2SR_IBB))
                        break;
-               if (signal_pending(current)) {
-                       dev_dbg(&i2c_imx->adapter.dev,
-                               "<%s> I2C Interrupted\n", __func__);
-                       return -EINTR;
-               }
                if (time_after(jiffies, orig_jiffies + msecs_to_jiffies(500))) {
                        dev_dbg(&i2c_imx->adapter.dev,
                                "<%s> I2C bus is busy\n", __func__);
index a8ebb84e23f9b179c80c2eb20c8266461b0265da..206caacd30d7fb52d65f59207bc82a4f7fc3912b 100644 (file)
@@ -454,7 +454,7 @@ static int mpc_write(struct mpc_i2c *i2c, int target,
 }
 
 static int mpc_read(struct mpc_i2c *i2c, int target,
-                   u8 *data, int length, int restart)
+                   u8 *data, int length, int restart, bool recv_len)
 {
        unsigned timeout = i2c->adap.timeout;
        int i, result;
@@ -470,7 +470,7 @@ static int mpc_read(struct mpc_i2c *i2c, int target,
                return result;
 
        if (length) {
-               if (length == 1)
+               if (length == 1 && !recv_len)
                        writeccr(i2c, CCR_MIEN | CCR_MEN | CCR_MSTA | CCR_TXAK);
                else
                        writeccr(i2c, CCR_MIEN | CCR_MEN | CCR_MSTA);
@@ -479,17 +479,46 @@ static int mpc_read(struct mpc_i2c *i2c, int target,
        }
 
        for (i = 0; i < length; i++) {
+               u8 byte;
+
                result = i2c_wait(i2c, timeout, 0);
                if (result < 0)
                        return result;
 
-               /* Generate txack on next to last byte */
-               if (i == length - 2)
-                       writeccr(i2c, CCR_MIEN | CCR_MEN | CCR_MSTA | CCR_TXAK);
-               /* Do not generate stop on last byte */
-               if (i == length - 1)
-                       writeccr(i2c, CCR_MIEN | CCR_MEN | CCR_MSTA | CCR_MTX);
-               data[i] = readb(i2c->base + MPC_I2C_DR);
+               /*
+                * For block reads, we have to know the total length (1st byte)
+                * before we can determine if we are done.
+                */
+               if (i || !recv_len) {
+                       /* Generate txack on next to last byte */
+                       if (i == length - 2)
+                               writeccr(i2c, CCR_MIEN | CCR_MEN | CCR_MSTA
+                                        | CCR_TXAK);
+                       /* Do not generate stop on last byte */
+                       if (i == length - 1)
+                               writeccr(i2c, CCR_MIEN | CCR_MEN | CCR_MSTA
+                                        | CCR_MTX);
+               }
+
+               byte = readb(i2c->base + MPC_I2C_DR);
+
+               /*
+                * Adjust length if first received byte is length.
+                * The length is 1 length byte plus actually data length
+                */
+               if (i == 0 && recv_len) {
+                       if (byte == 0 || byte > I2C_SMBUS_BLOCK_MAX)
+                               return -EPROTO;
+                       length += byte;
+                       /*
+                        * For block reads, generate txack here if data length
+                        * is 1 byte (total length is 2 bytes).
+                        */
+                       if (length == 2)
+                               writeccr(i2c, CCR_MIEN | CCR_MEN | CCR_MSTA
+                                        | CCR_TXAK);
+               }
+               data[i] = byte;
        }
 
        return length;
@@ -532,12 +561,17 @@ static int mpc_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
                        "Doing %s %d bytes to 0x%02x - %d of %d messages\n",
                        pmsg->flags & I2C_M_RD ? "read" : "write",
                        pmsg->len, pmsg->addr, i + 1, num);
-               if (pmsg->flags & I2C_M_RD)
-                       ret =
-                           mpc_read(i2c, pmsg->addr, pmsg->buf, pmsg->len, i);
-               else
+               if (pmsg->flags & I2C_M_RD) {
+                       bool recv_len = pmsg->flags & I2C_M_RECV_LEN;
+
+                       ret = mpc_read(i2c, pmsg->addr, pmsg->buf, pmsg->len, i,
+                                      recv_len);
+                       if (recv_len && ret > 0)
+                               pmsg->len = ret;
+               } else {
                        ret =
                            mpc_write(i2c, pmsg->addr, pmsg->buf, pmsg->len, i);
+               }
        }
        mpc_i2c_stop(i2c);
        return (ret < 0) ? ret : num;
@@ -545,7 +579,8 @@ static int mpc_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
 
 static u32 mpc_functionality(struct i2c_adapter *adap)
 {
-       return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
+       return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL
+         | I2C_FUNC_SMBUS_READ_BLOCK_DATA | I2C_FUNC_SMBUS_BLOCK_PROC_CALL;
 }
 
 static const struct i2c_algorithm mpc_algo = {
index 4c17180816853a339ddb7a3dcb22b47425786173..737f7218a32ce5b136af3ecb4156f807f4fd28b8 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/clk.h>
 #include <linux/cpufreq.h>
 #include <linux/slab.h>
@@ -564,6 +565,7 @@ static int s3c24xx_i2c_xfer(struct i2c_adapter *adap,
        int retry;
        int ret;
 
+       pm_runtime_get_sync(&adap->dev);
        clk_enable(i2c->clk);
 
        for (retry = 0; retry < adap->retries; retry++) {
@@ -572,6 +574,7 @@ static int s3c24xx_i2c_xfer(struct i2c_adapter *adap,
 
                if (ret != -EAGAIN) {
                        clk_disable(i2c->clk);
+                       pm_runtime_put_sync(&adap->dev);
                        return ret;
                }
 
@@ -581,6 +584,7 @@ static int s3c24xx_i2c_xfer(struct i2c_adapter *adap,
        }
 
        clk_disable(i2c->clk);
+       pm_runtime_put_sync(&adap->dev);
        return -EREMOTEIO;
 }
 
@@ -890,7 +894,7 @@ static int s3c24xx_i2c_probe(struct platform_device *pdev)
                }
        }
 
-       i2c = kzalloc(sizeof(struct s3c24xx_i2c), GFP_KERNEL);
+       i2c = devm_kzalloc(&pdev->dev, sizeof(struct s3c24xx_i2c), GFP_KERNEL);
        if (!i2c) {
                dev_err(&pdev->dev, "no memory for state\n");
                return -ENOMEM;
@@ -1013,6 +1017,9 @@ static int s3c24xx_i2c_probe(struct platform_device *pdev)
        of_i2c_register_devices(&i2c->adap);
        platform_set_drvdata(pdev, i2c);
 
+       pm_runtime_enable(&pdev->dev);
+       pm_runtime_enable(&i2c->adap.dev);
+
        dev_info(&pdev->dev, "%s: S3C I2C adapter\n", dev_name(&i2c->adap.dev));
        clk_disable(i2c->clk);
        return 0;
@@ -1035,7 +1042,6 @@ static int s3c24xx_i2c_probe(struct platform_device *pdev)
        clk_put(i2c->clk);
 
  err_noclk:
-       kfree(i2c);
        return ret;
 }
 
@@ -1048,6 +1054,9 @@ static int s3c24xx_i2c_remove(struct platform_device *pdev)
 {
        struct s3c24xx_i2c *i2c = platform_get_drvdata(pdev);
 
+       pm_runtime_disable(&i2c->adap.dev);
+       pm_runtime_disable(&pdev->dev);
+
        s3c24xx_i2c_deregister_cpufreq(i2c);
 
        i2c_del_adapter(&i2c->adap);
@@ -1061,7 +1070,6 @@ static int s3c24xx_i2c_remove(struct platform_device *pdev)
        release_resource(i2c->ioarea);
        s3c24xx_i2c_dt_gpio_free(i2c);
        kfree(i2c->ioarea);
-       kfree(i2c);
 
        return 0;
 }
diff --git a/drivers/i2c/busses/i2c-sirf.c b/drivers/i2c/busses/i2c-sirf.c
new file mode 100644 (file)
index 0000000..5574a47
--- /dev/null
@@ -0,0 +1,459 @@
+/*
+ * I2C bus driver for CSR SiRFprimaII
+ *
+ * Copyright (c) 2011 Cambridge Silicon Radio Limited, a CSR plc group company.
+ *
+ * Licensed under GPLv2 or later.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+#include <linux/i2c.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/io.h>
+
+#define SIRFSOC_I2C_CLK_CTRL           0x00
+#define SIRFSOC_I2C_STATUS             0x0C
+#define SIRFSOC_I2C_CTRL               0x10
+#define SIRFSOC_I2C_IO_CTRL            0x14
+#define SIRFSOC_I2C_SDA_DELAY          0x18
+#define SIRFSOC_I2C_CMD_START          0x1C
+#define SIRFSOC_I2C_CMD_BUF            0x30
+#define SIRFSOC_I2C_DATA_BUF           0x80
+
+#define SIRFSOC_I2C_CMD_BUF_MAX                16
+#define SIRFSOC_I2C_DATA_BUF_MAX       16
+
+#define SIRFSOC_I2C_CMD(x)             (SIRFSOC_I2C_CMD_BUF + (x)*0x04)
+#define SIRFSOC_I2C_DATA_MASK(x)        (0xFF<<(((x)&3)*8))
+#define SIRFSOC_I2C_DATA_SHIFT(x)       (((x)&3)*8)
+
+#define SIRFSOC_I2C_DIV_MASK           (0xFFFF)
+
+/* I2C status flags */
+#define SIRFSOC_I2C_STAT_BUSY          BIT(0)
+#define SIRFSOC_I2C_STAT_TIP           BIT(1)
+#define SIRFSOC_I2C_STAT_NACK          BIT(2)
+#define SIRFSOC_I2C_STAT_TR_INT                BIT(4)
+#define SIRFSOC_I2C_STAT_STOP          BIT(6)
+#define SIRFSOC_I2C_STAT_CMD_DONE      BIT(8)
+#define SIRFSOC_I2C_STAT_ERR           BIT(9)
+#define SIRFSOC_I2C_CMD_INDEX          (0x1F<<16)
+
+/* I2C control flags */
+#define SIRFSOC_I2C_RESET              BIT(0)
+#define SIRFSOC_I2C_CORE_EN            BIT(1)
+#define SIRFSOC_I2C_MASTER_MODE                BIT(2)
+#define SIRFSOC_I2C_CMD_DONE_EN                BIT(11)
+#define SIRFSOC_I2C_ERR_INT_EN         BIT(12)
+
+#define SIRFSOC_I2C_SDA_DELAY_MASK     (0xFF)
+#define SIRFSOC_I2C_SCLF_FILTER                (3<<8)
+
+#define SIRFSOC_I2C_START_CMD          BIT(0)
+
+#define SIRFSOC_I2C_CMD_RP(x)          ((x)&0x7)
+#define SIRFSOC_I2C_NACK               BIT(3)
+#define SIRFSOC_I2C_WRITE              BIT(4)
+#define SIRFSOC_I2C_READ               BIT(5)
+#define SIRFSOC_I2C_STOP               BIT(6)
+#define SIRFSOC_I2C_START              BIT(7)
+
+#define SIRFSOC_I2C_DEFAULT_SPEED 100000
+
+/*
+ * Driver state for one SiRFprimaII I2C controller.  The per-message fields
+ * are reloaded by i2c_sirfsoc_xfer() before each message is started.
+ */
+struct sirfsoc_i2c {
+       void __iomem *base;     /* controller registers, accessed with readl/writel */
+       struct clk *clk;        /* enabled around each transfer */
+       u32 cmd_ptr;            /* Current position in CMD buffer */
+       u8 *buf;                /* Buffer passed by user */
+       u32 msg_len;            /* Message length */
+       u32 finished_len;       /* number of bytes read/written */
+       u32 read_cmd_len;       /* number of read cmd sent */
+       int msg_read;           /* 1 indicates a read message */
+       int err_status;         /* 1 indicates an error on bus */
+
+       u32 sda_delay;          /* For suspend/resume */
+       u32 clk_div;            /* NOTE(review): presumably also saved for suspend/resume - confirm */
+       int last;               /* Last message in transfer, STOP cmd can be sent */
+
+       struct completion done; /* indicates completion of message transfer */
+       struct i2c_adapter adapter;
+};
+
+/*
+ * Copy the bytes produced by the last batch of read commands from the
+ * controller's data buffer into the caller's buffer.  The data buffer is
+ * read one 32-bit word at a time and each word carries four bytes, lowest
+ * byte first.
+ */
+static void i2c_sirfsoc_read_data(struct sirfsoc_i2c *siic)
+{
+       u32 word = 0;
+       int pos;
+
+       for (pos = 0; pos < siic->read_cmd_len; pos++) {
+               /* Fetch a fresh word at every fourth byte position. */
+               if ((pos & 0x3) == 0)
+                       word = readl(siic->base + SIRFSOC_I2C_DATA_BUF + pos);
+
+               siic->buf[siic->finished_len] =
+                       (u8)((word & SIRFSOC_I2C_DATA_MASK(pos)) >>
+                               SIRFSOC_I2C_DATA_SHIFT(pos));
+               siic->finished_len++;
+       }
+}
+
+/*
+ * Fill the CMD buffer with as much of the current message as fits, starting
+ * at siic->cmd_ptr, and start the controller executing it.
+ */
+static void i2c_sirfsoc_queue_cmd(struct sirfsoc_i2c *siic)
+{
+       u32 regval;
+       int i = 0;
+
+       if (siic->msg_read) {
+               /*
+                * One READ command per byte still outstanding, until the
+                * message is covered or the CMD buffer is full.
+                */
+               while (((siic->finished_len + i) < siic->msg_len)
+                               && (siic->cmd_ptr < SIRFSOC_I2C_CMD_BUF_MAX)) {
+                       regval = SIRFSOC_I2C_READ | SIRFSOC_I2C_CMD_RP(0);
+                       /* Final byte of the final message: NACK it and STOP */
+                       if (((siic->finished_len + i) ==
+                                       (siic->msg_len - 1)) && siic->last)
+                               regval |= SIRFSOC_I2C_STOP | SIRFSOC_I2C_NACK;
+                       writel(regval,
+                               siic->base + SIRFSOC_I2C_CMD(siic->cmd_ptr++));
+                       i++;
+               }
+
+               /* Tells i2c_sirfsoc_read_data() how many bytes to collect */
+               siic->read_cmd_len = i;
+       } else {
+               /*
+                * Each written byte uses two CMD slots (the command word
+                * followed by the data byte), hence the "MAX - 1" bound.
+                * finished_len is advanced as bytes are queued.
+                */
+               while ((siic->cmd_ptr < SIRFSOC_I2C_CMD_BUF_MAX - 1)
+                               && (siic->finished_len < siic->msg_len)) {
+                       regval = SIRFSOC_I2C_WRITE | SIRFSOC_I2C_CMD_RP(0);
+                       /* Final byte of the final message: add STOP */
+                       if ((siic->finished_len == (siic->msg_len - 1))
+                               && siic->last)
+                               regval |= SIRFSOC_I2C_STOP;
+                       writel(regval,
+                               siic->base + SIRFSOC_I2C_CMD(siic->cmd_ptr++));
+                       writel(siic->buf[siic->finished_len++],
+                               siic->base + SIRFSOC_I2C_CMD(siic->cmd_ptr++));
+               }
+       }
+       /* The next fill starts again from the first CMD slot */
+       siic->cmd_ptr = 0;
+
+       /* Trigger the transfer */
+       writel(SIRFSOC_I2C_START_CMD, siic->base + SIRFSOC_I2C_CMD_START);
+}
+
+/*
+ * Interrupt handler.  @dev_id is the struct sirfsoc_i2c of the controller.
+ *
+ * On an error status the error is recorded and the waiter is woken.  On
+ * "command done" any read data is collected, then either the waiter is
+ * woken (message complete) or the next batch of commands is queued.
+ * Always returns IRQ_HANDLED.
+ */
+static irqreturn_t i2c_sirfsoc_irq(int irq, void *dev_id)
+{
+       struct sirfsoc_i2c *siic = (struct sirfsoc_i2c *)dev_id;
+       u32 i2c_stat = readl(siic->base + SIRFSOC_I2C_STATUS);
+
+       if (i2c_stat & SIRFSOC_I2C_STAT_ERR) {
+               /* Error conditions */
+               siic->err_status = 1;
+               /* NOTE(review): presumably write-1-to-clear - confirm */
+               writel(SIRFSOC_I2C_STAT_ERR, siic->base + SIRFSOC_I2C_STATUS);
+
+               if (i2c_stat & SIRFSOC_I2C_STAT_NACK)
+                       dev_err(&siic->adapter.dev, "ACK not received\n");
+               else
+                       dev_err(&siic->adapter.dev, "I2C error\n");
+
+               complete(&siic->done);
+       } else if (i2c_stat & SIRFSOC_I2C_STAT_CMD_DONE) {
+               /* CMD buffer execution complete */
+               if (siic->msg_read)
+                       i2c_sirfsoc_read_data(siic);
+               if (siic->finished_len == siic->msg_len)
+                       complete(&siic->done);
+               else /* Fill a new CMD buffer for left data */
+                       i2c_sirfsoc_queue_cmd(siic);
+
+               writel(SIRFSOC_I2C_STAT_CMD_DONE, siic->base + SIRFSOC_I2C_STATUS);
+       }
+
+       return IRQ_HANDLED;
+}
+
+/*
+ * Queue the START command and the address byte for @msg in the next two
+ * CMD buffer slots.  The address byte is the message address shifted left
+ * by one, with bit 0 set for a read.
+ */
+static void i2c_sirfsoc_set_address(struct sirfsoc_i2c *siic,
+       struct i2c_msg *msg)
+{
+       u32 start_cmd = SIRFSOC_I2C_START | SIRFSOC_I2C_CMD_RP(0) |
+                       SIRFSOC_I2C_WRITE;
+       unsigned char bus_addr = msg->addr << 1;
+
+       if (msg->flags & I2C_M_RD)
+               bus_addr |= 1;
+
+       /* An empty final message has nothing after the address: STOP now. */
+       if ((msg->len == 0) && siic->last)
+               start_cmd |= SIRFSOC_I2C_STOP;
+
+       writel(start_cmd, siic->base + SIRFSOC_I2C_CMD(siic->cmd_ptr));
+       siic->cmd_ptr++;
+
+       writel(bus_addr, siic->base + SIRFSOC_I2C_CMD(siic->cmd_ptr));
+       siic->cmd_ptr++;
+}
+
+/*
+ * Transfer a single message and wait for it to finish.
+ *
+ * The caller must have loaded the per-message state in @siic beforehand.
+ * Returns 0 on success, or -EIO if the interrupt handler reported an error
+ * or the transfer timed out; in that case the controller is reset.
+ */
+static int i2c_sirfsoc_xfer_msg(struct sirfsoc_i2c *siic, struct i2c_msg *msg)
+{
+       u32 regval = readl(siic->base + SIRFSOC_I2C_CTRL);
+       /* timeout waiting for the xfer to finish or fail */
+       /* 50 ms are allowed per data byte, plus 50 ms more */
+       int timeout = msecs_to_jiffies((msg->len + 1) * 50);
+       int ret = 0;
+
+       i2c_sirfsoc_set_address(siic, msg);
+
+       /* Enable the "command done" and error interrupts, then start */
+       writel(regval | SIRFSOC_I2C_CMD_DONE_EN | SIRFSOC_I2C_ERR_INT_EN,
+               siic->base + SIRFSOC_I2C_CTRL);
+       i2c_sirfsoc_queue_cmd(siic);
+
+       if (wait_for_completion_timeout(&siic->done, timeout) == 0) {
+               siic->err_status = 1;
+               dev_err(&siic->adapter.dev, "Transfer timeout\n");
+       }
+
+       /* Write back the entry-time CTRL value with both interrupts masked */
+       writel(regval & ~(SIRFSOC_I2C_CMD_DONE_EN | SIRFSOC_I2C_ERR_INT_EN),
+               siic->base + SIRFSOC_I2C_CTRL);
+       writel(0, siic->base + SIRFSOC_I2C_CMD_START);
+
+       if (siic->err_status) {
+               /* Set the reset bit and spin until it reads back clear */
+               writel(readl(siic->base + SIRFSOC_I2C_CTRL) | SIRFSOC_I2C_RESET,
+                       siic->base + SIRFSOC_I2C_CTRL);
+               while (readl(siic->base + SIRFSOC_I2C_CTRL) & SIRFSOC_I2C_RESET)
+                       cpu_relax();
+
+               ret = -EIO;
+       }
+
+       return ret;
+}
+
+/* Report what this adapter can do: plain I2C plus emulated SMBus. */
+static u32 i2c_sirfsoc_func(struct i2c_adapter *adap)
+{
+       const u32 caps = I2C_FUNC_SMBUS_EMUL | I2C_FUNC_I2C;
+
+       return caps;
+}
+
+/*
+ * master_xfer callback: transfer @num messages one after another.
+ *
+ * The controller clock is enabled for the duration of the call and
+ * disabled again on every exit path once it has been enabled.
+ *
+ * Returns @num when every message was transferred, otherwise the negative
+ * error code from clk_enable() or from the first message that failed.
+ */
+static int i2c_sirfsoc_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs,
+       int num)
+{
+       struct sirfsoc_i2c *siic = adap->algo_data;
+       int i, ret;
+
+       /* Do not touch the registers if the clock could not be enabled */
+       ret = clk_enable(siic->clk);
+       if (ret)
+               return ret;
+
+       for (i = 0; i < num; i++) {
+               /* Reset the per-message transfer state kept in siic */
+               siic->buf = msgs[i].buf;
+               siic->msg_len = msgs[i].len;
+               siic->msg_read = !!(msgs[i].flags & I2C_M_RD);
+               siic->err_status = 0;
+               siic->cmd_ptr = 0;
+               siic->finished_len = 0;
+               siic->last = (i == (num - 1));
+
+               ret = i2c_sirfsoc_xfer_msg(siic, &msgs[i]);
+               if (ret)
+                       break;
+       }
+
+       clk_disable(siic->clk);
+       return ret ? ret : num;
+}
+
+/*
+ * I2C algorithm hooks of this master controller driver: the transfer
+ * routine and the functionality query.
+ */
+static const struct i2c_algorithm i2c_sirfsoc_algo = {
+       .master_xfer = i2c_sirfsoc_xfer,
+       .functionality = i2c_sirfsoc_func,
+};
+
+/*
+ * Probe one SiRF SoC I2C controller.
+ *
+ * Acquires, prepares and enables the clock, allocates the driver state,
+ * maps the register window, requests the interrupt, resets and enables
+ * the controller in master mode, programs the clock divider and SDA delay
+ * from the "clock-frequency" property (or the driver default) and finally
+ * registers the numbered adapter.  The clock is left prepared but
+ * disabled on success.
+ *
+ * Returns 0 on success or a negative errno; on failure the clock is
+ * released in reverse order of acquisition via the labels at the end.
+ */
+static int __devinit i2c_sirfsoc_probe(struct platform_device *pdev)
+{
+       struct sirfsoc_i2c *siic;
+       struct i2c_adapter *adap;
+       struct resource *mem_res;
+       struct clk *clk;
+       int bitrate;
+       int ctrl_speed;
+       int irq;
+
+       int err;
+       u32 regval;
+
+       clk = clk_get(&pdev->dev, NULL);
+       if (IS_ERR(clk)) {
+               err = PTR_ERR(clk);
+               dev_err(&pdev->dev, "Clock get failed\n");
+               goto err_get_clk;
+       }
+
+       err = clk_prepare(clk);
+       if (err) {
+               dev_err(&pdev->dev, "Clock prepare failed\n");
+               goto err_clk_prep;
+       }
+
+       err = clk_enable(clk);
+       if (err) {
+               dev_err(&pdev->dev, "Clock enable failed\n");
+               goto err_clk_en;
+       }
+
+       /* Input clock rate, used below to derive the bus divider */
+       ctrl_speed = clk_get_rate(clk);
+
+       siic = devm_kzalloc(&pdev->dev, sizeof(*siic), GFP_KERNEL);
+       if (!siic) {
+               dev_err(&pdev->dev, "Can't allocate driver data\n");
+               err = -ENOMEM;
+               goto out;
+       }
+       adap = &siic->adapter;
+       adap->class = I2C_CLASS_HWMON;
+
+       mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (mem_res == NULL) {
+               dev_err(&pdev->dev, "Unable to get MEM resource\n");
+               err = -EINVAL;
+               goto out;
+       }
+
+       siic->base = devm_request_and_ioremap(&pdev->dev, mem_res);
+       if (siic->base == NULL) {
+               dev_err(&pdev->dev, "IO remap failed!\n");
+               err = -ENOMEM;
+               goto out;
+       }
+
+       irq = platform_get_irq(pdev, 0);
+       if (irq < 0) {
+               err = irq;
+               goto out;
+       }
+       err = devm_request_irq(&pdev->dev, irq, i2c_sirfsoc_irq, 0,
+               dev_name(&pdev->dev), siic);
+       if (err)
+               goto out;
+
+       adap->algo = &i2c_sirfsoc_algo;
+       adap->algo_data = siic;
+
+       adap->dev.parent = &pdev->dev;
+       adap->nr = pdev->id;
+
+       strlcpy(adap->name, "sirfsoc-i2c", sizeof(adap->name));
+
+       /* drvdata is the adapter; remove/suspend/resume reach siic via algo_data */
+       platform_set_drvdata(pdev, adap);
+       init_completion(&siic->done);
+
+       /* Controller Initialisation */
+
+       /* Reset, spin until the RESET bit clears, then enable as master */
+       writel(SIRFSOC_I2C_RESET, siic->base + SIRFSOC_I2C_CTRL);
+       while (readl(siic->base + SIRFSOC_I2C_CTRL) & SIRFSOC_I2C_RESET)
+               cpu_relax();
+       writel(SIRFSOC_I2C_CORE_EN | SIRFSOC_I2C_MASTER_MODE,
+               siic->base + SIRFSOC_I2C_CTRL);
+
+       siic->clk = clk;
+
+       /*
+        * Fall back to the default speed when the property is missing.
+        * NOTE(review): bitrate is declared int but its address is passed
+        * to of_property_read_u32(); a value of 0 from the device tree
+        * would also divide by zero below - confirm both.
+        */
+       err = of_property_read_u32(pdev->dev.of_node,
+               "clock-frequency", &bitrate);
+       if (err < 0)
+               bitrate = SIRFSOC_I2C_DEFAULT_SPEED;
+
+       /* Two divider formulas, selected by whether bitrate is below 100 kHz */
+       if (bitrate < 100000)
+               regval =
+                       (2 * ctrl_speed) / (2 * bitrate * 11);
+       else
+               regval = ctrl_speed / (bitrate * 5);
+
+       /* The same value is used for the SDA delay, clamped to 0xFF */
+       writel(regval, siic->base + SIRFSOC_I2C_CLK_CTRL);
+       if (regval > 0xFF)
+               writel(0xFF, siic->base + SIRFSOC_I2C_SDA_DELAY);
+       else
+               writel(regval, siic->base + SIRFSOC_I2C_SDA_DELAY);
+
+       err = i2c_add_numbered_adapter(adap);
+       if (err < 0) {
+               dev_err(&pdev->dev, "Can't add new i2c adapter\n");
+               goto out;
+       }
+
+       /* Keep the clock gated while idle; the transfer path re-enables it */
+       clk_disable(clk);
+
+       dev_info(&pdev->dev, " I2C adapter ready to operate\n");
+
+       return 0;
+
+       /* Error unwinding: each label undoes one more clock step */
+out:
+       clk_disable(clk);
+err_clk_en:
+       clk_unprepare(clk);
+err_clk_prep:
+       clk_put(clk);
+err_get_clk:
+       return err;
+}
+
+/*
+ * Remove one controller: put the hardware into reset, unregister the
+ * adapter and release the clock.
+ *
+ * The clock is left disabled after probe and between transfers, so it is
+ * enabled around the reset write here, as the suspend/resume handlers do
+ * for their register accesses.  If the clock cannot be enabled the reset
+ * write is skipped rather than issued to an unclocked block.
+ *
+ * Always returns 0.
+ */
+static int __devexit i2c_sirfsoc_remove(struct platform_device *pdev)
+{
+       struct i2c_adapter *adapter = platform_get_drvdata(pdev);
+       struct sirfsoc_i2c *siic = adapter->algo_data;
+
+       if (!clk_enable(siic->clk)) {
+               writel(SIRFSOC_I2C_RESET, siic->base + SIRFSOC_I2C_CTRL);
+               clk_disable(siic->clk);
+       }
+       i2c_del_adapter(adapter);
+       clk_unprepare(siic->clk);
+       clk_put(siic->clk);
+       return 0;
+}
+
+#ifdef CONFIG_PM
+/*
+ * System suspend: snapshot the SDA delay and clock divider registers into
+ * the driver state.  The clock is enabled only around the register reads.
+ */
+static int i2c_sirfsoc_suspend(struct device *dev)
+{
+       struct i2c_adapter *adapter =
+               platform_get_drvdata(to_platform_device(dev));
+       struct sirfsoc_i2c *siic = adapter->algo_data;
+
+       clk_enable(siic->clk);
+
+       siic->sda_delay = readl(siic->base + SIRFSOC_I2C_SDA_DELAY);
+       siic->clk_div = readl(siic->base + SIRFSOC_I2C_CLK_CTRL);
+
+       clk_disable(siic->clk);
+
+       return 0;
+}
+
+/*
+ * System resume: re-initialise the controller and restore the clock
+ * divider and SDA delay values held in the driver state.
+ *
+ * The reset is polled to completion before the control register is
+ * reprogrammed, exactly as the probe and transfer-error paths do;
+ * otherwise the enable write could land while the reset is still in
+ * progress.
+ *
+ * Always returns 0.
+ */
+static int i2c_sirfsoc_resume(struct device *dev)
+{
+       struct platform_device *pdev = to_platform_device(dev);
+       struct i2c_adapter *adapter = platform_get_drvdata(pdev);
+       struct sirfsoc_i2c *siic = adapter->algo_data;
+
+       clk_enable(siic->clk);
+       writel(SIRFSOC_I2C_RESET, siic->base + SIRFSOC_I2C_CTRL);
+       /* Wait for the RESET bit to clear before touching CTRL again */
+       while (readl(siic->base + SIRFSOC_I2C_CTRL) & SIRFSOC_I2C_RESET)
+               cpu_relax();
+       writel(SIRFSOC_I2C_CORE_EN | SIRFSOC_I2C_MASTER_MODE,
+               siic->base + SIRFSOC_I2C_CTRL);
+       writel(siic->clk_div, siic->base + SIRFSOC_I2C_CLK_CTRL);
+       writel(siic->sda_delay, siic->base + SIRFSOC_I2C_SDA_DELAY);
+       clk_disable(siic->clk);
+       return 0;
+}
+
+/* Power-management callbacks; only built when CONFIG_PM is set. */
+static const struct dev_pm_ops i2c_sirfsoc_pm_ops = {
+       .suspend = i2c_sirfsoc_suspend,
+       .resume = i2c_sirfsoc_resume,
+};
+#endif
+
+/* Device-tree compatible strings handled by this driver (empty-terminated). */
+static const struct of_device_id sirfsoc_i2c_of_match[] __devinitconst = {
+       { .compatible = "sirf,prima2-i2c", },
+       {},
+};
+MODULE_DEVICE_TABLE(of, sirfsoc_i2c_of_match);
+
+/*
+ * Platform driver glue: ties the probe/remove entry points, the OF match
+ * table and (with CONFIG_PM) the suspend/resume callbacks together.
+ */
+static struct platform_driver i2c_sirfsoc_driver = {
+       .driver = {
+               .name = "sirfsoc_i2c",
+               .owner = THIS_MODULE,
+#ifdef CONFIG_PM
+               .pm = &i2c_sirfsoc_pm_ops,
+#endif
+               .of_match_table = sirfsoc_i2c_of_match,
+       },
+       .probe = i2c_sirfsoc_probe,
+       .remove = __devexit_p(i2c_sirfsoc_remove),
+};
+module_platform_driver(i2c_sirfsoc_driver);
+
+MODULE_DESCRIPTION("SiRF SoC I2C master controller driver");
+MODULE_AUTHOR("Zhiwu Song <Zhiwu.Song@csr.com>, "
+       "Xiangzhen Ye <Xiangzhen.Ye@csr.com>");
+MODULE_LICENSE("GPL v2");
index 0ab4a9548745015cfae6633376527663e6a79708..e978635e60f04189e6478ccb112a0df4a2596ed3 100644 (file)
@@ -457,7 +457,6 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev,
        int ret;
 
        tegra_i2c_flush_fifos(i2c_dev);
-       i2c_writel(i2c_dev, 0xFF, I2C_INT_STATUS);
 
        if (msg->len == 0)
                return -EINVAL;
index 60556012312f27145745d62ee86f37f63107f021..f585aead50cc32eb045010406ab8e7e7e0b61d97 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/io.h>
+#include <linux/of_i2c.h>
 
 #define I2C_CONTROL    0x00
 #define I2C_CONTROLS   0x00
@@ -99,6 +100,7 @@ static int i2c_versatile_probe(struct platform_device *dev)
        strlcpy(i2c->adap.name, "Versatile I2C adapter", sizeof(i2c->adap.name));
        i2c->adap.algo_data = &i2c->algo;
        i2c->adap.dev.parent = &dev->dev;
+       i2c->adap.dev.of_node = dev->dev.of_node;
        i2c->algo = i2c_versatile_algo;
        i2c->algo.data = i2c;
 
@@ -111,6 +113,7 @@ static int i2c_versatile_probe(struct platform_device *dev)
                ret = i2c_bit_add_bus(&i2c->adap);
        if (ret >= 0) {
                platform_set_drvdata(dev, i2c);
+               of_i2c_register_devices(&i2c->adap);
                return 0;
        }
 
@@ -133,12 +136,19 @@ static int i2c_versatile_remove(struct platform_device *dev)
        return 0;
 }
 
+static const struct of_device_id i2c_versatile_match[] = {
+       { .compatible = "arm,versatile-i2c", },
+       {},
+};
+MODULE_DEVICE_TABLE(of, i2c_versatile_match);
+
 static struct platform_driver i2c_versatile_driver = {
        .probe          = i2c_versatile_probe,
        .remove         = i2c_versatile_remove,
        .driver         = {
                .name   = "versatile-i2c",
                .owner  = THIS_MODULE,
+               .of_match_table = i2c_versatile_match,
        },
 };
 
diff --git a/drivers/i2c/busses/i2c-xlr.c b/drivers/i2c/busses/i2c-xlr.c
new file mode 100644 (file)
index 0000000..96d3fab
--- /dev/null
@@ -0,0 +1,278 @@
+/*
+ * Copyright 2011, Netlogic Microsystems Inc.
+ * Copyright 2004, Matt Porter <mporter@kernel.crashing.org>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/i2c.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+
+/* XLR I2C REGISTERS */
+#define XLR_I2C_CFG            0x00
+#define XLR_I2C_CLKDIV         0x01
+#define XLR_I2C_DEVADDR                0x02
+#define XLR_I2C_ADDR           0x03
+#define XLR_I2C_DATAOUT                0x04
+#define XLR_I2C_DATAIN         0x05
+#define XLR_I2C_STATUS         0x06
+#define XLR_I2C_STARTXFR       0x07
+#define XLR_I2C_BYTECNT                0x08
+#define XLR_I2C_HDSTATIM       0x09
+
+/* XLR I2C REGISTERS FLAGS */
+#define XLR_I2C_BUS_BUSY       0x01
+#define XLR_I2C_SDOEMPTY       0x02
+#define XLR_I2C_RXRDY          0x04
+#define XLR_I2C_ACK_ERR                0x08
+#define XLR_I2C_ARB_STARTERR   0x30
+
+/* Register Values */
+#define XLR_I2C_CFG_ADDR       0xF8
+#define XLR_I2C_CFG_NOADDR     0xFA
+#define XLR_I2C_STARTXFR_ND    0x02    /* No Data */
+#define XLR_I2C_STARTXFR_RD    0x01    /* Read */
+#define XLR_I2C_STARTXFR_WR    0x00    /* Write */
+
+#define XLR_I2C_TIMEOUT                10      /* timeout per byte in msec */
+
+/*
+ * On XLR/XLS, we need to use __raw_ IO to read the I2C registers
+ * because they are in the big-endian MMIO area on the SoC.
+ *
+ * The readl/writel implementation on XLR/XLS byteswaps, because
+ * those are for its little-endian PCI space (see arch/mips/Kconfig).
+ */
+static inline void xlr_i2c_wreg(u32 __iomem *base, unsigned int reg, u32 val)
+{
+       /* reg is an index in 32-bit words from base, not a byte offset */
+       u32 __iomem *slot = base + reg;
+
+       __raw_writel(val, slot);
+}
+
+/* Raw (non-byteswapping) read of the 32-bit register at word index reg. */
+static inline u32 xlr_i2c_rdreg(u32 __iomem *base, unsigned int reg)
+{
+       u32 __iomem *slot = base + reg;
+
+       return __raw_readl(slot);
+}
+
+/* Per-controller driver state: the I2C adapter and its mapped registers. */
+struct xlr_i2c_private {
+       struct i2c_adapter adap;
+       u32 __iomem *iobase;
+};
+
+/*
+ * Write one message of @len bytes from @buf to the slave at @addr.
+ *
+ * buf[0] is programmed into the ADDR register and the byte count is set
+ * to len - 1; the remaining bytes are fed to DATAOUT one at a time each
+ * time the status register reports SDOEMPTY.  A one-byte message is
+ * started as a "no data" transfer.  The XLR_I2C_TIMEOUT deadline is
+ * re-armed after every byte handed to the controller, and the transfer is
+ * restarted from the top while ARB_STARTERR is reported and the deadline
+ * has not passed.
+ *
+ * Returns 0 on success, -EINVAL for a zero-length message, -EIO when the
+ * controller reports an ACK error, or -ETIMEDOUT when the deadline
+ * expires.
+ */
+static int xlr_i2c_tx(struct xlr_i2c_private *priv,  u16 len,
+       u8 *buf, u16 addr)
+{
+       struct i2c_adapter *adap = &priv->adap;
+       unsigned long timeout, stoptime, checktime;
+       u32 i2c_status;
+       int pos, timedout;
+       u8 offset, byte;
+
+       /*
+        * buf[0] is always consumed as the ADDR register value and the
+        * byte count is programmed as len - 1, so an empty message can
+        * neither be expressed nor safely read from buf.
+        */
+       if (len == 0)
+               return -EINVAL;
+
+       offset = buf[0];
+       xlr_i2c_wreg(priv->iobase, XLR_I2C_ADDR, offset);
+       xlr_i2c_wreg(priv->iobase, XLR_I2C_DEVADDR, addr);
+       xlr_i2c_wreg(priv->iobase, XLR_I2C_CFG, XLR_I2C_CFG_ADDR);
+       xlr_i2c_wreg(priv->iobase, XLR_I2C_BYTECNT, len - 1);
+
+       timeout = msecs_to_jiffies(XLR_I2C_TIMEOUT);
+       stoptime = jiffies + timeout;
+       timedout = 0;
+       pos = 1;
+retry:
+       if (len == 1) {
+               /* Only the offset byte: start a transfer without data */
+               xlr_i2c_wreg(priv->iobase, XLR_I2C_STARTXFR,
+                               XLR_I2C_STARTXFR_ND);
+       } else {
+               xlr_i2c_wreg(priv->iobase, XLR_I2C_DATAOUT, buf[pos]);
+               xlr_i2c_wreg(priv->iobase, XLR_I2C_STARTXFR,
+                               XLR_I2C_STARTXFR_WR);
+       }
+
+       while (!timedout) {
+               /* Sample the time before the status so the check is conservative */
+               checktime = jiffies;
+               i2c_status = xlr_i2c_rdreg(priv->iobase, XLR_I2C_STATUS);
+
+               if (i2c_status & XLR_I2C_SDOEMPTY) {
+                       pos++;
+                       /* need to do a empty dataout after the last byte */
+                       byte = (pos < len) ? buf[pos] : 0;
+                       xlr_i2c_wreg(priv->iobase, XLR_I2C_DATAOUT, byte);
+
+                       /* reset timeout on successful xmit */
+                       stoptime = jiffies + timeout;
+               }
+               timedout = time_after(checktime, stoptime);
+
+               if (i2c_status & XLR_I2C_ARB_STARTERR) {
+                       if (timedout)
+                               break;
+                       goto retry;
+               }
+
+               if (i2c_status & XLR_I2C_ACK_ERR)
+                       return -EIO;
+
+               /* Done once the bus is idle and every byte has been queued */
+               if ((i2c_status & XLR_I2C_BUS_BUSY) == 0 && pos >= len)
+                       return 0;
+       }
+       dev_err(&adap->dev, "I2C transmit timeout\n");
+       return -ETIMEDOUT;
+}
+
+/*
+ * Read one message of @len bytes from the slave at @addr into @buf.
+ *
+ * The controller is configured without an address phase, the byte count
+ * is programmed and a read is started.  Each time the status register
+ * reports RXRDY one byte is taken from DATAIN; the XLR_I2C_TIMEOUT
+ * deadline is re-armed after every byte.  The read is restarted while
+ * ARB_STARTERR is reported and the deadline has not passed.
+ *
+ * Returns 0 once the bus is no longer busy, -EIO on an ACK error or if
+ * more than len + 1 bytes show up, or -ETIMEDOUT when the deadline
+ * expires.
+ */
+static int xlr_i2c_rx(struct xlr_i2c_private *priv, u16 len, u8 *buf, u16 addr)
+{
+       struct i2c_adapter *adap = &priv->adap;
+       u32 i2c_status;
+       unsigned long timeout, stoptime, checktime;
+       int nbytes, timedout;
+       u8 byte;
+
+       xlr_i2c_wreg(priv->iobase, XLR_I2C_CFG, XLR_I2C_CFG_NOADDR);
+       xlr_i2c_wreg(priv->iobase, XLR_I2C_BYTECNT, len);
+       xlr_i2c_wreg(priv->iobase, XLR_I2C_DEVADDR, addr);
+
+       timeout = msecs_to_jiffies(XLR_I2C_TIMEOUT);
+       stoptime = jiffies + timeout;
+       timedout = 0;
+       nbytes = 0;
+retry:
+       xlr_i2c_wreg(priv->iobase, XLR_I2C_STARTXFR, XLR_I2C_STARTXFR_RD);
+
+       while (!timedout) {
+               checktime = jiffies;
+               i2c_status = xlr_i2c_rdreg(priv->iobase, XLR_I2C_STATUS);
+               if (i2c_status & XLR_I2C_RXRDY) {
+                       if (nbytes > len)
+                               return -EIO;    /* should not happen */
+
+                       /* we need to do a dummy datain when nbytes == len */
+                       byte = xlr_i2c_rdreg(priv->iobase, XLR_I2C_DATAIN);
+                       if (nbytes < len)
+                               buf[nbytes] = byte;
+                       nbytes++;
+
+                       /* reset timeout on successful read */
+                       stoptime = jiffies + timeout;
+               }
+
+               timedout = time_after(checktime, stoptime);
+               if (i2c_status & XLR_I2C_ARB_STARTERR) {
+                       if (timedout)
+                               break;
+                       goto retry;
+               }
+
+               if (i2c_status & XLR_I2C_ACK_ERR)
+                       return -EIO;
+
+               /*
+                * NOTE(review): success is reported as soon as the bus is
+                * idle; nbytes is not compared against len here - confirm
+                * that the hardware cannot go idle before all bytes arrive.
+                */
+               if ((i2c_status & XLR_I2C_BUS_BUSY) == 0)
+                       return 0;
+       }
+
+       dev_err(&adap->dev, "I2C receive timeout\n");
+       return -ETIMEDOUT;
+}
+
+/*
+ * master_xfer callback: run each message through the read or write
+ * helper, stopping at the first failure.
+ *
+ * Returns @num when all messages succeeded, otherwise the error code of
+ * the message that failed.
+ */
+static int xlr_i2c_xfer(struct i2c_adapter *adap,
+       struct i2c_msg *msgs, int num)
+{
+       struct xlr_i2c_private *priv = i2c_get_adapdata(adap);
+       int idx;
+
+       for (idx = 0; idx < num; idx++) {
+               struct i2c_msg *cur = &msgs[idx];
+               int err;
+
+               if (cur->flags & I2C_M_RD)
+                       err = xlr_i2c_rx(priv, cur->len, &cur->buf[0],
+                                       cur->addr);
+               else
+                       err = xlr_i2c_tx(priv, cur->len, &cur->buf[0],
+                                       cur->addr);
+
+               if (err != 0)
+                       return err;
+       }
+
+       return num;
+}
+
+/* Functionality query: plain I2C, with SMBus emulated on top of it. */
+static u32 xlr_func(struct i2c_adapter *adap)
+{
+       const u32 caps = I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
+
+       return caps;
+}
+
+/* Algorithm hooks handed to the I2C core for every XLR adapter. */
+static struct i2c_algorithm xlr_i2c_algo = {
+       .master_xfer    = xlr_i2c_xfer,
+       .functionality  = xlr_func,
+};
+
+/*
+ * Probe one XLR/XLS I2C controller: allocate the private state, map the
+ * first memory resource, fill in the adapter and register it under the
+ * platform device's id.
+ *
+ * Returns 0 on success, -ENOMEM if the state cannot be allocated, -EBUSY
+ * if the registers cannot be mapped, or the error from
+ * i2c_add_numbered_adapter().  Allocation and mapping are device-managed,
+ * so the error paths have nothing to undo by hand.
+ */
+static int __devinit xlr_i2c_probe(struct platform_device *pdev)
+{
+       struct xlr_i2c_private  *priv;
+       struct resource *res;
+       int ret;
+
+       priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+
+       /*
+        * res is not checked here; presumably devm_request_and_ioremap()
+        * rejects a NULL resource - TODO confirm.
+        */
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       priv->iobase = devm_request_and_ioremap(&pdev->dev, res);
+       if (!priv->iobase) {
+               dev_err(&pdev->dev, "devm_request_and_ioremap failed\n");
+               return -EBUSY;
+       }
+
+       priv->adap.dev.parent = &pdev->dev;
+       priv->adap.owner        = THIS_MODULE;
+       priv->adap.algo_data    = priv;
+       priv->adap.algo         = &xlr_i2c_algo;
+       priv->adap.nr           = pdev->id;
+       priv->adap.class        = I2C_CLASS_HWMON;
+       snprintf(priv->adap.name, sizeof(priv->adap.name), "xlr-i2c");
+
+       /* xlr_i2c_xfer() retrieves priv through the adapter data */
+       i2c_set_adapdata(&priv->adap, priv);
+       ret = i2c_add_numbered_adapter(&priv->adap);
+       if (ret < 0) {
+               dev_err(&priv->adap.dev, "Failed to add i2c bus.\n");
+               return ret;
+       }
+
+       platform_set_drvdata(pdev, priv);
+       dev_info(&priv->adap.dev, "Added I2C Bus.\n");
+       return 0;
+}
+
+/* Unregister the adapter and clear the platform device's driver data. */
+static int __devexit xlr_i2c_remove(struct platform_device *pdev)
+{
+       struct xlr_i2c_private *priv = platform_get_drvdata(pdev);
+
+       i2c_del_adapter(&priv->adap);
+       platform_set_drvdata(pdev, NULL);
+
+       return 0;
+}
+
+/* Platform driver glue; binds by the platform device name "xlr-i2cbus". */
+static struct platform_driver xlr_i2c_driver = {
+       .probe  = xlr_i2c_probe,
+       .remove = __devexit_p(xlr_i2c_remove),
+       .driver = {
+               .name   = "xlr-i2cbus",
+               .owner  = THIS_MODULE,
+       },
+};
+
+module_platform_driver(xlr_i2c_driver);
+
+MODULE_AUTHOR("Ganesan Ramalingam <ganesanr@netlogicmicro.com>");
+MODULE_DESCRIPTION("XLR/XLS SoC I2C Controller driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:xlr-i2cbus");
index 6bea6962f8ee6d8d8f5a73c7f5309654725054e5..3bd9fff5c589301113e5e0501662b211b42e2088 100644 (file)
@@ -142,4 +142,24 @@ config OMAP_IOMMU_DEBUG
 
          Say N unless you know you need this.
 
+config TEGRA_IOMMU_GART
+       bool "Tegra GART IOMMU Support"
+       depends on ARCH_TEGRA_2x_SOC
+       select IOMMU_API
+       help
+         Enables support for remapping discontiguous physical memory
+         shared with the operating system into contiguous I/O virtual
+         space through the GART (Graphics Address Relocation Table)
+         hardware included on Tegra SoCs.
+
+config TEGRA_IOMMU_SMMU
+       bool "Tegra SMMU IOMMU Support"
+       depends on ARCH_TEGRA_3x_SOC
+       select IOMMU_API
+       help
+         Enables support for remapping discontiguous physical memory
+         shared with the operating system into contiguous I/O virtual
+         space through the SMMU (System Memory Management Unit)
+         hardware included on Tegra SoCs.
+
 endif # IOMMU_SUPPORT
index 0e36b4934affc0a3b4a2f7debc8809babd719734..7ad7a3bc1242f9533072fe6d3a52209e6442e15a 100644 (file)
@@ -8,3 +8,5 @@ obj-$(CONFIG_IRQ_REMAP) += intr_remapping.o
 obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o
 obj-$(CONFIG_OMAP_IOVMM) += omap-iovmm.o
 obj-$(CONFIG_OMAP_IOMMU_DEBUG) += omap-iommu-debug.o
+obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o
+obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o
index a35e98ad97258a47e39152361e2b435fa9ba7f07..c56790375e0fd25d66c9b09cb3cfa694fae05acc 100644 (file)
@@ -196,6 +196,8 @@ static u32 rlookup_table_size;      /* size if the rlookup table */
  */
 extern void iommu_flush_all_caches(struct amd_iommu *iommu);
 
+static int amd_iommu_enable_interrupts(void);
+
 static inline void update_last_devid(u16 devid)
 {
        if (devid > amd_iommu_last_bdf)
@@ -358,8 +360,6 @@ static void iommu_disable(struct amd_iommu *iommu)
  */
 static u8 * __init iommu_map_mmio_space(u64 address)
 {
-       u8 *ret;
-
        if (!request_mem_region(address, MMIO_REGION_LENGTH, "amd_iommu")) {
                pr_err("AMD-Vi: Can not reserve memory region %llx for mmio\n",
                        address);
@@ -367,13 +367,7 @@ static u8 * __init iommu_map_mmio_space(u64 address)
                return NULL;
        }
 
-       ret = ioremap_nocache(address, MMIO_REGION_LENGTH);
-       if (ret != NULL)
-               return ret;
-
-       release_mem_region(address, MMIO_REGION_LENGTH);
-
-       return NULL;
+       return ioremap_nocache(address, MMIO_REGION_LENGTH);
 }
 
 static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
@@ -1131,8 +1125,9 @@ static int iommu_setup_msi(struct amd_iommu *iommu)
 {
        int r;
 
-       if (pci_enable_msi(iommu->dev))
-               return 1;
+       r = pci_enable_msi(iommu->dev);
+       if (r)
+               return r;
 
        r = request_threaded_irq(iommu->dev->irq,
                                 amd_iommu_int_handler,
@@ -1142,27 +1137,36 @@ static int iommu_setup_msi(struct amd_iommu *iommu)
 
        if (r) {
                pci_disable_msi(iommu->dev);
-               return 1;
+               return r;
        }
 
        iommu->int_enabled = true;
-       iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
-
-       if (iommu->ppr_log != NULL)
-               iommu_feature_enable(iommu, CONTROL_PPFINT_EN);
 
        return 0;
 }
 
 static int iommu_init_msi(struct amd_iommu *iommu)
 {
+       int ret;
+
        if (iommu->int_enabled)
-               return 0;
+               goto enable_faults;
 
        if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI))
-               return iommu_setup_msi(iommu);
+               ret = iommu_setup_msi(iommu);
+       else
+               ret = -ENODEV;
 
-       return 1;
+       if (ret)
+               return ret;
+
+enable_faults:
+       iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
+
+       if (iommu->ppr_log != NULL)
+               iommu_feature_enable(iommu, CONTROL_PPFINT_EN);
+
+       return 0;
 }
 
 /****************************************************************************
@@ -1381,7 +1385,6 @@ static void enable_iommus(void)
                iommu_enable_ppr_log(iommu);
                iommu_enable_gt(iommu);
                iommu_set_exclusion_range(iommu);
-               iommu_init_msi(iommu);
                iommu_enable(iommu);
                iommu_flush_all_caches(iommu);
        }
@@ -1409,6 +1412,8 @@ static void amd_iommu_resume(void)
 
        /* re-load the hardware */
        enable_iommus();
+
+       amd_iommu_enable_interrupts();
 }
 
 static int amd_iommu_suspend(void)
@@ -1424,10 +1429,40 @@ static struct syscore_ops amd_iommu_syscore_ops = {
        .resume = amd_iommu_resume,
 };
 
+/*
+ * Undo the allocations of a failed AMD IOMMU initialisation: uninitialise
+ * the devices, free the domain-id bitmap and the rlookup, alias and device
+ * tables, then free the IOMMU structures and the unity maps.  With
+ * CONFIG_GART_IOMMU the GART is initialised afterwards as a fallback.
+ */
+static void __init free_on_init_error(void)
+{
+       amd_iommu_uninit_devices();
+
+       free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
+                  get_order(MAX_DOMAIN_ID/8));
+
+       free_pages((unsigned long)amd_iommu_rlookup_table,
+                  get_order(rlookup_table_size));
+
+       free_pages((unsigned long)amd_iommu_alias_table,
+                  get_order(alias_table_size));
+
+       free_pages((unsigned long)amd_iommu_dev_table,
+                  get_order(dev_table_size));
+
+       free_iommu_all();
+
+       free_unity_maps();
+
+#ifdef CONFIG_GART_IOMMU
+       /*
+        * We failed to initialize the AMD IOMMU - try fallback to GART
+        * if possible.
+        */
+       gart_iommu_init();
+
+#endif
+}
+
 /*
- * This is the core init function for AMD IOMMU hardware in the system.
- * This function is called from the generic x86 DMA layer initialization
- * code.
+ * This is the hardware init function for AMD IOMMU in the system.
+ * This function is called either from amd_iommu_init or from the interrupt
+ * remapping setup code.
  *
  * This function basically parses the ACPI table for AMD IOMMU (IVRS)
  * three times:
@@ -1446,16 +1481,21 @@ static struct syscore_ops amd_iommu_syscore_ops = {
  *             remapping requirements parsed out of the ACPI table in
  *             this last pass.
  *
- * After that the hardware is initialized and ready to go. In the last
- * step we do some Linux specific things like registering the driver in
- * the dma_ops interface and initializing the suspend/resume support
- * functions. Finally it prints some information about AMD IOMMUs and
- * the driver state and enables the hardware.
+ * After everything is set up the IOMMUs are enabled and the necessary
+ * hotplug and suspend notifiers are registered.
  */
-static int __init amd_iommu_init(void)
+int __init amd_iommu_init_hardware(void)
 {
        int i, ret = 0;
 
+       if (!amd_iommu_detected)
+               return -ENODEV;
+
+       if (amd_iommu_dev_table != NULL) {
+               /* Hardware already initialized */
+               return 0;
+       }
+
        /*
         * First parse ACPI tables to find the largest Bus/Dev/Func
         * we need to handle. Upon this information the shared data
@@ -1472,9 +1512,8 @@ static int __init amd_iommu_init(void)
        alias_table_size   = tbl_size(ALIAS_TABLE_ENTRY_SIZE);
        rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE);
 
-       ret = -ENOMEM;
-
        /* Device table - directly used by all IOMMUs */
+       ret = -ENOMEM;
        amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
                                      get_order(dev_table_size));
        if (amd_iommu_dev_table == NULL)
@@ -1546,20 +1585,65 @@ static int __init amd_iommu_init(void)
 
        enable_iommus();
 
+       amd_iommu_init_notifier();
+
+       register_syscore_ops(&amd_iommu_syscore_ops);
+
+out:
+       return ret;
+
+free:
+       free_on_init_error();
+
+       return ret;
+}
+
+/*
+ * Set up MSI for every IOMMU in turn, stopping at the first one that
+ * fails.  Returns 0 on success or that IOMMU's error code.
+ */
+static int amd_iommu_enable_interrupts(void)
+{
+       struct amd_iommu *iommu;
+       int ret = 0;
+
+       for_each_iommu(iommu) {
+               ret = iommu_init_msi(iommu);
+               if (ret)
+                       break;
+       }
+
+       return ret;
+}
+
+/*
+ * This is the core init function for AMD IOMMU hardware in the system.
+ * This function is called from the generic x86 DMA layer initialization
+ * code.
+ *
+ * The function calls amd_iommu_init_hardware() to setup and enable the
+ * IOMMU hardware if this has not happened yet. After that the driver
+ * registers for the DMA-API and for the IOMMU-API as necessary.
+ */
+static int __init amd_iommu_init(void)
+{
+       int ret = 0;
+
+       ret = amd_iommu_init_hardware();
+       if (ret)
+               goto out;
+
+       ret = amd_iommu_enable_interrupts();
+       if (ret)
+               goto free;
+
        if (iommu_pass_through)
                ret = amd_iommu_init_passthrough();
        else
                ret = amd_iommu_init_dma_ops();
 
        if (ret)
-               goto free_disable;
+               goto free;
 
        amd_iommu_init_api();
 
-       amd_iommu_init_notifier();
-
-       register_syscore_ops(&amd_iommu_syscore_ops);
-
        if (iommu_pass_through)
                goto out;
 
@@ -1569,39 +1653,14 @@ static int __init amd_iommu_init(void)
                printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n");
 
        x86_platform.iommu_shutdown = disable_iommus;
+
 out:
        return ret;
 
-free_disable:
-       disable_iommus();
-
 free:
-       amd_iommu_uninit_devices();
-
-       free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
-                  get_order(MAX_DOMAIN_ID/8));
-
-       free_pages((unsigned long)amd_iommu_rlookup_table,
-                  get_order(rlookup_table_size));
-
-       free_pages((unsigned long)amd_iommu_alias_table,
-                  get_order(alias_table_size));
-
-       free_pages((unsigned long)amd_iommu_dev_table,
-                  get_order(dev_table_size));
-
-       free_iommu_all();
-
-       free_unity_maps();
-
-#ifdef CONFIG_GART_IOMMU
-       /*
-        * We failed to initialize the AMD IOMMU - try fallback to GART
-        * if possible.
-        */
-       gart_iommu_init();
+       disable_iommus();
 
-#endif
+       free_on_init_error();
 
        goto out;
 }
index 8add9f125d3efed3d066109fba44b472f450301e..036fe9bf157e0341862c8452ed47ca02b32db3c3 100644 (file)
@@ -921,7 +921,16 @@ static int __init amd_iommu_v2_init(void)
        size_t state_table_size;
        int ret;
 
-       pr_info("AMD IOMMUv2 driver by Joerg Roedel <joerg.roedel@amd.com>");
+       pr_info("AMD IOMMUv2 driver by Joerg Roedel <joerg.roedel@amd.com>\n");
+
+       if (!amd_iommu_v2_supported()) {
+               pr_info("AMD IOMMUv2 functionality not available on this sytem\n");
+               /*
+                * Load anyway to provide the symbols to other modules
+                * which may use AMD IOMMUv2 optionally.
+                */
+               return 0;
+       }
 
        spin_lock_init(&state_lock);
 
@@ -961,6 +970,9 @@ static void __exit amd_iommu_v2_exit(void)
        size_t state_table_size;
        int i;
 
+       if (!amd_iommu_v2_supported())
+               return;
+
        profile_event_unregister(PROFILE_TASK_EXIT, &profile_nb);
        amd_iommu_unregister_ppr_notifier(&ppr_nb);
 
diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c
new file mode 100644 (file)
index 0000000..779306e
--- /dev/null
@@ -0,0 +1,451 @@
+/*
+ * IOMMU API for GART in Tegra20
+ *
+ * Copyright (c) 2010-2012, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#define pr_fmt(fmt)    "%s(): " fmt, __func__
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/list.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/iommu.h>
+
+#include <asm/cacheflush.h>
+
+/* bitmap of the page sizes currently supported */
+#define GART_IOMMU_PGSIZES     (SZ_4K)
+
+#define GART_CONFIG            0x24
+#define GART_ENTRY_ADDR                0x28
+#define GART_ENTRY_DATA                0x2c
+#define GART_ENTRY_PHYS_ADDR_VALID     (1 << 31)
+
+#define GART_PAGE_SHIFT                12
+#define GART_PAGE_SIZE         (1 << GART_PAGE_SHIFT)
+#define GART_PAGE_MASK                                         \
+       (~(GART_PAGE_SIZE - 1) & ~GART_ENTRY_PHYS_ADDR_VALID)
+
+/* One device attached to the GART; linked into gart_device.client. */
+struct gart_client {
+       struct device           *dev;   /* the attached device */
+       struct list_head        list;   /* node in gart_device.client */
+};
+
+/*
+ * Driver state for the single GART instance (see gart_handle).
+ * Allocated and filled in by tegra_gart_probe().
+ */
+struct gart_device {
+       void __iomem            *regs;          /* ioremapped GART registers */
+       u32                     *savedata;      /* one saved PTE per page, for suspend/resume */
+       u32                     page_count;     /* total remappable size */
+       dma_addr_t              iovmm_base;     /* offset to vmm_area */
+       spinlock_t              pte_lock;       /* for pagetable */
+       struct list_head        client;         /* list of struct gart_client */
+       spinlock_t              client_lock;    /* for client list */
+       struct device           *dev;           /* the GART platform device */
+};
+
+static struct gart_device *gart_handle; /* unique for a system */
+
+#define GART_PTE(_pfn)                                         \
+       (GART_ENTRY_PHYS_ADDR_VALID | ((_pfn) << PAGE_SHIFT))
+
+/*
+ * Any interaction between any block on PPSB and a block on APB or AHB
+ * must have these read-back barriers to ensure the APB/AHB bus transaction is
+ * complete before initiating activity on the PPSB block.
+ */
+#define FLUSH_GART_REGS(gart)  ((void)readl((gart)->regs + GART_CONFIG))
+
+#define for_each_gart_pte(gart, iova)                                  \
+       for (iova = gart->iovmm_base;                                   \
+            iova < gart->iovmm_base + GART_PAGE_SIZE * gart->page_count; \
+            iova += GART_PAGE_SIZE)
+
+/*
+ * Program one GART entry: select it by writing @offs to GART_ENTRY_ADDR,
+ * then store @pte through GART_ENTRY_DATA.  The two writes form one
+ * address/data sequence, so they must not be interleaved with another
+ * entry access.  A zero @pte is logged as "unmap", anything else as "map".
+ */
+static inline void gart_set_pte(struct gart_device *gart,
+                               unsigned long offs, u32 pte)
+{
+       writel(offs, gart->regs + GART_ENTRY_ADDR);
+       writel(pte, gart->regs + GART_ENTRY_DATA);
+
+       dev_dbg(gart->dev, "%s %08lx:%08x\n",
+                pte ? "map" : "unmap", offs, pte & GART_PAGE_MASK);
+}
+
+/*
+ * Fetch the raw GART entry for @offs: select it through GART_ENTRY_ADDR
+ * and read the value back from GART_ENTRY_DATA.
+ */
+static inline unsigned long gart_read_pte(struct gart_device *gart,
+                                         unsigned long offs)
+{
+       writel(offs, gart->regs + GART_ENTRY_ADDR);
+
+       return readl(gart->regs + GART_ENTRY_DATA);
+}
+
+/*
+ * Load every GART entry and then write 1 to GART_CONFIG.
+ *
+ * @data: one u32 per entry, consumed in aperture order; when NULL every
+ *        entry is written as 0.
+ */
+static void do_gart_setup(struct gart_device *gart, const u32 *data)
+{
+       unsigned long iova;
+
+       for_each_gart_pte(gart, iova) {
+               u32 pte = 0;
+
+               if (data)
+                       pte = *data++;
+               gart_set_pte(gart, iova, pte);
+       }
+
+       writel(1, gart->regs + GART_CONFIG);
+       FLUSH_GART_REGS(gart);
+}
+
+#ifdef DEBUG
+/*
+ * Debug helper: with pte_lock held, read every GART entry and log it via
+ * dev_dbg(), marking entries that have GART_ENTRY_PHYS_ADDR_VALID set
+ * with "v".
+ */
+static void gart_dump_table(struct gart_device *gart)
+{
+       unsigned long iova;
+       unsigned long flags;
+
+       spin_lock_irqsave(&gart->pte_lock, flags);
+       for_each_gart_pte(gart, iova) {
+               unsigned long pte;
+
+               pte = gart_read_pte(gart, iova);
+
+               dev_dbg(gart->dev, "%s %08lx:%08lx\n",
+                       (GART_ENTRY_PHYS_ADDR_VALID & pte) ? "v" : " ",
+                       iova, pte & GART_PAGE_MASK);
+       }
+       spin_unlock_irqrestore(&gart->pte_lock, flags);
+}
+#else
+/* Without DEBUG the table dump is an empty stub. */
+static inline void gart_dump_table(struct gart_device *gart)
+{
+}
+#endif
+
+/*
+ * Return true when the byte range [iova, iova + bytes - 1] lies entirely
+ * inside the GART aperture described by iovmm_base and page_count.
+ */
+static inline bool gart_iova_range_valid(struct gart_device *gart,
+                                        unsigned long iova, size_t bytes)
+{
+       unsigned long first = iova;
+       unsigned long last = first + bytes - 1;
+       unsigned long aperture_lo = gart->iovmm_base;
+       unsigned long aperture_hi =
+               aperture_lo + gart->page_count * GART_PAGE_SIZE - 1;
+
+       return first >= aperture_lo && last <= aperture_hi;
+}
+
+/*
+ * iommu_ops.attach_dev: bind @dev to the GART found in its parent's
+ * drvdata and record it on the GART client list.
+ *
+ * Returns 0 on success, -EINVAL when the parent has no GART drvdata or
+ * @dev is already on the list, -ENOMEM when the client record cannot be
+ * allocated.
+ */
+static int gart_iommu_attach_dev(struct iommu_domain *domain,
+                                struct device *dev)
+{
+       struct gart_device *gart = dev_get_drvdata(dev->parent);
+       struct gart_client *entry, *pos;
+
+       if (!gart)
+               return -EINVAL;
+       domain->priv = gart;
+
+       entry = devm_kzalloc(gart->dev, sizeof(*entry), GFP_KERNEL);
+       if (!entry)
+               return -ENOMEM;
+       entry->dev = dev;
+
+       spin_lock(&gart->client_lock);
+       list_for_each_entry(pos, &gart->client, list) {
+               if (pos->dev != dev)
+                       continue;
+
+               /* Duplicate attach: drop the new record and bail out. */
+               dev_err(gart->dev,
+                       "%s is already attached\n", dev_name(dev));
+               devm_kfree(gart->dev, entry);
+               spin_unlock(&gart->client_lock);
+               return -EINVAL;
+       }
+       list_add(&entry->list, &gart->client);
+       spin_unlock(&gart->client_lock);
+
+       dev_dbg(gart->dev, "Attached %s\n", dev_name(dev));
+       return 0;
+}
+
+/*
+ * iommu_ops.detach_dev: remove @dev's record from the GART client list
+ * and free it.  Logs an error when @dev was never attached.
+ */
+static void gart_iommu_detach_dev(struct iommu_domain *domain,
+                                 struct device *dev)
+{
+       struct gart_device *gart = domain->priv;
+       struct gart_client *pos;
+       struct gart_client *match = NULL;
+
+       spin_lock(&gart->client_lock);
+
+       list_for_each_entry(pos, &gart->client, list) {
+               if (pos->dev == dev) {
+                       match = pos;
+                       break;
+               }
+       }
+
+       if (match) {
+               list_del(&match->list);
+               devm_kfree(gart->dev, match);
+               dev_dbg(gart->dev, "Detached %s\n", dev_name(dev));
+       } else {
+               dev_err(gart->dev, "Couldn't find\n");
+       }
+
+       spin_unlock(&gart->client_lock);
+}
+
+/*
+ * iommu_ops.domain_init: nothing to set up here; domain->priv is filled
+ * in later by gart_iommu_attach_dev().  Always returns 0.
+ */
+static int gart_iommu_domain_init(struct iommu_domain *domain)
+{
+       return 0;
+}
+
+/*
+ * iommu_ops.domain_destroy: drop every client still attached to the GART
+ * behind @domain and clear domain->priv.  A domain that was never
+ * attached (priv == NULL) is left untouched.
+ *
+ * The clients are unlinked and freed inline rather than through
+ * gart_iommu_detach_dev(): that function takes client_lock itself, so
+ * calling it with the lock already held would recurse on the spinlock.
+ * The _safe iterator is required because each entry is freed while the
+ * list is being walked.
+ */
+static void gart_iommu_domain_destroy(struct iommu_domain *domain)
+{
+       struct gart_device *gart = domain->priv;
+       struct gart_client *c, *tmp;
+
+       if (!gart)
+               return;
+
+       spin_lock(&gart->client_lock);
+       list_for_each_entry_safe(c, tmp, &gart->client, list) {
+               list_del(&c->list);
+               /* Log before freeing: c->dev is read from the record. */
+               dev_dbg(gart->dev, "Detached %s\n", dev_name(c->dev));
+               devm_kfree(gart->dev, c);
+       }
+       spin_unlock(&gart->client_lock);
+       domain->priv = NULL;
+}
+
+/*
+ * iommu_ops.map: point the GART entry for @iova at physical address @pa.
+ *
+ * @bytes is only used for the aperture range check; exactly one entry
+ * (the one at @iova) is written.  @prot is not used.
+ *
+ * Returns 0 on success, -EINVAL when the range is outside the aperture
+ * or @pa does not correspond to a valid pfn.
+ */
+static int gart_iommu_map(struct iommu_domain *domain, unsigned long iova,
+                         phys_addr_t pa, size_t bytes, int prot)
+{
+       struct gart_device *gart = domain->priv;
+       unsigned long flags;
+       unsigned long pfn;
+
+       if (!gart_iova_range_valid(gart, iova, bytes))
+               return -EINVAL;
+
+       spin_lock_irqsave(&gart->pte_lock, flags);
+       pfn = __phys_to_pfn(pa);
+       if (!pfn_valid(pfn)) {
+               dev_err(gart->dev, "Invalid page: %08x\n", pa);
+               spin_unlock_irqrestore(&gart->pte_lock, flags);
+               return -EINVAL;
+       }
+       gart_set_pte(gart, iova, GART_PTE(pfn));
+       /* Read back so the entry write has completed before unlocking. */
+       FLUSH_GART_REGS(gart);
+       spin_unlock_irqrestore(&gart->pte_lock, flags);
+       return 0;
+}
+
+/*
+ * iommu_ops.unmap: clear the GART entry for @iova.
+ *
+ * Returns the number of bytes unmapped: @bytes on success, 0 when the
+ * range is outside the aperture.  The IOMMU core treats a 0 return as
+ * "nothing was unmapped" and stops, so success must not return 0.
+ */
+static size_t gart_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
+                              size_t bytes)
+{
+       struct gart_device *gart = domain->priv;
+       unsigned long flags;
+
+       if (!gart_iova_range_valid(gart, iova, bytes))
+               return 0;
+
+       spin_lock_irqsave(&gart->pte_lock, flags);
+       gart_set_pte(gart, iova, 0);
+       FLUSH_GART_REGS(gart);
+       spin_unlock_irqrestore(&gart->pte_lock, flags);
+       return bytes;
+}
+
+/*
+ * iommu_ops.iova_to_phys: look up the physical address the GART entry for
+ * @iova points at.
+ *
+ * Returns the entry's page address, or -EINVAL (cast to phys_addr_t) when
+ * @iova is outside the aperture or the entry does not reference a valid
+ * pfn; in the latter case the whole table is dumped for debugging.
+ */
+static phys_addr_t gart_iommu_iova_to_phys(struct iommu_domain *domain,
+                                          unsigned long iova)
+{
+       struct gart_device *gart = domain->priv;
+       unsigned long pte;
+       phys_addr_t pa;
+       unsigned long flags;
+
+       /*
+        * Check a one-byte range.  A zero length would make the helper
+        * compute an end of iova - 1, which accepts the address one past
+        * the aperture and wraps around for iova == 0.
+        */
+       if (!gart_iova_range_valid(gart, iova, 1))
+               return -EINVAL;
+
+       spin_lock_irqsave(&gart->pte_lock, flags);
+       pte = gart_read_pte(gart, iova);
+       spin_unlock_irqrestore(&gart->pte_lock, flags);
+
+       pa = (pte & GART_PAGE_MASK);
+       if (!pfn_valid(__phys_to_pfn(pa))) {
+               dev_err(gart->dev, "No entry for %08lx:%08x\n", iova, pa);
+               gart_dump_table(gart);
+               return -EINVAL;
+       }
+       return pa;
+}
+
+/* iommu_ops.domain_has_cap: reports 0 for every @cap. */
+static int gart_iommu_domain_has_cap(struct iommu_domain *domain,
+                                    unsigned long cap)
+{
+       return 0;
+}
+
+/*
+ * IOMMU callbacks for the GART; installed on the platform bus by
+ * tegra_gart_init().
+ */
+static struct iommu_ops gart_iommu_ops = {
+       .domain_init    = gart_iommu_domain_init,
+       .domain_destroy = gart_iommu_domain_destroy,
+       .attach_dev     = gart_iommu_attach_dev,
+       .detach_dev     = gart_iommu_detach_dev,
+       .map            = gart_iommu_map,
+       .unmap          = gart_iommu_unmap,
+       .iova_to_phys   = gart_iommu_iova_to_phys,
+       .domain_has_cap = gart_iommu_domain_has_cap,
+       .pgsize_bitmap  = GART_IOMMU_PGSIZES,
+};
+
+/*
+ * PM suspend hook: snapshot every GART entry into gart->savedata, in
+ * aperture order, under pte_lock.  Always returns 0.
+ */
+static int tegra_gart_suspend(struct device *dev)
+{
+       struct gart_device *gart = dev_get_drvdata(dev);
+       u32 *slot = gart->savedata;
+       unsigned long irqflags;
+       unsigned long iova;
+
+       spin_lock_irqsave(&gart->pte_lock, irqflags);
+       for_each_gart_pte(gart, iova) {
+               *slot = gart_read_pte(gart, iova);
+               slot++;
+       }
+       spin_unlock_irqrestore(&gart->pte_lock, irqflags);
+
+       return 0;
+}
+
+/*
+ * PM resume hook: reload every GART entry from gart->savedata (written
+ * by tegra_gart_suspend()) and re-write GART_CONFIG via do_gart_setup(),
+ * under pte_lock.  Always returns 0.
+ */
+static int tegra_gart_resume(struct device *dev)
+{
+       struct gart_device *gart = dev_get_drvdata(dev);
+       unsigned long flags;
+
+       spin_lock_irqsave(&gart->pte_lock, flags);
+       do_gart_setup(gart, gart->savedata);
+       spin_unlock_irqrestore(&gart->pte_lock, flags);
+       return 0;
+}
+
+/*
+ * Platform probe: map the GART registers (MEM resource 0), size the
+ * aperture from MEM resource 1, allocate the suspend save area, clear
+ * all entries and publish the device through gart_handle.
+ *
+ * Returns 0 on success, -EIO if a GART is already registered, -ENXIO for
+ * missing resources or a failed ioremap, -ENOMEM on allocation failure.
+ */
+static int tegra_gart_probe(struct platform_device *pdev)
+{
+       struct gart_device *gart;
+       struct resource *res, *res_remap;
+       void __iomem *gart_regs;
+       int err;
+       struct device *dev = &pdev->dev;
+
+       /* Only a single GART instance is supported. */
+       if (gart_handle)
+               return -EIO;
+
+       /* GART_PTE() shifts by PAGE_SHIFT, so the two must agree. */
+       BUILD_BUG_ON(PAGE_SHIFT != GART_PAGE_SHIFT);
+
+       /* the GART memory aperture is required */
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       res_remap = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+       if (!res || !res_remap) {
+               dev_err(dev, "GART memory aperture expected\n");
+               return -ENXIO;
+       }
+
+       gart = devm_kzalloc(dev, sizeof(*gart), GFP_KERNEL);
+       if (!gart) {
+               dev_err(dev, "failed to allocate gart_device\n");
+               return -ENOMEM;
+       }
+
+       gart_regs = devm_ioremap(dev, res->start, resource_size(res));
+       if (!gart_regs) {
+               dev_err(dev, "failed to remap GART registers\n");
+               err = -ENXIO;
+               goto fail;
+       }
+
+       gart->dev = &pdev->dev;
+       spin_lock_init(&gart->pte_lock);
+       spin_lock_init(&gart->client_lock);
+       INIT_LIST_HEAD(&gart->client);
+       gart->regs = gart_regs;
+       gart->iovmm_base = (dma_addr_t)res_remap->start;
+       gart->page_count = (resource_size(res_remap) >> GART_PAGE_SHIFT);
+
+       /* One u32 per aperture page, filled in by tegra_gart_suspend(). */
+       gart->savedata = vmalloc(sizeof(u32) * gart->page_count);
+       if (!gart->savedata) {
+               dev_err(dev, "failed to allocate context save area\n");
+               err = -ENOMEM;
+               goto fail;
+       }
+
+       platform_set_drvdata(pdev, gart);
+       /* NULL data: every entry is written as 0 before GART_CONFIG is set. */
+       do_gart_setup(gart, NULL);
+
+       gart_handle = gart;
+       return 0;
+
+fail:
+       /* gart came from kzalloc, so savedata is NULL unless vmalloc ran. */
+       if (gart_regs)
+               devm_iounmap(dev, gart_regs);
+       if (gart && gart->savedata)
+               vfree(gart->savedata);
+       devm_kfree(dev, gart);
+       return err;
+}
+
+/*
+ * Platform remove: write 0 to GART_CONFIG, release the save area, the
+ * register mapping and the device structure, and clear gart_handle.
+ * Always returns 0.
+ */
+static int tegra_gart_remove(struct platform_device *pdev)
+{
+       struct gart_device *gart = platform_get_drvdata(pdev);
+       struct device *dev = gart->dev;
+
+       writel(0, gart->regs + GART_CONFIG);
+       if (gart->savedata)
+               vfree(gart->savedata);
+       if (gart->regs)
+               devm_iounmap(dev, gart->regs);
+       devm_kfree(dev, gart);
+       gart_handle = NULL;
+       return 0;
+}
+
+/* Suspend/resume hooks that save and restore the GART entries. */
+const struct dev_pm_ops tegra_gart_pm_ops = {
+       .suspend        = tegra_gart_suspend,
+       .resume         = tegra_gart_resume,
+};
+
+/* Platform driver bound to devices named "tegra-gart". */
+static struct platform_driver tegra_gart_driver = {
+       .probe          = tegra_gart_probe,
+       .remove         = tegra_gart_remove,
+       .driver = {
+               .owner  = THIS_MODULE,
+               .name   = "tegra-gart",
+               .pm     = &tegra_gart_pm_ops,
+       },
+};
+
+/*
+ * Subsystem initcall: install the GART IOMMU callbacks on the platform
+ * bus and register the platform driver.  Returns the result of
+ * platform_driver_register().
+ *
+ * Annotated __init (not __devinit): this runs once as an initcall and is
+ * never invoked on device hotplug, so it belongs in the init section.
+ */
+static int __init tegra_gart_init(void)
+{
+       bus_set_iommu(&platform_bus_type, &gart_iommu_ops);
+       return platform_driver_register(&tegra_gart_driver);
+}
+
+/* Module exit: unregister the platform driver. */
+static void __exit tegra_gart_exit(void)
+{
+       platform_driver_unregister(&tegra_gart_driver);
+}
+
+subsys_initcall(tegra_gart_init);
+module_exit(tegra_gart_exit);
+
+MODULE_DESCRIPTION("IOMMU API for GART in Tegra20");
+MODULE_AUTHOR("Hiroshi DOYU <hdoyu@nvidia.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
new file mode 100644 (file)
index 0000000..eb93c82
--- /dev/null
@@ -0,0 +1,1034 @@
+/*
+ * IOMMU API for SMMU in Tegra30
+ *
+ * Copyright (c) 2011-2012, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#define pr_fmt(fmt)    "%s(): " fmt, __func__
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/device.h>
+#include <linux/sched.h>
+#include <linux/iommu.h>
+#include <linux/io.h>
+
+#include <asm/page.h>
+#include <asm/cacheflush.h>
+
+#include <mach/iomap.h>
+#include <mach/smmu.h>
+
+/* bitmap of the page sizes currently supported */
+#define SMMU_IOMMU_PGSIZES     (SZ_4K)
+
+#define SMMU_CONFIG                            0x10
+#define SMMU_CONFIG_DISABLE                    0
+#define SMMU_CONFIG_ENABLE                     1
+
+#define SMMU_TLB_CONFIG                                0x14
+#define SMMU_TLB_CONFIG_STATS__MASK            (1 << 31)
+#define SMMU_TLB_CONFIG_STATS__ENABLE          (1 << 31)
+#define SMMU_TLB_CONFIG_HIT_UNDER_MISS__ENABLE (1 << 29)
+#define SMMU_TLB_CONFIG_ACTIVE_LINES__VALUE    0x10
+#define SMMU_TLB_CONFIG_RESET_VAL              0x20000010
+
+#define SMMU_PTC_CONFIG                                0x18
+#define SMMU_PTC_CONFIG_STATS__MASK            (1 << 31)
+#define SMMU_PTC_CONFIG_STATS__ENABLE          (1 << 31)
+#define SMMU_PTC_CONFIG_CACHE__ENABLE          (1 << 29)
+#define SMMU_PTC_CONFIG_INDEX_MAP__PATTERN     0x3f
+#define SMMU_PTC_CONFIG_RESET_VAL              0x2000003f
+
+#define SMMU_PTB_ASID                          0x1c
+#define SMMU_PTB_ASID_CURRENT_SHIFT            0
+
+#define SMMU_PTB_DATA                          0x20
+#define SMMU_PTB_DATA_RESET_VAL                        0
+#define SMMU_PTB_DATA_ASID_NONSECURE_SHIFT     29
+#define SMMU_PTB_DATA_ASID_WRITABLE_SHIFT      30
+#define SMMU_PTB_DATA_ASID_READABLE_SHIFT      31
+
+#define SMMU_TLB_FLUSH                         0x30
+#define SMMU_TLB_FLUSH_VA_MATCH_ALL            0
+#define SMMU_TLB_FLUSH_VA_MATCH_SECTION                2
+#define SMMU_TLB_FLUSH_VA_MATCH_GROUP          3
+#define SMMU_TLB_FLUSH_ASID_SHIFT              29
+#define SMMU_TLB_FLUSH_ASID_MATCH_DISABLE      0
+#define SMMU_TLB_FLUSH_ASID_MATCH_ENABLE       1
+#define SMMU_TLB_FLUSH_ASID_MATCH_SHIFT                31
+
+#define SMMU_PTC_FLUSH                         0x34
+#define SMMU_PTC_FLUSH_TYPE_ALL                        0
+#define SMMU_PTC_FLUSH_TYPE_ADR                        1
+#define SMMU_PTC_FLUSH_ADR_SHIFT               4
+
+#define SMMU_ASID_SECURITY                     0x38
+
+#define SMMU_STATS_TLB_HIT_COUNT               0x1f0
+#define SMMU_STATS_TLB_MISS_COUNT              0x1f4
+#define SMMU_STATS_PTC_HIT_COUNT               0x1f8
+#define SMMU_STATS_PTC_MISS_COUNT              0x1fc
+
+#define SMMU_TRANSLATION_ENABLE_0              0x228
+#define SMMU_TRANSLATION_ENABLE_1              0x22c
+#define SMMU_TRANSLATION_ENABLE_2              0x230
+
+#define SMMU_AFI_ASID  0x238   /* PCIE */
+#define SMMU_AVPC_ASID 0x23c   /* AVP */
+#define SMMU_DC_ASID   0x240   /* Display controller */
+#define SMMU_DCB_ASID  0x244   /* Display controller B */
+#define SMMU_EPP_ASID  0x248   /* Encoder pre-processor */
+#define SMMU_G2_ASID   0x24c   /* 2D engine */
+#define SMMU_HC_ASID   0x250   /* Host1x */
+#define SMMU_HDA_ASID  0x254   /* High-def audio */
+#define SMMU_ISP_ASID  0x258   /* Image signal processor */
+#define SMMU_MPE_ASID  0x264   /* MPEG encoder */
+#define SMMU_NV_ASID   0x268   /* (3D) */
+#define SMMU_NV2_ASID  0x26c   /* (3D) */
+#define SMMU_PPCS_ASID 0x270   /* AHB */
+#define SMMU_SATA_ASID 0x278   /* SATA */
+#define SMMU_VDE_ASID  0x27c   /* Video decoder */
+#define SMMU_VI_ASID   0x280   /* Video input */
+
+#define SMMU_PDE_NEXT_SHIFT            28
+
+/* AHB Arbiter Registers */
+#define AHB_XBAR_CTRL                          0xe0
+#define AHB_XBAR_CTRL_SMMU_INIT_DONE_DONE      1
+#define AHB_XBAR_CTRL_SMMU_INIT_DONE_SHIFT     17
+
+#define SMMU_NUM_ASIDS                         4
+#define SMMU_TLB_FLUSH_VA_SECTION__MASK                0xffc00000
+#define SMMU_TLB_FLUSH_VA_SECTION__SHIFT       12 /* right shift */
+#define SMMU_TLB_FLUSH_VA_GROUP__MASK          0xffffc000
+#define SMMU_TLB_FLUSH_VA_GROUP__SHIFT         12 /* right shift */
+#define SMMU_TLB_FLUSH_VA(iova, which) \
+       ((((iova) & SMMU_TLB_FLUSH_VA_##which##__MASK) >> \
+               SMMU_TLB_FLUSH_VA_##which##__SHIFT) |   \
+       SMMU_TLB_FLUSH_VA_MATCH_##which)
+#define SMMU_PTB_ASID_CUR(n)   \
+               ((n) << SMMU_PTB_ASID_CURRENT_SHIFT)
+#define SMMU_TLB_FLUSH_ASID_MATCH_disable              \
+               (SMMU_TLB_FLUSH_ASID_MATCH_DISABLE <<   \
+                       SMMU_TLB_FLUSH_ASID_MATCH_SHIFT)
+#define SMMU_TLB_FLUSH_ASID_MATCH__ENABLE              \
+               (SMMU_TLB_FLUSH_ASID_MATCH_ENABLE <<    \
+                       SMMU_TLB_FLUSH_ASID_MATCH_SHIFT)
+
+#define SMMU_PAGE_SHIFT 12
+#define SMMU_PAGE_SIZE (1 << SMMU_PAGE_SHIFT)
+
+#define SMMU_PDIR_COUNT        1024
+#define SMMU_PDIR_SIZE (sizeof(unsigned long) * SMMU_PDIR_COUNT)
+#define SMMU_PTBL_COUNT        1024
+#define SMMU_PTBL_SIZE (sizeof(unsigned long) * SMMU_PTBL_COUNT)
+#define SMMU_PDIR_SHIFT        12
+#define SMMU_PDE_SHIFT 12
+#define SMMU_PTE_SHIFT 12
+#define SMMU_PFN_MASK  0x000fffff
+
+/*
+ * Address <-> index helpers: a page frame number is the address shifted
+ * right by 12, a page directory number is the address shifted right by 22.
+ * SMMU_PDN_TO_ADDR() takes the directory number as its own parameter
+ * instead of silently picking up a caller variable named "pdn".
+ */
+#define SMMU_ADDR_TO_PFN(addr) ((addr) >> 12)
+#define SMMU_ADDR_TO_PDN(addr) ((addr) >> 22)
+#define SMMU_PDN_TO_ADDR(pdn)  ((pdn) << 22)
+
+#define _READABLE      (1 << SMMU_PTB_DATA_ASID_READABLE_SHIFT)
+#define _WRITABLE      (1 << SMMU_PTB_DATA_ASID_WRITABLE_SHIFT)
+#define _NONSECURE     (1 << SMMU_PTB_DATA_ASID_NONSECURE_SHIFT)
+#define _PDE_NEXT      (1 << SMMU_PDE_NEXT_SHIFT)
+#define _MASK_ATTR     (_READABLE | _WRITABLE | _NONSECURE)
+
+#define _PDIR_ATTR     (_READABLE | _WRITABLE | _NONSECURE)
+
+#define _PDE_ATTR      (_READABLE | _WRITABLE | _NONSECURE)
+#define _PDE_ATTR_N    (_PDE_ATTR | _PDE_NEXT)
+#define _PDE_VACANT(pdn)       (((pdn) << 10) | _PDE_ATTR)
+
+#define _PTE_ATTR      (_READABLE | _WRITABLE | _NONSECURE)
+#define _PTE_VACANT(addr)      (((addr) >> SMMU_PAGE_SHIFT) | _PTE_ATTR)
+
+#define SMMU_MK_PDIR(page, attr)       \
+               ((page_to_phys(page) >> SMMU_PDIR_SHIFT) | (attr))
+#define SMMU_MK_PDE(page, attr)                \
+               (unsigned long)((page_to_phys(page) >> SMMU_PDE_SHIFT) | (attr))
+#define SMMU_EX_PTBL_PAGE(pde)         \
+               pfn_to_page((unsigned long)(pde) & SMMU_PFN_MASK)
+#define SMMU_PFN_TO_PTE(pfn, attr)     (unsigned long)((pfn) | (attr))
+
+#define SMMU_ASID_ENABLE(asid) ((asid) | (1 << 31))
+#define SMMU_ASID_DISABLE      0
+#define SMMU_ASID_ASID(n)      ((n) & ~SMMU_ASID_ENABLE(0))
+
+#define smmu_client_enable_hwgrp(c, m) smmu_client_set_hwgrp(c, m, 1)
+#define smmu_client_disable_hwgrp(c)   smmu_client_set_hwgrp(c, 0, 0)
+#define __smmu_client_enable_hwgrp(c, m) __smmu_client_set_hwgrp(c, m, 1)
+#define __smmu_client_disable_hwgrp(c) __smmu_client_set_hwgrp(c, 0, 0)
+
+/* Designated initializer: slot HWGRP_<client> holds SMMU_<client>_ASID. */
+#define HWGRP_INIT(client) [HWGRP_##client] = SMMU_##client##_ASID
+
+/* ASID register offset for each hardware group, indexed by HWGRP_*. */
+static const u32 smmu_hwgrp_asid_reg[] = {
+       HWGRP_INIT(AFI),
+       HWGRP_INIT(AVPC),
+       HWGRP_INIT(DC),
+       HWGRP_INIT(DCB),
+       HWGRP_INIT(EPP),
+       HWGRP_INIT(G2),
+       HWGRP_INIT(HC),
+       HWGRP_INIT(HDA),
+       HWGRP_INIT(ISP),
+       HWGRP_INIT(MPE),
+       HWGRP_INIT(NV),
+       HWGRP_INIT(NV2),
+       HWGRP_INIT(PPCS),
+       HWGRP_INIT(SATA),
+       HWGRP_INIT(VDE),
+       HWGRP_INIT(VI),
+};
+/* Look up the ASID register offset for hardware group index x. */
+#define HWGRP_ASID_REG(x) (smmu_hwgrp_asid_reg[x])
+
+/*
+ * Per client for address space
+ */
+struct smmu_client {
+       struct device           *dev;   /* client device; platform_data carries its hwgrp map */
+       struct list_head        list;   /* node in smmu_as.client */
+       struct smmu_as          *as;    /* address space this client uses */
+       u32                     hwgrp;  /* map last applied by __smmu_client_set_hwgrp() */
+};
+
+/*
+ * Per address space
+ */
+struct smmu_as {
+       struct smmu_device      *smmu;  /* back pointer to container */
+       unsigned int            asid;
+       spinlock_t              lock;   /* for pagetable */
+       struct page             *pdir_page;     /* page directory; NULL when not allocated */
+       unsigned long           pdir_attr;
+       unsigned long           pde_attr;
+       unsigned long           pte_attr;
+       unsigned int            *pte_count;     /* per-PDE counters, indexed by pdn */
+
+       struct list_head        client;         /* list of struct smmu_client */
+       spinlock_t              client_lock; /* for client list */
+};
+
+/*
+ * Per SMMU device - IOMMU device
+ */
+struct smmu_device {
+       void __iomem    *regs, *regs_ahbarb;    /* SMMU and AHB arbiter register bases */
+       unsigned long   iovmm_base;     /* remappable base address */
+       unsigned long   page_count;     /* total remappable size */
+       spinlock_t      lock;           /* taken around hwgrp register updates */
+       char            *name;
+       struct device   *dev;
+       int             num_as;         /* number of entries in as[] */
+       struct smmu_as  *as;            /* Run-time allocated array */
+       struct page *avp_vector_page;   /* dummy page shared by all AS's */
+
+       /*
+        * Register image savers for suspend/resume
+        */
+       unsigned long translation_enable_0;
+       unsigned long translation_enable_1;
+       unsigned long translation_enable_2;
+       unsigned long asid_security;
+};
+
+static struct smmu_device *smmu_handle; /* unique for a system */
+
+/*
+ *     SMMU/AHB register accessors
+ */
+/* Read the 32-bit SMMU register at byte offset @offs. */
+static inline u32 smmu_read(struct smmu_device *smmu, size_t offs)
+{
+       return readl(smmu->regs + offs);
+}
+/* Write @val to the SMMU register at byte offset @offs. */
+static inline void smmu_write(struct smmu_device *smmu, u32 val, size_t offs)
+{
+       writel(val, smmu->regs + offs);
+}
+
+/* Read the 32-bit AHB arbiter register at byte offset @offs. */
+static inline u32 ahb_read(struct smmu_device *smmu, size_t offs)
+{
+       return readl(smmu->regs_ahbarb + offs);
+}
+/* Write @val to the AHB arbiter register at byte offset @offs. */
+static inline void ahb_write(struct smmu_device *smmu, u32 val, size_t offs)
+{
+       writel(val, smmu->regs_ahbarb + offs);
+}
+
+/* Physical address of @va, given the struct page that backs it. */
+#define VA_PAGE_TO_PA(va, page)        \
+       (page_to_phys(page) + ((unsigned long)(va) & ~PAGE_MASK))
+
+/*
+ * Flush @size bytes at @va (backed by @page) from the CPU data cache and
+ * then from the outer cache, using the matching physical range.
+ */
+#define FLUSH_CPU_DCACHE(va, page, size)       \
+       do {    \
+               unsigned long _pa_ = VA_PAGE_TO_PA(va, page);           \
+               __cpuc_flush_dcache_area((void *)(va), (size_t)(size)); \
+               outer_flush_range(_pa_, _pa_+(size_t)(size));           \
+       } while (0)
+
+/*
+ * Any interaction between any block on PPSB and a block on APB or AHB
+ * must have these read-back barriers to ensure the APB/AHB bus
+ * transaction is complete before initiating activity on the PPSB
+ * block.
+ */
+#define FLUSH_SMMU_REGS(smmu)  smmu_read(smmu, SMMU_CONFIG)
+
+#define smmu_client_hwgrp(c) (u32)((c)->dev->platform_data)
+
+/*
+ * Enable (@on != 0) or disable (@on == 0) client @c's address space on
+ * each hardware group whose bit is set in @map, by setting or clearing
+ * SMMU_ASID_ENABLE(asid) in that group's ASID register.
+ *
+ * When disabling, @map is expected to be 0 and is replaced by the map
+ * stored in the client's platform_data.  On success the map that was
+ * applied is recorded in c->hwgrp.
+ *
+ * Returns 0 on success, -EINVAL when enabling with an empty @map, or
+ * -EBUSY when a register already has any of the mask bits set; in that
+ * case the mask is cleared again in every register named by @map.
+ *
+ * The locked wrapper smmu_client_set_hwgrp() calls this with smmu->lock
+ * held.
+ */
+static int __smmu_client_set_hwgrp(struct smmu_client *c,
+                                  unsigned long map, int on)
+{
+       int i;
+       struct smmu_as *as = c->as;
+       u32 val, offs, mask = SMMU_ASID_ENABLE(as->asid);
+       struct smmu_device *smmu = as->smmu;
+
+       WARN_ON(!on && map);
+       if (on && !map)
+               return -EINVAL;
+       if (!on)
+               map = smmu_client_hwgrp(c);
+
+       for_each_set_bit(i, &map, HWGRP_COUNT) {
+               offs = HWGRP_ASID_REG(i);
+               val = smmu_read(smmu, offs);
+               if (on) {
+                       if (WARN_ON(val & mask))
+                               goto err_hw_busy;
+                       val |= mask;
+               } else {
+                       /*
+                        * NOTE(review): this warns when the mask IS fully
+                        * set, which looks like the expected state when
+                        * disabling - confirm the intended condition.
+                        */
+                       WARN_ON((val & mask) == mask);
+                       val &= ~mask;
+               }
+               smmu_write(smmu, val, offs);
+       }
+       FLUSH_SMMU_REGS(smmu);
+       c->hwgrp = map;
+       return 0;
+
+err_hw_busy:
+       /* Roll back: clear the mask in every group register of @map. */
+       for_each_set_bit(i, &map, HWGRP_COUNT) {
+               offs = HWGRP_ASID_REG(i);
+               val = smmu_read(smmu, offs);
+               val &= ~mask;
+               smmu_write(smmu, val, offs);
+       }
+       return -EBUSY;
+}
+
+/*
+ * Locked wrapper around __smmu_client_set_hwgrp(): takes smmu->lock with
+ * interrupts disabled for the duration of the register update.
+ *
+ * Returns whatever __smmu_client_set_hwgrp() returns (0 or a negative
+ * errno); the result is kept in an int so the errno is never squeezed
+ * through an unsigned type.
+ */
+static int smmu_client_set_hwgrp(struct smmu_client *c, u32 map, int on)
+{
+       int err;
+       unsigned long flags;
+       struct smmu_as *as = c->as;
+       struct smmu_device *smmu = as->smmu;
+
+       spin_lock_irqsave(&smmu->lock, flags);
+       err = __smmu_client_set_hwgrp(c, map, on);
+       spin_unlock_irqrestore(&smmu->lock, flags);
+       return err;
+}
+
+/*
+ * Flush all TLB entries and all PTC entries
+ * Caller must lock smmu
+ *
+ * When @enable is non-zero, SMMU_CONFIG_ENABLE is also written to
+ * SMMU_CONFIG after the flushes.
+ */
+static void smmu_flush_regs(struct smmu_device *smmu, int enable)
+{
+       u32 val;
+
+       smmu_write(smmu, SMMU_PTC_FLUSH_TYPE_ALL, SMMU_PTC_FLUSH);
+       FLUSH_SMMU_REGS(smmu);
+       /* Match every VA and ignore the ASID: flushes the whole TLB. */
+       val = SMMU_TLB_FLUSH_VA_MATCH_ALL |
+               SMMU_TLB_FLUSH_ASID_MATCH_disable;
+       smmu_write(smmu, val, SMMU_TLB_FLUSH);
+
+       if (enable)
+               smmu_write(smmu, SMMU_CONFIG_ENABLE, SMMU_CONFIG);
+       FLUSH_SMMU_REGS(smmu);
+}
+
+/*
+ * Program the SMMU from the driver's software state: for every address
+ * space load its page directory pointer (or the reset value when it has
+ * none) and re-enable its clients' hardware groups, then write the saved
+ * translation-enable and ASID-security images, the TLB/PTC reset
+ * configuration, flush and enable the SMMU, and finally set the
+ * SMMU_INIT_DONE bit in the AHB arbiter's XBAR control register.
+ */
+static void smmu_setup_regs(struct smmu_device *smmu)
+{
+       int i;
+       u32 val;
+
+       for (i = 0; i < smmu->num_as; i++) {
+               struct smmu_as *as = &smmu->as[i];
+               struct smmu_client *c;
+
+               /* Select the ASID, then write its page table base data. */
+               smmu_write(smmu, SMMU_PTB_ASID_CUR(as->asid), SMMU_PTB_ASID);
+               val = as->pdir_page ?
+                       SMMU_MK_PDIR(as->pdir_page, as->pdir_attr) :
+                       SMMU_PTB_DATA_RESET_VAL;
+               smmu_write(smmu, val, SMMU_PTB_DATA);
+
+               list_for_each_entry(c, &as->client, list)
+                       __smmu_client_set_hwgrp(c, c->hwgrp, 1);
+       }
+
+       smmu_write(smmu, smmu->translation_enable_0, SMMU_TRANSLATION_ENABLE_0);
+       smmu_write(smmu, smmu->translation_enable_1, SMMU_TRANSLATION_ENABLE_1);
+       smmu_write(smmu, smmu->translation_enable_2, SMMU_TRANSLATION_ENABLE_2);
+       smmu_write(smmu, smmu->asid_security, SMMU_ASID_SECURITY);
+       smmu_write(smmu, SMMU_TLB_CONFIG_RESET_VAL, SMMU_TLB_CONFIG);
+       smmu_write(smmu, SMMU_PTC_CONFIG_RESET_VAL, SMMU_PTC_CONFIG);
+
+       smmu_flush_regs(smmu, 1);
+
+       val = ahb_read(smmu, AHB_XBAR_CTRL);
+       val |= AHB_XBAR_CTRL_SMMU_INIT_DONE_DONE <<
+               AHB_XBAR_CTRL_SMMU_INIT_DONE_SHIFT;
+       ahb_write(smmu, val, AHB_XBAR_CTRL);
+}
+
+/*
+ * Invalidate the cached copies of one page table entry after it changed.
+ *
+ * @pte/@page: the entry that was modified and the page containing it;
+ *             its physical address selects the PTC line to flush.
+ * @iova:      the I/O virtual address the entry translates.
+ * @is_pde:    non-zero flushes the TLB with the SECTION mask/match for
+ *             @iova, zero uses the GROUP mask/match.
+ *
+ * The TLB flush is restricted to @as->asid.
+ */
+static void flush_ptc_and_tlb(struct smmu_device *smmu,
+                     struct smmu_as *as, dma_addr_t iova,
+                     unsigned long *pte, struct page *page, int is_pde)
+{
+       u32 val;
+       unsigned long tlb_flush_va = is_pde
+               ?  SMMU_TLB_FLUSH_VA(iova, SECTION)
+               :  SMMU_TLB_FLUSH_VA(iova, GROUP);
+
+       val = SMMU_PTC_FLUSH_TYPE_ADR | VA_PAGE_TO_PA(pte, page);
+       smmu_write(smmu, val, SMMU_PTC_FLUSH);
+       FLUSH_SMMU_REGS(smmu);
+       val = tlb_flush_va |
+               SMMU_TLB_FLUSH_ASID_MATCH__ENABLE |
+               (as->asid << SMMU_TLB_FLUSH_ASID_SHIFT);
+       smmu_write(smmu, val, SMMU_TLB_FLUSH);
+       FLUSH_SMMU_REGS(smmu);
+}
+
+/*
+ * Release the page table that covers @iova, if one is installed: free
+ * its page, reset the page directory entry to its vacant value and flush
+ * the CPU cache, PTC and TLB for that entry.  Does nothing when the
+ * directory entry is already vacant.
+ *
+ * NOTE(review): the table page is freed before the directory entry is
+ * reset and flushed - confirm the hardware cannot still walk it in that
+ * window.
+ */
+static void free_ptbl(struct smmu_as *as, dma_addr_t iova)
+{
+       unsigned long pdn = SMMU_ADDR_TO_PDN(iova);
+       unsigned long *pdir = (unsigned long *)page_address(as->pdir_page);
+
+       if (pdir[pdn] != _PDE_VACANT(pdn)) {
+               dev_dbg(as->smmu->dev, "pdn: %lx\n", pdn);
+
+               ClearPageReserved(SMMU_EX_PTBL_PAGE(pdir[pdn]));
+               __free_page(SMMU_EX_PTBL_PAGE(pdir[pdn]));
+               pdir[pdn] = _PDE_VACANT(pdn);
+               FLUSH_CPU_DCACHE(&pdir[pdn], as->pdir_page, sizeof pdir[pdn]);
+               flush_ptc_and_tlb(as->smmu, as, iova, &pdir[pdn],
+                                 as->pdir_page, 1);
+       }
+}
+
+/*
+ * Tear down an address space's page tables: free every installed page
+ * table, then the page directory page and the pte_count array.  Does
+ * nothing when no page directory is allocated.
+ */
+static void free_pdir(struct smmu_as *as)
+{
+       unsigned addr;
+       int count;
+       struct device *dev = as->smmu->dev;
+
+       if (!as->pdir_page)
+               return;
+
+       addr = as->smmu->iovmm_base;
+       count = as->smmu->page_count;
+       /*
+        * NOTE(review): the loop runs once per page in the aperture but
+        * advances by a whole page table's span each time, so it visits
+        * more directory slots than the aperture covers - confirm this is
+        * intended (already-vacant slots are skipped by free_ptbl()).
+        */
+       while (count-- > 0) {
+               free_ptbl(as, addr);
+               addr += SMMU_PAGE_SIZE * SMMU_PTBL_COUNT;
+       }
+       ClearPageReserved(as->pdir_page);
+       __free_page(as->pdir_page);
+       as->pdir_page = NULL;
+       devm_kfree(dev, as->pte_count);
+       as->pte_count = NULL;
+}
+
+/*
+ * Maps PTBL for given iova and returns the PTE address
+ * Caller must unmap the mapped PTBL returned in *ptbl_page_p
+ *
+ * @allocate:    when the directory entry for @iova is vacant, allocate
+ *               and install a new page table instead of failing.
+ * @ptbl_page_p: receives the page that holds the page table.
+ * @count:       receives a pointer to the pte_count slot for this
+ *               directory entry; only written on success.
+ *
+ * Returns NULL when the entry is vacant and @allocate is false, or when
+ * the page table allocation fails.
+ */
+static unsigned long *locate_pte(struct smmu_as *as,
+                                dma_addr_t iova, bool allocate,
+                                struct page **ptbl_page_p,
+                                unsigned int **count)
+{
+       unsigned long ptn = SMMU_ADDR_TO_PFN(iova);
+       unsigned long pdn = SMMU_ADDR_TO_PDN(iova);
+       unsigned long *pdir = page_address(as->pdir_page);
+       unsigned long *ptbl;
+
+       if (pdir[pdn] != _PDE_VACANT(pdn)) {
+               /* Mapped entry table already exists */
+               *ptbl_page_p = SMMU_EX_PTBL_PAGE(pdir[pdn]);
+               ptbl = page_address(*ptbl_page_p);
+       } else if (!allocate) {
+               return NULL;
+       } else {
+               int pn;
+               unsigned long addr = SMMU_PDN_TO_ADDR(pdn);
+
+               /* Vacant - allocate a new page table */
+               dev_dbg(as->smmu->dev, "New PTBL pdn: %lx\n", pdn);
+
+               *ptbl_page_p = alloc_page(GFP_ATOMIC);
+               if (!*ptbl_page_p) {
+                       dev_err(as->smmu->dev,
+                               "failed to allocate smmu_device page table\n");
+                       return NULL;
+               }
+               SetPageReserved(*ptbl_page_p);
+               ptbl = (unsigned long *)page_address(*ptbl_page_p);
+               /* Start with every entry of the new table marked vacant. */
+               for (pn = 0; pn < SMMU_PTBL_COUNT;
+                    pn++, addr += SMMU_PAGE_SIZE) {
+                       ptbl[pn] = _PTE_VACANT(addr);
+               }
+               /* Push the table to memory before the PDE points at it. */
+               FLUSH_CPU_DCACHE(ptbl, *ptbl_page_p, SMMU_PTBL_SIZE);
+               pdir[pdn] = SMMU_MK_PDE(*ptbl_page_p,
+                                       as->pde_attr | _PDE_NEXT);
+               FLUSH_CPU_DCACHE(&pdir[pdn], as->pdir_page, sizeof pdir[pdn]);
+               flush_ptc_and_tlb(as->smmu, as, iova, &pdir[pdn],
+                                 as->pdir_page, 1);
+       }
+       *count = &as->pte_count[pdn];
+
+       return &ptbl[ptn % SMMU_PTBL_COUNT];
+}
+
+#ifdef CONFIG_SMMU_SIG_DEBUG
+/*
+ * Debug aid: stamp the first two words of the page at @pfn with the IOVA
+ * it is mapped at and with its physical address. A page without a kernel
+ * virtual address is left untouched.
+ */
+static void put_signature(struct smmu_as *as,
+                         dma_addr_t iova, unsigned long pfn)
+{
+       struct page *pg = pfn_to_page(pfn);
+       unsigned long *sig = page_address(pg);
+
+       if (sig) {
+               sig[0] = iova;
+               sig[1] = pfn << PAGE_SHIFT;
+               FLUSH_CPU_DCACHE(sig, pg, sizeof(sig[0]) * 2);
+       }
+}
+#else
+/* Signature stamping is compiled out without CONFIG_SMMU_SIG_DEBUG */
+static inline void put_signature(struct smmu_as *as,
+                                unsigned long addr, unsigned long pfn)
+{
+}
+#endif
+
+/*
+ * Allocate and initialise the page directory and the PTE counters of @as,
+ * then flush the PTC for the directory and the TLB for this ASID.
+ *
+ * Caller must lock/unlock as. as->lock is a spinlock, so nothing here may
+ * sleep: both allocations use GFP_ATOMIC.
+ *
+ * Returns 0 on success or when the directory already exists, -ENOMEM when
+ * either allocation fails.
+ */
+static int alloc_pdir(struct smmu_as *as)
+{
+       unsigned long *pdir;
+       int pdn;
+       u32 val;
+       struct smmu_device *smmu = as->smmu;
+
+       if (as->pdir_page)
+               return 0;
+
+       as->pte_count = devm_kzalloc(smmu->dev,
+                    sizeof(as->pte_count[0]) * SMMU_PDIR_COUNT, GFP_ATOMIC);
+       if (!as->pte_count) {
+               dev_err(smmu->dev,
+                       "failed to allocate smmu_device PTE counters\n");
+               return -ENOMEM;
+       }
+       as->pdir_page = alloc_page(GFP_ATOMIC | __GFP_DMA);
+       if (!as->pdir_page) {
+               dev_err(smmu->dev,
+                       "failed to allocate smmu_device page directory\n");
+               devm_kfree(smmu->dev, as->pte_count);
+               as->pte_count = NULL;
+               return -ENOMEM;
+       }
+       SetPageReserved(as->pdir_page);
+       pdir = page_address(as->pdir_page);
+
+       /* Start with every directory entry vacant */
+       for (pdn = 0; pdn < SMMU_PDIR_COUNT; pdn++)
+               pdir[pdn] = _PDE_VACANT(pdn);
+       FLUSH_CPU_DCACHE(pdir, as->pdir_page, SMMU_PDIR_SIZE);
+
+       /* Flush the PTC entry for the directory ... */
+       val = SMMU_PTC_FLUSH_TYPE_ADR | VA_PAGE_TO_PA(pdir, as->pdir_page);
+       smmu_write(smmu, val, SMMU_PTC_FLUSH);
+       FLUSH_SMMU_REGS(as->smmu);
+
+       /* ... and every TLB entry tagged with this ASID */
+       val = SMMU_TLB_FLUSH_VA_MATCH_ALL |
+               SMMU_TLB_FLUSH_ASID_MATCH__ENABLE |
+               (as->asid << SMMU_TLB_FLUSH_ASID_SHIFT);
+       smmu_write(smmu, val, SMMU_TLB_FLUSH);
+       FLUSH_SMMU_REGS(as->smmu);
+
+       return 0;
+}
+
+/*
+ * Clear the PTE that maps @iova in @as and flush it out of the CPU cache,
+ * the PTC and the TLB. When this was the last PTE in use in its page
+ * table, the table itself is freed. Warns and returns if there is no page
+ * table for @iova or the PTE is already vacant.
+ */
+static void __smmu_iommu_unmap(struct smmu_as *as, dma_addr_t iova)
+{
+       unsigned long *pte;
+       struct page *page;
+       unsigned int *count;
+
+       /* Lookup only: never allocate a table on the unmap path */
+       pte = locate_pte(as, iova, false, &page, &count);
+       if (WARN_ON(!pte))
+               return;
+
+       if (WARN_ON(*pte == _PTE_VACANT(iova)))
+               return;
+
+       *pte = _PTE_VACANT(iova);
+       FLUSH_CPU_DCACHE(pte, page, sizeof(*pte));
+       flush_ptc_and_tlb(as->smmu, as, iova, pte, page, 0);
+       /* Drop the table once its last used PTE is gone */
+       if (!--(*count)) {
+               free_ptbl(as, iova);
+               smmu_flush_regs(as->smmu, 0);
+       }
+}
+
+/*
+ * Point the PTE for @iova in @as at @pfn, allocating the page table if
+ * needed, and flush the entry out of the CPU cache, the PTC and the TLB.
+ * Warns and returns without mapping when no PTE can be located.
+ */
+static void __smmu_iommu_map_pfn(struct smmu_as *as, dma_addr_t iova,
+                                unsigned long pfn)
+{
+       struct smmu_device *smmu = as->smmu;
+       unsigned long *pte;
+       unsigned int *count;
+       struct page *page;
+
+       pte = locate_pte(as, iova, true, &page, &count);
+       if (WARN_ON(!pte))
+               return;
+
+       /* A previously vacant PTE becomes a used one */
+       if (*pte == _PTE_VACANT(iova))
+               (*count)++;
+       *pte = SMMU_PFN_TO_PTE(pfn, as->pte_attr);
+       /* Undo the accounting if the new value still reads as vacant */
+       if (unlikely((*pte == _PTE_VACANT(iova))))
+               (*count)--;
+       FLUSH_CPU_DCACHE(pte, page, sizeof(*pte));
+       flush_ptc_and_tlb(smmu, as, iova, pte, page, 0);
+       put_signature(as, iova, pfn);
+}
+
+/*
+ * iommu_ops .map: install one translation @iova -> @pa in the address
+ * space behind @domain, under as->lock. @bytes and @prot are not consulted.
+ *
+ * Returns -ENOMEM when @pa does not correspond to a valid pfn, 0 otherwise.
+ */
+static int smmu_iommu_map(struct iommu_domain *domain, unsigned long iova,
+                         phys_addr_t pa, size_t bytes, int prot)
+{
+       struct smmu_as *as = domain->priv;
+       unsigned long irqflags;
+       unsigned long pfn;
+
+       pfn = __phys_to_pfn(pa);
+
+       dev_dbg(as->smmu->dev, "[%d] %08lx:%08x\n", as->asid, iova, pa);
+
+       if (pfn_valid(pfn)) {
+               spin_lock_irqsave(&as->lock, irqflags);
+               __smmu_iommu_map_pfn(as, iova, pfn);
+               spin_unlock_irqrestore(&as->lock, irqflags);
+               return 0;
+       }
+
+       return -ENOMEM;
+}
+
+/*
+ * iommu_ops .unmap: remove the translation for @iova from the address
+ * space behind @domain, under as->lock. @bytes is not consulted; the
+ * function always reports SMMU_PAGE_SIZE as the unmapped size.
+ */
+static size_t smmu_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
+                              size_t bytes)
+{
+       unsigned long irqflags;
+       struct smmu_as *as = domain->priv;
+
+       dev_dbg(as->smmu->dev, "[%d] %08lx\n", as->asid, iova);
+
+       spin_lock_irqsave(&as->lock, irqflags);
+       __smmu_iommu_unmap(as, iova);
+       spin_unlock_irqrestore(&as->lock, irqflags);
+
+       return SMMU_PAGE_SIZE;
+}
+
+/*
+ * iommu_ops .iova_to_phys: translate @iova through the page tables of the
+ * address space behind @domain, under as->lock.
+ *
+ * Returns the physical address of the pfn stored in the PTE, or 0 when no
+ * PTE can be located (locate_pte() returns NULL if it fails to allocate a
+ * page table). Warns when the pfn found is not valid.
+ *
+ * NOTE(review): the lookup passes allocate=true, so querying an IOVA whose
+ * page table does not exist yet creates that table as a side effect -
+ * kept as is, but confirm this is intended for a read-only query.
+ */
+static phys_addr_t smmu_iommu_iova_to_phys(struct iommu_domain *domain,
+                                          unsigned long iova)
+{
+       struct smmu_as *as = domain->priv;
+       unsigned long *pte;
+       unsigned int *count;
+       struct page *page;
+       unsigned long pfn;
+       unsigned long flags;
+
+       spin_lock_irqsave(&as->lock, flags);
+
+       pte = locate_pte(as, iova, true, &page, &count);
+       if (!pte) {
+               /* Page table allocation failed: nothing to translate */
+               spin_unlock_irqrestore(&as->lock, flags);
+               return 0;
+       }
+       pfn = *pte & SMMU_PFN_MASK;
+       WARN_ON(!pfn_valid(pfn));
+       dev_dbg(as->smmu->dev,
+               "iova:%08lx pfn:%08lx asid:%d\n", iova, pfn, as->asid);
+
+       spin_unlock_irqrestore(&as->lock, flags);
+       return PFN_PHYS(pfn);
+}
+
+/* iommu_ops .domain_has_cap: no capability is reported, whatever @cap is */
+static int smmu_iommu_domain_has_cap(struct iommu_domain *domain,
+                                    unsigned long cap)
+{
+       return 0;
+}
+
+/*
+ * iommu_ops .attach_dev: attach @dev to the address space behind @domain.
+ *
+ * The hardware-group bitmap of the device is taken from
+ * dev->platform_data. A client record is allocated, its hardware groups
+ * are enabled and it is added to the client list of the address space.
+ * If the bitmap contains HWG_AVPC, IOVA 0 is mapped to the shared AVP
+ * vector page.
+ *
+ * Returns 0 on success, -EINVAL when the bitmap is empty or @dev is
+ * already attached, -ENOMEM when the client cannot be allocated, or the
+ * error returned by smmu_client_enable_hwgrp().
+ */
+static int smmu_iommu_attach_dev(struct iommu_domain *domain,
+                                struct device *dev)
+{
+       struct smmu_as *as = domain->priv;
+       struct smmu_device *smmu = as->smmu;
+       struct smmu_client *client, *c;
+       u32 map;
+       int err;
+
+       /* Validate first so the early exit leaves no allocation behind */
+       map = (unsigned long)dev->platform_data;
+       if (!map)
+               return -EINVAL;
+
+       client = devm_kzalloc(smmu->dev, sizeof(*client), GFP_KERNEL);
+       if (!client)
+               return -ENOMEM;
+       client->dev = dev;
+       client->as = as;
+
+       err = smmu_client_enable_hwgrp(client, map);
+       if (err)
+               goto err_hwgrp;
+
+       spin_lock(&as->client_lock);
+       list_for_each_entry(c, &as->client, list) {
+               if (c->dev == dev) {
+                       dev_err(smmu->dev,
+                               "%s is already attached\n", dev_name(c->dev));
+                       err = -EINVAL;
+                       goto err_client;
+               }
+       }
+       list_add(&client->list, &as->client);
+       spin_unlock(&as->client_lock);
+
+       /*
+        * Reserve "page zero" for AVP vectors using a common dummy
+        * page.
+        * NOTE(review): this mapping is done without taking as->lock,
+        * unlike the map/unmap entry points - confirm that is intended.
+        */
+       if (map & HWG_AVPC) {
+               struct page *page;
+
+               page = as->smmu->avp_vector_page;
+               __smmu_iommu_map_pfn(as, 0, page_to_pfn(page));
+
+               pr_info("Reserve \"page zero\" for AVP vectors using a common dummy\n");
+       }
+
+       /*
+        * Use @dev here: once the loop above has run to completion, the
+        * cursor 'c' no longer points at a real client.
+        */
+       dev_dbg(smmu->dev, "%s is attached\n", dev_name(dev));
+       return 0;
+
+err_client:
+       smmu_client_disable_hwgrp(client);
+       spin_unlock(&as->client_lock);
+err_hwgrp:
+       devm_kfree(smmu->dev, client);
+       return err;
+}
+
+/*
+ * iommu_ops .detach_dev: detach @dev from the address space behind
+ * @domain. The matching client has its hardware groups disabled, is
+ * removed from the client list and freed. Logs an error when @dev is not
+ * attached.
+ */
+static void smmu_iommu_detach_dev(struct iommu_domain *domain,
+                                 struct device *dev)
+{
+       struct smmu_as *as = domain->priv;
+       struct smmu_device *smmu = as->smmu;
+       struct smmu_client *c;
+
+       spin_lock(&as->client_lock);
+
+       list_for_each_entry(c, &as->client, list) {
+               if (c->dev == dev) {
+                       smmu_client_disable_hwgrp(c);
+                       list_del(&c->list);
+                       c->as = NULL;
+                       /* Free last: 'c' must not be touched afterwards */
+                       devm_kfree(smmu->dev, c);
+                       dev_dbg(smmu->dev,
+                               "%s is detached\n", dev_name(dev));
+                       goto out;
+               }
+       }
+       /* The cursor is not a valid client after a full walk: use @dev */
+       dev_err(smmu->dev, "Couldn't find %s\n", dev_name(dev));
+out:
+       spin_unlock(&as->client_lock);
+}
+
+/*
+ * iommu_ops .domain_init: bind @domain to a free address space.
+ *
+ * An address space counts as free while it has no page directory. The
+ * first free one gets a page directory allocated and its PTB register
+ * programmed, and is stored in domain->priv.
+ *
+ * Returns 0 on success and -ENODEV when no address space is free or the
+ * page directory cannot be allocated.
+ *
+ * NOTE(review): smmu_handle is dereferenced without a NULL check -
+ * presumably probe has always completed before a domain is created;
+ * confirm.
+ */
+static int smmu_iommu_domain_init(struct iommu_domain *domain)
+{
+       int i;
+       unsigned long flags;
+       struct smmu_as *as;
+       struct smmu_device *smmu = smmu_handle;
+
+       /* Look for a free AS with lock held */
+       for  (i = 0; i < smmu->num_as; i++) {
+               struct smmu_as *tmp = &smmu->as[i];
+
+               spin_lock_irqsave(&tmp->lock, flags);
+               if (!tmp->pdir_page) {
+                       as = tmp;
+                       /* Leave the loop with as->lock still held */
+                       goto found;
+               }
+               spin_unlock_irqrestore(&tmp->lock, flags);
+       }
+       dev_err(smmu->dev, "no free AS\n");
+       return -ENODEV;
+
+found:
+       if (alloc_pdir(as) < 0)
+               goto err_alloc_pdir;
+
+       /* smmu->lock nests inside as->lock while the registers are written */
+       spin_lock(&smmu->lock);
+
+       /* Update PDIR register */
+       smmu_write(smmu, SMMU_PTB_ASID_CUR(as->asid), SMMU_PTB_ASID);
+       smmu_write(smmu,
+                  SMMU_MK_PDIR(as->pdir_page, as->pdir_attr), SMMU_PTB_DATA);
+       FLUSH_SMMU_REGS(smmu);
+
+       spin_unlock(&smmu->lock);
+
+       spin_unlock_irqrestore(&as->lock, flags);
+       domain->priv = as;
+
+       dev_dbg(smmu->dev, "smmu_as@%p\n", as);
+       return 0;
+
+err_alloc_pdir:
+       /* Any alloc_pdir() failure is reported as -ENODEV */
+       spin_unlock_irqrestore(&as->lock, flags);
+       return -ENODEV;
+}
+
+/*
+ * iommu_ops .domain_destroy: tear down the address space behind @domain.
+ *
+ * Under as->lock: reset the PTB register of the ASID and free the page
+ * directory if there is one, then detach every client still attached.
+ * Finally domain->priv is cleared.
+ */
+static void smmu_iommu_domain_destroy(struct iommu_domain *domain)
+{
+       struct smmu_as *as = domain->priv;
+       struct smmu_device *smmu = as->smmu;
+       unsigned long flags;
+
+       spin_lock_irqsave(&as->lock, flags);
+
+       if (as->pdir_page) {
+               spin_lock(&smmu->lock);
+               smmu_write(smmu, SMMU_PTB_ASID_CUR(as->asid), SMMU_PTB_ASID);
+               smmu_write(smmu, SMMU_PTB_DATA_RESET_VAL, SMMU_PTB_DATA);
+               FLUSH_SMMU_REGS(smmu);
+               spin_unlock(&smmu->lock);
+
+               free_pdir(as);
+       }
+
+       if (!list_empty(&as->client)) {
+               struct smmu_client *c, *next;
+
+               /*
+                * smmu_iommu_detach_dev() unlinks and frees the client it
+                * is given, so the successor must be fetched before the
+                * call: use the _safe iterator.
+                */
+               list_for_each_entry_safe(c, next, &as->client, list)
+                       smmu_iommu_detach_dev(domain, c->dev);
+       }
+
+       spin_unlock_irqrestore(&as->lock, flags);
+
+       domain->priv = NULL;
+       dev_dbg(smmu->dev, "smmu_as@%p\n", as);
+}
+
+/* IOMMU callbacks of this driver, installed by tegra_smmu_init() */
+static struct iommu_ops smmu_iommu_ops = {
+       .domain_init    = smmu_iommu_domain_init,
+       .domain_destroy = smmu_iommu_domain_destroy,
+       .attach_dev     = smmu_iommu_attach_dev,
+       .detach_dev     = smmu_iommu_detach_dev,
+       .map            = smmu_iommu_map,
+       .unmap          = smmu_iommu_unmap,
+       .iova_to_phys   = smmu_iommu_iova_to_phys,
+       .domain_has_cap = smmu_iommu_domain_has_cap,
+       .pgsize_bitmap  = SMMU_IOMMU_PGSIZES,
+};
+
+/*
+ * PM .suspend: save the three translation-enable registers and the ASID
+ * security register into the smmu_device. Always returns 0.
+ */
+static int tegra_smmu_suspend(struct device *dev)
+{
+       struct smmu_device *smmu = dev_get_drvdata(dev);
+
+       smmu->translation_enable_0 = smmu_read(smmu, SMMU_TRANSLATION_ENABLE_0);
+       smmu->translation_enable_1 = smmu_read(smmu, SMMU_TRANSLATION_ENABLE_1);
+       smmu->translation_enable_2 = smmu_read(smmu, SMMU_TRANSLATION_ENABLE_2);
+       smmu->asid_security = smmu_read(smmu, SMMU_ASID_SECURITY);
+       return 0;
+}
+
+/*
+ * PM .resume: run smmu_setup_regs() with smmu->lock held and interrupts
+ * disabled. Always returns 0.
+ */
+static int tegra_smmu_resume(struct device *dev)
+{
+       struct smmu_device *smmu = dev_get_drvdata(dev);
+       unsigned long flags;
+
+       spin_lock_irqsave(&smmu->lock, flags);
+       smmu_setup_regs(smmu);
+       spin_unlock_irqrestore(&smmu->lock, flags);
+       return 0;
+}
+
+/*
+ * Platform driver .probe: set up the single SMMU instance.
+ *
+ * Needs three memory resources: two register ranges (index 0 and 1) and
+ * the IOVA window (index 2). Allocates the smmu_device and its array of
+ * address spaces, programs the registers through smmu_setup_regs(),
+ * allocates the AVP vector page and publishes the device in smmu_handle.
+ *
+ * Returns 0 on success, -EIO when an instance is already registered,
+ * -ENODEV when a resource is missing, -ENXIO when the registers cannot be
+ * remapped and -ENOMEM when an allocation fails.
+ */
+static int tegra_smmu_probe(struct platform_device *pdev)
+{
+       struct smmu_device *smmu;
+       struct resource *regs, *regs2, *window;
+       struct device *dev = &pdev->dev;
+       int i, err = 0;
+
+       if (smmu_handle)
+               return -EIO;
+
+       BUILD_BUG_ON(PAGE_SHIFT != SMMU_PAGE_SHIFT);
+
+       regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       regs2 = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+       window = platform_get_resource(pdev, IORESOURCE_MEM, 2);
+       if (!regs || !regs2 || !window) {
+               dev_err(dev, "No SMMU resources\n");
+               return -ENODEV;
+       }
+
+       smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
+       if (!smmu) {
+               dev_err(dev, "failed to allocate smmu_device\n");
+               return -ENOMEM;
+       }
+
+       smmu->dev = dev;
+       smmu->num_as = SMMU_NUM_ASIDS;
+       smmu->iovmm_base = (unsigned long)window->start;
+       smmu->page_count = resource_size(window) >> SMMU_PAGE_SHIFT;
+       smmu->regs = devm_ioremap(dev, regs->start, resource_size(regs));
+       smmu->regs_ahbarb = devm_ioremap(dev, regs2->start,
+                                        resource_size(regs2));
+       if (!smmu->regs || !smmu->regs_ahbarb) {
+               dev_err(dev, "failed to remap SMMU registers\n");
+               err = -ENXIO;
+               goto fail;
+       }
+
+       /* Register values written by smmu_setup_regs() */
+       smmu->translation_enable_0 = ~0;
+       smmu->translation_enable_1 = ~0;
+       smmu->translation_enable_2 = ~0;
+       smmu->asid_security = 0;
+
+       smmu->as = devm_kzalloc(dev,
+                       sizeof(smmu->as[0]) * smmu->num_as, GFP_KERNEL);
+       if (!smmu->as) {
+               dev_err(dev, "failed to allocate smmu_as\n");
+               err = -ENOMEM;
+               goto fail;
+       }
+
+       for (i = 0; i < smmu->num_as; i++) {
+               struct smmu_as *as = &smmu->as[i];
+
+               as->smmu = smmu;
+               as->asid = i;
+               as->pdir_attr = _PDIR_ATTR;
+               as->pde_attr = _PDE_ATTR;
+               as->pte_attr = _PTE_ATTR;
+
+               spin_lock_init(&as->lock);
+               INIT_LIST_HEAD(&as->client);
+       }
+       spin_lock_init(&smmu->lock);
+       smmu_setup_regs(smmu);
+       platform_set_drvdata(pdev, smmu);
+
+       smmu->avp_vector_page = alloc_page(GFP_KERNEL);
+       if (!smmu->avp_vector_page) {
+               /* Without this, probe would report success after cleanup */
+               err = -ENOMEM;
+               goto fail;
+       }
+
+       smmu_handle = smmu;
+       return 0;
+
+fail:
+       /* Do not leave drvdata pointing at memory freed below */
+       platform_set_drvdata(pdev, NULL);
+       if (smmu->avp_vector_page)
+               __free_page(smmu->avp_vector_page);
+       if (smmu->regs)
+               devm_iounmap(dev, smmu->regs);
+       if (smmu->regs_ahbarb)
+               devm_iounmap(dev, smmu->regs_ahbarb);
+       if (smmu->as) {
+               for (i = 0; i < smmu->num_as; i++) {
+                       if (smmu->as[i].pdir_page) {
+                               ClearPageReserved(smmu->as[i].pdir_page);
+                               __free_page(smmu->as[i].pdir_page);
+                       }
+               }
+               devm_kfree(dev, smmu->as);
+       }
+       devm_kfree(dev, smmu);
+       return err;
+}
+
+/*
+ * Platform driver .remove: disable the SMMU, free the page directories of
+ * all address spaces, the AVP vector page, the register mappings and the
+ * smmu_device itself, then clear smmu_handle. Always returns 0.
+ */
+static int tegra_smmu_remove(struct platform_device *pdev)
+{
+       struct smmu_device *smmu = platform_get_drvdata(pdev);
+       struct device *dev = smmu->dev;
+
+       /* Disable the SMMU before any of its tables is freed */
+       smmu_write(smmu, SMMU_CONFIG_DISABLE, SMMU_CONFIG);
+       platform_set_drvdata(pdev, NULL);
+       if (smmu->as) {
+               int i;
+
+               for (i = 0; i < smmu->num_as; i++)
+                       free_pdir(&smmu->as[i]);
+               devm_kfree(dev, smmu->as);
+       }
+       if (smmu->avp_vector_page)
+               __free_page(smmu->avp_vector_page);
+       if (smmu->regs)
+               devm_iounmap(dev, smmu->regs);
+       if (smmu->regs_ahbarb)
+               devm_iounmap(dev, smmu->regs_ahbarb);
+       devm_kfree(dev, smmu);
+       smmu_handle = NULL;
+       return 0;
+}
+
+/* Suspend/resume callbacks referenced by tegra_smmu_driver */
+const struct dev_pm_ops tegra_smmu_pm_ops = {
+       .suspend        = tegra_smmu_suspend,
+       .resume         = tegra_smmu_resume,
+};
+
+/* Platform driver bound to devices named "tegra-smmu" */
+static struct platform_driver tegra_smmu_driver = {
+       .probe          = tegra_smmu_probe,
+       .remove         = tegra_smmu_remove,
+       .driver = {
+               .owner  = THIS_MODULE,
+               .name   = "tegra-smmu",
+               .pm     = &tegra_smmu_pm_ops,
+       },
+};
+
+/*
+ * Initcall: install smmu_iommu_ops on the platform bus, then register the
+ * platform driver. Returns the result of platform_driver_register().
+ * NOTE(review): the IOMMU ops are installed before the driver has probed
+ * and are not removed if registration fails - confirm this is intended.
+ */
+static int __devinit tegra_smmu_init(void)
+{
+       bus_set_iommu(&platform_bus_type, &smmu_iommu_ops);
+       return platform_driver_register(&tegra_smmu_driver);
+}
+
+/* Module exit: unregister the platform driver */
+static void __exit tegra_smmu_exit(void)
+{
+       platform_driver_unregister(&tegra_smmu_driver);
+}
+
+subsys_initcall(tegra_smmu_init);
+module_exit(tegra_smmu_exit);
+
+MODULE_DESCRIPTION("IOMMU API for SMMU in Tegra30");
+MODULE_AUTHOR("Hiroshi DOYU <hdoyu@nvidia.com>");
+MODULE_LICENSE("GPL v2");
index a7dc4672d996c5e67bb112371e92bfd634f2d1c7..a5c591ffe395d01b6e7e09f927c03f3048688620 100644 (file)
@@ -346,7 +346,7 @@ static int mpt_remove_dead_ioc_func(void *arg)
        if ((pdev == NULL))
                return -1;
 
-       pci_remove_bus_device(pdev);
+       pci_stop_and_remove_bus_device(pdev);
        return 0;
 }
 
index 115749f20f9e5402d62785950092b05a4ec1206e..0fde9fc7d2e5415d032d57104694c9ef0e61f763 100644 (file)
@@ -945,12 +945,8 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset)
                goto out_free;
 
        err = -ENOMEM;
-       ubi->peb_buf1 = vmalloc(ubi->peb_size);
-       if (!ubi->peb_buf1)
-               goto out_free;
-
-       ubi->peb_buf2 = vmalloc(ubi->peb_size);
-       if (!ubi->peb_buf2)
+       ubi->peb_buf = vmalloc(ubi->peb_size);
+       if (!ubi->peb_buf)
                goto out_free;
 
        err = ubi_debugging_init_dev(ubi);
@@ -1029,8 +1025,7 @@ out_detach:
 out_debugging:
        ubi_debugging_exit_dev(ubi);
 out_free:
-       vfree(ubi->peb_buf1);
-       vfree(ubi->peb_buf2);
+       vfree(ubi->peb_buf);
        if (ref)
                put_device(&ubi->dev);
        else
@@ -1101,8 +1096,7 @@ int ubi_detach_mtd_dev(int ubi_num, int anyway)
        vfree(ubi->vtbl);
        put_mtd_device(ubi->mtd);
        ubi_debugging_exit_dev(ubi);
-       vfree(ubi->peb_buf1);
-       vfree(ubi->peb_buf2);
+       vfree(ubi->peb_buf);
        ubi_msg("mtd%d is detached from ubi%d", ubi->mtd->index, ubi->ubi_num);
        put_device(&ubi->dev);
        return 0;
index cd26da8ad225b882d918188e46c6c2d38cccc803..2455d620d96b8a4c1ecc86def50d1bd580766ece 100644 (file)
@@ -529,18 +529,18 @@ retry:
 
        data_size = offset + len;
        mutex_lock(&ubi->buf_mutex);
-       memset(ubi->peb_buf1 + offset, 0xFF, len);
+       memset(ubi->peb_buf + offset, 0xFF, len);
 
        /* Read everything before the area where the write failure happened */
        if (offset > 0) {
-               err = ubi_io_read_data(ubi, ubi->peb_buf1, pnum, 0, offset);
+               err = ubi_io_read_data(ubi, ubi->peb_buf, pnum, 0, offset);
                if (err && err != UBI_IO_BITFLIPS)
                        goto out_unlock;
        }
 
-       memcpy(ubi->peb_buf1 + offset, buf, len);
+       memcpy(ubi->peb_buf + offset, buf, len);
 
-       err = ubi_io_write_data(ubi, ubi->peb_buf1, new_pnum, 0, data_size);
+       err = ubi_io_write_data(ubi, ubi->peb_buf, new_pnum, 0, data_size);
        if (err) {
                mutex_unlock(&ubi->buf_mutex);
                goto write_error;
@@ -979,7 +979,7 @@ static int is_error_sane(int err)
  * physical eraseblock @to. The @vid_hdr buffer may be changed by this
  * function. Returns:
  *   o %0 in case of success;
- *   o %MOVE_CANCEL_RACE, %MOVE_TARGET_WR_ERR, %MOVE_CANCEL_BITFLIPS, etc;
+ *   o %MOVE_CANCEL_RACE, %MOVE_TARGET_WR_ERR, %MOVE_TARGET_BITFLIPS, etc;
  *   o a negative error code in case of failure.
  */
 int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
@@ -1053,13 +1053,13 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
 
        /*
         * OK, now the LEB is locked and we can safely start moving it. Since
-        * this function utilizes the @ubi->peb_buf1 buffer which is shared
+        * this function utilizes the @ubi->peb_buf buffer which is shared
         * with some other functions - we lock the buffer by taking the
         * @ubi->buf_mutex.
         */
        mutex_lock(&ubi->buf_mutex);
        dbg_wl("read %d bytes of data", aldata_size);
-       err = ubi_io_read_data(ubi, ubi->peb_buf1, from, 0, aldata_size);
+       err = ubi_io_read_data(ubi, ubi->peb_buf, from, 0, aldata_size);
        if (err && err != UBI_IO_BITFLIPS) {
                ubi_warn("error %d while reading data from PEB %d",
                         err, from);
@@ -1079,10 +1079,10 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
         */
        if (vid_hdr->vol_type == UBI_VID_DYNAMIC)
                aldata_size = data_size =
-                       ubi_calc_data_len(ubi, ubi->peb_buf1, data_size);
+                       ubi_calc_data_len(ubi, ubi->peb_buf, data_size);
 
        cond_resched();
-       crc = crc32(UBI_CRC32_INIT, ubi->peb_buf1, data_size);
+       crc = crc32(UBI_CRC32_INIT, ubi->peb_buf, data_size);
        cond_resched();
 
        /*
@@ -1116,12 +1116,12 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
                        if (is_error_sane(err))
                                err = MOVE_TARGET_RD_ERR;
                } else
-                       err = MOVE_CANCEL_BITFLIPS;
+                       err = MOVE_TARGET_BITFLIPS;
                goto out_unlock_buf;
        }
 
        if (data_size > 0) {
-               err = ubi_io_write_data(ubi, ubi->peb_buf1, to, 0, aldata_size);
+               err = ubi_io_write_data(ubi, ubi->peb_buf, to, 0, aldata_size);
                if (err) {
                        if (err == -EIO)
                                err = MOVE_TARGET_WR_ERR;
@@ -1134,8 +1134,8 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
                 * We've written the data and are going to read it back to make
                 * sure it was written correctly.
                 */
-
-               err = ubi_io_read_data(ubi, ubi->peb_buf2, to, 0, aldata_size);
+               memset(ubi->peb_buf, 0xFF, aldata_size);
+               err = ubi_io_read_data(ubi, ubi->peb_buf, to, 0, aldata_size);
                if (err) {
                        if (err != UBI_IO_BITFLIPS) {
                                ubi_warn("error %d while reading data back "
@@ -1143,13 +1143,13 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
                                if (is_error_sane(err))
                                        err = MOVE_TARGET_RD_ERR;
                        } else
-                               err = MOVE_CANCEL_BITFLIPS;
+                               err = MOVE_TARGET_BITFLIPS;
                        goto out_unlock_buf;
                }
 
                cond_resched();
 
-               if (memcmp(ubi->peb_buf1, ubi->peb_buf2, aldata_size)) {
+               if (crc != crc32(UBI_CRC32_INIT, ubi->peb_buf, data_size)) {
                        ubi_warn("read data back from PEB %d and it is "
                                 "different", to);
                        err = -EINVAL;
index 5cde4e5ca3e542a38945be3d12f106b4c3853e80..43f1a0011a55cad9a9d876cff0241ecda9876ba3 100644 (file)
@@ -431,11 +431,11 @@ static int torture_peb(struct ubi_device *ubi, int pnum)
                        goto out;
 
                /* Make sure the PEB contains only 0xFF bytes */
-               err = ubi_io_read(ubi, ubi->peb_buf1, pnum, 0, ubi->peb_size);
+               err = ubi_io_read(ubi, ubi->peb_buf, pnum, 0, ubi->peb_size);
                if (err)
                        goto out;
 
-               err = ubi_check_pattern(ubi->peb_buf1, 0xFF, ubi->peb_size);
+               err = ubi_check_pattern(ubi->peb_buf, 0xFF, ubi->peb_size);
                if (err == 0) {
                        ubi_err("erased PEB %d, but a non-0xFF byte found",
                                pnum);
@@ -444,17 +444,17 @@ static int torture_peb(struct ubi_device *ubi, int pnum)
                }
 
                /* Write a pattern and check it */
-               memset(ubi->peb_buf1, patterns[i], ubi->peb_size);
-               err = ubi_io_write(ubi, ubi->peb_buf1, pnum, 0, ubi->peb_size);
+               memset(ubi->peb_buf, patterns[i], ubi->peb_size);
+               err = ubi_io_write(ubi, ubi->peb_buf, pnum, 0, ubi->peb_size);
                if (err)
                        goto out;
 
-               memset(ubi->peb_buf1, ~patterns[i], ubi->peb_size);
-               err = ubi_io_read(ubi, ubi->peb_buf1, pnum, 0, ubi->peb_size);
+               memset(ubi->peb_buf, ~patterns[i], ubi->peb_size);
+               err = ubi_io_read(ubi, ubi->peb_buf, pnum, 0, ubi->peb_size);
                if (err)
                        goto out;
 
-               err = ubi_check_pattern(ubi->peb_buf1, patterns[i],
+               err = ubi_check_pattern(ubi->peb_buf, patterns[i],
                                        ubi->peb_size);
                if (err == 0) {
                        ubi_err("pattern %x checking failed for PEB %d",
index 0cb17d936b5a46581ab6360c1b8307211b3c01c0..12c43b44f81578a4b78369e5140b66d34d26a188 100644 (file)
@@ -789,9 +789,9 @@ static int check_corruption(struct ubi_device *ubi, struct ubi_vid_hdr *vid_hdr,
        int err;
 
        mutex_lock(&ubi->buf_mutex);
-       memset(ubi->peb_buf1, 0x00, ubi->leb_size);
+       memset(ubi->peb_buf, 0x00, ubi->leb_size);
 
-       err = ubi_io_read(ubi, ubi->peb_buf1, pnum, ubi->leb_start,
+       err = ubi_io_read(ubi, ubi->peb_buf, pnum, ubi->leb_start,
                          ubi->leb_size);
        if (err == UBI_IO_BITFLIPS || mtd_is_eccerr(err)) {
                /*
@@ -808,7 +808,7 @@ static int check_corruption(struct ubi_device *ubi, struct ubi_vid_hdr *vid_hdr,
        if (err)
                goto out_unlock;
 
-       if (ubi_check_pattern(ubi->peb_buf1, 0xFF, ubi->leb_size))
+       if (ubi_check_pattern(ubi->peb_buf, 0xFF, ubi->leb_size))
                goto out_unlock;
 
        ubi_err("PEB %d contains corrupted VID header, and the data does not "
@@ -818,7 +818,7 @@ static int check_corruption(struct ubi_device *ubi, struct ubi_vid_hdr *vid_hdr,
        dbg_msg("hexdump of PEB %d offset %d, length %d",
                pnum, ubi->leb_start, ubi->leb_size);
        ubi_dbg_print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1,
-                              ubi->peb_buf1, ubi->leb_size, 1);
+                              ubi->peb_buf, ubi->leb_size, 1);
        err = 1;
 
 out_unlock:
@@ -1174,7 +1174,7 @@ struct ubi_scan_info *ubi_scan(struct ubi_device *ubi)
 
        ech = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL);
        if (!ech)
-               goto out_slab;
+               goto out_si;
 
        vidh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL);
        if (!vidh)
@@ -1235,8 +1235,6 @@ out_vidh:
        ubi_free_vid_hdr(ubi, vidh);
 out_ech:
        kfree(ech);
-out_slab:
-       kmem_cache_destroy(si->scan_leb_slab);
 out_si:
        ubi_scan_destroy_si(si);
        return ERR_PTR(err);
@@ -1325,7 +1323,9 @@ void ubi_scan_destroy_si(struct ubi_scan_info *si)
                }
        }
 
-       kmem_cache_destroy(si->scan_leb_slab);
+       if (si->scan_leb_slab)
+               kmem_cache_destroy(si->scan_leb_slab);
+
        kfree(si);
 }
 
index d51d75d344462c1ec3b0ae657b423594f5857517..b162790790a99d58f5abe69a1eb4ea4030c7cf6a 100644 (file)
@@ -118,7 +118,7 @@ enum {
  *                     PEB
  * MOVE_TARGET_WR_ERR: canceled because there was a write error to the target
  *                     PEB
- * MOVE_CANCEL_BITFLIPS: canceled because a bit-flip was detected in the
+ * MOVE_TARGET_BITFLIPS: canceled because a bit-flip was detected in the
  *                       target PEB
  * MOVE_RETRY: retry scrubbing the PEB
  */
@@ -127,7 +127,7 @@ enum {
        MOVE_SOURCE_RD_ERR,
        MOVE_TARGET_RD_ERR,
        MOVE_TARGET_WR_ERR,
-       MOVE_CANCEL_BITFLIPS,
+       MOVE_TARGET_BITFLIPS,
        MOVE_RETRY,
 };
 
@@ -387,9 +387,8 @@ struct ubi_wl_entry;
  *                  time (MTD write buffer size)
  * @mtd: MTD device descriptor
  *
- * @peb_buf1: a buffer of PEB size used for different purposes
- * @peb_buf2: another buffer of PEB size used for different purposes
- * @buf_mutex: protects @peb_buf1 and @peb_buf2
+ * @peb_buf: a buffer of PEB size used for different purposes
+ * @buf_mutex: protects @peb_buf
  * @ckvol_mutex: serializes static volume checking when opening
  *
  * @dbg: debugging information for this UBI device
@@ -471,8 +470,7 @@ struct ubi_device {
        int max_write_size;
        struct mtd_info *mtd;
 
-       void *peb_buf1;
-       void *peb_buf2;
+       void *peb_buf;
        struct mutex buf_mutex;
        struct mutex ckvol_mutex;
 
index 0696e36b053939e878d0215aefe4f4598ba61ac3..7c1a9bf8ac869592e2697b1217bc9261197dc11b 100644 (file)
@@ -350,18 +350,19 @@ static void prot_queue_add(struct ubi_device *ubi, struct ubi_wl_entry *e)
 /**
  * find_wl_entry - find wear-leveling entry closest to certain erase counter.
  * @root: the RB-tree where to look for
- * @max: highest possible erase counter
+ * @diff: maximum possible difference from the smallest erase counter
  *
  * This function looks for a wear leveling entry with erase counter closest to
- * @max and less than @max.
+ * min + @diff, where min is the smallest erase counter.
  */
-static struct ubi_wl_entry *find_wl_entry(struct rb_root *root, int max)
+static struct ubi_wl_entry *find_wl_entry(struct rb_root *root, int diff)
 {
        struct rb_node *p;
        struct ubi_wl_entry *e;
+       int max;
 
        e = rb_entry(rb_first(root), struct ubi_wl_entry, u.rb);
-       max += e->ec;
+       max = e->ec + diff;
 
        p = root->rb_node;
        while (p) {
@@ -389,7 +390,7 @@ static struct ubi_wl_entry *find_wl_entry(struct rb_root *root, int max)
  */
 int ubi_wl_get_peb(struct ubi_device *ubi, int dtype)
 {
-       int err, medium_ec;
+       int err;
        struct ubi_wl_entry *e, *first, *last;
 
        ubi_assert(dtype == UBI_LONGTERM || dtype == UBI_SHORTTERM ||
@@ -427,7 +428,7 @@ retry:
                 * For unknown data we pick a physical eraseblock with medium
                 * erase counter. But we by no means can pick a physical
                 * eraseblock with erase counter greater or equivalent than the
-                * lowest erase counter plus %WL_FREE_MAX_DIFF.
+                * lowest erase counter plus %WL_FREE_MAX_DIFF/2.
                 */
                first = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry,
                                        u.rb);
@@ -436,10 +437,8 @@ retry:
                if (last->ec - first->ec < WL_FREE_MAX_DIFF)
                        e = rb_entry(ubi->free.rb_node,
                                        struct ubi_wl_entry, u.rb);
-               else {
-                       medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2;
-                       e = find_wl_entry(&ubi->free, medium_ec);
-               }
+               else
+                       e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF/2);
                break;
        case UBI_SHORTTERM:
                /*
@@ -799,7 +798,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
                        scrubbing = 1;
                        goto out_not_moved;
                }
-               if (err == MOVE_CANCEL_BITFLIPS || err == MOVE_TARGET_WR_ERR ||
+               if (err == MOVE_TARGET_BITFLIPS || err == MOVE_TARGET_WR_ERR ||
                    err == MOVE_TARGET_RD_ERR) {
                        /*
                         * Target PEB had bit-flips or write error - torture it.
index 0730203a19f21c6019499c9b487a1f87fabd984e..b920d829692aa35394962f87910678bfe1d4cbb7 100644 (file)
@@ -2573,12 +2573,16 @@ re_arm:
 static int bond_has_this_ip(struct bonding *bond, __be32 ip)
 {
        struct vlan_entry *vlan;
+       struct net_device *vlan_dev;
 
-       if (ip == bond->master_ip)
+       if (ip == bond_confirm_addr(bond->dev, 0, ip))
                return 1;
 
        list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
-               if (ip == vlan->vlan_ip)
+               rcu_read_lock();
+               vlan_dev = __vlan_find_dev_deep(bond->dev, vlan->vlan_id);
+               rcu_read_unlock();
+               if (vlan_dev && ip == bond_confirm_addr(vlan_dev, 0, ip))
                        return 1;
        }
 
@@ -2620,17 +2624,19 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
        int i, vlan_id;
        __be32 *targets = bond->params.arp_targets;
        struct vlan_entry *vlan;
-       struct net_device *vlan_dev;
+       struct net_device *vlan_dev = NULL;
        struct rtable *rt;
 
        for (i = 0; (i < BOND_MAX_ARP_TARGETS); i++) {
+               __be32 addr;
                if (!targets[i])
                        break;
                pr_debug("basa: target %x\n", targets[i]);
                if (!bond_vlan_used(bond)) {
                        pr_debug("basa: empty vlan: arp_send\n");
+                       addr = bond_confirm_addr(bond->dev, targets[i], 0);
                        bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
-                                     bond->master_ip, 0);
+                                     addr, 0);
                        continue;
                }
 
@@ -2655,8 +2661,9 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
                if (rt->dst.dev == bond->dev) {
                        ip_rt_put(rt);
                        pr_debug("basa: rtdev == bond->dev: arp_send\n");
+                       addr = bond_confirm_addr(bond->dev, targets[i], 0);
                        bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
-                                     bond->master_ip, 0);
+                                     addr, 0);
                        continue;
                }
 
@@ -2674,10 +2681,11 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
                        }
                }
 
-               if (vlan_id) {
+               if (vlan_id && vlan_dev) {
                        ip_rt_put(rt);
+                       addr = bond_confirm_addr(vlan_dev, targets[i], 0);
                        bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
-                                     vlan->vlan_ip, vlan_id);
+                                     addr, vlan_id);
                        continue;
                }
 
@@ -3299,68 +3307,10 @@ static int bond_netdev_event(struct notifier_block *this,
        return NOTIFY_DONE;
 }
 
-/*
- * bond_inetaddr_event: handle inetaddr notifier chain events.
- *
- * We keep track of device IPs primarily to use as source addresses in
- * ARP monitor probes (rather than spewing out broadcasts all the time).
- *
- * We track one IP for the main device (if it has one), plus one per VLAN.
- */
-static int bond_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
-{
-       struct in_ifaddr *ifa = ptr;
-       struct net_device *vlan_dev, *event_dev = ifa->ifa_dev->dev;
-       struct bond_net *bn = net_generic(dev_net(event_dev), bond_net_id);
-       struct bonding *bond;
-       struct vlan_entry *vlan;
-
-       /* we only care about primary address */
-       if(ifa->ifa_flags & IFA_F_SECONDARY)
-               return NOTIFY_DONE;
-
-       list_for_each_entry(bond, &bn->dev_list, bond_list) {
-               if (bond->dev == event_dev) {
-                       switch (event) {
-                       case NETDEV_UP:
-                               bond->master_ip = ifa->ifa_local;
-                               return NOTIFY_OK;
-                       case NETDEV_DOWN:
-                               bond->master_ip = 0;
-                               return NOTIFY_OK;
-                       default:
-                               return NOTIFY_DONE;
-                       }
-               }
-
-               list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
-                       vlan_dev = __vlan_find_dev_deep(bond->dev,
-                                                       vlan->vlan_id);
-                       if (vlan_dev == event_dev) {
-                               switch (event) {
-                               case NETDEV_UP:
-                                       vlan->vlan_ip = ifa->ifa_local;
-                                       return NOTIFY_OK;
-                               case NETDEV_DOWN:
-                                       vlan->vlan_ip = 0;
-                                       return NOTIFY_OK;
-                               default:
-                                       return NOTIFY_DONE;
-                               }
-                       }
-               }
-       }
-       return NOTIFY_DONE;
-}
-
 static struct notifier_block bond_netdev_notifier = {
        .notifier_call = bond_netdev_event,
 };
 
-static struct notifier_block bond_inetaddr_notifier = {
-       .notifier_call = bond_inetaddr_event,
-};
-
 /*---------------------------- Hashing Policies -----------------------------*/
 
 /*
@@ -4929,7 +4879,6 @@ static int __init bonding_init(void)
        }
 
        register_netdevice_notifier(&bond_netdev_notifier);
-       register_inetaddr_notifier(&bond_inetaddr_notifier);
 out:
        return res;
 err:
@@ -4943,7 +4892,6 @@ err_link:
 static void __exit bonding_exit(void)
 {
        unregister_netdevice_notifier(&bond_netdev_notifier);
-       unregister_inetaddr_notifier(&bond_inetaddr_notifier);
 
        bond_destroy_debugfs();
 
index 1aecc37e5b4dcd041cefecc1fb3e7813ab39d8b0..9f2bae6616d3f455ed097f927ba1ca9fd27be945 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/cpumask.h>
 #include <linux/in6.h>
 #include <linux/netpoll.h>
+#include <linux/inetdevice.h>
 #include "bond_3ad.h"
 #include "bond_alb.h"
 
@@ -166,7 +167,6 @@ struct bond_parm_tbl {
 
 struct vlan_entry {
        struct list_head vlan_list;
-       __be32 vlan_ip;
        unsigned short vlan_id;
 };
 
@@ -232,7 +232,6 @@ struct bonding {
        struct   list_head bond_list;
        struct   netdev_hw_addr_list mc_list;
        int      (*xmit_hash_policy)(struct sk_buff *, int);
-       __be32   master_ip;
        u16      rr_tx_counter;
        struct   ad_bond_info ad_info;
        struct   alb_bond_info alb_info;
@@ -378,6 +377,21 @@ static inline bool bond_is_slave_inactive(struct slave *slave)
        return slave->inactive;
 }
 
+static inline __be32 bond_confirm_addr(struct net_device *dev, __be32 dst, __be32 local)
+{
+       struct in_device *in_dev;
+       __be32 addr = 0;
+
+       rcu_read_lock();
+       in_dev = __in_dev_get_rcu(dev);
+
+       if (in_dev)
+               addr = inet_confirm_addr(in_dev, dst, local, RT_SCOPE_HOST);
+
+       rcu_read_unlock();
+       return addr;
+}
+
 struct bond_net;
 
 struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr);
index 7b65716b8734451356d9f49584db8bdb55615776..c95e7b5e2b85589db86db943c3c77b697394312b 100644 (file)
@@ -47,6 +47,7 @@
 #include "bnx2x/bnx2x_hsi.h"
 #include "../../../scsi/bnx2i/57xx_iscsi_constants.h"
 #include "../../../scsi/bnx2i/57xx_iscsi_hsi.h"
+#include "../../../scsi/bnx2fc/bnx2fc_constants.h"
 #include "cnic.h"
 #include "cnic_defs.h"
 
@@ -2547,7 +2548,7 @@ static void cnic_bnx2x_kwqe_err(struct cnic_dev *dev, struct kwqe *kwqe)
                }
                kcqe.kcqe_op_flag = kcqe_op << KCQE_FLAGS_OPCODE_SHIFT;
                kcqe.kcqe_op_flag |= KCQE_FLAGS_LAYER_MASK_L5_FCOE;
-               kcqe.kcqe_info1 = FCOE_KCQE_COMPLETION_STATUS_NIC_ERROR;
+               kcqe.kcqe_info1 = FCOE_KCQE_COMPLETION_STATUS_PARITY_ERROR;
                kcqe.kcqe_info2 = cid;
                kcqe.kcqe_info0 = l5_cid;
 
@@ -2558,7 +2559,7 @@ static void cnic_bnx2x_kwqe_err(struct cnic_dev *dev, struct kwqe *kwqe)
 
                kcqe.kcqe_op_flag = (opcode + 0x10) << KCQE_FLAGS_OPCODE_SHIFT;
                kcqe.kcqe_op_flag |= KCQE_FLAGS_LAYER_MASK_L5_ISCSI;
-               kcqe.kcqe_info1 = ISCSI_KCQE_COMPLETION_STATUS_NIC_ERROR;
+               kcqe.kcqe_info1 = ISCSI_KCQE_COMPLETION_STATUS_PARITY_ERR;
                kcqe.kcqe_info2 = cid;
                cnic_get_l5_cid(cp, BNX2X_SW_CID(cid), &kcqe.kcqe_info0);
 
@@ -2577,7 +2578,7 @@ static void cnic_bnx2x_kwqe_err(struct cnic_dev *dev, struct kwqe *kwqe)
 
                kcqe.kcqe_op_flag = (kcqe_op << KCQE_FLAGS_OPCODE_SHIFT) |
                                    KCQE_FLAGS_LAYER_MASK_L4;
-               l4kcqe->status = L4_KCQE_COMPLETION_STATUS_NIC_ERROR;
+               l4kcqe->status = L4_KCQE_COMPLETION_STATUS_PARITY_ERROR;
                l4kcqe->cid = cid;
                cnic_get_l5_cid(cp, BNX2X_SW_CID(cid), &l4kcqe->conn_id);
        } else {
@@ -3933,7 +3934,8 @@ static void cnic_cm_process_kcqe(struct cnic_dev *dev, struct kcqe *kcqe)
        case L4_KCQE_OPCODE_VALUE_CONNECT_COMPLETE:
                if (l4kcqe->status == 0)
                        set_bit(SK_F_OFFLD_COMPLETE, &csk->flags);
-               else if (l4kcqe->status == L4_KCQE_COMPLETION_STATUS_NIC_ERROR)
+               else if (l4kcqe->status ==
+                        L4_KCQE_COMPLETION_STATUS_PARITY_ERROR)
                        set_bit(SK_F_HW_ERR, &csk->flags);
 
                smp_mb__before_clear_bit();
@@ -3946,7 +3948,7 @@ static void cnic_cm_process_kcqe(struct cnic_dev *dev, struct kcqe *kcqe)
        case L4_KCQE_OPCODE_VALUE_RESET_COMP:
        case L5CM_RAMROD_CMD_ID_SEARCHER_DELETE:
        case L5CM_RAMROD_CMD_ID_TERMINATE_OFFLOAD:
-               if (l4kcqe->status == L4_KCQE_COMPLETION_STATUS_NIC_ERROR)
+               if (l4kcqe->status == L4_KCQE_COMPLETION_STATUS_PARITY_ERROR)
                        set_bit(SK_F_HW_ERR, &csk->flags);
 
                cp->close_conn(csk, opcode);
index 06ca00266d70fee861473830cfda954995cce27e..382c98b0cc0c6feb959cd0f4e9dca7a078159c84 100644 (file)
 #define L5CM_RAMROD_CMD_ID_SEARCHER_DELETE     (L5CM_RAMROD_CMD_ID_BASE + 14)
 #define L5CM_RAMROD_CMD_ID_TERMINATE_OFFLOAD   (L5CM_RAMROD_CMD_ID_BASE + 15)
 
-#define FCOE_KCQE_OPCODE_INIT_FUNC                     (0x10)
-#define FCOE_KCQE_OPCODE_DESTROY_FUNC                  (0x11)
-#define FCOE_KCQE_OPCODE_STAT_FUNC                     (0x12)
-#define FCOE_KCQE_OPCODE_OFFLOAD_CONN                  (0x15)
-#define FCOE_KCQE_OPCODE_ENABLE_CONN                   (0x16)
-#define FCOE_KCQE_OPCODE_DISABLE_CONN                  (0x17)
-#define FCOE_KCQE_OPCODE_DESTROY_CONN                  (0x18)
-#define FCOE_KCQE_OPCODE_CQ_EVENT_NOTIFICATION  (0x20)
-#define FCOE_KCQE_OPCODE_FCOE_ERROR                            (0x21)
-
 #define FCOE_RAMROD_CMD_ID_INIT_FUNC           (FCOE_KCQE_OPCODE_INIT_FUNC)
 #define FCOE_RAMROD_CMD_ID_DESTROY_FUNC                (FCOE_KCQE_OPCODE_DESTROY_FUNC)
 #define FCOE_RAMROD_CMD_ID_STAT_FUNC           (FCOE_KCQE_OPCODE_STAT_FUNC)
 #define FCOE_RAMROD_CMD_ID_DESTROY_CONN                (FCOE_KCQE_OPCODE_DESTROY_CONN)
 #define FCOE_RAMROD_CMD_ID_TERMINATE_CONN      (0x81)
 
-#define FCOE_KWQE_OPCODE_INIT1                  (0)
-#define FCOE_KWQE_OPCODE_INIT2                  (1)
-#define FCOE_KWQE_OPCODE_INIT3                  (2)
-#define FCOE_KWQE_OPCODE_OFFLOAD_CONN1  (3)
-#define FCOE_KWQE_OPCODE_OFFLOAD_CONN2  (4)
-#define FCOE_KWQE_OPCODE_OFFLOAD_CONN3  (5)
-#define FCOE_KWQE_OPCODE_OFFLOAD_CONN4  (6)
-#define FCOE_KWQE_OPCODE_ENABLE_CONN   (7)
-#define FCOE_KWQE_OPCODE_DISABLE_CONN  (8)
-#define FCOE_KWQE_OPCODE_DESTROY_CONN  (9)
-#define FCOE_KWQE_OPCODE_DESTROY               (10)
-#define FCOE_KWQE_OPCODE_STAT                  (11)
-
-#define FCOE_KCQE_COMPLETION_STATUS_ERROR      (0x1)
-#define FCOE_KCQE_COMPLETION_STATUS_CTX_ALLOC_FAILURE  (0x3)
-#define FCOE_KCQE_COMPLETION_STATUS_NIC_ERROR  (0x5)
-
 /* KCQ (kernel completion queue) response op codes */
 #define L4_KCQE_OPCODE_VALUE_CLOSE_COMP             (53)
 #define L4_KCQE_OPCODE_VALUE_RESET_COMP             (54)
@@ -87,6 +60,7 @@
 /* KCQ (kernel completion queue) completion status */
 #define L4_KCQE_COMPLETION_STATUS_SUCCESS           (0)
 #define L4_KCQE_COMPLETION_STATUS_NIC_ERROR         (4)
+#define L4_KCQE_COMPLETION_STATUS_PARITY_ERROR     (0x81)
 #define L4_KCQE_COMPLETION_STATUS_TIMEOUT           (0x93)
 
 #define L4_KCQE_COMPLETION_STATUS_CTX_ALLOC_FAIL    (0x83)
index 60deb84d36bd651d0f09d08cc5792165dfdd0674..289274e546beea4346181a2252794627eba5fd7e 100644 (file)
@@ -12,8 +12,8 @@
 #ifndef CNIC_IF_H
 #define CNIC_IF_H
 
-#define CNIC_MODULE_VERSION    "2.5.9"
-#define CNIC_MODULE_RELDATE    "Feb 8, 2012"
+#define CNIC_MODULE_VERSION    "2.5.10"
+#define CNIC_MODULE_RELDATE    "March 21, 2012"
 
 #define CNIC_ULP_RDMA          0
 #define CNIC_ULP_ISCSI         1
index b0657466041d733900199124ffab0f6dbaf088af..7b71387cf93cfe6ed34e1baf0cdc9ff21a81e1d1 100644 (file)
@@ -89,10 +89,10 @@ static inline void _tg3_flag_clear(enum TG3_FLAGS flag, unsigned long *bits)
 
 #define DRV_MODULE_NAME                "tg3"
 #define TG3_MAJ_NUM                    3
-#define TG3_MIN_NUM                    122
+#define TG3_MIN_NUM                    123
 #define DRV_MODULE_VERSION     \
        __stringify(TG3_MAJ_NUM) "." __stringify(TG3_MIN_NUM)
-#define DRV_MODULE_RELDATE     "December 7, 2011"
+#define DRV_MODULE_RELDATE     "March 21, 2012"
 
 #define RESET_KIND_SHUTDOWN    0
 #define RESET_KIND_INIT                1
@@ -5953,8 +5953,10 @@ next_pkt_nopost:
                tpr->rx_std_prod_idx = std_prod_idx & tp->rx_std_ring_mask;
                tpr->rx_jmb_prod_idx = jmb_prod_idx & tp->rx_jmb_ring_mask;
 
-               if (tnapi != &tp->napi[1])
+               if (tnapi != &tp->napi[1]) {
+                       tp->rx_refill = true;
                        napi_schedule(&tp->napi[1].napi);
+               }
        }
 
        return received;
@@ -6134,6 +6136,7 @@ static int tg3_poll_work(struct tg3_napi *tnapi, int work_done, int budget)
                u32 std_prod_idx = dpr->rx_std_prod_idx;
                u32 jmb_prod_idx = dpr->rx_jmb_prod_idx;
 
+               tp->rx_refill = false;
                for (i = 1; i < tp->irq_cnt; i++)
                        err |= tg3_rx_prodring_xfer(tp, dpr,
                                                    &tp->napi[i].prodring);
@@ -6197,9 +6200,25 @@ static int tg3_poll_msix(struct napi_struct *napi, int budget)
                /* check for RX/TX work to do */
                if (likely(sblk->idx[0].tx_consumer == tnapi->tx_cons &&
                           *(tnapi->rx_rcb_prod_idx) == tnapi->rx_rcb_ptr)) {
+
+                       /* This test here is not race free, but will reduce
+                        * the number of interrupts by looping again.
+                        */
+                       if (tnapi == &tp->napi[1] && tp->rx_refill)
+                               continue;
+
                        napi_complete(napi);
                        /* Reenable interrupts. */
                        tw32_mailbox(tnapi->int_mbox, tnapi->last_tag << 24);
+
+                       /* This test here is synchronized by napi_schedule()
+                        * and napi_complete() to close the race condition.
+                        */
+                       if (unlikely(tnapi == &tp->napi[1] && tp->rx_refill)) {
+                               tw32(HOSTCC_MODE, tp->coalesce_mode |
+                                                 HOSTCC_MODE_ENABLE |
+                                                 tnapi->coal_now);
+                       }
                        mmiowb();
                        break;
                }
index 66bcfca5526114d07c080e2641eff1f4e60ca49d..93865f899a4fc36c4f2d458311c52ba995cc63d1 100644 (file)
@@ -3007,6 +3007,7 @@ struct tg3 {
        u32                             rx_std_max_post;
        u32                             rx_offset;
        u32                             rx_pkt_map_sz;
+       bool                            rx_refill;
 
 
        /* begin "everything else" cacheline(s) section */
index 82c2c86a19518c37df31d48bb6240930b8963367..423a1a2a702e77e6034f9ca31b8a80177d33e110 100644 (file)
@@ -95,6 +95,10 @@ static int disable_msi = 0;
 module_param(disable_msi, int, 0);
 MODULE_PARM_DESC(disable_msi, "Disable Message Signaled Interrupt (MSI)");
 
+static int legacy_pme = 0;
+module_param(legacy_pme, int, 0);
+MODULE_PARM_DESC(legacy_pme, "Legacy power management");
+
 static DEFINE_PCI_DEVICE_TABLE(sky2_id_table) = {
        { PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, 0x9000) }, /* SK-9Sxx */
        { PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, 0x9E00) }, /* SK-9Exx */
@@ -867,6 +871,13 @@ static void sky2_wol_init(struct sky2_port *sky2)
        /* Disable PiG firmware */
        sky2_write16(hw, B0_CTST, Y2_HW_WOL_OFF);
 
+       /* Needed by some broken BIOSes, use PCI rather than PCI-e for WOL */
+       if (legacy_pme) {
+               u32 reg1 = sky2_pci_read32(hw, PCI_DEV_REG1);
+               reg1 |= PCI_Y2_PME_LEGACY;
+               sky2_pci_write32(hw, PCI_DEV_REG1, reg1);
+       }
+
        /* block receiver */
        sky2_write8(hw, SK_REG(port, RX_GMF_CTRL_T), GMF_RST_SET);
        sky2_read32(hw, B0_CTST);
index 4b8b52ca09d866001bb9302d42103e5266fdee53..b7b3f5b0d40654c3c50b18ae56c969c03382c71c 100644 (file)
@@ -493,6 +493,7 @@ block:
                if (netif_running (dev->net) &&
                    !test_bit (EVENT_RX_HALT, &dev->flags)) {
                        rx_submit (dev, urb, GFP_ATOMIC);
+                       usb_mark_last_busy(dev->udev);
                        return;
                }
                usb_free_urb (urb);
@@ -589,6 +590,14 @@ static int unlink_urbs (struct usbnet *dev, struct sk_buff_head *q)
                entry = (struct skb_data *) skb->cb;
                urb = entry->urb;
 
+               /*
+                * Take a reference on the URB so that it cannot be
+                * freed during usb_unlink_urb, which could otherwise
+                * trigger a use-after-free inside usb_unlink_urb, since
+                * usb_unlink_urb always races with the .complete
+                * handler (including defer_bh).
+                */
+               usb_get_urb(urb);
                spin_unlock_irqrestore(&q->lock, flags);
                // during some PM-driven resume scenarios,
                // these (async) unlinks complete immediately
@@ -597,6 +606,7 @@ static int unlink_urbs (struct usbnet *dev, struct sk_buff_head *q)
                        netdev_dbg(dev->net, "unlink urb err, %d\n", retval);
                else
                        count++;
+               usb_put_urb(urb);
                spin_lock_irqsave(&q->lock, flags);
        }
        spin_unlock_irqrestore (&q->lock, flags);
@@ -1028,7 +1038,6 @@ static void tx_complete (struct urb *urb)
        }
 
        usb_autopm_put_interface_async(dev->intf);
-       urb->dev = NULL;
        entry->state = tx_done;
        defer_bh(dev, skb, &dev->txq);
 }
index 7ff10c1e8664806f941785619ed9fd8ed0582e71..0610e91bceb27ec5d7d71b7ddfedf5f6dbdb0d84 100644 (file)
@@ -553,7 +553,6 @@ dino_fixup_bus(struct pci_bus *bus)
        struct list_head *ln;
         struct pci_dev *dev;
         struct dino_device *dino_dev = DINO_DEV(parisc_walk_tree(bus->bridge));
-       int port_base = HBA_PORT_BASE(dino_dev->hba.hba_num);
 
        DBG(KERN_WARNING "%s(0x%p) bus %d platform_data 0x%p\n",
            __func__, bus, bus->secondary,
@@ -599,8 +598,6 @@ dino_fixup_bus(struct pci_bus *bus)
 
 
        list_for_each(ln, &bus->devices) {
-               int i;
-
                dev = pci_dev_b(ln);
                if (is_card_dino(&dino_dev->hba.dev->id))
                        dino_card_fixup(dev);
@@ -612,21 +609,6 @@ dino_fixup_bus(struct pci_bus *bus)
                if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI)
                        continue;
 
-               /* Adjust the I/O Port space addresses */
-               for (i = 0; i < PCI_NUM_RESOURCES; i++) {
-                       struct resource *res = &dev->resource[i];
-                       if (res->flags & IORESOURCE_IO) {
-                               res->start |= port_base;
-                               res->end |= port_base;
-                       }
-#ifdef __LP64__
-                       /* Sign Extend MMIO addresses */
-                       else if (res->flags & IORESOURCE_MEM) {
-                               res->start |= F_EXTEND(0UL);
-                               res->end   |= F_EXTEND(0UL);
-                       }
-#endif
-               }
                /* null out the ROM resource if there is one (we don't
                 * care about an expansion rom on parisc, since it
                 * usually contains (x86) bios code) */
@@ -991,11 +973,14 @@ static int __init dino_probe(struct parisc_device *dev)
 
        dev->dev.platform_data = dino_dev;
 
-       pci_add_resource(&resources, &dino_dev->hba.io_space);
+       pci_add_resource_offset(&resources, &dino_dev->hba.io_space,
+                               HBA_PORT_BASE(dino_dev->hba.hba_num));
        if (dino_dev->hba.lmmio_space.flags)
-               pci_add_resource(&resources, &dino_dev->hba.lmmio_space);
+               pci_add_resource_offset(&resources, &dino_dev->hba.lmmio_space,
+                                       dino_dev->hba.lmmio_space_offset);
        if (dino_dev->hba.elmmio_space.flags)
-               pci_add_resource(&resources, &dino_dev->hba.elmmio_space);
+               pci_add_resource_offset(&resources, &dino_dev->hba.elmmio_space,
+                                       dino_dev->hba.lmmio_space_offset);
        if (dino_dev->hba.gmmio_space.flags)
                pci_add_resource(&resources, &dino_dev->hba.gmmio_space);
 
index d5f3d753a108693abae1143f3a6f51959a4f4139..e8857647e21090c460d4a99a39102ea142698c88 100644 (file)
@@ -635,7 +635,6 @@ lba_fixup_bus(struct pci_bus *bus)
        u16 status;
 #endif
        struct lba_device *ldev = LBA_DEV(parisc_walk_tree(bus->bridge));
-       int lba_portbase = HBA_PORT_BASE(ldev->hba.hba_num);
 
        DBG("lba_fixup_bus(0x%p) bus %d platform_data 0x%p\n",
                bus, bus->secondary, bus->bridge->platform_data);
@@ -726,27 +725,6 @@ lba_fixup_bus(struct pci_bus *bus)
                        if (!res->start)
                                continue;
 
-                       if (res->flags & IORESOURCE_IO) {
-                               DBG("lba_fixup_bus() I/O Ports [%lx/%lx] -> ",
-                                       res->start, res->end);
-                               res->start |= lba_portbase;
-                               res->end   |= lba_portbase;
-                               DBG("[%lx/%lx]\n", res->start, res->end);
-                       } else if (res->flags & IORESOURCE_MEM) {
-                               /*
-                               ** Convert PCI (IO_VIEW) addresses to
-                               ** processor (PA_VIEW) addresses
-                                */
-                               DBG("lba_fixup_bus() MMIO [%lx/%lx] -> ",
-                                       res->start, res->end);
-                               res->start = PCI_HOST_ADDR(HBA_DATA(ldev), res->start);
-                               res->end   = PCI_HOST_ADDR(HBA_DATA(ldev), res->end);
-                               DBG("[%lx/%lx]\n", res->start, res->end);
-                       } else {
-                               DBG("lba_fixup_bus() WTF? 0x%lx [%lx/%lx] XXX",
-                                       res->flags, res->start, res->end);
-                       }
-
                        /*
                        ** FIXME: this will result in whinging for devices
                        ** that share expansion ROMs (think quad tulip), but
@@ -1514,11 +1492,14 @@ lba_driver_probe(struct parisc_device *dev)
                lba_dev->hba.lmmio_space.flags = 0;
        }
 
-       pci_add_resource(&resources, &lba_dev->hba.io_space);
+       pci_add_resource_offset(&resources, &lba_dev->hba.io_space,
+                               HBA_PORT_BASE(lba_dev->hba.hba_num));
        if (lba_dev->hba.elmmio_space.start)
-               pci_add_resource(&resources, &lba_dev->hba.elmmio_space);
+               pci_add_resource_offset(&resources, &lba_dev->hba.elmmio_space,
+                                       lba_dev->hba.lmmio_space_offset);
        if (lba_dev->hba.lmmio_space.flags)
-               pci_add_resource(&resources, &lba_dev->hba.lmmio_space);
+               pci_add_resource_offset(&resources, &lba_dev->hba.lmmio_space,
+                                       lba_dev->hba.lmmio_space_offset);
        if (lba_dev->hba.gmmio_space.flags)
                pci_add_resource(&resources, &lba_dev->hba.gmmio_space);
 
index 37856f7c778185488125874d2af802ffba5c32e1..848bfb84c04ccb2adfcbb2c53722af1bca0dbb80 100644 (file)
@@ -31,6 +31,19 @@ config PCI_DEBUG
 
          When in doubt, say N.
 
+config PCI_REALLOC_ENABLE_AUTO
+       bool "Enable PCI resource re-allocation detection"
+       depends on PCI
+       help
+         Say Y here if you want the PCI core to detect if PCI resource
+         re-allocation needs to be enabled. You can always use pci=realloc=on
+          or pci=realloc=off to override it.  Note this feature is a no-op
+          unless PCI_IOV support is also enabled; in that case it will
+          automatically re-allocate PCI resources if SR-IOV BARs have not
+          been allocated by the BIOS.
+
+         When in doubt, say N.
+
 config PCI_STUB
        tristate "PCI Stub driver"
        depends on PCI
index 398f5d8597910c7a661525434e001fc6d42de259..4ce5ef2f2826787862cb4c0f7ac85895877d6863 100644 (file)
 
 #include "pci.h"
 
-void pci_add_resource(struct list_head *resources, struct resource *res)
+void pci_add_resource_offset(struct list_head *resources, struct resource *res,
+                            resource_size_t offset)
 {
-       struct pci_bus_resource *bus_res;
+       struct pci_host_bridge_window *window;
 
-       bus_res = kzalloc(sizeof(struct pci_bus_resource), GFP_KERNEL);
-       if (!bus_res) {
-               printk(KERN_ERR "PCI: can't add bus resource %pR\n", res);
+       window = kzalloc(sizeof(struct pci_host_bridge_window), GFP_KERNEL);
+       if (!window) {
+               printk(KERN_ERR "PCI: can't add host bridge window %pR\n", res);
                return;
        }
 
-       bus_res->res = res;
-       list_add_tail(&bus_res->list, resources);
+       window->res = res;
+       window->offset = offset;
+       list_add_tail(&window->list, resources);
+}
+EXPORT_SYMBOL(pci_add_resource_offset);
+
+void pci_add_resource(struct list_head *resources, struct resource *res)
+{
+       pci_add_resource_offset(resources, res, 0);
 }
 EXPORT_SYMBOL(pci_add_resource);
 
 void pci_free_resource_list(struct list_head *resources)
 {
-       struct pci_bus_resource *bus_res, *tmp;
+       struct pci_host_bridge_window *window, *tmp;
 
-       list_for_each_entry_safe(bus_res, tmp, resources, list) {
-               list_del(&bus_res->list);
-               kfree(bus_res);
+       list_for_each_entry_safe(window, tmp, resources, list) {
+               list_del(&window->list);
+               kfree(window);
        }
 }
 EXPORT_SYMBOL(pci_free_resource_list);
index 9ddf69e3bbef03487e12ea814be79bb3258fc0fa..806c44fa645a57bb355cbab72276cba2a10be60e 100644 (file)
@@ -800,20 +800,10 @@ static int __ref enable_device(struct acpiphp_slot *slot)
        if (slot->flags & SLOT_ENABLED)
                goto err_exit;
 
-       /* sanity check: dev should be NULL when hot-plugged in */
-       dev = pci_get_slot(bus, PCI_DEVFN(slot->device, 0));
-       if (dev) {
-               /* This case shouldn't happen */
-               err("pci_dev structure already exists.\n");
-               pci_dev_put(dev);
-               retval = -1;
-               goto err_exit;
-       }
-
        num = pci_scan_slot(bus, PCI_DEVFN(slot->device, 0));
        if (num == 0) {
-               err("No new device found\n");
-               retval = -1;
+               /* Maybe only part of funcs are added. */
+               dbg("No new device found\n");
                goto err_exit;
        }
 
@@ -848,11 +838,16 @@ static int __ref enable_device(struct acpiphp_slot *slot)
 
        pci_bus_add_devices(bus);
 
+       slot->flags |= SLOT_ENABLED;
        list_for_each_entry(func, &slot->funcs, sibling) {
                dev = pci_get_slot(bus, PCI_DEVFN(slot->device,
                                                  func->function));
-               if (!dev)
+               if (!dev) {
+                       /* Do not set SLOT_ENABLED flag if some funcs
+                          are not added. */
+                       slot->flags &= (~SLOT_ENABLED);
                        continue;
+               }
 
                if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE &&
                    dev->hdr_type != PCI_HEADER_TYPE_CARDBUS) {
@@ -867,7 +862,6 @@ static int __ref enable_device(struct acpiphp_slot *slot)
                pci_dev_put(dev);
        }
 
-       slot->flags |= SLOT_ENABLED;
 
  err_exit:
        return retval;
@@ -892,9 +886,12 @@ static int disable_device(struct acpiphp_slot *slot)
 {
        struct acpiphp_func *func;
        struct pci_dev *pdev;
+       struct pci_bus *bus = slot->bridge->pci_bus;
 
-       /* is this slot already disabled? */
-       if (!(slot->flags & SLOT_ENABLED))
+       /* The slot will be enabled when func 0 is added, so check
+          func 0 before disabling the slot. */
+       pdev = pci_get_slot(bus, PCI_DEVFN(slot->device, 0));
+       if (!pdev)
                goto err_exit;
 
        list_for_each_entry(func, &slot->funcs, sibling) {
@@ -913,7 +910,7 @@ static int disable_device(struct acpiphp_slot *slot)
                                disable_bridges(pdev->subordinate);
                                pci_disable_device(pdev);
                        }
-                       pci_remove_bus_device(pdev);
+                       __pci_remove_bus_device(pdev);
                        pci_dev_put(pdev);
                }
        }
@@ -1070,7 +1067,7 @@ static void acpiphp_sanitize_bus(struct pci_bus *bus)
                                        res->end) {
                                /* Could not assign a required resources
                                 * for this device, remove it */
-                               pci_remove_bus_device(dev);
+                               pci_stop_and_remove_bus_device(dev);
                                break;
                        }
                }
index 829c327cfb5e5780bf9cde69877cdec759c63549..ae853ccd0cd5f4e9faadb3bc1361647b2db3cab9 100644 (file)
@@ -341,7 +341,7 @@ int cpci_unconfigure_slot(struct slot* slot)
                dev = pci_get_slot(slot->bus,
                                    PCI_DEVFN(PCI_SLOT(slot->devfn), i));
                if (dev) {
-                       pci_remove_bus_device(dev);
+                       pci_stop_and_remove_bus_device(dev);
                        pci_dev_put(dev);
                }
        }
index fb3f84661bdc93c890d63111567216d505ed877b..81af764c629b6d34573fc2bb9297b26316fa79c6 100644 (file)
@@ -62,7 +62,7 @@
 #define warn(format, arg...) printk(KERN_WARNING "%s: " format "\n", MY_NAME , ## arg)
 
 /* local variables */
-static int debug;
+static bool debug;
 static char *bridge;
 static u8 bridge_busnr;
 static u8 bridge_slot;
index 6173b9a4544efec9d79ef45f6c67a68a495987d8..1c8494021a42762ac7b6270ae519df9c44355669 100644 (file)
@@ -127,7 +127,7 @@ int cpqhp_unconfigure_device(struct pci_func* func)
                struct pci_dev* temp = pci_get_bus_and_slot(func->bus, PCI_DEVFN(func->device, j));
                if (temp) {
                        pci_dev_put(temp);
-                       pci_remove_bus_device(temp);
+                       pci_stop_and_remove_bus_device(temp);
                }
        }
        return 0;
index 17d10e2e8fb692f2f605fc6a63571df63ff46a14..a019c9a712bed03bd297d65b653119afc732d1b1 100644 (file)
@@ -40,7 +40,7 @@ static ssize_t legacy_show(struct kobject *kobj, struct attribute *attr,
 
 static void remove_callback(void *data)
 {
-       pci_remove_bus_device((struct pci_dev *)data);
+       pci_stop_and_remove_bus_device((struct pci_dev *)data);
 }
 
 static ssize_t legacy_store(struct kobject *kobj, struct attribute *attr,
index 5506e0e8fbc0055f354705c35bd2276419ba3917..4fda7e6a86a7342a66ba7577008db3abc209cb6d 100644 (file)
@@ -721,7 +721,7 @@ static void ibm_unconfigure_device(struct pci_func *func)
        for (j = 0; j < 0x08; j++) {
                temp = pci_get_bus_and_slot(func->busno, (func->device << 3) | j);
                if (temp) {
-                       pci_remove_bus_device(temp);
+                       pci_stop_and_remove_bus_device(temp);
                        pci_dev_put(temp);
                }
        }
index 2850e64dedae3e0cb9e258d113b75156aff891d3..714ca5c4ed505da0f47eed969c4cff84108ba75e 100644 (file)
@@ -368,8 +368,10 @@ int __init ibmphp_access_ebda (void)
                        debug ("rio blk id: %x\n", blk_id);
 
                        rio_table_ptr = kzalloc(sizeof(struct rio_table_hdr), GFP_KERNEL);
-                       if (!rio_table_ptr)
-                               return -ENOMEM; 
+                       if (!rio_table_ptr) {
+                               rc = -ENOMEM;
+                               goto out;
+                       }
                        rio_table_ptr->ver_num = readb (io_mem + offset);
                        rio_table_ptr->scal_count = readb (io_mem + offset + 1);
                        rio_table_ptr->riodev_count = readb (io_mem + offset + 2);
index bcdbb16436216886949ed44352d1c0ea4606f21a..a960faec1021e2cdb351509d747e3fddc3fb1af5 100644 (file)
@@ -241,34 +241,79 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask)
        return retval;
 }
 
-static inline int check_link_active(struct controller *ctrl)
+static bool check_link_active(struct controller *ctrl)
 {
-       u16 link_status;
+       bool ret = false;
+       u16 lnk_status;
 
-       if (pciehp_readw(ctrl, PCI_EXP_LNKSTA, &link_status))
-               return 0;
-       return !!(link_status & PCI_EXP_LNKSTA_DLLLA);
+       if (pciehp_readw(ctrl, PCI_EXP_LNKSTA, &lnk_status))
+               return ret;
+
+       ret = !!(lnk_status & PCI_EXP_LNKSTA_DLLLA);
+
+       if (ret)
+               ctrl_dbg(ctrl, "%s: lnk_status = %x\n", __func__, lnk_status);
+
+       return ret;
 }
 
-static void pcie_wait_link_active(struct controller *ctrl)
+static void __pcie_wait_link_active(struct controller *ctrl, bool active)
 {
        int timeout = 1000;
 
-       if (check_link_active(ctrl))
+       if (check_link_active(ctrl) == active)
                return;
        while (timeout > 0) {
                msleep(10);
                timeout -= 10;
-               if (check_link_active(ctrl))
+               if (check_link_active(ctrl) == active)
                        return;
        }
-       ctrl_dbg(ctrl, "Data Link Layer Link Active not set in 1000 msec\n");
+       ctrl_dbg(ctrl, "Data Link Layer Link Active not %s in 1000 msec\n",
+                       active ? "set" : "cleared");
+}
+
+static void pcie_wait_link_active(struct controller *ctrl)
+{
+       __pcie_wait_link_active(ctrl, true);
+}
+
+static void pcie_wait_link_not_active(struct controller *ctrl)
+{
+       __pcie_wait_link_active(ctrl, false);
+}
+
+static bool pci_bus_check_dev(struct pci_bus *bus, int devfn)
+{
+       u32 l;
+       int count = 0;
+       int delay = 1000, step = 20;
+       bool found = false;
+
+       do {
+               found = pci_bus_read_dev_vendor_id(bus, devfn, &l, 0);
+               count++;
+
+               if (found)
+                       break;
+
+               msleep(step);
+               delay -= step;
+       } while (delay > 0);
+
+       if (count > 1 && pciehp_debug)
+               printk(KERN_DEBUG "pci %04x:%02x:%02x.%d id reading try %d times with interval %d ms to get %08x\n",
+                       pci_domain_nr(bus), bus->number, PCI_SLOT(devfn),
+                       PCI_FUNC(devfn), count, step, l);
+
+       return found;
 }
 
 int pciehp_check_link_status(struct controller *ctrl)
 {
        u16 lnk_status;
        int retval = 0;
+       bool found = false;
 
         /*
          * Data Link Layer Link Active Reporting must be capable for
@@ -280,13 +325,10 @@ int pciehp_check_link_status(struct controller *ctrl)
         else
                 msleep(1000);
 
-       /*
-        * Need to wait for 1000 ms after Data Link Layer Link Active
-        * (DLLLA) bit reads 1b before sending configuration request.
-        * We need it before checking Link Training (LT) bit becuase
-        * LT is still set even after DLLLA bit is set on some platform.
-        */
-       msleep(1000);
+       /* wait 100 ms before reading PCI config space, then retry for up to 1 s */
+       msleep(100);
+       found = pci_bus_check_dev(ctrl->pcie->port->subordinate,
+                                       PCI_DEVFN(0, 0));
 
        retval = pciehp_readw(ctrl, PCI_EXP_LNKSTA, &lnk_status);
        if (retval) {
@@ -302,19 +344,50 @@ int pciehp_check_link_status(struct controller *ctrl)
                return retval;
        }
 
-       /*
-        * If the port supports Link speeds greater than 5.0 GT/s, we
-        * must wait for 100 ms after Link training completes before
-        * sending configuration request.
-        */
-       if (ctrl->pcie->port->subordinate->max_bus_speed > PCIE_SPEED_5_0GT)
-               msleep(100);
-
        pcie_update_link_speed(ctrl->pcie->port->subordinate, lnk_status);
 
+       if (!found && !retval)
+               retval = -1;
+
        return retval;
 }
 
+static int __pciehp_link_set(struct controller *ctrl, bool enable)
+{
+       u16 lnk_ctrl;
+       int retval = 0;
+
+       retval = pciehp_readw(ctrl, PCI_EXP_LNKCTL, &lnk_ctrl);
+       if (retval) {
+               ctrl_err(ctrl, "Cannot read LNKCTRL register\n");
+               return retval;
+       }
+
+       if (enable)
+               lnk_ctrl &= ~PCI_EXP_LNKCTL_LD;
+       else
+               lnk_ctrl |= PCI_EXP_LNKCTL_LD;
+
+       retval = pciehp_writew(ctrl, PCI_EXP_LNKCTL, lnk_ctrl);
+       if (retval) {
+               ctrl_err(ctrl, "Cannot write LNKCTRL register\n");
+               return retval;
+       }
+       ctrl_dbg(ctrl, "%s: lnk_ctrl = %x\n", __func__, lnk_ctrl);
+
+       return retval;
+}
+
+static int pciehp_link_enable(struct controller *ctrl)
+{
+       return __pciehp_link_set(ctrl, true);
+}
+
+static int pciehp_link_disable(struct controller *ctrl)
+{
+       return __pciehp_link_set(ctrl, false);
+}
+
 int pciehp_get_attention_status(struct slot *slot, u8 *status)
 {
        struct controller *ctrl = slot->ctrl;
@@ -533,6 +606,10 @@ int pciehp_power_on_slot(struct slot * slot)
        ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
                 pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, slot_cmd);
 
+       retval = pciehp_link_enable(ctrl);
+       if (retval)
+               ctrl_err(ctrl, "%s: Can not enable the link!\n", __func__);
+
        return retval;
 }
 
@@ -543,6 +620,14 @@ int pciehp_power_off_slot(struct slot * slot)
        u16 cmd_mask;
        int retval;
 
+       /* Disable the link first */
+       pciehp_link_disable(ctrl);
+       /* wait until the link is down */
+       if (ctrl->link_active_reporting)
+               pcie_wait_link_not_active(ctrl);
+       else
+               msleep(1000);
+
        slot_cmd = POWER_OFF;
        cmd_mask = PCI_EXP_SLTCTL_PCC;
        retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
index a4031dfe938ecd037227896fac3fabe5ca24e56b..47d9dc06b109a632557232b92e7932918ab5a9db 100644 (file)
@@ -141,7 +141,7 @@ int pciehp_unconfigure_device(struct slot *p_slot)
                                break;
                        }
                }
-               pci_remove_bus_device(temp);
+               pci_stop_and_remove_bus_device(temp);
                /*
                 * Ensure that no new Requests will be generated from
                 * the device.
index c56a9413e1afc62596c98cdbd54ddfdd345429a9..1e117c2a3cad032390fe9ab1a12745b6ce405313 100644 (file)
@@ -389,7 +389,7 @@ int dlpar_remove_pci_slot(char *drc_name, struct device_node *dn)
        BUG_ON(!bus->self);
        pr_debug("PCI: Now removing bridge device %s\n", pci_name(bus->self));
        eeh_remove_bus_device(bus->self);
-       pci_remove_bus_device(bus->self);
+       pci_stop_and_remove_bus_device(bus->self);
 
        return 0;
 }
index 72d507b6a2aa8d446d06fcbbcfc3aa68799434f5..de573113c102519e9851e70fc20c62fea5517683 100644 (file)
@@ -554,7 +554,7 @@ static int disable_slot(struct hotplug_slot *bss_hotplug_slot)
                                             PCI_FUNC(func)));
                if (dev) {
                        sn_bus_free_data(dev);
-                       pci_remove_bus_device(dev);
+                       pci_stop_and_remove_bus_device(dev);
                        pci_dev_put(dev);
                }
        }
index a2ccfcd3c29840ea9bfa6384b79e813cba67e21e..df7e4bfadae35fc5398b5a52f8c1443ef8827b6a 100644 (file)
@@ -124,7 +124,7 @@ int shpchp_unconfigure_device(struct slot *p_slot)
                                break;
                        }
                }
-               pci_remove_bus_device(temp);
+               pci_stop_and_remove_bus_device(temp);
                pci_dev_put(temp);
        }
        return rc;
index 0dab5ecf61bb26ad1c7b96684f56b52e7442928d..6554e1a0f63480c0837cf6be5bed4f920f0dc4df 100644 (file)
@@ -142,7 +142,7 @@ failed2:
 failed1:
        pci_dev_put(dev);
        mutex_lock(&iov->dev->sriov->lock);
-       pci_remove_bus_device(virtfn);
+       pci_stop_and_remove_bus_device(virtfn);
        virtfn_remove_bus(dev->bus, virtfn_bus(dev, id));
        mutex_unlock(&iov->dev->sriov->lock);
 
@@ -173,10 +173,16 @@ static void virtfn_remove(struct pci_dev *dev, int id, int reset)
 
        sprintf(buf, "virtfn%u", id);
        sysfs_remove_link(&dev->dev.kobj, buf);
-       sysfs_remove_link(&virtfn->dev.kobj, "physfn");
+       /*
+        * pci_stop_dev() could have been called for this virtfn already,
+        * so the directory for the virtfn may have been removed before.
+        * Double check to avoid spurious sysfs warnings.
+        */
+       if (virtfn->dev.kobj.sd)
+               sysfs_remove_link(&virtfn->dev.kobj, "physfn");
 
        mutex_lock(&iov->dev->sriov->lock);
-       pci_remove_bus_device(virtfn);
+       pci_stop_and_remove_bus_device(virtfn);
        virtfn_remove_bus(dev->bus, virtfn_bus(dev, id));
        mutex_unlock(&iov->dev->sriov->lock);
 
index 8d9616b821ca272c9770b4471ca6b5da2325fbd4..6b54b23b990be0be03dddb49f5e02b55e5272c56 100644 (file)
@@ -419,6 +419,16 @@ static void pci_device_shutdown(struct device *dev)
                drv->shutdown(pci_dev);
        pci_msi_shutdown(pci_dev);
        pci_msix_shutdown(pci_dev);
+
+       /*
+        * Devices may be enabled to wake up by runtime PM, but that does not
+        * mean they are supposed to wake up the system from its "power off"
+        * state (e.g. ACPI S5).  Therefore disable wakeup for all devices that
+        * aren't supposed to wake up the system at this point.  The state
+        * argument will be ignored by pci_enable_wake().
+        */
+       if (!device_may_wakeup(dev))
+               pci_enable_wake(pci_dev, PCI_UNKNOWN, false);
 }
 
 #ifdef CONFIG_PM
index a3cd8cad532ac218c2b1055f21e27d6e2bec696e..a55e248618cd00e7de495a078c3bb59d177123f4 100644 (file)
@@ -330,7 +330,7 @@ static void remove_callback(struct device *dev)
        struct pci_dev *pdev = to_pci_dev(dev);
 
        mutex_lock(&pci_remove_rescan_mutex);
-       pci_remove_bus_device(pdev);
+       pci_stop_and_remove_bus_device(pdev);
        mutex_unlock(&pci_remove_rescan_mutex);
 }
 
@@ -366,7 +366,10 @@ dev_bus_rescan_store(struct device *dev, struct device_attribute *attr,
 
        if (val) {
                mutex_lock(&pci_remove_rescan_mutex);
-               pci_rescan_bus(bus);
+               if (!pci_is_root_bus(bus) && list_empty(&bus->devices))
+                       pci_rescan_bus_bridge_resize(bus->self);
+               else
+                       pci_rescan_bus(bus);
                mutex_unlock(&pci_remove_rescan_mutex);
        }
        return count;
index 053670e09e2b93bafa15e3662c8672fdebd8ef34..815674415267d831a2618b1e1f977c080d34ac92 100644 (file)
@@ -94,6 +94,9 @@ u8 pci_cache_line_size;
  */
 unsigned int pcibios_max_latency = 255;
 
+/* If set, the PCIe ARI capability will not be used. */
+static bool pcie_ari_disabled;
+
 /**
  * pci_bus_max_busnr - returns maximum PCI bus number of given bus' children
  * @bus: pointer to PCI bus structure to search
@@ -825,6 +828,19 @@ EXPORT_SYMBOL(pci_choose_state);
 #define pcie_cap_has_sltctl2(type, flags)              \
                ((flags & PCI_EXP_FLAGS_VERS) > 1)
 
+static struct pci_cap_saved_state *pci_find_saved_cap(
+       struct pci_dev *pci_dev, char cap)
+{
+       struct pci_cap_saved_state *tmp;
+       struct hlist_node *pos;
+
+       hlist_for_each_entry(tmp, pos, &pci_dev->saved_cap_space, next) {
+               if (tmp->cap.cap_nr == cap)
+                       return tmp;
+       }
+       return NULL;
+}
+
 static int pci_save_pcie_state(struct pci_dev *dev)
 {
        int pos, i = 0;
@@ -959,6 +975,7 @@ void pci_restore_state(struct pci_dev *dev)
 {
        int i;
        u32 val;
+       int tries;
 
        if (!dev->state_saved)
                return;
@@ -973,12 +990,16 @@ void pci_restore_state(struct pci_dev *dev)
         */
        for (i = 15; i >= 0; i--) {
                pci_read_config_dword(dev, i * 4, &val);
-               if (val != dev->saved_config_space[i]) {
+               tries = 10;             
+               while (tries && val != dev->saved_config_space[i]) {
                        dev_dbg(&dev->dev, "restoring config "
                                "space at offset %#x (was %#x, writing %#x)\n",
                                i, val, (int)dev->saved_config_space[i]);
                        pci_write_config_dword(dev,i * 4,
                                dev->saved_config_space[i]);
+                       pci_read_config_dword(dev, i * 4, &val);
+                       mdelay(10);
+                       tries--;
                }
        }
        pci_restore_pcix_state(dev);
@@ -1864,6 +1885,12 @@ void platform_pci_wakeup_init(struct pci_dev *dev)
        platform_pci_sleep_wake(dev, false);
 }
 
+static void pci_add_saved_cap(struct pci_dev *pci_dev,
+       struct pci_cap_saved_state *new_cap)
+{
+       hlist_add_head(&new_cap->next, &pci_dev->saved_cap_space);
+}
+
 /**
  * pci_add_save_buffer - allocate buffer for saving given capability registers
  * @dev: the PCI device
@@ -1911,6 +1938,15 @@ void pci_allocate_cap_save_buffers(struct pci_dev *dev)
                        "unable to preallocate PCI-X save buffer\n");
 }
 
+void pci_free_cap_save_buffers(struct pci_dev *dev)
+{
+       struct pci_cap_saved_state *tmp;
+       struct hlist_node *pos, *n;
+
+       hlist_for_each_entry_safe(tmp, pos, n, &dev->saved_cap_space, next)
+               kfree(tmp);
+}
+
 /**
  * pci_enable_ari - enable ARI forwarding if hardware support it
  * @dev: the PCI device
@@ -1922,7 +1958,7 @@ void pci_enable_ari(struct pci_dev *dev)
        u16 flags, ctrl;
        struct pci_dev *bridge;
 
-       if (!pci_is_pcie(dev) || dev->devfn)
+       if (pcie_ari_disabled || !pci_is_pcie(dev) || dev->devfn)
                return;
 
        pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ARI);
@@ -3661,6 +3697,68 @@ int pci_is_reassigndev(struct pci_dev *dev)
        return (pci_specified_resource_alignment(dev) != 0);
 }
 
+/*
+ * This function disables memory decoding and releases memory resources
+ * of the device specified by kernel's boot parameter 'pci=resource_alignment='.
+ * It also rounds up size to specified alignment.
+ * Later on, the kernel will assign page-aligned memory resource back
+ * to the device.
+ */
+void pci_reassigndev_resource_alignment(struct pci_dev *dev)
+{
+       int i;
+       struct resource *r;
+       resource_size_t align, size;
+       u16 command;
+
+       if (!pci_is_reassigndev(dev))
+               return;
+
+       if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
+           (dev->class >> 8) == PCI_CLASS_BRIDGE_HOST) {
+               dev_warn(&dev->dev,
+                       "Can't reassign resources to host bridge.\n");
+               return;
+       }
+
+       dev_info(&dev->dev,
+               "Disabling memory decoding and releasing memory resources.\n");
+       pci_read_config_word(dev, PCI_COMMAND, &command);
+       command &= ~PCI_COMMAND_MEMORY;
+       pci_write_config_word(dev, PCI_COMMAND, command);
+
+       align = pci_specified_resource_alignment(dev);
+       for (i = 0; i < PCI_BRIDGE_RESOURCES; i++) {
+               r = &dev->resource[i];
+               if (!(r->flags & IORESOURCE_MEM))
+                       continue;
+               size = resource_size(r);
+               if (size < align) {
+                       size = align;
+                       dev_info(&dev->dev,
+                               "Rounding up size of resource #%d to %#llx.\n",
+                               i, (unsigned long long)size);
+               }
+               r->end = size - 1;
+               r->start = 0;
+       }
+       /* Need to disable bridge's resource window,
+        * to enable the kernel to reassign new resource
+        * window later on.
+        */
+       if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE &&
+           (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
+               for (i = PCI_BRIDGE_RESOURCES; i < PCI_NUM_RESOURCES; i++) {
+                       r = &dev->resource[i];
+                       if (!(r->flags & IORESOURCE_MEM))
+                               continue;
+                       r->end = resource_size(r) - 1;
+                       r->start = 0;
+               }
+               pci_disable_bridge_window(dev);
+       }
+}
+
 ssize_t pci_set_resource_alignment_param(const char *buf, size_t count)
 {
        if (count > RESOURCE_ALIGNMENT_PARAM_SIZE - 1)
@@ -3739,10 +3837,14 @@ static int __init pci_setup(char *str)
                                pci_no_msi();
                        } else if (!strcmp(str, "noaer")) {
                                pci_no_aer();
+                       } else if (!strncmp(str, "realloc=", 8)) {
+                               pci_realloc_get_opt(str + 8);
                        } else if (!strncmp(str, "realloc", 7)) {
-                               pci_realloc();
+                               pci_realloc_get_opt("on");
                        } else if (!strcmp(str, "nodomains")) {
                                pci_no_domains();
+                       } else if (!strncmp(str, "noari", 5)) {
+                               pcie_ari_disabled = true;
                        } else if (!strncmp(str, "cbiosize=", 9)) {
                                pci_cardbus_io_size = memparse(str + 9, &str);
                        } else if (!strncmp(str, "cbmemsize=", 10)) {
index 1009a5e88e533f37736710895ffc6ba5194f0498..e4943479b234d1f4ff6fc5121a28dbb322fa2ba6 100644 (file)
@@ -73,6 +73,7 @@ extern int __pci_pme_wakeup(struct pci_dev *dev, void *ign);
 extern void pci_pm_init(struct pci_dev *dev);
 extern void platform_pci_wakeup_init(struct pci_dev *dev);
 extern void pci_allocate_cap_save_buffers(struct pci_dev *dev);
+void pci_free_cap_save_buffers(struct pci_dev *dev);
 
 static inline void pci_wakeup_event(struct pci_dev *dev)
 {
@@ -148,7 +149,7 @@ static inline void pci_no_msi(void) { }
 static inline void pci_msi_init_pci_dev(struct pci_dev *dev) { }
 #endif
 
-extern void pci_realloc(void);
+void pci_realloc_get_opt(char *);
 
 static inline int pci_no_d1d2(struct pci_dev *dev)
 {
@@ -207,6 +208,8 @@ enum pci_bar_type {
        pci_bar_mem64,          /* A 64-bit memory BAR */
 };
 
+bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *pl,
+                               int crs_timeout);
 extern int pci_setup_device(struct pci_dev *dev);
 extern int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
                                struct resource *res, unsigned int reg);
@@ -225,11 +228,8 @@ static inline int pci_ari_enabled(struct pci_bus *bus)
        return bus->self && bus->self->ari_enabled;
 }
 
-#ifdef CONFIG_PCI_QUIRKS
-extern int pci_is_reassigndev(struct pci_dev *dev);
-resource_size_t pci_specified_resource_alignment(struct pci_dev *dev);
+void pci_reassigndev_resource_alignment(struct pci_dev *dev);
 extern void pci_disable_bridge_window(struct pci_dev *dev);
-#endif
 
 /* Single Root I/O Virtualization */
 struct pci_sriov {
index 72962cc92e0af46402c0c6c4d38a15d82666931d..6c8bc5809787d7e2a4b82c72c590918c20f49665 100644 (file)
@@ -55,6 +55,31 @@ config PCIEASPM_DEBUG
          This enables PCI Express ASPM debug support. It will add per-device
          interface to control ASPM.
 
+choice
+       prompt "Default ASPM policy"
+       default PCIEASPM_DEFAULT
+       depends on PCIEASPM
+
+config PCIEASPM_DEFAULT
+        bool "BIOS default"
+       depends on PCIEASPM
+       help
+         Use the BIOS defaults for PCI Express ASPM.
+
+config PCIEASPM_POWERSAVE
+        bool "Powersave"
+       depends on PCIEASPM
+       help
+         Enable PCI Express ASPM L0s and L1 where possible, even if the
+         BIOS did not.
+
+config PCIEASPM_PERFORMANCE
+        bool "Performance"
+       depends on PCIEASPM
+       help
+         Disable PCI Express ASPM L0s and L1, even if the BIOS enabled them.
+endchoice
+
 config PCIE_PME
        def_bool y
        depends on PCIEPORTBUS && PM_RUNTIME && EXPERIMENTAL && ACPI
index 24f049e7395291d59be49a4db9c3d13dce841532..4bdef24cd412ff75db214f0461ab98e908adccda 100644 (file)
@@ -76,7 +76,15 @@ static LIST_HEAD(link_list);
 #define POLICY_DEFAULT 0       /* BIOS default setting */
 #define POLICY_PERFORMANCE 1   /* high performance */
 #define POLICY_POWERSAVE 2     /* high power saving */
+
+#ifdef CONFIG_PCIEASPM_PERFORMANCE
+static int aspm_policy = POLICY_PERFORMANCE;
+#elif defined CONFIG_PCIEASPM_POWERSAVE
+static int aspm_policy = POLICY_POWERSAVE;
+#else
 static int aspm_policy;
+#endif
+
 static const char *policy_str[] = {
        [POLICY_DEFAULT] = "default",
        [POLICY_PERFORMANCE] = "performance",
index bd00a01aef1463a09691944446991bb14bddcfed..eea2ca2375e6be13ef07cb6d766d78b4d96c5d12 100644 (file)
@@ -34,6 +34,18 @@ struct pci_dev;
 
 extern void pcie_clear_root_pme_status(struct pci_dev *dev);
 
+#ifdef CONFIG_HOTPLUG_PCI_PCIE
+extern bool pciehp_msi_disabled;
+
+static inline bool pciehp_no_msi(void)
+{
+       return pciehp_msi_disabled;
+}
+
+#else  /* !CONFIG_HOTPLUG_PCI_PCIE */
+static inline bool pciehp_no_msi(void) { return false; }
+#endif /* !CONFIG_HOTPLUG_PCI_PCIE */
+
 #ifdef CONFIG_PCIE_PME
 extern bool pcie_pme_msi_disabled;
 
index 595654a1a6a6eec86e8011f1efe667bf318bf257..2f589a54f9bdf75864452602de0614bd8a3feb35 100644 (file)
 #include "../pci.h"
 #include "portdrv.h"
 
+bool pciehp_msi_disabled;
+
+static int __init pciehp_setup(char *str)
+{
+       if (!strncmp(str, "nomsi", 5))
+               pciehp_msi_disabled = true;
+
+       return 1;
+}
+__setup("pcie_hp=", pciehp_setup);
+
 /**
  * release_pcie_device - free PCI Express port service device structure
  * @dev: Port service device to release
@@ -189,8 +200,9 @@ static int init_service_irqs(struct pci_dev *dev, int *irqs, int mask)
 {
        int i, irq = -1;
 
-       /* We have to use INTx if MSI cannot be used for PCIe PME. */
-       if ((mask & PCIE_PORT_SERVICE_PME) && pcie_pme_no_msi()) {
+       /* We have to use INTx if MSI cannot be used for PCIe PME or pciehp. */
+       if (((mask & PCIE_PORT_SERVICE_PME) && pcie_pme_no_msi()) ||
+           ((mask & PCIE_PORT_SERVICE_HP) && pciehp_no_msi())) {
                if (dev->pin)
                        irq = dev->irq;
                goto no_msi;
index 71eac9cd724d7b12c77d68cc0eb68a3b50a9f287..5e1ca3c58a7d4502b966974ec13c55a892e9be2b 100644 (file)
@@ -15,6 +15,8 @@
 #define CARDBUS_LATENCY_TIMER  176     /* secondary latency timer */
 #define CARDBUS_RESERVE_BUSNR  3
 
+static LIST_HEAD(pci_host_bridges);
+
 /* Ugh.  Need to stop exporting this to modules. */
 LIST_HEAD(pci_root_buses);
 EXPORT_SYMBOL(pci_root_buses);
@@ -42,6 +44,82 @@ int no_pci_devices(void)
 }
 EXPORT_SYMBOL(no_pci_devices);
 
+static struct pci_host_bridge *pci_host_bridge(struct pci_dev *dev)
+{
+       struct pci_bus *bus;
+       struct pci_host_bridge *bridge;
+
+       bus = dev->bus;
+       while (bus->parent)
+               bus = bus->parent;
+
+       list_for_each_entry(bridge, &pci_host_bridges, list) {
+               if (bridge->bus == bus)
+                       return bridge;
+       }
+
+       return NULL;
+}
+
+static bool resource_contains(struct resource *res1, struct resource *res2)
+{
+       return res1->start <= res2->start && res1->end >= res2->end;
+}
+
+void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
+                            struct resource *res)
+{
+       struct pci_host_bridge *bridge = pci_host_bridge(dev);
+       struct pci_host_bridge_window *window;
+       resource_size_t offset = 0;
+
+       list_for_each_entry(window, &bridge->windows, list) {
+               if (resource_type(res) != resource_type(window->res))
+                       continue;
+
+               if (resource_contains(window->res, res)) {
+                       offset = window->offset;
+                       break;
+               }
+       }
+
+       region->start = res->start - offset;
+       region->end = res->end - offset;
+}
+EXPORT_SYMBOL(pcibios_resource_to_bus);
+
+static bool region_contains(struct pci_bus_region *region1,
+                           struct pci_bus_region *region2)
+{
+       return region1->start <= region2->start && region1->end >= region2->end;
+}
+
+void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
+                            struct pci_bus_region *region)
+{
+       struct pci_host_bridge *bridge = pci_host_bridge(dev);
+       struct pci_host_bridge_window *window;
+       struct pci_bus_region bus_region;
+       resource_size_t offset = 0;
+
+       list_for_each_entry(window, &bridge->windows, list) {
+               if (resource_type(res) != resource_type(window->res))
+                       continue;
+
+               bus_region.start = window->res->start - window->offset;
+               bus_region.end = window->res->end - window->offset;
+
+               if (region_contains(&bus_region, region)) {
+                       offset = window->offset;
+                       break;
+               }
+       }
+
+       res->start = region->start + offset;
+       res->end = region->end + offset;
+}
+EXPORT_SYMBOL(pcibios_bus_to_resource);
+
 /*
  * PCI Bus Class
  */
@@ -135,6 +213,7 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
 {
        u32 l, sz, mask;
        u16 orig_cmd;
+       struct pci_bus_region region;
 
        mask = type ? PCI_ROM_ADDRESS_MASK : ~0;
 
@@ -214,11 +293,13 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
                        /* Address above 32-bit boundary; disable the BAR */
                        pci_write_config_dword(dev, pos, 0);
                        pci_write_config_dword(dev, pos + 4, 0);
-                       res->start = 0;
-                       res->end = sz64;
+                       region.start = 0;
+                       region.end = sz64;
+                       pcibios_bus_to_resource(dev, res, &region);
                } else {
-                       res->start = l64;
-                       res->end = l64 + sz64;
+                       region.start = l64;
+                       region.end = l64 + sz64;
+                       pcibios_bus_to_resource(dev, res, &region);
                        dev_printk(KERN_DEBUG, &dev->dev, "reg %x: %pR\n",
                                   pos, res);
                }
@@ -228,8 +309,9 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
                if (!sz)
                        goto fail;
 
-               res->start = l;
-               res->end = l + sz;
+               region.start = l;
+               region.end = l + sz;
+               pcibios_bus_to_resource(dev, res, &region);
 
                dev_printk(KERN_DEBUG, &dev->dev, "reg %x: %pR\n", pos, res);
        }
@@ -266,7 +348,8 @@ static void __devinit pci_read_bridge_io(struct pci_bus *child)
        struct pci_dev *dev = child->self;
        u8 io_base_lo, io_limit_lo;
        unsigned long base, limit;
-       struct resource *res;
+       struct pci_bus_region region;
+       struct resource *res, res2;
 
        res = child->resource[0];
        pci_read_config_byte(dev, PCI_IO_BASE, &io_base_lo);
@@ -284,10 +367,14 @@ static void __devinit pci_read_bridge_io(struct pci_bus *child)
 
        if (base && base <= limit) {
                res->flags = (io_base_lo & PCI_IO_RANGE_TYPE_MASK) | IORESOURCE_IO;
+               res2.flags = res->flags;
+               region.start = base;
+               region.end = limit + 0xfff;
+               pcibios_bus_to_resource(dev, &res2, &region);
                if (!res->start)
-                       res->start = base;
+                       res->start = res2.start;
                if (!res->end)
-                       res->end = limit + 0xfff;
+                       res->end = res2.end;
                dev_printk(KERN_DEBUG, &dev->dev, "  bridge window %pR\n", res);
        }
 }
@@ -297,6 +384,7 @@ static void __devinit pci_read_bridge_mmio(struct pci_bus *child)
        struct pci_dev *dev = child->self;
        u16 mem_base_lo, mem_limit_lo;
        unsigned long base, limit;
+       struct pci_bus_region region;
        struct resource *res;
 
        res = child->resource[1];
@@ -306,8 +394,9 @@ static void __devinit pci_read_bridge_mmio(struct pci_bus *child)
        limit = (mem_limit_lo & PCI_MEMORY_RANGE_MASK) << 16;
        if (base && base <= limit) {
                res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM;
-               res->start = base;
-               res->end = limit + 0xfffff;
+               region.start = base;
+               region.end = limit + 0xfffff;
+               pcibios_bus_to_resource(dev, res, &region);
                dev_printk(KERN_DEBUG, &dev->dev, "  bridge window %pR\n", res);
        }
 }
@@ -317,6 +406,7 @@ static void __devinit pci_read_bridge_mmio_pref(struct pci_bus *child)
        struct pci_dev *dev = child->self;
        u16 mem_base_lo, mem_limit_lo;
        unsigned long base, limit;
+       struct pci_bus_region region;
        struct resource *res;
 
        res = child->resource[2];
@@ -353,8 +443,9 @@ static void __devinit pci_read_bridge_mmio_pref(struct pci_bus *child)
                                         IORESOURCE_MEM | IORESOURCE_PREFETCH;
                if (res->flags & PCI_PREF_RANGE_TYPE_64)
                        res->flags |= IORESOURCE_MEM_64;
-               res->start = base;
-               res->end = limit + 0xfffff;
+               region.start = base;
+               region.end = limit + 0xfffff;
+               pcibios_bus_to_resource(dev, res, &region);
                dev_printk(KERN_DEBUG, &dev->dev, "  bridge window %pR\n", res);
        }
 }
@@ -900,6 +991,8 @@ int pci_setup_device(struct pci_dev *dev)
        u8 hdr_type;
        struct pci_slot *slot;
        int pos = 0;
+       struct pci_bus_region region;
+       struct resource *res;
 
        if (pci_read_config_byte(dev, PCI_HEADER_TYPE, &hdr_type))
                return -EIO;
@@ -926,12 +1019,10 @@ int pci_setup_device(struct pci_dev *dev)
 
        pci_read_config_dword(dev, PCI_CLASS_REVISION, &class);
        dev->revision = class & 0xff;
-       class >>= 8;                                /* upper 3 bytes */
-       dev->class = class;
-       class >>= 8;
+       dev->class = class >> 8;                    /* upper 3 bytes */
 
-       dev_printk(KERN_DEBUG, &dev->dev, "[%04x:%04x] type %d class %#08x\n",
-                  dev->vendor, dev->device, dev->hdr_type, class);
+       dev_printk(KERN_DEBUG, &dev->dev, "[%04x:%04x] type %02x class %#08x\n",
+                  dev->vendor, dev->device, dev->hdr_type, dev->class);
 
        /* need to have dev->class ready */
        dev->cfg_size = pci_cfg_space_size(dev);
@@ -963,20 +1054,28 @@ int pci_setup_device(struct pci_dev *dev)
                        u8 progif;
                        pci_read_config_byte(dev, PCI_CLASS_PROG, &progif);
                        if ((progif & 1) == 0) {
-                               dev->resource[0].start = 0x1F0;
-                               dev->resource[0].end = 0x1F7;
-                               dev->resource[0].flags = LEGACY_IO_RESOURCE;
-                               dev->resource[1].start = 0x3F6;
-                               dev->resource[1].end = 0x3F6;
-                               dev->resource[1].flags = LEGACY_IO_RESOURCE;
+                               region.start = 0x1F0;
+                               region.end = 0x1F7;
+                               res = &dev->resource[0];
+                               res->flags = LEGACY_IO_RESOURCE;
+                               pcibios_bus_to_resource(dev, res, &region);
+                               region.start = 0x3F6;
+                               region.end = 0x3F6;
+                               res = &dev->resource[1];
+                               res->flags = LEGACY_IO_RESOURCE;
+                               pcibios_bus_to_resource(dev, res, &region);
                        }
                        if ((progif & 4) == 0) {
-                               dev->resource[2].start = 0x170;
-                               dev->resource[2].end = 0x177;
-                               dev->resource[2].flags = LEGACY_IO_RESOURCE;
-                               dev->resource[3].start = 0x376;
-                               dev->resource[3].end = 0x376;
-                               dev->resource[3].flags = LEGACY_IO_RESOURCE;
+                               region.start = 0x170;
+                               region.end = 0x177;
+                               res = &dev->resource[2];
+                               res->flags = LEGACY_IO_RESOURCE;
+                               pcibios_bus_to_resource(dev, res, &region);
+                               region.start = 0x376;
+                               region.end = 0x376;
+                               res = &dev->resource[3];
+                               res->flags = LEGACY_IO_RESOURCE;
+                               pcibios_bus_to_resource(dev, res, &region);
                        }
                }
                break;
@@ -1013,8 +1112,8 @@ int pci_setup_device(struct pci_dev *dev)
                return -EIO;
 
        bad:
-               dev_err(&dev->dev, "ignoring class %02x (doesn't match header "
-                       "type %02x)\n", class, dev->hdr_type);
+               dev_err(&dev->dev, "ignoring class %#08x (doesn't match header "
+                       "type %02x)\n", dev->class, dev->hdr_type);
                dev->class = PCI_CLASS_NOT_DEFINED;
        }
 
@@ -1026,6 +1125,7 @@ static void pci_release_capabilities(struct pci_dev *dev)
 {
        pci_vpd_release(dev);
        pci_iov_release(dev);
+       pci_free_cap_save_buffers(dev);
 }
 
 /**
@@ -1118,40 +1218,54 @@ struct pci_dev *alloc_pci_dev(void)
 }
 EXPORT_SYMBOL(alloc_pci_dev);
 
-/*
- * Read the config data for a PCI device, sanity-check it
- * and fill in the dev structure...
- */
-static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
+bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l,
+                                int crs_timeout)
 {
-       struct pci_dev *dev;
-       u32 l;
        int delay = 1;
 
-       if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, &l))
-               return NULL;
+       if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, l))
+               return false;
 
        /* some broken boards return 0 or ~0 if a slot is empty: */
-       if (l == 0xffffffff || l == 0x00000000 ||
-           l == 0x0000ffff || l == 0xffff0000)
-               return NULL;
+       if (*l == 0xffffffff || *l == 0x00000000 ||
+           *l == 0x0000ffff || *l == 0xffff0000)
+               return false;
 
        /* Configuration request Retry Status */
-       while (l == 0xffff0001) {
+       while (*l == 0xffff0001) {
+               if (!crs_timeout)
+                       return false;
+
                msleep(delay);
                delay *= 2;
-               if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, &l))
-                       return NULL;
+               if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, l))
+                       return false;
                /* Card hasn't responded in 60 seconds?  Must be stuck. */
-               if (delay > 60 * 1000) {
+               if (delay > crs_timeout) {
                        printk(KERN_WARNING "pci %04x:%02x:%02x.%d: not "
                                        "responding\n", pci_domain_nr(bus),
                                        bus->number, PCI_SLOT(devfn),
                                        PCI_FUNC(devfn));
-                       return NULL;
+                       return false;
                }
        }
 
+       return true;
+}
+EXPORT_SYMBOL(pci_bus_read_dev_vendor_id);
+
+/*
+ * Read the config data for a PCI device, sanity-check it
+ * and fill in the dev structure...
+ */
+static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
+{
+       struct pci_dev *dev;
+       u32 l;
+
+       if (!pci_bus_read_dev_vendor_id(bus, devfn, &l, 60*1000))
+               return NULL;
+
        dev = alloc_pci_dev();
        if (!dev)
                return NULL;
@@ -1212,6 +1326,9 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
        /* Fix up broken headers */
        pci_fixup_device(pci_fixup_header, dev);
 
+       /* formerly done by the quirk_resource_alignment() header fixup */
+       pci_reassigndev_resource_alignment(dev);
+
        /* Clear the state_saved flag. */
        dev->state_saved = false;
 
@@ -1530,21 +1647,27 @@ unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus)
 struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
                struct pci_ops *ops, void *sysdata, struct list_head *resources)
 {
-       int error, i;
+       int error;
+       struct pci_host_bridge *bridge;
        struct pci_bus *b, *b2;
        struct device *dev;
-       struct pci_bus_resource *bus_res, *n;
+       struct pci_host_bridge_window *window, *n;
        struct resource *res;
+       resource_size_t offset;
+       char bus_addr[64];
+       char *fmt;
+
+       bridge = kzalloc(sizeof(*bridge), GFP_KERNEL);
+       if (!bridge)
+               return NULL;
 
        b = pci_alloc_bus();
        if (!b)
-               return NULL;
+               goto err_bus;
 
        dev = kzalloc(sizeof(*dev), GFP_KERNEL);
-       if (!dev) {
-               kfree(b);
-               return NULL;
-       }
+       if (!dev)
+               goto err_dev;
 
        b->sysdata = sysdata;
        b->ops = ops;
@@ -1556,10 +1679,6 @@ struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
                goto err_out;
        }
 
-       down_write(&pci_bus_sem);
-       list_add_tail(&b->node, &pci_root_buses);
-       up_write(&pci_bus_sem);
-
        dev->parent = parent;
        dev->release = pci_release_bus_bridge_dev;
        dev_set_name(dev, "pci%04x:%02x", pci_domain_nr(b), bus);
@@ -1585,31 +1704,53 @@ struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
 
        b->number = b->secondary = bus;
 
-       /* Add initial resources to the bus */
-       list_for_each_entry_safe(bus_res, n, resources, list)
-               list_move_tail(&bus_res->list, &b->resources);
+       bridge->bus = b;
+       INIT_LIST_HEAD(&bridge->windows);
 
        if (parent)
                dev_info(parent, "PCI host bridge to bus %s\n", dev_name(&b->dev));
        else
                printk(KERN_INFO "PCI host bridge to bus %s\n", dev_name(&b->dev));
 
-       pci_bus_for_each_resource(b, res, i) {
-               if (res)
-                       dev_info(&b->dev, "root bus resource %pR\n", res);
+       /* Add initial resources to the bus */
+       list_for_each_entry_safe(window, n, resources, list) {
+               list_move_tail(&window->list, &bridge->windows);
+               res = window->res;
+               offset = window->offset;
+               pci_bus_add_resource(b, res, 0);
+               if (offset) {
+                       if (resource_type(res) == IORESOURCE_IO)
+                               fmt = " (bus address [%#06llx-%#06llx])";
+                       else
+                               fmt = " (bus address [%#010llx-%#010llx])";
+                       snprintf(bus_addr, sizeof(bus_addr), fmt,
+                                (unsigned long long) (res->start - offset),
+                                (unsigned long long) (res->end - offset));
+               } else
+                       bus_addr[0] = '\0';
+               dev_info(&b->dev, "root bus resource %pR%s\n", res, bus_addr);
        }
 
+       down_write(&pci_bus_sem);
+       list_add_tail(&bridge->list, &pci_host_bridges);
+       list_add_tail(&b->node, &pci_root_buses);
+       up_write(&pci_bus_sem);
+
        return b;
 
 class_dev_reg_err:
        device_unregister(dev);
 dev_reg_err:
        down_write(&pci_bus_sem);
+       list_del(&bridge->list);
        list_del(&b->node);
        up_write(&pci_bus_sem);
 err_out:
        kfree(dev);
+err_dev:
        kfree(b);
+err_bus:
+       kfree(bridge);
        return NULL;
 }
 
@@ -1667,36 +1808,29 @@ EXPORT_SYMBOL(pci_scan_bus);
 
 #ifdef CONFIG_HOTPLUG
 /**
- * pci_rescan_bus - scan a PCI bus for devices.
- * @bus: PCI bus to scan
+ * pci_rescan_bus_bridge_resize - scan a PCI bus for devices.
+ * @bridge: PCI bridge for the bus to scan
  *
- * Scan a PCI bus and child buses for new devices, adds them,
- * and enables them.
+ * Scan a PCI bus and child buses for new devices, add them,
+ * and enable them, resizing bridge mmio/io resource if necessary
+ * and possible.  The caller must ensure the child devices are already
+ * removed for resizing to occur.
  *
  * Returns the max number of subordinate bus discovered.
  */
-unsigned int __ref pci_rescan_bus(struct pci_bus *bus)
+unsigned int __ref pci_rescan_bus_bridge_resize(struct pci_dev *bridge)
 {
        unsigned int max;
-       struct pci_dev *dev;
+       struct pci_bus *bus = bridge->subordinate;
 
        max = pci_scan_child_bus(bus);
 
-       down_read(&pci_bus_sem);
-       list_for_each_entry(dev, &bus->devices, bus_list)
-               if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
-                   dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
-                       if (dev->subordinate)
-                               pci_bus_size_bridges(dev->subordinate);
-       up_read(&pci_bus_sem);
+       pci_assign_unassigned_bridge_resources(bridge);
 
-       pci_bus_assign_resources(bus);
-       pci_enable_bridges(bus);
        pci_bus_add_devices(bus);
 
        return max;
 }
-EXPORT_SYMBOL_GPL(pci_rescan_bus);
 
 EXPORT_SYMBOL(pci_add_new_bus);
 EXPORT_SYMBOL(pci_scan_slot);
index f722c5f6951aba99929d7dd3d12b6fcd0f8f05d3..4bf71028556b995bd48c0ea9caec081445fa4234 100644 (file)
 #include <linux/dmi.h>
 #include <linux/pci-aspm.h>
 #include <linux/ioport.h>
+#include <linux/sched.h>
+#include <linux/ktime.h>
 #include <asm/dma.h>   /* isa_dma_bridge_buggy */
 #include "pci.h"
 
-/*
- * This quirk function disables memory decoding and releases memory resources
- * of the device specified by kernel's boot parameter 'pci=resource_alignment='.
- * It also rounds up size to specified alignment.
- * Later on, the kernel will assign page-aligned memory resource back
- * to the device.
- */
-static void __devinit quirk_resource_alignment(struct pci_dev *dev)
-{
-       int i;
-       struct resource *r;
-       resource_size_t align, size;
-       u16 command;
-
-       if (!pci_is_reassigndev(dev))
-               return;
-
-       if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
-           (dev->class >> 8) == PCI_CLASS_BRIDGE_HOST) {
-               dev_warn(&dev->dev,
-                       "Can't reassign resources to host bridge.\n");
-               return;
-       }
-
-       dev_info(&dev->dev,
-               "Disabling memory decoding and releasing memory resources.\n");
-       pci_read_config_word(dev, PCI_COMMAND, &command);
-       command &= ~PCI_COMMAND_MEMORY;
-       pci_write_config_word(dev, PCI_COMMAND, command);
-
-       align = pci_specified_resource_alignment(dev);
-       for (i=0; i < PCI_BRIDGE_RESOURCES; i++) {
-               r = &dev->resource[i];
-               if (!(r->flags & IORESOURCE_MEM))
-                       continue;
-               size = resource_size(r);
-               if (size < align) {
-                       size = align;
-                       dev_info(&dev->dev,
-                               "Rounding up size of resource #%d to %#llx.\n",
-                               i, (unsigned long long)size);
-               }
-               r->end = size - 1;
-               r->start = 0;
-       }
-       /* Need to disable bridge's resource window,
-        * to enable the kernel to reassign new resource
-        * window later on.
-        */
-       if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE &&
-           (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
-               for (i = PCI_BRIDGE_RESOURCES; i < PCI_NUM_RESOURCES; i++) {
-                       r = &dev->resource[i];
-                       if (!(r->flags & IORESOURCE_MEM))
-                               continue;
-                       r->end = resource_size(r) - 1;
-                       r->start = 0;
-               }
-               pci_disable_bridge_window(dev);
-       }
-}
-DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, quirk_resource_alignment);
-
 /*
  * Decoding should be disabled for a PCI device during BAR sizing to avoid
  * conflict. But doing so may cause problems on host bridge and perhaps other
@@ -100,10 +39,10 @@ DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, quirk_resource_alignment);
  */
 static void __devinit quirk_mmio_always_on(struct pci_dev *dev)
 {
-       if ((dev->class >> 8) == PCI_CLASS_BRIDGE_HOST)
-               dev->mmio_always_on = 1;
+       dev->mmio_always_on = 1;
 }
-DECLARE_PCI_FIXUP_EARLY(PCI_ANY_ID, PCI_ANY_ID, quirk_mmio_always_on);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_ANY_ID, PCI_ANY_ID,
+                               PCI_CLASS_BRIDGE_HOST, 8, quirk_mmio_always_on);
 
 /* The Mellanox Tavor device gives false positive parity errors
  * Mark this device with a broken_parity_status, to allow
@@ -1002,12 +941,12 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA,      PCI_DEVICE_ID_VIA_82C597_0,     quirk_vt
  */
 static void quirk_cardbus_legacy(struct pci_dev *dev)
 {
-       if ((PCI_CLASS_BRIDGE_CARDBUS << 8) ^ dev->class)
-               return;
        pci_write_config_dword(dev, PCI_CB_LEGACY_MODE_BASE, 0);
 }
-DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, quirk_cardbus_legacy);
-DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_ANY_ID, PCI_ANY_ID, quirk_cardbus_legacy);
+DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID,
+                       PCI_CLASS_BRIDGE_CARDBUS, 8, quirk_cardbus_legacy);
+DECLARE_PCI_FIXUP_CLASS_RESUME_EARLY(PCI_ANY_ID, PCI_ANY_ID,
+                       PCI_CLASS_BRIDGE_CARDBUS, 8, quirk_cardbus_legacy);
 
 /*
  * Following the PCI ordering rules is optional on the AMD762. I'm not
@@ -1164,17 +1103,20 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_10, qui
 
 static void __devinit quirk_no_ata_d3(struct pci_dev *pdev)
 {
-       /* Quirk the legacy ATA devices only. The AHCI ones are ok */
-       if ((pdev->class >> 8) == PCI_CLASS_STORAGE_IDE)
-               pdev->dev_flags |= PCI_DEV_FLAGS_NO_D3;
+       pdev->dev_flags |= PCI_DEV_FLAGS_NO_D3;
 }
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, PCI_ANY_ID, quirk_no_ata_d3);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ATI, PCI_ANY_ID, quirk_no_ata_d3);
+/* Quirk the legacy ATA devices only. The AHCI ones are ok */
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_SERVERWORKS, PCI_ANY_ID,
+                               PCI_CLASS_STORAGE_IDE, 8, quirk_no_ata_d3);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_ATI, PCI_ANY_ID,
+                               PCI_CLASS_STORAGE_IDE, 8, quirk_no_ata_d3);
 /* ALi loses some register settings that we cannot then restore */
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AL, PCI_ANY_ID, quirk_no_ata_d3);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AL, PCI_ANY_ID,
+                               PCI_CLASS_STORAGE_IDE, 8, quirk_no_ata_d3);
 /* VIA comes back fine but we need to keep it alive or ACPI GTM failures
    occur when mode detecting */
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_VIA, PCI_ANY_ID, quirk_no_ata_d3);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_VIA, PCI_ANY_ID,
+                               PCI_CLASS_STORAGE_IDE, 8, quirk_no_ata_d3);
 
 /* This was originally an Alpha specific thing, but it really fits here.
  * The i82375 PCI/EISA bridge appears as non-classified. Fix that.
@@ -1873,8 +1815,7 @@ static void __devinit quirk_netmos(struct pci_dev *dev)
        case PCI_DEVICE_ID_NETMOS_9745:
        case PCI_DEVICE_ID_NETMOS_9845:
        case PCI_DEVICE_ID_NETMOS_9855:
-               if ((dev->class >> 8) == PCI_CLASS_COMMUNICATION_SERIAL &&
-                   num_parallel) {
+               if (num_parallel) {
                        dev_info(&dev->dev, "Netmos %04x (%u parallel, "
                                "%u serial); changing class SERIAL to OTHER "
                                "(use parport_serial)\n",
@@ -1884,7 +1825,8 @@ static void __devinit quirk_netmos(struct pci_dev *dev)
                }
        }
 }
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NETMOS, PCI_ANY_ID, quirk_netmos);
+DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_NETMOS, PCI_ANY_ID,
+                        PCI_CLASS_COMMUNICATION_SERIAL, 8, quirk_netmos);
 
 static void __devinit quirk_e100_interrupt(struct pci_dev *dev)
 {
@@ -1952,7 +1894,8 @@ static void __devinit quirk_e100_interrupt(struct pci_dev *dev)
 
        iounmap(csr);
 }
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_e100_interrupt);
+DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID,
+                       PCI_CLASS_NETWORK_ETHERNET, 8, quirk_e100_interrupt);
 
 /*
  * The 82575 and 82598 may experience data corruption issues when transitioning
@@ -2834,12 +2777,11 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x3c28, vtd_mask_spec_errors);
 static void __devinit fixup_ti816x_class(struct pci_dev* dev)
 {
        /* TI 816x devices do not have class code set when in PCIe boot mode */
-       if (dev->class == PCI_CLASS_NOT_DEFINED) {
-               dev_info(&dev->dev, "Setting PCI class for 816x PCIe device\n");
-               dev->class = PCI_CLASS_MULTIMEDIA_VIDEO;
-       }
+       dev_info(&dev->dev, "Setting PCI class for 816x PCIe device\n");
+       dev->class = PCI_CLASS_MULTIMEDIA_VIDEO;
 }
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_TI, 0xb800, fixup_ti816x_class);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_TI, 0xb800,
+                                PCI_CLASS_NOT_DEFINED, 0, fixup_ti816x_class);
 
 /* Some PCIe devices do not work reliably with the claimed maximum
  * payload size supported.
@@ -2924,17 +2866,73 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x65f8, quirk_intel_mc_errata);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x65f9, quirk_intel_mc_errata);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x65fa, quirk_intel_mc_errata);
 
+
+static void do_one_fixup_debug(void (*fn)(struct pci_dev *dev), struct pci_dev *dev)
+{
+       ktime_t calltime, delta, rettime;
+       unsigned long long duration;
+
+       printk(KERN_DEBUG "calling  %pF @ %i for %s\n",
+                       fn, task_pid_nr(current), dev_name(&dev->dev));
+       calltime = ktime_get();
+       fn(dev);
+       rettime = ktime_get();
+       delta = ktime_sub(rettime, calltime);
+       duration = (unsigned long long) ktime_to_ns(delta) >> 10;
+       printk(KERN_DEBUG "pci fixup %pF returned after %lld usecs for %s\n",
+                       fn, duration, dev_name(&dev->dev));
+}
+
+/*
+ * Some BIOS implementations leave the Intel GPU interrupts enabled,
+ * even though no one is handling them (e.g. the i915 driver is never loaded).
+ * Additionally the interrupt destination is not set up properly
+ * and the interrupt ends up -somewhere-.
+ *
+ * These spurious interrupts are "sticky" and the kernel disables
+ * the (shared) interrupt line after 100,000+ generated interrupts.
+ *
+ * Fix it by disabling the still enabled interrupts.
+ * This resolves crashes often seen on monitor unplug.
+ */
+#define I915_DEIER_REG 0x4400c
+static void __devinit disable_igfx_irq(struct pci_dev *dev)
+{
+       void __iomem *regs = pci_iomap(dev, 0, 0);
+       if (regs == NULL) {
+               dev_warn(&dev->dev, "igfx quirk: Can't iomap PCI device\n");
+               return;
+       }
+
+       /* Check if any interrupt line is still enabled */
+       if (readl(regs + I915_DEIER_REG) != 0) {
+               dev_warn(&dev->dev, "BIOS left Intel GPU interrupts enabled; "
+                       "disabling\n");
+
+               writel(0, regs + I915_DEIER_REG);
+       }
+
+       pci_iounmap(dev, regs);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0102, disable_igfx_irq);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x010a, disable_igfx_irq);
+
 static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f,
                          struct pci_fixup *end)
 {
-       while (f < end) {
-               if ((f->vendor == dev->vendor || f->vendor == (u16) PCI_ANY_ID) &&
-                   (f->device == dev->device || f->device == (u16) PCI_ANY_ID)) {
+       for (; f < end; f++)
+               if ((f->class == (u32) (dev->class >> f->class_shift) ||
+                    f->class == (u32) PCI_ANY_ID) &&
+                   (f->vendor == dev->vendor ||
+                    f->vendor == (u16) PCI_ANY_ID) &&
+                   (f->device == dev->device ||
+                    f->device == (u16) PCI_ANY_ID)) {
                        dev_dbg(&dev->dev, "calling %pF\n", f->hook);
-                       f->hook(dev);
+                       if (initcall_debug)
+                               do_one_fixup_debug(f->hook, dev);
+                       else
+                               f->hook(dev);
                }
-               f++;
-       }
 }
 
 extern struct pci_fixup __start_pci_fixups_early[];
index ef8b18c48f2641993f56aa643ed8a35579856971..fd77e2bde2e8feadc7c5b1cceab94374f00100fe 100644 (file)
@@ -79,7 +79,7 @@ EXPORT_SYMBOL(pci_remove_bus);
 
 static void __pci_remove_behind_bridge(struct pci_dev *dev);
 /**
- * pci_remove_bus_device - remove a PCI device and any children
+ * pci_stop_and_remove_bus_device - remove a PCI device and any children
  * @dev: the device to remove
  *
  * Remove a PCI device from the device lists, informing the drivers
@@ -90,7 +90,7 @@ static void __pci_remove_behind_bridge(struct pci_dev *dev);
  * device lists, remove the /proc entry, and notify userspace
  * (/sbin/hotplug).
  */
-static void __pci_remove_bus_device(struct pci_dev *dev)
+void __pci_remove_bus_device(struct pci_dev *dev)
 {
        if (dev->subordinate) {
                struct pci_bus *b = dev->subordinate;
@@ -102,7 +102,9 @@ static void __pci_remove_bus_device(struct pci_dev *dev)
 
        pci_destroy_dev(dev);
 }
-void pci_remove_bus_device(struct pci_dev *dev)
+EXPORT_SYMBOL(__pci_remove_bus_device);
+
+void pci_stop_and_remove_bus_device(struct pci_dev *dev)
 {
        pci_stop_bus_device(dev);
        __pci_remove_bus_device(dev);
@@ -127,14 +129,15 @@ static void pci_stop_behind_bridge(struct pci_dev *dev)
 }
 
 /**
- * pci_remove_behind_bridge - remove all devices behind a PCI bridge
+ * pci_stop_and_remove_behind_bridge - stop and remove all devices behind
+ *                                      a PCI bridge
  * @dev: PCI bridge device
  *
  * Remove all devices on the bus, except for the parent bridge.
  * This also removes any child buses, and any devices they may
  * contain in a depth-first manner.
  */
-void pci_remove_behind_bridge(struct pci_dev *dev)
+void pci_stop_and_remove_behind_bridge(struct pci_dev *dev)
 {
        pci_stop_behind_bridge(dev);
        __pci_remove_behind_bridge(dev);
@@ -144,7 +147,15 @@ static void pci_stop_bus_devices(struct pci_bus *bus)
 {
        struct list_head *l, *n;
 
-       list_for_each_safe(l, n, &bus->devices) {
+       /*
+        * VFs could be removed by pci_stop_and_remove_bus_device() in the
+        * pci_stop_bus_devices() code path for the PF, i.e. bus->devices
+        * gets updated while we are iterating over it.  However, VFs are
+        * inserted after PFs when SR-IOV is enabled for a PF, so we can
+        * iterate the list backwards and always step to the previous,
+        * still valid PF instead of to a removed VF.
+        */
+       list_for_each_prev_safe(l, n, &bus->devices) {
                struct pci_dev *dev = pci_dev_b(l);
                pci_stop_bus_device(dev);
        }
@@ -166,6 +177,6 @@ void pci_stop_bus_device(struct pci_dev *dev)
        pci_stop_dev(dev);
 }
 
-EXPORT_SYMBOL(pci_remove_bus_device);
-EXPORT_SYMBOL(pci_remove_behind_bridge);
+EXPORT_SYMBOL(pci_stop_and_remove_bus_device);
+EXPORT_SYMBOL(pci_stop_and_remove_behind_bridge);
 EXPORT_SYMBOL_GPL(pci_stop_bus_device);
index 86b69f85f9005f778634d81ab52f548f091978e4..8fa2d4be88dea9d90e7aa93bab89e60d1e30b7bf 100644 (file)
 #include <linux/ioport.h>
 #include <linux/cache.h>
 #include <linux/slab.h>
+#include <asm-generic/pci-bridge.h>
 #include "pci.h"
 
-struct resource_list_x {
-       struct resource_list_x *next;
+unsigned int pci_flags;
+
+struct pci_dev_resource {
+       struct list_head list;
        struct resource *res;
        struct pci_dev *dev;
        resource_size_t start;
@@ -38,21 +41,14 @@ struct resource_list_x {
        unsigned long flags;
 };
 
-#define free_list(type, head) do {                      \
-       struct type *list, *tmp;                        \
-       for (list = (head)->next; list;) {              \
-               tmp = list;                             \
-               list = list->next;                      \
-               kfree(tmp);                             \
-       }                                               \
-       (head)->next = NULL;                            \
-} while (0)
-
-int pci_realloc_enable = 0;
-#define pci_realloc_enabled() pci_realloc_enable
-void pci_realloc(void)
+static void free_list(struct list_head *head)
 {
-       pci_realloc_enable = 1;
+       struct pci_dev_resource *dev_res, *tmp;
+
+       list_for_each_entry_safe(dev_res, tmp, head, list) {
+               list_del(&dev_res->list);
+               kfree(dev_res);
+       }
 }
 
 /**
@@ -64,21 +60,18 @@ void pci_realloc(void)
  * @add_size:  additional size to be optionally added
  *              to the resource
  */
-static void add_to_list(struct resource_list_x *head,
+static int add_to_list(struct list_head *head,
                 struct pci_dev *dev, struct resource *res,
                 resource_size_t add_size, resource_size_t min_align)
 {
-       struct resource_list_x *list = head;
-       struct resource_list_x *ln = list->next;
-       struct resource_list_x *tmp;
+       struct pci_dev_resource *tmp;
 
-       tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
+       tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
        if (!tmp) {
                pr_warning("add_to_list: kmalloc() failed!\n");
-               return;
+               return -ENOMEM;
        }
 
-       tmp->next = ln;
        tmp->res = res;
        tmp->dev = dev;
        tmp->start = res->start;
@@ -86,19 +79,100 @@ static void add_to_list(struct resource_list_x *head,
        tmp->flags = res->flags;
        tmp->add_size = add_size;
        tmp->min_align = min_align;
-       list->next = tmp;
+
+       list_add(&tmp->list, head);
+
+       return 0;
 }
 
-static void add_to_failed_list(struct resource_list_x *head,
-                               struct pci_dev *dev, struct resource *res)
+static void remove_from_list(struct list_head *head,
+                                struct resource *res)
 {
-       add_to_list(head, dev, res,
-                       0 /* dont care */,
-                       0 /* dont care */);
+       struct pci_dev_resource *dev_res, *tmp;
+
+       list_for_each_entry_safe(dev_res, tmp, head, list) {
+               if (dev_res->res == res) {
+                       list_del(&dev_res->list);
+                       kfree(dev_res);
+                       break;
+               }
+       }
+}
+
+static resource_size_t get_res_add_size(struct list_head *head,
+                                       struct resource *res)
+{
+       struct pci_dev_resource *dev_res;
+
+       list_for_each_entry(dev_res, head, list) {
+               if (dev_res->res == res) {
+                       int idx = res - &dev_res->dev->resource[0];
+
+                       dev_printk(KERN_DEBUG, &dev_res->dev->dev,
+                                "res[%d]=%pR get_res_add_size add_size %llx\n",
+                                idx, dev_res->res,
+                                (unsigned long long)dev_res->add_size);
+
+                       return dev_res->add_size;
+               }
+       }
+
+       return 0;
+}
+
+/* Sort resources by alignment */
+static void pdev_sort_resources(struct pci_dev *dev, struct list_head *head)
+{
+       int i;
+
+       for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+               struct resource *r;
+               struct pci_dev_resource *dev_res, *tmp;
+               resource_size_t r_align;
+               struct list_head *n;
+
+               r = &dev->resource[i];
+
+               if (r->flags & IORESOURCE_PCI_FIXED)
+                       continue;
+
+               if (!(r->flags) || r->parent)
+                       continue;
+
+               r_align = pci_resource_alignment(dev, r);
+               if (!r_align) {
+                       dev_warn(&dev->dev, "BAR %d: %pR has bogus alignment\n",
+                                i, r);
+                       continue;
+               }
+
+               tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+               if (!tmp)
+                       panic("pdev_sort_resources(): "
+                             "kmalloc() failed!\n");
+               tmp->res = r;
+               tmp->dev = dev;
+
+               /* default to the tail: r is the smallest so far or list is empty */
+               n = head;
+               list_for_each_entry(dev_res, head, list) {
+                       resource_size_t align;
+
+                       align = pci_resource_alignment(dev_res->dev,
+                                                        dev_res->res);
+
+                       if (r_align > align) {
+                               n = &dev_res->list;
+                               break;
+                       }
+               }
+               /* Insert it just before n */
+               list_add_tail(&tmp->list, n);
+       }
 }
 
 static void __dev_sort_resources(struct pci_dev *dev,
-                                struct resource_list *head)
+                                struct list_head *head)
 {
        u16 class = dev->class >> 8;
 
@@ -136,49 +210,54 @@ static inline void reset_resource(struct resource *res)
  * additional resources for the element, provided the element
  * is in the head list.
  */
-static void reassign_resources_sorted(struct resource_list_x *realloc_head,
-               struct resource_list *head)
+static void reassign_resources_sorted(struct list_head *realloc_head,
+               struct list_head *head)
 {
        struct resource *res;
-       struct resource_list_x *list, *tmp, *prev;
-       struct resource_list *hlist;
+       struct pci_dev_resource *add_res, *tmp;
+       struct pci_dev_resource *dev_res;
        resource_size_t add_size;
        int idx;
 
-       prev = realloc_head;
-       for (list = realloc_head->next; list;) {
-               res = list->res;
+       /* _safe iterator: entries are unlinked and freed inside the loop */
+       list_for_each_entry_safe(add_res, tmp, realloc_head, list) {
+               bool found_match = false;
+
+               res = add_res->res;
                /* skip resource that has been reset */
                if (!res->flags)
                        goto out;
 
                /* skip this resource if not found in head list */
-               for (hlist = head->next; hlist && hlist->res != res;
-                               hlist = hlist->next);
-               if (!hlist) { /* just skip */
-                       prev = list;
-                       list = list->next;
-                       continue;
+               list_for_each_entry(dev_res, head, list) {
+                       if (dev_res->res == res) {
+                               found_match = true;
+                               break;
+                       }
                }
+               /*
+                * "continue" bypasses the out: label below, so an entry
+                * without a match stays on realloc_head.
+                */
+               if (!found_match)/* just skip */
+                       continue;
 
-               idx = res - &list->dev->resource[0];
-               add_size=list->add_size;
+               /* index of res within its device's resource[] array */
+               idx = res - &add_res->dev->resource[0];
+               add_size = add_res->add_size;
                if (!resource_size(res)) {
-                       res->start = list->start;
+                       /*
+                        * Zero-sized resource: give it add_size bytes
+                        * starting at add_res->start and try to assign it.
+                        */
+                       res->start = add_res->start;
                        res->end = res->start + add_size - 1;
-                       if(pci_assign_resource(list->dev, idx))
+                       if (pci_assign_resource(add_res->dev, idx))
                                reset_resource(res);
                } else {
-                       resource_size_t align = list->min_align;
-                       res->flags |= list->flags & (IORESOURCE_STARTALIGN|IORESOURCE_SIZEALIGN);
-                       if (pci_reassign_resource(list->dev, idx, add_size, align))
-                               dev_printk(KERN_DEBUG, &list->dev->dev, "failed to add optional resources res=%pR\n",
-                                                       res);
+                       /*
+                        * Resource already has a size: ask
+                        * pci_reassign_resource() to add add_size to it.
+                        * A failure is only logged; res is left as it is.
+                        */
+                       resource_size_t align = add_res->min_align;
+                       res->flags |= add_res->flags &
+                                (IORESOURCE_STARTALIGN|IORESOURCE_SIZEALIGN);
+                       if (pci_reassign_resource(add_res->dev, idx,
+                                                 add_size, align))
+                               dev_printk(KERN_DEBUG, &add_res->dev->dev,
+                                          "failed to add %llx res[%d]=%pR\n",
+                                          (unsigned long long)add_size,
+                                          idx, res);
                }
 out:
-               tmp = list;
-               prev->next = list = list->next;
-               kfree(tmp);
+               /* processed (or reset) entry: unlink from realloc_head and free */
+               list_del(&add_res->list);
+               kfree(add_res);
        }
 }
 
@@ -192,35 +271,99 @@ out:
  * Satisfy resource requests of each element in the list. Add
  * requests that could not satisfied to the failed_list.
  */
-static void assign_requested_resources_sorted(struct resource_list *head,
-                                struct resource_list_x *fail_head)
+static void assign_requested_resources_sorted(struct list_head *head,
+                                struct list_head *fail_head)
 {
        struct resource *res;
-       struct resource_list *list;
+       struct pci_dev_resource *dev_res;
        int idx;
 
-       for (list = head->next; list; list = list->next) {
-               res = list->res;
-               idx = res - &list->dev->resource[0];
-               if (resource_size(res) && pci_assign_resource(list->dev, idx)) {
-                       if (fail_head && !pci_is_root_bus(list->dev->bus)) {
+       list_for_each_entry(dev_res, head, list) {
+               res = dev_res->res;
+               /* index of res within its device's resource[] array */
+               idx = res - &dev_res->dev->resource[0];
+               /*
+                * Zero-sized resources are not assigned here (short-circuit);
+                * a non-zero return from pci_assign_resource() is a failure.
+                */
+               if (resource_size(res) &&
+                   pci_assign_resource(dev_res->dev, idx)) {
+                       /*
+                        * Failures are recorded only when the caller passed
+                        * a fail_head and the device is not on a root bus.
+                        */
+                       if (fail_head && !pci_is_root_bus(dev_res->dev->bus)) {
                                /*
                                 * if the failed res is for ROM BAR, and it will
                                 * be enabled later, don't add it to the list
                                 */
                                if (!((idx == PCI_ROM_RESOURCE) &&
                                      (!(res->flags & IORESOURCE_ROM_ENABLE))))
-                                       add_to_failed_list(fail_head, list->dev, res);
+                                       add_to_list(fail_head,
+                                                   dev_res->dev, res,
+                                                   0 /* dont care */,
+                                                   0 /* dont care */);
                        }
                        reset_resource(res);
                }
        }
 }
 
-static void __assign_resources_sorted(struct resource_list *head,
-                                struct resource_list_x *realloc_head,
-                                struct resource_list_x *fail_head)
+static void __assign_resources_sorted(struct list_head *head,
+                                struct list_head *realloc_head,
+                                struct list_head *fail_head)
 {
+       /*
+        * Do not assign only the requested sizes first: the resources
+        * could end up adjacent, so a later reassign could not grow them
+        * one by one inside the parent resource window.
+        * Try to assign requested + add_size at the beginning:
+        *  if that succeeds, we can return early;
+        *  if it fails, fall back to assigning the requested sizes first
+        *    and then try to reassign add_size for some resources.
+        */
+       LIST_HEAD(save_head);
+       LIST_HEAD(local_fail_head);
+       struct pci_dev_resource *save_res;
+       struct pci_dev_resource *dev_res;
+
+       /* Check if optional add_size is there */
+       if (!realloc_head || list_empty(realloc_head))
+               goto requested_and_reassign;
+
+       /* Save original start, end, flags etc at first */
+       list_for_each_entry(dev_res, head, list) {
+               if (add_to_list(&save_head, dev_res->dev, dev_res->res, 0, 0)) {
+                       free_list(&save_head);
+                       goto requested_and_reassign;
+               }
+       }
+
+       /* Update res in head list with add_size in realloc_head list */
+       list_for_each_entry(dev_res, head, list)
+               dev_res->res->end += get_res_add_size(realloc_head,
+                                                       dev_res->res);
+
+       /* Try updated head list with add_size added */
+       assign_requested_resources_sorted(head, &local_fail_head);
+
+       /* all assigned with add_size ? */
+       if (list_empty(&local_fail_head)) {
+               /* Remove head list from realloc_head list */
+               list_for_each_entry(dev_res, head, list)
+                       remove_from_list(realloc_head, dev_res->res);
+               free_list(&save_head);
+               free_list(head);
+               return;
+       }
+
+       free_list(&local_fail_head);
+       /* Release assigned resource */
+       list_for_each_entry(dev_res, head, list)
+               if (dev_res->res->parent)
+                       release_resource(dev_res->res);
+       /* Restore start/end/flags from saved list */
+       list_for_each_entry(save_res, &save_head, list) {
+               struct resource *res = save_res->res;
+
+               res->start = save_res->start;
+               res->end = save_res->end;
+               res->flags = save_res->flags;
+       }
+       free_list(&save_head);
+
+requested_and_reassign:
        /* Satisfy the must-have resource requests */
        assign_requested_resources_sorted(head, fail_head);
 
@@ -228,28 +371,27 @@ static void __assign_resources_sorted(struct resource_list *head,
                requests */
        if (realloc_head)
                reassign_resources_sorted(realloc_head, head);
-       free_list(resource_list, head);
+       free_list(head);
 }
 
 static void pdev_assign_resources_sorted(struct pci_dev *dev,
-                                struct resource_list_x *fail_head)
+                                struct list_head *add_head,
+                                struct list_head *fail_head)
 {
-       struct resource_list head;
+       /* local scratch list; __assign_resources_sorted() frees its entries */
+       LIST_HEAD(head);
 
-       head.next = NULL;
        __dev_sort_resources(dev, &head);
-       __assign_resources_sorted(&head, NULL, fail_head);
+       /* add_head is handed through as the realloc (optional size) list */
+       __assign_resources_sorted(&head, add_head, fail_head);
 
 }
 
 static void pbus_assign_resources_sorted(const struct pci_bus *bus,
-                                        struct resource_list_x *realloc_head,
-                                        struct resource_list_x *fail_head)
+                                        struct list_head *realloc_head,
+                                        struct list_head *fail_head)
 {
        struct pci_dev *dev;
-       struct resource_list head;
+       LIST_HEAD(head);
 
-       head.next = NULL;
        list_for_each_entry(dev, &bus->devices, bus_list)
                __dev_sort_resources(dev, &head);
 
@@ -548,20 +690,6 @@ static resource_size_t calculate_memsize(resource_size_t size,
        return size;
 }
 
-static resource_size_t get_res_add_size(struct resource_list_x *realloc_head,
-                                       struct resource *res)
-{
-       struct resource_list_x *list;
-
-       /* check if it is in realloc_head list */
-       for (list = realloc_head->next; list && list->res != res;
-                       list = list->next);
-       if (list)
-               return list->add_size;
-
-       return 0;
-}
-
 /**
  * pbus_size_io() - size the io window of a given bus
  *
@@ -576,7 +704,7 @@ static resource_size_t get_res_add_size(struct resource_list_x *realloc_head,
  * We must be careful with the ISA aliasing though.
  */
 static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size,
-               resource_size_t add_size, struct resource_list_x *realloc_head)
+               resource_size_t add_size, struct list_head *realloc_head)
 {
        struct pci_dev *dev;
        struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO);
@@ -612,7 +740,7 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size,
        if (children_add_size > add_size)
                add_size = children_add_size;
        size1 = (!realloc_head || (realloc_head && !add_size)) ? size0 :
-               calculate_iosize(size, min_size+add_size, size1,
+               calculate_iosize(size, min_size, add_size + size1,
                        resource_size(b_res), 4096);
        if (!size0 && !size1) {
                if (b_res->start || b_res->end)
@@ -626,8 +754,12 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size,
        b_res->start = 4096;
        b_res->end = b_res->start + size0 - 1;
        b_res->flags |= IORESOURCE_STARTALIGN;
-       if (size1 > size0 && realloc_head)
+       if (size1 > size0 && realloc_head) {
                add_to_list(realloc_head, bus->self, b_res, size1-size0, 4096);
+               dev_printk(KERN_DEBUG, &bus->self->dev, "bridge window "
+                                "%pR to [bus %02x-%02x] add_size %lx\n", b_res,
+                                bus->secondary, bus->subordinate, size1-size0);
+       }
 }
 
 /**
@@ -644,7 +776,7 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size,
 static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
                         unsigned long type, resource_size_t min_size,
                        resource_size_t add_size,
-                       struct resource_list_x *realloc_head)
+                       struct list_head *realloc_head)
 {
        struct pci_dev *dev;
        resource_size_t min_align, align, size, size0, size1;
@@ -726,7 +858,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
        if (children_add_size > add_size)
                add_size = children_add_size;
        size1 = (!realloc_head || (realloc_head && !add_size)) ? size0 :
-               calculate_memsize(size, min_size+add_size, 0,
+               calculate_memsize(size, min_size, add_size,
                                resource_size(b_res), min_align);
        if (!size0 && !size1) {
                if (b_res->start || b_res->end)
@@ -739,8 +871,12 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
        b_res->start = min_align;
        b_res->end = size0 + min_align - 1;
        b_res->flags |= IORESOURCE_STARTALIGN | mem64_mask;
-       if (size1 > size0 && realloc_head)
+       if (size1 > size0 && realloc_head) {
                add_to_list(realloc_head, bus->self, b_res, size1-size0, min_align);
+               dev_printk(KERN_DEBUG, &bus->self->dev, "bridge window "
+                                "%pR to [bus %02x-%02x] add_size %llx\n", b_res,
+                                bus->secondary, bus->subordinate, (unsigned long long)size1-size0);
+       }
        return 1;
 }
 
@@ -754,25 +890,48 @@ unsigned long pci_cardbus_resource_alignment(struct resource *res)
 }
 
 static void pci_bus_size_cardbus(struct pci_bus *bus,
-                       struct resource_list_x *realloc_head)
+                       struct list_head *realloc_head)
 {
        struct pci_dev *bridge = bus->self;
        struct resource *b_res = &bridge->resource[PCI_BRIDGE_RESOURCES];
+       resource_size_t b_res_3_size = pci_cardbus_mem_size * 2;
        u16 ctrl;
 
+       if (b_res[0].parent)
+               goto handle_b_res_1;
        /*
         * Reserve some resources for CardBus.  We reserve
         * a fixed amount of bus space for CardBus bridges.
         */
-       b_res[0].start = 0;
-       b_res[0].flags |= IORESOURCE_IO | IORESOURCE_SIZEALIGN;
-       if (realloc_head)
-               add_to_list(realloc_head, bridge, b_res, pci_cardbus_io_size, 0 /* dont care */);
+       b_res[0].start = pci_cardbus_io_size;
+       b_res[0].end = b_res[0].start + pci_cardbus_io_size - 1;
+       b_res[0].flags |= IORESOURCE_IO | IORESOURCE_STARTALIGN;
+       if (realloc_head) {
+               b_res[0].end -= pci_cardbus_io_size;
+               add_to_list(realloc_head, bridge, b_res, pci_cardbus_io_size,
+                               pci_cardbus_io_size);
+       }
 
-       b_res[1].start = 0;
-       b_res[1].flags |= IORESOURCE_IO | IORESOURCE_SIZEALIGN;
-       if (realloc_head)
-               add_to_list(realloc_head, bridge, b_res+1, pci_cardbus_io_size, 0 /* dont care */);
+handle_b_res_1:
+       if (b_res[1].parent)
+               goto handle_b_res_2;
+       b_res[1].start = pci_cardbus_io_size;
+       b_res[1].end = b_res[1].start + pci_cardbus_io_size - 1;
+       b_res[1].flags |= IORESOURCE_IO | IORESOURCE_STARTALIGN;
+       if (realloc_head) {
+               b_res[1].end -= pci_cardbus_io_size;
+               add_to_list(realloc_head, bridge, b_res+1, pci_cardbus_io_size,
+                                pci_cardbus_io_size);
+       }
+
+handle_b_res_2:
+       /* MEM1 must not be pref mmio */
+       pci_read_config_word(bridge, PCI_CB_BRIDGE_CONTROL, &ctrl);
+       if (ctrl & PCI_CB_BRIDGE_CTL_PREFETCH_MEM1) {
+               ctrl &= ~PCI_CB_BRIDGE_CTL_PREFETCH_MEM1;
+               pci_write_config_word(bridge, PCI_CB_BRIDGE_CONTROL, ctrl);
+               pci_read_config_word(bridge, PCI_CB_BRIDGE_CONTROL, &ctrl);
+       }
 
        /*
         * Check whether prefetchable memory is supported
@@ -785,38 +944,46 @@ static void pci_bus_size_cardbus(struct pci_bus *bus,
                pci_read_config_word(bridge, PCI_CB_BRIDGE_CONTROL, &ctrl);
        }
 
+       if (b_res[2].parent)
+               goto handle_b_res_3;
        /*
         * If we have prefetchable memory support, allocate
         * two regions.  Otherwise, allocate one region of
         * twice the size.
         */
        if (ctrl & PCI_CB_BRIDGE_CTL_PREFETCH_MEM0) {
-               b_res[2].start = 0;
-               b_res[2].flags |= IORESOURCE_MEM | IORESOURCE_PREFETCH | IORESOURCE_SIZEALIGN;
-               if (realloc_head)
-                       add_to_list(realloc_head, bridge, b_res+2, pci_cardbus_mem_size, 0 /* dont care */);
-
-               b_res[3].start = 0;
-               b_res[3].flags |= IORESOURCE_MEM | IORESOURCE_SIZEALIGN;
-               if (realloc_head)
-                       add_to_list(realloc_head, bridge, b_res+3, pci_cardbus_mem_size, 0 /* dont care */);
-       } else {
-               b_res[3].start = 0;
-               b_res[3].flags |= IORESOURCE_MEM | IORESOURCE_SIZEALIGN;
-               if (realloc_head)
-                       add_to_list(realloc_head, bridge, b_res+3, pci_cardbus_mem_size * 2, 0 /* dont care */);
+               b_res[2].start = pci_cardbus_mem_size;
+               b_res[2].end = b_res[2].start + pci_cardbus_mem_size - 1;
+               b_res[2].flags |= IORESOURCE_MEM | IORESOURCE_PREFETCH |
+                                 IORESOURCE_STARTALIGN;
+               if (realloc_head) {
+                       b_res[2].end -= pci_cardbus_mem_size;
+                       add_to_list(realloc_head, bridge, b_res+2,
+                                pci_cardbus_mem_size, pci_cardbus_mem_size);
+               }
+
+               /* reduce that to half */
+               b_res_3_size = pci_cardbus_mem_size;
+       }
+
+handle_b_res_3:
+       if (b_res[3].parent)
+               goto handle_done;
+       b_res[3].start = pci_cardbus_mem_size;
+       b_res[3].end = b_res[3].start + b_res_3_size - 1;
+       b_res[3].flags |= IORESOURCE_MEM | IORESOURCE_STARTALIGN;
+       if (realloc_head) {
+               b_res[3].end -= b_res_3_size;
+               add_to_list(realloc_head, bridge, b_res+3, b_res_3_size,
+                                pci_cardbus_mem_size);
        }
 
-       /* set the size of the resource to zero, so that the resource does not
-        * get assigned during required-resource allocation cycle but gets assigned
-        * during the optional-resource allocation cycle.
-        */
-       b_res[0].start = b_res[1].start = b_res[2].start = b_res[3].start = 1;
-       b_res[0].end = b_res[1].end = b_res[2].end = b_res[3].end = 0;
+handle_done:
+       ;
 }
 
 void __ref __pci_bus_size_bridges(struct pci_bus *bus,
-                       struct resource_list_x *realloc_head)
+                       struct list_head *realloc_head)
 {
        struct pci_dev *dev;
        unsigned long mask, prefmask;
@@ -858,7 +1025,8 @@ void __ref __pci_bus_size_bridges(struct pci_bus *bus,
                 * Follow thru
                 */
        default:
-               pbus_size_io(bus, 0, additional_io_size, realloc_head);
+               pbus_size_io(bus, realloc_head ? 0 : additional_io_size,
+                            additional_io_size, realloc_head);
                /* If the bridge supports prefetchable range, size it
                   separately. If it doesn't, or its prefetchable window
                   has already been allocated by arch code, try
@@ -866,11 +1034,15 @@ void __ref __pci_bus_size_bridges(struct pci_bus *bus,
                   resources. */
                mask = IORESOURCE_MEM;
                prefmask = IORESOURCE_MEM | IORESOURCE_PREFETCH;
-               if (pbus_size_mem(bus, prefmask, prefmask, 0, additional_mem_size, realloc_head))
+               if (pbus_size_mem(bus, prefmask, prefmask,
+                                 realloc_head ? 0 : additional_mem_size,
+                                 additional_mem_size, realloc_head))
                        mask = prefmask; /* Success, size non-prefetch only. */
                else
                        additional_mem_size += additional_mem_size;
-               pbus_size_mem(bus, mask, IORESOURCE_MEM, 0, additional_mem_size, realloc_head);
+               pbus_size_mem(bus, mask, IORESOURCE_MEM,
+                               realloc_head ? 0 : additional_mem_size,
+                               additional_mem_size, realloc_head);
                break;
        }
 }
@@ -882,8 +1054,8 @@ void __ref pci_bus_size_bridges(struct pci_bus *bus)
 EXPORT_SYMBOL(pci_bus_size_bridges);
 
 static void __ref __pci_bus_assign_resources(const struct pci_bus *bus,
-                                        struct resource_list_x *realloc_head,
-                                        struct resource_list_x *fail_head)
+                                        struct list_head *realloc_head,
+                                        struct list_head *fail_head)
 {
        struct pci_bus *b;
        struct pci_dev *dev;
@@ -922,17 +1094,19 @@ void __ref pci_bus_assign_resources(const struct pci_bus *bus)
 EXPORT_SYMBOL(pci_bus_assign_resources);
 
 static void __ref __pci_bridge_assign_resources(const struct pci_dev *bridge,
-                                        struct resource_list_x *fail_head)
+                                        struct list_head *add_head,
+                                        struct list_head *fail_head)
 {
        struct pci_bus *b;
 
-       pdev_assign_resources_sorted((struct pci_dev *)bridge, fail_head);
+       pdev_assign_resources_sorted((struct pci_dev *)bridge,
+                                        add_head, fail_head);
 
        b = bridge->subordinate;
        if (!b)
                return;
 
-       __pci_bus_assign_resources(b, NULL, fail_head);
+       __pci_bus_assign_resources(b, add_head, fail_head);
 
        switch (bridge->class >> 8) {
        case PCI_CLASS_BRIDGE_PCI:
@@ -1095,6 +1269,58 @@ static int __init pci_get_max_depth(void)
        return depth;
 }
 
+/*
+ * State of the PCI realloc setting:
+ * -1: undefined, will auto detect later
+ *  0: disabled by user
+ *  1: disabled by auto detect
+ *  2: enabled by user
+ *  3: enabled by auto detect
+ *
+ * The numeric order matters: pci_realloc_enabled() treats every value
+ * >= user_enabled as "enabled".
+ */
+enum enable_type {
+       undefined = -1,
+       user_disabled,
+       auto_disabled,
+       user_enabled,
+       auto_enabled,
+};
+
+/* current setting; starts out as "undefined" */
+static enum enable_type pci_realloc_enable __initdata = undefined;
+/*
+ * Set pci_realloc_enable from an option string.
+ *
+ * Only a prefix is compared: a string starting with "off" selects
+ * user_disabled, one starting with "on" selects user_enabled.  Any other
+ * string leaves the current setting unchanged.
+ */
+void __init pci_realloc_get_opt(char *str)
+{
+       if (!strncmp(str, "off", 3))
+               pci_realloc_enable = user_disabled;
+       else if (!strncmp(str, "on", 2))
+               pci_realloc_enable = user_enabled;
+}
+/*
+ * Return true when the setting is user_enabled or auto_enabled; relies on
+ * the enabled values being the two largest values of enum enable_type.
+ */
+static bool __init pci_realloc_enabled(void)
+{
+       return pci_realloc_enable >= user_enabled;
+}
+
+/*
+ * Auto-detect whether realloc should be enabled.
+ *
+ * Only active when both CONFIG_PCI_IOV and CONFIG_PCI_REALLOC_ENABLE_AUTO
+ * are defined; otherwise the function body is empty.  A setting other
+ * than "undefined" is never overridden.  The setting becomes auto_enabled
+ * as soon as one device has an IOV resource with flags set but start == 0.
+ */
+static void __init pci_realloc_detect(void)
+{
+#if defined(CONFIG_PCI_IOV) && defined(CONFIG_PCI_REALLOC_ENABLE_AUTO)
+       struct pci_dev *dev = NULL;
+
+       /* keep an explicit user choice (or an earlier detection result) */
+       if (pci_realloc_enable != undefined)
+               return;
+
+       for_each_pci_dev(dev) {
+               int i;
+
+               for (i = PCI_IOV_RESOURCES; i <= PCI_IOV_RESOURCE_END; i++) {
+                       struct resource *r = &dev->resource[i];
+
+                       /* Not assigned, or rejected by kernel ? */
+                       if (r->flags && !r->start) {
+                               pci_realloc_enable = auto_enabled;
+
+                               /*
+                                * NOTE(review): returning from inside
+                                * for_each_pci_dev() presumably leaves a
+                                * reference held on dev - confirm whether
+                                * a pci_dev_put() is needed here.
+                                */
+                               return;
+                       }
+               }
+       }
+#endif
+}
 
 /*
  * first try will not touch pci bridge res
@@ -1105,59 +1331,57 @@ void __init
 pci_assign_unassigned_resources(void)
 {
        struct pci_bus *bus;
-       struct resource_list_x realloc_list; /* list of resources that
+       LIST_HEAD(realloc_head); /* list of resources that
                                        want additional resources */
+       struct list_head *add_list = NULL;
        int tried_times = 0;
        enum release_type rel_type = leaf_only;
-       struct resource_list_x head, *list;
+       LIST_HEAD(fail_head);
+       struct pci_dev_resource *fail_res;
        unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
                                  IORESOURCE_PREFETCH;
-       unsigned long failed_type;
-       int max_depth = pci_get_max_depth();
-       int pci_try_num;
-
+       int pci_try_num = 1;
 
-       head.next = NULL;
-       realloc_list.next = NULL;
+       /* don't realloc unless it is enabled (by the user or by auto detect) */
+       pci_realloc_detect();
+       if (pci_realloc_enabled()) {
+               int max_depth = pci_get_max_depth();
 
-       pci_try_num = max_depth + 1;
-       printk(KERN_DEBUG "PCI: max bus depth: %d pci_try_num: %d\n",
-                max_depth, pci_try_num);
+               pci_try_num = max_depth + 1;
+               printk(KERN_DEBUG "PCI: max bus depth: %d pci_try_num: %d\n",
+                        max_depth, pci_try_num);
+       }
 
 again:
+       /*
+        * Only the last try uses add_list; earlier tries treat good-to-have
+        * sizes as must-have, so parent bridge resources can be reallocated.
+        */
+       if (tried_times + 1 == pci_try_num)
+               add_list = &realloc_head;
        /* Depth first, calculate sizes and alignments of all
           subordinate buses. */
        list_for_each_entry(bus, &pci_root_buses, node)
-               __pci_bus_size_bridges(bus, &realloc_list);
+               __pci_bus_size_bridges(bus, add_list);
 
        /* Depth last, allocate resources and update the hardware. */
        list_for_each_entry(bus, &pci_root_buses, node)
-               __pci_bus_assign_resources(bus, &realloc_list, &head);
-       BUG_ON(realloc_list.next);
+               __pci_bus_assign_resources(bus, add_list, &fail_head);
+       if (add_list)
+               BUG_ON(!list_empty(add_list));
        tried_times++;
 
        /* any device complain? */
-       if (!head.next)
+       if (list_empty(&fail_head))
                goto enable_and_dump;
 
-       /* don't realloc if asked to do so */
-       if (!pci_realloc_enabled()) {
-               free_list(resource_list_x, &head);
-               goto enable_and_dump;
-       }
+       if (tried_times >= pci_try_num) {
+               if (pci_realloc_enable == undefined)
+                       printk(KERN_INFO "Some PCI device resources are unassigned, try booting with pci=realloc\n");
+               else if (pci_realloc_enable == auto_enabled)
+                       printk(KERN_INFO "Automatically enabled pci realloc, if you have problem, try booting with pci=realloc=off\n");
 
-       failed_type = 0;
-       for (list = head.next; list;) {
-               failed_type |= list->flags;
-               list = list->next;
-       }
-       /*
-        * io port are tight, don't try extra
-        * or if reach the limit, don't want to try more
-        */
-       failed_type &= type_mask;
-       if ((failed_type == IORESOURCE_IO) || (tried_times >= pci_try_num)) {
-               free_list(resource_list_x, &head);
+               free_list(&fail_head);
                goto enable_and_dump;
        }
 
@@ -1172,25 +1396,23 @@ again:
         * Try to release leaf bridge's resources that doesn't fit resource of
         * child device under that bridge
         */
-       for (list = head.next; list;) {
-               bus = list->dev->bus;
-               pci_bus_release_bridge_resources(bus, list->flags & type_mask,
-                                                 rel_type);
-               list = list->next;
+       list_for_each_entry(fail_res, &fail_head, list) {
+               bus = fail_res->dev->bus;
+               pci_bus_release_bridge_resources(bus,
+                                                fail_res->flags & type_mask,
+                                                rel_type);
        }
        /* restore size and flags */
-       for (list = head.next; list;) {
-               struct resource *res = list->res;
+       list_for_each_entry(fail_res, &fail_head, list) {
+               struct resource *res = fail_res->res;
 
-               res->start = list->start;
-               res->end = list->end;
-               res->flags = list->flags;
-               if (list->dev->subordinate)
+               res->start = fail_res->start;
+               res->end = fail_res->end;
+               res->flags = fail_res->flags;
+               if (fail_res->dev->subordinate)
                        res->flags = 0;
-
-               list = list->next;
        }
-       free_list(resource_list_x, &head);
+       free_list(&fail_head);
 
        goto again;
 
@@ -1207,26 +1429,27 @@ enable_and_dump:
 void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge)
 {
        struct pci_bus *parent = bridge->subordinate;
+       LIST_HEAD(add_list); /* list of resources that
+                                       want additional resources */
        int tried_times = 0;
-       struct resource_list_x head, *list;
+       LIST_HEAD(fail_head);
+       struct pci_dev_resource *fail_res;
        int retval;
        unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
                                  IORESOURCE_PREFETCH;
 
-       head.next = NULL;
-
 again:
-       pci_bus_size_bridges(parent);
-       __pci_bridge_assign_resources(bridge, &head);
-
+       __pci_bus_size_bridges(parent, &add_list);
+       __pci_bridge_assign_resources(bridge, &add_list, &fail_head);
+       BUG_ON(!list_empty(&add_list));
        tried_times++;
 
-       if (!head.next)
+       if (list_empty(&fail_head))
                goto enable_all;
 
        if (tried_times >= 2) {
                /* still fail, don't need to try more */
-               free_list(resource_list_x, &head);
+               free_list(&fail_head);
                goto enable_all;
        }
 
@@ -1237,27 +1460,24 @@ again:
         * Try to release leaf bridge's resources that doesn't fit resource of
         * child device under that bridge
         */
-       for (list = head.next; list;) {
-               struct pci_bus *bus = list->dev->bus;
-               unsigned long flags = list->flags;
+       list_for_each_entry(fail_res, &fail_head, list) {
+               struct pci_bus *bus = fail_res->dev->bus;
+               unsigned long flags = fail_res->flags;
 
                pci_bus_release_bridge_resources(bus, flags & type_mask,
                                                 whole_subtree);
-               list = list->next;
        }
        /* restore size and flags */
-       for (list = head.next; list;) {
-               struct resource *res = list->res;
+       list_for_each_entry(fail_res, &fail_head, list) {
+               struct resource *res = fail_res->res;
 
-               res->start = list->start;
-               res->end = list->end;
-               res->flags = list->flags;
-               if (list->dev->subordinate)
+               res->start = fail_res->start;
+               res->end = fail_res->end;
+               res->flags = fail_res->flags;
+               if (fail_res->dev->subordinate)
                        res->flags = 0;
-
-               list = list->next;
        }
-       free_list(resource_list_x, &head);
+       free_list(&fail_head);
 
        goto again;
 
@@ -1267,3 +1487,41 @@ enable_all:
        pci_enable_bridges(parent);
 }
 EXPORT_SYMBOL_GPL(pci_assign_unassigned_bridge_resources);
+
+#ifdef CONFIG_HOTPLUG
+/**
+ * pci_rescan_bus - scan a PCI bus for devices.
+ * @bus: PCI bus to scan
+ *
+ * Scan a PCI bus and its child buses for new devices, add them,
+ * and enable them.
+ *
+ * Returns the maximum subordinate bus number discovered.
+ */
+unsigned int __ref pci_rescan_bus(struct pci_bus *bus)
+{
+       unsigned int max;
+       struct pci_dev *dev;
+       LIST_HEAD(add_list); /* list of resources that
+                                       want additional resources */
+
+       max = pci_scan_child_bus(bus);
+
+       down_read(&pci_bus_sem);
+       list_for_each_entry(dev, &bus->devices, bus_list)
+               if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
+                   dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
+                       if (dev->subordinate)
+                               __pci_bus_size_bridges(dev->subordinate,
+                                                        &add_list);
+       up_read(&pci_bus_sem);
+       __pci_bus_assign_resources(bus, &add_list, NULL);
+       BUG_ON(!list_empty(&add_list));
+
+       pci_enable_bridges(bus);
+       pci_bus_add_devices(bus);
+
+       return max;
+}
+EXPORT_SYMBOL_GPL(pci_rescan_bus);
+#endif
index b66bfdbd21f7aafa55f3ea9e6b4782bf29dc2f01..eea85dafc7632ce149d432355301ed3157eee195 100644 (file)
@@ -114,7 +114,6 @@ int pci_claim_resource(struct pci_dev *dev, int resource)
 }
 EXPORT_SYMBOL(pci_claim_resource);
 
-#ifdef CONFIG_PCI_QUIRKS
 void pci_disable_bridge_window(struct pci_dev *dev)
 {
        dev_info(&dev->dev, "disabling bridge mem windows\n");
@@ -127,9 +126,6 @@ void pci_disable_bridge_window(struct pci_dev *dev)
        pci_write_config_dword(dev, PCI_PREF_MEMORY_BASE, 0x0000fff0);
        pci_write_config_dword(dev, PCI_PREF_BASE_UPPER32, 0xffffffff);
 }
-#endif /* CONFIG_PCI_QUIRKS */
-
-
 
 static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev,
                int resno, resource_size_t size, resource_size_t align)
@@ -158,22 +154,44 @@ static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev,
        return ret;
 }
 
+/*
+ * Generic function that returns a value indicating that the device's
+ * original BIOS BAR address was not saved and so is not available for
+ * reinstatement.
+ *
+ * Can be over-ridden by architecture specific code that implements
+ * reinstatement functionality rather than leaving it disabled when
+ * normal allocation attempts fail.
+ */
+resource_size_t __weak pcibios_retrieve_fw_addr(struct pci_dev *dev, int idx)
+{
+       return 0;
+}
+
 static int pci_revert_fw_address(struct resource *res, struct pci_dev *dev, 
                int resno, resource_size_t size)
 {
        struct resource *root, *conflict;
-       resource_size_t start, end;
+       resource_size_t fw_addr, start, end;
        int ret = 0;
 
-       if (res->flags & IORESOURCE_IO)
-               root = &ioport_resource;
-       else
-               root = &iomem_resource;
+       fw_addr = pcibios_retrieve_fw_addr(dev, resno);
+       if (!fw_addr)
+               return 1;
 
        start = res->start;
        end = res->end;
-       res->start = dev->fw_addr[resno];
+       res->start = fw_addr;
        res->end = res->start + size - 1;
+
+       root = pci_find_parent_resource(dev, res);
+       if (!root) {
+               if (res->flags & IORESOURCE_IO)
+                       root = &ioport_resource;
+               else
+                       root = &iomem_resource;
+       }
+
        dev_info(&dev->dev, "BAR %d: trying firmware assignment %pR\n",
                 resno, res);
        conflict = request_resource_conflict(root, res);
@@ -228,16 +246,17 @@ int pci_reassign_resource(struct pci_dev *dev, int resno, resource_size_t addsiz
        int ret;
 
        if (!res->parent) {
-               dev_info(&dev->dev, "BAR %d: can't reassign an unassigned resouce %pR "
+               dev_info(&dev->dev, "BAR %d: can't reassign an unassigned resource %pR "
                         "\n", resno, res);
                return -EINVAL;
        }
 
-       new_size = resource_size(res) + addsize + min_align;
+       /* already aligned with min_align */
+       new_size = resource_size(res) + addsize;
        ret = _pci_assign_resource(dev, resno, new_size, min_align);
        if (!ret) {
                res->flags &= ~IORESOURCE_STARTALIGN;
-               dev_info(&dev->dev, "BAR %d: assigned %pR\n", resno, res);
+               dev_info(&dev->dev, "BAR %d: reassigned %pR\n", resno, res);
                if (resno < PCI_BRIDGE_RESOURCES)
                        pci_update_resource(dev, resno);
        }
@@ -267,7 +286,7 @@ int pci_assign_resource(struct pci_dev *dev, int resno)
         * where firmware left it.  That at least has a chance of
         * working, which is better than just leaving it disabled.
         */
-       if (ret < 0 && dev->fw_addr[resno])
+       if (ret < 0)
                ret = pci_revert_fw_address(res, dev, resno, size);
 
        if (!ret) {
@@ -279,53 +298,6 @@ int pci_assign_resource(struct pci_dev *dev, int resno)
        return ret;
 }
 
-
-/* Sort resources by alignment */
-void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head)
-{
-       int i;
-
-       for (i = 0; i < PCI_NUM_RESOURCES; i++) {
-               struct resource *r;
-               struct resource_list *list, *tmp;
-               resource_size_t r_align;
-
-               r = &dev->resource[i];
-
-               if (r->flags & IORESOURCE_PCI_FIXED)
-                       continue;
-
-               if (!(r->flags) || r->parent)
-                       continue;
-
-               r_align = pci_resource_alignment(dev, r);
-               if (!r_align) {
-                       dev_warn(&dev->dev, "BAR %d: %pR has bogus alignment\n",
-                                i, r);
-                       continue;
-               }
-               for (list = head; ; list = list->next) {
-                       resource_size_t align = 0;
-                       struct resource_list *ln = list->next;
-
-                       if (ln)
-                               align = pci_resource_alignment(ln->dev, ln->res);
-
-                       if (r_align > align) {
-                               tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
-                               if (!tmp)
-                                       panic("pdev_sort_resources(): "
-                                             "kmalloc() failed!\n");
-                               tmp->next = ln;
-                               tmp->res = r;
-                               tmp->dev = dev;
-                               list->next = tmp;
-                               break;
-                       }
-               }
-       }
-}
-
 int pci_enable_resources(struct pci_dev *dev, int mask)
 {
        u16 cmd, old_cmd;
index 401090110922b5495040ddbf083d3051f4bc49c5..fd00ff02ab4d0c51cf7b6eabe6b2a5ce9880fb12 100644 (file)
@@ -544,7 +544,7 @@ static void free_root_bus_devs(struct pci_bus *bus)
                dev = container_of(bus->devices.next, struct pci_dev,
                                   bus_list);
                dev_dbg(&dev->dev, "removing device\n");
-               pci_remove_bus_device(dev);
+               pci_stop_and_remove_bus_device(dev);
        }
 }
 
@@ -1044,7 +1044,7 @@ static int pcifront_detach_devices(struct pcifront_device *pdev)
                                domain, bus, slot, func);
                        continue;
                }
-               pci_remove_bus_device(pci_dev);
+               pci_stop_and_remove_bus_device(pci_dev);
                pci_dev_put(pci_dev);
 
                dev_dbg(&pdev->xdev->dev,
index 9a58862f1401a298f51010d1ca157f3453ed9a5d..6e75153c5b4f165371e84dac0dcd035f06fc1ea0 100644 (file)
@@ -108,5 +108,5 @@ void cb_free(struct pcmcia_socket *s)
        struct pci_dev *bridge = s->cb_dev;
 
        if (bridge)
-               pci_remove_behind_bridge(bridge);
+               pci_stop_and_remove_behind_bridge(bridge);
 }
index 72d731c21d45c19ddb1502899214f1408cdb7184..9929246895deb386a75dd08b00722f0424257595 100644 (file)
@@ -571,7 +571,7 @@ static void asus_rfkill_hotplug(struct asus_wmi *asus)
                } else {
                        dev = pci_get_slot(bus, 0);
                        if (dev) {
-                               pci_remove_bus_device(dev);
+                               pci_stop_and_remove_bus_device(dev);
                                pci_dev_put(dev);
                        }
                }
index ea44abd8df48d7db82c3b8e8a7d7c671cd00b028..d9a9e2bedb30311f9423adc276c29d19c47503f5 100644 (file)
@@ -646,7 +646,7 @@ static void eeepc_rfkill_hotplug(struct eeepc_laptop *eeepc, acpi_handle handle)
                } else {
                        dev = pci_get_slot(bus, 0);
                        if (dev) {
-                               pci_remove_bus_device(dev);
+                               pci_stop_and_remove_bus_device(dev);
                                pci_dev_put(dev);
                        }
                }
index c12702bb16d6b22b352a7cf4acc33ce566316923..dad9924abbbb61d32cd0efc80f7fe33f24f1a285 100644 (file)
@@ -47,6 +47,7 @@
 #define FCOE_KCQE_COMPLETION_STATUS_CTX_FREE_FAILURE   (0x4)
 #define FCOE_KCQE_COMPLETION_STATUS_NIC_ERROR                  (0x5)
 #define FCOE_KCQE_COMPLETION_STATUS_WRONG_HSI_VERSION   (0x6)
+#define FCOE_KCQE_COMPLETION_STATUS_PARITY_ERROR       (0x81)
 
 /* CQE type */
 #define FCOE_PENDING_CQE_TYPE                  0
index 57515f1f1690aa7ef6ccd71ba01d3125a7b2ebf0..495a841645f99dd2dc8c281219c96f162ac81ccb 100644 (file)
 #define ISCSI_KCQE_COMPLETION_STATUS_LOM_ISCSI_NOT_ENABLED              (0x51)
 
 #define ISCSI_KCQE_COMPLETION_STATUS_CID_BUSY                          (0x80)
+#define ISCSI_KCQE_COMPLETION_STATUS_PARITY_ERR                         (0x81)
 
 /* SQ/RQ/CQ DB structure sizes */
 #define ISCSI_SQ_DB_SIZE    (16)
index 82fa6ce481f0c131855667fbfda247d620cb2bfa..5e69f468535f243d200aaf1a6af31f4c8b9205cb 100644 (file)
@@ -132,7 +132,7 @@ static int mpt2sas_remove_dead_ioc_func(void *arg)
                pdev = ioc->pdev;
                if ((pdev == NULL))
                        return -1;
-               pci_remove_bus_device(pdev);
+               pci_stop_and_remove_bus_device(pdev);
                return 0;
 }
 
index 7732d69e49e012cca439ef21ea6b911cf2942c91..11de5f1be9819311254bae707cff67cb56c6cd99 100644 (file)
@@ -893,4 +893,5 @@ static void __devinit quirk_usb_early_handoff(struct pci_dev *pdev)
                quirk_usb_handoff_xhci(pdev);
        pci_disable_device(pdev);
 }
-DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, quirk_usb_early_handoff);
+DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID,
+                       PCI_CLASS_SERIAL_USB, 8, quirk_usb_early_handoff);
index 895da1dc1550f6753cad5418953051c1704049c0..b7d782bab79731694adc8d55489d8e258aace167 100644 (file)
@@ -753,10 +753,6 @@ module loading or during the runtime by using the interface
 
 i.e. echo "value" > /sys/module/cifs/parameters/<param>
 
-1. echo_retries - The number of echo attempts before giving up and
-                 reconnecting to the server. The default is 5. The value 0
-                 means never reconnect.
-
-2. enable_oplocks - Enable or disable oplocks. Oplocks are enabled by default.
+1. enable_oplocks - Enable or disable oplocks. Oplocks are enabled by default.
                    [Y/y/1]. To disable use any of [N/n/0].
 
index 24b3dfc05282e2214df5f3eb014eb124651d6ffe..573b899b5a5dfc55a4f6ab0f0e6dc679beacd6a2 100644 (file)
@@ -171,8 +171,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
                        seq_printf(m, "TCP status: %d\n\tLocal Users To "
                                   "Server: %d SecMode: 0x%x Req On Wire: %d",
                                   server->tcpStatus, server->srv_count,
-                                  server->sec_mode,
-                                  atomic_read(&server->inFlight));
+                                  server->sec_mode, in_flight(server));
 
 #ifdef CONFIG_CIFS_STATS2
                        seq_printf(m, " In Send: %d In MaxReq Wait: %d",
index 418fc42fb8b284d3b43c9ccc1c6ac2046e944f28..eee522c56ef0aa5478e7e4e5919e19082f402485 100644 (file)
@@ -76,12 +76,7 @@ MODULE_PARM_DESC(cifs_min_small, "Small network buffers in pool. Default: 30 "
 unsigned int cifs_max_pending = CIFS_MAX_REQ;
 module_param(cifs_max_pending, int, 0444);
 MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server. "
-                                  "Default: 50 Range: 2 to 256");
-unsigned short echo_retries = 5;
-module_param(echo_retries, ushort, 0644);
-MODULE_PARM_DESC(echo_retries, "Number of echo attempts before giving up and "
-                              "reconnecting server. Default: 5. 0 means "
-                              "never reconnect.");
+                                  "Default: 32767 Range: 2 to 32767.");
 module_param(enable_oplocks, bool, 0644);
 MODULE_PARM_DESC(enable_oplocks, "Enable or disable oplocks (bool). Default:"
                                 "y/Y/1");
@@ -1111,9 +1106,9 @@ init_cifs(void)
        if (cifs_max_pending < 2) {
                cifs_max_pending = 2;
                cFYI(1, "cifs_max_pending set to min of 2");
-       } else if (cifs_max_pending > 256) {
-               cifs_max_pending = 256;
-               cFYI(1, "cifs_max_pending set to max of 256");
+       } else if (cifs_max_pending > CIFS_MAX_REQ) {
+               cifs_max_pending = CIFS_MAX_REQ;
+               cFYI(1, "cifs_max_pending set to max of %u", CIFS_MAX_REQ);
        }
 
        rc = cifs_fscache_register();
@@ -1175,11 +1170,8 @@ static void __exit
 exit_cifs(void)
 {
        cFYI(DBG2, "exit_cifs");
-       cifs_proc_clean();
-       cifs_fscache_unregister();
-#ifdef CONFIG_CIFS_DFS_UPCALL
+       unregister_filesystem(&cifs_fs_type);
        cifs_dfs_release_automount_timer();
-#endif
 #ifdef CONFIG_CIFS_ACL
        cifs_destroy_idmaptrees();
        exit_cifs_idmap();
@@ -1187,10 +1179,11 @@ exit_cifs(void)
 #ifdef CONFIG_CIFS_UPCALL
        unregister_key_type(&cifs_spnego_key_type);
 #endif
-       unregister_filesystem(&cifs_fs_type);
-       cifs_destroy_inodecache();
-       cifs_destroy_mids();
        cifs_destroy_request_bufs();
+       cifs_destroy_mids();
+       cifs_destroy_inodecache();
+       cifs_fscache_unregister();
+       cifs_proc_clean();
 }
 
 MODULE_AUTHOR("Steve French <sfrench@us.ibm.com>");
index 76e7d8b6da171c6af1d8cb8466c9bfbc77d00dbf..339ebe3ebc0da2284c3d52d83eddce4b43a0855b 100644 (file)
 
 /*
  * MAX_REQ is the maximum number of requests that WE will send
- * on one socket concurrently. It also matches the most common
- * value of max multiplex returned by servers.  We may
- * eventually want to use the negotiated value (in case
- * future servers can handle more) when we are more confident that
- * we will not have problems oveloading the socket with pending
- * write data.
+ * on one socket concurrently.
  */
-#define CIFS_MAX_REQ 50
+#define CIFS_MAX_REQ 32767
 
 #define RFC1001_NAME_LEN 15
 #define RFC1001_NAME_LEN_WITH_NULL (RFC1001_NAME_LEN + 1)
@@ -255,7 +250,9 @@ struct TCP_Server_Info {
        bool noblocksnd;                /* use blocking sendmsg */
        bool noautotune;                /* do not autotune send buf sizes */
        bool tcp_nodelay;
-       atomic_t inFlight;  /* number of requests on the wire to server */
+       int credits;  /* send no more requests at once */
+       unsigned int in_flight;  /* number of requests on the wire to server */
+       spinlock_t req_lock;  /* protect the two values above */
        struct mutex srv_mutex;
        struct task_struct *tsk;
        char server_GUID[16];
@@ -263,6 +260,7 @@ struct TCP_Server_Info {
        bool session_estab; /* mark when very first sess is established */
        u16 dialect; /* dialect index that server chose */
        enum securityEnum secType;
+       bool oplocks:1; /* enable oplocks */
        unsigned int maxReq;    /* Clients should submit no more */
        /* than maxReq distinct unanswered SMBs to the server when using  */
        /* multiplexed reads or writes */
@@ -307,6 +305,36 @@ struct TCP_Server_Info {
 #endif
 };
 
+static inline unsigned int
+in_flight(struct TCP_Server_Info *server)
+{
+       unsigned int num;
+       spin_lock(&server->req_lock);
+       num = server->in_flight;
+       spin_unlock(&server->req_lock);
+       return num;
+}
+
+static inline int*
+get_credits_field(struct TCP_Server_Info *server)
+{
+       /*
+        * This will change to switch statement when we reserve slots for echos
+        * and oplock breaks.
+        */
+       return &server->credits;
+}
+
+static inline bool
+has_credits(struct TCP_Server_Info *server, int *credits)
+{
+       int num;
+       spin_lock(&server->req_lock);
+       num = *credits;
+       spin_unlock(&server->req_lock);
+       return num > 0;
+}
+
 /*
  * Macros to allow the TCP_Server_Info->net field and related code to drop out
  * when CONFIG_NET_NS isn't set.
@@ -1010,9 +1038,6 @@ GLOBAL_EXTERN unsigned int cifs_min_rcv;    /* min size of big ntwrk buf pool */
 GLOBAL_EXTERN unsigned int cifs_min_small;  /* min size of small buf pool */
 GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/
 
-/* reconnect after this many failed echo attempts */
-GLOBAL_EXTERN unsigned short echo_retries;
-
 #ifdef CONFIG_CIFS_ACL
 GLOBAL_EXTERN struct rb_root uidtree;
 GLOBAL_EXTERN struct rb_root gidtree;
index 6f4e243e0f624759251472d611353c8a2e32241e..503e73d8bdb7beddab4e2f495a2467e7efd17de1 100644 (file)
@@ -88,6 +88,9 @@ extern int SendReceiveBlockingLock(const unsigned int xid,
                        struct smb_hdr *in_buf ,
                        struct smb_hdr *out_buf,
                        int *bytes_returned);
+extern void cifs_add_credits(struct TCP_Server_Info *server,
+                            const unsigned int add);
+extern void cifs_set_credits(struct TCP_Server_Info *server, const int val);
 extern int checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length);
 extern bool is_valid_oplock_break(struct smb_hdr *smb,
                                  struct TCP_Server_Info *);
@@ -168,7 +171,13 @@ extern struct smb_vol *cifs_get_volume_info(char *mount_data,
                                            const char *devname);
 extern int cifs_mount(struct cifs_sb_info *, struct smb_vol *);
 extern void cifs_umount(struct cifs_sb_info *);
+
+#if IS_ENABLED(CONFIG_CIFS_DFS_UPCALL)
 extern void cifs_dfs_release_automount_timer(void);
+#else /* ! IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) */
+#define cifs_dfs_release_automount_timer()     do { } while (0)
+#endif /* ! IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) */
+
 void cifs_proc_init(void);
 void cifs_proc_clean(void);
 
index 8b7794c315919c5328a5bffbcb2aa5a9d7ad5b30..70aac35c398f2ed4ddf6bd296e8223e68569d12d 100644 (file)
@@ -458,7 +458,10 @@ CIFSSMBNegotiate(unsigned int xid, struct cifs_ses *ses)
                        goto neg_err_exit;
                }
                server->sec_mode = (__u8)le16_to_cpu(rsp->SecurityMode);
-               server->maxReq = le16_to_cpu(rsp->MaxMpxCount);
+               server->maxReq = min_t(unsigned int,
+                                      le16_to_cpu(rsp->MaxMpxCount),
+                                      cifs_max_pending);
+               cifs_set_credits(server, server->maxReq);
                server->maxBuf = le16_to_cpu(rsp->MaxBufSize);
                server->max_vcs = le16_to_cpu(rsp->MaxNumberVcs);
                /* even though we do not use raw we might as well set this
@@ -564,7 +567,9 @@ CIFSSMBNegotiate(unsigned int xid, struct cifs_ses *ses)
 
        /* one byte, so no need to convert this or EncryptionKeyLen from
           little endian */
-       server->maxReq = le16_to_cpu(pSMBr->MaxMpxCount);
+       server->maxReq = min_t(unsigned int, le16_to_cpu(pSMBr->MaxMpxCount),
+                              cifs_max_pending);
+       cifs_set_credits(server, server->maxReq);
        /* probably no need to store and check maxvcs */
        server->maxBuf = le32_to_cpu(pSMBr->MaxBufferSize);
        server->max_rw = le32_to_cpu(pSMBr->MaxRawSize);
@@ -716,8 +721,7 @@ cifs_echo_callback(struct mid_q_entry *mid)
        struct TCP_Server_Info *server = mid->callback_data;
 
        DeleteMidQEntry(mid);
-       atomic_dec(&server->inFlight);
-       wake_up(&server->request_q);
+       cifs_add_credits(server, 1);
 }
 
 int
@@ -1669,8 +1673,7 @@ cifs_readv_callback(struct mid_q_entry *mid)
 
        queue_work(system_nrt_wq, &rdata->work);
        DeleteMidQEntry(mid);
-       atomic_dec(&server->inFlight);
-       wake_up(&server->request_q);
+       cifs_add_credits(server, 1);
 }
 
 /* cifs_async_readv - send an async read, and set up mid to handle result */
@@ -2110,8 +2113,7 @@ cifs_writev_callback(struct mid_q_entry *mid)
 
        queue_work(system_nrt_wq, &wdata->work);
        DeleteMidQEntry(mid);
-       atomic_dec(&tcon->ses->server->inFlight);
-       wake_up(&tcon->ses->server->request_q);
+       cifs_add_credits(tcon->ses->server, 1);
 }
 
 /* cifs_async_writev - send an async write, and set up mid to handle result */
index 602f77c304c90fc402c9ec221eff2b78be818081..5560e1d5e54b5f7fe7f2f7d6bf9fe0a2d2a8852f 100644 (file)
@@ -373,12 +373,22 @@ allocate_buffers(struct TCP_Server_Info *server)
 static bool
 server_unresponsive(struct TCP_Server_Info *server)
 {
-       if (echo_retries > 0 && server->tcpStatus == CifsGood &&
-           time_after(jiffies, server->lstrp +
-                               (echo_retries * SMB_ECHO_INTERVAL))) {
+       /*
+        * We need to wait 2 echo intervals to make sure we handle such
+        * situations right:
+        * 1s  client sends a normal SMB request
+        * 2s  client gets a response
+        * 30s echo workqueue job pops, and decides we got a response recently
+        *     and don't need to send another
+        * ...
+        * 65s kernel_recvmsg times out, and we see that we haven't gotten
+        *     a response in >60s.
+        */
+       if (server->tcpStatus == CifsGood &&
+           time_after(jiffies, server->lstrp + 2 * SMB_ECHO_INTERVAL)) {
                cERROR(1, "Server %s has not responded in %d seconds. "
                          "Reconnecting...", server->hostname,
-                         (echo_retries * SMB_ECHO_INTERVAL / HZ));
+                         (2 * SMB_ECHO_INTERVAL) / HZ);
                cifs_reconnect(server);
                wake_up(&server->response_q);
                return true;
@@ -642,19 +652,11 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
        spin_unlock(&GlobalMid_Lock);
        wake_up_all(&server->response_q);
 
-       /*
-        * Check if we have blocked requests that need to free. Note that
-        * cifs_max_pending is normally 50, but can be set at module install
-        * time to as little as two.
-        */
-       spin_lock(&GlobalMid_Lock);
-       if (atomic_read(&server->inFlight) >= cifs_max_pending)
-               atomic_set(&server->inFlight, cifs_max_pending - 1);
-       /*
-        * We do not want to set the max_pending too low or we could end up
-        * with the counter going negative.
-        */
-       spin_unlock(&GlobalMid_Lock);
+       /* check if we have blocked requests that need to free */
+       spin_lock(&server->req_lock);
+       if (server->credits <= 0)
+               server->credits = 1;
+       spin_unlock(&server->req_lock);
        /*
         * Although there should not be any requests blocked on this queue it
         * can not hurt to be paranoid and try to wake up requests that may
@@ -1909,7 +1911,8 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
        tcp_ses->noblocksnd = volume_info->noblocksnd;
        tcp_ses->noautotune = volume_info->noautotune;
        tcp_ses->tcp_nodelay = volume_info->sockopt_tcp_nodelay;
-       atomic_set(&tcp_ses->inFlight, 0);
+       tcp_ses->in_flight = 0;
+       tcp_ses->credits = 1;
        init_waitqueue_head(&tcp_ses->response_q);
        init_waitqueue_head(&tcp_ses->request_q);
        INIT_LIST_HEAD(&tcp_ses->pending_mid_q);
@@ -3371,7 +3374,7 @@ cifs_ra_pages(struct cifs_sb_info *cifs_sb)
 int
 cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info)
 {
-       int rc = 0;
+       int rc;
        int xid;
        struct cifs_ses *pSesInfo;
        struct cifs_tcon *tcon;
@@ -3398,6 +3401,7 @@ try_mount_again:
                FreeXid(xid);
        }
 #endif
+       rc = 0;
        tcon = NULL;
        pSesInfo = NULL;
        srvTcp = NULL;
@@ -3759,9 +3763,11 @@ int cifs_negotiate_protocol(unsigned int xid, struct cifs_ses *ses)
        if (server->maxBuf != 0)
                return 0;
 
+       cifs_set_credits(server, 1);
        rc = CIFSSMBNegotiate(xid, ses);
        if (rc == -EAGAIN) {
                /* retry only once on 1st time connection */
+               cifs_set_credits(server, 1);
                rc = CIFSSMBNegotiate(xid, ses);
                if (rc == -EAGAIN)
                        rc = -EHOSTDOWN;
index bc7e24420ac0bee0ac2cab03e96224d1edf4adc7..d172c8ed901786f9e72ee40a50cb6bc22a3be571 100644 (file)
@@ -171,7 +171,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
        }
        tcon = tlink_tcon(tlink);
 
-       if (enable_oplocks)
+       if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
 
        if (nd)
@@ -492,7 +492,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 {
        int xid;
        int rc = 0; /* to get around spurious gcc warning, set to zero here */
-       __u32 oplock = enable_oplocks ? REQ_OPLOCK : 0;
+       __u32 oplock;
        __u16 fileHandle = 0;
        bool posix_open = false;
        struct cifs_sb_info *cifs_sb;
@@ -518,6 +518,8 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
        }
        pTcon = tlink_tcon(tlink);
 
+       oplock = pTcon->ses->server->oplocks ? REQ_OPLOCK : 0;
+
        /*
         * Don't allow the separator character in a path component.
         * The VFS will not allow "/", but "\" is allowed by posix.
index 5e64748a29173d75ad976c0375f7152d5ae0171d..159fcc56dc2d4c4f7ae4ac80e110cc59140f6001 100644 (file)
@@ -380,7 +380,7 @@ int cifs_open(struct inode *inode, struct file *file)
        cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
                 inode, file->f_flags, full_path);
 
-       if (enable_oplocks)
+       if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;
@@ -505,7 +505,7 @@ static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush)
        cFYI(1, "inode = 0x%p file flags 0x%x for %s",
                 inode, pCifsFile->f_flags, full_path);
 
-       if (enable_oplocks)
+       if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;
@@ -960,9 +960,9 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
        INIT_LIST_HEAD(&locks_to_send);
 
        /*
-        * Allocating count locks is enough because no locks can be added to
-        * the list while we are holding cinode->lock_mutex that protects
-        * locking operations of this inode.
+        * Allocating count locks is enough because no FL_POSIX locks can be
+        * added to the list while we are holding cinode->lock_mutex that
+        * protects locking operations of this inode.
         */
        for (; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
@@ -973,18 +973,20 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
                list_add_tail(&lck->llist, &locks_to_send);
        }
 
-       i = 0;
        el = locks_to_send.next;
        lock_flocks();
        cifs_for_each_lock(cfile->dentry->d_inode, before) {
+               flock = *before;
+               if ((flock->fl_flags & FL_POSIX) == 0)
+                       continue;
                if (el == &locks_to_send) {
-                       /* something is really wrong */
+                       /*
+                        * The list ended. We don't have enough allocated
+                        * structures - something is really wrong.
+                        */
                        cERROR(1, "Can't push all brlocks!");
                        break;
                }
-               flock = *before;
-               if ((flock->fl_flags & FL_POSIX) == 0)
-                       continue;
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
@@ -996,7 +998,6 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
-               i++;
                el = el->next;
        }
        unlock_flocks();
index 703ef5c6fdb1e5716f63fe2d01cddce92c15f7af..c273c12de98eb129f1bcb8f149dbfecb83032597 100644 (file)
@@ -690,3 +690,22 @@ backup_cred(struct cifs_sb_info *cifs_sb)
 
        return false;
 }
+
+void
+cifs_add_credits(struct TCP_Server_Info *server, const unsigned int add)
+{
+       spin_lock(&server->req_lock);
+       server->credits += add;
+       server->in_flight--;
+       spin_unlock(&server->req_lock);
+       wake_up(&server->request_q);
+}
+
+void
+cifs_set_credits(struct TCP_Server_Info *server, const int val)
+{
+       spin_lock(&server->req_lock);
+       server->credits = val;
+       server->oplocks = val > 1 ? enable_oplocks : false;
+       spin_unlock(&server->req_lock);
+}
index 0cc9584f5889f051208a00bb346869d73f6ed1ba..310918b6fcb46ff8f252d2557dabf30e7ea0181b 100644 (file)
@@ -254,44 +254,60 @@ smb_send(struct TCP_Server_Info *server, struct smb_hdr *smb_buffer,
        return smb_sendv(server, &iov, 1);
 }
 
-static int wait_for_free_request(struct TCP_Server_Info *server,
-                                const int long_op)
+static int
+wait_for_free_credits(struct TCP_Server_Info *server, const int optype,
+                     int *credits)
 {
-       if (long_op == CIFS_ASYNC_OP) {
+       int rc;
+
+       spin_lock(&server->req_lock);
+       if (optype == CIFS_ASYNC_OP) {
                /* oplock breaks must not be held up */
-               atomic_inc(&server->inFlight);
+               server->in_flight++;
+               *credits -= 1;
+               spin_unlock(&server->req_lock);
                return 0;
        }
 
-       spin_lock(&GlobalMid_Lock);
        while (1) {
-               if (atomic_read(&server->inFlight) >= cifs_max_pending) {
-                       spin_unlock(&GlobalMid_Lock);
+               if (*credits <= 0) {
+                       spin_unlock(&server->req_lock);
                        cifs_num_waiters_inc(server);
-                       wait_event(server->request_q,
-                                  atomic_read(&server->inFlight)
-                                    < cifs_max_pending);
+                       rc = wait_event_killable(server->request_q,
+                                                has_credits(server, credits));
                        cifs_num_waiters_dec(server);
-                       spin_lock(&GlobalMid_Lock);
+                       if (rc)
+                               return rc;
+                       spin_lock(&server->req_lock);
                } else {
                        if (server->tcpStatus == CifsExiting) {
-                               spin_unlock(&GlobalMid_Lock);
+                               spin_unlock(&server->req_lock);
                                return -ENOENT;
                        }
 
-                       /* can not count locking commands against total
-                          as they are allowed to block on server */
+                       /*
+                        * Can not count locking commands against total
+                        * as they are allowed to block on server.
+                        */
 
                        /* update # of requests on the wire to server */
-                       if (long_op != CIFS_BLOCKING_OP)
-                               atomic_inc(&server->inFlight);
-                       spin_unlock(&GlobalMid_Lock);
+                       if (optype != CIFS_BLOCKING_OP) {
+                               *credits -= 1;
+                               server->in_flight++;
+                       }
+                       spin_unlock(&server->req_lock);
                        break;
                }
        }
        return 0;
 }
 
+static int
+wait_for_free_request(struct TCP_Server_Info *server, const int optype)
+{
+       return wait_for_free_credits(server, optype, get_credits_field(server));
+}
+
 static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf,
                        struct mid_q_entry **ppmidQ)
 {
@@ -359,7 +375,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,
        mid = AllocMidQEntry(hdr, server);
        if (mid == NULL) {
                mutex_unlock(&server->srv_mutex);
-               atomic_dec(&server->inFlight);
+               cifs_add_credits(server, 1);
                wake_up(&server->request_q);
                return -ENOMEM;
        }
@@ -392,7 +408,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,
        return rc;
 out_err:
        delete_mid(mid);
-       atomic_dec(&server->inFlight);
+       cifs_add_credits(server, 1);
        wake_up(&server->request_q);
        return rc;
 }
@@ -564,8 +580,7 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
                mutex_unlock(&ses->server->srv_mutex);
                cifs_small_buf_release(in_buf);
                /* Update # of requests on wire to server */
-               atomic_dec(&ses->server->inFlight);
-               wake_up(&ses->server->request_q);
+               cifs_add_credits(ses->server, 1);
                return rc;
        }
        rc = cifs_sign_smb2(iov, n_vec, ses->server, &midQ->sequence_number);
@@ -601,8 +616,7 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
                        midQ->callback = DeleteMidQEntry;
                        spin_unlock(&GlobalMid_Lock);
                        cifs_small_buf_release(in_buf);
-                       atomic_dec(&ses->server->inFlight);
-                       wake_up(&ses->server->request_q);
+                       cifs_add_credits(ses->server, 1);
                        return rc;
                }
                spin_unlock(&GlobalMid_Lock);
@@ -612,8 +626,7 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
 
        rc = cifs_sync_mid_result(midQ, ses->server);
        if (rc != 0) {
-               atomic_dec(&ses->server->inFlight);
-               wake_up(&ses->server->request_q);
+               cifs_add_credits(ses->server, 1);
                return rc;
        }
 
@@ -637,8 +650,7 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
                midQ->resp_buf = NULL;
 out:
        delete_mid(midQ);
-       atomic_dec(&ses->server->inFlight);
-       wake_up(&ses->server->request_q);
+       cifs_add_credits(ses->server, 1);
 
        return rc;
 }
@@ -688,8 +700,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
        if (rc) {
                mutex_unlock(&ses->server->srv_mutex);
                /* Update # of requests on wire to server */
-               atomic_dec(&ses->server->inFlight);
-               wake_up(&ses->server->request_q);
+               cifs_add_credits(ses->server, 1);
                return rc;
        }
 
@@ -721,8 +732,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
                        /* no longer considered to be "in-flight" */
                        midQ->callback = DeleteMidQEntry;
                        spin_unlock(&GlobalMid_Lock);
-                       atomic_dec(&ses->server->inFlight);
-                       wake_up(&ses->server->request_q);
+                       cifs_add_credits(ses->server, 1);
                        return rc;
                }
                spin_unlock(&GlobalMid_Lock);
@@ -730,8 +740,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
 
        rc = cifs_sync_mid_result(midQ, ses->server);
        if (rc != 0) {
-               atomic_dec(&ses->server->inFlight);
-               wake_up(&ses->server->request_q);
+               cifs_add_credits(ses->server, 1);
                return rc;
        }
 
@@ -747,8 +756,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
        rc = cifs_check_receive(midQ, ses->server, 0);
 out:
        delete_mid(midQ);
-       atomic_dec(&ses->server->inFlight);
-       wake_up(&ses->server->request_q);
+       cifs_add_credits(ses->server, 1);
 
        return rc;
 }
index f848b52c67b19e565567168a2bd5810fcc5f0a6c..3ddcbb1c0a432728f626986b1d057d11aafdce84 100644 (file)
@@ -598,7 +598,7 @@ static struct rpc_procinfo  nlm4_procedures[] = {
        PROC(GRANTED_RES,       res,            norep),
 };
 
-struct rpc_version     nlm_version4 = {
+const struct rpc_version nlm_version4 = {
        .number         = 4,
        .nrprocs        = ARRAY_SIZE(nlm4_procedures),
        .procs          = nlm4_procedures,
index 8d4ea8351e3d4e093263104764d0f959aa35c05b..ba1dc2eebd1ef8413d0593abfde9e14229169ab3 100644 (file)
@@ -62,7 +62,8 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init)
 
        host = nlmclnt_lookup_host(nlm_init->address, nlm_init->addrlen,
                                   nlm_init->protocol, nlm_version,
-                                  nlm_init->hostname, nlm_init->noresvport);
+                                  nlm_init->hostname, nlm_init->noresvport,
+                                  nlm_init->net);
        if (host == NULL) {
                lockd_down();
                return ERR_PTR(-ENOLCK);
index 180ac34feb9a8630e3bbeff633b06d858a1420f8..3d35e3e80c1ccfac1367647b6ac417ba3f5bd1b2 100644 (file)
@@ -596,19 +596,19 @@ static struct rpc_procinfo        nlm_procedures[] = {
        PROC(GRANTED_RES,       res,            norep),
 };
 
-static struct rpc_version      nlm_version1 = {
+static const struct rpc_version        nlm_version1 = {
                .number         = 1,
                .nrprocs        = ARRAY_SIZE(nlm_procedures),
                .procs          = nlm_procedures,
 };
 
-static struct rpc_version      nlm_version3 = {
+static const struct rpc_version        nlm_version3 = {
                .number         = 3,
                .nrprocs        = ARRAY_SIZE(nlm_procedures),
                .procs          = nlm_procedures,
 };
 
-static struct rpc_version      *nlm_versions[] = {
+static const struct rpc_version        *nlm_versions[] = {
        [1] = &nlm_version1,
        [3] = &nlm_version3,
 #ifdef CONFIG_LOCKD_V4
@@ -618,7 +618,7 @@ static struct rpc_version   *nlm_versions[] = {
 
 static struct rpc_stat         nlm_rpc_stats;
 
-struct rpc_program             nlm_program = {
+const struct rpc_program       nlm_program = {
                .name           = "lockd",
                .number         = NLM_PROGRAM,
                .nrvers         = ARRAY_SIZE(nlm_versions),
index 6f29836ec0cbd81913cfda0f41852e3a4aa3fb4b..eb75ca7c2d6edd4782ad9f025b6115c78b3c9307 100644 (file)
@@ -17,6 +17,8 @@
 #include <linux/lockd/lockd.h>
 #include <linux/mutex.h>
 
+#include <linux/sunrpc/svc_xprt.h>
+
 #include <net/ipv6.h>
 
 #define NLMDBG_FACILITY                NLMDBG_HOSTCACHE
@@ -54,6 +56,7 @@ struct nlm_lookup_host_info {
        const char              *hostname;      /* remote's hostname */
        const size_t            hostname_len;   /* it's length */
        const int               noresvport;     /* use non-priv port */
+       struct net              *net;           /* network namespace to bind */
 };
 
 /*
@@ -155,6 +158,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni,
        INIT_LIST_HEAD(&host->h_reclaim);
        host->h_nsmhandle  = nsm;
        host->h_addrbuf    = nsm->sm_addrbuf;
+       host->net          = ni->net;
 
 out:
        return host;
@@ -206,7 +210,8 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
                                     const unsigned short protocol,
                                     const u32 version,
                                     const char *hostname,
-                                    int noresvport)
+                                    int noresvport,
+                                    struct net *net)
 {
        struct nlm_lookup_host_info ni = {
                .server         = 0,
@@ -217,6 +222,7 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
                .hostname       = hostname,
                .hostname_len   = strlen(hostname),
                .noresvport     = noresvport,
+               .net            = net,
        };
        struct hlist_head *chain;
        struct hlist_node *pos;
@@ -231,6 +237,8 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
 
        chain = &nlm_client_hosts[nlm_hash_address(sap)];
        hlist_for_each_entry(host, pos, chain, h_hash) {
+               if (host->net != net)
+                       continue;
                if (!rpc_cmp_addr(nlm_addr(host), sap))
                        continue;
 
@@ -318,6 +326,7 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
        struct nsm_handle *nsm = NULL;
        struct sockaddr *src_sap = svc_daddr(rqstp);
        size_t src_len = rqstp->rq_daddrlen;
+       struct net *net = rqstp->rq_xprt->xpt_net;
        struct nlm_lookup_host_info ni = {
                .server         = 1,
                .sap            = svc_addr(rqstp),
@@ -326,6 +335,7 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
                .version        = rqstp->rq_vers,
                .hostname       = hostname,
                .hostname_len   = hostname_len,
+               .net            = net,
        };
 
        dprintk("lockd: %s(host='%*s', vers=%u, proto=%s)\n", __func__,
@@ -339,6 +349,8 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
 
        chain = &nlm_server_hosts[nlm_hash_address(ni.sap)];
        hlist_for_each_entry(host, pos, chain, h_hash) {
+               if (host->net != net)
+                       continue;
                if (!rpc_cmp_addr(nlm_addr(host), ni.sap))
                        continue;
 
@@ -431,7 +443,7 @@ nlm_bind_host(struct nlm_host *host)
                        .to_retries     = 5U,
                };
                struct rpc_create_args args = {
-                       .net            = &init_net,
+                       .net            = host->net,
                        .protocol       = host->h_proto,
                        .address        = nlm_addr(host),
                        .addrsize       = host->h_addrlen,
@@ -553,12 +565,8 @@ void nlm_host_rebooted(const struct nlm_reboot *info)
        nsm_release(nsm);
 }
 
-/*
- * Shut down the hosts module.
- * Note that this routine is called only at server shutdown time.
- */
 void
-nlm_shutdown_hosts(void)
+nlm_shutdown_hosts_net(struct net *net)
 {
        struct hlist_head *chain;
        struct hlist_node *pos;
@@ -570,6 +578,8 @@ nlm_shutdown_hosts(void)
        /* First, make all hosts eligible for gc */
        dprintk("lockd: nuking all hosts...\n");
        for_each_host(host, pos, chain, nlm_server_hosts) {
+               if (net && host->net != net)
+                       continue;
                host->h_expires = jiffies - 1;
                if (host->h_rpcclnt) {
                        rpc_shutdown_client(host->h_rpcclnt);
@@ -580,15 +590,29 @@ nlm_shutdown_hosts(void)
        /* Then, perform a garbage collection pass */
        nlm_gc_hosts();
        mutex_unlock(&nlm_host_mutex);
+}
+
+/*
+ * Shut down the hosts module.
+ * Note that this routine is called only at server shutdown time.
+ */
+void
+nlm_shutdown_hosts(void)
+{
+       struct hlist_head *chain;
+       struct hlist_node *pos;
+       struct nlm_host *host;
+
+       nlm_shutdown_hosts_net(NULL);
 
        /* complain if any hosts are left */
        if (nrhosts != 0) {
                printk(KERN_WARNING "lockd: couldn't shutdown host module!\n");
                dprintk("lockd: %lu hosts left:\n", nrhosts);
                for_each_host(host, pos, chain, nlm_server_hosts) {
-                       dprintk("       %s (cnt %d use %d exp %ld)\n",
+                       dprintk("       %s (cnt %d use %d exp %ld net %p)\n",
                                host->h_name, atomic_read(&host->h_count),
-                               host->h_inuse, host->h_expires);
+                               host->h_inuse, host->h_expires, host->net);
                }
        }
 }
index 65ba36b80a9e1a482f838d0846a07d0ec800b015..7ef14b3c5bee9460609d863a973f0dfad93f163c 100644 (file)
@@ -47,7 +47,7 @@ struct nsm_res {
        u32                     state;
 };
 
-static struct rpc_program      nsm_program;
+static const struct rpc_program        nsm_program;
 static                         LIST_HEAD(nsm_handles);
 static                         DEFINE_SPINLOCK(nsm_lock);
 
@@ -62,14 +62,14 @@ static inline struct sockaddr *nsm_addr(const struct nsm_handle *nsm)
        return (struct sockaddr *)&nsm->sm_addr;
 }
 
-static struct rpc_clnt *nsm_create(void)
+static struct rpc_clnt *nsm_create(struct net *net)
 {
        struct sockaddr_in sin = {
                .sin_family             = AF_INET,
                .sin_addr.s_addr        = htonl(INADDR_LOOPBACK),
        };
        struct rpc_create_args args = {
-               .net                    = &init_net,
+               .net                    = net,
                .protocol               = XPRT_TRANSPORT_UDP,
                .address                = (struct sockaddr *)&sin,
                .addrsize               = sizeof(sin),
@@ -83,7 +83,8 @@ static struct rpc_clnt *nsm_create(void)
        return rpc_create(&args);
 }
 
-static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res)
+static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res,
+                        struct net *net)
 {
        struct rpc_clnt *clnt;
        int             status;
@@ -99,7 +100,7 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res)
                .rpc_resp       = res,
        };
 
-       clnt = nsm_create();
+       clnt = nsm_create(net);
        if (IS_ERR(clnt)) {
                status = PTR_ERR(clnt);
                dprintk("lockd: failed to create NSM upcall transport, "
@@ -149,7 +150,7 @@ int nsm_monitor(const struct nlm_host *host)
         */
        nsm->sm_mon_name = nsm_use_hostnames ? nsm->sm_name : nsm->sm_addrbuf;
 
-       status = nsm_mon_unmon(nsm, NSMPROC_MON, &res);
+       status = nsm_mon_unmon(nsm, NSMPROC_MON, &res, host->net);
        if (unlikely(res.status != 0))
                status = -EIO;
        if (unlikely(status < 0)) {
@@ -183,7 +184,7 @@ void nsm_unmonitor(const struct nlm_host *host)
         && nsm->sm_monitored && !nsm->sm_sticky) {
                dprintk("lockd: nsm_unmonitor(%s)\n", nsm->sm_name);
 
-               status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res);
+               status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res, host->net);
                if (res.status != 0)
                        status = -EIO;
                if (status < 0)
@@ -534,19 +535,19 @@ static struct rpc_procinfo        nsm_procedures[] = {
        },
 };
 
-static struct rpc_version      nsm_version1 = {
+static const struct rpc_version nsm_version1 = {
                .number         = 1,
                .nrprocs        = ARRAY_SIZE(nsm_procedures),
                .procs          = nsm_procedures
 };
 
-static struct rpc_version *    nsm_version[] = {
+static const struct rpc_version *nsm_version[] = {
        [1] = &nsm_version1,
 };
 
 static struct rpc_stat         nsm_stats;
 
-static struct rpc_program      nsm_program = {
+static const struct rpc_program nsm_program = {
                .name           = "statd",
                .number         = NSM_PROGRAM,
                .nrvers         = ARRAY_SIZE(nsm_version),
diff --git a/fs/lockd/netns.h b/fs/lockd/netns.h
new file mode 100644 (file)
index 0000000..ce227e0
--- /dev/null
@@ -0,0 +1,12 @@
+#ifndef __LOCKD_NETNS_H__
+#define __LOCKD_NETNS_H__
+
+#include <net/netns/generic.h>
+
+struct lockd_net {
+       unsigned int nlmsvc_users;
+};
+
+extern int lockd_net_id;
+
+#endif
index c061b9aa7ddb165c4b5241e73d9c8bb943f2f678..2774e1013b34467acc3c1c6bc55f47fcac8d3ca7 100644 (file)
@@ -35,6 +35,8 @@
 #include <linux/lockd/lockd.h>
 #include <linux/nfs.h>
 
+#include "netns.h"
+
 #define NLMDBG_FACILITY                NLMDBG_SVC
 #define LOCKD_BUFSIZE          (1024 + NLMSVC_XDRSIZE)
 #define ALLOWED_SIGS           (sigmask(SIGKILL))
@@ -50,6 +52,8 @@ static struct task_struct     *nlmsvc_task;
 static struct svc_rqst         *nlmsvc_rqst;
 unsigned long                  nlmsvc_timeout;
 
+int lockd_net_id;
+
 /*
  * These can be set at insmod time (useful for NFS as root filesystem),
  * and also changed through the sysctl interface.  -- Jamie Lokier, Aug 2003
@@ -189,27 +193,29 @@ lockd(void *vrqstp)
 }
 
 static int create_lockd_listener(struct svc_serv *serv, const char *name,
-                                const int family, const unsigned short port)
+                                struct net *net, const int family,
+                                const unsigned short port)
 {
        struct svc_xprt *xprt;
 
-       xprt = svc_find_xprt(serv, name, family, 0);
+       xprt = svc_find_xprt(serv, name, net, family, 0);
        if (xprt == NULL)
-               return svc_create_xprt(serv, name, &init_net, family, port,
+               return svc_create_xprt(serv, name, net, family, port,
                                                SVC_SOCK_DEFAULTS);
        svc_xprt_put(xprt);
        return 0;
 }
 
-static int create_lockd_family(struct svc_serv *serv, const int family)
+static int create_lockd_family(struct svc_serv *serv, struct net *net,
+                              const int family)
 {
        int err;
 
-       err = create_lockd_listener(serv, "udp", family, nlm_udpport);
+       err = create_lockd_listener(serv, "udp", net, family, nlm_udpport);
        if (err < 0)
                return err;
 
-       return create_lockd_listener(serv, "tcp", family, nlm_tcpport);
+       return create_lockd_listener(serv, "tcp", net, family, nlm_tcpport);
 }
 
 /*
@@ -222,16 +228,16 @@ static int create_lockd_family(struct svc_serv *serv, const int family)
  * Returns zero if all listeners are available; otherwise a
  * negative errno value is returned.
  */
-static int make_socks(struct svc_serv *serv)
+static int make_socks(struct svc_serv *serv, struct net *net)
 {
        static int warned;
        int err;
 
-       err = create_lockd_family(serv, PF_INET);
+       err = create_lockd_family(serv, net, PF_INET);
        if (err < 0)
                goto out_err;
 
-       err = create_lockd_family(serv, PF_INET6);
+       err = create_lockd_family(serv, net, PF_INET6);
        if (err < 0 && err != -EAFNOSUPPORT)
                goto out_err;
 
@@ -245,6 +251,47 @@ out_err:
        return err;
 }
 
+static int lockd_up_net(struct net *net)
+{
+       struct lockd_net *ln = net_generic(net, lockd_net_id);
+       struct svc_serv *serv = nlmsvc_rqst->rq_server;
+       int error;
+
+       if (ln->nlmsvc_users)
+               return 0;
+
+       error = svc_rpcb_setup(serv, net);
+       if (error)
+               goto err_rpcb;
+
+       error = make_socks(serv, net);
+       if (error < 0)
+               goto err_socks;
+       return 0;
+
+err_socks:
+       svc_rpcb_cleanup(serv, net);
+err_rpcb:
+       return error;
+}
+
+static void lockd_down_net(struct net *net)
+{
+       struct lockd_net *ln = net_generic(net, lockd_net_id);
+       struct svc_serv *serv = nlmsvc_rqst->rq_server;
+
+       if (ln->nlmsvc_users) {
+               if (--ln->nlmsvc_users == 0) {
+                       nlm_shutdown_hosts_net(net);
+                       svc_shutdown_net(serv, net);
+               }
+       } else {
+               printk(KERN_ERR "lockd_down_net: no users! task=%p, net=%p\n",
+                               nlmsvc_task, net);
+               BUG();
+       }
+}
+
 /*
  * Bring up the lockd process if it's not already up.
  */
@@ -252,13 +299,16 @@ int lockd_up(void)
 {
        struct svc_serv *serv;
        int             error = 0;
+       struct net *net = current->nsproxy->net_ns;
 
        mutex_lock(&nlmsvc_mutex);
        /*
         * Check whether we're already up and running.
         */
-       if (nlmsvc_rqst)
+       if (nlmsvc_rqst) {
+               error = lockd_up_net(net);
                goto out;
+       }
 
        /*
         * Sanity check: if there's no pid,
@@ -275,7 +325,7 @@ int lockd_up(void)
                goto out;
        }
 
-       error = make_socks(serv);
+       error = make_socks(serv, net);
        if (error < 0)
                goto destroy_and_out;
 
@@ -313,8 +363,12 @@ int lockd_up(void)
 destroy_and_out:
        svc_destroy(serv);
 out:
-       if (!error)
+       if (!error) {
+               struct lockd_net *ln = net_generic(net, lockd_net_id);
+
+               ln->nlmsvc_users++;
                nlmsvc_users++;
+       }
        mutex_unlock(&nlmsvc_mutex);
        return error;
 }
@@ -328,8 +382,10 @@ lockd_down(void)
 {
        mutex_lock(&nlmsvc_mutex);
        if (nlmsvc_users) {
-               if (--nlmsvc_users)
+               if (--nlmsvc_users) {
+                       lockd_down_net(current->nsproxy->net_ns);
                        goto out;
+               }
        } else {
                printk(KERN_ERR "lockd_down: no users! task=%p\n",
                        nlmsvc_task);
@@ -497,24 +553,55 @@ module_param_call(nlm_tcpport, param_set_port, param_get_int,
 module_param(nsm_use_hostnames, bool, 0644);
 module_param(nlm_max_connections, uint, 0644);
 
+static int lockd_init_net(struct net *net)
+{
+       return 0;
+}
+
+static void lockd_exit_net(struct net *net)
+{
+}
+
+static struct pernet_operations lockd_net_ops = {
+       .init = lockd_init_net,
+       .exit = lockd_exit_net,
+       .id = &lockd_net_id,
+       .size = sizeof(struct lockd_net),
+};
+
+
 /*
  * Initialising and terminating the module.
  */
 
 static int __init init_nlm(void)
 {
+       int err;
+
 #ifdef CONFIG_SYSCTL
+       err = -ENOMEM;
        nlm_sysctl_table = register_sysctl_table(nlm_sysctl_root);
-       return nlm_sysctl_table ? 0 : -ENOMEM;
-#else
+       if (nlm_sysctl_table == NULL)
+               goto err_sysctl;
+#endif
+       err = register_pernet_subsys(&lockd_net_ops);
+       if (err)
+               goto err_pernet;
        return 0;
+
+err_pernet:
+#ifdef CONFIG_SYSCTL
+       unregister_sysctl_table(nlm_sysctl_table);
 #endif
+err_sysctl:
+       return err;
 }
 
 static void __exit exit_nlm(void)
 {
        /* FIXME: delete all NLM clients */
        nlm_shutdown_hosts();
+       unregister_pernet_subsys(&lockd_net_ops);
 #ifdef CONFIG_SYSCTL
        unregister_sysctl_table(nlm_sysctl_table);
 #endif
index f0179c3745d27936f850c46d25ee7f6418f13ea7..e46353f41a4202ec2138998ab5449b833dc139db 100644 (file)
@@ -46,7 +46,6 @@ static void   nlmsvc_remove_block(struct nlm_block *block);
 static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock);
 static void nlmsvc_freegrantargs(struct nlm_rqst *call);
 static const struct rpc_call_ops nlmsvc_grant_ops;
-static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie);
 
 /*
  * The list of blocked locks to retry
@@ -54,6 +53,35 @@ static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie);
 static LIST_HEAD(nlm_blocked);
 static DEFINE_SPINLOCK(nlm_blocked_lock);
 
+#ifdef LOCKD_DEBUG
+static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie)
+{
+       /*
+        * We can get away with a static buffer because we're only
+        * called with BKL held.
+        */
+       static char buf[2*NLM_MAXCOOKIELEN+1];
+       unsigned int i, len = sizeof(buf);
+       char *p = buf;
+
+       len--;  /* allow for trailing \0 */
+       if (len < 3)
+               return "???";
+       for (i = 0 ; i < cookie->len ; i++) {
+               if (len < 2) {
+                       strcpy(p-3, "...");
+                       break;
+               }
+               sprintf(p, "%02x", cookie->data[i]);
+               p += 2;
+               len -= 2;
+       }
+       *p = '\0';
+
+       return buf;
+}
+#endif
+
 /*
  * Insert a blocked lock into the global list
  */
@@ -935,32 +963,3 @@ nlmsvc_retry_blocked(void)
 
        return timeout;
 }
-
-#ifdef RPC_DEBUG
-static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie)
-{
-       /*
-        * We can get away with a static buffer because we're only
-        * called with BKL held.
-        */
-       static char buf[2*NLM_MAXCOOKIELEN+1];
-       unsigned int i, len = sizeof(buf);
-       char *p = buf;
-
-       len--;  /* allow for trailing \0 */
-       if (len < 3)
-               return "???";
-       for (i = 0 ; i < cookie->len ; i++) {
-               if (len < 2) {
-                       strcpy(p-3, "...");
-                       break;
-               }
-               sprintf(p, "%02x", cookie->data[i]);
-               p += 2;
-               len -= 2;
-       }
-       *p = '\0';
-
-       return buf;
-}
-#endif
index dbcd82126aed309026dc55f33754f8209ceaaeb5..2a0e6c599147aac9e66a5969c9c00593aa0dd380 100644 (file)
@@ -64,6 +64,7 @@ config NFS_V4
        bool "NFS client support for NFS version 4"
        depends on NFS_FS
        select SUNRPC_GSS
+       select KEYS
        help
          This option enables support for version 4 of the NFS protocol
          (RFC 3530) in the kernel's NFS client.
@@ -98,6 +99,18 @@ config PNFS_OBJLAYOUT
        depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD
        default m
 
+config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN
+       string "NFSv4.1 Implementation ID Domain"
+       depends on NFS_V4_1
+       default "kernel.org"
+       help
+         This option defines the domain portion of the implementation ID that
+         may be sent in the NFS exchange_id operation.  The value must be in
+         the format of a DNS domain name and should be set to the DNS domain
+         name of the distribution.
+         If the NFS client is unchanged from the upstream kernel, this
+         option should be set to the default "kernel.org".
+
 config ROOT_NFS
        bool "Root file system on NFS"
        depends on NFS_FS=y && IP_PNP
@@ -130,16 +143,10 @@ config NFS_USE_KERNEL_DNS
        bool
        depends on NFS_V4 && !NFS_USE_LEGACY_DNS
        select DNS_RESOLVER
-       select KEYS
        default y
 
-config NFS_USE_NEW_IDMAPPER
-       bool "Use the new idmapper upcall routine"
-       depends on NFS_V4 && KEYS
-       help
-         Say Y here if you want NFS to use the new idmapper upcall functions.
-         You will need /sbin/request-key (usually provided by the keyutils
-         package).  For details, read
-         <file:Documentation/filesystems/nfs/idmapper.txt>.
-
-         If you are unsure, say N.
+config NFS_DEBUG
+       bool
+       depends on NFS_FS && SUNRPC_DEBUG
+       select CRC32
+       default y
index 48cfac31f64ce2b3679362b91f324ff9afc4e262..9c94297bb70e9502c40825249eecb24093e142d2 100644 (file)
@@ -46,9 +46,6 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Andy Adamson <andros@citi.umich.edu>");
 MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver");
 
-struct dentry *bl_device_pipe;
-wait_queue_head_t bl_wq;
-
 static void print_page(struct page *page)
 {
        dprintk("PRINTPAGE page %p\n", page);
@@ -236,12 +233,11 @@ bl_read_pagelist(struct nfs_read_data *rdata)
        sector_t isect, extent_length = 0;
        struct parallel_io *par;
        loff_t f_offset = rdata->args.offset;
-       size_t count = rdata->args.count;
        struct page **pages = rdata->args.pages;
        int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;
 
-       dprintk("%s enter nr_pages %u offset %lld count %Zd\n", __func__,
-              rdata->npages, f_offset, count);
+       dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
+              rdata->npages, f_offset, (unsigned int)rdata->args.count);
 
        par = alloc_parallel(rdata);
        if (!par)
@@ -1025,10 +1021,128 @@ static const struct rpc_pipe_ops bl_upcall_ops = {
        .destroy_msg    = bl_pipe_destroy_msg,
 };
 
+static struct dentry *nfs4blocklayout_register_sb(struct super_block *sb,
+                                           struct rpc_pipe *pipe)
+{
+       struct dentry *dir, *dentry;
+
+       dir = rpc_d_lookup_sb(sb, NFS_PIPE_DIRNAME);
+       if (dir == NULL)
+               return ERR_PTR(-ENOENT);
+       dentry = rpc_mkpipe_dentry(dir, "blocklayout", NULL, pipe);
+       dput(dir);
+       return dentry;
+}
+
+static void nfs4blocklayout_unregister_sb(struct super_block *sb,
+                                         struct rpc_pipe *pipe)
+{
+       if (pipe->dentry)
+               rpc_unlink(pipe->dentry);
+}
+
+static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event,
+                          void *ptr)
+{
+       struct super_block *sb = ptr;
+       struct net *net = sb->s_fs_info;
+       struct nfs_net *nn = net_generic(net, nfs_net_id);
+       struct dentry *dentry;
+       int ret = 0;
+
+       if (!try_module_get(THIS_MODULE))
+               return 0;
+
+       if (nn->bl_device_pipe == NULL) {
+               module_put(THIS_MODULE);
+               return 0;
+       }
+
+       switch (event) {
+       case RPC_PIPEFS_MOUNT:
+               dentry = nfs4blocklayout_register_sb(sb, nn->bl_device_pipe);
+               if (IS_ERR(dentry)) {
+                       ret = PTR_ERR(dentry);
+                       break;
+               }
+               nn->bl_device_pipe->dentry = dentry;
+               break;
+       case RPC_PIPEFS_UMOUNT:
+               if (nn->bl_device_pipe->dentry)
+                       nfs4blocklayout_unregister_sb(sb, nn->bl_device_pipe);
+               break;
+       default:
+               ret = -ENOTSUPP;
+               break;
+       }
+       module_put(THIS_MODULE);
+       return ret;
+}
+
+static struct notifier_block nfs4blocklayout_block = {
+       .notifier_call = rpc_pipefs_event,
+};
+
+static struct dentry *nfs4blocklayout_register_net(struct net *net,
+                                                  struct rpc_pipe *pipe)
+{
+       struct super_block *pipefs_sb;
+       struct dentry *dentry;
+
+       pipefs_sb = rpc_get_sb_net(net);
+       if (!pipefs_sb)
+               return NULL;
+       dentry = nfs4blocklayout_register_sb(pipefs_sb, pipe);
+       rpc_put_sb_net(net);
+       return dentry;
+}
+
+static void nfs4blocklayout_unregister_net(struct net *net,
+                                          struct rpc_pipe *pipe)
+{
+       struct super_block *pipefs_sb;
+
+       pipefs_sb = rpc_get_sb_net(net);
+       if (pipefs_sb) {
+               nfs4blocklayout_unregister_sb(pipefs_sb, pipe);
+               rpc_put_sb_net(net);
+       }
+}
+
+static int nfs4blocklayout_net_init(struct net *net)
+{
+       struct nfs_net *nn = net_generic(net, nfs_net_id);
+       struct dentry *dentry;
+
+       init_waitqueue_head(&nn->bl_wq);
+       nn->bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0);
+       if (IS_ERR(nn->bl_device_pipe))
+               return PTR_ERR(nn->bl_device_pipe);
+       dentry = nfs4blocklayout_register_net(net, nn->bl_device_pipe);
+       if (IS_ERR(dentry)) {
+               rpc_destroy_pipe_data(nn->bl_device_pipe);
+               return PTR_ERR(dentry);
+       }
+       nn->bl_device_pipe->dentry = dentry;
+       return 0;
+}
+
+static void nfs4blocklayout_net_exit(struct net *net)
+{
+       struct nfs_net *nn = net_generic(net, nfs_net_id);
+
+       nfs4blocklayout_unregister_net(net, nn->bl_device_pipe);
+       rpc_destroy_pipe_data(nn->bl_device_pipe);
+       nn->bl_device_pipe = NULL;
+}
+
+static struct pernet_operations nfs4blocklayout_net_ops = {
+       .init = nfs4blocklayout_net_init,
+       .exit = nfs4blocklayout_net_exit,
+};
+
 static int __init nfs4blocklayout_init(void)
 {
-       struct vfsmount *mnt;
-       struct path path;
        int ret;
 
        dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__);
@@ -1037,32 +1151,17 @@ static int __init nfs4blocklayout_init(void)
        if (ret)
                goto out;
 
-       init_waitqueue_head(&bl_wq);
-
-       mnt = rpc_get_mount();
-       if (IS_ERR(mnt)) {
-               ret = PTR_ERR(mnt);
+       ret = rpc_pipefs_notifier_register(&nfs4blocklayout_block);
+       if (ret)
                goto out_remove;
-       }
-
-       ret = vfs_path_lookup(mnt->mnt_root,
-                             mnt,
-                             NFS_PIPE_DIRNAME, 0, &path);
+       ret = register_pernet_subsys(&nfs4blocklayout_net_ops);
        if (ret)
-               goto out_putrpc;
-
-       bl_device_pipe = rpc_mkpipe(path.dentry, "blocklayout", NULL,
-                                   &bl_upcall_ops, 0);
-       path_put(&path);
-       if (IS_ERR(bl_device_pipe)) {
-               ret = PTR_ERR(bl_device_pipe);
-               goto out_putrpc;
-       }
+               goto out_notifier;
 out:
        return ret;
 
-out_putrpc:
-       rpc_put_mount();
+out_notifier:
+       rpc_pipefs_notifier_unregister(&nfs4blocklayout_block);
 out_remove:
        pnfs_unregister_layoutdriver(&blocklayout_type);
        return ret;
@@ -1073,9 +1172,9 @@ static void __exit nfs4blocklayout_exit(void)
        dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n",
               __func__);
 
+       rpc_pipefs_notifier_unregister(&nfs4blocklayout_block);
+       unregister_pernet_subsys(&nfs4blocklayout_net_ops);
        pnfs_unregister_layoutdriver(&blocklayout_type);
-       rpc_unlink(bl_device_pipe);
-       rpc_put_mount();
 }
 
 MODULE_ALIAS("nfs-layouttype4-3");
index e31a2df28e70aca040560b8d94403d85d67cd170..03350690118e239161fceb18e5939b97d7e062b4 100644 (file)
@@ -37,6 +37,7 @@
 #include <linux/sunrpc/rpc_pipe_fs.h>
 
 #include "../pnfs.h"
+#include "../netns.h"
 
 #define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT)
 #define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT)
@@ -50,6 +51,7 @@ struct pnfs_block_dev {
        struct list_head                bm_node;
        struct nfs4_deviceid            bm_mdevid;    /* associated devid */
        struct block_device             *bm_mdev;     /* meta device itself */
+       struct net                      *net;
 };
 
 enum exstate4 {
@@ -151,9 +153,9 @@ BLK_LSEG2EXT(struct pnfs_layout_segment *lseg)
        return BLK_LO2EXT(lseg->pls_layout);
 }
 
-struct bl_dev_msg {
-       int32_t status;
-       uint32_t major, minor;
+struct bl_pipe_msg {
+       struct rpc_pipe_msg msg;
+       wait_queue_head_t *bl_wq;
 };
 
 struct bl_msg_hdr {
@@ -161,9 +163,6 @@ struct bl_msg_hdr {
        u16 totallen; /* length of entire message, including hdr itself */
 };
 
-extern struct dentry *bl_device_pipe;
-extern wait_queue_head_t bl_wq;
-
 #define BL_DEVICE_UMOUNT               0x0 /* Umount--delete devices */
 #define BL_DEVICE_MOUNT                0x1 /* Mount--create devices*/
 #define BL_DEVICE_REQUEST_INIT         0x0 /* Start request */
index d08ba9107fde2fa5c608a9b581cc4f7e12e5cb4f..a5c88a554d921455256bb4dbeea7eaa498da5499 100644 (file)
@@ -46,7 +46,7 @@ static int decode_sector_number(__be32 **rp, sector_t *sp)
 
        *rp = xdr_decode_hyper(*rp, &s);
        if (s & 0x1ff) {
-               printk(KERN_WARNING "%s: sector not aligned\n", __func__);
+               printk(KERN_WARNING "NFS: %s: sector not aligned\n", __func__);
                return -1;
        }
        *sp = s >> SECTOR_SHIFT;
@@ -79,27 +79,30 @@ int nfs4_blkdev_put(struct block_device *bdev)
        return blkdev_put(bdev, FMODE_READ);
 }
 
-static struct bl_dev_msg bl_mount_reply;
-
 ssize_t bl_pipe_downcall(struct file *filp, const char __user *src,
                         size_t mlen)
 {
+       struct nfs_net *nn = net_generic(filp->f_dentry->d_sb->s_fs_info,
+                                        nfs_net_id);
+
        if (mlen != sizeof (struct bl_dev_msg))
                return -EINVAL;
 
-       if (copy_from_user(&bl_mount_reply, src, mlen) != 0)
+       if (copy_from_user(&nn->bl_mount_reply, src, mlen) != 0)
                return -EFAULT;
 
-       wake_up(&bl_wq);
+       wake_up(&nn->bl_wq);
 
        return mlen;
 }
 
 void bl_pipe_destroy_msg(struct rpc_pipe_msg *msg)
 {
+       struct bl_pipe_msg *bl_pipe_msg = container_of(msg, struct bl_pipe_msg, msg);
+
        if (msg->errno >= 0)
                return;
-       wake_up(&bl_wq);
+       wake_up(bl_pipe_msg->bl_wq);
 }
 
 /*
@@ -111,29 +114,33 @@ nfs4_blk_decode_device(struct nfs_server *server,
 {
        struct pnfs_block_dev *rv;
        struct block_device *bd = NULL;
-       struct rpc_pipe_msg msg;
+       struct bl_pipe_msg bl_pipe_msg;
+       struct rpc_pipe_msg *msg = &bl_pipe_msg.msg;
        struct bl_msg_hdr bl_msg = {
                .type = BL_DEVICE_MOUNT,
                .totallen = dev->mincount,
        };
        uint8_t *dataptr;
        DECLARE_WAITQUEUE(wq, current);
-       struct bl_dev_msg *reply = &bl_mount_reply;
        int offset, len, i, rc;
+       struct net *net = server->nfs_client->net;
+       struct nfs_net *nn = net_generic(net, nfs_net_id);
+       struct bl_dev_msg *reply = &nn->bl_mount_reply;
 
        dprintk("%s CREATING PIPEFS MESSAGE\n", __func__);
        dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data,
                dev->mincount);
 
-       memset(&msg, 0, sizeof(msg));
-       msg.data = kzalloc(sizeof(bl_msg) + dev->mincount, GFP_NOFS);
-       if (!msg.data) {
+       bl_pipe_msg.bl_wq = &nn->bl_wq;
+       memset(msg, 0, sizeof(*msg));
+       msg->data = kzalloc(sizeof(bl_msg) + dev->mincount, GFP_NOFS);
+       if (!msg->data) {
                rv = ERR_PTR(-ENOMEM);
                goto out;
        }
 
-       memcpy(msg.data, &bl_msg, sizeof(bl_msg));
-       dataptr = (uint8_t *) msg.data;
+       memcpy(msg->data, &bl_msg, sizeof(bl_msg));
+       dataptr = (uint8_t *) msg->data;
        len = dev->mincount;
        offset = sizeof(bl_msg);
        for (i = 0; len > 0; i++) {
@@ -142,13 +149,13 @@ nfs4_blk_decode_device(struct nfs_server *server,
                len -= PAGE_CACHE_SIZE;
                offset += PAGE_CACHE_SIZE;
        }
-       msg.len = sizeof(bl_msg) + dev->mincount;
+       msg->len = sizeof(bl_msg) + dev->mincount;
 
        dprintk("%s CALLING USERSPACE DAEMON\n", __func__);
-       add_wait_queue(&bl_wq, &wq);
-       rc = rpc_queue_upcall(bl_device_pipe->d_inode, &msg);
+       add_wait_queue(&nn->bl_wq, &wq);
+       rc = rpc_queue_upcall(nn->bl_device_pipe, msg);
        if (rc < 0) {
-               remove_wait_queue(&bl_wq, &wq);
+               remove_wait_queue(&nn->bl_wq, &wq);
                rv = ERR_PTR(rc);
                goto out;
        }
@@ -156,7 +163,7 @@ nfs4_blk_decode_device(struct nfs_server *server,
        set_current_state(TASK_UNINTERRUPTIBLE);
        schedule();
        __set_current_state(TASK_RUNNING);
-       remove_wait_queue(&bl_wq, &wq);
+       remove_wait_queue(&nn->bl_wq, &wq);
 
        if (reply->status != BL_DEVICE_REQUEST_PROC) {
                dprintk("%s failed to open device: %d\n",
@@ -181,13 +188,14 @@ nfs4_blk_decode_device(struct nfs_server *server,
 
        rv->bm_mdev = bd;
        memcpy(&rv->bm_mdevid, &dev->dev_id, sizeof(struct nfs4_deviceid));
+       rv->net = net;
        dprintk("%s Created device %s with bd_block_size %u\n",
                __func__,
                bd->bd_disk->disk_name,
                bd->bd_block_size);
 
 out:
-       kfree(msg.data);
+       kfree(msg->data);
        return rv;
 }
 
index d055c75580734853a29ae2b3553d5c0268bf44f9..737d839bc17b5aa0ae58e5350a235af1f8adfb2b 100644 (file)
 
 #define NFSDBG_FACILITY         NFSDBG_PNFS_LD
 
-static void dev_remove(dev_t dev)
+static void dev_remove(struct net *net, dev_t dev)
 {
-       struct rpc_pipe_msg msg;
+       struct bl_pipe_msg bl_pipe_msg;
+       struct rpc_pipe_msg *msg = &bl_pipe_msg.msg;
        struct bl_dev_msg bl_umount_request;
        struct bl_msg_hdr bl_msg = {
                .type = BL_DEVICE_UMOUNT,
@@ -48,36 +49,38 @@ static void dev_remove(dev_t dev)
        };
        uint8_t *dataptr;
        DECLARE_WAITQUEUE(wq, current);
+       struct nfs_net *nn = net_generic(net, nfs_net_id);
 
        dprintk("Entering %s\n", __func__);
 
-       memset(&msg, 0, sizeof(msg));
-       msg.data = kzalloc(1 + sizeof(bl_umount_request), GFP_NOFS);
-       if (!msg.data)
+       bl_pipe_msg.bl_wq = &nn->bl_wq;
+       memset(msg, 0, sizeof(*msg));
+       msg->data = kzalloc(1 + sizeof(bl_umount_request), GFP_NOFS);
+       if (!msg->data)
                goto out;
 
        memset(&bl_umount_request, 0, sizeof(bl_umount_request));
        bl_umount_request.major = MAJOR(dev);
        bl_umount_request.minor = MINOR(dev);
 
-       memcpy(msg.data, &bl_msg, sizeof(bl_msg));
-       dataptr = (uint8_t *) msg.data;
+       memcpy(msg->data, &bl_msg, sizeof(bl_msg));
+       dataptr = (uint8_t *) msg->data;
        memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request));
-       msg.len = sizeof(bl_msg) + bl_msg.totallen;
+       msg->len = sizeof(bl_msg) + bl_msg.totallen;
 
-       add_wait_queue(&bl_wq, &wq);
-       if (rpc_queue_upcall(bl_device_pipe->d_inode, &msg) < 0) {
-               remove_wait_queue(&bl_wq, &wq);
+       add_wait_queue(&nn->bl_wq, &wq);
+       if (rpc_queue_upcall(nn->bl_device_pipe, msg) < 0) {
+               remove_wait_queue(&nn->bl_wq, &wq);
                goto out;
        }
 
        set_current_state(TASK_UNINTERRUPTIBLE);
        schedule();
        __set_current_state(TASK_RUNNING);
-       remove_wait_queue(&bl_wq, &wq);
+       remove_wait_queue(&nn->bl_wq, &wq);
 
 out:
-       kfree(msg.data);
+       kfree(msg->data);
 }
 
 /*
@@ -90,10 +93,10 @@ static void nfs4_blk_metadev_release(struct pnfs_block_dev *bdev)
        dprintk("%s Releasing\n", __func__);
        rv = nfs4_blkdev_put(bdev->bm_mdev);
        if (rv)
-               printk(KERN_ERR "%s nfs4_blkdev_put returns %d\n",
+               printk(KERN_ERR "NFS: %s nfs4_blkdev_put returns %d\n",
                                __func__, rv);
 
-       dev_remove(bdev->bm_mdev->bd_dev);
+       dev_remove(bdev->net, bdev->bm_mdev->bd_dev);
 }
 
 void bl_free_block_dev(struct pnfs_block_dev *bdev)
index 1abac09f7cd5f9fd46cc07401873067e49a4b7f7..1f9a6032796b0ff239f2337fae7b656deb71ced6 100644 (file)
@@ -147,7 +147,7 @@ static int _preload_range(struct pnfs_inval_markings *marks,
        count = (int)(end - start) / (int)tree->mtt_step_size;
 
        /* Pre-malloc what memory we might need */
-       storage = kmalloc(sizeof(*storage) * count, GFP_NOFS);
+       storage = kcalloc(count, sizeof(*storage), GFP_NOFS);
        if (!storage)
                return -ENOMEM;
        for (i = 0; i < count; i++) {
index c98b439332fcf913bcc4dfb4e34242dfed5c70a0..dded2636811182497c5c78d24f51e81577d7aad5 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/slab.h>
 #include <linux/sunrpc/cache.h>
 #include <linux/sunrpc/rpc_pipe_fs.h>
+#include <net/net_namespace.h>
 
 #include "cache_lib.h"
 
@@ -111,30 +112,54 @@ int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq)
        return 0;
 }
 
-int nfs_cache_register(struct cache_detail *cd)
+int nfs_cache_register_sb(struct super_block *sb, struct cache_detail *cd)
 {
-       struct vfsmount *mnt;
-       struct path path;
        int ret;
+       struct dentry *dir;
 
-       mnt = rpc_get_mount();
-       if (IS_ERR(mnt))
-               return PTR_ERR(mnt);
-       ret = vfs_path_lookup(mnt->mnt_root, mnt, "/cache", 0, &path);
-       if (ret)
-               goto err;
-       ret = sunrpc_cache_register_pipefs(path.dentry, cd->name, 0600, cd);
-       path_put(&path);
-       if (!ret)
-               return ret;
-err:
-       rpc_put_mount();
+       dir = rpc_d_lookup_sb(sb, "cache");
+       BUG_ON(dir == NULL);
+       ret = sunrpc_cache_register_pipefs(dir, cd->name, 0600, cd);
+       dput(dir);
        return ret;
 }
 
-void nfs_cache_unregister(struct cache_detail *cd)
+int nfs_cache_register_net(struct net *net, struct cache_detail *cd)
 {
-       sunrpc_cache_unregister_pipefs(cd);
-       rpc_put_mount();
+       struct super_block *pipefs_sb;
+       int ret = 0;
+
+       pipefs_sb = rpc_get_sb_net(net);
+       if (pipefs_sb) {
+               ret = nfs_cache_register_sb(pipefs_sb, cd);
+               rpc_put_sb_net(net);
+       }
+       return ret;
+}
+
+void nfs_cache_unregister_sb(struct super_block *sb, struct cache_detail *cd)
+{
+       if (cd->u.pipefs.dir)
+               sunrpc_cache_unregister_pipefs(cd);
+}
+
+void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd)
+{
+       struct super_block *pipefs_sb;
+
+       pipefs_sb = rpc_get_sb_net(net);
+       if (pipefs_sb) {
+               nfs_cache_unregister_sb(pipefs_sb, cd);
+               rpc_put_sb_net(net);
+       }
+}
+
+void nfs_cache_init(struct cache_detail *cd)
+{
+       sunrpc_init_cache_detail(cd);
 }
 
+void nfs_cache_destroy(struct cache_detail *cd)
+{
+       sunrpc_destroy_cache_detail(cd);
+}
index 7cf6cafcc007d8a5350aee6eae30647dceaab802..317db95e37f80375b371130afd58cb31f39161ed 100644 (file)
@@ -23,5 +23,11 @@ extern struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void);
 extern void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq);
 extern int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq);
 
-extern int nfs_cache_register(struct cache_detail *cd);
-extern void nfs_cache_unregister(struct cache_detail *cd);
+extern void nfs_cache_init(struct cache_detail *cd);
+extern void nfs_cache_destroy(struct cache_detail *cd);
+extern int nfs_cache_register_net(struct net *net, struct cache_detail *cd);
+extern void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd);
+extern int nfs_cache_register_sb(struct super_block *sb,
+                                struct cache_detail *cd);
+extern void nfs_cache_unregister_sb(struct super_block *sb,
+                                   struct cache_detail *cd);
index 516f3375e067d584aa26b2a65d82f4946b647e30..eb95f5091c1aff93930e17a829a808023edc2e12 100644 (file)
@@ -85,7 +85,7 @@ nfs4_callback_svc(void *vrqstp)
                }
                if (err < 0) {
                        if (err != preverr) {
-                               printk(KERN_WARNING "%s: unexpected error "
+                               printk(KERN_WARNING "NFS: %s: unexpected error "
                                        "from svc_recv (%d)\n", __func__, err);
                                preverr = err;
                        }
@@ -101,12 +101,12 @@ nfs4_callback_svc(void *vrqstp)
 /*
  * Prepare to bring up the NFSv4 callback service
  */
-struct svc_rqst *
-nfs4_callback_up(struct svc_serv *serv)
+static struct svc_rqst *
+nfs4_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)
 {
        int ret;
 
-       ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET,
+       ret = svc_create_xprt(serv, "tcp", xprt->xprt_net, PF_INET,
                                nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
        if (ret <= 0)
                goto out_err;
@@ -114,7 +114,7 @@ nfs4_callback_up(struct svc_serv *serv)
        dprintk("NFS: Callback listener port = %u (af %u)\n",
                        nfs_callback_tcpport, PF_INET);
 
-       ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET6,
+       ret = svc_create_xprt(serv, "tcp", xprt->xprt_net, PF_INET6,
                                nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
        if (ret > 0) {
                nfs_callback_tcpport6 = ret;
@@ -172,7 +172,7 @@ nfs41_callback_svc(void *vrqstp)
 /*
  * Bring up the NFSv4.1 callback service
  */
-struct svc_rqst *
+static struct svc_rqst *
 nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)
 {
        struct svc_rqst *rqstp;
@@ -183,7 +183,7 @@ nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)
         * fore channel connection.
         * Returns the input port (0) and sets the svc_serv bc_xprt on success
         */
-       ret = svc_create_xprt(serv, "tcp-bc", &init_net, PF_INET, 0,
+       ret = svc_create_xprt(serv, "tcp-bc", xprt->xprt_net, PF_INET, 0,
                              SVC_SOCK_ANONYMOUS);
        if (ret < 0) {
                rqstp = ERR_PTR(ret);
@@ -269,7 +269,7 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt)
                                        serv, xprt, &rqstp, &callback_svc);
        if (!minorversion_setup) {
                /* v4.0 callback setup */
-               rqstp = nfs4_callback_up(serv);
+               rqstp = nfs4_callback_up(serv, xprt);
                callback_svc = nfs4_callback_svc;
        }
 
@@ -332,7 +332,6 @@ void nfs_callback_down(int minorversion)
 int
 check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp)
 {
-       struct rpc_clnt *r = clp->cl_rpcclient;
        char *p = svc_gss_principal(rqstp);
 
        if (rqstp->rq_authop->flavour != RPC_AUTH_GSS)
@@ -353,7 +352,7 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp)
        if (memcmp(p, "nfs@", 4) != 0)
                return 0;
        p += 4;
-       if (strcmp(p, r->cl_server) != 0)
+       if (strcmp(p, clp->cl_hostname) != 0)
                return 0;
        return 1;
 }
index c89d3b9e483c463cb1b9232e4b97520a7b7e1eaf..a5527c90a5aae67a67e2320ffa9d2dea1b00d4d3 100644 (file)
@@ -38,7 +38,8 @@ enum nfs4_callback_opnum {
 struct cb_process_state {
        __be32                  drc_status;
        struct nfs_client       *clp;
-       int                     slotid;
+       u32                     slotid;
+       struct net              *net;
 };
 
 struct cb_compound_hdr_arg {
index 54cea8ad5a76ff6f8796030c4f42ad7f70d12ca6..1b5d809a105e42d992344aad78fb57526c2af8fa 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/nfs4.h>
 #include <linux/nfs_fs.h>
 #include <linux/slab.h>
+#include <linux/rcupdate.h>
 #include "nfs4_fs.h"
 #include "callback.h"
 #include "delegation.h"
@@ -33,7 +34,7 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
        res->bitmap[0] = res->bitmap[1] = 0;
        res->status = htonl(NFS4ERR_BADHANDLE);
 
-       dprintk("NFS: GETATTR callback request from %s\n",
+       dprintk_rcu("NFS: GETATTR callback request from %s\n",
                rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR));
 
        inode = nfs_delegation_find_inode(cps->clp, &args->fh);
@@ -73,7 +74,7 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
        if (!cps->clp) /* Always set for v4.0. Set in cb_sequence for v4.1 */
                goto out;
 
-       dprintk("NFS: RECALL callback request from %s\n",
+       dprintk_rcu("NFS: RECALL callback request from %s\n",
                rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR));
 
        res = htonl(NFS4ERR_BADHANDLE);
@@ -86,8 +87,7 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
                res = 0;
                break;
        case -ENOENT:
-               if (res != 0)
-                       res = htonl(NFS4ERR_BAD_STATEID);
+               res = htonl(NFS4ERR_BAD_STATEID);
                break;
        default:
                res = htonl(NFS4ERR_RESOURCE);
@@ -98,52 +98,64 @@ out:
        return res;
 }
 
-int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
-{
-       if (delegation == NULL || memcmp(delegation->stateid.data, stateid->data,
-                                        sizeof(delegation->stateid.data)) != 0)
-               return 0;
-       return 1;
-}
-
 #if defined(CONFIG_NFS_V4_1)
 
-static u32 initiate_file_draining(struct nfs_client *clp,
-                                 struct cb_layoutrecallargs *args)
+/*
+ * Lookup a layout by filehandle.
+ *
+ * Note: gets a refcount on the layout hdr and on its respective inode.
+ * Caller must put the layout hdr and the inode.
+ *
+ * TODO: keep track of all layouts (and delegations) in a hash table
+ * hashed by filehandle.
+ */
+static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp, struct nfs_fh *fh)
 {
        struct nfs_server *server;
-       struct pnfs_layout_hdr *lo;
        struct inode *ino;
-       bool found = false;
-       u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
-       LIST_HEAD(free_me_list);
+       struct pnfs_layout_hdr *lo;
 
-       spin_lock(&clp->cl_lock);
-       rcu_read_lock();
        list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
                list_for_each_entry(lo, &server->layouts, plh_layouts) {
-                       if (nfs_compare_fh(&args->cbl_fh,
-                                          &NFS_I(lo->plh_inode)->fh))
+                       if (nfs_compare_fh(fh, &NFS_I(lo->plh_inode)->fh))
                                continue;
                        ino = igrab(lo->plh_inode);
                        if (!ino)
                                continue;
-                       found = true;
-                       /* Without this, layout can be freed as soon
-                        * as we release cl_lock.
-                        */
                        get_layout_hdr(lo);
-                       break;
+                       return lo;
                }
-               if (found)
-                       break;
        }
+
+       return NULL;
+}
+
+static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp, struct nfs_fh *fh)
+{
+       struct pnfs_layout_hdr *lo;
+
+       spin_lock(&clp->cl_lock);
+       rcu_read_lock();
+       lo = get_layout_by_fh_locked(clp, fh);
        rcu_read_unlock();
        spin_unlock(&clp->cl_lock);
 
-       if (!found)
+       return lo;
+}
+
+static u32 initiate_file_draining(struct nfs_client *clp,
+                                 struct cb_layoutrecallargs *args)
+{
+       struct inode *ino;
+       struct pnfs_layout_hdr *lo;
+       u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
+       LIST_HEAD(free_me_list);
+
+       lo = get_layout_by_fh(clp, &args->cbl_fh);
+       if (!lo)
                return NFS4ERR_NOMATCHING_LAYOUT;
 
+       ino = lo->plh_inode;
        spin_lock(&ino->i_lock);
        if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
            mark_matching_lsegs_invalid(lo, &free_me_list,
@@ -213,17 +225,13 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
 static u32 do_callback_layoutrecall(struct nfs_client *clp,
                                    struct cb_layoutrecallargs *args)
 {
-       u32 res = NFS4ERR_DELAY;
+       u32 res;
 
        dprintk("%s enter, type=%i\n", __func__, args->cbl_recall_type);
-       if (test_and_set_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state))
-               goto out;
        if (args->cbl_recall_type == RETURN_FILE)
                res = initiate_file_draining(clp, args);
        else
                res = initiate_bulk_draining(clp, args);
-       clear_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state);
-out:
        dprintk("%s returning %i\n", __func__, res);
        return res;
 
@@ -303,21 +311,6 @@ out:
        return res;
 }
 
-int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
-{
-       if (delegation == NULL)
-               return 0;
-
-       if (stateid->stateid.seqid != 0)
-               return 0;
-       if (memcmp(&delegation->stateid.stateid.other,
-                  &stateid->stateid.other,
-                  NFS4_STATEID_OTHER_SIZE))
-               return 0;
-
-       return 1;
-}
-
 /*
  * Validate the sequenceID sent by the server.
  * Return success if the sequenceID is one more than what we last saw on
@@ -441,7 +434,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
        int i;
        __be32 status = htonl(NFS4ERR_BADSESSION);
 
-       clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid);
+       clp = nfs4_find_client_sessionid(cps->net, args->csa_addr, &args->csa_sessionid);
        if (clp == NULL)
                goto out;
 
@@ -517,7 +510,7 @@ __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy,
        if (!cps->clp) /* set in cb_sequence */
                goto out;
 
-       dprintk("NFS: RECALL_ANY callback request from %s\n",
+       dprintk_rcu("NFS: RECALL_ANY callback request from %s\n",
                rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR));
 
        status = cpu_to_be32(NFS4ERR_INVAL);
@@ -552,7 +545,7 @@ __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy,
        if (!cps->clp) /* set in cb_sequence */
                goto out;
 
-       dprintk("NFS: CB_RECALL_SLOT request from %s target max slots %d\n",
+       dprintk_rcu("NFS: CB_RECALL_SLOT request from %s target max slots %d\n",
                rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR),
                args->crsa_target_max_slots);
 
index d50b2742f23baeb20d54c44d6919ed126faf74c8..95bfc243992c1a822041d7a205bbca23162bf91d 100644 (file)
@@ -9,6 +9,8 @@
 #include <linux/sunrpc/svc.h>
 #include <linux/nfs4.h>
 #include <linux/nfs_fs.h>
+#include <linux/ratelimit.h>
+#include <linux/printk.h>
 #include <linux/slab.h>
 #include <linux/sunrpc/bc_xprt.h>
 #include "nfs4_fs.h"
@@ -73,7 +75,7 @@ static __be32 *read_buf(struct xdr_stream *xdr, int nbytes)
 
        p = xdr_inline_decode(xdr, nbytes);
        if (unlikely(p == NULL))
-               printk(KERN_WARNING "NFSv4 callback reply buffer overflowed!\n");
+               printk(KERN_WARNING "NFS: NFSv4 callback reply buffer overflowed!\n");
        return p;
 }
 
@@ -138,10 +140,10 @@ static __be32 decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
 {
        __be32 *p;
 
-       p = read_buf(xdr, 16);
+       p = read_buf(xdr, NFS4_STATEID_SIZE);
        if (unlikely(p == NULL))
                return htonl(NFS4ERR_RESOURCE);
-       memcpy(stateid->data, p, 16);
+       memcpy(stateid, p, NFS4_STATEID_SIZE);
        return 0;
 }
 
@@ -155,7 +157,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound
                return status;
        /* We do not like overly long tags! */
        if (hdr->taglen > CB_OP_TAGLEN_MAXSZ - 12) {
-               printk("NFSv4 CALLBACK %s: client sent tag of length %u\n",
+               printk("NFS: NFSv4 CALLBACK %s: client sent tag of length %u\n",
                                __func__, hdr->taglen);
                return htonl(NFS4ERR_RESOURCE);
        }
@@ -167,7 +169,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound
        if (hdr->minorversion <= 1) {
                hdr->cb_ident = ntohl(*p++); /* ignored by v4.1 */
        } else {
-               printk(KERN_WARNING "%s: NFSv4 server callback with "
+               pr_warn_ratelimited("NFS: %s: NFSv4 server callback with "
                        "illegal minor version %u!\n",
                        __func__, hdr->minorversion);
                return htonl(NFS4ERR_MINOR_VERS_MISMATCH);
@@ -759,14 +761,14 @@ static void nfs4_callback_free_slot(struct nfs4_session *session)
         * Let the state manager know callback processing done.
         * A single slot, so highest used slotid is either 0 or -1
         */
-       tbl->highest_used_slotid = -1;
+       tbl->highest_used_slotid = NFS4_NO_SLOT;
        nfs4_check_drain_bc_complete(session);
        spin_unlock(&tbl->slot_tbl_lock);
 }
 
 static void nfs4_cb_free_slot(struct cb_process_state *cps)
 {
-       if (cps->slotid != -1)
+       if (cps->slotid != NFS4_NO_SLOT)
                nfs4_callback_free_slot(cps->clp->cl_session);
 }
 
@@ -860,7 +862,8 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
        struct cb_process_state cps = {
                .drc_status = 0,
                .clp = NULL,
-               .slotid = -1,
+               .slotid = NFS4_NO_SLOT,
+               .net = rqstp->rq_xprt->xpt_net,
        };
        unsigned int nops = 0;
 
@@ -876,7 +879,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
                return rpc_garbage_args;
 
        if (hdr_arg.minorversion == 0) {
-               cps.clp = nfs4_find_client_ident(hdr_arg.cb_ident);
+               cps.clp = nfs4_find_client_ident(rqstp->rq_xprt->xpt_net, hdr_arg.cb_ident);
                if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp))
                        return rpc_drop_reply;
        }
index d4f772ebd1efd7f86d4da5f9c83f96da8a76c1bf..4a108a0a2a6085e75c4cb42a2564d32013e28bce 100644 (file)
@@ -40,6 +40,8 @@
 #include <net/ipv6.h>
 #include <linux/nfs_xdr.h>
 #include <linux/sunrpc/bc_xprt.h>
+#include <linux/nsproxy.h>
+#include <linux/pid_namespace.h>
 
 #include <asm/system.h>
 
 #include "internal.h"
 #include "fscache.h"
 #include "pnfs.h"
+#include "netns.h"
 
 #define NFSDBG_FACILITY                NFSDBG_CLIENT
 
-static DEFINE_SPINLOCK(nfs_client_lock);
-static LIST_HEAD(nfs_client_list);
-static LIST_HEAD(nfs_volume_list);
 static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
 #ifdef CONFIG_NFS_V4
-static DEFINE_IDR(cb_ident_idr); /* Protected by nfs_client_lock */
 
 /*
  * Get a unique NFSv4.0 callback identifier which will be used
@@ -67,15 +66,16 @@ static DEFINE_IDR(cb_ident_idr); /* Protected by nfs_client_lock */
 static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion)
 {
        int ret = 0;
+       struct nfs_net *nn = net_generic(clp->net, nfs_net_id);
 
        if (clp->rpc_ops->version != 4 || minorversion != 0)
                return ret;
 retry:
-       if (!idr_pre_get(&cb_ident_idr, GFP_KERNEL))
+       if (!idr_pre_get(&nn->cb_ident_idr, GFP_KERNEL))
                return -ENOMEM;
-       spin_lock(&nfs_client_lock);
-       ret = idr_get_new(&cb_ident_idr, clp, &clp->cl_cb_ident);
-       spin_unlock(&nfs_client_lock);
+       spin_lock(&nn->nfs_client_lock);
+       ret = idr_get_new(&nn->cb_ident_idr, clp, &clp->cl_cb_ident);
+       spin_unlock(&nn->nfs_client_lock);
        if (ret == -EAGAIN)
                goto retry;
        return ret;
@@ -90,7 +90,7 @@ static bool nfs4_disable_idmapping = true;
 /*
  * RPC cruft for NFS
  */
-static struct rpc_version *nfs_version[5] = {
+static const struct rpc_version *nfs_version[5] = {
        [2]                     = &nfs_version2,
 #ifdef CONFIG_NFS_V3
        [3]                     = &nfs_version3,
@@ -100,7 +100,7 @@ static struct rpc_version *nfs_version[5] = {
 #endif
 };
 
-struct rpc_program nfs_program = {
+const struct rpc_program nfs_program = {
        .name                   = "nfs",
        .number                 = NFS_PROGRAM,
        .nrvers                 = ARRAY_SIZE(nfs_version),
@@ -116,11 +116,11 @@ struct rpc_stat nfs_rpcstat = {
 
 #ifdef CONFIG_NFS_V3_ACL
 static struct rpc_stat         nfsacl_rpcstat = { &nfsacl_program };
-static struct rpc_version *    nfsacl_version[] = {
+static const struct rpc_version *nfsacl_version[] = {
        [3]                     = &nfsacl_version3,
 };
 
-struct rpc_program             nfsacl_program = {
+const struct rpc_program nfsacl_program = {
        .name                   = "nfsacl",
        .number                 = NFS_ACL_PROGRAM,
        .nrvers                 = ARRAY_SIZE(nfsacl_version),
@@ -136,6 +136,7 @@ struct nfs_client_initdata {
        const struct nfs_rpc_ops *rpc_ops;
        int proto;
        u32 minorversion;
+       struct net *net;
 };
 
 /*
@@ -172,6 +173,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
        clp->cl_rpcclient = ERR_PTR(-EINVAL);
 
        clp->cl_proto = cl_init->proto;
+       clp->net = get_net(cl_init->net);
 
 #ifdef CONFIG_NFS_V4
        err = nfs_get_cb_ident_idr(clp, cl_init->minorversion);
@@ -203,8 +205,11 @@ error_0:
 #ifdef CONFIG_NFS_V4_1
 static void nfs4_shutdown_session(struct nfs_client *clp)
 {
-       if (nfs4_has_session(clp))
+       if (nfs4_has_session(clp)) {
+               nfs4_deviceid_purge_client(clp);
                nfs4_destroy_session(clp->cl_session);
+       }
+
 }
 #else /* CONFIG_NFS_V4_1 */
 static void nfs4_shutdown_session(struct nfs_client *clp)
@@ -234,16 +239,20 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
 }
 
 /* idr_remove_all is not needed as all id's are removed by nfs_put_client */
-void nfs_cleanup_cb_ident_idr(void)
+void nfs_cleanup_cb_ident_idr(struct net *net)
 {
-       idr_destroy(&cb_ident_idr);
+       struct nfs_net *nn = net_generic(net, nfs_net_id);
+
+       idr_destroy(&nn->cb_ident_idr);
 }
 
 /* nfs_client_lock held */
 static void nfs_cb_idr_remove_locked(struct nfs_client *clp)
 {
+       struct nfs_net *nn = net_generic(clp->net, nfs_net_id);
+
        if (clp->cl_cb_ident)
-               idr_remove(&cb_ident_idr, clp->cl_cb_ident);
+               idr_remove(&nn->cb_ident_idr, clp->cl_cb_ident);
 }
 
 static void pnfs_init_server(struct nfs_server *server)
@@ -261,7 +270,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
 {
 }
 
-void nfs_cleanup_cb_ident_idr(void)
+void nfs_cleanup_cb_ident_idr(struct net *net)
 {
 }
 
@@ -293,10 +302,10 @@ static void nfs_free_client(struct nfs_client *clp)
        if (clp->cl_machine_cred != NULL)
                put_rpccred(clp->cl_machine_cred);
 
-       nfs4_deviceid_purge_client(clp);
-
+       put_net(clp->net);
        kfree(clp->cl_hostname);
        kfree(clp->server_scope);
+       kfree(clp->impl_id);
        kfree(clp);
 
        dprintk("<-- nfs_free_client()\n");
@@ -307,15 +316,18 @@ static void nfs_free_client(struct nfs_client *clp)
  */
 void nfs_put_client(struct nfs_client *clp)
 {
+       struct nfs_net *nn;
+
        if (!clp)
                return;
 
        dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count));
+       nn = net_generic(clp->net, nfs_net_id);
 
-       if (atomic_dec_and_lock(&clp->cl_count, &nfs_client_lock)) {
+       if (atomic_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) {
                list_del(&clp->cl_share_link);
                nfs_cb_idr_remove_locked(clp);
-               spin_unlock(&nfs_client_lock);
+               spin_unlock(&nn->nfs_client_lock);
 
                BUG_ON(!list_empty(&clp->cl_superblocks));
 
@@ -393,6 +405,7 @@ static int nfs_sockaddr_cmp_ip4(const struct sockaddr *sa1,
                (sin1->sin_port == sin2->sin_port);
 }
 
+#if defined(CONFIG_NFS_V4_1)
 /*
  * Test if two socket addresses represent the same actual socket,
  * by comparing (only) relevant fields, excluding the port number.
@@ -411,6 +424,7 @@ static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
        }
        return 0;
 }
+#endif /* CONFIG_NFS_V4_1 */
 
 /*
  * Test if two socket addresses represent the same actual socket,
@@ -431,10 +445,10 @@ static int nfs_sockaddr_cmp(const struct sockaddr *sa1,
        return 0;
 }
 
+#if defined(CONFIG_NFS_V4_1)
 /* Common match routine for v4.0 and v4.1 callback services */
-bool
-nfs4_cb_match_client(const struct sockaddr *addr, struct nfs_client *clp,
-                    u32 minorversion)
+static bool nfs4_cb_match_client(const struct sockaddr *addr,
+               struct nfs_client *clp, u32 minorversion)
 {
        struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
 
@@ -454,6 +468,7 @@ nfs4_cb_match_client(const struct sockaddr *addr, struct nfs_client *clp,
 
        return true;
 }
+#endif /* CONFIG_NFS_V4_1 */
 
 /*
  * Find an nfs_client on the list that matches the initialisation data
@@ -463,8 +478,9 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
 {
        struct nfs_client *clp;
        const struct sockaddr *sap = data->addr;
+       struct nfs_net *nn = net_generic(data->net, nfs_net_id);
 
-       list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
+       list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
                const struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
                /* Don't match clients that failed to initialise properly */
                if (clp->cl_cons_state < 0)
@@ -502,13 +518,14 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
 {
        struct nfs_client *clp, *new = NULL;
        int error;
+       struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id);
 
        dprintk("--> nfs_get_client(%s,v%u)\n",
                cl_init->hostname ?: "", cl_init->rpc_ops->version);
 
        /* see if the client already exists */
        do {
-               spin_lock(&nfs_client_lock);
+               spin_lock(&nn->nfs_client_lock);
 
                clp = nfs_match_client(cl_init);
                if (clp)
@@ -516,7 +533,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
                if (new)
                        goto install_client;
 
-               spin_unlock(&nfs_client_lock);
+               spin_unlock(&nn->nfs_client_lock);
 
                new = nfs_alloc_client(cl_init);
        } while (!IS_ERR(new));
@@ -527,8 +544,8 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
        /* install a new client and return with it unready */
 install_client:
        clp = new;
-       list_add(&clp->cl_share_link, &nfs_client_list);
-       spin_unlock(&nfs_client_lock);
+       list_add(&clp->cl_share_link, &nn->nfs_client_list);
+       spin_unlock(&nn->nfs_client_lock);
 
        error = cl_init->rpc_ops->init_client(clp, timeparms, ip_addr,
                                              authflavour, noresvport);
@@ -543,7 +560,7 @@ install_client:
         * - make sure it's ready before returning
         */
 found_client:
-       spin_unlock(&nfs_client_lock);
+       spin_unlock(&nn->nfs_client_lock);
 
        if (new)
                nfs_free_client(new);
@@ -643,7 +660,7 @@ static int nfs_create_rpc_client(struct nfs_client *clp,
 {
        struct rpc_clnt         *clnt = NULL;
        struct rpc_create_args args = {
-               .net            = &init_net,
+               .net            = clp->net,
                .protocol       = clp->cl_proto,
                .address        = (struct sockaddr *)&clp->cl_addr,
                .addrsize       = clp->cl_addrlen,
@@ -697,6 +714,7 @@ static int nfs_start_lockd(struct nfs_server *server)
                .nfs_version    = clp->rpc_ops->version,
                .noresvport     = server->flags & NFS_MOUNT_NORESVPORT ?
                                        1 : 0,
+               .net            = clp->net,
        };
 
        if (nlm_init.nfs_version > 3)
@@ -832,6 +850,7 @@ static int nfs_init_server(struct nfs_server *server,
                .addrlen = data->nfs_server.addrlen,
                .rpc_ops = &nfs_v2_clientops,
                .proto = data->nfs_server.protocol,
+               .net = data->net,
        };
        struct rpc_timeout timeparms;
        struct nfs_client *clp;
@@ -1030,25 +1049,30 @@ static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_serve
 static void nfs_server_insert_lists(struct nfs_server *server)
 {
        struct nfs_client *clp = server->nfs_client;
+       struct nfs_net *nn = net_generic(clp->net, nfs_net_id);
 
-       spin_lock(&nfs_client_lock);
+       spin_lock(&nn->nfs_client_lock);
        list_add_tail_rcu(&server->client_link, &clp->cl_superblocks);
-       list_add_tail(&server->master_link, &nfs_volume_list);
+       list_add_tail(&server->master_link, &nn->nfs_volume_list);
        clear_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state);
-       spin_unlock(&nfs_client_lock);
+       spin_unlock(&nn->nfs_client_lock);
 
 }
 
 static void nfs_server_remove_lists(struct nfs_server *server)
 {
        struct nfs_client *clp = server->nfs_client;
+       struct nfs_net *nn;
 
-       spin_lock(&nfs_client_lock);
+       if (clp == NULL)
+               return;
+       nn = net_generic(clp->net, nfs_net_id);
+       spin_lock(&nn->nfs_client_lock);
        list_del_rcu(&server->client_link);
-       if (clp && list_empty(&clp->cl_superblocks))
+       if (list_empty(&clp->cl_superblocks))
                set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state);
        list_del(&server->master_link);
-       spin_unlock(&nfs_client_lock);
+       spin_unlock(&nn->nfs_client_lock);
 
        synchronize_rcu();
 }
@@ -1087,6 +1111,8 @@ static struct nfs_server *nfs_alloc_server(void)
                return NULL;
        }
 
+       ida_init(&server->openowner_id);
+       ida_init(&server->lockowner_id);
        pnfs_init_server(server);
 
        return server;
@@ -1112,6 +1138,8 @@ void nfs_free_server(struct nfs_server *server)
 
        nfs_put_client(server->nfs_client);
 
+       ida_destroy(&server->lockowner_id);
+       ida_destroy(&server->openowner_id);
        nfs_free_iostats(server->io_stats);
        bdi_destroy(&server->backing_dev_info);
        kfree(server);
@@ -1187,48 +1215,22 @@ error:
 }
 
 #ifdef CONFIG_NFS_V4
-/*
- * NFSv4.0 callback thread helper
- *
- * Find a client by IP address, protocol version, and minorversion
- *
- * Called from the pg_authenticate method. The callback identifier
- * is not used as it has not been decoded.
- *
- * Returns NULL if no such client
- */
-struct nfs_client *
-nfs4_find_client_no_ident(const struct sockaddr *addr)
-{
-       struct nfs_client *clp;
-
-       spin_lock(&nfs_client_lock);
-       list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
-               if (nfs4_cb_match_client(addr, clp, 0) == false)
-                       continue;
-               atomic_inc(&clp->cl_count);
-               spin_unlock(&nfs_client_lock);
-               return clp;
-       }
-       spin_unlock(&nfs_client_lock);
-       return NULL;
-}
-
 /*
  * NFSv4.0 callback thread helper
  *
  * Find a client by callback identifier
  */
 struct nfs_client *
-nfs4_find_client_ident(int cb_ident)
+nfs4_find_client_ident(struct net *net, int cb_ident)
 {
        struct nfs_client *clp;
+       struct nfs_net *nn = net_generic(net, nfs_net_id);
 
-       spin_lock(&nfs_client_lock);
-       clp = idr_find(&cb_ident_idr, cb_ident);
+       spin_lock(&nn->nfs_client_lock);
+       clp = idr_find(&nn->cb_ident_idr, cb_ident);
        if (clp)
                atomic_inc(&clp->cl_count);
-       spin_unlock(&nfs_client_lock);
+       spin_unlock(&nn->nfs_client_lock);
        return clp;
 }
 
@@ -1241,13 +1243,14 @@ nfs4_find_client_ident(int cb_ident)
  * Returns NULL if no such client
  */
 struct nfs_client *
-nfs4_find_client_sessionid(const struct sockaddr *addr,
+nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr,
                           struct nfs4_sessionid *sid)
 {
        struct nfs_client *clp;
+       struct nfs_net *nn = net_generic(net, nfs_net_id);
 
-       spin_lock(&nfs_client_lock);
-       list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
+       spin_lock(&nn->nfs_client_lock);
+       list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
                if (nfs4_cb_match_client(addr, clp, 1) == false)
                        continue;
 
@@ -1260,17 +1263,17 @@ nfs4_find_client_sessionid(const struct sockaddr *addr,
                        continue;
 
                atomic_inc(&clp->cl_count);
-               spin_unlock(&nfs_client_lock);
+               spin_unlock(&nn->nfs_client_lock);
                return clp;
        }
-       spin_unlock(&nfs_client_lock);
+       spin_unlock(&nn->nfs_client_lock);
        return NULL;
 }
 
 #else /* CONFIG_NFS_V4_1 */
 
 struct nfs_client *
-nfs4_find_client_sessionid(const struct sockaddr *addr,
+nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr,
                           struct nfs4_sessionid *sid)
 {
        return NULL;
@@ -1285,16 +1288,18 @@ static int nfs4_init_callback(struct nfs_client *clp)
        int error;
 
        if (clp->rpc_ops->version == 4) {
+               struct rpc_xprt *xprt;
+
+               xprt = rcu_dereference_raw(clp->cl_rpcclient->cl_xprt);
+
                if (nfs4_has_session(clp)) {
-                       error = xprt_setup_backchannel(
-                                               clp->cl_rpcclient->cl_xprt,
+                       error = xprt_setup_backchannel(xprt,
                                                NFS41_BC_MIN_CALLBACKS);
                        if (error < 0)
                                return error;
                }
 
-               error = nfs_callback_up(clp->cl_mvops->minor_version,
-                                       clp->cl_rpcclient->cl_xprt);
+               error = nfs_callback_up(clp->cl_mvops->minor_version, xprt);
                if (error < 0) {
                        dprintk("%s: failed to start callback. Error = %d\n",
                                __func__, error);
@@ -1345,6 +1350,7 @@ int nfs4_init_client(struct nfs_client *clp,
                     rpc_authflavor_t authflavour,
                     int noresvport)
 {
+       char buf[INET6_ADDRSTRLEN + 1];
        int error;
 
        if (clp->cl_cons_state == NFS_CS_READY) {
@@ -1360,6 +1366,20 @@ int nfs4_init_client(struct nfs_client *clp,
                                      1, noresvport);
        if (error < 0)
                goto error;
+
+       /* If no clientaddr= option was specified, find a usable cb address */
+       if (ip_addr == NULL) {
+               struct sockaddr_storage cb_addr;
+               struct sockaddr *sap = (struct sockaddr *)&cb_addr;
+
+               error = rpc_localaddr(clp->cl_rpcclient, sap, sizeof(cb_addr));
+               if (error < 0)
+                       goto error;
+               error = rpc_ntop(sap, buf, sizeof(buf));
+               if (error < 0)
+                       goto error;
+               ip_addr = (const char *)buf;
+       }
        strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr));
 
        error = nfs_idmap_new(clp);
@@ -1394,7 +1414,7 @@ static int nfs4_set_client(struct nfs_server *server,
                const char *ip_addr,
                rpc_authflavor_t authflavour,
                int proto, const struct rpc_timeout *timeparms,
-               u32 minorversion)
+               u32 minorversion, struct net *net)
 {
        struct nfs_client_initdata cl_init = {
                .hostname = hostname,
@@ -1403,6 +1423,7 @@ static int nfs4_set_client(struct nfs_server *server,
                .rpc_ops = &nfs_v4_clientops,
                .proto = proto,
                .minorversion = minorversion,
+               .net = net,
        };
        struct nfs_client *clp;
        int error;
@@ -1454,6 +1475,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
                .rpc_ops = &nfs_v4_clientops,
                .proto = ds_proto,
                .minorversion = mds_clp->cl_minorversion,
+               .net = mds_clp->net,
        };
        struct rpc_timeout ds_timeout = {
                .to_initval = 15 * HZ,
@@ -1581,7 +1603,8 @@ static int nfs4_init_server(struct nfs_server *server,
                        data->auth_flavors[0],
                        data->nfs_server.protocol,
                        &timeparms,
-                       data->minorversion);
+                       data->minorversion,
+                       data->net);
        if (error < 0)
                goto error;
 
@@ -1676,9 +1699,10 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
                                data->addrlen,
                                parent_client->cl_ipaddr,
                                data->authflavor,
-                               parent_server->client->cl_xprt->prot,
+                               rpc_protocol(parent_server->client),
                                parent_server->client->cl_timeout,
-                               parent_client->cl_mvops->minor_version);
+                               parent_client->cl_mvops->minor_version,
+                               parent_client->net);
        if (error < 0)
                goto error;
 
@@ -1771,6 +1795,18 @@ out_free_server:
        return ERR_PTR(error);
 }
 
+void nfs_clients_init(struct net *net)
+{
+       struct nfs_net *nn = net_generic(net, nfs_net_id);
+
+       INIT_LIST_HEAD(&nn->nfs_client_list);
+       INIT_LIST_HEAD(&nn->nfs_volume_list);
+#ifdef CONFIG_NFS_V4
+       idr_init(&nn->cb_ident_idr);
+#endif
+       spin_lock_init(&nn->nfs_client_lock);
+}
+
 #ifdef CONFIG_PROC_FS
 static struct proc_dir_entry *proc_fs_nfs;
 
@@ -1824,13 +1860,15 @@ static int nfs_server_list_open(struct inode *inode, struct file *file)
 {
        struct seq_file *m;
        int ret;
+       struct pid_namespace *pid_ns = file->f_dentry->d_sb->s_fs_info;
+       struct net *net = pid_ns->child_reaper->nsproxy->net_ns;
 
        ret = seq_open(file, &nfs_server_list_ops);
        if (ret < 0)
                return ret;
 
        m = file->private_data;
-       m->private = PDE(inode)->data;
+       m->private = net;
 
        return 0;
 }
@@ -1840,9 +1878,11 @@ static int nfs_server_list_open(struct inode *inode, struct file *file)
  */
 static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos)
 {
+       struct nfs_net *nn = net_generic(m->private, nfs_net_id);
+
        /* lock the list against modification */
-       spin_lock(&nfs_client_lock);
-       return seq_list_start_head(&nfs_client_list, *_pos);
+       spin_lock(&nn->nfs_client_lock);
+       return seq_list_start_head(&nn->nfs_client_list, *_pos);
 }
 
 /*
@@ -1850,7 +1890,9 @@ static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos)
  */
 static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos)
 {
-       return seq_list_next(v, &nfs_client_list, pos);
+       struct nfs_net *nn = net_generic(p->private, nfs_net_id);
+
+       return seq_list_next(v, &nn->nfs_client_list, pos);
 }
 
 /*
@@ -1858,7 +1900,9 @@ static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos)
  */
 static void nfs_server_list_stop(struct seq_file *p, void *v)
 {
-       spin_unlock(&nfs_client_lock);
+       struct nfs_net *nn = net_generic(p->private, nfs_net_id);
+
+       spin_unlock(&nn->nfs_client_lock);
 }
 
 /*
@@ -1867,9 +1911,10 @@ static void nfs_server_list_stop(struct seq_file *p, void *v)
 static int nfs_server_list_show(struct seq_file *m, void *v)
 {
        struct nfs_client *clp;
+       struct nfs_net *nn = net_generic(m->private, nfs_net_id);
 
        /* display header on line 1 */
-       if (v == &nfs_client_list) {
+       if (v == &nn->nfs_client_list) {
                seq_puts(m, "NV SERVER   PORT USE HOSTNAME\n");
                return 0;
        }
@@ -1881,12 +1926,14 @@ static int nfs_server_list_show(struct seq_file *m, void *v)
        if (clp->cl_cons_state != NFS_CS_READY)
                return 0;
 
+       rcu_read_lock();
        seq_printf(m, "v%u %s %s %3d %s\n",
                   clp->rpc_ops->version,
                   rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR),
                   rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT),
                   atomic_read(&clp->cl_count),
                   clp->cl_hostname);
+       rcu_read_unlock();
 
        return 0;
 }
@@ -1898,13 +1945,15 @@ static int nfs_volume_list_open(struct inode *inode, struct file *file)
 {
        struct seq_file *m;
        int ret;
+       struct pid_namespace *pid_ns = file->f_dentry->d_sb->s_fs_info;
+       struct net *net = pid_ns->child_reaper->nsproxy->net_ns;
 
        ret = seq_open(file, &nfs_volume_list_ops);
        if (ret < 0)
                return ret;
 
        m = file->private_data;
-       m->private = PDE(inode)->data;
+       m->private = net;
 
        return 0;
 }
@@ -1914,9 +1963,11 @@ static int nfs_volume_list_open(struct inode *inode, struct file *file)
  */
 static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos)
 {
+       struct nfs_net *nn = net_generic(m->private, nfs_net_id);
+
        /* lock the list against modification */
-       spin_lock(&nfs_client_lock);
-       return seq_list_start_head(&nfs_volume_list, *_pos);
+       spin_lock(&nn->nfs_client_lock);
+       return seq_list_start_head(&nn->nfs_volume_list, *_pos);
 }
 
 /*
@@ -1924,7 +1975,9 @@ static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos)
  */
 static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos)
 {
-       return seq_list_next(v, &nfs_volume_list, pos);
+       struct nfs_net *nn = net_generic(p->private, nfs_net_id);
+
+       return seq_list_next(v, &nn->nfs_volume_list, pos);
 }
 
 /*
@@ -1932,7 +1985,9 @@ static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos)
  */
 static void nfs_volume_list_stop(struct seq_file *p, void *v)
 {
-       spin_unlock(&nfs_client_lock);
+       struct nfs_net *nn = net_generic(p->private, nfs_net_id);
+
+       spin_unlock(&nn->nfs_client_lock);
 }
 
 /*
@@ -1943,9 +1998,10 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
        struct nfs_server *server;
        struct nfs_client *clp;
        char dev[8], fsid[17];
+       struct nfs_net *nn = net_generic(m->private, nfs_net_id);
 
        /* display header on line 1 */
-       if (v == &nfs_volume_list) {
+       if (v == &nn->nfs_volume_list) {
                seq_puts(m, "NV SERVER   PORT DEV     FSID              FSC\n");
                return 0;
        }
@@ -1960,6 +2016,7 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
                 (unsigned long long) server->fsid.major,
                 (unsigned long long) server->fsid.minor);
 
+       rcu_read_lock();
        seq_printf(m, "v%u %s %s %-7s %-17s %s\n",
                   clp->rpc_ops->version,
                   rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR),
@@ -1967,6 +2024,7 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
                   dev,
                   fsid,
                   nfs_server_fscache_state(server));
+       rcu_read_unlock();
 
        return 0;
 }
index 7f2654069806f011c4361900041ef6ae64f17de4..89af1d269274f3a91401f704226dec43f9c02528 100644 (file)
@@ -105,7 +105,7 @@ again:
                        continue;
                if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
                        continue;
-               if (memcmp(state->stateid.data, stateid->data, sizeof(state->stateid.data)) != 0)
+               if (!nfs4_stateid_match(&state->stateid, stateid))
                        continue;
                get_nfs_open_context(ctx);
                spin_unlock(&inode->i_lock);
@@ -139,8 +139,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred,
        if (delegation != NULL) {
                spin_lock(&delegation->lock);
                if (delegation->inode != NULL) {
-                       memcpy(delegation->stateid.data, res->delegation.data,
-                              sizeof(delegation->stateid.data));
+                       nfs4_stateid_copy(&delegation->stateid, &res->delegation);
                        delegation->type = res->delegation_type;
                        delegation->maxsize = res->maxsize;
                        oldcred = delegation->cred;
@@ -236,8 +235,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
        delegation = kmalloc(sizeof(*delegation), GFP_NOFS);
        if (delegation == NULL)
                return -ENOMEM;
-       memcpy(delegation->stateid.data, res->delegation.data,
-                       sizeof(delegation->stateid.data));
+       nfs4_stateid_copy(&delegation->stateid, &res->delegation);
        delegation->type = res->delegation_type;
        delegation->maxsize = res->maxsize;
        delegation->change_attr = inode->i_version;
@@ -250,19 +248,22 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
        old_delegation = rcu_dereference_protected(nfsi->delegation,
                                        lockdep_is_held(&clp->cl_lock));
        if (old_delegation != NULL) {
-               if (memcmp(&delegation->stateid, &old_delegation->stateid,
-                                       sizeof(old_delegation->stateid)) == 0 &&
+               if (nfs4_stateid_match(&delegation->stateid,
+                                       &old_delegation->stateid) &&
                                delegation->type == old_delegation->type) {
                        goto out;
                }
                /*
                 * Deal with broken servers that hand out two
                 * delegations for the same file.
+                * Allow for upgrades to a WRITE delegation, but
+                * nothing else.
                 */
                dfprintk(FILE, "%s: server %s handed out "
                                "a duplicate delegation!\n",
                                __func__, clp->cl_hostname);
-               if (delegation->type <= old_delegation->type) {
+               if (delegation->type == old_delegation->type ||
+                   !(delegation->type & FMODE_WRITE)) {
                        freeme = delegation;
                        delegation = NULL;
                        goto out;
@@ -455,17 +456,24 @@ static void nfs_client_mark_return_all_delegation_types(struct nfs_client *clp,
        rcu_read_unlock();
 }
 
-static void nfs_client_mark_return_all_delegations(struct nfs_client *clp)
-{
-       nfs_client_mark_return_all_delegation_types(clp, FMODE_READ|FMODE_WRITE);
-}
-
 static void nfs_delegation_run_state_manager(struct nfs_client *clp)
 {
        if (test_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state))
                nfs4_schedule_state_manager(clp);
 }
 
+void nfs_remove_bad_delegation(struct inode *inode)
+{
+       struct nfs_delegation *delegation;
+
+       delegation = nfs_detach_delegation(NFS_I(inode), NFS_SERVER(inode));
+       if (delegation) {
+               nfs_inode_find_state_and_recover(inode, &delegation->stateid);
+               nfs_free_delegation(delegation);
+       }
+}
+EXPORT_SYMBOL_GPL(nfs_remove_bad_delegation);
+
 /**
  * nfs_expire_all_delegation_types
  * @clp: client to process
@@ -488,18 +496,6 @@ void nfs_expire_all_delegations(struct nfs_client *clp)
        nfs_expire_all_delegation_types(clp, FMODE_READ|FMODE_WRITE);
 }
 
-/**
- * nfs_handle_cb_pathdown - return all delegations after NFS4ERR_CB_PATH_DOWN
- * @clp: client to process
- *
- */
-void nfs_handle_cb_pathdown(struct nfs_client *clp)
-{
-       if (clp == NULL)
-               return;
-       nfs_client_mark_return_all_delegations(clp);
-}
-
 static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server)
 {
        struct nfs_delegation *delegation;
@@ -531,7 +527,7 @@ void nfs_expire_unreferenced_delegations(struct nfs_client *clp)
 /**
  * nfs_async_inode_return_delegation - asynchronously return a delegation
  * @inode: inode to process
- * @stateid: state ID information from CB_RECALL arguments
+ * @stateid: state ID information
  *
  * Returns zero on success, or a negative errno value.
  */
@@ -545,7 +541,7 @@ int nfs_async_inode_return_delegation(struct inode *inode,
        rcu_read_lock();
        delegation = rcu_dereference(NFS_I(inode)->delegation);
 
-       if (!clp->cl_mvops->validate_stateid(delegation, stateid)) {
+       if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) {
                rcu_read_unlock();
                return -ENOENT;
        }
@@ -684,21 +680,25 @@ int nfs_delegations_present(struct nfs_client *clp)
  * nfs4_copy_delegation_stateid - Copy inode's state ID information
  * @dst: stateid data structure to fill in
  * @inode: inode to check
+ * @flags: delegation type requirement
  *
- * Returns one and fills in "dst->data" * if inode had a delegation,
- * otherwise zero is returned.
+ * Returns "true" and fills in "dst->data" * if inode had a delegation,
+ * otherwise "false" is returned.
  */
-int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
+bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode,
+               fmode_t flags)
 {
        struct nfs_inode *nfsi = NFS_I(inode);
        struct nfs_delegation *delegation;
-       int ret = 0;
+       bool ret;
 
+       flags &= FMODE_READ|FMODE_WRITE;
        rcu_read_lock();
        delegation = rcu_dereference(nfsi->delegation);
-       if (delegation != NULL) {
-               memcpy(dst->data, delegation->stateid.data, sizeof(dst->data));
-               ret = 1;
+       ret = (delegation != NULL && (delegation->type & flags) == flags);
+       if (ret) {
+               nfs4_stateid_copy(dst, &delegation->stateid);
+               nfs_mark_delegation_referenced(delegation);
        }
        rcu_read_unlock();
        return ret;
index d9322e490c56ff98a39e79295186653e0e80589e..cd6a7a8dadae9054e5bd5557c0226bfd8accc116 100644 (file)
@@ -42,9 +42,9 @@ void nfs_super_return_all_delegations(struct super_block *sb);
 void nfs_expire_all_delegations(struct nfs_client *clp);
 void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags);
 void nfs_expire_unreferenced_delegations(struct nfs_client *clp);
-void nfs_handle_cb_pathdown(struct nfs_client *clp);
 int nfs_client_return_marked_delegations(struct nfs_client *clp);
 int nfs_delegations_present(struct nfs_client *clp);
+void nfs_remove_bad_delegation(struct inode *inode);
 
 void nfs_delegation_mark_reclaim(struct nfs_client *clp);
 void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
@@ -53,7 +53,7 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
 int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync);
 int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid);
 int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl);
-int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
+bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_t flags);
 
 void nfs_mark_delegation_referenced(struct nfs_delegation *delegation);
 int nfs_have_delegation(struct inode *inode, fmode_t flags);
index 32aa6917265a388bafc582cddb5c803d38605a5c..4aaf0316d76a040a1e17e60e00060b28005d59a2 100644 (file)
@@ -207,7 +207,7 @@ struct nfs_cache_array_entry {
 };
 
 struct nfs_cache_array {
-       unsigned int size;
+       int size;
        int eof_index;
        u64 last_cookie;
        struct nfs_cache_array_entry array[0];
@@ -1429,6 +1429,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
        }
 
        open_flags = nd->intent.open.flags;
+       attr.ia_valid = 0;
 
        ctx = create_nfs_open_context(dentry, open_flags);
        res = ERR_CAST(ctx);
@@ -1437,11 +1438,14 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
 
        if (nd->flags & LOOKUP_CREATE) {
                attr.ia_mode = nd->intent.open.create_mode;
-               attr.ia_valid = ATTR_MODE;
+               attr.ia_valid |= ATTR_MODE;
                attr.ia_mode &= ~current_umask();
-       } else {
+       } else
                open_flags &= ~(O_EXCL | O_CREAT);
-               attr.ia_valid = 0;
+
+       if (open_flags & O_TRUNC) {
+               attr.ia_valid |= ATTR_SIZE;
+               attr.ia_size = 0;
        }
 
        /* Open the file on the server */
@@ -1495,6 +1499,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
        struct inode *inode;
        struct inode *dir;
        struct nfs_open_context *ctx;
+       struct iattr attr;
        int openflags, ret = 0;
 
        if (nd->flags & LOOKUP_RCU)
@@ -1523,19 +1528,27 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
        /* We cannot do exclusive creation on a positive dentry */
        if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
                goto no_open_dput;
-       /* We can't create new files, or truncate existing ones here */
-       openflags &= ~(O_CREAT|O_EXCL|O_TRUNC);
+       /* We can't create new files here */
+       openflags &= ~(O_CREAT|O_EXCL);
 
        ctx = create_nfs_open_context(dentry, openflags);
        ret = PTR_ERR(ctx);
        if (IS_ERR(ctx))
                goto out;
+
+       attr.ia_valid = 0;
+       if (openflags & O_TRUNC) {
+               attr.ia_valid |= ATTR_SIZE;
+               attr.ia_size = 0;
+               nfs_wb_all(inode);
+       }
+
        /*
         * Note: we're not holding inode->i_mutex and so may be racing with
         * operations that change the directory. We therefore save the
         * change attribute *before* we do the RPC call.
         */
-       inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, NULL);
+       inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr);
        if (IS_ERR(inode)) {
                ret = PTR_ERR(inode);
                switch (ret) {
index 1940f1a56a5fe059cac63144a4f17cdbf26ab1da..9c7f66ac6cc2ad2d40f7eec79f931f205b96a310 100644 (file)
@@ -265,9 +265,7 @@ static void nfs_direct_read_release(void *calldata)
 }
 
 static const struct rpc_call_ops nfs_read_direct_ops = {
-#if defined(CONFIG_NFS_V4_1)
        .rpc_call_prepare = nfs_read_prepare,
-#endif /* CONFIG_NFS_V4_1 */
        .rpc_call_done = nfs_direct_read_result,
        .rpc_release = nfs_direct_read_release,
 };
@@ -554,9 +552,7 @@ static void nfs_direct_commit_release(void *calldata)
 }
 
 static const struct rpc_call_ops nfs_commit_direct_ops = {
-#if defined(CONFIG_NFS_V4_1)
        .rpc_call_prepare = nfs_write_prepare,
-#endif /* CONFIG_NFS_V4_1 */
        .rpc_call_done = nfs_direct_commit_result,
        .rpc_release = nfs_direct_commit_release,
 };
@@ -696,9 +692,7 @@ out_unlock:
 }
 
 static const struct rpc_call_ops nfs_write_direct_ops = {
-#if defined(CONFIG_NFS_V4_1)
        .rpc_call_prepare = nfs_write_prepare,
-#endif /* CONFIG_NFS_V4_1 */
        .rpc_call_done = nfs_direct_write_result,
        .rpc_release = nfs_direct_write_release,
 };
index a6e711ad130f9fdb456ad2a09b056214815af4f5..b3924b8a600021e27c89fc6c9ff910a514aec9eb 100644 (file)
@@ -10,8 +10,9 @@
 
 #include <linux/sunrpc/clnt.h>
 #include <linux/dns_resolver.h>
+#include "dns_resolve.h"
 
-ssize_t nfs_dns_resolve_name(char *name, size_t namelen,
+ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen,
                struct sockaddr *sa, size_t salen)
 {
        ssize_t ret;
@@ -20,7 +21,7 @@ ssize_t nfs_dns_resolve_name(char *name, size_t namelen,
 
        ip_len = dns_query(NULL, name, namelen, NULL, &ip_addr, NULL);
        if (ip_len > 0)
-               ret = rpc_pton(ip_addr, ip_len, sa, salen);
+               ret = rpc_pton(net, ip_addr, ip_len, sa, salen);
        else
                ret = -ESRCH;
        kfree(ip_addr);
@@ -40,15 +41,15 @@ ssize_t nfs_dns_resolve_name(char *name, size_t namelen,
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/cache.h>
 #include <linux/sunrpc/svcauth.h>
+#include <linux/sunrpc/rpc_pipe_fs.h>
 
 #include "dns_resolve.h"
 #include "cache_lib.h"
+#include "netns.h"
 
 #define NFS_DNS_HASHBITS 4
 #define NFS_DNS_HASHTBL_SIZE (1 << NFS_DNS_HASHBITS)
 
-static struct cache_head *nfs_dns_table[NFS_DNS_HASHTBL_SIZE];
-
 struct nfs_dns_ent {
        struct cache_head h;
 
@@ -224,7 +225,7 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen)
        len = qword_get(&buf, buf1, sizeof(buf1));
        if (len <= 0)
                goto out;
-       key.addrlen = rpc_pton(buf1, len,
+       key.addrlen = rpc_pton(cd->net, buf1, len,
                        (struct sockaddr *)&key.addr,
                        sizeof(key.addr));
 
@@ -259,21 +260,6 @@ out:
        return ret;
 }
 
-static struct cache_detail nfs_dns_resolve = {
-       .owner = THIS_MODULE,
-       .hash_size = NFS_DNS_HASHTBL_SIZE,
-       .hash_table = nfs_dns_table,
-       .name = "dns_resolve",
-       .cache_put = nfs_dns_ent_put,
-       .cache_upcall = nfs_dns_upcall,
-       .cache_parse = nfs_dns_parse,
-       .cache_show = nfs_dns_show,
-       .match = nfs_dns_match,
-       .init = nfs_dns_ent_init,
-       .update = nfs_dns_ent_update,
-       .alloc = nfs_dns_ent_alloc,
-};
-
 static int do_cache_lookup(struct cache_detail *cd,
                struct nfs_dns_ent *key,
                struct nfs_dns_ent **item,
@@ -336,8 +322,8 @@ out:
        return ret;
 }
 
-ssize_t nfs_dns_resolve_name(char *name, size_t namelen,
-               struct sockaddr *sa, size_t salen)
+ssize_t nfs_dns_resolve_name(struct net *net, char *name,
+               size_t namelen, struct sockaddr *sa, size_t salen)
 {
        struct nfs_dns_ent key = {
                .hostname = name,
@@ -345,28 +331,118 @@ ssize_t nfs_dns_resolve_name(char *name, size_t namelen,
        };
        struct nfs_dns_ent *item = NULL;
        ssize_t ret;
+       struct nfs_net *nn = net_generic(net, nfs_net_id);
 
-       ret = do_cache_lookup_wait(&nfs_dns_resolve, &key, &item);
+       ret = do_cache_lookup_wait(nn->nfs_dns_resolve, &key, &item);
        if (ret == 0) {
                if (salen >= item->addrlen) {
                        memcpy(sa, &item->addr, item->addrlen);
                        ret = item->addrlen;
                } else
                        ret = -EOVERFLOW;
-               cache_put(&item->h, &nfs_dns_resolve);
+               cache_put(&item->h, nn->nfs_dns_resolve);
        } else if (ret == -ENOENT)
                ret = -ESRCH;
        return ret;
 }
 
+/*
+ * Create and register the DNS resolver cache for one network namespace.
+ *
+ * Allocates a cache_detail plus its hash table, fills in the nfs_dns_*
+ * callbacks, initialises it with nfs_cache_init() and registers it for
+ * @net via nfs_cache_register_net().  On success the cache is stored in
+ * the per-net nfs_net (nn->nfs_dns_resolve).
+ *
+ * Returns 0 on success, -ENOMEM if either allocation fails, or the error
+ * returned by nfs_cache_register_net().
+ */
+int nfs_dns_resolver_cache_init(struct net *net)
+{
+       int err = -ENOMEM;
+       struct nfs_net *nn = net_generic(net, nfs_net_id);
+       struct cache_detail *cd;
+       struct cache_head **tbl;
+
+       cd = kzalloc(sizeof(struct cache_detail), GFP_KERNEL);
+       if (cd == NULL)
+               goto err_cd;
+
+       tbl = kzalloc(NFS_DNS_HASHTBL_SIZE * sizeof(struct cache_head *),
+                       GFP_KERNEL);
+       if (tbl == NULL)
+               goto err_tbl;
+
+       /* Each assignment is its own statement (no comma-operator chain). */
+       cd->owner = THIS_MODULE;
+       cd->hash_size = NFS_DNS_HASHTBL_SIZE;
+       cd->hash_table = tbl;
+       cd->name = "dns_resolve";
+       cd->cache_put = nfs_dns_ent_put;
+       cd->cache_upcall = nfs_dns_upcall;
+       cd->cache_parse = nfs_dns_parse;
+       cd->cache_show = nfs_dns_show;
+       cd->match = nfs_dns_match;
+       cd->init = nfs_dns_ent_init;
+       cd->update = nfs_dns_ent_update;
+       cd->alloc = nfs_dns_ent_alloc;
+
+       nfs_cache_init(cd);
+       err = nfs_cache_register_net(net, cd);
+       if (err)
+               goto err_reg;
+       nn->nfs_dns_resolve = cd;
+       return 0;
+
+err_reg:
+       nfs_cache_destroy(cd);
+       /* tbl is owned by cd at this point, so free it through cd */
+       kfree(cd->hash_table);
+err_tbl:
+       kfree(cd);
+err_cd:
+       return err;
+}
+
+/*
+ * Tear down the DNS resolver cache of one network namespace: unregister
+ * it from @net, destroy it, then free the hash table and the cache_detail.
+ *
+ * NOTE(review): nn->nfs_dns_resolve is not reset to NULL here, so it keeps
+ * pointing at freed memory afterwards -- presumably fine because @net is
+ * going away; confirm against the caller.
+ */
+void nfs_dns_resolver_cache_destroy(struct net *net)
+{
+       struct nfs_net *nn = net_generic(net, nfs_net_id);
+       struct cache_detail *cd = nn->nfs_dns_resolve;
+
+       nfs_cache_unregister_net(net, cd);
+       nfs_cache_destroy(cd);
+       kfree(cd->hash_table);
+       kfree(cd);
+}
+
+/*
+ * rpc_pipefs notifier callback for the DNS resolver cache.
+ *
+ * @ptr is the pipefs super_block; its s_fs_info is used as the struct net
+ * whose per-net DNS cache should be (un)registered on that superblock.
+ *
+ * Returns 0 when the namespace has no DNS cache or the module reference
+ * cannot be taken, the result of nfs_cache_register_sb() on
+ * RPC_PIPEFS_MOUNT, 0 on RPC_PIPEFS_UMOUNT, and -ENOTSUPP for any other
+ * event.
+ */
+static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event,
+                          void *ptr)
+{
+       struct super_block *sb = ptr;
+       struct net *net = sb->s_fs_info;
+       struct nfs_net *nn = net_generic(net, nfs_net_id);
+       struct cache_detail *cd = nn->nfs_dns_resolve;
+       int ret = 0;
+
+       /* No cache stored for this namespace: nothing to do */
+       if (cd == NULL)
+               return 0;
+
+       /* Hold a module reference while the callback runs */
+       if (!try_module_get(THIS_MODULE))
+               return 0;
+
+       switch (event) {
+       case RPC_PIPEFS_MOUNT:
+               ret = nfs_cache_register_sb(sb, cd);
+               break;
+       case RPC_PIPEFS_UMOUNT:
+               nfs_cache_unregister_sb(sb, cd);
+               break;
+       default:
+               ret = -ENOTSUPP;
+               break;
+       }
+       module_put(THIS_MODULE);
+       return ret;
+}
+
+static struct notifier_block nfs_dns_resolver_block = {
+       .notifier_call  = rpc_pipefs_event,
+};
+
 int nfs_dns_resolver_init(void)
 {
-       return nfs_cache_register(&nfs_dns_resolve);
+       return rpc_pipefs_notifier_register(&nfs_dns_resolver_block);
 }
 
 void nfs_dns_resolver_destroy(void)
 {
-       nfs_cache_unregister(&nfs_dns_resolve);
+       rpc_pipefs_notifier_unregister(&nfs_dns_resolver_block);
 }
-
 #endif
index 199bb5543a91ad3dfc1e03e2f85894f78661e4cb..2e4f596d2923d5876b685fb9fb8bdc0ed13c2de6 100644 (file)
@@ -15,12 +15,22 @@ static inline int nfs_dns_resolver_init(void)
 
 static inline void nfs_dns_resolver_destroy(void)
 {}
+
+static inline int nfs_dns_resolver_cache_init(struct net *net)
+{
+       return 0;
+}
+
+static inline void nfs_dns_resolver_cache_destroy(struct net *net)
+{}
 #else
 extern int nfs_dns_resolver_init(void);
 extern void nfs_dns_resolver_destroy(void);
+extern int nfs_dns_resolver_cache_init(struct net *net);
+extern void nfs_dns_resolver_cache_destroy(struct net *net);
 #endif
 
-extern ssize_t nfs_dns_resolve_name(char *name, size_t namelen,
-               struct sockaddr *sa, size_t salen);
+extern ssize_t nfs_dns_resolve_name(struct net *net, char *name,
+               size_t namelen, struct sockaddr *sa, size_t salen);
 
 #endif
index c43a452f7da2e70c084bddb7dfe6415d194bdf3e..4fdaaa63cf1c3f0d72ca58088ba312026881aa19 100644 (file)
@@ -530,6 +530,8 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        if (mapping != dentry->d_inode->i_mapping)
                goto out_unlock;
 
+       wait_on_page_writeback(page);
+
        pagelen = nfs_page_length(page);
        if (pagelen == 0)
                goto out_unlock;
index 419119c371bf81d3a5487448a289cf5e3e219314..ae65c16b3670ebb5ed6da1d214f2cde16fd7f73e 100644 (file)
@@ -327,7 +327,7 @@ void nfs_fscache_reset_inode_cookie(struct inode *inode)
 {
        struct nfs_inode *nfsi = NFS_I(inode);
        struct nfs_server *nfss = NFS_SERVER(inode);
-       struct fscache_cookie *old = nfsi->fscache;
+       NFS_IFDEBUG(struct fscache_cookie *old = nfsi->fscache);
 
        nfs_fscache_inode_lock(inode);
        if (nfsi->fscache) {
index a1bbf7780dfcec3e1a1dd0c608d3e88842db8820..b7f348bb618b8d8864f7a5e2569aff8f4ef4e3a7 100644 (file)
  *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 #include <linux/types.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/slab.h>
+#include <linux/parser.h>
+#include <linux/fs.h>
 #include <linux/nfs_idmap.h>
+#include <net/net_namespace.h>
+#include <linux/sunrpc/rpc_pipe_fs.h>
 #include <linux/nfs_fs.h>
+#include <linux/nfs_fs_sb.h>
+#include <linux/key.h>
+#include <linux/keyctl.h>
+#include <linux/key-type.h>
+#include <keys/user-type.h>
+#include <linux/module.h>
+
+#include "internal.h"
+#include "netns.h"
+
+#define NFS_UINT_MAXLEN 11
+
+/* Default cache timeout is 10 minutes */
+unsigned int nfs_idmap_cache_timeout = 600;
+static const struct cred *id_resolver_cache;
+static struct key_type key_type_id_resolver_legacy;
+
 
 /**
  * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields
@@ -142,24 +160,7 @@ static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen)
        return snprintf(buf, buflen, "%u", id);
 }
 
-#ifdef CONFIG_NFS_USE_NEW_IDMAPPER
-
-#include <linux/cred.h>
-#include <linux/sunrpc/sched.h>
-#include <linux/nfs4.h>
-#include <linux/nfs_fs_sb.h>
-#include <linux/keyctl.h>
-#include <linux/key-type.h>
-#include <linux/rcupdate.h>
-#include <linux/err.h>
-
-#include <keys/user-type.h>
-
-#define NFS_UINT_MAXLEN 11
-
-const struct cred *id_resolver_cache;
-
-struct key_type key_type_id_resolver = {
+static struct key_type key_type_id_resolver = {
        .name           = "id_resolver",
        .instantiate    = user_instantiate,
        .match          = user_match,
@@ -169,13 +170,14 @@ struct key_type key_type_id_resolver = {
        .read           = user_read,
 };
 
-int nfs_idmap_init(void)
+static int nfs_idmap_init_keyring(void)
 {
        struct cred *cred;
        struct key *keyring;
        int ret = 0;
 
-       printk(KERN_NOTICE "Registering the %s key type\n", key_type_id_resolver.name);
+       printk(KERN_NOTICE "NFS: Registering the %s key type\n",
+               key_type_id_resolver.name);
 
        cred = prepare_kernel_cred(NULL);
        if (!cred)
@@ -211,7 +213,7 @@ failed_put_cred:
        return ret;
 }
 
-void nfs_idmap_quit(void)
+static void nfs_idmap_quit_keyring(void)
 {
        key_revoke(id_resolver_cache->thread_keyring);
        unregister_key_type(&key_type_id_resolver);
@@ -246,8 +248,10 @@ static ssize_t nfs_idmap_get_desc(const char *name, size_t namelen,
        return desclen;
 }
 
-static ssize_t nfs_idmap_request_key(const char *name, size_t namelen,
-               const char *type, void *data, size_t data_size)
+static ssize_t nfs_idmap_request_key(struct key_type *key_type,
+                                    const char *name, size_t namelen,
+                                    const char *type, void *data,
+                                    size_t data_size, struct idmap *idmap)
 {
        const struct cred *saved_cred;
        struct key *rkey;
@@ -260,8 +264,12 @@ static ssize_t nfs_idmap_request_key(const char *name, size_t namelen,
                goto out;
 
        saved_cred = override_creds(id_resolver_cache);
-       rkey = request_key(&key_type_id_resolver, desc, "");
+       if (idmap)
+               rkey = request_key_with_auxdata(key_type, desc, "", 0, idmap);
+       else
+               rkey = request_key(&key_type_id_resolver, desc, "");
        revert_creds(saved_cred);
+
        kfree(desc);
        if (IS_ERR(rkey)) {
                ret = PTR_ERR(rkey);
@@ -294,31 +302,46 @@ out:
        return ret;
 }
 
+/*
+ * Resolve an id-mapping key, trying two key types in order.
+ *
+ * The first request uses key_type_id_resolver and passes no idmap; if it
+ * returns a negative value, the request is repeated with
+ * key_type_id_resolver_legacy and @idmap.  Returns whatever the last
+ * nfs_idmap_request_key() call returned.
+ */
+static ssize_t nfs_idmap_get_key(const char *name, size_t namelen,
+                                const char *type, void *data,
+                                size_t data_size, struct idmap *idmap)
+{
+       ssize_t len;
+
+       len = nfs_idmap_request_key(&key_type_id_resolver,
+                                   name, namelen, type, data,
+                                   data_size, NULL);
+       if (len >= 0)
+               return len;
+
+       /* First attempt failed: fall back to the legacy key type */
+       return nfs_idmap_request_key(&key_type_id_resolver_legacy,
+                                    name, namelen, type, data,
+                                    data_size, idmap);
+}
 
 /* ID -> Name */
-static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf, size_t buflen)
+static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf,
+                                    size_t buflen, struct idmap *idmap)
 {
        char id_str[NFS_UINT_MAXLEN];
        int id_len;
        ssize_t ret;
 
        id_len = snprintf(id_str, sizeof(id_str), "%u", id);
-       ret = nfs_idmap_request_key(id_str, id_len, type, buf, buflen);
+       ret = nfs_idmap_get_key(id_str, id_len, type, buf, buflen, idmap);
        if (ret < 0)
                return -EINVAL;
        return ret;
 }
 
 /* Name -> ID */
-static int nfs_idmap_lookup_id(const char *name, size_t namelen,
-                               const char *type, __u32 *id)
+static int nfs_idmap_lookup_id(const char *name, size_t namelen, const char *type,
+                              __u32 *id, struct idmap *idmap)
 {
        char id_str[NFS_UINT_MAXLEN];
        long id_long;
        ssize_t data_size;
        int ret = 0;
 
-       data_size = nfs_idmap_request_key(name, namelen, type, id_str, NFS_UINT_MAXLEN);
+       data_size = nfs_idmap_get_key(name, namelen, type, id_str, NFS_UINT_MAXLEN, idmap);
        if (data_size <= 0) {
                ret = -EINVAL;
        } else {
@@ -328,114 +351,103 @@ static int nfs_idmap_lookup_id(const char *name, size_t namelen,
        return ret;
 }
 
-int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
-{
-       if (nfs_map_string_to_numeric(name, namelen, uid))
-               return 0;
-       return nfs_idmap_lookup_id(name, namelen, "uid", uid);
-}
-
-int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid)
-{
-       if (nfs_map_string_to_numeric(name, namelen, gid))
-               return 0;
-       return nfs_idmap_lookup_id(name, namelen, "gid", gid);
-}
-
-int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
-{
-       int ret = -EINVAL;
-
-       if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
-               ret = nfs_idmap_lookup_name(uid, "user", buf, buflen);
-       if (ret < 0)
-               ret = nfs_map_numeric_to_string(uid, buf, buflen);
-       return ret;
-}
-int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen)
-{
-       int ret = -EINVAL;
+/* idmap classic begins here */
+module_param(nfs_idmap_cache_timeout, int, 0644);
 
-       if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
-               ret = nfs_idmap_lookup_name(gid, "group", buf, buflen);
-       if (ret < 0)
-               ret = nfs_map_numeric_to_string(gid, buf, buflen);
-       return ret;
-}
-
-#else  /* CONFIG_NFS_USE_NEW_IDMAPPER not defined */
-
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/init.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/sched.h>
-#include <linux/sunrpc/clnt.h>
-#include <linux/workqueue.h>
-#include <linux/sunrpc/rpc_pipe_fs.h>
-
-#include <linux/nfs_fs.h>
-
-#include "nfs4_fs.h"
-
-#define IDMAP_HASH_SZ          128
-
-/* Default cache timeout is 10 minutes */
-unsigned int nfs_idmap_cache_timeout = 600 * HZ;
-
-static int param_set_idmap_timeout(const char *val, struct kernel_param *kp)
-{
-       char *endp;
-       int num = simple_strtol(val, &endp, 0);
-       int jif = num * HZ;
-       if (endp == val || *endp || num < 0 || jif < num)
-               return -EINVAL;
-       *((int *)kp->arg) = jif;
-       return 0;
-}
-
-module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int,
-                &nfs_idmap_cache_timeout, 0644);
-
-struct idmap_hashent {
-       unsigned long           ih_expires;
-       __u32                   ih_id;
-       size_t                  ih_namelen;
-       char                    ih_name[IDMAP_NAMESZ];
+struct idmap {
+       struct rpc_pipe         *idmap_pipe;
+       struct key_construction *idmap_key_cons;
 };
 
-struct idmap_hashtable {
-       __u8                    h_type;
-       struct idmap_hashent    h_entries[IDMAP_HASH_SZ];
+enum {
+       Opt_find_uid, Opt_find_gid, Opt_find_user, Opt_find_group, Opt_find_err
 };
 
-struct idmap {
-       struct dentry           *idmap_dentry;
-       wait_queue_head_t       idmap_wq;
-       struct idmap_msg        idmap_im;
-       struct mutex            idmap_lock;     /* Serializes upcalls */
-       struct mutex            idmap_im_lock;  /* Protects the hashtable */
-       struct idmap_hashtable  idmap_user_hash;
-       struct idmap_hashtable  idmap_group_hash;
+static const match_table_t nfs_idmap_tokens = {
+       { Opt_find_uid, "uid:%s" },
+       { Opt_find_gid, "gid:%s" },
+       { Opt_find_user, "user:%s" },
+       { Opt_find_group, "group:%s" },
+       { Opt_find_err, NULL }
 };
 
+static int nfs_idmap_legacy_upcall(struct key_construction *, const char *, void *);
 static ssize_t idmap_pipe_downcall(struct file *, const char __user *,
                                   size_t);
 static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
 
-static unsigned int fnvhash32(const void *, size_t);
-
 static const struct rpc_pipe_ops idmap_upcall_ops = {
        .upcall         = rpc_pipe_generic_upcall,
        .downcall       = idmap_pipe_downcall,
        .destroy_msg    = idmap_pipe_destroy_msg,
 };
 
+static struct key_type key_type_id_resolver_legacy = {
+       .name           = "id_resolver",
+       .instantiate    = user_instantiate,
+       .match          = user_match,
+       .revoke         = user_revoke,
+       .destroy        = user_destroy,
+       .describe       = user_describe,
+       .read           = user_read,
+       .request_key    = nfs_idmap_legacy_upcall,
+};
+
+/* Unlink the idmap pipe's dentry, if the pipe currently has one. */
+static void __nfs_idmap_unregister(struct rpc_pipe *pipe)
+{
+       if (!pipe->dentry)
+               return;
+       rpc_unlink(pipe->dentry);
+}
+
+/*
+ * Create the "idmap" pipe dentry under @dir for @pipe, with @idmap as its
+ * private data.  On success the new dentry is recorded in pipe->dentry and
+ * 0 is returned; otherwise the error encoded in the returned pointer is
+ * passed back and pipe->dentry is left untouched.
+ */
+static int __nfs_idmap_register(struct dentry *dir,
+                                    struct idmap *idmap,
+                                    struct rpc_pipe *pipe)
+{
+       struct dentry *pipe_dentry = rpc_mkpipe_dentry(dir, "idmap", idmap, pipe);
+
+       if (!IS_ERR(pipe_dentry)) {
+               pipe->dentry = pipe_dentry;
+               return 0;
+       }
+       return PTR_ERR(pipe_dentry);
+}
+
+/*
+ * Remove @pipe's dentry for client @clp.  This only acts when
+ * rpc_get_sb_net() returns a pipefs superblock for the client's network
+ * namespace; the matching rpc_put_sb_net() is called afterwards.
+ */
+static void nfs_idmap_unregister(struct nfs_client *clp,
+                                     struct rpc_pipe *pipe)
+{
+       struct net *net = clp->net;
+
+       if (rpc_get_sb_net(net) == NULL)
+               return;
+
+       __nfs_idmap_unregister(pipe);
+       rpc_put_sb_net(net);
+}
+
+/*
+ * Create the idmap pipe dentry for client @clp, if possible.
+ *
+ * Returns 0 without doing anything when rpc_get_sb_net() yields no pipefs
+ * superblock for the client's namespace, or when the client's RPC client
+ * has no cl_dentry.  Otherwise returns the result of
+ * __nfs_idmap_register().
+ */
+static int nfs_idmap_register(struct nfs_client *clp,
+                                  struct idmap *idmap,
+                                  struct rpc_pipe *pipe)
+{
+       struct net *net = clp->net;
+       struct dentry *clnt_dir;
+       int err = 0;
+
+       if (rpc_get_sb_net(net) == NULL)
+               return 0;
+
+       clnt_dir = clp->cl_rpcclient->cl_dentry;
+       if (clnt_dir)
+               err = __nfs_idmap_register(clnt_dir, idmap, pipe);
+       rpc_put_sb_net(net);
+       return err;
+}
+
 int
 nfs_idmap_new(struct nfs_client *clp)
 {
        struct idmap *idmap;
+       struct rpc_pipe *pipe;
        int error;
 
        BUG_ON(clp->cl_idmap != NULL);
@@ -444,19 +456,19 @@ nfs_idmap_new(struct nfs_client *clp)
        if (idmap == NULL)
                return -ENOMEM;
 
-       idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_path.dentry,
-                       "idmap", idmap, &idmap_upcall_ops, 0);
-       if (IS_ERR(idmap->idmap_dentry)) {
-               error = PTR_ERR(idmap->idmap_dentry);
+       pipe = rpc_mkpipe_data(&idmap_upcall_ops, 0);
+       if (IS_ERR(pipe)) {
+               error = PTR_ERR(pipe);
                kfree(idmap);
                return error;
        }
-
-       mutex_init(&idmap->idmap_lock);
-       mutex_init(&idmap->idmap_im_lock);
-       init_waitqueue_head(&idmap->idmap_wq);
-       idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER;
-       idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP;
+       error = nfs_idmap_register(clp, idmap, pipe);
+       if (error) {
+               rpc_destroy_pipe_data(pipe);
+               kfree(idmap);
+               return error;
+       }
+       idmap->idmap_pipe = pipe;
 
        clp->cl_idmap = idmap;
        return 0;
@@ -469,211 +481,220 @@ nfs_idmap_delete(struct nfs_client *clp)
 
        if (!idmap)
                return;
-       rpc_unlink(idmap->idmap_dentry);
+       nfs_idmap_unregister(clp, idmap->idmap_pipe);
+       rpc_destroy_pipe_data(idmap->idmap_pipe);
        clp->cl_idmap = NULL;
        kfree(idmap);
 }
 
-/*
- * Helper routines for manipulating the hashtable
- */
-static inline struct idmap_hashent *
-idmap_name_hash(struct idmap_hashtable* h, const char *name, size_t len)
-{
-       return &h->h_entries[fnvhash32(name, len) % IDMAP_HASH_SZ];
-}
-
-static struct idmap_hashent *
-idmap_lookup_name(struct idmap_hashtable *h, const char *name, size_t len)
+static int __rpc_pipefs_event(struct nfs_client *clp, unsigned long event,
+                             struct super_block *sb)
 {
-       struct idmap_hashent *he = idmap_name_hash(h, name, len);
+       int err = 0;
 
-       if (he->ih_namelen != len || memcmp(he->ih_name, name, len) != 0)
-               return NULL;
-       if (time_after(jiffies, he->ih_expires))
-               return NULL;
-       return he;
+       switch (event) {
+       case RPC_PIPEFS_MOUNT:
+               BUG_ON(clp->cl_rpcclient->cl_dentry == NULL);
+               err = __nfs_idmap_register(clp->cl_rpcclient->cl_dentry,
+                                               clp->cl_idmap,
+                                               clp->cl_idmap->idmap_pipe);
+               break;
+       case RPC_PIPEFS_UMOUNT:
+               if (clp->cl_idmap->idmap_pipe) {
+                       struct dentry *parent;
+
+                       parent = clp->cl_idmap->idmap_pipe->dentry->d_parent;
+                       __nfs_idmap_unregister(clp->cl_idmap->idmap_pipe);
+                       /*
+                        * Note: This is a dirty hack. SUNRPC hook has been
+                        * called already but simple_rmdir() call for the
+                        * directory returned with error because of idmap pipe
+                        * inside. Thus now we have to remove this directory
+                        * here.
+                        */
+                       if (rpc_rmdir(parent))
+                               printk(KERN_ERR "NFS: %s: failed to remove "
+                                       "clnt dir!\n", __func__);
+               }
+               break;
+       default:
+               printk(KERN_ERR "NFS: %s: unknown event: %ld\n", __func__,
+                       event);
+               return -ENOTSUPP;
+       }
+       return err;
+}
+
+/*
+ * Find the next NFSv4 client in @net that still needs handling for a
+ * pipefs @event.
+ *
+ * A client is skipped when it is not an NFSv4 client, when it has no idmap
+ * (or idmap pipe) attached, when @event is RPC_PIPEFS_MOUNT and its idmap
+ * pipe already has a dentry, or when @event is RPC_PIPEFS_UMOUNT and the
+ * pipe has no dentry.
+ *
+ * Returns the client with cl_count incremented (the caller must drop that
+ * reference), or NULL when no client matches.
+ */
+static struct nfs_client *nfs_get_client_for_event(struct net *net, int event)
+{
+       struct nfs_net *nn = net_generic(net, nfs_net_id);
+       struct dentry *cl_dentry;
+       struct nfs_client *clp;
+
+       spin_lock(&nn->nfs_client_lock);
+       list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
+               if (clp->rpc_ops != &nfs_v4_clientops)
+                       continue;
+               /*
+                * cl_idmap is NULL until nfs_idmap_new() has run and again
+                * after nfs_idmap_delete(); do not dereference it then.
+                */
+               if (clp->cl_idmap == NULL || clp->cl_idmap->idmap_pipe == NULL)
+                       continue;
+               cl_dentry = clp->cl_idmap->idmap_pipe->dentry;
+               if (((event == RPC_PIPEFS_MOUNT) && cl_dentry) ||
+                   ((event == RPC_PIPEFS_UMOUNT) && !cl_dentry))
+                       continue;
+               atomic_inc(&clp->cl_count);
+               spin_unlock(&nn->nfs_client_lock);
+               return clp;
+       }
+       spin_unlock(&nn->nfs_client_lock);
+       return NULL;
 }
 
-static inline struct idmap_hashent *
-idmap_id_hash(struct idmap_hashtable* h, __u32 id)
+static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event,
+                           void *ptr)
 {
-       return &h->h_entries[fnvhash32(&id, sizeof(id)) % IDMAP_HASH_SZ];
-}
+       struct super_block *sb = ptr;
+       struct nfs_client *clp;
+       int error = 0;
 
-static struct idmap_hashent *
-idmap_lookup_id(struct idmap_hashtable *h, __u32 id)
-{
-       struct idmap_hashent *he = idmap_id_hash(h, id);
-       if (he->ih_id != id || he->ih_namelen == 0)
-               return NULL;
-       if (time_after(jiffies, he->ih_expires))
-               return NULL;
-       return he;
+       while ((clp = nfs_get_client_for_event(sb->s_fs_info, event))) {
+               error = __rpc_pipefs_event(clp, event, sb);
+               nfs_put_client(clp);
+               if (error)
+                       break;
+       }
+       return error;
 }
 
-/*
- * Routines for allocating new entries in the hashtable.
- * For now, we just have 1 entry per bucket, so it's all
- * pretty trivial.
- */
-static inline struct idmap_hashent *
-idmap_alloc_name(struct idmap_hashtable *h, char *name, size_t len)
-{
-       return idmap_name_hash(h, name, len);
-}
+#define PIPEFS_NFS_PRIO                1
+
+static struct notifier_block nfs_idmap_block = {
+       .notifier_call  = rpc_pipefs_event,
+       .priority       = SUNRPC_PIPEFS_NFS_PRIO,
+};
 
-static inline struct idmap_hashent *
-idmap_alloc_id(struct idmap_hashtable *h, __u32 id)
+int nfs_idmap_init(void)
 {
-       return idmap_id_hash(h, id);
+       int ret;
+       ret = nfs_idmap_init_keyring();
+       if (ret != 0)
+               goto out;
+       ret = rpc_pipefs_notifier_register(&nfs_idmap_block);
+       if (ret != 0)
+               nfs_idmap_quit_keyring();
+out:
+       return ret;
 }
 
-static void
-idmap_update_entry(struct idmap_hashent *he, const char *name,
-               size_t namelen, __u32 id)
+void nfs_idmap_quit(void)
 {
-       he->ih_id = id;
-       memcpy(he->ih_name, name, namelen);
-       he->ih_name[namelen] = '\0';
-       he->ih_namelen = namelen;
-       he->ih_expires = jiffies + nfs_idmap_cache_timeout;
+       rpc_pipefs_notifier_unregister(&nfs_idmap_block);
+       nfs_idmap_quit_keyring();
 }
 
-/*
- * Name -> ID
- */
-static int
-nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h,
-               const char *name, size_t namelen, __u32 *id)
+static int nfs_idmap_prepare_message(char *desc, struct idmap_msg *im,
+                                    struct rpc_pipe_msg *msg)
 {
-       struct rpc_pipe_msg msg;
-       struct idmap_msg *im;
-       struct idmap_hashent *he;
-       DECLARE_WAITQUEUE(wq, current);
-       int ret = -EIO;
-
-       im = &idmap->idmap_im;
-
-       /*
-        * String sanity checks
-        * Note that the userland daemon expects NUL terminated strings
-        */
-       for (;;) {
-               if (namelen == 0)
-                       return -EINVAL;
-               if (name[namelen-1] != '\0')
-                       break;
-               namelen--;
-       }
-       if (namelen >= IDMAP_NAMESZ)
-               return -EINVAL;
+       substring_t substr;
+       int token, ret;
 
-       mutex_lock(&idmap->idmap_lock);
-       mutex_lock(&idmap->idmap_im_lock);
-
-       he = idmap_lookup_name(h, name, namelen);
-       if (he != NULL) {
-               *id = he->ih_id;
-               ret = 0;
-               goto out;
-       }
+       memset(im,  0, sizeof(*im));
+       memset(msg, 0, sizeof(*msg));
 
-       memset(im, 0, sizeof(*im));
-       memcpy(im->im_name, name, namelen);
+       im->im_type = IDMAP_TYPE_GROUP;
+       token = match_token(desc, nfs_idmap_tokens, &substr);
 
-       im->im_type = h->h_type;
-       im->im_conv = IDMAP_CONV_NAMETOID;
+       switch (token) {
+       case Opt_find_uid:
+               im->im_type = IDMAP_TYPE_USER;
+       case Opt_find_gid:
+               im->im_conv = IDMAP_CONV_NAMETOID;
+               ret = match_strlcpy(im->im_name, &substr, IDMAP_NAMESZ);
+               break;
 
-       memset(&msg, 0, sizeof(msg));
-       msg.data = im;
-       msg.len = sizeof(*im);
+       case Opt_find_user:
+               im->im_type = IDMAP_TYPE_USER;
+       case Opt_find_group:
+               im->im_conv = IDMAP_CONV_IDTONAME;
+               ret = match_int(&substr, &im->im_id);
+               break;
 
-       add_wait_queue(&idmap->idmap_wq, &wq);
-       if (rpc_queue_upcall(idmap->idmap_dentry->d_inode, &msg) < 0) {
-               remove_wait_queue(&idmap->idmap_wq, &wq);
+       default:
+               ret = -EINVAL;
                goto out;
        }
 
-       set_current_state(TASK_UNINTERRUPTIBLE);
-       mutex_unlock(&idmap->idmap_im_lock);
-       schedule();
-       __set_current_state(TASK_RUNNING);
-       remove_wait_queue(&idmap->idmap_wq, &wq);
-       mutex_lock(&idmap->idmap_im_lock);
+       msg->data = im;
+       msg->len  = sizeof(struct idmap_msg);
 
-       if (im->im_status & IDMAP_STATUS_SUCCESS) {
-               *id = im->im_id;
-               ret = 0;
-       }
-
- out:
-       memset(im, 0, sizeof(*im));
-       mutex_unlock(&idmap->idmap_im_lock);
-       mutex_unlock(&idmap->idmap_lock);
+out:
        return ret;
 }
 
-/*
- * ID -> Name
- */
-static int
-nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
-               __u32 id, char *name)
+/*
+ * request_key callback of key_type_id_resolver_legacy: turn a key
+ * construction request into an upcall on the idmap pipe.
+ *
+ * @cons: key construction record; cons->key->description selects the
+ *        lookup to perform.
+ * @op:   unused here.
+ * @aux:  the struct idmap whose pipe receives the upcall.
+ *
+ * Returns the (non-negative) result of rpc_queue_upcall() on success.  On
+ * failure returns -ENOMEM for an allocation failure, or the error from
+ * nfs_idmap_prepare_message() / rpc_queue_upcall(), after freeing the
+ * message buffers and revoking both cons->key and cons->authkey.
+ */
+static int nfs_idmap_legacy_upcall(struct key_construction *cons,
+                                  const char *op,
+                                  void *aux)
 {
-       struct rpc_pipe_msg msg;
+       struct rpc_pipe_msg *msg;
        struct idmap_msg *im;
-       struct idmap_hashent *he;
-       DECLARE_WAITQUEUE(wq, current);
-       int ret = -EIO;
-       unsigned int len;
-
-       im = &idmap->idmap_im;
+       struct idmap *idmap = (struct idmap *)aux;
+       struct key *key = cons->key;
+       int ret;
 
-       mutex_lock(&idmap->idmap_lock);
-       mutex_lock(&idmap->idmap_im_lock);
+       /* msg and im are freed in idmap_pipe_destroy_msg */
+       msg = kmalloc(sizeof(*msg), GFP_KERNEL);
+       /* kmalloc() reports failure with NULL, not an ERR_PTR value */
+       if (!msg) {
+               ret = -ENOMEM;
+               goto out0;
+       }
 
-       he = idmap_lookup_id(h, id);
-       if (he) {
-               memcpy(name, he->ih_name, he->ih_namelen);
-               ret = he->ih_namelen;
-               goto out;
+       im = kmalloc(sizeof(*im), GFP_KERNEL);
+       if (!im) {
+               ret = -ENOMEM;
+               goto out1;
        }
 
-       memset(im, 0, sizeof(*im));
-       im->im_type = h->h_type;
-       im->im_conv = IDMAP_CONV_IDTONAME;
-       im->im_id = id;
+       ret = nfs_idmap_prepare_message(key->description, im, msg);
+       if (ret < 0)
+               goto out2;
 
-       memset(&msg, 0, sizeof(msg));
-       msg.data = im;
-       msg.len = sizeof(*im);
+       idmap->idmap_key_cons = cons;
 
-       add_wait_queue(&idmap->idmap_wq, &wq);
+       ret = rpc_queue_upcall(idmap->idmap_pipe, msg);
+       if (ret < 0)
+               goto out2;
 
-       if (rpc_queue_upcall(idmap->idmap_dentry->d_inode, &msg) < 0) {
-               remove_wait_queue(&idmap->idmap_wq, &wq);
-               goto out;
-       }
+       return ret;
+
+out2:
+       kfree(im);
+out1:
+       kfree(msg);
+out0:
+       key_revoke(cons->key);
+       key_revoke(cons->authkey);
+       return ret;
+}
+
+/*
+ * Instantiate @key with the NUL-terminated string @data (the terminating
+ * NUL is included in the payload length) and link it into the thread
+ * keyring of the id_resolver_cache credentials, using @authkey as the
+ * authorisation key.  Returns the result of key_instantiate_and_link().
+ */
+static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data)
+{
+       return key_instantiate_and_link(key, data, strlen(data) + 1,
+                                       id_resolver_cache->thread_keyring,
+                                       authkey);
+}
 
-       set_current_state(TASK_UNINTERRUPTIBLE);
-       mutex_unlock(&idmap->idmap_im_lock);
-       schedule();
-       __set_current_state(TASK_RUNNING);
-       remove_wait_queue(&idmap->idmap_wq, &wq);
-       mutex_lock(&idmap->idmap_im_lock);
-
-       if (im->im_status & IDMAP_STATUS_SUCCESS) {
-               if ((len = strnlen(im->im_name, IDMAP_NAMESZ)) == 0)
-                       goto out;
-               memcpy(name, im->im_name, len);
-               ret = len;
+static int nfs_idmap_read_message(struct idmap_msg *im, struct key *key, struct key *authkey)
+{
+       char id_str[NFS_UINT_MAXLEN];
+       int ret = -EINVAL;
+
+       switch (im->im_conv) {
+       case IDMAP_CONV_NAMETOID:
+               sprintf(id_str, "%d", im->im_id);
+               ret = nfs_idmap_instantiate(key, authkey, id_str);
+               break;
+       case IDMAP_CONV_IDTONAME:
+               ret = nfs_idmap_instantiate(key, authkey, im->im_name);
+               break;
        }
 
- out:
-       memset(im, 0, sizeof(*im));
-       mutex_unlock(&idmap->idmap_im_lock);
-       mutex_unlock(&idmap->idmap_lock);
        return ret;
 }
 
@@ -682,115 +703,51 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 {
        struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode);
        struct idmap *idmap = (struct idmap *)rpci->private;
-       struct idmap_msg im_in, *im = &idmap->idmap_im;
-       struct idmap_hashtable *h;
-       struct idmap_hashent *he = NULL;
+       struct key_construction *cons = idmap->idmap_key_cons;
+       struct idmap_msg im;
        size_t namelen_in;
        int ret;
 
-       if (mlen != sizeof(im_in))
-               return -ENOSPC;
-
-       if (copy_from_user(&im_in, src, mlen) != 0)
-               return -EFAULT;
-
-       mutex_lock(&idmap->idmap_im_lock);
-
-       ret = mlen;
-       im->im_status = im_in.im_status;
-       /* If we got an error, terminate now, and wake up pending upcalls */
-       if (!(im_in.im_status & IDMAP_STATUS_SUCCESS)) {
-               wake_up(&idmap->idmap_wq);
+       if (mlen != sizeof(im)) {
+               ret = -ENOSPC;
                goto out;
        }
 
-       /* Sanity checking of strings */
-       ret = -EINVAL;
-       namelen_in = strnlen(im_in.im_name, IDMAP_NAMESZ);
-       if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ)
+       if (copy_from_user(&im, src, mlen) != 0) {
+               ret = -EFAULT;
                goto out;
+       }
 
-       switch (im_in.im_type) {
-               case IDMAP_TYPE_USER:
-                       h = &idmap->idmap_user_hash;
-                       break;
-               case IDMAP_TYPE_GROUP:
-                       h = &idmap->idmap_group_hash;
-                       break;
-               default:
-                       goto out;
+       if (!(im.im_status & IDMAP_STATUS_SUCCESS)) {
+               ret = mlen;
+               complete_request_key(idmap->idmap_key_cons, -ENOKEY);
+               goto out_incomplete;
        }
 
-       switch (im_in.im_conv) {
-       case IDMAP_CONV_IDTONAME:
-               /* Did we match the current upcall? */
-               if (im->im_conv == IDMAP_CONV_IDTONAME
-                               && im->im_type == im_in.im_type
-                               && im->im_id == im_in.im_id) {
-                       /* Yes: copy string, including the terminating '\0'  */
-                       memcpy(im->im_name, im_in.im_name, namelen_in);
-                       im->im_name[namelen_in] = '\0';
-                       wake_up(&idmap->idmap_wq);
-               }
-               he = idmap_alloc_id(h, im_in.im_id);
-               break;
-       case IDMAP_CONV_NAMETOID:
-               /* Did we match the current upcall? */
-               if (im->im_conv == IDMAP_CONV_NAMETOID
-                               && im->im_type == im_in.im_type
-                               && strnlen(im->im_name, IDMAP_NAMESZ) == namelen_in
-                               && memcmp(im->im_name, im_in.im_name, namelen_in) == 0) {
-                       im->im_id = im_in.im_id;
-                       wake_up(&idmap->idmap_wq);
-               }
-               he = idmap_alloc_name(h, im_in.im_name, namelen_in);
-               break;
-       default:
+       namelen_in = strnlen(im.im_name, IDMAP_NAMESZ);
+       if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ) {
+               ret = -EINVAL;
                goto out;
        }
 
-       /* If the entry is valid, also copy it to the cache */
-       if (he != NULL)
-               idmap_update_entry(he, im_in.im_name, namelen_in, im_in.im_id);
-       ret = mlen;
+       ret = nfs_idmap_read_message(&im, cons->key, cons->authkey);
+       if (ret >= 0) {
+               key_set_timeout(cons->key, nfs_idmap_cache_timeout);
+               ret = mlen;
+       }
+
 out:
-       mutex_unlock(&idmap->idmap_im_lock);
+       complete_request_key(idmap->idmap_key_cons, ret);
+out_incomplete:
        return ret;
 }
 
 static void
 idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg)
 {
-       struct idmap_msg *im = msg->data;
-       struct idmap *idmap = container_of(im, struct idmap, idmap_im); 
-
-       if (msg->errno >= 0)
-               return;
-       mutex_lock(&idmap->idmap_im_lock);
-       im->im_status = IDMAP_STATUS_LOOKUPFAIL;
-       wake_up(&idmap->idmap_wq);
-       mutex_unlock(&idmap->idmap_im_lock);
-}
-
-/* 
- * Fowler/Noll/Vo hash
- *    http://www.isthe.com/chongo/tech/comp/fnv/
- */
-
-#define FNV_P_32 ((unsigned int)0x01000193) /* 16777619 */
-#define FNV_1_32 ((unsigned int)0x811c9dc5) /* 2166136261 */
-
-static unsigned int fnvhash32(const void *buf, size_t buflen)
-{
-       const unsigned char *p, *end = (const unsigned char *)buf + buflen;
-       unsigned int hash = FNV_1_32;
-
-       for (p = buf; p < end; p++) {
-               hash *= FNV_P_32;
-               hash ^= (unsigned int)*p;
-       }
-
-       return hash;
+       /*
+        * Free memory allocated in nfs_idmap_legacy_upcall(): first the
+        * payload that msg->data points at, then the pipe message itself.
+        * msg must not be touched after this returns.
+        */
+       kfree(msg->data);
+       kfree(msg);
 }
 
 int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
@@ -799,16 +756,16 @@ int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_
 
        if (nfs_map_string_to_numeric(name, namelen, uid))
                return 0;
-       return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid);
+       return nfs_idmap_lookup_id(name, namelen, "uid", uid, idmap);
 }
 
-int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
+/*
+ * Map the group name in @name (@namelen bytes) to a gid.  @gid is handed to
+ * both helpers as the output location (presumably filled in on success).
+ * nfs_map_string_to_numeric() is tried first and a non-zero result from it
+ * ends the call with 0; otherwise the result of the "gid" lookup through
+ * nfs_idmap_lookup_id() is returned.
+ */
+int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid)
 {
        struct idmap *idmap = server->nfs_client->cl_idmap;
 
-       if (nfs_map_string_to_numeric(name, namelen, uid))
+       if (nfs_map_string_to_numeric(name, namelen, gid))
                return 0;
-       return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid);
+       return nfs_idmap_lookup_id(name, namelen, "gid", gid, idmap);
 }
 
 int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
@@ -817,21 +774,19 @@ int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, s
        int ret = -EINVAL;
 
        if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
-               ret = nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf);
+               ret = nfs_idmap_lookup_name(uid, "user", buf, buflen, idmap);
        if (ret < 0)
                ret = nfs_map_numeric_to_string(uid, buf, buflen);
        return ret;
 }
-int nfs_map_gid_to_group(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
+/*
+ * Produce the group name for @gid in @buf (@buflen bytes).  Unless the
+ * server has NFS_CAP_UIDGID_NOMAP set, the "group" lookup through
+ * nfs_idmap_lookup_name() is tried; if it was skipped or returned a
+ * negative value, nfs_map_numeric_to_string() is called for @gid instead.
+ * Returns the result of whichever helper ran last.
+ */
+int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen)
 {
        struct idmap *idmap = server->nfs_client->cl_idmap;
        int ret = -EINVAL;
 
        if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
-               ret = nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf);
+               ret = nfs_idmap_lookup_name(gid, "group", buf, buflen, idmap);
        if (ret < 0)
-               ret = nfs_map_numeric_to_string(uid, buf, buflen);
+               ret = nfs_map_numeric_to_string(gid, buf, buflen);
        return ret;
 }
-
-#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
index f649fba8c38489e2cae05eb60e4ead032aca963d..7bb4d13c1cd5ecaa10bbab88b8942ccbd9f9b444 100644 (file)
@@ -39,6 +39,7 @@
 #include <linux/slab.h>
 #include <linux/compat.h>
 #include <linux/freezer.h>
+#include <linux/crc32.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -51,6 +52,7 @@
 #include "fscache.h"
 #include "dns_resolve.h"
 #include "pnfs.h"
+#include "netns.h"
 
 #define NFSDBG_FACILITY                NFSDBG_VFS
 
@@ -388,9 +390,10 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
                unlock_new_inode(inode);
        } else
                nfs_refresh_inode(inode, fattr);
-       dprintk("NFS: nfs_fhget(%s/%Ld ct=%d)\n",
+       dprintk("NFS: nfs_fhget(%s/%Ld fh_crc=0x%08x ct=%d)\n",
                inode->i_sb->s_id,
                (long long)NFS_FILEID(inode),
+               nfs_display_fhandle_hash(fh),
                atomic_read(&inode->i_count));
 
 out:
@@ -401,7 +404,7 @@ out_no_inode:
        goto out;
 }
 
-#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE)
+#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE|ATTR_OPEN)
 
 int
 nfs_setattr(struct dentry *dentry, struct iattr *attr)
@@ -423,7 +426,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
 
        /* Optimization: if the end result is no change, don't RPC */
        attr->ia_valid &= NFS_VALID_ATTRS;
-       if ((attr->ia_valid & ~ATTR_FILE) == 0)
+       if ((attr->ia_valid & ~(ATTR_FILE|ATTR_OPEN)) == 0)
                return 0;
 
        /* Write all dirty data */
@@ -1044,6 +1047,67 @@ struct nfs_fh *nfs_alloc_fhandle(void)
        return fh;
 }
 
+#ifdef NFS_DEBUG
+/*
+ * _nfs_display_fhandle_hash - crc32 of a file handle, computed the way
+ *                             wireshark does
+ *
+ * @fh: file handle to hash
+ *
+ * Returns the bitwise complement of crc32() seeded with 0xFFFFFFFF and run
+ * over the fh->size bytes of fh->data.  For debugging only.
+ */
+u32 _nfs_display_fhandle_hash(const struct nfs_fh *fh)
+{
+       /* wireshark uses the 32-bit AUTODIN crc and inverts the result */
+       u32 crc = crc32(0xFFFFFFFF, fh->data, fh->size);
+
+       return ~crc;
+}
+
+/*
+ * _nfs_display_fhandle - dump an NFS file handle with printk
+ *
+ * @fh: file handle to dump (may be NULL)
+ * @caption: text printed in front of the dump
+ *
+ * Prints a header line with the handle's address, size and crc, then the
+ * handle data as big-endian 32-bit words in hex, up to four words per line.
+ * A NULL or zero-length handle is reported as empty.  For debugging only.
+ */
+void _nfs_display_fhandle(const struct nfs_fh *fh, const char *caption)
+{
+       unsigned short off;
+
+       if (fh == NULL || fh->size == 0) {
+               printk(KERN_DEFAULT "%s at %p is empty\n", caption, fh);
+               return;
+       }
+
+       printk(KERN_DEFAULT "%s at %p is %u bytes, crc: 0x%08x:\n",
+              caption, fh, fh->size, _nfs_display_fhandle_hash(fh));
+
+       for (off = 0; off < fh->size; off += 16) {
+               __be32 *word = (__be32 *)&fh->data[off];
+               /* index of the last word on this row that holds handle bytes */
+               int last = (fh->size - off - 1) >> 2;
+
+               if (last == 0)
+                       printk(KERN_DEFAULT " %08x\n",
+                               be32_to_cpup(word));
+               else if (last == 1)
+                       printk(KERN_DEFAULT " %08x %08x\n",
+                               be32_to_cpup(word), be32_to_cpup(word + 1));
+               else if (last == 2)
+                       printk(KERN_DEFAULT " %08x %08x %08x\n",
+                               be32_to_cpup(word), be32_to_cpup(word + 1),
+                               be32_to_cpup(word + 2));
+               else
+                       printk(KERN_DEFAULT " %08x %08x %08x %08x\n",
+                               be32_to_cpup(word), be32_to_cpup(word + 1),
+                               be32_to_cpup(word + 2), be32_to_cpup(word + 3));
+       }
+}
+#endif
+
 /**
  * nfs_inode_attrs_need_update - check if the inode attributes need updating
  * @inode - pointer to inode
@@ -1211,8 +1275,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
        unsigned long now = jiffies;
        unsigned long save_cache_validity;
 
-       dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n",
+       dfprintk(VFS, "NFS: %s(%s/%ld fh_crc=0x%08x ct=%d info=0x%x)\n",
                        __func__, inode->i_sb->s_id, inode->i_ino,
+                       nfs_display_fhandle_hash(NFS_FH(inode)),
                        atomic_read(&inode->i_count), fattr->valid);
 
        if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)
@@ -1406,7 +1471,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
        /*
         * Big trouble! The inode has become a different object.
         */
-       printk(KERN_DEBUG "%s: inode %ld mode changed, %07o to %07o\n",
+       printk(KERN_DEBUG "NFS: %s: inode %ld mode changed, %07o to %07o\n",
                        __func__, inode->i_ino, inode->i_mode, fattr->mode);
  out_err:
        /*
@@ -1495,7 +1560,7 @@ static void init_once(void *foo)
        INIT_LIST_HEAD(&nfsi->open_files);
        INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
        INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
-       INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
+       INIT_LIST_HEAD(&nfsi->commit_list);
        nfsi->npages = 0;
        nfsi->ncommit = 0;
        atomic_set(&nfsi->silly_count, 1);
@@ -1552,6 +1617,28 @@ static void nfsiod_stop(void)
        destroy_workqueue(wq);
 }
 
+int nfs_net_id;
+EXPORT_SYMBOL_GPL(nfs_net_id);
+
+/*
+ * .init hook of nfs_net_ops: runs nfs_clients_init() for @net, then sets up
+ * the DNS resolver cache for it.  The resolver setup's return value is the
+ * hook's result.
+ */
+static int nfs_net_init(struct net *net)
+{
+       int err;
+
+       nfs_clients_init(net);
+       err = nfs_dns_resolver_cache_init(net);
+       return err;
+}
+
+/*
+ * .exit hook of nfs_net_ops: destroys the DNS resolver cache of @net and
+ * then calls nfs_cleanup_cb_ident_idr() for the same namespace.
+ */
+static void nfs_net_exit(struct net *net)
+{
+       nfs_dns_resolver_cache_destroy(net);
+       nfs_cleanup_cb_ident_idr(net);
+}
+
+/*
+ * Per-network-namespace hooks for NFS.  init_nfs_fs() registers this with
+ * register_pernet_subsys(); .size asks for one struct nfs_net per
+ * namespace and .id points at nfs_net_id.
+ */
+static struct pernet_operations nfs_net_ops = {
+       .init = nfs_net_init,
+       .exit = nfs_net_exit,
+       .id   = &nfs_net_id,
+       .size = sizeof(struct nfs_net),
+};
+
 /*
  * Initialize NFS
  */
@@ -1561,9 +1648,13 @@ static int __init init_nfs_fs(void)
 
        err = nfs_idmap_init();
        if (err < 0)
-               goto out9;
+               goto out10;
 
        err = nfs_dns_resolver_init();
+       if (err < 0)
+               goto out9;
+
+       err = register_pernet_subsys(&nfs_net_ops);
        if (err < 0)
                goto out8;
 
@@ -1600,14 +1691,14 @@ static int __init init_nfs_fs(void)
                goto out0;
 
 #ifdef CONFIG_PROC_FS
-       rpc_proc_register(&nfs_rpcstat);
+       rpc_proc_register(&init_net, &nfs_rpcstat);
 #endif
        if ((err = register_nfs_fs()) != 0)
                goto out;
        return 0;
 out:
 #ifdef CONFIG_PROC_FS
-       rpc_proc_unregister("nfs");
+       rpc_proc_unregister(&init_net, "nfs");
 #endif
        nfs_destroy_directcache();
 out0:
@@ -1625,10 +1716,12 @@ out5:
 out6:
        nfs_fscache_unregister();
 out7:
-       nfs_dns_resolver_destroy();
+       unregister_pernet_subsys(&nfs_net_ops);
 out8:
-       nfs_idmap_quit();
+       nfs_dns_resolver_destroy();
 out9:
+       nfs_idmap_quit();
+out10:
        return err;
 }
 
@@ -1640,12 +1733,12 @@ static void __exit exit_nfs_fs(void)
        nfs_destroy_inodecache();
        nfs_destroy_nfspagecache();
        nfs_fscache_unregister();
+       unregister_pernet_subsys(&nfs_net_ops);
        nfs_dns_resolver_destroy();
        nfs_idmap_quit();
 #ifdef CONFIG_PROC_FS
-       rpc_proc_unregister("nfs");
+       rpc_proc_unregister(&init_net, "nfs");
 #endif
-       nfs_cleanup_cb_ident_idr();
        unregister_nfs_fs();
        nfs_fs_proc_exit();
        nfsiod_stop();
index 8102db9b926c2eb56d9035126fa27b33058d711f..2476dc69365f223d78a0b514991bcb88fa144ec9 100644 (file)
@@ -123,6 +123,7 @@ struct nfs_parsed_mount_data {
        } nfs_server;
 
        struct security_mnt_opts lsm_opts;
+       struct net              *net;
 };
 
 /* mount_clnt.c */
@@ -137,20 +138,22 @@ struct nfs_mount_request {
        int                     noresvport;
        unsigned int            *auth_flav_len;
        rpc_authflavor_t        *auth_flavs;
+       struct net              *net;
 };
 
 extern int nfs_mount(struct nfs_mount_request *info);
 extern void nfs_umount(const struct nfs_mount_request *info);
 
 /* client.c */
-extern struct rpc_program nfs_program;
+extern const struct rpc_program nfs_program;
+extern void nfs_clients_init(struct net *net);
 
-extern void nfs_cleanup_cb_ident_idr(void);
+extern void nfs_cleanup_cb_ident_idr(struct net *);
 extern void nfs_put_client(struct nfs_client *);
-extern struct nfs_client *nfs4_find_client_no_ident(const struct sockaddr *);
-extern struct nfs_client *nfs4_find_client_ident(int);
+extern struct nfs_client *nfs4_find_client_ident(struct net *, int);
 extern struct nfs_client *
-nfs4_find_client_sessionid(const struct sockaddr *, struct nfs4_sessionid *);
+nfs4_find_client_sessionid(struct net *, const struct sockaddr *,
+                               struct nfs4_sessionid *);
 extern struct nfs_server *nfs_create_server(
                                        const struct nfs_parsed_mount_data *,
                                        struct nfs_fh *);
@@ -329,6 +332,8 @@ void nfs_retry_commit(struct list_head *page_list,
 void nfs_commit_clear_lock(struct nfs_inode *nfsi);
 void nfs_commitdata_release(void *data);
 void nfs_commit_release_pages(struct nfs_write_data *data);
+void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head);
+void nfs_request_remove_commit_list(struct nfs_page *req);
 
 #ifdef CONFIG_MIGRATION
 extern int nfs_migrate_page(struct address_space *,
index d4c2d6b7507e791044df2837c1d0803699a2a518..8e65c7f1f87c526707959c0e691e36532406d1fa 100644 (file)
@@ -16,7 +16,7 @@
 #include <linux/nfs_fs.h>
 #include "internal.h"
 
-#ifdef RPC_DEBUG
+#ifdef NFS_DEBUG
 # define NFSDBG_FACILITY       NFSDBG_MOUNT
 #endif
 
@@ -67,7 +67,7 @@ enum {
        MOUNTPROC3_EXPORT       = 5,
 };
 
-static struct rpc_program      mnt_program;
+static const struct rpc_program mnt_program;
 
 /*
  * Defined by OpenGroup XNFS Version 3W, chapter 8
@@ -153,7 +153,7 @@ int nfs_mount(struct nfs_mount_request *info)
                .rpc_resp       = &result,
        };
        struct rpc_create_args args = {
-               .net            = &init_net,
+               .net            = info->net,
                .protocol       = info->protocol,
                .address        = info->sap,
                .addrsize       = info->salen,
@@ -225,7 +225,7 @@ void nfs_umount(const struct nfs_mount_request *info)
                .to_retries = 2,
        };
        struct rpc_create_args args = {
-               .net            = &init_net,
+               .net            = info->net,
                .protocol       = IPPROTO_UDP,
                .address        = info->sap,
                .addrsize       = info->salen,
@@ -488,19 +488,19 @@ static struct rpc_procinfo mnt3_procedures[] = {
 };
 
 
-static struct rpc_version mnt_version1 = {
+static const struct rpc_version mnt_version1 = {
        .number         = 1,
        .nrprocs        = ARRAY_SIZE(mnt_procedures),
        .procs          = mnt_procedures,
 };
 
-static struct rpc_version mnt_version3 = {
+static const struct rpc_version mnt_version3 = {
        .number         = 3,
        .nrprocs        = ARRAY_SIZE(mnt3_procedures),
        .procs          = mnt3_procedures,
 };
 
-static struct rpc_version *mnt_version[] = {
+static const struct rpc_version *mnt_version[] = {
        NULL,
        &mnt_version1,
        NULL,
@@ -509,7 +509,7 @@ static struct rpc_version *mnt_version[] = {
 
 static struct rpc_stat mnt_stats;
 
-static struct rpc_program mnt_program = {
+static const struct rpc_program mnt_program = {
        .name           = "mount",
        .number         = NFS_MNT_PROGRAM,
        .nrvers         = ARRAY_SIZE(mnt_version),
index 8102391bb3744077ae778af2d5bdb5d135ed3fbf..1807866bb3ab845098de2a95c695bee5460aaf37 100644 (file)
@@ -276,7 +276,10 @@ out:
        nfs_free_fattr(fattr);
        nfs_free_fhandle(fh);
 out_nofree:
-       dprintk("<-- nfs_follow_mountpoint() = %p\n", mnt);
+       if (IS_ERR(mnt))
+               dprintk("<-- %s(): error %ld\n", __func__, PTR_ERR(mnt));
+       else
+               dprintk("<-- %s() = %p\n", __func__, mnt);
        return mnt;
 }
 
diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h
new file mode 100644 (file)
index 0000000..aa14ec3
--- /dev/null
@@ -0,0 +1,27 @@
+#ifndef __NFS_NETNS_H__
+#define __NFS_NETNS_H__
+
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+
+/*
+ * A status code plus a major/minor number pair.  struct nfs_net keeps one
+ * of these as bl_mount_reply -- presumably the block-layout device reply;
+ * confirm against the code that fills it in.
+ */
+struct bl_dev_msg {
+       int32_t status;
+       uint32_t major, minor;
+};
+
+/*
+ * NFS state kept per network namespace: fs/nfs/inode.c registers
+ * sizeof(struct nfs_net) as the .size of its pernet_operations, with
+ * nfs_net_id as the id.
+ */
+struct nfs_net {
+       struct cache_detail *nfs_dns_resolve;
+       struct rpc_pipe *bl_device_pipe;
+       struct bl_dev_msg bl_mount_reply;
+       wait_queue_head_t bl_wq;
+       struct list_head nfs_client_list;
+       struct list_head nfs_volume_list;
+#ifdef CONFIG_NFS_V4
+       struct idr cb_ident_idr; /* Protected by nfs_client_lock */
+#endif
+       spinlock_t nfs_client_lock;
+};
+
+extern int nfs_net_id;
+
+#endif
index 792cb13a430425c522e662b1e4915ffe7a893e9d..1f56000fabbdc1b1283961e340661e20e986e3bf 100644 (file)
@@ -1150,7 +1150,7 @@ struct rpc_procinfo       nfs_procedures[] = {
        PROC(STATFS,    fhandle,        statfsres,      0),
 };
 
-struct rpc_version             nfs_version2 = {
+const struct rpc_version nfs_version2 = {
        .number                 = 2,
        .nrprocs                = ARRAY_SIZE(nfs_procedures),
        .procs                  = nfs_procedures
index 7ef23979896dd2cffdbc43aa8b0a5a9e3aaecb09..e4498dc351a834fcc35722645abfa50eaefa80fd 100644 (file)
@@ -192,7 +192,7 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
                .pages = pages,
        };
        struct nfs3_getaclres res = {
-               0
+               NULL,
        };
        struct rpc_message msg = {
                .rpc_argp       = &args,
index 91943953a3703edb21e30e96b2b01655d3fa3234..5242eae6711a0b5323b1dcec13d57ecf01c47986 100644 (file)
@@ -428,6 +428,11 @@ nfs3_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
        msg->rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE];
 }
 
+/*
+ * .unlink_rpc_prepare hook for NFSv3: there is nothing to set up, so the
+ * call is started right away.  @data is not used.
+ */
+static void nfs3_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data)
+{
+       rpc_call_start(task);
+}
+
 static int
 nfs3_proc_unlink_done(struct rpc_task *task, struct inode *dir)
 {
@@ -445,6 +450,11 @@ nfs3_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
        msg->rpc_proc = &nfs3_procedures[NFS3PROC_RENAME];
 }
 
+/*
+ * .rename_rpc_prepare hook for NFSv3: there is nothing to set up, so the
+ * call is started right away.  @data is not used.
+ */
+static void nfs3_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data)
+{
+       rpc_call_start(task);
+}
+
 static int
 nfs3_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
                      struct inode *new_dir)
@@ -814,6 +824,11 @@ static void nfs3_proc_read_setup(struct nfs_read_data *data, struct rpc_message
        msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ];
 }
 
+/*
+ * .read_rpc_prepare hook for NFSv3: there is nothing to set up, so the
+ * call is started right away.  @data is not used.
+ */
+static void nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
+{
+       rpc_call_start(task);
+}
+
 static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
 {
        if (nfs3_async_handle_jukebox(task, data->inode))
@@ -828,6 +843,11 @@ static void nfs3_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
        msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE];
 }
 
+/*
+ * .write_rpc_prepare hook for NFSv3: there is nothing to set up, so the
+ * call is started right away.  @data is not used.
+ */
+static void nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
+{
+       rpc_call_start(task);
+}
+
 static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)
 {
        if (nfs3_async_handle_jukebox(task, data->inode))
@@ -864,9 +884,11 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
        .create         = nfs3_proc_create,
        .remove         = nfs3_proc_remove,
        .unlink_setup   = nfs3_proc_unlink_setup,
+       .unlink_rpc_prepare = nfs3_proc_unlink_rpc_prepare,
        .unlink_done    = nfs3_proc_unlink_done,
        .rename         = nfs3_proc_rename,
        .rename_setup   = nfs3_proc_rename_setup,
+       .rename_rpc_prepare = nfs3_proc_rename_rpc_prepare,
        .rename_done    = nfs3_proc_rename_done,
        .link           = nfs3_proc_link,
        .symlink        = nfs3_proc_symlink,
@@ -879,8 +901,10 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
        .pathconf       = nfs3_proc_pathconf,
        .decode_dirent  = nfs3_decode_dirent,
        .read_setup     = nfs3_proc_read_setup,
+       .read_rpc_prepare = nfs3_proc_read_rpc_prepare,
        .read_done      = nfs3_read_done,
        .write_setup    = nfs3_proc_write_setup,
+       .write_rpc_prepare = nfs3_proc_write_rpc_prepare,
        .write_done     = nfs3_write_done,
        .commit_setup   = nfs3_proc_commit_setup,
        .commit_done    = nfs3_commit_done,
index 183c6b123d0f53bd43209cd6a6634aaa8b9f484f..a77cc9a3ce5561f1d8b23e78bb16ac49fcbf14b4 100644 (file)
@@ -2461,7 +2461,7 @@ struct rpc_procinfo       nfs3_procedures[] = {
        PROC(COMMIT,            commit,         commit,         5),
 };
 
-struct rpc_version             nfs_version3 = {
+const struct rpc_version nfs_version3 = {
        .number                 = 3,
        .nrprocs                = ARRAY_SIZE(nfs3_procedures),
        .procs                  = nfs3_procedures
@@ -2489,7 +2489,7 @@ static struct rpc_procinfo        nfs3_acl_procedures[] = {
        },
 };
 
-struct rpc_version             nfsacl_version3 = {
+const struct rpc_version nfsacl_version3 = {
        .number                 = 3,
        .nrprocs                = sizeof(nfs3_acl_procedures)/
                                  sizeof(nfs3_acl_procedures[0]),
index 4d7d0aedc101831ecb3b10cf345f0e18e7ca56ad..97ecc863dd76b46900e23758d4cbdda2f28f63d0 100644 (file)
@@ -20,7 +20,6 @@ enum nfs4_client_state {
        NFS4CLNT_RECLAIM_REBOOT,
        NFS4CLNT_RECLAIM_NOGRACE,
        NFS4CLNT_DELEGRETURN,
-       NFS4CLNT_LAYOUTRECALL,
        NFS4CLNT_SESSION_RESET,
        NFS4CLNT_RECALL_SLOT,
        NFS4CLNT_LEASE_CONFIRM,
@@ -44,7 +43,7 @@ struct nfs4_minor_version_ops {
                        struct nfs4_sequence_args *args,
                        struct nfs4_sequence_res *res,
                        int cache_reply);
-       int     (*validate_stateid)(struct nfs_delegation *,
+       bool    (*match_stateid)(const nfs4_stateid *,
                        const nfs4_stateid *);
        int     (*find_root_sec)(struct nfs_server *, struct nfs_fh *,
                        struct nfs_fsinfo *);
@@ -53,26 +52,25 @@ struct nfs4_minor_version_ops {
        const struct nfs4_state_maintenance_ops *state_renewal_ops;
 };
 
-/*
- * struct rpc_sequence ensures that RPC calls are sent in the exact
- * order that they appear on the list.
- */
-struct rpc_sequence {
-       struct rpc_wait_queue   wait;   /* RPC call delay queue */
-       spinlock_t lock;                /* Protects the list */
-       struct list_head list;          /* Defines sequence of RPC calls */
+struct nfs_unique_id {
+       struct rb_node rb_node;
+       __u64 id;
 };
 
 #define NFS_SEQID_CONFIRMED 1
+/*
+ * Sequence-id counter.  The lock, list and wait members order the RPC
+ * calls that use this counter; they used to live in a separate
+ * struct rpc_sequence.  nfs_confirm_seqid() sets NFS_SEQID_CONFIRMED in
+ * flags.
+ */
 struct nfs_seqid_counter {
-       struct rpc_sequence *sequence;
+       int owner_id;
        int flags;
        u32 counter;
+       spinlock_t lock;                /* Protects the list */
+       struct list_head list;          /* Defines sequence of RPC calls */
+       struct rpc_wait_queue   wait;   /* RPC call delay queue */
 };
 
 struct nfs_seqid {
        struct nfs_seqid_counter *sequence;
        struct list_head list;
+       struct rpc_task *task;
 };
 
 static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status)
@@ -81,18 +79,12 @@ static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status
                seqid->flags |= NFS_SEQID_CONFIRMED;
 }
 
-struct nfs_unique_id {
-       struct rb_node rb_node;
-       __u64 id;
-};
-
 /*
  * NFS4 state_owners and lock_owners are simply labels for ordered
  * sequences of RPC calls. Their sole purpose is to provide once-only
  * semantics by allowing the server to identify replayed requests.
  */
 struct nfs4_state_owner {
-       struct nfs_unique_id so_owner_id;
        struct nfs_server    *so_server;
        struct list_head     so_lru;
        unsigned long        so_expires;
@@ -105,7 +97,6 @@ struct nfs4_state_owner {
        unsigned long        so_flags;
        struct list_head     so_states;
        struct nfs_seqid_counter so_seqid;
-       struct rpc_sequence  so_sequence;
 };
 
 enum {
@@ -146,8 +137,6 @@ struct nfs4_lock_state {
 #define NFS_LOCK_INITIALIZED 1
        int                     ls_flags;
        struct nfs_seqid_counter        ls_seqid;
-       struct rpc_sequence     ls_sequence;
-       struct nfs_unique_id    ls_id;
        nfs4_stateid            ls_stateid;
        atomic_t                ls_count;
        struct nfs4_lock_owner  ls_owner;
@@ -193,6 +182,7 @@ struct nfs4_exception {
        long timeout;
        int retry;
        struct nfs4_state *state;
+       struct inode *inode;
 };
 
 struct nfs4_state_recovery_ops {
@@ -224,7 +214,7 @@ extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, boo
 extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
 extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
                struct nfs4_fs_locations *fs_locations, struct page *page);
-extern void nfs4_release_lockowner(const struct nfs4_lock_state *);
+extern int nfs4_release_lockowner(struct nfs4_lock_state *);
 extern const struct xattr_handler *nfs4_xattr_handlers[];
 
 #if defined(CONFIG_NFS_V4_1)
@@ -233,12 +223,13 @@ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *ser
        return server->nfs_client->cl_session;
 }
 
+extern bool nfs4_set_task_privileged(struct rpc_task *task, void *dummy);
 extern int nfs4_setup_sequence(const struct nfs_server *server,
                struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
-               int cache_reply, struct rpc_task *task);
+               struct rpc_task *task);
 extern int nfs41_setup_sequence(struct nfs4_session *session,
                struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
-               int cache_reply, struct rpc_task *task);
+               struct rpc_task *task);
 extern void nfs4_destroy_session(struct nfs4_session *session);
 extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp);
 extern int nfs4_proc_create_session(struct nfs_client *);
@@ -269,7 +260,7 @@ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *ser
 
 static inline int nfs4_setup_sequence(const struct nfs_server *server,
                struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
-               int cache_reply, struct rpc_task *task)
+               struct rpc_task *task)
 {
        return 0;
 }
@@ -319,7 +310,7 @@ static inline void nfs4_schedule_session_recovery(struct nfs4_session *session)
 }
 #endif /* CONFIG_NFS_V4_1 */
 
-extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
+extern struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *, gfp_t);
 extern void nfs4_put_state_owner(struct nfs4_state_owner *);
 extern void nfs4_purge_state_owners(struct nfs_server *);
 extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
@@ -327,6 +318,8 @@ extern void nfs4_put_open_state(struct nfs4_state *);
 extern void nfs4_close_state(struct nfs4_state *, fmode_t);
 extern void nfs4_close_sync(struct nfs4_state *, fmode_t);
 extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
+extern void nfs_inode_find_state_and_recover(struct inode *inode,
+               const nfs4_stateid *stateid);
 extern void nfs4_schedule_lease_recovery(struct nfs_client *);
 extern void nfs4_schedule_state_manager(struct nfs_client *);
 extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp);
@@ -337,7 +330,8 @@ extern void nfs41_handle_server_scope(struct nfs_client *,
                                      struct server_scope **);
 extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
 extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
-extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t);
+extern void nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *,
+               fmode_t, fl_owner_t, pid_t);
 
 extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask);
 extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task);
@@ -346,6 +340,8 @@ extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid);
 extern void nfs_release_seqid(struct nfs_seqid *seqid);
 extern void nfs_free_seqid(struct nfs_seqid *seqid);
 
+extern void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp);
+
 extern const nfs4_stateid zero_stateid;
 
 /* nfs4xdr.c */
@@ -357,6 +353,16 @@ struct nfs4_mount_data;
 extern struct svc_version nfs4_callback_version1;
 extern struct svc_version nfs4_callback_version4;
 
+static inline void nfs4_stateid_copy(nfs4_stateid *dst, const nfs4_stateid *src)
+{
+       memcpy(dst, src, sizeof(*dst));
+}
+
+static inline bool nfs4_stateid_match(const nfs4_stateid *dst, const nfs4_stateid *src)
+{
+       return memcmp(dst, src, sizeof(*dst)) == 0;
+}
+
 #else
 
 #define nfs4_close_state(a, b) do { } while (0)
index 71ec08617e23820b8e3d11ee8e47a4217ee52e55..634c0bcb4fd6878776f9d560e8ad023199b975d4 100644 (file)
 #include <linux/nfs_page.h>
 #include <linux/module.h>
 
+#include <linux/sunrpc/metrics.h>
+
 #include "internal.h"
+#include "delegation.h"
 #include "nfs4filelayout.h"
 
 #define NFSDBG_FACILITY         NFSDBG_PNFS_LD
@@ -84,12 +87,27 @@ static int filelayout_async_handle_error(struct rpc_task *task,
                                         struct nfs_client *clp,
                                         int *reset)
 {
+       struct nfs_server *mds_server = NFS_SERVER(state->inode);
+       struct nfs_client *mds_client = mds_server->nfs_client;
+
        if (task->tk_status >= 0)
                return 0;
-
        *reset = 0;
 
        switch (task->tk_status) {
+       /* MDS state errors */
+       case -NFS4ERR_DELEG_REVOKED:
+       case -NFS4ERR_ADMIN_REVOKED:
+       case -NFS4ERR_BAD_STATEID:
+               nfs_remove_bad_delegation(state->inode);
+       case -NFS4ERR_OPENMODE:
+               nfs4_schedule_stateid_recovery(mds_server, state);
+               goto wait_on_recovery;
+       case -NFS4ERR_EXPIRED:
+               nfs4_schedule_stateid_recovery(mds_server, state);
+               nfs4_schedule_lease_recovery(mds_client);
+               goto wait_on_recovery;
+       /* DS session errors */
        case -NFS4ERR_BADSESSION:
        case -NFS4ERR_BADSLOT:
        case -NFS4ERR_BAD_HIGH_SLOT:
@@ -115,8 +133,14 @@ static int filelayout_async_handle_error(struct rpc_task *task,
                *reset = 1;
                break;
        }
+out:
        task->tk_status = 0;
        return -EAGAIN;
+wait_on_recovery:
+       rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL);
+       if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0)
+               rpc_wake_up_queued_task(&mds_client->cl_rpcwaitq, task);
+       goto out;
 }
 
 /* NFS_PROTO call done callback routines */
@@ -173,7 +197,7 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)
 
        if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
                                &rdata->args.seq_args, &rdata->res.seq_res,
-                               0, task))
+                               task))
                return;
 
        rpc_call_start(task);
@@ -189,10 +213,18 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data)
        rdata->mds_ops->rpc_call_done(task, data);
 }
 
+static void filelayout_read_count_stats(struct rpc_task *task, void *data)
+{
+       struct nfs_read_data *rdata = (struct nfs_read_data *)data;
+
+       rpc_count_iostats(task, NFS_SERVER(rdata->inode)->client->cl_metrics);
+}
+
 static void filelayout_read_release(void *data)
 {
        struct nfs_read_data *rdata = (struct nfs_read_data *)data;
 
+       put_lseg(rdata->lseg);
        rdata->mds_ops->rpc_release(data);
 }
 
@@ -254,7 +286,7 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data)
 
        if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
                                &wdata->args.seq_args, &wdata->res.seq_res,
-                               0, task))
+                               task))
                return;
 
        rpc_call_start(task);
@@ -268,10 +300,18 @@ static void filelayout_write_call_done(struct rpc_task *task, void *data)
        wdata->mds_ops->rpc_call_done(task, data);
 }
 
+static void filelayout_write_count_stats(struct rpc_task *task, void *data)
+{
+       struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+
+       rpc_count_iostats(task, NFS_SERVER(wdata->inode)->client->cl_metrics);
+}
+
 static void filelayout_write_release(void *data)
 {
        struct nfs_write_data *wdata = (struct nfs_write_data *)data;
 
+       put_lseg(wdata->lseg);
        wdata->mds_ops->rpc_release(data);
 }
 
@@ -282,24 +322,28 @@ static void filelayout_commit_release(void *data)
        nfs_commit_release_pages(wdata);
        if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding))
                nfs_commit_clear_lock(NFS_I(wdata->inode));
+       put_lseg(wdata->lseg);
        nfs_commitdata_release(wdata);
 }
 
-struct rpc_call_ops filelayout_read_call_ops = {
+static const struct rpc_call_ops filelayout_read_call_ops = {
        .rpc_call_prepare = filelayout_read_prepare,
        .rpc_call_done = filelayout_read_call_done,
+       .rpc_count_stats = filelayout_read_count_stats,
        .rpc_release = filelayout_read_release,
 };
 
-struct rpc_call_ops filelayout_write_call_ops = {
+static const struct rpc_call_ops filelayout_write_call_ops = {
        .rpc_call_prepare = filelayout_write_prepare,
        .rpc_call_done = filelayout_write_call_done,
+       .rpc_count_stats = filelayout_write_count_stats,
        .rpc_release = filelayout_write_release,
 };
 
-struct rpc_call_ops filelayout_commit_call_ops = {
+static const struct rpc_call_ops filelayout_commit_call_ops = {
        .rpc_call_prepare = filelayout_write_prepare,
        .rpc_call_done = filelayout_write_call_done,
+       .rpc_count_stats = filelayout_write_count_stats,
        .rpc_release = filelayout_commit_release,
 };
 
@@ -367,7 +411,8 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
        idx = nfs4_fl_calc_ds_index(lseg, j);
        ds = nfs4_fl_prepare_ds(lseg, idx);
        if (!ds) {
-               printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
+               printk(KERN_ERR "NFS: %s: prepare_ds failed, use MDS\n",
+                       __func__);
                set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
                set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
                return PNFS_NOT_ATTEMPTED;
@@ -575,7 +620,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
                        goto out_err_free;
                fl->fh_array[i]->size = be32_to_cpup(p++);
                if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) {
-                       printk(KERN_ERR "Too big fh %d received %d\n",
+                       printk(KERN_ERR "NFS: Too big fh %d received %d\n",
                               i, fl->fh_array[i]->size);
                        goto out_err_free;
                }
@@ -640,14 +685,16 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
                int size = (fl->stripe_type == STRIPE_SPARSE) ?
                        fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
 
-               fl->commit_buckets = kcalloc(size, sizeof(struct list_head), gfp_flags);
+               fl->commit_buckets = kcalloc(size, sizeof(struct nfs4_fl_commit_bucket), gfp_flags);
                if (!fl->commit_buckets) {
                        filelayout_free_lseg(&fl->generic_hdr);
                        return NULL;
                }
                fl->number_of_buckets = size;
-               for (i = 0; i < size; i++)
-                       INIT_LIST_HEAD(&fl->commit_buckets[i]);
+               for (i = 0; i < size; i++) {
+                       INIT_LIST_HEAD(&fl->commit_buckets[i].written);
+                       INIT_LIST_HEAD(&fl->commit_buckets[i].committing);
+               }
        }
        return &fl->generic_hdr;
 }
@@ -679,7 +726,7 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
        return (p_stripe == r_stripe);
 }
 
-void
+static void
 filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
                        struct nfs_page *req)
 {
@@ -696,7 +743,7 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
                nfs_pageio_reset_read_mds(pgio);
 }
 
-void
+static void
 filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
                         struct nfs_page *req)
 {
@@ -725,11 +772,6 @@ static const struct nfs_pageio_ops filelayout_pg_write_ops = {
        .pg_doio = pnfs_generic_pg_writepages,
 };
 
-static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg)
-{
-       return !FILELAYOUT_LSEG(lseg)->commit_through_mds;
-}
-
 static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
 {
        if (fl->stripe_type == STRIPE_SPARSE)
@@ -738,13 +780,49 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
                return j;
 }
 
-struct list_head *filelayout_choose_commit_list(struct nfs_page *req)
+/* The generic layer is about to remove the req from the commit list.
+ * If this will make the bucket empty, it will need to put the lseg reference.
+ */
+static void
+filelayout_clear_request_commit(struct nfs_page *req)
+{
+       struct pnfs_layout_segment *freeme = NULL;
+       struct inode *inode = req->wb_context->dentry->d_inode;
+
+       spin_lock(&inode->i_lock);
+       if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags))
+               goto out;
+       if (list_is_singular(&req->wb_list)) {
+               struct pnfs_layout_segment *lseg;
+
+               /* From here we can find the bucket, but for the moment,
+                * since there is only one relevant lseg...
+                */
+               list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
+                       if (lseg->pls_range.iomode == IOMODE_RW) {
+                               freeme = lseg;
+                               break;
+                       }
+               }
+       }
+out:
+       nfs_request_remove_commit_list(req);
+       spin_unlock(&inode->i_lock);
+       /* Drop the bucket's lseg ref (if any) only after releasing i_lock */
+       put_lseg(freeme);
+}
+
+static struct list_head *
+filelayout_choose_commit_list(struct nfs_page *req,
+                             struct pnfs_layout_segment *lseg)
 {
-       struct pnfs_layout_segment *lseg = req->wb_commit_lseg;
        struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
        u32 i, j;
        struct list_head *list;
 
+       if (fl->commit_through_mds)
+               return &NFS_I(req->wb_context->dentry->d_inode)->commit_list;
+
        /* Note that we are calling nfs4_fl_calc_j_index on each page
         * that ends up being committed to a data server.  An attractive
         * alternative is to add a field to nfs_write_data and nfs_page
@@ -754,14 +832,30 @@ struct list_head *filelayout_choose_commit_list(struct nfs_page *req)
        j = nfs4_fl_calc_j_index(lseg,
                                 (loff_t)req->wb_index << PAGE_CACHE_SHIFT);
        i = select_bucket_index(fl, j);
-       list = &fl->commit_buckets[i];
+       list = &fl->commit_buckets[i].written;
        if (list_empty(list)) {
-               /* Non-empty buckets hold a reference on the lseg */
+               /* Non-empty buckets hold a reference on the lseg.  That ref
+                * is normally transferred to the COMMIT call and released
+                * there.  It could also be released if the last req is pulled
+                * off due to a rewrite, in which case it will be done in
+                * filelayout_clear_request_commit
+                */
                get_lseg(lseg);
        }
+       set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
        return list;
 }
 
+/* Queue @req on the commit list chosen for it within @lseg. */
+static void
+filelayout_mark_request_commit(struct nfs_page *req,
+               struct pnfs_layout_segment *lseg)
+{
+       struct list_head *dst = filelayout_choose_commit_list(req, lseg);
+
+       nfs_request_add_commit_list(req, dst);
+}
+
 static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
 {
        struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
@@ -797,11 +891,12 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how)
        idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
        ds = nfs4_fl_prepare_ds(lseg, idx);
        if (!ds) {
-               printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
+               printk(KERN_ERR "NFS: %s: prepare_ds failed, use MDS\n",
+                       __func__);
                set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
                set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
                prepare_to_resend_writes(data);
-               data->mds_ops->rpc_release(data);
+               filelayout_commit_release(data);
                return -EAGAIN;
        }
        dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how);
@@ -817,24 +912,87 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how)
 /*
  * This is only useful while we are using whole file layouts.
  */
-static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode)
+static struct pnfs_layout_segment *
+find_only_write_lseg_locked(struct inode *inode)
 {
-       struct pnfs_layout_segment *lseg, *rv = NULL;
+       struct pnfs_layout_segment *lseg;
 
-       spin_lock(&inode->i_lock);
        list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
                if (lseg->pls_range.iomode == IOMODE_RW)
-                       rv = get_lseg(lseg);
+                       return lseg;
+       return NULL;
+}
+
+static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode)
+{
+       struct pnfs_layout_segment *rv;
+
+       spin_lock(&inode->i_lock);
+       rv = find_only_write_lseg_locked(inode);
+       if (rv)
+               get_lseg(rv);
        spin_unlock(&inode->i_lock);
        return rv;
 }
 
-static int alloc_ds_commits(struct inode *inode, struct list_head *list)
+static int
+filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max,
+               spinlock_t *lock)
+{
+       struct list_head *src = &bucket->written;
+       struct list_head *dst = &bucket->committing;
+       struct nfs_page *req, *tmp;
+       int ret = 0;
+
+       list_for_each_entry_safe(req, tmp, src, wb_list) {
+               if (!nfs_lock_request(req))
+                       continue;
+               if (cond_resched_lock(lock))
+                       list_safe_reset_next(req, tmp, wb_list);
+               nfs_request_remove_commit_list(req);
+               clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);
+               nfs_list_add_request(req, dst);
+               ret++;
+               if (ret == max)
+                       break;
+       }
+       return ret;
+}
+
+/* Called with i_lock held.  Shift up to @max reqs from the written lists to
+ * the committing lists and return how many were moved.
+ */
+static int filelayout_scan_commit_lists(struct inode *inode, int max,
+               spinlock_t *lock)
+{
+       struct pnfs_layout_segment *lseg = find_only_write_lseg_locked(inode);
+       struct nfs4_filelayout_segment *fl;
+       int idx, nr, total = 0;
+
+       if (lseg == NULL)
+               return 0;
+       fl = FILELAYOUT_LSEG(lseg);
+       if (fl->commit_through_mds)
+               return 0;
+       for (idx = 0; idx < fl->number_of_buckets; idx++) {
+               if (max == 0)
+                       break;
+               nr = filelayout_scan_ds_commit_list(&fl->commit_buckets[idx],
+                               max, lock);
+               max -= nr;
+               total += nr;
+       }
+       return total;
+}
+
+static unsigned int
+alloc_ds_commits(struct inode *inode, struct list_head *list)
 {
        struct pnfs_layout_segment *lseg;
        struct nfs4_filelayout_segment *fl;
        struct nfs_write_data *data;
        int i, j;
+       unsigned int nreq = 0;
 
        /* Won't need this when non-whole file layout segments are supported
         * instead we will use a pnfs_layout_hdr structure */
@@ -843,28 +1001,27 @@ static int alloc_ds_commits(struct inode *inode, struct list_head *list)
                return 0;
        fl = FILELAYOUT_LSEG(lseg);
        for (i = 0; i < fl->number_of_buckets; i++) {
-               if (list_empty(&fl->commit_buckets[i]))
+               if (list_empty(&fl->commit_buckets[i].committing))
                        continue;
                data = nfs_commitdata_alloc();
                if (!data)
-                       goto out_bad;
+                       break;
                data->ds_commit_index = i;
                data->lseg = lseg;
                list_add(&data->pages, list);
+               nreq++;
        }
-       put_lseg(lseg);
-       return 0;
 
-out_bad:
+       /* Clean up on error: hand every remaining bucket to nfs_retry_commit */
        for (j = i; j < fl->number_of_buckets; j++) {
-               if (list_empty(&fl->commit_buckets[i]))
+               if (list_empty(&fl->commit_buckets[j].committing))
                        continue;
-               nfs_retry_commit(&fl->commit_buckets[i], lseg);
+               nfs_retry_commit(&fl->commit_buckets[j].committing, lseg);
                put_lseg(lseg);  /* associated with emptying bucket */
        }
        put_lseg(lseg);
        /* Caller will clean up entries put on list */
-       return -ENOMEM;
+       return nreq;
 }
 
 /* This follows nfs_commit_list pretty closely */
@@ -874,40 +1031,40 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
 {
        struct nfs_write_data   *data, *tmp;
        LIST_HEAD(list);
+       unsigned int nreq = 0;
 
        if (!list_empty(mds_pages)) {
                data = nfs_commitdata_alloc();
-               if (!data)
-                       goto out_bad;
-               data->lseg = NULL;
-               list_add(&data->pages, &list);
+               if (data != NULL) {
+                       data->lseg = NULL;
+                       list_add(&data->pages, &list);
+                       nreq++;
+               } else
+                       nfs_retry_commit(mds_pages, NULL);
        }
 
-       if (alloc_ds_commits(inode, &list))
-               goto out_bad;
+       nreq += alloc_ds_commits(inode, &list);
+
+       if (nreq == 0) {
+               nfs_commit_clear_lock(NFS_I(inode));
+               goto out;
+       }
+
+       atomic_add(nreq, &NFS_I(inode)->commits_outstanding);
 
        list_for_each_entry_safe(data, tmp, &list, pages) {
                list_del_init(&data->pages);
-               atomic_inc(&NFS_I(inode)->commits_outstanding);
                if (!data->lseg) {
                        nfs_init_commit(data, mds_pages, NULL);
                        nfs_initiate_commit(data, NFS_CLIENT(inode),
                                            data->mds_ops, how);
                } else {
-                       nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index], data->lseg);
+                       nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index].committing, data->lseg);
                        filelayout_initiate_commit(data, how);
                }
        }
-       return 0;
- out_bad:
-       list_for_each_entry_safe(data, tmp, &list, pages) {
-               nfs_retry_commit(&data->pages, data->lseg);
-               list_del_init(&data->pages);
-               nfs_commit_free(data);
-       }
-       nfs_retry_commit(mds_pages, NULL);
-       nfs_commit_clear_lock(NFS_I(inode));
-       return -ENOMEM;
+out:
+       return PNFS_ATTEMPTED;
 }
 
 static void
@@ -924,8 +1081,9 @@ static struct pnfs_layoutdriver_type filelayout_type = {
        .free_lseg              = filelayout_free_lseg,
        .pg_read_ops            = &filelayout_pg_read_ops,
        .pg_write_ops           = &filelayout_pg_write_ops,
-       .mark_pnfs_commit       = filelayout_mark_pnfs_commit,
-       .choose_commit_list     = filelayout_choose_commit_list,
+       .mark_request_commit    = filelayout_mark_request_commit,
+       .clear_request_commit   = filelayout_clear_request_commit,
+       .scan_commit_lists      = filelayout_scan_commit_lists,
        .commit_pagelist        = filelayout_commit_pagelist,
        .read_pagelist          = filelayout_read_pagelist,
        .write_pagelist         = filelayout_write_pagelist,
index 2e42284253fa600ba9266afc6111711653558949..21190bb1f5e348c5549e5985afb8cdf896aa72dd 100644 (file)
@@ -74,6 +74,11 @@ struct nfs4_file_layout_dsaddr {
        struct nfs4_pnfs_ds             *ds_list[1];
 };
 
+struct nfs4_fl_commit_bucket {
+       struct list_head written;
+       struct list_head committing;
+};
+
 struct nfs4_filelayout_segment {
        struct pnfs_layout_segment generic_hdr;
        u32 stripe_type;
@@ -84,7 +89,7 @@ struct nfs4_filelayout_segment {
        struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */
        unsigned int num_fh;
        struct nfs_fh **fh_array;
-       struct list_head *commit_buckets; /* Sort commits to ds */
+       struct nfs4_fl_commit_bucket *commit_buckets; /* Sort commits to ds */
        int number_of_buckets;
 };
 
index 8ae91908f5aa6fa38c128348beb272ddb6226a3b..a866bbd2890a056b530ebe3ae91cd74b0862f4a1 100644 (file)
@@ -45,7 +45,7 @@
  *   - incremented when a device id maps a data server already in the cache.
  *   - decremented when deviceid is removed from the cache.
  */
-DEFINE_SPINLOCK(nfs4_ds_cache_lock);
+static DEFINE_SPINLOCK(nfs4_ds_cache_lock);
 static LIST_HEAD(nfs4_data_server_cache);
 
 /* Debug routines */
@@ -108,58 +108,40 @@ same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2)
        return false;
 }
 
-/*
- * Lookup DS by addresses.  The first matching address returns true.
- * nfs4_ds_cache_lock is held
- */
-static struct nfs4_pnfs_ds *
-_data_server_lookup_locked(struct list_head *dsaddrs)
+/* Compare two DS address lists entry by entry, in list order.  Returns true
+ * only when every pair matches and both lists end at the same time.
+ */
+static bool
+_same_data_server_addrs_locked(const struct list_head *dsaddrs1,
+                              const struct list_head *dsaddrs2)
 {
-       struct nfs4_pnfs_ds *ds;
        struct nfs4_pnfs_ds_addr *da1, *da2;
 
-       list_for_each_entry(da1, dsaddrs, da_node) {
-               list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
-                       list_for_each_entry(da2, &ds->ds_addrs, da_node) {
-                               if (same_sockaddr(
-                                       (struct sockaddr *)&da1->da_addr,
-                                       (struct sockaddr *)&da2->da_addr))
-                                       return ds;
-                       }
-               }
+       /* walk both lists in step; a list head is the end, never an entry */
+       for (da1 = list_first_entry(dsaddrs1, typeof(*da1), da_node),
+            da2 = list_first_entry(dsaddrs2, typeof(*da2), da_node);
+            &da1->da_node != dsaddrs1 && &da2->da_node != dsaddrs2;
+            da1 = list_entry(da1->da_node.next, typeof(*da1), da_node),
+            da2 = list_entry(da2->da_node.next, typeof(*da2), da_node)) {
+               if (!same_sockaddr((struct sockaddr *)&da1->da_addr,
+                                  (struct sockaddr *)&da2->da_addr))
+                       return false;
        }
-       return NULL;
+       return &da1->da_node == dsaddrs1 && &da2->da_node == dsaddrs2;
 }
 
 /*
- * Compare two lists of addresses.
+ * Lookup DS by addresses.  nfs4_ds_cache_lock is held
  */
-static bool
-_data_server_match_all_addrs_locked(struct list_head *dsaddrs1,
-                                   struct list_head *dsaddrs2)
+static struct nfs4_pnfs_ds *
+_data_server_lookup_locked(const struct list_head *dsaddrs)
 {
-       struct nfs4_pnfs_ds_addr *da1, *da2;
-       size_t count1 = 0,
-              count2 = 0;
-
-       list_for_each_entry(da1, dsaddrs1, da_node)
-               count1++;
-
-       list_for_each_entry(da2, dsaddrs2, da_node) {
-               bool found = false;
-               count2++;
-               list_for_each_entry(da1, dsaddrs1, da_node) {
-                       if (same_sockaddr((struct sockaddr *)&da1->da_addr,
-                               (struct sockaddr *)&da2->da_addr)) {
-                               found = true;
-                               break;
-                       }
-               }
-               if (!found)
-                       return false;
-       }
+       struct nfs4_pnfs_ds *ds;
 
-       return (count1 == count2);
+       list_for_each_entry(ds, &nfs4_data_server_cache, ds_node)
+               if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs))
+                       return ds;
+       return NULL;
 }
 
 /*
@@ -356,11 +338,6 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
                dprintk("%s add new data server %s\n", __func__,
                        ds->ds_remotestr);
        } else {
-               if (!_data_server_match_all_addrs_locked(&tmp_ds->ds_addrs,
-                                                        dsaddrs)) {
-                       dprintk("%s:  multipath address mismatch: %s != %s",
-                               __func__, tmp_ds->ds_remotestr, remotestr);
-               }
                kfree(remotestr);
                kfree(ds);
                atomic_inc(&tmp_ds->ds_count);
@@ -378,7 +355,7 @@ out:
  * Currently only supports ipv4, ipv6 and one multi-path address.
  */
 static struct nfs4_pnfs_ds_addr *
-decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags)
+decode_ds_addr(struct net *net, struct xdr_stream *streamp, gfp_t gfp_flags)
 {
        struct nfs4_pnfs_ds_addr *da = NULL;
        char *buf, *portstr;
@@ -457,7 +434,7 @@ decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags)
 
        INIT_LIST_HEAD(&da->da_node);
 
-       if (!rpc_pton(buf, portstr-buf, (struct sockaddr *)&da->da_addr,
+       if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr,
                      sizeof(da->da_addr))) {
                dprintk("%s: error parsing address %s\n", __func__, buf);
                goto out_free_da;
@@ -554,7 +531,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
        cnt = be32_to_cpup(p);
        dprintk("%s stripe count  %d\n", __func__, cnt);
        if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) {
-               printk(KERN_WARNING "%s: stripe count %d greater than "
+               printk(KERN_WARNING "NFS: %s: stripe count %d greater than "
                       "supported maximum %d\n", __func__,
                        cnt, NFS4_PNFS_MAX_STRIPE_CNT);
                goto out_err_free_scratch;
@@ -585,7 +562,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
        num = be32_to_cpup(p);
        dprintk("%s ds_num %u\n", __func__, num);
        if (num > NFS4_PNFS_MAX_MULTI_CNT) {
-               printk(KERN_WARNING "%s: multipath count %d greater than "
+               printk(KERN_WARNING "NFS: %s: multipath count %d greater than "
                        "supported maximum %d\n", __func__,
                        num, NFS4_PNFS_MAX_MULTI_CNT);
                goto out_err_free_stripe_indices;
@@ -593,7 +570,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
 
        /* validate stripe indices are all < num */
        if (max_stripe_index >= num) {
-               printk(KERN_WARNING "%s: stripe index %u >= num ds %u\n",
+               printk(KERN_WARNING "NFS: %s: stripe index %u >= num ds %u\n",
                        __func__, max_stripe_index, num);
                goto out_err_free_stripe_indices;
        }
@@ -625,7 +602,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
 
                mp_count = be32_to_cpup(p); /* multipath count */
                for (j = 0; j < mp_count; j++) {
-                       da = decode_ds_addr(&stream, gfp_flags);
+                       da = decode_ds_addr(NFS_SERVER(ino)->nfs_client->net,
+                                           &stream, gfp_flags);
                        if (da)
                                list_add_tail(&da->da_node, &dsaddrs);
                }
@@ -686,7 +664,7 @@ decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_fl
 
        new = decode_device(inode, dev, gfp_flags);
        if (!new) {
-               printk(KERN_WARNING "%s: Could not decode or add device\n",
+               printk(KERN_WARNING "NFS: %s: Could not decode or add device\n",
                        __func__);
                return NULL;
        }
@@ -835,7 +813,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
        struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
 
        if (ds == NULL) {
-               printk(KERN_ERR "%s: No data server for offset index %d\n",
+               printk(KERN_ERR "NFS: %s: No data server for offset index %d\n",
                        __func__, ds_idx);
                return NULL;
        }
index bb80c49b6533b44aab58171dcd69397ff02f39a2..9c8eca315f431199aa481c0eabc6ae3e044f8267 100644 (file)
@@ -94,13 +94,14 @@ static int nfs4_validate_fspath(struct dentry *dentry,
 }
 
 static size_t nfs_parse_server_name(char *string, size_t len,
-               struct sockaddr *sa, size_t salen)
+               struct sockaddr *sa, size_t salen, struct nfs_server *server)
 {
+       struct net *net = rpc_net_ns(server->client);
        ssize_t ret;
 
-       ret = rpc_pton(string, len, sa, salen);
+       ret = rpc_pton(net, string, len, sa, salen);
        if (ret == 0) {
-               ret = nfs_dns_resolve_name(string, len, sa, salen);
+               ret = nfs_dns_resolve_name(net, string, len, sa, salen);
                if (ret < 0)
                        ret = 0;
        }
@@ -137,7 +138,8 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
                        continue;
 
                mountdata->addrlen = nfs_parse_server_name(buf->data, buf->len,
-                               mountdata->addr, addr_bufsize);
+                               mountdata->addr, addr_bufsize,
+                               NFS_SB(mountdata->sb));
                if (mountdata->addrlen == 0)
                        continue;
 
index caf92d05c3a901adabe5117a895107952e5cff23..e809d2305ebf3a6431c52a6f0a805672365557fb 100644 (file)
 
 #define NFS4_MAX_LOOP_ON_RECOVER (10)
 
+static unsigned short max_session_slots = NFS4_DEF_SLOT_TABLE_SIZE;
+
 struct nfs4_opendata;
 static int _nfs4_proc_open(struct nfs4_opendata *data);
 static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
 static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
 static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
+static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr);
 static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
 static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
                            struct nfs_fattr *fattr, struct iattr *sattr,
                            struct nfs4_state *state);
 #ifdef CONFIG_NFS_V4_1
-static int nfs41_test_stateid(struct nfs_server *, struct nfs4_state *);
-static int nfs41_free_stateid(struct nfs_server *, struct nfs4_state *);
+static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *);
+static int nfs41_free_stateid(struct nfs_server *, nfs4_stateid *);
 #endif
 /* Prevent leaks of NFSv4 errors into userland */
 static int nfs4_map_errors(int err)
@@ -259,15 +262,28 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
 {
        struct nfs_client *clp = server->nfs_client;
        struct nfs4_state *state = exception->state;
+       struct inode *inode = exception->inode;
        int ret = errorcode;
 
        exception->retry = 0;
        switch(errorcode) {
                case 0:
                        return 0;
+               case -NFS4ERR_OPENMODE:
+                       if (nfs_have_delegation(inode, FMODE_READ)) {
+                               nfs_inode_return_delegation(inode);
+                               exception->retry = 1;
+                               return 0;
+                       }
+                       if (state == NULL)
+                               break;
+                       nfs4_schedule_stateid_recovery(server, state);
+                       goto wait_on_recovery;
+               case -NFS4ERR_DELEG_REVOKED:
                case -NFS4ERR_ADMIN_REVOKED:
                case -NFS4ERR_BAD_STATEID:
-               case -NFS4ERR_OPENMODE:
+                       if (state != NULL)
+                               nfs_remove_bad_delegation(state->inode);
                        if (state == NULL)
                                break;
                        nfs4_schedule_stateid_recovery(server, state);
@@ -360,16 +376,14 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp
  * When updating highest_used_slotid there may be "holes" in the bitmap
  * so we need to scan down from highest_used_slotid to 0 looking for the now
  * highest slotid in use.
- * If none found, highest_used_slotid is set to -1.
+ * If none found, highest_used_slotid is set to NFS4_NO_SLOT.
  *
  * Must be called while holding tbl->slot_tbl_lock
  */
 static void
-nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid)
+nfs4_free_slot(struct nfs4_slot_table *tbl, u32 slotid)
 {
-       int slotid = free_slotid;
-
-       BUG_ON(slotid < 0 || slotid >= NFS4_MAX_SLOT_TABLE);
+       BUG_ON(slotid >= NFS4_MAX_SLOT_TABLE);
        /* clear used bit in bitmap */
        __clear_bit(slotid, tbl->used_slots);
 
@@ -379,10 +393,16 @@ nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid)
                if (slotid < tbl->max_slots)
                        tbl->highest_used_slotid = slotid;
                else
-                       tbl->highest_used_slotid = -1;
+                       tbl->highest_used_slotid = NFS4_NO_SLOT;
        }
-       dprintk("%s: free_slotid %u highest_used_slotid %d\n", __func__,
-               free_slotid, tbl->highest_used_slotid);
+       dprintk("%s: slotid %u highest_used_slotid %d\n", __func__,
+               slotid, tbl->highest_used_slotid);
+}
+
+bool nfs4_set_task_privileged(struct rpc_task *task, void *dummy)
+{
+       rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
+       return true;
 }
 
 /*
@@ -390,16 +410,13 @@ nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid)
  */
 static void nfs4_check_drain_fc_complete(struct nfs4_session *ses)
 {
-       struct rpc_task *task;
-
        if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state)) {
-               task = rpc_wake_up_next(&ses->fc_slot_table.slot_tbl_waitq);
-               if (task)
-                       rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
+               rpc_wake_up_first(&ses->fc_slot_table.slot_tbl_waitq,
+                               nfs4_set_task_privileged, NULL);
                return;
        }
 
-       if (ses->fc_slot_table.highest_used_slotid != -1)
+       if (ses->fc_slot_table.highest_used_slotid != NFS4_NO_SLOT)
                return;
 
        dprintk("%s COMPLETE: Session Fore Channel Drained\n", __func__);
@@ -412,7 +429,7 @@ static void nfs4_check_drain_fc_complete(struct nfs4_session *ses)
 void nfs4_check_drain_bc_complete(struct nfs4_session *ses)
 {
        if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state) ||
-           ses->bc_slot_table.highest_used_slotid != -1)
+           ses->bc_slot_table.highest_used_slotid != NFS4_NO_SLOT)
                return;
        dprintk("%s COMPLETE: Session Back Channel Drained\n", __func__);
        complete(&ses->bc_slot_table.complete);
@@ -507,25 +524,25 @@ static int nfs4_sequence_done(struct rpc_task *task,
  * nfs4_find_slot looks for an unset bit in the used_slots bitmap.
  * If found, we mark the slot as used, update the highest_used_slotid,
  * and respectively set up the sequence operation args.
- * The slot number is returned if found, or NFS4_MAX_SLOT_TABLE otherwise.
+ * The slot number is returned if found, or NFS4_NO_SLOT otherwise.
  *
  * Note: must be called with under the slot_tbl_lock.
  */
-static u8
+static u32
 nfs4_find_slot(struct nfs4_slot_table *tbl)
 {
-       int slotid;
-       u8 ret_id = NFS4_MAX_SLOT_TABLE;
-       BUILD_BUG_ON((u8)NFS4_MAX_SLOT_TABLE != (int)NFS4_MAX_SLOT_TABLE);
+       u32 slotid;
+       u32 ret_id = NFS4_NO_SLOT;
 
-       dprintk("--> %s used_slots=%04lx highest_used=%d max_slots=%d\n",
+       dprintk("--> %s used_slots=%04lx highest_used=%u max_slots=%u\n",
                __func__, tbl->used_slots[0], tbl->highest_used_slotid,
                tbl->max_slots);
        slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slots);
        if (slotid >= tbl->max_slots)
                goto out;
        __set_bit(slotid, tbl->used_slots);
-       if (slotid > tbl->highest_used_slotid)
+       if (slotid > tbl->highest_used_slotid ||
+                       tbl->highest_used_slotid == NFS4_NO_SLOT)
                tbl->highest_used_slotid = slotid;
        ret_id = slotid;
 out:
@@ -534,15 +551,25 @@ out:
        return ret_id;
 }
 
+static void nfs41_init_sequence(struct nfs4_sequence_args *args,
+               struct nfs4_sequence_res *res, int cache_reply)
+{
+       args->sa_session = NULL;
+       args->sa_cache_this = 0;
+       if (cache_reply)
+               args->sa_cache_this = 1;
+       res->sr_session = NULL;
+       res->sr_slot = NULL;
+}
+
 int nfs41_setup_sequence(struct nfs4_session *session,
                                struct nfs4_sequence_args *args,
                                struct nfs4_sequence_res *res,
-                               int cache_reply,
                                struct rpc_task *task)
 {
        struct nfs4_slot *slot;
        struct nfs4_slot_table *tbl;
-       u8 slotid;
+       u32 slotid;
 
        dprintk("--> %s\n", __func__);
        /* slot already allocated? */
@@ -570,7 +597,7 @@ int nfs41_setup_sequence(struct nfs4_session *session,
        }
 
        slotid = nfs4_find_slot(tbl);
-       if (slotid == NFS4_MAX_SLOT_TABLE) {
+       if (slotid == NFS4_NO_SLOT) {
                rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
                spin_unlock(&tbl->slot_tbl_lock);
                dprintk("<-- %s: no free slots\n", __func__);
@@ -582,7 +609,6 @@ int nfs41_setup_sequence(struct nfs4_session *session,
        slot = tbl->slots + slotid;
        args->sa_session = session;
        args->sa_slotid = slotid;
-       args->sa_cache_this = cache_reply;
 
        dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr);
 
@@ -602,24 +628,19 @@ EXPORT_SYMBOL_GPL(nfs41_setup_sequence);
 int nfs4_setup_sequence(const struct nfs_server *server,
                        struct nfs4_sequence_args *args,
                        struct nfs4_sequence_res *res,
-                       int cache_reply,
                        struct rpc_task *task)
 {
        struct nfs4_session *session = nfs4_get_session(server);
        int ret = 0;
 
-       if (session == NULL) {
-               args->sa_session = NULL;
-               res->sr_session = NULL;
+       if (session == NULL)
                goto out;
-       }
 
        dprintk("--> %s clp %p session %p sr_slot %td\n",
                __func__, session->clp, session, res->sr_slot ?
                        res->sr_slot - session->fc_slot_table.slots : -1);
 
-       ret = nfs41_setup_sequence(session, args, res, cache_reply,
-                                  task);
+       ret = nfs41_setup_sequence(session, args, res, task);
 out:
        dprintk("<-- %s status=%d\n", __func__, ret);
        return ret;
@@ -629,7 +650,6 @@ struct nfs41_call_sync_data {
        const struct nfs_server *seq_server;
        struct nfs4_sequence_args *seq_args;
        struct nfs4_sequence_res *seq_res;
-       int cache_reply;
 };
 
 static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata)
@@ -639,7 +659,7 @@ static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata)
        dprintk("--> %s data->seq_server %p\n", __func__, data->seq_server);
 
        if (nfs4_setup_sequence(data->seq_server, data->seq_args,
-                               data->seq_res, data->cache_reply, task))
+                               data->seq_res, task))
                return;
        rpc_call_start(task);
 }
@@ -657,12 +677,12 @@ static void nfs41_call_sync_done(struct rpc_task *task, void *calldata)
        nfs41_sequence_done(task, data->seq_res);
 }
 
-struct rpc_call_ops nfs41_call_sync_ops = {
+static const struct rpc_call_ops nfs41_call_sync_ops = {
        .rpc_call_prepare = nfs41_call_sync_prepare,
        .rpc_call_done = nfs41_call_sync_done,
 };
 
-struct rpc_call_ops nfs41_call_priv_sync_ops = {
+static const struct rpc_call_ops nfs41_call_priv_sync_ops = {
        .rpc_call_prepare = nfs41_call_priv_sync_prepare,
        .rpc_call_done = nfs41_call_sync_done,
 };
@@ -672,7 +692,6 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
                                   struct rpc_message *msg,
                                   struct nfs4_sequence_args *args,
                                   struct nfs4_sequence_res *res,
-                                  int cache_reply,
                                   int privileged)
 {
        int ret;
@@ -681,7 +700,6 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
                .seq_server = server,
                .seq_args = args,
                .seq_res = res,
-               .cache_reply = cache_reply,
        };
        struct rpc_task_setup task_setup = {
                .rpc_client = clnt,
@@ -690,7 +708,6 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
                .callback_data = &data
        };
 
-       res->sr_slot = NULL;
        if (privileged)
                task_setup.callback_ops = &nfs41_call_priv_sync_ops;
        task = rpc_run_task(&task_setup);
@@ -710,10 +727,17 @@ int _nfs4_call_sync_session(struct rpc_clnt *clnt,
                            struct nfs4_sequence_res *res,
                            int cache_reply)
 {
-       return nfs4_call_sync_sequence(clnt, server, msg, args, res, cache_reply, 0);
+       nfs41_init_sequence(args, res, cache_reply);
+       return nfs4_call_sync_sequence(clnt, server, msg, args, res, 0);
 }
 
 #else
+static inline
+void nfs41_init_sequence(struct nfs4_sequence_args *args,
+               struct nfs4_sequence_res *res, int cache_reply)
+{
+}
+
 static int nfs4_sequence_done(struct rpc_task *task,
                               struct nfs4_sequence_res *res)
 {
@@ -728,7 +752,7 @@ int _nfs4_call_sync(struct rpc_clnt *clnt,
                    struct nfs4_sequence_res *res,
                    int cache_reply)
 {
-       args->sa_session = res->sr_session = NULL;
+       nfs41_init_sequence(args, res, cache_reply);
        return rpc_call_sync(clnt, msg, 0);
 }
 
@@ -815,20 +839,22 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
        p->o_arg.open_flags = flags;
        p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE);
        p->o_arg.clientid = server->nfs_client->cl_clientid;
-       p->o_arg.id = sp->so_owner_id.id;
+       p->o_arg.id = sp->so_seqid.owner_id;
        p->o_arg.name = &dentry->d_name;
        p->o_arg.server = server;
        p->o_arg.bitmask = server->attr_bitmask;
        p->o_arg.dir_bitmask = server->cache_consistency_bitmask;
        p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
-       if (flags & O_CREAT) {
-               u32 *s;
+       if (attrs != NULL && attrs->ia_valid != 0) {
+               __be32 verf[2];
 
                p->o_arg.u.attrs = &p->attrs;
                memcpy(&p->attrs, attrs, sizeof(p->attrs));
-               s = (u32 *) p->o_arg.u.verifier.data;
-               s[0] = jiffies;
-               s[1] = current->pid;
+
+               verf[0] = jiffies;
+               verf[1] = current->pid;
+               memcpy(p->o_arg.u.verifier.data, verf,
+                               sizeof(p->o_arg.u.verifier.data));
        }
        p->c_arg.fh = &p->o_res.fh;
        p->c_arg.stateid = &p->o_res.stateid;
@@ -878,7 +904,7 @@ static int can_open_cached(struct nfs4_state *state, fmode_t mode, int open_mode
 {
        int ret = 0;
 
-       if (open_mode & O_EXCL)
+       if (open_mode & (O_EXCL|O_TRUNC))
                goto out;
        switch (mode & (FMODE_READ|FMODE_WRITE)) {
                case FMODE_READ:
@@ -927,8 +953,8 @@ static void update_open_stateflags(struct nfs4_state *state, fmode_t fmode)
 static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
 {
        if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
-               memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data));
-       memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data));
+               nfs4_stateid_copy(&state->stateid, stateid);
+       nfs4_stateid_copy(&state->open_stateid, stateid);
        switch (fmode) {
                case FMODE_READ:
                        set_bit(NFS_O_RDONLY_STATE, &state->flags);
@@ -956,7 +982,7 @@ static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_s
         */
        write_seqlock(&state->seqlock);
        if (deleg_stateid != NULL) {
-               memcpy(state->stateid.data, deleg_stateid->data, sizeof(state->stateid.data));
+               nfs4_stateid_copy(&state->stateid, deleg_stateid);
                set_bit(NFS_DELEGATED_STATE, &state->flags);
        }
        if (open_stateid != NULL)
@@ -987,7 +1013,7 @@ static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stat
 
        if (delegation == NULL)
                delegation = &deleg_cur->stateid;
-       else if (memcmp(deleg_cur->stateid.data, delegation->data, NFS4_STATEID_SIZE) != 0)
+       else if (!nfs4_stateid_match(&deleg_cur->stateid, delegation))
                goto no_delegation_unlock;
 
        nfs_mark_delegation_referenced(deleg_cur);
@@ -1026,7 +1052,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
        struct nfs4_state *state = opendata->state;
        struct nfs_inode *nfsi = NFS_I(state->inode);
        struct nfs_delegation *delegation;
-       int open_mode = opendata->o_arg.open_flags & O_EXCL;
+       int open_mode = opendata->o_arg.open_flags & (O_EXCL|O_TRUNC);
        fmode_t fmode = opendata->o_arg.fmode;
        nfs4_stateid stateid;
        int ret = -EAGAIN;
@@ -1048,7 +1074,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
                        break;
                }
                /* Save the delegation */
-               memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data));
+               nfs4_stateid_copy(&stateid, &delegation->stateid);
                rcu_read_unlock();
                ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode);
                if (ret != 0)
@@ -1090,6 +1116,7 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data
        if (state == NULL)
                goto err_put_inode;
        if (data->o_res.delegation_type != 0) {
+               struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
                int delegation_flags = 0;
 
                rcu_read_lock();
@@ -1101,7 +1128,7 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data
                        pr_err_ratelimited("NFS: Broken NFSv4 server %s is "
                                        "returning a delegation for "
                                        "OPEN(CLAIM_DELEGATE_CUR)\n",
-                                       NFS_CLIENT(inode)->cl_server);
+                                       clp->cl_hostname);
                } else if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0)
                        nfs_inode_set_delegation(state->inode,
                                        data->owner->so_cred,
@@ -1210,10 +1237,10 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
         * Check if we need to update the current stateid.
         */
        if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0 &&
-           memcmp(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)) != 0) {
+           !nfs4_stateid_match(&state->stateid, &state->open_stateid)) {
                write_seqlock(&state->seqlock);
                if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
-                       memcpy(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data));
+                       nfs4_stateid_copy(&state->stateid, &state->open_stateid);
                write_sequnlock(&state->seqlock);
        }
        return 0;
@@ -1282,8 +1309,7 @@ static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs
        if (IS_ERR(opendata))
                return PTR_ERR(opendata);
        opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR;
-       memcpy(opendata->o_arg.u.delegation.data, stateid->data,
-                       sizeof(opendata->o_arg.u.delegation.data));
+       nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid);
        ret = nfs4_open_recover(opendata, state);
        nfs4_opendata_put(opendata);
        return ret;
@@ -1319,8 +1345,11 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
                                 * The show must go on: exit, but mark the
                                 * stateid as needing recovery.
                                 */
+                       case -NFS4ERR_DELEG_REVOKED:
                        case -NFS4ERR_ADMIN_REVOKED:
                        case -NFS4ERR_BAD_STATEID:
+                               nfs_inode_find_state_and_recover(state->inode,
+                                               stateid);
                                nfs4_schedule_stateid_recovery(server, state);
                        case -EKEYEXPIRED:
                                /*
@@ -1345,8 +1374,7 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
 
        data->rpc_status = task->tk_status;
        if (data->rpc_status == 0) {
-               memcpy(data->o_res.stateid.data, data->c_res.stateid.data,
-                               sizeof(data->o_res.stateid.data));
+               nfs4_stateid_copy(&data->o_res.stateid, &data->c_res.stateid);
                nfs_confirm_seqid(&data->owner->so_seqid, 0);
                renew_lease(data->o_res.server, data->timestamp);
                data->rpc_done = 1;
@@ -1440,7 +1468,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
                rcu_read_unlock();
        }
        /* Update sequence id. */
-       data->o_arg.id = sp->so_owner_id.id;
+       data->o_arg.id = sp->so_seqid.owner_id;
        data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid;
        if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) {
                task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
@@ -1449,7 +1477,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
        data->timestamp = jiffies;
        if (nfs4_setup_sequence(data->o_arg.server,
                                &data->o_arg.seq_args,
-                               &data->o_res.seq_res, 1, task))
+                               &data->o_res.seq_res, task))
                return;
        rpc_call_start(task);
        return;
@@ -1551,6 +1579,7 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover)
        };
        int status;
 
+       nfs41_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1);
        kref_get(&data->kref);
        data->rpc_done = 0;
        data->rpc_status = 0;
@@ -1712,15 +1741,32 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta
 }
 
 #if defined(CONFIG_NFS_V4_1)
-static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state)
+static int nfs41_check_expired_stateid(struct nfs4_state *state, nfs4_stateid *stateid, unsigned int flags)
 {
-       int status;
+       int status = NFS_OK;
        struct nfs_server *server = NFS_SERVER(state->inode);
 
-       status = nfs41_test_stateid(server, state);
-       if (status == NFS_OK)
-               return 0;
-       nfs41_free_stateid(server, state);
+       if (state->flags & flags) {
+               status = nfs41_test_stateid(server, stateid);
+               if (status != NFS_OK) {
+                       nfs41_free_stateid(server, stateid);
+                       state->flags &= ~flags;
+               }
+       }
+       return status;
+}
+
+static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state)
+{
+       int deleg_status, open_status;
+       int deleg_flags = 1 << NFS_DELEGATED_STATE;
+       int open_flags = (1 << NFS_O_RDONLY_STATE) | (1 << NFS_O_WRONLY_STATE) | (1 << NFS_O_RDWR_STATE);
+
+       deleg_status = nfs41_check_expired_stateid(state, &state->stateid, deleg_flags);
+       open_status = nfs41_check_expired_stateid(state,  &state->open_stateid, open_flags);
+
+       if ((deleg_status == NFS_OK) && (open_status == NFS_OK))
+               return NFS_OK;
        return nfs4_open_expired(sp, state);
 }
 #endif
@@ -1754,7 +1800,8 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode
 
        /* Protect against reboot recovery conflicts */
        status = -ENOMEM;
-       if (!(sp = nfs4_get_state_owner(server, cred))) {
+       sp = nfs4_get_state_owner(server, cred, GFP_KERNEL);
+       if (sp == NULL) {
                dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n");
                goto out_err;
        }
@@ -1829,7 +1876,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
                 * the user though...
                 */
                if (status == -NFS4ERR_BAD_SEQID) {
-                       printk(KERN_WARNING "NFS: v4 server %s "
+                       pr_warn_ratelimited("NFS: v4 server %s "
                                        " returned a bad sequence-id error!\n",
                                        NFS_SERVER(dir)->nfs_client->cl_hostname);
                        exception.retry = 1;
@@ -1882,12 +1929,14 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
 
        nfs_fattr_init(fattr);
 
-       if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) {
+       if (state != NULL) {
+               nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE,
+                               current->files, current->tgid);
+       } else if (nfs4_copy_delegation_stateid(&arg.stateid, inode,
+                               FMODE_WRITE)) {
                /* Use that stateid */
-       } else if (state != NULL) {
-               nfs4_copy_stateid(&arg.stateid, state, current->files, current->tgid);
        } else
-               memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid));
+               nfs4_stateid_copy(&arg.stateid, &zero_stateid);
 
        status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
        if (status == 0 && state != NULL)
@@ -1900,7 +1949,10 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
                           struct nfs4_state *state)
 {
        struct nfs_server *server = NFS_SERVER(inode);
-       struct nfs4_exception exception = { };
+       struct nfs4_exception exception = {
+               .state = state,
+               .inode = inode,
+       };
        int err;
        do {
                err = nfs4_handle_exception(server,
@@ -1954,6 +2006,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
        struct nfs4_state *state = calldata->state;
        struct nfs_server *server = NFS_SERVER(calldata->inode);
 
+       dprintk("%s: begin!\n", __func__);
        if (!nfs4_sequence_done(task, &calldata->res.seq_res))
                return;
         /* hmm. we are done with the inode, and in the process of freeing
@@ -1981,6 +2034,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
        }
        nfs_release_seqid(calldata->arg.seqid);
        nfs_refresh_inode(calldata->inode, calldata->res.fattr);
+       dprintk("%s: done, ret = %d!\n", __func__, task->tk_status);
 }
 
 static void nfs4_close_prepare(struct rpc_task *task, void *data)
@@ -1989,6 +2043,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
        struct nfs4_state *state = calldata->state;
        int call_close = 0;
 
+       dprintk("%s: begin!\n", __func__);
        if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
                return;
 
@@ -2013,7 +2068,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
        if (!call_close) {
                /* Note: exit _without_ calling nfs4_close_done */
                task->tk_action = NULL;
-               return;
+               goto out;
        }
 
        if (calldata->arg.fmode == 0) {
@@ -2022,17 +2077,20 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
                    pnfs_roc_drain(calldata->inode, &calldata->roc_barrier)) {
                        rpc_sleep_on(&NFS_SERVER(calldata->inode)->roc_rpcwaitq,
                                     task, NULL);
-                       return;
+                       goto out;
                }
        }
 
        nfs_fattr_init(calldata->res.fattr);
        calldata->timestamp = jiffies;
        if (nfs4_setup_sequence(NFS_SERVER(calldata->inode),
-                               &calldata->arg.seq_args, &calldata->res.seq_res,
-                               1, task))
-               return;
+                               &calldata->arg.seq_args,
+                               &calldata->res.seq_res,
+                               task))
+               goto out;
        rpc_call_start(task);
+out:
+       dprintk("%s: done!\n", __func__);
 }
 
 static const struct rpc_call_ops nfs4_close_ops = {
@@ -2074,6 +2132,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc)
        calldata = kzalloc(sizeof(*calldata), gfp_mask);
        if (calldata == NULL)
                goto out;
+       nfs41_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 1);
        calldata->inode = state->inode;
        calldata->state = state;
        calldata->arg.fh = NFS_FH(state->inode);
@@ -2182,6 +2241,7 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
                server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE;
                server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
                server->acl_bitmask = res.acl_bitmask;
+               server->fh_expire_type = res.fh_expire_type;
        }
 
        return status;
@@ -2303,7 +2363,6 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
        return nfs4_map_errors(status);
 }
 
-static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr);
 /*
  * Get locations and (maybe) other attributes of a referral.
  * Note that we'll actually follow the referral later when
@@ -2420,6 +2479,10 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
                }
        }
 
+       /* Deal with open(O_TRUNC) */
+       if (sattr->ia_valid & ATTR_OPEN)
+               sattr->ia_valid &= ~(ATTR_MTIME|ATTR_CTIME|ATTR_OPEN);
+
        status = nfs4_do_setattr(inode, cred, fattr, sattr, state);
        if (status == 0)
                nfs_setattr_update_inode(inode, sattr);
@@ -2494,7 +2557,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
        struct nfs_server *server = NFS_SERVER(inode);
        struct nfs4_accessargs args = {
                .fh = NFS_FH(inode),
-               .bitmask = server->attr_bitmask,
+               .bitmask = server->cache_consistency_bitmask,
        };
        struct nfs4_accessres res = {
                .server = server,
@@ -2712,8 +2775,18 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
 
        args->bitmask = server->cache_consistency_bitmask;
        res->server = server;
-       res->seq_res.sr_slot = NULL;
        msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE];
+       nfs41_init_sequence(&args->seq_args, &res->seq_res, 1);
+}
+
+static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data)
+{
+       if (nfs4_setup_sequence(NFS_SERVER(data->dir),
+                               &data->args.seq_args,
+                               &data->res.seq_res,
+                               task))
+               return;
+       rpc_call_start(task);
 }
 
 static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
@@ -2738,6 +2811,17 @@ static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
        msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME];
        arg->bitmask = server->attr_bitmask;
        res->server = server;
+       nfs41_init_sequence(&arg->seq_args, &res->seq_res, 1);
+}
+
+static void nfs4_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data)
+{
+       if (nfs4_setup_sequence(NFS_SERVER(data->old_dir),
+                               &data->args.seq_args,
+                               &data->res.seq_res,
+                               task))
+               return;
+       rpc_call_start(task);
 }
 
 static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
@@ -3232,6 +3316,17 @@ static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message
        data->timestamp   = jiffies;
        data->read_done_cb = nfs4_read_done_cb;
        msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
+       nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
+}
+
+static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
+{
+       if (nfs4_setup_sequence(NFS_SERVER(data->inode),
+                               &data->args.seq_args,
+                               &data->res.seq_res,
+                               task))
+               return;
+       rpc_call_start(task);
 }
 
 /* Reset the the nfs_read_data to send the read to the MDS. */
@@ -3305,6 +3400,17 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
        data->timestamp   = jiffies;
 
        msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE];
+       nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
+}
+
+static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
+{
+       if (nfs4_setup_sequence(NFS_SERVER(data->inode),
+                               &data->args.seq_args,
+                               &data->res.seq_res,
+                               task))
+               return;
+       rpc_call_start(task);
 }
 
 static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *data)
@@ -3339,6 +3445,7 @@ static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_messa
                data->write_done_cb = nfs4_commit_done_cb;
        data->res.server = server;
        msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
+       nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
 }
 
 struct nfs4_renewdata {
@@ -3714,8 +3821,11 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
        if (task->tk_status >= 0)
                return 0;
        switch(task->tk_status) {
+               case -NFS4ERR_DELEG_REVOKED:
                case -NFS4ERR_ADMIN_REVOKED:
                case -NFS4ERR_BAD_STATEID:
+                       if (state != NULL)
+                               nfs_remove_bad_delegation(state->inode);
                case -NFS4ERR_OPENMODE:
                        if (state == NULL)
                                break;
@@ -3764,6 +3874,16 @@ wait_on_recovery:
        return -EAGAIN;
 }
 
+static void nfs4_construct_boot_verifier(struct nfs_client *clp,
+                                        nfs4_verifier *bootverf)
+{
+       __be32 verf[2];
+
+       verf[0] = htonl((u32)clp->cl_boot_time.tv_sec);
+       verf[1] = htonl((u32)clp->cl_boot_time.tv_nsec);
+       memcpy(bootverf->data, verf, sizeof(bootverf->data));
+}
+
 int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
                unsigned short port, struct rpc_cred *cred,
                struct nfs4_setclientid_res *res)
@@ -3780,15 +3900,13 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
                .rpc_resp = res,
                .rpc_cred = cred,
        };
-       __be32 *p;
        int loop = 0;
        int status;
 
-       p = (__be32*)sc_verifier.data;
-       *p++ = htonl((u32)clp->cl_boot_time.tv_sec);
-       *p = htonl((u32)clp->cl_boot_time.tv_nsec);
+       nfs4_construct_boot_verifier(clp, &sc_verifier);
 
        for(;;) {
+               rcu_read_lock();
                setclientid.sc_name_len = scnprintf(setclientid.sc_name,
                                sizeof(setclientid.sc_name), "%s/%s %s %s %u",
                                clp->cl_ipaddr,
@@ -3805,6 +3923,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
                setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr,
                                sizeof(setclientid.sc_uaddr), "%s.%u.%u",
                                clp->cl_ipaddr, port >> 8, port & 255);
+               rcu_read_unlock();
 
                status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
                if (status != -NFS4ERR_CLID_INUSE)
@@ -3891,7 +4010,7 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data)
 
        if (nfs4_setup_sequence(d_data->res.server,
                                &d_data->args.seq_args,
-                               &d_data->res.seq_res, 1, task))
+                               &d_data->res.seq_res, task))
                return;
        rpc_call_start(task);
 }
@@ -3925,11 +4044,12 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
        data = kzalloc(sizeof(*data), GFP_NOFS);
        if (data == NULL)
                return -ENOMEM;
+       nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
        data->args.fhandle = &data->fh;
        data->args.stateid = &data->stateid;
        data->args.bitmask = server->attr_bitmask;
        nfs_copy_fh(&data->fh, NFS_FH(inode));
-       memcpy(&data->stateid, stateid, sizeof(data->stateid));
+       nfs4_stateid_copy(&data->stateid, stateid);
        data->res.fattr = &data->fattr;
        data->res.server = server;
        nfs_fattr_init(data->res.fattr);
@@ -4016,7 +4136,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
        if (status != 0)
                goto out;
        lsp = request->fl_u.nfs4_fl.owner;
-       arg.lock_owner.id = lsp->ls_id.id;
+       arg.lock_owner.id = lsp->ls_seqid.owner_id;
        arg.lock_owner.s_dev = server->s_dev;
        status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
        switch (status) {
@@ -4112,9 +4232,8 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
                return;
        switch (task->tk_status) {
                case 0:
-                       memcpy(calldata->lsp->ls_stateid.data,
-                                       calldata->res.stateid.data,
-                                       sizeof(calldata->lsp->ls_stateid.data));
+                       nfs4_stateid_copy(&calldata->lsp->ls_stateid,
+                                       &calldata->res.stateid);
                        renew_lease(calldata->server, calldata->timestamp);
                        break;
                case -NFS4ERR_BAD_STATEID:
@@ -4142,7 +4261,7 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
        calldata->timestamp = jiffies;
        if (nfs4_setup_sequence(calldata->server,
                                &calldata->arg.seq_args,
-                               &calldata->res.seq_res, 1, task))
+                               &calldata->res.seq_res, task))
                return;
        rpc_call_start(task);
 }
@@ -4182,6 +4301,7 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
                return ERR_PTR(-ENOMEM);
        }
 
+       nfs41_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1);
        msg.rpc_argp = &data->arg;
        msg.rpc_resp = &data->res;
        task_setup_data.callback_data = data;
@@ -4261,7 +4381,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
                goto out_free_seqid;
        p->arg.lock_stateid = &lsp->ls_stateid;
        p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
-       p->arg.lock_owner.id = lsp->ls_id.id;
+       p->arg.lock_owner.id = lsp->ls_seqid.owner_id;
        p->arg.lock_owner.s_dev = server->s_dev;
        p->res.lock_seqid = p->arg.lock_seqid;
        p->lsp = lsp;
@@ -4297,7 +4417,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
        data->timestamp = jiffies;
        if (nfs4_setup_sequence(data->server,
                                &data->arg.seq_args,
-                               &data->res.seq_res, 1, task))
+                               &data->res.seq_res, task))
                return;
        rpc_call_start(task);
        dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status);
@@ -4326,8 +4446,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
                        goto out;
        }
        if (data->rpc_status == 0) {
-               memcpy(data->lsp->ls_stateid.data, data->res.stateid.data,
-                                       sizeof(data->lsp->ls_stateid.data));
+               nfs4_stateid_copy(&data->lsp->ls_stateid, &data->res.stateid);
                data->lsp->ls_flags |= NFS_LOCK_INITIALIZED;
                renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp);
        }
@@ -4415,6 +4534,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
                        data->arg.reclaim = NFS_LOCK_RECLAIM;
                task_setup_data.callback_ops = &nfs4_recover_lock_ops;
        }
+       nfs41_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1);
        msg.rpc_argp = &data->arg;
        msg.rpc_resp = &data->res;
        task_setup_data.callback_data = data;
@@ -4479,15 +4599,34 @@ out:
 }
 
 #if defined(CONFIG_NFS_V4_1)
-static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *request)
+static int nfs41_check_expired_locks(struct nfs4_state *state)
 {
-       int status;
+       int status, ret = NFS_OK;
+       struct nfs4_lock_state *lsp;
        struct nfs_server *server = NFS_SERVER(state->inode);
 
-       status = nfs41_test_stateid(server, state);
+       list_for_each_entry(lsp, &state->lock_states, ls_locks) {
+               if (lsp->ls_flags & NFS_LOCK_INITIALIZED) {
+                       status = nfs41_test_stateid(server, &lsp->ls_stateid);
+                       if (status != NFS_OK) {
+                               nfs41_free_stateid(server, &lsp->ls_stateid);
+                               lsp->ls_flags &= ~NFS_LOCK_INITIALIZED;
+                               ret = status;
+                       }
+               }
+       };
+
+       return ret;
+}
+
+static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *request)
+{
+       int status = NFS_OK;
+
+       if (test_bit(LK_STATE_IN_USE, &state->flags))
+               status = nfs41_check_expired_locks(state);
        if (status == NFS_OK)
-               return 0;
-       nfs41_free_stateid(server, state);
+               return status;
        return nfs4_lock_expired(state, request);
 }
 #endif
@@ -4523,7 +4662,8 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
        /* Note: we always want to sleep here! */
        request->fl_flags = fl_flags | FL_SLEEP;
        if (do_vfs_lock(request->fl_file, request) < 0)
-               printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __func__);
+               printk(KERN_WARNING "NFS: %s: VFS is out of sync with lock "
+                       "manager!\n", __func__);
 out_unlock:
        up_read(&nfsi->rwsem);
 out:
@@ -4533,7 +4673,9 @@ out:
 
 static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
 {
-       struct nfs4_exception exception = { };
+       struct nfs4_exception exception = {
+               .state = state,
+       };
        int err;
 
        do {
@@ -4603,8 +4745,8 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
                err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW);
                switch (err) {
                        default:
-                               printk(KERN_ERR "%s: unhandled error %d.\n",
-                                               __func__, err);
+                               printk(KERN_ERR "NFS: %s: unhandled error "
+                                       "%d.\n", __func__, err);
                        case 0:
                        case -ESTALE:
                                goto out;
@@ -4626,6 +4768,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
                                 * The show must go on: exit, but mark the
                                 * stateid as needing recovery.
                                 */
+                       case -NFS4ERR_DELEG_REVOKED:
                        case -NFS4ERR_ADMIN_REVOKED:
                        case -NFS4ERR_BAD_STATEID:
                        case -NFS4ERR_OPENMODE:
@@ -4655,33 +4798,44 @@ out:
        return err;
 }
 
+struct nfs_release_lockowner_data {
+       struct nfs4_lock_state *lsp;
+       struct nfs_server *server;
+       struct nfs_release_lockowner_args args;
+};
+
 static void nfs4_release_lockowner_release(void *calldata)
 {
+       struct nfs_release_lockowner_data *data = calldata;
+       nfs4_free_lock_state(data->server, data->lsp);
        kfree(calldata);
 }
 
-const struct rpc_call_ops nfs4_release_lockowner_ops = {
+static const struct rpc_call_ops nfs4_release_lockowner_ops = {
        .rpc_release = nfs4_release_lockowner_release,
 };
 
-void nfs4_release_lockowner(const struct nfs4_lock_state *lsp)
+int nfs4_release_lockowner(struct nfs4_lock_state *lsp)
 {
        struct nfs_server *server = lsp->ls_state->owner->so_server;
-       struct nfs_release_lockowner_args *args;
+       struct nfs_release_lockowner_data *data;
        struct rpc_message msg = {
                .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RELEASE_LOCKOWNER],
        };
 
        if (server->nfs_client->cl_mvops->minor_version != 0)
-               return;
-       args = kmalloc(sizeof(*args), GFP_NOFS);
-       if (!args)
-               return;
-       args->lock_owner.clientid = server->nfs_client->cl_clientid;
-       args->lock_owner.id = lsp->ls_id.id;
-       args->lock_owner.s_dev = server->s_dev;
-       msg.rpc_argp = args;
-       rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, args);
+               return -EINVAL;
+       data = kmalloc(sizeof(*data), GFP_NOFS);
+       if (!data)
+               return -ENOMEM;
+       data->lsp = lsp;
+       data->server = server;
+       data->args.lock_owner.clientid = server->nfs_client->cl_clientid;
+       data->args.lock_owner.id = lsp->ls_seqid.owner_id;
+       data->args.lock_owner.s_dev = server->s_dev;
+       msg.rpc_argp = &data->args;
+       rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data);
+       return 0;
 }
 
 #define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
@@ -4727,11 +4881,11 @@ static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr)
        if (!(((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) ||
               (fattr->valid & NFS_ATTR_FATTR_FILEID)) &&
              (fattr->valid & NFS_ATTR_FATTR_FSID) &&
-             (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)))
+             (fattr->valid & NFS_ATTR_FATTR_V4_LOCATIONS)))
                return;
 
        fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE |
-               NFS_ATTR_FATTR_NLINK;
+               NFS_ATTR_FATTR_NLINK | NFS_ATTR_FATTR_V4_REFERRAL;
        fattr->mode = S_IFDIR | S_IRUGO | S_IXUGO;
        fattr->nlink = 2;
 }
@@ -4798,7 +4952,8 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct
        return status;
 }
 
-int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors)
+static int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name,
+               struct nfs4_secinfo_flavors *flavors)
 {
        struct nfs4_exception exception = { };
        int err;
@@ -4852,6 +5007,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
 {
        nfs4_verifier verifier;
        struct nfs41_exchange_id_args args = {
+               .verifier = &verifier,
                .client = clp,
                .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER,
        };
@@ -4865,15 +5021,11 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
                .rpc_resp = &res,
                .rpc_cred = cred,
        };
-       __be32 *p;
 
        dprintk("--> %s\n", __func__);
        BUG_ON(clp == NULL);
 
-       p = (u32 *)verifier.data;
-       *p++ = htonl((u32)clp->cl_boot_time.tv_sec);
-       *p = htonl((u32)clp->cl_boot_time.tv_nsec);
-       args.verifier = &verifier;
+       nfs4_construct_boot_verifier(clp, &verifier);
 
        args.id_len = scnprintf(args.id, sizeof(args.id),
                                "%s/%s.%s/%u",
@@ -4888,10 +5040,23 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
                goto out;
        }
 
+       res.impl_id = kzalloc(sizeof(struct nfs41_impl_id), GFP_KERNEL);
+       if (unlikely(!res.impl_id)) {
+               status = -ENOMEM;
+               goto out_server_scope;
+       }
+
        status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
        if (!status)
                status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags);
 
+       if (!status) {
+               /* use the most recent implementation id */
+               kfree(clp->impl_id);
+               clp->impl_id = res.impl_id;
+       } else
+               kfree(res.impl_id);
+
        if (!status) {
                if (clp->server_scope &&
                    !nfs41_same_server_scope(clp->server_scope,
@@ -4908,8 +5073,16 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
                        goto out;
                }
        }
+
+out_server_scope:
        kfree(res.server_scope);
 out:
+       if (clp->impl_id)
+               dprintk("%s: Server Implementation ID: "
+                       "domain: %s, name: %s, date: %llu,%u\n",
+                       __func__, clp->impl_id->domain, clp->impl_id->name,
+                       clp->impl_id->date.seconds,
+                       clp->impl_id->date.nseconds);
        dprintk("<-- %s status= %d\n", __func__, status);
        return status;
 }
@@ -4933,7 +5106,7 @@ static void nfs4_get_lease_time_prepare(struct rpc_task *task,
           since we're invoked within one */
        ret = nfs41_setup_sequence(data->clp->cl_session,
                                   &data->args->la_seq_args,
-                                  &data->res->lr_seq_res, 0, task);
+                                  &data->res->lr_seq_res, task);
 
        BUG_ON(ret == -EAGAIN);
        rpc_call_start(task);
@@ -4966,7 +5139,7 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata)
        dprintk("<-- %s\n", __func__);
 }
 
-struct rpc_call_ops nfs4_get_lease_time_ops = {
+static const struct rpc_call_ops nfs4_get_lease_time_ops = {
        .rpc_call_prepare = nfs4_get_lease_time_prepare,
        .rpc_call_done = nfs4_get_lease_time_done,
 };
@@ -4997,6 +5170,7 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
        };
        int status;
 
+       nfs41_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0);
        dprintk("--> %s\n", __func__);
        task = rpc_run_task(&task_setup);
 
@@ -5113,13 +5287,13 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
                return NULL;
 
        tbl = &session->fc_slot_table;
-       tbl->highest_used_slotid = -1;
+       tbl->highest_used_slotid = NFS4_NO_SLOT;
        spin_lock_init(&tbl->slot_tbl_lock);
        rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table");
        init_completion(&tbl->complete);
 
        tbl = &session->bc_slot_table;
-       tbl->highest_used_slotid = -1;
+       tbl->highest_used_slotid = NFS4_NO_SLOT;
        spin_lock_init(&tbl->slot_tbl_lock);
        rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table");
        init_completion(&tbl->complete);
@@ -5132,11 +5306,16 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
 
 void nfs4_destroy_session(struct nfs4_session *session)
 {
+       struct rpc_xprt *xprt;
+
        nfs4_proc_destroy_session(session);
+
+       rcu_read_lock();
+       xprt = rcu_dereference(session->clp->cl_rpcclient->cl_xprt);
+       rcu_read_unlock();
        dprintk("%s Destroy backchannel for xprt %p\n",
-               __func__, session->clp->cl_rpcclient->cl_xprt);
-       xprt_destroy_backchannel(session->clp->cl_rpcclient->cl_xprt,
-                               NFS41_BC_MIN_CALLBACKS);
+               __func__, xprt);
+       xprt_destroy_backchannel(xprt, NFS41_BC_MIN_CALLBACKS);
        nfs4_destroy_slot_tables(session);
        kfree(session);
 }
@@ -5164,7 +5343,7 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args)
        args->fc_attrs.max_rqst_sz = mxrqst_sz;
        args->fc_attrs.max_resp_sz = mxresp_sz;
        args->fc_attrs.max_ops = NFS4_MAX_OPS;
-       args->fc_attrs.max_reqs = session->clp->cl_rpcclient->cl_xprt->max_reqs;
+       args->fc_attrs.max_reqs = max_session_slots;
 
        dprintk("%s: Fore Channel : max_rqst_sz=%u max_resp_sz=%u "
                "max_ops=%u max_reqs=%u\n",
@@ -5204,6 +5383,8 @@ static int nfs4_verify_fore_channel_attrs(struct nfs41_create_session_args *args
                return -EINVAL;
        if (rcvd->max_reqs == 0)
                return -EINVAL;
+       if (rcvd->max_reqs > NFS4_MAX_SLOT_TABLE)
+               rcvd->max_reqs = NFS4_MAX_SLOT_TABLE;
        return 0;
 }
 
@@ -5219,9 +5400,9 @@ static int nfs4_verify_back_channel_attrs(struct nfs41_create_session_args *args
        if (rcvd->max_resp_sz_cached > sent->max_resp_sz_cached)
                return -EINVAL;
        /* These would render the backchannel useless: */
-       if (rcvd->max_ops  == 0)
+       if (rcvd->max_ops != sent->max_ops)
                return -EINVAL;
-       if (rcvd->max_reqs == 0)
+       if (rcvd->max_reqs != sent->max_reqs)
                return -EINVAL;
        return 0;
 }
@@ -5324,7 +5505,7 @@ int nfs4_proc_destroy_session(struct nfs4_session *session)
 
        if (status)
                printk(KERN_WARNING
-                       "Got error %d from the server on DESTROY_SESSION. "
+                       "NFS: Got error %d from the server on DESTROY_SESSION. "
                        "Session has been destroyed regardless...\n", status);
 
        dprintk("<-- nfs4_proc_destroy_session\n");
@@ -5447,7 +5628,7 @@ static void nfs41_sequence_prepare(struct rpc_task *task, void *data)
        args = task->tk_msg.rpc_argp;
        res = task->tk_msg.rpc_resp;
 
-       if (nfs41_setup_sequence(clp->cl_session, args, res, 0, task))
+       if (nfs41_setup_sequence(clp->cl_session, args, res, task))
                return;
        rpc_call_start(task);
 }
@@ -5479,6 +5660,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_
                nfs_put_client(clp);
                return ERR_PTR(-ENOMEM);
        }
+       nfs41_init_sequence(&calldata->args, &calldata->res, 0);
        msg.rpc_argp = &calldata->args;
        msg.rpc_resp = &calldata->res;
        calldata->clp = clp;
@@ -5540,7 +5722,7 @@ static void nfs4_reclaim_complete_prepare(struct rpc_task *task, void *data)
        rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
        if (nfs41_setup_sequence(calldata->clp->cl_session,
                                &calldata->arg.seq_args,
-                               &calldata->res.seq_res, 0, task))
+                               &calldata->res.seq_res, task))
                return;
 
        rpc_call_start(task);
@@ -5619,6 +5801,7 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
        calldata->clp = clp;
        calldata->arg.one_fs = 0;
 
+       nfs41_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 0);
        msg.rpc_argp = &calldata->arg;
        msg.rpc_resp = &calldata->res;
        task_setup_data.callback_data = calldata;
@@ -5650,7 +5833,7 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
         * to be no way to prevent it completely.
         */
        if (nfs4_setup_sequence(server, &lgp->args.seq_args,
-                               &lgp->res.seq_res, 0, task))
+                               &lgp->res.seq_res, task))
                return;
        if (pnfs_choose_layoutget_stateid(&lgp->args.stateid,
                                          NFS_I(lgp->args.inode)->layout,
@@ -5725,6 +5908,7 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
 
        lgp->res.layoutp = &lgp->args.layout;
        lgp->res.seq_res.sr_slot = NULL;
+       nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0);
        task = rpc_run_task(&task_setup_data);
        if (IS_ERR(task))
                return PTR_ERR(task);
@@ -5745,7 +5929,7 @@ nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata)
 
        dprintk("--> %s\n", __func__);
        if (nfs41_setup_sequence(lrp->clp->cl_session, &lrp->args.seq_args,
-                               &lrp->res.seq_res, 0, task))
+                               &lrp->res.seq_res, task))
                return;
        rpc_call_start(task);
 }
@@ -5811,6 +5995,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp)
        int status;
 
        dprintk("--> %s\n", __func__);
+       nfs41_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1);
        task = rpc_run_task(&task_setup_data);
        if (IS_ERR(task))
                return PTR_ERR(task);
@@ -5911,7 +6096,7 @@ static void nfs4_layoutcommit_prepare(struct rpc_task *task, void *calldata)
        struct nfs_server *server = NFS_SERVER(data->args.inode);
 
        if (nfs4_setup_sequence(server, &data->args.seq_args,
-                               &data->res.seq_res, 1, task))
+                               &data->res.seq_res, task))
                return;
        rpc_call_start(task);
 }
@@ -5998,6 +6183,7 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync)
                data->args.lastbytewritten,
                data->args.inode->i_ino);
 
+       nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
        task = rpc_run_task(&task_setup_data);
        if (IS_ERR(task))
                return PTR_ERR(task);
@@ -6091,11 +6277,12 @@ out_freepage:
 out:
        return err;
 }
-static int _nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state)
+
+static int _nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid)
 {
        int status;
        struct nfs41_test_stateid_args args = {
-               .stateid = &state->stateid,
+               .stateid = stateid,
        };
        struct nfs41_test_stateid_res res;
        struct rpc_message msg = {
@@ -6103,28 +6290,31 @@ static int _nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *sta
                .rpc_argp = &args,
                .rpc_resp = &res,
        };
-       args.seq_args.sa_session = res.seq_res.sr_session = NULL;
-       status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1);
+
+       nfs41_init_sequence(&args.seq_args, &res.seq_res, 0);
+       status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
+
+       if (status == NFS_OK)
+               return res.status;
        return status;
 }
 
-static int nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state)
+static int nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid)
 {
        struct nfs4_exception exception = { };
        int err;
        do {
                err = nfs4_handle_exception(server,
-                               _nfs41_test_stateid(server, state),
+                               _nfs41_test_stateid(server, stateid),
                                &exception);
        } while (exception.retry);
        return err;
 }
 
-static int _nfs4_free_stateid(struct nfs_server *server, struct nfs4_state *state)
+static int _nfs4_free_stateid(struct nfs_server *server, nfs4_stateid *stateid)
 {
-       int status;
        struct nfs41_free_stateid_args args = {
-               .stateid = &state->stateid,
+               .stateid = stateid,
        };
        struct nfs41_free_stateid_res res;
        struct rpc_message msg = {
@@ -6133,25 +6323,46 @@ static int _nfs4_free_stateid(struct nfs_server *server, struct nfs4_state *stat
                .rpc_resp = &res,
        };
 
-       args.seq_args.sa_session = res.seq_res.sr_session = NULL;
-       status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1);
-       return status;
+       nfs41_init_sequence(&args.seq_args, &res.seq_res, 0);
+       return nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
 }
 
-static int nfs41_free_stateid(struct nfs_server *server, struct nfs4_state *state)
+static int nfs41_free_stateid(struct nfs_server *server, nfs4_stateid *stateid)
 {
        struct nfs4_exception exception = { };
        int err;
        do {
                err = nfs4_handle_exception(server,
-                               _nfs4_free_stateid(server, state),
+                               _nfs4_free_stateid(server, stateid),
                                &exception);
        } while (exception.retry);
        return err;
 }
+
+static bool nfs41_match_stateid(const nfs4_stateid *s1,
+               const nfs4_stateid *s2)
+{
+       if (memcmp(s1->other, s2->other, sizeof(s1->other)) != 0)
+               return false;
+
+       if (s1->seqid == s2->seqid)
+               return true;
+       if (s1->seqid == 0 || s2->seqid == 0)
+               return true;
+
+       return false;
+}
+
 #endif /* CONFIG_NFS_V4_1 */
 
-struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
+static bool nfs4_match_stateid(const nfs4_stateid *s1,
+               const nfs4_stateid *s2)
+{
+       return nfs4_stateid_match(s1, s2);
+}
+
+
+static const struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
        .owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT,
        .state_flag_bit = NFS_STATE_RECLAIM_REBOOT,
        .recover_open   = nfs4_open_reclaim,
@@ -6161,7 +6372,7 @@ struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
 };
 
 #if defined(CONFIG_NFS_V4_1)
-struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = {
+static const struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = {
        .owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT,
        .state_flag_bit = NFS_STATE_RECLAIM_REBOOT,
        .recover_open   = nfs4_open_reclaim,
@@ -6172,7 +6383,7 @@ struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = {
 };
 #endif /* CONFIG_NFS_V4_1 */
 
-struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = {
+static const struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = {
        .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE,
        .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE,
        .recover_open   = nfs4_open_expired,
@@ -6182,7 +6393,7 @@ struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = {
 };
 
 #if defined(CONFIG_NFS_V4_1)
-struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = {
+static const struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = {
        .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE,
        .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE,
        .recover_open   = nfs41_open_expired,
@@ -6192,14 +6403,14 @@ struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = {
 };
 #endif /* CONFIG_NFS_V4_1 */
 
-struct nfs4_state_maintenance_ops nfs40_state_renewal_ops = {
+static const struct nfs4_state_maintenance_ops nfs40_state_renewal_ops = {
        .sched_state_renewal = nfs4_proc_async_renew,
        .get_state_renewal_cred_locked = nfs4_get_renew_cred_locked,
        .renew_lease = nfs4_proc_renew,
 };
 
 #if defined(CONFIG_NFS_V4_1)
-struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = {
+static const struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = {
        .sched_state_renewal = nfs41_proc_async_sequence,
        .get_state_renewal_cred_locked = nfs4_get_machine_cred_locked,
        .renew_lease = nfs4_proc_sequence,
@@ -6209,7 +6420,7 @@ struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = {
 static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = {
        .minor_version = 0,
        .call_sync = _nfs4_call_sync,
-       .validate_stateid = nfs4_validate_delegation_stateid,
+       .match_stateid = nfs4_match_stateid,
        .find_root_sec = nfs4_find_root_sec,
        .reboot_recovery_ops = &nfs40_reboot_recovery_ops,
        .nograce_recovery_ops = &nfs40_nograce_recovery_ops,
@@ -6220,7 +6431,7 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = {
 static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = {
        .minor_version = 1,
        .call_sync = _nfs4_call_sync_session,
-       .validate_stateid = nfs41_validate_delegation_stateid,
+       .match_stateid = nfs41_match_stateid,
        .find_root_sec = nfs41_find_root_sec,
        .reboot_recovery_ops = &nfs41_reboot_recovery_ops,
        .nograce_recovery_ops = &nfs41_nograce_recovery_ops,
@@ -6260,9 +6471,11 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
        .create         = nfs4_proc_create,
        .remove         = nfs4_proc_remove,
        .unlink_setup   = nfs4_proc_unlink_setup,
+       .unlink_rpc_prepare = nfs4_proc_unlink_rpc_prepare,
        .unlink_done    = nfs4_proc_unlink_done,
        .rename         = nfs4_proc_rename,
        .rename_setup   = nfs4_proc_rename_setup,
+       .rename_rpc_prepare = nfs4_proc_rename_rpc_prepare,
        .rename_done    = nfs4_proc_rename_done,
        .link           = nfs4_proc_link,
        .symlink        = nfs4_proc_symlink,
@@ -6276,8 +6489,10 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
        .set_capabilities = nfs4_server_capabilities,
        .decode_dirent  = nfs4_decode_dirent,
        .read_setup     = nfs4_proc_read_setup,
+       .read_rpc_prepare = nfs4_proc_read_rpc_prepare,
        .read_done      = nfs4_read_done,
        .write_setup    = nfs4_proc_write_setup,
+       .write_rpc_prepare = nfs4_proc_write_rpc_prepare,
        .write_done     = nfs4_write_done,
        .commit_setup   = nfs4_proc_commit_setup,
        .commit_done    = nfs4_commit_done,
@@ -6301,6 +6516,10 @@ const struct xattr_handler *nfs4_xattr_handlers[] = {
        NULL
 };
 
+module_param(max_session_slots, ushort, 0644);
+MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 "
+               "requests the client will negotiate");
+
 /*
  * Local variables:
  *  c-basic-offset: 8
index 45392032e7bd60b85b00fb74f86ca99a603e31d4..0f43414eb25a141be336c34bef78cc126cd9039f 100644 (file)
@@ -146,6 +146,11 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp)
        struct rpc_cred *cred = NULL;
        struct nfs_server *server;
 
+       /* Use machine credentials if available */
+       cred = nfs4_get_machine_cred_locked(clp);
+       if (cred != NULL)
+               goto out;
+
        rcu_read_lock();
        list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
                cred = nfs4_get_renew_cred_server_locked(server);
@@ -153,6 +158,8 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp)
                        break;
        }
        rcu_read_unlock();
+
+out:
        return cred;
 }
 
@@ -190,30 +197,29 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp)
 static void nfs4_end_drain_session(struct nfs_client *clp)
 {
        struct nfs4_session *ses = clp->cl_session;
+       struct nfs4_slot_table *tbl;
        int max_slots;
 
        if (ses == NULL)
                return;
+       tbl = &ses->fc_slot_table;
        if (test_and_clear_bit(NFS4_SESSION_DRAINING, &ses->session_state)) {
-               spin_lock(&ses->fc_slot_table.slot_tbl_lock);
-               max_slots = ses->fc_slot_table.max_slots;
+               spin_lock(&tbl->slot_tbl_lock);
+               max_slots = tbl->max_slots;
                while (max_slots--) {
-                       struct rpc_task *task;
-
-                       task = rpc_wake_up_next(&ses->fc_slot_table.
-                                               slot_tbl_waitq);
-                       if (!task)
+                       if (rpc_wake_up_first(&tbl->slot_tbl_waitq,
+                                               nfs4_set_task_privileged,
+                                               NULL) == NULL)
                                break;
-                       rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
                }
-               spin_unlock(&ses->fc_slot_table.slot_tbl_lock);
+               spin_unlock(&tbl->slot_tbl_lock);
        }
 }
 
 static int nfs4_wait_on_slot_tbl(struct nfs4_slot_table *tbl)
 {
        spin_lock(&tbl->slot_tbl_lock);
-       if (tbl->highest_used_slotid != -1) {
+       if (tbl->highest_used_slotid != NFS4_NO_SLOT) {
                INIT_COMPLETION(tbl->complete);
                spin_unlock(&tbl->slot_tbl_lock);
                return wait_for_completion_interruptible(&tbl->complete);
@@ -317,62 +323,6 @@ out:
        return cred;
 }
 
-static void nfs_alloc_unique_id_locked(struct rb_root *root,
-                                      struct nfs_unique_id *new,
-                                      __u64 minval, int maxbits)
-{
-       struct rb_node **p, *parent;
-       struct nfs_unique_id *pos;
-       __u64 mask = ~0ULL;
-
-       if (maxbits < 64)
-               mask = (1ULL << maxbits) - 1ULL;
-
-       /* Ensure distribution is more or less flat */
-       get_random_bytes(&new->id, sizeof(new->id));
-       new->id &= mask;
-       if (new->id < minval)
-               new->id += minval;
-retry:
-       p = &root->rb_node;
-       parent = NULL;
-
-       while (*p != NULL) {
-               parent = *p;
-               pos = rb_entry(parent, struct nfs_unique_id, rb_node);
-
-               if (new->id < pos->id)
-                       p = &(*p)->rb_left;
-               else if (new->id > pos->id)
-                       p = &(*p)->rb_right;
-               else
-                       goto id_exists;
-       }
-       rb_link_node(&new->rb_node, parent, p);
-       rb_insert_color(&new->rb_node, root);
-       return;
-id_exists:
-       for (;;) {
-               new->id++;
-               if (new->id < minval || (new->id & mask) != new->id) {
-                       new->id = minval;
-                       break;
-               }
-               parent = rb_next(parent);
-               if (parent == NULL)
-                       break;
-               pos = rb_entry(parent, struct nfs_unique_id, rb_node);
-               if (new->id < pos->id)
-                       break;
-       }
-       goto retry;
-}
-
-static void nfs_free_unique_id(struct rb_root *root, struct nfs_unique_id *id)
-{
-       rb_erase(&id->rb_node, root);
-}
-
 static struct nfs4_state_owner *
 nfs4_find_state_owner_locked(struct nfs_server *server, struct rpc_cred *cred)
 {
@@ -405,6 +355,7 @@ nfs4_insert_state_owner_locked(struct nfs4_state_owner *new)
        struct rb_node **p = &server->state_owners.rb_node,
                       *parent = NULL;
        struct nfs4_state_owner *sp;
+       int err;
 
        while (*p != NULL) {
                parent = *p;
@@ -421,8 +372,9 @@ nfs4_insert_state_owner_locked(struct nfs4_state_owner *new)
                        return sp;
                }
        }
-       nfs_alloc_unique_id_locked(&server->openowner_id,
-                                       &new->so_owner_id, 1, 64);
+       err = ida_get_new(&server->openowner_id, &new->so_seqid.owner_id);
+       if (err)
+               return ERR_PTR(err);
        rb_link_node(&new->so_server_node, parent, p);
        rb_insert_color(&new->so_server_node, &server->state_owners);
        return new;
@@ -435,7 +387,23 @@ nfs4_remove_state_owner_locked(struct nfs4_state_owner *sp)
 
        if (!RB_EMPTY_NODE(&sp->so_server_node))
                rb_erase(&sp->so_server_node, &server->state_owners);
-       nfs_free_unique_id(&server->openowner_id, &sp->so_owner_id);
+       ida_remove(&server->openowner_id, sp->so_seqid.owner_id);
+}
+
+static void
+nfs4_init_seqid_counter(struct nfs_seqid_counter *sc)
+{
+       sc->flags = 0;
+       sc->counter = 0;
+       spin_lock_init(&sc->lock);
+       INIT_LIST_HEAD(&sc->list);
+       rpc_init_wait_queue(&sc->wait, "Seqid_waitqueue");
+}
+
+static void
+nfs4_destroy_seqid_counter(struct nfs_seqid_counter *sc)
+{
+       rpc_destroy_wait_queue(&sc->wait);
 }
 
 /*
@@ -444,19 +412,20 @@ nfs4_remove_state_owner_locked(struct nfs4_state_owner *sp)
  *
  */
 static struct nfs4_state_owner *
-nfs4_alloc_state_owner(void)
+nfs4_alloc_state_owner(struct nfs_server *server,
+               struct rpc_cred *cred,
+               gfp_t gfp_flags)
 {
        struct nfs4_state_owner *sp;
 
-       sp = kzalloc(sizeof(*sp),GFP_NOFS);
+       sp = kzalloc(sizeof(*sp), gfp_flags);
        if (!sp)
                return NULL;
+       sp->so_server = server;
+       sp->so_cred = get_rpccred(cred);
        spin_lock_init(&sp->so_lock);
        INIT_LIST_HEAD(&sp->so_states);
-       rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue");
-       sp->so_seqid.sequence = &sp->so_sequence;
-       spin_lock_init(&sp->so_sequence.lock);
-       INIT_LIST_HEAD(&sp->so_sequence.list);
+       nfs4_init_seqid_counter(&sp->so_seqid);
        atomic_set(&sp->so_count, 1);
        INIT_LIST_HEAD(&sp->so_lru);
        return sp;
@@ -478,7 +447,7 @@ nfs4_drop_state_owner(struct nfs4_state_owner *sp)
 
 static void nfs4_free_state_owner(struct nfs4_state_owner *sp)
 {
-       rpc_destroy_wait_queue(&sp->so_sequence.wait);
+       nfs4_destroy_seqid_counter(&sp->so_seqid);
        put_rpccred(sp->so_cred);
        kfree(sp);
 }
@@ -516,7 +485,8 @@ static void nfs4_gc_state_owners(struct nfs_server *server)
  * Returns a pointer to an instantiated nfs4_state_owner struct, or NULL.
  */
 struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server,
-                                             struct rpc_cred *cred)
+                                             struct rpc_cred *cred,
+                                             gfp_t gfp_flags)
 {
        struct nfs_client *clp = server->nfs_client;
        struct nfs4_state_owner *sp, *new;
@@ -526,20 +496,18 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server,
        spin_unlock(&clp->cl_lock);
        if (sp != NULL)
                goto out;
-       new = nfs4_alloc_state_owner();
+       new = nfs4_alloc_state_owner(server, cred, gfp_flags);
        if (new == NULL)
                goto out;
-       new->so_server = server;
-       new->so_cred = cred;
-       spin_lock(&clp->cl_lock);
-       sp = nfs4_insert_state_owner_locked(new);
-       spin_unlock(&clp->cl_lock);
-       if (sp == new)
-               get_rpccred(cred);
-       else {
-               rpc_destroy_wait_queue(&new->so_sequence.wait);
-               kfree(new);
-       }
+       do {
+               if (ida_pre_get(&server->openowner_id, gfp_flags) == 0)
+                       break;
+               spin_lock(&clp->cl_lock);
+               sp = nfs4_insert_state_owner_locked(new);
+               spin_unlock(&clp->cl_lock);
+       } while (sp == ERR_PTR(-EAGAIN));
+       if (sp != new)
+               nfs4_free_state_owner(new);
 out:
        nfs4_gc_state_owners(server);
        return sp;
@@ -795,15 +763,11 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
 {
        struct nfs4_lock_state *lsp;
        struct nfs_server *server = state->owner->so_server;
-       struct nfs_client *clp = server->nfs_client;
 
        lsp = kzalloc(sizeof(*lsp), GFP_NOFS);
        if (lsp == NULL)
                return NULL;
-       rpc_init_wait_queue(&lsp->ls_sequence.wait, "lock_seqid_waitqueue");
-       spin_lock_init(&lsp->ls_sequence.lock);
-       INIT_LIST_HEAD(&lsp->ls_sequence.list);
-       lsp->ls_seqid.sequence = &lsp->ls_sequence;
+       nfs4_init_seqid_counter(&lsp->ls_seqid);
        atomic_set(&lsp->ls_count, 1);
        lsp->ls_state = state;
        lsp->ls_owner.lo_type = type;
@@ -815,25 +779,22 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
                lsp->ls_owner.lo_u.posix_owner = fl_owner;
                break;
        default:
-               kfree(lsp);
-               return NULL;
+               goto out_free;
        }
-       spin_lock(&clp->cl_lock);
-       nfs_alloc_unique_id_locked(&server->lockowner_id, &lsp->ls_id, 1, 64);
-       spin_unlock(&clp->cl_lock);
+       lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id, 0, 0, GFP_NOFS);
+       if (lsp->ls_seqid.owner_id < 0)
+               goto out_free;
        INIT_LIST_HEAD(&lsp->ls_locks);
        return lsp;
+out_free:
+       kfree(lsp);
+       return NULL;
 }
 
-static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
+void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp)
 {
-       struct nfs_server *server = lsp->ls_state->owner->so_server;
-       struct nfs_client *clp = server->nfs_client;
-
-       spin_lock(&clp->cl_lock);
-       nfs_free_unique_id(&server->lockowner_id, &lsp->ls_id);
-       spin_unlock(&clp->cl_lock);
-       rpc_destroy_wait_queue(&lsp->ls_sequence.wait);
+       ida_simple_remove(&server->lockowner_id, lsp->ls_seqid.owner_id);
+       nfs4_destroy_seqid_counter(&lsp->ls_seqid);
        kfree(lsp);
 }
 
@@ -865,7 +826,7 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_
        }
        spin_unlock(&state->state_lock);
        if (new != NULL)
-               nfs4_free_lock_state(new);
+               nfs4_free_lock_state(state->owner->so_server, new);
        return lsp;
 }
 
@@ -886,9 +847,11 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
        if (list_empty(&state->lock_states))
                clear_bit(LK_STATE_IN_USE, &state->flags);
        spin_unlock(&state->state_lock);
-       if (lsp->ls_flags & NFS_LOCK_INITIALIZED)
-               nfs4_release_lockowner(lsp);
-       nfs4_free_lock_state(lsp);
+       if (lsp->ls_flags & NFS_LOCK_INITIALIZED) {
+               if (nfs4_release_lockowner(lsp) == 0)
+                       return;
+       }
+       nfs4_free_lock_state(lsp->ls_state->owner->so_server, lsp);
 }
 
 static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
@@ -918,7 +881,8 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
        if (fl->fl_flags & FL_POSIX)
                lsp = nfs4_get_lock_state(state, fl->fl_owner, 0, NFS4_POSIX_LOCK_TYPE);
        else if (fl->fl_flags & FL_FLOCK)
-               lsp = nfs4_get_lock_state(state, 0, fl->fl_pid, NFS4_FLOCK_LOCK_TYPE);
+               lsp = nfs4_get_lock_state(state, NULL, fl->fl_pid,
+                               NFS4_FLOCK_LOCK_TYPE);
        else
                return -EINVAL;
        if (lsp == NULL)
@@ -928,28 +892,49 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
        return 0;
 }
 
-/*
- * Byte-range lock aware utility to initialize the stateid of read/write
- * requests.
- */
-void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid)
+static bool nfs4_copy_lock_stateid(nfs4_stateid *dst, struct nfs4_state *state,
+               fl_owner_t fl_owner, pid_t fl_pid)
 {
        struct nfs4_lock_state *lsp;
-       int seq;
+       bool ret = false;
 
-       do {
-               seq = read_seqbegin(&state->seqlock);
-               memcpy(dst, &state->stateid, sizeof(*dst));
-       } while (read_seqretry(&state->seqlock, seq));
        if (test_bit(LK_STATE_IN_USE, &state->flags) == 0)
-               return;
+               goto out;
 
        spin_lock(&state->state_lock);
        lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE);
-       if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
-               memcpy(dst, &lsp->ls_stateid, sizeof(*dst));
+       if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) {
+               nfs4_stateid_copy(dst, &lsp->ls_stateid);
+               ret = true;
+       }
        spin_unlock(&state->state_lock);
        nfs4_put_lock_state(lsp);
+out:
+       return ret;
+}
+
+static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
+{
+       int seq;
+
+       do {
+               seq = read_seqbegin(&state->seqlock);
+               nfs4_stateid_copy(dst, &state->stateid);
+       } while (read_seqretry(&state->seqlock, seq));
+}
+
+/*
+ * Byte-range lock aware utility to initialize the stateid of read/write
+ * requests.
+ */
+void nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state,
+               fmode_t fmode, fl_owner_t fl_owner, pid_t fl_pid)
+{
+       if (nfs4_copy_delegation_stateid(dst, state->inode, fmode))
+               return;
+       if (nfs4_copy_lock_stateid(dst, state, fl_owner, fl_pid))
+               return;
+       nfs4_copy_open_stateid(dst, state);
 }
 
 struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask)
@@ -960,20 +945,28 @@ struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_m
        if (new != NULL) {
                new->sequence = counter;
                INIT_LIST_HEAD(&new->list);
+               new->task = NULL;
        }
        return new;
 }
 
 void nfs_release_seqid(struct nfs_seqid *seqid)
 {
-       if (!list_empty(&seqid->list)) {
-               struct rpc_sequence *sequence = seqid->sequence->sequence;
+       struct nfs_seqid_counter *sequence;
 
-               spin_lock(&sequence->lock);
-               list_del_init(&seqid->list);
-               spin_unlock(&sequence->lock);
-               rpc_wake_up(&sequence->wait);
+       if (list_empty(&seqid->list))
+               return;
+       sequence = seqid->sequence;
+       spin_lock(&sequence->lock);
+       list_del_init(&seqid->list);
+       if (!list_empty(&sequence->list)) {
+               struct nfs_seqid *next;
+
+               next = list_first_entry(&sequence->list,
+                               struct nfs_seqid, list);
+               rpc_wake_up_queued_task(&sequence->wait, next->task);
        }
+       spin_unlock(&sequence->lock);
 }
 
 void nfs_free_seqid(struct nfs_seqid *seqid)
@@ -989,14 +982,14 @@ void nfs_free_seqid(struct nfs_seqid *seqid)
  */
 static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
 {
-       BUG_ON(list_first_entry(&seqid->sequence->sequence->list, struct nfs_seqid, list) != seqid);
+       BUG_ON(list_first_entry(&seqid->sequence->list, struct nfs_seqid, list) != seqid);
        switch (status) {
                case 0:
                        break;
                case -NFS4ERR_BAD_SEQID:
                        if (seqid->sequence->flags & NFS_SEQID_CONFIRMED)
                                return;
-                       printk(KERN_WARNING "NFS: v4 server returned a bad"
+                       pr_warn_ratelimited("NFS: v4 server returned a bad"
                                        " sequence-id error on an"
                                        " unconfirmed sequence %p!\n",
                                        seqid->sequence);
@@ -1040,10 +1033,11 @@ void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid)
 
 int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task)
 {
-       struct rpc_sequence *sequence = seqid->sequence->sequence;
+       struct nfs_seqid_counter *sequence = seqid->sequence;
        int status = 0;
 
        spin_lock(&sequence->lock);
+       seqid->task = task;
        if (list_empty(&seqid->list))
                list_add_tail(&seqid->list, &sequence->list);
        if (list_first_entry(&sequence->list, struct nfs_seqid, list) == seqid)
@@ -1072,19 +1066,28 @@ static void nfs4_clear_state_manager_bit(struct nfs_client *clp)
 void nfs4_schedule_state_manager(struct nfs_client *clp)
 {
        struct task_struct *task;
+       char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1];
 
        if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
                return;
        __module_get(THIS_MODULE);
        atomic_inc(&clp->cl_count);
-       task = kthread_run(nfs4_run_state_manager, clp, "%s-manager",
-                               rpc_peeraddr2str(clp->cl_rpcclient,
-                                                       RPC_DISPLAY_ADDR));
-       if (!IS_ERR(task))
-               return;
-       nfs4_clear_state_manager_bit(clp);
-       nfs_put_client(clp);
-       module_put(THIS_MODULE);
+
+       /* The rcu_read_lock() is not strictly necessary, as the state
+        * manager is the only thread that ever changes the rpc_xprt
+        * after it's initialized.  At this point, we're single threaded. */
+       rcu_read_lock();
+       snprintf(buf, sizeof(buf), "%s-manager",
+                       rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
+       rcu_read_unlock();
+       task = kthread_run(nfs4_run_state_manager, clp, buf);
+       if (IS_ERR(task)) {
+               printk(KERN_ERR "%s: kthread_run: %ld\n",
+                       __func__, PTR_ERR(task));
+               nfs4_clear_state_manager_bit(clp);
+               nfs_put_client(clp);
+               module_put(THIS_MODULE);
+       }
 }
 
 /*
@@ -1098,10 +1101,25 @@ void nfs4_schedule_lease_recovery(struct nfs_client *clp)
                set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
        nfs4_schedule_state_manager(clp);
 }
+EXPORT_SYMBOL_GPL(nfs4_schedule_lease_recovery);
+
+/*
+ * nfs40_handle_cb_pathdown - return all delegations after NFS4ERR_CB_PATH_DOWN
+ * @clp: client to process
+ *
+ * Set the NFS4CLNT_LEASE_EXPIRED state in order to force a
+ * resend of the SETCLIENTID and hence re-establish the
+ * callback channel. Then return all existing delegations.
+ */
+static void nfs40_handle_cb_pathdown(struct nfs_client *clp)
+{
+       set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
+       nfs_expire_all_delegations(clp);
+}
 
 void nfs4_schedule_path_down_recovery(struct nfs_client *clp)
 {
-       nfs_handle_cb_pathdown(clp);
+       nfs40_handle_cb_pathdown(clp);
        nfs4_schedule_state_manager(clp);
 }
 
@@ -1132,11 +1150,37 @@ void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4
 {
        struct nfs_client *clp = server->nfs_client;
 
-       if (test_and_clear_bit(NFS_DELEGATED_STATE, &state->flags))
-               nfs_async_inode_return_delegation(state->inode, &state->stateid);
        nfs4_state_mark_reclaim_nograce(clp, state);
        nfs4_schedule_state_manager(clp);
 }
+EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery);
+
+void nfs_inode_find_state_and_recover(struct inode *inode,
+               const nfs4_stateid *stateid)
+{
+       struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+       struct nfs_inode *nfsi = NFS_I(inode);
+       struct nfs_open_context *ctx;
+       struct nfs4_state *state;
+       bool found = false;
+
+       spin_lock(&inode->i_lock);
+       list_for_each_entry(ctx, &nfsi->open_files, list) {
+               state = ctx->state;
+               if (state == NULL)
+                       continue;
+               if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
+                       continue;
+               if (!nfs4_stateid_match(&state->stateid, stateid))
+                       continue;
+               nfs4_state_mark_reclaim_nograce(clp, state);
+               found = true;
+       }
+       spin_unlock(&inode->i_lock);
+       if (found)
+               nfs4_schedule_state_manager(clp);
+}
+
 
 static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops)
 {
@@ -1175,8 +1219,8 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
                        case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
                                goto out;
                        default:
-                               printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n",
-                                               __func__, status);
+                               printk(KERN_ERR "NFS: %s: unhandled error %d. "
+                                       "Zeroing state\n", __func__, status);
                        case -ENOMEM:
                        case -NFS4ERR_DENIED:
                        case -NFS4ERR_RECLAIM_BAD:
@@ -1222,8 +1266,9 @@ restart:
                                spin_lock(&state->state_lock);
                                list_for_each_entry(lock, &state->lock_states, ls_locks) {
                                        if (!(lock->ls_flags & NFS_LOCK_INITIALIZED))
-                                               printk("%s: Lock reclaim failed!\n",
-                                                       __func__);
+                                               pr_warn_ratelimited("NFS: "
+                                                       "%s: Lock reclaim "
+                                                       "failed!\n", __func__);
                                }
                                spin_unlock(&state->state_lock);
                                nfs4_put_open_state(state);
@@ -1232,8 +1277,8 @@ restart:
                }
                switch (status) {
                        default:
-                               printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n",
-                                               __func__, status);
+                               printk(KERN_ERR "NFS: %s: unhandled error %d. "
+                                       "Zeroing state\n", __func__, status);
                        case -ENOENT:
                        case -ENOMEM:
                        case -ESTALE:
@@ -1241,8 +1286,8 @@ restart:
                                 * Open state on this file cannot be recovered
                                 * All we can do is revert to using the zero stateid.
                                 */
-                               memset(state->stateid.data, 0,
-                                       sizeof(state->stateid.data));
+                               memset(&state->stateid, 0,
+                                       sizeof(state->stateid));
                                /* Mark the file as being 'closed' */
                                state->state = 0;
                                break;
@@ -1420,7 +1465,7 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
                case 0:
                        break;
                case -NFS4ERR_CB_PATH_DOWN:
-                       nfs_handle_cb_pathdown(clp);
+                       nfs40_handle_cb_pathdown(clp);
                        break;
                case -NFS4ERR_NO_GRACE:
                        nfs4_state_end_reclaim_reboot(clp);
@@ -1801,7 +1846,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
        } while (atomic_read(&clp->cl_count) > 1);
        return;
 out_error:
-       printk(KERN_WARNING "Error: state manager failed on NFSv4 server %s"
+       pr_warn_ratelimited("NFS: state manager failed on NFSv4 server %s"
                        " with error %d\n", clp->cl_hostname, -status);
        nfs4_end_drain_session(clp);
        nfs4_clear_state_manager_bit(clp);
index 33bd8d0f745d8baaa11b41fc3fcffde52ee3f02a..c74fdb114b48af141a719d1facd11ed249c5f5d1 100644 (file)
@@ -44,6 +44,8 @@
 #include <linux/pagemap.h>
 #include <linux/proc_fs.h>
 #include <linux/kdev_t.h>
+#include <linux/module.h>
+#include <linux/utsname.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/msg_prot.h>
 #include <linux/sunrpc/gss_api.h>
@@ -271,7 +273,12 @@ static int nfs4_stat_to_errno(int);
                                1 /* flags */ + \
                                1 /* spa_how */ + \
                                0 /* SP4_NONE (for now) */ + \
-                               1 /* zero implemetation id array */)
+                               1 /* implementation id array of size 1 */ + \
+                               1 /* nii_domain */ + \
+                               XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \
+                               1 /* nii_name */ + \
+                               XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \
+                               3 /* nii_date */)
 #define decode_exchange_id_maxsz (op_decode_hdr_maxsz + \
                                2 /* eir_clientid */ + \
                                1 /* eir_sequenceid */ + \
@@ -284,7 +291,11 @@ static int nfs4_stat_to_errno(int);
                                /* eir_server_scope<> */ \
                                XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 + \
                                1 /* eir_server_impl_id array length */ + \
-                               0 /* ignored eir_server_impl_id contents */)
+                               1 /* nii_domain */ + \
+                               XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \
+                               1 /* nii_name */ + \
+                               XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \
+                               3 /* nii_date */)
 #define encode_channel_attrs_maxsz  (6 + 1 /* ca_rdma_ird.len (0) */)
 #define decode_channel_attrs_maxsz  (6 + \
                                     1 /* ca_rdma_ird.len */ + \
@@ -838,6 +849,12 @@ const u32 nfs41_maxread_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
                                    XDR_UNIT);
 #endif /* CONFIG_NFS_V4_1 */
 
+static unsigned short send_implementation_id = 1;
+
+module_param(send_implementation_id, ushort, 0644);
+MODULE_PARM_DESC(send_implementation_id,
+               "Send implementation ID with NFSv4.1 exchange_id");
+
 static const umode_t nfs_type2fmt[] = {
        [NF4BAD] = 0,
        [NF4REG] = S_IFREG,
@@ -868,15 +885,44 @@ static __be32 *reserve_space(struct xdr_stream *xdr, size_t nbytes)
        return p;
 }
 
+static void encode_opaque_fixed(struct xdr_stream *xdr, const void *buf, size_t len)
+{
+       __be32 *p;
+
+       p = xdr_reserve_space(xdr, len);
+       xdr_encode_opaque_fixed(p, buf, len);
+}
+
 static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
 {
        __be32 *p;
 
-       p = xdr_reserve_space(xdr, 4 + len);
-       BUG_ON(p == NULL);
+       p = reserve_space(xdr, 4 + len);
        xdr_encode_opaque(p, str, len);
 }
 
+static void encode_uint32(struct xdr_stream *xdr, u32 n)
+{
+       __be32 *p;
+
+       p = reserve_space(xdr, 4);
+       *p = cpu_to_be32(n);
+}
+
+static void encode_uint64(struct xdr_stream *xdr, u64 n)
+{
+       __be32 *p;
+
+       p = reserve_space(xdr, 8);
+       xdr_encode_hyper(p, n);
+}
+
+static void encode_nfs4_seqid(struct xdr_stream *xdr,
+               const struct nfs_seqid *seqid)
+{
+       encode_uint32(xdr, seqid->sequence->counter);
+}
+
 static void encode_compound_hdr(struct xdr_stream *xdr,
                                struct rpc_rqst *req,
                                struct compound_hdr *hdr)
@@ -889,28 +935,37 @@ static void encode_compound_hdr(struct xdr_stream *xdr,
         * but this is not required as a MUST for the server to do so. */
        hdr->replen = RPC_REPHDRSIZE + auth->au_rslack + 3 + hdr->taglen;
 
-       dprintk("encode_compound: tag=%.*s\n", (int)hdr->taglen, hdr->tag);
        BUG_ON(hdr->taglen > NFS4_MAXTAGLEN);
-       p = reserve_space(xdr, 4 + hdr->taglen + 8);
-       p = xdr_encode_opaque(p, hdr->tag, hdr->taglen);
+       encode_string(xdr, hdr->taglen, hdr->tag);
+       p = reserve_space(xdr, 8);
        *p++ = cpu_to_be32(hdr->minorversion);
        hdr->nops_p = p;
        *p = cpu_to_be32(hdr->nops);
 }
 
+static void encode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 op,
+               uint32_t replen,
+               struct compound_hdr *hdr)
+{
+       encode_uint32(xdr, op);
+       hdr->nops++;
+       hdr->replen += replen;
+}
+
 static void encode_nops(struct compound_hdr *hdr)
 {
        BUG_ON(hdr->nops > NFS4_MAX_OPS);
        *hdr->nops_p = htonl(hdr->nops);
 }
 
-static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf)
+static void encode_nfs4_stateid(struct xdr_stream *xdr, const nfs4_stateid *stateid)
 {
-       __be32 *p;
+       encode_opaque_fixed(xdr, stateid, NFS4_STATEID_SIZE);
+}
 
-       p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE);
-       BUG_ON(p == NULL);
-       xdr_encode_opaque_fixed(p, verf->data, NFS4_VERIFIER_SIZE);
+static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf)
+{
+       encode_opaque_fixed(xdr, verf->data, NFS4_VERIFIER_SIZE);
 }
 
 static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const struct nfs_server *server)
@@ -1023,7 +1078,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
         * Now we backfill the bitmap and the attribute buffer length.
         */
        if (len != ((char *)p - (char *)q) + 4) {
-               printk(KERN_ERR "nfs: Attr length error, %u != %Zu\n",
+               printk(KERN_ERR "NFS: Attr length error, %u != %Zu\n",
                                len, ((char *)p - (char *)q) + 4);
                BUG();
        }
@@ -1037,46 +1092,33 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
 
 static void encode_access(struct xdr_stream *xdr, u32 access, struct compound_hdr *hdr)
 {
-       __be32 *p;
-
-       p = reserve_space(xdr, 8);
-       *p++ = cpu_to_be32(OP_ACCESS);
-       *p = cpu_to_be32(access);
-       hdr->nops++;
-       hdr->replen += decode_access_maxsz;
+       encode_op_hdr(xdr, OP_ACCESS, decode_access_maxsz, hdr);
+       encode_uint32(xdr, access);
 }
 
 static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr)
 {
-       __be32 *p;
-
-       p = reserve_space(xdr, 8+NFS4_STATEID_SIZE);
-       *p++ = cpu_to_be32(OP_CLOSE);
-       *p++ = cpu_to_be32(arg->seqid->sequence->counter);
-       xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
-       hdr->nops++;
-       hdr->replen += decode_close_maxsz;
+       encode_op_hdr(xdr, OP_CLOSE, decode_close_maxsz, hdr);
+       encode_nfs4_seqid(xdr, arg->seqid);
+       encode_nfs4_stateid(xdr, arg->stateid);
 }
 
 static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr)
 {
        __be32 *p;
 
-       p = reserve_space(xdr, 16);
-       *p++ = cpu_to_be32(OP_COMMIT);
+       encode_op_hdr(xdr, OP_COMMIT, decode_commit_maxsz, hdr);
+       p = reserve_space(xdr, 12);
        p = xdr_encode_hyper(p, args->offset);
        *p = cpu_to_be32(args->count);
-       hdr->nops++;
-       hdr->replen += decode_commit_maxsz;
 }
 
 static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *create, struct compound_hdr *hdr)
 {
        __be32 *p;
 
-       p = reserve_space(xdr, 8);
-       *p++ = cpu_to_be32(OP_CREATE);
-       *p = cpu_to_be32(create->ftype);
+       encode_op_hdr(xdr, OP_CREATE, decode_create_maxsz, hdr);
+       encode_uint32(xdr, create->ftype);
 
        switch (create->ftype) {
        case NF4LNK:
@@ -1096,9 +1138,6 @@ static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *
        }
 
        encode_string(xdr, create->name->len, create->name->name);
-       hdr->nops++;
-       hdr->replen += decode_create_maxsz;
-
        encode_attrs(xdr, create->attrs, create->server);
 }
 
@@ -1106,25 +1145,21 @@ static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct c
 {
        __be32 *p;
 
-       p = reserve_space(xdr, 12);
-       *p++ = cpu_to_be32(OP_GETATTR);
+       encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr);
+       p = reserve_space(xdr, 8);
        *p++ = cpu_to_be32(1);
        *p = cpu_to_be32(bitmap);
-       hdr->nops++;
-       hdr->replen += decode_getattr_maxsz;
 }
 
 static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1, struct compound_hdr *hdr)
 {
        __be32 *p;
 
-       p = reserve_space(xdr, 16);
-       *p++ = cpu_to_be32(OP_GETATTR);
+       encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr);
+       p = reserve_space(xdr, 12);
        *p++ = cpu_to_be32(2);
        *p++ = cpu_to_be32(bm0);
        *p = cpu_to_be32(bm1);
-       hdr->nops++;
-       hdr->replen += decode_getattr_maxsz;
 }
 
 static void
@@ -1134,8 +1169,7 @@ encode_getattr_three(struct xdr_stream *xdr,
 {
        __be32 *p;
 
-       p = reserve_space(xdr, 4);
-       *p = cpu_to_be32(OP_GETATTR);
+       encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr);
        if (bm2) {
                p = reserve_space(xdr, 16);
                *p++ = cpu_to_be32(3);
@@ -1152,8 +1186,6 @@ encode_getattr_three(struct xdr_stream *xdr,
                *p++ = cpu_to_be32(1);
                *p = cpu_to_be32(bm0);
        }
-       hdr->nops++;
-       hdr->replen += decode_getattr_maxsz;
 }
 
 static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
@@ -1179,23 +1211,13 @@ static void encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask, stru
 
 static void encode_getfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
 {
-       __be32 *p;
-
-       p = reserve_space(xdr, 4);
-       *p = cpu_to_be32(OP_GETFH);
-       hdr->nops++;
-       hdr->replen += decode_getfh_maxsz;
+       encode_op_hdr(xdr, OP_GETFH, decode_getfh_maxsz, hdr);
 }
 
 static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
 {
-       __be32 *p;
-
-       p = reserve_space(xdr, 8 + name->len);
-       *p++ = cpu_to_be32(OP_LINK);
-       xdr_encode_opaque(p, name->name, name->len);
-       hdr->nops++;
-       hdr->replen += decode_link_maxsz;
+       encode_op_hdr(xdr, OP_LINK, decode_link_maxsz, hdr);
+       encode_string(xdr, name->len, name->name);
 }
 
 static inline int nfs4_lock_type(struct file_lock *fl, int block)
@@ -1232,79 +1254,60 @@ static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args
 {
        __be32 *p;
 
-       p = reserve_space(xdr, 32);
-       *p++ = cpu_to_be32(OP_LOCK);
+       encode_op_hdr(xdr, OP_LOCK, decode_lock_maxsz, hdr);
+       p = reserve_space(xdr, 28);
        *p++ = cpu_to_be32(nfs4_lock_type(args->fl, args->block));
        *p++ = cpu_to_be32(args->reclaim);
        p = xdr_encode_hyper(p, args->fl->fl_start);
        p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
        *p = cpu_to_be32(args->new_lock_owner);
        if (args->new_lock_owner){
-               p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
-               *p++ = cpu_to_be32(args->open_seqid->sequence->counter);
-               p = xdr_encode_opaque_fixed(p, args->open_stateid->data, NFS4_STATEID_SIZE);
-               *p++ = cpu_to_be32(args->lock_seqid->sequence->counter);
+               encode_nfs4_seqid(xdr, args->open_seqid);
+               encode_nfs4_stateid(xdr, args->open_stateid);
+               encode_nfs4_seqid(xdr, args->lock_seqid);
                encode_lockowner(xdr, &args->lock_owner);
        }
        else {
-               p = reserve_space(xdr, NFS4_STATEID_SIZE+4);
-               p = xdr_encode_opaque_fixed(p, args->lock_stateid->data, NFS4_STATEID_SIZE);
-               *p = cpu_to_be32(args->lock_seqid->sequence->counter);
+               encode_nfs4_stateid(xdr, args->lock_stateid);
+               encode_nfs4_seqid(xdr, args->lock_seqid);
        }
-       hdr->nops++;
-       hdr->replen += decode_lock_maxsz;
 }
 
 static void encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *args, struct compound_hdr *hdr)
 {
        __be32 *p;
 
-       p = reserve_space(xdr, 24);
-       *p++ = cpu_to_be32(OP_LOCKT);
+       encode_op_hdr(xdr, OP_LOCKT, decode_lockt_maxsz, hdr);
+       p = reserve_space(xdr, 20);
        *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0));
        p = xdr_encode_hyper(p, args->fl->fl_start);
        p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
        encode_lockowner(xdr, &args->lock_owner);
-       hdr->nops++;
-       hdr->replen += decode_lockt_maxsz;
 }
 
 static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *args, struct compound_hdr *hdr)
 {
        __be32 *p;
 
-       p = reserve_space(xdr, 12+NFS4_STATEID_SIZE+16);
-       *p++ = cpu_to_be32(OP_LOCKU);
-       *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0));
-       *p++ = cpu_to_be32(args->seqid->sequence->counter);
-       p = xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE);
+       encode_op_hdr(xdr, OP_LOCKU, decode_locku_maxsz, hdr);
+       encode_uint32(xdr, nfs4_lock_type(args->fl, 0));
+       encode_nfs4_seqid(xdr, args->seqid);
+       encode_nfs4_stateid(xdr, args->stateid);
+       p = reserve_space(xdr, 16);
        p = xdr_encode_hyper(p, args->fl->fl_start);
        xdr_encode_hyper(p, nfs4_lock_length(args->fl));
-       hdr->nops++;
-       hdr->replen += decode_locku_maxsz;
 }
 
 static void encode_release_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lowner, struct compound_hdr *hdr)
 {
-       __be32 *p;
-
-       p = reserve_space(xdr, 4);
-       *p = cpu_to_be32(OP_RELEASE_LOCKOWNER);
+       encode_op_hdr(xdr, OP_RELEASE_LOCKOWNER, decode_release_lockowner_maxsz, hdr);
        encode_lockowner(xdr, lowner);
-       hdr->nops++;
-       hdr->replen += decode_release_lockowner_maxsz;
 }
 
 static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
 {
-       int len = name->len;
-       __be32 *p;
-
-       p = reserve_space(xdr, 8 + len);
-       *p++ = cpu_to_be32(OP_LOOKUP);
-       xdr_encode_opaque(p, name->name, len);
-       hdr->nops++;
-       hdr->replen += decode_lookup_maxsz;
+       encode_op_hdr(xdr, OP_LOOKUP, decode_lookup_maxsz, hdr);
+       encode_string(xdr, name->len, name->name);
 }
 
 static void encode_share_access(struct xdr_stream *xdr, fmode_t fmode)
@@ -1335,9 +1338,7 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
  * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4,
  * owner 4 = 32
  */
-       p = reserve_space(xdr, 8);
-       *p++ = cpu_to_be32(OP_OPEN);
-       *p = cpu_to_be32(arg->seqid->sequence->counter);
+       encode_nfs4_seqid(xdr, arg->seqid);
        encode_share_access(xdr, arg->fmode);
        p = reserve_space(xdr, 32);
        p = xdr_encode_hyper(p, arg->clientid);
@@ -1437,14 +1438,15 @@ static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struc
 {
        __be32 *p;
 
-       p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
-       *p++ = cpu_to_be32(NFS4_OPEN_CLAIM_DELEGATE_CUR);
-       xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE);
+       p = reserve_space(xdr, 4);
+       *p = cpu_to_be32(NFS4_OPEN_CLAIM_DELEGATE_CUR);
+       encode_nfs4_stateid(xdr, stateid);
        encode_string(xdr, name->len, name->name);
 }
 
 static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg, struct compound_hdr *hdr)
 {
+       encode_op_hdr(xdr, OP_OPEN, decode_open_maxsz, hdr);
        encode_openhdr(xdr, arg);
        encode_opentype(xdr, arg);
        switch (arg->claim) {
@@ -1460,88 +1462,64 @@ static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg,
        default:
                BUG();
        }
-       hdr->nops++;
-       hdr->replen += decode_open_maxsz;
 }
 
 static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_confirmargs *arg, struct compound_hdr *hdr)
 {
-       __be32 *p;
-
-       p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
-       *p++ = cpu_to_be32(OP_OPEN_CONFIRM);
-       p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
-       *p = cpu_to_be32(arg->seqid->sequence->counter);
-       hdr->nops++;
-       hdr->replen += decode_open_confirm_maxsz;
+       encode_op_hdr(xdr, OP_OPEN_CONFIRM, decode_open_confirm_maxsz, hdr);
+       encode_nfs4_stateid(xdr, arg->stateid);
+       encode_nfs4_seqid(xdr, arg->seqid);
 }
 
 static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr)
 {
-       __be32 *p;
-
-       p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
-       *p++ = cpu_to_be32(OP_OPEN_DOWNGRADE);
-       p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
-       *p = cpu_to_be32(arg->seqid->sequence->counter);
+       encode_op_hdr(xdr, OP_OPEN_DOWNGRADE, decode_open_downgrade_maxsz, hdr);
+       encode_nfs4_stateid(xdr, arg->stateid);
+       encode_nfs4_seqid(xdr, arg->seqid);
        encode_share_access(xdr, arg->fmode);
-       hdr->nops++;
-       hdr->replen += decode_open_downgrade_maxsz;
 }
 
 static void
 encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh, struct compound_hdr *hdr)
 {
-       int len = fh->size;
-       __be32 *p;
-
-       p = reserve_space(xdr, 8 + len);
-       *p++ = cpu_to_be32(OP_PUTFH);
-       xdr_encode_opaque(p, fh->data, len);
-       hdr->nops++;
-       hdr->replen += decode_putfh_maxsz;
+       encode_op_hdr(xdr, OP_PUTFH, decode_putfh_maxsz, hdr);
+       encode_string(xdr, fh->size, fh->data);
 }
 
 static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
 {
-       __be32 *p;
-
-       p = reserve_space(xdr, 4);
-       *p = cpu_to_be32(OP_PUTROOTFH);
-       hdr->nops++;
-       hdr->replen += decode_putrootfh_maxsz;
+       encode_op_hdr(xdr, OP_PUTROOTFH, decode_putrootfh_maxsz, hdr);
 }
 
-static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx, int zero_seqid)
+static void encode_open_stateid(struct xdr_stream *xdr,
+               const struct nfs_open_context *ctx,
+               const struct nfs_lock_context *l_ctx,
+               fmode_t fmode,
+               int zero_seqid)
 {
        nfs4_stateid stateid;
-       __be32 *p;
 
-       p = reserve_space(xdr, NFS4_STATEID_SIZE);
        if (ctx->state != NULL) {
-               nfs4_copy_stateid(&stateid, ctx->state, l_ctx->lockowner, l_ctx->pid);
+               nfs4_select_rw_stateid(&stateid, ctx->state,
+                               fmode, l_ctx->lockowner, l_ctx->pid);
                if (zero_seqid)
-                       stateid.stateid.seqid = 0;
-               xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE);
+                       stateid.seqid = 0;
+               encode_nfs4_stateid(xdr, &stateid);
        } else
-               xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
+               encode_nfs4_stateid(xdr, &zero_stateid);
 }
 
 static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr)
 {
        __be32 *p;
 
-       p = reserve_space(xdr, 4);
-       *p = cpu_to_be32(OP_READ);
-
-       encode_stateid(xdr, args->context, args->lock_context,
-                      hdr->minorversion);
+       encode_op_hdr(xdr, OP_READ, decode_read_maxsz, hdr);
+       encode_open_stateid(xdr, args->context, args->lock_context,
+                       FMODE_READ, hdr->minorversion);
 
        p = reserve_space(xdr, 12);
        p = xdr_encode_hyper(p, args->offset);
        *p = cpu_to_be32(args->count);
-       hdr->nops++;
-       hdr->replen += decode_read_maxsz;
 }
 
 static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr)
@@ -1551,7 +1529,7 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
                FATTR4_WORD1_MOUNTED_ON_FILEID,
        };
        uint32_t dircount = readdir->count >> 1;
-       __be32 *p;
+       __be32 *p, verf[2];
 
        if (readdir->plus) {
                attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE|
@@ -1566,80 +1544,54 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
        if (!(readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID))
                attrs[0] |= FATTR4_WORD0_FILEID;
 
-       p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20);
-       *p++ = cpu_to_be32(OP_READDIR);
-       p = xdr_encode_hyper(p, readdir->cookie);
-       p = xdr_encode_opaque_fixed(p, readdir->verifier.data, NFS4_VERIFIER_SIZE);
+       encode_op_hdr(xdr, OP_READDIR, decode_readdir_maxsz, hdr);
+       encode_uint64(xdr, readdir->cookie);
+       encode_nfs4_verifier(xdr, &readdir->verifier);
+       p = reserve_space(xdr, 20);
        *p++ = cpu_to_be32(dircount);
        *p++ = cpu_to_be32(readdir->count);
        *p++ = cpu_to_be32(2);
 
        *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]);
        *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]);
-       hdr->nops++;
-       hdr->replen += decode_readdir_maxsz;
+       memcpy(verf, readdir->verifier.data, sizeof(verf));
        dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n",
                        __func__,
                        (unsigned long long)readdir->cookie,
-                       ((u32 *)readdir->verifier.data)[0],
-                       ((u32 *)readdir->verifier.data)[1],
+                       verf[0], verf[1],
                        attrs[0] & readdir->bitmask[0],
                        attrs[1] & readdir->bitmask[1]);
 }
 
 static void encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req, struct compound_hdr *hdr)
 {
-       __be32 *p;
-
-       p = reserve_space(xdr, 4);
-       *p = cpu_to_be32(OP_READLINK);
-       hdr->nops++;
-       hdr->replen += decode_readlink_maxsz;
+       encode_op_hdr(xdr, OP_READLINK, decode_readlink_maxsz, hdr);
 }
 
 static void encode_remove(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
 {
-       __be32 *p;
-
-       p = reserve_space(xdr, 8 + name->len);
-       *p++ = cpu_to_be32(OP_REMOVE);
-       xdr_encode_opaque(p, name->name, name->len);
-       hdr->nops++;
-       hdr->replen += decode_remove_maxsz;
+       encode_op_hdr(xdr, OP_REMOVE, decode_remove_maxsz, hdr);
+       encode_string(xdr, name->len, name->name);
 }
 
 static void encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, const struct qstr *newname, struct compound_hdr *hdr)
 {
-       __be32 *p;
-
-       p = reserve_space(xdr, 4);
-       *p = cpu_to_be32(OP_RENAME);
+       encode_op_hdr(xdr, OP_RENAME, decode_rename_maxsz, hdr);
        encode_string(xdr, oldname->len, oldname->name);
        encode_string(xdr, newname->len, newname->name);
-       hdr->nops++;
-       hdr->replen += decode_rename_maxsz;
 }
 
-static void encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_stateid, struct compound_hdr *hdr)
+static void encode_renew(struct xdr_stream *xdr, clientid4 clid,
+                        struct compound_hdr *hdr)
 {
-       __be32 *p;
-
-       p = reserve_space(xdr, 12);
-       *p++ = cpu_to_be32(OP_RENEW);
-       xdr_encode_hyper(p, client_stateid->cl_clientid);
-       hdr->nops++;
-       hdr->replen += decode_renew_maxsz;
+       encode_op_hdr(xdr, OP_RENEW, decode_renew_maxsz, hdr);
+       encode_uint64(xdr, clid);
 }
 
 static void
 encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
 {
-       __be32 *p;
-
-       p = reserve_space(xdr, 4);
-       *p = cpu_to_be32(OP_RESTOREFH);
-       hdr->nops++;
-       hdr->replen += decode_restorefh_maxsz;
+       encode_op_hdr(xdr, OP_RESTOREFH, decode_restorefh_maxsz, hdr);
 }
 
 static void
@@ -1647,9 +1599,8 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun
 {
        __be32 *p;
 
-       p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
-       *p++ = cpu_to_be32(OP_SETATTR);
-       xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
+       encode_op_hdr(xdr, OP_SETATTR, decode_setacl_maxsz, hdr);
+       encode_nfs4_stateid(xdr, &zero_stateid);
        p = reserve_space(xdr, 2*4);
        *p++ = cpu_to_be32(1);
        *p = cpu_to_be32(FATTR4_WORD0_ACL);
@@ -1657,30 +1608,18 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun
        p = reserve_space(xdr, 4);
        *p = cpu_to_be32(arg->acl_len);
        xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len);
-       hdr->nops++;
-       hdr->replen += decode_setacl_maxsz;
 }
 
 static void
 encode_savefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
 {
-       __be32 *p;
-
-       p = reserve_space(xdr, 4);
-       *p = cpu_to_be32(OP_SAVEFH);
-       hdr->nops++;
-       hdr->replen += decode_savefh_maxsz;
+       encode_op_hdr(xdr, OP_SAVEFH, decode_savefh_maxsz, hdr);
 }
 
 static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs *arg, const struct nfs_server *server, struct compound_hdr *hdr)
 {
-       __be32 *p;
-
-       p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
-       *p++ = cpu_to_be32(OP_SETATTR);
-       xdr_encode_opaque_fixed(p, arg->stateid.data, NFS4_STATEID_SIZE);
-       hdr->nops++;
-       hdr->replen += decode_setattr_maxsz;
+       encode_op_hdr(xdr, OP_SETATTR, decode_setattr_maxsz, hdr);
+       encode_nfs4_stateid(xdr, &arg->stateid);
        encode_attrs(xdr, arg->iap, server);
 }
 
@@ -1688,9 +1627,8 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie
 {
        __be32 *p;
 
-       p = reserve_space(xdr, 4 + NFS4_VERIFIER_SIZE);
-       *p++ = cpu_to_be32(OP_SETCLIENTID);
-       xdr_encode_opaque_fixed(p, setclientid->sc_verifier->data, NFS4_VERIFIER_SIZE);
+       encode_op_hdr(xdr, OP_SETCLIENTID, decode_setclientid_maxsz, hdr);
+       encode_nfs4_verifier(xdr, setclientid->sc_verifier);
 
        encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name);
        p = reserve_space(xdr, 4);
@@ -1699,31 +1637,23 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie
        encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr);
        p = reserve_space(xdr, 4);
        *p = cpu_to_be32(setclientid->sc_cb_ident);
-       hdr->nops++;
-       hdr->replen += decode_setclientid_maxsz;
 }
 
 static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_setclientid_res *arg, struct compound_hdr *hdr)
 {
-       __be32 *p;
-
-       p = reserve_space(xdr, 12 + NFS4_VERIFIER_SIZE);
-       *p++ = cpu_to_be32(OP_SETCLIENTID_CONFIRM);
-       p = xdr_encode_hyper(p, arg->clientid);
-       xdr_encode_opaque_fixed(p, arg->confirm.data, NFS4_VERIFIER_SIZE);
-       hdr->nops++;
-       hdr->replen += decode_setclientid_confirm_maxsz;
+       encode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM,
+                       decode_setclientid_confirm_maxsz, hdr);
+       encode_uint64(xdr, arg->clientid);
+       encode_nfs4_verifier(xdr, &arg->confirm);
 }
 
 static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr)
 {
        __be32 *p;
 
-       p = reserve_space(xdr, 4);
-       *p = cpu_to_be32(OP_WRITE);
-
-       encode_stateid(xdr, args->context, args->lock_context,
-                      hdr->minorversion);
+       encode_op_hdr(xdr, OP_WRITE, decode_write_maxsz, hdr);
+       encode_open_stateid(xdr, args->context, args->lock_context,
+                       FMODE_WRITE, hdr->minorversion);
 
        p = reserve_space(xdr, 16);
        p = xdr_encode_hyper(p, args->offset);
@@ -1731,32 +1661,18 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg
        *p = cpu_to_be32(args->count);
 
        xdr_write_pages(xdr, args->pages, args->pgbase, args->count);
-       hdr->nops++;
-       hdr->replen += decode_write_maxsz;
 }
 
 static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *stateid, struct compound_hdr *hdr)
 {
-       __be32 *p;
-
-       p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
-
-       *p++ = cpu_to_be32(OP_DELEGRETURN);
-       xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE);
-       hdr->nops++;
-       hdr->replen += decode_delegreturn_maxsz;
+       encode_op_hdr(xdr, OP_DELEGRETURN, decode_delegreturn_maxsz, hdr);
+       encode_nfs4_stateid(xdr, stateid);
 }
 
 static void encode_secinfo(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
 {
-       int len = name->len;
-       __be32 *p;
-
-       p = reserve_space(xdr, 8 + len);
-       *p++ = cpu_to_be32(OP_SECINFO);
-       xdr_encode_opaque(p, name->name, len);
-       hdr->nops++;
-       hdr->replen += decode_secinfo_maxsz;
+       encode_op_hdr(xdr, OP_SECINFO, decode_secinfo_maxsz, hdr);
+       encode_string(xdr, name->len, name->name);
 }
 
 #if defined(CONFIG_NFS_V4_1)
@@ -1766,19 +1682,39 @@ static void encode_exchange_id(struct xdr_stream *xdr,
                               struct compound_hdr *hdr)
 {
        __be32 *p;
+       char impl_name[NFS4_OPAQUE_LIMIT];
+       int len = 0;
 
-       p = reserve_space(xdr, 4 + sizeof(args->verifier->data));
-       *p++ = cpu_to_be32(OP_EXCHANGE_ID);
-       xdr_encode_opaque_fixed(p, args->verifier->data, sizeof(args->verifier->data));
+       encode_op_hdr(xdr, OP_EXCHANGE_ID, decode_exchange_id_maxsz, hdr);
+       encode_nfs4_verifier(xdr, args->verifier);
 
        encode_string(xdr, args->id_len, args->id);
 
        p = reserve_space(xdr, 12);
        *p++ = cpu_to_be32(args->flags);
        *p++ = cpu_to_be32(0);  /* zero length state_protect4_a */
-       *p = cpu_to_be32(0);    /* zero length implementation id array */
-       hdr->nops++;
-       hdr->replen += decode_exchange_id_maxsz;
+
+       if (send_implementation_id &&
+           sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN) > 1 &&
+           sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN)
+               <= NFS4_OPAQUE_LIMIT + 1)
+               len = snprintf(impl_name, sizeof(impl_name), "%s %s %s %s",
+                              utsname()->sysname, utsname()->release,
+                              utsname()->version, utsname()->machine);
+
+       if (len > 0) {
+               *p = cpu_to_be32(1);    /* implementation id array length=1 */
+
+               encode_string(xdr,
+                       sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN) - 1,
+                       CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN);
+               encode_string(xdr, len, impl_name);
+               /* just send zeros for nii_date - the date is in nii_name */
+               p = reserve_space(xdr, 12);
+               p = xdr_encode_hyper(p, 0);
+               *p = cpu_to_be32(0);
+       } else
+               *p = cpu_to_be32(0);    /* implementation id array length=0 */
 }
 
 static void encode_create_session(struct xdr_stream *xdr,
@@ -1801,8 +1737,8 @@ static void encode_create_session(struct xdr_stream *xdr,
        len = scnprintf(machine_name, sizeof(machine_name), "%s",
                        clp->cl_ipaddr);
 
-       p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12);
-       *p++ = cpu_to_be32(OP_CREATE_SESSION);
+       encode_op_hdr(xdr, OP_CREATE_SESSION, decode_create_session_maxsz, hdr);
+       p = reserve_space(xdr, 16 + 2*28 + 20 + len + 12);
        p = xdr_encode_hyper(p, clp->cl_clientid);
        *p++ = cpu_to_be32(clp->cl_seqid);                      /*Sequence id */
        *p++ = cpu_to_be32(args->flags);                        /*flags */
@@ -1835,33 +1771,22 @@ static void encode_create_session(struct xdr_stream *xdr,
        *p++ = cpu_to_be32(0);                          /* UID */
        *p++ = cpu_to_be32(0);                          /* GID */
        *p = cpu_to_be32(0);                            /* No more gids */
-       hdr->nops++;
-       hdr->replen += decode_create_session_maxsz;
 }
 
 static void encode_destroy_session(struct xdr_stream *xdr,
                                   struct nfs4_session *session,
                                   struct compound_hdr *hdr)
 {
-       __be32 *p;
-       p = reserve_space(xdr, 4 + NFS4_MAX_SESSIONID_LEN);
-       *p++ = cpu_to_be32(OP_DESTROY_SESSION);
-       xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
-       hdr->nops++;
-       hdr->replen += decode_destroy_session_maxsz;
+       encode_op_hdr(xdr, OP_DESTROY_SESSION, decode_destroy_session_maxsz, hdr);
+       encode_opaque_fixed(xdr, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
 }
 
 static void encode_reclaim_complete(struct xdr_stream *xdr,
                                    struct nfs41_reclaim_complete_args *args,
                                    struct compound_hdr *hdr)
 {
-       __be32 *p;
-
-       p = reserve_space(xdr, 8);
-       *p++ = cpu_to_be32(OP_RECLAIM_COMPLETE);
-       *p++ = cpu_to_be32(args->one_fs);
-       hdr->nops++;
-       hdr->replen += decode_reclaim_complete_maxsz;
+       encode_op_hdr(xdr, OP_RECLAIM_COMPLETE, decode_reclaim_complete_maxsz, hdr);
+       encode_uint32(xdr, args->one_fs);
 }
 #endif /* CONFIG_NFS_V4_1 */
 
@@ -1883,8 +1808,7 @@ static void encode_sequence(struct xdr_stream *xdr,
        WARN_ON(args->sa_slotid == NFS4_MAX_SLOT_TABLE);
        slot = tp->slots + args->sa_slotid;
 
-       p = reserve_space(xdr, 4 + NFS4_MAX_SESSIONID_LEN + 16);
-       *p++ = cpu_to_be32(OP_SEQUENCE);
+       encode_op_hdr(xdr, OP_SEQUENCE, decode_sequence_maxsz, hdr);
 
        /*
         * Sessionid + seqid + slotid + max slotid + cache_this
@@ -1898,13 +1822,12 @@ static void encode_sequence(struct xdr_stream *xdr,
                ((u32 *)session->sess_id.data)[3],
                slot->seq_nr, args->sa_slotid,
                tp->highest_used_slotid, args->sa_cache_this);
+       p = reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 16);
        p = xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
        *p++ = cpu_to_be32(slot->seq_nr);
        *p++ = cpu_to_be32(args->sa_slotid);
        *p++ = cpu_to_be32(tp->highest_used_slotid);
        *p = cpu_to_be32(args->sa_cache_this);
-       hdr->nops++;
-       hdr->replen += decode_sequence_maxsz;
 #endif /* CONFIG_NFS_V4_1 */
 }
 
@@ -1919,14 +1842,12 @@ encode_getdevicelist(struct xdr_stream *xdr,
                .data = "dummmmmy",
        };
 
-       p = reserve_space(xdr, 20);
-       *p++ = cpu_to_be32(OP_GETDEVICELIST);
+       encode_op_hdr(xdr, OP_GETDEVICELIST, decode_getdevicelist_maxsz, hdr);
+       p = reserve_space(xdr, 16);
        *p++ = cpu_to_be32(args->layoutclass);
        *p++ = cpu_to_be32(NFS4_PNFS_GETDEVLIST_MAXNUM);
        xdr_encode_hyper(p, 0ULL);                          /* cookie */
        encode_nfs4_verifier(xdr, &dummy);
-       hdr->nops++;
-       hdr->replen += decode_getdevicelist_maxsz;
 }
 
 static void
@@ -1936,15 +1857,13 @@ encode_getdeviceinfo(struct xdr_stream *xdr,
 {
        __be32 *p;
 
-       p = reserve_space(xdr, 16 + NFS4_DEVICEID4_SIZE);
-       *p++ = cpu_to_be32(OP_GETDEVICEINFO);
+       encode_op_hdr(xdr, OP_GETDEVICEINFO, decode_getdeviceinfo_maxsz, hdr);
+       p = reserve_space(xdr, 12 + NFS4_DEVICEID4_SIZE);
        p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data,
                                    NFS4_DEVICEID4_SIZE);
        *p++ = cpu_to_be32(args->pdev->layout_type);
        *p++ = cpu_to_be32(args->pdev->pglen);          /* gdia_maxcount */
        *p++ = cpu_to_be32(0);                          /* bitmap length 0 */
-       hdr->nops++;
-       hdr->replen += decode_getdeviceinfo_maxsz;
 }
 
 static void
@@ -1954,16 +1873,16 @@ encode_layoutget(struct xdr_stream *xdr,
 {
        __be32 *p;
 
-       p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE);
-       *p++ = cpu_to_be32(OP_LAYOUTGET);
+       encode_op_hdr(xdr, OP_LAYOUTGET, decode_layoutget_maxsz, hdr);
+       p = reserve_space(xdr, 36);
        *p++ = cpu_to_be32(0);     /* Signal layout available */
        *p++ = cpu_to_be32(args->type);
        *p++ = cpu_to_be32(args->range.iomode);
        p = xdr_encode_hyper(p, args->range.offset);
        p = xdr_encode_hyper(p, args->range.length);
        p = xdr_encode_hyper(p, args->minlength);
-       p = xdr_encode_opaque_fixed(p, &args->stateid.data, NFS4_STATEID_SIZE);
-       *p = cpu_to_be32(args->maxcount);
+       encode_nfs4_stateid(xdr, &args->stateid);
+       encode_uint32(xdr, args->maxcount);
 
        dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n",
                __func__,
@@ -1972,8 +1891,6 @@ encode_layoutget(struct xdr_stream *xdr,
                (unsigned long)args->range.offset,
                (unsigned long)args->range.length,
                args->maxcount);
-       hdr->nops++;
-       hdr->replen += decode_layoutget_maxsz;
 }
 
 static int
@@ -1987,13 +1904,14 @@ encode_layoutcommit(struct xdr_stream *xdr,
        dprintk("%s: lbw: %llu type: %d\n", __func__, args->lastbytewritten,
                NFS_SERVER(args->inode)->pnfs_curr_ld->id);
 
-       p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE);
-       *p++ = cpu_to_be32(OP_LAYOUTCOMMIT);
+       encode_op_hdr(xdr, OP_LAYOUTCOMMIT, decode_layoutcommit_maxsz, hdr);
+       p = reserve_space(xdr, 20);
        /* Only whole file layouts */
        p = xdr_encode_hyper(p, 0); /* offset */
        p = xdr_encode_hyper(p, args->lastbytewritten + 1);     /* length */
-       *p++ = cpu_to_be32(0); /* reclaim */
-       p = xdr_encode_opaque_fixed(p, args->stateid.data, NFS4_STATEID_SIZE);
+       *p = cpu_to_be32(0); /* reclaim */
+       encode_nfs4_stateid(xdr, &args->stateid);
+       p = reserve_space(xdr, 20);
        *p++ = cpu_to_be32(1); /* newoffset = TRUE */
        p = xdr_encode_hyper(p, args->lastbytewritten);
        *p++ = cpu_to_be32(0); /* Never send time_modify_changed */
@@ -2002,13 +1920,9 @@ encode_layoutcommit(struct xdr_stream *xdr,
        if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit)
                NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit(
                        NFS_I(inode)->layout, xdr, args);
-       else {
-               p = reserve_space(xdr, 4);
-               *p = cpu_to_be32(0); /* no layout-type payload */
-       }
+       else
+               encode_uint32(xdr, 0); /* no layout-type payload */
 
-       hdr->nops++;
-       hdr->replen += decode_layoutcommit_maxsz;
        return 0;
 }
 
@@ -2019,27 +1933,23 @@ encode_layoutreturn(struct xdr_stream *xdr,
 {
        __be32 *p;
 
-       p = reserve_space(xdr, 20);
-       *p++ = cpu_to_be32(OP_LAYOUTRETURN);
+       encode_op_hdr(xdr, OP_LAYOUTRETURN, decode_layoutreturn_maxsz, hdr);
+       p = reserve_space(xdr, 16);
        *p++ = cpu_to_be32(0);          /* reclaim. always 0 for now */
        *p++ = cpu_to_be32(args->layout_type);
        *p++ = cpu_to_be32(IOMODE_ANY);
        *p = cpu_to_be32(RETURN_FILE);
-       p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE);
+       p = reserve_space(xdr, 16);
        p = xdr_encode_hyper(p, 0);
        p = xdr_encode_hyper(p, NFS4_MAX_UINT64);
        spin_lock(&args->inode->i_lock);
-       xdr_encode_opaque_fixed(p, &args->stateid.data, NFS4_STATEID_SIZE);
+       encode_nfs4_stateid(xdr, &args->stateid);
        spin_unlock(&args->inode->i_lock);
        if (NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn) {
                NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn(
                        NFS_I(args->inode)->layout, xdr, args);
-       } else {
-               p = reserve_space(xdr, 4);
-               *p = cpu_to_be32(0);
-       }
-       hdr->nops++;
-       hdr->replen += decode_layoutreturn_maxsz;
+       } else
+               encode_uint32(xdr, 0);
 }
 
 static int
@@ -2047,12 +1957,8 @@ encode_secinfo_no_name(struct xdr_stream *xdr,
                       const struct nfs41_secinfo_no_name_args *args,
                       struct compound_hdr *hdr)
 {
-       __be32 *p;
-       p = reserve_space(xdr, 8);
-       *p++ = cpu_to_be32(OP_SECINFO_NO_NAME);
-       *p++ = cpu_to_be32(args->style);
-       hdr->nops++;
-       hdr->replen += decode_secinfo_no_name_maxsz;
+       encode_op_hdr(xdr, OP_SECINFO_NO_NAME, decode_secinfo_no_name_maxsz, hdr);
+       encode_uint32(xdr, args->style);
        return 0;
 }
 
@@ -2060,26 +1966,17 @@ static void encode_test_stateid(struct xdr_stream *xdr,
                                struct nfs41_test_stateid_args *args,
                                struct compound_hdr *hdr)
 {
-       __be32 *p;
-
-       p = reserve_space(xdr, 8 + NFS4_STATEID_SIZE);
-       *p++ = cpu_to_be32(OP_TEST_STATEID);
-       *p++ = cpu_to_be32(1);
-       xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE);
-       hdr->nops++;
-       hdr->replen += decode_test_stateid_maxsz;
+       encode_op_hdr(xdr, OP_TEST_STATEID, decode_test_stateid_maxsz, hdr);
+       encode_uint32(xdr, 1);
+       encode_nfs4_stateid(xdr, args->stateid);
 }
 
 static void encode_free_stateid(struct xdr_stream *xdr,
                                struct nfs41_free_stateid_args *args,
                                struct compound_hdr *hdr)
 {
-       __be32 *p;
-       p = reserve_space(xdr, 4 + NFS4_STATEID_SIZE);
-       *p++ = cpu_to_be32(OP_FREE_STATEID);
-       xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE);
-       hdr->nops++;
-       hdr->replen += decode_free_stateid_maxsz;
+       encode_op_hdr(xdr, OP_FREE_STATEID, decode_free_stateid_maxsz, hdr);
+       encode_nfs4_stateid(xdr, args->stateid);
 }
 #endif /* CONFIG_NFS_V4_1 */
 
@@ -2633,6 +2530,7 @@ static void nfs4_xdr_enc_server_caps(struct rpc_rqst *req,
        encode_sequence(xdr, &args->seq_args, &hdr);
        encode_putfh(xdr, args->fhandle, &hdr);
        encode_getattr_one(xdr, FATTR4_WORD0_SUPPORTED_ATTRS|
+                          FATTR4_WORD0_FH_EXPIRE_TYPE|
                           FATTR4_WORD0_LINK_SUPPORT|
                           FATTR4_WORD0_SYMLINK_SUPPORT|
                           FATTR4_WORD0_ACLSUPPORT, &hdr);
@@ -2650,7 +2548,7 @@ static void nfs4_xdr_enc_renew(struct rpc_rqst *req, struct xdr_stream *xdr,
        };
 
        encode_compound_hdr(xdr, req, &hdr);
-       encode_renew(xdr, clp, &hdr);
+       encode_renew(xdr, clp->cl_clientid, &hdr);
        encode_nops(&hdr);
 }
 
@@ -3180,6 +3078,28 @@ out_overflow:
        return -EIO;
 }
 
+/*
+ * Decode the fh_expire_type attribute (one 32-bit word) from a GETATTR reply.
+ *
+ * *type is always set, defaulting to 0.  Any bit still set in bitmap[0]
+ * below FATTR4_WORD0_FH_EXPIRE_TYPE yields -EIO.  When the attribute's own
+ * bit is set, the value is read from the stream and the bit is cleared.
+ *
+ * Returns 0 on success, -EIO on a bad bitmap or a short XDR buffer.
+ */
+static int decode_attr_fh_expire_type(struct xdr_stream *xdr,
+                                     uint32_t *bitmap, uint32_t *type)
+{
+       __be32 *word;
+
+       *type = 0;
+       if (unlikely(bitmap[0] & (FATTR4_WORD0_FH_EXPIRE_TYPE - 1U)))
+               return -EIO;
+       /* Attribute absent from the reply: report the default. */
+       if (unlikely(!(bitmap[0] & FATTR4_WORD0_FH_EXPIRE_TYPE)))
+               goto done;
+
+       word = xdr_inline_decode(xdr, 4);
+       if (unlikely(!word)) {
+               print_overflow_msg(__func__, xdr);
+               return -EIO;
+       }
+       *type = be32_to_cpup(word);
+       bitmap[0] &= ~FATTR4_WORD0_FH_EXPIRE_TYPE;
+done:
+       dprintk("%s: expire type=0x%x\n", __func__, *type);
+       return 0;
+}
+
 static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change)
 {
        __be32 *p;
@@ -3513,16 +3433,17 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path)
        n = be32_to_cpup(p);
        if (n == 0)
                goto root_path;
-       dprintk("path ");
+       dprintk("pathname4: ");
        path->ncomponents = 0;
        while (path->ncomponents < n) {
                struct nfs4_string *component = &path->components[path->ncomponents];
                status = decode_opaque_inline(xdr, &component->len, &component->data);
                if (unlikely(status != 0))
                        goto out_eio;
-               if (path->ncomponents != n)
-                       dprintk("/");
-               dprintk("%s", component->data);
+               ifdebug (XDR)
+                       pr_cont("%s%.*s ",
+                               (path->ncomponents != n ? "/ " : ""),
+                               component->len, component->data);
                if (path->ncomponents < NFS4_PATHNAME_MAXCOMPONENTS)
                        path->ncomponents++;
                else {
@@ -3531,14 +3452,13 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path)
                }
        }
 out:
-       dprintk("\n");
        return status;
 root_path:
 /* a root pathname is sent as a zero component4 */
        path->ncomponents = 1;
        path->components[0].len=0;
        path->components[0].data=NULL;
-       dprintk("path /\n");
+       dprintk("pathname4: /\n");
        goto out;
 out_eio:
        dprintk(" status %d", status);
@@ -3560,7 +3480,11 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
        status = 0;
        if (unlikely(!(bitmap[0] & FATTR4_WORD0_FS_LOCATIONS)))
                goto out;
-       dprintk("%s: fsroot ", __func__);
+       status = -EIO;
+       /* Ignore borken servers that return unrequested attrs */
+       if (unlikely(res == NULL))
+               goto out;
+       dprintk("%s: fsroot:\n", __func__);
        status = decode_pathname(xdr, &res->fs_path);
        if (unlikely(status != 0))
                goto out;
@@ -3581,7 +3505,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
                m = be32_to_cpup(p);
 
                loc->nservers = 0;
-               dprintk("%s: servers ", __func__);
+               dprintk("%s: servers:\n", __func__);
                while (loc->nservers < m) {
                        struct nfs4_string *server = &loc->servers[loc->nservers];
                        status = decode_opaque_inline(xdr, &server->len, &server->data);
@@ -3613,7 +3537,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
                        res->nlocations++;
        }
        if (res->nlocations != 0)
-               status = NFS_ATTR_FATTR_V4_REFERRAL;
+               status = NFS_ATTR_FATTR_V4_LOCATIONS;
 out:
        dprintk("%s: fs_locations done, error = %d\n", __func__, status);
        return status;
@@ -4157,7 +4081,7 @@ static int decode_opaque_fixed(struct xdr_stream *xdr, void *buf, size_t len)
 
 static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
 {
-       return decode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE);
+       return decode_opaque_fixed(xdr, stateid, NFS4_STATEID_SIZE);
 }
 
 static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
@@ -4174,7 +4098,7 @@ static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
 
 static int decode_verifier(struct xdr_stream *xdr, void *verifier)
 {
-       return decode_opaque_fixed(xdr, verifier, 8);
+       return decode_opaque_fixed(xdr, verifier, NFS4_VERIFIER_SIZE);
 }
 
 static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res)
@@ -4224,6 +4148,9 @@ static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_re
                goto xdr_error;
        if ((status = decode_attr_supported(xdr, bitmap, res->attr_bitmask)) != 0)
                goto xdr_error;
+       if ((status = decode_attr_fh_expire_type(xdr, bitmap,
+                                                &res->fh_expire_type)) != 0)
+               goto xdr_error;
        if ((status = decode_attr_link_support(xdr, bitmap, &res->has_links)) != 0)
                goto xdr_error;
        if ((status = decode_attr_symlink_support(xdr, bitmap, &res->has_symlinks)) != 0)
@@ -4294,6 +4221,7 @@ xdr_error:
 
 static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
                struct nfs_fattr *fattr, struct nfs_fh *fh,
+               struct nfs4_fs_locations *fs_loc,
                const struct nfs_server *server)
 {
        int status;
@@ -4341,9 +4269,7 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
                goto xdr_error;
        fattr->valid |= status;
 
-       status = decode_attr_fs_locations(xdr, bitmap, container_of(fattr,
-                                               struct nfs4_fs_locations,
-                                               fattr));
+       status = decode_attr_fs_locations(xdr, bitmap, fs_loc);
        if (status < 0)
                goto xdr_error;
        fattr->valid |= status;
@@ -4407,7 +4333,8 @@ xdr_error:
 }
 
 static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fattr,
-               struct nfs_fh *fh, const struct nfs_server *server)
+               struct nfs_fh *fh, struct nfs4_fs_locations *fs_loc,
+               const struct nfs_server *server)
 {
        __be32 *savep;
        uint32_t attrlen,
@@ -4426,7 +4353,7 @@ static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fat
        if (status < 0)
                goto xdr_error;
 
-       status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, server);
+       status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, fs_loc, server);
        if (status < 0)
                goto xdr_error;
 
@@ -4439,7 +4366,7 @@ xdr_error:
 static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
                const struct nfs_server *server)
 {
-       return decode_getfattr_generic(xdr, fattr, NULL, server);
+       return decode_getfattr_generic(xdr, fattr, NULL, NULL, server);
 }
 
 /*
@@ -4463,8 +4390,8 @@ static int decode_first_pnfs_layout_type(struct xdr_stream *xdr,
                return 0;
        }
        if (num > 1)
-               printk(KERN_INFO "%s: Warning: Multiple pNFS layout drivers "
-                       "per filesystem not supported\n", __func__);
+               printk(KERN_INFO "NFS: %s: Warning: Multiple pNFS layout "
+                       "drivers per filesystem not supported\n", __func__);
 
        /* Decode and set first layout type, move xdr->p past unused types */
        p = xdr_inline_decode(xdr, num * 4);
@@ -4863,17 +4790,16 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
        size_t          hdrlen;
        u32             recvd, pglen = rcvbuf->page_len;
        int             status;
+       __be32          verf[2];
 
        status = decode_op_hdr(xdr, OP_READDIR);
        if (!status)
                status = decode_verifier(xdr, readdir->verifier.data);
        if (unlikely(status))
                return status;
+       memcpy(verf, readdir->verifier.data, sizeof(verf));
        dprintk("%s: verifier = %08x:%08x\n",
-                       __func__,
-                       ((u32 *)readdir->verifier.data)[0],
-                       ((u32 *)readdir->verifier.data)[1]);
-
+                       __func__, verf[0], verf[1]);
 
        hdrlen = (char *) xdr->p - (char *) iov->iov_base;
        recvd = rcvbuf->len - hdrlen;
@@ -5120,7 +5046,7 @@ static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res)
                goto out_overflow;
        res->count = be32_to_cpup(p++);
        res->verf->committed = be32_to_cpup(p++);
-       memcpy(res->verf->verifier, p, 8);
+       memcpy(res->verf->verifier, p, NFS4_VERIFIER_SIZE);
        return 0;
 out_overflow:
        print_overflow_msg(__func__, xdr);
@@ -5214,6 +5140,7 @@ static int decode_exchange_id(struct xdr_stream *xdr,
        char *dummy_str;
        int status;
        struct nfs_client *clp = res->client;
+       uint32_t impl_id_count;
 
        status = decode_op_hdr(xdr, OP_EXCHANGE_ID);
        if (status)
@@ -5255,11 +5182,38 @@ static int decode_exchange_id(struct xdr_stream *xdr,
        memcpy(res->server_scope->server_scope, dummy_str, dummy);
        res->server_scope->server_scope_sz = dummy;
 
-       /* Throw away Implementation id array */
-       status = decode_opaque_inline(xdr, &dummy, &dummy_str);
-       if (unlikely(status))
-               return status;
+       /* Implementation Id */
+       p = xdr_inline_decode(xdr, 4);
+       if (unlikely(!p))
+               goto out_overflow;
+       impl_id_count = be32_to_cpup(p++);
+
+       if (impl_id_count) {
+               /* nii_domain */
+               status = decode_opaque_inline(xdr, &dummy, &dummy_str);
+               if (unlikely(status))
+                       return status;
+               if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
+                       return -EIO;
+               memcpy(res->impl_id->domain, dummy_str, dummy);
 
+               /* nii_name */
+               status = decode_opaque_inline(xdr, &dummy, &dummy_str);
+               if (unlikely(status))
+                       return status;
+               if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
+                       return -EIO;
+               memcpy(res->impl_id->name, dummy_str, dummy);
+
+               /* nii_date */
+               p = xdr_inline_decode(xdr, 12);
+               if (unlikely(!p))
+                       goto out_overflow;
+               p = xdr_decode_hyper(p, &res->impl_id->date.seconds);
+               res->impl_id->date.nseconds = be32_to_cpup(p);
+
+               /* if there's more than one entry, ignore the rest */
+       }
        return 0;
 out_overflow:
        print_overflow_msg(__func__, xdr);
@@ -5285,8 +5239,8 @@ static int decode_chan_attrs(struct xdr_stream *xdr,
        attrs->max_reqs = be32_to_cpup(p++);
        nr_attrs = be32_to_cpup(p);
        if (unlikely(nr_attrs > 1)) {
-               printk(KERN_WARNING "%s: Invalid rdma channel attrs count %u\n",
-                       __func__, nr_attrs);
+               printk(KERN_WARNING "NFS: %s: Invalid rdma channel attrs "
+                       "count %u\n", __func__, nr_attrs);
                return -EINVAL;
        }
        if (nr_attrs == 1) {
@@ -5436,14 +5390,14 @@ static int decode_getdevicelist(struct xdr_stream *xdr,
        p += 2;
 
        /* Read verifier */
-       p = xdr_decode_opaque_fixed(p, verftemp.verifier, 8);
+       p = xdr_decode_opaque_fixed(p, verftemp.verifier, NFS4_VERIFIER_SIZE);
 
        res->num_devs = be32_to_cpup(p);
 
        dprintk("%s: num_dev %d\n", __func__, res->num_devs);
 
        if (res->num_devs > NFS4_PNFS_GETDEVLIST_MAXNUM) {
-               printk(KERN_ERR "%s too many result dev_num %u\n",
+               printk(KERN_ERR "NFS: %s too many result dev_num %u\n",
                                __func__, res->num_devs);
                return -EIO;
        }
@@ -5537,11 +5491,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
        status = decode_op_hdr(xdr, OP_LAYOUTGET);
        if (status)
                return status;
-       p = xdr_inline_decode(xdr, 8 + NFS4_STATEID_SIZE);
+       p = xdr_inline_decode(xdr, 4);
+       if (unlikely(!p))
+               goto out_overflow;
+       res->return_on_close = be32_to_cpup(p);
+       decode_stateid(xdr, &res->stateid);
+       p = xdr_inline_decode(xdr, 4);
        if (unlikely(!p))
                goto out_overflow;
-       res->return_on_close = be32_to_cpup(p++);
-       p = xdr_decode_opaque_fixed(p, res->stateid.data, NFS4_STATEID_SIZE);
        layout_count = be32_to_cpup(p);
        if (!layout_count) {
                dprintk("%s: server responded with empty layout array\n",
@@ -5666,7 +5623,8 @@ static int decode_test_stateid(struct xdr_stream *xdr,
        if (unlikely(!p))
                goto out_overflow;
        res->status = be32_to_cpup(p++);
-       return res->status;
+
+       return status;
 out_overflow:
        print_overflow_msg(__func__, xdr);
 out:
@@ -6583,8 +6541,9 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req,
        if (status)
                goto out;
        xdr_enter_page(xdr, PAGE_SIZE);
-       status = decode_getfattr(xdr, &res->fs_locations->fattr,
-                                res->fs_locations->server);
+       status = decode_getfattr_generic(xdr, &res->fs_locations->fattr,
+                                        NULL, res->fs_locations,
+                                        res->fs_locations->server);
 out:
        return status;
 }
@@ -6964,7 +6923,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
                goto out_overflow;
 
        if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh,
-                                       entry->server) < 0)
+                                 NULL, entry->server) < 0)
                goto out_overflow;
        if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID)
                entry->ino = entry->fattr->mounted_on_fileid;
@@ -7112,7 +7071,7 @@ struct rpc_procinfo       nfs4_procedures[] = {
 #endif /* CONFIG_NFS_V4_1 */
 };
 
-struct rpc_version             nfs_version4 = {
+const struct rpc_version nfs_version4 = {
        .number                 = 4,
        .nrprocs                = ARRAY_SIZE(nfs4_procedures),
        .procs                  = nfs4_procedures
index c4744e1d513c826545898e3310631c5f8153ae98..cd3c910d2d129ee687d197da97b00c9c0cb3cc13 100644 (file)
@@ -104,7 +104,7 @@ static char nfs_export_path[NFS_MAXPATHLEN + 1] __initdata = "";
 /* server:export path string passed to super.c */
 static char nfs_root_device[NFS_MAXPATHLEN + 1] __initdata = "";
 
-#ifdef RPC_DEBUG
+#ifdef NFS_DEBUG
 /*
  * When the "nfsrootdebug" kernel command line option is specified,
  * enable debugging messages for NFSROOT.
index 55d01280a6098264cc5e6d7133c72347e392d109..4bff4a3dab4602ffa8fe1f48df5d3adc3e8709c3 100644 (file)
@@ -137,6 +137,7 @@ static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay,
        struct objio_dev_ent *ode;
        struct osd_dev *od;
        struct osd_dev_info odi;
+       bool retry_flag = true;
        int err;
 
        ode = _dev_list_find(NFS_SERVER(pnfslay->plh_inode), d_id);
@@ -171,10 +172,18 @@ static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay,
                goto out;
        }
 
+retry_lookup:
        od = osduld_info_lookup(&odi);
        if (unlikely(IS_ERR(od))) {
                err = PTR_ERR(od);
                dprintk("%s: osduld_info_lookup => %d\n", __func__, err);
+               if (err == -ENODEV && retry_flag) {
+                       err = objlayout_autologin(deviceaddr);
+                       if (likely(!err)) {
+                               retry_flag = false;
+                               goto retry_lookup;
+                       }
+               }
                goto out;
        }
 
@@ -205,25 +214,36 @@ static void copy_single_comp(struct ore_components *oc, unsigned c,
 int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags,
                       struct objio_segment **pseg)
 {
-       struct __alloc_objio_segment {
-               struct objio_segment olseg;
-               struct ore_dev *ods[numdevs];
-               struct ore_comp comps[numdevs];
-       } *aolseg;
-
-       aolseg = kzalloc(sizeof(*aolseg), gfp_flags);
-       if (unlikely(!aolseg)) {
+/*     This is the in memory structure of the objio_segment
+ *
+ *     struct __alloc_objio_segment {
+ *             struct objio_segment olseg;
+ *             struct ore_dev *ods[numdevs];
+ *             struct ore_comp comps[numdevs];
+ *     } *aolseg;
+ *     NOTE: The code as above compiles and runs perfectly. It is elegant,
+ *     type safe and compact. At some past time Linus decided he does not
+ *     like variable length arrays. For the sake of this principle we uglify
+ *     the code as below.
+ */
+       struct objio_segment *lseg;
+       size_t lseg_size = sizeof(*lseg) +
+                       numdevs * sizeof(lseg->oc.ods[0]) +
+                       numdevs * sizeof(*lseg->oc.comps);
+
+       lseg = kzalloc(lseg_size, gfp_flags);
+       if (unlikely(!lseg)) {
                dprintk("%s: Faild allocation numdevs=%d size=%zd\n", __func__,
-                       numdevs, sizeof(*aolseg));
+                       numdevs, lseg_size);
                return -ENOMEM;
        }
 
-       aolseg->olseg.oc.numdevs = numdevs;
-       aolseg->olseg.oc.single_comp = EC_MULTPLE_COMPS;
-       aolseg->olseg.oc.comps = aolseg->comps;
-       aolseg->olseg.oc.ods = aolseg->ods;
+       lseg->oc.numdevs = numdevs;
+       lseg->oc.single_comp = EC_MULTPLE_COMPS;
+       lseg->oc.ods = (void *)(lseg + 1);
+       lseg->oc.comps = (void *)(lseg->oc.ods + numdevs);
 
-       *pseg = &aolseg->olseg;
+       *pseg = lseg;
        return 0;
 }
 
@@ -582,10 +602,10 @@ objlayout_init(void)
 
        if (ret)
                printk(KERN_INFO
-                       "%s: Registering OSD pNFS Layout Driver failed: error=%d\n",
+                       "NFS: %s: Registering OSD pNFS Layout Driver failed: error=%d\n",
                        __func__, ret);
        else
-               printk(KERN_INFO "%s: Registered OSD pNFS Layout Driver\n",
+               printk(KERN_INFO "NFS: %s: Registered OSD pNFS Layout Driver\n",
                        __func__);
        return ret;
 }
@@ -594,7 +614,7 @@ static void __exit
 objlayout_exit(void)
 {
        pnfs_unregister_layoutdriver(&objlayout_type);
-       printk(KERN_INFO "%s: Unregistered OSD pNFS Layout Driver\n",
+       printk(KERN_INFO "NFS: %s: Unregistered OSD pNFS Layout Driver\n",
               __func__);
 }
 
index b3c29039f5b893e69058cd404547d218cfa8dff8..8d45f1c318ce40ac453b7b4a71288e71ba3c6a34 100644 (file)
@@ -37,6 +37,9 @@
  *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <linux/kmod.h>
+#include <linux/moduleparam.h>
+#include <linux/ratelimit.h>
 #include <scsi/osd_initiator.h>
 #include "objlayout.h"
 
@@ -156,7 +159,7 @@ last_byte_offset(u64 start, u64 len)
        return end > start ? end - 1 : NFS4_MAX_UINT64;
 }
 
-void _fix_verify_io_params(struct pnfs_layout_segment *lseg,
+static void _fix_verify_io_params(struct pnfs_layout_segment *lseg,
                           struct page ***p_pages, unsigned *p_pgbase,
                           u64 offset, unsigned long count)
 {
@@ -490,9 +493,9 @@ encode_accumulated_error(struct objlayout *objlay, __be32 *p)
                        if (!ioerr->oer_errno)
                                continue;
 
-                       printk(KERN_ERR "%s: err[%d]: errno=%d is_write=%d "
-                               "dev(%llx:%llx) par=0x%llx obj=0x%llx "
-                               "offset=0x%llx length=0x%llx\n",
+                       printk(KERN_ERR "NFS: %s: err[%d]: errno=%d "
+                               "is_write=%d dev(%llx:%llx) par=0x%llx "
+                               "obj=0x%llx offset=0x%llx length=0x%llx\n",
                                __func__, i, ioerr->oer_errno,
                                ioerr->oer_iswrite,
                                _DEVID_LO(&ioerr->oer_component.oid_device_id),
@@ -651,3 +654,134 @@ void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr)
        __free_page(odi->page);
        kfree(odi);
 }
+
+/* Buffer sizes, in bytes, for the autologin upcall strings and helper path. */
+enum {
+       OBJLAYOUT_MAX_URI_LEN = 256, OBJLAYOUT_MAX_OSDNAME_LEN = 64,
+       /* Two hex characters per system-id byte, plus one spare byte. */
+       OBJLAYOUT_MAX_SYSID_HEX_LEN = OSD_SYSTEMID_LEN * 2 + 1,
+       OSD_LOGIN_UPCALL_PATHLEN  = 256
+};
+
+/*
+ * Path of the user-space helper run by __objlayout_upcall().  An empty
+ * string means the upcall is disabled.
+ */
+static char osd_login_prog[OSD_LOGIN_UPCALL_PATHLEN] = "/sbin/osd_login";
+
+/* Expose the helper path as a string module parameter (mode 0600). */
+module_param_string(osd_login_prog, osd_login_prog, sizeof(osd_login_prog),
+                   0600);
+MODULE_PARM_DESC(osd_login_prog, "Path to the osd_login upcall program");
+
+/*
+ * Arguments passed to the login helper as its -u, -o and -s options.
+ * The helpers that fill these buffers expect them to start out zeroed.
+ */
+struct __auto_login {
+       char uri[OBJLAYOUT_MAX_URI_LEN];
+       char osdname[OBJLAYOUT_MAX_OSDNAME_LEN];
+       char systemid_hex[OBJLAYOUT_MAX_SYSID_HEX_LEN];
+};
+
+/*
+ * Run the user-space login helper for one OSD target and wait for it.
+ *
+ * The helper is invoked as
+ *     <osd_login_prog> -u <uri> -o <osdname> -s <systemid_hex>
+ * with a fixed minimal environment.
+ *
+ * Returns -EACCES without running anything when osd_login_prog is empty;
+ * otherwise returns the value of call_usermodehelper().  On -ENOENT or
+ * -EACCES the helper path is cleared, which disables further upcalls.
+ */
+static int __objlayout_upcall(struct __auto_login *login)
+{
+       static char *envp[] = { "HOME=/",
+               "TERM=linux",
+               "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
+               NULL
+       };
+       char *argv[8];
+       int ret;
+
+       if (unlikely(!osd_login_prog[0])) {
+               dprintk("%s: osd_login_prog is disabled\n", __func__);
+               return -EACCES;
+       }
+
+       dprintk("%s uri: %s\n", __func__, login->uri);
+       dprintk("%s osdname %s\n", __func__, login->osdname);
+       dprintk("%s systemid_hex %s\n", __func__, login->systemid_hex);
+
+       argv[0] = (char *)osd_login_prog;
+       argv[1] = "-u";
+       argv[2] = login->uri;
+       argv[3] = "-o";
+       argv[4] = login->osdname;
+       argv[5] = "-s";
+       argv[6] = login->systemid_hex;
+       argv[7] = NULL;
+
+       ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
+       /* Log while the path is still set; it may be cleared just below. */
+       dprintk("%s %s return value: %d\n", __func__, osd_login_prog, ret);
+       /*
+        * Disable the upcall mechanism if we're getting an ENOENT or
+        * EACCES error. The admin can re-enable it on the fly by using
+        * sysfs to set the objlayoutdriver.osd_login_prog module parameter once
+        * the problem has been fixed.
+        */
+       if (ret == -ENOENT || ret == -EACCES) {
+               printk(KERN_ERR "PNFS-OBJ: %s was not found please set "
+                       "objlayoutdriver.osd_login_prog kernel parameter!\n",
+                       osd_login_prog);
+               osd_login_prog[0] = '\0';
+       }
+
+       return ret;
+}
+
+/*
+ * Copy at most max_len - 1 bytes of s into dest, emitting a rate-limited
+ * warning tagged with var_name when s has to be truncated.
+ *
+ * No terminator is written here: dest is assumed to be all zeros already,
+ * which is what leaves the copied string NUL-terminated.
+ */
+static void __copy_nfsS_and_zero_terminate(struct nfs4_string s,
+                                          char *dest, int max_len,
+                                          const char *var_name)
+{
+       if (!s.len)
+               return;
+
+       if (s.len >= max_len) {
+               /* Trailing "\n" makes the warning a complete log record. */
+               pr_warn_ratelimited(
+                       "objlayout_autologin: %s: s.len(%d) >= max_len(%d)\n",
+                       var_name, s.len, max_len);
+               s.len = max_len - 1; /* space for null terminator */
+       }
+
+       memcpy(dest, s.data, s.len);
+}
+
+/*
+ * Write the bytes of s into sysid as hex, two characters per byte.
+ *
+ * A length other than OSD_SYSTEMID_LEN triggers a rate-limited warning;
+ * a longer input is clamped to OSD_SYSTEMID_LEN bytes, a shorter one is
+ * converted as is.  No terminator is written: sysid is assumed to be all
+ * zeros on entry.
+ */
+static void _sysid_2_hex(struct nfs4_string s,
+                 char sysid[OBJLAYOUT_MAX_SYSID_HEX_LEN])
+{
+       int i;
+       char *cur;
+
+       if (!s.len)
+               return;
+
+       if (s.len != OSD_SYSTEMID_LEN) {
+               /* Trailing "\n" makes the warning a complete log record. */
+               pr_warn_ratelimited(
+                   "objlayout_autologin: systemid_len(%d) != OSD_SYSTEMID_LEN\n",
+                   s.len);
+               if (s.len > OSD_SYSTEMID_LEN)
+                       s.len = OSD_SYSTEMID_LEN;
+       }
+
+       cur = sysid;
+       for (i = 0; i < s.len; i++)
+               cur = hex_byte_pack(cur, s.data[i]);
+}
+
+/*
+ * Try to log in to the OSD described by deviceaddr via the user-space helper.
+ *
+ * Builds the helper arguments (URI, OSD name, hex system id) from the
+ * device address and runs __objlayout_upcall().
+ *
+ * Returns -ENODEV when the device address carries no network address or
+ * when the upcall returns a positive value; otherwise returns the upcall's
+ * result unchanged.
+ */
+int objlayout_autologin(struct pnfs_osd_deviceaddr *deviceaddr)
+{
+       struct __auto_login args;
+       int status;
+
+       /* No target network address means there is nothing to log in to. */
+       if (deviceaddr->oda_targetaddr.ota_netaddr.r_addr.len == 0)
+               return -ENODEV;
+
+       /* The fill helpers below rely on zeroed destination buffers. */
+       memset(&args, 0, sizeof(args));
+
+       __copy_nfsS_and_zero_terminate(
+               deviceaddr->oda_targetaddr.ota_netaddr.r_addr,
+               args.uri, sizeof(args.uri), "URI");
+       __copy_nfsS_and_zero_terminate(
+               deviceaddr->oda_osdname,
+               args.osdname, sizeof(args.osdname), "OSDNAME");
+       _sysid_2_hex(deviceaddr->oda_systemid, args.systemid_hex);
+
+       status = __objlayout_upcall(&args);
+
+       /* script returns positive values */
+       return status > 0 ? -ENODEV : status;
+}
index 8ec34727ed210fcf306376a0e7e9a7cf835ba435..880ba086be9499315d59d3957ed29d5e915f0ceb 100644 (file)
@@ -184,4 +184,6 @@ extern void objlayout_encode_layoutreturn(
        struct xdr_stream *,
        const struct nfs4_layoutreturn_args *);
 
+extern int objlayout_autologin(struct pnfs_osd_deviceaddr *deviceaddr);
+
 #endif /* _OBJLAYOUT_H */
index 5668f7c54c41e2d35ff1afd2c24a2bf786406c71..d21fceaa9f6263fecff450506653c21ba055872f 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/file.h>
 #include <linux/sched.h>
 #include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
 #include <linux/nfs3.h>
 #include <linux/nfs4.h>
 #include <linux/nfs_page.h>
@@ -106,36 +107,6 @@ void nfs_unlock_request(struct nfs_page *req)
        nfs_release_request(req);
 }
 
-/**
- * nfs_set_page_tag_locked - Tag a request as locked
- * @req:
- */
-int nfs_set_page_tag_locked(struct nfs_page *req)
-{
-       if (!nfs_lock_request_dontget(req))
-               return 0;
-       if (test_bit(PG_MAPPED, &req->wb_flags))
-               radix_tree_tag_set(&NFS_I(req->wb_context->dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
-       return 1;
-}
-
-/**
- * nfs_clear_page_tag_locked - Clear request tag and wake up sleepers
- */
-void nfs_clear_page_tag_locked(struct nfs_page *req)
-{
-       if (test_bit(PG_MAPPED, &req->wb_flags)) {
-               struct inode *inode = req->wb_context->dentry->d_inode;
-               struct nfs_inode *nfsi = NFS_I(inode);
-
-               spin_lock(&inode->i_lock);
-               radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
-               nfs_unlock_request(req);
-               spin_unlock(&inode->i_lock);
-       } else
-               nfs_unlock_request(req);
-}
-
 /*
  * nfs_clear_request - Free up all resources allocated to the request
  * @req:
@@ -425,67 +396,6 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
        }
 }
 
-#define NFS_SCAN_MAXENTRIES 16
-/**
- * nfs_scan_list - Scan a list for matching requests
- * @nfsi: NFS inode
- * @dst: Destination list
- * @idx_start: lower bound of page->index to scan
- * @npages: idx_start + npages sets the upper bound to scan.
- * @tag: tag to scan for
- *
- * Moves elements from one of the inode request lists.
- * If the number of requests is set to 0, the entire address_space
- * starting at index idx_start, is scanned.
- * The requests are *not* checked to ensure that they form a contiguous set.
- * You must be holding the inode's i_lock when calling this function
- */
-int nfs_scan_list(struct nfs_inode *nfsi,
-               struct list_head *dst, pgoff_t idx_start,
-               unsigned int npages, int tag)
-{
-       struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
-       struct nfs_page *req;
-       pgoff_t idx_end;
-       int found, i;
-       int res;
-       struct list_head *list;
-
-       res = 0;
-       if (npages == 0)
-               idx_end = ~0;
-       else
-               idx_end = idx_start + npages - 1;
-
-       for (;;) {
-               found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree,
-                               (void **)&pgvec[0], idx_start,
-                               NFS_SCAN_MAXENTRIES, tag);
-               if (found <= 0)
-                       break;
-               for (i = 0; i < found; i++) {
-                       req = pgvec[i];
-                       if (req->wb_index > idx_end)
-                               goto out;
-                       idx_start = req->wb_index + 1;
-                       if (nfs_set_page_tag_locked(req)) {
-                               kref_get(&req->wb_kref);
-                               radix_tree_tag_clear(&nfsi->nfs_page_tree,
-                                               req->wb_index, tag);
-                               list = pnfs_choose_commit_list(req, dst);
-                               nfs_list_add_request(req, list);
-                               res++;
-                               if (res == INT_MAX)
-                                       goto out;
-                       }
-               }
-               /* for latency reduction */
-               cond_resched_lock(&nfsi->vfs_inode.i_lock);
-       }
-out:
-       return res;
-}
-
 int __init nfs_init_nfspagecache(void)
 {
        nfs_page_cachep = kmem_cache_create("nfs_page",
index 17149a4900653af5326dfd6e0490e6e439b1426f..b5d4515869436dc6bd16a483590a433ac04c665c 100644 (file)
@@ -101,8 +101,8 @@ set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
                goto out_no_driver;
        if (!(server->nfs_client->cl_exchange_flags &
                 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
-               printk(KERN_ERR "%s: id %u cl_exchange_flags 0x%x\n", __func__,
-                      id, server->nfs_client->cl_exchange_flags);
+               printk(KERN_ERR "NFS: %s: id %u cl_exchange_flags 0x%x\n",
+                       __func__, id, server->nfs_client->cl_exchange_flags);
                goto out_no_driver;
        }
        ld_type = find_pnfs_driver(id);
@@ -122,8 +122,8 @@ set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
        server->pnfs_curr_ld = ld_type;
        if (ld_type->set_layoutdriver
            && ld_type->set_layoutdriver(server, mntfh)) {
-               printk(KERN_ERR "%s: Error initializing pNFS layout driver %u.\n",
-                               __func__, id);
+               printk(KERN_ERR "NFS: %s: Error initializing pNFS layout "
+                       "driver %u.\n", __func__, id);
                module_put(ld_type->owner);
                goto out_no_driver;
        }
@@ -143,11 +143,11 @@ pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
        struct pnfs_layoutdriver_type *tmp;
 
        if (ld_type->id == 0) {
-               printk(KERN_ERR "%s id 0 is reserved\n", __func__);
+               printk(KERN_ERR "NFS: %s id 0 is reserved\n", __func__);
                return status;
        }
        if (!ld_type->alloc_lseg || !ld_type->free_lseg) {
-               printk(KERN_ERR "%s Layout driver must provide "
+               printk(KERN_ERR "NFS: %s Layout driver must provide "
                       "alloc_lseg and free_lseg.\n", __func__);
                return status;
        }
@@ -160,7 +160,7 @@ pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
                dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id,
                        ld_type->name);
        } else {
-               printk(KERN_ERR "%s Module with id %d already loaded!\n",
+               printk(KERN_ERR "NFS: %s Module with id %d already loaded!\n",
                        __func__, ld_type->id);
        }
        spin_unlock(&pnfs_spinlock);
@@ -496,12 +496,12 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
 {
        u32 oldseq, newseq;
 
-       oldseq = be32_to_cpu(lo->plh_stateid.stateid.seqid);
-       newseq = be32_to_cpu(new->stateid.seqid);
+       oldseq = be32_to_cpu(lo->plh_stateid.seqid);
+       newseq = be32_to_cpu(new->seqid);
        if ((int)(newseq - oldseq) > 0) {
-               memcpy(&lo->plh_stateid, &new->stateid, sizeof(new->stateid));
+               nfs4_stateid_copy(&lo->plh_stateid, new);
                if (update_barrier) {
-                       u32 new_barrier = be32_to_cpu(new->stateid.seqid);
+                       u32 new_barrier = be32_to_cpu(new->seqid);
 
                        if ((int)(new_barrier - lo->plh_barrier))
                                lo->plh_barrier = new_barrier;
@@ -525,7 +525,7 @@ pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid,
                        int lget)
 {
        if ((stateid) &&
-           (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0)
+           (int)(lo->plh_barrier - be32_to_cpu(stateid->seqid)) >= 0)
                return true;
        return lo->plh_block_lgets ||
                test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags) ||
@@ -549,11 +549,10 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
 
                do {
                        seq = read_seqbegin(&open_state->seqlock);
-                       memcpy(dst->data, open_state->stateid.data,
-                              sizeof(open_state->stateid.data));
+                       nfs4_stateid_copy(dst, &open_state->stateid);
                } while (read_seqretry(&open_state->seqlock, seq));
        } else
-               memcpy(dst->data, lo->plh_stateid.data, sizeof(lo->plh_stateid.data));
+               nfs4_stateid_copy(dst, &lo->plh_stateid);
        spin_unlock(&lo->plh_inode->i_lock);
        dprintk("<-- %s\n", __func__);
        return status;
@@ -590,7 +589,7 @@ send_layoutget(struct pnfs_layout_hdr *lo,
        max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
        max_pages = max_resp_sz >> PAGE_SHIFT;
 
-       pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags);
+       pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags);
        if (!pages)
                goto out_err_free;
 
@@ -760,7 +759,7 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier)
                }
        if (!found) {
                struct pnfs_layout_hdr *lo = nfsi->layout;
-               u32 current_seqid = be32_to_cpu(lo->plh_stateid.stateid.seqid);
+               u32 current_seqid = be32_to_cpu(lo->plh_stateid.seqid);
 
                /* Since close does not return a layout stateid for use as
                 * a barrier, we choose the worst-case barrier.
@@ -966,8 +965,7 @@ pnfs_update_layout(struct inode *ino,
        }
 
        /* Do we even need to bother with this? */
-       if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
-           test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
+       if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
                dprintk("%s matches recall, use MDS\n", __func__);
                goto out_unlock;
        }
@@ -1032,7 +1030,6 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
        struct nfs4_layoutget_res *res = &lgp->res;
        struct pnfs_layout_segment *lseg;
        struct inode *ino = lo->plh_inode;
-       struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
        int status = 0;
 
        /* Inject layout blob into I/O device driver */
@@ -1048,8 +1045,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
        }
 
        spin_lock(&ino->i_lock);
-       if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
-           test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
+       if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
                dprintk("%s forget reply due to recall\n", __func__);
                goto out_forget_reply;
        }
@@ -1214,6 +1210,7 @@ void pnfs_ld_write_done(struct nfs_write_data *data)
                }
                data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages);
        }
+       put_lseg(data->lseg);
        data->mds_ops->rpc_release(data);
 }
 EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
@@ -1227,6 +1224,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
                nfs_list_add_request(data->req, &desc->pg_list);
        nfs_pageio_reset_write_mds(desc);
        desc->pg_recoalesce = 1;
+       put_lseg(data->lseg);
        nfs_writedata_release(data);
 }
 
@@ -1327,6 +1325,7 @@ void pnfs_ld_read_done(struct nfs_read_data *data)
                data->mds_ops->rpc_call_done(&data->task, data);
        } else
                pnfs_ld_handle_read_error(data);
+       put_lseg(data->lseg);
        data->mds_ops->rpc_release(data);
 }
 EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
@@ -1530,8 +1529,7 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
        end_pos = nfsi->layout->plh_lwb;
        nfsi->layout->plh_lwb = 0;
 
-       memcpy(&data->args.stateid.data, nfsi->layout->plh_stateid.data,
-               sizeof(nfsi->layout->plh_stateid.data));
+       nfs4_stateid_copy(&data->args.stateid, &nfsi->layout->plh_stateid);
        spin_unlock(&inode->i_lock);
 
        data->args.inode = inode;
index 53d593a0a4f265a69c9f4fbc5d2ccb759291686a..442ebf68eeecf51dfaa6b8835318b53010eefe19 100644 (file)
@@ -94,11 +94,10 @@ struct pnfs_layoutdriver_type {
        const struct nfs_pageio_ops *pg_read_ops;
        const struct nfs_pageio_ops *pg_write_ops;
 
-       /* Returns true if layoutdriver wants to divert this request to
-        * driver's commit routine.
-        */
-       bool (*mark_pnfs_commit)(struct pnfs_layout_segment *lseg);
-       struct list_head * (*choose_commit_list) (struct nfs_page *req);
+       void (*mark_request_commit) (struct nfs_page *req,
+                                       struct pnfs_layout_segment *lseg);
+       void (*clear_request_commit) (struct nfs_page *req);
+       int (*scan_commit_lists) (struct inode *inode, int max, spinlock_t *lock);
        int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how);
 
        /*
@@ -229,7 +228,6 @@ struct nfs4_deviceid_node {
        atomic_t                        ref;
 };
 
-void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id);
 struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
 void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
 void nfs4_init_deviceid_node(struct nfs4_deviceid_node *,
@@ -262,20 +260,6 @@ static inline int pnfs_enabled_sb(struct nfs_server *nfss)
        return nfss->pnfs_curr_ld != NULL;
 }
 
-static inline void
-pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
-{
-       if (lseg) {
-               struct pnfs_layoutdriver_type *ld;
-
-               ld = NFS_SERVER(req->wb_page->mapping->host)->pnfs_curr_ld;
-               if (ld->mark_pnfs_commit && ld->mark_pnfs_commit(lseg)) {
-                       set_bit(PG_PNFS_COMMIT, &req->wb_flags);
-                       req->wb_commit_lseg = get_lseg(lseg);
-               }
-       }
-}
-
 static inline int
 pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how)
 {
@@ -284,27 +268,42 @@ pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how)
        return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how);
 }
 
-static inline struct list_head *
-pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds)
+static inline bool
+pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
 {
-       struct list_head *rv;
+       struct inode *inode = req->wb_context->dentry->d_inode;
+       struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
 
-       if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags)) {
-               struct inode *inode = req->wb_commit_lseg->pls_layout->plh_inode;
+       if (lseg == NULL || ld->mark_request_commit == NULL)
+               return false;
+       ld->mark_request_commit(req, lseg);
+       return true;
+}
 
-               set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags);
-               rv = NFS_SERVER(inode)->pnfs_curr_ld->choose_commit_list(req);
-               /* matched by ref taken when PG_PNFS_COMMIT is set */
-               put_lseg(req->wb_commit_lseg);
-       } else
-               rv = mds;
-       return rv;
+static inline bool
+pnfs_clear_request_commit(struct nfs_page *req)
+{
+       struct inode *inode = req->wb_context->dentry->d_inode;
+       struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
+
+       if (ld == NULL || ld->clear_request_commit == NULL)
+               return false;
+       ld->clear_request_commit(req);
+       return true;
 }
 
-static inline void pnfs_clear_request_commit(struct nfs_page *req)
+static inline int
+pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock)
 {
-       if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags))
-               put_lseg(req->wb_commit_lseg);
+       struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
+       int ret;
+
+       if (ld == NULL || ld->scan_commit_lists == NULL)
+               return 0;
+       ret = ld->scan_commit_lists(inode, max, lock);
+       if (ret != 0)
+               set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags);
+       return ret;
 }
 
 /* Should the pNFS client commit and return the layout upon a setattr */
@@ -328,6 +327,13 @@ static inline int pnfs_return_layout(struct inode *ino)
        return 0;
 }
 
+#ifdef NFS_DEBUG
+void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id);
+#else
+static inline void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id)
+{
+}
+#endif /* NFS_DEBUG */
 #else  /* CONFIG_NFS_V4_1 */
 
 static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
@@ -400,35 +406,35 @@ static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, st
        return false;
 }
 
-static inline void
-pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
-{
-}
-
 static inline int
 pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how)
 {
        return PNFS_NOT_ATTEMPTED;
 }
 
-static inline struct list_head *
-pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds)
+static inline bool
+pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
 {
-       return mds;
+       return false;
 }
 
-static inline void pnfs_clear_request_commit(struct nfs_page *req)
+static inline bool
+pnfs_clear_request_commit(struct nfs_page *req)
 {
+       return false;
 }
 
-static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
+static inline int
+pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock)
 {
        return 0;
 }
 
-static inline void nfs4_deviceid_purge_client(struct nfs_client *ncl)
+static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
 {
+       return 0;
 }
+
 #endif /* CONFIG_NFS_V4_1 */
 
 #endif /* FS_NFS_PNFS_H */
index 4f359d2a26ebe3ce4a2160758c2a5e6c69163ba6..73f701f1f4d3325e2c54efb68e14b1df40eb90e1 100644 (file)
@@ -43,6 +43,7 @@
 static struct hlist_head nfs4_deviceid_cache[NFS4_DEVICE_ID_HASH_SIZE];
 static DEFINE_SPINLOCK(nfs4_deviceid_lock);
 
+#ifdef NFS_DEBUG
 void
 nfs4_print_deviceid(const struct nfs4_deviceid *id)
 {
@@ -52,6 +53,7 @@ nfs4_print_deviceid(const struct nfs4_deviceid *id)
                p[0], p[1], p[2], p[3]);
 }
 EXPORT_SYMBOL_GPL(nfs4_print_deviceid);
+#endif
 
 static inline u32
 nfs4_deviceid_hash(const struct nfs4_deviceid *id)
@@ -92,7 +94,7 @@ _lookup_deviceid(const struct pnfs_layoutdriver_type *ld,
  * @clp nfs_client associated with deviceid
  * @id deviceid to look up
  */
-struct nfs4_deviceid_node *
+static struct nfs4_deviceid_node *
 _find_get_deviceid(const struct pnfs_layoutdriver_type *ld,
                   const struct nfs_client *clp, const struct nfs4_deviceid *id,
                   long hash)
index 0c672588fe5a71217ac83df8e1a11701934c5f9c..b63b6f4d14fbd5f54bdf265461c2a0069cbc5db0 100644 (file)
@@ -358,6 +358,11 @@ nfs_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
        msg->rpc_proc = &nfs_procedures[NFSPROC_REMOVE];
 }
 
+static void nfs_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data)
+{
+       rpc_call_start(task);
+}
+
 static int nfs_proc_unlink_done(struct rpc_task *task, struct inode *dir)
 {
        if (nfs_async_handle_expired_key(task))
@@ -372,6 +377,11 @@ nfs_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
        msg->rpc_proc = &nfs_procedures[NFSPROC_RENAME];
 }
 
+static void nfs_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data)
+{
+       rpc_call_start(task);
+}
+
 static int
 nfs_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
                     struct inode *new_dir)
@@ -651,6 +661,11 @@ static void nfs_proc_read_setup(struct nfs_read_data *data, struct rpc_message *
        msg->rpc_proc = &nfs_procedures[NFSPROC_READ];
 }
 
+static void nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
+{
+       rpc_call_start(task);
+}
+
 static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
 {
        if (nfs_async_handle_expired_key(task))
@@ -668,6 +683,11 @@ static void nfs_proc_write_setup(struct nfs_write_data *data, struct rpc_message
        msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE];
 }
 
+static void nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
+{
+       rpc_call_start(task);
+}
+
 static void
 nfs_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
 {
@@ -721,9 +741,11 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
        .create         = nfs_proc_create,
        .remove         = nfs_proc_remove,
        .unlink_setup   = nfs_proc_unlink_setup,
+       .unlink_rpc_prepare = nfs_proc_unlink_rpc_prepare,
        .unlink_done    = nfs_proc_unlink_done,
        .rename         = nfs_proc_rename,
        .rename_setup   = nfs_proc_rename_setup,
+       .rename_rpc_prepare = nfs_proc_rename_rpc_prepare,
        .rename_done    = nfs_proc_rename_done,
        .link           = nfs_proc_link,
        .symlink        = nfs_proc_symlink,
@@ -736,8 +758,10 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
        .pathconf       = nfs_proc_pathconf,
        .decode_dirent  = nfs2_decode_dirent,
        .read_setup     = nfs_proc_read_setup,
+       .read_rpc_prepare = nfs_proc_read_rpc_prepare,
        .read_done      = nfs_read_done,
        .write_setup    = nfs_proc_write_setup,
+       .write_rpc_prepare = nfs_proc_write_rpc_prepare,
        .write_done     = nfs_write_done,
        .commit_setup   = nfs_proc_commit_setup,
        .lock           = nfs_proc_lock,
index cfa175c223dcfa5b79ebf17b3d609649fbd7188d..cc1f758a7ee1a0234d491da9de8572eb1b45a44f 100644 (file)
@@ -66,7 +66,6 @@ void nfs_readdata_free(struct nfs_read_data *p)
 
 void nfs_readdata_release(struct nfs_read_data *rdata)
 {
-       put_lseg(rdata->lseg);
        put_nfs_open_context(rdata->args.context);
        nfs_readdata_free(rdata);
 }
@@ -465,23 +464,14 @@ static void nfs_readpage_release_partial(void *calldata)
        nfs_readdata_release(calldata);
 }
 
-#if defined(CONFIG_NFS_V4_1)
 void nfs_read_prepare(struct rpc_task *task, void *calldata)
 {
        struct nfs_read_data *data = calldata;
-
-       if (nfs4_setup_sequence(NFS_SERVER(data->inode),
-                               &data->args.seq_args, &data->res.seq_res,
-                               0, task))
-               return;
-       rpc_call_start(task);
+       NFS_PROTO(data->inode)->read_rpc_prepare(task, data);
 }
-#endif /* CONFIG_NFS_V4_1 */
 
 static const struct rpc_call_ops nfs_read_partial_ops = {
-#if defined(CONFIG_NFS_V4_1)
        .rpc_call_prepare = nfs_read_prepare,
-#endif /* CONFIG_NFS_V4_1 */
        .rpc_call_done = nfs_readpage_result_partial,
        .rpc_release = nfs_readpage_release_partial,
 };
@@ -545,9 +535,7 @@ static void nfs_readpage_release_full(void *calldata)
 }
 
 static const struct rpc_call_ops nfs_read_full_ops = {
-#if defined(CONFIG_NFS_V4_1)
        .rpc_call_prepare = nfs_read_prepare,
-#endif /* CONFIG_NFS_V4_1 */
        .rpc_call_done = nfs_readpage_result_full,
        .rpc_release = nfs_readpage_release_full,
 };
index 3dfa4f112c0ab8be8d5b3f897173a68502406b6c..ccc4cdb1efe9a9e7842718ef24407557b73b382d 100644 (file)
@@ -52,6 +52,8 @@
 #include <linux/nfs_xdr.h>
 #include <linux/magic.h>
 #include <linux/parser.h>
+#include <linux/nsproxy.h>
+#include <linux/rcupdate.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -79,7 +81,6 @@ enum {
        Opt_cto, Opt_nocto,
        Opt_ac, Opt_noac,
        Opt_lock, Opt_nolock,
-       Opt_v2, Opt_v3, Opt_v4,
        Opt_udp, Opt_tcp, Opt_rdma,
        Opt_acl, Opt_noacl,
        Opt_rdirplus, Opt_nordirplus,
@@ -97,10 +98,10 @@ enum {
        Opt_namelen,
        Opt_mountport,
        Opt_mountvers,
-       Opt_nfsvers,
        Opt_minorversion,
 
        /* Mount options that take string arguments */
+       Opt_nfsvers,
        Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost,
        Opt_addr, Opt_mountaddr, Opt_clientaddr,
        Opt_lookupcache,
@@ -132,9 +133,6 @@ static const match_table_t nfs_mount_option_tokens = {
        { Opt_noac, "noac" },
        { Opt_lock, "lock" },
        { Opt_nolock, "nolock" },
-       { Opt_v2, "v2" },
-       { Opt_v3, "v3" },
-       { Opt_v4, "v4" },
        { Opt_udp, "udp" },
        { Opt_tcp, "tcp" },
        { Opt_rdma, "rdma" },
@@ -163,9 +161,10 @@ static const match_table_t nfs_mount_option_tokens = {
        { Opt_namelen, "namlen=%s" },
        { Opt_mountport, "mountport=%s" },
        { Opt_mountvers, "mountvers=%s" },
+       { Opt_minorversion, "minorversion=%s" },
+
        { Opt_nfsvers, "nfsvers=%s" },
        { Opt_nfsvers, "vers=%s" },
-       { Opt_minorversion, "minorversion=%s" },
 
        { Opt_sec, "sec=%s" },
        { Opt_proto, "proto=%s" },
@@ -179,6 +178,9 @@ static const match_table_t nfs_mount_option_tokens = {
        { Opt_fscache_uniq, "fsc=%s" },
        { Opt_local_lock, "local_lock=%s" },
 
+       /* The following needs to be listed after all other options */
+       { Opt_nfsvers, "v%s" },
+
        { Opt_err, NULL }
 };
 
@@ -259,6 +261,22 @@ static match_table_t nfs_local_lock_tokens = {
        { Opt_local_lock_err, NULL }
 };
 
+enum {
+       Opt_vers_2, Opt_vers_3, Opt_vers_4, Opt_vers_4_0,
+       Opt_vers_4_1,
+
+       Opt_vers_err
+};
+
+static match_table_t nfs_vers_tokens = {
+       { Opt_vers_2, "2" },
+       { Opt_vers_3, "3" },
+       { Opt_vers_4, "4" },
+       { Opt_vers_4_0, "4.0" },
+       { Opt_vers_4_1, "4.1" },
+
+       { Opt_vers_err, NULL }
+};
 
 static void nfs_umount_begin(struct super_block *);
 static int  nfs_statfs(struct dentry *, struct kstatfs *);
@@ -620,7 +638,6 @@ static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss,
        struct nfs_client *clp = nfss->nfs_client;
 
        seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr);
-       seq_printf(m, ",minorversion=%u", clp->cl_minorversion);
 }
 #else
 static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss,
@@ -629,6 +646,15 @@ static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss,
 }
 #endif
 
+static void nfs_show_nfs_version(struct seq_file *m,
+               unsigned int version,
+               unsigned int minorversion)
+{
+       seq_printf(m, ",vers=%u", version);
+       if (version == 4)
+               seq_printf(m, ".%u", minorversion);
+}
+
 /*
  * Describe the mount options in force on this server representation
  */
@@ -656,7 +682,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
        u32 version = clp->rpc_ops->version;
        int local_flock, local_fcntl;
 
-       seq_printf(m, ",vers=%u", version);
+       nfs_show_nfs_version(m, version, clp->cl_minorversion);
        seq_printf(m, ",rsize=%u", nfss->rsize);
        seq_printf(m, ",wsize=%u", nfss->wsize);
        if (nfss->bsize != 0)
@@ -676,8 +702,10 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
                else
                        seq_puts(m, nfs_infop->nostr);
        }
+       rcu_read_lock();
        seq_printf(m, ",proto=%s",
                   rpc_peeraddr2str(nfss->client, RPC_DISPLAY_NETID));
+       rcu_read_unlock();
        if (version == 4) {
                if (nfss->port != NFS_PORT)
                        seq_printf(m, ",port=%u", nfss->port);
@@ -726,9 +754,11 @@ static int nfs_show_options(struct seq_file *m, struct dentry *root)
 
        nfs_show_mount_options(m, nfss, 0);
 
+       rcu_read_lock();
        seq_printf(m, ",addr=%s",
                        rpc_peeraddr2str(nfss->nfs_client->cl_rpcclient,
                                                        RPC_DISPLAY_ADDR));
+       rcu_read_unlock();
 
        return 0;
 }
@@ -745,7 +775,6 @@ static void show_sessions(struct seq_file *m, struct nfs_server *server) {}
 #endif
 #endif
 
-#ifdef CONFIG_NFS_V4
 #ifdef CONFIG_NFS_V4_1
 static void show_pnfs(struct seq_file *m, struct nfs_server *server)
 {
@@ -755,9 +784,26 @@ static void show_pnfs(struct seq_file *m, struct nfs_server *server)
        else
                seq_printf(m, "not configured");
 }
+
+static void show_implementation_id(struct seq_file *m, struct nfs_server *nfss)
+{
+       if (nfss->nfs_client && nfss->nfs_client->impl_id) {
+               struct nfs41_impl_id *impl_id = nfss->nfs_client->impl_id;
+               seq_printf(m, "\n\timpl_id:\tname='%s',domain='%s',"
+                          "date='%llu,%u'",
+                          impl_id->name, impl_id->domain,
+                          impl_id->date.seconds, impl_id->date.nseconds);
+       }
+}
 #else
-static void show_pnfs(struct seq_file *m, struct nfs_server *server) {}
+#ifdef CONFIG_NFS_V4
+static void show_pnfs(struct seq_file *m, struct nfs_server *server)
+{
+}
 #endif
+static void show_implementation_id(struct seq_file *m, struct nfs_server *nfss)
+{
+}
 #endif
 
 static int nfs_show_devname(struct seq_file *m, struct dentry *root)
@@ -806,6 +852,8 @@ static int nfs_show_stats(struct seq_file *m, struct dentry *root)
 
        seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ);
 
+       show_implementation_id(m, nfss);
+
        seq_printf(m, "\n\tcaps:\t");
        seq_printf(m, "caps=0x%x", nfss->caps);
        seq_printf(m, ",wtmult=%u", nfss->wtmult);
@@ -908,6 +956,7 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int ve
                data->auth_flavor_len   = 1;
                data->version           = version;
                data->minorversion      = 0;
+               data->net               = current->nsproxy->net_ns;
                security_init_mnt_opts(&data->lsm_opts);
        }
        return data;
@@ -1052,6 +1101,40 @@ static int nfs_parse_security_flavors(char *value,
        return 1;
 }
 
+static int nfs_parse_version_string(char *string,
+               struct nfs_parsed_mount_data *mnt,
+               substring_t *args)
+{
+       mnt->flags &= ~NFS_MOUNT_VER3;
+       switch (match_token(string, nfs_vers_tokens, args)) {
+       case Opt_vers_2:
+               mnt->version = 2;
+               break;
+       case Opt_vers_3:
+               mnt->flags |= NFS_MOUNT_VER3;
+               mnt->version = 3;
+               break;
+       case Opt_vers_4:
+               /* Backward compatibility option. In future,
+                * the mount program should always supply
+                * a NFSv4 minor version number.
+                */
+               mnt->version = 4;
+               break;
+       case Opt_vers_4_0:
+               mnt->version = 4;
+               mnt->minorversion = 0;
+               break;
+       case Opt_vers_4_1:
+               mnt->version = 4;
+               mnt->minorversion = 1;
+               break;
+       default:
+               return 0;
+       }
+       return 1;
+}
+
 static int nfs_get_option_str(substring_t args[], char **option)
 {
        kfree(*option);
@@ -1157,18 +1240,6 @@ static int nfs_parse_mount_options(char *raw,
                        mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK |
                                       NFS_MOUNT_LOCAL_FCNTL);
                        break;
-               case Opt_v2:
-                       mnt->flags &= ~NFS_MOUNT_VER3;
-                       mnt->version = 2;
-                       break;
-               case Opt_v3:
-                       mnt->flags |= NFS_MOUNT_VER3;
-                       mnt->version = 3;
-                       break;
-               case Opt_v4:
-                       mnt->flags &= ~NFS_MOUNT_VER3;
-                       mnt->version = 4;
-                       break;
                case Opt_udp:
                        mnt->flags &= ~NFS_MOUNT_TCP;
                        mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
@@ -1295,26 +1366,6 @@ static int nfs_parse_mount_options(char *raw,
                                goto out_invalid_value;
                        mnt->mount_server.version = option;
                        break;
-               case Opt_nfsvers:
-                       if (nfs_get_option_ul(args, &option))
-                               goto out_invalid_value;
-                       switch (option) {
-                       case NFS2_VERSION:
-                               mnt->flags &= ~NFS_MOUNT_VER3;
-                               mnt->version = 2;
-                               break;
-                       case NFS3_VERSION:
-                               mnt->flags |= NFS_MOUNT_VER3;
-                               mnt->version = 3;
-                               break;
-                       case NFS4_VERSION:
-                               mnt->flags &= ~NFS_MOUNT_VER3;
-                               mnt->version = 4;
-                               break;
-                       default:
-                               goto out_invalid_value;
-                       }
-                       break;
                case Opt_minorversion:
                        if (nfs_get_option_ul(args, &option))
                                goto out_invalid_value;
@@ -1326,6 +1377,15 @@ static int nfs_parse_mount_options(char *raw,
                /*
                 * options that take text values
                 */
+               case Opt_nfsvers:
+                       string = match_strdup(args);
+                       if (string == NULL)
+                               goto out_nomem;
+                       rc = nfs_parse_version_string(string, mnt, args);
+                       kfree(string);
+                       if (!rc)
+                               goto out_invalid_value;
+                       break;
                case Opt_sec:
                        string = match_strdup(args);
                        if (string == NULL)
@@ -1405,7 +1465,7 @@ static int nfs_parse_mount_options(char *raw,
                        if (string == NULL)
                                goto out_nomem;
                        mnt->nfs_server.addrlen =
-                               rpc_pton(string, strlen(string),
+                               rpc_pton(mnt->net, string, strlen(string),
                                        (struct sockaddr *)
                                        &mnt->nfs_server.address,
                                        sizeof(mnt->nfs_server.address));
@@ -1427,7 +1487,7 @@ static int nfs_parse_mount_options(char *raw,
                        if (string == NULL)
                                goto out_nomem;
                        mnt->mount_server.addrlen =
-                               rpc_pton(string, strlen(string),
+                               rpc_pton(mnt->net, string, strlen(string),
                                        (struct sockaddr *)
                                        &mnt->mount_server.address,
                                        sizeof(mnt->mount_server.address));
@@ -1516,6 +1576,9 @@ static int nfs_parse_mount_options(char *raw,
        if (!sloppy && invalid_option)
                return 0;
 
+       if (mnt->minorversion && mnt->version != 4)
+               goto out_minorversion_mismatch;
+
        /*
         * verify that any proto=/mountproto= options match the address
         * familiies in the addr=/mountaddr= options.
@@ -1549,6 +1612,10 @@ out_invalid_address:
 out_invalid_value:
        printk(KERN_INFO "NFS: bad mount option value specified: %s\n", p);
        return 0;
+out_minorversion_mismatch:
+       printk(KERN_INFO "NFS: mount option vers=%u does not support "
+                        "minorversion=%u\n", mnt->version, mnt->minorversion);
+       return 0;
 out_nomem:
        printk(KERN_INFO "NFS: not enough memory to parse option\n");
        return 0;
@@ -1622,6 +1689,7 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
                .noresvport     = args->flags & NFS_MOUNT_NORESVPORT,
                .auth_flav_len  = &server_authlist_len,
                .auth_flavs     = server_authlist,
+               .net            = args->net,
        };
        int status;
 
@@ -2047,7 +2115,7 @@ static inline void nfs_initialise_sb(struct super_block *sb)
 
        /* We probably want something more informative here */
        snprintf(sb->s_id, sizeof(sb->s_id),
-                "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev));
+                "%u:%u", MAJOR(sb->s_dev), MINOR(sb->s_dev));
 
        if (sb->s_blocksize == 0)
                sb->s_blocksize = nfs_block_bits(server->wsize,
@@ -2499,12 +2567,6 @@ static int nfs4_validate_text_mount_data(void *options,
                return -EINVAL;
        }
 
-       if (args->client_address == NULL) {
-               dfprintk(MOUNT,
-                        "NFS4: mount program didn't pass callback address\n");
-               return -EINVAL;
-       }
-
        return nfs_parse_devname(dev_name,
                                   &args->nfs_server.hostname,
                                   NFS4_MAXNAMLEN,
@@ -2663,8 +2725,7 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags,
        if (!s->s_root) {
                /* initial superblock/root creation */
                nfs4_fill_super(s);
-               nfs_fscache_get_super_cookie(
-                       s, data ? data->fscache_uniq : NULL, NULL);
+               nfs_fscache_get_super_cookie(s, data->fscache_uniq, NULL);
        }
 
        mntroot = nfs4_get_root(s, mntfh, dev_name);
index 978aaeb8a0936617ed19cde6c5995e247b77af4c..ad4d2e787b2041d17eaacc4a1ce8097f0cc13aca 100644 (file)
@@ -32,7 +32,6 @@ static ctl_table nfs_cb_sysctls[] = {
                .extra1 = (int *)&nfs_set_port_min,
                .extra2 = (int *)&nfs_set_port_max,
        },
-#ifndef CONFIG_NFS_USE_NEW_IDMAPPER
        {
                .procname = "idmap_cache_timeout",
                .data = &nfs_idmap_cache_timeout,
@@ -40,7 +39,6 @@ static ctl_table nfs_cb_sysctls[] = {
                .mode = 0644,
                .proc_handler = proc_dointvec_jiffies,
        },
-#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
 #endif
        {
                .procname       = "nfs_mountpoint_timeout",
index 4f9319a2e5674554d48a01af2189011a5e35346c..3210a03342f924e886e5c7f174c9147de54911a4 100644 (file)
 #include "iostat.h"
 #include "delegation.h"
 
-struct nfs_unlinkdata {
-       struct hlist_node list;
-       struct nfs_removeargs args;
-       struct nfs_removeres res;
-       struct inode *dir;
-       struct rpc_cred *cred;
-       struct nfs_fattr dir_attr;
-};
-
 /**
  * nfs_free_unlinkdata - release data from a sillydelete operation.
  * @data: pointer to unlink structure.
@@ -107,25 +98,16 @@ static void nfs_async_unlink_release(void *calldata)
        nfs_sb_deactive(sb);
 }
 
-#if defined(CONFIG_NFS_V4_1)
-void nfs_unlink_prepare(struct rpc_task *task, void *calldata)
+static void nfs_unlink_prepare(struct rpc_task *task, void *calldata)
 {
        struct nfs_unlinkdata *data = calldata;
-       struct nfs_server *server = NFS_SERVER(data->dir);
-
-       if (nfs4_setup_sequence(server, &data->args.seq_args,
-                               &data->res.seq_res, 1, task))
-               return;
-       rpc_call_start(task);
+       NFS_PROTO(data->dir)->unlink_rpc_prepare(task, data);
 }
-#endif /* CONFIG_NFS_V4_1 */
 
 static const struct rpc_call_ops nfs_unlink_ops = {
        .rpc_call_done = nfs_async_unlink_done,
        .rpc_release = nfs_async_unlink_release,
-#if defined(CONFIG_NFS_V4_1)
        .rpc_call_prepare = nfs_unlink_prepare,
-#endif /* CONFIG_NFS_V4_1 */
 };
 
 static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct nfs_unlinkdata *data)
@@ -341,18 +323,6 @@ nfs_cancel_async_unlink(struct dentry *dentry)
        spin_unlock(&dentry->d_lock);
 }
 
-struct nfs_renamedata {
-       struct nfs_renameargs   args;
-       struct nfs_renameres    res;
-       struct rpc_cred         *cred;
-       struct inode            *old_dir;
-       struct dentry           *old_dentry;
-       struct nfs_fattr        old_fattr;
-       struct inode            *new_dir;
-       struct dentry           *new_dentry;
-       struct nfs_fattr        new_fattr;
-};
-
 /**
  * nfs_async_rename_done - Sillyrename post-processing
  * @task: rpc_task of the sillyrename
@@ -403,25 +373,16 @@ static void nfs_async_rename_release(void *calldata)
        kfree(data);
 }
 
-#if defined(CONFIG_NFS_V4_1)
 static void nfs_rename_prepare(struct rpc_task *task, void *calldata)
 {
        struct nfs_renamedata *data = calldata;
-       struct nfs_server *server = NFS_SERVER(data->old_dir);
-
-       if (nfs4_setup_sequence(server, &data->args.seq_args,
-                               &data->res.seq_res, 1, task))
-               return;
-       rpc_call_start(task);
+       NFS_PROTO(data->old_dir)->rename_rpc_prepare(task, data);
 }
-#endif /* CONFIG_NFS_V4_1 */
 
 static const struct rpc_call_ops nfs_rename_ops = {
        .rpc_call_done = nfs_async_rename_done,
        .rpc_release = nfs_async_rename_release,
-#if defined(CONFIG_NFS_V4_1)
        .rpc_call_prepare = nfs_rename_prepare,
-#endif /* CONFIG_NFS_V4_1 */
 };
 
 /**
index 834f0fe96f89f4acf707df504e1a244fc56d466b..2c68818f68ac056b8587c22bf40f8f5d229a6403 100644 (file)
@@ -100,7 +100,6 @@ void nfs_writedata_free(struct nfs_write_data *p)
 
 void nfs_writedata_release(struct nfs_write_data *wdata)
 {
-       put_lseg(wdata->lseg);
        put_nfs_open_context(wdata->args.context);
        nfs_writedata_free(wdata);
 }
@@ -236,10 +235,10 @@ static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblo
                req = nfs_page_find_request_locked(page);
                if (req == NULL)
                        break;
-               if (nfs_set_page_tag_locked(req))
+               if (nfs_lock_request_dontget(req))
                        break;
                /* Note: If we hold the page lock, as is the case in nfs_writepage,
-                *       then the call to nfs_set_page_tag_locked() will always
+                *       then the call to nfs_lock_request_dontget() will always
                 *       succeed provided that someone hasn't already marked the
                 *       request as dirty (in which case we don't care).
                 */
@@ -375,21 +374,14 @@ out_err:
 /*
  * Insert a write request into an inode
  */
-static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
+static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
 {
        struct nfs_inode *nfsi = NFS_I(inode);
-       int error;
-
-       error = radix_tree_preload(GFP_NOFS);
-       if (error != 0)
-               goto out;
 
        /* Lock the request! */
        nfs_lock_request_dontget(req);
 
        spin_lock(&inode->i_lock);
-       error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req);
-       BUG_ON(error);
        if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE))
                inode->i_version++;
        set_bit(PG_MAPPED, &req->wb_flags);
@@ -397,12 +389,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
        set_page_private(req->wb_page, (unsigned long)req);
        nfsi->npages++;
        kref_get(&req->wb_kref);
-       radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
-                               NFS_PAGE_TAG_LOCKED);
        spin_unlock(&inode->i_lock);
-       radix_tree_preload_end();
-out:
-       return error;
 }
 
 /*
@@ -419,7 +406,6 @@ static void nfs_inode_remove_request(struct nfs_page *req)
        set_page_private(req->wb_page, 0);
        ClearPagePrivate(req->wb_page);
        clear_bit(PG_MAPPED, &req->wb_flags);
-       radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
        nfsi->npages--;
        spin_unlock(&inode->i_lock);
        nfs_release_request(req);
@@ -432,39 +418,90 @@ nfs_mark_request_dirty(struct nfs_page *req)
 }
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-/*
- * Add a request to the inode's commit list.
+/**
+ * nfs_request_add_commit_list - add request to a commit list
+ * @req: pointer to a struct nfs_page
+ * @head: commit list head
+ *
+ * This sets the PG_CLEAN bit, updates the inode global count of
+ * number of outstanding requests requiring a commit as well as
+ * the MM page stats.
+ *
+ * The caller must _not_ hold the inode->i_lock, but must be
+ * holding the nfs_page lock.
  */
-static void
-nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
+void
+nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head)
 {
        struct inode *inode = req->wb_context->dentry->d_inode;
-       struct nfs_inode *nfsi = NFS_I(inode);
 
-       spin_lock(&inode->i_lock);
        set_bit(PG_CLEAN, &(req)->wb_flags);
-       radix_tree_tag_set(&nfsi->nfs_page_tree,
-                       req->wb_index,
-                       NFS_PAGE_TAG_COMMIT);
-       nfsi->ncommit++;
+       spin_lock(&inode->i_lock);
+       nfs_list_add_request(req, head);
+       NFS_I(inode)->ncommit++;
        spin_unlock(&inode->i_lock);
-       pnfs_mark_request_commit(req, lseg);
        inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
        inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE);
        __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
 }
+EXPORT_SYMBOL_GPL(nfs_request_add_commit_list);
 
-static int
+/**
+ * nfs_request_remove_commit_list - Remove request from a commit list
+ * @req: pointer to a nfs_page
+ *
+ * This clears the PG_CLEAN bit, and updates the inode global count of
+ * number of outstanding requests requiring a commit
+ * It does not update the MM page stats.
+ *
+ * The caller _must_ hold the inode->i_lock and the nfs_page lock.
+ */
+void
+nfs_request_remove_commit_list(struct nfs_page *req)
+{
+       struct inode *inode = req->wb_context->dentry->d_inode;
+
+       if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags))
+               return;
+       nfs_list_remove_request(req);
+       NFS_I(inode)->ncommit--;
+}
+EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list);
+
+
+/*
+ * Add a request to the inode's commit list.
+ */
+static void
+nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
+{
+       struct inode *inode = req->wb_context->dentry->d_inode;
+
+       if (pnfs_mark_request_commit(req, lseg))
+               return;
+       nfs_request_add_commit_list(req, &NFS_I(inode)->commit_list);
+}
+
+static void
+nfs_clear_page_commit(struct page *page)
+{
+       dec_zone_page_state(page, NR_UNSTABLE_NFS);
+       dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE);
+}
+
+static void
 nfs_clear_request_commit(struct nfs_page *req)
 {
-       struct page *page = req->wb_page;
+       if (test_bit(PG_CLEAN, &req->wb_flags)) {
+               struct inode *inode = req->wb_context->dentry->d_inode;
 
-       if (test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) {
-               dec_zone_page_state(page, NR_UNSTABLE_NFS);
-               dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE);
-               return 1;
+               if (!pnfs_clear_request_commit(req)) {
+                       spin_lock(&inode->i_lock);
+                       nfs_request_remove_commit_list(req);
+                       spin_unlock(&inode->i_lock);
+               }
+               nfs_clear_page_commit(req->wb_page);
        }
-       return 0;
 }
 
 static inline
@@ -491,15 +528,14 @@ int nfs_reschedule_unstable_write(struct nfs_page *req,
        return 0;
 }
 #else
-static inline void
+static void
 nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
 {
 }
 
-static inline int
+static void
 nfs_clear_request_commit(struct nfs_page *req)
 {
-       return 0;
 }
 
 static inline
@@ -520,46 +556,65 @@ int nfs_reschedule_unstable_write(struct nfs_page *req,
 static int
 nfs_need_commit(struct nfs_inode *nfsi)
 {
-       return radix_tree_tagged(&nfsi->nfs_page_tree, NFS_PAGE_TAG_COMMIT);
+       return nfsi->ncommit > 0;
+}
+
+/* i_lock held by caller */
+static int
+nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max,
+               spinlock_t *lock)
+{
+       struct nfs_page *req, *tmp;
+       int ret = 0;
+
+       list_for_each_entry_safe(req, tmp, src, wb_list) {
+               if (!nfs_lock_request(req))
+                       continue;
+               if (cond_resched_lock(lock))
+                       list_safe_reset_next(req, tmp, wb_list);
+               nfs_request_remove_commit_list(req);
+               nfs_list_add_request(req, dst);
+               ret++;
+               if (ret == max)
+                       break;
+       }
+       return ret;
 }
 
 /*
  * nfs_scan_commit - Scan an inode for commit requests
  * @inode: NFS inode to scan
  * @dst: destination list
- * @idx_start: lower bound of page->index to scan.
- * @npages: idx_start + npages sets the upper bound to scan.
  *
  * Moves requests from the inode's 'commit' request list.
  * The requests are *not* checked to ensure that they form a contiguous set.
  */
 static int
-nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
+nfs_scan_commit(struct inode *inode, struct list_head *dst)
 {
        struct nfs_inode *nfsi = NFS_I(inode);
-       int ret;
-
-       if (!nfs_need_commit(nfsi))
-               return 0;
+       int ret = 0;
 
        spin_lock(&inode->i_lock);
-       ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
-       if (ret > 0)
-               nfsi->ncommit -= ret;
-       spin_unlock(&inode->i_lock);
-
-       if (nfs_need_commit(NFS_I(inode)))
-               __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+       if (nfsi->ncommit > 0) {
+               const int max = INT_MAX;
 
+               ret = nfs_scan_commit_list(&nfsi->commit_list, dst, max,
+                               &inode->i_lock);
+               ret += pnfs_scan_commit_lists(inode, max - ret,
+                               &inode->i_lock);
+       }
+       spin_unlock(&inode->i_lock);
        return ret;
 }
+
 #else
 static inline int nfs_need_commit(struct nfs_inode *nfsi)
 {
        return 0;
 }
 
-static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
+static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst)
 {
        return 0;
 }
@@ -604,7 +659,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
                    || end < req->wb_offset)
                        goto out_flushme;
 
-               if (nfs_set_page_tag_locked(req))
+               if (nfs_lock_request_dontget(req))
                        break;
 
                /* The request is locked, so wait and then retry */
@@ -616,13 +671,6 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
                spin_lock(&inode->i_lock);
        }
 
-       if (nfs_clear_request_commit(req) &&
-           radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree,
-                                req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) {
-               NFS_I(inode)->ncommit--;
-               pnfs_clear_request_commit(req);
-       }
-
        /* Okay, the request matches. Update the region */
        if (offset < req->wb_offset) {
                req->wb_offset = offset;
@@ -634,6 +682,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
                req->wb_bytes = rqend - req->wb_offset;
 out_unlock:
        spin_unlock(&inode->i_lock);
+       nfs_clear_request_commit(req);
        return req;
 out_flushme:
        spin_unlock(&inode->i_lock);
@@ -655,7 +704,6 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
 {
        struct inode *inode = page->mapping->host;
        struct nfs_page *req;
-       int error;
 
        req = nfs_try_to_update_request(inode, page, offset, bytes);
        if (req != NULL)
@@ -663,11 +711,7 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
        req = nfs_create_request(ctx, inode, page, offset, bytes);
        if (IS_ERR(req))
                goto out;
-       error = nfs_inode_add_request(inode, req);
-       if (error != 0) {
-               nfs_release_request(req);
-               req = ERR_PTR(error);
-       }
+       nfs_inode_add_request(inode, req);
 out:
        return req;
 }
@@ -684,7 +728,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
        nfs_grow_file(page, offset, count);
        nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
        nfs_mark_request_dirty(req);
-       nfs_clear_page_tag_locked(req);
+       nfs_unlock_request(req);
        return 0;
 }
 
@@ -777,7 +821,7 @@ static void nfs_writepage_release(struct nfs_page *req,
 
        if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data))
                nfs_inode_remove_request(req);
-       nfs_clear_page_tag_locked(req);
+       nfs_unlock_request(req);
        nfs_end_page_writeback(page);
 }
 
@@ -925,7 +969,7 @@ static void nfs_redirty_request(struct nfs_page *req)
        struct page *page = req->wb_page;
 
        nfs_mark_request_dirty(req);
-       nfs_clear_page_tag_locked(req);
+       nfs_unlock_request(req);
        nfs_end_page_writeback(page);
 }
 
@@ -1128,23 +1172,14 @@ out:
        nfs_writedata_release(calldata);
 }
 
-#if defined(CONFIG_NFS_V4_1)
 void nfs_write_prepare(struct rpc_task *task, void *calldata)
 {
        struct nfs_write_data *data = calldata;
-
-       if (nfs4_setup_sequence(NFS_SERVER(data->inode),
-                               &data->args.seq_args,
-                               &data->res.seq_res, 1, task))
-               return;
-       rpc_call_start(task);
+       NFS_PROTO(data->inode)->write_rpc_prepare(task, data);
 }
-#endif /* CONFIG_NFS_V4_1 */
 
 static const struct rpc_call_ops nfs_write_partial_ops = {
-#if defined(CONFIG_NFS_V4_1)
        .rpc_call_prepare = nfs_write_prepare,
-#endif /* CONFIG_NFS_V4_1 */
        .rpc_call_done = nfs_writeback_done_partial,
        .rpc_release = nfs_writeback_release_partial,
 };
@@ -1199,16 +1234,14 @@ static void nfs_writeback_release_full(void *calldata)
 remove_request:
                nfs_inode_remove_request(req);
        next:
-               nfs_clear_page_tag_locked(req);
+               nfs_unlock_request(req);
                nfs_end_page_writeback(page);
        }
        nfs_writedata_release(calldata);
 }
 
 static const struct rpc_call_ops nfs_write_full_ops = {
-#if defined(CONFIG_NFS_V4_1)
        .rpc_call_prepare = nfs_write_prepare,
-#endif /* CONFIG_NFS_V4_1 */
        .rpc_call_done = nfs_writeback_done_full,
        .rpc_release = nfs_writeback_release_full,
 };
@@ -1325,7 +1358,6 @@ void nfs_commitdata_release(void *data)
 {
        struct nfs_write_data *wdata = data;
 
-       put_lseg(wdata->lseg);
        put_nfs_open_context(wdata->args.context);
        nfs_commit_free(wdata);
 }
@@ -1411,7 +1443,7 @@ void nfs_retry_commit(struct list_head *page_list,
                dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
                dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
                             BDI_RECLAIMABLE);
-               nfs_clear_page_tag_locked(req);
+               nfs_unlock_request(req);
        }
 }
 EXPORT_SYMBOL_GPL(nfs_retry_commit);
@@ -1460,7 +1492,7 @@ void nfs_commit_release_pages(struct nfs_write_data *data)
        while (!list_empty(&data->pages)) {
                req = nfs_list_entry(data->pages.next);
                nfs_list_remove_request(req);
-               nfs_clear_request_commit(req);
+               nfs_clear_page_commit(req->wb_page);
 
                dprintk("NFS:       commit (%s/%lld %d@%lld)",
                        req->wb_context->dentry->d_sb->s_id,
@@ -1486,7 +1518,7 @@ void nfs_commit_release_pages(struct nfs_write_data *data)
                dprintk(" mismatch\n");
                nfs_mark_request_dirty(req);
        next:
-               nfs_clear_page_tag_locked(req);
+               nfs_unlock_request(req);
        }
 }
 EXPORT_SYMBOL_GPL(nfs_commit_release_pages);
@@ -1501,9 +1533,7 @@ static void nfs_commit_release(void *calldata)
 }
 
 static const struct rpc_call_ops nfs_commit_ops = {
-#if defined(CONFIG_NFS_V4_1)
        .rpc_call_prepare = nfs_write_prepare,
-#endif /* CONFIG_NFS_V4_1 */
        .rpc_call_done = nfs_commit_done,
        .rpc_release = nfs_commit_release,
 };
@@ -1517,7 +1547,7 @@ int nfs_commit_inode(struct inode *inode, int how)
        res = nfs_commit_set_lock(NFS_I(inode), may_wait);
        if (res <= 0)
                goto out_mark_dirty;
-       res = nfs_scan_commit(inode, &head, 0, 0);
+       res = nfs_scan_commit(inode, &head);
        if (res) {
                int error;
 
@@ -1635,6 +1665,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
                if (req == NULL)
                        break;
                if (nfs_lock_request_dontget(req)) {
+                       nfs_clear_request_commit(req);
                        nfs_inode_remove_request(req);
                        /*
                         * In case nfs_inode_remove_request has marked the
index 6f3ebb48b12fad4532884df525e8942bd3564c90..0e262f32ac415a577793c74bb8cf6e7cd8d9202f 100644 (file)
@@ -605,24 +605,24 @@ static struct rpc_version nfs_cb_version4 = {
        .procs                  = nfs4_cb_procedures
 };
 
-static struct rpc_version *nfs_cb_version[] = {
+static const struct rpc_version *nfs_cb_version[] = {
        &nfs_cb_version4,
 };
 
-static struct rpc_program cb_program;
+static const struct rpc_program cb_program;
 
 static struct rpc_stat cb_stats = {
        .program                = &cb_program
 };
 
 #define NFS4_CALLBACK 0x40000000
-static struct rpc_program cb_program = {
+static const struct rpc_program cb_program = {
        .name                   = "nfs4_cb",
        .number                 = NFS4_CALLBACK,
        .nrvers                 = ARRAY_SIZE(nfs_cb_version),
        .version                = nfs_cb_version,
        .stats                  = &cb_stats,
-       .pipe_dir_name          = "/nfsd4_cb",
+       .pipe_dir_name          = "nfsd4_cb",
 };
 
 static int max_cb_time(void)
index e8c98f0096706c04e70456c35af2f8123241af7a..c5cddd659429f33b371ea03a3d920808911ce8f1 100644 (file)
@@ -1308,7 +1308,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_r
        else
                goto out_err;
 
-       conn->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val,
+       conn->cb_addrlen = rpc_uaddr2sockaddr(&init_net, se->se_callback_addr_val,
                                            se->se_callback_addr_len,
                                            (struct sockaddr *)&conn->cb_addr,
                                            sizeof(conn->cb_addr));
index 748eda93ce590d1ad1e4f7892f29e25f8ad8856a..64c24af8d7eaf40d5436aea2b2a6c44d588102e5 100644 (file)
@@ -223,7 +223,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size)
        if (qword_get(&buf, fo_path, size) < 0)
                return -EINVAL;
 
-       if (rpc_pton(fo_path, size, sap, salen) == 0)
+       if (rpc_pton(&init_net, fo_path, size, sap, salen) == 0)
                return -EINVAL;
 
        return nlmsvc_unlock_all_by_ip(sap);
@@ -722,7 +722,7 @@ static ssize_t __write_ports_addxprt(char *buf)
        nfsd_serv->sv_nrthreads--;
        return 0;
 out_close:
-       xprt = svc_find_xprt(nfsd_serv, transport, PF_INET, port);
+       xprt = svc_find_xprt(nfsd_serv, transport, &init_net, PF_INET, port);
        if (xprt != NULL) {
                svc_close_xprt(xprt);
                svc_xprt_put(xprt);
@@ -748,7 +748,7 @@ static ssize_t __write_ports_delxprt(char *buf)
        if (port < 1 || port > USHRT_MAX || nfsd_serv == NULL)
                return -EINVAL;
 
-       xprt = svc_find_xprt(nfsd_serv, transport, AF_UNSPEC, port);
+       xprt = svc_find_xprt(nfsd_serv, transport, &init_net, AF_UNSPEC, port);
        if (xprt == NULL)
                return -ENOTCONN;
 
index eda7d7e55e05c45aa309f1465f60368bf3f00242..fce472f5f39e74f2fb9ab36bdf70019f573a73af 100644 (file)
@@ -251,13 +251,13 @@ static void nfsd_shutdown(void)
        nfsd_up = false;
 }
 
-static void nfsd_last_thread(struct svc_serv *serv)
+static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
 {
        /* When last nfsd thread exits we need to do some clean-up */
        nfsd_serv = NULL;
        nfsd_shutdown();
 
-       svc_rpcb_cleanup(serv);
+       svc_rpcb_cleanup(serv, net);
 
        printk(KERN_WARNING "nfsd: last server has exited, flushing export "
                            "cache\n");
index a2e2402b2afb5a45200b5902fadabcb144d16ebf..6d4521feb6e339729d9e9e8583088fcd1e0ec6f9 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/module.h>
 #include <linux/sunrpc/stats.h>
 #include <linux/nfsd/stats.h>
+#include <net/net_namespace.h>
 
 #include "nfsd.h"
 
@@ -94,11 +95,11 @@ static const struct file_operations nfsd_proc_fops = {
 void
 nfsd_stat_init(void)
 {
-       svc_proc_register(&nfsd_svcstats, &nfsd_proc_fops);
+       svc_proc_register(&init_net, &nfsd_svcstats, &nfsd_proc_fops);
 }
 
 void
 nfsd_stat_shutdown(void)
 {
-       svc_proc_unregister("nfsd");
+       svc_proc_unregister(&init_net, "nfsd");
 }
index 9ec22d3b4293f3c2d2c994927fd7102e4a20baec..82c585f715e341c36666fdc378f75408e3ea3d14 100644 (file)
@@ -68,9 +68,25 @@ void pstore_set_kmsg_bytes(int bytes)
 /* Tag each group of saved records with a sequence number */
 static int     oopscount;
 
-static char *reason_str[] = {
-       "Oops", "Panic", "Kexec", "Restart", "Halt", "Poweroff", "Emergency"
-};
+static const char *get_reason_str(enum kmsg_dump_reason reason)
+{
+       switch (reason) {
+       case KMSG_DUMP_PANIC:
+               return "Panic";
+       case KMSG_DUMP_OOPS:
+               return "Oops";
+       case KMSG_DUMP_EMERG:
+               return "Emergency";
+       case KMSG_DUMP_RESTART:
+               return "Restart";
+       case KMSG_DUMP_HALT:
+               return "Halt";
+       case KMSG_DUMP_POWEROFF:
+               return "Poweroff";
+       default:
+               return "Unknown";
+       }
+}
 
 /*
  * callback from kmsg_dump. (s2,l2) has the most recently
@@ -85,17 +101,15 @@ static void pstore_dump(struct kmsg_dumper *dumper,
        unsigned long   s1_start, s2_start;
        unsigned long   l1_cpy, l2_cpy;
        unsigned long   size, total = 0;
-       char            *dst, *why;
+       char            *dst;
+       const char      *why;
        u64             id;
        int             hsize, ret;
        unsigned int    part = 1;
        unsigned long   flags = 0;
        int             is_locked = 0;
 
-       if (reason < ARRAY_SIZE(reason_str))
-               why = reason_str[reason];
-       else
-               why = "Unknown";
+       why = get_reason_str(reason);
 
        if (in_nmi()) {
                is_locked = spin_trylock(&psinfo->buf_lock);
index fc2c4388d1262a1771d27acb7c55507369cf0dba..9a391204ca278e4186300215ef521183af70eb68 100644 (file)
@@ -282,10 +282,9 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
        case Q_XGETQUOTA:
                return quota_getxquota(sb, type, id, addr);
        case Q_XQUOTASYNC:
-               /* caller already holds s_umount */
                if (sb->s_flags & MS_RDONLY)
                        return -EROFS;
-               writeback_inodes_sb(sb, WB_REASON_SYNC);
+               /* XFS quotas are fully coherent now, making this call a noop */
                return 0;
        default:
                return -EINVAL;
index f922cbacdb96e3e6f4f238cf7360079162452147..1934084e20884ba692b4fd9cea2ce58c8d6e4c4c 100644 (file)
@@ -36,7 +36,7 @@
 
 #ifdef CONFIG_UBIFS_FS_DEBUG
 
-DEFINE_SPINLOCK(dbg_lock);
+static DEFINE_SPINLOCK(dbg_lock);
 
 static const char *get_key_fmt(int fmt)
 {
@@ -221,15 +221,15 @@ const char *dbg_jhead(int jhead)
 
 static void dump_ch(const struct ubifs_ch *ch)
 {
-       printk(KERN_DEBUG "\tmagic          %#x\n", le32_to_cpu(ch->magic));
-       printk(KERN_DEBUG "\tcrc            %#x\n", le32_to_cpu(ch->crc));
-       printk(KERN_DEBUG "\tnode_type      %d (%s)\n", ch->node_type,
+       printk(KERN_ERR "\tmagic          %#x\n", le32_to_cpu(ch->magic));
+       printk(KERN_ERR "\tcrc            %#x\n", le32_to_cpu(ch->crc));
+       printk(KERN_ERR "\tnode_type      %d (%s)\n", ch->node_type,
               dbg_ntype(ch->node_type));
-       printk(KERN_DEBUG "\tgroup_type     %d (%s)\n", ch->group_type,
+       printk(KERN_ERR "\tgroup_type     %d (%s)\n", ch->group_type,
               dbg_gtype(ch->group_type));
-       printk(KERN_DEBUG "\tsqnum          %llu\n",
+       printk(KERN_ERR "\tsqnum          %llu\n",
               (unsigned long long)le64_to_cpu(ch->sqnum));
-       printk(KERN_DEBUG "\tlen            %u\n", le32_to_cpu(ch->len));
+       printk(KERN_ERR "\tlen            %u\n", le32_to_cpu(ch->len));
 }
 
 void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode)
@@ -240,43 +240,43 @@ void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode)
        struct ubifs_dent_node *dent, *pdent = NULL;
        int count = 2;
 
-       printk(KERN_DEBUG "Dump in-memory inode:");
-       printk(KERN_DEBUG "\tinode          %lu\n", inode->i_ino);
-       printk(KERN_DEBUG "\tsize           %llu\n",
+       printk(KERN_ERR "Dump in-memory inode:");
+       printk(KERN_ERR "\tinode          %lu\n", inode->i_ino);
+       printk(KERN_ERR "\tsize           %llu\n",
               (unsigned long long)i_size_read(inode));
-       printk(KERN_DEBUG "\tnlink          %u\n", inode->i_nlink);
-       printk(KERN_DEBUG "\tuid            %u\n", (unsigned int)inode->i_uid);
-       printk(KERN_DEBUG "\tgid            %u\n", (unsigned int)inode->i_gid);
-       printk(KERN_DEBUG "\tatime          %u.%u\n",
+       printk(KERN_ERR "\tnlink          %u\n", inode->i_nlink);
+       printk(KERN_ERR "\tuid            %u\n", (unsigned int)inode->i_uid);
+       printk(KERN_ERR "\tgid            %u\n", (unsigned int)inode->i_gid);
+       printk(KERN_ERR "\tatime          %u.%u\n",
               (unsigned int)inode->i_atime.tv_sec,
               (unsigned int)inode->i_atime.tv_nsec);
-       printk(KERN_DEBUG "\tmtime          %u.%u\n",
+       printk(KERN_ERR "\tmtime          %u.%u\n",
               (unsigned int)inode->i_mtime.tv_sec,
               (unsigned int)inode->i_mtime.tv_nsec);
-       printk(KERN_DEBUG "\tctime          %u.%u\n",
+       printk(KERN_ERR "\tctime          %u.%u\n",
               (unsigned int)inode->i_ctime.tv_sec,
               (unsigned int)inode->i_ctime.tv_nsec);
-       printk(KERN_DEBUG "\tcreat_sqnum    %llu\n", ui->creat_sqnum);
-       printk(KERN_DEBUG "\txattr_size     %u\n", ui->xattr_size);
-       printk(KERN_DEBUG "\txattr_cnt      %u\n", ui->xattr_cnt);
-       printk(KERN_DEBUG "\txattr_names    %u\n", ui->xattr_names);
-       printk(KERN_DEBUG "\tdirty          %u\n", ui->dirty);
-       printk(KERN_DEBUG "\txattr          %u\n", ui->xattr);
-       printk(KERN_DEBUG "\tbulk_read      %u\n", ui->xattr);
-       printk(KERN_DEBUG "\tsynced_i_size  %llu\n",
+       printk(KERN_ERR "\tcreat_sqnum    %llu\n", ui->creat_sqnum);
+       printk(KERN_ERR "\txattr_size     %u\n", ui->xattr_size);
+       printk(KERN_ERR "\txattr_cnt      %u\n", ui->xattr_cnt);
+       printk(KERN_ERR "\txattr_names    %u\n", ui->xattr_names);
+       printk(KERN_ERR "\tdirty          %u\n", ui->dirty);
+       printk(KERN_ERR "\txattr          %u\n", ui->xattr);
+       printk(KERN_ERR "\tbulk_read      %u\n", ui->xattr);
+       printk(KERN_ERR "\tsynced_i_size  %llu\n",
               (unsigned long long)ui->synced_i_size);
-       printk(KERN_DEBUG "\tui_size        %llu\n",
+       printk(KERN_ERR "\tui_size        %llu\n",
               (unsigned long long)ui->ui_size);
-       printk(KERN_DEBUG "\tflags          %d\n", ui->flags);
-       printk(KERN_DEBUG "\tcompr_type     %d\n", ui->compr_type);
-       printk(KERN_DEBUG "\tlast_page_read %lu\n", ui->last_page_read);
-       printk(KERN_DEBUG "\tread_in_a_row  %lu\n", ui->read_in_a_row);
-       printk(KERN_DEBUG "\tdata_len       %d\n", ui->data_len);
+       printk(KERN_ERR "\tflags          %d\n", ui->flags);
+       printk(KERN_ERR "\tcompr_type     %d\n", ui->compr_type);
+       printk(KERN_ERR "\tlast_page_read %lu\n", ui->last_page_read);
+       printk(KERN_ERR "\tread_in_a_row  %lu\n", ui->read_in_a_row);
+       printk(KERN_ERR "\tdata_len       %d\n", ui->data_len);
 
        if (!S_ISDIR(inode->i_mode))
                return;
 
-       printk(KERN_DEBUG "List of directory entries:\n");
+       printk(KERN_ERR "List of directory entries:\n");
        ubifs_assert(!mutex_is_locked(&c->tnc_mutex));
 
        lowest_dent_key(c, &key, inode->i_ino);
@@ -284,11 +284,11 @@ void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode)
                dent = ubifs_tnc_next_ent(c, &key, &nm);
                if (IS_ERR(dent)) {
                        if (PTR_ERR(dent) != -ENOENT)
-                               printk(KERN_DEBUG "error %ld\n", PTR_ERR(dent));
+                               printk(KERN_ERR "error %ld\n", PTR_ERR(dent));
                        break;
                }
 
-               printk(KERN_DEBUG "\t%d: %s (%s)\n",
+               printk(KERN_ERR "\t%d: %s (%s)\n",
                       count++, dent->name, get_dent_type(dent->type));
 
                nm.name = dent->name;
@@ -312,8 +312,8 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
 
        /* If the magic is incorrect, just hexdump the first bytes */
        if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) {
-               printk(KERN_DEBUG "Not a node, first %zu bytes:", UBIFS_CH_SZ);
-               print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1,
+               printk(KERN_ERR "Not a node, first %zu bytes:", UBIFS_CH_SZ);
+               print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 32, 1,
                               (void *)node, UBIFS_CH_SZ, 1);
                return;
        }
@@ -326,7 +326,7 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
        {
                const struct ubifs_pad_node *pad = node;
 
-               printk(KERN_DEBUG "\tpad_len        %u\n",
+               printk(KERN_ERR "\tpad_len        %u\n",
                       le32_to_cpu(pad->pad_len));
                break;
        }
@@ -335,50 +335,50 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
                const struct ubifs_sb_node *sup = node;
                unsigned int sup_flags = le32_to_cpu(sup->flags);
 
-               printk(KERN_DEBUG "\tkey_hash       %d (%s)\n",
+               printk(KERN_ERR "\tkey_hash       %d (%s)\n",
                       (int)sup->key_hash, get_key_hash(sup->key_hash));
-               printk(KERN_DEBUG "\tkey_fmt        %d (%s)\n",
+               printk(KERN_ERR "\tkey_fmt        %d (%s)\n",
                       (int)sup->key_fmt, get_key_fmt(sup->key_fmt));
-               printk(KERN_DEBUG "\tflags          %#x\n", sup_flags);
-               printk(KERN_DEBUG "\t  big_lpt      %u\n",
+               printk(KERN_ERR "\tflags          %#x\n", sup_flags);
+               printk(KERN_ERR "\t  big_lpt      %u\n",
                       !!(sup_flags & UBIFS_FLG_BIGLPT));
-               printk(KERN_DEBUG "\t  space_fixup  %u\n",
+               printk(KERN_ERR "\t  space_fixup  %u\n",
                       !!(sup_flags & UBIFS_FLG_SPACE_FIXUP));
-               printk(KERN_DEBUG "\tmin_io_size    %u\n",
+               printk(KERN_ERR "\tmin_io_size    %u\n",
                       le32_to_cpu(sup->min_io_size));
-               printk(KERN_DEBUG "\tleb_size       %u\n",
+               printk(KERN_ERR "\tleb_size       %u\n",
                       le32_to_cpu(sup->leb_size));
-               printk(KERN_DEBUG "\tleb_cnt        %u\n",
+               printk(KERN_ERR "\tleb_cnt        %u\n",
                       le32_to_cpu(sup->leb_cnt));
-               printk(KERN_DEBUG "\tmax_leb_cnt    %u\n",
+               printk(KERN_ERR "\tmax_leb_cnt    %u\n",
                       le32_to_cpu(sup->max_leb_cnt));
-               printk(KERN_DEBUG "\tmax_bud_bytes  %llu\n",
+               printk(KERN_ERR "\tmax_bud_bytes  %llu\n",
                       (unsigned long long)le64_to_cpu(sup->max_bud_bytes));
-               printk(KERN_DEBUG "\tlog_lebs       %u\n",
+               printk(KERN_ERR "\tlog_lebs       %u\n",
                       le32_to_cpu(sup->log_lebs));
-               printk(KERN_DEBUG "\tlpt_lebs       %u\n",
+               printk(KERN_ERR "\tlpt_lebs       %u\n",
                       le32_to_cpu(sup->lpt_lebs));
-               printk(KERN_DEBUG "\torph_lebs      %u\n",
+               printk(KERN_ERR "\torph_lebs      %u\n",
                       le32_to_cpu(sup->orph_lebs));
-               printk(KERN_DEBUG "\tjhead_cnt      %u\n",
+               printk(KERN_ERR "\tjhead_cnt      %u\n",
                       le32_to_cpu(sup->jhead_cnt));
-               printk(KERN_DEBUG "\tfanout         %u\n",
+               printk(KERN_ERR "\tfanout         %u\n",
                       le32_to_cpu(sup->fanout));
-               printk(KERN_DEBUG "\tlsave_cnt      %u\n",
+               printk(KERN_ERR "\tlsave_cnt      %u\n",
                       le32_to_cpu(sup->lsave_cnt));
-               printk(KERN_DEBUG "\tdefault_compr  %u\n",
+               printk(KERN_ERR "\tdefault_compr  %u\n",
                       (int)le16_to_cpu(sup->default_compr));
-               printk(KERN_DEBUG "\trp_size        %llu\n",
+               printk(KERN_ERR "\trp_size        %llu\n",
                       (unsigned long long)le64_to_cpu(sup->rp_size));
-               printk(KERN_DEBUG "\trp_uid         %u\n",
+               printk(KERN_ERR "\trp_uid         %u\n",
                       le32_to_cpu(sup->rp_uid));
-               printk(KERN_DEBUG "\trp_gid         %u\n",
+               printk(KERN_ERR "\trp_gid         %u\n",
                       le32_to_cpu(sup->rp_gid));
-               printk(KERN_DEBUG "\tfmt_version    %u\n",
+               printk(KERN_ERR "\tfmt_version    %u\n",
                       le32_to_cpu(sup->fmt_version));
-               printk(KERN_DEBUG "\ttime_gran      %u\n",
+               printk(KERN_ERR "\ttime_gran      %u\n",
                       le32_to_cpu(sup->time_gran));
-               printk(KERN_DEBUG "\tUUID           %pUB\n",
+               printk(KERN_ERR "\tUUID           %pUB\n",
                       sup->uuid);
                break;
        }
@@ -386,61 +386,61 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
        {
                const struct ubifs_mst_node *mst = node;
 
-               printk(KERN_DEBUG "\thighest_inum   %llu\n",
+               printk(KERN_ERR "\thighest_inum   %llu\n",
                       (unsigned long long)le64_to_cpu(mst->highest_inum));
-               printk(KERN_DEBUG "\tcommit number  %llu\n",
+               printk(KERN_ERR "\tcommit number  %llu\n",
                       (unsigned long long)le64_to_cpu(mst->cmt_no));
-               printk(KERN_DEBUG "\tflags          %#x\n",
+               printk(KERN_ERR "\tflags          %#x\n",
                       le32_to_cpu(mst->flags));
-               printk(KERN_DEBUG "\tlog_lnum       %u\n",
+               printk(KERN_ERR "\tlog_lnum       %u\n",
                       le32_to_cpu(mst->log_lnum));
-               printk(KERN_DEBUG "\troot_lnum      %u\n",
+               printk(KERN_ERR "\troot_lnum      %u\n",
                       le32_to_cpu(mst->root_lnum));
-               printk(KERN_DEBUG "\troot_offs      %u\n",
+               printk(KERN_ERR "\troot_offs      %u\n",
                       le32_to_cpu(mst->root_offs));
-               printk(KERN_DEBUG "\troot_len       %u\n",
+               printk(KERN_ERR "\troot_len       %u\n",
                       le32_to_cpu(mst->root_len));
-               printk(KERN_DEBUG "\tgc_lnum        %u\n",
+               printk(KERN_ERR "\tgc_lnum        %u\n",
                       le32_to_cpu(mst->gc_lnum));
-               printk(KERN_DEBUG "\tihead_lnum     %u\n",
+               printk(KERN_ERR "\tihead_lnum     %u\n",
                       le32_to_cpu(mst->ihead_lnum));
-               printk(KERN_DEBUG "\tihead_offs     %u\n",
+               printk(KERN_ERR "\tihead_offs     %u\n",
                       le32_to_cpu(mst->ihead_offs));
-               printk(KERN_DEBUG "\tindex_size     %llu\n",
+               printk(KERN_ERR "\tindex_size     %llu\n",
                       (unsigned long long)le64_to_cpu(mst->index_size));
-               printk(KERN_DEBUG "\tlpt_lnum       %u\n",
+               printk(KERN_ERR "\tlpt_lnum       %u\n",
                       le32_to_cpu(mst->lpt_lnum));
-               printk(KERN_DEBUG "\tlpt_offs       %u\n",
+               printk(KERN_ERR "\tlpt_offs       %u\n",
                       le32_to_cpu(mst->lpt_offs));
-               printk(KERN_DEBUG "\tnhead_lnum     %u\n",
+               printk(KERN_ERR "\tnhead_lnum     %u\n",
                       le32_to_cpu(mst->nhead_lnum));
-               printk(KERN_DEBUG "\tnhead_offs     %u\n",
+               printk(KERN_ERR "\tnhead_offs     %u\n",
                       le32_to_cpu(mst->nhead_offs));
-               printk(KERN_DEBUG "\tltab_lnum      %u\n",
+               printk(KERN_ERR "\tltab_lnum      %u\n",
                       le32_to_cpu(mst->ltab_lnum));
-               printk(KERN_DEBUG "\tltab_offs      %u\n",
+               printk(KERN_ERR "\tltab_offs      %u\n",
                       le32_to_cpu(mst->ltab_offs));
-               printk(KERN_DEBUG "\tlsave_lnum     %u\n",
+               printk(KERN_ERR "\tlsave_lnum     %u\n",
                       le32_to_cpu(mst->lsave_lnum));
-               printk(KERN_DEBUG "\tlsave_offs     %u\n",
+               printk(KERN_ERR "\tlsave_offs     %u\n",
                       le32_to_cpu(mst->lsave_offs));
-               printk(KERN_DEBUG "\tlscan_lnum     %u\n",
+               printk(KERN_ERR "\tlscan_lnum     %u\n",
                       le32_to_cpu(mst->lscan_lnum));
-               printk(KERN_DEBUG "\tleb_cnt        %u\n",
+               printk(KERN_ERR "\tleb_cnt        %u\n",
                       le32_to_cpu(mst->leb_cnt));
-               printk(KERN_DEBUG "\tempty_lebs     %u\n",
+               printk(KERN_ERR "\tempty_lebs     %u\n",
                       le32_to_cpu(mst->empty_lebs));
-               printk(KERN_DEBUG "\tidx_lebs       %u\n",
+               printk(KERN_ERR "\tidx_lebs       %u\n",
                       le32_to_cpu(mst->idx_lebs));
-               printk(KERN_DEBUG "\ttotal_free     %llu\n",
+               printk(KERN_ERR "\ttotal_free     %llu\n",
                       (unsigned long long)le64_to_cpu(mst->total_free));
-               printk(KERN_DEBUG "\ttotal_dirty    %llu\n",
+               printk(KERN_ERR "\ttotal_dirty    %llu\n",
                       (unsigned long long)le64_to_cpu(mst->total_dirty));
-               printk(KERN_DEBUG "\ttotal_used     %llu\n",
+               printk(KERN_ERR "\ttotal_used     %llu\n",
                       (unsigned long long)le64_to_cpu(mst->total_used));
-               printk(KERN_DEBUG "\ttotal_dead     %llu\n",
+               printk(KERN_ERR "\ttotal_dead     %llu\n",
                       (unsigned long long)le64_to_cpu(mst->total_dead));
-               printk(KERN_DEBUG "\ttotal_dark     %llu\n",
+               printk(KERN_ERR "\ttotal_dark     %llu\n",
                       (unsigned long long)le64_to_cpu(mst->total_dark));
                break;
        }
@@ -448,11 +448,11 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
        {
                const struct ubifs_ref_node *ref = node;
 
-               printk(KERN_DEBUG "\tlnum           %u\n",
+               printk(KERN_ERR "\tlnum           %u\n",
                       le32_to_cpu(ref->lnum));
-               printk(KERN_DEBUG "\toffs           %u\n",
+               printk(KERN_ERR "\toffs           %u\n",
                       le32_to_cpu(ref->offs));
-               printk(KERN_DEBUG "\tjhead          %u\n",
+               printk(KERN_ERR "\tjhead          %u\n",
                       le32_to_cpu(ref->jhead));
                break;
        }
@@ -461,40 +461,40 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
                const struct ubifs_ino_node *ino = node;
 
                key_read(c, &ino->key, &key);
-               printk(KERN_DEBUG "\tkey            %s\n",
+               printk(KERN_ERR "\tkey            %s\n",
                       dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN));
-               printk(KERN_DEBUG "\tcreat_sqnum    %llu\n",
+               printk(KERN_ERR "\tcreat_sqnum    %llu\n",
                       (unsigned long long)le64_to_cpu(ino->creat_sqnum));
-               printk(KERN_DEBUG "\tsize           %llu\n",
+               printk(KERN_ERR "\tsize           %llu\n",
                       (unsigned long long)le64_to_cpu(ino->size));
-               printk(KERN_DEBUG "\tnlink          %u\n",
+               printk(KERN_ERR "\tnlink          %u\n",
                       le32_to_cpu(ino->nlink));
-               printk(KERN_DEBUG "\tatime          %lld.%u\n",
+               printk(KERN_ERR "\tatime          %lld.%u\n",
                       (long long)le64_to_cpu(ino->atime_sec),
                       le32_to_cpu(ino->atime_nsec));
-               printk(KERN_DEBUG "\tmtime          %lld.%u\n",
+               printk(KERN_ERR "\tmtime          %lld.%u\n",
                       (long long)le64_to_cpu(ino->mtime_sec),
                       le32_to_cpu(ino->mtime_nsec));
-               printk(KERN_DEBUG "\tctime          %lld.%u\n",
+               printk(KERN_ERR "\tctime          %lld.%u\n",
                       (long long)le64_to_cpu(ino->ctime_sec),
                       le32_to_cpu(ino->ctime_nsec));
-               printk(KERN_DEBUG "\tuid            %u\n",
+               printk(KERN_ERR "\tuid            %u\n",
                       le32_to_cpu(ino->uid));
-               printk(KERN_DEBUG "\tgid            %u\n",
+               printk(KERN_ERR "\tgid            %u\n",
                       le32_to_cpu(ino->gid));
-               printk(KERN_DEBUG "\tmode           %u\n",
+               printk(KERN_ERR "\tmode           %u\n",
                       le32_to_cpu(ino->mode));
-               printk(KERN_DEBUG "\tflags          %#x\n",
+               printk(KERN_ERR "\tflags          %#x\n",
                       le32_to_cpu(ino->flags));
-               printk(KERN_DEBUG "\txattr_cnt      %u\n",
+               printk(KERN_ERR "\txattr_cnt      %u\n",
                       le32_to_cpu(ino->xattr_cnt));
-               printk(KERN_DEBUG "\txattr_size     %u\n",
+               printk(KERN_ERR "\txattr_size     %u\n",
                       le32_to_cpu(ino->xattr_size));
-               printk(KERN_DEBUG "\txattr_names    %u\n",
+               printk(KERN_ERR "\txattr_names    %u\n",
                       le32_to_cpu(ino->xattr_names));
-               printk(KERN_DEBUG "\tcompr_type     %#x\n",
+               printk(KERN_ERR "\tcompr_type     %#x\n",
                       (int)le16_to_cpu(ino->compr_type));
-               printk(KERN_DEBUG "\tdata len       %u\n",
+               printk(KERN_ERR "\tdata len       %u\n",
                       le32_to_cpu(ino->data_len));
                break;
        }
@@ -505,16 +505,16 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
                int nlen = le16_to_cpu(dent->nlen);
 
                key_read(c, &dent->key, &key);
-               printk(KERN_DEBUG "\tkey            %s\n",
+               printk(KERN_ERR "\tkey            %s\n",
                       dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN));
-               printk(KERN_DEBUG "\tinum           %llu\n",
+               printk(KERN_ERR "\tinum           %llu\n",
                       (unsigned long long)le64_to_cpu(dent->inum));
-               printk(KERN_DEBUG "\ttype           %d\n", (int)dent->type);
-               printk(KERN_DEBUG "\tnlen           %d\n", nlen);
-               printk(KERN_DEBUG "\tname           ");
+               printk(KERN_ERR "\ttype           %d\n", (int)dent->type);
+               printk(KERN_ERR "\tnlen           %d\n", nlen);
+               printk(KERN_ERR "\tname           ");
 
                if (nlen > UBIFS_MAX_NLEN)
-                       printk(KERN_DEBUG "(bad name length, not printing, "
+                       printk(KERN_ERR "(bad name length, not printing, "
                                          "bad or corrupted node)");
                else {
                        for (i = 0; i < nlen && dent->name[i]; i++)
@@ -530,16 +530,16 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
                int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ;
 
                key_read(c, &dn->key, &key);
-               printk(KERN_DEBUG "\tkey            %s\n",
+               printk(KERN_ERR "\tkey            %s\n",
                       dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN));
-               printk(KERN_DEBUG "\tsize           %u\n",
+               printk(KERN_ERR "\tsize           %u\n",
                       le32_to_cpu(dn->size));
-               printk(KERN_DEBUG "\tcompr_typ      %d\n",
+               printk(KERN_ERR "\tcompr_typ      %d\n",
                       (int)le16_to_cpu(dn->compr_type));
-               printk(KERN_DEBUG "\tdata size      %d\n",
+               printk(KERN_ERR "\tdata size      %d\n",
                       dlen);
-               printk(KERN_DEBUG "\tdata:\n");
-               print_hex_dump(KERN_DEBUG, "\t", DUMP_PREFIX_OFFSET, 32, 1,
+               printk(KERN_ERR "\tdata:\n");
+               print_hex_dump(KERN_ERR, "\t", DUMP_PREFIX_OFFSET, 32, 1,
                               (void *)&dn->data, dlen, 0);
                break;
        }
@@ -547,11 +547,11 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
        {
                const struct ubifs_trun_node *trun = node;
 
-               printk(KERN_DEBUG "\tinum           %u\n",
+               printk(KERN_ERR "\tinum           %u\n",
                       le32_to_cpu(trun->inum));
-               printk(KERN_DEBUG "\told_size       %llu\n",
+               printk(KERN_ERR "\told_size       %llu\n",
                       (unsigned long long)le64_to_cpu(trun->old_size));
-               printk(KERN_DEBUG "\tnew_size       %llu\n",
+               printk(KERN_ERR "\tnew_size       %llu\n",
                       (unsigned long long)le64_to_cpu(trun->new_size));
                break;
        }
@@ -560,17 +560,17 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
                const struct ubifs_idx_node *idx = node;
 
                n = le16_to_cpu(idx->child_cnt);
-               printk(KERN_DEBUG "\tchild_cnt      %d\n", n);
-               printk(KERN_DEBUG "\tlevel          %d\n",
+               printk(KERN_ERR "\tchild_cnt      %d\n", n);
+               printk(KERN_ERR "\tlevel          %d\n",
                       (int)le16_to_cpu(idx->level));
-               printk(KERN_DEBUG "\tBranches:\n");
+               printk(KERN_ERR "\tBranches:\n");
 
                for (i = 0; i < n && i < c->fanout - 1; i++) {
                        const struct ubifs_branch *br;
 
                        br = ubifs_idx_branch(c, idx, i);
                        key_read(c, &br->key, &key);
-                       printk(KERN_DEBUG "\t%d: LEB %d:%d len %d key %s\n",
+                       printk(KERN_ERR "\t%d: LEB %d:%d len %d key %s\n",
                               i, le32_to_cpu(br->lnum), le32_to_cpu(br->offs),
                               le32_to_cpu(br->len),
                               dbg_snprintf_key(c, &key, key_buf,
@@ -584,20 +584,20 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
        {
                const struct ubifs_orph_node *orph = node;
 
-               printk(KERN_DEBUG "\tcommit number  %llu\n",
+               printk(KERN_ERR "\tcommit number  %llu\n",
                       (unsigned long long)
                                le64_to_cpu(orph->cmt_no) & LLONG_MAX);
-               printk(KERN_DEBUG "\tlast node flag %llu\n",
+               printk(KERN_ERR "\tlast node flag %llu\n",
                       (unsigned long long)(le64_to_cpu(orph->cmt_no)) >> 63);
                n = (le32_to_cpu(ch->len) - UBIFS_ORPH_NODE_SZ) >> 3;
-               printk(KERN_DEBUG "\t%d orphan inode numbers:\n", n);
+               printk(KERN_ERR "\t%d orphan inode numbers:\n", n);
                for (i = 0; i < n; i++)
-                       printk(KERN_DEBUG "\t  ino %llu\n",
+                       printk(KERN_ERR "\t  ino %llu\n",
                               (unsigned long long)le64_to_cpu(orph->inos[i]));
                break;
        }
        default:
-               printk(KERN_DEBUG "node type %d was not recognized\n",
+               printk(KERN_ERR "node type %d was not recognized\n",
                       (int)ch->node_type);
        }
        spin_unlock(&dbg_lock);
@@ -606,16 +606,16 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
 void dbg_dump_budget_req(const struct ubifs_budget_req *req)
 {
        spin_lock(&dbg_lock);
-       printk(KERN_DEBUG "Budgeting request: new_ino %d, dirtied_ino %d\n",
+       printk(KERN_ERR "Budgeting request: new_ino %d, dirtied_ino %d\n",
               req->new_ino, req->dirtied_ino);
-       printk(KERN_DEBUG "\tnew_ino_d   %d, dirtied_ino_d %d\n",
+       printk(KERN_ERR "\tnew_ino_d   %d, dirtied_ino_d %d\n",
               req->new_ino_d, req->dirtied_ino_d);
-       printk(KERN_DEBUG "\tnew_page    %d, dirtied_page %d\n",
+       printk(KERN_ERR "\tnew_page    %d, dirtied_page %d\n",
               req->new_page, req->dirtied_page);
-       printk(KERN_DEBUG "\tnew_dent    %d, mod_dent     %d\n",
+       printk(KERN_ERR "\tnew_dent    %d, mod_dent     %d\n",
               req->new_dent, req->mod_dent);
-       printk(KERN_DEBUG "\tidx_growth  %d\n", req->idx_growth);
-       printk(KERN_DEBUG "\tdata_growth %d dd_growth     %d\n",
+       printk(KERN_ERR "\tidx_growth  %d\n", req->idx_growth);
+       printk(KERN_ERR "\tdata_growth %d dd_growth     %d\n",
               req->data_growth, req->dd_growth);
        spin_unlock(&dbg_lock);
 }
@@ -623,12 +623,12 @@ void dbg_dump_budget_req(const struct ubifs_budget_req *req)
 void dbg_dump_lstats(const struct ubifs_lp_stats *lst)
 {
        spin_lock(&dbg_lock);
-       printk(KERN_DEBUG "(pid %d) Lprops statistics: empty_lebs %d, "
+       printk(KERN_ERR "(pid %d) Lprops statistics: empty_lebs %d, "
               "idx_lebs  %d\n", current->pid, lst->empty_lebs, lst->idx_lebs);
-       printk(KERN_DEBUG "\ttaken_empty_lebs %d, total_free %lld, "
+       printk(KERN_ERR "\ttaken_empty_lebs %d, total_free %lld, "
               "total_dirty %lld\n", lst->taken_empty_lebs, lst->total_free,
               lst->total_dirty);
-       printk(KERN_DEBUG "\ttotal_used %lld, total_dark %lld, "
+       printk(KERN_ERR "\ttotal_used %lld, total_dark %lld, "
               "total_dead %lld\n", lst->total_used, lst->total_dark,
               lst->total_dead);
        spin_unlock(&dbg_lock);
@@ -644,21 +644,21 @@ void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi)
 
        spin_lock(&c->space_lock);
        spin_lock(&dbg_lock);
-       printk(KERN_DEBUG "(pid %d) Budgeting info: data budget sum %lld, "
+       printk(KERN_ERR "(pid %d) Budgeting info: data budget sum %lld, "
               "total budget sum %lld\n", current->pid,
               bi->data_growth + bi->dd_growth,
               bi->data_growth + bi->dd_growth + bi->idx_growth);
-       printk(KERN_DEBUG "\tbudg_data_growth %lld, budg_dd_growth %lld, "
+       printk(KERN_ERR "\tbudg_data_growth %lld, budg_dd_growth %lld, "
               "budg_idx_growth %lld\n", bi->data_growth, bi->dd_growth,
               bi->idx_growth);
-       printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %llu, "
+       printk(KERN_ERR "\tmin_idx_lebs %d, old_idx_sz %llu, "
               "uncommitted_idx %lld\n", bi->min_idx_lebs, bi->old_idx_sz,
               bi->uncommitted_idx);
-       printk(KERN_DEBUG "\tpage_budget %d, inode_budget %d, dent_budget %d\n",
+       printk(KERN_ERR "\tpage_budget %d, inode_budget %d, dent_budget %d\n",
               bi->page_budget, bi->inode_budget, bi->dent_budget);
-       printk(KERN_DEBUG "\tnospace %u, nospace_rp %u\n",
+       printk(KERN_ERR "\tnospace %u, nospace_rp %u\n",
               bi->nospace, bi->nospace_rp);
-       printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n",
+       printk(KERN_ERR "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n",
               c->dark_wm, c->dead_wm, c->max_idx_node_sz);
 
        if (bi != &c->bi)
@@ -669,38 +669,38 @@ void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi)
                 */
                goto out_unlock;
 
-       printk(KERN_DEBUG "\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n",
+       printk(KERN_ERR "\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n",
               c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt);
-       printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, "
+       printk(KERN_ERR "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, "
               "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt),
               atomic_long_read(&c->dirty_zn_cnt),
               atomic_long_read(&c->clean_zn_cnt));
-       printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n",
+       printk(KERN_ERR "\tgc_lnum %d, ihead_lnum %d\n",
               c->gc_lnum, c->ihead_lnum);
 
        /* If we are in R/O mode, journal heads do not exist */
        if (c->jheads)
                for (i = 0; i < c->jhead_cnt; i++)
-                       printk(KERN_DEBUG "\tjhead %s\t LEB %d\n",
+                       printk(KERN_ERR "\tjhead %s\t LEB %d\n",
                               dbg_jhead(c->jheads[i].wbuf.jhead),
                               c->jheads[i].wbuf.lnum);
        for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) {
                bud = rb_entry(rb, struct ubifs_bud, rb);
-               printk(KERN_DEBUG "\tbud LEB %d\n", bud->lnum);
+               printk(KERN_ERR "\tbud LEB %d\n", bud->lnum);
        }
        list_for_each_entry(bud, &c->old_buds, list)
-               printk(KERN_DEBUG "\told bud LEB %d\n", bud->lnum);
+               printk(KERN_ERR "\told bud LEB %d\n", bud->lnum);
        list_for_each_entry(idx_gc, &c->idx_gc, list)
-               printk(KERN_DEBUG "\tGC'ed idx LEB %d unmap %d\n",
+               printk(KERN_ERR "\tGC'ed idx LEB %d unmap %d\n",
                       idx_gc->lnum, idx_gc->unmap);
-       printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state);
+       printk(KERN_ERR "\tcommit state %d\n", c->cmt_state);
 
        /* Print budgeting predictions */
        available = ubifs_calc_available(c, c->bi.min_idx_lebs);
        outstanding = c->bi.data_growth + c->bi.dd_growth;
        free = ubifs_get_free_space_nolock(c);
-       printk(KERN_DEBUG "Budgeting predictions:\n");
-       printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n",
+       printk(KERN_ERR "Budgeting predictions:\n");
+       printk(KERN_ERR "\tavailable: %lld, outstanding %lld, free %lld\n",
               available, outstanding, free);
 out_unlock:
        spin_unlock(&dbg_lock);
@@ -720,11 +720,11 @@ void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp)
                dark = ubifs_calc_dark(c, spc);
 
        if (lp->flags & LPROPS_INDEX)
-               printk(KERN_DEBUG "LEB %-7d free %-8d dirty %-8d used %-8d "
+               printk(KERN_ERR "LEB %-7d free %-8d dirty %-8d used %-8d "
                       "free + dirty %-8d flags %#x (", lp->lnum, lp->free,
                       lp->dirty, c->leb_size - spc, spc, lp->flags);
        else
-               printk(KERN_DEBUG "LEB %-7d free %-8d dirty %-8d used %-8d "
+               printk(KERN_ERR "LEB %-7d free %-8d dirty %-8d used %-8d "
                       "free + dirty %-8d dark %-4d dead %-4d nodes fit %-3d "
                       "flags %#-4x (", lp->lnum, lp->free, lp->dirty,
                       c->leb_size - spc, spc, dark, dead,
@@ -807,7 +807,7 @@ void dbg_dump_lprops(struct ubifs_info *c)
        struct ubifs_lprops lp;
        struct ubifs_lp_stats lst;
 
-       printk(KERN_DEBUG "(pid %d) start dumping LEB properties\n",
+       printk(KERN_ERR "(pid %d) start dumping LEB properties\n",
               current->pid);
        ubifs_get_lp_stats(c, &lst);
        dbg_dump_lstats(&lst);
@@ -819,7 +819,7 @@ void dbg_dump_lprops(struct ubifs_info *c)
 
                dbg_dump_lprop(c, &lp);
        }
-       printk(KERN_DEBUG "(pid %d) finish dumping LEB properties\n",
+       printk(KERN_ERR "(pid %d) finish dumping LEB properties\n",
               current->pid);
 }
 
@@ -828,35 +828,35 @@ void dbg_dump_lpt_info(struct ubifs_info *c)
        int i;
 
        spin_lock(&dbg_lock);
-       printk(KERN_DEBUG "(pid %d) dumping LPT information\n", current->pid);
-       printk(KERN_DEBUG "\tlpt_sz:        %lld\n", c->lpt_sz);
-       printk(KERN_DEBUG "\tpnode_sz:      %d\n", c->pnode_sz);
-       printk(KERN_DEBUG "\tnnode_sz:      %d\n", c->nnode_sz);
-       printk(KERN_DEBUG "\tltab_sz:       %d\n", c->ltab_sz);
-       printk(KERN_DEBUG "\tlsave_sz:      %d\n", c->lsave_sz);
-       printk(KERN_DEBUG "\tbig_lpt:       %d\n", c->big_lpt);
-       printk(KERN_DEBUG "\tlpt_hght:      %d\n", c->lpt_hght);
-       printk(KERN_DEBUG "\tpnode_cnt:     %d\n", c->pnode_cnt);
-       printk(KERN_DEBUG "\tnnode_cnt:     %d\n", c->nnode_cnt);
-       printk(KERN_DEBUG "\tdirty_pn_cnt:  %d\n", c->dirty_pn_cnt);
-       printk(KERN_DEBUG "\tdirty_nn_cnt:  %d\n", c->dirty_nn_cnt);
-       printk(KERN_DEBUG "\tlsave_cnt:     %d\n", c->lsave_cnt);
-       printk(KERN_DEBUG "\tspace_bits:    %d\n", c->space_bits);
-       printk(KERN_DEBUG "\tlpt_lnum_bits: %d\n", c->lpt_lnum_bits);
-       printk(KERN_DEBUG "\tlpt_offs_bits: %d\n", c->lpt_offs_bits);
-       printk(KERN_DEBUG "\tlpt_spc_bits:  %d\n", c->lpt_spc_bits);
-       printk(KERN_DEBUG "\tpcnt_bits:     %d\n", c->pcnt_bits);
-       printk(KERN_DEBUG "\tlnum_bits:     %d\n", c->lnum_bits);
-       printk(KERN_DEBUG "\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs);
-       printk(KERN_DEBUG "\tLPT head is at %d:%d\n",
+       printk(KERN_ERR "(pid %d) dumping LPT information\n", current->pid);
+       printk(KERN_ERR "\tlpt_sz:        %lld\n", c->lpt_sz);
+       printk(KERN_ERR "\tpnode_sz:      %d\n", c->pnode_sz);
+       printk(KERN_ERR "\tnnode_sz:      %d\n", c->nnode_sz);
+       printk(KERN_ERR "\tltab_sz:       %d\n", c->ltab_sz);
+       printk(KERN_ERR "\tlsave_sz:      %d\n", c->lsave_sz);
+       printk(KERN_ERR "\tbig_lpt:       %d\n", c->big_lpt);
+       printk(KERN_ERR "\tlpt_hght:      %d\n", c->lpt_hght);
+       printk(KERN_ERR "\tpnode_cnt:     %d\n", c->pnode_cnt);
+       printk(KERN_ERR "\tnnode_cnt:     %d\n", c->nnode_cnt);
+       printk(KERN_ERR "\tdirty_pn_cnt:  %d\n", c->dirty_pn_cnt);
+       printk(KERN_ERR "\tdirty_nn_cnt:  %d\n", c->dirty_nn_cnt);
+       printk(KERN_ERR "\tlsave_cnt:     %d\n", c->lsave_cnt);
+       printk(KERN_ERR "\tspace_bits:    %d\n", c->space_bits);
+       printk(KERN_ERR "\tlpt_lnum_bits: %d\n", c->lpt_lnum_bits);
+       printk(KERN_ERR "\tlpt_offs_bits: %d\n", c->lpt_offs_bits);
+       printk(KERN_ERR "\tlpt_spc_bits:  %d\n", c->lpt_spc_bits);
+       printk(KERN_ERR "\tpcnt_bits:     %d\n", c->pcnt_bits);
+       printk(KERN_ERR "\tlnum_bits:     %d\n", c->lnum_bits);
+       printk(KERN_ERR "\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs);
+       printk(KERN_ERR "\tLPT head is at %d:%d\n",
               c->nhead_lnum, c->nhead_offs);
-       printk(KERN_DEBUG "\tLPT ltab is at %d:%d\n",
+       printk(KERN_ERR "\tLPT ltab is at %d:%d\n",
               c->ltab_lnum, c->ltab_offs);
        if (c->big_lpt)
-               printk(KERN_DEBUG "\tLPT lsave is at %d:%d\n",
+               printk(KERN_ERR "\tLPT lsave is at %d:%d\n",
                       c->lsave_lnum, c->lsave_offs);
        for (i = 0; i < c->lpt_lebs; i++)
-               printk(KERN_DEBUG "\tLPT LEB %d free %d dirty %d tgc %d "
+               printk(KERN_ERR "\tLPT LEB %d free %d dirty %d tgc %d "
                       "cmt %d\n", i + c->lpt_first, c->ltab[i].free,
                       c->ltab[i].dirty, c->ltab[i].tgc, c->ltab[i].cmt);
        spin_unlock(&dbg_lock);
@@ -867,12 +867,12 @@ void dbg_dump_sleb(const struct ubifs_info *c,
 {
        struct ubifs_scan_node *snod;
 
-       printk(KERN_DEBUG "(pid %d) start dumping scanned data from LEB %d:%d\n",
+       printk(KERN_ERR "(pid %d) start dumping scanned data from LEB %d:%d\n",
               current->pid, sleb->lnum, offs);
 
        list_for_each_entry(snod, &sleb->nodes, list) {
                cond_resched();
-               printk(KERN_DEBUG "Dumping node at LEB %d:%d len %d\n", sleb->lnum,
+               printk(KERN_ERR "Dumping node at LEB %d:%d len %d\n", sleb->lnum,
                       snod->offs, snod->len);
                dbg_dump_node(c, snod->node);
        }
@@ -887,7 +887,7 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum)
        if (dbg_is_tst_rcvry(c))
                return;
 
-       printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n",
+       printk(KERN_ERR "(pid %d) start dumping LEB %d\n",
               current->pid, lnum);
 
        buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
@@ -902,17 +902,17 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum)
                goto out;
        }
 
-       printk(KERN_DEBUG "LEB %d has %d nodes ending at %d\n", lnum,
+       printk(KERN_ERR "LEB %d has %d nodes ending at %d\n", lnum,
               sleb->nodes_cnt, sleb->endpt);
 
        list_for_each_entry(snod, &sleb->nodes, list) {
                cond_resched();
-               printk(KERN_DEBUG "Dumping node at LEB %d:%d len %d\n", lnum,
+               printk(KERN_ERR "Dumping node at LEB %d:%d len %d\n", lnum,
                       snod->offs, snod->len);
                dbg_dump_node(c, snod->node);
        }
 
-       printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n",
+       printk(KERN_ERR "(pid %d) finish dumping LEB %d\n",
               current->pid, lnum);
        ubifs_scan_destroy(sleb);
 
@@ -934,7 +934,7 @@ void dbg_dump_znode(const struct ubifs_info *c,
        else
                zbr = &c->zroot;
 
-       printk(KERN_DEBUG "znode %p, LEB %d:%d len %d parent %p iip %d level %d"
+       printk(KERN_ERR "znode %p, LEB %d:%d len %d parent %p iip %d level %d"
               " child_cnt %d flags %lx\n", znode, zbr->lnum, zbr->offs,
               zbr->len, znode->parent, znode->iip, znode->level,
               znode->child_cnt, znode->flags);
@@ -944,18 +944,18 @@ void dbg_dump_znode(const struct ubifs_info *c,
                return;
        }
 
-       printk(KERN_DEBUG "zbranches:\n");
+       printk(KERN_ERR "zbranches:\n");
        for (n = 0; n < znode->child_cnt; n++) {
                zbr = &znode->zbranch[n];
                if (znode->level > 0)
-                       printk(KERN_DEBUG "\t%d: znode %p LEB %d:%d len %d key "
+                       printk(KERN_ERR "\t%d: znode %p LEB %d:%d len %d key "
                                          "%s\n", n, zbr->znode, zbr->lnum,
                                          zbr->offs, zbr->len,
                                          dbg_snprintf_key(c, &zbr->key,
                                                           key_buf,
                                                           DBG_KEY_BUF_LEN));
                else
-                       printk(KERN_DEBUG "\t%d: LNC %p LEB %d:%d len %d key "
+                       printk(KERN_ERR "\t%d: LNC %p LEB %d:%d len %d key "
                                          "%s\n", n, zbr->znode, zbr->lnum,
                                          zbr->offs, zbr->len,
                                          dbg_snprintf_key(c, &zbr->key,
@@ -969,16 +969,16 @@ void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat)
 {
        int i;
 
-       printk(KERN_DEBUG "(pid %d) start dumping heap cat %d (%d elements)\n",
+       printk(KERN_ERR "(pid %d) start dumping heap cat %d (%d elements)\n",
               current->pid, cat, heap->cnt);
        for (i = 0; i < heap->cnt; i++) {
                struct ubifs_lprops *lprops = heap->arr[i];
 
-               printk(KERN_DEBUG "\t%d. LEB %d hpos %d free %d dirty %d "
+               printk(KERN_ERR "\t%d. LEB %d hpos %d free %d dirty %d "
                       "flags %d\n", i, lprops->lnum, lprops->hpos,
                       lprops->free, lprops->dirty, lprops->flags);
        }
-       printk(KERN_DEBUG "(pid %d) finish dumping heap\n", current->pid);
+       printk(KERN_ERR "(pid %d) finish dumping heap\n", current->pid);
 }
 
 void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
@@ -986,15 +986,15 @@ void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
 {
        int i;
 
-       printk(KERN_DEBUG "(pid %d) dumping pnode:\n", current->pid);
-       printk(KERN_DEBUG "\taddress %zx parent %zx cnext %zx\n",
+       printk(KERN_ERR "(pid %d) dumping pnode:\n", current->pid);
+       printk(KERN_ERR "\taddress %zx parent %zx cnext %zx\n",
               (size_t)pnode, (size_t)parent, (size_t)pnode->cnext);
-       printk(KERN_DEBUG "\tflags %lu iip %d level %d num %d\n",
+       printk(KERN_ERR "\tflags %lu iip %d level %d num %d\n",
               pnode->flags, iip, pnode->level, pnode->num);
        for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
                struct ubifs_lprops *lp = &pnode->lprops[i];
 
-               printk(KERN_DEBUG "\t%d: free %d dirty %d flags %d lnum %d\n",
+               printk(KERN_ERR "\t%d: free %d dirty %d flags %d lnum %d\n",
                       i, lp->free, lp->dirty, lp->flags, lp->lnum);
        }
 }
@@ -1004,20 +1004,20 @@ void dbg_dump_tnc(struct ubifs_info *c)
        struct ubifs_znode *znode;
        int level;
 
-       printk(KERN_DEBUG "\n");
-       printk(KERN_DEBUG "(pid %d) start dumping TNC tree\n", current->pid);
+       printk(KERN_ERR "\n");
+       printk(KERN_ERR "(pid %d) start dumping TNC tree\n", current->pid);
        znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL);
        level = znode->level;
-       printk(KERN_DEBUG "== Level %d ==\n", level);
+       printk(KERN_ERR "== Level %d ==\n", level);
        while (znode) {
                if (level != znode->level) {
                        level = znode->level;
-                       printk(KERN_DEBUG "== Level %d ==\n", level);
+                       printk(KERN_ERR "== Level %d ==\n", level);
                }
                dbg_dump_znode(c, znode);
                znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode);
        }
-       printk(KERN_DEBUG "(pid %d) finish dumping TNC tree\n", current->pid);
+       printk(KERN_ERR "(pid %d) finish dumping TNC tree\n", current->pid);
 }
 
 static int dump_znode(struct ubifs_info *c, struct ubifs_znode *znode,
index ad1a6fee6010ba9d17c9e2e0e8616734a9960b04..9f717655df18a3e908aff028b3f3c7cae36ef94e 100644 (file)
@@ -164,9 +164,7 @@ struct ubifs_global_debug_info {
 #define dbg_dump_stack() dump_stack()
 
 #define dbg_err(fmt, ...) do {                                                 \
-       spin_lock(&dbg_lock);                                                  \
        ubifs_err(fmt, ##__VA_ARGS__);                                         \
-       spin_unlock(&dbg_lock);                                                \
 } while (0)
 
 #define ubifs_dbg_msg(type, fmt, ...) \
@@ -217,7 +215,6 @@ struct ubifs_global_debug_info {
 /* Additional recovery messages */
 #define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__)
 
-extern spinlock_t dbg_lock;
 extern struct ubifs_global_debug_info ubifs_dbg;
 
 static inline int dbg_is_chk_gen(const struct ubifs_info *c)
index d6fe1c79f18b2fd8376728df1c55628d3814ab36..ec9f1870ab7f083d1a9c5084ad3cbf8110259754 100644 (file)
@@ -566,6 +566,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
        int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
        int err, budgeted = 1;
        struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 };
+       unsigned int saved_nlink = inode->i_nlink;
 
        /*
         * Budget request settings: deletion direntry, deletion inode (+1 for
@@ -613,7 +614,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
 out_cancel:
        dir->i_size += sz_change;
        dir_ui->ui_size = dir->i_size;
-       inc_nlink(inode);
+       set_nlink(inode, saved_nlink);
        unlock_2_inodes(dir, inode);
        if (budgeted)
                ubifs_release_budget(c, &req);
@@ -704,8 +705,7 @@ out_cancel:
        dir->i_size += sz_change;
        dir_ui->ui_size = dir->i_size;
        inc_nlink(dir);
-       inc_nlink(inode);
-       inc_nlink(inode);
+       set_nlink(inode, 2);
        unlock_2_inodes(dir, inode);
        if (budgeted)
                ubifs_release_budget(c, &req);
@@ -977,6 +977,7 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
        struct ubifs_budget_req ino_req = { .dirtied_ino = 1,
                        .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) };
        struct timespec time;
+       unsigned int saved_nlink;
 
        /*
         * Budget request settings: deletion direntry, new direntry, removing
@@ -1059,13 +1060,14 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (unlink) {
                /*
                 * Directories cannot have hard-links, so if this is a
-                * directory, decrement its @i_nlink twice because an empty
-                * directory has @i_nlink 2.
+                * directory, just clear @i_nlink.
                 */
+               saved_nlink = new_inode->i_nlink;
                if (is_dir)
+                       clear_nlink(new_inode);
+               else
                        drop_nlink(new_inode);
                new_inode->i_ctime = time;
-               drop_nlink(new_inode);
        } else {
                new_dir->i_size += new_sz;
                ubifs_inode(new_dir)->ui_size = new_dir->i_size;
@@ -1102,9 +1104,7 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
 
 out_cancel:
        if (unlink) {
-               if (is_dir)
-                       inc_nlink(new_inode);
-               inc_nlink(new_inode);
+               set_nlink(new_inode, saved_nlink);
        } else {
                new_dir->i_size -= new_sz;
                ubifs_inode(new_dir)->ui_size = new_dir->i_size;
index ee4f43f4bb998d2bc20ac1f96437e926e2f2e490..2a935b317232b6ff87e408143c410ca3b745501f 100644 (file)
@@ -679,7 +679,8 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
                           ret == SCANNED_GARBAGE     ||
                           ret == SCANNED_A_BAD_PAD_NODE ||
                           ret == SCANNED_A_CORRUPT_NODE) {
-                       dbg_rcvry("found corruption - %d", ret);
+                       dbg_rcvry("found corruption (%d) at %d:%d",
+                                 ret, lnum, offs);
                        break;
                } else {
                        dbg_err("unexpected return value %d", ret);
index 6094c5a5d7a8de597e3905b9c4941f458cc999f3..771f7fb6ce92a32b038bd63e1e1fca540493ab60 100644 (file)
@@ -410,13 +410,23 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup)
        }
 
        if (c->main_lebs < UBIFS_MIN_MAIN_LEBS) {
-               err = 7;
+               ubifs_err("too few main LEBs count %d, must be at least %d",
+                         c->main_lebs, UBIFS_MIN_MAIN_LEBS);
                goto failed;
        }
 
-       if (c->max_bud_bytes < (long long)c->leb_size * UBIFS_MIN_BUD_LEBS ||
-           c->max_bud_bytes > (long long)c->leb_size * c->main_lebs) {
-               err = 8;
+       max_bytes = (long long)c->leb_size * UBIFS_MIN_BUD_LEBS;
+       if (c->max_bud_bytes < max_bytes) {
+               ubifs_err("too small journal (%lld bytes), must be at least "
+                         "%lld bytes",  c->max_bud_bytes, max_bytes);
+               goto failed;
+       }
+
+       max_bytes = (long long)c->leb_size * c->main_lebs;
+       if (c->max_bud_bytes > max_bytes) {
+               ubifs_err("too large journal size (%lld bytes), only %lld bytes"
+                         "available in the main area",
+                         c->max_bud_bytes, max_bytes);
                goto failed;
        }
 
@@ -450,7 +460,6 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup)
                goto failed;
        }
 
-       max_bytes = c->main_lebs * (long long)c->leb_size;
        if (c->rp_size < 0 || max_bytes < c->rp_size) {
                err = 14;
                goto failed;
index 12e94774aa88b7ebac85ac427d658e648d402063..93d59aceaaef99b454107b8f70d73c41cecf8a20 100644 (file)
@@ -84,9 +84,6 @@
 #define INUM_WARN_WATERMARK 0xFFF00000
 #define INUM_WATERMARK      0xFFFFFF00
 
-/* Largest key size supported in this implementation */
-#define CUR_MAX_KEY_LEN UBIFS_SK_LEN
-
 /* Maximum number of entries in each LPT (LEB category) heap */
 #define LPT_HEAP_SZ 256
 
@@ -277,10 +274,10 @@ struct ubifs_old_idx {
 
 /* The below union makes it easier to deal with keys */
 union ubifs_key {
-       uint8_t u8[CUR_MAX_KEY_LEN];
-       uint32_t u32[CUR_MAX_KEY_LEN/4];
-       uint64_t u64[CUR_MAX_KEY_LEN/8];
-       __le32 j32[CUR_MAX_KEY_LEN/4];
+       uint8_t u8[UBIFS_SK_LEN];
+       uint32_t u32[UBIFS_SK_LEN/4];
+       uint64_t u64[UBIFS_SK_LEN/8];
+       __le32 j32[UBIFS_SK_LEN/4];
 };
 
 /**
index 427a4e82a588759dbfb49394f73eca9400d455e7..0a9977983f92b358989f3cb01e81cb0e31db27e7 100644 (file)
@@ -96,9 +96,6 @@ xfs-$(CONFIG_XFS_QUOTA)               += xfs_dquot.o \
                                   xfs_qm_bhv.o \
                                   xfs_qm.o \
                                   xfs_quotaops.o
-ifeq ($(CONFIG_XFS_QUOTA),y)
-xfs-$(CONFIG_PROC_FS)          += xfs_qm_stats.o
-endif
 xfs-$(CONFIG_XFS_RT)           += xfs_rtalloc.o
 xfs-$(CONFIG_XFS_POSIX_ACL)    += xfs_acl.o
 xfs-$(CONFIG_PROC_FS)          += xfs_stats.o
index 74b9baf36ac39038f827c8e262ea62aa33d81de8..0dbb9e70fe21664740bc90721f4ae7bb5d3d190b 100644 (file)
@@ -26,6 +26,7 @@
 #include "xfs_bmap_btree.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
+#include "xfs_inode_item.h"
 #include "xfs_alloc.h"
 #include "xfs_error.h"
 #include "xfs_rw.h"
@@ -98,23 +99,6 @@ xfs_destroy_ioend(
        mempool_free(ioend, xfs_ioend_pool);
 }
 
-/*
- * If the end of the current ioend is beyond the current EOF,
- * return the new EOF value, otherwise zero.
- */
-STATIC xfs_fsize_t
-xfs_ioend_new_eof(
-       xfs_ioend_t             *ioend)
-{
-       xfs_inode_t             *ip = XFS_I(ioend->io_inode);
-       xfs_fsize_t             isize;
-       xfs_fsize_t             bsize;
-
-       bsize = ioend->io_offset + ioend->io_size;
-       isize = MIN(i_size_read(VFS_I(ip)), bsize);
-       return isize > ip->i_d.di_size ? isize : 0;
-}
-
 /*
  * Fast and loose check if this write could update the on-disk inode size.
  */
@@ -124,32 +108,65 @@ static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
                XFS_I(ioend->io_inode)->i_d.di_size;
 }
 
+STATIC int
+xfs_setfilesize_trans_alloc(
+       struct xfs_ioend        *ioend)
+{
+       struct xfs_mount        *mp = XFS_I(ioend->io_inode)->i_mount;
+       struct xfs_trans        *tp;
+       int                     error;
+
+       tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
+
+       error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
+       if (error) {
+               xfs_trans_cancel(tp, 0);
+               return error;
+       }
+
+       ioend->io_append_trans = tp;
+
+       /*
+        * We hand off the transaction to the completion thread now, so
+        * clear the flag here.
+        */
+       current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
+       return 0;
+}
+
 /*
  * Update on-disk file size now that data has been written to disk.
- *
- * This function does not block as blocking on the inode lock in IO completion
- * can lead to IO completion order dependency deadlocks.. If it can't get the
- * inode ilock it will return EAGAIN. Callers must handle this.
  */
 STATIC int
 xfs_setfilesize(
-       xfs_ioend_t             *ioend)
+       struct xfs_ioend        *ioend)
 {
-       xfs_inode_t             *ip = XFS_I(ioend->io_inode);
+       struct xfs_inode        *ip = XFS_I(ioend->io_inode);
+       struct xfs_trans        *tp = ioend->io_append_trans;
        xfs_fsize_t             isize;
 
-       if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
-               return EAGAIN;
+       /*
+        * The transaction was allocated in the I/O submission thread,
+        * thus we need to mark ourselves as being in a transaction
+        * manually.
+        */
+       current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
 
-       isize = xfs_ioend_new_eof(ioend);
-       if (isize) {
-               trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
-               ip->i_d.di_size = isize;
-               xfs_mark_inode_dirty(ip);
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size);
+       if (!isize) {
+               xfs_iunlock(ip, XFS_ILOCK_EXCL);
+               xfs_trans_cancel(tp, 0);
+               return 0;
        }
 
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-       return 0;
+       trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
+
+       ip->i_d.di_size = isize;
+       xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+       return xfs_trans_commit(tp, 0);
 }
 
 /*
@@ -163,10 +180,12 @@ xfs_finish_ioend(
        struct xfs_ioend        *ioend)
 {
        if (atomic_dec_and_test(&ioend->io_remaining)) {
+               struct xfs_mount        *mp = XFS_I(ioend->io_inode)->i_mount;
+
                if (ioend->io_type == IO_UNWRITTEN)
-                       queue_work(xfsconvertd_workqueue, &ioend->io_work);
-               else if (xfs_ioend_is_append(ioend))
-                       queue_work(xfsdatad_workqueue, &ioend->io_work);
+                       queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
+               else if (ioend->io_append_trans)
+                       queue_work(mp->m_data_workqueue, &ioend->io_work);
                else
                        xfs_destroy_ioend(ioend);
        }
@@ -195,35 +214,36 @@ xfs_end_io(
         * range to normal written extens after the data I/O has finished.
         */
        if (ioend->io_type == IO_UNWRITTEN) {
+               /*
+                * For buffered I/O we never preallocate a transaction when
+                * doing the unwritten extent conversion, but for direct I/O
+                * we do not know if we are converting an unwritten extent
+                * or not at the point where we preallocate the transaction.
+                */
+               if (ioend->io_append_trans) {
+                       ASSERT(ioend->io_isdirect);
+
+                       current_set_flags_nested(
+                               &ioend->io_append_trans->t_pflags, PF_FSTRANS);
+                       xfs_trans_cancel(ioend->io_append_trans, 0);
+               }
+
                error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
                                                 ioend->io_size);
                if (error) {
                        ioend->io_error = -error;
                        goto done;
                }
+       } else if (ioend->io_append_trans) {
+               error = xfs_setfilesize(ioend);
+               if (error)
+                       ioend->io_error = -error;
+       } else {
+               ASSERT(!xfs_ioend_is_append(ioend));
        }
 
-       /*
-        * We might have to update the on-disk file size after extending
-        * writes.
-        */
-       error = xfs_setfilesize(ioend);
-       ASSERT(!error || error == EAGAIN);
-
 done:
-       /*
-        * If we didn't complete processing of the ioend, requeue it to the
-        * tail of the workqueue for another attempt later. Otherwise destroy
-        * it.
-        */
-       if (error == EAGAIN) {
-               atomic_inc(&ioend->io_remaining);
-               xfs_finish_ioend(ioend);
-               /* ensure we don't spin on blocked ioends */
-               delay(1);
-       } else {
-               xfs_destroy_ioend(ioend);
-       }
+       xfs_destroy_ioend(ioend);
 }
 
 /*
@@ -259,6 +279,7 @@ xfs_alloc_ioend(
         */
        atomic_set(&ioend->io_remaining, 1);
        ioend->io_isasync = 0;
+       ioend->io_isdirect = 0;
        ioend->io_error = 0;
        ioend->io_list = NULL;
        ioend->io_type = type;
@@ -269,6 +290,7 @@ xfs_alloc_ioend(
        ioend->io_size = 0;
        ioend->io_iocb = NULL;
        ioend->io_result = 0;
+       ioend->io_append_trans = NULL;
 
        INIT_WORK(&ioend->io_work, xfs_end_io);
        return ioend;
@@ -379,14 +401,6 @@ xfs_submit_ioend_bio(
        atomic_inc(&ioend->io_remaining);
        bio->bi_private = ioend;
        bio->bi_end_io = xfs_end_bio;
-
-       /*
-        * If the I/O is beyond EOF we mark the inode dirty immediately
-        * but don't update the inode size until I/O completion.
-        */
-       if (xfs_ioend_new_eof(ioend))
-               xfs_mark_inode_dirty(XFS_I(ioend->io_inode));
-
        submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
 }
 
@@ -1033,8 +1047,20 @@ xfs_vm_writepage(
                                  wbc, end_index);
        }
 
-       if (iohead)
+       if (iohead) {
+               /*
+                * Reserve log space if we might write beyond the on-disk
+                * inode size.
+                */
+               if (ioend->io_type != IO_UNWRITTEN &&
+                   xfs_ioend_is_append(ioend)) {
+                       err = xfs_setfilesize_trans_alloc(ioend);
+                       if (err)
+                               goto error;
+               }
+
                xfs_submit_ioend(wbc, iohead);
+       }
 
        return 0;
 
@@ -1314,17 +1340,32 @@ xfs_vm_direct_IO(
 {
        struct inode            *inode = iocb->ki_filp->f_mapping->host;
        struct block_device     *bdev = xfs_find_bdev_for_inode(inode);
+       struct xfs_ioend        *ioend = NULL;
        ssize_t                 ret;
 
        if (rw & WRITE) {
-               iocb->private = xfs_alloc_ioend(inode, IO_DIRECT);
+               size_t size = iov_length(iov, nr_segs);
+
+               /*
+                * We need to preallocate a transaction for a size update
+                * here.  In the case that this write both updates the size
+                * and converts at least one unwritten extent we will cancel
+                * the still clean transaction after the I/O has finished.
+                */
+               iocb->private = ioend = xfs_alloc_ioend(inode, IO_DIRECT);
+               if (offset + size > XFS_I(inode)->i_d.di_size) {
+                       ret = xfs_setfilesize_trans_alloc(ioend);
+                       if (ret)
+                               goto out_destroy_ioend;
+                       ioend->io_isdirect = 1;
+               }
 
                ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
                                            offset, nr_segs,
                                            xfs_get_blocks_direct,
                                            xfs_end_io_direct_write, NULL, 0);
                if (ret != -EIOCBQUEUED && iocb->private)
-                       xfs_destroy_ioend(iocb->private);
+                       goto out_trans_cancel;
        } else {
                ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
                                            offset, nr_segs,
@@ -1333,6 +1374,16 @@ xfs_vm_direct_IO(
        }
 
        return ret;
+
+out_trans_cancel:
+       if (ioend->io_append_trans) {
+               current_set_flags_nested(&ioend->io_append_trans->t_pflags,
+                                        PF_FSTRANS);
+               xfs_trans_cancel(ioend->io_append_trans, 0);
+       }
+out_destroy_ioend:
+       xfs_destroy_ioend(ioend);
+       return ret;
 }
 
 STATIC void
index 116dd5c370346eb118baa34ce19d8af8ef195b7e..84eafbcb0d9dd65cecd57ef9074eb66b89bb9c08 100644 (file)
@@ -18,8 +18,6 @@
 #ifndef __XFS_AOPS_H__
 #define __XFS_AOPS_H__
 
-extern struct workqueue_struct *xfsdatad_workqueue;
-extern struct workqueue_struct *xfsconvertd_workqueue;
 extern mempool_t *xfs_ioend_pool;
 
 /*
@@ -48,12 +46,14 @@ typedef struct xfs_ioend {
        int                     io_error;       /* I/O error code */
        atomic_t                io_remaining;   /* hold count */
        unsigned int            io_isasync : 1; /* needs aio_complete */
+       unsigned int            io_isdirect : 1;/* direct I/O */
        struct inode            *io_inode;      /* file being written to */
        struct buffer_head      *io_buffer_head;/* buffer linked list head */
        struct buffer_head      *io_buffer_tail;/* buffer linked list tail */
        size_t                  io_size;        /* size of the extent */
        xfs_off_t               io_offset;      /* offset in the file */
        struct work_struct      io_work;        /* xfsdatad work queue */
+       struct xfs_trans        *io_append_trans;/* xact. for size update */
        struct kiocb            *io_iocb;
        int                     io_result;
 } xfs_ioend_t;
index 188ef2fbd62880614a29ea0432e20707d5cf45a2..3548c6f75593d1d1f3acd949ea2677f072f19e2d 100644 (file)
@@ -5536,8 +5536,12 @@ xfs_getbmap(
        if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
                return XFS_ERROR(ENOMEM);
        out = kmem_zalloc(bmv->bmv_count * sizeof(struct getbmapx), KM_MAYFAIL);
-       if (!out)
-               return XFS_ERROR(ENOMEM);
+       if (!out) {
+               out = kmem_zalloc_large(bmv->bmv_count *
+                                       sizeof(struct getbmapx));
+               if (!out)
+                       return XFS_ERROR(ENOMEM);
+       }
 
        xfs_ilock(ip, XFS_IOLOCK_SHARED);
        if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) {
@@ -5661,7 +5665,10 @@ xfs_getbmap(
                        break;
        }
 
-       kmem_free(out);
+       if (is_vmalloc_addr(out))
+               kmem_free_large(out);
+       else
+               kmem_free(out);
        return error;
 }
 
index 4dff85c7d7eb1feda7999ff05f9941f6c42077ff..6819b5163e337f0762351d59f408c0c4156c2544 100644 (file)
@@ -45,8 +45,6 @@ static kmem_zone_t *xfs_buf_zone;
 STATIC int xfsbufd(void *);
 
 static struct workqueue_struct *xfslogd_workqueue;
-struct workqueue_struct *xfsdatad_workqueue;
-struct workqueue_struct *xfsconvertd_workqueue;
 
 #ifdef XFS_BUF_LOCK_TRACKING
 # define XB_SET_OWNER(bp)      ((bp)->b_last_holder = current->pid)
@@ -1793,21 +1791,8 @@ xfs_buf_init(void)
        if (!xfslogd_workqueue)
                goto out_free_buf_zone;
 
-       xfsdatad_workqueue = alloc_workqueue("xfsdatad", WQ_MEM_RECLAIM, 1);
-       if (!xfsdatad_workqueue)
-               goto out_destroy_xfslogd_workqueue;
-
-       xfsconvertd_workqueue = alloc_workqueue("xfsconvertd",
-                                               WQ_MEM_RECLAIM, 1);
-       if (!xfsconvertd_workqueue)
-               goto out_destroy_xfsdatad_workqueue;
-
        return 0;
 
- out_destroy_xfsdatad_workqueue:
-       destroy_workqueue(xfsdatad_workqueue);
- out_destroy_xfslogd_workqueue:
-       destroy_workqueue(xfslogd_workqueue);
  out_free_buf_zone:
        kmem_zone_destroy(xfs_buf_zone);
  out:
@@ -1817,8 +1802,6 @@ xfs_buf_init(void)
 void
 xfs_buf_terminate(void)
 {
-       destroy_workqueue(xfsconvertd_workqueue);
-       destroy_workqueue(xfsdatad_workqueue);
        destroy_workqueue(xfslogd_workqueue);
        kmem_zone_destroy(xfs_buf_zone);
 }
index dd974a55c77daee6de56a44c527e871d7cfe7fca..1137bbc5eccba64c1a53fc4ff91a3b950da7b81f 100644 (file)
@@ -215,7 +215,7 @@ xfs_swap_extents(
        xfs_trans_t     *tp;
        xfs_bstat_t     *sbp = &sxp->sx_stat;
        xfs_ifork_t     *tempifp, *ifp, *tifp;
-       int             ilf_fields, tilf_fields;
+       int             src_log_flags, target_log_flags;
        int             error = 0;
        int             aforkblks = 0;
        int             taforkblks = 0;
@@ -385,9 +385,8 @@ xfs_swap_extents(
        tip->i_delayed_blks = ip->i_delayed_blks;
        ip->i_delayed_blks = 0;
 
-       ilf_fields = XFS_ILOG_CORE;
-
-       switch(ip->i_d.di_format) {
+       src_log_flags = XFS_ILOG_CORE;
+       switch (ip->i_d.di_format) {
        case XFS_DINODE_FMT_EXTENTS:
                /* If the extents fit in the inode, fix the
                 * pointer.  Otherwise it's already NULL or
@@ -397,16 +396,15 @@ xfs_swap_extents(
                        ifp->if_u1.if_extents =
                                ifp->if_u2.if_inline_ext;
                }
-               ilf_fields |= XFS_ILOG_DEXT;
+               src_log_flags |= XFS_ILOG_DEXT;
                break;
        case XFS_DINODE_FMT_BTREE:
-               ilf_fields |= XFS_ILOG_DBROOT;
+               src_log_flags |= XFS_ILOG_DBROOT;
                break;
        }
 
-       tilf_fields = XFS_ILOG_CORE;
-
-       switch(tip->i_d.di_format) {
+       target_log_flags = XFS_ILOG_CORE;
+       switch (tip->i_d.di_format) {
        case XFS_DINODE_FMT_EXTENTS:
                /* If the extents fit in the inode, fix the
                 * pointer.  Otherwise it's already NULL or
@@ -416,10 +414,10 @@ xfs_swap_extents(
                        tifp->if_u1.if_extents =
                                tifp->if_u2.if_inline_ext;
                }
-               tilf_fields |= XFS_ILOG_DEXT;
+               target_log_flags |= XFS_ILOG_DEXT;
                break;
        case XFS_DINODE_FMT_BTREE:
-               tilf_fields |= XFS_ILOG_DBROOT;
+               target_log_flags |= XFS_ILOG_DBROOT;
                break;
        }
 
@@ -427,8 +425,8 @@ xfs_swap_extents(
        xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
        xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
 
-       xfs_trans_log_inode(tp, ip,  ilf_fields);
-       xfs_trans_log_inode(tp, tip, tilf_fields);
+       xfs_trans_log_inode(tp, ip,  src_log_flags);
+       xfs_trans_log_inode(tp, tip, target_log_flags);
 
        /*
         * If this is a synchronous mount, make sure that the
index 9245e029b8eaddb08f58bbdbe3f78d972cfd2610..d3b63aefd01dbf46f7ec473bd2b01570096a7d0e 100644 (file)
@@ -29,6 +29,7 @@
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
 #include "xfs_inode_item.h"
+#include "xfs_dir2.h"
 #include "xfs_dir2_format.h"
 #include "xfs_dir2_priv.h"
 #include "xfs_error.h"
index 53db20ee3e774fab3f643f8c280e018086dffa10..4be16a0cbe5aee7caf7854c720e371b1e9d851c2 100644 (file)
  * Lock order:
  *
  * ip->i_lock
- *   qh->qh_lock
- *     qi->qi_dqlist_lock
- *       dquot->q_qlock (xfs_dqlock() and friends)
- *         dquot->q_flush (xfs_dqflock() and friends)
- *         xfs_Gqm->qm_dqfrlist_lock
+ *   qi->qi_tree_lock
+ *     dquot->q_qlock (xfs_dqlock() and friends)
+ *       dquot->q_flush (xfs_dqflock() and friends)
+ *       qi->qi_lru_lock
  *
  * If two dquots need to be locked the order is user before group/project,
  * otherwise by the lowest id first, see xfs_dqlock2.
@@ -60,6 +59,9 @@ int xfs_dqreq_num;
 int xfs_dqerror_mod = 33;
 #endif
 
+struct kmem_zone               *xfs_qm_dqtrxzone;
+static struct kmem_zone                *xfs_qm_dqzone;
+
 static struct lock_class_key xfs_dquot_other_class;
 
 /*
@@ -69,12 +71,12 @@ void
 xfs_qm_dqdestroy(
        xfs_dquot_t     *dqp)
 {
-       ASSERT(list_empty(&dqp->q_freelist));
+       ASSERT(list_empty(&dqp->q_lru));
 
        mutex_destroy(&dqp->q_qlock);
-       kmem_zone_free(xfs_Gqm->qm_dqzone, dqp);
+       kmem_zone_free(xfs_qm_dqzone, dqp);
 
-       atomic_dec(&xfs_Gqm->qm_totaldquots);
+       XFS_STATS_DEC(xs_qm_dquot);
 }
 
 /*
@@ -282,7 +284,7 @@ xfs_qm_dqalloc(
         * Return if this type of quotas is turned off while we didn't
         * have an inode lock
         */
-       if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
+       if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
                xfs_iunlock(quotip, XFS_ILOCK_EXCL);
                return (ESRCH);
        }
@@ -384,7 +386,7 @@ xfs_qm_dqtobp(
        dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
 
        xfs_ilock(quotip, XFS_ILOCK_SHARED);
-       if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
+       if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
                /*
                 * Return if this type of quotas is turned off while we
                 * didn't have the quota inode lock.
@@ -492,12 +494,12 @@ xfs_qm_dqread(
        int                     cancelflags = 0;
 
 
-       dqp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
+       dqp = kmem_zone_zalloc(xfs_qm_dqzone, KM_SLEEP);
 
        dqp->dq_flags = type;
        dqp->q_core.d_id = cpu_to_be32(id);
        dqp->q_mount = mp;
-       INIT_LIST_HEAD(&dqp->q_freelist);
+       INIT_LIST_HEAD(&dqp->q_lru);
        mutex_init(&dqp->q_qlock);
        init_waitqueue_head(&dqp->q_pinwait);
 
@@ -516,7 +518,7 @@ xfs_qm_dqread(
        if (!(type & XFS_DQ_USER))
                lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class);
 
-       atomic_inc(&xfs_Gqm->qm_totaldquots);
+       XFS_STATS_INC(xs_qm_dquot);
 
        trace_xfs_dqread(dqp);
 
@@ -601,60 +603,6 @@ error0:
        return error;
 }
 
-/*
- * Lookup a dquot in the incore dquot hashtable. We keep two separate
- * hashtables for user and group dquots; and, these are global tables
- * inside the XQM, not per-filesystem tables.
- * The hash chain must be locked by caller, and it is left locked
- * on return. Returning dquot is locked.
- */
-STATIC int
-xfs_qm_dqlookup(
-       xfs_mount_t             *mp,
-       xfs_dqid_t              id,
-       xfs_dqhash_t            *qh,
-       xfs_dquot_t             **O_dqpp)
-{
-       xfs_dquot_t             *dqp;
-
-       ASSERT(mutex_is_locked(&qh->qh_lock));
-
-       /*
-        * Traverse the hashchain looking for a match
-        */
-       list_for_each_entry(dqp, &qh->qh_list, q_hashlist) {
-               /*
-                * We already have the hashlock. We don't need the
-                * dqlock to look at the id field of the dquot, since the
-                * id can't be modified without the hashlock anyway.
-                */
-               if (be32_to_cpu(dqp->q_core.d_id) != id || dqp->q_mount != mp)
-                       continue;
-
-               trace_xfs_dqlookup_found(dqp);
-
-               xfs_dqlock(dqp);
-               if (dqp->dq_flags & XFS_DQ_FREEING) {
-                       *O_dqpp = NULL;
-                       xfs_dqunlock(dqp);
-                       return -1;
-               }
-
-               dqp->q_nrefs++;
-
-               /*
-                * move the dquot to the front of the hashchain
-                */
-               list_move(&dqp->q_hashlist, &qh->qh_list);
-               trace_xfs_dqlookup_done(dqp);
-               *O_dqpp = dqp;
-               return 0;
-       }
-
-       *O_dqpp = NULL;
-       return 1;
-}
-
 /*
  * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a
  * a locked dquot, doing an allocation (if requested) as needed.
@@ -672,10 +620,10 @@ xfs_qm_dqget(
        uint            flags,    /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */
        xfs_dquot_t     **O_dqpp) /* OUT : locked incore dquot */
 {
-       xfs_dquot_t     *dqp;
-       xfs_dqhash_t    *h;
-       uint            version;
-       int             error;
+       struct xfs_quotainfo    *qi = mp->m_quotainfo;
+       struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type);
+       struct xfs_dquot        *dqp;
+       int                     error;
 
        ASSERT(XFS_IS_QUOTA_RUNNING(mp));
        if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) ||
@@ -683,7 +631,6 @@ xfs_qm_dqget(
            (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
                return (ESRCH);
        }
-       h = XFS_DQ_HASH(mp, id, type);
 
 #ifdef DEBUG
        if (xfs_do_dqerror) {
@@ -699,42 +646,33 @@ xfs_qm_dqget(
               type == XFS_DQ_GROUP);
        if (ip) {
                ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-               if (type == XFS_DQ_USER)
-                       ASSERT(ip->i_udquot == NULL);
-               else
-                       ASSERT(ip->i_gdquot == NULL);
+               ASSERT(xfs_inode_dquot(ip, type) == NULL);
        }
 #endif
 
 restart:
-       mutex_lock(&h->qh_lock);
+       mutex_lock(&qi->qi_tree_lock);
+       dqp = radix_tree_lookup(tree, id);
+       if (dqp) {
+               xfs_dqlock(dqp);
+               if (dqp->dq_flags & XFS_DQ_FREEING) {
+                       xfs_dqunlock(dqp);
+                       mutex_unlock(&qi->qi_tree_lock);
+                       trace_xfs_dqget_freeing(dqp);
+                       delay(1);
+                       goto restart;
+               }
 
-       /*
-        * Look in the cache (hashtable).
-        * The chain is kept locked during lookup.
-        */
-       switch (xfs_qm_dqlookup(mp, id, h, O_dqpp)) {
-       case -1:
-               XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
-               mutex_unlock(&h->qh_lock);
-               delay(1);
-               goto restart;
-       case 0:
-               XQM_STATS_INC(xqmstats.xs_qm_dqcachehits);
-               /*
-                * The dquot was found, moved to the front of the chain,
-                * taken off the freelist if it was on it, and locked
-                * at this point. Just unlock the hashchain and return.
-                */
-               ASSERT(*O_dqpp);
-               ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
-               mutex_unlock(&h->qh_lock);
-               trace_xfs_dqget_hit(*O_dqpp);
-               return 0;       /* success */
-       default:
-               XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
-               break;
+               dqp->q_nrefs++;
+               mutex_unlock(&qi->qi_tree_lock);
+
+               trace_xfs_dqget_hit(dqp);
+               XFS_STATS_INC(xs_qm_dqcachehits);
+               *O_dqpp = dqp;
+               return 0;
        }
+       mutex_unlock(&qi->qi_tree_lock);
+       XFS_STATS_INC(xs_qm_dqcachemisses);
 
        /*
         * Dquot cache miss. We don't want to keep the inode lock across
@@ -745,12 +683,6 @@ restart:
         */
        if (ip)
                xfs_iunlock(ip, XFS_ILOCK_EXCL);
-       /*
-        * Save the hashchain version stamp, and unlock the chain, so that
-        * we don't keep the lock across a disk read
-        */
-       version = h->qh_version;
-       mutex_unlock(&h->qh_lock);
 
        error = xfs_qm_dqread(mp, id, type, flags, &dqp);
 
@@ -760,97 +692,53 @@ restart:
        if (error)
                return error;
 
-       /*
-        * Dquot lock comes after hashlock in the lock ordering
-        */
        if (ip) {
                /*
                 * A dquot could be attached to this inode by now, since
                 * we had dropped the ilock.
                 */
-               if (type == XFS_DQ_USER) {
-                       if (!XFS_IS_UQUOTA_ON(mp)) {
-                               /* inode stays locked on return */
-                               xfs_qm_dqdestroy(dqp);
-                               return XFS_ERROR(ESRCH);
-                       }
-                       if (ip->i_udquot) {
+               if (xfs_this_quota_on(mp, type)) {
+                       struct xfs_dquot        *dqp1;
+
+                       dqp1 = xfs_inode_dquot(ip, type);
+                       if (dqp1) {
                                xfs_qm_dqdestroy(dqp);
-                               dqp = ip->i_udquot;
+                               dqp = dqp1;
                                xfs_dqlock(dqp);
                                goto dqret;
                        }
                } else {
-                       if (!XFS_IS_OQUOTA_ON(mp)) {
-                               /* inode stays locked on return */
-                               xfs_qm_dqdestroy(dqp);
-                               return XFS_ERROR(ESRCH);
-                       }
-                       if (ip->i_gdquot) {
-                               xfs_qm_dqdestroy(dqp);
-                               dqp = ip->i_gdquot;
-                               xfs_dqlock(dqp);
-                               goto dqret;
-                       }
+                       /* inode stays locked on return */
+                       xfs_qm_dqdestroy(dqp);
+                       return XFS_ERROR(ESRCH);
                }
        }
 
-       /*
-        * Hashlock comes after ilock in lock order
-        */
-       mutex_lock(&h->qh_lock);
-       if (version != h->qh_version) {
-               xfs_dquot_t *tmpdqp;
+       mutex_lock(&qi->qi_tree_lock);
+       error = -radix_tree_insert(tree, id, dqp);
+       if (unlikely(error)) {
+               WARN_ON(error != EEXIST);
+
                /*
-                * Now, see if somebody else put the dquot in the
-                * hashtable before us. This can happen because we didn't
-                * keep the hashchain lock. We don't have to worry about
-                * lock order between the two dquots here since dqp isn't
-                * on any findable lists yet.
+                * Duplicate found. Just throw away the new dquot and start
+                * over.
                 */
-               switch (xfs_qm_dqlookup(mp, id, h, &tmpdqp)) {
-               case 0:
-               case -1:
-                       /*
-                        * Duplicate found, either in cache or on its way out.
-                        * Just throw away the new dquot and start over.
-                        */
-                       if (tmpdqp)
-                               xfs_qm_dqput(tmpdqp);
-                       mutex_unlock(&h->qh_lock);
-                       xfs_qm_dqdestroy(dqp);
-                       XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
-                       goto restart;
-               default:
-                       break;
-               }
+               mutex_unlock(&qi->qi_tree_lock);
+               trace_xfs_dqget_dup(dqp);
+               xfs_qm_dqdestroy(dqp);
+               XFS_STATS_INC(xs_qm_dquot_dups);
+               goto restart;
        }
 
-       /*
-        * Put the dquot at the beginning of the hash-chain and mp's list
-        * LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock ..
-        */
-       ASSERT(mutex_is_locked(&h->qh_lock));
-       dqp->q_hash = h;
-       list_add(&dqp->q_hashlist, &h->qh_list);
-       h->qh_version++;
-
-       /*
-        * Attach this dquot to this filesystem's list of all dquots,
-        * kept inside the mount structure in m_quotainfo field
-        */
-       mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
-
        /*
         * We return a locked dquot to the caller, with a reference taken
         */
        xfs_dqlock(dqp);
        dqp->q_nrefs = 1;
 
-       list_add(&dqp->q_mplist, &mp->m_quotainfo->qi_dqlist);
-       mp->m_quotainfo->qi_dquots++;
-       mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
-       mutex_unlock(&h->qh_lock);
+       qi->qi_dquots++;
+       mutex_unlock(&qi->qi_tree_lock);
+
  dqret:
        ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
        trace_xfs_dqget_miss(dqp);
@@ -859,37 +747,22 @@ restart:
 }
 
 
-/*
- * Release a reference to the dquot (decrement ref-count)
- * and unlock it. If there is a group quota attached to this
- * dquot, carefully release that too without tripping over
- * deadlocks'n'stuff.
- */
-void
-xfs_qm_dqput(
+STATIC void
+xfs_qm_dqput_final(
        struct xfs_dquot        *dqp)
 {
+       struct xfs_quotainfo    *qi = dqp->q_mount->m_quotainfo;
        struct xfs_dquot        *gdqp;
 
-       ASSERT(dqp->q_nrefs > 0);
-       ASSERT(XFS_DQ_IS_LOCKED(dqp));
-
-       trace_xfs_dqput(dqp);
-
-recurse:
-       if (--dqp->q_nrefs > 0) {
-               xfs_dqunlock(dqp);
-               return;
-       }
-
        trace_xfs_dqput_free(dqp);
 
-       mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-       if (list_empty(&dqp->q_freelist)) {
-               list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
-               xfs_Gqm->qm_dqfrlist_cnt++;
+       mutex_lock(&qi->qi_lru_lock);
+       if (list_empty(&dqp->q_lru)) {
+               list_add_tail(&dqp->q_lru, &qi->qi_lru_list);
+               qi->qi_lru_count++;
+               XFS_STATS_INC(xs_qm_dquot_unused);
        }
-       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+       mutex_unlock(&qi->qi_lru_lock);
 
        /*
         * If we just added a udquot to the freelist, then we want to release
@@ -906,10 +779,29 @@ recurse:
        /*
         * If we had a group quota hint, release it now.
         */
-       if (gdqp) {
-               dqp = gdqp;
-               goto recurse;
-       }
+       if (gdqp)
+               xfs_qm_dqput(gdqp);
+}
+
+/*
+ * Release a reference to the dquot (decrement ref-count) and unlock it.
+ *
+ * If there is a group quota attached to this dquot, carefully release that
+ * too without tripping over deadlocks'n'stuff.
+ */
+void
+xfs_qm_dqput(
+       struct xfs_dquot        *dqp)
+{
+       ASSERT(dqp->q_nrefs > 0);
+       ASSERT(XFS_DQ_IS_LOCKED(dqp));
+
+       trace_xfs_dqput(dqp);
+
+       if (--dqp->q_nrefs > 0)
+               xfs_dqunlock(dqp);
+       else
+               xfs_qm_dqput_final(dqp);
 }
 
 /*
@@ -1091,17 +983,6 @@ xfs_qm_dqflush(
 
 }
 
-void
-xfs_dqunlock(
-       xfs_dquot_t *dqp)
-{
-       xfs_dqunlock_nonotify(dqp);
-       if (dqp->q_logitem.qli_dquot == dqp) {
-               xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp,
-                                       &dqp->q_logitem.qli_item);
-       }
-}
-
 /*
  * Lock two xfs_dquot structures.
  *
@@ -1130,85 +1011,6 @@ xfs_dqlock2(
        }
 }
 
-/*
- * Take a dquot out of the mount's dqlist as well as the hashlist.  This is
- * called via unmount as well as quotaoff, and the purge will always succeed.
- */
-void
-xfs_qm_dqpurge(
-       struct xfs_dquot        *dqp)
-{
-       struct xfs_mount        *mp = dqp->q_mount;
-       struct xfs_dqhash       *qh = dqp->q_hash;
-
-       xfs_dqlock(dqp);
-
-       /*
-        * If we're turning off quotas, we have to make sure that, for
-        * example, we don't delete quota disk blocks while dquots are
-        * in the process of getting written to those disk blocks.
-        * This dquot might well be on AIL, and we can't leave it there
-        * if we're turning off quotas. Basically, we need this flush
-        * lock, and are willing to block on it.
-        */
-       if (!xfs_dqflock_nowait(dqp)) {
-               /*
-                * Block on the flush lock after nudging dquot buffer,
-                * if it is incore.
-                */
-               xfs_dqflock_pushbuf_wait(dqp);
-       }
-
-       /*
-        * If we are turning this type of quotas off, we don't care
-        * about the dirty metadata sitting in this dquot. OTOH, if
-        * we're unmounting, we do care, so we flush it and wait.
-        */
-       if (XFS_DQ_IS_DIRTY(dqp)) {
-               int     error;
-
-               /*
-                * We don't care about getting disk errors here. We need
-                * to purge this dquot anyway, so we go ahead regardless.
-                */
-               error = xfs_qm_dqflush(dqp, SYNC_WAIT);
-               if (error)
-                       xfs_warn(mp, "%s: dquot %p flush failed",
-                               __func__, dqp);
-               xfs_dqflock(dqp);
-       }
-
-       ASSERT(atomic_read(&dqp->q_pincount) == 0);
-       ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
-              !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
-
-       xfs_dqfunlock(dqp);
-       xfs_dqunlock(dqp);
-
-       mutex_lock(&qh->qh_lock);
-       list_del_init(&dqp->q_hashlist);
-       qh->qh_version++;
-       mutex_unlock(&qh->qh_lock);
-
-       mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
-       list_del_init(&dqp->q_mplist);
-       mp->m_quotainfo->qi_dqreclaims++;
-       mp->m_quotainfo->qi_dquots--;
-       mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
-
-       /*
-        * We move dquots to the freelist as soon as their reference count
-        * hits zero, so it really should be on the freelist here.
-        */
-       mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-       ASSERT(!list_empty(&dqp->q_freelist));
-       list_del_init(&dqp->q_freelist);
-       xfs_Gqm->qm_dqfrlist_cnt--;
-       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-
-       xfs_qm_dqdestroy(dqp);
-}
-
 /*
  * Give the buffer a little push if it is incore and
  * wait on the flush lock.
@@ -1241,3 +1043,31 @@ xfs_dqflock_pushbuf_wait(
 out_lock:
        xfs_dqflock(dqp);
 }
+
+int __init
+xfs_qm_init(void)
+{
+       xfs_qm_dqzone =
+               kmem_zone_init(sizeof(struct xfs_dquot), "xfs_dquot");
+       if (!xfs_qm_dqzone)
+               goto out;
+
+       xfs_qm_dqtrxzone =
+               kmem_zone_init(sizeof(struct xfs_dquot_acct), "xfs_dqtrx");
+       if (!xfs_qm_dqtrxzone)
+               goto out_free_dqzone;
+
+       return 0;
+
+out_free_dqzone:
+       kmem_zone_destroy(xfs_qm_dqzone);
+out:
+       return -ENOMEM;
+}
+
+void __exit
+xfs_qm_exit(void)
+{
+       kmem_zone_destroy(xfs_qm_dqtrxzone);
+       kmem_zone_destroy(xfs_qm_dqzone);
+}
index a1d91d8f18027b9e217f07ff8769d9d4a21fe2a1..ef9190bd8b300a061244d1d294a90acf39610b08 100644 (file)
  * when quotas are off.
  */
 
-/*
- * The hash chain headers (hash buckets)
- */
-typedef struct xfs_dqhash {
-       struct list_head  qh_list;
-       struct mutex      qh_lock;
-       uint              qh_version;   /* ever increasing version */
-       uint              qh_nelems;    /* number of dquots on the list */
-} xfs_dqhash_t;
-
 struct xfs_mount;
 struct xfs_trans;
 
@@ -47,10 +37,7 @@ struct xfs_trans;
  */
 typedef struct xfs_dquot {
        uint             dq_flags;      /* various flags (XFS_DQ_*) */
-       struct list_head q_freelist;    /* global free list of dquots */
-       struct list_head q_mplist;      /* mount's list of dquots */
-       struct list_head q_hashlist;    /* gloabl hash list of dquots */
-       xfs_dqhash_t    *q_hash;        /* the hashchain header */
+       struct list_head q_lru;         /* per-mount LRU list of unused dquots */
        struct xfs_mount*q_mount;       /* filesystem this relates to */
        struct xfs_trans*q_transp;      /* trans this belongs to currently */
        uint             q_nrefs;       /* # active refs from inodes */
@@ -110,11 +97,37 @@ static inline void xfs_dqlock(struct xfs_dquot *dqp)
        mutex_lock(&dqp->q_qlock);
 }
 
-static inline void xfs_dqunlock_nonotify(struct xfs_dquot *dqp)
+static inline void xfs_dqunlock(struct xfs_dquot *dqp)
 {
        mutex_unlock(&dqp->q_qlock);
 }
 
+static inline int xfs_this_quota_on(struct xfs_mount *mp, int type)
+{
+       switch (type & XFS_DQ_ALLTYPES) {
+       case XFS_DQ_USER:
+               return XFS_IS_UQUOTA_ON(mp);
+       case XFS_DQ_GROUP:
+       case XFS_DQ_PROJ:
+               return XFS_IS_OQUOTA_ON(mp);
+       default:
+               return 0;
+       }
+}
+
+static inline xfs_dquot_t *xfs_inode_dquot(struct xfs_inode *ip, int type)
+{
+       switch (type & XFS_DQ_ALLTYPES) {
+       case XFS_DQ_USER:
+               return ip->i_udquot;
+       case XFS_DQ_GROUP:
+       case XFS_DQ_PROJ:
+               return ip->i_gdquot;
+       default:
+               return NULL;
+       }
+}
+
 #define XFS_DQ_IS_LOCKED(dqp)  (mutex_is_locked(&((dqp)->q_qlock)))
 #define XFS_DQ_IS_DIRTY(dqp)   ((dqp)->dq_flags & XFS_DQ_DIRTY)
 #define XFS_QM_ISUDQ(dqp)      ((dqp)->dq_flags & XFS_DQ_USER)
@@ -125,15 +138,10 @@ static inline void xfs_dqunlock_nonotify(struct xfs_dquot *dqp)
                                 XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \
                                 XFS_DQ_TO_QINF(dqp)->qi_gquotaip)
 
-#define XFS_IS_THIS_QUOTA_OFF(d) (! (XFS_QM_ISUDQ(d) ? \
-                                    (XFS_IS_UQUOTA_ON((d)->q_mount)) : \
-                                    (XFS_IS_OQUOTA_ON((d)->q_mount))))
-
 extern int             xfs_qm_dqread(struct xfs_mount *, xfs_dqid_t, uint,
                                        uint, struct xfs_dquot  **);
 extern void            xfs_qm_dqdestroy(xfs_dquot_t *);
 extern int             xfs_qm_dqflush(xfs_dquot_t *, uint);
-extern void            xfs_qm_dqpurge(xfs_dquot_t *);
 extern void            xfs_qm_dqunpin_wait(xfs_dquot_t *);
 extern void            xfs_qm_adjust_dqtimers(xfs_mount_t *,
                                        xfs_disk_dquot_t *);
@@ -144,7 +152,6 @@ extern int          xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
 extern void            xfs_qm_dqput(xfs_dquot_t *);
 
 extern void            xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *);
-extern void            xfs_dqunlock(struct xfs_dquot *);
 extern void            xfs_dqflock_pushbuf_wait(struct xfs_dquot *dqp);
 
 static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp)
index 7e5bc872f2b4fb12d67f3da3796f3c5b86ac162c..54a67dd9ac0a5fbe5a7caf4271d14c67798fd872 100644 (file)
@@ -163,7 +163,6 @@ xfs_file_fsync(
        struct inode            *inode = file->f_mapping->host;
        struct xfs_inode        *ip = XFS_I(inode);
        struct xfs_mount        *mp = ip->i_mount;
-       struct xfs_trans        *tp;
        int                     error = 0;
        int                     log_flushed = 0;
        xfs_lsn_t               lsn = 0;
@@ -194,75 +193,18 @@ xfs_file_fsync(
        }
 
        /*
-        * We always need to make sure that the required inode state is safe on
-        * disk.  The inode might be clean but we still might need to force the
-        * log because of committed transactions that haven't hit the disk yet.
-        * Likewise, there could be unflushed non-transactional changes to the
-        * inode core that have to go to disk and this requires us to issue
-        * a synchronous transaction to capture these changes correctly.
-        *
-        * This code relies on the assumption that if the i_update_core field
-        * of the inode is clear and the inode is unpinned then it is clean
-        * and no action is required.
+        * All metadata updates are logged, which means that we just have
+        * to flush the log up to the latest LSN that touched the inode.
         */
        xfs_ilock(ip, XFS_ILOCK_SHARED);
-
-       /*
-        * First check if the VFS inode is marked dirty.  All the dirtying
-        * of non-transactional updates do not go through mark_inode_dirty*,
-        * which allows us to distinguish between pure timestamp updates
-        * and i_size updates which need to be caught for fdatasync.
-        * After that also check for the dirty state in the XFS inode, which
-        * might gets cleared when the inode gets written out via the AIL
-        * or xfs_iflush_cluster.
-        */
-       if (((inode->i_state & I_DIRTY_DATASYNC) ||
-           ((inode->i_state & I_DIRTY_SYNC) && !datasync)) &&
-           ip->i_update_core) {
-               /*
-                * Kick off a transaction to log the inode core to get the
-                * updates.  The sync transaction will also force the log.
-                */
-               xfs_iunlock(ip, XFS_ILOCK_SHARED);
-               tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
-               error = xfs_trans_reserve(tp, 0,
-                               XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
-               if (error) {
-                       xfs_trans_cancel(tp, 0);
-                       return -error;
-               }
-               xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-               /*
-                * Note - it's possible that we might have pushed ourselves out
-                * of the way during trans_reserve which would flush the inode.
-                * But there's no guarantee that the inode buffer has actually
-                * gone out yet (it's delwri).  Plus the buffer could be pinned
-                * anyway if it's part of an inode in another recent
-                * transaction.  So we play it safe and fire off the
-                * transaction anyway.
-                */
-               xfs_trans_ijoin(tp, ip, 0);
-               xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-               error = xfs_trans_commit(tp, 0);
-
-               lsn = ip->i_itemp->ili_last_lsn;
-               xfs_iunlock(ip, XFS_ILOCK_EXCL);
-       } else {
-               /*
-                * Timestamps/size haven't changed since last inode flush or
-                * inode transaction commit.  That means either nothing got
-                * written or a transaction committed which caught the updates.
-                * If the latter happened and the transaction hasn't hit the
-                * disk yet, the inode will be still be pinned.  If it is,
-                * force the log.
-                */
-               if (xfs_ipincount(ip))
+       if (xfs_ipincount(ip)) {
+               if (!datasync ||
+                   (ip->i_itemp->ili_fields & ~XFS_ILOG_TIMESTAMP))
                        lsn = ip->i_itemp->ili_last_lsn;
-               xfs_iunlock(ip, XFS_ILOCK_SHARED);
        }
+       xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
-       if (!error && lsn)
+       if (lsn)
                error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
 
        /*
@@ -659,9 +601,6 @@ restart:
                return error;
        }
 
-       if (likely(!(file->f_mode & FMODE_NOCMTIME)))
-               file_update_time(file);
-
        /*
         * If the offset is beyond the size of the file, we need to zero any
         * blocks that fall between the existing EOF and the start of this
@@ -684,6 +623,15 @@ restart:
        if (error)
                return error;
 
+       /*
+        * Updating the timestamps will grab the ilock again from
+        * xfs_fs_dirty_inode, so we have to call it after dropping the
+        * lock above.  Eventually we should look into a way to avoid
+        * the pointless lock roundtrip.
+        */
+       if (likely(!(file->f_mode & FMODE_NOCMTIME)))
+               file_update_time(file);
+
        /*
         * If we're writing the file then make sure to clear the setuid and
         * setgid bits if the process is not being run by root.  This keeps
index 8c3e46394d484c3fbd798913c6862d90b3f820e7..a98cb4524e6cbc8d0c5064813014c36963bc03e0 100644 (file)
@@ -91,7 +91,6 @@ xfs_inode_alloc(
        ip->i_afp = NULL;
        memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
        ip->i_flags = 0;
-       ip->i_update_core = 0;
        ip->i_delayed_blks = 0;
        memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
 
@@ -350,9 +349,20 @@ xfs_iget_cache_miss(
                        BUG();
        }
 
-       spin_lock(&pag->pag_ici_lock);
+       /*
+        * These values must be set before inserting the inode into the radix
+        * tree as the moment it is inserted a concurrent lookup (allowed by the
+        * RCU locking mechanism) can find it and that lookup must see that this
+        * is an inode currently under construction (i.e. that XFS_INEW is set).
+        * The ip->i_flags_lock that protects the XFS_INEW flag forms the
+        * memory barrier that ensures this detection works correctly at lookup
+        * time.
+        */
+       ip->i_udquot = ip->i_gdquot = NULL;
+       xfs_iflags_set(ip, XFS_INEW);
 
        /* insert the new inode */
+       spin_lock(&pag->pag_ici_lock);
        error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
        if (unlikely(error)) {
                WARN_ON(error != -EEXIST);
@@ -360,11 +370,6 @@ xfs_iget_cache_miss(
                error = EAGAIN;
                goto out_preload_end;
        }
-
-       /* These values _must_ be set before releasing the radix tree lock! */
-       ip->i_udquot = ip->i_gdquot = NULL;
-       xfs_iflags_set(ip, XFS_INEW);
-
        spin_unlock(&pag->pag_ici_lock);
        radix_tree_preload_end();
 
@@ -418,6 +423,15 @@ xfs_iget(
        xfs_perag_t     *pag;
        xfs_agino_t     agino;
 
+       /*
+        * xfs_reclaim_inode() uses the ILOCK to ensure an inode
+        * doesn't get freed while it's being referenced during a
+        * radix tree traversal here.  It assumes this function
+        * acquires only the ILOCK (and therefore it has no need to
+        * involve the IOLOCK in this synchronization).
+        */
+       ASSERT((lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) == 0);
+
        /* reject inode numbers outside existing AGs */
        if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
                return EINVAL;
@@ -642,8 +656,7 @@ xfs_iunlock(
               (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
        ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
               (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
-       ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_IUNLOCK_NONOTIFY |
-                       XFS_LOCK_DEP_MASK)) == 0);
+       ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
        ASSERT(lock_flags != 0);
 
        if (lock_flags & XFS_IOLOCK_EXCL)
@@ -656,16 +669,6 @@ xfs_iunlock(
        else if (lock_flags & XFS_ILOCK_SHARED)
                mrunlock_shared(&ip->i_lock);
 
-       if ((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) &&
-           !(lock_flags & XFS_IUNLOCK_NONOTIFY) && ip->i_itemp) {
-               /*
-                * Let the AIL know that this item has been unlocked in case
-                * it is in the AIL and anyone is waiting on it.  Don't do
-                * this if the caller has asked us not to.
-                */
-               xfs_trans_unlocked_item(ip->i_itemp->ili_item.li_ailp,
-                                       (xfs_log_item_t*)(ip->i_itemp));
-       }
        trace_xfs_iunlock(ip, lock_flags, _RET_IP_);
 }
 
index b21022499c2e8f302699f80ca2af344301fee941..bc46c0a133d373d3afffad99f1323a8d15b52c19 100644 (file)
@@ -1656,14 +1656,13 @@ retry:
                        iip = ip->i_itemp;
                        if (!iip || xfs_inode_clean(ip)) {
                                ASSERT(ip != free_ip);
-                               ip->i_update_core = 0;
                                xfs_ifunlock(ip);
                                xfs_iunlock(ip, XFS_ILOCK_EXCL);
                                continue;
                        }
 
-                       iip->ili_last_fields = iip->ili_format.ilf_fields;
-                       iip->ili_format.ilf_fields = 0;
+                       iip->ili_last_fields = iip->ili_fields;
+                       iip->ili_fields = 0;
                        iip->ili_logged = 1;
                        xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
                                                &iip->ili_item.li_lsn);
@@ -2177,7 +2176,7 @@ xfs_iflush_fork(
        mp = ip->i_mount;
        switch (XFS_IFORK_FORMAT(ip, whichfork)) {
        case XFS_DINODE_FMT_LOCAL:
-               if ((iip->ili_format.ilf_fields & dataflag[whichfork]) &&
+               if ((iip->ili_fields & dataflag[whichfork]) &&
                    (ifp->if_bytes > 0)) {
                        ASSERT(ifp->if_u1.if_data != NULL);
                        ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
@@ -2187,8 +2186,8 @@ xfs_iflush_fork(
 
        case XFS_DINODE_FMT_EXTENTS:
                ASSERT((ifp->if_flags & XFS_IFEXTENTS) ||
-                      !(iip->ili_format.ilf_fields & extflag[whichfork]));
-               if ((iip->ili_format.ilf_fields & extflag[whichfork]) &&
+                      !(iip->ili_fields & extflag[whichfork]));
+               if ((iip->ili_fields & extflag[whichfork]) &&
                    (ifp->if_bytes > 0)) {
                        ASSERT(xfs_iext_get_ext(ifp, 0));
                        ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
@@ -2198,7 +2197,7 @@ xfs_iflush_fork(
                break;
 
        case XFS_DINODE_FMT_BTREE:
-               if ((iip->ili_format.ilf_fields & brootflag[whichfork]) &&
+               if ((iip->ili_fields & brootflag[whichfork]) &&
                    (ifp->if_broot_bytes > 0)) {
                        ASSERT(ifp->if_broot != NULL);
                        ASSERT(ifp->if_broot_bytes <=
@@ -2211,14 +2210,14 @@ xfs_iflush_fork(
                break;
 
        case XFS_DINODE_FMT_DEV:
-               if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) {
+               if (iip->ili_fields & XFS_ILOG_DEV) {
                        ASSERT(whichfork == XFS_DATA_FORK);
                        xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev);
                }
                break;
 
        case XFS_DINODE_FMT_UUID:
-               if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) {
+               if (iip->ili_fields & XFS_ILOG_UUID) {
                        ASSERT(whichfork == XFS_DATA_FORK);
                        memcpy(XFS_DFORK_DPTR(dip),
                               &ip->i_df.if_u2.if_uuid,
@@ -2451,9 +2450,8 @@ xfs_iflush(
         * to disk, because the log record didn't make it to disk!
         */
        if (XFS_FORCED_SHUTDOWN(mp)) {
-               ip->i_update_core = 0;
                if (iip)
-                       iip->ili_format.ilf_fields = 0;
+                       iip->ili_fields = 0;
                xfs_ifunlock(ip);
                return XFS_ERROR(EIO);
        }
@@ -2533,26 +2531,6 @@ xfs_iflush_int(
        /* set *dip = inode's place in the buffer */
        dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
 
-       /*
-        * Clear i_update_core before copying out the data.
-        * This is for coordination with our timestamp updates
-        * that don't hold the inode lock. They will always
-        * update the timestamps BEFORE setting i_update_core,
-        * so if we clear i_update_core after they set it we
-        * are guaranteed to see their updates to the timestamps.
-        * I believe that this depends on strongly ordered memory
-        * semantics, but we have that.  We use the SYNCHRONIZE
-        * macro to make sure that the compiler does not reorder
-        * the i_update_core access below the data copy below.
-        */
-       ip->i_update_core = 0;
-       SYNCHRONIZE();
-
-       /*
-        * Make sure to get the latest timestamps from the Linux inode.
-        */
-       xfs_synchronize_times(ip);
-
        if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
                               mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
                xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
@@ -2663,36 +2641,33 @@ xfs_iflush_int(
        xfs_inobp_check(mp, bp);
 
        /*
-        * We've recorded everything logged in the inode, so we'd
-        * like to clear the ilf_fields bits so we don't log and
-        * flush things unnecessarily.  However, we can't stop
-        * logging all this information until the data we've copied
-        * into the disk buffer is written to disk.  If we did we might
-        * overwrite the copy of the inode in the log with all the
-        * data after re-logging only part of it, and in the face of
-        * a crash we wouldn't have all the data we need to recover.
+        * We've recorded everything logged in the inode, so we'd like to clear
+        * the ili_fields bits so we don't log and flush things unnecessarily.
+        * However, we can't stop logging all this information until the data
+        * we've copied into the disk buffer is written to disk.  If we did we
+        * might overwrite the copy of the inode in the log with all the data
+        * after re-logging only part of it, and in the face of a crash we
+        * wouldn't have all the data we need to recover.
         *
-        * What we do is move the bits to the ili_last_fields field.
-        * When logging the inode, these bits are moved back to the
-        * ilf_fields field.  In the xfs_iflush_done() routine we
-        * clear ili_last_fields, since we know that the information
-        * those bits represent is permanently on disk.  As long as
-        * the flush completes before the inode is logged again, then
-        * both ilf_fields and ili_last_fields will be cleared.
+        * What we do is move the bits to the ili_last_fields field.  When
+        * logging the inode, these bits are moved back to the ili_fields field.
+        * In the xfs_iflush_done() routine we clear ili_last_fields, since we
+        * know that the information those bits represent is permanently on
+        * disk.  As long as the flush completes before the inode is logged
+        * again, then both ili_fields and ili_last_fields will be cleared.
         *
-        * We can play with the ilf_fields bits here, because the inode
-        * lock must be held exclusively in order to set bits there
-        * and the flush lock protects the ili_last_fields bits.
-        * Set ili_logged so the flush done
-        * routine can tell whether or not to look in the AIL.
-        * Also, store the current LSN of the inode so that we can tell
-        * whether the item has moved in the AIL from xfs_iflush_done().
-        * In order to read the lsn we need the AIL lock, because
-        * it is a 64 bit value that cannot be read atomically.
+        * We can play with the ili_fields bits here, because the inode lock
+        * must be held exclusively in order to set bits there and the flush
+        * lock protects the ili_last_fields bits.  Set ili_logged so the flush
+        * done routine can tell whether or not to look in the AIL.  Also, store
+        * the current LSN of the inode so that we can tell whether the item has
+        * moved in the AIL from xfs_iflush_done().  In order to read the lsn we
+        * need the AIL lock, because it is a 64 bit value that cannot be read
+        * atomically.
         */
-       if (iip != NULL && iip->ili_format.ilf_fields != 0) {
-               iip->ili_last_fields = iip->ili_format.ilf_fields;
-               iip->ili_format.ilf_fields = 0;
+       if (iip != NULL && iip->ili_fields != 0) {
+               iip->ili_last_fields = iip->ili_fields;
+               iip->ili_fields = 0;
                iip->ili_logged = 1;
 
                xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
@@ -2711,8 +2686,7 @@ xfs_iflush_int(
        } else {
                /*
                 * We're flushing an inode which is not in the AIL and has
-                * not been logged but has i_update_core set.  For this
-                * case we can use a B_DELWRI flush and immediately drop
+                * not been logged.  For this case we can immediately drop
                 * the inode flush lock because we can avoid the whole
                 * AIL state thing.  It's OK to drop the flush lock now,
                 * because we've already locked the buffer and to do anything
index 2f27b745408520b73bab9bd8a1a2ca4ed1f96ea0..f123dbe6d42a0e4203f1db43980dd29e097311f9 100644 (file)
@@ -241,7 +241,6 @@ typedef struct xfs_inode {
        spinlock_t              i_flags_lock;   /* inode i_flags lock */
        /* Miscellaneous state. */
        unsigned long           i_flags;        /* see defined flags below */
-       unsigned char           i_update_core;  /* timestamps/size is dirty */
        unsigned int            i_delayed_blks; /* count of delay alloc blks */
 
        xfs_icdinode_t          i_d;            /* most of ondisk inode */
@@ -274,6 +273,20 @@ static inline xfs_fsize_t XFS_ISIZE(struct xfs_inode *ip)
        return ip->i_d.di_size;
 }
 
+/*
+ * If this I/O goes past the on-disk inode size update it unless it would
+ * be past the current in-core inode size.
+ */
+static inline xfs_fsize_t
+xfs_new_eof(struct xfs_inode *ip, xfs_fsize_t new_size)
+{
+       xfs_fsize_t i_size = i_size_read(VFS_I(ip));
+
+       if (new_size > i_size)
+               new_size = i_size;
+       return new_size > ip->i_d.di_size ? new_size : 0;
+}
+
 /*
  * i_flags helper functions
  */
@@ -422,7 +435,6 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
 #define        XFS_IOLOCK_SHARED       (1<<1)
 #define        XFS_ILOCK_EXCL          (1<<2)
 #define        XFS_ILOCK_SHARED        (1<<3)
-#define        XFS_IUNLOCK_NONOTIFY    (1<<4)
 
 #define XFS_LOCK_MASK          (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
                                | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)
@@ -431,8 +443,7 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
        { XFS_IOLOCK_EXCL,      "IOLOCK_EXCL" }, \
        { XFS_IOLOCK_SHARED,    "IOLOCK_SHARED" }, \
        { XFS_ILOCK_EXCL,       "ILOCK_EXCL" }, \
-       { XFS_ILOCK_SHARED,     "ILOCK_SHARED" }, \
-       { XFS_IUNLOCK_NONOTIFY, "IUNLOCK_NONOTIFY" }
+       { XFS_ILOCK_SHARED,     "ILOCK_SHARED" }
 
 
 /*
@@ -522,10 +533,6 @@ void               xfs_promote_inode(struct xfs_inode *);
 void           xfs_lock_inodes(xfs_inode_t **, int, uint);
 void           xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
 
-void           xfs_synchronize_times(xfs_inode_t *);
-void           xfs_mark_inode_dirty(xfs_inode_t *);
-void           xfs_mark_inode_dirty_sync(xfs_inode_t *);
-
 #define IHOLD(ip) \
 do { \
        ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
index 91d71dcd4852eed6339bd1ceb54a8dbdf04cd27a..05d924efceafb68940a5dd072a719b42133d152a 100644 (file)
@@ -57,77 +57,28 @@ xfs_inode_item_size(
        struct xfs_inode        *ip = iip->ili_inode;
        uint                    nvecs = 2;
 
-       /*
-        * Only log the data/extents/b-tree root if there is something
-        * left to log.
-        */
-       iip->ili_format.ilf_fields |= XFS_ILOG_CORE;
-
        switch (ip->i_d.di_format) {
        case XFS_DINODE_FMT_EXTENTS:
-               iip->ili_format.ilf_fields &=
-                       ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
-                         XFS_ILOG_DEV | XFS_ILOG_UUID);
-               if ((iip->ili_format.ilf_fields & XFS_ILOG_DEXT) &&
-                   (ip->i_d.di_nextents > 0) &&
-                   (ip->i_df.if_bytes > 0)) {
-                       ASSERT(ip->i_df.if_u1.if_extents != NULL);
+               if ((iip->ili_fields & XFS_ILOG_DEXT) &&
+                   ip->i_d.di_nextents > 0 &&
+                   ip->i_df.if_bytes > 0)
                        nvecs++;
-               } else {
-                       iip->ili_format.ilf_fields &= ~XFS_ILOG_DEXT;
-               }
                break;
 
        case XFS_DINODE_FMT_BTREE:
-               iip->ili_format.ilf_fields &=
-                       ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT |
-                         XFS_ILOG_DEV | XFS_ILOG_UUID);
-               if ((iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) &&
-                   (ip->i_df.if_broot_bytes > 0)) {
-                       ASSERT(ip->i_df.if_broot != NULL);
+               if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
+                   ip->i_df.if_broot_bytes > 0)
                        nvecs++;
-               } else {
-                       ASSERT(!(iip->ili_format.ilf_fields &
-                                XFS_ILOG_DBROOT));
-#ifdef XFS_TRANS_DEBUG
-                       if (iip->ili_root_size > 0) {
-                               ASSERT(iip->ili_root_size ==
-                                      ip->i_df.if_broot_bytes);
-                               ASSERT(memcmp(iip->ili_orig_root,
-                                           ip->i_df.if_broot,
-                                           iip->ili_root_size) == 0);
-                       } else {
-                               ASSERT(ip->i_df.if_broot_bytes == 0);
-                       }
-#endif
-                       iip->ili_format.ilf_fields &= ~XFS_ILOG_DBROOT;
-               }
                break;
 
        case XFS_DINODE_FMT_LOCAL:
-               iip->ili_format.ilf_fields &=
-                       ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT |
-                         XFS_ILOG_DEV | XFS_ILOG_UUID);
-               if ((iip->ili_format.ilf_fields & XFS_ILOG_DDATA) &&
-                   (ip->i_df.if_bytes > 0)) {
-                       ASSERT(ip->i_df.if_u1.if_data != NULL);
-                       ASSERT(ip->i_d.di_size > 0);
+               if ((iip->ili_fields & XFS_ILOG_DDATA) &&
+                   ip->i_df.if_bytes > 0)
                        nvecs++;
-               } else {
-                       iip->ili_format.ilf_fields &= ~XFS_ILOG_DDATA;
-               }
                break;
 
        case XFS_DINODE_FMT_DEV:
-               iip->ili_format.ilf_fields &=
-                       ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
-                         XFS_ILOG_DEXT | XFS_ILOG_UUID);
-               break;
-
        case XFS_DINODE_FMT_UUID:
-               iip->ili_format.ilf_fields &=
-                       ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
-                         XFS_ILOG_DEXT | XFS_ILOG_DEV);
                break;
 
        default:
@@ -135,56 +86,31 @@ xfs_inode_item_size(
                break;
        }
 
-       /*
-        * If there are no attributes associated with this file,
-        * then there cannot be anything more to log.
-        * Clear all attribute-related log flags.
-        */
-       if (!XFS_IFORK_Q(ip)) {
-               iip->ili_format.ilf_fields &=
-                       ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT);
+       if (!XFS_IFORK_Q(ip))
                return nvecs;
-       }
+
 
        /*
         * Log any necessary attribute data.
         */
        switch (ip->i_d.di_aformat) {
        case XFS_DINODE_FMT_EXTENTS:
-               iip->ili_format.ilf_fields &=
-                       ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT);
-               if ((iip->ili_format.ilf_fields & XFS_ILOG_AEXT) &&
-                   (ip->i_d.di_anextents > 0) &&
-                   (ip->i_afp->if_bytes > 0)) {
-                       ASSERT(ip->i_afp->if_u1.if_extents != NULL);
+               if ((iip->ili_fields & XFS_ILOG_AEXT) &&
+                   ip->i_d.di_anextents > 0 &&
+                   ip->i_afp->if_bytes > 0)
                        nvecs++;
-               } else {
-                       iip->ili_format.ilf_fields &= ~XFS_ILOG_AEXT;
-               }
                break;
 
        case XFS_DINODE_FMT_BTREE:
-               iip->ili_format.ilf_fields &=
-                       ~(XFS_ILOG_ADATA | XFS_ILOG_AEXT);
-               if ((iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) &&
-                   (ip->i_afp->if_broot_bytes > 0)) {
-                       ASSERT(ip->i_afp->if_broot != NULL);
+               if ((iip->ili_fields & XFS_ILOG_ABROOT) &&
+                   ip->i_afp->if_broot_bytes > 0)
                        nvecs++;
-               } else {
-                       iip->ili_format.ilf_fields &= ~XFS_ILOG_ABROOT;
-               }
                break;
 
        case XFS_DINODE_FMT_LOCAL:
-               iip->ili_format.ilf_fields &=
-                       ~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT);
-               if ((iip->ili_format.ilf_fields & XFS_ILOG_ADATA) &&
-                   (ip->i_afp->if_bytes > 0)) {
-                       ASSERT(ip->i_afp->if_u1.if_data != NULL);
+               if ((iip->ili_fields & XFS_ILOG_ADATA) &&
+                   ip->i_afp->if_bytes > 0)
                        nvecs++;
-               } else {
-                       iip->ili_format.ilf_fields &= ~XFS_ILOG_ADATA;
-               }
                break;
 
        default:
@@ -254,48 +180,11 @@ xfs_inode_item_format(
        vecp++;
        nvecs        = 1;
 
-       /*
-        * Clear i_update_core if the timestamps (or any other
-        * non-transactional modification) need flushing/logging
-        * and we're about to log them with the rest of the core.
-        *
-        * This is the same logic as xfs_iflush() but this code can't
-        * run at the same time as xfs_iflush because we're in commit
-        * processing here and so we have the inode lock held in
-        * exclusive mode.  Although it doesn't really matter
-        * for the timestamps if both routines were to grab the
-        * timestamps or not.  That would be ok.
-        *
-        * We clear i_update_core before copying out the data.
-        * This is for coordination with our timestamp updates
-        * that don't hold the inode lock. They will always
-        * update the timestamps BEFORE setting i_update_core,
-        * so if we clear i_update_core after they set it we
-        * are guaranteed to see their updates to the timestamps
-        * either here.  Likewise, if they set it after we clear it
-        * here, we'll see it either on the next commit of this
-        * inode or the next time the inode gets flushed via
-        * xfs_iflush().  This depends on strongly ordered memory
-        * semantics, but we have that.  We use the SYNCHRONIZE
-        * macro to make sure that the compiler does not reorder
-        * the i_update_core access below the data copy below.
-        */
-       if (ip->i_update_core)  {
-               ip->i_update_core = 0;
-               SYNCHRONIZE();
-       }
-
-       /*
-        * Make sure to get the latest timestamps from the Linux inode.
-        */
-       xfs_synchronize_times(ip);
-
        vecp->i_addr = &ip->i_d;
        vecp->i_len  = sizeof(struct xfs_icdinode);
        vecp->i_type = XLOG_REG_TYPE_ICORE;
        vecp++;
        nvecs++;
-       iip->ili_format.ilf_fields |= XFS_ILOG_CORE;
 
        /*
         * If this is really an old format inode, then we need to
@@ -328,16 +217,17 @@ xfs_inode_item_format(
 
        switch (ip->i_d.di_format) {
        case XFS_DINODE_FMT_EXTENTS:
-               ASSERT(!(iip->ili_format.ilf_fields &
-                        (XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
-                         XFS_ILOG_DEV | XFS_ILOG_UUID)));
-               if (iip->ili_format.ilf_fields & XFS_ILOG_DEXT) {
-                       ASSERT(ip->i_df.if_bytes > 0);
+               iip->ili_fields &=
+                       ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
+                         XFS_ILOG_DEV | XFS_ILOG_UUID);
+
+               if ((iip->ili_fields & XFS_ILOG_DEXT) &&
+                   ip->i_d.di_nextents > 0 &&
+                   ip->i_df.if_bytes > 0) {
                        ASSERT(ip->i_df.if_u1.if_extents != NULL);
-                       ASSERT(ip->i_d.di_nextents > 0);
+                       ASSERT(ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) > 0);
                        ASSERT(iip->ili_extents_buf == NULL);
-                       ASSERT((ip->i_df.if_bytes /
-                               (uint)sizeof(xfs_bmbt_rec_t)) > 0);
+
 #ifdef XFS_NATIVE_HOST
                        if (ip->i_d.di_nextents == ip->i_df.if_bytes /
                                                (uint)sizeof(xfs_bmbt_rec_t)) {
@@ -359,15 +249,18 @@ xfs_inode_item_format(
                        iip->ili_format.ilf_dsize = vecp->i_len;
                        vecp++;
                        nvecs++;
+               } else {
+                       iip->ili_fields &= ~XFS_ILOG_DEXT;
                }
                break;
 
        case XFS_DINODE_FMT_BTREE:
-               ASSERT(!(iip->ili_format.ilf_fields &
-                        (XFS_ILOG_DDATA | XFS_ILOG_DEXT |
-                         XFS_ILOG_DEV | XFS_ILOG_UUID)));
-               if (iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) {
-                       ASSERT(ip->i_df.if_broot_bytes > 0);
+               iip->ili_fields &=
+                       ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT |
+                         XFS_ILOG_DEV | XFS_ILOG_UUID);
+
+               if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
+                   ip->i_df.if_broot_bytes > 0) {
                        ASSERT(ip->i_df.if_broot != NULL);
                        vecp->i_addr = ip->i_df.if_broot;
                        vecp->i_len = ip->i_df.if_broot_bytes;
@@ -375,15 +268,30 @@ xfs_inode_item_format(
                        vecp++;
                        nvecs++;
                        iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes;
+               } else {
+                       ASSERT(!(iip->ili_fields &
+                                XFS_ILOG_DBROOT));
+#ifdef XFS_TRANS_DEBUG
+                       if (iip->ili_root_size > 0) {
+                               ASSERT(iip->ili_root_size ==
+                                      ip->i_df.if_broot_bytes);
+                               ASSERT(memcmp(iip->ili_orig_root,
+                                           ip->i_df.if_broot,
+                                           iip->ili_root_size) == 0);
+                       } else {
+                               ASSERT(ip->i_df.if_broot_bytes == 0);
+                       }
+#endif
+                       iip->ili_fields &= ~XFS_ILOG_DBROOT;
                }
                break;
 
        case XFS_DINODE_FMT_LOCAL:
-               ASSERT(!(iip->ili_format.ilf_fields &
-                        (XFS_ILOG_DBROOT | XFS_ILOG_DEXT |
-                         XFS_ILOG_DEV | XFS_ILOG_UUID)));
-               if (iip->ili_format.ilf_fields & XFS_ILOG_DDATA) {
-                       ASSERT(ip->i_df.if_bytes > 0);
+               iip->ili_fields &=
+                       ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT |
+                         XFS_ILOG_DEV | XFS_ILOG_UUID);
+               if ((iip->ili_fields & XFS_ILOG_DDATA) &&
+                   ip->i_df.if_bytes > 0) {
                        ASSERT(ip->i_df.if_u1.if_data != NULL);
                        ASSERT(ip->i_d.di_size > 0);
 
@@ -401,24 +309,26 @@ xfs_inode_item_format(
                        vecp++;
                        nvecs++;
                        iip->ili_format.ilf_dsize = (unsigned)data_bytes;
+               } else {
+                       iip->ili_fields &= ~XFS_ILOG_DDATA;
                }
                break;
 
        case XFS_DINODE_FMT_DEV:
-               ASSERT(!(iip->ili_format.ilf_fields &
-                        (XFS_ILOG_DBROOT | XFS_ILOG_DEXT |
-                         XFS_ILOG_DDATA | XFS_ILOG_UUID)));
-               if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) {
+               iip->ili_fields &=
+                       ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
+                         XFS_ILOG_DEXT | XFS_ILOG_UUID);
+               if (iip->ili_fields & XFS_ILOG_DEV) {
                        iip->ili_format.ilf_u.ilfu_rdev =
                                ip->i_df.if_u2.if_rdev;
                }
                break;
 
        case XFS_DINODE_FMT_UUID:
-               ASSERT(!(iip->ili_format.ilf_fields &
-                        (XFS_ILOG_DBROOT | XFS_ILOG_DEXT |
-                         XFS_ILOG_DDATA | XFS_ILOG_DEV)));
-               if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) {
+               iip->ili_fields &=
+                       ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
+                         XFS_ILOG_DEXT | XFS_ILOG_DEV);
+               if (iip->ili_fields & XFS_ILOG_UUID) {
                        iip->ili_format.ilf_u.ilfu_uuid =
                                ip->i_df.if_u2.if_uuid;
                }
@@ -430,31 +340,25 @@ xfs_inode_item_format(
        }
 
        /*
-        * If there are no attributes associated with the file,
-        * then we're done.
-        * Assert that no attribute-related log flags are set.
+        * If there are no attributes associated with the file, then we're done.
         */
        if (!XFS_IFORK_Q(ip)) {
-               iip->ili_format.ilf_size = nvecs;
-               ASSERT(!(iip->ili_format.ilf_fields &
-                        (XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT)));
-               return;
+               iip->ili_fields &=
+                       ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT);
+               goto out;
        }
 
        switch (ip->i_d.di_aformat) {
        case XFS_DINODE_FMT_EXTENTS:
-               ASSERT(!(iip->ili_format.ilf_fields &
-                        (XFS_ILOG_ADATA | XFS_ILOG_ABROOT)));
-               if (iip->ili_format.ilf_fields & XFS_ILOG_AEXT) {
-#ifdef DEBUG
-                       int nrecs = ip->i_afp->if_bytes /
-                               (uint)sizeof(xfs_bmbt_rec_t);
-                       ASSERT(nrecs > 0);
-                       ASSERT(nrecs == ip->i_d.di_anextents);
-                       ASSERT(ip->i_afp->if_bytes > 0);
+               iip->ili_fields &=
+                       ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT);
+
+               if ((iip->ili_fields & XFS_ILOG_AEXT) &&
+                   ip->i_d.di_anextents > 0 &&
+                   ip->i_afp->if_bytes > 0) {
+                       ASSERT(ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) ==
+                               ip->i_d.di_anextents);
                        ASSERT(ip->i_afp->if_u1.if_extents != NULL);
-                       ASSERT(ip->i_d.di_anextents > 0);
-#endif
 #ifdef XFS_NATIVE_HOST
                        /*
                         * There are not delayed allocation extents
@@ -471,29 +375,36 @@ xfs_inode_item_format(
                        iip->ili_format.ilf_asize = vecp->i_len;
                        vecp++;
                        nvecs++;
+               } else {
+                       iip->ili_fields &= ~XFS_ILOG_AEXT;
                }
                break;
 
        case XFS_DINODE_FMT_BTREE:
-               ASSERT(!(iip->ili_format.ilf_fields &
-                        (XFS_ILOG_ADATA | XFS_ILOG_AEXT)));
-               if (iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) {
-                       ASSERT(ip->i_afp->if_broot_bytes > 0);
+               iip->ili_fields &=
+                       ~(XFS_ILOG_ADATA | XFS_ILOG_AEXT);
+
+               if ((iip->ili_fields & XFS_ILOG_ABROOT) &&
+                   ip->i_afp->if_broot_bytes > 0) {
                        ASSERT(ip->i_afp->if_broot != NULL);
+
                        vecp->i_addr = ip->i_afp->if_broot;
                        vecp->i_len = ip->i_afp->if_broot_bytes;
                        vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT;
                        vecp++;
                        nvecs++;
                        iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes;
+               } else {
+                       iip->ili_fields &= ~XFS_ILOG_ABROOT;
                }
                break;
 
        case XFS_DINODE_FMT_LOCAL:
-               ASSERT(!(iip->ili_format.ilf_fields &
-                        (XFS_ILOG_ABROOT | XFS_ILOG_AEXT)));
-               if (iip->ili_format.ilf_fields & XFS_ILOG_ADATA) {
-                       ASSERT(ip->i_afp->if_bytes > 0);
+               iip->ili_fields &=
+                       ~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT);
+
+               if ((iip->ili_fields & XFS_ILOG_ADATA) &&
+                   ip->i_afp->if_bytes > 0) {
                        ASSERT(ip->i_afp->if_u1.if_data != NULL);
 
                        vecp->i_addr = ip->i_afp->if_u1.if_data;
@@ -510,6 +421,8 @@ xfs_inode_item_format(
                        vecp++;
                        nvecs++;
                        iip->ili_format.ilf_asize = (unsigned)data_bytes;
+               } else {
+                       iip->ili_fields &= ~XFS_ILOG_ADATA;
                }
                break;
 
@@ -518,6 +431,15 @@ xfs_inode_item_format(
                break;
        }
 
+out:
+       /*
+        * Now update the log format that goes out to disk from the in-core
+        * values.  We always write the inode core to make the arithmetic
+        * games in recovery easier, which isn't a big deal as just about any
+        * transaction would dirty it anyway.
+        */
+       iip->ili_format.ilf_fields = XFS_ILOG_CORE |
+               (iip->ili_fields & ~XFS_ILOG_TIMESTAMP);
        iip->ili_format.ilf_size = nvecs;
 }
 
@@ -596,17 +518,13 @@ xfs_inode_item_trylock(
        /* Stale items should force out the iclog */
        if (ip->i_flags & XFS_ISTALE) {
                xfs_ifunlock(ip);
-               /*
-                * we hold the AIL lock - notify the unlock routine of this
-                * so it doesn't try to get the lock again.
-                */
-               xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY);
+               xfs_iunlock(ip, XFS_ILOCK_SHARED);
                return XFS_ITEM_PINNED;
        }
 
 #ifdef DEBUG
        if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-               ASSERT(iip->ili_format.ilf_fields != 0);
+               ASSERT(iip->ili_fields != 0);
                ASSERT(iip->ili_logged == 0);
                ASSERT(lip->li_flags & XFS_LI_IN_AIL);
        }
@@ -638,7 +556,7 @@ xfs_inode_item_unlock(
        if (iip->ili_extents_buf != NULL) {
                ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS);
                ASSERT(ip->i_d.di_nextents > 0);
-               ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_DEXT);
+               ASSERT(iip->ili_fields & XFS_ILOG_DEXT);
                ASSERT(ip->i_df.if_bytes > 0);
                kmem_free(iip->ili_extents_buf);
                iip->ili_extents_buf = NULL;
@@ -646,7 +564,7 @@ xfs_inode_item_unlock(
        if (iip->ili_aextents_buf != NULL) {
                ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS);
                ASSERT(ip->i_d.di_anextents > 0);
-               ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_AEXT);
+               ASSERT(iip->ili_fields & XFS_ILOG_AEXT);
                ASSERT(ip->i_afp->if_bytes > 0);
                kmem_free(iip->ili_aextents_buf);
                iip->ili_aextents_buf = NULL;
@@ -761,8 +679,7 @@ xfs_inode_item_push(
         * lock without sleeping, then there must not have been
         * anyone in the process of flushing the inode.
         */
-       ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) ||
-              iip->ili_format.ilf_fields != 0);
+       ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || iip->ili_fields != 0);
 
        /*
         * Push the inode to it's backing buffer. This will not remove the
@@ -985,7 +902,7 @@ xfs_iflush_abort(
                 * Clear the inode logging fields so no more flushes are
                 * attempted.
                 */
-               iip->ili_format.ilf_fields = 0;
+               iip->ili_fields = 0;
        }
        /*
         * Release the inode's flush lock since we're done with it.
index d3dee61e6d91fde1671157b3ee2deeee8df49fcd..41d61c3b7a36e249028d34c505e941636f5c9e68 100644 (file)
@@ -86,6 +86,15 @@ typedef struct xfs_inode_log_format_64 {
 #define        XFS_ILOG_AEXT   0x080   /* log i_af.if_extents */
 #define        XFS_ILOG_ABROOT 0x100   /* log i_af.i_broot */
 
+
+/*
+ * The timestamps are dirty, but not necessarily anything else in the inode
+ * core.  Unlike the other fields above this one must never make it to disk
+ * in the ilf_fields of the inode_log_format, but is purely stored in-memory in
+ * ili_fields in the inode_log_item.
+ */
+#define XFS_ILOG_TIMESTAMP     0x4000
+
 #define        XFS_ILOG_NONCORE        (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
                                 XFS_ILOG_DBROOT | XFS_ILOG_DEV | \
                                 XFS_ILOG_UUID | XFS_ILOG_ADATA | \
@@ -101,7 +110,7 @@ typedef struct xfs_inode_log_format_64 {
                                 XFS_ILOG_DEXT | XFS_ILOG_DBROOT | \
                                 XFS_ILOG_DEV | XFS_ILOG_UUID | \
                                 XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
-                                XFS_ILOG_ABROOT)
+                                XFS_ILOG_ABROOT | XFS_ILOG_TIMESTAMP)
 
 static inline int xfs_ilog_fbroot(int w)
 {
@@ -134,6 +143,7 @@ typedef struct xfs_inode_log_item {
        unsigned short          ili_lock_flags;    /* lock flags */
        unsigned short          ili_logged;        /* flushed logged data */
        unsigned int            ili_last_fields;   /* fields when flushed */
+       unsigned int            ili_fields;        /* fields to be logged */
        struct xfs_bmbt_rec     *ili_extents_buf;  /* array of logged
                                                      data exts */
        struct xfs_bmbt_rec     *ili_aextents_buf; /* array of logged
@@ -148,9 +158,7 @@ typedef struct xfs_inode_log_item {
 
 static inline int xfs_inode_clean(xfs_inode_t *ip)
 {
-       return (!ip->i_itemp ||
-               !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) &&
-              !ip->i_update_core;
+       return !ip->i_itemp || !(ip->i_itemp->ili_fields & XFS_ILOG_ALL);
 }
 
 extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *);
index 76f3ca5cfc361f962fa40a72243a525f191a618f..f588320dc4b9070a6d2c65244a9a7ed9ac1cc462 100644 (file)
@@ -450,9 +450,12 @@ xfs_attrmulti_attr_get(
 
        if (*len > XATTR_SIZE_MAX)
                return EINVAL;
-       kbuf = kmalloc(*len, GFP_KERNEL);
-       if (!kbuf)
-               return ENOMEM;
+       kbuf = kmem_zalloc(*len, KM_SLEEP | KM_MAYFAIL);
+       if (!kbuf) {
+               kbuf = kmem_zalloc_large(*len);
+               if (!kbuf)
+                       return ENOMEM;
+       }
 
        error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags);
        if (error)
@@ -462,7 +465,10 @@ xfs_attrmulti_attr_get(
                error = EFAULT;
 
  out_kfree:
-       kfree(kbuf);
+       if (is_vmalloc_addr(kbuf))
+               kmem_free_large(kbuf);
+       else
+               kmem_free(kbuf);
        return error;
 }
 
index f9ccb7b7c043bc0d9524fae8f3afa59975bf32ce..a849a5473aff41ec61dcb0770cc3251fab318206 100644 (file)
@@ -293,7 +293,7 @@ xfs_compat_ioc_bulkstat(
                int res;
 
                error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer,
-                               sizeof(compat_xfs_bstat_t), 0, &res);
+                               sizeof(compat_xfs_bstat_t), NULL, &res);
        } else if (cmd == XFS_IOC_FSBULKSTAT_32) {
                error = xfs_bulkstat(mp, &inlast, &count,
                        xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t),
index 246c7d57c6f96c876778128e8d21c90fca692ce9..71a464503c43837c181b7fb163aebdd612c0d18d 100644 (file)
@@ -31,6 +31,7 @@
 #include "xfs_ialloc_btree.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
+#include "xfs_inode_item.h"
 #include "xfs_btree.h"
 #include "xfs_bmap.h"
 #include "xfs_rtalloc.h"
@@ -645,6 +646,7 @@ xfs_iomap_write_unwritten(
        xfs_trans_t     *tp;
        xfs_bmbt_irec_t imap;
        xfs_bmap_free_t free_list;
+       xfs_fsize_t     i_size;
        uint            resblks;
        int             committed;
        int             error;
@@ -705,7 +707,22 @@ xfs_iomap_write_unwritten(
                if (error)
                        goto error_on_bmapi_transaction;
 
-               error = xfs_bmap_finish(&(tp), &(free_list), &committed);
+               /*
+                * Log the updated inode size as we go.  We have to be careful
+                * to only log it up to the actual write offset if it is
+                * halfway into a block.
+                */
+               i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb);
+               if (i_size > offset + count)
+                       i_size = offset + count;
+
+               i_size = xfs_new_eof(ip, i_size);
+               if (i_size) {
+                       ip->i_d.di_size = i_size;
+                       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+               }
+
+               error = xfs_bmap_finish(&tp, &free_list, &committed);
                if (error)
                        goto error_on_bmapi_transaction;
 
index ab302539e5b9603b8a67bb9f4399c03625fe1fd7..3011b879f850c2a91b94aafa011bf4006d9ab41e 100644 (file)
 #include <linux/fiemap.h>
 #include <linux/slab.h>
 
-/*
- * Bring the timestamps in the XFS inode uptodate.
- *
- * Used before writing the inode to disk.
- */
-void
-xfs_synchronize_times(
-       xfs_inode_t     *ip)
-{
-       struct inode    *inode = VFS_I(ip);
-
-       ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
-       ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
-       ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec;
-       ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec;
-       ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec;
-       ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec;
-}
-
-/*
- * If the linux inode is valid, mark it dirty, else mark the dirty state
- * in the XFS inode to make sure we pick it up when reclaiming the inode.
- */
-void
-xfs_mark_inode_dirty_sync(
-       xfs_inode_t     *ip)
-{
-       struct inode    *inode = VFS_I(ip);
-
-       if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
-               mark_inode_dirty_sync(inode);
-       else {
-               barrier();
-               ip->i_update_core = 1;
-       }
-}
-
-void
-xfs_mark_inode_dirty(
-       xfs_inode_t     *ip)
-{
-       struct inode    *inode = VFS_I(ip);
-
-       if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
-               mark_inode_dirty(inode);
-       else {
-               barrier();
-               ip->i_update_core = 1;
-       }
-
-}
-
-
-int xfs_initxattrs(struct inode *inode, const struct xattr *xattr_array,
-                  void *fs_info)
+static int
+xfs_initxattrs(
+       struct inode            *inode,
+       const struct xattr      *xattr_array,
+       void                    *fs_info)
 {
-       const struct xattr *xattr;
-       struct xfs_inode *ip = XFS_I(inode);
-       int error = 0;
+       const struct xattr      *xattr;
+       struct xfs_inode        *ip = XFS_I(inode);
+       int                     error = 0;
 
        for (xattr = xattr_array; xattr->name != NULL; xattr++) {
                error = xfs_attr_set(ip, xattr->name, xattr->value,
@@ -678,19 +628,16 @@ xfs_setattr_nonsize(
                inode->i_atime = iattr->ia_atime;
                ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
                ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
-               ip->i_update_core = 1;
        }
        if (mask & ATTR_CTIME) {
                inode->i_ctime = iattr->ia_ctime;
                ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
                ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
-               ip->i_update_core = 1;
        }
        if (mask & ATTR_MTIME) {
                inode->i_mtime = iattr->ia_mtime;
                ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
                ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
-               ip->i_update_core = 1;
        }
 
        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
@@ -918,13 +865,11 @@ xfs_setattr_size(
                inode->i_ctime = iattr->ia_ctime;
                ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
                ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
-               ip->i_update_core = 1;
        }
        if (mask & ATTR_MTIME) {
                inode->i_mtime = iattr->ia_mtime;
                ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
                ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
-               ip->i_update_core = 1;
        }
 
        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
index 751e94fe1f77e2694790da48952cf03f30a554e4..9720c54bbed0dd4a37e9ebc6bbb33bf9ee8fdbf7 100644 (file)
@@ -62,7 +62,6 @@ xfs_bulkstat_one_int(
 {
        struct xfs_icdinode     *dic;           /* dinode core info pointer */
        struct xfs_inode        *ip;            /* incore inode pointer */
-       struct inode            *inode;
        struct xfs_bstat        *buf;           /* return buffer */
        int                     error = 0;      /* error value */
 
@@ -86,7 +85,6 @@ xfs_bulkstat_one_int(
        ASSERT(ip->i_imap.im_blkno != 0);
 
        dic = &ip->i_d;
-       inode = VFS_I(ip);
 
        /* xfs_iget returns the following without needing
         * further change.
@@ -99,19 +97,12 @@ xfs_bulkstat_one_int(
        buf->bs_uid = dic->di_uid;
        buf->bs_gid = dic->di_gid;
        buf->bs_size = dic->di_size;
-
-       /*
-        * We need to read the timestamps from the Linux inode because
-        * the VFS keeps writing directly into the inode structure instead
-        * of telling us about the updates.
-        */
-       buf->bs_atime.tv_sec = inode->i_atime.tv_sec;
-       buf->bs_atime.tv_nsec = inode->i_atime.tv_nsec;
-       buf->bs_mtime.tv_sec = inode->i_mtime.tv_sec;
-       buf->bs_mtime.tv_nsec = inode->i_mtime.tv_nsec;
-       buf->bs_ctime.tv_sec = inode->i_ctime.tv_sec;
-       buf->bs_ctime.tv_nsec = inode->i_ctime.tv_nsec;
-
+       buf->bs_atime.tv_sec = dic->di_atime.t_sec;
+       buf->bs_atime.tv_nsec = dic->di_atime.t_nsec;
+       buf->bs_mtime.tv_sec = dic->di_mtime.t_sec;
+       buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec;
+       buf->bs_ctime.tv_sec = dic->di_ctime.t_sec;
+       buf->bs_ctime.tv_nsec = dic->di_ctime.t_nsec;
        buf->bs_xflags = xfs_ip2xflags(ip);
        buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog;
        buf->bs_extents = dic->di_nextents;
index e2cc3568c2998ccc45be81b5381c1bf9f91fcf9f..98a9cb5ffd1700995e8118e71524ba6d12e12fa2 100644 (file)
@@ -67,15 +67,10 @@ STATIC void xlog_state_switch_iclogs(xlog_t         *log,
                                     int                eventual_size);
 STATIC void xlog_state_want_sync(xlog_t        *log, xlog_in_core_t *iclog);
 
-/* local functions to manipulate grant head */
-STATIC int  xlog_grant_log_space(xlog_t                *log,
-                                xlog_ticket_t  *xtic);
 STATIC void xlog_grant_push_ail(struct log     *log,
                                int             need_bytes);
 STATIC void xlog_regrant_reserve_log_space(xlog_t       *log,
                                           xlog_ticket_t *ticket);
-STATIC int xlog_regrant_write_log_space(xlog_t         *log,
-                                        xlog_ticket_t  *ticket);
 STATIC void xlog_ungrant_log_space(xlog_t       *log,
                                   xlog_ticket_t *ticket);
 
@@ -150,78 +145,93 @@ xlog_grant_add_space(
        } while (head_val != old);
 }
 
-STATIC bool
-xlog_reserveq_wake(
-       struct log              *log,
-       int                     *free_bytes)
+STATIC void
+xlog_grant_head_init(
+       struct xlog_grant_head  *head)
+{
+       xlog_assign_grant_head(&head->grant, 1, 0);
+       INIT_LIST_HEAD(&head->waiters);
+       spin_lock_init(&head->lock);
+}
+
+STATIC void
+xlog_grant_head_wake_all(
+       struct xlog_grant_head  *head)
 {
        struct xlog_ticket      *tic;
-       int                     need_bytes;
 
-       list_for_each_entry(tic, &log->l_reserveq, t_queue) {
+       spin_lock(&head->lock);
+       list_for_each_entry(tic, &head->waiters, t_queue)
+               wake_up_process(tic->t_task);
+       spin_unlock(&head->lock);
+}
+
+static inline int
+xlog_ticket_reservation(
+       struct log              *log,
+       struct xlog_grant_head  *head,
+       struct xlog_ticket      *tic)
+{
+       if (head == &log->l_write_head) {
+               ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV);
+               return tic->t_unit_res;
+       } else {
                if (tic->t_flags & XLOG_TIC_PERM_RESERV)
-                       need_bytes = tic->t_unit_res * tic->t_cnt;
+                       return tic->t_unit_res * tic->t_cnt;
                else
-                       need_bytes = tic->t_unit_res;
-
-               if (*free_bytes < need_bytes)
-                       return false;
-               *free_bytes -= need_bytes;
-
-               trace_xfs_log_grant_wake_up(log, tic);
-               wake_up(&tic->t_wait);
+                       return tic->t_unit_res;
        }
-
-       return true;
 }
 
 STATIC bool
-xlog_writeq_wake(
+xlog_grant_head_wake(
        struct log              *log,
+       struct xlog_grant_head  *head,
        int                     *free_bytes)
 {
        struct xlog_ticket      *tic;
        int                     need_bytes;
 
-       list_for_each_entry(tic, &log->l_writeq, t_queue) {
-               ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV);
-
-               need_bytes = tic->t_unit_res;
-
+       list_for_each_entry(tic, &head->waiters, t_queue) {
+               need_bytes = xlog_ticket_reservation(log, head, tic);
                if (*free_bytes < need_bytes)
                        return false;
-               *free_bytes -= need_bytes;
 
-               trace_xfs_log_regrant_write_wake_up(log, tic);
-               wake_up(&tic->t_wait);
+               *free_bytes -= need_bytes;
+               trace_xfs_log_grant_wake_up(log, tic);
+               wake_up_process(tic->t_task);
        }
 
        return true;
 }
 
 STATIC int
-xlog_reserveq_wait(
+xlog_grant_head_wait(
        struct log              *log,
+       struct xlog_grant_head  *head,
        struct xlog_ticket      *tic,
        int                     need_bytes)
 {
-       list_add_tail(&tic->t_queue, &log->l_reserveq);
+       list_add_tail(&tic->t_queue, &head->waiters);
 
        do {
                if (XLOG_FORCED_SHUTDOWN(log))
                        goto shutdown;
                xlog_grant_push_ail(log, need_bytes);
 
+               __set_current_state(TASK_UNINTERRUPTIBLE);
+               spin_unlock(&head->lock);
+
                XFS_STATS_INC(xs_sleep_logspace);
-               trace_xfs_log_grant_sleep(log, tic);
 
-               xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock);
+               trace_xfs_log_grant_sleep(log, tic);
+               schedule();
                trace_xfs_log_grant_wake(log, tic);
 
-               spin_lock(&log->l_grant_reserve_lock);
+               spin_lock(&head->lock);
                if (XLOG_FORCED_SHUTDOWN(log))
                        goto shutdown;
-       } while (xlog_space_left(log, &log->l_grant_reserve_head) < need_bytes);
+       } while (xlog_space_left(log, &head->grant) < need_bytes);
 
        list_del_init(&tic->t_queue);
        return 0;
@@ -230,35 +240,58 @@ shutdown:
        return XFS_ERROR(EIO);
 }
 
+/*
+ * Atomically get the log space required for a log ticket.
+ *
+ * Once a ticket gets put onto head->waiters, it will only return after the
+ * needed reservation is satisfied.
+ *
+ * This function is structured so that it has a lock free fast path. This is
+ * necessary because every new transaction reservation will come through this
+ * path. Hence any lock will be globally hot if we take it unconditionally on
+ * every pass.
+ *
+ * As tickets are only ever moved on and off head->waiters under head->lock, we
+ * only need to take that lock if we are going to add the ticket to the queue
+ * and sleep. We can avoid taking the lock if the ticket was never added to
+ * head->waiters because the t_queue list head will be empty and we hold the
+ * only reference to it so it can safely be checked unlocked.
+ */
 STATIC int
-xlog_writeq_wait(
+xlog_grant_head_check(
        struct log              *log,
+       struct xlog_grant_head  *head,
        struct xlog_ticket      *tic,
-       int                     need_bytes)
+       int                     *need_bytes)
 {
-       list_add_tail(&tic->t_queue, &log->l_writeq);
-
-       do {
-               if (XLOG_FORCED_SHUTDOWN(log))
-                       goto shutdown;
-               xlog_grant_push_ail(log, need_bytes);
-
-               XFS_STATS_INC(xs_sleep_logspace);
-               trace_xfs_log_regrant_write_sleep(log, tic);
+       int                     free_bytes;
+       int                     error = 0;
 
-               xlog_wait(&tic->t_wait, &log->l_grant_write_lock);
-               trace_xfs_log_regrant_write_wake(log, tic);
+       ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
 
-               spin_lock(&log->l_grant_write_lock);
-               if (XLOG_FORCED_SHUTDOWN(log))
-                       goto shutdown;
-       } while (xlog_space_left(log, &log->l_grant_write_head) < need_bytes);
+       /*
+        * If there are other waiters on the queue then give them a chance at
+        * logspace before us.  Wake up the first waiters, if we do not wake
+        * up all the waiters then go to sleep waiting for more free space,
+        * otherwise try to get some space for this transaction.
+        */
+       *need_bytes = xlog_ticket_reservation(log, head, tic);
+       free_bytes = xlog_space_left(log, &head->grant);
+       if (!list_empty_careful(&head->waiters)) {
+               spin_lock(&head->lock);
+               if (!xlog_grant_head_wake(log, head, &free_bytes) ||
+                   free_bytes < *need_bytes) {
+                       error = xlog_grant_head_wait(log, head, tic,
+                                                    *need_bytes);
+               }
+               spin_unlock(&head->lock);
+       } else if (free_bytes < *need_bytes) {
+               spin_lock(&head->lock);
+               error = xlog_grant_head_wait(log, head, tic, *need_bytes);
+               spin_unlock(&head->lock);
+       }
 
-       list_del_init(&tic->t_queue);
-       return 0;
-shutdown:
-       list_del_init(&tic->t_queue);
-       return XFS_ERROR(EIO);
+       return error;
 }
 
 static void
@@ -285,6 +318,128 @@ xlog_tic_add_region(xlog_ticket_t *tic, uint len, uint type)
        tic->t_res_num++;
 }
 
+/*
+ * Replenish the byte reservation required by moving the grant write head.
+ */
+int
+xfs_log_regrant(
+       struct xfs_mount        *mp,
+       struct xlog_ticket      *tic)
+{
+       struct log              *log = mp->m_log;
+       int                     need_bytes;
+       int                     error = 0;
+
+       if (XLOG_FORCED_SHUTDOWN(log))
+               return XFS_ERROR(EIO);
+
+       XFS_STATS_INC(xs_try_logspace);
+
+       /*
+        * This is a new transaction on the ticket, so we need to change the
+        * transaction ID so that the next transaction has a different TID in
+        * the log. Just add one to the existing tid so that we can see chains
+        * of rolling transactions in the log easily.
+        */
+       tic->t_tid++;
+
+       xlog_grant_push_ail(log, tic->t_unit_res);
+
+       tic->t_curr_res = tic->t_unit_res;
+       xlog_tic_reset_res(tic);
+
+       if (tic->t_cnt > 0)
+               return 0;
+
+       trace_xfs_log_regrant(log, tic);
+
+       error = xlog_grant_head_check(log, &log->l_write_head, tic,
+                                     &need_bytes);
+       if (error)
+               goto out_error;
+
+       xlog_grant_add_space(log, &log->l_write_head.grant, need_bytes);
+       trace_xfs_log_regrant_exit(log, tic);
+       xlog_verify_grant_tail(log);
+       return 0;
+
+out_error:
+       /*
+        * If we are failing, make sure the ticket doesn't have any current
+        * reservations.  We don't want to add this back when the ticket/
+        * transaction gets cancelled.
+        */
+       tic->t_curr_res = 0;
+       tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
+       return error;
+}
+
+/*
+ * Reserve log space and return a ticket corresponding to the reservation.
+ *
+ * Each reservation is going to reserve extra space for a log record header.
+ * When writes happen to the on-disk log, we don't subtract the length of the
+ * log record header from any reservation.  By wasting space in each
+ * reservation, we prevent over allocation problems.
+ */
+int
+xfs_log_reserve(
+       struct xfs_mount        *mp,
+       int                     unit_bytes,
+       int                     cnt,
+       struct xlog_ticket      **ticp,
+       __uint8_t               client,
+       bool                    permanent,
+       uint                    t_type)
+{
+       struct log              *log = mp->m_log;
+       struct xlog_ticket      *tic;
+       int                     need_bytes;
+       int                     error = 0;
+
+       ASSERT(client == XFS_TRANSACTION || client == XFS_LOG);
+
+       if (XLOG_FORCED_SHUTDOWN(log))
+               return XFS_ERROR(EIO);
+
+       XFS_STATS_INC(xs_try_logspace);
+
+       ASSERT(*ticp == NULL);
+       tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent,
+                               KM_SLEEP | KM_MAYFAIL);
+       if (!tic)
+               return XFS_ERROR(ENOMEM);
+
+       tic->t_trans_type = t_type;
+       *ticp = tic;
+
+       xlog_grant_push_ail(log, tic->t_unit_res * tic->t_cnt);
+
+       trace_xfs_log_reserve(log, tic);
+
+       error = xlog_grant_head_check(log, &log->l_reserve_head, tic,
+                                     &need_bytes);
+       if (error)
+               goto out_error;
+
+       xlog_grant_add_space(log, &log->l_reserve_head.grant, need_bytes);
+       xlog_grant_add_space(log, &log->l_write_head.grant, need_bytes);
+       trace_xfs_log_reserve_exit(log, tic);
+       xlog_verify_grant_tail(log);
+       return 0;
+
+out_error:
+       /*
+        * If we are failing, make sure the ticket doesn't have any current
+        * reservations.  We don't want to add this back when the ticket/
+        * transaction gets cancelled.
+        */
+       tic->t_curr_res = 0;
+       tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
+       return error;
+}
+
+
 /*
  * NOTES:
  *
@@ -394,88 +549,6 @@ xfs_log_release_iclog(
        return 0;
 }
 
-/*
- *  1. Reserve an amount of on-disk log space and return a ticket corresponding
- *     to the reservation.
- *  2. Potentially, push buffers at tail of log to disk.
- *
- * Each reservation is going to reserve extra space for a log record header.
- * When writes happen to the on-disk log, we don't subtract the length of the
- * log record header from any reservation.  By wasting space in each
- * reservation, we prevent over allocation problems.
- */
-int
-xfs_log_reserve(
-       struct xfs_mount        *mp,
-       int                     unit_bytes,
-       int                     cnt,
-       struct xlog_ticket      **ticket,
-       __uint8_t               client,
-       uint                    flags,
-       uint                    t_type)
-{
-       struct log              *log = mp->m_log;
-       struct xlog_ticket      *internal_ticket;
-       int                     retval = 0;
-
-       ASSERT(client == XFS_TRANSACTION || client == XFS_LOG);
-
-       if (XLOG_FORCED_SHUTDOWN(log))
-               return XFS_ERROR(EIO);
-
-       XFS_STATS_INC(xs_try_logspace);
-
-
-       if (*ticket != NULL) {
-               ASSERT(flags & XFS_LOG_PERM_RESERV);
-               internal_ticket = *ticket;
-
-               /*
-                * this is a new transaction on the ticket, so we need to
-                * change the transaction ID so that the next transaction has a
-                * different TID in the log. Just add one to the existing tid
-                * so that we can see chains of rolling transactions in the log
-                * easily.
-                */
-               internal_ticket->t_tid++;
-
-               trace_xfs_log_reserve(log, internal_ticket);
-
-               xlog_grant_push_ail(log, internal_ticket->t_unit_res);
-               retval = xlog_regrant_write_log_space(log, internal_ticket);
-       } else {
-               /* may sleep if need to allocate more tickets */
-               internal_ticket = xlog_ticket_alloc(log, unit_bytes, cnt,
-                                                 client, flags,
-                                                 KM_SLEEP|KM_MAYFAIL);
-               if (!internal_ticket)
-                       return XFS_ERROR(ENOMEM);
-               internal_ticket->t_trans_type = t_type;
-               *ticket = internal_ticket;
-
-               trace_xfs_log_reserve(log, internal_ticket);
-
-               xlog_grant_push_ail(log,
-                                   (internal_ticket->t_unit_res *
-                                    internal_ticket->t_cnt));
-               retval = xlog_grant_log_space(log, internal_ticket);
-       }
-
-       if (unlikely(retval)) {
-               /*
-                * If we are failing, make sure the ticket doesn't have any
-                * current reservations.  We don't want to add this back
-                * when the ticket/ transaction gets cancelled.
-                */
-               internal_ticket->t_curr_res = 0;
-               /* ungrant will give back unit_res * t_cnt. */
-               internal_ticket->t_cnt = 0;
-       }
-
-       return retval;
-}
-
-
 /*
  * Mount a log filesystem
  *
@@ -760,64 +833,35 @@ xfs_log_item_init(
        INIT_LIST_HEAD(&item->li_cil);
 }
 
+/*
+ * Wake up processes waiting for log space after we have moved the log tail.
+ */
 void
-xfs_log_move_tail(xfs_mount_t  *mp,
-                 xfs_lsn_t     tail_lsn)
+xfs_log_space_wake(
+       struct xfs_mount        *mp)
 {
-       xlog_ticket_t   *tic;
-       xlog_t          *log = mp->m_log;
-       int             need_bytes, free_bytes;
+       struct log              *log = mp->m_log;
+       int                     free_bytes;
 
        if (XLOG_FORCED_SHUTDOWN(log))
                return;
 
-       if (tail_lsn == 0)
-               tail_lsn = atomic64_read(&log->l_last_sync_lsn);
-
-       /* tail_lsn == 1 implies that we weren't passed a valid value.  */
-       if (tail_lsn != 1)
-               atomic64_set(&log->l_tail_lsn, tail_lsn);
-
-       if (!list_empty_careful(&log->l_writeq)) {
-#ifdef DEBUG
-               if (log->l_flags & XLOG_ACTIVE_RECOVERY)
-                       panic("Recovery problem");
-#endif
-               spin_lock(&log->l_grant_write_lock);
-               free_bytes = xlog_space_left(log, &log->l_grant_write_head);
-               list_for_each_entry(tic, &log->l_writeq, t_queue) {
-                       ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV);
+       if (!list_empty_careful(&log->l_write_head.waiters)) {
+               ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
 
-                       if (free_bytes < tic->t_unit_res && tail_lsn != 1)
-                               break;
-                       tail_lsn = 0;
-                       free_bytes -= tic->t_unit_res;
-                       trace_xfs_log_regrant_write_wake_up(log, tic);
-                       wake_up(&tic->t_wait);
-               }
-               spin_unlock(&log->l_grant_write_lock);
+               spin_lock(&log->l_write_head.lock);
+               free_bytes = xlog_space_left(log, &log->l_write_head.grant);
+               xlog_grant_head_wake(log, &log->l_write_head, &free_bytes);
+               spin_unlock(&log->l_write_head.lock);
        }
 
-       if (!list_empty_careful(&log->l_reserveq)) {
-#ifdef DEBUG
-               if (log->l_flags & XLOG_ACTIVE_RECOVERY)
-                       panic("Recovery problem");
-#endif
-               spin_lock(&log->l_grant_reserve_lock);
-               free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
-               list_for_each_entry(tic, &log->l_reserveq, t_queue) {
-                       if (tic->t_flags & XLOG_TIC_PERM_RESERV)
-                               need_bytes = tic->t_unit_res*tic->t_cnt;
-                       else
-                               need_bytes = tic->t_unit_res;
-                       if (free_bytes < need_bytes && tail_lsn != 1)
-                               break;
-                       tail_lsn = 0;
-                       free_bytes -= need_bytes;
-                       trace_xfs_log_grant_wake_up(log, tic);
-                       wake_up(&tic->t_wait);
-               }
-               spin_unlock(&log->l_grant_reserve_lock);
+       if (!list_empty_careful(&log->l_reserve_head.waiters)) {
+               ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
+
+               spin_lock(&log->l_reserve_head.lock);
+               free_bytes = xlog_space_left(log, &log->l_reserve_head.grant);
+               xlog_grant_head_wake(log, &log->l_reserve_head, &free_bytes);
+               spin_unlock(&log->l_reserve_head.lock);
        }
 }
 
@@ -867,21 +911,7 @@ xfs_log_need_covered(xfs_mount_t *mp)
        return needed;
 }
 
-/******************************************************************************
- *
- *     local routines
- *
- ******************************************************************************
- */
-
-/* xfs_trans_tail_ail returns 0 when there is nothing in the list.
- * The log manager must keep track of the last LR which was committed
- * to disk.  The lsn of this LR will become the new tail_lsn whenever
- * xfs_trans_tail_ail returns 0.  If we don't do this, we run into
- * the situation where stuff could be written into the log but nothing
- * was ever in the AIL when asked.  Eventually, we panic since the
- * tail hits the head.
- *
+/*
  * We may be holding the log iclog lock upon entering this routine.
  */
 xfs_lsn_t
@@ -891,10 +921,17 @@ xlog_assign_tail_lsn(
        xfs_lsn_t               tail_lsn;
        struct log              *log = mp->m_log;
 
+       /*
+        * To make sure we always have a valid LSN for the log tail we keep
+        * track of the last LSN which was committed in log->l_last_sync_lsn,
+        * and use that when the AIL is empty and xfs_ail_min_lsn returns 0.
+        *
+        * If the AIL has been emptied we also need to wake any process
+        * waiting for this condition.
+        */
        tail_lsn = xfs_ail_min_lsn(mp->m_ail);
        if (!tail_lsn)
                tail_lsn = atomic64_read(&log->l_last_sync_lsn);
-
        atomic64_set(&log->l_tail_lsn, tail_lsn);
        return tail_lsn;
 }
@@ -1100,12 +1137,9 @@ xlog_alloc_log(xfs_mount_t       *mp,
        xlog_assign_atomic_lsn(&log->l_tail_lsn, 1, 0);
        xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1, 0);
        log->l_curr_cycle  = 1;     /* 0 is bad since this is initial value */
-       xlog_assign_grant_head(&log->l_grant_reserve_head, 1, 0);
-       xlog_assign_grant_head(&log->l_grant_write_head, 1, 0);
-       INIT_LIST_HEAD(&log->l_reserveq);
-       INIT_LIST_HEAD(&log->l_writeq);
-       spin_lock_init(&log->l_grant_reserve_lock);
-       spin_lock_init(&log->l_grant_write_lock);
+
+       xlog_grant_head_init(&log->l_reserve_head);
+       xlog_grant_head_init(&log->l_write_head);
 
        error = EFSCORRUPTED;
        if (xfs_sb_version_hassector(&mp->m_sb)) {
@@ -1280,7 +1314,7 @@ xlog_grant_push_ail(
 
        ASSERT(BTOBB(need_bytes) < log->l_logBBsize);
 
-       free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
+       free_bytes = xlog_space_left(log, &log->l_reserve_head.grant);
        free_blocks = BTOBBT(free_bytes);
 
        /*
@@ -1412,8 +1446,8 @@ xlog_sync(xlog_t          *log,
                 roundoff < BBTOB(1)));
 
        /* move grant heads by roundoff in sync */
-       xlog_grant_add_space(log, &log->l_grant_reserve_head, roundoff);
-       xlog_grant_add_space(log, &log->l_grant_write_head, roundoff);
+       xlog_grant_add_space(log, &log->l_reserve_head.grant, roundoff);
+       xlog_grant_add_space(log, &log->l_write_head.grant, roundoff);
 
        /* put cycle number in every block */
        xlog_pack_data(log, iclog, roundoff); 
@@ -2566,119 +2600,6 @@ restart:
        return 0;
 }      /* xlog_state_get_iclog_space */
 
-/*
- * Atomically get the log space required for a log ticket.
- *
- * Once a ticket gets put onto the reserveq, it will only return after the
- * needed reservation is satisfied.
- *
- * This function is structured so that it has a lock free fast path. This is
- * necessary because every new transaction reservation will come through this
- * path. Hence any lock will be globally hot if we take it unconditionally on
- * every pass.
- *
- * As tickets are only ever moved on and off the reserveq under the
- * l_grant_reserve_lock, we only need to take that lock if we are going to add
- * the ticket to the queue and sleep. We can avoid taking the lock if the ticket
- * was never added to the reserveq because the t_queue list head will be empty
- * and we hold the only reference to it so it can safely be checked unlocked.
- */
-STATIC int
-xlog_grant_log_space(
-       struct log              *log,
-       struct xlog_ticket      *tic)
-{
-       int                     free_bytes, need_bytes;
-       int                     error = 0;
-
-       ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
-
-       trace_xfs_log_grant_enter(log, tic);
-
-       /*
-        * If there are other waiters on the queue then give them a chance at
-        * logspace before us.  Wake up the first waiters, if we do not wake
-        * up all the waiters then go to sleep waiting for more free space,
-        * otherwise try to get some space for this transaction.
-        */
-       need_bytes = tic->t_unit_res;
-       if (tic->t_flags & XFS_LOG_PERM_RESERV)
-               need_bytes *= tic->t_ocnt;
-       free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
-       if (!list_empty_careful(&log->l_reserveq)) {
-               spin_lock(&log->l_grant_reserve_lock);
-               if (!xlog_reserveq_wake(log, &free_bytes) ||
-                   free_bytes < need_bytes)
-                       error = xlog_reserveq_wait(log, tic, need_bytes);
-               spin_unlock(&log->l_grant_reserve_lock);
-       } else if (free_bytes < need_bytes) {
-               spin_lock(&log->l_grant_reserve_lock);
-               error = xlog_reserveq_wait(log, tic, need_bytes);
-               spin_unlock(&log->l_grant_reserve_lock);
-       }
-       if (error)
-               return error;
-
-       xlog_grant_add_space(log, &log->l_grant_reserve_head, need_bytes);
-       xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
-       trace_xfs_log_grant_exit(log, tic);
-       xlog_verify_grant_tail(log);
-       return 0;
-}
-
-/*
- * Replenish the byte reservation required by moving the grant write head.
- *
- * Similar to xlog_grant_log_space, the function is structured to have a lock
- * free fast path.
- */
-STATIC int
-xlog_regrant_write_log_space(
-       struct log              *log,
-       struct xlog_ticket      *tic)
-{
-       int                     free_bytes, need_bytes;
-       int                     error = 0;
-
-       tic->t_curr_res = tic->t_unit_res;
-       xlog_tic_reset_res(tic);
-
-       if (tic->t_cnt > 0)
-               return 0;
-
-       ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
-
-       trace_xfs_log_regrant_write_enter(log, tic);
-
-       /*
-        * If there are other waiters on the queue then give them a chance at
-        * logspace before us.  Wake up the first waiters, if we do not wake
-        * up all the waiters then go to sleep waiting for more free space,
-        * otherwise try to get some space for this transaction.
-        */
-       need_bytes = tic->t_unit_res;
-       free_bytes = xlog_space_left(log, &log->l_grant_write_head);
-       if (!list_empty_careful(&log->l_writeq)) {
-               spin_lock(&log->l_grant_write_lock);
-               if (!xlog_writeq_wake(log, &free_bytes) ||
-                   free_bytes < need_bytes)
-                       error = xlog_writeq_wait(log, tic, need_bytes);
-               spin_unlock(&log->l_grant_write_lock);
-       } else if (free_bytes < need_bytes) {
-               spin_lock(&log->l_grant_write_lock);
-               error = xlog_writeq_wait(log, tic, need_bytes);
-               spin_unlock(&log->l_grant_write_lock);
-       }
-
-       if (error)
-               return error;
-
-       xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
-       trace_xfs_log_regrant_write_exit(log, tic);
-       xlog_verify_grant_tail(log);
-       return 0;
-}
-
 /* The first cnt-1 times through here we don't need to
  * move the grant write head because the permanent
  * reservation has reserved cnt times the unit amount.
@@ -2695,9 +2616,9 @@ xlog_regrant_reserve_log_space(xlog_t          *log,
        if (ticket->t_cnt > 0)
                ticket->t_cnt--;
 
-       xlog_grant_sub_space(log, &log->l_grant_reserve_head,
+       xlog_grant_sub_space(log, &log->l_reserve_head.grant,
                                        ticket->t_curr_res);
-       xlog_grant_sub_space(log, &log->l_grant_write_head,
+       xlog_grant_sub_space(log, &log->l_write_head.grant,
                                        ticket->t_curr_res);
        ticket->t_curr_res = ticket->t_unit_res;
        xlog_tic_reset_res(ticket);
@@ -2708,7 +2629,7 @@ xlog_regrant_reserve_log_space(xlog_t          *log,
        if (ticket->t_cnt > 0)
                return;
 
-       xlog_grant_add_space(log, &log->l_grant_reserve_head,
+       xlog_grant_add_space(log, &log->l_reserve_head.grant,
                                        ticket->t_unit_res);
 
        trace_xfs_log_regrant_reserve_exit(log, ticket);
@@ -2754,14 +2675,13 @@ xlog_ungrant_log_space(xlog_t        *log,
                bytes += ticket->t_unit_res*ticket->t_cnt;
        }
 
-       xlog_grant_sub_space(log, &log->l_grant_reserve_head, bytes);
-       xlog_grant_sub_space(log, &log->l_grant_write_head, bytes);
+       xlog_grant_sub_space(log, &log->l_reserve_head.grant, bytes);
+       xlog_grant_sub_space(log, &log->l_write_head.grant, bytes);
 
        trace_xfs_log_ungrant_exit(log, ticket);
 
-       xfs_log_move_tail(log->l_mp, 1);
-}      /* xlog_ungrant_log_space */
-
+       xfs_log_space_wake(log->l_mp);
+}
 
 /*
  * Flush iclog to disk if this is the last reference to the given iclog and
@@ -3219,7 +3139,7 @@ xlog_ticket_alloc(
        int             unit_bytes,
        int             cnt,
        char            client,
-       uint            xflags,
+       bool            permanent,
        int             alloc_flags)
 {
        struct xlog_ticket *tic;
@@ -3313,6 +3233,7 @@ xlog_ticket_alloc(
         }
 
        atomic_set(&tic->t_ref, 1);
+       tic->t_task             = current;
        INIT_LIST_HEAD(&tic->t_queue);
        tic->t_unit_res         = unit_bytes;
        tic->t_curr_res         = unit_bytes;
@@ -3322,9 +3243,8 @@ xlog_ticket_alloc(
        tic->t_clientid         = client;
        tic->t_flags            = XLOG_TIC_INITED;
        tic->t_trans_type       = 0;
-       if (xflags & XFS_LOG_PERM_RESERV)
+       if (permanent)
                tic->t_flags |= XLOG_TIC_PERM_RESERV;
-       init_waitqueue_head(&tic->t_wait);
 
        xlog_tic_reset_res(tic);
 
@@ -3380,7 +3300,7 @@ xlog_verify_grant_tail(
        int             tail_cycle, tail_blocks;
        int             cycle, space;
 
-       xlog_crack_grant_head(&log->l_grant_write_head, &cycle, &space);
+       xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &space);
        xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks);
        if (tail_cycle != cycle) {
                if (cycle - 1 != tail_cycle &&
@@ -3582,7 +3502,6 @@ xfs_log_force_umount(
        struct xfs_mount        *mp,
        int                     logerror)
 {
-       xlog_ticket_t   *tic;
        xlog_t          *log;
        int             retval;
 
@@ -3650,15 +3569,8 @@ xfs_log_force_umount(
         * we don't enqueue anything once the SHUTDOWN flag is set, and this
         * action is protected by the grant locks.
         */
-       spin_lock(&log->l_grant_reserve_lock);
-       list_for_each_entry(tic, &log->l_reserveq, t_queue)
-               wake_up(&tic->t_wait);
-       spin_unlock(&log->l_grant_reserve_lock);
-
-       spin_lock(&log->l_grant_write_lock);
-       list_for_each_entry(tic, &log->l_writeq, t_queue)
-               wake_up(&tic->t_wait);
-       spin_unlock(&log->l_grant_write_lock);
+       xlog_grant_head_wake_all(&log->l_reserve_head);
+       xlog_grant_head_wake_all(&log->l_write_head);
 
        if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) {
                ASSERT(!logerror);
index 2aee3b22d29c26d9d11b92f9e1531cab1c2540e3..2c622bedb3021c72f9c3dde56aeab1969549f350 100644 (file)
@@ -52,15 +52,6 @@ static inline xfs_lsn_t      _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
  */
 #define XFS_LOG_REL_PERM_RESERV        0x1
 
-/*
- * Flags to xfs_log_reserve()
- *
- *     XFS_LOG_PERM_RESERV: Permanent reservation.  When writes are
- *             performed against this type of reservation, the reservation
- *             is not decreased.  Long running transactions should use this.
- */
-#define XFS_LOG_PERM_RESERV    0x2
-
 /*
  * Flags to xfs_log_force()
  *
@@ -160,8 +151,8 @@ int   xfs_log_mount(struct xfs_mount        *mp,
                        xfs_daddr_t             start_block,
                        int                     num_bblocks);
 int      xfs_log_mount_finish(struct xfs_mount *mp);
-void     xfs_log_move_tail(struct xfs_mount    *mp,
-                           xfs_lsn_t           tail_lsn);
+xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
+void     xfs_log_space_wake(struct xfs_mount *mp);
 int      xfs_log_notify(struct xfs_mount       *mp,
                         struct xlog_in_core    *iclog,
                         xfs_log_callback_t     *callback_entry);
@@ -172,8 +163,9 @@ int   xfs_log_reserve(struct xfs_mount *mp,
                          int              count,
                          struct xlog_ticket **ticket,
                          __uint8_t        clientid,
-                         uint             flags,
+                         bool             permanent,
                          uint             t_type);
+int      xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic);
 int      xfs_log_unmount_write(struct xfs_mount *mp);
 void      xfs_log_unmount(struct xfs_mount *mp);
 int      xfs_log_force_umount(struct xfs_mount *mp, int logerror);
index 2d3b6a498d632752a95109b2d699fe7de36afec7..2152900b79d4059d943ca6cdc94d4d4e502b62cc 100644 (file)
@@ -239,8 +239,8 @@ typedef struct xlog_res {
 } xlog_res_t;
 
 typedef struct xlog_ticket {
-       wait_queue_head_t  t_wait;       /* ticket wait queue */
        struct list_head   t_queue;      /* reserve/write queue */
+       struct task_struct *t_task;      /* task that owns this ticket */
        xlog_tid_t         t_tid;        /* transaction identifier       : 4  */
        atomic_t           t_ref;        /* ticket reference count       : 4  */
        int                t_curr_res;   /* current reservation in bytes : 4  */
@@ -469,6 +469,16 @@ struct xfs_cil {
 #define XLOG_CIL_SPACE_LIMIT(log)      (log->l_logsize >> 3)
 #define XLOG_CIL_HARD_SPACE_LIMIT(log) (3 * (log->l_logsize >> 4))
 
+/*
+ * ticket grant locks, queues and accounting have their own cachelines
+ * as these are quite hot and can be operated on concurrently.
+ */
+struct xlog_grant_head {
+       spinlock_t              lock ____cacheline_aligned_in_smp;
+       struct list_head        waiters;
+       atomic64_t              grant;
+};
+
 /*
  * The reservation head lsn is not made up of a cycle number and block number.
  * Instead, it uses a cycle number and byte number.  Logs don't expect to
@@ -520,17 +530,8 @@ typedef struct log {
        /* lsn of 1st LR with unflushed * buffers */
        atomic64_t              l_tail_lsn ____cacheline_aligned_in_smp;
 
-       /*
-        * ticket grant locks, queues and accounting have their own cachlines
-        * as these are quite hot and can be operated on concurrently.
-        */
-       spinlock_t              l_grant_reserve_lock ____cacheline_aligned_in_smp;
-       struct list_head        l_reserveq;
-       atomic64_t              l_grant_reserve_head;
-
-       spinlock_t              l_grant_write_lock ____cacheline_aligned_in_smp;
-       struct list_head        l_writeq;
-       atomic64_t              l_grant_write_head;
+       struct xlog_grant_head  l_reserve_head;
+       struct xlog_grant_head  l_write_head;
 
        /* The following field are used for debugging; need to hold icloglock */
 #ifdef DEBUG
@@ -545,14 +546,13 @@ typedef struct log {
 #define XLOG_FORCED_SHUTDOWN(log)      ((log)->l_flags & XLOG_IO_ERROR)
 
 /* common routines */
-extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
 extern int      xlog_recover(xlog_t *log);
 extern int      xlog_recover_finish(xlog_t *log);
 extern void     xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int);
 
 extern kmem_zone_t *xfs_log_ticket_zone;
 struct xlog_ticket *xlog_ticket_alloc(struct log *log, int unit_bytes,
-                               int count, char client, uint xflags,
+                               int count, char client, bool permanent,
                                int alloc_flags);
 
 
index 0ed9ee77937c50470fdea8b7573ea738e4a1587c..7c75c7374d5a4acfdc1e83e139db95294d35fd58 100644 (file)
@@ -965,9 +965,9 @@ xlog_find_tail(
                log->l_curr_cycle++;
        atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
        atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
-       xlog_assign_grant_head(&log->l_grant_reserve_head, log->l_curr_cycle,
+       xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
                                        BBTOB(log->l_curr_block));
-       xlog_assign_grant_head(&log->l_grant_write_head, log->l_curr_cycle,
+       xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
                                        BBTOB(log->l_curr_block));
 
        /*
@@ -3695,7 +3695,7 @@ xlog_do_recover(
 
        /* Convert superblock from on-disk format */
        sbp = &log->l_mp->m_sb;
-       xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp));
+       xfs_sb_from_disk(log->l_mp, XFS_BUF_TO_SBP(bp));
        ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC);
        ASSERT(xfs_sb_good_version(sbp));
        xfs_buf_relse(bp);
index d06afbc3540dde90d7796139bf658c8439a83d4c..1ffead4b2296c947bfbfd3c32a93a10946830028 100644 (file)
@@ -158,7 +158,7 @@ xfs_uuid_mount(
 
  out_duplicate:
        mutex_unlock(&xfs_uuid_table_mutex);
-       xfs_warn(mp, "Filesystem has duplicate UUID - can't mount");
+       xfs_warn(mp, "Filesystem has duplicate UUID %pU - can't mount", uuid);
        return XFS_ERROR(EINVAL);
 }
 
@@ -553,9 +553,11 @@ out_unwind:
 
 void
 xfs_sb_from_disk(
-       xfs_sb_t        *to,
+       struct xfs_mount        *mp,
        xfs_dsb_t       *from)
 {
+       struct xfs_sb *to = &mp->m_sb;
+
        to->sb_magicnum = be32_to_cpu(from->sb_magicnum);
        to->sb_blocksize = be32_to_cpu(from->sb_blocksize);
        to->sb_dblocks = be64_to_cpu(from->sb_dblocks);
@@ -693,7 +695,7 @@ reread:
         * Initialize the mount structure from the superblock.
         * But first do some basic consistency checking.
         */
-       xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp));
+       xfs_sb_from_disk(mp, XFS_BUF_TO_SBP(bp));
        error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags);
        if (error) {
                if (loud)
index 19f69e232509019ffa1ac99381e75cb0279ee908..9eba73887829a89027c0269b1b3cc468343e19e8 100644 (file)
@@ -211,6 +211,9 @@ typedef struct xfs_mount {
        struct shrinker         m_inode_shrink; /* inode reclaim shrinker */
        int64_t                 m_low_space[XFS_LOWSP_MAX];
                                                /* low free space thresholds */
+
+       struct workqueue_struct *m_data_workqueue;
+       struct workqueue_struct *m_unwritten_workqueue;
 } xfs_mount_t;
 
 /*
@@ -395,7 +398,7 @@ extern void xfs_set_low_space_thresholds(struct xfs_mount *);
 extern void    xfs_mod_sb(struct xfs_trans *, __int64_t);
 extern int     xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t,
                                        xfs_agnumber_t *);
-extern void    xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *);
+extern void    xfs_sb_from_disk(struct xfs_mount *, struct xfs_dsb *);
 extern void    xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t);
 
 #endif /* __XFS_MOUNT_H__ */
index c436def733bf892eb324603f688e6d92b92ffc61..55c6afedc8796a8223cf67f1e2c3073cc555a050 100644 (file)
  * quota functionality, including maintaining the freelist and hash
  * tables of dquots.
  */
-struct mutex   xfs_Gqm_lock;
-struct xfs_qm  *xfs_Gqm;
-
-kmem_zone_t    *qm_dqzone;
-kmem_zone_t    *qm_dqtrxzone;
-
-STATIC void    xfs_qm_list_init(xfs_dqlist_t *, char *, int);
-STATIC void    xfs_qm_list_destroy(xfs_dqlist_t *);
-
 STATIC int     xfs_qm_init_quotainos(xfs_mount_t *);
 STATIC int     xfs_qm_init_quotainfo(xfs_mount_t *);
 STATIC int     xfs_qm_shake(struct shrinker *, struct shrink_control *);
 
-static struct shrinker xfs_qm_shaker = {
-       .shrink = xfs_qm_shake,
-       .seeks = DEFAULT_SEEKS,
-};
-
 /*
- * Initialize the XQM structure.
- * Note that there is not one quota manager per file system.
+ * We use the batch lookup interface to iterate over the dquots as it
+ * currently is the only interface into the radix tree code that allows
+ * fuzzy lookups instead of exact matches.  Holding the lock over multiple
+ * operations is fine as all callers are used either during mount/umount
+ * or quotaoff.
  */
-STATIC struct xfs_qm *
-xfs_Gqm_init(void)
+#define XFS_DQ_LOOKUP_BATCH    32
+
+STATIC int
+xfs_qm_dquot_walk(
+       struct xfs_mount        *mp,
+       int                     type,
+       int                     (*execute)(struct xfs_dquot *dqp))
 {
-       xfs_dqhash_t    *udqhash, *gdqhash;
-       xfs_qm_t        *xqm;
-       size_t          hsize;
-       uint            i;
+       struct xfs_quotainfo    *qi = mp->m_quotainfo;
+       struct radix_tree_root  *tree = XFS_DQUOT_TREE(qi, type);
+       uint32_t                next_index;
+       int                     last_error = 0;
+       int                     skipped;
+       int                     nr_found;
+
+restart:
+       skipped = 0;
+       next_index = 0;
+       nr_found = 0;
+
+       while (1) {
+               struct xfs_dquot *batch[XFS_DQ_LOOKUP_BATCH];
+               int             error = 0;
+               int             i;
+
+               mutex_lock(&qi->qi_tree_lock);
+               nr_found = radix_tree_gang_lookup(tree, (void **)batch,
+                                       next_index, XFS_DQ_LOOKUP_BATCH);
+               if (!nr_found) {
+                       mutex_unlock(&qi->qi_tree_lock);
+                       break;
+               }
 
-       /*
-        * Initialize the dquot hash tables.
-        */
-       udqhash = kmem_zalloc_greedy(&hsize,
-                                    XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t),
-                                    XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t));
-       if (!udqhash)
-               goto out;
+               for (i = 0; i < nr_found; i++) {
+                       struct xfs_dquot *dqp = batch[i];
 
-       gdqhash = kmem_zalloc_large(hsize);
-       if (!gdqhash)
-               goto out_free_udqhash;
+                       next_index = be32_to_cpu(dqp->q_core.d_id) + 1;
 
-       hsize /= sizeof(xfs_dqhash_t);
+                       error = execute(batch[i]);
+                       if (error == EAGAIN) {
+                               skipped++;
+                               continue;
+                       }
+                       if (error && last_error != EFSCORRUPTED)
+                               last_error = error;
+               }
 
-       xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
-       xqm->qm_dqhashmask = hsize - 1;
-       xqm->qm_usr_dqhtable = udqhash;
-       xqm->qm_grp_dqhtable = gdqhash;
-       ASSERT(xqm->qm_usr_dqhtable != NULL);
-       ASSERT(xqm->qm_grp_dqhtable != NULL);
+               mutex_unlock(&qi->qi_tree_lock);
 
-       for (i = 0; i < hsize; i++) {
-               xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i);
-               xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i);
+               /* bail out if the filesystem is corrupted.  */
+               if (last_error == EFSCORRUPTED) {
+                       skipped = 0;
+                       break;
+               }
        }
 
-       /*
-        * Freelist of all dquots of all file systems
-        */
-       INIT_LIST_HEAD(&xqm->qm_dqfrlist);
-       xqm->qm_dqfrlist_cnt = 0;
-       mutex_init(&xqm->qm_dqfrlist_lock);
-
-       /*
-        * dquot zone. we register our own low-memory callback.
-        */
-       if (!qm_dqzone) {
-               xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t),
-                                               "xfs_dquots");
-               qm_dqzone = xqm->qm_dqzone;
-       } else
-               xqm->qm_dqzone = qm_dqzone;
-
-       register_shrinker(&xfs_qm_shaker);
-
-       /*
-        * The t_dqinfo portion of transactions.
-        */
-       if (!qm_dqtrxzone) {
-               xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t),
-                                                  "xfs_dqtrx");
-               qm_dqtrxzone = xqm->qm_dqtrxzone;
-       } else
-               xqm->qm_dqtrxzone = qm_dqtrxzone;
-
-       atomic_set(&xqm->qm_totaldquots, 0);
-       xqm->qm_nrefs = 0;
-       return xqm;
+       if (skipped) {
+               delay(1);
+               goto restart;
+       }
 
- out_free_udqhash:
-       kmem_free_large(udqhash);
- out:
-       return NULL;
+       return last_error;
 }
 
+
 /*
- * Destroy the global quota manager when its reference count goes to zero.
+ * Purge a dquot from all tracking data structures and free it.
  */
-STATIC void
-xfs_qm_destroy(
-       struct xfs_qm   *xqm)
+STATIC int
+xfs_qm_dqpurge(
+       struct xfs_dquot        *dqp)
 {
-       int             hsize, i;
+       struct xfs_mount        *mp = dqp->q_mount;
+       struct xfs_quotainfo    *qi = mp->m_quotainfo;
+       struct xfs_dquot        *gdqp = NULL;
 
-       ASSERT(xqm != NULL);
-       ASSERT(xqm->qm_nrefs == 0);
+       xfs_dqlock(dqp);
+       if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) {
+               xfs_dqunlock(dqp);
+               return EAGAIN;
+       }
 
-       unregister_shrinker(&xfs_qm_shaker);
+       /*
+        * If this quota has a group hint attached, prepare for releasing it
+        * now.
+        */
+       gdqp = dqp->q_gdquot;
+       if (gdqp) {
+               xfs_dqlock(gdqp);
+               dqp->q_gdquot = NULL;
+       }
 
-       mutex_lock(&xqm->qm_dqfrlist_lock);
-       ASSERT(list_empty(&xqm->qm_dqfrlist));
-       mutex_unlock(&xqm->qm_dqfrlist_lock);
+       dqp->dq_flags |= XFS_DQ_FREEING;
 
-       hsize = xqm->qm_dqhashmask + 1;
-       for (i = 0; i < hsize; i++) {
-               xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
-               xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
+       /*
+        * If we're turning off quotas, we have to make sure that, for
+        * example, we don't delete quota disk blocks while dquots are
+        * in the process of getting written to those disk blocks.
+        * This dquot might well be on AIL, and we can't leave it there
+        * if we're turning off quotas. Basically, we need this flush
+        * lock, and are willing to block on it.
+        */
+       if (!xfs_dqflock_nowait(dqp)) {
+               /*
+                * Block on the flush lock after nudging dquot buffer,
+                * if it is incore.
+                */
+               xfs_dqflock_pushbuf_wait(dqp);
        }
-       kmem_free_large(xqm->qm_usr_dqhtable);
-       kmem_free_large(xqm->qm_grp_dqhtable);
-       xqm->qm_usr_dqhtable = NULL;
-       xqm->qm_grp_dqhtable = NULL;
-       xqm->qm_dqhashmask = 0;
 
-       kmem_free(xqm);
-}
-
-/*
- * Called at mount time to let XQM know that another file system is
- * starting quotas. This isn't crucial information as the individual mount
- * structures are pretty independent, but it helps the XQM keep a
- * global view of what's going on.
- */
-/* ARGSUSED */
-STATIC int
-xfs_qm_hold_quotafs_ref(
-       struct xfs_mount *mp)
-{
        /*
-        * Need to lock the xfs_Gqm structure for things like this. For example,
-        * the structure could disappear between the entry to this routine and
-        * a HOLD operation if not locked.
+        * If we are turning this type of quotas off, we don't care
+        * about the dirty metadata sitting in this dquot. OTOH, if
+        * we're unmounting, we do care, so we flush it and wait.
         */
-       mutex_lock(&xfs_Gqm_lock);
+       if (XFS_DQ_IS_DIRTY(dqp)) {
+               int     error;
 
-       if (!xfs_Gqm) {
-               xfs_Gqm = xfs_Gqm_init();
-               if (!xfs_Gqm) {
-                       mutex_unlock(&xfs_Gqm_lock);
-                       return ENOMEM;
-               }
+               /*
+                * We don't care about getting disk errors here. We need
+                * to purge this dquot anyway, so we go ahead regardless.
+                */
+               error = xfs_qm_dqflush(dqp, SYNC_WAIT);
+               if (error)
+                       xfs_warn(mp, "%s: dquot %p flush failed",
+                               __func__, dqp);
+               xfs_dqflock(dqp);
        }
 
+       ASSERT(atomic_read(&dqp->q_pincount) == 0);
+       ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
+              !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
+
+       xfs_dqfunlock(dqp);
+       xfs_dqunlock(dqp);
+
+       radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags),
+                         be32_to_cpu(dqp->q_core.d_id));
+       qi->qi_dquots--;
+
        /*
-        * We can keep a list of all filesystems with quotas mounted for
-        * debugging and statistical purposes, but ...
-        * Just take a reference and get out.
+        * We move dquots to the freelist as soon as their reference count
+        * hits zero, so it really should be on the freelist here.
         */
-       xfs_Gqm->qm_nrefs++;
-       mutex_unlock(&xfs_Gqm_lock);
+       mutex_lock(&qi->qi_lru_lock);
+       ASSERT(!list_empty(&dqp->q_lru));
+       list_del_init(&dqp->q_lru);
+       qi->qi_lru_count--;
+       XFS_STATS_DEC(xs_qm_dquot_unused);
+       mutex_unlock(&qi->qi_lru_lock);
 
+       xfs_qm_dqdestroy(dqp);
+
+       if (gdqp)
+               xfs_qm_dqput(gdqp);
        return 0;
 }
 
-
 /*
- * Release the reference that a filesystem took at mount time,
- * so that we know when we need to destroy the entire quota manager.
+ * Purge the dquot cache.
  */
-/* ARGSUSED */
-STATIC void
-xfs_qm_rele_quotafs_ref(
-       struct xfs_mount *mp)
+void
+xfs_qm_dqpurge_all(
+       struct xfs_mount        *mp,
+       uint                    flags)
 {
-       ASSERT(xfs_Gqm);
-       ASSERT(xfs_Gqm->qm_nrefs > 0);
-
-       /*
-        * Destroy the entire XQM. If somebody mounts with quotaon, this'll
-        * be restarted.
-        */
-       mutex_lock(&xfs_Gqm_lock);
-       if (--xfs_Gqm->qm_nrefs == 0) {
-               xfs_qm_destroy(xfs_Gqm);
-               xfs_Gqm = NULL;
-       }
-       mutex_unlock(&xfs_Gqm_lock);
+       if (flags & XFS_QMOPT_UQUOTA)
+               xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge);
+       if (flags & XFS_QMOPT_GQUOTA)
+               xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge);
+       if (flags & XFS_QMOPT_PQUOTA)
+               xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_dqpurge);
 }
 
 /*
@@ -376,175 +371,6 @@ xfs_qm_unmount_quotas(
        }
 }
 
-/*
- * Flush all dquots of the given file system to disk. The dquots are
- * _not_ purged from memory here, just their data written to disk.
- */
-STATIC int
-xfs_qm_dqflush_all(
-       struct xfs_mount        *mp)
-{
-       struct xfs_quotainfo    *q = mp->m_quotainfo;
-       int                     recl;
-       struct xfs_dquot        *dqp;
-       int                     error;
-
-       if (!q)
-               return 0;
-again:
-       mutex_lock(&q->qi_dqlist_lock);
-       list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
-               xfs_dqlock(dqp);
-               if ((dqp->dq_flags & XFS_DQ_FREEING) ||
-                   !XFS_DQ_IS_DIRTY(dqp)) {
-                       xfs_dqunlock(dqp);
-                       continue;
-               }
-
-               /* XXX a sentinel would be better */
-               recl = q->qi_dqreclaims;
-               if (!xfs_dqflock_nowait(dqp)) {
-                       /*
-                        * If we can't grab the flush lock then check
-                        * to see if the dquot has been flushed delayed
-                        * write.  If so, grab its buffer and send it
-                        * out immediately.  We'll be able to acquire
-                        * the flush lock when the I/O completes.
-                        */
-                       xfs_dqflock_pushbuf_wait(dqp);
-               }
-               /*
-                * Let go of the mplist lock. We don't want to hold it
-                * across a disk write.
-                */
-               mutex_unlock(&q->qi_dqlist_lock);
-               error = xfs_qm_dqflush(dqp, 0);
-               xfs_dqunlock(dqp);
-               if (error)
-                       return error;
-
-               mutex_lock(&q->qi_dqlist_lock);
-               if (recl != q->qi_dqreclaims) {
-                       mutex_unlock(&q->qi_dqlist_lock);
-                       /* XXX restart limit */
-                       goto again;
-               }
-       }
-
-       mutex_unlock(&q->qi_dqlist_lock);
-       /* return ! busy */
-       return 0;
-}
-
-/*
- * Release the group dquot pointers the user dquots may be
- * carrying around as a hint. mplist is locked on entry and exit.
- */
-STATIC void
-xfs_qm_detach_gdquots(
-       struct xfs_mount        *mp)
-{
-       struct xfs_quotainfo    *q = mp->m_quotainfo;
-       struct xfs_dquot        *dqp, *gdqp;
-
- again:
-       ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
-       list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
-               xfs_dqlock(dqp);
-               if (dqp->dq_flags & XFS_DQ_FREEING) {
-                       xfs_dqunlock(dqp);
-                       mutex_unlock(&q->qi_dqlist_lock);
-                       delay(1);
-                       mutex_lock(&q->qi_dqlist_lock);
-                       goto again;
-               }
-
-               gdqp = dqp->q_gdquot;
-               if (gdqp)
-                       dqp->q_gdquot = NULL;
-               xfs_dqunlock(dqp);
-
-               if (gdqp)
-                       xfs_qm_dqrele(gdqp);
-       }
-}
-
-/*
- * Go through all the incore dquots of this file system and take them
- * off the mplist and hashlist, if the dquot type matches the dqtype
- * parameter. This is used when turning off quota accounting for
- * users and/or groups, as well as when the filesystem is unmounting.
- */
-STATIC int
-xfs_qm_dqpurge_int(
-       struct xfs_mount        *mp,
-       uint                    flags)
-{
-       struct xfs_quotainfo    *q = mp->m_quotainfo;
-       struct xfs_dquot        *dqp, *n;
-       uint                    dqtype;
-       int                     nmisses = 0;
-       LIST_HEAD               (dispose_list);
-
-       if (!q)
-               return 0;
-
-       dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
-       dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0;
-       dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0;
-
-       mutex_lock(&q->qi_dqlist_lock);
-
-       /*
-        * In the first pass through all incore dquots of this filesystem,
-        * we release the group dquot pointers the user dquots may be
-        * carrying around as a hint. We need to do this irrespective of
-        * what's being turned off.
-        */
-       xfs_qm_detach_gdquots(mp);
-
-       /*
-        * Try to get rid of all of the unwanted dquots.
-        */
-       list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) {
-               xfs_dqlock(dqp);
-               if ((dqp->dq_flags & dqtype) != 0 &&
-                   !(dqp->dq_flags & XFS_DQ_FREEING)) {
-                       if (dqp->q_nrefs == 0) {
-                               dqp->dq_flags |= XFS_DQ_FREEING;
-                               list_move_tail(&dqp->q_mplist, &dispose_list);
-                       } else
-                               nmisses++;
-               }
-               xfs_dqunlock(dqp);
-       }
-       mutex_unlock(&q->qi_dqlist_lock);
-
-       list_for_each_entry_safe(dqp, n, &dispose_list, q_mplist)
-               xfs_qm_dqpurge(dqp);
-
-       return nmisses;
-}
-
-int
-xfs_qm_dqpurge_all(
-       xfs_mount_t     *mp,
-       uint            flags)
-{
-       int             ndquots;
-
-       /*
-        * Purge the dquot cache.
-        * None of the dquots should really be busy at this point.
-        */
-       if (mp->m_quotainfo) {
-               while ((ndquots = xfs_qm_dqpurge_int(mp, flags))) {
-                       delay(ndquots * 10);
-               }
-       }
-       return 0;
-}
-
 STATIC int
 xfs_qm_dqattach_one(
        xfs_inode_t     *ip,
@@ -782,14 +608,6 @@ xfs_qm_dqdetach(
        }
 }
 
-/*
- * The hash chains and the mplist use the same xfs_dqhash structure as
- * their list head, but we can take the mplist qh_lock and one of the
- * hash qh_locks at the same time without any problem as they aren't
- * related.
- */
-static struct lock_class_key xfs_quota_mplist_class;
-
 /*
  * This initializes all the quota information that's kept in the
  * mount structure
@@ -804,13 +622,6 @@ xfs_qm_init_quotainfo(
 
        ASSERT(XFS_IS_QUOTA_RUNNING(mp));
 
-       /*
-        * Tell XQM that we exist as soon as possible.
-        */
-       if ((error = xfs_qm_hold_quotafs_ref(mp))) {
-               return error;
-       }
-
        qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
 
        /*
@@ -823,11 +634,13 @@ xfs_qm_init_quotainfo(
                return error;
        }
 
-       INIT_LIST_HEAD(&qinf->qi_dqlist);
-       mutex_init(&qinf->qi_dqlist_lock);
-       lockdep_set_class(&qinf->qi_dqlist_lock, &xfs_quota_mplist_class);
+       INIT_RADIX_TREE(&qinf->qi_uquota_tree, GFP_NOFS);
+       INIT_RADIX_TREE(&qinf->qi_gquota_tree, GFP_NOFS);
+       mutex_init(&qinf->qi_tree_lock);
 
-       qinf->qi_dqreclaims = 0;
+       INIT_LIST_HEAD(&qinf->qi_lru_list);
+       qinf->qi_lru_count = 0;
+       mutex_init(&qinf->qi_lru_lock);
 
        /* mutex used to serialize quotaoffs */
        mutex_init(&qinf->qi_quotaofflock);
@@ -894,6 +707,9 @@ xfs_qm_init_quotainfo(
                qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
        }
 
+       qinf->qi_shrinker.shrink = xfs_qm_shake;
+       qinf->qi_shrinker.seeks = DEFAULT_SEEKS;
+       register_shrinker(&qinf->qi_shrinker);
        return 0;
 }
 
@@ -911,17 +727,8 @@ xfs_qm_destroy_quotainfo(
 
        qi = mp->m_quotainfo;
        ASSERT(qi != NULL);
-       ASSERT(xfs_Gqm != NULL);
-
-       /*
-        * Release the reference that XQM kept, so that we know
-        * when the XQM structure should be freed. We cannot assume
-        * that xfs_Gqm is non-null after this point.
-        */
-       xfs_qm_rele_quotafs_ref(mp);
 
-       ASSERT(list_empty(&qi->qi_dqlist));
-       mutex_destroy(&qi->qi_dqlist_lock);
+       unregister_shrinker(&qi->qi_shrinker);
 
        if (qi->qi_uquotaip) {
                IRELE(qi->qi_uquotaip);
@@ -936,30 +743,6 @@ xfs_qm_destroy_quotainfo(
        mp->m_quotainfo = NULL;
 }
 
-
-
-/* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */
-
-/* ARGSUSED */
-STATIC void
-xfs_qm_list_init(
-       xfs_dqlist_t    *list,
-       char            *str,
-       int             n)
-{
-       mutex_init(&list->qh_lock);
-       INIT_LIST_HEAD(&list->qh_list);
-       list->qh_version = 0;
-       list->qh_nelems = 0;
-}
-
-STATIC void
-xfs_qm_list_destroy(
-       xfs_dqlist_t    *list)
-{
-       mutex_destroy(&(list->qh_lock));
-}
-
 /*
  * Create an inode and return with a reference already taken, but unlocked
  * This is how we create quota inodes
@@ -1397,6 +1180,28 @@ error0:
        return error;
 }
 
+/*
+ * Flush one dquot to its disk buffer if it is dirty and not being freed.
+ * Returns 0 when there is nothing to flush, else xfs_qm_dqflush()'s result.
+ */
+STATIC int
+xfs_qm_flush_one(
+       struct xfs_dquot        *dqp)
+{
+       int                     error = 0;
+
+       xfs_dqlock(dqp);
+       if (!(dqp->dq_flags & XFS_DQ_FREEING) && XFS_DQ_IS_DIRTY(dqp)) {
+               /* Flush lock is held elsewhere: push the buffer and wait. */
+               if (!xfs_dqflock_nowait(dqp))
+                       xfs_dqflock_pushbuf_wait(dqp);
+
+               error = xfs_qm_dqflush(dqp, 0);
+       }
+       xfs_dqunlock(dqp);
+       return error;
+}
+
 /*
  * Walk thru all the filesystem inodes and construct a consistent view
  * of the disk quota world. If the quotacheck fails, disable quotas.
@@ -1405,7 +1210,7 @@ int
 xfs_qm_quotacheck(
        xfs_mount_t     *mp)
 {
-       int             done, count, error;
+       int             done, count, error, error2;
        xfs_ino_t       lastino;
        size_t          structsz;
        xfs_inode_t     *uip, *gip;
@@ -1419,12 +1224,6 @@ xfs_qm_quotacheck(
        ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip);
        ASSERT(XFS_IS_QUOTA_RUNNING(mp));
 
-       /*
-        * There should be no cached dquots. The (simplistic) quotacheck
-        * algorithm doesn't like that.
-        */
-       ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist));
-
        xfs_notice(mp, "Quotacheck needed: Please wait.");
 
        /*
@@ -1463,12 +1262,21 @@ xfs_qm_quotacheck(
        } while (!done);
 
        /*
-        * We've made all the changes that we need to make incore.
-        * Flush them down to disk buffers if everything was updated
-        * successfully.
+        * We've made all the changes that we need to make incore.  Flush them
+        * down to disk buffers if everything was updated successfully.
         */
-       if (!error)
-               error = xfs_qm_dqflush_all(mp);
+       if (XFS_IS_UQUOTA_ON(mp))
+               error = xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_flush_one);
+       if (XFS_IS_GQUOTA_ON(mp)) {
+               error2 = xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_flush_one);
+               if (!error)
+                       error = error2;
+       }
+       if (XFS_IS_PQUOTA_ON(mp)) {
+               error2 = xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_flush_one);
+               if (!error)
+                       error = error2;
+       }
 
        /*
         * We can get this error if we couldn't do a dquot allocation inside
@@ -1496,7 +1304,7 @@ xfs_qm_quotacheck(
         * quotachecked status, since we won't be doing accounting for
         * that type anymore.
         */
-       mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD);
+       mp->m_qflags &= ~XFS_ALL_QUOTA_CHKD;
        mp->m_qflags |= flags;
 
  error_return:
@@ -1508,7 +1316,6 @@ xfs_qm_quotacheck(
                 * We must turn off quotas.
                 */
                ASSERT(mp->m_quotainfo != NULL);
-               ASSERT(xfs_Gqm != NULL);
                xfs_qm_destroy_quotainfo(mp);
                if (xfs_mount_reset_sbqflags(mp)) {
                        xfs_warn(mp,
@@ -1604,16 +1411,12 @@ xfs_qm_dqfree_one(
        struct xfs_mount        *mp = dqp->q_mount;
        struct xfs_quotainfo    *qi = mp->m_quotainfo;
 
-       mutex_lock(&dqp->q_hash->qh_lock);
-       list_del_init(&dqp->q_hashlist);
-       dqp->q_hash->qh_version++;
-       mutex_unlock(&dqp->q_hash->qh_lock);
+       mutex_lock(&qi->qi_tree_lock);
+       radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags),
+                         be32_to_cpu(dqp->q_core.d_id));
 
-       mutex_lock(&qi->qi_dqlist_lock);
-       list_del_init(&dqp->q_mplist);
        qi->qi_dquots--;
-       qi->qi_dqreclaims++;
-       mutex_unlock(&qi->qi_dqlist_lock);
+       mutex_unlock(&qi->qi_tree_lock);
 
        xfs_qm_dqdestroy(dqp);
 }
@@ -1624,6 +1427,7 @@ xfs_qm_dqreclaim_one(
        struct list_head        *dispose_list)
 {
        struct xfs_mount        *mp = dqp->q_mount;
+       struct xfs_quotainfo    *qi = mp->m_quotainfo;
        int                     error;
 
        if (!xfs_dqlock_nowait(dqp))
@@ -1637,16 +1441,14 @@ xfs_qm_dqreclaim_one(
                xfs_dqunlock(dqp);
 
                trace_xfs_dqreclaim_want(dqp);
-               XQM_STATS_INC(xqmstats.xs_qm_dqwants);
+               XFS_STATS_INC(xs_qm_dqwants);
 
-               list_del_init(&dqp->q_freelist);
-               xfs_Gqm->qm_dqfrlist_cnt--;
+               list_del_init(&dqp->q_lru);
+               qi->qi_lru_count--;
+               XFS_STATS_DEC(xs_qm_dquot_unused);
                return;
        }
 
-       ASSERT(dqp->q_hash);
-       ASSERT(!list_empty(&dqp->q_mplist));
-
        /*
         * Try to grab the flush lock. If this dquot is in the process of
         * getting flushed to disk, we don't want to reclaim it.
@@ -1688,11 +1490,12 @@ xfs_qm_dqreclaim_one(
        xfs_dqunlock(dqp);
 
        ASSERT(dqp->q_nrefs == 0);
-       list_move_tail(&dqp->q_freelist, dispose_list);
-       xfs_Gqm->qm_dqfrlist_cnt--;
+       list_move_tail(&dqp->q_lru, dispose_list);
+       qi->qi_lru_count--;
+       XFS_STATS_DEC(xs_qm_dquot_unused);
 
        trace_xfs_dqreclaim_done(dqp);
-       XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
+       XFS_STATS_INC(xs_qm_dqreclaims);
        return;
 
 out_busy:
@@ -1701,10 +1504,10 @@ out_busy:
        /*
         * Move the dquot to the tail of the list so that we don't spin on it.
         */
-       list_move_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
+       list_move_tail(&dqp->q_lru, &qi->qi_lru_list);
 
        trace_xfs_dqreclaim_busy(dqp);
-       XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
+       XFS_STATS_INC(xs_qm_dqreclaim_misses);
 }
 
 STATIC int
@@ -1712,6 +1515,8 @@ xfs_qm_shake(
        struct shrinker         *shrink,
        struct shrink_control   *sc)
 {
+       struct xfs_quotainfo    *qi =
+               container_of(shrink, struct xfs_quotainfo, qi_shrinker);
        int                     nr_to_scan = sc->nr_to_scan;
        LIST_HEAD               (dispose_list);
        struct xfs_dquot        *dqp;
@@ -1721,24 +1526,23 @@ xfs_qm_shake(
        if (!nr_to_scan)
                goto out;
 
-       mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-       while (!list_empty(&xfs_Gqm->qm_dqfrlist)) {
+       mutex_lock(&qi->qi_lru_lock);
+       while (!list_empty(&qi->qi_lru_list)) {
                if (nr_to_scan-- <= 0)
                        break;
-               dqp = list_first_entry(&xfs_Gqm->qm_dqfrlist, struct xfs_dquot,
-                                      q_freelist);
+               dqp = list_first_entry(&qi->qi_lru_list, struct xfs_dquot,
+                                      q_lru);
                xfs_qm_dqreclaim_one(dqp, &dispose_list);
        }
-       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+       mutex_unlock(&qi->qi_lru_lock);
 
        while (!list_empty(&dispose_list)) {
-               dqp = list_first_entry(&dispose_list, struct xfs_dquot,
-                                      q_freelist);
-               list_del_init(&dqp->q_freelist);
+               dqp = list_first_entry(&dispose_list, struct xfs_dquot, q_lru);
+               list_del_init(&dqp->q_lru);
                xfs_qm_dqfree_one(dqp);
        }
 out:
-       return (xfs_Gqm->qm_dqfrlist_cnt / 100) * sysctl_vfs_cache_pressure;
+       return (qi->qi_lru_count / 100) * sysctl_vfs_cache_pressure;
 }
 
 /*
index 9a9b997e1a0a294bd6f180ccaac011f2ec49a517..44b858b79d716709a155976f01183fa056f25fcf 100644 (file)
 #include "xfs_dquot_item.h"
 #include "xfs_dquot.h"
 #include "xfs_quota_priv.h"
-#include "xfs_qm_stats.h"
 
-struct xfs_qm;
 struct xfs_inode;
 
-extern struct mutex    xfs_Gqm_lock;
-extern struct xfs_qm   *xfs_Gqm;
-extern kmem_zone_t     *qm_dqzone;
-extern kmem_zone_t     *qm_dqtrxzone;
-
-/*
- * Dquot hashtable constants/threshold values.
- */
-#define XFS_QM_HASHSIZE_LOW            (PAGE_SIZE / sizeof(xfs_dqhash_t))
-#define XFS_QM_HASHSIZE_HIGH           ((PAGE_SIZE * 4) / sizeof(xfs_dqhash_t))
+extern struct kmem_zone        *xfs_qm_dqtrxzone;
 
 /*
  * This defines the unit of allocation of dquots.
@@ -48,36 +37,20 @@ extern kmem_zone_t  *qm_dqtrxzone;
  */
 #define XFS_DQUOT_CLUSTER_SIZE_FSB     (xfs_filblks_t)1
 
-typedef xfs_dqhash_t   xfs_dqlist_t;
-
-/*
- * Quota Manager (global) structure. Lives only in core.
- */
-typedef struct xfs_qm {
-       xfs_dqlist_t    *qm_usr_dqhtable;/* udquot hash table */
-       xfs_dqlist_t    *qm_grp_dqhtable;/* gdquot hash table */
-       uint             qm_dqhashmask;  /* # buckets in dq hashtab - 1 */
-       struct list_head qm_dqfrlist;    /* freelist of dquots */
-       struct mutex     qm_dqfrlist_lock;
-       int              qm_dqfrlist_cnt;
-       atomic_t         qm_totaldquots; /* total incore dquots */
-       uint             qm_nrefs;       /* file systems with quota on */
-       kmem_zone_t     *qm_dqzone;      /* dquot mem-alloc zone */
-       kmem_zone_t     *qm_dqtrxzone;   /* t_dqinfo of transactions */
-} xfs_qm_t;
-
 /*
  * Various quota information for individual filesystems.
  * The mount structure keeps a pointer to this.
  */
 typedef struct xfs_quotainfo {
+       struct radix_tree_root qi_uquota_tree;
+       struct radix_tree_root qi_gquota_tree;
+       struct mutex qi_tree_lock;
        xfs_inode_t     *qi_uquotaip;    /* user quota inode */
        xfs_inode_t     *qi_gquotaip;    /* group quota inode */
-       struct list_head qi_dqlist;      /* all dquots in filesys */
-       struct mutex     qi_dqlist_lock;
+       struct list_head qi_lru_list;
+       struct mutex     qi_lru_lock;
+       int              qi_lru_count;
        int              qi_dquots;
-       int              qi_dqreclaims;  /* a change here indicates
-                                           a removal in the dqlist */
        time_t           qi_btimelimit;  /* limit for blks timer */
        time_t           qi_itimelimit;  /* limit for inodes timer */
        time_t           qi_rtbtimelimit;/* limit for rt blks timer */
@@ -93,8 +66,14 @@ typedef struct xfs_quotainfo {
        xfs_qcnt_t       qi_isoftlimit;  /* default inode count soft limit */
        xfs_qcnt_t       qi_rtbhardlimit;/* default realtime blk hard limit */
        xfs_qcnt_t       qi_rtbsoftlimit;/* default realtime blk soft limit */
+       struct shrinker  qi_shrinker;
 } xfs_quotainfo_t;
 
+/* User dquots use qi_uquota_tree; all other types use qi_gquota_tree. */
+#define XFS_DQUOT_TREE(qi, type) \
+       (((type) & XFS_DQ_USER) ? \
+        &(qi)->qi_uquota_tree : &(qi)->qi_gquota_tree)
+
 
 extern void    xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long);
 extern int     xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *,
@@ -130,7 +109,7 @@ extern int          xfs_qm_quotacheck(xfs_mount_t *);
 extern int             xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t);
 
 /* dquot stuff */
-extern int             xfs_qm_dqpurge_all(xfs_mount_t *, uint);
+extern void            xfs_qm_dqpurge_all(xfs_mount_t *, uint);
 extern void            xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
 
 /* quota ops */
index a0a829addca9d3c79c649201cf3f9d76432847c1..e6986b5d80d8fca7d9add0e7277c6f3a61859cdd 100644 (file)
+/*
+ * Clamp the statfs block and inode totals in @statp to the limits of the
+ * dquot @dqp (soft limit if set, otherwise hard limit) and derive the free
+ * counts from the dquot's reserved usage.  Fields are left untouched when
+ * no limit is set or the existing total is already within the limit.
+ */
 STATIC void
 xfs_fill_statvfs_from_dquot(
        struct kstatfs          *statp,
-       xfs_disk_dquot_t        *dp)
+       struct xfs_dquot        *dqp)
 {
        __uint64_t              limit;
 
-       limit = dp->d_blk_softlimit ?
-               be64_to_cpu(dp->d_blk_softlimit) :
-               be64_to_cpu(dp->d_blk_hardlimit);
+       limit = dqp->q_core.d_blk_softlimit ?
+               be64_to_cpu(dqp->q_core.d_blk_softlimit) :
+               be64_to_cpu(dqp->q_core.d_blk_hardlimit);
        if (limit && statp->f_blocks > limit) {
                statp->f_blocks = limit;
                statp->f_bfree = statp->f_bavail =
-                       (statp->f_blocks > be64_to_cpu(dp->d_bcount)) ?
-                        (statp->f_blocks - be64_to_cpu(dp->d_bcount)) : 0;
+                       (statp->f_blocks > dqp->q_res_bcount) ?
+                        (statp->f_blocks - dqp->q_res_bcount) : 0;
        }
 
-       limit = dp->d_ino_softlimit ?
-               be64_to_cpu(dp->d_ino_softlimit) :
-               be64_to_cpu(dp->d_ino_hardlimit);
+       limit = dqp->q_core.d_ino_softlimit ?
+               be64_to_cpu(dqp->q_core.d_ino_softlimit) :
+               be64_to_cpu(dqp->q_core.d_ino_hardlimit);
        if (limit && statp->f_files > limit) {
                statp->f_files = limit;
+               /*
+                * Free inodes are the clamped total minus what the dquot has
+                * reserved, mirroring the block case above.  f_ffree has not
+                * been adjusted for the limit yet, so it must not be the
+                * minuend: it could be smaller than q_res_icount and wrap.
+                */
                statp->f_ffree =
-                       (statp->f_files > be64_to_cpu(dp->d_icount)) ?
-                        (statp->f_ffree - be64_to_cpu(dp->d_icount)) : 0;
+                       (statp->f_files > dqp->q_res_icount) ?
+                        (statp->f_files - dqp->q_res_icount) : 0;
        }
 }
 
@@ -82,7 +82,7 @@ xfs_qm_statvfs(
        xfs_dquot_t             *dqp;
 
        if (!xfs_qm_dqget(mp, NULL, xfs_get_projid(ip), XFS_DQ_PROJ, 0, &dqp)) {
-               xfs_fill_statvfs_from_dquot(statp, &dqp->q_core);
+               xfs_fill_statvfs_from_dquot(statp, dqp);
                xfs_qm_dqput(dqp);
        }
 }
@@ -156,21 +156,3 @@ xfs_qm_newmount(
 
        return 0;
 }
-
-void __init
-xfs_qm_init(void)
-{
-       printk(KERN_INFO "SGI XFS Quota Management subsystem\n");
-       mutex_init(&xfs_Gqm_lock);
-       xfs_qm_init_procfs();
-}
-
-void __exit
-xfs_qm_exit(void)
-{
-       xfs_qm_cleanup_procfs();
-       if (qm_dqzone)
-               kmem_zone_destroy(qm_dqzone);
-       if (qm_dqtrxzone)
-               kmem_zone_destroy(qm_dqtrxzone);
-}
diff --git a/fs/xfs/xfs_qm_stats.c b/fs/xfs/xfs_qm_stats.c
deleted file mode 100644 (file)
index 5729ba5..0000000
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_itable.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_qm.h"
-
-struct xqmstats xqmstats;
-
-static int xqm_proc_show(struct seq_file *m, void *v)
-{
-       /* maximum; incore; ratio free to inuse; freelist */
-       seq_printf(m, "%d\t%d\t%d\t%u\n",
-                       0,
-                       xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0,
-                       0,
-                       xfs_Gqm? xfs_Gqm->qm_dqfrlist_cnt : 0);
-       return 0;
-}
-
-static int xqm_proc_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, xqm_proc_show, NULL);
-}
-
-static const struct file_operations xqm_proc_fops = {
-       .owner          = THIS_MODULE,
-       .open           = xqm_proc_open,
-       .read           = seq_read,
-       .llseek         = seq_lseek,
-       .release        = single_release,
-};
-
-static int xqmstat_proc_show(struct seq_file *m, void *v)
-{
-       /* quota performance statistics */
-       seq_printf(m, "qm %u %u %u %u %u %u %u %u\n",
-                       xqmstats.xs_qm_dqreclaims,
-                       xqmstats.xs_qm_dqreclaim_misses,
-                       xqmstats.xs_qm_dquot_dups,
-                       xqmstats.xs_qm_dqcachemisses,
-                       xqmstats.xs_qm_dqcachehits,
-                       xqmstats.xs_qm_dqwants,
-                       xqmstats.xs_qm_dqshake_reclaims,
-                       xqmstats.xs_qm_dqinact_reclaims);
-       return 0;
-}
-
-static int xqmstat_proc_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, xqmstat_proc_show, NULL);
-}
-
-static const struct file_operations xqmstat_proc_fops = {
-       .owner          = THIS_MODULE,
-       .open           = xqmstat_proc_open,
-       .read           = seq_read,
-       .llseek         = seq_lseek,
-       .release        = single_release,
-};
-
-void
-xfs_qm_init_procfs(void)
-{
-       proc_create("fs/xfs/xqmstat", 0, NULL, &xqmstat_proc_fops);
-       proc_create("fs/xfs/xqm", 0, NULL, &xqm_proc_fops);
-}
-
-void
-xfs_qm_cleanup_procfs(void)
-{
-       remove_proc_entry("fs/xfs/xqm", NULL);
-       remove_proc_entry("fs/xfs/xqmstat", NULL);
-}
diff --git a/fs/xfs/xfs_qm_stats.h b/fs/xfs/xfs_qm_stats.h
deleted file mode 100644 (file)
index 5b964fc..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2002 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_QM_STATS_H__
-#define __XFS_QM_STATS_H__
-
-#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF)
-
-/*
- * XQM global statistics
- */
-struct xqmstats {
-       __uint32_t              xs_qm_dqreclaims;
-       __uint32_t              xs_qm_dqreclaim_misses;
-       __uint32_t              xs_qm_dquot_dups;
-       __uint32_t              xs_qm_dqcachemisses;
-       __uint32_t              xs_qm_dqcachehits;
-       __uint32_t              xs_qm_dqwants;
-       __uint32_t              xs_qm_dqshake_reclaims;
-       __uint32_t              xs_qm_dqinact_reclaims;
-};
-
-extern struct xqmstats xqmstats;
-
-# define XQM_STATS_INC(count)  ( (count)++ )
-
-extern void xfs_qm_init_procfs(void);
-extern void xfs_qm_cleanup_procfs(void);
-
-#else
-
-# define XQM_STATS_INC(count)  do { } while (0)
-
-static inline void xfs_qm_init_procfs(void) { };
-static inline void xfs_qm_cleanup_procfs(void) { };
-
-#endif
-
-#endif /* __XFS_QM_STATS_H__ */
index 711a86e39ff046d302a5ff14695882b65bcb3e25..c4f396e437a87656599d21a25186f426556fbcae 100644 (file)
@@ -47,9 +47,6 @@ STATIC int    xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
                                        uint);
 STATIC uint    xfs_qm_export_flags(uint);
 STATIC uint    xfs_qm_export_qtype_flags(uint);
-STATIC void    xfs_qm_export_dquot(xfs_mount_t *, xfs_disk_dquot_t *,
-                                       fs_disk_quota_t *);
-
 
 /*
  * Turn off quota accounting and/or enforcement for all udquots and/or
@@ -69,7 +66,6 @@ xfs_qm_scall_quotaoff(
        int                     error;
        uint                    inactivate_flags;
        xfs_qoff_logitem_t      *qoffstart;
-       int                     nculprits;
 
        /*
         * No file system can have quotas enabled on disk but not in core.
@@ -175,18 +171,13 @@ xfs_qm_scall_quotaoff(
         * This isn't protected by a particular lock directly, because we
         * don't want to take a mrlock every time we depend on quotas being on.
         */
-       mp->m_qflags &= ~(flags);
+       mp->m_qflags &= ~flags;
 
        /*
         * Go through all the dquots of this file system and purge them,
-        * according to what was turned off. We may not be able to get rid
-        * of all dquots, because dquots can have temporary references that
-        * are not attached to inodes. eg. xfs_setattr, xfs_create.
-        * So, if we couldn't purge all the dquots from the filesystem,
-        * we can't get rid of the incore data structures.
+        * according to what was turned off.
         */
-       while ((nculprits = xfs_qm_dqpurge_all(mp, dqtype)))
-               delay(10 * nculprits);
+       xfs_qm_dqpurge_all(mp, dqtype);
 
        /*
         * Transactions that had started before ACTIVE state bit was cleared
@@ -635,42 +626,6 @@ xfs_qm_scall_setqlim(
        return error;
 }
 
-int
-xfs_qm_scall_getquota(
-       xfs_mount_t     *mp,
-       xfs_dqid_t      id,
-       uint            type,
-       fs_disk_quota_t *out)
-{
-       xfs_dquot_t     *dqp;
-       int             error;
-
-       /*
-        * Try to get the dquot. We don't want it allocated on disk, so
-        * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't
-        * exist, we'll get ENOENT back.
-        */
-       if ((error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp))) {
-               return (error);
-       }
-
-       /*
-        * If everything's NULL, this dquot doesn't quite exist as far as
-        * our utility programs are concerned.
-        */
-       if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
-               xfs_qm_dqput(dqp);
-               return XFS_ERROR(ENOENT);
-       }
-       /*
-        * Convert the disk dquot to the exportable format
-        */
-       xfs_qm_export_dquot(mp, &dqp->q_core, out);
-       xfs_qm_dqput(dqp);
-       return (error ? XFS_ERROR(EFAULT) : 0);
-}
-
-
 STATIC int
 xfs_qm_log_quotaoff_end(
        xfs_mount_t             *mp,
@@ -759,50 +714,66 @@ error0:
 }
 
 
-/*
- * Translate an internal style on-disk-dquot to the exportable format.
- * The main differences are that the counters/limits are all in Basic
- * Blocks (BBs) instead of the internal FSBs, and all on-disk data has
- * to be converted to the native endianness.
- */
-STATIC void
-xfs_qm_export_dquot(
-       xfs_mount_t             *mp,
-       xfs_disk_dquot_t        *src,
+int
+xfs_qm_scall_getquota(
+       struct xfs_mount        *mp,
+       xfs_dqid_t              id,
+       uint                    type,
        struct fs_disk_quota    *dst)
 {
+       struct xfs_dquot        *dqp;
+       int                     error;
+
+       /*
+        * Try to get the dquot. We don't want it allocated on disk, so
+        * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't
+        * exist, we'll get ENOENT back.
+        */
+       error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp);
+       if (error)
+               return error;
+
+       /*
+        * If everything's NULL, this dquot doesn't quite exist as far as
+        * our utility programs are concerned.
+        */
+       if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
+               error = XFS_ERROR(ENOENT);
+               goto out_put;
+       }
+
        memset(dst, 0, sizeof(*dst));
-       dst->d_version = FS_DQUOT_VERSION;  /* different from src->d_version */
-       dst->d_flags = xfs_qm_export_qtype_flags(src->d_flags);
-       dst->d_id = be32_to_cpu(src->d_id);
+       dst->d_version = FS_DQUOT_VERSION;
+       dst->d_flags = xfs_qm_export_qtype_flags(dqp->q_core.d_flags);
+       dst->d_id = be32_to_cpu(dqp->q_core.d_id);
        dst->d_blk_hardlimit =
-               XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_hardlimit));
+               XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_blk_hardlimit));
        dst->d_blk_softlimit =
-               XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_softlimit));
-       dst->d_ino_hardlimit = be64_to_cpu(src->d_ino_hardlimit);
-       dst->d_ino_softlimit = be64_to_cpu(src->d_ino_softlimit);
-       dst->d_bcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_bcount));
-       dst->d_icount = be64_to_cpu(src->d_icount);
-       dst->d_btimer = be32_to_cpu(src->d_btimer);
-       dst->d_itimer = be32_to_cpu(src->d_itimer);
-       dst->d_iwarns = be16_to_cpu(src->d_iwarns);
-       dst->d_bwarns = be16_to_cpu(src->d_bwarns);
+               XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_blk_softlimit));
+       dst->d_ino_hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit);
+       dst->d_ino_softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit);
+       dst->d_bcount = XFS_FSB_TO_BB(mp, dqp->q_res_bcount);
+       dst->d_icount = dqp->q_res_icount;
+       dst->d_btimer = be32_to_cpu(dqp->q_core.d_btimer);
+       dst->d_itimer = be32_to_cpu(dqp->q_core.d_itimer);
+       dst->d_iwarns = be16_to_cpu(dqp->q_core.d_iwarns);
+       dst->d_bwarns = be16_to_cpu(dqp->q_core.d_bwarns);
        dst->d_rtb_hardlimit =
-               XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_hardlimit));
+               XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_rtb_hardlimit));
        dst->d_rtb_softlimit =
-               XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_softlimit));
-       dst->d_rtbcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtbcount));
-       dst->d_rtbtimer = be32_to_cpu(src->d_rtbtimer);
-       dst->d_rtbwarns = be16_to_cpu(src->d_rtbwarns);
+               XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_rtb_softlimit));
+       dst->d_rtbcount = XFS_FSB_TO_BB(mp, dqp->q_res_rtbcount);
+       dst->d_rtbtimer = be32_to_cpu(dqp->q_core.d_rtbtimer);
+       dst->d_rtbwarns = be16_to_cpu(dqp->q_core.d_rtbwarns);
 
        /*
         * Internally, we don't reset all the timers when quota enforcement
         * gets turned off. No need to confuse the user level code,
         * so return zeroes in that case.
         */
-       if ((!XFS_IS_UQUOTA_ENFORCED(mp) && src->d_flags == XFS_DQ_USER) ||
+       if ((!XFS_IS_UQUOTA_ENFORCED(mp) && dqp->q_core.d_flags == XFS_DQ_USER) ||
            (!XFS_IS_OQUOTA_ENFORCED(mp) &&
-                       (src->d_flags & (XFS_DQ_PROJ | XFS_DQ_GROUP)))) {
+                       (dqp->q_core.d_flags & (XFS_DQ_PROJ | XFS_DQ_GROUP)))) {
                dst->d_btimer = 0;
                dst->d_itimer = 0;
                dst->d_rtbtimer = 0;
@@ -823,6 +794,9 @@ xfs_qm_export_dquot(
                }
        }
 #endif
+out_put:
+       xfs_qm_dqput(dqp);
+       return error;
 }
 
 STATIC uint
index 8a0807e0f979eff06d930c39cdd11b3e03b2fcc4..b50ec5b95d5a89fb4b0972c1761682c27fe8cdd9 100644 (file)
@@ -174,6 +174,8 @@ typedef struct xfs_qoff_logformat {
 #define XFS_UQUOTA_ACTIVE      0x0100  /* uquotas are being turned off */
 #define XFS_PQUOTA_ACTIVE      0x0200  /* pquotas are being turned off */
 #define XFS_GQUOTA_ACTIVE      0x0400  /* gquotas are being turned off */
+#define XFS_ALL_QUOTA_ACTIVE   \
+       (XFS_UQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE | XFS_GQUOTA_ACTIVE)
 
 /*
  * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees
index 94a3d927d716c6ac6075323340affe7110f7d81a..6d86219d93da2c55b98eb26c190b2ec478e31bcd 100644 (file)
  */
 #define XFS_DQITER_MAP_SIZE    10
 
-/*
- * Hash into a bucket in the dquot hash table, based on <mp, id>.
- */
-#define XFS_DQ_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \
-                                (__psunsigned_t)(id)) & \
-                               (xfs_Gqm->qm_dqhashmask - 1))
-#define XFS_DQ_HASH(mp, id, type)   (type == XFS_DQ_USER ? \
-                                    (xfs_Gqm->qm_usr_dqhtable + \
-                                     XFS_DQ_HASHVAL(mp, id)) : \
-                                    (xfs_Gqm->qm_grp_dqhtable + \
-                                     XFS_DQ_HASHVAL(mp, id)))
 #define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \
        !dqp->q_core.d_blk_hardlimit && \
        !dqp->q_core.d_blk_softlimit && \
index cb6ae715814a8f87026b26f701abe419aa3ed6c7..f429d9d5d325d8f1e48f13efda8b1b9a2d4b6f7e 100644 (file)
@@ -529,7 +529,6 @@ static inline int xfs_sb_version_hasprojid32bit(xfs_sb_t *sbp)
 #define        XFS_BB_TO_FSB(mp,bb)    \
        (((bb) + (XFS_FSB_TO_BB(mp,1) - 1)) >> (mp)->m_blkbb_log)
 #define        XFS_BB_TO_FSBT(mp,bb)   ((bb) >> (mp)->m_blkbb_log)
-#define        XFS_BB_FSB_OFFSET(mp,bb) ((bb) & ((mp)->m_bsize - 1))
 
 /*
  * File system block to byte conversions.
index 76fdc5861932f8ed5412b4345dbb59a1054f8f9c..ce372b7d5644600ec6e52a23f8ec00584e1f2e9c 100644 (file)
 
 DEFINE_PER_CPU(struct xfsstats, xfsstats);
 
+static int counter_val(int idx)
+{
+       int val = 0, cpu;
+
+       for_each_possible_cpu(cpu)
+               val += *(((__u32 *)&per_cpu(xfsstats, cpu) + idx));
+       return val;
+}
+
 static int xfs_stat_proc_show(struct seq_file *m, void *v)
 {
-       int             c, i, j, val;
+       int             i, j;
        __uint64_t      xs_xstrat_bytes = 0;
        __uint64_t      xs_write_bytes = 0;
        __uint64_t      xs_read_bytes = 0;
@@ -50,20 +59,16 @@ static int xfs_stat_proc_show(struct seq_file *m, void *v)
                { "abtc2",              XFSSTAT_END_ABTC_V2             },
                { "bmbt2",              XFSSTAT_END_BMBT_V2             },
                { "ibt2",               XFSSTAT_END_IBT_V2              },
+               /* we print both series of quota information together */
+               { "qm",                 XFSSTAT_END_QM                  },
        };
 
        /* Loop over all stats groups */
-       for (i=j = 0; i < ARRAY_SIZE(xstats); i++) {
+       for (i = j = 0; i < ARRAY_SIZE(xstats); i++) {
                seq_printf(m, "%s", xstats[i].desc);
                /* inner loop does each group */
-               while (j < xstats[i].endpoint) {
-                       val = 0;
-                       /* sum over all cpus */
-                       for_each_possible_cpu(c)
-                               val += *(((__u32*)&per_cpu(xfsstats, c) + j));
-                       seq_printf(m, " %u", val);
-                       j++;
-               }
+               for (; j < xstats[i].endpoint; j++)
+                       seq_printf(m, " %u", counter_val(j));
                seq_putc(m, '\n');
        }
        /* extra precision counters */
@@ -97,6 +102,58 @@ static const struct file_operations xfs_stat_proc_fops = {
        .release        = single_release,
 };
 
+/* legacy quota interfaces */
+#ifdef CONFIG_XFS_QUOTA
+static int xqm_proc_show(struct seq_file *m, void *v)
+{
+       /* maximum; incore; ratio free to inuse; freelist */
+       seq_printf(m, "%d\t%d\t%d\t%u\n",
+                       0,
+                       counter_val(XFSSTAT_END_XQMSTAT),
+                       0,
+                       counter_val(XFSSTAT_END_XQMSTAT + 1));
+       return 0;
+}
+
+static int xqm_proc_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, xqm_proc_show, NULL);
+}
+
+static const struct file_operations xqm_proc_fops = {
+       .owner          = THIS_MODULE,
+       .open           = xqm_proc_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
+/* legacy quota stats interface no 2 */
+static int xqmstat_proc_show(struct seq_file *m, void *v)
+{
+       int j;
+
+       seq_printf(m, "qm");
+       for (j = XFSSTAT_END_IBT_V2; j < XFSSTAT_END_XQMSTAT; j++)
+               seq_printf(m, " %u", counter_val(j));
+       seq_putc(m, '\n');
+       return 0;
+}
+
+static int xqmstat_proc_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, xqmstat_proc_show, NULL);
+}
+
+static const struct file_operations xqmstat_proc_fops = {
+       .owner          = THIS_MODULE,
+       .open           = xqmstat_proc_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+#endif /* CONFIG_XFS_QUOTA */
+
 int
 xfs_init_procfs(void)
 {
@@ -105,10 +162,24 @@ xfs_init_procfs(void)
 
        if (!proc_create("fs/xfs/stat", 0, NULL,
                         &xfs_stat_proc_fops))
-               goto out_remove_entry;
+               goto out_remove_xfs_dir;
+#ifdef CONFIG_XFS_QUOTA
+       if (!proc_create("fs/xfs/xqmstat", 0, NULL,
+                        &xqmstat_proc_fops))
+               goto out_remove_stat_file;
+       if (!proc_create("fs/xfs/xqm", 0, NULL,
+                        &xqm_proc_fops))
+               goto out_remove_xqmstat_file;
+#endif
        return 0;
 
- out_remove_entry:
+#ifdef CONFIG_XFS_QUOTA
+ out_remove_xqmstat_file:
+       remove_proc_entry("fs/xfs/xqmstat", NULL);
+ out_remove_stat_file:
+       remove_proc_entry("fs/xfs/stat", NULL);
+#endif
+ out_remove_xfs_dir:
        remove_proc_entry("fs/xfs", NULL);
  out:
        return -ENOMEM;
@@ -117,6 +188,10 @@ xfs_init_procfs(void)
 void
 xfs_cleanup_procfs(void)
 {
+#ifdef CONFIG_XFS_QUOTA
+       remove_proc_entry("fs/xfs/xqm", NULL);
+       remove_proc_entry("fs/xfs/xqmstat", NULL);
+#endif
        remove_proc_entry("fs/xfs/stat", NULL);
        remove_proc_entry("fs/xfs", NULL);
 }
index 736854b1ca1a0c6d1d92488f6cc79891241b8a6d..c03ad38ceaebaeb8b82a2e0fae785cc592e9adaf 100644 (file)
@@ -183,6 +183,16 @@ struct xfsstats {
        __uint32_t              xs_ibt_2_alloc;
        __uint32_t              xs_ibt_2_free;
        __uint32_t              xs_ibt_2_moves;
+#define XFSSTAT_END_XQMSTAT            (XFSSTAT_END_IBT_V2+6)
+       __uint32_t              xs_qm_dqreclaims;
+       __uint32_t              xs_qm_dqreclaim_misses;
+       __uint32_t              xs_qm_dquot_dups;
+       __uint32_t              xs_qm_dqcachemisses;
+       __uint32_t              xs_qm_dqcachehits;
+       __uint32_t              xs_qm_dqwants;
+#define XFSSTAT_END_QM                 (XFSSTAT_END_XQMSTAT+2)
+       __uint32_t              xs_qm_dquot;
+       __uint32_t              xs_qm_dquot_unused;
 /* Extra precision counters */
        __uint64_t              xs_xstrat_bytes;
        __uint64_t              xs_write_bytes;
index baf40e378d35372e7f8c7f99f62b9a02767b3153..912442cf0f82c3a285fa7d49cab22baa03553f53 100644 (file)
@@ -324,10 +324,9 @@ xfs_parseargs(
                } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
                        mp->m_flags |= XFS_MOUNT_FILESTREAMS;
                } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
-                       mp->m_qflags &= ~(XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
-                                         XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
-                                         XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
-                                         XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD);
+                       mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
+                       mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
+                       mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE;
                } else if (!strcmp(this_char, MNTOPT_QUOTA) ||
                           !strcmp(this_char, MNTOPT_UQUOTA) ||
                           !strcmp(this_char, MNTOPT_USRQUOTA)) {
@@ -760,6 +759,36 @@ xfs_setup_devices(
        return 0;
 }
 
+STATIC int
+xfs_init_mount_workqueues(
+       struct xfs_mount        *mp)
+{
+       mp->m_data_workqueue = alloc_workqueue("xfs-data/%s",
+                       WQ_MEM_RECLAIM, 0, mp->m_fsname);
+       if (!mp->m_data_workqueue)
+               goto out;
+
+       mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
+                       WQ_MEM_RECLAIM, 0, mp->m_fsname);
+       if (!mp->m_unwritten_workqueue)
+               goto out_destroy_data_iodone_queue;
+
+       return 0;
+
+out_destroy_data_iodone_queue:
+       destroy_workqueue(mp->m_data_workqueue);
+out:
+       return -ENOMEM;
+}
+
+STATIC void
+xfs_destroy_mount_workqueues(
+       struct xfs_mount        *mp)
+{
+       destroy_workqueue(mp->m_data_workqueue);
+       destroy_workqueue(mp->m_unwritten_workqueue);
+}
+
 /* Catch misguided souls that try to use this interface on XFS */
 STATIC struct inode *
 xfs_fs_alloc_inode(
@@ -834,91 +863,58 @@ xfs_fs_inode_init_once(
 }
 
 /*
- * Dirty the XFS inode when mark_inode_dirty_sync() is called so that
- * we catch unlogged VFS level updates to the inode.
+ * This is called by the VFS when dirtying inode metadata.  This can happen
+ * for a few reasons, but we only care about timestamp updates, given that
+ * we handled the rest ourselves.  In theory no other calls should happen,
+ * but for example generic_write_end() keeps dirtying the inode after
+ * updating i_size.  Thus we check that the flags are exactly I_DIRTY_SYNC,
+ * and skip this call otherwise.
  *
- * We need the barrier() to maintain correct ordering between unlogged
- * updates and the transaction commit code that clears the i_update_core
- * field. This requires all updates to be completed before marking the
- * inode dirty.
+ * We'll hopefully get a different method just for updating timestamps soon,
+ * at which point this hack can go away, and maybe we'll also get real
+ * error handling here.
  */
 STATIC void
 xfs_fs_dirty_inode(
-       struct inode    *inode,
-       int             flags)
-{
-       barrier();
-       XFS_I(inode)->i_update_core = 1;
-}
-
-STATIC int
-xfs_fs_write_inode(
        struct inode            *inode,
-       struct writeback_control *wbc)
+       int                     flags)
 {
        struct xfs_inode        *ip = XFS_I(inode);
        struct xfs_mount        *mp = ip->i_mount;
-       int                     error = EAGAIN;
-
-       trace_xfs_write_inode(ip);
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return -XFS_ERROR(EIO);
-
-       if (wbc->sync_mode == WB_SYNC_ALL || wbc->for_kupdate) {
-               /*
-                * Make sure the inode has made it it into the log.  Instead
-                * of forcing it all the way to stable storage using a
-                * synchronous transaction we let the log force inside the
-                * ->sync_fs call do that for thus, which reduces the number
-                * of synchronous log forces dramatically.
-                */
-               error = xfs_log_dirty_inode(ip, NULL, 0);
-               if (error)
-                       goto out;
-               return 0;
-       } else {
-               if (!ip->i_update_core)
-                       return 0;
+       struct xfs_trans        *tp;
+       int                     error;
 
-               /*
-                * We make this non-blocking if the inode is contended, return
-                * EAGAIN to indicate to the caller that they did not succeed.
-                * This prevents the flush path from blocking on inodes inside
-                * another operation right now, they get caught later by
-                * xfs_sync.
-                */
-               if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
-                       goto out;
+       if (flags != I_DIRTY_SYNC)
+               return;
 
-               if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip))
-                       goto out_unlock;
+       trace_xfs_dirty_inode(ip);
 
-               /*
-                * Now we have the flush lock and the inode is not pinned, we
-                * can check if the inode is really clean as we know that
-                * there are no pending transaction completions, it is not
-                * waiting on the delayed write queue and there is no IO in
-                * progress.
-                */
-               if (xfs_inode_clean(ip)) {
-                       xfs_ifunlock(ip);
-                       error = 0;
-                       goto out_unlock;
-               }
-               error = xfs_iflush(ip, SYNC_TRYLOCK);
+       tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
+       error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
+       if (error) {
+               xfs_trans_cancel(tp, 0);
+               goto trouble;
        }
-
- out_unlock:
-       xfs_iunlock(ip, XFS_ILOCK_SHARED);
- out:
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
        /*
-        * if we failed to write out the inode then mark
-        * it dirty again so we'll try again later.
+        * Grab all the latest timestamps from the Linux inode.
         */
+       ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
+       ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
+       ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec;
+       ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec;
+       ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec;
+       ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec;
+
+       xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+       xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
+       error = xfs_trans_commit(tp, 0);
        if (error)
-               xfs_mark_inode_dirty_sync(ip);
-       return -error;
+               goto trouble;
+       return;
+
+trouble:
+       xfs_warn(mp, "failed to update timestamps for inode 0x%llx", ip->i_ino);
 }
 
 STATIC void
@@ -983,6 +979,7 @@ xfs_fs_put_super(
        xfs_unmountfs(mp);
        xfs_freesb(mp);
        xfs_icsb_destroy_counters(mp);
+       xfs_destroy_mount_workqueues(mp);
        xfs_close_devices(mp);
        xfs_free_fsname(mp);
        kfree(mp);
@@ -1309,10 +1306,14 @@ xfs_fs_fill_super(
        if (error)
                goto out_free_fsname;
 
-       error = xfs_icsb_init_counters(mp);
+       error = xfs_init_mount_workqueues(mp);
        if (error)
                goto out_close_devices;
 
+       error = xfs_icsb_init_counters(mp);
+       if (error)
+               goto out_destroy_workqueues;
+
        error = xfs_readsb(mp, flags);
        if (error)
                goto out_destroy_counters;
@@ -1376,6 +1377,8 @@ xfs_fs_fill_super(
        xfs_freesb(mp);
  out_destroy_counters:
        xfs_icsb_destroy_counters(mp);
+out_destroy_workqueues:
+       xfs_destroy_mount_workqueues(mp);
  out_close_devices:
        xfs_close_devices(mp);
  out_free_fsname:
@@ -1429,7 +1432,6 @@ static const struct super_operations xfs_super_operations = {
        .alloc_inode            = xfs_fs_alloc_inode,
        .destroy_inode          = xfs_fs_destroy_inode,
        .dirty_inode            = xfs_fs_dirty_inode,
-       .write_inode            = xfs_fs_write_inode,
        .evict_inode            = xfs_fs_evict_inode,
        .put_super              = xfs_fs_put_super,
        .sync_fs                = xfs_fs_sync_fs,
@@ -1651,13 +1653,17 @@ init_xfs_fs(void)
        if (error)
                goto out_cleanup_procfs;
 
-       vfs_initquota();
+       error = xfs_qm_init();
+       if (error)
+               goto out_sysctl_unregister;
 
        error = register_filesystem(&xfs_fs_type);
        if (error)
-               goto out_sysctl_unregister;
+               goto out_qm_exit;
        return 0;
 
+ out_qm_exit:
+       xfs_qm_exit();
  out_sysctl_unregister:
        xfs_sysctl_unregister();
  out_cleanup_procfs:
@@ -1679,7 +1685,7 @@ init_xfs_fs(void)
 STATIC void __exit
 exit_xfs_fs(void)
 {
-       vfs_exitquota();
+       xfs_qm_exit();
        unregister_filesystem(&xfs_fs_type);
        xfs_sysctl_unregister();
        xfs_cleanup_procfs();
index 50a3266c999e591dee1728aa02173c862bd73838..09b0c26b2245ebd245c2d8fcf4849105d1e5d400 100644 (file)
 #include <linux/exportfs.h>
 
 #ifdef CONFIG_XFS_QUOTA
-extern void xfs_qm_init(void);
+extern int xfs_qm_init(void);
 extern void xfs_qm_exit(void);
-# define vfs_initquota()       xfs_qm_init()
-# define vfs_exitquota()       xfs_qm_exit()
 #else
-# define vfs_initquota()       do { } while (0)
-# define vfs_exitquota()       do { } while (0)
+# define xfs_qm_init() (0)
+# define xfs_qm_exit() do { } while (0)
 #endif
 
 #ifdef CONFIG_XFS_POSIX_ACL
index 40b75eecd2b4b376253e0e9408e42bc475e63f9b..205ebcb34d9e499f732423bd00c1204bb89f6ef8 100644 (file)
@@ -336,32 +336,6 @@ xfs_sync_fsdata(
        return error;
 }
 
-int
-xfs_log_dirty_inode(
-       struct xfs_inode        *ip,
-       struct xfs_perag        *pag,
-       int                     flags)
-{
-       struct xfs_mount        *mp = ip->i_mount;
-       struct xfs_trans        *tp;
-       int                     error;
-
-       if (!ip->i_update_core)
-               return 0;
-
-       tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
-       error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
-       if (error) {
-               xfs_trans_cancel(tp, 0);
-               return error;
-       }
-
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-       xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
-       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-       return xfs_trans_commit(tp, 0);
-}
-
 /*
  * When remounting a filesystem read-only or freezing the filesystem, we have
  * two phases to execute. This first phase is syncing the data before we
@@ -385,16 +359,6 @@ xfs_quiesce_data(
 {
        int                     error, error2 = 0;
 
-       /*
-        * Log all pending size and timestamp updates.  The vfs writeback
-        * code is supposed to do this, but due to its overagressive
-        * livelock detection it will skip inodes where appending writes
-        * were written out in the first non-blocking sync phase if their
-        * completion took long enough that it happened after taking the
-        * timestamp for the cut-off in the blocking phase.
-        */
-       xfs_inode_ag_iterator(mp, xfs_log_dirty_inode, 0);
-
        /* force out the log */
        xfs_log_force(mp, XFS_LOG_SYNC);
 
@@ -913,17 +877,15 @@ reclaim:
         * can reference the inodes in the cache without taking references.
         *
         * We make that OK here by ensuring that we wait until the inode is
-        * unlocked after the lookup before we go ahead and free it.  We get
-        * both the ilock and the iolock because the code may need to drop the
-        * ilock one but will still hold the iolock.
+        * unlocked after the lookup before we go ahead and free it.
         */
-       xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
        xfs_qm_dqdetach(ip);
-       xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
 
        xfs_inode_free(ip);
-       return error;
 
+       return error;
 }
 
 /*
index fa965479d788d29da66b0e85bd59123c1fe08c65..941202e7ac6e594e2c423c19bc89248397e39516 100644 (file)
@@ -34,8 +34,6 @@ void xfs_quiesce_attr(struct xfs_mount *mp);
 
 void xfs_flush_inodes(struct xfs_inode *ip);
 
-int xfs_log_dirty_inode(struct xfs_inode *ip, struct xfs_perag *pag, int flags);
-
 int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
 int xfs_reclaim_inodes_count(struct xfs_mount *mp);
 void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan);
index bb134a819930c72448e37cc62de3fb98ec971835..75eb54af4d581e7f4cc9270f0abb1c72238195ac 100644 (file)
@@ -580,7 +580,7 @@ DEFINE_INODE_EVENT(xfs_ioctl_setattr);
 DEFINE_INODE_EVENT(xfs_dir_fsync);
 DEFINE_INODE_EVENT(xfs_file_fsync);
 DEFINE_INODE_EVENT(xfs_destroy_inode);
-DEFINE_INODE_EVENT(xfs_write_inode);
+DEFINE_INODE_EVENT(xfs_dirty_inode);
 DEFINE_INODE_EVENT(xfs_evict_inode);
 
 DEFINE_INODE_EVENT(xfs_dquot_dqalloc);
@@ -741,10 +741,10 @@ DEFINE_DQUOT_EVENT(xfs_dqalloc);
 DEFINE_DQUOT_EVENT(xfs_dqtobp_read);
 DEFINE_DQUOT_EVENT(xfs_dqread);
 DEFINE_DQUOT_EVENT(xfs_dqread_fail);
-DEFINE_DQUOT_EVENT(xfs_dqlookup_found);
-DEFINE_DQUOT_EVENT(xfs_dqlookup_done);
 DEFINE_DQUOT_EVENT(xfs_dqget_hit);
 DEFINE_DQUOT_EVENT(xfs_dqget_miss);
+DEFINE_DQUOT_EVENT(xfs_dqget_freeing);
+DEFINE_DQUOT_EVENT(xfs_dqget_dup);
 DEFINE_DQUOT_EVENT(xfs_dqput);
 DEFINE_DQUOT_EVENT(xfs_dqput_wait);
 DEFINE_DQUOT_EVENT(xfs_dqput_free);
@@ -782,12 +782,12 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
                __entry->curr_res = tic->t_curr_res;
                __entry->unit_res = tic->t_unit_res;
                __entry->flags = tic->t_flags;
-               __entry->reserveq = list_empty(&log->l_reserveq);
-               __entry->writeq = list_empty(&log->l_writeq);
-               xlog_crack_grant_head(&log->l_grant_reserve_head,
+               __entry->reserveq = list_empty(&log->l_reserve_head.waiters);
+               __entry->writeq = list_empty(&log->l_write_head.waiters);
+               xlog_crack_grant_head(&log->l_reserve_head.grant,
                                &__entry->grant_reserve_cycle,
                                &__entry->grant_reserve_bytes);
-               xlog_crack_grant_head(&log->l_grant_write_head,
+               xlog_crack_grant_head(&log->l_write_head.grant,
                                &__entry->grant_write_cycle,
                                &__entry->grant_write_bytes);
                __entry->curr_cycle = log->l_curr_cycle;
@@ -826,20 +826,14 @@ DEFINE_EVENT(xfs_loggrant_class, name, \
        TP_ARGS(log, tic))
 DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm);
 DEFINE_LOGGRANT_EVENT(xfs_log_done_perm);
-DEFINE_LOGGRANT_EVENT(xfs_log_reserve);
 DEFINE_LOGGRANT_EVENT(xfs_log_umount_write);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_enter);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_exit);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_error);
 DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep);
 DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake);
 DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up);
+DEFINE_LOGGRANT_EVENT(xfs_log_reserve);
+DEFINE_LOGGRANT_EVENT(xfs_log_reserve_exit);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_exit);
 DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter);
 DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit);
 DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub);
index 7adcdf15ae0ce563b32eb37582fbdd79a8fdba15..103b00c90004940e40c8b62fe83af2c9f3e93443 100644 (file)
@@ -681,7 +681,6 @@ xfs_trans_reserve(
        uint            flags,
        uint            logcount)
 {
-       int             log_flags;
        int             error = 0;
        int             rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
 
@@ -707,24 +706,32 @@ xfs_trans_reserve(
         * Reserve the log space needed for this transaction.
         */
        if (logspace > 0) {
-               ASSERT((tp->t_log_res == 0) || (tp->t_log_res == logspace));
-               ASSERT((tp->t_log_count == 0) ||
-                       (tp->t_log_count == logcount));
+               bool    permanent = false;
+
+               ASSERT(tp->t_log_res == 0 || tp->t_log_res == logspace);
+               ASSERT(tp->t_log_count == 0 || tp->t_log_count == logcount);
+
                if (flags & XFS_TRANS_PERM_LOG_RES) {
-                       log_flags = XFS_LOG_PERM_RESERV;
                        tp->t_flags |= XFS_TRANS_PERM_LOG_RES;
+                       permanent = true;
                } else {
                        ASSERT(tp->t_ticket == NULL);
                        ASSERT(!(tp->t_flags & XFS_TRANS_PERM_LOG_RES));
-                       log_flags = 0;
                }
 
-               error = xfs_log_reserve(tp->t_mountp, logspace, logcount,
-                                       &tp->t_ticket,
-                                       XFS_TRANSACTION, log_flags, tp->t_type);
-               if (error) {
-                       goto undo_blocks;
+               if (tp->t_ticket != NULL) {
+                       ASSERT(flags & XFS_TRANS_PERM_LOG_RES);
+                       error = xfs_log_regrant(tp->t_mountp, tp->t_ticket);
+               } else {
+                       error = xfs_log_reserve(tp->t_mountp, logspace,
+                                               logcount, &tp->t_ticket,
+                                               XFS_TRANSACTION, permanent,
+                                               tp->t_type);
                }
+
+               if (error)
+                       goto undo_blocks;
+
                tp->t_log_res = logspace;
                tp->t_log_count = logcount;
        }
@@ -752,6 +759,8 @@ xfs_trans_reserve(
         */
 undo_log:
        if (logspace > 0) {
+               int             log_flags;
+
                if (flags & XFS_TRANS_PERM_LOG_RES) {
                        log_flags = XFS_LOG_REL_PERM_RESERV;
                } else {
index ed9252bcdac9c351a79020451cf76eb4287992f7..1dead07f092c92afbea3ceeb22e0cf70430fa1fb 100644 (file)
@@ -610,50 +610,6 @@ xfs_ail_push_all(
                xfs_ail_push(ailp, threshold_lsn);
 }
 
-/*
- * This is to be called when an item is unlocked that may have
- * been in the AIL.  It will wake up the first member of the AIL
- * wait list if this item's unlocking might allow it to progress.
- * If the item is in the AIL, then we need to get the AIL lock
- * while doing our checking so we don't race with someone going
- * to sleep waiting for this event in xfs_trans_push_ail().
- */
-void
-xfs_trans_unlocked_item(
-       struct xfs_ail  *ailp,
-       xfs_log_item_t  *lip)
-{
-       xfs_log_item_t  *min_lip;
-
-       /*
-        * If we're forcibly shutting down, we may have
-        * unlocked log items arbitrarily. The last thing
-        * we want to do is to move the tail of the log
-        * over some potentially valid data.
-        */
-       if (!(lip->li_flags & XFS_LI_IN_AIL) ||
-           XFS_FORCED_SHUTDOWN(ailp->xa_mount)) {
-               return;
-       }
-
-       /*
-        * This is the one case where we can call into xfs_ail_min()
-        * without holding the AIL lock because we only care about the
-        * case where we are at the tail of the AIL.  If the object isn't
-        * at the tail, it doesn't matter what result we get back.  This
-        * is slightly racy because since we were just unlocked, we could
-        * go to sleep between the call to xfs_ail_min and the call to
-        * xfs_log_move_tail, have someone else lock us, commit to us disk,
-        * move us out of the tail of the AIL, and then we wake up.  However,
-        * the call to xfs_log_move_tail() doesn't do anything if there's
-        * not enough free space to wake people up so we're safe calling it.
-        */
-       min_lip = xfs_ail_min(ailp);
-
-       if (min_lip == lip)
-               xfs_log_move_tail(ailp->xa_mount, 1);
-}      /* xfs_trans_unlocked_item */
-
 /*
  * xfs_trans_ail_update - bulk AIL insertion operation.
  *
@@ -685,7 +641,6 @@ xfs_trans_ail_update_bulk(
        xfs_lsn_t               lsn) __releases(ailp->xa_lock)
 {
        xfs_log_item_t          *mlip;
-       xfs_lsn_t               tail_lsn;
        int                     mlip_changed = 0;
        int                     i;
        LIST_HEAD(tmp);
@@ -712,22 +667,12 @@ xfs_trans_ail_update_bulk(
 
        if (!list_empty(&tmp))
                xfs_ail_splice(ailp, cur, &tmp, lsn);
+       spin_unlock(&ailp->xa_lock);
 
-       if (!mlip_changed) {
-               spin_unlock(&ailp->xa_lock);
-               return;
+       if (mlip_changed && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) {
+               xlog_assign_tail_lsn(ailp->xa_mount);
+               xfs_log_space_wake(ailp->xa_mount);
        }
-
-       /*
-        * It is not safe to access mlip after the AIL lock is dropped, so we
-        * must get a copy of li_lsn before we do so.  This is especially
-        * important on 32-bit platforms where accessing and updating 64-bit
-        * values like li_lsn is not atomic.
-        */
-       mlip = xfs_ail_min(ailp);
-       tail_lsn = mlip->li_lsn;
-       spin_unlock(&ailp->xa_lock);
-       xfs_log_move_tail(ailp->xa_mount, tail_lsn);
 }
 
 /*
@@ -758,7 +703,6 @@ xfs_trans_ail_delete_bulk(
        int                     nr_items) __releases(ailp->xa_lock)
 {
        xfs_log_item_t          *mlip;
-       xfs_lsn_t               tail_lsn;
        int                     mlip_changed = 0;
        int                     i;
 
@@ -785,23 +729,12 @@ xfs_trans_ail_delete_bulk(
                if (mlip == lip)
                        mlip_changed = 1;
        }
+       spin_unlock(&ailp->xa_lock);
 
-       if (!mlip_changed) {
-               spin_unlock(&ailp->xa_lock);
-               return;
+       if (mlip_changed && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) {
+               xlog_assign_tail_lsn(ailp->xa_mount);
+               xfs_log_space_wake(ailp->xa_mount);
        }
-
-       /*
-        * It is not safe to access mlip after the AIL lock is dropped, so we
-        * must get a copy of li_lsn before we do so.  This is especially
-        * important on 32-bit platforms where accessing and updating 64-bit
-        * values like li_lsn is not atomic. It is possible we've emptied the
-        * AIL here, so if that is the case, pass an LSN of 0 to the tail move.
-        */
-       mlip = xfs_ail_min(ailp);
-       tail_lsn = mlip ? mlip->li_lsn : 0;
-       spin_unlock(&ailp->xa_lock);
-       xfs_log_move_tail(ailp->xa_mount, tail_lsn);
 }
 
 /*
index 475a4ded4f41a875ff0f77ee7990b969cbc3f124..1302d1d95a5850d121af792719b04d2d5672af39 100644 (file)
@@ -463,19 +463,7 @@ xfs_trans_brelse(xfs_trans_t       *tp,
         * Default to a normal brelse() call if the tp is NULL.
         */
        if (tp == NULL) {
-               struct xfs_log_item     *lip = bp->b_fspriv;
-
                ASSERT(bp->b_transp == NULL);
-
-               /*
-                * If there's a buf log item attached to the buffer,
-                * then let the AIL know that the buffer is being
-                * unlocked.
-                */
-               if (lip != NULL && lip->li_type == XFS_LI_BUF) {
-                       bip = bp->b_fspriv;
-                       xfs_trans_unlocked_item(bip->bli_item.li_ailp, lip);
-               }
                xfs_buf_relse(bp);
                return;
        }
@@ -550,21 +538,10 @@ xfs_trans_brelse(xfs_trans_t      *tp,
                ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL));
                ASSERT(!(bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF));
                xfs_buf_item_relse(bp);
-               bip = NULL;
-       }
-       bp->b_transp = NULL;
-
-       /*
-        * If we've still got a buf log item on the buffer, then
-        * tell the AIL that the buffer is being unlocked.
-        */
-       if (bip != NULL) {
-               xfs_trans_unlocked_item(bip->bli_item.li_ailp,
-                                       (xfs_log_item_t*)bip);
        }
 
+       bp->b_transp = NULL;
        xfs_buf_relse(bp);
-       return;
 }
 
 /*
index c4ba366d24e65c8fde43fa7a6ca32b4735bce331..279099717ed2db8a11ab5f6a05ba76033254d921 100644 (file)
@@ -605,7 +605,7 @@ xfs_trans_dqresv(
        time_t          timer;
        xfs_qwarncnt_t  warns;
        xfs_qwarncnt_t  warnlimit;
-       xfs_qcnt_t      count;
+       xfs_qcnt_t      total_count;
        xfs_qcnt_t      *resbcountp;
        xfs_quotainfo_t *q = mp->m_quotainfo;
 
@@ -648,13 +648,12 @@ xfs_trans_dqresv(
                         * hardlimit or exceed the timelimit if we allocate
                         * nblks.
                         */
-                       if (hardlimit > 0ULL &&
-                           hardlimit < nblks + *resbcountp) {
+                       total_count = *resbcountp + nblks;
+                       if (hardlimit && total_count > hardlimit) {
                                xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN);
                                goto error_return;
                        }
-                       if (softlimit > 0ULL &&
-                           softlimit < nblks + *resbcountp) {
+                       if (softlimit && total_count > softlimit) {
                                if ((timer != 0 && get_seconds() > timer) ||
                                    (warns != 0 && warns >= warnlimit)) {
                                        xfs_quota_warn(mp, dqp,
@@ -666,7 +665,7 @@ xfs_trans_dqresv(
                        }
                }
                if (ninos > 0) {
-                       count = be64_to_cpu(dqp->q_core.d_icount);
+                       total_count = be64_to_cpu(dqp->q_core.d_icount) + ninos;
                        timer = be32_to_cpu(dqp->q_core.d_itimer);
                        warns = be16_to_cpu(dqp->q_core.d_iwarns);
                        warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit;
@@ -677,13 +676,11 @@ xfs_trans_dqresv(
                        if (!softlimit)
                                softlimit = q->qi_isoftlimit;
 
-                       if (hardlimit > 0ULL &&
-                           hardlimit < ninos + count) {
+                       if (hardlimit && total_count > hardlimit) {
                                xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN);
                                goto error_return;
                        }
-                       if (softlimit > 0ULL &&
-                           softlimit < ninos + count) {
+                       if (softlimit && total_count > softlimit) {
                                if  ((timer != 0 && get_seconds() > timer) ||
                                     (warns != 0 && warns >= warnlimit)) {
                                        xfs_quota_warn(mp, dqp,
@@ -878,7 +875,7 @@ STATIC void
 xfs_trans_alloc_dqinfo(
        xfs_trans_t     *tp)
 {
-       tp->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP);
+       tp->t_dqinfo = kmem_zone_zalloc(xfs_qm_dqtrxzone, KM_SLEEP);
 }
 
 void
@@ -887,6 +884,6 @@ xfs_trans_free_dqinfo(
 {
        if (!tp->t_dqinfo)
                return;
-       kmem_zone_free(xfs_Gqm->qm_dqtrxzone, tp->t_dqinfo);
+       kmem_zone_free(xfs_qm_dqtrxzone, tp->t_dqinfo);
        tp->t_dqinfo = NULL;
 }
index 32f0288ae10f30abe1f67d3ebcb766926fe8de7b..7a7442c03f2bfde90606a7e82e78fdd4d2363e6e 100644 (file)
@@ -95,10 +95,14 @@ xfs_trans_ichgtime(
        if ((flags & XFS_ICHGTIME_MOD) &&
            !timespec_equal(&inode->i_mtime, &tv)) {
                inode->i_mtime = tv;
+               ip->i_d.di_mtime.t_sec = tv.tv_sec;
+               ip->i_d.di_mtime.t_nsec = tv.tv_nsec;
        }
        if ((flags & XFS_ICHGTIME_CHG) &&
            !timespec_equal(&inode->i_ctime, &tv)) {
                inode->i_ctime = tv;
+               ip->i_d.di_ctime.t_sec = tv.tv_sec;
+               ip->i_d.di_ctime.t_nsec = tv.tv_nsec;
        }
 }
 
@@ -126,12 +130,12 @@ xfs_trans_log_inode(
        /*
         * Always OR in the bits from the ili_last_fields field.
         * This is to coordinate with the xfs_iflush() and xfs_iflush_done()
-        * routines in the eventual clearing of the ilf_fields bits.
+        * routines in the eventual clearing of the ili_fields bits.
         * See the big comment in xfs_iflush() for an explanation of
         * this coordination mechanism.
         */
        flags |= ip->i_itemp->ili_last_fields;
-       ip->i_itemp->ili_format.ilf_fields |= flags;
+       ip->i_itemp->ili_fields |= flags;
 }
 
 #ifdef XFS_TRANS_DEBUG
index 44820b9fcb4327f9d7d3b14b626e67f19bb18973..8ab2ced415f1d2680c274d6370047f749e74bce7 100644 (file)
@@ -104,9 +104,6 @@ void                        xfs_ail_push(struct xfs_ail *, xfs_lsn_t);
 void                   xfs_ail_push_all(struct xfs_ail *);
 xfs_lsn_t              xfs_ail_min_lsn(struct xfs_ail *ailp);
 
-void                   xfs_trans_unlocked_item(struct xfs_ail *,
-                                       xfs_log_item_t *);
-
 struct xfs_log_item *  xfs_trans_ail_cursor_first(struct xfs_ail *ailp,
                                        struct xfs_ail_cursor *cur,
                                        xfs_lsn_t lsn);
index 7c220b4227bc7f817cf507b84a1721a7d0091e2f..db14d0c08682b90949f031532069f715ca628804 100644 (file)
@@ -22,7 +22,6 @@
 
 struct file;
 struct xfs_inode;
-struct xfs_iomap;
 struct attrlist_cursor_kern;
 
 /*
index 0c877cbde142f8427690fd519264c68951c10b18..447e146b2ba6d8ae1a9213841f6daae5a209f487 100644 (file)
@@ -10,7 +10,6 @@ struct kiocb;
 struct pipe_inode_info;
 struct uio;
 struct xfs_inode;
-struct xfs_iomap;
 
 
 int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap, int flags);
@@ -49,8 +48,6 @@ int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name,
 int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags);
 int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize,
                int flags, struct attrlist_cursor_kern *cursor);
-int xfs_bmap(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
-               int flags, struct xfs_iomap *iomapp, int *niomaps);
 void xfs_tosspages(struct xfs_inode *inode, xfs_off_t first,
                xfs_off_t last, int fiopt);
 int xfs_flushinval_pages(struct xfs_inode *ip, xfs_off_t first,
index 4a5aca2a2c9442aecb1db3b6cbe74ea2b5739870..a5b5d5a89a4fe3fc87d2adb2de31b9ac58c74e61 100644 (file)
@@ -45,6 +45,11 @@ static inline void pci_add_flags(int flags)
        pci_flags |= flags;
 }
 
+static inline void pci_clear_flags(int flags)
+{
+       pci_flags &= ~flags;
+}
+
 static inline int pci_has_flag(int flag)
 {
        return pci_flags & flag;
@@ -52,6 +57,7 @@ static inline int pci_has_flag(int flag)
 #else
 static inline void pci_set_flags(int flags) { }
 static inline void pci_add_flags(int flags) { }
+static inline void pci_clear_flags(int flags) { }
 static inline int pci_has_flag(int flag)
 {
        return 0;
index 26373cff454632e91e1021251f14722032906c1b..e80a0495e5b0e063fb6f8ac655981b9385d9f7bf 100644 (file)
@@ -6,30 +6,6 @@
 #ifndef _ASM_GENERIC_PCI_H
 #define _ASM_GENERIC_PCI_H
 
-/**
- * pcibios_resource_to_bus - convert resource to PCI bus address
- * @dev: device which owns this resource
- * @region: converted bus-centric region (start,end)
- * @res: resource to convert
- *
- * Convert a resource to a PCI device bus address or bus window.
- */
-static inline void
-pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
-                        struct resource *res)
-{
-       region->start = res->start;
-       region->end = res->end;
-}
-
-static inline void
-pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
-                       struct pci_bus_region *region)
-{
-       res->start = region->start;
-       res->end = region->end;
-}
-
 static inline struct resource *
 pcibios_select_root(struct pci_dev *pdev, struct resource *res)
 {
index ef00610837d4f9e1b556028c151df0118b89677c..15f6b9edd0b17ec30ed3cafacd5d686666279bdc 100644 (file)
@@ -28,7 +28,7 @@ struct task_struct;
 struct pci_dev;
 
 extern int amd_iommu_detect(void);
-
+extern int amd_iommu_init_hardware(void);
 
 /**
  * amd_iommu_enable_device_erratum() - Enable erratum workaround for device
index 8ace93024d60f96cb054f404c3d6381b786eb105..285025a9cdc9a0533e2fb61abbdc71b1808224e8 100644 (file)
@@ -1,19 +1,42 @@
+/*
+ * at24.h - platform_data for the at24 (generic eeprom) driver
+ * (C) Copyright 2008 by Pengutronix
+ * (C) Copyright 2012 by Wolfram Sang
+ * same license as the driver
+ */
+
 #ifndef _LINUX_AT24_H
 #define _LINUX_AT24_H
 
 #include <linux/types.h>
 #include <linux/memory.h>
 
-/*
- * As seen through Linux I2C, differences between the most common types of I2C
- * memory include:
- * - How much memory is available (usually specified in bit)?
- * - What write page size does it support?
- * - Special flags (16 bit addresses, read_only, world readable...)?
+/**
+ * struct at24_platform_data - data to set up at24 (generic eeprom) driver
+ * @byte_len: size of eeprom in byte
+ * @page_size: number of byte which can be written in one go
+ * @flags: tunable options, check AT24_FLAG_* defines
+ * @setup: an optional callback invoked after eeprom is probed; enables kernel
+       code to access eeprom via memory_accessor, see example
+ * @context: optional parameter passed to setup()
  *
  * If you set up a custom eeprom type, please double-check the parameters.
  * Especially page_size needs extra care, as you risk data loss if your value
  * is bigger than what the chip actually supports!
+ *
+ * An example in pseudo code for a setup() callback:
+ *
+ * void get_mac_addr(struct memory_accessor *mem_acc, void *context)
+ * {
+ *     u8 *mac_addr = ethernet_pdata->mac_addr;
+ *     off_t offset = context;
+ *
+ *     // Read MAC addr from EEPROM
+ *     if (mem_acc->read(mem_acc, mac_addr, offset, ETH_ALEN) == ETH_ALEN)
+ *             pr_info("Read MAC addr from EEPROM: %pM\n", mac_addr);
+ * }
+ *
+ * This function pointer and context can now be set up in at24_platform_data.
  */
 
 struct at24_platform_data {
index 9d57a71775b5714b5d67df7857142eaa6f349ebe..e885ba23de7017882457ff0dc2d6fbb6f6575b13 100644 (file)
@@ -23,12 +23,6 @@ struct resource {
        struct resource *parent, *sibling, *child;
 };
 
-struct resource_list {
-       struct resource_list *next;
-       struct resource *res;
-       struct pci_dev *dev;
-};
-
 /*
  * IO resources have these defined flags.
  */
index 1600ebf717a79b4259a721e21bd096ec6206b1ed..96933b1e5d24eeee3dde80b5336b0053a0c1d06e 100644 (file)
@@ -277,6 +277,8 @@ static inline key_serial_t key_serial(const struct key *key)
        return key ? key->serial : 0;
 }
 
+extern void key_set_timeout(struct key *, unsigned);
+
 /**
  * key_is_instantiated - Determine if a key has been positively instantiated
  * @key: The key to check.
index fbc48f898521c1a24492c8eb782c12301e341568..11a966e5f829e9d9862589e393c1576780cfed48 100644 (file)
@@ -42,6 +42,7 @@ struct nlmclnt_initdata {
        unsigned short          protocol;
        u32                     nfs_version;
        int                     noresvport;
+       struct net              *net;
 };
 
 /*
index 88a114fce477ad5a42e5e5c3cb837ae7a8a1384e..f04ce6ac6d04fd84f65c533cb34b5748d2d6665f 100644 (file)
@@ -67,6 +67,7 @@ struct nlm_host {
        struct list_head        h_reclaim;      /* Locks in RECLAIM state */
        struct nsm_handle       *h_nsmhandle;   /* NSM status handle */
        char                    *h_addrbuf;     /* address eyecatcher */
+       struct net              *net;           /* host net */
 };
 
 /*
@@ -188,7 +189,7 @@ struct nlm_block {
 /*
  * Global variables
  */
-extern struct rpc_program      nlm_program;
+extern const struct rpc_program        nlm_program;
 extern struct svc_procedure    nlmsvc_procedures[];
 #ifdef CONFIG_LOCKD_V4
 extern struct svc_procedure    nlmsvc_procedures4[];
@@ -222,7 +223,8 @@ struct nlm_host  *nlmclnt_lookup_host(const struct sockaddr *sap,
                                        const unsigned short protocol,
                                        const u32 version,
                                        const char *hostname,
-                                       int noresvport);
+                                       int noresvport,
+                                       struct net *net);
 void             nlmclnt_release_host(struct nlm_host *);
 struct nlm_host  *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
                                        const char *hostname,
@@ -232,6 +234,7 @@ struct rpc_clnt * nlm_bind_host(struct nlm_host *);
 void             nlm_rebind_host(struct nlm_host *);
 struct nlm_host * nlm_get_host(struct nlm_host *);
 void             nlm_shutdown_hosts(void);
+void             nlm_shutdown_hosts_net(struct net *net);
 void             nlm_host_rebooted(const struct nlm_reboot *);
 
 /*
index 7353821341edb76bc1e02e646ed01fe32200768b..e58c88b52ce138f638f9adcd0c673b895291bb1d 100644 (file)
@@ -42,6 +42,6 @@ int   nlmclt_encode_lockargs(struct rpc_rqst *, u32 *, struct nlm_args *);
 int    nlmclt_encode_cancargs(struct rpc_rqst *, u32 *, struct nlm_args *);
 int    nlmclt_encode_unlockargs(struct rpc_rqst *, u32 *, struct nlm_args *);
  */
-extern struct rpc_version nlm_version4;
+extern const struct rpc_version nlm_version4;
 
 #endif /* LOCKD_XDR4_H */
index 8c6ee44914cb4c7dc464167a2163625ac6c6f8f8..6d1fb63f59221690f72d62f1d8a5cae71070fcc5 100644 (file)
@@ -29,7 +29,7 @@
 #define NFS_MNT_VERSION                1
 #define NFS_MNT3_VERSION       3
 
-#define NFS_PIPE_DIRNAME "/nfs"
+#define NFS_PIPE_DIRNAME "nfs"
 
 /*
  * NFS stats. The good thing with these values is that NFSv3 errors are
index 32345c2805c0588bb2373b63e234359271fc7cfe..834df8bf08b6e54951bc6483f0ffecbc47c7bd28 100644 (file)
@@ -183,15 +183,12 @@ struct nfs4_acl {
 
 typedef struct { char data[NFS4_VERIFIER_SIZE]; } nfs4_verifier;
 
-struct nfs41_stateid {
+struct nfs_stateid4 {
        __be32 seqid;
        char other[NFS4_STATEID_OTHER_SIZE];
 } __attribute__ ((packed));
 
-typedef union {
-       char data[NFS4_STATEID_SIZE];
-       struct nfs41_stateid stateid;
-} nfs4_stateid;
+typedef struct nfs_stateid4 nfs4_stateid;
 
 enum nfs_opnum4 {
        OP_ACCESS = 3,
index 8c29950d2fa5041497c22614808f682e76b3681c..52a1bdb4ee2bad0a668262c7b67bf8003f738095 100644 (file)
 
 #ifdef __KERNEL__
 
+/*
+ * Enable dprintk() debugging support for nfs client.
+ */
+#ifdef CONFIG_NFS_DEBUG
+# define NFS_DEBUG
+#endif
+
 #include <linux/in.h>
 #include <linux/mm.h>
 #include <linux/pagemap.h>
@@ -171,13 +178,9 @@ struct nfs_inode {
         */
        __be32                  cookieverf[2];
 
-       /*
-        * This is the list of dirty unwritten pages.
-        */
-       struct radix_tree_root  nfs_page_tree;
-
        unsigned long           npages;
        unsigned long           ncommit;
+       struct list_head        commit_list;
 
        /* Open contexts for shared mmap writes */
        struct list_head        open_files;
@@ -395,6 +398,29 @@ static inline void nfs_free_fhandle(const struct nfs_fh *fh)
        kfree(fh);
 }
 
+#ifdef NFS_DEBUG
+extern u32 _nfs_display_fhandle_hash(const struct nfs_fh *fh);
+static inline u32 nfs_display_fhandle_hash(const struct nfs_fh *fh)
+{
+       return _nfs_display_fhandle_hash(fh);
+}
+extern void _nfs_display_fhandle(const struct nfs_fh *fh, const char *caption);
+#define nfs_display_fhandle(fh, caption)                       \
+       do {                                                    \
+               if (unlikely(nfs_debug & NFSDBG_FACILITY))      \
+                       _nfs_display_fhandle(fh, caption);      \
+       } while (0)
+#else
+static inline u32 nfs_display_fhandle_hash(const struct nfs_fh *fh)
+{
+       return 0;
+}
+static inline void nfs_display_fhandle(const struct nfs_fh *fh,
+                                      const char *caption)
+{
+}
+#endif
+
 /*
  * linux/fs/nfs/nfsroot.c
  */
@@ -632,19 +658,13 @@ nfs_fileid_to_ino_t(u64 fileid)
 
 #ifdef __KERNEL__
 
-/*
- * Enable debugging support for nfs client.
- * Requires RPC_DEBUG.
- */
-#ifdef RPC_DEBUG
-# define NFS_DEBUG
-#endif
-
 # undef ifdebug
 # ifdef NFS_DEBUG
 #  define ifdebug(fac)         if (unlikely(nfs_debug & NFSDBG_##fac))
+#  define NFS_IFDEBUG(x)       x
 # else
 #  define ifdebug(fac)         if (0)
+#  define NFS_IFDEBUG(x)
 # endif
 #endif /* __KERNEL */
 
index 861730275ba0545feb6857c42f9d161d7f38d47c..a5c50d97341edfac63d04a0f7bad96a5f1bfd0f9 100644 (file)
@@ -1,10 +1,6 @@
 #ifndef _NFS_FS_I
 #define _NFS_FS_I
 
-#include <asm/types.h>
-#include <linux/list.h>
-#include <linux/nfs.h>
-
 struct nlm_lockowner;
 
 /*
index ba4d7656ecfde15c98188afee4a086b19746772f..7073fc74481cb6e1d69b0278e26c87c52cbc349e 100644 (file)
@@ -3,6 +3,7 @@
 
 #include <linux/list.h>
 #include <linux/backing-dev.h>
+#include <linux/idr.h>
 #include <linux/wait.h>
 #include <linux/nfs_xdr.h>
 #include <linux/sunrpc/xprt.h>
@@ -17,6 +18,7 @@ struct nfs4_sequence_res;
 struct nfs_server;
 struct nfs4_minor_version_ops;
 struct server_scope;
+struct nfs41_impl_id;
 
 /*
  * The nfs_client identifies our client state to the server.
@@ -85,6 +87,8 @@ struct nfs_client {
 #endif
 
        struct server_scope     *server_scope;  /* from exchange_id */
+       struct nfs41_impl_id    *impl_id;       /* from exchange_id */
+       struct net              *net;
 };
 
 /*
@@ -144,15 +148,18 @@ struct nfs_server {
        u32                     acl_bitmask;    /* V4 bitmask representing the ACEs
                                                   that are supported on this
                                                   filesystem */
+       u32                     fh_expire_type; /* V4 bitmask representing file
+                                                  handle volatility type for
+                                                  this filesystem */
        struct pnfs_layoutdriver_type  *pnfs_curr_ld; /* Active layout driver */
        struct rpc_wait_queue   roc_rpcwaitq;
        void                    *pnfs_ld_data;  /* per mount point data */
 
        /* the following fields are protected by nfs_client->cl_lock */
        struct rb_root          state_owners;
-       struct rb_root          openowner_id;
-       struct rb_root          lockowner_id;
 #endif
+       struct ida              openowner_id;
+       struct ida              lockowner_id;
        struct list_head        state_owners_lru;
        struct list_head        layouts;
        struct list_head        delegations;
@@ -188,21 +195,23 @@ struct nfs_server {
 
 
 /* maximum number of slots to use */
-#define NFS4_MAX_SLOT_TABLE RPC_MAX_SLOT_TABLE
+#define NFS4_DEF_SLOT_TABLE_SIZE (16U)
+#define NFS4_MAX_SLOT_TABLE (256U)
+#define NFS4_NO_SLOT ((u32)-1)
 
 #if defined(CONFIG_NFS_V4)
 
 /* Sessions */
-#define SLOT_TABLE_SZ (NFS4_MAX_SLOT_TABLE/(8*sizeof(long)))
+#define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, 8*sizeof(long))
 struct nfs4_slot_table {
        struct nfs4_slot *slots;                /* seqid per slot */
        unsigned long   used_slots[SLOT_TABLE_SZ]; /* used/unused bitmap */
        spinlock_t      slot_tbl_lock;
        struct rpc_wait_queue   slot_tbl_waitq; /* allocators may wait here */
-       int             max_slots;              /* # slots in table */
-       int             highest_used_slotid;    /* sent to server on each SEQ.
+       u32             max_slots;              /* # slots in table */
+       u32             highest_used_slotid;    /* sent to server on each SEQ.
                                                 * op for dynamic resizing */
-       int             target_max_slots;       /* Set by CB_RECALL_SLOT as
+       u32             target_max_slots;       /* Set by CB_RECALL_SLOT as
                                                 * the new max_slots */
        struct completion complete;
 };
index 308c188770185962547e196312aa87f304aa4502..7eed2012d288926a6317e53ead4eb7880796c7ca 100644 (file)
@@ -69,36 +69,22 @@ struct nfs_server;
 struct nfs_fattr;
 struct nfs4_string;
 
-#ifdef CONFIG_NFS_USE_NEW_IDMAPPER
-
+#ifdef CONFIG_NFS_V4
 int nfs_idmap_init(void);
 void nfs_idmap_quit(void);
-
-static inline int nfs_idmap_new(struct nfs_client *clp)
-{
-       return 0;
-}
-
-static inline void nfs_idmap_delete(struct nfs_client *clp)
-{
-}
-
-#else /* CONFIG_NFS_USE_NEW_IDMAPPER not set */
-
+#else
 static inline int nfs_idmap_init(void)
 {
        return 0;
 }
 
 static inline void nfs_idmap_quit(void)
-{
-}
+{}
+#endif
 
 int nfs_idmap_new(struct nfs_client *);
 void nfs_idmap_delete(struct nfs_client *);
 
-#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
-
 void nfs_fattr_init_names(struct nfs_fattr *fattr,
                struct nfs4_string *owner_name,
                struct nfs4_string *group_name);
index 8866bb3502ee1df94075aa2068a980cb7fe2f090..9dcbbe9a51fb17da49fe9d8e8a4deaadd67ccec1 100644 (file)
@@ -21,7 +21,7 @@
 #ifndef _LINUX_NFS_IOSTAT
 #define _LINUX_NFS_IOSTAT
 
-#define NFS_IOSTAT_VERS                "1.0"
+#define NFS_IOSTAT_VERS                "1.1"
 
 /*
  * NFS byte counters
index ab465fe8c3d6fe2d3e9f204b3a013bd22fb14038..eac30d6bec17c78db77a050e269ae0336e00e372 100644 (file)
 
 #include <linux/kref.h>
 
-/*
- * Valid flags for the radix tree
- */
-#define NFS_PAGE_TAG_LOCKED    0
-#define NFS_PAGE_TAG_COMMIT    1
-
 /*
  * Valid flags for a dirty buffer
  */
@@ -33,16 +27,13 @@ enum {
        PG_CLEAN,
        PG_NEED_COMMIT,
        PG_NEED_RESCHED,
-       PG_PNFS_COMMIT,
        PG_PARTIAL_READ_FAILED,
+       PG_COMMIT_TO_DS,
 };
 
 struct nfs_inode;
 struct nfs_page {
-       union {
-               struct list_head        wb_list;        /* Defines state of page: */
-               struct pnfs_layout_segment *wb_commit_lseg; /* Used when PG_PNFS_COMMIT set */
-       };
+       struct list_head        wb_list;        /* Defines state of page: */
        struct page             *wb_page;       /* page to read in/write out */
        struct nfs_open_context *wb_context;    /* File state context info */
        struct nfs_lock_context *wb_lock_context;       /* lock context info */
@@ -90,8 +81,6 @@ extern        struct nfs_page *nfs_create_request(struct nfs_open_context *ctx,
 extern void nfs_release_request(struct nfs_page *req);
 
 
-extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst,
-                         pgoff_t idx_start, unsigned int npages, int tag);
 extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
                             struct inode *inode,
                             const struct nfs_pageio_ops *pg_ops,
@@ -106,8 +95,6 @@ extern bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
                                struct nfs_page *req);
 extern  int nfs_wait_on_request(struct nfs_page *);
 extern void nfs_unlock_request(struct nfs_page *req);
-extern int nfs_set_page_tag_locked(struct nfs_page *req);
-extern  void nfs_clear_page_tag_locked(struct nfs_page *req);
 
 /*
  * Lock the page of an asynchronous request without getting a new reference
@@ -118,6 +105,16 @@ nfs_lock_request_dontget(struct nfs_page *req)
        return !test_and_set_bit(PG_BUSY, &req->wb_flags);
 }
 
+static inline int
+nfs_lock_request(struct nfs_page *req)
+{
+       if (test_and_set_bit(PG_BUSY, &req->wb_flags))
+               return 0;
+       kref_get(&req->wb_kref);
+       return 1;
+}
+
+
 /**
  * nfs_list_add_request - Insert a request into a list
  * @req: request
index d6ba9a12591ea464991b1d36157918ee5fc1eebe..bfd0d1bf67072e9a5a6f6c143e80f56fcc6238aa 100644 (file)
@@ -2,7 +2,6 @@
 #define _LINUX_NFS_XDR_H
 
 #include <linux/nfsacl.h>
-#include <linux/nfs3.h>
 #include <linux/sunrpc/gss_api.h>
 
 /*
@@ -89,11 +88,12 @@ struct nfs_fattr {
 #define NFS_ATTR_FATTR_PRECTIME                (1U << 16)
 #define NFS_ATTR_FATTR_CHANGE          (1U << 17)
 #define NFS_ATTR_FATTR_PRECHANGE       (1U << 18)
-#define NFS_ATTR_FATTR_V4_REFERRAL     (1U << 19)      /* NFSv4 referral */
-#define NFS_ATTR_FATTR_MOUNTPOINT      (1U << 20)      /* Treat as mountpoint */
-#define NFS_ATTR_FATTR_MOUNTED_ON_FILEID               (1U << 21)
-#define NFS_ATTR_FATTR_OWNER_NAME      (1U << 22)
-#define NFS_ATTR_FATTR_GROUP_NAME      (1U << 23)
+#define NFS_ATTR_FATTR_V4_LOCATIONS    (1U << 19)
+#define NFS_ATTR_FATTR_V4_REFERRAL     (1U << 20)
+#define NFS_ATTR_FATTR_MOUNTPOINT      (1U << 21)
+#define NFS_ATTR_FATTR_MOUNTED_ON_FILEID (1U << 22)
+#define NFS_ATTR_FATTR_OWNER_NAME      (1U << 23)
+#define NFS_ATTR_FATTR_GROUP_NAME      (1U << 24)
 
 #define NFS_ATTR_FATTR (NFS_ATTR_FATTR_TYPE \
                | NFS_ATTR_FATTR_MODE \
@@ -182,7 +182,7 @@ struct nfs4_slot {
 
 struct nfs4_sequence_args {
        struct nfs4_session     *sa_session;
-       u8                      sa_slotid;
+       u32                     sa_slotid;
        u8                      sa_cache_this;
 };
 
@@ -977,6 +977,7 @@ struct nfs4_server_caps_res {
        u32                             acl_bitmask;
        u32                             has_links;
        u32                             has_symlinks;
+       u32                             fh_expire_type;
        struct nfs4_sequence_res        seq_res;
 };
 
@@ -1055,14 +1056,6 @@ struct nfstime4 {
 };
 
 #ifdef CONFIG_NFS_V4_1
-struct nfs_impl_id4 {
-       u32             domain_len;
-       char            *domain;
-       u32             name_len;
-       char            *name;
-       struct nfstime4 date;
-};
-
 #define NFS4_EXCHANGE_ID_LEN   (48)
 struct nfs41_exchange_id_args {
        struct nfs_client               *client;
@@ -1083,10 +1076,17 @@ struct server_scope {
        char                            server_scope[NFS4_OPAQUE_LIMIT];
 };
 
+struct nfs41_impl_id {
+       char                            domain[NFS4_OPAQUE_LIMIT + 1];
+       char                            name[NFS4_OPAQUE_LIMIT + 1];
+       struct nfstime4                 date;
+};
+
 struct nfs41_exchange_id_res {
        struct nfs_client               *client;
        u32                             flags;
        struct server_scope             *server_scope;
+       struct nfs41_impl_id            *impl_id;
 };
 
 struct nfs41_create_session_args {
@@ -1192,6 +1192,27 @@ struct nfs_write_data {
        struct page             *page_array[NFS_PAGEVEC_SIZE];
 };
 
+struct nfs_unlinkdata {
+       struct hlist_node list;
+       struct nfs_removeargs args;
+       struct nfs_removeres res;
+       struct inode *dir;
+       struct rpc_cred *cred;
+       struct nfs_fattr dir_attr;
+};
+
+struct nfs_renamedata {
+       struct nfs_renameargs   args;
+       struct nfs_renameres    res;
+       struct rpc_cred         *cred;
+       struct inode            *old_dir;
+       struct dentry           *old_dentry;
+       struct nfs_fattr        old_fattr;
+       struct inode            *new_dir;
+       struct dentry           *new_dentry;
+       struct nfs_fattr        new_fattr;
+};
+
 struct nfs_access_entry;
 struct nfs_client;
 struct rpc_timeout;
@@ -1221,10 +1242,12 @@ struct nfs_rpc_ops {
                            struct iattr *, int, struct nfs_open_context *);
        int     (*remove)  (struct inode *, struct qstr *);
        void    (*unlink_setup)  (struct rpc_message *, struct inode *dir);
+       void    (*unlink_rpc_prepare) (struct rpc_task *, struct nfs_unlinkdata *);
        int     (*unlink_done) (struct rpc_task *, struct inode *);
        int     (*rename)  (struct inode *, struct qstr *,
                            struct inode *, struct qstr *);
        void    (*rename_setup)  (struct rpc_message *msg, struct inode *dir);
+       void    (*rename_rpc_prepare)(struct rpc_task *task, struct nfs_renamedata *);
        int     (*rename_done) (struct rpc_task *task, struct inode *old_dir, struct inode *new_dir);
        int     (*link)    (struct inode *, struct inode *, struct qstr *);
        int     (*symlink) (struct inode *, struct dentry *, struct page *,
@@ -1244,8 +1267,10 @@ struct nfs_rpc_ops {
        int     (*set_capabilities)(struct nfs_server *, struct nfs_fh *);
        int     (*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int);
        void    (*read_setup)   (struct nfs_read_data *, struct rpc_message *);
+       void    (*read_rpc_prepare)(struct rpc_task *, struct nfs_read_data *);
        int     (*read_done)  (struct rpc_task *, struct nfs_read_data *);
        void    (*write_setup)  (struct nfs_write_data *, struct rpc_message *);
+       void    (*write_rpc_prepare)(struct rpc_task *, struct nfs_write_data *);
        int     (*write_done)  (struct rpc_task *, struct nfs_write_data *);
        void    (*commit_setup) (struct nfs_write_data *, struct rpc_message *);
        int     (*commit_done) (struct rpc_task *, struct nfs_write_data *);
@@ -1275,11 +1300,11 @@ struct nfs_rpc_ops {
 extern const struct nfs_rpc_ops        nfs_v2_clientops;
 extern const struct nfs_rpc_ops        nfs_v3_clientops;
 extern const struct nfs_rpc_ops        nfs_v4_clientops;
-extern struct rpc_version      nfs_version2;
-extern struct rpc_version      nfs_version3;
-extern struct rpc_version      nfs_version4;
+extern const struct rpc_version nfs_version2;
+extern const struct rpc_version nfs_version3;
+extern const struct rpc_version nfs_version4;
 
-extern struct rpc_version      nfsacl_version3;
-extern struct rpc_program      nfsacl_program;
+extern const struct rpc_version nfsacl_version3;
+extern const struct rpc_program nfsacl_program;
 
 #endif
index 900da5db60ee313a161ca9dc68ab504ce465f90d..e444f5b491188566b8a7107182fd24584d92fea2 100644 (file)
@@ -299,7 +299,6 @@ struct pci_dev {
         */
        unsigned int    irq;
        struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
-       resource_size_t fw_addr[DEVICE_COUNT_RESOURCE]; /* FW-assigned addr */
 
        /* These fields are used by common fixups */
        unsigned int    transparent:1;  /* Transparent PCI bridge */
@@ -369,24 +368,17 @@ static inline int pci_channel_offline(struct pci_dev *pdev)
        return (pdev->error_state != pci_channel_io_normal);
 }
 
-static inline struct pci_cap_saved_state *pci_find_saved_cap(
-       struct pci_dev *pci_dev, char cap)
-{
-       struct pci_cap_saved_state *tmp;
-       struct hlist_node *pos;
-
-       hlist_for_each_entry(tmp, pos, &pci_dev->saved_cap_space, next) {
-               if (tmp->cap.cap_nr == cap)
-                       return tmp;
-       }
-       return NULL;
-}
+struct pci_host_bridge_window {
+       struct list_head list;
+       struct resource *res;           /* host bridge aperture (CPU address) */
+       resource_size_t offset;         /* bus address + offset = CPU address */
+};
 
-static inline void pci_add_saved_cap(struct pci_dev *pci_dev,
-       struct pci_cap_saved_state *new_cap)
-{
-       hlist_add_head(&new_cap->next, &pci_dev->saved_cap_space);
-}
+struct pci_host_bridge {
+       struct list_head list;
+       struct pci_bus *bus;            /* root bus */
+       struct list_head windows;       /* pci_host_bridge_windows */
+};
 
 /*
  * The first PCI_BRIDGE_RESOURCE_NUM PCI bus resources (those that correspond
@@ -656,6 +648,10 @@ void pci_fixup_cardbus(struct pci_bus *);
 
 /* Generic PCI functions used internally */
 
+void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
+                            struct resource *res);
+void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
+                            struct pci_bus_region *region);
 void pcibios_scan_specific_bus(int busn);
 extern struct pci_bus *pci_find_bus(int domain, int busnr);
 void pci_bus_add_devices(const struct pci_bus *bus);
@@ -690,7 +686,8 @@ u8 pci_common_swizzle(struct pci_dev *dev, u8 *pinp);
 extern struct pci_dev *pci_dev_get(struct pci_dev *dev);
 extern void pci_dev_put(struct pci_dev *dev);
 extern void pci_remove_bus(struct pci_bus *b);
-extern void pci_remove_bus_device(struct pci_dev *dev);
+extern void __pci_remove_bus_device(struct pci_dev *dev);
+extern void pci_stop_and_remove_bus_device(struct pci_dev *dev);
 extern void pci_stop_bus_device(struct pci_dev *dev);
 void pci_setup_cardbus(struct pci_bus *bus);
 extern void pci_sort_breadthfirst(void);
@@ -883,6 +880,7 @@ void set_pcie_hotplug_bridge(struct pci_dev *pdev);
 /* Functions for PCI Hotplug drivers to use */
 int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap);
 #ifdef CONFIG_HOTPLUG
+unsigned int pci_rescan_bus_bridge_resize(struct pci_dev *bridge);
 unsigned int pci_rescan_bus(struct pci_bus *bus);
 #endif
 
@@ -892,13 +890,13 @@ ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void
 int pci_vpd_truncate(struct pci_dev *dev, size_t size);
 
 /* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */
+resource_size_t pcibios_retrieve_fw_addr(struct pci_dev *dev, int idx);
 void pci_bus_assign_resources(const struct pci_bus *bus);
 void pci_bus_size_bridges(struct pci_bus *bus);
 int pci_claim_resource(struct pci_dev *, int);
 void pci_assign_unassigned_resources(void);
 void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge);
 void pdev_enable_device(struct pci_dev *);
-void pdev_sort_resources(struct pci_dev *, struct resource_list *);
 int pci_enable_resources(struct pci_dev *, int mask);
 void pci_fixup_irqs(u8 (*)(struct pci_dev *, u8 *),
                    int (*)(const struct pci_dev *, u8, u8));
@@ -915,6 +913,8 @@ void pci_release_selected_regions(struct pci_dev *, int);
 
 /* drivers/pci/bus.c */
 void pci_add_resource(struct list_head *resources, struct resource *res);
+void pci_add_resource_offset(struct list_head *resources, struct resource *res,
+                            resource_size_t offset);
 void pci_free_resource_list(struct list_head *resources);
 void pci_bus_add_resource(struct pci_bus *bus, struct resource *res, unsigned int flags);
 struct resource *pci_bus_resource_n(const struct pci_bus *bus, int n);
@@ -960,7 +960,7 @@ void pci_unregister_driver(struct pci_driver *dev);
        module_driver(__pci_driver, pci_register_driver, \
                       pci_unregister_driver)
 
-void pci_remove_behind_bridge(struct pci_dev *dev);
+void pci_stop_and_remove_behind_bridge(struct pci_dev *dev);
 struct pci_driver *pci_dev_driver(const struct pci_dev *dev);
 int pci_add_dynid(struct pci_driver *drv,
                  unsigned int vendor, unsigned int device,
@@ -1396,7 +1396,10 @@ static inline void pci_resource_to_user(const struct pci_dev *dev, int bar,
  */
 
 struct pci_fixup {
-       u16 vendor, device;     /* You can use PCI_ANY_ID here of course */
+       u16 vendor;             /* You can use PCI_ANY_ID here of course */
+       u16 device;             /* You can use PCI_ANY_ID here of course */
+       u32 class;              /* You can use PCI_ANY_ID here too */
+       unsigned int class_shift;       /* should be 0, 8, 16 */
        void (*hook)(struct pci_dev *dev);
 };
 
@@ -1411,30 +1414,68 @@ enum pci_fixup_pass {
 };
 
 /* Anonymous variables would be nice... */
-#define DECLARE_PCI_FIXUP_SECTION(section, name, vendor, device, hook) \
-       static const struct pci_fixup __pci_fixup_##name __used         \
-       __attribute__((__section__(#section))) = { vendor, device, hook };
+#define DECLARE_PCI_FIXUP_SECTION(section, name, vendor, device, class,        \
+                                 class_shift, hook)                    \
+       static const struct pci_fixup const __pci_fixup_##name __used   \
+       __attribute__((__section__(#section), aligned((sizeof(void *)))))    \
+               = { vendor, device, class, class_shift, hook };
+
+#define DECLARE_PCI_FIXUP_CLASS_EARLY(vendor, device, class,           \
+                                        class_shift, hook)             \
+       DECLARE_PCI_FIXUP_SECTION(.pci_fixup_early,                     \
+               vendor##device##hook, vendor, device, class, class_shift, hook)
+#define DECLARE_PCI_FIXUP_CLASS_HEADER(vendor, device, class,          \
+                                        class_shift, hook)             \
+       DECLARE_PCI_FIXUP_SECTION(.pci_fixup_header,                    \
+               vendor##device##hook, vendor, device, class, class_shift, hook)
+#define DECLARE_PCI_FIXUP_CLASS_FINAL(vendor, device, class,           \
+                                        class_shift, hook)             \
+       DECLARE_PCI_FIXUP_SECTION(.pci_fixup_final,                     \
+               vendor##device##hook, vendor, device, class, class_shift, hook)
+#define DECLARE_PCI_FIXUP_CLASS_ENABLE(vendor, device, class,          \
+                                        class_shift, hook)             \
+       DECLARE_PCI_FIXUP_SECTION(.pci_fixup_enable,                    \
+               vendor##device##hook, vendor, device, class, class_shift, hook)
+#define DECLARE_PCI_FIXUP_CLASS_RESUME(vendor, device, class,          \
+                                        class_shift, hook)             \
+       DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume,                    \
+               resume##vendor##device##hook, vendor, device, class,    \
+               class_shift, hook)
+#define DECLARE_PCI_FIXUP_CLASS_RESUME_EARLY(vendor, device, class,    \
+                                        class_shift, hook)             \
+       DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume_early,              \
+               resume_early##vendor##device##hook, vendor, device,     \
+               class, class_shift, hook)
+#define DECLARE_PCI_FIXUP_CLASS_SUSPEND(vendor, device, class,         \
+                                        class_shift, hook)             \
+       DECLARE_PCI_FIXUP_SECTION(.pci_fixup_suspend,                   \
+               suspend##vendor##device##hook, vendor, device, class,   \
+               class_shift, hook)
+
 #define DECLARE_PCI_FIXUP_EARLY(vendor, device, hook)                  \
        DECLARE_PCI_FIXUP_SECTION(.pci_fixup_early,                     \
-                       vendor##device##hook, vendor, device, hook)
+               vendor##device##hook, vendor, device, PCI_ANY_ID, 0, hook)
 #define DECLARE_PCI_FIXUP_HEADER(vendor, device, hook)                 \
        DECLARE_PCI_FIXUP_SECTION(.pci_fixup_header,                    \
-                       vendor##device##hook, vendor, device, hook)
+               vendor##device##hook, vendor, device, PCI_ANY_ID, 0, hook)
 #define DECLARE_PCI_FIXUP_FINAL(vendor, device, hook)                  \
        DECLARE_PCI_FIXUP_SECTION(.pci_fixup_final,                     \
-                       vendor##device##hook, vendor, device, hook)
+               vendor##device##hook, vendor, device, PCI_ANY_ID, 0, hook)
 #define DECLARE_PCI_FIXUP_ENABLE(vendor, device, hook)                 \
        DECLARE_PCI_FIXUP_SECTION(.pci_fixup_enable,                    \
-                       vendor##device##hook, vendor, device, hook)
+               vendor##device##hook, vendor, device, PCI_ANY_ID, 0, hook)
 #define DECLARE_PCI_FIXUP_RESUME(vendor, device, hook)                 \
        DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume,                    \
-                       resume##vendor##device##hook, vendor, device, hook)
+               resume##vendor##device##hook, vendor, device,           \
+               PCI_ANY_ID, 0, hook)
 #define DECLARE_PCI_FIXUP_RESUME_EARLY(vendor, device, hook)           \
        DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume_early,              \
-                       resume_early##vendor##device##hook, vendor, device, hook)
+               resume_early##vendor##device##hook, vendor, device,     \
+               PCI_ANY_ID, 0, hook)
 #define DECLARE_PCI_FIXUP_SUSPEND(vendor, device, hook)                        \
        DECLARE_PCI_FIXUP_SECTION(.pci_fixup_suspend,                   \
-                       suspend##vendor##device##hook, vendor, device, hook)
+               suspend##vendor##device##hook, vendor, device,          \
+               PCI_ANY_ID, 0, hook)
 
 #ifdef CONFIG_PCI_QUIRKS
 void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev);
index e41a10f5ae83814c602002af8abdf59dba3d2b6c..4b608f5434128413a4b491891d820a17cbdbe33e 100644 (file)
 #define  PCI_EXP_TYPE_UPSTREAM 0x5     /* Upstream Port */
 #define  PCI_EXP_TYPE_DOWNSTREAM 0x6   /* Downstream Port */
 #define  PCI_EXP_TYPE_PCI_BRIDGE 0x7   /* PCI/PCI-X Bridge */
+#define  PCI_EXP_TYPE_PCIE_BRIDGE 0x8  /* PCI/PCI-X to PCIE Bridge */
 #define  PCI_EXP_TYPE_RC_END   0x9     /* Root Complex Integrated Endpoint */
 #define  PCI_EXP_TYPE_RC_EC    0xa     /* Root Complex Event Collector */
 #define PCI_EXP_FLAGS_SLOT     0x0100  /* Slot implemented */
index 7874a8a566386a02165ebc0bff474d8bf160f0c0..492a36d72829939c21f6c4ff3f5bd412956b2a95 100644 (file)
@@ -99,6 +99,8 @@ struct rpc_authops {
 
        struct rpc_cred *       (*lookup_cred)(struct rpc_auth *, struct auth_cred *, int);
        struct rpc_cred *       (*crcreate)(struct rpc_auth*, struct auth_cred *, int);
+       int                     (*pipes_create)(struct rpc_auth *);
+       void                    (*pipes_destroy)(struct rpc_auth *);
 };
 
 struct rpc_credops {
index f7f3ce340c083f04b311f06aac1e8e34239c8016..969c0a671dbfbbab399f7472d501a93d75bbd8fc 100644 (file)
@@ -35,7 +35,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt);
 void xprt_free_bc_request(struct rpc_rqst *req);
 int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs);
-void xprt_destroy_backchannel(struct rpc_xprt *, int max_reqs);
+void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs);
 int bc_send(struct rpc_rqst *req);
 
 /*
index 57531f8e5956dd07c83652789bbb9d7a8a40e399..f5fd6160dbca396835773609586e65071f9c78ab 100644 (file)
@@ -117,6 +117,7 @@ struct cache_detail {
                struct cache_detail_procfs procfs;
                struct cache_detail_pipefs pipefs;
        } u;
+       struct net              *net;
 };
 
 
@@ -197,11 +198,14 @@ extern void cache_flush(void);
 extern void cache_purge(struct cache_detail *detail);
 #define NEVER (0x7FFFFFFF)
 extern void __init cache_initialize(void);
-extern int cache_register(struct cache_detail *cd);
 extern int cache_register_net(struct cache_detail *cd, struct net *net);
-extern void cache_unregister(struct cache_detail *cd);
 extern void cache_unregister_net(struct cache_detail *cd, struct net *net);
 
+extern struct cache_detail *cache_create_net(struct cache_detail *tmpl, struct net *net);
+extern void cache_destroy_net(struct cache_detail *cd, struct net *net);
+
+extern void sunrpc_init_cache_detail(struct cache_detail *cd);
+extern void sunrpc_destroy_cache_detail(struct cache_detail *cd);
 extern int sunrpc_cache_register_pipefs(struct dentry *parent, const char *,
                                        umode_t, struct cache_detail *);
 extern void sunrpc_cache_unregister_pipefs(struct cache_detail *);
index 2c5993a17c3315423cbb3ba895fb8f24f4ac0bb3..523547ecfee2812c0f8d7fa069f7c8ac3f66ab2d 100644 (file)
@@ -35,14 +35,13 @@ struct rpc_clnt {
        struct list_head        cl_clients;     /* Global list of clients */
        struct list_head        cl_tasks;       /* List of tasks */
        spinlock_t              cl_lock;        /* spinlock */
-       struct rpc_xprt *       cl_xprt;        /* transport */
+       struct rpc_xprt __rcu * cl_xprt;        /* transport */
        struct rpc_procinfo *   cl_procinfo;    /* procedure info */
        u32                     cl_prog,        /* RPC program number */
                                cl_vers,        /* RPC version number */
                                cl_maxproc;     /* max procedure number */
 
-       char *                  cl_server;      /* server machine name */
-       char *                  cl_protname;    /* protocol name */
+       const char *            cl_protname;    /* protocol name */
        struct rpc_auth *       cl_auth;        /* authenticator */
        struct rpc_stat *       cl_stats;       /* per-program statistics */
        struct rpc_iostats *    cl_metrics;     /* per-client statistics */
@@ -57,12 +56,11 @@ struct rpc_clnt {
 
        int                     cl_nodelen;     /* nodename length */
        char                    cl_nodename[UNX_MAXNODENAME];
-       struct path             cl_path;
+       struct dentry *         cl_dentry;
        struct rpc_clnt *       cl_parent;      /* Points to parent of clones */
        struct rpc_rtt          cl_rtt_default;
        struct rpc_timeout      cl_timeout_default;
-       struct rpc_program *    cl_program;
-       char                    cl_inline_name[32];
+       const struct rpc_program *cl_program;
        char                    *cl_principal;  /* target to authenticate to */
 };
 
@@ -71,12 +69,12 @@ struct rpc_clnt {
  */
 #define RPC_MAXVERSION         4
 struct rpc_program {
-       char *                  name;           /* protocol name */
+       const char *            name;           /* protocol name */
        u32                     number;         /* program number */
        unsigned int            nrvers;         /* number of versions */
-       struct rpc_version **   version;        /* version array */
+       const struct rpc_version **     version;        /* version array */
        struct rpc_stat *       stats;          /* statistics */
-       char *                  pipe_dir_name;  /* path to rpc_pipefs dir */
+       const char *            pipe_dir_name;  /* path to rpc_pipefs dir */
 };
 
 struct rpc_version {
@@ -97,7 +95,7 @@ struct rpc_procinfo {
        unsigned int            p_count;        /* call count */
        unsigned int            p_timer;        /* Which RTT timer to use */
        u32                     p_statidx;      /* Which procedure to account */
-       char *                  p_name;         /* name of procedure */
+       const char *            p_name;         /* name of procedure */
 };
 
 #ifdef __KERNEL__
@@ -109,8 +107,8 @@ struct rpc_create_args {
        size_t                  addrsize;
        struct sockaddr         *saddress;
        const struct rpc_timeout *timeout;
-       char                    *servername;
-       struct rpc_program      *program;
+       const char              *servername;
+       const struct rpc_program *program;
        u32                     prognumber;     /* overrides program->number */
        u32                     version;
        rpc_authflavor_t        authflavor;
@@ -129,17 +127,18 @@ struct rpc_create_args {
 
 struct rpc_clnt *rpc_create(struct rpc_create_args *args);
 struct rpc_clnt        *rpc_bind_new_program(struct rpc_clnt *,
-                               struct rpc_program *, u32);
+                               const struct rpc_program *, u32);
 void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt);
 struct rpc_clnt *rpc_clone_client(struct rpc_clnt *);
 void           rpc_shutdown_client(struct rpc_clnt *);
 void           rpc_release_client(struct rpc_clnt *);
 void           rpc_task_release_client(struct rpc_task *);
 
-int            rpcb_create_local(void);
-void           rpcb_put_local(void);
-int            rpcb_register(u32, u32, int, unsigned short);
-int            rpcb_v4_register(const u32 program, const u32 version,
+int            rpcb_create_local(struct net *);
+void           rpcb_put_local(struct net *);
+int            rpcb_register(struct net *, u32, u32, int, unsigned short);
+int            rpcb_v4_register(struct net *net, const u32 program,
+                                const u32 version,
                                 const struct sockaddr *address,
                                 const char *netid);
 void           rpcb_getport_async(struct rpc_task *);
@@ -156,16 +155,19 @@ struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred,
 int            rpc_restart_call_prepare(struct rpc_task *);
 int            rpc_restart_call(struct rpc_task *);
 void           rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int);
+int            rpc_protocol(struct rpc_clnt *);
+struct net *   rpc_net_ns(struct rpc_clnt *);
 size_t         rpc_max_payload(struct rpc_clnt *);
 void           rpc_force_rebind(struct rpc_clnt *);
 size_t         rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t);
 const char     *rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t);
+int            rpc_localaddr(struct rpc_clnt *, struct sockaddr *, size_t);
 
 size_t         rpc_ntop(const struct sockaddr *, char *, const size_t);
-size_t         rpc_pton(const char *, const size_t,
+size_t         rpc_pton(struct net *, const char *, const size_t,
                         struct sockaddr *, const size_t);
 char *         rpc_sockaddr2uaddr(const struct sockaddr *, gfp_t);
-size_t         rpc_uaddr2sockaddr(const char *, const size_t,
+size_t         rpc_uaddr2sockaddr(struct net *, const char *, const size_t,
                                   struct sockaddr *, const size_t);
 
 static inline unsigned short rpc_get_port(const struct sockaddr *sap)
index c2786f20016f9184f0849c2ca9e94636efeaff9e..a76cc20d98ce21531a6fcbd70e861bbfc5055c12 100644 (file)
 /*
  * Enable RPC debugging/profiling.
  */
-#ifdef CONFIG_SYSCTL
+#ifdef CONFIG_SUNRPC_DEBUG
 #define  RPC_DEBUG
 #endif
+#ifdef CONFIG_TRACEPOINTS
+#define RPC_TRACEPOINTS
+#endif
 /* #define  RPC_PROFILE */
 
 /*
@@ -47,15 +50,32 @@ extern unsigned int         nlm_debug;
 #endif
 
 #define dprintk(args...)       dfprintk(FACILITY, ## args)
+#define dprintk_rcu(args...)   dfprintk_rcu(FACILITY, ## args)
 
 #undef ifdebug
 #ifdef RPC_DEBUG                       
 # define ifdebug(fac)          if (unlikely(rpc_debug & RPCDBG_##fac))
-# define dfprintk(fac, args...)        do { ifdebug(fac) printk(args); } while(0)
+
+# define dfprintk(fac, args...)        \
+       do { \
+               ifdebug(fac) \
+                       printk(KERN_DEFAULT args); \
+       } while (0)
+
+# define dfprintk_rcu(fac, args...)    \
+       do { \
+               ifdebug(fac) { \
+                       rcu_read_lock(); \
+                       printk(KERN_DEFAULT args); \
+                       rcu_read_unlock(); \
+               } \
+       } while (0)
+
 # define RPC_IFDEBUG(x)                x
 #else
 # define ifdebug(fac)          if (0)
-# define dfprintk(fac, args...)        do ; while (0)
+# define dfprintk(fac, args...)        do {} while (0)
+# define dfprintk_rcu(fac, args...)    do {} while (0)
 # define RPC_IFDEBUG(x)
 #endif
 
index b6edbc0ea83dddcdc450fa05a06cc6face7d69b5..1565bbe86d51e77c2323ca26a04bbad8a35b9f22 100644 (file)
@@ -74,14 +74,16 @@ struct rpc_clnt;
 #ifdef CONFIG_PROC_FS
 
 struct rpc_iostats *   rpc_alloc_iostats(struct rpc_clnt *);
-void                   rpc_count_iostats(struct rpc_task *);
+void                   rpc_count_iostats(const struct rpc_task *,
+                                         struct rpc_iostats *);
 void                   rpc_print_iostats(struct seq_file *, struct rpc_clnt *);
 void                   rpc_free_iostats(struct rpc_iostats *);
 
 #else  /*  CONFIG_PROC_FS  */
 
 static inline struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt) { return NULL; }
-static inline void rpc_count_iostats(struct rpc_task *task) {}
+static inline void rpc_count_iostats(const struct rpc_task *task,
+                                    struct rpc_iostats *stats) {}
 static inline void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt) {}
 static inline void rpc_free_iostats(struct rpc_iostats *stats) {}
 
index 2bb03d77375a26fb30c00fd1846f232a73e809bf..a7b422b33eda65583d175ccd494b58d23c7716ec 100644 (file)
@@ -21,21 +21,26 @@ struct rpc_pipe_ops {
        void (*destroy_msg)(struct rpc_pipe_msg *);
 };
 
-struct rpc_inode {
-       struct inode vfs_inode;
-       void *private;
+struct rpc_pipe {
        struct list_head pipe;
        struct list_head in_upcall;
        struct list_head in_downcall;
        int pipelen;
        int nreaders;
        int nwriters;
-       int nkern_readwriters;
-       wait_queue_head_t waitq;
 #define RPC_PIPE_WAIT_FOR_OPEN 1
        int flags;
        struct delayed_work queue_timeout;
        const struct rpc_pipe_ops *ops;
+       spinlock_t lock;
+       struct dentry *dentry;
+};
+
+struct rpc_inode {
+       struct inode vfs_inode;
+       void *private;
+       struct rpc_pipe *pipe;
+       wait_queue_head_t waitq;
 };
 
 static inline struct rpc_inode *
@@ -44,9 +49,28 @@ RPC_I(struct inode *inode)
        return container_of(inode, struct rpc_inode, vfs_inode);
 }
 
+enum {
+       SUNRPC_PIPEFS_NFS_PRIO,
+       SUNRPC_PIPEFS_RPC_PRIO,
+};
+
+extern int rpc_pipefs_notifier_register(struct notifier_block *);
+extern void rpc_pipefs_notifier_unregister(struct notifier_block *);
+
+enum {
+       RPC_PIPEFS_MOUNT,
+       RPC_PIPEFS_UMOUNT,
+};
+
+extern struct dentry *rpc_d_lookup_sb(const struct super_block *sb,
+                                     const unsigned char *dir_name);
+extern void rpc_pipefs_init_net(struct net *net);
+extern struct super_block *rpc_get_sb_net(const struct net *net);
+extern void rpc_put_sb_net(const struct net *net);
+
 extern ssize_t rpc_pipe_generic_upcall(struct file *, struct rpc_pipe_msg *,
                                       char __user *, size_t);
-extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *);
+extern int rpc_queue_upcall(struct rpc_pipe *, struct rpc_pipe_msg *);
 
 struct rpc_clnt;
 extern struct dentry *rpc_create_client_dir(struct dentry *, struct qstr *, struct rpc_clnt *);
@@ -59,11 +83,13 @@ extern struct dentry *rpc_create_cache_dir(struct dentry *,
                                           struct cache_detail *);
 extern void rpc_remove_cache_dir(struct dentry *);
 
-extern struct dentry *rpc_mkpipe(struct dentry *, const char *, void *,
-                                const struct rpc_pipe_ops *, int flags);
+extern int rpc_rmdir(struct dentry *dentry);
+
+struct rpc_pipe *rpc_mkpipe_data(const struct rpc_pipe_ops *ops, int flags);
+void rpc_destroy_pipe_data(struct rpc_pipe *pipe);
+extern struct dentry *rpc_mkpipe_dentry(struct dentry *, const char *, void *,
+                                       struct rpc_pipe *);
 extern int rpc_unlink(struct dentry *);
-extern struct vfsmount *rpc_get_mount(void);
-extern void rpc_put_mount(void);
 extern int register_rpc_pipefs(void);
 extern void unregister_rpc_pipefs(void);
 
index e7756896f3ca292f4398304f7761b691232df131..dc0c3cc3ada3f8ced03b7fb00b2b1772bb722034 100644 (file)
@@ -103,6 +103,7 @@ typedef void                        (*rpc_action)(struct rpc_task *);
 struct rpc_call_ops {
        void (*rpc_call_prepare)(struct rpc_task *, void *);
        void (*rpc_call_done)(struct rpc_task *, void *);
+       void (*rpc_count_stats)(struct rpc_task *, void *);
        void (*rpc_release)(void *);
 };
 
@@ -195,7 +196,7 @@ struct rpc_wait_queue {
        unsigned char           nr;                     /* # tasks remaining for cookie */
        unsigned short          qlen;                   /* total # tasks waiting in queue */
        struct rpc_timer        timer_list;
-#ifdef RPC_DEBUG
+#if defined(RPC_DEBUG) || defined(RPC_TRACEPOINTS)
        const char *            name;
 #endif
 };
@@ -235,6 +236,9 @@ void                rpc_wake_up_queued_task(struct rpc_wait_queue *,
                                        struct rpc_task *);
 void           rpc_wake_up(struct rpc_wait_queue *);
 struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *);
+struct rpc_task *rpc_wake_up_first(struct rpc_wait_queue *,
+                                       bool (*)(struct rpc_task *, void *),
+                                       void *);
 void           rpc_wake_up_status(struct rpc_wait_queue *, int);
 int            rpc_queue_empty(struct rpc_wait_queue *);
 void           rpc_delay(struct rpc_task *, unsigned long);
@@ -244,7 +248,8 @@ int         rpciod_up(void);
 void           rpciod_down(void);
 int            __rpc_wait_for_completion_task(struct rpc_task *task, int (*)(void *));
 #ifdef RPC_DEBUG
-void           rpc_show_tasks(void);
+struct net;
+void           rpc_show_tasks(struct net *);
 #endif
 int            rpc_init_mempool(void);
 void           rpc_destroy_mempool(void);
@@ -266,11 +271,22 @@ static inline int rpc_task_has_priority(struct rpc_task *task, unsigned char pri
        return (task->tk_priority + RPC_PRIORITY_LOW == prio);
 }
 
-#ifdef RPC_DEBUG
-static inline const char * rpc_qname(struct rpc_wait_queue *q)
+#if defined(RPC_DEBUG) || defined (RPC_TRACEPOINTS)
+static inline const char * rpc_qname(const struct rpc_wait_queue *q)
 {
        return ((q && q->name) ? q->name : "unknown");
 }
+
+static inline void rpc_assign_waitqueue_name(struct rpc_wait_queue *q,
+               const char *name)
+{
+       q->name = name;
+}
+#else
+static inline void rpc_assign_waitqueue_name(struct rpc_wait_queue *q,
+               const char *name)
+{
+}
 #endif
 
 #endif /* _LINUX_SUNRPC_SCHED_H_ */
index 680471d1f28a4300700883489c3b8b78335c07da..edc64219f92b1153b5f0c2517d06fa46d1228ea8 100644 (file)
@@ -12,7 +12,7 @@
 #include <linux/proc_fs.h>
 
 struct rpc_stat {
-       struct rpc_program *    program;
+       const struct rpc_program *program;
 
        unsigned int            netcnt,
                                netudpcnt,
@@ -58,24 +58,24 @@ void                        rpc_modcount(struct inode *, int);
 #endif
 
 #ifdef CONFIG_PROC_FS
-struct proc_dir_entry *        rpc_proc_register(struct rpc_stat *);
-void                   rpc_proc_unregister(const char *);
-void                   rpc_proc_zero(struct rpc_program *);
-struct proc_dir_entry *        svc_proc_register(struct svc_stat *,
+struct proc_dir_entry *        rpc_proc_register(struct net *,struct rpc_stat *);
+void                   rpc_proc_unregister(struct net *,const char *);
+void                   rpc_proc_zero(const struct rpc_program *);
+struct proc_dir_entry *        svc_proc_register(struct net *, struct svc_stat *,
                                          const struct file_operations *);
-void                   svc_proc_unregister(const char *);
+void                   svc_proc_unregister(struct net *, const char *);
 
 void                   svc_seq_show(struct seq_file *,
                                     const struct svc_stat *);
 #else
 
-static inline struct proc_dir_entry *rpc_proc_register(struct rpc_stat *s) { return NULL; }
-static inline void rpc_proc_unregister(const char *p) {}
-static inline void rpc_proc_zero(struct rpc_program *p) {}
+static inline struct proc_dir_entry *rpc_proc_register(struct net *net, struct rpc_stat *s) { return NULL; }
+static inline void rpc_proc_unregister(struct net *net, const char *p) {}
+static inline void rpc_proc_zero(const struct rpc_program *p) {}
 
-static inline struct proc_dir_entry *svc_proc_register(struct svc_stat *s,
+static inline struct proc_dir_entry *svc_proc_register(struct net *net, struct svc_stat *s,
                                                       const struct file_operations *f) { return NULL; }
-static inline void svc_proc_unregister(const char *p) {}
+static inline void svc_proc_unregister(struct net *net, const char *p) {}
 
 static inline void svc_seq_show(struct seq_file *seq,
                                const struct svc_stat *st) {}
index 35b37b1e9299e37825fe79a281a29055aa4d273a..51b29ac45a8e7b26583df0217ab37a0d939ad6da 100644 (file)
@@ -84,7 +84,8 @@ struct svc_serv {
        unsigned int            sv_nrpools;     /* number of thread pools */
        struct svc_pool *       sv_pools;       /* array of thread pools */
 
-       void                    (*sv_shutdown)(struct svc_serv *serv);
+       void                    (*sv_shutdown)(struct svc_serv *serv,
+                                              struct net *net);
                                                /* Callback to use when last thread
                                                 * exits.
                                                 */
@@ -413,22 +414,24 @@ struct svc_procedure {
 /*
  * Function prototypes.
  */
-void svc_rpcb_cleanup(struct svc_serv *serv);
+int svc_rpcb_setup(struct svc_serv *serv, struct net *net);
+void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net);
 struct svc_serv *svc_create(struct svc_program *, unsigned int,
-                           void (*shutdown)(struct svc_serv *));
+                           void (*shutdown)(struct svc_serv *, struct net *net));
 struct svc_rqst *svc_prepare_thread(struct svc_serv *serv,
                                        struct svc_pool *pool, int node);
 void              svc_exit_thread(struct svc_rqst *);
 struct svc_serv *  svc_create_pooled(struct svc_program *, unsigned int,
-                       void (*shutdown)(struct svc_serv *),
+                       void (*shutdown)(struct svc_serv *, struct net *net),
                        svc_thread_fn, struct module *);
 int               svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
 int               svc_pool_stats_open(struct svc_serv *serv, struct file *file);
 void              svc_destroy(struct svc_serv *);
+void              svc_shutdown_net(struct svc_serv *, struct net *);
 int               svc_process(struct svc_rqst *);
 int               bc_svc_process(struct svc_serv *, struct rpc_rqst *,
                        struct svc_rqst *);
-int               svc_register(const struct svc_serv *, const int,
+int               svc_register(const struct svc_serv *, struct net *, const int,
                                const unsigned short, const unsigned short);
 
 void              svc_wake_up(struct svc_serv *);
index dfa900948af79a6fbd277bbbb5b56c0c7842cec4..b3f64b12f1415f7e14d1f49cdeca4ccf7de20f0f 100644 (file)
@@ -121,7 +121,8 @@ void        svc_close_xprt(struct svc_xprt *xprt);
 int    svc_port_is_privileged(struct sockaddr *sin);
 int    svc_print_xprts(char *buf, int maxlen);
 struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name,
-                       const sa_family_t af, const unsigned short port);
+                       struct net *net, const sa_family_t af,
+                       const unsigned short port);
 int    svc_xprt_names(struct svc_serv *serv, char *buf, const int buflen);
 
 static inline void svc_xprt_get(struct svc_xprt *xprt)
index 25d333c1b5717f3ee9f659168d805865ab420bd5..548790e9113b317dbc8de0c46a691df3c0030269 100644 (file)
@@ -135,6 +135,9 @@ extern void svcauth_unix_purge(void);
 extern void svcauth_unix_info_release(struct svc_xprt *xpt);
 extern int svcauth_unix_set_client(struct svc_rqst *rqstp);
 
+extern int unix_gid_cache_create(struct net *net);
+extern void unix_gid_cache_destroy(struct net *net);
+
 static inline unsigned long hash_str(char *name, int bits)
 {
        unsigned long hash = 0;
index 83bbee3f089cd7bd4491b5dd5edadc9f5e4c5207..7c32daa025eb07b644d8185a27c8ea10d8b7c55f 100644 (file)
@@ -18,6 +18,8 @@
 
 int gss_svc_init(void);
 void gss_svc_shutdown(void);
+int gss_svc_init_net(struct net *net);
+void gss_svc_shutdown_net(struct net *net);
 int svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name);
 u32 svcauth_gss_flavor(struct auth_domain *dom);
 char *svc_gss_principal(struct svc_rqst *);
index c84e9741cb2a25471838c2c31503b8d550c8bfbf..cb4ac69e1f3356aeb96cdeed3ca83e106b45ba25 100644 (file)
@@ -34,7 +34,7 @@ struct svc_sock {
 /*
  * Function prototypes.
  */
-void           svc_close_all(struct svc_serv *);
+void           svc_close_net(struct svc_serv *, struct net *);
 int            svc_recv(struct svc_rqst *, long);
 int            svc_send(struct svc_rqst *);
 void           svc_drop(struct svc_rqst *);
index 15518a152ac3db6e973a95685d50d200e6a1f475..77d278defa70667011b5a8b08ba70394b23d1d84 100644 (file)
@@ -21,8 +21,8 @@
 
 #define RPC_MIN_SLOT_TABLE     (2U)
 #define RPC_DEF_SLOT_TABLE     (16U)
-#define RPC_MAX_SLOT_TABLE     (128U)
 #define RPC_MAX_SLOT_TABLE_LIMIT       (65536U)
+#define RPC_MAX_SLOT_TABLE     RPC_MAX_SLOT_TABLE_LIMIT
 
 /*
  * This describes a timeout strategy
@@ -219,13 +219,17 @@ struct rpc_xprt {
                                        connect_time,   /* jiffies waiting for connect */
                                        sends,          /* how many complete requests */
                                        recvs,          /* how many complete requests */
-                                       bad_xids;       /* lookup_rqst didn't find XID */
+                                       bad_xids,       /* lookup_rqst didn't find XID */
+                                       max_slots;      /* max rpc_slots used */
 
                unsigned long long      req_u,          /* average requests on the wire */
-                                       bklog_u;        /* backlog queue utilization */
+                                       bklog_u,        /* backlog queue utilization */
+                                       sending_u,      /* send q utilization */
+                                       pending_u;      /* pend q utilization */
        } stat;
 
        struct net              *xprt_net;
+       const char              *servername;
        const char              *address_strings[RPC_DISPLAY_MAX];
 };
 
@@ -255,6 +259,7 @@ struct xprt_create {
        struct sockaddr *       srcaddr;        /* optional local address */
        struct sockaddr *       dstaddr;        /* remote peer address */
        size_t                  addrlen;
+       const char              *servername;
        struct svc_xprt         *bc_xprt;       /* NFSv4.1 backchannel */
 };
 
index 3f14a02e9cc022dc5b4d276068110e62ff48b52a..1ad36cc25b2ed53756145457fa86dbbe6cacc009 100644 (file)
 int            init_socket_xprt(void);
 void           cleanup_socket_xprt(void);
 
-/*
- * RPC slot table sizes for UDP, TCP transports
- */
-extern unsigned int xprt_udp_slot_table_entries;
-extern unsigned int xprt_tcp_slot_table_entries;
-
-/*
- * Parameters for choosing a free port
- */
-extern unsigned int xprt_min_resvport;
-extern unsigned int xprt_max_resvport;
-
 #define RPC_MIN_RESVPORT       (1U)
 #define RPC_MAX_RESVPORT       (65535U)
 #define RPC_DEF_MIN_RESVPORT   (665U)
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
new file mode 100644 (file)
index 0000000..43be87d
--- /dev/null
@@ -0,0 +1,177 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM sunrpc
+
+#if !defined(_TRACE_SUNRPC_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SUNRPC_H
+
+#include <linux/sunrpc/sched.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(rpc_task_status,
+
+       TP_PROTO(struct rpc_task *task),
+
+       TP_ARGS(task),
+
+       TP_STRUCT__entry(
+               __field(const struct rpc_task *, task)
+               __field(const struct rpc_clnt *, clnt)
+               __field(int, status)
+       ),
+
+       TP_fast_assign(
+               __entry->task = task;
+               __entry->clnt = task->tk_client;
+               __entry->status = task->tk_status;
+       ),
+
+       TP_printk("task:%p@%p, status %d",__entry->task, __entry->clnt, __entry->status)
+);
+
+DEFINE_EVENT(rpc_task_status, rpc_call_status,
+       TP_PROTO(struct rpc_task *task),
+
+       TP_ARGS(task)
+);
+
+DEFINE_EVENT(rpc_task_status, rpc_bind_status,
+       TP_PROTO(struct rpc_task *task),
+
+       TP_ARGS(task)
+);
+
+TRACE_EVENT(rpc_connect_status,
+       TP_PROTO(struct rpc_task *task, int status),
+
+       TP_ARGS(task, status),
+
+       TP_STRUCT__entry(
+               __field(const struct rpc_task *, task)
+               __field(const struct rpc_clnt *, clnt)
+               __field(int, status)
+       ),
+
+       TP_fast_assign(
+               __entry->task = task;
+               __entry->clnt = task->tk_client;
+               __entry->status = status;
+       ),
+
+       TP_printk("task:%p@%p, status %d",__entry->task, __entry->clnt, __entry->status)
+);
+
+DECLARE_EVENT_CLASS(rpc_task_running,
+
+       TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const void *action),
+
+       TP_ARGS(clnt, task, action),
+
+       TP_STRUCT__entry(
+               __field(const struct rpc_clnt *, clnt)
+               __field(const struct rpc_task *, task)
+               __field(const void *, action)
+               __field(unsigned long, runstate)
+               __field(int, status)
+               __field(unsigned short, flags)
+               ),
+
+       TP_fast_assign(
+               __entry->clnt = clnt;
+               __entry->task = task;
+               __entry->action = action;
+               __entry->runstate = task->tk_runstate;
+               __entry->status = task->tk_status;
+               __entry->flags = task->tk_flags;
+               ),
+
+       TP_printk("task:%p@%p flags=%4.4x state=%4.4lx status=%d action=%pf",
+               __entry->task,
+               __entry->clnt,
+               __entry->flags,
+               __entry->runstate,
+               __entry->status,
+               __entry->action
+               )
+);
+
+DEFINE_EVENT(rpc_task_running, rpc_task_begin,
+
+       TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const void *action),
+
+       TP_ARGS(clnt, task, action)
+
+);
+
+DEFINE_EVENT(rpc_task_running, rpc_task_run_action,
+
+       TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const void *action),
+
+       TP_ARGS(clnt, task, action)
+
+);
+
+DEFINE_EVENT(rpc_task_running, rpc_task_complete,
+
+       TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const void *action),
+
+       TP_ARGS(clnt, task, action)
+
+);
+
+DECLARE_EVENT_CLASS(rpc_task_queued,
+
+       TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const struct rpc_wait_queue *q),
+
+       TP_ARGS(clnt, task, q),
+
+       TP_STRUCT__entry(
+               __field(const struct rpc_clnt *, clnt)
+               __field(const struct rpc_task *, task)
+               __field(unsigned long, timeout)
+               __field(unsigned long, runstate)
+               __field(int, status)
+               __field(unsigned short, flags)
+               __string(q_name, rpc_qname(q))
+               ),
+
+       TP_fast_assign(
+               __entry->clnt = clnt;
+               __entry->task = task;
+               __entry->timeout = task->tk_timeout;
+               __entry->runstate = task->tk_runstate;
+               __entry->status = task->tk_status;
+               __entry->flags = task->tk_flags;
+               __assign_str(q_name, rpc_qname(q));
+               ),
+
+       TP_printk("task:%p@%p flags=%4.4x state=%4.4lx status=%d timeout=%lu queue=%s",
+               __entry->task,
+               __entry->clnt,
+               __entry->flags,
+               __entry->runstate,
+               __entry->status,
+               __entry->timeout,
+               __get_str(q_name)
+               )
+);
+
+DEFINE_EVENT(rpc_task_queued, rpc_task_sleep,
+
+       TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const struct rpc_wait_queue *q),
+
+       TP_ARGS(clnt, task, q)
+
+);
+
+DEFINE_EVENT(rpc_task_queued, rpc_task_wakeup,
+
+       TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const struct rpc_wait_queue *q),
+
+       TP_ARGS(clnt, task, q)
+
+);
+
+#endif /* _TRACE_SUNRPC_H */
+
+#include <trace/define_trace.h>
index 0d7c08784efbdd11da5634bf1d4158641089e0b3..3f88a45e6f0abdf08e8fff7dfae0dbd3210f6eec 100644 (file)
@@ -41,6 +41,7 @@
 #include <linux/delay.h>
 #include <linux/sched.h>
 #include <linux/sysrq.h>
+#include <linux/reboot.h>
 #include <linux/init.h>
 #include <linux/kgdb.h>
 #include <linux/kdb.h>
@@ -75,6 +76,8 @@ static int                    exception_level;
 struct kgdb_io         *dbg_io_ops;
 static DEFINE_SPINLOCK(kgdb_registration_lock);
 
+/* Action for the reboot notifier; a global so that kdb can change it */
+static int kgdbreboot;
 /* kgdb console driver is loaded */
 static int kgdb_con_registered;
 /* determine if kgdb console output should be used */
@@ -96,6 +99,7 @@ static int __init opt_kgdb_con(char *str)
 early_param("kgdbcon", opt_kgdb_con);
 
 module_param(kgdb_use_con, int, 0644);
+module_param(kgdbreboot, int, 0644);
 
 /*
  * Holds information about breakpoints in a kernel. These breakpoints are
@@ -784,6 +788,33 @@ void __init dbg_late_init(void)
        kdb_init(KDB_INIT_FULL);
 }
 
+static int
+dbg_notify_reboot(struct notifier_block *this, unsigned long code, void *x)
+{
+       /*
+        * Take the following action on reboot notify depending on value:
+        *    1 == Enter debugger
+        *    0 == [the default] detach debug client
+        *   -1 == Do nothing... and use this until the board resets
+        */
+       switch (kgdbreboot) {
+       case 1:
+               kgdb_breakpoint();      /* falls through to "goto done" */
+       case -1:
+               goto done;
+       }
+       if (!dbg_kdb_mode)
+               gdbstub_exit(code);
+done:
+       return NOTIFY_DONE;
+}
+
+static struct notifier_block dbg_reboot_notifier = {
+       .notifier_call          = dbg_notify_reboot,
+       .next                   = NULL,
+       .priority               = INT_MAX,
+};
+
 static void kgdb_register_callbacks(void)
 {
        if (!kgdb_io_module_registered) {
@@ -791,6 +822,7 @@ static void kgdb_register_callbacks(void)
                kgdb_arch_init();
                if (!dbg_is_early)
                        kgdb_arch_late();
+               register_reboot_notifier(&dbg_reboot_notifier);
                atomic_notifier_chain_register(&panic_notifier_list,
                                               &kgdb_panic_event_nb);
 #ifdef CONFIG_MAGIC_SYSRQ
@@ -812,6 +844,7 @@ static void kgdb_unregister_callbacks(void)
         */
        if (kgdb_io_module_registered) {
                kgdb_io_module_registered = 0;
+               unregister_reboot_notifier(&dbg_reboot_notifier);
                atomic_notifier_chain_unregister(&panic_notifier_list,
                                               &kgdb_panic_event_nb);
                kgdb_arch_exit();
index c22d8c28ad848c63003ffb00bf235d9d0ab34649..ce615e064482c00fcfb6c98cd4b3f4fa98435ba6 100644 (file)
@@ -1111,6 +1111,13 @@ void gdbstub_exit(int status)
        unsigned char checksum, ch, buffer[3];
        int loop;
 
+       if (!kgdb_connected)
+               return;
+       kgdb_connected = 0;
+
+       if (!dbg_io_ops || dbg_kdb_mode)
+               return;
+
        buffer[0] = 'W';
        buffer[1] = hex_asc_hi(status);
        buffer[2] = hex_asc_lo(status);
@@ -1129,5 +1136,6 @@ void gdbstub_exit(int status)
        dbg_io_ops->write_char(hex_asc_lo(checksum));
 
        /* make sure the output is flushed, lest the bootloader clobber it */
-       dbg_io_ops->flush();
+       if (dbg_io_ops->flush)
+               dbg_io_ops->flush();
 }
index 20059ef4459a4ff293428337d9936ff438a8eb96..8418c2f8ec5dd255d18794ca13cf895a3b588a76 100644 (file)
@@ -153,6 +153,13 @@ static int _kdb_bp_install(struct pt_regs *regs, kdb_bp_t *bp)
        } else {
                kdb_printf("%s: failed to set breakpoint at 0x%lx\n",
                           __func__, bp->bp_addr);
+#ifdef CONFIG_DEBUG_RODATA
+               if (!bp->bp_type) {
+                       kdb_printf("Software breakpoints are unavailable.\n"
+                                  "  Change the kernel CONFIG_DEBUG_RODATA=n\n"
+                                  "  OR use hw breaks: help bph\n");
+               }
+#endif
                return 1;
        }
        return 0;
index 4802eb5840e1a138057444fbb7e1a7953a507d96..9b5f17da1c560e65fe945a9ab580f4de87ccb1ac 100644 (file)
@@ -689,7 +689,7 @@ kdb_printit:
        if (!dbg_kdb_mode && kgdb_connected) {
                gdbstub_msg_write(kdb_buffer, retlen);
        } else {
-               if (!dbg_io_ops->is_console) {
+               if (dbg_io_ops && !dbg_io_ops->is_console) {
                        len = strlen(kdb_buffer);
                        cp = kdb_buffer;
                        while (len--) {
index 4bca634975c0ed008de18d415e348a4cbab966af..118527aa60eae183f6d3b882ff2102a0c9a6012a 100644 (file)
@@ -25,6 +25,7 @@
 #define KBD_STAT_MOUSE_OBF     0x20    /* Mouse output buffer full */
 
 static int kbd_exists;
+static int kbd_last_ret;
 
 /*
  * Check if the keyboard controller has a keypress for us.
@@ -90,8 +91,11 @@ int kdb_get_kbd_char(void)
                return -1;
        }
 
-       if ((scancode & 0x80) != 0)
+       if ((scancode & 0x80) != 0) {
+               if (scancode == 0x9c)
+                       kbd_last_ret = 0;
                return -1;
+       }
 
        scancode &= 0x7f;
 
@@ -178,35 +182,82 @@ int kdb_get_kbd_char(void)
                return -1;      /* ignore unprintables */
        }
 
-       if ((scancode & 0x7f) == 0x1c) {
-               /*
-                * enter key.  All done.  Absorb the release scancode.
-                */
+       if (scancode == 0x1c) {
+               kbd_last_ret = 1;
+               return 13;
+       }
+
+       return keychar & 0xff;
+}
+EXPORT_SYMBOL_GPL(kdb_get_kbd_char);
+
+/*
+ * Best effort cleanup of ENTER break codes on leaving KDB. Called on
+ * exiting KDB, when we know we processed an ENTER or KP ENTER scan
+ * code.
+ */
+void kdb_kbd_cleanup_state(void)
+{
+       int scancode, scanstatus;
+
+       /*
+        * Nothing to clean up, since either
+        * ENTER was never pressed, or has already
+        * gotten cleaned up.
+        */
+       if (!kbd_last_ret)
+               return;
+
+       kbd_last_ret = 0;
+       /*
+        * Enter key. Need to absorb the break code here, lest it gets
+        * leaked out if we exit KDB as the result of processing 'g'.
+        *
+        * This has several interesting implications:
+        * + Need to handle KP ENTER, which has break code 0xe0 0x9c.
+        * + Need to handle repeat ENTER and repeat KP ENTER. Repeats
+        *   only get a break code at the end of the repeated
+        *   sequence. This means we can't propagate the repeated key
+        *   press, and must swallow it away.
+        * + Need to handle possible PS/2 mouse input.
+        * + Need to handle mashed keys.
+        */
+
+       while (1) {
                while ((inb(KBD_STATUS_REG) & KBD_STAT_OBF) == 0)
-                       ;
+                       cpu_relax();
 
                /*
-                * Fetch the scancode
+                * Fetch the scancode.
                 */
                scancode = inb(KBD_DATA_REG);
                scanstatus = inb(KBD_STATUS_REG);
 
-               while (scanstatus & KBD_STAT_MOUSE_OBF) {
-                       scancode = inb(KBD_DATA_REG);
-                       scanstatus = inb(KBD_STATUS_REG);
-               }
+               /*
+                * Skip mouse input.
+                */
+               if (scanstatus & KBD_STAT_MOUSE_OBF)
+                       continue;
 
-               if (scancode != 0x9c) {
-                       /*
-                        * Wasn't an enter-release,  why not?
-                        */
-                       kdb_printf("kdb: expected enter got 0x%x status 0x%x\n",
-                              scancode, scanstatus);
-               }
+               /*
+                * If we see 0xe0, this is either a break code for KP
+                * ENTER, or a repeat make for KP ENTER. Either way,
+                * since the second byte is equivalent to an ENTER,
+                * skip the 0xe0 and try again.
+                *
+                * If we see 0x1c, this must be a repeat ENTER or KP
+                * ENTER (and we swallowed 0xe0 before). Try again.
+                *
+                * We can also see make and break codes for other keys
+                * mashed before or after pressing ENTER. Thus, if we
+                * see anything other than 0x9c, we have to try again.
+                *
+                * Note, if you held some key as ENTER was depressed,
+                * that break code would get leaked out.
+                */
+               if (scancode != 0x9c)
+                       continue;
 
-               return 13;
+               return;
        }
-
-       return keychar & 0xff;
 }
-EXPORT_SYMBOL_GPL(kdb_get_kbd_char);
index e2ae7349437f2b1a36879be0ca713b9ea4e8948b..67b847dfa2bb64b58d30db51460f22243142ee2d 100644 (file)
@@ -1400,6 +1400,9 @@ int kdb_main_loop(kdb_reason_t reason, kdb_reason_t reason2, int error,
        if (KDB_STATE(DOING_SS))
                KDB_STATE_CLEAR(SSBPT);
 
+       /* Clean up any keyboard devices before leaving */
+       kdb_kbd_cleanup_state();
+
        return result;
 }
 
index e381d105b40b827c71660d6f5265d197dba18c9f..47c4e56e513ba72ec485afdaa2ad0e30bac6e30f 100644 (file)
@@ -246,6 +246,13 @@ extern void debug_kusage(void);
 
 extern void kdb_set_current_task(struct task_struct *);
 extern struct task_struct *kdb_current_task;
+
+#ifdef CONFIG_KDB_KEYBOARD
+extern void kdb_kbd_cleanup_state(void);
+#else /* ! CONFIG_KDB_KEYBOARD */
+#define kdb_kbd_cleanup_state()
+#endif /* ! CONFIG_KDB_KEYBOARD */
+
 #ifdef CONFIG_MODULES
 extern struct list_head *kdb_modules;
 #endif /* CONFIG_MODULES */
index 49f15ef0a99a3f7f78adcd687f3ed54c421b2a0b..7658fd6536dd241c78bca77415b49994339b8516 100644 (file)
@@ -2817,7 +2817,7 @@ loop_again:
                                testorder = 0;
 
                        if ((buffer_heads_over_limit && is_highmem_idx(i)) ||
-                                   !zone_watermark_ok_safe(zone, order,
+                                   !zone_watermark_ok_safe(zone, testorder,
                                        high_wmark_pages(zone) + balance_gap,
                                        end_zone, 0)) {
                                shrink_zone(priority, zone, &sc);
index 0f3eb7d79a2da7ae3ea9102b5f9b1501c1b6690b..452db7090d18b39995a302843d05eeed0042414d 100644 (file)
@@ -3560,7 +3560,8 @@ EXPORT_SYMBOL(napi_gro_receive);
 static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
 {
        __skb_pull(skb, skb_headlen(skb));
-       skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
+       /* restore the reserve we had after netdev_alloc_skb_ip_align() */
+       skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb));
        skb->vlan_tci = 0;
        skb->dev = napi->dev;
        skb->skb_iif = 0;
index e41c40f48cfe053f28baa3fe9dcf4179ce5a1f37..d4fad5c774471529dbe76703a7c4e95d2b694b6e 100644 (file)
@@ -1079,6 +1079,7 @@ __be32 inet_confirm_addr(struct in_device *in_dev,
 
        return addr;
 }
+EXPORT_SYMBOL(inet_confirm_addr);
 
 /*
  *     Device notifier
index 0e58f09e59fb345501123159952027f8f854e539..851acec852d284bbe61f7fc9fd9b1cf2ab4bb1f1 100644 (file)
@@ -52,7 +52,7 @@ iptable_filter_hook(unsigned int hook, struct sk_buff *skb,
 static struct nf_hook_ops *filter_ops __read_mostly;
 
 /* Default to forward because I got too much mail already. */
-static bool forward = NF_ACCEPT;
+static bool forward = true;
 module_param(forward, bool, 0000);
 
 static int __net_init iptable_filter_net_init(struct net *net)
@@ -64,7 +64,7 @@ static int __net_init iptable_filter_net_init(struct net *net)
                return -ENOMEM;
        /* Entry 1 is the FORWARD hook */
        ((struct ipt_standard *)repl->entries)[1].target.verdict =
-               -forward - 1;
+               forward ? -NF_ACCEPT - 1 : -NF_DROP - 1;
 
        net->ipv4.iptable_filter =
                ipt_register_table(net, &packet_filter, repl);
@@ -88,11 +88,6 @@ static int __init iptable_filter_init(void)
 {
        int ret;
 
-       if (forward < 0 || forward > NF_MAX_VERDICT) {
-               pr_err("iptables forward must be 0 or 1\n");
-               return -EINVAL;
-       }
-
        ret = register_pernet_subsys(&iptable_filter_net_ops);
        if (ret < 0)
                return ret;
index a8f6da97e3b252368469065ce0159295eb00e96b..325e59a0224ffa3f0b08e7474628d614e2d3ee49 100644 (file)
@@ -44,7 +44,7 @@ ip6table_filter_hook(unsigned int hook, struct sk_buff *skb,
 static struct nf_hook_ops *filter_ops __read_mostly;
 
 /* Default to forward because I got too much mail already. */
-static bool forward = NF_ACCEPT;
+static bool forward = true;
 module_param(forward, bool, 0000);
 
 static int __net_init ip6table_filter_net_init(struct net *net)
@@ -56,7 +56,7 @@ static int __net_init ip6table_filter_net_init(struct net *net)
                return -ENOMEM;
        /* Entry 1 is the FORWARD hook */
        ((struct ip6t_standard *)repl->entries)[1].target.verdict =
-               -forward - 1;
+               forward ? -NF_ACCEPT - 1 : -NF_DROP - 1;
 
        net->ipv6.ip6table_filter =
                ip6t_register_table(net, &packet_filter, repl);
@@ -80,11 +80,6 @@ static int __init ip6table_filter_init(void)
 {
        int ret;
 
-       if (forward < 0 || forward > NF_MAX_VERDICT) {
-               pr_err("iptables forward must be 0 or 1\n");
-               return -EINVAL;
-       }
-
        ret = register_pernet_subsys(&ip6table_filter_net_ops);
        if (ret < 0)
                return ret;
index 9b071910b4ba6a31ea4edc5b5dd53c84375c7f23..1addd9f3f40a05938273ce0a99c6d5d9c5d940bb 100644 (file)
@@ -1845,3 +1845,4 @@ MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
 MODULE_DESCRIPTION("PPP over L2TP over UDP");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(PPPOL2TP_DRV_VERSION);
+MODULE_ALIAS("pppox-proto-" __stringify(PX_PROTO_OL2TP));
index 2560e7b441c60a2b7a8c76aa6de3c4d62bd92035..7c94aedd0912777d6d0956f0f5b048f0f62ab97d 100644 (file)
@@ -597,7 +597,7 @@ int netlbl_secattr_catmap_setrng(struct netlbl_lsm_secattr_catmap *catmap,
                        iter = iter->next;
                        iter_max_spot = iter->startbit + NETLBL_CATMAP_SIZE;
                }
-               ret_val = netlbl_secattr_catmap_setbit(iter, spot, GFP_ATOMIC);
+               ret_val = netlbl_secattr_catmap_setbit(iter, spot, flags);
        }
 
        return ret_val;
index 51c868923f64d6a0e92fd741d73cd258fcc78089..a1e116277477541606e51e0c4af31aa53af4b673 100644 (file)
@@ -749,7 +749,7 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
        int ret;
 
        /* XXX too lazy? */
-       ic = kzalloc(sizeof(struct rds_ib_connection), GFP_KERNEL);
+       ic = kzalloc(sizeof(struct rds_ib_connection), gfp);
        if (!ic)
                return -ENOMEM;
 
index 9556d2895f7a35c72135989520a186618ed4e6bf..a91e1db62ee6a1833e65372989b0644d980899e6 100644 (file)
@@ -694,7 +694,7 @@ int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp)
        unsigned long flags;
 
        /* XXX too lazy? */
-       ic = kzalloc(sizeof(struct rds_iw_connection), GFP_KERNEL);
+       ic = kzalloc(sizeof(struct rds_iw_connection), gfp);
        if (!ic)
                return -ENOMEM;
 
index 87ff2a8a454b33fa32972bd83b84269cb077a74a..6b12b68541ae96fb8be76e72cb8d0e6f8c89abee 100644 (file)
@@ -121,7 +121,7 @@ static int rds_loop_conn_alloc(struct rds_connection *conn, gfp_t gfp)
        struct rds_loop_connection *lc;
        unsigned long flags;
 
-       lc = kzalloc(sizeof(struct rds_loop_connection), GFP_KERNEL);
+       lc = kzalloc(sizeof(struct rds_loop_connection), gfp);
        if (!lc)
                return -ENOMEM;
 
index ffd243d09188dee10d2dffa34d642f7c941b9013..9fe8857d8d596e5eb59146416396b834b195b2c1 100644 (file)
@@ -39,3 +39,16 @@ config RPCSEC_GSS_KRB5
          Kerberos support should be installed.
 
          If unsure, say Y.
+
+config SUNRPC_DEBUG
+       bool "RPC: Enable dprintk debugging"
+       depends on SUNRPC && SYSCTL
+       help
+         This option enables a sysctl-based debugging interface
+         that is used by the 'rpcdebug' utility to turn on or off
+         logging of different aspects of the kernel RPC activity.
+
+         Disabling this option will make your kernel slightly smaller,
+         but makes troubleshooting NFS issues significantly harder.
+
+         If unsure, say Y.
index ee77742e0ed6d3dcc89401808028b0228a666aaf..d11418f97f1fa58cb04b3badb1a03d21b91af7a5 100644 (file)
@@ -156,8 +156,9 @@ static size_t rpc_pton4(const char *buf, const size_t buflen,
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-static int rpc_parse_scope_id(const char *buf, const size_t buflen,
-                             const char *delim, struct sockaddr_in6 *sin6)
+static int rpc_parse_scope_id(struct net *net, const char *buf,
+                             const size_t buflen, const char *delim,
+                             struct sockaddr_in6 *sin6)
 {
        char *p;
        size_t len;
@@ -177,7 +178,7 @@ static int rpc_parse_scope_id(const char *buf, const size_t buflen,
                unsigned long scope_id = 0;
                struct net_device *dev;
 
-               dev = dev_get_by_name(&init_net, p);
+               dev = dev_get_by_name(net, p);
                if (dev != NULL) {
                        scope_id = dev->ifindex;
                        dev_put(dev);
@@ -197,7 +198,7 @@ static int rpc_parse_scope_id(const char *buf, const size_t buflen,
        return 0;
 }
 
-static size_t rpc_pton6(const char *buf, const size_t buflen,
+static size_t rpc_pton6(struct net *net, const char *buf, const size_t buflen,
                        struct sockaddr *sap, const size_t salen)
 {
        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
@@ -213,14 +214,14 @@ static size_t rpc_pton6(const char *buf, const size_t buflen,
        if (in6_pton(buf, buflen, addr, IPV6_SCOPE_DELIMITER, &delim) == 0)
                return 0;
 
-       if (!rpc_parse_scope_id(buf, buflen, delim, sin6))
+       if (!rpc_parse_scope_id(net, buf, buflen, delim, sin6))
                return 0;
 
        sin6->sin6_family = AF_INET6;
        return sizeof(struct sockaddr_in6);
 }
 #else
-static size_t rpc_pton6(const char *buf, const size_t buflen,
+static size_t rpc_pton6(struct net *net, const char *buf, const size_t buflen,
                        struct sockaddr *sap, const size_t salen)
 {
        return 0;
@@ -229,6 +230,7 @@ static size_t rpc_pton6(const char *buf, const size_t buflen,
 
 /**
  * rpc_pton - Construct a sockaddr in @sap
+ * @net: applicable network namespace
  * @buf: C string containing presentation format IP address
  * @buflen: length of presentation address in bytes
  * @sap: buffer into which to plant socket address
@@ -241,14 +243,14 @@ static size_t rpc_pton6(const char *buf, const size_t buflen,
  * socket address, if successful.  Returns zero if an error
  * occurred.
  */
-size_t rpc_pton(const char *buf, const size_t buflen,
+size_t rpc_pton(struct net *net, const char *buf, const size_t buflen,
                struct sockaddr *sap, const size_t salen)
 {
        unsigned int i;
 
        for (i = 0; i < buflen; i++)
                if (buf[i] == ':')
-                       return rpc_pton6(buf, buflen, sap, salen);
+                       return rpc_pton6(net, buf, buflen, sap, salen);
        return rpc_pton4(buf, buflen, sap, salen);
 }
 EXPORT_SYMBOL_GPL(rpc_pton);
@@ -295,6 +297,7 @@ char *rpc_sockaddr2uaddr(const struct sockaddr *sap, gfp_t gfp_flags)
 
 /**
  * rpc_uaddr2sockaddr - convert a universal address to a socket address.
+ * @net: applicable network namespace
  * @uaddr: C string containing universal address to convert
  * @uaddr_len: length of universal address string
  * @sap: buffer into which to plant socket address
@@ -306,8 +309,9 @@ char *rpc_sockaddr2uaddr(const struct sockaddr *sap, gfp_t gfp_flags)
  * Returns the size of the socket address if successful; otherwise
  * zero is returned.
  */
-size_t rpc_uaddr2sockaddr(const char *uaddr, const size_t uaddr_len,
-                         struct sockaddr *sap, const size_t salen)
+size_t rpc_uaddr2sockaddr(struct net *net, const char *uaddr,
+                         const size_t uaddr_len, struct sockaddr *sap,
+                         const size_t salen)
 {
        char *c, buf[RPCBIND_MAXUADDRLEN + sizeof('\0')];
        unsigned long portlo, porthi;
@@ -339,7 +343,7 @@ size_t rpc_uaddr2sockaddr(const char *uaddr, const size_t uaddr_len,
        port = (unsigned short)((porthi << 8) | portlo);
 
        *c = '\0';
-       if (rpc_pton(buf, strlen(buf), sap, salen) == 0)
+       if (rpc_pton(net, buf, strlen(buf), sap, salen) == 0)
                return 0;
 
        switch (sap->sa_family) {
index affa631ac1abe3d42f75635a30e1f066dccbfb44..d3ad81f8da5b79551c36b17a7d53007406946699 100644 (file)
@@ -81,7 +81,7 @@ struct gss_auth {
         * mechanism (for example, "krb5") and exists for
         * backwards-compatibility with older gssd's.
         */
-       struct dentry *dentry[2];
+       struct rpc_pipe *pipe[2];
 };
 
 /* pipe_version >= 0 if and only if someone has a pipe open. */
@@ -112,7 +112,7 @@ gss_put_ctx(struct gss_cl_ctx *ctx)
 /* gss_cred_set_ctx:
  * called by gss_upcall_callback and gss_create_upcall in order
  * to set the gss context. The actual exchange of an old context
- * and a new one is protected by the inode->i_lock.
+ * and a new one is protected by the pipe->lock.
  */
 static void
 gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx)
@@ -251,7 +251,7 @@ struct gss_upcall_msg {
        struct rpc_pipe_msg msg;
        struct list_head list;
        struct gss_auth *auth;
-       struct rpc_inode *inode;
+       struct rpc_pipe *pipe;
        struct rpc_wait_queue rpc_waitqueue;
        wait_queue_head_t waitqueue;
        struct gss_cl_ctx *ctx;
@@ -294,10 +294,10 @@ gss_release_msg(struct gss_upcall_msg *gss_msg)
 }
 
 static struct gss_upcall_msg *
-__gss_find_upcall(struct rpc_inode *rpci, uid_t uid)
+__gss_find_upcall(struct rpc_pipe *pipe, uid_t uid)
 {
        struct gss_upcall_msg *pos;
-       list_for_each_entry(pos, &rpci->in_downcall, list) {
+       list_for_each_entry(pos, &pipe->in_downcall, list) {
                if (pos->uid != uid)
                        continue;
                atomic_inc(&pos->count);
@@ -315,18 +315,17 @@ __gss_find_upcall(struct rpc_inode *rpci, uid_t uid)
 static inline struct gss_upcall_msg *
 gss_add_msg(struct gss_upcall_msg *gss_msg)
 {
-       struct rpc_inode *rpci = gss_msg->inode;
-       struct inode *inode = &rpci->vfs_inode;
+       struct rpc_pipe *pipe = gss_msg->pipe;
        struct gss_upcall_msg *old;
 
-       spin_lock(&inode->i_lock);
-       old = __gss_find_upcall(rpci, gss_msg->uid);
+       spin_lock(&pipe->lock);
+       old = __gss_find_upcall(pipe, gss_msg->uid);
        if (old == NULL) {
                atomic_inc(&gss_msg->count);
-               list_add(&gss_msg->list, &rpci->in_downcall);
+               list_add(&gss_msg->list, &pipe->in_downcall);
        } else
                gss_msg = old;
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&pipe->lock);
        return gss_msg;
 }
 
@@ -342,14 +341,14 @@ __gss_unhash_msg(struct gss_upcall_msg *gss_msg)
 static void
 gss_unhash_msg(struct gss_upcall_msg *gss_msg)
 {
-       struct inode *inode = &gss_msg->inode->vfs_inode;
+       struct rpc_pipe *pipe = gss_msg->pipe;
 
        if (list_empty(&gss_msg->list))
                return;
-       spin_lock(&inode->i_lock);
+       spin_lock(&pipe->lock);
        if (!list_empty(&gss_msg->list))
                __gss_unhash_msg(gss_msg);
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&pipe->lock);
 }
 
 static void
@@ -376,11 +375,11 @@ gss_upcall_callback(struct rpc_task *task)
        struct gss_cred *gss_cred = container_of(task->tk_rqstp->rq_cred,
                        struct gss_cred, gc_base);
        struct gss_upcall_msg *gss_msg = gss_cred->gc_upcall;
-       struct inode *inode = &gss_msg->inode->vfs_inode;
+       struct rpc_pipe *pipe = gss_msg->pipe;
 
-       spin_lock(&inode->i_lock);
+       spin_lock(&pipe->lock);
        gss_handle_downcall_result(gss_cred, gss_msg);
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&pipe->lock);
        task->tk_status = gss_msg->msg.errno;
        gss_release_msg(gss_msg);
 }
@@ -450,7 +449,7 @@ gss_alloc_msg(struct gss_auth *gss_auth, struct rpc_clnt *clnt,
                kfree(gss_msg);
                return ERR_PTR(vers);
        }
-       gss_msg->inode = RPC_I(gss_auth->dentry[vers]->d_inode);
+       gss_msg->pipe = gss_auth->pipe[vers];
        INIT_LIST_HEAD(&gss_msg->list);
        rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq");
        init_waitqueue_head(&gss_msg->waitqueue);
@@ -474,8 +473,7 @@ gss_setup_upcall(struct rpc_clnt *clnt, struct gss_auth *gss_auth, struct rpc_cr
                return gss_new;
        gss_msg = gss_add_msg(gss_new);
        if (gss_msg == gss_new) {
-               struct inode *inode = &gss_new->inode->vfs_inode;
-               int res = rpc_queue_upcall(inode, &gss_new->msg);
+               int res = rpc_queue_upcall(gss_new->pipe, &gss_new->msg);
                if (res) {
                        gss_unhash_msg(gss_new);
                        gss_msg = ERR_PTR(res);
@@ -506,7 +504,7 @@ gss_refresh_upcall(struct rpc_task *task)
        struct gss_cred *gss_cred = container_of(cred,
                        struct gss_cred, gc_base);
        struct gss_upcall_msg *gss_msg;
-       struct inode *inode;
+       struct rpc_pipe *pipe;
        int err = 0;
 
        dprintk("RPC: %5u gss_refresh_upcall for uid %u\n", task->tk_pid,
@@ -524,8 +522,8 @@ gss_refresh_upcall(struct rpc_task *task)
                err = PTR_ERR(gss_msg);
                goto out;
        }
-       inode = &gss_msg->inode->vfs_inode;
-       spin_lock(&inode->i_lock);
+       pipe = gss_msg->pipe;
+       spin_lock(&pipe->lock);
        if (gss_cred->gc_upcall != NULL)
                rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL);
        else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) {
@@ -538,7 +536,7 @@ gss_refresh_upcall(struct rpc_task *task)
                gss_handle_downcall_result(gss_cred, gss_msg);
                err = gss_msg->msg.errno;
        }
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&pipe->lock);
        gss_release_msg(gss_msg);
 out:
        dprintk("RPC: %5u gss_refresh_upcall for uid %u result %d\n",
@@ -549,7 +547,7 @@ out:
 static inline int
 gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
 {
-       struct inode *inode;
+       struct rpc_pipe *pipe;
        struct rpc_cred *cred = &gss_cred->gc_base;
        struct gss_upcall_msg *gss_msg;
        DEFINE_WAIT(wait);
@@ -573,14 +571,14 @@ retry:
                err = PTR_ERR(gss_msg);
                goto out;
        }
-       inode = &gss_msg->inode->vfs_inode;
+       pipe = gss_msg->pipe;
        for (;;) {
                prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_KILLABLE);
-               spin_lock(&inode->i_lock);
+               spin_lock(&pipe->lock);
                if (gss_msg->ctx != NULL || gss_msg->msg.errno < 0) {
                        break;
                }
-               spin_unlock(&inode->i_lock);
+               spin_unlock(&pipe->lock);
                if (fatal_signal_pending(current)) {
                        err = -ERESTARTSYS;
                        goto out_intr;
@@ -591,7 +589,7 @@ retry:
                gss_cred_set_ctx(cred, gss_msg->ctx);
        else
                err = gss_msg->msg.errno;
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&pipe->lock);
 out_intr:
        finish_wait(&gss_msg->waitqueue, &wait);
        gss_release_msg(gss_msg);
@@ -609,7 +607,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
        const void *p, *end;
        void *buf;
        struct gss_upcall_msg *gss_msg;
-       struct inode *inode = filp->f_path.dentry->d_inode;
+       struct rpc_pipe *pipe = RPC_I(filp->f_dentry->d_inode)->pipe;
        struct gss_cl_ctx *ctx;
        uid_t uid;
        ssize_t err = -EFBIG;
@@ -639,14 +637,14 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 
        err = -ENOENT;
        /* Find a matching upcall */
-       spin_lock(&inode->i_lock);
-       gss_msg = __gss_find_upcall(RPC_I(inode), uid);
+       spin_lock(&pipe->lock);
+       gss_msg = __gss_find_upcall(pipe, uid);
        if (gss_msg == NULL) {
-               spin_unlock(&inode->i_lock);
+               spin_unlock(&pipe->lock);
                goto err_put_ctx;
        }
        list_del_init(&gss_msg->list);
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&pipe->lock);
 
        p = gss_fill_context(p, end, ctx, gss_msg->auth->mech);
        if (IS_ERR(p)) {
@@ -674,9 +672,9 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
        err = mlen;
 
 err_release_msg:
-       spin_lock(&inode->i_lock);
+       spin_lock(&pipe->lock);
        __gss_unhash_msg(gss_msg);
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&pipe->lock);
        gss_release_msg(gss_msg);
 err_put_ctx:
        gss_put_ctx(ctx);
@@ -722,23 +720,23 @@ static int gss_pipe_open_v1(struct inode *inode)
 static void
 gss_pipe_release(struct inode *inode)
 {
-       struct rpc_inode *rpci = RPC_I(inode);
+       struct rpc_pipe *pipe = RPC_I(inode)->pipe;
        struct gss_upcall_msg *gss_msg;
 
 restart:
-       spin_lock(&inode->i_lock);
-       list_for_each_entry(gss_msg, &rpci->in_downcall, list) {
+       spin_lock(&pipe->lock);
+       list_for_each_entry(gss_msg, &pipe->in_downcall, list) {
 
                if (!list_empty(&gss_msg->msg.list))
                        continue;
                gss_msg->msg.errno = -EPIPE;
                atomic_inc(&gss_msg->count);
                __gss_unhash_msg(gss_msg);
-               spin_unlock(&inode->i_lock);
+               spin_unlock(&pipe->lock);
                gss_release_msg(gss_msg);
                goto restart;
        }
-       spin_unlock(&inode->i_lock);
+       spin_unlock(&pipe->lock);
 
        put_pipe_version();
 }
@@ -759,6 +757,75 @@ gss_pipe_destroy_msg(struct rpc_pipe_msg *msg)
        }
 }
 
+/*
+ * Unlink the pipefs dentries of both upcall pipes belonging to @auth.
+ * A pipe whose ->dentry is NULL is left alone.
+ */
+static void gss_pipes_dentries_destroy(struct rpc_auth *auth)
+{
+       struct gss_auth *gss_auth = container_of(auth, struct gss_auth,
+                                                rpc_auth);
+       struct rpc_pipe *pipe_v0 = gss_auth->pipe[0];
+       struct rpc_pipe *pipe_v1 = gss_auth->pipe[1];
+
+       if (pipe_v0->dentry != NULL)
+               rpc_unlink(pipe_v0->dentry);
+       if (pipe_v1->dentry != NULL)
+               rpc_unlink(pipe_v1->dentry);
+}
+
+/*
+ * Create the rpc_pipefs dentries for both GSS upcall pipes of @auth under
+ * the client's pipefs directory (clnt->cl_dentry).
+ *
+ * The "gssd" pipe (pipe[1]) is created before the mechanism-named pipe
+ * (pipe[0]).  A pipe's ->dentry is only assigned once rpc_mkpipe_dentry()
+ * has succeeded, so a failed call never leaves an ERR_PTR() value in
+ * ->dentry for gss_pipes_dentries_destroy() to hand to rpc_unlink().
+ *
+ * Returns 0 on success, or the negative errno encoded in the pointer
+ * returned by the failing rpc_mkpipe_dentry() call.  If the second pipe
+ * cannot be created, the first one is unlinked again.
+ */
+static int gss_pipes_dentries_create(struct rpc_auth *auth)
+{
+       struct gss_auth *gss_auth;
+       struct rpc_clnt *clnt;
+       struct dentry *dentry;
+
+       gss_auth = container_of(auth, struct gss_auth, rpc_auth);
+       clnt = gss_auth->client;
+
+       dentry = rpc_mkpipe_dentry(clnt->cl_dentry, "gssd",
+                                  clnt, gss_auth->pipe[1]);
+       if (IS_ERR(dentry))
+               return PTR_ERR(dentry);
+       gss_auth->pipe[1]->dentry = dentry;
+
+       dentry = rpc_mkpipe_dentry(clnt->cl_dentry, gss_auth->mech->gm_name,
+                                  clnt, gss_auth->pipe[0]);
+       if (IS_ERR(dentry)) {
+               /* Undo the "gssd" pipe; pipe[0]->dentry was never set. */
+               rpc_unlink(gss_auth->pipe[1]->dentry);
+               return PTR_ERR(dentry);
+       }
+       gss_auth->pipe[0]->dentry = dentry;
+       return 0;
+}
+
+/*
+ * Tear down the pipe dentries of @auth in the network namespace of @clnt.
+ * Nothing is done when rpc_get_sb_net() returns no superblock for that
+ * namespace, or when the client has no pipefs directory (cl_dentry).
+ */
+static void gss_pipes_dentries_destroy_net(struct rpc_clnt *clnt,
+                                          struct rpc_auth *auth)
+{
+       struct net *net = rpc_net_ns(clnt);
+
+       if (rpc_get_sb_net(net) == NULL)
+               return;
+
+       if (clnt->cl_dentry)
+               gss_pipes_dentries_destroy(auth);
+       /* Pairs with the successful rpc_get_sb_net() above. */
+       rpc_put_sb_net(net);
+}
+
+/*
+ * Create the pipe dentries of @auth in the network namespace of @clnt.
+ *
+ * Returns 0 when rpc_get_sb_net() yields no superblock or the client has
+ * no pipefs directory (cl_dentry); otherwise returns whatever
+ * gss_pipes_dentries_create() returns.
+ */
+static int gss_pipes_dentries_create_net(struct rpc_clnt *clnt,
+                                        struct rpc_auth *auth)
+{
+       struct net *net = rpc_net_ns(clnt);
+       int ret = 0;
+
+       if (rpc_get_sb_net(net) == NULL)
+               return 0;
+
+       if (clnt->cl_dentry)
+               ret = gss_pipes_dentries_create(auth);
+       /* Pairs with the successful rpc_get_sb_net() above. */
+       rpc_put_sb_net(net);
+       return ret;
+}
+
 /*
  * NOTE: we have the opportunity to use different
  * parameters based on the input flavor (which must be a pseudoflavor)
@@ -801,32 +868,33 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
         * that we supported only the old pipe.  So we instead create
         * the new pipe first.
         */
-       gss_auth->dentry[1] = rpc_mkpipe(clnt->cl_path.dentry,
-                                        "gssd",
-                                        clnt, &gss_upcall_ops_v1,
-                                        RPC_PIPE_WAIT_FOR_OPEN);
-       if (IS_ERR(gss_auth->dentry[1])) {
-               err = PTR_ERR(gss_auth->dentry[1]);
+       gss_auth->pipe[1] = rpc_mkpipe_data(&gss_upcall_ops_v1,
+                                           RPC_PIPE_WAIT_FOR_OPEN);
+       if (IS_ERR(gss_auth->pipe[1])) {
+               err = PTR_ERR(gss_auth->pipe[1]);
                goto err_put_mech;
        }
 
-       gss_auth->dentry[0] = rpc_mkpipe(clnt->cl_path.dentry,
-                                        gss_auth->mech->gm_name,
-                                        clnt, &gss_upcall_ops_v0,
-                                        RPC_PIPE_WAIT_FOR_OPEN);
-       if (IS_ERR(gss_auth->dentry[0])) {
-               err = PTR_ERR(gss_auth->dentry[0]);
-               goto err_unlink_pipe_1;
+       gss_auth->pipe[0] = rpc_mkpipe_data(&gss_upcall_ops_v0,
+                                           RPC_PIPE_WAIT_FOR_OPEN);
+       if (IS_ERR(gss_auth->pipe[0])) {
+               err = PTR_ERR(gss_auth->pipe[0]);
+               goto err_destroy_pipe_1;
        }
+       err = gss_pipes_dentries_create_net(clnt, auth);
+       if (err)
+               goto err_destroy_pipe_0;
        err = rpcauth_init_credcache(auth);
        if (err)
-               goto err_unlink_pipe_0;
+               goto err_unlink_pipes;
 
        return auth;
-err_unlink_pipe_0:
-       rpc_unlink(gss_auth->dentry[0]);
-err_unlink_pipe_1:
-       rpc_unlink(gss_auth->dentry[1]);
+err_unlink_pipes:
+       gss_pipes_dentries_destroy_net(clnt, auth);
+err_destroy_pipe_0:
+       rpc_destroy_pipe_data(gss_auth->pipe[0]);
+err_destroy_pipe_1:
+       rpc_destroy_pipe_data(gss_auth->pipe[1]);
 err_put_mech:
        gss_mech_put(gss_auth->mech);
 err_free:
@@ -839,8 +907,9 @@ out_dec:
 static void
 gss_free(struct gss_auth *gss_auth)
 {
-       rpc_unlink(gss_auth->dentry[1]);
-       rpc_unlink(gss_auth->dentry[0]);
+       gss_pipes_dentries_destroy_net(gss_auth->client, &gss_auth->rpc_auth);
+       rpc_destroy_pipe_data(gss_auth->pipe[0]);
+       rpc_destroy_pipe_data(gss_auth->pipe[1]);
        gss_mech_put(gss_auth->mech);
 
        kfree(gss_auth);
@@ -1547,7 +1616,9 @@ static const struct rpc_authops authgss_ops = {
        .create         = gss_create,
        .destroy        = gss_destroy,
        .lookup_cred    = gss_lookup_cred,
-       .crcreate       = gss_create_cred
+       .crcreate       = gss_create_cred,
+       .pipes_create   = gss_pipes_dentries_create,
+       .pipes_destroy  = gss_pipes_dentries_destroy,
 };
 
 static const struct rpc_credops gss_credops = {
@@ -1591,6 +1662,21 @@ static const struct rpc_pipe_ops gss_upcall_ops_v1 = {
        .release_pipe   = gss_pipe_release,
 };
 
+/* Per-namespace init hook: forwards to gss_svc_init_net() for @net. */
+static __net_init int rpcsec_gss_init_net(struct net *net)
+{
+       int err = gss_svc_init_net(net);
+
+       return err;
+}
+
+/* Per-namespace exit hook: forwards to gss_svc_shutdown_net() for @net. */
+static __net_exit void rpcsec_gss_exit_net(struct net *net)
+{
+       gss_svc_shutdown_net(net);
+}
+
+/* Per-namespace hooks, registered from init_rpcsec_gss(). */
+static struct pernet_operations rpcsec_gss_net_ops = {
+       .exit   = rpcsec_gss_exit_net,
+       .init   = rpcsec_gss_init_net,
+};
+
 /*
  * Initialize RPCSEC_GSS module
  */
@@ -1604,8 +1690,13 @@ static int __init init_rpcsec_gss(void)
        err = gss_svc_init();
        if (err)
                goto out_unregister;
+       err = register_pernet_subsys(&rpcsec_gss_net_ops);
+       if (err)
+               goto out_svc_exit;
        rpc_init_wait_queue(&pipe_version_rpc_waitqueue, "gss pipe version");
        return 0;
+out_svc_exit:
+       gss_svc_shutdown();
 out_unregister:
        rpcauth_unregister(&authgss_ops);
 out:
@@ -1614,6 +1705,7 @@ out:
 
 static void __exit exit_rpcsec_gss(void)
 {
+       unregister_pernet_subsys(&rpcsec_gss_net_ops);
        gss_svc_shutdown();
        rpcauth_unregister(&authgss_ops);
        rcu_barrier(); /* Wait for completion of call_rcu()'s */
index 9576f35ab7014f2c506dfdd306d4201e3535b7a7..0f43e894bc0a47e913ca5999afc69d392cc6e6ad 100644 (file)
@@ -600,11 +600,14 @@ gss_krb5_cts_crypt(struct crypto_blkcipher *cipher, struct xdr_buf *buf,
        u32 ret;
        struct scatterlist sg[1];
        struct blkcipher_desc desc = { .tfm = cipher, .info = iv };
-       u8 data[crypto_blkcipher_blocksize(cipher) * 2];
+       u8 data[GSS_KRB5_MAX_BLOCKSIZE * 2];
        struct page **save_pages;
        u32 len = buf->len - offset;
 
-       BUG_ON(len > crypto_blkcipher_blocksize(cipher) * 2);
+       if (len > ARRAY_SIZE(data)) {
+               WARN_ON(0);
+               return -ENOMEM;
+       }
 
        /*
         * For encryption, we want to read from the cleartext
index 8c67890de427e35a01e2777db805d0366bea0c8c..8eff8c32d1b9b403c2365326c16e44df7c0923e6 100644 (file)
@@ -344,7 +344,7 @@ out_err:
        return PTR_ERR(p);
 }
 
-struct crypto_blkcipher *
+static struct crypto_blkcipher *
 context_v2_alloc_cipher(struct krb5_ctx *ctx, const char *cname, u8 *key)
 {
        struct crypto_blkcipher *cp;
index d7941eab77969cacc26e4b17229e87c34d6894f3..62ae3273186cdd94545d26742ae7a2ece246a685 100644 (file)
@@ -159,7 +159,7 @@ gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
        return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
 }
 
-u32
+static u32
 gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
                struct xdr_netobj *token)
 {
index 8d0f7d3c71c80864356c6106c36886609c8c3713..1600cfb1618cd2abb4ae14be87f96554100a862a 100644 (file)
@@ -48,6 +48,8 @@
 #include <linux/sunrpc/svcauth_gss.h>
 #include <linux/sunrpc/cache.h>
 
+#include "../netns.h"
+
 #ifdef RPC_DEBUG
 # define RPCDBG_FACILITY       RPCDBG_AUTH
 #endif
@@ -75,10 +77,8 @@ struct rsi {
        int                     major_status, minor_status;
 };
 
-static struct cache_head *rsi_table[RSI_HASHMAX];
-static struct cache_detail rsi_cache;
-static struct rsi *rsi_update(struct rsi *new, struct rsi *old);
-static struct rsi *rsi_lookup(struct rsi *item);
+static struct rsi *rsi_update(struct cache_detail *cd, struct rsi *new, struct rsi *old);
+static struct rsi *rsi_lookup(struct cache_detail *cd, struct rsi *item);
 
 static void rsi_free(struct rsi *rsii)
 {
@@ -216,7 +216,7 @@ static int rsi_parse(struct cache_detail *cd,
        if (dup_to_netobj(&rsii.in_token, buf, len))
                goto out;
 
-       rsip = rsi_lookup(&rsii);
+       rsip = rsi_lookup(cd, &rsii);
        if (!rsip)
                goto out;
 
@@ -258,21 +258,20 @@ static int rsi_parse(struct cache_detail *cd,
        if (dup_to_netobj(&rsii.out_token, buf, len))
                goto out;
        rsii.h.expiry_time = expiry;
-       rsip = rsi_update(&rsii, rsip);
+       rsip = rsi_update(cd, &rsii, rsip);
        status = 0;
 out:
        rsi_free(&rsii);
        if (rsip)
-               cache_put(&rsip->h, &rsi_cache);
+               cache_put(&rsip->h, cd);
        else
                status = -ENOMEM;
        return status;
 }
 
-static struct cache_detail rsi_cache = {
+static struct cache_detail rsi_cache_template = {
        .owner          = THIS_MODULE,
        .hash_size      = RSI_HASHMAX,
-       .hash_table     = rsi_table,
        .name           = "auth.rpcsec.init",
        .cache_put      = rsi_put,
        .cache_upcall   = rsi_upcall,
@@ -283,24 +282,24 @@ static struct cache_detail rsi_cache = {
        .alloc          = rsi_alloc,
 };
 
-static struct rsi *rsi_lookup(struct rsi *item)
+static struct rsi *rsi_lookup(struct cache_detail *cd, struct rsi *item)
 {
        struct cache_head *ch;
        int hash = rsi_hash(item);
 
-       ch = sunrpc_cache_lookup(&rsi_cache, &item->h, hash);
+       ch = sunrpc_cache_lookup(cd, &item->h, hash);
        if (ch)
                return container_of(ch, struct rsi, h);
        else
                return NULL;
 }
 
-static struct rsi *rsi_update(struct rsi *new, struct rsi *old)
+static struct rsi *rsi_update(struct cache_detail *cd, struct rsi *new, struct rsi *old)
 {
        struct cache_head *ch;
        int hash = rsi_hash(new);
 
-       ch = sunrpc_cache_update(&rsi_cache, &new->h,
+       ch = sunrpc_cache_update(cd, &new->h,
                                 &old->h, hash);
        if (ch)
                return container_of(ch, struct rsi, h);
@@ -339,10 +338,8 @@ struct rsc {
        char                    *client_name;
 };
 
-static struct cache_head *rsc_table[RSC_HASHMAX];
-static struct cache_detail rsc_cache;
-static struct rsc *rsc_update(struct rsc *new, struct rsc *old);
-static struct rsc *rsc_lookup(struct rsc *item);
+static struct rsc *rsc_update(struct cache_detail *cd, struct rsc *new, struct rsc *old);
+static struct rsc *rsc_lookup(struct cache_detail *cd, struct rsc *item);
 
 static void rsc_free(struct rsc *rsci)
 {
@@ -444,7 +441,7 @@ static int rsc_parse(struct cache_detail *cd,
        if (expiry == 0)
                goto out;
 
-       rscp = rsc_lookup(&rsci);
+       rscp = rsc_lookup(cd, &rsci);
        if (!rscp)
                goto out;
 
@@ -506,22 +503,21 @@ static int rsc_parse(struct cache_detail *cd,
 
        }
        rsci.h.expiry_time = expiry;
-       rscp = rsc_update(&rsci, rscp);
+       rscp = rsc_update(cd, &rsci, rscp);
        status = 0;
 out:
        gss_mech_put(gm);
        rsc_free(&rsci);
        if (rscp)
-               cache_put(&rscp->h, &rsc_cache);
+               cache_put(&rscp->h, cd);
        else
                status = -ENOMEM;
        return status;
 }
 
-static struct cache_detail rsc_cache = {
+static struct cache_detail rsc_cache_template = {
        .owner          = THIS_MODULE,
        .hash_size      = RSC_HASHMAX,
-       .hash_table     = rsc_table,
        .name           = "auth.rpcsec.context",
        .cache_put      = rsc_put,
        .cache_parse    = rsc_parse,
@@ -531,24 +527,24 @@ static struct cache_detail rsc_cache = {
        .alloc          = rsc_alloc,
 };
 
-static struct rsc *rsc_lookup(struct rsc *item)
+static struct rsc *rsc_lookup(struct cache_detail *cd, struct rsc *item)
 {
        struct cache_head *ch;
        int hash = rsc_hash(item);
 
-       ch = sunrpc_cache_lookup(&rsc_cache, &item->h, hash);
+       ch = sunrpc_cache_lookup(cd, &item->h, hash);
        if (ch)
                return container_of(ch, struct rsc, h);
        else
                return NULL;
 }
 
-static struct rsc *rsc_update(struct rsc *new, struct rsc *old)
+static struct rsc *rsc_update(struct cache_detail *cd, struct rsc *new, struct rsc *old)
 {
        struct cache_head *ch;
        int hash = rsc_hash(new);
 
-       ch = sunrpc_cache_update(&rsc_cache, &new->h,
+       ch = sunrpc_cache_update(cd, &new->h,
                                 &old->h, hash);
        if (ch)
                return container_of(ch, struct rsc, h);
@@ -558,7 +554,7 @@ static struct rsc *rsc_update(struct rsc *new, struct rsc *old)
 
 
 static struct rsc *
-gss_svc_searchbyctx(struct xdr_netobj *handle)
+gss_svc_searchbyctx(struct cache_detail *cd, struct xdr_netobj *handle)
 {
        struct rsc rsci;
        struct rsc *found;
@@ -566,11 +562,11 @@ gss_svc_searchbyctx(struct xdr_netobj *handle)
        memset(&rsci, 0, sizeof(rsci));
        if (dup_to_netobj(&rsci.handle, handle->data, handle->len))
                return NULL;
-       found = rsc_lookup(&rsci);
+       found = rsc_lookup(cd, &rsci);
        rsc_free(&rsci);
        if (!found)
                return NULL;
-       if (cache_check(&rsc_cache, &found->h, NULL))
+       if (cache_check(cd, &found->h, NULL))
                return NULL;
        return found;
 }
@@ -968,20 +964,20 @@ svcauth_gss_set_client(struct svc_rqst *rqstp)
 }
 
 static inline int
-gss_write_init_verf(struct svc_rqst *rqstp, struct rsi *rsip)
+gss_write_init_verf(struct cache_detail *cd, struct svc_rqst *rqstp, struct rsi *rsip)
 {
        struct rsc *rsci;
        int        rc;
 
        if (rsip->major_status != GSS_S_COMPLETE)
                return gss_write_null_verf(rqstp);
-       rsci = gss_svc_searchbyctx(&rsip->out_handle);
+       rsci = gss_svc_searchbyctx(cd, &rsip->out_handle);
        if (rsci == NULL) {
                rsip->major_status = GSS_S_NO_CONTEXT;
                return gss_write_null_verf(rqstp);
        }
        rc = gss_write_verf(rqstp, rsci->mechctx, GSS_SEQ_WIN);
-       cache_put(&rsci->h, &rsc_cache);
+       cache_put(&rsci->h, cd);
        return rc;
 }
 
@@ -1000,6 +996,7 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp,
        struct xdr_netobj tmpobj;
        struct rsi *rsip, rsikey;
        int ret;
+       struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, sunrpc_net_id);
 
        /* Read the verifier; should be NULL: */
        *authp = rpc_autherr_badverf;
@@ -1028,17 +1025,17 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp,
        }
 
        /* Perform upcall, or find upcall result: */
-       rsip = rsi_lookup(&rsikey);
+       rsip = rsi_lookup(sn->rsi_cache, &rsikey);
        rsi_free(&rsikey);
        if (!rsip)
                return SVC_CLOSE;
-       if (cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle) < 0)
+       if (cache_check(sn->rsi_cache, &rsip->h, &rqstp->rq_chandle) < 0)
                /* No upcall result: */
                return SVC_CLOSE;
 
        ret = SVC_CLOSE;
        /* Got an answer to the upcall; use it: */
-       if (gss_write_init_verf(rqstp, rsip))
+       if (gss_write_init_verf(sn->rsc_cache, rqstp, rsip))
                goto out;
        if (resv->iov_len + 4 > PAGE_SIZE)
                goto out;
@@ -1055,7 +1052,7 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp,
 
        ret = SVC_COMPLETE;
 out:
-       cache_put(&rsip->h, &rsi_cache);
+       cache_put(&rsip->h, sn->rsi_cache);
        return ret;
 }
 
@@ -1079,6 +1076,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
        __be32          *rpcstart;
        __be32          *reject_stat = resv->iov_base + resv->iov_len;
        int             ret;
+       struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, sunrpc_net_id);
 
        dprintk("RPC:       svcauth_gss: argv->iov_len = %zd\n",
                        argv->iov_len);
@@ -1129,7 +1127,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
        case RPC_GSS_PROC_DESTROY:
                /* Look up the context, and check the verifier: */
                *authp = rpcsec_gsserr_credproblem;
-               rsci = gss_svc_searchbyctx(&gc->gc_ctx);
+               rsci = gss_svc_searchbyctx(sn->rsc_cache, &gc->gc_ctx);
                if (!rsci)
                        goto auth_err;
                switch (gss_verify_header(rqstp, rsci, rpcstart, gc, authp)) {
@@ -1209,7 +1207,7 @@ drop:
        ret = SVC_DROP;
 out:
        if (rsci)
-               cache_put(&rsci->h, &rsc_cache);
+               cache_put(&rsci->h, sn->rsc_cache);
        return ret;
 }
 
@@ -1362,6 +1360,7 @@ svcauth_gss_release(struct svc_rqst *rqstp)
        struct rpc_gss_wire_cred *gc = &gsd->clcred;
        struct xdr_buf *resbuf = &rqstp->rq_res;
        int stat = -EINVAL;
+       struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, sunrpc_net_id);
 
        if (gc->gc_proc != RPC_GSS_PROC_DATA)
                goto out;
@@ -1404,7 +1403,7 @@ out_err:
                put_group_info(rqstp->rq_cred.cr_group_info);
        rqstp->rq_cred.cr_group_info = NULL;
        if (gsd->rsci)
-               cache_put(&gsd->rsci->h, &rsc_cache);
+               cache_put(&gsd->rsci->h, sn->rsc_cache);
        gsd->rsci = NULL;
 
        return stat;
@@ -1429,30 +1428,96 @@ static struct auth_ops svcauthops_gss = {
        .set_client     = svcauth_gss_set_client,
 };
 
+/*
+ * Instantiate the rsi cache for @net from rsi_cache_template, register it,
+ * and publish it in the namespace's sunrpc_net.
+ *
+ * Returns 0 on success, or the error from cache_create_net() or
+ * cache_register_net(); on a registration failure the new cache is
+ * destroyed again and sn->rsi_cache is left untouched.
+ */
+static int rsi_cache_create_net(struct net *net)
+{
+       struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+       struct cache_detail *rsi_cd = cache_create_net(&rsi_cache_template,
+                                                      net);
+       int ret;
+
+       if (IS_ERR(rsi_cd))
+               return PTR_ERR(rsi_cd);
+
+       ret = cache_register_net(rsi_cd, net);
+       if (ret == 0) {
+               sn->rsi_cache = rsi_cd;
+               return 0;
+       }
+
+       cache_destroy_net(rsi_cd, net);
+       return ret;
+}
+
+/*
+ * Detach the rsi cache from @net's sunrpc_net, then purge, unregister and
+ * destroy it (in that order).
+ */
+static void rsi_cache_destroy_net(struct net *net)
+{
+       struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+       struct cache_detail *rsi_cd;
+
+       /* Clear the published pointer before the cache is torn down. */
+       rsi_cd = sn->rsi_cache;
+       sn->rsi_cache = NULL;
+
+       cache_purge(rsi_cd);
+       cache_unregister_net(rsi_cd, net);
+       cache_destroy_net(rsi_cd, net);
+}
+
+/*
+ * Instantiate the rsc cache for @net from rsc_cache_template, register it,
+ * and publish it in the namespace's sunrpc_net.
+ *
+ * Returns 0 on success, or the error from cache_create_net() or
+ * cache_register_net(); on a registration failure the new cache is
+ * destroyed again and sn->rsc_cache is left untouched.
+ */
+static int rsc_cache_create_net(struct net *net)
+{
+       struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+       struct cache_detail *rsc_cd = cache_create_net(&rsc_cache_template,
+                                                      net);
+       int ret;
+
+       if (IS_ERR(rsc_cd))
+               return PTR_ERR(rsc_cd);
+
+       ret = cache_register_net(rsc_cd, net);
+       if (ret == 0) {
+               sn->rsc_cache = rsc_cd;
+               return 0;
+       }
+
+       cache_destroy_net(rsc_cd, net);
+       return ret;
+}
+
+/*
+ * Detach the rsc cache from @net's sunrpc_net, then purge, unregister and
+ * destroy it (in that order).
+ */
+static void rsc_cache_destroy_net(struct net *net)
+{
+       struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+       struct cache_detail *rsc_cd;
+
+       /* Clear the published pointer before the cache is torn down. */
+       rsc_cd = sn->rsc_cache;
+       sn->rsc_cache = NULL;
+
+       cache_purge(rsc_cd);
+       cache_unregister_net(rsc_cd, net);
+       cache_destroy_net(rsc_cd, net);
+}
+
 int
-gss_svc_init(void)
+gss_svc_init_net(struct net *net)
 {
-       int rv = svc_auth_register(RPC_AUTH_GSS, &svcauthops_gss);
+       int rv;
+
+       rv = rsc_cache_create_net(net);
        if (rv)
                return rv;
-       rv = cache_register(&rsc_cache);
+       rv = rsi_cache_create_net(net);
        if (rv)
                goto out1;
-       rv = cache_register(&rsi_cache);
-       if (rv)
-               goto out2;
        return 0;
-out2:
-       cache_unregister(&rsc_cache);
 out1:
-       svc_auth_unregister(RPC_AUTH_GSS);
+       rsc_cache_destroy_net(net);
        return rv;
 }
 
+/*
+ * Per-namespace teardown: destroy the rsi cache, then the rsc cache, of
+ * @net -- the reverse of the order in which gss_svc_init_net() creates
+ * them.
+ */
+void
+gss_svc_shutdown_net(struct net *net)
+{
+       rsi_cache_destroy_net(net);
+       rsc_cache_destroy_net(net);
+}
+
+/*
+ * Register the server-side RPCSEC_GSS authentication flavour.
+ * Returns the result of svc_auth_register().
+ */
+int
+gss_svc_init(void)
+{
+       int rv = svc_auth_register(RPC_AUTH_GSS, &svcauthops_gss);
+
+       return rv;
+}
+
 void
 gss_svc_shutdown(void)
 {
-       cache_unregister(&rsc_cache);
-       cache_unregister(&rsi_cache);
        svc_auth_unregister(RPC_AUTH_GSS);
 }
index 3ad435a14ada7ecd4afce4374f7dfb32dca77c52..31def68a0f6e7032f232474b9a0d09a0083b9ac7 100644 (file)
@@ -25,6 +25,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include <linux/slab.h>
 #include <linux/sunrpc/xprt.h>
 #include <linux/export.h>
+#include <linux/sunrpc/bc_xprt.h>
 
 #ifdef RPC_DEBUG
 #define RPCDBG_FACILITY        RPCDBG_TRANS
index 465df9ae1046b7fc12fe99fd0759017be7a7dc2a..f21ece08876440d574dad1ac6a09cf22d40d043e 100644 (file)
@@ -344,7 +344,7 @@ static int current_index;
 static void do_cache_clean(struct work_struct *work);
 static struct delayed_work cache_cleaner;
 
-static void sunrpc_init_cache_detail(struct cache_detail *cd)
+void sunrpc_init_cache_detail(struct cache_detail *cd)
 {
        rwlock_init(&cd->hash_lock);
        INIT_LIST_HEAD(&cd->queue);
@@ -360,8 +360,9 @@ static void sunrpc_init_cache_detail(struct cache_detail *cd)
        /* start the cleaning process */
        schedule_delayed_work(&cache_cleaner, 0);
 }
+EXPORT_SYMBOL_GPL(sunrpc_init_cache_detail);
 
-static void sunrpc_destroy_cache_detail(struct cache_detail *cd)
+void sunrpc_destroy_cache_detail(struct cache_detail *cd)
 {
        cache_purge(cd);
        spin_lock(&cache_list_lock);
@@ -384,6 +385,7 @@ static void sunrpc_destroy_cache_detail(struct cache_detail *cd)
 out:
        printk(KERN_ERR "nfsd: failed to unregister %s cache\n", cd->name);
 }
+EXPORT_SYMBOL_GPL(sunrpc_destroy_cache_detail);
 
 /* clean cache tries to find something to clean
  * and cleans it.
@@ -1643,12 +1645,6 @@ int cache_register_net(struct cache_detail *cd, struct net *net)
 }
 EXPORT_SYMBOL_GPL(cache_register_net);
 
-int cache_register(struct cache_detail *cd)
-{
-       return cache_register_net(cd, &init_net);
-}
-EXPORT_SYMBOL_GPL(cache_register);
-
 void cache_unregister_net(struct cache_detail *cd, struct net *net)
 {
        remove_cache_proc_entries(cd, net);
@@ -1656,11 +1652,31 @@ void cache_unregister_net(struct cache_detail *cd, struct net *net)
 }
 EXPORT_SYMBOL_GPL(cache_unregister_net);
 
-void cache_unregister(struct cache_detail *cd)
+/*
+ * cache_create_net - instantiate a per-namespace copy of a cache template
+ * @tmpl: template cache_detail to duplicate
+ * @net: network namespace recorded in the new cache
+ *
+ * Duplicates @tmpl, gives the copy its own zeroed hash table with
+ * hash_size buckets and stores @net in it.
+ *
+ * Returns the new cache_detail, or ERR_PTR(-ENOMEM) if either allocation
+ * fails.  The result is released with cache_destroy_net().
+ */
+struct cache_detail *cache_create_net(struct cache_detail *tmpl, struct net *net)
+{
+       struct cache_detail *cd;
+
+       cd = kmemdup(tmpl, sizeof(struct cache_detail), GFP_KERNEL);
+       if (cd == NULL)
+               return ERR_PTR(-ENOMEM);
+
+       /* kcalloc() zeroes the table and rejects a count * size overflow. */
+       cd->hash_table = kcalloc(cd->hash_size, sizeof(struct cache_head *),
+                                GFP_KERNEL);
+       if (cd->hash_table == NULL) {
+               kfree(cd);
+               return ERR_PTR(-ENOMEM);
+       }
+       cd->net = net;
+       return cd;
+}
+EXPORT_SYMBOL_GPL(cache_create_net);
+
+void cache_destroy_net(struct cache_detail *cd, struct net *net)  /* frees a cache built by cache_create_net(); net is not used in the body */
 {
-       cache_unregister_net(cd, &init_net);
+       kfree(cd->hash_table);  /* the bucket array was allocated separately from cd */
+       kfree(cd);
 }
-EXPORT_SYMBOL_GPL(cache_unregister);
+EXPORT_SYMBOL_GPL(cache_destroy_net);
 
 static ssize_t cache_read_pipefs(struct file *filp, char __user *buf,
                                 size_t count, loff_t *ppos)
@@ -1787,17 +1803,14 @@ int sunrpc_cache_register_pipefs(struct dentry *parent,
        struct dentry *dir;
        int ret = 0;
 
-       sunrpc_init_cache_detail(cd);
        q.name = name;
        q.len = strlen(name);
        q.hash = full_name_hash(q.name, q.len);
        dir = rpc_create_cache_dir(parent, &q, umode, cd);
        if (!IS_ERR(dir))
                cd->u.pipefs.dir = dir;
-       else {
-               sunrpc_destroy_cache_detail(cd);
+       else
                ret = PTR_ERR(dir);
-       }
        return ret;
 }
 EXPORT_SYMBOL_GPL(sunrpc_cache_register_pipefs);
@@ -1806,7 +1819,6 @@ void sunrpc_cache_unregister_pipefs(struct cache_detail *cd)
 {
        rpc_remove_cache_dir(cd->u.pipefs.dir);
        cd->u.pipefs.dir = NULL;
-       sunrpc_destroy_cache_detail(cd);
 }
 EXPORT_SYMBOL_GPL(sunrpc_cache_unregister_pipefs);
 
index f0268ea7e71121f3c07f159b6f46732b4074dd73..7a4cb5fdc21239d3628f44c912018399121b14e5 100644 (file)
 #include <linux/in.h>
 #include <linux/in6.h>
 #include <linux/un.h>
+#include <linux/rcupdate.h>
 
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/rpc_pipe_fs.h>
 #include <linux/sunrpc/metrics.h>
 #include <linux/sunrpc/bc_xprt.h>
+#include <trace/events/sunrpc.h>
 
 #include "sunrpc.h"
+#include "netns.h"
 
 #ifdef RPC_DEBUG
 # define RPCDBG_FACILITY       RPCDBG_CALL
@@ -50,8 +53,6 @@
 /*
  * All RPC clients are linked into this list
  */
-static LIST_HEAD(all_clients);
-static DEFINE_SPINLOCK(rpc_client_lock);
 
 static DECLARE_WAIT_QUEUE_HEAD(destroy_wait);
 
@@ -81,82 +82,191 @@ static int rpc_ping(struct rpc_clnt *clnt);
 
 static void rpc_register_client(struct rpc_clnt *clnt)
 {
-       spin_lock(&rpc_client_lock);
-       list_add(&clnt->cl_clients, &all_clients);
-       spin_unlock(&rpc_client_lock);
+       struct net *net = rpc_net_ns(clnt);  /* namespace comes from the client's transport */
+       struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
+       spin_lock(&sn->rpc_client_lock);  /* the client list and its lock are now per namespace */
+       list_add(&clnt->cl_clients, &sn->all_clients);
+       spin_unlock(&sn->rpc_client_lock);
 }
 
 static void rpc_unregister_client(struct rpc_clnt *clnt)
 {
-       spin_lock(&rpc_client_lock);
+       struct net *net = rpc_net_ns(clnt);  /* reads clnt->cl_xprt, so the transport must still be attached */
+       struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
+       spin_lock(&sn->rpc_client_lock);  /* same per-net lock that rpc_register_client() takes */
        list_del(&clnt->cl_clients);
-       spin_unlock(&rpc_client_lock);
+       spin_unlock(&sn->rpc_client_lock);
 }
 
-static int
-rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name)
+/*
+ * Remove the client's rpc_pipefs directory, if it has one.  The auth
+ * flavour gets a chance to destroy its pipes before the directory goes.
+ * On return clnt->cl_dentry is NULL.
+ */
+static void __rpc_clnt_remove_pipedir(struct rpc_clnt *clnt)
+{
+       struct rpc_auth *auth = clnt->cl_auth;
+
+       /* No directory: cl_dentry is already NULL, nothing to undo. */
+       if (clnt->cl_dentry == NULL)
+               return;
+
+       if (auth != NULL && auth->au_ops->pipes_destroy != NULL)
+               auth->au_ops->pipes_destroy(auth);
+       rpc_remove_client_dir(clnt->cl_dentry);
+       clnt->cl_dentry = NULL;
+}
+
+/*
+ * Remove the client's rpc_pipefs directory, but only when
+ * rpc_get_sb_net() reports a pipefs superblock for the client's
+ * namespace; the get is paired with rpc_put_sb_net().
+ */
+static void rpc_clnt_remove_pipedir(struct rpc_clnt *clnt)
+{
+       struct net *net = rpc_net_ns(clnt);
+
+       /* No superblock for this namespace: nothing to remove. */
+       if (rpc_get_sb_net(net) == NULL)
+               return;
+
+       __rpc_clnt_remove_pipedir(clnt);
+       rpc_put_sb_net(net);
+}
+
+static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb,
+                                   struct rpc_clnt *clnt,
+                                   const char *dir_name)  /* returns the new client dentry, NULL if dir_name is not found, or an ERR_PTR */
 {
        static uint32_t clntid;  /* function-static counter used to build the "clnt%x" names */
-       struct path path, dir;
        char name[15];
        struct qstr q = {
                .name = name,
        };
+       struct dentry *dir, *dentry;
        int error;
 
-       clnt->cl_path.mnt = ERR_PTR(-ENOENT);
-       clnt->cl_path.dentry = ERR_PTR(-ENOENT);
-       if (dir_name == NULL)
-               return 0;
-
-       path.mnt = rpc_get_mount();
-       if (IS_ERR(path.mnt))
-               return PTR_ERR(path.mnt);
-       error = vfs_path_lookup(path.mnt->mnt_root, path.mnt, dir_name, 0, &dir);
-       if (error)
-               goto err;
-
+       dir = rpc_d_lookup_sb(sb, dir_name);
+       if (dir == NULL)
+               return dir;  /* parent directory missing: hand NULL back to the caller */
        for (;;) {
                q.len = snprintf(name, sizeof(name), "clnt%x", (unsigned int)clntid++);
                name[sizeof(name) - 1] = '\0';
                q.hash = full_name_hash(q.name, q.len);
-               path.dentry = rpc_create_client_dir(dir.dentry, &q, clnt);
-               if (!IS_ERR(path.dentry))
+               dentry = rpc_create_client_dir(dir, &q, clnt);
+               if (!IS_ERR(dentry))
                        break;
-               error = PTR_ERR(path.dentry);
+               error = PTR_ERR(dentry);
                if (error != -EEXIST) {  /* -EEXIST just means the name is taken: loop and try the next id */
                        printk(KERN_INFO "RPC: Couldn't create pipefs entry"
                                        " %s/%s, error %d\n",
                                        dir_name, name, error);
-                       goto err_path_put;
+                       break;  /* give up; dentry still carries the ERR_PTR that is returned below */
                }
        }
-       path_put(&dir);
-       clnt->cl_path = path;
+       dput(dir);  /* release the looked-up parent on both the success and the error path */
+       return dentry;
+}
+
+static int
+rpc_setup_pipedir(struct rpc_clnt *clnt, const char *dir_name)  /* returns 0 or a negative errno from directory creation */
+{
+       struct net *net = rpc_net_ns(clnt);
+       struct super_block *pipefs_sb;
+       struct dentry *dentry;
+
+       clnt->cl_dentry = NULL;  /* default: the client has no pipefs directory */
+       if (dir_name == NULL)
+               return 0;
+       pipefs_sb = rpc_get_sb_net(net);
+       if (!pipefs_sb)
+               return 0;  /* no pipefs superblock for this net is not treated as an error */
+       dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt, dir_name);
+       rpc_put_sb_net(net);
+       if (IS_ERR(dentry))
+               return PTR_ERR(dentry);
+       clnt->cl_dentry = dentry;  /* may be NULL when the parent directory was not found */
        return 0;
-err_path_put:
-       path_put(&dir);
-err:
-       rpc_put_mount();
+}
+
+/*
+ * Apply one rpc_pipefs event to a single client.
+ *
+ * RPC_PIPEFS_MOUNT creates the client's directory under @sb (when its
+ * program names a pipe directory) and lets the auth flavour create its
+ * pipes; RPC_PIPEFS_UMOUNT removes the directory again.
+ *
+ * Returns 0 on success, -ENOENT if the parent directory is missing,
+ * -ENOTSUPP for an unknown event, or the error reported by directory
+ * or pipe creation.
+ */
+static int __rpc_pipefs_event(struct rpc_clnt *clnt, unsigned long event,
+                               struct super_block *sb)
+{
+       struct dentry *dentry;
+       int err = 0;
+
+       switch (event) {
+       case RPC_PIPEFS_MOUNT:
+               if (clnt->cl_program->pipe_dir_name == NULL)
+                       break;
+               dentry = rpc_setup_pipedir_sb(sb, clnt,
+                                             clnt->cl_program->pipe_dir_name);
+               /* A missing parent directory is an error, not a reason to crash. */
+               if (dentry == NULL)
+                       return -ENOENT;
+               if (IS_ERR(dentry))
+                       return PTR_ERR(dentry);
+               clnt->cl_dentry = dentry;
+               /* cl_auth may be unset; __rpc_clnt_remove_pipedir() checks it too. */
+               if (clnt->cl_auth && clnt->cl_auth->au_ops->pipes_create) {
+                       err = clnt->cl_auth->au_ops->pipes_create(clnt->cl_auth);
+                       if (err)
+                               __rpc_clnt_remove_pipedir(clnt);
+               }
+               break;
+       case RPC_PIPEFS_UMOUNT:
+               __rpc_clnt_remove_pipedir(clnt);
+               break;
+       default:
+               printk(KERN_ERR "%s: unknown event: %ld\n", __func__, event);
+               return -ENOTSUPP;
+       }
+       return err;
+}
+
+/*
+ * Find the next client in @net that still needs @event applied and
+ * return it with an extra reference, or NULL when none is left.
+ *
+ * A client needs RPC_PIPEFS_MOUNT while it has no cl_dentry and
+ * RPC_PIPEFS_UMOUNT while it has one.  The caller drops the reference
+ * with rpc_release_client().
+ */
+static struct rpc_clnt *rpc_get_client_for_event(struct net *net, int event)
+{
+       struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+       struct rpc_clnt *clnt;
+
+       spin_lock(&sn->rpc_client_lock);
+       list_for_each_entry(clnt, &sn->all_clients, cl_clients) {
+               /*
+                * A program without a pipe directory never gets a cl_dentry,
+                * so on MOUNT it would be handed back on every call and the
+                * caller's loop would never finish.
+                */
+               if (clnt->cl_program->pipe_dir_name == NULL)
+                       continue;
+               if (((event == RPC_PIPEFS_MOUNT) && clnt->cl_dentry) ||
+                   ((event == RPC_PIPEFS_UMOUNT) && !clnt->cl_dentry))
+                       continue;
+               /*
+                * rpc_free_client() only unlinks the client from this list
+                * while it is being freed, so an entry may already have a
+                * zero count; never take a new reference on such a client.
+                */
+               if (!atomic_inc_not_zero(&clnt->cl_count))
+                       continue;
+               spin_unlock(&sn->rpc_client_lock);
+               return clnt;
+       }
+       spin_unlock(&sn->rpc_client_lock);
+       return NULL;
+}
+
+/*
+ * Notifier callback for rpc_pipefs events.  @ptr is the pipefs
+ * superblock; its s_fs_info is passed on as the namespace to scan.
+ * Applies @event to one client at a time until none is left or one
+ * fails, and returns the last status.
+ */
+static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event,
+                           void *ptr)
+{
+       struct super_block *sb = ptr;
+       struct rpc_clnt *clnt;
+       int error = 0;
+
+       for (;;) {
+               clnt = rpc_get_client_for_event(sb->s_fs_info, event);
+               if (clnt == NULL)
+                       break;
+               error = __rpc_pipefs_event(clnt, event, sb);
+               /* Drop the reference taken by rpc_get_client_for_event(). */
+               rpc_release_client(clnt);
+               if (error != 0)
+                       break;
+       }
        return error;
 }
 
+static struct notifier_block rpc_clients_block = {  /* hooks the RPC client list into rpc_pipefs events */
+       .notifier_call  = rpc_pipefs_event,
+       .priority       = SUNRPC_PIPEFS_RPC_PRIO,  /* NOTE(review): ordering relative to other pipefs notifiers is defined elsewhere */
+};
+
+int rpc_clients_notifier_register(void)  /* registers rpc_clients_block for rpc_pipefs events */
+{
+       return rpc_pipefs_notifier_register(&rpc_clients_block);  /* status is passed straight back to the caller */
+}
+
+/* Undo rpc_clients_notifier_register(). */
+void rpc_clients_notifier_unregister(void)
+{
+       /* A void function must not "return" an expression. */
+       rpc_pipefs_notifier_unregister(&rpc_clients_block);
+}
+
 static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, struct rpc_xprt *xprt)
 {
-       struct rpc_program      *program = args->program;
-       struct rpc_version      *version;
+       const struct rpc_program *program = args->program;
+       const struct rpc_version *version;
        struct rpc_clnt         *clnt = NULL;
        struct rpc_auth         *auth;
        int err;
-       size_t len;
 
        /* sanity check the name before trying to print it */
-       err = -EINVAL;
-       len = strlen(args->servername);
-       if (len > RPC_MAXNETNAMELEN)
-               goto out_no_rpciod;
-       len++;
-
        dprintk("RPC:       creating %s client for %s (xprt %p)\n",
                        program->name, args->servername, xprt);
 
@@ -179,17 +289,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
                goto out_err;
        clnt->cl_parent = clnt;
 
-       clnt->cl_server = clnt->cl_inline_name;
-       if (len > sizeof(clnt->cl_inline_name)) {
-               char *buf = kmalloc(len, GFP_KERNEL);
-               if (buf != NULL)
-                       clnt->cl_server = buf;
-               else
-                       len = sizeof(clnt->cl_inline_name);
-       }
-       strlcpy(clnt->cl_server, args->servername, len);
-
-       clnt->cl_xprt     = xprt;
+       rcu_assign_pointer(clnt->cl_xprt, xprt);
        clnt->cl_procinfo = version->procs;
        clnt->cl_maxproc  = version->nrprocs;
        clnt->cl_protname = program->name;
@@ -204,7 +304,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
        INIT_LIST_HEAD(&clnt->cl_tasks);
        spin_lock_init(&clnt->cl_lock);
 
-       if (!xprt_bound(clnt->cl_xprt))
+       if (!xprt_bound(xprt))
                clnt->cl_autobind = 1;
 
        clnt->cl_timeout = xprt->timeout;
@@ -246,17 +346,12 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
        return clnt;
 
 out_no_auth:
-       if (!IS_ERR(clnt->cl_path.dentry)) {
-               rpc_remove_client_dir(clnt->cl_path.dentry);
-               rpc_put_mount();
-       }
+       rpc_clnt_remove_pipedir(clnt);
 out_no_path:
        kfree(clnt->cl_principal);
 out_no_principal:
        rpc_free_iostats(clnt->cl_metrics);
 out_no_stats:
-       if (clnt->cl_server != clnt->cl_inline_name)
-               kfree(clnt->cl_server);
        kfree(clnt);
 out_err:
        xprt_put(xprt);
@@ -286,6 +381,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
                .srcaddr = args->saddress,
                .dstaddr = args->address,
                .addrlen = args->addrsize,
+               .servername = args->servername,
                .bc_xprt = args->bc_xprt,
        };
        char servername[48];
@@ -294,7 +390,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
         * If the caller chooses not to specify a hostname, whip
         * up a string representation of the passed-in address.
         */
-       if (args->servername == NULL) {
+       if (xprtargs.servername == NULL) {
                struct sockaddr_un *sun =
                                (struct sockaddr_un *)args->address;
                struct sockaddr_in *sin =
@@ -321,7 +417,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
                         * address family isn't recognized. */
                        return ERR_PTR(-EINVAL);
                }
-               args->servername = servername;
+               xprtargs.servername = servername;
        }
 
        xprt = xprt_create_transport(&xprtargs);
@@ -374,6 +470,7 @@ struct rpc_clnt *
 rpc_clone_client(struct rpc_clnt *clnt)
 {
        struct rpc_clnt *new;
+       struct rpc_xprt *xprt;
        int err = -ENOMEM;
 
        new = kmemdup(clnt, sizeof(*new), GFP_KERNEL);
@@ -393,18 +490,25 @@ rpc_clone_client(struct rpc_clnt *clnt)
                if (new->cl_principal == NULL)
                        goto out_no_principal;
        }
+       rcu_read_lock();
+       xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
+       rcu_read_unlock();
+       if (xprt == NULL)
+               goto out_no_transport;
+       rcu_assign_pointer(new->cl_xprt, xprt);
        atomic_set(&new->cl_count, 1);
        err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name);
        if (err != 0)
                goto out_no_path;
        if (new->cl_auth)
                atomic_inc(&new->cl_auth->au_count);
-       xprt_get(clnt->cl_xprt);
        atomic_inc(&clnt->cl_count);
        rpc_register_client(new);
        rpciod_up();
        return new;
 out_no_path:
+       xprt_put(xprt);
+out_no_transport:
        kfree(new->cl_principal);
 out_no_principal:
        rpc_free_iostats(new->cl_metrics);
@@ -453,8 +557,9 @@ EXPORT_SYMBOL_GPL(rpc_killall_tasks);
  */
 void rpc_shutdown_client(struct rpc_clnt *clnt)
 {
-       dprintk("RPC:       shutting down %s client for %s\n",
-                       clnt->cl_protname, clnt->cl_server);
+       dprintk_rcu("RPC:       shutting down %s client for %s\n",
+                       clnt->cl_protname,
+                       rcu_dereference(clnt->cl_xprt)->servername);
 
        while (!list_empty(&clnt->cl_tasks)) {
                rpc_killall_tasks(clnt);
@@ -472,24 +577,17 @@ EXPORT_SYMBOL_GPL(rpc_shutdown_client);
 static void
 rpc_free_client(struct rpc_clnt *clnt)
 {
-       dprintk("RPC:       destroying %s client for %s\n",
-                       clnt->cl_protname, clnt->cl_server);
-       if (!IS_ERR(clnt->cl_path.dentry)) {
-               rpc_remove_client_dir(clnt->cl_path.dentry);
-               rpc_put_mount();
-       }
-       if (clnt->cl_parent != clnt) {
+       dprintk_rcu("RPC:       destroying %s client for %s\n",
+                       clnt->cl_protname,
+                       rcu_dereference(clnt->cl_xprt)->servername);
+       if (clnt->cl_parent != clnt)  /* a clone drops the reference it holds on its parent */
                rpc_release_client(clnt->cl_parent);
-               goto out_free;
-       }
-       if (clnt->cl_server != clnt->cl_inline_name)
-               kfree(clnt->cl_server);
-out_free:
        rpc_unregister_client(clnt);  /* looks up the namespace through clnt->cl_xprt, so it runs before xprt_put() */
+       rpc_clnt_remove_pipedir(clnt);
        rpc_free_iostats(clnt->cl_metrics);
        kfree(clnt->cl_principal);
        clnt->cl_metrics = NULL;
-       xprt_put(clnt->cl_xprt);
+       xprt_put(rcu_dereference_raw(clnt->cl_xprt));  /* raw dereference: no RCU read lock is taken here */
        rpciod_down();
        kfree(clnt);
 }
@@ -542,11 +640,11 @@ rpc_release_client(struct rpc_clnt *clnt)
  * The Sun NFSv2/v3 ACL protocol can do this.
  */
 struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old,
-                                     struct rpc_program *program,
+                                     const struct rpc_program *program,
                                      u32 vers)
 {
        struct rpc_clnt *clnt;
-       struct rpc_version *version;
+       const struct rpc_version *version;
        int err;
 
        BUG_ON(vers >= program->nrvers || !program->version[vers]);
@@ -778,13 +876,18 @@ EXPORT_SYMBOL_GPL(rpc_call_start);
 size_t rpc_peeraddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t bufsize)
 {
        size_t bytes;
-       struct rpc_xprt *xprt = clnt->cl_xprt;
+       struct rpc_xprt *xprt;
 
-       bytes = sizeof(xprt->addr);
+       rcu_read_lock();  /* cl_xprt is read under RCU for the duration of the copy */
+       xprt = rcu_dereference(clnt->cl_xprt);
+
+       bytes = xprt->addrlen;
        if (bytes > bufsize)
                bytes = bufsize;  /* truncate to what the caller's buffer can hold */
-       memcpy(buf, &clnt->cl_xprt->addr, bytes);
-       return xprt->addrlen;
+       memcpy(buf, &xprt->addr, bytes);
+       rcu_read_unlock();
+
+       return bytes;  /* number of bytes actually copied, which may be less than the address length */
 }
 EXPORT_SYMBOL_GPL(rpc_peeraddr);
 
@@ -793,11 +896,16 @@ EXPORT_SYMBOL_GPL(rpc_peeraddr);
  * @clnt: RPC client structure
  * @format: address format
  *
+ * NB: the lifetime of the memory referenced by the returned pointer is
+ * the same as the rpc_xprt itself.  As long as the caller uses this
+ * pointer, it must hold the RCU read lock.
  */
 const char *rpc_peeraddr2str(struct rpc_clnt *clnt,
                             enum rpc_display_format_t format)
 {
-       struct rpc_xprt *xprt = clnt->cl_xprt;
+       struct rpc_xprt *xprt;
+
+       xprt = rcu_dereference(clnt->cl_xprt);
 
        if (xprt->address_strings[format] != NULL)
                return xprt->address_strings[format];
@@ -806,17 +914,203 @@ const char *rpc_peeraddr2str(struct rpc_clnt *clnt,
 }
 EXPORT_SYMBOL_GPL(rpc_peeraddr2str);
 
+static const struct sockaddr_in rpc_inaddr_loopback = {  /* bind and fallback address used by rpc_sockname() and rpc_anyaddr() */
+       .sin_family             = AF_INET,
+       .sin_addr.s_addr        = htonl(INADDR_ANY),  /* NOTE(review): holds the wildcard address despite the "loopback" name */
+};
+
+static const struct sockaddr_in6 rpc_in6addr_loopback = {  /* IPv6 counterpart of rpc_inaddr_loopback */
+       .sin6_family            = AF_INET6,
+       .sin6_addr              = IN6ADDR_ANY_INIT,  /* NOTE(review): holds the wildcard address despite the "loopback" name */
+};
+
+/*
+ * Try a getsockname() on a connected datagram socket.  Using a
+ * connected datagram socket prevents leaving a socket in TIME_WAIT.
+ * This conserves the ephemeral port number space.
+ *
+ * @net: namespace to create the probe socket in
+ * @sap/@salen: remote address to connect the probe socket to
+ * @buf/@buflen: where the discovered local address is stored
+ *
+ * Returns zero and fills in "buf" if successful; otherwise, a
+ * negative errno is returned.  The probe socket is always released
+ * before returning.
+ */
+static int rpc_sockname(struct net *net, struct sockaddr *sap, size_t salen,
+                       struct sockaddr *buf, int buflen)
+{
+       struct socket *sock;
+       int err;
+
+       err = __sock_create(net, sap->sa_family,
+                               SOCK_DGRAM, IPPROTO_UDP, &sock, 1);
+       if (err < 0) {
+               dprintk("RPC:       can't create UDP socket (%d)\n", err);
+               goto out;
+       }
+
+       switch (sap->sa_family) {
+       case AF_INET:
+               err = kernel_bind(sock,
+                               (struct sockaddr *)&rpc_inaddr_loopback,
+                               sizeof(rpc_inaddr_loopback));
+               break;
+       case AF_INET6:
+               err = kernel_bind(sock,
+                               (struct sockaddr *)&rpc_in6addr_loopback,
+                               sizeof(rpc_in6addr_loopback));
+               break;
+       default:
+               err = -EAFNOSUPPORT;
+               /* The socket already exists here, so it must be released. */
+               goto out_release;
+       }
+       if (err < 0) {
+               dprintk("RPC:       can't bind UDP socket (%d)\n", err);
+               goto out_release;
+       }
+
+       err = kernel_connect(sock, sap, salen, 0);
+       if (err < 0) {
+               dprintk("RPC:       can't connect UDP socket (%d)\n", err);
+               goto out_release;
+       }
+
+       err = kernel_getsockname(sock, buf, &buflen);
+       if (err < 0) {
+               dprintk("RPC:       getsockname failed (%d)\n", err);
+               goto out_release;
+       }
+
+       err = 0;
+       if (buf->sa_family == AF_INET6) {
+               /* Hand back the IPv6 address without a scope id. */
+               struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)buf;
+               sin6->sin6_scope_id = 0;
+       }
+       dprintk("RPC:       %s succeeded\n", __func__);
+
+out_release:
+       sock_release(sock);
+out:
+       return err;
+}
+
+/*
+ * Scraping a connected socket failed, so we don't have a useable
+ * local address.  Fallback: generate an address that will prevent
+ * the server from calling us back.
+ *
+ * @family: address family of the address to generate
+ * @buf/@buflen: destination buffer and its size in bytes
+ *
+ * Returns zero and fills in "buf" if successful; -EINVAL if "buf" is
+ * too small for the address, or -EAFNOSUPPORT for any family other
+ * than AF_INET and AF_INET6.
+ */
+static int rpc_anyaddr(int family, struct sockaddr *buf, size_t buflen)
+{
+       switch (family) {
+       case AF_INET:
+               if (buflen < sizeof(rpc_inaddr_loopback))
+                       return -EINVAL;
+               memcpy(buf, &rpc_inaddr_loopback,
+                               sizeof(rpc_inaddr_loopback));
+               break;
+       case AF_INET6:
+               if (buflen < sizeof(rpc_in6addr_loopback))
+                       return -EINVAL;
+               memcpy(buf, &rpc_in6addr_loopback,
+                               sizeof(rpc_in6addr_loopback));
+               /* Without this break IPv6 falls into the error case below. */
+               break;
+       default:
+               dprintk("RPC:       %s: address family not supported\n",
+                       __func__);
+               return -EAFNOSUPPORT;
+       }
+       dprintk("RPC:       %s: succeeded\n", __func__);
+       return 0;
+}
+
+/**
+ * rpc_localaddr - discover local endpoint address for an RPC client
+ * @clnt: RPC client structure
+ * @buf: target buffer
+ * @buflen: size of target buffer, in bytes
+ *
+ * Returns zero and fills in "buf" if successful; otherwise, a negative
+ * errno is returned.
+ *
+ * The local address is probed with a throw-away socket connected to the
+ * client's peer address (port cleared).  If that probe fails, the
+ * result of rpc_anyaddr() for the peer's address family is returned
+ * instead.
+ *
+ * The result of this function call is transient: multiple calls in
+ * succession may give different results, depending on how local
+ * networking configuration changes over time.
+ */
+int rpc_localaddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t buflen)
+{
+       struct sockaddr_storage peer;
+       struct sockaddr *peer_sa = (struct sockaddr *)&peer;
+       struct rpc_xprt *xprt;
+       struct net *net;
+       size_t peer_len;
+       int status;
+
+       /* Snapshot the peer address and take a namespace reference under RCU. */
+       rcu_read_lock();
+       xprt = rcu_dereference(clnt->cl_xprt);
+       peer_len = xprt->addrlen;
+       memcpy(peer_sa, &xprt->addr, peer_len);
+       net = get_net(xprt->xprt_net);
+       rcu_read_unlock();
+
+       rpc_set_port(peer_sa, 0);
+       status = rpc_sockname(net, peer_sa, peer_len, buf, buflen);
+       put_net(net);
+       if (status == 0)
+               return 0;
+
+       /* Couldn't discover local address, return ANYADDR */
+       return rpc_anyaddr(peer_sa->sa_family, buf, buflen);
+}
+EXPORT_SYMBOL_GPL(rpc_localaddr);
+
 void
 rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize)
 {
-       struct rpc_xprt *xprt = clnt->cl_xprt;
+       struct rpc_xprt *xprt;
+
+       rcu_read_lock();  /* the transport is used only inside this RCU read-side section */
+       xprt = rcu_dereference(clnt->cl_xprt);
        if (xprt->ops->set_buffer_size)  /* the hook is optional; transports without it are left untouched */
                xprt->ops->set_buffer_size(xprt, sndsize, rcvsize);
+       rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(rpc_setbufsize);
 
-/*
- * Return size of largest payload RPC client can support, in bytes
+/**
+ * rpc_protocol - Get transport protocol number for an RPC client
+ * @clnt: RPC client to query
+ *
+ */
+int rpc_protocol(struct rpc_clnt *clnt)
+{
+       struct rpc_xprt *xprt;
+       int prot;
+
+       /* Copy the value out while the transport pointer is RCU-protected. */
+       rcu_read_lock();
+       xprt = rcu_dereference(clnt->cl_xprt);
+       prot = xprt->prot;
+       rcu_read_unlock();
+
+       return prot;
+}
+EXPORT_SYMBOL_GPL(rpc_protocol);
+
+/**
+ * rpc_net_ns - Get the network namespace for this RPC client
+ * @clnt: RPC client to query
+ *
+ */
+struct net *rpc_net_ns(struct rpc_clnt *clnt)
+{
+       struct rpc_xprt *xprt;
+       struct net *net;
+
+       /* Only the pointer is copied; no namespace reference is taken here. */
+       rcu_read_lock();
+       xprt = rcu_dereference(clnt->cl_xprt);
+       net = xprt->xprt_net;
+       rcu_read_unlock();
+
+       return net;
+}
+EXPORT_SYMBOL_GPL(rpc_net_ns);
+
+/**
+ * rpc_max_payload - Get maximum payload size for a transport, in bytes
+ * @clnt: RPC client to query
  *
  * For stream transports, this is one RPC record fragment (see RFC
  * 1831), as we don't support multi-record requests yet.  For datagram
@@ -825,7 +1119,12 @@ EXPORT_SYMBOL_GPL(rpc_setbufsize);
  */
 size_t rpc_max_payload(struct rpc_clnt *clnt)
 {
-       return clnt->cl_xprt->max_payload;
+       size_t ret;
+
+       rcu_read_lock();  /* copy the value out while the transport pointer is RCU-protected */
+       ret = rcu_dereference(clnt->cl_xprt)->max_payload;
+       rcu_read_unlock();
+       return ret;
 }
 EXPORT_SYMBOL_GPL(rpc_max_payload);
 
@@ -836,8 +1135,11 @@ EXPORT_SYMBOL_GPL(rpc_max_payload);
  */
 void rpc_force_rebind(struct rpc_clnt *clnt)
 {
-       if (clnt->cl_autobind)
-               xprt_clear_bound(clnt->cl_xprt);
+       if (clnt->cl_autobind) {  /* nothing is done for clients without autobind */
+               rcu_read_lock();
+               xprt_clear_bound(rcu_dereference(clnt->cl_xprt));
+               rcu_read_unlock();
+       }
 }
 EXPORT_SYMBOL_GPL(rpc_force_rebind);
 
@@ -1163,6 +1465,7 @@ call_bind_status(struct rpc_task *task)
                return;
        }
 
+       trace_rpc_bind_status(task);
        switch (task->tk_status) {
        case -ENOMEM:
                dprintk("RPC: %5u rpcbind out of memory\n", task->tk_pid);
@@ -1262,6 +1565,7 @@ call_connect_status(struct rpc_task *task)
                return;
        }
 
+       trace_rpc_connect_status(task, status);
        switch (status) {
                /* if soft mounted, test if we've timed out */
        case -ETIMEDOUT:
@@ -1450,6 +1754,7 @@ call_status(struct rpc_task *task)
                return;
        }
 
+       trace_rpc_call_status(task);
        task->tk_status = 0;
        switch(status) {
        case -EHOSTDOWN:
@@ -1513,8 +1818,11 @@ call_timeout(struct rpc_task *task)
        }
        if (RPC_IS_SOFT(task)) {
                if (clnt->cl_chatty)
+                       rcu_read_lock();
                        printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
-                               clnt->cl_protname, clnt->cl_server);
+                               clnt->cl_protname,
+                               rcu_dereference(clnt->cl_xprt)->servername);
+                       rcu_read_unlock();
                if (task->tk_flags & RPC_TASK_TIMEOUT)
                        rpc_exit(task, -ETIMEDOUT);
                else
@@ -1524,9 +1832,13 @@ call_timeout(struct rpc_task *task)
 
        if (!(task->tk_flags & RPC_CALL_MAJORSEEN)) {
                task->tk_flags |= RPC_CALL_MAJORSEEN;
-               if (clnt->cl_chatty)
+               if (clnt->cl_chatty) {
+                       rcu_read_lock();
                        printk(KERN_NOTICE "%s: server %s not responding, still trying\n",
-                       clnt->cl_protname, clnt->cl_server);
+                       clnt->cl_protname,
+                       rcu_dereference(clnt->cl_xprt)->servername);
+                       rcu_read_unlock();
+               }
        }
        rpc_force_rebind(clnt);
        /*
@@ -1555,9 +1867,13 @@ call_decode(struct rpc_task *task)
        dprint_status(task);
 
        if (task->tk_flags & RPC_CALL_MAJORSEEN) {
-               if (clnt->cl_chatty)
+               if (clnt->cl_chatty) {
+                       rcu_read_lock();
                        printk(KERN_NOTICE "%s: server %s OK\n",
-                               clnt->cl_protname, clnt->cl_server);
+                               clnt->cl_protname,
+                               rcu_dereference(clnt->cl_xprt)->servername);
+                       rcu_read_unlock();
+               }
                task->tk_flags &= ~RPC_CALL_MAJORSEEN;
        }
 
@@ -1635,6 +1951,7 @@ rpc_encode_header(struct rpc_task *task)
 static __be32 *
 rpc_verify_header(struct rpc_task *task)
 {
+       struct rpc_clnt *clnt = task->tk_client;
        struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0];
        int len = task->tk_rqstp->rq_rcv_buf.len >> 2;
        __be32  *p = iov->iov_base;
@@ -1707,8 +2024,11 @@ rpc_verify_header(struct rpc_task *task)
                        task->tk_action = call_bind;
                        goto out_retry;
                case RPC_AUTH_TOOWEAK:
+                       rcu_read_lock();
                        printk(KERN_NOTICE "RPC: server %s requires stronger "
-                              "authentication.\n", task->tk_client->cl_server);
+                              "authentication.\n",
+                              rcu_dereference(clnt->cl_xprt)->servername);
+                       rcu_read_unlock();
                        break;
                default:
                        dprintk("RPC: %5u %s: unknown auth error: %x\n",
@@ -1731,28 +2051,27 @@ rpc_verify_header(struct rpc_task *task)
        case RPC_SUCCESS:
                return p;
        case RPC_PROG_UNAVAIL:
-               dprintk("RPC: %5u %s: program %u is unsupported by server %s\n",
-                               task->tk_pid, __func__,
-                               (unsigned int)task->tk_client->cl_prog,
-                               task->tk_client->cl_server);
+               dprintk_rcu("RPC: %5u %s: program %u is unsupported "
+                               "by server %s\n", task->tk_pid, __func__,
+                               (unsigned int)clnt->cl_prog,
+                               rcu_dereference(clnt->cl_xprt)->servername);
                error = -EPFNOSUPPORT;
                goto out_err;
        case RPC_PROG_MISMATCH:
-               dprintk("RPC: %5u %s: program %u, version %u unsupported by "
-                               "server %s\n", task->tk_pid, __func__,
-                               (unsigned int)task->tk_client->cl_prog,
-                               (unsigned int)task->tk_client->cl_vers,
-                               task->tk_client->cl_server);
+               dprintk_rcu("RPC: %5u %s: program %u, version %u unsupported "
+                               "by server %s\n", task->tk_pid, __func__,
+                               (unsigned int)clnt->cl_prog,
+                               (unsigned int)clnt->cl_vers,
+                               rcu_dereference(clnt->cl_xprt)->servername);
                error = -EPROTONOSUPPORT;
                goto out_err;
        case RPC_PROC_UNAVAIL:
-               dprintk("RPC: %5u %s: proc %s unsupported by program %u, "
+               dprintk_rcu("RPC: %5u %s: proc %s unsupported by program %u, "
                                "version %u on server %s\n",
                                task->tk_pid, __func__,
                                rpc_proc_name(task),
-                               task->tk_client->cl_prog,
-                               task->tk_client->cl_vers,
-                               task->tk_client->cl_server);
+                               clnt->cl_prog, clnt->cl_vers,
+                               rcu_dereference(clnt->cl_xprt)->servername);
                error = -EOPNOTSUPP;
                goto out_err;
        case RPC_GARBAGE_ARGS:
@@ -1766,7 +2085,7 @@ rpc_verify_header(struct rpc_task *task)
        }
 
 out_garbage:
-       task->tk_client->cl_stats->rpcgarbage++;
+       clnt->cl_stats->rpcgarbage++;
        if (task->tk_garb_retry) {
                task->tk_garb_retry--;
                dprintk("RPC: %5u %s: retrying\n",
@@ -1852,14 +2171,15 @@ static void rpc_show_task(const struct rpc_clnt *clnt,
                task->tk_action, rpc_waitq);
 }
 
-void rpc_show_tasks(void)
+void rpc_show_tasks(struct net *net)
 {
        struct rpc_clnt *clnt;
        struct rpc_task *task;
        int header = 0;
+       struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
 
-       spin_lock(&rpc_client_lock);
-       list_for_each_entry(clnt, &all_clients, cl_clients) {
+       spin_lock(&sn->rpc_client_lock);
+       list_for_each_entry(clnt, &sn->all_clients, cl_clients) {
                spin_lock(&clnt->cl_lock);
                list_for_each_entry(task, &clnt->cl_tasks, tk_task) {
                        if (!header) {
@@ -1870,6 +2190,6 @@ void rpc_show_tasks(void)
                }
                spin_unlock(&clnt->cl_lock);
        }
-       spin_unlock(&rpc_client_lock);
+       spin_unlock(&sn->rpc_client_lock);
 }
 #endif
index d013bf211caeb0ca87874ee21d741466d359ea85..ce7bd449173dc3e1095d28909f5c23879cc0c477 100644 (file)
@@ -9,6 +9,20 @@ struct cache_detail;
 struct sunrpc_net {
        struct proc_dir_entry *proc_net_rpc;
        struct cache_detail *ip_map_cache;
+       struct cache_detail *unix_gid_cache;
+       struct cache_detail *rsc_cache;
+       struct cache_detail *rsi_cache;
+
+       struct super_block *pipefs_sb;
+       struct mutex pipefs_sb_lock;
+
+       struct list_head all_clients;
+       spinlock_t rpc_client_lock;
+
+       struct rpc_clnt *rpcb_local_clnt;
+       struct rpc_clnt *rpcb_local_clnt4;
+       spinlock_t rpcb_clnt_lock;
+       unsigned int rpcb_users;
 };
 
 extern int sunrpc_net_id;
index 7d6dd6efbdbe33020985ebccc4cad492f867f475..c84c0e0c41cb39dd41d37c3cc23c875b98c1e4d7 100644 (file)
@@ -16,9 +16,9 @@
 #include <linux/namei.h>
 #include <linux/fsnotify.h>
 #include <linux/kernel.h>
+#include <linux/rcupdate.h>
 
 #include <asm/ioctls.h>
-#include <linux/fs.h>
 #include <linux/poll.h>
 #include <linux/wait.h>
 #include <linux/seq_file.h>
 #include <linux/workqueue.h>
 #include <linux/sunrpc/rpc_pipe_fs.h>
 #include <linux/sunrpc/cache.h>
+#include <linux/nsproxy.h>
+#include <linux/notifier.h>
 
-static struct vfsmount *rpc_mnt __read_mostly;
-static int rpc_mount_count;
+#include "netns.h"
+#include "sunrpc.h"
+
+#define RPCDBG_FACILITY RPCDBG_DEBUG
+
+#define NET_NAME(net)  ((net == &init_net) ? " (init_net)" : "")
 
 static struct file_system_type rpc_pipe_fs_type;
 
@@ -38,7 +44,21 @@ static struct kmem_cache *rpc_inode_cachep __read_mostly;
 
 #define RPC_UPCALL_TIMEOUT (30*HZ)
 
-static void rpc_purge_list(struct rpc_inode *rpci, struct list_head *head,
+static BLOCKING_NOTIFIER_HEAD(rpc_pipefs_notifier_list);
+
+int rpc_pipefs_notifier_register(struct notifier_block *nb)
+{
+       return blocking_notifier_chain_cond_register(&rpc_pipefs_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(rpc_pipefs_notifier_register);
+
+void rpc_pipefs_notifier_unregister(struct notifier_block *nb)
+{
+       blocking_notifier_chain_unregister(&rpc_pipefs_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(rpc_pipefs_notifier_unregister);
+
+static void rpc_purge_list(wait_queue_head_t *waitq, struct list_head *head,
                void (*destroy_msg)(struct rpc_pipe_msg *), int err)
 {
        struct rpc_pipe_msg *msg;
@@ -51,30 +71,31 @@ static void rpc_purge_list(struct rpc_inode *rpci, struct list_head *head,
                msg->errno = err;
                destroy_msg(msg);
        } while (!list_empty(head));
-       wake_up(&rpci->waitq);
+       wake_up(waitq);
 }
 
 static void
 rpc_timeout_upcall_queue(struct work_struct *work)
 {
        LIST_HEAD(free_list);
-       struct rpc_inode *rpci =
-               container_of(work, struct rpc_inode, queue_timeout.work);
-       struct inode *inode = &rpci->vfs_inode;
+       struct rpc_pipe *pipe =
+               container_of(work, struct rpc_pipe, queue_timeout.work);
        void (*destroy_msg)(struct rpc_pipe_msg *);
+       struct dentry *dentry;
 
-       spin_lock(&inode->i_lock);
-       if (rpci->ops == NULL) {
-               spin_unlock(&inode->i_lock);
-               return;
+       spin_lock(&pipe->lock);
+       destroy_msg = pipe->ops->destroy_msg;
+       if (pipe->nreaders == 0) {
+               list_splice_init(&pipe->pipe, &free_list);
+               pipe->pipelen = 0;
        }
-       destroy_msg = rpci->ops->destroy_msg;
-       if (rpci->nreaders == 0) {
-               list_splice_init(&rpci->pipe, &free_list);
-               rpci->pipelen = 0;
+       dentry = dget(pipe->dentry);
+       spin_unlock(&pipe->lock);
+       if (dentry) {
+               rpc_purge_list(&RPC_I(dentry->d_inode)->waitq,
+                              &free_list, destroy_msg, -ETIMEDOUT);
+               dput(dentry);
        }
-       spin_unlock(&inode->i_lock);
-       rpc_purge_list(rpci, &free_list, destroy_msg, -ETIMEDOUT);
 }
 
 ssize_t rpc_pipe_generic_upcall(struct file *filp, struct rpc_pipe_msg *msg,
@@ -108,30 +129,31 @@ EXPORT_SYMBOL_GPL(rpc_pipe_generic_upcall);
  * initialize the fields of @msg (other than @msg->list) appropriately.
  */
 int
-rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg)
+rpc_queue_upcall(struct rpc_pipe *pipe, struct rpc_pipe_msg *msg)
 {
-       struct rpc_inode *rpci = RPC_I(inode);
        int res = -EPIPE;
+       struct dentry *dentry;
 
-       spin_lock(&inode->i_lock);
-       if (rpci->ops == NULL)
-               goto out;
-       if (rpci->nreaders) {
-               list_add_tail(&msg->list, &rpci->pipe);
-               rpci->pipelen += msg->len;
+       spin_lock(&pipe->lock);
+       if (pipe->nreaders) {
+               list_add_tail(&msg->list, &pipe->pipe);
+               pipe->pipelen += msg->len;
                res = 0;
-       } else if (rpci->flags & RPC_PIPE_WAIT_FOR_OPEN) {
-               if (list_empty(&rpci->pipe))
+       } else if (pipe->flags & RPC_PIPE_WAIT_FOR_OPEN) {
+               if (list_empty(&pipe->pipe))
                        queue_delayed_work(rpciod_workqueue,
-                                       &rpci->queue_timeout,
+                                       &pipe->queue_timeout,
                                        RPC_UPCALL_TIMEOUT);
-               list_add_tail(&msg->list, &rpci->pipe);
-               rpci->pipelen += msg->len;
+               list_add_tail(&msg->list, &pipe->pipe);
+               pipe->pipelen += msg->len;
                res = 0;
        }
-out:
-       spin_unlock(&inode->i_lock);
-       wake_up(&rpci->waitq);
+       dentry = dget(pipe->dentry);
+       spin_unlock(&pipe->lock);
+       if (dentry) {
+               wake_up(&RPC_I(dentry->d_inode)->waitq);
+               dput(dentry);
+       }
        return res;
 }
 EXPORT_SYMBOL_GPL(rpc_queue_upcall);
@@ -145,29 +167,26 @@ rpc_inode_setowner(struct inode *inode, void *private)
 static void
 rpc_close_pipes(struct inode *inode)
 {
-       struct rpc_inode *rpci = RPC_I(inode);
-       const struct rpc_pipe_ops *ops;
+       struct rpc_pipe *pipe = RPC_I(inode)->pipe;
        int need_release;
+       LIST_HEAD(free_list);
 
        mutex_lock(&inode->i_mutex);
-       ops = rpci->ops;
-       if (ops != NULL) {
-               LIST_HEAD(free_list);
-               spin_lock(&inode->i_lock);
-               need_release = rpci->nreaders != 0 || rpci->nwriters != 0;
-               rpci->nreaders = 0;
-               list_splice_init(&rpci->in_upcall, &free_list);
-               list_splice_init(&rpci->pipe, &free_list);
-               rpci->pipelen = 0;
-               rpci->ops = NULL;
-               spin_unlock(&inode->i_lock);
-               rpc_purge_list(rpci, &free_list, ops->destroy_msg, -EPIPE);
-               rpci->nwriters = 0;
-               if (need_release && ops->release_pipe)
-                       ops->release_pipe(inode);
-               cancel_delayed_work_sync(&rpci->queue_timeout);
-       }
+       spin_lock(&pipe->lock);
+       need_release = pipe->nreaders != 0 || pipe->nwriters != 0;
+       pipe->nreaders = 0;
+       list_splice_init(&pipe->in_upcall, &free_list);
+       list_splice_init(&pipe->pipe, &free_list);
+       pipe->pipelen = 0;
+       pipe->dentry = NULL;
+       spin_unlock(&pipe->lock);
+       rpc_purge_list(&RPC_I(inode)->waitq, &free_list, pipe->ops->destroy_msg, -EPIPE);
+       pipe->nwriters = 0;
+       if (need_release && pipe->ops->release_pipe)
+               pipe->ops->release_pipe(inode);
+       cancel_delayed_work_sync(&pipe->queue_timeout);
        rpc_inode_setowner(inode, NULL);
+       RPC_I(inode)->pipe = NULL;
        mutex_unlock(&inode->i_mutex);
 }
 
@@ -197,23 +216,24 @@ rpc_destroy_inode(struct inode *inode)
 static int
 rpc_pipe_open(struct inode *inode, struct file *filp)
 {
-       struct rpc_inode *rpci = RPC_I(inode);
+       struct rpc_pipe *pipe;
        int first_open;
        int res = -ENXIO;
 
        mutex_lock(&inode->i_mutex);
-       if (rpci->ops == NULL)
+       pipe = RPC_I(inode)->pipe;
+       if (pipe == NULL)
                goto out;
-       first_open = rpci->nreaders == 0 && rpci->nwriters == 0;
-       if (first_open && rpci->ops->open_pipe) {
-               res = rpci->ops->open_pipe(inode);
+       first_open = pipe->nreaders == 0 && pipe->nwriters == 0;
+       if (first_open && pipe->ops->open_pipe) {
+               res = pipe->ops->open_pipe(inode);
                if (res)
                        goto out;
        }
        if (filp->f_mode & FMODE_READ)
-               rpci->nreaders++;
+               pipe->nreaders++;
        if (filp->f_mode & FMODE_WRITE)
-               rpci->nwriters++;
+               pipe->nwriters++;
        res = 0;
 out:
        mutex_unlock(&inode->i_mutex);
@@ -223,38 +243,39 @@ out:
 static int
 rpc_pipe_release(struct inode *inode, struct file *filp)
 {
-       struct rpc_inode *rpci = RPC_I(inode);
+       struct rpc_pipe *pipe;
        struct rpc_pipe_msg *msg;
        int last_close;
 
        mutex_lock(&inode->i_mutex);
-       if (rpci->ops == NULL)
+       pipe = RPC_I(inode)->pipe;
+       if (pipe == NULL)
                goto out;
        msg = filp->private_data;
        if (msg != NULL) {
-               spin_lock(&inode->i_lock);
+               spin_lock(&pipe->lock);
                msg->errno = -EAGAIN;
                list_del_init(&msg->list);
-               spin_unlock(&inode->i_lock);
-               rpci->ops->destroy_msg(msg);
+               spin_unlock(&pipe->lock);
+               pipe->ops->destroy_msg(msg);
        }
        if (filp->f_mode & FMODE_WRITE)
-               rpci->nwriters --;
+               pipe->nwriters --;
        if (filp->f_mode & FMODE_READ) {
-               rpci->nreaders --;
-               if (rpci->nreaders == 0) {
+               pipe->nreaders --;
+               if (pipe->nreaders == 0) {
                        LIST_HEAD(free_list);
-                       spin_lock(&inode->i_lock);
-                       list_splice_init(&rpci->pipe, &free_list);
-                       rpci->pipelen = 0;
-                       spin_unlock(&inode->i_lock);
-                       rpc_purge_list(rpci, &free_list,
-                                       rpci->ops->destroy_msg, -EAGAIN);
+                       spin_lock(&pipe->lock);
+                       list_splice_init(&pipe->pipe, &free_list);
+                       pipe->pipelen = 0;
+                       spin_unlock(&pipe->lock);
+                       rpc_purge_list(&RPC_I(inode)->waitq, &free_list,
+                                       pipe->ops->destroy_msg, -EAGAIN);
                }
        }
-       last_close = rpci->nwriters == 0 && rpci->nreaders == 0;
-       if (last_close && rpci->ops->release_pipe)
-               rpci->ops->release_pipe(inode);
+       last_close = pipe->nwriters == 0 && pipe->nreaders == 0;
+       if (last_close && pipe->ops->release_pipe)
+               pipe->ops->release_pipe(inode);
 out:
        mutex_unlock(&inode->i_mutex);
        return 0;
@@ -264,39 +285,40 @@ static ssize_t
 rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset)
 {
        struct inode *inode = filp->f_path.dentry->d_inode;
-       struct rpc_inode *rpci = RPC_I(inode);
+       struct rpc_pipe *pipe;
        struct rpc_pipe_msg *msg;
        int res = 0;
 
        mutex_lock(&inode->i_mutex);
-       if (rpci->ops == NULL) {
+       pipe = RPC_I(inode)->pipe;
+       if (pipe == NULL) {
                res = -EPIPE;
                goto out_unlock;
        }
        msg = filp->private_data;
        if (msg == NULL) {
-               spin_lock(&inode->i_lock);
-               if (!list_empty(&rpci->pipe)) {
-                       msg = list_entry(rpci->pipe.next,
+               spin_lock(&pipe->lock);
+               if (!list_empty(&pipe->pipe)) {
+                       msg = list_entry(pipe->pipe.next,
                                        struct rpc_pipe_msg,
                                        list);
-                       list_move(&msg->list, &rpci->in_upcall);
-                       rpci->pipelen -= msg->len;
+                       list_move(&msg->list, &pipe->in_upcall);
+                       pipe->pipelen -= msg->len;
                        filp->private_data = msg;
                        msg->copied = 0;
                }
-               spin_unlock(&inode->i_lock);
+               spin_unlock(&pipe->lock);
                if (msg == NULL)
                        goto out_unlock;
        }
        /* NOTE: it is up to the callback to update msg->copied */
-       res = rpci->ops->upcall(filp, msg, buf, len);
+       res = pipe->ops->upcall(filp, msg, buf, len);
        if (res < 0 || msg->len == msg->copied) {
                filp->private_data = NULL;
-               spin_lock(&inode->i_lock);
+               spin_lock(&pipe->lock);
                list_del_init(&msg->list);
-               spin_unlock(&inode->i_lock);
-               rpci->ops->destroy_msg(msg);
+               spin_unlock(&pipe->lock);
+               pipe->ops->destroy_msg(msg);
        }
 out_unlock:
        mutex_unlock(&inode->i_mutex);
@@ -307,13 +329,12 @@ static ssize_t
 rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *offset)
 {
        struct inode *inode = filp->f_path.dentry->d_inode;
-       struct rpc_inode *rpci = RPC_I(inode);
        int res;
 
        mutex_lock(&inode->i_mutex);
        res = -EPIPE;
-       if (rpci->ops != NULL)
-               res = rpci->ops->downcall(filp, buf, len);
+       if (RPC_I(inode)->pipe != NULL)
+               res = RPC_I(inode)->pipe->ops->downcall(filp, buf, len);
        mutex_unlock(&inode->i_mutex);
        return res;
 }
@@ -321,17 +342,18 @@ rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *of
 static unsigned int
 rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait)
 {
-       struct rpc_inode *rpci;
-       unsigned int mask = 0;
+       struct inode *inode = filp->f_path.dentry->d_inode;
+       struct rpc_inode *rpci = RPC_I(inode);
+       unsigned int mask = POLLOUT | POLLWRNORM;
 
-       rpci = RPC_I(filp->f_path.dentry->d_inode);
        poll_wait(filp, &rpci->waitq, wait);
 
-       mask = POLLOUT | POLLWRNORM;
-       if (rpci->ops == NULL)
+       mutex_lock(&inode->i_mutex);
+       if (rpci->pipe == NULL)
                mask |= POLLERR | POLLHUP;
-       if (filp->private_data || !list_empty(&rpci->pipe))
+       else if (filp->private_data || !list_empty(&rpci->pipe->pipe))
                mask |= POLLIN | POLLRDNORM;
+       mutex_unlock(&inode->i_mutex);
        return mask;
 }
 
@@ -339,23 +361,26 @@ static long
 rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
        struct inode *inode = filp->f_path.dentry->d_inode;
-       struct rpc_inode *rpci = RPC_I(inode);
+       struct rpc_pipe *pipe;
        int len;
 
        switch (cmd) {
        case FIONREAD:
-               spin_lock(&inode->i_lock);
-               if (rpci->ops == NULL) {
-                       spin_unlock(&inode->i_lock);
+               mutex_lock(&inode->i_mutex);
+               pipe = RPC_I(inode)->pipe;
+               if (pipe == NULL) {
+                       mutex_unlock(&inode->i_mutex);
                        return -EPIPE;
                }
-               len = rpci->pipelen;
+               spin_lock(&pipe->lock);
+               len = pipe->pipelen;
                if (filp->private_data) {
                        struct rpc_pipe_msg *msg;
                        msg = filp->private_data;
                        len += msg->len - msg->copied;
                }
-               spin_unlock(&inode->i_lock);
+               spin_unlock(&pipe->lock);
+               mutex_unlock(&inode->i_mutex);
                return put_user(len, (int __user *)arg);
        default:
                return -EINVAL;
@@ -378,12 +403,15 @@ rpc_show_info(struct seq_file *m, void *v)
 {
        struct rpc_clnt *clnt = m->private;
 
-       seq_printf(m, "RPC server: %s\n", clnt->cl_server);
+       rcu_read_lock();
+       seq_printf(m, "RPC server: %s\n",
+                       rcu_dereference(clnt->cl_xprt)->servername);
        seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_protname,
                        clnt->cl_prog, clnt->cl_vers);
        seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR));
        seq_printf(m, "protocol: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PROTO));
        seq_printf(m, "port: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PORT));
+       rcu_read_unlock();
        return 0;
 }
 
@@ -440,23 +468,6 @@ struct rpc_filelist {
        umode_t mode;
 };
 
-struct vfsmount *rpc_get_mount(void)
-{
-       int err;
-
-       err = simple_pin_fs(&rpc_pipe_fs_type, &rpc_mnt, &rpc_mount_count);
-       if (err != 0)
-               return ERR_PTR(err);
-       return rpc_mnt;
-}
-EXPORT_SYMBOL_GPL(rpc_get_mount);
-
-void rpc_put_mount(void)
-{
-       simple_release_fs(&rpc_mnt, &rpc_mount_count);
-}
-EXPORT_SYMBOL_GPL(rpc_put_mount);
-
 static int rpc_delete_dentry(const struct dentry *dentry)
 {
        return 1;
@@ -540,12 +551,47 @@ static int __rpc_mkdir(struct inode *dir, struct dentry *dentry,
        return 0;
 }
 
-static int __rpc_mkpipe(struct inode *dir, struct dentry *dentry,
-                       umode_t mode,
-                       const struct file_operations *i_fop,
-                       void *private,
-                       const struct rpc_pipe_ops *ops,
-                       int flags)
+static void
+init_pipe(struct rpc_pipe *pipe)
+{
+       pipe->nreaders = 0;
+       pipe->nwriters = 0;
+       INIT_LIST_HEAD(&pipe->in_upcall);
+       INIT_LIST_HEAD(&pipe->in_downcall);
+       INIT_LIST_HEAD(&pipe->pipe);
+       pipe->pipelen = 0;
+       INIT_DELAYED_WORK(&pipe->queue_timeout,
+                           rpc_timeout_upcall_queue);
+       pipe->ops = NULL;
+       spin_lock_init(&pipe->lock);
+       pipe->dentry = NULL;
+}
+
+void rpc_destroy_pipe_data(struct rpc_pipe *pipe)
+{
+       kfree(pipe);
+}
+EXPORT_SYMBOL_GPL(rpc_destroy_pipe_data);
+
+struct rpc_pipe *rpc_mkpipe_data(const struct rpc_pipe_ops *ops, int flags)
+{
+       struct rpc_pipe *pipe;
+
+       pipe = kzalloc(sizeof(struct rpc_pipe), GFP_KERNEL);
+       if (!pipe)
+               return ERR_PTR(-ENOMEM);
+       init_pipe(pipe);
+       pipe->ops = ops;
+       pipe->flags = flags;
+       return pipe;
+}
+EXPORT_SYMBOL_GPL(rpc_mkpipe_data);
+
+static int __rpc_mkpipe_dentry(struct inode *dir, struct dentry *dentry,
+                              umode_t mode,
+                              const struct file_operations *i_fop,
+                              void *private,
+                              struct rpc_pipe *pipe)
 {
        struct rpc_inode *rpci;
        int err;
@@ -554,10 +600,8 @@ static int __rpc_mkpipe(struct inode *dir, struct dentry *dentry,
        if (err)
                return err;
        rpci = RPC_I(dentry->d_inode);
-       rpci->nkern_readwriters = 1;
        rpci->private = private;
-       rpci->flags = flags;
-       rpci->ops = ops;
+       rpci->pipe = pipe;
        fsnotify_create(dir, dentry);
        return 0;
 }
@@ -573,6 +617,22 @@ static int __rpc_rmdir(struct inode *dir, struct dentry *dentry)
        return ret;
 }
 
+int rpc_rmdir(struct dentry *dentry)
+{
+       struct dentry *parent;
+       struct inode *dir;
+       int error;
+
+       parent = dget_parent(dentry);
+       dir = parent->d_inode;
+       mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+       error = __rpc_rmdir(dir, dentry);
+       mutex_unlock(&dir->i_mutex);
+       dput(parent);
+       return error;
+}
+EXPORT_SYMBOL_GPL(rpc_rmdir);
+
 static int __rpc_unlink(struct inode *dir, struct dentry *dentry)
 {
        int ret;
@@ -587,16 +647,12 @@ static int __rpc_unlink(struct inode *dir, struct dentry *dentry)
 static int __rpc_rmpipe(struct inode *dir, struct dentry *dentry)
 {
        struct inode *inode = dentry->d_inode;
-       struct rpc_inode *rpci = RPC_I(inode);
 
-       rpci->nkern_readwriters--;
-       if (rpci->nkern_readwriters != 0)
-               return 0;
        rpc_close_pipes(inode);
        return __rpc_unlink(dir, dentry);
 }
 
-static struct dentry *__rpc_lookup_create(struct dentry *parent,
+static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent,
                                          struct qstr *name)
 {
        struct dentry *dentry;
@@ -604,27 +660,13 @@ static struct dentry *__rpc_lookup_create(struct dentry *parent,
        dentry = d_lookup(parent, name);
        if (!dentry) {
                dentry = d_alloc(parent, name);
-               if (!dentry) {
-                       dentry = ERR_PTR(-ENOMEM);
-                       goto out_err;
-               }
+               if (!dentry)
+                       return ERR_PTR(-ENOMEM);
        }
-       if (!dentry->d_inode)
+       if (dentry->d_inode == NULL) {
                d_set_d_op(dentry, &rpc_dentry_operations);
-out_err:
-       return dentry;
-}
-
-static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent,
-                                         struct qstr *name)
-{
-       struct dentry *dentry;
-
-       dentry = __rpc_lookup_create(parent, name);
-       if (IS_ERR(dentry))
-               return dentry;
-       if (dentry->d_inode == NULL)
                return dentry;
+       }
        dput(dentry);
        return ERR_PTR(-EEXIST);
 }
@@ -779,7 +821,7 @@ static int rpc_rmdir_depopulate(struct dentry *dentry,
  * @private: private data to associate with the pipe, for the caller's use
  * @ops: operations defining the behavior of the pipe: upcall, downcall,
  *     release_pipe, open_pipe, and destroy_msg.
- * @flags: rpc_inode flags
+ * @flags: rpc_pipe flags
  *
  * Data is made available for userspace to read by calls to
  * rpc_queue_upcall().  The actual reads will result in calls to
@@ -792,9 +834,8 @@ static int rpc_rmdir_depopulate(struct dentry *dentry,
  * The @private argument passed here will be available to all these methods
  * from the file pointer, via RPC_I(file->f_dentry->d_inode)->private.
  */
-struct dentry *rpc_mkpipe(struct dentry *parent, const char *name,
-                         void *private, const struct rpc_pipe_ops *ops,
-                         int flags)
+struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name,
+                                void *private, struct rpc_pipe *pipe)
 {
        struct dentry *dentry;
        struct inode *dir = parent->d_inode;
@@ -802,9 +843,9 @@ struct dentry *rpc_mkpipe(struct dentry *parent, const char *name,
        struct qstr q;
        int err;
 
-       if (ops->upcall == NULL)
+       if (pipe->ops->upcall == NULL)
                umode &= ~S_IRUGO;
-       if (ops->downcall == NULL)
+       if (pipe->ops->downcall == NULL)
                umode &= ~S_IWUGO;
 
        q.name = name;
@@ -812,24 +853,11 @@ struct dentry *rpc_mkpipe(struct dentry *parent, const char *name,
        q.hash = full_name_hash(q.name, q.len),
 
        mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
-       dentry = __rpc_lookup_create(parent, &q);
+       dentry = __rpc_lookup_create_exclusive(parent, &q);
        if (IS_ERR(dentry))
                goto out;
-       if (dentry->d_inode) {
-               struct rpc_inode *rpci = RPC_I(dentry->d_inode);
-               if (rpci->private != private ||
-                               rpci->ops != ops ||
-                               rpci->flags != flags) {
-                       dput (dentry);
-                       err = -EBUSY;
-                       goto out_err;
-               }
-               rpci->nkern_readwriters++;
-               goto out;
-       }
-
-       err = __rpc_mkpipe(dir, dentry, umode, &rpc_pipe_fops,
-                          private, ops, flags);
+       err = __rpc_mkpipe_dentry(dir, dentry, umode, &rpc_pipe_fops,
+                                 private, pipe);
        if (err)
                goto out_err;
 out:
@@ -842,7 +870,7 @@ out_err:
                        err);
        goto out;
 }
-EXPORT_SYMBOL_GPL(rpc_mkpipe);
+EXPORT_SYMBOL_GPL(rpc_mkpipe_dentry);
 
 /**
  * rpc_unlink - remove a pipe
@@ -915,7 +943,7 @@ struct dentry *rpc_create_client_dir(struct dentry *dentry,
 
 /**
  * rpc_remove_client_dir - Remove a directory created with rpc_create_client_dir()
- * @dentry: directory to remove
+ * @dentry: dentry of the rpc client directory to remove
  */
 int rpc_remove_client_dir(struct dentry *dentry)
 {
@@ -1020,11 +1048,64 @@ static const struct rpc_filelist files[] = {
        },
 };
 
+/*
+ * This call can be used only in RPC pipefs mount notification hooks.
+ */
+struct dentry *rpc_d_lookup_sb(const struct super_block *sb,
+                              const unsigned char *dir_name)
+{
+       struct qstr dir = {
+               .name = dir_name,
+               .len = strlen(dir_name),
+               .hash = full_name_hash(dir_name, strlen(dir_name)),
+       };
+
+       return d_lookup(sb->s_root, &dir);
+}
+EXPORT_SYMBOL_GPL(rpc_d_lookup_sb);
+
+void rpc_pipefs_init_net(struct net *net)
+{
+       struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
+       mutex_init(&sn->pipefs_sb_lock);
+}
+
+/*
+ * This call is used for per-network-namespace operations.
+ * Note: the function returns with pipefs_sb_lock held if a superblock was
+ * found. This lock has to be released by rpc_put_sb_net() once all operations
+ * have completed.
+ */
+struct super_block *rpc_get_sb_net(const struct net *net)
+{
+       struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
+       mutex_lock(&sn->pipefs_sb_lock);
+       if (sn->pipefs_sb)
+               return sn->pipefs_sb;
+       mutex_unlock(&sn->pipefs_sb_lock);
+       return NULL;
+}
+EXPORT_SYMBOL_GPL(rpc_get_sb_net);
+
+void rpc_put_sb_net(const struct net *net)
+{
+       struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
+       BUG_ON(sn->pipefs_sb == NULL);
+       mutex_unlock(&sn->pipefs_sb_lock);
+}
+EXPORT_SYMBOL_GPL(rpc_put_sb_net);
+
 static int
 rpc_fill_super(struct super_block *sb, void *data, int silent)
 {
        struct inode *inode;
        struct dentry *root;
+       struct net *net = data;
+       struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+       int err;
 
        sb->s_blocksize = PAGE_CACHE_SIZE;
        sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
@@ -1038,21 +1119,54 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
                return -ENOMEM;
        if (rpc_populate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF, NULL))
                return -ENOMEM;
+       dprintk("RPC:   sending pipefs MOUNT notification for net %p%s\n", net,
+                                                               NET_NAME(net));
+       err = blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
+                                          RPC_PIPEFS_MOUNT,
+                                          sb);
+       if (err)
+               goto err_depopulate;
+       sb->s_fs_info = get_net(net);
+       sn->pipefs_sb = sb;
        return 0;
+
+err_depopulate:
+       blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
+                                          RPC_PIPEFS_UMOUNT,
+                                          sb);
+       __rpc_depopulate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF);
+       return err;
 }
 
 static struct dentry *
 rpc_mount(struct file_system_type *fs_type,
                int flags, const char *dev_name, void *data)
 {
-       return mount_single(fs_type, flags, data, rpc_fill_super);
+       return mount_ns(fs_type, flags, current->nsproxy->net_ns, rpc_fill_super);
+}
+
+static void rpc_kill_sb(struct super_block *sb)
+{
+       struct net *net = sb->s_fs_info;
+       struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
+       mutex_lock(&sn->pipefs_sb_lock);
+       sn->pipefs_sb = NULL;
+       mutex_unlock(&sn->pipefs_sb_lock);
+       put_net(net);
+       dprintk("RPC:   sending pipefs UMOUNT notification for net %p%s\n", net,
+                                                               NET_NAME(net));
+       blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
+                                          RPC_PIPEFS_UMOUNT,
+                                          sb);
+       kill_litter_super(sb);
 }
 
 static struct file_system_type rpc_pipe_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "rpc_pipefs",
        .mount          = rpc_mount,
-       .kill_sb        = kill_litter_super,
+       .kill_sb        = rpc_kill_sb,
 };
 
 static void
@@ -1062,16 +1176,8 @@ init_once(void *foo)
 
        inode_init_once(&rpci->vfs_inode);
        rpci->private = NULL;
-       rpci->nreaders = 0;
-       rpci->nwriters = 0;
-       INIT_LIST_HEAD(&rpci->in_upcall);
-       INIT_LIST_HEAD(&rpci->in_downcall);
-       INIT_LIST_HEAD(&rpci->pipe);
-       rpci->pipelen = 0;
+       rpci->pipe = NULL;
        init_waitqueue_head(&rpci->waitq);
-       INIT_DELAYED_WORK(&rpci->queue_timeout,
-                           rpc_timeout_upcall_queue);
-       rpci->ops = NULL;
 }
 
 int register_rpc_pipefs(void)
@@ -1085,17 +1191,24 @@ int register_rpc_pipefs(void)
                                init_once);
        if (!rpc_inode_cachep)
                return -ENOMEM;
+       err = rpc_clients_notifier_register();
+       if (err)
+               goto err_notifier;
        err = register_filesystem(&rpc_pipe_fs_type);
-       if (err) {
-               kmem_cache_destroy(rpc_inode_cachep);
-               return err;
-       }
-
+       if (err)
+               goto err_register;
        return 0;
+
+err_register:
+       rpc_clients_notifier_unregister();
+err_notifier:
+       kmem_cache_destroy(rpc_inode_cachep);
+       return err;
 }
 
 void unregister_rpc_pipefs(void)
 {
+       rpc_clients_notifier_unregister();
        kmem_cache_destroy(rpc_inode_cachep);
        unregister_filesystem(&rpc_pipe_fs_type);
 }
index 8761bf8e36fc3cb8b41348d879093461c6ce98db..207a74696c9f84a62df704c94190ed28474c23b0 100644 (file)
 #include <linux/errno.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
+#include <linux/nsproxy.h>
 #include <net/ipv6.h>
 
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/sched.h>
 #include <linux/sunrpc/xprtsock.h>
 
+#include "netns.h"
+
 #ifdef RPC_DEBUG
 # define RPCDBG_FACILITY       RPCDBG_BIND
 #endif
@@ -109,13 +112,7 @@ enum {
 
 static void                    rpcb_getport_done(struct rpc_task *, void *);
 static void                    rpcb_map_release(void *data);
-static struct rpc_program      rpcb_program;
-
-static struct rpc_clnt *       rpcb_local_clnt;
-static struct rpc_clnt *       rpcb_local_clnt4;
-
-DEFINE_SPINLOCK(rpcb_clnt_lock);
-unsigned int                   rpcb_users;
+static const struct rpc_program        rpcb_program;
 
 struct rpcbind_args {
        struct rpc_xprt *       r_xprt;
@@ -140,8 +137,8 @@ struct rpcb_info {
        struct rpc_procinfo *   rpc_proc;
 };
 
-static struct rpcb_info rpcb_next_version[];
-static struct rpcb_info rpcb_next_version6[];
+static const struct rpcb_info rpcb_next_version[];
+static const struct rpcb_info rpcb_next_version6[];
 
 static const struct rpc_call_ops rpcb_getport_ops = {
        .rpc_call_done          = rpcb_getport_done,
@@ -164,32 +161,34 @@ static void rpcb_map_release(void *data)
        kfree(map);
 }
 
-static int rpcb_get_local(void)
+static int rpcb_get_local(struct net *net)
 {
        int cnt;
+       struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
 
-       spin_lock(&rpcb_clnt_lock);
-       if (rpcb_users)
-               rpcb_users++;
-       cnt = rpcb_users;
-       spin_unlock(&rpcb_clnt_lock);
+       spin_lock(&sn->rpcb_clnt_lock);
+       if (sn->rpcb_users)
+               sn->rpcb_users++;
+       cnt = sn->rpcb_users;
+       spin_unlock(&sn->rpcb_clnt_lock);
 
        return cnt;
 }
 
-void rpcb_put_local(void)
+void rpcb_put_local(struct net *net)
 {
-       struct rpc_clnt *clnt = rpcb_local_clnt;
-       struct rpc_clnt *clnt4 = rpcb_local_clnt4;
+       struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+       struct rpc_clnt *clnt = sn->rpcb_local_clnt;
+       struct rpc_clnt *clnt4 = sn->rpcb_local_clnt4;
        int shutdown;
 
-       spin_lock(&rpcb_clnt_lock);
-       if (--rpcb_users == 0) {
-               rpcb_local_clnt = NULL;
-               rpcb_local_clnt4 = NULL;
+       spin_lock(&sn->rpcb_clnt_lock);
+       if (--sn->rpcb_users == 0) {
+               sn->rpcb_local_clnt = NULL;
+               sn->rpcb_local_clnt4 = NULL;
        }
-       shutdown = !rpcb_users;
-       spin_unlock(&rpcb_clnt_lock);
+       shutdown = !sn->rpcb_users;
+       spin_unlock(&sn->rpcb_clnt_lock);
 
        if (shutdown) {
                /*
@@ -202,30 +201,34 @@ void rpcb_put_local(void)
        }
 }
 
-static void rpcb_set_local(struct rpc_clnt *clnt, struct rpc_clnt *clnt4)
+static void rpcb_set_local(struct net *net, struct rpc_clnt *clnt,
+                       struct rpc_clnt *clnt4)
 {
+       struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
        /* Protected by rpcb_create_local_mutex */
-       rpcb_local_clnt = clnt;
-       rpcb_local_clnt4 = clnt4;
+       sn->rpcb_local_clnt = clnt;
+       sn->rpcb_local_clnt4 = clnt4;
        smp_wmb(); 
-       rpcb_users = 1;
+       sn->rpcb_users = 1;
        dprintk("RPC:       created new rpcb local clients (rpcb_local_clnt: "
-                       "%p, rpcb_local_clnt4: %p)\n", rpcb_local_clnt,
-                       rpcb_local_clnt4);
+                       "%p, rpcb_local_clnt4: %p) for net %p%s\n",
+                       sn->rpcb_local_clnt, sn->rpcb_local_clnt4,
+                       net, (net == &init_net) ? " (init_net)" : "");
 }
 
 /*
  * Returns zero on success, otherwise a negative errno value
  * is returned.
  */
-static int rpcb_create_local_unix(void)
+static int rpcb_create_local_unix(struct net *net)
 {
        static const struct sockaddr_un rpcb_localaddr_rpcbind = {
                .sun_family             = AF_LOCAL,
                .sun_path               = RPCBIND_SOCK_PATHNAME,
        };
        struct rpc_create_args args = {
-               .net            = &init_net,
+               .net            = net,
                .protocol       = XPRT_TRANSPORT_LOCAL,
                .address        = (struct sockaddr *)&rpcb_localaddr_rpcbind,
                .addrsize       = sizeof(rpcb_localaddr_rpcbind),
@@ -258,7 +261,7 @@ static int rpcb_create_local_unix(void)
                clnt4 = NULL;
        }
 
-       rpcb_set_local(clnt, clnt4);
+       rpcb_set_local(net, clnt, clnt4);
 
 out:
        return result;
@@ -268,7 +271,7 @@ out:
  * Returns zero on success, otherwise a negative errno value
  * is returned.
  */
-static int rpcb_create_local_net(void)
+static int rpcb_create_local_net(struct net *net)
 {
        static const struct sockaddr_in rpcb_inaddr_loopback = {
                .sin_family             = AF_INET,
@@ -276,7 +279,7 @@ static int rpcb_create_local_net(void)
                .sin_port               = htons(RPCBIND_PORT),
        };
        struct rpc_create_args args = {
-               .net            = &init_net,
+               .net            = net,
                .protocol       = XPRT_TRANSPORT_TCP,
                .address        = (struct sockaddr *)&rpcb_inaddr_loopback,
                .addrsize       = sizeof(rpcb_inaddr_loopback),
@@ -310,7 +313,7 @@ static int rpcb_create_local_net(void)
                clnt4 = NULL;
        }
 
-       rpcb_set_local(clnt, clnt4);
+       rpcb_set_local(net, clnt, clnt4);
 
 out:
        return result;
@@ -320,31 +323,32 @@ out:
  * Returns zero on success, otherwise a negative errno value
  * is returned.
  */
-int rpcb_create_local(void)
+int rpcb_create_local(struct net *net)
 {
        static DEFINE_MUTEX(rpcb_create_local_mutex);
        int result = 0;
 
-       if (rpcb_get_local())
+       if (rpcb_get_local(net))
                return result;
 
        mutex_lock(&rpcb_create_local_mutex);
-       if (rpcb_get_local())
+       if (rpcb_get_local(net))
                goto out;
 
-       if (rpcb_create_local_unix() != 0)
-               result = rpcb_create_local_net();
+       if (rpcb_create_local_unix(net) != 0)
+               result = rpcb_create_local_net(net);
 
 out:
        mutex_unlock(&rpcb_create_local_mutex);
        return result;
 }
 
-static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
-                                   size_t salen, int proto, u32 version)
+static struct rpc_clnt *rpcb_create(struct net *net, const char *hostname,
+                                   struct sockaddr *srvaddr, size_t salen,
+                                   int proto, u32 version)
 {
        struct rpc_create_args args = {
-               .net            = &init_net,
+               .net            = net,
                .protocol       = proto,
                .address        = srvaddr,
                .addrsize       = salen,
@@ -420,7 +424,7 @@ static int rpcb_register_call(struct rpc_clnt *clnt, struct rpc_message *msg)
  * IN6ADDR_ANY (ie available for all AF_INET and AF_INET6
  * addresses).
  */
-int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port)
+int rpcb_register(struct net *net, u32 prog, u32 vers, int prot, unsigned short port)
 {
        struct rpcbind_args map = {
                .r_prog         = prog,
@@ -431,6 +435,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port)
        struct rpc_message msg = {
                .rpc_argp       = &map,
        };
+       struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
 
        dprintk("RPC:       %sregistering (%u, %u, %d, %u) with local "
                        "rpcbind\n", (port ? "" : "un"),
@@ -440,13 +445,14 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port)
        if (port)
                msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET];
 
-       return rpcb_register_call(rpcb_local_clnt, &msg);
+       return rpcb_register_call(sn->rpcb_local_clnt, &msg);
 }
 
 /*
  * Fill in AF_INET family-specific arguments to register
  */
-static int rpcb_register_inet4(const struct sockaddr *sap,
+static int rpcb_register_inet4(struct sunrpc_net *sn,
+                              const struct sockaddr *sap,
                               struct rpc_message *msg)
 {
        const struct sockaddr_in *sin = (const struct sockaddr_in *)sap;
@@ -465,7 +471,7 @@ static int rpcb_register_inet4(const struct sockaddr *sap,
        if (port)
                msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET];
 
-       result = rpcb_register_call(rpcb_local_clnt4, msg);
+       result = rpcb_register_call(sn->rpcb_local_clnt4, msg);
        kfree(map->r_addr);
        return result;
 }
@@ -473,7 +479,8 @@ static int rpcb_register_inet4(const struct sockaddr *sap,
 /*
  * Fill in AF_INET6 family-specific arguments to register
  */
-static int rpcb_register_inet6(const struct sockaddr *sap,
+static int rpcb_register_inet6(struct sunrpc_net *sn,
+                              const struct sockaddr *sap,
                               struct rpc_message *msg)
 {
        const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap;
@@ -492,12 +499,13 @@ static int rpcb_register_inet6(const struct sockaddr *sap,
        if (port)
                msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET];
 
-       result = rpcb_register_call(rpcb_local_clnt4, msg);
+       result = rpcb_register_call(sn->rpcb_local_clnt4, msg);
        kfree(map->r_addr);
        return result;
 }
 
-static int rpcb_unregister_all_protofamilies(struct rpc_message *msg)
+static int rpcb_unregister_all_protofamilies(struct sunrpc_net *sn,
+                                            struct rpc_message *msg)
 {
        struct rpcbind_args *map = msg->rpc_argp;
 
@@ -508,7 +516,7 @@ static int rpcb_unregister_all_protofamilies(struct rpc_message *msg)
        map->r_addr = "";
        msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET];
 
-       return rpcb_register_call(rpcb_local_clnt4, msg);
+       return rpcb_register_call(sn->rpcb_local_clnt4, msg);
 }
 
 /**
@@ -554,7 +562,7 @@ static int rpcb_unregister_all_protofamilies(struct rpc_message *msg)
  * service on any IPv4 address, but not on IPv6.  The latter
  * advertises the service on all IPv4 and IPv6 addresses.
  */
-int rpcb_v4_register(const u32 program, const u32 version,
+int rpcb_v4_register(struct net *net, const u32 program, const u32 version,
                     const struct sockaddr *address, const char *netid)
 {
        struct rpcbind_args map = {
@@ -566,18 +574,19 @@ int rpcb_v4_register(const u32 program, const u32 version,
        struct rpc_message msg = {
                .rpc_argp       = &map,
        };
+       struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
 
-       if (rpcb_local_clnt4 == NULL)
+       if (sn->rpcb_local_clnt4 == NULL)
                return -EPROTONOSUPPORT;
 
        if (address == NULL)
-               return rpcb_unregister_all_protofamilies(&msg);
+               return rpcb_unregister_all_protofamilies(sn, &msg);
 
        switch (address->sa_family) {
        case AF_INET:
-               return rpcb_register_inet4(address, &msg);
+               return rpcb_register_inet4(sn, address, &msg);
        case AF_INET6:
-               return rpcb_register_inet6(address, &msg);
+               return rpcb_register_inet6(sn, address, &msg);
        }
 
        return -EAFNOSUPPORT;
@@ -611,9 +620,10 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi
 static struct rpc_clnt *rpcb_find_transport_owner(struct rpc_clnt *clnt)
 {
        struct rpc_clnt *parent = clnt->cl_parent;
+       struct rpc_xprt *xprt = rcu_dereference(clnt->cl_xprt);
 
        while (parent != clnt) {
-               if (parent->cl_xprt != clnt->cl_xprt)
+               if (rcu_dereference(parent->cl_xprt) != xprt)
                        break;
                if (clnt->cl_autobind)
                        break;
@@ -644,12 +654,16 @@ void rpcb_getport_async(struct rpc_task *task)
        size_t salen;
        int status;
 
-       clnt = rpcb_find_transport_owner(task->tk_client);
-       xprt = clnt->cl_xprt;
+       rcu_read_lock();
+       do {
+               clnt = rpcb_find_transport_owner(task->tk_client);
+               xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
+       } while (xprt == NULL);
+       rcu_read_unlock();
 
        dprintk("RPC: %5u %s(%s, %u, %u, %d)\n",
                task->tk_pid, __func__,
-               clnt->cl_server, clnt->cl_prog, clnt->cl_vers, xprt->prot);
+               xprt->servername, clnt->cl_prog, clnt->cl_vers, xprt->prot);
 
        /* Put self on the wait queue to ensure we get notified if
         * some other task is already attempting to bind the port */
@@ -658,6 +672,7 @@ void rpcb_getport_async(struct rpc_task *task)
        if (xprt_test_and_set_binding(xprt)) {
                dprintk("RPC: %5u %s: waiting for another binder\n",
                        task->tk_pid, __func__);
+               xprt_put(xprt);
                return;
        }
 
@@ -699,8 +714,8 @@ void rpcb_getport_async(struct rpc_task *task)
        dprintk("RPC: %5u %s: trying rpcbind version %u\n",
                task->tk_pid, __func__, bind_version);
 
-       rpcb_clnt = rpcb_create(clnt->cl_server, sap, salen, xprt->prot,
-                               bind_version);
+       rpcb_clnt = rpcb_create(xprt->xprt_net, xprt->servername, sap, salen,
+                               xprt->prot, bind_version);
        if (IS_ERR(rpcb_clnt)) {
                status = PTR_ERR(rpcb_clnt);
                dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n",
@@ -725,7 +740,7 @@ void rpcb_getport_async(struct rpc_task *task)
        switch (bind_version) {
        case RPCBVERS_4:
        case RPCBVERS_3:
-               map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID);
+               map->r_netid = xprt->address_strings[RPC_DISPLAY_NETID];
                map->r_addr = rpc_sockaddr2uaddr(sap, GFP_ATOMIC);
                map->r_owner = "";
                break;
@@ -754,6 +769,7 @@ bailout_release_client:
 bailout_nofree:
        rpcb_wake_rpcbind_waiters(xprt, status);
        task->tk_status = status;
+       xprt_put(xprt);
 }
 EXPORT_SYMBOL_GPL(rpcb_getport_async);
 
@@ -801,11 +817,11 @@ static void rpcb_getport_done(struct rpc_task *child, void *data)
 static void rpcb_enc_mapping(struct rpc_rqst *req, struct xdr_stream *xdr,
                             const struct rpcbind_args *rpcb)
 {
-       struct rpc_task *task = req->rq_task;
        __be32 *p;
 
        dprintk("RPC: %5u encoding PMAP_%s call (%u, %u, %d, %u)\n",
-                       task->tk_pid, task->tk_msg.rpc_proc->p_name,
+                       req->rq_task->tk_pid,
+                       req->rq_task->tk_msg.rpc_proc->p_name,
                        rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port);
 
        p = xdr_reserve_space(xdr, RPCB_mappingargs_sz << 2);
@@ -818,7 +834,6 @@ static void rpcb_enc_mapping(struct rpc_rqst *req, struct xdr_stream *xdr,
 static int rpcb_dec_getport(struct rpc_rqst *req, struct xdr_stream *xdr,
                            struct rpcbind_args *rpcb)
 {
-       struct rpc_task *task = req->rq_task;
        unsigned long port;
        __be32 *p;
 
@@ -829,8 +844,8 @@ static int rpcb_dec_getport(struct rpc_rqst *req, struct xdr_stream *xdr,
                return -EIO;
 
        port = be32_to_cpup(p);
-       dprintk("RPC: %5u PMAP_%s result: %lu\n", task->tk_pid,
-                       task->tk_msg.rpc_proc->p_name, port);
+       dprintk("RPC: %5u PMAP_%s result: %lu\n", req->rq_task->tk_pid,
+                       req->rq_task->tk_msg.rpc_proc->p_name, port);
        if (unlikely(port > USHRT_MAX))
                return -EIO;
 
@@ -841,7 +856,6 @@ static int rpcb_dec_getport(struct rpc_rqst *req, struct xdr_stream *xdr,
 static int rpcb_dec_set(struct rpc_rqst *req, struct xdr_stream *xdr,
                        unsigned int *boolp)
 {
-       struct rpc_task *task = req->rq_task;
        __be32 *p;
 
        p = xdr_inline_decode(xdr, 4);
@@ -853,7 +867,8 @@ static int rpcb_dec_set(struct rpc_rqst *req, struct xdr_stream *xdr,
                *boolp = 1;
 
        dprintk("RPC: %5u RPCB_%s call %s\n",
-                       task->tk_pid, task->tk_msg.rpc_proc->p_name,
+                       req->rq_task->tk_pid,
+                       req->rq_task->tk_msg.rpc_proc->p_name,
                        (*boolp ? "succeeded" : "failed"));
        return 0;
 }
@@ -873,11 +888,11 @@ static void encode_rpcb_string(struct xdr_stream *xdr, const char *string,
 static void rpcb_enc_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
                             const struct rpcbind_args *rpcb)
 {
-       struct rpc_task *task = req->rq_task;
        __be32 *p;
 
        dprintk("RPC: %5u encoding RPCB_%s call (%u, %u, '%s', '%s')\n",
-                       task->tk_pid, task->tk_msg.rpc_proc->p_name,
+                       req->rq_task->tk_pid,
+                       req->rq_task->tk_msg.rpc_proc->p_name,
                        rpcb->r_prog, rpcb->r_vers,
                        rpcb->r_netid, rpcb->r_addr);
 
@@ -895,7 +910,6 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
 {
        struct sockaddr_storage address;
        struct sockaddr *sap = (struct sockaddr *)&address;
-       struct rpc_task *task = req->rq_task;
        __be32 *p;
        u32 len;
 
@@ -912,7 +926,7 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
         */
        if (len == 0) {
                dprintk("RPC: %5u RPCB reply: program not registered\n",
-                               task->tk_pid);
+                               req->rq_task->tk_pid);
                return 0;
        }
 
@@ -922,10 +936,11 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
        p = xdr_inline_decode(xdr, len);
        if (unlikely(p == NULL))
                goto out_fail;
-       dprintk("RPC: %5u RPCB_%s reply: %s\n", task->tk_pid,
-                       task->tk_msg.rpc_proc->p_name, (char *)p);
+       dprintk("RPC: %5u RPCB_%s reply: %s\n", req->rq_task->tk_pid,
+                       req->rq_task->tk_msg.rpc_proc->p_name, (char *)p);
 
-       if (rpc_uaddr2sockaddr((char *)p, len, sap, sizeof(address)) == 0)
+       if (rpc_uaddr2sockaddr(req->rq_xprt->xprt_net, (char *)p, len,
+                               sap, sizeof(address)) == 0)
                goto out_fail;
        rpcb->r_port = rpc_get_port(sap);
 
@@ -933,7 +948,8 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
 
 out_fail:
        dprintk("RPC: %5u malformed RPCB_%s reply\n",
-                       task->tk_pid, task->tk_msg.rpc_proc->p_name);
+                       req->rq_task->tk_pid,
+                       req->rq_task->tk_msg.rpc_proc->p_name);
        return -EIO;
 }
 
@@ -1041,7 +1057,7 @@ static struct rpc_procinfo rpcb_procedures4[] = {
        },
 };
 
-static struct rpcb_info rpcb_next_version[] = {
+static const struct rpcb_info rpcb_next_version[] = {
        {
                .rpc_vers       = RPCBVERS_2,
                .rpc_proc       = &rpcb_procedures2[RPCBPROC_GETPORT],
@@ -1051,7 +1067,7 @@ static struct rpcb_info rpcb_next_version[] = {
        },
 };
 
-static struct rpcb_info rpcb_next_version6[] = {
+static const struct rpcb_info rpcb_next_version6[] = {
        {
                .rpc_vers       = RPCBVERS_4,
                .rpc_proc       = &rpcb_procedures4[RPCBPROC_GETADDR],
@@ -1065,25 +1081,25 @@ static struct rpcb_info rpcb_next_version6[] = {
        },
 };
 
-static struct rpc_version rpcb_version2 = {
+static const struct rpc_version rpcb_version2 = {
        .number         = RPCBVERS_2,
        .nrprocs        = ARRAY_SIZE(rpcb_procedures2),
        .procs          = rpcb_procedures2
 };
 
-static struct rpc_version rpcb_version3 = {
+static const struct rpc_version rpcb_version3 = {
        .number         = RPCBVERS_3,
        .nrprocs        = ARRAY_SIZE(rpcb_procedures3),
        .procs          = rpcb_procedures3
 };
 
-static struct rpc_version rpcb_version4 = {
+static const struct rpc_version rpcb_version4 = {
        .number         = RPCBVERS_4,
        .nrprocs        = ARRAY_SIZE(rpcb_procedures4),
        .procs          = rpcb_procedures4
 };
 
-static struct rpc_version *rpcb_version[] = {
+static const struct rpc_version *rpcb_version[] = {
        NULL,
        NULL,
        &rpcb_version2,
@@ -1093,7 +1109,7 @@ static struct rpc_version *rpcb_version[] = {
 
 static struct rpc_stat rpcb_stats;
 
-static struct rpc_program rpcb_program = {
+static const struct rpc_program rpcb_program = {
        .name           = "rpcbind",
        .number         = RPCBIND_PROGRAM,
        .nrvers         = ARRAY_SIZE(rpcb_version),
index 3341d89627865308be08e38a78c828f29eafa655..994cfea2bad66f814432c2fcbb1227d0a321d34f 100644 (file)
@@ -28,6 +28,9 @@
 #define RPCDBG_FACILITY                RPCDBG_SCHED
 #endif
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/sunrpc.h>
+
 /*
  * RPC slabs and memory pools
  */
@@ -205,9 +208,7 @@ static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const c
        queue->qlen = 0;
        setup_timer(&queue->timer_list.timer, __rpc_queue_timer_fn, (unsigned long)queue);
        INIT_LIST_HEAD(&queue->timer_list.list);
-#ifdef RPC_DEBUG
-       queue->name = qname;
-#endif
+       rpc_assign_waitqueue_name(queue, qname);
 }
 
 void rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname)
@@ -251,6 +252,8 @@ static inline void rpc_task_set_debuginfo(struct rpc_task *task)
 
 static void rpc_set_active(struct rpc_task *task)
 {
+       trace_rpc_task_begin(task->tk_client, task, NULL);
+
        rpc_task_set_debuginfo(task);
        set_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
 }
@@ -267,6 +270,8 @@ static int rpc_complete_task(struct rpc_task *task)
        unsigned long flags;
        int ret;
 
+       trace_rpc_task_complete(task->tk_client, task, NULL);
+
        spin_lock_irqsave(&wq->lock, flags);
        clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
        ret = atomic_dec_and_test(&task->tk_count);
@@ -324,6 +329,8 @@ static void __rpc_sleep_on_priority(struct rpc_wait_queue *q,
        dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n",
                        task->tk_pid, rpc_qname(q), jiffies);
 
+       trace_rpc_task_sleep(task->tk_client, task, q);
+
        __rpc_add_wait_queue(q, task, queue_priority);
 
        BUG_ON(task->tk_callback != NULL);
@@ -378,6 +385,8 @@ static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task
                return;
        }
 
+       trace_rpc_task_wakeup(task->tk_client, task, queue);
+
        __rpc_remove_wait_queue(queue, task);
 
        rpc_make_runnable(task);
@@ -422,7 +431,7 @@ EXPORT_SYMBOL_GPL(rpc_wake_up_queued_task);
 /*
  * Wake up the next task on a priority queue.
  */
-static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queue)
+static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *queue)
 {
        struct list_head *q;
        struct rpc_task *task;
@@ -467,30 +476,54 @@ new_queue:
 new_owner:
        rpc_set_waitqueue_owner(queue, task->tk_owner);
 out:
-       rpc_wake_up_task_queue_locked(queue, task);
        return task;
 }
 
+static struct rpc_task *__rpc_find_next_queued(struct rpc_wait_queue *queue)
+{
+       if (RPC_IS_PRIORITY(queue))
+               return __rpc_find_next_queued_priority(queue);
+       if (!list_empty(&queue->tasks[0]))
+               return list_first_entry(&queue->tasks[0], struct rpc_task, u.tk_wait.list);
+       return NULL;
+}
+
 /*
- * Wake up the next task on the wait queue.
+ * Wake up the first task on the wait queue.
  */
-struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue)
+struct rpc_task *rpc_wake_up_first(struct rpc_wait_queue *queue,
+               bool (*func)(struct rpc_task *, void *), void *data)
 {
        struct rpc_task *task = NULL;
 
-       dprintk("RPC:       wake_up_next(%p \"%s\")\n",
+       dprintk("RPC:       wake_up_first(%p \"%s\")\n",
                        queue, rpc_qname(queue));
        spin_lock_bh(&queue->lock);
-       if (RPC_IS_PRIORITY(queue))
-               task = __rpc_wake_up_next_priority(queue);
-       else {
-               task_for_first(task, &queue->tasks[0])
+       task = __rpc_find_next_queued(queue);
+       if (task != NULL) {
+               if (func(task, data))
                        rpc_wake_up_task_queue_locked(queue, task);
+               else
+                       task = NULL;
        }
        spin_unlock_bh(&queue->lock);
 
        return task;
 }
+EXPORT_SYMBOL_GPL(rpc_wake_up_first);
+
+static bool rpc_wake_up_next_func(struct rpc_task *task, void *data)
+{
+       return true;
+}
+
+/*
+ * Wake up the next task on the wait queue.
+*/
+struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *queue)
+{
+       return rpc_wake_up_first(queue, rpc_wake_up_next_func, NULL);
+}
 EXPORT_SYMBOL_GPL(rpc_wake_up_next);
 
 /**
@@ -501,14 +534,18 @@ EXPORT_SYMBOL_GPL(rpc_wake_up_next);
  */
 void rpc_wake_up(struct rpc_wait_queue *queue)
 {
-       struct rpc_task *task, *next;
        struct list_head *head;
 
        spin_lock_bh(&queue->lock);
        head = &queue->tasks[queue->maxpriority];
        for (;;) {
-               list_for_each_entry_safe(task, next, head, u.tk_wait.list)
+               while (!list_empty(head)) {
+                       struct rpc_task *task;
+                       task = list_first_entry(head,
+                                       struct rpc_task,
+                                       u.tk_wait.list);
                        rpc_wake_up_task_queue_locked(queue, task);
+               }
                if (head == &queue->tasks[0])
                        break;
                head--;
@@ -526,13 +563,16 @@ EXPORT_SYMBOL_GPL(rpc_wake_up);
  */
 void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
 {
-       struct rpc_task *task, *next;
        struct list_head *head;
 
        spin_lock_bh(&queue->lock);
        head = &queue->tasks[queue->maxpriority];
        for (;;) {
-               list_for_each_entry_safe(task, next, head, u.tk_wait.list) {
+               while (!list_empty(head)) {
+                       struct rpc_task *task;
+                       task = list_first_entry(head,
+                                       struct rpc_task,
+                                       u.tk_wait.list);
                        task->tk_status = status;
                        rpc_wake_up_task_queue_locked(queue, task);
                }
@@ -677,6 +717,7 @@ static void __rpc_execute(struct rpc_task *task)
                        if (do_action == NULL)
                                break;
                }
+               trace_rpc_task_run_action(task->tk_client, task, task->tk_action);
                do_action(task);
 
                /*
index 80df89d957ba02dce1b977ec19f96b51d2c657b0..bc2068ee795b95d7fdec5d57503ab3919ff3e97d 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/svcsock.h>
 #include <linux/sunrpc/metrics.h>
+#include <linux/rcupdate.h>
 
 #include "netns.h"
 
@@ -133,20 +134,19 @@ EXPORT_SYMBOL_GPL(rpc_free_iostats);
 /**
  * rpc_count_iostats - tally up per-task stats
  * @task: completed rpc_task
+ * @stats: array of stat structures
  *
  * Relies on the caller for serialization.
  */
-void rpc_count_iostats(struct rpc_task *task)
+void rpc_count_iostats(const struct rpc_task *task, struct rpc_iostats *stats)
 {
        struct rpc_rqst *req = task->tk_rqstp;
-       struct rpc_iostats *stats;
        struct rpc_iostats *op_metrics;
        ktime_t delta;
 
-       if (!task->tk_client || !task->tk_client->cl_metrics || !req)
+       if (!stats || !req)
                return;
 
-       stats = task->tk_client->cl_metrics;
        op_metrics = &stats[task->tk_msg.rpc_proc->p_statidx];
 
        op_metrics->om_ops++;
@@ -164,6 +164,7 @@ void rpc_count_iostats(struct rpc_task *task)
        delta = ktime_sub(ktime_get(), task->tk_start);
        op_metrics->om_execute = ktime_add(op_metrics->om_execute, delta);
 }
+EXPORT_SYMBOL_GPL(rpc_count_iostats);
 
 static void _print_name(struct seq_file *seq, unsigned int op,
                        struct rpc_procinfo *procs)
@@ -179,7 +180,7 @@ static void _print_name(struct seq_file *seq, unsigned int op,
 void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt)
 {
        struct rpc_iostats *stats = clnt->cl_metrics;
-       struct rpc_xprt *xprt = clnt->cl_xprt;
+       struct rpc_xprt *xprt;
        unsigned int op, maxproc = clnt->cl_maxproc;
 
        if (!stats)
@@ -189,8 +190,11 @@ void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt)
        seq_printf(seq, "p/v: %u/%u (%s)\n",
                        clnt->cl_prog, clnt->cl_vers, clnt->cl_protname);
 
+       rcu_read_lock();
+       xprt = rcu_dereference(clnt->cl_xprt);
        if (xprt)
                xprt->ops->print_stats(xprt, seq);
+       rcu_read_unlock();
 
        seq_printf(seq, "\tper-op statistics\n");
        for (op = 0; op < maxproc; op++) {
@@ -213,45 +217,46 @@ EXPORT_SYMBOL_GPL(rpc_print_iostats);
  * Register/unregister RPC proc files
  */
 static inline struct proc_dir_entry *
-do_register(const char *name, void *data, const struct file_operations *fops)
+do_register(struct net *net, const char *name, void *data,
+           const struct file_operations *fops)
 {
        struct sunrpc_net *sn;
 
        dprintk("RPC:       registering /proc/net/rpc/%s\n", name);
-       sn = net_generic(&init_net, sunrpc_net_id);
+       sn = net_generic(net, sunrpc_net_id);
        return proc_create_data(name, 0, sn->proc_net_rpc, fops, data);
 }
 
 struct proc_dir_entry *
-rpc_proc_register(struct rpc_stat *statp)
+rpc_proc_register(struct net *net, struct rpc_stat *statp)
 {
-       return do_register(statp->program->name, statp, &rpc_proc_fops);
+       return do_register(net, statp->program->name, statp, &rpc_proc_fops);
 }
 EXPORT_SYMBOL_GPL(rpc_proc_register);
 
 void
-rpc_proc_unregister(const char *name)
+rpc_proc_unregister(struct net *net, const char *name)
 {
        struct sunrpc_net *sn;
 
-       sn = net_generic(&init_net, sunrpc_net_id);
+       sn = net_generic(net, sunrpc_net_id);
        remove_proc_entry(name, sn->proc_net_rpc);
 }
 EXPORT_SYMBOL_GPL(rpc_proc_unregister);
 
 struct proc_dir_entry *
-svc_proc_register(struct svc_stat *statp, const struct file_operations *fops)
+svc_proc_register(struct net *net, struct svc_stat *statp, const struct file_operations *fops)
 {
-       return do_register(statp->program->pg_name, statp, fops);
+       return do_register(net, statp->program->pg_name, statp, fops);
 }
 EXPORT_SYMBOL_GPL(svc_proc_register);
 
 void
-svc_proc_unregister(const char *name)
+svc_proc_unregister(struct net *net, const char *name)
 {
        struct sunrpc_net *sn;
 
-       sn = net_generic(&init_net, sunrpc_net_id);
+       sn = net_generic(net, sunrpc_net_id);
        remove_proc_entry(name, sn->proc_net_rpc);
 }
 EXPORT_SYMBOL_GPL(svc_proc_unregister);
index 90c292e2738b5f7248db0f267aa63c1b88f7d8b2..14c9f6d1c5ff22987e6dfdbf2ce9a95508ee4696 100644 (file)
@@ -47,5 +47,7 @@ int svc_send_common(struct socket *sock, struct xdr_buf *xdr,
                    struct page *headpage, unsigned long headoffset,
                    struct page *tailpage, unsigned long tailoffset);
 
+int rpc_clients_notifier_register(void);
+void rpc_clients_notifier_unregister(void);
 #endif /* _NET_SUNRPC_SUNRPC_H */
 
index 8ec9778c3f4ad959991fe38308e999f93e5adf3d..8adfc88e793a72308f72012bd30e447cd40dd6bf 100644 (file)
 #include "netns.h"
 
 int sunrpc_net_id;
+EXPORT_SYMBOL_GPL(sunrpc_net_id);
 
 static __net_init int sunrpc_init_net(struct net *net)
 {
        int err;
+       struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
 
        err = rpc_proc_init(net);
        if (err)
@@ -38,8 +40,18 @@ static __net_init int sunrpc_init_net(struct net *net)
        if (err)
                goto err_ipmap;
 
+       err = unix_gid_cache_create(net);
+       if (err)
+               goto err_unixgid;
+
+       rpc_pipefs_init_net(net);
+       INIT_LIST_HEAD(&sn->all_clients);
+       spin_lock_init(&sn->rpc_client_lock);
+       spin_lock_init(&sn->rpcb_clnt_lock);
        return 0;
 
+err_unixgid:
+       ip_map_cache_destroy(net);
 err_ipmap:
        rpc_proc_exit(net);
 err_proc:
@@ -48,6 +60,7 @@ err_proc:
 
 static __net_exit void sunrpc_exit_net(struct net *net)
 {
+       unix_gid_cache_destroy(net);
        ip_map_cache_destroy(net);
        rpc_proc_exit(net);
 }
@@ -59,8 +72,6 @@ static struct pernet_operations sunrpc_net_ops = {
        .size = sizeof(struct sunrpc_net),
 };
 
-extern struct cache_detail unix_gid_cache;
-
 static int __init
 init_sunrpc(void)
 {
@@ -82,7 +93,6 @@ init_sunrpc(void)
 #ifdef RPC_DEBUG
        rpc_register_sysctl();
 #endif
-       cache_register(&unix_gid_cache);
        svc_init_xprt_sock();   /* svc sock transport */
        init_socket_xprt();     /* clnt sock transport */
        return 0;
@@ -105,7 +115,6 @@ cleanup_sunrpc(void)
        svc_cleanup_xprt_sock();
        unregister_rpc_pipefs();
        rpc_destroy_mempool();
-       cache_unregister(&unix_gid_cache);
        unregister_pernet_subsys(&sunrpc_net_ops);
 #ifdef RPC_DEBUG
        rpc_unregister_sysctl();
index e4aabc02368b94e0d7b0109ab7906bfbba329b23..4153846984ac72be3a0f97b1ede45799128be863 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/module.h>
 #include <linux/kthread.h>
 #include <linux/slab.h>
+#include <linux/nsproxy.h>
 
 #include <linux/sunrpc/types.h>
 #include <linux/sunrpc/xdr.h>
@@ -30,7 +31,7 @@
 
 #define RPCDBG_FACILITY        RPCDBG_SVCDSP
 
-static void svc_unregister(const struct svc_serv *serv);
+static void svc_unregister(const struct svc_serv *serv, struct net *net);
 
 #define svc_serv_is_pooled(serv)    ((serv)->sv_function)
 
@@ -368,23 +369,24 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu)
        return &serv->sv_pools[pidx % serv->sv_nrpools];
 }
 
-static int svc_rpcb_setup(struct svc_serv *serv)
+int svc_rpcb_setup(struct svc_serv *serv, struct net *net)
 {
        int err;
 
-       err = rpcb_create_local();
+       err = rpcb_create_local(net);
        if (err)
                return err;
 
        /* Remove any stale portmap registrations */
-       svc_unregister(serv);
+       svc_unregister(serv, net);
        return 0;
 }
+EXPORT_SYMBOL_GPL(svc_rpcb_setup);
 
-void svc_rpcb_cleanup(struct svc_serv *serv)
+void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net)
 {
-       svc_unregister(serv);
-       rpcb_put_local();
+       svc_unregister(serv, net);
+       rpcb_put_local(net);
 }
 EXPORT_SYMBOL_GPL(svc_rpcb_cleanup);
 
@@ -410,7 +412,7 @@ static int svc_uses_rpcbind(struct svc_serv *serv)
  */
 static struct svc_serv *
 __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
-            void (*shutdown)(struct svc_serv *serv))
+            void (*shutdown)(struct svc_serv *serv, struct net *net))
 {
        struct svc_serv *serv;
        unsigned int vers;
@@ -470,7 +472,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
        }
 
        if (svc_uses_rpcbind(serv)) {
-               if (svc_rpcb_setup(serv) < 0) {
+               if (svc_rpcb_setup(serv, current->nsproxy->net_ns) < 0) {
                        kfree(serv->sv_pools);
                        kfree(serv);
                        return NULL;
@@ -484,7 +486,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
 
 struct svc_serv *
 svc_create(struct svc_program *prog, unsigned int bufsize,
-          void (*shutdown)(struct svc_serv *serv))
+          void (*shutdown)(struct svc_serv *serv, struct net *net))
 {
        return __svc_create(prog, bufsize, /*npools*/1, shutdown);
 }
@@ -492,7 +494,7 @@ EXPORT_SYMBOL_GPL(svc_create);
 
 struct svc_serv *
 svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
-                 void (*shutdown)(struct svc_serv *serv),
+                 void (*shutdown)(struct svc_serv *serv, struct net *net),
                  svc_thread_fn func, struct module *mod)
 {
        struct svc_serv *serv;
@@ -509,6 +511,24 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
 }
 EXPORT_SYMBOL_GPL(svc_create_pooled);
 
+void svc_shutdown_net(struct svc_serv *serv, struct net *net)
+{
+       /*
+        * The set of xprts (contained in the sv_tempsocks and
+        * sv_permsocks lists) is now constant, since it is modified
+        * only by accepting new sockets (done by service threads in
+        * svc_recv) or aging old ones (done by sv_temptimer), or
+        * configuration changes (excluded by whatever locking the
+        * caller is using--nfsd_mutex in the case of nfsd).  So it's
+        * safe to traverse those lists and shut everything down:
+        */
+       svc_close_net(serv, net);
+
+       if (serv->sv_shutdown)
+               serv->sv_shutdown(serv, net);
+}
+EXPORT_SYMBOL_GPL(svc_shutdown_net);
+
 /*
  * Destroy an RPC service. Should be called with appropriate locking to
  * protect the sv_nrthreads, sv_permsocks and sv_tempsocks.
@@ -516,6 +536,8 @@ EXPORT_SYMBOL_GPL(svc_create_pooled);
 void
 svc_destroy(struct svc_serv *serv)
 {
+       struct net *net = current->nsproxy->net_ns;
+
        dprintk("svc: svc_destroy(%s, %d)\n",
                                serv->sv_program->pg_name,
                                serv->sv_nrthreads);
@@ -529,19 +551,15 @@ svc_destroy(struct svc_serv *serv)
                printk("svc_destroy: no threads for serv=%p!\n", serv);
 
        del_timer_sync(&serv->sv_temptimer);
+
+       svc_shutdown_net(serv, net);
+
        /*
-        * The set of xprts (contained in the sv_tempsocks and
-        * sv_permsocks lists) is now constant, since it is modified
-        * only by accepting new sockets (done by service threads in
-        * svc_recv) or aging old ones (done by sv_temptimer), or
-        * configuration changes (excluded by whatever locking the
-        * caller is using--nfsd_mutex in the case of nfsd).  So it's
-        * safe to traverse those lists and shut everything down:
+        * The last user is gone and thus all sockets have to be destroyed to
+        * the point. Check this.
         */
-       svc_close_all(serv);
-
-       if (serv->sv_shutdown)
-               serv->sv_shutdown(serv);
+       BUG_ON(!list_empty(&serv->sv_permsocks));
+       BUG_ON(!list_empty(&serv->sv_tempsocks));
 
        cache_clean_deferred(serv);
 
@@ -795,7 +813,8 @@ EXPORT_SYMBOL_GPL(svc_exit_thread);
  * Returns zero on success; a negative errno value is returned
  * if any error occurs.
  */
-static int __svc_rpcb_register4(const u32 program, const u32 version,
+static int __svc_rpcb_register4(struct net *net, const u32 program,
+                               const u32 version,
                                const unsigned short protocol,
                                const unsigned short port)
 {
@@ -818,7 +837,7 @@ static int __svc_rpcb_register4(const u32 program, const u32 version,
                return -ENOPROTOOPT;
        }
 
-       error = rpcb_v4_register(program, version,
+       error = rpcb_v4_register(net, program, version,
                                        (const struct sockaddr *)&sin, netid);
 
        /*
@@ -826,7 +845,7 @@ static int __svc_rpcb_register4(const u32 program, const u32 version,
         * registration request with the legacy rpcbind v2 protocol.
         */
        if (error == -EPROTONOSUPPORT)
-               error = rpcb_register(program, version, protocol, port);
+               error = rpcb_register(net, program, version, protocol, port);
 
        return error;
 }
@@ -842,7 +861,8 @@ static int __svc_rpcb_register4(const u32 program, const u32 version,
  * Returns zero on success; a negative errno value is returned
  * if any error occurs.
  */
-static int __svc_rpcb_register6(const u32 program, const u32 version,
+static int __svc_rpcb_register6(struct net *net, const u32 program,
+                               const u32 version,
                                const unsigned short protocol,
                                const unsigned short port)
 {
@@ -865,7 +885,7 @@ static int __svc_rpcb_register6(const u32 program, const u32 version,
                return -ENOPROTOOPT;
        }
 
-       error = rpcb_v4_register(program, version,
+       error = rpcb_v4_register(net, program, version,
                                        (const struct sockaddr *)&sin6, netid);
 
        /*
@@ -885,7 +905,7 @@ static int __svc_rpcb_register6(const u32 program, const u32 version,
  * Returns zero on success; a negative errno value is returned
  * if any error occurs.
  */
-static int __svc_register(const char *progname,
+static int __svc_register(struct net *net, const char *progname,
                          const u32 program, const u32 version,
                          const int family,
                          const unsigned short protocol,
@@ -895,12 +915,12 @@ static int __svc_register(const char *progname,
 
        switch (family) {
        case PF_INET:
-               error = __svc_rpcb_register4(program, version,
+               error = __svc_rpcb_register4(net, program, version,
                                                protocol, port);
                break;
 #if IS_ENABLED(CONFIG_IPV6)
        case PF_INET6:
-               error = __svc_rpcb_register6(program, version,
+               error = __svc_rpcb_register6(net, program, version,
                                                protocol, port);
 #endif
        }
@@ -914,14 +934,16 @@ static int __svc_register(const char *progname,
 /**
  * svc_register - register an RPC service with the local portmapper
  * @serv: svc_serv struct for the service to register
+ * @net: net namespace for the service to register
  * @family: protocol family of service's listener socket
  * @proto: transport protocol number to advertise
  * @port: port to advertise
  *
  * Service is registered for any address in the passed-in protocol family
  */
-int svc_register(const struct svc_serv *serv, const int family,
-                const unsigned short proto, const unsigned short port)
+int svc_register(const struct svc_serv *serv, struct net *net,
+                const int family, const unsigned short proto,
+                const unsigned short port)
 {
        struct svc_program      *progp;
        unsigned int            i;
@@ -946,7 +968,7 @@ int svc_register(const struct svc_serv *serv, const int family,
                        if (progp->pg_vers[i]->vs_hidden)
                                continue;
 
-                       error = __svc_register(progp->pg_name, progp->pg_prog,
+                       error = __svc_register(net, progp->pg_name, progp->pg_prog,
                                                i, family, proto, port);
                        if (error < 0)
                                break;
@@ -963,19 +985,19 @@ int svc_register(const struct svc_serv *serv, const int family,
  * any "inet6" entries anyway.  So a PMAP_UNSET should be sufficient
  * in this case to clear all existing entries for [program, version].
  */
-static void __svc_unregister(const u32 program, const u32 version,
+static void __svc_unregister(struct net *net, const u32 program, const u32 version,
                             const char *progname)
 {
        int error;
 
-       error = rpcb_v4_register(program, version, NULL, "");
+       error = rpcb_v4_register(net, program, version, NULL, "");
 
        /*
         * User space didn't support rpcbind v4, so retry this
         * request with the legacy rpcbind v2 protocol.
         */
        if (error == -EPROTONOSUPPORT)
-               error = rpcb_register(program, version, 0, 0);
+               error = rpcb_register(net, program, version, 0, 0);
 
        dprintk("svc: %s(%sv%u), error %d\n",
                        __func__, progname, version, error);
@@ -989,7 +1011,7 @@ static void __svc_unregister(const u32 program, const u32 version,
  * The result of unregistration is reported via dprintk for those who want
  * verification of the result, but is otherwise not important.
  */
-static void svc_unregister(const struct svc_serv *serv)
+static void svc_unregister(const struct svc_serv *serv, struct net *net)
 {
        struct svc_program *progp;
        unsigned long flags;
@@ -1006,7 +1028,7 @@ static void svc_unregister(const struct svc_serv *serv)
 
                        dprintk("svc: attempting to unregister %sv%u\n",
                                progp->pg_name, i);
-                       __svc_unregister(progp->pg_prog, i, progp->pg_name);
+                       __svc_unregister(net, progp->pg_prog, i, progp->pg_name);
                }
        }
 
index 74cb0d8e9ca1f58aae66b85bee5c313cf3473f18..4bda09d7e1a4cc6e5c5eaf24eec5b171ae922613 100644 (file)
@@ -922,48 +922,65 @@ void svc_close_xprt(struct svc_xprt *xprt)
 }
 EXPORT_SYMBOL_GPL(svc_close_xprt);
 
-static void svc_close_list(struct list_head *xprt_list)
+static void svc_close_list(struct list_head *xprt_list, struct net *net)
 {
        struct svc_xprt *xprt;
 
        list_for_each_entry(xprt, xprt_list, xpt_list) {
+               if (xprt->xpt_net != net)
+                       continue;
                set_bit(XPT_CLOSE, &xprt->xpt_flags);
                set_bit(XPT_BUSY, &xprt->xpt_flags);
        }
 }
 
-void svc_close_all(struct svc_serv *serv)
+static void svc_clear_pools(struct svc_serv *serv, struct net *net)
 {
        struct svc_pool *pool;
        struct svc_xprt *xprt;
        struct svc_xprt *tmp;
        int i;
 
-       svc_close_list(&serv->sv_tempsocks);
-       svc_close_list(&serv->sv_permsocks);
-
        for (i = 0; i < serv->sv_nrpools; i++) {
                pool = &serv->sv_pools[i];
 
                spin_lock_bh(&pool->sp_lock);
-               while (!list_empty(&pool->sp_sockets)) {
-                       xprt = list_first_entry(&pool->sp_sockets, struct svc_xprt, xpt_ready);
+               list_for_each_entry_safe(xprt, tmp, &pool->sp_sockets, xpt_ready) {
+                       if (xprt->xpt_net != net)
+                               continue;
                        list_del_init(&xprt->xpt_ready);
                }
                spin_unlock_bh(&pool->sp_lock);
        }
+}
+
+static void svc_clear_list(struct list_head *xprt_list, struct net *net)
+{
+       struct svc_xprt *xprt;
+       struct svc_xprt *tmp;
+
+       list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) {
+               if (xprt->xpt_net != net)
+                       continue;
+               svc_delete_xprt(xprt);
+       }
+       list_for_each_entry(xprt, xprt_list, xpt_list)
+               BUG_ON(xprt->xpt_net == net);
+}
+
+void svc_close_net(struct svc_serv *serv, struct net *net)
+{
+       svc_close_list(&serv->sv_tempsocks, net);
+       svc_close_list(&serv->sv_permsocks, net);
+
+       svc_clear_pools(serv, net);
        /*
         * At this point the sp_sockets lists will stay empty, since
         * svc_enqueue will not add new entries without taking the
         * sp_lock and checking XPT_BUSY.
         */
-       list_for_each_entry_safe(xprt, tmp, &serv->sv_tempsocks, xpt_list)
-               svc_delete_xprt(xprt);
-       list_for_each_entry_safe(xprt, tmp, &serv->sv_permsocks, xpt_list)
-               svc_delete_xprt(xprt);
-
-       BUG_ON(!list_empty(&serv->sv_permsocks));
-       BUG_ON(!list_empty(&serv->sv_tempsocks));
+       svc_clear_list(&serv->sv_tempsocks, net);
+       svc_clear_list(&serv->sv_permsocks, net);
 }
 
 /*
@@ -1089,6 +1106,7 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
  * svc_find_xprt - find an RPC transport instance
  * @serv: pointer to svc_serv to search
  * @xcl_name: C string containing transport's class name
+ * @net: owner net pointer
  * @af: Address family of transport's local address
  * @port: transport's IP port number
  *
@@ -1101,7 +1119,8 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
  * service's list that has a matching class name.
  */
 struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name,
-                              const sa_family_t af, const unsigned short port)
+                              struct net *net, const sa_family_t af,
+                              const unsigned short port)
 {
        struct svc_xprt *xprt;
        struct svc_xprt *found = NULL;
@@ -1112,6 +1131,8 @@ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name,
 
        spin_lock_bh(&serv->sv_lock);
        list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) {
+               if (xprt->xpt_net != net)
+                       continue;
                if (strcmp(xprt->xpt_class->xcl_name, xcl_name))
                        continue;
                if (af != AF_UNSPEC && af != xprt->xpt_local.ss_family)
index 01153ead1dbaf3d982e4d57bfacccce674b507a4..bcd574f2ac566a96c34b041f44ce1e0b1bedbed8 100644 (file)
@@ -211,7 +211,7 @@ static int ip_map_parse(struct cache_detail *cd,
        len = qword_get(&mesg, buf, mlen);
        if (len <= 0) return -EINVAL;
 
-       if (rpc_pton(buf, len, &address.sa, sizeof(address)) == 0)
+       if (rpc_pton(cd->net, buf, len, &address.sa, sizeof(address)) == 0)
                return -EINVAL;
        switch (address.sa.sa_family) {
        case AF_INET:
@@ -436,7 +436,6 @@ struct unix_gid {
        uid_t                   uid;
        struct group_info       *gi;
 };
-static struct cache_head       *gid_table[GID_HASHMAX];
 
 static void unix_gid_put(struct kref *kref)
 {
@@ -494,8 +493,7 @@ static int unix_gid_upcall(struct cache_detail *cd, struct cache_head *h)
        return sunrpc_cache_pipe_upcall(cd, h, unix_gid_request);
 }
 
-static struct unix_gid *unix_gid_lookup(uid_t uid);
-extern struct cache_detail unix_gid_cache;
+static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, uid_t uid);
 
 static int unix_gid_parse(struct cache_detail *cd,
                        char *mesg, int mlen)
@@ -539,19 +537,19 @@ static int unix_gid_parse(struct cache_detail *cd,
                GROUP_AT(ug.gi, i) = gid;
        }
 
-       ugp = unix_gid_lookup(uid);
+       ugp = unix_gid_lookup(cd, uid);
        if (ugp) {
                struct cache_head *ch;
                ug.h.flags = 0;
                ug.h.expiry_time = expiry;
-               ch = sunrpc_cache_update(&unix_gid_cache,
+               ch = sunrpc_cache_update(cd,
                                         &ug.h, &ugp->h,
                                         hash_long(uid, GID_HASHBITS));
                if (!ch)
                        err = -ENOMEM;
                else {
                        err = 0;
-                       cache_put(ch, &unix_gid_cache);
+                       cache_put(ch, cd);
                }
        } else
                err = -ENOMEM;
@@ -587,10 +585,9 @@ static int unix_gid_show(struct seq_file *m,
        return 0;
 }
 
-struct cache_detail unix_gid_cache = {
+static struct cache_detail unix_gid_cache_template = {
        .owner          = THIS_MODULE,
        .hash_size      = GID_HASHMAX,
-       .hash_table     = gid_table,
        .name           = "auth.unix.gid",
        .cache_put      = unix_gid_put,
        .cache_upcall   = unix_gid_upcall,
@@ -602,14 +599,42 @@ struct cache_detail unix_gid_cache = {
        .alloc          = unix_gid_alloc,
 };
 
-static struct unix_gid *unix_gid_lookup(uid_t uid)
+int unix_gid_cache_create(struct net *net)
+{
+       struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+       struct cache_detail *cd;
+       int err;
+
+       cd = cache_create_net(&unix_gid_cache_template, net);
+       if (IS_ERR(cd))
+               return PTR_ERR(cd);
+       err = cache_register_net(cd, net);
+       if (err) {
+               cache_destroy_net(cd, net);
+               return err;
+       }
+       sn->unix_gid_cache = cd;
+       return 0;
+}
+
+void unix_gid_cache_destroy(struct net *net)
+{
+       struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+       struct cache_detail *cd = sn->unix_gid_cache;
+
+       sn->unix_gid_cache = NULL;
+       cache_purge(cd);
+       cache_unregister_net(cd, net);
+       cache_destroy_net(cd, net);
+}
+
+static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, uid_t uid)
 {
        struct unix_gid ug;
        struct cache_head *ch;
 
        ug.uid = uid;
-       ch = sunrpc_cache_lookup(&unix_gid_cache, &ug.h,
-                                hash_long(uid, GID_HASHBITS));
+       ch = sunrpc_cache_lookup(cd, &ug.h, hash_long(uid, GID_HASHBITS));
        if (ch)
                return container_of(ch, struct unix_gid, h);
        else
@@ -621,11 +646,13 @@ static struct group_info *unix_gid_find(uid_t uid, struct svc_rqst *rqstp)
        struct unix_gid *ug;
        struct group_info *gi;
        int ret;
+       struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net,
+                                           sunrpc_net_id);
 
-       ug = unix_gid_lookup(uid);
+       ug = unix_gid_lookup(sn->unix_gid_cache, uid);
        if (!ug)
                return ERR_PTR(-EAGAIN);
-       ret = cache_check(&unix_gid_cache, &ug->h, &rqstp->rq_chandle);
+       ret = cache_check(sn->unix_gid_cache, &ug->h, &rqstp->rq_chandle);
        switch (ret) {
        case -ENOENT:
                return ERR_PTR(-ENOENT);
@@ -633,7 +660,7 @@ static struct group_info *unix_gid_find(uid_t uid, struct svc_rqst *rqstp)
                return ERR_PTR(-ESHUTDOWN);
        case 0:
                gi = get_group_info(ug->gi);
-               cache_put(&ug->h, &unix_gid_cache);
+               cache_put(&ug->h, sn->unix_gid_cache);
                return gi;
        default:
                return ERR_PTR(-EAGAIN);
@@ -849,56 +876,45 @@ struct auth_ops svcauth_unix = {
        .set_client     = svcauth_unix_set_client,
 };
 
+static struct cache_detail ip_map_cache_template = {
+       .owner          = THIS_MODULE,
+       .hash_size      = IP_HASHMAX,
+       .name           = "auth.unix.ip",
+       .cache_put      = ip_map_put,
+       .cache_upcall   = ip_map_upcall,
+       .cache_parse    = ip_map_parse,
+       .cache_show     = ip_map_show,
+       .match          = ip_map_match,
+       .init           = ip_map_init,
+       .update         = update,
+       .alloc          = ip_map_alloc,
+};
+
 int ip_map_cache_create(struct net *net)
 {
-       int err = -ENOMEM;
-       struct cache_detail *cd;
-       struct cache_head **tbl;
        struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+       struct cache_detail *cd;
+       int err;
 
-       cd = kzalloc(sizeof(struct cache_detail), GFP_KERNEL);
-       if (cd == NULL)
-               goto err_cd;
-
-       tbl = kzalloc(IP_HASHMAX * sizeof(struct cache_head *), GFP_KERNEL);
-       if (tbl == NULL)
-               goto err_tbl;
-
-       cd->owner = THIS_MODULE,
-       cd->hash_size = IP_HASHMAX,
-       cd->hash_table = tbl,
-       cd->name = "auth.unix.ip",
-       cd->cache_put = ip_map_put,
-       cd->cache_upcall = ip_map_upcall,
-       cd->cache_parse = ip_map_parse,
-       cd->cache_show = ip_map_show,
-       cd->match = ip_map_match,
-       cd->init = ip_map_init,
-       cd->update = update,
-       cd->alloc = ip_map_alloc,
-
+       cd = cache_create_net(&ip_map_cache_template, net);
+       if (IS_ERR(cd))
+               return PTR_ERR(cd);
        err = cache_register_net(cd, net);
-       if (err)
-               goto err_reg;
-
+       if (err) {
+               cache_destroy_net(cd, net);
+               return err;
+       }
        sn->ip_map_cache = cd;
        return 0;
-
-err_reg:
-       kfree(tbl);
-err_tbl:
-       kfree(cd);
-err_cd:
-       return err;
 }
 
 void ip_map_cache_destroy(struct net *net)
 {
-       struct sunrpc_net *sn;
+       struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+       struct cache_detail *cd = sn->ip_map_cache;
 
-       sn = net_generic(net, sunrpc_net_id);
-       cache_purge(sn->ip_map_cache);
-       cache_unregister_net(sn->ip_map_cache, net);
-       kfree(sn->ip_map_cache->hash_table);
-       kfree(sn->ip_map_cache);
+       sn->ip_map_cache = NULL;
+       cache_purge(cd);
+       cache_unregister_net(cd, net);
+       cache_destroy_net(cd, net);
 }
index 464570906f80c24190260bef957a53128202a486..40ae884db865f975f589a433432652d0fe1936ed 100644 (file)
@@ -396,7 +396,7 @@ static int svc_partial_recvfrom(struct svc_rqst *rqstp,
                                int buflen, unsigned int base)
 {
        size_t save_iovlen;
-       void __user *save_iovbase;
+       void *save_iovbase;
        unsigned int i;
        int ret;
 
@@ -1409,7 +1409,8 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 
        /* Register socket with portmapper */
        if (*errp >= 0 && pmap_register)
-               *errp = svc_register(serv, inet->sk_family, inet->sk_protocol,
+               *errp = svc_register(serv, sock_net(sock->sk), inet->sk_family,
+                                    inet->sk_protocol,
                                     ntohs(inet_sk(inet)->inet_sport));
 
        if (*errp < 0) {
index e65dcc613339a932f4796467c11d8ff6ffd5f488..af7d339add9d5b853174ef8cac0a6016419fc663 100644 (file)
@@ -20,6 +20,8 @@
 #include <linux/sunrpc/stats.h>
 #include <linux/sunrpc/svc_xprt.h>
 
+#include "netns.h"
+
 /*
  * Declare the debug flags here
  */
@@ -110,7 +112,7 @@ proc_dodebug(ctl_table *table, int write,
                *(unsigned int *) table->data = value;
                /* Display the RPC tasks on writing to rpc_debug */
                if (strcmp(table->procname, "rpc_debug") == 0)
-                       rpc_show_tasks();
+                       rpc_show_tasks(&init_net);
        } else {
                if (!access_ok(VERIFY_WRITE, buffer, left))
                        return -EFAULT;
index c64c0ef519b594320ff688f3881579d2926be21d..0cbcd1ab49ab5544952d3385b64b2e29b1843872 100644 (file)
@@ -66,6 +66,7 @@ static void    xprt_init(struct rpc_xprt *xprt, struct net *net);
 static void    xprt_request_init(struct rpc_task *, struct rpc_xprt *);
 static void    xprt_connect_status(struct rpc_task *task);
 static int      __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
+static void     xprt_destroy(struct rpc_xprt *xprt);
 
 static DEFINE_SPINLOCK(xprt_list_lock);
 static LIST_HEAD(xprt_list);
@@ -292,54 +293,57 @@ static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
        return retval;
 }
 
-static void __xprt_lock_write_next(struct rpc_xprt *xprt)
+static bool __xprt_lock_write_func(struct rpc_task *task, void *data)
 {
-       struct rpc_task *task;
+       struct rpc_xprt *xprt = data;
        struct rpc_rqst *req;
 
-       if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
-               return;
-
-       task = rpc_wake_up_next(&xprt->sending);
-       if (task == NULL)
-               goto out_unlock;
-
        req = task->tk_rqstp;
        xprt->snd_task = task;
        if (req) {
                req->rq_bytes_sent = 0;
                req->rq_ntrans++;
        }
-       return;
+       return true;
+}
 
-out_unlock:
+static void __xprt_lock_write_next(struct rpc_xprt *xprt)
+{
+       if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
+               return;
+
+       if (rpc_wake_up_first(&xprt->sending, __xprt_lock_write_func, xprt))
+               return;
        xprt_clear_locked(xprt);
 }
 
-static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt)
+static bool __xprt_lock_write_cong_func(struct rpc_task *task, void *data)
 {
-       struct rpc_task *task;
+       struct rpc_xprt *xprt = data;
        struct rpc_rqst *req;
 
-       if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
-               return;
-       if (RPCXPRT_CONGESTED(xprt))
-               goto out_unlock;
-       task = rpc_wake_up_next(&xprt->sending);
-       if (task == NULL)
-               goto out_unlock;
-
        req = task->tk_rqstp;
        if (req == NULL) {
                xprt->snd_task = task;
-               return;
+               return true;
        }
        if (__xprt_get_cong(xprt, task)) {
                xprt->snd_task = task;
                req->rq_bytes_sent = 0;
                req->rq_ntrans++;
-               return;
+               return true;
        }
+       return false;
+}
+
+static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt)
+{
+       if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
+               return;
+       if (RPCXPRT_CONGESTED(xprt))
+               goto out_unlock;
+       if (rpc_wake_up_first(&xprt->sending, __xprt_lock_write_cong_func, xprt))
+               return;
 out_unlock:
        xprt_clear_locked(xprt);
 }
@@ -712,9 +716,7 @@ void xprt_connect(struct rpc_task *task)
        if (xprt_connected(xprt))
                xprt_release_write(xprt, task);
        else {
-               if (task->tk_rqstp)
-                       task->tk_rqstp->rq_bytes_sent = 0;
-
+               task->tk_rqstp->rq_bytes_sent = 0;
                task->tk_timeout = task->tk_rqstp->rq_timeout;
                rpc_sleep_on(&xprt->pending, task, xprt_connect_status);
 
@@ -750,7 +752,7 @@ static void xprt_connect_status(struct rpc_task *task)
        default:
                dprintk("RPC: %5u xprt_connect_status: error %d connecting to "
                                "server %s\n", task->tk_pid, -task->tk_status,
-                               task->tk_client->cl_server);
+                               xprt->servername);
                xprt_release_write(xprt, task);
                task->tk_status = -EIO;
        }
@@ -884,7 +886,7 @@ void xprt_transmit(struct rpc_task *task)
 {
        struct rpc_rqst *req = task->tk_rqstp;
        struct rpc_xprt *xprt = req->rq_xprt;
-       int status;
+       int status, numreqs;
 
        dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen);
 
@@ -921,9 +923,14 @@ void xprt_transmit(struct rpc_task *task)
 
        xprt->ops->set_retrans_timeout(task);
 
+       numreqs = atomic_read(&xprt->num_reqs);
+       if (numreqs > xprt->stat.max_slots)
+               xprt->stat.max_slots = numreqs;
        xprt->stat.sends++;
        xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs;
        xprt->stat.bklog_u += xprt->backlog.qlen;
+       xprt->stat.sending_u += xprt->sending.qlen;
+       xprt->stat.pending_u += xprt->pending.qlen;
 
        /* Don't race with disconnect */
        if (!xprt_connected(xprt))
@@ -1131,7 +1138,10 @@ void xprt_release(struct rpc_task *task)
                return;
 
        xprt = req->rq_xprt;
-       rpc_count_iostats(task);
+       if (task->tk_ops->rpc_count_stats != NULL)
+               task->tk_ops->rpc_count_stats(task, task->tk_calldata);
+       else if (task->tk_client)
+               rpc_count_iostats(task, task->tk_client->cl_metrics);
        spin_lock_bh(&xprt->transport_lock);
        xprt->ops->release_xprt(xprt, task);
        if (xprt->ops->release_request)
@@ -1220,6 +1230,17 @@ found:
                            (unsigned long)xprt);
        else
                init_timer(&xprt->timer);
+
+       if (strlen(args->servername) > RPC_MAXNETNAMELEN) {
+               xprt_destroy(xprt);
+               return ERR_PTR(-EINVAL);
+       }
+       xprt->servername = kstrdup(args->servername, GFP_KERNEL);
+       if (xprt->servername == NULL) {
+               xprt_destroy(xprt);
+               return ERR_PTR(-ENOMEM);
+       }
+
        dprintk("RPC:       created transport %p with %u slots\n", xprt,
                        xprt->max_reqs);
 out:
@@ -1242,6 +1263,7 @@ static void xprt_destroy(struct rpc_xprt *xprt)
        rpc_destroy_wait_queue(&xprt->sending);
        rpc_destroy_wait_queue(&xprt->backlog);
        cancel_work_sync(&xprt->task_cleanup);
+       kfree(xprt->servername);
        /*
         * Tear down transport state and free the rpc_xprt
         */
index 1776e5731dcf1005a7a163867f3132cacd1a7026..558fbab574f00eadf0d52d91ef82e23e858b0dee 100644 (file)
@@ -771,13 +771,18 @@ repost:
 
        /* get request object */
        req = rpcr_to_rdmar(rqst);
+       if (req->rl_reply) {
+               spin_unlock(&xprt->transport_lock);
+               dprintk("RPC:       %s: duplicate reply 0x%p to RPC "
+                       "request 0x%p: xid 0x%08x\n", __func__, rep, req,
+                       headerp->rm_xid);
+               goto repost;
+       }
 
        dprintk("RPC:       %s: reply 0x%p completes request 0x%p\n"
                "                   RPC request 0x%p xid 0x%08x\n",
                        __func__, rep, req, rqst, headerp->rm_xid);
 
-       BUG_ON(!req || req->rl_reply);
-
        /* from here on, the reply is no longer an orphan */
        req->rl_reply = rep;
 
index 28236bab57f929e1edadd8245e5851a2fb925bc2..745973b729af6af33d8a882f45f6a3ba62f62c2e 100644 (file)
@@ -1490,6 +1490,9 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
        u8 key;
        int len, pageoff;
        int i, rc;
+       int seg_len;
+       u64 pa;
+       int page_no;
 
        pageoff = offset_in_page(seg1->mr_offset);
        seg1->mr_offset -= pageoff;     /* start of page */
@@ -1497,11 +1500,15 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
        len = -pageoff;
        if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
                *nsegs = RPCRDMA_MAX_DATA_SEGS;
-       for (i = 0; i < *nsegs;) {
+       for (page_no = i = 0; i < *nsegs;) {
                rpcrdma_map_one(ia, seg, writing);
-               seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->page_list[i] = seg->mr_dma;
+               pa = seg->mr_dma;
+               for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
+                       seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->
+                               page_list[page_no++] = pa;
+                       pa += PAGE_SIZE;
+               }
                len += seg->mr_len;
-               BUG_ON(seg->mr_len > PAGE_SIZE);
                ++seg;
                ++i;
                /* Check for holes */
@@ -1540,9 +1547,9 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
        frmr_wr.send_flags = IB_SEND_SIGNALED;
        frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma;
        frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl;
-       frmr_wr.wr.fast_reg.page_list_len = i;
+       frmr_wr.wr.fast_reg.page_list_len = page_no;
        frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
-       frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT;
+       frmr_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
        BUG_ON(frmr_wr.wr.fast_reg.length < len);
        frmr_wr.wr.fast_reg.access_flags = (writing ?
                                IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
index 55472c48825e6fd43c3357a2f58b398231a2767e..92bc5181dbebde6e82af566ed54f4ca04a61b857 100644 (file)
@@ -53,12 +53,12 @@ static void xs_close(struct rpc_xprt *xprt);
 /*
  * xprtsock tunables
  */
-unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE;
-unsigned int xprt_tcp_slot_table_entries = RPC_MIN_SLOT_TABLE;
-unsigned int xprt_max_tcp_slot_table_entries = RPC_MAX_SLOT_TABLE;
+static unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE;
+static unsigned int xprt_tcp_slot_table_entries = RPC_MIN_SLOT_TABLE;
+static unsigned int xprt_max_tcp_slot_table_entries = RPC_MAX_SLOT_TABLE;
 
-unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT;
-unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT;
+static unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT;
+static unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT;
 
 #define XS_TCP_LINGER_TO       (15U * HZ)
 static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO;
@@ -2227,7 +2227,7 @@ static void xs_local_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
                idle_time = (long)(jiffies - xprt->last_used) / HZ;
 
        seq_printf(seq, "\txprt:\tlocal %lu %lu %lu %ld %lu %lu %lu "
-                       "%llu %llu\n",
+                       "%llu %llu %lu %llu %llu\n",
                        xprt->stat.bind_count,
                        xprt->stat.connect_count,
                        xprt->stat.connect_time,
@@ -2236,7 +2236,10 @@ static void xs_local_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
                        xprt->stat.recvs,
                        xprt->stat.bad_xids,
                        xprt->stat.req_u,
-                       xprt->stat.bklog_u);
+                       xprt->stat.bklog_u,
+                       xprt->stat.max_slots,
+                       xprt->stat.sending_u,
+                       xprt->stat.pending_u);
 }
 
 /**
@@ -2249,14 +2252,18 @@ static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
 {
        struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
 
-       seq_printf(seq, "\txprt:\tudp %u %lu %lu %lu %lu %Lu %Lu\n",
+       seq_printf(seq, "\txprt:\tudp %u %lu %lu %lu %lu %llu %llu "
+                       "%lu %llu %llu\n",
                        transport->srcport,
                        xprt->stat.bind_count,
                        xprt->stat.sends,
                        xprt->stat.recvs,
                        xprt->stat.bad_xids,
                        xprt->stat.req_u,
-                       xprt->stat.bklog_u);
+                       xprt->stat.bklog_u,
+                       xprt->stat.max_slots,
+                       xprt->stat.sending_u,
+                       xprt->stat.pending_u);
 }
 
 /**
@@ -2273,7 +2280,8 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
        if (xprt_connected(xprt))
                idle_time = (long)(jiffies - xprt->last_used) / HZ;
 
-       seq_printf(seq, "\txprt:\ttcp %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu\n",
+       seq_printf(seq, "\txprt:\ttcp %u %lu %lu %lu %ld %lu %lu %lu "
+                       "%llu %llu %lu %llu %llu\n",
                        transport->srcport,
                        xprt->stat.bind_count,
                        xprt->stat.connect_count,
@@ -2283,7 +2291,10 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
                        xprt->stat.recvs,
                        xprt->stat.bad_xids,
                        xprt->stat.req_u,
-                       xprt->stat.bklog_u);
+                       xprt->stat.bklog_u,
+                       xprt->stat.max_slots,
+                       xprt->stat.sending_u,
+                       xprt->stat.pending_u);
 }
 
 /*
index 47bacd8c025094e24b0200bab9a7c1d438093aa1..95a338c89f99d87af24ffa97faf17e1a7ef700e8 100644 (file)
@@ -21,7 +21,7 @@
 
 static int xfrm_output2(struct sk_buff *skb);
 
-static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
+static int xfrm_skb_check_space(struct sk_buff *skb)
 {
        struct dst_entry *dst = skb_dst(skb);
        int nhead = dst->header_len + LL_RESERVED_SPACE(dst->dev)
@@ -48,7 +48,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
                goto resume;
 
        do {
-               err = xfrm_state_check_space(x, skb);
+               err = xfrm_skb_check_space(skb);
                if (err) {
                        XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
                        goto error_nolock;
index 39e02c54ed26544d2b0b669ee6444584b4b2c498..2f6d11d04a2b29910a1f284d3e3af8b0db1bfcce 100644 (file)
@@ -167,7 +167,7 @@ static void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
        }
 
        if (xfrm_aevent_is_on(xs_net(x)))
-               xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
+               x->repl->notify(x, XFRM_REPLAY_UPDATE);
 }
 
 static int xfrm_replay_overflow_bmp(struct xfrm_state *x, struct sk_buff *skb)
@@ -279,7 +279,7 @@ static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq)
        replay_esn->bmp[nr] |= (1U << bitnr);
 
        if (xfrm_aevent_is_on(xs_net(x)))
-               xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
+               x->repl->notify(x, XFRM_REPLAY_UPDATE);
 }
 
 static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event)
@@ -473,7 +473,7 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq)
        replay_esn->bmp[nr] |= (1U << bitnr);
 
        if (xfrm_aevent_is_on(xs_net(x)))
-               xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
+               x->repl->notify(x, XFRM_REPLAY_UPDATE);
 }
 
 static struct xfrm_replay xfrm_replay_legacy = {
index 7ada8019be1f2c08314fe0c8c14d3058a1ee26ea..06783cffb3afa631fcca8efee90d17cc4f711b2c 100644 (file)
@@ -671,6 +671,26 @@ found_kernel_type:
        return ktype;
 }
 
+void key_set_timeout(struct key *key, unsigned timeout)
+{
+       struct timespec now;
+       time_t expiry = 0;
+
+       /* make the changes with the locks held to prevent races */
+       down_write(&key->sem);
+
+       if (timeout > 0) {
+               now = current_kernel_time();
+               expiry = now.tv_sec + timeout;
+       }
+
+       key->expiry = expiry;
+       key_schedule_gc(key->expiry + key_gc_delay);
+
+       up_write(&key->sem);
+}
+EXPORT_SYMBOL_GPL(key_set_timeout);
+
 /*
  * Unlock a key type locked by key_type_lookup().
  */
index 6523599e9ac0e08d7911c1fb007cc1971d1334a6..fb767c6cd99f6a92da8fa989999a18988bce1a19 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/syscalls.h>
+#include <linux/key.h>
 #include <linux/keyctl.h>
 #include <linux/fs.h>
 #include <linux/capability.h>
@@ -1257,10 +1258,8 @@ error:
  */
 long keyctl_set_timeout(key_serial_t id, unsigned timeout)
 {
-       struct timespec now;
        struct key *key, *instkey;
        key_ref_t key_ref;
-       time_t expiry;
        long ret;
 
        key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL,
@@ -1286,20 +1285,7 @@ long keyctl_set_timeout(key_serial_t id, unsigned timeout)
 
 okay:
        key = key_ref_to_ptr(key_ref);
-
-       /* make the changes with the locks held to prevent races */
-       down_write(&key->sem);
-
-       expiry = 0;
-       if (timeout > 0) {
-               now = current_kernel_time();
-               expiry = now.tv_sec + timeout;
-       }
-
-       key->expiry = expiry;
-       key_schedule_gc(key->expiry + key_gc_delay);
-
-       up_write(&key->sem);
+       key_set_timeout(key, timeout);
        key_put(key);
 
        ret = 0;
index 758ec2a08c40642e6e08eac4c6069f50e4806180..95d6a6f7c33aa7971c50f9b107f0c8474a16d70a 100755 (executable)
@@ -46,6 +46,7 @@ my %default = (
     "DIE_ON_FAILURE"           => 1,
     "SSH_EXEC"                 => "ssh \$SSH_USER\@\$MACHINE \$SSH_COMMAND",
     "SCP_TO_TARGET"            => "scp \$SRC_FILE \$SSH_USER\@\$MACHINE:\$DST_FILE",
+    "SCP_TO_TARGET_INSTALL"    => "\${SCP_TO_TARGET}",
     "REBOOT"                   => "ssh \$SSH_USER\@\$MACHINE reboot",
     "STOP_AFTER_SUCCESS"       => 10,
     "STOP_AFTER_FAILURE"       => 60,
@@ -86,11 +87,13 @@ my $reboot_on_error;
 my $switch_to_good;
 my $switch_to_test;
 my $poweroff_on_error;
+my $reboot_on_success;
 my $die_on_failure;
 my $powercycle_after_reboot;
 my $poweroff_after_halt;
 my $ssh_exec;
 my $scp_to_target;
+my $scp_to_target_install;
 my $power_off;
 my $grub_menu;
 my $grub_number;
@@ -211,6 +214,7 @@ my %option_map = (
     "SWITCH_TO_GOOD"           => \$switch_to_good,
     "SWITCH_TO_TEST"           => \$switch_to_test,
     "POWEROFF_ON_ERROR"                => \$poweroff_on_error,
+    "REBOOT_ON_SUCCESS"                => \$reboot_on_success,
     "DIE_ON_FAILURE"           => \$die_on_failure,
     "POWER_OFF"                        => \$power_off,
     "POWERCYCLE_AFTER_REBOOT"  => \$powercycle_after_reboot,
@@ -243,6 +247,7 @@ my %option_map = (
     "BUILD_TARGET"             => \$build_target,
     "SSH_EXEC"                 => \$ssh_exec,
     "SCP_TO_TARGET"            => \$scp_to_target,
+    "SCP_TO_TARGET_INSTALL"    => \$scp_to_target_install,
     "CHECKOUT"                 => \$checkout,
     "TARGET_IMAGE"             => \$target_image,
     "LOCALVERSION"             => \$localversion,
@@ -1113,7 +1118,6 @@ sub reboot_to_good {
 
     if (defined($switch_to_good)) {
        run_command $switch_to_good;
-       return;
     }
 
     reboot $time;
@@ -1349,8 +1353,7 @@ sub run_ssh {
 }
 
 sub run_scp {
-    my ($src, $dst) = @_;
-    my $cp_scp = $scp_to_target;
+    my ($src, $dst, $cp_scp) = @_;
 
     $cp_scp =~ s/\$SRC_FILE/$src/g;
     $cp_scp =~ s/\$DST_FILE/$dst/g;
@@ -1358,6 +1361,22 @@ sub run_scp {
     return run_command "$cp_scp";
 }
 
+sub run_scp_install {
+    my ($src, $dst) = @_;
+
+    my $cp_scp = $scp_to_target_install;
+
+    return run_scp($src, $dst, $cp_scp);
+}
+
+sub run_scp_mod {
+    my ($src, $dst) = @_;
+
+    my $cp_scp = $scp_to_target;
+
+    return run_scp($src, $dst, $cp_scp);
+}
+
 sub get_grub_index {
 
     if ($reboot_type ne "grub") {
@@ -1460,6 +1479,7 @@ sub get_sha1 {
 sub monitor {
     my $booted = 0;
     my $bug = 0;
+    my $bug_ignored = 0;
     my $skip_call_trace = 0;
     my $loops;
 
@@ -1531,9 +1551,13 @@ sub monitor {
        }
 
        if ($full_line =~ /call trace:/i) {
-           if (!$ignore_errors && !$bug && !$skip_call_trace) {
-               $bug = 1;
-               $failure_start = time;
+           if (!$bug && !$skip_call_trace) {
+               if ($ignore_errors) {
+                   $bug_ignored = 1;
+               } else {
+                   $bug = 1;
+                   $failure_start = time;
+               }
            }
        }
 
@@ -1595,6 +1619,10 @@ sub monitor {
        fail "failed - never got a boot prompt." and return 0;
     }
 
+    if ($bug_ignored) {
+       doprint "WARNING: Call Trace detected but ignored due to IGNORE_ERRORS=1\n";
+    }
+
     return 1;
 }
 
@@ -1621,7 +1649,7 @@ sub install {
 
     my $cp_target = eval_kernel_version $target_image;
 
-    run_scp "$outputdir/$build_target", "$cp_target" or
+    run_scp_install "$outputdir/$build_target", "$cp_target" or
        dodie "failed to copy image";
 
     my $install_mods = 0;
@@ -1643,7 +1671,7 @@ sub install {
        return;
     }
 
-    run_command "$make INSTALL_MOD_PATH=$tmpdir modules_install" or
+    run_command "$make INSTALL_MOD_STRIP=1 INSTALL_MOD_PATH=$tmpdir modules_install" or
        dodie "Failed to install modules";
 
     my $modlib = "/lib/modules/$version";
@@ -1656,7 +1684,7 @@ sub install {
     run_command "cd $tmpdir && tar -cjf $modtar lib/modules/$version" or
        dodie "making tarball";
 
-    run_scp "$tmpdir/$modtar", "/tmp" or
+    run_scp_mod "$tmpdir/$modtar", "/tmp" or
        dodie "failed to copy modules";
 
     unlink "$tmpdir/$modtar";
@@ -3526,8 +3554,10 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
            die "failed to checkout $checkout";
     }
 
-    $no_reboot = 0;
-
+    # A test may opt to not reboot the box
+    if ($reboot_on_success) {
+       $no_reboot = 0;
+    }
 
     if ($test_type eq "bisect") {
        bisect $i;
@@ -3572,8 +3602,12 @@ if ($opt{"POWEROFF_ON_SUCCESS"}) {
     halt;
 } elsif ($opt{"REBOOT_ON_SUCCESS"} && !do_not_reboot) {
     reboot_to_good;
+} elsif (defined($switch_to_good)) {
+    # still need to get to the good kernel
+    run_command $switch_to_good;
 }
 
+
 doprint "\n    $successes of $opt{NUM_TESTS} tests were successful\n\n";
 
 exit 0;
index 5ea04c6a71bfc830feca1c10c144264ba97a5db3..b682456afda8b9f665d72cacb442c80664d2c2b2 100644 (file)
 # The variables SSH_USER, MACHINE and SSH_COMMAND are defined
 #SSH_EXEC = ssh $SSH_USER@$MACHINE $SSH_COMMAND
 
-# The way to copy a file to the target
+# The way to copy a file to the target (install and modules)
 # (default scp $SRC_FILE $SSH_USER@$MACHINE:$DST_FILE)
-# The variables SSH_USER, MACHINE, SRC_FILE and DST_FILE are defined.
-#SCP_TO_TARGET = scp $SRC_FILE $SSH_USER@$MACHINE:$DST_FILE
+# The variables SSH_USER and MACHINE are defined by the config.
+# SRC_FILE and DST_FILE are ktest internal variables and
+# must be written with a plain '$', not the '${}' notation.
+# (default scp $SRC_FILE ${SSH_USER}@${MACHINE}:$DST_FILE)
+#SCP_TO_TARGET = echo skip scp for $SRC_FILE $DST_FILE
+
+# If the kernel image needs to be copied differently than the
+# modules, this option overrides SCP_TO_TARGET for the image install.
+# (default ${SCP_TO_TARGET} )
+#SCP_TO_TARGET_INSTALL = scp $SRC_FILE tftp@tftpserver:$DST_FILE
 
 # The nice way to reboot the target
 # (default ssh $SSH_USER@$MACHINE reboot)
This page took 0.567473 seconds and 5 git commands to generate.