xfs: kill xfs_qmops
[deliverable/linux.git] fs/xfs/quota/xfs_qm.c
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir2.h"
#include "xfs_alloc.h"
#include "xfs_dmapi.h"
#include "xfs_quota.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
#include "xfs_dir2_sf.h"
#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_ialloc.h"
#include "xfs_itable.h"
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_bmap.h"
#include "xfs_rw.h"
#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_trans_space.h"
#include "xfs_utils.h"
#include "xfs_qm.h"

/*
 * The global quota manager. There is only one of these for the entire
 * system, _not_ one per file system. XQM keeps track of the overall
 * quota functionality, including maintaining the freelist and hash
 * tables of dquots.
 */
struct mutex	xfs_Gqm_lock;
struct xfs_qm	*xfs_Gqm;
uint		ndquot;

kmem_zone_t	*qm_dqzone;
kmem_zone_t	*qm_dqtrxzone;

static cred_t	xfs_zerocr;

STATIC void	xfs_qm_list_init(xfs_dqlist_t *, char *, int);
STATIC void	xfs_qm_list_destroy(xfs_dqlist_t *);

STATIC void	xfs_qm_freelist_init(xfs_frlist_t *);
STATIC void	xfs_qm_freelist_destroy(xfs_frlist_t *);

STATIC int	xfs_qm_init_quotainos(xfs_mount_t *);
STATIC int	xfs_qm_init_quotainfo(xfs_mount_t *);
STATIC int	xfs_qm_shake(int, gfp_t);

static struct shrinker xfs_qm_shaker = {
	.shrink = xfs_qm_shake,
	.seeks = DEFAULT_SEEKS,
};

#ifdef DEBUG
extern struct mutex	qcheck_lock;
#endif

#ifdef QUOTADEBUG
#define XQM_LIST_PRINT(l, NXT, title) \
{ \
	xfs_dquot_t	*dqp; int i = 0; \
	cmn_err(CE_DEBUG, "%s (#%d)", title, (int) (l)->qh_nelems); \
	for (dqp = (l)->qh_next; dqp != NULL; dqp = dqp->NXT) { \
		cmn_err(CE_DEBUG, "   %d. \"%d (%s)\" " \
			"bcnt = %d, icnt = %d, refs = %d", \
			++i, (int) be32_to_cpu(dqp->q_core.d_id), \
			DQFLAGTO_TYPESTR(dqp), \
			(int) be64_to_cpu(dqp->q_core.d_bcount), \
			(int) be64_to_cpu(dqp->q_core.d_icount), \
			(int) dqp->q_nrefs); } \
}
#else
#define XQM_LIST_PRINT(l, NXT, title) do { } while (0)
#endif

/*
 * Initialize the XQM structure.
 * Note that there is not one quota manager per file system.
 */
STATIC struct xfs_qm *
xfs_Gqm_init(void)
{
	xfs_dqhash_t	*udqhash, *gdqhash;
	xfs_qm_t	*xqm;
	size_t		hsize;
	uint		i;

	/*
	 * Initialize the dquot hash tables.
	 */
	udqhash = kmem_zalloc_greedy(&hsize,
				     XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t),
				     XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t),
				     KM_SLEEP | KM_MAYFAIL | KM_LARGE);
	gdqhash = kmem_zalloc(hsize, KM_SLEEP | KM_LARGE);
	hsize /= sizeof(xfs_dqhash_t);
	ndquot = hsize << 8;
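	/*
	 * hsize is now the hash bucket count; ndquot (hsize * 256) serves
	 * as a rough system-wide ceiling on incore dquots, which the
	 * shrinker heuristic in xfs_qm_shake() below compares against the
	 * size of the freelist before reclaiming anything.
	 */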

	xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
	xqm->qm_dqhashmask = hsize - 1;
	xqm->qm_usr_dqhtable = udqhash;
	xqm->qm_grp_dqhtable = gdqhash;
	ASSERT(xqm->qm_usr_dqhtable != NULL);
	ASSERT(xqm->qm_grp_dqhtable != NULL);

	for (i = 0; i < hsize; i++) {
		xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i);
		xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i);
	}

	/*
	 * Freelist of all dquots of all file systems
	 */
	xfs_qm_freelist_init(&(xqm->qm_dqfreelist));

	/*
	 * dquot zone. we register our own low-memory callback.
	 */
	if (!qm_dqzone) {
		xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t),
						"xfs_dquots");
		qm_dqzone = xqm->qm_dqzone;
	} else
		xqm->qm_dqzone = qm_dqzone;

	register_shrinker(&xfs_qm_shaker);

	/*
	 * The t_dqinfo portion of transactions.
	 */
	if (!qm_dqtrxzone) {
		xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t),
						   "xfs_dqtrx");
		qm_dqtrxzone = xqm->qm_dqtrxzone;
	} else
		xqm->qm_dqtrxzone = qm_dqtrxzone;

	atomic_set(&xqm->qm_totaldquots, 0);
	xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
	xqm->qm_nrefs = 0;
#ifdef DEBUG
	mutex_init(&qcheck_lock);
#endif
	return xqm;
}

/*
 * Destroy the global quota manager when its reference count goes to zero.
 */
STATIC void
xfs_qm_destroy(
	struct xfs_qm	*xqm)
{
	int		hsize, i;

	ASSERT(xqm != NULL);
	ASSERT(xqm->qm_nrefs == 0);
	unregister_shrinker(&xfs_qm_shaker);
	hsize = xqm->qm_dqhashmask + 1;
	for (i = 0; i < hsize; i++) {
		xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
		xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
	}
	kmem_free(xqm->qm_usr_dqhtable);
	kmem_free(xqm->qm_grp_dqhtable);
	xqm->qm_usr_dqhtable = NULL;
	xqm->qm_grp_dqhtable = NULL;
	xqm->qm_dqhashmask = 0;
	xfs_qm_freelist_destroy(&(xqm->qm_dqfreelist));
#ifdef DEBUG
	mutex_destroy(&qcheck_lock);
#endif
	kmem_free(xqm);
}

/*
 * Called at mount time to let XQM know that another file system is
 * starting quotas. This isn't crucial information as the individual mount
 * structures are pretty independent, but it helps the XQM keep a
 * global view of what's going on.
 */
/* ARGSUSED */
STATIC int
xfs_qm_hold_quotafs_ref(
	struct xfs_mount *mp)
{
	/*
	 * Need to lock the xfs_Gqm structure for things like this. For example,
	 * the structure could disappear between the entry to this routine and
	 * a HOLD operation if not locked.
	 */
	mutex_lock(&xfs_Gqm_lock);

	if (xfs_Gqm == NULL)
		xfs_Gqm = xfs_Gqm_init();
	/*
	 * We can keep a list of all filesystems with quotas mounted for
	 * debugging and statistical purposes, but ...
	 * Just take a reference and get out.
	 */
	xfs_Gqm->qm_nrefs++;
	mutex_unlock(&xfs_Gqm_lock);

	return 0;
}


/*
 * Release the reference that a filesystem took at mount time,
 * so that we know when we need to destroy the entire quota manager.
 */
/* ARGSUSED */
STATIC void
xfs_qm_rele_quotafs_ref(
	struct xfs_mount *mp)
{
	xfs_dquot_t	*dqp, *nextdqp;

	ASSERT(xfs_Gqm);
	ASSERT(xfs_Gqm->qm_nrefs > 0);

	/*
	 * Go thru the freelist and destroy all inactive dquots.
	 */
	xfs_qm_freelist_lock(xfs_Gqm);

	for (dqp = xfs_Gqm->qm_dqfreelist.qh_next;
	     dqp != (xfs_dquot_t *)&(xfs_Gqm->qm_dqfreelist); ) {
		xfs_dqlock(dqp);
		nextdqp = dqp->dq_flnext;
		if (dqp->dq_flags & XFS_DQ_INACTIVE) {
			ASSERT(dqp->q_mount == NULL);
			ASSERT(! XFS_DQ_IS_DIRTY(dqp));
			ASSERT(dqp->HL_PREVP == NULL);
			ASSERT(dqp->MPL_PREVP == NULL);
			XQM_FREELIST_REMOVE(dqp);
			xfs_dqunlock(dqp);
			xfs_qm_dqdestroy(dqp);
		} else {
			xfs_dqunlock(dqp);
		}
		dqp = nextdqp;
	}
	xfs_qm_freelist_unlock(xfs_Gqm);

	/*
	 * Destroy the entire XQM. If somebody mounts with quotaon, this'll
	 * be restarted.
	 */
	mutex_lock(&xfs_Gqm_lock);
	if (--xfs_Gqm->qm_nrefs == 0) {
		xfs_qm_destroy(xfs_Gqm);
		xfs_Gqm = NULL;
	}
	mutex_unlock(&xfs_Gqm_lock);
}

/*
 * Just destroy the quotainfo structure.
 */
void
xfs_qm_unmount(
	struct xfs_mount	*mp)
{
	if (mp->m_quotainfo) {
		xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
		xfs_qm_destroy_quotainfo(mp);
	}
}


/*
 * This is called from xfs_mountfs to start quotas and initialize all
 * necessary data structures like quotainfo. This is also responsible for
 * running a quotacheck as necessary. We are guaranteed that the superblock
 * is consistently read in at this point.
 *
 * If we fail here, the mount will continue with quota turned off. We don't
 * need to indicate success or failure at all.
 */
void
xfs_qm_mount_quotas(
	xfs_mount_t	*mp)
{
	int		error = 0;
	uint		sbf;

	/*
	 * If quotas on realtime volumes is not supported, we disable
	 * quotas immediately.
	 */
	if (mp->m_sb.sb_rextents) {
		cmn_err(CE_NOTE,
			"Cannot turn on quotas for realtime filesystem %s",
			mp->m_fsname);
		mp->m_qflags = 0;
		goto write_changes;
	}

	ASSERT(XFS_IS_QUOTA_RUNNING(mp));

	/*
	 * Allocate the quotainfo structure inside the mount struct, and
	 * create quotainode(s), and change/rev superblock if necessary.
	 */
	error = xfs_qm_init_quotainfo(mp);
	if (error) {
		/*
		 * We must turn off quotas.
		 */
		ASSERT(mp->m_quotainfo == NULL);
		mp->m_qflags = 0;
		goto write_changes;
	}
	/*
	 * If any of the quotas are not consistent, do a quotacheck.
	 */
	if (XFS_QM_NEED_QUOTACHECK(mp)) {
		error = xfs_qm_quotacheck(mp);
		if (error) {
			/* Quotacheck failed and disabled quotas. */
			return;
		}
	}
	/*
	 * If one type of quotas is off, then it will lose its
	 * quotachecked status, since we won't be doing accounting for
	 * that type anymore.
	 */
	if (!XFS_IS_UQUOTA_ON(mp))
		mp->m_qflags &= ~XFS_UQUOTA_CHKD;
	if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp)))
		mp->m_qflags &= ~XFS_OQUOTA_CHKD;

write_changes:
	/*
	 * We actually don't have to acquire the m_sb_lock at all.
	 * This can only be called from mount, and that's single threaded. XXX
	 */
	spin_lock(&mp->m_sb_lock);
	sbf = mp->m_sb.sb_qflags;
	mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
	spin_unlock(&mp->m_sb_lock);

	if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
		if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) {
			/*
			 * We could only have been turning quotas off.
			 * We aren't in very good shape actually because
			 * the incore structures are convinced that quotas are
			 * off, but the on-disk superblock doesn't know that!
			 */
			ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
			xfs_fs_cmn_err(CE_ALERT, mp,
				"XFS mount_quotas: Superblock update failed!");
		}
	}

	if (error) {
		xfs_fs_cmn_err(CE_WARN, mp,
			"Failed to initialize disk quotas.");
		return;
	}

#ifdef QUOTADEBUG
	if (XFS_IS_QUOTA_ON(mp))
		xfs_qm_internalqcheck(mp);
#endif
}

/*
 * Called from the vfsops layer.
 */
void
xfs_qm_unmount_quotas(
	xfs_mount_t	*mp)
{
	/*
	 * Release the dquots that root inode, et al might be holding,
	 * before we flush quotas and blow away the quotainfo structure.
	 */
	ASSERT(mp->m_rootip);
	xfs_qm_dqdetach(mp->m_rootip);
	if (mp->m_rbmip)
		xfs_qm_dqdetach(mp->m_rbmip);
	if (mp->m_rsumip)
		xfs_qm_dqdetach(mp->m_rsumip);

	/*
	 * Release the quota inodes.
	 */
	if (mp->m_quotainfo) {
		if (mp->m_quotainfo->qi_uquotaip) {
			IRELE(mp->m_quotainfo->qi_uquotaip);
			mp->m_quotainfo->qi_uquotaip = NULL;
		}
		if (mp->m_quotainfo->qi_gquotaip) {
			IRELE(mp->m_quotainfo->qi_gquotaip);
			mp->m_quotainfo->qi_gquotaip = NULL;
		}
	}
}

/*
 * Flush all dquots of the given file system to disk. The dquots are
 * _not_ purged from memory here, just their data written to disk.
 */
STATIC int
xfs_qm_dqflush_all(
	xfs_mount_t	*mp,
	int		flags)
{
	int		recl;
	xfs_dquot_t	*dqp;
	int		niters;
	int		error;

	if (mp->m_quotainfo == NULL)
		return 0;
	niters = 0;
again:
	xfs_qm_mplist_lock(mp);
	FOREACH_DQUOT_IN_MP(dqp, mp) {
		xfs_dqlock(dqp);
		if (! XFS_DQ_IS_DIRTY(dqp)) {
			xfs_dqunlock(dqp);
			continue;
		}
		xfs_dqtrace_entry(dqp, "FLUSHALL: DQDIRTY");
		/* XXX a sentinel would be better */
		recl = XFS_QI_MPLRECLAIMS(mp);
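		/*
		 * Note on the retry protocol: recl snapshots the mount's
		 * dquot reclaim counter here; once we retake the mplist
		 * lock after the flush below, a changed counter means a
		 * reclaim ran while the lock was dropped and our list
		 * position may be stale, so we restart from "again".
		 */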
		if (!xfs_dqflock_nowait(dqp)) {
			/*
			 * If we can't grab the flush lock then check
			 * to see if the dquot has been flushed delayed
			 * write. If so, grab its buffer and send it
			 * out immediately. We'll be able to acquire
			 * the flush lock when the I/O completes.
			 */
			xfs_qm_dqflock_pushbuf_wait(dqp);
		}
		/*
		 * Let go of the mplist lock. We don't want to hold it
		 * across a disk write.
		 */
		xfs_qm_mplist_unlock(mp);
		error = xfs_qm_dqflush(dqp, flags);
		xfs_dqunlock(dqp);
		if (error)
			return error;

		xfs_qm_mplist_lock(mp);
		if (recl != XFS_QI_MPLRECLAIMS(mp)) {
			xfs_qm_mplist_unlock(mp);
			/* XXX restart limit */
			goto again;
		}
	}

	xfs_qm_mplist_unlock(mp);
	/* return ! busy */
	return 0;
}
/*
 * Release the group dquot pointers the user dquots may be
 * carrying around as a hint. mplist is locked on entry and exit.
 */
STATIC void
xfs_qm_detach_gdquots(
	xfs_mount_t	*mp)
{
	xfs_dquot_t	*dqp, *gdqp;
	int		nrecl;

again:
	ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp));
	dqp = XFS_QI_MPLNEXT(mp);
	while (dqp) {
		xfs_dqlock(dqp);
		if ((gdqp = dqp->q_gdquot)) {
			xfs_dqlock(gdqp);
			dqp->q_gdquot = NULL;
		}
		xfs_dqunlock(dqp);

		if (gdqp) {
			/*
			 * Can't hold the mplist lock across a dqput.
			 * XXXmust convert to marker based iterations here.
			 */
			nrecl = XFS_QI_MPLRECLAIMS(mp);
			xfs_qm_mplist_unlock(mp);
			xfs_qm_dqput(gdqp);

			xfs_qm_mplist_lock(mp);
			if (nrecl != XFS_QI_MPLRECLAIMS(mp))
				goto again;
		}
		dqp = dqp->MPL_NEXT;
	}
}

/*
 * Go through all the incore dquots of this file system and take them
 * off the mplist and hashlist, if the dquot type matches the dqtype
 * parameter. This is used when turning off quota accounting for
 * users and/or groups, as well as when the filesystem is unmounting.
 */
STATIC int
xfs_qm_dqpurge_int(
	xfs_mount_t	*mp,
	uint		flags)	/* QUOTAOFF/UMOUNTING/UQUOTA/PQUOTA/GQUOTA */
{
	xfs_dquot_t	*dqp;
	uint		dqtype;
	int		nrecl;
	xfs_dquot_t	*nextdqp;
	int		nmisses;

	if (mp->m_quotainfo == NULL)
		return 0;

	dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
	dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0;
	dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0;

	xfs_qm_mplist_lock(mp);

	/*
	 * In the first pass through all incore dquots of this filesystem,
	 * we release the group dquot pointers the user dquots may be
	 * carrying around as a hint. We need to do this irrespective of
	 * what's being turned off.
	 */
	xfs_qm_detach_gdquots(mp);

again:
	nmisses = 0;
	ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp));
	/*
	 * Try to get rid of all of the unwanted dquots. The idea is to
	 * get them off mplist and hashlist, but leave them on freelist.
	 */
	dqp = XFS_QI_MPLNEXT(mp);
	while (dqp) {
		/*
		 * It's OK to look at the type without taking dqlock here.
		 * We're holding the mplist lock here, and that's needed for
		 * a dqreclaim.
		 */
		if ((dqp->dq_flags & dqtype) == 0) {
			dqp = dqp->MPL_NEXT;
			continue;
		}

		if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
			nrecl = XFS_QI_MPLRECLAIMS(mp);
			xfs_qm_mplist_unlock(mp);
			mutex_lock(&dqp->q_hash->qh_lock);
			xfs_qm_mplist_lock(mp);

			/*
			 * XXXTheoretically, we can get into a very long
			 * ping pong game here.
			 * No one can be adding dquots to the mplist at
			 * this point, but somebody might be taking things off.
			 */
			if (nrecl != XFS_QI_MPLRECLAIMS(mp)) {
				mutex_unlock(&dqp->q_hash->qh_lock);
				goto again;
			}
		}

		/*
		 * Take the dquot off the mplist and hashlist. It may remain on
		 * freelist in INACTIVE state.
		 */
		nextdqp = dqp->MPL_NEXT;
		nmisses += xfs_qm_dqpurge(dqp);
		dqp = nextdqp;
	}
	xfs_qm_mplist_unlock(mp);
	return nmisses;
}

int
xfs_qm_dqpurge_all(
	xfs_mount_t	*mp,
	uint		flags)
{
	int		ndquots;

	/*
	 * Purge the dquot cache.
	 * None of the dquots should really be busy at this point.
	 */
	if (mp->m_quotainfo) {
		while ((ndquots = xfs_qm_dqpurge_int(mp, flags))) {
			delay(ndquots * 10);
		}
	}
	return 0;
}

STATIC int
xfs_qm_dqattach_one(
	xfs_inode_t	*ip,
	xfs_dqid_t	id,
	uint		type,
	uint		doalloc,
	xfs_dquot_t	*udqhint, /* hint */
	xfs_dquot_t	**IO_idqpp)
{
	xfs_dquot_t	*dqp;
	int		error;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
	error = 0;

	/*
	 * See if we already have it in the inode itself. IO_idqpp is
	 * &i_udquot or &i_gdquot. This made the code look weird, but
	 * made the logic a lot simpler.
	 */
	dqp = *IO_idqpp;
	if (dqp) {
		xfs_dqtrace_entry(dqp, "DQATTACH: found in ip");
		return 0;
	}

	/*
	 * udqhint is the i_udquot field in inode, and is non-NULL only
	 * when the type arg is group/project. Its purpose is to save a
	 * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside
	 * the user dquot.
	 */
	if (udqhint) {
		ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);
		xfs_dqlock(udqhint);

		/*
		 * No need to take dqlock to look at the id.
		 *
		 * The ID can't change until it gets reclaimed, and it won't
		 * be reclaimed as long as we have a ref from inode and we
		 * hold the ilock.
		 */
		dqp = udqhint->q_gdquot;
		if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) {
			xfs_dqlock(dqp);
			XFS_DQHOLD(dqp);
			ASSERT(*IO_idqpp == NULL);
			*IO_idqpp = dqp;

			xfs_dqunlock(dqp);
			xfs_dqunlock(udqhint);
			return 0;
		}

		/*
		 * We can't hold a dquot lock when we call the dqget code.
		 * We'll deadlock in no time, because of (not conforming to)
		 * lock ordering - the inodelock comes before any dquot lock,
		 * and we may drop and reacquire the ilock in xfs_qm_dqget().
		 */
		xfs_dqunlock(udqhint);
	}

	/*
	 * Find the dquot from somewhere. This bumps the
	 * reference count of dquot and returns it locked.
	 * This can return ENOENT if dquot didn't exist on
	 * disk and we didn't ask it to allocate;
	 * ESRCH if quotas got turned off suddenly.
	 */
	error = xfs_qm_dqget(ip->i_mount, ip, id, type, XFS_QMOPT_DOWARN, &dqp);
	if (error)
		return error;

	xfs_dqtrace_entry(dqp, "DQATTACH: found by dqget");

	/*
	 * dqget may have dropped and re-acquired the ilock, but it guarantees
	 * that the dquot returned is the one that should go in the inode.
	 */
	*IO_idqpp = dqp;
	xfs_dqunlock(dqp);
	return 0;
}


/*
 * Given a udquot and gdquot, attach a ptr to the group dquot in the
 * udquot as a hint for future lookups. The idea sounds simple, but the
 * execution isn't, because the udquot might have a group dquot attached
 * already and getting rid of that gets us into lock ordering constraints.
 * The process is complicated more by the fact that the dquots may or may not
 * be locked on entry.
 */
STATIC void
xfs_qm_dqattach_grouphint(
	xfs_dquot_t	*udq,
	xfs_dquot_t	*gdq)
{
	xfs_dquot_t	*tmp;

	xfs_dqlock(udq);

	if ((tmp = udq->q_gdquot)) {
		if (tmp == gdq) {
			xfs_dqunlock(udq);
			return;
		}

		udq->q_gdquot = NULL;
		/*
		 * We can't keep any dqlocks when calling dqrele,
		 * because the freelist lock comes before dqlocks.
		 */
		xfs_dqunlock(udq);
		/*
		 * we took a hard reference once upon a time in dqget,
		 * so give it back when the udquot no longer points at it
		 * dqput() does the unlocking of the dquot.
		 */
		xfs_qm_dqrele(tmp);

		xfs_dqlock(udq);
		xfs_dqlock(gdq);

	} else {
		ASSERT(XFS_DQ_IS_LOCKED(udq));
		xfs_dqlock(gdq);
	}

	ASSERT(XFS_DQ_IS_LOCKED(udq));
	ASSERT(XFS_DQ_IS_LOCKED(gdq));
	/*
	 * Somebody could have attached a gdquot here,
	 * when we dropped the uqlock. If so, just do nothing.
	 */
	if (udq->q_gdquot == NULL) {
		XFS_DQHOLD(gdq);
		udq->q_gdquot = gdq;
	}

	xfs_dqunlock(gdq);
	xfs_dqunlock(udq);
}


/*
 * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
 * into account.
 * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
 * Inode may get unlocked and relocked in here, and the caller must deal with
 * the consequences.
 */
int
xfs_qm_dqattach_locked(
	xfs_inode_t	*ip,
	uint		flags)
{
	xfs_mount_t	*mp = ip->i_mount;
	uint		nquotas = 0;
	int		error = 0;

	if (!XFS_IS_QUOTA_RUNNING(mp) ||
	    !XFS_IS_QUOTA_ON(mp) ||
	    !XFS_NOT_DQATTACHED(mp, ip) ||
	    ip->i_ino == mp->m_sb.sb_uquotino ||
	    ip->i_ino == mp->m_sb.sb_gquotino)
		return 0;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

	if (XFS_IS_UQUOTA_ON(mp)) {
		error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
						flags & XFS_QMOPT_DQALLOC,
						NULL, &ip->i_udquot);
		if (error)
			goto done;
		nquotas++;
	}

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
	if (XFS_IS_OQUOTA_ON(mp)) {
		error = XFS_IS_GQUOTA_ON(mp) ?
			xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
						flags & XFS_QMOPT_DQALLOC,
						ip->i_udquot, &ip->i_gdquot) :
			xfs_qm_dqattach_one(ip, ip->i_d.di_projid, XFS_DQ_PROJ,
						flags & XFS_QMOPT_DQALLOC,
						ip->i_udquot, &ip->i_gdquot);
		/*
		 * Don't worry about the udquot that we may have
		 * attached above. It'll get detached, if not already.
		 */
		if (error)
			goto done;
		nquotas++;
	}

	/*
	 * Attach this group quota to the user quota as a hint.
	 * This WON'T, in general, result in a thrash.
	 */
	if (nquotas == 2) {
		ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
		ASSERT(ip->i_udquot);
		ASSERT(ip->i_gdquot);

		/*
		 * We may or may not have the i_udquot locked at this point,
		 * but this check is OK since we don't depend on the i_gdquot to
		 * be accurate 100% all the time. It is just a hint, and this
		 * will succeed in general.
		 */
		if (ip->i_udquot->q_gdquot == ip->i_gdquot)
			goto done;
		/*
		 * Attach i_gdquot to the gdquot hint inside the i_udquot.
		 */
		xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot);
	}

done:
#ifdef QUOTADEBUG
	if (! error) {
		if (XFS_IS_UQUOTA_ON(mp))
			ASSERT(ip->i_udquot);
		if (XFS_IS_OQUOTA_ON(mp))
			ASSERT(ip->i_gdquot);
	}
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
#endif
	return error;
}

int
xfs_qm_dqattach(
	struct xfs_inode	*ip,
	uint			flags)
{
	int			error;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	error = xfs_qm_dqattach_locked(ip, flags);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	return error;
}

/*
 * Release dquots (and their references) if any.
 * The inode should be locked EXCL except when this is called by
 * xfs_ireclaim.
 */
void
xfs_qm_dqdetach(
	xfs_inode_t	*ip)
{
	if (!(ip->i_udquot || ip->i_gdquot))
		return;

	ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino);
	ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino);
	if (ip->i_udquot) {
		xfs_dqtrace_entry_ino(ip->i_udquot, "DQDETTACH", ip);
		xfs_qm_dqrele(ip->i_udquot);
		ip->i_udquot = NULL;
	}
	if (ip->i_gdquot) {
		xfs_dqtrace_entry_ino(ip->i_gdquot, "DQDETTACH", ip);
		xfs_qm_dqrele(ip->i_gdquot);
		ip->i_gdquot = NULL;
	}
}

/*
 * This is called to sync quotas. We can be told to use non-blocking
 * semantics by either the SYNC_BDFLUSH flag or the absence of the
 * SYNC_WAIT flag.
 */
int
xfs_qm_sync(
	xfs_mount_t	*mp,
	int		flags)
{
	int		recl, restarts;
	xfs_dquot_t	*dqp;
	uint		flush_flags;
	boolean_t	nowait;
	int		error;

	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
		return 0;

	restarts = 0;
	/*
	 * We won't block unless we are asked to.
	 */
	nowait = (boolean_t)(flags & SYNC_BDFLUSH || (flags & SYNC_WAIT) == 0);

again:
	xfs_qm_mplist_lock(mp);
	/*
	 * dqpurge_all() also takes the mplist lock and iterates through all
	 * dquots in quotaoff. However, if the QUOTA_ACTIVE bits are not
	 * cleared when we have the mplist lock, we know that dquots will be
	 * consistent as long as we have it locked.
	 */
	if (! XFS_IS_QUOTA_ON(mp)) {
		xfs_qm_mplist_unlock(mp);
		return 0;
	}
	FOREACH_DQUOT_IN_MP(dqp, mp) {
		/*
		 * If this is vfs_sync calling, then skip the dquots that
		 * don't 'seem' to be dirty, i.e. don't acquire dqlock.
		 * This is very similar to what xfs_sync does with inodes.
		 */
		if (flags & SYNC_BDFLUSH) {
			if (! XFS_DQ_IS_DIRTY(dqp))
				continue;
		}

		if (nowait) {
			/*
			 * Try to acquire the dquot lock. We are NOT out of
			 * lock order, but we just don't want to wait for this
			 * lock, unless somebody wanted us to.
			 */
			if (! xfs_qm_dqlock_nowait(dqp))
				continue;
		} else {
			xfs_dqlock(dqp);
		}

		/*
		 * Now, find out for sure if this dquot is dirty or not.
		 */
		if (! XFS_DQ_IS_DIRTY(dqp)) {
			xfs_dqunlock(dqp);
			continue;
		}

		/* XXX a sentinel would be better */
		recl = XFS_QI_MPLRECLAIMS(mp);
		if (!xfs_dqflock_nowait(dqp)) {
			if (nowait) {
				xfs_dqunlock(dqp);
				continue;
			}
			/*
			 * If we can't grab the flush lock, and the caller
			 * really wanted us to give this our best shot, see
			 * if we can give a push to the buffer before we wait
			 * on the flush lock. At this point, we know that
			 * even though the dquot is being flushed,
			 * it has (new) dirty data.
			 */
			xfs_qm_dqflock_pushbuf_wait(dqp);
		}
		/*
		 * Let go of the mplist lock. We don't want to hold it
		 * across a disk write
		 */
		flush_flags = (nowait) ? XFS_QMOPT_DELWRI : XFS_QMOPT_SYNC;
		xfs_qm_mplist_unlock(mp);
		xfs_dqtrace_entry(dqp, "XQM_SYNC: DQFLUSH");
		error = xfs_qm_dqflush(dqp, flush_flags);
		xfs_dqunlock(dqp);
		if (error && XFS_FORCED_SHUTDOWN(mp))
			return 0;	/* Need to prevent umount failure */
		else if (error)
			return error;

		xfs_qm_mplist_lock(mp);
		if (recl != XFS_QI_MPLRECLAIMS(mp)) {
			if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS)
				break;

			xfs_qm_mplist_unlock(mp);
			goto again;
		}
	}

	xfs_qm_mplist_unlock(mp);
	return 0;
}

/*
 * The hash chains and the mplist use the same xfs_dqhash structure as
 * their list head, but we can take the mplist qh_lock and one of the
 * hash qh_locks at the same time without any problem as they aren't
 * related.
 */
static struct lock_class_key xfs_quota_mplist_class;

/*
 * This initializes all the quota information that's kept in the
 * mount structure
 */
STATIC int
xfs_qm_init_quotainfo(
	xfs_mount_t	*mp)
{
	xfs_quotainfo_t	*qinf;
	int		error;
	xfs_dquot_t	*dqp;

	ASSERT(XFS_IS_QUOTA_RUNNING(mp));

	/*
	 * Tell XQM that we exist as soon as possible.
	 */
	if ((error = xfs_qm_hold_quotafs_ref(mp))) {
		return error;
	}

	qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);

	/*
	 * See if quotainodes are setup, and if not, allocate them,
	 * and change the superblock accordingly.
	 */
	if ((error = xfs_qm_init_quotainos(mp))) {
		kmem_free(qinf);
		mp->m_quotainfo = NULL;
		return error;
	}

	xfs_qm_list_init(&qinf->qi_dqlist, "mpdqlist", 0);
	lockdep_set_class(&qinf->qi_dqlist.qh_lock, &xfs_quota_mplist_class);

	qinf->qi_dqreclaims = 0;

	/* mutex used to serialize quotaoffs */
	mutex_init(&qinf->qi_quotaofflock);

	/* Precalc some constants */
	qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
	ASSERT(qinf->qi_dqchunklen);
	qinf->qi_dqperchunk = BBTOB(qinf->qi_dqchunklen);
	do_div(qinf->qi_dqperchunk, sizeof(xfs_dqblk_t));
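	/*
	 * Worked example (a sketch, not normative): with 4 KiB filesystem
	 * blocks, a one-FSB dquot cluster and the usual 136-byte
	 * xfs_dqblk_t, qi_dqchunklen is 8 basic blocks (4096 bytes) and
	 * qi_dqperchunk works out to 4096 / 136 = 30 dquots per chunk.
	 */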
1075
1076 mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
1077
1078 /*
1079 * We try to get the limits from the superuser's limits fields.
1080 * This is quite hacky, but it is standard quota practice.
1081 * We look at the USR dquot with id == 0 first, but if user quotas
1082 * are not enabled we goto the GRP dquot with id == 0.
1083 * We don't really care to keep separate default limits for user
1084 * and group quotas, at least not at this point.
1085 */
1086 error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0,
1087 XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER :
1088 (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
1089 XFS_DQ_PROJ),
1090 XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN,
1091 &dqp);
1092 if (! error) {
1093 xfs_disk_dquot_t *ddqp = &dqp->q_core;
1094
1095 /*
1096 * The warnings and timers set the grace period given to
1097 * a user or group before he or she can not perform any
1098 * more writing. If it is zero, a default is used.
1099 */
1100 qinf->qi_btimelimit = ddqp->d_btimer ?
1101 be32_to_cpu(ddqp->d_btimer) : XFS_QM_BTIMELIMIT;
1102 qinf->qi_itimelimit = ddqp->d_itimer ?
1103 be32_to_cpu(ddqp->d_itimer) : XFS_QM_ITIMELIMIT;
1104 qinf->qi_rtbtimelimit = ddqp->d_rtbtimer ?
1105 be32_to_cpu(ddqp->d_rtbtimer) : XFS_QM_RTBTIMELIMIT;
1106 qinf->qi_bwarnlimit = ddqp->d_bwarns ?
1107 be16_to_cpu(ddqp->d_bwarns) : XFS_QM_BWARNLIMIT;
1108 qinf->qi_iwarnlimit = ddqp->d_iwarns ?
1109 be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT;
1110 qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ?
1111 be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT;
1112 qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit);
1113 qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit);
1114 qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit);
1115 qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
1116 qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
1117 qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
1118
1119 /*
1120 * We sent the XFS_QMOPT_DQSUSER flag to dqget because
1121 * we don't want this dquot cached. We haven't done a
1122 * quotacheck yet, and quotacheck doesn't like incore dquots.
1123 */
1124 xfs_qm_dqdestroy(dqp);
1125 } else {
1126 qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
1127 qinf->qi_itimelimit = XFS_QM_ITIMELIMIT;
1128 qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT;
1129 qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT;
1130 qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT;
1131 qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
1132 }
1133
1134 return 0;
1135 }
1136
1137
1138 /*
1139 * Gets called when unmounting a filesystem or when all quotas get
1140 * turned off.
1141 * This purges the quota inodes, destroys locks and frees itself.
1142 */
1143 void
1144 xfs_qm_destroy_quotainfo(
1145 xfs_mount_t *mp)
1146 {
1147 xfs_quotainfo_t *qi;
1148
1149 qi = mp->m_quotainfo;
1150 ASSERT(qi != NULL);
1151 ASSERT(xfs_Gqm != NULL);
1152
1153 /*
1154 * Release the reference that XQM kept, so that we know
1155 * when the XQM structure should be freed. We cannot assume
1156 * that xfs_Gqm is non-null after this point.
1157 */
1158 xfs_qm_rele_quotafs_ref(mp);
1159
1160 xfs_qm_list_destroy(&qi->qi_dqlist);
1161
1162 if (qi->qi_uquotaip) {
1163 IRELE(qi->qi_uquotaip);
1164 qi->qi_uquotaip = NULL; /* paranoia */
1165 }
1166 if (qi->qi_gquotaip) {
1167 IRELE(qi->qi_gquotaip);
1168 qi->qi_gquotaip = NULL;
1169 }
1170 mutex_destroy(&qi->qi_quotaofflock);
1171 kmem_free(qi);
1172 mp->m_quotainfo = NULL;
1173 }
1174
1175
1176
1177 /* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */
1178
1179 /* ARGSUSED */
1180 STATIC void
1181 xfs_qm_list_init(
1182 xfs_dqlist_t *list,
1183 char *str,
1184 int n)
1185 {
1186 mutex_init(&list->qh_lock);
1187 list->qh_next = NULL;
1188 list->qh_version = 0;
1189 list->qh_nelems = 0;
1190 }
1191
1192 STATIC void
1193 xfs_qm_list_destroy(
1194 xfs_dqlist_t *list)
1195 {
1196 mutex_destroy(&(list->qh_lock));
1197 }
1198
1199
1200 /*
1201 * Stripped down version of dqattach. This doesn't attach, or even look at the
1202 * dquots attached to the inode. The rationale is that there won't be any
1203 * attached at the time this is called from quotacheck.
1204 */
1205 STATIC int
1206 xfs_qm_dqget_noattach(
1207 xfs_inode_t *ip,
1208 xfs_dquot_t **O_udqpp,
1209 xfs_dquot_t **O_gdqpp)
1210 {
1211 int error;
1212 xfs_mount_t *mp;
1213 xfs_dquot_t *udqp, *gdqp;
1214
1215 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1216 mp = ip->i_mount;
1217 udqp = NULL;
1218 gdqp = NULL;
1219
1220 if (XFS_IS_UQUOTA_ON(mp)) {
1221 ASSERT(ip->i_udquot == NULL);
1222 /*
1223 * We want the dquot allocated if it doesn't exist.
1224 */
1225 if ((error = xfs_qm_dqget(mp, ip, ip->i_d.di_uid, XFS_DQ_USER,
1226 XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN,
1227 &udqp))) {
1228 /*
1229 * Shouldn't be able to turn off quotas here.
1230 */
1231 ASSERT(error != ESRCH);
1232 ASSERT(error != ENOENT);
1233 return error;
1234 }
1235 ASSERT(udqp);
1236 }
1237
1238 if (XFS_IS_OQUOTA_ON(mp)) {
1239 ASSERT(ip->i_gdquot == NULL);
1240 if (udqp)
1241 xfs_dqunlock(udqp);
1242 error = XFS_IS_GQUOTA_ON(mp) ?
1243 xfs_qm_dqget(mp, ip,
1244 ip->i_d.di_gid, XFS_DQ_GROUP,
1245 XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN,
1246 &gdqp) :
1247 xfs_qm_dqget(mp, ip,
1248 ip->i_d.di_projid, XFS_DQ_PROJ,
1249 XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN,
1250 &gdqp);
1251 if (error) {
1252 if (udqp)
1253 xfs_qm_dqrele(udqp);
1254 ASSERT(error != ESRCH);
1255 ASSERT(error != ENOENT);
1256 return error;
1257 }
1258 ASSERT(gdqp);
1259
1260 /* Reacquire the locks in the right order */
1261 if (udqp) {
1262 if (! xfs_qm_dqlock_nowait(udqp)) {
1263 xfs_dqunlock(gdqp);
1264 xfs_dqlock(udqp);
1265 xfs_dqlock(gdqp);
1266 }
1267 }
1268 }
1269
1270 *O_udqpp = udqp;
1271 *O_gdqpp = gdqp;
1272
1273 #ifdef QUOTADEBUG
1274 if (udqp) ASSERT(XFS_DQ_IS_LOCKED(udqp));
1275 if (gdqp) ASSERT(XFS_DQ_IS_LOCKED(gdqp));
1276 #endif
1277 return 0;
1278 }
1279
1280 /*
1281 * Create an inode and return with a reference already taken, but unlocked
1282 * This is how we create quota inodes
1283 */
1284 STATIC int
1285 xfs_qm_qino_alloc(
1286 xfs_mount_t *mp,
1287 xfs_inode_t **ip,
1288 __int64_t sbfields,
1289 uint flags)
1290 {
1291 xfs_trans_t *tp;
1292 int error;
1293 int committed;
1294
1295 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE);
1296 if ((error = xfs_trans_reserve(tp,
1297 XFS_QM_QINOCREATE_SPACE_RES(mp),
1298 XFS_CREATE_LOG_RES(mp), 0,
1299 XFS_TRANS_PERM_LOG_RES,
1300 XFS_CREATE_LOG_COUNT))) {
1301 xfs_trans_cancel(tp, 0);
1302 return error;
1303 }
1304
1305 if ((error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0,
1306 &xfs_zerocr, 0, 1, ip, &committed))) {
1307 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
1308 XFS_TRANS_ABORT);
1309 return error;
1310 }
1311
1312 /*
1313 * Keep an extra reference to this quota inode. This inode is
1314 * locked exclusively and joined to the transaction already.
1315 */
1316 ASSERT(xfs_isilocked(*ip, XFS_ILOCK_EXCL));
1317 IHOLD(*ip);
1318
1319 /*
1320 * Make the changes in the superblock, and log those too.
1321 * sbfields arg may contain fields other than *QUOTINO;
1322 * VERSIONNUM for example.
1323 */
1324 spin_lock(&mp->m_sb_lock);
1325 if (flags & XFS_QMOPT_SBVERSION) {
1326 #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
1327 unsigned oldv = mp->m_sb.sb_versionnum;
1328 #endif
1329 ASSERT(!xfs_sb_version_hasquota(&mp->m_sb));
1330 ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1331 XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) ==
1332 (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1333 XFS_SB_GQUOTINO | XFS_SB_QFLAGS));
1334
1335 xfs_sb_version_addquota(&mp->m_sb);
1336 mp->m_sb.sb_uquotino = NULLFSINO;
1337 mp->m_sb.sb_gquotino = NULLFSINO;
1338
1339 /* qflags will get updated _after_ quotacheck */
1340 mp->m_sb.sb_qflags = 0;
1341 #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
1342 cmn_err(CE_NOTE,
1343 "Old superblock version %x, converting to %x.",
1344 oldv, mp->m_sb.sb_versionnum);
1345 #endif
1346 }
1347 if (flags & XFS_QMOPT_UQUOTA)
1348 mp->m_sb.sb_uquotino = (*ip)->i_ino;
1349 else
1350 mp->m_sb.sb_gquotino = (*ip)->i_ino;
1351 spin_unlock(&mp->m_sb_lock);
1352 xfs_mod_sb(tp, sbfields);
1353
1354 if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) {
1355 xfs_fs_cmn_err(CE_ALERT, mp, "XFS qino_alloc failed!");
1356 return error;
1357 }
1358 return 0;
1359 }
1360
1361
1362 STATIC void
1363 xfs_qm_reset_dqcounts(
1364 xfs_mount_t *mp,
1365 xfs_buf_t *bp,
1366 xfs_dqid_t id,
1367 uint type)
1368 {
1369 xfs_disk_dquot_t *ddq;
1370 int j;
1371
1372 xfs_buftrace("RESET DQUOTS", bp);
1373 /*
1374 * Reset all counters and timers. They'll be
1375 * started afresh by xfs_qm_quotacheck.
1376 */
1377 #ifdef DEBUG
1378 j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
1379 do_div(j, sizeof(xfs_dqblk_t));
1380 ASSERT(XFS_QM_DQPERBLK(mp) == j);
1381 #endif
1382 ddq = (xfs_disk_dquot_t *)XFS_BUF_PTR(bp);
1383 for (j = 0; j < XFS_QM_DQPERBLK(mp); j++) {
1384 /*
1385 * Do a sanity check, and if needed, repair the dqblk. Don't
1386 * output any warnings because it's perfectly possible to
1387 * find uninitialised dquot blks. See comment in xfs_qm_dqcheck.
1388 */
1389 (void) xfs_qm_dqcheck(ddq, id+j, type, XFS_QMOPT_DQREPAIR,
1390 "xfs_quotacheck");
1391 ddq->d_bcount = 0;
1392 ddq->d_icount = 0;
1393 ddq->d_rtbcount = 0;
1394 ddq->d_btimer = 0;
1395 ddq->d_itimer = 0;
1396 ddq->d_rtbtimer = 0;
1397 ddq->d_bwarns = 0;
1398 ddq->d_iwarns = 0;
1399 ddq->d_rtbwarns = 0;
1400 ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);
1401 }
1402 }
1403
1404 STATIC int
1405 xfs_qm_dqiter_bufs(
1406 xfs_mount_t *mp,
1407 xfs_dqid_t firstid,
1408 xfs_fsblock_t bno,
1409 xfs_filblks_t blkcnt,
1410 uint flags)
1411 {
1412 xfs_buf_t *bp;
1413 int error;
1414 int notcommitted;
1415 int incr;
1416 int type;
1417
1418 ASSERT(blkcnt > 0);
1419 notcommitted = 0;
1420 incr = (blkcnt > XFS_QM_MAX_DQCLUSTER_LOGSZ) ?
1421 XFS_QM_MAX_DQCLUSTER_LOGSZ : blkcnt;
1422 type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER :
1423 (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP);
1424 error = 0;
1425
1426 /*
1427 * Blkcnt arg can be a very big number, and might even be
1428 * larger than the log itself. So, we have to break it up into
1429 * manageable-sized transactions.
1430 * Note that we don't start a permanent transaction here; we might
1431 * not be able to get a log reservation for the whole thing up front,
1432 * and we don't really care to either, because we just discard
1433 * everything if we were to crash in the middle of this loop.
1434 */
1435 while (blkcnt--) {
1436 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
1437 XFS_FSB_TO_DADDR(mp, bno),
1438 (int)XFS_QI_DQCHUNKLEN(mp), 0, &bp);
1439 if (error)
1440 break;
1441
1442 xfs_qm_reset_dqcounts(mp, bp, firstid, type);
1443 xfs_bdwrite(mp, bp);
1444 /*
1445 * goto the next block.
1446 */
1447 bno++;
1448 firstid += XFS_QM_DQPERBLK(mp);
1449 }
1450 return error;
1451 }
1452
1453 /*
1454 * Iterate over all allocated USR/GRP/PRJ dquots in the system, calling a
1455 * caller supplied function for every chunk of dquots that we find.
1456 */
1457 STATIC int
1458 xfs_qm_dqiterate(
1459 xfs_mount_t *mp,
1460 xfs_inode_t *qip,
1461 uint flags)
1462 {
1463 xfs_bmbt_irec_t *map;
1464 int i, nmaps; /* number of map entries */
1465 int error; /* return value */
1466 xfs_fileoff_t lblkno;
1467 xfs_filblks_t maxlblkcnt;
1468 xfs_dqid_t firstid;
1469 xfs_fsblock_t rablkno;
1470 xfs_filblks_t rablkcnt;
1471
1472 error = 0;
1473 /*
1474 * This looks racy, but we can't keep an inode lock across a
1475 * trans_reserve. But, this gets called during quotacheck, and that
1476 * happens only at mount time which is single threaded.
1477 */
1478 if (qip->i_d.di_nblocks == 0)
1479 return 0;
1480
1481 map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP);
1482
1483 lblkno = 0;
1484 maxlblkcnt = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
1485 do {
1486 nmaps = XFS_DQITER_MAP_SIZE;
1487 /*
1488 * We aren't changing the inode itself. Just changing
1489 * some of its data. No new blocks are added here, and
1490 * the inode is never added to the transaction.
1491 */
1492 xfs_ilock(qip, XFS_ILOCK_SHARED);
1493 error = xfs_bmapi(NULL, qip, lblkno,
1494 maxlblkcnt - lblkno,
1495 XFS_BMAPI_METADATA,
1496 NULL,
1497 0, map, &nmaps, NULL, NULL);
1498 xfs_iunlock(qip, XFS_ILOCK_SHARED);
1499 if (error)
1500 break;
1501
1502 ASSERT(nmaps <= XFS_DQITER_MAP_SIZE);
1503 for (i = 0; i < nmaps; i++) {
1504 ASSERT(map[i].br_startblock != DELAYSTARTBLOCK);
1505 ASSERT(map[i].br_blockcount);
1506
1507
1508 lblkno += map[i].br_blockcount;
1509
1510 if (map[i].br_startblock == HOLESTARTBLOCK)
1511 continue;
1512
1513 firstid = (xfs_dqid_t) map[i].br_startoff *
1514 XFS_QM_DQPERBLK(mp);
1515 /*
1516 * Do a read-ahead on the next extent.
1517 */
1518 if ((i+1 < nmaps) &&
1519 (map[i+1].br_startblock != HOLESTARTBLOCK)) {
1520 rablkcnt = map[i+1].br_blockcount;
1521 rablkno = map[i+1].br_startblock;
1522 while (rablkcnt--) {
1523 xfs_baread(mp->m_ddev_targp,
1524 XFS_FSB_TO_DADDR(mp, rablkno),
1525 (int)XFS_QI_DQCHUNKLEN(mp));
1526 rablkno++;
1527 }
1528 }
1529 /*
1530 * Iterate thru all the blks in the extent and
1531 * reset the counters of all the dquots inside them.
1532 */
1533 if ((error = xfs_qm_dqiter_bufs(mp,
1534 firstid,
1535 map[i].br_startblock,
1536 map[i].br_blockcount,
1537 flags))) {
1538 break;
1539 }
1540 }
1541
1542 if (error)
1543 break;
1544 } while (nmaps > 0);
1545
1546 kmem_free(map);
1547
1548 return error;
1549 }
1550
1551 /*
1552 * Called by dqusage_adjust in doing a quotacheck.
1553 * Given the inode, and a dquot (either USR or GRP, doesn't matter),
1554 * this updates its incore copy as well as the buffer copy. This is
1555 * so that once the quotacheck is done, we can just log all the buffers,
1556 * as opposed to logging numerous updates to individual dquots.
1557 */
1558 STATIC void
1559 xfs_qm_quotacheck_dqadjust(
1560 xfs_dquot_t *dqp,
1561 xfs_qcnt_t nblks,
1562 xfs_qcnt_t rtblks)
1563 {
1564 ASSERT(XFS_DQ_IS_LOCKED(dqp));
1565 xfs_dqtrace_entry(dqp, "QCHECK DQADJUST");
1566 /*
1567 * Adjust the inode count and the block count to reflect this inode's
1568 * resource usage.
1569 */
1570 be64_add_cpu(&dqp->q_core.d_icount, 1);
1571 dqp->q_res_icount++;
1572 if (nblks) {
1573 be64_add_cpu(&dqp->q_core.d_bcount, nblks);
1574 dqp->q_res_bcount += nblks;
1575 }
1576 if (rtblks) {
1577 be64_add_cpu(&dqp->q_core.d_rtbcount, rtblks);
1578 dqp->q_res_rtbcount += rtblks;
1579 }
1580
1581 /*
1582 * Set default limits, adjust timers (since we changed usages)
1583 */
1584 if (! XFS_IS_SUSER_DQUOT(dqp)) {
1585 xfs_qm_adjust_dqlimits(dqp->q_mount, &dqp->q_core);
1586 xfs_qm_adjust_dqtimers(dqp->q_mount, &dqp->q_core);
1587 }
1588
1589 dqp->dq_flags |= XFS_DQ_DIRTY;
1590 }
1591
1592 STATIC int
1593 xfs_qm_get_rtblks(
1594 xfs_inode_t *ip,
1595 xfs_qcnt_t *O_rtblks)
1596 {
1597 xfs_filblks_t rtblks; /* total rt blks */
1598 xfs_extnum_t idx; /* extent record index */
1599 xfs_ifork_t *ifp; /* inode fork pointer */
1600 xfs_extnum_t nextents; /* number of extent entries */
1601 int error;
1602
1603 ASSERT(XFS_IS_REALTIME_INODE(ip));
1604 ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1605 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1606 if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK)))
1607 return error;
1608 }
1609 rtblks = 0;
1610 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1611 for (idx = 0; idx < nextents; idx++)
1612 rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx));
1613 *O_rtblks = (xfs_qcnt_t)rtblks;
1614 return 0;
1615 }
1616
1617 /*
1618 * callback routine supplied to bulkstat(). Given an inumber, find its
1619 * dquots and update them to account for resources taken by that inode.
1620 */
1621 /* ARGSUSED */
1622 STATIC int
1623 xfs_qm_dqusage_adjust(
1624 xfs_mount_t *mp, /* mount point for filesystem */
1625 xfs_ino_t ino, /* inode number to get data for */
1626 void __user *buffer, /* not used */
1627 int ubsize, /* not used */
1628 void *private_data, /* not used */
1629 xfs_daddr_t bno, /* starting block of inode cluster */
1630 int *ubused, /* not used */
1631 void *dip, /* on-disk inode pointer (not used) */
1632 int *res) /* result code value */
1633 {
1634 xfs_inode_t *ip;
1635 xfs_dquot_t *udqp, *gdqp;
1636 xfs_qcnt_t nblks, rtblks;
1637 int error;
1638
1639 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1640
1641 /*
1642 * rootino must have its resources accounted for, not so with the quota
1643 * inodes.
1644 */
1645 if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
1646 *res = BULKSTAT_RV_NOTHING;
1647 return XFS_ERROR(EINVAL);
1648 }
1649
1650 /*
1651 * We don't _need_ to take the ilock EXCL. However, the xfs_qm_dqget
1652 * interface expects the inode to be exclusively locked because that's
1653 * the case in all other instances. It's OK that we do this because
1654 * quotacheck is done only at mount time.
1655 */
1656 if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip, bno))) {
1657 *res = BULKSTAT_RV_NOTHING;
1658 return error;
1659 }
1660
1661 /*
1662 * Obtain the locked dquots. In case of an error (eg. allocation
1663 * fails for ENOSPC), we return the negative of the error number
1664 * to bulkstat, so that it can get propagated to quotacheck() and
1665 * making us disable quotas for the file system.
1666 */
1667 if ((error = xfs_qm_dqget_noattach(ip, &udqp, &gdqp))) {
1668 xfs_iput(ip, XFS_ILOCK_EXCL);
1669 *res = BULKSTAT_RV_GIVEUP;
1670 return error;
1671 }
1672
1673 rtblks = 0;
1674 if (! XFS_IS_REALTIME_INODE(ip)) {
1675 nblks = (xfs_qcnt_t)ip->i_d.di_nblocks;
1676 } else {
1677 /*
1678 * Walk thru the extent list and count the realtime blocks.
1679 */
1680 if ((error = xfs_qm_get_rtblks(ip, &rtblks))) {
1681 xfs_iput(ip, XFS_ILOCK_EXCL);
1682 if (udqp)
1683 xfs_qm_dqput(udqp);
1684 if (gdqp)
1685 xfs_qm_dqput(gdqp);
1686 *res = BULKSTAT_RV_GIVEUP;
1687 return error;
1688 }
1689 nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
1690 }
1691 ASSERT(ip->i_delayed_blks == 0);
1692
1693 /*
1694 * We can't release the inode while holding its dquot locks.
1695 * The inode can go into inactive and might try to acquire the dquotlocks.
1696 * So, just unlock here and do a vn_rele at the end.
1697 */
1698 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1699
1700 /*
1701 * Add the (disk blocks and inode) resources occupied by this
1702 * inode to its dquots. We do this adjustment in the incore dquot,
1703 * and also copy the changes to its buffer.
1704 * We don't care about putting these changes in a transaction
1705 * envelope because if we crash in the middle of a 'quotacheck'
1706 * we have to start from the beginning anyway.
1707 * Once we're done, we'll log all the dquot bufs.
1708 *
1709 * The *QUOTA_ON checks below may look pretty racy, but quotachecks
1710 * and quotaoffs don't race. (Quotachecks happen at mount time only).
1711 */
1712 if (XFS_IS_UQUOTA_ON(mp)) {
1713 ASSERT(udqp);
1714 xfs_qm_quotacheck_dqadjust(udqp, nblks, rtblks);
1715 xfs_qm_dqput(udqp);
1716 }
1717 if (XFS_IS_OQUOTA_ON(mp)) {
1718 ASSERT(gdqp);
1719 xfs_qm_quotacheck_dqadjust(gdqp, nblks, rtblks);
1720 xfs_qm_dqput(gdqp);
1721 }
1722 /*
1723 * Now release the inode. This will send it to 'inactive', and
1724 * possibly even free blocks.
1725 */
1726 IRELE(ip);
1727
1728 /*
1729 * Goto next inode.
1730 */
1731 *res = BULKSTAT_RV_DIDONE;
1732 return 0;
1733 }
1734
1735 /*
1736 * Walk thru all the filesystem inodes and construct a consistent view
1737 * of the disk quota world. If the quotacheck fails, disable quotas.
1738 */
1739 int
1740 xfs_qm_quotacheck(
1741 xfs_mount_t *mp)
1742 {
1743 int done, count, error;
1744 xfs_ino_t lastino;
1745 size_t structsz;
1746 xfs_inode_t *uip, *gip;
1747 uint flags;
1748
1749 count = INT_MAX;
1750 structsz = 1;
1751 lastino = 0;
1752 flags = 0;
1753
1754 ASSERT(XFS_QI_UQIP(mp) || XFS_QI_GQIP(mp));
1755 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1756
1757 /*
1758 * There should be no cached dquots. The (simplistic) quotacheck
1759 * algorithm doesn't like that.
1760 */
1761 ASSERT(XFS_QI_MPLNDQUOTS(mp) == 0);
1762
1763 cmn_err(CE_NOTE, "XFS quotacheck %s: Please wait.", mp->m_fsname);
1764
1765 /*
1766 * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset
1767 * their counters to zero. We need a clean slate.
1768 * We don't log our changes till later.
1769 */
1770 if ((uip = XFS_QI_UQIP(mp))) {
1771 if ((error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA)))
1772 goto error_return;
1773 flags |= XFS_UQUOTA_CHKD;
1774 }
1775
1776 if ((gip = XFS_QI_GQIP(mp))) {
1777 if ((error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ?
1778 XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA)))
1779 goto error_return;
1780 flags |= XFS_OQUOTA_CHKD;
1781 }
1782
1783 do {
1784 /*
1785 * Iterate thru all the inodes in the file system,
1786 * adjusting the corresponding dquot counters in core.
1787 */
1788 if ((error = xfs_bulkstat(mp, &lastino, &count,
1789 xfs_qm_dqusage_adjust, NULL,
1790 structsz, NULL, BULKSTAT_FG_IGET, &done)))
1791 break;
1792
1793 } while (! done);
1794
1795 /*
1796 * We've made all the changes that we need to make incore.
1797 * Flush them down to disk buffers if everything was updated
1798 * successfully.
1799 */
1800 if (!error)
1801 error = xfs_qm_dqflush_all(mp, XFS_QMOPT_DELWRI);
1802
1803 /*
1804 * We can get this error if we couldn't do a dquot allocation inside
1805 * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the
1806 * dirty dquots that might be cached, we just want to get rid of them
1807 * and turn quotaoff. The dquots won't be attached to any of the inodes
1808 * at this point (because we intentionally didn't in dqget_noattach).
1809 */
1810 if (error) {
1811 xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_QUOTAOFF);
1812 goto error_return;
1813 }
1814
1815 /*
1816 * We didn't log anything, because if we crashed, we'll have to
1817 * start the quotacheck from scratch anyway. However, we must make
1818 * sure that our dquot changes are secure before we put the
1819 * quotacheck'd stamp on the superblock. So, here we do a synchronous
1820 * flush.
1821 */
1822 XFS_bflush(mp->m_ddev_targp);
1823
1824 /*
1825 * If one type of quotas is off, then it will lose its
1826 * quotachecked status, since we won't be doing accounting for
1827 * that type anymore.
1828 */
1829 mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD);
1830 mp->m_qflags |= flags;
1831
1832 XQM_LIST_PRINT(&(XFS_QI_MPL_LIST(mp)), MPL_NEXT, "++++ Mp list +++");
1833
1834 error_return:
1835 if (error) {
1836 cmn_err(CE_WARN, "XFS quotacheck %s: Unsuccessful (Error %d): "
1837 "Disabling quotas.",
1838 mp->m_fsname, error);
1839 /*
1840 * We must turn off quotas.
1841 */
1842 ASSERT(mp->m_quotainfo != NULL);
1843 ASSERT(xfs_Gqm != NULL);
1844 xfs_qm_destroy_quotainfo(mp);
1845 if (xfs_mount_reset_sbqflags(mp)) {
1846 cmn_err(CE_WARN, "XFS quotacheck %s: "
1847 "Failed to reset quota flags.", mp->m_fsname);
1848 }
1849 } else {
1850 cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname);
1851 }
1852 return (error);
1853 }
1854
1855 /*
1856 * This is called after the superblock has been read in and we're ready to
1857 * iget the quota inodes.
1858 */
1859 STATIC int
1860 xfs_qm_init_quotainos(
1861 xfs_mount_t *mp)
1862 {
1863 xfs_inode_t *uip, *gip;
1864 int error;
1865 __int64_t sbflags;
1866 uint flags;
1867
1868 ASSERT(mp->m_quotainfo);
1869 uip = gip = NULL;
1870 sbflags = 0;
1871 flags = 0;
1872
1873 /*
1874 * Get the uquota and gquota inodes
1875 */
1876 if (xfs_sb_version_hasquota(&mp->m_sb)) {
1877 if (XFS_IS_UQUOTA_ON(mp) &&
1878 mp->m_sb.sb_uquotino != NULLFSINO) {
1879 ASSERT(mp->m_sb.sb_uquotino > 0);
1880 if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
1881 0, 0, &uip, 0)))
1882 return XFS_ERROR(error);
1883 }
1884 if (XFS_IS_OQUOTA_ON(mp) &&
1885 mp->m_sb.sb_gquotino != NULLFSINO) {
1886 ASSERT(mp->m_sb.sb_gquotino > 0);
1887 if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
1888 0, 0, &gip, 0))) {
1889 if (uip)
1890 IRELE(uip);
1891 return XFS_ERROR(error);
1892 }
1893 }
1894 } else {
1895 flags |= XFS_QMOPT_SBVERSION;
1896 sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1897 XFS_SB_GQUOTINO | XFS_SB_QFLAGS);
1898 }
1899
1900 /*
1901 * Create the two inodes, if they don't exist already. The changes
1902 * made above will get added to a transaction and logged in one of
1903 * the qino_alloc calls below. If the device is readonly,
1904 * temporarily switch to read-write to do this.
1905 */
1906 if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) {
1907 if ((error = xfs_qm_qino_alloc(mp, &uip,
1908 sbflags | XFS_SB_UQUOTINO,
1909 flags | XFS_QMOPT_UQUOTA)))
1910 return XFS_ERROR(error);
1911
1912 flags &= ~XFS_QMOPT_SBVERSION;
1913 }
1914 if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) {
1915 flags |= (XFS_IS_GQUOTA_ON(mp) ?
1916 XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
1917 error = xfs_qm_qino_alloc(mp, &gip,
1918 sbflags | XFS_SB_GQUOTINO, flags);
1919 if (error) {
1920 if (uip)
1921 IRELE(uip);
1922
1923 return XFS_ERROR(error);
1924 }
1925 }
1926
1927 XFS_QI_UQIP(mp) = uip;
1928 XFS_QI_GQIP(mp) = gip;
1929
1930 return 0;
1931 }
1932
1933
1934 /*
1935 * Traverse the freelist of dquots and attempt to reclaim a maximum of
1936 * 'howmany' dquots. This operation races with dqlookup(), and attempts to
1937 * favor the lookup function ...
1938 * XXXsup merge this with qm_reclaim_one().
1939 */
1940 STATIC int
1941 xfs_qm_shake_freelist(
1942 int howmany)
1943 {
1944 int nreclaimed;
1945 xfs_dqhash_t *hash;
1946 xfs_dquot_t *dqp, *nextdqp;
1947 int restarts;
1948 int nflushes;
1949
1950 if (howmany <= 0)
1951 return 0;
1952
1953 nreclaimed = 0;
1954 restarts = 0;
1955 nflushes = 0;
1956
1957 #ifdef QUOTADEBUG
1958 cmn_err(CE_DEBUG, "Shake free 0x%x", howmany);
1959 #endif
1960 /* lock order is : hashchainlock, freelistlock, mplistlock */
1961 tryagain:
1962 xfs_qm_freelist_lock(xfs_Gqm);
1963
1964 for (dqp = xfs_Gqm->qm_dqfreelist.qh_next;
1965 ((dqp != (xfs_dquot_t *) &xfs_Gqm->qm_dqfreelist) &&
1966 nreclaimed < howmany); ) {
1967 xfs_dqlock(dqp);
1968
1969 /*
1970 * We are racing with dqlookup here. Naturally we don't
1971 * want to reclaim a dquot that lookup wants.
1972 */
1973 if (dqp->dq_flags & XFS_DQ_WANT) {
1974 xfs_dqunlock(dqp);
1975 xfs_qm_freelist_unlock(xfs_Gqm);
1976 if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
1977 return nreclaimed;
1978 XQM_STATS_INC(xqmstats.xs_qm_dqwants);
1979 goto tryagain;
1980 }
1981
1982 /*
1983 * If the dquot is inactive, we are assured that it is
1984 * not on the mplist or the hashlist, and that makes our
1985 * life easier.
1986 */
1987 if (dqp->dq_flags & XFS_DQ_INACTIVE) {
1988 ASSERT(dqp->q_mount == NULL);
1989 ASSERT(! XFS_DQ_IS_DIRTY(dqp));
1990 ASSERT(dqp->HL_PREVP == NULL);
1991 ASSERT(dqp->MPL_PREVP == NULL);
1992 XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
1993 nextdqp = dqp->dq_flnext;
1994 goto off_freelist;
1995 }
1996
1997 ASSERT(dqp->MPL_PREVP);
1998 /*
1999 * Try to grab the flush lock. If this dquot is in the process of
2000 * getting flushed to disk, we don't want to reclaim it.
2001 */
2002 if (!xfs_dqflock_nowait(dqp)) {
2003 xfs_dqunlock(dqp);
2004 dqp = dqp->dq_flnext;
2005 continue;
2006 }
2007
2008 /*
2009 * We have the flush lock so we know that this is not in the
2010 * process of being flushed. So, if this is dirty, flush it
2011 * DELWRI so that we don't get a freelist infested with
2012 * dirty dquots.
2013 */
2014 if (XFS_DQ_IS_DIRTY(dqp)) {
2015 int error;
2016 xfs_dqtrace_entry(dqp, "DQSHAKE: DQDIRTY");
2017 /*
2018 * We flush it delayed write, so don't bother
2019 * releasing the mplock.
2020 */
2021 error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI);
2022 if (error) {
2023 xfs_fs_cmn_err(CE_WARN, dqp->q_mount,
2024 "xfs_qm_dqflush_all: dquot %p flush failed", dqp);
2025 }
2026 xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
2027 dqp = dqp->dq_flnext;
2028 continue;
2029 }
2030 /*
2031 * We're trying to get the hashlock out of order. This races
2032 * with dqlookup; so, we giveup and goto the next dquot if
2033 * we couldn't get the hashlock. This way, we won't starve
2034 * a dqlookup process that holds the hashlock that is
2035 * waiting for the freelist lock.
2036 */
2037 if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
2038 xfs_dqfunlock(dqp);
2039 xfs_dqunlock(dqp);
2040 dqp = dqp->dq_flnext;
2041 continue;
2042 }
2043 /*
2044 * This races with dquot allocation code as well as dqflush_all
2045 * and reclaim code. So, if we failed to grab the mplist lock,
2046 * give up everything and start over.
2047 */
2048 hash = dqp->q_hash;
2049 ASSERT(hash);
2050 if (! xfs_qm_mplist_nowait(dqp->q_mount)) {
2051 /* XXX put a sentinel so that we can come back here */
2052 xfs_dqfunlock(dqp);
2053 xfs_dqunlock(dqp);
2054 mutex_unlock(&hash->qh_lock);
2055 xfs_qm_freelist_unlock(xfs_Gqm);
2056 if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
2057 return nreclaimed;
2058 goto tryagain;
2059 }
2060 xfs_dqtrace_entry(dqp, "DQSHAKE: UNLINKING");
2061 #ifdef QUOTADEBUG
2062 cmn_err(CE_DEBUG, "Shake 0x%p, ID 0x%x\n",
2063 dqp, be32_to_cpu(dqp->q_core.d_id));
2064 #endif
2065 ASSERT(dqp->q_nrefs == 0);
2066 nextdqp = dqp->dq_flnext;
2067 XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp);
2068 XQM_HASHLIST_REMOVE(hash, dqp);
2069 xfs_dqfunlock(dqp);
2070 xfs_qm_mplist_unlock(dqp->q_mount);
2071 mutex_unlock(&hash->qh_lock);
2072
2073 off_freelist:
2074 XQM_FREELIST_REMOVE(dqp);
2075 xfs_dqunlock(dqp);
2076 nreclaimed++;
2077 XQM_STATS_INC(xqmstats.xs_qm_dqshake_reclaims);
2078 xfs_qm_dqdestroy(dqp);
2079 dqp = nextdqp;
2080 }
2081 xfs_qm_freelist_unlock(xfs_Gqm);
2082 return nreclaimed;
2083 }
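
/*
 * A minimal sketch of the drop-everything-and-restart idiom used above
 * (illustrative only, not compiled; try_walk() is a hypothetical
 * stand-in for the loop body). Whenever an inner trylock fails, every
 * lock already held is released before retrying, and
 * XFS_QM_RECLAIM_MAX_RESTARTS bounds the retries so that reclaim
 * cannot livelock against a busy dqlookup().
 */
#if 0
	int	restarts = 0;

tryagain:
	xfs_qm_freelist_lock(xfs_Gqm);
	if (!try_walk()) {
		xfs_qm_freelist_unlock(xfs_Gqm);
		if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
			return nreclaimed;	/* give up; retry on the next shake */
		goto tryagain;
	}
	xfs_qm_freelist_unlock(xfs_Gqm);
#endif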
2084
2085
2086 /*
2087 * The kmem_shake interface is invoked when memory is running low.
2088 */
2089 /* ARGSUSED */
2090 STATIC int
2091 xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask)
2092 {
2093 int ndqused, nfree, n;
2094
2095 if (!kmem_shake_allow(gfp_mask))
2096 return 0;
2097 if (!xfs_Gqm)
2098 return 0;
2099
2100 nfree = xfs_Gqm->qm_dqfreelist.qh_nelems; /* free dquots */
2101 /* incore dquots in all f/s's */
2102 ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree;
2103
2104 ASSERT(ndqused >= 0);
2105
2106 if (nfree <= ndqused && nfree < ndquot)
2107 return 0;
2108
2109 ndqused *= xfs_Gqm->qm_dqfree_ratio; /* target # of free dquots */
2110 n = nfree - ndqused - ndquot; /* # over target */
2111
2112 return xfs_qm_shake_freelist(MAX(nfree, n));
2113 }
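
/*
 * Worked example of the arithmetic above, with made-up numbers: assume
 * 500 dquots in core, 400 of them on the freelist, ndquot == 100 and
 * qm_dqfree_ratio == 2. Then ndqused = 500 - 400 = 100; the early
 * return is skipped because nfree (400) exceeds ndqused; the target
 * free pool becomes 100 * 2 = 200, so n = 400 - 200 - 100 = 100 dquots
 * are over target, and MAX(nfree, n) == 400 is handed to
 * xfs_qm_shake_freelist() as a generous upper bound to work against.
 */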
2114
2115
2116 /*
2117 * Just pop the least recently used dquot off the freelist and
2118 * recycle it. The dquot is returned unlocked, unreferenced and off all lists.
2119 */
2120 STATIC xfs_dquot_t *
2121 xfs_qm_dqreclaim_one(void)
2122 {
2123 xfs_dquot_t *dqpout;
2124 xfs_dquot_t *dqp;
2125 int restarts;
2126 int nflushes;
2127
2128 restarts = 0;
2129 dqpout = NULL;
2130 nflushes = 0;
2131
2132 /* lock order: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
2133 startagain:
2134 xfs_qm_freelist_lock(xfs_Gqm);
2135
2136 FOREACH_DQUOT_IN_FREELIST(dqp, &(xfs_Gqm->qm_dqfreelist)) {
2137 xfs_dqlock(dqp);
2138
2139 /*
2140 * We are racing with dqlookup here. Naturally we don't
2141 * want to reclaim a dquot that lookup wants. We release the
2142 * freelist lock and start over, so that lookup will grab
2143 * both the dquot and the freelistlock.
2144 */
2145 if (dqp->dq_flags & XFS_DQ_WANT) {
2146 ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
2147 xfs_dqtrace_entry(dqp, "DQRECLAIM: DQWANT");
2148 xfs_dqunlock(dqp);
2149 xfs_qm_freelist_unlock(xfs_Gqm);
2150 if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
2151 return NULL;
2152 XQM_STATS_INC(xqmstats.xs_qm_dqwants);
2153 goto startagain;
2154 }
2155
2156 /*
2157 * If the dquot is inactive, we are assured that it is
2158 * not on the mplist or the hashlist, and that makes our
2159 * life easier.
2160 */
2161 if (dqp->dq_flags & XFS_DQ_INACTIVE) {
2162 ASSERT(dqp->q_mount == NULL);
2163 ASSERT(! XFS_DQ_IS_DIRTY(dqp));
2164 ASSERT(dqp->HL_PREVP == NULL);
2165 ASSERT(dqp->MPL_PREVP == NULL);
2166 XQM_FREELIST_REMOVE(dqp);
2167 xfs_dqunlock(dqp);
2168 dqpout = dqp;
2169 XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
2170 break;
2171 }
2172
2173 ASSERT(dqp->q_hash);
2174 ASSERT(dqp->MPL_PREVP);
2175
2176 /*
2177 * Try to grab the flush lock. If this dquot is in the process of
2178 * getting flushed to disk, we don't want to reclaim it.
2179 */
2180 if (!xfs_dqflock_nowait(dqp)) {
2181 xfs_dqunlock(dqp);
2182 continue;
2183 }
2184
2185 /*
2186 * We have the flush lock so we know that this is not in the
2187 * process of being flushed. So, if this is dirty, flush it
2188 * DELWRI so that we don't get a freelist infested with
2189 * dirty dquots.
2190 */
2191 if (XFS_DQ_IS_DIRTY(dqp)) {
2192 int error;
2193 xfs_dqtrace_entry(dqp, "DQRECLAIM: DQDIRTY");
2194 /*
2195 * We flush it delayed write, so don't bother
2196 * releasing the freelist lock.
2197 */
2198 error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI);
2199 if (error) {
2200 xfs_fs_cmn_err(CE_WARN, dqp->q_mount,
2201 "xfs_qm_dqreclaim: dquot %p flush failed", dqp);
2202 }
2203 xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
2204 continue;
2205 }
2206
2207 if (! xfs_qm_mplist_nowait(dqp->q_mount)) {
2208 xfs_dqfunlock(dqp);
2209 xfs_dqunlock(dqp);
2210 continue;
2211 }
2212
2213 if (!mutex_trylock(&dqp->q_hash->qh_lock))
2214 goto mplistunlock;
2215
2216 ASSERT(dqp->q_nrefs == 0);
2217 xfs_dqtrace_entry(dqp, "DQRECLAIM: UNLINKING");
2218 XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp);
2219 XQM_HASHLIST_REMOVE(dqp->q_hash, dqp);
2220 XQM_FREELIST_REMOVE(dqp);
2221 dqpout = dqp;
2222 mutex_unlock(&dqp->q_hash->qh_lock);
2223 mplistunlock:
2224 xfs_qm_mplist_unlock(dqp->q_mount);
2225 xfs_dqfunlock(dqp);
2226 xfs_dqunlock(dqp);
2227 if (dqpout)
2228 break;
2229 }
2230
2231 xfs_qm_freelist_unlock(xfs_Gqm);
2232 return dqpout;
2233 }
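
/*
 * A minimal sketch of the out-of-order trylock idiom that both reclaim
 * paths above depend on (illustrative only, not compiled). The
 * documented order takes the hash chain lock first, but reclaim walks
 * the freelist, so any lock that ranks earlier may only be taken with
 * a trylock, and a failure means "skip this dquot", never "block".
 */
#if 0
	/* freelist lock already held; the hash lock ranks ahead of it */
	if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
		xfs_dqfunlock(dqp);
		xfs_dqunlock(dqp);
		continue;		/* blocking here could deadlock */
	}
#endif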
2234
2235
2236 /*------------------------------------------------------------------*/
2237
2238 /*
2239 * Return a new incore dquot. Depending on the number of
2240 * dquots in the system, we either allocate a new one on the kernel heap,
2241 * or reclaim a free one.
2242 * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed
2243 * to reclaim an existing one from the freelist.
2244 */
2245 boolean_t
2246 xfs_qm_dqalloc_incore(
2247 xfs_dquot_t **O_dqpp)
2248 {
2249 xfs_dquot_t *dqp;
2250
2251 /*
2252 * Check against high water mark to see if we want to pop
2253 * a nincompoop dquot off the freelist.
2254 */
2255 if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) {
2256 /*
2257 * Try to recycle a dquot from the freelist.
2258 */
2259 if ((dqp = xfs_qm_dqreclaim_one())) {
2260 XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
2261 /*
2262 * Just zero the core here. The rest will get
2263 * reinitialized by caller. XXX we shouldn't even
2264 * do this zero ...
2265 */
2266 memset(&dqp->q_core, 0, sizeof(dqp->q_core));
2267 *O_dqpp = dqp;
2268 return B_FALSE;
2269 }
2270 XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
2271 }
2272
2273 /*
2274 * Allocate a brand new dquot on the kernel heap and return it
2275 * to the caller to initialize.
2276 */
2277 ASSERT(xfs_Gqm->qm_dqzone != NULL);
2278 *O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
2279 atomic_inc(&xfs_Gqm->qm_totaldquots);
2280
2281 return B_TRUE;
2282 }
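
/*
 * Hypothetical caller sketch (not compiled): the boolean return value
 * tells the caller whether it received fresh zeroed memory or a
 * recycled dquot whose q_core alone has been cleared, leaving the
 * remaining fields for the caller to reinitialize.
 */
#if 0
	xfs_dquot_t	*dqp;

	if (xfs_qm_dqalloc_incore(&dqp) == B_TRUE) {
		/* brand new, fully zeroed allocation from qm_dqzone */
	} else {
		/* recycled from the freelist; only q_core was zeroed */
	}
#endif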
2283
2284
2285 /*
2286 * Start a transaction and write the incore superblock changes to
2287 * disk. The flags parameter indicates which fields have changed.
2288 */
2289 int
2290 xfs_qm_write_sb_changes(
2291 xfs_mount_t *mp,
2292 __int64_t flags)
2293 {
2294 xfs_trans_t *tp;
2295 int error;
2296
2297 #ifdef QUOTADEBUG
2298 cmn_err(CE_NOTE, "Writing superblock quota changes: %s", mp->m_fsname);
2299 #endif
2300 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
2301 if ((error = xfs_trans_reserve(tp, 0,
2302 mp->m_sb.sb_sectsize + 128, 0,
2303 0,
2304 XFS_DEFAULT_LOG_COUNT))) {
2305 xfs_trans_cancel(tp, 0);
2306 return error;
2307 }
2308
2309 xfs_mod_sb(tp, flags);
2310 error = xfs_trans_commit(tp, 0);
2311
2312 return error;
2313 }
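
/*
 * Illustrative call (not compiled): this is how the quotaon/quotaoff
 * paths push just the quota flags of the in-core superblock to disk
 * after changing them.
 */
#if 0
	error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS);
	if (error)
		return error;
#endif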
2314
2315
2316 /* --------------- utility functions for vnodeops ---------------- */
2317
2318
2319 /*
2320 * Given an inode and a uid and gid (from cred_t), make sure that we have
2321 * allocated the relevant dquot(s) on disk, and that we won't exceed inode
2322 * quotas by creating this file.
2323 * This also attaches dquot(s) to the given inode after locking it,
2324 * and returns the dquots corresponding to the uid and/or gid.
2325 *
2326 * in : inode (unlocked)
2327 * out : udquot, gdquot with references taken and unlocked
2328 */
2329 int
2330 xfs_qm_vop_dqalloc(
2331 struct xfs_inode *ip,
2332 uid_t uid,
2333 gid_t gid,
2334 prid_t prid,
2335 uint flags,
2336 struct xfs_dquot **O_udqpp,
2337 struct xfs_dquot **O_gdqpp)
2338 {
2339 struct xfs_mount *mp = ip->i_mount;
2340 struct xfs_dquot *uq, *gq;
2341 int error;
2342 uint lockflags;
2343
2344 if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
2345 return 0;
2346
2347 lockflags = XFS_ILOCK_EXCL;
2348 xfs_ilock(ip, lockflags);
2349
2350 if ((flags & XFS_QMOPT_INHERIT) && XFS_INHERIT_GID(ip))
2351 gid = ip->i_d.di_gid;
2352
2353 /*
2354 * Attach the dquot(s) to this inode, doing a dquot allocation
2355 * if necessary. The dquot(s) will not be locked.
2356 */
2357 if (XFS_NOT_DQATTACHED(mp, ip)) {
2358 error = xfs_qm_dqattach_locked(ip, XFS_QMOPT_DQALLOC);
2359 if (error) {
2360 xfs_iunlock(ip, lockflags);
2361 return error;
2362 }
2363 }
2364
2365 uq = gq = NULL;
2366 if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) {
2367 if (ip->i_d.di_uid != uid) {
2368 /*
2369 * What we need is the dquot that has this uid, and
2370 * if we send the inode to dqget, the uid of the inode
2371 * takes priority over what's sent in the uid argument.
2372 * We must unlock inode here before calling dqget if
2373 * we're not sending the inode, because otherwise
2374 * we'll deadlock by doing trans_reserve while
2375 * holding ilock.
2376 */
2377 xfs_iunlock(ip, lockflags);
2378 if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid,
2379 XFS_DQ_USER,
2380 XFS_QMOPT_DQALLOC |
2381 XFS_QMOPT_DOWARN,
2382 &uq))) {
2383 ASSERT(error != ENOENT);
2384 return error;
2385 }
2386 /*
2387 * Get the ilock in the right order.
2388 */
2389 xfs_dqunlock(uq);
2390 lockflags = XFS_ILOCK_SHARED;
2391 xfs_ilock(ip, lockflags);
2392 } else {
2393 /*
2394 * Take an extra reference, because we'll return
2395 * this to caller
2396 */
2397 ASSERT(ip->i_udquot);
2398 uq = ip->i_udquot;
2399 xfs_dqlock(uq);
2400 XFS_DQHOLD(uq);
2401 xfs_dqunlock(uq);
2402 }
2403 }
2404 if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
2405 if (ip->i_d.di_gid != gid) {
2406 xfs_iunlock(ip, lockflags);
2407 if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid,
2408 XFS_DQ_GROUP,
2409 XFS_QMOPT_DQALLOC |
2410 XFS_QMOPT_DOWARN,
2411 &gq))) {
2412 if (uq)
2413 xfs_qm_dqrele(uq);
2414 ASSERT(error != ENOENT);
2415 return error;
2416 }
2417 xfs_dqunlock(gq);
2418 lockflags = XFS_ILOCK_SHARED;
2419 xfs_ilock(ip, lockflags);
2420 } else {
2421 ASSERT(ip->i_gdquot);
2422 gq = ip->i_gdquot;
2423 xfs_dqlock(gq);
2424 XFS_DQHOLD(gq);
2425 xfs_dqunlock(gq);
2426 }
2427 } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
2428 if (ip->i_d.di_projid != prid) {
2429 xfs_iunlock(ip, lockflags);
2430 if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid,
2431 XFS_DQ_PROJ,
2432 XFS_QMOPT_DQALLOC |
2433 XFS_QMOPT_DOWARN,
2434 &gq))) {
2435 if (uq)
2436 xfs_qm_dqrele(uq);
2437 ASSERT(error != ENOENT);
2438 return error;
2439 }
2440 xfs_dqunlock(gq);
2441 lockflags = XFS_ILOCK_SHARED;
2442 xfs_ilock(ip, lockflags);
2443 } else {
2444 ASSERT(ip->i_gdquot);
2445 gq = ip->i_gdquot;
2446 xfs_dqlock(gq);
2447 XFS_DQHOLD(gq);
2448 xfs_dqunlock(gq);
2449 }
2450 }
2451 if (uq)
2452 xfs_dqtrace_entry_ino(uq, "DQALLOC", ip);
2453
2454 xfs_iunlock(ip, lockflags);
2455 if (O_udqpp)
2456 *O_udqpp = uq;
2457 else if (uq)
2458 xfs_qm_dqrele(uq);
2459 if (O_gdqpp)
2460 *O_gdqpp = gq;
2461 else if (gq)
2462 xfs_qm_dqrele(gq);
2463 return 0;
2464 }
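
/*
 * Hypothetical caller sketch (not compiled), following the contract
 * documented above: both dquots come back referenced and unlocked, so
 * the caller owns one reference on each and must xfs_qm_dqrele() them
 * when finished, typically after attaching them to a freshly created
 * inode with xfs_qm_vop_create_dqattach().
 */
#if 0
	struct xfs_dquot	*udqp = NULL, *gdqp = NULL;

	error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(),
				   prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
				   &udqp, &gdqp);
	if (error)
		return error;
	/* ... create the inode, attach the dquots, commit ... */
	if (udqp)
		xfs_qm_dqrele(udqp);
	if (gdqp)
		xfs_qm_dqrele(gdqp);
#endif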
2465
2466 /*
2467 * Actually transfer ownership, and do dquot modifications.
2468 * These were already reserved.
2469 */
2470 xfs_dquot_t *
2471 xfs_qm_vop_chown(
2472 xfs_trans_t *tp,
2473 xfs_inode_t *ip,
2474 xfs_dquot_t **IO_olddq,
2475 xfs_dquot_t *newdq)
2476 {
2477 xfs_dquot_t *prevdq;
2478 uint bfield = XFS_IS_REALTIME_INODE(ip) ?
2479 XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
2480
2481
2482 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2483 ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
2484
2485 /* old dquot */
2486 prevdq = *IO_olddq;
2487 ASSERT(prevdq);
2488 ASSERT(prevdq != newdq);
2489
2490 xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks));
2491 xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1);
2492
2493 /* the sparkling new dquot */
2494 xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks);
2495 xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
2496
2497 /*
2498 * Take an extra reference, because the inode
2499 * is going to keep this dquot pointer even
2500 * after the trans_commit.
2501 */
2502 xfs_dqlock(newdq);
2503 XFS_DQHOLD(newdq);
2504 xfs_dqunlock(newdq);
2505 *IO_olddq = newdq;
2506
2507 return prevdq;
2508 }
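
/*
 * Illustrative usage (not compiled): the old dquot comes back still
 * holding the reference the inode used to own, so the caller drops it
 * once the transaction that changed ownership has committed.
 */
#if 0
	olddquot = xfs_qm_vop_chown(tp, ip, &ip->i_udquot, udqp);
	/* ... xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); ... */
	if (olddquot)
		xfs_qm_dqrele(olddquot);
#endif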
2509
2510 /*
2511 * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID).
2512 */
2513 int
2514 xfs_qm_vop_chown_reserve(
2515 xfs_trans_t *tp,
2516 xfs_inode_t *ip,
2517 xfs_dquot_t *udqp,
2518 xfs_dquot_t *gdqp,
2519 uint flags)
2520 {
2521 xfs_mount_t *mp = ip->i_mount;
2522 uint delblks, blkflags, prjflags = 0;
2523 xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq;
2524 int error;
2525
2526
2527 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2528 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
2529
2530 delblks = ip->i_delayed_blks;
2531 delblksudq = delblksgdq = unresudq = unresgdq = NULL;
2532 blkflags = XFS_IS_REALTIME_INODE(ip) ?
2533 XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS;
2534
2535 if (XFS_IS_UQUOTA_ON(mp) && udqp &&
2536 ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) {
2537 delblksudq = udqp;
2538 /*
2539 * If there are delayed allocation blocks, then we have to
2540 * unreserve those from the old dquot, and add them to the
2541 * new dquot.
2542 */
2543 if (delblks) {
2544 ASSERT(ip->i_udquot);
2545 unresudq = ip->i_udquot;
2546 }
2547 }
2548 if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) {
2549 if (XFS_IS_PQUOTA_ON(ip->i_mount) &&
2550 ip->i_d.di_projid != be32_to_cpu(gdqp->q_core.d_id))
2551 prjflags = XFS_QMOPT_ENOSPC;
2552
2553 if (prjflags ||
2554 (XFS_IS_GQUOTA_ON(ip->i_mount) &&
2555 ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id))) {
2556 delblksgdq = gdqp;
2557 if (delblks) {
2558 ASSERT(ip->i_gdquot);
2559 unresgdq = ip->i_gdquot;
2560 }
2561 }
2562 }
2563
2564 if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount,
2565 delblksudq, delblksgdq, ip->i_d.di_nblocks, 1,
2566 flags | blkflags | prjflags)))
2567 return error;
2568
2569 /*
2570 * Do the delayed blks reservations/unreservations now. Since these
2571 * are done without the help of a transaction, if a reservation fails
2572 * its previous reservations won't be automatically undone by trans
2573 * code. So we have to do it manually here.
2574 */
2575 if (delblks) {
2576 /*
2577 * Do the reservations first. Unreservation can't fail.
2578 */
2579 ASSERT(delblksudq || delblksgdq);
2580 ASSERT(unresudq || unresgdq);
2581 if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
2582 delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0,
2583 flags | blkflags | prjflags)))
2584 return error;
2585 xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
2586 unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0,
2587 blkflags);
2588 }
2589
2590 return 0;
2591 }
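
/*
 * A minimal sketch of the compensation pattern used for the delayed
 * blocks above (illustrative only, not compiled; newudq/newgdq and
 * oldudq/oldgdq are hypothetical names). Reservations taken outside a
 * transaction must be unwound by hand, so reserve against the new
 * dquots first, and only then strip the same quantity from the old
 * ones, which cannot fail.
 */
#if 0
	error = xfs_trans_reserve_quota_bydquots(NULL, mp, newudq, newgdq,
						 nblks, 0, flags);
	if (error)
		return error;		/* nothing to undo yet */
	xfs_trans_reserve_quota_bydquots(NULL, mp, oldudq, oldgdq,
					 -(xfs_qcnt_t)nblks, 0, flags);
#endif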
2592
2593 int
2594 xfs_qm_vop_rename_dqattach(
2595 struct xfs_inode **i_tab)
2596 {
2597 struct xfs_mount *mp = i_tab[0]->i_mount;
2598 int i;
2599
2600 if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
2601 return 0;
2602
2603 for (i = 0; (i < 4 && i_tab[i]); i++) {
2604 struct xfs_inode *ip = i_tab[i];
2605 int error;
2606
2607 /*
2608 * Watch out for duplicate entries in the table.
2609 */
2610 if (i == 0 || ip != i_tab[i-1]) {
2611 if (XFS_NOT_DQATTACHED(mp, ip)) {
2612 error = xfs_qm_dqattach(ip, 0);
2613 if (error)
2614 return error;
2615 }
2616 }
2617 }
2618 return 0;
2619 }
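
/*
 * Hypothetical caller sketch (not compiled): xfs_rename() builds a
 * NULL-padded table of the (up to four) inodes involved and lets this
 * routine attach dquots to each distinct one before it reserves quota
 * inside the rename transaction.
 */
#if 0
	struct xfs_inode	*inodes[4];

	xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip,
			    inodes, &num_inodes);
	error = xfs_qm_vop_rename_dqattach(inodes);
	if (error)
		goto std_return;
#endif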
2620
2621 void
2622 xfs_qm_vop_create_dqattach(
2623 struct xfs_trans *tp,
2624 struct xfs_inode *ip,
2625 struct xfs_dquot *udqp,
2626 struct xfs_dquot *gdqp)
2627 {
2628 struct xfs_mount *mp = tp->t_mountp;
2629
2630 if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
2631 return;
2632
2633 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2634 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
2635
2636 if (udqp) {
2637 xfs_dqlock(udqp);
2638 XFS_DQHOLD(udqp);
2639 xfs_dqunlock(udqp);
2640 ASSERT(ip->i_udquot == NULL);
2641 ip->i_udquot = udqp;
2642 ASSERT(XFS_IS_UQUOTA_ON(mp));
2643 ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
2644 xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
2645 }
2646 if (gdqp) {
2647 xfs_dqlock(gdqp);
2648 XFS_DQHOLD(gdqp);
2649 xfs_dqunlock(gdqp);
2650 ASSERT(ip->i_gdquot == NULL);
2651 ip->i_gdquot = gdqp;
2652 ASSERT(XFS_IS_OQUOTA_ON(mp));
2653 ASSERT((XFS_IS_GQUOTA_ON(mp) ?
2654 ip->i_d.di_gid : ip->i_d.di_projid) ==
2655 be32_to_cpu(gdqp->q_core.d_id));
2656 xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
2657 }
2658 }
2659
2660 /* ------------- list stuff ----------------- */
2661 STATIC void
2662 xfs_qm_freelist_init(xfs_frlist_t *ql)
2663 {
2664 ql->qh_next = ql->qh_prev = (xfs_dquot_t *) ql;
2665 mutex_init(&ql->qh_lock);
2666 ql->qh_version = 0;
2667 ql->qh_nelems = 0;
2668 }
2669
2670 STATIC void
2671 xfs_qm_freelist_destroy(xfs_frlist_t *ql)
2672 {
2673 xfs_dquot_t *dqp, *nextdqp;
2674
2675 mutex_lock(&ql->qh_lock);
2676 for (dqp = ql->qh_next;
2677 dqp != (xfs_dquot_t *)ql; ) {
2678 xfs_dqlock(dqp);
2679 nextdqp = dqp->dq_flnext;
2680 #ifdef QUOTADEBUG
2681 cmn_err(CE_DEBUG, "FREELIST destroy 0x%p", dqp);
2682 #endif
2683 XQM_FREELIST_REMOVE(dqp);
2684 xfs_dqunlock(dqp);
2685 xfs_qm_dqdestroy(dqp);
2686 dqp = nextdqp;
2687 }
2688 mutex_unlock(&ql->qh_lock);
2689 mutex_destroy(&ql->qh_lock);
2690
2691 ASSERT(ql->qh_nelems == 0);
2692 }
2693
2694 STATIC void
2695 xfs_qm_freelist_insert(xfs_frlist_t *ql, xfs_dquot_t *dq)
2696 {
2697 dq->dq_flnext = ql->qh_next;
2698 dq->dq_flprev = (xfs_dquot_t *)ql;
2699 ql->qh_next = dq;
2700 dq->dq_flnext->dq_flprev = dq;
2701 xfs_Gqm->qm_dqfreelist.qh_nelems++;
2702 xfs_Gqm->qm_dqfreelist.qh_version++;
2703 }
2704
2705 void
2706 xfs_qm_freelist_unlink(xfs_dquot_t *dq)
2707 {
2708 xfs_dquot_t *next = dq->dq_flnext;
2709 xfs_dquot_t *prev = dq->dq_flprev;
2710
2711 next->dq_flprev = prev;
2712 prev->dq_flnext = next;
2713 dq->dq_flnext = dq->dq_flprev = dq;
2714 xfs_Gqm->qm_dqfreelist.qh_nelems--;
2715 xfs_Gqm->qm_dqfreelist.qh_version++;
2716 }
2717
2718 void
2719 xfs_qm_freelist_append(xfs_frlist_t *ql, xfs_dquot_t *dq)
2720 {
2721 xfs_qm_freelist_insert((xfs_frlist_t *)ql->qh_prev, dq);
2722 }
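
/*
 * Illustrative walk-through of the sentinel trick the three routines
 * above share (not compiled; dq stands for some allocated dquot). The
 * list head is cast to a dquot so that insert and unlink never have to
 * special-case an empty list: an empty freelist is simply a head
 * linked to itself. Note that the element and version counters always
 * track the global xfs_Gqm freelist, the only list these routines are
 * used on.
 */
#if 0
	xfs_frlist_t	ql;

	xfs_qm_freelist_init(&ql);
	ASSERT(ql.qh_next == (xfs_dquot_t *)&ql);	/* empty: self-linked */

	xfs_qm_freelist_append(&ql, dq);	/* inserts just before the head */
	ASSERT(ql.qh_prev == dq);
	ASSERT(dq->dq_flnext == (xfs_dquot_t *)&ql);

	xfs_qm_freelist_unlink(dq);		/* dq becomes self-linked */
	ASSERT(ql.qh_next == (xfs_dquot_t *)&ql);
#endif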