fs/nfsd/nfscache.c

   1 /*
   2  * Request reply cache. This is currently a global cache, but this may
   3  * change in the future and be a per-client cache.
   4  *
   5  * This code is heavily inspired by the 44BSD implementation, although
   6  * it does things a bit differently.
   7  *
   8  * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
   9  */
  10
  11 #include <linux/slab.h>
  12 #include <linux/sunrpc/addr.h>
  13 #include <linux/highmem.h>
  14
  15 #include "nfsd.h"
  16 #include "cache.h"
  17
  18 #define NFSDDBG_FACILITY        NFSDDBG_REPCACHE
  19
  20 #define HASHSIZE                64
  21
  22 static struct hlist_head *      cache_hash;
  23 static struct list_head         lru_head;
  24 static struct kmem_cache        *drc_slab;
  25 static unsigned int             num_drc_entries;
  26 static unsigned int             max_drc_entries;
  27
  28 /*
  29  * Calculate the hash index from an XID.
  30  */
  31 static inline u32 request_hash(u32 xid)
  32 {
  33         u32 h = xid;
  34         h ^= (xid >> 24);
  35         return h & (HASHSIZE-1);
  36 }
  37
  38 static int      nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec);
  39 static void     cache_cleaner_func(struct work_struct *unused);
  40 static int      nfsd_reply_cache_shrink(struct shrinker *shrink,
  41                                         struct shrink_control *sc);
  42
  43 struct shrinker nfsd_reply_cache_shrinker = {
  44         .shrink = nfsd_reply_cache_shrink,
  45         .seeks  = 1,
  46 };
  47
  48 /*
  49  * locking for the reply cache:
  50  * A cache entry is "single use" if c_state == RC_INPROG
  51  * Otherwise, it when accessing _prev or _next, the lock must be held.
  52  */
  53 static DEFINE_SPINLOCK(cache_lock);
  54 static DECLARE_DELAYED_WORK(cache_cleaner, cache_cleaner_func);
  55
  56 /*
  57  * Put a cap on the size of the DRC based on the amount of available
  58  * low memory in the machine.
  59  *
  60  *  64MB:    8192
  61  * 128MB:   11585
  62  * 256MB:   16384
  63  * 512MB:   23170
  64  *   1GB:   32768
  65  *   2GB:   46340
  66  *   4GB:   65536
  67  *   8GB:   92681
  68  *  16GB:  131072
  69  *
  70  * ...with a hard cap of 256k entries. In the worst case, each entry will be
  71  * ~1k, so the above numbers should give a rough max of the amount of memory
  72  * used in k.
  73  */
  74 static unsigned int
  75 nfsd_cache_size_limit(void)
  76 {
  77         unsigned int limit;
  78         unsigned long low_pages = totalram_pages - totalhigh_pages;
  79
  80         limit = (16 * int_sqrt(low_pages)) << (PAGE_SHIFT-10);
  81         return min_t(unsigned int, limit, 256*1024);
  82 }
  83
  84 static struct svc_cacherep *
  85 nfsd_reply_cache_alloc(void)
  86 {
  87         struct svc_cacherep     *rp;
  88
  89         rp = kmem_cache_alloc(drc_slab, GFP_KERNEL);
  90         if (rp) {
  91                 rp->c_state = RC_UNUSED;
  92                 rp->c_type = RC_NOCACHE;
  93                 INIT_LIST_HEAD(&rp->c_lru);
  94                 INIT_HLIST_NODE(&rp->c_hash);
  95         }
  96         return rp;
  97 }
  98
  99 static void
 100 nfsd_reply_cache_free_locked(struct svc_cacherep *rp)
 101 {
 102         if (rp->c_type == RC_REPLBUFF)
 103                 kfree(rp->c_replvec.iov_base);
 104         hlist_del(&rp->c_hash);
 105         list_del(&rp->c_lru);
 106         --num_drc_entries;
 107         kmem_cache_free(drc_slab, rp);
 108 }
 109
 110 static void
 111 nfsd_reply_cache_free(struct svc_cacherep *rp)
 112 {
 113         spin_lock(&cache_lock);
 114         nfsd_reply_cache_free_locked(rp);
 115         spin_unlock(&cache_lock);
 116 }
 117
 118 int nfsd_reply_cache_init(void)
 119 {
 120         register_shrinker(&nfsd_reply_cache_shrinker);
 121         drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep),
 122                                         0, 0, NULL);
 123         if (!drc_slab)
 124                 goto out_nomem;
 125
 126         cache_hash = kcalloc(HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL);
 127         if (!cache_hash)
 128                 goto out_nomem;
 129
 130         INIT_LIST_HEAD(&lru_head);
 131         max_drc_entries = nfsd_cache_size_limit();
 132         num_drc_entries = 0;
 133         return 0;
 134 out_nomem:
 135         printk(KERN_ERR "nfsd: failed to allocate reply cache\n");
 136         nfsd_reply_cache_shutdown();
 137         return -ENOMEM;
 138 }
 139
 140 void nfsd_reply_cache_shutdown(void)
 141 {
 142         struct svc_cacherep     *rp;
 143
 144         unregister_shrinker(&nfsd_reply_cache_shrinker);
 145         cancel_delayed_work_sync(&cache_cleaner);
 146
 147         while (!list_empty(&lru_head)) {
 148                 rp = list_entry(lru_head.next, struct svc_cacherep, c_lru);
 149                 nfsd_reply_cache_free_locked(rp);
 150         }
 151
 152         kfree (cache_hash);
 153         cache_hash = NULL;
 154
 155         if (drc_slab) {
 156                 kmem_cache_destroy(drc_slab);
 157                 drc_slab = NULL;
 158         }
 159 }
 160
 161 /*
 162  * Move cache entry to end of LRU list, and queue the cleaner to run if it's
 163  * not already scheduled.
 164  */
 165 static void
 166 lru_put_end(struct svc_cacherep *rp)
 167 {
 168         rp->c_timestamp = jiffies;
 169         list_move_tail(&rp->c_lru, &lru_head);
 170         schedule_delayed_work(&cache_cleaner, RC_EXPIRE);
 171 }
 172
 173 /*
 174  * Move a cache entry from one hash list to another
 175  */
 176 static void
 177 hash_refile(struct svc_cacherep *rp)
 178 {
 179         hlist_del_init(&rp->c_hash);
 180         hlist_add_head(&rp->c_hash, cache_hash + request_hash(rp->c_xid));
 181 }
 182
 183 static inline bool
 184 nfsd_cache_entry_expired(struct svc_cacherep *rp)
 185 {
 186         return rp->c_state != RC_INPROG &&
 187                time_after(jiffies, rp->c_timestamp + RC_EXPIRE);
 188 }
 189
 190 /*
 191  * Walk the LRU list and prune off entries that are older than RC_EXPIRE.
 192  * Also prune the oldest ones when the total exceeds the max number of entries.
 193  */
 194 static void
 195 prune_cache_entries(void)
 196 {
 197         struct svc_cacherep *rp, *tmp;
 198
 199         list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) {
 200                 if (!nfsd_cache_entry_expired(rp) &&
 201                     num_drc_entries <= max_drc_entries)
 202                         break;
 203                 nfsd_reply_cache_free_locked(rp);
 204         }
 205
 206         /*
 207          * Conditionally rearm the job. If we cleaned out the list, then
 208          * cancel any pending run (since there won't be any work to do).
 209          * Otherwise, we rearm the job or modify the existing one to run in
 210          * RC_EXPIRE since we just ran the pruner.
 211          */
 212         if (list_empty(&lru_head))
 213                 cancel_delayed_work(&cache_cleaner);
 214         else
 215                 mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE);
 216 }
 217
 218 static void
 219 cache_cleaner_func(struct work_struct *unused)
 220 {
 221         spin_lock(&cache_lock);
 222         prune_cache_entries();
 223         spin_unlock(&cache_lock);
 224 }
 225
 226 static int
 227 nfsd_reply_cache_shrink(struct shrinker *shrink, struct shrink_control *sc)
 228 {
 229         unsigned int num;
 230
 231         spin_lock(&cache_lock);
 232         if (sc->nr_to_scan)
 233                 prune_cache_entries();
 234         num = num_drc_entries;
 235         spin_unlock(&cache_lock);
 236
 237         return num;
 238 }
 239
 240 /*
 241  * Search the request hash for an entry that matches the given rqstp.
 242  * Must be called with cache_lock held. Returns the found entry or
 243  * NULL on failure.
 244  */
 245 static struct svc_cacherep *
 246 nfsd_cache_search(struct svc_rqst *rqstp)
 247 {
 248         struct svc_cacherep     *rp;
 249         struct hlist_node       *hn;
 250         struct hlist_head       *rh;
 251         __be32                  xid = rqstp->rq_xid;
 252         u32                     proto =  rqstp->rq_prot,
 253                                 vers = rqstp->rq_vers,
 254                                 proc = rqstp->rq_proc;
 255
 256         rh = &cache_hash[request_hash(xid)];
 257         hlist_for_each_entry(rp, hn, rh, c_hash) {
 258                 if (xid == rp->c_xid && proc == rp->c_proc &&
 259                     proto == rp->c_prot && vers == rp->c_vers &&
 260                     rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) &&
 261                     rpc_get_port(svc_addr(rqstp)) == rpc_get_port((struct sockaddr *)&rp->c_addr))
 262                         return rp;
 263         }
 264         return NULL;
 265 }
 266
 267 /*
 268  * Try to find an entry matching the current call in the cache. When none
 269  * is found, we grab the oldest unlocked entry off the LRU list.
 270  * Note that no operation within the loop may sleep.
 271  */
 272 int
 273 nfsd_cache_lookup(struct svc_rqst *rqstp)
 274 {
 275         struct svc_cacherep     *rp, *found;
 276         __be32                  xid = rqstp->rq_xid;
 277         u32                     proto =  rqstp->rq_prot,
 278                                 vers = rqstp->rq_vers,
 279                                 proc = rqstp->rq_proc;
 280         unsigned long           age;
 281         int type = rqstp->rq_cachetype;
 282         int rtn;
 283
 284         rqstp->rq_cacherep = NULL;
 285         if (type == RC_NOCACHE) {
 286                 nfsdstats.rcnocache++;
 287                 return RC_DOIT;
 288         }
 289
 290         spin_lock(&cache_lock);
 291         rtn = RC_DOIT;
 292
 293         rp = nfsd_cache_search(rqstp);
 294         if (rp)
 295                 goto found_entry;
 296
 297         /* Try to use the first entry on the LRU */
 298         if (!list_empty(&lru_head)) {
 299                 rp = list_first_entry(&lru_head, struct svc_cacherep, c_lru);
 300                 if (nfsd_cache_entry_expired(rp) ||
 301                     num_drc_entries >= max_drc_entries) {
 302                         lru_put_end(rp);
 303                         prune_cache_entries();
 304                         goto setup_entry;
 305                 }
 306         }
 307
 308         spin_unlock(&cache_lock);
 309         rp = nfsd_reply_cache_alloc();
 310         if (!rp) {
 311                 dprintk("nfsd: unable to allocate DRC entry!\n");
 312                 return RC_DOIT;
 313         }
 314         spin_lock(&cache_lock);
 315         ++num_drc_entries;
 316
 317         /*
 318          * Must search again just in case someone inserted one
 319          * after we dropped the lock above.
 320          */
 321         found = nfsd_cache_search(rqstp);
 322         if (found) {
 323                 nfsd_reply_cache_free_locked(rp);
 324                 rp = found;
 325                 goto found_entry;
 326         }
 327
 328         /*
 329          * We're keeping the one we just allocated. Are we now over the
 330          * limit? Prune one off the tip of the LRU in trade for the one we
 331          * just allocated if so.
 332          */
 333         if (num_drc_entries >= max_drc_entries)
 334                 nfsd_reply_cache_free_locked(list_first_entry(&lru_head,
 335                                                 struct svc_cacherep, c_lru));
 336
 337 setup_entry:
 338         nfsdstats.rcmisses++;
 339         rqstp->rq_cacherep = rp;
 340         rp->c_state = RC_INPROG;
 341         rp->c_xid = xid;
 342         rp->c_proc = proc;
 343         rpc_copy_addr((struct sockaddr *)&rp->c_addr, svc_addr(rqstp));
 344         rpc_set_port((struct sockaddr *)&rp->c_addr, rpc_get_port(svc_addr(rqstp)));
 345         rp->c_prot = proto;
 346         rp->c_vers = vers;
 347
 348         hash_refile(rp);
 349         lru_put_end(rp);
 350
 351         /* release any buffer */
 352         if (rp->c_type == RC_REPLBUFF) {
 353                 kfree(rp->c_replvec.iov_base);
 354                 rp->c_replvec.iov_base = NULL;
 355         }
 356         rp->c_type = RC_NOCACHE;
 357  out:
 358         spin_unlock(&cache_lock);
 359         return rtn;
 360
 361 found_entry:
 362         nfsdstats.rchits++;
 363         /* We found a matching entry which is either in progress or done. */
 364         age = jiffies - rp->c_timestamp;
 365         lru_put_end(rp);
 366
 367         rtn = RC_DROPIT;
 368         /* Request being processed or excessive rexmits */
 369         if (rp->c_state == RC_INPROG || age < RC_DELAY)
 370                 goto out;
 371
 372         /* From the hall of fame of impractical attacks:
 373          * Is this a user who tries to snoop on the cache? */
 374         rtn = RC_DOIT;
 375         if (!rqstp->rq_secure && rp->c_secure)
 376                 goto out;
 377
 378         /* Compose RPC reply header */
 379         switch (rp->c_type) {
 380         case RC_NOCACHE:
 381                 break;
 382         case RC_REPLSTAT:
 383                 svc_putu32(&rqstp->rq_res.head[0], rp->c_replstat);
 384                 rtn = RC_REPLY;
 385                 break;
 386         case RC_REPLBUFF:
 387                 if (!nfsd_cache_append(rqstp, &rp->c_replvec))
 388                         goto out;       /* should not happen */
 389                 rtn = RC_REPLY;
 390                 break;
 391         default:
 392                 printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type);
 393                 nfsd_reply_cache_free_locked(rp);
 394         }
 395
 396         goto out;
 397 }
 398
 399 /*
 400  * Update a cache entry. This is called from nfsd_dispatch when
 401  * the procedure has been executed and the complete reply is in
 402  * rqstp->rq_res.
 403  *
 404  * We're copying around data here rather than swapping buffers because
 405  * the toplevel loop requires max-sized buffers, which would be a waste
 406  * of memory for a cache with a max reply size of 100 bytes (diropokres).
 407  *
 408  * If we should start to use different types of cache entries tailored
 409  * specifically for attrstat and fh's, we may save even more space.
 410  *
 411  * Also note that a cachetype of RC_NOCACHE can legally be passed when
 412  * nfsd failed to encode a reply that otherwise would have been cached.
 413  * In this case, nfsd_cache_update is called with statp == NULL.
 414  */
 415 void
 416 nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
 417 {
 418         struct svc_cacherep *rp = rqstp->rq_cacherep;
 419         struct kvec     *resv = &rqstp->rq_res.head[0], *cachv;
 420         int             len;
 421
 422         if (!rp)
 423                 return;
 424
 425         len = resv->iov_len - ((char*)statp - (char*)resv->iov_base);
 426         len >>= 2;
 427
 428         /* Don't cache excessive amounts of data and XDR failures */
 429         if (!statp || len > (256 >> 2)) {
 430                 nfsd_reply_cache_free(rp);
 431                 return;
 432         }
 433
 434         switch (cachetype) {
 435         case RC_REPLSTAT:
 436                 if (len != 1)
 437                         printk("nfsd: RC_REPLSTAT/reply len %d!\n",len);
 438                 rp->c_replstat = *statp;
 439                 break;
 440         case RC_REPLBUFF:
 441                 cachv = &rp->c_replvec;
 442                 cachv->iov_base = kmalloc(len << 2, GFP_KERNEL);
 443                 if (!cachv->iov_base) {
 444                         nfsd_reply_cache_free(rp);
 445                         return;
 446                 }
 447                 cachv->iov_len = len << 2;
 448                 memcpy(cachv->iov_base, statp, len << 2);
 449                 break;
 450         case RC_NOCACHE:
 451                 nfsd_reply_cache_free(rp);
 452                 return;
 453         }
 454         spin_lock(&cache_lock);
 455         lru_put_end(rp);
 456         rp->c_secure = rqstp->rq_secure;
 457         rp->c_type = cachetype;
 458         rp->c_state = RC_DONE;
 459         spin_unlock(&cache_lock);
 460         return;
 461 }
 462
 463 /*
 464  * Copy cached reply to current reply buffer. Should always fit.
 465  * FIXME as reply is in a page, we should just attach the page, and
 466  * keep a refcount....
 467  */
 468 static int
 469 nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data)
 470 {
 471         struct kvec     *vec = &rqstp->rq_res.head[0];
 472
 473         if (vec->iov_len + data->iov_len > PAGE_SIZE) {
 474                 printk(KERN_WARNING "nfsd: cached reply too large (%Zd).\n",
 475                                 data->iov_len);
 476                 return 0;
 477         }
 478         memcpy((char*)vec->iov_base + vec->iov_len, data->iov_base, data->iov_len);
 479         vec->iov_len += data->iov_len;
 480         return 1;
 481 }