Commit | Line | Data |
---|---|---|
7fc1f831 AP |
1 | /* |
2 | * Copyright 2012 Xyratex Technology Limited | |
3 | * | |
1dc563a6 AD |
4 | * Copyright (c) 2013, 2015, Intel Corporation. |
5 | * | |
7fc1f831 AP |
6 | * Author: Andrew Perepechko <Andrew_Perepechko@xyratex.com> |
7 | * | |
8 | */ | |
9 | ||
10 | #define DEBUG_SUBSYSTEM S_LLITE | |
11 | ||
12 | #include <linux/fs.h> | |
13 | #include <linux/sched.h> | |
14 | #include <linux/mm.h> | |
67a235f5 GKH |
15 | #include "../include/obd_support.h" |
16 | #include "../include/lustre_lite.h" | |
17 | #include "../include/lustre_dlm.h" | |
18 | #include "../include/lustre_ver.h" | |
7fc1f831 AP |
19 | #include "llite_internal.h" |
20 | ||
/* If we ever have hundreds of extended attributes, we might want to consider
 * using a hash or a tree structure instead of list for faster lookups.
 */
struct ll_xattr_entry {
	struct list_head	xe_list;    /* protected with
					     * lli_xattrs_list_rwsem */
	char			*xe_name;   /* xattr name, \0-terminated */
	char			*xe_value;  /* xattr value */
	unsigned		xe_namelen; /* strlen(xe_name) + 1 */
	unsigned		xe_vallen;  /* xattr value length */
};
32 | ||
/* Slab cache backing every struct ll_xattr_entry allocation. */
static struct kmem_cache *xattr_kmem;

/* Descriptor array consumed by lu_kmem_init()/lu_kmem_fini();
 * the entry with a NULL ckd_cache terminates the array. */
static struct lu_kmem_descr xattr_caches[] = {
	{
		.ckd_cache = &xattr_kmem,
		.ckd_name = "xattr_kmem",
		.ckd_size = sizeof(struct ll_xattr_entry)
	},
	{
		.ckd_cache = NULL
	}
};
44 | ||
/**
 * Register the slab cache used for xattr cache entries.
 *
 * \retval 0 on success, negative errno from lu_kmem_init() on failure
 */
int ll_xattr_init(void)
{
	return lu_kmem_init(xattr_caches);
}
49 | ||
/**
 * Release the slab cache registered by ll_xattr_init().
 */
void ll_xattr_fini(void)
{
	lu_kmem_fini(xattr_caches);
}
54 | ||
55 | /** | |
56 | * Initializes xattr cache for an inode. | |
57 | * | |
58 | * This initializes the xattr list and marks cache presence. | |
59 | */ | |
60 | static void ll_xattr_cache_init(struct ll_inode_info *lli) | |
61 | { | |
62 | ||
7fc1f831 AP |
63 | LASSERT(lli != NULL); |
64 | ||
65 | INIT_LIST_HEAD(&lli->lli_xattrs); | |
66 | lli->lli_flags |= LLIF_XATTR_CACHE; | |
67 | } | |
68 | ||
69 | /** | |
70 | * This looks for a specific extended attribute. | |
71 | * | |
72 | * Find in @cache and return @xattr_name attribute in @xattr, | |
73 | * for the NULL @xattr_name return the first cached @xattr. | |
74 | * | |
75 | * \retval 0 success | |
76 | * \retval -ENODATA if not found | |
77 | */ | |
78 | static int ll_xattr_cache_find(struct list_head *cache, | |
79 | const char *xattr_name, | |
80 | struct ll_xattr_entry **xattr) | |
81 | { | |
82 | struct ll_xattr_entry *entry; | |
83 | ||
7fc1f831 AP |
84 | list_for_each_entry(entry, cache, xe_list) { |
85 | /* xattr_name == NULL means look for any entry */ | |
86 | if (xattr_name == NULL || | |
87 | strcmp(xattr_name, entry->xe_name) == 0) { | |
88 | *xattr = entry; | |
89 | CDEBUG(D_CACHE, "find: [%s]=%.*s\n", | |
90 | entry->xe_name, entry->xe_vallen, | |
91 | entry->xe_value); | |
92 | return 0; | |
93 | } | |
94 | } | |
95 | ||
96 | return -ENODATA; | |
97 | } | |
98 | ||
99 | /** | |
e93a3082 | 100 | * This adds an xattr. |
7fc1f831 AP |
101 | * |
102 | * Add @xattr_name attr with @xattr_val value and @xattr_val_len length, | |
7fc1f831 AP |
103 | * |
104 | * \retval 0 success | |
105 | * \retval -ENOMEM if no memory could be allocated for the cached attr | |
e93a3082 | 106 | * \retval -EPROTO if duplicate xattr is being added |
7fc1f831 AP |
107 | */ |
108 | static int ll_xattr_cache_add(struct list_head *cache, | |
109 | const char *xattr_name, | |
110 | const char *xattr_val, | |
111 | unsigned xattr_val_len) | |
112 | { | |
113 | struct ll_xattr_entry *xattr; | |
114 | ||
7fc1f831 | 115 | if (ll_xattr_cache_find(cache, xattr_name, &xattr) == 0) { |
e93a3082 AP |
116 | CDEBUG(D_CACHE, "duplicate xattr: [%s]\n", xattr_name); |
117 | return -EPROTO; | |
7fc1f831 AP |
118 | } |
119 | ||
ccaabce1 | 120 | xattr = kmem_cache_alloc(xattr_kmem, GFP_NOFS | __GFP_ZERO); |
7fc1f831 AP |
121 | if (xattr == NULL) { |
122 | CDEBUG(D_CACHE, "failed to allocate xattr\n"); | |
123 | return -ENOMEM; | |
124 | } | |
125 | ||
b3dd8957 | 126 | xattr->xe_name = kstrdup(xattr_name, GFP_NOFS); |
7fc1f831 AP |
127 | if (!xattr->xe_name) { |
128 | CDEBUG(D_CACHE, "failed to alloc xattr name %u\n", | |
129 | xattr->xe_namelen); | |
130 | goto err_name; | |
131 | } | |
9cda6856 | 132 | xattr->xe_value = kmemdup(xattr_val, xattr_val_len, GFP_NOFS); |
695a0666 | 133 | if (!xattr->xe_value) |
7fc1f831 | 134 | goto err_value; |
7fc1f831 | 135 | |
7fc1f831 AP |
136 | xattr->xe_vallen = xattr_val_len; |
137 | list_add(&xattr->xe_list, cache); | |
138 | ||
139 | CDEBUG(D_CACHE, "set: [%s]=%.*s\n", xattr_name, | |
140 | xattr_val_len, xattr_val); | |
141 | ||
142 | return 0; | |
143 | err_value: | |
97903a26 | 144 | kfree(xattr->xe_name); |
7fc1f831 | 145 | err_name: |
50d30362 | 146 | kmem_cache_free(xattr_kmem, xattr); |
7fc1f831 AP |
147 | |
148 | return -ENOMEM; | |
149 | } | |
150 | ||
151 | /** | |
152 | * This removes an extended attribute from cache. | |
153 | * | |
154 | * Remove @xattr_name attribute from @cache. | |
155 | * | |
156 | * \retval 0 success | |
157 | * \retval -ENODATA if @xattr_name is not cached | |
158 | */ | |
159 | static int ll_xattr_cache_del(struct list_head *cache, | |
160 | const char *xattr_name) | |
161 | { | |
162 | struct ll_xattr_entry *xattr; | |
163 | ||
7fc1f831 AP |
164 | CDEBUG(D_CACHE, "del xattr: %s\n", xattr_name); |
165 | ||
166 | if (ll_xattr_cache_find(cache, xattr_name, &xattr) == 0) { | |
167 | list_del(&xattr->xe_list); | |
97903a26 JL |
168 | kfree(xattr->xe_name); |
169 | kfree(xattr->xe_value); | |
50d30362 | 170 | kmem_cache_free(xattr_kmem, xattr); |
7fc1f831 AP |
171 | |
172 | return 0; | |
173 | } | |
174 | ||
175 | return -ENODATA; | |
176 | } | |
177 | ||
/**
 * This iterates cached extended attributes.
 *
 * Walk over cached attributes in @cache and
 * fill in @xld_buffer or only calculate buffer
 * size if @xld_buffer is NULL.
 *
 * \retval >= 0    buffer list size
 * \retval -ERANGE if the list cannot fit @xld_size buffer
 */
static int ll_xattr_cache_list(struct list_head *cache,
			       char *xld_buffer,
			       int xld_size)
{
	struct ll_xattr_entry *xattr, *tmp;
	int xld_tail = 0;

	list_for_each_entry_safe(xattr, tmp, cache, xe_list) {
		CDEBUG(D_CACHE, "list: buffer=%p[%d] name=%s\n",
		       xld_buffer, xld_tail, xattr->xe_name);

		if (xld_buffer) {
			/* xe_namelen counts the '\0' so the copied names
			 * stay NUL-separated as listxattr requires */
			xld_size -= xattr->xe_namelen;
			if (xld_size < 0)
				break;
			memcpy(&xld_buffer[xld_tail],
			       xattr->xe_name, xattr->xe_namelen);
		}
		/* with a NULL buffer this just accumulates the size probe */
		xld_tail += xattr->xe_namelen;
	}

	if (xld_size < 0)
		return -ERANGE;

	return xld_tail;
}
214 | ||
215 | /** | |
216 | * Check if the xattr cache is initialized (filled). | |
217 | * | |
218 | * \retval 0 @cache is not initialized | |
219 | * \retval 1 @cache is initialized | |
220 | */ | |
2d95f10e | 221 | static int ll_xattr_cache_valid(struct ll_inode_info *lli) |
7fc1f831 AP |
222 | { |
223 | return !!(lli->lli_flags & LLIF_XATTR_CACHE); | |
224 | } | |
225 | ||
226 | /** | |
227 | * This finalizes the xattr cache. | |
228 | * | |
229 | * Free all xattr memory. @lli is the inode info pointer. | |
230 | * | |
d0a0acc3 | 231 | * \retval 0 no error occurred |
7fc1f831 AP |
232 | */ |
233 | static int ll_xattr_cache_destroy_locked(struct ll_inode_info *lli) | |
234 | { | |
235 | ||
7fc1f831 AP |
236 | if (!ll_xattr_cache_valid(lli)) |
237 | return 0; | |
238 | ||
239 | while (ll_xattr_cache_del(&lli->lli_xattrs, NULL) == 0) | |
240 | ; /* empty loop */ | |
241 | lli->lli_flags &= ~LLIF_XATTR_CACHE; | |
242 | ||
243 | return 0; | |
244 | } | |
245 | ||
246 | int ll_xattr_cache_destroy(struct inode *inode) | |
247 | { | |
248 | struct ll_inode_info *lli = ll_i2info(inode); | |
249 | int rc; | |
250 | ||
7fc1f831 AP |
251 | down_write(&lli->lli_xattrs_list_rwsem); |
252 | rc = ll_xattr_cache_destroy_locked(lli); | |
253 | up_write(&lli->lli_xattrs_list_rwsem); | |
254 | ||
255 | return rc; | |
256 | } | |
257 | ||
/**
 * Match or enqueue a PR lock.
 *
 * Find or request an LDLM lock with xattr data.
 * Since LDLM does not provide API for atomic match_or_enqueue,
 * the function handles it with a separate enq lock.
 * If successful, the function exits with the list lock held.
 *
 * Lock ordering: lli_xattrs_enq_lock is taken first, then
 * lli_xattrs_list_rwsem (write); the mutex is dropped before return
 * but on success the rwsem is left held for the caller
 * (ll_xattr_cache_refill()) to release or downgrade.
 *
 * \retval 0       no error occurred
 * \retval -ENOMEM not enough memory
 */
static int ll_xattr_find_get_lock(struct inode *inode,
				  struct lookup_intent *oit,
				  struct ptlrpc_request **req)
{
	ldlm_mode_t mode;
	struct lustre_handle lockh = { 0 };
	struct md_op_data *op_data;
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ldlm_enqueue_info einfo = { .ei_type = LDLM_IBITS,
					   .ei_mode = it_to_lock_mode(oit),
					   .ei_cb_bl = ll_md_blocking_ast,
					   .ei_cb_cp = ldlm_completion_ast };
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct obd_export *exp = sbi->ll_md_exp;
	int rc;

	mutex_lock(&lli->lli_xattrs_enq_lock);
	/* inode may have been shrunk and recreated, so data is gone, match lock
	 * only when data exists. */
	if (ll_xattr_cache_valid(lli)) {
		/* Try matching first. */
		mode = ll_take_md_lock(inode, MDS_INODELOCK_XATTR, &lockh, 0,
				       LCK_PR);
		if (mode != 0) {
			/* fake oit in mdc_revalidate_lock() manner */
			oit->d.lustre.it_lock_handle = lockh.cookie;
			oit->d.lustre.it_lock_mode = mode;
			goto out;
		}
	}

	/* Enqueue if the lock isn't cached locally. */
	op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
				     LUSTRE_OPC_ANY, NULL);
	if (IS_ERR(op_data)) {
		mutex_unlock(&lli->lli_xattrs_enq_lock);
		return PTR_ERR(op_data);
	}

	/* request both the name list and all values in one RPC */
	op_data->op_valid = OBD_MD_FLXATTR | OBD_MD_FLXATTRLS;

	rc = md_enqueue(exp, &einfo, oit, op_data, &lockh, NULL, 0, NULL, 0);
	ll_finish_md_op_data(op_data);

	if (rc < 0) {
		CDEBUG(D_CACHE,
		       "md_intent_lock failed with %d for fid "DFID"\n",
		       rc, PFID(ll_inode2fid(inode)));
		mutex_unlock(&lli->lli_xattrs_enq_lock);
		return rc;
	}

	/* on the match path above *req stays NULL — the caller treats
	 * that as "lock matched, data should already be cached" */
	*req = (struct ptlrpc_request *)oit->d.lustre.it_data;
out:
	down_write(&lli->lli_xattrs_list_rwsem);
	mutex_unlock(&lli->lli_xattrs_enq_lock);

	return 0;
}
328 | ||
/**
 * Refill the xattr cache.
 *
 * Fetch and cache the whole of xattrs for @inode, acquiring
 * a read or a write xattr lock depending on operation in @oit.
 * Intent is dropped on exit unless the operation is setxattr.
 *
 * On success lli_xattrs_list_rwsem is left held for write (taken by
 * ll_xattr_find_get_lock()); on any failure it is released here.
 *
 * \retval 0       no error occurred
 * \retval -EPROTO network protocol error
 * \retval -ENOMEM not enough memory for the cache
 */
static int ll_xattr_cache_refill(struct inode *inode, struct lookup_intent *oit)
{
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct ptlrpc_request *req = NULL;
	const char *xdata, *xval, *xtail, *xvtail;
	struct ll_inode_info *lli = ll_i2info(inode);
	struct mdt_body *body;
	__u32 *xsizes;
	int rc, i;

	rc = ll_xattr_find_get_lock(inode, oit, &req);
	if (rc)
		goto out_no_unlock;

	/* Do we have the data at this point? */
	if (ll_xattr_cache_valid(lli)) {
		ll_stats_ops_tally(sbi, LPROC_LL_GETXATTR_HITS, 1);
		rc = 0;
		goto out_maybe_drop;
	}

	/* Matched but no cache? Cancelled on error by a parallel refill. */
	if (unlikely(req == NULL)) {
		CDEBUG(D_CACHE, "cancelled by a parallel getxattr\n");
		rc = -EIO;
		goto out_maybe_drop;
	}

	if (oit->d.lustre.it_status < 0) {
		CDEBUG(D_CACHE, "getxattr intent returned %d for fid "DFID"\n",
		       oit->d.lustre.it_status, PFID(ll_inode2fid(inode)));
		rc = oit->d.lustre.it_status;
		/* xattr data is so large that we don't want to cache it */
		if (rc == -ERANGE)
			rc = -EAGAIN;
		goto out_destroy;
	}

	body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
	if (body == NULL) {
		CERROR("no MDT BODY in the refill xattr reply\n");
		rc = -EPROTO;
		goto out_destroy;
	}
	/* do not need swab xattr data */
	xdata = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA,
					     body->eadatasize);
	xval = req_capsule_server_sized_get(&req->rq_pill, &RMF_EAVALS,
					    body->aclsize);
	xsizes = req_capsule_server_sized_get(&req->rq_pill, &RMF_EAVALS_LENS,
					      body->max_mdsize * sizeof(__u32));
	if (xdata == NULL || xval == NULL || xsizes == NULL) {
		CERROR("wrong setxattr reply\n");
		rc = -EPROTO;
		goto out_destroy;
	}

	/* xtail/xvtail bound the name and value streams for the
	 * per-entry consistency checks below */
	xtail = xdata + body->eadatasize;
	xvtail = xval + body->aclsize;

	CDEBUG(D_CACHE, "caching: xdata=%p xtail=%p\n", xdata, xtail);

	ll_xattr_cache_init(lli);

	for (i = 0; i < body->max_mdsize; i++) {
		CDEBUG(D_CACHE, "caching [%s]=%.*s\n", xdata, *xsizes, xval);
		/* Perform consistency checks: attr names and vals in pill */
		if (memchr(xdata, 0, xtail - xdata) == NULL) {
			CERROR("xattr protocol violation (names are broken)\n");
			rc = -EPROTO;
		} else if (xval + *xsizes > xvtail) {
			CERROR("xattr protocol violation (vals are broken)\n");
			rc = -EPROTO;
		} else if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_XATTR_ENOMEM)) {
			rc = -ENOMEM;
		} else if (!strcmp(xdata, XATTR_NAME_ACL_ACCESS)) {
			/* Filter out ACL ACCESS since it's cached separately */
			CDEBUG(D_CACHE, "not caching %s\n",
			       XATTR_NAME_ACL_ACCESS);
			rc = 0;
		} else {
			rc = ll_xattr_cache_add(&lli->lli_xattrs, xdata, xval,
						*xsizes);
		}
		if (rc < 0) {
			/* partial cache must not be left visible */
			ll_xattr_cache_destroy_locked(lli);
			goto out_destroy;
		}
		xdata += strlen(xdata) + 1;
		xval += *xsizes;
		xsizes++;
	}

	if (xdata != xtail || xval != xvtail)
		CERROR("a hole in xattr data\n");

	ll_set_lock_data(sbi->ll_md_exp, inode, oit, NULL);

	goto out_maybe_drop;
out_maybe_drop:

	ll_intent_drop_lock(oit);

	/* keep the write lock on success so the caller can downgrade it */
	if (rc != 0)
		up_write(&lli->lli_xattrs_list_rwsem);
out_no_unlock:
	ptlrpc_req_finished(req);

	return rc;

out_destroy:
	up_write(&lli->lli_xattrs_list_rwsem);

	/* cancel the lock so a later refill re-fetches fresh data */
	ldlm_lock_decref_and_cancel((struct lustre_handle *)
				    &oit->d.lustre.it_lock_handle,
				    oit->d.lustre.it_lock_mode);

	goto out_no_unlock;
}
459 | ||
/**
 * Get an xattr value or list xattrs using the write-through cache.
 *
 * Get the xattr value (@valid has OBD_MD_FLXATTR set) of @name or
 * list xattr names (@valid has OBD_MD_FLXATTRLS set) for @inode.
 * The resulting value/list is stored in @buffer if the former
 * is not larger than @size.
 *
 * \retval 0        no error occurred
 * \retval -EPROTO  network protocol error
 * \retval -ENOMEM  not enough memory for the cache
 * \retval -ERANGE  the buffer is not large enough
 * \retval -ENODATA no such attr or the list is empty
 */
int ll_xattr_cache_get(struct inode *inode,
		       const char *name,
		       char *buffer,
		       size_t size,
		       __u64 valid)
{
	struct lookup_intent oit = { .it_op = IT_GETXATTR };
	struct ll_inode_info *lli = ll_i2info(inode);
	int rc = 0;

	/* exactly one of "get value" / "list names" must be requested */
	LASSERT(!!(valid & OBD_MD_FLXATTR) ^ !!(valid & OBD_MD_FLXATTRLS));

	down_read(&lli->lli_xattrs_list_rwsem);
	if (!ll_xattr_cache_valid(lli)) {
		/* drop the read lock before refill; a successful refill
		 * returns with the rwsem held for write, which we then
		 * downgrade to the read access used below */
		up_read(&lli->lli_xattrs_list_rwsem);
		rc = ll_xattr_cache_refill(inode, &oit);
		if (rc)
			return rc;
		downgrade_write(&lli->lli_xattrs_list_rwsem);
	} else {
		ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR_HITS, 1);
	}

	if (valid & OBD_MD_FLXATTR) {
		struct ll_xattr_entry *xattr;

		rc = ll_xattr_cache_find(&lli->lli_xattrs, name, &xattr);
		if (rc == 0) {
			rc = xattr->xe_vallen;
			/* zero size means we are only requested size in rc */
			if (size != 0) {
				if (size >= xattr->xe_vallen)
					memcpy(buffer, xattr->xe_value,
					       xattr->xe_vallen);
				else
					rc = -ERANGE;
			}
		}
	} else if (valid & OBD_MD_FLXATTRLS) {
		rc = ll_xattr_cache_list(&lli->lli_xattrs,
					 size ? buffer : NULL, size);
	}

	goto out;
out:
	up_read(&lli->lli_xattrs_list_rwsem);

	return rc;
}