Commit | Line | Data |
---|---|---|
d7e09d03 PT |
1 | /* |
2 | * GPL HEADER START | |
3 | * | |
4 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 only, | |
8 | * as published by the Free Software Foundation. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, but | |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * General Public License version 2 for more details (a copy is included | |
14 | * in the LICENSE file that accompanied this code). | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * version 2 along with this program; If not, see | |
18 | * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf | |
19 | * | |
20 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, | |
21 | * CA 95054 USA or visit www.sun.com if you need additional information or | |
22 | * have any questions. | |
23 | * | |
24 | * GPL HEADER END | |
25 | */ | |
26 | /* | |
27 | * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. | |
28 | * Use is subject to license terms. | |
29 | * | |
30 | * Copyright (c) 2011, 2013, Intel Corporation. | |
31 | */ | |
32 | /* | |
33 | * This file is part of Lustre, http://www.lustre.org/ | |
34 | * Lustre is a trademark of Sun Microsystems, Inc. | |
35 | * | |
36 | * lustre/fld/fld_request.c | |
37 | * | |
38 | * FLD (Fids Location Database) | |
39 | * | |
40 | * Author: Yury Umanets <umka@clusterfs.com> | |
41 | */ | |
42 | ||
43 | #define DEBUG_SUBSYSTEM S_FLD | |
44 | ||
45 | # include <linux/libcfs/libcfs.h> | |
46 | # include <linux/module.h> | |
d7e09d03 PT |
47 | # include <asm/div64.h> |
48 | ||
49 | #include <obd.h> | |
50 | #include <obd_class.h> | |
51 | #include <lustre_ver.h> | |
52 | #include <obd_support.h> | |
53 | #include <lprocfs_status.h> | |
54 | ||
55 | #include <dt_object.h> | |
56 | #include <md_object.h> | |
57 | #include <lustre_req_layout.h> | |
58 | #include <lustre_fld.h> | |
59 | #include <lustre_mdc.h> | |
60 | #include "fld_internal.h" | |
61 | ||
e62e5d92 LX |
/*
 * lu_context key owned by the FLD module. It is initialized and registered
 * in fld_mod_init() and deregistered in fld_mod_exit().
 */
struct lu_context_key fld_thread_key;
63 | ||
d7e09d03 PT |
64 | /* TODO: these 3 functions are copies of flow-control code from mdc_lib.c |
65 | * It should be common thing. The same about mdc RPC lock */ | |
66 | static int fld_req_avail(struct client_obd *cli, struct mdc_cache_waiter *mcw) | |
67 | { | |
68 | int rc; | |
29aaf496 | 69 | |
d7e09d03 PT |
70 | client_obd_list_lock(&cli->cl_loi_list_lock); |
71 | rc = list_empty(&mcw->mcw_entry); | |
72 | client_obd_list_unlock(&cli->cl_loi_list_lock); | |
73 | RETURN(rc); | |
74 | }; | |
75 | ||
76 | static void fld_enter_request(struct client_obd *cli) | |
77 | { | |
78 | struct mdc_cache_waiter mcw; | |
79 | struct l_wait_info lwi = { 0 }; | |
80 | ||
81 | client_obd_list_lock(&cli->cl_loi_list_lock); | |
82 | if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) { | |
83 | list_add_tail(&mcw.mcw_entry, &cli->cl_cache_waiters); | |
84 | init_waitqueue_head(&mcw.mcw_waitq); | |
85 | client_obd_list_unlock(&cli->cl_loi_list_lock); | |
86 | l_wait_event(mcw.mcw_waitq, fld_req_avail(cli, &mcw), &lwi); | |
87 | } else { | |
88 | cli->cl_r_in_flight++; | |
89 | client_obd_list_unlock(&cli->cl_loi_list_lock); | |
90 | } | |
91 | } | |
92 | ||
93 | static void fld_exit_request(struct client_obd *cli) | |
94 | { | |
95 | struct list_head *l, *tmp; | |
96 | struct mdc_cache_waiter *mcw; | |
97 | ||
98 | client_obd_list_lock(&cli->cl_loi_list_lock); | |
99 | cli->cl_r_in_flight--; | |
100 | list_for_each_safe(l, tmp, &cli->cl_cache_waiters) { | |
101 | ||
102 | if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) { | |
103 | /* No free request slots anymore */ | |
104 | break; | |
105 | } | |
106 | ||
107 | mcw = list_entry(l, struct mdc_cache_waiter, mcw_entry); | |
108 | list_del_init(&mcw->mcw_entry); | |
109 | cli->cl_r_in_flight++; | |
110 | wake_up(&mcw->mcw_waitq); | |
111 | } | |
112 | client_obd_list_unlock(&cli->cl_loi_list_lock); | |
113 | } | |
114 | ||
115 | static int fld_rrb_hash(struct lu_client_fld *fld, | |
116 | seqno_t seq) | |
117 | { | |
118 | LASSERT(fld->lcf_count > 0); | |
119 | return do_div(seq, fld->lcf_count); | |
120 | } | |
121 | ||
122 | static struct lu_fld_target * | |
123 | fld_rrb_scan(struct lu_client_fld *fld, seqno_t seq) | |
124 | { | |
125 | struct lu_fld_target *target; | |
126 | int hash; | |
d7e09d03 PT |
127 | |
128 | /* Because almost all of special sequence located in MDT0, | |
129 | * it should go to index 0 directly, instead of calculating | |
130 | * hash again, and also if other MDTs is not being connected, | |
131 | * the fld lookup requests(for seq on MDT0) should not be | |
132 | * blocked because of other MDTs */ | |
133 | if (fid_seq_is_norm(seq)) | |
134 | hash = fld_rrb_hash(fld, seq); | |
135 | else | |
136 | hash = 0; | |
137 | ||
138 | list_for_each_entry(target, &fld->lcf_targets, ft_chain) { | |
139 | if (target->ft_idx == hash) | |
140 | RETURN(target); | |
141 | } | |
142 | ||
143 | CERROR("%s: Can't find target by hash %d (seq "LPX64"). " | |
144 | "Targets (%d):\n", fld->lcf_name, hash, seq, | |
145 | fld->lcf_count); | |
146 | ||
147 | list_for_each_entry(target, &fld->lcf_targets, ft_chain) { | |
148 | const char *srv_name = target->ft_srv != NULL ? | |
149 | target->ft_srv->lsf_name : "<null>"; | |
150 | const char *exp_name = target->ft_exp != NULL ? | |
151 | (char *)target->ft_exp->exp_obd->obd_uuid.uuid : | |
152 | "<null>"; | |
153 | ||
154 | CERROR(" exp: 0x%p (%s), srv: 0x%p (%s), idx: "LPU64"\n", | |
155 | target->ft_exp, exp_name, target->ft_srv, | |
156 | srv_name, target->ft_idx); | |
157 | } | |
158 | ||
159 | /* | |
160 | * If target is not found, there is logical error anyway, so here is | |
161 | * LBUG() to catch this situation. | |
162 | */ | |
163 | LBUG(); | |
164 | RETURN(NULL); | |
165 | } | |
166 | ||
167 | struct lu_fld_hash fld_hash[] = { | |
168 | { | |
169 | .fh_name = "RRB", | |
170 | .fh_hash_func = fld_rrb_hash, | |
171 | .fh_scan_func = fld_rrb_scan | |
172 | }, | |
173 | { | |
174 | 0, | |
175 | } | |
176 | }; | |
177 | ||
178 | static struct lu_fld_target * | |
179 | fld_client_get_target(struct lu_client_fld *fld, seqno_t seq) | |
180 | { | |
181 | struct lu_fld_target *target; | |
d7e09d03 PT |
182 | |
183 | LASSERT(fld->lcf_hash != NULL); | |
184 | ||
185 | spin_lock(&fld->lcf_lock); | |
186 | target = fld->lcf_hash->fh_scan_func(fld, seq); | |
187 | spin_unlock(&fld->lcf_lock); | |
188 | ||
189 | if (target != NULL) { | |
190 | CDEBUG(D_INFO, "%s: Found target (idx "LPU64 | |
191 | ") by seq "LPX64"\n", fld->lcf_name, | |
192 | target->ft_idx, seq); | |
193 | } | |
194 | ||
195 | RETURN(target); | |
196 | } | |
197 | ||
198 | /* | |
199 | * Add export to FLD. This is usually done by CMM and LMV as they are main users | |
200 | * of FLD module. | |
201 | */ | |
202 | int fld_client_add_target(struct lu_client_fld *fld, | |
203 | struct lu_fld_target *tar) | |
204 | { | |
205 | const char *name; | |
206 | struct lu_fld_target *target, *tmp; | |
d7e09d03 PT |
207 | |
208 | LASSERT(tar != NULL); | |
209 | name = fld_target_name(tar); | |
210 | LASSERT(name != NULL); | |
211 | LASSERT(tar->ft_srv != NULL || tar->ft_exp != NULL); | |
212 | ||
213 | if (fld->lcf_flags != LUSTRE_FLD_INIT) { | |
214 | CERROR("%s: Attempt to add target %s (idx "LPU64") " | |
215 | "on fly - skip it\n", fld->lcf_name, name, | |
216 | tar->ft_idx); | |
217 | RETURN(0); | |
218 | } else { | |
219 | CDEBUG(D_INFO, "%s: Adding target %s (idx " | |
220 | LPU64")\n", fld->lcf_name, name, tar->ft_idx); | |
221 | } | |
222 | ||
223 | OBD_ALLOC_PTR(target); | |
224 | if (target == NULL) | |
225 | RETURN(-ENOMEM); | |
226 | ||
227 | spin_lock(&fld->lcf_lock); | |
228 | list_for_each_entry(tmp, &fld->lcf_targets, ft_chain) { | |
229 | if (tmp->ft_idx == tar->ft_idx) { | |
230 | spin_unlock(&fld->lcf_lock); | |
231 | OBD_FREE_PTR(target); | |
232 | CERROR("Target %s exists in FLD and known as %s:#"LPU64"\n", | |
233 | name, fld_target_name(tmp), tmp->ft_idx); | |
234 | RETURN(-EEXIST); | |
235 | } | |
236 | } | |
237 | ||
238 | target->ft_exp = tar->ft_exp; | |
239 | if (target->ft_exp != NULL) | |
240 | class_export_get(target->ft_exp); | |
241 | target->ft_srv = tar->ft_srv; | |
242 | target->ft_idx = tar->ft_idx; | |
243 | ||
244 | list_add_tail(&target->ft_chain, | |
245 | &fld->lcf_targets); | |
246 | ||
247 | fld->lcf_count++; | |
248 | spin_unlock(&fld->lcf_lock); | |
249 | ||
250 | RETURN(0); | |
251 | } | |
252 | EXPORT_SYMBOL(fld_client_add_target); | |
253 | ||
254 | /* Remove export from FLD */ | |
255 | int fld_client_del_target(struct lu_client_fld *fld, __u64 idx) | |
256 | { | |
257 | struct lu_fld_target *target, *tmp; | |
d7e09d03 PT |
258 | |
259 | spin_lock(&fld->lcf_lock); | |
260 | list_for_each_entry_safe(target, tmp, | |
261 | &fld->lcf_targets, ft_chain) { | |
262 | if (target->ft_idx == idx) { | |
263 | fld->lcf_count--; | |
264 | list_del(&target->ft_chain); | |
265 | spin_unlock(&fld->lcf_lock); | |
266 | ||
267 | if (target->ft_exp != NULL) | |
268 | class_export_put(target->ft_exp); | |
269 | ||
270 | OBD_FREE_PTR(target); | |
271 | RETURN(0); | |
272 | } | |
273 | } | |
274 | spin_unlock(&fld->lcf_lock); | |
275 | RETURN(-ENOENT); | |
276 | } | |
277 | EXPORT_SYMBOL(fld_client_del_target); | |
278 | ||
279 | #ifdef LPROCFS | |
e62e5d92 LX |
280 | proc_dir_entry_t *fld_type_proc_dir = NULL; |
281 | ||
d7e09d03 PT |
282 | static int fld_client_proc_init(struct lu_client_fld *fld) |
283 | { | |
284 | int rc; | |
d7e09d03 PT |
285 | |
286 | fld->lcf_proc_dir = lprocfs_register(fld->lcf_name, | |
287 | fld_type_proc_dir, | |
288 | NULL, NULL); | |
289 | ||
290 | if (IS_ERR(fld->lcf_proc_dir)) { | |
291 | CERROR("%s: LProcFS failed in fld-init\n", | |
292 | fld->lcf_name); | |
293 | rc = PTR_ERR(fld->lcf_proc_dir); | |
294 | RETURN(rc); | |
295 | } | |
296 | ||
297 | rc = lprocfs_add_vars(fld->lcf_proc_dir, | |
298 | fld_client_proc_list, fld); | |
299 | if (rc) { | |
300 | CERROR("%s: Can't init FLD proc, rc %d\n", | |
301 | fld->lcf_name, rc); | |
302 | GOTO(out_cleanup, rc); | |
303 | } | |
304 | ||
305 | RETURN(0); | |
306 | ||
307 | out_cleanup: | |
308 | fld_client_proc_fini(fld); | |
309 | return rc; | |
310 | } | |
311 | ||
312 | void fld_client_proc_fini(struct lu_client_fld *fld) | |
313 | { | |
d7e09d03 PT |
314 | if (fld->lcf_proc_dir) { |
315 | if (!IS_ERR(fld->lcf_proc_dir)) | |
316 | lprocfs_remove(&fld->lcf_proc_dir); | |
317 | fld->lcf_proc_dir = NULL; | |
318 | } | |
d7e09d03 PT |
319 | } |
320 | #else | |
/* Built without LPROCFS: there is nothing to register, always succeed. */
static int fld_client_proc_init(struct lu_client_fld *fld)
{
	return 0;
}
325 | ||
/* Built without LPROCFS: there is nothing to tear down. */
void fld_client_proc_fini(struct lu_client_fld *fld)
{
}
330 | #endif | |
331 | ||
332 | EXPORT_SYMBOL(fld_client_proc_fini); | |
333 | ||
334 | static inline int hash_is_sane(int hash) | |
335 | { | |
336 | return (hash >= 0 && hash < ARRAY_SIZE(fld_hash)); | |
337 | } | |
338 | ||
339 | int fld_client_init(struct lu_client_fld *fld, | |
340 | const char *prefix, int hash) | |
341 | { | |
342 | int cache_size, cache_threshold; | |
343 | int rc; | |
d7e09d03 PT |
344 | |
345 | LASSERT(fld != NULL); | |
346 | ||
347 | snprintf(fld->lcf_name, sizeof(fld->lcf_name), | |
348 | "cli-%s", prefix); | |
349 | ||
350 | if (!hash_is_sane(hash)) { | |
351 | CERROR("%s: Wrong hash function %#x\n", | |
352 | fld->lcf_name, hash); | |
353 | RETURN(-EINVAL); | |
354 | } | |
355 | ||
356 | fld->lcf_count = 0; | |
357 | spin_lock_init(&fld->lcf_lock); | |
358 | fld->lcf_hash = &fld_hash[hash]; | |
359 | fld->lcf_flags = LUSTRE_FLD_INIT; | |
360 | INIT_LIST_HEAD(&fld->lcf_targets); | |
361 | ||
362 | cache_size = FLD_CLIENT_CACHE_SIZE / | |
363 | sizeof(struct fld_cache_entry); | |
364 | ||
365 | cache_threshold = cache_size * | |
366 | FLD_CLIENT_CACHE_THRESHOLD / 100; | |
367 | ||
368 | fld->lcf_cache = fld_cache_init(fld->lcf_name, | |
369 | cache_size, cache_threshold); | |
370 | if (IS_ERR(fld->lcf_cache)) { | |
371 | rc = PTR_ERR(fld->lcf_cache); | |
372 | fld->lcf_cache = NULL; | |
373 | GOTO(out, rc); | |
374 | } | |
375 | ||
376 | rc = fld_client_proc_init(fld); | |
377 | if (rc) | |
378 | GOTO(out, rc); | |
d7e09d03 PT |
379 | out: |
380 | if (rc) | |
381 | fld_client_fini(fld); | |
382 | else | |
383 | CDEBUG(D_INFO, "%s: Using \"%s\" hash\n", | |
384 | fld->lcf_name, fld->lcf_hash->fh_name); | |
385 | return rc; | |
386 | } | |
387 | EXPORT_SYMBOL(fld_client_init); | |
388 | ||
389 | void fld_client_fini(struct lu_client_fld *fld) | |
390 | { | |
391 | struct lu_fld_target *target, *tmp; | |
d7e09d03 PT |
392 | |
393 | spin_lock(&fld->lcf_lock); | |
394 | list_for_each_entry_safe(target, tmp, | |
395 | &fld->lcf_targets, ft_chain) { | |
396 | fld->lcf_count--; | |
397 | list_del(&target->ft_chain); | |
398 | if (target->ft_exp != NULL) | |
399 | class_export_put(target->ft_exp); | |
400 | OBD_FREE_PTR(target); | |
401 | } | |
402 | spin_unlock(&fld->lcf_lock); | |
403 | ||
404 | if (fld->lcf_cache != NULL) { | |
405 | if (!IS_ERR(fld->lcf_cache)) | |
406 | fld_cache_fini(fld->lcf_cache); | |
407 | fld->lcf_cache = NULL; | |
408 | } | |
d7e09d03 PT |
409 | } |
410 | EXPORT_SYMBOL(fld_client_fini); | |
411 | ||
412 | int fld_client_rpc(struct obd_export *exp, | |
413 | struct lu_seq_range *range, __u32 fld_op) | |
414 | { | |
415 | struct ptlrpc_request *req; | |
416 | struct lu_seq_range *prange; | |
417 | __u32 *op; | |
418 | int rc; | |
419 | struct obd_import *imp; | |
d7e09d03 PT |
420 | |
421 | LASSERT(exp != NULL); | |
422 | ||
423 | imp = class_exp2cliimp(exp); | |
424 | req = ptlrpc_request_alloc_pack(imp, &RQF_FLD_QUERY, LUSTRE_MDS_VERSION, | |
425 | FLD_QUERY); | |
426 | if (req == NULL) | |
427 | RETURN(-ENOMEM); | |
428 | ||
429 | op = req_capsule_client_get(&req->rq_pill, &RMF_FLD_OPC); | |
430 | *op = fld_op; | |
431 | ||
432 | prange = req_capsule_client_get(&req->rq_pill, &RMF_FLD_MDFLD); | |
433 | *prange = *range; | |
434 | ||
435 | ptlrpc_request_set_replen(req); | |
436 | req->rq_request_portal = FLD_REQUEST_PORTAL; | |
437 | ptlrpc_at_set_req_timeout(req); | |
438 | ||
439 | if (fld_op == FLD_LOOKUP && | |
440 | imp->imp_connect_flags_orig & OBD_CONNECT_MDS_MDS) | |
441 | req->rq_allow_replay = 1; | |
442 | ||
443 | if (fld_op != FLD_LOOKUP) | |
444 | mdc_get_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL); | |
445 | fld_enter_request(&exp->exp_obd->u.cli); | |
446 | rc = ptlrpc_queue_wait(req); | |
447 | fld_exit_request(&exp->exp_obd->u.cli); | |
448 | if (fld_op != FLD_LOOKUP) | |
449 | mdc_put_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL); | |
450 | if (rc) | |
451 | GOTO(out_req, rc); | |
452 | ||
453 | prange = req_capsule_server_get(&req->rq_pill, &RMF_FLD_MDFLD); | |
454 | if (prange == NULL) | |
455 | GOTO(out_req, rc = -EFAULT); | |
456 | *range = *prange; | |
d7e09d03 PT |
457 | out_req: |
458 | ptlrpc_req_finished(req); | |
459 | return rc; | |
460 | } | |
461 | ||
462 | int fld_client_lookup(struct lu_client_fld *fld, seqno_t seq, mdsno_t *mds, | |
463 | __u32 flags, const struct lu_env *env) | |
464 | { | |
465 | struct lu_seq_range res = { 0 }; | |
466 | struct lu_fld_target *target; | |
467 | int rc; | |
d7e09d03 PT |
468 | |
469 | fld->lcf_flags |= LUSTRE_FLD_RUN; | |
470 | ||
471 | rc = fld_cache_lookup(fld->lcf_cache, seq, &res); | |
472 | if (rc == 0) { | |
473 | *mds = res.lsr_index; | |
474 | RETURN(0); | |
475 | } | |
476 | ||
477 | /* Can not find it in the cache */ | |
478 | target = fld_client_get_target(fld, seq); | |
479 | LASSERT(target != NULL); | |
480 | ||
481 | CDEBUG(D_INFO, "%s: Lookup fld entry (seq: "LPX64") on " | |
482 | "target %s (idx "LPU64")\n", fld->lcf_name, seq, | |
483 | fld_target_name(target), target->ft_idx); | |
484 | ||
485 | res.lsr_start = seq; | |
486 | fld_range_set_type(&res, flags); | |
e62e5d92 | 487 | rc = fld_client_rpc(target->ft_exp, &res, FLD_LOOKUP); |
d7e09d03 PT |
488 | |
489 | if (rc == 0) { | |
490 | *mds = res.lsr_index; | |
491 | ||
492 | fld_cache_insert(fld->lcf_cache, &res); | |
493 | } | |
494 | RETURN(rc); | |
495 | } | |
496 | EXPORT_SYMBOL(fld_client_lookup); | |
497 | ||
498 | void fld_client_flush(struct lu_client_fld *fld) | |
499 | { | |
500 | fld_cache_flush(fld->lcf_cache); | |
501 | } | |
502 | EXPORT_SYMBOL(fld_client_flush); | |
e62e5d92 LX |
503 | |
504 | static int __init fld_mod_init(void) | |
505 | { | |
506 | fld_type_proc_dir = lprocfs_register(LUSTRE_FLD_NAME, | |
507 | proc_lustre_root, | |
508 | NULL, NULL); | |
509 | if (IS_ERR(fld_type_proc_dir)) | |
510 | return PTR_ERR(fld_type_proc_dir); | |
511 | ||
512 | LU_CONTEXT_KEY_INIT(&fld_thread_key); | |
513 | lu_context_key_register(&fld_thread_key); | |
514 | return 0; | |
515 | } | |
516 | ||
517 | static void __exit fld_mod_exit(void) | |
518 | { | |
519 | lu_context_key_degister(&fld_thread_key); | |
520 | if (fld_type_proc_dir != NULL && !IS_ERR(fld_type_proc_dir)) { | |
521 | lprocfs_remove(&fld_type_proc_dir); | |
522 | fld_type_proc_dir = NULL; | |
523 | } | |
524 | } | |
525 | ||
/* Module metadata. */
MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
MODULE_DESCRIPTION("Lustre FLD");
MODULE_LICENSE("GPL");

/* Load and unload entry points. */
module_init(fld_mod_init)
module_exit(fld_mod_exit)