Commit | Line | Data |
---|---|---|
d7e09d03 PT |
1 | /* |
2 | * GPL HEADER START | |
3 | * | |
4 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 only, | |
8 | * as published by the Free Software Foundation. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, but | |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * General Public License version 2 for more details (a copy is included | |
14 | * in the LICENSE file that accompanied this code). | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * version 2 along with this program; If not, see | |
18 | * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf | |
19 | * | |
20 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, | |
21 | * CA 95054 USA or visit www.sun.com if you need additional information or | |
22 | * have any questions. | |
23 | * | |
24 | * GPL HEADER END | |
25 | */ | |
26 | /* | |
27 | * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. | |
28 | * Use is subject to license terms. | |
29 | * | |
1dc563a6 | 30 | * Copyright (c) 2011, 2015, Intel Corporation. |
d7e09d03 PT |
31 | */ |
32 | /* | |
33 | * This file is part of Lustre, http://www.lustre.org/ | |
34 | * Lustre is a trademark of Sun Microsystems, Inc. | |
35 | * | |
36 | * lustre/fid/fid_request.c | |
37 | * | |
38 | * Lustre Sequence Manager | |
39 | * | |
40 | * Author: Yury Umanets <umka@clusterfs.com> | |
41 | */ | |
42 | ||
43 | #define DEBUG_SUBSYSTEM S_FID | |
44 | ||
9fdaf8c0 | 45 | #include "../../include/linux/libcfs/libcfs.h" |
56f4c5a8 | 46 | #include <linux/module.h> |
d7e09d03 | 47 | |
61566391 GKH |
48 | #include "../include/obd.h" |
49 | #include "../include/obd_class.h" | |
50 | #include "../include/obd_support.h" | |
51 | #include "../include/lustre_fid.h" | |
d7e09d03 | 52 | /* mdc RPC locks */ |
61566391 | 53 | #include "../include/lustre_mdc.h" |
d7e09d03 PT |
54 | #include "fid_internal.h" |
55 | ||
f3aa79fb DE |
/*
 * debugfs directory for this module: assigned by fid_mod_init(), used as
 * the parent of each client's entry in seq_client_debugfs_init(), and
 * removed by fid_mod_exit().
 */
static struct dentry *seq_debugfs_dir;
57 | ||
d7e09d03 PT |
58 | static int seq_client_rpc(struct lu_client_seq *seq, |
59 | struct lu_seq_range *output, __u32 opc, | |
60 | const char *opcname) | |
61 | { | |
62 | struct obd_export *exp = seq->lcs_exp; | |
63 | struct ptlrpc_request *req; | |
64 | struct lu_seq_range *out, *in; | |
56f4c5a8 LX |
65 | __u32 *op; |
66 | unsigned int debug_mask; | |
67 | int rc; | |
d7e09d03 PT |
68 | |
69 | req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), &RQF_SEQ_QUERY, | |
70 | LUSTRE_MDS_VERSION, SEQ_QUERY); | |
71 | if (req == NULL) | |
0a3bdb00 | 72 | return -ENOMEM; |
d7e09d03 PT |
73 | |
74 | /* Init operation code */ | |
75 | op = req_capsule_client_get(&req->rq_pill, &RMF_SEQ_OPC); | |
76 | *op = opc; | |
77 | ||
78 | /* Zero out input range, this is not recovery yet. */ | |
79 | in = req_capsule_client_get(&req->rq_pill, &RMF_SEQ_RANGE); | |
80 | range_init(in); | |
81 | ||
82 | ptlrpc_request_set_replen(req); | |
83 | ||
84 | in->lsr_index = seq->lcs_space.lsr_index; | |
85 | if (seq->lcs_type == LUSTRE_SEQ_METADATA) | |
86 | fld_range_set_mdt(in); | |
87 | else | |
88 | fld_range_set_ost(in); | |
89 | ||
90 | if (opc == SEQ_ALLOC_SUPER) { | |
91 | req->rq_request_portal = SEQ_CONTROLLER_PORTAL; | |
92 | req->rq_reply_portal = MDC_REPLY_PORTAL; | |
93 | /* During allocating super sequence for data object, | |
94 | * the current thread might hold the export of MDT0(MDT0 | |
95 | * precreating objects on this OST), and it will send the | |
96 | * request to MDT0 here, so we can not keep resending the | |
97 | * request here, otherwise if MDT0 is failed(umounted), | |
98 | * it can not release the export of MDT0 */ | |
99 | if (seq->lcs_type == LUSTRE_SEQ_DATA) | |
100 | req->rq_no_delay = req->rq_no_resend = 1; | |
101 | debug_mask = D_CONSOLE; | |
102 | } else { | |
103 | if (seq->lcs_type == LUSTRE_SEQ_METADATA) | |
104 | req->rq_request_portal = SEQ_METADATA_PORTAL; | |
105 | else | |
106 | req->rq_request_portal = SEQ_DATA_PORTAL; | |
107 | debug_mask = D_INFO; | |
108 | } | |
109 | ||
110 | ptlrpc_at_set_req_timeout(req); | |
111 | ||
112 | if (seq->lcs_type == LUSTRE_SEQ_METADATA) | |
113 | mdc_get_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL); | |
114 | rc = ptlrpc_queue_wait(req); | |
115 | if (seq->lcs_type == LUSTRE_SEQ_METADATA) | |
116 | mdc_put_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL); | |
117 | if (rc) | |
89180ca7 | 118 | goto out_req; |
d7e09d03 PT |
119 | |
120 | out = req_capsule_server_get(&req->rq_pill, &RMF_SEQ_RANGE); | |
121 | *output = *out; | |
122 | ||
123 | if (!range_is_sane(output)) { | |
124 | CERROR("%s: Invalid range received from server: " | |
125 | DRANGE"\n", seq->lcs_name, PRANGE(output)); | |
89180ca7 JL |
126 | rc = -EINVAL; |
127 | goto out_req; | |
d7e09d03 PT |
128 | } |
129 | ||
130 | if (range_is_exhausted(output)) { | |
131 | CERROR("%s: Range received from server is exhausted: " | |
132 | DRANGE"]\n", seq->lcs_name, PRANGE(output)); | |
89180ca7 JL |
133 | rc = -EINVAL; |
134 | goto out_req; | |
d7e09d03 PT |
135 | } |
136 | ||
137 | CDEBUG_LIMIT(debug_mask, "%s: Allocated %s-sequence "DRANGE"]\n", | |
138 | seq->lcs_name, opcname, PRANGE(output)); | |
139 | ||
d7e09d03 PT |
140 | out_req: |
141 | ptlrpc_req_finished(req); | |
142 | return rc; | |
143 | } | |
144 | ||
d7e09d03 PT |
145 | /* Request sequence-controller node to allocate new meta-sequence. */ |
146 | static int seq_client_alloc_meta(const struct lu_env *env, | |
147 | struct lu_client_seq *seq) | |
148 | { | |
149 | int rc; | |
d7e09d03 | 150 | |
f4f8407f OD |
151 | do { |
152 | /* If meta server return -EINPROGRESS or EAGAIN, | |
153 | * it means meta server might not be ready to | |
154 | * allocate super sequence from sequence controller | |
155 | * (MDT0)yet */ | |
156 | rc = seq_client_rpc(seq, &seq->lcs_space, | |
157 | SEQ_ALLOC_META, "meta"); | |
158 | } while (rc == -EINPROGRESS || rc == -EAGAIN); | |
56f4c5a8 | 159 | |
0a3bdb00 | 160 | return rc; |
d7e09d03 PT |
161 | } |
162 | ||
163 | /* Allocate new sequence for client. */ | |
164 | static int seq_client_alloc_seq(const struct lu_env *env, | |
114acca8 | 165 | struct lu_client_seq *seq, u64 *seqnr) |
d7e09d03 PT |
166 | { |
167 | int rc; | |
d7e09d03 PT |
168 | |
169 | LASSERT(range_is_sane(&seq->lcs_space)); | |
170 | ||
171 | if (range_is_exhausted(&seq->lcs_space)) { | |
172 | rc = seq_client_alloc_meta(env, seq); | |
173 | if (rc) { | |
5cbcd111 | 174 | CERROR("%s: Can't allocate new meta-sequence, rc %d\n", |
913827ee | 175 | seq->lcs_name, rc); |
0a3bdb00 | 176 | return rc; |
d7e09d03 | 177 | } |
71e8dd9a AM |
178 | CDEBUG(D_INFO, "%s: New range - "DRANGE"\n", |
179 | seq->lcs_name, PRANGE(&seq->lcs_space)); | |
d7e09d03 PT |
180 | } else { |
181 | rc = 0; | |
182 | } | |
183 | ||
184 | LASSERT(!range_is_exhausted(&seq->lcs_space)); | |
185 | *seqnr = seq->lcs_space.lsr_start; | |
186 | seq->lcs_space.lsr_start += 1; | |
187 | ||
55f5a824 | 188 | CDEBUG(D_INFO, "%s: Allocated sequence [%#llx]\n", seq->lcs_name, |
d7e09d03 PT |
189 | *seqnr); |
190 | ||
0a3bdb00 | 191 | return rc; |
d7e09d03 PT |
192 | } |
193 | ||
194 | static int seq_fid_alloc_prep(struct lu_client_seq *seq, | |
195 | wait_queue_t *link) | |
196 | { | |
197 | if (seq->lcs_update) { | |
198 | add_wait_queue(&seq->lcs_waitq, link); | |
199 | set_current_state(TASK_UNINTERRUPTIBLE); | |
200 | mutex_unlock(&seq->lcs_mutex); | |
201 | ||
b3669a7f | 202 | schedule(); |
d7e09d03 PT |
203 | |
204 | mutex_lock(&seq->lcs_mutex); | |
205 | remove_wait_queue(&seq->lcs_waitq, link); | |
206 | set_current_state(TASK_RUNNING); | |
207 | return -EAGAIN; | |
208 | } | |
209 | ++seq->lcs_update; | |
210 | mutex_unlock(&seq->lcs_mutex); | |
211 | return 0; | |
212 | } | |
213 | ||
214 | static void seq_fid_alloc_fini(struct lu_client_seq *seq) | |
215 | { | |
216 | LASSERT(seq->lcs_update == 1); | |
217 | mutex_lock(&seq->lcs_mutex); | |
218 | --seq->lcs_update; | |
219 | wake_up(&seq->lcs_waitq); | |
220 | } | |
221 | ||
d7e09d03 PT |
222 | /* Allocate new fid on passed client @seq and save it to @fid. */ |
223 | int seq_client_alloc_fid(const struct lu_env *env, | |
224 | struct lu_client_seq *seq, struct lu_fid *fid) | |
225 | { | |
226 | wait_queue_t link; | |
227 | int rc; | |
d7e09d03 PT |
228 | |
229 | LASSERT(seq != NULL); | |
230 | LASSERT(fid != NULL); | |
231 | ||
9e795d35 | 232 | init_waitqueue_entry(&link, current); |
d7e09d03 PT |
233 | mutex_lock(&seq->lcs_mutex); |
234 | ||
235 | if (OBD_FAIL_CHECK(OBD_FAIL_SEQ_EXHAUST)) | |
236 | seq->lcs_fid.f_oid = seq->lcs_width; | |
237 | ||
238 | while (1) { | |
114acca8 | 239 | u64 seqnr; |
d7e09d03 PT |
240 | |
241 | if (!fid_is_zero(&seq->lcs_fid) && | |
242 | fid_oid(&seq->lcs_fid) < seq->lcs_width) { | |
243 | /* Just bump last allocated fid and return to caller. */ | |
244 | seq->lcs_fid.f_oid += 1; | |
245 | rc = 0; | |
246 | break; | |
247 | } | |
248 | ||
249 | rc = seq_fid_alloc_prep(seq, &link); | |
250 | if (rc) | |
251 | continue; | |
252 | ||
253 | rc = seq_client_alloc_seq(env, seq, &seqnr); | |
254 | if (rc) { | |
5cbcd111 | 255 | CERROR("%s: Can't allocate new sequence, rc %d\n", |
913827ee | 256 | seq->lcs_name, rc); |
d7e09d03 PT |
257 | seq_fid_alloc_fini(seq); |
258 | mutex_unlock(&seq->lcs_mutex); | |
0a3bdb00 | 259 | return rc; |
d7e09d03 PT |
260 | } |
261 | ||
143f378c | 262 | CDEBUG(D_INFO, "%s: Switch to sequence [0x%16.16Lx]\n", |
913827ee | 263 | seq->lcs_name, seqnr); |
d7e09d03 PT |
264 | |
265 | seq->lcs_fid.f_oid = LUSTRE_FID_INIT_OID; | |
266 | seq->lcs_fid.f_seq = seqnr; | |
267 | seq->lcs_fid.f_ver = 0; | |
268 | ||
269 | /* | |
270 | * Inform caller that sequence switch is performed to allow it | |
271 | * to setup FLD for it. | |
272 | */ | |
273 | rc = 1; | |
274 | ||
275 | seq_fid_alloc_fini(seq); | |
276 | break; | |
277 | } | |
278 | ||
279 | *fid = seq->lcs_fid; | |
280 | mutex_unlock(&seq->lcs_mutex); | |
281 | ||
282 | CDEBUG(D_INFO, "%s: Allocated FID "DFID"\n", seq->lcs_name, PFID(fid)); | |
0a3bdb00 | 283 | return rc; |
d7e09d03 PT |
284 | } |
285 | EXPORT_SYMBOL(seq_client_alloc_fid); | |
286 | ||
287 | /* | |
288 | * Finish the current sequence due to disconnect. | |
289 | * See mdc_import_event() | |
290 | */ | |
291 | void seq_client_flush(struct lu_client_seq *seq) | |
292 | { | |
293 | wait_queue_t link; | |
294 | ||
295 | LASSERT(seq != NULL); | |
9e795d35 | 296 | init_waitqueue_entry(&link, current); |
d7e09d03 PT |
297 | mutex_lock(&seq->lcs_mutex); |
298 | ||
299 | while (seq->lcs_update) { | |
300 | add_wait_queue(&seq->lcs_waitq, &link); | |
301 | set_current_state(TASK_UNINTERRUPTIBLE); | |
302 | mutex_unlock(&seq->lcs_mutex); | |
303 | ||
b3669a7f | 304 | schedule(); |
d7e09d03 PT |
305 | |
306 | mutex_lock(&seq->lcs_mutex); | |
307 | remove_wait_queue(&seq->lcs_waitq, &link); | |
308 | set_current_state(TASK_RUNNING); | |
309 | } | |
310 | ||
311 | fid_zero(&seq->lcs_fid); | |
312 | /** | |
313 | * this id shld not be used for seq range allocation. | |
314 | * set to -1 for dgb check. | |
315 | */ | |
316 | ||
317 | seq->lcs_space.lsr_index = -1; | |
318 | ||
319 | range_init(&seq->lcs_space); | |
320 | mutex_unlock(&seq->lcs_mutex); | |
321 | } | |
322 | EXPORT_SYMBOL(seq_client_flush); | |
323 | ||
f3aa79fb | 324 | static void seq_client_debugfs_fini(struct lu_client_seq *seq) |
56f4c5a8 | 325 | { |
f3aa79fb DE |
326 | if (!IS_ERR_OR_NULL(seq->lcs_debugfs_entry)) |
327 | ldebugfs_remove(&seq->lcs_debugfs_entry); | |
56f4c5a8 LX |
328 | } |
329 | ||
f3aa79fb | 330 | static int seq_client_debugfs_init(struct lu_client_seq *seq) |
d7e09d03 PT |
331 | { |
332 | int rc; | |
d7e09d03 | 333 | |
f3aa79fb DE |
334 | seq->lcs_debugfs_entry = ldebugfs_register(seq->lcs_name, |
335 | seq_debugfs_dir, | |
336 | NULL, NULL); | |
d7e09d03 | 337 | |
f3aa79fb DE |
338 | if (IS_ERR_OR_NULL(seq->lcs_debugfs_entry)) { |
339 | CERROR("%s: LdebugFS failed in seq-init\n", seq->lcs_name); | |
340 | rc = seq->lcs_debugfs_entry ? PTR_ERR(seq->lcs_debugfs_entry) | |
341 | : -ENOMEM; | |
342 | seq->lcs_debugfs_entry = NULL; | |
0a3bdb00 | 343 | return rc; |
d7e09d03 PT |
344 | } |
345 | ||
f3aa79fb DE |
346 | rc = ldebugfs_add_vars(seq->lcs_debugfs_entry, |
347 | seq_client_debugfs_list, seq); | |
d7e09d03 | 348 | if (rc) { |
f3aa79fb | 349 | CERROR("%s: Can't init sequence manager debugfs, rc %d\n", |
913827ee | 350 | seq->lcs_name, rc); |
89180ca7 | 351 | goto out_cleanup; |
d7e09d03 PT |
352 | } |
353 | ||
0a3bdb00 | 354 | return 0; |
d7e09d03 PT |
355 | |
356 | out_cleanup: | |
f3aa79fb | 357 | seq_client_debugfs_fini(seq); |
d7e09d03 | 358 | return rc; |
56f4c5a8 | 359 | } |
d7e09d03 | 360 | |
065749ef OD |
361 | static void seq_client_fini(struct lu_client_seq *seq) |
362 | { | |
363 | seq_client_debugfs_fini(seq); | |
364 | ||
365 | if (seq->lcs_exp) { | |
366 | class_export_put(seq->lcs_exp); | |
367 | seq->lcs_exp = NULL; | |
368 | } | |
065749ef OD |
369 | } |
370 | ||
371 | static int seq_client_init(struct lu_client_seq *seq, | |
372 | struct obd_export *exp, | |
373 | enum lu_cli_type type, | |
374 | const char *prefix) | |
d7e09d03 PT |
375 | { |
376 | int rc; | |
d7e09d03 PT |
377 | |
378 | LASSERT(seq != NULL); | |
379 | LASSERT(prefix != NULL); | |
380 | ||
d7e09d03 PT |
381 | seq->lcs_type = type; |
382 | ||
383 | mutex_init(&seq->lcs_mutex); | |
384 | if (type == LUSTRE_SEQ_METADATA) | |
385 | seq->lcs_width = LUSTRE_METADATA_SEQ_MAX_WIDTH; | |
386 | else | |
387 | seq->lcs_width = LUSTRE_DATA_SEQ_MAX_WIDTH; | |
388 | ||
389 | init_waitqueue_head(&seq->lcs_waitq); | |
390 | /* Make sure that things are clear before work is started. */ | |
391 | seq_client_flush(seq); | |
392 | ||
f4f8407f | 393 | seq->lcs_exp = class_export_get(exp); |
d7e09d03 PT |
394 | |
395 | snprintf(seq->lcs_name, sizeof(seq->lcs_name), | |
396 | "cli-%s", prefix); | |
397 | ||
f3aa79fb | 398 | rc = seq_client_debugfs_init(seq); |
d7e09d03 PT |
399 | if (rc) |
400 | seq_client_fini(seq); | |
0a3bdb00 | 401 | return rc; |
d7e09d03 | 402 | } |
56f4c5a8 LX |
403 | |
404 | int client_fid_init(struct obd_device *obd, | |
405 | struct obd_export *exp, enum lu_cli_type type) | |
406 | { | |
407 | struct client_obd *cli = &obd->u.cli; | |
408 | char *prefix; | |
409 | int rc; | |
56f4c5a8 | 410 | |
5a56474a | 411 | cli->cl_seq = kzalloc(sizeof(*cli->cl_seq), GFP_NOFS); |
6c7c6553 | 412 | if (!cli->cl_seq) |
0a3bdb00 | 413 | return -ENOMEM; |
56f4c5a8 | 414 | |
5a56474a | 415 | prefix = kzalloc(MAX_OBD_NAME + 5, GFP_NOFS); |
6c7c6553 | 416 | if (!prefix) { |
89180ca7 JL |
417 | rc = -ENOMEM; |
418 | goto out_free_seq; | |
419 | } | |
56f4c5a8 LX |
420 | |
421 | snprintf(prefix, MAX_OBD_NAME + 5, "cli-%s", obd->obd_name); | |
422 | ||
423 | /* Init client side sequence-manager */ | |
065749ef | 424 | rc = seq_client_init(cli->cl_seq, exp, type, prefix); |
5a56474a | 425 | kfree(prefix); |
56f4c5a8 | 426 | if (rc) |
89180ca7 | 427 | goto out_free_seq; |
56f4c5a8 | 428 | |
0a3bdb00 | 429 | return rc; |
56f4c5a8 | 430 | out_free_seq: |
5a56474a | 431 | kfree(cli->cl_seq); |
56f4c5a8 LX |
432 | cli->cl_seq = NULL; |
433 | return rc; | |
434 | } | |
435 | EXPORT_SYMBOL(client_fid_init); | |
436 | ||
437 | int client_fid_fini(struct obd_device *obd) | |
438 | { | |
439 | struct client_obd *cli = &obd->u.cli; | |
56f4c5a8 LX |
440 | |
441 | if (cli->cl_seq != NULL) { | |
442 | seq_client_fini(cli->cl_seq); | |
5a56474a | 443 | kfree(cli->cl_seq); |
56f4c5a8 LX |
444 | cli->cl_seq = NULL; |
445 | } | |
446 | ||
0a3bdb00 | 447 | return 0; |
56f4c5a8 LX |
448 | } |
449 | EXPORT_SYMBOL(client_fid_fini); | |
450 | ||
56f4c5a8 LX |
451 | static int __init fid_mod_init(void) |
452 | { | |
f3aa79fb DE |
453 | seq_debugfs_dir = ldebugfs_register(LUSTRE_SEQ_NAME, |
454 | debugfs_lustre_root, | |
455 | NULL, NULL); | |
456 | return PTR_ERR_OR_ZERO(seq_debugfs_dir); | |
56f4c5a8 LX |
457 | } |
458 | ||
459 | static void __exit fid_mod_exit(void) | |
460 | { | |
f3aa79fb DE |
461 | if (!IS_ERR_OR_NULL(seq_debugfs_dir)) |
462 | ldebugfs_remove(&seq_debugfs_dir); | |
56f4c5a8 LX |
463 | } |
464 | ||
/* Module metadata. */
MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
MODULE_DESCRIPTION("Lustre FID Module");
MODULE_LICENSE("GPL");
MODULE_VERSION("0.1.0");

/* Hook the init/exit handlers defined above into module load/unload. */
module_init(fid_mod_init);
module_exit(fid_mod_exit);