1db4669262b0d492aa380d8715f4aee70047b79c
[deliverable/linux.git] / drivers / staging / lustre / lustre / obdclass / obd_mount.c
1 /*
2 * GPL HEADER START
3 *
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19 *
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
22 * have any questions.
23 *
24 * GPL HEADER END
25 */
26 /*
27 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
29 *
30 * Copyright (c) 2011, 2012, Intel Corporation.
31 */
32 /*
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
35 *
36 * lustre/obdclass/obd_mount.c
37 *
38 * Client mount routines
39 *
40 * Author: Nathan Rutman <nathan@clusterfs.com>
41 */
42
43
44 #define DEBUG_SUBSYSTEM S_CLASS
45 #define D_MOUNT (D_SUPER|D_CONFIG/*|D_WARNING */)
46 #define PRINT_CMD CDEBUG
47
48 #include "../include/obd.h"
49 #include "../include/linux/lustre_compat25.h"
50 #include "../include/obd_class.h"
51 #include "../include/lustre/lustre_user.h"
52 #include "../include/lustre_log.h"
53 #include "../include/lustre_disk.h"
54 #include "../include/lustre_param.h"
55
/* Client mount handler; set by lustre_register_client_fill_super() and
 * invoked from lustre_fill_super() for client mounts.
 */
static int (*client_fill_super)(struct super_block *sb,
				struct vfsmount *mnt);

/* Superblock teardown hook; set by lustre_register_kill_super_cb() and
 * invoked from lustre_kill_super().
 */
static void (*kill_super_cb)(struct super_block *sb);
60
61 /**************** config llog ********************/
62
63 /** Get a config log from the MGS and process it.
64 * This func is called for both clients and servers.
65 * Continue to process new statements appended to the logs
66 * (whenever the config lock is revoked) until lustre_end_log
67 * is called.
68 * @param sb The superblock is used by the MGC to write to the local copy of
69 * the config log
70 * @param logname The name of the llog to replicate from the MGS
71 * @param cfg Since the same mgc may be used to follow multiple config logs
72 * (e.g. ost1, ost2, client), the config_llog_instance keeps the state for
73 * this log, and is added to the mgc's list of logs to follow.
74 */
75 int lustre_process_log(struct super_block *sb, char *logname,
76 struct config_llog_instance *cfg)
77 {
78 struct lustre_cfg *lcfg;
79 struct lustre_cfg_bufs *bufs;
80 struct lustre_sb_info *lsi = s2lsi(sb);
81 struct obd_device *mgc = lsi->lsi_mgc;
82 int rc;
83
84 LASSERT(mgc);
85 LASSERT(cfg);
86
87 bufs = kzalloc(sizeof(*bufs), GFP_NOFS);
88 if (!bufs)
89 return -ENOMEM;
90
91 /* mgc_process_config */
92 lustre_cfg_bufs_reset(bufs, mgc->obd_name);
93 lustre_cfg_bufs_set_string(bufs, 1, logname);
94 lustre_cfg_bufs_set(bufs, 2, cfg, sizeof(*cfg));
95 lustre_cfg_bufs_set(bufs, 3, &sb, sizeof(sb));
96 lcfg = lustre_cfg_new(LCFG_LOG_START, bufs);
97 rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
98 lustre_cfg_free(lcfg);
99
100 kfree(bufs);
101
102 if (rc == -EINVAL)
103 LCONSOLE_ERROR_MSG(0x15b, "%s: The configuration from log '%s' failed from the MGS (%d). Make sure this client and the MGS are running compatible versions of Lustre.\n",
104 mgc->obd_name, logname, rc);
105
106 if (rc)
107 LCONSOLE_ERROR_MSG(0x15c, "%s: The configuration from log '%s' failed (%d). This may be the result of communication errors between this node and the MGS, a bad configuration, or other errors. See the syslog for more information.\n",
108 mgc->obd_name, logname,
109 rc);
110
111 /* class_obd_list(); */
112 return rc;
113 }
114 EXPORT_SYMBOL(lustre_process_log);
115
116 /* Stop watching this config log for updates */
117 int lustre_end_log(struct super_block *sb, char *logname,
118 struct config_llog_instance *cfg)
119 {
120 struct lustre_cfg *lcfg;
121 struct lustre_cfg_bufs bufs;
122 struct lustre_sb_info *lsi = s2lsi(sb);
123 struct obd_device *mgc = lsi->lsi_mgc;
124 int rc;
125
126 if (!mgc)
127 return -ENOENT;
128
129 /* mgc_process_config */
130 lustre_cfg_bufs_reset(&bufs, mgc->obd_name);
131 lustre_cfg_bufs_set_string(&bufs, 1, logname);
132 if (cfg)
133 lustre_cfg_bufs_set(&bufs, 2, cfg, sizeof(*cfg));
134 lcfg = lustre_cfg_new(LCFG_LOG_END, &bufs);
135 rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
136 lustre_cfg_free(lcfg);
137 return rc;
138 }
139 EXPORT_SYMBOL(lustre_end_log);
140
141 /**************** obd start *******************/
142
143 /** lustre_cfg_bufs are a holdover from 1.4; we can still set these up from
144 * lctl (and do for echo cli/srv.
145 */
146 int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd,
147 char *s1, char *s2, char *s3, char *s4)
148 {
149 struct lustre_cfg_bufs bufs;
150 struct lustre_cfg *lcfg = NULL;
151 int rc;
152
153 CDEBUG(D_TRACE, "lcfg %s %#x %s %s %s %s\n", cfgname,
154 cmd, s1, s2, s3, s4);
155
156 lustre_cfg_bufs_reset(&bufs, cfgname);
157 if (s1)
158 lustre_cfg_bufs_set_string(&bufs, 1, s1);
159 if (s2)
160 lustre_cfg_bufs_set_string(&bufs, 2, s2);
161 if (s3)
162 lustre_cfg_bufs_set_string(&bufs, 3, s3);
163 if (s4)
164 lustre_cfg_bufs_set_string(&bufs, 4, s4);
165
166 lcfg = lustre_cfg_new(cmd, &bufs);
167 lcfg->lcfg_nid = nid;
168 rc = class_process_config(lcfg);
169 lustre_cfg_free(lcfg);
170 return rc;
171 }
172 EXPORT_SYMBOL(do_lcfg);
173
174 /** Call class_attach and class_setup. These methods in turn call
175 * obd type-specific methods.
176 */
177 int lustre_start_simple(char *obdname, char *type, char *uuid,
178 char *s1, char *s2, char *s3, char *s4)
179 {
180 int rc;
181 CDEBUG(D_MOUNT, "Starting obd %s (typ=%s)\n", obdname, type);
182
183 rc = do_lcfg(obdname, 0, LCFG_ATTACH, type, uuid, NULL, NULL);
184 if (rc) {
185 CERROR("%s attach error %d\n", obdname, rc);
186 return rc;
187 }
188 rc = do_lcfg(obdname, 0, LCFG_SETUP, s1, s2, s3, s4);
189 if (rc) {
190 CERROR("%s setup error %d\n", obdname, rc);
191 do_lcfg(obdname, 0, LCFG_DETACH, NULL, NULL, NULL, NULL);
192 }
193 return rc;
194 }
195
196 DEFINE_MUTEX(mgc_start_lock);
197
198 /** Set up a mgc obd to process startup logs
199 *
200 * \param sb [in] super block of the mgc obd
201 *
202 * \retval 0 success, otherwise error code
203 */
204 int lustre_start_mgc(struct super_block *sb)
205 {
206 struct obd_connect_data *data = NULL;
207 struct lustre_sb_info *lsi = s2lsi(sb);
208 struct obd_device *obd;
209 struct obd_export *exp;
210 struct obd_uuid *uuid;
211 class_uuid_t uuidc;
212 lnet_nid_t nid;
213 char *mgcname = NULL, *niduuid = NULL, *mgssec = NULL;
214 char *ptr;
215 int rc = 0, i = 0, j, len;
216
217 LASSERT(lsi->lsi_lmd);
218
219 /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
220 ptr = lsi->lsi_lmd->lmd_dev;
221 if (class_parse_nid(ptr, &nid, &ptr) == 0)
222 i++;
223 if (i == 0) {
224 CERROR("No valid MGS nids found.\n");
225 return -EINVAL;
226 }
227
228 mutex_lock(&mgc_start_lock);
229
230 len = strlen(LUSTRE_MGC_OBDNAME) + strlen(libcfs_nid2str(nid)) + 1;
231 mgcname = kasprintf(GFP_NOFS,
232 "%s%s", LUSTRE_MGC_OBDNAME, libcfs_nid2str(nid));
233 niduuid = kasprintf(GFP_NOFS, "%s_%x", mgcname, i);
234 if (!mgcname || !niduuid) {
235 rc = -ENOMEM;
236 goto out_free;
237 }
238
239 mgssec = lsi->lsi_lmd->lmd_mgssec ? lsi->lsi_lmd->lmd_mgssec : "";
240
241 data = kzalloc(sizeof(*data), GFP_NOFS);
242 if (!data) {
243 rc = -ENOMEM;
244 goto out_free;
245 }
246
247 obd = class_name2obd(mgcname);
248 if (obd && !obd->obd_stopping) {
249 int recov_bk;
250
251 rc = obd_set_info_async(NULL, obd->obd_self_export,
252 strlen(KEY_MGSSEC), KEY_MGSSEC,
253 strlen(mgssec), mgssec, NULL);
254 if (rc)
255 goto out_free;
256
257 /* Re-using an existing MGC */
258 atomic_inc(&obd->u.cli.cl_mgc_refcount);
259
260 /* IR compatibility check, only for clients */
261 if (lmd_is_client(lsi->lsi_lmd)) {
262 int has_ir;
263 int vallen = sizeof(*data);
264 __u32 *flags = &lsi->lsi_lmd->lmd_flags;
265
266 rc = obd_get_info(NULL, obd->obd_self_export,
267 strlen(KEY_CONN_DATA), KEY_CONN_DATA,
268 &vallen, data, NULL);
269 LASSERT(rc == 0);
270 has_ir = OCD_HAS_FLAG(data, IMP_RECOV);
271 if (has_ir ^ !(*flags & LMD_FLG_NOIR)) {
272 /* LMD_FLG_NOIR is for test purpose only */
273 LCONSOLE_WARN(
274 "Trying to mount a client with IR setting not compatible with current mgc. Force to use current mgc setting that is IR %s.\n",
275 has_ir ? "enabled" : "disabled");
276 if (has_ir)
277 *flags &= ~LMD_FLG_NOIR;
278 else
279 *flags |= LMD_FLG_NOIR;
280 }
281 }
282
283 recov_bk = 0;
284 /* If we are restarting the MGS, don't try to keep the MGC's
285 old connection, or registration will fail. */
286 if (IS_MGS(lsi)) {
287 CDEBUG(D_MOUNT, "New MGS with live MGC\n");
288 recov_bk = 1;
289 }
290
291 /* Try all connections, but only once (again).
292 We don't want to block another target from starting
293 (using its local copy of the log), but we do want to connect
294 if at all possible. */
295 recov_bk++;
296 CDEBUG(D_MOUNT, "%s: Set MGC reconnect %d\n", mgcname,
297 recov_bk);
298 rc = obd_set_info_async(NULL, obd->obd_self_export,
299 sizeof(KEY_INIT_RECOV_BACKUP),
300 KEY_INIT_RECOV_BACKUP,
301 sizeof(recov_bk), &recov_bk, NULL);
302 rc = 0;
303 goto out;
304 }
305
306 CDEBUG(D_MOUNT, "Start MGC '%s'\n", mgcname);
307
308 /* Add the primary nids for the MGS */
309 i = 0;
310 /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
311 ptr = lsi->lsi_lmd->lmd_dev;
312 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
313 rc = do_lcfg(mgcname, nid,
314 LCFG_ADD_UUID, niduuid, NULL, NULL, NULL);
315 i++;
316 /* Stop at the first failover nid */
317 if (*ptr == ':')
318 break;
319 }
320 if (i == 0) {
321 CERROR("No valid MGS nids found.\n");
322 rc = -EINVAL;
323 goto out_free;
324 }
325 lsi->lsi_lmd->lmd_mgs_failnodes = 1;
326
327 /* Random uuid for MGC allows easier reconnects */
328 uuid = kzalloc(sizeof(*uuid), GFP_NOFS);
329 if (!uuid) {
330 rc = -ENOMEM;
331 goto out_free;
332 }
333
334 ll_generate_random_uuid(uuidc);
335 class_uuid_unparse(uuidc, uuid);
336
337 /* Start the MGC */
338 rc = lustre_start_simple(mgcname, LUSTRE_MGC_NAME,
339 (char *)uuid->uuid, LUSTRE_MGS_OBDNAME,
340 niduuid, NULL, NULL);
341 kfree(uuid);
342 if (rc)
343 goto out_free;
344
345 /* Add any failover MGS nids */
346 i = 1;
347 while (ptr && ((*ptr == ':' ||
348 class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0))) {
349 /* New failover node */
350 sprintf(niduuid, "%s_%x", mgcname, i);
351 j = 0;
352 while (class_parse_nid_quiet(ptr, &nid, &ptr) == 0) {
353 j++;
354 rc = do_lcfg(mgcname, nid,
355 LCFG_ADD_UUID, niduuid, NULL, NULL, NULL);
356 if (*ptr == ':')
357 break;
358 }
359 if (j > 0) {
360 rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN,
361 niduuid, NULL, NULL, NULL);
362 i++;
363 } else {
364 /* at ":/fsname" */
365 break;
366 }
367 }
368 lsi->lsi_lmd->lmd_mgs_failnodes = i;
369
370 obd = class_name2obd(mgcname);
371 if (!obd) {
372 CERROR("Can't find mgcobd %s\n", mgcname);
373 rc = -ENOTCONN;
374 goto out_free;
375 }
376
377 rc = obd_set_info_async(NULL, obd->obd_self_export,
378 strlen(KEY_MGSSEC), KEY_MGSSEC,
379 strlen(mgssec), mgssec, NULL);
380 if (rc)
381 goto out_free;
382
383 /* Keep a refcount of servers/clients who started with "mount",
384 so we know when we can get rid of the mgc. */
385 atomic_set(&obd->u.cli.cl_mgc_refcount, 1);
386
387 /* We connect to the MGS at setup, and don't disconnect until cleanup */
388 data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_AT |
389 OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV |
390 OBD_CONNECT_LVB_TYPE;
391
392 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 50, 0)
393 data->ocd_connect_flags |= OBD_CONNECT_MNE_SWAB;
394 #else
395 #warning "LU-1644: Remove old OBD_CONNECT_MNE_SWAB fixup and imp_need_mne_swab"
396 #endif
397
398 if (lmd_is_client(lsi->lsi_lmd) &&
399 lsi->lsi_lmd->lmd_flags & LMD_FLG_NOIR)
400 data->ocd_connect_flags &= ~OBD_CONNECT_IMP_RECOV;
401 data->ocd_version = LUSTRE_VERSION_CODE;
402 rc = obd_connect(NULL, &exp, obd, &(obd->obd_uuid), data, NULL);
403 if (rc) {
404 CERROR("connect failed %d\n", rc);
405 goto out;
406 }
407
408 obd->u.cli.cl_mgc_mgsexp = exp;
409
410 out:
411 /* Keep the mgc info in the sb. Note that many lsi's can point
412 to the same mgc.*/
413 lsi->lsi_mgc = obd;
414 out_free:
415 mutex_unlock(&mgc_start_lock);
416
417 kfree(data);
418 kfree(mgcname);
419 kfree(niduuid);
420 return rc;
421 }
422
423 static int lustre_stop_mgc(struct super_block *sb)
424 {
425 struct lustre_sb_info *lsi = s2lsi(sb);
426 struct obd_device *obd;
427 char *niduuid = NULL, *ptr = NULL;
428 int i, rc = 0, len = 0;
429
430 if (!lsi)
431 return -ENOENT;
432 obd = lsi->lsi_mgc;
433 if (!obd)
434 return -ENOENT;
435 lsi->lsi_mgc = NULL;
436
437 mutex_lock(&mgc_start_lock);
438 LASSERT(atomic_read(&obd->u.cli.cl_mgc_refcount) > 0);
439 if (!atomic_dec_and_test(&obd->u.cli.cl_mgc_refcount)) {
440 /* This is not fatal, every client that stops
441 will call in here. */
442 CDEBUG(D_MOUNT, "mgc still has %d references.\n",
443 atomic_read(&obd->u.cli.cl_mgc_refcount));
444 rc = -EBUSY;
445 goto out;
446 }
447
448 /* The MGC has no recoverable data in any case.
449 * force shutdown set in umount_begin */
450 obd->obd_no_recov = 1;
451
452 if (obd->u.cli.cl_mgc_mgsexp) {
453 /* An error is not fatal, if we are unable to send the
454 disconnect mgs ping evictor cleans up the export */
455 rc = obd_disconnect(obd->u.cli.cl_mgc_mgsexp);
456 if (rc)
457 CDEBUG(D_MOUNT, "disconnect failed %d\n", rc);
458 }
459
460 /* Save the obdname for cleaning the nid uuids, which are
461 obdname_XX */
462 len = strlen(obd->obd_name) + 6;
463 niduuid = kzalloc(len, GFP_NOFS);
464 if (niduuid) {
465 strcpy(niduuid, obd->obd_name);
466 ptr = niduuid + strlen(niduuid);
467 }
468
469 rc = class_manual_cleanup(obd);
470 if (rc)
471 goto out;
472
473 /* Clean the nid uuids */
474 if (!niduuid) {
475 rc = -ENOMEM;
476 goto out;
477 }
478
479 for (i = 0; i < lsi->lsi_lmd->lmd_mgs_failnodes; i++) {
480 sprintf(ptr, "_%x", i);
481 rc = do_lcfg(LUSTRE_MGC_OBDNAME, 0, LCFG_DEL_UUID,
482 niduuid, NULL, NULL, NULL);
483 if (rc)
484 CERROR("del MDC UUID %s failed: rc = %d\n",
485 niduuid, rc);
486 }
487 out:
488 kfree(niduuid);
489
490 /* class_import_put will get rid of the additional connections */
491 mutex_unlock(&mgc_start_lock);
492 return rc;
493 }
494
495 /***************** lustre superblock **************/
496
497 struct lustre_sb_info *lustre_init_lsi(struct super_block *sb)
498 {
499 struct lustre_sb_info *lsi;
500
501 lsi = kzalloc(sizeof(*lsi), GFP_NOFS);
502 if (!lsi)
503 return NULL;
504 lsi->lsi_lmd = kzalloc(sizeof(*lsi->lsi_lmd), GFP_NOFS);
505 if (!lsi->lsi_lmd) {
506 kfree(lsi);
507 return NULL;
508 }
509
510 lsi->lsi_lmd->lmd_exclude_count = 0;
511 lsi->lsi_lmd->lmd_recovery_time_soft = 0;
512 lsi->lsi_lmd->lmd_recovery_time_hard = 0;
513 s2lsi_nocast(sb) = lsi;
514 /* we take 1 extra ref for our setup */
515 atomic_set(&lsi->lsi_mounts, 1);
516
517 /* Default umount style */
518 lsi->lsi_flags = LSI_UMOUNT_FAILOVER;
519
520 return lsi;
521 }
522
523 static int lustre_free_lsi(struct super_block *sb)
524 {
525 struct lustre_sb_info *lsi = s2lsi(sb);
526
527 LASSERT(lsi != NULL);
528 CDEBUG(D_MOUNT, "Freeing lsi %p\n", lsi);
529
530 /* someone didn't call server_put_mount. */
531 LASSERT(atomic_read(&lsi->lsi_mounts) == 0);
532
533 if (lsi->lsi_lmd != NULL) {
534 kfree(lsi->lsi_lmd->lmd_dev);
535 kfree(lsi->lsi_lmd->lmd_profile);
536 kfree(lsi->lsi_lmd->lmd_mgssec);
537 kfree(lsi->lsi_lmd->lmd_opts);
538 if (lsi->lsi_lmd->lmd_exclude_count)
539 kfree(lsi->lsi_lmd->lmd_exclude);
540 kfree(lsi->lsi_lmd->lmd_mgs);
541 kfree(lsi->lsi_lmd->lmd_osd_type);
542 kfree(lsi->lsi_lmd->lmd_params);
543
544 kfree(lsi->lsi_lmd);
545 }
546
547 LASSERT(lsi->lsi_llsbi == NULL);
548 kfree(lsi);
549 s2lsi_nocast(sb) = NULL;
550
551 return 0;
552 }
553
554 /* The lsi has one reference for every server that is using the disk -
555 e.g. MDT, MGS, and potentially MGC */
556 int lustre_put_lsi(struct super_block *sb)
557 {
558 struct lustre_sb_info *lsi = s2lsi(sb);
559
560 LASSERT(lsi != NULL);
561
562 CDEBUG(D_MOUNT, "put %p %d\n", sb, atomic_read(&lsi->lsi_mounts));
563 if (atomic_dec_and_test(&lsi->lsi_mounts)) {
564 lustre_free_lsi(sb);
565 return 1;
566 }
567 return 0;
568 }
569
570 /*** SERVER NAME ***
571 * <FSNAME><SEPARATOR><TYPE><INDEX>
572 * FSNAME is between 1 and 8 characters (inclusive).
573 * Excluded characters are '/' and ':'
574 * SEPARATOR is either ':' or '-'
575 * TYPE: "OST", "MDT", etc.
576 * INDEX: Hex representation of the index
577 */
578
/** Extract the fsname ("lustre") from a server name ("lustre-OST003F").
 * The separator ('-' or ':') is searched backwards, starting no further
 * than 8 characters into the name (the maximum fsname length).
 * @param [in]  svname server name including type and index
 * @param [out] fsname optional buffer receiving the NUL-terminated fsname;
 *		       must have room for the fsname plus the terminator
 * @param [out] endptr if not NULL, set to the separator that ends the fsname
 * @return 0 on success, -EINVAL when no separator follows a non-empty prefix
 */
int server_name2fsname(const char *svname, char *fsname, const char **endptr)
{
	size_t span = 0;
	const char *sep;

	/* max fsname length is 8 */
	while (span < 8 && svname[span] != '\0')
		span++;

	sep = svname + span;
	while (sep > svname) {
		if (*sep == '-' || *sep == ':')
			break;
		sep--;
	}
	if (sep == svname)
		return -EINVAL;

	if (fsname != NULL) {
		size_t prefix_len = sep - svname;

		memcpy(fsname, svname, prefix_len);
		fsname[prefix_len] = '\0';
	}

	if (endptr != NULL)
		*endptr = sep;

	return 0;
}
606 EXPORT_SYMBOL(server_name2fsname);
607
/**
 * Copy the service name (the part after the fsname separator) out of
 * @label into @svname.
 * If endptr isn't NULL it is set to the end of the fsname.
 * Returns 0 on success, the error from server_name2fsname() when @label
 * cannot be parsed, or -E2BIG when the service name does not fit into
 * @svsize bytes.
 */
int server_name2svname(const char *label, char *svname, const char **endptr,
		       size_t svsize)
{
	const char *sep;
	int rc;

	/* server_name2fsname() is only used to locate the separator */
	rc = server_name2fsname(label, NULL, &sep);
	if (rc != 0)
		return rc;

	if (endptr != NULL)
		*endptr = sep;

	return strlcpy(svname, sep + 1, svsize) >= svsize ? -E2BIG : 0;
}
632 EXPORT_SYMBOL(server_name2svname);
633
634
635 /* Get the index from the obd name.
636 rc = server type, or
637 rc < 0 on error
638 if endptr isn't NULL it is set to end of name */
639 int server_name2index(const char *svname, __u32 *idx, const char **endptr)
640 {
641 unsigned long index;
642 int rc;
643 const char *dash;
644
645 /* We use server_name2fsname() just for parsing */
646 rc = server_name2fsname(svname, NULL, &dash);
647 if (rc != 0)
648 return rc;
649
650 dash++;
651
652 if (strncmp(dash, "MDT", 3) == 0)
653 rc = LDD_F_SV_TYPE_MDT;
654 else if (strncmp(dash, "OST", 3) == 0)
655 rc = LDD_F_SV_TYPE_OST;
656 else
657 return -EINVAL;
658
659 dash += 3;
660
661 if (strncmp(dash, "all", 3) == 0) {
662 if (endptr != NULL)
663 *endptr = dash + 3;
664 return rc | LDD_F_SV_ALL;
665 }
666
667 index = simple_strtoul(dash, (char **)endptr, 16);
668 if (idx != NULL)
669 *idx = index;
670
671 /* Account for -mdc after index that is possible when specifying mdt */
672 if (endptr != NULL && strncmp(LUSTRE_MDC_NAME, *endptr + 1,
673 sizeof(LUSTRE_MDC_NAME)-1) == 0)
674 *endptr += sizeof(LUSTRE_MDC_NAME);
675
676 return rc;
677 }
678 EXPORT_SYMBOL(server_name2index);
679
680 /*************** mount common between server and client ***************/
681
682 /* Common umount */
683 int lustre_common_put_super(struct super_block *sb)
684 {
685 int rc;
686
687 CDEBUG(D_MOUNT, "dropping sb %p\n", sb);
688
689 /* Drop a ref to the MGC */
690 rc = lustre_stop_mgc(sb);
691 if (rc && (rc != -ENOENT)) {
692 if (rc != -EBUSY) {
693 CERROR("Can't stop MGC: %d\n", rc);
694 return rc;
695 }
696 /* BUSY just means that there's some other obd that
697 needs the mgc. Let him clean it up. */
698 CDEBUG(D_MOUNT, "MGC still in use\n");
699 }
700 /* Drop a ref to the mounted disk */
701 lustre_put_lsi(sb);
702 lu_types_stop();
703 return rc;
704 }
705 EXPORT_SYMBOL(lustre_common_put_super);
706
707 static void lmd_print(struct lustre_mount_data *lmd)
708 {
709 int i;
710
711 PRINT_CMD(D_MOUNT, " mount data:\n");
712 if (lmd_is_client(lmd))
713 PRINT_CMD(D_MOUNT, "profile: %s\n", lmd->lmd_profile);
714 PRINT_CMD(D_MOUNT, "device: %s\n", lmd->lmd_dev);
715 PRINT_CMD(D_MOUNT, "flags: %x\n", lmd->lmd_flags);
716
717 if (lmd->lmd_opts)
718 PRINT_CMD(D_MOUNT, "options: %s\n", lmd->lmd_opts);
719
720 if (lmd->lmd_recovery_time_soft)
721 PRINT_CMD(D_MOUNT, "recovery time soft: %d\n",
722 lmd->lmd_recovery_time_soft);
723
724 if (lmd->lmd_recovery_time_hard)
725 PRINT_CMD(D_MOUNT, "recovery time hard: %d\n",
726 lmd->lmd_recovery_time_hard);
727
728 for (i = 0; i < lmd->lmd_exclude_count; i++) {
729 PRINT_CMD(D_MOUNT, "exclude %d: OST%04x\n", i,
730 lmd->lmd_exclude[i]);
731 }
732 }
733
734 /* Is this server on the exclusion list */
735 int lustre_check_exclusion(struct super_block *sb, char *svname)
736 {
737 struct lustre_sb_info *lsi = s2lsi(sb);
738 struct lustre_mount_data *lmd = lsi->lsi_lmd;
739 __u32 index;
740 int i, rc;
741
742 rc = server_name2index(svname, &index, NULL);
743 if (rc != LDD_F_SV_TYPE_OST)
744 /* Only exclude OSTs */
745 return 0;
746
747 CDEBUG(D_MOUNT, "Check exclusion %s (%d) in %d of %s\n", svname,
748 index, lmd->lmd_exclude_count, lmd->lmd_dev);
749
750 for (i = 0; i < lmd->lmd_exclude_count; i++) {
751 if (index == lmd->lmd_exclude[i]) {
752 CWARN("Excluding %s (on exclusion list)\n", svname);
753 return 1;
754 }
755 }
756 return 0;
757 }
758
759 /* mount -v -o exclude=lustre-OST0001:lustre-OST0002 -t lustre ... */
760 static int lmd_make_exclusion(struct lustre_mount_data *lmd, const char *ptr)
761 {
762 const char *s1 = ptr, *s2;
763 __u32 index, *exclude_list;
764 int rc = 0, devmax;
765
766 /* The shortest an ost name can be is 8 chars: -OST0000.
767 We don't actually know the fsname at this time, so in fact
768 a user could specify any fsname. */
769 devmax = strlen(ptr) / 8 + 1;
770
771 /* temp storage until we figure out how many we have */
772 exclude_list = kcalloc(devmax, sizeof(index), GFP_NOFS);
773 if (!exclude_list)
774 return -ENOMEM;
775
776 /* we enter this fn pointing at the '=' */
777 while (*s1 && *s1 != ' ' && *s1 != ',') {
778 s1++;
779 rc = server_name2index(s1, &index, &s2);
780 if (rc < 0) {
781 CERROR("Can't parse server name '%s': rc = %d\n",
782 s1, rc);
783 break;
784 }
785 if (rc == LDD_F_SV_TYPE_OST)
786 exclude_list[lmd->lmd_exclude_count++] = index;
787 else
788 CDEBUG(D_MOUNT, "ignoring exclude %.*s: type = %#x\n",
789 (uint)(s2-s1), s1, rc);
790 s1 = s2;
791 /* now we are pointing at ':' (next exclude)
792 or ',' (end of excludes) */
793 if (lmd->lmd_exclude_count >= devmax)
794 break;
795 }
796 if (rc >= 0) /* non-err */
797 rc = 0;
798
799 if (lmd->lmd_exclude_count) {
800 /* permanent, freed in lustre_free_lsi */
801 lmd->lmd_exclude = kcalloc(lmd->lmd_exclude_count,
802 sizeof(index), GFP_NOFS);
803 if (lmd->lmd_exclude) {
804 memcpy(lmd->lmd_exclude, exclude_list,
805 sizeof(index) * lmd->lmd_exclude_count);
806 } else {
807 rc = -ENOMEM;
808 lmd->lmd_exclude_count = 0;
809 }
810 }
811 kfree(exclude_list);
812 return rc;
813 }
814
815 static int lmd_parse_mgssec(struct lustre_mount_data *lmd, char *ptr)
816 {
817 char *tail;
818 int length;
819
820 kfree(lmd->lmd_mgssec);
821 lmd->lmd_mgssec = NULL;
822
823 tail = strchr(ptr, ',');
824 if (tail == NULL)
825 length = strlen(ptr);
826 else
827 length = tail - ptr;
828
829 lmd->lmd_mgssec = kzalloc(length + 1, GFP_NOFS);
830 if (!lmd->lmd_mgssec)
831 return -ENOMEM;
832
833 memcpy(lmd->lmd_mgssec, ptr, length);
834 lmd->lmd_mgssec[length] = '\0';
835 return 0;
836 }
837
838 static int lmd_parse_string(char **handle, char *ptr)
839 {
840 char *tail;
841 int length;
842
843 if ((handle == NULL) || (ptr == NULL))
844 return -EINVAL;
845
846 kfree(*handle);
847 *handle = NULL;
848
849 tail = strchr(ptr, ',');
850 if (tail == NULL)
851 length = strlen(ptr);
852 else
853 length = tail - ptr;
854
855 *handle = kzalloc(length + 1, GFP_NOFS);
856 if (!*handle)
857 return -ENOMEM;
858
859 memcpy(*handle, ptr, length);
860 (*handle)[length] = '\0';
861
862 return 0;
863 }
864
865 /* Collect multiple values for mgsnid specifiers */
866 static int lmd_parse_mgs(struct lustre_mount_data *lmd, char **ptr)
867 {
868 lnet_nid_t nid;
869 char *tail = *ptr;
870 char *mgsnid;
871 int length;
872 int oldlen = 0;
873
874 /* Find end of nidlist */
875 while (class_parse_nid_quiet(tail, &nid, &tail) == 0)
876 ;
877 length = tail - *ptr;
878 if (length == 0) {
879 LCONSOLE_ERROR_MSG(0x159, "Can't parse NID '%s'\n", *ptr);
880 return -EINVAL;
881 }
882
883 if (lmd->lmd_mgs != NULL)
884 oldlen = strlen(lmd->lmd_mgs) + 1;
885
886 mgsnid = kzalloc(oldlen + length + 1, GFP_NOFS);
887 if (!mgsnid)
888 return -ENOMEM;
889
890 if (lmd->lmd_mgs != NULL) {
891 /* Multiple mgsnid= are taken to mean failover locations */
892 memcpy(mgsnid, lmd->lmd_mgs, oldlen);
893 mgsnid[oldlen - 1] = ':';
894 kfree(lmd->lmd_mgs);
895 }
896 memcpy(mgsnid + oldlen, *ptr, length);
897 mgsnid[oldlen + length] = '\0';
898 lmd->lmd_mgs = mgsnid;
899 *ptr = tail;
900
901 return 0;
902 }
903
904 /** Parse mount line options
905 * e.g. mount -v -t lustre -o abort_recov uml1:uml2:/lustre-client /mnt/lustre
906 * dev is passed as device=uml1:/lustre by mount.lustre
907 */
908 static int lmd_parse(char *options, struct lustre_mount_data *lmd)
909 {
910 char *s1, *s2, *devname = NULL;
911 struct lustre_mount_data *raw = (struct lustre_mount_data *)options;
912 int rc = 0;
913
914 LASSERT(lmd);
915 if (!options) {
916 LCONSOLE_ERROR_MSG(0x162, "Missing mount data: check that /sbin/mount.lustre is installed.\n");
917 return -EINVAL;
918 }
919
920 /* Options should be a string - try to detect old lmd data */
921 if ((raw->lmd_magic & 0xffffff00) == (LMD_MAGIC & 0xffffff00)) {
922 LCONSOLE_ERROR_MSG(0x163, "You're using an old version of /sbin/mount.lustre. Please install version %s\n",
923 LUSTRE_VERSION_STRING);
924 return -EINVAL;
925 }
926 lmd->lmd_magic = LMD_MAGIC;
927
928 lmd->lmd_params = kzalloc(4096, GFP_NOFS);
929 if (!lmd->lmd_params)
930 return -ENOMEM;
931 lmd->lmd_params[0] = '\0';
932
933 /* Set default flags here */
934
935 s1 = options;
936 while (*s1) {
937 int clear = 0;
938 int time_min = OBD_RECOVERY_TIME_MIN;
939
940 /* Skip whitespace and extra commas */
941 while (*s1 == ' ' || *s1 == ',')
942 s1++;
943
944 /* Client options are parsed in ll_options: eg. flock,
945 user_xattr, acl */
946
947 /* Parse non-ldiskfs options here. Rather than modifying
948 ldiskfs, we just zero these out here */
949 if (strncmp(s1, "abort_recov", 11) == 0) {
950 lmd->lmd_flags |= LMD_FLG_ABORT_RECOV;
951 clear++;
952 } else if (strncmp(s1, "recovery_time_soft=", 19) == 0) {
953 lmd->lmd_recovery_time_soft = max_t(int,
954 simple_strtoul(s1 + 19, NULL, 10), time_min);
955 clear++;
956 } else if (strncmp(s1, "recovery_time_hard=", 19) == 0) {
957 lmd->lmd_recovery_time_hard = max_t(int,
958 simple_strtoul(s1 + 19, NULL, 10), time_min);
959 clear++;
960 } else if (strncmp(s1, "noir", 4) == 0) {
961 lmd->lmd_flags |= LMD_FLG_NOIR; /* test purpose only. */
962 clear++;
963 } else if (strncmp(s1, "nosvc", 5) == 0) {
964 lmd->lmd_flags |= LMD_FLG_NOSVC;
965 clear++;
966 } else if (strncmp(s1, "nomgs", 5) == 0) {
967 lmd->lmd_flags |= LMD_FLG_NOMGS;
968 clear++;
969 } else if (strncmp(s1, "noscrub", 7) == 0) {
970 lmd->lmd_flags |= LMD_FLG_NOSCRUB;
971 clear++;
972 } else if (strncmp(s1, PARAM_MGSNODE,
973 sizeof(PARAM_MGSNODE) - 1) == 0) {
974 s2 = s1 + sizeof(PARAM_MGSNODE) - 1;
975 /* Assume the next mount opt is the first
976 invalid nid we get to. */
977 rc = lmd_parse_mgs(lmd, &s2);
978 if (rc)
979 goto invalid;
980 clear++;
981 } else if (strncmp(s1, "writeconf", 9) == 0) {
982 lmd->lmd_flags |= LMD_FLG_WRITECONF;
983 clear++;
984 } else if (strncmp(s1, "update", 6) == 0) {
985 lmd->lmd_flags |= LMD_FLG_UPDATE;
986 clear++;
987 } else if (strncmp(s1, "virgin", 6) == 0) {
988 lmd->lmd_flags |= LMD_FLG_VIRGIN;
989 clear++;
990 } else if (strncmp(s1, "noprimnode", 10) == 0) {
991 lmd->lmd_flags |= LMD_FLG_NO_PRIMNODE;
992 clear++;
993 } else if (strncmp(s1, "mgssec=", 7) == 0) {
994 rc = lmd_parse_mgssec(lmd, s1 + 7);
995 if (rc)
996 goto invalid;
997 clear++;
998 /* ost exclusion list */
999 } else if (strncmp(s1, "exclude=", 8) == 0) {
1000 rc = lmd_make_exclusion(lmd, s1 + 7);
1001 if (rc)
1002 goto invalid;
1003 clear++;
1004 } else if (strncmp(s1, "mgs", 3) == 0) {
1005 /* We are an MGS */
1006 lmd->lmd_flags |= LMD_FLG_MGS;
1007 clear++;
1008 } else if (strncmp(s1, "svname=", 7) == 0) {
1009 rc = lmd_parse_string(&lmd->lmd_profile, s1 + 7);
1010 if (rc)
1011 goto invalid;
1012 clear++;
1013 } else if (strncmp(s1, "param=", 6) == 0) {
1014 int length;
1015 char *tail = strchr(s1 + 6, ',');
1016 if (tail == NULL)
1017 length = strlen(s1);
1018 else
1019 length = tail - s1;
1020 length -= 6;
1021 strncat(lmd->lmd_params, s1 + 6, length);
1022 strcat(lmd->lmd_params, " ");
1023 clear++;
1024 } else if (strncmp(s1, "osd=", 4) == 0) {
1025 rc = lmd_parse_string(&lmd->lmd_osd_type, s1 + 4);
1026 if (rc)
1027 goto invalid;
1028 clear++;
1029 }
1030 /* Linux 2.4 doesn't pass the device, so we stuck it at the
1031 end of the options. */
1032 else if (strncmp(s1, "device=", 7) == 0) {
1033 devname = s1 + 7;
1034 /* terminate options right before device. device
1035 must be the last one. */
1036 *s1 = '\0';
1037 break;
1038 }
1039
1040 /* Find next opt */
1041 s2 = strchr(s1, ',');
1042 if (s2 == NULL) {
1043 if (clear)
1044 *s1 = '\0';
1045 break;
1046 }
1047 s2++;
1048 if (clear)
1049 memmove(s1, s2, strlen(s2) + 1);
1050 else
1051 s1 = s2;
1052 }
1053
1054 if (!devname) {
1055 LCONSOLE_ERROR_MSG(0x164, "Can't find the device name (need mount option 'device=...')\n");
1056 goto invalid;
1057 }
1058
1059 s1 = strstr(devname, ":/");
1060 if (s1) {
1061 ++s1;
1062 lmd->lmd_flags |= LMD_FLG_CLIENT;
1063 /* Remove leading /s from fsname */
1064 while (*++s1 == '/')
1065 ;
1066 /* Freed in lustre_free_lsi */
1067 lmd->lmd_profile = kasprintf(GFP_NOFS, "%s-client", s1);
1068 if (!lmd->lmd_profile)
1069 return -ENOMEM;
1070 }
1071
1072 /* Freed in lustre_free_lsi */
1073 lmd->lmd_dev = kzalloc(strlen(devname) + 1, GFP_NOFS);
1074 if (!lmd->lmd_dev)
1075 return -ENOMEM;
1076 strcpy(lmd->lmd_dev, devname);
1077
1078 /* Save mount options */
1079 s1 = options + strlen(options) - 1;
1080 while (s1 >= options && (*s1 == ',' || *s1 == ' '))
1081 *s1-- = 0;
1082 if (*options != 0) {
1083 /* Freed in lustre_free_lsi */
1084 lmd->lmd_opts = kzalloc(strlen(options) + 1, GFP_NOFS);
1085 if (!lmd->lmd_opts)
1086 return -ENOMEM;
1087 strcpy(lmd->lmd_opts, options);
1088 }
1089
1090 lmd_print(lmd);
1091 lmd->lmd_magic = LMD_MAGIC;
1092
1093 return rc;
1094
1095 invalid:
1096 CERROR("Bad mount options %s\n", options);
1097 return -EINVAL;
1098 }
1099
/* Argument bundle handed from lustre_mount() to lustre_fill_super()
 * through the opaque mount data pointer.
 */
struct lustre_mount_data2 {
	void *lmd2_data;		/* raw mount data; parsed as the option string */
	struct vfsmount *lmd2_mnt;	/* passed on to client_fill_super() */
};
1104
1105 /** This is the entry point for the mount call into Lustre.
1106 * This is called when a server or client is mounted,
1107 * and this is where we start setting things up.
1108 * @param data Mount options (e.g. -o flock,abort_recov)
1109 */
1110 int lustre_fill_super(struct super_block *sb, void *data, int silent)
1111 {
1112 struct lustre_mount_data *lmd;
1113 struct lustre_mount_data2 *lmd2 = data;
1114 struct lustre_sb_info *lsi;
1115 int rc;
1116
1117 CDEBUG(D_MOUNT|D_VFSTRACE, "VFS Op: sb %p\n", sb);
1118
1119 lsi = lustre_init_lsi(sb);
1120 if (!lsi)
1121 return -ENOMEM;
1122 lmd = lsi->lsi_lmd;
1123
1124 /*
1125 * Disable lockdep during mount, because mount locking patterns are
1126 * `special'.
1127 */
1128 lockdep_off();
1129
1130 /*
1131 * LU-639: the obd cleanup of last mount may not finish yet, wait here.
1132 */
1133 obd_zombie_barrier();
1134
1135 /* Figure out the lmd from the mount options */
1136 if (lmd_parse((char *)(lmd2->lmd2_data), lmd)) {
1137 lustre_put_lsi(sb);
1138 rc = -EINVAL;
1139 goto out;
1140 }
1141
1142 if (lmd_is_client(lmd)) {
1143 CDEBUG(D_MOUNT, "Mounting client %s\n", lmd->lmd_profile);
1144 if (client_fill_super == NULL)
1145 request_module("lustre");
1146 if (client_fill_super == NULL) {
1147 LCONSOLE_ERROR_MSG(0x165, "Nothing registered for client mount! Is the 'lustre' module loaded?\n");
1148 lustre_put_lsi(sb);
1149 rc = -ENODEV;
1150 } else {
1151 rc = lustre_start_mgc(sb);
1152 if (rc) {
1153 lustre_put_lsi(sb);
1154 goto out;
1155 }
1156 /* Connect and start */
1157 /* (should always be ll_fill_super) */
1158 rc = (*client_fill_super)(sb, lmd2->lmd2_mnt);
1159 /* c_f_s will call lustre_common_put_super on failure */
1160 }
1161 } else {
1162 CERROR("This is client-side-only module, cannot handle server mount.\n");
1163 rc = -EINVAL;
1164 }
1165
1166 /* If error happens in fill_super() call, @lsi will be killed there.
1167 * This is why we do not put it here. */
1168 goto out;
1169 out:
1170 if (rc) {
1171 CERROR("Unable to mount %s (%d)\n",
1172 s2lsi(sb) ? lmd->lmd_dev : "", rc);
1173 } else {
1174 CDEBUG(D_SUPER, "Mount %s complete\n",
1175 lmd->lmd_dev);
1176 }
1177 lockdep_on();
1178 return rc;
1179 }
1180
1181
1182 /* We can't call ll_fill_super by name because it lives in a module that
1183 must be loaded after this one. */
1184 void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb,
1185 struct vfsmount *mnt))
1186 {
1187 client_fill_super = cfs;
1188 }
1189 EXPORT_SYMBOL(lustre_register_client_fill_super);
1190
1191 void lustre_register_kill_super_cb(void (*cfs)(struct super_block *sb))
1192 {
1193 kill_super_cb = cfs;
1194 }
1195 EXPORT_SYMBOL(lustre_register_kill_super_cb);
1196
1197 /***************** FS registration ******************/
1198 struct dentry *lustre_mount(struct file_system_type *fs_type, int flags,
1199 const char *devname, void *data)
1200 {
1201 struct lustre_mount_data2 lmd2 = {
1202 .lmd2_data = data,
1203 .lmd2_mnt = NULL
1204 };
1205
1206 return mount_nodev(fs_type, flags, &lmd2, lustre_fill_super);
1207 }
1208
1209 static void lustre_kill_super(struct super_block *sb)
1210 {
1211 struct lustre_sb_info *lsi = s2lsi(sb);
1212
1213 if (kill_super_cb && lsi)
1214 (*kill_super_cb)(sb);
1215
1216 kill_anon_super(sb);
1217 }
1218
/** Description of the "lustre" fs type; registered and unregistered by
 * lustre_register_fs() / lustre_unregister_fs().
 */
struct file_system_type lustre_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "lustre",
	.mount		= lustre_mount,		/* mounts via mount_nodev() */
	.kill_sb	= lustre_kill_super,
	.fs_flags	= FS_BINARY_MOUNTDATA | FS_REQUIRES_DEV |
			  FS_RENAME_DOES_D_MOVE,
};
MODULE_ALIAS_FS("lustre");
1230
1231 int lustre_register_fs(void)
1232 {
1233 return register_filesystem(&lustre_fs_type);
1234 }
1235
1236 int lustre_unregister_fs(void)
1237 {
1238 return unregister_filesystem(&lustre_fs_type);
1239 }
This page took 0.166247 seconds and 4 git commands to generate.