staging/lustre: remove IS_MDS|IS_OST|IS_MGS defines and users
[deliverable/linux.git] / drivers / staging / lustre / lustre / obdclass / obd_mount.c
1 /*
2 * GPL HEADER START
3 *
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19 *
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
22 * have any questions.
23 *
24 * GPL HEADER END
25 */
26 /*
27 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
29 *
30 * Copyright (c) 2011, 2012, Intel Corporation.
31 */
32 /*
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
35 *
36 * lustre/obdclass/obd_mount.c
37 *
38 * Client mount routines
39 *
40 * Author: Nathan Rutman <nathan@clusterfs.com>
41 */
42
43
44 #define DEBUG_SUBSYSTEM S_CLASS
45 #define D_MOUNT (D_SUPER|D_CONFIG/*|D_WARNING */)
46 #define PRINT_CMD CDEBUG
47
48 #include "../include/obd.h"
49 #include "../include/linux/lustre_compat25.h"
50 #include "../include/obd_class.h"
51 #include "../include/lustre/lustre_user.h"
52 #include "../include/lustre_log.h"
53 #include "../include/lustre_disk.h"
54 #include "../include/lustre_param.h"
55
/* Client mount hook; set through lustre_register_client_fill_super(). */
static int (*client_fill_super)(struct super_block *sb, struct vfsmount *mnt);

/* Superblock teardown hook; set through lustre_register_kill_super_cb(). */
static void (*kill_super_cb)(struct super_block *sb);
60
61 /**************** config llog ********************/
62
63 /** Get a config log from the MGS and process it.
64 * This func is called for both clients and servers.
65 * Continue to process new statements appended to the logs
66 * (whenever the config lock is revoked) until lustre_end_log
67 * is called.
68 * @param sb The superblock is used by the MGC to write to the local copy of
69 * the config log
70 * @param logname The name of the llog to replicate from the MGS
71 * @param cfg Since the same mgc may be used to follow multiple config logs
72 * (e.g. ost1, ost2, client), the config_llog_instance keeps the state for
73 * this log, and is added to the mgc's list of logs to follow.
74 */
75 int lustre_process_log(struct super_block *sb, char *logname,
76 struct config_llog_instance *cfg)
77 {
78 struct lustre_cfg *lcfg;
79 struct lustre_cfg_bufs *bufs;
80 struct lustre_sb_info *lsi = s2lsi(sb);
81 struct obd_device *mgc = lsi->lsi_mgc;
82 int rc;
83
84 LASSERT(mgc);
85 LASSERT(cfg);
86
87 bufs = kzalloc(sizeof(*bufs), GFP_NOFS);
88 if (!bufs)
89 return -ENOMEM;
90
91 /* mgc_process_config */
92 lustre_cfg_bufs_reset(bufs, mgc->obd_name);
93 lustre_cfg_bufs_set_string(bufs, 1, logname);
94 lustre_cfg_bufs_set(bufs, 2, cfg, sizeof(*cfg));
95 lustre_cfg_bufs_set(bufs, 3, &sb, sizeof(sb));
96 lcfg = lustre_cfg_new(LCFG_LOG_START, bufs);
97 rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
98 lustre_cfg_free(lcfg);
99
100 kfree(bufs);
101
102 if (rc == -EINVAL)
103 LCONSOLE_ERROR_MSG(0x15b, "%s: The configuration from log '%s' failed from the MGS (%d). Make sure this client and the MGS are running compatible versions of Lustre.\n",
104 mgc->obd_name, logname, rc);
105
106 if (rc)
107 LCONSOLE_ERROR_MSG(0x15c, "%s: The configuration from log '%s' failed (%d). This may be the result of communication errors between this node and the MGS, a bad configuration, or other errors. See the syslog for more information.\n",
108 mgc->obd_name, logname,
109 rc);
110
111 /* class_obd_list(); */
112 return rc;
113 }
114 EXPORT_SYMBOL(lustre_process_log);
115
116 /* Stop watching this config log for updates */
117 int lustre_end_log(struct super_block *sb, char *logname,
118 struct config_llog_instance *cfg)
119 {
120 struct lustre_cfg *lcfg;
121 struct lustre_cfg_bufs bufs;
122 struct lustre_sb_info *lsi = s2lsi(sb);
123 struct obd_device *mgc = lsi->lsi_mgc;
124 int rc;
125
126 if (!mgc)
127 return -ENOENT;
128
129 /* mgc_process_config */
130 lustre_cfg_bufs_reset(&bufs, mgc->obd_name);
131 lustre_cfg_bufs_set_string(&bufs, 1, logname);
132 if (cfg)
133 lustre_cfg_bufs_set(&bufs, 2, cfg, sizeof(*cfg));
134 lcfg = lustre_cfg_new(LCFG_LOG_END, &bufs);
135 rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
136 lustre_cfg_free(lcfg);
137 return rc;
138 }
139 EXPORT_SYMBOL(lustre_end_log);
140
141 /**************** obd start *******************/
142
143 /** lustre_cfg_bufs are a holdover from 1.4; we can still set these up from
144 * lctl (and do for echo cli/srv.
145 */
146 int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd,
147 char *s1, char *s2, char *s3, char *s4)
148 {
149 struct lustre_cfg_bufs bufs;
150 struct lustre_cfg *lcfg = NULL;
151 int rc;
152
153 CDEBUG(D_TRACE, "lcfg %s %#x %s %s %s %s\n", cfgname,
154 cmd, s1, s2, s3, s4);
155
156 lustre_cfg_bufs_reset(&bufs, cfgname);
157 if (s1)
158 lustre_cfg_bufs_set_string(&bufs, 1, s1);
159 if (s2)
160 lustre_cfg_bufs_set_string(&bufs, 2, s2);
161 if (s3)
162 lustre_cfg_bufs_set_string(&bufs, 3, s3);
163 if (s4)
164 lustre_cfg_bufs_set_string(&bufs, 4, s4);
165
166 lcfg = lustre_cfg_new(cmd, &bufs);
167 lcfg->lcfg_nid = nid;
168 rc = class_process_config(lcfg);
169 lustre_cfg_free(lcfg);
170 return rc;
171 }
172 EXPORT_SYMBOL(do_lcfg);
173
174 /** Call class_attach and class_setup. These methods in turn call
175 * obd type-specific methods.
176 */
177 int lustre_start_simple(char *obdname, char *type, char *uuid,
178 char *s1, char *s2, char *s3, char *s4)
179 {
180 int rc;
181 CDEBUG(D_MOUNT, "Starting obd %s (typ=%s)\n", obdname, type);
182
183 rc = do_lcfg(obdname, 0, LCFG_ATTACH, type, uuid, NULL, NULL);
184 if (rc) {
185 CERROR("%s attach error %d\n", obdname, rc);
186 return rc;
187 }
188 rc = do_lcfg(obdname, 0, LCFG_SETUP, s1, s2, s3, s4);
189 if (rc) {
190 CERROR("%s setup error %d\n", obdname, rc);
191 do_lcfg(obdname, 0, LCFG_DETACH, NULL, NULL, NULL, NULL);
192 }
193 return rc;
194 }
195
196 DEFINE_MUTEX(mgc_start_lock);
197
198 /** Set up a mgc obd to process startup logs
199 *
200 * \param sb [in] super block of the mgc obd
201 *
202 * \retval 0 success, otherwise error code
203 */
204 int lustre_start_mgc(struct super_block *sb)
205 {
206 struct obd_connect_data *data = NULL;
207 struct lustre_sb_info *lsi = s2lsi(sb);
208 struct obd_device *obd;
209 struct obd_export *exp;
210 struct obd_uuid *uuid;
211 class_uuid_t uuidc;
212 lnet_nid_t nid;
213 char *mgcname = NULL, *niduuid = NULL, *mgssec = NULL;
214 char *ptr;
215 int rc = 0, i = 0, j, len;
216
217 LASSERT(lsi->lsi_lmd);
218
219 /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
220 ptr = lsi->lsi_lmd->lmd_dev;
221 if (class_parse_nid(ptr, &nid, &ptr) == 0)
222 i++;
223 if (i == 0) {
224 CERROR("No valid MGS nids found.\n");
225 return -EINVAL;
226 }
227
228 mutex_lock(&mgc_start_lock);
229
230 len = strlen(LUSTRE_MGC_OBDNAME) + strlen(libcfs_nid2str(nid)) + 1;
231 mgcname = kasprintf(GFP_NOFS,
232 "%s%s", LUSTRE_MGC_OBDNAME, libcfs_nid2str(nid));
233 niduuid = kasprintf(GFP_NOFS, "%s_%x", mgcname, i);
234 if (!mgcname || !niduuid) {
235 rc = -ENOMEM;
236 goto out_free;
237 }
238
239 mgssec = lsi->lsi_lmd->lmd_mgssec ? lsi->lsi_lmd->lmd_mgssec : "";
240
241 data = kzalloc(sizeof(*data), GFP_NOFS);
242 if (!data) {
243 rc = -ENOMEM;
244 goto out_free;
245 }
246
247 obd = class_name2obd(mgcname);
248 if (obd && !obd->obd_stopping) {
249 int recov_bk;
250
251 rc = obd_set_info_async(NULL, obd->obd_self_export,
252 strlen(KEY_MGSSEC), KEY_MGSSEC,
253 strlen(mgssec), mgssec, NULL);
254 if (rc)
255 goto out_free;
256
257 /* Re-using an existing MGC */
258 atomic_inc(&obd->u.cli.cl_mgc_refcount);
259
260 /* IR compatibility check, only for clients */
261 if (lmd_is_client(lsi->lsi_lmd)) {
262 int has_ir;
263 int vallen = sizeof(*data);
264 __u32 *flags = &lsi->lsi_lmd->lmd_flags;
265
266 rc = obd_get_info(NULL, obd->obd_self_export,
267 strlen(KEY_CONN_DATA), KEY_CONN_DATA,
268 &vallen, data, NULL);
269 LASSERT(rc == 0);
270 has_ir = OCD_HAS_FLAG(data, IMP_RECOV);
271 if (has_ir ^ !(*flags & LMD_FLG_NOIR)) {
272 /* LMD_FLG_NOIR is for test purpose only */
273 LCONSOLE_WARN(
274 "Trying to mount a client with IR setting not compatible with current mgc. Force to use current mgc setting that is IR %s.\n",
275 has_ir ? "enabled" : "disabled");
276 if (has_ir)
277 *flags &= ~LMD_FLG_NOIR;
278 else
279 *flags |= LMD_FLG_NOIR;
280 }
281 }
282
283 recov_bk = 0;
284
285 /* Try all connections, but only once (again).
286 We don't want to block another target from starting
287 (using its local copy of the log), but we do want to connect
288 if at all possible. */
289 recov_bk++;
290 CDEBUG(D_MOUNT, "%s: Set MGC reconnect %d\n", mgcname,
291 recov_bk);
292 rc = obd_set_info_async(NULL, obd->obd_self_export,
293 sizeof(KEY_INIT_RECOV_BACKUP),
294 KEY_INIT_RECOV_BACKUP,
295 sizeof(recov_bk), &recov_bk, NULL);
296 rc = 0;
297 goto out;
298 }
299
300 CDEBUG(D_MOUNT, "Start MGC '%s'\n", mgcname);
301
302 /* Add the primary nids for the MGS */
303 i = 0;
304 /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
305 ptr = lsi->lsi_lmd->lmd_dev;
306 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
307 rc = do_lcfg(mgcname, nid,
308 LCFG_ADD_UUID, niduuid, NULL, NULL, NULL);
309 i++;
310 /* Stop at the first failover nid */
311 if (*ptr == ':')
312 break;
313 }
314 if (i == 0) {
315 CERROR("No valid MGS nids found.\n");
316 rc = -EINVAL;
317 goto out_free;
318 }
319 lsi->lsi_lmd->lmd_mgs_failnodes = 1;
320
321 /* Random uuid for MGC allows easier reconnects */
322 uuid = kzalloc(sizeof(*uuid), GFP_NOFS);
323 if (!uuid) {
324 rc = -ENOMEM;
325 goto out_free;
326 }
327
328 ll_generate_random_uuid(uuidc);
329 class_uuid_unparse(uuidc, uuid);
330
331 /* Start the MGC */
332 rc = lustre_start_simple(mgcname, LUSTRE_MGC_NAME,
333 (char *)uuid->uuid, LUSTRE_MGS_OBDNAME,
334 niduuid, NULL, NULL);
335 kfree(uuid);
336 if (rc)
337 goto out_free;
338
339 /* Add any failover MGS nids */
340 i = 1;
341 while (ptr && ((*ptr == ':' ||
342 class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0))) {
343 /* New failover node */
344 sprintf(niduuid, "%s_%x", mgcname, i);
345 j = 0;
346 while (class_parse_nid_quiet(ptr, &nid, &ptr) == 0) {
347 j++;
348 rc = do_lcfg(mgcname, nid,
349 LCFG_ADD_UUID, niduuid, NULL, NULL, NULL);
350 if (*ptr == ':')
351 break;
352 }
353 if (j > 0) {
354 rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN,
355 niduuid, NULL, NULL, NULL);
356 i++;
357 } else {
358 /* at ":/fsname" */
359 break;
360 }
361 }
362 lsi->lsi_lmd->lmd_mgs_failnodes = i;
363
364 obd = class_name2obd(mgcname);
365 if (!obd) {
366 CERROR("Can't find mgcobd %s\n", mgcname);
367 rc = -ENOTCONN;
368 goto out_free;
369 }
370
371 rc = obd_set_info_async(NULL, obd->obd_self_export,
372 strlen(KEY_MGSSEC), KEY_MGSSEC,
373 strlen(mgssec), mgssec, NULL);
374 if (rc)
375 goto out_free;
376
377 /* Keep a refcount of servers/clients who started with "mount",
378 so we know when we can get rid of the mgc. */
379 atomic_set(&obd->u.cli.cl_mgc_refcount, 1);
380
381 /* We connect to the MGS at setup, and don't disconnect until cleanup */
382 data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_AT |
383 OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV |
384 OBD_CONNECT_LVB_TYPE;
385
386 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 50, 0)
387 data->ocd_connect_flags |= OBD_CONNECT_MNE_SWAB;
388 #else
389 #warning "LU-1644: Remove old OBD_CONNECT_MNE_SWAB fixup and imp_need_mne_swab"
390 #endif
391
392 if (lmd_is_client(lsi->lsi_lmd) &&
393 lsi->lsi_lmd->lmd_flags & LMD_FLG_NOIR)
394 data->ocd_connect_flags &= ~OBD_CONNECT_IMP_RECOV;
395 data->ocd_version = LUSTRE_VERSION_CODE;
396 rc = obd_connect(NULL, &exp, obd, &(obd->obd_uuid), data, NULL);
397 if (rc) {
398 CERROR("connect failed %d\n", rc);
399 goto out;
400 }
401
402 obd->u.cli.cl_mgc_mgsexp = exp;
403
404 out:
405 /* Keep the mgc info in the sb. Note that many lsi's can point
406 to the same mgc.*/
407 lsi->lsi_mgc = obd;
408 out_free:
409 mutex_unlock(&mgc_start_lock);
410
411 kfree(data);
412 kfree(mgcname);
413 kfree(niduuid);
414 return rc;
415 }
416
417 static int lustre_stop_mgc(struct super_block *sb)
418 {
419 struct lustre_sb_info *lsi = s2lsi(sb);
420 struct obd_device *obd;
421 char *niduuid = NULL, *ptr = NULL;
422 int i, rc = 0, len = 0;
423
424 if (!lsi)
425 return -ENOENT;
426 obd = lsi->lsi_mgc;
427 if (!obd)
428 return -ENOENT;
429 lsi->lsi_mgc = NULL;
430
431 mutex_lock(&mgc_start_lock);
432 LASSERT(atomic_read(&obd->u.cli.cl_mgc_refcount) > 0);
433 if (!atomic_dec_and_test(&obd->u.cli.cl_mgc_refcount)) {
434 /* This is not fatal, every client that stops
435 will call in here. */
436 CDEBUG(D_MOUNT, "mgc still has %d references.\n",
437 atomic_read(&obd->u.cli.cl_mgc_refcount));
438 rc = -EBUSY;
439 goto out;
440 }
441
442 /* The MGC has no recoverable data in any case.
443 * force shutdown set in umount_begin */
444 obd->obd_no_recov = 1;
445
446 if (obd->u.cli.cl_mgc_mgsexp) {
447 /* An error is not fatal, if we are unable to send the
448 disconnect mgs ping evictor cleans up the export */
449 rc = obd_disconnect(obd->u.cli.cl_mgc_mgsexp);
450 if (rc)
451 CDEBUG(D_MOUNT, "disconnect failed %d\n", rc);
452 }
453
454 /* Save the obdname for cleaning the nid uuids, which are
455 obdname_XX */
456 len = strlen(obd->obd_name) + 6;
457 niduuid = kzalloc(len, GFP_NOFS);
458 if (niduuid) {
459 strcpy(niduuid, obd->obd_name);
460 ptr = niduuid + strlen(niduuid);
461 }
462
463 rc = class_manual_cleanup(obd);
464 if (rc)
465 goto out;
466
467 /* Clean the nid uuids */
468 if (!niduuid) {
469 rc = -ENOMEM;
470 goto out;
471 }
472
473 for (i = 0; i < lsi->lsi_lmd->lmd_mgs_failnodes; i++) {
474 sprintf(ptr, "_%x", i);
475 rc = do_lcfg(LUSTRE_MGC_OBDNAME, 0, LCFG_DEL_UUID,
476 niduuid, NULL, NULL, NULL);
477 if (rc)
478 CERROR("del MDC UUID %s failed: rc = %d\n",
479 niduuid, rc);
480 }
481 out:
482 kfree(niduuid);
483
484 /* class_import_put will get rid of the additional connections */
485 mutex_unlock(&mgc_start_lock);
486 return rc;
487 }
488
489 /***************** lustre superblock **************/
490
491 struct lustre_sb_info *lustre_init_lsi(struct super_block *sb)
492 {
493 struct lustre_sb_info *lsi;
494
495 lsi = kzalloc(sizeof(*lsi), GFP_NOFS);
496 if (!lsi)
497 return NULL;
498 lsi->lsi_lmd = kzalloc(sizeof(*lsi->lsi_lmd), GFP_NOFS);
499 if (!lsi->lsi_lmd) {
500 kfree(lsi);
501 return NULL;
502 }
503
504 lsi->lsi_lmd->lmd_exclude_count = 0;
505 lsi->lsi_lmd->lmd_recovery_time_soft = 0;
506 lsi->lsi_lmd->lmd_recovery_time_hard = 0;
507 s2lsi_nocast(sb) = lsi;
508 /* we take 1 extra ref for our setup */
509 atomic_set(&lsi->lsi_mounts, 1);
510
511 /* Default umount style */
512 lsi->lsi_flags = LSI_UMOUNT_FAILOVER;
513
514 return lsi;
515 }
516
517 static int lustre_free_lsi(struct super_block *sb)
518 {
519 struct lustre_sb_info *lsi = s2lsi(sb);
520
521 LASSERT(lsi != NULL);
522 CDEBUG(D_MOUNT, "Freeing lsi %p\n", lsi);
523
524 /* someone didn't call server_put_mount. */
525 LASSERT(atomic_read(&lsi->lsi_mounts) == 0);
526
527 if (lsi->lsi_lmd != NULL) {
528 kfree(lsi->lsi_lmd->lmd_dev);
529 kfree(lsi->lsi_lmd->lmd_profile);
530 kfree(lsi->lsi_lmd->lmd_mgssec);
531 kfree(lsi->lsi_lmd->lmd_opts);
532 if (lsi->lsi_lmd->lmd_exclude_count)
533 kfree(lsi->lsi_lmd->lmd_exclude);
534 kfree(lsi->lsi_lmd->lmd_mgs);
535 kfree(lsi->lsi_lmd->lmd_osd_type);
536 kfree(lsi->lsi_lmd->lmd_params);
537
538 kfree(lsi->lsi_lmd);
539 }
540
541 LASSERT(lsi->lsi_llsbi == NULL);
542 kfree(lsi);
543 s2lsi_nocast(sb) = NULL;
544
545 return 0;
546 }
547
548 /* The lsi has one reference for every server that is using the disk -
549 e.g. MDT, MGS, and potentially MGC */
550 int lustre_put_lsi(struct super_block *sb)
551 {
552 struct lustre_sb_info *lsi = s2lsi(sb);
553
554 LASSERT(lsi != NULL);
555
556 CDEBUG(D_MOUNT, "put %p %d\n", sb, atomic_read(&lsi->lsi_mounts));
557 if (atomic_dec_and_test(&lsi->lsi_mounts)) {
558 lustre_free_lsi(sb);
559 return 1;
560 }
561 return 0;
562 }
563
/*** SERVER NAME ***
 * <FSNAME><SEPARATOR><TYPE><INDEX>
 * FSNAME is between 1 and 8 characters (inclusive).
 *	Excluded characters are '/' and ':'
 * SEPARATOR is either ':' or '-'
 * TYPE: "OST", "MDT", etc.
 * INDEX: Hex representation of the index
 */

/**
 * Extract the fsname ("lustre") from a server name ("lustre-OST003F").
 *
 * Only the first nine characters are examined: the scan starts at the end
 * of the string or at offset 8, whichever comes first, and walks backwards
 * to the nearest '-' or ':'.
 *
 * @param [in]  svname server name including type and index
 * @param [out] fsname optional buffer receiving the NUL-terminated fsname;
 *                     needs room for the fsname plus the terminator
 * @param [out] endptr if not NULL, set to the separator that ends the fsname
 *
 * @return 0 on success, -EINVAL if no separator is found after at least
 *         one fsname character
 */
int server_name2fsname(const char *svname, char *fsname, const char **endptr)
{
	/* max fsname length is 8 */
	const char *nul = memchr(svname, '\0', 8);
	const char *sep = nul ? nul : svname + 8;
	size_t fslen;

	while (sep > svname && *sep != '-' && *sep != ':')
		sep--;

	/* Reached the start: no separator, or an empty fsname */
	if (sep == svname)
		return -EINVAL;

	fslen = sep - svname;
	if (fsname != NULL) {
		memcpy(fsname, svname, fslen);
		fsname[fslen] = '\0';
	}

	if (endptr != NULL)
		*endptr = sep;

	return 0;
}
600 EXPORT_SYMBOL(server_name2fsname);
601
/**
 * Copy the service name (everything after the fsname separator) out of
 * @label.
 *
 * @param label  string of the form <fsname><separator><svname>
 * @param svname destination buffer
 * @param endptr if not NULL, set to the end of the fsname (the separator)
 * @param svsize size of @svname in bytes
 *
 * @return 0 on success, the server_name2fsname() error if @label cannot be
 *         parsed, or -E2BIG if the service name does not fit in @svsize
 */
int server_name2svname(const char *label, char *svname, const char **endptr,
		       size_t svsize)
{
	const char *sep;
	int rc;

	/* server_name2fsname() is used purely to locate the separator */
	rc = server_name2fsname(label, NULL, &sep);
	if (rc != 0)
		return rc;

	if (endptr != NULL)
		*endptr = sep;

	return strlcpy(svname, sep + 1, svsize) >= svsize ? -E2BIG : 0;
}
626 EXPORT_SYMBOL(server_name2svname);
627
628
629 /* Get the index from the obd name.
630 rc = server type, or
631 rc < 0 on error
632 if endptr isn't NULL it is set to end of name */
633 int server_name2index(const char *svname, __u32 *idx, const char **endptr)
634 {
635 unsigned long index;
636 int rc;
637 const char *dash;
638
639 /* We use server_name2fsname() just for parsing */
640 rc = server_name2fsname(svname, NULL, &dash);
641 if (rc != 0)
642 return rc;
643
644 dash++;
645
646 if (strncmp(dash, "MDT", 3) == 0)
647 rc = LDD_F_SV_TYPE_MDT;
648 else if (strncmp(dash, "OST", 3) == 0)
649 rc = LDD_F_SV_TYPE_OST;
650 else
651 return -EINVAL;
652
653 dash += 3;
654
655 if (strncmp(dash, "all", 3) == 0) {
656 if (endptr != NULL)
657 *endptr = dash + 3;
658 return rc | LDD_F_SV_ALL;
659 }
660
661 index = simple_strtoul(dash, (char **)endptr, 16);
662 if (idx != NULL)
663 *idx = index;
664
665 /* Account for -mdc after index that is possible when specifying mdt */
666 if (endptr != NULL && strncmp(LUSTRE_MDC_NAME, *endptr + 1,
667 sizeof(LUSTRE_MDC_NAME)-1) == 0)
668 *endptr += sizeof(LUSTRE_MDC_NAME);
669
670 return rc;
671 }
672 EXPORT_SYMBOL(server_name2index);
673
674 /*************** mount common between server and client ***************/
675
676 /* Common umount */
677 int lustre_common_put_super(struct super_block *sb)
678 {
679 int rc;
680
681 CDEBUG(D_MOUNT, "dropping sb %p\n", sb);
682
683 /* Drop a ref to the MGC */
684 rc = lustre_stop_mgc(sb);
685 if (rc && (rc != -ENOENT)) {
686 if (rc != -EBUSY) {
687 CERROR("Can't stop MGC: %d\n", rc);
688 return rc;
689 }
690 /* BUSY just means that there's some other obd that
691 needs the mgc. Let him clean it up. */
692 CDEBUG(D_MOUNT, "MGC still in use\n");
693 }
694 /* Drop a ref to the mounted disk */
695 lustre_put_lsi(sb);
696 lu_types_stop();
697 return rc;
698 }
699 EXPORT_SYMBOL(lustre_common_put_super);
700
701 static void lmd_print(struct lustre_mount_data *lmd)
702 {
703 int i;
704
705 PRINT_CMD(D_MOUNT, " mount data:\n");
706 if (lmd_is_client(lmd))
707 PRINT_CMD(D_MOUNT, "profile: %s\n", lmd->lmd_profile);
708 PRINT_CMD(D_MOUNT, "device: %s\n", lmd->lmd_dev);
709 PRINT_CMD(D_MOUNT, "flags: %x\n", lmd->lmd_flags);
710
711 if (lmd->lmd_opts)
712 PRINT_CMD(D_MOUNT, "options: %s\n", lmd->lmd_opts);
713
714 if (lmd->lmd_recovery_time_soft)
715 PRINT_CMD(D_MOUNT, "recovery time soft: %d\n",
716 lmd->lmd_recovery_time_soft);
717
718 if (lmd->lmd_recovery_time_hard)
719 PRINT_CMD(D_MOUNT, "recovery time hard: %d\n",
720 lmd->lmd_recovery_time_hard);
721
722 for (i = 0; i < lmd->lmd_exclude_count; i++) {
723 PRINT_CMD(D_MOUNT, "exclude %d: OST%04x\n", i,
724 lmd->lmd_exclude[i]);
725 }
726 }
727
728 /* Is this server on the exclusion list */
729 int lustre_check_exclusion(struct super_block *sb, char *svname)
730 {
731 struct lustre_sb_info *lsi = s2lsi(sb);
732 struct lustre_mount_data *lmd = lsi->lsi_lmd;
733 __u32 index;
734 int i, rc;
735
736 rc = server_name2index(svname, &index, NULL);
737 if (rc != LDD_F_SV_TYPE_OST)
738 /* Only exclude OSTs */
739 return 0;
740
741 CDEBUG(D_MOUNT, "Check exclusion %s (%d) in %d of %s\n", svname,
742 index, lmd->lmd_exclude_count, lmd->lmd_dev);
743
744 for (i = 0; i < lmd->lmd_exclude_count; i++) {
745 if (index == lmd->lmd_exclude[i]) {
746 CWARN("Excluding %s (on exclusion list)\n", svname);
747 return 1;
748 }
749 }
750 return 0;
751 }
752
753 /* mount -v -o exclude=lustre-OST0001:lustre-OST0002 -t lustre ... */
754 static int lmd_make_exclusion(struct lustre_mount_data *lmd, const char *ptr)
755 {
756 const char *s1 = ptr, *s2;
757 __u32 index, *exclude_list;
758 int rc = 0, devmax;
759
760 /* The shortest an ost name can be is 8 chars: -OST0000.
761 We don't actually know the fsname at this time, so in fact
762 a user could specify any fsname. */
763 devmax = strlen(ptr) / 8 + 1;
764
765 /* temp storage until we figure out how many we have */
766 exclude_list = kcalloc(devmax, sizeof(index), GFP_NOFS);
767 if (!exclude_list)
768 return -ENOMEM;
769
770 /* we enter this fn pointing at the '=' */
771 while (*s1 && *s1 != ' ' && *s1 != ',') {
772 s1++;
773 rc = server_name2index(s1, &index, &s2);
774 if (rc < 0) {
775 CERROR("Can't parse server name '%s': rc = %d\n",
776 s1, rc);
777 break;
778 }
779 if (rc == LDD_F_SV_TYPE_OST)
780 exclude_list[lmd->lmd_exclude_count++] = index;
781 else
782 CDEBUG(D_MOUNT, "ignoring exclude %.*s: type = %#x\n",
783 (uint)(s2-s1), s1, rc);
784 s1 = s2;
785 /* now we are pointing at ':' (next exclude)
786 or ',' (end of excludes) */
787 if (lmd->lmd_exclude_count >= devmax)
788 break;
789 }
790 if (rc >= 0) /* non-err */
791 rc = 0;
792
793 if (lmd->lmd_exclude_count) {
794 /* permanent, freed in lustre_free_lsi */
795 lmd->lmd_exclude = kcalloc(lmd->lmd_exclude_count,
796 sizeof(index), GFP_NOFS);
797 if (lmd->lmd_exclude) {
798 memcpy(lmd->lmd_exclude, exclude_list,
799 sizeof(index) * lmd->lmd_exclude_count);
800 } else {
801 rc = -ENOMEM;
802 lmd->lmd_exclude_count = 0;
803 }
804 }
805 kfree(exclude_list);
806 return rc;
807 }
808
809 static int lmd_parse_mgssec(struct lustre_mount_data *lmd, char *ptr)
810 {
811 char *tail;
812 int length;
813
814 kfree(lmd->lmd_mgssec);
815 lmd->lmd_mgssec = NULL;
816
817 tail = strchr(ptr, ',');
818 if (tail == NULL)
819 length = strlen(ptr);
820 else
821 length = tail - ptr;
822
823 lmd->lmd_mgssec = kzalloc(length + 1, GFP_NOFS);
824 if (!lmd->lmd_mgssec)
825 return -ENOMEM;
826
827 memcpy(lmd->lmd_mgssec, ptr, length);
828 lmd->lmd_mgssec[length] = '\0';
829 return 0;
830 }
831
832 static int lmd_parse_string(char **handle, char *ptr)
833 {
834 char *tail;
835 int length;
836
837 if ((handle == NULL) || (ptr == NULL))
838 return -EINVAL;
839
840 kfree(*handle);
841 *handle = NULL;
842
843 tail = strchr(ptr, ',');
844 if (tail == NULL)
845 length = strlen(ptr);
846 else
847 length = tail - ptr;
848
849 *handle = kzalloc(length + 1, GFP_NOFS);
850 if (!*handle)
851 return -ENOMEM;
852
853 memcpy(*handle, ptr, length);
854 (*handle)[length] = '\0';
855
856 return 0;
857 }
858
859 /* Collect multiple values for mgsnid specifiers */
860 static int lmd_parse_mgs(struct lustre_mount_data *lmd, char **ptr)
861 {
862 lnet_nid_t nid;
863 char *tail = *ptr;
864 char *mgsnid;
865 int length;
866 int oldlen = 0;
867
868 /* Find end of nidlist */
869 while (class_parse_nid_quiet(tail, &nid, &tail) == 0)
870 ;
871 length = tail - *ptr;
872 if (length == 0) {
873 LCONSOLE_ERROR_MSG(0x159, "Can't parse NID '%s'\n", *ptr);
874 return -EINVAL;
875 }
876
877 if (lmd->lmd_mgs != NULL)
878 oldlen = strlen(lmd->lmd_mgs) + 1;
879
880 mgsnid = kzalloc(oldlen + length + 1, GFP_NOFS);
881 if (!mgsnid)
882 return -ENOMEM;
883
884 if (lmd->lmd_mgs != NULL) {
885 /* Multiple mgsnid= are taken to mean failover locations */
886 memcpy(mgsnid, lmd->lmd_mgs, oldlen);
887 mgsnid[oldlen - 1] = ':';
888 kfree(lmd->lmd_mgs);
889 }
890 memcpy(mgsnid + oldlen, *ptr, length);
891 mgsnid[oldlen + length] = '\0';
892 lmd->lmd_mgs = mgsnid;
893 *ptr = tail;
894
895 return 0;
896 }
897
898 /** Parse mount line options
899 * e.g. mount -v -t lustre -o abort_recov uml1:uml2:/lustre-client /mnt/lustre
900 * dev is passed as device=uml1:/lustre by mount.lustre
901 */
902 static int lmd_parse(char *options, struct lustre_mount_data *lmd)
903 {
904 char *s1, *s2, *devname = NULL;
905 struct lustre_mount_data *raw = (struct lustre_mount_data *)options;
906 int rc = 0;
907
908 LASSERT(lmd);
909 if (!options) {
910 LCONSOLE_ERROR_MSG(0x162, "Missing mount data: check that /sbin/mount.lustre is installed.\n");
911 return -EINVAL;
912 }
913
914 /* Options should be a string - try to detect old lmd data */
915 if ((raw->lmd_magic & 0xffffff00) == (LMD_MAGIC & 0xffffff00)) {
916 LCONSOLE_ERROR_MSG(0x163, "You're using an old version of /sbin/mount.lustre. Please install version %s\n",
917 LUSTRE_VERSION_STRING);
918 return -EINVAL;
919 }
920 lmd->lmd_magic = LMD_MAGIC;
921
922 lmd->lmd_params = kzalloc(4096, GFP_NOFS);
923 if (!lmd->lmd_params)
924 return -ENOMEM;
925 lmd->lmd_params[0] = '\0';
926
927 /* Set default flags here */
928
929 s1 = options;
930 while (*s1) {
931 int clear = 0;
932 int time_min = OBD_RECOVERY_TIME_MIN;
933
934 /* Skip whitespace and extra commas */
935 while (*s1 == ' ' || *s1 == ',')
936 s1++;
937
938 /* Client options are parsed in ll_options: eg. flock,
939 user_xattr, acl */
940
941 /* Parse non-ldiskfs options here. Rather than modifying
942 ldiskfs, we just zero these out here */
943 if (strncmp(s1, "abort_recov", 11) == 0) {
944 lmd->lmd_flags |= LMD_FLG_ABORT_RECOV;
945 clear++;
946 } else if (strncmp(s1, "recovery_time_soft=", 19) == 0) {
947 lmd->lmd_recovery_time_soft = max_t(int,
948 simple_strtoul(s1 + 19, NULL, 10), time_min);
949 clear++;
950 } else if (strncmp(s1, "recovery_time_hard=", 19) == 0) {
951 lmd->lmd_recovery_time_hard = max_t(int,
952 simple_strtoul(s1 + 19, NULL, 10), time_min);
953 clear++;
954 } else if (strncmp(s1, "noir", 4) == 0) {
955 lmd->lmd_flags |= LMD_FLG_NOIR; /* test purpose only. */
956 clear++;
957 } else if (strncmp(s1, "nosvc", 5) == 0) {
958 lmd->lmd_flags |= LMD_FLG_NOSVC;
959 clear++;
960 } else if (strncmp(s1, "nomgs", 5) == 0) {
961 lmd->lmd_flags |= LMD_FLG_NOMGS;
962 clear++;
963 } else if (strncmp(s1, "noscrub", 7) == 0) {
964 lmd->lmd_flags |= LMD_FLG_NOSCRUB;
965 clear++;
966 } else if (strncmp(s1, PARAM_MGSNODE,
967 sizeof(PARAM_MGSNODE) - 1) == 0) {
968 s2 = s1 + sizeof(PARAM_MGSNODE) - 1;
969 /* Assume the next mount opt is the first
970 invalid nid we get to. */
971 rc = lmd_parse_mgs(lmd, &s2);
972 if (rc)
973 goto invalid;
974 clear++;
975 } else if (strncmp(s1, "writeconf", 9) == 0) {
976 lmd->lmd_flags |= LMD_FLG_WRITECONF;
977 clear++;
978 } else if (strncmp(s1, "update", 6) == 0) {
979 lmd->lmd_flags |= LMD_FLG_UPDATE;
980 clear++;
981 } else if (strncmp(s1, "virgin", 6) == 0) {
982 lmd->lmd_flags |= LMD_FLG_VIRGIN;
983 clear++;
984 } else if (strncmp(s1, "noprimnode", 10) == 0) {
985 lmd->lmd_flags |= LMD_FLG_NO_PRIMNODE;
986 clear++;
987 } else if (strncmp(s1, "mgssec=", 7) == 0) {
988 rc = lmd_parse_mgssec(lmd, s1 + 7);
989 if (rc)
990 goto invalid;
991 clear++;
992 /* ost exclusion list */
993 } else if (strncmp(s1, "exclude=", 8) == 0) {
994 rc = lmd_make_exclusion(lmd, s1 + 7);
995 if (rc)
996 goto invalid;
997 clear++;
998 } else if (strncmp(s1, "mgs", 3) == 0) {
999 /* We are an MGS */
1000 lmd->lmd_flags |= LMD_FLG_MGS;
1001 clear++;
1002 } else if (strncmp(s1, "svname=", 7) == 0) {
1003 rc = lmd_parse_string(&lmd->lmd_profile, s1 + 7);
1004 if (rc)
1005 goto invalid;
1006 clear++;
1007 } else if (strncmp(s1, "param=", 6) == 0) {
1008 int length;
1009 char *tail = strchr(s1 + 6, ',');
1010 if (tail == NULL)
1011 length = strlen(s1);
1012 else
1013 length = tail - s1;
1014 length -= 6;
1015 strncat(lmd->lmd_params, s1 + 6, length);
1016 strcat(lmd->lmd_params, " ");
1017 clear++;
1018 } else if (strncmp(s1, "osd=", 4) == 0) {
1019 rc = lmd_parse_string(&lmd->lmd_osd_type, s1 + 4);
1020 if (rc)
1021 goto invalid;
1022 clear++;
1023 }
1024 /* Linux 2.4 doesn't pass the device, so we stuck it at the
1025 end of the options. */
1026 else if (strncmp(s1, "device=", 7) == 0) {
1027 devname = s1 + 7;
1028 /* terminate options right before device. device
1029 must be the last one. */
1030 *s1 = '\0';
1031 break;
1032 }
1033
1034 /* Find next opt */
1035 s2 = strchr(s1, ',');
1036 if (s2 == NULL) {
1037 if (clear)
1038 *s1 = '\0';
1039 break;
1040 }
1041 s2++;
1042 if (clear)
1043 memmove(s1, s2, strlen(s2) + 1);
1044 else
1045 s1 = s2;
1046 }
1047
1048 if (!devname) {
1049 LCONSOLE_ERROR_MSG(0x164, "Can't find the device name (need mount option 'device=...')\n");
1050 goto invalid;
1051 }
1052
1053 s1 = strstr(devname, ":/");
1054 if (s1) {
1055 ++s1;
1056 lmd->lmd_flags |= LMD_FLG_CLIENT;
1057 /* Remove leading /s from fsname */
1058 while (*++s1 == '/')
1059 ;
1060 /* Freed in lustre_free_lsi */
1061 lmd->lmd_profile = kasprintf(GFP_NOFS, "%s-client", s1);
1062 if (!lmd->lmd_profile)
1063 return -ENOMEM;
1064 }
1065
1066 /* Freed in lustre_free_lsi */
1067 lmd->lmd_dev = kzalloc(strlen(devname) + 1, GFP_NOFS);
1068 if (!lmd->lmd_dev)
1069 return -ENOMEM;
1070 strcpy(lmd->lmd_dev, devname);
1071
1072 /* Save mount options */
1073 s1 = options + strlen(options) - 1;
1074 while (s1 >= options && (*s1 == ',' || *s1 == ' '))
1075 *s1-- = 0;
1076 if (*options != 0) {
1077 /* Freed in lustre_free_lsi */
1078 lmd->lmd_opts = kzalloc(strlen(options) + 1, GFP_NOFS);
1079 if (!lmd->lmd_opts)
1080 return -ENOMEM;
1081 strcpy(lmd->lmd_opts, options);
1082 }
1083
1084 lmd_print(lmd);
1085 lmd->lmd_magic = LMD_MAGIC;
1086
1087 return rc;
1088
1089 invalid:
1090 CERROR("Bad mount options %s\n", options);
1091 return -EINVAL;
1092 }
1093
/*
 * Argument bundle handed from lustre_mount() to lustre_fill_super()
 * through mount_nodev()'s opaque data pointer.
 */
struct lustre_mount_data2 {
	void		*lmd2_data;	/* raw mount data from the mount call */
	struct vfsmount	*lmd2_mnt;	/* forwarded to client_fill_super() */
};
1098
1099 /** This is the entry point for the mount call into Lustre.
1100 * This is called when a server or client is mounted,
1101 * and this is where we start setting things up.
1102 * @param data Mount options (e.g. -o flock,abort_recov)
1103 */
1104 int lustre_fill_super(struct super_block *sb, void *data, int silent)
1105 {
1106 struct lustre_mount_data *lmd;
1107 struct lustre_mount_data2 *lmd2 = data;
1108 struct lustre_sb_info *lsi;
1109 int rc;
1110
1111 CDEBUG(D_MOUNT|D_VFSTRACE, "VFS Op: sb %p\n", sb);
1112
1113 lsi = lustre_init_lsi(sb);
1114 if (!lsi)
1115 return -ENOMEM;
1116 lmd = lsi->lsi_lmd;
1117
1118 /*
1119 * Disable lockdep during mount, because mount locking patterns are
1120 * `special'.
1121 */
1122 lockdep_off();
1123
1124 /*
1125 * LU-639: the obd cleanup of last mount may not finish yet, wait here.
1126 */
1127 obd_zombie_barrier();
1128
1129 /* Figure out the lmd from the mount options */
1130 if (lmd_parse((char *)(lmd2->lmd2_data), lmd)) {
1131 lustre_put_lsi(sb);
1132 rc = -EINVAL;
1133 goto out;
1134 }
1135
1136 if (lmd_is_client(lmd)) {
1137 CDEBUG(D_MOUNT, "Mounting client %s\n", lmd->lmd_profile);
1138 if (client_fill_super == NULL)
1139 request_module("lustre");
1140 if (client_fill_super == NULL) {
1141 LCONSOLE_ERROR_MSG(0x165, "Nothing registered for client mount! Is the 'lustre' module loaded?\n");
1142 lustre_put_lsi(sb);
1143 rc = -ENODEV;
1144 } else {
1145 rc = lustre_start_mgc(sb);
1146 if (rc) {
1147 lustre_put_lsi(sb);
1148 goto out;
1149 }
1150 /* Connect and start */
1151 /* (should always be ll_fill_super) */
1152 rc = (*client_fill_super)(sb, lmd2->lmd2_mnt);
1153 /* c_f_s will call lustre_common_put_super on failure */
1154 }
1155 } else {
1156 CERROR("This is client-side-only module, cannot handle server mount.\n");
1157 rc = -EINVAL;
1158 }
1159
1160 /* If error happens in fill_super() call, @lsi will be killed there.
1161 * This is why we do not put it here. */
1162 goto out;
1163 out:
1164 if (rc) {
1165 CERROR("Unable to mount %s (%d)\n",
1166 s2lsi(sb) ? lmd->lmd_dev : "", rc);
1167 } else {
1168 CDEBUG(D_SUPER, "Mount %s complete\n",
1169 lmd->lmd_dev);
1170 }
1171 lockdep_on();
1172 return rc;
1173 }
1174
1175
1176 /* We can't call ll_fill_super by name because it lives in a module that
1177 must be loaded after this one. */
1178 void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb,
1179 struct vfsmount *mnt))
1180 {
1181 client_fill_super = cfs;
1182 }
1183 EXPORT_SYMBOL(lustre_register_client_fill_super);
1184
1185 void lustre_register_kill_super_cb(void (*cfs)(struct super_block *sb))
1186 {
1187 kill_super_cb = cfs;
1188 }
1189 EXPORT_SYMBOL(lustre_register_kill_super_cb);
1190
1191 /***************** FS registration ******************/
1192 struct dentry *lustre_mount(struct file_system_type *fs_type, int flags,
1193 const char *devname, void *data)
1194 {
1195 struct lustre_mount_data2 lmd2 = {
1196 .lmd2_data = data,
1197 .lmd2_mnt = NULL
1198 };
1199
1200 return mount_nodev(fs_type, flags, &lmd2, lustre_fill_super);
1201 }
1202
1203 static void lustre_kill_super(struct super_block *sb)
1204 {
1205 struct lustre_sb_info *lsi = s2lsi(sb);
1206
1207 if (kill_super_cb && lsi)
1208 (*kill_super_cb)(sb);
1209
1210 kill_anon_super(sb);
1211 }
1212
1213 /** Register the "lustre" fs type
1214 */
1215 struct file_system_type lustre_fs_type = {
1216 .owner = THIS_MODULE,
1217 .name = "lustre",
1218 .mount = lustre_mount,
1219 .kill_sb = lustre_kill_super,
1220 .fs_flags = FS_BINARY_MOUNTDATA | FS_REQUIRES_DEV |
1221 FS_RENAME_DOES_D_MOVE,
1222 };
1223 MODULE_ALIAS_FS("lustre");
1224
1225 int lustre_register_fs(void)
1226 {
1227 return register_filesystem(&lustre_fs_type);
1228 }
1229
1230 int lustre_unregister_fs(void)
1231 {
1232 return unregister_filesystem(&lustre_fs_type);
1233 }
This page took 0.08689 seconds and 5 git commands to generate.