Commit | Line | Data |
---|---|---|
3d14c5d2 | 1 | #include <linux/ceph/ceph_debug.h> |
40819f6f GF |
2 | |
3 | #include <linux/file.h> | |
4 | #include <linux/namei.h> | |
eb13e832 | 5 | #include <linux/random.h> |
40819f6f GF |
6 | |
7 | #include "super.h" | |
8 | #include "mds_client.h" | |
3d14c5d2 | 9 | #include <linux/ceph/pagelist.h> |
40819f6f | 10 | |
eb13e832 YZ |
11 | static u64 lock_secret; |
12 | ||
13 | static inline u64 secure_addr(void *addr) | |
14 | { | |
15 | u64 v = lock_secret ^ (u64)(unsigned long)addr; | |
16 | /* | |
17 | * Set the most significant bit, so that MDS knows the 'owner' | |
18 | * is sufficient to identify the owner of lock. (old code uses | |
19 | * both 'owner' and 'pid') | |
20 | */ | |
21 | v |= (1ULL << 63); | |
22 | return v; | |
23 | } | |
24 | ||
25 | void __init ceph_flock_init(void) | |
26 | { | |
27 | get_random_bytes(&lock_secret, sizeof(lock_secret)); | |
28 | } | |
29 | ||
40819f6f GF |
30 | /** |
31 | * Implement fcntl and flock locking functions. | |
32 | */ | |
33 | static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, | |
637ae8d5 | 34 | int cmd, u8 wait, struct file_lock *fl) |
40819f6f | 35 | { |
496ad9aa | 36 | struct inode *inode = file_inode(file); |
eb13e832 | 37 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; |
40819f6f GF |
38 | struct ceph_mds_request *req; |
39 | int err; | |
637ae8d5 | 40 | u64 length = 0; |
eb13e832 | 41 | u64 owner; |
40819f6f GF |
42 | |
43 | req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); | |
44 | if (IS_ERR(req)) | |
45 | return PTR_ERR(req); | |
70b666c3 SW |
46 | req->r_inode = inode; |
47 | ihold(inode); | |
3bd58143 | 48 | req->r_num_caps = 1; |
40819f6f | 49 | |
637ae8d5 HS |
50 | /* mds requires start and length rather than start and end */ |
51 | if (LLONG_MAX == fl->fl_end) | |
52 | length = 0; | |
53 | else | |
54 | length = fl->fl_end - fl->fl_start + 1; | |
55 | ||
130d1f95 | 56 | owner = secure_addr(fl->fl_owner); |
eb13e832 YZ |
57 | |
58 | dout("ceph_lock_message: rule: %d, op: %d, owner: %llx, pid: %llu, " | |
59 | "start: %llu, length: %llu, wait: %d, type: %d", (int)lock_type, | |
60 | (int)operation, owner, (u64)fl->fl_pid, fl->fl_start, length, | |
61 | wait, fl->fl_type); | |
637ae8d5 | 62 | |
40819f6f GF |
63 | req->r_args.filelock_change.rule = lock_type; |
64 | req->r_args.filelock_change.type = cmd; | |
eb13e832 | 65 | req->r_args.filelock_change.owner = cpu_to_le64(owner); |
637ae8d5 | 66 | req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid); |
637ae8d5 | 67 | req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start); |
40819f6f GF |
68 | req->r_args.filelock_change.length = cpu_to_le64(length); |
69 | req->r_args.filelock_change.wait = wait; | |
70 | ||
71 | err = ceph_mdsc_do_request(mdsc, inode, req); | |
a5b10629 | 72 | |
eb13e832 | 73 | if (operation == CEPH_MDS_OP_GETFILELOCK) { |
a5b10629 HS |
74 | fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid); |
75 | if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type) | |
76 | fl->fl_type = F_RDLCK; | |
77 | else if (CEPH_LOCK_EXCL == req->r_reply_info.filelock_reply->type) | |
78 | fl->fl_type = F_WRLCK; | |
79 | else | |
80 | fl->fl_type = F_UNLCK; | |
81 | ||
82 | fl->fl_start = le64_to_cpu(req->r_reply_info.filelock_reply->start); | |
83 | length = le64_to_cpu(req->r_reply_info.filelock_reply->start) + | |
84 | le64_to_cpu(req->r_reply_info.filelock_reply->length); | |
85 | if (length >= 1) | |
86 | fl->fl_end = length -1; | |
87 | else | |
88 | fl->fl_end = 0; | |
89 | ||
90 | } | |
40819f6f GF |
91 | ceph_mdsc_put_request(req); |
92 | dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " | |
0c1f91f2 | 93 | "length: %llu, wait: %d, type: %d, err code %d", (int)lock_type, |
637ae8d5 HS |
94 | (int)operation, (u64)fl->fl_pid, fl->fl_start, |
95 | length, wait, fl->fl_type, err); | |
40819f6f GF |
96 | return err; |
97 | } | |
98 | ||
99 | /** | |
100 | * Attempt to set an fcntl lock. | |
101 | * For now, this just goes away to the server. Later it may be more awesome. | |
102 | */ | |
103 | int ceph_lock(struct file *file, int cmd, struct file_lock *fl) | |
104 | { | |
40819f6f GF |
105 | u8 lock_cmd; |
106 | int err; | |
107 | u8 wait = 0; | |
108 | u16 op = CEPH_MDS_OP_SETFILELOCK; | |
109 | ||
eb70c0ce YZ |
110 | if (!(fl->fl_flags & FL_POSIX)) |
111 | return -ENOLCK; | |
112 | /* No mandatory locks */ | |
113 | if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK) | |
114 | return -ENOLCK; | |
115 | ||
eb13e832 | 116 | dout("ceph_lock, fl_owner: %p", fl->fl_owner); |
40819f6f GF |
117 | |
118 | /* set wait bit as appropriate, then make command as Ceph expects it*/ | |
0e8e95d6 | 119 | if (IS_GETLK(cmd)) |
40819f6f | 120 | op = CEPH_MDS_OP_GETFILELOCK; |
0e8e95d6 YZ |
121 | else if (IS_SETLKW(cmd)) |
122 | wait = 1; | |
40819f6f GF |
123 | |
124 | if (F_RDLCK == fl->fl_type) | |
125 | lock_cmd = CEPH_LOCK_SHARED; | |
126 | else if (F_WRLCK == fl->fl_type) | |
127 | lock_cmd = CEPH_LOCK_EXCL; | |
128 | else | |
129 | lock_cmd = CEPH_LOCK_UNLOCK; | |
130 | ||
637ae8d5 | 131 | err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl); |
40819f6f | 132 | if (!err) { |
eb13e832 | 133 | if (op != CEPH_MDS_OP_GETFILELOCK) { |
a5b10629 HS |
134 | dout("mds locked, locking locally"); |
135 | err = posix_lock_file(file, fl, NULL); | |
136 | if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { | |
0c1f91f2 SW |
137 | /* undo! This should only happen if |
138 | * the kernel detects local | |
139 | * deadlock. */ | |
a5b10629 HS |
140 | ceph_lock_message(CEPH_LOCK_FCNTL, op, file, |
141 | CEPH_LOCK_UNLOCK, 0, fl); | |
0c1f91f2 SW |
142 | dout("got %d on posix_lock_file, undid lock", |
143 | err); | |
a5b10629 | 144 | } |
40819f6f | 145 | } |
a5b10629 | 146 | |
0c1f91f2 SW |
147 | } else if (err == -ERESTARTSYS) { |
148 | dout("undoing lock\n"); | |
149 | ceph_lock_message(CEPH_LOCK_FCNTL, op, file, | |
150 | CEPH_LOCK_UNLOCK, 0, fl); | |
40819f6f GF |
151 | } |
152 | return err; | |
153 | } | |
154 | ||
155 | int ceph_flock(struct file *file, int cmd, struct file_lock *fl) | |
156 | { | |
40819f6f GF |
157 | u8 lock_cmd; |
158 | int err; | |
0e8e95d6 | 159 | u8 wait = 0; |
40819f6f | 160 | |
eb70c0ce YZ |
161 | if (!(fl->fl_flags & FL_FLOCK)) |
162 | return -ENOLCK; | |
163 | /* No mandatory locks */ | |
164 | if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK) | |
165 | return -ENOLCK; | |
166 | ||
eb13e832 | 167 | dout("ceph_flock, fl_file: %p", fl->fl_file); |
40819f6f | 168 | |
0e8e95d6 YZ |
169 | if (IS_SETLKW(cmd)) |
170 | wait = 1; | |
171 | ||
172 | if (F_RDLCK == fl->fl_type) | |
40819f6f | 173 | lock_cmd = CEPH_LOCK_SHARED; |
0e8e95d6 | 174 | else if (F_WRLCK == fl->fl_type) |
40819f6f GF |
175 | lock_cmd = CEPH_LOCK_EXCL; |
176 | else | |
177 | lock_cmd = CEPH_LOCK_UNLOCK; | |
40819f6f GF |
178 | |
179 | err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, | |
637ae8d5 | 180 | file, lock_cmd, wait, fl); |
40819f6f GF |
181 | if (!err) { |
182 | err = flock_lock_file_wait(file, fl); | |
183 | if (err) { | |
184 | ceph_lock_message(CEPH_LOCK_FLOCK, | |
185 | CEPH_MDS_OP_SETFILELOCK, | |
637ae8d5 | 186 | file, CEPH_LOCK_UNLOCK, 0, fl); |
40819f6f GF |
187 | dout("got %d on flock_lock_file_wait, undid lock", err); |
188 | } | |
0c1f91f2 SW |
189 | } else if (err == -ERESTARTSYS) { |
190 | dout("undoing lock\n"); | |
191 | ceph_lock_message(CEPH_LOCK_FLOCK, | |
192 | CEPH_MDS_OP_SETFILELOCK, | |
193 | file, CEPH_LOCK_UNLOCK, 0, fl); | |
40819f6f GF |
194 | } |
195 | return err; | |
196 | } | |
197 | ||
198 | /** | |
4d1bf79a | 199 | * Must be called with lock_flocks() already held. Fills in the passed |
40819f6f GF |
200 | * counter variables, so you can prepare pagelist metadata before calling |
201 | * ceph_encode_locks. | |
202 | */ | |
203 | void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) | |
204 | { | |
205 | struct file_lock *lock; | |
206 | ||
207 | *fcntl_count = 0; | |
208 | *flock_count = 0; | |
209 | ||
210 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { | |
211 | if (lock->fl_flags & FL_POSIX) | |
212 | ++(*fcntl_count); | |
213 | else if (lock->fl_flags & FL_FLOCK) | |
214 | ++(*flock_count); | |
215 | } | |
216 | dout("counted %d flock locks and %d fcntl locks", | |
217 | *flock_count, *fcntl_count); | |
218 | } | |
219 | ||
220 | /** | |
39be95e9 | 221 | * Encode the flock and fcntl locks for the given inode into the ceph_filelock |
1c8c601a | 222 | * array. Must be called with inode->i_lock already held. |
39be95e9 | 223 | * If we encounter more of a specific lock type than expected, return -ENOSPC. |
40819f6f | 224 | */ |
39be95e9 JS |
225 | int ceph_encode_locks_to_buffer(struct inode *inode, |
226 | struct ceph_filelock *flocks, | |
227 | int num_fcntl_locks, int num_flock_locks) | |
40819f6f GF |
228 | { |
229 | struct file_lock *lock; | |
40819f6f | 230 | int err = 0; |
fca4451a GF |
231 | int seen_fcntl = 0; |
232 | int seen_flock = 0; | |
39be95e9 | 233 | int l = 0; |
40819f6f GF |
234 | |
235 | dout("encoding %d flock and %d fcntl locks", num_flock_locks, | |
236 | num_fcntl_locks); | |
39be95e9 | 237 | |
40819f6f GF |
238 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { |
239 | if (lock->fl_flags & FL_POSIX) { | |
fca4451a GF |
240 | ++seen_fcntl; |
241 | if (seen_fcntl > num_fcntl_locks) { | |
242 | err = -ENOSPC; | |
243 | goto fail; | |
244 | } | |
39be95e9 | 245 | err = lock_to_ceph_filelock(lock, &flocks[l]); |
40819f6f GF |
246 | if (err) |
247 | goto fail; | |
39be95e9 | 248 | ++l; |
40819f6f | 249 | } |
40819f6f | 250 | } |
40819f6f GF |
251 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { |
252 | if (lock->fl_flags & FL_FLOCK) { | |
fca4451a GF |
253 | ++seen_flock; |
254 | if (seen_flock > num_flock_locks) { | |
255 | err = -ENOSPC; | |
256 | goto fail; | |
257 | } | |
39be95e9 | 258 | err = lock_to_ceph_filelock(lock, &flocks[l]); |
40819f6f GF |
259 | if (err) |
260 | goto fail; | |
39be95e9 | 261 | ++l; |
40819f6f | 262 | } |
40819f6f GF |
263 | } |
264 | fail: | |
265 | return err; | |
266 | } | |
267 | ||
39be95e9 JS |
268 | /** |
269 | * Copy the encoded flock and fcntl locks into the pagelist. | |
270 | * Format is: #fcntl locks, sequential fcntl locks, #flock locks, | |
271 | * sequential flock locks. | |
272 | * Returns zero on success. | |
273 | */ | |
274 | int ceph_locks_to_pagelist(struct ceph_filelock *flocks, | |
275 | struct ceph_pagelist *pagelist, | |
276 | int num_fcntl_locks, int num_flock_locks) | |
277 | { | |
278 | int err = 0; | |
279 | __le32 nlocks; | |
280 | ||
281 | nlocks = cpu_to_le32(num_fcntl_locks); | |
282 | err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks)); | |
283 | if (err) | |
284 | goto out_fail; | |
285 | ||
286 | err = ceph_pagelist_append(pagelist, flocks, | |
287 | num_fcntl_locks * sizeof(*flocks)); | |
288 | if (err) | |
289 | goto out_fail; | |
290 | ||
291 | nlocks = cpu_to_le32(num_flock_locks); | |
292 | err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks)); | |
293 | if (err) | |
294 | goto out_fail; | |
295 | ||
296 | err = ceph_pagelist_append(pagelist, | |
297 | &flocks[num_fcntl_locks], | |
298 | num_flock_locks * sizeof(*flocks)); | |
299 | out_fail: | |
300 | return err; | |
301 | } | |
302 | ||
40819f6f GF |
303 | /* |
304 | * Given a pointer to a lock, convert it to a ceph filelock | |
305 | */ | |
306 | int lock_to_ceph_filelock(struct file_lock *lock, | |
307 | struct ceph_filelock *cephlock) | |
308 | { | |
309 | int err = 0; | |
40819f6f GF |
310 | cephlock->start = cpu_to_le64(lock->fl_start); |
311 | cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); | |
312 | cephlock->client = cpu_to_le64(0); | |
eb13e832 | 313 | cephlock->pid = cpu_to_le64((u64)lock->fl_pid); |
130d1f95 | 314 | cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner)); |
40819f6f GF |
315 | |
316 | switch (lock->fl_type) { | |
317 | case F_RDLCK: | |
318 | cephlock->type = CEPH_LOCK_SHARED; | |
319 | break; | |
320 | case F_WRLCK: | |
321 | cephlock->type = CEPH_LOCK_EXCL; | |
322 | break; | |
323 | case F_UNLCK: | |
324 | cephlock->type = CEPH_LOCK_UNLOCK; | |
325 | break; | |
326 | default: | |
327 | dout("Have unknown lock type %d", lock->fl_type); | |
328 | err = -EINVAL; | |
329 | } | |
330 | ||
331 | return err; | |
332 | } |