Commit | Line | Data |
---|---|---|
3d14c5d2 | 1 | #include <linux/ceph/ceph_debug.h> |
40819f6f GF |
2 | |
3 | #include <linux/file.h> | |
4 | #include <linux/namei.h> | |
eb13e832 | 5 | #include <linux/random.h> |
40819f6f GF |
6 | |
7 | #include "super.h" | |
8 | #include "mds_client.h" | |
3d14c5d2 | 9 | #include <linux/ceph/pagelist.h> |
40819f6f | 10 | |
eb13e832 YZ |
11 | static u64 lock_secret; |
12 | ||
13 | static inline u64 secure_addr(void *addr) | |
14 | { | |
15 | u64 v = lock_secret ^ (u64)(unsigned long)addr; | |
16 | /* | |
17 | * Set the most significant bit, so that MDS knows the 'owner' | |
18 | * is sufficient to identify the owner of lock. (old code uses | |
19 | * both 'owner' and 'pid') | |
20 | */ | |
21 | v |= (1ULL << 63); | |
22 | return v; | |
23 | } | |
24 | ||
25 | void __init ceph_flock_init(void) | |
26 | { | |
27 | get_random_bytes(&lock_secret, sizeof(lock_secret)); | |
28 | } | |
29 | ||
40819f6f GF |
30 | /** |
31 | * Implement fcntl and flock locking functions. | |
32 | */ | |
33 | static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, | |
637ae8d5 | 34 | int cmd, u8 wait, struct file_lock *fl) |
40819f6f | 35 | { |
496ad9aa | 36 | struct inode *inode = file_inode(file); |
eb13e832 | 37 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; |
40819f6f GF |
38 | struct ceph_mds_request *req; |
39 | int err; | |
637ae8d5 | 40 | u64 length = 0; |
eb13e832 | 41 | u64 owner; |
40819f6f GF |
42 | |
43 | req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); | |
44 | if (IS_ERR(req)) | |
45 | return PTR_ERR(req); | |
70b666c3 SW |
46 | req->r_inode = inode; |
47 | ihold(inode); | |
40819f6f | 48 | |
637ae8d5 HS |
49 | /* mds requires start and length rather than start and end */ |
50 | if (LLONG_MAX == fl->fl_end) | |
51 | length = 0; | |
52 | else | |
53 | length = fl->fl_end - fl->fl_start + 1; | |
54 | ||
eb13e832 YZ |
55 | if (lock_type == CEPH_LOCK_FCNTL) |
56 | owner = secure_addr(fl->fl_owner); | |
57 | else | |
58 | owner = secure_addr(fl->fl_file); | |
59 | ||
60 | dout("ceph_lock_message: rule: %d, op: %d, owner: %llx, pid: %llu, " | |
61 | "start: %llu, length: %llu, wait: %d, type: %d", (int)lock_type, | |
62 | (int)operation, owner, (u64)fl->fl_pid, fl->fl_start, length, | |
63 | wait, fl->fl_type); | |
637ae8d5 | 64 | |
40819f6f GF |
65 | req->r_args.filelock_change.rule = lock_type; |
66 | req->r_args.filelock_change.type = cmd; | |
eb13e832 | 67 | req->r_args.filelock_change.owner = cpu_to_le64(owner); |
637ae8d5 | 68 | req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid); |
637ae8d5 | 69 | req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start); |
40819f6f GF |
70 | req->r_args.filelock_change.length = cpu_to_le64(length); |
71 | req->r_args.filelock_change.wait = wait; | |
72 | ||
73 | err = ceph_mdsc_do_request(mdsc, inode, req); | |
a5b10629 | 74 | |
eb13e832 | 75 | if (operation == CEPH_MDS_OP_GETFILELOCK) { |
a5b10629 HS |
76 | fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid); |
77 | if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type) | |
78 | fl->fl_type = F_RDLCK; | |
79 | else if (CEPH_LOCK_EXCL == req->r_reply_info.filelock_reply->type) | |
80 | fl->fl_type = F_WRLCK; | |
81 | else | |
82 | fl->fl_type = F_UNLCK; | |
83 | ||
84 | fl->fl_start = le64_to_cpu(req->r_reply_info.filelock_reply->start); | |
85 | length = le64_to_cpu(req->r_reply_info.filelock_reply->start) + | |
86 | le64_to_cpu(req->r_reply_info.filelock_reply->length); | |
87 | if (length >= 1) | |
88 | fl->fl_end = length -1; | |
89 | else | |
90 | fl->fl_end = 0; | |
91 | ||
92 | } | |
40819f6f GF |
93 | ceph_mdsc_put_request(req); |
94 | dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " | |
0c1f91f2 | 95 | "length: %llu, wait: %d, type: %d, err code %d", (int)lock_type, |
637ae8d5 HS |
96 | (int)operation, (u64)fl->fl_pid, fl->fl_start, |
97 | length, wait, fl->fl_type, err); | |
40819f6f GF |
98 | return err; |
99 | } | |
100 | ||
101 | /** | |
102 | * Attempt to set an fcntl lock. | |
103 | * For now, this just goes away to the server. Later it may be more awesome. | |
104 | */ | |
105 | int ceph_lock(struct file *file, int cmd, struct file_lock *fl) | |
106 | { | |
40819f6f GF |
107 | u8 lock_cmd; |
108 | int err; | |
109 | u8 wait = 0; | |
110 | u16 op = CEPH_MDS_OP_SETFILELOCK; | |
111 | ||
eb70c0ce YZ |
112 | if (!(fl->fl_flags & FL_POSIX)) |
113 | return -ENOLCK; | |
114 | /* No mandatory locks */ | |
115 | if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK) | |
116 | return -ENOLCK; | |
117 | ||
eb13e832 | 118 | dout("ceph_lock, fl_owner: %p", fl->fl_owner); |
40819f6f GF |
119 | |
120 | /* set wait bit as appropriate, then make command as Ceph expects it*/ | |
0e8e95d6 | 121 | if (IS_GETLK(cmd)) |
40819f6f | 122 | op = CEPH_MDS_OP_GETFILELOCK; |
0e8e95d6 YZ |
123 | else if (IS_SETLKW(cmd)) |
124 | wait = 1; | |
40819f6f GF |
125 | |
126 | if (F_RDLCK == fl->fl_type) | |
127 | lock_cmd = CEPH_LOCK_SHARED; | |
128 | else if (F_WRLCK == fl->fl_type) | |
129 | lock_cmd = CEPH_LOCK_EXCL; | |
130 | else | |
131 | lock_cmd = CEPH_LOCK_UNLOCK; | |
132 | ||
637ae8d5 | 133 | err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl); |
40819f6f | 134 | if (!err) { |
eb13e832 | 135 | if (op != CEPH_MDS_OP_GETFILELOCK) { |
a5b10629 HS |
136 | dout("mds locked, locking locally"); |
137 | err = posix_lock_file(file, fl, NULL); | |
138 | if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { | |
0c1f91f2 SW |
139 | /* undo! This should only happen if |
140 | * the kernel detects local | |
141 | * deadlock. */ | |
a5b10629 HS |
142 | ceph_lock_message(CEPH_LOCK_FCNTL, op, file, |
143 | CEPH_LOCK_UNLOCK, 0, fl); | |
0c1f91f2 SW |
144 | dout("got %d on posix_lock_file, undid lock", |
145 | err); | |
a5b10629 | 146 | } |
40819f6f | 147 | } |
a5b10629 | 148 | |
0c1f91f2 SW |
149 | } else if (err == -ERESTARTSYS) { |
150 | dout("undoing lock\n"); | |
151 | ceph_lock_message(CEPH_LOCK_FCNTL, op, file, | |
152 | CEPH_LOCK_UNLOCK, 0, fl); | |
40819f6f GF |
153 | } |
154 | return err; | |
155 | } | |
156 | ||
157 | int ceph_flock(struct file *file, int cmd, struct file_lock *fl) | |
158 | { | |
40819f6f GF |
159 | u8 lock_cmd; |
160 | int err; | |
0e8e95d6 | 161 | u8 wait = 0; |
40819f6f | 162 | |
eb70c0ce YZ |
163 | if (!(fl->fl_flags & FL_FLOCK)) |
164 | return -ENOLCK; | |
165 | /* No mandatory locks */ | |
166 | if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK) | |
167 | return -ENOLCK; | |
168 | ||
eb13e832 | 169 | dout("ceph_flock, fl_file: %p", fl->fl_file); |
40819f6f | 170 | |
0e8e95d6 YZ |
171 | if (IS_SETLKW(cmd)) |
172 | wait = 1; | |
173 | ||
174 | if (F_RDLCK == fl->fl_type) | |
40819f6f | 175 | lock_cmd = CEPH_LOCK_SHARED; |
0e8e95d6 | 176 | else if (F_WRLCK == fl->fl_type) |
40819f6f GF |
177 | lock_cmd = CEPH_LOCK_EXCL; |
178 | else | |
179 | lock_cmd = CEPH_LOCK_UNLOCK; | |
40819f6f GF |
180 | |
181 | err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, | |
637ae8d5 | 182 | file, lock_cmd, wait, fl); |
40819f6f GF |
183 | if (!err) { |
184 | err = flock_lock_file_wait(file, fl); | |
185 | if (err) { | |
186 | ceph_lock_message(CEPH_LOCK_FLOCK, | |
187 | CEPH_MDS_OP_SETFILELOCK, | |
637ae8d5 | 188 | file, CEPH_LOCK_UNLOCK, 0, fl); |
40819f6f GF |
189 | dout("got %d on flock_lock_file_wait, undid lock", err); |
190 | } | |
0c1f91f2 SW |
191 | } else if (err == -ERESTARTSYS) { |
192 | dout("undoing lock\n"); | |
193 | ceph_lock_message(CEPH_LOCK_FLOCK, | |
194 | CEPH_MDS_OP_SETFILELOCK, | |
195 | file, CEPH_LOCK_UNLOCK, 0, fl); | |
40819f6f GF |
196 | } |
197 | return err; | |
198 | } | |
199 | ||
200 | /** | |
4d1bf79a | 201 | * Must be called with lock_flocks() already held. Fills in the passed |
40819f6f GF |
202 | * counter variables, so you can prepare pagelist metadata before calling |
203 | * ceph_encode_locks. | |
204 | */ | |
205 | void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) | |
206 | { | |
207 | struct file_lock *lock; | |
208 | ||
209 | *fcntl_count = 0; | |
210 | *flock_count = 0; | |
211 | ||
212 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { | |
213 | if (lock->fl_flags & FL_POSIX) | |
214 | ++(*fcntl_count); | |
215 | else if (lock->fl_flags & FL_FLOCK) | |
216 | ++(*flock_count); | |
217 | } | |
218 | dout("counted %d flock locks and %d fcntl locks", | |
219 | *flock_count, *fcntl_count); | |
220 | } | |
221 | ||
222 | /** | |
39be95e9 | 223 | * Encode the flock and fcntl locks for the given inode into the ceph_filelock |
1c8c601a | 224 | * array. Must be called with inode->i_lock already held. |
39be95e9 | 225 | * If we encounter more of a specific lock type than expected, return -ENOSPC. |
40819f6f | 226 | */ |
39be95e9 JS |
227 | int ceph_encode_locks_to_buffer(struct inode *inode, |
228 | struct ceph_filelock *flocks, | |
229 | int num_fcntl_locks, int num_flock_locks) | |
40819f6f GF |
230 | { |
231 | struct file_lock *lock; | |
40819f6f | 232 | int err = 0; |
fca4451a GF |
233 | int seen_fcntl = 0; |
234 | int seen_flock = 0; | |
39be95e9 | 235 | int l = 0; |
40819f6f GF |
236 | |
237 | dout("encoding %d flock and %d fcntl locks", num_flock_locks, | |
238 | num_fcntl_locks); | |
39be95e9 | 239 | |
40819f6f GF |
240 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { |
241 | if (lock->fl_flags & FL_POSIX) { | |
fca4451a GF |
242 | ++seen_fcntl; |
243 | if (seen_fcntl > num_fcntl_locks) { | |
244 | err = -ENOSPC; | |
245 | goto fail; | |
246 | } | |
39be95e9 | 247 | err = lock_to_ceph_filelock(lock, &flocks[l]); |
40819f6f GF |
248 | if (err) |
249 | goto fail; | |
39be95e9 | 250 | ++l; |
40819f6f | 251 | } |
40819f6f | 252 | } |
40819f6f GF |
253 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { |
254 | if (lock->fl_flags & FL_FLOCK) { | |
fca4451a GF |
255 | ++seen_flock; |
256 | if (seen_flock > num_flock_locks) { | |
257 | err = -ENOSPC; | |
258 | goto fail; | |
259 | } | |
39be95e9 | 260 | err = lock_to_ceph_filelock(lock, &flocks[l]); |
40819f6f GF |
261 | if (err) |
262 | goto fail; | |
39be95e9 | 263 | ++l; |
40819f6f | 264 | } |
40819f6f GF |
265 | } |
266 | fail: | |
267 | return err; | |
268 | } | |
269 | ||
39be95e9 JS |
270 | /** |
271 | * Copy the encoded flock and fcntl locks into the pagelist. | |
272 | * Format is: #fcntl locks, sequential fcntl locks, #flock locks, | |
273 | * sequential flock locks. | |
274 | * Returns zero on success. | |
275 | */ | |
276 | int ceph_locks_to_pagelist(struct ceph_filelock *flocks, | |
277 | struct ceph_pagelist *pagelist, | |
278 | int num_fcntl_locks, int num_flock_locks) | |
279 | { | |
280 | int err = 0; | |
281 | __le32 nlocks; | |
282 | ||
283 | nlocks = cpu_to_le32(num_fcntl_locks); | |
284 | err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks)); | |
285 | if (err) | |
286 | goto out_fail; | |
287 | ||
288 | err = ceph_pagelist_append(pagelist, flocks, | |
289 | num_fcntl_locks * sizeof(*flocks)); | |
290 | if (err) | |
291 | goto out_fail; | |
292 | ||
293 | nlocks = cpu_to_le32(num_flock_locks); | |
294 | err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks)); | |
295 | if (err) | |
296 | goto out_fail; | |
297 | ||
298 | err = ceph_pagelist_append(pagelist, | |
299 | &flocks[num_fcntl_locks], | |
300 | num_flock_locks * sizeof(*flocks)); | |
301 | out_fail: | |
302 | return err; | |
303 | } | |
304 | ||
40819f6f GF |
305 | /* |
306 | * Given a pointer to a lock, convert it to a ceph filelock | |
307 | */ | |
308 | int lock_to_ceph_filelock(struct file_lock *lock, | |
309 | struct ceph_filelock *cephlock) | |
310 | { | |
311 | int err = 0; | |
40819f6f GF |
312 | cephlock->start = cpu_to_le64(lock->fl_start); |
313 | cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); | |
314 | cephlock->client = cpu_to_le64(0); | |
eb13e832 YZ |
315 | cephlock->pid = cpu_to_le64((u64)lock->fl_pid); |
316 | if (lock->fl_flags & FL_POSIX) | |
317 | cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner)); | |
318 | else | |
319 | cephlock->owner = cpu_to_le64(secure_addr(lock->fl_file)); | |
40819f6f GF |
320 | |
321 | switch (lock->fl_type) { | |
322 | case F_RDLCK: | |
323 | cephlock->type = CEPH_LOCK_SHARED; | |
324 | break; | |
325 | case F_WRLCK: | |
326 | cephlock->type = CEPH_LOCK_EXCL; | |
327 | break; | |
328 | case F_UNLCK: | |
329 | cephlock->type = CEPH_LOCK_UNLOCK; | |
330 | break; | |
331 | default: | |
332 | dout("Have unknown lock type %d", lock->fl_type); | |
333 | err = -EINVAL; | |
334 | } | |
335 | ||
336 | return err; | |
337 | } |