Commit | Line | Data |
---|---|---|
1107ba88 AZ |
1 | /* |
2 | * Driver giving user-space access to the kernel's xenbus connection | |
3 | * to xenstore. | |
4 | * | |
5 | * Copyright (c) 2005, Christian Limpach | |
6 | * Copyright (c) 2005, Rusty Russell, IBM Corporation | |
7 | * | |
8 | * This program is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU General Public License version 2 | |
10 | * as published by the Free Software Foundation; or, when distributed | |
11 | * separately from the Linux kernel or incorporated into other | |
12 | * software packages, subject to the following license: | |
13 | * | |
14 | * Permission is hereby granted, free of charge, to any person obtaining a copy | |
15 | * of this source file (the "Software"), to deal in the Software without | |
16 | * restriction, including without limitation the rights to use, copy, modify, | |
17 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
18 | * and to permit persons to whom the Software is furnished to do so, subject to | |
19 | * the following conditions: | |
20 | * | |
21 | * The above copyright notice and this permission notice shall be included in | |
22 | * all copies or substantial portions of the Software. | |
23 | * | |
24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
25 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
26 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
27 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
28 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
29 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
30 | * IN THE SOFTWARE. | |
31 | * | |
32 | * Changes: | |
33 | * 2008-10-07 Alex Zeffertt Replaced /proc/xen/xenbus with xenfs filesystem | |
34 | * and /proc/xen compatibility mount point. | |
35 | * Turned xenfs into a loadable module. | |
36 | */ | |
37 | ||
38 | #include <linux/kernel.h> | |
39 | #include <linux/errno.h> | |
40 | #include <linux/uio.h> | |
41 | #include <linux/notifier.h> | |
42 | #include <linux/wait.h> | |
43 | #include <linux/fs.h> | |
44 | #include <linux/poll.h> | |
45 | #include <linux/mutex.h> | |
46 | #include <linux/spinlock.h> | |
47 | #include <linux/mount.h> | |
48 | #include <linux/pagemap.h> | |
49 | #include <linux/uaccess.h> | |
50 | #include <linux/init.h> | |
51 | #include <linux/namei.h> | |
52 | #include <linux/string.h> | |
53 | ||
54 | #include "xenfs.h" | |
55 | #include "../xenbus/xenbus_comms.h" | |
56 | ||
57 | #include <xen/xenbus.h> | |
58 | #include <asm/xen/hypervisor.h> | |
59 | ||
60 | /* | |
61 | * An element of a list of outstanding transactions, for which we're | |
62 | * still waiting a reply. | |
63 | */ | |
64 | struct xenbus_transaction_holder { | |
65 | struct list_head list; | |
66 | struct xenbus_transaction handle; | |
67 | }; | |
68 | ||
69 | /* | |
70 | * A buffer of data on the queue. | |
71 | */ | |
72 | struct read_buffer { | |
73 | struct list_head list; | |
74 | unsigned int cons; | |
75 | unsigned int len; | |
76 | char msg[]; | |
77 | }; | |
78 | ||
79 | struct xenbus_file_priv { | |
80 | /* | |
81 | * msgbuffer_mutex is held while partial requests are built up | |
82 | * and complete requests are acted on. It therefore protects | |
83 | * the "transactions" and "watches" lists, and the partial | |
84 | * request length and buffer. | |
85 | * | |
86 | * reply_mutex protects the reply being built up to return to | |
87 | * usermode. It nests inside msgbuffer_mutex but may be held | |
88 | * alone during a watch callback. | |
89 | */ | |
90 | struct mutex msgbuffer_mutex; | |
91 | ||
92 | /* In-progress transactions */ | |
93 | struct list_head transactions; | |
94 | ||
95 | /* Active watches. */ | |
96 | struct list_head watches; | |
97 | ||
98 | /* Partial request. */ | |
99 | unsigned int len; | |
100 | union { | |
101 | struct xsd_sockmsg msg; | |
102 | char buffer[PAGE_SIZE]; | |
103 | } u; | |
104 | ||
105 | /* Response queue. */ | |
106 | struct mutex reply_mutex; | |
107 | struct list_head read_buffers; | |
108 | wait_queue_head_t read_waitq; | |
109 | ||
110 | }; | |
111 | ||
112 | /* Read out any raw xenbus messages queued up. */ | |
113 | static ssize_t xenbus_file_read(struct file *filp, | |
114 | char __user *ubuf, | |
115 | size_t len, loff_t *ppos) | |
116 | { | |
117 | struct xenbus_file_priv *u = filp->private_data; | |
118 | struct read_buffer *rb; | |
119 | unsigned i; | |
120 | int ret; | |
121 | ||
122 | mutex_lock(&u->reply_mutex); | |
123 | while (list_empty(&u->read_buffers)) { | |
124 | mutex_unlock(&u->reply_mutex); | |
125 | ret = wait_event_interruptible(u->read_waitq, | |
126 | !list_empty(&u->read_buffers)); | |
127 | if (ret) | |
128 | return ret; | |
129 | mutex_lock(&u->reply_mutex); | |
130 | } | |
131 | ||
132 | rb = list_entry(u->read_buffers.next, struct read_buffer, list); | |
133 | i = 0; | |
134 | while (i < len) { | |
135 | unsigned sz = min((unsigned)len - i, rb->len - rb->cons); | |
136 | ||
137 | ret = copy_to_user(ubuf + i, &rb->msg[rb->cons], sz); | |
138 | ||
139 | i += sz - ret; | |
140 | rb->cons += sz - ret; | |
141 | ||
142 | if (ret != sz) { | |
143 | if (i == 0) | |
144 | i = -EFAULT; | |
145 | goto out; | |
146 | } | |
147 | ||
148 | /* Clear out buffer if it has been consumed */ | |
149 | if (rb->cons == rb->len) { | |
150 | list_del(&rb->list); | |
151 | kfree(rb); | |
152 | if (list_empty(&u->read_buffers)) | |
153 | break; | |
154 | rb = list_entry(u->read_buffers.next, | |
155 | struct read_buffer, list); | |
156 | } | |
157 | } | |
158 | ||
159 | out: | |
160 | mutex_unlock(&u->reply_mutex); | |
161 | return i; | |
162 | } | |
163 | ||
164 | /* | |
165 | * Add a buffer to the queue. Caller must hold the appropriate lock | |
166 | * if the queue is not local. (Commonly the caller will build up | |
167 | * multiple queued buffers on a temporary local list, and then add it | |
168 | * to the appropriate list under lock once all the buffers have een | |
169 | * successfully allocated.) | |
170 | */ | |
171 | static int queue_reply(struct list_head *queue, const void *data, size_t len) | |
172 | { | |
173 | struct read_buffer *rb; | |
174 | ||
175 | if (len == 0) | |
176 | return 0; | |
177 | ||
178 | rb = kmalloc(sizeof(*rb) + len, GFP_KERNEL); | |
179 | if (rb == NULL) | |
180 | return -ENOMEM; | |
181 | ||
182 | rb->cons = 0; | |
183 | rb->len = len; | |
184 | ||
185 | memcpy(rb->msg, data, len); | |
186 | ||
187 | list_add_tail(&rb->list, queue); | |
188 | return 0; | |
189 | } | |
190 | ||
191 | /* | |
192 | * Free all the read_buffer s on a list. | |
193 | * Caller must have sole reference to list. | |
194 | */ | |
195 | static void queue_cleanup(struct list_head *list) | |
196 | { | |
197 | struct read_buffer *rb; | |
198 | ||
199 | while (!list_empty(list)) { | |
200 | rb = list_entry(list->next, struct read_buffer, list); | |
201 | list_del(list->next); | |
202 | kfree(rb); | |
203 | } | |
204 | } | |
205 | ||
206 | struct watch_adapter { | |
207 | struct list_head list; | |
208 | struct xenbus_watch watch; | |
209 | struct xenbus_file_priv *dev_data; | |
210 | char *token; | |
211 | }; | |
212 | ||
213 | static void free_watch_adapter(struct watch_adapter *watch) | |
214 | { | |
215 | kfree(watch->watch.node); | |
216 | kfree(watch->token); | |
217 | kfree(watch); | |
218 | } | |
219 | ||
220 | static struct watch_adapter *alloc_watch_adapter(const char *path, | |
221 | const char *token) | |
222 | { | |
223 | struct watch_adapter *watch; | |
224 | ||
225 | watch = kzalloc(sizeof(*watch), GFP_KERNEL); | |
226 | if (watch == NULL) | |
227 | goto out_fail; | |
228 | ||
229 | watch->watch.node = kstrdup(path, GFP_KERNEL); | |
230 | if (watch->watch.node == NULL) | |
231 | goto out_free; | |
232 | ||
233 | watch->token = kstrdup(token, GFP_KERNEL); | |
234 | if (watch->token == NULL) | |
235 | goto out_free; | |
236 | ||
237 | return watch; | |
238 | ||
239 | out_free: | |
240 | free_watch_adapter(watch); | |
241 | ||
242 | out_fail: | |
243 | return NULL; | |
244 | } | |
245 | ||
246 | static void watch_fired(struct xenbus_watch *watch, | |
247 | const char **vec, | |
248 | unsigned int len) | |
249 | { | |
250 | struct watch_adapter *adap; | |
251 | struct xsd_sockmsg hdr; | |
252 | const char *path, *token; | |
253 | int path_len, tok_len, body_len, data_len = 0; | |
254 | int ret; | |
255 | LIST_HEAD(staging_q); | |
256 | ||
257 | adap = container_of(watch, struct watch_adapter, watch); | |
258 | ||
259 | path = vec[XS_WATCH_PATH]; | |
260 | token = adap->token; | |
261 | ||
262 | path_len = strlen(path) + 1; | |
263 | tok_len = strlen(token) + 1; | |
264 | if (len > 2) | |
265 | data_len = vec[len] - vec[2] + 1; | |
266 | body_len = path_len + tok_len + data_len; | |
267 | ||
268 | hdr.type = XS_WATCH_EVENT; | |
269 | hdr.len = body_len; | |
270 | ||
271 | mutex_lock(&adap->dev_data->reply_mutex); | |
272 | ||
273 | ret = queue_reply(&staging_q, &hdr, sizeof(hdr)); | |
274 | if (!ret) | |
275 | ret = queue_reply(&staging_q, path, path_len); | |
276 | if (!ret) | |
277 | ret = queue_reply(&staging_q, token, tok_len); | |
278 | if (!ret && len > 2) | |
279 | ret = queue_reply(&staging_q, vec[2], data_len); | |
280 | ||
281 | if (!ret) { | |
282 | /* success: pass reply list onto watcher */ | |
283 | list_splice_tail(&staging_q, &adap->dev_data->read_buffers); | |
284 | wake_up(&adap->dev_data->read_waitq); | |
285 | } else | |
286 | queue_cleanup(&staging_q); | |
287 | ||
288 | mutex_unlock(&adap->dev_data->reply_mutex); | |
289 | } | |
290 | ||
291 | static int xenbus_write_transaction(unsigned msg_type, | |
292 | struct xenbus_file_priv *u) | |
293 | { | |
e88a0faa | 294 | int rc; |
1107ba88 AZ |
295 | void *reply; |
296 | struct xenbus_transaction_holder *trans = NULL; | |
297 | LIST_HEAD(staging_q); | |
298 | ||
299 | if (msg_type == XS_TRANSACTION_START) { | |
300 | trans = kmalloc(sizeof(*trans), GFP_KERNEL); | |
301 | if (!trans) { | |
302 | rc = -ENOMEM; | |
303 | goto out; | |
304 | } | |
305 | } | |
306 | ||
307 | reply = xenbus_dev_request_and_reply(&u->u.msg); | |
308 | if (IS_ERR(reply)) { | |
309 | kfree(trans); | |
310 | rc = PTR_ERR(reply); | |
311 | goto out; | |
312 | } | |
313 | ||
314 | if (msg_type == XS_TRANSACTION_START) { | |
315 | trans->handle.id = simple_strtoul(reply, NULL, 0); | |
316 | ||
317 | list_add(&trans->list, &u->transactions); | |
318 | } else if (msg_type == XS_TRANSACTION_END) { | |
319 | list_for_each_entry(trans, &u->transactions, list) | |
320 | if (trans->handle.id == u->u.msg.tx_id) | |
321 | break; | |
322 | BUG_ON(&trans->list == &u->transactions); | |
323 | list_del(&trans->list); | |
324 | ||
325 | kfree(trans); | |
326 | } | |
327 | ||
328 | mutex_lock(&u->reply_mutex); | |
e88a0faa IC |
329 | rc = queue_reply(&staging_q, &u->u.msg, sizeof(u->u.msg)); |
330 | if (!rc) | |
331 | rc = queue_reply(&staging_q, reply, u->u.msg.len); | |
332 | if (!rc) { | |
1107ba88 AZ |
333 | list_splice_tail(&staging_q, &u->read_buffers); |
334 | wake_up(&u->read_waitq); | |
335 | } else { | |
336 | queue_cleanup(&staging_q); | |
1107ba88 AZ |
337 | } |
338 | mutex_unlock(&u->reply_mutex); | |
339 | ||
340 | kfree(reply); | |
341 | ||
342 | out: | |
343 | return rc; | |
344 | } | |
345 | ||
346 | static int xenbus_write_watch(unsigned msg_type, struct xenbus_file_priv *u) | |
347 | { | |
348 | struct watch_adapter *watch, *tmp_watch; | |
349 | char *path, *token; | |
350 | int err, rc; | |
351 | LIST_HEAD(staging_q); | |
352 | ||
353 | path = u->u.buffer + sizeof(u->u.msg); | |
354 | token = memchr(path, 0, u->u.msg.len); | |
355 | if (token == NULL) { | |
356 | rc = -EILSEQ; | |
357 | goto out; | |
358 | } | |
359 | token++; | |
360 | ||
361 | if (msg_type == XS_WATCH) { | |
362 | watch = alloc_watch_adapter(path, token); | |
363 | if (watch == NULL) { | |
364 | rc = -ENOMEM; | |
365 | goto out; | |
366 | } | |
367 | ||
368 | watch->watch.callback = watch_fired; | |
369 | watch->dev_data = u; | |
370 | ||
371 | err = register_xenbus_watch(&watch->watch); | |
372 | if (err) { | |
373 | free_watch_adapter(watch); | |
374 | rc = err; | |
375 | goto out; | |
376 | } | |
377 | list_add(&watch->list, &u->watches); | |
378 | } else { | |
379 | list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) { | |
380 | if (!strcmp(watch->token, token) && | |
381 | !strcmp(watch->watch.node, path)) { | |
382 | unregister_xenbus_watch(&watch->watch); | |
383 | list_del(&watch->list); | |
384 | free_watch_adapter(watch); | |
385 | break; | |
386 | } | |
387 | } | |
388 | } | |
389 | ||
390 | /* Success. Synthesize a reply to say all is OK. */ | |
391 | { | |
392 | struct { | |
393 | struct xsd_sockmsg hdr; | |
394 | char body[3]; | |
395 | } __packed reply = { | |
396 | { | |
397 | .type = msg_type, | |
398 | .len = sizeof(reply.body) | |
399 | }, | |
400 | "OK" | |
401 | }; | |
402 | ||
403 | mutex_lock(&u->reply_mutex); | |
404 | rc = queue_reply(&u->read_buffers, &reply, sizeof(reply)); | |
405 | mutex_unlock(&u->reply_mutex); | |
406 | } | |
407 | ||
408 | out: | |
409 | return rc; | |
410 | } | |
411 | ||
412 | static ssize_t xenbus_file_write(struct file *filp, | |
413 | const char __user *ubuf, | |
414 | size_t len, loff_t *ppos) | |
415 | { | |
416 | struct xenbus_file_priv *u = filp->private_data; | |
417 | uint32_t msg_type; | |
418 | int rc = len; | |
419 | int ret; | |
420 | LIST_HEAD(staging_q); | |
421 | ||
422 | /* | |
423 | * We're expecting usermode to be writing properly formed | |
424 | * xenbus messages. If they write an incomplete message we | |
425 | * buffer it up. Once it is complete, we act on it. | |
426 | */ | |
427 | ||
428 | /* | |
429 | * Make sure concurrent writers can't stomp all over each | |
430 | * other's messages and make a mess of our partial message | |
431 | * buffer. We don't make any attemppt to stop multiple | |
432 | * writers from making a mess of each other's incomplete | |
433 | * messages; we're just trying to guarantee our own internal | |
434 | * consistency and make sure that single writes are handled | |
435 | * atomically. | |
436 | */ | |
437 | mutex_lock(&u->msgbuffer_mutex); | |
438 | ||
439 | /* Get this out of the way early to avoid confusion */ | |
440 | if (len == 0) | |
441 | goto out; | |
442 | ||
443 | /* Can't write a xenbus message larger we can buffer */ | |
444 | if ((len + u->len) > sizeof(u->u.buffer)) { | |
445 | /* On error, dump existing buffer */ | |
446 | u->len = 0; | |
447 | rc = -EINVAL; | |
448 | goto out; | |
449 | } | |
450 | ||
451 | ret = copy_from_user(u->u.buffer + u->len, ubuf, len); | |
452 | ||
453 | if (ret == len) { | |
454 | rc = -EFAULT; | |
455 | goto out; | |
456 | } | |
457 | ||
458 | /* Deal with a partial copy. */ | |
459 | len -= ret; | |
460 | rc = len; | |
461 | ||
462 | u->len += len; | |
463 | ||
464 | /* Return if we haven't got a full message yet */ | |
465 | if (u->len < sizeof(u->u.msg)) | |
466 | goto out; /* not even the header yet */ | |
467 | ||
468 | /* If we're expecting a message that's larger than we can | |
469 | possibly send, dump what we have and return an error. */ | |
470 | if ((sizeof(u->u.msg) + u->u.msg.len) > sizeof(u->u.buffer)) { | |
471 | rc = -E2BIG; | |
472 | u->len = 0; | |
473 | goto out; | |
474 | } | |
475 | ||
476 | if (u->len < (sizeof(u->u.msg) + u->u.msg.len)) | |
477 | goto out; /* incomplete data portion */ | |
478 | ||
479 | /* | |
480 | * OK, now we have a complete message. Do something with it. | |
481 | */ | |
482 | ||
483 | msg_type = u->u.msg.type; | |
484 | ||
485 | switch (msg_type) { | |
486 | case XS_TRANSACTION_START: | |
487 | case XS_TRANSACTION_END: | |
488 | case XS_DIRECTORY: | |
489 | case XS_READ: | |
490 | case XS_GET_PERMS: | |
491 | case XS_RELEASE: | |
492 | case XS_GET_DOMAIN_PATH: | |
493 | case XS_WRITE: | |
494 | case XS_MKDIR: | |
495 | case XS_RM: | |
496 | case XS_SET_PERMS: | |
497 | /* Send out a transaction */ | |
498 | ret = xenbus_write_transaction(msg_type, u); | |
499 | break; | |
500 | ||
501 | case XS_WATCH: | |
502 | case XS_UNWATCH: | |
503 | /* (Un)Ask for some path to be watched for changes */ | |
504 | ret = xenbus_write_watch(msg_type, u); | |
505 | break; | |
506 | ||
507 | default: | |
508 | ret = -EINVAL; | |
509 | break; | |
510 | } | |
511 | if (ret != 0) | |
512 | rc = ret; | |
513 | ||
514 | /* Buffered message consumed */ | |
515 | u->len = 0; | |
516 | ||
517 | out: | |
518 | mutex_unlock(&u->msgbuffer_mutex); | |
519 | return rc; | |
520 | } | |
521 | ||
522 | static int xenbus_file_open(struct inode *inode, struct file *filp) | |
523 | { | |
524 | struct xenbus_file_priv *u; | |
525 | ||
526 | if (xen_store_evtchn == 0) | |
527 | return -ENOENT; | |
528 | ||
529 | nonseekable_open(inode, filp); | |
530 | ||
531 | u = kzalloc(sizeof(*u), GFP_KERNEL); | |
532 | if (u == NULL) | |
533 | return -ENOMEM; | |
534 | ||
535 | INIT_LIST_HEAD(&u->transactions); | |
536 | INIT_LIST_HEAD(&u->watches); | |
537 | INIT_LIST_HEAD(&u->read_buffers); | |
538 | init_waitqueue_head(&u->read_waitq); | |
539 | ||
540 | mutex_init(&u->reply_mutex); | |
541 | mutex_init(&u->msgbuffer_mutex); | |
542 | ||
543 | filp->private_data = u; | |
544 | ||
545 | return 0; | |
546 | } | |
547 | ||
548 | static int xenbus_file_release(struct inode *inode, struct file *filp) | |
549 | { | |
550 | struct xenbus_file_priv *u = filp->private_data; | |
551 | struct xenbus_transaction_holder *trans, *tmp; | |
552 | struct watch_adapter *watch, *tmp_watch; | |
553 | ||
554 | /* | |
555 | * No need for locking here because there are no other users, | |
556 | * by definition. | |
557 | */ | |
558 | ||
559 | list_for_each_entry_safe(trans, tmp, &u->transactions, list) { | |
560 | xenbus_transaction_end(trans->handle, 1); | |
561 | list_del(&trans->list); | |
562 | kfree(trans); | |
563 | } | |
564 | ||
565 | list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) { | |
566 | unregister_xenbus_watch(&watch->watch); | |
567 | list_del(&watch->list); | |
568 | free_watch_adapter(watch); | |
569 | } | |
570 | ||
571 | kfree(u); | |
572 | ||
573 | return 0; | |
574 | } | |
575 | ||
576 | static unsigned int xenbus_file_poll(struct file *file, poll_table *wait) | |
577 | { | |
578 | struct xenbus_file_priv *u = file->private_data; | |
579 | ||
580 | poll_wait(file, &u->read_waitq, wait); | |
581 | if (!list_empty(&u->read_buffers)) | |
582 | return POLLIN | POLLRDNORM; | |
583 | return 0; | |
584 | } | |
585 | ||
586 | const struct file_operations xenbus_file_ops = { | |
587 | .read = xenbus_file_read, | |
588 | .write = xenbus_file_write, | |
589 | .open = xenbus_file_open, | |
590 | .release = xenbus_file_release, | |
591 | .poll = xenbus_file_poll, | |
592 | }; |