Commit | Line | Data |
---|---|---|
19f1537b | 1 | /*P:050 Lguest guests use a very simple method to describe devices. It's a |
a6bd8e13 | 2 | * series of device descriptors contained just above the top of normal Guest |
19f1537b RR |
3 | * memory. |
4 | * | |
5 | * We use the standard "virtio" device infrastructure, which provides us with a | |
6 | * console, a network and a block driver. Each one expects some configuration | |
a6bd8e13 | 7 | * information and a "virtqueue" or two to send and receive data. :*/ |
19f1537b RR |
8 | #include <linux/init.h> |
9 | #include <linux/bootmem.h> | |
10 | #include <linux/lguest_launcher.h> | |
11 | #include <linux/virtio.h> | |
12 | #include <linux/virtio_config.h> | |
13 | #include <linux/interrupt.h> | |
14 | #include <linux/virtio_ring.h> | |
15 | #include <linux/err.h> | |
16 | #include <asm/io.h> | |
17 | #include <asm/paravirt.h> | |
18 | #include <asm/lguest_hcall.h> | |
19 | ||
20 | /* The pointer to our (page) of device descriptions. */ | |
21 | static void *lguest_devices; | |
22 | ||
19f1537b RR |
23 | /* For Guests, device memory can be used as normal memory, so we cast away the |
24 | * __iomem to quieten sparse. */ | |
25 | static inline void *lguest_map(unsigned long phys_addr, unsigned long pages) | |
26 | { | |
e27810f1 | 27 | return (__force void *)ioremap_cache(phys_addr, PAGE_SIZE*pages); |
19f1537b RR |
28 | } |
29 | ||
30 | static inline void lguest_unmap(void *addr) | |
31 | { | |
32 | iounmap((__force void __iomem *)addr); | |
33 | } | |
34 | ||
35 | /*D:100 Each lguest device is just a virtio device plus a pointer to its entry | |
36 | * in the lguest_devices page. */ | |
37 | struct lguest_device { | |
38 | struct virtio_device vdev; | |
39 | ||
40 | /* The entry in the lguest_devices page for this device. */ | |
41 | struct lguest_device_desc *desc; | |
42 | }; | |
43 | ||
/* The virtio core only ever passes us a struct virtio_device pointer; this
 * short macro recovers the struct lguest_device which contains it. */
#define to_lgdev(vd) \
	container_of(vd, struct lguest_device, vdev)
19f1537b RR |
48 | |
49 | /*D:130 | |
50 | * Device configurations | |
51 | * | |
a586d4f6 | 52 | * The configuration information for a device consists of one or more |
a6bd8e13 | 53 | * virtqueues, a feature bitmap, and some configuration bytes. The |
6e5aa7ef | 54 | * configuration bytes don't really matter to us: the Launcher sets them up, and |
a586d4f6 | 55 | * the driver will look at them during setup. |
19f1537b | 56 | * |
a586d4f6 RR |
57 | * A convenient routine to return the device's virtqueue config array: |
58 | * immediately after the descriptor. */ | |
59 | static struct lguest_vqconfig *lg_vq(const struct lguest_device_desc *desc) | |
60 | { | |
61 | return (void *)(desc + 1); | |
62 | } | |
19f1537b | 63 | |
a586d4f6 RR |
64 | /* The features come immediately after the virtqueues. */ |
65 | static u8 *lg_features(const struct lguest_device_desc *desc) | |
66 | { | |
67 | return (void *)(lg_vq(desc) + desc->num_vq); | |
68 | } | |
19f1537b | 69 | |
a586d4f6 RR |
70 | /* The config space comes after the two feature bitmasks. */ |
71 | static u8 *lg_config(const struct lguest_device_desc *desc) | |
19f1537b | 72 | { |
a586d4f6 RR |
73 | return lg_features(desc) + desc->feature_len * 2; |
74 | } | |
19f1537b | 75 | |
a586d4f6 RR |
76 | /* The total size of the config page used by this device (incl. desc) */ |
77 | static unsigned desc_size(const struct lguest_device_desc *desc) | |
78 | { | |
79 | return sizeof(*desc) | |
80 | + desc->num_vq * sizeof(struct lguest_vqconfig) | |
81 | + desc->feature_len * 2 | |
82 | + desc->config_len; | |
83 | } | |
84 | ||
c45a6816 RR |
85 | /* This gets the device's feature bits. */ |
86 | static u32 lg_get_features(struct virtio_device *vdev) | |
a586d4f6 | 87 | { |
c45a6816 RR |
88 | unsigned int i; |
89 | u32 features = 0; | |
a586d4f6 | 90 | struct lguest_device_desc *desc = to_lgdev(vdev)->desc; |
c45a6816 RR |
91 | u8 *in_features = lg_features(desc); |
92 | ||
93 | /* We do this the slow but generic way. */ | |
94 | for (i = 0; i < min(desc->feature_len * 8, 32); i++) | |
95 | if (in_features[i / 8] & (1 << (i % 8))) | |
96 | features |= (1 << i); | |
97 | ||
98 | return features; | |
99 | } | |
100 | ||
1dc3e3bc RR |
101 | /* The virtio core takes the features the Host offers, and copies the |
102 | * ones supported by the driver into the vdev->features array. Once | |
103 | * that's all sorted out, this routine is called so we can tell the | |
104 | * Host which features we understand and accept. */ | |
c624896e | 105 | static void lg_finalize_features(struct virtio_device *vdev) |
c45a6816 | 106 | { |
c624896e | 107 | unsigned int i, bits; |
c45a6816 RR |
108 | struct lguest_device_desc *desc = to_lgdev(vdev)->desc; |
109 | /* Second half of bitmap is features we accept. */ | |
110 | u8 *out_features = lg_features(desc) + desc->feature_len; | |
111 | ||
e34f8725 RR |
112 | /* Give virtio_ring a chance to accept features. */ |
113 | vring_transport_features(vdev); | |
114 | ||
1dc3e3bc RR |
115 | /* The vdev->feature array is a Linux bitmask: this isn't the |
116 | * same as a the simple array of bits used by lguest devices | |
117 | * for features. So we do this slow, manual conversion which is | |
118 | * completely general. */ | |
c45a6816 | 119 | memset(out_features, 0, desc->feature_len); |
c624896e RR |
120 | bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8; |
121 | for (i = 0; i < bits; i++) { | |
122 | if (test_bit(i, vdev->features)) | |
c45a6816 RR |
123 | out_features[i / 8] |= (1 << (i % 8)); |
124 | } | |
19f1537b RR |
125 | } |
126 | ||
127 | /* Once they've found a field, getting a copy of it is easy. */ | |
a586d4f6 | 128 | static void lg_get(struct virtio_device *vdev, unsigned int offset, |
19f1537b RR |
129 | void *buf, unsigned len) |
130 | { | |
a586d4f6 RR |
131 | struct lguest_device_desc *desc = to_lgdev(vdev)->desc; |
132 | ||
133 | /* Check they didn't ask for more than the length of the config! */ | |
134 | BUG_ON(offset + len > desc->config_len); | |
135 | memcpy(buf, lg_config(desc) + offset, len); | |
19f1537b RR |
136 | } |
137 | ||
138 | /* Setting the contents is also trivial. */ | |
a586d4f6 | 139 | static void lg_set(struct virtio_device *vdev, unsigned int offset, |
19f1537b RR |
140 | const void *buf, unsigned len) |
141 | { | |
a586d4f6 RR |
142 | struct lguest_device_desc *desc = to_lgdev(vdev)->desc; |
143 | ||
144 | /* Check they didn't ask for more than the length of the config! */ | |
145 | BUG_ON(offset + len > desc->config_len); | |
146 | memcpy(lg_config(desc) + offset, buf, len); | |
19f1537b RR |
147 | } |
148 | ||
149 | /* The operations to get and set the status word just access the status field | |
150 | * of the device descriptor. */ | |
151 | static u8 lg_get_status(struct virtio_device *vdev) | |
152 | { | |
153 | return to_lgdev(vdev)->desc->status; | |
154 | } | |
155 | ||
a007a751 RR |
156 | /* To notify on status updates, we (ab)use the NOTIFY hypercall, with the |
157 | * descriptor address of the device. A zero status means "reset". */ | |
158 | static void set_status(struct virtio_device *vdev, u8 status) | |
159 | { | |
160 | unsigned long offset = (void *)to_lgdev(vdev)->desc - lguest_devices; | |
161 | ||
162 | /* We set the status. */ | |
163 | to_lgdev(vdev)->desc->status = status; | |
164 | hcall(LHCALL_NOTIFY, (max_pfn<<PAGE_SHIFT) + offset, 0, 0); | |
165 | } | |
166 | ||
19f1537b RR |
167 | static void lg_set_status(struct virtio_device *vdev, u8 status) |
168 | { | |
6e5aa7ef | 169 | BUG_ON(!status); |
a007a751 | 170 | set_status(vdev, status); |
19f1537b RR |
171 | } |
172 | ||
6e5aa7ef RR |
173 | static void lg_reset(struct virtio_device *vdev) |
174 | { | |
a007a751 | 175 | set_status(vdev, 0); |
6e5aa7ef RR |
176 | } |
177 | ||
19f1537b RR |
178 | /* |
179 | * Virtqueues | |
180 | * | |
181 | * The other piece of infrastructure virtio needs is a "virtqueue": a way of | |
182 | * the Guest device registering buffers for the other side to read from or | |
183 | * write into (ie. send and receive buffers). Each device can have multiple | |
e1e72965 RR |
184 | * virtqueues: for example the console driver uses one queue for sending and |
185 | * another for receiving. | |
19f1537b RR |
186 | * |
187 | * Fortunately for us, a very fast shared-memory-plus-descriptors virtqueue | |
188 | * already exists in virtio_ring.c. We just need to connect it up. | |
189 | * | |
190 | * We start with the information we need to keep about each virtqueue. | |
191 | */ | |
192 | ||
193 | /*D:140 This is the information we remember about each virtqueue. */ | |
194 | struct lguest_vq_info | |
195 | { | |
196 | /* A copy of the information contained in the device config. */ | |
197 | struct lguest_vqconfig config; | |
198 | ||
199 | /* The address where we mapped the virtio ring, so we can unmap it. */ | |
200 | void *pages; | |
201 | }; | |
202 | ||
203 | /* When the virtio_ring code wants to prod the Host, it calls us here and we | |
a6bd8e13 | 204 | * make a hypercall. We hand the physical address of the virtqueue so the Host |
19f1537b RR |
205 | * knows which virtqueue we're talking about. */ |
206 | static void lg_notify(struct virtqueue *vq) | |
207 | { | |
208 | /* We store our virtqueue information in the "priv" pointer of the | |
209 | * virtqueue structure. */ | |
210 | struct lguest_vq_info *lvq = vq->priv; | |
211 | ||
212 | hcall(LHCALL_NOTIFY, lvq->config.pfn << PAGE_SHIFT, 0, 0); | |
213 | } | |
214 | ||
6db6a5f3 RR |
215 | /* An extern declaration inside a C file is bad form. Don't do it. */ |
216 | extern void lguest_setup_irq(unsigned int irq); | |
217 | ||
19f1537b RR |
218 | /* This routine finds the first virtqueue described in the configuration of |
219 | * this device and sets it up. | |
220 | * | |
221 | * This is kind of an ugly duckling. It'd be nicer to have a standard | |
222 | * representation of a virtqueue in the configuration space, but it seems that | |
e1e72965 | 223 | * everyone wants to do it differently. The KVM coders want the Guest to |
19f1537b RR |
224 | * allocate its own pages and tell the Host where they are, but for lguest it's |
225 | * simpler for the Host to simply tell us where the pages are. | |
226 | * | |
a6bd8e13 RR |
227 | * So we provide drivers with a "find the Nth virtqueue and set it up" |
228 | * function. */ | |
19f1537b | 229 | static struct virtqueue *lg_find_vq(struct virtio_device *vdev, |
a586d4f6 | 230 | unsigned index, |
18445c4d | 231 | void (*callback)(struct virtqueue *vq)) |
19f1537b | 232 | { |
a586d4f6 | 233 | struct lguest_device *ldev = to_lgdev(vdev); |
19f1537b RR |
234 | struct lguest_vq_info *lvq; |
235 | struct virtqueue *vq; | |
19f1537b RR |
236 | int err; |
237 | ||
a586d4f6 RR |
238 | /* We must have this many virtqueues. */ |
239 | if (index >= ldev->desc->num_vq) | |
19f1537b RR |
240 | return ERR_PTR(-ENOENT); |
241 | ||
242 | lvq = kmalloc(sizeof(*lvq), GFP_KERNEL); | |
243 | if (!lvq) | |
244 | return ERR_PTR(-ENOMEM); | |
245 | ||
a586d4f6 RR |
246 | /* Make a copy of the "struct lguest_vqconfig" entry, which sits after |
247 | * the descriptor. We need a copy because the config space might not | |
248 | * be aligned correctly. */ | |
249 | memcpy(&lvq->config, lg_vq(ldev->desc)+index, sizeof(lvq->config)); | |
19f1537b | 250 | |
a586d4f6 RR |
251 | printk("Mapping virtqueue %i addr %lx\n", index, |
252 | (unsigned long)lvq->config.pfn << PAGE_SHIFT); | |
19f1537b RR |
253 | /* Figure out how many pages the ring will take, and map that memory */ |
254 | lvq->pages = lguest_map((unsigned long)lvq->config.pfn << PAGE_SHIFT, | |
42b36cc0 | 255 | DIV_ROUND_UP(vring_size(lvq->config.num, |
2966af73 | 256 | LGUEST_VRING_ALIGN), |
19f1537b RR |
257 | PAGE_SIZE)); |
258 | if (!lvq->pages) { | |
259 | err = -ENOMEM; | |
260 | goto free_lvq; | |
261 | } | |
262 | ||
263 | /* OK, tell virtio_ring.c to set up a virtqueue now we know its size | |
264 | * and we've got a pointer to its pages. */ | |
87c7d57c RR |
265 | vq = vring_new_virtqueue(lvq->config.num, LGUEST_VRING_ALIGN, |
266 | vdev, lvq->pages, lg_notify, callback); | |
19f1537b RR |
267 | if (!vq) { |
268 | err = -ENOMEM; | |
269 | goto unmap; | |
270 | } | |
271 | ||
6db6a5f3 RR |
272 | /* Make sure the interrupt is allocated. */ |
273 | lguest_setup_irq(lvq->config.irq); | |
274 | ||
19f1537b RR |
275 | /* Tell the interrupt for this virtqueue to go to the virtio_ring |
276 | * interrupt handler. */ | |
277 | /* FIXME: We used to have a flag for the Host to tell us we could use | |
278 | * the interrupt as a source of randomness: it'd be nice to have that | |
279 | * back.. */ | |
280 | err = request_irq(lvq->config.irq, vring_interrupt, IRQF_SHARED, | |
bda53cd5 | 281 | dev_name(&vdev->dev), vq); |
19f1537b RR |
282 | if (err) |
283 | goto destroy_vring; | |
284 | ||
285 | /* Last of all we hook up our 'struct lguest_vq_info" to the | |
286 | * virtqueue's priv pointer. */ | |
287 | vq->priv = lvq; | |
288 | return vq; | |
289 | ||
290 | destroy_vring: | |
291 | vring_del_virtqueue(vq); | |
292 | unmap: | |
293 | lguest_unmap(lvq->pages); | |
294 | free_lvq: | |
295 | kfree(lvq); | |
296 | return ERR_PTR(err); | |
297 | } | |
298 | /*:*/ | |
299 | ||
300 | /* Cleaning up a virtqueue is easy */ | |
301 | static void lg_del_vq(struct virtqueue *vq) | |
302 | { | |
303 | struct lguest_vq_info *lvq = vq->priv; | |
304 | ||
74b2553f RR |
305 | /* Release the interrupt */ |
306 | free_irq(lvq->config.irq, vq); | |
19f1537b RR |
307 | /* Tell virtio_ring.c to free the virtqueue. */ |
308 | vring_del_virtqueue(vq); | |
309 | /* Unmap the pages containing the ring. */ | |
310 | lguest_unmap(lvq->pages); | |
311 | /* Free our own queue information. */ | |
312 | kfree(lvq); | |
313 | } | |
314 | ||
315 | /* The ops structure which hooks everything together. */ | |
316 | static struct virtio_config_ops lguest_config_ops = { | |
c45a6816 | 317 | .get_features = lg_get_features, |
c624896e | 318 | .finalize_features = lg_finalize_features, |
19f1537b RR |
319 | .get = lg_get, |
320 | .set = lg_set, | |
321 | .get_status = lg_get_status, | |
322 | .set_status = lg_set_status, | |
6e5aa7ef | 323 | .reset = lg_reset, |
19f1537b RR |
324 | .find_vq = lg_find_vq, |
325 | .del_vq = lg_del_vq, | |
326 | }; | |
327 | ||
328 | /* The root device for the lguest virtio devices. This makes them appear as | |
329 | * /sys/devices/lguest/0,1,2 not /sys/devices/0,1,2. */ | |
ff8561c4 | 330 | static struct device *lguest_root; |
19f1537b RR |
331 | |
332 | /*D:120 This is the core of the lguest bus: actually adding a new device. | |
333 | * It's a separate function because it's neater that way, and because an | |
334 | * earlier version of the code supported hotplug and unplug. They were removed | |
335 | * early on because they were never used. | |
336 | * | |
337 | * As Andrew Tridgell says, "Untested code is buggy code". | |
338 | * | |
339 | * It's worth reading this carefully: we start with a pointer to the new device | |
b769f579 RR |
340 | * descriptor in the "lguest_devices" page, and the offset into the device |
341 | * descriptor page so we can uniquely identify it if things go badly wrong. */ | |
342 | static void add_lguest_device(struct lguest_device_desc *d, | |
343 | unsigned int offset) | |
19f1537b RR |
344 | { |
345 | struct lguest_device *ldev; | |
346 | ||
e1e72965 RR |
347 | /* Start with zeroed memory; Linux's device layer seems to count on |
348 | * it. */ | |
19f1537b RR |
349 | ldev = kzalloc(sizeof(*ldev), GFP_KERNEL); |
350 | if (!ldev) { | |
b769f579 RR |
351 | printk(KERN_EMERG "Cannot allocate lguest dev %u type %u\n", |
352 | offset, d->type); | |
19f1537b RR |
353 | return; |
354 | } | |
355 | ||
356 | /* This devices' parent is the lguest/ dir. */ | |
ff8561c4 | 357 | ldev->vdev.dev.parent = lguest_root; |
19f1537b | 358 | /* We have a unique device index thanks to the dev_index counter. */ |
19f1537b RR |
359 | ldev->vdev.id.device = d->type; |
360 | /* We have a simple set of routines for querying the device's | |
361 | * configuration information and setting its status. */ | |
362 | ldev->vdev.config = &lguest_config_ops; | |
363 | /* And we remember the device's descriptor for lguest_config_ops. */ | |
364 | ldev->desc = d; | |
365 | ||
366 | /* register_virtio_device() sets up the generic fields for the struct | |
367 | * virtio_device and calls device_register(). This makes the bus | |
368 | * infrastructure look for a matching driver. */ | |
369 | if (register_virtio_device(&ldev->vdev) != 0) { | |
b769f579 RR |
370 | printk(KERN_ERR "Failed to register lguest dev %u type %u\n", |
371 | offset, d->type); | |
19f1537b RR |
372 | kfree(ldev); |
373 | } | |
374 | } | |
375 | ||
376 | /*D:110 scan_devices() simply iterates through the device page. The type 0 is | |
377 | * reserved to mean "end of devices". */ | |
378 | static void scan_devices(void) | |
379 | { | |
380 | unsigned int i; | |
381 | struct lguest_device_desc *d; | |
382 | ||
383 | /* We start at the page beginning, and skip over each entry. */ | |
a586d4f6 | 384 | for (i = 0; i < PAGE_SIZE; i += desc_size(d)) { |
19f1537b RR |
385 | d = lguest_devices + i; |
386 | ||
387 | /* Once we hit a zero, stop. */ | |
388 | if (d->type == 0) | |
389 | break; | |
390 | ||
a586d4f6 | 391 | printk("Device at %i has size %u\n", i, desc_size(d)); |
b769f579 | 392 | add_lguest_device(d, i); |
19f1537b RR |
393 | } |
394 | } | |
395 | ||
396 | /*D:105 Fairly early in boot, lguest_devices_init() is called to set up the | |
397 | * lguest device infrastructure. We check that we are a Guest by checking | |
398 | * pv_info.name: there are other ways of checking, but this seems most | |
399 | * obvious to me. | |
400 | * | |
401 | * So we can access the "struct lguest_device_desc"s easily, we map that memory | |
402 | * and store the pointer in the global "lguest_devices". Then we register a | |
403 | * root device from which all our devices will hang (this seems to be the | |
404 | * correct sysfs incantation). | |
405 | * | |
406 | * Finally we call scan_devices() which adds all the devices found in the | |
407 | * lguest_devices page. */ | |
408 | static int __init lguest_devices_init(void) | |
409 | { | |
410 | if (strcmp(pv_info.name, "lguest") != 0) | |
411 | return 0; | |
412 | ||
ff8561c4 MM |
413 | lguest_root = root_device_register("lguest"); |
414 | if (IS_ERR(lguest_root)) | |
19f1537b RR |
415 | panic("Could not register lguest root"); |
416 | ||
417 | /* Devices are in a single page above top of "normal" mem */ | |
418 | lguest_devices = lguest_map(max_pfn<<PAGE_SHIFT, 1); | |
419 | ||
420 | scan_devices(); | |
421 | return 0; | |
422 | } | |
423 | /* We do this after core stuff, but before the drivers. */ | |
424 | postcore_initcall(lguest_devices_init); | |
425 | ||
426 | /*D:150 At this point in the journey we used to now wade through the lguest | |
427 | * devices themselves: net, block and console. Since they're all now virtio | |
428 | * devices rather than lguest-specific, I've decided to ignore them. Mostly, | |
429 | * they're kind of boring. But this does mean you'll never experience the | |
430 | * thrill of reading the forbidden love scene buried deep in the block driver. | |
431 | * | |
432 | * "make Launcher" beckons, where we answer questions like "Where do Guests | |
433 | * come from?", and "What do you do when someone asks for optimization?". */ |