/*
 * Copyright (C) 2016 Red Hat, Inc.
 * Author: Michael S. Tsirkin <mst@redhat.com>
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 * Simple descriptor-based ring. virtio 0.9 compatible event index is used for
 * signalling, unconditionally.
 */
#define _GNU_SOURCE
#include "main.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
14 | ||
15 | /* Next - Where next entry will be written. | |
16 | * Prev - "Next" value when event triggered previously. | |
17 | * Event - Peer requested event after writing this entry. | |
18 | */ | |
19 | static inline bool need_event(unsigned short event, | |
20 | unsigned short next, | |
21 | unsigned short prev) | |
22 | { | |
23 | return (unsigned short)(next - event - 1) < (unsigned short)(next - prev); | |
24 | } | |
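
/* Worked example (illustrative numbers, not from the original source):
 * with event = 3, prev = 2, next = 5 the producer has written entries
 * 2, 3 and 4 since the event last triggered, and the peer asked to be
 * notified once entry 3 was written: (unsigned short)(5 - 3 - 1) == 1
 * is less than (unsigned short)(5 - 2) == 3, so an event is due.  The
 * casts keep the comparison correct across 16-bit index wraparound,
 * e.g. event = 0xfffe, prev = 0xfffd, next = 0x0001 still signals.
 */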
25 | ||
26 | /* Design: | |
27 | * Guest adds descriptors with unique index values and DESC_HW in flags. | |
28 | * Host overwrites used descriptors with correct len, index, and DESC_HW clear. | |
29 | * Flags are always set last. | |
30 | */ | |
31 | #define DESC_HW 0x1 | |
32 | ||
struct desc {
	unsigned short flags;
	unsigned short index;
	unsigned len;
	unsigned long long addr;
};

/* how much padding is needed to avoid false cache sharing */
#define HOST_GUEST_PADDING 0x80
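/* Note: 0x80 (128) bytes spans two 64-byte cache lines; the second
 * line is assumed to also defeat adjacent-line hardware prefetchers.
 */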
42 | ||
43 | /* Mostly read */ | |
44 | struct event { | |
45 | unsigned short kick_index; | |
46 | unsigned char reserved0[HOST_GUEST_PADDING - 2]; | |
47 | unsigned short call_index; | |
48 | unsigned char reserved1[HOST_GUEST_PADDING - 2]; | |
49 | }; | |
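
/* In struct event above: kick_index is written by the host in
 * enable_kick() and read by the guest in kick_available(); call_index
 * is written by the guest in enable_call() and read by the host in
 * call_used().  The padding keeps each index on its own cache line.
 */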
50 | ||
51 | struct data { | |
52 | void *buf; /* descriptor is writeable, we can't get buf from there */ | |
53 | void *data; | |
54 | } *data; | |
55 | ||
56 | struct desc *ring; | |
57 | struct event *event; | |
58 | ||
59 | struct guest { | |
60 | unsigned avail_idx; | |
61 | unsigned last_used_idx; | |
62 | unsigned num_free; | |
63 | unsigned kicked_avail_idx; | |
64 | unsigned char reserved[HOST_GUEST_PADDING - 12]; | |
65 | } guest; | |
66 | ||
67 | struct host { | |
68 | /* we do not need to track last avail index | |
69 | * unless we have more than one in flight. | |
70 | */ | |
71 | unsigned used_idx; | |
72 | unsigned called_used_idx; | |
73 | unsigned char reserved[HOST_GUEST_PADDING - 4]; | |
74 | } host; | |
75 | ||
76 | /* implemented by ring */ | |
77 | void alloc_ring(void) | |
78 | { | |
79 | int ret; | |
80 | int i; | |
81 | ||
82 | ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring); | |
83 | if (ret) { | |
84 | perror("Unable to allocate ring buffer.\n"); | |
85 | exit(3); | |
86 | } | |
87 | event = malloc(sizeof *event); | |
88 | if (!event) { | |
89 | perror("Unable to allocate event buffer.\n"); | |
90 | exit(3); | |
91 | } | |
92 | memset(event, 0, sizeof *event); | |
93 | guest.avail_idx = 0; | |
94 | guest.kicked_avail_idx = -1; | |
95 | guest.last_used_idx = 0; | |
96 | host.used_idx = 0; | |
97 | host.called_used_idx = -1; | |
98 | for (i = 0; i < ring_size; ++i) { | |
99 | struct desc desc = { | |
100 | .index = i, | |
101 | }; | |
102 | ring[i] = desc; | |
103 | } | |
104 | guest.num_free = ring_size; | |
105 | data = malloc(ring_size * sizeof *data); | |
106 | if (!data) { | |
107 | perror("Unable to allocate data buffer.\n"); | |
108 | exit(3); | |
109 | } | |
110 | memset(data, 0, ring_size * sizeof *data); | |
111 | } | |
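
/* Note: the (ring_size - 1) masking below assumes ring_size is a power
 * of two, which the main.h test harness is assumed to guarantee.
 */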
112 | ||
113 | /* guest side */ | |
114 | int add_inbuf(unsigned len, void *buf, void *datap) | |
115 | { | |
116 | unsigned head, index; | |
117 | ||
118 | if (!guest.num_free) | |
119 | return -1; | |
120 | ||
121 | guest.num_free--; | |
122 | head = (ring_size - 1) & (guest.avail_idx++); | |
123 | ||
124 | /* Start with a write. On MESI architectures this helps | |
125 | * avoid a shared state with consumer that is polling this descriptor. | |
126 | */ | |
127 | ring[head].addr = (unsigned long)(void*)buf; | |
128 | ring[head].len = len; | |
129 | /* read below might bypass write above. That is OK because it's just an | |
130 | * optimization. If this happens, we will get the cache line in a | |
131 | * shared state which is unfortunate, but probably not worth it to | |
132 | * add an explicit full barrier to avoid this. | |
133 | */ | |
134 | barrier(); | |
135 | index = ring[head].index; | |
136 | data[index].buf = buf; | |
137 | data[index].data = datap; | |
138 | /* Barrier A (for pairing) */ | |
139 | smp_release(); | |
140 | ring[head].flags = DESC_HW; | |
141 | ||
142 | return 0; | |
143 | } | |
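
/* Illustrative guest-side usage (a sketch with hypothetical values,
 * not part of the harness): produce a buffer, notify the host, then
 * reap completions with get_buf().
 *
 *	char *buf = malloc(64);
 *	if (add_inbuf(64, buf, buf) == 0)
 *		kick_available();
 *	...
 *	unsigned len;
 *	void *bufp;
 *	void *done = get_buf(&len, &bufp);
 */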
144 | ||
145 | void *get_buf(unsigned *lenp, void **bufp) | |
146 | { | |
147 | unsigned head = (ring_size - 1) & guest.last_used_idx; | |
148 | unsigned index; | |
149 | void *datap; | |
150 | ||
151 | if (ring[head].flags & DESC_HW) | |
152 | return NULL; | |
153 | /* Barrier B (for pairing) */ | |
154 | smp_acquire(); | |
155 | *lenp = ring[head].len; | |
156 | index = ring[head].index & (ring_size - 1); | |
157 | datap = data[index].data; | |
158 | *bufp = data[index].buf; | |
159 | data[index].buf = NULL; | |
160 | data[index].data = NULL; | |
161 | guest.num_free++; | |
162 | guest.last_used_idx++; | |
163 | return datap; | |
164 | } | |
165 | ||
166 | void poll_used(void) | |
167 | { | |
168 | unsigned head = (ring_size - 1) & guest.last_used_idx; | |
169 | ||
170 | while (ring[head].flags & DESC_HW) | |
171 | busy_wait(); | |
172 | } | |
173 | ||
174 | void disable_call() | |
175 | { | |
176 | /* Doing nothing to disable calls might cause | |
177 | * extra interrupts, but reduces the number of cache misses. | |
178 | */ | |
179 | } | |
180 | ||
bool enable_call(void)
{
	unsigned head = (ring_size - 1) & guest.last_used_idx;

	event->call_index = guest.last_used_idx;
	/* Flush the call index write before re-checking flags */
	/* Barrier D (for pairing) */
	smp_mb();
	return ring[head].flags & DESC_HW;
}
191 | ||
192 | void kick_available(void) | |
193 | { | |
194 | /* Flush in previous flags write */ | |
195 | /* Barrier C (for pairing) */ | |
196 | smp_mb(); | |
197 | if (!need_event(event->kick_index, | |
198 | guest.avail_idx, | |
199 | guest.kicked_avail_idx)) | |
200 | return; | |
201 | ||
202 | guest.kicked_avail_idx = guest.avail_idx; | |
203 | kick(); | |
204 | } | |
205 | ||
206 | /* host side */ | |
207 | void disable_kick() | |
208 | { | |
209 | /* Doing nothing to disable kicks might cause | |
210 | * extra interrupts, but reduces the number of cache misses. | |
211 | */ | |
212 | } | |
213 | ||
bool enable_kick(void)
{
	unsigned head = (ring_size - 1) & host.used_idx;

	event->kick_index = host.used_idx;
	/* Barrier C (for pairing) */
	smp_mb();
	return !(ring[head].flags & DESC_HW);
}
223 | ||
224 | void poll_avail(void) | |
225 | { | |
226 | unsigned head = (ring_size - 1) & host.used_idx; | |
227 | ||
228 | while (!(ring[head].flags & DESC_HW)) | |
229 | busy_wait(); | |
230 | } | |
231 | ||
232 | bool use_buf(unsigned *lenp, void **bufp) | |
233 | { | |
234 | unsigned head = (ring_size - 1) & host.used_idx; | |
235 | ||
236 | if (!(ring[head].flags & DESC_HW)) | |
237 | return false; | |
238 | ||
239 | /* make sure length read below is not speculated */ | |
240 | /* Barrier A (for pairing) */ | |
241 | smp_acquire(); | |
242 | ||
243 | /* simple in-order completion: we don't need | |
244 | * to touch index at all. This also means we | |
245 | * can just modify the descriptor in-place. | |
246 | */ | |
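	/* Decrement len in place; presumably a benchmark stand-in for the
	 * host writing back a real used length.
	 */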
	ring[head].len--;
	/* Make sure len is valid before flags.
	 * Note: an alternative is to write len and flags in one access -
	 * possible on 64 bit architectures, but wmb is free on Intel anyway,
	 * so I have no way to test whether it's a gain.
	 */
	/* Barrier B (for pairing) */
	smp_release();
	ring[head].flags = 0;
	host.used_idx++;
	return true;
}
259 | ||
260 | void call_used(void) | |
261 | { | |
262 | /* Flush in previous flags write */ | |
263 | /* Barrier D (for pairing) */ | |
264 | smp_mb(); | |
265 | if (!need_event(event->call_index, | |
266 | host.used_idx, | |
267 | host.called_used_idx)) | |
268 | return; | |
269 | ||
270 | host.called_used_idx = host.used_idx; | |
271 | call(); | |
272 | } |