Drivers: hv: vmbus: Use the new virt_xx barrier code
[deliverable/linux.git] / drivers / hv / ring_buffer.c
CommitLineData
3e7ee490
HJ
1/*
2 *
3 * Copyright (c) 2009, Microsoft Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
16 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 *
18 * Authors:
19 * Haiyang Zhang <haiyangz@microsoft.com>
20 * Hank Janssen <hjanssen@microsoft.com>
b2a5a585 21 * K. Y. Srinivasan <kys@microsoft.com>
3e7ee490
HJ
22 *
23 */
0a46618d 24#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
3e7ee490 25
a0086dc5
GKH
26#include <linux/kernel.h>
27#include <linux/mm.h>
46a97191 28#include <linux/hyperv.h>
011a7c3c 29#include <linux/uio.h>
3f335ea2 30
0f2a6619 31#include "hyperv_vmbus.h"
3e7ee490 32
6fdf3b21
S
33void hv_begin_read(struct hv_ring_buffer_info *rbi)
34{
35 rbi->ring_buffer->interrupt_mask = 1;
dcd0eeca 36 virt_mb();
6fdf3b21
S
37}
38
39u32 hv_end_read(struct hv_ring_buffer_info *rbi)
40{
6fdf3b21
S
41
42 rbi->ring_buffer->interrupt_mask = 0;
dcd0eeca 43 virt_mb();
6fdf3b21
S
44
45 /*
46 * Now check to see if the ring buffer is still empty.
47 * If it is not, we raced and we need to process new
48 * incoming messages.
49 */
a6341f00 50 return hv_get_bytes_to_read(rbi);
6fdf3b21
S
51}
52
98fa8cf4
S
53/*
54 * When we write to the ring buffer, check if the host needs to
55 * be signaled. Here is the details of this protocol:
56 *
57 * 1. The host guarantees that while it is draining the
58 * ring buffer, it will set the interrupt_mask to
59 * indicate it does not need to be interrupted when
60 * new data is placed.
61 *
62 * 2. The host guarantees that it will completely drain
63 * the ring buffer before exiting the read loop. Further,
64 * once the ring buffer is empty, it will clear the
65 * interrupt_mask and re-check to see if new data has
66 * arrived.
67 */
68
69static bool hv_need_to_signal(u32 old_write, struct hv_ring_buffer_info *rbi)
70{
dcd0eeca 71 virt_mb();
d45faaee 72 if (READ_ONCE(rbi->ring_buffer->interrupt_mask))
98fa8cf4
S
73 return false;
74
e91e84fa 75 /* check interrupt_mask before read_index */
dcd0eeca 76 virt_rmb();
98fa8cf4
S
77 /*
78 * This is the only case we need to signal when the
79 * ring transitions from being empty to non-empty.
80 */
d45faaee 81 if (old_write == READ_ONCE(rbi->ring_buffer->read_index))
98fa8cf4
S
82 return true;
83
84 return false;
85}
86
c2b8e520
S
87/*
88 * To optimize the flow management on the send-side,
89 * when the sender is blocked because of lack of
90 * sufficient space in the ring buffer, potential the
91 * consumer of the ring buffer can signal the producer.
92 * This is controlled by the following parameters:
93 *
94 * 1. pending_send_sz: This is the size in bytes that the
95 * producer is trying to send.
96 * 2. The feature bit feat_pending_send_sz set to indicate if
97 * the consumer of the ring will signal when the ring
98 * state transitions from being full to a state where
99 * there is room for the producer to send the pending packet.
100 */
101
a389fcfd 102static bool hv_need_to_signal_on_read(struct hv_ring_buffer_info *rbi)
c2b8e520 103{
c2b8e520 104 u32 cur_write_sz;
a389fcfd 105 u32 pending_sz;
c2b8e520 106
a389fcfd
S
107 /*
108 * Issue a full memory barrier before making the signaling decision.
109 * Here is the reason for having this barrier:
110 * If the reading of the pend_sz (in this function)
111 * were to be reordered and read before we commit the new read
112 * index (in the calling function) we could
113 * have a problem. If the host were to set the pending_sz after we
114 * have sampled pending_sz and go to sleep before we commit the
115 * read index, we could miss sending the interrupt. Issue a full
116 * memory barrier to address this.
117 */
dcd0eeca 118 virt_mb();
a389fcfd 119
d45faaee 120 pending_sz = READ_ONCE(rbi->ring_buffer->pending_send_sz);
822f18d4 121 /* If the other end is not blocked on write don't bother. */
c2b8e520
S
122 if (pending_sz == 0)
123 return false;
124
a6341f00 125 cur_write_sz = hv_get_bytes_to_write(rbi);
c2b8e520 126
a389fcfd 127 if (cur_write_sz >= pending_sz)
c2b8e520
S
128 return true;
129
130 return false;
131}
3e7ee490 132
822f18d4 133/* Get the next write location for the specified ring buffer. */
4d643114 134static inline u32
2b8a912e 135hv_get_next_write_location(struct hv_ring_buffer_info *ring_info)
3e7ee490 136{
fc8c72eb 137 u32 next = ring_info->ring_buffer->write_index;
3e7ee490 138
3e7ee490
HJ
139 return next;
140}
141
822f18d4 142/* Set the next write location for the specified ring buffer. */
3e7ee490 143static inline void
2b8a912e 144hv_set_next_write_location(struct hv_ring_buffer_info *ring_info,
fc8c72eb 145 u32 next_write_location)
3e7ee490 146{
fc8c72eb 147 ring_info->ring_buffer->write_index = next_write_location;
3e7ee490
HJ
148}
149
822f18d4 150/* Get the next read location for the specified ring buffer. */
4d643114 151static inline u32
2b8a912e 152hv_get_next_read_location(struct hv_ring_buffer_info *ring_info)
3e7ee490 153{
fc8c72eb 154 u32 next = ring_info->ring_buffer->read_index;
3e7ee490 155
3e7ee490
HJ
156 return next;
157}
158
b2a5a585 159/*
b2a5a585 160 * Get the next read location + offset for the specified ring buffer.
822f18d4 161 * This allows the caller to skip.
b2a5a585 162 */
4d643114 163static inline u32
2b8a912e 164hv_get_next_readlocation_withoffset(struct hv_ring_buffer_info *ring_info,
1ac58644 165 u32 offset)
3e7ee490 166{
fc8c72eb 167 u32 next = ring_info->ring_buffer->read_index;
3e7ee490 168
fc8c72eb
HZ
169 next += offset;
170 next %= ring_info->ring_datasize;
3e7ee490
HJ
171
172 return next;
173}
174
822f18d4 175/* Set the next read location for the specified ring buffer. */
3e7ee490 176static inline void
2b8a912e 177hv_set_next_read_location(struct hv_ring_buffer_info *ring_info,
fc8c72eb 178 u32 next_read_location)
3e7ee490 179{
fc8c72eb 180 ring_info->ring_buffer->read_index = next_read_location;
3e7ee490
HJ
181}
182
183
822f18d4 184/* Get the start of the ring buffer. */
8282c400 185static inline void *
2b8a912e 186hv_get_ring_buffer(struct hv_ring_buffer_info *ring_info)
3e7ee490 187{
fc8c72eb 188 return (void *)ring_info->ring_buffer->buffer;
3e7ee490
HJ
189}
190
191
822f18d4 192/* Get the size of the ring buffer. */
4d643114 193static inline u32
2b8a912e 194hv_get_ring_buffersize(struct hv_ring_buffer_info *ring_info)
3e7ee490 195{
fc8c72eb 196 return ring_info->ring_datasize;
3e7ee490
HJ
197}
198
822f18d4 199/* Get the read and write indices as u64 of the specified ring buffer. */
59471438 200static inline u64
2b8a912e 201hv_get_ring_bufferindices(struct hv_ring_buffer_info *ring_info)
3e7ee490 202{
fc8c72eb 203 return (u64)ring_info->ring_buffer->write_index << 32;
3e7ee490
HJ
204}
205
8f1136ae 206/*
8f1136ae
S
207 * Helper routine to copy to source from ring buffer.
208 * Assume there is enough room. Handles wrap-around in src case only!!
8f1136ae
S
209 */
210static u32 hv_copyfrom_ringbuffer(
211 struct hv_ring_buffer_info *ring_info,
212 void *dest,
213 u32 destlen,
214 u32 start_read_offset)
215{
216 void *ring_buffer = hv_get_ring_buffer(ring_info);
217 u32 ring_buffer_size = hv_get_ring_buffersize(ring_info);
218
219 u32 frag_len;
220
221 /* wrap-around detected at the src */
222 if (destlen > ring_buffer_size - start_read_offset) {
223 frag_len = ring_buffer_size - start_read_offset;
224
225 memcpy(dest, ring_buffer + start_read_offset, frag_len);
226 memcpy(dest + frag_len, ring_buffer, destlen - frag_len);
227 } else
228
229 memcpy(dest, ring_buffer + start_read_offset, destlen);
230
231
232 start_read_offset += destlen;
233 start_read_offset %= ring_buffer_size;
234
235 return start_read_offset;
236}
237
238
7581578d 239/*
7581578d
S
240 * Helper routine to copy from source to ring buffer.
241 * Assume there is enough room. Handles wrap-around in dest case only!!
7581578d
S
242 */
243static u32 hv_copyto_ringbuffer(
fc8c72eb
HZ
244 struct hv_ring_buffer_info *ring_info,
245 u32 start_write_offset,
246 void *src,
7581578d
S
247 u32 srclen)
248{
249 void *ring_buffer = hv_get_ring_buffer(ring_info);
250 u32 ring_buffer_size = hv_get_ring_buffersize(ring_info);
251 u32 frag_len;
252
253 /* wrap-around detected! */
254 if (srclen > ring_buffer_size - start_write_offset) {
255 frag_len = ring_buffer_size - start_write_offset;
256 memcpy(ring_buffer + start_write_offset, src, frag_len);
257 memcpy(ring_buffer, src + frag_len, srclen - frag_len);
258 } else
259 memcpy(ring_buffer + start_write_offset, src, srclen);
3e7ee490 260
7581578d
S
261 start_write_offset += srclen;
262 start_write_offset %= ring_buffer_size;
263
264 return start_write_offset;
265}
3e7ee490 266
822f18d4 267/* Get various debug metrics for the specified ring buffer. */
a75b61d5 268void hv_ringbuffer_get_debuginfo(struct hv_ring_buffer_info *ring_info,
80682b7a 269 struct hv_ring_buffer_debug_info *debug_info)
3e7ee490 270{
fc8c72eb
HZ
271 u32 bytes_avail_towrite;
272 u32 bytes_avail_toread;
3e7ee490 273
fc8c72eb 274 if (ring_info->ring_buffer) {
2b8a912e 275 hv_get_ringbuffer_availbytes(ring_info,
fc8c72eb
HZ
276 &bytes_avail_toread,
277 &bytes_avail_towrite);
3e7ee490 278
fc8c72eb
HZ
279 debug_info->bytes_avail_toread = bytes_avail_toread;
280 debug_info->bytes_avail_towrite = bytes_avail_towrite;
82f8bd40 281 debug_info->current_read_index =
fc8c72eb 282 ring_info->ring_buffer->read_index;
82f8bd40 283 debug_info->current_write_index =
fc8c72eb 284 ring_info->ring_buffer->write_index;
82f8bd40 285 debug_info->current_interrupt_mask =
fc8c72eb 286 ring_info->ring_buffer->interrupt_mask;
3e7ee490
HJ
287 }
288}
289
822f18d4 290/* Initialize the ring buffer. */
72a95cbc 291int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
fc8c72eb 292 void *buffer, u32 buflen)
3e7ee490 293{
4a1b3acc 294 if (sizeof(struct hv_ring_buffer) != PAGE_SIZE)
3324fb40 295 return -EINVAL;
3e7ee490 296
fc8c72eb 297 memset(ring_info, 0, sizeof(struct hv_ring_buffer_info));
3e7ee490 298
fc8c72eb
HZ
299 ring_info->ring_buffer = (struct hv_ring_buffer *)buffer;
300 ring_info->ring_buffer->read_index =
301 ring_info->ring_buffer->write_index = 0;
3e7ee490 302
822f18d4 303 /* Set the feature bit for enabling flow control. */
046c7911
S
304 ring_info->ring_buffer->feature_bits.value = 1;
305
fc8c72eb
HZ
306 ring_info->ring_size = buflen;
307 ring_info->ring_datasize = buflen - sizeof(struct hv_ring_buffer);
3e7ee490 308
fc8c72eb 309 spin_lock_init(&ring_info->ring_lock);
3e7ee490
HJ
310
311 return 0;
312}
313
/*
 * Cleanup the ring buffer.
 *
 * Currently a no-op: the body is empty and @ring_info is not touched.
 */
void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info)
{
}
318
822f18d4 319/* Write to the ring buffer. */
633c4dce 320int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info,
fe760e4d 321 struct kvec *kv_list, u32 kv_count, bool *signal, bool lock)
3e7ee490 322{
4408f531 323 int i = 0;
fc8c72eb 324 u32 bytes_avail_towrite;
fc8c72eb 325 u32 totalbytes_towrite = 0;
3e7ee490 326
66a60543 327 u32 next_write_location;
98fa8cf4 328 u32 old_write;
fc8c72eb 329 u64 prev_indices = 0;
fe760e4d 330 unsigned long flags = 0;
3e7ee490 331
011a7c3c
S
332 for (i = 0; i < kv_count; i++)
333 totalbytes_towrite += kv_list[i].iov_len;
3e7ee490 334
fc8c72eb 335 totalbytes_towrite += sizeof(u64);
3e7ee490 336
fe760e4d
S
337 if (lock)
338 spin_lock_irqsave(&outring_info->ring_lock, flags);
3e7ee490 339
a6341f00 340 bytes_avail_towrite = hv_get_bytes_to_write(outring_info);
3e7ee490 341
822f18d4
VK
342 /*
343 * If there is only room for the packet, assume it is full.
344 * Otherwise, the next time around, we think the ring buffer
345 * is empty since the read index == write index.
346 */
fc8c72eb 347 if (bytes_avail_towrite <= totalbytes_towrite) {
fe760e4d
S
348 if (lock)
349 spin_unlock_irqrestore(&outring_info->ring_lock, flags);
d2598f01 350 return -EAGAIN;
3e7ee490
HJ
351 }
352
454f18a9 353 /* Write to the ring buffer */
2b8a912e 354 next_write_location = hv_get_next_write_location(outring_info);
3e7ee490 355
98fa8cf4
S
356 old_write = next_write_location;
357
011a7c3c 358 for (i = 0; i < kv_count; i++) {
2b8a912e 359 next_write_location = hv_copyto_ringbuffer(outring_info,
fc8c72eb 360 next_write_location,
011a7c3c
S
361 kv_list[i].iov_base,
362 kv_list[i].iov_len);
3e7ee490
HJ
363 }
364
454f18a9 365 /* Set previous packet start */
2b8a912e 366 prev_indices = hv_get_ring_bufferindices(outring_info);
3e7ee490 367
2b8a912e 368 next_write_location = hv_copyto_ringbuffer(outring_info,
fc8c72eb
HZ
369 next_write_location,
370 &prev_indices,
b219b3f7 371 sizeof(u64));
3e7ee490 372
98fa8cf4 373 /* Issue a full memory barrier before updating the write index */
dcd0eeca 374 virt_mb();
3e7ee490 375
454f18a9 376 /* Now, update the write location */
2b8a912e 377 hv_set_next_write_location(outring_info, next_write_location);
3e7ee490 378
3e7ee490 379
fe760e4d
S
380 if (lock)
381 spin_unlock_irqrestore(&outring_info->ring_lock, flags);
98fa8cf4
S
382
383 *signal = hv_need_to_signal(old_write, outring_info);
3e7ee490
HJ
384 return 0;
385}
386
940b68e2
VK
387int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info,
388 void *buffer, u32 buflen, u32 *buffer_actual_len,
389 u64 *requestid, bool *signal, bool raw)
3e7ee490 390{
fc8c72eb
HZ
391 u32 bytes_avail_toread;
392 u32 next_read_location = 0;
393 u64 prev_indices = 0;
940b68e2
VK
394 struct vmpacket_descriptor desc;
395 u32 offset;
396 u32 packetlen;
397 int ret = 0;
3e7ee490 398
fc8c72eb 399 if (buflen <= 0)
a16e1485 400 return -EINVAL;
3e7ee490 401
3e7ee490 402
940b68e2
VK
403 *buffer_actual_len = 0;
404 *requestid = 0;
405
a6341f00 406 bytes_avail_toread = hv_get_bytes_to_read(inring_info);
454f18a9 407 /* Make sure there is something to read */
940b68e2
VK
408 if (bytes_avail_toread < sizeof(desc)) {
409 /*
410 * No error is set when there is even no header, drivers are
411 * supposed to analyze buffer_actual_len.
412 */
3eba9a77 413 return ret;
940b68e2 414 }
3e7ee490 415
940b68e2
VK
416 next_read_location = hv_get_next_read_location(inring_info);
417 next_read_location = hv_copyfrom_ringbuffer(inring_info, &desc,
418 sizeof(desc),
419 next_read_location);
420
421 offset = raw ? 0 : (desc.offset8 << 3);
422 packetlen = (desc.len8 << 3) - offset;
423 *buffer_actual_len = packetlen;
424 *requestid = desc.trans_id;
425
3eba9a77
S
426 if (bytes_avail_toread < packetlen + offset)
427 return -EAGAIN;
940b68e2 428
3eba9a77
S
429 if (packetlen > buflen)
430 return -ENOBUFS;
3e7ee490 431
1ac58644 432 next_read_location =
2b8a912e 433 hv_get_next_readlocation_withoffset(inring_info, offset);
3e7ee490 434
2b8a912e 435 next_read_location = hv_copyfrom_ringbuffer(inring_info,
fc8c72eb 436 buffer,
940b68e2 437 packetlen,
fc8c72eb 438 next_read_location);
3e7ee490 439
2b8a912e 440 next_read_location = hv_copyfrom_ringbuffer(inring_info,
fc8c72eb 441 &prev_indices,
4408f531 442 sizeof(u64),
fc8c72eb 443 next_read_location);
3e7ee490 444
822f18d4
VK
445 /*
446 * Make sure all reads are done before we update the read index since
447 * the writer may start writing to the read area once the read index
448 * is updated.
449 */
dcd0eeca 450 virt_mb();
3e7ee490 451
454f18a9 452 /* Update the read index */
2b8a912e 453 hv_set_next_read_location(inring_info, next_read_location);
3e7ee490 454
a389fcfd 455 *signal = hv_need_to_signal_on_read(inring_info);
c2b8e520 456
940b68e2 457 return ret;
b5f53dde 458}
This page took 0.498923 seconds and 5 git commands to generate.