Commit | Line | Data |
---|---|---|
ce0d9d72 EP |
1 | /* |
2 | * 2007+ Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru> | |
3 | * All rights reserved. | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or modify | |
6 | * it under the terms of the GNU General Public License as published by | |
7 | * the Free Software Foundation; either version 2 of the License, or | |
8 | * (at your option) any later version. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, | |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | * GNU General Public License for more details. | |
14 | */ | |
15 | ||
16 | #ifndef __DST_H | |
17 | #define __DST_H | |
18 | ||
19 | #include <linux/types.h> | |
20 | #include <linux/connector.h> | |
21 | ||
/* Size, in bytes, of the fixed-length name buffers declared in this header */
#define DST_NAMELEN		32
/* Short name string for DST */
#define DST_NAME		"dst"
24 | ||
/* Control commands (values are spelled out explicitly, the numbering is unchanged) */
enum {
	/* Remove node with given id from storage */
	DST_DEL_NODE	= 0,
	/* Add remote node with given id to the storage */
	DST_ADD_REMOTE	= 1,
	/*
	 * Add local node with given id to the storage
	 * to be exported and used by remote peers
	 */
	DST_ADD_EXPORT	= 2,
	/* Crypto initialization command (hash/cipher used to protect the connection) */
	DST_CRYPTO	= 3,
	/* Security attributes for given connection (permissions for example) */
	DST_SECURITY	= 4,
	/* Register given node in the block layer subsystem */
	DST_START	= 5,
	/* One past the last command above */
	DST_CMD_MAX	= 6
};
40 | ||
41 | struct dst_ctl | |
42 | { | |
43 | /* Storage name */ | |
44 | char name[DST_NAMELEN]; | |
45 | /* Command flags */ | |
46 | __u32 flags; | |
47 | /* Command itself (see above) */ | |
48 | __u32 cmd; | |
49 | /* Maximum number of pages per single request in this device */ | |
50 | __u32 max_pages; | |
51 | /* Stale/error transaction scanning timeout in milliseconds */ | |
52 | __u32 trans_scan_timeout; | |
53 | /* Maximum number of retry sends before completing transaction as broken */ | |
54 | __u32 trans_max_retries; | |
55 | /* Storage size */ | |
56 | __u64 size; | |
57 | }; | |
58 | ||
59 | /* Reply command carries completion status */ | |
60 | struct dst_ctl_ack | |
61 | { | |
62 | struct cn_msg msg; | |
63 | int error; | |
64 | int unused[3]; | |
65 | }; | |
66 | ||
/*
 * Unfortunately the socket address structure is not exported to userspace
 * and is redefined there.
 */
#define SADDR_MAX_DATA	128

/*
 * Socket address as used by the DST control structures: an address family
 * followed by a fixed-size buffer and the number of bytes used in it.
 */
struct saddr {
	/* Address family, AF_xxx */
	unsigned short		sa_family;
	/* Protocol address, at most SADDR_MAX_DATA bytes */
	char			sa_data[SADDR_MAX_DATA];
	/* Number of bytes used in sa_data */
	unsigned short		sa_data_len;
};
81 | ||
82 | /* Address structure */ | |
83 | struct dst_network_ctl | |
84 | { | |
85 | /* Socket type: datagram, stream...*/ | |
86 | unsigned int type; | |
87 | /* Let me guess, is it a Jupiter diameter? */ | |
88 | unsigned int proto; | |
89 | /* Peer's address */ | |
90 | struct saddr addr; | |
91 | }; | |
92 | ||
93 | struct dst_crypto_ctl | |
94 | { | |
95 | /* Cipher and hash names */ | |
96 | char cipher_algo[DST_NAMELEN]; | |
97 | char hash_algo[DST_NAMELEN]; | |
98 | ||
99 | /* Key sizes. Can be zero for digest for example */ | |
100 | unsigned int cipher_keysize, hash_keysize; | |
101 | /* Alignment. Calculated by the DST itself. */ | |
102 | unsigned int crypto_attached_size; | |
103 | /* Number of threads to perform crypto operations */ | |
104 | int thread_num; | |
105 | }; | |
106 | ||
/* Export security attributes: these bits are checked when a client connects */
#define DST_PERM_READ		(1 << 0)
#define DST_PERM_WRITE		(1 << 1)
110 | ||
111 | /* | |
112 | * Right now it is simple model, where each remote address | |
113 | * is assigned to set of permissions it is allowed to perform. | |
114 | * In real world block device does not know anything but | |
115 | * reading and writing, so it should be more than enough. | |
116 | */ | |
117 | struct dst_secure_user | |
118 | { | |
119 | unsigned int permissions; | |
120 | struct saddr addr; | |
121 | }; | |
122 | ||
123 | /* | |
124 | * Export control command: device to export and network address to accept | |
125 | * clients to work with given device | |
126 | */ | |
127 | struct dst_export_ctl | |
128 | { | |
129 | char device[DST_NAMELEN]; | |
130 | struct dst_network_ctl ctl; | |
131 | }; | |
132 | ||
/* Network commands (values are spelled out explicitly, the numbering is unchanged) */
enum {
	/* Request remote configuration */
	DST_CFG		= 1,
	/* IO command */
	DST_IO		= 2,
	/* IO response */
	DST_IO_RESPONSE	= 3,
	/* Keepalive message */
	DST_PING	= 4,
	/* One past the last network command above */
	DST_NCMD_MAX	= 5,
};
140 | ||
141 | struct dst_cmd | |
142 | { | |
143 | /* Network command itself, see above */ | |
144 | __u32 cmd; | |
145 | /* | |
146 | * Size of the attached data | |
147 | * (in most cases, for READ command it means how many bytes were requested) | |
148 | */ | |
149 | __u32 size; | |
150 | /* Crypto size: number of attached bytes with digest/hmac */ | |
151 | __u32 csize; | |
152 | /* Here we can carry secret data */ | |
153 | __u32 reserved; | |
154 | /* Read/write bits, see how they are encoded in bio structure */ | |
155 | __u64 rw; | |
156 | /* BIO flags */ | |
157 | __u64 flags; | |
158 | /* Unique command id (like transaction ID) */ | |
159 | __u64 id; | |
160 | /* Sector to start IO from */ | |
161 | __u64 sector; | |
162 | /* Hash data is placed after this header */ | |
163 | __u8 hash[0]; | |
164 | }; | |
165 | ||
166 | /* | |
167 | * Convert command to/from network byte order. | |
168 | * We do not use hton*() functions, since there is | |
169 | * no 64-bit implementation. | |
170 | */ | |
171 | static inline void dst_convert_cmd(struct dst_cmd *c) | |
172 | { | |
173 | c->cmd = __cpu_to_be32(c->cmd); | |
174 | c->csize = __cpu_to_be32(c->csize); | |
175 | c->size = __cpu_to_be32(c->size); | |
176 | c->sector = __cpu_to_be64(c->sector); | |
177 | c->id = __cpu_to_be64(c->id); | |
178 | c->flags = __cpu_to_be64(c->flags); | |
179 | c->rw = __cpu_to_be64(c->rw); | |
180 | } | |
181 | ||
182 | /* Transaction id */ | |
183 | typedef __u64 dst_gen_t; | |
184 | ||
185 | #ifdef __KERNEL__ | |
186 | ||
187 | #include <linux/blkdev.h> | |
188 | #include <linux/bio.h> | |
189 | #include <linux/device.h> | |
190 | #include <linux/mempool.h> | |
191 | #include <linux/net.h> | |
192 | #include <linux/poll.h> | |
193 | #include <linux/rbtree.h> | |
194 | ||
/*
 * Debug printing helper.
 * With CONFIG_DST_DEBUG defined, dprintk() expands to printk() at
 * KERN_NOTICE level. Otherwise it is an empty inline function that
 * keeps the printf format attribute, so the arguments are still
 * checked against the format string by the compiler.
 */
#ifdef CONFIG_DST_DEBUG
#define dprintk(fmt, args...)	printk(KERN_NOTICE fmt, ##args)
#else
static inline void __attribute__ ((format (printf, 1, 2)))
dprintk(const char *fmt, ...)
{
}
#endif
201 | ||
202 | struct dst_node; | |
203 | ||
204 | struct dst_trans | |
205 | { | |
206 | /* DST node we are working with */ | |
207 | struct dst_node *n; | |
208 | ||
209 | /* Entry inside transaction tree */ | |
210 | struct rb_node trans_entry; | |
211 | ||
212 | /* Merlin kills this transaction when this memory cell equals zero */ | |
213 | atomic_t refcnt; | |
214 | ||
215 | /* How this transaction should be processed by crypto engine */ | |
216 | short enc; | |
217 | /* How many times this transaction was resent */ | |
218 | short retries; | |
219 | /* Completion status */ | |
220 | int error; | |
221 | ||
222 | /* When did we send it to the remote peer */ | |
223 | long send_time; | |
224 | ||
225 | /* My name is... | |
226 | * Well, computers does not speak, they have unique id instead */ | |
227 | dst_gen_t gen; | |
228 | ||
229 | /* Block IO we are working with */ | |
230 | struct bio *bio; | |
231 | ||
232 | /* Network command for above block IO request */ | |
233 | struct dst_cmd cmd; | |
234 | }; | |
235 | ||
236 | struct dst_crypto_engine | |
237 | { | |
238 | /* What should we do with all block requests */ | |
239 | struct crypto_hash *hash; | |
240 | struct crypto_ablkcipher *cipher; | |
241 | ||
242 | /* Pool of pages used to encrypt data into before sending */ | |
243 | int page_num; | |
244 | struct page **pages; | |
245 | ||
246 | /* What to do with current request */ | |
247 | int enc; | |
248 | /* Who we are and where do we go */ | |
249 | struct scatterlist *src, *dst; | |
250 | ||
251 | /* Maximum timeout waiting for encryption to be completed */ | |
252 | long timeout; | |
253 | /* IV is a 64-bit sequential counter */ | |
254 | u64 iv; | |
255 | ||
256 | /* Secret data */ | |
257 | void *private; | |
258 | ||
259 | /* Cached temporary data lives here */ | |
260 | int size; | |
261 | void *data; | |
262 | }; | |
263 | ||
264 | struct dst_state | |
265 | { | |
266 | /* The main state protection */ | |
267 | struct mutex state_lock; | |
268 | ||
269 | /* Polling machinery for sockets */ | |
270 | wait_queue_t wait; | |
271 | wait_queue_head_t *whead; | |
272 | /* Most of events are being waited here */ | |
273 | wait_queue_head_t thread_wait; | |
274 | ||
275 | /* Who owns this? */ | |
276 | struct dst_node *node; | |
277 | ||
278 | /* Network address for this state */ | |
279 | struct dst_network_ctl ctl; | |
280 | ||
281 | /* Permissions to work with: read-only or rw connection */ | |
282 | u32 permissions; | |
283 | ||
284 | /* Called when we need to clean private data */ | |
285 | void (* cleanup)(struct dst_state *st); | |
286 | ||
287 | /* Used by the server: BIO completion queues BIOs here */ | |
288 | struct list_head request_list; | |
289 | spinlock_t request_lock; | |
290 | ||
291 | /* Guess what? No, it is not number of planets */ | |
292 | atomic_t refcnt; | |
293 | ||
294 | /* This flags is set when connection should be dropped */ | |
295 | int need_exit; | |
296 | ||
297 | /* | |
298 | * Socket to work with. Second pointer is used for | |
299 | * lockless check if socket was changed before performing | |
300 | * next action (like working with cached polling result) | |
301 | */ | |
302 | struct socket *socket, *read_socket; | |
303 | ||
304 | /* Cached preallocated data */ | |
305 | void *data; | |
306 | unsigned int size; | |
307 | ||
308 | /* Currently processed command */ | |
309 | struct dst_cmd cmd; | |
310 | }; | |
311 | ||
312 | struct dst_info | |
313 | { | |
314 | /* Device size */ | |
315 | u64 size; | |
316 | ||
317 | /* Local device name for export devices */ | |
318 | char local[DST_NAMELEN]; | |
319 | ||
320 | /* Network setup */ | |
321 | struct dst_network_ctl net; | |
322 | ||
323 | /* Sysfs bits use this */ | |
324 | struct device device; | |
325 | }; | |
326 | ||
327 | struct dst_node | |
328 | { | |
329 | struct list_head node_entry; | |
330 | ||
331 | /* Hi, my name is stored here */ | |
332 | char name[DST_NAMELEN]; | |
333 | /* My cache name is stored here */ | |
334 | char cache_name[DST_NAMELEN]; | |
335 | ||
336 | /* Block device attached to given node. | |
337 | * Only valid for exporting nodes */ | |
338 | struct block_device *bdev; | |
339 | /* Network state machine for given peer */ | |
340 | struct dst_state *state; | |
341 | ||
342 | /* Block IO machinery */ | |
343 | struct request_queue *queue; | |
344 | struct gendisk *disk; | |
345 | ||
346 | /* Number of threads in processing pool */ | |
347 | int thread_num; | |
348 | /* Maximum number of pages in single IO */ | |
349 | int max_pages; | |
350 | ||
351 | /* I'm that big in bytes */ | |
352 | loff_t size; | |
353 | ||
354 | /* Exported to userspace node information */ | |
355 | struct dst_info *info; | |
356 | ||
357 | /* | |
358 | * Security attribute list. | |
359 | * Used only by exporting node currently. | |
360 | */ | |
361 | struct list_head security_list; | |
362 | struct mutex security_lock; | |
363 | ||
364 | /* | |
365 | * When this unerflows below zero, university collapses. | |
366 | * But this will not happen, since node will be freed, | |
367 | * when reference counter reaches zero. | |
368 | */ | |
369 | atomic_t refcnt; | |
370 | ||
371 | /* How precisely should I be started? */ | |
372 | int (*start)(struct dst_node *); | |
373 | ||
374 | /* Crypto capabilities */ | |
375 | struct dst_crypto_ctl crypto; | |
376 | u8 *hash_key; | |
377 | u8 *cipher_key; | |
378 | ||
379 | /* Pool of processing thread */ | |
380 | struct thread_pool *pool; | |
381 | ||
382 | /* Transaction IDs live here */ | |
383 | atomic_long_t gen; | |
384 | ||
385 | /* | |
386 | * How frequently and how many times transaction | |
387 | * tree should be scanned to drop stale objects. | |
388 | */ | |
389 | long trans_scan_timeout; | |
390 | int trans_max_retries; | |
391 | ||
392 | /* Small gnomes live here */ | |
393 | struct rb_root trans_root; | |
394 | struct mutex trans_lock; | |
395 | ||
396 | /* | |
397 | * Transaction cache/memory pool. | |
398 | * It is big enough to contain not only transaction | |
399 | * itself, but additional crypto data (digest/hmac). | |
400 | */ | |
401 | struct kmem_cache *trans_cache; | |
402 | mempool_t *trans_pool; | |
403 | ||
404 | /* This entity scans transaction tree */ | |
405 | struct delayed_work trans_work; | |
406 | ||
407 | wait_queue_head_t wait; | |
408 | }; | |
409 | ||
410 | /* Kernel representation of the security attribute */ | |
411 | struct dst_secure | |
412 | { | |
413 | struct list_head sec_entry; | |
414 | struct dst_secure_user sec; | |
415 | }; | |
416 | ||
/* Block IO entry point; only declared in this header */
int dst_process_bio(struct dst_node *n, struct bio *bio);

/* Node network initialization; only declared in this header */
int dst_node_init_connected(struct dst_node *n, struct dst_network_ctl *r);
int dst_node_init_listened(struct dst_node *n, struct dst_export_ctl *le);
421 | ||
422 | static inline struct dst_state *dst_state_get(struct dst_state *st) | |
423 | { | |
424 | BUG_ON(atomic_read(&st->refcnt) == 0); | |
425 | atomic_inc(&st->refcnt); | |
426 | return st; | |
427 | } | |
428 | ||
/* State helpers below are only declared in this header */
void dst_state_put(struct dst_state *st);

struct dst_state *dst_state_alloc(struct dst_node *n);
int dst_state_socket_create(struct dst_state *st);
void dst_state_socket_release(struct dst_state *st);

void dst_state_exit_connected(struct dst_state *st);

int dst_state_schedule_receiver(struct dst_state *st);

void dst_dump_addr(struct socket *sk, struct sockaddr *sa, char *str);
440 | ||
441 | static inline void dst_state_lock(struct dst_state *st) | |
442 | { | |
443 | mutex_lock(&st->state_lock); | |
444 | } | |
445 | ||
446 | static inline void dst_state_unlock(struct dst_state *st) | |
447 | { | |
448 | mutex_unlock(&st->state_lock); | |
449 | } | |
450 | ||
/* Polling setup and teardown; only declared in this header */
void dst_poll_exit(struct dst_state *st);
int dst_poll_init(struct dst_state *st);
453 | ||
454 | static inline unsigned int dst_state_poll(struct dst_state *st) | |
455 | { | |
456 | unsigned int revents = POLLHUP | POLLERR; | |
457 | ||
458 | dst_state_lock(st); | |
459 | if (st->socket) | |
460 | revents = st->socket->ops->poll(NULL, st->socket, NULL); | |
461 | dst_state_unlock(st); | |
462 | ||
463 | return revents; | |
464 | } | |
465 | ||
/* Setup callback that does nothing: both arguments are ignored, 0 is returned */
static inline int dst_thread_setup(void *private, void *data)
{
	(void)private;
	(void)data;

	return 0;
}
470 | ||
/* Counterpart of dst_node_get(); only declared in this header */
void dst_node_put(struct dst_node *n);
472 | ||
473 | static inline struct dst_node *dst_node_get(struct dst_node *n) | |
474 | { | |
475 | atomic_inc(&n->refcnt); | |
476 | return n; | |
477 | } | |
478 | ||
479 | int dst_data_recv(struct dst_state *st, void *data, unsigned int size); | |
480 | int dst_recv_cdata(struct dst_state *st, void *cdata); | |
481 | int dst_data_send_header(struct socket *sock, | |
482 | void *data, unsigned int size, int more); | |
483 | ||
484 | int dst_send_bio(struct dst_state *st, struct dst_cmd *cmd, struct bio *bio); | |
485 | ||
486 | int dst_process_io(struct dst_state *st); | |
487 | int dst_export_crypto(struct dst_node *n, struct bio *bio); | |
488 | int dst_export_send_bio(struct bio *bio); | |
489 | int dst_start_export(struct dst_node *n); | |
490 | ||
491 | int __init dst_export_init(void); | |
492 | void dst_export_exit(void); | |
493 | ||
494 | /* Private structure for export block IO requests */ | |
495 | struct dst_export_priv | |
496 | { | |
497 | struct list_head request_entry; | |
498 | struct dst_state *state; | |
499 | struct bio *bio; | |
500 | struct dst_cmd cmd; | |
501 | }; | |
502 | ||
503 | static inline void dst_trans_get(struct dst_trans *t) | |
504 | { | |
505 | atomic_inc(&t->refcnt); | |
506 | } | |
507 | ||
508 | struct dst_trans *dst_trans_search(struct dst_node *node, dst_gen_t gen); | |
509 | int dst_trans_remove(struct dst_trans *t); | |
510 | int dst_trans_remove_nolock(struct dst_trans *t); | |
511 | void dst_trans_put(struct dst_trans *t); | |
512 | ||
513 | /* | |
514 | * Convert bio into network command. | |
515 | */ | |
516 | static inline void dst_bio_to_cmd(struct bio *bio, struct dst_cmd *cmd, | |
517 | u32 command, u64 id) | |
518 | { | |
519 | cmd->cmd = command; | |
520 | cmd->flags = (bio->bi_flags << BIO_POOL_BITS) >> BIO_POOL_BITS; | |
521 | cmd->rw = bio->bi_rw; | |
522 | cmd->size = bio->bi_size; | |
523 | cmd->csize = 0; | |
524 | cmd->id = id; | |
525 | cmd->sector = bio->bi_sector; | |
526 | }; | |
527 | ||
/* Transaction processing; only declared in this header */
int dst_trans_send(struct dst_trans *t);
int dst_trans_crypto(struct dst_trans *t);

/* Node crypto setup and teardown; only declared in this header */
int dst_node_crypto_init(struct dst_node *n, struct dst_crypto_ctl *ctl);
void dst_node_crypto_exit(struct dst_node *n);
533 | ||
534 | static inline int dst_need_crypto(struct dst_node *n) | |
535 | { | |
536 | struct dst_crypto_ctl *c = &n->crypto; | |
537 | /* | |
538 | * Logical OR is appropriate here, but boolean one produces | |
539 | * more optimal code, so it is used instead. | |
540 | */ | |
541 | return (c->hash_algo[0] | c->cipher_algo[0]); | |
542 | } | |
543 | ||
/* Node transaction setup and teardown; only declared in this header */
int dst_node_trans_init(struct dst_node *n, unsigned int size);
void dst_node_trans_exit(struct dst_node *n);
546 | ||
547 | /* | |
548 | * Pool of threads. | |
549 | * Ready list contains threads currently free to be used, | |
550 | * active one contains threads with some work scheduled for them. | |
551 | * Caller can wait in given queue when thread is ready. | |
552 | */ | |
553 | struct thread_pool | |
554 | { | |
555 | int thread_num; | |
556 | struct mutex thread_lock; | |
557 | struct list_head ready_list, active_list; | |
558 | ||
559 | wait_queue_head_t wait; | |
560 | }; | |
561 | ||
/* Thread pool worker management; only declared in this header */
void thread_pool_del_worker(struct thread_pool *p);
void thread_pool_del_worker_id(struct thread_pool *p, unsigned int id);
int thread_pool_add_worker(struct thread_pool *p, char *name, unsigned int id,
		void *(*init)(void *data),
		void (*cleanup)(void *data),
		void *data);

/* Thread pool creation and destruction; only declared in this header */
void thread_pool_destroy(struct thread_pool *p);
struct thread_pool *thread_pool_create(int num, char *name,
		void *(*init)(void *data),
		void (*cleanup)(void *data),
		void *data);

/* Work scheduling; only declared in this header */
int thread_pool_schedule(struct thread_pool *p,
		int (*setup)(void *stored_private, void *setup_data),
		int (*action)(void *stored_private, void *setup_data),
		void *setup_data, long timeout);
int thread_pool_schedule_private(struct thread_pool *p,
		int (*setup)(void *private, void *data),
		int (*action)(void *private, void *data),
		void *data, long timeout, void *id);
585 | ||
586 | #endif /* __KERNEL__ */ | |
587 | #endif /* __DST_H */ |