1 /*P:100 This is the Launcher code, a simple program which lays out the
2 * "physical" memory for the new Guest by mapping the kernel image and the
3 * virtual devices, then reads repeatedly from /dev/lguest to run the Guest.
5 * The only trick: the Makefile links it at a high address so it will be clear
6 * of the guest memory region. It means that each Guest cannot have more than
7 * about 2.5G of memory on a normally configured Host. :*/
8 #define _LARGEFILE64_SOURCE
18 #include <sys/types.h>
25 #include <sys/socket.h>
26 #include <sys/ioctl.h>
29 #include <netinet/in.h>
31 #include <linux/sockios.h>
32 #include <linux/if_tun.h>
37 typedef unsigned long long u64
;
41 #include "../../include/linux/lguest_launcher.h"
42 #include "../../include/asm-i386/e820.h"
44 #define PAGE_PRESENT 0x7 /* Present, RW, Execute */
46 #define BRIDGE_PFX "bridge:"
48 #define SIOCBRADDIF 0x89a2 /* add interface to bridge */
52 #define verbose(args...) \
53 do { if (verbose) printf(args); } while(0)
62 struct lguest_device_desc
*descs
;
64 struct device
**lastdev
;
70 struct lguest_device_desc
*desc
;
73 /* Watch this fd if handle_input non-NULL. */
75 bool (*handle_input
)(int fd
, struct device
*me
);
77 /* Watch DMA to this key if handle_output non-NULL. */
78 unsigned long watch_key
;
79 u32 (*handle_output
)(int fd
, const struct iovec
*iov
,
80 unsigned int num
, struct device
*me
);
82 /* Device-specific data. */
86 static int open_or_die(const char *name
, int flags
)
88 int fd
= open(name
, flags
);
90 err(1, "Failed to open %s", name
);
94 static void *map_zeroed_pages(unsigned long addr
, unsigned int num
)
99 fd
= open_or_die("/dev/zero", O_RDONLY
);
101 if (mmap((void *)addr
, getpagesize() * num
,
102 PROT_READ
|PROT_WRITE
|PROT_EXEC
, MAP_FIXED
|MAP_PRIVATE
, fd
, 0)
104 err(1, "Mmaping %u pages of /dev/zero @%p", num
, (void *)addr
);
108 /* Find magic string marking entry point, return entry point. */
109 static unsigned long entry_point(void *start
, void *end
,
110 unsigned long page_offset
)
114 for (p
= start
; p
< end
; p
++)
115 if (memcmp(p
, "GenuineLguest", strlen("GenuineLguest")) == 0)
116 return (long)p
+ strlen("GenuineLguest") + page_offset
;
118 err(1, "Is this image a genuine lguest?");
121 /* Returns the entry point */
122 static unsigned long map_elf(int elf_fd
, const Elf32_Ehdr
*ehdr
,
123 unsigned long *page_offset
)
126 Elf32_Phdr phdr
[ehdr
->e_phnum
];
128 unsigned long start
= -1UL, end
= 0;
131 if (ehdr
->e_type
!= ET_EXEC
132 || ehdr
->e_machine
!= EM_386
133 || ehdr
->e_phentsize
!= sizeof(Elf32_Phdr
)
134 || ehdr
->e_phnum
< 1 || ehdr
->e_phnum
> 65536U/sizeof(Elf32_Phdr
))
135 errx(1, "Malformed elf header");
137 if (lseek(elf_fd
, ehdr
->e_phoff
, SEEK_SET
) < 0)
138 err(1, "Seeking to program headers");
139 if (read(elf_fd
, phdr
, sizeof(phdr
)) != sizeof(phdr
))
140 err(1, "Reading program headers");
143 /* We map the loadable segments at virtual addresses corresponding
144 * to their physical addresses (our virtual == guest physical). */
145 for (i
= 0; i
< ehdr
->e_phnum
; i
++) {
146 if (phdr
[i
].p_type
!= PT_LOAD
)
149 verbose("Section %i: size %i addr %p\n",
150 i
, phdr
[i
].p_memsz
, (void *)phdr
[i
].p_paddr
);
152 /* We expect linear address space. */
154 *page_offset
= phdr
[i
].p_vaddr
- phdr
[i
].p_paddr
;
155 else if (*page_offset
!= phdr
[i
].p_vaddr
- phdr
[i
].p_paddr
)
156 errx(1, "Page offset of section %i different", i
);
158 if (phdr
[i
].p_paddr
< start
)
159 start
= phdr
[i
].p_paddr
;
160 if (phdr
[i
].p_paddr
+ phdr
[i
].p_filesz
> end
)
161 end
= phdr
[i
].p_paddr
+ phdr
[i
].p_filesz
;
163 /* We map everything private, writable. */
164 addr
= mmap((void *)phdr
[i
].p_paddr
,
166 PROT_READ
|PROT_WRITE
|PROT_EXEC
,
167 MAP_FIXED
|MAP_PRIVATE
,
168 elf_fd
, phdr
[i
].p_offset
);
169 if (addr
!= (void *)phdr
[i
].p_paddr
)
170 err(1, "Mmaping vmlinux seg %i gave %p not %p",
171 i
, addr
, (void *)phdr
[i
].p_paddr
);
174 return entry_point((void *)start
, (void *)end
, *page_offset
);
/* Guess the kernel's page offset from an unpacked image.
 *
 * Scans img[0..len) for the byte 0xA1 (the opcode of "mov 0xXXXXXXXX,%eax")
 * and keeps a tally, per possible value, of the byte found four positions
 * later, i.e. the most significant byte of the 32-bit operand.  The first
 * value whose tally exceeds three is returned shifted left by 24 bits.
 *
 * If no value is seen often enough, exits via errx() with status 1.
 *
 * This is amazingly reliable. */
static unsigned long intuit_page_offset(unsigned char *img, unsigned long len)
{
	unsigned int i, possibilities[256] = { 0 };

	for (i = 0; i + 4 < len; i++) {
		/* mov 0xXXXXXXXX,%eax */
		if (img[i] == 0xA1 && ++possibilities[img[i+4]] > 3)
			return (unsigned long)img[i+4] << 24;
	}
	errx(1, "could not determine page offset");
}
190 static unsigned long unpack_bzimage(int fd
, unsigned long *page_offset
)
194 void *img
= (void *)0x100000;
196 f
= gzdopen(fd
, "rb");
197 while ((ret
= gzread(f
, img
+ len
, 65536)) > 0)
200 err(1, "reading image from bzImage");
202 verbose("Unpacked size %i addr %p\n", len
, img
);
203 *page_offset
= intuit_page_offset(img
, len
);
205 return entry_point(img
, img
+ len
, *page_offset
);
208 static unsigned long load_bzimage(int fd
, unsigned long *page_offset
)
213 /* Ugly brute force search for gzip header. */
214 while (read(fd
, &c
, 1) == 1) {
230 lseek(fd
, -10, SEEK_CUR
);
231 if (c
!= 0x03) /* Compressed under UNIX. */
234 return unpack_bzimage(fd
, page_offset
);
237 errx(1, "Could not find kernel in bzImage");
240 static unsigned long load_kernel(int fd
, unsigned long *page_offset
)
244 if (read(fd
, &hdr
, sizeof(hdr
)) != sizeof(hdr
))
245 err(1, "Reading kernel");
247 if (memcmp(hdr
.e_ident
, ELFMAG
, SELFMAG
) == 0)
248 return map_elf(fd
, &hdr
, page_offset
);
250 return load_bzimage(fd
, page_offset
);
/* Round addr up to the next multiple of the page size reported by
 * getpagesize(); an address that is already a multiple is returned as is. */
static inline unsigned long page_align(unsigned long addr)
{
	return ((addr + getpagesize()-1) & ~(getpagesize()-1));
}
258 /* initrd gets loaded at top of memory: return length. */
259 static unsigned long load_initrd(const char *name
, unsigned long mem
)
266 ifd
= open_or_die(name
, O_RDONLY
);
267 if (fstat(ifd
, &st
) < 0)
268 err(1, "fstat() on initrd '%s'", name
);
270 len
= page_align(st
.st_size
);
271 iaddr
= mmap((void *)mem
- len
, st
.st_size
,
272 PROT_READ
|PROT_EXEC
|PROT_WRITE
,
273 MAP_FIXED
|MAP_PRIVATE
, ifd
, 0);
274 if (iaddr
!= (void *)mem
- len
)
275 err(1, "Mmaping initrd '%s' returned %p not %p",
276 name
, iaddr
, (void *)mem
- len
);
278 verbose("mapped initrd %s size=%lu @ %p\n", name
, st
.st_size
, iaddr
);
282 static unsigned long setup_pagetables(unsigned long mem
,
283 unsigned long initrd_size
,
284 unsigned long page_offset
)
287 unsigned int mapped_pages
, i
, linear_pages
;
288 unsigned int ptes_per_page
= getpagesize()/sizeof(u32
);
290 /* If we can map all of memory above page_offset, we do so. */
291 if (mem
<= -page_offset
)
292 mapped_pages
= mem
/getpagesize();
294 mapped_pages
= -page_offset
/getpagesize();
296 /* Each linear PTE page can map ptes_per_page pages. */
297 linear_pages
= (mapped_pages
+ ptes_per_page
-1)/ptes_per_page
;
299 /* We lay out top-level then linear mapping immediately below initrd */
300 pgdir
= (void *)mem
- initrd_size
- getpagesize();
301 linear
= (void *)pgdir
- linear_pages
*getpagesize();
303 for (i
= 0; i
< mapped_pages
; i
++)
304 linear
[i
] = ((i
* getpagesize()) | PAGE_PRESENT
);
306 /* Now set up pgd so that this memory is at page_offset */
307 for (i
= 0; i
< mapped_pages
; i
+= ptes_per_page
) {
308 pgdir
[(i
+ page_offset
/getpagesize())/ptes_per_page
]
309 = (((u32
)linear
+ i
*sizeof(u32
)) | PAGE_PRESENT
);
312 verbose("Linear mapping of %u pages in %u pte pages at %p\n",
313 mapped_pages
, linear_pages
, linear
);
315 return (unsigned long)pgdir
;
318 static void concat(char *dst
, char *args
[])
320 unsigned int i
, len
= 0;
322 for (i
= 0; args
[i
]; i
++) {
323 strcpy(dst
+len
, args
[i
]);
324 strcat(dst
+len
, " ");
325 len
+= strlen(args
[i
]) + 1;
327 /* In case it's empty. */
331 static int tell_kernel(u32 pgdir
, u32 start
, u32 page_offset
)
333 u32 args
[] = { LHREQ_INITIALIZE
,
334 top
/getpagesize(), pgdir
, start
, page_offset
};
337 fd
= open_or_die("/dev/lguest", O_RDWR
);
338 if (write(fd
, args
, sizeof(args
)) < 0)
339 err(1, "Writing to /dev/lguest");
343 static void set_fd(int fd
, struct device_list
*devices
)
345 FD_SET(fd
, &devices
->infds
);
346 if (fd
> devices
->max_infd
)
347 devices
->max_infd
= fd
;
350 /* When input arrives, we tell the kernel to kick lguest out with -EAGAIN. */
351 static void wake_parent(int pipefd
, int lguest_fd
, struct device_list
*devices
)
353 set_fd(pipefd
, devices
);
356 fd_set rfds
= devices
->infds
;
357 u32 args
[] = { LHREQ_BREAK
, 1 };
359 select(devices
->max_infd
+1, &rfds
, NULL
, NULL
, NULL
);
360 if (FD_ISSET(pipefd
, &rfds
)) {
362 if (read(pipefd
, &ignorefd
, sizeof(ignorefd
)) == 0)
364 FD_CLR(ignorefd
, &devices
->infds
);
366 write(lguest_fd
, args
, sizeof(args
));
370 static int setup_waker(int lguest_fd
, struct device_list
*device_list
)
372 int pipefd
[2], child
;
381 wake_parent(pipefd
[0], lguest_fd
, device_list
);
388 static void *_check_pointer(unsigned long addr
, unsigned int size
,
391 if (addr
>= top
|| addr
+ size
>= top
)
392 errx(1, "%s:%i: Invalid address %li", __FILE__
, line
, addr
);
395 #define check_pointer(addr,size) _check_pointer(addr, size, __LINE__)
397 /* Returns pointer to dma->used_len */
398 static u32
*dma2iov(unsigned long dma
, struct iovec iov
[], unsigned *num
)
401 struct lguest_dma
*udma
;
403 udma
= check_pointer(dma
, sizeof(*udma
));
404 for (i
= 0; i
< LGUEST_MAX_DMA_SECTIONS
; i
++) {
408 iov
[i
].iov_base
= check_pointer(udma
->addr
[i
], udma
->len
[i
]);
409 iov
[i
].iov_len
= udma
->len
[i
];
412 return &udma
->used_len
;
415 static u32
*get_dma_buffer(int fd
, void *key
,
416 struct iovec iov
[], unsigned int *num
, u32
*irq
)
418 u32 buf
[] = { LHREQ_GETDMA
, (u32
)key
};
422 udma
= write(fd
, buf
, sizeof(buf
));
423 if (udma
== (unsigned long)-1)
426 /* Kernel stashes irq in ->used_len. */
427 res
= dma2iov(udma
, iov
, num
);
432 static void trigger_irq(int fd
, u32 irq
)
434 u32 buf
[] = { LHREQ_IRQ
, irq
};
435 if (write(fd
, buf
, sizeof(buf
)) != 0)
436 err(1, "Triggering irq %i", irq
);
439 static void discard_iovec(struct iovec
*iov
, unsigned int *num
)
441 static char discard_buf
[1024];
443 iov
->iov_base
= discard_buf
;
444 iov
->iov_len
= sizeof(discard_buf
);
447 static struct termios orig_term
;
448 static void restore_term(void)
450 tcsetattr(STDIN_FILENO
, TCSANOW
, &orig_term
);
456 struct timeval start
;
459 /* We DMA input to buffer bound at start of console page. */
460 static bool handle_console_input(int fd
, struct device
*dev
)
465 struct iovec iov
[LGUEST_MAX_DMA_SECTIONS
];
466 struct console_abort
*abort
= dev
->priv
;
468 lenp
= get_dma_buffer(fd
, dev
->mem
, iov
, &num
, &irq
);
470 warn("console: no dma buffer!");
471 discard_iovec(iov
, &num
);
474 len
= readv(dev
->fd
, iov
, num
);
476 warnx("Failed to get console input, ignoring console.");
482 trigger_irq(fd
, irq
);
485 /* Three ^C within one second? Exit. */
486 if (len
== 1 && ((char *)iov
[0].iov_base
)[0] == 3) {
488 gettimeofday(&abort
->start
, NULL
);
489 else if (abort
->count
== 3) {
491 gettimeofday(&now
, NULL
);
492 if (now
.tv_sec
<= abort
->start
.tv_sec
+1) {
493 /* Make sure waker is not blocked in BREAK */
494 u32 args
[] = { LHREQ_BREAK
, 0 };
496 write(fd
, args
, sizeof(args
));
511 static u32
handle_console_output(int fd
, const struct iovec
*iov
,
512 unsigned num
, struct device
*dev
)
514 return writev(STDOUT_FILENO
, iov
, num
);
517 static u32
handle_tun_output(int fd
, const struct iovec
*iov
,
518 unsigned num
, struct device
*dev
)
520 /* Now we've seen output, we should warn if we can't get buffers. */
521 *(bool *)dev
->priv
= true;
522 return writev(dev
->fd
, iov
, num
);
525 static unsigned long peer_offset(unsigned int peernum
)
530 static bool handle_tun_input(int fd
, struct device
*dev
)
535 struct iovec iov
[LGUEST_MAX_DMA_SECTIONS
];
537 lenp
= get_dma_buffer(fd
, dev
->mem
+peer_offset(NET_PEERNUM
), iov
, &num
,
540 if (*(bool *)dev
->priv
)
541 warn("network: no dma buffer!");
542 discard_iovec(iov
, &num
);
545 len
= readv(dev
->fd
, iov
, num
);
547 err(1, "reading network");
550 trigger_irq(fd
, irq
);
552 verbose("tun input packet len %i [%02x %02x] (%s)\n", len
,
553 ((u8
*)iov
[0].iov_base
)[0], ((u8
*)iov
[0].iov_base
)[1],
554 lenp
? "sent" : "discarded");
558 static u32
handle_block_output(int fd
, const struct iovec
*iov
,
559 unsigned num
, struct device
*dev
)
561 struct lguest_block_page
*p
= dev
->mem
;
563 unsigned int len
, reply_num
;
564 struct iovec reply
[LGUEST_MAX_DMA_SECTIONS
];
565 off64_t device_len
, off
= (off64_t
)p
->sector
* 512;
567 device_len
= *(off64_t
*)dev
->priv
;
569 if (off
>= device_len
)
570 err(1, "Bad offset %llu vs %llu", off
, device_len
);
571 if (lseek64(dev
->fd
, off
, SEEK_SET
) != off
)
572 err(1, "Bad seek to sector %i", p
->sector
);
574 verbose("Block: %s at offset %llu\n", p
->type
? "WRITE" : "READ", off
);
576 lenp
= get_dma_buffer(fd
, dev
->mem
, reply
, &reply_num
, &irq
);
578 err(1, "Block request didn't give us a dma buffer");
581 len
= writev(dev
->fd
, iov
, num
);
582 if (off
+ len
> device_len
) {
583 ftruncate(dev
->fd
, device_len
);
584 errx(1, "Write past end %llu+%u", off
, len
);
588 len
= readv(dev
->fd
, reply
, reply_num
);
592 p
->result
= 1 + (p
->bytes
!= len
);
593 trigger_irq(fd
, irq
);
597 static void handle_output(int fd
, unsigned long dma
, unsigned long key
,
598 struct device_list
*devices
)
602 struct iovec iov
[LGUEST_MAX_DMA_SECTIONS
];
605 lenp
= dma2iov(dma
, iov
, &num
);
606 for (i
= devices
->dev
; i
; i
= i
->next
) {
607 if (i
->handle_output
&& key
== i
->watch_key
) {
608 *lenp
= i
->handle_output(fd
, iov
, num
, i
);
612 warnx("Pending dma %p, key %p", (void *)dma
, (void *)key
);
615 static void handle_input(int fd
, struct device_list
*devices
)
617 struct timeval poll
= { .tv_sec
= 0, .tv_usec
= 0 };
621 fd_set fds
= devices
->infds
;
623 if (select(devices
->max_infd
+1, &fds
, NULL
, NULL
, &poll
) == 0)
626 for (i
= devices
->dev
; i
; i
= i
->next
) {
627 if (i
->handle_input
&& FD_ISSET(i
->fd
, &fds
)) {
628 if (!i
->handle_input(fd
, i
)) {
629 FD_CLR(i
->fd
, &devices
->infds
);
630 /* Tell waker to ignore it too... */
631 write(waker_fd
, &i
->fd
, sizeof(i
->fd
));
638 static struct lguest_device_desc
*
639 new_dev_desc(struct lguest_device_desc
*descs
,
640 u16 type
, u16 features
, u16 num_pages
)
644 for (i
= 0; i
< LGUEST_MAX_DEVICES
; i
++) {
645 if (!descs
[i
].type
) {
646 descs
[i
].type
= type
;
647 descs
[i
].features
= features
;
648 descs
[i
].num_pages
= num_pages
;
650 map_zeroed_pages(top
, num_pages
);
651 descs
[i
].pfn
= top
/getpagesize();
652 top
+= num_pages
*getpagesize();
657 errx(1, "too many devices");
660 static struct device
*new_device(struct device_list
*devices
,
661 u16 type
, u16 num_pages
, u16 features
,
663 bool (*handle_input
)(int, struct device
*),
664 unsigned long watch_off
,
665 u32 (*handle_output
)(int,
666 const struct iovec
*,
670 struct device
*dev
= malloc(sizeof(*dev
));
672 /* Append to device list. */
673 *devices
->lastdev
= dev
;
675 devices
->lastdev
= &dev
->next
;
679 set_fd(dev
->fd
, devices
);
680 dev
->desc
= new_dev_desc(devices
->descs
, type
, features
, num_pages
);
681 dev
->mem
= (void *)(dev
->desc
->pfn
* getpagesize());
682 dev
->handle_input
= handle_input
;
683 dev
->watch_key
= (unsigned long)dev
->mem
+ watch_off
;
684 dev
->handle_output
= handle_output
;
688 static void setup_console(struct device_list
*devices
)
692 if (tcgetattr(STDIN_FILENO
, &orig_term
) == 0) {
693 struct termios term
= orig_term
;
694 term
.c_lflag
&= ~(ISIG
|ICANON
|ECHO
);
695 tcsetattr(STDIN_FILENO
, TCSANOW
, &term
);
696 atexit(restore_term
);
699 /* We don't currently require a page for the console. */
700 dev
= new_device(devices
, LGUEST_DEVICE_T_CONSOLE
, 0, 0,
701 STDIN_FILENO
, handle_console_input
,
702 LGUEST_CONSOLE_DMA_KEY
, handle_console_output
);
703 dev
->priv
= malloc(sizeof(struct console_abort
));
704 ((struct console_abort
*)dev
->priv
)->count
= 0;
705 verbose("device %p: console\n",
706 (void *)(dev
->desc
->pfn
* getpagesize()));
709 static void setup_block_file(const char *filename
, struct device_list
*devices
)
714 struct lguest_block_page
*p
;
716 fd
= open_or_die(filename
, O_RDWR
|O_LARGEFILE
|O_DIRECT
);
717 dev
= new_device(devices
, LGUEST_DEVICE_T_BLOCK
, 1,
718 LGUEST_DEVICE_F_RANDOMNESS
,
719 fd
, NULL
, 0, handle_block_output
);
720 device_len
= dev
->priv
= malloc(sizeof(*device_len
));
721 *device_len
= lseek64(fd
, 0, SEEK_END
);
724 p
->num_sectors
= *device_len
/512;
725 verbose("device %p: block %i sectors\n",
726 (void *)(dev
->desc
->pfn
* getpagesize()), p
->num_sectors
);
729 /* We use fcntl locks to reserve network slots (autocleanup!) */
730 static unsigned int find_slot(int netfd
, const char *filename
)
735 fl
.l_whence
= SEEK_SET
;
738 fl
.l_start
< getpagesize()/sizeof(struct lguest_net
);
740 if (fcntl(netfd
, F_SETLK
, &fl
) == 0)
743 errx(1, "No free slots in network file %s", filename
);
746 static void setup_net_file(const char *filename
,
747 struct device_list
*devices
)
752 netfd
= open(filename
, O_RDWR
, 0);
754 if (errno
== ENOENT
) {
755 netfd
= open(filename
, O_RDWR
|O_CREAT
, 0600);
757 char page
[getpagesize()];
758 memset(page
, 0, sizeof(page
));
759 write(netfd
, page
, sizeof(page
));
763 err(1, "cannot open net file '%s'", filename
);
766 dev
= new_device(devices
, LGUEST_DEVICE_T_NET
, 1,
767 find_slot(netfd
, filename
)|LGUEST_NET_F_NOCSUM
,
770 /* We overwrite the /dev/zero mapping with the actual file. */
771 if (mmap(dev
->mem
, getpagesize(), PROT_READ
|PROT_WRITE
,
772 MAP_FIXED
|MAP_SHARED
, netfd
, 0) != dev
->mem
)
773 err(1, "could not mmap '%s'", filename
);
774 verbose("device %p: shared net %s, peer %i\n",
775 (void *)(dev
->desc
->pfn
* getpagesize()), filename
,
776 dev
->desc
->features
& ~LGUEST_NET_F_NOCSUM
);
779 static u32
str2ip(const char *ipaddr
)
781 unsigned int byte
[4];
783 sscanf(ipaddr
, "%u.%u.%u.%u", &byte
[0], &byte
[1], &byte
[2], &byte
[3]);
784 return (byte
[0] << 24) | (byte
[1] << 16) | (byte
[2] << 8) | byte
[3];
787 /* adapted from libbridge */
788 static void add_to_bridge(int fd
, const char *if_name
, const char *br_name
)
794 errx(1, "must specify bridge name");
796 ifidx
= if_nametoindex(if_name
);
798 errx(1, "interface %s does not exist!", if_name
);
800 strncpy(ifr
.ifr_name
, br_name
, IFNAMSIZ
);
801 ifr
.ifr_ifindex
= ifidx
;
802 if (ioctl(fd
, SIOCBRADDIF
, &ifr
) < 0)
803 err(1, "can't add %s to bridge %s", if_name
, br_name
);
806 static void configure_device(int fd
, const char *devname
, u32 ipaddr
,
807 unsigned char hwaddr
[6])
810 struct sockaddr_in
*sin
= (struct sockaddr_in
*)&ifr
.ifr_addr
;
812 memset(&ifr
, 0, sizeof(ifr
));
813 strcpy(ifr
.ifr_name
, devname
);
814 sin
->sin_family
= AF_INET
;
815 sin
->sin_addr
.s_addr
= htonl(ipaddr
);
816 if (ioctl(fd
, SIOCSIFADDR
, &ifr
) != 0)
817 err(1, "Setting %s interface address", devname
);
818 ifr
.ifr_flags
= IFF_UP
;
819 if (ioctl(fd
, SIOCSIFFLAGS
, &ifr
) != 0)
820 err(1, "Bringing interface %s up", devname
);
822 if (ioctl(fd
, SIOCGIFHWADDR
, &ifr
) != 0)
823 err(1, "getting hw address for %s", devname
);
825 memcpy(hwaddr
, ifr
.ifr_hwaddr
.sa_data
, 6);
828 static void setup_tun_net(const char *arg
, struct device_list
*devices
)
834 const char *br_name
= NULL
;
836 netfd
= open_or_die("/dev/net/tun", O_RDWR
);
837 memset(&ifr
, 0, sizeof(ifr
));
838 ifr
.ifr_flags
= IFF_TAP
| IFF_NO_PI
;
839 strcpy(ifr
.ifr_name
, "tap%d");
840 if (ioctl(netfd
, TUNSETIFF
, &ifr
) != 0)
841 err(1, "configuring /dev/net/tun");
842 ioctl(netfd
, TUNSETNOCSUM
, 1);
844 /* You will be peer 1: we should create enough jitter to randomize */
845 dev
= new_device(devices
, LGUEST_DEVICE_T_NET
, 1,
846 NET_PEERNUM
|LGUEST_DEVICE_F_RANDOMNESS
, netfd
,
847 handle_tun_input
, peer_offset(0), handle_tun_output
);
848 dev
->priv
= malloc(sizeof(bool));
849 *(bool *)dev
->priv
= false;
851 ipfd
= socket(PF_INET
, SOCK_DGRAM
, IPPROTO_IP
);
853 err(1, "opening IP socket");
855 if (!strncmp(BRIDGE_PFX
, arg
, strlen(BRIDGE_PFX
))) {
857 br_name
= arg
+ strlen(BRIDGE_PFX
);
858 add_to_bridge(ipfd
, ifr
.ifr_name
, br_name
);
862 /* We are peer 0, ie. first slot. */
863 configure_device(ipfd
, ifr
.ifr_name
, ip
, dev
->mem
);
865 /* Set "promisc" bit: we want every single packet. */
866 *((u8
*)dev
->mem
) |= 0x1;
870 verbose("device %p: tun net %u.%u.%u.%u\n",
871 (void *)(dev
->desc
->pfn
* getpagesize()),
872 (u8
)(ip
>>24), (u8
)(ip
>>16), (u8
)(ip
>>8), (u8
)ip
);
874 verbose("attached to bridge: %s\n", br_name
);
877 static void __attribute__((noreturn
))
878 run_guest(int lguest_fd
, struct device_list
*device_list
)
881 u32 args
[] = { LHREQ_BREAK
, 0 };
882 unsigned long arr
[2];
885 /* We read from the /dev/lguest device to run the Guest. */
886 readval
= read(lguest_fd
, arr
, sizeof(arr
));
888 if (readval
== sizeof(arr
)) {
889 handle_output(lguest_fd
, arr
[0], arr
[1], device_list
);
891 } else if (errno
== ENOENT
) {
892 char reason
[1024] = { 0 };
893 read(lguest_fd
, reason
, sizeof(reason
)-1);
894 errx(1, "%s", reason
);
895 } else if (errno
!= EAGAIN
)
896 err(1, "Running guest failed");
897 handle_input(lguest_fd
, device_list
);
898 if (write(lguest_fd
, args
, sizeof(args
)) < 0)
899 err(1, "Resetting break");
903 static struct option opts
[] = {
904 { "verbose", 0, NULL
, 'v' },
905 { "sharenet", 1, NULL
, 's' },
906 { "tunnet", 1, NULL
, 't' },
907 { "block", 1, NULL
, 'b' },
908 { "initrd", 1, NULL
, 'i' },
/* Print the command-line synopsis through errx(), which terminates the
 * program with exit status 1. */
static void usage(void)
{
	errx(1, "Usage: lguest [--verbose] "
	     "[--sharenet=<filename>|--tunnet=(<ipaddr>|bridge:<bridgename>)\n"
	     "|--block=<filename>|--initrd=<filename>]...\n"
	     "<mem-in-mb> vmlinux [args...]");
}
919 int main(int argc
, char *argv
[])
921 unsigned long mem
= 0, pgdir
, start
, page_offset
, initrd_size
= 0;
923 struct device_list device_list
;
924 void *boot
= (void *)0;
925 const char *initrd_name
= NULL
;
927 device_list
.max_infd
= -1;
928 device_list
.dev
= NULL
;
929 device_list
.lastdev
= &device_list
.dev
;
930 FD_ZERO(&device_list
.infds
);
932 /* We need to know how much memory so we can allocate devices. */
933 for (i
= 1; i
< argc
; i
++) {
934 if (argv
[i
][0] != '-') {
935 mem
= top
= atoi(argv
[i
]) * 1024 * 1024;
936 device_list
.descs
= map_zeroed_pages(top
, 1);
937 top
+= getpagesize();
941 while ((c
= getopt_long(argc
, argv
, "v", opts
, NULL
)) != EOF
) {
947 setup_net_file(optarg
, &device_list
);
950 setup_tun_net(optarg
, &device_list
);
953 setup_block_file(optarg
, &device_list
);
956 initrd_name
= optarg
;
959 warnx("Unknown argument %s", argv
[optind
]);
963 if (optind
+ 2 > argc
)
966 /* We need a console device */
967 setup_console(&device_list
);
969 /* First we map /dev/zero over all of guest-physical memory. */
970 map_zeroed_pages(0, mem
/ getpagesize());
972 /* Now we load the kernel */
973 start
= load_kernel(open_or_die(argv
[optind
+1], O_RDONLY
),
976 /* Map the initrd image if requested */
978 initrd_size
= load_initrd(initrd_name
, mem
);
979 *(unsigned long *)(boot
+0x218) = mem
- initrd_size
;
980 *(unsigned long *)(boot
+0x21c) = initrd_size
;
981 *(unsigned char *)(boot
+0x210) = 0xFF;
984 /* Set up the initial linear pagetables. */
985 pgdir
= setup_pagetables(mem
, initrd_size
, page_offset
);
987 /* E820 memory map: ours is a simple, single region. */
988 *(char*)(boot
+E820NR
) = 1;
989 *((struct e820entry
*)(boot
+E820MAP
))
990 = ((struct e820entry
) { 0, mem
, E820_RAM
});
991 /* Command line pointer and command line (at 4096) */
992 *(void **)(boot
+ 0x228) = boot
+ 4096;
993 concat(boot
+ 4096, argv
+optind
+2);
994 /* Paravirt type: 1 == lguest */
995 *(int *)(boot
+ 0x23c) = 1;
997 lguest_fd
= tell_kernel(pgdir
, start
, page_offset
);
998 waker_fd
= setup_waker(lguest_fd
, &device_list
);
1000 run_guest(lguest_fd
, &device_list
);