[libvirt] Fix LXC I/O handling
Daniel P. Berrange
berrange at fedoraproject.org
Thu Jan 12 21:04:50 UTC 2012
commit 21a02c2e90f1b09d94e0c7e63b0de55188eaf108
Author: Daniel P. Berrange <berrange at redhat.com>
Date: Thu Jan 12 21:04:43 2012 +0000
Fix LXC I/O handling
libvirt-0.9.9-lxc-io.patch | 345 ++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 345 insertions(+), 0 deletions(-)
---
diff --git a/libvirt-0.9.9-lxc-io.patch b/libvirt-0.9.9-lxc-io.patch
new file mode 100644
index 0000000..e91957a
--- /dev/null
+++ b/libvirt-0.9.9-lxc-io.patch
@@ -0,0 +1,345 @@
+commit 9130396214975ba2251082f943c9717281039050
+Author: Daniel P. Berrange <berrange at redhat.com>
+Date: Thu Jan 12 17:03:03 2012 +0000
+
+ Re-write LXC controller end-of-file I/O handling yet again
+
+ Currently the LXC controller attempts to deal with EOF on a
+ tty by spawning a thread to do an edge triggered epoll_wait().
+ This avoids the normal event loop spinning on POLLHUP. There
+ is a subtle mistake though - even after seeing POLLHUP on a
+ master PTY, it is still perfectly possible & valid to write
+ data to the PTY. There is a buffer that can be filled with
+ data, even when no client is present.
+
+ The second mistake is that the epoll_wait() thread was not
+ looking for the EPOLLOUT condition, so when a new client
+ connects to the LXC console, it had to explicitly send a
+ character before any queued output would appear.
+
+ Finally, there was in fact no need to spawn a new thread to
+ deal with epoll_wait(). The epoll file descriptor itself
+ can be poll()'d on normally.
+
+ This patch attempts to deal with all these problems.
+
+ - The blocking epoll_wait() thread is replaced by a poll
+ on the epoll file descriptor which then does a non-blocking
+ epoll_wait() to handle events
+ - Even if POLLHUP is seen, we continue trying to write
+ any pending output until getting EAGAIN from write.
+ - Once write returns EAGAIN, we modify the epoll event
+ mask to also look for EPOLLOUT
+
+ * src/lxc/lxc_controller.c: Avoid stalled I/O upon
+ connected to an LXC console
+
+diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c
+index bb936ee..49727dd 100644
+--- a/src/lxc/lxc_controller.c
++++ b/src/lxc/lxc_controller.c
+@@ -736,9 +736,17 @@ struct lxcConsole {
+ int hostWatch;
+ int hostFd; /* PTY FD in the host OS */
+ bool hostClosed;
++ int hostEpoll;
++ bool hostBlocking;
++
+ int contWatch;
+ int contFd; /* PTY FD in the container */
+ bool contClosed;
++ int contEpoll;
++ bool contBlocking;
++
++ int epollWatch;
++ int epollFd; /* epoll FD for dealing with EOF */
+
+ size_t fromHostLen;
+ char fromHostBuf[1024];
+@@ -834,102 +842,148 @@ static void lxcConsoleUpdateWatch(struct lxcConsole *console)
+ int hostEvents = 0;
+ int contEvents = 0;
+
+- if (!console->hostClosed) {
++ if (!console->hostClosed || (!console->hostBlocking && console->fromContLen)) {
+ if (console->fromHostLen < sizeof(console->fromHostBuf))
+ hostEvents |= VIR_EVENT_HANDLE_READABLE;
+ if (console->fromContLen)
+ hostEvents |= VIR_EVENT_HANDLE_WRITABLE;
+ }
+- if (!console->contClosed) {
++ if (!console->contClosed || (!console->contBlocking && console->fromHostLen)) {
+ if (console->fromContLen < sizeof(console->fromContBuf))
+ contEvents |= VIR_EVENT_HANDLE_READABLE;
+ if (console->fromHostLen)
+ contEvents |= VIR_EVENT_HANDLE_WRITABLE;
+ }
+
++ VIR_DEBUG("Container watch %d=%d host watch %d=%d",
++ console->contWatch, contEvents,
++ console->hostWatch, hostEvents);
+ virEventUpdateHandle(console->contWatch, contEvents);
+ virEventUpdateHandle(console->hostWatch, hostEvents);
+-}
+
++ if (console->hostClosed) {
++ int events = EPOLLIN | EPOLLET;
++ if (console->hostBlocking)
++ events |= EPOLLOUT;
+
+-struct lxcConsoleEOFData {
+- struct lxcConsole *console;
+- int fd;
+-};
+-
++ if (events != console->hostEpoll) {
++ struct epoll_event event;
++ int action = EPOLL_CTL_ADD;
++ if (console->hostEpoll)
++ action = EPOLL_CTL_MOD;
+
+-static void lxcConsoleEOFThread(void *opaque)
+-{
+- struct lxcConsoleEOFData *data = opaque;
+- int ret;
+- int epollfd = -1;
+- struct epoll_event event;
++ VIR_DEBUG("newHostEvents=%x oldHostEvents=%x", events, console->hostEpoll);
+
+- if ((epollfd = epoll_create(2)) < 0) {
+- virReportSystemError(errno, "%s",
+- _("Unable to create epoll fd"));
+- goto cleanup;
++ event.events = events;
++ event.data.fd = console->hostFd;
++ if (epoll_ctl(console->epollFd, action, console->hostFd, &event) < 0) {
++ VIR_DEBUG(":fail");
++ virReportSystemError(errno, "%s",
++ _("Unable to add epoll fd"));
++ quit = true;
++ goto cleanup;
++ }
++ console->hostEpoll = events;
++ VIR_DEBUG("newHostEvents=%x oldHostEvents=%x", events, console->hostEpoll);
++ }
++ } else if (console->hostEpoll) {
++ VIR_DEBUG("Stop epoll oldContEvents=%x", console->hostEpoll);
++ if (epoll_ctl(console->epollFd, EPOLL_CTL_DEL, console->hostFd, NULL) < 0) {
++ virReportSystemError(errno, "%s",
++ _("Unable to remove epoll fd"));
++ VIR_DEBUG(":fail");
++ quit = true;
++ goto cleanup;
++ }
++ console->hostEpoll = 0;
+ }
+
+- event.events = EPOLLIN | EPOLLET;
+- event.data.fd = data->fd;
+- if (epoll_ctl(epollfd, EPOLL_CTL_ADD, data->fd, &event) < 0) {
+- virReportSystemError(errno, "%s",
+- _("Unable to add epoll fd"));
+- goto cleanup;
++ if (console->contClosed) {
++ int events = EPOLLIN | EPOLLET;
++ if (console->contBlocking)
++ events |= EPOLLOUT;
++
++ if (events != console->contEpoll) {
++ struct epoll_event event;
++ int action = EPOLL_CTL_ADD;
++ if (console->contEpoll)
++ action = EPOLL_CTL_MOD;
++
++ VIR_DEBUG("newContEvents=%x oldContEvents=%x", events, console->contEpoll);
++
++ event.events = events;
++ event.data.fd = console->contFd;
++ if (epoll_ctl(console->epollFd, action, console->contFd, &event) < 0) {
++ virReportSystemError(errno, "%s",
++ _("Unable to add epoll fd"));
++ VIR_DEBUG(":fail");
++ quit = true;
++ goto cleanup;
++ }
++ console->contEpoll = events;
++ VIR_DEBUG("newHostEvents=%x oldHostEvents=%x", events, console->contEpoll);
++ }
++ } else if (console->contEpoll) {
++ VIR_DEBUG("Stop epoll oldContEvents=%x", console->contEpoll);
++ if (epoll_ctl(console->epollFd, EPOLL_CTL_DEL, console->contFd, NULL) < 0) {
++ virReportSystemError(errno, "%s",
++ _("Unable to remove epoll fd"));
++ VIR_DEBUG(":fail");
++ quit = true;
++ goto cleanup;
++ }
++ console->contEpoll = 0;
+ }
++cleanup:
++ return;
++}
++
+
+- for (;;) {
+- ret = epoll_wait(epollfd, &event, 1, -1);
++static void lxcEpollIO(int watch, int fd, int events, void *opaque)
++{
++ struct lxcConsole *console = opaque;
++
++ virMutexLock(&lock);
++ VIR_DEBUG("IO event watch=%d fd=%d events=%d fromHost=%zu fromcont=%zu",
++ watch, fd, events,
++ console->fromHostLen,
++ console->fromContLen);
++
++ while (1) {
++ struct epoll_event event;
++ int ret;
++ ret = epoll_wait(console->epollFd, &event, 1, 0);
+ if (ret < 0) {
+ if (ret == EINTR)
+ continue;
+ virReportSystemError(errno, "%s",
+ _("Unable to wait on epoll"));
+- virMutexLock(&lock);
+ quit = true;
+- virMutexUnlock(&lock);
+ goto cleanup;
+ }
+
++ if (ret == 0)
++ break;
++
++ VIR_DEBUG("fd=%d hostFd=%d contFd=%d hostEpoll=%x contEpoll=%x",
++ event.data.fd, console->hostFd, console->contFd,
++ console->hostEpoll, console->contEpoll);
++
+ /* If we get HUP+dead PID, we just re-enable the main loop
+ * which will see the PID has died and exit */
+ if ((event.events & EPOLLIN)) {
+- virMutexLock(&lock);
+- if (event.data.fd == data->console->hostFd) {
+- data->console->hostClosed = false;
++ if (event.data.fd == console->hostFd) {
++ console->hostClosed = false;
+ } else {
+- data->console->contClosed = false;
++ console->contClosed = false;
+ }
+- lxcConsoleUpdateWatch(data->console);
+- virMutexUnlock(&lock);
++ lxcConsoleUpdateWatch(console);
+ break;
+ }
+ }
+
+ cleanup:
+- VIR_FORCE_CLOSE(epollfd);
+- VIR_FREE(data);
+-}
+-
+-static int lxcCheckEOF(struct lxcConsole *console, int fd)
+-{
+- struct lxcConsoleEOFData *data;
+- virThread thread;
+-
+- if (VIR_ALLOC(data) < 0) {
+- virReportOOMError();
+- return -1;
+- }
+-
+- data->console = console;
+- data->fd = fd;
+-
+- if (virThreadCreate(&thread, false, lxcConsoleEOFThread, data) < 0) {
+- VIR_FREE(data);
+- return -1;
+- }
+- return 0;
++ virMutexUnlock(&lock);
+ }
+
+ static void lxcConsoleIO(int watch, int fd, int events, void *opaque)
+@@ -937,6 +991,10 @@ static void lxcConsoleIO(int watch, int fd, int events, void *opaque)
+ struct lxcConsole *console = opaque;
+
+ virMutexLock(&lock);
++ VIR_DEBUG("IO event watch=%d fd=%d events=%d fromHost=%zu fromcont=%zu",
++ watch, fd, events,
++ console->fromHostLen,
++ console->fromContLen);
+ if (events & VIR_EVENT_HANDLE_READABLE) {
+ char *buf;
+ size_t *len;
+@@ -993,6 +1051,10 @@ static void lxcConsoleIO(int watch, int fd, int events, void *opaque)
+ *len -= done;
+ } else {
+ VIR_DEBUG("Write fd %d done %d errno %d", fd, (int)done, errno);
++ if (watch == console->hostWatch)
++ console->hostBlocking = true;
++ else
++ console->contBlocking = true;
+ }
+ }
+
+@@ -1003,8 +1065,6 @@ static void lxcConsoleIO(int watch, int fd, int events, void *opaque)
+ console->contClosed = true;
+ }
+ VIR_DEBUG("Got EOF on %d %d", watch, fd);
+- if (lxcCheckEOF(console, fd) < 0)
+- goto error;
+ }
+
+ lxcConsoleUpdateWatch(console);
+@@ -1103,9 +1163,32 @@ static int lxcControllerMain(int serverFd,
+ }
+
+ for (i = 0 ; i < nFds ; i++) {
++ consoles[i].epollFd = -1;
++ consoles[i].epollWatch = -1;
++ consoles[i].hostWatch = -1;
++ consoles[i].contWatch = -1;
++ }
++
++ for (i = 0 ; i < nFds ; i++) {
+ consoles[i].hostFd = hostFds[i];
+ consoles[i].contFd = contFds[i];
+
++ if ((consoles[i].epollFd = epoll_create1(EPOLL_CLOEXEC)) < 0) {
++ virReportSystemError(errno, "%s",
++ _("Unable to create epoll fd"));
++ goto cleanup;
++ }
++
++ if ((consoles[i].epollWatch = virEventAddHandle(consoles[i].epollFd,
++ VIR_EVENT_HANDLE_READABLE,
++ lxcEpollIO,
++ &consoles[i],
++ NULL)) < 0) {
++ lxcError(VIR_ERR_INTERNAL_ERROR, "%s",
++ _("Unable to watch epoll FD"));
++ goto cleanup;
++ }
++
+ if ((consoles[i].hostWatch = virEventAddHandle(consoles[i].hostFd,
+ VIR_EVENT_HANDLE_READABLE,
+ lxcConsoleIO,
+@@ -1146,6 +1229,17 @@ cleanup:
+ cleanup2:
+ VIR_FORCE_CLOSE(monitor.serverFd);
+ VIR_FORCE_CLOSE(monitor.clientFd);
++
++ for (i = 0 ; i < nFds ; i++) {
++ if (consoles[i].epollWatch != -1)
++ virEventRemoveHandle(consoles[i].epollWatch);
++ VIR_FORCE_CLOSE(consoles[i].epollFd);
++ if (consoles[i].contWatch != -1)
++ virEventRemoveHandle(consoles[i].contWatch);
++ if (consoles[i].hostWatch != -1)
++ virEventRemoveHandle(consoles[i].hostWatch);
++ }
++
+ VIR_FREE(consoles);
+ return rc;
+ }
More information about the scm-commits
mailing list