[xen/f20] Long latency virtual-mmu operations are not preemptible
myoung
myoung at fedoraproject.org
Wed Aug 13 16:13:44 UTC 2014
commit da5c5ea5bcf73c995f26df8dae857ee72dd085cf
Author: Michael Young <m.a.young at durham.ac.uk>
Date: Wed Aug 13 17:13:03 2014 +0100
Long latency virtual-mmu operations are not preemptible
xen.spec | 8 +-
xsa97-hap-4.3.patch | 485 +++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 492 insertions(+), 1 deletions(-)
---
diff --git a/xen.spec b/xen.spec
index 3e78e7c..2ed5ab7 100644
--- a/xen.spec
+++ b/xen.spec
@@ -51,7 +51,7 @@
Summary: Xen is a virtual machine monitor
Name: xen
Version: 4.3.2
-Release: 6%{?dist}
+Release: 7%{?dist}
Group: Development/Libraries
License: GPLv2+ and LGPLv2+ and BSD
URL: http://xen.org/
@@ -108,6 +108,7 @@ Patch23: xsa92.patch
Patch24: xsa96.patch
Patch25: xsa100.patch
Patch26: xen.git-6b4d71d028f445cba7426a144751fddc8bfdd67b.patch
+Patch27: xsa97-hap-4.3.patch
Patch100: xen-configure-xend.patch
@@ -299,6 +300,7 @@ manage Xen virtual machines.
%patch24 -p1
%patch25 -p1
%patch26 -p1
+%patch27 -p1
%patch100 -p1
@@ -870,6 +872,10 @@ rm -rf %{buildroot}
%endif
%changelog
+* Wed Aug 13 2014 Michael Young <m.a.young at durham.ac.uk> - 4.3.2-7
+- Long latency virtual-mmu operations are not preemptible
+ [XSA-97, CVE-2014-5146]
+
* Fri Jun 20 2014 Michael Young <m.a.young at durham.ac.uk> - 4.3.2-6
- Hypervisor heap contents leaked to guest [XSA-100, CVE-2014-4021]
(#1110316) with extra patch to avoid regression
diff --git a/xsa97-hap-4.3.patch b/xsa97-hap-4.3.patch
new file mode 100644
index 0000000..90a85be
--- /dev/null
+++ b/xsa97-hap-4.3.patch
@@ -0,0 +1,485 @@
+x86/paging: make log-dirty operations preemptible
+
+Both the freeing and the inspection of the bitmap get done in (nested)
+loops which - besides having a rather high iteration count in general,
+albeit that would be covered by XSA-77 - have the number of non-trivial
+iterations they need to perform (indirectly) controllable by both the
+guest they are for and any domain controlling the guest (including the
+one running qemu for it).
+
+This is XSA-97.
+
+Signed-off-by: Jan Beulich <jbeulich at suse.com>
+Reviewed-by: Tim Deegan <tim at xen.org>
+
+--- a/xen/arch/x86/domain.c
++++ b/xen/arch/x86/domain.c
+@@ -1867,7 +1867,9 @@ int domain_relinquish_resources(struct d
+ pci_release_devices(d);
+
+ /* Tear down paging-assistance stuff. */
+- paging_teardown(d);
++ ret = paging_teardown(d);
++ if ( ret )
++ return ret;
+
+ /* Drop the in-use references to page-table bases. */
+ for_each_vcpu ( d, v )
+--- a/xen/arch/x86/domctl.c
++++ b/xen/arch/x86/domctl.c
+@@ -61,6 +61,9 @@ long arch_do_domctl(
+ ret = paging_domctl(d,
+ &domctl->u.shadow_op,
+ guest_handle_cast(u_domctl, void));
++ if ( ret == -EAGAIN )
++ return hypercall_create_continuation(__HYPERVISOR_domctl,
++ "h", u_domctl);
+ copyback = 1;
+ }
+ break;
+--- a/xen/arch/x86/mm/hap/hap.c
++++ b/xen/arch/x86/mm/hap/hap.c
+@@ -565,8 +565,7 @@ int hap_domctl(struct domain *d, xen_dom
+ paging_unlock(d);
+ if ( preempted )
+ /* Not finished. Set up to re-run the call. */
+- rc = hypercall_create_continuation(__HYPERVISOR_domctl, "h",
+- u_domctl);
++ rc = -EAGAIN;
+ else
+ /* Finished. Return the new allocation */
+ sc->mb = hap_get_allocation(d);
+--- a/xen/arch/x86/mm/paging.c
++++ b/xen/arch/x86/mm/paging.c
+@@ -26,6 +26,7 @@
+ #include <asm/shadow.h>
+ #include <asm/p2m.h>
+ #include <asm/hap.h>
++#include <asm/event.h>
+ #include <asm/hvm/nestedhvm.h>
+ #include <xen/numa.h>
+ #include <xsm/xsm.h>
+@@ -116,26 +117,46 @@ static void paging_free_log_dirty_page(s
+ d->arch.paging.free_page(d, mfn_to_page(mfn));
+ }
+
+-void paging_free_log_dirty_bitmap(struct domain *d)
++static int paging_free_log_dirty_bitmap(struct domain *d, int rc)
+ {
+ mfn_t *l4, *l3, *l2;
+ int i4, i3, i2;
+
++ paging_lock(d);
++
+ if ( !mfn_valid(d->arch.paging.log_dirty.top) )
+- return;
++ {
++ paging_unlock(d);
++ return 0;
++ }
+
+- paging_lock(d);
++ if ( !d->arch.paging.preempt.vcpu )
++ {
++ memset(&d->arch.paging.preempt.log_dirty, 0,
++ sizeof(d->arch.paging.preempt.log_dirty));
++ ASSERT(rc <= 0);
++ d->arch.paging.preempt.log_dirty.done = -rc;
++ }
++ else if ( d->arch.paging.preempt.vcpu != current ||
++ d->arch.paging.preempt.op != XEN_DOMCTL_SHADOW_OP_OFF )
++ {
++ paging_unlock(d);
++ return -EBUSY;
++ }
+
+ l4 = map_domain_page(mfn_x(d->arch.paging.log_dirty.top));
++ i4 = d->arch.paging.preempt.log_dirty.i4;
++ i3 = d->arch.paging.preempt.log_dirty.i3;
++ rc = 0;
+
+- for ( i4 = 0; i4 < LOGDIRTY_NODE_ENTRIES; i4++ )
++ for ( ; i4 < LOGDIRTY_NODE_ENTRIES; i4++, i3 = 0 )
+ {
+ if ( !mfn_valid(l4[i4]) )
+ continue;
+
+ l3 = map_domain_page(mfn_x(l4[i4]));
+
+- for ( i3 = 0; i3 < LOGDIRTY_NODE_ENTRIES; i3++ )
++ for ( ; i3 < LOGDIRTY_NODE_ENTRIES; i3++ )
+ {
+ if ( !mfn_valid(l3[i3]) )
+ continue;
+@@ -148,20 +169,54 @@ void paging_free_log_dirty_bitmap(struct
+
+ unmap_domain_page(l2);
+ paging_free_log_dirty_page(d, l3[i3]);
++ l3[i3] = _mfn(INVALID_MFN);
++
++ if ( i3 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() )
++ {
++ d->arch.paging.preempt.log_dirty.i3 = i3 + 1;
++ d->arch.paging.preempt.log_dirty.i4 = i4;
++ rc = -EAGAIN;
++ break;
++ }
+ }
+
+ unmap_domain_page(l3);
++ if ( rc )
++ break;
+ paging_free_log_dirty_page(d, l4[i4]);
++ l4[i4] = _mfn(INVALID_MFN);
++
++ if ( i4 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() )
++ {
++ d->arch.paging.preempt.log_dirty.i3 = 0;
++ d->arch.paging.preempt.log_dirty.i4 = i4 + 1;
++ rc = -EAGAIN;
++ break;
++ }
+ }
+
+ unmap_domain_page(l4);
+- paging_free_log_dirty_page(d, d->arch.paging.log_dirty.top);
+- d->arch.paging.log_dirty.top = _mfn(INVALID_MFN);
+
+- ASSERT(d->arch.paging.log_dirty.allocs == 0);
+- d->arch.paging.log_dirty.failed_allocs = 0;
++ if ( !rc )
++ {
++ paging_free_log_dirty_page(d, d->arch.paging.log_dirty.top);
++ d->arch.paging.log_dirty.top = _mfn(INVALID_MFN);
++
++ ASSERT(d->arch.paging.log_dirty.allocs == 0);
++ d->arch.paging.log_dirty.failed_allocs = 0;
++
++ rc = -d->arch.paging.preempt.log_dirty.done;
++ d->arch.paging.preempt.vcpu = NULL;
++ }
++ else
++ {
++ d->arch.paging.preempt.vcpu = current;
++ d->arch.paging.preempt.op = XEN_DOMCTL_SHADOW_OP_OFF;
++ }
+
+ paging_unlock(d);
++
++ return rc;
+ }
+
+ int paging_log_dirty_enable(struct domain *d)
+@@ -178,15 +233,25 @@ int paging_log_dirty_enable(struct domai
+ return ret;
+ }
+
+-int paging_log_dirty_disable(struct domain *d)
++static int paging_log_dirty_disable(struct domain *d, bool_t resuming)
+ {
+- int ret;
++ int ret = 1;
++
++ if ( !resuming )
++ {
++ domain_pause(d);
++ /* Safe because the domain is paused. */
++ ret = d->arch.paging.log_dirty.disable_log_dirty(d);
++ ASSERT(ret <= 0);
++ }
+
+- domain_pause(d);
+- /* Safe because the domain is paused. */
+- ret = d->arch.paging.log_dirty.disable_log_dirty(d);
+ if ( !paging_mode_log_dirty(d) )
+- paging_free_log_dirty_bitmap(d);
++ {
++ ret = paging_free_log_dirty_bitmap(d, ret);
++ if ( ret == -EAGAIN )
++ return ret;
++ }
++
+ domain_unpause(d);
+
+ return ret;
+@@ -326,7 +391,9 @@ int paging_mfn_is_dirty(struct domain *d
+
+ /* Read a domain's log-dirty bitmap and stats. If the operation is a CLEAN,
+ * clear the bitmap and stats as well. */
+-int paging_log_dirty_op(struct domain *d, struct xen_domctl_shadow_op *sc)
++static int paging_log_dirty_op(struct domain *d,
++ struct xen_domctl_shadow_op *sc,
++ bool_t resuming)
+ {
+ int rv = 0, clean = 0, peek = 1;
+ unsigned long pages = 0;
+@@ -334,9 +401,22 @@ int paging_log_dirty_op(struct domain *d
+ unsigned long *l1 = NULL;
+ int i4, i3, i2;
+
+- domain_pause(d);
++ if ( !resuming )
++ domain_pause(d);
+ paging_lock(d);
+
++ if ( !d->arch.paging.preempt.vcpu )
++ memset(&d->arch.paging.preempt.log_dirty, 0,
++ sizeof(d->arch.paging.preempt.log_dirty));
++ else if ( d->arch.paging.preempt.vcpu != current ||
++ d->arch.paging.preempt.op != sc->op )
++ {
++ paging_unlock(d);
++ ASSERT(!resuming);
++ domain_unpause(d);
++ return -EBUSY;
++ }
++
+ clean = (sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN);
+
+ PAGING_DEBUG(LOGDIRTY, "log-dirty %s: dom %u faults=%u dirty=%u\n",
+@@ -365,17 +445,15 @@ int paging_log_dirty_op(struct domain *d
+ goto out;
+ }
+
+- pages = 0;
+ l4 = paging_map_log_dirty_bitmap(d);
++ i4 = d->arch.paging.preempt.log_dirty.i4;
++ i3 = d->arch.paging.preempt.log_dirty.i3;
++ pages = d->arch.paging.preempt.log_dirty.done;
+
+- for ( i4 = 0;
+- (pages < sc->pages) && (i4 < LOGDIRTY_NODE_ENTRIES);
+- i4++ )
++ for ( ; (pages < sc->pages) && (i4 < LOGDIRTY_NODE_ENTRIES); i4++, i3 = 0 )
+ {
+ l3 = (l4 && mfn_valid(l4[i4])) ? map_domain_page(mfn_x(l4[i4])) : NULL;
+- for ( i3 = 0;
+- (pages < sc->pages) && (i3 < LOGDIRTY_NODE_ENTRIES);
+- i3++ )
++ for ( ; (pages < sc->pages) && (i3 < LOGDIRTY_NODE_ENTRIES); i3++ )
+ {
+ l2 = ((l3 && mfn_valid(l3[i3])) ?
+ map_domain_page(mfn_x(l3[i3])) : NULL);
+@@ -410,18 +488,51 @@ int paging_log_dirty_op(struct domain *d
+ }
+ if ( l2 )
+ unmap_domain_page(l2);
++
++ if ( i3 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() )
++ {
++ d->arch.paging.preempt.log_dirty.i4 = i4;
++ d->arch.paging.preempt.log_dirty.i3 = i3 + 1;
++ rv = -EAGAIN;
++ break;
++ }
+ }
+ if ( l3 )
+ unmap_domain_page(l3);
++
++ if ( !rv && i4 < LOGDIRTY_NODE_ENTRIES - 1 &&
++ hypercall_preempt_check() )
++ {
++ d->arch.paging.preempt.log_dirty.i4 = i4 + 1;
++ d->arch.paging.preempt.log_dirty.i3 = 0;
++ rv = -EAGAIN;
++ }
++ if ( rv )
++ break;
+ }
+ if ( l4 )
+ unmap_domain_page(l4);
+
+- if ( pages < sc->pages )
+- sc->pages = pages;
++ if ( !rv )
++ d->arch.paging.preempt.vcpu = NULL;
++ else
++ {
++ d->arch.paging.preempt.vcpu = current;
++ d->arch.paging.preempt.op = sc->op;
++ d->arch.paging.preempt.log_dirty.done = pages;
++ }
+
+ paging_unlock(d);
+
++ if ( rv )
++ {
++ /* Never leave the domain paused for other errors. */
++ ASSERT(rv == -EAGAIN);
++ return rv;
++ }
++
++ if ( pages < sc->pages )
++ sc->pages = pages;
+ if ( clean )
+ {
+ /* We need to further call clean_dirty_bitmap() functions of specific
+@@ -432,6 +543,7 @@ int paging_log_dirty_op(struct domain *d
+ return rv;
+
+ out:
++ d->arch.paging.preempt.vcpu = NULL;
+ paging_unlock(d);
+ domain_unpause(d);
+
+@@ -498,12 +610,6 @@ void paging_log_dirty_init(struct domain
+ d->arch.paging.log_dirty.clean_dirty_bitmap = clean_dirty_bitmap;
+ }
+
+-/* This function fress log dirty bitmap resources. */
+-static void paging_log_dirty_teardown(struct domain*d)
+-{
+- paging_free_log_dirty_bitmap(d);
+-}
+-
+ /************************************************/
+ /* CODE FOR PAGING SUPPORT */
+ /************************************************/
+@@ -547,6 +653,7 @@ void paging_vcpu_init(struct vcpu *v)
+ int paging_domctl(struct domain *d, xen_domctl_shadow_op_t *sc,
+ XEN_GUEST_HANDLE_PARAM(void) u_domctl)
+ {
++ bool_t resuming = 0;
+ int rc;
+
+ if ( unlikely(d == current->domain) )
+@@ -569,6 +676,20 @@ int paging_domctl(struct domain *d, xen_
+ return -EINVAL;
+ }
+
++ if ( d->arch.paging.preempt.vcpu )
++ {
++ if ( d->arch.paging.preempt.vcpu != current ||
++ d->arch.paging.preempt.op != sc->op )
++ {
++ printk(XENLOG_G_DEBUG
++ "d%d:v%d: Paging op %#x on Dom%u with unfinished prior op %#x\n",
++ current->domain->domain_id, current->vcpu_id,
++ sc->op, d->domain_id, d->arch.paging.preempt.op);
++ return -EBUSY;
++ }
++ resuming = 1;
++ }
++
+ rc = xsm_shadow_control(XSM_HOOK, d, sc->op);
+ if ( rc )
+ return rc;
+@@ -594,13 +714,13 @@ int paging_domctl(struct domain *d, xen_
+
+ case XEN_DOMCTL_SHADOW_OP_OFF:
+ if ( paging_mode_log_dirty(d) )
+- if ( (rc = paging_log_dirty_disable(d)) != 0 )
++ if ( (rc = paging_log_dirty_disable(d, resuming)) != 0 )
+ return rc;
+ break;
+
+ case XEN_DOMCTL_SHADOW_OP_CLEAN:
+ case XEN_DOMCTL_SHADOW_OP_PEEK:
+- return paging_log_dirty_op(d, sc);
++ return paging_log_dirty_op(d, sc, resuming);
+ }
+
+ /* Here, dispatch domctl to the appropriate paging code */
+@@ -611,18 +731,24 @@ int paging_domctl(struct domain *d, xen_
+ }
+
+ /* Call when destroying a domain */
+-void paging_teardown(struct domain *d)
++int paging_teardown(struct domain *d)
+ {
++ int rc;
++
+ if ( hap_enabled(d) )
+ hap_teardown(d);
+ else
+ shadow_teardown(d);
+
+ /* clean up log dirty resources. */
+- paging_log_dirty_teardown(d);
++ rc = paging_free_log_dirty_bitmap(d, 0);
++ if ( rc == -EAGAIN )
++ return rc;
+
+ /* Move populate-on-demand cache back to domain_list for destruction */
+ p2m_pod_empty_cache(d);
++
++ return rc;
+ }
+
+ /* Call once all of the references to the domain have gone away */
+--- a/xen/arch/x86/mm/shadow/common.c
++++ b/xen/arch/x86/mm/shadow/common.c
+@@ -3706,8 +3706,7 @@ int shadow_domctl(struct domain *d,
+ paging_unlock(d);
+ if ( preempted )
+ /* Not finished. Set up to re-run the call. */
+- rc = hypercall_create_continuation(
+- __HYPERVISOR_domctl, "h", u_domctl);
++ rc = -EAGAIN;
+ else
+ /* Finished. Return the new allocation */
+ sc->mb = shadow_get_allocation(d);
+--- a/xen/common/domain.c
++++ b/xen/common/domain.c
+@@ -527,7 +527,6 @@ int domain_kill(struct domain *d)
+ rc = domain_relinquish_resources(d);
+ if ( rc != 0 )
+ {
+- BUG_ON(rc != -EAGAIN);
+ break;
+ }
+ for_each_vcpu ( d, v )
+--- a/xen/include/asm-x86/domain.h
++++ b/xen/include/asm-x86/domain.h
+@@ -186,6 +186,20 @@ struct paging_domain {
+ struct hap_domain hap;
+ /* log dirty support */
+ struct log_dirty_domain log_dirty;
++
++ /* preemption handling */
++ struct {
++ struct vcpu *vcpu;
++ unsigned int op;
++ union {
++ struct {
++ unsigned long done:PADDR_BITS - PAGE_SHIFT;
++ unsigned long i4:PAGETABLE_ORDER;
++ unsigned long i3:PAGETABLE_ORDER;
++ } log_dirty;
++ };
++ } preempt;
++
+ /* alloc/free pages from the pool for paging-assistance structures
+ * (used by p2m and log-dirty code for their tries) */
+ struct page_info * (*alloc_page)(struct domain *d);
+--- a/xen/include/asm-x86/paging.h
++++ b/xen/include/asm-x86/paging.h
+@@ -133,9 +133,6 @@ struct paging_mode {
+ /*****************************************************************************
+ * Log dirty code */
+
+-/* free log dirty bitmap resource */
+-void paging_free_log_dirty_bitmap(struct domain *d);
+-
+ /* get the dirty bitmap for a specific range of pfns */
+ void paging_log_dirty_range(struct domain *d,
+ unsigned long begin_pfn,
+@@ -145,9 +142,6 @@ void paging_log_dirty_range(struct domai
+ /* enable log dirty */
+ int paging_log_dirty_enable(struct domain *d);
+
+-/* disable log dirty */
+-int paging_log_dirty_disable(struct domain *d);
+-
+ /* log dirty initialization */
+ void paging_log_dirty_init(struct domain *d,
+ int (*enable_log_dirty)(struct domain *d),
+@@ -206,7 +200,7 @@ int paging_domctl(struct domain *d, xen_
+ XEN_GUEST_HANDLE_PARAM(void) u_domctl);
+
+ /* Call when destroying a domain */
+-void paging_teardown(struct domain *d);
++int paging_teardown(struct domain *d);
+
+ /* Call once all of the references to the domain have gone away */
+ void paging_final_teardown(struct domain *d);
More information about the scm-commits mailing list