[xen] Denial of service security fixes for CVE-2013-1918 CVE-2013-1952

myoung myoung at fedoraproject.org
Thu May 2 17:46:41 UTC 2013


commit 41ce54e1878b16c1cf7b278658b1be983842004d
Author: Michael Young <m.a.young at durham.ac.uk>
Date:   Thu May 2 18:44:09 2013 +0100

    Denial of service security fixes for CVE-2013-1918 CVE-2013-1952

 xen.spec                                           |   26 ++-
 ....2-01-vcpu-destroy-pagetables-preemptible.patch |  252 +++++++++++++++++++
 xsa45-4.2-02-new-guest-cr3-preemptible.patch       |  169 +++++++++++++
 xsa45-4.2-03-new-user-base-preemptible.patch       |   74 ++++++
 xsa45-4.2-04-vcpu-reset-preemptible.patch          |  200 +++++++++++++++
 xsa45-4.2-05-set-info-guest-preemptible.patch      |  204 ++++++++++++++++
 xsa45-4.2-06-unpin-preemptible.patch               |  127 ++++++++++
 xsa45-4.2-07-mm-error-paths-preemptible.patch      |  255 ++++++++++++++++++++
 xsa49-4.2.patch                                    |   50 ++++
 9 files changed, 1356 insertions(+), 1 deletions(-)
---
diff --git a/xen.spec b/xen.spec
index 3f6d63d..6a03dd0 100644
--- a/xen.spec
+++ b/xen.spec
@@ -27,7 +27,7 @@
 Summary: Xen is a virtual machine monitor
 Name:    xen
 Version: 4.2.2
-Release: 2%{?dist}
+Release: 3%{?dist}
 Group:   Development/Libraries
 License: GPLv2+ and LGPLv2+ and BSD
 URL:     http://xen.org/
@@ -78,6 +78,14 @@ Patch56: xen.fedora19.buildfix.patch
 Patch62: man.formatting.patch
 Patch63: xl.list.-l.format.patch
 Patch64: xen.git-9c23a1d0eb7a6b5e3273d527cfd7960838fbfee6.patch
+Patch65: xsa45-4.2-01-vcpu-destroy-pagetables-preemptible.patch
+Patch66: xsa45-4.2-02-new-guest-cr3-preemptible.patch
+Patch67: xsa45-4.2-03-new-user-base-preemptible.patch
+Patch68: xsa45-4.2-04-vcpu-reset-preemptible.patch
+Patch69: xsa45-4.2-05-set-info-guest-preemptible.patch
+Patch70: xsa45-4.2-06-unpin-preemptible.patch
+Patch71: xsa45-4.2-07-mm-error-paths-preemptible.patch
+Patch72: xsa49-4.2.patch
 
 Patch100: xen-configure-xend.patch
 
@@ -246,6 +254,14 @@ manage Xen virtual machines.
 %patch62 -p1
 %patch63 -p1
 %patch64 -p1
+%patch65 -p1
+%patch66 -p1
+%patch67 -p1
+%patch68 -p1
+%patch69 -p1
+%patch70 -p1
+%patch71 -p1
+%patch72 -p1
 
 %patch100 -p1
 
@@ -735,6 +751,14 @@ rm -rf %{buildroot}
 %endif
 
 %changelog
+* Thu May 02 2013 Michael Young <m.a.young at durham.ac.uk> - 4.2.2-3
+- PV guests can use non-preemptible long latency operations to
+  mount a denial of service attack on the whole system
+  [XSA-45, CVE-2013-1918] (#958918)
+- malicious guests can inject interrupts through bridge devices to
+  mount a denial of service attack on the whole system
+  [XSA-49, CVE-2013-1952] (#958919)
+
 * Fri Apr 26 2013 Michael Young <m.a.young at durham.ac.uk> - 4.2.2-2
 - fix further man page issues to allow building on F19 and F20
 
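The changelog entries above summarize the two issues: XSA-45 (CVE-2013-1918) covers PV page-table operations that could run for a very long time without the hypervisor being able to preempt them, and XSA-49 (CVE-2013-1952) covers VT-d interrupt remapping entries left without source-id verification. The seven xsa45 patches that follow all apply the same cure: split the long-running work into bounded chunks and resume via hypercall continuations. As a rough illustration, the standalone toy program below (not Xen code; every name in it is made up) shows the chunk-and-continue pattern that the patches implement with hypercall_create_continuation():

    /*
     * Toy model of the XSA-45 mitigation pattern: a long page-table
     * teardown is split into bounded chunks.  When one invocation's
     * budget is used up, progress is recorded and -EAGAIN is returned,
     * and the caller re-invokes the function until it returns 0.
     */
    #include <errno.h>
    #include <stdio.h>

    #define NR_ENTRIES 10000   /* stand-in for a guest's page-table pages */
    #define BUDGET      1024   /* work allowed before yielding */

    struct teardown_state {
        unsigned int next;     /* first entry not yet dropped */
    };

    static void drop_entry(unsigned int idx)
    {
        (void)idx;             /* real code would drop a page reference here */
    }

    /* Returns 0 when finished, -EAGAIN when preempted. */
    static int destroy_pagetables_preemptible(struct teardown_state *st)
    {
        unsigned int done = 0;

        while (st->next < NR_ENTRIES) {
            drop_entry(st->next++);
            if (++done == BUDGET && st->next < NR_ENTRIES)
                return -EAGAIN;   /* caller retries via a continuation */
        }
        return 0;
    }

    int main(void)
    {
        struct teardown_state st = { .next = 0 };
        int rc, calls = 0;

        do {
            rc = destroy_pagetables_preemptible(&st);
            calls++;
        } while (rc == -EAGAIN);

        printf("finished after %d calls, rc=%d\n", calls, rc);
        return 0;
    }

In Xen the re-invocation is driven by the hypercall continuation machinery rather than a loop in the caller, but the effect is the same: no single entry into the hypervisor holds a CPU for an unbounded time.
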
diff --git a/xsa45-4.2-01-vcpu-destroy-pagetables-preemptible.patch b/xsa45-4.2-01-vcpu-destroy-pagetables-preemptible.patch
new file mode 100644
index 0000000..f5ded0a
--- /dev/null
+++ b/xsa45-4.2-01-vcpu-destroy-pagetables-preemptible.patch
@@ -0,0 +1,252 @@
+x86: make vcpu_destroy_pagetables() preemptible
+
+... as it may take significant amounts of time.
+
+The function, being moved to mm.c as the better home for it anyway, and
+to avoid having to make a new helper function there non-static, is
+given a "preemptible" parameter temporarily (until, in a subsequent
+patch, its other caller is also being made capable of dealing with
+preemption).
+
+This is part of CVE-2013-1918 / XSA-45.
+
+Signed-off-by: Jan Beulich <jbeulich at suse.com>
+Acked-by: Tim Deegan <tim at xen.org>
+
+--- a/xen/arch/x86/domain.c
++++ b/xen/arch/x86/domain.c
+@@ -73,8 +73,6 @@ void (*dead_idle) (void) __read_mostly =
+ static void paravirt_ctxt_switch_from(struct vcpu *v);
+ static void paravirt_ctxt_switch_to(struct vcpu *v);
+ 
+-static void vcpu_destroy_pagetables(struct vcpu *v);
+-
+ static void default_idle(void)
+ {
+     local_irq_disable();
+@@ -1058,7 +1056,7 @@ void arch_vcpu_reset(struct vcpu *v)
+     if ( !is_hvm_vcpu(v) )
+     {
+         destroy_gdt(v);
+-        vcpu_destroy_pagetables(v);
++        vcpu_destroy_pagetables(v, 0);
+     }
+     else
+     {
+@@ -2069,63 +2067,6 @@ static int relinquish_memory(
+     return ret;
+ }
+ 
+-static void vcpu_destroy_pagetables(struct vcpu *v)
+-{
+-    struct domain *d = v->domain;
+-    unsigned long pfn;
+-
+-#ifdef __x86_64__
+-    if ( is_pv_32on64_vcpu(v) )
+-    {
+-        pfn = l4e_get_pfn(*(l4_pgentry_t *)
+-                          __va(pagetable_get_paddr(v->arch.guest_table)));
+-
+-        if ( pfn != 0 )
+-        {
+-            if ( paging_mode_refcounts(d) )
+-                put_page(mfn_to_page(pfn));
+-            else
+-                put_page_and_type(mfn_to_page(pfn));
+-        }
+-
+-        l4e_write(
+-            (l4_pgentry_t *)__va(pagetable_get_paddr(v->arch.guest_table)),
+-            l4e_empty());
+-
+-        v->arch.cr3 = 0;
+-        return;
+-    }
+-#endif
+-
+-    pfn = pagetable_get_pfn(v->arch.guest_table);
+-    if ( pfn != 0 )
+-    {
+-        if ( paging_mode_refcounts(d) )
+-            put_page(mfn_to_page(pfn));
+-        else
+-            put_page_and_type(mfn_to_page(pfn));
+-        v->arch.guest_table = pagetable_null();
+-    }
+-
+-#ifdef __x86_64__
+-    /* Drop ref to guest_table_user (from MMUEXT_NEW_USER_BASEPTR) */
+-    pfn = pagetable_get_pfn(v->arch.guest_table_user);
+-    if ( pfn != 0 )
+-    {
+-        if ( !is_pv_32bit_vcpu(v) )
+-        {
+-            if ( paging_mode_refcounts(d) )
+-                put_page(mfn_to_page(pfn));
+-            else
+-                put_page_and_type(mfn_to_page(pfn));
+-        }
+-        v->arch.guest_table_user = pagetable_null();
+-    }
+-#endif
+-
+-    v->arch.cr3 = 0;
+-}
+-
+ int domain_relinquish_resources(struct domain *d)
+ {
+     int ret;
+@@ -2143,7 +2084,11 @@ int domain_relinquish_resources(struct d
+ 
+         /* Drop the in-use references to page-table bases. */
+         for_each_vcpu ( d, v )
+-            vcpu_destroy_pagetables(v);
++        {
++            ret = vcpu_destroy_pagetables(v, 1);
++            if ( ret )
++                return ret;
++        }
+ 
+         if ( !is_hvm_domain(d) )
+         {
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -2808,6 +2808,82 @@ static void put_superpage(unsigned long 
+ 
+ #endif
+ 
++static int put_old_guest_table(struct vcpu *v)
++{
++    int rc;
++
++    if ( !v->arch.old_guest_table )
++        return 0;
++
++    switch ( rc = put_page_and_type_preemptible(v->arch.old_guest_table, 1) )
++    {
++    case -EINTR:
++    case -EAGAIN:
++        return -EAGAIN;
++    }
++
++    v->arch.old_guest_table = NULL;
++
++    return rc;
++}
++
++int vcpu_destroy_pagetables(struct vcpu *v, bool_t preemptible)
++{
++    unsigned long mfn = pagetable_get_pfn(v->arch.guest_table);
++    struct page_info *page;
++    int rc = put_old_guest_table(v);
++
++    if ( rc )
++        return rc;
++
++#ifdef __x86_64__
++    if ( is_pv_32on64_vcpu(v) )
++        mfn = l4e_get_pfn(*(l4_pgentry_t *)mfn_to_virt(mfn));
++#endif
++
++    if ( mfn )
++    {
++        page = mfn_to_page(mfn);
++        if ( paging_mode_refcounts(v->domain) )
++            put_page(page);
++        else
++            rc = put_page_and_type_preemptible(page, preemptible);
++    }
++
++#ifdef __x86_64__
++    if ( is_pv_32on64_vcpu(v) )
++    {
++        if ( !rc )
++            l4e_write(
++                (l4_pgentry_t *)__va(pagetable_get_paddr(v->arch.guest_table)),
++                l4e_empty());
++    }
++    else
++#endif
++    if ( !rc )
++    {
++        v->arch.guest_table = pagetable_null();
++
++#ifdef __x86_64__
++        /* Drop ref to guest_table_user (from MMUEXT_NEW_USER_BASEPTR) */
++        mfn = pagetable_get_pfn(v->arch.guest_table_user);
++        if ( mfn )
++        {
++            page = mfn_to_page(mfn);
++            if ( paging_mode_refcounts(v->domain) )
++                put_page(page);
++            else
++                rc = put_page_and_type_preemptible(page, preemptible);
++        }
++        if ( !rc )
++            v->arch.guest_table_user = pagetable_null();
++#endif
++    }
++
++    v->arch.cr3 = 0;
++
++    return rc;
++}
+ 
+ int new_guest_cr3(unsigned long mfn)
+ {
+@@ -2994,12 +3070,21 @@ long do_mmuext_op(
+     unsigned int foreigndom)
+ {
+     struct mmuext_op op;
+-    int rc = 0, i = 0, okay;
+     unsigned long type;
+-    unsigned int done = 0;
++    unsigned int i = 0, done = 0;
+     struct vcpu *curr = current;
+     struct domain *d = curr->domain;
+     struct domain *pg_owner;
++    int okay, rc = put_old_guest_table(curr);
++
++    if ( unlikely(rc) )
++    {
++        if ( likely(rc == -EAGAIN) )
++            rc = hypercall_create_continuation(
++                     __HYPERVISOR_mmuext_op, "hihi", uops, count, pdone,
++                     foreigndom);
++        return rc;
++    }
+ 
+     if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
+     {
+--- a/xen/arch/x86/x86_64/compat/mm.c
++++ b/xen/arch/x86/x86_64/compat/mm.c
+@@ -365,7 +365,7 @@ int compat_mmuext_op(XEN_GUEST_HANDLE(mm
+                                     : mcs->call.args[1];
+                 unsigned int left = arg1 & ~MMU_UPDATE_PREEMPTED;
+ 
+-                BUG_ON(left == arg1);
++                BUG_ON(left == arg1 && left != i);
+                 BUG_ON(left > count);
+                 guest_handle_add_offset(nat_ops, i - left);
+                 guest_handle_subtract_offset(cmp_uops, left);
+--- a/xen/include/asm-x86/domain.h
++++ b/xen/include/asm-x86/domain.h
+@@ -464,6 +464,7 @@ struct arch_vcpu
+     pagetable_t guest_table_user;       /* (MFN) x86/64 user-space pagetable */
+ #endif
+     pagetable_t guest_table;            /* (MFN) guest notion of cr3 */
++    struct page_info *old_guest_table;  /* partially destructed pagetable */
+     /* guest_table holds a ref to the page, and also a type-count unless
+      * shadow refcounts are in use */
+     pagetable_t shadow_table[4];        /* (MFN) shadow(s) of guest */
+--- a/xen/include/asm-x86/mm.h
++++ b/xen/include/asm-x86/mm.h
+@@ -605,6 +605,7 @@ void audit_domains(void);
+ int new_guest_cr3(unsigned long pfn);
+ void make_cr3(struct vcpu *v, unsigned long mfn);
+ void update_cr3(struct vcpu *v);
++int vcpu_destroy_pagetables(struct vcpu *, bool_t preemptible);
+ void propagate_page_fault(unsigned long addr, u16 error_code);
+ void *do_page_walk(struct vcpu *v, unsigned long addr);
+ 
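
Patch 01 above introduces v->arch.old_guest_table: a root table whose teardown got preempted is parked there, and do_mmuext_op() drains it on its next entry before handling new requests, creating another continuation if even that drain has to yield. A standalone toy model of that entry-time drain (hypothetical names, greatly simplified, not the Xen implementation):

    /*
     * Toy model of the "old_guest_table" deferral: a preempted teardown
     * leaves unfinished work behind, and the next hypercall entry must
     * complete (or keep deferring) it before doing anything new.
     */
    #include <errno.h>
    #include <stdio.h>

    struct vcpu_state {
        int old_table_pages;        /* pages still to be released, 0 = none */
    };

    /* Release at most 'budget' pages; -EAGAIN means "call me again". */
    static int put_old_guest_table(struct vcpu_state *v, int budget)
    {
        if (!v->old_table_pages)
            return 0;
        v->old_table_pages -= (v->old_table_pages > budget)
                              ? budget : v->old_table_pages;
        return v->old_table_pages ? -EAGAIN : 0;
    }

    /* Entry point modelled on do_mmuext_op(): drain deferred work first. */
    static int mmuext_op(struct vcpu_state *v)
    {
        int rc = put_old_guest_table(v, 64);

        if (rc == -EAGAIN)
            return rc;              /* real code would create a continuation */
        /* ... normal request processing would go here ... */
        return rc;
    }

    int main(void)
    {
        struct vcpu_state v = { .old_table_pages = 200 };
        int rc, calls = 0;

        do {
            rc = mmuext_op(&v);
            calls++;
        } while (rc == -EAGAIN);
        printf("drained deferred work in %d calls (rc=%d)\n", calls, rc);
        return 0;
    }
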
diff --git a/xsa45-4.2-02-new-guest-cr3-preemptible.patch b/xsa45-4.2-02-new-guest-cr3-preemptible.patch
new file mode 100644
index 0000000..f730b67
--- /dev/null
+++ b/xsa45-4.2-02-new-guest-cr3-preemptible.patch
@@ -0,0 +1,169 @@
+x86: make new_guest_cr3() preemptible
+
+... as it may take significant amounts of time.
+
+This is part of CVE-2013-1918 / XSA-45.
+
+Signed-off-by: Jan Beulich <jbeulich at suse.com>
+Acked-by: Tim Deegan <tim at xen.org>
+
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -2889,44 +2889,69 @@ int new_guest_cr3(unsigned long mfn)
+ {
+     struct vcpu *curr = current;
+     struct domain *d = curr->domain;
+-    int okay;
++    int rc;
+     unsigned long old_base_mfn;
+ 
+ #ifdef __x86_64__
+     if ( is_pv_32on64_domain(d) )
+     {
+-        okay = paging_mode_refcounts(d)
+-            ? 0 /* Old code was broken, but what should it be? */
+-            : mod_l4_entry(
++        rc = paging_mode_refcounts(d)
++             ? -EINVAL /* Old code was broken, but what should it be? */
++             : mod_l4_entry(
+                     __va(pagetable_get_paddr(curr->arch.guest_table)),
+                     l4e_from_pfn(
+                         mfn,
+                         (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)),
+-                    pagetable_get_pfn(curr->arch.guest_table), 0, 0, curr) == 0;
+-        if ( unlikely(!okay) )
++                    pagetable_get_pfn(curr->arch.guest_table), 0, 1, curr);
++        switch ( rc )
+         {
++        case 0:
++            break;
++        case -EINTR:
++        case -EAGAIN:
++            return -EAGAIN;
++        default:
+             MEM_LOG("Error while installing new compat baseptr %lx", mfn);
+-            return 0;
++            return rc;
+         }
+ 
+         invalidate_shadow_ldt(curr, 0);
+         write_ptbase(curr);
+ 
+-        return 1;
++        return 0;
+     }
+ #endif
+-    okay = paging_mode_refcounts(d)
+-        ? get_page_from_pagenr(mfn, d)
+-        : !get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0, 0);
+-    if ( unlikely(!okay) )
++    rc = put_old_guest_table(curr);
++    if ( unlikely(rc) )
++        return rc;
++
++    old_base_mfn = pagetable_get_pfn(curr->arch.guest_table);
++    /*
++     * This is particularly important when getting restarted after the
++     * previous attempt got preempted in the put-old-MFN phase.
++     */
++    if ( old_base_mfn == mfn )
+     {
+-        MEM_LOG("Error while installing new baseptr %lx", mfn);
++        write_ptbase(curr);
+         return 0;
+     }
+ 
+-    invalidate_shadow_ldt(curr, 0);
++    rc = paging_mode_refcounts(d)
++         ? (get_page_from_pagenr(mfn, d) ? 0 : -EINVAL)
++         : get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0, 1);
++    switch ( rc )
++    {
++    case 0:
++        break;
++    case -EINTR:
++    case -EAGAIN:
++        return -EAGAIN;
++    default:
++        MEM_LOG("Error while installing new baseptr %lx", mfn);
++        return rc;
++    }
+ 
+-    old_base_mfn = pagetable_get_pfn(curr->arch.guest_table);
++    invalidate_shadow_ldt(curr, 0);
+ 
+     curr->arch.guest_table = pagetable_from_pfn(mfn);
+     update_cr3(curr);
+@@ -2935,13 +2960,25 @@ int new_guest_cr3(unsigned long mfn)
+ 
+     if ( likely(old_base_mfn != 0) )
+     {
++        struct page_info *page = mfn_to_page(old_base_mfn);
++
+         if ( paging_mode_refcounts(d) )
+-            put_page(mfn_to_page(old_base_mfn));
++            put_page(page);
+         else
+-            put_page_and_type(mfn_to_page(old_base_mfn));
++            switch ( rc = put_page_and_type_preemptible(page, 1) )
++            {
++            case -EINTR:
++                rc = -EAGAIN;
++            case -EAGAIN:
++                curr->arch.old_guest_table = page;
++                break;
++            default:
++                BUG_ON(rc);
++                break;
++            }
+     }
+ 
+-    return 1;
++    return rc;
+ }
+ 
+ static struct domain *get_pg_owner(domid_t domid)
+@@ -3239,8 +3276,13 @@ long do_mmuext_op(
+         }
+ 
+         case MMUEXT_NEW_BASEPTR:
+-            okay = (!paging_mode_translate(d)
+-                    && new_guest_cr3(op.arg1.mfn));
++            if ( paging_mode_translate(d) )
++                okay = 0;
++            else
++            {
++                rc = new_guest_cr3(op.arg1.mfn);
++                okay = !rc;
++            }
+             break;
+ 
+         
+--- a/xen/arch/x86/traps.c
++++ b/xen/arch/x86/traps.c
+@@ -2407,12 +2407,23 @@ static int emulate_privileged_op(struct 
+ #endif
+             }
+             page = get_page_from_gfn(v->domain, gfn, NULL, P2M_ALLOC);
+-            rc = page ? new_guest_cr3(page_to_mfn(page)) : 0;
+             if ( page )
++            {
++                rc = new_guest_cr3(page_to_mfn(page));
+                 put_page(page);
++            }
++            else
++                rc = -EINVAL;
+             domain_unlock(v->domain);
+-            if ( rc == 0 ) /* not okay */
++            switch ( rc )
++            {
++            case 0:
++                break;
++            case -EAGAIN: /* retry after preemption */
++                goto skip;
++            default:      /* not okay */
+                 goto fail;
++            }
+             break;
+         }
+ 
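
One detail in patch 02 deserves a note: once the new base pointer has been installed, only the put of the old one may still be pending, so a restarted call first checks whether the requested MFN is already the current one and, if so, simply reloads CR3 and succeeds. A minimal standalone sketch of that idempotent restart (toy types and names, not the real new_guest_cr3()):

    /* Toy model of the restart check in patch 02's base-pointer switch. */
    #include <errno.h>
    #include <stdio.h>

    static unsigned long current_base;     /* stand-in for guest_table MFN */
    static unsigned long deferred_put;     /* stand-in for old_guest_table */

    static int put_ref_preemptible(unsigned long mfn)
    {
        /* Pretend the first attempt to drop the old reference is preempted. */
        static int first = 1;
        if (first) {
            first = 0;
            return -EAGAIN;
        }
        printf("dropped reference on %#lx\n", mfn);
        return 0;
    }

    static int switch_base(unsigned long mfn)
    {
        unsigned long old = current_base;
        int rc;

        /* Finish a put deferred by an earlier, preempted attempt. */
        if (deferred_put) {
            rc = put_ref_preemptible(deferred_put);
            if (rc)
                return rc;
            deferred_put = 0;
        }

        /* Restarted after preemption in the put-old-MFN phase: done. */
        if (old == mfn)
            return 0;

        current_base = mfn;                 /* install the new base */
        if (old && put_ref_preemptible(old) != 0) {
            deferred_put = old;             /* finish later */
            return -EAGAIN;
        }
        return 0;
    }

    int main(void)
    {
        int rc;
        current_base = 0x1000;
        while ((rc = switch_base(0x2000)) == -EAGAIN)
            printf("preempted, retrying\n");
        printf("final rc=%d, base=%#lx\n", rc, current_base);
        return 0;
    }
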
diff --git a/xsa45-4.2-03-new-user-base-preemptible.patch b/xsa45-4.2-03-new-user-base-preemptible.patch
new file mode 100644
index 0000000..9d5679e
--- /dev/null
+++ b/xsa45-4.2-03-new-user-base-preemptible.patch
@@ -0,0 +1,74 @@
+x86: make MMUEXT_NEW_USER_BASEPTR preemptible
+
+... as it may take significant amounts of time.
+
+This is part of CVE-2013-1918 / XSA-45.
+
+Signed-off-by: Jan Beulich <jbeulich at suse.com>
+Acked-by: Tim Deegan <tim at xen.org>
+
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -3296,29 +3296,56 @@ long do_mmuext_op(
+                 break;
+             }
+ 
++            old_mfn = pagetable_get_pfn(curr->arch.guest_table_user);
++            /*
++             * This is particularly important when getting restarted after the
++             * previous attempt got preempted in the put-old-MFN phase.
++             */
++            if ( old_mfn == op.arg1.mfn )
++                break;
++
+             if ( op.arg1.mfn != 0 )
+             {
+                 if ( paging_mode_refcounts(d) )
+                     okay = get_page_from_pagenr(op.arg1.mfn, d);
+                 else
+-                    okay = !get_page_and_type_from_pagenr(
+-                        op.arg1.mfn, PGT_root_page_table, d, 0, 0);
++                {
++                    rc = get_page_and_type_from_pagenr(
++                        op.arg1.mfn, PGT_root_page_table, d, 0, 1);
++                    okay = !rc;
++                }
+                 if ( unlikely(!okay) )
+                 {
+-                    MEM_LOG("Error while installing new mfn %lx", op.arg1.mfn);
++                    if ( rc == -EINTR )
++                        rc = -EAGAIN;
++                    else if ( rc != -EAGAIN )
++                        MEM_LOG("Error while installing new mfn %lx",
++                                op.arg1.mfn);
+                     break;
+                 }
+             }
+ 
+-            old_mfn = pagetable_get_pfn(curr->arch.guest_table_user);
+             curr->arch.guest_table_user = pagetable_from_pfn(op.arg1.mfn);
+ 
+             if ( old_mfn != 0 )
+             {
++                struct page_info *page = mfn_to_page(old_mfn);
++
+                 if ( paging_mode_refcounts(d) )
+-                    put_page(mfn_to_page(old_mfn));
++                    put_page(page);
+                 else
+-                    put_page_and_type(mfn_to_page(old_mfn));
++                    switch ( rc = put_page_and_type_preemptible(page, 1) )
++                    {
++                    case -EINTR:
++                        rc = -EAGAIN;
++                    case -EAGAIN:
++                        curr->arch.old_guest_table = page;
++                        okay = 0;
++                        break;
++                    default:
++                        BUG_ON(rc);
++                        break;
++                    }
+             }
+ 
+             break;
diff --git a/xsa45-4.2-04-vcpu-reset-preemptible.patch b/xsa45-4.2-04-vcpu-reset-preemptible.patch
new file mode 100644
index 0000000..bbce567
--- /dev/null
+++ b/xsa45-4.2-04-vcpu-reset-preemptible.patch
@@ -0,0 +1,200 @@
+x86: make vcpu_reset() preemptible
+
+... as dropping the old page tables may take significant amounts of
+time.
+
+This is part of CVE-2013-1918 / XSA-45.
+
+Signed-off-by: Jan Beulich <jbeulich at suse.com>
+Acked-by: Tim Deegan <tim at xen.org>
+
+--- a/xen/arch/x86/domain.c
++++ b/xen/arch/x86/domain.c
+@@ -1051,17 +1051,16 @@ int arch_set_info_guest(
+ #undef c
+ }
+ 
+-void arch_vcpu_reset(struct vcpu *v)
++int arch_vcpu_reset(struct vcpu *v)
+ {
+     if ( !is_hvm_vcpu(v) )
+     {
+         destroy_gdt(v);
+-        vcpu_destroy_pagetables(v, 0);
+-    }
+-    else
+-    {
+-        vcpu_end_shutdown_deferral(v);
++        return vcpu_destroy_pagetables(v);
+     }
++
++    vcpu_end_shutdown_deferral(v);
++    return 0;
+ }
+ 
+ /* 
+@@ -2085,7 +2084,7 @@ int domain_relinquish_resources(struct d
+         /* Drop the in-use references to page-table bases. */
+         for_each_vcpu ( d, v )
+         {
+-            ret = vcpu_destroy_pagetables(v, 1);
++            ret = vcpu_destroy_pagetables(v);
+             if ( ret )
+                 return ret;
+         }
+--- a/xen/arch/x86/hvm/hvm.c
++++ b/xen/arch/x86/hvm/hvm.c
+@@ -3509,8 +3509,11 @@ static void hvm_s3_suspend(struct domain
+ 
+     for_each_vcpu ( d, v )
+     {
++        int rc;
++
+         vlapic_reset(vcpu_vlapic(v));
+-        vcpu_reset(v);
++        rc = vcpu_reset(v);
++        ASSERT(!rc);
+     }
+ 
+     vpic_reset(d);
+--- a/xen/arch/x86/hvm/vlapic.c
++++ b/xen/arch/x86/hvm/vlapic.c
+@@ -252,10 +252,13 @@ static void vlapic_init_sipi_action(unsi
+     {
+     case APIC_DM_INIT: {
+         bool_t fpu_initialised;
++        int rc;
++
+         domain_lock(target->domain);
+         /* Reset necessary VCPU state. This does not include FPU state. */
+         fpu_initialised = target->fpu_initialised;
+-        vcpu_reset(target);
++        rc = vcpu_reset(target);
++        ASSERT(!rc);
+         target->fpu_initialised = fpu_initialised;
+         vlapic_reset(vcpu_vlapic(target));
+         domain_unlock(target->domain);
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -2827,7 +2827,7 @@ static int put_old_guest_table(struct vc
+     return rc;
+ }
+ 
+-int vcpu_destroy_pagetables(struct vcpu *v, bool_t preemptible)
++int vcpu_destroy_pagetables(struct vcpu *v)
+ {
+     unsigned long mfn = pagetable_get_pfn(v->arch.guest_table);
+     struct page_info *page;
+@@ -2847,7 +2847,7 @@ int vcpu_destroy_pagetables(struct vcpu 
+         if ( paging_mode_refcounts(v->domain) )
+             put_page(page);
+         else
+-            rc = put_page_and_type_preemptible(page, preemptible);
++            rc = put_page_and_type_preemptible(page, 1);
+     }
+ 
+ #ifdef __x86_64__
+@@ -2873,7 +2873,7 @@ int vcpu_destroy_pagetables(struct vcpu 
+             if ( paging_mode_refcounts(v->domain) )
+                 put_page(page);
+             else
+-                rc = put_page_and_type_preemptible(page, preemptible);
++                rc = put_page_and_type_preemptible(page, 1);
+         }
+         if ( !rc )
+             v->arch.guest_table_user = pagetable_null();
+--- a/xen/common/domain.c
++++ b/xen/common/domain.c
+@@ -779,14 +779,18 @@ void domain_unpause_by_systemcontroller(
+         domain_unpause(d);
+ }
+ 
+-void vcpu_reset(struct vcpu *v)
++int vcpu_reset(struct vcpu *v)
+ {
+     struct domain *d = v->domain;
++    int rc;
+ 
+     vcpu_pause(v);
+     domain_lock(d);
+ 
+-    arch_vcpu_reset(v);
++    set_bit(_VPF_in_reset, &v->pause_flags);
++    rc = arch_vcpu_reset(v);
++    if ( rc )
++        goto out_unlock;
+ 
+     set_bit(_VPF_down, &v->pause_flags);
+ 
+@@ -802,9 +806,13 @@ void vcpu_reset(struct vcpu *v)
+ #endif
+     cpumask_clear(v->cpu_affinity_tmp);
+     clear_bit(_VPF_blocked, &v->pause_flags);
++    clear_bit(_VPF_in_reset, &v->pause_flags);
+ 
++ out_unlock:
+     domain_unlock(v->domain);
+     vcpu_unpause(v);
++
++    return rc;
+ }
+ 
+ 
+--- a/xen/common/domctl.c
++++ b/xen/common/domctl.c
+@@ -306,8 +306,10 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
+ 
+         if ( guest_handle_is_null(op->u.vcpucontext.ctxt) )
+         {
+-            vcpu_reset(v);
+-            ret = 0;
++            ret = vcpu_reset(v);
++            if ( ret == -EAGAIN )
++                ret = hypercall_create_continuation(
++                          __HYPERVISOR_domctl, "h", u_domctl);
+             goto svc_out;
+         }
+ 
+--- a/xen/include/asm-x86/mm.h
++++ b/xen/include/asm-x86/mm.h
+@@ -605,7 +605,7 @@ void audit_domains(void);
+ int new_guest_cr3(unsigned long pfn);
+ void make_cr3(struct vcpu *v, unsigned long mfn);
+ void update_cr3(struct vcpu *v);
+-int vcpu_destroy_pagetables(struct vcpu *, bool_t preemptible);
++int vcpu_destroy_pagetables(struct vcpu *);
+ void propagate_page_fault(unsigned long addr, u16 error_code);
+ void *do_page_walk(struct vcpu *v, unsigned long addr);
+ 
+--- a/xen/include/xen/domain.h
++++ b/xen/include/xen/domain.h
+@@ -13,7 +13,7 @@ typedef union {
+ struct vcpu *alloc_vcpu(
+     struct domain *d, unsigned int vcpu_id, unsigned int cpu_id);
+ struct vcpu *alloc_dom0_vcpu0(void);
+-void vcpu_reset(struct vcpu *v);
++int vcpu_reset(struct vcpu *);
+ 
+ struct xen_domctl_getdomaininfo;
+ void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info);
+@@ -67,7 +67,7 @@ void arch_dump_vcpu_info(struct vcpu *v)
+ 
+ void arch_dump_domain_info(struct domain *d);
+ 
+-void arch_vcpu_reset(struct vcpu *v);
++int arch_vcpu_reset(struct vcpu *);
+ 
+ extern spinlock_t vcpu_alloc_lock;
+ bool_t domctl_lock_acquire(void);
+--- a/xen/include/xen/sched.h
++++ b/xen/include/xen/sched.h
+@@ -644,6 +644,9 @@ static inline struct domain *next_domain
+  /* VCPU is blocked due to missing mem_sharing ring. */
+ #define _VPF_mem_sharing     6
+ #define VPF_mem_sharing      (1UL<<_VPF_mem_sharing)
++ /* VCPU is being reset. */
++#define _VPF_in_reset        7
++#define VPF_in_reset         (1UL<<_VPF_in_reset)
+ 
+ static inline int vcpu_runnable(struct vcpu *v)
+ {
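
Patch 04 turns vcpu_reset()/arch_vcpu_reset() into int-returning functions: the domctl caller converts -EAGAIN into a hypercall continuation, while callers that cannot be preempted (the HVM S3 and vlapic INIT paths, where no PV page tables are involved) simply assert success. A tiny standalone sketch of that split (made-up helpers, not the Xen functions):

    /* Toy model of patch 04's error propagation out of a vcpu reset. */
    #include <assert.h>
    #include <errno.h>
    #include <stdio.h>

    static int work_left = 3;

    /* Stand-in for arch_vcpu_reset(): may need several attempts. */
    static int arch_reset(void)
    {
        return --work_left ? -EAGAIN : 0;
    }

    static int vcpu_reset(void)
    {
        int rc = arch_reset();
        /* further reset steps only run once rc == 0 */
        return rc;
    }

    int main(void)
    {
        int rc;

        /* A preemptible caller (the domctl path) retries on -EAGAIN... */
        while ((rc = vcpu_reset()) == -EAGAIN)
            printf("preempted, continuing\n");
        printf("domctl-style caller finished, rc=%d\n", rc);

        /* ...while a caller that cannot be preempted (e.g. an HVM-only
         * path with no PV page tables to drop) just asserts success. */
        work_left = 1;
        rc = vcpu_reset();
        assert(rc == 0);
        return 0;
    }
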
diff --git a/xsa45-4.2-05-set-info-guest-preemptible.patch b/xsa45-4.2-05-set-info-guest-preemptible.patch
new file mode 100644
index 0000000..b584b38
--- /dev/null
+++ b/xsa45-4.2-05-set-info-guest-preemptible.patch
@@ -0,0 +1,204 @@
+x86: make arch_set_info_guest() preemptible
+
+.. as the root page table validation (and the dropping of an eventual
+old one) can require meaningful amounts of time.
+
+This is part of CVE-2013-1918 / XSA-45.
+
+Signed-off-by: Jan Beulich <jbeulich at suse.com>
+Acked-by: Tim Deegan <tim at xen.org>
+
+--- a/xen/arch/x86/domain.c
++++ b/xen/arch/x86/domain.c
+@@ -858,6 +858,9 @@ int arch_set_info_guest(
+ 
+     if ( !v->is_initialised )
+     {
++        if ( !compat && !(flags & VGCF_in_kernel) && !c.nat->ctrlreg[1] )
++            return -EINVAL;
++
+         v->arch.pv_vcpu.ldt_base = c(ldt_base);
+         v->arch.pv_vcpu.ldt_ents = c(ldt_ents);
+     }
+@@ -955,24 +958,44 @@ int arch_set_info_guest(
+     if ( rc != 0 )
+         return rc;
+ 
++    set_bit(_VPF_in_reset, &v->pause_flags);
++
+     if ( !compat )
+-    {
+         cr3_gfn = xen_cr3_to_pfn(c.nat->ctrlreg[3]);
+-        cr3_page = get_page_from_gfn(d, cr3_gfn, NULL, P2M_ALLOC);
+-
+-        if ( !cr3_page )
+-        {
+-            destroy_gdt(v);
+-            return -EINVAL;
+-        }
+-        if ( !paging_mode_refcounts(d)
+-             && !get_page_type(cr3_page, PGT_base_page_table) )
+-        {
+-            put_page(cr3_page);
+-            destroy_gdt(v);
+-            return -EINVAL;
+-        }
++#ifdef CONFIG_COMPAT
++    else
++        cr3_gfn = compat_cr3_to_pfn(c.cmp->ctrlreg[3]);
++#endif
++    cr3_page = get_page_from_gfn(d, cr3_gfn, NULL, P2M_ALLOC);
+ 
++    if ( !cr3_page )
++        rc = -EINVAL;
++    else if ( paging_mode_refcounts(d) )
++        /* nothing */;
++    else if ( cr3_page == v->arch.old_guest_table )
++    {
++        v->arch.old_guest_table = NULL;
++        put_page(cr3_page);
++    }
++    else
++    {
++        /*
++         * Since v->arch.guest_table{,_user} are both NULL, this effectively
++         * is just a call to put_old_guest_table().
++         */
++        if ( !compat )
++            rc = vcpu_destroy_pagetables(v);
++        if ( !rc )
++            rc = get_page_type_preemptible(cr3_page,
++                                           !compat ? PGT_root_page_table
++                                                   : PGT_l3_page_table);
++        if ( rc == -EINTR )
++            rc = -EAGAIN;
++    }
++    if ( rc )
++        /* handled below */;
++    else if ( !compat )
++    {
+         v->arch.guest_table = pagetable_from_page(cr3_page);
+ #ifdef __x86_64__
+         if ( c.nat->ctrlreg[1] )
+@@ -980,56 +1003,44 @@ int arch_set_info_guest(
+             cr3_gfn = xen_cr3_to_pfn(c.nat->ctrlreg[1]);
+             cr3_page = get_page_from_gfn(d, cr3_gfn, NULL, P2M_ALLOC);
+ 
+-            if ( !cr3_page ||
+-                 (!paging_mode_refcounts(d)
+-                  && !get_page_type(cr3_page, PGT_base_page_table)) )
++            if ( !cr3_page )
++                rc = -EINVAL;
++            else if ( !paging_mode_refcounts(d) )
+             {
+-                if (cr3_page)
+-                    put_page(cr3_page);
+-                cr3_page = pagetable_get_page(v->arch.guest_table);
+-                v->arch.guest_table = pagetable_null();
+-                if ( paging_mode_refcounts(d) )
+-                    put_page(cr3_page);
+-                else
+-                    put_page_and_type(cr3_page);
+-                destroy_gdt(v);
+-                return -EINVAL;
++                rc = get_page_type_preemptible(cr3_page, PGT_root_page_table);
++                switch ( rc )
++                {
++                case -EINTR:
++                    rc = -EAGAIN;
++                case -EAGAIN:
++                    v->arch.old_guest_table =
++                        pagetable_get_page(v->arch.guest_table);
++                    v->arch.guest_table = pagetable_null();
++                    break;
++                }
+             }
+-
+-            v->arch.guest_table_user = pagetable_from_page(cr3_page);
+-        }
+-        else if ( !(flags & VGCF_in_kernel) )
+-        {
+-            destroy_gdt(v);
+-            return -EINVAL;
++            if ( !rc )
++               v->arch.guest_table_user = pagetable_from_page(cr3_page);
+         }
+     }
+     else
+     {
+         l4_pgentry_t *l4tab;
+ 
+-        cr3_gfn = compat_cr3_to_pfn(c.cmp->ctrlreg[3]);
+-        cr3_page = get_page_from_gfn(d, cr3_gfn, NULL, P2M_ALLOC);
+-
+-        if ( !cr3_page)
+-        {
+-            destroy_gdt(v);
+-            return -EINVAL;
+-        }
+-
+-        if (!paging_mode_refcounts(d)
+-            && !get_page_type(cr3_page, PGT_l3_page_table) )
+-        {
+-            put_page(cr3_page);
+-            destroy_gdt(v);
+-            return -EINVAL;
+-        }
+-
+         l4tab = __va(pagetable_get_paddr(v->arch.guest_table));
+         *l4tab = l4e_from_pfn(page_to_mfn(cr3_page),
+             _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED);
+ #endif
+     }
++    if ( rc )
++    {
++        if ( cr3_page )
++            put_page(cr3_page);
++        destroy_gdt(v);
++        return rc;
++    }
++
++    clear_bit(_VPF_in_reset, &v->pause_flags);
+ 
+     if ( v->vcpu_id == 0 )
+         update_domain_wallclock_time(d);
+--- a/xen/common/compat/domain.c
++++ b/xen/common/compat/domain.c
+@@ -50,6 +50,10 @@ int compat_vcpu_op(int cmd, int vcpuid, 
+         rc = v->is_initialised ? -EEXIST : arch_set_info_guest(v, cmp_ctxt);
+         domain_unlock(d);
+ 
++        if ( rc == -EAGAIN )
++            rc = hypercall_create_continuation(__HYPERVISOR_vcpu_op, "iih",
++                                               cmd, vcpuid, arg);
++
+         xfree(cmp_ctxt);
+         break;
+     }
+--- a/xen/common/domain.c
++++ b/xen/common/domain.c
+@@ -849,6 +849,11 @@ long do_vcpu_op(int cmd, int vcpuid, XEN
+         domain_unlock(d);
+ 
+         free_vcpu_guest_context(ctxt);
++
++        if ( rc == -EAGAIN )
++            rc = hypercall_create_continuation(__HYPERVISOR_vcpu_op, "iih",
++                                               cmd, vcpuid, arg);
++
+         break;
+ 
+     case VCPUOP_up: {
+--- a/xen/common/domctl.c
++++ b/xen/common/domctl.c
+@@ -338,6 +338,10 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
+             domain_pause(d);
+             ret = arch_set_info_guest(v, c);
+             domain_unpause(d);
++
++            if ( ret == -EAGAIN )
++                ret = hypercall_create_continuation(
++                          __HYPERVISOR_domctl, "h", u_domctl);
+         }
+ 
+     svc_out:
diff --git a/xsa45-4.2-06-unpin-preemptible.patch b/xsa45-4.2-06-unpin-preemptible.patch
new file mode 100644
index 0000000..a18f3dd
--- /dev/null
+++ b/xsa45-4.2-06-unpin-preemptible.patch
@@ -0,0 +1,127 @@
+x86: make page table unpinning preemptible
+
+... as it may take significant amounts of time.
+
+Since we can't re-invoke the operation in a second attempt, the
+continuation logic must be slightly tweaked so that we make sure
+do_mmuext_op() gets run one more time even when the preempted unpin
+operation was the last one in a batch.
+
+This is part of CVE-2013-1918 / XSA-45.
+
+Signed-off-by: Jan Beulich <jbeulich at suse.com>
+Acked-by: Tim Deegan <tim at xen.org>
+
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -3123,6 +3123,14 @@ long do_mmuext_op(
+         return rc;
+     }
+ 
++    if ( unlikely(count == MMU_UPDATE_PREEMPTED) &&
++         likely(guest_handle_is_null(uops)) )
++    {
++        /* See the curr->arch.old_guest_table related
++         * hypercall_create_continuation() below. */
++        return (int)foreigndom;
++    }
++
+     if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
+     {
+         count &= ~MMU_UPDATE_PREEMPTED;
+@@ -3146,7 +3154,7 @@ long do_mmuext_op(
+ 
+     for ( i = 0; i < count; i++ )
+     {
+-        if ( hypercall_preempt_check() )
++        if ( curr->arch.old_guest_table || hypercall_preempt_check() )
+         {
+             rc = -EAGAIN;
+             break;
+@@ -3266,7 +3274,17 @@ long do_mmuext_op(
+                 break;
+             }
+ 
+-            put_page_and_type(page);
++            switch ( rc = put_page_and_type_preemptible(page, 1) )
++            {
++            case -EINTR:
++            case -EAGAIN:
++                curr->arch.old_guest_table = page;
++                rc = 0;
++                break;
++            default:
++                BUG_ON(rc);
++                break;
++            }
+             put_page(page);
+ 
+             /* A page is dirtied when its pin status is cleared. */
+@@ -3587,9 +3605,27 @@ long do_mmuext_op(
+     }
+ 
+     if ( rc == -EAGAIN )
++    {
++        ASSERT(i < count);
+         rc = hypercall_create_continuation(
+             __HYPERVISOR_mmuext_op, "hihi",
+             uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
++    }
++    else if ( curr->arch.old_guest_table )
++    {
++        XEN_GUEST_HANDLE(void) null;
++
++        ASSERT(rc || i == count);
++        set_xen_guest_handle(null, NULL);
++        /*
++         * In order to have a way to communicate the final return value to
++         * our continuation, we pass this in place of "foreigndom", building
++         * on the fact that this argument isn't needed anymore.
++         */
++        rc = hypercall_create_continuation(
++                __HYPERVISOR_mmuext_op, "hihi", null,
++                MMU_UPDATE_PREEMPTED, null, rc);
++    }
+ 
+     put_pg_owner(pg_owner);
+ 
+--- a/xen/arch/x86/x86_64/compat/mm.c
++++ b/xen/arch/x86/x86_64/compat/mm.c
+@@ -268,6 +268,13 @@ int compat_mmuext_op(XEN_GUEST_HANDLE(mm
+     int rc = 0;
+     XEN_GUEST_HANDLE(mmuext_op_t) nat_ops;
+ 
++    if ( unlikely(count == MMU_UPDATE_PREEMPTED) &&
++         likely(guest_handle_is_null(cmp_uops)) )
++    {
++        set_xen_guest_handle(nat_ops, NULL);
++        return do_mmuext_op(nat_ops, count, pdone, foreigndom);
++    }
++
+     preempt_mask = count & MMU_UPDATE_PREEMPTED;
+     count ^= preempt_mask;
+ 
+@@ -370,12 +377,18 @@ int compat_mmuext_op(XEN_GUEST_HANDLE(mm
+                 guest_handle_add_offset(nat_ops, i - left);
+                 guest_handle_subtract_offset(cmp_uops, left);
+                 left = 1;
+-                BUG_ON(!hypercall_xlat_continuation(&left, 0x01, nat_ops, cmp_uops));
+-                BUG_ON(left != arg1);
+-                if (!test_bit(_MCSF_in_multicall, &mcs->flags))
+-                    regs->_ecx += count - i;
++                if ( arg1 != MMU_UPDATE_PREEMPTED )
++                {
++                    BUG_ON(!hypercall_xlat_continuation(&left, 0x01, nat_ops,
++                                                        cmp_uops));
++                    if ( !test_bit(_MCSF_in_multicall, &mcs->flags) )
++                        regs->_ecx += count - i;
++                    else
++                        mcs->compat_call.args[1] += count - i;
++                }
+                 else
+-                    mcs->compat_call.args[1] += count - i;
++                    BUG_ON(hypercall_xlat_continuation(&left, 0));
++                BUG_ON(left != arg1);
+             }
+             else
+                 BUG_ON(err > 0);
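
The continuation created at the end of patch 06 is unusual: an unpin cannot simply be re-issued, so when only the deferred put is left the hypercall is re-queued with a null guest handle and count == MMU_UPDATE_PREEMPTED, and the value to hand back to the guest travels in the otherwise unused foreigndom argument. A standalone toy model of that encoding (invented names; the real code goes through hypercall_create_continuation() rather than a direct call):

    /*
     * Toy model of the "final continuation" trick: when only the deferred
     * put remains, the call is re-queued with a NULL buffer and a sentinel
     * count, and the eventual return value rides in an unused argument.
     */
    #include <stddef.h>
    #include <stdio.h>

    #define PREEMPTED_SENTINEL 0x80000000u

    static int pending_put = 1;  /* stand-in for curr->arch.old_guest_table */

    static int mmuext_op(const int *ops, unsigned int count, int final_rc)
    {
        /* Re-entered only to deliver the final return value. */
        if (count == PREEMPTED_SENTINEL && ops == NULL) {
            pending_put = 0;     /* the deferred put completes here */
            return final_rc;
        }

        /* ... normal processing of 'count' ops from 'ops' ... */
        (void)ops;
        (void)count;

        if (pending_put) {
            /* Real code: create a continuation with a null handle,
             * the sentinel count and rc in the last slot. */
            return mmuext_op(NULL, PREEMPTED_SENTINEL, 0);
        }
        return 0;
    }

    int main(void)
    {
        int ops[1] = { 0 };
        int rc = mmuext_op(ops, 1, 0);

        printf("rc=%d, pending_put=%d\n", rc, pending_put);
        return 0;
    }
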
diff --git a/xsa45-4.2-07-mm-error-paths-preemptible.patch b/xsa45-4.2-07-mm-error-paths-preemptible.patch
new file mode 100644
index 0000000..0b7ce18
--- /dev/null
+++ b/xsa45-4.2-07-mm-error-paths-preemptible.patch
@@ -0,0 +1,255 @@
+x86: make page table handling error paths preemptible
+
+... as they may take significant amounts of time.
+
+This requires cloning the tweaked continuation logic from
+do_mmuext_op() to do_mmu_update().
+
+Note that in mod_l[34]_entry() a negative "preemptible" value gets
+passed to put_page_from_l[34]e() now, telling the callee to store the
+respective page in current->arch.old_guest_table (for a hypercall
+continuation to pick up), rather than carrying out the put right away.
+This is going to be made a little more explicit by a subsequent cleanup
+patch.
+
+This is part of CVE-2013-1918 / XSA-45.
+
+Signed-off-by: Jan Beulich <jbeulich at suse.com>
+Acked-by: Tim Deegan <tim at xen.org>
+
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -1241,7 +1241,16 @@ static int put_page_from_l3e(l3_pgentry_
+ #endif
+ 
+     if ( unlikely(partial > 0) )
++    {
++        ASSERT(preemptible >= 0);
+         return __put_page_type(l3e_get_page(l3e), preemptible);
++    }
++
++    if ( preemptible < 0 )
++    {
++        current->arch.old_guest_table = l3e_get_page(l3e);
++        return 0;
++    }
+ 
+     return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible);
+ }
+@@ -1254,7 +1263,17 @@ static int put_page_from_l4e(l4_pgentry_
+          (l4e_get_pfn(l4e) != pfn) )
+     {
+         if ( unlikely(partial > 0) )
++        {
++            ASSERT(preemptible >= 0);
+             return __put_page_type(l4e_get_page(l4e), preemptible);
++        }
++
++        if ( preemptible < 0 )
++        {
++            current->arch.old_guest_table = l4e_get_page(l4e);
++            return 0;
++        }
++
+         return put_page_and_type_preemptible(l4e_get_page(l4e), preemptible);
+     }
+     return 1;
+@@ -1549,12 +1568,17 @@ static int alloc_l3_table(struct page_in
+     if ( rc < 0 && rc != -EAGAIN && rc != -EINTR )
+     {
+         MEM_LOG("Failure in alloc_l3_table: entry %d", i);
++        if ( i )
++        {
++            page->nr_validated_ptes = i;
++            page->partial_pte = 0;
++            current->arch.old_guest_table = page;
++        }
+         while ( i-- > 0 )
+         {
+             if ( !is_guest_l3_slot(i) )
+                 continue;
+             unadjust_guest_l3e(pl3e[i], d);
+-            put_page_from_l3e(pl3e[i], pfn, 0, 0);
+         }
+     }
+ 
+@@ -1584,22 +1608,24 @@ static int alloc_l4_table(struct page_in
+             page->nr_validated_ptes = i;
+             page->partial_pte = partial ?: 1;
+         }
+-        else if ( rc == -EINTR )
++        else if ( rc < 0 )
+         {
++            if ( rc != -EINTR )
++                MEM_LOG("Failure in alloc_l4_table: entry %d", i);
+             if ( i )
+             {
+                 page->nr_validated_ptes = i;
+                 page->partial_pte = 0;
+-                rc = -EAGAIN;
++                if ( rc == -EINTR )
++                    rc = -EAGAIN;
++                else
++                {
++                    if ( current->arch.old_guest_table )
++                        page->nr_validated_ptes++;
++                    current->arch.old_guest_table = page;
++                }
+             }
+         }
+-        else if ( rc < 0 )
+-        {
+-            MEM_LOG("Failure in alloc_l4_table: entry %d", i);
+-            while ( i-- > 0 )
+-                if ( is_guest_l4_slot(d, i) )
+-                    put_page_from_l4e(pl4e[i], pfn, 0, 0);
+-        }
+         if ( rc < 0 )
+             return rc;
+ 
+@@ -2047,7 +2073,7 @@ static int mod_l3_entry(l3_pgentry_t *pl
+         pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e);
+     }
+ 
+-    put_page_from_l3e(ol3e, pfn, 0, 0);
++    put_page_from_l3e(ol3e, pfn, 0, -preemptible);
+     return rc;
+ }
+ 
+@@ -2110,7 +2136,7 @@ static int mod_l4_entry(l4_pgentry_t *pl
+         return -EFAULT;
+     }
+ 
+-    put_page_from_l4e(ol4e, pfn, 0, 0);
++    put_page_from_l4e(ol4e, pfn, 0, -preemptible);
+     return rc;
+ }
+ 
+@@ -2268,7 +2294,15 @@ static int alloc_page_type(struct page_i
+                 PRtype_info ": caf=%08lx taf=%" PRtype_info,
+                 page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)),
+                 type, page->count_info, page->u.inuse.type_info);
+-        page->u.inuse.type_info = 0;
++        if ( page != current->arch.old_guest_table )
++            page->u.inuse.type_info = 0;
++        else
++        {
++            ASSERT((page->u.inuse.type_info &
++                    (PGT_count_mask | PGT_validated)) == 1);
++            get_page_light(page);
++            page->u.inuse.type_info |= PGT_partial;
++        }
+     }
+     else
+     {
+@@ -3218,21 +3252,17 @@ long do_mmuext_op(
+             }
+ 
+             if ( (rc = xsm_memory_pin_page(d, pg_owner, page)) != 0 )
+-            {
+-                put_page_and_type(page);
+                 okay = 0;
+-                break;
+-            }
+-
+-            if ( unlikely(test_and_set_bit(_PGT_pinned,
+-                                           &page->u.inuse.type_info)) )
++            else if ( unlikely(test_and_set_bit(_PGT_pinned,
++                                                &page->u.inuse.type_info)) )
+             {
+                 MEM_LOG("Mfn %lx already pinned", page_to_mfn(page));
+-                put_page_and_type(page);
+                 okay = 0;
+-                break;
+             }
+ 
++            if ( unlikely(!okay) )
++                goto pin_drop;
++
+             /* A page is dirtied when its pin status is set. */
+             paging_mark_dirty(pg_owner, page_to_mfn(page));
+ 
+@@ -3246,7 +3276,13 @@ long do_mmuext_op(
+                                                &page->u.inuse.type_info));
+                 spin_unlock(&pg_owner->page_alloc_lock);
+                 if ( drop_ref )
+-                    put_page_and_type(page);
++                {
++        pin_drop:
++                    if ( type == PGT_l1_page_table )
++                        put_page_and_type(page);
++                    else
++                        curr->arch.old_guest_table = page;
++                }
+             }
+ 
+             break;
+@@ -3652,11 +3688,28 @@ long do_mmu_update(
+     void *va;
+     unsigned long gpfn, gmfn, mfn;
+     struct page_info *page;
+-    int rc = 0, i = 0;
+-    unsigned int cmd, done = 0, pt_dom;
+-    struct vcpu *v = current;
++    unsigned int cmd, i = 0, done = 0, pt_dom;
++    struct vcpu *curr = current, *v = curr;
+     struct domain *d = v->domain, *pt_owner = d, *pg_owner;
+     struct domain_mmap_cache mapcache;
++    int rc = put_old_guest_table(curr);
++
++    if ( unlikely(rc) )
++    {
++        if ( likely(rc == -EAGAIN) )
++            rc = hypercall_create_continuation(
++                     __HYPERVISOR_mmu_update, "hihi", ureqs, count, pdone,
++                     foreigndom);
++        return rc;
++    }
++
++    if ( unlikely(count == MMU_UPDATE_PREEMPTED) &&
++         likely(guest_handle_is_null(ureqs)) )
++    {
++        /* See the curr->arch.old_guest_table related
++         * hypercall_create_continuation() below. */
++        return (int)foreigndom;
++    }
+ 
+     if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
+     {
+@@ -3705,7 +3758,7 @@ long do_mmu_update(
+ 
+     for ( i = 0; i < count; i++ )
+     {
+-        if ( hypercall_preempt_check() )
++        if ( curr->arch.old_guest_table || hypercall_preempt_check() )
+         {
+             rc = -EAGAIN;
+             break;
+@@ -3886,9 +3939,27 @@ long do_mmu_update(
+     }
+ 
+     if ( rc == -EAGAIN )
++    {
++        ASSERT(i < count);
+         rc = hypercall_create_continuation(
+             __HYPERVISOR_mmu_update, "hihi",
+             ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
++    }
++    else if ( curr->arch.old_guest_table )
++    {
++        XEN_GUEST_HANDLE(void) null;
++
++        ASSERT(rc || i == count);
++        set_xen_guest_handle(null, NULL);
++        /*
++         * In order to have a way to communicate the final return value to
++         * our continuation, we pass this in place of "foreigndom", building
++         * on the fact that this argument isn't needed anymore.
++         */
++        rc = hypercall_create_continuation(
++                __HYPERVISOR_mmu_update, "hihi", null,
++                MMU_UPDATE_PREEMPTED, null, rc);
++    }
+ 
+     put_pg_owner(pg_owner);
+ 
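
Patch 07 extends the convention once more: a negative "preemptible" value passed to put_page_from_l[34]e() means "do not drop the reference here at all - park the page in current->arch.old_guest_table so a continuation can finish the job". A standalone sketch of that tri-state argument (toy helper name, not the Xen one):

    /*
     * Toy model of the tri-state "preemptible" argument in patch 07:
     *   > 0  drop the reference now, allowing preemption,
     *   == 0 drop the reference now, non-preemptibly,
     *   < 0  do not drop it - stash it for a later continuation.
     */
    #include <stdio.h>

    static const char *stashed;  /* stand-in for current->arch.old_guest_table */

    static int put_table_ref(const char *page, int preemptible)
    {
        if (preemptible < 0) {
            stashed = page;      /* picked up by the next hypercall entry */
            return 0;
        }
        printf("dropping reference on %s (%spreemptible)\n",
               page, preemptible ? "" : "non-");
        return 0;
    }

    int main(void)
    {
        put_table_ref("pageA", 1);   /* immediate, preemptible */
        put_table_ref("pageB", 0);   /* immediate, non-preemptible */
        put_table_ref("pageC", -1);  /* deferred */
        printf("stashed: %s\n", stashed ? stashed : "(none)");
        return 0;
    }
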
diff --git a/xsa49-4.2.patch b/xsa49-4.2.patch
new file mode 100644
index 0000000..4b92c7f
--- /dev/null
+++ b/xsa49-4.2.patch
@@ -0,0 +1,50 @@
+VT-d: don't permit SVT_NO_VERIFY entries for known device types
+
+Only in cases where we don't know what to do we should leave the IRTE
+blank (suppressing all validation), but we should always log a warning
+in those cases (as being insecure).
+
+This is CVE-2013-1952 / XSA-49.
+
+Signed-off-by: Jan Beulich <jbeulich at suse.com>
+Acked-by: "Zhang, Xiantao" <xiantao.zhang at intel.com>
+
+--- a/xen/drivers/passthrough/vtd/intremap.c
++++ b/xen/drivers/passthrough/vtd/intremap.c
+@@ -440,16 +440,15 @@ static void set_msi_source_id(struct pci
+     type = pdev_type(seg, bus, devfn);
+     switch ( type )
+     {
++    case DEV_TYPE_PCIe_ENDPOINT:
+     case DEV_TYPE_PCIe_BRIDGE:
+     case DEV_TYPE_PCIe2PCI_BRIDGE:
+-    case DEV_TYPE_LEGACY_PCI_BRIDGE:
+-        break;
+-
+-    case DEV_TYPE_PCIe_ENDPOINT:
+         set_ire_sid(ire, SVT_VERIFY_SID_SQ, SQ_ALL_16, PCI_BDF2(bus, devfn));
+         break;
+ 
+     case DEV_TYPE_PCI:
++    case DEV_TYPE_LEGACY_PCI_BRIDGE:
++    /* case DEV_TYPE_PCI2PCIe_BRIDGE: */
+         ret = find_upstream_bridge(seg, &bus, &devfn, &secbus);
+         if ( ret == 0 ) /* integrated PCI device */
+         {
+@@ -461,10 +460,15 @@ static void set_msi_source_id(struct pci
+             if ( pdev_type(seg, bus, devfn) == DEV_TYPE_PCIe2PCI_BRIDGE )
+                 set_ire_sid(ire, SVT_VERIFY_BUS, SQ_ALL_16,
+                             (bus << 8) | pdev->bus);
+-            else if ( pdev_type(seg, bus, devfn) == DEV_TYPE_LEGACY_PCI_BRIDGE )
++            else
+                 set_ire_sid(ire, SVT_VERIFY_SID_SQ, SQ_ALL_16,
+                             PCI_BDF2(bus, devfn));
+         }
++        else
++            dprintk(XENLOG_WARNING VTDPREFIX,
++                    "d%d: no upstream bridge for %04x:%02x:%02x.%u\n",
++                    pdev->domain->domain_id,
++                    seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+         break;
+ 
+     default:
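
The xsa49 patch addresses a different problem: interrupt remapping entries for known device types were being left without source-id verification, letting a guest given control of a bridge device inject interrupts into the host (the denial of service noted in the changelog above). The fix gives every known type an explicit SVT_VERIFY_* setting and only falls back to no verification - with a warning - for genuinely unknown types. The standalone sketch below illustrates only that structure; the actual per-type modes in the patch differ and depend on upstream-bridge lookups:

    /*
     * Toy model of the XSA-49 idea: every known device type gets an
     * explicit source-id verification mode; only genuinely unknown
     * types fall back to "no verification", and that fallback is
     * always logged as insecure.
     */
    #include <stdio.h>

    enum dev_type { PCIE_ENDPOINT, PCIE_BRIDGE, LEGACY_PCI, UNKNOWN_DEV };
    enum verify   { VERIFY_SID, VERIFY_BUS, NO_VERIFY };

    static enum verify pick_verification(enum dev_type t)
    {
        switch (t) {
        case PCIE_ENDPOINT:
            return VERIFY_SID;      /* exact requester id must match */
        case PCIE_BRIDGE:
        case LEGACY_PCI:
            return VERIFY_BUS;      /* constrain to the bridge's bus */
        default:
            fprintf(stderr,
                    "warning: unknown device type %d left unverified\n", t);
            return NO_VERIFY;       /* insecure - hence the warning */
        }
    }

    int main(void)
    {
        printf("endpoint -> %d\n", pick_verification(PCIE_ENDPOINT));
        printf("bridge   -> %d\n", pick_verification(PCIE_BRIDGE));
        printf("unknown  -> %d\n", pick_verification(UNKNOWN_DEV));
        return 0;
    }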