[kernel/f12/user/myoung/xendom0: 8/8] update pvops

myoung myoung at fedoraproject.org
Mon Sep 20 19:23:56 UTC 2010


commit c52a0441d702570191731cd61e545f264be0397f
Author: Michael Young <m.a.young at durham.ac.uk>
Date:   Mon Sep 20 20:22:36 2010 +0100

    update pvops

 kernel.spec     |    3 +
 xen.pvops.patch | 1169 ++++++++++++++++++++++++++++++++++++++-----------------
 2 files changed, 814 insertions(+), 358 deletions(-)
---
diff --git a/kernel.spec b/kernel.spec
index e87e09e..14a5272 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -2254,6 +2254,9 @@ fi
 %kernel_variant_files -k vmlinux %{with_kdump} kdump
 
 %changelog
+* Mon Sep 20 2010 Michael Young <m.a.young at durham.ac.uk>
+- update pvops
+
 * Tue Sep 14 2010 Chuck Ebbert <cebbert at redhat.com> 2.6.32.21-168
 - Fix three CVEs:
   CVE-2010-3080: /dev/sequencer open failure is not handled correctly
diff --git a/xen.pvops.patch b/xen.pvops.patch
index c5dbbcb..6a8c1d4 100644
--- a/xen.pvops.patch
+++ b/xen.pvops.patch
@@ -238,6 +238,19 @@ index 6a25d5d..ac91eed 100644
  }
  
  static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
+index 40b4e61..fa3fd43 100644
+--- a/arch/x86/include/asm/e820.h
++++ b/arch/x86/include/asm/e820.h
+@@ -109,6 +109,8 @@ extern void reserve_early(u64 start, u64 end, char *name);
+ extern void reserve_early_overlap_ok(u64 start, u64 end, char *name);
+ extern void free_early(u64 start, u64 end);
+ extern void early_res_to_bootmem(u64 start, u64 end);
++extern u64 early_res_next_free(u64 start);
++extern u64 early_res_next_reserved(u64 addr, u64 max);
+ extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
+ 
+ extern unsigned long e820_end_of_ram_pfn(void);
 diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h
 index 6cfdafa..4ac5b0f 100644
 --- a/arch/x86/include/asm/gart.h
@@ -1092,7 +1105,7 @@ index 0000000..75df312
 +#endif
 +
 diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
-index 018a0a4..a839127 100644
+index 018a0a4..8760cc6 100644
 --- a/arch/x86/include/asm/xen/page.h
 +++ b/arch/x86/include/asm/xen/page.h
 @@ -5,6 +5,7 @@
@@ -1103,7 +1116,7 @@ index 018a0a4..a839127 100644
  
  #include <asm/uaccess.h>
  #include <asm/page.h>
-@@ -35,9 +36,11 @@ typedef struct xpaddr {
+@@ -35,16 +36,25 @@ typedef struct xpaddr {
  #define MAX_DOMAIN_PAGES						\
      ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE))
  
@@ -1116,7 +1129,22 @@ index 018a0a4..a839127 100644
  
  static inline unsigned long pfn_to_mfn(unsigned long pfn)
  {
-@@ -62,10 +65,8 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
++	unsigned long mfn;
++
+ 	if (xen_feature(XENFEAT_auto_translated_physmap))
+ 		return pfn;
+ 
+-	return get_phys_to_machine(pfn) & ~FOREIGN_FRAME_BIT;
++	mfn = get_phys_to_machine(pfn);
++
++	if (mfn != INVALID_P2M_ENTRY)
++		mfn &= ~FOREIGN_FRAME_BIT;
++
++	return mfn;
+ }
+ 
+ static inline int phys_to_machine_mapping_valid(unsigned long pfn)
+@@ -62,10 +72,8 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
  	if (xen_feature(XENFEAT_auto_translated_physmap))
  		return mfn;
  
@@ -1128,7 +1156,7 @@ index 018a0a4..a839127 100644
  
  	pfn = 0;
  	/*
-@@ -112,13 +113,9 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine)
+@@ -112,13 +120,9 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine)
   */
  static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
  {
@@ -1144,7 +1172,7 @@ index 018a0a4..a839127 100644
  	return pfn;
  }
  
-@@ -163,6 +160,7 @@ static inline pte_t __pte_ma(pteval_t x)
+@@ -163,6 +167,7 @@ static inline pte_t __pte_ma(pteval_t x)
  
  #define pgd_val_ma(x)	((x).pgd)
  
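
The pfn_to_mfn() rework above is the behavioural heart of this hunk:
the old code stripped FOREIGN_FRAME_BIT unconditionally, which also
corrupted INVALID_P2M_ENTRY, so a caller comparing the result against
INVALID_P2M_ENTRY could never detect a missing mapping. The new version
masks the foreign bit only for valid entries. A standalone sketch of
the new semantics (the types, table and values below are stand-ins for
illustration, not kernel code):

#include <stdio.h>

/* Stand-ins for the kernel definitions used above (illustrative). */
#define INVALID_P2M_ENTRY	(~0UL)
#define FOREIGN_FRAME_BIT	(1UL << (sizeof(unsigned long) * 8 - 1))

/* Toy p2m table standing in for get_phys_to_machine(). */
static unsigned long p2m[] = {
	0x100,				/* ordinary local frame */
	0x101 | FOREIGN_FRAME_BIT,	/* foreign frame */
	INVALID_P2M_ENTRY,		/* no mapping */
};

static unsigned long pfn_to_mfn(unsigned long pfn)
{
	unsigned long mfn = p2m[pfn];

	/* Mask the foreign bit only when the entry is valid, as the
	 * patched helper does; INVALID_P2M_ENTRY survives intact. */
	if (mfn != INVALID_P2M_ENTRY)
		mfn &= ~FOREIGN_FRAME_BIT;

	return mfn;
}

int main(void)
{
	for (unsigned long pfn = 0; pfn < 3; pfn++) {
		unsigned long mfn = pfn_to_mfn(pfn);

		if (mfn == INVALID_P2M_ENTRY)
			printf("pfn %lu: no machine frame\n", pfn);
		else
			printf("pfn %lu -> mfn %#lx\n", pfn, mfn);
	}
	return 0;
}
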
@@ -2039,6 +2067,47 @@ index ff95824..ebd4c51 100644
  #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
  
  static void kdump_nmi_callback(int cpu, struct die_args *args)
+diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
+index d17d482..4d0aded 100644
+--- a/arch/x86/kernel/e820.c
++++ b/arch/x86/kernel/e820.c
+@@ -750,6 +750,36 @@ static int __init find_overlapped_early(u64 start, u64 end)
+ 	return i;
+ }
+ 
++u64 __init early_res_next_free(u64 addr)
++{
++	int i;
++	u64 end = addr;
++	struct early_res *r;
++
++	for (i = 0; i < MAX_EARLY_RES; i++) {
++		r = &early_res[i];
++		if (addr >= r->start && addr < r->end) {
++			end = r->end;
++			break;
++		}
++	}
++	return end;
++}
++
++u64 __init early_res_next_reserved(u64 addr, u64 max)
++{
++	int i;
++	struct early_res *r;
++	u64 next_res = max;
++
++	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
++		r = &early_res[i];
++		if ((r->start >= addr) && (r->start < next_res))
++			next_res = r->start;
++	}
++	return next_res;
++}
++
+ /*
+  * Drop the i-th range from the early reservation map,
+  * by copying any higher ranges down one over it, and
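
Taken together, early_res_next_free() and early_res_next_reserved()
let a caller walk an address range as alternating reserved and free
chunks; the init_32.c hunk further down consumes them exactly this
way. A standalone walker over the same two primitives (the table
contents and range limits are illustrative, not from the patch):

#include <stdio.h>

typedef unsigned long long u64;

/* Toy reservation table mimicking early_res[]; a zero end terminates
 * the list, as in the kernel array. */
struct res { u64 start, end; };
static struct res early_res[] = {
	{ 0x1000, 0x3000 },
	{ 0x8000, 0x9000 },
	{ 0, 0 },
};

/* If addr falls inside a reservation, return that reservation's end,
 * i.e. the next free address; mirrors early_res_next_free(). */
static u64 next_free(u64 addr)
{
	for (struct res *r = early_res; r->end; r++)
		if (addr >= r->start && addr < r->end)
			return r->end;
	return addr;
}

/* Return the start of the first reservation at or above addr, capped
 * at max; mirrors early_res_next_reserved(). */
static u64 next_reserved(u64 addr, u64 max)
{
	u64 next = max;

	for (struct res *r = early_res; r->end; r++)
		if (r->start >= addr && r->start < next)
			next = r->start;
	return next;
}

int main(void)
{
	u64 addr = 0, max = 0x10000;

	while (addr < max) {
		u64 free_start = next_free(addr);  /* hop a reservation */
		u64 free_end = next_reserved(free_start, max);

		if (free_start < free_end)
			printf("free: [%#llx, %#llx)\n",
			       free_start, free_end);

		addr = free_end;  /* next round hops the reservation */
	}
	return 0;
}
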
 diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
 index c097e7d..7764118 100644
 --- a/arch/x86/kernel/entry_32.S
@@ -3071,6 +3140,73 @@ index 71da1bc..892b8eb 100644
  	/*
  	 * XXX: batch / limit 'nr', to avoid large irq off latency
  	 * needs some instrumenting to determine the common sizes used by
+diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
+index 30938c1..10c3719 100644
+--- a/arch/x86/mm/init_32.c
++++ b/arch/x86/mm/init_32.c
+@@ -430,22 +430,45 @@ static int __init add_highpages_work_fn(unsigned long start_pfn,
+ {
+ 	int node_pfn;
+ 	struct page *page;
++	phys_addr_t chunk_end, chunk_max;
+ 	unsigned long final_start_pfn, final_end_pfn;
+-	struct add_highpages_data *data;
+-
+-	data = (struct add_highpages_data *)datax;
++	struct add_highpages_data *data = (struct add_highpages_data *)datax;
+ 
+ 	final_start_pfn = max(start_pfn, data->start_pfn);
+ 	final_end_pfn = min(end_pfn, data->end_pfn);
+ 	if (final_start_pfn >= final_end_pfn)
+ 		return 0;
+ 
+-	for (node_pfn = final_start_pfn; node_pfn < final_end_pfn;
+-	     node_pfn++) {
+-		if (!pfn_valid(node_pfn))
+-			continue;
+-		page = pfn_to_page(node_pfn);
+-		add_one_highpage_init(page, node_pfn);
++	chunk_end = PFN_PHYS(final_start_pfn);
++	chunk_max = PFN_PHYS(final_end_pfn);
++
++	/*
++	 * Check for reserved areas.
++	 */
++	for (;;) {
++		phys_addr_t chunk_start;
++		chunk_start = early_res_next_free(chunk_end);
++		
++		/*
++		 * Reserved area. Just count high mem pages.
++		 */
++		for (node_pfn = PFN_DOWN(chunk_end);
++		     node_pfn < PFN_DOWN(chunk_start); node_pfn++) {
++			if (pfn_valid(node_pfn))
++				totalhigh_pages++;
++		}
++
++		if (chunk_start >= chunk_max)
++			break;
++
++		chunk_end = early_res_next_reserved(chunk_start, chunk_max);
++		for (node_pfn = PFN_DOWN(chunk_start);
++		     node_pfn < PFN_DOWN(chunk_end); node_pfn++) {
++			if (!pfn_valid(node_pfn))
++				continue;
++			page = pfn_to_page(node_pfn);
++			add_one_highpage_init(page, node_pfn);
++		}
+ 	}
+ 
+ 	return 0;
+@@ -459,7 +482,6 @@ void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn,
+ 
+ 	data.start_pfn = start_pfn;
+ 	data.end_pfn = end_pfn;
+-
+ 	work_with_active_regions(nid, add_highpages_work_fn, &data);
+ }
+ 
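
The walk above moves between physical addresses (chunk_start,
chunk_end) and pfns via PFN_PHYS()/PFN_DOWN(); since PFN_DOWN()
truncates, a chunk boundary that is not page-aligned moves that whole
page to one side of the split. The two macros in isolation (4 KiB
pages assumed; the boundary value is made up):

#include <stdio.h>

#define PAGE_SHIFT	12	/* 4 KiB pages assumed */
#define PFN_DOWN(x)	((x) >> PAGE_SHIFT)	/* round down */
#define PFN_PHYS(x)	((unsigned long long)(x) << PAGE_SHIFT)

int main(void)
{
	unsigned long long boundary = 0x2800;	/* not page-aligned */

	printf("PFN_DOWN(%#llx) = %llu\n", boundary,
	       (unsigned long long)PFN_DOWN(boundary));	/* 2: truncated */
	printf("PFN_PHYS(PFN_DOWN(%#llx)) = %#llx\n", boundary,
	       PFN_PHYS(PFN_DOWN(boundary)));	/* back to 0x2000 */
	return 0;
}
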
 diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
 index e78cd0e..fb91994 100644
 --- a/arch/x86/mm/pat.c
@@ -3592,7 +3728,7 @@ index 0000000..21a3089
 +#endif
 +}
 diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
-index 942ccf1..472de02 100644
+index 942ccf1..ea32198 100644
 --- a/arch/x86/xen/enlighten.c
 +++ b/arch/x86/xen/enlighten.c
 @@ -11,6 +11,7 @@
@@ -3664,7 +3800,7 @@ index 942ccf1..472de02 100644
  static void xen_vcpu_setup(int cpu)
  {
  	struct vcpu_register_vcpu_info info;
-@@ -101,13 +122,17 @@ static void xen_vcpu_setup(int cpu)
+@@ -101,19 +122,20 @@ static void xen_vcpu_setup(int cpu)
  	struct vcpu_info *vcpup;
  
  	BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
@@ -3686,7 +3822,13 @@ index 942ccf1..472de02 100644
  	info.mfn = arbitrary_virt_to_mfn(vcpup);
  	info.offset = offset_in_page(vcpup);
  
-@@ -122,6 +147,7 @@ static void xen_vcpu_setup(int cpu)
+-	printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %llx, offset %d\n",
+-	       cpu, vcpup, info.mfn, info.offset);
+-
+ 	/* Check to see if the hypervisor will put the vcpu_info
+ 	   structure where we want it, which allows direct access via
+ 	   a percpu-variable. */
+@@ -122,13 +144,11 @@ static void xen_vcpu_setup(int cpu)
  	if (err) {
  		printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err);
  		have_vcpu_info_placement = 0;
@@ -3694,7 +3836,14 @@ index 942ccf1..472de02 100644
  	} else {
  		/* This cpu is using the registered vcpu info, even if
  		   later ones fail to. */
-@@ -167,13 +193,16 @@ static void __init xen_banner(void)
+ 		per_cpu(xen_vcpu, cpu) = vcpup;
+-
+-		printk(KERN_DEBUG "cpu %d using vcpu_info at %p\n",
+-		       cpu, vcpup);
+ 	}
+ }
+ 
+@@ -167,13 +187,16 @@ static void __init xen_banner(void)
  
  	printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
  	       pv_info.name);
@@ -3713,7 +3862,7 @@ index 942ccf1..472de02 100644
  
  static void xen_cpuid(unsigned int *ax, unsigned int *bx,
  		      unsigned int *cx, unsigned int *dx)
-@@ -187,7 +216,7 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
+@@ -187,7 +210,7 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
  	 * unsupported kernel subsystems as possible.
  	 */
  	switch (*ax) {
@@ -3722,7 +3871,7 @@ index 942ccf1..472de02 100644
  		maskecx = cpuid_leaf1_ecx_mask;
  		maskedx = cpuid_leaf1_edx_mask;
  		break;
-@@ -196,6 +225,10 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
+@@ -196,6 +219,10 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
  		/* Suppress extended topology stuff */
  		maskebx = 0;
  		break;
@@ -3733,7 +3882,7 @@ index 942ccf1..472de02 100644
  	}
  
  	asm(XEN_EMULATE_PREFIX "cpuid"
-@@ -215,13 +248,15 @@ static __init void xen_init_cpuid_mask(void)
+@@ -215,13 +242,15 @@ static __init void xen_init_cpuid_mask(void)
  	unsigned int ax, bx, cx, dx;
  
  	cpuid_leaf1_edx_mask =
@@ -3753,7 +3902,7 @@ index 942ccf1..472de02 100644
  			  (1 << X86_FEATURE_ACPI));  /* disable ACPI */
  
  	ax = 1;
-@@ -406,7 +441,7 @@ static __init void xen_load_gdt_boot(const struct desc_ptr *dtr)
+@@ -406,7 +435,7 @@ static __init void xen_load_gdt_boot(const struct desc_ptr *dtr)
  
  		pte = pfn_pte(pfn, PAGE_KERNEL_RO);
  
@@ -3762,7 +3911,7 @@ index 942ccf1..472de02 100644
  			BUG();
  
  		frames[f] = mfn;
-@@ -517,13 +552,13 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
+@@ -517,13 +546,13 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
  		return 0;
  #ifdef CONFIG_X86_MCE
  	} else if (addr == (unsigned long)machine_check) {
@@ -3782,7 +3931,7 @@ index 942ccf1..472de02 100644
  #endif	/* CONFIG_X86_64 */
  	info->address = addr;
  
-@@ -679,6 +714,18 @@ static void xen_set_iopl_mask(unsigned mask)
+@@ -679,6 +708,18 @@ static void xen_set_iopl_mask(unsigned mask)
  	HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
  }
  
@@ -3801,7 +3950,7 @@ index 942ccf1..472de02 100644
  static void xen_io_delay(void)
  {
  }
-@@ -716,7 +763,7 @@ static u32 xen_safe_apic_wait_icr_idle(void)
+@@ -716,7 +757,7 @@ static u32 xen_safe_apic_wait_icr_idle(void)
          return 0;
  }
  
@@ -3810,7 +3959,7 @@ index 942ccf1..472de02 100644
  {
  	apic->read = xen_apic_read;
  	apic->write = xen_apic_write;
-@@ -728,7 +775,6 @@ static void set_xen_basic_apic_ops(void)
+@@ -728,7 +769,6 @@ static void set_xen_basic_apic_ops(void)
  
  #endif
  
@@ -3818,7 +3967,7 @@ index 942ccf1..472de02 100644
  static void xen_clts(void)
  {
  	struct multicall_space mcs;
-@@ -811,6 +857,11 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
+@@ -811,6 +851,11 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
  		   Xen console noise. */
  		break;
  
@@ -3830,7 +3979,16 @@ index 942ccf1..472de02 100644
  	default:
  		ret = native_write_msr_safe(msr, low, high);
  	}
-@@ -923,10 +974,6 @@ static const struct pv_init_ops xen_init_ops __initdata = {
+@@ -849,8 +894,6 @@ void xen_setup_vcpu_info_placement(void)
+ 	/* xen_vcpu_setup managed to place the vcpu_info within the
+ 	   percpu area for all cpus, so make use of it */
+ 	if (have_vcpu_info_placement) {
+-		printk(KERN_INFO "Xen: using vcpu_info placement\n");
+-
+ 		pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
+ 		pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct);
+ 		pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
+@@ -923,10 +966,6 @@ static const struct pv_init_ops xen_init_ops __initdata = {
  	.patch = xen_patch,
  };
  
@@ -3841,7 +3999,7 @@ index 942ccf1..472de02 100644
  static const struct pv_cpu_ops xen_cpu_ops __initdata = {
  	.cpuid = xen_cpuid,
  
-@@ -978,6 +1025,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
+@@ -978,6 +1017,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
  	.load_sp0 = xen_load_sp0,
  
  	.set_iopl_mask = xen_set_iopl_mask,
@@ -3849,7 +4007,7 @@ index 942ccf1..472de02 100644
  	.io_delay = xen_io_delay,
  
  	/* Xen takes care of %gs when switching to usermode for us */
-@@ -1020,15 +1068,40 @@ static void xen_machine_halt(void)
+@@ -1020,15 +1060,40 @@ static void xen_machine_halt(void)
  	xen_reboot(SHUTDOWN_poweroff);
  }
  
@@ -3891,7 +4049,7 @@ index 942ccf1..472de02 100644
  	.shutdown = xen_machine_halt,
  	.crash_shutdown = xen_crash_shutdown,
  	.emergency_restart = xen_emergency_restart,
-@@ -1061,10 +1134,11 @@ asmlinkage void __init xen_start_kernel(void)
+@@ -1061,10 +1126,11 @@ asmlinkage void __init xen_start_kernel(void)
  
  	xen_domain_type = XEN_PV_DOMAIN;
  
@@ -3904,7 +4062,7 @@ index 942ccf1..472de02 100644
  	pv_cpu_ops = xen_cpu_ops;
  	pv_apic_ops = xen_apic_ops;
  
-@@ -1072,13 +1146,7 @@ asmlinkage void __init xen_start_kernel(void)
+@@ -1072,13 +1138,7 @@ asmlinkage void __init xen_start_kernel(void)
  	x86_init.oem.arch_setup = xen_arch_setup;
  	x86_init.oem.banner = xen_banner;
  
@@ -3919,7 +4077,7 @@ index 942ccf1..472de02 100644
  
  	/*
  	 * Set up some pagetable state before starting to set any ptes.
-@@ -1116,6 +1184,10 @@ asmlinkage void __init xen_start_kernel(void)
+@@ -1116,6 +1176,10 @@ asmlinkage void __init xen_start_kernel(void)
  	 */
  	xen_setup_stackprotector();
  
@@ -3930,7 +4088,7 @@ index 942ccf1..472de02 100644
  	xen_init_irq_ops();
  	xen_init_cpuid_mask();
  
-@@ -1144,6 +1216,8 @@ asmlinkage void __init xen_start_kernel(void)
+@@ -1144,6 +1208,8 @@ asmlinkage void __init xen_start_kernel(void)
  
  	pgd = (pgd_t *)xen_start_info->pt_base;
  
@@ -3939,7 +4097,7 @@ index 942ccf1..472de02 100644
  	/* Don't do the full vcpu_info placement stuff until we have a
  	   possible map and a non-dummy shared_info. */
  	per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
-@@ -1153,6 +1227,10 @@ asmlinkage void __init xen_start_kernel(void)
+@@ -1153,6 +1219,10 @@ asmlinkage void __init xen_start_kernel(void)
  
  	xen_raw_console_write("mapping kernel into physical memory\n");
  	pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
@@ -3950,7 +4108,7 @@ index 942ccf1..472de02 100644
  
  	init_mm.pgd = pgd;
  
-@@ -1162,6 +1240,14 @@ asmlinkage void __init xen_start_kernel(void)
+@@ -1162,6 +1232,14 @@ asmlinkage void __init xen_start_kernel(void)
  	if (xen_feature(XENFEAT_supervisor_mode_kernel))
  		pv_info.kernel_rpl = 0;
  
@@ -3965,7 +4123,7 @@ index 942ccf1..472de02 100644
  	/* set the limit of our address space */
  	xen_reserve_top();
  
-@@ -1184,6 +1270,16 @@ asmlinkage void __init xen_start_kernel(void)
+@@ -1184,6 +1262,16 @@ asmlinkage void __init xen_start_kernel(void)
  		add_preferred_console("xenboot", 0, NULL);
  		add_preferred_console("tty", 0, NULL);
  		add_preferred_console("hvc", 0, NULL);
@@ -3982,7 +4140,7 @@ index 942ccf1..472de02 100644
  	}
  
  	xen_raw_console_write("about to get started...\n");
-@@ -1197,3 +1293,126 @@ asmlinkage void __init xen_start_kernel(void)
+@@ -1197,3 +1285,126 @@ asmlinkage void __init xen_start_kernel(void)
  	x86_64_start_reservations((char *)__pa_symbol(&boot_params));
  #endif
  }
@@ -4110,7 +4268,7 @@ index 942ccf1..472de02 100644
 +}
 +#endif
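
The cpuid handling in the enlighten.c section above is plain
bit-masking applied after the XEN_EMULATE_PREFIX trap: build a mask
once in xen_init_cpuid_mask(), then AND it into the leaf-1 results,
with dom0 now keeping more hardware-facing feature bits than a plain
PV guest. The mechanism reduced to a standalone sketch (the sample
EDX value is made up; bit positions are the standard leaf-1 layout):

#include <stdio.h>

/* Leaf-1 EDX bit positions (standard CPUID layout). */
#define FEATURE_MCE	7
#define FEATURE_APIC	9
#define FEATURE_MCA	14
#define FEATURE_ACPI	22

int main(void)
{
	unsigned int edx = 0xbfebfbff;	/* sample CPUID.1:EDX value */

	/* Build the mask once, as xen_init_cpuid_mask() does: start
	 * from all-ones and clear what the guest must not see. */
	unsigned int mask = ~0u;
	mask &= ~((1u << FEATURE_MCE) | (1u << FEATURE_MCA) |
		  (1u << FEATURE_APIC) | (1u << FEATURE_ACPI));

	printf("edx before: %#010x\n", edx);
	printf("edx after:  %#010x\n", edx & mask);
	return 0;
}
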
 diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
-index 350a3de..c3fc5ce 100644
+index 350a3de..c3364f8 100644
 --- a/arch/x86/xen/mmu.c
 +++ b/arch/x86/xen/mmu.c
 @@ -42,6 +42,7 @@
@@ -4292,17 +4450,17 @@ index 350a3de..c3fc5ce 100644
 +		*ptep = ((uint64_t)pfn_to_mfn(page_to_pfn(pmd_page)) <<
 +			 PAGE_SHIFT) | ((unsigned long)pte & ~PAGE_MASK);
 +	return 0;
-+}
-+
+ }
+ 
+-/* Build the parallel p2m_top_mfn structures */
 +int create_lookup_pte_addr(struct mm_struct *mm,
 +			   unsigned long address,
 +			   uint64_t *ptep)
 +{
 +	return apply_to_page_range(mm, address, PAGE_SIZE,
 +				   lookup_pte_fn, ptep);
- }
- 
--/* Build the parallel p2m_top_mfn structures */
++}
++
 +EXPORT_SYMBOL(create_lookup_pte_addr);
 +
 +/*
@@ -4382,7 +4540,7 @@ index 350a3de..c3fc5ce 100644
  }
  
  /* Set up p2m_top to point to the domain-builder provided p2m pages */
-@@ -217,96 +364,168 @@ void __init xen_build_dynamic_phys_to_machine(void)
+@@ -217,96 +364,170 @@ void __init xen_build_dynamic_phys_to_machine(void)
  	unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
  	unsigned pfn;
  
@@ -4448,16 +4606,12 @@ index 350a3de..c3fc5ce 100644
 -	unsigned i;
 +	return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT);
 +}
- 
--	pfnp = &p2m_top[topidx];
--	mfnp = &p2m_top_mfn[topidx];
++
 +static void free_p2m_page(void *p)
 +{
 +	free_page((unsigned long)p);
 +}
- 
--	for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
--		p[i] = INVALID_P2M_ENTRY;
++
 +/* 
 + * Fully allocate the p2m structure for a given pfn.  We need to check
 + * that both the top and mid levels are allocated, and make sure the
@@ -4471,15 +4625,19 @@ index 350a3de..c3fc5ce 100644
 +	unsigned long ***top_p, **mid;
 +	unsigned long *top_mfn_p, *mid_mfn;
  
--	if (cmpxchg(pfnp, p2m_missing, p) == p2m_missing) {
--		*mfnp = virt_to_mfn(p);
--		return true;
+-	pfnp = &p2m_top[topidx];
+-	mfnp = &p2m_top_mfn[topidx];
 +	topidx = p2m_top_index(pfn);
 +	mididx = p2m_mid_index(pfn);
-+
+ 
+-	for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
+-		p[i] = INVALID_P2M_ENTRY;
 +	top_p = &p2m_top[topidx];
 +	mid = *top_p;
-+
+ 
+-	if (cmpxchg(pfnp, p2m_missing, p) == p2m_missing) {
+-		*mfnp = virt_to_mfn(p);
+-		return true;
 +	if (mid == p2m_mid_missing) {
 +		/* Mid level is missing, allocate a new one */
 +		mid = alloc_p2m_page();
@@ -4503,6 +4661,7 @@ index 350a3de..c3fc5ce 100644
 +	if (mid_mfn == p2m_mid_missing_mfn) {
 +		/* Separately check the mid mfn level */
 +		unsigned long missing_mfn;
++		unsigned long mid_mfn_mfn;
 +
 +		mid_mfn = alloc_p2m_page();
 +		if (!mid_mfn)
@@ -4511,24 +4670,25 @@ index 350a3de..c3fc5ce 100644
 +		p2m_mid_mfn_init(mid_mfn);
 +		
 +		missing_mfn = virt_to_mfn(p2m_mid_missing_mfn);
-+		if (cmpxchg(top_mfn_p, missing_mfn, mid) != missing_mfn)
-+			free_p2m_page(mid);
++		mid_mfn_mfn = virt_to_mfn(mid_mfn);
++		if (cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn) != missing_mfn)
++			free_p2m_page(mid_mfn);
 +	}
 +
 +	if (p2m_top[topidx][mididx] == p2m_missing) {
 +		/* p2m leaf page is missing */
 +		unsigned long *p2m;
- 
--	p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
--	BUG_ON(p == NULL);
++
 +		p2m = alloc_p2m_page();
 +		if (!p2m)
 +			return false;
  
+-	p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
+-	BUG_ON(p == NULL);
++		p2m_init(p2m);
+ 
 -	if (!install_p2mtop_page(pfn, p))
 -		free_page((unsigned long)p);
-+		p2m_init(p2m);
-+
 +		if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing)
 +			free_p2m_page(p2m);
 +		else
@@ -4592,7 +4752,7 @@ index 350a3de..c3fc5ce 100644
  }
  
  unsigned long arbitrary_virt_to_mfn(void *vaddr)
-@@ -315,6 +534,7 @@ unsigned long arbitrary_virt_to_mfn(void *vaddr)
+@@ -315,6 +536,7 @@ unsigned long arbitrary_virt_to_mfn(void *vaddr)
  
  	return PFN_DOWN(maddr.maddr);
  }
@@ -4600,7 +4760,7 @@ index 350a3de..c3fc5ce 100644
  
  xmaddr_t arbitrary_virt_to_machine(void *vaddr)
  {
-@@ -376,6 +596,34 @@ static bool xen_page_pinned(void *ptr)
+@@ -376,6 +598,34 @@ static bool xen_page_pinned(void *ptr)
  	return PagePinned(page);
  }
  
@@ -4635,7 +4795,7 @@ index 350a3de..c3fc5ce 100644
  static void xen_extend_mmu_update(const struct mmu_update *update)
  {
  	struct multicall_space mcs;
-@@ -452,6 +700,11 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
+@@ -452,6 +702,11 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
  void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
  		    pte_t *ptep, pte_t pteval)
  {
@@ -4647,10 +4807,30 @@ index 350a3de..c3fc5ce 100644
  	ADD_STATS(set_pte_at, 1);
  //	ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep));
  	ADD_STATS(set_pte_at_current, mm == current->mm);
-@@ -522,9 +775,34 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
- 	return val;
- }
- 
+@@ -516,7 +771,34 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
+ 	if (val & _PAGE_PRESENT) {
+ 		unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
+ 		pteval_t flags = val & PTE_FLAGS_MASK;
+-		val = ((pteval_t)pfn_to_mfn(pfn) << PAGE_SHIFT) | flags;
++		unsigned long mfn = pfn_to_mfn(pfn);
++
++		/*
++		 * If there's no mfn for the pfn, then just create an
++		 * empty non-present pte.  Unfortunately this loses
++		 * information about the original pfn, so
++		 * pte_mfn_to_pfn is asymmetric.
++		 */
++		if (unlikely(mfn == INVALID_P2M_ENTRY)) {
++			mfn = 0;
++			flags = 0;
++		}
++
++		val = ((pteval_t)mfn << PAGE_SHIFT) | flags;
++	}
++
++	return val;
++}
++
 +static pteval_t iomap_pte(pteval_t val)
 +{
 +	if (val & _PAGE_PRESENT) {
@@ -4660,11 +4840,11 @@ index 350a3de..c3fc5ce 100644
 +		/* We assume the pte frame number is a MFN, so
 +		   just use it as-is. */
 +		val = ((pteval_t)pfn << PAGE_SHIFT) | flags;
-+	}
-+
-+	return val;
-+}
-+
+ 	}
+ 
+ 	return val;
+@@ -524,7 +806,18 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
+ 
  pteval_t xen_pte_val(pte_t pte)
  {
 -	return pte_mfn_to_pfn(pte.pte);
@@ -4683,7 +4863,7 @@ index 350a3de..c3fc5ce 100644
  }
  PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);
  
-@@ -534,9 +812,62 @@ pgdval_t xen_pgd_val(pgd_t pgd)
+@@ -534,9 +827,62 @@ pgdval_t xen_pgd_val(pgd_t pgd)
  }
  PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val);
  
@@ -4747,7 +4927,7 @@ index 350a3de..c3fc5ce 100644
  	return native_make_pte(pte);
  }
  PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
-@@ -592,6 +923,11 @@ void xen_set_pud(pud_t *ptr, pud_t val)
+@@ -592,6 +938,11 @@ void xen_set_pud(pud_t *ptr, pud_t val)
  
  void xen_set_pte(pte_t *ptep, pte_t pte)
  {
@@ -4759,7 +4939,7 @@ index 350a3de..c3fc5ce 100644
  	ADD_STATS(pte_update, 1);
  //	ADD_STATS(pte_update_pinned, xen_page_pinned(ptep));
  	ADD_STATS(pte_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
-@@ -608,6 +944,11 @@ void xen_set_pte(pte_t *ptep, pte_t pte)
+@@ -608,6 +959,11 @@ void xen_set_pte(pte_t *ptep, pte_t pte)
  #ifdef CONFIG_X86_PAE
  void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
  {
@@ -4771,7 +4951,7 @@ index 350a3de..c3fc5ce 100644
  	set_64bit((u64 *)ptep, native_pte_val(pte));
  }
  
-@@ -934,8 +1275,6 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page,
+@@ -934,8 +1290,6 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page,
     read-only, and can be pinned. */
  static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
  {
@@ -4780,7 +4960,7 @@ index 350a3de..c3fc5ce 100644
  	xen_mc_batch();
  
  	if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) {
-@@ -1219,7 +1558,7 @@ void xen_exit_mmap(struct mm_struct *mm)
+@@ -1219,7 +1573,7 @@ void xen_exit_mmap(struct mm_struct *mm)
  	spin_lock(&mm->page_table_lock);
  
  	/* pgd may not be pinned in the error exit path of execve */
@@ -4789,7 +4969,7 @@ index 350a3de..c3fc5ce 100644
  		xen_pgd_unpin(mm);
  
  	spin_unlock(&mm->page_table_lock);
-@@ -1288,12 +1627,19 @@ static void xen_flush_tlb_single(unsigned long addr)
+@@ -1288,12 +1642,19 @@ static void xen_flush_tlb_single(unsigned long addr)
  	preempt_enable();
  }
  
@@ -4810,7 +4990,7 @@ index 350a3de..c3fc5ce 100644
  	} *args;
  	struct multicall_space mcs;
  
-@@ -1417,6 +1763,13 @@ static int xen_pgd_alloc(struct mm_struct *mm)
+@@ -1417,6 +1778,13 @@ static int xen_pgd_alloc(struct mm_struct *mm)
  	return ret;
  }
  
@@ -4824,7 +5004,7 @@ index 350a3de..c3fc5ce 100644
  static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
  {
  #ifdef CONFIG_X86_64
-@@ -1448,10 +1801,17 @@ static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
+@@ -1448,10 +1816,17 @@ static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
  #ifdef CONFIG_X86_32
  static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
  {
@@ -4844,7 +5024,7 @@ index 350a3de..c3fc5ce 100644
  
  	return pte;
  }
-@@ -1517,7 +1877,6 @@ static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned l
+@@ -1517,7 +1892,6 @@ static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned l
  	if (PagePinned(virt_to_page(mm->pgd))) {
  		SetPagePinned(page);
  
@@ -4852,7 +5032,7 @@ index 350a3de..c3fc5ce 100644
  		if (!PageHighMem(page)) {
  			make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn)));
  			if (level == PT_PTE && USE_SPLIT_PTLOCKS)
-@@ -1620,6 +1979,7 @@ static void *m2v(phys_addr_t maddr)
+@@ -1620,6 +1994,7 @@ static void *m2v(phys_addr_t maddr)
  	return __ka(m2p(maddr));
  }
  
@@ -4860,7 +5040,7 @@ index 350a3de..c3fc5ce 100644
  static void set_page_prot(void *addr, pgprot_t prot)
  {
  	unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
-@@ -1635,6 +1995,9 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
+@@ -1635,6 +2010,9 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
  	unsigned ident_pte;
  	unsigned long pfn;
  
@@ -4870,7 +5050,7 @@ index 350a3de..c3fc5ce 100644
  	ident_pte = 0;
  	pfn = 0;
  	for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) {
-@@ -1645,7 +2008,7 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
+@@ -1645,7 +2023,7 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
  			pte_page = m2v(pmd[pmdidx].pmd);
  		else {
  			/* Check for free pte pages */
@@ -4879,7 +5059,7 @@ index 350a3de..c3fc5ce 100644
  				break;
  
  			pte_page = &level1_ident_pgt[ident_pte];
-@@ -1675,6 +2038,20 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
+@@ -1675,6 +2053,20 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
  	set_page_prot(pmd, PAGE_KERNEL_RO);
  }
  
@@ -4900,7 +5080,7 @@ index 350a3de..c3fc5ce 100644
  #ifdef CONFIG_X86_64
  static void convert_pfn_mfn(void *v)
  {
-@@ -1760,12 +2137,15 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
+@@ -1760,12 +2152,15 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
  	return pgd;
  }
  #else	/* !CONFIG_X86_64 */
@@ -4917,7 +5097,7 @@ index 350a3de..c3fc5ce 100644
  
  	max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) +
  				  xen_start_info->nr_pt_frames * PAGE_SIZE +
-@@ -1777,6 +2157,20 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
+@@ -1777,6 +2172,20 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
  	xen_map_identity_early(level2_kernel_pgt, max_pfn);
  
  	memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
@@ -4938,7 +5118,7 @@ index 350a3de..c3fc5ce 100644
  	set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY],
  			__pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT));
  
-@@ -1799,6 +2193,8 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
+@@ -1799,6 +2208,8 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
  }
  #endif	/* CONFIG_X86_64 */
  
@@ -4947,7 +5127,7 @@ index 350a3de..c3fc5ce 100644
  static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
  {
  	pte_t pte;
-@@ -1828,9 +2224,26 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
+@@ -1828,9 +2239,26 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
  		pte = pfn_pte(phys, prot);
  		break;
  
@@ -4975,7 +5155,7 @@ index 350a3de..c3fc5ce 100644
  	}
  
  	__native_set_fixmap(idx, pte);
-@@ -1845,6 +2258,29 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
+@@ -1845,6 +2273,29 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
  #endif
  }
  
@@ -5005,7 +5185,7 @@ index 350a3de..c3fc5ce 100644
  static __init void xen_post_allocator_init(void)
  {
  	pv_mmu_ops.set_pte = xen_set_pte;
-@@ -1960,8 +2396,305 @@ void __init xen_init_mmu_ops(void)
+@@ -1960,8 +2411,305 @@ void __init xen_init_mmu_ops(void)
  	x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
  	x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
  	pv_mmu_ops = xen_mmu_ops;
@@ -5839,7 +6019,7 @@ index 0000000..0f45638
 +early_param("xen_emul_unplug", parse_xen_emul_unplug);
 +#endif
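
The bulk of the mmu.c section above replaces the flat phys-to-machine
array with a three-level tree (top, mid, leaf), where absent levels
all point at shared "missing" pages so the hot lookup path never
branches on NULL, and alloc_p2m() populates levels on demand with
cmpxchg() to tolerate races. A condensed, standalone model of the
lookup path (levels shrunk to 8 entries for illustration; the kernel
sizes each level to hold a page worth of entries):

#include <stdio.h>
#include <stdlib.h>

#define ENTRIES		8	/* per level, shrunk for the example */
#define INVALID_P2M	(~0UL)

static unsigned long *p2m_missing;	/* shared all-invalid leaf */
static unsigned long **p2m_mid_missing;	/* shared mid, points at it */
static unsigned long ***p2m_top;

static unsigned long p2m_lookup(unsigned long pfn)
{
	unsigned long top = pfn / (ENTRIES * ENTRIES);
	unsigned long mid = (pfn / ENTRIES) % ENTRIES;
	unsigned long idx = pfn % ENTRIES;

	/* Missing levels all funnel into the shared "missing" pages,
	 * so the lookup itself needs no NULL checks. */
	return p2m_top[top][mid][idx];
}

int main(void)
{
	unsigned long i;

	p2m_missing = malloc(ENTRIES * sizeof(*p2m_missing));
	p2m_mid_missing = malloc(ENTRIES * sizeof(*p2m_mid_missing));
	p2m_top = malloc(ENTRIES * sizeof(*p2m_top));

	for (i = 0; i < ENTRIES; i++) {
		p2m_missing[i] = INVALID_P2M;
		p2m_mid_missing[i] = p2m_missing;
		p2m_top[i] = p2m_mid_missing;
	}

	printf("pfn 42 -> %#lx (invalid: nothing populated yet)\n",
	       p2m_lookup(42));
	return 0;
}
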
 diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
-index ad0047f..804815c 100644
+index ad0047f..a0db643 100644
 --- a/arch/x86/xen/setup.c
 +++ b/arch/x86/xen/setup.c
 @@ -10,6 +10,7 @@
@@ -5860,10 +6040,30 @@ index ad0047f..804815c 100644
  #include <xen/features.h>
  
  #include "xen-ops.h"
-@@ -32,25 +35,131 @@ extern void xen_sysenter_target(void);
+@@ -32,25 +35,157 @@ extern void xen_sysenter_target(void);
  extern void xen_syscall_target(void);
  extern void xen_syscall32_target(void);
  
++/* Amount of extra memory space we add to the e820 ranges */
++phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
++
++static __init void xen_add_extra_mem(unsigned long pages)
++{
++	u64 size = (u64)pages * PAGE_SIZE;
++
++	if (!pages)
++		return;
++
++	e820_add_region(xen_extra_mem_start + xen_extra_mem_size, size, E820_RAM);
++	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
++
++	reserve_early(xen_extra_mem_start + xen_extra_mem_size,
++		      xen_extra_mem_start + xen_extra_mem_size + size,
++		      "XEN EXTRA");
++
++	xen_extra_mem_size += size;
++}
++
 +static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
 +					      phys_addr_t end_addr)
 +{
@@ -5938,17 +6138,18 @@ index ad0047f..804815c 100644
 -
  char * __init xen_memory_setup(void)
  {
-+	static __initdata struct e820entry map[E820MAX];
++	static struct e820entry map[E820MAX] __initdata;
 +
  	unsigned long max_pfn = xen_start_info->nr_pages;
-+	struct xen_memory_map memmap;
 +	unsigned long long mem_end;
-+	int op;
 +	int rc;
++	struct xen_memory_map memmap;
++	unsigned long extra_pages = 0;
++	int op;
 +	int i;
  
  	max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
-+	mem_end = PFN_PHYS((u64)max_pfn);
++	mem_end = PFN_PHYS(max_pfn);
 +
 +	memmap.nr_entries = E820MAX;
 +	set_xen_guest_handle(memmap.buffer, map);
@@ -5971,17 +6172,22 @@ index ad0047f..804815c 100644
  	e820.nr_map = 0;
 -
 -	e820_add_region(0, PFN_PHYS((u64)max_pfn), E820_RAM);
++	xen_extra_mem_start = mem_end;
 +	for (i = 0; i < memmap.nr_entries; i++) {
 +		unsigned long long end = map[i].addr + map[i].size;
++
 +		if (map[i].type == E820_RAM) {
-+			if (map[i].addr > mem_end)
-+				continue;
 +			if (end > mem_end) {
 +				/* Truncate region to max_mem. */
 +				map[i].size -= end - mem_end;
++
++				extra_pages += PFN_DOWN(end - mem_end);
 +			}
-+		}
-+		if (map[i].size > 0)
++		} else if (map[i].type != E820_RAM)
++			xen_extra_mem_start = end;
++
++		if ((map[i].type != E820_RAM || map[i].addr < mem_end) &&
++		    map[i].size > 0)
 +			e820_add_region(map[i].addr, map[i].size, map[i].type);
 +	}
  
@@ -5995,16 +6201,18 @@ index ad0047f..804815c 100644
  	 */
  	e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
  			E820_RESERVED);
-@@ -67,6 +176,8 @@ char * __init xen_memory_setup(void)
+@@ -67,6 +202,10 @@ char * __init xen_memory_setup(void)
  
  	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
  
-+	xen_return_unused_memory(xen_start_info->nr_pages, &e820);
++	extra_pages += xen_return_unused_memory(xen_start_info->nr_pages, &e820);
++
++	xen_add_extra_mem(extra_pages);
 +
  	return "Xen";
  }
  
-@@ -156,6 +267,8 @@ void __init xen_arch_setup(void)
+@@ -156,6 +295,8 @@ void __init xen_arch_setup(void)
  	struct physdev_set_iopl set_iopl;
  	int rc;
  
@@ -6013,7 +6221,7 @@ index ad0047f..804815c 100644
  	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
  	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
  
-@@ -182,13 +295,17 @@ void __init xen_arch_setup(void)
+@@ -182,13 +323,17 @@ void __init xen_arch_setup(void)
  	}
  #endif
  
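
The xen_memory_setup() rework earlier in this file no longer
fabricates a single RAM region: it fetches the real map with
XENMEM_memory_map, clips RAM entries at the nr_pages limit, and
counts the clipped-off pages so xen_add_extra_mem() can reserve that
space for ballooning. The clipping arithmetic in isolation (the map
contents and the cap below are made up for the example):

#include <stdio.h>

typedef unsigned long long u64;
#define PAGE_SHIFT	12
#define PFN_DOWN(x)	((x) >> PAGE_SHIFT)
#define E820_RAM	1

struct e820entry { u64 addr, size; int type; };

int main(void)
{
	struct e820entry map[] = {
		{ 0x0,      0x9f000,    E820_RAM },
		{ 0x100000, 0x3ff00000, E820_RAM },	/* ~1 GiB */
	};
	u64 mem_end = (u64)0x20000 << PAGE_SHIFT;  /* 512 MiB cap */
	u64 extra_pages = 0;

	for (unsigned i = 0; i < sizeof(map) / sizeof(map[0]); i++) {
		u64 end = map[i].addr + map[i].size;

		if (map[i].type == E820_RAM && end > mem_end) {
			/* Truncate to the cap, count what fell off. */
			map[i].size -= end - mem_end;
			extra_pages += PFN_DOWN(end - mem_end);
		}
		printf("entry %u: [%#llx, %#llx)\n", i, map[i].addr,
		       map[i].addr + map[i].size);
	}
	printf("extra pages for ballooning: %llu\n", extra_pages);
	return 0;
}
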
@@ -7356,7 +7564,7 @@ index 1d886e0..f4a2b10 100644
  	  This driver implements the front-end of the Xen virtual
  	  block device driver.  It communicates with a back-end driver
 diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
-index b8578bb..0ce883a 100644
+index b8578bb..44059e6 100644
 --- a/drivers/block/xen-blkfront.c
 +++ b/drivers/block/xen-blkfront.c
 @@ -42,10 +42,12 @@
@@ -7407,17 +7615,17 @@ index b8578bb..0ce883a 100644
  
  #define MAXIMUM_OUTSTANDING_BLOCK_REQS \
  	(BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
-@@ -119,6 +121,10 @@ static DEFINE_SPINLOCK(blkif_io_lock);
+@@ -116,6 +118,10 @@ static DEFINE_SPINLOCK(blkif_io_lock);
+ #define EXTENDED (1<<EXT_SHIFT)
+ #define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED))
+ #define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
++#define EMULATED_HD_DISK_MINOR_OFFSET (0)
++#define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256)
++#define EMULATED_SD_DISK_MINOR_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET + (4 * 16))
++#define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_HD_DISK_NAME_OFFSET + 4)
  
  #define DEV_NAME	"xvd"	/* name in /dev */
  
-+/* all the Xen major numbers we currently support are identical to Linux
-+ * major numbers */
-+static inline int xen_translate_major(int major) { return major; }
-+
- static int get_id_from_freelist(struct blkfront_info *info)
- {
- 	unsigned long free = info->shadow_free;
 @@ -136,6 +142,55 @@ static void add_id_to_freelist(struct blkfront_info *info,
  	info->shadow_free = id;
  }
@@ -7489,7 +7697,7 @@ index b8578bb..0ce883a 100644
  	if (rq == NULL)
  		return -1;
  
-@@ -370,17 +426,22 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
+@@ -370,20 +426,84 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
  static int xlvbd_barrier(struct blkfront_info *info)
  {
  	int err;
@@ -7517,39 +7725,25 @@ index b8578bb..0ce883a 100644
  	return 0;
  }
  
-@@ -393,8 +454,9 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
- 	int nr_minors = 1;
- 	int err = -ENODEV;
- 	unsigned int offset;
--	int minor;
-+	int minor = 0, major = XENVBD_MAJOR;
- 	int nr_parts;
-+	char *name = DEV_NAME;
- 
- 	BUG_ON(info->gd != NULL);
- 	BUG_ON(info->rq != NULL);
-@@ -406,57 +468,110 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
- 	}
- 
- 	if (!VDEV_IS_EXTENDED(info->vdevice)) {
-+		major = BLKIF_MAJOR(info->vdevice);
- 		minor = BLKIF_MINOR(info->vdevice);
- 		nr_parts = PARTS_PER_DISK;
-+		switch (major) {
++static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
++{
++	int major;
++	major = BLKIF_MAJOR(vdevice);
++	*minor = BLKIF_MINOR(vdevice);
++	switch (major) {
 +		case XEN_IDE0_MAJOR:
-+			major = xen_translate_major(major);
-+			offset = (minor / 64);
-+			name = "hd";
++			*offset = (*minor / 64) + EMULATED_HD_DISK_NAME_OFFSET;
++			*minor = ((*minor / 64) * PARTS_PER_DISK) +
++				EMULATED_HD_DISK_MINOR_OFFSET;
 +			break;
 +		case XEN_IDE1_MAJOR:
-+			major = xen_translate_major(major);
-+			offset = (minor / 64) + 2;
-+			name = "hd";
++			*offset = (*minor / 64) + 2 + EMULATED_HD_DISK_NAME_OFFSET;
++			*minor = (((*minor / 64) + 2) * PARTS_PER_DISK) +
++				EMULATED_HD_DISK_MINOR_OFFSET;
 +			break;
 +		case XEN_SCSI_DISK0_MAJOR:
-+			major = xen_translate_major(major);
-+			offset = minor / nr_parts;
-+			name = "sd";
++			*offset = (*minor / PARTS_PER_DISK) + EMULATED_SD_DISK_NAME_OFFSET;
++			*minor = *minor + EMULATED_SD_DISK_MINOR_OFFSET;
 +			break;
 +		case XEN_SCSI_DISK1_MAJOR:
 +		case XEN_SCSI_DISK2_MAJOR:
@@ -7558,10 +7752,12 @@ index b8578bb..0ce883a 100644
 +		case XEN_SCSI_DISK5_MAJOR:
 +		case XEN_SCSI_DISK6_MAJOR:
 +		case XEN_SCSI_DISK7_MAJOR:
-+			offset = (minor / nr_parts) +
-+				(major - XEN_SCSI_DISK1_MAJOR + 1) * 16;
-+			major = xen_translate_major(major);
-+			name = "sd";
++			*offset = (*minor / PARTS_PER_DISK) + 
++				((major - XEN_SCSI_DISK1_MAJOR + 1) * 16) +
++				EMULATED_SD_DISK_NAME_OFFSET;
++			*minor = *minor +
++				((major - XEN_SCSI_DISK1_MAJOR + 1) * 16 * PARTS_PER_DISK) +
++				EMULATED_SD_DISK_MINOR_OFFSET;
 +			break;
 +		case XEN_SCSI_DISK8_MAJOR:
 +		case XEN_SCSI_DISK9_MAJOR:
@@ -7571,24 +7767,56 @@ index b8578bb..0ce883a 100644
 +		case XEN_SCSI_DISK13_MAJOR:
 +		case XEN_SCSI_DISK14_MAJOR:
 +		case XEN_SCSI_DISK15_MAJOR:
-+			offset = (minor / nr_parts) +
-+				(major - XEN_SCSI_DISK8_MAJOR + 8) * 16;
-+			major = xen_translate_major(major);
-+			name = "sd";
++			*offset = (*minor / PARTS_PER_DISK) + 
++				((major - XEN_SCSI_DISK8_MAJOR + 8) * 16) +
++				EMULATED_SD_DISK_NAME_OFFSET;
++			*minor = *minor +
++				((major - XEN_SCSI_DISK8_MAJOR + 8) * 16 * PARTS_PER_DISK) +
++				EMULATED_SD_DISK_MINOR_OFFSET;
 +			break;
 +		case XENVBD_MAJOR:
-+			offset = minor / nr_parts;
++			*offset = *minor / PARTS_PER_DISK;
 +			break;
 +		default:
 +			printk(KERN_WARNING "blkfront: your disk configuration is "
 +					"incorrect, please use an xvd device instead\n");
 +			return -ENODEV;
-+		}
++	}
++	return 0;
++}
+ 
+ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
+ 			       struct blkfront_info *info,
+@@ -391,7 +511,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
+ {
+ 	struct gendisk *gd;
+ 	int nr_minors = 1;
+-	int err = -ENODEV;
++	int err;
+ 	unsigned int offset;
+ 	int minor;
+ 	int nr_parts;
+@@ -406,21 +526,33 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
+ 	}
+ 
+ 	if (!VDEV_IS_EXTENDED(info->vdevice)) {
+-		minor = BLKIF_MINOR(info->vdevice);
+-		nr_parts = PARTS_PER_DISK;
++		err = xen_translate_vdev(info->vdevice, &minor, &offset);
++		if (err)
++			return err;		
++ 		nr_parts = PARTS_PER_DISK;
  	} else {
  		minor = BLKIF_MINOR_EXT(info->vdevice);
  		nr_parts = PARTS_PER_EXT_DISK;
 +		offset = minor / nr_parts;
++		if (xen_hvm_domain() && minor >= EMULATED_HD_DISK_MINOR_OFFSET) {
++			printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with "
++					"emulated IDE and SCSI disks; ignoring", info->vdevice);
++			return -ENODEV;
++		}
  	}
++	err = -ENODEV;
  
  	if ((minor % nr_parts) == 0)
  		nr_minors = nr_parts;
@@ -7607,32 +7835,7 @@ index b8578bb..0ce883a 100644
  
  	if (nr_minors > 1) {
  		if (offset < 26)
--			sprintf(gd->disk_name, "%s%c", DEV_NAME, 'a' + offset);
-+			sprintf(gd->disk_name, "%s%c", name, 'a' + offset);
- 		else
--			sprintf(gd->disk_name, "%s%c%c", DEV_NAME,
--				'a' + ((offset / 26)-1), 'a' + (offset % 26));
-+			sprintf(gd->disk_name, "%s%c%c", name,
-+					'a' + ((offset / 26)-1), 'a' + (offset % 26));
- 	} else {
- 		if (offset < 26)
--			sprintf(gd->disk_name, "%s%c%d", DEV_NAME,
-+			sprintf(gd->disk_name, "%s%c%d", name,
- 				'a' + offset,
- 				minor & (nr_parts - 1));
- 		else
--			sprintf(gd->disk_name, "%s%c%c%d", DEV_NAME,
-+			sprintf(gd->disk_name, "%s%c%c%d", name,
- 				'a' + ((offset / 26) - 1),
- 				'a' + (offset % 26),
- 				minor & (nr_parts - 1));
- 	}
- 
--	gd->major = XENVBD_MAJOR;
-+	gd->major = major;
- 	gd->first_minor = minor;
- 	gd->fops = &xlvbd_block_fops;
- 	gd->private_data = info;
+@@ -447,16 +579,15 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
  	gd->driverfs_dev = &(info->xbdev->dev);
  	set_capacity(gd, capacity);
  
@@ -7652,7 +7855,7 @@ index b8578bb..0ce883a 100644
  
  	if (vdisk_info & VDISK_READONLY)
  		set_disk_ro(gd, 1);
-@@ -469,10 +584,45 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
+@@ -469,10 +600,45 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
  
  	return 0;
  
@@ -7698,7 +7901,7 @@ index b8578bb..0ce883a 100644
  static void kick_pending_request_queues(struct blkfront_info *info)
  {
  	if (!RING_FULL(&info->ring)) {
-@@ -487,16 +637,16 @@ static void blkif_restart_queue(struct work_struct *work)
+@@ -487,16 +653,16 @@ static void blkif_restart_queue(struct work_struct *work)
  {
  	struct blkfront_info *info = container_of(work, struct blkfront_info, work);
  
@@ -7718,7 +7921,7 @@ index b8578bb..0ce883a 100644
  	info->connected = suspend ?
  		BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
  	/* No more blkif_request(). */
-@@ -504,7 +654,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
+@@ -504,7 +670,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
  		blk_stop_queue(info->rq);
  	/* No more gnttab callback work. */
  	gnttab_cancel_free_callback(&info->callback);
@@ -7727,7 +7930,7 @@ index b8578bb..0ce883a 100644
  
  	/* Flush gnttab callback work. Must be done with no locks held. */
  	flush_scheduled_work();
-@@ -529,21 +679,20 @@ static void blkif_completion(struct blk_shadow *s)
+@@ -529,21 +695,20 @@ static void blkif_completion(struct blk_shadow *s)
  		gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL);
  }
  
@@ -7755,7 +7958,7 @@ index b8578bb..0ce883a 100644
  
   again:
  	rp = info->ring.sring->rsp_prod;
-@@ -567,7 +716,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
+@@ -567,7 +732,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
  				printk(KERN_WARNING "blkfront: %s: write barrier op failed\n",
  				       info->gd->disk_name);
  				error = -EOPNOTSUPP;
@@ -7764,7 +7967,7 @@ index b8578bb..0ce883a 100644
  				xlvbd_barrier(info);
  			}
  			/* fall through */
-@@ -596,7 +745,17 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
+@@ -596,7 +761,17 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
  
  	kick_pending_request_queues(info);
  
@@ -7783,7 +7986,7 @@ index b8578bb..0ce883a 100644
  
  	return IRQ_HANDLED;
  }
-@@ -650,7 +809,7 @@ fail:
+@@ -650,7 +825,7 @@ fail:
  
  
  /* Common code used when first setting up, and when resuming. */
@@ -7792,7 +7995,7 @@ index b8578bb..0ce883a 100644
  			   struct blkfront_info *info)
  {
  	const char *message = NULL;
-@@ -710,7 +869,6 @@ again:
+@@ -710,7 +885,6 @@ again:
  	return err;
  }
  
@@ -7800,7 +8003,7 @@ index b8578bb..0ce883a 100644
  /**
   * Entry point to this code when a new device is created.  Allocate the basic
   * structures and the ring buffer for communication with the backend, and
-@@ -736,16 +894,48 @@ static int blkfront_probe(struct xenbus_device *dev,
+@@ -736,16 +910,48 @@ static int blkfront_probe(struct xenbus_device *dev,
  		}
  	}
  
@@ -7849,7 +8052,7 @@ index b8578bb..0ce883a 100644
  
  	for (i = 0; i < BLK_RING_SIZE; i++)
  		info->shadow[i].req.id = i+1;
-@@ -755,7 +945,7 @@ static int blkfront_probe(struct xenbus_device *dev,
+@@ -755,7 +961,7 @@ static int blkfront_probe(struct xenbus_device *dev,
  	info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
  	dev_set_drvdata(&dev->dev, info);
  
@@ -7858,7 +8061,7 @@ index b8578bb..0ce883a 100644
  	if (err) {
  		kfree(info);
  		dev_set_drvdata(&dev->dev, NULL);
-@@ -819,7 +1009,7 @@ static int blkif_recover(struct blkfront_info *info)
+@@ -819,7 +1025,7 @@ static int blkif_recover(struct blkfront_info *info)
  
  	xenbus_switch_state(info->xbdev, XenbusStateConnected);
  
@@ -7867,7 +8070,7 @@ index b8578bb..0ce883a 100644
  
  	/* Now safe for us to use the shared ring */
  	info->connected = BLKIF_STATE_CONNECTED;
-@@ -830,7 +1020,7 @@ static int blkif_recover(struct blkfront_info *info)
+@@ -830,7 +1036,7 @@ static int blkif_recover(struct blkfront_info *info)
  	/* Kick any other new requests queued since we resumed */
  	kick_pending_request_queues(info);
  
@@ -7876,7 +8079,7 @@ index b8578bb..0ce883a 100644
  
  	return 0;
  }
-@@ -850,13 +1040,50 @@ static int blkfront_resume(struct xenbus_device *dev)
+@@ -850,13 +1056,50 @@ static int blkfront_resume(struct xenbus_device *dev)
  
  	blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
  
@@ -7928,7 +8131,7 @@ index b8578bb..0ce883a 100644
  
  /*
   * Invoked when the backend is finally 'ready' (and has told produced
-@@ -868,11 +1095,31 @@ static void blkfront_connect(struct blkfront_info *info)
+@@ -868,11 +1111,31 @@ static void blkfront_connect(struct blkfront_info *info)
  	unsigned long sector_size;
  	unsigned int binfo;
  	int err;
@@ -7963,7 +8166,7 @@ index b8578bb..0ce883a 100644
  	dev_dbg(&info->xbdev->dev, "%s:%s.\n",
  		__func__, info->xbdev->otherend);
  
-@@ -889,10 +1136,26 @@ static void blkfront_connect(struct blkfront_info *info)
+@@ -889,10 +1152,26 @@ static void blkfront_connect(struct blkfront_info *info)
  	}
  
  	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
@@ -7992,7 +8195,7 @@ index b8578bb..0ce883a 100644
  
  	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
  	if (err) {
-@@ -904,10 +1167,10 @@ static void blkfront_connect(struct blkfront_info *info)
+@@ -904,10 +1183,10 @@ static void blkfront_connect(struct blkfront_info *info)
  	xenbus_switch_state(info->xbdev, XenbusStateConnected);
  
  	/* Kick pending requests. */
@@ -8005,7 +8208,7 @@ index b8578bb..0ce883a 100644
  
  	add_disk(info->gd);
  
-@@ -915,57 +1178,21 @@ static void blkfront_connect(struct blkfront_info *info)
+@@ -915,57 +1194,21 @@ static void blkfront_connect(struct blkfront_info *info)
  }
  
  /**
@@ -8067,7 +8270,7 @@ index b8578bb..0ce883a 100644
  	case XenbusStateUnknown:
  	case XenbusStateClosed:
  		break;
-@@ -975,35 +1202,56 @@ static void backend_changed(struct xenbus_device *dev,
+@@ -975,35 +1218,56 @@ static void backend_changed(struct xenbus_device *dev,
  		break;
  
  	case XenbusStateClosing:
@@ -8144,7 +8347,7 @@ index b8578bb..0ce883a 100644
  
  	return 0;
  }
-@@ -1012,30 +1260,68 @@ static int blkfront_is_ready(struct xenbus_device *dev)
+@@ -1012,30 +1276,68 @@ static int blkfront_is_ready(struct xenbus_device *dev)
  {
  	struct blkfront_info *info = dev_get_drvdata(&dev->dev);
  
@@ -8227,7 +8430,7 @@ index b8578bb..0ce883a 100644
  	return 0;
  }
  
-@@ -1061,7 +1347,7 @@ static struct xenbus_driver blkfront = {
+@@ -1061,7 +1363,7 @@ static struct xenbus_driver blkfront = {
  	.probe = blkfront_probe,
  	.remove = blkfront_remove,
  	.resume = blkfront_resume,
@@ -9074,7 +9277,7 @@ index b2f71f7..b7feb84 100644
  	help
  	  The network device frontend driver allows the kernel to
 diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
-index 1a11d95..d4a80b8 100644
+index 1a11d95..aa9130b 100644
 --- a/drivers/net/xen-netfront.c
 +++ b/drivers/net/xen-netfront.c
 @@ -42,6 +42,7 @@
@@ -9085,7 +9288,16 @@ index 1a11d95..d4a80b8 100644
  #include <xen/xenbus.h>
  #include <xen/events.h>
  #include <xen/page.h>
-@@ -58,6 +59,19 @@ struct netfront_cb {
+@@ -53,19 +54,36 @@
+ 
+ static const struct ethtool_ops xennet_ethtool_ops;
+ 
++static int use_smartpoll = 0;
++module_param(use_smartpoll, int, 0600);
++MODULE_PARM_DESC (use_smartpoll, "Use smartpoll mechanism if available");
++
+ struct netfront_cb {
+ 	struct page *page;
  	unsigned offset;
  };
  
@@ -9105,7 +9317,17 @@ index 1a11d95..d4a80b8 100644
  #define NETFRONT_SKB_CB(skb)	((struct netfront_cb *)((skb)->cb))
  
  #define RX_COPY_THRESHOLD 256
-@@ -104,7 +118,7 @@ struct netfront_info {
+ 
+ #define GRANT_INVALID_REF	0
+ 
+-#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
+-#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
++#define NET_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
++#define NET_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
+ #define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
+ 
+ struct netfront_info {
+@@ -104,7 +122,7 @@ struct netfront_info {
  
  	/* Receive-ring batched refills. */
  #define RX_MIN_TARGET 8
@@ -9114,7 +9336,7 @@ index 1a11d95..d4a80b8 100644
  #define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
  	unsigned rx_min_target, rx_max_target, rx_target;
  	struct sk_buff_head rx_batch;
-@@ -118,6 +132,8 @@ struct netfront_info {
+@@ -118,6 +136,8 @@ struct netfront_info {
  	unsigned long rx_pfn_array[NET_RX_RING_SIZE];
  	struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
  	struct mmu_update rx_mmu[NET_RX_RING_SIZE];
@@ -9123,7 +9345,7 @@ index 1a11d95..d4a80b8 100644
  };
  
  struct netfront_rx_info {
-@@ -337,15 +353,17 @@ static int xennet_open(struct net_device *dev)
+@@ -337,15 +357,17 @@ static int xennet_open(struct net_device *dev)
  	return 0;
  }
  
@@ -9142,7 +9364,7 @@ index 1a11d95..d4a80b8 100644
  	do {
  		prod = np->tx.sring->rsp_prod;
  		rmb(); /* Ensure we see responses up to 'rp'. */
-@@ -390,7 +408,11 @@ static void xennet_tx_buf_gc(struct net_device *dev)
+@@ -390,7 +412,11 @@ static void xennet_tx_buf_gc(struct net_device *dev)
  		mb();		/* update shared area */
  	} while ((cons == prod) && (prod != np->tx.sring->rsp_prod));
  
@@ -9154,7 +9376,7 @@ index 1a11d95..d4a80b8 100644
  }
  
  static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
-@@ -1267,6 +1289,14 @@ static void xennet_disconnect_backend(struct netfront_info *info)
+@@ -1267,6 +1293,14 @@ static void xennet_disconnect_backend(struct netfront_info *info)
  	info->rx.sring = NULL;
  }
  
@@ -9169,7 +9391,7 @@ index 1a11d95..d4a80b8 100644
  /**
   * We are reconnecting to the backend, due to a suspend/resume, or a backend
   * driver restart.  We tear down our netif structure and recreate it, but
-@@ -1305,6 +1335,54 @@ static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
+@@ -1305,6 +1339,59 @@ static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
  	return 0;
  }
  
@@ -9211,10 +9433,15 @@ index 1a11d95..d4a80b8 100644
 +		np->smart_poll.active = 0;
 +	}
 +
-+	if (np->rx.sring->private.netif.smartpoll_active)
-+		hrtimer_start(timer,
++	if (np->rx.sring->private.netif.smartpoll_active) {
++		if ( hrtimer_start(timer,
 +			ktime_set(0, NANO_SECOND/psmart_poll->smart_poll_freq),
-+			HRTIMER_MODE_REL);
++			HRTIMER_MODE_REL) ) {
++			printk(KERN_DEBUG "Failed to start hrtimer,"
++					"use interrupt mode for this packet\n");
++			np->rx.sring->private.netif.smartpoll_active = 0;
++		}
++	}
 +
 +end:
 +	spin_unlock_irqrestore(&np->tx_lock, flags);
@@ -9224,19 +9451,24 @@ index 1a11d95..d4a80b8 100644
  static irqreturn_t xennet_interrupt(int irq, void *dev_id)
  {
  	struct net_device *dev = dev_id;
-@@ -1320,6 +1398,11 @@ static irqreturn_t xennet_interrupt(int irq, void *dev_id)
+@@ -1320,6 +1407,16 @@ static irqreturn_t xennet_interrupt(int irq, void *dev_id)
  			napi_schedule(&np->napi);
  	}
  
-+	if (np->smart_poll.feature_smart_poll)
-+		hrtimer_start(&np->smart_poll.timer,
-+			ktime_set(0, NANO_SECOND/np->smart_poll.smart_poll_freq),
-+			HRTIMER_MODE_REL);
++	if (np->smart_poll.feature_smart_poll) {
++		if ( hrtimer_start(&np->smart_poll.timer,
++			ktime_set(0,NANO_SECOND/np->smart_poll.smart_poll_freq),
++			HRTIMER_MODE_REL) ) {
++			printk(KERN_DEBUG "Failed to start hrtimer,"
++					"use interrupt mode for this packet\n");
++			np->rx.sring->private.netif.smartpoll_active = 0;
++		}
++	}
 +
  	spin_unlock_irqrestore(&np->tx_lock, flags);
  
  	return IRQ_HANDLED;
-@@ -1393,7 +1476,7 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
+@@ -1393,7 +1490,7 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
  }
  
  /* Common code used when first setting up, and when resuming. */
@@ -9245,11 +9477,11 @@ index 1a11d95..d4a80b8 100644
  			   struct netfront_info *info)
  {
  	const char *message;
-@@ -1456,6 +1539,12 @@ again:
+@@ -1456,6 +1553,12 @@ again:
  		goto abort_transaction;
  	}
  
-+	err = xenbus_printf(xbt, dev->nodename, "feature-smart-poll", "%d", 1);
++	err = xenbus_printf(xbt, dev->nodename, "feature-smart-poll", "%d", use_smartpoll);
 +	if (err) {
 +		message = "writing feature-smart-poll";
 +		goto abort_transaction;
@@ -9258,16 +9490,19 @@ index 1a11d95..d4a80b8 100644
  	err = xenbus_transaction_end(xbt, 0);
  	if (err) {
  		if (err == -EAGAIN)
-@@ -1543,7 +1632,23 @@ static int xennet_connect(struct net_device *dev)
+@@ -1543,7 +1646,26 @@ static int xennet_connect(struct net_device *dev)
  		return -ENODEV;
  	}
  
 -	err = talk_to_backend(np->xbdev, np);
-+	err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
-+			   "feature-smart-poll", "%u",
-+			   &np->smart_poll.feature_smart_poll);
-+	if (err != 1)
-+		np->smart_poll.feature_smart_poll = 0;
++	np->smart_poll.feature_smart_poll = 0;
++	if (use_smartpoll) {
++		err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
++				   "feature-smart-poll", "%u",
++				   &np->smart_poll.feature_smart_poll);
++		if (err != 1)
++			np->smart_poll.feature_smart_poll = 0;
++	}
 +
 +	if (np->smart_poll.feature_smart_poll) {
 +		hrtimer_init(&np->smart_poll.timer, CLOCK_MONOTONIC,
@@ -9283,7 +9518,7 @@ index 1a11d95..d4a80b8 100644
  	if (err)
  		return err;
  
-@@ -1597,7 +1702,7 @@ static int xennet_connect(struct net_device *dev)
+@@ -1597,7 +1719,7 @@ static int xennet_connect(struct net_device *dev)
  /**
   * Callback received when the backend's state changes.
   */
@@ -9292,7 +9527,7 @@ index 1a11d95..d4a80b8 100644
  			    enum xenbus_state backend_state)
  {
  	struct netfront_info *np = dev_get_drvdata(&dev->dev);
-@@ -1608,6 +1713,8 @@ static void backend_changed(struct xenbus_device *dev,
+@@ -1608,6 +1730,8 @@ static void backend_changed(struct xenbus_device *dev,
  	switch (backend_state) {
  	case XenbusStateInitialising:
  	case XenbusStateInitialised:
@@ -9301,7 +9536,7 @@ index 1a11d95..d4a80b8 100644
  	case XenbusStateConnected:
  	case XenbusStateUnknown:
  	case XenbusStateClosed:
-@@ -1628,12 +1735,30 @@ static void backend_changed(struct xenbus_device *dev,
+@@ -1628,12 +1752,30 @@ static void backend_changed(struct xenbus_device *dev,
  	}
  }
  
@@ -9332,7 +9567,7 @@ index 1a11d95..d4a80b8 100644
  };
  
  #ifdef CONFIG_SYSFS
-@@ -1798,8 +1923,9 @@ static struct xenbus_driver netfront_driver = {
+@@ -1798,8 +1940,9 @@ static struct xenbus_driver netfront_driver = {
  	.ids = netfront_ids,
  	.probe = netfront_probe,
  	.remove = __devexit_p(xennet_remove),
@@ -11711,7 +11946,7 @@ index 0000000..e83b615
 +subsys_initcall(xen_acpi_processor_extcntl_init);
 +MODULE_LICENSE("GPL");
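
The smart-poll machinery added to netfront a few hunks up services
the ring from a high-resolution timer while traffic keeps flowing,
and drops back to interrupt mode whenever the timer cannot be
re-armed; note that both hunks clear smartpoll_active when
hrtimer_start() fails. The same arm-or-fall-back shape in a
standalone POSIX timer sketch (the frequency and the "poll" body are
placeholders, not the driver's logic):

/* Build with: cc -o smartpoll smartpoll.c -lrt (older glibc) */
#include <stdio.h>
#include <string.h>
#include <signal.h>
#include <time.h>
#include <unistd.h>

#define NANO_SECOND	1000000000L

static volatile sig_atomic_t polls;

static void on_tick(int sig)
{
	(void)sig;
	polls++;	/* stand-in for draining the rx/tx rings */
}

int main(void)
{
	timer_t timer;
	struct sigevent sev;
	struct itimerspec its;
	long freq = 1000;	/* polls/second, like smart_poll_freq */

	signal(SIGALRM, on_tick);

	memset(&sev, 0, sizeof(sev));
	sev.sigev_notify = SIGEV_SIGNAL;
	sev.sigev_signo = SIGALRM;

	memset(&its, 0, sizeof(its));
	its.it_value.tv_nsec = NANO_SECOND / freq;
	its.it_interval.tv_nsec = NANO_SECOND / freq;

	/* Arm the timer; on failure fall back to "interrupt" mode,
	 * as the patch does by clearing smartpoll_active. */
	if (timer_create(CLOCK_MONOTONIC, &sev, &timer) ||
	    timer_settime(timer, 0, &its, NULL)) {
		fprintf(stderr, "timer unavailable, staying in "
			"interrupt mode\n");
		return 1;
	}

	while (polls < 5)
		pause();	/* wait for ticks */

	printf("handled %d polled batches\n", (int)polls);
	return 0;
}
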
 diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
-index 4204336..a5ac75b 100644
+index 4204336..b76245c 100644
 --- a/drivers/xen/balloon.c
 +++ b/drivers/xen/balloon.c
 @@ -43,6 +43,7 @@
@@ -11761,7 +11996,7 @@ index 4204336..a5ac75b 100644
  
  /* We increase/decrease in batches which fit in a page */
  static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
-@@ -118,10 +122,41 @@ static struct timer_list balloon_timer;
+@@ -118,12 +122,43 @@ static struct timer_list balloon_timer;
  static void scrub_page(struct page *page)
  {
  #ifdef CONFIG_XEN_SCRUB_PAGES
@@ -11802,14 +12037,29 @@ index 4204336..a5ac75b 100644
 +}
 +
  /* balloon_append: add the given page to the balloon. */
- static void balloon_append(struct page *page)
+-static void balloon_append(struct page *page)
++static void __balloon_append(struct page *page)
  {
-@@ -195,19 +230,18 @@ static unsigned long current_target(void)
+ 	/* Lowmem is re-populated first, so highmem pages go at list tail. */
+ 	if (PageHighMem(page)) {
+@@ -134,7 +169,11 @@ static void balloon_append(struct page *page)
+ 		list_add(&page->lru, &ballooned_pages);
+ 		balloon_stats.balloon_low++;
+ 	}
++}
+ 
++static void balloon_append(struct page *page)
++{
++	__balloon_append(page);
+ 	totalram_pages--;
+ }
+ 
+@@ -195,20 +234,17 @@ static unsigned long current_target(void)
  
  static int increase_reservation(unsigned long nr_pages)
  {
 -	unsigned long  pfn, i, flags;
-+	unsigned long  pfn, mfn, i, j, flags;
++	unsigned long  pfn, mfn, i, j;
  	struct page   *page;
  	long           rc;
  	struct xen_memory_reservation reservation = {
@@ -11822,11 +12072,11 @@ index 4204336..a5ac75b 100644
  		nr_pages = ARRAY_SIZE(frame_list);
  
 -	spin_lock_irqsave(&balloon_lock, flags);
-+	spin_lock_irqsave(&xen_reservation_lock, flags);
- 
+-
  	page = balloon_first_page();
  	for (i = 0; i < nr_pages; i++) {
-@@ -218,6 +252,8 @@ static int increase_reservation(unsigned long nr_pages)
+ 		BUG_ON(page == NULL);
+@@ -218,6 +254,8 @@ static int increase_reservation(unsigned long nr_pages)
  
  	set_xen_guest_handle(reservation.extent_start, frame_list);
  	reservation.nr_extents = nr_pages;
@@ -11835,7 +12085,7 @@ index 4204336..a5ac75b 100644
  	rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
  	if (rc < 0)
  		goto out;
-@@ -227,19 +263,22 @@ static int increase_reservation(unsigned long nr_pages)
+@@ -227,19 +265,22 @@ static int increase_reservation(unsigned long nr_pages)
  		BUG_ON(page == NULL);
  
  		pfn = page_to_pfn(page);
@@ -11868,13 +12118,12 @@ index 4204336..a5ac75b 100644
  		}
  
  		/* Relinquish the page back to the allocator. */
-@@ -251,20 +290,20 @@ static int increase_reservation(unsigned long nr_pages)
+@@ -251,20 +292,18 @@ static int increase_reservation(unsigned long nr_pages)
  	balloon_stats.current_pages += rc;
  
   out:
 -	spin_unlock_irqrestore(&balloon_lock, flags);
-+	spin_unlock_irqrestore(&xen_reservation_lock, flags);
- 
+-
  	return rc < 0 ? rc : rc != nr_pages;
  }
  
@@ -11882,7 +12131,7 @@ index 4204336..a5ac75b 100644
  {
 -	unsigned long  pfn, i, flags;
 -	struct page   *page;
-+	unsigned long  pfn, lpfn, mfn, i, j, flags;
++	unsigned long  pfn, lpfn, mfn, i, j;
 +	struct page   *page = NULL;
  	int            need_sleep = 0;
 -	int ret;
@@ -11903,7 +12152,7 @@ index 4204336..a5ac75b 100644
  			nr_pages = i;
  			need_sleep = 1;
  			break;
-@@ -282,37 +321,52 @@ static int decrease_reservation(unsigned long nr_pages)
+@@ -282,38 +321,49 @@ static int decrease_reservation(unsigned long nr_pages)
  		frame_list[i] = pfn_to_mfn(pfn);
  
  		scrub_page(page);
@@ -11922,8 +12171,7 @@ index 4204336..a5ac75b 100644
  	flush_tlb_all();
  
 -	spin_lock_irqsave(&balloon_lock, flags);
-+	spin_lock_irqsave(&xen_reservation_lock, flags);
- 
+-
  	/* No more mappings: invalidate P2M and add to balloon. */
  	for (i = 0; i < nr_pages; i++) {
 -		pfn = mfn_to_pfn(frame_list[i]);
@@ -11966,11 +12214,11 @@ index 4204336..a5ac75b 100644
 -	balloon_stats.current_pages -= nr_pages;
 -
 -	spin_unlock_irqrestore(&balloon_lock, flags);
-+	spin_unlock_irqrestore(&xen_reservation_lock, flags);
- 
+-
  	return need_sleep;
  }
-@@ -379,7 +433,7 @@ static void watch_target(struct xenbus_watch *watch,
+ 
+@@ -379,7 +429,7 @@ static void watch_target(struct xenbus_watch *watch,
  	/* The given memory/target value is in KiB, so it needs converting to
  	 * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
  	 */
@@ -11979,31 +12227,39 @@ index 4204336..a5ac75b 100644
  }
  
  static int balloon_init_watcher(struct notifier_block *notifier,
-@@ -405,9 +459,12 @@ static int __init balloon_init(void)
+@@ -405,9 +455,12 @@ static int __init balloon_init(void)
  	if (!xen_pv_domain())
  		return -ENODEV;
  
 -	pr_info("xen_balloon: Initialising balloon driver.\n");
 +	pr_info("xen_balloon: Initialising balloon driver with page order %d.\n",
 +		balloon_order);
++
++	balloon_npages = 1 << balloon_order;
  
 -	balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn);
-+	balloon_npages = 1 << balloon_order;
-+
 +	balloon_stats.current_pages = (min(xen_start_info->nr_pages, max_pfn)) >> balloon_order;
  	balloon_stats.target_pages  = balloon_stats.current_pages;
  	balloon_stats.balloon_low   = 0;
  	balloon_stats.balloon_high  = 0;
-@@ -420,7 +477,7 @@ static int __init balloon_init(void)
+@@ -420,10 +473,13 @@ static int __init balloon_init(void)
  	register_balloon(&balloon_sysdev);
  
  	/* Initialise the balloon with excess memory space. */
 -	for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
-+	for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn += balloon_npages) {
++	for (pfn = PFN_UP(xen_extra_mem_start);
++	     pfn < PFN_DOWN(xen_extra_mem_start + xen_extra_mem_size);
++	     pfn += balloon_npages) {
  		page = pfn_to_page(pfn);
- 		if (!PageReserved(page))
- 			balloon_append(page);
-@@ -444,6 +501,121 @@ static void balloon_exit(void)
+-		if (!PageReserved(page))
+-			balloon_append(page);
++		/* totalram_pages doesn't include the boot-time
++		   balloon extension, so don't subtract from it. */
++		__balloon_append(page);
+ 	}
+ 
+ 	target_watch.callback = watch_target;
+@@ -444,6 +500,121 @@ static void balloon_exit(void)
  
  module_exit(balloon_exit);
  
@@ -12125,7 +12381,7 @@ index 4204336..a5ac75b 100644
  #define BALLOON_SHOW(name, format, args...)				\
  	static ssize_t show_##name(struct sys_device *dev,		\
  				   struct sysdev_attribute *attr,	\
-@@ -477,7 +649,7 @@ static ssize_t store_target_kb(struct sys_device *dev,
+@@ -477,7 +648,7 @@ static ssize_t store_target_kb(struct sys_device *dev,
  
  	target_bytes = simple_strtoull(buf, &endchar, 0) * 1024;
  
@@ -12134,7 +12390,7 @@ index 4204336..a5ac75b 100644
  
  	return count;
  }
-@@ -491,7 +663,7 @@ static ssize_t show_target(struct sys_device *dev, struct sysdev_attribute *attr
+@@ -491,7 +662,7 @@ static ssize_t show_target(struct sys_device *dev, struct sysdev_attribute *attr
  {
  	return sprintf(buf, "%llu\n",
  		       (unsigned long long)balloon_stats.target_pages
@@ -12143,7 +12399,7 @@ index 4204336..a5ac75b 100644
  }
  
  static ssize_t store_target(struct sys_device *dev,
-@@ -507,7 +679,7 @@ static ssize_t store_target(struct sys_device *dev,
+@@ -507,7 +678,7 @@ static ssize_t store_target(struct sys_device *dev,
  
  	target_bytes = memparse(buf, &endchar);
  
@@ -14580,10 +14836,10 @@ index 0000000..ef54fa1
 +MODULE_LICENSE("Dual BSD/GPL");
 diff --git a/drivers/xen/blktap/device.c b/drivers/xen/blktap/device.c
 new file mode 100644
-index 0000000..6091780b
+index 0000000..e4fc23e
 --- /dev/null
 +++ b/drivers/xen/blktap/device.c
-@@ -0,0 +1,943 @@
+@@ -0,0 +1,941 @@
 +#include <linux/fs.h>
 +#include <linux/blkdev.h>
 +#include <linux/cdrom.h>
@@ -14827,11 +15083,9 @@ index 0000000..6091780b
 +		      khandle->user);
 +
 +		page = map[offset];
-+		if (page) {
-+			ClearPageReserved(map[offset]);
-+			if (blkback_pagemap_contains_page(page))
-+				set_page_private(page, 0);
-+		}
++		if (page && blkback_pagemap_contains_page(page))
++			set_page_private(page, 0);
++
 +		map[offset] = NULL;
 +
 +		khandle->kernel = INVALID_GRANT_HANDLE;
@@ -15832,10 +16086,10 @@ index 0000000..eee7100
 +}
 diff --git a/drivers/xen/blktap/ring.c b/drivers/xen/blktap/ring.c
 new file mode 100644
-index 0000000..7e2b687
+index 0000000..057e97f
 --- /dev/null
 +++ b/drivers/xen/blktap/ring.c
-@@ -0,0 +1,548 @@
+@@ -0,0 +1,545 @@
 +#include <linux/device.h>
 +#include <linux/signal.h>
 +#include <linux/sched.h>
@@ -15984,11 +16238,8 @@ index 0000000..7e2b687
 +
 +	offset  = (int)((uvaddr - vma->vm_start) >> PAGE_SHIFT);
 +	page    = map[offset];
-+	if (page) {
-+		ClearPageReserved(page);
-+		if (blkback_pagemap_contains_page(page))
-+			set_page_private(page, 0);
-+	}
++	if (page && blkback_pagemap_contains_page(page))
++		set_page_private(page, 0);
 +	map[offset] = NULL;
 +
 +	request = tap->pending_requests[usr_idx];
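
Both blktap teardown paths (device.c above, ring.c here) now skip ClearPageReserved and reset page_private only for pages the blkback pagemap actually tracks. The invariant, restated as a standalone sketch (blkback_pagemap_contains_page is the patch's own helper; the wrapper is hypothetical):

    #include <linux/mm.h>

    /* Sketch: page_private is driver bookkeeping, so clear it only on
     * pages this driver tagged; other mappings are left untouched. */
    static void release_slot(struct page **map, int offset)
    {
            struct page *page = map[offset];

            if (page && blkback_pagemap_contains_page(page))
                    set_page_private(page, 0);

            map[offset] = NULL;     /* the slot can be reused */
    }
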
@@ -16654,7 +16905,7 @@ index bdfd584..6625ffe 100644
  
  #include <asm/xen/hypervisor.h>
 diff --git a/drivers/xen/events.c b/drivers/xen/events.c
-index 30e0467..dd1e71b 100644
+index 30e0467..6b6f563 100644
 --- a/drivers/xen/events.c
 +++ b/drivers/xen/events.c
 @@ -16,7 +16,7 @@
@@ -16666,7 +16917,7 @@ index 30e0467..dd1e71b 100644
   *
   * Jeremy Fitzhardinge <jeremy at xensource.com>, XenSource Inc, 2007
   */
-@@ -27,18 +27,31 @@
+@@ -27,18 +27,32 @@
  #include <linux/module.h>
  #include <linux/string.h>
  #include <linux/bootmem.h>
@@ -16693,12 +16944,13 @@ index 30e0467..dd1e71b 100644
  #include <xen/interface/event_channel.h>
 +#include <xen/interface/hvm/hvm_op.h>
 +#include <xen/interface/hvm/params.h>
++#include <xen/page.h>
 +
 +#include "../pci/msi.h"
  
  /*
   * This lock protects updates to the following mapping and reference-count
-@@ -67,7 +80,7 @@ enum xen_irq_type {
+@@ -67,7 +81,7 @@ enum xen_irq_type {
   * event channel - irq->event channel mapping
   * cpu - cpu this event channel is bound to
   * index - type-specific information:
@@ -16707,7 +16959,7 @@ index 30e0467..dd1e71b 100644
   *    VIRQ - virq number
   *    IPI - IPI vector
   *    EVTCHN -
-@@ -83,20 +96,27 @@ struct irq_info
+@@ -83,20 +97,30 @@ struct irq_info
  		enum ipi_vector ipi;
  		struct {
  			unsigned short gsi;
@@ -16718,15 +16970,18 @@ index 30e0467..dd1e71b 100644
  		} pirq;
  	} u;
  };
-+#define PIRQ_NEEDS_EOI	(1 << 0)
 +#define PIRQ_SHAREABLE	(1 << 1)
  
 -static struct irq_info irq_info[NR_IRQS];
-+static struct irq_info *irq_info;
++/* Bitmap indicating which PIRQs require Xen to be notified on unmask. */
++static bool pirq_eoi_does_unmask;
++static unsigned long *pirq_needs_eoi_bits;
  
 -static int evtchn_to_irq[NR_EVENT_CHANNELS] = {
 -	[0 ... NR_EVENT_CHANNELS-1] = -1
 -};
++static struct irq_info *irq_info;
++
 +static int *evtchn_to_irq;
  struct cpu_evtchn_s {
  	unsigned long bits[NR_EVENT_CHANNELS/BITS_PER_LONG];
@@ -16741,7 +16996,7 @@ index 30e0467..dd1e71b 100644
  static inline unsigned long *cpu_evtchn_mask(int cpu)
  {
  	return cpu_evtchn_mask_p[cpu].bits;
-@@ -106,6 +126,8 @@ static inline unsigned long *cpu_evtchn_mask(int cpu)
+@@ -106,6 +130,8 @@ static inline unsigned long *cpu_evtchn_mask(int cpu)
  #define VALID_EVTCHN(chn)	((chn) != 0)
  
  static struct irq_chip xen_dynamic_chip;
@@ -16750,7 +17005,7 @@ index 30e0467..dd1e71b 100644
  
  /* Constructor for packed IRQ information. */
  static struct irq_info mk_unbound_info(void)
-@@ -135,7 +157,8 @@ static struct irq_info mk_pirq_info(unsigned short evtchn,
+@@ -135,7 +161,8 @@ static struct irq_info mk_pirq_info(unsigned short evtchn,
  				    unsigned short gsi, unsigned short vector)
  {
  	return (struct irq_info) { .type = IRQT_PIRQ, .evtchn = evtchn,
@@ -16760,7 +17015,7 @@ index 30e0467..dd1e71b 100644
  }
  
  /*
-@@ -218,6 +241,15 @@ static unsigned int cpu_from_evtchn(unsigned int evtchn)
+@@ -218,6 +245,15 @@ static unsigned int cpu_from_evtchn(unsigned int evtchn)
  	return ret;
  }
  
@@ -16770,16 +17025,39 @@ index 30e0467..dd1e71b 100644
 +
 +	BUG_ON(info->type != IRQT_PIRQ);
 +
-+	return info->u.pirq.flags & PIRQ_NEEDS_EOI;
++	return test_bit(info->u.pirq.gsi, pirq_needs_eoi_bits);
 +}
 +
  static inline unsigned long active_evtchns(unsigned int cpu,
  					   struct shared_info *sh,
  					   unsigned int idx)
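
From here on, events.c replaces the old per-IRQ PIRQ_NEEDS_EOI flag with a page-sized bit array shared with the hypervisor: pirq_needs_eoi_bits is indexed by Xen's PIRQ number, and when registering it via PHYSDEVOP_pirq_eoi_gmfn succeeds (pirq_eoi_does_unmask), the EOI hypercall itself unmasks the event channel. A runnable model of the bit-array bookkeeping (the 1024-PIRQ size is illustrative):

    #include <stdio.h>

    #define BITS_PER_LONG    (8 * (int)sizeof(unsigned long))
    #define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

    static unsigned long needs_eoi[BITS_TO_LONGS(1024)];

    static void set_needs_eoi(int pirq)
    {
            needs_eoi[pirq / BITS_PER_LONG] |= 1UL << (pirq % BITS_PER_LONG);
    }

    static int pirq_needs_eoi(int pirq)
    {
            return (needs_eoi[pirq / BITS_PER_LONG] >> (pirq % BITS_PER_LONG)) & 1;
    }

    int main(void)
    {
            set_needs_eoi(9);       /* Xen flags PIRQ 9 in the shared page */
            printf("pirq 9: %d, pirq 10: %d\n",
                   pirq_needs_eoi(9), pirq_needs_eoi(10));
            return 0;
    }
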
-@@ -329,27 +361,368 @@ static void unmask_evtchn(int port)
+@@ -299,6 +335,14 @@ static void mask_evtchn(int port)
+ 	sync_set_bit(port, &s->evtchn_mask[0]);
+ }
+ 
++static void mask_irq(unsigned int irq)
++{
++	int evtchn = evtchn_from_irq(irq);
++
++	if (VALID_EVTCHN(evtchn))
++		mask_evtchn(evtchn);
++}
++
+ static void unmask_evtchn(int port)
+ {
+ 	struct shared_info *s = HYPERVISOR_shared_info;
+@@ -329,26 +373,370 @@ static void unmask_evtchn(int port)
  	put_cpu();
  }
  
++static void unmask_irq(unsigned int irq)
++{
++	int evtchn = evtchn_from_irq(irq);
++
++	if (VALID_EVTCHN(evtchn))
++		unmask_evtchn(evtchn);
++}
++
 +static int get_nr_hw_irqs(void)
 +{
 +	int ret = 1;
@@ -16843,8 +17121,14 @@ index 30e0467..dd1e71b 100644
 +{
 +	struct irq_info *info = info_for_irq(irq);
 +	struct physdev_eoi eoi = { .irq = info->u.pirq.gsi };
++	bool need_eoi;
 +
-+	if (unlikely(pirq_needs_eoi(irq))) {
++	need_eoi = pirq_needs_eoi(irq);
++
++	if (!need_eoi || !pirq_eoi_does_unmask)
++		unmask_evtchn(info->evtchn);
++
++	if (need_eoi) {
 +		int rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
 +		WARN_ON(rc);
 +	}
@@ -16855,15 +17139,18 @@ index 30e0467..dd1e71b 100644
 +	struct physdev_irq_status_query irq_status;
 +	struct irq_info *info = info_for_irq(irq);
 +
++	if (pirq_eoi_does_unmask)
++		return;
++
 +	BUG_ON(info->type != IRQT_PIRQ);
 +
 +	irq_status.irq = info->u.pirq.gsi;
 +	if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
 +		irq_status.flags = 0;
 +
-+	info->u.pirq.flags &= ~PIRQ_NEEDS_EOI;
++	clear_bit(info->u.pirq.gsi, pirq_needs_eoi_bits);
 +	if (irq_status.flags & XENIRQSTAT_needs_eoi)
-+		info->u.pirq.flags |= PIRQ_NEEDS_EOI;
++		set_bit(info->u.pirq.gsi, pirq_needs_eoi_bits);
 +}
 +
 +static bool probing_irq(int irq)
@@ -16905,7 +17192,6 @@ index 30e0467..dd1e71b 100644
 +	info->evtchn = evtchn;
 +
 + out:
-+	unmask_evtchn(evtchn);
 +	pirq_eoi(irq);
 +
 +	return 0;
@@ -16933,23 +17219,10 @@ index 30e0467..dd1e71b 100644
 +	info->evtchn = 0;
 +}
 +
-+static void enable_pirq(unsigned int irq)
-+{
-+	startup_pirq(irq);
-+}
-+
-+static void disable_pirq(unsigned int irq)
-+{
-+}
-+
 +static void ack_pirq(unsigned int irq)
 +{
-+	int evtchn = evtchn_from_irq(irq);
-+
-+	move_native_irq(irq);
-+
-+	if (VALID_EVTCHN(evtchn))
-+		clear_evtchn(evtchn);
++	move_masked_irq(irq);
++	
 +	pirq_eoi(irq);
 +}
 +
@@ -17015,7 +17288,7 @@ index 30e0467..dd1e71b 100644
 +		irq = find_unbound_irq();
 +
 +	set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
-+				      handle_edge_irq, name);
++				      handle_fasteoi_irq, name);
 +
 +	irq_op.irq = gsi;
 +	irq_op.vector = 0;
@@ -17035,10 +17308,10 @@ index 30e0467..dd1e71b 100644
 +
 +out:
 +	spin_unlock(&irq_mapping_update_lock);
- 
- 	return irq;
- }
- 
++
++	return irq;
++}
++
 +#ifdef CONFIG_PCI_MSI
 +int xen_destroy_irq(int irq)
 +{
@@ -17063,7 +17336,7 @@ index 30e0467..dd1e71b 100644
 +		}
 +	}
 +	irq_info[irq] = mk_unbound_info();
-+
+ 
 +	dynamic_irq_cleanup(irq);
 +
 +out:
@@ -17125,13 +17398,13 @@ index 30e0467..dd1e71b 100644
 +		irq_info[irq].u.pirq.domid = domid;
 +
 +	set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
-+			handle_edge_irq,
-+			(type == PCI_CAP_ID_MSIX) ? "msi-x":"msi");
++				      handle_fasteoi_irq,
++				      (type == PCI_CAP_ID_MSIX) ? "msi-x":"msi");
 +
 +out:
 +	spin_unlock(&irq_mapping_update_lock);
-+	return irq;
-+}
+ 	return irq;
+ }
 +#endif
 +#endif
 +
@@ -17145,20 +17418,19 @@ index 30e0467..dd1e71b 100644
 +	return gsi_from_irq(irq);
 +}
 +EXPORT_SYMBOL_GPL(xen_gsi_from_irq);
-+
+ 
  int bind_evtchn_to_irq(unsigned int evtchn)
  {
- 	int irq;
-@@ -362,7 +735,7 @@ int bind_evtchn_to_irq(unsigned int evtchn)
+@@ -362,7 +750,7 @@ int bind_evtchn_to_irq(unsigned int evtchn)
  		irq = find_unbound_irq();
  
  		set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
 -					      handle_level_irq, "event");
-+					      handle_edge_irq, "event");
++					      handle_fasteoi_irq, "event");
  
  		evtchn_to_irq[evtchn] = irq;
  		irq_info[irq] = mk_evtchn_info(evtchn);
-@@ -388,8 +761,8 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
+@@ -388,8 +776,8 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
  		if (irq < 0)
  			goto out;
  
@@ -17169,7 +17441,7 @@ index 30e0467..dd1e71b 100644
  
  		bind_ipi.vcpu = cpu;
  		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
-@@ -409,8 +782,23 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
+@@ -409,8 +797,23 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
  	return irq;
  }
  
@@ -17178,7 +17450,8 @@ index 30e0467..dd1e71b 100644
 +{
 +        struct evtchn_bind_interdomain bind_interdomain;
 +        int err;
-+
+ 
+-static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
 +        bind_interdomain.remote_dom  = remote_domain;
 +        bind_interdomain.remote_port = remote_port;
 +
@@ -17188,13 +17461,12 @@ index 30e0467..dd1e71b 100644
 +        return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
 +}
 +
- 
--static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
++
 +int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
  {
  	struct evtchn_bind_virq bind_virq;
  	int evtchn, irq;
-@@ -429,8 +817,8 @@ static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
+@@ -429,8 +832,8 @@ static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
  
  		irq = find_unbound_irq();
  
@@ -17205,7 +17477,7 @@ index 30e0467..dd1e71b 100644
  
  		evtchn_to_irq[evtchn] = irq;
  		irq_info[irq] = mk_virq_info(evtchn, virq);
-@@ -504,6 +892,29 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn,
+@@ -504,6 +907,29 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn,
  }
  EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
  
@@ -17235,7 +17507,7 @@ index 30e0467..dd1e71b 100644
  int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
  			    irq_handler_t handler,
  			    unsigned long irqflags, const char *devname, void *dev_id)
-@@ -617,17 +1028,13 @@ static DEFINE_PER_CPU(unsigned, xed_nesting_count);
+@@ -617,17 +1043,13 @@ static DEFINE_PER_CPU(unsigned, xed_nesting_count);
   * a bitset of words which contain pending event bits.  The second
   * level is a bitset of pending events themselves.
   */
@@ -17254,7 +17526,7 @@ index 30e0467..dd1e71b 100644
  	do {
  		unsigned long pending_words;
  
-@@ -650,9 +1057,13 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
+@@ -650,9 +1072,16 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
  				int bit_idx = __ffs(pending_bits);
  				int port = (word_idx * BITS_PER_LONG) + bit_idx;
  				int irq = evtchn_to_irq[port];
@@ -17262,6 +17534,9 @@ index 30e0467..dd1e71b 100644
  
 -				if (irq != -1)
 -					handle_irq(irq, regs);
++				mask_evtchn(port);
++				clear_evtchn(port);
++
 +				if (irq != -1) {
 +					desc = irq_to_desc(irq);
 +					if (desc)
@@ -17270,7 +17545,7 @@ index 30e0467..dd1e71b 100644
  			}
  		}
  
-@@ -660,14 +1071,32 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
+@@ -660,14 +1089,32 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
  
  		count = __get_cpu_var(xed_nesting_count);
  		__get_cpu_var(xed_nesting_count) = 0;
@@ -17305,7 +17580,7 @@ index 30e0467..dd1e71b 100644
  
  /* Rebind a new event channel to an existing irq. */
  void rebind_evtchn_irq(int evtchn, int irq)
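
The upcall rework above also changes the acknowledgement model: xen_evtchn_do_upcall now masks and clears a port before dispatching it, and the fasteoi ack (ack_dynirq/ack_pirq) unmasks afterwards, which is why the separate enable_dynirq/disable_dynirq helpers collapse into shared mask_irq/unmask_irq further down. Port selection itself is the usual two-level bitmap walk; a runnable model of the index arithmetic:

    #include <stdio.h>

    #define BITS_PER_LONG (8 * (int)sizeof(unsigned long))

    int main(void)
    {
            int port = 131;                         /* illustrative port */
            int word_idx = port / BITS_PER_LONG;    /* first-level word */
            int bit_idx  = port % BITS_PER_LONG;    /* bit within that word */

            printf("port %d -> word %d, bit %d\n", port, word_idx, bit_idx);
            printf("recombined: %d\n", word_idx * BITS_PER_LONG + bit_idx);
            return 0;
    }
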
-@@ -704,7 +1133,10 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
+@@ -704,7 +1151,10 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
  	struct evtchn_bind_vcpu bind_vcpu;
  	int evtchn = evtchn_from_irq(irq);
  
@@ -17317,7 +17592,44 @@ index 30e0467..dd1e71b 100644
  		return -1;
  
  	/* Send future instances of this interrupt to other vcpu. */
-@@ -856,7 +1288,7 @@ void xen_clear_irq_pending(int irq)
+@@ -745,33 +1195,17 @@ int resend_irq_on_evtchn(unsigned int irq)
+ 	return 1;
+ }
+ 
+-static void enable_dynirq(unsigned int irq)
+-{
+-	int evtchn = evtchn_from_irq(irq);
+-
+-	if (VALID_EVTCHN(evtchn))
+-		unmask_evtchn(evtchn);
+-}
+-
+-static void disable_dynirq(unsigned int irq)
+-{
+-	int evtchn = evtchn_from_irq(irq);
+-
+-	if (VALID_EVTCHN(evtchn))
+-		mask_evtchn(evtchn);
+-}
+-
+ static void ack_dynirq(unsigned int irq)
+ {
+ 	int evtchn = evtchn_from_irq(irq);
+ 
+-	move_native_irq(irq);
++	move_masked_irq(irq);
+ 
+ 	if (VALID_EVTCHN(evtchn))
+-		clear_evtchn(evtchn);
++		unmask_evtchn(evtchn);
+ }
+ 
+-static int retrigger_dynirq(unsigned int irq)
++static int retrigger_irq(unsigned int irq)
+ {
+ 	int evtchn = evtchn_from_irq(irq);
+ 	struct shared_info *sh = HYPERVISOR_shared_info;
+@@ -856,7 +1290,7 @@ void xen_clear_irq_pending(int irq)
  	if (VALID_EVTCHN(evtchn))
  		clear_evtchn(evtchn);
  }
@@ -17326,7 +17638,7 @@ index 30e0467..dd1e71b 100644
  void xen_set_irq_pending(int irq)
  {
  	int evtchn = evtchn_from_irq(irq);
-@@ -876,9 +1308,9 @@ bool xen_test_irq_pending(int irq)
+@@ -876,9 +1310,9 @@ bool xen_test_irq_pending(int irq)
  	return ret;
  }
  
@@ -17338,7 +17650,7 @@ index 30e0467..dd1e71b 100644
  {
  	evtchn_port_t evtchn = evtchn_from_irq(irq);
  
-@@ -886,13 +1318,33 @@ void xen_poll_irq(int irq)
+@@ -886,13 +1320,33 @@ void xen_poll_irq(int irq)
  		struct sched_poll poll;
  
  		poll.nr_ports = 1;
@@ -17373,18 +17685,45 @@ index 30e0467..dd1e71b 100644
  
  void xen_irq_resume(void)
  {
-@@ -929,13 +1381,84 @@ static struct irq_chip xen_dynamic_chip __read_mostly = {
- 	.retrigger	= retrigger_dynirq,
- };
+@@ -915,27 +1369,117 @@ void xen_irq_resume(void)
+ 		restore_cpu_virqs(cpu);
+ 		restore_cpu_ipis(cpu);
+ 	}
++
++	if (pirq_eoi_does_unmask) {
++		struct physdev_pirq_eoi_gmfn eoi_gmfn;
++		
++		eoi_gmfn.gmfn = virt_to_mfn(pirq_needs_eoi_bits);
++		if (HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn, &eoi_gmfn) == 0) {
++			/* Could recover by reverting to old method...? */
++			BUG();
++		}
++	}
+ }
+ 
+ static struct irq_chip xen_dynamic_chip __read_mostly = {
+ 	.name		= "xen-dyn",
  
+-	.disable	= disable_dynirq,
+-	.mask		= disable_dynirq,
+-	.unmask		= enable_dynirq,
++	.disable	= mask_irq,
++	.mask		= mask_irq,
++	.unmask		= unmask_irq,
++
++	.eoi		= ack_dynirq,
++	.set_affinity	= set_affinity_irq,
++	.retrigger	= retrigger_irq,
++};
++
 +static struct irq_chip xen_percpu_chip __read_mostly = {
 +	.name		= "xen-percpu",
 +
-+	.disable	= disable_dynirq,
-+	.mask		= disable_dynirq,
-+	.unmask		= enable_dynirq,
-+
-+	.ack		= ack_dynirq,
++	.disable	= mask_irq,
++	.mask		= mask_irq,
++	.unmask		= unmask_irq,
+ 
+ 	.ack		= ack_dynirq,
 +};
 +
 +static struct irq_chip xen_pirq_chip __read_mostly = {
@@ -17393,20 +17732,21 @@ index 30e0467..dd1e71b 100644
 +	.startup	= startup_pirq,
 +	.shutdown	= shutdown_pirq,
 +
-+	.enable		= enable_pirq,
-+	.unmask		= enable_pirq,
++	.enable		= pirq_eoi,
++	.unmask		= unmask_irq,
 +
-+	.disable	= disable_pirq,
-+	.mask		= disable_pirq,
++	.disable	= mask_irq,
++	.mask		= mask_irq,
 +
-+	.ack		= ack_pirq,
++	.eoi		= ack_pirq,
 +	.end		= end_pirq,
 +
-+	.set_affinity	= set_affinity_irq,
-+
-+	.retrigger	= retrigger_dynirq,
-+};
+ 	.set_affinity	= set_affinity_irq,
+-	.retrigger	= retrigger_dynirq,
 +
++	.retrigger	= retrigger_irq,
+ };
+ 
 +int xen_set_callback_via(uint64_t via)
 +{
 +	struct xen_hvm_param a;
@@ -17446,6 +17786,8 @@ index 30e0467..dd1e71b 100644
  void __init xen_init_IRQ(void)
  {
  	int i;
++	struct physdev_pirq_eoi_gmfn eoi_gmfn;
++	int nr_pirqs = NR_IRQS;
  
  	cpu_evtchn_mask_p = kcalloc(nr_cpu_ids, sizeof(struct cpu_evtchn_s),
  				    GFP_KERNEL);
@@ -17456,10 +17798,17 @@ index 30e0467..dd1e71b 100644
 +				GFP_KERNEL);
 +	for(i = 0; i < NR_EVENT_CHANNELS; i++)
 +		evtchn_to_irq[i] = -1;
++
++	i = get_order(sizeof(unsigned long) * BITS_TO_LONGS(nr_pirqs));
++	pirq_needs_eoi_bits = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, i);
++
++ 	eoi_gmfn.gmfn = virt_to_mfn(pirq_needs_eoi_bits);
++	if (HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn, &eoi_gmfn) == 0)
++		pirq_eoi_does_unmask = true;
  
  	init_evtchn_cpu_bindings();
  
-@@ -943,5 +1466,11 @@ void __init xen_init_IRQ(void)
+@@ -943,5 +1487,11 @@ void __init xen_init_IRQ(void)
  	for (i = 0; i < NR_EVENT_CHANNELS; i++)
  		mask_evtchn(i);
  
@@ -19747,10 +20096,10 @@ index 0000000..2e8508a
 +}
 diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
 new file mode 100644
-index 0000000..ed7cd65
+index 0000000..9052895
 --- /dev/null
 +++ b/drivers/xen/netback/netback.c
-@@ -0,0 +1,1879 @@
+@@ -0,0 +1,1881 @@
 +/******************************************************************************
 + * drivers/xen/netback/netback.c
 + *
@@ -20551,17 +20900,19 @@ index 0000000..ed7cd65
 +
 +static void add_to_net_schedule_list_tail(struct xen_netif *netif)
 +{
++	unsigned long flags;
++
 +	struct xen_netbk *netbk = &xen_netbk[netif->group];
 +	if (__on_net_schedule_list(netif))
 +		return;
 +
-+	spin_lock_irq(&netbk->net_schedule_list_lock);
++	spin_lock_irqsave(&netbk->net_schedule_list_lock, flags);
 +	if (!__on_net_schedule_list(netif) &&
 +	    likely(netif_schedulable(netif))) {
 +		list_add_tail(&netif->list, &netbk->net_schedule_list);
 +		netif_get(netif);
 +	}
-+	spin_unlock_irq(&netbk->net_schedule_list_lock);
++	spin_unlock_irqrestore(&netbk->net_schedule_list_lock, flags);
 +}
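
netback's add_to_net_schedule_list_tail() switches from spin_lock_irq() to spin_lock_irqsave() above, so it no longer unconditionally re-enables interrupts on unlock and becomes safe to call with interrupts already disabled. The generic shape of that pattern, as a sketch:

    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(example_lock);

    /* irqsave/irqrestore preserves the caller's interrupt state;
     * spin_unlock_irq() would force interrupts back on regardless. */
    static void safe_in_any_context(void)
    {
            unsigned long flags;

            spin_lock_irqsave(&example_lock, flags);
            /* ... manipulate the shared schedule list ... */
            spin_unlock_irqrestore(&example_lock, flags);
    }
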
 +
 +void netif_schedule_work(struct xen_netif *netif)
@@ -30434,11 +30785,14 @@ index 6559e0c..afaa6ed 100644
  }
  
 diff --git a/drivers/xen/xenfs/xenbus.c b/drivers/xen/xenfs/xenbus.c
-index 6c4269b..c309f1f 100644
+index 6c4269b..0ddef43 100644
 --- a/drivers/xen/xenfs/xenbus.c
 +++ b/drivers/xen/xenfs/xenbus.c
-@@ -123,6 +123,9 @@ static ssize_t xenbus_file_read(struct file *filp,
+@@ -121,8 +121,12 @@ static ssize_t xenbus_file_read(struct file *filp,
+ 	int ret;
+ 
  	mutex_lock(&u->reply_mutex);
++again:
  	while (list_empty(&u->read_buffers)) {
  		mutex_unlock(&u->reply_mutex);
 +		if (filp->f_flags & O_NONBLOCK)
@@ -30447,7 +30801,7 @@ index 6c4269b..c309f1f 100644
  		ret = wait_event_interruptible(u->read_waitq,
  					       !list_empty(&u->read_buffers));
  		if (ret)
-@@ -140,7 +143,7 @@ static ssize_t xenbus_file_read(struct file *filp,
+@@ -140,7 +144,7 @@ static ssize_t xenbus_file_read(struct file *filp,
  		i += sz - ret;
  		rb->cons += sz - ret;
  
@@ -30456,7 +30810,24 @@ index 6c4269b..c309f1f 100644
  			if (i == 0)
  				i = -EFAULT;
  			goto out;
-@@ -451,7 +454,7 @@ static ssize_t xenbus_file_write(struct file *filp,
+@@ -156,6 +160,8 @@ static ssize_t xenbus_file_read(struct file *filp,
+ 					struct read_buffer, list);
+ 		}
+ 	}
++	if (i == 0)
++		goto again;
+ 
+ out:
+ 	mutex_unlock(&u->reply_mutex);
+@@ -403,6 +409,7 @@ static int xenbus_write_watch(unsigned msg_type, struct xenbus_file_priv *u)
+ 
+ 		mutex_lock(&u->reply_mutex);
+ 		rc = queue_reply(&u->read_buffers, &reply, sizeof(reply));
++		wake_up(&u->read_waitq);
+ 		mutex_unlock(&u->reply_mutex);
+ 	}
+ 
+@@ -451,7 +458,7 @@ static ssize_t xenbus_file_write(struct file *filp,
  
  	ret = copy_from_user(u->u.buffer + u->len, ubuf, len);
  
@@ -30465,6 +30836,38 @@ index 6c4269b..c309f1f 100644
  		rc = -EFAULT;
  		goto out;
  	}
+@@ -484,21 +491,6 @@ static ssize_t xenbus_file_write(struct file *filp,
+ 	msg_type = u->u.msg.type;
+ 
+ 	switch (msg_type) {
+-	case XS_TRANSACTION_START:
+-	case XS_TRANSACTION_END:
+-	case XS_DIRECTORY:
+-	case XS_READ:
+-	case XS_GET_PERMS:
+-	case XS_RELEASE:
+-	case XS_GET_DOMAIN_PATH:
+-	case XS_WRITE:
+-	case XS_MKDIR:
+-	case XS_RM:
+-	case XS_SET_PERMS:
+-		/* Send out a transaction */
+-		ret = xenbus_write_transaction(msg_type, u);
+-		break;
+-
+ 	case XS_WATCH:
+ 	case XS_UNWATCH:
+ 		/* (Un)Ask for some path to be watched for changes */
+@@ -506,7 +498,8 @@ static ssize_t xenbus_file_write(struct file *filp,
+ 		break;
+ 
+ 	default:
+-		ret = -EINVAL;
++		/* Send out a transaction */
++		ret = xenbus_write_transaction(msg_type, u);
+ 		break;
+ 	}
+ 	if (ret != 0)
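
The xenbus.c changes above do two things: the read path honours O_NONBLOCK (returning -EAGAIN when nothing is queued, and looping via the new again: label on zero-length wakeups), and the write path now forwards any message type it does not special-case to xenbus_write_transaction() instead of whitelisting each one. A userspace sketch of the nonblocking read, assuming xenfs is mounted at its usual /proc/xen location:

    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            char buf[256];
            int fd = open("/proc/xen/xenbus", O_RDWR | O_NONBLOCK); /* assumed path */

            if (fd < 0) {
                    perror("open");
                    return 1;
            }

            /* With no reply or watch event queued, the patched driver
             * fails fast instead of blocking. */
            if (read(fd, buf, sizeof(buf)) < 0 && errno == EAGAIN)
                    puts("no data queued (EAGAIN)");

            close(fd);
            return 0;
    }
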
 diff --git a/drivers/xen/xenfs/xenfs.h b/drivers/xen/xenfs/xenfs.h
 index 51f08b2..b68aa62 100644
 --- a/drivers/xen/xenfs/xenfs.h
@@ -32045,10 +32448,28 @@ index 0000000..c4177f3
 + * End:
 + */
 diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h
-index e8cbf43..7b301fa 100644
+index e8cbf43..c9ba846 100644
 --- a/include/xen/interface/io/ring.h
 +++ b/include/xen/interface/io/ring.h
-@@ -73,7 +73,16 @@ union __name##_sring_entry {						\
+@@ -24,8 +24,15 @@ typedef unsigned int RING_IDX;
+  * A ring contains as many entries as will fit, rounded down to the nearest
+  * power of two (so we can mask with (size-1) to loop around).
+  */
+-#define __RING_SIZE(_s, _sz) \
+-    (__RD32(((_sz) - (long)&(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0])))
++#define __CONST_RING_SIZE(_s, _sz)				\
++	(__RD32(((_sz) - offsetof(struct _s##_sring, ring)) /	\
++		sizeof(((struct _s##_sring *)0)->ring[0])))
++
++/*
++ * The same for passing in an actual pointer instead of a name tag.
++ */
++#define __RING_SIZE(_s, _sz)						\
++	(__RD32(((_sz) - (long)&(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0])))
+ 
+ /*
+  * Macros to make the correct C datatypes for a new kind of ring.
+@@ -73,7 +80,16 @@ union __name##_sring_entry {						\
  struct __name##_sring {							\
      RING_IDX req_prod, req_event;					\
      RING_IDX rsp_prod, rsp_event;					\
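
__CONST_RING_SIZE above computes, at compile time, how many entries fit in a shared page once the ring header is subtracted, rounded down to a power of two so the producer/consumer indices can wrap with a simple size-1 mask. A runnable model of that rounding (the header and entry sizes are illustrative, not the real ring layout):

    #include <stdio.h>

    /* Round down to the largest power of two <= x, like ring.h's __RD32. */
    static unsigned rd32(unsigned x)
    {
            unsigned p = 1;

            while (p <= x / 2)
                    p *= 2;
            return x ? p : 0;
    }

    int main(void)
    {
            unsigned page = 4096, header = 64, entry = 32;   /* assumptions */
            unsigned slots = (page - header) / entry;        /* 126 raw slots */

            printf("ring size = %u entries\n", rd32(slots)); /* -> 64 */
            return 0;
    }
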
@@ -32087,7 +32508,7 @@ index 46508c7..9fda532 100644
  
  #endif /* _XEN_PUBLIC_IO_XENBUS_H */
 diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
-index af36ead..eac3ce1 100644
+index af36ead..aa4e368 100644
 --- a/include/xen/interface/memory.h
 +++ b/include/xen/interface/memory.h
 @@ -9,6 +9,8 @@
@@ -32168,7 +32589,7 @@ index af36ead..eac3ce1 100644
   * Sets the GPFN at which a particular page appears in the specified guest's
   * pseudophysical address space.
   * arg == addr of xen_add_to_physmap_t.
-@@ -142,4 +199,39 @@ struct xen_translate_gpfn_list {
+@@ -142,4 +199,38 @@ struct xen_translate_gpfn_list {
  };
  DEFINE_GUEST_HANDLE_STRUCT(xen_translate_gpfn_list);
  
@@ -32201,7 +32622,6 @@ index af36ead..eac3ce1 100644
 + */
 +#define XENMEM_machine_memory_map   10
 +
-+
 +/*
 + * Prevent the balloon driver from changing the memory reservation
 + * during a driver critical region.
@@ -32209,10 +32629,30 @@ index af36ead..eac3ce1 100644
 +extern spinlock_t xen_reservation_lock;
  #endif /* __XEN_PUBLIC_MEMORY_H__ */
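
Per the comment above, memory.h now exports xen_reservation_lock so other code can hold off the balloon driver's reservation changes across a critical region. The usage shape, as a hedged sketch (the function and its body are hypothetical; only the lock comes from the header):

    #include <linux/spinlock.h>
    #include <xen/interface/memory.h>

    static void frame_update_critical(void)
    {
            unsigned long flags;

            spin_lock_irqsave(&xen_reservation_lock, flags);
            /* ... update machine-frame state that must not race the
             * balloon driver's reservation changes ... */
            spin_unlock_irqrestore(&xen_reservation_lock, flags);
    }
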
 diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h
-index cd69391..66122aa 100644
+index cd69391..0703ef6 100644
 --- a/include/xen/interface/physdev.h
 +++ b/include/xen/interface/physdev.h
-@@ -106,6 +106,64 @@ struct physdev_irq {
+@@ -39,6 +39,19 @@ struct physdev_eoi {
+ };
+ 
+ /*
++ * Register a shared page for the hypervisor to indicate whether the guest
++ * must issue PHYSDEVOP_eoi. The semantics of PHYSDEVOP_eoi change slightly
++ * once the guest used this function in that the associated event channel
++ * will automatically get unmasked. The page registered is used as a bit
++ * array indexed by Xen's PIRQ value.
++ */
++#define PHYSDEVOP_pirq_eoi_gmfn         17
++struct physdev_pirq_eoi_gmfn {
++    /* IN */
++    unsigned long gmfn;
++};
++
++/*
+  * Query the status of an IRQ line.
+  * @arg == pointer to physdev_irq_status_query structure.
+  */
+@@ -106,6 +119,64 @@ struct physdev_irq {
  	uint32_t vector;
  };
  
@@ -32277,7 +32717,7 @@ index cd69391..66122aa 100644
  /*
   * Argument to physdev_op_compat() hypercall. Superceded by new physdev_op()
   * hypercall since 0x00030202.
-@@ -121,6 +179,16 @@ struct physdev_op {
+@@ -121,6 +192,16 @@ struct physdev_op {
  	} u;
  };
  
@@ -33196,6 +33636,19 @@ index 2befa3e..9ffaee0 100644
  #else /* __ASSEMBLY__ */
  
  /* In assembly code we cannot use C numeric constant suffixes. */
+diff --git a/include/xen/page.h b/include/xen/page.h
+index eaf85fa..0be36b9 100644
+--- a/include/xen/page.h
++++ b/include/xen/page.h
+@@ -1 +1,8 @@
++#ifndef _XEN_PAGE_H
++#define _XEN_PAGE_H
++
+ #include <asm/xen/page.h>
++
++extern phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
++
++#endif	/* _XEN_PAGE_H */
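
xen/page.h now exports the boot-time extra-memory region that balloon_init (earlier in this patch) walks with PFN_UP/PFN_DOWN, so only whole pages strictly inside [xen_extra_mem_start, xen_extra_mem_start + xen_extra_mem_size) are handed to the balloon. A runnable model of that inward rounding, with made-up addresses:

    #include <stdio.h>

    #define PAGE_SHIFT  12
    #define PAGE_SIZE   (1ULL << PAGE_SHIFT)
    #define PFN_UP(x)   (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)
    #define PFN_DOWN(x) ((x) >> PAGE_SHIFT)

    int main(void)
    {
            unsigned long long start = 0x100001800ULL;   /* not page aligned */
            unsigned long long size  = 2 * 1024 * 1024;  /* 2 MiB region */

            /* round the start up and the end down to whole pages */
            printf("first pfn 0x%llx, end pfn 0x%llx\n",
                   PFN_UP(start), PFN_DOWN(start + size));
            return 0;
    }
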
 diff --git a/include/xen/pcpu.h b/include/xen/pcpu.h
 new file mode 100644
 index 0000000..fb2bf6b

