[kernel/rawhide/user/myoung/xendom0: 4/4] Switch to xen/next-2.6.38, which adds net and pci backends; add some memory fixes

myoung myoung at fedoraproject.org
Tue Jan 25 21:28:41 UTC 2011


commit 1f6c23a1b72c669a4d9f5713ed4bea0b1ab72505
Author: Michael Young <m.a.young at durham.ac.uk>
Date:   Tue Jan 25 21:25:53 2011 +0000

    Switch to xen/next-2.6.38, which adds net and pci backends;
    add some memory fixes

 config-local          |    3 +
 kernel.spec           |   17 +-
 xen.next-2.6.38.patch |30821 +++++++++++++++++++++++++++++++++++++++++++++++++
 xen.pvhvm.fixes.patch |  197 +
 4 files changed, 31033 insertions(+), 5 deletions(-)
---
diff --git a/config-local b/config-local
index 8c32be5..8f91cda 100644
--- a/config-local
+++ b/config-local
@@ -1,2 +1,5 @@
 # This file is intentionally left empty in the stock kernel. Its a nicety
 # added for those wanting to do custom rebuilds with altered config opts.
+CONFIG_XEN_NETDEV_BACKEND=m
+CONFIG_XEN_PCIDEV_BACKEND=m
+CONFIG_XEN_PCIDEV_BE_DEBUG=n
diff --git a/kernel.spec b/kernel.spec
index b6050f0..4bd59bb 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -737,12 +737,12 @@ Patch12421: fs-call-security_d_instantiate-in-d_obtain_alias.patch
 
 # Xen patches
 # git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git branches
-Patch20000: xen.next-2.6.37.patch
+Patch20000: xen.next-2.6.38.patch
 #Patch20001: xen.upstream.core.patch
 # git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen.git branches
-Patch20005: xen.pcifront.fixes.patch
+#Patch20005: xen.pcifront.fixes.patch
 # git://xenbits.xen.org/people/sstabellini/linux-pvhvm branches
-#Patch20010: xen.pvhvm.fixes.patch
+Patch20010: xen.pvhvm.fixes.patch
 
 %endif
 
@@ -1357,9 +1357,9 @@ ApplyPatch dmar-disable-when-ricoh-multifunction.patch
 ApplyPatch fs-call-security_d_instantiate-in-d_obtain_alias.patch
 
 # Xen patches
-ApplyPatch xen.next-2.6.37.patch
+ApplyPatch xen.next-2.6.38.patch
 #ApplyPatch xen.upstream.core.patch
-ApplyPatch xen.pcifront.fixes.patch
+#ApplyPatch xen.pcifront.fixes.patch
 #ApplyPatch xen.pvhvm.fixes.patch
 
 # END OF PATCH APPLICATIONS
@@ -1974,6 +1974,13 @@ fi
 #                 ||----w |
 #                 ||     ||
 %changelog
+* Tue Jan 25 2011 Michael Young <m.a.young at durham.ac.uk>
+- Switch from xen/next-2.6.37 to xen/next-2.6.38
+  which adds XEN_NETDEV_BACKEND and XEN_PCIDEV_BACKEND
+- Comment out xen.pcifront.fixes.patch (its patches are now in next-2.6.38)
+- Put the 2.6.38-rc1-memory-fixes branch in xen.pvhvm.fixes.patch
+  for some memory fixes, including a later version of the crash-on-boot patch
+
 * Tue Jan 25 2011 Kyle McMartin <kmcmartin at redhat.com> 2.6.38-0.rc2.git3.1
 - Linux 2.6.38-rc2-git3
 - perf-gcc460-build-fixes.patch: fix context from [9486aa38]
diff --git a/xen.next-2.6.38.patch b/xen.next-2.6.38.patch
new file mode 100644
index 0000000..c8a28e1
--- /dev/null
+++ b/xen.next-2.6.38.patch
@@ -0,0 +1,30821 @@
+From 1e13f505ecbc011465783283ebfa05a42f7ce18f Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ijc at hellion.org.uk>
+Date: Thu, 3 Dec 2009 22:04:06 +0000
+Subject: [PATCH 001/139] xen: export xen_gsi_from_irq, it is required by modular pciback
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Cc: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Cc: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/events.c |    1 +
+ 1 files changed, 1 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/events.c b/drivers/xen/events.c
+index 97612f5..a04da4b 100644
+--- a/drivers/xen/events.c
++++ b/drivers/xen/events.c
+@@ -778,6 +778,7 @@ int xen_gsi_from_irq(unsigned irq)
+ {
+ 	return gsi_from_irq(irq);
+ }
++EXPORT_SYMBOL_GPL(xen_gsi_from_irq);
+ 
+ int xen_irq_from_pirq(unsigned pirq)
+ {
+-- 
+1.7.3.4
+
+
+From f0885b9401a859bc7bed849925a703c03d00119b Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Mon, 8 Nov 2010 14:13:35 -0500
+Subject: [PATCH 002/139] xen/pci: Add xen_[find|register|unregister]_device_domain_owner functions.
+
+Xen PCI backend performs ownership (MSI/MSI-X) changes on behalf of
+the guest. This means we need some mechanism to find, set and unset
+the domain id of the guest.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ arch/x86/include/asm/xen/pci.h |   16 +++++++++
+ arch/x86/pci/xen.c             |   73 ++++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 89 insertions(+), 0 deletions(-)
+
+diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h
+index 2329b3e..8474b4b 100644
+--- a/arch/x86/include/asm/xen/pci.h
++++ b/arch/x86/include/asm/xen/pci.h
+@@ -15,10 +15,26 @@ static inline int pci_xen_hvm_init(void)
+ #endif
+ #if defined(CONFIG_XEN_DOM0)
+ void __init xen_setup_pirqs(void);
++int xen_find_device_domain_owner(struct pci_dev *dev);
++int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain);
++int xen_unregister_device_domain_owner(struct pci_dev *dev);
+ #else
+ static inline void __init xen_setup_pirqs(void)
+ {
+ }
++static inline int xen_find_device_domain_owner(struct pci_dev *dev)
++{
++	return -1;
++}
++static inline int xen_register_device_domain_owner(struct pci_dev *dev,
++ 						   uint16_t domain)
++{
++	return -1;
++}
++static inline int xen_unregister_device_domain_owner(struct pci_dev *dev)
++{
++	return -1;
++}
+ #endif
+ 
+ #if defined(CONFIG_PCI_MSI)
+diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
+index 117f5b8..6d2a986 100644
+--- a/arch/x86/pci/xen.c
++++ b/arch/x86/pci/xen.c
+@@ -412,3 +412,76 @@ void __init xen_setup_pirqs(void)
+ 	}
+ }
+ #endif
++
++struct xen_device_domain_owner {
++	domid_t domain;
++	struct pci_dev *dev;
++	struct list_head list;
++};
++
++static DEFINE_SPINLOCK(dev_domain_list_spinlock);
++static struct list_head dev_domain_list = LIST_HEAD_INIT(dev_domain_list);
++
++static struct xen_device_domain_owner *find_device(struct pci_dev *dev)
++{
++	struct xen_device_domain_owner *owner;
++
++	list_for_each_entry(owner, &dev_domain_list, list) {
++		if (owner->dev == dev)
++			return owner;
++	}
++	return NULL;
++}
++
++int xen_find_device_domain_owner(struct pci_dev *dev)
++{
++	struct xen_device_domain_owner *owner;
++	int domain = -ENODEV;
++
++	spin_lock(&dev_domain_list_spinlock);
++	owner = find_device(dev);
++	if (owner)
++		domain = owner->domain;
++	spin_unlock(&dev_domain_list_spinlock);
++	return domain;
++}
++EXPORT_SYMBOL(xen_find_device_domain_owner);
++
++int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain)
++{
++	struct xen_device_domain_owner *owner;
++
++	owner = kzalloc(sizeof(struct xen_device_domain_owner), GFP_KERNEL);
++	if (!owner)
++		return -ENODEV;
++
++	spin_lock(&dev_domain_list_spinlock);
++	if (find_device(dev)) {
++		spin_unlock(&dev_domain_list_spinlock);
++		kfree(owner);
++		return -EEXIST;
++	}
++	owner->domain = domain;
++	owner->dev = dev;
++	list_add_tail(&owner->list, &dev_domain_list);
++	spin_unlock(&dev_domain_list_spinlock);
++	return 0;
++}
++EXPORT_SYMBOL(xen_register_device_domain_owner);
++
++int xen_unregister_device_domain_owner(struct pci_dev *dev)
++{
++	struct xen_device_domain_owner *owner;
++
++	spin_lock(&dev_domain_list_spinlock);
++	owner = find_device(dev);
++	if (!owner) {
++		spin_unlock(&dev_domain_list_spinlock);
++		return -ENODEV;
++	}
++	list_del(&owner->list);
++	spin_unlock(&dev_domain_list_spinlock);
++	kfree(owner);
++	return 0;
++}
++EXPORT_SYMBOL(xen_unregister_device_domain_owner);
+-- 
+1.7.3.4
+
+
+From da24916fdf04d7b4a32c5b9d2c09e47775496e1d Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Mon, 8 Nov 2010 14:23:17 -0500
+Subject: [PATCH 003/139] xen: Check if the PCI device is owned by a domain different than DOMID_SELF.
+
+We check if there is a domain owner for the PCI device. In case of failure
+(meaning no domain has registered for this device) we make
+DOMID_SELF the owner.
+
+[v2: deal with rebasing on v2.6.37-1]
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Acked-by: Xiantao Zhang <xiantao.zhang at intel.com>
+---
+ drivers/xen/events.c |   16 +++++++++++++---
+ 1 files changed, 13 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/xen/events.c b/drivers/xen/events.c
+index a04da4b..96c93e7 100644
+--- a/drivers/xen/events.c
++++ b/drivers/xen/events.c
+@@ -40,6 +40,7 @@
+ #include <asm/xen/pci.h>
+ #include <asm/xen/hypercall.h>
+ #include <asm/xen/hypervisor.h>
++#include <asm/xen/pci.h>
+ 
+ #include <xen/xen.h>
+ #include <xen/hvm.h>
+@@ -97,6 +98,7 @@ struct irq_info
+ 			unsigned short gsi;
+ 			unsigned char vector;
+ 			unsigned char flags;
++			uint16_t domid;
+ 		} pirq;
+ 	} u;
+ };
+@@ -158,7 +160,8 @@ static struct irq_info mk_pirq_info(unsigned short evtchn, unsigned short pirq,
+ {
+ 	return (struct irq_info) { .type = IRQT_PIRQ, .evtchn = evtchn,
+ 			.cpu = 0,
+-			.u.pirq = { .pirq = pirq, .gsi = gsi, .vector = vector } };
++			.u.pirq = { .pirq = pirq, .gsi = gsi,
++				     .vector = vector, .domid = DOMID_SELF } };
+ }
+ 
+ /*
+@@ -688,11 +691,16 @@ int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
+ 	int irq = -1;
+ 	struct physdev_map_pirq map_irq;
+ 	int rc;
++	domid_t domid;
+ 	int pos;
+ 	u32 table_offset, bir;
+ 
++	domid = rc = xen_find_device_domain_owner(dev);
++	if (rc < 0)
++		domid = DOMID_SELF;
++	
+ 	memset(&map_irq, 0, sizeof(map_irq));
+-	map_irq.domid = DOMID_SELF;
++	map_irq.domid = domid;
+ 	map_irq.type = MAP_PIRQ_TYPE_MSI;
+ 	map_irq.index = -1;
+ 	map_irq.pirq = -1;
+@@ -727,6 +735,8 @@ int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
+ 		goto out;
+ 	}
+ 	irq_info[irq] = mk_pirq_info(0, map_irq.pirq, 0, map_irq.index);
++	if (domid)
++		irq_info[irq].u.pirq.domid = domid;
+ 
+ 	set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
+ 			handle_level_irq,
+@@ -753,7 +763,7 @@ int xen_destroy_irq(int irq)
+ 
+ 	if (xen_initial_domain()) {
+ 		unmap_irq.pirq = info->u.pirq.pirq;
+-		unmap_irq.domid = DOMID_SELF;
++		unmap_irq.domid = info->u.pirq.domid;
+ 		rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq);
+ 		if (rc) {
+ 			printk(KERN_WARNING "unmap irq failed %d\n", rc);
+-- 
+1.7.3.4
+
+
+From 30fecb8166bdd163bdaab795b573cf988f60fbbe Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Mon, 8 Nov 2010 14:26:36 -0500
+Subject: [PATCH 004/139] xen: Add support to check if IRQ line is shared with other domains.
+
+We do this via the PHYSDEVOP_irq_status_query support hypervisor call.
+We will get a positive value if another domain has bound its
+PIRQ to the specified IRQ line.
+
+[v2: Deal with v2.6.37-rc1 rebase fallout]
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ drivers/xen/events.c |   13 +++++++++++++
+ include/xen/events.h |    3 +++
+ 2 files changed, 16 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/events.c b/drivers/xen/events.c
+index 96c93e7..690dfad 100644
+--- a/drivers/xen/events.c
++++ b/drivers/xen/events.c
+@@ -1398,6 +1398,19 @@ void xen_poll_irq(int irq)
+ 	xen_poll_irq_timeout(irq, 0 /* no timeout */);
+ }
+ 
++/* Check whether the IRQ line is shared with other guests. */
++int xen_ignore_irq(int irq)
++{
++	struct irq_info *info = info_for_irq(irq);
++	struct physdev_irq_status_query irq_status = { .irq =
++							info->u.pirq.gsi };
++
++	if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
++		return 0;
++	return !(irq_status.flags & XENIRQSTAT_shared);
++}
++EXPORT_SYMBOL_GPL(xen_ignore_irq);
++
+ void xen_irq_resume(void)
+ {
+ 	unsigned int cpu, irq, evtchn;
+diff --git a/include/xen/events.h b/include/xen/events.h
+index 646dd17..553c664 100644
+--- a/include/xen/events.h
++++ b/include/xen/events.h
+@@ -89,4 +89,7 @@ int xen_vector_from_irq(unsigned pirq);
+ /* Return irq from pirq */
+ int xen_irq_from_pirq(unsigned pirq);
+ 
++/* Determine whether to ignore this IRQ if passed to a guest. */
++int xen_ignore_irq(int irq);
++
+ #endif	/* _XEN_EVENTS_H */
+-- 
+1.7.3.4
+
+
+From 909e45104de4414897cefce2f6bbed07fc4de4b3 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Mon, 9 Feb 2009 12:05:50 -0800
+Subject: [PATCH 005/139] xen: implement bind_interdomain_evtchn_to_irqhandler for backend drivers
+
+Impact: new Xen-internal API
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/events.c |   38 ++++++++++++++++++++++++++++++++++++++
+ include/xen/events.h |    6 ++++++
+ 2 files changed, 44 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/events.c b/drivers/xen/events.c
+index 690dfad..95eea13 100644
+--- a/drivers/xen/events.c
++++ b/drivers/xen/events.c
+@@ -849,6 +849,21 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
+ 	return irq;
+ }
+ 
++static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
++                                          unsigned int remote_port)
++{
++        struct evtchn_bind_interdomain bind_interdomain;
++        int err;
++
++        bind_interdomain.remote_dom  = remote_domain;
++        bind_interdomain.remote_port = remote_port;
++
++        err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
++                                          &bind_interdomain);
++
++        return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
++}
++
+ 
+ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
+ {
+@@ -944,6 +959,29 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn,
+ }
+ EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
+ 
++int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
++					  unsigned int remote_port,
++					  irq_handler_t handler,
++					  unsigned long irqflags,
++					  const char *devname,
++					  void *dev_id)
++{
++        int irq, retval;
++
++        irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port);
++        if (irq < 0)
++                return irq;
++
++        retval = request_irq(irq, handler, irqflags, devname, dev_id);
++        if (retval != 0) {
++                unbind_from_irq(irq);
++                return retval;
++        }
++
++        return irq;
++}
++EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler);
++
+ int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
+ 			    irq_handler_t handler,
+ 			    unsigned long irqflags, const char *devname, void *dev_id)
+diff --git a/include/xen/events.h b/include/xen/events.h
+index 553c664..2fe1644 100644
+--- a/include/xen/events.h
++++ b/include/xen/events.h
+@@ -23,6 +23,12 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
+ 			   unsigned long irqflags,
+ 			   const char *devname,
+ 			   void *dev_id);
++int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
++					  unsigned int remote_port,
++					  irq_handler_t handler,
++					  unsigned long irqflags,
++					  const char *devname,
++					  void *dev_id);
+ 
+ /*
+  * Common unbind function for all event sources. Takes IRQ to unbind from.
+-- 
+1.7.3.4
+
+
+From b4f664c8de09ab8537e1cd194df29056f803062e Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Mon, 8 Nov 2010 14:46:33 -0500
+Subject: [PATCH 006/139] pci/xen: Make xen_[find|register|unregister]_domain_owner be _GPL
+
+EXPORT_SYMBOL -> EXPORT_SYMBOL_GPL.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ arch/x86/pci/xen.c |    6 +++---
+ 1 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
+index 6d2a986..0fa23c8 100644
+--- a/arch/x86/pci/xen.c
++++ b/arch/x86/pci/xen.c
+@@ -445,7 +445,7 @@ int xen_find_device_domain_owner(struct pci_dev *dev)
+ 	spin_unlock(&dev_domain_list_spinlock);
+ 	return domain;
+ }
+-EXPORT_SYMBOL(xen_find_device_domain_owner);
++EXPORT_SYMBOL_GPL(xen_find_device_domain_owner);
+ 
+ int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain)
+ {
+@@ -467,7 +467,7 @@ int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain)
+ 	spin_unlock(&dev_domain_list_spinlock);
+ 	return 0;
+ }
+-EXPORT_SYMBOL(xen_register_device_domain_owner);
++EXPORT_SYMBOL_GPL(xen_register_device_domain_owner);
+ 
+ int xen_unregister_device_domain_owner(struct pci_dev *dev)
+ {
+@@ -484,4 +484,4 @@ int xen_unregister_device_domain_owner(struct pci_dev *dev)
+ 	kfree(owner);
+ 	return 0;
+ }
+-EXPORT_SYMBOL(xen_unregister_device_domain_owner);
++EXPORT_SYMBOL_GPL(xen_unregister_device_domain_owner);
+-- 
+1.7.3.4
+
+
+From 443b2aafbdb509f218fcb8f4665f063e3a5e1a92 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Tue, 13 Oct 2009 17:22:20 -0400
+Subject: [PATCH 007/139] xen-pciback: Initial copy from linux-2.6.18.hg off pciback driver.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/pciback/Makefile                    |   17 +
+ drivers/xen/pciback/conf_space.c                |  435 ++++++++
+ drivers/xen/pciback/conf_space.h                |  126 +++
+ drivers/xen/pciback/conf_space_capability.c     |   69 ++
+ drivers/xen/pciback/conf_space_capability.h     |   23 +
+ drivers/xen/pciback/conf_space_capability_msi.c |   79 ++
+ drivers/xen/pciback/conf_space_capability_pm.c  |  126 +++
+ drivers/xen/pciback/conf_space_capability_vpd.c |   40 +
+ drivers/xen/pciback/conf_space_header.c         |  317 ++++++
+ drivers/xen/pciback/conf_space_quirks.c         |  138 +++
+ drivers/xen/pciback/conf_space_quirks.h         |   35 +
+ drivers/xen/pciback/controller.c                |  443 ++++++++
+ drivers/xen/pciback/passthrough.c               |  176 +++
+ drivers/xen/pciback/pci_stub.c                  | 1316 +++++++++++++++++++++++
+ drivers/xen/pciback/pciback.h                   |  126 +++
+ drivers/xen/pciback/pciback_ops.c               |  134 +++
+ drivers/xen/pciback/slot.c                      |  187 ++++
+ drivers/xen/pciback/vpci.c                      |  242 +++++
+ drivers/xen/pciback/xenbus.c                    |  710 ++++++++++++
+ 19 files changed, 4739 insertions(+), 0 deletions(-)
+ create mode 100644 drivers/xen/pciback/Makefile
+ create mode 100644 drivers/xen/pciback/conf_space.c
+ create mode 100644 drivers/xen/pciback/conf_space.h
+ create mode 100644 drivers/xen/pciback/conf_space_capability.c
+ create mode 100644 drivers/xen/pciback/conf_space_capability.h
+ create mode 100644 drivers/xen/pciback/conf_space_capability_msi.c
+ create mode 100644 drivers/xen/pciback/conf_space_capability_pm.c
+ create mode 100644 drivers/xen/pciback/conf_space_capability_vpd.c
+ create mode 100644 drivers/xen/pciback/conf_space_header.c
+ create mode 100644 drivers/xen/pciback/conf_space_quirks.c
+ create mode 100644 drivers/xen/pciback/conf_space_quirks.h
+ create mode 100644 drivers/xen/pciback/controller.c
+ create mode 100644 drivers/xen/pciback/passthrough.c
+ create mode 100644 drivers/xen/pciback/pci_stub.c
+ create mode 100644 drivers/xen/pciback/pciback.h
+ create mode 100644 drivers/xen/pciback/pciback_ops.c
+ create mode 100644 drivers/xen/pciback/slot.c
+ create mode 100644 drivers/xen/pciback/vpci.c
+ create mode 100644 drivers/xen/pciback/xenbus.c
+
+diff --git a/drivers/xen/pciback/Makefile b/drivers/xen/pciback/Makefile
+new file mode 100644
+index 0000000..106dae7
+--- /dev/null
++++ b/drivers/xen/pciback/Makefile
+@@ -0,0 +1,17 @@
++obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback.o
++
++pciback-y := pci_stub.o pciback_ops.o xenbus.o
++pciback-y += conf_space.o conf_space_header.o \
++	     conf_space_capability.o \
++	     conf_space_capability_vpd.o \
++	     conf_space_capability_pm.o \
++             conf_space_quirks.o
++pciback-$(CONFIG_PCI_MSI) += conf_space_capability_msi.o
++pciback-$(CONFIG_XEN_PCIDEV_BACKEND_VPCI) += vpci.o
++pciback-$(CONFIG_XEN_PCIDEV_BACKEND_SLOT) += slot.o
++pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o
++pciback-$(CONFIG_XEN_PCIDEV_BACKEND_CONTROLLER) += controller.o
++
++ifeq ($(CONFIG_XEN_PCIDEV_BE_DEBUG),y)
++EXTRA_CFLAGS += -DDEBUG
++endif
+diff --git a/drivers/xen/pciback/conf_space.c b/drivers/xen/pciback/conf_space.c
+new file mode 100644
+index 0000000..0c76db1
+--- /dev/null
++++ b/drivers/xen/pciback/conf_space.c
+@@ -0,0 +1,435 @@
++/*
++ * PCI Backend - Functions for creating a virtual configuration space for
++ *               exported PCI Devices.
++ *               It's dangerous to allow PCI Driver Domains to change their
++ *               device's resources (memory, i/o ports, interrupts). We need to
++ *               restrict changes to certain PCI Configuration registers:
++ *               BARs, INTERRUPT_PIN, most registers in the header...
++ *
++ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ */
++
++#include <linux/kernel.h>
++#include <linux/pci.h>
++#include "pciback.h"
++#include "conf_space.h"
++#include "conf_space_quirks.h"
++
++static int permissive;
++module_param(permissive, bool, 0644);
++
++#define DEFINE_PCI_CONFIG(op,size,type) 			\
++int pciback_##op##_config_##size 				\
++(struct pci_dev *dev, int offset, type value, void *data)	\
++{								\
++	return pci_##op##_config_##size (dev, offset, value);	\
++}
++
++DEFINE_PCI_CONFIG(read, byte, u8 *)
++DEFINE_PCI_CONFIG(read, word, u16 *)
++DEFINE_PCI_CONFIG(read, dword, u32 *)
++
++DEFINE_PCI_CONFIG(write, byte, u8)
++DEFINE_PCI_CONFIG(write, word, u16)
++DEFINE_PCI_CONFIG(write, dword, u32)
++
++static int conf_space_read(struct pci_dev *dev,
++			   const struct config_field_entry *entry,
++			   int offset, u32 *value)
++{
++	int ret = 0;
++	const struct config_field *field = entry->field;
++
++	*value = 0;
++
++	switch (field->size) {
++	case 1:
++		if (field->u.b.read)
++			ret = field->u.b.read(dev, offset, (u8 *) value,
++					      entry->data);
++		break;
++	case 2:
++		if (field->u.w.read)
++			ret = field->u.w.read(dev, offset, (u16 *) value,
++					      entry->data);
++		break;
++	case 4:
++		if (field->u.dw.read)
++			ret = field->u.dw.read(dev, offset, value, entry->data);
++		break;
++	}
++	return ret;
++}
++
++static int conf_space_write(struct pci_dev *dev,
++			    const struct config_field_entry *entry,
++			    int offset, u32 value)
++{
++	int ret = 0;
++	const struct config_field *field = entry->field;
++
++	switch (field->size) {
++	case 1:
++		if (field->u.b.write)
++			ret = field->u.b.write(dev, offset, (u8) value,
++					       entry->data);
++		break;
++	case 2:
++		if (field->u.w.write)
++			ret = field->u.w.write(dev, offset, (u16) value,
++					       entry->data);
++		break;
++	case 4:
++		if (field->u.dw.write)
++			ret = field->u.dw.write(dev, offset, value,
++						entry->data);
++		break;
++	}
++	return ret;
++}
++
++static inline u32 get_mask(int size)
++{
++	if (size == 1)
++		return 0xff;
++	else if (size == 2)
++		return 0xffff;
++	else
++		return 0xffffffff;
++}
++
++static inline int valid_request(int offset, int size)
++{
++	/* Validate request (no un-aligned requests) */
++	if ((size == 1 || size == 2 || size == 4) && (offset % size) == 0)
++		return 1;
++	return 0;
++}
++
++static inline u32 merge_value(u32 val, u32 new_val, u32 new_val_mask,
++			      int offset)
++{
++	if (offset >= 0) {
++		new_val_mask <<= (offset * 8);
++		new_val <<= (offset * 8);
++	} else {
++		new_val_mask >>= (offset * -8);
++		new_val >>= (offset * -8);
++	}
++	val = (val & ~new_val_mask) | (new_val & new_val_mask);
++
++	return val;
++}
++
++static int pcibios_err_to_errno(int err)
++{
++	switch (err) {
++	case PCIBIOS_SUCCESSFUL:
++		return XEN_PCI_ERR_success;
++	case PCIBIOS_DEVICE_NOT_FOUND:
++		return XEN_PCI_ERR_dev_not_found;
++	case PCIBIOS_BAD_REGISTER_NUMBER:
++		return XEN_PCI_ERR_invalid_offset;
++	case PCIBIOS_FUNC_NOT_SUPPORTED:
++		return XEN_PCI_ERR_not_implemented;
++	case PCIBIOS_SET_FAILED:
++		return XEN_PCI_ERR_access_denied;
++	}
++	return err;
++}
++
++int pciback_config_read(struct pci_dev *dev, int offset, int size,
++			u32 * ret_val)
++{
++	int err = 0;
++	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
++	const struct config_field_entry *cfg_entry;
++	const struct config_field *field;
++	int req_start, req_end, field_start, field_end;
++	/* if read fails for any reason, return 0 (as if device didn't respond) */
++	u32 value = 0, tmp_val;
++
++	if (unlikely(verbose_request))
++		printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x\n",
++		       pci_name(dev), size, offset);
++
++	if (!valid_request(offset, size)) {
++		err = XEN_PCI_ERR_invalid_offset;
++		goto out;
++	}
++
++	/* Get the real value first, then modify as appropriate */
++	switch (size) {
++	case 1:
++		err = pci_read_config_byte(dev, offset, (u8 *) & value);
++		break;
++	case 2:
++		err = pci_read_config_word(dev, offset, (u16 *) & value);
++		break;
++	case 4:
++		err = pci_read_config_dword(dev, offset, &value);
++		break;
++	}
++
++	list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
++		field = cfg_entry->field;
++
++		req_start = offset;
++		req_end = offset + size;
++		field_start = OFFSET(cfg_entry);
++		field_end = OFFSET(cfg_entry) + field->size;
++
++		if ((req_start >= field_start && req_start < field_end)
++		    || (req_end > field_start && req_end <= field_end)) {
++			err = conf_space_read(dev, cfg_entry, field_start,
++					      &tmp_val);
++			if (err)
++				goto out;
++
++			value = merge_value(value, tmp_val,
++					    get_mask(field->size),
++					    field_start - req_start);
++		}
++	}
++
++      out:
++	if (unlikely(verbose_request))
++		printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x = %x\n",
++		       pci_name(dev), size, offset, value);
++
++	*ret_val = value;
++	return pcibios_err_to_errno(err);
++}
++
++int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value)
++{
++	int err = 0, handled = 0;
++	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
++	const struct config_field_entry *cfg_entry;
++	const struct config_field *field;
++	u32 tmp_val;
++	int req_start, req_end, field_start, field_end;
++
++	if (unlikely(verbose_request))
++		printk(KERN_DEBUG
++		       "pciback: %s: write request %d bytes at 0x%x = %x\n",
++		       pci_name(dev), size, offset, value);
++
++	if (!valid_request(offset, size))
++		return XEN_PCI_ERR_invalid_offset;
++
++	list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
++		field = cfg_entry->field;
++
++		req_start = offset;
++		req_end = offset + size;
++		field_start = OFFSET(cfg_entry);
++		field_end = OFFSET(cfg_entry) + field->size;
++
++		if ((req_start >= field_start && req_start < field_end)
++		    || (req_end > field_start && req_end <= field_end)) {
++			tmp_val = 0;
++
++			err = pciback_config_read(dev, field_start,
++						  field->size, &tmp_val);
++			if (err)
++				break;
++
++			tmp_val = merge_value(tmp_val, value, get_mask(size),
++					      req_start - field_start);
++
++			err = conf_space_write(dev, cfg_entry, field_start,
++					       tmp_val);
++
++			/* handled is set true here, but not every byte
++			 * may have been written! Properly detecting if
++			 * every byte is handled is unnecessary as the
++			 * flag is used to detect devices that need
++			 * special helpers to work correctly.
++			 */
++			handled = 1;
++		}
++	}
++
++	if (!handled && !err) {
++		/* By default, anything not specifically handled above is
++		 * read-only. The permissive flag changes this behavior so
++		 * that anything not specifically handled above is writable.
++		 * This means that some fields may still be read-only because
++		 * they have entries in the config_field list that intercept
++		 * the write and do nothing. */
++		if (dev_data->permissive || permissive) {
++			switch (size) {
++			case 1:
++				err = pci_write_config_byte(dev, offset,
++							    (u8) value);
++				break;
++			case 2:
++				err = pci_write_config_word(dev, offset,
++							    (u16) value);
++				break;
++			case 4:
++				err = pci_write_config_dword(dev, offset,
++							     (u32) value);
++				break;
++			}
++		} else if (!dev_data->warned_on_write) {
++			dev_data->warned_on_write = 1;
++			dev_warn(&dev->dev, "Driver tried to write to a "
++				 "read-only configuration space field at offset "
++				 "0x%x, size %d. This may be harmless, but if "
++				 "you have problems with your device:\n"
++				 "1) see permissive attribute in sysfs\n"
++				 "2) report problems to the xen-devel "
++				 "mailing list along with details of your "
++				 "device obtained from lspci.\n", offset, size);
++		}
++	}
++
++	return pcibios_err_to_errno(err);
++}
++
++void pciback_config_free_dyn_fields(struct pci_dev *dev)
++{
++	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
++	struct config_field_entry *cfg_entry, *t;
++	const struct config_field *field;
++
++	dev_dbg(&dev->dev,
++		"free-ing dynamically allocated virtual configuration space fields\n");
++	if (!dev_data)
++		return;
++
++	list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) {
++		field = cfg_entry->field;
++
++		if (field->clean) {
++			field->clean((struct config_field *)field);
++
++			if (cfg_entry->data)
++				kfree(cfg_entry->data);
++
++			list_del(&cfg_entry->list);
++			kfree(cfg_entry);
++		}
++
++	}
++}
++
++void pciback_config_reset_dev(struct pci_dev *dev)
++{
++	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
++	const struct config_field_entry *cfg_entry;
++	const struct config_field *field;
++
++	dev_dbg(&dev->dev, "resetting virtual configuration space\n");
++	if (!dev_data)
++		return;
++
++	list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
++		field = cfg_entry->field;
++
++		if (field->reset)
++			field->reset(dev, OFFSET(cfg_entry), cfg_entry->data);
++	}
++}
++
++void pciback_config_free_dev(struct pci_dev *dev)
++{
++	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
++	struct config_field_entry *cfg_entry, *t;
++	const struct config_field *field;
++
++	dev_dbg(&dev->dev, "free-ing virtual configuration space fields\n");
++	if (!dev_data)
++		return;
++
++	list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) {
++		list_del(&cfg_entry->list);
++
++		field = cfg_entry->field;
++
++		if (field->release)
++			field->release(dev, OFFSET(cfg_entry), cfg_entry->data);
++
++		kfree(cfg_entry);
++	}
++}
++
++int pciback_config_add_field_offset(struct pci_dev *dev,
++				    const struct config_field *field,
++				    unsigned int base_offset)
++{
++	int err = 0;
++	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
++	struct config_field_entry *cfg_entry;
++	void *tmp;
++
++	cfg_entry = kmalloc(sizeof(*cfg_entry), GFP_KERNEL);
++	if (!cfg_entry) {
++		err = -ENOMEM;
++		goto out;
++	}
++
++	cfg_entry->data = NULL;
++	cfg_entry->field = field;
++	cfg_entry->base_offset = base_offset;
++
++	/* silently ignore duplicate fields */
++	err = pciback_field_is_dup(dev,OFFSET(cfg_entry));
++	if (err)
++		goto out;
++
++	if (field->init) {
++		tmp = field->init(dev, OFFSET(cfg_entry));
++
++		if (IS_ERR(tmp)) {
++			err = PTR_ERR(tmp);
++			goto out;
++		}
++
++		cfg_entry->data = tmp;
++	}
++
++	dev_dbg(&dev->dev, "added config field at offset 0x%02x\n",
++		OFFSET(cfg_entry));
++	list_add_tail(&cfg_entry->list, &dev_data->config_fields);
++
++      out:
++	if (err)
++		kfree(cfg_entry);
++
++	return err;
++}
++
++/* This sets up the device's virtual configuration space to keep track of 
++ * certain registers (like the base address registers (BARs) so that we can
++ * keep the client from manipulating them directly.
++ */
++int pciback_config_init_dev(struct pci_dev *dev)
++{
++	int err = 0;
++	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
++
++	dev_dbg(&dev->dev, "initializing virtual configuration space\n");
++
++	INIT_LIST_HEAD(&dev_data->config_fields);
++
++	err = pciback_config_header_add_fields(dev);
++	if (err)
++		goto out;
++
++	err = pciback_config_capability_add_fields(dev);
++	if (err)
++		goto out;
++
++	err = pciback_config_quirks_init(dev);
++
++      out:
++	return err;
++}
++
++int pciback_config_init(void)
++{
++	return pciback_config_capability_init();
++}
+diff --git a/drivers/xen/pciback/conf_space.h b/drivers/xen/pciback/conf_space.h
+new file mode 100644
+index 0000000..fe746ef
+--- /dev/null
++++ b/drivers/xen/pciback/conf_space.h
+@@ -0,0 +1,126 @@
++/*
++ * PCI Backend - Common data structures for overriding the configuration space
++ *
++ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ */
++
++#ifndef __XEN_PCIBACK_CONF_SPACE_H__
++#define __XEN_PCIBACK_CONF_SPACE_H__
++
++#include <linux/list.h>
++#include <linux/err.h>
++
++/* conf_field_init can return an errno in a ptr with ERR_PTR() */
++typedef void *(*conf_field_init) (struct pci_dev * dev, int offset);
++typedef void (*conf_field_reset) (struct pci_dev * dev, int offset, void *data);
++typedef void (*conf_field_free) (struct pci_dev * dev, int offset, void *data);
++
++typedef int (*conf_dword_write) (struct pci_dev * dev, int offset, u32 value,
++				 void *data);
++typedef int (*conf_word_write) (struct pci_dev * dev, int offset, u16 value,
++				void *data);
++typedef int (*conf_byte_write) (struct pci_dev * dev, int offset, u8 value,
++				void *data);
++typedef int (*conf_dword_read) (struct pci_dev * dev, int offset, u32 * value,
++				void *data);
++typedef int (*conf_word_read) (struct pci_dev * dev, int offset, u16 * value,
++			       void *data);
++typedef int (*conf_byte_read) (struct pci_dev * dev, int offset, u8 * value,
++			       void *data);
++
++/* These are the fields within the configuration space which we
++ * are interested in intercepting reads/writes to and changing their
++ * values.
++ */
++struct config_field {
++	unsigned int offset;
++	unsigned int size;
++	unsigned int mask;
++	conf_field_init init;
++	conf_field_reset reset;
++	conf_field_free release;
++	void (*clean) (struct config_field * field);
++	union {
++		struct {
++			conf_dword_write write;
++			conf_dword_read read;
++		} dw;
++		struct {
++			conf_word_write write;
++			conf_word_read read;
++		} w;
++		struct {
++			conf_byte_write write;
++			conf_byte_read read;
++		} b;
++	} u;
++	struct list_head list;
++};
++
++struct config_field_entry {
++	struct list_head list;
++	const struct config_field *field;
++	unsigned int base_offset;
++	void *data;
++};
++
++#define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset)
++
++/* Add fields to a device - the add_fields macro expects to get a pointer to
++ * the first entry in an array (of which the ending is marked by size==0)
++ */
++int pciback_config_add_field_offset(struct pci_dev *dev,
++				    const struct config_field *field,
++				    unsigned int offset);
++
++static inline int pciback_config_add_field(struct pci_dev *dev,
++					   const struct config_field *field)
++{
++	return pciback_config_add_field_offset(dev, field, 0);
++}
++
++static inline int pciback_config_add_fields(struct pci_dev *dev,
++					    const struct config_field *field)
++{
++	int i, err = 0;
++	for (i = 0; field[i].size != 0; i++) {
++		err = pciback_config_add_field(dev, &field[i]);
++		if (err)
++			break;
++	}
++	return err;
++}
++
++static inline int pciback_config_add_fields_offset(struct pci_dev *dev,
++						   const struct config_field *field,
++						   unsigned int offset)
++{
++	int i, err = 0;
++	for (i = 0; field[i].size != 0; i++) {
++		err = pciback_config_add_field_offset(dev, &field[i], offset);
++		if (err)
++			break;
++	}
++	return err;
++}
++
++/* Read/Write the real configuration space */
++int pciback_read_config_byte(struct pci_dev *dev, int offset, u8 * value,
++			     void *data);
++int pciback_read_config_word(struct pci_dev *dev, int offset, u16 * value,
++			     void *data);
++int pciback_read_config_dword(struct pci_dev *dev, int offset, u32 * value,
++			      void *data);
++int pciback_write_config_byte(struct pci_dev *dev, int offset, u8 value,
++			      void *data);
++int pciback_write_config_word(struct pci_dev *dev, int offset, u16 value,
++			      void *data);
++int pciback_write_config_dword(struct pci_dev *dev, int offset, u32 value,
++			       void *data);
++
++int pciback_config_capability_init(void);
++
++int pciback_config_header_add_fields(struct pci_dev *dev);
++int pciback_config_capability_add_fields(struct pci_dev *dev);
++
++#endif				/* __XEN_PCIBACK_CONF_SPACE_H__ */
+diff --git a/drivers/xen/pciback/conf_space_capability.c b/drivers/xen/pciback/conf_space_capability.c
+new file mode 100644
+index 0000000..50efca4
+--- /dev/null
++++ b/drivers/xen/pciback/conf_space_capability.c
+@@ -0,0 +1,69 @@
++/*
++ * PCI Backend - Handles the virtual fields found on the capability lists
++ *               in the configuration space.
++ *
++ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ */
++
++#include <linux/kernel.h>
++#include <linux/pci.h>
++#include "pciback.h"
++#include "conf_space.h"
++#include "conf_space_capability.h"
++
++static LIST_HEAD(capabilities);
++
++static const struct config_field caplist_header[] = {
++	{
++	 .offset    = PCI_CAP_LIST_ID,
++	 .size      = 2, /* encompass PCI_CAP_LIST_ID & PCI_CAP_LIST_NEXT */
++	 .u.w.read  = pciback_read_config_word,
++	 .u.w.write = NULL,
++	},
++	{}
++};
++
++static inline void register_capability(struct pciback_config_capability *cap)
++{
++	list_add_tail(&cap->cap_list, &capabilities);
++}
++
++int pciback_config_capability_add_fields(struct pci_dev *dev)
++{
++	int err = 0;
++	struct pciback_config_capability *cap;
++	int cap_offset;
++
++	list_for_each_entry(cap, &capabilities, cap_list) {
++		cap_offset = pci_find_capability(dev, cap->capability);
++		if (cap_offset) {
++			dev_dbg(&dev->dev, "Found capability 0x%x at 0x%x\n",
++				cap->capability, cap_offset);
++
++			err = pciback_config_add_fields_offset(dev,
++							       caplist_header,
++							       cap_offset);
++			if (err)
++				goto out;
++			err = pciback_config_add_fields_offset(dev,
++							       cap->fields,
++							       cap_offset);
++			if (err)
++				goto out;
++		}
++	}
++
++      out:
++	return err;
++}
++
++extern struct pciback_config_capability pciback_config_capability_vpd;
++extern struct pciback_config_capability pciback_config_capability_pm;
++
++int pciback_config_capability_init(void)
++{
++	register_capability(&pciback_config_capability_vpd);
++	register_capability(&pciback_config_capability_pm);
++
++	return 0;
++}
+diff --git a/drivers/xen/pciback/conf_space_capability.h b/drivers/xen/pciback/conf_space_capability.h
+new file mode 100644
+index 0000000..823392e
+--- /dev/null
++++ b/drivers/xen/pciback/conf_space_capability.h
+@@ -0,0 +1,23 @@
++/*
++ * PCI Backend - Data structures for special overlays for structures on
++ *               the capability list.
++ *
++ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ */
++
++#ifndef __PCIBACK_CONFIG_CAPABILITY_H__
++#define __PCIBACK_CONFIG_CAPABILITY_H__
++
++#include <linux/pci.h>
++#include <linux/list.h>
++
++struct pciback_config_capability {
++	struct list_head cap_list;
++
++	int capability;
++
++	/* If the device has the capability found above, add these fields */
++	const struct config_field *fields;
++};
++
++#endif
+diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c
+new file mode 100644
+index 0000000..762e396
+--- /dev/null
++++ b/drivers/xen/pciback/conf_space_capability_msi.c
+@@ -0,0 +1,79 @@
++/*
++ * PCI Backend -- Configuration overlay for MSI capability
++ */
++#include <linux/pci.h>
++#include <linux/slab.h>
++#include "conf_space.h"
++#include "conf_space_capability.h"
++#include <xen/interface/io/pciif.h>
++#include "pciback.h"
++
++int pciback_enable_msi(struct pciback_device *pdev,
++		struct pci_dev *dev, struct xen_pci_op *op)
++{
++	int otherend = pdev->xdev->otherend_id;
++	int status;
++
++	status = pci_enable_msi(dev);
++
++	if (status) {
++		printk("error enable msi for guest %x status %x\n", otherend, status);
++		op->value = 0;
++		return XEN_PCI_ERR_op_failed;
++	}
++
++	op->value = dev->irq;
++	return 0;
++}
++
++int pciback_disable_msi(struct pciback_device *pdev,
++		struct pci_dev *dev, struct xen_pci_op *op)
++{
++	pci_disable_msi(dev);
++
++	op->value = dev->irq;
++	return 0;
++}
++
++int pciback_enable_msix(struct pciback_device *pdev,
++		struct pci_dev *dev, struct xen_pci_op *op)
++{
++	int i, result;
++	struct msix_entry *entries;
++
++	if (op->value > SH_INFO_MAX_VEC)
++		return -EINVAL;
++
++	entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL);
++	if (entries == NULL)
++		return -ENOMEM;
++
++	for (i = 0; i < op->value; i++) {
++		entries[i].entry = op->msix_entries[i].entry;
++		entries[i].vector = op->msix_entries[i].vector;
++	}
++
++	result = pci_enable_msix(dev, entries, op->value);
++
++	for (i = 0; i < op->value; i++) {
++		op->msix_entries[i].entry = entries[i].entry;
++		op->msix_entries[i].vector = entries[i].vector;
++	}
++
++	kfree(entries);
++
++	op->value = result;
++
++	return result;
++}
++
++int pciback_disable_msix(struct pciback_device *pdev,
++		struct pci_dev *dev, struct xen_pci_op *op)
++{
++
++	pci_disable_msix(dev);
++
++	op->value = dev->irq;
++	return 0;
++}
++
+diff --git a/drivers/xen/pciback/conf_space_capability_pm.c b/drivers/xen/pciback/conf_space_capability_pm.c
+new file mode 100644
+index 0000000..e2f99c7
+--- /dev/null
++++ b/drivers/xen/pciback/conf_space_capability_pm.c
+@@ -0,0 +1,126 @@
++/*
++ * PCI Backend - Configuration space overlay for power management
++ *
++ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ */
++
++#include <linux/pci.h>
++#include "conf_space.h"
++#include "conf_space_capability.h"
++
++static int pm_caps_read(struct pci_dev *dev, int offset, u16 *value,
++			void *data)
++{
++	int err;
++	u16 real_value;
++
++	err = pci_read_config_word(dev, offset, &real_value);
++	if (err)
++		goto out;
++
++	*value = real_value & ~PCI_PM_CAP_PME_MASK;
++
++      out:
++	return err;
++}
++
++/* PM_OK_BITS specifies the bits that the driver domain is allowed to change.
++ * Can't allow driver domain to enable PMEs - they're shared */
++#define PM_OK_BITS (PCI_PM_CTRL_PME_STATUS|PCI_PM_CTRL_DATA_SEL_MASK)
++
++static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value,
++			 void *data)
++{
++	int err;
++	u16 old_value;
++	pci_power_t new_state, old_state;
++
++	err = pci_read_config_word(dev, offset, &old_value);
++	if (err)
++		goto out;
++
++	old_state = (pci_power_t)(old_value & PCI_PM_CTRL_STATE_MASK);
++	new_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK);
++
++	new_value &= PM_OK_BITS;
++	if ((old_value & PM_OK_BITS) != new_value) {
++		new_value = (old_value & ~PM_OK_BITS) | new_value;
++		err = pci_write_config_word(dev, offset, new_value);
++		if (err)
++			goto out;
++	}
++
++	/* Let pci core handle the power management change */
++	dev_dbg(&dev->dev, "set power state to %x\n", new_state);
++	err = pci_set_power_state(dev, new_state);
++	if (err) {
++		err = PCIBIOS_SET_FAILED;
++		goto out;
++	}
++
++	/*
++	 * Device may lose PCI config info on D3->D0 transition. This
++	 * is a problem for some guests which will not reset BARs. Even
++	 * those that have a go will be foiled by our BAR-write handler
++	 * which will discard the write! Since Linux won't re-init
++	 * the config space automatically in all cases, we do it here.
++	 * Future: Should we re-initialise all first 64 bytes of config space?
++	 */
++	if (new_state == PCI_D0 &&
++	    (old_state == PCI_D3hot || old_state == PCI_D3cold) &&
++	    !(old_value & PCI_PM_CTRL_NO_SOFT_RESET))
++		pci_restore_bars(dev);
++
++ out:
++	return err;
++}
++
++/* Ensure PMEs are disabled */
++static void *pm_ctrl_init(struct pci_dev *dev, int offset)
++{
++	int err;
++	u16 value;
++
++	err = pci_read_config_word(dev, offset, &value);
++	if (err)
++		goto out;
++
++	if (value & PCI_PM_CTRL_PME_ENABLE) {
++		value &= ~PCI_PM_CTRL_PME_ENABLE;
++		err = pci_write_config_word(dev, offset, value);
++	}
++
++      out:
++	return ERR_PTR(err);
++}
++
++static const struct config_field caplist_pm[] = {
++	{
++		.offset     = PCI_PM_PMC,
++		.size       = 2,
++		.u.w.read   = pm_caps_read,
++	},
++	{
++		.offset     = PCI_PM_CTRL,
++		.size       = 2,
++		.init       = pm_ctrl_init,
++		.u.w.read   = pciback_read_config_word,
++		.u.w.write  = pm_ctrl_write,
++	},
++	{
++		.offset     = PCI_PM_PPB_EXTENSIONS,
++		.size       = 1,
++		.u.b.read   = pciback_read_config_byte,
++	},
++	{
++		.offset     = PCI_PM_DATA_REGISTER,
++		.size       = 1,
++		.u.b.read   = pciback_read_config_byte,
++	},
++	{}
++};
++
++struct pciback_config_capability pciback_config_capability_pm = {
++	.capability = PCI_CAP_ID_PM,
++	.fields = caplist_pm,
++};
+diff --git a/drivers/xen/pciback/conf_space_capability_vpd.c b/drivers/xen/pciback/conf_space_capability_vpd.c
+new file mode 100644
+index 0000000..920cb4a
+--- /dev/null
++++ b/drivers/xen/pciback/conf_space_capability_vpd.c
+@@ -0,0 +1,40 @@
++/*
++ * PCI Backend - Configuration space overlay for Vital Product Data
++ *
++ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ */
++
++#include <linux/pci.h>
++#include "conf_space.h"
++#include "conf_space_capability.h"
++
++static int vpd_address_write(struct pci_dev *dev, int offset, u16 value,
++			     void *data)
++{
++	/* Disallow writes to the vital product data */
++	if (value & PCI_VPD_ADDR_F)
++		return PCIBIOS_SET_FAILED;
++	else
++		return pci_write_config_word(dev, offset, value);
++}
++
++static const struct config_field caplist_vpd[] = {
++	{
++	 .offset    = PCI_VPD_ADDR,
++	 .size      = 2,
++	 .u.w.read  = pciback_read_config_word,
++	 .u.w.write = vpd_address_write,
++	 },
++	{
++	 .offset     = PCI_VPD_DATA,
++	 .size       = 4,
++	 .u.dw.read  = pciback_read_config_dword,
++	 .u.dw.write = NULL,
++	 },
++	{}
++};
++ 
++struct pciback_config_capability pciback_config_capability_vpd = {
++	.capability = PCI_CAP_ID_VPD,
++	.fields = caplist_vpd,
++};
+diff --git a/drivers/xen/pciback/conf_space_header.c b/drivers/xen/pciback/conf_space_header.c
+new file mode 100644
+index 0000000..f794e12
+--- /dev/null
++++ b/drivers/xen/pciback/conf_space_header.c
+@@ -0,0 +1,317 @@
++/*
++ * PCI Backend - Handles the virtual fields in the configuration space headers.
++ *
++ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ */
++
++#include <linux/kernel.h>
++#include <linux/pci.h>
++#include "pciback.h"
++#include "conf_space.h"
++
++struct pci_bar_info {
++	u32 val;
++	u32 len_val;
++	int which;
++};
++
++#define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO))
++#define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER)
++
++static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
++{
++	int err;
++
++	if (!dev->is_enabled && is_enable_cmd(value)) {
++		if (unlikely(verbose_request))
++			printk(KERN_DEBUG "pciback: %s: enable\n",
++			       pci_name(dev));
++		err = pci_enable_device(dev);
++		if (err)
++			return err;
++	} else if (dev->is_enabled && !is_enable_cmd(value)) {
++		if (unlikely(verbose_request))
++			printk(KERN_DEBUG "pciback: %s: disable\n",
++			       pci_name(dev));
++		pci_disable_device(dev);
++	}
++
++	if (!dev->is_busmaster && is_master_cmd(value)) {
++		if (unlikely(verbose_request))
++			printk(KERN_DEBUG "pciback: %s: set bus master\n",
++			       pci_name(dev));
++		pci_set_master(dev);
++	}
++
++	if (value & PCI_COMMAND_INVALIDATE) {
++		if (unlikely(verbose_request))
++			printk(KERN_DEBUG
++			       "pciback: %s: enable memory-write-invalidate\n",
++			       pci_name(dev));
++		err = pci_set_mwi(dev);
++		if (err) {
++			printk(KERN_WARNING
++			       "pciback: %s: cannot enable memory-write-invalidate (%d)\n",
++			       pci_name(dev), err);
++			value &= ~PCI_COMMAND_INVALIDATE;
++		}
++	}
++
++	return pci_write_config_word(dev, offset, value);
++}
++
++static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data)
++{
++	struct pci_bar_info *bar = data;
++
++	if (unlikely(!bar)) {
++		printk(KERN_WARNING "pciback: driver data not found for %s\n",
++		       pci_name(dev));
++		return XEN_PCI_ERR_op_failed;
++	}
++
++	/* A write to obtain the length must happen as a 32-bit write.
++	 * This does not (yet) support writing individual bytes
++	 */
++	if (value == ~PCI_ROM_ADDRESS_ENABLE)
++		bar->which = 1;
++	else {
++		u32 tmpval;
++		pci_read_config_dword(dev, offset, &tmpval);
++		if (tmpval != bar->val && value == bar->val) {
++			/* Allow restoration of bar value. */
++			pci_write_config_dword(dev, offset, bar->val);
++		}
++		bar->which = 0;
++	}
++
++	/* Do we need to support enabling/disabling the rom address here? */
++
++	return 0;
++}
++
++/* For the BARs, only allow writes which write ~0 or
++ * the correct resource information
++ * (Needed for when the driver probes the resource usage)
++ */
++static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data)
++{
++	struct pci_bar_info *bar = data;
++
++	if (unlikely(!bar)) {
++		printk(KERN_WARNING "pciback: driver data not found for %s\n",
++		       pci_name(dev));
++		return XEN_PCI_ERR_op_failed;
++	}
++
++	/* A write to obtain the length must happen as a 32-bit write.
++	 * This does not (yet) support writing individual bytes
++	 */
++	if (value == ~0)
++		bar->which = 1;
++	else {
++		u32 tmpval;
++		pci_read_config_dword(dev, offset, &tmpval);
++		if (tmpval != bar->val && value == bar->val) {
++			/* Allow restoration of bar value. */
++			pci_write_config_dword(dev, offset, bar->val);
++		}
++		bar->which = 0;
++	}
++
++	return 0;
++}
++
++static int bar_read(struct pci_dev *dev, int offset, u32 * value, void *data)
++{
++	struct pci_bar_info *bar = data;
++
++	if (unlikely(!bar)) {
++		printk(KERN_WARNING "pciback: driver data not found for %s\n",
++		       pci_name(dev));
++		return XEN_PCI_ERR_op_failed;
++	}
++
++	*value = bar->which ? bar->len_val : bar->val;
++
++	return 0;
++}
++
++static inline void read_dev_bar(struct pci_dev *dev,
++				struct pci_bar_info *bar_info, int offset,
++				u32 len_mask)
++{
++	pci_read_config_dword(dev, offset, &bar_info->val);
++	pci_write_config_dword(dev, offset, len_mask);
++	pci_read_config_dword(dev, offset, &bar_info->len_val);
++	pci_write_config_dword(dev, offset, bar_info->val);
++}
++
++static void *bar_init(struct pci_dev *dev, int offset)
++{
++	struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL);
++
++	if (!bar)
++		return ERR_PTR(-ENOMEM);
++
++	read_dev_bar(dev, bar, offset, ~0);
++	bar->which = 0;
++
++	return bar;
++}
++
++static void *rom_init(struct pci_dev *dev, int offset)
++{
++	struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL);
++
++	if (!bar)
++		return ERR_PTR(-ENOMEM);
++
++	read_dev_bar(dev, bar, offset, ~PCI_ROM_ADDRESS_ENABLE);
++	bar->which = 0;
++
++	return bar;
++}
++
++static void bar_reset(struct pci_dev *dev, int offset, void *data)
++{
++	struct pci_bar_info *bar = data;
++
++	bar->which = 0;
++}
++
++static void bar_release(struct pci_dev *dev, int offset, void *data)
++{
++	kfree(data);
++}
++
++static int interrupt_read(struct pci_dev *dev, int offset, u8 * value,
++			  void *data)
++{
++	*value = (u8) dev->irq;
++
++	return 0;
++}
++
++static int bist_write(struct pci_dev *dev, int offset, u8 value, void *data)
++{
++	u8 cur_value;
++	int err;
++
++	err = pci_read_config_byte(dev, offset, &cur_value);
++	if (err)
++		goto out;
++
++	if ((cur_value & ~PCI_BIST_START) == (value & ~PCI_BIST_START)
++	    || value == PCI_BIST_START)
++		err = pci_write_config_byte(dev, offset, value);
++
++      out:
++	return err;
++}
++
++static const struct config_field header_common[] = {
++	{
++	 .offset    = PCI_COMMAND,
++	 .size      = 2,
++	 .u.w.read  = pciback_read_config_word,
++	 .u.w.write = command_write,
++	},
++	{
++	 .offset    = PCI_INTERRUPT_LINE,
++	 .size      = 1,
++	 .u.b.read  = interrupt_read,
++	},
++	{
++	 .offset    = PCI_INTERRUPT_PIN,
++	 .size      = 1,
++	 .u.b.read  = pciback_read_config_byte,
++	},
++	{
++	 /* Any side effects of letting driver domain control cache line? */
++	 .offset    = PCI_CACHE_LINE_SIZE,
++	 .size      = 1,
++	 .u.b.read  = pciback_read_config_byte,
++	 .u.b.write = pciback_write_config_byte,
++	},
++	{
++	 .offset    = PCI_LATENCY_TIMER,
++	 .size      = 1,
++	 .u.b.read  = pciback_read_config_byte,
++	},
++	{
++	 .offset    = PCI_BIST,
++	 .size      = 1,
++	 .u.b.read  = pciback_read_config_byte,
++	 .u.b.write = bist_write,
++	},
++	{}
++};
++
++#define CFG_FIELD_BAR(reg_offset) 			\
++	{ 						\
++	 .offset     = reg_offset, 			\
++	 .size       = 4, 				\
++	 .init       = bar_init, 			\
++	 .reset      = bar_reset, 			\
++	 .release    = bar_release, 			\
++	 .u.dw.read  = bar_read, 			\
++	 .u.dw.write = bar_write, 			\
++	 }
++
++#define CFG_FIELD_ROM(reg_offset) 			\
++	{ 						\
++	 .offset     = reg_offset, 			\
++	 .size       = 4, 				\
++	 .init       = rom_init, 			\
++	 .reset      = bar_reset, 			\
++	 .release    = bar_release, 			\
++	 .u.dw.read  = bar_read, 			\
++	 .u.dw.write = rom_write, 			\
++	 }
++
++static const struct config_field header_0[] = {
++	CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
++	CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
++	CFG_FIELD_BAR(PCI_BASE_ADDRESS_2),
++	CFG_FIELD_BAR(PCI_BASE_ADDRESS_3),
++	CFG_FIELD_BAR(PCI_BASE_ADDRESS_4),
++	CFG_FIELD_BAR(PCI_BASE_ADDRESS_5),
++	CFG_FIELD_ROM(PCI_ROM_ADDRESS),
++	{}
++};
++
++static const struct config_field header_1[] = {
++	CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
++	CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
++	CFG_FIELD_ROM(PCI_ROM_ADDRESS1),
++	{}
++};
++
++int pciback_config_header_add_fields(struct pci_dev *dev)
++{
++	int err;
++
++	err = pciback_config_add_fields(dev, header_common);
++	if (err)
++		goto out;
++
++	switch (dev->hdr_type) {
++	case PCI_HEADER_TYPE_NORMAL:
++		err = pciback_config_add_fields(dev, header_0);
++		break;
++
++	case PCI_HEADER_TYPE_BRIDGE:
++		err = pciback_config_add_fields(dev, header_1);
++		break;
++
++	default:
++		err = -EINVAL;
++		printk(KERN_ERR "pciback: %s: Unsupported header type %d!\n",
++		       pci_name(dev), dev->hdr_type);
++		break;
++	}
++
++      out:
++	return err;
++}
+diff --git a/drivers/xen/pciback/conf_space_quirks.c b/drivers/xen/pciback/conf_space_quirks.c
+new file mode 100644
+index 0000000..244a438
+--- /dev/null
++++ b/drivers/xen/pciback/conf_space_quirks.c
+@@ -0,0 +1,138 @@
++/*
++ * PCI Backend - Handle special overlays for broken devices.
++ *
++ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ * Author: Chris Bookholt <hap10 at epoch.ncsc.mil>
++ */
++
++#include <linux/kernel.h>
++#include <linux/pci.h>
++#include "pciback.h"
++#include "conf_space.h"
++#include "conf_space_quirks.h"
++
++LIST_HEAD(pciback_quirks);
++
++static inline const struct pci_device_id *
++match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
++{
++	if ((id->vendor == PCI_ANY_ID || id->vendor == dev->vendor) &&
++	    (id->device == PCI_ANY_ID || id->device == dev->device) &&
++	    (id->subvendor == PCI_ANY_ID || id->subvendor == dev->subsystem_vendor) &&
++	    (id->subdevice == PCI_ANY_ID || id->subdevice == dev->subsystem_device) &&
++	    !((id->class ^ dev->class) & id->class_mask))
++		return id;
++	return NULL;
++}
++
++struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev)
++{
++	struct pciback_config_quirk *tmp_quirk;
++
++	list_for_each_entry(tmp_quirk, &pciback_quirks, quirks_list)
++		if (match_one_device(&tmp_quirk->devid, dev) != NULL)
++			goto out;
++	tmp_quirk = NULL;
++	printk(KERN_DEBUG
++	       "quirk didn't match any device pciback knows about\n");
++      out:
++	return tmp_quirk;
++}
++
++static inline void register_quirk(struct pciback_config_quirk *quirk)
++{
++	list_add_tail(&quirk->quirks_list, &pciback_quirks);
++}
++
++int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg)
++{
++	int ret = 0;
++	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
++	struct config_field_entry *cfg_entry;
++
++	list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
++		if ( OFFSET(cfg_entry) == reg) {
++			ret = 1;
++			break;
++		}
++	}
++	return ret;
++}
++
++int pciback_config_quirks_add_field(struct pci_dev *dev, struct config_field
++				    *field)
++{
++	int err = 0;
++
++	switch (field->size) {
++	case 1:
++		field->u.b.read = pciback_read_config_byte;
++		field->u.b.write = pciback_write_config_byte;
++		break;
++	case 2:
++		field->u.w.read = pciback_read_config_word;
++		field->u.w.write = pciback_write_config_word;
++		break;
++	case 4:
++		field->u.dw.read = pciback_read_config_dword;
++		field->u.dw.write = pciback_write_config_dword;
++		break;
++	default:
++		err = -EINVAL;
++		goto out;
++	}
++
++	pciback_config_add_field(dev, field);
++
++      out:
++	return err;
++}
++
++int pciback_config_quirks_init(struct pci_dev *dev)
++{
++	struct pciback_config_quirk *quirk;
++	int ret = 0;
++
++	quirk = kzalloc(sizeof(*quirk), GFP_ATOMIC);
++	if (!quirk) {
++		ret = -ENOMEM;
++		goto out;
++	}
++
++	quirk->devid.vendor = dev->vendor;
++	quirk->devid.device = dev->device;
++	quirk->devid.subvendor = dev->subsystem_vendor;
++	quirk->devid.subdevice = dev->subsystem_device;
++	quirk->devid.class = 0;
++	quirk->devid.class_mask = 0;
++	quirk->devid.driver_data = 0UL;
++
++	quirk->pdev = dev;
++
++	register_quirk(quirk);
++      out:
++	return ret;
++}
++
++void pciback_config_field_free(struct config_field *field)
++{
++	kfree(field);
++}
++
++int pciback_config_quirk_release(struct pci_dev *dev)
++{
++	struct pciback_config_quirk *quirk;
++	int ret = 0;
++
++	quirk = pciback_find_quirk(dev);
++	if (!quirk) {
++		ret = -ENXIO;
++		goto out;
++	}
++
++	list_del(&quirk->quirks_list);
++	kfree(quirk);
++
++      out:
++	return ret;
++}
+diff --git a/drivers/xen/pciback/conf_space_quirks.h b/drivers/xen/pciback/conf_space_quirks.h
+new file mode 100644
+index 0000000..acd0e1a
+--- /dev/null
++++ b/drivers/xen/pciback/conf_space_quirks.h
+@@ -0,0 +1,35 @@
++/*
++ * PCI Backend - Data structures for special overlays for broken devices.
++ *
++ * Ryan Wilson <hap9 at epoch.ncsc.mil>
++ * Chris Bookholt <hap10 at epoch.ncsc.mil>
++ */
++
++#ifndef __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__
++#define __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__
++
++#include <linux/pci.h>
++#include <linux/list.h>
++
++struct pciback_config_quirk {
++	struct list_head quirks_list;
++	struct pci_device_id devid;
++	struct pci_dev *pdev;
++};
++
++struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev);
++
++int pciback_config_quirks_add_field(struct pci_dev *dev, struct config_field
++				    *field);
++
++int pciback_config_quirks_remove_field(struct pci_dev *dev, int reg);
++
++int pciback_config_quirks_init(struct pci_dev *dev);
++
++void pciback_config_field_free(struct config_field *field);
++
++int pciback_config_quirk_release(struct pci_dev *dev);
++
++int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg);
++
++#endif
+diff --git a/drivers/xen/pciback/controller.c b/drivers/xen/pciback/controller.c
+new file mode 100644
+index 0000000..294e48f
+--- /dev/null
++++ b/drivers/xen/pciback/controller.c
+@@ -0,0 +1,443 @@
++/*
++ * Copyright (C) 2007 Hewlett-Packard Development Company, L.P.
++ *      Alex Williamson <alex.williamson at hp.com>
++ *
++ * PCI "Controller" Backend - virtualize PCI bus topology based on PCI
++ * controllers.  Devices under the same PCI controller are exposed on the
++ * same virtual domain:bus.  Within a bus, device slots are virtualized
++ * to compact the bus.
++ *
++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++ */
++
++#include <linux/acpi.h>
++#include <linux/list.h>
++#include <linux/pci.h>
++#include <linux/spinlock.h>
++#include "pciback.h"
++
++#define PCI_MAX_BUSSES	255
++#define PCI_MAX_SLOTS	32
++
++struct controller_dev_entry {
++	struct list_head list;
++	struct pci_dev *dev;
++	unsigned int devfn;
++};
++
++struct controller_list_entry {
++	struct list_head list;
++	struct pci_controller *controller;
++	unsigned int domain;
++	unsigned int bus;
++	unsigned int next_devfn;
++	struct list_head dev_list;
++};
++
++struct controller_dev_data {
++	struct list_head list;
++	unsigned int next_domain;
++	unsigned int next_bus;
++	spinlock_t lock;
++};
++
++struct walk_info {
++	struct pciback_device *pdev;
++	int resource_count;
++	int root_num;
++};
++
++struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
++				    unsigned int domain, unsigned int bus,
++				    unsigned int devfn)
++{
++	struct controller_dev_data *dev_data = pdev->pci_dev_data;
++	struct controller_dev_entry *dev_entry;
++	struct controller_list_entry *cntrl_entry;
++	struct pci_dev *dev = NULL;
++	unsigned long flags;
++
++	spin_lock_irqsave(&dev_data->lock, flags);
++
++	list_for_each_entry(cntrl_entry, &dev_data->list, list) {
++		if (cntrl_entry->domain != domain ||
++		    cntrl_entry->bus != bus)
++			continue;
++
++		list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) {
++			if (devfn == dev_entry->devfn) {
++				dev = dev_entry->dev;
++				goto found;
++			}
++		}
++	}
++found:
++	spin_unlock_irqrestore(&dev_data->lock, flags);
++
++	return dev;
++}
++
++int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
++			int devid, publish_pci_dev_cb publish_cb)
++{
++	struct controller_dev_data *dev_data = pdev->pci_dev_data;
++	struct controller_dev_entry *dev_entry;
++	struct controller_list_entry *cntrl_entry;
++	struct pci_controller *dev_controller = PCI_CONTROLLER(dev);
++	unsigned long flags;
++	int ret = 0, found = 0;
++
++	spin_lock_irqsave(&dev_data->lock, flags);
++
++	/* Look to see if we already have a domain:bus for this controller */
++	list_for_each_entry(cntrl_entry, &dev_data->list, list) {
++		if (cntrl_entry->controller == dev_controller) {
++			found = 1;
++			break;
++		}
++	}
++
++	if (!found) {
++		cntrl_entry = kmalloc(sizeof(*cntrl_entry), GFP_ATOMIC);
++		if (!cntrl_entry) {
++			ret =  -ENOMEM;
++			goto out;
++		}
++
++		cntrl_entry->controller = dev_controller;
++		cntrl_entry->next_devfn = PCI_DEVFN(0, 0);
++
++		cntrl_entry->domain = dev_data->next_domain;
++		cntrl_entry->bus = dev_data->next_bus++;
++		if (dev_data->next_bus > PCI_MAX_BUSSES) {
++			dev_data->next_domain++;
++			dev_data->next_bus = 0;
++		}
++
++		INIT_LIST_HEAD(&cntrl_entry->dev_list);
++
++		list_add_tail(&cntrl_entry->list, &dev_data->list);
++	}
++
++	if (PCI_SLOT(cntrl_entry->next_devfn) > PCI_MAX_SLOTS) {
++		/*
++		 * While it seems unlikely, this can actually happen if
++		 * a controller has P2P bridges under it.
++		 */
++		xenbus_dev_fatal(pdev->xdev, -ENOSPC, "Virtual bus %04x:%02x "
++				 "is full, no room to export %04x:%02x:%02x.%x",
++				 cntrl_entry->domain, cntrl_entry->bus,
++				 pci_domain_nr(dev->bus), dev->bus->number,
++				 PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
++		ret = -ENOSPC;
++		goto out;
++	}
++
++	dev_entry = kmalloc(sizeof(*dev_entry), GFP_ATOMIC);
++	if (!dev_entry) {
++		if (list_empty(&cntrl_entry->dev_list)) {
++			list_del(&cntrl_entry->list);
++			kfree(cntrl_entry);
++		}
++		ret = -ENOMEM;
++		goto out;
++	}
++
++	dev_entry->dev = dev;
++	dev_entry->devfn = cntrl_entry->next_devfn;
++
++	list_add_tail(&dev_entry->list, &cntrl_entry->dev_list);
++
++	cntrl_entry->next_devfn += PCI_DEVFN(1, 0);
++
++out:
++	spin_unlock_irqrestore(&dev_data->lock, flags);
++
++	/* TODO: Publish virtual domain:bus:slot.func here. */
++
++	return ret;
++}
++
++void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
++{
++	struct controller_dev_data *dev_data = pdev->pci_dev_data;
++	struct controller_list_entry *cntrl_entry;
++	struct controller_dev_entry *dev_entry = NULL;
++	struct pci_dev *found_dev = NULL;
++	unsigned long flags;
++
++	spin_lock_irqsave(&dev_data->lock, flags);
++
++	list_for_each_entry(cntrl_entry, &dev_data->list, list) {
++		if (cntrl_entry->controller != PCI_CONTROLLER(dev))
++			continue;
++
++		list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) {
++			if (dev_entry->dev == dev) {
++				found_dev = dev_entry->dev;
++				break;
++			}
++		}
++	}
++
++	if (!found_dev) {
++		spin_unlock_irqrestore(&dev_data->lock, flags);
++		return;
++	}
++
++	list_del(&dev_entry->list);
++	kfree(dev_entry);
++
++	if (list_empty(&cntrl_entry->dev_list)) {
++		list_del(&cntrl_entry->list);
++		kfree(cntrl_entry);
++	}
++
++	spin_unlock_irqrestore(&dev_data->lock, flags);
++	pcistub_put_pci_dev(found_dev);
++}
++
++int pciback_init_devices(struct pciback_device *pdev)
++{
++	struct controller_dev_data *dev_data;
++
++	dev_data = kmalloc(sizeof(*dev_data), GFP_KERNEL);
++	if (!dev_data)
++		return -ENOMEM;
++
++	spin_lock_init(&dev_data->lock);
++
++	INIT_LIST_HEAD(&dev_data->list);
++
++	/* Starting domain:bus numbers */
++	dev_data->next_domain = 0;
++	dev_data->next_bus = 0;
++
++	pdev->pci_dev_data = dev_data;
++
++	return 0;
++}
++
++static acpi_status write_xenbus_resource(struct acpi_resource *res, void *data)
++{
++	struct walk_info *info = data;
++	struct acpi_resource_address64 addr;
++	acpi_status status;
++	int i, len, err;
++	char str[32], tmp[3];
++	unsigned char *ptr, *buf;
++
++	status = acpi_resource_to_address64(res, &addr);
++
++	/* Do we care about this range?  Let's check. */
++	if (!ACPI_SUCCESS(status) ||
++	    !(addr.resource_type == ACPI_MEMORY_RANGE ||
++	      addr.resource_type == ACPI_IO_RANGE) ||
++	    !addr.address_length || addr.producer_consumer != ACPI_PRODUCER)
++		return AE_OK;
++
++	/*
++	 * Furthermore, we really only care to tell the guest about
++	 * address ranges that require address translation of some sort.
++	 */
++	if (!(addr.resource_type == ACPI_MEMORY_RANGE &&
++	      addr.info.mem.translation) &&
++	    !(addr.resource_type == ACPI_IO_RANGE &&
++	      addr.info.io.translation))
++		return AE_OK;
++	   
++	/* Store the resource in xenbus for the guest */
++	len = snprintf(str, sizeof(str), "root-%d-resource-%d",
++		       info->root_num, info->resource_count);
++	if (unlikely(len >= (sizeof(str) - 1)))
++		return AE_OK;
++
++	buf = kzalloc((sizeof(*res) * 2) + 1, GFP_KERNEL);
++	if (!buf)
++		return AE_OK;
++
++	/* Clean out resource_source */
++	res->data.address64.resource_source.index = 0xFF;
++	res->data.address64.resource_source.string_length = 0;
++	res->data.address64.resource_source.string_ptr = NULL;
++
++	ptr = (unsigned char *)res;
++
++	/* Turn the acpi_resource into an ASCII byte stream */
++	for (i = 0; i < sizeof(*res); i++) {
++		snprintf(tmp, sizeof(tmp), "%02x", ptr[i]);
++		strncat(buf, tmp, 2);
++	}
++
++	err = xenbus_printf(XBT_NIL, info->pdev->xdev->nodename,
++			    str, "%s", buf);
++
++	if (!err)
++		info->resource_count++;
++
++	kfree(buf);
++
++	return AE_OK;
++}
++
++int pciback_publish_pci_roots(struct pciback_device *pdev,
++			      publish_pci_root_cb publish_root_cb)
++{
++	struct controller_dev_data *dev_data = pdev->pci_dev_data;
++	struct controller_list_entry *cntrl_entry;
++	int i, root_num, len, err = 0;
++	unsigned int domain, bus;
++	char str[64];
++	struct walk_info info;
++
++	spin_lock(&dev_data->lock);
++
++	list_for_each_entry(cntrl_entry, &dev_data->list, list) {
++		/* First publish all the domain:bus info */
++		err = publish_root_cb(pdev, cntrl_entry->domain,
++				      cntrl_entry->bus);
++		if (err)
++			goto out;
++
++		/*
++		 * Now figure out which root-%d this belongs to
++		 * so we can associate resources with it.
++		 */
++		err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
++				   "root_num", "%d", &root_num);
++
++		if (err != 1)
++			goto out;
++
++		for (i = 0; i < root_num; i++) {
++			len = snprintf(str, sizeof(str), "root-%d", i);
++			if (unlikely(len >= (sizeof(str) - 1))) {
++				err = -ENOMEM;
++				goto out;
++			}
++
++			err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
++					   str, "%x:%x", &domain, &bus);
++			if (err != 2)
++				goto out;
++
++			/* Is this the one we just published? */
++			if (domain == cntrl_entry->domain &&
++			    bus == cntrl_entry->bus)
++				break;
++		}
++
++		if (i == root_num)
++			goto out;
++
++		info.pdev = pdev;
++		info.resource_count = 0;
++		info.root_num = i;
++
++		/* Let ACPI do the heavy lifting on decoding resources */
++		acpi_walk_resources(cntrl_entry->controller->acpi_handle,
++				    METHOD_NAME__CRS, write_xenbus_resource,
++				    &info);
++
++		/* No resources. OK. On to the next one. */
++		if (!info.resource_count)
++			continue;
++
++		/* Store the number of resources we wrote for this root-%d */
++		len = snprintf(str, sizeof(str), "root-%d-resources", i);
++		if (unlikely(len >= (sizeof(str) - 1))) {
++			err = -ENOMEM;
++			goto out;
++		}
++
++		err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
++				    "%d", info.resource_count);
++		if (err)
++			goto out;
++	}
++
++	/* Finally, write some magic to synchronize with the guest. */
++	len = snprintf(str, sizeof(str), "root-resource-magic");
++	if (unlikely(len >= (sizeof(str) - 1))) {
++		err = -ENOMEM;
++		goto out;
++	}
++
++	err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
++			    "%lx", (sizeof(struct acpi_resource) * 2) + 1);
++
++out:
++	spin_unlock(&dev_data->lock);
++
++	return err;
++}
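++/*
++ * Editorial illustration (not part of the original patch) of the xenstore
++ * layout that pciback_publish_pci_roots() above produces under the backend's
++ * nodename; root_num and the root-%d entries themselves are assumed to be
++ * written elsewhere via publish_root_cb:
++ *
++ *   root_num             = number of root-%d entries
++ *   root-0               = "<domain>:<bus>"
++ *   root-0-resource-0    = hex-encoded struct acpi_resource
++ *   root-0-resources     = count of resources written for root-0
++ *   root-resource-magic  = 2 * sizeof(struct acpi_resource) + 1
++ */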
++
++void pciback_release_devices(struct pciback_device *pdev)
++{
++	struct controller_dev_data *dev_data = pdev->pci_dev_data;
++	struct controller_list_entry *cntrl_entry, *c;
++	struct controller_dev_entry *dev_entry, *d;
++
++	list_for_each_entry_safe(cntrl_entry, c, &dev_data->list, list) {
++		list_for_each_entry_safe(dev_entry, d,
++					 &cntrl_entry->dev_list, list) {
++			list_del(&dev_entry->list);
++			pcistub_put_pci_dev(dev_entry->dev);
++			kfree(dev_entry);
++		}
++		list_del(&cntrl_entry->list);
++		kfree(cntrl_entry);
++	}
++
++	kfree(dev_data);
++	pdev->pci_dev_data = NULL;
++}
++
++int pciback_get_pcifront_dev(struct pci_dev *pcidev, 
++		struct pciback_device *pdev, 
++		unsigned int *domain, unsigned int *bus, unsigned int *devfn)
++{
++	struct controller_dev_data *dev_data = pdev->pci_dev_data;
++	struct controller_dev_entry *dev_entry;
++	struct controller_list_entry *cntrl_entry;
++	unsigned long flags;
++	int found = 0;
++	spin_lock_irqsave(&dev_data->lock, flags);
++
++	list_for_each_entry(cntrl_entry, &dev_data->list, list) {
++		list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) {
++			if ( (dev_entry->dev->bus->number == 
++					pcidev->bus->number) &&
++			  	(dev_entry->dev->devfn ==
++					pcidev->devfn) &&
++				(pci_domain_nr(dev_entry->dev->bus) ==
++					pci_domain_nr(pcidev->bus)))
++			{
++				found = 1;
++				*domain = cntrl_entry->domain;
++				*bus = cntrl_entry->bus;
++				*devfn = dev_entry->devfn;
++				goto out;
++			}
++		}
++	}
++out:
++	spin_unlock_irqrestore(&dev_data->lock, flags);
++	return found;
++
++}
++
+diff --git a/drivers/xen/pciback/passthrough.c b/drivers/xen/pciback/passthrough.c
+new file mode 100644
+index 0000000..9e7a0c4
+--- /dev/null
++++ b/drivers/xen/pciback/passthrough.c
+@@ -0,0 +1,176 @@
++/*
++ * PCI Backend - Provides restricted access to the real PCI bus topology
++ *               to the frontend
++ *
++ *   Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ */
++
++#include <linux/list.h>
++#include <linux/pci.h>
++#include <linux/spinlock.h>
++#include "pciback.h"
++
++struct passthrough_dev_data {
++	/* Access to dev_list must be protected by lock */
++	struct list_head dev_list;
++	spinlock_t lock;
++};
++
++struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
++				    unsigned int domain, unsigned int bus,
++				    unsigned int devfn)
++{
++	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
++	struct pci_dev_entry *dev_entry;
++	struct pci_dev *dev = NULL;
++	unsigned long flags;
++
++	spin_lock_irqsave(&dev_data->lock, flags);
++
++	list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
++		if (domain == (unsigned int)pci_domain_nr(dev_entry->dev->bus)
++		    && bus == (unsigned int)dev_entry->dev->bus->number
++		    && devfn == dev_entry->dev->devfn) {
++			dev = dev_entry->dev;
++			break;
++		}
++	}
++
++	spin_unlock_irqrestore(&dev_data->lock, flags);
++
++	return dev;
++}
++
++int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
++			int devid, publish_pci_dev_cb publish_cb)
++{
++	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
++	struct pci_dev_entry *dev_entry;
++	unsigned long flags;
++	unsigned int domain, bus, devfn;
++	int err;
++
++	dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL);
++	if (!dev_entry)
++		return -ENOMEM;
++	dev_entry->dev = dev;
++
++	spin_lock_irqsave(&dev_data->lock, flags);
++	list_add_tail(&dev_entry->list, &dev_data->dev_list);
++	spin_unlock_irqrestore(&dev_data->lock, flags);
++
++	/* Publish this device. */
++	domain = (unsigned int)pci_domain_nr(dev->bus);
++	bus = (unsigned int)dev->bus->number;
++	devfn = dev->devfn;
++	err = publish_cb(pdev, domain, bus, devfn, devid);
++
++	return err;
++}
++
++void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
++{
++	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
++	struct pci_dev_entry *dev_entry, *t;
++	struct pci_dev *found_dev = NULL;
++	unsigned long flags;
++
++	spin_lock_irqsave(&dev_data->lock, flags);
++
++	list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
++		if (dev_entry->dev == dev) {
++			list_del(&dev_entry->list);
++			found_dev = dev_entry->dev;
++			kfree(dev_entry);
++		}
++	}
++
++	spin_unlock_irqrestore(&dev_data->lock, flags);
++
++	if (found_dev)
++		pcistub_put_pci_dev(found_dev);
++}
++
++int pciback_init_devices(struct pciback_device *pdev)
++{
++	struct passthrough_dev_data *dev_data;
++
++	dev_data = kmalloc(sizeof(*dev_data), GFP_KERNEL);
++	if (!dev_data)
++		return -ENOMEM;
++
++	spin_lock_init(&dev_data->lock);
++
++	INIT_LIST_HEAD(&dev_data->dev_list);
++
++	pdev->pci_dev_data = dev_data;
++
++	return 0;
++}
++
++int pciback_publish_pci_roots(struct pciback_device *pdev,
++			      publish_pci_root_cb publish_root_cb)
++{
++	int err = 0;
++	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
++	struct pci_dev_entry *dev_entry, *e;
++	struct pci_dev *dev;
++	int found;
++	unsigned int domain, bus;
++
++	spin_lock(&dev_data->lock);
++
++	list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
++		/* Only publish this device as a root if none of its
++		 * parent bridges are exported
++		 */
++		found = 0;
++		dev = dev_entry->dev->bus->self;
++		for (; !found && dev != NULL; dev = dev->bus->self) {
++			list_for_each_entry(e, &dev_data->dev_list, list) {
++				if (dev == e->dev) {
++					found = 1;
++					break;
++				}
++			}
++		}
++
++		domain = (unsigned int)pci_domain_nr(dev_entry->dev->bus);
++		bus = (unsigned int)dev_entry->dev->bus->number;
++
++		if (!found) {
++			err = publish_root_cb(pdev, domain, bus);
++			if (err)
++				break;
++		}
++	}
++
++	spin_unlock(&dev_data->lock);
++
++	return err;
++}
++
++void pciback_release_devices(struct pciback_device *pdev)
++{
++	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
++	struct pci_dev_entry *dev_entry, *t;
++
++	list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
++		list_del(&dev_entry->list);
++		pcistub_put_pci_dev(dev_entry->dev);
++		kfree(dev_entry);
++	}
++
++	kfree(dev_data);
++	pdev->pci_dev_data = NULL;
++}
++
++int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev, 
++		unsigned int *domain, unsigned int *bus, unsigned int *devfn)
++
++{
++	*domain = pci_domain_nr(pcidev->bus);
++	*bus = pcidev->bus->number;
++	*devfn = pcidev->devfn;
++	return 1;
++}
+diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
+new file mode 100644
+index 0000000..c481a73
+--- /dev/null
++++ b/drivers/xen/pciback/pci_stub.c
+@@ -0,0 +1,1316 @@
++/*
++ * PCI Stub Driver - Grabs devices in backend to be exported later
++ *
++ * Ryan Wilson <hap9 at epoch.ncsc.mil>
++ * Chris Bookholt <hap10 at epoch.ncsc.mil>
++ */
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/rwsem.h>
++#include <linux/list.h>
++#include <linux/spinlock.h>
++#include <linux/kref.h>
++#include <linux/pci.h>
++#include <linux/wait.h>
++#include <asm/atomic.h>
++#include <xen/evtchn.h>
++#include "pciback.h"
++#include "conf_space.h"
++#include "conf_space_quirks.h"
++
++static char *pci_devs_to_hide = NULL;
++wait_queue_head_t aer_wait_queue;
++/* Add a semaphore to synchronize AER handling with pciback remove/reconfigure
++ * operations; we want to avoid a pciback device being removed in the middle
++ * of AER handling.
++ */
++static DECLARE_RWSEM(pcistub_sem);
++module_param_named(hide, pci_devs_to_hide, charp, 0444);
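++/*
++ * Illustrative usage of the "hide" parameter (editorial sketch, not part of
++ * the original patch): devices to be seized are listed in parentheses, with
++ * or without a PCI domain, matching the sscanf formats in pcistub_init():
++ *
++ *   pciback.hide=(0000:03:00.0)(02:01.0)      when pciback is built in
++ *   modprobe pciback hide="(0000:03:00.0)"    when pciback is a module
++ *
++ * The "pciback." command-line prefix assumes the module is named pciback.
++ */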
++
++struct pcistub_device_id {
++	struct list_head slot_list;
++	int domain;
++	unsigned char bus;
++	unsigned int devfn;
++};
++static LIST_HEAD(pcistub_device_ids);
++static DEFINE_SPINLOCK(device_ids_lock);
++
++struct pcistub_device {
++	struct kref kref;
++	struct list_head dev_list;
++	spinlock_t lock;
++
++	struct pci_dev *dev;
++	struct pciback_device *pdev;	/* non-NULL if struct pci_dev is in use */
++};
++
++/* Access to pcistub_devices & seized_devices lists and the initialize_devices
++ * flag must be locked with pcistub_devices_lock
++ */
++static DEFINE_SPINLOCK(pcistub_devices_lock);
++static LIST_HEAD(pcistub_devices);
++
++/* wait for device_initcall before initializing our devices
++ * (see pcistub_init_devices_late)
++ */
++static int initialize_devices = 0;
++static LIST_HEAD(seized_devices);
++
++static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev)
++{
++	struct pcistub_device *psdev;
++
++	dev_dbg(&dev->dev, "pcistub_device_alloc\n");
++
++	psdev = kzalloc(sizeof(*psdev), GFP_ATOMIC);
++	if (!psdev)
++		return NULL;
++
++	psdev->dev = pci_dev_get(dev);
++	if (!psdev->dev) {
++		kfree(psdev);
++		return NULL;
++	}
++
++	kref_init(&psdev->kref);
++	spin_lock_init(&psdev->lock);
++
++	return psdev;
++}
++
++/* Don't call this directly as it's called by pcistub_device_put */
++static void pcistub_device_release(struct kref *kref)
++{
++	struct pcistub_device *psdev;
++
++	psdev = container_of(kref, struct pcistub_device, kref);
++
++	dev_dbg(&psdev->dev->dev, "pcistub_device_release\n");
++
++	/* Clean-up the device */
++	pciback_reset_device(psdev->dev);
++	pciback_config_free_dyn_fields(psdev->dev);
++	pciback_config_free_dev(psdev->dev);
++	kfree(pci_get_drvdata(psdev->dev));
++	pci_set_drvdata(psdev->dev, NULL);
++
++	pci_dev_put(psdev->dev);
++
++	kfree(psdev);
++}
++
++static inline void pcistub_device_get(struct pcistub_device *psdev)
++{
++	kref_get(&psdev->kref);
++}
++
++static inline void pcistub_device_put(struct pcistub_device *psdev)
++{
++	kref_put(&psdev->kref, pcistub_device_release);
++}
++
++static struct pcistub_device *pcistub_device_find(int domain, int bus,
++						  int slot, int func)
++{
++	struct pcistub_device *psdev = NULL;
++	unsigned long flags;
++
++	spin_lock_irqsave(&pcistub_devices_lock, flags);
++
++	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
++		if (psdev->dev != NULL
++		    && domain == pci_domain_nr(psdev->dev->bus)
++		    && bus == psdev->dev->bus->number
++		    && PCI_DEVFN(slot, func) == psdev->dev->devfn) {
++			pcistub_device_get(psdev);
++			goto out;
++		}
++	}
++
++	/* didn't find it */
++	psdev = NULL;
++
++      out:
++	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++	return psdev;
++}
++
++static struct pci_dev *pcistub_device_get_pci_dev(struct pciback_device *pdev,
++						  struct pcistub_device *psdev)
++{
++	struct pci_dev *pci_dev = NULL;
++	unsigned long flags;
++
++	pcistub_device_get(psdev);
++
++	spin_lock_irqsave(&psdev->lock, flags);
++	if (!psdev->pdev) {
++		psdev->pdev = pdev;
++		pci_dev = psdev->dev;
++	}
++	spin_unlock_irqrestore(&psdev->lock, flags);
++
++	if (!pci_dev)
++		pcistub_device_put(psdev);
++
++	return pci_dev;
++}
++
++struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev,
++					    int domain, int bus,
++					    int slot, int func)
++{
++	struct pcistub_device *psdev;
++	struct pci_dev *found_dev = NULL;
++	unsigned long flags;
++
++	spin_lock_irqsave(&pcistub_devices_lock, flags);
++
++	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
++		if (psdev->dev != NULL
++		    && domain == pci_domain_nr(psdev->dev->bus)
++		    && bus == psdev->dev->bus->number
++		    && PCI_DEVFN(slot, func) == psdev->dev->devfn) {
++			found_dev = pcistub_device_get_pci_dev(pdev, psdev);
++			break;
++		}
++	}
++
++	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++	return found_dev;
++}
++
++struct pci_dev *pcistub_get_pci_dev(struct pciback_device *pdev,
++				    struct pci_dev *dev)
++{
++	struct pcistub_device *psdev;
++	struct pci_dev *found_dev = NULL;
++	unsigned long flags;
++
++	spin_lock_irqsave(&pcistub_devices_lock, flags);
++
++	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
++		if (psdev->dev == dev) {
++			found_dev = pcistub_device_get_pci_dev(pdev, psdev);
++			break;
++		}
++	}
++
++	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++	return found_dev;
++}
++
++void pcistub_put_pci_dev(struct pci_dev *dev)
++{
++	struct pcistub_device *psdev, *found_psdev = NULL;
++	unsigned long flags;
++
++	spin_lock_irqsave(&pcistub_devices_lock, flags);
++
++	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
++		if (psdev->dev == dev) {
++			found_psdev = psdev;
++			break;
++		}
++	}
++
++	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++
++	/* Hold this lock to avoid breaking the link between
++	 * pcistub and pciback while AER handling is in progress.
++	 */
++	down_write(&pcistub_sem);
++	/* Cleanup our device
++	 * (so it's ready for the next domain)
++	 */
++	pciback_reset_device(found_psdev->dev);
++	pciback_config_free_dyn_fields(found_psdev->dev);
++	pciback_config_reset_dev(found_psdev->dev);
++
++	spin_lock_irqsave(&found_psdev->lock, flags);
++	found_psdev->pdev = NULL;
++	spin_unlock_irqrestore(&found_psdev->lock, flags);
++
++	pcistub_device_put(found_psdev);
++	up_write(&pcistub_sem);
++}
++
++static int __devinit pcistub_match_one(struct pci_dev *dev,
++				       struct pcistub_device_id *pdev_id)
++{
++	/* Match the specified device by domain, bus, slot, func and also if
++	 * any of the device's parent bridges match.
++	 */
++	for (; dev != NULL; dev = dev->bus->self) {
++		if (pci_domain_nr(dev->bus) == pdev_id->domain
++		    && dev->bus->number == pdev_id->bus
++		    && dev->devfn == pdev_id->devfn)
++			return 1;
++
++		/* Sometimes topmost bridge links to itself. */
++		if (dev == dev->bus->self)
++			break;
++	}
++
++	return 0;
++}
++
++static int __devinit pcistub_match(struct pci_dev *dev)
++{
++	struct pcistub_device_id *pdev_id;
++	unsigned long flags;
++	int found = 0;
++
++	spin_lock_irqsave(&device_ids_lock, flags);
++	list_for_each_entry(pdev_id, &pcistub_device_ids, slot_list) {
++		if (pcistub_match_one(dev, pdev_id)) {
++			found = 1;
++			break;
++		}
++	}
++	spin_unlock_irqrestore(&device_ids_lock, flags);
++
++	return found;
++}
++
++static int __devinit pcistub_init_device(struct pci_dev *dev)
++{
++	struct pciback_dev_data *dev_data;
++	int err = 0;
++
++	dev_dbg(&dev->dev, "initializing...\n");
++
++	/* The PCI backend is not intended to be a module (or to work with
++	 * removable PCI devices) yet. If it were, pciback_config_free()
++	 * would need to be called somewhere to free the memory allocated
++	 * here and then to call kfree(pci_get_drvdata(psdev->dev)).
++	 */
++	dev_data = kzalloc(sizeof(*dev_data), GFP_ATOMIC);
++	if (!dev_data) {
++		err = -ENOMEM;
++		goto out;
++	}
++	pci_set_drvdata(dev, dev_data);
++
++	dev_dbg(&dev->dev, "initializing config\n");
++
++	init_waitqueue_head(&aer_wait_queue);
++	err = pciback_config_init_dev(dev);
++	if (err)
++		goto out;
++
++	/* HACK: Force device (& ACPI) to determine what IRQ it's on - we
++	 * must do this here because pcibios_enable_device may specify
++	 * the pci device's true irq (and possibly its other resources)
++	 * if they differ from what's in the configuration space.
++	 * This makes the assumption that the device's resources won't
++	 * change after this point (otherwise this code may break!)
++	 */
++	dev_dbg(&dev->dev, "enabling device\n");
++	err = pci_enable_device(dev);
++	if (err)
++		goto config_release;
++
++	/* Now disable the device (this also ensures some private device
++	 * data is setup before we export)
++	 */
++	dev_dbg(&dev->dev, "reset device\n");
++	pciback_reset_device(dev);
++
++	return 0;
++
++      config_release:
++	pciback_config_free_dev(dev);
++
++      out:
++	pci_set_drvdata(dev, NULL);
++	kfree(dev_data);
++	return err;
++}
++
++/*
++ * Because some initialization still happens on
++ * devices during fs_initcall, we need to defer
++ * full initialization of our devices until
++ * device_initcall.
++ */
++static int __init pcistub_init_devices_late(void)
++{
++	struct pcistub_device *psdev;
++	unsigned long flags;
++	int err = 0;
++
++	pr_debug("pciback: pcistub_init_devices_late\n");
++
++	spin_lock_irqsave(&pcistub_devices_lock, flags);
++
++	while (!list_empty(&seized_devices)) {
++		psdev = container_of(seized_devices.next,
++				     struct pcistub_device, dev_list);
++		list_del(&psdev->dev_list);
++
++		spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++
++		err = pcistub_init_device(psdev->dev);
++		if (err) {
++			dev_err(&psdev->dev->dev,
++				"error %d initializing device\n", err);
++			kfree(psdev);
++			psdev = NULL;
++		}
++
++		spin_lock_irqsave(&pcistub_devices_lock, flags);
++
++		if (psdev)
++			list_add_tail(&psdev->dev_list, &pcistub_devices);
++	}
++
++	initialize_devices = 1;
++
++	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++
++	return 0;
++}
++
++static int __devinit pcistub_seize(struct pci_dev *dev)
++{
++	struct pcistub_device *psdev;
++	unsigned long flags;
++	int err = 0;
++
++	psdev = pcistub_device_alloc(dev);
++	if (!psdev)
++		return -ENOMEM;
++
++	spin_lock_irqsave(&pcistub_devices_lock, flags);
++
++	if (initialize_devices) {
++		spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++
++		/* don't want irqs disabled when calling pcistub_init_device */
++		err = pcistub_init_device(psdev->dev);
++
++		spin_lock_irqsave(&pcistub_devices_lock, flags);
++
++		if (!err)
++			list_add(&psdev->dev_list, &pcistub_devices);
++	} else {
++		dev_dbg(&dev->dev, "deferring initialization\n");
++		list_add(&psdev->dev_list, &seized_devices);
++	}
++
++	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++
++	if (err)
++		pcistub_device_put(psdev);
++
++	return err;
++}
++
++static int __devinit pcistub_probe(struct pci_dev *dev,
++				   const struct pci_device_id *id)
++{
++	int err = 0;
++
++	dev_dbg(&dev->dev, "probing...\n");
++
++	if (pcistub_match(dev)) {
++
++		if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL
++		    && dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
++			dev_err(&dev->dev, "can't export pci devices that "
++				"don't have a normal (0) or bridge (1) "
++				"header type!\n");
++			err = -ENODEV;
++			goto out;
++		}
++
++		dev_info(&dev->dev, "seizing device\n");
++		err = pcistub_seize(dev);
++#ifdef CONFIG_PCI_GUESTDEV
++	} else if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
++		if (!pci_is_guestdev(dev)) {
++			err = -ENODEV;
++			goto out;
++		}
++
++		dev_info(&dev->dev, "seizing device\n");
++		err = pcistub_seize(dev);
++#endif /* CONFIG_PCI_GUESTDEV */
++	} else
++		/* Didn't find the device */
++		err = -ENODEV;
++
++      out:
++	return err;
++}
++
++static void pcistub_remove(struct pci_dev *dev)
++{
++	struct pcistub_device *psdev, *found_psdev = NULL;
++	unsigned long flags;
++
++	dev_dbg(&dev->dev, "removing\n");
++
++	spin_lock_irqsave(&pcistub_devices_lock, flags);
++
++	pciback_config_quirk_release(dev);
++
++	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
++		if (psdev->dev == dev) {
++			found_psdev = psdev;
++			break;
++		}
++	}
++
++	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++
++	if (found_psdev) {
++		dev_dbg(&dev->dev, "found device to remove - in use? %p\n",
++			found_psdev->pdev);
++
++		if (found_psdev->pdev) {
++			printk(KERN_WARNING "pciback: ****** removing device "
++			       "%s while still in-use! ******\n",
++			       pci_name(found_psdev->dev));
++			printk(KERN_WARNING "pciback: ****** driver domain may "
++			       "still access this device's i/o resources!\n");
++			printk(KERN_WARNING "pciback: ****** shutdown driver "
++			       "domain before binding device\n");
++			printk(KERN_WARNING "pciback: ****** to other drivers "
++			       "or domains\n");
++
++			pciback_release_pci_dev(found_psdev->pdev,
++						found_psdev->dev);
++		}
++
++		spin_lock_irqsave(&pcistub_devices_lock, flags);
++		list_del(&found_psdev->dev_list);
++		spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++
++		/* the final put for releasing from the list */
++		pcistub_device_put(found_psdev);
++	}
++}
++
++static const struct pci_device_id pcistub_ids[] = {
++	{
++	 .vendor = PCI_ANY_ID,
++	 .device = PCI_ANY_ID,
++	 .subvendor = PCI_ANY_ID,
++	 .subdevice = PCI_ANY_ID,
++	 },
++	{0,},
++};
++
++static void kill_domain_by_device(struct pcistub_device *psdev)
++{
++	struct xenbus_transaction xbt;
++	int err;
++	char nodename[1024];
++
++	if (!psdev) {
++		/* psdev is dereferenced below, so bail out instead of crashing */
++		pr_err("pciback: device is NULL during AER recovery/kill_domain\n");
++		return;
++	}
++	sprintf(nodename, "/local/domain/0/backend/pci/%d/0", 
++		psdev->pdev->xdev->otherend_id);
++	nodename[strlen(nodename)] = '\0';
++
++again:
++	err = xenbus_transaction_start(&xbt);
++	if (err)
++	{
++		dev_err(&psdev->dev->dev,
++			"error %d when start xenbus transaction\n", err);
++		return;
++	}
++	/*PV AER handlers will set this flag*/
++	xenbus_printf(xbt, nodename, "aerState" , "aerfail" );
++	err = xenbus_transaction_end(xbt, 0);
++	if (err)
++	{
++		if (err == -EAGAIN)
++			goto again;
++		dev_err(&psdev->dev->dev,
++			"error %d when end xenbus transaction\n", err);
++		return;
++	}
++}
++
++/* For each AER recovery step (error_detected, mmio_enabled, etc.) the frontend
++ * and backend need to cooperate. In pciback, those steps all do a similar
++ * job: send a service request and wait for the frontend's response.
++ */
++static pci_ers_result_t common_process(struct pcistub_device *psdev, 
++		pci_channel_state_t state, int aer_cmd, pci_ers_result_t result)
++{
++	pci_ers_result_t res = result;
++	struct xen_pcie_aer_op *aer_op;
++	int ret;
++
++	/*with PV AER drivers*/
++	aer_op = &(psdev->pdev->sh_info->aer_op);
++	aer_op->cmd = aer_cmd ;
++	/*useful for error_detected callback*/
++	aer_op->err = state;
++	/*pcifront_end BDF*/
++	ret = pciback_get_pcifront_dev(psdev->dev, psdev->pdev,
++		&aer_op->domain, &aer_op->bus, &aer_op->devfn);
++	if (!ret) {
++		dev_err(&psdev->dev->dev,
++			"pciback: failed to get pcifront device\n");
++		return PCI_ERS_RESULT_NONE; 
++	}
++	wmb();
++
++	dev_dbg(&psdev->dev->dev, 
++			"pciback: aer_op %x dom %x bus %x devfn %x\n",  
++			aer_cmd, aer_op->domain, aer_op->bus, aer_op->devfn);
++	/* Local flag to mark that there is an AER request pending; the pciback
++	 * callback uses this flag to decide whether it needs to check for the
++	 * AER service ack signal from pcifront.
++	 */
++	set_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);
++
++	/* It is possible that a pcifront conf_read_write request invokes
++	 * the callback, which causes a spurious execution of wake_up.
++	 * Yet it is harmless and better than a spinlock here.
++	 */
++	set_bit(_XEN_PCIB_active, 
++		(unsigned long *)&psdev->pdev->sh_info->flags);
++	wmb();
++	notify_remote_via_irq(psdev->pdev->evtchn_irq);
++
++	ret = wait_event_timeout(aer_wait_queue, !(test_bit(_XEN_PCIB_active,
++                (unsigned long *)&psdev->pdev->sh_info->flags)), 300*HZ);
++
++	if (!ret) {
++		if (test_bit(_XEN_PCIB_active, 
++			(unsigned long *)&psdev->pdev->sh_info->flags)) {
++			dev_err(&psdev->dev->dev, 
++				"pcifront aer process not responding!\n");
++			clear_bit(_XEN_PCIB_active,
++			  (unsigned long *)&psdev->pdev->sh_info->flags);
++			aer_op->err = PCI_ERS_RESULT_NONE;
++			return res;
++		}
++	}
++	clear_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);
++
++	if ( test_bit( _XEN_PCIF_active,
++		(unsigned long*)&psdev->pdev->sh_info->flags)) {
++		dev_dbg(&psdev->dev->dev, 
++			"schedule pci_conf service in pciback \n");
++		test_and_schedule_op(psdev->pdev);
++	}
++
++	res = (pci_ers_result_t)aer_op->err;
++	return res;
++} 
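++/*
++ * Rough sketch of the handshake driven by common_process() above (editorial
++ * illustration only; the pcifront side is assumed, it is not part of this
++ * file):
++ *
++ *   pciback                                  pcifront
++ *   -------                                  --------
++ *   fill sh_info->aer_op (cmd, err, BDF)
++ *   set _XEN_PCIB_active, notify evtchn  --> run the AER handler for cmd
++ *                                        <-- write the result to aer_op->err,
++ *                                            clear _XEN_PCIB_active, notify
++ *   wake up on aer_wait_queue (or time out after 300*HZ)
++ *   return aer_op->err as a pci_ers_result_t
++ */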
++
++/*
++ * pciback_slot_reset: send the slot_reset request to pcifront in case the
++ * device driver can provide this service, then wait for the pcifront ack.
++ * @dev: pointer to the PCI device
++ * The return value is used by the aer_core do_recovery policy.
++ */
++static pci_ers_result_t pciback_slot_reset(struct pci_dev *dev)
++{
++	struct pcistub_device *psdev;
++	pci_ers_result_t result;
++
++	result = PCI_ERS_RESULT_RECOVERED;
++	dev_dbg(&dev->dev, "pciback_slot_reset(bus:%x,devfn:%x)\n",
++		dev->bus->number, dev->devfn);
++
++	down_write(&pcistub_sem);
++	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
++				dev->bus->number,
++				PCI_SLOT(dev->devfn),
++				PCI_FUNC(dev->devfn));
++
++	if ( !psdev || !psdev->pdev )
++	{
++		dev_err(&dev->dev, 
++			"pciback device is not found/assigned\n");
++		goto end;
++	}
++
++	if ( !psdev->pdev->sh_info )
++	{
++		dev_err(&dev->dev, "pciback device is not connected or owned"
++			" by HVM, kill it\n");
++		kill_domain_by_device(psdev);
++		goto release;
++	}
++
++	if ( !test_bit(_XEN_PCIB_AERHANDLER, 
++		(unsigned long *)&psdev->pdev->sh_info->flags) ) {
++		dev_err(&dev->dev, 
++			"guest with no AER driver should have been killed\n");
++		goto release;
++	}
++	result = common_process(psdev, 1, XEN_PCI_OP_aer_slotreset, result);
++
++	if (result == PCI_ERS_RESULT_NONE ||
++		result == PCI_ERS_RESULT_DISCONNECT) {
++		dev_dbg(&dev->dev, 
++			"No AER slot_reset service or disconnected!\n");
++		kill_domain_by_device(psdev);
++	}
++release:
++	pcistub_device_put(psdev);
++end:
++	up_write(&pcistub_sem);
++	return result;
++
++}
++
++
++/*
++ * pciback_mmio_enabled: send the mmio_enabled request to pcifront in case
++ * the device driver can provide this service, then wait for the pcifront ack.
++ * @dev: pointer to the PCI device
++ * The return value is used by the aer_core do_recovery policy.
++ */
++
++static pci_ers_result_t pciback_mmio_enabled(struct pci_dev *dev)
++{
++	struct pcistub_device *psdev;
++	pci_ers_result_t result;
++
++	result = PCI_ERS_RESULT_RECOVERED;
++	dev_dbg(&dev->dev, "pciback_mmio_enabled(bus:%x,devfn:%x)\n",
++		dev->bus->number, dev->devfn);
++
++	down_write(&pcistub_sem);
++	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
++				dev->bus->number,
++				PCI_SLOT(dev->devfn),
++				PCI_FUNC(dev->devfn));
++
++	if ( !psdev || !psdev->pdev )
++	{
++		dev_err(&dev->dev, 
++			"pciback device is not found/assigned\n");
++		goto end;
++	}
++
++	if ( !psdev->pdev->sh_info )
++	{
++		dev_err(&dev->dev, "pciback device is not connected or owned"
++			" by HVM, kill it\n");
++		kill_domain_by_device(psdev);
++		goto release;
++	}
++
++	if ( !test_bit(_XEN_PCIB_AERHANDLER, 
++		(unsigned long *)&psdev->pdev->sh_info->flags) ) {
++		dev_err(&dev->dev, 
++			"guest with no AER driver should have been killed\n");
++		goto release;
++	}
++	result = common_process(psdev, 1, XEN_PCI_OP_aer_mmio, result);
++
++	if (result == PCI_ERS_RESULT_NONE ||
++		result == PCI_ERS_RESULT_DISCONNECT) {
++		dev_dbg(&dev->dev, 
++			"No AER mmio_enabled service or disconnected!\n");
++		kill_domain_by_device(psdev);
++	}
++release:
++	pcistub_device_put(psdev);
++end:
++	up_write(&pcistub_sem);
++	return result;
++}
++
++/*
++ * pciback_error_detected: send the error_detected request to pcifront in case
++ * the device driver can provide this service, then wait for the pcifront ack.
++ * @dev: pointer to the PCI device
++ * @error: the current PCI connection state
++ * The return value is used by the aer_core do_recovery policy.
++ */
++
++static pci_ers_result_t pciback_error_detected(struct pci_dev *dev,
++	pci_channel_state_t error)
++{
++	struct pcistub_device *psdev;
++	pci_ers_result_t result;
++
++	result = PCI_ERS_RESULT_CAN_RECOVER;
++	dev_dbg(&dev->dev, "pciback_error_detected(bus:%x,devfn:%x)\n",
++		dev->bus->number, dev->devfn);
++
++	down_write(&pcistub_sem);
++	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
++				dev->bus->number,
++				PCI_SLOT(dev->devfn),
++				PCI_FUNC(dev->devfn));
++
++	if ( !psdev || !psdev->pdev )
++	{
++		dev_err(&dev->dev, 
++			"pciback device is not found/assigned\n");
++		goto end;
++	}
++
++	if ( !psdev->pdev->sh_info )
++	{
++		dev_err(&dev->dev, "pciback device is not connected or owned"
++			" by HVM, kill it\n");
++		kill_domain_by_device(psdev);
++		goto release;
++	}
++
++	/* Guest owns the device but no AER handler is registered; kill the guest */
++	if ( !test_bit(_XEN_PCIB_AERHANDLER, 
++		(unsigned long *)&psdev->pdev->sh_info->flags) ) {
++		dev_dbg(&dev->dev, "guest may have no aer driver, kill it\n");
++		kill_domain_by_device(psdev);
++		goto release;
++	}
++	result = common_process(psdev, error, XEN_PCI_OP_aer_detected, result);
++
++	if (result == PCI_ERS_RESULT_NONE ||
++		result == PCI_ERS_RESULT_DISCONNECT) {
++		dev_dbg(&dev->dev, 
++			"No AER error_detected service or disconnected!\n");
++		kill_domain_by_device(psdev);
++	}
++release:
++	pcistub_device_put(psdev);
++end:
++	up_write(&pcistub_sem);
++	return result;
++}
++
++/*
++ * pciback_error_resume: send the error_resume request to pcifront in case
++ * the device driver can provide this service, then wait for the pcifront ack.
++ * @dev: pointer to the PCI device
++ */
++
++static void pciback_error_resume(struct pci_dev *dev)
++{
++	struct pcistub_device *psdev;
++
++	dev_dbg(&dev->dev, "pciback_error_resume(bus:%x,devfn:%x)\n",
++		dev->bus->number, dev->devfn);
++
++	down_write(&pcistub_sem);
++	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
++				dev->bus->number,
++				PCI_SLOT(dev->devfn),
++				PCI_FUNC(dev->devfn));
++
++	if ( !psdev || !psdev->pdev )
++	{
++		dev_err(&dev->dev, 
++			"pciback device is not found/assigned\n");
++		goto end;
++	}
++
++	if ( !psdev->pdev->sh_info )
++	{
++		dev_err(&dev->dev, "pciback device is not connected or owned"
++			" by HVM, kill it\n");
++		kill_domain_by_device(psdev);
++		goto release;
++	}
++
++	if ( !test_bit(_XEN_PCIB_AERHANDLER, 
++		(unsigned long *)&psdev->pdev->sh_info->flags) ) {
++		dev_err(&dev->dev, 
++			"guest with no AER driver should have been killed\n");
++		kill_domain_by_device(psdev);
++		goto release;
++	}
++	common_process(psdev, 1, XEN_PCI_OP_aer_resume, PCI_ERS_RESULT_RECOVERED);
++release:
++	pcistub_device_put(psdev);
++end:
++	up_write(&pcistub_sem);
++	return;
++}
++
++/*add pciback AER handling*/
++static struct pci_error_handlers pciback_error_handler = {
++	.error_detected = pciback_error_detected,
++	.mmio_enabled = pciback_mmio_enabled,
++	.slot_reset = pciback_slot_reset,
++	.resume = pciback_error_resume,
++};
++
++/*
++ * Note: There is no MODULE_DEVICE_TABLE entry here because this isn't
++ * for a normal device. I don't want it to be loaded automatically.
++ */
++
++static struct pci_driver pciback_pci_driver = {
++	.name = "pciback",
++	.id_table = pcistub_ids,
++	.probe = pcistub_probe,
++	.remove = pcistub_remove,
++	.err_handler = &pciback_error_handler,
++};
++
++static inline int str_to_slot(const char *buf, int *domain, int *bus,
++			      int *slot, int *func)
++{
++	int err;
++
++	err = sscanf(buf, " %x:%x:%x.%x", domain, bus, slot, func);
++	if (err == 4)
++		return 0;
++	else if (err < 0)
++		return -EINVAL;
++
++	/* try again without domain */
++	*domain = 0;
++	err = sscanf(buf, " %x:%x.%x", bus, slot, func);
++	if (err == 3)
++		return 0;
++
++	return -EINVAL;
++}
++
++static inline int str_to_quirk(const char *buf, int *domain, int *bus, int
++			       *slot, int *func, int *reg, int *size, int *mask)
++{
++	int err;
++
++	err =
++	    sscanf(buf, " %04x:%02x:%02x.%1x-%08x:%1x:%08x", domain, bus, slot,
++		   func, reg, size, mask);
++	if (err == 7)
++		return 0;
++	return -EINVAL;
++}
++
++static int pcistub_device_id_add(int domain, int bus, int slot, int func)
++{
++	struct pcistub_device_id *pci_dev_id;
++	unsigned long flags;
++
++	pci_dev_id = kmalloc(sizeof(*pci_dev_id), GFP_KERNEL);
++	if (!pci_dev_id)
++		return -ENOMEM;
++
++	pci_dev_id->domain = domain;
++	pci_dev_id->bus = bus;
++	pci_dev_id->devfn = PCI_DEVFN(slot, func);
++
++	pr_debug("pciback: wants to seize %04x:%02x:%02x.%01x\n",
++		 domain, bus, slot, func);
++
++	spin_lock_irqsave(&device_ids_lock, flags);
++	list_add_tail(&pci_dev_id->slot_list, &pcistub_device_ids);
++	spin_unlock_irqrestore(&device_ids_lock, flags);
++
++	return 0;
++}
++
++static int pcistub_device_id_remove(int domain, int bus, int slot, int func)
++{
++	struct pcistub_device_id *pci_dev_id, *t;
++	int devfn = PCI_DEVFN(slot, func);
++	int err = -ENOENT;
++	unsigned long flags;
++
++	spin_lock_irqsave(&device_ids_lock, flags);
++	list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids, slot_list) {
++
++		if (pci_dev_id->domain == domain
++		    && pci_dev_id->bus == bus && pci_dev_id->devfn == devfn) {
++			/* Don't break; here because it's possible the same
++			 * slot could be in the list more than once
++			 */
++			list_del(&pci_dev_id->slot_list);
++			kfree(pci_dev_id);
++
++			err = 0;
++
++			pr_debug("pciback: removed %04x:%02x:%02x.%01x from "
++				 "seize list\n", domain, bus, slot, func);
++		}
++	}
++	spin_unlock_irqrestore(&device_ids_lock, flags);
++
++	return err;
++}
++
++static int pcistub_reg_add(int domain, int bus, int slot, int func, int reg,
++			   int size, int mask)
++{
++	int err = 0;
++	struct pcistub_device *psdev;
++	struct pci_dev *dev;
++	struct config_field *field;
++
++	psdev = pcistub_device_find(domain, bus, slot, func);
++	if (!psdev || !psdev->dev) {
++		err = -ENODEV;
++		goto out;
++	}
++	dev = psdev->dev;
++
++	field = kzalloc(sizeof(*field), GFP_ATOMIC);
++	if (!field) {
++		err = -ENOMEM;
++		goto out;
++	}
++
++	field->offset = reg;
++	field->size = size;
++	field->mask = mask;
++	field->init = NULL;
++	field->reset = NULL;
++	field->release = NULL;
++	field->clean = pciback_config_field_free;
++
++	err = pciback_config_quirks_add_field(dev, field);
++	if (err)
++		kfree(field);
++      out:
++	return err;
++}
++
++static ssize_t pcistub_slot_add(struct device_driver *drv, const char *buf,
++				size_t count)
++{
++	int domain, bus, slot, func;
++	int err;
++
++	err = str_to_slot(buf, &domain, &bus, &slot, &func);
++	if (err)
++		goto out;
++
++	err = pcistub_device_id_add(domain, bus, slot, func);
++
++      out:
++	if (!err)
++		err = count;
++	return err;
++}
++
++DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add);
++
++static ssize_t pcistub_slot_remove(struct device_driver *drv, const char *buf,
++				   size_t count)
++{
++	int domain, bus, slot, func;
++	int err;
++
++	err = str_to_slot(buf, &domain, &bus, &slot, &func);
++	if (err)
++		goto out;
++
++	err = pcistub_device_id_remove(domain, bus, slot, func);
++
++      out:
++	if (!err)
++		err = count;
++	return err;
++}
++
++DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove);
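++/*
++ * Illustrative usage of the slot attributes (editorial sketch, not part of
++ * the original patch; the path assumes the driver name "pciback" set in
++ * pciback_pci_driver above):
++ *
++ *   echo "0000:03:00.0" > /sys/bus/pci/drivers/pciback/new_slot
++ *   echo "0000:03:00.0" > /sys/bus/pci/drivers/pciback/remove_slot
++ *
++ * The BDF may also be given without the domain, as parsed by str_to_slot().
++ */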
++
++static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf)
++{
++	struct pcistub_device_id *pci_dev_id;
++	size_t count = 0;
++	unsigned long flags;
++
++	spin_lock_irqsave(&device_ids_lock, flags);
++	list_for_each_entry(pci_dev_id, &pcistub_device_ids, slot_list) {
++		if (count >= PAGE_SIZE)
++			break;
++
++		count += scnprintf(buf + count, PAGE_SIZE - count,
++				   "%04x:%02x:%02x.%01x\n",
++				   pci_dev_id->domain, pci_dev_id->bus,
++				   PCI_SLOT(pci_dev_id->devfn),
++				   PCI_FUNC(pci_dev_id->devfn));
++	}
++	spin_unlock_irqrestore(&device_ids_lock, flags);
++
++	return count;
++}
++
++DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL);
++
++static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf,
++				 size_t count)
++{
++	int domain, bus, slot, func, reg, size, mask;
++	int err;
++
++	err = str_to_quirk(buf, &domain, &bus, &slot, &func, &reg, &size,
++			   &mask);
++	if (err)
++		goto out;
++
++	err = pcistub_reg_add(domain, bus, slot, func, reg, size, mask);
++
++      out:
++	if (!err)
++		err = count;
++	return err;
++}
++
++static ssize_t pcistub_quirk_show(struct device_driver *drv, char *buf)
++{
++	int count = 0;
++	unsigned long flags;
++	extern struct list_head pciback_quirks;
++	struct pciback_config_quirk *quirk;
++	struct pciback_dev_data *dev_data;
++	const struct config_field *field;
++	const struct config_field_entry *cfg_entry;
++
++	spin_lock_irqsave(&device_ids_lock, flags);
++	list_for_each_entry(quirk, &pciback_quirks, quirks_list) {
++		if (count >= PAGE_SIZE)
++			goto out;
++
++		count += scnprintf(buf + count, PAGE_SIZE - count,
++				   "%02x:%02x.%01x\n\t%04x:%04x:%04x:%04x\n",
++				   quirk->pdev->bus->number,
++				   PCI_SLOT(quirk->pdev->devfn),
++				   PCI_FUNC(quirk->pdev->devfn),
++				   quirk->devid.vendor, quirk->devid.device,
++				   quirk->devid.subvendor,
++				   quirk->devid.subdevice);
++
++		dev_data = pci_get_drvdata(quirk->pdev);
++
++		list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
++			field = cfg_entry->field;
++			if (count >= PAGE_SIZE)
++				goto out;
++
++			count += scnprintf(buf + count, PAGE_SIZE - count,
++					   "\t\t%08x:%01x:%08x\n",
++					   cfg_entry->base_offset + field->offset, 
++					   field->size, field->mask);
++		}
++	}
++
++      out:
++	spin_unlock_irqrestore(&device_ids_lock, flags);
++
++	return count;
++}
++
++DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show, pcistub_quirk_add);
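++/*
++ * Illustrative quirk entry (editorial sketch, not part of the original
++ * patch), matching the "%04x:%02x:%02x.%1x-%08x:%1x:%08x" format parsed by
++ * str_to_quirk(): BDF, then config-space offset, field size and write mask.
++ * For example, to expose a 4-byte field at offset 0x40 of 0000:03:00.0 with
++ * write mask 0xffffffff:
++ *
++ *   echo "0000:03:00.0-00000040:4:ffffffff" > \
++ *        /sys/bus/pci/drivers/pciback/quirks
++ */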
++
++static ssize_t permissive_add(struct device_driver *drv, const char *buf,
++			      size_t count)
++{
++	int domain, bus, slot, func;
++	int err;
++	struct pcistub_device *psdev;
++	struct pciback_dev_data *dev_data;
++	err = str_to_slot(buf, &domain, &bus, &slot, &func);
++	if (err)
++		goto out;
++	psdev = pcistub_device_find(domain, bus, slot, func);
++	if (!psdev) {
++		err = -ENODEV;
++		goto out;
++	}
++	if (!psdev->dev) {
++		err = -ENODEV;
++		goto release;
++	}
++	dev_data = pci_get_drvdata(psdev->dev);
++	/* the driver data for a device should never be null at this point */
++	if (!dev_data) {
++		err = -ENXIO;
++		goto release;
++	}
++	if (!dev_data->permissive) {
++		dev_data->permissive = 1;
++		/* Let user know that what they're doing could be unsafe */
++		dev_warn(&psdev->dev->dev,
++			 "enabling permissive mode configuration space accesses!\n");
++		dev_warn(&psdev->dev->dev,
++			 "permissive mode is potentially unsafe!\n");
++	}
++      release:
++	pcistub_device_put(psdev);
++      out:
++	if (!err)
++		err = count;
++	return err;
++}
++
++static ssize_t permissive_show(struct device_driver *drv, char *buf)
++{
++	struct pcistub_device *psdev;
++	struct pciback_dev_data *dev_data;
++	size_t count = 0;
++	unsigned long flags;
++	spin_lock_irqsave(&pcistub_devices_lock, flags);
++	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
++		if (count >= PAGE_SIZE)
++			break;
++		if (!psdev->dev)
++			continue;
++		dev_data = pci_get_drvdata(psdev->dev);
++		if (!dev_data || !dev_data->permissive)
++			continue;
++		count +=
++		    scnprintf(buf + count, PAGE_SIZE - count, "%s\n",
++			      pci_name(psdev->dev));
++	}
++	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++	return count;
++}
++
++DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, permissive_add);
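++/*
++ * Illustrative usage of the permissive attribute (editorial sketch, not part
++ * of the original patch): writing the BDF of a slot already bound to pciback
++ * enables permissive config-space access for it; reading lists the devices
++ * currently in permissive mode:
++ *
++ *   echo "0000:03:00.0" > /sys/bus/pci/drivers/pciback/permissive
++ *   cat /sys/bus/pci/drivers/pciback/permissive
++ */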
++
++#ifdef CONFIG_PCI_MSI
++
++int pciback_get_owner(struct pci_dev *dev)
++{
++	struct pcistub_device *psdev;
++
++	psdev = pcistub_device_find(pci_domain_nr(dev->bus), dev->bus->number,
++			PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
++
++	if (!psdev || !psdev->pdev)
++		return -1;
++
++	return psdev->pdev->xdev->otherend_id;
++}
++#endif
++
++static void pcistub_exit(void)
++{
++	driver_remove_file(&pciback_pci_driver.driver, &driver_attr_new_slot);
++	driver_remove_file(&pciback_pci_driver.driver,
++			   &driver_attr_remove_slot);
++	driver_remove_file(&pciback_pci_driver.driver, &driver_attr_slots);
++	driver_remove_file(&pciback_pci_driver.driver, &driver_attr_quirks);
++	driver_remove_file(&pciback_pci_driver.driver, &driver_attr_permissive);
++
++	pci_unregister_driver(&pciback_pci_driver);
++	WARN_ON(unregister_msi_get_owner(pciback_get_owner));
++}
++
++static int __init pcistub_init(void)
++{
++	int pos = 0;
++	int err = 0;
++	int domain, bus, slot, func;
++	int parsed;
++
++	if (pci_devs_to_hide && *pci_devs_to_hide) {
++		do {
++			parsed = 0;
++
++			err = sscanf(pci_devs_to_hide + pos,
++				     " (%x:%x:%x.%x) %n",
++				     &domain, &bus, &slot, &func, &parsed);
++			if (err != 4) {
++				domain = 0;
++				err = sscanf(pci_devs_to_hide + pos,
++					     " (%x:%x.%x) %n",
++					     &bus, &slot, &func, &parsed);
++				if (err != 3)
++					goto parse_error;
++			}
++
++			err = pcistub_device_id_add(domain, bus, slot, func);
++			if (err)
++				goto out;
++
++			/* if parsed<=0, we've reached the end of the string */
++			pos += parsed;
++		} while (parsed > 0 && pci_devs_to_hide[pos]);
++	}
++
++	/* If we're the first PCI Device Driver to register, we're the
++	 * first one to get offered PCI devices as they become
++	 * available (and thus we can be the first to grab them)
++	 */
++	err = pci_register_driver(&pciback_pci_driver);
++	if (err < 0)
++		goto out;
++
++	err = driver_create_file(&pciback_pci_driver.driver,
++				 &driver_attr_new_slot);
++	if (!err)
++		err = driver_create_file(&pciback_pci_driver.driver,
++					 &driver_attr_remove_slot);
++	if (!err)
++		err = driver_create_file(&pciback_pci_driver.driver,
++					 &driver_attr_slots);
++	if (!err)
++		err = driver_create_file(&pciback_pci_driver.driver,
++					 &driver_attr_quirks);
++	if (!err)
++		err = driver_create_file(&pciback_pci_driver.driver,
++					 &driver_attr_permissive);
++
++	if (!err)
++		err = register_msi_get_owner(pciback_get_owner);
++	if (err)
++		pcistub_exit();
++
++      out:
++	return err;
++
++      parse_error:
++	printk(KERN_ERR "pciback: Error parsing pci_devs_to_hide at \"%s\"\n",
++	       pci_devs_to_hide + pos);
++	return -EINVAL;
++}
++
++#ifndef MODULE
++/*
++ * fs_initcall happens before device_initcall, so pciback *should* get
++ * called first (because we want to grab any matching device before other
++ * drivers get a chance, by being the first PCI device driver to register).
++ */
++fs_initcall(pcistub_init);
++#endif
++
++static int __init pciback_init(void)
++{
++	int err;
++
++	err = pciback_config_init();
++	if (err)
++		return err;
++
++#ifdef MODULE
++	err = pcistub_init();
++	if (err < 0)
++		return err;
++#endif
++
++	pcistub_init_devices_late();
++	err = pciback_xenbus_register();
++	if (err)
++		pcistub_exit();
++
++	return err;
++}
++
++static void __exit pciback_cleanup(void)
++{
++	pciback_xenbus_unregister();
++	pcistub_exit();
++}
++
++module_init(pciback_init);
++module_exit(pciback_cleanup);
++
++MODULE_LICENSE("Dual BSD/GPL");
+diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h
+new file mode 100644
+index 0000000..6744f45
+--- /dev/null
++++ b/drivers/xen/pciback/pciback.h
+@@ -0,0 +1,126 @@
++/*
++ * PCI Backend Common Data Structures & Function Declarations
++ *
++ *   Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ */
++#ifndef __XEN_PCIBACK_H__
++#define __XEN_PCIBACK_H__
++
++#include <linux/pci.h>
++#include <linux/interrupt.h>
++#include <xen/xenbus.h>
++#include <linux/list.h>
++#include <linux/spinlock.h>
++#include <linux/workqueue.h>
++#include <asm/atomic.h>
++#include <xen/interface/io/pciif.h>
++
++struct pci_dev_entry {
++	struct list_head list;
++	struct pci_dev *dev;
++};
++
++#define _PDEVF_op_active 	(0)
++#define PDEVF_op_active 	(1<<(_PDEVF_op_active))
++#define _PCIB_op_pending	(1)
++#define PCIB_op_pending		(1<<(_PCIB_op_pending))
++
++struct pciback_device {
++	void *pci_dev_data;
++	spinlock_t dev_lock;
++
++	struct xenbus_device *xdev;
++
++	struct xenbus_watch be_watch;
++	u8 be_watching;
++
++	int evtchn_irq;
++
++	struct vm_struct *sh_area;
++	struct xen_pci_sharedinfo *sh_info;
++
++	unsigned long flags;
++
++	struct work_struct op_work;
++};
++
++struct pciback_dev_data {
++	struct list_head config_fields;
++	int permissive;
++	int warned_on_write;
++};
++
++/* Get/Put PCI Devices that are hidden from the PCI Backend Domain */
++struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev,
++					    int domain, int bus,
++					    int slot, int func);
++struct pci_dev *pcistub_get_pci_dev(struct pciback_device *pdev,
++				    struct pci_dev *dev);
++void pcistub_put_pci_dev(struct pci_dev *dev);
++
++/* Ensure a device is turned off or reset */
++void pciback_reset_device(struct pci_dev *pdev);
++
++/* Access a virtual configuration space for a PCI device */
++int pciback_config_init(void);
++int pciback_config_init_dev(struct pci_dev *dev);
++void pciback_config_free_dyn_fields(struct pci_dev *dev);
++void pciback_config_reset_dev(struct pci_dev *dev);
++void pciback_config_free_dev(struct pci_dev *dev);
++int pciback_config_read(struct pci_dev *dev, int offset, int size,
++			u32 * ret_val);
++int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value);
++
++/* Handle requests for specific devices from the frontend */
++typedef int (*publish_pci_dev_cb) (struct pciback_device *pdev,
++				   unsigned int domain, unsigned int bus,
++				   unsigned int devfn, unsigned int devid);
++typedef int (*publish_pci_root_cb) (struct pciback_device * pdev,
++				    unsigned int domain, unsigned int bus);
++int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
++			int devid, publish_pci_dev_cb publish_cb);
++void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev);
++struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
++				    unsigned int domain, unsigned int bus,
++				    unsigned int devfn);
++
++/*
++ * Added for dom0 PCIe AER handling. Look up the guest domain/bus/devfn in
++ * pciback before sending an AER request to pcifront, so that the guest can
++ * identify the device and cooperate with pciback to finish the AER recovery
++ * job if the device driver has the capability.
++ */
++
++int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev, 
++				unsigned int *domain, unsigned int *bus, unsigned int *devfn);
++int pciback_init_devices(struct pciback_device *pdev);
++int pciback_publish_pci_roots(struct pciback_device *pdev,
++			      publish_pci_root_cb cb);
++void pciback_release_devices(struct pciback_device *pdev);
++
++/* Handles events from front-end */
++irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs);
++void pciback_do_op(void *data);
++
++int pciback_xenbus_register(void);
++void pciback_xenbus_unregister(void);
++
++#ifdef CONFIG_PCI_MSI
++int pciback_enable_msi(struct pciback_device *pdev,
++                       struct pci_dev *dev, struct xen_pci_op *op);
++
++int pciback_disable_msi(struct pciback_device *pdev,
++                         struct pci_dev *dev, struct xen_pci_op *op);
++
++
++int pciback_enable_msix(struct pciback_device *pdev,
++                        struct pci_dev *dev, struct xen_pci_op *op);
++
++int pciback_disable_msix(struct pciback_device *pdev,
++                        struct pci_dev *dev, struct xen_pci_op *op);
++#endif
++extern int verbose_request;
++
++void test_and_schedule_op(struct pciback_device *pdev);
++#endif
++
+diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
+new file mode 100644
+index 0000000..b85b2db
+--- /dev/null
++++ b/drivers/xen/pciback/pciback_ops.c
+@@ -0,0 +1,134 @@
++/*
++ * PCI Backend Operations - respond to PCI requests from Frontend
++ *
++ *   Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ */
++#include <linux/module.h>
++#include <linux/wait.h>
++#include <asm/bitops.h>
++#include <xen/evtchn.h>
++#include "pciback.h"
++
++int verbose_request = 0;
++module_param(verbose_request, int, 0644);
++
++/* Ensure a device is "turned off" and ready to be exported.
++ * (Also see pciback_config_reset to ensure virtual configuration space is
++ * ready to be re-exported)
++ */
++void pciback_reset_device(struct pci_dev *dev)
++{
++	u16 cmd;
++
++	/* Disable devices (but not bridges) */
++	if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
++		pci_disable_device(dev);
++
++		pci_write_config_word(dev, PCI_COMMAND, 0);
++
++		dev->is_enabled = 0;
++		dev->is_busmaster = 0;
++	} else {
++		pci_read_config_word(dev, PCI_COMMAND, &cmd);
++		if (cmd & (PCI_COMMAND_INVALIDATE)) {
++			cmd &= ~(PCI_COMMAND_INVALIDATE);
++			pci_write_config_word(dev, PCI_COMMAND, cmd);
++
++			dev->is_busmaster = 0;
++		}
++	}
++}
++extern wait_queue_head_t aer_wait_queue;
++extern struct workqueue_struct *pciback_wq;
++/*
++ * Now the same evtchn is used for both pcifront conf_read_write requests
++ * and PCIe AER frontend acks. We use a dedicated workqueue to schedule the
++ * pciback conf_read_write service to avoid conflicts with the aer_core
++ * do_recovery job, which also uses the system default workqueue.
++ */
++void test_and_schedule_op(struct pciback_device *pdev)
++{
++	/* Check that frontend is requesting an operation and that we are not
++	 * already processing a request */
++	if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags)
++	    && !test_and_set_bit(_PDEVF_op_active, &pdev->flags))
++	{
++		queue_work(pciback_wq, &pdev->op_work);
++	}
++	/* _XEN_PCIB_active should have been cleared by pcifront. Also make
++	 * sure pciback is waiting for an ack by checking _PCIB_op_pending. */
++	if (!test_bit(_XEN_PCIB_active,(unsigned long *)&pdev->sh_info->flags)
++	    &&test_bit(_PCIB_op_pending, &pdev->flags)) {
++		wake_up(&aer_wait_queue);
++	}
++}
++
++/* Performing the configuration space reads/writes must not be done in atomic
++ * context because some of the pci_* functions can sleep (mostly due to ACPI
++ * use of semaphores). This function is intended to be called from a work
++ * queue in process context taking a struct pciback_device as a parameter */
++void pciback_do_op(void *data)
++{
++	struct pciback_device *pdev = data;
++	struct pci_dev *dev;
++	struct xen_pci_op *op = &pdev->sh_info->op;
++
++	dev = pciback_get_pci_dev(pdev, op->domain, op->bus, op->devfn);
++
++	if (dev == NULL)
++		op->err = XEN_PCI_ERR_dev_not_found;
++	else
++	{
++		switch (op->cmd)
++		{
++			case XEN_PCI_OP_conf_read:
++				op->err = pciback_config_read(dev,
++					  op->offset, op->size, &op->value);
++				break;
++			case XEN_PCI_OP_conf_write:
++				op->err = pciback_config_write(dev,
++					  op->offset, op->size,	op->value);
++				break;
++#ifdef CONFIG_PCI_MSI
++			case XEN_PCI_OP_enable_msi:
++				op->err = pciback_enable_msi(pdev, dev, op);
++				break;
++			case XEN_PCI_OP_disable_msi:
++				op->err = pciback_disable_msi(pdev, dev, op);
++				break;
++			case XEN_PCI_OP_enable_msix:
++				op->err = pciback_enable_msix(pdev, dev, op);
++				break;
++			case XEN_PCI_OP_disable_msix:
++				op->err = pciback_disable_msix(pdev, dev, op);
++				break;
++#endif
++			default:
++				op->err = XEN_PCI_ERR_not_implemented;
++				break;
++		}
++	}
++	/* Tell the driver domain that we're done. */ 
++	wmb();
++	clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
++	notify_remote_via_irq(pdev->evtchn_irq);
++
++	/* Mark that we're done. */
++	smp_mb__before_clear_bit(); /* /after/ clearing PCIF_active */
++	clear_bit(_PDEVF_op_active, &pdev->flags);
++	smp_mb__after_clear_bit(); /* /before/ final check for work */
++
++	/* Check to see if the driver domain tried to start another request in
++	 * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active. 
++	*/
++	test_and_schedule_op(pdev);
++}
++
++irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs)
++{
++	struct pciback_device *pdev = dev_id;
++
++	test_and_schedule_op(pdev);
++
++	return IRQ_HANDLED;
++}
+diff --git a/drivers/xen/pciback/slot.c b/drivers/xen/pciback/slot.c
+new file mode 100644
+index 0000000..105a8b6
+--- /dev/null
++++ b/drivers/xen/pciback/slot.c
+@@ -0,0 +1,187 @@
++/*
++ * PCI Backend - Provides a Virtual PCI bus (with real devices)
++ *               to the frontend
++ *
++ *   Author: Ryan Wilson <hap9 at epoch.ncsc.mil> (vpci.c)
++ *   Author: Tristan Gingold <tristan.gingold at bull.net>, from vpci.c
++ */
++
++#include <linux/list.h>
++#include <linux/slab.h>
++#include <linux/pci.h>
++#include <linux/spinlock.h>
++#include "pciback.h"
++
++/* There are at most 32 slots in a pci bus.  */
++#define PCI_SLOT_MAX 32
++
++#define PCI_BUS_NBR 2
++
++struct slot_dev_data {
++	/* Access to dev_list must be protected by lock */
++	struct pci_dev *slots[PCI_BUS_NBR][PCI_SLOT_MAX];
++	spinlock_t lock;
++};
++
++struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
++				    unsigned int domain, unsigned int bus,
++				    unsigned int devfn)
++{
++	struct pci_dev *dev = NULL;
++	struct slot_dev_data *slot_dev = pdev->pci_dev_data;
++	unsigned long flags;
++
++	if (domain != 0 || PCI_FUNC(devfn) != 0)
++		return NULL;
++
++	if (PCI_SLOT(devfn) >= PCI_SLOT_MAX || bus >= PCI_BUS_NBR)
++		return NULL;
++
++	spin_lock_irqsave(&slot_dev->lock, flags);
++	dev = slot_dev->slots[bus][PCI_SLOT(devfn)];
++	spin_unlock_irqrestore(&slot_dev->lock, flags);
++
++	return dev;
++}
++
++int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
++			int devid, publish_pci_dev_cb publish_cb)
++{
++	int err = 0, slot, bus;
++	struct slot_dev_data *slot_dev = pdev->pci_dev_data;
++	unsigned long flags;
++
++	if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) {
++		err = -EFAULT;
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Can't export bridges on the virtual PCI bus");
++		goto out;
++	}
++
++	spin_lock_irqsave(&slot_dev->lock, flags);
++
++	/* Assign to a new slot on the virtual PCI bus */
++	for (bus = 0; bus < PCI_BUS_NBR; bus++)
++		for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++			if (slot_dev->slots[bus][slot] == NULL) {
++				printk(KERN_INFO
++				       "pciback: slot: %s: assign to virtual slot %d, bus %d\n",
++				       pci_name(dev), slot, bus);
++				slot_dev->slots[bus][slot] = dev;
++				goto unlock;
++			}
++		}
++
++	err = -ENOMEM;
++	xenbus_dev_fatal(pdev->xdev, err,
++			 "No more space on root virtual PCI bus");
++
++      unlock:
++	spin_unlock_irqrestore(&slot_dev->lock, flags);
++
++	/* Publish this device. */
++	if(!err)
++		err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, 0), devid);
++
++      out:
++	return err;
++}
++
++void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
++{
++	int slot, bus;
++	struct slot_dev_data *slot_dev = pdev->pci_dev_data;
++	struct pci_dev *found_dev = NULL;
++	unsigned long flags;
++
++	spin_lock_irqsave(&slot_dev->lock, flags);
++
++	for (bus = 0; bus < PCI_BUS_NBR; bus++)
++		for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++			if (slot_dev->slots[bus][slot] == dev) {
++				slot_dev->slots[bus][slot] = NULL;
++				found_dev = dev;
++				goto out;
++			}
++		}
++
++      out:
++	spin_unlock_irqrestore(&slot_dev->lock, flags);
++
++	if (found_dev)
++		pcistub_put_pci_dev(found_dev);
++}
++
++int pciback_init_devices(struct pciback_device *pdev)
++{
++	int slot, bus;
++	struct slot_dev_data *slot_dev;
++
++	slot_dev = kmalloc(sizeof(*slot_dev), GFP_KERNEL);
++	if (!slot_dev)
++		return -ENOMEM;
++
++	spin_lock_init(&slot_dev->lock);
++
++	for (bus = 0; bus < PCI_BUS_NBR; bus++)
++		for (slot = 0; slot < PCI_SLOT_MAX; slot++)
++			slot_dev->slots[bus][slot] = NULL;
++
++	pdev->pci_dev_data = slot_dev;
++
++	return 0;
++}
++
++int pciback_publish_pci_roots(struct pciback_device *pdev,
++			      publish_pci_root_cb publish_cb)
++{
++	/* The Virtual PCI bus has only one root */
++	return publish_cb(pdev, 0, 0);
++}
++
++void pciback_release_devices(struct pciback_device *pdev)
++{
++	int slot, bus;
++	struct slot_dev_data *slot_dev = pdev->pci_dev_data;
++	struct pci_dev *dev;
++
++	for (bus = 0; bus < PCI_BUS_NBR; bus++)
++		for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++			dev = slot_dev->slots[bus][slot];
++			if (dev != NULL)
++				pcistub_put_pci_dev(dev);
++		}
++
++	kfree(slot_dev);
++	pdev->pci_dev_data = NULL;
++}
++
++int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev, 
++		unsigned int *domain, unsigned int *bus, unsigned int *devfn)
++{
++	int slot, busnr;
++	struct slot_dev_data *slot_dev = pdev->pci_dev_data;
++	struct pci_dev *dev;
++	int found = 0;
++	unsigned long flags;
++
++	spin_lock_irqsave(&slot_dev->lock, flags);
++
++	for (busnr = 0; busnr < PCI_BUS_NBR; busnr++)
++		for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++			dev = slot_dev->slots[busnr][slot];
++			if (dev && dev->bus->number == pcidev->bus->number
++				&& dev->devfn == pcidev->devfn
++				&& pci_domain_nr(dev->bus) == pci_domain_nr(pcidev->bus)) {
++				found = 1;
++				*domain = 0;
++				*bus = busnr;
++				*devfn = PCI_DEVFN(slot,0);
++				goto out;
++			}
++		}
++out:
++	spin_unlock_irqrestore(&slot_dev->lock, flags);
++	return found;
++
++}
+diff --git a/drivers/xen/pciback/vpci.c b/drivers/xen/pciback/vpci.c
+new file mode 100644
+index 0000000..a5b7ece
+--- /dev/null
++++ b/drivers/xen/pciback/vpci.c
+@@ -0,0 +1,242 @@
++/*
++ * PCI Backend - Provides a Virtual PCI bus (with real devices)
++ *               to the frontend
++ *
++ *   Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ */
++
++#include <linux/list.h>
++#include <linux/slab.h>
++#include <linux/pci.h>
++#include <linux/spinlock.h>
++#include "pciback.h"
++
++#define PCI_SLOT_MAX 32
++
++struct vpci_dev_data {
++	/* Access to dev_list must be protected by lock */
++	struct list_head dev_list[PCI_SLOT_MAX];
++	spinlock_t lock;
++};
++
++static inline struct list_head *list_first(struct list_head *head)
++{
++	return head->next;
++}
++
++struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
++				    unsigned int domain, unsigned int bus,
++				    unsigned int devfn)
++{
++	struct pci_dev_entry *entry;
++	struct pci_dev *dev = NULL;
++	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
++	unsigned long flags;
++
++	if (domain != 0 || bus != 0)
++		return NULL;
++
++	if (PCI_SLOT(devfn) < PCI_SLOT_MAX) {
++		spin_lock_irqsave(&vpci_dev->lock, flags);
++
++		list_for_each_entry(entry,
++				    &vpci_dev->dev_list[PCI_SLOT(devfn)],
++				    list) {
++			if (PCI_FUNC(entry->dev->devfn) == PCI_FUNC(devfn)) {
++				dev = entry->dev;
++				break;
++			}
++		}
++
++		spin_unlock_irqrestore(&vpci_dev->lock, flags);
++	}
++	return dev;
++}
++
++static inline int match_slot(struct pci_dev *l, struct pci_dev *r)
++{
++	if (pci_domain_nr(l->bus) == pci_domain_nr(r->bus)
++	    && l->bus == r->bus && PCI_SLOT(l->devfn) == PCI_SLOT(r->devfn))
++		return 1;
++
++	return 0;
++}
++
++int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
++			int devid, publish_pci_dev_cb publish_cb)
++{
++	int err = 0, slot, func;
++	struct pci_dev_entry *t, *dev_entry;
++	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
++	unsigned long flags;
++
++	if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) {
++		err = -EFAULT;
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Can't export bridges on the virtual PCI bus");
++		goto out;
++	}
++
++	dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL);
++	if (!dev_entry) {
++		err = -ENOMEM;
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Error adding entry to virtual PCI bus");
++		goto out;
++	}
++
++	dev_entry->dev = dev;
++
++	spin_lock_irqsave(&vpci_dev->lock, flags);
++
++	/* Keep multi-function devices together on the virtual PCI bus */
++	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++		if (!list_empty(&vpci_dev->dev_list[slot])) {
++			t = list_entry(list_first(&vpci_dev->dev_list[slot]),
++				       struct pci_dev_entry, list);
++
++			if (match_slot(dev, t->dev)) {
++				pr_info("pciback: vpci: %s: "
++					"assign to virtual slot %d func %d\n",
++					pci_name(dev), slot,
++					PCI_FUNC(dev->devfn));
++				list_add_tail(&dev_entry->list,
++					      &vpci_dev->dev_list[slot]);
++				func = PCI_FUNC(dev->devfn);
++				goto unlock;
++			}
++		}
++	}
++
++	/* Assign to a new slot on the virtual PCI bus */
++	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++		if (list_empty(&vpci_dev->dev_list[slot])) {
++			printk(KERN_INFO
++			       "pciback: vpci: %s: assign to virtual slot %d\n",
++			       pci_name(dev), slot);
++			list_add_tail(&dev_entry->list,
++				      &vpci_dev->dev_list[slot]);
++			func = PCI_FUNC(dev->devfn);
++			goto unlock;
++		}
++	}
++
++	err = -ENOMEM;
++	xenbus_dev_fatal(pdev->xdev, err,
++			 "No more space on root virtual PCI bus");
++
++      unlock:
++	spin_unlock_irqrestore(&vpci_dev->lock, flags);
++
++	/* Publish this device. */
++	if(!err)
++		err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, func), devid);
++
++      out:
++	return err;
++}
++
++void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
++{
++	int slot;
++	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
++	struct pci_dev *found_dev = NULL;
++	unsigned long flags;
++
++	spin_lock_irqsave(&vpci_dev->lock, flags);
++
++	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++		struct pci_dev_entry *e, *tmp;
++		list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
++					 list) {
++			if (e->dev == dev) {
++				list_del(&e->list);
++				found_dev = e->dev;
++				kfree(e);
++				goto out;
++			}
++		}
++	}
++
++      out:
++	spin_unlock_irqrestore(&vpci_dev->lock, flags);
++
++	if (found_dev)
++		pcistub_put_pci_dev(found_dev);
++}
++
++int pciback_init_devices(struct pciback_device *pdev)
++{
++	int slot;
++	struct vpci_dev_data *vpci_dev;
++
++	vpci_dev = kmalloc(sizeof(*vpci_dev), GFP_KERNEL);
++	if (!vpci_dev)
++		return -ENOMEM;
++
++	spin_lock_init(&vpci_dev->lock);
++
++	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++		INIT_LIST_HEAD(&vpci_dev->dev_list[slot]);
++	}
++
++	pdev->pci_dev_data = vpci_dev;
++
++	return 0;
++}
++
++int pciback_publish_pci_roots(struct pciback_device *pdev,
++			      publish_pci_root_cb publish_cb)
++{
++	/* The Virtual PCI bus has only one root */
++	return publish_cb(pdev, 0, 0);
++}
++
++void pciback_release_devices(struct pciback_device *pdev)
++{
++	int slot;
++	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
++
++	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++		struct pci_dev_entry *e, *tmp;
++		list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
++					 list) {
++			list_del(&e->list);
++			pcistub_put_pci_dev(e->dev);
++			kfree(e);
++		}
++	}
++
++	kfree(vpci_dev);
++	pdev->pci_dev_data = NULL;
++}
++
++int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev, 
++		unsigned int *domain, unsigned int *bus, unsigned int *devfn)
++{
++	struct pci_dev_entry *entry;
++	struct pci_dev *dev = NULL;
++	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
++	unsigned long flags;
++	int found = 0, slot;
++
++	spin_lock_irqsave(&vpci_dev->lock, flags);
++	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++		list_for_each_entry(entry,
++			    &vpci_dev->dev_list[slot],
++			    list) {
++			dev = entry->dev;
++			if (dev && dev->bus->number == pcidev->bus->number
++				&& pci_domain_nr(dev->bus) == pci_domain_nr(pcidev->bus)
++				&& dev->devfn == pcidev->devfn)
++			{
++				found = 1;
++				*domain = 0;
++				*bus = 0;
++				*devfn = PCI_DEVFN(slot, PCI_FUNC(pcidev->devfn));
++			}
++		}		
++	}
++	spin_unlock_irqrestore(&vpci_dev->lock, flags);
++	return found;
++}
+diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
+new file mode 100644
+index 0000000..4d56c45
+--- /dev/null
++++ b/drivers/xen/pciback/xenbus.c
+@@ -0,0 +1,710 @@
++/*
++ * PCI Backend Xenbus Setup - handles setup with frontend and xend
++ *
++ *   Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ */
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/list.h>
++#include <linux/vmalloc.h>
++#include <xen/xenbus.h>
++#include <xen/evtchn.h>
++#include "pciback.h"
++
++#define INVALID_EVTCHN_IRQ  (-1)
++struct workqueue_struct *pciback_wq;
++
++static struct pciback_device *alloc_pdev(struct xenbus_device *xdev)
++{
++	struct pciback_device *pdev;
++
++	pdev = kzalloc(sizeof(struct pciback_device), GFP_KERNEL);
++	if (pdev == NULL)
++		goto out;
++	dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev);
++
++	pdev->xdev = xdev;
++	xdev->dev.driver_data = pdev;
++
++	spin_lock_init(&pdev->dev_lock);
++
++	pdev->sh_area = NULL;
++	pdev->sh_info = NULL;
++	pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
++	pdev->be_watching = 0;
++
++	INIT_WORK(&pdev->op_work, pciback_do_op, pdev);
++
++	if (pciback_init_devices(pdev)) {
++		kfree(pdev);
++		pdev = NULL;
++	}
++      out:
++	return pdev;
++}
++
++static void pciback_disconnect(struct pciback_device *pdev)
++{
++	spin_lock(&pdev->dev_lock);
++
++	/* Ensure the guest can't trigger our handler before removing devices */
++	if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) {
++		unbind_from_irqhandler(pdev->evtchn_irq, pdev);
++		pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
++	}
++
++	/* If the driver domain started an op, make sure we complete it
++	 * before releasing the shared memory */
++	flush_workqueue(pciback_wq);
++
++	if (pdev->sh_info != NULL) {
++		xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_area);
++		pdev->sh_info = NULL;
++	}
++
++	spin_unlock(&pdev->dev_lock);
++}
++
++static void free_pdev(struct pciback_device *pdev)
++{
++	if (pdev->be_watching)
++		unregister_xenbus_watch(&pdev->be_watch);
++
++	pciback_disconnect(pdev);
++
++	pciback_release_devices(pdev);
++
++	pdev->xdev->dev.driver_data = NULL;
++	pdev->xdev = NULL;
++
++	kfree(pdev);
++}
++
++static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref,
++			     int remote_evtchn)
++{
++	int err = 0;
++	struct vm_struct *area;
++
++	dev_dbg(&pdev->xdev->dev,
++		"Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
++		gnt_ref, remote_evtchn);
++
++	area = xenbus_map_ring_valloc(pdev->xdev, gnt_ref);
++	if (IS_ERR(area)) {
++		err = PTR_ERR(area);
++		goto out;
++	}
++	pdev->sh_area = area;
++	pdev->sh_info = area->addr;
++
++	err = bind_interdomain_evtchn_to_irqhandler(
++		pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event,
++		SA_SAMPLE_RANDOM, "pciback", pdev);
++	if (err < 0) {
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Error binding event channel to IRQ");
++		goto out;
++	}
++	pdev->evtchn_irq = err;
++	err = 0;
++
++	dev_dbg(&pdev->xdev->dev, "Attached!\n");
++      out:
++	return err;
++}
++
++static int pciback_attach(struct pciback_device *pdev)
++{
++	int err = 0;
++	int gnt_ref, remote_evtchn;
++	char *magic = NULL;
++
++	spin_lock(&pdev->dev_lock);
++
++	/* Make sure we only do this setup once */
++	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
++	    XenbusStateInitialised)
++		goto out;
++
++	/* Wait for frontend to state that it has published the configuration */
++	if (xenbus_read_driver_state(pdev->xdev->otherend) !=
++	    XenbusStateInitialised)
++		goto out;
++
++	dev_dbg(&pdev->xdev->dev, "Reading frontend config\n");
++
++	err = xenbus_gather(XBT_NIL, pdev->xdev->otherend,
++			    "pci-op-ref", "%u", &gnt_ref,
++			    "event-channel", "%u", &remote_evtchn,
++			    "magic", NULL, &magic, NULL);
++	if (err) {
++		/* If configuration didn't get read correctly, wait longer */
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Error reading configuration from frontend");
++		goto out;
++	}
++
++	if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) {
++		xenbus_dev_fatal(pdev->xdev, -EFAULT,
++				 "version mismatch (%s/%s) with pcifront - "
++				 "halting pciback",
++				 magic, XEN_PCI_MAGIC);
++		goto out;
++	}
++
++	err = pciback_do_attach(pdev, gnt_ref, remote_evtchn);
++	if (err)
++		goto out;
++
++	dev_dbg(&pdev->xdev->dev, "Connecting...\n");
++
++	err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);
++	if (err)
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Error switching to connected state!");
++
++	dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
++      out:
++	spin_unlock(&pdev->dev_lock);
++
++	if (magic)
++		kfree(magic);
++
++	return err;
++}
++
++static int pciback_publish_pci_dev(struct pciback_device *pdev,
++				   unsigned int domain, unsigned int bus,
++				   unsigned int devfn, unsigned int devid)
++{
++	int err;
++	int len;
++	char str[64];
++
++	len = snprintf(str, sizeof(str), "vdev-%d", devid);
++	if (unlikely(len >= (sizeof(str) - 1))) {
++		err = -ENOMEM;
++		goto out;
++	}
++
++	err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
++			    "%04x:%02x:%02x.%02x", domain, bus,
++			    PCI_SLOT(devfn), PCI_FUNC(devfn));
++
++      out:
++	return err;
++}
++
++static int pciback_export_device(struct pciback_device *pdev,
++				 int domain, int bus, int slot, int func,
++				 int devid)
++{
++	struct pci_dev *dev;
++	int err = 0;
++
++	dev_dbg(&pdev->xdev->dev, "exporting dom %x bus %x slot %x func %x\n",
++		domain, bus, slot, func);
++
++	dev = pcistub_get_pci_dev_by_slot(pdev, domain, bus, slot, func);
++	if (!dev) {
++		err = -EINVAL;
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Couldn't locate PCI device "
++				 "(%04x:%02x:%02x.%01x)! "
++				 "perhaps already in-use?",
++				 domain, bus, slot, func);
++		goto out;
++	}
++
++	err = pciback_add_pci_dev(pdev, dev, devid, pciback_publish_pci_dev);
++	if (err)
++		goto out;
++
++	/* TODO: It'd be nice to export a bridge and have all of its children
++	 * get exported with it. This may be best done in xend (which will
++	 * have to calculate resource usage anyway) but we probably want to
++	 * put something in here to ensure that if a bridge gets given to a
++	 * driver domain, that all devices under that bridge are not given
++	 * to other driver domains (as whoever controls the bridge can disable
++	 * it and stop the other devices from working).
++	 */
++      out:
++	return err;
++}
++
++static int pciback_remove_device(struct pciback_device *pdev,
++				 int domain, int bus, int slot, int func)
++{
++	int err = 0;
++	struct pci_dev *dev;
++
++	dev_dbg(&pdev->xdev->dev, "removing dom %x bus %x slot %x func %x\n",
++		domain, bus, slot, func);
++
++	dev = pciback_get_pci_dev(pdev, domain, bus, PCI_DEVFN(slot, func));
++	if (!dev) {
++		err = -EINVAL;
++		dev_dbg(&pdev->xdev->dev, "Couldn't locate PCI device "
++			"(%04x:%02x:%02x.%01x)! not owned by this domain\n",
++			domain, bus, slot, func);
++		goto out;
++	}
++
++	pciback_release_pci_dev(pdev, dev);
++	
++      out:
++	return err;
++}
++
++static int pciback_publish_pci_root(struct pciback_device *pdev,
++				    unsigned int domain, unsigned int bus)
++{
++	unsigned int d, b;
++	int i, root_num, len, err;
++	char str[64];
++
++	dev_dbg(&pdev->xdev->dev, "Publishing pci roots\n");
++
++	err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
++			   "root_num", "%d", &root_num);
++	if (err == 0 || err == -ENOENT)
++		root_num = 0;
++	else if (err < 0)
++		goto out;
++
++	/* Verify that we haven't already published this pci root */
++	for (i = 0; i < root_num; i++) {
++		len = snprintf(str, sizeof(str), "root-%d", i);
++		if (unlikely(len >= (sizeof(str) - 1))) {
++			err = -ENOMEM;
++			goto out;
++		}
++
++		err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
++				   str, "%x:%x", &d, &b);
++		if (err < 0)
++			goto out;
++		if (err != 2) {
++			err = -EINVAL;
++			goto out;
++		}
++
++		if (d == domain && b == bus) {
++			err = 0;
++			goto out;
++		}
++	}
++
++	len = snprintf(str, sizeof(str), "root-%d", root_num);
++	if (unlikely(len >= (sizeof(str) - 1))) {
++		err = -ENOMEM;
++		goto out;
++	}
++
++	dev_dbg(&pdev->xdev->dev, "writing root %d at %04x:%02x\n",
++		root_num, domain, bus);
++
++	err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
++			    "%04x:%02x", domain, bus);
++	if (err)
++		goto out;
++
++	err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
++			    "root_num", "%d", (root_num + 1));
++
++      out:
++	return err;
++}
++
++static int pciback_reconfigure(struct pciback_device *pdev)
++{
++	int err = 0;
++	int num_devs;
++	int domain, bus, slot, func;
++	int substate;
++	int i, len;
++	char state_str[64];
++	char dev_str[64];
++
++	spin_lock(&pdev->dev_lock);
++
++	dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n");
++
++	/* Make sure we only reconfigure once */
++	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
++	    XenbusStateReconfiguring)
++		goto out;
++
++	err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
++			   &num_devs);
++	if (err != 1) {
++		if (err >= 0)
++			err = -EINVAL;
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Error reading number of devices");
++		goto out;
++	}
++
++	for (i = 0; i < num_devs; i++) {
++		len = snprintf(state_str, sizeof(state_str), "state-%d", i);
++		if (unlikely(len >= (sizeof(state_str) - 1))) {
++			err = -ENOMEM;
++			xenbus_dev_fatal(pdev->xdev, err,
++					 "String overflow while reading "
++					 "configuration");
++			goto out;
++		}
++		err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, state_str,
++				   "%d", &substate);
++		if (err != 1) 
++			substate = XenbusStateUnknown;
++
++		switch (substate) {
++		case XenbusStateInitialising:
++			dev_dbg(&pdev->xdev->dev, "Attaching dev-%d ...\n", i);
++
++			len = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
++			if (unlikely(len >= (sizeof(dev_str) - 1))) {
++				err = -ENOMEM;
++				xenbus_dev_fatal(pdev->xdev, err,
++						 "String overflow while "
++						 "reading configuration");
++				goto out;
++			}
++			err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
++					   dev_str, "%x:%x:%x.%x",
++					   &domain, &bus, &slot, &func);
++			if (err < 0) {
++				xenbus_dev_fatal(pdev->xdev, err,
++						 "Error reading device "
++						 "configuration");
++				goto out;
++			}
++			if (err != 4) {
++				err = -EINVAL;
++				xenbus_dev_fatal(pdev->xdev, err,
++						 "Error parsing pci device "
++						 "configuration");
++				goto out;
++			}
++	
++			err = pciback_export_device(pdev, domain, bus, slot,
++						    func, i);
++			if (err)
++				goto out;
++
++			/* Publish pci roots. */
++			err = pciback_publish_pci_roots(pdev, pciback_publish_pci_root);
++			if (err) {
++				xenbus_dev_fatal(pdev->xdev, err,
++						 "Error while publishing PCI root "
++						 "buses for frontend");
++				goto out;
++			}
++
++			err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
++					    state_str, "%d",
++					    XenbusStateInitialised);
++			if (err) {
++				xenbus_dev_fatal(pdev->xdev, err,
++						 "Error switching substate of "
++						 "dev-%d\n", i);
++				goto out;
++			}	
++			break;
++
++		case XenbusStateClosing:
++			dev_dbg(&pdev->xdev->dev, "Detaching dev-%d ...\n", i);
++
++			len = snprintf(dev_str, sizeof(dev_str), "vdev-%d", i);
++			if (unlikely(len >= (sizeof(dev_str) - 1))) {
++				err = -ENOMEM;
++				xenbus_dev_fatal(pdev->xdev, err,
++						 "String overflow while "
++						 "reading configuration");
++				goto out;
++			}
++			err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
++					   dev_str, "%x:%x:%x.%x",
++					   &domain, &bus, &slot, &func);
++			if (err < 0) {
++				xenbus_dev_fatal(pdev->xdev, err,
++						 "Error reading device "
++						 "configuration");
++				goto out;
++			}
++			if (err != 4) {
++				err = -EINVAL;
++				xenbus_dev_fatal(pdev->xdev, err,
++						 "Error parsing pci device "
++						 "configuration");
++				goto out;
++			}
++
++			err = pciback_remove_device(pdev, domain, bus, slot,
++						    func);
++			if(err)
++				goto out;
++
++			/* TODO: If at some point we implement support for pci
++			 * root hot-remove on pcifront side, we'll need to
++			 * remove unnecessary xenstore nodes of pci roots here.
++			 */
++
++			break;
++
++		default:
++			break;
++		}
++	}
++
++	err = xenbus_switch_state(pdev->xdev, XenbusStateReconfigured);
++	if (err) {
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Error switching to reconfigured state!");
++		goto out;
++	}
++	
++      out:
++	spin_unlock(&pdev->dev_lock);
++
++	return 0;
++}
++
++static void pciback_frontend_changed(struct xenbus_device *xdev,
++				     enum xenbus_state fe_state)
++{
++	struct pciback_device *pdev = xdev->dev.driver_data;
++
++	dev_dbg(&xdev->dev, "fe state changed %d\n", fe_state);
++
++	switch (fe_state) {
++	case XenbusStateInitialised:
++		pciback_attach(pdev);
++		break;
++
++	case XenbusStateReconfiguring:
++		pciback_reconfigure(pdev);
++		break;
++
++	case XenbusStateConnected:
++		/* pcifront switched its state from reconfiguring to connected.
++		 * Then switch to connected state.
++		 */
++		xenbus_switch_state(xdev, XenbusStateConnected);
++		break;
++
++	case XenbusStateClosing:
++		pciback_disconnect(pdev);
++		xenbus_switch_state(xdev, XenbusStateClosing);
++		break;
++
++	case XenbusStateClosed:
++		pciback_disconnect(pdev);
++		xenbus_switch_state(xdev, XenbusStateClosed);
++		if (xenbus_dev_is_online(xdev))
++			break;
++		/* fall through if not online */
++	case XenbusStateUnknown:
++		dev_dbg(&xdev->dev, "frontend is gone! unregister device\n");
++		device_unregister(&xdev->dev);
++		break;
++
++	default:
++		break;
++	}
++}
++
++static int pciback_setup_backend(struct pciback_device *pdev)
++{
++	/* Get configuration from xend (if available now) */
++	int domain, bus, slot, func;
++	int err = 0;
++	int i, num_devs;
++	char dev_str[64];
++	char state_str[64];
++
++	spin_lock(&pdev->dev_lock);
++
++	/* It's possible we could get the call to setup twice, so make sure
++	 * we're not already connected.
++	 */
++	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
++	    XenbusStateInitWait)
++		goto out;
++
++	dev_dbg(&pdev->xdev->dev, "getting be setup\n");
++
++	err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
++			   &num_devs);
++	if (err != 1) {
++		if (err >= 0)
++			err = -EINVAL;
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Error reading number of devices");
++		goto out;
++	}
++
++	for (i = 0; i < num_devs; i++) {
++		int l = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
++		if (unlikely(l >= (sizeof(dev_str) - 1))) {
++			err = -ENOMEM;
++			xenbus_dev_fatal(pdev->xdev, err,
++					 "String overflow while reading "
++					 "configuration");
++			goto out;
++		}
++
++		err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, dev_str,
++				   "%x:%x:%x.%x", &domain, &bus, &slot, &func);
++		if (err < 0) {
++			xenbus_dev_fatal(pdev->xdev, err,
++					 "Error reading device configuration");
++			goto out;
++		}
++		if (err != 4) {
++			err = -EINVAL;
++			xenbus_dev_fatal(pdev->xdev, err,
++					 "Error parsing pci device "
++					 "configuration");
++			goto out;
++		}
++
++		err = pciback_export_device(pdev, domain, bus, slot, func, i);
++		if (err)
++			goto out;
++
++		/* Switch substate of this device. */
++		l = snprintf(state_str, sizeof(state_str), "state-%d", i);
++		if (unlikely(l >= (sizeof(state_str) - 1))) {
++			err = -ENOMEM;
++			xenbus_dev_fatal(pdev->xdev, err,
++					 "String overflow while reading "
++					 "configuration");
++			goto out;
++		}
++		err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, state_str,
++				    "%d", XenbusStateInitialised);
++		if (err) {
++			xenbus_dev_fatal(pdev->xdev, err, "Error switching "
++					 "substate of dev-%d\n", i);
++			goto out;
++		}	
++	}
++
++	err = pciback_publish_pci_roots(pdev, pciback_publish_pci_root);
++	if (err) {
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Error while publishing PCI root buses "
++				 "for frontend");
++		goto out;
++	}
++
++	err = xenbus_switch_state(pdev->xdev, XenbusStateInitialised);
++	if (err)
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Error switching to initialised state!");
++
++      out:
++	spin_unlock(&pdev->dev_lock);
++
++	if (!err)
++		/* see if pcifront is already configured (if not, we'll wait) */
++		pciback_attach(pdev);
++
++	return err;
++}
++
++static void pciback_be_watch(struct xenbus_watch *watch,
++			     const char **vec, unsigned int len)
++{
++	struct pciback_device *pdev =
++	    container_of(watch, struct pciback_device, be_watch);
++
++	switch (xenbus_read_driver_state(pdev->xdev->nodename)) {
++	case XenbusStateInitWait:
++		pciback_setup_backend(pdev);
++		break;
++
++	default:
++		break;
++	}
++}
++
++static int pciback_xenbus_probe(struct xenbus_device *dev,
++				const struct xenbus_device_id *id)
++{
++	int err = 0;
++	struct pciback_device *pdev = alloc_pdev(dev);
++
++	if (pdev == NULL) {
++		err = -ENOMEM;
++		xenbus_dev_fatal(dev, err,
++				 "Error allocating pciback_device struct");
++		goto out;
++	}
++
++	/* wait for xend to configure us */
++	err = xenbus_switch_state(dev, XenbusStateInitWait);
++	if (err)
++		goto out;
++
++	/* watch the backend node for backend configuration information */
++	err = xenbus_watch_path(dev, dev->nodename, &pdev->be_watch,
++				pciback_be_watch);
++	if (err)
++		goto out;
++	pdev->be_watching = 1;
++
++	/* We need to force a call to our callback here in case
++	 * xend already configured us!
++	 */
++	pciback_be_watch(&pdev->be_watch, NULL, 0);
++
++      out:
++	return err;
++}
++
++static int pciback_xenbus_remove(struct xenbus_device *dev)
++{
++	struct pciback_device *pdev = dev->dev.driver_data;
++
++	if (pdev != NULL)
++		free_pdev(pdev);
++
++	return 0;
++}
++
++static const struct xenbus_device_id xenpci_ids[] = {
++	{"pci"},
++	{{0}},
++};
++
++static struct xenbus_driver xenbus_pciback_driver = {
++	.name 			= "pciback",
++	.owner 			= THIS_MODULE,
++	.ids 			= xenpci_ids,
++	.probe 			= pciback_xenbus_probe,
++	.remove 		= pciback_xenbus_remove,
++	.otherend_changed 	= pciback_frontend_changed,
++};
++
++int __init pciback_xenbus_register(void)
++{
++	if (!is_running_on_xen())
++		return -ENODEV;
++	pciback_wq = create_workqueue("pciback_workqueue");
++	if (!pciback_wq) {
++		printk(KERN_ERR "pciback_xenbus_register: create "
++			"pciback_workqueue failed\n");
++		return -EFAULT;
++	}
++	return xenbus_register_backend(&xenbus_pciback_driver);
++}
++
++void __exit pciback_xenbus_unregister(void)
++{
++	destroy_workqueue(pciback_wq);
++	xenbus_unregister_driver(&xenbus_pciback_driver);
++}
+-- 
+1.7.3.4
+
+
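+The completion handshake in pciback_do_op()/test_and_schedule_op() above is
+worth spelling out: the backend clears _XEN_PCIF_active, notifies the
+frontend, clears its private _PDEVF_op_active bit, and then re-checks for
+work so that a request posted between the two clears is not lost. What
+follows is a minimal userspace sketch of that pattern using C11 atomics; it
+is an illustration only, not kernel code, every name in it is invented, and
+the real driver defers to a workqueue instead of calling itself.
+
+/* Userspace illustration only -- NOT kernel code. Sketch of the
+ * "clear request flag, then clear busy flag, then re-check" handshake
+ * used by pciback_do_op()/test_and_schedule_op(). Names are invented. */
+#include <stdatomic.h>
+#include <stdio.h>
+
+#define REQ_ACTIVE 0x1U   /* analogue of _XEN_PCIF_active in sh_info */
+#define OP_ACTIVE  0x2U   /* analogue of _PDEVF_op_active in pdev->flags */
+
+static atomic_uint shared_flags;   /* written by the "frontend" */
+static atomic_uint backend_flags;  /* private to the "backend" */
+
+static void backend_do_op(void);
+
+/* Called whenever the backend is notified (the event-channel upcall). */
+static void backend_kick(void)
+{
+	/* Start work only if a request is pending and we are not already busy. */
+	if ((atomic_load(&shared_flags) & REQ_ACTIVE) &&
+	    !(atomic_fetch_or(&backend_flags, OP_ACTIVE) & OP_ACTIVE))
+		backend_do_op();
+}
+
+static void backend_do_op(void)
+{
+	printf("servicing one request\n");
+
+	/* Tell the frontend we are done with this request ... */
+	atomic_fetch_and(&shared_flags, ~REQ_ACTIVE);
+	/* ... then mark ourselves idle ... */
+	atomic_fetch_and(&backend_flags, ~OP_ACTIVE);
+	/* ... and re-check, in case a new request slipped in between the two
+	 * clears; without this re-check the wakeup would be lost. */
+	backend_kick();
+}
+
+int main(void)
+{
+	/* The "frontend" posts a request and kicks the backend. */
+	atomic_fetch_or(&shared_flags, REQ_ACTIVE);
+	backend_kick();
+	return 0;
+}
+
+The same ordering is what the smp_mb__before_clear_bit()/
+smp_mb__after_clear_bit() barriers in the real pciback_do_op() enforce.
+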
+From cf2a64556286b762ce6a3a9b408ba7ecdcaea03a Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Tue, 13 Oct 2009 17:22:22 -0400
+Subject: [PATCH 008/139] xen-pciback: Fix include header name change (evtchn.h is now events.h)
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/pciback/pci_stub.c    |    2 +-
+ drivers/xen/pciback/pciback_ops.c |    2 +-
+ drivers/xen/pciback/xenbus.c      |    2 +-
+ 3 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
+index c481a73..c02f21f 100644
+--- a/drivers/xen/pciback/pci_stub.c
++++ b/drivers/xen/pciback/pci_stub.c
+@@ -13,7 +13,7 @@
+ #include <linux/pci.h>
+ #include <linux/wait.h>
+ #include <asm/atomic.h>
+-#include <xen/evtchn.h>
++#include <xen/events.h>
+ #include "pciback.h"
+ #include "conf_space.h"
+ #include "conf_space_quirks.h"
+diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
+index b85b2db..58d09eb 100644
+--- a/drivers/xen/pciback/pciback_ops.c
++++ b/drivers/xen/pciback/pciback_ops.c
+@@ -6,7 +6,7 @@
+ #include <linux/module.h>
+ #include <linux/wait.h>
+ #include <asm/bitops.h>
+-#include <xen/evtchn.h>
++#include <xen/events.h>
+ #include "pciback.h"
+ 
+ int verbose_request = 0;
+diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
+index 4d56c45..bbca3fe 100644
+--- a/drivers/xen/pciback/xenbus.c
++++ b/drivers/xen/pciback/xenbus.c
+@@ -8,7 +8,7 @@
+ #include <linux/list.h>
+ #include <linux/vmalloc.h>
+ #include <xen/xenbus.h>
+-#include <xen/evtchn.h>
++#include <xen/events.h>
+ #include "pciback.h"
+ 
+ #define INVALID_EVTCHN_IRQ  (-1)
+-- 
+1.7.3.4
+
+
+From f6222ae41f2fee3f67983f833ee8dcba2c7a1362 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Tue, 13 Oct 2009 17:22:24 -0400
+Subject: [PATCH 009/139] xen-pciback: Use pci_is_enabled() instead of is_enabled.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/pciback/conf_space_header.c |    4 ++--
+ drivers/xen/pciback/pciback_ops.c       |    1 -
+ 2 files changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/xen/pciback/conf_space_header.c b/drivers/xen/pciback/conf_space_header.c
+index f794e12..5a9e028 100644
+--- a/drivers/xen/pciback/conf_space_header.c
++++ b/drivers/xen/pciback/conf_space_header.c
+@@ -22,14 +22,14 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
+ {
+ 	int err;
+ 
+-	if (!dev->is_enabled && is_enable_cmd(value)) {
++	if (!pci_is_enabled(dev) && is_enable_cmd(value)) {
+ 		if (unlikely(verbose_request))
+ 			printk(KERN_DEBUG "pciback: %s: enable\n",
+ 			       pci_name(dev));
+ 		err = pci_enable_device(dev);
+ 		if (err)
+ 			return err;
+-	} else if (dev->is_enabled && !is_enable_cmd(value)) {
++	} else if (pci_is_enabled(dev) && !is_enable_cmd(value)) {
+ 		if (unlikely(verbose_request))
+ 			printk(KERN_DEBUG "pciback: %s: disable\n",
+ 			       pci_name(dev));
+diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
+index 58d09eb..2d570e7 100644
+--- a/drivers/xen/pciback/pciback_ops.c
++++ b/drivers/xen/pciback/pciback_ops.c
+@@ -26,7 +26,6 @@ void pciback_reset_device(struct pci_dev *dev)
+ 
+ 		pci_write_config_word(dev, PCI_COMMAND, 0);
+ 
+-		dev->is_enabled = 0;
+ 		dev->is_busmaster = 0;
+ 	} else {
+ 		pci_read_config_word(dev, PCI_COMMAND, &cmd);
+-- 
+1.7.3.4
+
+
+From 0d379d03a3284e4b4d890b7e1b8163d485cc72e6 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Tue, 13 Oct 2009 17:22:25 -0400
+Subject: [PATCH 010/139] xen-pciback: Fix usage of INIT_WORK.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/pciback/pciback.h     |    4 ++--
+ drivers/xen/pciback/pciback_ops.c |    7 ++++---
+ drivers/xen/pciback/xenbus.c      |    3 ++-
+ 3 files changed, 8 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h
+index 6744f45..4fb8c05 100644
+--- a/drivers/xen/pciback/pciback.h
++++ b/drivers/xen/pciback/pciback.h
+@@ -99,8 +99,8 @@ int pciback_publish_pci_roots(struct pciback_device *pdev,
+ void pciback_release_devices(struct pciback_device *pdev);
+ 
+ /* Handles events from front-end */
+-irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs);
+-void pciback_do_op(void *data);
++irqreturn_t pciback_handle_event(int irq, void *dev_id);
++void pciback_do_op(struct work_struct *data);
+ 
+ int pciback_xenbus_register(void);
+ void pciback_xenbus_unregister(void);
+diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
+index 2d570e7..6624faf 100644
+--- a/drivers/xen/pciback/pciback_ops.c
++++ b/drivers/xen/pciback/pciback_ops.c
+@@ -66,9 +66,10 @@ void test_and_schedule_op(struct pciback_device *pdev)
+  * context because some of the pci_* functions can sleep (mostly due to ACPI
+  * use of semaphores). This function is intended to be called from a work
+  * queue in process context taking a struct pciback_device as a parameter */
+-void pciback_do_op(void *data)
++
++void pciback_do_op(struct work_struct *data)
+ {
+-	struct pciback_device *pdev = data;
++	struct pciback_device *pdev = container_of(data, struct pciback_device, op_work);
+ 	struct pci_dev *dev;
+ 	struct xen_pci_op *op = &pdev->sh_info->op;
+ 
+@@ -123,7 +124,7 @@ void pciback_do_op(void *data)
+ 	test_and_schedule_op(pdev);
+ }
+ 
+-irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs)
++irqreturn_t pciback_handle_event(int irq, void *dev_id)
+ {
+ 	struct pciback_device *pdev = dev_id;
+ 
+diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
+index bbca3fe..bd52289 100644
+--- a/drivers/xen/pciback/xenbus.c
++++ b/drivers/xen/pciback/xenbus.c
+@@ -9,6 +9,7 @@
+ #include <linux/vmalloc.h>
+ #include <xen/xenbus.h>
+ #include <xen/events.h>
++#include <linux/workqueue.h>
+ #include "pciback.h"
+ 
+ #define INVALID_EVTCHN_IRQ  (-1)
+@@ -33,7 +34,7 @@ static struct pciback_device *alloc_pdev(struct xenbus_device *xdev)
+ 	pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
+ 	pdev->be_watching = 0;
+ 
+-	INIT_WORK(&pdev->op_work, pciback_do_op, pdev);
++	INIT_WORK(&pdev->op_work, pciback_do_op);
+ 
+ 	if (pciback_init_devices(pdev)) {
+ 		kfree(pdev);
+-- 
+1.7.3.4
+
+
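+The INIT_WORK() change above moves pciback to the two-argument workqueue
+API, where the work callback receives only the work_struct pointer and must
+recover its containing object with container_of(). Below is a minimal
+userspace sketch of that pattern; it is an illustration only, the structure
+and function names are invented, and it is not the kernel implementation.
+
+/* Userspace illustration only. Sketch of the container_of() pattern that
+ * the two-argument INIT_WORK() API relies on. All names are invented. */
+#include <stddef.h>
+#include <stdio.h>
+
+#define container_of(ptr, type, member) \
+	((type *)((char *)(ptr) - offsetof(type, member)))
+
+struct work_item {                  /* stand-in for struct work_struct */
+	void (*func)(struct work_item *);
+};
+
+struct fake_device {                /* stand-in for struct pciback_device */
+	int id;
+	struct work_item op_work;   /* embedded, like pdev->op_work */
+};
+
+static void do_op(struct work_item *work)
+{
+	/* Recover the outer structure from the embedded member. */
+	struct fake_device *dev = container_of(work, struct fake_device, op_work);
+
+	printf("running op for device %d\n", dev->id);
+}
+
+int main(void)
+{
+	struct fake_device dev = { .id = 7, .op_work = { .func = do_op } };
+
+	/* A "workqueue" would eventually invoke the callback like this. */
+	dev.op_work.func(&dev.op_work);
+	return 0;
+}
+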
+From 57f6c49d0f428f96cca49147d68b0bb6156613a6 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Tue, 13 Oct 2009 17:22:26 -0400
+Subject: [PATCH 011/139] xen-pciback: Update the calling mechanism for xenbus_[map|unmap]_ring_valloc functions.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/pciback/pciback.h |    1 -
+ drivers/xen/pciback/xenbus.c  |   18 +++++++++---------
+ 2 files changed, 9 insertions(+), 10 deletions(-)
+
+diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h
+index 4fb8c05..5e8e14e 100644
+--- a/drivers/xen/pciback/pciback.h
++++ b/drivers/xen/pciback/pciback.h
+@@ -36,7 +36,6 @@ struct pciback_device {
+ 
+ 	int evtchn_irq;
+ 
+-	struct vm_struct *sh_area;
+ 	struct xen_pci_sharedinfo *sh_info;
+ 
+ 	unsigned long flags;
+diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
+index bd52289..5be1350 100644
+--- a/drivers/xen/pciback/xenbus.c
++++ b/drivers/xen/pciback/xenbus.c
+@@ -7,6 +7,7 @@
+ #include <linux/init.h>
+ #include <linux/list.h>
+ #include <linux/vmalloc.h>
++#include <linux/workqueue.h>
+ #include <xen/xenbus.h>
+ #include <xen/events.h>
+ #include <linux/workqueue.h>
+@@ -29,7 +30,6 @@ static struct pciback_device *alloc_pdev(struct xenbus_device *xdev)
+ 
+ 	spin_lock_init(&pdev->dev_lock);
+ 
+-	pdev->sh_area = NULL;
+ 	pdev->sh_info = NULL;
+ 	pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
+ 	pdev->be_watching = 0;
+@@ -59,7 +59,7 @@ static void pciback_disconnect(struct pciback_device *pdev)
+ 	flush_workqueue(pciback_wq);
+ 
+ 	if (pdev->sh_info != NULL) {
+-		xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_area);
++		xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info);
+ 		pdev->sh_info = NULL;
+ 	}
+ 
+@@ -85,23 +85,23 @@ static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref,
+ 			     int remote_evtchn)
+ {
+ 	int err = 0;
+-	struct vm_struct *area;
++	void *vaddr;
+ 
+ 	dev_dbg(&pdev->xdev->dev,
+ 		"Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
+ 		gnt_ref, remote_evtchn);
+ 
+-	area = xenbus_map_ring_valloc(pdev->xdev, gnt_ref);
+-	if (IS_ERR(area)) {
+-		err = PTR_ERR(area);
++	err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr);
++	if (err < 0) {
++		xenbus_dev_fatal(pdev->xdev, err,
++				"Error mapping other domain page in ours.");
+ 		goto out;
+ 	}
+-	pdev->sh_area = area;
+-	pdev->sh_info = area->addr;
++	pdev->sh_info = vaddr;
+ 
+ 	err = bind_interdomain_evtchn_to_irqhandler(
+ 		pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event,
+-		SA_SAMPLE_RANDOM, "pciback", pdev);
++		0, "pciback", pdev);
+ 	if (err < 0) {
+ 		xenbus_dev_fatal(pdev->xdev, err,
+ 				 "Error binding event channel to IRQ");
+-- 
+1.7.3.4
+
+
+From 6e86fcb926e41fb55f512972603e5aaf77e2efb8 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Tue, 13 Oct 2009 17:22:30 -0400
+Subject: [PATCH 012/139] xen-pciback: Add check to load only under privileged domain.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/pciback/pci_stub.c |    5 +++++
+ drivers/xen/pciback/xenbus.c   |    2 --
+ 2 files changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
+index c02f21f..d97dac5 100644
+--- a/drivers/xen/pciback/pci_stub.c
++++ b/drivers/xen/pciback/pci_stub.c
+@@ -14,6 +14,8 @@
+ #include <linux/wait.h>
+ #include <asm/atomic.h>
+ #include <xen/events.h>
++#include <asm/xen/pci.h>
++#include <asm/xen/hypervisor.h>
+ #include "pciback.h"
+ #include "conf_space.h"
+ #include "conf_space_quirks.h"
+@@ -1286,6 +1288,9 @@ static int __init pciback_init(void)
+ {
+ 	int err;
+ 
++	if (!xen_initial_domain())
++		return -ENODEV;
++
+ 	err = pciback_config_init();
+ 	if (err)
+ 		return err;
+diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
+index 5be1350..a85c413 100644
+--- a/drivers/xen/pciback/xenbus.c
++++ b/drivers/xen/pciback/xenbus.c
+@@ -693,8 +693,6 @@ static struct xenbus_driver xenbus_pciback_driver = {
+ 
+ int __init pciback_xenbus_register(void)
+ {
+-	if (!is_running_on_xen())
+-		return -ENODEV;
+ 	pciback_wq = create_workqueue("pciback_workqueue");
+ 	if (!pciback_wq) {
+ 		printk(KERN_ERR "pciback_xenbus_register: create"
+-- 
+1.7.3.4
+
+
+From c1139f912c1336538e51966d56e5905954052cba Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Tue, 13 Oct 2009 17:22:31 -0400
+Subject: [PATCH 013/139] xen-pciback: Remove usage of pci_restore_bars() as Linux handles the power-up states correctly now.
+
+Originally this code was pulled from the upstream kernel, and stuck
+in the linux-2.6-sparse tree. At that point in time, the Linux tree (2.6.16?)
+did not know how to handle this. Nowadays the pci_set_power_state routine
+handles this case, so we do not need this anymore.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/pciback/conf_space_capability_pm.c |   13 -------------
+ 1 files changed, 0 insertions(+), 13 deletions(-)
+
+diff --git a/drivers/xen/pciback/conf_space_capability_pm.c b/drivers/xen/pciback/conf_space_capability_pm.c
+index e2f99c7..e1d3af4 100644
+--- a/drivers/xen/pciback/conf_space_capability_pm.c
++++ b/drivers/xen/pciback/conf_space_capability_pm.c
+@@ -58,19 +58,6 @@ static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value,
+ 		goto out;
+ 	}
+ 
+-	/*
+-	 * Device may lose PCI config info on D3->D0 transition. This
+-	 * is a problem for some guests which will not reset BARs. Even
+-	 * those that have a go will be foiled by our BAR-write handler
+-	 * which will discard the write! Since Linux won't re-init
+-	 * the config space automatically in all cases, we do it here.
+-	 * Future: Should we re-initialise all first 64 bytes of config space?
+-	 */
+-	if (new_state == PCI_D0 &&
+-	    (old_state == PCI_D3hot || old_state == PCI_D3cold) &&
+-	    !(old_value & PCI_PM_CTRL_NO_SOFT_RESET))
+-		pci_restore_bars(dev);
+-
+  out:
+ 	return err;
+ }
+-- 
+1.7.3.4
+
+
+From 721657d92623cfcf2f6f68c14abf97eb40fa6b20 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Tue, 13 Oct 2009 17:22:32 -0400
+Subject: [PATCH 014/139] xen-pciback: Enable Xen-PCI-back to be compiled.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/Kconfig  |   65 ++++++++++++++++++++++++++++++++++++++++++++++++++
+ drivers/xen/Makefile |    1 +
+ 2 files changed, 66 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
+index 6e6180c..d874453 100644
+--- a/drivers/xen/Kconfig
++++ b/drivers/xen/Kconfig
+@@ -29,6 +29,71 @@ config XEN_DEV_EVTCHN
+ 	  Support for backend device drivers that provide I/O services
+ 	  to other virtual machines.
+ 
++config XEN_PCIDEV_BACKEND
++	tristate "PCI-device backend driver"
++	depends on PCI
++	depends on XEN_BACKEND
++	help
++	  The PCI device backend driver allows the kernel to export arbitrary
++	  PCI devices to other guests. If you select this to be a module, you
++	  will need to make sure no other driver has bound to the device(s)
++	  you want to make visible to other guests.
++
++choice
++	prompt "PCI Backend Mode"
++	depends on XEN_PCIDEV_BACKEND
++	default XEN_PCIDEV_BACKEND_VPCI if !IA64
++	default XEN_PCIDEV_BACKEND_CONTROLLER if IA64
++
++config XEN_PCIDEV_BACKEND_VPCI
++	bool "Virtual PCI"
++	---help---
++	  This PCI Backend hides the true PCI topology and makes the frontend
++	  think there is a single PCI bus with only the exported devices on it.
++	  For example, a device at 03:05.0 will be re-assigned to 00:00.0. A
++	  second device at 02:1a.1 will be re-assigned to 00:01.1.
++
++config XEN_PCIDEV_BACKEND_PASS
++	bool "Passthrough"
++	---help---
++	  This PCI Backend provides a real view of the PCI topology to the
++	  frontend (for example, a device at 06:01.b will still appear at
++	  06:01.b to the frontend). This is similar to how Xen 2.0.x exposed
++	  PCI devices to its driver domains. This may be required for drivers
++	  which depend on finding their hardware in certain bus/slot
++	  locations.
++
++config XEN_PCIDEV_BACKEND_SLOT
++	bool "Slot"
++	---help---
++	  This PCI Backend hides the true PCI topology and makes the frontend
++	  think there is a single PCI bus with only the exported devices on it.
++	  Contrary to the virtual PCI backend, a function becomes a new slot.
++	  For example, a device at 03:05.2 will be re-assigned to 00:00.0. A
++	  second device at 02:1a.1 will be re-assigned to 00:01.0.
++
++config XEN_PCIDEV_BACKEND_CONTROLLER
++	bool "Controller"
++	depends on IA64
++	---help---
++	  This PCI backend virtualizes the PCI bus topology by providing a
++	  virtual bus per PCI root device.  Devices which are physically under
++	  the same root bus will appear on the same virtual bus.  For systems
++	  with complex I/O addressing, this is the only backend which supports
++	  extended I/O port spaces and MMIO translation offsets.  This backend
++	  also supports slot virtualization.  For example, a device at
++	  0000:01:02.1 will be re-assigned to 0000:00:00.0.  A second device
++	  at 0000:02:05.0 (behind a P2P bridge on bus 0000:01) will be
++	  re-assigned to 0000:00:01.0.  A third device at 0000:16:05.0 (under
++	  a different PCI root bus) will be re-assigned to 0000:01:00.0.
++
++endchoice
++
++config XEN_PCIDEV_BE_DEBUG
++	bool "PCI Backend Debugging"
++	depends on XEN_PCIDEV_BACKEND
++
++
+ config XENFS
+ 	tristate "Xen filesystem"
+ 	default y
+diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
+index eb8a78d..3737dee 100644
+--- a/drivers/xen/Makefile
++++ b/drivers/xen/Makefile
+@@ -9,6 +9,7 @@ obj-$(CONFIG_HOTPLUG_CPU)	+= cpu_hotplug.o
+ obj-$(CONFIG_XEN_BALLOON)	+= balloon.o
+ obj-$(CONFIG_XEN_DEV_EVTCHN)	+= xen-evtchn.o
+ obj-$(CONFIG_XEN_GNTDEV)	+= xen-gntdev.o
++obj-$(CONFIG_XEN_PCIDEV_BACKEND)	+= pciback/
+ obj-$(CONFIG_XENFS)		+= xenfs/
+ obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
+ obj-$(CONFIG_XEN_PLATFORM_PCI)	+= platform-pci.o
+-- 
+1.7.3.4
+
+
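+The Kconfig help texts above describe how the vpci and slot backends hide
+the host topology by renumbering exported devices onto a flat virtual bus
+(03:05.0 becoming 00:00.0, and so on). The core of that policy is simply
+"take the first free virtual slot", as in slot.c/vpci.c earlier in this
+patch. A minimal userspace sketch follows, with invented names and a
+simplified two-bus array standing in for the real slot_dev_data:
+
+/* Userspace illustration only -- not the kernel data structures. */
+#include <stdio.h>
+
+#define VBUS_NBR  2
+#define VSLOT_MAX 32
+
+static const char *slots[VBUS_NBR][VSLOT_MAX];   /* NULL == free */
+
+/* Fills *vbus/*vslot and returns 0, or -1 if the virtual bus is full. */
+static int assign_virtual_slot(const char *dev_name, int *vbus, int *vslot)
+{
+	for (int bus = 0; bus < VBUS_NBR; bus++)
+		for (int slot = 0; slot < VSLOT_MAX; slot++)
+			if (!slots[bus][slot]) {
+				slots[bus][slot] = dev_name;
+				*vbus = bus;
+				*vslot = slot;
+				return 0;
+			}
+	return -1;
+}
+
+int main(void)
+{
+	int bus, slot;
+
+	/* A physical 0000:03:05.0 lands at virtual 00:00.0, the next device
+	 * at 00:01.0, and so on -- matching the Kconfig "Slot" description. */
+	if (assign_virtual_slot("0000:03:05.0", &bus, &slot) == 0)
+		printf("0000:03:05.0 -> %02x:%02x.0\n", bus, slot);
+	if (assign_virtual_slot("0000:02:1a.1", &bus, &slot) == 0)
+		printf("0000:02:1a.1 -> %02x:%02x.0\n", bus, slot);
+	return 0;
+}
+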
+From c164cd8577017d1c4e001b475fadddc7d2ff5c78 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Thu, 5 Nov 2009 15:25:43 -0500
+Subject: [PATCH 015/139] xen-pciback: Return the physical IRQ number instead of the allocated IRQ number to pcifront.
+
+The allocation of IRQ numbers in Linux privileged domains is based
+on finding the first unbound IRQ number. After the allocation is done
+a HYPERCALL to Xen is done, which allocates a PIRQ globally.
+That PIRQ->IRQ binding is saved in data structures that are used
+during ISR executions.
+
+Before this patch, for non-privileged domains we would return the local
+IRQ number instead of the PIRQ. The non-privileged domains require the
+PIRQ so that they can attach their own interrupt handler to it.
+Fortunately there is a function, 'xen_gsi_from_irq', that returns
+that global IRQ number.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/pciback/conf_space_capability_msi.c |   12 ++++++++----
+ 1 files changed, 8 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c
+index 762e396..7fb5371 100644
+--- a/drivers/xen/pciback/conf_space_capability_msi.c
++++ b/drivers/xen/pciback/conf_space_capability_msi.c
+@@ -6,6 +6,7 @@
+ #include "conf_space.h"
+ #include "conf_space_capability.h"
+ #include <xen/interface/io/pciif.h>
++#include <xen/events.h>
+ #include "pciback.h"
+ 
+ int pciback_enable_msi(struct pciback_device *pdev,
+@@ -22,7 +23,9 @@ int pciback_enable_msi(struct pciback_device *pdev,
+ 		return XEN_PCI_ERR_op_failed;
+ 	}
+ 
+-	op->value = dev->irq;
++	/* The value the guest needs is actually the IDT vector, not the
++	 * local domain's IRQ number. */
++	op->value = xen_gsi_from_irq(dev->irq);
+ 	return 0;
+ }
+ 
+@@ -31,7 +34,7 @@ int pciback_disable_msi(struct pciback_device *pdev,
+ {
+ 	pci_disable_msi(dev);
+ 
+-	op->value = dev->irq;
++	op->value = xen_gsi_from_irq(dev->irq);
+ 	return 0;
+ }
+ 
+@@ -57,7 +60,8 @@ int pciback_enable_msix(struct pciback_device *pdev,
+ 
+ 	for (i = 0; i < op->value; i++) {
+ 		op->msix_entries[i].entry = entries[i].entry;
+-		op->msix_entries[i].vector = entries[i].vector;
++		op->msix_entries[i].vector =
++					xen_gsi_from_irq(entries[i].vector);
+ 	}
+ 
+ 	kfree(entries);
+@@ -73,7 +77,7 @@ int pciback_disable_msix(struct pciback_device *pdev,
+ 
+ 	pci_disable_msix(dev);
+ 
+-	op->value = dev->irq;
++	op->value = xen_gsi_from_irq(dev->irq);
+ 	return 0;
+ }
+ 
+-- 
+1.7.3.4
+
+
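As a rough sketch of the pattern patch 015 introduces — translate dom0's locally
allocated IRQ into the global PIRQ/GSI before reporting it to the frontend —
consider the fragment below. Only pci_enable_msi() and xen_gsi_from_irq() are the
real interfaces used by the patch; the wrapper function and its names are made up
for illustration.

#include <linux/pci.h>
#include <xen/events.h>		/* xen_gsi_from_irq() */

/*
 * Illustrative only: enable MSI on a device that is being exported to a
 * guest and report back a number the guest can actually bind to.  The
 * IRQ in dev->irq is local to this (privileged) domain; the guest needs
 * the global PIRQ/GSI, which xen_gsi_from_irq() looks up.
 */
static int example_enable_msi_for_guest(struct pci_dev *dev, u32 *guest_value)
{
	int err = pci_enable_msi(dev);

	if (err)
		return err;

	*guest_value = xen_gsi_from_irq(dev->irq);
	return 0;
}
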
+From b0b035f1de3282aa96a6dc28007d513e8fce793d Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Thu, 5 Nov 2009 15:25:44 -0500
+Subject: [PATCH 016/139] xen-pciback: Fix checkpatch warnings and errors for pciback/ directory.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/pciback/conf_space.c                |   34 +++---
+ drivers/xen/pciback/conf_space.h                |   30 ++--
+ drivers/xen/pciback/conf_space_capability.c     |    5 +-
+ drivers/xen/pciback/conf_space_capability.h     |    3 +
+ drivers/xen/pciback/conf_space_capability_msi.c |    3 +-
+ drivers/xen/pciback/conf_space_capability_pm.c  |    4 +-
+ drivers/xen/pciback/conf_space_capability_vpd.c |    2 +-
+ drivers/xen/pciback/conf_space_header.c         |    7 +-
+ drivers/xen/pciback/conf_space_quirks.c         |   16 ++-
+ drivers/xen/pciback/controller.c                |   15 +-
+ drivers/xen/pciback/passthrough.c               |    6 +-
+ drivers/xen/pciback/pci_stub.c                  |  165 +++++++++++------------
+ drivers/xen/pciback/pciback.h                   |   28 +++--
+ drivers/xen/pciback/pciback_ops.c               |   74 +++++------
+ drivers/xen/pciback/slot.c                      |   22 ++--
+ drivers/xen/pciback/vpci.c                      |   28 ++--
+ drivers/xen/pciback/xenbus.c                    |   42 +++---
+ 17 files changed, 245 insertions(+), 239 deletions(-)
+
+diff --git a/drivers/xen/pciback/conf_space.c b/drivers/xen/pciback/conf_space.c
+index 0c76db1..370c18e 100644
+--- a/drivers/xen/pciback/conf_space.c
++++ b/drivers/xen/pciback/conf_space.c
+@@ -18,11 +18,11 @@
+ static int permissive;
+ module_param(permissive, bool, 0644);
+ 
+-#define DEFINE_PCI_CONFIG(op,size,type) 			\
++#define DEFINE_PCI_CONFIG(op, size, type) 			\
+ int pciback_##op##_config_##size 				\
+ (struct pci_dev *dev, int offset, type value, void *data)	\
+ {								\
+-	return pci_##op##_config_##size (dev, offset, value);	\
++	return pci_##op##_config_##size(dev, offset, value);	\
+ }
+ 
+ DEFINE_PCI_CONFIG(read, byte, u8 *)
+@@ -139,14 +139,15 @@ static int pcibios_err_to_errno(int err)
+ }
+ 
+ int pciback_config_read(struct pci_dev *dev, int offset, int size,
+-			u32 * ret_val)
++			u32 *ret_val)
+ {
+ 	int err = 0;
+ 	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
+ 	const struct config_field_entry *cfg_entry;
+ 	const struct config_field *field;
+ 	int req_start, req_end, field_start, field_end;
+-	/* if read fails for any reason, return 0 (as if device didn't respond) */
++	/* if read fails for any reason, return 0
++	 * (as if device didn't respond) */
+ 	u32 value = 0, tmp_val;
+ 
+ 	if (unlikely(verbose_request))
+@@ -161,10 +162,10 @@ int pciback_config_read(struct pci_dev *dev, int offset, int size,
+ 	/* Get the real value first, then modify as appropriate */
+ 	switch (size) {
+ 	case 1:
+-		err = pci_read_config_byte(dev, offset, (u8 *) & value);
++		err = pci_read_config_byte(dev, offset, (u8 *) &value);
+ 		break;
+ 	case 2:
+-		err = pci_read_config_word(dev, offset, (u16 *) & value);
++		err = pci_read_config_word(dev, offset, (u16 *) &value);
+ 		break;
+ 	case 4:
+ 		err = pci_read_config_dword(dev, offset, &value);
+@@ -192,7 +193,7 @@ int pciback_config_read(struct pci_dev *dev, int offset, int size,
+ 		}
+ 	}
+ 
+-      out:
++out:
+ 	if (unlikely(verbose_request))
+ 		printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x = %x\n",
+ 		       pci_name(dev), size, offset, value);
+@@ -276,8 +277,8 @@ int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value)
+ 		} else if (!dev_data->warned_on_write) {
+ 			dev_data->warned_on_write = 1;
+ 			dev_warn(&dev->dev, "Driver tried to write to a "
+-				 "read-only configuration space field at offset "
+-				 "0x%x, size %d. This may be harmless, but if "
++				 "read-only configuration space field at offset"
++				 " 0x%x, size %d. This may be harmless, but if "
+ 				 "you have problems with your device:\n"
+ 				 "1) see permissive attribute in sysfs\n"
+ 				 "2) report problems to the xen-devel "
+@@ -295,8 +296,8 @@ void pciback_config_free_dyn_fields(struct pci_dev *dev)
+ 	struct config_field_entry *cfg_entry, *t;
+ 	const struct config_field *field;
+ 
+-	dev_dbg(&dev->dev,
+-		"free-ing dynamically allocated virtual configuration space fields\n");
++	dev_dbg(&dev->dev, "free-ing dynamically allocated virtual "
++			   "configuration space fields\n");
+ 	if (!dev_data)
+ 		return;
+ 
+@@ -306,8 +307,7 @@ void pciback_config_free_dyn_fields(struct pci_dev *dev)
+ 		if (field->clean) {
+ 			field->clean((struct config_field *)field);
+ 
+-			if (cfg_entry->data)
+-				kfree(cfg_entry->data);
++			kfree(cfg_entry->data);
+ 
+ 			list_del(&cfg_entry->list);
+ 			kfree(cfg_entry);
+@@ -376,7 +376,7 @@ int pciback_config_add_field_offset(struct pci_dev *dev,
+ 	cfg_entry->base_offset = base_offset;
+ 
+ 	/* silently ignore duplicate fields */
+-	err = pciback_field_is_dup(dev,OFFSET(cfg_entry));
++	err = pciback_field_is_dup(dev, OFFSET(cfg_entry));
+ 	if (err)
+ 		goto out;
+ 
+@@ -395,14 +395,14 @@ int pciback_config_add_field_offset(struct pci_dev *dev,
+ 		OFFSET(cfg_entry));
+ 	list_add_tail(&cfg_entry->list, &dev_data->config_fields);
+ 
+-      out:
++out:
+ 	if (err)
+ 		kfree(cfg_entry);
+ 
+ 	return err;
+ }
+ 
+-/* This sets up the device's virtual configuration space to keep track of 
++/* This sets up the device's virtual configuration space to keep track of
+  * certain registers (like the base address registers (BARs) so that we can
+  * keep the client from manipulating them directly.
+  */
+@@ -425,7 +425,7 @@ int pciback_config_init_dev(struct pci_dev *dev)
+ 
+ 	err = pciback_config_quirks_init(dev);
+ 
+-      out:
++out:
+ 	return err;
+ }
+ 
+diff --git a/drivers/xen/pciback/conf_space.h b/drivers/xen/pciback/conf_space.h
+index fe746ef..50ebef2 100644
+--- a/drivers/xen/pciback/conf_space.h
++++ b/drivers/xen/pciback/conf_space.h
+@@ -11,21 +11,21 @@
+ #include <linux/err.h>
+ 
+ /* conf_field_init can return an errno in a ptr with ERR_PTR() */
+-typedef void *(*conf_field_init) (struct pci_dev * dev, int offset);
+-typedef void (*conf_field_reset) (struct pci_dev * dev, int offset, void *data);
+-typedef void (*conf_field_free) (struct pci_dev * dev, int offset, void *data);
++typedef void *(*conf_field_init) (struct pci_dev *dev, int offset);
++typedef void (*conf_field_reset) (struct pci_dev *dev, int offset, void *data);
++typedef void (*conf_field_free) (struct pci_dev *dev, int offset, void *data);
+ 
+-typedef int (*conf_dword_write) (struct pci_dev * dev, int offset, u32 value,
++typedef int (*conf_dword_write) (struct pci_dev *dev, int offset, u32 value,
+ 				 void *data);
+-typedef int (*conf_word_write) (struct pci_dev * dev, int offset, u16 value,
++typedef int (*conf_word_write) (struct pci_dev *dev, int offset, u16 value,
+ 				void *data);
+-typedef int (*conf_byte_write) (struct pci_dev * dev, int offset, u8 value,
++typedef int (*conf_byte_write) (struct pci_dev *dev, int offset, u8 value,
+ 				void *data);
+-typedef int (*conf_dword_read) (struct pci_dev * dev, int offset, u32 * value,
++typedef int (*conf_dword_read) (struct pci_dev *dev, int offset, u32 *value,
+ 				void *data);
+-typedef int (*conf_word_read) (struct pci_dev * dev, int offset, u16 * value,
++typedef int (*conf_word_read) (struct pci_dev *dev, int offset, u16 *value,
+ 			       void *data);
+-typedef int (*conf_byte_read) (struct pci_dev * dev, int offset, u8 * value,
++typedef int (*conf_byte_read) (struct pci_dev *dev, int offset, u8 *value,
+ 			       void *data);
+ 
+ /* These are the fields within the configuration space which we
+@@ -39,7 +39,7 @@ struct config_field {
+ 	conf_field_init init;
+ 	conf_field_reset reset;
+ 	conf_field_free release;
+-	void (*clean) (struct config_field * field);
++	void (*clean) (struct config_field *field);
+ 	union {
+ 		struct {
+ 			conf_dword_write write;
+@@ -92,8 +92,8 @@ static inline int pciback_config_add_fields(struct pci_dev *dev,
+ }
+ 
+ static inline int pciback_config_add_fields_offset(struct pci_dev *dev,
+-						   const struct config_field *field,
+-						   unsigned int offset)
++					const struct config_field *field,
++					unsigned int offset)
+ {
+ 	int i, err = 0;
+ 	for (i = 0; field[i].size != 0; i++) {
+@@ -105,11 +105,11 @@ static inline int pciback_config_add_fields_offset(struct pci_dev *dev,
+ }
+ 
+ /* Read/Write the real configuration space */
+-int pciback_read_config_byte(struct pci_dev *dev, int offset, u8 * value,
++int pciback_read_config_byte(struct pci_dev *dev, int offset, u8 *value,
+ 			     void *data);
+-int pciback_read_config_word(struct pci_dev *dev, int offset, u16 * value,
++int pciback_read_config_word(struct pci_dev *dev, int offset, u16 *value,
+ 			     void *data);
+-int pciback_read_config_dword(struct pci_dev *dev, int offset, u32 * value,
++int pciback_read_config_dword(struct pci_dev *dev, int offset, u32 *value,
+ 			      void *data);
+ int pciback_write_config_byte(struct pci_dev *dev, int offset, u8 value,
+ 			      void *data);
+diff --git a/drivers/xen/pciback/conf_space_capability.c b/drivers/xen/pciback/conf_space_capability.c
+index 50efca4..0ea84d6 100644
+--- a/drivers/xen/pciback/conf_space_capability.c
++++ b/drivers/xen/pciback/conf_space_capability.c
+@@ -53,13 +53,10 @@ int pciback_config_capability_add_fields(struct pci_dev *dev)
+ 		}
+ 	}
+ 
+-      out:
++out:
+ 	return err;
+ }
+ 
+-extern struct pciback_config_capability pciback_config_capability_vpd;
+-extern struct pciback_config_capability pciback_config_capability_pm;
+-
+ int pciback_config_capability_init(void)
+ {
+ 	register_capability(&pciback_config_capability_vpd);
+diff --git a/drivers/xen/pciback/conf_space_capability.h b/drivers/xen/pciback/conf_space_capability.h
+index 823392e..8da3ac4 100644
+--- a/drivers/xen/pciback/conf_space_capability.h
++++ b/drivers/xen/pciback/conf_space_capability.h
+@@ -20,4 +20,7 @@ struct pciback_config_capability {
+ 	const struct config_field *fields;
+ };
+ 
++extern struct pciback_config_capability pciback_config_capability_vpd;
++extern struct pciback_config_capability pciback_config_capability_pm;
++
+ #endif
+diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c
+index 7fb5371..b70ea8b 100644
+--- a/drivers/xen/pciback/conf_space_capability_msi.c
++++ b/drivers/xen/pciback/conf_space_capability_msi.c
+@@ -18,7 +18,8 @@ int pciback_enable_msi(struct pciback_device *pdev,
+ 	status = pci_enable_msi(dev);
+ 
+ 	if (status) {
+-		printk("error enable msi for guest %x status %x\n", otherend, status);
++		printk(KERN_ERR "error enable msi for guest %x status %x\n",
++			otherend, status);
+ 		op->value = 0;
+ 		return XEN_PCI_ERR_op_failed;
+ 	}
+diff --git a/drivers/xen/pciback/conf_space_capability_pm.c b/drivers/xen/pciback/conf_space_capability_pm.c
+index e1d3af4..0442616 100644
+--- a/drivers/xen/pciback/conf_space_capability_pm.c
++++ b/drivers/xen/pciback/conf_space_capability_pm.c
+@@ -20,7 +20,7 @@ static int pm_caps_read(struct pci_dev *dev, int offset, u16 *value,
+ 
+ 	*value = real_value & ~PCI_PM_CAP_PME_MASK;
+ 
+-      out:
++out:
+ 	return err;
+ }
+ 
+@@ -77,7 +77,7 @@ static void *pm_ctrl_init(struct pci_dev *dev, int offset)
+ 		err = pci_write_config_word(dev, offset, value);
+ 	}
+ 
+-      out:
++out:
+ 	return ERR_PTR(err);
+ }
+ 
+diff --git a/drivers/xen/pciback/conf_space_capability_vpd.c b/drivers/xen/pciback/conf_space_capability_vpd.c
+index 920cb4a..e7b4d66 100644
+--- a/drivers/xen/pciback/conf_space_capability_vpd.c
++++ b/drivers/xen/pciback/conf_space_capability_vpd.c
+@@ -33,7 +33,7 @@ static const struct config_field caplist_vpd[] = {
+ 	 },
+ 	{}
+ };
+- 
++
+ struct pciback_config_capability pciback_config_capability_vpd = {
+ 	.capability = PCI_CAP_ID_VPD,
+ 	.fields = caplist_vpd,
+diff --git a/drivers/xen/pciback/conf_space_header.c b/drivers/xen/pciback/conf_space_header.c
+index 5a9e028..3ae7da1 100644
+--- a/drivers/xen/pciback/conf_space_header.c
++++ b/drivers/xen/pciback/conf_space_header.c
+@@ -51,7 +51,8 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
+ 		err = pci_set_mwi(dev);
+ 		if (err) {
+ 			printk(KERN_WARNING
+-			       "pciback: %s: cannot enable memory-write-invalidate (%d)\n",
++			       "pciback: %s: cannot enable "
++			       "memory-write-invalidate (%d)\n",
+ 			       pci_name(dev), err);
+ 			value &= ~PCI_COMMAND_INVALIDATE;
+ 		}
+@@ -206,7 +207,7 @@ static int bist_write(struct pci_dev *dev, int offset, u8 value, void *data)
+ 	    || value == PCI_BIST_START)
+ 		err = pci_write_config_byte(dev, offset, value);
+ 
+-      out:
++out:
+ 	return err;
+ }
+ 
+@@ -312,6 +313,6 @@ int pciback_config_header_add_fields(struct pci_dev *dev)
+ 		break;
+ 	}
+ 
+-      out:
++out:
+ 	return err;
+ }
+diff --git a/drivers/xen/pciback/conf_space_quirks.c b/drivers/xen/pciback/conf_space_quirks.c
+index 244a438..45c31fb 100644
+--- a/drivers/xen/pciback/conf_space_quirks.c
++++ b/drivers/xen/pciback/conf_space_quirks.c
+@@ -18,8 +18,10 @@ match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
+ {
+ 	if ((id->vendor == PCI_ANY_ID || id->vendor == dev->vendor) &&
+ 	    (id->device == PCI_ANY_ID || id->device == dev->device) &&
+-	    (id->subvendor == PCI_ANY_ID || id->subvendor == dev->subsystem_vendor) &&
+-	    (id->subdevice == PCI_ANY_ID || id->subdevice == dev->subsystem_device) &&
++	    (id->subvendor == PCI_ANY_ID ||
++				id->subvendor == dev->subsystem_vendor) &&
++	    (id->subdevice == PCI_ANY_ID ||
++				id->subdevice == dev->subsystem_device) &&
+ 	    !((id->class ^ dev->class) & id->class_mask))
+ 		return id;
+ 	return NULL;
+@@ -35,7 +37,7 @@ struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev)
+ 	tmp_quirk = NULL;
+ 	printk(KERN_DEBUG
+ 	       "quirk didn't match any device pciback knows about\n");
+-      out:
++out:
+ 	return tmp_quirk;
+ }
+ 
+@@ -51,7 +53,7 @@ int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg)
+ 	struct config_field_entry *cfg_entry;
+ 
+ 	list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
+-		if ( OFFSET(cfg_entry) == reg) {
++		if (OFFSET(cfg_entry) == reg) {
+ 			ret = 1;
+ 			break;
+ 		}
+@@ -84,7 +86,7 @@ int pciback_config_quirks_add_field(struct pci_dev *dev, struct config_field
+ 
+ 	pciback_config_add_field(dev, field);
+ 
+-      out:
++out:
+ 	return err;
+ }
+ 
+@@ -110,7 +112,7 @@ int pciback_config_quirks_init(struct pci_dev *dev)
+ 	quirk->pdev = dev;
+ 
+ 	register_quirk(quirk);
+-      out:
++out:
+ 	return ret;
+ }
+ 
+@@ -133,6 +135,6 @@ int pciback_config_quirk_release(struct pci_dev *dev)
+ 	list_del(&quirk->quirks_list);
+ 	kfree(quirk);
+ 
+-      out:
++out:
+ 	return ret;
+ }
+diff --git a/drivers/xen/pciback/controller.c b/drivers/xen/pciback/controller.c
+index 294e48f..7f04f11 100644
+--- a/drivers/xen/pciback/controller.c
++++ b/drivers/xen/pciback/controller.c
+@@ -259,7 +259,7 @@ static acpi_status write_xenbus_resource(struct acpi_resource *res, void *data)
+ 	    !(addr.resource_type == ACPI_IO_RANGE &&
+ 	      addr.info.io.translation))
+ 		return AE_OK;
+-	   
++
+ 	/* Store the resource in xenbus for the guest */
+ 	len = snprintf(str, sizeof(str), "root-%d-resource-%d",
+ 		       info->root_num, info->resource_count);
+@@ -314,7 +314,7 @@ int pciback_publish_pci_roots(struct pciback_device *pdev,
+ 			goto out;
+ 
+ 		/*
+- 		 * Now figure out which root-%d this belongs to
++		 * Now figure out which root-%d this belongs to
+ 		 * so we can associate resources with it.
+ 		 */
+ 		err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
+@@ -407,8 +407,8 @@ void pciback_release_devices(struct pciback_device *pdev)
+ 	pdev->pci_dev_data = NULL;
+ }
+ 
+-int pciback_get_pcifront_dev(struct pci_dev *pcidev, 
+-		struct pciback_device *pdev, 
++int pciback_get_pcifront_dev(struct pci_dev *pcidev,
++		struct pciback_device *pdev,
+ 		unsigned int *domain, unsigned int *bus, unsigned int *devfn)
+ {
+ 	struct controller_dev_data *dev_data = pdev->pci_dev_data;
+@@ -420,13 +420,12 @@ int pciback_get_pcifront_dev(struct pci_dev *pcidev,
+ 
+ 	list_for_each_entry(cntrl_entry, &dev_data->list, list) {
+ 		list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) {
+-			if ( (dev_entry->dev->bus->number == 
++			if ((dev_entry->dev->bus->number ==
+ 					pcidev->bus->number) &&
+-			  	(dev_entry->dev->devfn ==
++				(dev_entry->dev->devfn ==
+ 					pcidev->devfn) &&
+ 				(pci_domain_nr(dev_entry->dev->bus) ==
+-					pci_domain_nr(pcidev->bus)))
+-			{
++					pci_domain_nr(pcidev->bus))) {
+ 				found = 1;
+ 				*domain = cntrl_entry->domain;
+ 				*bus = cntrl_entry->bus;
+diff --git a/drivers/xen/pciback/passthrough.c b/drivers/xen/pciback/passthrough.c
+index 9e7a0c4..5386bebf 100644
+--- a/drivers/xen/pciback/passthrough.c
++++ b/drivers/xen/pciback/passthrough.c
+@@ -165,8 +165,10 @@ void pciback_release_devices(struct pciback_device *pdev)
+ 	pdev->pci_dev_data = NULL;
+ }
+ 
+-int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev, 
+-		unsigned int *domain, unsigned int *bus, unsigned int *devfn)
++int pciback_get_pcifront_dev(struct pci_dev *pcidev,
++			     struct pciback_device *pdev,
++			     unsigned int *domain, unsigned int *bus,
++			     unsigned int *devfn)
+ 
+ {
+ 	*domain = pci_domain_nr(pcidev->bus);
+diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
+index d97dac5..28222ee 100644
+--- a/drivers/xen/pciback/pci_stub.c
++++ b/drivers/xen/pciback/pci_stub.c
+@@ -20,7 +20,7 @@
+ #include "conf_space.h"
+ #include "conf_space_quirks.h"
+ 
+-static char *pci_devs_to_hide = NULL;
++static char *pci_devs_to_hide;
+ wait_queue_head_t aer_wait_queue;
+ /*Add sem for sync AER handling and pciback remove/reconfigue ops,
+ * We want to avoid in middle of AER ops, pciback devices is being removed
+@@ -43,7 +43,7 @@ struct pcistub_device {
+ 	spinlock_t lock;
+ 
+ 	struct pci_dev *dev;
+-	struct pciback_device *pdev;	/* non-NULL if struct pci_dev is in use */
++	struct pciback_device *pdev;/* non-NULL if struct pci_dev is in use */
+ };
+ 
+ /* Access to pcistub_devices & seized_devices lists and the initialize_devices
+@@ -55,7 +55,7 @@ static LIST_HEAD(pcistub_devices);
+ /* wait for device_initcall before initializing our devices
+  * (see pcistub_init_devices_late)
+  */
+-static int initialize_devices = 0;
++static int initialize_devices;
+ static LIST_HEAD(seized_devices);
+ 
+ static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev)
+@@ -132,7 +132,7 @@ static struct pcistub_device *pcistub_device_find(int domain, int bus,
+ 	/* didn't find it */
+ 	psdev = NULL;
+ 
+-      out:
++out:
+ 	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+ 	return psdev;
+ }
+@@ -321,10 +321,10 @@ static int __devinit pcistub_init_device(struct pci_dev *dev)
+ 
+ 	return 0;
+ 
+-      config_release:
++config_release:
+ 	pciback_config_free_dev(dev);
+ 
+-      out:
++out:
+ 	pci_set_drvdata(dev, NULL);
+ 	kfree(dev_data);
+ 	return err;
+@@ -443,7 +443,7 @@ static int __devinit pcistub_probe(struct pci_dev *dev,
+ 		/* Didn't find the device */
+ 		err = -ENODEV;
+ 
+-      out:
++out:
+ 	return err;
+ }
+ 
+@@ -511,26 +511,24 @@ static void kill_domain_by_device(struct pcistub_device *psdev)
+ 	int err;
+ 	char nodename[1024];
+ 
+-	if (!psdev) 
++	if (!psdev)
+ 		dev_err(&psdev->dev->dev,
+ 			"device is NULL when do AER recovery/kill_domain\n");
+-	sprintf(nodename, "/local/domain/0/backend/pci/%d/0", 
++	sprintf(nodename, "/local/domain/0/backend/pci/%d/0",
+ 		psdev->pdev->xdev->otherend_id);
+ 	nodename[strlen(nodename)] = '\0';
+ 
+ again:
+ 	err = xenbus_transaction_start(&xbt);
+-	if (err)
+-	{
++	if (err) {
+ 		dev_err(&psdev->dev->dev,
+ 			"error %d when start xenbus transaction\n", err);
+ 		return;
+ 	}
+ 	/*PV AER handlers will set this flag*/
+-	xenbus_printf(xbt, nodename, "aerState" , "aerfail" );
++	xenbus_printf(xbt, nodename, "aerState" , "aerfail");
+ 	err = xenbus_transaction_end(xbt, 0);
+-	if (err)
+-	{
++	if (err) {
+ 		if (err == -EAGAIN)
+ 			goto again;
+ 		dev_err(&psdev->dev->dev,
+@@ -541,9 +539,9 @@ again:
+ 
+ /* For each aer recovery step error_detected, mmio_enabled, etc, front_end and
+  * backend need to have cooperation. In pciback, those steps will do similar
+- * jobs: send service request and waiting for front_end response. 
++ * jobs: send service request and waiting for front_end response.
+ */
+-static pci_ers_result_t common_process(struct pcistub_device *psdev, 
++static pci_ers_result_t common_process(struct pcistub_device *psdev,
+ 		pci_channel_state_t state, int aer_cmd, pci_ers_result_t result)
+ {
+ 	pci_ers_result_t res = result;
+@@ -561,12 +559,12 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
+ 	if (!ret) {
+ 		dev_err(&psdev->dev->dev,
+ 			"pciback: failed to get pcifront device\n");
+-		return PCI_ERS_RESULT_NONE; 
++		return PCI_ERS_RESULT_NONE;
+ 	}
+ 	wmb();
+ 
+-	dev_dbg(&psdev->dev->dev, 
+-			"pciback: aer_op %x dom %x bus %x devfn %x\n",  
++	dev_dbg(&psdev->dev->dev,
++			"pciback: aer_op %x dom %x bus %x devfn %x\n",
+ 			aer_cmd, aer_op->domain, aer_op->bus, aer_op->devfn);
+ 	/*local flag to mark there's aer request, pciback callback will use this
+ 	* flag to judge whether we need to check pci-front give aer service
+@@ -575,21 +573,21 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
+ 	set_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);
+ 
+ 	/*It is possible that a pcifront conf_read_write ops request invokes
+-	* the callback which cause the spurious execution of wake_up. 
++	* the callback which cause the spurious execution of wake_up.
+ 	* Yet it is harmless and better than a spinlock here
+ 	*/
+-	set_bit(_XEN_PCIB_active, 
++	set_bit(_XEN_PCIB_active,
+ 		(unsigned long *)&psdev->pdev->sh_info->flags);
+ 	wmb();
+ 	notify_remote_via_irq(psdev->pdev->evtchn_irq);
+ 
+ 	ret = wait_event_timeout(aer_wait_queue, !(test_bit(_XEN_PCIB_active,
+-                (unsigned long *)&psdev->pdev->sh_info->flags)), 300*HZ);
++		(unsigned long *)&psdev->pdev->sh_info->flags)), 300*HZ);
+ 
+ 	if (!ret) {
+-		if (test_bit(_XEN_PCIB_active, 
++		if (test_bit(_XEN_PCIB_active,
+ 			(unsigned long *)&psdev->pdev->sh_info->flags)) {
+-			dev_err(&psdev->dev->dev, 
++			dev_err(&psdev->dev->dev,
+ 				"pcifront aer process not responding!\n");
+ 			clear_bit(_XEN_PCIB_active,
+ 			  (unsigned long *)&psdev->pdev->sh_info->flags);
+@@ -599,16 +597,16 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
+ 	}
+ 	clear_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);
+ 
+-	if ( test_bit( _XEN_PCIF_active,
+-		(unsigned long*)&psdev->pdev->sh_info->flags)) {
+-		dev_dbg(&psdev->dev->dev, 
++	if (test_bit(_XEN_PCIF_active,
++		(unsigned long *)&psdev->pdev->sh_info->flags)) {
++		dev_dbg(&psdev->dev->dev,
+ 			"schedule pci_conf service in pciback \n");
+ 		test_and_schedule_op(psdev->pdev);
+ 	}
+ 
+ 	res = (pci_ers_result_t)aer_op->err;
+ 	return res;
+-} 
++}
+ 
+ /*
+ * pciback_slot_reset: it will send the slot_reset request to  pcifront in case
+@@ -632,24 +630,22 @@ static pci_ers_result_t pciback_slot_reset(struct pci_dev *dev)
+ 				PCI_SLOT(dev->devfn),
+ 				PCI_FUNC(dev->devfn));
+ 
+-	if ( !psdev || !psdev->pdev )
+-	{
+-		dev_err(&dev->dev, 
++	if (!psdev || !psdev->pdev) {
++		dev_err(&dev->dev,
+ 			"pciback device is not found/assigned\n");
+ 		goto end;
+ 	}
+ 
+-	if ( !psdev->pdev->sh_info )
+-	{
++	if (!psdev->pdev->sh_info) {
+ 		dev_err(&dev->dev, "pciback device is not connected or owned"
+ 			" by HVM, kill it\n");
+ 		kill_domain_by_device(psdev);
+ 		goto release;
+ 	}
+ 
+-	if ( !test_bit(_XEN_PCIB_AERHANDLER, 
+-		(unsigned long *)&psdev->pdev->sh_info->flags) ) {
+-		dev_err(&dev->dev, 
++	if (!test_bit(_XEN_PCIB_AERHANDLER,
++		(unsigned long *)&psdev->pdev->sh_info->flags)) {
++		dev_err(&dev->dev,
+ 			"guest with no AER driver should have been killed\n");
+ 		goto release;
+ 	}
+@@ -657,7 +653,7 @@ static pci_ers_result_t pciback_slot_reset(struct pci_dev *dev)
+ 
+ 	if (result == PCI_ERS_RESULT_NONE ||
+ 		result == PCI_ERS_RESULT_DISCONNECT) {
+-		dev_dbg(&dev->dev, 
++		dev_dbg(&dev->dev,
+ 			"No AER slot_reset service or disconnected!\n");
+ 		kill_domain_by_device(psdev);
+ 	}
+@@ -670,9 +666,9 @@ end:
+ }
+ 
+ 
+-/*pciback_mmio_enabled: it will send the mmio_enabled request to  pcifront 
+-* in case of the device driver could provide this service, and then wait 
+-* for pcifront ack.
++/*pciback_mmio_enabled: it will send the mmio_enabled request to  pcifront
++* in case of the device driver could provide this service, and then wait
++* for pcifront ack
+ * @dev: pointer to PCI devices
+ * return value is used by aer_core do_recovery policy
+ */
+@@ -692,24 +688,22 @@ static pci_ers_result_t pciback_mmio_enabled(struct pci_dev *dev)
+ 				PCI_SLOT(dev->devfn),
+ 				PCI_FUNC(dev->devfn));
+ 
+-	if ( !psdev || !psdev->pdev )
+-	{
+-		dev_err(&dev->dev, 
++	if (!psdev || !psdev->pdev) {
++		dev_err(&dev->dev,
+ 			"pciback device is not found/assigned\n");
+ 		goto end;
+ 	}
+ 
+-	if ( !psdev->pdev->sh_info )
+-	{
++	if (!psdev->pdev->sh_info) {
+ 		dev_err(&dev->dev, "pciback device is not connected or owned"
+ 			" by HVM, kill it\n");
+ 		kill_domain_by_device(psdev);
+ 		goto release;
+ 	}
+ 
+-	if ( !test_bit(_XEN_PCIB_AERHANDLER, 
+-		(unsigned long *)&psdev->pdev->sh_info->flags) ) {
+-		dev_err(&dev->dev, 
++	if (!test_bit(_XEN_PCIB_AERHANDLER,
++		(unsigned long *)&psdev->pdev->sh_info->flags)) {
++		dev_err(&dev->dev,
+ 			"guest with no AER driver should have been killed\n");
+ 		goto release;
+ 	}
+@@ -717,7 +711,7 @@ static pci_ers_result_t pciback_mmio_enabled(struct pci_dev *dev)
+ 
+ 	if (result == PCI_ERS_RESULT_NONE ||
+ 		result == PCI_ERS_RESULT_DISCONNECT) {
+-		dev_dbg(&dev->dev, 
++		dev_dbg(&dev->dev,
+ 			"No AER mmio_enabled service or disconnected!\n");
+ 		kill_domain_by_device(psdev);
+ 	}
+@@ -728,8 +722,8 @@ end:
+ 	return result;
+ }
+ 
+-/*pciback_error_detected: it will send the error_detected request to  pcifront 
+-* in case of the device driver could provide this service, and then wait 
++/*pciback_error_detected: it will send the error_detected request to  pcifront
++* in case of the device driver could provide this service, and then wait
+ * for pcifront ack.
+ * @dev: pointer to PCI devices
+ * @error: the current PCI connection state
+@@ -752,15 +746,13 @@ static pci_ers_result_t pciback_error_detected(struct pci_dev *dev,
+ 				PCI_SLOT(dev->devfn),
+ 				PCI_FUNC(dev->devfn));
+ 
+-	if ( !psdev || !psdev->pdev )
+-	{
+-		dev_err(&dev->dev, 
++	if (!psdev || !psdev->pdev) {
++		dev_err(&dev->dev,
+ 			"pciback device is not found/assigned\n");
+ 		goto end;
+ 	}
+ 
+-	if ( !psdev->pdev->sh_info )
+-	{
++	if (!psdev->pdev->sh_info) {
+ 		dev_err(&dev->dev, "pciback device is not connected or owned"
+ 			" by HVM, kill it\n");
+ 		kill_domain_by_device(psdev);
+@@ -768,8 +760,8 @@ static pci_ers_result_t pciback_error_detected(struct pci_dev *dev,
+ 	}
+ 
+ 	/*Guest owns the device yet no aer handler regiested, kill guest*/
+-	if ( !test_bit(_XEN_PCIB_AERHANDLER, 
+-		(unsigned long *)&psdev->pdev->sh_info->flags) ) {
++	if (!test_bit(_XEN_PCIB_AERHANDLER,
++		(unsigned long *)&psdev->pdev->sh_info->flags)) {
+ 		dev_dbg(&dev->dev, "guest may have no aer driver, kill it\n");
+ 		kill_domain_by_device(psdev);
+ 		goto release;
+@@ -778,7 +770,7 @@ static pci_ers_result_t pciback_error_detected(struct pci_dev *dev,
+ 
+ 	if (result == PCI_ERS_RESULT_NONE ||
+ 		result == PCI_ERS_RESULT_DISCONNECT) {
+-		dev_dbg(&dev->dev, 
++		dev_dbg(&dev->dev,
+ 			"No AER error_detected service or disconnected!\n");
+ 		kill_domain_by_device(psdev);
+ 	}
+@@ -789,8 +781,8 @@ end:
+ 	return result;
+ }
+ 
+-/*pciback_error_resume: it will send the error_resume request to  pcifront 
+-* in case of the device driver could provide this service, and then wait 
++/*pciback_error_resume: it will send the error_resume request to  pcifront
++* in case of the device driver could provide this service, and then wait
+ * for pcifront ack.
+ * @dev: pointer to PCI devices
+ */
+@@ -808,29 +800,28 @@ static void pciback_error_resume(struct pci_dev *dev)
+ 				PCI_SLOT(dev->devfn),
+ 				PCI_FUNC(dev->devfn));
+ 
+-	if ( !psdev || !psdev->pdev )
+-	{
+-		dev_err(&dev->dev, 
++	if (!psdev || !psdev->pdev) {
++		dev_err(&dev->dev,
+ 			"pciback device is not found/assigned\n");
+ 		goto end;
+ 	}
+ 
+-	if ( !psdev->pdev->sh_info )
+-	{
++	if (!psdev->pdev->sh_info) {
+ 		dev_err(&dev->dev, "pciback device is not connected or owned"
+ 			" by HVM, kill it\n");
+ 		kill_domain_by_device(psdev);
+ 		goto release;
+ 	}
+ 
+-	if ( !test_bit(_XEN_PCIB_AERHANDLER, 
+-		(unsigned long *)&psdev->pdev->sh_info->flags) ) {
+-		dev_err(&dev->dev, 
++	if (!test_bit(_XEN_PCIB_AERHANDLER,
++		(unsigned long *)&psdev->pdev->sh_info->flags)) {
++		dev_err(&dev->dev,
+ 			"guest with no AER driver should have been killed\n");
+ 		kill_domain_by_device(psdev);
+ 		goto release;
+ 	}
+-	common_process(psdev, 1, XEN_PCI_OP_aer_resume, PCI_ERS_RESULT_RECOVERED);
++	common_process(psdev, 1, XEN_PCI_OP_aer_resume,
++		       PCI_ERS_RESULT_RECOVERED);
+ release:
+ 	pcistub_device_put(psdev);
+ end:
+@@ -923,8 +914,8 @@ static int pcistub_device_id_remove(int domain, int bus, int slot, int func)
+ 	unsigned long flags;
+ 
+ 	spin_lock_irqsave(&device_ids_lock, flags);
+-	list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids, slot_list) {
+-
++	list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids,
++				 slot_list) {
+ 		if (pci_dev_id->domain == domain
+ 		    && pci_dev_id->bus == bus && pci_dev_id->devfn == devfn) {
+ 			/* Don't break; here because it's possible the same
+@@ -976,7 +967,7 @@ static int pcistub_reg_add(int domain, int bus, int slot, int func, int reg,
+ 	err = pciback_config_quirks_add_field(dev, field);
+ 	if (err)
+ 		kfree(field);
+-      out:
++out:
+ 	return err;
+ }
+ 
+@@ -992,7 +983,7 @@ static ssize_t pcistub_slot_add(struct device_driver *drv, const char *buf,
+ 
+ 	err = pcistub_device_id_add(domain, bus, slot, func);
+ 
+-      out:
++out:
+ 	if (!err)
+ 		err = count;
+ 	return err;
+@@ -1012,7 +1003,7 @@ static ssize_t pcistub_slot_remove(struct device_driver *drv, const char *buf,
+ 
+ 	err = pcistub_device_id_remove(domain, bus, slot, func);
+ 
+-      out:
++out:
+ 	if (!err)
+ 		err = count;
+ 	return err;
+@@ -1057,7 +1048,7 @@ static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf,
+ 
+ 	err = pcistub_reg_add(domain, bus, slot, func, reg, size, mask);
+ 
+-      out:
++out:
+ 	if (!err)
+ 		err = count;
+ 	return err;
+@@ -1067,7 +1058,6 @@ static ssize_t pcistub_quirk_show(struct device_driver *drv, char *buf)
+ {
+ 	int count = 0;
+ 	unsigned long flags;
+-	extern struct list_head pciback_quirks;
+ 	struct pciback_config_quirk *quirk;
+ 	struct pciback_dev_data *dev_data;
+ 	const struct config_field *field;
+@@ -1096,12 +1086,13 @@ static ssize_t pcistub_quirk_show(struct device_driver *drv, char *buf)
+ 
+ 			count += scnprintf(buf + count, PAGE_SIZE - count,
+ 					   "\t\t%08x:%01x:%08x\n",
+-					   cfg_entry->base_offset + field->offset, 
+-					   field->size, field->mask);
++					   cfg_entry->base_offset +
++					   field->offset, field->size,
++					   field->mask);
+ 		}
+ 	}
+ 
+-      out:
++out:
+ 	spin_unlock_irqrestore(&device_ids_lock, flags);
+ 
+ 	return count;
+@@ -1137,14 +1128,14 @@ static ssize_t permissive_add(struct device_driver *drv, const char *buf,
+ 	if (!dev_data->permissive) {
+ 		dev_data->permissive = 1;
+ 		/* Let user know that what they're doing could be unsafe */
+-		dev_warn(&psdev->dev->dev,
+-			 "enabling permissive mode configuration space accesses!\n");
++		dev_warn(&psdev->dev->dev, "enabling permissive mode "
++			 "configuration space accesses!\n");
+ 		dev_warn(&psdev->dev->dev,
+ 			 "permissive mode is potentially unsafe!\n");
+ 	}
+-      release:
++release:
+ 	pcistub_device_put(psdev);
+-      out:
++out:
+ 	if (!err)
+ 		err = count;
+ 	return err;
+@@ -1264,10 +1255,10 @@ static int __init pcistub_init(void)
+ 	if (err)
+ 		pcistub_exit();
+ 
+-      out:
++out:
+ 	return err;
+ 
+-      parse_error:
++parse_error:
+ 	printk(KERN_ERR "pciback: Error parsing pci_devs_to_hide at \"%s\"\n",
+ 	       pci_devs_to_hide + pos);
+ 	return -EINVAL;
+@@ -1276,7 +1267,7 @@ static int __init pcistub_init(void)
+ #ifndef MODULE
+ /*
+  * fs_initcall happens before device_initcall
+- * so pciback *should* get called first (b/c we 
++ * so pciback *should* get called first (b/c we
+  * want to suck up any device before other drivers
+  * get a chance by being the first pci device
+  * driver to register)
+diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h
+index 5e8e14e..98e2912 100644
+--- a/drivers/xen/pciback/pciback.h
++++ b/drivers/xen/pciback/pciback.h
+@@ -49,6 +49,12 @@ struct pciback_dev_data {
+ 	int warned_on_write;
+ };
+ 
++/* Used by XenBus and pciback_ops.c */
++extern wait_queue_head_t aer_wait_queue;
++extern struct workqueue_struct *pciback_wq;
++/* Used by pcistub.c and conf_space_quirks.c */
++extern struct list_head pciback_quirks;
++
+ /* Get/Put PCI Devices that are hidden from the PCI Backend Domain */
+ struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev,
+ 					    int domain, int bus,
+@@ -67,14 +73,14 @@ void pciback_config_free_dyn_fields(struct pci_dev *dev);
+ void pciback_config_reset_dev(struct pci_dev *dev);
+ void pciback_config_free_dev(struct pci_dev *dev);
+ int pciback_config_read(struct pci_dev *dev, int offset, int size,
+-			u32 * ret_val);
++			u32 *ret_val);
+ int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value);
+ 
+ /* Handle requests for specific devices from the frontend */
+ typedef int (*publish_pci_dev_cb) (struct pciback_device *pdev,
+ 				   unsigned int domain, unsigned int bus,
+ 				   unsigned int devfn, unsigned int devid);
+-typedef int (*publish_pci_root_cb) (struct pciback_device * pdev,
++typedef int (*publish_pci_root_cb) (struct pciback_device *pdev,
+ 				    unsigned int domain, unsigned int bus);
+ int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
+ 			int devid, publish_pci_dev_cb publish_cb);
+@@ -83,15 +89,17 @@ struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
+ 				    unsigned int domain, unsigned int bus,
+ 				    unsigned int devfn);
+ 
+-/** 
++/**
+ * Add for domain0 PCIE-AER handling. Get guest domain/bus/devfn in pciback
+-* before sending aer request to pcifront, so that guest could identify 
++* before sending aer request to pcifront, so that guest could identify
+ * device, coopearte with pciback to finish aer recovery job if device driver
+ * has the capability
+ */
+ 
+-int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev, 
+-				unsigned int *domain, unsigned int *bus, unsigned int *devfn);
++int pciback_get_pcifront_dev(struct pci_dev *pcidev,
++			     struct pciback_device *pdev,
++			     unsigned int *domain, unsigned int *bus,
++			     unsigned int *devfn);
+ int pciback_init_devices(struct pciback_device *pdev);
+ int pciback_publish_pci_roots(struct pciback_device *pdev,
+ 			      publish_pci_root_cb cb);
+@@ -106,17 +114,17 @@ void pciback_xenbus_unregister(void);
+ 
+ #ifdef CONFIG_PCI_MSI
+ int pciback_enable_msi(struct pciback_device *pdev,
+-                       struct pci_dev *dev, struct xen_pci_op *op);
++			struct pci_dev *dev, struct xen_pci_op *op);
+ 
+ int pciback_disable_msi(struct pciback_device *pdev,
+-                         struct pci_dev *dev, struct xen_pci_op *op);
++			struct pci_dev *dev, struct xen_pci_op *op);
+ 
+ 
+ int pciback_enable_msix(struct pciback_device *pdev,
+-                        struct pci_dev *dev, struct xen_pci_op *op);
++			struct pci_dev *dev, struct xen_pci_op *op);
+ 
+ int pciback_disable_msix(struct pciback_device *pdev,
+-                        struct pci_dev *dev, struct xen_pci_op *op);
++			struct pci_dev *dev, struct xen_pci_op *op);
+ #endif
+ extern int verbose_request;
+ 
+diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
+index 6624faf..bf83dca 100644
+--- a/drivers/xen/pciback/pciback_ops.c
++++ b/drivers/xen/pciback/pciback_ops.c
+@@ -5,11 +5,11 @@
+  */
+ #include <linux/module.h>
+ #include <linux/wait.h>
+-#include <asm/bitops.h>
++#include <linux/bitops.h>
+ #include <xen/events.h>
+ #include "pciback.h"
+ 
+-int verbose_request = 0;
++int verbose_request;
+ module_param(verbose_request, int, 0644);
+ 
+ /* Ensure a device is "turned off" and ready to be exported.
+@@ -37,12 +37,10 @@ void pciback_reset_device(struct pci_dev *dev)
+ 		}
+ 	}
+ }
+-extern wait_queue_head_t aer_wait_queue;
+-extern struct workqueue_struct *pciback_wq;
+ /*
+ * Now the same evtchn is used for both pcifront conf_read_write request
+ * as well as pcie aer front end ack. We use a new work_queue to schedule
+-* pciback conf_read_write service for avoiding confict with aer_core 
++* pciback conf_read_write service for avoiding confict with aer_core
+ * do_recovery job which also use the system default work_queue
+ */
+ void test_and_schedule_op(struct pciback_device *pdev)
+@@ -50,14 +48,13 @@ void test_and_schedule_op(struct pciback_device *pdev)
+ 	/* Check that frontend is requesting an operation and that we are not
+ 	 * already processing a request */
+ 	if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags)
+-	    && !test_and_set_bit(_PDEVF_op_active, &pdev->flags))
+-	{
++	    && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) {
+ 		queue_work(pciback_wq, &pdev->op_work);
+ 	}
+ 	/*_XEN_PCIB_active should have been cleared by pcifront. And also make
+ 	sure pciback is waiting for ack by checking _PCIB_op_pending*/
+-	if (!test_bit(_XEN_PCIB_active,(unsigned long *)&pdev->sh_info->flags)
+-	    &&test_bit(_PCIB_op_pending, &pdev->flags)) {
++	if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags)
++	    && test_bit(_PCIB_op_pending, &pdev->flags)) {
+ 		wake_up(&aer_wait_queue);
+ 	}
+ }
+@@ -69,7 +66,8 @@ void test_and_schedule_op(struct pciback_device *pdev)
+ 
+ void pciback_do_op(struct work_struct *data)
+ {
+-	struct pciback_device *pdev = container_of(data, struct pciback_device, op_work);
++	struct pciback_device *pdev =
++		container_of(data, struct pciback_device, op_work);
+ 	struct pci_dev *dev;
+ 	struct xen_pci_op *op = &pdev->sh_info->op;
+ 
+@@ -77,38 +75,36 @@ void pciback_do_op(struct work_struct *data)
+ 
+ 	if (dev == NULL)
+ 		op->err = XEN_PCI_ERR_dev_not_found;
+-	else
+-	{
+-		switch (op->cmd)
+-		{
+-			case XEN_PCI_OP_conf_read:
+-				op->err = pciback_config_read(dev,
+-					  op->offset, op->size, &op->value);
+-				break;
+-			case XEN_PCI_OP_conf_write:
+-				op->err = pciback_config_write(dev,
+-					  op->offset, op->size,	op->value);
+-				break;
++	else {
++		switch (op->cmd) {
++		case XEN_PCI_OP_conf_read:
++			op->err = pciback_config_read(dev,
++				  op->offset, op->size, &op->value);
++			break;
++		case XEN_PCI_OP_conf_write:
++			op->err = pciback_config_write(dev,
++				  op->offset, op->size,	op->value);
++			break;
+ #ifdef CONFIG_PCI_MSI
+-			case XEN_PCI_OP_enable_msi:
+-				op->err = pciback_enable_msi(pdev, dev, op);
+-				break;
+-			case XEN_PCI_OP_disable_msi:
+-				op->err = pciback_disable_msi(pdev, dev, op);
+-				break;
+-			case XEN_PCI_OP_enable_msix:
+-				op->err = pciback_enable_msix(pdev, dev, op);
+-				break;
+-			case XEN_PCI_OP_disable_msix:
+-				op->err = pciback_disable_msix(pdev, dev, op);
+-				break;
++		case XEN_PCI_OP_enable_msi:
++			op->err = pciback_enable_msi(pdev, dev, op);
++			break;
++		case XEN_PCI_OP_disable_msi:
++			op->err = pciback_disable_msi(pdev, dev, op);
++			break;
++		case XEN_PCI_OP_enable_msix:
++			op->err = pciback_enable_msix(pdev, dev, op);
++			break;
++		case XEN_PCI_OP_disable_msix:
++			op->err = pciback_disable_msix(pdev, dev, op);
++			break;
+ #endif
+-			default:
+-				op->err = XEN_PCI_ERR_not_implemented;
+-				break;
++		default:
++			op->err = XEN_PCI_ERR_not_implemented;
++			break;
+ 		}
+ 	}
+-	/* Tell the driver domain that we're done. */ 
++	/* Tell the driver domain that we're done. */
+ 	wmb();
+ 	clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
+ 	notify_remote_via_irq(pdev->evtchn_irq);
+@@ -119,7 +115,7 @@ void pciback_do_op(struct work_struct *data)
+ 	smp_mb__after_clear_bit(); /* /before/ final check for work */
+ 
+ 	/* Check to see if the driver domain tried to start another request in
+-	 * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active. 
++	 * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active.
+ 	*/
+ 	test_and_schedule_op(pdev);
+ }
+diff --git a/drivers/xen/pciback/slot.c b/drivers/xen/pciback/slot.c
+index 105a8b6..efb922d 100644
+--- a/drivers/xen/pciback/slot.c
++++ b/drivers/xen/pciback/slot.c
+@@ -65,7 +65,8 @@ int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
+ 		for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
+ 			if (slot_dev->slots[bus][slot] == NULL) {
+ 				printk(KERN_INFO
+-				       "pciback: slot: %s: assign to virtual slot %d, bus %d\n",
++				       "pciback: slot: %s: assign to virtual "
++				       "slot %d, bus %d\n",
+ 				       pci_name(dev), slot, bus);
+ 				slot_dev->slots[bus][slot] = dev;
+ 				goto unlock;
+@@ -76,14 +77,14 @@ int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
+ 	xenbus_dev_fatal(pdev->xdev, err,
+ 			 "No more space on root virtual PCI bus");
+ 
+-      unlock:
++unlock:
+ 	spin_unlock_irqrestore(&slot_dev->lock, flags);
+ 
+ 	/* Publish this device. */
+-	if(!err)
++	if (!err)
+ 		err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, 0), devid);
+ 
+-      out:
++out:
+ 	return err;
+ }
+ 
+@@ -105,7 +106,7 @@ void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
+ 			}
+ 		}
+ 
+-      out:
++out:
+ 	spin_unlock_irqrestore(&slot_dev->lock, flags);
+ 
+ 	if (found_dev)
+@@ -156,8 +157,10 @@ void pciback_release_devices(struct pciback_device *pdev)
+ 	pdev->pci_dev_data = NULL;
+ }
+ 
+-int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev, 
+-		unsigned int *domain, unsigned int *bus, unsigned int *devfn)
++int pciback_get_pcifront_dev(struct pci_dev *pcidev,
++			     struct pciback_device *pdev,
++			     unsigned int *domain, unsigned int *bus,
++			     unsigned int *devfn)
+ {
+ 	int slot, busnr;
+ 	struct slot_dev_data *slot_dev = pdev->pci_dev_data;
+@@ -172,11 +175,12 @@ int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev
+ 			dev = slot_dev->slots[busnr][slot];
+ 			if (dev && dev->bus->number == pcidev->bus->number
+ 				&& dev->devfn == pcidev->devfn
+-				&& pci_domain_nr(dev->bus) == pci_domain_nr(pcidev->bus)) {
++				&& pci_domain_nr(dev->bus) ==
++					pci_domain_nr(pcidev->bus)) {
+ 				found = 1;
+ 				*domain = 0;
+ 				*bus = busnr;
+-				*devfn = PCI_DEVFN(slot,0);
++				*devfn = PCI_DEVFN(slot, 0);
+ 				goto out;
+ 			}
+ 		}
+diff --git a/drivers/xen/pciback/vpci.c b/drivers/xen/pciback/vpci.c
+index a5b7ece..721b81b 100644
+--- a/drivers/xen/pciback/vpci.c
++++ b/drivers/xen/pciback/vpci.c
+@@ -125,14 +125,14 @@ int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
+ 	xenbus_dev_fatal(pdev->xdev, err,
+ 			 "No more space on root virtual PCI bus");
+ 
+-      unlock:
++unlock:
+ 	spin_unlock_irqrestore(&vpci_dev->lock, flags);
+ 
+ 	/* Publish this device. */
+-	if(!err)
++	if (!err)
+ 		err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, func), devid);
+ 
+-      out:
++out:
+ 	return err;
+ }
+ 
+@@ -158,7 +158,7 @@ void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
+ 		}
+ 	}
+ 
+-      out:
++out:
+ 	spin_unlock_irqrestore(&vpci_dev->lock, flags);
+ 
+ 	if (found_dev)
+@@ -176,9 +176,8 @@ int pciback_init_devices(struct pciback_device *pdev)
+ 
+ 	spin_lock_init(&vpci_dev->lock);
+ 
+-	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++	for (slot = 0; slot < PCI_SLOT_MAX; slot++)
+ 		INIT_LIST_HEAD(&vpci_dev->dev_list[slot]);
+-	}
+ 
+ 	pdev->pci_dev_data = vpci_dev;
+ 
+@@ -211,8 +210,10 @@ void pciback_release_devices(struct pciback_device *pdev)
+ 	pdev->pci_dev_data = NULL;
+ }
+ 
+-int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev, 
+-		unsigned int *domain, unsigned int *bus, unsigned int *devfn)
++int pciback_get_pcifront_dev(struct pci_dev *pcidev,
++			     struct pciback_device *pdev,
++			     unsigned int *domain, unsigned int *bus,
++			     unsigned int *devfn)
+ {
+ 	struct pci_dev_entry *entry;
+ 	struct pci_dev *dev = NULL;
+@@ -227,15 +228,16 @@ int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev
+ 			    list) {
+ 			dev = entry->dev;
+ 			if (dev && dev->bus->number == pcidev->bus->number
+-				&& pci_domain_nr(dev->bus) == pci_domain_nr(pcidev->bus)
+-				&& dev->devfn == pcidev->devfn)
+-			{
++				&& pci_domain_nr(dev->bus) ==
++					pci_domain_nr(pcidev->bus)
++				&& dev->devfn == pcidev->devfn) {
+ 				found = 1;
+ 				*domain = 0;
+ 				*bus = 0;
+-				*devfn = PCI_DEVFN(slot, PCI_FUNC(pcidev->devfn));
++				*devfn = PCI_DEVFN(slot,
++					 PCI_FUNC(pcidev->devfn));
+ 			}
+-		}		
++		}
+ 	}
+ 	spin_unlock_irqrestore(&vpci_dev->lock, flags);
+ 	return found;
+diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
+index a85c413..efec585 100644
+--- a/drivers/xen/pciback/xenbus.c
++++ b/drivers/xen/pciback/xenbus.c
+@@ -40,7 +40,7 @@ static struct pciback_device *alloc_pdev(struct xenbus_device *xdev)
+ 		kfree(pdev);
+ 		pdev = NULL;
+ 	}
+-      out:
++out:
+ 	return pdev;
+ }
+ 
+@@ -111,7 +111,7 @@ static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref,
+ 	err = 0;
+ 
+ 	dev_dbg(&pdev->xdev->dev, "Attached!\n");
+-      out:
++out:
+ 	return err;
+ }
+ 
+@@ -166,11 +166,10 @@ static int pciback_attach(struct pciback_device *pdev)
+ 				 "Error switching to connected state!");
+ 
+ 	dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
+-      out:
++out:
+ 	spin_unlock(&pdev->dev_lock);
+ 
+-	if (magic)
+-		kfree(magic);
++	kfree(magic);
+ 
+ 	return err;
+ }
+@@ -193,7 +192,7 @@ static int pciback_publish_pci_dev(struct pciback_device *pdev,
+ 			    "%04x:%02x:%02x.%02x", domain, bus,
+ 			    PCI_SLOT(devfn), PCI_FUNC(devfn));
+ 
+-      out:
++out:
+ 	return err;
+ }
+ 
+@@ -230,7 +229,7 @@ static int pciback_export_device(struct pciback_device *pdev,
+ 	 * to other driver domains (as he who controls the bridge can disable
+ 	 * it and stop the other devices from working).
+ 	 */
+-      out:
++out:
+ 	return err;
+ }
+ 
+@@ -253,8 +252,8 @@ static int pciback_remove_device(struct pciback_device *pdev,
+ 	}
+ 
+ 	pciback_release_pci_dev(pdev, dev);
+-	
+-      out:
++
++out:
+ 	return err;
+ }
+ 
+@@ -314,7 +313,7 @@ static int pciback_publish_pci_root(struct pciback_device *pdev,
+ 	err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
+ 			    "root_num", "%d", (root_num + 1));
+ 
+-      out:
++out:
+ 	return err;
+ }
+ 
+@@ -358,7 +357,7 @@ static int pciback_reconfigure(struct pciback_device *pdev)
+ 		}
+ 		err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, state_str,
+ 				   "%d", &substate);
+-		if (err != 1) 
++		if (err != 1)
+ 			substate = XenbusStateUnknown;
+ 
+ 		switch (substate) {
+@@ -389,14 +388,15 @@ static int pciback_reconfigure(struct pciback_device *pdev)
+ 						 "configuration");
+ 				goto out;
+ 			}
+-	
++
+ 			err = pciback_export_device(pdev, domain, bus, slot,
+ 						    func, i);
+ 			if (err)
+ 				goto out;
+ 
+ 			/* Publish pci roots. */
+-			err = pciback_publish_pci_roots(pdev, pciback_publish_pci_root);
++			err = pciback_publish_pci_roots(pdev,
++						pciback_publish_pci_root);
+ 			if (err) {
+ 				xenbus_dev_fatal(pdev->xdev, err,
+ 						 "Error while publish PCI root"
+@@ -412,7 +412,7 @@ static int pciback_reconfigure(struct pciback_device *pdev)
+ 						 "Error switching substate of "
+ 						 "dev-%d\n", i);
+ 				goto out;
+-			}	
++			}
+ 			break;
+ 
+ 		case XenbusStateClosing:
+@@ -445,7 +445,7 @@ static int pciback_reconfigure(struct pciback_device *pdev)
+ 
+ 			err = pciback_remove_device(pdev, domain, bus, slot,
+ 						    func);
+-			if(err)
++			if (err)
+ 				goto out;
+ 
+ 			/* TODO: If at some point we implement support for pci
+@@ -466,8 +466,8 @@ static int pciback_reconfigure(struct pciback_device *pdev)
+ 				 "Error switching to reconfigured state!");
+ 		goto out;
+ 	}
+-	
+-      out:
++
++out:
+ 	spin_unlock(&pdev->dev_lock);
+ 
+ 	return 0;
+@@ -591,7 +591,7 @@ static int pciback_setup_backend(struct pciback_device *pdev)
+ 			xenbus_dev_fatal(pdev->xdev, err, "Error switching "
+ 					 "substate of dev-%d\n", i);
+ 			goto out;
+-		}	
++		}
+ 	}
+ 
+ 	err = pciback_publish_pci_roots(pdev, pciback_publish_pci_root);
+@@ -607,7 +607,7 @@ static int pciback_setup_backend(struct pciback_device *pdev)
+ 		xenbus_dev_fatal(pdev->xdev, err,
+ 				 "Error switching to initialised state!");
+ 
+-      out:
++out:
+ 	spin_unlock(&pdev->dev_lock);
+ 
+ 	if (!err)
+@@ -663,7 +663,7 @@ static int pciback_xenbus_probe(struct xenbus_device *dev,
+ 	 */
+ 	pciback_be_watch(&pdev->be_watch, NULL, 0);
+ 
+-      out:
++out:
+ 	return err;
+ }
+ 
+@@ -679,7 +679,7 @@ static int pciback_xenbus_remove(struct xenbus_device *dev)
+ 
+ static const struct xenbus_device_id xenpci_ids[] = {
+ 	{"pci"},
+-	{{0}},
++	{""},
+ };
+ 
+ static struct xenbus_driver xenbus_pciback_driver = {
+-- 
+1.7.3.4
+
+
+From ca1ee0c25b425d9739b1a24cf911de2e041a2514 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Mon, 8 Mar 2010 18:39:15 -0500
+Subject: [PATCH 017/139] xen-pciback: remove driver_data direct access to struct device
+
+The driver core will no longer allow direct access to the
+driver_data pointer in struct device.  Instead, the functions
+dev_get_drvdata() and dev_set_drvdata() should be used.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ drivers/xen/pciback/xenbus.c |    8 ++++----
+ 1 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
+index efec585..af6c25a 100644
+--- a/drivers/xen/pciback/xenbus.c
++++ b/drivers/xen/pciback/xenbus.c
+@@ -26,7 +26,7 @@ static struct pciback_device *alloc_pdev(struct xenbus_device *xdev)
+ 	dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev);
+ 
+ 	pdev->xdev = xdev;
+-	xdev->dev.driver_data = pdev;
++	dev_set_drvdata(&xdev->dev, pdev);
+ 
+ 	spin_lock_init(&pdev->dev_lock);
+ 
+@@ -75,7 +75,7 @@ static void free_pdev(struct pciback_device *pdev)
+ 
+ 	pciback_release_devices(pdev);
+ 
+-	pdev->xdev->dev.driver_data = NULL;
++	dev_set_drvdata(&pdev->xdev->dev, NULL);
+ 	pdev->xdev = NULL;
+ 
+ 	kfree(pdev);
+@@ -476,7 +476,7 @@ out:
+ static void pciback_frontend_changed(struct xenbus_device *xdev,
+ 				     enum xenbus_state fe_state)
+ {
+-	struct pciback_device *pdev = xdev->dev.driver_data;
++	struct pciback_device *pdev = dev_get_drvdata(&xdev->dev);
+ 
+ 	dev_dbg(&xdev->dev, "fe state changed %d\n", fe_state);
+ 
+@@ -669,7 +669,7 @@ out:
+ 
+ static int pciback_xenbus_remove(struct xenbus_device *dev)
+ {
+-	struct pciback_device *pdev = dev->dev.driver_data;
++	struct pciback_device *pdev = dev_get_drvdata(&dev->dev);
+ 
+ 	if (pdev != NULL)
+ 		free_pdev(pdev);
+-- 
+1.7.3.4
+
+
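The conversion above is the generic driver-core accessor pattern rather than
anything pciback-specific. A minimal sketch of the same store/retrieve
round-trip follows; the structure and function names are invented, and only
dev_set_drvdata()/dev_get_drvdata() are the real API.

#include <linux/device.h>

struct example_priv {
	int state;
};

/* Illustrative only: keep per-device private data behind the driver-core
 * accessors instead of poking dev->driver_data directly. */
static void example_attach_priv(struct device *dev, struct example_priv *priv)
{
	dev_set_drvdata(dev, priv);
}

static struct example_priv *example_priv_of(struct device *dev)
{
	return dev_get_drvdata(dev);
}
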
+From 585f088e6aec3e4514ac2563852961f71c74e47e Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Mon, 8 Mar 2010 18:47:55 -0500
+Subject: [PATCH 018/139] xen-pciback: Fix compile error: 'TASK_NORMAL' undeclared.
+
+Both files were missing the #include <linux/sched.h>
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ drivers/xen/pciback/pci_stub.c    |    1 +
+ drivers/xen/pciback/pciback_ops.c |    1 +
+ 2 files changed, 2 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
+index 28222ee..6fc0b6e 100644
+--- a/drivers/xen/pciback/pci_stub.c
++++ b/drivers/xen/pciback/pci_stub.c
+@@ -12,6 +12,7 @@
+ #include <linux/kref.h>
+ #include <linux/pci.h>
+ #include <linux/wait.h>
++#include <linux/sched.h>
+ #include <asm/atomic.h>
+ #include <xen/events.h>
+ #include <asm/xen/pci.h>
+diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
+index bf83dca..2b9a93e 100644
+--- a/drivers/xen/pciback/pciback_ops.c
++++ b/drivers/xen/pciback/pciback_ops.c
+@@ -7,6 +7,7 @@
+ #include <linux/wait.h>
+ #include <linux/bitops.h>
+ #include <xen/events.h>
++#include <linux/sched.h>
+ #include "pciback.h"
+ 
+ int verbose_request;
+-- 
+1.7.3.4
+
+
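For context on why the include matters: wake_up() is a macro from
<linux/wait.h> that expands to __wake_up(..., TASK_NORMAL, ...), and
TASK_NORMAL is defined in <linux/sched.h>, so a file that calls wake_up()
without pulling in sched.h (directly or transitively) fails to build exactly
as the subject describes. A hypothetical minimal example, not part of the
patch:

#include <linux/wait.h>
#include <linux/sched.h>	/* TASK_NORMAL, referenced by the wake_up() macro */

static DECLARE_WAIT_QUEUE_HEAD(example_wq);

static void example_poke_waiters(void)
{
	/* expands to __wake_up(&example_wq, TASK_NORMAL, 1, NULL) */
	wake_up(&example_wq);
}
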
+From 03dd111c81bad8e69cdb8b5d67381702adb24593 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Wed, 9 Dec 2009 17:43:16 -0500
+Subject: [PATCH 019/139] xen-pciback: Remove the vestiges of CONFIG_PCI_GUESTDEV.
+
+The same functionality (what used to be called
+pci_is_guestdev) is now provided via the "pci=resource_alignment="
+command line argument.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/pciback/pci_stub.c |   10 ----------
+ 1 files changed, 0 insertions(+), 10 deletions(-)
+
+diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
+index 6fc0b6e..d30aa7c 100644
+--- a/drivers/xen/pciback/pci_stub.c
++++ b/drivers/xen/pciback/pci_stub.c
+@@ -430,16 +430,6 @@ static int __devinit pcistub_probe(struct pci_dev *dev,
+ 
+ 		dev_info(&dev->dev, "seizing device\n");
+ 		err = pcistub_seize(dev);
+-#ifdef CONFIG_PCI_GUESTDEV
+-	} else if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
+-		if (!pci_is_guestdev(dev)) {
+-			err = -ENODEV;
+-			goto out;
+-		}
+-
+-		dev_info(&dev->dev, "seizing device\n");
+-		err = pcistub_seize(dev);
+-#endif /* CONFIG_PCI_GUESTDEV */
+ 	} else
+ 		/* Didn't find the device */
+ 		err = -ENODEV;
+-- 
+1.7.3.4
+
+
+From 30acb3491495a43b59a64612ad92a7a290c59e82 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Wed, 9 Dec 2009 17:43:17 -0500
+Subject: [PATCH 020/139] xen-pciback: Remove deprecated routine to find domain owner of PCI device.
+
+In the linux-2.6.18.hg tree the mechanism for finding the domain owner was
+for the MSI driver (msi-xen.c) to call this function to retrieve
+the domain number. This is no longer how it is done.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/pciback/pci_stub.c |   19 -------------------
+ 1 files changed, 0 insertions(+), 19 deletions(-)
+
+diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
+index d30aa7c..30e7b59 100644
+--- a/drivers/xen/pciback/pci_stub.c
++++ b/drivers/xen/pciback/pci_stub.c
+@@ -1157,22 +1157,6 @@ static ssize_t permissive_show(struct device_driver *drv, char *buf)
+ 
+ DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, permissive_add);
+ 
+-#ifdef CONFIG_PCI_MSI
+-
+-int pciback_get_owner(struct pci_dev *dev)
+-{
+-	struct pcistub_device *psdev;
+-
+-	psdev = pcistub_device_find(pci_domain_nr(dev->bus), dev->bus->number,
+-			PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
+-
+-	if (!psdev || !psdev->pdev)
+-		return -1;
+-
+-	return psdev->pdev->xdev->otherend_id;
+-}
+-#endif
+-
+ static void pcistub_exit(void)
+ {
+ 	driver_remove_file(&pciback_pci_driver.driver, &driver_attr_new_slot);
+@@ -1183,7 +1167,6 @@ static void pcistub_exit(void)
+ 	driver_remove_file(&pciback_pci_driver.driver, &driver_attr_permissive);
+ 
+ 	pci_unregister_driver(&pciback_pci_driver);
+-	WARN_ON(unregister_msi_get_owner(pciback_get_owner));
+ }
+ 
+ static int __init pcistub_init(void)
+@@ -1241,8 +1224,6 @@ static int __init pcistub_init(void)
+ 		err = driver_create_file(&pciback_pci_driver.driver,
+ 					 &driver_attr_permissive);
+ 
+-	if (!err)
+-		err = register_msi_get_owner(pciback_get_owner);
+ 	if (err)
+ 		pcistub_exit();
+ 
+-- 
+1.7.3.4
+
+
+From da36c7662d9738ce44c37b4f1f41c045c64d6914 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Tue, 22 Dec 2009 13:53:41 -0500
+Subject: [PATCH 021/139] xen-pciback: Fix compiler warning in pci_stub.c.
+
+warning: the frame size of 1036 bytes is larger than 1024 bytes
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/pciback/pci_stub.c |    5 +++--
+ 1 files changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
+index 30e7b59..0b5a16b 100644
+--- a/drivers/xen/pciback/pci_stub.c
++++ b/drivers/xen/pciback/pci_stub.c
+@@ -496,16 +496,17 @@ static const struct pci_device_id pcistub_ids[] = {
+ 	{0,},
+ };
+ 
++#define PCI_NODENAME_MAX 40
+ static void kill_domain_by_device(struct pcistub_device *psdev)
+ {
+ 	struct xenbus_transaction xbt;
+ 	int err;
+-	char nodename[1024];
++	char nodename[PCI_NODENAME_MAX];
+ 
+ 	if (!psdev)
+ 		dev_err(&psdev->dev->dev,
+ 			"device is NULL when do AER recovery/kill_domain\n");
+-	sprintf(nodename, "/local/domain/0/backend/pci/%d/0",
++	snprintf(nodename, PCI_NODENAME_MAX, "/local/domain/0/backend/pci/%d/0",
+ 		psdev->pdev->xdev->otherend_id);
+ 	nodename[strlen(nodename)] = '\0';
+ 
+-- 
+1.7.3.4
+
+
+From 83484f34b2cc42807c71514fbabbd40e281ec094 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Tue, 22 Dec 2009 13:53:42 -0500
+Subject: [PATCH 022/139] xen-pciback: Fix compile warning in vpci.c
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+warning: ‘func’ may be used uninitialized in this function
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/pciback/vpci.c |    2 +-
+ 1 files changed, 1 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/xen/pciback/vpci.c b/drivers/xen/pciback/vpci.c
+index 721b81b..2857ab8 100644
+--- a/drivers/xen/pciback/vpci.c
++++ b/drivers/xen/pciback/vpci.c
+@@ -65,7 +65,7 @@ static inline int match_slot(struct pci_dev *l, struct pci_dev *r)
+ int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
+ 			int devid, publish_pci_dev_cb publish_cb)
+ {
+-	int err = 0, slot, func;
++	int err = 0, slot, func = -1;
+ 	struct pci_dev_entry *t, *dev_entry;
+ 	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
+ 	unsigned long flags;
+-- 
+1.7.3.4
+
+
+From 5612e6358835700c49d8be5671823614ace30c94 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ijc at hellion.org.uk>
+Date: Thu, 3 Dec 2009 21:56:20 +0000
+Subject: [PATCH 023/139] xen: rename pciback module to xen-pciback.
+
+pciback is rather too generic a name for a modular, distro-style kernel.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Cc: Jeremy Fitzhardinge <jeremy at goop.org>
+Cc: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/pciback/Makefile |   24 ++++++++++++------------
+ 1 files changed, 12 insertions(+), 12 deletions(-)
+
+diff --git a/drivers/xen/pciback/Makefile b/drivers/xen/pciback/Makefile
+index 106dae7..38bc123 100644
+--- a/drivers/xen/pciback/Makefile
++++ b/drivers/xen/pciback/Makefile
+@@ -1,16 +1,16 @@
+-obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback.o
++obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback.o
+ 
+-pciback-y := pci_stub.o pciback_ops.o xenbus.o
+-pciback-y += conf_space.o conf_space_header.o \
+-	     conf_space_capability.o \
+-	     conf_space_capability_vpd.o \
+-	     conf_space_capability_pm.o \
+-             conf_space_quirks.o
+-pciback-$(CONFIG_PCI_MSI) += conf_space_capability_msi.o
+-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_VPCI) += vpci.o
+-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_SLOT) += slot.o
+-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o
+-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_CONTROLLER) += controller.o
++xen-pciback-y := pci_stub.o pciback_ops.o xenbus.o
++xen-pciback-y += conf_space.o conf_space_header.o \
++		 conf_space_capability.o \
++		 conf_space_capability_vpd.o \
++		 conf_space_capability_pm.o \
++		 conf_space_quirks.o
++xen-pciback-$(CONFIG_PCI_MSI) += conf_space_capability_msi.o
++xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_VPCI) += vpci.o
++xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_SLOT) += slot.o
++xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o
++xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_CONTROLLER) += controller.o
+ 
+ ifeq ($(CONFIG_XEN_PCIDEV_BE_DEBUG),y)
+ EXTRA_CFLAGS += -DDEBUG
+-- 
+1.7.3.4
+
+
+From 245a9ec5ef1f9c8a6bc6b5c0ac1bb616c3c8c979 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Wed, 9 Dec 2009 17:43:15 -0500
+Subject: [PATCH 024/139] xen-pciback: Register the owner (domain) of the PCI device.
+
+When the front-end and back-end start negotiating, we register
+the domain that will use the PCI device. Furthermore, during shutdown
+of the guest or unbinding of the PCI device from pciback (and unloading
+of the module), we unregister the domain owner.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/pciback/pci_stub.c |    2 ++
+ drivers/xen/pciback/xenbus.c   |   13 +++++++++++++
+ 2 files changed, 15 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
+index 0b5a16b..02178e2 100644
+--- a/drivers/xen/pciback/pci_stub.c
++++ b/drivers/xen/pciback/pci_stub.c
+@@ -90,6 +90,8 @@ static void pcistub_device_release(struct kref *kref)
+ 
+ 	dev_dbg(&psdev->dev->dev, "pcistub_device_release\n");
+ 
++	xen_unregister_device_domain_owner(psdev->dev);
++
+ 	/* Clean-up the device */
+ 	pciback_reset_device(psdev->dev);
+ 	pciback_config_free_dyn_fields(psdev->dev);
+diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
+index af6c25a..d448bf5 100644
+--- a/drivers/xen/pciback/xenbus.c
++++ b/drivers/xen/pciback/xenbus.c
+@@ -10,6 +10,7 @@
+ #include <linux/workqueue.h>
+ #include <xen/xenbus.h>
+ #include <xen/events.h>
++#include <asm/xen/pci.h>
+ #include <linux/workqueue.h>
+ #include "pciback.h"
+ 
+@@ -221,6 +222,15 @@ static int pciback_export_device(struct pciback_device *pdev,
+ 	if (err)
+ 		goto out;
+ 
++	dev_dbg(&dev->dev, "registering for %d\n", pdev->xdev->otherend_id);
++	if (xen_register_device_domain_owner(dev,
++					     pdev->xdev->otherend_id) != 0) {
++		dev_err(&dev->dev, "device has been assigned to another " \
++			"domain! Over-writing the ownership, but beware.\n");
++		xen_unregister_device_domain_owner(dev);
++		xen_register_device_domain_owner(dev, pdev->xdev->otherend_id);
++	}
++
+ 	/* TODO: It'd be nice to export a bridge and have all of its children
+ 	 * get exported with it. This may be best done in xend (which will
+ 	 * have to calculate resource usage anyway) but we probably want to
+@@ -251,6 +261,9 @@ static int pciback_remove_device(struct pciback_device *pdev,
+ 		goto out;
+ 	}
+ 
++	dev_dbg(&dev->dev, "unregistering for %d\n", pdev->xdev->otherend_id);
++	xen_unregister_device_domain_owner(dev);
++
+ 	pciback_release_pci_dev(pdev, dev);
+ 
+ out:
+-- 
+1.7.3.4
+
+
+From cb6c976606d16119e8608c8bcc1ef9265881dd7f Mon Sep 17 00:00:00 2001
+From: Zhao, Yu <yu.zhao at intel.com>
+Date: Wed, 3 Mar 2010 13:27:55 -0500
+Subject: [PATCH 025/139] xen-pciback: guest SR-IOV support for PV guest
+
+These changes allow a PV guest to use a Virtual Function. The VF's
+vendor and device registers in config space read as 0xffff, which is
+invalid and causes them to be ignored by the PCI device scan. The values
+in 'struct pci_dev' are fixed up by the SR-IOV code, and using those
+values presents the correct VID and DID to the PV guest kernel.
+
+In addition, the command register in config space is read-only 0, which
+means we have to emulate the MMIO enable bit (a VF only uses MMIO
+resources) so the PV kernel can work properly.
+
+Acked-by: jbeulich at novell.com
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ drivers/xen/pciback/conf_space_header.c |   71 ++++++++++++++++++++++++++++--
+ 1 files changed, 66 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/xen/pciback/conf_space_header.c b/drivers/xen/pciback/conf_space_header.c
+index 3ae7da1..1f4f86e 100644
+--- a/drivers/xen/pciback/conf_space_header.c
++++ b/drivers/xen/pciback/conf_space_header.c
+@@ -18,6 +18,25 @@ struct pci_bar_info {
+ #define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO))
+ #define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER)
+ 
++static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data)
++{
++	int i;
++	int ret;
++
++	ret = pciback_read_config_word(dev, offset, value, data);
++	if (!atomic_read(&dev->enable_cnt))
++		return ret;
++
++	for (i = 0; i < PCI_ROM_RESOURCE; i++) {
++		if (dev->resource[i].flags & IORESOURCE_IO)
++			*value |= PCI_COMMAND_IO;
++		if (dev->resource[i].flags & IORESOURCE_MEM)
++			*value |= PCI_COMMAND_MEMORY;
++	}
++
++	return ret;
++}
++
+ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
+ {
+ 	int err;
+@@ -142,10 +161,26 @@ static inline void read_dev_bar(struct pci_dev *dev,
+ 				struct pci_bar_info *bar_info, int offset,
+ 				u32 len_mask)
+ {
+-	pci_read_config_dword(dev, offset, &bar_info->val);
+-	pci_write_config_dword(dev, offset, len_mask);
+-	pci_read_config_dword(dev, offset, &bar_info->len_val);
+-	pci_write_config_dword(dev, offset, bar_info->val);
++	int	pos;
++	struct resource	*res = dev->resource;
++
++	if (offset == PCI_ROM_ADDRESS || offset == PCI_ROM_ADDRESS1)
++		pos = PCI_ROM_RESOURCE;
++	else {
++		pos = (offset - PCI_BASE_ADDRESS_0) / 4;
++		if (pos && ((res[pos - 1].flags & (PCI_BASE_ADDRESS_SPACE |
++				PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
++			   (PCI_BASE_ADDRESS_SPACE_MEMORY |
++				PCI_BASE_ADDRESS_MEM_TYPE_64))) {
++			bar_info->val = res[pos - 1].start >> 32;
++			bar_info->len_val = res[pos - 1].end >> 32;
++			return;
++		}
++	}
++
++	bar_info->val = res[pos].start |
++			(res[pos].flags & PCI_REGION_FLAG_MASK);
++	bar_info->len_val = res[pos].end - res[pos].start + 1;
+ }
+ 
+ static void *bar_init(struct pci_dev *dev, int offset)
+@@ -186,6 +221,22 @@ static void bar_release(struct pci_dev *dev, int offset, void *data)
+ 	kfree(data);
+ }
+ 
++static int pciback_read_vendor(struct pci_dev *dev, int offset,
++			       u16 *value, void *data)
++{
++	*value = dev->vendor;
++
++	return 0;
++}
++
++static int pciback_read_device(struct pci_dev *dev, int offset,
++			       u16 *value, void *data)
++{
++	*value = dev->device;
++
++	return 0;
++}
++
+ static int interrupt_read(struct pci_dev *dev, int offset, u8 * value,
+ 			  void *data)
+ {
+@@ -213,9 +264,19 @@ out:
+ 
+ static const struct config_field header_common[] = {
+ 	{
++	 .offset    = PCI_VENDOR_ID,
++	 .size      = 2,
++	 .u.w.read  = pciback_read_vendor,
++	},
++	{
++	 .offset    = PCI_DEVICE_ID,
++	 .size      = 2,
++	 .u.w.read  = pciback_read_device,
++	},
++	{
+ 	 .offset    = PCI_COMMAND,
+ 	 .size      = 2,
+-	 .u.w.read  = pciback_read_config_word,
++	 .u.w.read  = command_read,
+ 	 .u.w.write = command_write,
+ 	},
+ 	{
+-- 
+1.7.3.4
+
+
+From 1d77305c7900f3b6ec5d403d9aba6f0034b0112e Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Wed, 3 Mar 2010 13:38:43 -0500
+Subject: [PATCH 026/139] xen-pciback: Disable MSI/MSI-X when resetting device
+
+In cases where the guest is abruptly killed and has not disabled
+MSI/MSI-X interrupts, we want to do that ourselves.
+
+Otherwise, when the guest is started up again and enables MSI, we would
+get a WARN() that the device had already been enabled.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ drivers/xen/pciback/pciback_ops.c |    8 ++++++++
+ 1 files changed, 8 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
+index 2b9a93e..011db67 100644
+--- a/drivers/xen/pciback/pciback_ops.c
++++ b/drivers/xen/pciback/pciback_ops.c
+@@ -23,6 +23,14 @@ void pciback_reset_device(struct pci_dev *dev)
+ 
+ 	/* Disable devices (but not bridges) */
+ 	if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
++#ifdef CONFIG_PCI_MSI
++		/* The guest could have been abruptly killed without
++		 * disabling MSI/MSI-X interrupts.*/
++		if (dev->msix_enabled)
++			pci_disable_msix(dev);
++		if (dev->msi_enabled)
++			pci_disable_msi(dev);
++#endif
+ 		pci_disable_device(dev);
+ 
+ 		pci_write_config_word(dev, PCI_COMMAND, 0);
+-- 
+1.7.3.4
+
+
+From c89edb63b60166fe354493dd465cf5662b2c077d Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Mon, 12 Apr 2010 11:46:00 -0400
+Subject: [PATCH 027/139] xen-pciback: Allocate IRQ handler for device that is shared with guest.
+
+If the pciback module is loaded with fake_irq_handler=1, we install
+an IRQ handler for all devices that are to be passed to the guest domain.
+The IRQ handler will return IRQ_HANDLED or IRQ_NONE depending on
+the ack_intr flag.
+
+The trigger to install this IRQ handler is when the enable_isr flag
+is set.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ drivers/xen/pciback/pci_stub.c    |   13 ++++-
+ drivers/xen/pciback/pciback.h     |   12 ++++-
+ drivers/xen/pciback/pciback_ops.c |   95 ++++++++++++++++++++++++++++++++++++-
+ 3 files changed, 115 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
+index 02178e2..45bbe99 100644
+--- a/drivers/xen/pciback/pci_stub.c
++++ b/drivers/xen/pciback/pci_stub.c
+@@ -21,6 +21,8 @@
+ #include "conf_space.h"
+ #include "conf_space_quirks.h"
+ 
++#define DRV_NAME	"pciback"
++
+ static char *pci_devs_to_hide;
+ wait_queue_head_t aer_wait_queue;
+ /*Add sem for sync AER handling and pciback remove/reconfigue ops,
+@@ -290,13 +292,20 @@ static int __devinit pcistub_init_device(struct pci_dev *dev)
+ 	 * would need to be called somewhere to free the memory allocated
+ 	 * here and then to call kfree(pci_get_drvdata(psdev->dev)).
+ 	 */
+-	dev_data = kzalloc(sizeof(*dev_data), GFP_ATOMIC);
++	dev_data = kzalloc(sizeof(*dev_data) +  strlen(DRV_NAME "[]")
++				+ strlen(pci_name(dev)) + 1, GFP_ATOMIC);
+ 	if (!dev_data) {
+ 		err = -ENOMEM;
+ 		goto out;
+ 	}
+ 	pci_set_drvdata(dev, dev_data);
+ 
++	/*
++	 * Setup name for fake IRQ handler. It will only be enabled
++	 * once the device is turned on by the guest.
++	 */
++	sprintf(dev_data->irq_name, DRV_NAME "[%s]", pci_name(dev));
++
+ 	dev_dbg(&dev->dev, "initializing config\n");
+ 
+ 	init_waitqueue_head(&aer_wait_queue);
+@@ -837,7 +846,7 @@ static struct pci_error_handlers pciback_error_handler = {
+  */
+ 
+ static struct pci_driver pciback_pci_driver = {
+-	.name = "pciback",
++	.name = DRV_NAME,
+ 	.id_table = pcistub_ids,
+ 	.probe = pcistub_probe,
+ 	.remove = pcistub_remove,
+diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h
+index 98e2912..9d1b0a6 100644
+--- a/drivers/xen/pciback/pciback.h
++++ b/drivers/xen/pciback/pciback.h
+@@ -45,8 +45,13 @@ struct pciback_device {
+ 
+ struct pciback_dev_data {
+ 	struct list_head config_fields;
+-	int permissive;
+-	int warned_on_write;
++	unsigned int permissive : 1;
++	unsigned int warned_on_write : 1;
++	unsigned int enable_intx : 1;
++	unsigned int isr_on : 1; /* Whether the IRQ handler is installed. */ 
++	unsigned int ack_intr : 1; /* .. and ACK-ing */
++	unsigned int irq; /* Saved in case device transitions to MSI/MSI-X */
++	char irq_name[0]; /* pciback[000:04:00.0] */
+ };
+ 
+ /* Used by XenBus and pciback_ops.c */
+@@ -131,3 +136,6 @@ extern int verbose_request;
+ void test_and_schedule_op(struct pciback_device *pdev);
+ #endif
+ 
++/* Handles shared IRQs that can go to device domain and control domain. */
++void pciback_irq_handler(struct pci_dev *dev, int reset);
++irqreturn_t pciback_guest_interrupt(int irq, void *dev_id);
+diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
+index 011db67..cb54893 100644
+--- a/drivers/xen/pciback/pciback_ops.c
++++ b/drivers/xen/pciback/pciback_ops.c
+@@ -13,6 +13,78 @@
+ int verbose_request;
+ module_param(verbose_request, int, 0644);
+ 
++/* Ensure a device has the fake IRQ handler "turned on/off" and is
++ * ready to be exported. This MUST be run after pciback_reset_device
++ * which does the actual PCI device enable/disable.
++ */
++void pciback_control_isr(struct pci_dev *dev, int reset)
++{
++	struct pciback_dev_data *dev_data;
++	int rc;
++	int enable = 0;
++
++	dev_data = pci_get_drvdata(dev);
++	if (!dev_data)
++		return;
++
++	/* We don't deal with bridges */
++	if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
++		return;
++
++	if (reset) {
++		dev_data->enable_intx = 0;
++		dev_data->ack_intr = 0;
++	}
++	enable =  dev_data->enable_intx;
++
++	/* Asked to disable, but ISR isn't running */
++	if (!enable && !dev_data->isr_on)
++		return;
++
++	/* Squirrel away the IRQs in the dev_data. We need this
++	 * b/c when device transitions to MSI, the dev->irq is
++	 * overwritten with the MSI vector.
++	 */
++	if (enable)
++		dev_data->irq = dev->irq;
++
++	dev_dbg(&dev->dev, "%s: #%d %s %s%s %s-> %s\n",
++		dev_data->irq_name,
++		dev_data->irq,
++		pci_is_enabled(dev) ? "on" : "off",
++		dev->msi_enabled ? "MSI" : "",
++		dev->msix_enabled ? "MSI/X" : "",
++		dev_data->isr_on ? "enable" : "disable",
++		enable ? "enable" : "disable");
++
++	if (enable) {
++		rc = request_irq(dev_data->irq,
++				pciback_guest_interrupt, IRQF_SHARED,
++				dev_data->irq_name, dev);
++		if (rc) {
++			dev_err(&dev->dev, "%s: failed to install fake IRQ " \
++				"handler for IRQ %d! (rc:%d)\n", dev_data->irq_name,
++				dev_data->irq, rc);
++			goto out;
++		}
++	}
++	else {
++		free_irq(dev_data->irq, dev);
++		dev_data->irq = 0;
++	}
++	dev_data->isr_on = enable;
++	dev_data->ack_intr = enable;
++out:
++	dev_dbg(&dev->dev, "%s: #%d %s %s%s %s\n",
++		dev_data->irq_name,
++		dev_data->irq,
++		pci_is_enabled(dev) ? "on" : "off",
++		dev->msi_enabled ? "MSI" : "",
++		dev->msix_enabled ? "MSI/X" : "",
++		enable ? (dev_data->isr_on ? "enabled" : "failed to enable") :
++			(dev_data->isr_on ? "failed to disable" : "disabled"));
++}
++
+ /* Ensure a device is "turned off" and ready to be exported.
+  * (Also see pciback_config_reset to ensure virtual configuration space is
+  * ready to be re-exported)
+@@ -21,6 +93,8 @@ void pciback_reset_device(struct pci_dev *dev)
+ {
+ 	u16 cmd;
+ 
++	pciback_control_isr(dev, 1 /* reset device */);
++
+ 	/* Disable devices (but not bridges) */
+ 	if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
+ #ifdef CONFIG_PCI_MSI
+@@ -78,13 +152,18 @@ void pciback_do_op(struct work_struct *data)
+ 	struct pciback_device *pdev =
+ 		container_of(data, struct pciback_device, op_work);
+ 	struct pci_dev *dev;
++	struct pciback_dev_data *dev_data = NULL;
+ 	struct xen_pci_op *op = &pdev->sh_info->op;
++	int test_intx = 0;
+ 
+ 	dev = pciback_get_pci_dev(pdev, op->domain, op->bus, op->devfn);
+ 
+ 	if (dev == NULL)
+ 		op->err = XEN_PCI_ERR_dev_not_found;
+ 	else {
++		dev_data = pci_get_drvdata(dev);
++		if (dev_data)
++			test_intx = dev_data->enable_intx;
+ 		switch (op->cmd) {
+ 		case XEN_PCI_OP_conf_read:
+ 			op->err = pciback_config_read(dev,
+@@ -109,10 +188,15 @@ void pciback_do_op(struct work_struct *data)
+ 			break;
+ #endif
+ 		default:
+-			op->err = XEN_PCI_ERR_not_implemented;
++			op->err = XEN_PCI_ERR_not_implemented;	
+ 			break;
+ 		}
+ 	}
++	if (!op->err && dev && dev_data) {
++		/* Transition detected */
++		if ((dev_data->enable_intx != test_intx))
++			pciback_control_isr(dev, 0 /* no reset */);
++	}
+ 	/* Tell the driver domain that we're done. */
+ 	wmb();
+ 	clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
+@@ -137,3 +221,12 @@ irqreturn_t pciback_handle_event(int irq, void *dev_id)
+ 
+ 	return IRQ_HANDLED;
+ }
++irqreturn_t pciback_guest_interrupt(int irq, void *dev_id)
++{
++	struct pci_dev *dev = (struct pci_dev *)dev_id;
++	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
++
++	if (dev_data->isr_on && dev_data->ack_intr)
++		return IRQ_HANDLED;
++	return IRQ_NONE;
++}
+-- 
+1.7.3.4
+
+
+From 29a451f41647deedc2fa535520e648c76755568c Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Mon, 12 Apr 2010 11:47:15 -0400
+Subject: [PATCH 028/139] xen-pciback: Add SysFS instrumentation for the fake IRQ handler.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ drivers/xen/pciback/pci_stub.c |   75 +++++++++++++++++++++++++++++++++++++++-
+ 1 files changed, 74 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
+index 45bbe99..ee2cd68 100644
+--- a/drivers/xen/pciback/pci_stub.c
++++ b/drivers/xen/pciback/pci_stub.c
+@@ -1038,6 +1038,70 @@ static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf)
+ 
+ DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL);
+ 
++static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf)
++{
++	struct pcistub_device *psdev;
++	struct pciback_dev_data *dev_data;
++	size_t count = 0;
++	unsigned long flags;
++
++	spin_lock_irqsave(&pcistub_devices_lock, flags);
++	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
++		if (count >= PAGE_SIZE)
++			break;
++		if (!psdev->dev)
++			continue;
++		dev_data = pci_get_drvdata(psdev->dev);
++		if (!dev_data)
++			continue;
++		count +=
++		    scnprintf(buf + count, PAGE_SIZE - count, "%s:%s:%sing\n",
++			      pci_name(psdev->dev),
++			      dev_data->isr_on ? "on" : "off",
++			      dev_data->ack_intr ? "ack" : "not ack");
++	}
++	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++	return count;
++}
++
++DRIVER_ATTR(irq_handlers, S_IRUSR, pcistub_irq_handler_show, NULL);
++
++static ssize_t pcistub_irq_handler_switch(struct device_driver *drv,
++					  const char *buf,
++					  size_t count)
++{
++	struct pcistub_device *psdev;
++	struct pciback_dev_data *dev_data;
++	int domain, bus, slot, func;
++	int err = -ENOENT;
++
++	err = str_to_slot(buf, &domain, &bus, &slot, &func);
++	if (err)
++		goto out;
++
++	psdev = pcistub_device_find(domain, bus, slot, func);
++
++	if (!psdev)
++		goto out;
++
++	dev_data = pci_get_drvdata(psdev->dev);
++	if (!dev_data)
++		goto out;
++
++	dev_dbg(&psdev->dev->dev, "%s fake irq handler: %d->%d\n",
++		dev_data->irq_name, dev_data->isr_on,
++		!dev_data->isr_on);
++
++	dev_data->isr_on = !(dev_data->isr_on);
++	if (dev_data->isr_on)
++		dev_data->ack_intr = 1;
++out:
++	if (!err)
++		err = count;
++	return err;
++}
++DRIVER_ATTR(irq_handler_state, S_IWUSR, NULL, pcistub_irq_handler_switch);
++
+ static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf,
+ 				 size_t count)
+ {
+@@ -1177,7 +1241,10 @@ static void pcistub_exit(void)
+ 	driver_remove_file(&pciback_pci_driver.driver, &driver_attr_slots);
+ 	driver_remove_file(&pciback_pci_driver.driver, &driver_attr_quirks);
+ 	driver_remove_file(&pciback_pci_driver.driver, &driver_attr_permissive);
+-
++	driver_remove_file(&pciback_pci_driver.driver,
++			   &driver_attr_irq_handlers);
++	driver_remove_file(&pciback_pci_driver.driver,
++			   &driver_attr_irq_handler_state);
+ 	pci_unregister_driver(&pciback_pci_driver);
+ }
+ 
+@@ -1236,6 +1303,12 @@ static int __init pcistub_init(void)
+ 		err = driver_create_file(&pciback_pci_driver.driver,
+ 					 &driver_attr_permissive);
+ 
++	if (!err)
++		err = driver_create_file(&pciback_pci_driver.driver,
++					 &driver_attr_irq_handlers);
++	if (!err)
++		err = driver_create_file(&pciback_pci_driver.driver,
++					&driver_attr_irq_handler_state);
+ 	if (err)
+ 		pcistub_exit();
+ 
+-- 
+1.7.3.4
+
+
+From 6c7c36d411eeab67192fe0ed96ac1e048b4a1755 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Mon, 19 Apr 2010 14:39:10 -0400
+Subject: [PATCH 029/139] xen-pciback: When device transitions to MSI/MSI-X stop ACK-ing on the
+ legacy interrupt.
+
+But don't remove the irq handler from the legacy interrupt. The device
+might still transition back to the legacy interrupts.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ drivers/xen/pciback/conf_space_capability_msi.c |   17 ++++++++++++++++-
+ 1 files changed, 16 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c
+index b70ea8b..a236e2d 100644
+--- a/drivers/xen/pciback/conf_space_capability_msi.c
++++ b/drivers/xen/pciback/conf_space_capability_msi.c
+@@ -12,6 +12,7 @@
+ int pciback_enable_msi(struct pciback_device *pdev,
+ 		struct pci_dev *dev, struct xen_pci_op *op)
+ {
++	struct pciback_dev_data *dev_data;
+ 	int otherend = pdev->xdev->otherend_id;
+ 	int status;
+ 
+@@ -27,21 +28,29 @@ int pciback_enable_msi(struct pciback_device *pdev,
+ 	/* The value the guest needs is actually the IDT vector, not the
+ 	 * the local domain's IRQ number. */
+ 	op->value = xen_gsi_from_irq(dev->irq);
++	dev_data = pci_get_drvdata(dev);
++	if (dev_data)
++		dev_data->ack_intr = 0;
+ 	return 0;
+ }
+ 
+ int pciback_disable_msi(struct pciback_device *pdev,
+ 		struct pci_dev *dev, struct xen_pci_op *op)
+ {
++	struct pciback_dev_data *dev_data;
+ 	pci_disable_msi(dev);
+ 
+ 	op->value = xen_gsi_from_irq(dev->irq);
++	dev_data = pci_get_drvdata(dev);
++	if (dev_data)
++		dev_data->ack_intr = 1;
+ 	return 0;
+ }
+ 
+ int pciback_enable_msix(struct pciback_device *pdev,
+ 		struct pci_dev *dev, struct xen_pci_op *op)
+ {
++	struct pciback_dev_data *dev_data;
+ 	int i, result;
+ 	struct msix_entry *entries;
+ 
+@@ -68,6 +77,9 @@ int pciback_enable_msix(struct pciback_device *pdev,
+ 	kfree(entries);
+ 
+ 	op->value = result;
++	dev_data = pci_get_drvdata(dev);
++	if (dev_data)
++		dev_data->ack_intr = 0;
+ 
+ 	return result;
+ }
+@@ -75,10 +87,13 @@ int pciback_enable_msix(struct pciback_device *pdev,
+ int pciback_disable_msix(struct pciback_device *pdev,
+ 		struct pci_dev *dev, struct xen_pci_op *op)
+ {
+-
++	struct pciback_dev_data *dev_data;
+ 	pci_disable_msix(dev);
+ 
+ 	op->value = xen_gsi_from_irq(dev->irq);
++	dev_data = pci_get_drvdata(dev);
++	if (dev_data)
++		dev_data->ack_intr = 1;
+ 	return 0;
+ }
+ 
+-- 
+1.7.3.4
+
+
+From c1cc36c68f096f2b1e796ba84d9c583009939d91 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Mon, 19 Apr 2010 14:40:38 -0400
+Subject: [PATCH 030/139] xen-pciback: Enable interrupt handler when device is enabled.
+
+And also request it to be disabled when the device has been
+disabled.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ drivers/xen/pciback/conf_space_header.c |    6 ++++++
+ 1 files changed, 6 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/pciback/conf_space_header.c b/drivers/xen/pciback/conf_space_header.c
+index 1f4f86e..cb450f4 100644
+--- a/drivers/xen/pciback/conf_space_header.c
++++ b/drivers/xen/pciback/conf_space_header.c
+@@ -39,8 +39,10 @@ static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data)
+ 
+ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
+ {
++	struct pciback_dev_data *dev_data;
+ 	int err;
+ 
++	dev_data = pci_get_drvdata(dev);
+ 	if (!pci_is_enabled(dev) && is_enable_cmd(value)) {
+ 		if (unlikely(verbose_request))
+ 			printk(KERN_DEBUG "pciback: %s: enable\n",
+@@ -48,11 +50,15 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
+ 		err = pci_enable_device(dev);
+ 		if (err)
+ 			return err;
++		if (dev_data)
++			dev_data->enable_intx = 1;
+ 	} else if (pci_is_enabled(dev) && !is_enable_cmd(value)) {
+ 		if (unlikely(verbose_request))
+ 			printk(KERN_DEBUG "pciback: %s: disable\n",
+ 			       pci_name(dev));
+ 		pci_disable_device(dev);
++		if (dev_data)
++			dev_data->enable_intx = 0;
+ 	}
+ 
+ 	if (!dev->is_busmaster && is_master_cmd(value)) {
+-- 
+1.7.3.4
+
+
+From a732e3d6ed4831c460586bd7a16ef7f6b7d28936 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Mon, 19 Apr 2010 16:23:06 -0400
+Subject: [PATCH 031/139] xen-pciback: Probe the IRQ line to check if it is not shared.
+
+If it is not shared, we stop ACK-ing the IRQ line as there is
+no need for this irq handler to return IRQ_HANDLED.
+
+We have to do this check much later, once pciback
+and pcifront have started talking, because the hypercall a guest makes
+to notify the other guest that the IRQ line is shared
+is done asynchronously.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ drivers/xen/pciback/pci_stub.c    |    5 +++--
+ drivers/xen/pciback/pciback.h     |    1 +
+ drivers/xen/pciback/pciback_ops.c |   12 +++++++++++-
+ 3 files changed, 15 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
+index ee2cd68..88c7ca1 100644
+--- a/drivers/xen/pciback/pci_stub.c
++++ b/drivers/xen/pciback/pci_stub.c
+@@ -1055,10 +1055,11 @@ static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf)
+ 		if (!dev_data)
+ 			continue;
+ 		count +=
+-		    scnprintf(buf + count, PAGE_SIZE - count, "%s:%s:%sing\n",
++		    scnprintf(buf + count, PAGE_SIZE - count, "%s:%s:%sing:%ld\n",
+ 			      pci_name(psdev->dev),
+ 			      dev_data->isr_on ? "on" : "off",
+-			      dev_data->ack_intr ? "ack" : "not ack");
++			      dev_data->ack_intr ? "ack" : "not ack",
++			      dev_data->handled);
+ 	}
+ 	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+ 	return count;
+diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h
+index 9d1b0a6..fc31052 100644
+--- a/drivers/xen/pciback/pciback.h
++++ b/drivers/xen/pciback/pciback.h
+@@ -50,6 +50,7 @@ struct pciback_dev_data {
+ 	unsigned int enable_intx : 1;
+ 	unsigned int isr_on : 1; /* Whether the IRQ handler is installed. */ 
+ 	unsigned int ack_intr : 1; /* .. and ACK-ing */
++	unsigned long handled;
+ 	unsigned int irq; /* Saved in case device transitions to MSI/MSI-X */
+ 	char irq_name[0]; /* pciback[000:04:00.0] */
+ };
+diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
+index cb54893..5543881 100644
+--- a/drivers/xen/pciback/pciback_ops.c
++++ b/drivers/xen/pciback/pciback_ops.c
+@@ -226,7 +226,17 @@ irqreturn_t pciback_guest_interrupt(int irq, void *dev_id)
+ 	struct pci_dev *dev = (struct pci_dev *)dev_id;
+ 	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
+ 
+-	if (dev_data->isr_on && dev_data->ack_intr)
++	if (dev_data->isr_on && dev_data->ack_intr) {
++		dev_data->handled++;
++		if ((dev_data->handled % 1000) == 0) {
++			if (xen_ignore_irq(irq)) {
++				printk(KERN_INFO "%s IRQ line is not shared "
++					"with other domains. Turning ISR off\n",
++					 dev_data->irq_name);
++				dev_data->ack_intr = 0;
++			}
++		}
+ 		return IRQ_HANDLED;
++	}
+ 	return IRQ_NONE;
+ }
+-- 
+1.7.3.4
+
+
+From 3312c11c3f9c857b2457c293e6b6e15928a32f32 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Tue, 20 Apr 2010 20:22:40 -0400
+Subject: [PATCH 032/139] xen-pciback: Add debug statements for the MSI/MSI-X configuration module.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ drivers/xen/pciback/conf_space_capability_msi.c |   11 +++++++++++
+ 1 files changed, 11 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c
+index a236e2d..b15131e 100644
+--- a/drivers/xen/pciback/conf_space_capability_msi.c
++++ b/drivers/xen/pciback/conf_space_capability_msi.c
+@@ -16,6 +16,9 @@ int pciback_enable_msi(struct pciback_device *pdev,
+ 	int otherend = pdev->xdev->otherend_id;
+ 	int status;
+ 
++ 	if (unlikely(verbose_request))
++		printk(KERN_DEBUG "pciback: %s: enable MSI\n", pci_name(dev));
++
+ 	status = pci_enable_msi(dev);
+ 
+ 	if (status) {
+@@ -31,6 +34,7 @@ int pciback_enable_msi(struct pciback_device *pdev,
+ 	dev_data = pci_get_drvdata(dev);
+ 	if (dev_data)
+ 		dev_data->ack_intr = 0;
++
+ 	return 0;
+ }
+ 
+@@ -38,6 +42,9 @@ int pciback_disable_msi(struct pciback_device *pdev,
+ 		struct pci_dev *dev, struct xen_pci_op *op)
+ {
+ 	struct pciback_dev_data *dev_data;
++
++ 	if (unlikely(verbose_request))
++		printk(KERN_DEBUG "pciback: %s: disable MSI\n", pci_name(dev));
+ 	pci_disable_msi(dev);
+ 
+ 	op->value = xen_gsi_from_irq(dev->irq);
+@@ -54,6 +61,8 @@ int pciback_enable_msix(struct pciback_device *pdev,
+ 	int i, result;
+ 	struct msix_entry *entries;
+ 
++ 	if (unlikely(verbose_request))
++		printk(KERN_DEBUG "pciback: %s: enable MSI-X\n", pci_name(dev));
+ 	if (op->value > SH_INFO_MAX_VEC)
+ 		return -EINVAL;
+ 
+@@ -88,6 +97,8 @@ int pciback_disable_msix(struct pciback_device *pdev,
+ 		struct pci_dev *dev, struct xen_pci_op *op)
+ {
+ 	struct pciback_dev_data *dev_data;
++ 	if (unlikely(verbose_request))
++		printk(KERN_DEBUG "pciback: %s: disable MSI-X\n", pci_name(dev));
+ 	pci_disable_msix(dev);
+ 
+ 	op->value = xen_gsi_from_irq(dev->irq);
+-- 
+1.7.3.4
+
+
+From 52257d7ad18bd91fd614df5ef960a88af3ed5200 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Fri, 23 Jul 2010 14:35:47 -0400
+Subject: [PATCH 033/139] xen-pciback: Redo spinlock usage.
+
+We were using coarse spinlocks that could end up with a deadlock.
+This patch fixes that and makes the spinlocks much more fine-grained.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ drivers/xen/pciback/xenbus.c |   34 +++++++++++++++++++++-------------
+ 1 files changed, 21 insertions(+), 13 deletions(-)
+
+diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
+index d448bf5..f0d5426 100644
+--- a/drivers/xen/pciback/xenbus.c
++++ b/drivers/xen/pciback/xenbus.c
+@@ -54,23 +54,31 @@ static void pciback_disconnect(struct pciback_device *pdev)
+ 		unbind_from_irqhandler(pdev->evtchn_irq, pdev);
+ 		pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
+ 	}
++	spin_unlock(&pdev->dev_lock);
+ 
+ 	/* If the driver domain started an op, make sure we complete it
+ 	 * before releasing the shared memory */
++
++	/* Note, the workqueue does not use spinlocks at all.*/
+ 	flush_workqueue(pciback_wq);
+ 
++	spin_lock(&pdev->dev_lock);
+ 	if (pdev->sh_info != NULL) {
+ 		xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info);
+ 		pdev->sh_info = NULL;
+ 	}
+-
+ 	spin_unlock(&pdev->dev_lock);
++
+ }
+ 
+ static void free_pdev(struct pciback_device *pdev)
+ {
+-	if (pdev->be_watching)
++	spin_lock(&pdev->dev_lock);
++	if (pdev->be_watching) {
+ 		unregister_xenbus_watch(&pdev->be_watch);
++		pdev->be_watching = 0;
++	}
++	spin_unlock(&pdev->dev_lock);
+ 
+ 	pciback_disconnect(pdev);
+ 
+@@ -98,7 +106,10 @@ static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref,
+ 				"Error mapping other domain page in ours.");
+ 		goto out;
+ 	}
++
++	spin_lock(&pdev->dev_lock);
+ 	pdev->sh_info = vaddr;
++	spin_unlock(&pdev->dev_lock);
+ 
+ 	err = bind_interdomain_evtchn_to_irqhandler(
+ 		pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event,
+@@ -108,7 +119,10 @@ static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref,
+ 				 "Error binding event channel to IRQ");
+ 		goto out;
+ 	}
++
++	spin_lock(&pdev->dev_lock);
+ 	pdev->evtchn_irq = err;
++	spin_unlock(&pdev->dev_lock);
+ 	err = 0;
+ 
+ 	dev_dbg(&pdev->xdev->dev, "Attached!\n");
+@@ -122,7 +136,6 @@ static int pciback_attach(struct pciback_device *pdev)
+ 	int gnt_ref, remote_evtchn;
+ 	char *magic = NULL;
+ 
+-	spin_lock(&pdev->dev_lock);
+ 
+ 	/* Make sure we only do this setup once */
+ 	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
+@@ -168,7 +181,6 @@ static int pciback_attach(struct pciback_device *pdev)
+ 
+ 	dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
+ out:
+-	spin_unlock(&pdev->dev_lock);
+ 
+ 	kfree(magic);
+ 
+@@ -340,7 +352,6 @@ static int pciback_reconfigure(struct pciback_device *pdev)
+ 	char state_str[64];
+ 	char dev_str[64];
+ 
+-	spin_lock(&pdev->dev_lock);
+ 
+ 	dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n");
+ 
+@@ -481,8 +492,6 @@ static int pciback_reconfigure(struct pciback_device *pdev)
+ 	}
+ 
+ out:
+-	spin_unlock(&pdev->dev_lock);
+-
+ 	return 0;
+ }
+ 
+@@ -539,8 +548,6 @@ static int pciback_setup_backend(struct pciback_device *pdev)
+ 	char dev_str[64];
+ 	char state_str[64];
+ 
+-	spin_lock(&pdev->dev_lock);
+-
+ 	/* It's possible we could get the call to setup twice, so make sure
+ 	 * we're not already connected.
+ 	 */
+@@ -621,8 +628,6 @@ static int pciback_setup_backend(struct pciback_device *pdev)
+ 				 "Error switching to initialised state!");
+ 
+ out:
+-	spin_unlock(&pdev->dev_lock);
+-
+ 	if (!err)
+ 		/* see if pcifront is already configured (if not, we'll wait) */
+ 		pciback_attach(pdev);
+@@ -669,7 +674,10 @@ static int pciback_xenbus_probe(struct xenbus_device *dev,
+ 				pciback_be_watch);
+ 	if (err)
+ 		goto out;
++
++	spin_lock(&pdev->dev_lock);
+ 	pdev->be_watching = 1;
++	spin_unlock(&pdev->dev_lock);
+ 
+ 	/* We need to force a call to our callback here in case
+ 	 * xend already configured us!
+@@ -708,8 +716,8 @@ int __init pciback_xenbus_register(void)
+ {
+ 	pciback_wq = create_workqueue("pciback_workqueue");
+ 	if (!pciback_wq) {
+-		printk(KERN_ERR "pciback_xenbus_register: create"
+-			"pciback_workqueue failed\n");
++		printk(KERN_ERR "%s: create"
++			"pciback_workqueue failed\n",__FUNCTION__);
+ 		return -EFAULT;
+ 	}
+ 	return xenbus_register_backend(&xenbus_pciback_driver);
+-- 
+1.7.3.4
+
+
+From a9e0cfab0577730e74787b701edc727756a52b11 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Wed, 28 Jul 2010 13:28:34 -0400
+Subject: [PATCH 034/139] xen-pciback: Remove spinlock for be->watching state.
+
+There is no need to guard this with a spinlock. It
+is already guarded by the xenwatch_thread against multiple
+customers.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ drivers/xen/pciback/xenbus.c |    4 ----
+ 1 files changed, 0 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
+index f0d5426..993b659 100644
+--- a/drivers/xen/pciback/xenbus.c
++++ b/drivers/xen/pciback/xenbus.c
+@@ -73,12 +73,10 @@ static void pciback_disconnect(struct pciback_device *pdev)
+ 
+ static void free_pdev(struct pciback_device *pdev)
+ {
+-	spin_lock(&pdev->dev_lock);
+ 	if (pdev->be_watching) {
+ 		unregister_xenbus_watch(&pdev->be_watch);
+ 		pdev->be_watching = 0;
+ 	}
+-	spin_unlock(&pdev->dev_lock);
+ 
+ 	pciback_disconnect(pdev);
+ 
+@@ -675,9 +673,7 @@ static int pciback_xenbus_probe(struct xenbus_device *dev,
+ 	if (err)
+ 		goto out;
+ 
+-	spin_lock(&pdev->dev_lock);
+ 	pdev->be_watching = 1;
+-	spin_unlock(&pdev->dev_lock);
+ 
+ 	/* We need to force a call to our callback here in case
+ 	 * xend already configured us!
+-- 
+1.7.3.4
+
+
+From c0cae0b36c43e75d4d69c60f5319e6ba802b2233 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Mon, 13 Dec 2010 11:06:36 -0500
+Subject: [PATCH 035/139] xen/pciback: Fix checkpatch warnings and errors.
+
+Checkpatch found some extra warnings and errors. This mega
+patch fixes all of them in one swoop.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ arch/x86/include/asm/xen/pci.h                  |    2 +-
+ drivers/xen/events.c                            |   38 ++++++++++----------
+ drivers/xen/pciback/conf_space.c                |    4 +-
+ drivers/xen/pciback/conf_space_capability_msi.c |   11 +++---
+ drivers/xen/pciback/conf_space_header.c         |   42 +++++++++++-----------
+ drivers/xen/pciback/controller.c                |    2 +-
+ drivers/xen/pciback/pci_stub.c                  |    7 ++--
+ drivers/xen/pciback/pciback.h                   |   16 ++++----
+ drivers/xen/pciback/pciback_ops.c               |    9 ++---
+ drivers/xen/pciback/xenbus.c                    |   14 ++++----
+ 10 files changed, 73 insertions(+), 72 deletions(-)
+
+diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h
+index 8474b4b..7e61d78 100644
+--- a/arch/x86/include/asm/xen/pci.h
++++ b/arch/x86/include/asm/xen/pci.h
+@@ -27,7 +27,7 @@ static inline int xen_find_device_domain_owner(struct pci_dev *dev)
+ 	return -1;
+ }
+ static inline int xen_register_device_domain_owner(struct pci_dev *dev,
+- 						   uint16_t domain)
++						   uint16_t domain)
+ {
+ 	return -1;
+ }
+diff --git a/drivers/xen/events.c b/drivers/xen/events.c
+index 95eea13..3929c20 100644
+--- a/drivers/xen/events.c
++++ b/drivers/xen/events.c
+@@ -698,7 +698,7 @@ int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
+ 	domid = rc = xen_find_device_domain_owner(dev);
+ 	if (rc < 0)
+ 		domid = DOMID_SELF;
+-	
++
+ 	memset(&map_irq, 0, sizeof(map_irq));
+ 	map_irq.domid = domid;
+ 	map_irq.type = MAP_PIRQ_TYPE_MSI;
+@@ -850,18 +850,18 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
+ }
+ 
+ static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
+-                                          unsigned int remote_port)
++					  unsigned int remote_port)
+ {
+-        struct evtchn_bind_interdomain bind_interdomain;
+-        int err;
++	struct evtchn_bind_interdomain bind_interdomain;
++	int err;
+ 
+-        bind_interdomain.remote_dom  = remote_domain;
+-        bind_interdomain.remote_port = remote_port;
++	bind_interdomain.remote_dom  = remote_domain;
++	bind_interdomain.remote_port = remote_port;
+ 
+-        err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
+-                                          &bind_interdomain);
++	err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
++					  &bind_interdomain);
+ 
+-        return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
++	return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
+ }
+ 
+ 
+@@ -966,19 +966,19 @@ int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
+ 					  const char *devname,
+ 					  void *dev_id)
+ {
+-        int irq, retval;
++	int irq, retval;
+ 
+-        irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port);
+-        if (irq < 0)
+-                return irq;
++	irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port);
++	if (irq < 0)
++		return irq;
+ 
+-        retval = request_irq(irq, handler, irqflags, devname, dev_id);
+-        if (retval != 0) {
+-                unbind_from_irq(irq);
+-                return retval;
+-        }
++	retval = request_irq(irq, handler, irqflags, devname, dev_id);
++	if (retval != 0) {
++		unbind_from_irq(irq);
++		return retval;
++	}
+ 
+-        return irq;
++	return irq;
+ }
+ EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler);
+ 
+diff --git a/drivers/xen/pciback/conf_space.c b/drivers/xen/pciback/conf_space.c
+index 370c18e..eb6bba0 100644
+--- a/drivers/xen/pciback/conf_space.c
++++ b/drivers/xen/pciback/conf_space.c
+@@ -18,8 +18,8 @@
+ static int permissive;
+ module_param(permissive, bool, 0644);
+ 
+-#define DEFINE_PCI_CONFIG(op, size, type) 			\
+-int pciback_##op##_config_##size 				\
++#define DEFINE_PCI_CONFIG(op, size, type)			\
++int pciback_##op##_config_##size				\
+ (struct pci_dev *dev, int offset, type value, void *data)	\
+ {								\
+ 	return pci_##op##_config_##size(dev, offset, value);	\
+diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c
+index b15131e..3acda69 100644
+--- a/drivers/xen/pciback/conf_space_capability_msi.c
++++ b/drivers/xen/pciback/conf_space_capability_msi.c
+@@ -16,7 +16,7 @@ int pciback_enable_msi(struct pciback_device *pdev,
+ 	int otherend = pdev->xdev->otherend_id;
+ 	int status;
+ 
+- 	if (unlikely(verbose_request))
++	if (unlikely(verbose_request))
+ 		printk(KERN_DEBUG "pciback: %s: enable MSI\n", pci_name(dev));
+ 
+ 	status = pci_enable_msi(dev);
+@@ -43,7 +43,7 @@ int pciback_disable_msi(struct pciback_device *pdev,
+ {
+ 	struct pciback_dev_data *dev_data;
+ 
+- 	if (unlikely(verbose_request))
++	if (unlikely(verbose_request))
+ 		printk(KERN_DEBUG "pciback: %s: disable MSI\n", pci_name(dev));
+ 	pci_disable_msi(dev);
+ 
+@@ -61,7 +61,7 @@ int pciback_enable_msix(struct pciback_device *pdev,
+ 	int i, result;
+ 	struct msix_entry *entries;
+ 
+- 	if (unlikely(verbose_request))
++	if (unlikely(verbose_request))
+ 		printk(KERN_DEBUG "pciback: %s: enable MSI-X\n", pci_name(dev));
+ 	if (op->value > SH_INFO_MAX_VEC)
+ 		return -EINVAL;
+@@ -97,8 +97,9 @@ int pciback_disable_msix(struct pciback_device *pdev,
+ 		struct pci_dev *dev, struct xen_pci_op *op)
+ {
+ 	struct pciback_dev_data *dev_data;
+- 	if (unlikely(verbose_request))
+-		printk(KERN_DEBUG "pciback: %s: disable MSI-X\n", pci_name(dev));
++	if (unlikely(verbose_request))
++		printk(KERN_DEBUG "pciback: %s: disable MSI-X\n",
++			pci_name(dev));
+ 	pci_disable_msix(dev);
+ 
+ 	op->value = xen_gsi_from_irq(dev->irq);
+diff --git a/drivers/xen/pciback/conf_space_header.c b/drivers/xen/pciback/conf_space_header.c
+index cb450f4..22ad0f5 100644
+--- a/drivers/xen/pciback/conf_space_header.c
++++ b/drivers/xen/pciback/conf_space_header.c
+@@ -316,27 +316,27 @@ static const struct config_field header_common[] = {
+ 	{}
+ };
+ 
+-#define CFG_FIELD_BAR(reg_offset) 			\
+-	{ 						\
+-	 .offset     = reg_offset, 			\
+-	 .size       = 4, 				\
+-	 .init       = bar_init, 			\
+-	 .reset      = bar_reset, 			\
+-	 .release    = bar_release, 			\
+-	 .u.dw.read  = bar_read, 			\
+-	 .u.dw.write = bar_write, 			\
+-	 }
+-
+-#define CFG_FIELD_ROM(reg_offset) 			\
+-	{ 						\
+-	 .offset     = reg_offset, 			\
+-	 .size       = 4, 				\
+-	 .init       = rom_init, 			\
+-	 .reset      = bar_reset, 			\
+-	 .release    = bar_release, 			\
+-	 .u.dw.read  = bar_read, 			\
+-	 .u.dw.write = rom_write, 			\
+-	 }
++#define CFG_FIELD_BAR(reg_offset)			\
++	{						\
++	.offset     = reg_offset,			\
++	.size       = 4,				\
++	.init       = bar_init,				\
++	.reset      = bar_reset,			\
++	.release    = bar_release,			\
++	.u.dw.read  = bar_read,				\
++	.u.dw.write = bar_write,			\
++	}
++
++#define CFG_FIELD_ROM(reg_offset)			\
++	{						\
++	.offset     = reg_offset,			\
++	.size       = 4,				\
++	.init       = rom_init,				\
++	.reset      = bar_reset,			\
++	.release    = bar_release,			\
++	.u.dw.read  = bar_read,				\
++	.u.dw.write = rom_write,			\
++	}
+ 
+ static const struct config_field header_0[] = {
+ 	CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
+diff --git a/drivers/xen/pciback/controller.c b/drivers/xen/pciback/controller.c
+index 7f04f11..5a7e4cc 100644
+--- a/drivers/xen/pciback/controller.c
++++ b/drivers/xen/pciback/controller.c
+@@ -378,7 +378,7 @@ int pciback_publish_pci_roots(struct pciback_device *pdev,
+ 	}
+ 
+ 	err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
+-			    "%lx", (sizeof(struct acpi_resource) * 2) + 1);
++			    "%lx", (sizeof(struct acpi_resource) *2) + 1);
+ 
+ out:
+ 	spin_unlock(&dev_data->lock);
+diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
+index 88c7ca1..c8f6f29 100644
+--- a/drivers/xen/pciback/pci_stub.c
++++ b/drivers/xen/pciback/pci_stub.c
+@@ -13,7 +13,7 @@
+ #include <linux/pci.h>
+ #include <linux/wait.h>
+ #include <linux/sched.h>
+-#include <asm/atomic.h>
++#include <linux/atomic.h>
+ #include <xen/events.h>
+ #include <asm/xen/pci.h>
+ #include <asm/xen/hypervisor.h>
+@@ -603,7 +603,7 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
+ 	if (test_bit(_XEN_PCIF_active,
+ 		(unsigned long *)&psdev->pdev->sh_info->flags)) {
+ 		dev_dbg(&psdev->dev->dev,
+-			"schedule pci_conf service in pciback \n");
++			"schedule pci_conf service in pciback\n");
+ 		test_and_schedule_op(psdev->pdev);
+ 	}
+ 
+@@ -1055,7 +1055,8 @@ static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf)
+ 		if (!dev_data)
+ 			continue;
+ 		count +=
+-		    scnprintf(buf + count, PAGE_SIZE - count, "%s:%s:%sing:%ld\n",
++		    scnprintf(buf + count, PAGE_SIZE - count,
++			      "%s:%s:%sing:%ld\n",
+ 			      pci_name(psdev->dev),
+ 			      dev_data->isr_on ? "on" : "off",
+ 			      dev_data->ack_intr ? "ack" : "not ack",
+diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h
+index fc31052..5c14020 100644
+--- a/drivers/xen/pciback/pciback.h
++++ b/drivers/xen/pciback/pciback.h
+@@ -12,7 +12,7 @@
+ #include <linux/list.h>
+ #include <linux/spinlock.h>
+ #include <linux/workqueue.h>
+-#include <asm/atomic.h>
++#include <linux/atomic.h>
+ #include <xen/interface/io/pciif.h>
+ 
+ struct pci_dev_entry {
+@@ -20,8 +20,8 @@ struct pci_dev_entry {
+ 	struct pci_dev *dev;
+ };
+ 
+-#define _PDEVF_op_active 	(0)
+-#define PDEVF_op_active 	(1<<(_PDEVF_op_active))
++#define _PDEVF_op_active	(0)
++#define PDEVF_op_active		(1<<(_PDEVF_op_active))
+ #define _PCIB_op_pending	(1)
+ #define PCIB_op_pending		(1<<(_PCIB_op_pending))
+ 
+@@ -45,11 +45,11 @@ struct pciback_device {
+ 
+ struct pciback_dev_data {
+ 	struct list_head config_fields;
+-	unsigned int permissive : 1;
+-	unsigned int warned_on_write : 1;
+-	unsigned int enable_intx : 1;
+-	unsigned int isr_on : 1; /* Whether the IRQ handler is installed. */ 
+-	unsigned int ack_intr : 1; /* .. and ACK-ing */
++	unsigned int permissive:1;
++	unsigned int warned_on_write:1;
++	unsigned int enable_intx:1;
++	unsigned int isr_on:1; /* Whether the IRQ handler is installed. */
++	unsigned int ack_intr:1; /* .. and ACK-ing */
+ 	unsigned long handled;
+ 	unsigned int irq; /* Saved in case device transitions to MSI/MSI-X */
+ 	char irq_name[0]; /* pciback[000:04:00.0] */
+diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
+index 5543881..9a465e9 100644
+--- a/drivers/xen/pciback/pciback_ops.c
++++ b/drivers/xen/pciback/pciback_ops.c
+@@ -63,12 +63,11 @@ void pciback_control_isr(struct pci_dev *dev, int reset)
+ 				dev_data->irq_name, dev);
+ 		if (rc) {
+ 			dev_err(&dev->dev, "%s: failed to install fake IRQ " \
+-				"handler for IRQ %d! (rc:%d)\n", dev_data->irq_name,
+-				dev_data->irq, rc);
++				"handler for IRQ %d! (rc:%d)\n",
++				dev_data->irq_name, dev_data->irq, rc);
+ 			goto out;
+ 		}
+-	}
+-	else {
++	} else {
+ 		free_irq(dev_data->irq, dev);
+ 		dev_data->irq = 0;
+ 	}
+@@ -188,7 +187,7 @@ void pciback_do_op(struct work_struct *data)
+ 			break;
+ #endif
+ 		default:
+-			op->err = XEN_PCI_ERR_not_implemented;	
++			op->err = XEN_PCI_ERR_not_implemented;
+ 			break;
+ 		}
+ 	}
+diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
+index 993b659..70030c4 100644
+--- a/drivers/xen/pciback/xenbus.c
++++ b/drivers/xen/pciback/xenbus.c
+@@ -700,12 +700,12 @@ static const struct xenbus_device_id xenpci_ids[] = {
+ };
+ 
+ static struct xenbus_driver xenbus_pciback_driver = {
+-	.name 			= "pciback",
+-	.owner 			= THIS_MODULE,
+-	.ids 			= xenpci_ids,
+-	.probe 			= pciback_xenbus_probe,
+-	.remove 		= pciback_xenbus_remove,
+-	.otherend_changed 	= pciback_frontend_changed,
++	.name			= "pciback",
++	.owner			= THIS_MODULE,
++	.ids			= xenpci_ids,
++	.probe			= pciback_xenbus_probe,
++	.remove			= pciback_xenbus_remove,
++	.otherend_changed	= pciback_frontend_changed,
+ };
+ 
+ int __init pciback_xenbus_register(void)
+@@ -713,7 +713,7 @@ int __init pciback_xenbus_register(void)
+ 	pciback_wq = create_workqueue("pciback_workqueue");
+ 	if (!pciback_wq) {
+ 		printk(KERN_ERR "%s: create"
+-			"pciback_workqueue failed\n",__FUNCTION__);
++			"pciback_workqueue failed\n", __func__);
+ 		return -EFAULT;
+ 	}
+ 	return xenbus_register_backend(&xenbus_pciback_driver);
+-- 
+1.7.3.4
+
+
+From 83d24d8dbd9e52a7ac94deae2d9fff6681ce8761 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Mon, 13 Dec 2010 11:30:29 -0500
+Subject: [PATCH 036/139] xen/xen-pciback: Swap over to DEFINE_PCI_DEVICE_TABLE
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ drivers/xen/pciback/pci_stub.c |    2 +-
+ 1 files changed, 1 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
+index c8f6f29..09dd60c 100644
+--- a/drivers/xen/pciback/pci_stub.c
++++ b/drivers/xen/pciback/pci_stub.c
+@@ -497,7 +497,7 @@ static void pcistub_remove(struct pci_dev *dev)
+ 	}
+ }
+ 
+-static const struct pci_device_id pcistub_ids[] = {
++DEFINE_PCI_DEVICE_TABLE(pcistub_ids) = {
+ 	{
+ 	 .vendor = PCI_ANY_ID,
+ 	 .device = PCI_ANY_ID,
+-- 
+1.7.3.4
+
+
+From 105aad363797212fbd2a4c887b723407c5851175 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Thu, 9 Dec 2010 15:01:11 -0500
+Subject: [PATCH 038/139] xen/irq: Don't fall over when nr_irqs_gsi > nr_irqs.
+
+This scenario where the nr_irq_gsi is greater than nr_irqs
+is rather strange but lets still try to survive. Make sure
+to print a warning so the user wouldn't be surprised in case
+things don't work.
+
+Solves a bootup-crash when booting Xen and Linux under QEMU.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ drivers/xen/events.c |    9 +++++++++
+ 1 files changed, 9 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/events.c b/drivers/xen/events.c
+index 4d4a23d..98b7220 100644
+--- a/drivers/xen/events.c
++++ b/drivers/xen/events.c
+@@ -411,6 +411,7 @@ static int find_unbound_irq(void)
+ 	if (bottom == nr_irqs)
+ 		goto no_irqs;
+ 
++retry:
+ 	/* This loop starts from the top of IRQ space and goes down.
+ 	 * We need this b/c if we have a PCI device in a Xen PV guest
+ 	 * we do not have an IO-APIC (though the backend might have them)
+@@ -434,6 +435,14 @@ static int find_unbound_irq(void)
+ 		goto no_irqs;
+ 
+ 	res = irq_alloc_desc_at(irq, -1);
++	if (res == -EEXIST) {
++		top--;
++		if (bottom > top)
++			printk(KERN_ERR "Eating in GSI/MSI space (%d)!" \
++				" Your PCI device might not work!\n", top);
++		if (top > NR_IRQS_LEGACY)
++			goto retry;
++	}
+ 
+ 	if (WARN_ON(res != irq))
+ 		return -1;
+-- 
+1.7.3.4
+
+
+From bfb56cd0b9304ddc6cfb411315bf7e5fea3e8bc7 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Tue, 11 Jan 2011 17:20:13 +0000
+Subject: [PATCH 039/139] xen: handled remapped IRQs when enabling a pcifront PCI device.
+
+This happens to not be an issue currently because we take pains to try
+to ensure that the GSI-IRQ mapping is 1-1 in a PV guest and that
+regular event channels do not clash. However a subsequent patch is
+going to break this 1-1 mapping.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Cc: Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+Cc: Jeremy Fitzhardinge <jeremy at goop.org>
+---
+ arch/x86/pci/xen.c |   22 ++++++++++++++--------
+ 1 files changed, 14 insertions(+), 8 deletions(-)
+
+diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
+index 25cd4a0..2a12f3d 100644
+--- a/arch/x86/pci/xen.c
++++ b/arch/x86/pci/xen.c
+@@ -226,21 +226,27 @@ static int xen_pcifront_enable_irq(struct pci_dev *dev)
+ {
+ 	int rc;
+ 	int share = 1;
++	u8 gsi;
+ 
+-	dev_info(&dev->dev, "Xen PCI enabling IRQ: %d\n", dev->irq);
+-
+-	if (dev->irq < 0)
+-		return -EINVAL;
++	rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi);
++	if (rc < 0) {
++		dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n",
++			 rc);
++		return rc;
++	}
+ 
+-	if (dev->irq < NR_IRQS_LEGACY)
++	if (gsi < NR_IRQS_LEGACY)
+ 		share = 0;
+ 
+-	rc = xen_allocate_pirq(dev->irq, share, "pcifront");
++	rc = xen_allocate_pirq(gsi, share, "pcifront");
+ 	if (rc < 0) {
+-		dev_warn(&dev->dev, "Xen PCI IRQ: %d, failed to register:%d\n",
+-			 dev->irq, rc);
++		dev_warn(&dev->dev, "Xen PCI: failed to register GSI%d: %d\n",
++			 gsi, rc);
+ 		return rc;
+ 	}
++
++	dev->irq = rc;
++	dev_info(&dev->dev, "Xen PCI mapped GSI%d to IRQ%d\n", gsi, dev->irq);
+ 	return 0;
+ }
+ 
+-- 
+1.7.3.4
+
+
+From 80b3b503bba489dcbdd808c5dd50a6be3aa06949 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Tue, 11 Jan 2011 17:20:14 +0000
+Subject: [PATCH 040/139] xen:events: move find_unbound_irq inside CONFIG_PCI_MSI
+
+The only caller is xen_allocate_pirq_msi, which is also under this
+ifdef, so this fixes:
+    drivers/xen/events.c:377: warning: 'find_unbound_pirq' defined but not used
+when CONFIG_PCI_MSI=n
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Cc: Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+Cc: Jeremy Fitzhardinge <jeremy at goop.org>
+---
+ drivers/xen/events.c |   34 +++++++++++++++++-----------------
+ 1 files changed, 17 insertions(+), 17 deletions(-)
+
+diff --git a/drivers/xen/events.c b/drivers/xen/events.c
+index 98b7220..ae8d45d 100644
+--- a/drivers/xen/events.c
++++ b/drivers/xen/events.c
+@@ -384,23 +384,6 @@ static int get_nr_hw_irqs(void)
+ 	return ret;
+ }
+ 
+-static int find_unbound_pirq(int type)
+-{
+-	int rc, i;
+-	struct physdev_get_free_pirq op_get_free_pirq;
+-	op_get_free_pirq.type = type;
+-
+-	rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
+-	if (!rc)
+-		return op_get_free_pirq.pirq;
+-
+-	for (i = 0; i < nr_irqs; i++) {
+-		if (pirq_to_irq[i] < 0)
+-			return i;
+-	}
+-	return -1;
+-}
+-
+ static int find_unbound_irq(void)
+ {
+ 	struct irq_data *data;
+@@ -683,6 +666,23 @@ out:
+ #include <linux/msi.h>
+ #include "../pci/msi.h"
+ 
++static int find_unbound_pirq(int type)
++{
++	int rc, i;
++	struct physdev_get_free_pirq op_get_free_pirq;
++	op_get_free_pirq.type = type;
++
++	rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
++	if (!rc)
++		return op_get_free_pirq.pirq;
++
++	for (i = 0; i < nr_irqs; i++) {
++		if (pirq_to_irq[i] < 0)
++			return i;
++	}
++	return -1;
++}
++
+ void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc)
+ {
+ 	spin_lock(&irq_mapping_update_lock);
+-- 
+1.7.3.4
+
+
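The warning quoted in the message above is the usual "defined but not used"
case: a static helper whose only caller sits inside an #ifdef. In general
form (the names below are invented for illustration, not the events.c
symbols), keeping the helper under the same guard as its caller is what
silences it:

#ifdef CONFIG_PCI_MSI
/* Only referenced from the MSI path below, so keep it in the same #ifdef. */
static int pick_free_slot(void)
{
        return -1;                      /* placeholder body */
}

void setup_msi_path(void)
{
        int slot = pick_free_slot();

        (void)slot;                     /* keep -Wunused-variable quiet too */
}
#endif /* CONFIG_PCI_MSI */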
+From c514d00c80574e839d34c239363153b90bb8efcc Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Tue, 11 Jan 2011 17:20:15 +0000
+Subject: [PATCH 041/139] xen: events: add xen_allocate_irq_{dynamic, gsi} and xen_free_irq
+
+This is neater than open-coded calls to irq_alloc_desc_at and
+irq_free_desc.
+
+No intended behavioural change.
+
+Note that we previously were not checking the return value of
+irq_alloc_desc_at, which fails for GSI < NR_IRQS_LEGACY because
+the core architecture code has already allocated those descriptors
+for us. Hence the additional check against NR_IRQS_LEGACY in
+xen_allocate_irq_gsi.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Cc: Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+Cc: Jeremy Fitzhardinge <jeremy at goop.org>
+---
+ drivers/xen/events.c |   53 +++++++++++++++++++++++++++++++++-----------------
+ 1 files changed, 35 insertions(+), 18 deletions(-)
+
+diff --git a/drivers/xen/events.c b/drivers/xen/events.c
+index ae8d45d..74fb216 100644
+--- a/drivers/xen/events.c
++++ b/drivers/xen/events.c
+@@ -384,7 +384,7 @@ static int get_nr_hw_irqs(void)
+ 	return ret;
+ }
+ 
+-static int find_unbound_irq(void)
++static int xen_allocate_irq_dynamic(void)
+ {
+ 	struct irq_data *data;
+ 	int irq, res;
+@@ -442,6 +442,30 @@ static bool identity_mapped_irq(unsigned irq)
+ 	return irq < get_nr_hw_irqs();
+ }
+ 
++static int xen_allocate_irq_gsi(unsigned gsi)
++{
++	int irq;
++
++	if (!identity_mapped_irq(gsi) &&
++	    (xen_initial_domain() || !xen_pv_domain()))
++		return xen_allocate_irq_dynamic();
++
++	/* Legacy IRQ descriptors are already allocated by the arch. */
++	if (gsi < NR_IRQS_LEGACY)
++		return gsi;
++
++	irq = irq_alloc_desc_at(gsi, -1);
++	if (irq < 0)
++		panic("Unable to allocate to IRQ%d (%d)\n", gsi, irq);
++
++	return irq;
++}
++
++static void xen_free_irq(unsigned irq)
++{
++	irq_free_desc(irq);
++}
++
+ static void pirq_unmask_notify(int irq)
+ {
+ 	struct physdev_eoi eoi = { .irq = pirq_from_irq(irq) };
+@@ -627,14 +651,7 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name)
+ 		goto out;	/* XXX need refcount? */
+ 	}
+ 
+-	/* If we are a PV guest, we don't have GSIs (no ACPI passed). Therefore
+-	 * we are using the !xen_initial_domain() to drop in the function.*/
+-	if (identity_mapped_irq(gsi) || (!xen_initial_domain() &&
+-				xen_pv_domain())) {
+-		irq = gsi;
+-		irq_alloc_desc_at(irq, -1);
+-	} else
+-		irq = find_unbound_irq();
++	irq = xen_allocate_irq_gsi(gsi);
+ 
+ 	set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
+ 				      handle_level_irq, name);
+@@ -647,7 +664,7 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name)
+ 	 * this in the priv domain. */
+ 	if (xen_initial_domain() &&
+ 	    HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
+-		irq_free_desc(irq);
++		xen_free_irq(irq);
+ 		irq = -ENOSPC;
+ 		goto out;
+ 	}
+@@ -688,7 +705,7 @@ void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc)
+ 	spin_lock(&irq_mapping_update_lock);
+ 
+ 	if (alloc & XEN_ALLOC_IRQ) {
+-		*irq = find_unbound_irq();
++		*irq = xen_allocate_irq_dynamic();
+ 		if (*irq == -1)
+ 			goto out;
+ 	}
+@@ -738,7 +755,7 @@ int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
+ 
+ 	spin_lock(&irq_mapping_update_lock);
+ 
+-	irq = find_unbound_irq();
++	irq = xen_allocate_irq_dynamic();
+ 
+ 	if (irq == -1)
+ 		goto out;
+@@ -747,7 +764,7 @@ int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
+ 	if (rc) {
+ 		printk(KERN_WARNING "xen map irq failed %d\n", rc);
+ 
+-		irq_free_desc(irq);
++		xen_free_irq(irq);
+ 
+ 		irq = -1;
+ 		goto out;
+@@ -789,7 +806,7 @@ int xen_destroy_irq(int irq)
+ 	}
+ 	irq_info[irq] = mk_unbound_info();
+ 
+-	irq_free_desc(irq);
++	xen_free_irq(irq);
+ 
+ out:
+ 	spin_unlock(&irq_mapping_update_lock);
+@@ -820,7 +837,7 @@ int bind_evtchn_to_irq(unsigned int evtchn)
+ 	irq = evtchn_to_irq[evtchn];
+ 
+ 	if (irq == -1) {
+-		irq = find_unbound_irq();
++		irq = xen_allocate_irq_dynamic();
+ 
+ 		set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
+ 					      handle_fasteoi_irq, "event");
+@@ -845,7 +862,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
+ 	irq = per_cpu(ipi_to_irq, cpu)[ipi];
+ 
+ 	if (irq == -1) {
+-		irq = find_unbound_irq();
++		irq = xen_allocate_irq_dynamic();
+ 		if (irq < 0)
+ 			goto out;
+ 
+@@ -881,7 +898,7 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
+ 	irq = per_cpu(virq_to_irq, cpu)[virq];
+ 
+ 	if (irq == -1) {
+-		irq = find_unbound_irq();
++		irq = xen_allocate_irq_dynamic();
+ 
+ 		set_irq_chip_and_handler_name(irq, &xen_percpu_chip,
+ 					      handle_percpu_irq, "virq");
+@@ -940,7 +957,7 @@ static void unbind_from_irq(unsigned int irq)
+ 	if (irq_info[irq].type != IRQT_UNBOUND) {
+ 		irq_info[irq] = mk_unbound_info();
+ 
+-		irq_free_desc(irq);
++		xen_free_irq(irq);
+ 	}
+ 
+ 	spin_unlock(&irq_mapping_update_lock);
+-- 
+1.7.3.4
+
+
+From 323430f2697268e6261b673fa2b86d6f3f3c7cff Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Tue, 11 Jan 2011 17:20:16 +0000
+Subject: [PATCH 042/139] xen: events: allocate GSIs and dynamic IRQs from separate IRQ ranges.
+
+There are three cases we need to care about: PV guest, PV domain 0,
+and HVM guest.
+
+The PV guest case is simple: the guest has no access to ACPI or real
+APICs and therefore has no GSIs, so we simply allocate all IRQs
+dynamically. The potentially interesting case here is PIRQ-type
+event channels associated with passed-through PCI devices, but even
+then the guest has no direct interaction with the physical GSI,
+since that happens in the PCI backend.
+
+The PV domain 0 and HVM guest cases are actually the same. In the
+domain 0 case the kernel sees the host ACPI and GSIs (although it
+only sees the APIC indirectly via the hypervisor), and in the HVM
+guest case it sees the virtualised ACPI and emulated APICs. In these
+cases we start allocating dynamic IRQs at nr_irqs_gsi so that they
+cannot clash with any GSI.
+
+Currently xen_allocate_irq_dynamic starts at nr_irqs and works
+backwards looking for a free IRQ, in order to (try and) avoid
+clashing with GSIs used in domain 0 and in HVM guests. This change
+avoids that, although we retain the behaviour of allowing dynamic
+IRQs to encroach on the GSI range if no suitable IRQs are available,
+since a future IRQ clash is deemed preferable to failure right now.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Cc: Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+Cc: Jeremy Fitzhardinge <jeremy at goop.org>
+---
+ drivers/xen/events.c |   84 +++++++++++++++----------------------------------
+ 1 files changed, 26 insertions(+), 58 deletions(-)
+
+diff --git a/drivers/xen/events.c b/drivers/xen/events.c
+index 74fb216..a7b60f6 100644
+--- a/drivers/xen/events.c
++++ b/drivers/xen/events.c
+@@ -373,81 +373,49 @@ static void unmask_evtchn(int port)
+ 	put_cpu();
+ }
+ 
+-static int get_nr_hw_irqs(void)
++static int xen_allocate_irq_dynamic(void)
+ {
+-	int ret = 1;
++	int first = 0;
++	int irq;
+ 
+ #ifdef CONFIG_X86_IO_APIC
+-	ret = get_nr_irqs_gsi();
++	/*
++	 * For an HVM guest or domain 0 which see "real" (emulated or
++	 * actual respectively) GSIs we allocate dynamic IRQs
++	 * e.g. those corresponding to event channels or MSIs
++	 * etc. from the range above those "real" GSIs to avoid
++	 * collisions.
++	 */
++	if (xen_initial_domain() || xen_hvm_domain())
++		first = get_nr_irqs_gsi();
+ #endif
+ 
+-	return ret;
+-}
+-
+-static int xen_allocate_irq_dynamic(void)
+-{
+-	struct irq_data *data;
+-	int irq, res;
+-	int bottom = get_nr_hw_irqs();
+-	int top = nr_irqs-1;
+-
+-	if (bottom == nr_irqs)
+-		goto no_irqs;
+-
+ retry:
+-	/* This loop starts from the top of IRQ space and goes down.
+-	 * We need this b/c if we have a PCI device in a Xen PV guest
+-	 * we do not have an IO-APIC (though the backend might have them)
+-	 * mapped in. To not have a collision of physical IRQs with the Xen
+-	 * event channels start at the top of the IRQ space for virtual IRQs.
+-	 */
+-	for (irq = top; irq > bottom; irq--) {
+-		data = irq_get_irq_data(irq);
+-		/* only 15->0 have init'd desc; handle irq > 16 */
+-		if (!data)
+-			break;
+-		if (data->chip == &no_irq_chip)
+-			break;
+-		if (data->chip != &xen_dynamic_chip)
+-			continue;
+-		if (irq_info[irq].type == IRQT_UNBOUND)
+-			return irq;
+-	}
++	irq = irq_alloc_desc_from(first, -1);
+ 
+-	if (irq == bottom)
+-		goto no_irqs;
+-
+-	res = irq_alloc_desc_at(irq, -1);
+-	if (res == -EEXIST) {
+-		top--;
+-		if (bottom > top)
+-			printk(KERN_ERR "Eating in GSI/MSI space (%d)!" \
+-				" Your PCI device might not work!\n", top);
+-		if (top > NR_IRQS_LEGACY)
+-			goto retry;
++	if (irq == -ENOMEM && first > NR_IRQS_LEGACY) {
++		printk(KERN_ERR "Out of dynamic IRQ space and eating into GSI space. You should increase nr_irqs\n");
++		first = max(NR_IRQS_LEGACY, first - NR_IRQS_LEGACY);
++		goto retry;
+ 	}
+ 
+-	if (WARN_ON(res != irq))
+-		return -1;
++	if (irq < 0)
++		panic("No available IRQ to bind to: increase nr_irqs!\n");
+ 
+ 	return irq;
+-
+-no_irqs:
+-	panic("No available IRQ to bind to: increase nr_irqs!\n");
+-}
+-
+-static bool identity_mapped_irq(unsigned irq)
+-{
+-	/* identity map all the hardware irqs */
+-	return irq < get_nr_hw_irqs();
+ }
+ 
+ static int xen_allocate_irq_gsi(unsigned gsi)
+ {
+ 	int irq;
+ 
+-	if (!identity_mapped_irq(gsi) &&
+-	    (xen_initial_domain() || !xen_pv_domain()))
++	/*
++	 * A PV guest has no concept of a GSI (since it has no ACPI
++	 * nor access to/knowledge of the physical APICs). Therefore
++	 * all IRQs are dynamically allocated from the entire IRQ
++	 * space.
++	 */
++	if (xen_pv_domain() && !xen_initial_domain())
+ 		return xen_allocate_irq_dynamic();
+ 
+ 	/* Legacy IRQ descriptors are already allocated by the arch. */
+-- 
+1.7.3.4
+
+
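The allocation policy described in the commit message above (dynamic IRQs
live above the GSI range and only encroach on it as a last resort) can be
modelled in isolation. The following is a stand-alone user-space sketch of
that policy, not the kernel code; the sizes and the bitmap allocator are
invented for illustration:

#include <stdbool.h>
#include <stdio.h>

#define NR_IRQS        64       /* invented sizes for the model          */
#define NR_IRQS_GSI    24       /* stands in for the kernel's nr_irqs_gsi */
#define NR_IRQS_LEGACY 16

static bool used[NR_IRQS];

/* First free IRQ at or above 'from', or -1 if the space is exhausted. */
static int alloc_from(int from)
{
        for (int irq = from; irq < NR_IRQS; irq++) {
                if (!used[irq]) {
                        used[irq] = true;
                        return irq;
                }
        }
        return -1;
}

/* Prefer the range above the GSIs; fall back into it rather than fail. */
static int allocate_dynamic(void)
{
        int irq = alloc_from(NR_IRQS_GSI);

        if (irq < 0)
                irq = alloc_from(NR_IRQS_LEGACY); /* eating into GSI space */
        return irq;
}

int main(void)
{
        for (int i = 0; i < 45; i++)
                printf("irq %d\n", allocate_dynamic());
        return 0;
}

In events.c itself the fallback additionally prints a warning, and running
out of IRQs entirely ends in panic(), as the hunk above shows.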
+From c986ab83cede3fc02d9f73c65dd83c20ebdf3d0e Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Mon, 29 Nov 2010 13:52:18 -0500
+Subject: [PATCH 043/139] ttm: Introduce a placeholder for DMA (bus) addresses.
+
+For now this is limited to the non-pool constructs only.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Tested-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/gpu/drm/ttm/ttm_page_alloc.c |    9 ++++++---
+ drivers/gpu/drm/ttm/ttm_tt.c         |   10 ++++++++--
+ include/drm/ttm/ttm_bo_driver.h      |    2 ++
+ include/drm/ttm/ttm_page_alloc.h     |    8 ++++++--
+ 4 files changed, 22 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
+index b1e02ff..6859288 100644
+--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
++++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
+@@ -38,6 +38,7 @@
+ #include <linux/mm.h>
+ #include <linux/seq_file.h> /* for seq_printf */
+ #include <linux/slab.h>
++#include <linux/dma-mapping.h>
+ 
+ #include <asm/atomic.h>
+ 
+@@ -662,7 +663,8 @@ out:
+  * cached pages.
+  */
+ int ttm_get_pages(struct list_head *pages, int flags,
+-		enum ttm_caching_state cstate, unsigned count)
++		enum ttm_caching_state cstate, unsigned count,
++		dma_addr_t *dma_address)
+ {
+ 	struct ttm_page_pool *pool = ttm_get_pool(flags, cstate);
+ 	struct page *p = NULL;
+@@ -720,7 +722,7 @@ int ttm_get_pages(struct list_head *pages, int flags,
+ 			printk(KERN_ERR TTM_PFX
+ 			       "Failed to allocate extra pages "
+ 			       "for large request.");
+-			ttm_put_pages(pages, 0, flags, cstate);
++			ttm_put_pages(pages, 0, flags, cstate, NULL);
+ 			return r;
+ 		}
+ 	}
+@@ -731,7 +733,8 @@ int ttm_get_pages(struct list_head *pages, int flags,
+ 
+ /* Put all pages in pages list to correct pool to wait for reuse */
+ void ttm_put_pages(struct list_head *pages, unsigned page_count, int flags,
+-		enum ttm_caching_state cstate)
++		enum ttm_caching_state cstate,
++		dma_addr_t *dma_address)
+ {
+ 	unsigned long irq_flags;
+ 	struct ttm_page_pool *pool = ttm_get_pool(flags, cstate);
+diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
+index af789dc..0d39001 100644
+--- a/drivers/gpu/drm/ttm/ttm_tt.c
++++ b/drivers/gpu/drm/ttm/ttm_tt.c
+@@ -49,12 +49,16 @@ static int ttm_tt_swapin(struct ttm_tt *ttm);
+ static void ttm_tt_alloc_page_directory(struct ttm_tt *ttm)
+ {
+ 	ttm->pages = drm_calloc_large(ttm->num_pages, sizeof(*ttm->pages));
++	ttm->dma_address = drm_calloc_large(ttm->num_pages,
++					    sizeof(*ttm->dma_address));
+ }
+ 
+ static void ttm_tt_free_page_directory(struct ttm_tt *ttm)
+ {
+ 	drm_free_large(ttm->pages);
+ 	ttm->pages = NULL;
++	drm_free_large(ttm->dma_address);
++	ttm->dma_address = NULL;
+ }
+ 
+ static void ttm_tt_free_user_pages(struct ttm_tt *ttm)
+@@ -105,7 +109,8 @@ static struct page *__ttm_tt_get_page(struct ttm_tt *ttm, int index)
+ 
+ 		INIT_LIST_HEAD(&h);
+ 
+-		ret = ttm_get_pages(&h, ttm->page_flags, ttm->caching_state, 1);
++		ret = ttm_get_pages(&h, ttm->page_flags, ttm->caching_state, 1,
++				    &ttm->dma_address[index]);
+ 
+ 		if (ret != 0)
+ 			return NULL;
+@@ -298,7 +303,8 @@ static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm)
+ 			count++;
+ 		}
+ 	}
+-	ttm_put_pages(&h, count, ttm->page_flags, ttm->caching_state);
++	ttm_put_pages(&h, count, ttm->page_flags, ttm->caching_state,
++		      ttm->dma_address);
+ 	ttm->state = tt_unpopulated;
+ 	ttm->first_himem_page = ttm->num_pages;
+ 	ttm->last_lomem_page = -1;
+diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
+index 8e0c848..6dc4fcc 100644
+--- a/include/drm/ttm/ttm_bo_driver.h
++++ b/include/drm/ttm/ttm_bo_driver.h
+@@ -149,6 +149,7 @@ enum ttm_caching_state {
+  * @swap_storage: Pointer to shmem struct file for swap storage.
+  * @caching_state: The current caching state of the pages.
+  * @state: The current binding state of the pages.
++ * @dma_address: The DMA (bus) addresses of the pages (if TTM_PAGE_FLAG_DMA32)
+  *
+  * This is a structure holding the pages, caching- and aperture binding
+  * status for a buffer object that isn't backed by fixed (VRAM / AGP)
+@@ -173,6 +174,7 @@ struct ttm_tt {
+ 		tt_unbound,
+ 		tt_unpopulated,
+ 	} state;
++	dma_addr_t *dma_address;
+ };
+ 
+ #define TTM_MEMTYPE_FLAG_FIXED         (1 << 0)	/* Fixed (on-card) PCI memory */
+diff --git a/include/drm/ttm/ttm_page_alloc.h b/include/drm/ttm/ttm_page_alloc.h
+index 1168214..8062890 100644
+--- a/include/drm/ttm/ttm_page_alloc.h
++++ b/include/drm/ttm/ttm_page_alloc.h
+@@ -36,11 +36,13 @@
+  * @flags: ttm flags for page allocation.
+  * @cstate: ttm caching state for the page.
+  * @count: number of pages to allocate.
++ * @dma_address: The DMA (bus) address of pages (if TTM_PAGE_FLAG_DMA32 set).
+  */
+ int ttm_get_pages(struct list_head *pages,
+ 		  int flags,
+ 		  enum ttm_caching_state cstate,
+-		  unsigned count);
++		  unsigned count,
++		  dma_addr_t *dma_address);
+ /**
+  * Put linked list of pages to pool.
+  *
+@@ -49,11 +51,13 @@ int ttm_get_pages(struct list_head *pages,
+  * count.
+  * @flags: ttm flags for page allocation.
+  * @cstate: ttm caching state.
++ * @dma_address: The DMA (bus) address of pages (if TTM_PAGE_FLAG_DMA32 set).
+  */
+ void ttm_put_pages(struct list_head *pages,
+ 		   unsigned page_count,
+ 		   int flags,
+-		   enum ttm_caching_state cstate);
++		   enum ttm_caching_state cstate,
++		   dma_addr_t *dma_address);
+ /**
+  * Initialize pool allocator.
+  */
+-- 
+1.7.3.4
+
+
+From ed805774fd3e3a8a0baa003d6419c5bd07d8cc5a Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Mon, 29 Nov 2010 14:03:30 -0500
+Subject: [PATCH 044/139] ttm: Utilize the dma_addr_t array for pages that are in the DMA32 pool.
+
+For now we only use the "if (pool == NULL)" path.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Tested-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/gpu/drm/ttm/ttm_page_alloc.c |   26 +++++++++++++++++++++++---
+ 1 files changed, 23 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
+index 6859288..5d09677 100644
+--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
++++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
+@@ -683,14 +683,22 @@ int ttm_get_pages(struct list_head *pages, int flags,
+ 			gfp_flags |= GFP_HIGHUSER;
+ 
+ 		for (r = 0; r < count; ++r) {
+-			p = alloc_page(gfp_flags);
++			if ((flags & TTM_PAGE_FLAG_DMA32) && dma_address) {
++				void *addr;
++				addr = dma_alloc_coherent(NULL, PAGE_SIZE,
++							&dma_address[r],
++							gfp_flags);
++				if (addr == NULL)
++					return -ENOMEM;
++				p = virt_to_page(addr);
++			} else
++				p = alloc_page(gfp_flags);
+ 			if (!p) {
+ 
+ 				printk(KERN_ERR TTM_PFX
+ 				       "Unable to allocate page.");
+ 				return -ENOMEM;
+ 			}
+-
+ 			list_add(&p->lru, pages);
+ 		}
+ 		return 0;
+@@ -739,12 +747,24 @@ void ttm_put_pages(struct list_head *pages, unsigned page_count, int flags,
+ 	unsigned long irq_flags;
+ 	struct ttm_page_pool *pool = ttm_get_pool(flags, cstate);
+ 	struct page *p, *tmp;
++	unsigned r;
+ 
+ 	if (pool == NULL) {
+ 		/* No pool for this memory type so free the pages */
+ 
++		r = page_count-1;
+ 		list_for_each_entry_safe(p, tmp, pages, lru) {
+-			__free_page(p);
++			if ((flags & TTM_PAGE_FLAG_DMA32) && dma_address) {
++				void *addr = page_address(p);
++				WARN_ON(!addr || !dma_address[r]);
++				if (addr)
++					dma_free_coherent(NULL, PAGE_SIZE,
++							addr,
++							dma_address[r]);
++				dma_address[r] = 0;
++			} else
++				__free_page(p);
++			r--;
+ 		}
+ 		/* Make the pages list empty */
+ 		INIT_LIST_HEAD(pages);
+-- 
+1.7.3.4
+
+
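One detail worth keeping in mind about the change above is that the two
paths have to stay symmetric: a page that came from dma_alloc_coherent()
must be returned through dma_free_coherent() with the bus address recorded
at allocation time, never through __free_page(). A condensed sketch of that
pairing (the helper names are invented; the NULL device argument mirrors
what the patch itself passes):

#include <linux/dma-mapping.h>
#include <linux/mm.h>

/* Allocate one coherent page and record its bus address in *dma. */
static struct page *get_coherent_page(gfp_t gfp, dma_addr_t *dma)
{
        void *addr = dma_alloc_coherent(NULL, PAGE_SIZE, dma, gfp);

        return addr ? virt_to_page(addr) : NULL;
}

/* Free it with the same bus address; __free_page() would be wrong here. */
static void put_coherent_page(struct page *page, dma_addr_t dma)
{
        dma_free_coherent(NULL, PAGE_SIZE, page_address(page), dma);
}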
+From c779160e3b0246d7de606eafb855df9b283a5c2a Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Thu, 2 Dec 2010 10:24:13 -0500
+Subject: [PATCH 045/139] ttm: Expand (*populate) to support an array of DMA addresses.
+
+We pass in the array of ttm pages to be populated in the GART/MM
+of the card (or AGP). The patch titled "ttm: Utilize the dma_addr_t
+array for pages that are in the DMA32 pool." uses the DMA API to
+give those pages proper DMA addresses (in the situation where
+page_to_phys or virt_to_phys do not give us the DMA (bus) address).
+
+Since we are using the DMA API on those pages, we should pass the
+DMA addresses in to this function so it can save them in its proper
+fields (later patches use them).
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Tested-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/gpu/drm/nouveau/nouveau_sgdma.c |    3 ++-
+ drivers/gpu/drm/radeon/radeon_ttm.c     |    3 ++-
+ drivers/gpu/drm/ttm/ttm_agp_backend.c   |    3 ++-
+ drivers/gpu/drm/ttm/ttm_tt.c            |    2 +-
+ drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c  |    3 ++-
+ include/drm/ttm/ttm_bo_driver.h         |    4 +++-
+ 6 files changed, 12 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
+index 288baca..edc140a 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
++++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
+@@ -20,7 +20,8 @@ struct nouveau_sgdma_be {
+ 
+ static int
+ nouveau_sgdma_populate(struct ttm_backend *be, unsigned long num_pages,
+-		       struct page **pages, struct page *dummy_read_page)
++		       struct page **pages, struct page *dummy_read_page,
++		       dma_addr_t *dma_addrs)
+ {
+ 	struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
+ 	struct drm_device *dev = nvbe->dev;
+diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
+index 01c2c73..6f156e9 100644
+--- a/drivers/gpu/drm/radeon/radeon_ttm.c
++++ b/drivers/gpu/drm/radeon/radeon_ttm.c
+@@ -655,7 +655,8 @@ struct radeon_ttm_backend {
+ static int radeon_ttm_backend_populate(struct ttm_backend *backend,
+ 				       unsigned long num_pages,
+ 				       struct page **pages,
+-				       struct page *dummy_read_page)
++				       struct page *dummy_read_page,
++				       dma_addr_t *dma_addrs)
+ {
+ 	struct radeon_ttm_backend *gtt;
+ 
+diff --git a/drivers/gpu/drm/ttm/ttm_agp_backend.c b/drivers/gpu/drm/ttm/ttm_agp_backend.c
+index f999e36..1c4a72f 100644
+--- a/drivers/gpu/drm/ttm/ttm_agp_backend.c
++++ b/drivers/gpu/drm/ttm/ttm_agp_backend.c
+@@ -47,7 +47,8 @@ struct ttm_agp_backend {
+ 
+ static int ttm_agp_populate(struct ttm_backend *backend,
+ 			    unsigned long num_pages, struct page **pages,
+-			    struct page *dummy_read_page)
++			    struct page *dummy_read_page,
++			    dma_addr_t *dma_addrs)
+ {
+ 	struct ttm_agp_backend *agp_be =
+ 	    container_of(backend, struct ttm_agp_backend, backend);
+diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
+index 0d39001..86d5b17 100644
+--- a/drivers/gpu/drm/ttm/ttm_tt.c
++++ b/drivers/gpu/drm/ttm/ttm_tt.c
+@@ -169,7 +169,7 @@ int ttm_tt_populate(struct ttm_tt *ttm)
+ 	}
+ 
+ 	be->func->populate(be, ttm->num_pages, ttm->pages,
+-			   ttm->dummy_read_page);
++			   ttm->dummy_read_page, ttm->dma_address);
+ 	ttm->state = tt_unbound;
+ 	return 0;
+ }
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+index 80bc37b..87e43e0 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+@@ -102,7 +102,8 @@ struct vmw_ttm_backend {
+ 
+ static int vmw_ttm_populate(struct ttm_backend *backend,
+ 			    unsigned long num_pages, struct page **pages,
+-			    struct page *dummy_read_page)
++			    struct page *dummy_read_page,
++			    dma_addr_t *dma_addrs)
+ {
+ 	struct vmw_ttm_backend *vmw_be =
+ 	    container_of(backend, struct vmw_ttm_backend, backend);
+diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
+index 6dc4fcc..ebcd3dd 100644
+--- a/include/drm/ttm/ttm_bo_driver.h
++++ b/include/drm/ttm/ttm_bo_driver.h
+@@ -50,13 +50,15 @@ struct ttm_backend_func {
+ 	 * @pages: Array of pointers to ttm pages.
+ 	 * @dummy_read_page: Page to be used instead of NULL pages in the
+ 	 * array @pages.
++	 * @dma_addrs: Array of DMA (bus) address of the ttm pages.
+ 	 *
+ 	 * Populate the backend with ttm pages. Depending on the backend,
+ 	 * it may or may not copy the @pages array.
+ 	 */
+ 	int (*populate) (struct ttm_backend *backend,
+ 			 unsigned long num_pages, struct page **pages,
+-			 struct page *dummy_read_page);
++			 struct page *dummy_read_page,
++			 dma_addr_t *dma_addrs);
+ 	/**
+ 	 * struct ttm_backend_func member clear
+ 	 *
+-- 
+1.7.3.4
+
+
+From 340dec734021d8600029e8b3178280cc8d3af251 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Thu, 2 Dec 2010 11:04:29 -0500
+Subject: [PATCH 046/139] radeon/ttm/PCIe: Use dma_addr if TTM has set it.
+
+If the TTM layer has used the DMA API to set up pages that are
+TTM_PAGE_FLAG_DMA32 (see the patch titled "ttm: Utilize the dma_addr_t
+array for pages that are in the DMA32 pool."), let's use it when
+programming the GART in the PCIe-type cards.
+
+This patch skips doing the pci_map_page (and pci_unmap_page) if a
+DMA address was passed in for that page. If the dma_address is zero
+(or DMA_ERROR_CODE), then we continue with our old behaviour.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Tested-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/gpu/drm/radeon/radeon.h      |    4 ++-
+ drivers/gpu/drm/radeon/radeon_gart.c |   36 ++++++++++++++++++++++++---------
+ drivers/gpu/drm/radeon/radeon_ttm.c  |    5 +++-
+ 3 files changed, 33 insertions(+), 12 deletions(-)
+
+diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
+index 73f600d..c9bbab9 100644
+--- a/drivers/gpu/drm/radeon/radeon.h
++++ b/drivers/gpu/drm/radeon/radeon.h
+@@ -317,6 +317,7 @@ struct radeon_gart {
+ 	union radeon_gart_table		table;
+ 	struct page			**pages;
+ 	dma_addr_t			*pages_addr;
++	bool				*ttm_alloced;
+ 	bool				ready;
+ };
+ 
+@@ -329,7 +330,8 @@ void radeon_gart_fini(struct radeon_device *rdev);
+ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
+ 			int pages);
+ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
+-		     int pages, struct page **pagelist);
++		     int pages, struct page **pagelist,
++		     dma_addr_t *dma_addr);
+ 
+ 
+ /*
+diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
+index e65b903..4a5ac4b 100644
+--- a/drivers/gpu/drm/radeon/radeon_gart.c
++++ b/drivers/gpu/drm/radeon/radeon_gart.c
+@@ -149,8 +149,9 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
+ 	p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);
+ 	for (i = 0; i < pages; i++, p++) {
+ 		if (rdev->gart.pages[p]) {
+-			pci_unmap_page(rdev->pdev, rdev->gart.pages_addr[p],
+-				       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
++			if (!rdev->gart.ttm_alloced[p])
++				pci_unmap_page(rdev->pdev, rdev->gart.pages_addr[p],
++				       		PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+ 			rdev->gart.pages[p] = NULL;
+ 			rdev->gart.pages_addr[p] = rdev->dummy_page.addr;
+ 			page_base = rdev->gart.pages_addr[p];
+@@ -165,7 +166,7 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
+ }
+ 
+ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
+-		     int pages, struct page **pagelist)
++		     int pages, struct page **pagelist, dma_addr_t *dma_addr)
+ {
+ 	unsigned t;
+ 	unsigned p;
+@@ -180,15 +181,22 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
+ 	p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);
+ 
+ 	for (i = 0; i < pages; i++, p++) {
+-		/* we need to support large memory configurations */
+-		/* assume that unbind have already been call on the range */
+-		rdev->gart.pages_addr[p] = pci_map_page(rdev->pdev, pagelist[i],
++		/* On TTM path, we only use the DMA API if TTM_PAGE_FLAG_DMA32
++		 * is requested. */
++		if (dma_addr[i] != DMA_ERROR_CODE) {
++			rdev->gart.ttm_alloced[p] = true;
++			rdev->gart.pages_addr[p] = dma_addr[i];
++		} else {
++			/* we need to support large memory configurations */
++			/* assume that unbind has already been called on the range */
++			rdev->gart.pages_addr[p] = pci_map_page(rdev->pdev, pagelist[i],
+ 							0, PAGE_SIZE,
+ 							PCI_DMA_BIDIRECTIONAL);
+-		if (pci_dma_mapping_error(rdev->pdev, rdev->gart.pages_addr[p])) {
+-			/* FIXME: failed to map page (return -ENOMEM?) */
+-			radeon_gart_unbind(rdev, offset, pages);
+-			return -ENOMEM;
++			if (pci_dma_mapping_error(rdev->pdev, rdev->gart.pages_addr[p])) {
++				/* FIXME: failed to map page (return -ENOMEM?) */
++				radeon_gart_unbind(rdev, offset, pages);
++				return -ENOMEM;
++			}
+ 		}
+ 		rdev->gart.pages[p] = pagelist[i];
+ 		page_base = rdev->gart.pages_addr[p];
+@@ -251,6 +259,12 @@ int radeon_gart_init(struct radeon_device *rdev)
+ 		radeon_gart_fini(rdev);
+ 		return -ENOMEM;
+ 	}
++	rdev->gart.ttm_alloced = kzalloc(sizeof(bool) *
++					rdev->gart.num_cpu_pages, GFP_KERNEL);
++	if (rdev->gart.ttm_alloced == NULL) {
++		radeon_gart_fini(rdev);
++		return -ENOMEM;
++	}
+ 	/* set GART entry to point to the dummy page by default */
+ 	for (i = 0; i < rdev->gart.num_cpu_pages; i++) {
+ 		rdev->gart.pages_addr[i] = rdev->dummy_page.addr;
+@@ -267,6 +281,8 @@ void radeon_gart_fini(struct radeon_device *rdev)
+ 	rdev->gart.ready = false;
+ 	kfree(rdev->gart.pages);
+ 	kfree(rdev->gart.pages_addr);
++	kfree(rdev->gart.ttm_alloced);
+ 	rdev->gart.pages = NULL;
+ 	rdev->gart.pages_addr = NULL;
++	rdev->gart.ttm_alloced = NULL;
+ }
+diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
+index 6f156e9..ca04505 100644
+--- a/drivers/gpu/drm/radeon/radeon_ttm.c
++++ b/drivers/gpu/drm/radeon/radeon_ttm.c
+@@ -647,6 +647,7 @@ struct radeon_ttm_backend {
+ 	unsigned long			num_pages;
+ 	struct page			**pages;
+ 	struct page			*dummy_read_page;
++	dma_addr_t			*dma_addrs;
+ 	bool				populated;
+ 	bool				bound;
+ 	unsigned			offset;
+@@ -662,6 +663,7 @@ static int radeon_ttm_backend_populate(struct ttm_backend *backend,
+ 
+ 	gtt = container_of(backend, struct radeon_ttm_backend, backend);
+ 	gtt->pages = pages;
++	gtt->dma_addrs = dma_addrs;
+ 	gtt->num_pages = num_pages;
+ 	gtt->dummy_read_page = dummy_read_page;
+ 	gtt->populated = true;
+@@ -674,6 +676,7 @@ static void radeon_ttm_backend_clear(struct ttm_backend *backend)
+ 
+ 	gtt = container_of(backend, struct radeon_ttm_backend, backend);
+ 	gtt->pages = NULL;
++	gtt->dma_addrs = NULL;
+ 	gtt->num_pages = 0;
+ 	gtt->dummy_read_page = NULL;
+ 	gtt->populated = false;
+@@ -694,7 +697,7 @@ static int radeon_ttm_backend_bind(struct ttm_backend *backend,
+ 		     gtt->num_pages, bo_mem, backend);
+ 	}
+ 	r = radeon_gart_bind(gtt->rdev, gtt->offset,
+-			     gtt->num_pages, gtt->pages);
++			     gtt->num_pages, gtt->pages, gtt->dma_addrs);
+ 	if (r) {
+ 		DRM_ERROR("failed to bind %lu pages at 0x%08X\n",
+ 			  gtt->num_pages, gtt->offset);
+-- 
+1.7.3.4
+
+
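The nouveau patch that follows makes the same per-page decision as the
radeon change above: trust a bus address that TTM already obtained via the
DMA API, otherwise fall back to pci_map_page(), and remember which path was
taken so that teardown knows whether pci_unmap_page() is needed. A condensed
sketch of that decision (the helper is hypothetical, not either driver
verbatim):

#include <linux/pci.h>
#include <linux/dma-mapping.h>

/*
 * Return the bus address to program into the GART for 'page'.
 * '*ttm_alloced' tells the caller whether the address came from TTM,
 * in which case it must not be pci_unmap_page()'d later.
 */
static dma_addr_t gart_page_addr(struct pci_dev *pdev, struct page *page,
                                 dma_addr_t ttm_addr, bool *ttm_alloced)
{
        if (ttm_addr != DMA_ERROR_CODE) {       /* TTM used the DMA API */
                *ttm_alloced = true;
                return ttm_addr;
        }

        *ttm_alloced = false;
        return pci_map_page(pdev, page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
}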
+From b4efe20150e5e9a483faf67ba0d947dbface96cd Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Thu, 2 Dec 2010 11:36:24 -0500
+Subject: [PATCH 047/139] nouveau/ttm/PCIe: Use dma_addr if TTM has set it.
+
+If the TTM layer has used the DMA API to set up pages that are
+TTM_PAGE_FLAG_DMA32 (see the patch titled "ttm: Utilize the dma_addr_t
+array for pages that are in the DMA32 pool."), let's use it when
+programming the GART in the PCIe-type cards.
+
+This patch skips doing the pci_map_page (and pci_unmap_page) if a
+DMA address was passed in for that page. If the dma_address is zero
+(or DMA_ERROR_CODE), then we continue with our old behaviour.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Tested-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/gpu/drm/nouveau/nouveau_sgdma.c |   28 +++++++++++++++++++++-------
+ 1 files changed, 21 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
+index edc140a..bbdd982 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
++++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
+@@ -12,6 +12,7 @@ struct nouveau_sgdma_be {
+ 	struct drm_device *dev;
+ 
+ 	dma_addr_t *pages;
++	bool *ttm_alloced;
+ 	unsigned nr_pages;
+ 
+ 	unsigned pte_start;
+@@ -35,15 +36,25 @@ nouveau_sgdma_populate(struct ttm_backend *be, unsigned long num_pages,
+ 	if (!nvbe->pages)
+ 		return -ENOMEM;
+ 
++	nvbe->ttm_alloced = kmalloc(sizeof(bool) * num_pages, GFP_KERNEL);
++	if (!nvbe->ttm_alloced)
++		return -ENOMEM;
++
+ 	nvbe->nr_pages = 0;
+ 	while (num_pages--) {
+-		nvbe->pages[nvbe->nr_pages] =
+-			pci_map_page(dev->pdev, pages[nvbe->nr_pages], 0,
++		if (dma_addrs[nvbe->nr_pages] != DMA_ERROR_CODE) {
++			nvbe->pages[nvbe->nr_pages] =
++					dma_addrs[nvbe->nr_pages];
++		 	nvbe->ttm_alloced[nvbe->nr_pages] = true;
++		} else {
++			nvbe->pages[nvbe->nr_pages] =
++				pci_map_page(dev->pdev, pages[nvbe->nr_pages], 0,
+ 				     PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+-		if (pci_dma_mapping_error(dev->pdev,
+-					  nvbe->pages[nvbe->nr_pages])) {
+-			be->func->clear(be);
+-			return -EFAULT;
++			if (pci_dma_mapping_error(dev->pdev,
++						  nvbe->pages[nvbe->nr_pages])) {
++				be->func->clear(be);
++				return -EFAULT;
++			}
+ 		}
+ 
+ 		nvbe->nr_pages++;
+@@ -66,11 +77,14 @@ nouveau_sgdma_clear(struct ttm_backend *be)
+ 			be->func->unbind(be);
+ 
+ 		while (nvbe->nr_pages--) {
+-			pci_unmap_page(dev->pdev, nvbe->pages[nvbe->nr_pages],
++			if (!nvbe->ttm_alloced[nvbe->nr_pages])
++				pci_unmap_page(dev->pdev, nvbe->pages[nvbe->nr_pages],
+ 				       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+ 		}
+ 		kfree(nvbe->pages);
++		kfree(nvbe->ttm_alloced);
+ 		nvbe->pages = NULL;
++		nvbe->ttm_alloced = NULL;
+ 		nvbe->nr_pages = 0;
+ 	}
+ }
+-- 
+1.7.3.4
+
+
+From 03c4949992e2b7e84b7cdeb156d803db3f848b6c Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Mon, 9 Feb 2009 12:05:52 -0800
+Subject: [PATCH 051/139] xen: netback: Initial import of linux-2.6.18-xen.hg netback driver.
+
+This corresponds to 774:107e10e0e07c in that tree.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/Kconfig             |    7 +
+ drivers/xen/Makefile            |    1 +
+ drivers/xen/netback/Makefile    |    3 +
+ drivers/xen/netback/common.h    |  217 ++++++
+ drivers/xen/netback/interface.c |  336 ++++++++
+ drivers/xen/netback/netback.c   | 1637 +++++++++++++++++++++++++++++++++++++++
+ drivers/xen/netback/xenbus.c    |  454 +++++++++++
+ 7 files changed, 2655 insertions(+), 0 deletions(-)
+ create mode 100644 drivers/xen/netback/Makefile
+ create mode 100644 drivers/xen/netback/common.h
+ create mode 100644 drivers/xen/netback/interface.c
+ create mode 100644 drivers/xen/netback/netback.c
+ create mode 100644 drivers/xen/netback/xenbus.c
+
+diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
+index 5a48ce9..7e83d43 100644
+--- a/drivers/xen/Kconfig
++++ b/drivers/xen/Kconfig
+@@ -37,6 +37,13 @@ config XEN_BACKEND
+ 	depends on XEN_PCIDEV_BACKEND
+ 
+ 
++config XEN_NETDEV_BACKEND
++       bool "Xen backend network device"
++       depends on XEN_BACKEND && NET
++       help
++         Implement the network backend driver, which passes packets
++         from the guest domain's frontend drivers to the network.
++
+ config XENFS
+ 	tristate "Xen filesystem"
+ 	default y
+diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
+index 533a199..c0e0509 100644
+--- a/drivers/xen/Makefile
++++ b/drivers/xen/Makefile
+@@ -9,6 +9,7 @@ obj-$(CONFIG_HOTPLUG_CPU)	+= cpu_hotplug.o
+ obj-$(CONFIG_XEN_DEV_EVTCHN)	+= xen-evtchn.o
+ obj-$(CONFIG_XEN_GNTDEV)	+= xen-gntdev.o
+ obj-$(CONFIG_XEN_PCIDEV_BACKEND)	+= pciback/
++obj-$(CONFIG_XEN_NETDEV_BACKEND)	+= netback/
+ obj-$(CONFIG_XENFS)		+= xenfs/
+ obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
+ obj-$(CONFIG_XEN_PLATFORM_PCI)	+= platform-pci.o
+diff --git a/drivers/xen/netback/Makefile b/drivers/xen/netback/Makefile
+new file mode 100644
+index 0000000..f4a0c51
+--- /dev/null
++++ b/drivers/xen/netback/Makefile
+@@ -0,0 +1,3 @@
++obj-$(CONFIG_XEN_NETDEV_BACKEND) := netbk.o
++
++netbk-y   := netback.o xenbus.o interface.o
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+new file mode 100644
+index 0000000..9a54d57
+--- /dev/null
++++ b/drivers/xen/netback/common.h
+@@ -0,0 +1,217 @@
++/******************************************************************************
++ * arch/xen/drivers/netif/backend/common.h
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#ifndef __NETIF__BACKEND__COMMON_H__
++#define __NETIF__BACKEND__COMMON_H__
++
++#include <linux/version.h>
++#include <linux/module.h>
++#include <linux/interrupt.h>
++#include <linux/slab.h>
++#include <linux/ip.h>
++#include <linux/in.h>
++#include <linux/netdevice.h>
++#include <linux/etherdevice.h>
++#include <linux/wait.h>
++#include <xen/evtchn.h>
++#include <xen/interface/io/netif.h>
++#include <asm/io.h>
++#include <asm/pgalloc.h>
++#include <xen/interface/grant_table.h>
++#include <xen/gnttab.h>
++#include <xen/driver_util.h>
++#include <xen/xenbus.h>
++
++#define DPRINTK(_f, _a...)			\
++	pr_debug("(file=%s, line=%d) " _f,	\
++		 __FILE__ , __LINE__ , ## _a )
++#define IPRINTK(fmt, args...)				\
++	printk(KERN_INFO "xen_net: " fmt, ##args)
++#define WPRINTK(fmt, args...)				\
++	printk(KERN_WARNING "xen_net: " fmt, ##args)
++
++typedef struct netif_st {
++	/* Unique identifier for this interface. */
++	domid_t          domid;
++	unsigned int     handle;
++
++	u8               fe_dev_addr[6];
++
++	/* Physical parameters of the comms window. */
++	grant_handle_t   tx_shmem_handle;
++	grant_ref_t      tx_shmem_ref;
++	grant_handle_t   rx_shmem_handle;
++	grant_ref_t      rx_shmem_ref;
++	unsigned int     irq;
++
++	/* The shared rings and indexes. */
++	netif_tx_back_ring_t tx;
++	netif_rx_back_ring_t rx;
++	struct vm_struct *tx_comms_area;
++	struct vm_struct *rx_comms_area;
++
++	/* Set of features that can be turned on in dev->features. */
++	int features;
++
++	/* Internal feature information. */
++	u8 can_queue:1;	/* can queue packets for receiver? */
++	u8 copying_receiver:1;	/* copy packets to receiver?       */
++
++	/* Allow netif_be_start_xmit() to peek ahead in the rx request ring. */
++	RING_IDX rx_req_cons_peek;
++
++	/* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
++	unsigned long   credit_bytes;
++	unsigned long   credit_usec;
++	unsigned long   remaining_credit;
++	struct timer_list credit_timeout;
++
++	/* Enforce draining of the transmit queue. */
++	struct timer_list tx_queue_timeout;
++
++	/* Miscellaneous private stuff. */
++	struct list_head list;  /* scheduling list */
++	atomic_t         refcnt;
++	struct net_device *dev;
++	struct net_device_stats stats;
++
++	unsigned int carrier;
++
++	wait_queue_head_t waiting_to_free;
++} netif_t;
++
++/*
++ * Implement our own carrier flag: the network stack's version causes delays
++ * when the carrier is re-enabled (in particular, dev_activate() may not
++ * immediately be called, which can cause packet loss; also the etherbridge
++ * can be rather lazy in activating its port).
++ */
++#define netback_carrier_on(netif)	((netif)->carrier = 1)
++#define netback_carrier_off(netif)	((netif)->carrier = 0)
++#define netback_carrier_ok(netif)	((netif)->carrier)
++
++enum {
++	NETBK_DONT_COPY_SKB,
++	NETBK_DELAYED_COPY_SKB,
++	NETBK_ALWAYS_COPY_SKB,
++};
++
++extern int netbk_copy_skb_mode;
++
++/* Function pointers into netback accelerator plugin modules */
++struct netback_accel_hooks {
++	struct module *owner;
++	int  (*probe)(struct xenbus_device *dev);
++	int (*remove)(struct xenbus_device *dev);
++};
++
++/* Structure to track the state of a netback accelerator plugin */
++struct netback_accelerator {
++	struct list_head link;
++	int id;
++	char *eth_name;
++	atomic_t use_count;
++	struct netback_accel_hooks *hooks;
++};
++
++struct backend_info {
++	struct xenbus_device *dev;
++	netif_t *netif;
++	enum xenbus_state frontend_state;
++
++	/* State relating to the netback accelerator */
++	void *netback_accel_priv;
++	/* The accelerator that this backend is currently using */
++	struct netback_accelerator *accelerator;
++};
++
++#define NETBACK_ACCEL_VERSION 0x00010001
++
++/*
++ * Connect an accelerator plugin module to netback.  Returns zero on
++ * success, < 0 on error, > 0 (with highest version number supported)
++ * if version mismatch.
++ */
++extern int netback_connect_accelerator(unsigned version,
++				       int id, const char *eth_name,
++				       struct netback_accel_hooks *hooks);
++/* Disconnect a previously connected accelerator plugin module */
++extern void netback_disconnect_accelerator(int id, const char *eth_name);
++
++
++extern
++void netback_probe_accelerators(struct backend_info *be,
++				struct xenbus_device *dev);
++extern
++void netback_remove_accelerators(struct backend_info *be,
++				 struct xenbus_device *dev);
++extern
++void netif_accel_init(void);
++
++
++#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
++#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
++
++void netif_disconnect(netif_t *netif);
++
++netif_t *netif_alloc(domid_t domid, unsigned int handle);
++int netif_map(netif_t *netif, unsigned long tx_ring_ref,
++	      unsigned long rx_ring_ref, unsigned int evtchn);
++
++#define netif_get(_b) (atomic_inc(&(_b)->refcnt))
++#define netif_put(_b)						\
++	do {							\
++		if ( atomic_dec_and_test(&(_b)->refcnt) )	\
++			wake_up(&(_b)->waiting_to_free);	\
++	} while (0)
++
++void netif_xenbus_init(void);
++
++#define netif_schedulable(netif)				\
++	(netif_running((netif)->dev) && netback_carrier_ok(netif))
++
++void netif_schedule_work(netif_t *netif);
++void netif_deschedule_work(netif_t *netif);
++
++int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
++struct net_device_stats *netif_be_get_stats(struct net_device *dev);
++irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs);
++
++static inline int netbk_can_queue(struct net_device *dev)
++{
++	netif_t *netif = netdev_priv(dev);
++	return netif->can_queue;
++}
++
++static inline int netbk_can_sg(struct net_device *dev)
++{
++	netif_t *netif = netdev_priv(dev);
++	return netif->features & NETIF_F_SG;
++}
++
++#endif /* __NETIF__BACKEND__COMMON_H__ */
+diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
+new file mode 100644
+index 0000000..7e67941
+--- /dev/null
++++ b/drivers/xen/netback/interface.c
+@@ -0,0 +1,336 @@
++/******************************************************************************
++ * arch/xen/drivers/netif/backend/interface.c
++ *
++ * Network-device interface management.
++ *
++ * Copyright (c) 2004-2005, Keir Fraser
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include "common.h"
++#include <linux/ethtool.h>
++#include <linux/rtnetlink.h>
++
++/*
++ * Module parameter 'queue_length':
++ *
++ * Enables queuing in the network stack when a client has run out of receive
++ * descriptors. Although this feature can improve receive bandwidth by avoiding
++ * packet loss, it can also result in packets sitting in the 'tx_queue' for
++ * unbounded time. This is bad if those packets hold onto foreign resources.
++ * For example, consider a packet that holds onto resources belonging to the
++ * guest for which it is queued (e.g., packet received on vif1.0, destined for
++ * vif1.1 which is not activated in the guest): in this situation the guest
++ * will never be destroyed, unless vif1.1 is taken down. To avoid this, we
++ * run a timer (tx_queue_timeout) to drain the queue when the interface is
++ * blocked.
++ */
++static unsigned long netbk_queue_length = 32;
++module_param_named(queue_length, netbk_queue_length, ulong, 0);
++
++static void __netif_up(netif_t *netif)
++{
++	enable_irq(netif->irq);
++	netif_schedule_work(netif);
++}
++
++static void __netif_down(netif_t *netif)
++{
++	disable_irq(netif->irq);
++	netif_deschedule_work(netif);
++}
++
++static int net_open(struct net_device *dev)
++{
++	netif_t *netif = netdev_priv(dev);
++	if (netback_carrier_ok(netif)) {
++		__netif_up(netif);
++		netif_start_queue(dev);
++	}
++	return 0;
++}
++
++static int net_close(struct net_device *dev)
++{
++	netif_t *netif = netdev_priv(dev);
++	if (netback_carrier_ok(netif))
++		__netif_down(netif);
++	netif_stop_queue(dev);
++	return 0;
++}
++
++static int netbk_change_mtu(struct net_device *dev, int mtu)
++{
++	int max = netbk_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
++
++	if (mtu > max)
++		return -EINVAL;
++	dev->mtu = mtu;
++	return 0;
++}
++
++static int netbk_set_sg(struct net_device *dev, u32 data)
++{
++	if (data) {
++		netif_t *netif = netdev_priv(dev);
++
++		if (!(netif->features & NETIF_F_SG))
++			return -ENOSYS;
++	}
++
++	return ethtool_op_set_sg(dev, data);
++}
++
++static int netbk_set_tso(struct net_device *dev, u32 data)
++{
++	if (data) {
++		netif_t *netif = netdev_priv(dev);
++
++		if (!(netif->features & NETIF_F_TSO))
++			return -ENOSYS;
++	}
++
++	return ethtool_op_set_tso(dev, data);
++}
++
++static struct ethtool_ops network_ethtool_ops =
++{
++	.get_tx_csum = ethtool_op_get_tx_csum,
++	.set_tx_csum = ethtool_op_set_tx_csum,
++	.get_sg = ethtool_op_get_sg,
++	.set_sg = netbk_set_sg,
++	.get_tso = ethtool_op_get_tso,
++	.set_tso = netbk_set_tso,
++	.get_link = ethtool_op_get_link,
++};
++
++netif_t *netif_alloc(domid_t domid, unsigned int handle)
++{
++	int err = 0;
++	struct net_device *dev;
++	netif_t *netif;
++	char name[IFNAMSIZ] = {};
++
++	snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
++	dev = alloc_netdev(sizeof(netif_t), name, ether_setup);
++	if (dev == NULL) {
++		DPRINTK("Could not create netif: out of memory\n");
++		return ERR_PTR(-ENOMEM);
++	}
++
++	netif = netdev_priv(dev);
++	memset(netif, 0, sizeof(*netif));
++	netif->domid  = domid;
++	netif->handle = handle;
++	atomic_set(&netif->refcnt, 1);
++	init_waitqueue_head(&netif->waiting_to_free);
++	netif->dev = dev;
++
++	netback_carrier_off(netif);
++
++	netif->credit_bytes = netif->remaining_credit = ~0UL;
++	netif->credit_usec  = 0UL;
++	init_timer(&netif->credit_timeout);
++	/* Initialize 'expires' now: it's used to track the credit window. */
++	netif->credit_timeout.expires = jiffies;
++
++	init_timer(&netif->tx_queue_timeout);
++
++	dev->hard_start_xmit = netif_be_start_xmit;
++	dev->get_stats       = netif_be_get_stats;
++	dev->open            = net_open;
++	dev->stop            = net_close;
++	dev->change_mtu	     = netbk_change_mtu;
++	dev->features        = NETIF_F_IP_CSUM;
++
++	SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
++
++	dev->tx_queue_len = netbk_queue_length;
++
++	/*
++	 * Initialise a dummy MAC address. We choose the numerically
++	 * largest non-broadcast address to prevent the address getting
++	 * stolen by an Ethernet bridge for STP purposes.
++	 * (FE:FF:FF:FF:FF:FF)
++	 */
++	memset(dev->dev_addr, 0xFF, ETH_ALEN);
++	dev->dev_addr[0] &= ~0x01;
++
++	rtnl_lock();
++	err = register_netdevice(dev);
++	rtnl_unlock();
++	if (err) {
++		DPRINTK("Could not register new net device %s: err=%d\n",
++			dev->name, err);
++		free_netdev(dev);
++		return ERR_PTR(err);
++	}
++
++	DPRINTK("Successfully created netif\n");
++	return netif;
++}
++
++static int map_frontend_pages(
++	netif_t *netif, grant_ref_t tx_ring_ref, grant_ref_t rx_ring_ref)
++{
++	struct gnttab_map_grant_ref op;
++
++	gnttab_set_map_op(&op, (unsigned long)netif->tx_comms_area->addr,
++			  GNTMAP_host_map, tx_ring_ref, netif->domid);
++
++	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
++		BUG();
++
++	if (op.status) {
++		DPRINTK(" Gnttab failure mapping tx_ring_ref!\n");
++		return op.status;
++	}
++
++	netif->tx_shmem_ref    = tx_ring_ref;
++	netif->tx_shmem_handle = op.handle;
++
++	gnttab_set_map_op(&op, (unsigned long)netif->rx_comms_area->addr,
++			  GNTMAP_host_map, rx_ring_ref, netif->domid);
++
++	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
++		BUG();
++
++	if (op.status) {
++		DPRINTK(" Gnttab failure mapping rx_ring_ref!\n");
++		return op.status;
++	}
++
++	netif->rx_shmem_ref    = rx_ring_ref;
++	netif->rx_shmem_handle = op.handle;
++
++	return 0;
++}
++
++static void unmap_frontend_pages(netif_t *netif)
++{
++	struct gnttab_unmap_grant_ref op;
++
++	gnttab_set_unmap_op(&op, (unsigned long)netif->tx_comms_area->addr,
++			    GNTMAP_host_map, netif->tx_shmem_handle);
++
++	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
++		BUG();
++
++	gnttab_set_unmap_op(&op, (unsigned long)netif->rx_comms_area->addr,
++			    GNTMAP_host_map, netif->rx_shmem_handle);
++
++	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
++		BUG();
++}
++
++int netif_map(netif_t *netif, unsigned long tx_ring_ref,
++	      unsigned long rx_ring_ref, unsigned int evtchn)
++{
++	int err = -ENOMEM;
++	netif_tx_sring_t *txs;
++	netif_rx_sring_t *rxs;
++
++	/* Already connected through? */
++	if (netif->irq)
++		return 0;
++
++	netif->tx_comms_area = alloc_vm_area(PAGE_SIZE);
++	if (netif->tx_comms_area == NULL)
++		return -ENOMEM;
++	netif->rx_comms_area = alloc_vm_area(PAGE_SIZE);
++	if (netif->rx_comms_area == NULL)
++		goto err_rx;
++
++	err = map_frontend_pages(netif, tx_ring_ref, rx_ring_ref);
++	if (err)
++		goto err_map;
++
++	err = bind_interdomain_evtchn_to_irqhandler(
++		netif->domid, evtchn, netif_be_int, 0,
++		netif->dev->name, netif);
++	if (err < 0)
++		goto err_hypervisor;
++	netif->irq = err;
++	disable_irq(netif->irq);
++
++	txs = (netif_tx_sring_t *)netif->tx_comms_area->addr;
++	BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE);
++
++	rxs = (netif_rx_sring_t *)
++		((char *)netif->rx_comms_area->addr);
++	BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE);
++
++	netif->rx_req_cons_peek = 0;
++
++	netif_get(netif);
++
++	rtnl_lock();
++	netback_carrier_on(netif);
++	if (netif_running(netif->dev))
++		__netif_up(netif);
++	rtnl_unlock();
++
++	return 0;
++err_hypervisor:
++	unmap_frontend_pages(netif);
++err_map:
++	free_vm_area(netif->rx_comms_area);
++err_rx:
++	free_vm_area(netif->tx_comms_area);
++	return err;
++}
++
++void netif_disconnect(netif_t *netif)
++{
++	if (netback_carrier_ok(netif)) {
++		rtnl_lock();
++		netback_carrier_off(netif);
++		netif_carrier_off(netif->dev); /* discard queued packets */
++		if (netif_running(netif->dev))
++			__netif_down(netif);
++		rtnl_unlock();
++		netif_put(netif);
++	}
++
++	atomic_dec(&netif->refcnt);
++	wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0);
++
++	del_timer_sync(&netif->credit_timeout);
++	del_timer_sync(&netif->tx_queue_timeout);
++
++	if (netif->irq)
++		unbind_from_irqhandler(netif->irq, netif);
++
++	unregister_netdev(netif->dev);
++
++	if (netif->tx.sring) {
++		unmap_frontend_pages(netif);
++		free_vm_area(netif->tx_comms_area);
++		free_vm_area(netif->rx_comms_area);
++	}
++
++	free_netdev(netif->dev);
++}
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+new file mode 100644
+index 0000000..db629d4
+--- /dev/null
++++ b/drivers/xen/netback/netback.c
+@@ -0,0 +1,1637 @@
++/******************************************************************************
++ * drivers/xen/netback/netback.c
++ *
++ * Back-end of the driver for virtual network devices. This portion of the
++ * driver exports a 'unified' network-device interface that can be accessed
++ * by any operating system that implements a compatible front end. A
++ * reference front-end implementation can be found in:
++ *  drivers/xen/netfront/netfront.c
++ *
++ * Copyright (c) 2002-2005, K A Fraser
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include "common.h"
++#include <xen/balloon.h>
++#include <xen/interface/memory.h>
++
++/*define NETBE_DEBUG_INTERRUPT*/
++
++struct netbk_rx_meta {
++	skb_frag_t frag;
++	int id;
++	u8 copy:1;
++};
++
++struct netbk_tx_pending_inuse {
++	struct list_head list;
++	unsigned long alloc_time;
++};
++
++static void netif_idx_release(u16 pending_idx);
++static void make_tx_response(netif_t *netif,
++			     netif_tx_request_t *txp,
++			     s8       st);
++static netif_rx_response_t *make_rx_response(netif_t *netif,
++					     u16      id,
++					     s8       st,
++					     u16      offset,
++					     u16      size,
++					     u16      flags);
++
++static void net_tx_action(unsigned long unused);
++static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0);
++
++static void net_rx_action(unsigned long unused);
++static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);
++
++static struct timer_list net_timer;
++static struct timer_list netbk_tx_pending_timer;
++
++#define MAX_PENDING_REQS 256
++
++static struct sk_buff_head rx_queue;
++
++static struct page **mmap_pages;
++static inline unsigned long idx_to_pfn(unsigned int idx)
++{
++	return page_to_pfn(mmap_pages[idx]);
++}
++
++static inline unsigned long idx_to_kaddr(unsigned int idx)
++{
++	return (unsigned long)pfn_to_kaddr(idx_to_pfn(idx));
++}
++
++/* extra field used in struct page */
++static inline void netif_set_page_index(struct page *pg, unsigned int index)
++{
++	*(unsigned long *)&pg->mapping = index;
++}
++
++static inline int netif_page_index(struct page *pg)
++{
++	unsigned long idx = (unsigned long)pg->mapping;
++
++	if (!PageForeign(pg))
++		return -1;
++
++	if ((idx >= MAX_PENDING_REQS) || (mmap_pages[idx] != pg))
++		return -1;
++
++	return idx;
++}
++
++#define PKT_PROT_LEN 64
++
++static struct pending_tx_info {
++	netif_tx_request_t req;
++	netif_t *netif;
++} pending_tx_info[MAX_PENDING_REQS];
++static u16 pending_ring[MAX_PENDING_REQS];
++typedef unsigned int PEND_RING_IDX;
++#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
++static PEND_RING_IDX pending_prod, pending_cons;
++#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
++
++/* Freed TX SKBs get batched on this ring before return to pending_ring. */
++static u16 dealloc_ring[MAX_PENDING_REQS];
++static PEND_RING_IDX dealloc_prod, dealloc_cons;
++
++/* Doubly-linked list of in-use pending entries. */
++static struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
++static LIST_HEAD(pending_inuse_head);
++
++static struct sk_buff_head tx_queue;
++
++static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
++static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
++static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
++
++static struct list_head net_schedule_list;
++static spinlock_t net_schedule_list_lock;
++
++#define MAX_MFN_ALLOC 64
++static unsigned long mfn_list[MAX_MFN_ALLOC];
++static unsigned int alloc_index = 0;
++
++/* Setting this allows the safe use of this driver without netloop. */
++static int MODPARM_copy_skb = 1;
++module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
++MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
++
++int netbk_copy_skb_mode;
++
++static inline unsigned long alloc_mfn(void)
++{
++	BUG_ON(alloc_index == 0);
++	return mfn_list[--alloc_index];
++}
++
++static int check_mfn(int nr)
++{
++	struct xen_memory_reservation reservation = {
++		.extent_order = 0,
++		.domid        = DOMID_SELF
++	};
++	int rc;
++
++	if (likely(alloc_index >= nr))
++		return 0;
++
++	set_xen_guest_handle(reservation.extent_start, mfn_list + alloc_index);
++	reservation.nr_extents = MAX_MFN_ALLOC - alloc_index;
++	rc = HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation);
++	if (likely(rc > 0))
++		alloc_index += rc;
++
++	return alloc_index >= nr ? 0 : -ENOMEM;
++}
++
++static inline void maybe_schedule_tx_action(void)
++{
++	smp_mb();
++	if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
++	    !list_empty(&net_schedule_list))
++		tasklet_schedule(&net_tx_tasklet);
++}
++
++static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
++{
++	struct skb_shared_info *ninfo;
++	struct sk_buff *nskb;
++	unsigned long offset;
++	int ret;
++	int len;
++	int headlen;
++
++	BUG_ON(skb_shinfo(skb)->frag_list != NULL);
++
++	nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN);
++	if (unlikely(!nskb))
++		goto err;
++
++	skb_reserve(nskb, 16 + NET_IP_ALIGN);
++	headlen = nskb->end - nskb->data;
++	if (headlen > skb_headlen(skb))
++		headlen = skb_headlen(skb);
++	ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
++	BUG_ON(ret);
++
++	ninfo = skb_shinfo(nskb);
++	ninfo->gso_size = skb_shinfo(skb)->gso_size;
++	ninfo->gso_type = skb_shinfo(skb)->gso_type;
++
++	offset = headlen;
++	len = skb->len - headlen;
++
++	nskb->len = skb->len;
++	nskb->data_len = len;
++	nskb->truesize += len;
++
++	while (len) {
++		struct page *page;
++		int copy;
++		int zero;
++
++		if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
++			dump_stack();
++			goto err_free;
++		}
++
++		copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
++		zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
++
++		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
++		if (unlikely(!page))
++			goto err_free;
++
++		ret = skb_copy_bits(skb, offset, page_address(page), copy);
++		BUG_ON(ret);
++
++		ninfo->frags[ninfo->nr_frags].page = page;
++		ninfo->frags[ninfo->nr_frags].page_offset = 0;
++		ninfo->frags[ninfo->nr_frags].size = copy;
++		ninfo->nr_frags++;
++
++		offset += copy;
++		len -= copy;
++	}
++
++	offset = nskb->data - skb->data;
++
++	nskb->h.raw = skb->h.raw + offset;
++	nskb->nh.raw = skb->nh.raw + offset;
++	nskb->mac.raw = skb->mac.raw + offset;
++
++	return nskb;
++
++ err_free:
++	kfree_skb(nskb);
++ err:
++	return NULL;
++}
++
++static inline int netbk_max_required_rx_slots(netif_t *netif)
++{
++	if (netif->features & (NETIF_F_SG|NETIF_F_TSO))
++		return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
++	return 1; /* all in one */
++}
++
++static inline int netbk_queue_full(netif_t *netif)
++{
++	RING_IDX peek   = netif->rx_req_cons_peek;
++	RING_IDX needed = netbk_max_required_rx_slots(netif);
++
++	return ((netif->rx.sring->req_prod - peek) < needed) ||
++	       ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
++}
++
++static void tx_queue_callback(unsigned long data)
++{
++	netif_t *netif = (netif_t *)data;
++	if (netif_schedulable(netif))
++		netif_wake_queue(netif->dev);
++}
++
++int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
++{
++	netif_t *netif = netdev_priv(dev);
++
++	BUG_ON(skb->dev != dev);
++
++	/* Drop the packet if the target domain has no receive buffers. */
++	if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
++		goto drop;
++
++	/*
++	 * Copy the packet here if it's destined for a flipping interface
++	 * but isn't flippable (e.g. extra references to data).
++	 * XXX For now we also copy skbuffs whose head crosses a page
++	 * boundary, because netbk_gop_skb can't handle them.
++	 */
++	if (!netif->copying_receiver ||
++	    ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE)) {
++		struct sk_buff *nskb = netbk_copy_skb(skb);
++		if ( unlikely(nskb == NULL) )
++			goto drop;
++		/* Copy only the header fields we use in this driver. */
++		nskb->dev = skb->dev;
++		nskb->ip_summed = skb->ip_summed;
++		nskb->proto_data_valid = skb->proto_data_valid;
++		dev_kfree_skb(skb);
++		skb = nskb;
++	}
++
++	netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 +
++				   !!skb_shinfo(skb)->gso_size;
++	netif_get(netif);
++
++	if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
++		netif->rx.sring->req_event = netif->rx_req_cons_peek +
++			netbk_max_required_rx_slots(netif);
++		mb(); /* request notification /then/ check & stop the queue */
++		if (netbk_queue_full(netif)) {
++			netif_stop_queue(dev);
++			/*
++			 * Schedule 500ms timeout to restart the queue, thus
++			 * ensuring that an inactive queue will be drained.
++			 * Packets will be dropped immediately until more
++			 * receive buffers become available (see
++			 * netbk_queue_full() check above).
++			 */
++			netif->tx_queue_timeout.data = (unsigned long)netif;
++			netif->tx_queue_timeout.function = tx_queue_callback;
++			__mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
++		}
++	}
++
++	skb_queue_tail(&rx_queue, skb);
++	tasklet_schedule(&net_rx_tasklet);
++
++	return 0;
++
++ drop:
++	netif->stats.tx_dropped++;
++	dev_kfree_skb(skb);
++	return 0;
++}
++
++#if 0
++static void xen_network_done_notify(void)
++{
++	static struct net_device *eth0_dev = NULL;
++	if (unlikely(eth0_dev == NULL))
++		eth0_dev = __dev_get_by_name("eth0");
++	netif_rx_schedule(eth0_dev);
++}
++/*
++ * Add following to poll() function in NAPI driver (Tigon3 is example):
++ *  if ( xen_network_done() )
++ *      tg3_enable_ints(tp);
++ */
++int xen_network_done(void)
++{
++	return skb_queue_empty(&rx_queue);
++}
++#endif
++
++struct netrx_pending_operations {
++	unsigned trans_prod, trans_cons;
++	unsigned mmu_prod, mmu_mcl;
++	unsigned mcl_prod, mcl_cons;
++	unsigned copy_prod, copy_cons;
++	unsigned meta_prod, meta_cons;
++	mmu_update_t *mmu;
++	gnttab_transfer_t *trans;
++	gnttab_copy_t *copy;
++	multicall_entry_t *mcl;
++	struct netbk_rx_meta *meta;
++};
++
++/* Set up the grant operations for this fragment.  If it's a flipping
++   interface, we also set up the unmap request from here. */
++static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta,
++			  int i, struct netrx_pending_operations *npo,
++			  struct page *page, unsigned long size,
++			  unsigned long offset)
++{
++	mmu_update_t *mmu;
++	gnttab_transfer_t *gop;
++	gnttab_copy_t *copy_gop;
++	multicall_entry_t *mcl;
++	netif_rx_request_t *req;
++	unsigned long old_mfn, new_mfn;
++	int idx = netif_page_index(page);
++
++	old_mfn = virt_to_mfn(page_address(page));
++
++	req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
++	if (netif->copying_receiver) {
++		/* The fragment needs to be copied rather than
++		   flipped. */
++		meta->copy = 1;
++		copy_gop = npo->copy + npo->copy_prod++;
++		copy_gop->flags = GNTCOPY_dest_gref;
++		if (idx > -1) {
++			struct pending_tx_info *src_pend = &pending_tx_info[idx];
++			copy_gop->source.domid = src_pend->netif->domid;
++			copy_gop->source.u.ref = src_pend->req.gref;
++			copy_gop->flags |= GNTCOPY_source_gref;
++		} else {
++			copy_gop->source.domid = DOMID_SELF;
++			copy_gop->source.u.gmfn = old_mfn;
++		}
++		copy_gop->source.offset = offset;
++		copy_gop->dest.domid = netif->domid;
++		copy_gop->dest.offset = 0;
++		copy_gop->dest.u.ref = req->gref;
++		copy_gop->len = size;
++	} else {
++		meta->copy = 0;
++		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
++			new_mfn = alloc_mfn();
++
++			/*
++			 * Set the new P2M table entry before
++			 * reassigning the old data page. Heed the
++			 * comment in pgtable-2level.h:pte_page(). :-)
++			 */
++			set_phys_to_machine(page_to_pfn(page), new_mfn);
++
++			mcl = npo->mcl + npo->mcl_prod++;
++			MULTI_update_va_mapping(mcl,
++					     (unsigned long)page_address(page),
++					     pfn_pte_ma(new_mfn, PAGE_KERNEL),
++					     0);
++
++			mmu = npo->mmu + npo->mmu_prod++;
++			mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
++				MMU_MACHPHYS_UPDATE;
++			mmu->val = page_to_pfn(page);
++		}
++
++		gop = npo->trans + npo->trans_prod++;
++		gop->mfn = old_mfn;
++		gop->domid = netif->domid;
++		gop->ref = req->gref;
++	}
++	return req->id;
++}
++
++static void netbk_gop_skb(struct sk_buff *skb,
++			  struct netrx_pending_operations *npo)
++{
++	netif_t *netif = netdev_priv(skb->dev);
++	int nr_frags = skb_shinfo(skb)->nr_frags;
++	int i;
++	int extra;
++	struct netbk_rx_meta *head_meta, *meta;
++
++	head_meta = npo->meta + npo->meta_prod++;
++	head_meta->frag.page_offset = skb_shinfo(skb)->gso_type;
++	head_meta->frag.size = skb_shinfo(skb)->gso_size;
++	extra = !!head_meta->frag.size + 1;
++
++	for (i = 0; i < nr_frags; i++) {
++		meta = npo->meta + npo->meta_prod++;
++		meta->frag = skb_shinfo(skb)->frags[i];
++		meta->id = netbk_gop_frag(netif, meta, i + extra, npo,
++					  meta->frag.page,
++					  meta->frag.size,
++					  meta->frag.page_offset);
++	}
++
++	/*
++	 * This must occur at the end to ensure that we don't trash skb_shinfo
++	 * until we're done. We know that the head doesn't cross a page
++	 * boundary because such packets get copied in netif_be_start_xmit.
++	 */
++	head_meta->id = netbk_gop_frag(netif, head_meta, 0, npo,
++				       virt_to_page(skb->data),
++				       skb_headlen(skb),
++				       offset_in_page(skb->data));
++
++	netif->rx.req_cons += nr_frags + extra;
++}
++
++static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
++{
++	int i;
++
++	for (i = 0; i < nr_frags; i++)
++		put_page(meta[i].frag.page);
++}
++
++/* This is a twin to netbk_gop_skb.  Assume that netbk_gop_skb was
++   used to set up the operations on the top of
++   netrx_pending_operations, which have since been done.  Check that
++   they didn't give any errors and advance over them. */
++static int netbk_check_gop(int nr_frags, domid_t domid,
++			   struct netrx_pending_operations *npo)
++{
++	multicall_entry_t *mcl;
++	gnttab_transfer_t *gop;
++	gnttab_copy_t     *copy_op;
++	int status = NETIF_RSP_OKAY;
++	int i;
++
++	for (i = 0; i <= nr_frags; i++) {
++		if (npo->meta[npo->meta_cons + i].copy) {
++			copy_op = npo->copy + npo->copy_cons++;
++			if (copy_op->status != GNTST_okay) {
++				DPRINTK("Bad status %d from copy to DOM%d.\n",
++					copy_op->status, domid);
++				status = NETIF_RSP_ERROR;
++			}
++		} else {
++			if (!xen_feature(XENFEAT_auto_translated_physmap)) {
++				mcl = npo->mcl + npo->mcl_cons++;
++				/* The update_va_mapping() must not fail. */
++				BUG_ON(mcl->result != 0);
++			}
++
++			gop = npo->trans + npo->trans_cons++;
++			/* Check the reassignment error code. */
++			if (gop->status != 0) {
++				DPRINTK("Bad status %d from grant transfer to DOM%u\n",
++					gop->status, domid);
++				/*
++				 * Page no longer belongs to us unless
++				 * GNTST_bad_page, but that should be
++				 * a fatal error anyway.
++				 */
++				BUG_ON(gop->status == GNTST_bad_page);
++				status = NETIF_RSP_ERROR;
++			}
++		}
++	}
++
++	return status;
++}
++
++static void netbk_add_frag_responses(netif_t *netif, int status,
++				     struct netbk_rx_meta *meta, int nr_frags)
++{
++	int i;
++	unsigned long offset;
++
++	for (i = 0; i < nr_frags; i++) {
++		int id = meta[i].id;
++		int flags = (i == nr_frags - 1) ? 0 : NETRXF_more_data;
++
++		if (meta[i].copy)
++			offset = 0;
++		else
++			offset = meta[i].frag.page_offset;
++		make_rx_response(netif, id, status, offset,
++				 meta[i].frag.size, flags);
++	}
++}
++
++static void net_rx_action(unsigned long unused)
++{
++	netif_t *netif = NULL;
++	s8 status;
++	u16 id, irq, flags;
++	netif_rx_response_t *resp;
++	multicall_entry_t *mcl;
++	struct sk_buff_head rxq;
++	struct sk_buff *skb;
++	int notify_nr = 0;
++	int ret;
++	int nr_frags;
++	int count;
++	unsigned long offset;
++
++	/*
++	 * Putting hundreds of bytes on the stack is considered rude.
++	 * Static works because a tasklet can only be on one CPU at any time.
++	 */
++	static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+3];
++	static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
++	static gnttab_transfer_t grant_trans_op[NET_RX_RING_SIZE];
++	static gnttab_copy_t grant_copy_op[NET_RX_RING_SIZE];
++	static unsigned char rx_notify[NR_IRQS];
++	static u16 notify_list[NET_RX_RING_SIZE];
++	static struct netbk_rx_meta meta[NET_RX_RING_SIZE];
++
++	struct netrx_pending_operations npo = {
++		mmu: rx_mmu,
++		trans: grant_trans_op,
++		copy: grant_copy_op,
++		mcl: rx_mcl,
++		meta: meta};
++
++	skb_queue_head_init(&rxq);
++
++	count = 0;
++
++	while ((skb = skb_dequeue(&rx_queue)) != NULL) {
++		nr_frags = skb_shinfo(skb)->nr_frags;
++		*(int *)skb->cb = nr_frags;
++
++		if (!xen_feature(XENFEAT_auto_translated_physmap) &&
++		    !((netif_t *)netdev_priv(skb->dev))->copying_receiver &&
++		    check_mfn(nr_frags + 1)) {
++			/* Memory squeeze? Back off for an arbitrary while. */
++			if ( net_ratelimit() )
++				WPRINTK("Memory squeeze in netback "
++					"driver.\n");
++			mod_timer(&net_timer, jiffies + HZ);
++			skb_queue_head(&rx_queue, skb);
++			break;
++		}
++
++		netbk_gop_skb(skb, &npo);
++
++		count += nr_frags + 1;
++
++		__skb_queue_tail(&rxq, skb);
++
++		/* Filled the batch queue? */
++		if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE)
++			break;
++	}
++
++	BUG_ON(npo.meta_prod > ARRAY_SIZE(meta));
++
++	npo.mmu_mcl = npo.mcl_prod;
++	if (npo.mcl_prod) {
++		BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
++		BUG_ON(npo.mmu_prod > ARRAY_SIZE(rx_mmu));
++		mcl = npo.mcl + npo.mcl_prod++;
++
++		BUG_ON(mcl[-1].op != __HYPERVISOR_update_va_mapping);
++		mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
++
++		mcl->op = __HYPERVISOR_mmu_update;
++		mcl->args[0] = (unsigned long)rx_mmu;
++		mcl->args[1] = npo.mmu_prod;
++		mcl->args[2] = 0;
++		mcl->args[3] = DOMID_SELF;
++	}
++
++	if (npo.trans_prod) {
++		BUG_ON(npo.trans_prod > ARRAY_SIZE(grant_trans_op));
++		mcl = npo.mcl + npo.mcl_prod++;
++		mcl->op = __HYPERVISOR_grant_table_op;
++		mcl->args[0] = GNTTABOP_transfer;
++		mcl->args[1] = (unsigned long)grant_trans_op;
++		mcl->args[2] = npo.trans_prod;
++	}
++
++	if (npo.copy_prod) {
++		BUG_ON(npo.copy_prod > ARRAY_SIZE(grant_copy_op));
++		mcl = npo.mcl + npo.mcl_prod++;
++		mcl->op = __HYPERVISOR_grant_table_op;
++		mcl->args[0] = GNTTABOP_copy;
++		mcl->args[1] = (unsigned long)grant_copy_op;
++		mcl->args[2] = npo.copy_prod;
++	}
++
++	/* Nothing to do? */
++	if (!npo.mcl_prod)
++		return;
++
++	BUG_ON(npo.mcl_prod > ARRAY_SIZE(rx_mcl));
++
++	ret = HYPERVISOR_multicall(npo.mcl, npo.mcl_prod);
++	BUG_ON(ret != 0);
++	/* The mmu_machphys_update() must not fail. */
++	BUG_ON(npo.mmu_mcl && npo.mcl[npo.mmu_mcl].result != 0);
++
++	while ((skb = __skb_dequeue(&rxq)) != NULL) {
++		nr_frags = *(int *)skb->cb;
++
++		netif = netdev_priv(skb->dev);
++		/* We can't rely on skb_release_data to release the
++		   pages used by fragments for us, since it tries to
++		   touch the pages in the fraglist.  If we're in
++		   flipping mode, that doesn't work.  In copying mode,
++		   we still have access to all of the pages, and so
++		   it's safe to let release_data deal with it. */
++		/* (Freeing the fragments is safe since we copy
++		   non-linear skbs destined for flipping interfaces) */
++		if (!netif->copying_receiver) {
++			atomic_set(&(skb_shinfo(skb)->dataref), 1);
++			skb_shinfo(skb)->frag_list = NULL;
++			skb_shinfo(skb)->nr_frags = 0;
++			netbk_free_pages(nr_frags, meta + npo.meta_cons + 1);
++		}
++
++		netif->stats.tx_bytes += skb->len;
++		netif->stats.tx_packets++;
++
++		status = netbk_check_gop(nr_frags, netif->domid, &npo);
++
++		id = meta[npo.meta_cons].id;
++		flags = nr_frags ? NETRXF_more_data : 0;
++
++		if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
++			flags |= NETRXF_csum_blank | NETRXF_data_validated;
++		else if (skb->proto_data_valid) /* remote but checksummed? */
++			flags |= NETRXF_data_validated;
++
++		if (meta[npo.meta_cons].copy)
++			offset = 0;
++		else
++			offset = offset_in_page(skb->data);
++		resp = make_rx_response(netif, id, status, offset,
++					skb_headlen(skb), flags);
++
++		if (meta[npo.meta_cons].frag.size) {
++			struct netif_extra_info *gso =
++				(struct netif_extra_info *)
++				RING_GET_RESPONSE(&netif->rx,
++						  netif->rx.rsp_prod_pvt++);
++
++			resp->flags |= NETRXF_extra_info;
++
++			gso->u.gso.size = meta[npo.meta_cons].frag.size;
++			gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
++			gso->u.gso.pad = 0;
++			gso->u.gso.features = 0;
++
++			gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
++			gso->flags = 0;
++		}
++
++		netbk_add_frag_responses(netif, status,
++					 meta + npo.meta_cons + 1,
++					 nr_frags);
++
++		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
++		irq = netif->irq;
++		if (ret && !rx_notify[irq]) {
++			rx_notify[irq] = 1;
++			notify_list[notify_nr++] = irq;
++		}
++
++		if (netif_queue_stopped(netif->dev) &&
++		    netif_schedulable(netif) &&
++		    !netbk_queue_full(netif))
++			netif_wake_queue(netif->dev);
++
++		netif_put(netif);
++		dev_kfree_skb(skb);
++		npo.meta_cons += nr_frags + 1;
++	}
++
++	while (notify_nr != 0) {
++		irq = notify_list[--notify_nr];
++		rx_notify[irq] = 0;
++		notify_remote_via_irq(irq);
++	}
++
++	/* More work to do? */
++	if (!skb_queue_empty(&rx_queue) && !timer_pending(&net_timer))
++		tasklet_schedule(&net_rx_tasklet);
++#if 0
++	else
++		xen_network_done_notify();
++#endif
++}
++
++static void net_alarm(unsigned long unused)
++{
++	tasklet_schedule(&net_rx_tasklet);
++}
++
++static void netbk_tx_pending_timeout(unsigned long unused)
++{
++	tasklet_schedule(&net_tx_tasklet);
++}
++
++struct net_device_stats *netif_be_get_stats(struct net_device *dev)
++{
++	netif_t *netif = netdev_priv(dev);
++	return &netif->stats;
++}
++
++static int __on_net_schedule_list(netif_t *netif)
++{
++	return netif->list.next != NULL;
++}
++
++static void remove_from_net_schedule_list(netif_t *netif)
++{
++	spin_lock_irq(&net_schedule_list_lock);
++	if (likely(__on_net_schedule_list(netif))) {
++		list_del(&netif->list);
++		netif->list.next = NULL;
++		netif_put(netif);
++	}
++	spin_unlock_irq(&net_schedule_list_lock);
++}
++
++static void add_to_net_schedule_list_tail(netif_t *netif)
++{
++	if (__on_net_schedule_list(netif))
++		return;
++
++	spin_lock_irq(&net_schedule_list_lock);
++	if (!__on_net_schedule_list(netif) &&
++	    likely(netif_schedulable(netif))) {
++		list_add_tail(&netif->list, &net_schedule_list);
++		netif_get(netif);
++	}
++	spin_unlock_irq(&net_schedule_list_lock);
++}
++
++/*
++ * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER:
++ * If this driver is pipelining transmit requests then we can be very
++ * aggressive in avoiding new-packet notifications -- frontend only needs to
++ * send a notification if there are no outstanding unreceived responses.
++ * If we may be buffering transmit requests for any reason then we must be rather
++ * more conservative and treat this as the final check for pending work.
++ */
++void netif_schedule_work(netif_t *netif)
++{
++	int more_to_do;
++
++#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
++	more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx);
++#else
++	RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
++#endif
++
++	if (more_to_do) {
++		add_to_net_schedule_list_tail(netif);
++		maybe_schedule_tx_action();
++	}
++}
++
++void netif_deschedule_work(netif_t *netif)
++{
++	remove_from_net_schedule_list(netif);
++}
++
++
++static void tx_add_credit(netif_t *netif)
++{
++	unsigned long max_burst, max_credit;
++
++	/*
++	 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
++	 * Otherwise the interface can seize up due to insufficient credit.
++	 */
++	max_burst = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size;
++	max_burst = min(max_burst, 131072UL);
++	max_burst = max(max_burst, netif->credit_bytes);
++
++	/* Take care that adding a new chunk of credit doesn't wrap to zero. */
++	max_credit = netif->remaining_credit + netif->credit_bytes;
++	if (max_credit < netif->remaining_credit)
++		max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
++
++	netif->remaining_credit = min(max_credit, max_burst);
++}
++
++static void tx_credit_callback(unsigned long data)
++{
++	netif_t *netif = (netif_t *)data;
++	tx_add_credit(netif);
++	netif_schedule_work(netif);
++}
++
++static inline int copy_pending_req(PEND_RING_IDX pending_idx)
++{
++	return gnttab_copy_grant_page(grant_tx_handle[pending_idx],
++				      &mmap_pages[pending_idx]);
++}
++
++inline static void net_tx_action_dealloc(void)
++{
++	struct netbk_tx_pending_inuse *inuse, *n;
++	gnttab_unmap_grant_ref_t *gop;
++	u16 pending_idx;
++	PEND_RING_IDX dc, dp;
++	netif_t *netif;
++	int ret;
++	LIST_HEAD(list);
++
++	dc = dealloc_cons;
++	gop = tx_unmap_ops;
++
++	/*
++	 * Free up any grants we have finished using
++	 */
++	do {
++		dp = dealloc_prod;
++
++		/* Ensure we see all indices enqueued by netif_idx_release(). */
++		smp_rmb();
++
++		while (dc != dp) {
++			unsigned long pfn;
++
++			pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
++			list_move_tail(&pending_inuse[pending_idx].list, &list);
++
++			pfn = idx_to_pfn(pending_idx);
++			/* Already unmapped? */
++			if (!phys_to_machine_mapping_valid(pfn))
++				continue;
++
++			gnttab_set_unmap_op(gop, idx_to_kaddr(pending_idx),
++					    GNTMAP_host_map,
++					    grant_tx_handle[pending_idx]);
++			gop++;
++		}
++
++		if (netbk_copy_skb_mode != NETBK_DELAYED_COPY_SKB ||
++		    list_empty(&pending_inuse_head))
++			break;
++
++		/* Copy any entries that have been pending for too long. */
++		list_for_each_entry_safe(inuse, n, &pending_inuse_head, list) {
++			if (time_after(inuse->alloc_time + HZ / 2, jiffies))
++				break;
++
++			switch (copy_pending_req(inuse - pending_inuse)) {
++			case 0:
++				list_move_tail(&inuse->list, &list);
++				continue;
++			case -EBUSY:
++				list_del_init(&inuse->list);
++				continue;
++			case -ENOENT:
++				continue;
++			}
++
++			break;
++		}
++	} while (dp != dealloc_prod);
++
++	dealloc_cons = dc;
++
++	ret = HYPERVISOR_grant_table_op(
++		GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops);
++	BUG_ON(ret);
++
++	list_for_each_entry_safe(inuse, n, &list, list) {
++		pending_idx = inuse - pending_inuse;
++
++		netif = pending_tx_info[pending_idx].netif;
++
++		make_tx_response(netif, &pending_tx_info[pending_idx].req,
++				 NETIF_RSP_OKAY);
++
++		/* Ready for next use. */
++		gnttab_reset_grant_page(mmap_pages[pending_idx]);
++
++		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
++
++		netif_put(netif);
++
++		list_del_init(&inuse->list);
++	}
++}
++
++static void netbk_tx_err(netif_t *netif, netif_tx_request_t *txp, RING_IDX end)
++{
++	RING_IDX cons = netif->tx.req_cons;
++
++	do {
++		make_tx_response(netif, txp, NETIF_RSP_ERROR);
++		if (cons >= end)
++			break;
++		txp = RING_GET_REQUEST(&netif->tx, cons++);
++	} while (1);
++	netif->tx.req_cons = cons;
++	netif_schedule_work(netif);
++	netif_put(netif);
++}
++
++static int netbk_count_requests(netif_t *netif, netif_tx_request_t *first,
++				netif_tx_request_t *txp, int work_to_do)
++{
++	RING_IDX cons = netif->tx.req_cons;
++	int frags = 0;
++
++	if (!(first->flags & NETTXF_more_data))
++		return 0;
++
++	do {
++		if (frags >= work_to_do) {
++			DPRINTK("Need more frags\n");
++			return -frags;
++		}
++
++		if (unlikely(frags >= MAX_SKB_FRAGS)) {
++			DPRINTK("Too many frags\n");
++			return -frags;
++		}
++
++		memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags),
++		       sizeof(*txp));
++		if (txp->size > first->size) {
++			DPRINTK("Frags galore\n");
++			return -frags;
++		}
++
++		first->size -= txp->size;
++		frags++;
++
++		if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
++			DPRINTK("txp->offset: %x, size: %u\n",
++				txp->offset, txp->size);
++			return -frags;
++		}
++	} while ((txp++)->flags & NETTXF_more_data);
++
++	return frags;
++}
++
++static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif,
++						  struct sk_buff *skb,
++						  netif_tx_request_t *txp,
++						  gnttab_map_grant_ref_t *mop)
++{
++	struct skb_shared_info *shinfo = skb_shinfo(skb);
++	skb_frag_t *frags = shinfo->frags;
++	unsigned long pending_idx = *((u16 *)skb->data);
++	int i, start;
++
++	/* Skip first skb fragment if it is on same page as header fragment. */
++	start = ((unsigned long)shinfo->frags[0].page == pending_idx);
++
++	for (i = start; i < shinfo->nr_frags; i++, txp++) {
++		pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)];
++
++		gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx),
++				  GNTMAP_host_map | GNTMAP_readonly,
++				  txp->gref, netif->domid);
++
++		memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
++		netif_get(netif);
++		pending_tx_info[pending_idx].netif = netif;
++		frags[i].page = (void *)pending_idx;
++	}
++
++	return mop;
++}
++
++static int netbk_tx_check_mop(struct sk_buff *skb,
++			       gnttab_map_grant_ref_t **mopp)
++{
++	gnttab_map_grant_ref_t *mop = *mopp;
++	int pending_idx = *((u16 *)skb->data);
++	netif_t *netif = pending_tx_info[pending_idx].netif;
++	netif_tx_request_t *txp;
++	struct skb_shared_info *shinfo = skb_shinfo(skb);
++	int nr_frags = shinfo->nr_frags;
++	int i, err, start;
++
++	/* Check status of header. */
++	err = mop->status;
++	if (unlikely(err)) {
++		txp = &pending_tx_info[pending_idx].req;
++		make_tx_response(netif, txp, NETIF_RSP_ERROR);
++		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
++		netif_put(netif);
++	} else {
++		set_phys_to_machine(
++			__pa(idx_to_kaddr(pending_idx)) >> PAGE_SHIFT,
++			FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
++		grant_tx_handle[pending_idx] = mop->handle;
++	}
++
++	/* Skip first skb fragment if it is on same page as header fragment. */
++	start = ((unsigned long)shinfo->frags[0].page == pending_idx);
++
++	for (i = start; i < nr_frags; i++) {
++		int j, newerr;
++
++		pending_idx = (unsigned long)shinfo->frags[i].page;
++
++		/* Check error status: if okay then remember grant handle. */
++		newerr = (++mop)->status;
++		if (likely(!newerr)) {
++			set_phys_to_machine(
++				__pa(idx_to_kaddr(pending_idx))>>PAGE_SHIFT,
++				FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
++			grant_tx_handle[pending_idx] = mop->handle;
++			/* Had a previous error? Invalidate this fragment. */
++			if (unlikely(err))
++				netif_idx_release(pending_idx);
++			continue;
++		}
++
++		/* Error on this fragment: respond to client with an error. */
++		txp = &pending_tx_info[pending_idx].req;
++		make_tx_response(netif, txp, NETIF_RSP_ERROR);
++		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
++		netif_put(netif);
++
++		/* Not the first error? Preceding frags already invalidated. */
++		if (err)
++			continue;
++
++		/* First error: invalidate header and preceding fragments. */
++		pending_idx = *((u16 *)skb->data);
++		netif_idx_release(pending_idx);
++		for (j = start; j < i; j++) {
++			pending_idx = (unsigned long)shinfo->frags[i].page;
++			netif_idx_release(pending_idx);
++		}
++
++		/* Remember the error: invalidate all subsequent fragments. */
++		err = newerr;
++	}
++
++	*mopp = mop + 1;
++	return err;
++}
++
++static void netbk_fill_frags(struct sk_buff *skb)
++{
++	struct skb_shared_info *shinfo = skb_shinfo(skb);
++	int nr_frags = shinfo->nr_frags;
++	int i;
++
++	for (i = 0; i < nr_frags; i++) {
++		skb_frag_t *frag = shinfo->frags + i;
++		netif_tx_request_t *txp;
++		unsigned long pending_idx;
++
++		pending_idx = (unsigned long)frag->page;
++
++		pending_inuse[pending_idx].alloc_time = jiffies;
++		list_add_tail(&pending_inuse[pending_idx].list,
++			      &pending_inuse_head);
++
++		txp = &pending_tx_info[pending_idx].req;
++		frag->page = virt_to_page(idx_to_kaddr(pending_idx));
++		frag->size = txp->size;
++		frag->page_offset = txp->offset;
++
++		skb->len += txp->size;
++		skb->data_len += txp->size;
++		skb->truesize += txp->size;
++	}
++}
++
++int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras,
++		     int work_to_do)
++{
++	struct netif_extra_info extra;
++	RING_IDX cons = netif->tx.req_cons;
++
++	do {
++		if (unlikely(work_to_do-- <= 0)) {
++			DPRINTK("Missing extra info\n");
++			return -EBADR;
++		}
++
++		memcpy(&extra, RING_GET_REQUEST(&netif->tx, cons),
++		       sizeof(extra));
++		if (unlikely(!extra.type ||
++			     extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
++			netif->tx.req_cons = ++cons;
++			DPRINTK("Invalid extra type: %d\n", extra.type);
++			return -EINVAL;
++		}
++
++		memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
++		netif->tx.req_cons = ++cons;
++	} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
++
++	return work_to_do;
++}
++
++static int netbk_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso)
++{
++	if (!gso->u.gso.size) {
++		DPRINTK("GSO size must not be zero.\n");
++		return -EINVAL;
++	}
++
++	/* Currently only TCPv4 S.O. is supported. */
++	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
++		DPRINTK("Bad GSO type %d.\n", gso->u.gso.type);
++		return -EINVAL;
++	}
++
++	skb_shinfo(skb)->gso_size = gso->u.gso.size;
++	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
++
++	/* Header must be checked, and gso_segs computed. */
++	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
++	skb_shinfo(skb)->gso_segs = 0;
++
++	return 0;
++}
++
++/* Called after netfront has transmitted */
++static void net_tx_action(unsigned long unused)
++{
++	struct list_head *ent;
++	struct sk_buff *skb;
++	netif_t *netif;
++	netif_tx_request_t txreq;
++	netif_tx_request_t txfrags[MAX_SKB_FRAGS];
++	struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
++	u16 pending_idx;
++	RING_IDX i;
++	gnttab_map_grant_ref_t *mop;
++	unsigned int data_len;
++	int ret, work_to_do;
++
++	if (dealloc_cons != dealloc_prod)
++		net_tx_action_dealloc();
++
++	mop = tx_map_ops;
++	while (((NR_PENDING_REQS + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
++		!list_empty(&net_schedule_list)) {
++		/* Get a netif from the list with work to do. */
++		ent = net_schedule_list.next;
++		netif = list_entry(ent, netif_t, list);
++		netif_get(netif);
++		remove_from_net_schedule_list(netif);
++
++		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
++		if (!work_to_do) {
++			netif_put(netif);
++			continue;
++		}
++
++		i = netif->tx.req_cons;
++		rmb(); /* Ensure that we see the request before we copy it. */
++		memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));
++
++		/* Credit-based scheduling. */
++		if (txreq.size > netif->remaining_credit) {
++			unsigned long now = jiffies;
++			unsigned long next_credit =
++				netif->credit_timeout.expires +
++				msecs_to_jiffies(netif->credit_usec / 1000);
++
++			/* Timer could already be pending in rare cases. */
++			if (timer_pending(&netif->credit_timeout)) {
++				netif_put(netif);
++				continue;
++			}
++
++			/* Passed the point where we can replenish credit? */
++			if (time_after_eq(now, next_credit)) {
++				netif->credit_timeout.expires = now;
++				tx_add_credit(netif);
++			}
++
++			/* Still too big to send right now? Set a callback. */
++			if (txreq.size > netif->remaining_credit) {
++				netif->credit_timeout.data     =
++					(unsigned long)netif;
++				netif->credit_timeout.function =
++					tx_credit_callback;
++				__mod_timer(&netif->credit_timeout,
++					    next_credit);
++				netif_put(netif);
++				continue;
++			}
++		}
++		netif->remaining_credit -= txreq.size;
++
++		work_to_do--;
++		netif->tx.req_cons = ++i;
++
++		memset(extras, 0, sizeof(extras));
++		if (txreq.flags & NETTXF_extra_info) {
++			work_to_do = netbk_get_extras(netif, extras,
++						      work_to_do);
++			i = netif->tx.req_cons;
++			if (unlikely(work_to_do < 0)) {
++				netbk_tx_err(netif, &txreq, i);
++				continue;
++			}
++		}
++
++		ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do);
++		if (unlikely(ret < 0)) {
++			netbk_tx_err(netif, &txreq, i - ret);
++			continue;
++		}
++		i += ret;
++
++		if (unlikely(txreq.size < ETH_HLEN)) {
++			DPRINTK("Bad packet size: %d\n", txreq.size);
++			netbk_tx_err(netif, &txreq, i);
++			continue;
++		}
++
++		/* No crossing a page as the payload mustn't fragment. */
++		if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
++			DPRINTK("txreq.offset: %x, size: %u, end: %lu\n",
++				txreq.offset, txreq.size,
++				(txreq.offset &~PAGE_MASK) + txreq.size);
++			netbk_tx_err(netif, &txreq, i);
++			continue;
++		}
++
++		pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
++
++		data_len = (txreq.size > PKT_PROT_LEN &&
++			    ret < MAX_SKB_FRAGS) ?
++			PKT_PROT_LEN : txreq.size;
++
++		skb = alloc_skb(data_len + 16 + NET_IP_ALIGN,
++				GFP_ATOMIC | __GFP_NOWARN);
++		if (unlikely(skb == NULL)) {
++			DPRINTK("Can't allocate a skb in start_xmit.\n");
++			netbk_tx_err(netif, &txreq, i);
++			break;
++		}
++
++		/* Packets passed to netif_rx() must have some headroom. */
++		skb_reserve(skb, 16 + NET_IP_ALIGN);
++
++		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
++			struct netif_extra_info *gso;
++			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
++
++			if (netbk_set_skb_gso(skb, gso)) {
++				kfree_skb(skb);
++				netbk_tx_err(netif, &txreq, i);
++				continue;
++			}
++		}
++
++		gnttab_set_map_op(mop, idx_to_kaddr(pending_idx),
++				  GNTMAP_host_map | GNTMAP_readonly,
++				  txreq.gref, netif->domid);
++		mop++;
++
++		memcpy(&pending_tx_info[pending_idx].req,
++		       &txreq, sizeof(txreq));
++		pending_tx_info[pending_idx].netif = netif;
++		*((u16 *)skb->data) = pending_idx;
++
++		__skb_put(skb, data_len);
++
++		skb_shinfo(skb)->nr_frags = ret;
++		if (data_len < txreq.size) {
++			skb_shinfo(skb)->nr_frags++;
++			skb_shinfo(skb)->frags[0].page =
++				(void *)(unsigned long)pending_idx;
++		} else {
++			/* Discriminate from any valid pending_idx value. */
++			skb_shinfo(skb)->frags[0].page = (void *)~0UL;
++		}
++
++		if (skb->data_len < skb_shinfo(skb)->gso_size) {
++			skb_shinfo(skb)->gso_size = 0;
++			skb_shinfo(skb)->gso_type = 0;
++		}
++
++		__skb_queue_tail(&tx_queue, skb);
++
++		pending_cons++;
++
++		mop = netbk_get_requests(netif, skb, txfrags, mop);
++
++		netif->tx.req_cons = i;
++		netif_schedule_work(netif);
++
++		if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
++			break;
++	}
++
++	if (mop == tx_map_ops)
++		return;
++
++	ret = HYPERVISOR_grant_table_op(
++		GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops);
++	BUG_ON(ret);
++
++	mop = tx_map_ops;
++	while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
++		netif_tx_request_t *txp;
++
++		pending_idx = *((u16 *)skb->data);
++		netif       = pending_tx_info[pending_idx].netif;
++		txp         = &pending_tx_info[pending_idx].req;
++
++		/* Check the remap error code. */
++		if (unlikely(netbk_tx_check_mop(skb, &mop))) {
++			DPRINTK("netback grant failed.\n");
++			skb_shinfo(skb)->nr_frags = 0;
++			kfree_skb(skb);
++			continue;
++		}
++
++		data_len = skb->len;
++		memcpy(skb->data,
++		       (void *)(idx_to_kaddr(pending_idx)|txp->offset),
++		       data_len);
++		if (data_len < txp->size) {
++			/* Append the packet payload as a fragment. */
++			txp->offset += data_len;
++			txp->size -= data_len;
++		} else {
++			/* Schedule a response immediately. */
++			netif_idx_release(pending_idx);
++		}
++
++		/*
++		 * Old frontends do not assert data_validated but we
++		 * can infer it from csum_blank so test both flags.
++		 */
++		if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank)) {
++			skb->ip_summed = CHECKSUM_UNNECESSARY;
++			skb->proto_data_valid = 1;
++		} else {
++			skb->ip_summed = CHECKSUM_NONE;
++			skb->proto_data_valid = 0;
++		}
++		skb->proto_csum_blank = !!(txp->flags & NETTXF_csum_blank);
++
++		netbk_fill_frags(skb);
++
++		skb->dev      = netif->dev;
++		skb->protocol = eth_type_trans(skb, skb->dev);
++
++		netif->stats.rx_bytes += skb->len;
++		netif->stats.rx_packets++;
++
++		if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
++		    unlikely(skb_linearize(skb))) {
++			DPRINTK("Can't linearize skb in net_tx_action.\n");
++			kfree_skb(skb);
++			continue;
++		}
++
++		netif_rx(skb);
++		netif->dev->last_rx = jiffies;
++	}
++
++	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
++	    !list_empty(&pending_inuse_head)) {
++		struct netbk_tx_pending_inuse *oldest;
++
++		oldest = list_entry(pending_inuse_head.next,
++				    struct netbk_tx_pending_inuse, list);
++		mod_timer(&netbk_tx_pending_timer, oldest->alloc_time + HZ);
++	}
++}
++
++static void netif_idx_release(u16 pending_idx)
++{
++	static DEFINE_SPINLOCK(_lock);
++	unsigned long flags;
++
++	spin_lock_irqsave(&_lock, flags);
++	dealloc_ring[MASK_PEND_IDX(dealloc_prod)] = pending_idx;
++	/* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
++	smp_wmb();
++	dealloc_prod++;
++	spin_unlock_irqrestore(&_lock, flags);
++
++	tasklet_schedule(&net_tx_tasklet);
++}
++
++static void netif_page_release(struct page *page, unsigned int order)
++{
++	int idx = netif_page_index(page);
++	BUG_ON(order);
++	BUG_ON(idx < 0);
++	netif_idx_release(idx);
++}
++
++irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
++{
++	netif_t *netif = dev_id;
++
++	add_to_net_schedule_list_tail(netif);
++	maybe_schedule_tx_action();
++
++	if (netif_schedulable(netif) && !netbk_queue_full(netif))
++		netif_wake_queue(netif->dev);
++
++	return IRQ_HANDLED;
++}
++
++static void make_tx_response(netif_t *netif,
++			     netif_tx_request_t *txp,
++			     s8       st)
++{
++	RING_IDX i = netif->tx.rsp_prod_pvt;
++	netif_tx_response_t *resp;
++	int notify;
++
++	resp = RING_GET_RESPONSE(&netif->tx, i);
++	resp->id     = txp->id;
++	resp->status = st;
++
++	if (txp->flags & NETTXF_extra_info)
++		RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL;
++
++	netif->tx.rsp_prod_pvt = ++i;
++	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
++	if (notify)
++		notify_remote_via_irq(netif->irq);
++
++#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
++	if (i == netif->tx.req_cons) {
++		int more_to_do;
++		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
++		if (more_to_do)
++			add_to_net_schedule_list_tail(netif);
++	}
++#endif
++}
++
++static netif_rx_response_t *make_rx_response(netif_t *netif,
++					     u16      id,
++					     s8       st,
++					     u16      offset,
++					     u16      size,
++					     u16      flags)
++{
++	RING_IDX i = netif->rx.rsp_prod_pvt;
++	netif_rx_response_t *resp;
++
++	resp = RING_GET_RESPONSE(&netif->rx, i);
++	resp->offset     = offset;
++	resp->flags      = flags;
++	resp->id         = id;
++	resp->status     = (s16)size;
++	if (st < 0)
++		resp->status = (s16)st;
++
++	netif->rx.rsp_prod_pvt = ++i;
++
++	return resp;
++}
++
++#ifdef NETBE_DEBUG_INTERRUPT
++static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
++{
++	struct list_head *ent;
++	netif_t *netif;
++	int i = 0;
++
++	printk(KERN_ALERT "netif_schedule_list:\n");
++	spin_lock_irq(&net_schedule_list_lock);
++
++	list_for_each (ent, &net_schedule_list) {
++		netif = list_entry(ent, netif_t, list);
++		printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
++		       "rx_resp_prod=%08x\n",
++		       i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
++		printk(KERN_ALERT "   tx_req_cons=%08x tx_resp_prod=%08x)\n",
++		       netif->tx.req_cons, netif->tx.rsp_prod_pvt);
++		printk(KERN_ALERT "   shared(rx_req_prod=%08x "
++		       "rx_resp_prod=%08x\n",
++		       netif->rx.sring->req_prod, netif->rx.sring->rsp_prod);
++		printk(KERN_ALERT "   rx_event=%08x tx_req_prod=%08x\n",
++		       netif->rx.sring->rsp_event, netif->tx.sring->req_prod);
++		printk(KERN_ALERT "   tx_resp_prod=%08x, tx_event=%08x)\n",
++		       netif->tx.sring->rsp_prod, netif->tx.sring->rsp_event);
++		i++;
++	}
++
++	spin_unlock_irq(&net_schedule_list_lock);
++	printk(KERN_ALERT " ** End of netif_schedule_list **\n");
++
++	return IRQ_HANDLED;
++}
++#endif
++
++static int __init netback_init(void)
++{
++	int i;
++	struct page *page;
++
++	if (!is_running_on_xen())
++		return -ENODEV;
++
++	/* We can increase reservation by this much in net_rx_action(). */
++	balloon_update_driver_allowance(NET_RX_RING_SIZE);
++
++	skb_queue_head_init(&rx_queue);
++	skb_queue_head_init(&tx_queue);
++
++	init_timer(&net_timer);
++	net_timer.data = 0;
++	net_timer.function = net_alarm;
++
++	init_timer(&netbk_tx_pending_timer);
++	netbk_tx_pending_timer.data = 0;
++	netbk_tx_pending_timer.function = netbk_tx_pending_timeout;
++
++	mmap_pages = alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
++	if (mmap_pages == NULL) {
++		printk("%s: out of memory\n", __FUNCTION__);
++		return -ENOMEM;
++	}
++
++	for (i = 0; i < MAX_PENDING_REQS; i++) {
++		page = mmap_pages[i];
++		SetPageForeign(page, netif_page_release);
++		netif_set_page_index(page, i);
++		INIT_LIST_HEAD(&pending_inuse[i].list);
++	}
++
++	pending_cons = 0;
++	pending_prod = MAX_PENDING_REQS;
++	for (i = 0; i < MAX_PENDING_REQS; i++)
++		pending_ring[i] = i;
++
++	spin_lock_init(&net_schedule_list_lock);
++	INIT_LIST_HEAD(&net_schedule_list);
++
++	netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
++	if (MODPARM_copy_skb) {
++		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
++					      NULL, 0))
++			netbk_copy_skb_mode = NETBK_ALWAYS_COPY_SKB;
++		else
++			netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
++	}
++
++	netif_accel_init();
++
++	netif_xenbus_init();
++
++#ifdef NETBE_DEBUG_INTERRUPT
++	(void)bind_virq_to_irqhandler(VIRQ_DEBUG,
++				      0,
++				      netif_be_dbg,
++				      SA_SHIRQ,
++				      "net-be-dbg",
++				      &netif_be_dbg);
++#endif
++
++	return 0;
++}
++
++module_init(netback_init);
++
++MODULE_LICENSE("Dual BSD/GPL");
+diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
+new file mode 100644
+index 0000000..d7faeb6
+--- /dev/null
++++ b/drivers/xen/netback/xenbus.c
+@@ -0,0 +1,454 @@
++/*  Xenbus code for netif backend
++    Copyright (C) 2005 Rusty Russell <rusty at rustcorp.com.au>
++    Copyright (C) 2005 XenSource Ltd
++
++    This program is free software; you can redistribute it and/or modify
++    it under the terms of the GNU General Public License as published by
++    the Free Software Foundation; either version 2 of the License, or
++    (at your option) any later version.
++
++    This program is distributed in the hope that it will be useful,
++    but WITHOUT ANY WARRANTY; without even the implied warranty of
++    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++    GNU General Public License for more details.
++
++    You should have received a copy of the GNU General Public License
++    along with this program; if not, write to the Free Software
++    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
++*/
++
++#include <stdarg.h>
++#include <linux/module.h>
++#include <xen/xenbus.h>
++#include "common.h"
++
++#if 0
++#undef DPRINTK
++#define DPRINTK(fmt, args...) \
++    printk("netback/xenbus (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
++#endif
++
++
++static int connect_rings(struct backend_info *);
++static void connect(struct backend_info *);
++static void backend_create_netif(struct backend_info *be);
++
++static int netback_remove(struct xenbus_device *dev)
++{
++	struct backend_info *be = dev->dev.driver_data;
++
++	netback_remove_accelerators(be, dev);
++
++	if (be->netif) {
++		kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
++		netif_disconnect(be->netif);
++		be->netif = NULL;
++	}
++	kfree(be);
++	dev->dev.driver_data = NULL;
++	return 0;
++}
++
++
++/**
++ * Entry point to this code when a new device is created.  Allocate the basic
++ * structures and switch to InitWait.
++ */
++static int netback_probe(struct xenbus_device *dev,
++			 const struct xenbus_device_id *id)
++{
++	const char *message;
++	struct xenbus_transaction xbt;
++	int err;
++	int sg;
++	struct backend_info *be = kzalloc(sizeof(struct backend_info),
++					  GFP_KERNEL);
++	if (!be) {
++		xenbus_dev_fatal(dev, -ENOMEM,
++				 "allocating backend structure");
++		return -ENOMEM;
++	}
++
++	be->dev = dev;
++	dev->dev.driver_data = be;
++
++	sg = 1;
++	if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB)
++		sg = 0;
++
++	do {
++		err = xenbus_transaction_start(&xbt);
++		if (err) {
++			xenbus_dev_fatal(dev, err, "starting transaction");
++			goto fail;
++		}
++
++		err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg);
++		if (err) {
++			message = "writing feature-sg";
++			goto abort_transaction;
++		}
++
++		err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4",
++				    "%d", sg);
++		if (err) {
++			message = "writing feature-gso-tcpv4";
++			goto abort_transaction;
++		}
++
++		/* We support rx-copy path. */
++		err = xenbus_printf(xbt, dev->nodename,
++				    "feature-rx-copy", "%d", 1);
++		if (err) {
++			message = "writing feature-rx-copy";
++			goto abort_transaction;
++		}
++
++		/*
++		 * We don't support rx-flip path (except old guests who don't
++		 * grok this feature flag).
++		 */
++		err = xenbus_printf(xbt, dev->nodename,
++				    "feature-rx-flip", "%d", 0);
++		if (err) {
++			message = "writing feature-rx-flip";
++			goto abort_transaction;
++		}
++
++		err = xenbus_transaction_end(xbt, 0);
++	} while (err == -EAGAIN);
++
++	if (err) {
++		xenbus_dev_fatal(dev, err, "completing transaction");
++		goto fail;
++	}
++
++	netback_probe_accelerators(be, dev);
++
++	err = xenbus_switch_state(dev, XenbusStateInitWait);
++	if (err)
++		goto fail;
++
++	/* This kicks hotplug scripts, so do it immediately. */
++	backend_create_netif(be);
++
++	return 0;
++
++abort_transaction:
++	xenbus_transaction_end(xbt, 1);
++	xenbus_dev_fatal(dev, err, "%s", message);
++fail:
++	DPRINTK("failed");
++	netback_remove(dev);
++	return err;
++}
++
++
++/**
++ * Handle the creation of the hotplug script environment.  We add the script
++ * and vif variables to the environment, for the benefit of the vif-* hotplug
++ * scripts.
++ */
++static int netback_uevent(struct xenbus_device *xdev, char **envp,
++			  int num_envp, char *buffer, int buffer_size)
++{
++	struct backend_info *be = xdev->dev.driver_data;
++	netif_t *netif = be->netif;
++	int i = 0, length = 0;
++	char *val;
++
++	DPRINTK("netback_uevent");
++
++	val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
++	if (IS_ERR(val)) {
++		int err = PTR_ERR(val);
++		xenbus_dev_fatal(xdev, err, "reading script");
++		return err;
++	}
++	else {
++		add_uevent_var(envp, num_envp, &i, buffer, buffer_size,
++			       &length, "script=%s", val);
++		kfree(val);
++	}
++
++	add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
++		       "vif=%s", netif->dev->name);
++
++	envp[i] = NULL;
++
++	return 0;
++}
++
++
++static void backend_create_netif(struct backend_info *be)
++{
++	int err;
++	long handle;
++	struct xenbus_device *dev = be->dev;
++
++	if (be->netif != NULL)
++		return;
++
++	err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%li", &handle);
++	if (err != 1) {
++		xenbus_dev_fatal(dev, err, "reading handle");
++		return;
++	}
++
++	be->netif = netif_alloc(dev->otherend_id, handle);
++	if (IS_ERR(be->netif)) {
++		err = PTR_ERR(be->netif);
++		be->netif = NULL;
++		xenbus_dev_fatal(dev, err, "creating interface");
++		return;
++	}
++
++	kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
++}
++
++
++/**
++ * Callback received when the frontend's state changes.
++ */
++static void frontend_changed(struct xenbus_device *dev,
++			     enum xenbus_state frontend_state)
++{
++	struct backend_info *be = dev->dev.driver_data;
++
++	DPRINTK("%s", xenbus_strstate(frontend_state));
++
++	be->frontend_state = frontend_state;
++
++	switch (frontend_state) {
++	case XenbusStateInitialising:
++		if (dev->state == XenbusStateClosed) {
++			printk(KERN_INFO "%s: %s: prepare for reconnect\n",
++			       __FUNCTION__, dev->nodename);
++			xenbus_switch_state(dev, XenbusStateInitWait);
++		}
++		break;
++
++	case XenbusStateInitialised:
++		break;
++
++	case XenbusStateConnected:
++		if (dev->state == XenbusStateConnected)
++			break;
++		backend_create_netif(be);
++		if (be->netif)
++			connect(be);
++		break;
++
++	case XenbusStateClosing:
++		if (be->netif) {
++			kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
++			netif_disconnect(be->netif);
++			be->netif = NULL;
++		}
++		xenbus_switch_state(dev, XenbusStateClosing);
++		break;
++
++	case XenbusStateClosed:
++		xenbus_switch_state(dev, XenbusStateClosed);
++		if (xenbus_dev_is_online(dev))
++			break;
++		/* fall through if not online */
++	case XenbusStateUnknown:
++		device_unregister(&dev->dev);
++		break;
++
++	default:
++		xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
++				 frontend_state);
++		break;
++	}
++}
++
++
++static void xen_net_read_rate(struct xenbus_device *dev,
++			      unsigned long *bytes, unsigned long *usec)
++{
++	char *s, *e;
++	unsigned long b, u;
++	char *ratestr;
++
++	/* Default to unlimited bandwidth. */
++	*bytes = ~0UL;
++	*usec = 0;
++
++	ratestr = xenbus_read(XBT_NIL, dev->nodename, "rate", NULL);
++	if (IS_ERR(ratestr))
++		return;
++
++	s = ratestr;
++	b = simple_strtoul(s, &e, 10);
++	if ((s == e) || (*e != ','))
++		goto fail;
++
++	s = e + 1;
++	u = simple_strtoul(s, &e, 10);
++	if ((s == e) || (*e != '\0'))
++		goto fail;
++
++	*bytes = b;
++	*usec = u;
++
++	kfree(ratestr);
++	return;
++
++ fail:
++	WPRINTK("Failed to parse network rate limit. Traffic unlimited.\n");
++	kfree(ratestr);
++}
++
++static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
++{
++	char *s, *e, *macstr;
++	int i;
++
++	macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
++	if (IS_ERR(macstr))
++		return PTR_ERR(macstr);
++
++	for (i = 0; i < ETH_ALEN; i++) {
++		mac[i] = simple_strtoul(s, &e, 16);
++		if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
++			kfree(macstr);
++			return -ENOENT;
++		}
++		s = e+1;
++	}
++
++	kfree(macstr);
++	return 0;
++}
++
++static void connect(struct backend_info *be)
++{
++	int err;
++	struct xenbus_device *dev = be->dev;
++
++	err = connect_rings(be);
++	if (err)
++		return;
++
++	err = xen_net_read_mac(dev, be->netif->fe_dev_addr);
++	if (err) {
++		xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
++		return;
++	}
++
++	xen_net_read_rate(dev, &be->netif->credit_bytes,
++			  &be->netif->credit_usec);
++	be->netif->remaining_credit = be->netif->credit_bytes;
++
++	xenbus_switch_state(dev, XenbusStateConnected);
++
++	netif_wake_queue(be->netif->dev);
++}
++
++
++static int connect_rings(struct backend_info *be)
++{
++	struct xenbus_device *dev = be->dev;
++	unsigned long tx_ring_ref, rx_ring_ref;
++	unsigned int evtchn, rx_copy;
++	int err;
++	int val;
++
++	DPRINTK("");
++
++	err = xenbus_gather(XBT_NIL, dev->otherend,
++			    "tx-ring-ref", "%lu", &tx_ring_ref,
++			    "rx-ring-ref", "%lu", &rx_ring_ref,
++			    "event-channel", "%u", &evtchn, NULL);
++	if (err) {
++		xenbus_dev_fatal(dev, err,
++				 "reading %s/ring-ref and event-channel",
++				 dev->otherend);
++		return err;
++	}
++
++	err = xenbus_scanf(XBT_NIL, dev->otherend, "request-rx-copy", "%u",
++			   &rx_copy);
++	if (err == -ENOENT) {
++		err = 0;
++		rx_copy = 0;
++	}
++	if (err < 0) {
++		xenbus_dev_fatal(dev, err, "reading %s/request-rx-copy",
++				 dev->otherend);
++		return err;
++	}
++	be->netif->copying_receiver = !!rx_copy;
++
++	if (be->netif->dev->tx_queue_len != 0) {
++		if (xenbus_scanf(XBT_NIL, dev->otherend,
++				 "feature-rx-notify", "%d", &val) < 0)
++			val = 0;
++		if (val)
++			be->netif->can_queue = 1;
++		else
++			/* Must be non-zero for pfifo_fast to work. */
++			be->netif->dev->tx_queue_len = 1;
++	}
++
++	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", "%d", &val) < 0)
++		val = 0;
++	if (val) {
++		be->netif->features |= NETIF_F_SG;
++		be->netif->dev->features |= NETIF_F_SG;
++	}
++
++	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4", "%d",
++			 &val) < 0)
++		val = 0;
++	if (val) {
++		be->netif->features |= NETIF_F_TSO;
++		be->netif->dev->features |= NETIF_F_TSO;
++	}
++
++	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
++			 "%d", &val) < 0)
++		val = 0;
++	if (val) {
++		be->netif->features &= ~NETIF_F_IP_CSUM;
++		be->netif->dev->features &= ~NETIF_F_IP_CSUM;
++	}
++
++	/* Map the shared frame, irq etc. */
++	err = netif_map(be->netif, tx_ring_ref, rx_ring_ref, evtchn);
++	if (err) {
++		xenbus_dev_fatal(dev, err,
++				 "mapping shared-frames %lu/%lu port %u",
++				 tx_ring_ref, rx_ring_ref, evtchn);
++		return err;
++	}
++	return 0;
++}
++
++
++/* ** Driver Registration ** */
++
++
++static const struct xenbus_device_id netback_ids[] = {
++	{ "vif" },
++	{ "" }
++};
++
++
++static struct xenbus_driver netback = {
++	.name = "vif",
++	.owner = THIS_MODULE,
++	.ids = netback_ids,
++	.probe = netback_probe,
++	.remove = netback_remove,
++	.uevent = netback_uevent,
++	.otherend_changed = frontend_changed,
++};
++
++
++void netif_xenbus_init(void)
++{
++	xenbus_register_backend(&netback);
++}
+-- 
+1.7.3.4
+
+
+From 5b30803bf5f58ee980edd8d88a2d73dda995ee93 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Mon, 9 Feb 2009 12:05:52 -0800
+Subject: [PATCH 052/139] xen: netback: first cut at porting to upstream and cleaning up
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/Kconfig             |    2 +-
+ drivers/xen/netback/Makefile    |    2 +-
+ drivers/xen/netback/common.h    |   33 +++---
+ drivers/xen/netback/interface.c |   37 +++---
+ drivers/xen/netback/netback.c   |  248 ++++++++++++++++++++++++---------------
+ drivers/xen/netback/xenbus.c    |   25 ++--
+ 6 files changed, 201 insertions(+), 146 deletions(-)
+
+diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
+index 7e83d43..30290a8 100644
+--- a/drivers/xen/Kconfig
++++ b/drivers/xen/Kconfig
+@@ -38,7 +38,7 @@ config XEN_BACKEND
+ 	  to other virtual machines.
+ 
+ config XEN_NETDEV_BACKEND
+-       bool "Xen backend network device"
++       tristate "Xen backend network device"
+        depends on XEN_BACKEND && NET
+        help
+          Implement the network backend driver, which passes packets
+diff --git a/drivers/xen/netback/Makefile b/drivers/xen/netback/Makefile
+index f4a0c51..a01a1a3 100644
+--- a/drivers/xen/netback/Makefile
++++ b/drivers/xen/netback/Makefile
+@@ -1,3 +1,3 @@
+ obj-$(CONFIG_XEN_NETDEV_BACKEND) := netbk.o
+ 
+-netbk-y   := netback.o xenbus.o interface.o
++netbk-y := netback.o xenbus.o interface.o
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 9a54d57..65b88f4 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -43,8 +43,7 @@
+ #include <asm/io.h>
+ #include <asm/pgalloc.h>
+ #include <xen/interface/grant_table.h>
+-#include <xen/gnttab.h>
+-#include <xen/driver_util.h>
++#include <xen/grant_table.h>
+ #include <xen/xenbus.h>
+ 
+ #define DPRINTK(_f, _a...)			\
+@@ -55,7 +54,7 @@
+ #define WPRINTK(fmt, args...)				\
+ 	printk(KERN_WARNING "xen_net: " fmt, ##args)
+ 
+-typedef struct netif_st {
++struct xen_netif {
+ 	/* Unique identifier for this interface. */
+ 	domid_t          domid;
+ 	unsigned int     handle;
+@@ -70,8 +69,8 @@ typedef struct netif_st {
+ 	unsigned int     irq;
+ 
+ 	/* The shared rings and indexes. */
+-	netif_tx_back_ring_t tx;
+-	netif_rx_back_ring_t rx;
++	struct xen_netif_tx_back_ring tx;
++	struct xen_netif_rx_back_ring rx;
+ 	struct vm_struct *tx_comms_area;
+ 	struct vm_struct *rx_comms_area;
+ 
+@@ -103,7 +102,7 @@ typedef struct netif_st {
+ 	unsigned int carrier;
+ 
+ 	wait_queue_head_t waiting_to_free;
+-} netif_t;
++};
+ 
+ /*
+  * Implement our own carrier flag: the network stack's version causes delays
+@@ -141,7 +140,7 @@ struct netback_accelerator {
+ 
+ struct backend_info {
+ 	struct xenbus_device *dev;
+-	netif_t *netif;
++	struct xen_netif *netif;
+ 	enum xenbus_state frontend_state;
+ 
+ 	/* State relating to the netback accelerator */
+@@ -174,13 +173,13 @@ extern
+ void netif_accel_init(void);
+ 
+ 
+-#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
+-#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
++#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
++#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
+ 
+-void netif_disconnect(netif_t *netif);
++void netif_disconnect(struct xen_netif *netif);
+ 
+-netif_t *netif_alloc(domid_t domid, unsigned int handle);
+-int netif_map(netif_t *netif, unsigned long tx_ring_ref,
++struct xen_netif *netif_alloc(domid_t domid, unsigned int handle);
++int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
+ 	      unsigned long rx_ring_ref, unsigned int evtchn);
+ 
+ #define netif_get(_b) (atomic_inc(&(_b)->refcnt))
+@@ -195,22 +194,22 @@ void netif_xenbus_init(void);
+ #define netif_schedulable(netif)				\
+ 	(netif_running((netif)->dev) && netback_carrier_ok(netif))
+ 
+-void netif_schedule_work(netif_t *netif);
+-void netif_deschedule_work(netif_t *netif);
++void netif_schedule_work(struct xen_netif *netif);
++void netif_deschedule_work(struct xen_netif *netif);
+ 
+ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
+ struct net_device_stats *netif_be_get_stats(struct net_device *dev);
+-irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs);
++irqreturn_t netif_be_int(int irq, void *dev_id);
+ 
+ static inline int netbk_can_queue(struct net_device *dev)
+ {
+-	netif_t *netif = netdev_priv(dev);
++	struct xen_netif *netif = netdev_priv(dev);
+ 	return netif->can_queue;
+ }
+ 
+ static inline int netbk_can_sg(struct net_device *dev)
+ {
+-	netif_t *netif = netdev_priv(dev);
++	struct xen_netif *netif = netdev_priv(dev);
+ 	return netif->features & NETIF_F_SG;
+ }
+ 
+diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
+index 7e67941..d184ad7 100644
+--- a/drivers/xen/netback/interface.c
++++ b/drivers/xen/netback/interface.c
+@@ -34,6 +34,9 @@
+ #include <linux/ethtool.h>
+ #include <linux/rtnetlink.h>
+ 
++#include <xen/events.h>
++#include <asm/xen/hypercall.h>
++
+ /*
+  * Module parameter 'queue_length':
+  *
+@@ -51,13 +54,13 @@
+ static unsigned long netbk_queue_length = 32;
+ module_param_named(queue_length, netbk_queue_length, ulong, 0);
+ 
+-static void __netif_up(netif_t *netif)
++static void __netif_up(struct xen_netif *netif)
+ {
+ 	enable_irq(netif->irq);
+ 	netif_schedule_work(netif);
+ }
+ 
+-static void __netif_down(netif_t *netif)
++static void __netif_down(struct xen_netif *netif)
+ {
+ 	disable_irq(netif->irq);
+ 	netif_deschedule_work(netif);
+@@ -65,7 +68,7 @@ static void __netif_down(netif_t *netif)
+ 
+ static int net_open(struct net_device *dev)
+ {
+-	netif_t *netif = netdev_priv(dev);
++	struct xen_netif *netif = netdev_priv(dev);
+ 	if (netback_carrier_ok(netif)) {
+ 		__netif_up(netif);
+ 		netif_start_queue(dev);
+@@ -75,7 +78,7 @@ static int net_open(struct net_device *dev)
+ 
+ static int net_close(struct net_device *dev)
+ {
+-	netif_t *netif = netdev_priv(dev);
++	struct xen_netif *netif = netdev_priv(dev);
+ 	if (netback_carrier_ok(netif))
+ 		__netif_down(netif);
+ 	netif_stop_queue(dev);
+@@ -95,7 +98,7 @@ static int netbk_change_mtu(struct net_device *dev, int mtu)
+ static int netbk_set_sg(struct net_device *dev, u32 data)
+ {
+ 	if (data) {
+-		netif_t *netif = netdev_priv(dev);
++		struct xen_netif *netif = netdev_priv(dev);
+ 
+ 		if (!(netif->features & NETIF_F_SG))
+ 			return -ENOSYS;
+@@ -107,7 +110,7 @@ static int netbk_set_sg(struct net_device *dev, u32 data)
+ static int netbk_set_tso(struct net_device *dev, u32 data)
+ {
+ 	if (data) {
+-		netif_t *netif = netdev_priv(dev);
++		struct xen_netif *netif = netdev_priv(dev);
+ 
+ 		if (!(netif->features & NETIF_F_TSO))
+ 			return -ENOSYS;
+@@ -127,15 +130,15 @@ static struct ethtool_ops network_ethtool_ops =
+ 	.get_link = ethtool_op_get_link,
+ };
+ 
+-netif_t *netif_alloc(domid_t domid, unsigned int handle)
++struct xen_netif *netif_alloc(domid_t domid, unsigned int handle)
+ {
+ 	int err = 0;
+ 	struct net_device *dev;
+-	netif_t *netif;
++	struct xen_netif *netif;
+ 	char name[IFNAMSIZ] = {};
+ 
+ 	snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
+-	dev = alloc_netdev(sizeof(netif_t), name, ether_setup);
++	dev = alloc_netdev(sizeof(struct xen_netif), name, ether_setup);
+ 	if (dev == NULL) {
+ 		DPRINTK("Could not create netif: out of memory\n");
+ 		return ERR_PTR(-ENOMEM);
+@@ -194,7 +197,7 @@ netif_t *netif_alloc(domid_t domid, unsigned int handle)
+ }
+ 
+ static int map_frontend_pages(
+-	netif_t *netif, grant_ref_t tx_ring_ref, grant_ref_t rx_ring_ref)
++	struct xen_netif *netif, grant_ref_t tx_ring_ref, grant_ref_t rx_ring_ref)
+ {
+ 	struct gnttab_map_grant_ref op;
+ 
+@@ -229,7 +232,7 @@ static int map_frontend_pages(
+ 	return 0;
+ }
+ 
+-static void unmap_frontend_pages(netif_t *netif)
++static void unmap_frontend_pages(struct xen_netif *netif)
+ {
+ 	struct gnttab_unmap_grant_ref op;
+ 
+@@ -246,12 +249,12 @@ static void unmap_frontend_pages(netif_t *netif)
+ 		BUG();
+ }
+ 
+-int netif_map(netif_t *netif, unsigned long tx_ring_ref,
++int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
+ 	      unsigned long rx_ring_ref, unsigned int evtchn)
+ {
+ 	int err = -ENOMEM;
+-	netif_tx_sring_t *txs;
+-	netif_rx_sring_t *rxs;
++	struct xen_netif_tx_sring *txs;
++	struct xen_netif_rx_sring *rxs;
+ 
+ 	/* Already connected through? */
+ 	if (netif->irq)
+@@ -276,10 +279,10 @@ int netif_map(netif_t *netif, unsigned long tx_ring_ref,
+ 	netif->irq = err;
+ 	disable_irq(netif->irq);
+ 
+-	txs = (netif_tx_sring_t *)netif->tx_comms_area->addr;
++	txs = (struct xen_netif_tx_sring *)netif->tx_comms_area->addr;
+ 	BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE);
+ 
+-	rxs = (netif_rx_sring_t *)
++	rxs = (struct xen_netif_rx_sring *)
+ 		((char *)netif->rx_comms_area->addr);
+ 	BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE);
+ 
+@@ -303,7 +306,7 @@ err_rx:
+ 	return err;
+ }
+ 
+-void netif_disconnect(netif_t *netif)
++void netif_disconnect(struct xen_netif *netif)
+ {
+ 	if (netback_carrier_ok(netif)) {
+ 		rtnl_lock();
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index db629d4..c959075 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -35,9 +35,17 @@
+  */
+ 
+ #include "common.h"
++
++#include <linux/tcp.h>
++#include <linux/udp.h>
++
+ #include <xen/balloon.h>
++#include <xen/events.h>
+ #include <xen/interface/memory.h>
+ 
++#include <asm/xen/hypercall.h>
++#include <asm/xen/page.h>
++
+ /*define NETBE_DEBUG_INTERRUPT*/
+ 
+ struct netbk_rx_meta {
+@@ -51,11 +59,12 @@ struct netbk_tx_pending_inuse {
+ 	unsigned long alloc_time;
+ };
+ 
++
+ static void netif_idx_release(u16 pending_idx);
+-static void make_tx_response(netif_t *netif,
+-			     netif_tx_request_t *txp,
++static void make_tx_response(struct xen_netif *netif,
++			     struct xen_netif_tx_request *txp,
+ 			     s8       st);
+-static netif_rx_response_t *make_rx_response(netif_t *netif,
++static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
+ 					     u16      id,
+ 					     s8       st,
+ 					     u16      offset,
+@@ -108,8 +117,8 @@ static inline int netif_page_index(struct page *pg)
+ #define PKT_PROT_LEN 64
+ 
+ static struct pending_tx_info {
+-	netif_tx_request_t req;
+-	netif_t *netif;
++	struct xen_netif_tx_request req;
++	struct xen_netif *netif;
+ } pending_tx_info[MAX_PENDING_REQS];
+ static u16 pending_ring[MAX_PENDING_REQS];
+ typedef unsigned int PEND_RING_IDX;
+@@ -128,8 +137,8 @@ static LIST_HEAD(pending_inuse_head);
+ static struct sk_buff_head tx_queue;
+ 
+ static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
+-static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
+-static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
++static struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
++static struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
+ 
+ static struct list_head net_schedule_list;
+ static spinlock_t net_schedule_list_lock;
+@@ -195,7 +204,7 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
+ 		goto err;
+ 
+ 	skb_reserve(nskb, 16 + NET_IP_ALIGN);
+-	headlen = nskb->end - nskb->data;
++	headlen = skb_end_pointer(nskb) - nskb->data;
+ 	if (headlen > skb_headlen(skb))
+ 		headlen = skb_headlen(skb);
+ 	ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
+@@ -243,9 +252,9 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
+ 
+ 	offset = nskb->data - skb->data;
+ 
+-	nskb->h.raw = skb->h.raw + offset;
+-	nskb->nh.raw = skb->nh.raw + offset;
+-	nskb->mac.raw = skb->mac.raw + offset;
++	nskb->transport_header = skb->transport_header + offset;
++	nskb->network_header = skb->network_header + offset;
++	nskb->mac_header = skb->mac_header + offset;
+ 
+ 	return nskb;
+ 
+@@ -255,14 +264,14 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
+ 	return NULL;
+ }
+ 
+-static inline int netbk_max_required_rx_slots(netif_t *netif)
++static inline int netbk_max_required_rx_slots(struct xen_netif *netif)
+ {
+ 	if (netif->features & (NETIF_F_SG|NETIF_F_TSO))
+ 		return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
+ 	return 1; /* all in one */
+ }
+ 
+-static inline int netbk_queue_full(netif_t *netif)
++static inline int netbk_queue_full(struct xen_netif *netif)
+ {
+ 	RING_IDX peek   = netif->rx_req_cons_peek;
+ 	RING_IDX needed = netbk_max_required_rx_slots(netif);
+@@ -273,14 +282,14 @@ static inline int netbk_queue_full(netif_t *netif)
+ 
+ static void tx_queue_callback(unsigned long data)
+ {
+-	netif_t *netif = (netif_t *)data;
++	struct xen_netif *netif = (struct xen_netif *)data;
+ 	if (netif_schedulable(netif))
+ 		netif_wake_queue(netif->dev);
+ }
+ 
+ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ {
+-	netif_t *netif = netdev_priv(dev);
++	struct xen_netif *netif = netdev_priv(dev);
+ 
+ 	BUG_ON(skb->dev != dev);
+ 
+@@ -302,7 +311,6 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ 		/* Copy only the header fields we use in this driver. */
+ 		nskb->dev = skb->dev;
+ 		nskb->ip_summed = skb->ip_summed;
+-		nskb->proto_data_valid = skb->proto_data_valid;
+ 		dev_kfree_skb(skb);
+ 		skb = nskb;
+ 	}
+@@ -366,25 +374,25 @@ struct netrx_pending_operations {
+ 	unsigned mcl_prod, mcl_cons;
+ 	unsigned copy_prod, copy_cons;
+ 	unsigned meta_prod, meta_cons;
+-	mmu_update_t *mmu;
+-	gnttab_transfer_t *trans;
+-	gnttab_copy_t *copy;
+-	multicall_entry_t *mcl;
++	struct mmu_update *mmu;
++	struct gnttab_transfer *trans;
++	struct gnttab_copy *copy;
++	struct multicall_entry *mcl;
+ 	struct netbk_rx_meta *meta;
+ };
+ 
+ /* Set up the grant operations for this fragment.  If it's a flipping
+    interface, we also set up the unmap request from here. */
+-static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta,
++static u16 netbk_gop_frag(struct xen_netif *netif, struct netbk_rx_meta *meta,
+ 			  int i, struct netrx_pending_operations *npo,
+ 			  struct page *page, unsigned long size,
+ 			  unsigned long offset)
+ {
+-	mmu_update_t *mmu;
+-	gnttab_transfer_t *gop;
+-	gnttab_copy_t *copy_gop;
+-	multicall_entry_t *mcl;
+-	netif_rx_request_t *req;
++	struct mmu_update *mmu;
++	struct gnttab_transfer *gop;
++	struct gnttab_copy *copy_gop;
++	struct multicall_entry *mcl;
++	struct xen_netif_rx_request *req;
+ 	unsigned long old_mfn, new_mfn;
+ 	int idx = netif_page_index(page);
+ 
+@@ -426,12 +434,12 @@ static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta,
+ 			mcl = npo->mcl + npo->mcl_prod++;
+ 			MULTI_update_va_mapping(mcl,
+ 					     (unsigned long)page_address(page),
+-					     pfn_pte_ma(new_mfn, PAGE_KERNEL),
++					     mfn_pte(new_mfn, PAGE_KERNEL),
+ 					     0);
+ 
+ 			mmu = npo->mmu + npo->mmu_prod++;
+-			mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
+-				MMU_MACHPHYS_UPDATE;
++			mmu->ptr = ((phys_addr_t)new_mfn << PAGE_SHIFT) |
++				    MMU_MACHPHYS_UPDATE;
+ 			mmu->val = page_to_pfn(page);
+ 		}
+ 
+@@ -446,7 +454,7 @@ static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta,
+ static void netbk_gop_skb(struct sk_buff *skb,
+ 			  struct netrx_pending_operations *npo)
+ {
+-	netif_t *netif = netdev_priv(skb->dev);
++	struct xen_netif *netif = netdev_priv(skb->dev);
+ 	int nr_frags = skb_shinfo(skb)->nr_frags;
+ 	int i;
+ 	int extra;
+@@ -494,9 +502,9 @@ static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
+ static int netbk_check_gop(int nr_frags, domid_t domid,
+ 			   struct netrx_pending_operations *npo)
+ {
+-	multicall_entry_t *mcl;
+-	gnttab_transfer_t *gop;
+-	gnttab_copy_t     *copy_op;
++	struct multicall_entry *mcl;
++	struct gnttab_transfer *gop;
++	struct gnttab_copy     *copy_op;
+ 	int status = NETIF_RSP_OKAY;
+ 	int i;
+ 
+@@ -534,7 +542,7 @@ static int netbk_check_gop(int nr_frags, domid_t domid,
+ 	return status;
+ }
+ 
+-static void netbk_add_frag_responses(netif_t *netif, int status,
++static void netbk_add_frag_responses(struct xen_netif *netif, int status,
+ 				     struct netbk_rx_meta *meta, int nr_frags)
+ {
+ 	int i;
+@@ -555,11 +563,11 @@ static void netbk_add_frag_responses(netif_t *netif, int status,
+ 
+ static void net_rx_action(unsigned long unused)
+ {
+-	netif_t *netif = NULL;
++	struct xen_netif *netif = NULL;
+ 	s8 status;
+ 	u16 id, irq, flags;
+-	netif_rx_response_t *resp;
+-	multicall_entry_t *mcl;
++	struct xen_netif_rx_response *resp;
++	struct multicall_entry *mcl;
+ 	struct sk_buff_head rxq;
+ 	struct sk_buff *skb;
+ 	int notify_nr = 0;
+@@ -572,10 +580,10 @@ static void net_rx_action(unsigned long unused)
+ 	 * Putting hundreds of bytes on the stack is considered rude.
+ 	 * Static works because a tasklet can only be on one CPU at any time.
+ 	 */
+-	static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+3];
+-	static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
+-	static gnttab_transfer_t grant_trans_op[NET_RX_RING_SIZE];
+-	static gnttab_copy_t grant_copy_op[NET_RX_RING_SIZE];
++	static struct multicall_entry rx_mcl[NET_RX_RING_SIZE+3];
++	static struct mmu_update rx_mmu[NET_RX_RING_SIZE];
++	static struct gnttab_transfer grant_trans_op[NET_RX_RING_SIZE];
++	static struct gnttab_copy grant_copy_op[NET_RX_RING_SIZE];
+ 	static unsigned char rx_notify[NR_IRQS];
+ 	static u16 notify_list[NET_RX_RING_SIZE];
+ 	static struct netbk_rx_meta meta[NET_RX_RING_SIZE];
+@@ -596,7 +604,7 @@ static void net_rx_action(unsigned long unused)
+ 		*(int *)skb->cb = nr_frags;
+ 
+ 		if (!xen_feature(XENFEAT_auto_translated_physmap) &&
+-		    !((netif_t *)netdev_priv(skb->dev))->copying_receiver &&
++		    !((struct xen_netif *)netdev_priv(skb->dev))->copying_receiver &&
+ 		    check_mfn(nr_frags + 1)) {
+ 			/* Memory squeeze? Back off for an arbitrary while. */
+ 			if ( net_ratelimit() )
+@@ -692,9 +700,10 @@ static void net_rx_action(unsigned long unused)
+ 		id = meta[npo.meta_cons].id;
+ 		flags = nr_frags ? NETRXF_more_data : 0;
+ 
+-		if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
++		if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
+ 			flags |= NETRXF_csum_blank | NETRXF_data_validated;
+-		else if (skb->proto_data_valid) /* remote but checksummed? */
++		else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
++			/* remote but checksummed. */
+ 			flags |= NETRXF_data_validated;
+ 
+ 		if (meta[npo.meta_cons].copy)
+@@ -705,8 +714,8 @@ static void net_rx_action(unsigned long unused)
+ 					skb_headlen(skb), flags);
+ 
+ 		if (meta[npo.meta_cons].frag.size) {
+-			struct netif_extra_info *gso =
+-				(struct netif_extra_info *)
++			struct xen_netif_extra_info *gso =
++				(struct xen_netif_extra_info *)
+ 				RING_GET_RESPONSE(&netif->rx,
+ 						  netif->rx.rsp_prod_pvt++);
+ 
+@@ -769,16 +778,16 @@ static void netbk_tx_pending_timeout(unsigned long unused)
+ 
+ struct net_device_stats *netif_be_get_stats(struct net_device *dev)
+ {
+-	netif_t *netif = netdev_priv(dev);
++	struct xen_netif *netif = netdev_priv(dev);
+ 	return &netif->stats;
+ }
+ 
+-static int __on_net_schedule_list(netif_t *netif)
++static int __on_net_schedule_list(struct xen_netif *netif)
+ {
+ 	return netif->list.next != NULL;
+ }
+ 
+-static void remove_from_net_schedule_list(netif_t *netif)
++static void remove_from_net_schedule_list(struct xen_netif *netif)
+ {
+ 	spin_lock_irq(&net_schedule_list_lock);
+ 	if (likely(__on_net_schedule_list(netif))) {
+@@ -789,7 +798,7 @@ static void remove_from_net_schedule_list(netif_t *netif)
+ 	spin_unlock_irq(&net_schedule_list_lock);
+ }
+ 
+-static void add_to_net_schedule_list_tail(netif_t *netif)
++static void add_to_net_schedule_list_tail(struct xen_netif *netif)
+ {
+ 	if (__on_net_schedule_list(netif))
+ 		return;
+@@ -811,7 +820,7 @@ static void add_to_net_schedule_list_tail(netif_t *netif)
+  * If we may be buffer transmit buffers for any reason then we must be rather
+  * more conservative and treat this as the final check for pending work.
+  */
+-void netif_schedule_work(netif_t *netif)
++void netif_schedule_work(struct xen_netif *netif)
+ {
+ 	int more_to_do;
+ 
+@@ -827,13 +836,13 @@ void netif_schedule_work(netif_t *netif)
+ 	}
+ }
+ 
+-void netif_deschedule_work(netif_t *netif)
++void netif_deschedule_work(struct xen_netif *netif)
+ {
+ 	remove_from_net_schedule_list(netif);
+ }
+ 
+ 
+-static void tx_add_credit(netif_t *netif)
++static void tx_add_credit(struct xen_netif *netif)
+ {
+ 	unsigned long max_burst, max_credit;
+ 
+@@ -855,7 +864,7 @@ static void tx_add_credit(netif_t *netif)
+ 
+ static void tx_credit_callback(unsigned long data)
+ {
+-	netif_t *netif = (netif_t *)data;
++	struct xen_netif *netif = (struct xen_netif *)data;
+ 	tx_add_credit(netif);
+ 	netif_schedule_work(netif);
+ }
+@@ -869,10 +878,10 @@ static inline int copy_pending_req(PEND_RING_IDX pending_idx)
+ inline static void net_tx_action_dealloc(void)
+ {
+ 	struct netbk_tx_pending_inuse *inuse, *n;
+-	gnttab_unmap_grant_ref_t *gop;
++	struct gnttab_unmap_grant_ref *gop;
+ 	u16 pending_idx;
+ 	PEND_RING_IDX dc, dp;
+-	netif_t *netif;
++	struct xen_netif *netif;
+ 	int ret;
+ 	LIST_HEAD(list);
+ 
+@@ -954,7 +963,7 @@ inline static void net_tx_action_dealloc(void)
+ 	}
+ }
+ 
+-static void netbk_tx_err(netif_t *netif, netif_tx_request_t *txp, RING_IDX end)
++static void netbk_tx_err(struct xen_netif *netif, struct xen_netif_tx_request *txp, RING_IDX end)
+ {
+ 	RING_IDX cons = netif->tx.req_cons;
+ 
+@@ -969,8 +978,8 @@ static void netbk_tx_err(netif_t *netif, netif_tx_request_t *txp, RING_IDX end)
+ 	netif_put(netif);
+ }
+ 
+-static int netbk_count_requests(netif_t *netif, netif_tx_request_t *first,
+-				netif_tx_request_t *txp, int work_to_do)
++static int netbk_count_requests(struct xen_netif *netif, struct xen_netif_tx_request *first,
++				struct xen_netif_tx_request *txp, int work_to_do)
+ {
+ 	RING_IDX cons = netif->tx.req_cons;
+ 	int frags = 0;
+@@ -1009,10 +1018,10 @@ static int netbk_count_requests(netif_t *netif, netif_tx_request_t *first,
+ 	return frags;
+ }
+ 
+-static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif,
++static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netif *netif,
+ 						  struct sk_buff *skb,
+-						  netif_tx_request_t *txp,
+-						  gnttab_map_grant_ref_t *mop)
++						  struct xen_netif_tx_request *txp,
++						  struct gnttab_map_grant_ref *mop)
+ {
+ 	struct skb_shared_info *shinfo = skb_shinfo(skb);
+ 	skb_frag_t *frags = shinfo->frags;
+@@ -1039,12 +1048,12 @@ static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif,
+ }
+ 
+ static int netbk_tx_check_mop(struct sk_buff *skb,
+-			       gnttab_map_grant_ref_t **mopp)
++			       struct gnttab_map_grant_ref **mopp)
+ {
+-	gnttab_map_grant_ref_t *mop = *mopp;
++	struct gnttab_map_grant_ref *mop = *mopp;
+ 	int pending_idx = *((u16 *)skb->data);
+-	netif_t *netif = pending_tx_info[pending_idx].netif;
+-	netif_tx_request_t *txp;
++	struct xen_netif *netif = pending_tx_info[pending_idx].netif;
++	struct xen_netif_tx_request *txp;
+ 	struct skb_shared_info *shinfo = skb_shinfo(skb);
+ 	int nr_frags = shinfo->nr_frags;
+ 	int i, err, start;
+@@ -1118,7 +1127,7 @@ static void netbk_fill_frags(struct sk_buff *skb)
+ 
+ 	for (i = 0; i < nr_frags; i++) {
+ 		skb_frag_t *frag = shinfo->frags + i;
+-		netif_tx_request_t *txp;
++		struct xen_netif_tx_request *txp;
+ 		unsigned long pending_idx;
+ 
+ 		pending_idx = (unsigned long)frag->page;
+@@ -1138,10 +1147,10 @@ static void netbk_fill_frags(struct sk_buff *skb)
+ 	}
+ }
+ 
+-int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras,
++int netbk_get_extras(struct xen_netif *netif, struct xen_netif_extra_info *extras,
+ 		     int work_to_do)
+ {
+-	struct netif_extra_info extra;
++	struct xen_netif_extra_info extra;
+ 	RING_IDX cons = netif->tx.req_cons;
+ 
+ 	do {
+@@ -1166,7 +1175,7 @@ int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras,
+ 	return work_to_do;
+ }
+ 
+-static int netbk_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso)
++static int netbk_set_skb_gso(struct sk_buff *skb, struct xen_netif_extra_info *gso)
+ {
+ 	if (!gso->u.gso.size) {
+ 		DPRINTK("GSO size must not be zero.\n");
+@@ -1189,18 +1198,57 @@ static int netbk_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso)
+ 	return 0;
+ }
+ 
++static int skb_checksum_setup(struct sk_buff *skb)
++{
++	struct iphdr *iph;
++	unsigned char *th;
++	int err = -EPROTO;
++
++	if (skb->protocol != htons(ETH_P_IP))
++		goto out;
++
++	iph = (void *)skb->data;
++	th = skb->data + 4 * iph->ihl;
++	if (th >= skb_tail_pointer(skb))
++		goto out;
++
++	skb->csum_start = th - skb->head;
++	switch (iph->protocol) {
++	case IPPROTO_TCP:
++		skb->csum_offset = offsetof(struct tcphdr, check);
++		break;
++	case IPPROTO_UDP:
++		skb->csum_offset = offsetof(struct udphdr, check);
++		break;
++	default:
++		if (net_ratelimit())
++			printk(KERN_ERR "Attempting to checksum a non-"
++			       "TCP/UDP packet, dropping a protocol"
++			       " %d packet", iph->protocol);
++		goto out;
++	}
++
++	if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
++		goto out;
++
++	err = 0;
++
++out:
++	return err;
++}
++
+ /* Called after netfront has transmitted */
+ static void net_tx_action(unsigned long unused)
+ {
+ 	struct list_head *ent;
+ 	struct sk_buff *skb;
+-	netif_t *netif;
+-	netif_tx_request_t txreq;
+-	netif_tx_request_t txfrags[MAX_SKB_FRAGS];
+-	struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
++	struct xen_netif *netif;
++	struct xen_netif_tx_request txreq;
++	struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
++	struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
+ 	u16 pending_idx;
+ 	RING_IDX i;
+-	gnttab_map_grant_ref_t *mop;
++	struct gnttab_map_grant_ref *mop;
+ 	unsigned int data_len;
+ 	int ret, work_to_do;
+ 
+@@ -1212,7 +1260,7 @@ static void net_tx_action(unsigned long unused)
+ 		!list_empty(&net_schedule_list)) {
+ 		/* Get a netif from the list with work to do. */
+ 		ent = net_schedule_list.next;
+-		netif = list_entry(ent, netif_t, list);
++		netif = list_entry(ent, struct xen_netif, list);
+ 		netif_get(netif);
+ 		remove_from_net_schedule_list(netif);
+ 
+@@ -1313,7 +1361,7 @@ static void net_tx_action(unsigned long unused)
+ 		skb_reserve(skb, 16 + NET_IP_ALIGN);
+ 
+ 		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
+-			struct netif_extra_info *gso;
++			struct xen_netif_extra_info *gso;
+ 			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
+ 
+ 			if (netbk_set_skb_gso(skb, gso)) {
+@@ -1372,7 +1420,7 @@ static void net_tx_action(unsigned long unused)
+ 
+ 	mop = tx_map_ops;
+ 	while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
+-		netif_tx_request_t *txp;
++		struct xen_netif_tx_request *txp;
+ 
+ 		pending_idx = *((u16 *)skb->data);
+ 		netif       = pending_tx_info[pending_idx].netif;
+@@ -1403,14 +1451,10 @@ static void net_tx_action(unsigned long unused)
+ 		 * Old frontends do not assert data_validated but we
+ 		 * can infer it from csum_blank so test both flags.
+ 		 */
+-		if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank)) {
+-			skb->ip_summed = CHECKSUM_UNNECESSARY;
+-			skb->proto_data_valid = 1;
+-		} else {
++		if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank))
++			skb->ip_summed = CHECKSUM_PARTIAL;
++		else
+ 			skb->ip_summed = CHECKSUM_NONE;
+-			skb->proto_data_valid = 0;
+-		}
+-		skb->proto_csum_blank = !!(txp->flags & NETTXF_csum_blank);
+ 
+ 		netbk_fill_frags(skb);
+ 
+@@ -1420,6 +1464,14 @@ static void net_tx_action(unsigned long unused)
+ 		netif->stats.rx_bytes += skb->len;
+ 		netif->stats.rx_packets++;
+ 
++		if (skb->ip_summed == CHECKSUM_PARTIAL) {
++			if (skb_checksum_setup(skb)) {
++				DPRINTK("Can't setup checksum in net_tx_action\n");
++				kfree_skb(skb);
++				continue;
++			}
++		}
++
+ 		if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
+ 		    unlikely(skb_linearize(skb))) {
+ 			DPRINTK("Can't linearize skb in net_tx_action.\n");
+@@ -1464,9 +1516,9 @@ static void netif_page_release(struct page *page, unsigned int order)
+ 	netif_idx_release(idx);
+ }
+ 
+-irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
++irqreturn_t netif_be_int(int irq, void *dev_id)
+ {
+-	netif_t *netif = dev_id;
++	struct xen_netif *netif = dev_id;
+ 
+ 	add_to_net_schedule_list_tail(netif);
+ 	maybe_schedule_tx_action();
+@@ -1477,12 +1529,12 @@ irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
+ 	return IRQ_HANDLED;
+ }
+ 
+-static void make_tx_response(netif_t *netif,
+-			     netif_tx_request_t *txp,
++static void make_tx_response(struct xen_netif *netif,
++			     struct xen_netif_tx_request *txp,
+ 			     s8       st)
+ {
+ 	RING_IDX i = netif->tx.rsp_prod_pvt;
+-	netif_tx_response_t *resp;
++	struct xen_netif_tx_response *resp;
+ 	int notify;
+ 
+ 	resp = RING_GET_RESPONSE(&netif->tx, i);
+@@ -1507,7 +1559,7 @@ static void make_tx_response(netif_t *netif,
+ #endif
+ }
+ 
+-static netif_rx_response_t *make_rx_response(netif_t *netif,
++static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
+ 					     u16      id,
+ 					     s8       st,
+ 					     u16      offset,
+@@ -1515,7 +1567,7 @@ static netif_rx_response_t *make_rx_response(netif_t *netif,
+ 					     u16      flags)
+ {
+ 	RING_IDX i = netif->rx.rsp_prod_pvt;
+-	netif_rx_response_t *resp;
++	struct xen_netif_rx_response *resp;
+ 
+ 	resp = RING_GET_RESPONSE(&netif->rx, i);
+ 	resp->offset     = offset;
+@@ -1534,14 +1586,14 @@ static netif_rx_response_t *make_rx_response(netif_t *netif,
+ static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
+ {
+ 	struct list_head *ent;
+-	netif_t *netif;
++	struct xen_netif *netif;
+ 	int i = 0;
+ 
+ 	printk(KERN_ALERT "netif_schedule_list:\n");
+ 	spin_lock_irq(&net_schedule_list_lock);
+ 
+ 	list_for_each (ent, &net_schedule_list) {
+-		netif = list_entry(ent, netif_t, list);
++		netif = list_entry(ent, struct xen_netif, list);
+ 		printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
+ 		       "rx_resp_prod=%08x\n",
+ 		       i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
+@@ -1569,11 +1621,13 @@ static int __init netback_init(void)
+ 	int i;
+ 	struct page *page;
+ 
+-	if (!is_running_on_xen())
++	printk(KERN_CRIT "*** netif_init\n");
++
++	if (!xen_domain())
+ 		return -ENODEV;
+ 
+ 	/* We can increase reservation by this much in net_rx_action(). */
+-	balloon_update_driver_allowance(NET_RX_RING_SIZE);
++//	balloon_update_driver_allowance(NET_RX_RING_SIZE);
+ 
+ 	skb_queue_head_init(&rx_queue);
+ 	skb_queue_head_init(&tx_queue);
+@@ -1616,7 +1670,7 @@ static int __init netback_init(void)
+ 			netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
+ 	}
+ 
+-	netif_accel_init();
++	//netif_accel_init();
+ 
+ 	netif_xenbus_init();
+ 
+diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
+index d7faeb6..ed7c006 100644
+--- a/drivers/xen/netback/xenbus.c
++++ b/drivers/xen/netback/xenbus.c
+@@ -37,7 +37,7 @@ static int netback_remove(struct xenbus_device *dev)
+ {
+ 	struct backend_info *be = dev->dev.driver_data;
+ 
+-	netback_remove_accelerators(be, dev);
++	//netback_remove_accelerators(be, dev);
+ 
+ 	if (be->netif) {
+ 		kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
+@@ -123,7 +123,7 @@ static int netback_probe(struct xenbus_device *dev,
+ 		goto fail;
+ 	}
+ 
+-	netback_probe_accelerators(be, dev);
++	//netback_probe_accelerators(be, dev);
+ 
+ 	err = xenbus_switch_state(dev, XenbusStateInitWait);
+ 	if (err)
+@@ -149,12 +149,10 @@ fail:
+  * and vif variables to the environment, for the benefit of the vif-* hotplug
+  * scripts.
+  */
+-static int netback_uevent(struct xenbus_device *xdev, char **envp,
+-			  int num_envp, char *buffer, int buffer_size)
++static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env)
+ {
+ 	struct backend_info *be = xdev->dev.driver_data;
+-	netif_t *netif = be->netif;
+-	int i = 0, length = 0;
++	struct xen_netif *netif = be->netif;
+ 	char *val;
+ 
+ 	DPRINTK("netback_uevent");
+@@ -166,15 +164,15 @@ static int netback_uevent(struct xenbus_device *xdev, char **envp,
+ 		return err;
+ 	}
+ 	else {
+-		add_uevent_var(envp, num_envp, &i, buffer, buffer_size,
+-			       &length, "script=%s", val);
++		if (add_uevent_var(env, "script=%s", val)) {
++			kfree(val);
++			return -ENOMEM;
++		}
+ 		kfree(val);
+ 	}
+ 
+-	add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
+-		       "vif=%s", netif->dev->name);
+-
+-	envp[i] = NULL;
++	if (add_uevent_var(env, "vif=%s", netif->dev->name))
++		return -ENOMEM;
+ 
+ 	return 0;
+ }
+@@ -450,5 +448,6 @@ static struct xenbus_driver netback = {
+ 
+ void netif_xenbus_init(void)
+ {
+-	xenbus_register_backend(&netback);
++	printk(KERN_CRIT "registering netback\n");
++	(void)xenbus_register_backend(&netback);
+ }
+-- 
+1.7.3.4
+
+
+From a41a2ab9e1ac4ef8320f69f2719e973e25faff5c Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy at goop.org>
+Date: Mon, 9 Feb 2009 16:39:01 -0800
+Subject: [PATCH 053/139] xen: netback: don't include xen/evtchn.h
+
+It's a usermode header for users of /dev/evtchn.
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy at goop.org>
+---
+ drivers/xen/netback/common.h |    1 -
+ 1 files changed, 0 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 65b88f4..5665ed1 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -38,7 +38,6 @@
+ #include <linux/netdevice.h>
+ #include <linux/etherdevice.h>
+ #include <linux/wait.h>
+-#include <xen/evtchn.h>
+ #include <xen/interface/io/netif.h>
+ #include <asm/io.h>
+ #include <asm/pgalloc.h>
+-- 
+1.7.3.4
+
+
+From f28a7c6148bb979acf99c0cbe3b441d0fb0853d9 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Wed, 18 Feb 2009 15:55:18 -0800
+Subject: [PATCH 054/139] xen: netback: use mod_timer
+
+__mod_timer is no longer a public API.
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c |    4 ++--
+ 1 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index c959075..e920703 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -334,7 +334,7 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ 			 */
+ 			netif->tx_queue_timeout.data = (unsigned long)netif;
+ 			netif->tx_queue_timeout.function = tx_queue_callback;
+-			__mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
++			mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
+ 		}
+ 	}
+ 
+@@ -1299,7 +1299,7 @@ static void net_tx_action(unsigned long unused)
+ 					(unsigned long)netif;
+ 				netif->credit_timeout.function =
+ 					tx_credit_callback;
+-				__mod_timer(&netif->credit_timeout,
++				mod_timer(&netif->credit_timeout,
+ 					    next_credit);
+ 				netif_put(netif);
+ 				continue;
+-- 
+1.7.3.4
+
+
+From 52f97ad360f28762c785343ba5c9f8abb83536f3 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich at novell.com>
+Date: Fri, 6 Mar 2009 08:29:31 +0000
+Subject: [PATCH 055/139] xen: netback: unmap tx ring gref when mapping of rx ring gref failed
+
+[ijc-ported from linux-2.6.18-xen.hg 782:51decc39e5e7]
+Signed-off-by: Jan Beulich <jbeulich at novell.com>
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/interface.c |    6 ++++++
+ 1 files changed, 6 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
+index d184ad7..f3d9ea1 100644
+--- a/drivers/xen/netback/interface.c
++++ b/drivers/xen/netback/interface.c
+@@ -222,6 +222,12 @@ static int map_frontend_pages(
+ 		BUG();
+ 
+ 	if (op.status) {
++		struct gnttab_unmap_grant_ref unop;
++
++		gnttab_set_unmap_op(&unop,
++				    (unsigned long)netif->tx_comms_area->addr,
++				    GNTMAP_host_map, netif->tx_shmem_handle);
++		HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unop, 1);
+ 		DPRINTK(" Gnttab failure mapping rx_ring_ref!\n");
+ 		return op.status;
+ 	}
+-- 
+1.7.3.4
+
+
+From f9b63790f1404eb03ac824147b2294a46e485643 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <Ian.Campbell at citrix.com>
+Date: Fri, 6 Mar 2009 08:29:32 +0000
+Subject: [PATCH 056/139] xen: netback: add ethtool stat to track copied skbs.
+
+Copied skbs should be rare but we have no way of verifying that.
+
+[ijc-ported from linux-2.6.18-xen.hg 792:db9857bb0320]
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/common.h    |    3 ++
+ drivers/xen/netback/interface.c |   47 +++++++++++++++++++++++++++++++++++++++
+ drivers/xen/netback/netback.c   |    6 ++++-
+ 3 files changed, 55 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 5665ed1..6ba804d 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -92,6 +92,9 @@ struct xen_netif {
+ 	/* Enforce draining of the transmit queue. */
+ 	struct timer_list tx_queue_timeout;
+ 
++	/* Statistics */
++	int nr_copied_skbs;
++
+ 	/* Miscellaneous private stuff. */
+ 	struct list_head list;  /* scheduling list */
+ 	atomic_t         refcnt;
+diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
+index f3d9ea1..1a99c87 100644
+--- a/drivers/xen/netback/interface.c
++++ b/drivers/xen/netback/interface.c
+@@ -119,8 +119,51 @@ static int netbk_set_tso(struct net_device *dev, u32 data)
+ 	return ethtool_op_set_tso(dev, data);
+ }
+ 
++static void netbk_get_drvinfo(struct net_device *dev,
++			      struct ethtool_drvinfo *info)
++{
++	strcpy(info->driver, "netbk");
++}
++
++static const struct netif_stat {
++	char name[ETH_GSTRING_LEN];
++	u16 offset;
++} netbk_stats[] = {
++	{ "copied_skbs", offsetof(struct xen_netif, nr_copied_skbs) },
++};
++
++static int netbk_get_stats_count(struct net_device *dev)
++{
++	return ARRAY_SIZE(netbk_stats);
++}
++
++static void netbk_get_ethtool_stats(struct net_device *dev,
++				   struct ethtool_stats *stats, u64 * data)
++{
++	void *netif = netdev_priv(dev);
++	int i;
++
++	for (i = 0; i < ARRAY_SIZE(netbk_stats); i++)
++		data[i] = *(int *)(netif + netbk_stats[i].offset);
++}
++
++static void netbk_get_strings(struct net_device *dev, u32 stringset, u8 * data)
++{
++	int i;
++
++	switch (stringset) {
++	case ETH_SS_STATS:
++		for (i = 0; i < ARRAY_SIZE(netbk_stats); i++)
++			memcpy(data + i * ETH_GSTRING_LEN,
++			       netbk_stats[i].name, ETH_GSTRING_LEN);
++		break;
++	}
++}
++
+ static struct ethtool_ops network_ethtool_ops =
+ {
++	.get_drvinfo = netbk_get_drvinfo,
++
+ 	.get_tx_csum = ethtool_op_get_tx_csum,
+ 	.set_tx_csum = ethtool_op_set_tx_csum,
+ 	.get_sg = ethtool_op_get_sg,
+@@ -128,6 +171,10 @@ static struct ethtool_ops network_ethtool_ops =
+ 	.get_tso = ethtool_op_get_tso,
+ 	.set_tso = netbk_set_tso,
+ 	.get_link = ethtool_op_get_link,
++
++	.get_stats_count = netbk_get_stats_count,
++	.get_ethtool_stats = netbk_get_ethtool_stats,
++	.get_strings = netbk_get_strings,
+ };
+ 
+ struct xen_netif *netif_alloc(domid_t domid, unsigned int handle)
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index e920703..f59fadb 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -923,7 +923,11 @@ inline static void net_tx_action_dealloc(void)
+ 			if (time_after(inuse->alloc_time + HZ / 2, jiffies))
+ 				break;
+ 
+-			switch (copy_pending_req(inuse - pending_inuse)) {
++			pending_idx = inuse - pending_inuse;
++
++			pending_tx_info[pending_idx].netif->nr_copied_skbs++;
++
++			switch (copy_pending_req(pending_idx)) {
+ 			case 0:
+ 				list_move_tail(&inuse->list, &list);
+ 				continue;
+-- 
+1.7.3.4
+
+
+From c41d8da3d853d4e89ba38693b90c1fe512095704 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <Ian.Campbell at citrix.com>
+Date: Fri, 6 Mar 2009 08:29:33 +0000
+Subject: [PATCH 057/139] xen: netback: make queue length parameter writeable in sysfs
+
+Any changes will only take effect for newly created VIFs.
+
+Also hook up the vif devices to their parent and publish bus info via
+ethtool.
+
+[ijc-ported from linux-2.6.18-xen.hg 793:3aa9b8a7876b]
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/interface.c |    3 ++-
+ drivers/xen/netback/xenbus.c    |    1 +
+ 2 files changed, 3 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
+index 1a99c87..7706170 100644
+--- a/drivers/xen/netback/interface.c
++++ b/drivers/xen/netback/interface.c
+@@ -52,7 +52,7 @@
+  * blocked.
+  */
+ static unsigned long netbk_queue_length = 32;
+-module_param_named(queue_length, netbk_queue_length, ulong, 0);
++module_param_named(queue_length, netbk_queue_length, ulong, 0644);
+ 
+ static void __netif_up(struct xen_netif *netif)
+ {
+@@ -123,6 +123,7 @@ static void netbk_get_drvinfo(struct net_device *dev,
+ 			      struct ethtool_drvinfo *info)
+ {
+ 	strcpy(info->driver, "netbk");
++	strcpy(info->bus_info, dev->dev.parent->bus_id);
+ }
+ 
+ static const struct netif_stat {
+diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
+index ed7c006..dc7b367 100644
+--- a/drivers/xen/netback/xenbus.c
++++ b/drivers/xen/netback/xenbus.c
+@@ -200,6 +200,7 @@ static void backend_create_netif(struct backend_info *be)
+ 		xenbus_dev_fatal(dev, err, "creating interface");
+ 		return;
+ 	}
++	SET_NETDEV_DEV(be->netif->dev, &dev->dev);
+ 
+ 	kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
+ }
+-- 
+1.7.3.4
+
+
+From f204d7567ab11ddb1ff3208ab5ed8921b575af5d Mon Sep 17 00:00:00 2001
+From: Ian Campbell <Ian.Campbell at citrix.com>
+Date: Mon, 16 Mar 2009 22:05:16 +0000
+Subject: [PATCH 058/139] xen: netback: parent sysfs device should be set before registering.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/common.h    |    2 +-
+ drivers/xen/netback/interface.c |    4 +++-
+ drivers/xen/netback/xenbus.c    |    3 +--
+ 3 files changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 6ba804d..123a169 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -180,7 +180,7 @@ void netif_accel_init(void);
+ 
+ void netif_disconnect(struct xen_netif *netif);
+ 
+-struct xen_netif *netif_alloc(domid_t domid, unsigned int handle);
++struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle);
+ int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
+ 	      unsigned long rx_ring_ref, unsigned int evtchn);
+ 
+diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
+index 7706170..5e0d26d 100644
+--- a/drivers/xen/netback/interface.c
++++ b/drivers/xen/netback/interface.c
+@@ -178,7 +178,7 @@ static struct ethtool_ops network_ethtool_ops =
+ 	.get_strings = netbk_get_strings,
+ };
+ 
+-struct xen_netif *netif_alloc(domid_t domid, unsigned int handle)
++struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle)
+ {
+ 	int err = 0;
+ 	struct net_device *dev;
+@@ -192,6 +192,8 @@ struct xen_netif *netif_alloc(domid_t domid, unsigned int handle)
+ 		return ERR_PTR(-ENOMEM);
+ 	}
+ 
++	SET_NETDEV_DEV(dev, parent);
++
+ 	netif = netdev_priv(dev);
+ 	memset(netif, 0, sizeof(*netif));
+ 	netif->domid  = domid;
+diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
+index dc7b367..749931e 100644
+--- a/drivers/xen/netback/xenbus.c
++++ b/drivers/xen/netback/xenbus.c
+@@ -193,14 +193,13 @@ static void backend_create_netif(struct backend_info *be)
+ 		return;
+ 	}
+ 
+-	be->netif = netif_alloc(dev->otherend_id, handle);
++	be->netif = netif_alloc(&dev->dev, dev->otherend_id, handle);
+ 	if (IS_ERR(be->netif)) {
+ 		err = PTR_ERR(be->netif);
+ 		be->netif = NULL;
+ 		xenbus_dev_fatal(dev, err, "creating interface");
+ 		return;
+ 	}
+-	SET_NETDEV_DEV(be->netif->dev, &dev->dev);
+ 
+ 	kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
+ }
+-- 
+1.7.3.4
+
+
+From bb606178665ea78b505cb54864899478b6020584 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Thu, 19 Mar 2009 12:42:36 -0700
+Subject: [PATCH 059/139] xen: netback: use NET_SKB_PAD rather than "16"
+
+There's a constant for the default skb headroom.
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c |    6 +++---
+ 1 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index f59fadb..400f398 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -203,7 +203,7 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
+ 	if (unlikely(!nskb))
+ 		goto err;
+ 
+-	skb_reserve(nskb, 16 + NET_IP_ALIGN);
++	skb_reserve(nskb, NET_SKB_PAD + NET_IP_ALIGN);
+ 	headlen = skb_end_pointer(nskb) - nskb->data;
+ 	if (headlen > skb_headlen(skb))
+ 		headlen = skb_headlen(skb);
+@@ -1353,7 +1353,7 @@ static void net_tx_action(unsigned long unused)
+ 			    ret < MAX_SKB_FRAGS) ?
+ 			PKT_PROT_LEN : txreq.size;
+ 
+-		skb = alloc_skb(data_len + 16 + NET_IP_ALIGN,
++		skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
+ 				GFP_ATOMIC | __GFP_NOWARN);
+ 		if (unlikely(skb == NULL)) {
+ 			DPRINTK("Can't allocate a skb in start_xmit.\n");
+@@ -1362,7 +1362,7 @@ static void net_tx_action(unsigned long unused)
+ 		}
+ 
+ 		/* Packets passed to netif_rx() must have some headroom. */
+-		skb_reserve(skb, 16 + NET_IP_ALIGN);
++		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
+ 
+ 		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
+ 			struct xen_netif_extra_info *gso;
+-- 
+1.7.3.4
+
+
+From fe41ab031dfa0c6f9821c2667ce821e7f4f635ed Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Thu, 19 Mar 2009 13:31:26 -0700
+Subject: [PATCH 060/139] xen: netback: completely drop flip support
+
+Nobody uses it?
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/common.h  |    1 -
+ drivers/xen/netback/netback.c |  245 ++++-------------------------------------
+ drivers/xen/netback/xenbus.c  |    3 +-
+ 3 files changed, 22 insertions(+), 227 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 123a169..06f04c1 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -78,7 +78,6 @@ struct xen_netif {
+ 
+ 	/* Internal feature information. */
+ 	u8 can_queue:1;	/* can queue packets for receiver? */
+-	u8 copying_receiver:1;	/* copy packets to receiver?       */
+ 
+ 	/* Allow netif_be_start_xmit() to peek ahead in the rx request ring. */
+ 	RING_IDX rx_req_cons_peek;
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 400f398..36bea2b 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -51,7 +51,6 @@
+ struct netbk_rx_meta {
+ 	skb_frag_t frag;
+ 	int id;
+-	u8 copy:1;
+ };
+ 
+ struct netbk_tx_pending_inuse {
+@@ -160,26 +159,6 @@ static inline unsigned long alloc_mfn(void)
+ 	return mfn_list[--alloc_index];
+ }
+ 
+-static int check_mfn(int nr)
+-{
+-	struct xen_memory_reservation reservation = {
+-		.extent_order = 0,
+-		.domid        = DOMID_SELF
+-	};
+-	int rc;
+-
+-	if (likely(alloc_index >= nr))
+-		return 0;
+-
+-	set_xen_guest_handle(reservation.extent_start, mfn_list + alloc_index);
+-	reservation.nr_extents = MAX_MFN_ALLOC - alloc_index;
+-	rc = HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation);
+-	if (likely(rc > 0))
+-		alloc_index += rc;
+-
+-	return alloc_index >= nr ? 0 : -ENOMEM;
+-}
+-
+ static inline void maybe_schedule_tx_action(void)
+ {
+ 	smp_mb();
+@@ -188,82 +167,6 @@ static inline void maybe_schedule_tx_action(void)
+ 		tasklet_schedule(&net_tx_tasklet);
+ }
+ 
+-static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
+-{
+-	struct skb_shared_info *ninfo;
+-	struct sk_buff *nskb;
+-	unsigned long offset;
+-	int ret;
+-	int len;
+-	int headlen;
+-
+-	BUG_ON(skb_shinfo(skb)->frag_list != NULL);
+-
+-	nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN);
+-	if (unlikely(!nskb))
+-		goto err;
+-
+-	skb_reserve(nskb, NET_SKB_PAD + NET_IP_ALIGN);
+-	headlen = skb_end_pointer(nskb) - nskb->data;
+-	if (headlen > skb_headlen(skb))
+-		headlen = skb_headlen(skb);
+-	ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
+-	BUG_ON(ret);
+-
+-	ninfo = skb_shinfo(nskb);
+-	ninfo->gso_size = skb_shinfo(skb)->gso_size;
+-	ninfo->gso_type = skb_shinfo(skb)->gso_type;
+-
+-	offset = headlen;
+-	len = skb->len - headlen;
+-
+-	nskb->len = skb->len;
+-	nskb->data_len = len;
+-	nskb->truesize += len;
+-
+-	while (len) {
+-		struct page *page;
+-		int copy;
+-		int zero;
+-
+-		if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
+-			dump_stack();
+-			goto err_free;
+-		}
+-
+-		copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
+-		zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
+-
+-		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
+-		if (unlikely(!page))
+-			goto err_free;
+-
+-		ret = skb_copy_bits(skb, offset, page_address(page), copy);
+-		BUG_ON(ret);
+-
+-		ninfo->frags[ninfo->nr_frags].page = page;
+-		ninfo->frags[ninfo->nr_frags].page_offset = 0;
+-		ninfo->frags[ninfo->nr_frags].size = copy;
+-		ninfo->nr_frags++;
+-
+-		offset += copy;
+-		len -= copy;
+-	}
+-
+-	offset = nskb->data - skb->data;
+-
+-	nskb->transport_header = skb->transport_header + offset;
+-	nskb->network_header = skb->network_header + offset;
+-	nskb->mac_header = skb->mac_header + offset;
+-
+-	return nskb;
+-
+- err_free:
+-	kfree_skb(nskb);
+- err:
+-	return NULL;
+-}
+-
+ static inline int netbk_max_required_rx_slots(struct xen_netif *netif)
+ {
+ 	if (netif->features & (NETIF_F_SG|NETIF_F_TSO))
+@@ -297,24 +200,6 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ 	if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
+ 		goto drop;
+ 
+-	/*
+-	 * Copy the packet here if it's destined for a flipping interface
+-	 * but isn't flippable (e.g. extra references to data).
+-	 * XXX For now we also copy skbuffs whose head crosses a page
+-	 * boundary, because netbk_gop_skb can't handle them.
+-	 */
+-	if (!netif->copying_receiver ||
+-	    ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE)) {
+-		struct sk_buff *nskb = netbk_copy_skb(skb);
+-		if ( unlikely(nskb == NULL) )
+-			goto drop;
+-		/* Copy only the header fields we use in this driver. */
+-		nskb->dev = skb->dev;
+-		nskb->ip_summed = skb->ip_summed;
+-		dev_kfree_skb(skb);
+-		skb = nskb;
+-	}
+-
+ 	netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 +
+ 				   !!skb_shinfo(skb)->gso_size;
+ 	netif_get(netif);
+@@ -388,66 +273,32 @@ static u16 netbk_gop_frag(struct xen_netif *netif, struct netbk_rx_meta *meta,
+ 			  struct page *page, unsigned long size,
+ 			  unsigned long offset)
+ {
+-	struct mmu_update *mmu;
+-	struct gnttab_transfer *gop;
+ 	struct gnttab_copy *copy_gop;
+-	struct multicall_entry *mcl;
+ 	struct xen_netif_rx_request *req;
+-	unsigned long old_mfn, new_mfn;
++	unsigned long old_mfn;
+ 	int idx = netif_page_index(page);
+ 
+ 	old_mfn = virt_to_mfn(page_address(page));
+ 
+ 	req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
+-	if (netif->copying_receiver) {
+-		/* The fragment needs to be copied rather than
+-		   flipped. */
+-		meta->copy = 1;
+-		copy_gop = npo->copy + npo->copy_prod++;
+-		copy_gop->flags = GNTCOPY_dest_gref;
+-		if (idx > -1) {
+-			struct pending_tx_info *src_pend = &pending_tx_info[idx];
+-			copy_gop->source.domid = src_pend->netif->domid;
+-			copy_gop->source.u.ref = src_pend->req.gref;
+-			copy_gop->flags |= GNTCOPY_source_gref;
+-		} else {
+-			copy_gop->source.domid = DOMID_SELF;
+-			copy_gop->source.u.gmfn = old_mfn;
+-		}
+-		copy_gop->source.offset = offset;
+-		copy_gop->dest.domid = netif->domid;
+-		copy_gop->dest.offset = 0;
+-		copy_gop->dest.u.ref = req->gref;
+-		copy_gop->len = size;
+-	} else {
+-		meta->copy = 0;
+-		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+-			new_mfn = alloc_mfn();
+-
+-			/*
+-			 * Set the new P2M table entry before
+-			 * reassigning the old data page. Heed the
+-			 * comment in pgtable-2level.h:pte_page(). :-)
+-			 */
+-			set_phys_to_machine(page_to_pfn(page), new_mfn);
+-
+-			mcl = npo->mcl + npo->mcl_prod++;
+-			MULTI_update_va_mapping(mcl,
+-					     (unsigned long)page_address(page),
+-					     mfn_pte(new_mfn, PAGE_KERNEL),
+-					     0);
+-
+-			mmu = npo->mmu + npo->mmu_prod++;
+-			mmu->ptr = ((phys_addr_t)new_mfn << PAGE_SHIFT) |
+-				    MMU_MACHPHYS_UPDATE;
+-			mmu->val = page_to_pfn(page);
+-		}
+ 
+-		gop = npo->trans + npo->trans_prod++;
+-		gop->mfn = old_mfn;
+-		gop->domid = netif->domid;
+-		gop->ref = req->gref;
++	copy_gop = npo->copy + npo->copy_prod++;
++	copy_gop->flags = GNTCOPY_dest_gref;
++	if (idx > -1) {
++		struct pending_tx_info *src_pend = &pending_tx_info[idx];
++		copy_gop->source.domid = src_pend->netif->domid;
++		copy_gop->source.u.ref = src_pend->req.gref;
++		copy_gop->flags |= GNTCOPY_source_gref;
++	} else {
++		copy_gop->source.domid = DOMID_SELF;
++		copy_gop->source.u.gmfn = old_mfn;
+ 	}
++	copy_gop->source.offset = offset;
++	copy_gop->dest.domid = netif->domid;
++	copy_gop->dest.offset = 0;
++	copy_gop->dest.u.ref = req->gref;
++	copy_gop->len = size;
++
+ 	return req->id;
+ }
+ 
+@@ -502,41 +353,17 @@ static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
+ static int netbk_check_gop(int nr_frags, domid_t domid,
+ 			   struct netrx_pending_operations *npo)
+ {
+-	struct multicall_entry *mcl;
+-	struct gnttab_transfer *gop;
+ 	struct gnttab_copy     *copy_op;
+ 	int status = NETIF_RSP_OKAY;
+ 	int i;
+ 
+ 	for (i = 0; i <= nr_frags; i++) {
+-		if (npo->meta[npo->meta_cons + i].copy) {
+ 			copy_op = npo->copy + npo->copy_cons++;
+ 			if (copy_op->status != GNTST_okay) {
+ 				DPRINTK("Bad status %d from copy to DOM%d.\n",
+ 					copy_op->status, domid);
+ 				status = NETIF_RSP_ERROR;
+ 			}
+-		} else {
+-			if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+-				mcl = npo->mcl + npo->mcl_cons++;
+-				/* The update_va_mapping() must not fail. */
+-				BUG_ON(mcl->result != 0);
+-			}
+-
+-			gop = npo->trans + npo->trans_cons++;
+-			/* Check the reassignment error code. */
+-			if (gop->status != 0) {
+-				DPRINTK("Bad status %d from grant transfer to DOM%u\n",
+-					gop->status, domid);
+-				/*
+-				 * Page no longer belongs to us unless
+-				 * GNTST_bad_page, but that should be
+-				 * a fatal error anyway.
+-				 */
+-				BUG_ON(gop->status == GNTST_bad_page);
+-				status = NETIF_RSP_ERROR;
+-			}
+-		}
+ 	}
+ 
+ 	return status;
+@@ -551,11 +378,8 @@ static void netbk_add_frag_responses(struct xen_netif *netif, int status,
+ 	for (i = 0; i < nr_frags; i++) {
+ 		int id = meta[i].id;
+ 		int flags = (i == nr_frags - 1) ? 0 : NETRXF_more_data;
+-
+-		if (meta[i].copy)
+-			offset = 0;
+-		else
+-			offset = meta[i].frag.page_offset;
++		
++		offset = 0;
+ 		make_rx_response(netif, id, status, offset,
+ 				 meta[i].frag.size, flags);
+ 	}
+@@ -603,18 +427,6 @@ static void net_rx_action(unsigned long unused)
+ 		nr_frags = skb_shinfo(skb)->nr_frags;
+ 		*(int *)skb->cb = nr_frags;
+ 
+-		if (!xen_feature(XENFEAT_auto_translated_physmap) &&
+-		    !((struct xen_netif *)netdev_priv(skb->dev))->copying_receiver &&
+-		    check_mfn(nr_frags + 1)) {
+-			/* Memory squeeze? Back off for an arbitrary while. */
+-			if ( net_ratelimit() )
+-				WPRINTK("Memory squeeze in netback "
+-					"driver.\n");
+-			mod_timer(&net_timer, jiffies + HZ);
+-			skb_queue_head(&rx_queue, skb);
+-			break;
+-		}
+-
+ 		netbk_gop_skb(skb, &npo);
+ 
+ 		count += nr_frags + 1;
+@@ -677,20 +489,6 @@ static void net_rx_action(unsigned long unused)
+ 		nr_frags = *(int *)skb->cb;
+ 
+ 		netif = netdev_priv(skb->dev);
+-		/* We can't rely on skb_release_data to release the
+-		   pages used by fragments for us, since it tries to
+-		   touch the pages in the fraglist.  If we're in
+-		   flipping mode, that doesn't work.  In copying mode,
+-		   we still have access to all of the pages, and so
+-		   it's safe to let release_data deal with it. */
+-		/* (Freeing the fragments is safe since we copy
+-		   non-linear skbs destined for flipping interfaces) */
+-		if (!netif->copying_receiver) {
+-			atomic_set(&(skb_shinfo(skb)->dataref), 1);
+-			skb_shinfo(skb)->frag_list = NULL;
+-			skb_shinfo(skb)->nr_frags = 0;
+-			netbk_free_pages(nr_frags, meta + npo.meta_cons + 1);
+-		}
+ 
+ 		netif->stats.tx_bytes += skb->len;
+ 		netif->stats.tx_packets++;
+@@ -706,10 +504,7 @@ static void net_rx_action(unsigned long unused)
+ 			/* remote but checksummed. */
+ 			flags |= NETRXF_data_validated;
+ 
+-		if (meta[npo.meta_cons].copy)
+-			offset = 0;
+-		else
+-			offset = offset_in_page(skb->data);
++		offset = 0;
+ 		resp = make_rx_response(netif, id, status, offset,
+ 					skb_headlen(skb), flags);
+ 
+diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
+index 749931e..a492288 100644
+--- a/drivers/xen/netback/xenbus.c
++++ b/drivers/xen/netback/xenbus.c
+@@ -378,7 +378,8 @@ static int connect_rings(struct backend_info *be)
+ 				 dev->otherend);
+ 		return err;
+ 	}
+-	be->netif->copying_receiver = !!rx_copy;
++	if (!rx_copy)
++		return -EOPNOTSUPP;
+ 
+ 	if (be->netif->dev->tx_queue_len != 0) {
+ 		if (xenbus_scanf(XBT_NIL, dev->otherend,
+-- 
+1.7.3.4
+
+
+From 17d465234118873ab4f5a7992feb4ce7b5537cf7 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Thu, 19 Mar 2009 15:19:39 -0700
+Subject: [PATCH 061/139] xen: netback: demacro MASK_PEND_IDX
+
+Replace it with a more meaningful inline: pending_index().
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c |   21 +++++++++++++--------
+ 1 files changed, 13 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 36bea2b..4095622 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -121,7 +121,12 @@ static struct pending_tx_info {
+ } pending_tx_info[MAX_PENDING_REQS];
+ static u16 pending_ring[MAX_PENDING_REQS];
+ typedef unsigned int PEND_RING_IDX;
+-#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
++
++static inline PEND_RING_IDX pending_index(unsigned i)
++{
++	return i & (MAX_PENDING_REQS-1);
++}
++
+ static PEND_RING_IDX pending_prod, pending_cons;
+ #define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
+ 
+@@ -695,7 +700,7 @@ inline static void net_tx_action_dealloc(void)
+ 		while (dc != dp) {
+ 			unsigned long pfn;
+ 
+-			pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
++			pending_idx = dealloc_ring[pending_index(dc++)];
+ 			list_move_tail(&pending_inuse[pending_idx].list, &list);
+ 
+ 			pfn = idx_to_pfn(pending_idx);
+@@ -754,7 +759,7 @@ inline static void net_tx_action_dealloc(void)
+ 		/* Ready for next use. */
+ 		gnttab_reset_grant_page(mmap_pages[pending_idx]);
+ 
+-		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
++		pending_ring[pending_index(pending_prod++)] = pending_idx;
+ 
+ 		netif_put(netif);
+ 
+@@ -831,7 +836,7 @@ static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netif *netif,
+ 	start = ((unsigned long)shinfo->frags[0].page == pending_idx);
+ 
+ 	for (i = start; i < shinfo->nr_frags; i++, txp++) {
+-		pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)];
++		pending_idx = pending_ring[pending_index(pending_cons++)];
+ 
+ 		gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx),
+ 				  GNTMAP_host_map | GNTMAP_readonly,
+@@ -862,7 +867,7 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
+ 	if (unlikely(err)) {
+ 		txp = &pending_tx_info[pending_idx].req;
+ 		make_tx_response(netif, txp, NETIF_RSP_ERROR);
+-		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
++		pending_ring[pending_index(pending_prod++)] = pending_idx;
+ 		netif_put(netif);
+ 	} else {
+ 		set_phys_to_machine(
+@@ -895,7 +900,7 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
+ 		/* Error on this fragment: respond to client with an error. */
+ 		txp = &pending_tx_info[pending_idx].req;
+ 		make_tx_response(netif, txp, NETIF_RSP_ERROR);
+-		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
++		pending_ring[pending_index(pending_prod++)] = pending_idx;
+ 		netif_put(netif);
+ 
+ 		/* Not the first error? Preceding frags already invalidated. */
+@@ -1142,7 +1147,7 @@ static void net_tx_action(unsigned long unused)
+ 			continue;
+ 		}
+ 
+-		pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
++		pending_idx = pending_ring[pending_index(pending_cons)];
+ 
+ 		data_len = (txreq.size > PKT_PROT_LEN &&
+ 			    ret < MAX_SKB_FRAGS) ?
+@@ -1298,7 +1303,7 @@ static void netif_idx_release(u16 pending_idx)
+ 	unsigned long flags;
+ 
+ 	spin_lock_irqsave(&_lock, flags);
+-	dealloc_ring[MASK_PEND_IDX(dealloc_prod)] = pending_idx;
++	dealloc_ring[pending_index(dealloc_prod)] = pending_idx;
+ 	/* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
+ 	smp_wmb();
+ 	dealloc_prod++;
+-- 
+1.7.3.4
+
+
+From d47af34f87b2d365c75aa3579ad512619ef3d579 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Thu, 19 Mar 2009 15:29:30 -0700
+Subject: [PATCH 062/139] xen: netback: convert PEND_RING_IDX into a proper typedef name
+
+Rename PEND_RING_IDX to pending_ring_idx_t.  It's not used that much,
+so the extra typing won't kill anyone.
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c |   12 ++++++------
+ 1 files changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 4095622..8292e96 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -120,19 +120,19 @@ static struct pending_tx_info {
+ 	struct xen_netif *netif;
+ } pending_tx_info[MAX_PENDING_REQS];
+ static u16 pending_ring[MAX_PENDING_REQS];
+-typedef unsigned int PEND_RING_IDX;
++typedef unsigned int pending_ring_idx_t;
+ 
+-static inline PEND_RING_IDX pending_index(unsigned i)
++static inline pending_ring_idx_t pending_index(unsigned i)
+ {
+ 	return i & (MAX_PENDING_REQS-1);
+ }
+ 
+-static PEND_RING_IDX pending_prod, pending_cons;
++static pending_ring_idx_t pending_prod, pending_cons;
+ #define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
+ 
+ /* Freed TX SKBs get batched on this ring before return to pending_ring. */
+ static u16 dealloc_ring[MAX_PENDING_REQS];
+-static PEND_RING_IDX dealloc_prod, dealloc_cons;
++static pending_ring_idx_t dealloc_prod, dealloc_cons;
+ 
+ /* Doubly-linked list of in-use pending entries. */
+ static struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
+@@ -669,7 +669,7 @@ static void tx_credit_callback(unsigned long data)
+ 	netif_schedule_work(netif);
+ }
+ 
+-static inline int copy_pending_req(PEND_RING_IDX pending_idx)
++static inline int copy_pending_req(pending_ring_idx_t pending_idx)
+ {
+ 	return gnttab_copy_grant_page(grant_tx_handle[pending_idx],
+ 				      &mmap_pages[pending_idx]);
+@@ -680,7 +680,7 @@ inline static void net_tx_action_dealloc(void)
+ 	struct netbk_tx_pending_inuse *inuse, *n;
+ 	struct gnttab_unmap_grant_ref *gop;
+ 	u16 pending_idx;
+-	PEND_RING_IDX dc, dp;
++	pending_ring_idx_t dc, dp;
+ 	struct xen_netif *netif;
+ 	int ret;
+ 	LIST_HEAD(list);
+-- 
+1.7.3.4
+
+
+From 56727a43f329d50c2a00fed0316ffd87d6c23ebd Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Thu, 19 Mar 2009 15:31:32 -0700
+Subject: [PATCH 063/139] xen: netback: rename NR_PENDING_REQS to nr_pending_reqs()
+
+Use function syntax to show it's actually computing a value, rather than
+a constant.
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c |   10 +++++++---
+ 1 files changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 8292e96..5410a68 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -128,7 +128,11 @@ static inline pending_ring_idx_t pending_index(unsigned i)
+ }
+ 
+ static pending_ring_idx_t pending_prod, pending_cons;
+-#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
++
++static inline pending_ring_idx_t nr_pending_reqs(void)
++{
++	return MAX_PENDING_REQS - pending_prod + pending_cons;
++}
+ 
+ /* Freed TX SKBs get batched on this ring before return to pending_ring. */
+ static u16 dealloc_ring[MAX_PENDING_REQS];
+@@ -167,7 +171,7 @@ static inline unsigned long alloc_mfn(void)
+ static inline void maybe_schedule_tx_action(void)
+ {
+ 	smp_mb();
+-	if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
++	if ((nr_pending_reqs() < (MAX_PENDING_REQS/2)) &&
+ 	    !list_empty(&net_schedule_list))
+ 		tasklet_schedule(&net_tx_tasklet);
+ }
+@@ -1060,7 +1064,7 @@ static void net_tx_action(unsigned long unused)
+ 		net_tx_action_dealloc();
+ 
+ 	mop = tx_map_ops;
+-	while (((NR_PENDING_REQS + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
++	while (((nr_pending_reqs() + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
+ 		!list_empty(&net_schedule_list)) {
+ 		/* Get a netif from the list with work to do. */
+ 		ent = net_schedule_list.next;
+-- 
+1.7.3.4
+
+
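For reference, the pending ring reworked in the three patches above is a standard power-of-two ring: free slot indices live in pending_ring[], pending_prod and pending_cons only ever grow, and pending_index() masks them onto the array. The following is a minimal userspace sketch of the same indexing scheme, not the driver code; it assumes the ring size is a power of two and that the producer starts at MAX_PENDING_REQS, as the full driver's netback_init() does.

#include <assert.h>
#include <stdio.h>

#define MAX_PENDING_REQS 256	/* must be a power of two */

typedef unsigned int pending_ring_idx_t;

static unsigned short pending_ring[MAX_PENDING_REQS];
static pending_ring_idx_t pending_prod, pending_cons;

/* Map a free-running index onto a slot in the ring. */
static inline pending_ring_idx_t pending_index(unsigned int i)
{
	return i & (MAX_PENDING_REQS - 1);
}

/* Requests currently in flight (consumed but not yet returned). */
static inline pending_ring_idx_t nr_pending_reqs(void)
{
	return MAX_PENDING_REQS - pending_prod + pending_cons;
}

int main(void)
{
	unsigned int i;

	/* Every slot index starts out free. */
	for (i = 0; i < MAX_PENDING_REQS; i++)
		pending_ring[i] = i;
	pending_prod = MAX_PENDING_REQS;	/* producer starts with a full ring of free slots */
	pending_cons = 0;

	/* Consume two slots ... */
	unsigned short a = pending_ring[pending_index(pending_cons++)];
	unsigned short b = pending_ring[pending_index(pending_cons++)];
	assert(nr_pending_reqs() == 2);

	/* ... and return them once the grant operations complete. */
	pending_ring[pending_index(pending_prod++)] = a;
	pending_ring[pending_index(pending_prod++)] = b;
	assert(nr_pending_reqs() == 0);

	printf("in flight: %u\n", nr_pending_reqs());
	return 0;
}

Because both counters are free-running unsigned values, nr_pending_reqs() stays correct across wraparound as long as the difference never exceeds MAX_PENDING_REQS.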
+From 55b360614f1bd44d0b1395b4aabf41d8f1f13f17 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Thu, 19 Mar 2009 15:45:45 -0700
+Subject: [PATCH 064/139] xen: netback: pre-initialize list and spinlocks; use empty list to indicate not on list
+
+Statically pre-initialize net_schedule_list head and lock.
+
+Use an empty list to mark when a xen_netif is not on the schedule list,
+rather than NULL (which may upset list debugging).
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/interface.c |    1 +
+ drivers/xen/netback/netback.c   |   12 ++++--------
+ 2 files changed, 5 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
+index 5e0d26d..dc4fb53 100644
+--- a/drivers/xen/netback/interface.c
++++ b/drivers/xen/netback/interface.c
+@@ -201,6 +201,7 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
+ 	atomic_set(&netif->refcnt, 1);
+ 	init_waitqueue_head(&netif->waiting_to_free);
+ 	netif->dev = dev;
++	INIT_LIST_HEAD(&netif->list);
+ 
+ 	netback_carrier_off(netif);
+ 
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 5410a68..cbd4b03 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -148,8 +148,8 @@ static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
+ static struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
+ static struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
+ 
+-static struct list_head net_schedule_list;
+-static spinlock_t net_schedule_list_lock;
++static LIST_HEAD(net_schedule_list);
++static DEFINE_SPINLOCK(net_schedule_list_lock);
+ 
+ #define MAX_MFN_ALLOC 64
+ static unsigned long mfn_list[MAX_MFN_ALLOC];
+@@ -588,15 +588,14 @@ struct net_device_stats *netif_be_get_stats(struct net_device *dev)
+ 
+ static int __on_net_schedule_list(struct xen_netif *netif)
+ {
+-	return netif->list.next != NULL;
++	return !list_empty(&netif->list);
+ }
+ 
+ static void remove_from_net_schedule_list(struct xen_netif *netif)
+ {
+ 	spin_lock_irq(&net_schedule_list_lock);
+ 	if (likely(__on_net_schedule_list(netif))) {
+-		list_del(&netif->list);
+-		netif->list.next = NULL;
++		list_del_init(&netif->list);
+ 		netif_put(netif);
+ 	}
+ 	spin_unlock_irq(&net_schedule_list_lock);
+@@ -1466,9 +1465,6 @@ static int __init netback_init(void)
+ 	for (i = 0; i < MAX_PENDING_REQS; i++)
+ 		pending_ring[i] = i;
+ 
+-	spin_lock_init(&net_schedule_list_lock);
+-	INIT_LIST_HEAD(&net_schedule_list);
+-
+ 	netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
+ 	if (MODPARM_copy_skb) {
+ 		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
+-- 
+1.7.3.4
+
+
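Two idioms do the work in the patch above: list heads and spinlocks can be initialized statically with LIST_HEAD()/DEFINE_SPINLOCK(), and an entry that is always removed with list_del_init() can use list_empty() on itself as an "am I on the list?" test. Below is a small self-contained sketch of the second idiom with a toy circular list standing in for the kernel's <linux/list.h>; all names are illustrative.

#include <stdbool.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

#define LIST_HEAD_INIT(name) { &(name), &(name) }
#define LIST_HEAD(name) struct list_head name = LIST_HEAD_INIT(name)

static inline void INIT_LIST_HEAD(struct list_head *h) { h->next = h->prev = h; }

static inline void list_add_tail(struct list_head *n, struct list_head *h)
{
	n->prev = h->prev; n->next = h;
	h->prev->next = n; h->prev = n;
}

/* Unlink and point the node back at itself, so list_empty() on it is true again. */
static inline void list_del_init(struct list_head *n)
{
	n->prev->next = n->next; n->next->prev = n->prev;
	INIT_LIST_HEAD(n);
}

static inline bool list_empty(const struct list_head *h) { return h->next == h; }

struct toy_netif { int id; struct list_head list; };

static LIST_HEAD(schedule_list);	/* statically initialized, no runtime init call */

static bool on_schedule_list(struct toy_netif *nif) { return !list_empty(&nif->list); }

int main(void)
{
	struct toy_netif nif = { .id = 1 };

	INIT_LIST_HEAD(&nif.list);			/* as netif_alloc() now does */
	printf("scheduled? %d\n", on_schedule_list(&nif));	/* 0 */

	list_add_tail(&nif.list, &schedule_list);
	printf("scheduled? %d\n", on_schedule_list(&nif));	/* 1 */

	list_del_init(&nif.list);			/* instead of list_del() + next = NULL */
	printf("scheduled? %d\n", on_schedule_list(&nif));	/* 0 */
	return 0;
}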
+From e12cf57de7a6c20e4c8900ce7bf4e6924a12f49e Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Thu, 19 Mar 2009 15:48:10 -0700
+Subject: [PATCH 065/139] xen: netback: remove CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
+
+Keir says:
+> > Does CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER need to be a config
+> > option?  Could/should we always/never set it?
+> It doesn't work well with local delivery into dom0, nor even with IP
+> fragment reassembly. I don't think we would ever turn it on these days.
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c |   21 ---------------------
+ 1 files changed, 0 insertions(+), 21 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index cbd4b03..f00e405 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -615,23 +615,11 @@ static void add_to_net_schedule_list_tail(struct xen_netif *netif)
+ 	spin_unlock_irq(&net_schedule_list_lock);
+ }
+ 
+-/*
+- * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER:
+- * If this driver is pipelining transmit requests then we can be very
+- * aggressive in avoiding new-packet notifications -- frontend only needs to
+- * send a notification if there are no outstanding unreceived responses.
+- * If we may be buffer transmit buffers for any reason then we must be rather
+- * more conservative and treat this as the final check for pending work.
+- */
+ void netif_schedule_work(struct xen_netif *netif)
+ {
+ 	int more_to_do;
+ 
+-#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
+-	more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx);
+-#else
+ 	RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
+-#endif
+ 
+ 	if (more_to_do) {
+ 		add_to_net_schedule_list_tail(netif);
+@@ -1355,15 +1343,6 @@ static void make_tx_response(struct xen_netif *netif,
+ 	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
+ 	if (notify)
+ 		notify_remote_via_irq(netif->irq);
+-
+-#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
+-	if (i == netif->tx.req_cons) {
+-		int more_to_do;
+-		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
+-		if (more_to_do)
+-			add_to_net_schedule_list_tail(netif);
+-	}
+-#endif
+ }
+ 
+ static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
+-- 
+1.7.3.4
+
+
+From adf542f9c714e3b7c76fcf9e44e0a89cae21a341 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Thu, 19 Mar 2009 22:28:52 -0700
+Subject: [PATCH 066/139] xen: netback: make netif_get/put inlines
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/common.h |   16 ++++++++++------
+ 1 files changed, 10 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 06f04c1..9056be0 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -183,12 +183,16 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
+ int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
+ 	      unsigned long rx_ring_ref, unsigned int evtchn);
+ 
+-#define netif_get(_b) (atomic_inc(&(_b)->refcnt))
+-#define netif_put(_b)						\
+-	do {							\
+-		if ( atomic_dec_and_test(&(_b)->refcnt) )	\
+-			wake_up(&(_b)->waiting_to_free);	\
+-	} while (0)
++static inline void netif_get(struct xen_netif *netif)
++{
++	atomic_inc(&netif->refcnt);
++}
++
++static inline void  netif_put(struct xen_netif *netif)
++{
++	if (atomic_dec_and_test(&netif->refcnt))
++		wake_up(&netif->waiting_to_free);
++}
+ 
+ void netif_xenbus_init(void);
+ 
+-- 
+1.7.3.4
+
+
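The get/put pair converted in the patch above is a plain reference count where the final put wakes whoever is waiting to free the interface. A compiler-checked sketch of the same pattern, with C11 atomics standing in for the kernel's atomic_t and a flag standing in for the wait queue (illustrative only, not the driver's code):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_netif {
	atomic_int refcnt;
	bool freed;		/* stands in for wake_up(&waiting_to_free) */
};

static inline void netif_get(struct toy_netif *netif)
{
	atomic_fetch_add(&netif->refcnt, 1);
}

static inline void netif_put(struct toy_netif *netif)
{
	if (atomic_fetch_sub(&netif->refcnt, 1) == 1)
		netif->freed = true;	/* last reference dropped */
}

int main(void)
{
	struct toy_netif nif = { .freed = false };

	atomic_init(&nif.refcnt, 1);	/* initial reference */

	netif_get(&nif);	/* e.g. taken while a packet is in flight */
	netif_put(&nif);	/* packet done */
	netif_put(&nif);	/* initial reference dropped -> "freed" */

	printf("freed: %d\n", nif.freed);
	return 0;
}

Compared with the old #define versions, the static inlines get argument type checking and appear by name in debuggers and backtraces.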
+From f06459a199f953a68f001f06e54dde54e1e16c87 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Thu, 19 Mar 2009 22:30:24 -0700
+Subject: [PATCH 067/139] xen: netback: move code around
+
+Split net_tx_action() into several functions; move variables into
+their innermost scopes; rename "i" to "idx".
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c |  158 ++++++++++++++++++++++++-----------------
+ 1 files changed, 94 insertions(+), 64 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index f00e405..4d63ff3 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -773,7 +773,8 @@ static void netbk_tx_err(struct xen_netif *netif, struct xen_netif_tx_request *t
+ 	netif_put(netif);
+ }
+ 
+-static int netbk_count_requests(struct xen_netif *netif, struct xen_netif_tx_request *first,
++static int netbk_count_requests(struct xen_netif *netif,
++				struct xen_netif_tx_request *first,
+ 				struct xen_netif_tx_request *txp, int work_to_do)
+ {
+ 	RING_IDX cons = netif->tx.req_cons;
+@@ -1032,30 +1033,58 @@ out:
+ 	return err;
+ }
+ 
+-/* Called after netfront has transmitted */
+-static void net_tx_action(unsigned long unused)
++static bool tx_credit_exceeded(struct xen_netif *netif, unsigned size)
+ {
+-	struct list_head *ent;
+-	struct sk_buff *skb;
+-	struct xen_netif *netif;
+-	struct xen_netif_tx_request txreq;
+-	struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
+-	struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
+-	u16 pending_idx;
+-	RING_IDX i;
+-	struct gnttab_map_grant_ref *mop;
+-	unsigned int data_len;
+-	int ret, work_to_do;
++	unsigned long now = jiffies;
++	unsigned long next_credit =
++		netif->credit_timeout.expires +
++		msecs_to_jiffies(netif->credit_usec / 1000);
++
++	/* Timer could already be pending in rare cases. */
++	if (timer_pending(&netif->credit_timeout))
++		return true;
++
++	/* Passed the point where we can replenish credit? */
++	if (time_after_eq(now, next_credit)) {
++		netif->credit_timeout.expires = now;
++		tx_add_credit(netif);
++	}
+ 
+-	if (dealloc_cons != dealloc_prod)
+-		net_tx_action_dealloc();
++	/* Still too big to send right now? Set a callback. */
++	if (size > netif->remaining_credit) {
++		netif->credit_timeout.data     =
++			(unsigned long)netif;
++		netif->credit_timeout.function =
++			tx_credit_callback;
++		mod_timer(&netif->credit_timeout,
++			  next_credit);
++
++		return true;
++	}
++
++	return false;
++}
++
++static unsigned net_tx_build_mops(void)
++{
++	struct gnttab_map_grant_ref *mop;
++	struct sk_buff *skb;
++	int ret;
+ 
+ 	mop = tx_map_ops;
+ 	while (((nr_pending_reqs() + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
+ 		!list_empty(&net_schedule_list)) {
++		struct xen_netif *netif;
++		struct xen_netif_tx_request txreq;
++		struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
++		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
++		u16 pending_idx;
++		RING_IDX idx;
++		int work_to_do;
++		unsigned int data_len;
++	
+ 		/* Get a netif from the list with work to do. */
+-		ent = net_schedule_list.next;
+-		netif = list_entry(ent, struct xen_netif, list);
++		netif = list_first_entry(&net_schedule_list, struct xen_netif, list);
+ 		netif_get(netif);
+ 		remove_from_net_schedule_list(netif);
+ 
+@@ -1065,67 +1094,43 @@ static void net_tx_action(unsigned long unused)
+ 			continue;
+ 		}
+ 
+-		i = netif->tx.req_cons;
++		idx = netif->tx.req_cons;
+ 		rmb(); /* Ensure that we see the request before we copy it. */
+-		memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));
++		memcpy(&txreq, RING_GET_REQUEST(&netif->tx, idx), sizeof(txreq));
+ 
+ 		/* Credit-based scheduling. */
+-		if (txreq.size > netif->remaining_credit) {
+-			unsigned long now = jiffies;
+-			unsigned long next_credit =
+-				netif->credit_timeout.expires +
+-				msecs_to_jiffies(netif->credit_usec / 1000);
+-
+-			/* Timer could already be pending in rare cases. */
+-			if (timer_pending(&netif->credit_timeout)) {
+-				netif_put(netif);
+-				continue;
+-			}
+-
+-			/* Passed the point where we can replenish credit? */
+-			if (time_after_eq(now, next_credit)) {
+-				netif->credit_timeout.expires = now;
+-				tx_add_credit(netif);
+-			}
+-
+-			/* Still too big to send right now? Set a callback. */
+-			if (txreq.size > netif->remaining_credit) {
+-				netif->credit_timeout.data     =
+-					(unsigned long)netif;
+-				netif->credit_timeout.function =
+-					tx_credit_callback;
+-				mod_timer(&netif->credit_timeout,
+-					    next_credit);
+-				netif_put(netif);
+-				continue;
+-			}
++		if (txreq.size > netif->remaining_credit &&
++		    tx_credit_exceeded(netif, txreq.size)) {
++			netif_put(netif);
++			continue;
+ 		}
++
+ 		netif->remaining_credit -= txreq.size;
+ 
+ 		work_to_do--;
+-		netif->tx.req_cons = ++i;
++		netif->tx.req_cons = ++idx;
+ 
+ 		memset(extras, 0, sizeof(extras));
+ 		if (txreq.flags & NETTXF_extra_info) {
+ 			work_to_do = netbk_get_extras(netif, extras,
+ 						      work_to_do);
+-			i = netif->tx.req_cons;
++			idx = netif->tx.req_cons;
+ 			if (unlikely(work_to_do < 0)) {
+-				netbk_tx_err(netif, &txreq, i);
++				netbk_tx_err(netif, &txreq, idx);
+ 				continue;
+ 			}
+ 		}
+ 
+ 		ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do);
+ 		if (unlikely(ret < 0)) {
+-			netbk_tx_err(netif, &txreq, i - ret);
++			netbk_tx_err(netif, &txreq, idx - ret);
+ 			continue;
+ 		}
+-		i += ret;
++		idx += ret;
+ 
+ 		if (unlikely(txreq.size < ETH_HLEN)) {
+ 			DPRINTK("Bad packet size: %d\n", txreq.size);
+-			netbk_tx_err(netif, &txreq, i);
++			netbk_tx_err(netif, &txreq, idx);
+ 			continue;
+ 		}
+ 
+@@ -1134,7 +1139,7 @@ static void net_tx_action(unsigned long unused)
+ 			DPRINTK("txreq.offset: %x, size: %u, end: %lu\n",
+ 				txreq.offset, txreq.size,
+ 				(txreq.offset &~PAGE_MASK) + txreq.size);
+-			netbk_tx_err(netif, &txreq, i);
++			netbk_tx_err(netif, &txreq, idx);
+ 			continue;
+ 		}
+ 
+@@ -1148,7 +1153,7 @@ static void net_tx_action(unsigned long unused)
+ 				GFP_ATOMIC | __GFP_NOWARN);
+ 		if (unlikely(skb == NULL)) {
+ 			DPRINTK("Can't allocate a skb in start_xmit.\n");
+-			netbk_tx_err(netif, &txreq, i);
++			netbk_tx_err(netif, &txreq, idx);
+ 			break;
+ 		}
+ 
+@@ -1161,7 +1166,7 @@ static void net_tx_action(unsigned long unused)
+ 
+ 			if (netbk_set_skb_gso(skb, gso)) {
+ 				kfree_skb(skb);
+-				netbk_tx_err(netif, &txreq, i);
++				netbk_tx_err(netif, &txreq, idx);
+ 				continue;
+ 			}
+ 		}
+@@ -1199,23 +1204,27 @@ static void net_tx_action(unsigned long unused)
+ 
+ 		mop = netbk_get_requests(netif, skb, txfrags, mop);
+ 
+-		netif->tx.req_cons = i;
++		netif->tx.req_cons = idx;
+ 		netif_schedule_work(netif);
+ 
+ 		if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
+ 			break;
+ 	}
+ 
+-	if (mop == tx_map_ops)
+-		return;
++	return mop - tx_map_ops;
++}
+ 
+-	ret = HYPERVISOR_grant_table_op(
+-		GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops);
+-	BUG_ON(ret);
++static void net_tx_submit(void)
++{
++	struct gnttab_map_grant_ref *mop;
++	struct sk_buff *skb;
+ 
+ 	mop = tx_map_ops;
+ 	while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
+ 		struct xen_netif_tx_request *txp;
++		struct xen_netif *netif;
++		u16 pending_idx;
++		unsigned data_len;
+ 
+ 		pending_idx = *((u16 *)skb->data);
+ 		netif       = pending_tx_info[pending_idx].netif;
+@@ -1288,6 +1297,27 @@ static void net_tx_action(unsigned long unused)
+ 	}
+ }
+ 
++/* Called after netfront has transmitted */
++static void net_tx_action(unsigned long unused)
++{
++	unsigned nr_mops;
++	int ret;
++
++	if (dealloc_cons != dealloc_prod)
++		net_tx_action_dealloc();
++
++	nr_mops = net_tx_build_mops();
++
++	if (nr_mops == 0)
++		return;
++
++	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
++					tx_map_ops, nr_mops);
++	BUG_ON(ret);
++
++	net_tx_submit();
++}
++
+ static void netif_idx_release(u16 pending_idx)
+ {
+ 	static DEFINE_SPINLOCK(_lock);
+-- 
+1.7.3.4
+
+
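tx_credit_exceeded(), split out in the patch above, implements a replenish-on-timeout credit scheme: each interface earns credit_bytes per credit period, and a request that exceeds the remaining credit either waits for an already-pending timer or triggers a refill once the period has elapsed. The following stripped-down sketch shows the same decision with wall-clock seconds instead of jiffies and a boolean instead of mod_timer(); the one-second period and all names are illustrative.

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

struct toy_netif {
	unsigned long credit_bytes;	/* refill amount per period */
	unsigned long remaining_credit;
	time_t next_credit;		/* earliest time a refill is allowed */
	bool timer_pending;		/* stands in for mod_timer() */
};

static void tx_add_credit(struct toy_netif *netif)
{
	netif->remaining_credit = netif->credit_bytes;
}

/* Return true if this request must wait, false if it may be sent now.
 * The driver performs the first check in the caller; folded in here. */
static bool tx_credit_exceeded(struct toy_netif *netif, unsigned long size, time_t now)
{
	if (size <= netif->remaining_credit)
		return false;

	/* Timer could already be pending. */
	if (netif->timer_pending)
		return true;

	/* Passed the point where we can replenish credit? */
	if (now >= netif->next_credit) {
		netif->next_credit = now + 1;	/* illustrative 1s period */
		tx_add_credit(netif);
	}

	/* Still too big to send right now? Arrange to retry later. */
	if (size > netif->remaining_credit) {
		netif->timer_pending = true;
		return true;
	}
	return false;
}

int main(void)
{
	time_t now = time(NULL);
	struct toy_netif nif = {
		.credit_bytes = 1500,
		.remaining_credit = 1500,
		.next_credit = now + 1,		/* refill allowed one period from now */
	};

	if (!tx_credit_exceeded(&nif, 1000, now)) {
		nif.remaining_credit -= 1000;
		printf("sent 1000 bytes, %lu credit left\n", nif.remaining_credit);
	}
	if (tx_credit_exceeded(&nif, 1000, now))
		printf("second 1000-byte request deferred until the credit refill\n");
	return 0;
}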
+From cec84ff81d9f6ca882908572b984215529b4117b Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Fri, 20 Mar 2009 23:18:12 -0700
+Subject: [PATCH 068/139] xen: netback: document PKT_PROT_LEN
+
+Document the rationale for the existence and value of PKT_PROT_LEN.
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c |    9 +++++++++
+ 1 files changed, 9 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 4d63ff3..80b424f 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -113,6 +113,15 @@ static inline int netif_page_index(struct page *pg)
+ 	return idx;
+ }
+ 
++/*
++ * This is the amount of packet we copy rather than map, so that the
++ * guest can't fiddle with the contents of the headers while we do
++ * packet processing on them (netfilter, routing, etc).  This could
++ * probably do with being larger, since 1) 64-bytes isn't necessarily
++ * long enough to cover a full christmas-tree ip+tcp header, let alone
++ * packet contents, and 2) the data is probably in cache anyway
++ * (though perhaps some other cpu's cache).
++ */
+ #define PKT_PROT_LEN 64
+ 
+ static struct pending_tx_info {
+-- 
+1.7.3.4
+
+
+From a9402ee935757e8facebc6e886f9912c2c523da7 Mon Sep 17 00:00:00 2001
+From: Christophe Saout <chtephan at leto.intern.saout.de>
+Date: Sun, 12 Apr 2009 13:40:27 +0200
+Subject: [PATCH 069/139] xen: netback: use dev_name() instead of removed ->bus_id.
+
+Signed-off-by: Christophe Saout <chtephan at leto.intern.saout.de>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/interface.c |    2 +-
+ 1 files changed, 1 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
+index dc4fb53..3bb5c20 100644
+--- a/drivers/xen/netback/interface.c
++++ b/drivers/xen/netback/interface.c
+@@ -123,7 +123,7 @@ static void netbk_get_drvinfo(struct net_device *dev,
+ 			      struct ethtool_drvinfo *info)
+ {
+ 	strcpy(info->driver, "netbk");
+-	strcpy(info->bus_info, dev->dev.parent->bus_id);
++	strcpy(info->bus_info, dev_name(dev->dev.parent));
+ }
+ 
+ static const struct netif_stat {
+-- 
+1.7.3.4
+
+
+From 35de1701fca19d693e9722bffbe7609caf1d5ac6 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Mon, 29 Jun 2009 14:04:23 -0700
+Subject: [PATCH 070/139] xen: netback: convert to net_device_ops
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/interface.c |   17 +++++++++++------
+ 1 files changed, 11 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
+index 3bb5c20..21c1f95 100644
+--- a/drivers/xen/netback/interface.c
++++ b/drivers/xen/netback/interface.c
+@@ -178,6 +178,15 @@ static struct ethtool_ops network_ethtool_ops =
+ 	.get_strings = netbk_get_strings,
+ };
+ 
++static struct net_device_ops netback_ops =
++{
++	.ndo_start_xmit	= netif_be_start_xmit,
++	.ndo_get_stats	= netif_be_get_stats,
++	.ndo_open	= net_open,
++	.ndo_stop	= net_close,
++	.ndo_change_mtu	= netbk_change_mtu,
++};
++
+ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle)
+ {
+ 	int err = 0;
+@@ -213,12 +222,8 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
+ 
+ 	init_timer(&netif->tx_queue_timeout);
+ 
+-	dev->hard_start_xmit = netif_be_start_xmit;
+-	dev->get_stats       = netif_be_get_stats;
+-	dev->open            = net_open;
+-	dev->stop            = net_close;
+-	dev->change_mtu	     = netbk_change_mtu;
+-	dev->features        = NETIF_F_IP_CSUM;
++	dev->netdev_ops	= &netback_ops;
++	dev->features	= NETIF_F_IP_CSUM;
+ 
+ 	SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
+ 
+-- 
+1.7.3.4
+
+
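The conversion above swaps per-field assignments on the net_device for a single ops table, the pattern the network stack adopted with net_device_ops. A generic sketch of the ops-table idiom; the types and hooks here are illustrative, not the kernel's struct net_device_ops:

#include <stdio.h>

struct toy_device;

/* One table of function pointers instead of assigning each hook on the object. */
struct toy_device_ops {
	int (*open)(struct toy_device *dev);
	int (*stop)(struct toy_device *dev);
	int (*start_xmit)(struct toy_device *dev, const char *pkt);
};

struct toy_device {
	const char *name;
	const struct toy_device_ops *ops;
};

static int toy_open(struct toy_device *dev)
{
	printf("%s: open\n", dev->name);
	return 0;
}

static int toy_stop(struct toy_device *dev)
{
	printf("%s: stop\n", dev->name);
	return 0;
}

static int toy_start_xmit(struct toy_device *dev, const char *pkt)
{
	printf("%s: xmit '%s'\n", dev->name, pkt);
	return 0;
}

static const struct toy_device_ops toy_ops = {
	.open		= toy_open,
	.stop		= toy_stop,
	.start_xmit	= toy_start_xmit,
};

int main(void)
{
	struct toy_device dev = { .name = "vif0.0", .ops = &toy_ops };

	dev.ops->open(&dev);
	dev.ops->start_xmit(&dev, "hello");
	dev.ops->stop(&dev);
	return 0;
}

Keeping the table const and shared also means one read-only structure serves every interface instance.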
+From c6f3885ef05e96489025e1c1c7299aac7cf43d87 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Fri, 4 Sep 2009 14:55:43 -0700
+Subject: [PATCH 071/139] xen: netback: reinstate missing code
+
+Change c3219dc868fe3e84070d6da2d0759a834b6f7251, "Completely drop flip
+support", was a bit too aggressive in removing code: it removed a chunk
+that was used not only for flipping but also when a buffer crossed a page
+boundary.  Reinstate that code.
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c |   91 +++++++++++++++++++++++++++++++++++++++++
+ 1 files changed, 91 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 80b424f..7c0f05b 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -185,6 +185,82 @@ static inline void maybe_schedule_tx_action(void)
+ 		tasklet_schedule(&net_tx_tasklet);
+ }
+ 
++static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
++{
++	struct skb_shared_info *ninfo;
++	struct sk_buff *nskb;
++	unsigned long offset;
++	int ret;
++	int len;
++	int headlen;
++
++	BUG_ON(skb_shinfo(skb)->frag_list != NULL);
++
++	nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN);
++	if (unlikely(!nskb))
++		goto err;
++
++	skb_reserve(nskb, NET_SKB_PAD + NET_IP_ALIGN);
++	headlen = skb_end_pointer(nskb) - nskb->data;
++	if (headlen > skb_headlen(skb))
++		headlen = skb_headlen(skb);
++	ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
++	BUG_ON(ret);
++
++	ninfo = skb_shinfo(nskb);
++	ninfo->gso_size = skb_shinfo(skb)->gso_size;
++	ninfo->gso_type = skb_shinfo(skb)->gso_type;
++
++	offset = headlen;
++	len = skb->len - headlen;
++
++	nskb->len = skb->len;
++	nskb->data_len = len;
++	nskb->truesize += len;
++
++	while (len) {
++		struct page *page;
++		int copy;
++		int zero;
++
++		if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
++			dump_stack();
++			goto err_free;
++		}
++
++		copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
++		zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
++
++		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
++		if (unlikely(!page))
++			goto err_free;
++
++		ret = skb_copy_bits(skb, offset, page_address(page), copy);
++		BUG_ON(ret);
++
++		ninfo->frags[ninfo->nr_frags].page = page;
++		ninfo->frags[ninfo->nr_frags].page_offset = 0;
++		ninfo->frags[ninfo->nr_frags].size = copy;
++		ninfo->nr_frags++;
++
++		offset += copy;
++		len -= copy;
++	}
++
++	offset = nskb->data - skb->data;
++
++	nskb->transport_header = skb->transport_header + offset;
++	nskb->network_header = skb->network_header + offset;
++	nskb->mac_header = skb->mac_header + offset;
++
++	return nskb;
++
++ err_free:
++	kfree_skb(nskb);
++ err:
++	return NULL;
++}
++
+ static inline int netbk_max_required_rx_slots(struct xen_netif *netif)
+ {
+ 	if (netif->features & (NETIF_F_SG|NETIF_F_TSO))
+@@ -218,6 +294,21 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ 	if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
+ 		goto drop;
+ 
++	/*
++	 * XXX For now we also copy skbuffs whose head crosses a page
++	 * boundary, because netbk_gop_skb can't handle them.
++	 */
++	if ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE) {
++		struct sk_buff *nskb = netbk_copy_skb(skb);
++		if ( unlikely(nskb == NULL) )
++			goto drop;
++		/* Copy only the header fields we use in this driver. */
++		nskb->dev = skb->dev;
++		nskb->ip_summed = skb->ip_summed;
++		dev_kfree_skb(skb);
++		skb = nskb;
++	}
++
+ 	netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 +
+ 				   !!skb_shinfo(skb)->gso_size;
+ 	netif_get(netif);
+-- 
+1.7.3.4
+
+
+From 2e290d790877df4368691180f76206ad27a42505 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Wed, 9 Sep 2009 15:19:15 -0700
+Subject: [PATCH 072/139] xen: netback: remove debug noise
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c |    2 --
+ 1 files changed, 0 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 7c0f05b..d7d738e 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -1537,8 +1537,6 @@ static int __init netback_init(void)
+ 	int i;
+ 	struct page *page;
+ 
+-	printk(KERN_CRIT "*** netif_init\n");
+-
+ 	if (!xen_domain())
+ 		return -ENODEV;
+ 
+-- 
+1.7.3.4
+
+
+From 3ba3bb7d563704c3050de6116aa0a761a5791428 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Thu, 8 Oct 2009 13:23:09 -0400
+Subject: [PATCH 073/139] Fix compile warnings: ignoring return value of 'xenbus_register_backend' ..
+
+We neglected to check the return value of xenbus_register_backend
+and to take action when it fails. This patch fixes that and adds
+code to deal with that type of failure.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/common.h  |    2 +-
+ drivers/xen/netback/netback.c |   12 +++++++++++-
+ drivers/xen/netback/xenbus.c  |    4 ++--
+ 3 files changed, 14 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 9056be0..0675946 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -194,7 +194,7 @@ static inline void  netif_put(struct xen_netif *netif)
+ 		wake_up(&netif->waiting_to_free);
+ }
+ 
+-void netif_xenbus_init(void);
++int netif_xenbus_init(void);
+ 
+ #define netif_schedulable(netif)				\
+ 	(netif_running((netif)->dev) && netback_carrier_ok(netif))
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index d7d738e..860c61e 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -1536,6 +1536,7 @@ static int __init netback_init(void)
+ {
+ 	int i;
+ 	struct page *page;
++	int rc = 0;
+ 
+ 	if (!xen_domain())
+ 		return -ENODEV;
+@@ -1583,7 +1584,9 @@ static int __init netback_init(void)
+ 
+ 	//netif_accel_init();
+ 
+-	netif_xenbus_init();
++	rc = netif_xenbus_init();
++	if (rc)
++		goto failed_init;
+ 
+ #ifdef NETBE_DEBUG_INTERRUPT
+ 	(void)bind_virq_to_irqhandler(VIRQ_DEBUG,
+@@ -1595,6 +1598,13 @@ static int __init netback_init(void)
+ #endif
+ 
+ 	return 0;
++
++failed_init:
++	free_empty_pages_and_pagevec(mmap_pages, MAX_PENDING_REQS);
++	del_timer(&netbk_tx_pending_timer);
++	del_timer(&net_timer);
++	return rc;
++
+ }
+ 
+ module_init(netback_init);
+diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
+index a492288..c46b235 100644
+--- a/drivers/xen/netback/xenbus.c
++++ b/drivers/xen/netback/xenbus.c
+@@ -447,8 +447,8 @@ static struct xenbus_driver netback = {
+ };
+ 
+ 
+-void netif_xenbus_init(void)
++int netif_xenbus_init(void)
+ {
+ 	printk(KERN_CRIT "registering netback\n");
+-	(void)xenbus_register_backend(&netback);
++	return xenbus_register_backend(&netback);
+ }
+-- 
+1.7.3.4
+
+
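The error handling added in the patch above follows the usual kernel init shape: each setup step that can fail jumps to a label that unwinds the steps already completed. A generic sketch of the pattern, with made-up resource names standing in for the pages, timers and backend registration:

#include <stdio.h>
#include <stdlib.h>

static void *alloc_pages_toy(void)     { return malloc(4096); }
static void free_pages_toy(void *p)    { free(p); }
static int  setup_timer_toy(void)      { return 0; }	/* 0 on success */
static void delete_timer_toy(void)     { }
static int  register_backend_toy(void) { return -1; }	/* pretend this fails */

static int netback_init_toy(void)
{
	void *pages;
	int rc;

	pages = alloc_pages_toy();
	if (!pages)
		return -1;

	rc = setup_timer_toy();
	if (rc)
		goto fail_timer;

	rc = register_backend_toy();
	if (rc)
		goto fail_register;	/* the case the patch above adds */

	return 0;

fail_register:
	delete_timer_toy();
fail_timer:
	free_pages_toy(pages);
	return rc;
}

int main(void)
{
	printf("init: %d\n", netback_init_toy());
	return 0;
}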
+From 4bc919e07d5dc48cb95b22cc6e90c6110c229343 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Tue, 27 Oct 2009 12:37:50 -0700
+Subject: [PATCH 074/139] xen: netback: don't screw around with packet gso state
+
+These lines were reverted from 2.6.18 netback as the network stack
+was modified to deal with packets shorter than the gso size, so there's
+no need to fiddle with the gso state in netback.
+
+Taken from linux-2.6.18-xen.hg change 8081d19dce89
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c |    5 -----
+ 1 files changed, 0 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 860c61e..9a14976 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -1293,11 +1293,6 @@ static unsigned net_tx_build_mops(void)
+ 			skb_shinfo(skb)->frags[0].page = (void *)~0UL;
+ 		}
+ 
+-		if (skb->data_len < skb_shinfo(skb)->gso_size) {
+-			skb_shinfo(skb)->gso_size = 0;
+-			skb_shinfo(skb)->gso_type = 0;
+-		}
+-
+ 		__skb_queue_tail(&tx_queue, skb);
+ 
+ 		pending_cons++;
+-- 
+1.7.3.4
+
+
+From f2b947783c47a721497e5d325c736234f71501e7 Mon Sep 17 00:00:00 2001
+From: Steven Smith <ssmith at weybridge.uk.xensource.com>
+Date: Fri, 30 Oct 2009 13:55:23 -0700
+Subject: [PATCH 075/139] xen: netback: make sure that pg->mapping is never NULL for a page mapped from a foreign domain.
+
+Otherwise, the foreign maps tracking infrastructure gets confused, and
+thinks that the foreign page is local.  This means that you can't
+forward that packet to another foreign domain.  This leads to very
+high packet drop, and hence very poor performance.
+
+Signed-off-by: Steven Smith <steven.smith at citrix.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c |    4 ++--
+ 1 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 9a14976..111fec7 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -97,12 +97,12 @@ static inline unsigned long idx_to_kaddr(unsigned int idx)
+ /* extra field used in struct page */
+ static inline void netif_set_page_index(struct page *pg, unsigned int index)
+ {
+-	*(unsigned long *)&pg->mapping = index;
++	*(unsigned long *)&pg->mapping = index + 1;
+ }
+ 
+ static inline int netif_page_index(struct page *pg)
+ {
+-	unsigned long idx = (unsigned long)pg->mapping;
++	unsigned long idx = (unsigned long)pg->mapping - 1;
+ 
+ 	if (!PageForeign(pg))
+ 		return -1;
+-- 
+1.7.3.4
+
+
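The fix in the patch above is an off-by-one encoding: page->mapping doubles as the pending-ring index for foreign pages, but NULL already means "nothing stored", so index 0 could not be represented directly; storing index + 1 keeps 0 free to mean unset. A tiny sketch of just that encoding (the real netif_page_index() additionally checks PageForeign() and the index range); the field and helper names are illustrative.

#include <stdio.h>

struct toy_page {
	/* Overloaded field: 0 means "no index stored", anything else is index + 1. */
	unsigned long mapping;
};

static inline void set_page_index(struct toy_page *pg, unsigned int index)
{
	pg->mapping = (unsigned long)index + 1;
}

/* Returns -1 if no index has been stored, the index otherwise. */
static inline long get_page_index(const struct toy_page *pg)
{
	return pg->mapping ? (long)(pg->mapping - 1) : -1;
}

int main(void)
{
	struct toy_page pg = { 0 };

	printf("before: %ld\n", get_page_index(&pg));	/* -1, not confused with index 0 */
	set_page_index(&pg, 0);
	printf("after : %ld\n", get_page_index(&pg));	/* 0 */
	return 0;
}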
+From df8b27ea0fb2695842104e06caaecb55780577a7 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ijc at hellion.org.uk>
+Date: Thu, 3 Dec 2009 21:56:19 +0000
+Subject: [PATCH 076/139] xen: rename netbk module xen-netback.
+
+netbk is a rather generic name for a modular distro-style kernel.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Cc: Jeremy Fitzhardinge <jeremy at goop.org>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/Makefile |    4 ++--
+ 1 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/xen/netback/Makefile b/drivers/xen/netback/Makefile
+index a01a1a3..e346e81 100644
+--- a/drivers/xen/netback/Makefile
++++ b/drivers/xen/netback/Makefile
+@@ -1,3 +1,3 @@
+-obj-$(CONFIG_XEN_NETDEV_BACKEND) := netbk.o
++obj-$(CONFIG_XEN_NETDEV_BACKEND) := xen-netback.o
+ 
+-netbk-y := netback.o xenbus.o interface.o
++xen-netback-y := netback.o xenbus.o interface.o
+-- 
+1.7.3.4
+
+
+From 279097395ad64ae4df15e206a487cd5fd3be39a8 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Tue, 16 Feb 2010 14:40:37 -0800
+Subject: [PATCH 077/139] xen: netback: use dev_get/set_drvdata() interface
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/xenbus.c |   10 +++++-----
+ 1 files changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
+index c46b235..79e6fb0 100644
+--- a/drivers/xen/netback/xenbus.c
++++ b/drivers/xen/netback/xenbus.c
+@@ -35,7 +35,7 @@ static void backend_create_netif(struct backend_info *be);
+ 
+ static int netback_remove(struct xenbus_device *dev)
+ {
+-	struct backend_info *be = dev->dev.driver_data;
++  struct backend_info *be = dev_get_drvdata(&dev->dev);
+ 
+ 	//netback_remove_accelerators(be, dev);
+ 
+@@ -45,7 +45,7 @@ static int netback_remove(struct xenbus_device *dev)
+ 		be->netif = NULL;
+ 	}
+ 	kfree(be);
+-	dev->dev.driver_data = NULL;
++	dev_set_drvdata(&dev->dev, NULL);
+ 	return 0;
+ }
+ 
+@@ -70,7 +70,7 @@ static int netback_probe(struct xenbus_device *dev,
+ 	}
+ 
+ 	be->dev = dev;
+-	dev->dev.driver_data = be;
++	dev_set_drvdata(&dev->dev, be);
+ 
+ 	sg = 1;
+ 	if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB)
+@@ -151,7 +151,7 @@ fail:
+  */
+ static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env)
+ {
+-	struct backend_info *be = xdev->dev.driver_data;
++	struct backend_info *be = dev_get_drvdata(&xdev->dev);
+ 	struct xen_netif *netif = be->netif;
+ 	char *val;
+ 
+@@ -211,7 +211,7 @@ static void backend_create_netif(struct backend_info *be)
+ static void frontend_changed(struct xenbus_device *dev,
+ 			     enum xenbus_state frontend_state)
+ {
+-	struct backend_info *be = dev->dev.driver_data;
++	struct backend_info *be = dev_get_drvdata(&dev->dev);
+ 
+ 	DPRINTK("%s", xenbus_strstate(frontend_state));
+ 
+-- 
+1.7.3.4
+
+
+From 31d0b5f5763faf607e32f3b5a0f6b37a34bbbf09 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Tue, 16 Feb 2010 14:41:12 -0800
+Subject: [PATCH 078/139] xen: netback: include linux/sched.h for TASK_* definitions
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/common.h |    2 ++
+ 1 files changed, 2 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 0675946..d8653d3 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -38,6 +38,8 @@
+ #include <linux/netdevice.h>
+ #include <linux/etherdevice.h>
+ #include <linux/wait.h>
++#include <linux/sched.h>
++
+ #include <xen/interface/io/netif.h>
+ #include <asm/io.h>
+ #include <asm/pgalloc.h>
+-- 
+1.7.3.4
+
+
+From cdefc88924b3cdfcac64be737a00a4ec5593cfd5 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Tue, 23 Feb 2010 11:52:27 +0000
+Subject: [PATCH 079/139] xen: netback: remove unused xen_network_done code
+
+It has been disabled effectively forever.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/netback.c |   23 -----------------------
+ 1 files changed, 0 insertions(+), 23 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 111fec7..4b24893 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -343,25 +343,6 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ 	return 0;
+ }
+ 
+-#if 0
+-static void xen_network_done_notify(void)
+-{
+-	static struct net_device *eth0_dev = NULL;
+-	if (unlikely(eth0_dev == NULL))
+-		eth0_dev = __dev_get_by_name("eth0");
+-	netif_rx_schedule(eth0_dev);
+-}
+-/*
+- * Add following to poll() function in NAPI driver (Tigon3 is example):
+- *  if ( xen_network_done() )
+- *      tg3_enable_ints(tp);
+- */
+-int xen_network_done(void)
+-{
+-	return skb_queue_empty(&rx_queue);
+-}
+-#endif
+-
+ struct netrx_pending_operations {
+ 	unsigned trans_prod, trans_cons;
+ 	unsigned mmu_prod, mmu_mcl;
+@@ -664,10 +645,6 @@ static void net_rx_action(unsigned long unused)
+ 	/* More work to do? */
+ 	if (!skb_queue_empty(&rx_queue) && !timer_pending(&net_timer))
+ 		tasklet_schedule(&net_rx_tasklet);
+-#if 0
+-	else
+-		xen_network_done_notify();
+-#endif
+ }
+ 
+ static void net_alarm(unsigned long unused)
+-- 
+1.7.3.4
+
+
+From 994be068dd9947cedcee69a7185e54738cda33d4 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Tue, 23 Feb 2010 11:58:26 +0000
+Subject: [PATCH 080/139] xen: netback: factor disconnect from backend into new function.
+
+Makes subsequent patches cleaner.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/xenbus.c |   16 ++++++++++++----
+ 1 files changed, 12 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
+index 79e6fb0..1f36b4d4 100644
+--- a/drivers/xen/netback/xenbus.c
++++ b/drivers/xen/netback/xenbus.c
+@@ -205,6 +205,16 @@ static void backend_create_netif(struct backend_info *be)
+ }
+ 
+ 
++static void disconnect_backend(struct xenbus_device *dev)
++{
++	struct backend_info *be = dev_get_drvdata(&dev->dev);
++
++	if (be->netif) {
++		netif_disconnect(be->netif);
++		be->netif = NULL;
++	}
++}
++
+ /**
+  * Callback received when the frontend's state changes.
+  */
+@@ -238,11 +248,9 @@ static void frontend_changed(struct xenbus_device *dev,
+ 		break;
+ 
+ 	case XenbusStateClosing:
+-		if (be->netif) {
++		if (be->netif)
+ 			kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
+-			netif_disconnect(be->netif);
+-			be->netif = NULL;
+-		}
++		disconnect_backend(dev);
+ 		xenbus_switch_state(dev, XenbusStateClosing);
+ 		break;
+ 
+-- 
+1.7.3.4
+
+
+From 9dcb4c18e5b29d8862cd7783d5b0040913010563 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Tue, 23 Feb 2010 12:10:24 +0000
+Subject: [PATCH 081/139] xen: netback: wait for hotplug scripts to complete before signalling connected to frontend
+
+Avoid the situation where the frontend is sending packets but the
+domain 0 bridging (or whatever) is not yet configured (because the
+hotplug scripts are too slow) and so packets get dropped.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Cc: Steven.Smith at citrix.com
+---
+ drivers/xen/netback/common.h |    2 +
+ drivers/xen/netback/xenbus.c |   45 +++++++++++++++++++++++++++++++++++++++++-
+ 2 files changed, 46 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index d8653d3..1983768 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -145,6 +145,8 @@ struct backend_info {
+ 	struct xenbus_device *dev;
+ 	struct xen_netif *netif;
+ 	enum xenbus_state frontend_state;
++	struct xenbus_watch hotplug_status_watch;
++	int have_hotplug_status_watch:1;
+ 
+ 	/* State relating to the netback accelerator */
+ 	void *netback_accel_priv;
+diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
+index 1f36b4d4..d2407cc 100644
+--- a/drivers/xen/netback/xenbus.c
++++ b/drivers/xen/netback/xenbus.c
+@@ -32,6 +32,7 @@
+ static int connect_rings(struct backend_info *);
+ static void connect(struct backend_info *);
+ static void backend_create_netif(struct backend_info *be);
++static void unregister_hotplug_status_watch(struct backend_info *be);
+ 
+ static int netback_remove(struct xenbus_device *dev)
+ {
+@@ -39,8 +40,10 @@ static int netback_remove(struct xenbus_device *dev)
+ 
+ 	//netback_remove_accelerators(be, dev);
+ 
++	unregister_hotplug_status_watch(be);
+ 	if (be->netif) {
+ 		kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
++		xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
+ 		netif_disconnect(be->netif);
+ 		be->netif = NULL;
+ 	}
+@@ -210,6 +213,7 @@ static void disconnect_backend(struct xenbus_device *dev)
+ 	struct backend_info *be = dev_get_drvdata(&dev->dev);
+ 
+ 	if (be->netif) {
++		xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
+ 		netif_disconnect(be->netif);
+ 		be->netif = NULL;
+ 	}
+@@ -329,6 +333,36 @@ static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
+ 	return 0;
+ }
+ 
++static void unregister_hotplug_status_watch(struct backend_info *be)
++{
++	if (be->have_hotplug_status_watch) {
++		unregister_xenbus_watch(&be->hotplug_status_watch);
++		kfree(be->hotplug_status_watch.node);
++	}
++	be->have_hotplug_status_watch = 0;
++}
++
++static void hotplug_status_changed(struct xenbus_watch *watch,
++				   const char **vec,
++				   unsigned int vec_size)
++{
++	struct backend_info *be = container_of(watch,
++					       struct backend_info,
++					       hotplug_status_watch);
++	char *str;
++	unsigned int len;
++
++	str = xenbus_read(XBT_NIL, be->dev->nodename, "hotplug-status", &len);
++	if (IS_ERR(str))
++		return;
++	if (len == sizeof("connected")-1 && !memcmp(str, "connected", len)) {
++		xenbus_switch_state(be->dev, XenbusStateConnected);
++		/* Not interested in this watch anymore. */
++		unregister_hotplug_status_watch(be);
++	}
++	kfree(str);
++}
++
+ static void connect(struct backend_info *be)
+ {
+ 	int err;
+@@ -348,7 +382,16 @@ static void connect(struct backend_info *be)
+ 			  &be->netif->credit_usec);
+ 	be->netif->remaining_credit = be->netif->credit_bytes;
+ 
+-	xenbus_switch_state(dev, XenbusStateConnected);
++	unregister_hotplug_status_watch(be);
++	err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch,
++				   hotplug_status_changed,
++				   "%s/%s", dev->nodename, "hotplug-status");
++	if (err) {
++		/* Switch now, since we can't do a watch. */
++		xenbus_switch_state(dev, XenbusStateConnected);
++	} else {
++		be->have_hotplug_status_watch = 1;
++	}
+ 
+ 	netif_wake_queue(be->netif->dev);
+ }
+-- 
+1.7.3.4
+
+
+From 509cc7f20f866277a8f5d5895bb266b5b68aac6d Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Tue, 23 Feb 2010 12:11:51 +0000
+Subject: [PATCH 082/139] xen: netback: Always pull through PKT_PROT_LEN bytes into the linear part of an skb.
+
+Previously PKT_PROT_LEN would only have an effect on the first fragment.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/netback.c |   10 ++++++++++
+ 1 files changed, 10 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 4b24893..d4a7a56 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -1334,6 +1334,16 @@ static void net_tx_submit(void)
+ 
+ 		netbk_fill_frags(skb);
+ 
++		/*
++		 * If the initial fragment was < PKT_PROT_LEN then
++		 * pull through some bytes from the other fragments to
++		 * increase the linear region to PKT_PROT_LEN bytes.
++		 */
++		if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) {
++			int target = min_t(int, skb->len, PKT_PROT_LEN);
++			__pskb_pull_tail(skb, target - skb_headlen(skb));
++		}
++
+ 		skb->dev      = netif->dev;
+ 		skb->protocol = eth_type_trans(skb, skb->dev);
+ 
+-- 
+1.7.3.4
+
+
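The hunk above pulls data out of the fragments until the linear area holds min(skb->len, PKT_PROT_LEN) bytes, so later header parsing never has to reach into a fragment. A toy model of that pull with a single fragment and a plain memcpy in place of __pskb_pull_tail(); sizes and names are illustrative.

#include <stdio.h>
#include <string.h>

#define PKT_PROT_LEN 72		/* bytes we want in the linear area */

struct toy_skb {
	unsigned char linear[256];
	size_t headlen;			/* bytes currently in the linear area */
	const unsigned char *frag;	/* single fragment, for simplicity */
	size_t frag_len;
};

static size_t toy_skb_len(const struct toy_skb *skb)
{
	return skb->headlen + skb->frag_len;
}

/* Move 'delta' bytes from the fragment into the linear area. */
static void toy_pull_tail(struct toy_skb *skb, size_t delta)
{
	if (delta > skb->frag_len)
		delta = skb->frag_len;
	memcpy(skb->linear + skb->headlen, skb->frag, delta);
	skb->headlen += delta;
	skb->frag += delta;
	skb->frag_len -= delta;
}

int main(void)
{
	static const unsigned char payload[128];	/* pretend fragment data */
	struct toy_skb skb = { .headlen = 10, .frag = payload, .frag_len = 128 };

	/* Same decision as net_tx_submit(): if the headers are not fully in the
	 * linear area, pull up to PKT_PROT_LEN (or the whole packet if shorter). */
	if (skb.headlen < PKT_PROT_LEN && skb.frag_len > 0) {
		size_t target = toy_skb_len(&skb);
		if (target > PKT_PROT_LEN)
			target = PKT_PROT_LEN;
		toy_pull_tail(&skb, target - skb.headlen);
	}

	printf("linear bytes: %zu, fragment bytes left: %zu\n",
	       skb.headlen, skb.frag_len);	/* 72 and 66 */
	return 0;
}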
+From 673c82b5110cfffafe1e1978bc07d6d10d111d50 Mon Sep 17 00:00:00 2001
+From: Steven Smith <ssmith at xensource.com>
+Date: Tue, 23 Feb 2010 11:49:26 +0000
+Subject: [PATCH 083/139] xen: netback: try to pull a minimum of 72 bytes into the skb data area
+ when receiving a packet into netback.
+
+The previous number, 64, tended to place a fragment boundary in the middle of
+the TCP header options and led to unnecessary fragmentation in Windows <->
+Windows networking.
+
+Signed-off-by: Steven Smith <ssmith at xensource.com>
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/netback.c |    9 +++------
+ 1 files changed, 3 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index d4a7a56..44357d7 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -116,13 +116,10 @@ static inline int netif_page_index(struct page *pg)
+ /*
+  * This is the amount of packet we copy rather than map, so that the
+  * guest can't fiddle with the contents of the headers while we do
+- * packet processing on them (netfilter, routing, etc).  This could
+- * probably do with being larger, since 1) 64-bytes isn't necessarily
+- * long enough to cover a full christmas-tree ip+tcp header, let alone
+- * packet contents, and 2) the data is probably in cache anyway
+- * (though perhaps some other cpu's cache).
++ * packet processing on them (netfilter, routing, etc). 72 is enough
++ * to cover TCP+IP headers including options.
+  */
+-#define PKT_PROT_LEN 64
++#define PKT_PROT_LEN 72
+ 
+ static struct pending_tx_info {
+ 	struct xen_netif_tx_request req;
+-- 
+1.7.3.4
+
+
+From c83bd213efd3ebf700189249c30d987b1cb14d7e Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Tue, 23 Feb 2010 11:54:30 +0000
+Subject: [PATCH 084/139] xen: netback: Allow setting of large MTU before rings have connected.
+
+This allows large MTU to be configured by the VIF hotplug
+script. Previously this would fail because at the point the hotplug
+script runs the VIF features have most likely not been negotiated with
+the frontend and so SG has not yet been enabled. Invert this behaviour
+so that SG is assumed present until negotiations prove otherwise and
+reduce MTU at that point.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/interface.c |    6 +++++-
+ drivers/xen/netback/xenbus.c    |    8 +++++---
+ 2 files changed, 10 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
+index 21c1f95..b23b14d 100644
+--- a/drivers/xen/netback/interface.c
++++ b/drivers/xen/netback/interface.c
+@@ -104,6 +104,9 @@ static int netbk_set_sg(struct net_device *dev, u32 data)
+ 			return -ENOSYS;
+ 	}
+ 
++	if (dev->mtu > ETH_DATA_LEN)
++		dev->mtu = ETH_DATA_LEN;
++
+ 	return ethtool_op_set_sg(dev, data);
+ }
+ 
+@@ -207,6 +210,7 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
+ 	memset(netif, 0, sizeof(*netif));
+ 	netif->domid  = domid;
+ 	netif->handle = handle;
++	netif->features = NETIF_F_SG;
+ 	atomic_set(&netif->refcnt, 1);
+ 	init_waitqueue_head(&netif->waiting_to_free);
+ 	netif->dev = dev;
+@@ -223,7 +227,7 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
+ 	init_timer(&netif->tx_queue_timeout);
+ 
+ 	dev->netdev_ops	= &netback_ops;
+-	dev->features	= NETIF_F_IP_CSUM;
++	dev->features   = NETIF_F_IP_CSUM|NETIF_F_SG;
+ 
+ 	SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
+ 
+diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
+index d2407cc..fcd3c34 100644
+--- a/drivers/xen/netback/xenbus.c
++++ b/drivers/xen/netback/xenbus.c
+@@ -445,9 +445,11 @@ static int connect_rings(struct backend_info *be)
+ 
+ 	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", "%d", &val) < 0)
+ 		val = 0;
+-	if (val) {
+-		be->netif->features |= NETIF_F_SG;
+-		be->netif->dev->features |= NETIF_F_SG;
++	if (!val) {
++		be->netif->features &= ~NETIF_F_SG;
++		be->netif->dev->features &= ~NETIF_F_SG;
++		if (be->netif->dev->mtu > ETH_DATA_LEN)
++			be->netif->dev->mtu = ETH_DATA_LEN;
+ 	}
+ 
+ 	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4", "%d",
+-- 
+1.7.3.4
+
+
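The patch above inverts the assumption: SG is presumed available, so a hotplug script may set a large MTU before the frontend connects, and the MTU is clamped back to ETH_DATA_LEN only if feature negotiation later shows SG is absent. A sketch of that ordering; the 64K upper bound and all names are illustrative, not the driver's.

#include <stdbool.h>
#include <stdio.h>

#define ETH_DATA_LEN 1500

struct toy_vif {
	bool sg;	/* scatter-gather assumed until negotiated away */
	int  mtu;
};

/* Hotplug script may set a jumbo MTU before the frontend has connected;
 * allow it as long as SG is still assumed to be available. */
static int toy_change_mtu(struct toy_vif *vif, int mtu)
{
	int max = vif->sg ? 64 * 1024 - 1 : ETH_DATA_LEN;

	if (mtu > max)
		return -1;
	vif->mtu = mtu;
	return 0;
}

/* Called once feature negotiation completes; clamp if SG turned out missing. */
static void toy_set_sg(struct toy_vif *vif, bool sg)
{
	vif->sg = sg;
	if (!sg && vif->mtu > ETH_DATA_LEN)
		vif->mtu = ETH_DATA_LEN;
}

int main(void)
{
	struct toy_vif vif = { .sg = true, .mtu = ETH_DATA_LEN };

	printf("set mtu 9000: %d\n", toy_change_mtu(&vif, 9000));	/* accepted */
	toy_set_sg(&vif, false);	/* frontend lacks feature-sg */
	printf("mtu after negotiation: %d\n", vif.mtu);			/* 1500 */
	return 0;
}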
+From e5cd35b00cb63f3a3fa1651260a58d59bbc134b7 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Fri, 19 Mar 2010 13:09:16 -0700
+Subject: [PATCH 085/139] xen: netback: use get_sset_count rather than obsolete get_stats_count
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/interface.c |   11 ++++++++---
+ 1 files changed, 8 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
+index b23b14d..086d939 100644
+--- a/drivers/xen/netback/interface.c
++++ b/drivers/xen/netback/interface.c
+@@ -136,9 +136,14 @@ static const struct netif_stat {
+ 	{ "copied_skbs", offsetof(struct xen_netif, nr_copied_skbs) },
+ };
+ 
+-static int netbk_get_stats_count(struct net_device *dev)
++static int netbk_get_sset_count(struct net_device *dev, int string_set)
+ {
+-	return ARRAY_SIZE(netbk_stats);
++	switch (string_set) {
++	case ETH_SS_STATS:
++		return ARRAY_SIZE(netbk_stats);
++	default:
++		return -EINVAL;
++	}
+ }
+ 
+ static void netbk_get_ethtool_stats(struct net_device *dev,
+@@ -176,7 +181,7 @@ static struct ethtool_ops network_ethtool_ops =
+ 	.set_tso = netbk_set_tso,
+ 	.get_link = ethtool_op_get_link,
+ 
+-	.get_stats_count = netbk_get_stats_count,
++	.get_sset_count = netbk_get_sset_count,
+ 	.get_ethtool_stats = netbk_get_ethtool_stats,
+ 	.get_strings = netbk_get_strings,
+ };
+-- 
+1.7.3.4
+
+
+From 0c34835ee66ad641f01a8077a973b7ec1bfdcd86 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Tue, 11 May 2010 09:33:42 +0100
+Subject: [PATCH 086/139] xen: netback: correctly setup skb->ip_summed on receive
+
+In 2.6.18 CHECKSUM_PARTIAL and CHECKSUM_UNNECESSARY were both synonyms for
+CHECKSUM_HW. This is no longer the case and we need to select the correct one.
+
+  data_validated csum_blank -> ip_summed
+  0              0             CHECKSUM_NONE
+  0              1             CHECKSUM_PARTIAL
+  1              0             CHECKSUM_UNNECESSARY
+  1              1             CHECKSUM_PARTIAL
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Tested-by: Matej Zary <zary at cvtisr.sk>
+Tested-by: Michael D Labriola <mlabriol at gdeb.com>
+---
+ drivers/xen/netback/netback.c |   10 +++-------
+ 1 files changed, 3 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 44357d7..725da0f 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -1320,14 +1320,10 @@ static void net_tx_submit(void)
+ 			netif_idx_release(pending_idx);
+ 		}
+ 
+-		/*
+-		 * Old frontends do not assert data_validated but we
+-		 * can infer it from csum_blank so test both flags.
+-		 */
+-		if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank))
++		if (txp->flags & NETTXF_csum_blank)
+ 			skb->ip_summed = CHECKSUM_PARTIAL;
+-		else
+-			skb->ip_summed = CHECKSUM_NONE;
++		else if (txp->flags & NETTXF_data_validated)
++			skb->ip_summed = CHECKSUM_UNNECESSARY;
+ 
+ 		netbk_fill_frags(skb);
+ 
+-- 
+1.7.3.4
+
+
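The mapping in the patch above gives csum_blank precedence: a packet whose checksum still has to be filled in needs CHECKSUM_PARTIAL whether or not its data was validated, and only then does data_validated select CHECKSUM_UNNECESSARY over CHECKSUM_NONE. A direct transcription of the table from the commit message, with illustrative stand-ins for the kernel constants:

#include <stdio.h>

enum toy_ip_summed {
	TOY_CHECKSUM_NONE,		/* not checked, must be verified on receive */
	TOY_CHECKSUM_UNNECESSARY,	/* already validated by the sender */
	TOY_CHECKSUM_PARTIAL,		/* checksum must still be computed */
};

/* Same decision as net_tx_submit() after the patch: csum_blank wins,
 * then data_validated, else none. */
static enum toy_ip_summed ip_summed_from_flags(int csum_blank, int data_validated)
{
	if (csum_blank)
		return TOY_CHECKSUM_PARTIAL;
	if (data_validated)
		return TOY_CHECKSUM_UNNECESSARY;
	return TOY_CHECKSUM_NONE;
}

int main(void)
{
	static const char *name[] = { "NONE", "UNNECESSARY", "PARTIAL" };
	int dv, cb;

	printf("data_validated csum_blank -> ip_summed\n");
	for (dv = 0; dv <= 1; dv++)
		for (cb = 0; cb <= 1; cb++)
			printf("%14d %10d    CHECKSUM_%s\n", dv, cb,
			       name[ip_summed_from_flags(cb, dv)]);
	return 0;
}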
+From 094944631cc5a9d6e623302c987f78117c0bf7ac Mon Sep 17 00:00:00 2001
+From: Dongxiao Xu <dongxiao.xu at intel.com>
+Date: Wed, 19 May 2010 16:58:56 -0700
+Subject: [PATCH 087/139] xen: netback: Move global/static variables into struct xen_netbk.
+
+Bundle a lot of discrete variables into a single structure.
+
+Signed-off-by: Dongxiao Xu <dongxiao.xu at intel.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/common.h  |   59 +++++++
+ drivers/xen/netback/netback.c |  360 ++++++++++++++++++++---------------------
+ 2 files changed, 232 insertions(+), 187 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 1983768..00208f4 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -222,4 +222,63 @@ static inline int netbk_can_sg(struct net_device *dev)
+ 	return netif->features & NETIF_F_SG;
+ }
+ 
++struct pending_tx_info {
++	struct xen_netif_tx_request req;
++	struct xen_netif *netif;
++};
++typedef unsigned int pending_ring_idx_t;
++
++struct netbk_rx_meta {
++	skb_frag_t frag;
++	int id;
++};
++
++struct netbk_tx_pending_inuse {
++	struct list_head list;
++	unsigned long alloc_time;
++};
++
++#define MAX_PENDING_REQS 256
++
++struct xen_netbk {
++	struct tasklet_struct net_tx_tasklet;
++	struct tasklet_struct net_rx_tasklet;
++
++	struct sk_buff_head rx_queue;
++	struct sk_buff_head tx_queue;
++
++	struct timer_list net_timer;
++	struct timer_list netbk_tx_pending_timer;
++
++	struct page **mmap_pages;
++
++	pending_ring_idx_t pending_prod;
++	pending_ring_idx_t pending_cons;
++	pending_ring_idx_t dealloc_prod;
++	pending_ring_idx_t dealloc_cons;
++
++	struct list_head pending_inuse_head;
++	struct list_head net_schedule_list;
++
++	/* Protect the net_schedule_list in netif. */
++	spinlock_t net_schedule_list_lock;
++
++	struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
++	struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
++	struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
++	struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
++
++	grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
++	u16 pending_ring[MAX_PENDING_REQS];
++	u16 dealloc_ring[MAX_PENDING_REQS];
++
++	struct multicall_entry rx_mcl[NET_RX_RING_SIZE+3];
++	struct mmu_update rx_mmu[NET_RX_RING_SIZE];
++	struct gnttab_transfer grant_trans_op[NET_RX_RING_SIZE];
++	struct gnttab_copy grant_copy_op[NET_RX_RING_SIZE];
++	unsigned char rx_notify[NR_IRQS];
++	u16 notify_list[NET_RX_RING_SIZE];
++	struct netbk_rx_meta meta[NET_RX_RING_SIZE];
++};
++
+ #endif /* __NETIF__BACKEND__COMMON_H__ */
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 725da0f..417f497 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -48,16 +48,7 @@
+ 
+ /*define NETBE_DEBUG_INTERRUPT*/
+ 
+-struct netbk_rx_meta {
+-	skb_frag_t frag;
+-	int id;
+-};
+-
+-struct netbk_tx_pending_inuse {
+-	struct list_head list;
+-	unsigned long alloc_time;
+-};
+-
++static struct xen_netbk *netbk;
+ 
+ static void netif_idx_release(u16 pending_idx);
+ static void make_tx_response(struct xen_netif *netif,
+@@ -71,22 +62,12 @@ static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
+ 					     u16      flags);
+ 
+ static void net_tx_action(unsigned long unused);
+-static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0);
+ 
+ static void net_rx_action(unsigned long unused);
+-static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);
+-
+-static struct timer_list net_timer;
+-static struct timer_list netbk_tx_pending_timer;
+ 
+-#define MAX_PENDING_REQS 256
+-
+-static struct sk_buff_head rx_queue;
+-
+-static struct page **mmap_pages;
+ static inline unsigned long idx_to_pfn(unsigned int idx)
+ {
+-	return page_to_pfn(mmap_pages[idx]);
++	return page_to_pfn(netbk->mmap_pages[idx]);
+ }
+ 
+ static inline unsigned long idx_to_kaddr(unsigned int idx)
+@@ -107,7 +88,7 @@ static inline int netif_page_index(struct page *pg)
+ 	if (!PageForeign(pg))
+ 		return -1;
+ 
+-	if ((idx >= MAX_PENDING_REQS) || (mmap_pages[idx] != pg))
++	if ((idx >= MAX_PENDING_REQS) || (netbk->mmap_pages[idx] != pg))
+ 		return -1;
+ 
+ 	return idx;
+@@ -121,46 +102,17 @@ static inline int netif_page_index(struct page *pg)
+  */
+ #define PKT_PROT_LEN 72
+ 
+-static struct pending_tx_info {
+-	struct xen_netif_tx_request req;
+-	struct xen_netif *netif;
+-} pending_tx_info[MAX_PENDING_REQS];
+-static u16 pending_ring[MAX_PENDING_REQS];
+-typedef unsigned int pending_ring_idx_t;
+-
+ static inline pending_ring_idx_t pending_index(unsigned i)
+ {
+ 	return i & (MAX_PENDING_REQS-1);
+ }
+ 
+-static pending_ring_idx_t pending_prod, pending_cons;
+-
+ static inline pending_ring_idx_t nr_pending_reqs(void)
+ {
+-	return MAX_PENDING_REQS - pending_prod + pending_cons;
++	return MAX_PENDING_REQS -
++		netbk->pending_prod + netbk->pending_cons;
+ }
+ 
+-/* Freed TX SKBs get batched on this ring before return to pending_ring. */
+-static u16 dealloc_ring[MAX_PENDING_REQS];
+-static pending_ring_idx_t dealloc_prod, dealloc_cons;
+-
+-/* Doubly-linked list of in-use pending entries. */
+-static struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
+-static LIST_HEAD(pending_inuse_head);
+-
+-static struct sk_buff_head tx_queue;
+-
+-static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
+-static struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
+-static struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
+-
+-static LIST_HEAD(net_schedule_list);
+-static DEFINE_SPINLOCK(net_schedule_list_lock);
+-
+-#define MAX_MFN_ALLOC 64
+-static unsigned long mfn_list[MAX_MFN_ALLOC];
+-static unsigned int alloc_index = 0;
+-
+ /* Setting this allows the safe use of this driver without netloop. */
+ static int MODPARM_copy_skb = 1;
+ module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
+@@ -168,18 +120,12 @@ MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
+ 
+ int netbk_copy_skb_mode;
+ 
+-static inline unsigned long alloc_mfn(void)
+-{
+-	BUG_ON(alloc_index == 0);
+-	return mfn_list[--alloc_index];
+-}
+-
+ static inline void maybe_schedule_tx_action(void)
+ {
+ 	smp_mb();
+ 	if ((nr_pending_reqs() < (MAX_PENDING_REQS/2)) &&
+-	    !list_empty(&net_schedule_list))
+-		tasklet_schedule(&net_tx_tasklet);
++	    !list_empty(&netbk->net_schedule_list))
++		tasklet_schedule(&netbk->net_tx_tasklet);
+ }
+ 
+ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
+@@ -328,9 +274,8 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ 			mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
+ 		}
+ 	}
+-
+-	skb_queue_tail(&rx_queue, skb);
+-	tasklet_schedule(&net_rx_tasklet);
++	skb_queue_tail(&netbk->rx_queue, skb);
++	tasklet_schedule(&netbk->net_rx_tasklet);
+ 
+ 	return 0;
+ 
+@@ -372,7 +317,7 @@ static u16 netbk_gop_frag(struct xen_netif *netif, struct netbk_rx_meta *meta,
+ 	copy_gop = npo->copy + npo->copy_prod++;
+ 	copy_gop->flags = GNTCOPY_dest_gref;
+ 	if (idx > -1) {
+-		struct pending_tx_info *src_pend = &pending_tx_info[idx];
++		struct pending_tx_info *src_pend = &netbk->pending_tx_info[idx];
+ 		copy_gop->source.domid = src_pend->netif->domid;
+ 		copy_gop->source.u.ref = src_pend->req.gref;
+ 		copy_gop->flags |= GNTCOPY_source_gref;
+@@ -487,30 +432,19 @@ static void net_rx_action(unsigned long unused)
+ 	int count;
+ 	unsigned long offset;
+ 
+-	/*
+-	 * Putting hundreds of bytes on the stack is considered rude.
+-	 * Static works because a tasklet can only be on one CPU at any time.
+-	 */
+-	static struct multicall_entry rx_mcl[NET_RX_RING_SIZE+3];
+-	static struct mmu_update rx_mmu[NET_RX_RING_SIZE];
+-	static struct gnttab_transfer grant_trans_op[NET_RX_RING_SIZE];
+-	static struct gnttab_copy grant_copy_op[NET_RX_RING_SIZE];
+-	static unsigned char rx_notify[NR_IRQS];
+-	static u16 notify_list[NET_RX_RING_SIZE];
+-	static struct netbk_rx_meta meta[NET_RX_RING_SIZE];
+-
+ 	struct netrx_pending_operations npo = {
+-		mmu: rx_mmu,
+-		trans: grant_trans_op,
+-		copy: grant_copy_op,
+-		mcl: rx_mcl,
+-		meta: meta};
++		.mmu   = netbk->rx_mmu,
++		.trans = netbk->grant_trans_op,
++		.copy  = netbk->grant_copy_op,
++		.mcl   = netbk->rx_mcl,
++		.meta  = netbk->meta,
++	};
+ 
+ 	skb_queue_head_init(&rxq);
+ 
+ 	count = 0;
+ 
+-	while ((skb = skb_dequeue(&rx_queue)) != NULL) {
++	while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) {
+ 		nr_frags = skb_shinfo(skb)->nr_frags;
+ 		*(int *)skb->cb = nr_frags;
+ 
+@@ -525,39 +459,39 @@ static void net_rx_action(unsigned long unused)
+ 			break;
+ 	}
+ 
+-	BUG_ON(npo.meta_prod > ARRAY_SIZE(meta));
++	BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
+ 
+ 	npo.mmu_mcl = npo.mcl_prod;
+ 	if (npo.mcl_prod) {
+ 		BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
+-		BUG_ON(npo.mmu_prod > ARRAY_SIZE(rx_mmu));
++		BUG_ON(npo.mmu_prod > ARRAY_SIZE(netbk->rx_mmu));
+ 		mcl = npo.mcl + npo.mcl_prod++;
+ 
+ 		BUG_ON(mcl[-1].op != __HYPERVISOR_update_va_mapping);
+ 		mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
+ 
+ 		mcl->op = __HYPERVISOR_mmu_update;
+-		mcl->args[0] = (unsigned long)rx_mmu;
++		mcl->args[0] = (unsigned long)netbk->rx_mmu;
+ 		mcl->args[1] = npo.mmu_prod;
+ 		mcl->args[2] = 0;
+ 		mcl->args[3] = DOMID_SELF;
+ 	}
+ 
+ 	if (npo.trans_prod) {
+-		BUG_ON(npo.trans_prod > ARRAY_SIZE(grant_trans_op));
++		BUG_ON(npo.trans_prod > ARRAY_SIZE(netbk->grant_trans_op));
+ 		mcl = npo.mcl + npo.mcl_prod++;
+ 		mcl->op = __HYPERVISOR_grant_table_op;
+ 		mcl->args[0] = GNTTABOP_transfer;
+-		mcl->args[1] = (unsigned long)grant_trans_op;
++		mcl->args[1] = (unsigned long)netbk->grant_trans_op;
+ 		mcl->args[2] = npo.trans_prod;
+ 	}
+ 
+ 	if (npo.copy_prod) {
+-		BUG_ON(npo.copy_prod > ARRAY_SIZE(grant_copy_op));
++		BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
+ 		mcl = npo.mcl + npo.mcl_prod++;
+ 		mcl->op = __HYPERVISOR_grant_table_op;
+ 		mcl->args[0] = GNTTABOP_copy;
+-		mcl->args[1] = (unsigned long)grant_copy_op;
++		mcl->args[1] = (unsigned long)netbk->grant_copy_op;
+ 		mcl->args[2] = npo.copy_prod;
+ 	}
+ 
+@@ -565,7 +499,7 @@ static void net_rx_action(unsigned long unused)
+ 	if (!npo.mcl_prod)
+ 		return;
+ 
+-	BUG_ON(npo.mcl_prod > ARRAY_SIZE(rx_mcl));
++	BUG_ON(npo.mcl_prod > ARRAY_SIZE(netbk->rx_mcl));
+ 
+ 	ret = HYPERVISOR_multicall(npo.mcl, npo.mcl_prod);
+ 	BUG_ON(ret != 0);
+@@ -582,7 +516,7 @@ static void net_rx_action(unsigned long unused)
+ 
+ 		status = netbk_check_gop(nr_frags, netif->domid, &npo);
+ 
+-		id = meta[npo.meta_cons].id;
++		id = netbk->meta[npo.meta_cons].id;
+ 		flags = nr_frags ? NETRXF_more_data : 0;
+ 
+ 		if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
+@@ -595,7 +529,7 @@ static void net_rx_action(unsigned long unused)
+ 		resp = make_rx_response(netif, id, status, offset,
+ 					skb_headlen(skb), flags);
+ 
+-		if (meta[npo.meta_cons].frag.size) {
++		if (netbk->meta[npo.meta_cons].frag.size) {
+ 			struct xen_netif_extra_info *gso =
+ 				(struct xen_netif_extra_info *)
+ 				RING_GET_RESPONSE(&netif->rx,
+@@ -603,7 +537,7 @@ static void net_rx_action(unsigned long unused)
+ 
+ 			resp->flags |= NETRXF_extra_info;
+ 
+-			gso->u.gso.size = meta[npo.meta_cons].frag.size;
++			gso->u.gso.size = netbk->meta[npo.meta_cons].frag.size;
+ 			gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
+ 			gso->u.gso.pad = 0;
+ 			gso->u.gso.features = 0;
+@@ -613,14 +547,14 @@ static void net_rx_action(unsigned long unused)
+ 		}
+ 
+ 		netbk_add_frag_responses(netif, status,
+-					 meta + npo.meta_cons + 1,
+-					 nr_frags);
++				netbk->meta + npo.meta_cons + 1,
++				nr_frags);
+ 
+ 		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
+ 		irq = netif->irq;
+-		if (ret && !rx_notify[irq]) {
+-			rx_notify[irq] = 1;
+-			notify_list[notify_nr++] = irq;
++		if (ret && !netbk->rx_notify[irq]) {
++			netbk->rx_notify[irq] = 1;
++			netbk->notify_list[notify_nr++] = irq;
+ 		}
+ 
+ 		if (netif_queue_stopped(netif->dev) &&
+@@ -634,24 +568,25 @@ static void net_rx_action(unsigned long unused)
+ 	}
+ 
+ 	while (notify_nr != 0) {
+-		irq = notify_list[--notify_nr];
+-		rx_notify[irq] = 0;
++		irq = netbk->notify_list[--notify_nr];
++		netbk->rx_notify[irq] = 0;
+ 		notify_remote_via_irq(irq);
+ 	}
+ 
+ 	/* More work to do? */
+-	if (!skb_queue_empty(&rx_queue) && !timer_pending(&net_timer))
+-		tasklet_schedule(&net_rx_tasklet);
++	if (!skb_queue_empty(&netbk->rx_queue) &&
++			!timer_pending(&netbk->net_timer))
++		tasklet_schedule(&netbk->net_rx_tasklet);
+ }
+ 
+ static void net_alarm(unsigned long unused)
+ {
+-	tasklet_schedule(&net_rx_tasklet);
++	tasklet_schedule(&netbk->net_rx_tasklet);
+ }
+ 
+ static void netbk_tx_pending_timeout(unsigned long unused)
+ {
+-	tasklet_schedule(&net_tx_tasklet);
++	tasklet_schedule(&netbk->net_tx_tasklet);
+ }
+ 
+ struct net_device_stats *netif_be_get_stats(struct net_device *dev)
+@@ -667,12 +602,12 @@ static int __on_net_schedule_list(struct xen_netif *netif)
+ 
+ static void remove_from_net_schedule_list(struct xen_netif *netif)
+ {
+-	spin_lock_irq(&net_schedule_list_lock);
++	spin_lock_irq(&netbk->net_schedule_list_lock);
+ 	if (likely(__on_net_schedule_list(netif))) {
+ 		list_del_init(&netif->list);
+ 		netif_put(netif);
+ 	}
+-	spin_unlock_irq(&net_schedule_list_lock);
++	spin_unlock_irq(&netbk->net_schedule_list_lock);
+ }
+ 
+ static void add_to_net_schedule_list_tail(struct xen_netif *netif)
+@@ -680,13 +615,13 @@ static void add_to_net_schedule_list_tail(struct xen_netif *netif)
+ 	if (__on_net_schedule_list(netif))
+ 		return;
+ 
+-	spin_lock_irq(&net_schedule_list_lock);
++	spin_lock_irq(&netbk->net_schedule_list_lock);
+ 	if (!__on_net_schedule_list(netif) &&
+ 	    likely(netif_schedulable(netif))) {
+-		list_add_tail(&netif->list, &net_schedule_list);
++		list_add_tail(&netif->list, &netbk->net_schedule_list);
+ 		netif_get(netif);
+ 	}
+-	spin_unlock_irq(&net_schedule_list_lock);
++	spin_unlock_irq(&netbk->net_schedule_list_lock);
+ }
+ 
+ void netif_schedule_work(struct xen_netif *netif)
+@@ -736,8 +671,9 @@ static void tx_credit_callback(unsigned long data)
+ 
+ static inline int copy_pending_req(pending_ring_idx_t pending_idx)
+ {
+-	return gnttab_copy_grant_page(grant_tx_handle[pending_idx],
+-				      &mmap_pages[pending_idx]);
++	return gnttab_copy_grant_page(
++			netbk->grant_tx_handle[pending_idx],
++			&netbk->mmap_pages[pending_idx]);
+ }
+ 
+ inline static void net_tx_action_dealloc(void)
+@@ -750,22 +686,24 @@ inline static void net_tx_action_dealloc(void)
+ 	int ret;
+ 	LIST_HEAD(list);
+ 
+-	dc = dealloc_cons;
+-	gop = tx_unmap_ops;
++	dc = netbk->dealloc_cons;
++	gop = netbk->tx_unmap_ops;
+ 
+ 	/*
+ 	 * Free up any grants we have finished using
+ 	 */
+ 	do {
+-		dp = dealloc_prod;
++		dp = netbk->dealloc_prod;
+ 
+ 		/* Ensure we see all indices enqueued by netif_idx_release(). */
+ 		smp_rmb();
+ 
+ 		while (dc != dp) {
+ 			unsigned long pfn;
++			struct netbk_tx_pending_inuse *pending_inuse =
++					netbk->pending_inuse;
+ 
+-			pending_idx = dealloc_ring[pending_index(dc++)];
++			pending_idx = netbk->dealloc_ring[pending_index(dc++)];
+ 			list_move_tail(&pending_inuse[pending_idx].list, &list);
+ 
+ 			pfn = idx_to_pfn(pending_idx);
+@@ -773,22 +711,27 @@ inline static void net_tx_action_dealloc(void)
+ 			if (!phys_to_machine_mapping_valid(pfn))
+ 				continue;
+ 
+-			gnttab_set_unmap_op(gop, idx_to_kaddr(pending_idx),
+-					    GNTMAP_host_map,
+-					    grant_tx_handle[pending_idx]);
++			gnttab_set_unmap_op(gop,
++					idx_to_kaddr(pending_idx),
++					GNTMAP_host_map,
++					netbk->grant_tx_handle[pending_idx]);
+ 			gop++;
+ 		}
+ 
+ 		if (netbk_copy_skb_mode != NETBK_DELAYED_COPY_SKB ||
+-		    list_empty(&pending_inuse_head))
++		    list_empty(&netbk->pending_inuse_head))
+ 			break;
+ 
+ 		/* Copy any entries that have been pending for too long. */
+-		list_for_each_entry_safe(inuse, n, &pending_inuse_head, list) {
++		list_for_each_entry_safe(inuse, n,
++				&netbk->pending_inuse_head, list) {
++			struct pending_tx_info *pending_tx_info;
++			pending_tx_info = netbk->pending_tx_info;
++
+ 			if (time_after(inuse->alloc_time + HZ / 2, jiffies))
+ 				break;
+ 
+-			pending_idx = inuse - pending_inuse;
++			pending_idx = inuse - netbk->pending_inuse;
+ 
+ 			pending_tx_info[pending_idx].netif->nr_copied_skbs++;
+ 
+@@ -805,16 +748,21 @@ inline static void net_tx_action_dealloc(void)
+ 
+ 			break;
+ 		}
+-	} while (dp != dealloc_prod);
++	} while (dp != netbk->dealloc_prod);
+ 
+-	dealloc_cons = dc;
++	netbk->dealloc_cons = dc;
+ 
+ 	ret = HYPERVISOR_grant_table_op(
+-		GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops);
++		GNTTABOP_unmap_grant_ref, netbk->tx_unmap_ops,
++		gop - netbk->tx_unmap_ops);
+ 	BUG_ON(ret);
+ 
+ 	list_for_each_entry_safe(inuse, n, &list, list) {
+-		pending_idx = inuse - pending_inuse;
++		struct pending_tx_info *pending_tx_info;
++		pending_ring_idx_t index;
++
++		pending_tx_info = netbk->pending_tx_info;
++		pending_idx = inuse - netbk->pending_inuse;
+ 
+ 		netif = pending_tx_info[pending_idx].netif;
+ 
+@@ -822,9 +770,10 @@ inline static void net_tx_action_dealloc(void)
+ 				 NETIF_RSP_OKAY);
+ 
+ 		/* Ready for next use. */
+-		gnttab_reset_grant_page(mmap_pages[pending_idx]);
++		gnttab_reset_grant_page(netbk->mmap_pages[pending_idx]);
+ 
+-		pending_ring[pending_index(pending_prod++)] = pending_idx;
++		index = pending_index(netbk->pending_prod++);
++		netbk->pending_ring[index] = pending_idx;
+ 
+ 		netif_put(netif);
+ 
+@@ -832,7 +781,8 @@ inline static void net_tx_action_dealloc(void)
+ 	}
+ }
+ 
+-static void netbk_tx_err(struct xen_netif *netif, struct xen_netif_tx_request *txp, RING_IDX end)
++static void netbk_tx_err(struct xen_netif *netif,
++		struct xen_netif_tx_request *txp, RING_IDX end)
+ {
+ 	RING_IDX cons = netif->tx.req_cons;
+ 
+@@ -902,7 +852,12 @@ static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netif *netif,
+ 	start = ((unsigned long)shinfo->frags[0].page == pending_idx);
+ 
+ 	for (i = start; i < shinfo->nr_frags; i++, txp++) {
+-		pending_idx = pending_ring[pending_index(pending_cons++)];
++		pending_ring_idx_t index;
++		struct pending_tx_info *pending_tx_info =
++			netbk->pending_tx_info;
++
++		index = pending_index(netbk->pending_cons++);
++		pending_idx = netbk->pending_ring[index];
+ 
+ 		gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx),
+ 				  GNTMAP_host_map | GNTMAP_readonly,
+@@ -922,6 +877,7 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
+ {
+ 	struct gnttab_map_grant_ref *mop = *mopp;
+ 	int pending_idx = *((u16 *)skb->data);
++	struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
+ 	struct xen_netif *netif = pending_tx_info[pending_idx].netif;
+ 	struct xen_netif_tx_request *txp;
+ 	struct skb_shared_info *shinfo = skb_shinfo(skb);
+@@ -931,15 +887,17 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
+ 	/* Check status of header. */
+ 	err = mop->status;
+ 	if (unlikely(err)) {
++		pending_ring_idx_t index;
++		index = pending_index(netbk->pending_prod++);
+ 		txp = &pending_tx_info[pending_idx].req;
+ 		make_tx_response(netif, txp, NETIF_RSP_ERROR);
+-		pending_ring[pending_index(pending_prod++)] = pending_idx;
++		netbk->pending_ring[index] = pending_idx;
+ 		netif_put(netif);
+ 	} else {
+ 		set_phys_to_machine(
+ 			__pa(idx_to_kaddr(pending_idx)) >> PAGE_SHIFT,
+ 			FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
+-		grant_tx_handle[pending_idx] = mop->handle;
++		netbk->grant_tx_handle[pending_idx] = mop->handle;
+ 	}
+ 
+ 	/* Skip first skb fragment if it is on same page as header fragment. */
+@@ -947,16 +905,19 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
+ 
+ 	for (i = start; i < nr_frags; i++) {
+ 		int j, newerr;
++		pending_ring_idx_t index;
+ 
+ 		pending_idx = (unsigned long)shinfo->frags[i].page;
+ 
+ 		/* Check error status: if okay then remember grant handle. */
+ 		newerr = (++mop)->status;
+ 		if (likely(!newerr)) {
++			unsigned long addr;
++			addr = idx_to_kaddr(pending_idx);
+ 			set_phys_to_machine(
+-				__pa(idx_to_kaddr(pending_idx))>>PAGE_SHIFT,
++				__pa(addr)>>PAGE_SHIFT,
+ 				FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
+-			grant_tx_handle[pending_idx] = mop->handle;
++			netbk->grant_tx_handle[pending_idx] = mop->handle;
+ 			/* Had a previous error? Invalidate this fragment. */
+ 			if (unlikely(err))
+ 				netif_idx_release(pending_idx);
+@@ -964,9 +925,10 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
+ 		}
+ 
+ 		/* Error on this fragment: respond to client with an error. */
+-		txp = &pending_tx_info[pending_idx].req;
++		txp = &netbk->pending_tx_info[pending_idx].req;
+ 		make_tx_response(netif, txp, NETIF_RSP_ERROR);
+-		pending_ring[pending_index(pending_prod++)] = pending_idx;
++		index = pending_index(netbk->pending_prod++);
++		netbk->pending_ring[index] = pending_idx;
+ 		netif_put(netif);
+ 
+ 		/* Not the first error? Preceding frags already invalidated. */
+@@ -1002,11 +964,11 @@ static void netbk_fill_frags(struct sk_buff *skb)
+ 
+ 		pending_idx = (unsigned long)frag->page;
+ 
+-		pending_inuse[pending_idx].alloc_time = jiffies;
+-		list_add_tail(&pending_inuse[pending_idx].list,
+-			      &pending_inuse_head);
++		netbk->pending_inuse[pending_idx].alloc_time = jiffies;
++		list_add_tail(&netbk->pending_inuse[pending_idx].list,
++			      &netbk->pending_inuse_head);
+ 
+-		txp = &pending_tx_info[pending_idx].req;
++		txp = &netbk->pending_tx_info[pending_idx].req;
+ 		frag->page = virt_to_page(idx_to_kaddr(pending_idx));
+ 		frag->size = txp->size;
+ 		frag->page_offset = txp->offset;
+@@ -1145,9 +1107,9 @@ static unsigned net_tx_build_mops(void)
+ 	struct sk_buff *skb;
+ 	int ret;
+ 
+-	mop = tx_map_ops;
++	mop = netbk->tx_map_ops;
+ 	while (((nr_pending_reqs() + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
+-		!list_empty(&net_schedule_list)) {
++		!list_empty(&netbk->net_schedule_list)) {
+ 		struct xen_netif *netif;
+ 		struct xen_netif_tx_request txreq;
+ 		struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
+@@ -1156,9 +1118,11 @@ static unsigned net_tx_build_mops(void)
+ 		RING_IDX idx;
+ 		int work_to_do;
+ 		unsigned int data_len;
++		pending_ring_idx_t index;
+ 	
+ 		/* Get a netif from the list with work to do. */
+-		netif = list_first_entry(&net_schedule_list, struct xen_netif, list);
++		netif = list_first_entry(&netbk->net_schedule_list,
++				struct xen_netif, list);
+ 		netif_get(netif);
+ 		remove_from_net_schedule_list(netif);
+ 
+@@ -1217,7 +1181,8 @@ static unsigned net_tx_build_mops(void)
+ 			continue;
+ 		}
+ 
+-		pending_idx = pending_ring[pending_index(pending_cons)];
++		index = pending_index(netbk->pending_cons);
++		pending_idx = netbk->pending_ring[index];
+ 
+ 		data_len = (txreq.size > PKT_PROT_LEN &&
+ 			    ret < MAX_SKB_FRAGS) ?
+@@ -1250,9 +1215,9 @@ static unsigned net_tx_build_mops(void)
+ 				  txreq.gref, netif->domid);
+ 		mop++;
+ 
+-		memcpy(&pending_tx_info[pending_idx].req,
++		memcpy(&netbk->pending_tx_info[pending_idx].req,
+ 		       &txreq, sizeof(txreq));
+-		pending_tx_info[pending_idx].netif = netif;
++		netbk->pending_tx_info[pending_idx].netif = netif;
+ 		*((u16 *)skb->data) = pending_idx;
+ 
+ 		__skb_put(skb, data_len);
+@@ -1267,20 +1232,20 @@ static unsigned net_tx_build_mops(void)
+ 			skb_shinfo(skb)->frags[0].page = (void *)~0UL;
+ 		}
+ 
+-		__skb_queue_tail(&tx_queue, skb);
++		__skb_queue_tail(&netbk->tx_queue, skb);
+ 
+-		pending_cons++;
++		netbk->pending_cons++;
+ 
+ 		mop = netbk_get_requests(netif, skb, txfrags, mop);
+ 
+ 		netif->tx.req_cons = idx;
+ 		netif_schedule_work(netif);
+ 
+-		if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
++		if ((mop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops))
+ 			break;
+ 	}
+ 
+-	return mop - tx_map_ops;
++	return mop - netbk->tx_map_ops;
+ }
+ 
+ static void net_tx_submit(void)
+@@ -1288,16 +1253,16 @@ static void net_tx_submit(void)
+ 	struct gnttab_map_grant_ref *mop;
+ 	struct sk_buff *skb;
+ 
+-	mop = tx_map_ops;
+-	while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
++	mop = netbk->tx_map_ops;
++	while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
+ 		struct xen_netif_tx_request *txp;
+ 		struct xen_netif *netif;
+ 		u16 pending_idx;
+ 		unsigned data_len;
+ 
+ 		pending_idx = *((u16 *)skb->data);
+-		netif       = pending_tx_info[pending_idx].netif;
+-		txp         = &pending_tx_info[pending_idx].req;
++		netif = netbk->pending_tx_info[pending_idx].netif;
++		txp = &netbk->pending_tx_info[pending_idx].req;
+ 
+ 		/* Check the remap error code. */
+ 		if (unlikely(netbk_tx_check_mop(skb, &mop))) {
+@@ -1363,12 +1328,13 @@ static void net_tx_submit(void)
+ 	}
+ 
+ 	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
+-	    !list_empty(&pending_inuse_head)) {
++	    !list_empty(&netbk->pending_inuse_head)) {
+ 		struct netbk_tx_pending_inuse *oldest;
+ 
+-		oldest = list_entry(pending_inuse_head.next,
++		oldest = list_entry(netbk->pending_inuse_head.next,
+ 				    struct netbk_tx_pending_inuse, list);
+-		mod_timer(&netbk_tx_pending_timer, oldest->alloc_time + HZ);
++		mod_timer(&netbk->netbk_tx_pending_timer,
++				oldest->alloc_time + HZ);
+ 	}
+ }
+ 
+@@ -1378,7 +1344,7 @@ static void net_tx_action(unsigned long unused)
+ 	unsigned nr_mops;
+ 	int ret;
+ 
+-	if (dealloc_cons != dealloc_prod)
++	if (netbk->dealloc_cons != netbk->dealloc_prod)
+ 		net_tx_action_dealloc();
+ 
+ 	nr_mops = net_tx_build_mops();
+@@ -1387,7 +1353,7 @@ static void net_tx_action(unsigned long unused)
+ 		return;
+ 
+ 	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+-					tx_map_ops, nr_mops);
++					netbk->tx_map_ops, nr_mops);
+ 	BUG_ON(ret);
+ 
+ 	net_tx_submit();
+@@ -1397,15 +1363,17 @@ static void netif_idx_release(u16 pending_idx)
+ {
+ 	static DEFINE_SPINLOCK(_lock);
+ 	unsigned long flags;
++	pending_ring_idx_t index;
+ 
+ 	spin_lock_irqsave(&_lock, flags);
+-	dealloc_ring[pending_index(dealloc_prod)] = pending_idx;
++	index = pending_index(netbk->dealloc_prod);
++	netbk->dealloc_ring[index] = pending_idx;
+ 	/* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
+ 	smp_wmb();
+-	dealloc_prod++;
++	netbk->dealloc_prod++;
+ 	spin_unlock_irqrestore(&_lock, flags);
+ 
+-	tasklet_schedule(&net_tx_tasklet);
++	tasklet_schedule(&netbk->net_tx_tasklet);
+ }
+ 
+ static void netif_page_release(struct page *page, unsigned int order)
+@@ -1481,9 +1449,9 @@ static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
+ 	int i = 0;
+ 
+ 	printk(KERN_ALERT "netif_schedule_list:\n");
+-	spin_lock_irq(&net_schedule_list_lock);
++	spin_lock_irq(&netbk->net_schedule_list_lock);
+ 
+-	list_for_each (ent, &net_schedule_list) {
++	list_for_each(ent, &netbk->net_schedule_list) {
+ 		netif = list_entry(ent, struct xen_netif, list);
+ 		printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
+ 		       "rx_resp_prod=%08x\n",
+@@ -1500,7 +1468,7 @@ static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
+ 		i++;
+ 	}
+ 
+-	spin_unlock_irq(&net_schedule_list_lock);
++	spin_unlock_irq(&netbk->net_schedule_list_lock);
+ 	printk(KERN_ALERT " ** End of netif_schedule_list **\n");
+ 
+ 	return IRQ_HANDLED;
+@@ -1516,37 +1484,53 @@ static int __init netback_init(void)
+ 	if (!xen_domain())
+ 		return -ENODEV;
+ 
++	netbk = (struct xen_netbk *)vmalloc(sizeof(struct xen_netbk));
++	if (!netbk) {
++		printk(KERN_ALERT "%s: out of memory\n", __func__);
++		return -ENOMEM;
++	}
++
+ 	/* We can increase reservation by this much in net_rx_action(). */
+ //	balloon_update_driver_allowance(NET_RX_RING_SIZE);
+ 
+-	skb_queue_head_init(&rx_queue);
+-	skb_queue_head_init(&tx_queue);
++	skb_queue_head_init(&netbk->rx_queue);
++	skb_queue_head_init(&netbk->tx_queue);
+ 
+-	init_timer(&net_timer);
+-	net_timer.data = 0;
+-	net_timer.function = net_alarm;
++	init_timer(&netbk->net_timer);
++	netbk->net_timer.data = 0;
++	netbk->net_timer.function = net_alarm;
+ 
+-	init_timer(&netbk_tx_pending_timer);
+-	netbk_tx_pending_timer.data = 0;
+-	netbk_tx_pending_timer.function = netbk_tx_pending_timeout;
++	init_timer(&netbk->netbk_tx_pending_timer);
++	netbk->netbk_tx_pending_timer.data = 0;
++	netbk->netbk_tx_pending_timer.function = netbk_tx_pending_timeout;
+ 
+-	mmap_pages = alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
+-	if (mmap_pages == NULL) {
+-		printk("%s: out of memory\n", __FUNCTION__);
+-		return -ENOMEM;
++	netbk->mmap_pages =
++		alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
++	if (!netbk->mmap_pages) {
++		printk(KERN_ALERT "%s: out of memory\n", __func__);
++		rc = -ENOMEM;
++		goto failed_init2;
+ 	}
+ 
+ 	for (i = 0; i < MAX_PENDING_REQS; i++) {
+-		page = mmap_pages[i];
++		page = netbk->mmap_pages[i];
+ 		SetPageForeign(page, netif_page_release);
+ 		netif_set_page_index(page, i);
+-		INIT_LIST_HEAD(&pending_inuse[i].list);
++		INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
+ 	}
+ 
+-	pending_cons = 0;
+-	pending_prod = MAX_PENDING_REQS;
++	netbk->pending_cons = 0;
++	netbk->pending_prod = MAX_PENDING_REQS;
+ 	for (i = 0; i < MAX_PENDING_REQS; i++)
+-		pending_ring[i] = i;
++		netbk->pending_ring[i] = i;
++
++	tasklet_init(&netbk->net_tx_tasklet, net_tx_action, 0);
++	tasklet_init(&netbk->net_rx_tasklet, net_rx_action, 0);
++
++	INIT_LIST_HEAD(&netbk->pending_inuse_head);
++	INIT_LIST_HEAD(&netbk->net_schedule_list);
++
++	spin_lock_init(&netbk->net_schedule_list_lock);
+ 
+ 	netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
+ 	if (MODPARM_copy_skb) {
+@@ -1561,7 +1545,7 @@ static int __init netback_init(void)
+ 
+ 	rc = netif_xenbus_init();
+ 	if (rc)
+-		goto failed_init;
++		goto failed_init1;
+ 
+ #ifdef NETBE_DEBUG_INTERRUPT
+ 	(void)bind_virq_to_irqhandler(VIRQ_DEBUG,
+@@ -1574,10 +1558,12 @@ static int __init netback_init(void)
+ 
+ 	return 0;
+ 
+-failed_init:
+-	free_empty_pages_and_pagevec(mmap_pages, MAX_PENDING_REQS);
+-	del_timer(&netbk_tx_pending_timer);
+-	del_timer(&net_timer);
++failed_init1:
++	free_empty_pages_and_pagevec(netbk->mmap_pages, MAX_PENDING_REQS);
++failed_init2:
++	del_timer(&netbk->netbk_tx_pending_timer);
++	del_timer(&netbk->net_timer);
++	vfree(netbk);
+ 	return rc;
+ 
+ }
+-- 
+1.7.3.4
+
+
+From c099c22d8b1c12fc7d68998982eb4ccd4918e813 Mon Sep 17 00:00:00 2001
+From: Dongxiao Xu <dongxiao.xu at intel.com>
+Date: Wed, 19 May 2010 16:58:57 -0700
+Subject: [PATCH 088/139] xen: netback: Introduce a new struct type page_ext.
+
+struct page_ext stores the group and idx information by which a
+specific page can be identified.
+
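+A short sketch of how the encoding is used (illustrative; the real
+helpers are netif_set_page_ext(), netif_page_group() and
+netif_page_index() in this patch):
+
+  union page_ext ext = { .e = { .group = group + 1, .idx = idx } };
+  pg->mapping = ext.mapping;   /* stash group/idx in the unused mapping field */
+  ...
+  ext.mapping = pg->mapping;   /* later, recover them from the page */
+  group = ext.e.group - 1;     /* group is stored with a +1 bias */
+  idx   = ext.e.idx;
+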
+Signed-off-by: Dongxiao Xu <dongxiao.xu at intel.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/common.h  |   15 +++++++++++++++
+ drivers/xen/netback/netback.c |   28 +++++++++++++++++-----------
+ 2 files changed, 32 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 00208f4..5e0e467 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -240,6 +240,21 @@ struct netbk_tx_pending_inuse {
+ 
+ #define MAX_PENDING_REQS 256
+ 
++/* extra field used in struct page */
++union page_ext {
++	struct {
++#if BITS_PER_LONG < 64
++#define IDX_WIDTH   8
++#define GROUP_WIDTH (BITS_PER_LONG - IDX_WIDTH)
++		unsigned int group:GROUP_WIDTH;
++		unsigned int idx:IDX_WIDTH;
++#else
++		unsigned int group, idx;
++#endif
++	} e;
++	void *mapping;
++};
++
+ struct xen_netbk {
+ 	struct tasklet_struct net_tx_tasklet;
+ 	struct tasklet_struct net_rx_tasklet;
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 417f497..71ec999 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -76,22 +76,27 @@ static inline unsigned long idx_to_kaddr(unsigned int idx)
+ }
+ 
+ /* extra field used in struct page */
+-static inline void netif_set_page_index(struct page *pg, unsigned int index)
++static inline void netif_set_page_ext(struct page *pg, unsigned int group,
++		unsigned int idx)
+ {
+-	*(unsigned long *)&pg->mapping = index + 1;
++	union page_ext ext = { .e = { .group = group + 1, .idx = idx } };
++
++	BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping));
++	pg->mapping = ext.mapping;
+ }
+ 
+-static inline int netif_page_index(struct page *pg)
++static inline unsigned int netif_page_group(const struct page *pg)
+ {
+-	unsigned long idx = (unsigned long)pg->mapping - 1;
++	union page_ext ext = { .mapping = pg->mapping };
+ 
+-	if (!PageForeign(pg))
+-		return -1;
++	return ext.e.group - 1;
++}
+ 
+-	if ((idx >= MAX_PENDING_REQS) || (netbk->mmap_pages[idx] != pg))
+-		return -1;
++static inline unsigned int netif_page_index(const struct page *pg)
++{
++	union page_ext ext = { .mapping = pg->mapping };
+ 
+-	return idx;
++	return ext.e.idx;
+ }
+ 
+ /*
+@@ -1380,7 +1385,8 @@ static void netif_page_release(struct page *page, unsigned int order)
+ {
+ 	int idx = netif_page_index(page);
+ 	BUG_ON(order);
+-	BUG_ON(idx < 0);
++	BUG_ON(idx < 0 || idx >= MAX_PENDING_REQS);
++	BUG_ON(netbk->mmap_pages[idx] != page);
+ 	netif_idx_release(idx);
+ }
+ 
+@@ -1515,7 +1521,7 @@ static int __init netback_init(void)
+ 	for (i = 0; i < MAX_PENDING_REQS; i++) {
+ 		page = netbk->mmap_pages[i];
+ 		SetPageForeign(page, netif_page_release);
+-		netif_set_page_index(page, i);
++		netif_set_page_ext(page, 0, i);
+ 		INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
+ 	}
+ 
+-- 
+1.7.3.4
+
+
+From 9534985c5b9cc3f6238d6cb8bba7d376e82039d3 Mon Sep 17 00:00:00 2001
+From: Dongxiao Xu <dongxiao.xu at intel.com>
+Date: Wed, 19 May 2010 17:08:21 -0700
+Subject: [PATCH 089/139] xen: netback: Multiple tasklets support.
+
+Currently netback uses a single pair of tasklets for Tx/Rx data
+transfer. A tasklet can only run on one CPU at a time, and this pair
+serves all the netfronts, so it has become a performance bottleneck.
+This patch replaces the single pair with multiple tasklet pairs in
+dom0.
+
+Assuming dom0 has CPUNR VCPUs, we define CPUNR pairs of tasklets
+(CPUNR for Tx and CPUNR for Rx). Each pair of tasklets serves a
+specific group of netfronts. The global and static variables are
+also duplicated per group to avoid contending on a shared spinlock.
+
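+A sketch of the balancing policy (illustrative; the actual code is
+netbk_add_netif() in interface.c below): each new netfront is attached
+to the tasklet group that currently serves the fewest netfronts.
+
+  int i, min_group = 0;
+  int min_count = atomic_read(&netbk[0].netfront_count);
+
+  for (i = 0; i < group_nr; i++) {
+          int count = atomic_read(&netbk[i].netfront_count);
+          if (count < min_count) {
+                  min_group = i;
+                  min_count = count;
+          }
+  }
+  netif->group = min_group;
+  atomic_inc(&netbk[min_group].netfront_count);
+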
+Signed-off-by: Dongxiao Xu <dongxiao.xu at intel.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/common.h    |    6 +
+ drivers/xen/netback/interface.c |   27 ++++
+ drivers/xen/netback/netback.c   |  270 ++++++++++++++++++++++++---------------
+ 3 files changed, 197 insertions(+), 106 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 5e0e467..847ba58 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -58,6 +58,7 @@
+ struct xen_netif {
+ 	/* Unique identifier for this interface. */
+ 	domid_t          domid;
++	int              group;
+ 	unsigned int     handle;
+ 
+ 	u8               fe_dev_addr[6];
+@@ -278,6 +279,8 @@ struct xen_netbk {
+ 	/* Protect the net_schedule_list in netif. */
+ 	spinlock_t net_schedule_list_lock;
+ 
++	atomic_t netfront_count;
++
+ 	struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
+ 	struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
+ 	struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
+@@ -296,4 +299,7 @@ struct xen_netbk {
+ 	struct netbk_rx_meta meta[NET_RX_RING_SIZE];
+ };
+ 
++extern struct xen_netbk *xen_netbk;
++extern int xen_netbk_group_nr;
++
+ #endif /* __NETIF__BACKEND__COMMON_H__ */
+diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
+index 086d939..172ef4c 100644
+--- a/drivers/xen/netback/interface.c
++++ b/drivers/xen/netback/interface.c
+@@ -54,8 +54,33 @@
+ static unsigned long netbk_queue_length = 32;
+ module_param_named(queue_length, netbk_queue_length, ulong, 0644);
+ 
++static void netbk_add_netif(struct xen_netbk *netbk, int group_nr,
++			   struct xen_netif *netif)
++{
++	int i;
++	int min_netfront_count;
++	int min_group = 0;
++	min_netfront_count = atomic_read(&netbk[0].netfront_count);
++	for (i = 0; i < group_nr; i++) {
++		int netfront_count = atomic_read(&netbk[i].netfront_count);
++		if (netfront_count < min_netfront_count) {
++			min_group = i;
++			min_netfront_count = netfront_count;
++		}
++	}
++
++	netif->group = min_group;
++	atomic_inc(&netbk[netif->group].netfront_count);
++}
++
++static void netbk_remove_netif(struct xen_netbk *netbk, struct xen_netif *netif)
++{
++	atomic_dec(&netbk[netif->group].netfront_count);
++}
++
+ static void __netif_up(struct xen_netif *netif)
+ {
++	netbk_add_netif(xen_netbk, xen_netbk_group_nr, netif);
+ 	enable_irq(netif->irq);
+ 	netif_schedule_work(netif);
+ }
+@@ -64,6 +89,7 @@ static void __netif_down(struct xen_netif *netif)
+ {
+ 	disable_irq(netif->irq);
+ 	netif_deschedule_work(netif);
++	netbk_remove_netif(xen_netbk, netif);
+ }
+ 
+ static int net_open(struct net_device *dev)
+@@ -214,6 +240,7 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
+ 	netif = netdev_priv(dev);
+ 	memset(netif, 0, sizeof(*netif));
+ 	netif->domid  = domid;
++	netif->group  = -1;
+ 	netif->handle = handle;
+ 	netif->features = NETIF_F_SG;
+ 	atomic_set(&netif->refcnt, 1);
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 71ec999..feefb14 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -48,9 +48,10 @@
+ 
+ /*define NETBE_DEBUG_INTERRUPT*/
+ 
+-static struct xen_netbk *netbk;
++struct xen_netbk *xen_netbk;
++int xen_netbk_group_nr;
+ 
+-static void netif_idx_release(u16 pending_idx);
++static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx);
+ static void make_tx_response(struct xen_netif *netif,
+ 			     struct xen_netif_tx_request *txp,
+ 			     s8       st);
+@@ -61,18 +62,20 @@ static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
+ 					     u16      size,
+ 					     u16      flags);
+ 
+-static void net_tx_action(unsigned long unused);
++static void net_tx_action(unsigned long data);
+ 
+-static void net_rx_action(unsigned long unused);
++static void net_rx_action(unsigned long data);
+ 
+-static inline unsigned long idx_to_pfn(unsigned int idx)
++static inline unsigned long idx_to_pfn(struct xen_netbk *netbk,
++				       unsigned int idx)
+ {
+ 	return page_to_pfn(netbk->mmap_pages[idx]);
+ }
+ 
+-static inline unsigned long idx_to_kaddr(unsigned int idx)
++static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
++					 unsigned int idx)
+ {
+-	return (unsigned long)pfn_to_kaddr(idx_to_pfn(idx));
++	return (unsigned long)pfn_to_kaddr(idx_to_pfn(netbk, idx));
+ }
+ 
+ /* extra field used in struct page */
+@@ -112,7 +115,7 @@ static inline pending_ring_idx_t pending_index(unsigned i)
+ 	return i & (MAX_PENDING_REQS-1);
+ }
+ 
+-static inline pending_ring_idx_t nr_pending_reqs(void)
++static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk)
+ {
+ 	return MAX_PENDING_REQS -
+ 		netbk->pending_prod + netbk->pending_cons;
+@@ -125,10 +128,10 @@ MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
+ 
+ int netbk_copy_skb_mode;
+ 
+-static inline void maybe_schedule_tx_action(void)
++static inline void maybe_schedule_tx_action(struct xen_netbk *netbk)
+ {
+ 	smp_mb();
+-	if ((nr_pending_reqs() < (MAX_PENDING_REQS/2)) &&
++	if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
+ 	    !list_empty(&netbk->net_schedule_list))
+ 		tasklet_schedule(&netbk->net_tx_tasklet);
+ }
+@@ -235,9 +238,15 @@ static void tx_queue_callback(unsigned long data)
+ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ {
+ 	struct xen_netif *netif = netdev_priv(dev);
++	struct xen_netbk *netbk;
+ 
+ 	BUG_ON(skb->dev != dev);
+ 
++	if (netif->group == -1)
++		goto drop;
++
++	netbk = &xen_netbk[netif->group];
++
+ 	/* Drop the packet if the target domain has no receive buffers. */
+ 	if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
+ 		goto drop;
+@@ -313,6 +322,7 @@ static u16 netbk_gop_frag(struct xen_netif *netif, struct netbk_rx_meta *meta,
+ 	struct gnttab_copy *copy_gop;
+ 	struct xen_netif_rx_request *req;
+ 	unsigned long old_mfn;
++	int group = netif_page_group(page);
+ 	int idx = netif_page_index(page);
+ 
+ 	old_mfn = virt_to_mfn(page_address(page));
+@@ -321,7 +331,8 @@ static u16 netbk_gop_frag(struct xen_netif *netif, struct netbk_rx_meta *meta,
+ 
+ 	copy_gop = npo->copy + npo->copy_prod++;
+ 	copy_gop->flags = GNTCOPY_dest_gref;
+-	if (idx > -1) {
++	if (PageForeign(page)) {
++		struct xen_netbk *netbk = &xen_netbk[group];
+ 		struct pending_tx_info *src_pend = &netbk->pending_tx_info[idx];
+ 		copy_gop->source.domid = src_pend->netif->domid;
+ 		copy_gop->source.u.ref = src_pend->req.gref;
+@@ -422,9 +433,10 @@ static void netbk_add_frag_responses(struct xen_netif *netif, int status,
+ 	}
+ }
+ 
+-static void net_rx_action(unsigned long unused)
++static void net_rx_action(unsigned long data)
+ {
+ 	struct xen_netif *netif = NULL;
++	struct xen_netbk *netbk = (struct xen_netbk *)data;
+ 	s8 status;
+ 	u16 id, irq, flags;
+ 	struct xen_netif_rx_response *resp;
+@@ -584,13 +596,15 @@ static void net_rx_action(unsigned long unused)
+ 		tasklet_schedule(&netbk->net_rx_tasklet);
+ }
+ 
+-static void net_alarm(unsigned long unused)
++static void net_alarm(unsigned long data)
+ {
++	struct xen_netbk *netbk = (struct xen_netbk *)data;
+ 	tasklet_schedule(&netbk->net_rx_tasklet);
+ }
+ 
+-static void netbk_tx_pending_timeout(unsigned long unused)
++static void netbk_tx_pending_timeout(unsigned long data)
+ {
++	struct xen_netbk *netbk = (struct xen_netbk *)data;
+ 	tasklet_schedule(&netbk->net_tx_tasklet);
+ }
+ 
+@@ -607,6 +621,7 @@ static int __on_net_schedule_list(struct xen_netif *netif)
+ 
+ static void remove_from_net_schedule_list(struct xen_netif *netif)
+ {
++	struct xen_netbk *netbk = &xen_netbk[netif->group];
+ 	spin_lock_irq(&netbk->net_schedule_list_lock);
+ 	if (likely(__on_net_schedule_list(netif))) {
+ 		list_del_init(&netif->list);
+@@ -617,6 +632,7 @@ static void remove_from_net_schedule_list(struct xen_netif *netif)
+ 
+ static void add_to_net_schedule_list_tail(struct xen_netif *netif)
+ {
++	struct xen_netbk *netbk = &xen_netbk[netif->group];
+ 	if (__on_net_schedule_list(netif))
+ 		return;
+ 
+@@ -631,13 +647,14 @@ static void add_to_net_schedule_list_tail(struct xen_netif *netif)
+ 
+ void netif_schedule_work(struct xen_netif *netif)
+ {
++	struct xen_netbk *netbk = &xen_netbk[netif->group];
+ 	int more_to_do;
+ 
+ 	RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
+ 
+ 	if (more_to_do) {
+ 		add_to_net_schedule_list_tail(netif);
+-		maybe_schedule_tx_action();
++		maybe_schedule_tx_action(netbk);
+ 	}
+ }
+ 
+@@ -674,14 +691,15 @@ static void tx_credit_callback(unsigned long data)
+ 	netif_schedule_work(netif);
+ }
+ 
+-static inline int copy_pending_req(pending_ring_idx_t pending_idx)
++static inline int copy_pending_req(struct xen_netbk *netbk,
++				   pending_ring_idx_t pending_idx)
+ {
+ 	return gnttab_copy_grant_page(
+ 			netbk->grant_tx_handle[pending_idx],
+ 			&netbk->mmap_pages[pending_idx]);
+ }
+ 
+-inline static void net_tx_action_dealloc(void)
++static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
+ {
+ 	struct netbk_tx_pending_inuse *inuse, *n;
+ 	struct gnttab_unmap_grant_ref *gop;
+@@ -711,13 +729,13 @@ inline static void net_tx_action_dealloc(void)
+ 			pending_idx = netbk->dealloc_ring[pending_index(dc++)];
+ 			list_move_tail(&pending_inuse[pending_idx].list, &list);
+ 
+-			pfn = idx_to_pfn(pending_idx);
++			pfn = idx_to_pfn(netbk, pending_idx);
+ 			/* Already unmapped? */
+ 			if (!phys_to_machine_mapping_valid(pfn))
+ 				continue;
+ 
+ 			gnttab_set_unmap_op(gop,
+-					idx_to_kaddr(pending_idx),
++					idx_to_kaddr(netbk, pending_idx),
+ 					GNTMAP_host_map,
+ 					netbk->grant_tx_handle[pending_idx]);
+ 			gop++;
+@@ -740,7 +758,7 @@ inline static void net_tx_action_dealloc(void)
+ 
+ 			pending_tx_info[pending_idx].netif->nr_copied_skbs++;
+ 
+-			switch (copy_pending_req(pending_idx)) {
++			switch (copy_pending_req(netbk, pending_idx)) {
+ 			case 0:
+ 				list_move_tail(&inuse->list, &list);
+ 				continue;
+@@ -843,7 +861,8 @@ static int netbk_count_requests(struct xen_netif *netif,
+ 	return frags;
+ }
+ 
+-static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netif *netif,
++static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk,
++						  struct xen_netif *netif,
+ 						  struct sk_buff *skb,
+ 						  struct xen_netif_tx_request *txp,
+ 						  struct gnttab_map_grant_ref *mop)
+@@ -864,7 +883,7 @@ static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netif *netif,
+ 		index = pending_index(netbk->pending_cons++);
+ 		pending_idx = netbk->pending_ring[index];
+ 
+-		gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx),
++		gnttab_set_map_op(mop++, idx_to_kaddr(netbk, pending_idx),
+ 				  GNTMAP_host_map | GNTMAP_readonly,
+ 				  txp->gref, netif->domid);
+ 
+@@ -877,8 +896,9 @@ static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netif *netif,
+ 	return mop;
+ }
+ 
+-static int netbk_tx_check_mop(struct sk_buff *skb,
+-			       struct gnttab_map_grant_ref **mopp)
++static int netbk_tx_check_mop(struct xen_netbk *netbk,
++			      struct sk_buff *skb,
++			      struct gnttab_map_grant_ref **mopp)
+ {
+ 	struct gnttab_map_grant_ref *mop = *mopp;
+ 	int pending_idx = *((u16 *)skb->data);
+@@ -900,7 +920,7 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
+ 		netif_put(netif);
+ 	} else {
+ 		set_phys_to_machine(
+-			__pa(idx_to_kaddr(pending_idx)) >> PAGE_SHIFT,
++			__pa(idx_to_kaddr(netbk, pending_idx)) >> PAGE_SHIFT,
+ 			FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
+ 		netbk->grant_tx_handle[pending_idx] = mop->handle;
+ 	}
+@@ -918,14 +938,14 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
+ 		newerr = (++mop)->status;
+ 		if (likely(!newerr)) {
+ 			unsigned long addr;
+-			addr = idx_to_kaddr(pending_idx);
++			addr = idx_to_kaddr(netbk, pending_idx);
+ 			set_phys_to_machine(
+ 				__pa(addr)>>PAGE_SHIFT,
+ 				FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
+ 			netbk->grant_tx_handle[pending_idx] = mop->handle;
+ 			/* Had a previous error? Invalidate this fragment. */
+ 			if (unlikely(err))
+-				netif_idx_release(pending_idx);
++				netif_idx_release(netbk, pending_idx);
+ 			continue;
+ 		}
+ 
+@@ -942,10 +962,10 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
+ 
+ 		/* First error: invalidate header and preceding fragments. */
+ 		pending_idx = *((u16 *)skb->data);
+-		netif_idx_release(pending_idx);
++		netif_idx_release(netbk, pending_idx);
+ 		for (j = start; j < i; j++) {
+ 			pending_idx = (unsigned long)shinfo->frags[i].page;
+-			netif_idx_release(pending_idx);
++			netif_idx_release(netbk, pending_idx);
+ 		}
+ 
+ 		/* Remember the error: invalidate all subsequent fragments. */
+@@ -956,7 +976,7 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
+ 	return err;
+ }
+ 
+-static void netbk_fill_frags(struct sk_buff *skb)
++static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
+ {
+ 	struct skb_shared_info *shinfo = skb_shinfo(skb);
+ 	int nr_frags = shinfo->nr_frags;
+@@ -974,7 +994,7 @@ static void netbk_fill_frags(struct sk_buff *skb)
+ 			      &netbk->pending_inuse_head);
+ 
+ 		txp = &netbk->pending_tx_info[pending_idx].req;
+-		frag->page = virt_to_page(idx_to_kaddr(pending_idx));
++		frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx));
+ 		frag->size = txp->size;
+ 		frag->page_offset = txp->offset;
+ 
+@@ -1106,14 +1126,14 @@ static bool tx_credit_exceeded(struct xen_netif *netif, unsigned size)
+ 	return false;
+ }
+ 
+-static unsigned net_tx_build_mops(void)
++static unsigned net_tx_build_mops(struct xen_netbk *netbk)
+ {
+ 	struct gnttab_map_grant_ref *mop;
+ 	struct sk_buff *skb;
+ 	int ret;
+ 
+ 	mop = netbk->tx_map_ops;
+-	while (((nr_pending_reqs() + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
++	while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
+ 		!list_empty(&netbk->net_schedule_list)) {
+ 		struct xen_netif *netif;
+ 		struct xen_netif_tx_request txreq;
+@@ -1215,7 +1235,7 @@ static unsigned net_tx_build_mops(void)
+ 			}
+ 		}
+ 
+-		gnttab_set_map_op(mop, idx_to_kaddr(pending_idx),
++		gnttab_set_map_op(mop, idx_to_kaddr(netbk, pending_idx),
+ 				  GNTMAP_host_map | GNTMAP_readonly,
+ 				  txreq.gref, netif->domid);
+ 		mop++;
+@@ -1241,7 +1261,7 @@ static unsigned net_tx_build_mops(void)
+ 
+ 		netbk->pending_cons++;
+ 
+-		mop = netbk_get_requests(netif, skb, txfrags, mop);
++		mop = netbk_get_requests(netbk, netif, skb, txfrags, mop);
+ 
+ 		netif->tx.req_cons = idx;
+ 		netif_schedule_work(netif);
+@@ -1253,7 +1273,7 @@ static unsigned net_tx_build_mops(void)
+ 	return mop - netbk->tx_map_ops;
+ }
+ 
+-static void net_tx_submit(void)
++static void net_tx_submit(struct xen_netbk *netbk)
+ {
+ 	struct gnttab_map_grant_ref *mop;
+ 	struct sk_buff *skb;
+@@ -1270,7 +1290,7 @@ static void net_tx_submit(void)
+ 		txp = &netbk->pending_tx_info[pending_idx].req;
+ 
+ 		/* Check the remap error code. */
+-		if (unlikely(netbk_tx_check_mop(skb, &mop))) {
++		if (unlikely(netbk_tx_check_mop(netbk, skb, &mop))) {
+ 			DPRINTK("netback grant failed.\n");
+ 			skb_shinfo(skb)->nr_frags = 0;
+ 			kfree_skb(skb);
+@@ -1279,7 +1299,7 @@ static void net_tx_submit(void)
+ 
+ 		data_len = skb->len;
+ 		memcpy(skb->data,
+-		       (void *)(idx_to_kaddr(pending_idx)|txp->offset),
++		       (void *)(idx_to_kaddr(netbk, pending_idx)|txp->offset),
+ 		       data_len);
+ 		if (data_len < txp->size) {
+ 			/* Append the packet payload as a fragment. */
+@@ -1287,7 +1307,7 @@ static void net_tx_submit(void)
+ 			txp->size -= data_len;
+ 		} else {
+ 			/* Schedule a response immediately. */
+-			netif_idx_release(pending_idx);
++			netif_idx_release(netbk, pending_idx);
+ 		}
+ 
+ 		if (txp->flags & NETTXF_csum_blank)
+@@ -1295,7 +1315,7 @@ static void net_tx_submit(void)
+ 		else if (txp->flags & NETTXF_data_validated)
+ 			skb->ip_summed = CHECKSUM_UNNECESSARY;
+ 
+-		netbk_fill_frags(skb);
++		netbk_fill_frags(netbk, skb);
+ 
+ 		/*
+ 		 * If the initial fragment was < PKT_PROT_LEN then
+@@ -1344,15 +1364,16 @@ static void net_tx_submit(void)
+ }
+ 
+ /* Called after netfront has transmitted */
+-static void net_tx_action(unsigned long unused)
++static void net_tx_action(unsigned long data)
+ {
++	struct xen_netbk *netbk = (struct xen_netbk *)data;
+ 	unsigned nr_mops;
+ 	int ret;
+ 
+ 	if (netbk->dealloc_cons != netbk->dealloc_prod)
+-		net_tx_action_dealloc();
++		net_tx_action_dealloc(netbk);
+ 
+-	nr_mops = net_tx_build_mops();
++	nr_mops = net_tx_build_mops(netbk);
+ 
+ 	if (nr_mops == 0)
+ 		return;
+@@ -1361,10 +1382,10 @@ static void net_tx_action(unsigned long unused)
+ 					netbk->tx_map_ops, nr_mops);
+ 	BUG_ON(ret);
+ 
+-	net_tx_submit();
++	net_tx_submit(netbk);
+ }
+ 
+-static void netif_idx_release(u16 pending_idx)
++static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
+ {
+ 	static DEFINE_SPINLOCK(_lock);
+ 	unsigned long flags;
+@@ -1383,19 +1404,28 @@ static void netif_idx_release(u16 pending_idx)
+ 
+ static void netif_page_release(struct page *page, unsigned int order)
+ {
++	int group = netif_page_group(page);
+ 	int idx = netif_page_index(page);
++	struct xen_netbk *netbk = &xen_netbk[group];
+ 	BUG_ON(order);
++	BUG_ON(group < 0 || group >= xen_netbk_group_nr);
+ 	BUG_ON(idx < 0 || idx >= MAX_PENDING_REQS);
+ 	BUG_ON(netbk->mmap_pages[idx] != page);
+-	netif_idx_release(idx);
++	netif_idx_release(netbk, idx);
+ }
+ 
+ irqreturn_t netif_be_int(int irq, void *dev_id)
+ {
+ 	struct xen_netif *netif = dev_id;
++	struct xen_netbk *netbk;
++
++	if (netif->group == -1)
++		return IRQ_NONE;
++
++	netbk = &xen_netbk[netif->group];
+ 
+ 	add_to_net_schedule_list_tail(netif);
+-	maybe_schedule_tx_action();
++	maybe_schedule_tx_action(netbk);
+ 
+ 	if (netif_schedulable(netif) && !netbk_queue_full(netif))
+ 		netif_wake_queue(netif->dev);
+@@ -1453,28 +1483,40 @@ static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
+ 	struct list_head *ent;
+ 	struct xen_netif *netif;
+ 	int i = 0;
++	int group = 0;
+ 
+ 	printk(KERN_ALERT "netif_schedule_list:\n");
+-	spin_lock_irq(&netbk->net_schedule_list_lock);
+ 
+-	list_for_each(ent, &netbk->net_schedule_list) {
+-		netif = list_entry(ent, struct xen_netif, list);
+-		printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
+-		       "rx_resp_prod=%08x\n",
+-		       i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
+-		printk(KERN_ALERT "   tx_req_cons=%08x tx_resp_prod=%08x)\n",
+-		       netif->tx.req_cons, netif->tx.rsp_prod_pvt);
+-		printk(KERN_ALERT "   shared(rx_req_prod=%08x "
+-		       "rx_resp_prod=%08x\n",
+-		       netif->rx.sring->req_prod, netif->rx.sring->rsp_prod);
+-		printk(KERN_ALERT "   rx_event=%08x tx_req_prod=%08x\n",
+-		       netif->rx.sring->rsp_event, netif->tx.sring->req_prod);
+-		printk(KERN_ALERT "   tx_resp_prod=%08x, tx_event=%08x)\n",
+-		       netif->tx.sring->rsp_prod, netif->tx.sring->rsp_event);
+-		i++;
++	for (group = 0; group < xen_netbk_group_nr; group++) {
++		struct xen_netbk *netbk = &xen_netbk[group];
++		spin_lock_irq(&netbk->net_schedule_list_lock);
++		printk(KERN_ALERT "xen_netback group number: %d\n", group);
++		list_for_each(ent, &netbk->net_schedule_list) {
++			netif = list_entry(ent, struct xen_netif, list);
++			printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
++				"rx_resp_prod=%08x\n",
++				i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
++			printk(KERN_ALERT
++				"   tx_req_cons=%08x, tx_resp_prod=%08x)\n",
++				netif->tx.req_cons, netif->tx.rsp_prod_pvt);
++			printk(KERN_ALERT
++				"   shared(rx_req_prod=%08x "
++				"rx_resp_prod=%08x\n",
++				netif->rx.sring->req_prod,
++				netif->rx.sring->rsp_prod);
++			printk(KERN_ALERT
++				"   rx_event=%08x, tx_req_prod=%08x\n",
++				netif->rx.sring->rsp_event,
++				netif->tx.sring->req_prod);
++			printk(KERN_ALERT
++				"   tx_resp_prod=%08x, tx_event=%08x)\n",
++				netif->tx.sring->rsp_prod,
++				netif->tx.sring->rsp_event);
++			i++;
++		}
++		spin_unlock_irq(&netbk->net_schedule_list_lock);
+ 	}
+ 
+-	spin_unlock_irq(&netbk->net_schedule_list_lock);
+ 	printk(KERN_ALERT " ** End of netif_schedule_list **\n");
+ 
+ 	return IRQ_HANDLED;
+@@ -1486,12 +1528,15 @@ static int __init netback_init(void)
+ 	int i;
+ 	struct page *page;
+ 	int rc = 0;
++	int group;
+ 
+ 	if (!xen_domain())
+ 		return -ENODEV;
+ 
+-	netbk = (struct xen_netbk *)vmalloc(sizeof(struct xen_netbk));
+-	if (!netbk) {
++	xen_netbk_group_nr = num_online_cpus();
++	xen_netbk = (struct xen_netbk *)vmalloc(sizeof(struct xen_netbk) *
++					    xen_netbk_group_nr);
++	if (!xen_netbk) {
+ 		printk(KERN_ALERT "%s: out of memory\n", __func__);
+ 		return -ENOMEM;
+ 	}
+@@ -1499,44 +1544,54 @@ static int __init netback_init(void)
+ 	/* We can increase reservation by this much in net_rx_action(). */
+ //	balloon_update_driver_allowance(NET_RX_RING_SIZE);
+ 
+-	skb_queue_head_init(&netbk->rx_queue);
+-	skb_queue_head_init(&netbk->tx_queue);
+-
+-	init_timer(&netbk->net_timer);
+-	netbk->net_timer.data = 0;
+-	netbk->net_timer.function = net_alarm;
+-
+-	init_timer(&netbk->netbk_tx_pending_timer);
+-	netbk->netbk_tx_pending_timer.data = 0;
+-	netbk->netbk_tx_pending_timer.function = netbk_tx_pending_timeout;
++	for (group = 0; group < xen_netbk_group_nr; group++) {
++		struct xen_netbk *netbk = &xen_netbk[group];
++		skb_queue_head_init(&netbk->rx_queue);
++		skb_queue_head_init(&netbk->tx_queue);
++
++		init_timer(&netbk->net_timer);
++		netbk->net_timer.data = (unsigned long)netbk;
++		netbk->net_timer.function = net_alarm;
++
++		init_timer(&netbk->netbk_tx_pending_timer);
++		netbk->netbk_tx_pending_timer.data = (unsigned long)netbk;
++		netbk->netbk_tx_pending_timer.function =
++			netbk_tx_pending_timeout;
++
++		netbk->mmap_pages =
++			alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
++		if (!netbk->mmap_pages) {
++			printk(KERN_ALERT "%s: out of memory\n", __func__);
++			del_timer(&netbk->netbk_tx_pending_timer);
++			del_timer(&netbk->net_timer);
++			rc = -ENOMEM;
++			goto failed_init;
++		}
+ 
+-	netbk->mmap_pages =
+-		alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
+-	if (!netbk->mmap_pages) {
+-		printk(KERN_ALERT "%s: out of memory\n", __func__);
+-		rc = -ENOMEM;
+-		goto failed_init2;
+-	}
++		for (i = 0; i < MAX_PENDING_REQS; i++) {
++			page = netbk->mmap_pages[i];
++			SetPageForeign(page, netif_page_release);
++			netif_set_page_ext(page, group, i);
++			INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
++		}
+ 
+-	for (i = 0; i < MAX_PENDING_REQS; i++) {
+-		page = netbk->mmap_pages[i];
+-		SetPageForeign(page, netif_page_release);
+-		netif_set_page_ext(page, 0, i);
+-		INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
+-	}
++		netbk->pending_cons = 0;
++		netbk->pending_prod = MAX_PENDING_REQS;
++		for (i = 0; i < MAX_PENDING_REQS; i++)
++			netbk->pending_ring[i] = i;
+ 
+-	netbk->pending_cons = 0;
+-	netbk->pending_prod = MAX_PENDING_REQS;
+-	for (i = 0; i < MAX_PENDING_REQS; i++)
+-		netbk->pending_ring[i] = i;
++		tasklet_init(&netbk->net_tx_tasklet, net_tx_action,
++				(unsigned long)netbk);
++		tasklet_init(&netbk->net_rx_tasklet, net_rx_action,
++				(unsigned long)netbk);
+ 
+-	tasklet_init(&netbk->net_tx_tasklet, net_tx_action, 0);
+-	tasklet_init(&netbk->net_rx_tasklet, net_rx_action, 0);
++		INIT_LIST_HEAD(&netbk->pending_inuse_head);
++		INIT_LIST_HEAD(&netbk->net_schedule_list);
+ 
+-	INIT_LIST_HEAD(&netbk->pending_inuse_head);
+-	INIT_LIST_HEAD(&netbk->net_schedule_list);
++		spin_lock_init(&netbk->net_schedule_list_lock);
+ 
+-	spin_lock_init(&netbk->net_schedule_list_lock);
++		atomic_set(&netbk->netfront_count, 0);
++	}
+ 
+ 	netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
+ 	if (MODPARM_copy_skb) {
+@@ -1551,25 +1606,28 @@ static int __init netback_init(void)
+ 
+ 	rc = netif_xenbus_init();
+ 	if (rc)
+-		goto failed_init1;
++		goto failed_init;
+ 
+ #ifdef NETBE_DEBUG_INTERRUPT
+ 	(void)bind_virq_to_irqhandler(VIRQ_DEBUG,
+ 				      0,
+ 				      netif_be_dbg,
+-				      SA_SHIRQ,
++				      IRQF_SHARED,
+ 				      "net-be-dbg",
+ 				      &netif_be_dbg);
+ #endif
+ 
+ 	return 0;
+ 
+-failed_init1:
+-	free_empty_pages_and_pagevec(netbk->mmap_pages, MAX_PENDING_REQS);
+-failed_init2:
+-	del_timer(&netbk->netbk_tx_pending_timer);
+-	del_timer(&netbk->net_timer);
+-	vfree(netbk);
++failed_init:
++	for (i = 0; i < group; i++) {
++		struct xen_netbk *netbk = &xen_netbk[i];
++		free_empty_pages_and_pagevec(netbk->mmap_pages,
++				MAX_PENDING_REQS);
++		del_timer(&netbk->netbk_tx_pending_timer);
++		del_timer(&netbk->net_timer);
++	}
++	vfree(xen_netbk);
+ 	return rc;
+ 
+ }
+-- 
+1.7.3.4
+
+
+From e7317b70c0436c109b605bb377939cb2eaff6a6f Mon Sep 17 00:00:00 2001
+From: Dongxiao Xu <dongxiao.xu at intel.com>
+Date: Wed, 19 May 2010 17:08:22 -0700
+Subject: [PATCH 090/139] xen: netback: Use Kernel thread to replace the tasklet.
+
+A kernel thread gives more control over QoS and can improve dom0's
+userspace responsiveness. This option is currently off by default.
+
+Signed-off-by: Dongxiao Xu <dongxiao.xu at intel.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/common.h  |   13 ++++-
+ drivers/xen/netback/netback.c |  109 ++++++++++++++++++++++++++++++++++++----
+ 2 files changed, 109 insertions(+), 13 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 847ba58..36cb2b9 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -257,8 +257,17 @@ union page_ext {
+ };
+ 
+ struct xen_netbk {
+-	struct tasklet_struct net_tx_tasklet;
+-	struct tasklet_struct net_rx_tasklet;
++	union {
++		struct {
++			struct tasklet_struct net_tx_tasklet;
++			struct tasklet_struct net_rx_tasklet;
++		} tasklet;
++
++		struct {
++			wait_queue_head_t netbk_action_wq;
++			struct task_struct *task;
++		} kthread;
++	};
+ 
+ 	struct sk_buff_head rx_queue;
+ 	struct sk_buff_head tx_queue;
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index feefb14..547dcaa 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -38,6 +38,7 @@
+ 
+ #include <linux/tcp.h>
+ #include <linux/udp.h>
++#include <linux/kthread.h>
+ 
+ #include <xen/balloon.h>
+ #include <xen/events.h>
+@@ -128,12 +129,31 @@ MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
+ 
+ int netbk_copy_skb_mode;
+ 
++static int MODPARM_netback_kthread;
++module_param_named(netback_kthread, MODPARM_netback_kthread, bool, 0);
++MODULE_PARM_DESC(netback_kthread, "Use kernel thread to replace tasklet");
++
++/*
++ * Netback bottom half handler.
++ * dir indicates the data direction.
++ * rx: 1, tx: 0.
++ */
++static inline void xen_netbk_bh_handler(struct xen_netbk *netbk, int dir)
++{
++	if (MODPARM_netback_kthread)
++		wake_up(&netbk->kthread.netbk_action_wq);
++	else if (dir)
++		tasklet_schedule(&netbk->tasklet.net_rx_tasklet);
++	else
++		tasklet_schedule(&netbk->tasklet.net_tx_tasklet);
++}
++
+ static inline void maybe_schedule_tx_action(struct xen_netbk *netbk)
+ {
+ 	smp_mb();
+ 	if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
+ 	    !list_empty(&netbk->net_schedule_list))
+-		tasklet_schedule(&netbk->net_tx_tasklet);
++		xen_netbk_bh_handler(netbk, 0);
+ }
+ 
+ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
+@@ -289,7 +309,8 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ 		}
+ 	}
+ 	skb_queue_tail(&netbk->rx_queue, skb);
+-	tasklet_schedule(&netbk->net_rx_tasklet);
++
++	xen_netbk_bh_handler(netbk, 1);
+ 
+ 	return 0;
+ 
+@@ -593,19 +614,19 @@ static void net_rx_action(unsigned long data)
+ 	/* More work to do? */
+ 	if (!skb_queue_empty(&netbk->rx_queue) &&
+ 			!timer_pending(&netbk->net_timer))
+-		tasklet_schedule(&netbk->net_rx_tasklet);
++		xen_netbk_bh_handler(netbk, 1);
+ }
+ 
+ static void net_alarm(unsigned long data)
+ {
+ 	struct xen_netbk *netbk = (struct xen_netbk *)data;
+-	tasklet_schedule(&netbk->net_rx_tasklet);
++	xen_netbk_bh_handler(netbk, 1);
+ }
+ 
+ static void netbk_tx_pending_timeout(unsigned long data)
+ {
+ 	struct xen_netbk *netbk = (struct xen_netbk *)data;
+-	tasklet_schedule(&netbk->net_tx_tasklet);
++	xen_netbk_bh_handler(netbk, 0);
+ }
+ 
+ struct net_device_stats *netif_be_get_stats(struct net_device *dev)
+@@ -1348,7 +1369,7 @@ static void net_tx_submit(struct xen_netbk *netbk)
+ 			continue;
+ 		}
+ 
+-		netif_rx(skb);
++		netif_rx_ni(skb);
+ 		netif->dev->last_rx = jiffies;
+ 	}
+ 
+@@ -1399,7 +1420,7 @@ static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
+ 	netbk->dealloc_prod++;
+ 	spin_unlock_irqrestore(&_lock, flags);
+ 
+-	tasklet_schedule(&netbk->net_tx_tasklet);
++	xen_netbk_bh_handler(netbk, 0);
+ }
+ 
+ static void netif_page_release(struct page *page, unsigned int order)
+@@ -1523,6 +1544,46 @@ static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
+ }
+ #endif
+ 
++static inline int rx_work_todo(struct xen_netbk *netbk)
++{
++	return !skb_queue_empty(&netbk->rx_queue);
++}
++
++static inline int tx_work_todo(struct xen_netbk *netbk)
++{
++	if (netbk->dealloc_cons != netbk->dealloc_prod)
++		return 1;
++
++	if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
++			!list_empty(&netbk->net_schedule_list))
++		return 1;
++
++	return 0;
++}
++
++static int netbk_action_thread(void *data)
++{
++	struct xen_netbk *netbk = (struct xen_netbk *)data;
++	while (!kthread_should_stop()) {
++		wait_event_interruptible(netbk->kthread.netbk_action_wq,
++				rx_work_todo(netbk)
++				|| tx_work_todo(netbk)
++				|| kthread_should_stop());
++		cond_resched();
++
++		if (kthread_should_stop())
++			break;
++
++		if (rx_work_todo(netbk))
++			net_rx_action((unsigned long)netbk);
++
++		if (tx_work_todo(netbk))
++			net_tx_action((unsigned long)netbk);
++	}
++
++	return 0;
++}
++
+ static int __init netback_init(void)
+ {
+ 	int i;
+@@ -1580,10 +1641,34 @@ static int __init netback_init(void)
+ 		for (i = 0; i < MAX_PENDING_REQS; i++)
+ 			netbk->pending_ring[i] = i;
+ 
+-		tasklet_init(&netbk->net_tx_tasklet, net_tx_action,
+-				(unsigned long)netbk);
+-		tasklet_init(&netbk->net_rx_tasklet, net_rx_action,
+-				(unsigned long)netbk);
++		if (MODPARM_netback_kthread) {
++			init_waitqueue_head(&netbk->kthread.netbk_action_wq);
++			netbk->kthread.task =
++				kthread_create(netbk_action_thread,
++					       (void *)netbk,
++					       "netback/%u", group);
++
++			if (!IS_ERR(netbk->kthread.task)) {
++				kthread_bind(netbk->kthread.task, group);
++				wake_up_process(netbk->kthread.task);
++			} else {
++				printk(KERN_ALERT
++					"kthread_run() fails at netback\n");
++				free_empty_pages_and_pagevec(netbk->mmap_pages,
++						MAX_PENDING_REQS);
++				del_timer(&netbk->netbk_tx_pending_timer);
++				del_timer(&netbk->net_timer);
++				rc = PTR_ERR(netbk->kthread.task);
++				goto failed_init;
++			}
++		} else {
++			tasklet_init(&netbk->tasklet.net_tx_tasklet,
++				     net_tx_action,
++				     (unsigned long)netbk);
++			tasklet_init(&netbk->tasklet.net_rx_tasklet,
++				     net_rx_action,
++				     (unsigned long)netbk);
++		}
+ 
+ 		INIT_LIST_HEAD(&netbk->pending_inuse_head);
+ 		INIT_LIST_HEAD(&netbk->net_schedule_list);
+@@ -1626,6 +1711,8 @@ failed_init:
+ 				MAX_PENDING_REQS);
+ 		del_timer(&netbk->netbk_tx_pending_timer);
+ 		del_timer(&netbk->net_timer);
++		if (MODPARM_netback_kthread)
++			kthread_stop(netbk->kthread.task);
+ 	}
+ 	vfree(xen_netbk);
+ 	return rc;
+-- 
+1.7.3.4
+
+
+From 6359d5939c5d1f59b794cd02e8cdbd36b9f3434d Mon Sep 17 00:00:00 2001
+From: James Harper <james.harper at bendigoit.com.au>
+Date: Fri, 28 May 2010 23:12:56 -0700
+Subject: [PATCH 091/139] xen: netback: avoid null-pointer access in netback_uevent
+
+Check if drvdata has been set up yet and return if it hasn't.
+
+Signed-off-by: James Harper <james.harper at bendigoit.com.au>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/xenbus.c |    9 +++++++--
+ 1 files changed, 7 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
+index fcd3c34..e30b0c7 100644
+--- a/drivers/xen/netback/xenbus.c
++++ b/drivers/xen/netback/xenbus.c
+@@ -154,12 +154,17 @@ fail:
+  */
+ static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env)
+ {
+-	struct backend_info *be = dev_get_drvdata(&xdev->dev);
+-	struct xen_netif *netif = be->netif;
++	struct backend_info *be;
++	struct xen_netif *netif;
+ 	char *val;
+ 
+ 	DPRINTK("netback_uevent");
+ 
++	be = dev_get_drvdata(&xdev->dev);
++	if (!be)
++		return 0;
++	netif = be->netif;
++
+ 	val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
+ 	if (IS_ERR(val)) {
+ 		int err = PTR_ERR(val);
+-- 
+1.7.3.4
+
+
+From 4a818daa044d9d499412e8f6e2e3086c0521e7b3 Mon Sep 17 00:00:00 2001
+From: Keir Fraser <keir.fraser at citrix.com>
+Date: Fri, 11 Jun 2010 11:48:30 +0100
+Subject: [PATCH 092/139] xen: netback: Fixes for delayed copy of tx network packets.
+
+ - Should call net_tx_action_dealloc() even when dealloc ring is
+   empty, as there may in any case be work to do on the
+   pending_inuse list.
+ - Should not exit directly from the middle of the tx_action tasklet,
+   as the tx_pending_timer should always be checked and updated at the
+   end of the tasklet.
+
+Signed-off-by: Keir Fraser <keir.fraser at citrix.com>
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+[picked from linux-2.6.18-xen.hg 959:1a97bd686258, ported across a43e2175 "xen/netback: move code around"]
+---
+ drivers/xen/netback/netback.c |   25 ++++++++++++-------------
+ 1 files changed, 12 insertions(+), 13 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 547dcaa..58dfbd2 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -1372,16 +1372,6 @@ static void net_tx_submit(struct xen_netbk *netbk)
+ 		netif_rx_ni(skb);
+ 		netif->dev->last_rx = jiffies;
+ 	}
+-
+-	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
+-	    !list_empty(&netbk->pending_inuse_head)) {
+-		struct netbk_tx_pending_inuse *oldest;
+-
+-		oldest = list_entry(netbk->pending_inuse_head.next,
+-				    struct netbk_tx_pending_inuse, list);
+-		mod_timer(&netbk->netbk_tx_pending_timer,
+-				oldest->alloc_time + HZ);
+-	}
+ }
+ 
+ /* Called after netfront has transmitted */
+@@ -1391,19 +1381,28 @@ static void net_tx_action(unsigned long data)
+ 	unsigned nr_mops;
+ 	int ret;
+ 
+-	if (netbk->dealloc_cons != netbk->dealloc_prod)
+-		net_tx_action_dealloc(netbk);
++	net_tx_action_dealloc(netbk);
+ 
+ 	nr_mops = net_tx_build_mops(netbk);
+ 
+ 	if (nr_mops == 0)
+-		return;
++		goto out;
+ 
+ 	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+ 					netbk->tx_map_ops, nr_mops);
+ 	BUG_ON(ret);
+ 
+ 	net_tx_submit(netbk);
++out:
++	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
++	    !list_empty(&netbk->pending_inuse_head)) {
++		struct netbk_tx_pending_inuse *oldest;
++
++		oldest = list_entry(netbk->pending_inuse_head.next,
++				    struct netbk_tx_pending_inuse, list);
++		mod_timer(&netbk->netbk_tx_pending_timer,
++				oldest->alloc_time + HZ);
++	}
+ }
+ 
+ static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
+-- 
+1.7.3.4
+
+
+From 48fa1af97e6c9d304c04f70a75de1340e7d79e18 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Fri, 11 Jun 2010 10:51:01 +0100
+Subject: [PATCH 093/139] xen: netback: handle NET_SKBUFF_DATA_USES_OFFSET correctly
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Cc: Jan Beulich <JBeulich at novell.com>
+---
+ drivers/xen/netback/netback.c |    4 ++++
+ 1 files changed, 4 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 58dfbd2..aa094af 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -218,7 +218,11 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
+ 		len -= copy;
+ 	}
+ 
++#ifdef NET_SKBUFF_DATA_USES_OFFSET
++	offset = 0;
++#else
+ 	offset = nskb->data - skb->data;
++#endif
+ 
+ 	nskb->transport_header = skb->transport_header + offset;
+ 	nskb->network_header = skb->network_header + offset;
+-- 
+1.7.3.4
+
+
+From 7d3e6e42251f179e407fa5236f613e5500b3a3ea Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Fri, 11 Jun 2010 10:51:01 +0100
+Subject: [PATCH 094/139] xen: netback: drop frag member from struct netbk_rx_meta
+
+It has been unused since c3219dc "xen/netback: completely drop flip
+support", as has netbk_free_pages().
+
+(Although struct netbk_rx_meta now has only a single member, it will
+gain other members in a subsequent patch, so there is no point in
+reworking the code to get rid of the struct.)
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/common.h  |    1 -
+ drivers/xen/netback/netback.c |    8 --------
+ 2 files changed, 0 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 36cb2b9..be4fe91 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -230,7 +230,6 @@ struct pending_tx_info {
+ typedef unsigned int pending_ring_idx_t;
+ 
+ struct netbk_rx_meta {
+-	skb_frag_t frag;
+ 	int id;
+ };
+ 
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index aa094af..9f7e489 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -411,14 +411,6 @@ static void netbk_gop_skb(struct sk_buff *skb,
+ 	netif->rx.req_cons += nr_frags + extra;
+ }
+ 
+-static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
+-{
+-	int i;
+-
+-	for (i = 0; i < nr_frags; i++)
+-		put_page(meta[i].frag.page);
+-}
+-
+ /* This is a twin to netbk_gop_skb.  Assume that netbk_gop_skb was
+    used to set up the operations on the top of
+    netrx_pending_operations, which have since been done.  Check that
+-- 
+1.7.3.4
+
+
+From 1ced27150d0092c40ebbbbb3896192003d433c0e Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Fri, 11 Jun 2010 10:51:01 +0100
+Subject: [PATCH 095/139] xen: netback: linearise SKBs as we copy them into guest memory on guest-RX.
+
+There's no point in sending lots of little packets to a copying
+receiver if we can instead arrange to copy them all into a single RX
+buffer.  We need to copy anyway, so there's no overhead here, and this
+is a little bit easier on the receiving domain's network stack.
+
+Based on a patch by Steven Smith. Fixed to not skip unnecessarily to
+the next buffer which could leave the head fragment of a received
+frame empty if the headlen of an SKB was large (which would crash
+netfront). Instead we only try and pack "small enough" fragments
+together but do not try to coalesce large or whole page fragments.
+
+In previous iterations of this patch we also tried to only include
+2048 bytes per frag because very old netfronts stored other
+information in the second half of the page. It has been determined
+that only frontends which support scatter-gather are going to come
+down this path and that any guest which supports scatter-gather is
+also new enough to allow us to use the full page size for each
+fragment (since this limitation was fixed as part of the SG
+implementation), so we do not need this restriction.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Cc: Steven Smith <Steven.Smith at eu.citrix.com>
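
[Not part of the patch] A minimal stand-alone sketch of the slot-counting
rule described above, assuming MAX_BUFFER_OFFSET is one page (4096 bytes).
The function name and the example sizes are illustrative, and the extra
slot the real count_skb_slots() reserves for a GSO descriptor is omitted:

#include <stdio.h>

#define MAX_BUFFER_OFFSET 4096u

/* Mirror of the packing rule: pack data into guest rx buffers, starting
 * a new buffer when the current one is full, or when a frag that would
 * fit whole in an empty buffer does not fit in the space left here. */
static unsigned count_slots(unsigned headlen, const unsigned *frags,
			    int nr_frags)
{
	unsigned count = 1;          /* the head always occupies a slot */
	unsigned copy_off = headlen; /* bytes packed into current buffer */
	int i;

	for (i = 0; i < nr_frags; i++) {
		unsigned size = frags[i];

		while (size > 0) {
			unsigned bytes;

			if (copy_off == MAX_BUFFER_OFFSET ||
			    (copy_off + size > MAX_BUFFER_OFFSET &&
			     size <= MAX_BUFFER_OFFSET && copy_off)) {
				count++;
				copy_off = 0;
			}

			bytes = size;
			if (copy_off + bytes > MAX_BUFFER_OFFSET)
				bytes = MAX_BUFFER_OFFSET - copy_off;

			copy_off += bytes;
			size -= bytes;
		}
	}
	return count;
}

int main(void)
{
	unsigned frags[] = { 200, 300, 5000 };

	/* A 100-byte head and the two small frags share the first buffer;
	 * the 5000-byte frag tops it up and spills into a second: prints 2. */
	printf("%u\n", count_slots(100, frags, 3));
	return 0;
}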
+---
+ drivers/xen/netback/common.h  |   15 ++-
+ drivers/xen/netback/netback.c |  282 ++++++++++++++++++++++++++++++-----------
+ 2 files changed, 218 insertions(+), 79 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index be4fe91..9c0c048 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -82,7 +82,9 @@ struct xen_netif {
+ 	/* Internal feature information. */
+ 	u8 can_queue:1;	/* can queue packets for receiver? */
+ 
+-	/* Allow netif_be_start_xmit() to peek ahead in the rx request ring. */
++	/* Allow netif_be_start_xmit() to peek ahead in the rx request
++	 * ring.  This is a prediction of what rx_req_cons will be once
++	 * all queued skbs are put on the ring. */
+ 	RING_IDX rx_req_cons_peek;
+ 
+ 	/* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
+@@ -231,6 +233,8 @@ typedef unsigned int pending_ring_idx_t;
+ 
+ struct netbk_rx_meta {
+ 	int id;
++	int size;
++	int gso_size;
+ };
+ 
+ struct netbk_tx_pending_inuse {
+@@ -240,6 +244,8 @@ struct netbk_tx_pending_inuse {
+ 
+ #define MAX_PENDING_REQS 256
+ 
++#define MAX_BUFFER_OFFSET PAGE_SIZE
++
+ /* extra field used in struct page */
+ union page_ext {
+ 	struct {
+@@ -301,7 +307,12 @@ struct xen_netbk {
+ 	struct multicall_entry rx_mcl[NET_RX_RING_SIZE+3];
+ 	struct mmu_update rx_mmu[NET_RX_RING_SIZE];
+ 	struct gnttab_transfer grant_trans_op[NET_RX_RING_SIZE];
+-	struct gnttab_copy grant_copy_op[NET_RX_RING_SIZE];
++	/*
++	 * Each head or fragment can be up to 4096 bytes. Given
++	 * MAX_BUFFER_OFFSET of 4096 the worst case is that each
++	 * head/fragment uses 2 copy operation.
++	 */
++	struct gnttab_copy grant_copy_op[2*NET_RX_RING_SIZE];
+ 	unsigned char rx_notify[NR_IRQS];
+ 	u16 notify_list[NET_RX_RING_SIZE];
+ 	struct netbk_rx_meta meta[NET_RX_RING_SIZE];
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 9f7e489..d53d88e 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -259,6 +259,48 @@ static void tx_queue_callback(unsigned long data)
+ 		netif_wake_queue(netif->dev);
+ }
+ 
++/* Figure out how many ring slots we're going to need to send @skb to
++   the guest. */
++static unsigned count_skb_slots(struct sk_buff *skb, struct xen_netif *netif)
++{
++	unsigned count;
++	unsigned copy_off;
++	unsigned i;
++
++	copy_off = 0;
++	count = 1;
++
++	BUG_ON(offset_in_page(skb->data) + skb_headlen(skb) > MAX_BUFFER_OFFSET);
++
++	copy_off = skb_headlen(skb);
++
++	if (skb_shinfo(skb)->gso_size)
++		count++;
++
++	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
++		unsigned long size = skb_shinfo(skb)->frags[i].size;
++		unsigned long bytes;
++		while (size > 0) {
++			BUG_ON(copy_off > MAX_BUFFER_OFFSET);
++
++			/* These checks are the same as in netbk_gop_frag_copy */
++			if (copy_off == MAX_BUFFER_OFFSET
++			    || ((copy_off + size > MAX_BUFFER_OFFSET) && (size <= MAX_BUFFER_OFFSET) && copy_off)) {
++				count++;
++				copy_off = 0;
++			}
++
++			bytes = size;
++			if (copy_off + bytes > MAX_BUFFER_OFFSET)
++				bytes = MAX_BUFFER_OFFSET - copy_off;
++
++			copy_off += bytes;
++			size -= bytes;
++		}
++	}
++	return count;
++}
++
+ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ {
+ 	struct xen_netif *netif = netdev_priv(dev);
+@@ -290,8 +332,9 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ 		skb = nskb;
+ 	}
+ 
+-	netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 +
+-				   !!skb_shinfo(skb)->gso_size;
++	/* Reserve ring slots for the worst-case number of
++	 * fragments. */
++	netif->rx_req_cons_peek += count_skb_slots(skb, netif);
+ 	netif_get(netif);
+ 
+ 	if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
+@@ -335,96 +378,165 @@ struct netrx_pending_operations {
+ 	struct gnttab_copy *copy;
+ 	struct multicall_entry *mcl;
+ 	struct netbk_rx_meta *meta;
++	int copy_off;
++	grant_ref_t copy_gref;
+ };
+ 
+ /* Set up the grant operations for this fragment.  If it's a flipping
+    interface, we also set up the unmap request from here. */
+-static u16 netbk_gop_frag(struct xen_netif *netif, struct netbk_rx_meta *meta,
+-			  int i, struct netrx_pending_operations *npo,
+-			  struct page *page, unsigned long size,
+-			  unsigned long offset)
++
++static void netbk_gop_frag_copy(struct xen_netif *netif,
++				struct netrx_pending_operations *npo,
++				struct page *page, unsigned long size,
++				unsigned long offset, int head)
+ {
+ 	struct gnttab_copy *copy_gop;
+-	struct xen_netif_rx_request *req;
+-	unsigned long old_mfn;
++	struct netbk_rx_meta *meta;
+ 	int group = netif_page_group(page);
+ 	int idx = netif_page_index(page);
++	unsigned long bytes;
++
++	/* Data must not cross a page boundary. */
++	BUG_ON(size + offset > PAGE_SIZE);
+ 
+-	old_mfn = virt_to_mfn(page_address(page));
++	meta = npo->meta + npo->meta_prod - 1;
+ 
+-	req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
++	while (size > 0) {
++		BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
+ 
+-	copy_gop = npo->copy + npo->copy_prod++;
+-	copy_gop->flags = GNTCOPY_dest_gref;
+-	if (PageForeign(page)) {
++		/*
++		 * Move to a new receive buffer if:
++		 *
++		 * simple case: we have completely filled the current buffer.
++		 *
++		 * complex case: the current frag would overflow
++		 * the current buffer but only if:
++		 *     (i)   this frag would fit completely in the next buffer
++		 * and (ii)  there is already some data in the current buffer
++		 * and (iii) this is not the head buffer.
++		 *
++		 * Where:
++		 * - (i) stops us splitting a frag into two copies
++		 *   unless the frag is too large for a single buffer.
++		 * - (ii) stops us from leaving a buffer pointlessly empty.
++		 * - (iii) stops us leaving the first buffer
++		 *   empty. Strictly speaking this is already covered
++		 *   by (ii) but is explicitly checked because
++		 *   netfront relies on the first buffer being
++		 *   non-empty and can crash otherwise.
++		 *
++		 * This means we will effectively linearise small
++		 * frags but do not needlessly split large buffers
++		 * into multiple copies tend to give large frags their
++		 * own buffers as before.
++		 */
++		if (npo->copy_off == MAX_BUFFER_OFFSET
++		    || ((npo->copy_off + size > MAX_BUFFER_OFFSET) && (size <= MAX_BUFFER_OFFSET) && npo->copy_off && !head)) {
++			struct xen_netif_rx_request *req;
++
++			BUG_ON(head); /* Netfront requires there to be some data in the head buffer. */
++			/* Overflowed this request, go to the next one */
++			req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
++			meta = npo->meta + npo->meta_prod++;
++			meta->size = 0;
++			meta->id = req->id;
++			npo->copy_off = 0;
++			npo->copy_gref = req->gref;
++		}
++
++		bytes = size;
++		if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
++			bytes = MAX_BUFFER_OFFSET - npo->copy_off;
++
++		copy_gop = npo->copy + npo->copy_prod++;
++		copy_gop->flags = GNTCOPY_dest_gref;
++		if (PageForeign(page)) {
+ 		struct xen_netbk *netbk = &xen_netbk[group];
+ 		struct pending_tx_info *src_pend = &netbk->pending_tx_info[idx];
+ 		copy_gop->source.domid = src_pend->netif->domid;
+ 		copy_gop->source.u.ref = src_pend->req.gref;
+-		copy_gop->flags |= GNTCOPY_source_gref;
+-	} else {
+-		copy_gop->source.domid = DOMID_SELF;
+-		copy_gop->source.u.gmfn = old_mfn;
+-	}
+-	copy_gop->source.offset = offset;
+-	copy_gop->dest.domid = netif->domid;
+-	copy_gop->dest.offset = 0;
+-	copy_gop->dest.u.ref = req->gref;
+-	copy_gop->len = size;
++			copy_gop->flags |= GNTCOPY_source_gref;
++		} else {
++			copy_gop->source.domid = DOMID_SELF;
++			copy_gop->source.u.gmfn = virt_to_mfn(page_address(page));
++		}
++		copy_gop->source.offset = offset;
++		copy_gop->dest.domid = netif->domid;
+ 
+-	return req->id;
++		copy_gop->dest.offset = npo->copy_off;
++		copy_gop->dest.u.ref = npo->copy_gref;
++		copy_gop->len = bytes;
++
++		npo->copy_off += bytes;
++		meta->size += bytes;
++
++		offset += bytes;
++		size -= bytes;
++		head = 0; /* Must be something in this buffer now */
++	}
+ }
+ 
+-static void netbk_gop_skb(struct sk_buff *skb,
+-			  struct netrx_pending_operations *npo)
++/* Prepare an SKB to be transmitted to the frontend.  This is
++   responsible for allocating grant operations, meta structures, etc.
++   It returns the number of meta structures consumed.  The number of
++   ring slots used is always equal to the number of meta slots used
++   plus the number of GSO descriptors used.  Currently, we use either
++   zero GSO descriptors (for non-GSO packets) or one descriptor (for
++   frontend-side LRO). */
++static int netbk_gop_skb(struct sk_buff *skb,
++			 struct netrx_pending_operations *npo)
+ {
+ 	struct xen_netif *netif = netdev_priv(skb->dev);
+ 	int nr_frags = skb_shinfo(skb)->nr_frags;
+ 	int i;
+-	int extra;
+-	struct netbk_rx_meta *head_meta, *meta;
++	struct xen_netif_rx_request *req;
++	struct netbk_rx_meta *meta;
++	int old_meta_prod;
++
++	old_meta_prod = npo->meta_prod;
+ 
+-	head_meta = npo->meta + npo->meta_prod++;
+-	head_meta->frag.page_offset = skb_shinfo(skb)->gso_type;
+-	head_meta->frag.size = skb_shinfo(skb)->gso_size;
+-	extra = !!head_meta->frag.size + 1;
++	req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
++	meta = npo->meta + npo->meta_prod++;
++	meta->gso_size = skb_shinfo(skb)->gso_size;
++	meta->size = 0;
++	meta->id = req->id;
++	npo->copy_off = 0;
++	npo->copy_gref = req->gref;
++
++	netbk_gop_frag_copy(netif,
++			    npo, virt_to_page(skb->data),
++			    skb_headlen(skb),
++			    offset_in_page(skb->data), 1);
++
++	/* Leave a gap for the GSO descriptor. */
++	if (skb_shinfo(skb)->gso_size)
++		netif->rx.req_cons++;
+ 
+ 	for (i = 0; i < nr_frags; i++) {
+-		meta = npo->meta + npo->meta_prod++;
+-		meta->frag = skb_shinfo(skb)->frags[i];
+-		meta->id = netbk_gop_frag(netif, meta, i + extra, npo,
+-					  meta->frag.page,
+-					  meta->frag.size,
+-					  meta->frag.page_offset);
++		netbk_gop_frag_copy(netif, npo,
++				    skb_shinfo(skb)->frags[i].page,
++				    skb_shinfo(skb)->frags[i].size,
++				    skb_shinfo(skb)->frags[i].page_offset,
++				    0);
+ 	}
+ 
+-	/*
+-	 * This must occur at the end to ensure that we don't trash skb_shinfo
+-	 * until we're done. We know that the head doesn't cross a page
+-	 * boundary because such packets get copied in netif_be_start_xmit.
+-	 */
+-	head_meta->id = netbk_gop_frag(netif, head_meta, 0, npo,
+-				       virt_to_page(skb->data),
+-				       skb_headlen(skb),
+-				       offset_in_page(skb->data));
+-
+-	netif->rx.req_cons += nr_frags + extra;
++	return npo->meta_prod - old_meta_prod;
+ }
+ 
+ /* This is a twin to netbk_gop_skb.  Assume that netbk_gop_skb was
+    used to set up the operations on the top of
+    netrx_pending_operations, which have since been done.  Check that
+    they didn't give any errors and advance over them. */
+-static int netbk_check_gop(int nr_frags, domid_t domid,
++static int netbk_check_gop(int nr_meta_slots, domid_t domid,
+ 			   struct netrx_pending_operations *npo)
+ {
+ 	struct gnttab_copy     *copy_op;
+ 	int status = NETIF_RSP_OKAY;
+ 	int i;
+ 
+-	for (i = 0; i <= nr_frags; i++) {
+-			copy_op = npo->copy + npo->copy_cons++;
+-			if (copy_op->status != GNTST_okay) {
++	for (i = 0; i < nr_meta_slots; i++) {
++		copy_op = npo->copy + npo->copy_cons++;
++		if (copy_op->status != GNTST_okay) {
+ 				DPRINTK("Bad status %d from copy to DOM%d.\n",
+ 					copy_op->status, domid);
+ 				status = NETIF_RSP_ERROR;
+@@ -435,27 +547,35 @@ static int netbk_check_gop(int nr_frags, domid_t domid,
+ }
+ 
+ static void netbk_add_frag_responses(struct xen_netif *netif, int status,
+-				     struct netbk_rx_meta *meta, int nr_frags)
++				     struct netbk_rx_meta *meta,
++				     int nr_meta_slots)
+ {
+ 	int i;
+ 	unsigned long offset;
+ 
+-	for (i = 0; i < nr_frags; i++) {
+-		int id = meta[i].id;
+-		int flags = (i == nr_frags - 1) ? 0 : NETRXF_more_data;
+-		
++	for (i = 0; i < nr_meta_slots; i++) {
++		int flags;
++		if (i == nr_meta_slots - 1)
++			flags = 0;
++		else
++			flags = NETRXF_more_data;
++
+ 		offset = 0;
+-		make_rx_response(netif, id, status, offset,
+-				 meta[i].frag.size, flags);
++		make_rx_response(netif, meta[i].id, status, offset,
++				 meta[i].size, flags);
+ 	}
+ }
+ 
++struct skb_cb_overlay {
++	int meta_slots_used;
++};
++
+ static void net_rx_action(unsigned long data)
+ {
+ 	struct xen_netif *netif = NULL;
+ 	struct xen_netbk *netbk = (struct xen_netbk *)data;
+ 	s8 status;
+-	u16 id, irq, flags;
++	u16 irq, flags;
+ 	struct xen_netif_rx_response *resp;
+ 	struct multicall_entry *mcl;
+ 	struct sk_buff_head rxq;
+@@ -465,6 +585,7 @@ static void net_rx_action(unsigned long data)
+ 	int nr_frags;
+ 	int count;
+ 	unsigned long offset;
++	struct skb_cb_overlay *sco;
+ 
+ 	struct netrx_pending_operations npo = {
+ 		.mmu   = netbk->rx_mmu,
+@@ -479,10 +600,11 @@ static void net_rx_action(unsigned long data)
+ 	count = 0;
+ 
+ 	while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) {
++		netif = netdev_priv(skb->dev);
+ 		nr_frags = skb_shinfo(skb)->nr_frags;
+-		*(int *)skb->cb = nr_frags;
+ 
+-		netbk_gop_skb(skb, &npo);
++		sco = (struct skb_cb_overlay *)skb->cb;
++		sco->meta_slots_used = netbk_gop_skb(skb, &npo);
+ 
+ 		count += nr_frags + 1;
+ 
+@@ -541,18 +663,20 @@ static void net_rx_action(unsigned long data)
+ 	BUG_ON(npo.mmu_mcl && npo.mcl[npo.mmu_mcl].result != 0);
+ 
+ 	while ((skb = __skb_dequeue(&rxq)) != NULL) {
+-		nr_frags = *(int *)skb->cb;
++		sco = (struct skb_cb_overlay *)skb->cb;
+ 
+ 		netif = netdev_priv(skb->dev);
+ 
+ 		netif->stats.tx_bytes += skb->len;
+ 		netif->stats.tx_packets++;
+ 
+-		status = netbk_check_gop(nr_frags, netif->domid, &npo);
+-
+-		id = netbk->meta[npo.meta_cons].id;
+-		flags = nr_frags ? NETRXF_more_data : 0;
++		status = netbk_check_gop(sco->meta_slots_used,
++					 netif->domid, &npo);
+ 
++		if (sco->meta_slots_used == 1)
++			flags = 0;
++		else
++			flags = NETRXF_more_data;
+ 		if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
+ 			flags |= NETRXF_csum_blank | NETRXF_data_validated;
+ 		else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
+@@ -560,10 +684,12 @@ static void net_rx_action(unsigned long data)
+ 			flags |= NETRXF_data_validated;
+ 
+ 		offset = 0;
+-		resp = make_rx_response(netif, id, status, offset,
+-					skb_headlen(skb), flags);
++		resp = make_rx_response(netif, netbk->meta[npo.meta_cons].id,
++					status, offset,
++					netbk->meta[npo.meta_cons].size,
++					flags);
+ 
+-		if (netbk->meta[npo.meta_cons].frag.size) {
++		if (netbk->meta[npo.meta_cons].gso_size) {
+ 			struct xen_netif_extra_info *gso =
+ 				(struct xen_netif_extra_info *)
+ 				RING_GET_RESPONSE(&netif->rx,
+@@ -571,7 +697,7 @@ static void net_rx_action(unsigned long data)
+ 
+ 			resp->flags |= NETRXF_extra_info;
+ 
+-			gso->u.gso.size = netbk->meta[npo.meta_cons].frag.size;
++			gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size;
+ 			gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
+ 			gso->u.gso.pad = 0;
+ 			gso->u.gso.features = 0;
+@@ -580,9 +706,11 @@ static void net_rx_action(unsigned long data)
+ 			gso->flags = 0;
+ 		}
+ 
+-		netbk_add_frag_responses(netif, status,
+-				netbk->meta + npo.meta_cons + 1,
+-				nr_frags);
++		if (sco->meta_slots_used > 1) {
++			netbk_add_frag_responses(netif, status,
++						 netbk->meta + npo.meta_cons + 1,
++						 sco->meta_slots_used - 1);
++		}
+ 
+ 		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
+ 		irq = netif->irq;
+@@ -597,8 +725,8 @@ static void net_rx_action(unsigned long data)
+ 			netif_wake_queue(netif->dev);
+ 
+ 		netif_put(netif);
++		npo.meta_cons += sco->meta_slots_used;
+ 		dev_kfree_skb(skb);
+-		npo.meta_cons += nr_frags + 1;
+ 	}
+ 
+ 	while (notify_nr != 0) {
+-- 
+1.7.3.4
+
+
+From 96069b28e612232fb739ef48d9c2c5178b19f562 Mon Sep 17 00:00:00 2001
+From: Dongxiao Xu <dongxiao.xu at intel.com>
+Date: Thu, 10 Jun 2010 19:03:15 +0800
+Subject: [PATCH 096/139] xen: netback: Set allocated memory to zero from vmalloc.
+
+This should fix the windows/linux pv driver issue.
+
+Signed-off-by: Dongxiao Xu <dongxiao.xu at intel.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c |    1 +
+ 1 files changed, 1 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index d53d88e..c7024d4 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -1724,6 +1724,7 @@ static int __init netback_init(void)
+ 		printk(KERN_ALERT "%s: out of memory\n", __func__);
+ 		return -ENOMEM;
+ 	}
++	memset(xen_netbk, 0, sizeof(struct xen_netbk) * xen_netbk_group_nr);
+ 
+ 	/* We can increase reservation by this much in net_rx_action(). */
+ //	balloon_update_driver_allowance(NET_RX_RING_SIZE);
+-- 
+1.7.3.4
+
+
+From 109a748d1c11b7eeaaacedb08c48bc65640b0bb8 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Mon, 14 Jun 2010 13:23:33 +0100
+Subject: [PATCH 097/139] xen: netback: minor code formatting fixup
+
+Don't include redundant casts from allocation.
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c |    3 +--
+ 1 files changed, 1 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index c7024d4..58e920a 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -1718,8 +1718,7 @@ static int __init netback_init(void)
+ 		return -ENODEV;
+ 
+ 	xen_netbk_group_nr = num_online_cpus();
+-	xen_netbk = (struct xen_netbk *)vmalloc(sizeof(struct xen_netbk) *
+-					    xen_netbk_group_nr);
++	xen_netbk = vmalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);
+ 	if (!xen_netbk) {
+ 		printk(KERN_ALERT "%s: out of memory\n", __func__);
+ 		return -ENOMEM;
+-- 
+1.7.3.4
+
+
+From 2424b59d68ee6ccdb7e52ab68bdba3a8b742513d Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Wed, 30 Jun 2010 10:12:49 +0100
+Subject: [PATCH 098/139] xen: netback: drop more relics of flipping mode
+
+The mmu_update and gnttab_transfer arrays were only used by flipping
+mode. With those gone the multicall now consists of a single call to
+GNTTABOP_copy so drop the multicall as well and just make the one
+hypercall.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Cc: Jeremy Fitzhardinge <jeremy at goop.org>
+Cc: Paul Durrant <paul.durrant at citrix.com>
+---
+ drivers/xen/netback/common.h  |    3 --
+ drivers/xen/netback/netback.c |   55 +++--------------------------------------
+ 2 files changed, 4 insertions(+), 54 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 9c0c048..08e7a0e 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -304,9 +304,6 @@ struct xen_netbk {
+ 	u16 pending_ring[MAX_PENDING_REQS];
+ 	u16 dealloc_ring[MAX_PENDING_REQS];
+ 
+-	struct multicall_entry rx_mcl[NET_RX_RING_SIZE+3];
+-	struct mmu_update rx_mmu[NET_RX_RING_SIZE];
+-	struct gnttab_transfer grant_trans_op[NET_RX_RING_SIZE];
+ 	/*
+ 	 * Each head or fragment can be up to 4096 bytes. Given
+ 	 * MAX_BUFFER_OFFSET of 4096 the worst case is that each
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 58e920a..ca65840 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -368,15 +368,9 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ }
+ 
+ struct netrx_pending_operations {
+-	unsigned trans_prod, trans_cons;
+-	unsigned mmu_prod, mmu_mcl;
+-	unsigned mcl_prod, mcl_cons;
+ 	unsigned copy_prod, copy_cons;
+ 	unsigned meta_prod, meta_cons;
+-	struct mmu_update *mmu;
+-	struct gnttab_transfer *trans;
+ 	struct gnttab_copy *copy;
+-	struct multicall_entry *mcl;
+ 	struct netbk_rx_meta *meta;
+ 	int copy_off;
+ 	grant_ref_t copy_gref;
+@@ -577,7 +571,6 @@ static void net_rx_action(unsigned long data)
+ 	s8 status;
+ 	u16 irq, flags;
+ 	struct xen_netif_rx_response *resp;
+-	struct multicall_entry *mcl;
+ 	struct sk_buff_head rxq;
+ 	struct sk_buff *skb;
+ 	int notify_nr = 0;
+@@ -588,10 +581,7 @@ static void net_rx_action(unsigned long data)
+ 	struct skb_cb_overlay *sco;
+ 
+ 	struct netrx_pending_operations npo = {
+-		.mmu   = netbk->rx_mmu,
+-		.trans = netbk->grant_trans_op,
+ 		.copy  = netbk->grant_copy_op,
+-		.mcl   = netbk->rx_mcl,
+ 		.meta  = netbk->meta,
+ 	};
+ 
+@@ -617,50 +607,13 @@ static void net_rx_action(unsigned long data)
+ 
+ 	BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
+ 
+-	npo.mmu_mcl = npo.mcl_prod;
+-	if (npo.mcl_prod) {
+-		BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
+-		BUG_ON(npo.mmu_prod > ARRAY_SIZE(netbk->rx_mmu));
+-		mcl = npo.mcl + npo.mcl_prod++;
+-
+-		BUG_ON(mcl[-1].op != __HYPERVISOR_update_va_mapping);
+-		mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
+-
+-		mcl->op = __HYPERVISOR_mmu_update;
+-		mcl->args[0] = (unsigned long)netbk->rx_mmu;
+-		mcl->args[1] = npo.mmu_prod;
+-		mcl->args[2] = 0;
+-		mcl->args[3] = DOMID_SELF;
+-	}
+-
+-	if (npo.trans_prod) {
+-		BUG_ON(npo.trans_prod > ARRAY_SIZE(netbk->grant_trans_op));
+-		mcl = npo.mcl + npo.mcl_prod++;
+-		mcl->op = __HYPERVISOR_grant_table_op;
+-		mcl->args[0] = GNTTABOP_transfer;
+-		mcl->args[1] = (unsigned long)netbk->grant_trans_op;
+-		mcl->args[2] = npo.trans_prod;
+-	}
+-
+-	if (npo.copy_prod) {
+-		BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
+-		mcl = npo.mcl + npo.mcl_prod++;
+-		mcl->op = __HYPERVISOR_grant_table_op;
+-		mcl->args[0] = GNTTABOP_copy;
+-		mcl->args[1] = (unsigned long)netbk->grant_copy_op;
+-		mcl->args[2] = npo.copy_prod;
+-	}
+-
+-	/* Nothing to do? */
+-	if (!npo.mcl_prod)
++	if (!npo.copy_prod)
+ 		return;
+ 
+-	BUG_ON(npo.mcl_prod > ARRAY_SIZE(netbk->rx_mcl));
+-
+-	ret = HYPERVISOR_multicall(npo.mcl, npo.mcl_prod);
++	BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
++	ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, &netbk->grant_copy_op,
++					npo.copy_prod);
+ 	BUG_ON(ret != 0);
+-	/* The mmu_machphys_update() must not fail. */
+-	BUG_ON(npo.mmu_mcl && npo.mcl[npo.mmu_mcl].result != 0);
+ 
+ 	while ((skb = __skb_dequeue(&rxq)) != NULL) {
+ 		sco = (struct skb_cb_overlay *)skb->cb;
+-- 
+1.7.3.4
+
+
+From 673a19d9e2d78939c6dc9c49e7e35ee54b54c8c7 Mon Sep 17 00:00:00 2001
+From: Paul Durrant <paul.durrant at citrix.com>
+Date: Fri, 2 Jul 2010 10:28:11 +0100
+Subject: [PATCH 099/139] xen: netback: Fix basic indentation issue
+
+Signed-off-by: Paul Durrant <paul.durrant at citrix.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c |   11 +++++++----
+ 1 files changed, 7 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index ca65840..848503e 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -445,10 +445,13 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
+ 		copy_gop = npo->copy + npo->copy_prod++;
+ 		copy_gop->flags = GNTCOPY_dest_gref;
+ 		if (PageForeign(page)) {
+-		struct xen_netbk *netbk = &xen_netbk[group];
+-		struct pending_tx_info *src_pend = &netbk->pending_tx_info[idx];
+-		copy_gop->source.domid = src_pend->netif->domid;
+-		copy_gop->source.u.ref = src_pend->req.gref;
++			struct xen_netbk *netbk = &xen_netbk[group];
++			struct pending_tx_info *src_pend;
++
++			src_pend = &netbk->pending_tx_info[idx];
++
++			copy_gop->source.domid = src_pend->netif->domid;
++			copy_gop->source.u.ref = src_pend->req.gref;
+ 			copy_gop->flags |= GNTCOPY_source_gref;
+ 		} else {
+ 			copy_gop->source.domid = DOMID_SELF;
+-- 
+1.7.3.4
+
+
+From d08b2d1f2ff4723b335d0fb5b91ffd6cb6a005d3 Mon Sep 17 00:00:00 2001
+From: Paul Durrant <paul.durrant at citrix.com>
+Date: Mon, 5 Jul 2010 11:45:29 +0100
+Subject: [PATCH 100/139] xen: netback: Add a new style of passing GSO packets to frontends.
+
+With feature-gso-tcpv4-prefix, the packet data passed to
+the frontend is preceded by a ring entry that contains the necessary
+metadata. This style of GSO passing is required for Citrix
+Windows PV Drivers.
+
+Signed-off-by: Paul Durrant <paul.durrant at citrix.com>
+Cc: Ian Campbell <ian.campbell at citrix.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/common.h     |    3 ++-
+ drivers/xen/netback/netback.c    |   37 ++++++++++++++++++++++++++++++++++---
+ drivers/xen/netback/xenbus.c     |   15 ++++++++++++---
+ include/xen/interface/io/netif.h |    4 ++++
+ 4 files changed, 52 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 08e7a0e..78451ab 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -80,7 +80,8 @@ struct xen_netif {
+ 	int features;
+ 
+ 	/* Internal feature information. */
+-	u8 can_queue:1;	/* can queue packets for receiver? */
++	u8 can_queue:1;	    /* can queue packets for receiver? */
++	u8 gso_prefix:1;    /* use a prefix segment for GSO information */
+ 
+ 	/* Allow netif_be_start_xmit() to peek ahead in the rx request
+ 	 * ring.  This is a prediction of what rx_req_cons will be once
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 848503e..e93a62e 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -432,6 +432,7 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
+ 			/* Overflowed this request, go to the next one */
+ 			req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
+ 			meta = npo->meta + npo->meta_prod++;
++			meta->gso_size = 0;
+ 			meta->size = 0;
+ 			meta->id = req->id;
+ 			npo->copy_off = 0;
+@@ -492,9 +493,23 @@ static int netbk_gop_skb(struct sk_buff *skb,
+ 
+ 	old_meta_prod = npo->meta_prod;
+ 
++	/* Set up a GSO prefix descriptor, if necessary */
++	if (skb_shinfo(skb)->gso_size && netif->gso_prefix) {
++		req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
++		meta = npo->meta + npo->meta_prod++;
++		meta->gso_size = skb_shinfo(skb)->gso_size;
++		meta->size = 0;
++		meta->id = req->id;
++	}
++
+ 	req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
+ 	meta = npo->meta + npo->meta_prod++;
+-	meta->gso_size = skb_shinfo(skb)->gso_size;
++
++	if (!netif->gso_prefix)
++		meta->gso_size = skb_shinfo(skb)->gso_size;
++	else
++		meta->gso_size = 0;
++
+ 	meta->size = 0;
+ 	meta->id = req->id;
+ 	npo->copy_off = 0;
+@@ -506,7 +521,7 @@ static int netbk_gop_skb(struct sk_buff *skb,
+ 			    offset_in_page(skb->data), 1);
+ 
+ 	/* Leave a gap for the GSO descriptor. */
+-	if (skb_shinfo(skb)->gso_size)
++	if (skb_shinfo(skb)->gso_size && !netif->gso_prefix)
+ 		netif->rx.req_cons++;
+ 
+ 	for (i = 0; i < nr_frags; i++) {
+@@ -623,6 +638,21 @@ static void net_rx_action(unsigned long data)
+ 
+ 		netif = netdev_priv(skb->dev);
+ 
++		if (netbk->meta[npo.meta_cons].gso_size && netif->gso_prefix) {
++			resp = RING_GET_RESPONSE(&netif->rx,
++						netif->rx.rsp_prod_pvt++);
++
++			resp->flags = NETRXF_gso_prefix | NETRXF_more_data;
++
++			resp->offset = netbk->meta[npo.meta_cons].gso_size;
++			resp->id = netbk->meta[npo.meta_cons].id;
++			resp->status = sco->meta_slots_used;
++
++			npo.meta_cons++;
++			sco->meta_slots_used--;
++		}
++
++
+ 		netif->stats.tx_bytes += skb->len;
+ 		netif->stats.tx_packets++;
+ 
+@@ -633,6 +663,7 @@ static void net_rx_action(unsigned long data)
+ 			flags = 0;
+ 		else
+ 			flags = NETRXF_more_data;
++
+ 		if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
+ 			flags |= NETRXF_csum_blank | NETRXF_data_validated;
+ 		else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
+@@ -645,7 +676,7 @@ static void net_rx_action(unsigned long data)
+ 					netbk->meta[npo.meta_cons].size,
+ 					flags);
+ 
+-		if (netbk->meta[npo.meta_cons].gso_size) {
++		if (netbk->meta[npo.meta_cons].gso_size && !netif->gso_prefix) {
+ 			struct xen_netif_extra_info *gso =
+ 				(struct xen_netif_extra_info *)
+ 				RING_GET_RESPONSE(&netif->rx,
+diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
+index e30b0c7..cda987f 100644
+--- a/drivers/xen/netback/xenbus.c
++++ b/drivers/xen/netback/xenbus.c
+@@ -457,16 +457,25 @@ static int connect_rings(struct backend_info *be)
+ 			be->netif->dev->mtu = ETH_DATA_LEN;
+ 	}
+ 
+-	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4", "%d",
+-			 &val) < 0)
++	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4",
++			"%d", &val) < 0)
+ 		val = 0;
+ 	if (val) {
+ 		be->netif->features |= NETIF_F_TSO;
+ 		be->netif->dev->features |= NETIF_F_TSO;
+ 	}
+ 
++	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix",
++			"%d", &val) < 0)
++		val = 0;
++	if (val) {
++		be->netif->features |= NETIF_F_TSO;
++		be->netif->dev->features |= NETIF_F_TSO;
++		be->netif->gso_prefix = 1;
++	}
++
+ 	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
+-			 "%d", &val) < 0)
++			"%d", &val) < 0)
+ 		val = 0;
+ 	if (val) {
+ 		be->netif->features &= ~NETIF_F_IP_CSUM;
+diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h
+index 518481c..8309344 100644
+--- a/include/xen/interface/io/netif.h
++++ b/include/xen/interface/io/netif.h
+@@ -131,6 +131,10 @@ struct xen_netif_rx_request {
+ #define _NETRXF_extra_info     (3)
+ #define  NETRXF_extra_info     (1U<<_NETRXF_extra_info)
+ 
++/* GSO Prefix descriptor. */
++#define _NETRXF_gso_prefix     (4)
++#define  NETRXF_gso_prefix     (1U<<_NETRXF_gso_prefix)
++
+ struct xen_netif_rx_response {
+     uint16_t id;
+     uint16_t offset;       /* Offset in page of start of received packet  */
+-- 
+1.7.3.4
+
+
+From bd910979612331d60a629c16a49ebeb5efa0f035 Mon Sep 17 00:00:00 2001
+From: Paul Durrant <paul.durrant at citrix.com>
+Date: Fri, 2 Jul 2010 10:28:13 +0100
+Subject: [PATCH 101/139] xen: netback: Make frontend features distinct from netback feature flags.
+
+Make sure that if a feature flag is disabled by ethtool on netback,
+we do not gratuitously re-enable it when we check the frontend
+features during ring connection.
+
+Signed-off-by: Paul Durrant <paul.durrant at citrix.com>
+Cc: Ian Campbell <ian.campbell at citrix.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/common.h    |   14 ++++++--
+ drivers/xen/netback/interface.c |   68 ++++++++++++++++++++++++++++++--------
+ drivers/xen/netback/netback.c   |    2 +-
+ drivers/xen/netback/xenbus.c    |   44 ++++++++++---------------
+ 4 files changed, 81 insertions(+), 47 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 78451ab..a5f3759 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -76,12 +76,17 @@ struct xen_netif {
+ 	struct vm_struct *tx_comms_area;
+ 	struct vm_struct *rx_comms_area;
+ 
+-	/* Set of features that can be turned on in dev->features. */
+-	int features;
++	/* Flags that must not be set in dev->features */
++	int features_disabled;
++
++	/* Frontend feature information. */
++	u8 can_sg:1;
++	u8 gso:1;
++	u8 gso_prefix:1;
++	u8 csum:1;
+ 
+ 	/* Internal feature information. */
+ 	u8 can_queue:1;	    /* can queue packets for receiver? */
+-	u8 gso_prefix:1;    /* use a prefix segment for GSO information */
+ 
+ 	/* Allow netif_be_start_xmit() to peek ahead in the rx request
+ 	 * ring.  This is a prediction of what rx_req_cons will be once
+@@ -187,6 +192,7 @@ void netif_accel_init(void);
+ 
+ void netif_disconnect(struct xen_netif *netif);
+ 
++void netif_set_features(struct xen_netif *netif);
+ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle);
+ int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
+ 	      unsigned long rx_ring_ref, unsigned int evtchn);
+@@ -223,7 +229,7 @@ static inline int netbk_can_queue(struct net_device *dev)
+ static inline int netbk_can_sg(struct net_device *dev)
+ {
+ 	struct xen_netif *netif = netdev_priv(dev);
+-	return netif->features & NETIF_F_SG;
++	return netif->can_sg;
+ }
+ 
+ struct pending_tx_info {
+diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
+index 172ef4c..2e8508a 100644
+--- a/drivers/xen/netback/interface.c
++++ b/drivers/xen/netback/interface.c
+@@ -121,31 +121,69 @@ static int netbk_change_mtu(struct net_device *dev, int mtu)
+ 	return 0;
+ }
+ 
+-static int netbk_set_sg(struct net_device *dev, u32 data)
++void netif_set_features(struct xen_netif *netif)
+ {
+-	if (data) {
+-		struct xen_netif *netif = netdev_priv(dev);
++	struct net_device *dev = netif->dev;
++	int features = dev->features;
++
++	if (netif->can_sg)
++		features |= NETIF_F_SG;
++	if (netif->gso || netif->gso_prefix)
++		features |= NETIF_F_TSO;
++	if (netif->csum)
++		features |= NETIF_F_IP_CSUM;
++
++	features &= ~(netif->features_disabled);
+ 
+-		if (!(netif->features & NETIF_F_SG))
++	if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN)
++		dev->mtu = ETH_DATA_LEN;
++
++	dev->features = features;
++}
++
++static int netbk_set_tx_csum(struct net_device *dev, u32 data)
++{
++	struct xen_netif *netif = netdev_priv(dev);
++	if (data) {
++		if (!netif->csum)
+ 			return -ENOSYS;
++		netif->features_disabled &= ~NETIF_F_IP_CSUM;
++	} else {
++		netif->features_disabled |= NETIF_F_IP_CSUM;
+ 	}
+ 
+-	if (dev->mtu > ETH_DATA_LEN)
+-		dev->mtu = ETH_DATA_LEN;
++	netif_set_features(netif);
++	return 0;
++}
+ 
+-	return ethtool_op_set_sg(dev, data);
++static int netbk_set_sg(struct net_device *dev, u32 data)
++{
++	struct xen_netif *netif = netdev_priv(dev);
++	if (data) {
++		if (!netif->can_sg)
++			return -ENOSYS;
++		netif->features_disabled &= ~NETIF_F_SG;
++	} else {
++		netif->features_disabled |= NETIF_F_SG;
++	}
++
++	netif_set_features(netif);
++	return 0;
+ }
+ 
+ static int netbk_set_tso(struct net_device *dev, u32 data)
+ {
++	struct xen_netif *netif = netdev_priv(dev);
+ 	if (data) {
+-		struct xen_netif *netif = netdev_priv(dev);
+-
+-		if (!(netif->features & NETIF_F_TSO))
++		if (!netif->gso && !netif->gso_prefix)
+ 			return -ENOSYS;
++		netif->features_disabled &= ~NETIF_F_TSO;
++	} else {
++		netif->features_disabled |= NETIF_F_TSO;
+ 	}
+ 
+-	return ethtool_op_set_tso(dev, data);
++	netif_set_features(netif);
++	return 0;
+ }
+ 
+ static void netbk_get_drvinfo(struct net_device *dev,
+@@ -200,7 +238,7 @@ static struct ethtool_ops network_ethtool_ops =
+ 	.get_drvinfo = netbk_get_drvinfo,
+ 
+ 	.get_tx_csum = ethtool_op_get_tx_csum,
+-	.set_tx_csum = ethtool_op_set_tx_csum,
++	.set_tx_csum = netbk_set_tx_csum,
+ 	.get_sg = ethtool_op_get_sg,
+ 	.set_sg = netbk_set_sg,
+ 	.get_tso = ethtool_op_get_tso,
+@@ -242,7 +280,8 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
+ 	netif->domid  = domid;
+ 	netif->group  = -1;
+ 	netif->handle = handle;
+-	netif->features = NETIF_F_SG;
++	netif->can_sg = 1;
++	netif->csum = 1;
+ 	atomic_set(&netif->refcnt, 1);
+ 	init_waitqueue_head(&netif->waiting_to_free);
+ 	netif->dev = dev;
+@@ -259,8 +298,7 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
+ 	init_timer(&netif->tx_queue_timeout);
+ 
+ 	dev->netdev_ops	= &netback_ops;
+-	dev->features   = NETIF_F_IP_CSUM|NETIF_F_SG;
+-
++	netif_set_features(netif);
+ 	SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
+ 
+ 	dev->tx_queue_len = netbk_queue_length;
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index e93a62e..63a771e 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -238,7 +238,7 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
+ 
+ static inline int netbk_max_required_rx_slots(struct xen_netif *netif)
+ {
+-	if (netif->features & (NETIF_F_SG|NETIF_F_TSO))
++	if (netif->can_sg || netif->gso || netif->gso_prefix)
+ 		return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
+ 	return 1; /* all in one */
+ }
+diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
+index cda987f..17ff5cf 100644
+--- a/drivers/xen/netback/xenbus.c
++++ b/drivers/xen/netback/xenbus.c
+@@ -404,6 +404,7 @@ static void connect(struct backend_info *be)
+ 
+ static int connect_rings(struct backend_info *be)
+ {
++	struct xen_netif *netif = be->netif;
+ 	struct xenbus_device *dev = be->dev;
+ 	unsigned long tx_ring_ref, rx_ring_ref;
+ 	unsigned int evtchn, rx_copy;
+@@ -437,53 +438,42 @@ static int connect_rings(struct backend_info *be)
+ 	if (!rx_copy)
+ 		return -EOPNOTSUPP;
+ 
+-	if (be->netif->dev->tx_queue_len != 0) {
++	if (netif->dev->tx_queue_len != 0) {
+ 		if (xenbus_scanf(XBT_NIL, dev->otherend,
+ 				 "feature-rx-notify", "%d", &val) < 0)
+ 			val = 0;
+ 		if (val)
+-			be->netif->can_queue = 1;
++			netif->can_queue = 1;
+ 		else
+ 			/* Must be non-zero for pfifo_fast to work. */
+-			be->netif->dev->tx_queue_len = 1;
++			netif->dev->tx_queue_len = 1;
+ 	}
+ 
+-	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", "%d", &val) < 0)
++	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg",
++			 "%d", &val) < 0)
+ 		val = 0;
+-	if (!val) {
+-		be->netif->features &= ~NETIF_F_SG;
+-		be->netif->dev->features &= ~NETIF_F_SG;
+-		if (be->netif->dev->mtu > ETH_DATA_LEN)
+-			be->netif->dev->mtu = ETH_DATA_LEN;
+-	}
++	netif->can_sg = !!val;
+ 
+ 	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4",
+-			"%d", &val) < 0)
++			 "%d", &val) < 0)
+ 		val = 0;
+-	if (val) {
+-		be->netif->features |= NETIF_F_TSO;
+-		be->netif->dev->features |= NETIF_F_TSO;
+-	}
++	netif->gso = !!val;
+ 
+ 	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix",
+-			"%d", &val) < 0)
++			 "%d", &val) < 0)
+ 		val = 0;
+-	if (val) {
+-		be->netif->features |= NETIF_F_TSO;
+-		be->netif->dev->features |= NETIF_F_TSO;
+-		be->netif->gso_prefix = 1;
+-	}
++	netif->gso_prefix = !!val;
+ 
+ 	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
+-			"%d", &val) < 0)
++			 "%d", &val) < 0)
+ 		val = 0;
+-	if (val) {
+-		be->netif->features &= ~NETIF_F_IP_CSUM;
+-		be->netif->dev->features &= ~NETIF_F_IP_CSUM;
+-	}
++	netif->csum = !val;
++
++	/* Set dev->features */
++	netif_set_features(netif);
+ 
+ 	/* Map the shared frame, irq etc. */
+-	err = netif_map(be->netif, tx_ring_ref, rx_ring_ref, evtchn);
++	err = netif_map(netif, tx_ring_ref, rx_ring_ref, evtchn);
+ 	if (err) {
+ 		xenbus_dev_fatal(dev, err,
+ 				 "mapping shared-frames %lu/%lu port %u",
+-- 
+1.7.3.4
+
+
+From cf8c20169427de5829e3ec723712b77de52e64ac Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Thu, 15 Jul 2010 10:46:50 -0700
+Subject: [PATCH 102/139] xen: netback: only initialize for PV domains
+
+HVM domains don't support netback
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c |    2 +-
+ 1 files changed, 1 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 63a771e..911c85b 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -1701,7 +1701,7 @@ static int __init netback_init(void)
+ 	int rc = 0;
+ 	int group;
+ 
+-	if (!xen_domain())
++	if (!xen_pv_domain())
+ 		return -ENODEV;
+ 
+ 	xen_netbk_group_nr = num_online_cpus();
+-- 
+1.7.3.4
+
+
+From 00a5c7eddb919701ac998b33bf4f283efaa06bbc Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Wed, 21 Jul 2010 13:24:26 -0700
+Subject: [PATCH 103/139] xen/rings: make protocol specific usage of shared sring explicit
+
+I don't think protocol-specific data really belongs in this header,
+but since it is already there and we seem to be stuck with it, let's at
+least make the users explicit lest people get caught out by future new
+fields moving the pad field around.
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+(cherry picked from commit df0afd34ec3015e44b8121d0e542d32fb04d438d)
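
[Not part of the patch] A stand-alone sketch checking that the reshuffled
padding keeps the shared-ring header at the same 64 bytes, so nothing moves
for existing frontends or backends. RING_IDX is assumed to be a 32-bit
unsigned integer here, as in the Xen interface headers; the struct names
are illustrative:

#include <stdint.h>
#include <assert.h>

struct old_sring_hdr {
	uint32_t req_prod, req_event;
	uint32_t rsp_prod, rsp_event;
	uint8_t  pad[48];
};

struct new_sring_hdr {
	uint32_t req_prod, req_event;
	uint32_t rsp_prod, rsp_event;
	union {
		struct { uint8_t msg; } tapif_user;
		uint8_t pvt_pad[4];
	} private;
	uint8_t  pad[44];
};

int main(void)
{
	/* both layouts occupy 64 bytes before the ring entries start */
	assert(sizeof(struct old_sring_hdr) == 64);
	assert(sizeof(struct new_sring_hdr) == 64);
	return 0;
}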
+---
+ include/xen/interface/io/ring.h |    8 +++++++-
+ 1 files changed, 7 insertions(+), 1 deletions(-)
+
+diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h
+index e8cbf43..e2d62cf 100644
+--- a/include/xen/interface/io/ring.h
++++ b/include/xen/interface/io/ring.h
+@@ -73,7 +73,13 @@ union __name##_sring_entry {						\
+ struct __name##_sring {							\
+     RING_IDX req_prod, req_event;					\
+     RING_IDX rsp_prod, rsp_event;					\
+-    uint8_t  pad[48];							\
++    union {								\
++        struct {							\
++            uint8_t msg;						\
++        } tapif_user;							\
++        uint8_t pvt_pad[4];						\
++    } private;								\
++    uint8_t pad[44];							\
+     union __name##_sring_entry ring[1]; /* variable-length */		\
+ };									\
+ 									\
+-- 
+1.7.3.4
+
+
+From 1866aec6117132b4399f9e956994af259ad5cfdb Mon Sep 17 00:00:00 2001
+From: Bastian Blank <waldi at debian.org>
+Date: Thu, 29 Jul 2010 17:30:18 +0200
+Subject: [PATCH 104/139] xen: netback: Fix null-pointer access in netback_uevent
+
+The uevent method of Xen netback does not check if the the network
+device is already setup and tries to dereference a null-pointer if not.
+
+Signed-off-by: Bastian Blank <waldi at debian.org>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/xenbus.c |   10 ++--------
+ 1 files changed, 2 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
+index 17ff5cf..1fec65a 100644
+--- a/drivers/xen/netback/xenbus.c
++++ b/drivers/xen/netback/xenbus.c
+@@ -154,17 +154,11 @@ fail:
+  */
+ static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env)
+ {
+-	struct backend_info *be;
+-	struct xen_netif *netif;
++	struct backend_info *be = dev_get_drvdata(&xdev->dev);
+ 	char *val;
+ 
+ 	DPRINTK("netback_uevent");
+ 
+-	be = dev_get_drvdata(&xdev->dev);
+-	if (!be)
+-		return 0;
+-	netif = be->netif;
+-
+ 	val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
+ 	if (IS_ERR(val)) {
+ 		int err = PTR_ERR(val);
+@@ -179,7 +173,7 @@ static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *en
+ 		kfree(val);
+ 	}
+ 
+-	if (add_uevent_var(env, "vif=%s", netif->dev->name))
++	if (be && be->netif && add_uevent_var(env, "vif=%s", be->netif->dev->name))
+ 		return -ENOMEM;
+ 
+ 	return 0;
+-- 
+1.7.3.4
+
+
+From 7f1732b25d00393131220a0369caa8a28faf46e1 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Fri, 30 Jul 2010 15:16:47 +0100
+Subject: [PATCH 105/139] xen: netback: check if foreign pages are actually netback-created foreign pages.
+
+020ba906 "xen/netback: Multiple tasklets support." changed
+netbk_gop_frag_copy to attempt to look up a pending_tx_info for any
+foreign page, regardless of whether the page was a netback-foreign
+page.
+
+In the case of non-netback pages this can lead to dereferencing a NULL
+src_pend->netif.
+
+Restore the behaviour of netif_page_index prior to a3031942
+"xen/netback: Introduce a new struct type page_ext" by performing
+tests to ensure that page is a netback page and extend the same checks
+to netif_page_group.
+
+Actually combine netif_page_{index,group} into a single function
+since they are always called together and it saves duplicating all the
+checks.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Cc: Jeremy Fitzhardinge <jeremy at goop.org>
+Cc: Xu, Dongxiao <dongxiao.xu at intel.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c |   56 ++++++++++++++++++++++++++++------------
+ 1 files changed, 39 insertions(+), 17 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 911c85b..95df223 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -89,18 +89,37 @@ static inline void netif_set_page_ext(struct page *pg, unsigned int group,
+ 	pg->mapping = ext.mapping;
+ }
+ 
+-static inline unsigned int netif_page_group(const struct page *pg)
++static inline int netif_get_page_ext(struct page *pg, unsigned int *_group, unsigned int *_idx)
+ {
+ 	union page_ext ext = { .mapping = pg->mapping };
++	struct xen_netbk *netbk;
++	unsigned int group, idx;
+ 
+-	return ext.e.group - 1;
+-}
++	if (!PageForeign(pg))
++		return 0;
+ 
+-static inline unsigned int netif_page_index(const struct page *pg)
+-{
+-	union page_ext ext = { .mapping = pg->mapping };
++	group = ext.e.group - 1;
++
++	if (group < 0 || group >= xen_netbk_group_nr)
++		return 0;
++
++	netbk = &xen_netbk[group];
++
++	if (netbk->mmap_pages == NULL)
++		return 0;
+ 
+-	return ext.e.idx;
++	idx = ext.e.idx;
++
++	if ((idx < 0) || (idx >= MAX_PENDING_REQS))
++		return 0;
++
++	if (netbk->mmap_pages[idx] != pg)
++		return 0;
++
++	*_group = group;
++	*_idx = idx;
++
++	return 1;
+ }
+ 
+ /*
+@@ -386,8 +405,12 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
+ {
+ 	struct gnttab_copy *copy_gop;
+ 	struct netbk_rx_meta *meta;
+-	int group = netif_page_group(page);
+-	int idx = netif_page_index(page);
++	/*
++	 * These variables are used iff netif_get_page_ext returns true,
++	 * in which case they are guaranteed to be initialized.
++         */
++	unsigned int uninitialized_var(group), uninitialized_var(idx);
++	int foreign = netif_get_page_ext(page, &group, &idx);
+ 	unsigned long bytes;
+ 
+ 	/* Data must not cross a page boundary. */
+@@ -445,7 +468,7 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
+ 
+ 		copy_gop = npo->copy + npo->copy_prod++;
+ 		copy_gop->flags = GNTCOPY_dest_gref;
+-		if (PageForeign(page)) {
++		if (foreign) {
+ 			struct xen_netbk *netbk = &xen_netbk[group];
+ 			struct pending_tx_info *src_pend;
+ 
+@@ -1535,14 +1558,13 @@ static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
+ 
+ static void netif_page_release(struct page *page, unsigned int order)
+ {
+-	int group = netif_page_group(page);
+-	int idx = netif_page_index(page);
+-	struct xen_netbk *netbk = &xen_netbk[group];
++	unsigned int group, idx;
++	int foreign = netif_get_page_ext(page, &group, &idx);
++
++	BUG_ON(!foreign);
+ 	BUG_ON(order);
+-	BUG_ON(group < 0 || group >= xen_netbk_group_nr);
+-	BUG_ON(idx < 0 || idx >= MAX_PENDING_REQS);
+-	BUG_ON(netbk->mmap_pages[idx] != page);
+-	netif_idx_release(netbk, idx);
++
++	netif_idx_release(&xen_netbk[group], idx);
+ }
+ 
+ irqreturn_t netif_be_int(int irq, void *dev_id)
+-- 
+1.7.3.4
+
+
+From 14a12990d12cd9ee919d5579c1d0c3df74ad66e7 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Fri, 30 Jul 2010 15:16:46 +0100
+Subject: [PATCH 106/139] xen: netback: do not unleash netback threads until initialisation is complete
+
+Otherwise netbk_action_thread can reference &netbk->net_schedule_list
+(via tx_work_todo) before it is initialised. Until now it was zeroed,
+which is probably safe but not exactly robust.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Cc: Jeremy Fitzhardinge <jeremy at goop.org>
+Cc: Xu, Dongxiao <dongxiao.xu at intel.com>
+Cc: Paul Durrant <Paul.Durrant at citrix.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c |    4 +++-
+ 1 files changed, 3 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 95df223..2646383 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -1782,7 +1782,6 @@ static int __init netback_init(void)
+ 
+ 			if (!IS_ERR(netbk->kthread.task)) {
+ 				kthread_bind(netbk->kthread.task, group);
+-				wake_up_process(netbk->kthread.task);
+ 			} else {
+ 				printk(KERN_ALERT
+ 					"kthread_run() fails at netback\n");
+@@ -1808,6 +1807,9 @@ static int __init netback_init(void)
+ 		spin_lock_init(&netbk->net_schedule_list_lock);
+ 
+ 		atomic_set(&netbk->netfront_count, 0);
++
++		if (MODPARM_netback_kthread)
++			wake_up_process(netbk->kthread.task);
+ 	}
+ 
+ 	netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
+-- 
+1.7.3.4
+
+
+From 6decec42c631f2e2e268f00ce8841faf38817ca8 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ijc at hellion.org.uk>
+Date: Thu, 2 Sep 2010 14:36:40 +0100
+Subject: [PATCH 108/139] xen: netback: save interrupt state in add_to_net_schedule_list_tail
+
+add_to_net_schedule_list_tail is called from both hard interrupt context
+(netif_be_int) and soft interrupt/process context
+(netif_schedule_work), so use the interrupt-state-saving spinlock
+variants.
+
+Fixes:
+        ------------[ cut here ]------------
+        WARNING: at kernel/lockdep.c:2323 trace_hardirqs_on_caller+0xef/0x1a0()
+        Hardware name: PowerEdge 860
+        Modules linked in: rtc_cmos rtc_core rtc_lib
+        Pid: 16, comm: xenwatch Not tainted 2.6.32.18-x86_32p-xen0-00850-ge6b9b2c #98
+        Call Trace:
+         [<c103951c>] warn_slowpath_common+0x6c/0xc0
+         [<c1039585>] warn_slowpath_null+0x15/0x20
+         [<c105f60f>] trace_hardirqs_on_caller+0xef/0x1a0
+         [<c105f6cb>] trace_hardirqs_on+0xb/0x10
+         [<c136cc72>] _spin_unlock_irq+0x22/0x40
+         [<c11ab9ef>] add_to_net_schedule_list_tail+0x5f/0xb0
+         [<c11aba6b>] netif_be_int+0x2b/0x120
+         [<c106dd8e>] handle_IRQ_event+0x2e/0xe0
+         [<c106f98e>] handle_level_irq+0x6e/0xf0
+         [<c1197cdf>] __xen_evtchn_do_upcall+0x16f/0x190
+         [<c11981b8>] xen_evtchn_do_upcall+0x28/0x40
+         [<c100b487>] xen_do_upcall+0x7/0xc
+         [<c119bcf9>] xs_talkv+0x59/0x1a0
+         [<c119bf6a>] xs_single+0x3a/0x50
+         [<c119c6f9>] xenbus_read+0x39/0x60
+         [<c11adf77>] frontend_changed+0x3e7/0x6a0
+         [<c119d35a>] xenbus_otherend_changed+0x8a/0xa0
+         [<c119d572>] frontend_changed+0x12/0x20
+         [<c119b9dc>] xenwatch_thread+0x7c/0x140
+         [<c104ea74>] kthread+0x74/0x80
+         [<c100b433>] kernel_thread_helper+0x7/0x10
+        ---[ end trace 48d73949a8e0909a ]---
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c |    6 ++++--
+ 1 files changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 2646383..1d080f6 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -787,17 +787,19 @@ static void remove_from_net_schedule_list(struct xen_netif *netif)
+ 
+ static void add_to_net_schedule_list_tail(struct xen_netif *netif)
+ {
++	unsigned long flags;
++
+ 	struct xen_netbk *netbk = &xen_netbk[netif->group];
+ 	if (__on_net_schedule_list(netif))
+ 		return;
+ 
+-	spin_lock_irq(&netbk->net_schedule_list_lock);
++	spin_lock_irqsave(&netbk->net_schedule_list_lock, flags);
+ 	if (!__on_net_schedule_list(netif) &&
+ 	    likely(netif_schedulable(netif))) {
+ 		list_add_tail(&netif->list, &netbk->net_schedule_list);
+ 		netif_get(netif);
+ 	}
+-	spin_unlock_irq(&netbk->net_schedule_list_lock);
++	spin_unlock_irqrestore(&netbk->net_schedule_list_lock, flags);
+ }
+ 
+ void netif_schedule_work(struct xen_netif *netif)
+-- 
+1.7.3.4
+
+
+From 0e667d904c6ab6c44cedef51ef00964f9e0559ba Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Fri, 8 Oct 2010 17:11:51 +0100
+Subject: [PATCH 109/139] xen: netback: increase size of rx_meta array.
+
+We can end up needing as many of these as we need grant copy operations, so
+increase the array size to match (2*NET_RX_RING_SIZE, as for grant_copy_op).
+
+Crash observed on XenServer:
+kernel: ------------[ cut here ]------------
+kernel: kernel BUG at drivers/xen/netback/netback.c:834!
+kernel: invalid opcode: 0000 [#1] SMP
+kernel: last sysfs file: /sys/devices/xen-backend/vbd-10-768/statistics/rd_usecs
+kernel: Process netback (pid: 1413, ti=ec8a4000 task=ed0a6b70 task.ti=ec8a4000)
+kernel: Stack: 00000000 00000612 00000001 00000000 00020000 00000000 ecfbe000 00000000
+kernel:        ec8a5f80 ec8a5f98 ec8a5fac 00000000 c0537220 c0539220 00000000 c0534220
+kernel:        cd7afaa0 cd7afaa0 0000000c 00000014 062de396 00000001 00000001 00000014
+kernel: Call Trace:
+kernel:  [<c0285f10>] ? netbk_action_thread+0x0/0x1fe0
+kernel:  [<c013daf2>] ? kthread+0x42/0x70
+kernel:  [<c013dab0>] ? kthread+0x0/0x70
+kernel:  [<c010569b>] ? kernel_thread_helper+0x7/0x10
+kernel:  =======================
+kernel: Code: 00 00 c7 42 08 20 82 53 c0 8b 85 e4 fe ff ff c7 42 10 00 00 00 00 \
+              c7 42 14 f0 7f 00 00 89 42 0c 8b 8d ec fe ff ff e9 3e e9 ff ff <0f> \
+              0b eb fe 0f 0b eb fe 0f 0b eb fe 0f 0b eb fe 31 c0 e8 bf 31
+kernel: EIP: [<c028790a>] netbk_action_thread+0x19fa/0x1fe0 SS:ESP 0069:ec8a5d98
+
+Corresponding to
+	BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/common.h |    2 +-
+ 1 files changed, 1 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index a5f3759..ce0041a 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -319,7 +319,7 @@ struct xen_netbk {
+ 	struct gnttab_copy grant_copy_op[2*NET_RX_RING_SIZE];
+ 	unsigned char rx_notify[NR_IRQS];
+ 	u16 notify_list[NET_RX_RING_SIZE];
+-	struct netbk_rx_meta meta[NET_RX_RING_SIZE];
++	struct netbk_rx_meta meta[2*NET_RX_RING_SIZE];
+ };
+ 
+ extern struct xen_netbk *xen_netbk;
+-- 
+1.7.3.4
+
+
+From 36713152990836043c908777654ea01ed13ccdf4 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Fri, 15 Oct 2010 13:41:44 +0100
+Subject: [PATCH 110/139] xen: netback: take net_schedule_list_lock when removing entry from net_schedule_list
+
+There is a race in net_tx_build_mops between checking if
+net_schedule_list is empty and actually dequeuing the first entry on
+the list. If another thread dequeues the only entry on the list during
+this window we crash because list_first_entry expects a non-empty
+list, like so:
+
+[ 0.133127] BUG: unable to handle kernel NULL pointer dereference at 00000008
+[ 0.133132] IP: [<c12aae71>] net_tx_build_mops+0x91/0xa70
+[ 0.133142] *pdpt = 0000000000000000 *pde = 000000000000000f
+[ 0.133147] Oops: 0002 1 SMP
+[ 0.133150] last sysfs file:
+[ 0.133152] Modules linked in:
+[ 0.133154]
+[ 0.133156] Pid: 55, comm: netback/1 Not tainted (2.6.32.12-0.7.1 #1) Latitude E4310
+[ 0.133158] EIP: 0061:[<c12aae71>] EFLAGS: 00010202 CPU: 1
+[ 0.133161] EIP is at net_tx_build_mops+0x91/0xa70
+[ 0.133163] EAX: 00000012 EBX: 00000008 ECX: e112b734 EDX: e112b76c
+[ 0.133165] ESI: ffffff30 EDI: 00000000 EBP: e112b734 ESP: dfe85d98
+[ 0.133167] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0069
+[ 0.133169] Process netback/1 (pid: 55, ti=dfe84000 task=dfe83340 task.ti=dfe84000)
+[ 0.133170] Stack:
+[ 0.133172] 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
+[ 0.133177] <0> 00000000 e112b734 e112ec08 e112b7f8 e112ec08 ffffff30 00000000 00000000
+[ 0.133186] <0> 00000000 00000000 00000000 e112b76c dfe85df4 00000001 00000000 aaaaaaaa
+[ 0.133193] Call Trace:
+[ 0.133202] [<c12abc7f>] net_tx_action+0x42f/0xac0
+[ 0.133206] [<c12ac37a>] netbk_action_thread+0x6a/0x1b0
+[ 0.133212] [<c1057444>] kthread+0x74/0x80
+[ 0.133218] [<c10049d7>] kernel_thread_helper+0x7/0x10
+[ 0.133220] Code: c4 00 00 00 89 74 24 58 39 74 24 2c 0f 84 c7 06 00 00 8b 74 24 \
+                  58 8b 5c 24 58 81 ee d0 00 00 00 83 c3 08 89 74 24 34 8b 7c 24 \
+             58 <f0> ff 47 08 89 f0 e8 b4 f9 ff ff 8b 46 2c 8b 56 34 89 44 24 5c
+[ 0.133261] EIP: [<c12aae71>] net_tx_build_mops+0x91/0xa70 SS:ESP 0069:dfe85d98
+[ 0.133265] CR2: 0000000000000008
+[ 0.133274] --[ end trace e2c5c15f54bd9d93 ]--
+
+Therefore, after the initial lock-free check for an empty list, check
+again with the lock held before dequeuing the entry.
+
+Based on a patch by Tomasz Wroblewski.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Cc: Tomasz Wroblewski <tomasz.wroblewski at citrix.com>
+---
+ drivers/xen/netback/netback.c |   35 ++++++++++++++++++++++++++++-------
+ 1 files changed, 28 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 1d080f6..3b03435 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -774,15 +774,34 @@ static int __on_net_schedule_list(struct xen_netif *netif)
+ 	return !list_empty(&netif->list);
+ }
+ 
++/* Must be called with net_schedule_list_lock held */
+ static void remove_from_net_schedule_list(struct xen_netif *netif)
+ {
+-	struct xen_netbk *netbk = &xen_netbk[netif->group];
+-	spin_lock_irq(&netbk->net_schedule_list_lock);
+ 	if (likely(__on_net_schedule_list(netif))) {
+ 		list_del_init(&netif->list);
+ 		netif_put(netif);
+ 	}
++}
++
++static struct xen_netif *poll_net_schedule_list(struct xen_netbk *netbk)
++{
++	struct xen_netif *netif = NULL;
++
++	spin_lock_irq(&netbk->net_schedule_list_lock);
++	if (list_empty(&netbk->net_schedule_list))
++		goto out;
++
++	netif = list_first_entry(&netbk->net_schedule_list,
++				 struct xen_netif, list);
++	if (!netif)
++		goto out;
++
++	netif_get(netif);
++
++	remove_from_net_schedule_list(netif);
++out:
+ 	spin_unlock_irq(&netbk->net_schedule_list_lock);
++	return netif;
+ }
+ 
+ static void add_to_net_schedule_list_tail(struct xen_netif *netif)
+@@ -817,7 +836,10 @@ void netif_schedule_work(struct xen_netif *netif)
+ 
+ void netif_deschedule_work(struct xen_netif *netif)
+ {
++	struct xen_netbk *netbk = &xen_netbk[netif->group];
++	spin_lock_irq(&netbk->net_schedule_list_lock);
+ 	remove_from_net_schedule_list(netif);
++	spin_unlock_irq(&netbk->net_schedule_list_lock);
+ }
+ 
+ 
+@@ -1301,12 +1323,11 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk)
+ 		int work_to_do;
+ 		unsigned int data_len;
+ 		pending_ring_idx_t index;
+-	
++
+ 		/* Get a netif from the list with work to do. */
+-		netif = list_first_entry(&netbk->net_schedule_list,
+-				struct xen_netif, list);
+-		netif_get(netif);
+-		remove_from_net_schedule_list(netif);
++		netif = poll_net_schedule_list(netbk);
++		if (!netif)
++			continue;
+ 
+ 		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
+ 		if (!work_to_do) {
+-- 
+1.7.3.4
+
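+The fix turns the lock-free emptiness test into a hint only: the list is
+re-checked under net_schedule_list_lock before anything is dequeued. Below
+is a minimal userspace sketch of that pattern, for illustration only; a
+pthread mutex stands in for the spinlock and a trivial singly-linked list
+stands in for net_schedule_list, so none of these names come from the
+driver itself.
+
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+struct node {
+	struct node *next;
+	int value;
+};
+
+static struct node *head;
+static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
+
+static void push(int value)
+{
+	struct node *n = malloc(sizeof(*n));
+
+	n->value = value;
+	pthread_mutex_lock(&list_lock);
+	n->next = head;
+	head = n;
+	pthread_mutex_unlock(&list_lock);
+}
+
+/*
+ * Analogous to poll_net_schedule_list(): may return NULL even if an
+ * earlier lock-free check saw work, because another thread may have
+ * emptied the list in the meantime.
+ */
+static struct node *pop(void)
+{
+	struct node *n = NULL;
+
+	pthread_mutex_lock(&list_lock);
+	if (head) {			/* re-check under the lock */
+		n = head;
+		head = n->next;
+	}
+	pthread_mutex_unlock(&list_lock);
+	return n;
+}
+
+int main(void)
+{
+	push(1);
+	while (head) {			/* lock-free hint, as in tx_work_todo */
+		struct node *n = pop();
+
+		if (!n)			/* lost the race: just try again */
+			continue;
+		printf("dequeued %d\n", n->value);
+		free(n);
+	}
+	return 0;
+}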
+
+From 6ad4931abe4b111253da13805cc504cc72b0df1c Mon Sep 17 00:00:00 2001
+From: Paul Durrant <paul.durrant at citrix.com>
+Date: Wed, 15 Dec 2010 09:48:12 +0000
+Subject: [PATCH 111/139] xen: netback: Re-define PKT_PROT_LEN to be bigger.
+
+Re-define PKT_PROT_LEN to be big enough to handle maximal IPv4 and TCP options and phrase
+the definition so that it's reasonably obvious that's what it's for.
+
+Signed-off-by: Paul Durrant <paul.durrant at citrix.com>
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/netback.c |   14 +++++++++-----
+ 1 files changed, 9 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 3b03435..9bbd230 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -36,9 +36,11 @@
+ 
+ #include "common.h"
+ 
+-#include <linux/tcp.h>
+-#include <linux/udp.h>
+ #include <linux/kthread.h>
++#include <linux/if_vlan.h>
++#include <linux/udp.h>
++
++#include <net/tcp.h>
+ 
+ #include <xen/balloon.h>
+ #include <xen/events.h>
+@@ -125,10 +127,12 @@ static inline int netif_get_page_ext(struct page *pg, unsigned int *_group, unsi
+ /*
+  * This is the amount of packet we copy rather than map, so that the
+  * guest can't fiddle with the contents of the headers while we do
+- * packet processing on them (netfilter, routing, etc). 72 is enough
+- * to cover TCP+IP headers including options.
++ * packet processing on them (netfilter, routing, etc).
+  */
+-#define PKT_PROT_LEN 72
++#define PKT_PROT_LEN    (ETH_HLEN + \
++			 VLAN_HLEN + \
++			 sizeof(struct iphdr) + MAX_IPOPTLEN + \
++			 sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE)
+ 
+ static inline pending_ring_idx_t pending_index(unsigned i)
+ {
+-- 
+1.7.3.4
+
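+With the new definition the copied header region grows from a fixed 72
+bytes to enough space for a VLAN-tagged Ethernet header plus maximal IPv4
+and TCP headers. Below is a standalone sketch of the arithmetic, for
+illustration only; the constant values are the usual Linux ones and are
+filled in here as assumptions, since the patch itself only names the macros.
+
+#include <stdio.h>
+
+#define ETH_HLEN		14	/* Ethernet header */
+#define VLAN_HLEN		4	/* 802.1Q tag */
+#define IPHDR_LEN		20	/* sizeof(struct iphdr), no options */
+#define MAX_IPOPTLEN		40	/* maximum IPv4 options */
+#define TCPHDR_LEN		20	/* sizeof(struct tcphdr), no options */
+#define MAX_TCP_OPTION_SPACE	40	/* maximum TCP options */
+
+#define PKT_PROT_LEN	(ETH_HLEN + VLAN_HLEN + \
+			 IPHDR_LEN + MAX_IPOPTLEN + \
+			 TCPHDR_LEN + MAX_TCP_OPTION_SPACE)
+
+int main(void)
+{
+	/* 14 + 4 + 20 + 40 + 20 + 40 = 138, up from the old 72. */
+	printf("PKT_PROT_LEN = %d bytes\n", PKT_PROT_LEN);
+	return 0;
+}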
+
+From a2d629a773aba2049106bad81596ef88e80a9cd4 Mon Sep 17 00:00:00 2001
+From: Paul Durrant <paul.durrant at citrix.com>
+Date: Tue, 14 Dec 2010 20:35:19 +0000
+Subject: [PATCH 112/139] xen: netback: Don't count packets we don't actually receive.
+
+Make sure we only bump rx_packets when we're definitely going to call netif_rx_ni().
+
+Signed-off-by: Paul Durrant <paul.durrant at citrix.com>
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/netback.c |    6 +++---
+ 1 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 9bbd230..78d3509 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -1512,9 +1512,6 @@ static void net_tx_submit(struct xen_netbk *netbk)
+ 		skb->dev      = netif->dev;
+ 		skb->protocol = eth_type_trans(skb, skb->dev);
+ 
+-		netif->stats.rx_bytes += skb->len;
+-		netif->stats.rx_packets++;
+-
+ 		if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ 			if (skb_checksum_setup(skb)) {
+ 				DPRINTK("Can't setup checksum in net_tx_action\n");
+@@ -1530,6 +1527,9 @@ static void net_tx_submit(struct xen_netbk *netbk)
+ 			continue;
+ 		}
+ 
++		netif->stats.rx_bytes += skb->len;
++		netif->stats.rx_packets++;
++
+ 		netif_rx_ni(skb);
+ 		netif->dev->last_rx = jiffies;
+ 	}
+-- 
+1.7.3.4
+
+
+From c6efc62e71720df66d9a91d33a3de813f0ec41c2 Mon Sep 17 00:00:00 2001
+From: Paul Durrant <paul.durrant at citrix.com>
+Date: Tue, 14 Dec 2010 20:35:20 +0000
+Subject: [PATCH 113/139] xen: netback: Remove the 500ms timeout to restart the netif queue.
+
+It is generally unhelpful, as it results in a massive tail-drop should a guest become
+unresponsive for a relatively short period of time, and no back-pressure (other than
+that caused by a higher-layer protocol) is applied to the sender.
+
+Signed-off-by: Paul Durrant <paul.durrant at citrix.com>
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/netback.c |   20 +-------------------
+ 1 files changed, 1 insertions(+), 19 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 78d3509..2caa5f8 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -275,13 +275,6 @@ static inline int netbk_queue_full(struct xen_netif *netif)
+ 	       ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
+ }
+ 
+-static void tx_queue_callback(unsigned long data)
+-{
+-	struct xen_netif *netif = (struct xen_netif *)data;
+-	if (netif_schedulable(netif))
+-		netif_wake_queue(netif->dev);
+-}
+-
+ /* Figure out how many ring slots we're going to need to send @skb to
+    the guest. */
+ static unsigned count_skb_slots(struct sk_buff *skb, struct xen_netif *netif)
+@@ -364,19 +357,8 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ 		netif->rx.sring->req_event = netif->rx_req_cons_peek +
+ 			netbk_max_required_rx_slots(netif);
+ 		mb(); /* request notification /then/ check & stop the queue */
+-		if (netbk_queue_full(netif)) {
++		if (netbk_queue_full(netif))
+ 			netif_stop_queue(dev);
+-			/*
+-			 * Schedule 500ms timeout to restart the queue, thus
+-			 * ensuring that an inactive queue will be drained.
+-			 * Packets will be immediately be dropped until more
+-			 * receive buffers become available (see
+-			 * netbk_queue_full() check above).
+-			 */
+-			netif->tx_queue_timeout.data = (unsigned long)netif;
+-			netif->tx_queue_timeout.function = tx_queue_callback;
+-			mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
+-		}
+ 	}
+ 	skb_queue_tail(&netbk->rx_queue, skb);
+ 
+-- 
+1.7.3.4
+
+
+From 0e8da97315f8cc89611f9194097931df4e67efc7 Mon Sep 17 00:00:00 2001
+From: Paul Durrant <paul.durrant at citrix.com>
+Date: Tue, 14 Dec 2010 20:35:21 +0000
+Subject: [PATCH 114/139] xen: netback: Add a missing test to tx_work_todo.
+
+Add a test so that, when netback is using worker threads, net_tx_action()
+gets called in a timely manner when the pending_inuse list is populated.
+
+Signed-off-by: Paul Durrant <paul.durrant at citrix.com>
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/netback.c |    4 ++++
+ 1 files changed, 4 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 2caa5f8..dd52d01 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -1695,6 +1695,10 @@ static inline int tx_work_todo(struct xen_netbk *netbk)
+ 	if (netbk->dealloc_cons != netbk->dealloc_prod)
+ 		return 1;
+ 
++	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
++	    !list_empty(&netbk->pending_inuse_head))
++		return 1;
++
+ 	if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
+ 			!list_empty(&netbk->net_schedule_list))
+ 		return 1;
+-- 
+1.7.3.4
+
+
+From e2f4dacefdb6cdff5e4e0b380632ff7ca750ba8b Mon Sep 17 00:00:00 2001
+From: Paul Durrant <paul.durrant at citrix.com>
+Date: Tue, 14 Dec 2010 20:35:22 +0000
+Subject: [PATCH 115/139] xen: netback: Re-factor net_tx_action_dealloc() slightly.
+
+There is no need for processing of the pending_inuse list to be within the dealloc_prod/cons
+loop.
+
+Signed-off-by: Paul Durrant <paul.durrant at citrix.com>
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/netback.c |   26 ++++++++++++++------------
+ 1 files changed, 14 insertions(+), 12 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index dd52d01..53b3a0e 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -906,11 +906,20 @@ static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
+ 			gop++;
+ 		}
+ 
+-		if (netbk_copy_skb_mode != NETBK_DELAYED_COPY_SKB ||
+-		    list_empty(&netbk->pending_inuse_head))
+-			break;
++	} while (dp != netbk->dealloc_prod);
++
++	netbk->dealloc_cons = dc;
+ 
+-		/* Copy any entries that have been pending for too long. */
++	ret = HYPERVISOR_grant_table_op(
++		GNTTABOP_unmap_grant_ref, netbk->tx_unmap_ops,
++		gop - netbk->tx_unmap_ops);
++	BUG_ON(ret);
++
++	/*
++	 * Copy any entries that have been pending for too long
++	 */
++	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
++	    !list_empty(&netbk->pending_inuse_head)) {
+ 		list_for_each_entry_safe(inuse, n,
+ 				&netbk->pending_inuse_head, list) {
+ 			struct pending_tx_info *pending_tx_info;
+@@ -936,14 +945,7 @@ static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
+ 
+ 			break;
+ 		}
+-	} while (dp != netbk->dealloc_prod);
+-
+-	netbk->dealloc_cons = dc;
+-
+-	ret = HYPERVISOR_grant_table_op(
+-		GNTTABOP_unmap_grant_ref, netbk->tx_unmap_ops,
+-		gop - netbk->tx_unmap_ops);
+-	BUG_ON(ret);
++	}
+ 
+ 	list_for_each_entry_safe(inuse, n, &list, list) {
+ 		struct pending_tx_info *pending_tx_info;
+-- 
+1.7.3.4
+
+
+From 082386b4a581b2ba5a125cc8944a57ceb33ff37c Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Wed, 5 Jan 2011 09:57:37 +0000
+Subject: [PATCH 116/139] xen: netback: Drop GSO SKBs which do not have csum_blank.
+
+The Linux network stack expects all GSO SKBs to have ip_summed ==
+CHECKSUM_PARTIAL (which implies that the frame contains a partial
+checksum) and the Xen network ring protocol similarly expects an SKB
+which has GSO set to also have NETRX_csum_blank (which also implies a
+partial checksum). Therefore drop such frames on receive; otherwise
+they will trigger the warning in skb_gso_segment.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Cc: Jeremy Fitzhardinge <jeremy at goop.org>
+Cc: xen-devel at lists.xensource.com
+---
+ drivers/xen/netback/netback.c |    4 ++++
+ 1 files changed, 4 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 53b3a0e..2766b93 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -1502,6 +1502,10 @@ static void net_tx_submit(struct xen_netbk *netbk)
+ 				kfree_skb(skb);
+ 				continue;
+ 			}
++		} else if (skb_is_gso(skb)) {
++			DPRINTK("Dropping GSO but not CHECKSUM_PARTIAL skb\n");
++			kfree_skb(skb);
++			continue;
+ 		}
+ 
+ 		if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
+-- 
+1.7.3.4
+
+
+From 27e6a8538a7e781f4774e4746f67eb113996333d Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Wed, 19 Jan 2011 12:43:38 +0000
+Subject: [PATCH 117/139] xen: netback: completely remove tx_queue_timer
+
+"xen: netback: Remove the 500ms timeout to restart the netif queue." missed
+removing the timer initialisation.
+
+Also remove the related comment which has been obsolete since the default for
+MODPARM_copy_skb was switched to true some time ago.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Cc: Paul Durrant <Paul.Durrant at citrix.com>
+---
+ drivers/xen/netback/common.h    |    3 ---
+ drivers/xen/netback/interface.c |   13 +------------
+ 2 files changed, 1 insertions(+), 15 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index ce0041a..7e03a46 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -99,9 +99,6 @@ struct xen_netif {
+ 	unsigned long   remaining_credit;
+ 	struct timer_list credit_timeout;
+ 
+-	/* Enforce draining of the transmit queue. */
+-	struct timer_list tx_queue_timeout;
+-
+ 	/* Statistics */
+ 	int nr_copied_skbs;
+ 
+diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
+index 2e8508a..efdc21c 100644
+--- a/drivers/xen/netback/interface.c
++++ b/drivers/xen/netback/interface.c
+@@ -41,15 +41,7 @@
+  * Module parameter 'queue_length':
+  *
+  * Enables queuing in the network stack when a client has run out of receive
+- * descriptors. Although this feature can improve receive bandwidth by avoiding
+- * packet loss, it can also result in packets sitting in the 'tx_queue' for
+- * unbounded time. This is bad if those packets hold onto foreign resources.
+- * For example, consider a packet that holds onto resources belonging to the
+- * guest for which it is queued (e.g., packet received on vif1.0, destined for
+- * vif1.1 which is not activated in the guest): in this situation the guest
+- * will never be destroyed, unless vif1.1 is taken down. To avoid this, we
+- * run a timer (tx_queue_timeout) to drain the queue when the interface is
+- * blocked.
++ * descriptors.
+  */
+ static unsigned long netbk_queue_length = 32;
+ module_param_named(queue_length, netbk_queue_length, ulong, 0644);
+@@ -295,8 +287,6 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
+ 	/* Initialize 'expires' now: it's used to track the credit window. */
+ 	netif->credit_timeout.expires = jiffies;
+ 
+-	init_timer(&netif->tx_queue_timeout);
+-
+ 	dev->netdev_ops	= &netback_ops;
+ 	netif_set_features(netif);
+ 	SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
+@@ -458,7 +448,6 @@ void netif_disconnect(struct xen_netif *netif)
+ 	wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0);
+ 
+ 	del_timer_sync(&netif->credit_timeout);
+-	del_timer_sync(&netif->tx_queue_timeout);
+ 
+ 	if (netif->irq)
+ 		unbind_from_irqhandler(netif->irq, netif);
+-- 
+1.7.3.4
+
+
+From eccfb3db4b10548f9ecc71cc6f79bbec7e594f1d Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Tue, 18 Jan 2011 11:37:12 +0000
+Subject: [PATCH 118/139] xen: netback: rationalise types used in count_skb_slots
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/netback.c |   10 +++-------
+ 1 files changed, 3 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 2766b93..52f1745 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -277,14 +277,10 @@ static inline int netbk_queue_full(struct xen_netif *netif)
+ 
+ /* Figure out how many ring slots we're going to need to send @skb to
+    the guest. */
+-static unsigned count_skb_slots(struct sk_buff *skb, struct xen_netif *netif)
++static unsigned int count_skb_slots(struct sk_buff *skb, struct xen_netif *netif)
+ {
+-	unsigned count;
+-	unsigned copy_off;
+-	unsigned i;
+-
+-	copy_off = 0;
+-	count = 1;
++	unsigned int count = 1;
++	int i, copy_off = 0;
+ 
+ 	BUG_ON(offset_in_page(skb->data) + skb_headlen(skb) > MAX_BUFFER_OFFSET);
+ 
+-- 
+1.7.3.4
+
+
+From 351ea99b9be1dbab49e069a4250740acc4375f6d Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Tue, 18 Jan 2011 11:21:35 +0000
+Subject: [PATCH 119/139] xen: netback: refactor logic for moving to a new receive buffer.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/netback.c |   80 ++++++++++++++++++++++++-----------------
+ 1 files changed, 47 insertions(+), 33 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 52f1745..ae4821a 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -275,8 +275,51 @@ static inline int netbk_queue_full(struct xen_netif *netif)
+ 	       ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
+ }
+ 
+-/* Figure out how many ring slots we're going to need to send @skb to
+-   the guest. */
++/*
++ * Returns true if we should start a new receive buffer instead of
++ * adding 'size' bytes to a buffer which currently contains 'offset'
++ * bytes.
++ */
++static bool start_new_rx_buffer(int offset, unsigned long size, int head)
++{
++	/* simple case: we have completely filled the current buffer. */
++	if (offset == MAX_BUFFER_OFFSET)
++		return true;
++
++	/*
++	 * complex case: start a fresh buffer if the current frag
++	 * would overflow the current buffer but only if:
++	 *     (i)   this frag would fit completely in the next buffer
++	 * and (ii)  there is already some data in the current buffer
++	 * and (iii) this is not the head buffer.
++	 *
++	 * Where:
++	 * - (i) stops us splitting a frag into two copies
++	 *   unless the frag is too large for a single buffer.
++	 * - (ii) stops us from leaving a buffer pointlessly empty.
++	 * - (iii) stops us leaving the first buffer
++	 *   empty. Strictly speaking this is already covered
++	 *   by (ii) but is explicitly checked because
++	 *   netfront relies on the first buffer being
++	 *   non-empty and can crash otherwise.
++	 *
++	 * This means we will effectively linearise small
++	 * frags but do not needlessly split large buffers
++	 * into multiple copies. This tends to give large frags their
++	 * own buffers as before.
++	 */
++	if ((offset + size > MAX_BUFFER_OFFSET) &&
++	    (size <= MAX_BUFFER_OFFSET) && offset && !head)
++		return true;
++
++	return false;
++}
++
++/*
++ * Figure out how many ring slots we're going to need to send @skb to
++ * the guest. This function is essentially a dry run of
++ * netbk_gop_frag_copy.
++ */
+ static unsigned int count_skb_slots(struct sk_buff *skb, struct xen_netif *netif)
+ {
+ 	unsigned int count = 1;
+@@ -295,9 +338,7 @@ static unsigned int count_skb_slots(struct sk_buff *skb, struct xen_netif *netif
+ 		while (size > 0) {
+ 			BUG_ON(copy_off > MAX_BUFFER_OFFSET);
+ 
+-			/* These checks are the same as in netbk_gop_frag_copy */
+-			if (copy_off == MAX_BUFFER_OFFSET
+-			    || ((copy_off + size > MAX_BUFFER_OFFSET) && (size <= MAX_BUFFER_OFFSET) && copy_off)) {
++			if (start_new_rx_buffer(copy_off, size, 0)) {
+ 				count++;
+ 				copy_off = 0;
+ 			}
+@@ -403,34 +444,7 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
+ 	while (size > 0) {
+ 		BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
+ 
+-		/*
+-		 * Move to a new receive buffer if:
+-		 *
+-		 * simple case: we have completely filled the current buffer.
+-		 *
+-		 * complex case: the current frag would overflow
+-		 * the current buffer but only if:
+-		 *     (i)   this frag would fit completely in the next buffer
+-		 * and (ii)  there is already some data in the current buffer
+-		 * and (iii) this is not the head buffer.
+-		 *
+-		 * Where:
+-		 * - (i) stops us splitting a frag into two copies
+-		 *   unless the frag is too large for a single buffer.
+-		 * - (ii) stops us from leaving a buffer pointlessly empty.
+-		 * - (iii) stops us leaving the first buffer
+-		 *   empty. Strictly speaking this is already covered
+-		 *   by (ii) but is explicitly checked because
+-		 *   netfront relies on the first buffer being
+-		 *   non-empty and can crash otherwise.
+-		 *
+-		 * This means we will effectively linearise small
+-		 * frags but do not needlessly split large buffers
+-		 * into multiple copies tend to give large frags their
+-		 * own buffers as before.
+-		 */
+-		if (npo->copy_off == MAX_BUFFER_OFFSET
+-		    || ((npo->copy_off + size > MAX_BUFFER_OFFSET) && (size <= MAX_BUFFER_OFFSET) && npo->copy_off && !head)) {
++		if (start_new_rx_buffer(npo->copy_off, size, head)) {
+ 			struct xen_netif_rx_request *req;
+ 
+ 			BUG_ON(head); /* Netfront requires there to be some data in the head buffer. */
+-- 
+1.7.3.4
+
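+The predicate factored out above is pure arithmetic, so it can be
+exercised in isolation. Below is a standalone sketch, for illustration
+only; MAX_BUFFER_OFFSET is assumed here to be the 4096-byte page size,
+and the example values in main() are made up to show the interesting
+cases.
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#define MAX_BUFFER_OFFSET 4096	/* assumption: PAGE_SIZE */
+
+static bool start_new_rx_buffer(int offset, unsigned long size, int head)
+{
+	/* Simple case: we have completely filled the current buffer. */
+	if (offset == MAX_BUFFER_OFFSET)
+		return true;
+
+	/*
+	 * Complex case: the frag would overflow the current buffer, it
+	 * would fit in a fresh one, the current buffer already holds
+	 * some data, and this is not the head buffer.
+	 */
+	if ((offset + size > MAX_BUFFER_OFFSET) &&
+	    (size <= MAX_BUFFER_OFFSET) && offset && !head)
+		return true;
+
+	return false;
+}
+
+int main(void)
+{
+	/* Small frag appended to a partly used buffer: keep filling it. */
+	printf("%d\n", start_new_rx_buffer(1000, 200, 0));	/* 0 */
+	/* Frag overflows but fits in a fresh buffer: start a new one. */
+	printf("%d\n", start_new_rx_buffer(3000, 2000, 0));	/* 1 */
+	/* Same case in the head buffer: never leave the head empty. */
+	printf("%d\n", start_new_rx_buffer(3000, 2000, 1));	/* 0 */
+	/* Frag larger than a whole buffer: it has to be split anyway. */
+	printf("%d\n", start_new_rx_buffer(3000, 5000, 0));	/* 0 */
+	return 0;
+}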
+
+From 790bdefa794705301733d53d36c3e8d3a98c811d Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Fri, 24 Dec 2010 13:37:04 +0000
+Subject: [PATCH 120/139] xen: netback: cleanup coding style
+
+Fix checkpatch.pl errors plus manual sweep.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/common.h    |   52 +-------------
+ drivers/xen/netback/interface.c |   14 ++--
+ drivers/xen/netback/netback.c   |  142 ++++++++++++++++++++++----------------
+ drivers/xen/netback/xenbus.c    |   61 +++++++----------
+ 4 files changed, 118 insertions(+), 151 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 7e03a46..77bb3fc 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -35,13 +35,13 @@
+ #include <linux/slab.h>
+ #include <linux/ip.h>
+ #include <linux/in.h>
++#include <linux/io.h>
+ #include <linux/netdevice.h>
+ #include <linux/etherdevice.h>
+ #include <linux/wait.h>
+ #include <linux/sched.h>
+ 
+ #include <xen/interface/io/netif.h>
+-#include <asm/io.h>
+ #include <asm/pgalloc.h>
+ #include <xen/interface/grant_table.h>
+ #include <xen/grant_table.h>
+@@ -49,7 +49,7 @@
+ 
+ #define DPRINTK(_f, _a...)			\
+ 	pr_debug("(file=%s, line=%d) " _f,	\
+-		 __FILE__ , __LINE__ , ## _a )
++		 __FILE__ , __LINE__ , ## _a)
+ #define IPRINTK(fmt, args...)				\
+ 	printk(KERN_INFO "xen_net: " fmt, ##args)
+ #define WPRINTK(fmt, args...)				\
+@@ -131,66 +131,22 @@ enum {
+ 
+ extern int netbk_copy_skb_mode;
+ 
+-/* Function pointers into netback accelerator plugin modules */
+-struct netback_accel_hooks {
+-	struct module *owner;
+-	int  (*probe)(struct xenbus_device *dev);
+-	int (*remove)(struct xenbus_device *dev);
+-};
+-
+-/* Structure to track the state of a netback accelerator plugin */
+-struct netback_accelerator {
+-	struct list_head link;
+-	int id;
+-	char *eth_name;
+-	atomic_t use_count;
+-	struct netback_accel_hooks *hooks;
+-};
+-
+ struct backend_info {
+ 	struct xenbus_device *dev;
+ 	struct xen_netif *netif;
+ 	enum xenbus_state frontend_state;
+ 	struct xenbus_watch hotplug_status_watch;
+ 	int have_hotplug_status_watch:1;
+-
+-	/* State relating to the netback accelerator */
+-	void *netback_accel_priv;
+-	/* The accelerator that this backend is currently using */
+-	struct netback_accelerator *accelerator;
+ };
+ 
+-#define NETBACK_ACCEL_VERSION 0x00010001
+-
+-/*
+- * Connect an accelerator plugin module to netback.  Returns zero on
+- * success, < 0 on error, > 0 (with highest version number supported)
+- * if version mismatch.
+- */
+-extern int netback_connect_accelerator(unsigned version,
+-				       int id, const char *eth_name,
+-				       struct netback_accel_hooks *hooks);
+-/* Disconnect a previously connected accelerator plugin module */
+-extern void netback_disconnect_accelerator(int id, const char *eth_name);
+-
+-
+-extern
+-void netback_probe_accelerators(struct backend_info *be,
+-				struct xenbus_device *dev);
+-extern
+-void netback_remove_accelerators(struct backend_info *be,
+-				 struct xenbus_device *dev);
+-extern
+-void netif_accel_init(void);
+-
+-
+ #define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
+ #define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
+ 
+ void netif_disconnect(struct xen_netif *netif);
+ 
+ void netif_set_features(struct xen_netif *netif);
+-struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle);
++struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
++			      unsigned int handle);
+ int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
+ 	      unsigned long rx_ring_ref, unsigned int evtchn);
+ 
+diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
+index efdc21c..c66b180 100644
+--- a/drivers/xen/netback/interface.c
++++ b/drivers/xen/netback/interface.c
+@@ -225,8 +225,7 @@ static void netbk_get_strings(struct net_device *dev, u32 stringset, u8 * data)
+ 	}
+ }
+ 
+-static struct ethtool_ops network_ethtool_ops =
+-{
++static struct ethtool_ops network_ethtool_ops = {
+ 	.get_drvinfo = netbk_get_drvinfo,
+ 
+ 	.get_tx_csum = ethtool_op_get_tx_csum,
+@@ -242,8 +241,7 @@ static struct ethtool_ops network_ethtool_ops =
+ 	.get_strings = netbk_get_strings,
+ };
+ 
+-static struct net_device_ops netback_ops =
+-{
++static struct net_device_ops netback_ops = {
+ 	.ndo_start_xmit	= netif_be_start_xmit,
+ 	.ndo_get_stats	= netif_be_get_stats,
+ 	.ndo_open	= net_open,
+@@ -251,7 +249,8 @@ static struct net_device_ops netback_ops =
+ 	.ndo_change_mtu	= netbk_change_mtu,
+ };
+ 
+-struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle)
++struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
++			      unsigned int handle)
+ {
+ 	int err = 0;
+ 	struct net_device *dev;
+@@ -316,8 +315,9 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
+ 	return netif;
+ }
+ 
+-static int map_frontend_pages(
+-	struct xen_netif *netif, grant_ref_t tx_ring_ref, grant_ref_t rx_ring_ref)
++static int map_frontend_pages(struct xen_netif *netif,
++			      grant_ref_t tx_ring_ref,
++			      grant_ref_t rx_ring_ref)
+ {
+ 	struct gnttab_map_grant_ref op;
+ 
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index ae4821a..99440fd 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -1,11 +1,9 @@
+-/******************************************************************************
+- * drivers/xen/netback/netback.c
+- *
++/*
+  * Back-end of the driver for virtual network devices. This portion of the
+  * driver exports a 'unified' network-device interface that can be accessed
+  * by any operating system that implements a compatible front end. A
+  * reference front-end implementation can be found in:
+- *  drivers/xen/netfront/netfront.c
++ *  drivers/net/xen-netfront.c
+  *
+  * Copyright (c) 2002-2005, K A Fraser
+  *
+@@ -82,8 +80,8 @@ static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
+ }
+ 
+ /* extra field used in struct page */
+-static inline void netif_set_page_ext(struct page *pg, unsigned int group,
+-		unsigned int idx)
++static inline void netif_set_page_ext(struct page *pg,
++				      unsigned int group, unsigned int idx)
+ {
+ 	union page_ext ext = { .e = { .group = group + 1, .idx = idx } };
+ 
+@@ -91,7 +89,8 @@ static inline void netif_set_page_ext(struct page *pg, unsigned int group,
+ 	pg->mapping = ext.mapping;
+ }
+ 
+-static inline int netif_get_page_ext(struct page *pg, unsigned int *_group, unsigned int *_idx)
++static int netif_get_page_ext(struct page *pg,
++			      unsigned int *_group, unsigned int *_idx)
+ {
+ 	union page_ext ext = { .mapping = pg->mapping };
+ 	struct xen_netbk *netbk;
+@@ -325,7 +324,7 @@ static unsigned int count_skb_slots(struct sk_buff *skb, struct xen_netif *netif
+ 	unsigned int count = 1;
+ 	int i, copy_off = 0;
+ 
+-	BUG_ON(offset_in_page(skb->data) + skb_headlen(skb) > MAX_BUFFER_OFFSET);
++	BUG_ON(offset_in_page(skb->data)+skb_headlen(skb) > MAX_BUFFER_OFFSET);
+ 
+ 	copy_off = skb_headlen(skb);
+ 
+@@ -376,7 +375,7 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ 	 */
+ 	if ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE) {
+ 		struct sk_buff *nskb = netbk_copy_skb(skb);
+-		if ( unlikely(nskb == NULL) )
++		if (unlikely(nskb == NULL))
+ 			goto drop;
+ 		/* Copy only the header fields we use in this driver. */
+ 		nskb->dev = skb->dev;
+@@ -385,8 +384,7 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ 		skb = nskb;
+ 	}
+ 
+-	/* Reserve ring slots for the worst-case number of
+-	 * fragments. */
++	/* Reserve ring slots for the worst-case number of fragments. */
+ 	netif->rx_req_cons_peek += count_skb_slots(skb, netif);
+ 	netif_get(netif);
+ 
+@@ -418,9 +416,29 @@ struct netrx_pending_operations {
+ 	grant_ref_t copy_gref;
+ };
+ 
+-/* Set up the grant operations for this fragment.  If it's a flipping
+-   interface, we also set up the unmap request from here. */
++static struct netbk_rx_meta *get_next_rx_buffer(struct xen_netif *netif,
++						struct netrx_pending_operations *npo)
++{
++	struct netbk_rx_meta *meta;
++	struct xen_netif_rx_request *req;
+ 
++	req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
++
++	meta = npo->meta + npo->meta_prod++;
++	meta->gso_size = 0;
++	meta->size = 0;
++	meta->id = req->id;
++
++	npo->copy_off = 0;
++	npo->copy_gref = req->gref;
++
++	return meta;
++}
++
++/*
++ * Set up the grant operations for this fragment. If it's a flipping
++ * interface, we also set up the unmap request from here.
++ */
+ static void netbk_gop_frag_copy(struct xen_netif *netif,
+ 				struct netrx_pending_operations *npo,
+ 				struct page *page, unsigned long size,
+@@ -431,7 +449,7 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
+ 	/*
+ 	 * These variables are used iff netif_get_page_ext returns true,
+ 	 * in which case they are guaranteed to be initialized.
+-         */
++	 */
+ 	unsigned int uninitialized_var(group), uninitialized_var(idx);
+ 	int foreign = netif_get_page_ext(page, &group, &idx);
+ 	unsigned long bytes;
+@@ -445,17 +463,13 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
+ 		BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
+ 
+ 		if (start_new_rx_buffer(npo->copy_off, size, head)) {
+-			struct xen_netif_rx_request *req;
+-
+-			BUG_ON(head); /* Netfront requires there to be some data in the head buffer. */
+-			/* Overflowed this request, go to the next one */
+-			req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
+-			meta = npo->meta + npo->meta_prod++;
+-			meta->gso_size = 0;
+-			meta->size = 0;
+-			meta->id = req->id;
+-			npo->copy_off = 0;
+-			npo->copy_gref = req->gref;
++			/*
++			 * Netfront requires there to be some data in
++			 * the head buffer.
++			 */
++			BUG_ON(head);
++
++			meta = get_next_rx_buffer(netif, npo);
+ 		}
+ 
+ 		bytes = size;
+@@ -474,8 +488,9 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
+ 			copy_gop->source.u.ref = src_pend->req.gref;
+ 			copy_gop->flags |= GNTCOPY_source_gref;
+ 		} else {
++			void *vaddr = page_address(page);
+ 			copy_gop->source.domid = DOMID_SELF;
+-			copy_gop->source.u.gmfn = virt_to_mfn(page_address(page));
++			copy_gop->source.u.gmfn = virt_to_mfn(vaddr);
+ 		}
+ 		copy_gop->source.offset = offset;
+ 		copy_gop->dest.domid = netif->domid;
+@@ -489,17 +504,22 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
+ 
+ 		offset += bytes;
+ 		size -= bytes;
+-		head = 0; /* Must be something in this buffer now */
++		head = 0; /* There must be something in this buffer now. */
+ 	}
+ }
+ 
+-/* Prepare an SKB to be transmitted to the frontend.  This is
+-   responsible for allocating grant operations, meta structures, etc.
+-   It returns the number of meta structures consumed.  The number of
+-   ring slots used is always equal to the number of meta slots used
+-   plus the number of GSO descriptors used.  Currently, we use either
+-   zero GSO descriptors (for non-GSO packets) or one descriptor (for
+-   frontend-side LRO). */
++/*
++ * Prepare an SKB to be transmitted to the frontend.
++ *
++ * This function is responsible for allocating grant operations, meta
++ * structures, etc.
++ *
++ * It returns the number of meta structures consumed. The number of
++ * ring slots used is always equal to the number of meta slots used
++ * plus the number of GSO descriptors used. Currently, we use either
++ * zero GSO descriptors (for non-GSO packets) or one descriptor (for
++ * frontend-side LRO).
++ */
+ static int netbk_gop_skb(struct sk_buff *skb,
+ 			 struct netrx_pending_operations *npo)
+ {
+@@ -554,10 +574,12 @@ static int netbk_gop_skb(struct sk_buff *skb,
+ 	return npo->meta_prod - old_meta_prod;
+ }
+ 
+-/* This is a twin to netbk_gop_skb.  Assume that netbk_gop_skb was
+-   used to set up the operations on the top of
+-   netrx_pending_operations, which have since been done.  Check that
+-   they didn't give any errors and advance over them. */
++/*
++ * This is a twin to netbk_gop_skb.  Assume that netbk_gop_skb was
++ * used to set up the operations on the top of
++ * netrx_pending_operations, which have since been done.  Check that
++ * they didn't give any errors and advance over them.
++ */
+ static int netbk_check_gop(int nr_meta_slots, domid_t domid,
+ 			   struct netrx_pending_operations *npo)
+ {
+@@ -584,6 +606,12 @@ static void netbk_add_frag_responses(struct xen_netif *netif, int status,
+ 	int i;
+ 	unsigned long offset;
+ 
++	/* No fragments used */
++	if (nr_meta_slots <= 1)
++		return;
++
++	nr_meta_slots--;
++
+ 	for (i = 0; i < nr_meta_slots; i++) {
+ 		int flags;
+ 		if (i == nr_meta_slots - 1)
+@@ -712,11 +740,9 @@ static void net_rx_action(unsigned long data)
+ 			gso->flags = 0;
+ 		}
+ 
+-		if (sco->meta_slots_used > 1) {
+-			netbk_add_frag_responses(netif, status,
+-						 netbk->meta + npo.meta_cons + 1,
+-						 sco->meta_slots_used - 1);
+-		}
++		netbk_add_frag_responses(netif, status,
++					 netbk->meta + npo.meta_cons + 1,
++					 sco->meta_slots_used);
+ 
+ 		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
+ 		irq = netif->irq;
+@@ -887,9 +913,7 @@ static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
+ 	dc = netbk->dealloc_cons;
+ 	gop = netbk->tx_unmap_ops;
+ 
+-	/*
+-	 * Free up any grants we have finished using
+-	 */
++	/* Free up any grants we have finished using. */
+ 	do {
+ 		dp = netbk->dealloc_prod;
+ 
+@@ -999,7 +1023,8 @@ static void netbk_tx_err(struct xen_netif *netif,
+ 
+ static int netbk_count_requests(struct xen_netif *netif,
+ 				struct xen_netif_tx_request *first,
+-				struct xen_netif_tx_request *txp, int work_to_do)
++				struct xen_netif_tx_request *txp,
++				int work_to_do)
+ {
+ 	RING_IDX cons = netif->tx.req_cons;
+ 	int frags = 0;
+@@ -1039,10 +1064,10 @@ static int netbk_count_requests(struct xen_netif *netif,
+ }
+ 
+ static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk,
+-						  struct xen_netif *netif,
+-						  struct sk_buff *skb,
+-						  struct xen_netif_tx_request *txp,
+-						  struct gnttab_map_grant_ref *mop)
++						       struct xen_netif *netif,
++						       struct sk_buff *skb,
++						       struct xen_netif_tx_request *txp,
++						       struct gnttab_map_grant_ref *mop)
+ {
+ 	struct skb_shared_info *shinfo = skb_shinfo(skb);
+ 	skb_frag_t *frags = shinfo->frags;
+@@ -1181,7 +1206,8 @@ static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
+ 	}
+ }
+ 
+-int netbk_get_extras(struct xen_netif *netif, struct xen_netif_extra_info *extras,
++int netbk_get_extras(struct xen_netif *netif,
++		     struct xen_netif_extra_info *extras,
+ 		     int work_to_do)
+ {
+ 	struct xen_netif_extra_info extra;
+@@ -1209,7 +1235,8 @@ int netbk_get_extras(struct xen_netif *netif, struct xen_netif_extra_info *extra
+ 	return work_to_do;
+ }
+ 
+-static int netbk_set_skb_gso(struct sk_buff *skb, struct xen_netif_extra_info *gso)
++static int netbk_set_skb_gso(struct sk_buff *skb,
++			     struct xen_netif_extra_info *gso)
+ {
+ 	if (!gso->u.gso.size) {
+ 		DPRINTK("GSO size must not be zero.\n");
+@@ -1315,7 +1342,7 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk)
+ 		struct xen_netif *netif;
+ 		struct xen_netif_tx_request txreq;
+ 		struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
+-		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
++		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
+ 		u16 pending_idx;
+ 		RING_IDX idx;
+ 		int work_to_do;
+@@ -1377,7 +1404,7 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk)
+ 		if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
+ 			DPRINTK("txreq.offset: %x, size: %u, end: %lu\n",
+ 				txreq.offset, txreq.size,
+-				(txreq.offset &~PAGE_MASK) + txreq.size);
++				(txreq.offset&~PAGE_MASK) + txreq.size);
+ 			netbk_tx_err(netif, &txreq, idx);
+ 			continue;
+ 		}
+@@ -1763,9 +1790,6 @@ static int __init netback_init(void)
+ 	}
+ 	memset(xen_netbk, 0, sizeof(struct xen_netbk) * xen_netbk_group_nr);
+ 
+-	/* We can increase reservation by this much in net_rx_action(). */
+-//	balloon_update_driver_allowance(NET_RX_RING_SIZE);
+-
+ 	for (group = 0; group < xen_netbk_group_nr; group++) {
+ 		struct xen_netbk *netbk = &xen_netbk[group];
+ 		skb_queue_head_init(&netbk->rx_queue);
+@@ -1850,8 +1874,6 @@ static int __init netback_init(void)
+ 			netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
+ 	}
+ 
+-	//netif_accel_init();
+-
+ 	rc = netif_xenbus_init();
+ 	if (rc)
+ 		goto failed_init;
+diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
+index 1fec65a..e2095fc 100644
+--- a/drivers/xen/netback/xenbus.c
++++ b/drivers/xen/netback/xenbus.c
+@@ -1,20 +1,20 @@
+ /*  Xenbus code for netif backend
+-    Copyright (C) 2005 Rusty Russell <rusty at rustcorp.com.au>
+-    Copyright (C) 2005 XenSource Ltd
+-
+-    This program is free software; you can redistribute it and/or modify
+-    it under the terms of the GNU General Public License as published by
+-    the Free Software Foundation; either version 2 of the License, or
+-    (at your option) any later version.
+-
+-    This program is distributed in the hope that it will be useful,
+-    but WITHOUT ANY WARRANTY; without even the implied warranty of
+-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+-    GNU General Public License for more details.
+-
+-    You should have received a copy of the GNU General Public License
+-    along with this program; if not, write to the Free Software
+-    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
++ * Copyright (C) 2005 Rusty Russell <rusty at rustcorp.com.au>
++ * Copyright (C) 2005 XenSource Ltd
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+ 
+ #include <stdarg.h>
+@@ -22,13 +22,6 @@
+ #include <xen/xenbus.h>
+ #include "common.h"
+ 
+-#if 0
+-#undef DPRINTK
+-#define DPRINTK(fmt, args...) \
+-    printk("netback/xenbus (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
+-#endif
+-
+-
+ static int connect_rings(struct backend_info *);
+ static void connect(struct backend_info *);
+ static void backend_create_netif(struct backend_info *be);
+@@ -36,9 +29,7 @@ static void unregister_hotplug_status_watch(struct backend_info *be);
+ 
+ static int netback_remove(struct xenbus_device *dev)
+ {
+-  struct backend_info *be = dev_get_drvdata(&dev->dev);
+-
+-	//netback_remove_accelerators(be, dev);
++	struct backend_info *be = dev_get_drvdata(&dev->dev);
+ 
+ 	unregister_hotplug_status_watch(be);
+ 	if (be->netif) {
+@@ -126,8 +117,6 @@ static int netback_probe(struct xenbus_device *dev,
+ 		goto fail;
+ 	}
+ 
+-	//netback_probe_accelerators(be, dev);
+-
+ 	err = xenbus_switch_state(dev, XenbusStateInitWait);
+ 	if (err)
+ 		goto fail;
+@@ -147,12 +136,13 @@ fail:
+ }
+ 
+ 
+-/**
++/*
+  * Handle the creation of the hotplug script environment.  We add the script
+  * and vif variables to the environment, for the benefit of the vif-* hotplug
+  * scripts.
+  */
+-static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env)
++static int netback_uevent(struct xenbus_device *xdev,
++			  struct kobj_uevent_env *env)
+ {
+ 	struct backend_info *be = dev_get_drvdata(&xdev->dev);
+ 	char *val;
+@@ -164,8 +154,7 @@ static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *en
+ 		int err = PTR_ERR(val);
+ 		xenbus_dev_fatal(xdev, err, "reading script");
+ 		return err;
+-	}
+-	else {
++	} else {
+ 		if (add_uevent_var(env, "script=%s", val)) {
+ 			kfree(val);
+ 			return -ENOMEM;
+@@ -173,10 +162,10 @@ static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *en
+ 		kfree(val);
+ 	}
+ 
+-	if (be && be->netif && add_uevent_var(env, "vif=%s", be->netif->dev->name))
+-		return -ENOMEM;
++	if (!be || !be->netif)
++		return 0;
+ 
+-	return 0;
++	return add_uevent_var(env, "vif=%s", be->netif->dev->name);
+ }
+ 
+ 
+@@ -234,7 +223,7 @@ static void frontend_changed(struct xenbus_device *dev,
+ 	case XenbusStateInitialising:
+ 		if (dev->state == XenbusStateClosed) {
+ 			printk(KERN_INFO "%s: %s: prepare for reconnect\n",
+-			       __FUNCTION__, dev->nodename);
++			       __func__, dev->nodename);
+ 			xenbus_switch_state(dev, XenbusStateInitWait);
+ 		}
+ 		break;
+-- 
+1.7.3.4
+
+
+From 0b0514b47e27f4c1b438b30972011aa27ac1ee8f Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Wed, 19 Jan 2011 10:51:45 +0000
+Subject: [PATCH 121/139] xen: netback: drop private ?PRINTK macros in favour of pr_*
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/common.h    |   10 ++--------
+ drivers/xen/netback/interface.c |   13 +++++++------
+ drivers/xen/netback/netback.c   |   28 ++++++++++++++--------------
+ drivers/xen/netback/xenbus.c    |   13 +++----------
+ 4 files changed, 26 insertions(+), 38 deletions(-)
+
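+For context, a minimal sketch (not part of this patch; the module name and
+message text below are illustrative) of how a pr_fmt definition like the one
+added to common.h prefixes the pr_debug()/pr_warn() calls that replace the
+private DPRINTK/IPRINTK/WPRINTK macros:
+
+	/* Sketch only: pr_fmt() is pasted into every pr_*() call in the
+	 * translation unit, so it must be defined before printk.h is
+	 * (indirectly) included; the trailing __func__ becomes the first
+	 * vararg of each call. */
+	#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__
+
+	#include <linux/module.h>
+	#include <linux/kernel.h>
+
+	static int __init prfmt_demo_init(void)
+	{
+		/* With KBUILD_MODNAME "xen_netback" this emits, once debug
+		 * output is enabled (e.g. via dynamic debug):
+		 *   xen_netback:prfmt_demo_init: vif1.0 connected */
+		pr_debug("vif%u.%u connected\n", 1, 0);
+		pr_warn("Failed to parse network rate limit. Traffic unlimited.\n");
+		return 0;
+	}
+	module_init(prfmt_demo_init);
+
+	MODULE_LICENSE("GPL");
+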
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 77bb3fc..079e1de 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -29,6 +29,8 @@
+ #ifndef __NETIF__BACKEND__COMMON_H__
+ #define __NETIF__BACKEND__COMMON_H__
+ 
++#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__
++
+ #include <linux/version.h>
+ #include <linux/module.h>
+ #include <linux/interrupt.h>
+@@ -47,14 +49,6 @@
+ #include <xen/grant_table.h>
+ #include <xen/xenbus.h>
+ 
+-#define DPRINTK(_f, _a...)			\
+-	pr_debug("(file=%s, line=%d) " _f,	\
+-		 __FILE__ , __LINE__ , ## _a)
+-#define IPRINTK(fmt, args...)				\
+-	printk(KERN_INFO "xen_net: " fmt, ##args)
+-#define WPRINTK(fmt, args...)				\
+-	printk(KERN_WARNING "xen_net: " fmt, ##args)
+-
+ struct xen_netif {
+ 	/* Unique identifier for this interface. */
+ 	domid_t          domid;
+diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
+index c66b180..c36db26 100644
+--- a/drivers/xen/netback/interface.c
++++ b/drivers/xen/netback/interface.c
+@@ -31,6 +31,7 @@
+  */
+ 
+ #include "common.h"
++
+ #include <linux/ethtool.h>
+ #include <linux/rtnetlink.h>
+ 
+@@ -260,7 +261,7 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
+ 	snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
+ 	dev = alloc_netdev(sizeof(struct xen_netif), name, ether_setup);
+ 	if (dev == NULL) {
+-		DPRINTK("Could not create netif: out of memory\n");
++		pr_debug("Could not allocate netdev\n");
+ 		return ERR_PTR(-ENOMEM);
+ 	}
+ 
+@@ -305,13 +306,13 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
+ 	err = register_netdevice(dev);
+ 	rtnl_unlock();
+ 	if (err) {
+-		DPRINTK("Could not register new net device %s: err=%d\n",
+-			dev->name, err);
++		pr_debug("Could not register new net device %s: err=%d\n",
++			 dev->name, err);
+ 		free_netdev(dev);
+ 		return ERR_PTR(err);
+ 	}
+ 
+-	DPRINTK("Successfully created netif\n");
++	pr_debug("Successfully created netif\n");
+ 	return netif;
+ }
+ 
+@@ -328,7 +329,7 @@ static int map_frontend_pages(struct xen_netif *netif,
+ 		BUG();
+ 
+ 	if (op.status) {
+-		DPRINTK(" Gnttab failure mapping tx_ring_ref!\n");
++		pr_debug("Gnttab failure mapping tx_ring_ref!\n");
+ 		return op.status;
+ 	}
+ 
+@@ -348,7 +349,7 @@ static int map_frontend_pages(struct xen_netif *netif,
+ 				    (unsigned long)netif->tx_comms_area->addr,
+ 				    GNTMAP_host_map, netif->tx_shmem_handle);
+ 		HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unop, 1);
+-		DPRINTK(" Gnttab failure mapping rx_ring_ref!\n");
++		pr_debug("Gnttab failure mapping rx_ring_ref!\n");
+ 		return op.status;
+ 	}
+ 
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 99440fd..e0ca232 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -590,8 +590,8 @@ static int netbk_check_gop(int nr_meta_slots, domid_t domid,
+ 	for (i = 0; i < nr_meta_slots; i++) {
+ 		copy_op = npo->copy + npo->copy_cons++;
+ 		if (copy_op->status != GNTST_okay) {
+-				DPRINTK("Bad status %d from copy to DOM%d.\n",
+-					copy_op->status, domid);
++				pr_debug("Bad status %d from copy to DOM%d.\n",
++					 copy_op->status, domid);
+ 				status = NETIF_RSP_ERROR;
+ 			}
+ 	}
+@@ -1215,7 +1215,7 @@ int netbk_get_extras(struct xen_netif *netif,
+ 
+ 	do {
+ 		if (unlikely(work_to_do-- <= 0)) {
+-			DPRINTK("Missing extra info\n");
++			pr_debug("Missing extra info\n");
+ 			return -EBADR;
+ 		}
+ 
+@@ -1224,7 +1224,7 @@ int netbk_get_extras(struct xen_netif *netif,
+ 		if (unlikely(!extra.type ||
+ 			     extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
+ 			netif->tx.req_cons = ++cons;
+-			DPRINTK("Invalid extra type: %d\n", extra.type);
++			pr_debug("Invalid extra type: %d\n", extra.type);
+ 			return -EINVAL;
+ 		}
+ 
+@@ -1239,13 +1239,13 @@ static int netbk_set_skb_gso(struct sk_buff *skb,
+ 			     struct xen_netif_extra_info *gso)
+ {
+ 	if (!gso->u.gso.size) {
+-		DPRINTK("GSO size must not be zero.\n");
++		pr_debug("GSO size must not be zero.\n");
+ 		return -EINVAL;
+ 	}
+ 
+ 	/* Currently only TCPv4 S.O. is supported. */
+ 	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
+-		DPRINTK("Bad GSO type %d.\n", gso->u.gso.type);
++		pr_debug("Bad GSO type %d.\n", gso->u.gso.type);
+ 		return -EINVAL;
+ 	}
+ 
+@@ -1395,16 +1395,16 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk)
+ 		idx += ret;
+ 
+ 		if (unlikely(txreq.size < ETH_HLEN)) {
+-			DPRINTK("Bad packet size: %d\n", txreq.size);
++			pr_debug("Bad packet size: %d\n", txreq.size);
+ 			netbk_tx_err(netif, &txreq, idx);
+ 			continue;
+ 		}
+ 
+ 		/* No crossing a page as the payload mustn't fragment. */
+ 		if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
+-			DPRINTK("txreq.offset: %x, size: %u, end: %lu\n",
+-				txreq.offset, txreq.size,
+-				(txreq.offset&~PAGE_MASK) + txreq.size);
++			pr_debug("txreq.offset: %x, size: %u, end: %lu\n",
++				 txreq.offset, txreq.size,
++				 (txreq.offset&~PAGE_MASK) + txreq.size);
+ 			netbk_tx_err(netif, &txreq, idx);
+ 			continue;
+ 		}
+@@ -1419,7 +1419,7 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk)
+ 		skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
+ 				GFP_ATOMIC | __GFP_NOWARN);
+ 		if (unlikely(skb == NULL)) {
+-			DPRINTK("Can't allocate a skb in start_xmit.\n");
++			pr_debug("Can't allocate a skb in start_xmit.\n");
+ 			netbk_tx_err(netif, &txreq, idx);
+ 			break;
+ 		}
+@@ -1494,7 +1494,7 @@ static void net_tx_submit(struct xen_netbk *netbk)
+ 
+ 		/* Check the remap error code. */
+ 		if (unlikely(netbk_tx_check_mop(netbk, skb, &mop))) {
+-			DPRINTK("netback grant failed.\n");
++			pr_debug("netback grant failed.\n");
+ 			skb_shinfo(skb)->nr_frags = 0;
+ 			kfree_skb(skb);
+ 			continue;
+@@ -1535,12 +1535,12 @@ static void net_tx_submit(struct xen_netbk *netbk)
+ 
+ 		if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ 			if (skb_checksum_setup(skb)) {
+-				DPRINTK("Can't setup checksum in net_tx_action\n");
++				pr_debug("skb_checksum_setup failed\n");
+ 				kfree_skb(skb);
+ 				continue;
+ 			}
+ 		} else if (skb_is_gso(skb)) {
+-			DPRINTK("Dropping GSO but not CHECKSUM_PARTIAL skb\n");
++			pr_debug("GSO SKB checksum is not partial\n");
+ 			kfree_skb(skb);
+ 			continue;
+ 		}
+diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
+index e2095fc..640c696 100644
+--- a/drivers/xen/netback/xenbus.c
++++ b/drivers/xen/netback/xenbus.c
+@@ -17,9 +17,6 @@
+  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+ 
+-#include <stdarg.h>
+-#include <linux/module.h>
+-#include <xen/xenbus.h>
+ #include "common.h"
+ 
+ static int connect_rings(struct backend_info *);
+@@ -130,7 +127,7 @@ abort_transaction:
+ 	xenbus_transaction_end(xbt, 1);
+ 	xenbus_dev_fatal(dev, err, "%s", message);
+ fail:
+-	DPRINTK("failed");
++	pr_debug("failed");
+ 	netback_remove(dev);
+ 	return err;
+ }
+@@ -147,8 +144,6 @@ static int netback_uevent(struct xenbus_device *xdev,
+ 	struct backend_info *be = dev_get_drvdata(&xdev->dev);
+ 	char *val;
+ 
+-	DPRINTK("netback_uevent");
+-
+ 	val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
+ 	if (IS_ERR(val)) {
+ 		int err = PTR_ERR(val);
+@@ -215,7 +210,7 @@ static void frontend_changed(struct xenbus_device *dev,
+ {
+ 	struct backend_info *be = dev_get_drvdata(&dev->dev);
+ 
+-	DPRINTK("%s", xenbus_strstate(frontend_state));
++	pr_debug("frontend state %s", xenbus_strstate(frontend_state));
+ 
+ 	be->frontend_state = frontend_state;
+ 
+@@ -295,7 +290,7 @@ static void xen_net_read_rate(struct xenbus_device *dev,
+ 	return;
+ 
+  fail:
+-	WPRINTK("Failed to parse network rate limit. Traffic unlimited.\n");
++	pr_warn("Failed to parse network rate limit. Traffic unlimited.\n");
+ 	kfree(ratestr);
+ }
+ 
+@@ -394,8 +389,6 @@ static int connect_rings(struct backend_info *be)
+ 	int err;
+ 	int val;
+ 
+-	DPRINTK("");
+-
+ 	err = xenbus_gather(XBT_NIL, dev->otherend,
+ 			    "tx-ring-ref", "%lu", &tx_ring_ref,
+ 			    "rx-ring-ref", "%lu", &rx_ring_ref,
+-- 
+1.7.3.4
+
+
+From e9124d120cf83945516c4085b32ea40c1bb94ffb Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Tue, 18 Jan 2011 12:54:12 +0000
+Subject: [PATCH 122/139] xen: netback: move under drivers/net/xen-netback/
+
+From the kernel's PoV netback is just another network device driver.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/net/Kconfig                 |    7 +
+ drivers/net/Makefile                |    1 +
+ drivers/net/xen-netback/Makefile    |    3 +
+ drivers/net/xen-netback/common.h    |  275 +++++
+ drivers/net/xen-netback/interface.c |  465 +++++++++
+ drivers/net/xen-netback/netback.c   | 1909 +++++++++++++++++++++++++++++++++++
+ drivers/net/xen-netback/xenbus.c    |  487 +++++++++
+ drivers/xen/Kconfig                 |    7 -
+ drivers/xen/Makefile                |    1 -
+ drivers/xen/netback/Makefile        |    3 -
+ drivers/xen/netback/common.h        |  275 -----
+ drivers/xen/netback/interface.c     |  465 ---------
+ drivers/xen/netback/netback.c       | 1909 -----------------------------------
+ drivers/xen/netback/xenbus.c        |  487 ---------
+ 14 files changed, 3147 insertions(+), 3147 deletions(-)
+ create mode 100644 drivers/net/xen-netback/Makefile
+ create mode 100644 drivers/net/xen-netback/common.h
+ create mode 100644 drivers/net/xen-netback/interface.c
+ create mode 100644 drivers/net/xen-netback/netback.c
+ create mode 100644 drivers/net/xen-netback/xenbus.c
+ delete mode 100644 drivers/xen/netback/Makefile
+ delete mode 100644 drivers/xen/netback/common.h
+ delete mode 100644 drivers/xen/netback/interface.c
+ delete mode 100644 drivers/xen/netback/netback.c
+ delete mode 100644 drivers/xen/netback/xenbus.c
+
+diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
+index cbf0635..5b088f5 100644
+--- a/drivers/net/Kconfig
++++ b/drivers/net/Kconfig
+@@ -2970,6 +2970,13 @@ config XEN_NETDEV_FRONTEND
+ 	  if you are compiling a kernel for a Xen guest, you almost
+ 	  certainly want to enable this.
+ 
++config XEN_NETDEV_BACKEND
++	tristate "Xen backend network device"
++	depends on XEN_BACKEND
++	help
++	  Implement the network backend driver, which passes packets
++	  from the guest domain's frontend drivers to the network.
++
+ config ISERIES_VETH
+ 	tristate "iSeries Virtual Ethernet driver support"
+ 	depends on PPC_ISERIES
+diff --git a/drivers/net/Makefile b/drivers/net/Makefile
+index b90738d..145dfd7 100644
+--- a/drivers/net/Makefile
++++ b/drivers/net/Makefile
+@@ -171,6 +171,7 @@ obj-$(CONFIG_SLIP) += slip.o
+ obj-$(CONFIG_SLHC) += slhc.o
+ 
+ obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o
++obj-$(CONFIG_XEN_NETDEV_BACKEND) += xen-netback/
+ 
+ obj-$(CONFIG_DUMMY) += dummy.o
+ obj-$(CONFIG_IFB) += ifb.o
+diff --git a/drivers/net/xen-netback/Makefile b/drivers/net/xen-netback/Makefile
+new file mode 100644
+index 0000000..e346e81
+--- /dev/null
++++ b/drivers/net/xen-netback/Makefile
+@@ -0,0 +1,3 @@
++obj-$(CONFIG_XEN_NETDEV_BACKEND) := xen-netback.o
++
++xen-netback-y := netback.o xenbus.o interface.o
+diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
+new file mode 100644
+index 0000000..079e1de
+--- /dev/null
++++ b/drivers/net/xen-netback/common.h
+@@ -0,0 +1,275 @@
++/******************************************************************************
++ * arch/xen/drivers/netif/backend/common.h
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#ifndef __NETIF__BACKEND__COMMON_H__
++#define __NETIF__BACKEND__COMMON_H__
++
++#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__
++
++#include <linux/version.h>
++#include <linux/module.h>
++#include <linux/interrupt.h>
++#include <linux/slab.h>
++#include <linux/ip.h>
++#include <linux/in.h>
++#include <linux/io.h>
++#include <linux/netdevice.h>
++#include <linux/etherdevice.h>
++#include <linux/wait.h>
++#include <linux/sched.h>
++
++#include <xen/interface/io/netif.h>
++#include <asm/pgalloc.h>
++#include <xen/interface/grant_table.h>
++#include <xen/grant_table.h>
++#include <xen/xenbus.h>
++
++struct xen_netif {
++	/* Unique identifier for this interface. */
++	domid_t          domid;
++	int              group;
++	unsigned int     handle;
++
++	u8               fe_dev_addr[6];
++
++	/* Physical parameters of the comms window. */
++	grant_handle_t   tx_shmem_handle;
++	grant_ref_t      tx_shmem_ref;
++	grant_handle_t   rx_shmem_handle;
++	grant_ref_t      rx_shmem_ref;
++	unsigned int     irq;
++
++	/* The shared rings and indexes. */
++	struct xen_netif_tx_back_ring tx;
++	struct xen_netif_rx_back_ring rx;
++	struct vm_struct *tx_comms_area;
++	struct vm_struct *rx_comms_area;
++
++	/* Flags that must not be set in dev->features */
++	int features_disabled;
++
++	/* Frontend feature information. */
++	u8 can_sg:1;
++	u8 gso:1;
++	u8 gso_prefix:1;
++	u8 csum:1;
++
++	/* Internal feature information. */
++	u8 can_queue:1;	    /* can queue packets for receiver? */
++
++	/* Allow netif_be_start_xmit() to peek ahead in the rx request
++	 * ring.  This is a prediction of what rx_req_cons will be once
++	 * all queued skbs are put on the ring. */
++	RING_IDX rx_req_cons_peek;
++
++	/* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
++	unsigned long   credit_bytes;
++	unsigned long   credit_usec;
++	unsigned long   remaining_credit;
++	struct timer_list credit_timeout;
++
++	/* Statistics */
++	int nr_copied_skbs;
++
++	/* Miscellaneous private stuff. */
++	struct list_head list;  /* scheduling list */
++	atomic_t         refcnt;
++	struct net_device *dev;
++	struct net_device_stats stats;
++
++	unsigned int carrier;
++
++	wait_queue_head_t waiting_to_free;
++};
++
++/*
++ * Implement our own carrier flag: the network stack's version causes delays
++ * when the carrier is re-enabled (in particular, dev_activate() may not
++ * immediately be called, which can cause packet loss; also the etherbridge
++ * can be rather lazy in activating its port).
++ */
++#define netback_carrier_on(netif)	((netif)->carrier = 1)
++#define netback_carrier_off(netif)	((netif)->carrier = 0)
++#define netback_carrier_ok(netif)	((netif)->carrier)
++
++enum {
++	NETBK_DONT_COPY_SKB,
++	NETBK_DELAYED_COPY_SKB,
++	NETBK_ALWAYS_COPY_SKB,
++};
++
++extern int netbk_copy_skb_mode;
++
++struct backend_info {
++	struct xenbus_device *dev;
++	struct xen_netif *netif;
++	enum xenbus_state frontend_state;
++	struct xenbus_watch hotplug_status_watch;
++	int have_hotplug_status_watch:1;
++};
++
++#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
++#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
++
++void netif_disconnect(struct xen_netif *netif);
++
++void netif_set_features(struct xen_netif *netif);
++struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
++			      unsigned int handle);
++int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
++	      unsigned long rx_ring_ref, unsigned int evtchn);
++
++static inline void netif_get(struct xen_netif *netif)
++{
++	atomic_inc(&netif->refcnt);
++}
++
++static inline void  netif_put(struct xen_netif *netif)
++{
++	if (atomic_dec_and_test(&netif->refcnt))
++		wake_up(&netif->waiting_to_free);
++}
++
++int netif_xenbus_init(void);
++
++#define netif_schedulable(netif)				\
++	(netif_running((netif)->dev) && netback_carrier_ok(netif))
++
++void netif_schedule_work(struct xen_netif *netif);
++void netif_deschedule_work(struct xen_netif *netif);
++
++int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
++struct net_device_stats *netif_be_get_stats(struct net_device *dev);
++irqreturn_t netif_be_int(int irq, void *dev_id);
++
++static inline int netbk_can_queue(struct net_device *dev)
++{
++	struct xen_netif *netif = netdev_priv(dev);
++	return netif->can_queue;
++}
++
++static inline int netbk_can_sg(struct net_device *dev)
++{
++	struct xen_netif *netif = netdev_priv(dev);
++	return netif->can_sg;
++}
++
++struct pending_tx_info {
++	struct xen_netif_tx_request req;
++	struct xen_netif *netif;
++};
++typedef unsigned int pending_ring_idx_t;
++
++struct netbk_rx_meta {
++	int id;
++	int size;
++	int gso_size;
++};
++
++struct netbk_tx_pending_inuse {
++	struct list_head list;
++	unsigned long alloc_time;
++};
++
++#define MAX_PENDING_REQS 256
++
++#define MAX_BUFFER_OFFSET PAGE_SIZE
++
++/* extra field used in struct page */
++union page_ext {
++	struct {
++#if BITS_PER_LONG < 64
++#define IDX_WIDTH   8
++#define GROUP_WIDTH (BITS_PER_LONG - IDX_WIDTH)
++		unsigned int group:GROUP_WIDTH;
++		unsigned int idx:IDX_WIDTH;
++#else
++		unsigned int group, idx;
++#endif
++	} e;
++	void *mapping;
++};
++
++struct xen_netbk {
++	union {
++		struct {
++			struct tasklet_struct net_tx_tasklet;
++			struct tasklet_struct net_rx_tasklet;
++		} tasklet;
++
++		struct {
++			wait_queue_head_t netbk_action_wq;
++			struct task_struct *task;
++		} kthread;
++	};
++
++	struct sk_buff_head rx_queue;
++	struct sk_buff_head tx_queue;
++
++	struct timer_list net_timer;
++	struct timer_list netbk_tx_pending_timer;
++
++	struct page **mmap_pages;
++
++	pending_ring_idx_t pending_prod;
++	pending_ring_idx_t pending_cons;
++	pending_ring_idx_t dealloc_prod;
++	pending_ring_idx_t dealloc_cons;
++
++	struct list_head pending_inuse_head;
++	struct list_head net_schedule_list;
++
++	/* Protect the net_schedule_list in netif. */
++	spinlock_t net_schedule_list_lock;
++
++	atomic_t netfront_count;
++
++	struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
++	struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
++	struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
++	struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
++
++	grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
++	u16 pending_ring[MAX_PENDING_REQS];
++	u16 dealloc_ring[MAX_PENDING_REQS];
++
++	/*
++	 * Each head or fragment can be up to 4096 bytes. Given
++	 * MAX_BUFFER_OFFSET of 4096 the worst case is that each
++	 * head/fragment uses 2 copy operations.
++	 */
++	struct gnttab_copy grant_copy_op[2*NET_RX_RING_SIZE];
++	unsigned char rx_notify[NR_IRQS];
++	u16 notify_list[NET_RX_RING_SIZE];
++	struct netbk_rx_meta meta[2*NET_RX_RING_SIZE];
++};
++
++extern struct xen_netbk *xen_netbk;
++extern int xen_netbk_group_nr;
++
++#endif /* __NETIF__BACKEND__COMMON_H__ */
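+
+The union page_ext above stashes a netback group number and a pending-ring
+index in the pointer-sized page->mapping field of granted pages. A standalone
+sketch of the same packing trick (user-space, names invented here, assumes
+64-bit pointers; the real users are netif_set_page_ext()/netif_get_page_ext()
+in netback.c):
+
+	#include <stdio.h>
+
+	/* Sketch only: pack a group number and an index into one
+	 * pointer-sized value, as the driver does with page->mapping.
+	 * group is stored +1 so a NULL mapping is never a valid encoding. */
+	union page_ext_demo {
+		struct {
+			unsigned int group;
+			unsigned int idx;
+		} e;
+		void *mapping;
+	};
+
+	int main(void)
+	{
+		union page_ext_demo ext = { .e = { .group = 3 + 1, .idx = 42 } };
+		void *stashed = ext.mapping;	/* what the driver stores */
+
+		union page_ext_demo out = { .mapping = stashed };
+		printf("group=%u idx=%u\n", out.e.group - 1, out.e.idx);
+		return 0;
+	}
+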
+diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
+new file mode 100644
+index 0000000..c36db26
+--- /dev/null
++++ b/drivers/net/xen-netback/interface.c
+@@ -0,0 +1,465 @@
++/******************************************************************************
++ * arch/xen/drivers/netif/backend/interface.c
++ *
++ * Network-device interface management.
++ *
++ * Copyright (c) 2004-2005, Keir Fraser
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include "common.h"
++
++#include <linux/ethtool.h>
++#include <linux/rtnetlink.h>
++
++#include <xen/events.h>
++#include <asm/xen/hypercall.h>
++
++/*
++ * Module parameter 'queue_length':
++ *
++ * Enables queuing in the network stack when a client has run out of receive
++ * descriptors.
++ */
++static unsigned long netbk_queue_length = 32;
++module_param_named(queue_length, netbk_queue_length, ulong, 0644);
++
++static void netbk_add_netif(struct xen_netbk *netbk, int group_nr,
++			   struct xen_netif *netif)
++{
++	int i;
++	int min_netfront_count;
++	int min_group = 0;
++	min_netfront_count = atomic_read(&netbk[0].netfront_count);
++	for (i = 0; i < group_nr; i++) {
++		int netfront_count = atomic_read(&netbk[i].netfront_count);
++		if (netfront_count < min_netfront_count) {
++			min_group = i;
++			min_netfront_count = netfront_count;
++		}
++	}
++
++	netif->group = min_group;
++	atomic_inc(&netbk[netif->group].netfront_count);
++}
++
++static void netbk_remove_netif(struct xen_netbk *netbk, struct xen_netif *netif)
++{
++	atomic_dec(&netbk[netif->group].netfront_count);
++}
++
++static void __netif_up(struct xen_netif *netif)
++{
++	netbk_add_netif(xen_netbk, xen_netbk_group_nr, netif);
++	enable_irq(netif->irq);
++	netif_schedule_work(netif);
++}
++
++static void __netif_down(struct xen_netif *netif)
++{
++	disable_irq(netif->irq);
++	netif_deschedule_work(netif);
++	netbk_remove_netif(xen_netbk, netif);
++}
++
++static int net_open(struct net_device *dev)
++{
++	struct xen_netif *netif = netdev_priv(dev);
++	if (netback_carrier_ok(netif)) {
++		__netif_up(netif);
++		netif_start_queue(dev);
++	}
++	return 0;
++}
++
++static int net_close(struct net_device *dev)
++{
++	struct xen_netif *netif = netdev_priv(dev);
++	if (netback_carrier_ok(netif))
++		__netif_down(netif);
++	netif_stop_queue(dev);
++	return 0;
++}
++
++static int netbk_change_mtu(struct net_device *dev, int mtu)
++{
++	int max = netbk_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
++
++	if (mtu > max)
++		return -EINVAL;
++	dev->mtu = mtu;
++	return 0;
++}
++
++void netif_set_features(struct xen_netif *netif)
++{
++	struct net_device *dev = netif->dev;
++	int features = dev->features;
++
++	if (netif->can_sg)
++		features |= NETIF_F_SG;
++	if (netif->gso || netif->gso_prefix)
++		features |= NETIF_F_TSO;
++	if (netif->csum)
++		features |= NETIF_F_IP_CSUM;
++
++	features &= ~(netif->features_disabled);
++
++	if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN)
++		dev->mtu = ETH_DATA_LEN;
++
++	dev->features = features;
++}
++
++static int netbk_set_tx_csum(struct net_device *dev, u32 data)
++{
++	struct xen_netif *netif = netdev_priv(dev);
++	if (data) {
++		if (!netif->csum)
++			return -ENOSYS;
++		netif->features_disabled &= ~NETIF_F_IP_CSUM;
++	} else {
++		netif->features_disabled |= NETIF_F_IP_CSUM;
++	}
++
++	netif_set_features(netif);
++	return 0;
++}
++
++static int netbk_set_sg(struct net_device *dev, u32 data)
++{
++	struct xen_netif *netif = netdev_priv(dev);
++	if (data) {
++		if (!netif->can_sg)
++			return -ENOSYS;
++		netif->features_disabled &= ~NETIF_F_SG;
++	} else {
++		netif->features_disabled |= NETIF_F_SG;
++	}
++
++	netif_set_features(netif);
++	return 0;
++}
++
++static int netbk_set_tso(struct net_device *dev, u32 data)
++{
++	struct xen_netif *netif = netdev_priv(dev);
++	if (data) {
++		if (!netif->gso && !netif->gso_prefix)
++			return -ENOSYS;
++		netif->features_disabled &= ~NETIF_F_TSO;
++	} else {
++		netif->features_disabled |= NETIF_F_TSO;
++	}
++
++	netif_set_features(netif);
++	return 0;
++}
++
++static void netbk_get_drvinfo(struct net_device *dev,
++			      struct ethtool_drvinfo *info)
++{
++	strcpy(info->driver, "netbk");
++	strcpy(info->bus_info, dev_name(dev->dev.parent));
++}
++
++static const struct netif_stat {
++	char name[ETH_GSTRING_LEN];
++	u16 offset;
++} netbk_stats[] = {
++	{ "copied_skbs", offsetof(struct xen_netif, nr_copied_skbs) },
++};
++
++static int netbk_get_sset_count(struct net_device *dev, int string_set)
++{
++	switch (string_set) {
++	case ETH_SS_STATS:
++		return ARRAY_SIZE(netbk_stats);
++	default:
++		return -EINVAL;
++	}
++}
++
++static void netbk_get_ethtool_stats(struct net_device *dev,
++				   struct ethtool_stats *stats, u64 * data)
++{
++	void *netif = netdev_priv(dev);
++	int i;
++
++	for (i = 0; i < ARRAY_SIZE(netbk_stats); i++)
++		data[i] = *(int *)(netif + netbk_stats[i].offset);
++}
++
++static void netbk_get_strings(struct net_device *dev, u32 stringset, u8 * data)
++{
++	int i;
++
++	switch (stringset) {
++	case ETH_SS_STATS:
++		for (i = 0; i < ARRAY_SIZE(netbk_stats); i++)
++			memcpy(data + i * ETH_GSTRING_LEN,
++			       netbk_stats[i].name, ETH_GSTRING_LEN);
++		break;
++	}
++}
++
++static struct ethtool_ops network_ethtool_ops = {
++	.get_drvinfo = netbk_get_drvinfo,
++
++	.get_tx_csum = ethtool_op_get_tx_csum,
++	.set_tx_csum = netbk_set_tx_csum,
++	.get_sg = ethtool_op_get_sg,
++	.set_sg = netbk_set_sg,
++	.get_tso = ethtool_op_get_tso,
++	.set_tso = netbk_set_tso,
++	.get_link = ethtool_op_get_link,
++
++	.get_sset_count = netbk_get_sset_count,
++	.get_ethtool_stats = netbk_get_ethtool_stats,
++	.get_strings = netbk_get_strings,
++};
++
++static struct net_device_ops netback_ops = {
++	.ndo_start_xmit	= netif_be_start_xmit,
++	.ndo_get_stats	= netif_be_get_stats,
++	.ndo_open	= net_open,
++	.ndo_stop	= net_close,
++	.ndo_change_mtu	= netbk_change_mtu,
++};
++
++struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
++			      unsigned int handle)
++{
++	int err = 0;
++	struct net_device *dev;
++	struct xen_netif *netif;
++	char name[IFNAMSIZ] = {};
++
++	snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
++	dev = alloc_netdev(sizeof(struct xen_netif), name, ether_setup);
++	if (dev == NULL) {
++		pr_debug("Could not allocate netdev\n");
++		return ERR_PTR(-ENOMEM);
++	}
++
++	SET_NETDEV_DEV(dev, parent);
++
++	netif = netdev_priv(dev);
++	memset(netif, 0, sizeof(*netif));
++	netif->domid  = domid;
++	netif->group  = -1;
++	netif->handle = handle;
++	netif->can_sg = 1;
++	netif->csum = 1;
++	atomic_set(&netif->refcnt, 1);
++	init_waitqueue_head(&netif->waiting_to_free);
++	netif->dev = dev;
++	INIT_LIST_HEAD(&netif->list);
++
++	netback_carrier_off(netif);
++
++	netif->credit_bytes = netif->remaining_credit = ~0UL;
++	netif->credit_usec  = 0UL;
++	init_timer(&netif->credit_timeout);
++	/* Initialize 'expires' now: it's used to track the credit window. */
++	netif->credit_timeout.expires = jiffies;
++
++	dev->netdev_ops	= &netback_ops;
++	netif_set_features(netif);
++	SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
++
++	dev->tx_queue_len = netbk_queue_length;
++
++	/*
++	 * Initialise a dummy MAC address. We choose the numerically
++	 * largest non-broadcast address to prevent the address getting
++	 * stolen by an Ethernet bridge for STP purposes.
++	 * (FE:FF:FF:FF:FF:FF)
++	 */
++	memset(dev->dev_addr, 0xFF, ETH_ALEN);
++	dev->dev_addr[0] &= ~0x01;
++
++	rtnl_lock();
++	err = register_netdevice(dev);
++	rtnl_unlock();
++	if (err) {
++		pr_debug("Could not register new net device %s: err=%d\n",
++			 dev->name, err);
++		free_netdev(dev);
++		return ERR_PTR(err);
++	}
++
++	pr_debug("Successfully created netif\n");
++	return netif;
++}
++
++static int map_frontend_pages(struct xen_netif *netif,
++			      grant_ref_t tx_ring_ref,
++			      grant_ref_t rx_ring_ref)
++{
++	struct gnttab_map_grant_ref op;
++
++	gnttab_set_map_op(&op, (unsigned long)netif->tx_comms_area->addr,
++			  GNTMAP_host_map, tx_ring_ref, netif->domid);
++
++	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
++		BUG();
++
++	if (op.status) {
++		pr_debug("Gnttab failure mapping tx_ring_ref!\n");
++		return op.status;
++	}
++
++	netif->tx_shmem_ref    = tx_ring_ref;
++	netif->tx_shmem_handle = op.handle;
++
++	gnttab_set_map_op(&op, (unsigned long)netif->rx_comms_area->addr,
++			  GNTMAP_host_map, rx_ring_ref, netif->domid);
++
++	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
++		BUG();
++
++	if (op.status) {
++		struct gnttab_unmap_grant_ref unop;
++
++		gnttab_set_unmap_op(&unop,
++				    (unsigned long)netif->tx_comms_area->addr,
++				    GNTMAP_host_map, netif->tx_shmem_handle);
++		HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unop, 1);
++		pr_debug("Gnttab failure mapping rx_ring_ref!\n");
++		return op.status;
++	}
++
++	netif->rx_shmem_ref    = rx_ring_ref;
++	netif->rx_shmem_handle = op.handle;
++
++	return 0;
++}
++
++static void unmap_frontend_pages(struct xen_netif *netif)
++{
++	struct gnttab_unmap_grant_ref op;
++
++	gnttab_set_unmap_op(&op, (unsigned long)netif->tx_comms_area->addr,
++			    GNTMAP_host_map, netif->tx_shmem_handle);
++
++	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
++		BUG();
++
++	gnttab_set_unmap_op(&op, (unsigned long)netif->rx_comms_area->addr,
++			    GNTMAP_host_map, netif->rx_shmem_handle);
++
++	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
++		BUG();
++}
++
++int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
++	      unsigned long rx_ring_ref, unsigned int evtchn)
++{
++	int err = -ENOMEM;
++	struct xen_netif_tx_sring *txs;
++	struct xen_netif_rx_sring *rxs;
++
++	/* Already connected through? */
++	if (netif->irq)
++		return 0;
++
++	netif->tx_comms_area = alloc_vm_area(PAGE_SIZE);
++	if (netif->tx_comms_area == NULL)
++		return -ENOMEM;
++	netif->rx_comms_area = alloc_vm_area(PAGE_SIZE);
++	if (netif->rx_comms_area == NULL)
++		goto err_rx;
++
++	err = map_frontend_pages(netif, tx_ring_ref, rx_ring_ref);
++	if (err)
++		goto err_map;
++
++	err = bind_interdomain_evtchn_to_irqhandler(
++		netif->domid, evtchn, netif_be_int, 0,
++		netif->dev->name, netif);
++	if (err < 0)
++		goto err_hypervisor;
++	netif->irq = err;
++	disable_irq(netif->irq);
++
++	txs = (struct xen_netif_tx_sring *)netif->tx_comms_area->addr;
++	BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE);
++
++	rxs = (struct xen_netif_rx_sring *)
++		((char *)netif->rx_comms_area->addr);
++	BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE);
++
++	netif->rx_req_cons_peek = 0;
++
++	netif_get(netif);
++
++	rtnl_lock();
++	netback_carrier_on(netif);
++	if (netif_running(netif->dev))
++		__netif_up(netif);
++	rtnl_unlock();
++
++	return 0;
++err_hypervisor:
++	unmap_frontend_pages(netif);
++err_map:
++	free_vm_area(netif->rx_comms_area);
++err_rx:
++	free_vm_area(netif->tx_comms_area);
++	return err;
++}
++
++void netif_disconnect(struct xen_netif *netif)
++{
++	if (netback_carrier_ok(netif)) {
++		rtnl_lock();
++		netback_carrier_off(netif);
++		netif_carrier_off(netif->dev); /* discard queued packets */
++		if (netif_running(netif->dev))
++			__netif_down(netif);
++		rtnl_unlock();
++		netif_put(netif);
++	}
++
++	atomic_dec(&netif->refcnt);
++	wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0);
++
++	del_timer_sync(&netif->credit_timeout);
++
++	if (netif->irq)
++		unbind_from_irqhandler(netif->irq, netif);
++
++	unregister_netdev(netif->dev);
++
++	if (netif->tx.sring) {
++		unmap_frontend_pages(netif);
++		free_vm_area(netif->tx_comms_area);
++		free_vm_area(netif->rx_comms_area);
++	}
++
++	free_netdev(netif->dev);
++}
+diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
+new file mode 100644
+index 0000000..e0ca232
+--- /dev/null
++++ b/drivers/net/xen-netback/netback.c
+@@ -0,0 +1,1909 @@
++/*
++ * Back-end of the driver for virtual network devices. This portion of the
++ * driver exports a 'unified' network-device interface that can be accessed
++ * by any operating system that implements a compatible front end. A
++ * reference front-end implementation can be found in:
++ *  drivers/net/xen-netfront.c
++ *
++ * Copyright (c) 2002-2005, K A Fraser
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include "common.h"
++
++#include <linux/kthread.h>
++#include <linux/if_vlan.h>
++#include <linux/udp.h>
++
++#include <net/tcp.h>
++
++#include <xen/balloon.h>
++#include <xen/events.h>
++#include <xen/interface/memory.h>
++
++#include <asm/xen/hypercall.h>
++#include <asm/xen/page.h>
++
++/*define NETBE_DEBUG_INTERRUPT*/
++
++struct xen_netbk *xen_netbk;
++int xen_netbk_group_nr;
++
++static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx);
++static void make_tx_response(struct xen_netif *netif,
++			     struct xen_netif_tx_request *txp,
++			     s8       st);
++static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
++					     u16      id,
++					     s8       st,
++					     u16      offset,
++					     u16      size,
++					     u16      flags);
++
++static void net_tx_action(unsigned long data);
++
++static void net_rx_action(unsigned long data);
++
++static inline unsigned long idx_to_pfn(struct xen_netbk *netbk,
++				       unsigned int idx)
++{
++	return page_to_pfn(netbk->mmap_pages[idx]);
++}
++
++static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
++					 unsigned int idx)
++{
++	return (unsigned long)pfn_to_kaddr(idx_to_pfn(netbk, idx));
++}
++
++/* extra field used in struct page */
++static inline void netif_set_page_ext(struct page *pg,
++				      unsigned int group, unsigned int idx)
++{
++	union page_ext ext = { .e = { .group = group + 1, .idx = idx } };
++
++	BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping));
++	pg->mapping = ext.mapping;
++}
++
++static int netif_get_page_ext(struct page *pg,
++			      unsigned int *_group, unsigned int *_idx)
++{
++	union page_ext ext = { .mapping = pg->mapping };
++	struct xen_netbk *netbk;
++	unsigned int group, idx;
++
++	if (!PageForeign(pg))
++		return 0;
++
++	group = ext.e.group - 1;
++
++	if (group < 0 || group >= xen_netbk_group_nr)
++		return 0;
++
++	netbk = &xen_netbk[group];
++
++	if (netbk->mmap_pages == NULL)
++		return 0;
++
++	idx = ext.e.idx;
++
++	if ((idx < 0) || (idx >= MAX_PENDING_REQS))
++		return 0;
++
++	if (netbk->mmap_pages[idx] != pg)
++		return 0;
++
++	*_group = group;
++	*_idx = idx;
++
++	return 1;
++}
++
++/*
++ * This is the amount of packet we copy rather than map, so that the
++ * guest can't fiddle with the contents of the headers while we do
++ * packet processing on them (netfilter, routing, etc).
++ */
++#define PKT_PROT_LEN    (ETH_HLEN + \
++			 VLAN_HLEN + \
++			 sizeof(struct iphdr) + MAX_IPOPTLEN + \
++			 sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE)
++
++static inline pending_ring_idx_t pending_index(unsigned i)
++{
++	return i & (MAX_PENDING_REQS-1);
++}
++
++static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk)
++{
++	return MAX_PENDING_REQS -
++		netbk->pending_prod + netbk->pending_cons;
++}
++
++/* Setting this allows the safe use of this driver without netloop. */
++static int MODPARM_copy_skb = 1;
++module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
++MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
++
++int netbk_copy_skb_mode;
++
++static int MODPARM_netback_kthread;
++module_param_named(netback_kthread, MODPARM_netback_kthread, bool, 0);
++MODULE_PARM_DESC(netback_kthread, "Use kernel thread to replace tasklet");
++
++/*
++ * Netback bottom half handler.
++ * dir indicates the data direction.
++ * rx: 1, tx: 0.
++ */
++static inline void xen_netbk_bh_handler(struct xen_netbk *netbk, int dir)
++{
++	if (MODPARM_netback_kthread)
++		wake_up(&netbk->kthread.netbk_action_wq);
++	else if (dir)
++		tasklet_schedule(&netbk->tasklet.net_rx_tasklet);
++	else
++		tasklet_schedule(&netbk->tasklet.net_tx_tasklet);
++}
++
++static inline void maybe_schedule_tx_action(struct xen_netbk *netbk)
++{
++	smp_mb();
++	if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
++	    !list_empty(&netbk->net_schedule_list))
++		xen_netbk_bh_handler(netbk, 0);
++}
++
++static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
++{
++	struct skb_shared_info *ninfo;
++	struct sk_buff *nskb;
++	unsigned long offset;
++	int ret;
++	int len;
++	int headlen;
++
++	BUG_ON(skb_shinfo(skb)->frag_list != NULL);
++
++	nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN);
++	if (unlikely(!nskb))
++		goto err;
++
++	skb_reserve(nskb, NET_SKB_PAD + NET_IP_ALIGN);
++	headlen = skb_end_pointer(nskb) - nskb->data;
++	if (headlen > skb_headlen(skb))
++		headlen = skb_headlen(skb);
++	ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
++	BUG_ON(ret);
++
++	ninfo = skb_shinfo(nskb);
++	ninfo->gso_size = skb_shinfo(skb)->gso_size;
++	ninfo->gso_type = skb_shinfo(skb)->gso_type;
++
++	offset = headlen;
++	len = skb->len - headlen;
++
++	nskb->len = skb->len;
++	nskb->data_len = len;
++	nskb->truesize += len;
++
++	while (len) {
++		struct page *page;
++		int copy;
++		int zero;
++
++		if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
++			dump_stack();
++			goto err_free;
++		}
++
++		copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
++		zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
++
++		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
++		if (unlikely(!page))
++			goto err_free;
++
++		ret = skb_copy_bits(skb, offset, page_address(page), copy);
++		BUG_ON(ret);
++
++		ninfo->frags[ninfo->nr_frags].page = page;
++		ninfo->frags[ninfo->nr_frags].page_offset = 0;
++		ninfo->frags[ninfo->nr_frags].size = copy;
++		ninfo->nr_frags++;
++
++		offset += copy;
++		len -= copy;
++	}
++
++#ifdef NET_SKBUFF_DATA_USES_OFFSET
++	offset = 0;
++#else
++	offset = nskb->data - skb->data;
++#endif
++
++	nskb->transport_header = skb->transport_header + offset;
++	nskb->network_header = skb->network_header + offset;
++	nskb->mac_header = skb->mac_header + offset;
++
++	return nskb;
++
++ err_free:
++	kfree_skb(nskb);
++ err:
++	return NULL;
++}
++
++static inline int netbk_max_required_rx_slots(struct xen_netif *netif)
++{
++	if (netif->can_sg || netif->gso || netif->gso_prefix)
++		return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
++	return 1; /* all in one */
++}
++
++static inline int netbk_queue_full(struct xen_netif *netif)
++{
++	RING_IDX peek   = netif->rx_req_cons_peek;
++	RING_IDX needed = netbk_max_required_rx_slots(netif);
++
++	return ((netif->rx.sring->req_prod - peek) < needed) ||
++	       ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
++}
++
++/*
++ * Returns true if we should start a new receive buffer instead of
++ * adding 'size' bytes to a buffer which currently contains 'offset'
++ * bytes.
++ */
++static bool start_new_rx_buffer(int offset, unsigned long size, int head)
++{
++	/* simple case: we have completely filled the current buffer. */
++	if (offset == MAX_BUFFER_OFFSET)
++		return true;
++
++	/*
++	 * complex case: start a fresh buffer if the current frag
++	 * would overflow the current buffer but only if:
++	 *     (i)   this frag would fit completely in the next buffer
++	 * and (ii)  there is already some data in the current buffer
++	 * and (iii) this is not the head buffer.
++	 *
++	 * Where:
++	 * - (i) stops us splitting a frag into two copies
++	 *   unless the frag is too large for a single buffer.
++	 * - (ii) stops us from leaving a buffer pointlessly empty.
++	 * - (iii) stops us leaving the first buffer
++	 *   empty. Strictly speaking this is already covered
++	 *   by (ii) but is explicitly checked because
++	 *   netfront relies on the first buffer being
++	 *   non-empty and can crash otherwise.
++	 *
++	 * This means we will effectively linearise small
++	 * frags but do not needlessly split large buffers
++	 * into multiple copies, tending to give large frags their
++	 * own buffers as before.
++	 */
++	if ((offset + size > MAX_BUFFER_OFFSET) &&
++	    (size <= MAX_BUFFER_OFFSET) && offset && !head)
++		return true;
++
++	return false;
++}
++
++/*
++ * Figure out how many ring slots we're going to need to send @skb to
++ * the guest. This function is essentially a dry run of
++ * netbk_gop_frag_copy.
++ */
++static unsigned int count_skb_slots(struct sk_buff *skb, struct xen_netif *netif)
++{
++	unsigned int count = 1;
++	int i, copy_off = 0;
++
++	BUG_ON(offset_in_page(skb->data)+skb_headlen(skb) > MAX_BUFFER_OFFSET);
++
++	copy_off = skb_headlen(skb);
++
++	if (skb_shinfo(skb)->gso_size)
++		count++;
++
++	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
++		unsigned long size = skb_shinfo(skb)->frags[i].size;
++		unsigned long bytes;
++		while (size > 0) {
++			BUG_ON(copy_off > MAX_BUFFER_OFFSET);
++
++			if (start_new_rx_buffer(copy_off, size, 0)) {
++				count++;
++				copy_off = 0;
++			}
++
++			bytes = size;
++			if (copy_off + bytes > MAX_BUFFER_OFFSET)
++				bytes = MAX_BUFFER_OFFSET - copy_off;
++
++			copy_off += bytes;
++			size -= bytes;
++		}
++	}
++	return count;
++}
++
++int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
++{
++	struct xen_netif *netif = netdev_priv(dev);
++	struct xen_netbk *netbk;
++
++	BUG_ON(skb->dev != dev);
++
++	if (netif->group == -1)
++		goto drop;
++
++	netbk = &xen_netbk[netif->group];
++
++	/* Drop the packet if the target domain has no receive buffers. */
++	if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
++		goto drop;
++
++	/*
++	 * XXX For now we also copy skbuffs whose head crosses a page
++	 * boundary, because netbk_gop_skb can't handle them.
++	 */
++	if ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE) {
++		struct sk_buff *nskb = netbk_copy_skb(skb);
++		if (unlikely(nskb == NULL))
++			goto drop;
++		/* Copy only the header fields we use in this driver. */
++		nskb->dev = skb->dev;
++		nskb->ip_summed = skb->ip_summed;
++		dev_kfree_skb(skb);
++		skb = nskb;
++	}
++
++	/* Reserve ring slots for the worst-case number of fragments. */
++	netif->rx_req_cons_peek += count_skb_slots(skb, netif);
++	netif_get(netif);
++
++	if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
++		netif->rx.sring->req_event = netif->rx_req_cons_peek +
++			netbk_max_required_rx_slots(netif);
++		mb(); /* request notification /then/ check & stop the queue */
++		if (netbk_queue_full(netif))
++			netif_stop_queue(dev);
++	}
++	skb_queue_tail(&netbk->rx_queue, skb);
++
++	xen_netbk_bh_handler(netbk, 1);
++
++	return 0;
++
++ drop:
++	netif->stats.tx_dropped++;
++	dev_kfree_skb(skb);
++	return 0;
++}
++
++struct netrx_pending_operations {
++	unsigned copy_prod, copy_cons;
++	unsigned meta_prod, meta_cons;
++	struct gnttab_copy *copy;
++	struct netbk_rx_meta *meta;
++	int copy_off;
++	grant_ref_t copy_gref;
++};
++
++static struct netbk_rx_meta *get_next_rx_buffer(struct xen_netif *netif,
++						struct netrx_pending_operations *npo)
++{
++	struct netbk_rx_meta *meta;
++	struct xen_netif_rx_request *req;
++
++	req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
++
++	meta = npo->meta + npo->meta_prod++;
++	meta->gso_size = 0;
++	meta->size = 0;
++	meta->id = req->id;
++
++	npo->copy_off = 0;
++	npo->copy_gref = req->gref;
++
++	return meta;
++}
++
++/*
++ * Set up the grant operations for this fragment. If it's a flipping
++ * interface, we also set up the unmap request from here.
++ */
++static void netbk_gop_frag_copy(struct xen_netif *netif,
++				struct netrx_pending_operations *npo,
++				struct page *page, unsigned long size,
++				unsigned long offset, int head)
++{
++	struct gnttab_copy *copy_gop;
++	struct netbk_rx_meta *meta;
++	/*
++	 * These variables are used iff netif_get_page_ext returns true,
++	 * in which case they are guaranteed to be initialized.
++	 */
++	unsigned int uninitialized_var(group), uninitialized_var(idx);
++	int foreign = netif_get_page_ext(page, &group, &idx);
++	unsigned long bytes;
++
++	/* Data must not cross a page boundary. */
++	BUG_ON(size + offset > PAGE_SIZE);
++
++	meta = npo->meta + npo->meta_prod - 1;
++
++	while (size > 0) {
++		BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
++
++		if (start_new_rx_buffer(npo->copy_off, size, head)) {
++			/*
++			 * Netfront requires there to be some data in
++			 * the head buffer.
++			 */
++			BUG_ON(head);
++
++			meta = get_next_rx_buffer(netif, npo);
++		}
++
++		bytes = size;
++		if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
++			bytes = MAX_BUFFER_OFFSET - npo->copy_off;
++
++		copy_gop = npo->copy + npo->copy_prod++;
++		copy_gop->flags = GNTCOPY_dest_gref;
++		if (foreign) {
++			struct xen_netbk *netbk = &xen_netbk[group];
++			struct pending_tx_info *src_pend;
++
++			src_pend = &netbk->pending_tx_info[idx];
++
++			copy_gop->source.domid = src_pend->netif->domid;
++			copy_gop->source.u.ref = src_pend->req.gref;
++			copy_gop->flags |= GNTCOPY_source_gref;
++		} else {
++			void *vaddr = page_address(page);
++			copy_gop->source.domid = DOMID_SELF;
++			copy_gop->source.u.gmfn = virt_to_mfn(vaddr);
++		}
++		copy_gop->source.offset = offset;
++		copy_gop->dest.domid = netif->domid;
++
++		copy_gop->dest.offset = npo->copy_off;
++		copy_gop->dest.u.ref = npo->copy_gref;
++		copy_gop->len = bytes;
++
++		npo->copy_off += bytes;
++		meta->size += bytes;
++
++		offset += bytes;
++		size -= bytes;
++		head = 0; /* There must be something in this buffer now. */
++	}
++}
++
++/*
++ * Prepare an SKB to be transmitted to the frontend.
++ *
++ * This function is responsible for allocating grant operations, meta
++ * structures, etc.
++ *
++ * It returns the number of meta structures consumed. The number of
++ * ring slots used is always equal to the number of meta slots used
++ * plus the number of GSO descriptors used. Currently, we use either
++ * zero GSO descriptors (for non-GSO packets) or one descriptor (for
++ * frontend-side LRO).
++ */
++static int netbk_gop_skb(struct sk_buff *skb,
++			 struct netrx_pending_operations *npo)
++{
++	struct xen_netif *netif = netdev_priv(skb->dev);
++	int nr_frags = skb_shinfo(skb)->nr_frags;
++	int i;
++	struct xen_netif_rx_request *req;
++	struct netbk_rx_meta *meta;
++	int old_meta_prod;
++
++	old_meta_prod = npo->meta_prod;
++
++	/* Set up a GSO prefix descriptor, if necessary */
++	if (skb_shinfo(skb)->gso_size && netif->gso_prefix) {
++		req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
++		meta = npo->meta + npo->meta_prod++;
++		meta->gso_size = skb_shinfo(skb)->gso_size;
++		meta->size = 0;
++		meta->id = req->id;
++	}
++
++	req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
++	meta = npo->meta + npo->meta_prod++;
++
++	if (!netif->gso_prefix)
++		meta->gso_size = skb_shinfo(skb)->gso_size;
++	else
++		meta->gso_size = 0;
++
++	meta->size = 0;
++	meta->id = req->id;
++	npo->copy_off = 0;
++	npo->copy_gref = req->gref;
++
++	netbk_gop_frag_copy(netif,
++			    npo, virt_to_page(skb->data),
++			    skb_headlen(skb),
++			    offset_in_page(skb->data), 1);
++
++	/* Leave a gap for the GSO descriptor. */
++	if (skb_shinfo(skb)->gso_size && !netif->gso_prefix)
++		netif->rx.req_cons++;
++
++	for (i = 0; i < nr_frags; i++) {
++		netbk_gop_frag_copy(netif, npo,
++				    skb_shinfo(skb)->frags[i].page,
++				    skb_shinfo(skb)->frags[i].size,
++				    skb_shinfo(skb)->frags[i].page_offset,
++				    0);
++	}
++
++	return npo->meta_prod - old_meta_prod;
++}
++
++/*
++ * This is a twin to netbk_gop_skb.  Assume that netbk_gop_skb was
++ * used to set up the operations on the top of
++ * netrx_pending_operations, which have since been done.  Check that
++ * they didn't give any errors and advance over them.
++ */
++static int netbk_check_gop(int nr_meta_slots, domid_t domid,
++			   struct netrx_pending_operations *npo)
++{
++	struct gnttab_copy     *copy_op;
++	int status = NETIF_RSP_OKAY;
++	int i;
++
++	for (i = 0; i < nr_meta_slots; i++) {
++		copy_op = npo->copy + npo->copy_cons++;
++		if (copy_op->status != GNTST_okay) {
++				pr_debug("Bad status %d from copy to DOM%d.\n",
++					 copy_op->status, domid);
++				status = NETIF_RSP_ERROR;
++			}
++	}
++
++	return status;
++}
++
++static void netbk_add_frag_responses(struct xen_netif *netif, int status,
++				     struct netbk_rx_meta *meta,
++				     int nr_meta_slots)
++{
++	int i;
++	unsigned long offset;
++
++	/* No fragments used */
++	if (nr_meta_slots <= 1)
++		return;
++
++	nr_meta_slots--;
++
++	for (i = 0; i < nr_meta_slots; i++) {
++		int flags;
++		if (i == nr_meta_slots - 1)
++			flags = 0;
++		else
++			flags = NETRXF_more_data;
++
++		offset = 0;
++		make_rx_response(netif, meta[i].id, status, offset,
++				 meta[i].size, flags);
++	}
++}
++
++struct skb_cb_overlay {
++	int meta_slots_used;
++};
++
++static void net_rx_action(unsigned long data)
++{
++	struct xen_netif *netif = NULL;
++	struct xen_netbk *netbk = (struct xen_netbk *)data;
++	s8 status;
++	u16 irq, flags;
++	struct xen_netif_rx_response *resp;
++	struct sk_buff_head rxq;
++	struct sk_buff *skb;
++	int notify_nr = 0;
++	int ret;
++	int nr_frags;
++	int count;
++	unsigned long offset;
++	struct skb_cb_overlay *sco;
++
++	struct netrx_pending_operations npo = {
++		.copy  = netbk->grant_copy_op,
++		.meta  = netbk->meta,
++	};
++
++	skb_queue_head_init(&rxq);
++
++	count = 0;
++
++	while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) {
++		netif = netdev_priv(skb->dev);
++		nr_frags = skb_shinfo(skb)->nr_frags;
++
++		sco = (struct skb_cb_overlay *)skb->cb;
++		sco->meta_slots_used = netbk_gop_skb(skb, &npo);
++
++		count += nr_frags + 1;
++
++		__skb_queue_tail(&rxq, skb);
++
++		/* Filled the batch queue? */
++		if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE)
++			break;
++	}
++
++	BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
++
++	if (!npo.copy_prod)
++		return;
++
++	BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
++	ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, &netbk->grant_copy_op,
++					npo.copy_prod);
++	BUG_ON(ret != 0);
++
++	while ((skb = __skb_dequeue(&rxq)) != NULL) {
++		sco = (struct skb_cb_overlay *)skb->cb;
++
++		netif = netdev_priv(skb->dev);
++
++		if (netbk->meta[npo.meta_cons].gso_size && netif->gso_prefix) {
++			resp = RING_GET_RESPONSE(&netif->rx,
++						netif->rx.rsp_prod_pvt++);
++
++			resp->flags = NETRXF_gso_prefix | NETRXF_more_data;
++
++			resp->offset = netbk->meta[npo.meta_cons].gso_size;
++			resp->id = netbk->meta[npo.meta_cons].id;
++			resp->status = sco->meta_slots_used;
++
++			npo.meta_cons++;
++			sco->meta_slots_used--;
++		}
++
++
++		netif->stats.tx_bytes += skb->len;
++		netif->stats.tx_packets++;
++
++		status = netbk_check_gop(sco->meta_slots_used,
++					 netif->domid, &npo);
++
++		if (sco->meta_slots_used == 1)
++			flags = 0;
++		else
++			flags = NETRXF_more_data;
++
++		if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
++			flags |= NETRXF_csum_blank | NETRXF_data_validated;
++		else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
++			/* remote but checksummed. */
++			flags |= NETRXF_data_validated;
++
++		offset = 0;
++		resp = make_rx_response(netif, netbk->meta[npo.meta_cons].id,
++					status, offset,
++					netbk->meta[npo.meta_cons].size,
++					flags);
++
++		if (netbk->meta[npo.meta_cons].gso_size && !netif->gso_prefix) {
++			struct xen_netif_extra_info *gso =
++				(struct xen_netif_extra_info *)
++				RING_GET_RESPONSE(&netif->rx,
++						  netif->rx.rsp_prod_pvt++);
++
++			resp->flags |= NETRXF_extra_info;
++
++			gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size;
++			gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
++			gso->u.gso.pad = 0;
++			gso->u.gso.features = 0;
++
++			gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
++			gso->flags = 0;
++		}
++
++		netbk_add_frag_responses(netif, status,
++					 netbk->meta + npo.meta_cons + 1,
++					 sco->meta_slots_used);
++
++		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
++		irq = netif->irq;
++		if (ret && !netbk->rx_notify[irq]) {
++			netbk->rx_notify[irq] = 1;
++			netbk->notify_list[notify_nr++] = irq;
++		}
++
++		if (netif_queue_stopped(netif->dev) &&
++		    netif_schedulable(netif) &&
++		    !netbk_queue_full(netif))
++			netif_wake_queue(netif->dev);
++
++		netif_put(netif);
++		npo.meta_cons += sco->meta_slots_used;
++		dev_kfree_skb(skb);
++	}
++
++	while (notify_nr != 0) {
++		irq = netbk->notify_list[--notify_nr];
++		netbk->rx_notify[irq] = 0;
++		notify_remote_via_irq(irq);
++	}
++
++	/* More work to do? */
++	if (!skb_queue_empty(&netbk->rx_queue) &&
++			!timer_pending(&netbk->net_timer))
++		xen_netbk_bh_handler(netbk, 1);
++}
++
++static void net_alarm(unsigned long data)
++{
++	struct xen_netbk *netbk = (struct xen_netbk *)data;
++	xen_netbk_bh_handler(netbk, 1);
++}
++
++static void netbk_tx_pending_timeout(unsigned long data)
++{
++	struct xen_netbk *netbk = (struct xen_netbk *)data;
++	xen_netbk_bh_handler(netbk, 0);
++}
++
++struct net_device_stats *netif_be_get_stats(struct net_device *dev)
++{
++	struct xen_netif *netif = netdev_priv(dev);
++	return &netif->stats;
++}
++
++static int __on_net_schedule_list(struct xen_netif *netif)
++{
++	return !list_empty(&netif->list);
++}
++
++/* Must be called with net_schedule_list_lock held */
++static void remove_from_net_schedule_list(struct xen_netif *netif)
++{
++	if (likely(__on_net_schedule_list(netif))) {
++		list_del_init(&netif->list);
++		netif_put(netif);
++	}
++}
++
++static struct xen_netif *poll_net_schedule_list(struct xen_netbk *netbk)
++{
++	struct xen_netif *netif = NULL;
++
++	spin_lock_irq(&netbk->net_schedule_list_lock);
++	if (list_empty(&netbk->net_schedule_list))
++		goto out;
++
++	netif = list_first_entry(&netbk->net_schedule_list,
++				 struct xen_netif, list);
++	if (!netif)
++		goto out;
++
++	netif_get(netif);
++
++	remove_from_net_schedule_list(netif);
++out:
++	spin_unlock_irq(&netbk->net_schedule_list_lock);
++	return netif;
++}
++
++static void add_to_net_schedule_list_tail(struct xen_netif *netif)
++{
++	unsigned long flags;
++
++	struct xen_netbk *netbk = &xen_netbk[netif->group];
++	if (__on_net_schedule_list(netif))
++		return;
++
++	spin_lock_irqsave(&netbk->net_schedule_list_lock, flags);
++	if (!__on_net_schedule_list(netif) &&
++	    likely(netif_schedulable(netif))) {
++		list_add_tail(&netif->list, &netbk->net_schedule_list);
++		netif_get(netif);
++	}
++	spin_unlock_irqrestore(&netbk->net_schedule_list_lock, flags);
++}
++
++void netif_schedule_work(struct xen_netif *netif)
++{
++	struct xen_netbk *netbk = &xen_netbk[netif->group];
++	int more_to_do;
++
++	RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
++
++	if (more_to_do) {
++		add_to_net_schedule_list_tail(netif);
++		maybe_schedule_tx_action(netbk);
++	}
++}
++
++void netif_deschedule_work(struct xen_netif *netif)
++{
++	struct xen_netbk *netbk = &xen_netbk[netif->group];
++	spin_lock_irq(&netbk->net_schedule_list_lock);
++	remove_from_net_schedule_list(netif);
++	spin_unlock_irq(&netbk->net_schedule_list_lock);
++}
++
++
++static void tx_add_credit(struct xen_netif *netif)
++{
++	unsigned long max_burst, max_credit;
++
++	/*
++	 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
++	 * Otherwise the interface can seize up due to insufficient credit.
++	 */
++	max_burst = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size;
++	max_burst = min(max_burst, 131072UL);
++	max_burst = max(max_burst, netif->credit_bytes);
++
++	/* Take care that adding a new chunk of credit doesn't wrap to zero. */
++	max_credit = netif->remaining_credit + netif->credit_bytes;
++	if (max_credit < netif->remaining_credit)
++		max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
++
++	netif->remaining_credit = min(max_credit, max_burst);
++}
++
++static void tx_credit_callback(unsigned long data)
++{
++	struct xen_netif *netif = (struct xen_netif *)data;
++	tx_add_credit(netif);
++	netif_schedule_work(netif);
++}
++
++static inline int copy_pending_req(struct xen_netbk *netbk,
++				   pending_ring_idx_t pending_idx)
++{
++	return gnttab_copy_grant_page(
++			netbk->grant_tx_handle[pending_idx],
++			&netbk->mmap_pages[pending_idx]);
++}
++
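++/*
++ * Drain the dealloc ring filled by netif_idx_release(): unmap grants we
++ * have finished with, copy any pages that have sat on the delayed-copy
++ * list for too long, and hand the freed pending slots back to the ring
++ * with an OKAY response to the frontend.
++ */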
++static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
++{
++	struct netbk_tx_pending_inuse *inuse, *n;
++	struct gnttab_unmap_grant_ref *gop;
++	u16 pending_idx;
++	pending_ring_idx_t dc, dp;
++	struct xen_netif *netif;
++	int ret;
++	LIST_HEAD(list);
++
++	dc = netbk->dealloc_cons;
++	gop = netbk->tx_unmap_ops;
++
++	/* Free up any grants we have finished using. */
++	do {
++		dp = netbk->dealloc_prod;
++
++		/* Ensure we see all indices enqueued by netif_idx_release(). */
++		smp_rmb();
++
++		while (dc != dp) {
++			unsigned long pfn;
++			struct netbk_tx_pending_inuse *pending_inuse =
++					netbk->pending_inuse;
++
++			pending_idx = netbk->dealloc_ring[pending_index(dc++)];
++			list_move_tail(&pending_inuse[pending_idx].list, &list);
++
++			pfn = idx_to_pfn(netbk, pending_idx);
++			/* Already unmapped? */
++			if (!phys_to_machine_mapping_valid(pfn))
++				continue;
++
++			gnttab_set_unmap_op(gop,
++					idx_to_kaddr(netbk, pending_idx),
++					GNTMAP_host_map,
++					netbk->grant_tx_handle[pending_idx]);
++			gop++;
++		}
++
++	} while (dp != netbk->dealloc_prod);
++
++	netbk->dealloc_cons = dc;
++
++	ret = HYPERVISOR_grant_table_op(
++		GNTTABOP_unmap_grant_ref, netbk->tx_unmap_ops,
++		gop - netbk->tx_unmap_ops);
++	BUG_ON(ret);
++
++	/*
++	 * Copy any entries that have been pending for too long
++	 */
++	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
++	    !list_empty(&netbk->pending_inuse_head)) {
++		list_for_each_entry_safe(inuse, n,
++				&netbk->pending_inuse_head, list) {
++			struct pending_tx_info *pending_tx_info;
++			pending_tx_info = netbk->pending_tx_info;
++
++			if (time_after(inuse->alloc_time + HZ / 2, jiffies))
++				break;
++
++			pending_idx = inuse - netbk->pending_inuse;
++
++			pending_tx_info[pending_idx].netif->nr_copied_skbs++;
++
++			switch (copy_pending_req(netbk, pending_idx)) {
++			case 0:
++				list_move_tail(&inuse->list, &list);
++				continue;
++			case -EBUSY:
++				list_del_init(&inuse->list);
++				continue;
++			case -ENOENT:
++				continue;
++			}
++
++			break;
++		}
++	}
++
++	list_for_each_entry_safe(inuse, n, &list, list) {
++		struct pending_tx_info *pending_tx_info;
++		pending_ring_idx_t index;
++
++		pending_tx_info = netbk->pending_tx_info;
++		pending_idx = inuse - netbk->pending_inuse;
++
++		netif = pending_tx_info[pending_idx].netif;
++
++		make_tx_response(netif, &pending_tx_info[pending_idx].req,
++				 NETIF_RSP_OKAY);
++
++		/* Ready for next use. */
++		gnttab_reset_grant_page(netbk->mmap_pages[pending_idx]);
++
++		index = pending_index(netbk->pending_prod++);
++		netbk->pending_ring[index] = pending_idx;
++
++		netif_put(netif);
++
++		list_del_init(&inuse->list);
++	}
++}
++
++static void netbk_tx_err(struct xen_netif *netif,
++		struct xen_netif_tx_request *txp, RING_IDX end)
++{
++	RING_IDX cons = netif->tx.req_cons;
++
++	do {
++		make_tx_response(netif, txp, NETIF_RSP_ERROR);
++		if (cons >= end)
++			break;
++		txp = RING_GET_REQUEST(&netif->tx, cons++);
++	} while (1);
++	netif->tx.req_cons = cons;
++	netif_schedule_work(netif);
++	netif_put(netif);
++}
++
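++/*
++ * Count the extra tx requests that make up a multi-fragment packet,
++ * copying them into txp[].  Returns the number of fragments, or a
++ * negative count if the frontend's requests are malformed.
++ */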
++static int netbk_count_requests(struct xen_netif *netif,
++				struct xen_netif_tx_request *first,
++				struct xen_netif_tx_request *txp,
++				int work_to_do)
++{
++	RING_IDX cons = netif->tx.req_cons;
++	int frags = 0;
++
++	if (!(first->flags & NETTXF_more_data))
++		return 0;
++
++	do {
++		if (frags >= work_to_do) {
++			DPRINTK("Need more frags\n");
++			return -frags;
++		}
++
++		if (unlikely(frags >= MAX_SKB_FRAGS)) {
++			DPRINTK("Too many frags\n");
++			return -frags;
++		}
++
++		memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags),
++		       sizeof(*txp));
++		if (txp->size > first->size) {
++			DPRINTK("Frags galore\n");
++			return -frags;
++		}
++
++		first->size -= txp->size;
++		frags++;
++
++		if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
++			DPRINTK("txp->offset: %x, size: %u\n",
++				txp->offset, txp->size);
++			return -frags;
++		}
++	} while ((txp++)->flags & NETTXF_more_data);
++
++	return frags;
++}
++
++static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk,
++						       struct xen_netif *netif,
++						       struct sk_buff *skb,
++						       struct xen_netif_tx_request *txp,
++						       struct gnttab_map_grant_ref *mop)
++{
++	struct skb_shared_info *shinfo = skb_shinfo(skb);
++	skb_frag_t *frags = shinfo->frags;
++	unsigned long pending_idx = *((u16 *)skb->data);
++	int i, start;
++
++	/* Skip first skb fragment if it is on same page as header fragment. */
++	start = ((unsigned long)shinfo->frags[0].page == pending_idx);
++
++	for (i = start; i < shinfo->nr_frags; i++, txp++) {
++		pending_ring_idx_t index;
++		struct pending_tx_info *pending_tx_info =
++			netbk->pending_tx_info;
++
++		index = pending_index(netbk->pending_cons++);
++		pending_idx = netbk->pending_ring[index];
++
++		gnttab_set_map_op(mop++, idx_to_kaddr(netbk, pending_idx),
++				  GNTMAP_host_map | GNTMAP_readonly,
++				  txp->gref, netif->domid);
++
++		memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
++		netif_get(netif);
++		pending_tx_info[pending_idx].netif = netif;
++		frags[i].page = (void *)pending_idx;
++	}
++
++	return mop;
++}
++
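++/*
++ * Check the status of the grant map operations issued for a packet's
++ * header and fragments.  Successful maps have their grant handles
++ * recorded; failures generate error responses and cause any fragments
++ * that did map to be released again.
++ */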
++static int netbk_tx_check_mop(struct xen_netbk *netbk,
++			      struct sk_buff *skb,
++			      struct gnttab_map_grant_ref **mopp)
++{
++	struct gnttab_map_grant_ref *mop = *mopp;
++	int pending_idx = *((u16 *)skb->data);
++	struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
++	struct xen_netif *netif = pending_tx_info[pending_idx].netif;
++	struct xen_netif_tx_request *txp;
++	struct skb_shared_info *shinfo = skb_shinfo(skb);
++	int nr_frags = shinfo->nr_frags;
++	int i, err, start;
++
++	/* Check status of header. */
++	err = mop->status;
++	if (unlikely(err)) {
++		pending_ring_idx_t index;
++		index = pending_index(netbk->pending_prod++);
++		txp = &pending_tx_info[pending_idx].req;
++		make_tx_response(netif, txp, NETIF_RSP_ERROR);
++		netbk->pending_ring[index] = pending_idx;
++		netif_put(netif);
++	} else {
++		set_phys_to_machine(
++			__pa(idx_to_kaddr(netbk, pending_idx)) >> PAGE_SHIFT,
++			FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
++		netbk->grant_tx_handle[pending_idx] = mop->handle;
++	}
++
++	/* Skip first skb fragment if it is on same page as header fragment. */
++	start = ((unsigned long)shinfo->frags[0].page == pending_idx);
++
++	for (i = start; i < nr_frags; i++) {
++		int j, newerr;
++		pending_ring_idx_t index;
++
++		pending_idx = (unsigned long)shinfo->frags[i].page;
++
++		/* Check error status: if okay then remember grant handle. */
++		newerr = (++mop)->status;
++		if (likely(!newerr)) {
++			unsigned long addr;
++			addr = idx_to_kaddr(netbk, pending_idx);
++			set_phys_to_machine(
++				__pa(addr)>>PAGE_SHIFT,
++				FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
++			netbk->grant_tx_handle[pending_idx] = mop->handle;
++			/* Had a previous error? Invalidate this fragment. */
++			if (unlikely(err))
++				netif_idx_release(netbk, pending_idx);
++			continue;
++		}
++
++		/* Error on this fragment: respond to client with an error. */
++		txp = &netbk->pending_tx_info[pending_idx].req;
++		make_tx_response(netif, txp, NETIF_RSP_ERROR);
++		index = pending_index(netbk->pending_prod++);
++		netbk->pending_ring[index] = pending_idx;
++		netif_put(netif);
++
++		/* Not the first error? Preceding frags already invalidated. */
++		if (err)
++			continue;
++
++		/* First error: invalidate header and preceding fragments. */
++		pending_idx = *((u16 *)skb->data);
++		netif_idx_release(netbk, pending_idx);
++		for (j = start; j < i; j++) {
++			pending_idx = (unsigned long)shinfo->frags[j].page;
++			netif_idx_release(netbk, pending_idx);
++		}
++
++		/* Remember the error: invalidate all subsequent fragments. */
++		err = newerr;
++	}
++
++	*mopp = mop + 1;
++	return err;
++}
++
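++/*
++ * The frag array still holds pending-ring indexes; now that the grants
++ * are mapped, replace them with the real page pointers, offsets and
++ * sizes from the original tx requests, and queue each slot on the
++ * pending_inuse list so delayed-copy processing can reclaim it later.
++ */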
++static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
++{
++	struct skb_shared_info *shinfo = skb_shinfo(skb);
++	int nr_frags = shinfo->nr_frags;
++	int i;
++
++	for (i = 0; i < nr_frags; i++) {
++		skb_frag_t *frag = shinfo->frags + i;
++		struct xen_netif_tx_request *txp;
++		unsigned long pending_idx;
++
++		pending_idx = (unsigned long)frag->page;
++
++		netbk->pending_inuse[pending_idx].alloc_time = jiffies;
++		list_add_tail(&netbk->pending_inuse[pending_idx].list,
++			      &netbk->pending_inuse_head);
++
++		txp = &netbk->pending_tx_info[pending_idx].req;
++		frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx));
++		frag->size = txp->size;
++		frag->page_offset = txp->offset;
++
++		skb->len += txp->size;
++		skb->data_len += txp->size;
++		skb->truesize += txp->size;
++	}
++}
++
++int netbk_get_extras(struct xen_netif *netif,
++		     struct xen_netif_extra_info *extras,
++		     int work_to_do)
++{
++	struct xen_netif_extra_info extra;
++	RING_IDX cons = netif->tx.req_cons;
++
++	do {
++		if (unlikely(work_to_do-- <= 0)) {
++			pr_debug("Missing extra info\n");
++			return -EBADR;
++		}
++
++		memcpy(&extra, RING_GET_REQUEST(&netif->tx, cons),
++		       sizeof(extra));
++		if (unlikely(!extra.type ||
++			     extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
++			netif->tx.req_cons = ++cons;
++			pr_debug("Invalid extra type: %d\n", extra.type);
++			return -EINVAL;
++		}
++
++		memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
++		netif->tx.req_cons = ++cons;
++	} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
++
++	return work_to_do;
++}
++
++static int netbk_set_skb_gso(struct sk_buff *skb,
++			     struct xen_netif_extra_info *gso)
++{
++	if (!gso->u.gso.size) {
++		pr_debug("GSO size must not be zero.\n");
++		return -EINVAL;
++	}
++
++	/* Currently only TCPv4 S.O. is supported. */
++	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
++		pr_debug("Bad GSO type %d.\n", gso->u.gso.type);
++		return -EINVAL;
++	}
++
++	skb_shinfo(skb)->gso_size = gso->u.gso.size;
++	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
++
++	/* Header must be checked, and gso_segs computed. */
++	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
++	skb_shinfo(skb)->gso_segs = 0;
++
++	return 0;
++}
++
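++/*
++ * A CHECKSUM_PARTIAL packet from the frontend has no checksum filled in
++ * yet.  Locate the TCP/UDP header so csum_start/csum_offset tell the
++ * rest of the stack where the checksum must eventually be written.
++ */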
++static int skb_checksum_setup(struct sk_buff *skb)
++{
++	struct iphdr *iph;
++	unsigned char *th;
++	int err = -EPROTO;
++
++	if (skb->protocol != htons(ETH_P_IP))
++		goto out;
++
++	iph = (void *)skb->data;
++	th = skb->data + 4 * iph->ihl;
++	if (th >= skb_tail_pointer(skb))
++		goto out;
++
++	skb->csum_start = th - skb->head;
++	switch (iph->protocol) {
++	case IPPROTO_TCP:
++		skb->csum_offset = offsetof(struct tcphdr, check);
++		break;
++	case IPPROTO_UDP:
++		skb->csum_offset = offsetof(struct udphdr, check);
++		break;
++	default:
++		if (net_ratelimit())
++			printk(KERN_ERR "Attempting to checksum a non-"
++			       "TCP/UDP packet, dropping a protocol"
++			       " %d packet", iph->protocol);
++		goto out;
++	}
++
++	if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
++		goto out;
++
++	err = 0;
++
++out:
++	return err;
++}
++
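++/*
++ * Credit-based rate limiting: a vif may transmit credit_bytes every
++ * credit_usec (both parsed from the "rate" node in xenstore).  If this
++ * request would exceed the remaining credit, arm a timer to replenish
++ * the credit and retry the vif later.
++ */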
++static bool tx_credit_exceeded(struct xen_netif *netif, unsigned size)
++{
++	unsigned long now = jiffies;
++	unsigned long next_credit =
++		netif->credit_timeout.expires +
++		msecs_to_jiffies(netif->credit_usec / 1000);
++
++	/* Timer could already be pending in rare cases. */
++	if (timer_pending(&netif->credit_timeout))
++		return true;
++
++	/* Passed the point where we can replenish credit? */
++	if (time_after_eq(now, next_credit)) {
++		netif->credit_timeout.expires = now;
++		tx_add_credit(netif);
++	}
++
++	/* Still too big to send right now? Set a callback. */
++	if (size > netif->remaining_credit) {
++		netif->credit_timeout.data     =
++			(unsigned long)netif;
++		netif->credit_timeout.function =
++			tx_credit_callback;
++		mod_timer(&netif->credit_timeout,
++			  next_credit);
++
++		return true;
++	}
++
++	return false;
++}
++
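++/*
++ * First half of the TX path: pull requests from every vif on the
++ * schedule list, validate them, queue a skeleton skb per packet and
++ * build the matching grant map operations.  net_tx_action() issues the
++ * maps in a single hypercall and net_tx_submit() completes the skbs.
++ */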
++static unsigned net_tx_build_mops(struct xen_netbk *netbk)
++{
++	struct gnttab_map_grant_ref *mop;
++	struct sk_buff *skb;
++	int ret;
++
++	mop = netbk->tx_map_ops;
++	while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
++		!list_empty(&netbk->net_schedule_list)) {
++		struct xen_netif *netif;
++		struct xen_netif_tx_request txreq;
++		struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
++		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
++		u16 pending_idx;
++		RING_IDX idx;
++		int work_to_do;
++		unsigned int data_len;
++		pending_ring_idx_t index;
++
++		/* Get a netif from the list with work to do. */
++		netif = poll_net_schedule_list(netbk);
++		if (!netif)
++			continue;
++
++		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
++		if (!work_to_do) {
++			netif_put(netif);
++			continue;
++		}
++
++		idx = netif->tx.req_cons;
++		rmb(); /* Ensure that we see the request before we copy it. */
++		memcpy(&txreq, RING_GET_REQUEST(&netif->tx, idx), sizeof(txreq));
++
++		/* Credit-based scheduling. */
++		if (txreq.size > netif->remaining_credit &&
++		    tx_credit_exceeded(netif, txreq.size)) {
++			netif_put(netif);
++			continue;
++		}
++
++		netif->remaining_credit -= txreq.size;
++
++		work_to_do--;
++		netif->tx.req_cons = ++idx;
++
++		memset(extras, 0, sizeof(extras));
++		if (txreq.flags & NETTXF_extra_info) {
++			work_to_do = netbk_get_extras(netif, extras,
++						      work_to_do);
++			idx = netif->tx.req_cons;
++			if (unlikely(work_to_do < 0)) {
++				netbk_tx_err(netif, &txreq, idx);
++				continue;
++			}
++		}
++
++		ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do);
++		if (unlikely(ret < 0)) {
++			netbk_tx_err(netif, &txreq, idx - ret);
++			continue;
++		}
++		idx += ret;
++
++		if (unlikely(txreq.size < ETH_HLEN)) {
++			pr_debug("Bad packet size: %d\n", txreq.size);
++			netbk_tx_err(netif, &txreq, idx);
++			continue;
++		}
++
++		/* The payload must not cross a page boundary as it cannot be split. */
++		if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
++			pr_debug("txreq.offset: %x, size: %u, end: %lu\n",
++				 txreq.offset, txreq.size,
++				 (txreq.offset&~PAGE_MASK) + txreq.size);
++			netbk_tx_err(netif, &txreq, idx);
++			continue;
++		}
++
++		index = pending_index(netbk->pending_cons);
++		pending_idx = netbk->pending_ring[index];
++
++		data_len = (txreq.size > PKT_PROT_LEN &&
++			    ret < MAX_SKB_FRAGS) ?
++			PKT_PROT_LEN : txreq.size;
++
++		skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
++				GFP_ATOMIC | __GFP_NOWARN);
++		if (unlikely(skb == NULL)) {
++			pr_debug("Can't allocate a skb in start_xmit.\n");
++			netbk_tx_err(netif, &txreq, idx);
++			break;
++		}
++
++		/* Packets passed to netif_rx() must have some headroom. */
++		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
++
++		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
++			struct xen_netif_extra_info *gso;
++			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
++
++			if (netbk_set_skb_gso(skb, gso)) {
++				kfree_skb(skb);
++				netbk_tx_err(netif, &txreq, idx);
++				continue;
++			}
++		}
++
++		gnttab_set_map_op(mop, idx_to_kaddr(netbk, pending_idx),
++				  GNTMAP_host_map | GNTMAP_readonly,
++				  txreq.gref, netif->domid);
++		mop++;
++
++		memcpy(&netbk->pending_tx_info[pending_idx].req,
++		       &txreq, sizeof(txreq));
++		netbk->pending_tx_info[pending_idx].netif = netif;
++		*((u16 *)skb->data) = pending_idx;
++
++		__skb_put(skb, data_len);
++
++		skb_shinfo(skb)->nr_frags = ret;
++		if (data_len < txreq.size) {
++			skb_shinfo(skb)->nr_frags++;
++			skb_shinfo(skb)->frags[0].page =
++				(void *)(unsigned long)pending_idx;
++		} else {
++			/* Discriminate from any valid pending_idx value. */
++			skb_shinfo(skb)->frags[0].page = (void *)~0UL;
++		}
++
++		__skb_queue_tail(&netbk->tx_queue, skb);
++
++		netbk->pending_cons++;
++
++		mop = netbk_get_requests(netbk, netif, skb, txfrags, mop);
++
++		netif->tx.req_cons = idx;
++		netif_schedule_work(netif);
++
++		if ((mop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops))
++			break;
++	}
++
++	return mop - netbk->tx_map_ops;
++}
++
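++/*
++ * Second half of the TX path: with the grants mapped, copy the header
++ * into the skb, attach the mapped fragments, fix up checksum and GSO
++ * state and hand the packet to the network stack.
++ */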
++static void net_tx_submit(struct xen_netbk *netbk)
++{
++	struct gnttab_map_grant_ref *mop;
++	struct sk_buff *skb;
++
++	mop = netbk->tx_map_ops;
++	while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
++		struct xen_netif_tx_request *txp;
++		struct xen_netif *netif;
++		u16 pending_idx;
++		unsigned data_len;
++
++		pending_idx = *((u16 *)skb->data);
++		netif = netbk->pending_tx_info[pending_idx].netif;
++		txp = &netbk->pending_tx_info[pending_idx].req;
++
++		/* Check the remap error code. */
++		if (unlikely(netbk_tx_check_mop(netbk, skb, &mop))) {
++			pr_debug("netback grant failed.\n");
++			skb_shinfo(skb)->nr_frags = 0;
++			kfree_skb(skb);
++			continue;
++		}
++
++		data_len = skb->len;
++		memcpy(skb->data,
++		       (void *)(idx_to_kaddr(netbk, pending_idx)|txp->offset),
++		       data_len);
++		if (data_len < txp->size) {
++			/* Append the packet payload as a fragment. */
++			txp->offset += data_len;
++			txp->size -= data_len;
++		} else {
++			/* Schedule a response immediately. */
++			netif_idx_release(netbk, pending_idx);
++		}
++
++		if (txp->flags & NETTXF_csum_blank)
++			skb->ip_summed = CHECKSUM_PARTIAL;
++		else if (txp->flags & NETTXF_data_validated)
++			skb->ip_summed = CHECKSUM_UNNECESSARY;
++
++		netbk_fill_frags(netbk, skb);
++
++		/*
++		 * If the initial fragment was < PKT_PROT_LEN then
++		 * pull through some bytes from the other fragments to
++		 * increase the linear region to PKT_PROT_LEN bytes.
++		 */
++		if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) {
++			int target = min_t(int, skb->len, PKT_PROT_LEN);
++			__pskb_pull_tail(skb, target - skb_headlen(skb));
++		}
++
++		skb->dev      = netif->dev;
++		skb->protocol = eth_type_trans(skb, skb->dev);
++
++		if (skb->ip_summed == CHECKSUM_PARTIAL) {
++			if (skb_checksum_setup(skb)) {
++				pr_debug("skb_checksum_setup failed\n");
++				kfree_skb(skb);
++				continue;
++			}
++		} else if (skb_is_gso(skb)) {
++			pr_debug("GSO SKB checksum is not partial\n");
++			kfree_skb(skb);
++			continue;
++		}
++
++		if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
++		    unlikely(skb_linearize(skb))) {
++			DPRINTK("Can't linearize skb in net_tx_action.\n");
++			kfree_skb(skb);
++			continue;
++		}
++
++		netif->stats.rx_bytes += skb->len;
++		netif->stats.rx_packets++;
++
++		netif_rx_ni(skb);
++		netif->dev->last_rx = jiffies;
++	}
++}
++
++/* Called after netfront has transmitted */
++static void net_tx_action(unsigned long data)
++{
++	struct xen_netbk *netbk = (struct xen_netbk *)data;
++	unsigned nr_mops;
++	int ret;
++
++	net_tx_action_dealloc(netbk);
++
++	nr_mops = net_tx_build_mops(netbk);
++
++	if (nr_mops == 0)
++		goto out;
++
++	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
++					netbk->tx_map_ops, nr_mops);
++	BUG_ON(ret);
++
++	net_tx_submit(netbk);
++out:
++	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
++	    !list_empty(&netbk->pending_inuse_head)) {
++		struct netbk_tx_pending_inuse *oldest;
++
++		oldest = list_entry(netbk->pending_inuse_head.next,
++				    struct netbk_tx_pending_inuse, list);
++		mod_timer(&netbk->netbk_tx_pending_timer,
++				oldest->alloc_time + HZ);
++	}
++}
++
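++/*
++ * Queue a pending slot on the dealloc ring; the grant is actually
++ * unmapped later by net_tx_action_dealloc().  A private lock serialises
++ * producers because this is also reached from the page release
++ * callback.
++ */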
++static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
++{
++	static DEFINE_SPINLOCK(_lock);
++	unsigned long flags;
++	pending_ring_idx_t index;
++
++	spin_lock_irqsave(&_lock, flags);
++	index = pending_index(netbk->dealloc_prod);
++	netbk->dealloc_ring[index] = pending_idx;
++	/* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
++	smp_wmb();
++	netbk->dealloc_prod++;
++	spin_unlock_irqrestore(&_lock, flags);
++
++	xen_netbk_bh_handler(netbk, 0);
++}
++
++static void netif_page_release(struct page *page, unsigned int order)
++{
++	unsigned int group, idx;
++	int foreign = netif_get_page_ext(page, &group, &idx);
++
++	BUG_ON(!foreign);
++	BUG_ON(order);
++
++	netif_idx_release(&xen_netbk[group], idx);
++}
++
++irqreturn_t netif_be_int(int irq, void *dev_id)
++{
++	struct xen_netif *netif = dev_id;
++	struct xen_netbk *netbk;
++
++	if (netif->group == -1)
++		return IRQ_NONE;
++
++	netbk = &xen_netbk[netif->group];
++
++	add_to_net_schedule_list_tail(netif);
++	maybe_schedule_tx_action(netbk);
++
++	if (netif_schedulable(netif) && !netbk_queue_full(netif))
++		netif_wake_queue(netif->dev);
++
++	return IRQ_HANDLED;
++}
++
++static void make_tx_response(struct xen_netif *netif,
++			     struct xen_netif_tx_request *txp,
++			     s8       st)
++{
++	RING_IDX i = netif->tx.rsp_prod_pvt;
++	struct xen_netif_tx_response *resp;
++	int notify;
++
++	resp = RING_GET_RESPONSE(&netif->tx, i);
++	resp->id     = txp->id;
++	resp->status = st;
++
++	if (txp->flags & NETTXF_extra_info)
++		RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL;
++
++	netif->tx.rsp_prod_pvt = ++i;
++	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
++	if (notify)
++		notify_remote_via_irq(netif->irq);
++}
++
++static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
++					     u16      id,
++					     s8       st,
++					     u16      offset,
++					     u16      size,
++					     u16      flags)
++{
++	RING_IDX i = netif->rx.rsp_prod_pvt;
++	struct xen_netif_rx_response *resp;
++
++	resp = RING_GET_RESPONSE(&netif->rx, i);
++	resp->offset     = offset;
++	resp->flags      = flags;
++	resp->id         = id;
++	resp->status     = (s16)size;
++	if (st < 0)
++		resp->status = (s16)st;
++
++	netif->rx.rsp_prod_pvt = ++i;
++
++	return resp;
++}
++
++#ifdef NETBE_DEBUG_INTERRUPT
++static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
++{
++	struct list_head *ent;
++	struct xen_netif *netif;
++	int i = 0;
++	int group = 0;
++
++	printk(KERN_ALERT "netif_schedule_list:\n");
++
++	for (group = 0; group < xen_netbk_group_nr; group++) {
++		struct xen_netbk *netbk = &xen_netbk[group];
++		spin_lock_irq(&netbk->net_schedule_list_lock);
++		printk(KERN_ALERT "xen_netback group number: %d\n", group);
++		list_for_each(ent, &netbk->net_schedule_list) {
++			netif = list_entry(ent, struct xen_netif, list);
++			printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
++				"rx_resp_prod=%08x\n",
++				i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
++			printk(KERN_ALERT
++				"   tx_req_cons=%08x, tx_resp_prod=%08x)\n",
++				netif->tx.req_cons, netif->tx.rsp_prod_pvt);
++			printk(KERN_ALERT
++				"   shared(rx_req_prod=%08x "
++				"rx_resp_prod=%08x\n",
++				netif->rx.sring->req_prod,
++				netif->rx.sring->rsp_prod);
++			printk(KERN_ALERT
++				"   rx_event=%08x, tx_req_prod=%08x\n",
++				netif->rx.sring->rsp_event,
++				netif->tx.sring->req_prod);
++			printk(KERN_ALERT
++				"   tx_resp_prod=%08x, tx_event=%08x)\n",
++				netif->tx.sring->rsp_prod,
++				netif->tx.sring->rsp_event);
++			i++;
++		}
++		spin_unlock_irq(&netbk->net_schedule_list_lock);
++	}
++
++	printk(KERN_ALERT " ** End of netif_schedule_list **\n");
++
++	return IRQ_HANDLED;
++}
++#endif
++
++static inline int rx_work_todo(struct xen_netbk *netbk)
++{
++	return !skb_queue_empty(&netbk->rx_queue);
++}
++
++static inline int tx_work_todo(struct xen_netbk *netbk)
++{
++	if (netbk->dealloc_cons != netbk->dealloc_prod)
++		return 1;
++
++	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
++	    !list_empty(&netbk->pending_inuse_head))
++		return 1;
++
++	if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
++			!list_empty(&netbk->net_schedule_list))
++		return 1;
++
++	return 0;
++}
++
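++/*
++ * When the netback_kthread module parameter is set, each netback group
++ * runs this thread (bound to its CPU) instead of the tasklet pair,
++ * handling RX and TX work in process context.
++ */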
++static int netbk_action_thread(void *data)
++{
++	struct xen_netbk *netbk = (struct xen_netbk *)data;
++	while (!kthread_should_stop()) {
++		wait_event_interruptible(netbk->kthread.netbk_action_wq,
++				rx_work_todo(netbk)
++				|| tx_work_todo(netbk)
++				|| kthread_should_stop());
++		cond_resched();
++
++		if (kthread_should_stop())
++			break;
++
++		if (rx_work_todo(netbk))
++			net_rx_action((unsigned long)netbk);
++
++		if (tx_work_todo(netbk))
++			net_tx_action((unsigned long)netbk);
++	}
++
++	return 0;
++}
++
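++/*
++ * One xen_netbk group is created per online CPU.  Each group has its
++ * own pending-request state, timers and either a pair of tasklets or a
++ * dedicated kernel thread; every vif is assigned to one group via
++ * netif->group.
++ */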
++static int __init netback_init(void)
++{
++	int i;
++	struct page *page;
++	int rc = 0;
++	int group;
++
++	if (!xen_pv_domain())
++		return -ENODEV;
++
++	xen_netbk_group_nr = num_online_cpus();
++	xen_netbk = vmalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);
++	if (!xen_netbk) {
++		printk(KERN_ALERT "%s: out of memory\n", __func__);
++		return -ENOMEM;
++	}
++	memset(xen_netbk, 0, sizeof(struct xen_netbk) * xen_netbk_group_nr);
++
++	for (group = 0; group < xen_netbk_group_nr; group++) {
++		struct xen_netbk *netbk = &xen_netbk[group];
++		skb_queue_head_init(&netbk->rx_queue);
++		skb_queue_head_init(&netbk->tx_queue);
++
++		init_timer(&netbk->net_timer);
++		netbk->net_timer.data = (unsigned long)netbk;
++		netbk->net_timer.function = net_alarm;
++
++		init_timer(&netbk->netbk_tx_pending_timer);
++		netbk->netbk_tx_pending_timer.data = (unsigned long)netbk;
++		netbk->netbk_tx_pending_timer.function =
++			netbk_tx_pending_timeout;
++
++		netbk->mmap_pages =
++			alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
++		if (!netbk->mmap_pages) {
++			printk(KERN_ALERT "%s: out of memory\n", __func__);
++			del_timer(&netbk->netbk_tx_pending_timer);
++			del_timer(&netbk->net_timer);
++			rc = -ENOMEM;
++			goto failed_init;
++		}
++
++		for (i = 0; i < MAX_PENDING_REQS; i++) {
++			page = netbk->mmap_pages[i];
++			SetPageForeign(page, netif_page_release);
++			netif_set_page_ext(page, group, i);
++			INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
++		}
++
++		netbk->pending_cons = 0;
++		netbk->pending_prod = MAX_PENDING_REQS;
++		for (i = 0; i < MAX_PENDING_REQS; i++)
++			netbk->pending_ring[i] = i;
++
++		if (MODPARM_netback_kthread) {
++			init_waitqueue_head(&netbk->kthread.netbk_action_wq);
++			netbk->kthread.task =
++				kthread_create(netbk_action_thread,
++					       (void *)netbk,
++					       "netback/%u", group);
++
++			if (!IS_ERR(netbk->kthread.task)) {
++				kthread_bind(netbk->kthread.task, group);
++			} else {
++				printk(KERN_ALERT
++					"kthread_create() failed at netback\n");
++				free_empty_pages_and_pagevec(netbk->mmap_pages,
++						MAX_PENDING_REQS);
++				del_timer(&netbk->netbk_tx_pending_timer);
++				del_timer(&netbk->net_timer);
++				rc = PTR_ERR(netbk->kthread.task);
++				goto failed_init;
++			}
++		} else {
++			tasklet_init(&netbk->tasklet.net_tx_tasklet,
++				     net_tx_action,
++				     (unsigned long)netbk);
++			tasklet_init(&netbk->tasklet.net_rx_tasklet,
++				     net_rx_action,
++				     (unsigned long)netbk);
++		}
++
++		INIT_LIST_HEAD(&netbk->pending_inuse_head);
++		INIT_LIST_HEAD(&netbk->net_schedule_list);
++
++		spin_lock_init(&netbk->net_schedule_list_lock);
++
++		atomic_set(&netbk->netfront_count, 0);
++
++		if (MODPARM_netback_kthread)
++			wake_up_process(netbk->kthread.task);
++	}
++
++	netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
++	if (MODPARM_copy_skb) {
++		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
++					      NULL, 0))
++			netbk_copy_skb_mode = NETBK_ALWAYS_COPY_SKB;
++		else
++			netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
++	}
++
++	rc = netif_xenbus_init();
++	if (rc)
++		goto failed_init;
++
++#ifdef NETBE_DEBUG_INTERRUPT
++	(void)bind_virq_to_irqhandler(VIRQ_DEBUG,
++				      0,
++				      netif_be_dbg,
++				      IRQF_SHARED,
++				      "net-be-dbg",
++				      &netif_be_dbg);
++#endif
++
++	return 0;
++
++failed_init:
++	for (i = 0; i < group; i++) {
++		struct xen_netbk *netbk = &xen_netbk[i];
++		free_empty_pages_and_pagevec(netbk->mmap_pages,
++				MAX_PENDING_REQS);
++		del_timer(&netbk->netbk_tx_pending_timer);
++		del_timer(&netbk->net_timer);
++		if (MODPARM_netback_kthread)
++			kthread_stop(netbk->kthread.task);
++	}
++	vfree(xen_netbk);
++	return rc;
++
++}
++
++module_init(netback_init);
++
++MODULE_LICENSE("Dual BSD/GPL");
+diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
+new file mode 100644
+index 0000000..640c696
+--- /dev/null
++++ b/drivers/net/xen-netback/xenbus.c
+@@ -0,0 +1,487 @@
++/*  Xenbus code for netif backend
++ * Copyright (C) 2005 Rusty Russell <rusty at rustcorp.com.au>
++ * Copyright (C) 2005 XenSource Ltd
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
++*/
++
++#include "common.h"
++
++static int connect_rings(struct backend_info *);
++static void connect(struct backend_info *);
++static void backend_create_netif(struct backend_info *be);
++static void unregister_hotplug_status_watch(struct backend_info *be);
++
++static int netback_remove(struct xenbus_device *dev)
++{
++	struct backend_info *be = dev_get_drvdata(&dev->dev);
++
++	unregister_hotplug_status_watch(be);
++	if (be->netif) {
++		kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
++		xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
++		netif_disconnect(be->netif);
++		be->netif = NULL;
++	}
++	kfree(be);
++	dev_set_drvdata(&dev->dev, NULL);
++	return 0;
++}
++
++
++/**
++ * Entry point to this code when a new device is created.  Allocate the basic
++ * structures and switch to InitWait.
++ */
++static int netback_probe(struct xenbus_device *dev,
++			 const struct xenbus_device_id *id)
++{
++	const char *message;
++	struct xenbus_transaction xbt;
++	int err;
++	int sg;
++	struct backend_info *be = kzalloc(sizeof(struct backend_info),
++					  GFP_KERNEL);
++	if (!be) {
++		xenbus_dev_fatal(dev, -ENOMEM,
++				 "allocating backend structure");
++		return -ENOMEM;
++	}
++
++	be->dev = dev;
++	dev_set_drvdata(&dev->dev, be);
++
++	sg = 1;
++	if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB)
++		sg = 0;
++
++	do {
++		err = xenbus_transaction_start(&xbt);
++		if (err) {
++			xenbus_dev_fatal(dev, err, "starting transaction");
++			goto fail;
++		}
++
++		err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg);
++		if (err) {
++			message = "writing feature-sg";
++			goto abort_transaction;
++		}
++
++		err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4",
++				    "%d", sg);
++		if (err) {
++			message = "writing feature-gso-tcpv4";
++			goto abort_transaction;
++		}
++
++		/* We support rx-copy path. */
++		err = xenbus_printf(xbt, dev->nodename,
++				    "feature-rx-copy", "%d", 1);
++		if (err) {
++			message = "writing feature-rx-copy";
++			goto abort_transaction;
++		}
++
++		/*
++		 * We don't support rx-flip path (except old guests who don't
++		 * grok this feature flag).
++		 */
++		err = xenbus_printf(xbt, dev->nodename,
++				    "feature-rx-flip", "%d", 0);
++		if (err) {
++			message = "writing feature-rx-flip";
++			goto abort_transaction;
++		}
++
++		err = xenbus_transaction_end(xbt, 0);
++	} while (err == -EAGAIN);
++
++	if (err) {
++		xenbus_dev_fatal(dev, err, "completing transaction");
++		goto fail;
++	}
++
++	err = xenbus_switch_state(dev, XenbusStateInitWait);
++	if (err)
++		goto fail;
++
++	/* This kicks hotplug scripts, so do it immediately. */
++	backend_create_netif(be);
++
++	return 0;
++
++abort_transaction:
++	xenbus_transaction_end(xbt, 1);
++	xenbus_dev_fatal(dev, err, "%s", message);
++fail:
++	pr_debug("failed");
++	netback_remove(dev);
++	return err;
++}
++
++
++/*
++ * Handle the creation of the hotplug script environment.  We add the script
++ * and vif variables to the environment, for the benefit of the vif-* hotplug
++ * scripts.
++ */
++static int netback_uevent(struct xenbus_device *xdev,
++			  struct kobj_uevent_env *env)
++{
++	struct backend_info *be = dev_get_drvdata(&xdev->dev);
++	char *val;
++
++	val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
++	if (IS_ERR(val)) {
++		int err = PTR_ERR(val);
++		xenbus_dev_fatal(xdev, err, "reading script");
++		return err;
++	} else {
++		if (add_uevent_var(env, "script=%s", val)) {
++			kfree(val);
++			return -ENOMEM;
++		}
++		kfree(val);
++	}
++
++	if (!be || !be->netif)
++		return 0;
++
++	return add_uevent_var(env, "vif=%s", be->netif->dev->name);
++}
++
++
++static void backend_create_netif(struct backend_info *be)
++{
++	int err;
++	long handle;
++	struct xenbus_device *dev = be->dev;
++
++	if (be->netif != NULL)
++		return;
++
++	err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%li", &handle);
++	if (err != 1) {
++		xenbus_dev_fatal(dev, err, "reading handle");
++		return;
++	}
++
++	be->netif = netif_alloc(&dev->dev, dev->otherend_id, handle);
++	if (IS_ERR(be->netif)) {
++		err = PTR_ERR(be->netif);
++		be->netif = NULL;
++		xenbus_dev_fatal(dev, err, "creating interface");
++		return;
++	}
++
++	kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
++}
++
++
++static void disconnect_backend(struct xenbus_device *dev)
++{
++	struct backend_info *be = dev_get_drvdata(&dev->dev);
++
++	if (be->netif) {
++		xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
++		netif_disconnect(be->netif);
++		be->netif = NULL;
++	}
++}
++
++/**
++ * Callback received when the frontend's state changes.
++ */
++static void frontend_changed(struct xenbus_device *dev,
++			     enum xenbus_state frontend_state)
++{
++	struct backend_info *be = dev_get_drvdata(&dev->dev);
++
++	pr_debug("frontend state %s", xenbus_strstate(frontend_state));
++
++	be->frontend_state = frontend_state;
++
++	switch (frontend_state) {
++	case XenbusStateInitialising:
++		if (dev->state == XenbusStateClosed) {
++			printk(KERN_INFO "%s: %s: prepare for reconnect\n",
++			       __func__, dev->nodename);
++			xenbus_switch_state(dev, XenbusStateInitWait);
++		}
++		break;
++
++	case XenbusStateInitialised:
++		break;
++
++	case XenbusStateConnected:
++		if (dev->state == XenbusStateConnected)
++			break;
++		backend_create_netif(be);
++		if (be->netif)
++			connect(be);
++		break;
++
++	case XenbusStateClosing:
++		if (be->netif)
++			kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
++		disconnect_backend(dev);
++		xenbus_switch_state(dev, XenbusStateClosing);
++		break;
++
++	case XenbusStateClosed:
++		xenbus_switch_state(dev, XenbusStateClosed);
++		if (xenbus_dev_is_online(dev))
++			break;
++		/* fall through if not online */
++	case XenbusStateUnknown:
++		device_unregister(&dev->dev);
++		break;
++
++	default:
++		xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
++				 frontend_state);
++		break;
++	}
++}
++
++
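++/*
++ * The toolstack may write a rate limit for the vif as "<bytes>,<usec>"
++ * in the backend's xenstore directory.  Parse it into the credit
++ * scheduler's parameters, defaulting to unlimited bandwidth.
++ */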
++static void xen_net_read_rate(struct xenbus_device *dev,
++			      unsigned long *bytes, unsigned long *usec)
++{
++	char *s, *e;
++	unsigned long b, u;
++	char *ratestr;
++
++	/* Default to unlimited bandwidth. */
++	*bytes = ~0UL;
++	*usec = 0;
++
++	ratestr = xenbus_read(XBT_NIL, dev->nodename, "rate", NULL);
++	if (IS_ERR(ratestr))
++		return;
++
++	s = ratestr;
++	b = simple_strtoul(s, &e, 10);
++	if ((s == e) || (*e != ','))
++		goto fail;
++
++	s = e + 1;
++	u = simple_strtoul(s, &e, 10);
++	if ((s == e) || (*e != '\0'))
++		goto fail;
++
++	*bytes = b;
++	*usec = u;
++
++	kfree(ratestr);
++	return;
++
++ fail:
++	pr_warn("Failed to parse network rate limit. Traffic unlimited.\n");
++	kfree(ratestr);
++}
++
++static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
++{
++	char *s, *e, *macstr;
++	int i;
++
++	macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
++	if (IS_ERR(macstr))
++		return PTR_ERR(macstr);
++
++	for (i = 0; i < ETH_ALEN; i++) {
++		mac[i] = simple_strtoul(s, &e, 16);
++		if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
++			kfree(macstr);
++			return -ENOENT;
++		}
++		s = e+1;
++	}
++
++	kfree(macstr);
++	return 0;
++}
++
++static void unregister_hotplug_status_watch(struct backend_info *be)
++{
++	if (be->have_hotplug_status_watch) {
++		unregister_xenbus_watch(&be->hotplug_status_watch);
++		kfree(be->hotplug_status_watch.node);
++	}
++	be->have_hotplug_status_watch = 0;
++}
++
++static void hotplug_status_changed(struct xenbus_watch *watch,
++				   const char **vec,
++				   unsigned int vec_size)
++{
++	struct backend_info *be = container_of(watch,
++					       struct backend_info,
++					       hotplug_status_watch);
++	char *str;
++	unsigned int len;
++
++	str = xenbus_read(XBT_NIL, be->dev->nodename, "hotplug-status", &len);
++	if (IS_ERR(str))
++		return;
++	if (len == sizeof("connected")-1 && !memcmp(str, "connected", len)) {
++		xenbus_switch_state(be->dev, XenbusStateConnected);
++		/* Not interested in this watch anymore. */
++		unregister_hotplug_status_watch(be);
++	}
++	kfree(str);
++}
++
++static void connect(struct backend_info *be)
++{
++	int err;
++	struct xenbus_device *dev = be->dev;
++
++	err = connect_rings(be);
++	if (err)
++		return;
++
++	err = xen_net_read_mac(dev, be->netif->fe_dev_addr);
++	if (err) {
++		xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
++		return;
++	}
++
++	xen_net_read_rate(dev, &be->netif->credit_bytes,
++			  &be->netif->credit_usec);
++	be->netif->remaining_credit = be->netif->credit_bytes;
++
++	unregister_hotplug_status_watch(be);
++	err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch,
++				   hotplug_status_changed,
++				   "%s/%s", dev->nodename, "hotplug-status");
++	if (err) {
++		/* Switch now, since we can't do a watch. */
++		xenbus_switch_state(dev, XenbusStateConnected);
++	} else {
++		be->have_hotplug_status_watch = 1;
++	}
++
++	netif_wake_queue(be->netif->dev);
++}
++
++
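++/*
++ * Read the ring references, event channel and feature flags published
++ * by the frontend, then map the shared rings.  Only the rx-copy receive
++ * path is supported; a frontend that does not request it is rejected.
++ */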
++static int connect_rings(struct backend_info *be)
++{
++	struct xen_netif *netif = be->netif;
++	struct xenbus_device *dev = be->dev;
++	unsigned long tx_ring_ref, rx_ring_ref;
++	unsigned int evtchn, rx_copy;
++	int err;
++	int val;
++
++	err = xenbus_gather(XBT_NIL, dev->otherend,
++			    "tx-ring-ref", "%lu", &tx_ring_ref,
++			    "rx-ring-ref", "%lu", &rx_ring_ref,
++			    "event-channel", "%u", &evtchn, NULL);
++	if (err) {
++		xenbus_dev_fatal(dev, err,
++				 "reading %s/ring-ref and event-channel",
++				 dev->otherend);
++		return err;
++	}
++
++	err = xenbus_scanf(XBT_NIL, dev->otherend, "request-rx-copy", "%u",
++			   &rx_copy);
++	if (err == -ENOENT) {
++		err = 0;
++		rx_copy = 0;
++	}
++	if (err < 0) {
++		xenbus_dev_fatal(dev, err, "reading %s/request-rx-copy",
++				 dev->otherend);
++		return err;
++	}
++	if (!rx_copy)
++		return -EOPNOTSUPP;
++
++	if (netif->dev->tx_queue_len != 0) {
++		if (xenbus_scanf(XBT_NIL, dev->otherend,
++				 "feature-rx-notify", "%d", &val) < 0)
++			val = 0;
++		if (val)
++			netif->can_queue = 1;
++		else
++			/* Must be non-zero for pfifo_fast to work. */
++			netif->dev->tx_queue_len = 1;
++	}
++
++	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg",
++			 "%d", &val) < 0)
++		val = 0;
++	netif->can_sg = !!val;
++
++	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4",
++			 "%d", &val) < 0)
++		val = 0;
++	netif->gso = !!val;
++
++	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix",
++			 "%d", &val) < 0)
++		val = 0;
++	netif->gso_prefix = !!val;
++
++	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
++			 "%d", &val) < 0)
++		val = 0;
++	netif->csum = !val;
++
++	/* Set dev->features */
++	netif_set_features(netif);
++
++	/* Map the shared frame, irq etc. */
++	err = netif_map(netif, tx_ring_ref, rx_ring_ref, evtchn);
++	if (err) {
++		xenbus_dev_fatal(dev, err,
++				 "mapping shared-frames %lu/%lu port %u",
++				 tx_ring_ref, rx_ring_ref, evtchn);
++		return err;
++	}
++	return 0;
++}
++
++
++/* ** Driver Registration ** */
++
++
++static const struct xenbus_device_id netback_ids[] = {
++	{ "vif" },
++	{ "" }
++};
++
++
++static struct xenbus_driver netback = {
++	.name = "vif",
++	.owner = THIS_MODULE,
++	.ids = netback_ids,
++	.probe = netback_probe,
++	.remove = netback_remove,
++	.uevent = netback_uevent,
++	.otherend_changed = frontend_changed,
++};
++
++
++int netif_xenbus_init(void)
++{
++	printk(KERN_CRIT "registering netback\n");
++	return xenbus_register_backend(&netback);
++}
+diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
+index 30290a8..5a48ce9 100644
+--- a/drivers/xen/Kconfig
++++ b/drivers/xen/Kconfig
+@@ -37,13 +37,6 @@ config XEN_BACKEND
+ 	depends on XEN_PCIDEV_BACKEND
+ 
+ 
+-config XEN_NETDEV_BACKEND
+-       tristate "Xen backend network device"
+-       depends on XEN_BACKEND && NET
+-       help
+-         Implement the network backend driver, which passes packets
+-         from the guest domain's frontend drivers to the network.
+-
+ config XENFS
+ 	tristate "Xen filesystem"
+ 	default y
+diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
+index c0e0509..533a199 100644
+--- a/drivers/xen/Makefile
++++ b/drivers/xen/Makefile
+@@ -9,7 +9,6 @@ obj-$(CONFIG_HOTPLUG_CPU)	+= cpu_hotplug.o
+ obj-$(CONFIG_XEN_DEV_EVTCHN)	+= xen-evtchn.o
+ obj-$(CONFIG_XEN_GNTDEV)	+= xen-gntdev.o
+ obj-$(CONFIG_XEN_PCIDEV_BACKEND)	+= pciback/
+-obj-$(CONFIG_XEN_NETDEV_BACKEND)	+= netback/
+ obj-$(CONFIG_XENFS)		+= xenfs/
+ obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
+ obj-$(CONFIG_XEN_PLATFORM_PCI)	+= platform-pci.o
+diff --git a/drivers/xen/netback/Makefile b/drivers/xen/netback/Makefile
+deleted file mode 100644
+index e346e81..0000000
+--- a/drivers/xen/netback/Makefile
++++ /dev/null
+@@ -1,3 +0,0 @@
+-obj-$(CONFIG_XEN_NETDEV_BACKEND) := xen-netback.o
+-
+-xen-netback-y := netback.o xenbus.o interface.o
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+deleted file mode 100644
+index 079e1de..0000000
+--- a/drivers/xen/netback/common.h
++++ /dev/null
+@@ -1,275 +0,0 @@
+-/******************************************************************************
+- * arch/xen/drivers/netif/backend/common.h
+- *
+- * This program is free software; you can redistribute it and/or
+- * modify it under the terms of the GNU General Public License version 2
+- * as published by the Free Software Foundation; or, when distributed
+- * separately from the Linux kernel or incorporated into other
+- * software packages, subject to the following license:
+- *
+- * Permission is hereby granted, free of charge, to any person obtaining a copy
+- * of this source file (the "Software"), to deal in the Software without
+- * restriction, including without limitation the rights to use, copy, modify,
+- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+- * and to permit persons to whom the Software is furnished to do so, subject to
+- * the following conditions:
+- *
+- * The above copyright notice and this permission notice shall be included in
+- * all copies or substantial portions of the Software.
+- *
+- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+- * IN THE SOFTWARE.
+- */
+-
+-#ifndef __NETIF__BACKEND__COMMON_H__
+-#define __NETIF__BACKEND__COMMON_H__
+-
+-#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__
+-
+-#include <linux/version.h>
+-#include <linux/module.h>
+-#include <linux/interrupt.h>
+-#include <linux/slab.h>
+-#include <linux/ip.h>
+-#include <linux/in.h>
+-#include <linux/io.h>
+-#include <linux/netdevice.h>
+-#include <linux/etherdevice.h>
+-#include <linux/wait.h>
+-#include <linux/sched.h>
+-
+-#include <xen/interface/io/netif.h>
+-#include <asm/pgalloc.h>
+-#include <xen/interface/grant_table.h>
+-#include <xen/grant_table.h>
+-#include <xen/xenbus.h>
+-
+-struct xen_netif {
+-	/* Unique identifier for this interface. */
+-	domid_t          domid;
+-	int              group;
+-	unsigned int     handle;
+-
+-	u8               fe_dev_addr[6];
+-
+-	/* Physical parameters of the comms window. */
+-	grant_handle_t   tx_shmem_handle;
+-	grant_ref_t      tx_shmem_ref;
+-	grant_handle_t   rx_shmem_handle;
+-	grant_ref_t      rx_shmem_ref;
+-	unsigned int     irq;
+-
+-	/* The shared rings and indexes. */
+-	struct xen_netif_tx_back_ring tx;
+-	struct xen_netif_rx_back_ring rx;
+-	struct vm_struct *tx_comms_area;
+-	struct vm_struct *rx_comms_area;
+-
+-	/* Flags that must not be set in dev->features */
+-	int features_disabled;
+-
+-	/* Frontend feature information. */
+-	u8 can_sg:1;
+-	u8 gso:1;
+-	u8 gso_prefix:1;
+-	u8 csum:1;
+-
+-	/* Internal feature information. */
+-	u8 can_queue:1;	    /* can queue packets for receiver? */
+-
+-	/* Allow netif_be_start_xmit() to peek ahead in the rx request
+-	 * ring.  This is a prediction of what rx_req_cons will be once
+-	 * all queued skbs are put on the ring. */
+-	RING_IDX rx_req_cons_peek;
+-
+-	/* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
+-	unsigned long   credit_bytes;
+-	unsigned long   credit_usec;
+-	unsigned long   remaining_credit;
+-	struct timer_list credit_timeout;
+-
+-	/* Statistics */
+-	int nr_copied_skbs;
+-
+-	/* Miscellaneous private stuff. */
+-	struct list_head list;  /* scheduling list */
+-	atomic_t         refcnt;
+-	struct net_device *dev;
+-	struct net_device_stats stats;
+-
+-	unsigned int carrier;
+-
+-	wait_queue_head_t waiting_to_free;
+-};
+-
+-/*
+- * Implement our own carrier flag: the network stack's version causes delays
+- * when the carrier is re-enabled (in particular, dev_activate() may not
+- * immediately be called, which can cause packet loss; also the etherbridge
+- * can be rather lazy in activating its port).
+- */
+-#define netback_carrier_on(netif)	((netif)->carrier = 1)
+-#define netback_carrier_off(netif)	((netif)->carrier = 0)
+-#define netback_carrier_ok(netif)	((netif)->carrier)
+-
+-enum {
+-	NETBK_DONT_COPY_SKB,
+-	NETBK_DELAYED_COPY_SKB,
+-	NETBK_ALWAYS_COPY_SKB,
+-};
+-
+-extern int netbk_copy_skb_mode;
+-
+-struct backend_info {
+-	struct xenbus_device *dev;
+-	struct xen_netif *netif;
+-	enum xenbus_state frontend_state;
+-	struct xenbus_watch hotplug_status_watch;
+-	int have_hotplug_status_watch:1;
+-};
+-
+-#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
+-#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
+-
+-void netif_disconnect(struct xen_netif *netif);
+-
+-void netif_set_features(struct xen_netif *netif);
+-struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
+-			      unsigned int handle);
+-int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
+-	      unsigned long rx_ring_ref, unsigned int evtchn);
+-
+-static inline void netif_get(struct xen_netif *netif)
+-{
+-	atomic_inc(&netif->refcnt);
+-}
+-
+-static inline void  netif_put(struct xen_netif *netif)
+-{
+-	if (atomic_dec_and_test(&netif->refcnt))
+-		wake_up(&netif->waiting_to_free);
+-}
+-
+-int netif_xenbus_init(void);
+-
+-#define netif_schedulable(netif)				\
+-	(netif_running((netif)->dev) && netback_carrier_ok(netif))
+-
+-void netif_schedule_work(struct xen_netif *netif);
+-void netif_deschedule_work(struct xen_netif *netif);
+-
+-int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
+-struct net_device_stats *netif_be_get_stats(struct net_device *dev);
+-irqreturn_t netif_be_int(int irq, void *dev_id);
+-
+-static inline int netbk_can_queue(struct net_device *dev)
+-{
+-	struct xen_netif *netif = netdev_priv(dev);
+-	return netif->can_queue;
+-}
+-
+-static inline int netbk_can_sg(struct net_device *dev)
+-{
+-	struct xen_netif *netif = netdev_priv(dev);
+-	return netif->can_sg;
+-}
+-
+-struct pending_tx_info {
+-	struct xen_netif_tx_request req;
+-	struct xen_netif *netif;
+-};
+-typedef unsigned int pending_ring_idx_t;
+-
+-struct netbk_rx_meta {
+-	int id;
+-	int size;
+-	int gso_size;
+-};
+-
+-struct netbk_tx_pending_inuse {
+-	struct list_head list;
+-	unsigned long alloc_time;
+-};
+-
+-#define MAX_PENDING_REQS 256
+-
+-#define MAX_BUFFER_OFFSET PAGE_SIZE
+-
+-/* extra field used in struct page */
+-union page_ext {
+-	struct {
+-#if BITS_PER_LONG < 64
+-#define IDX_WIDTH   8
+-#define GROUP_WIDTH (BITS_PER_LONG - IDX_WIDTH)
+-		unsigned int group:GROUP_WIDTH;
+-		unsigned int idx:IDX_WIDTH;
+-#else
+-		unsigned int group, idx;
+-#endif
+-	} e;
+-	void *mapping;
+-};
+-
+-struct xen_netbk {
+-	union {
+-		struct {
+-			struct tasklet_struct net_tx_tasklet;
+-			struct tasklet_struct net_rx_tasklet;
+-		} tasklet;
+-
+-		struct {
+-			wait_queue_head_t netbk_action_wq;
+-			struct task_struct *task;
+-		} kthread;
+-	};
+-
+-	struct sk_buff_head rx_queue;
+-	struct sk_buff_head tx_queue;
+-
+-	struct timer_list net_timer;
+-	struct timer_list netbk_tx_pending_timer;
+-
+-	struct page **mmap_pages;
+-
+-	pending_ring_idx_t pending_prod;
+-	pending_ring_idx_t pending_cons;
+-	pending_ring_idx_t dealloc_prod;
+-	pending_ring_idx_t dealloc_cons;
+-
+-	struct list_head pending_inuse_head;
+-	struct list_head net_schedule_list;
+-
+-	/* Protect the net_schedule_list in netif. */
+-	spinlock_t net_schedule_list_lock;
+-
+-	atomic_t netfront_count;
+-
+-	struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
+-	struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
+-	struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
+-	struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
+-
+-	grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
+-	u16 pending_ring[MAX_PENDING_REQS];
+-	u16 dealloc_ring[MAX_PENDING_REQS];
+-
+-	/*
+-	 * Each head or fragment can be up to 4096 bytes. Given
+-	 * MAX_BUFFER_OFFSET of 4096 the worst case is that each
+-	 * head/fragment uses 2 copy operation.
+-	 */
+-	struct gnttab_copy grant_copy_op[2*NET_RX_RING_SIZE];
+-	unsigned char rx_notify[NR_IRQS];
+-	u16 notify_list[NET_RX_RING_SIZE];
+-	struct netbk_rx_meta meta[2*NET_RX_RING_SIZE];
+-};
+-
+-extern struct xen_netbk *xen_netbk;
+-extern int xen_netbk_group_nr;
+-
+-#endif /* __NETIF__BACKEND__COMMON_H__ */
+diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
+deleted file mode 100644
+index c36db26..0000000
+--- a/drivers/xen/netback/interface.c
++++ /dev/null
+@@ -1,465 +0,0 @@
+-/******************************************************************************
+- * arch/xen/drivers/netif/backend/interface.c
+- *
+- * Network-device interface management.
+- *
+- * Copyright (c) 2004-2005, Keir Fraser
+- *
+- * This program is free software; you can redistribute it and/or
+- * modify it under the terms of the GNU General Public License version 2
+- * as published by the Free Software Foundation; or, when distributed
+- * separately from the Linux kernel or incorporated into other
+- * software packages, subject to the following license:
+- *
+- * Permission is hereby granted, free of charge, to any person obtaining a copy
+- * of this source file (the "Software"), to deal in the Software without
+- * restriction, including without limitation the rights to use, copy, modify,
+- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+- * and to permit persons to whom the Software is furnished to do so, subject to
+- * the following conditions:
+- *
+- * The above copyright notice and this permission notice shall be included in
+- * all copies or substantial portions of the Software.
+- *
+- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+- * IN THE SOFTWARE.
+- */
+-
+-#include "common.h"
+-
+-#include <linux/ethtool.h>
+-#include <linux/rtnetlink.h>
+-
+-#include <xen/events.h>
+-#include <asm/xen/hypercall.h>
+-
+-/*
+- * Module parameter 'queue_length':
+- *
+- * Enables queuing in the network stack when a client has run out of receive
+- * descriptors.
+- */
+-static unsigned long netbk_queue_length = 32;
+-module_param_named(queue_length, netbk_queue_length, ulong, 0644);
+-
+-static void netbk_add_netif(struct xen_netbk *netbk, int group_nr,
+-			   struct xen_netif *netif)
+-{
+-	int i;
+-	int min_netfront_count;
+-	int min_group = 0;
+-	min_netfront_count = atomic_read(&netbk[0].netfront_count);
+-	for (i = 0; i < group_nr; i++) {
+-		int netfront_count = atomic_read(&netbk[i].netfront_count);
+-		if (netfront_count < min_netfront_count) {
+-			min_group = i;
+-			min_netfront_count = netfront_count;
+-		}
+-	}
+-
+-	netif->group = min_group;
+-	atomic_inc(&netbk[netif->group].netfront_count);
+-}
+-
+-static void netbk_remove_netif(struct xen_netbk *netbk, struct xen_netif *netif)
+-{
+-	atomic_dec(&netbk[netif->group].netfront_count);
+-}
+-
+-static void __netif_up(struct xen_netif *netif)
+-{
+-	netbk_add_netif(xen_netbk, xen_netbk_group_nr, netif);
+-	enable_irq(netif->irq);
+-	netif_schedule_work(netif);
+-}
+-
+-static void __netif_down(struct xen_netif *netif)
+-{
+-	disable_irq(netif->irq);
+-	netif_deschedule_work(netif);
+-	netbk_remove_netif(xen_netbk, netif);
+-}
+-
+-static int net_open(struct net_device *dev)
+-{
+-	struct xen_netif *netif = netdev_priv(dev);
+-	if (netback_carrier_ok(netif)) {
+-		__netif_up(netif);
+-		netif_start_queue(dev);
+-	}
+-	return 0;
+-}
+-
+-static int net_close(struct net_device *dev)
+-{
+-	struct xen_netif *netif = netdev_priv(dev);
+-	if (netback_carrier_ok(netif))
+-		__netif_down(netif);
+-	netif_stop_queue(dev);
+-	return 0;
+-}
+-
+-static int netbk_change_mtu(struct net_device *dev, int mtu)
+-{
+-	int max = netbk_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
+-
+-	if (mtu > max)
+-		return -EINVAL;
+-	dev->mtu = mtu;
+-	return 0;
+-}
+-
+-void netif_set_features(struct xen_netif *netif)
+-{
+-	struct net_device *dev = netif->dev;
+-	int features = dev->features;
+-
+-	if (netif->can_sg)
+-		features |= NETIF_F_SG;
+-	if (netif->gso || netif->gso_prefix)
+-		features |= NETIF_F_TSO;
+-	if (netif->csum)
+-		features |= NETIF_F_IP_CSUM;
+-
+-	features &= ~(netif->features_disabled);
+-
+-	if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN)
+-		dev->mtu = ETH_DATA_LEN;
+-
+-	dev->features = features;
+-}
+-
+-static int netbk_set_tx_csum(struct net_device *dev, u32 data)
+-{
+-	struct xen_netif *netif = netdev_priv(dev);
+-	if (data) {
+-		if (!netif->csum)
+-			return -ENOSYS;
+-		netif->features_disabled &= ~NETIF_F_IP_CSUM;
+-	} else {
+-		netif->features_disabled |= NETIF_F_IP_CSUM;
+-	}
+-
+-	netif_set_features(netif);
+-	return 0;
+-}
+-
+-static int netbk_set_sg(struct net_device *dev, u32 data)
+-{
+-	struct xen_netif *netif = netdev_priv(dev);
+-	if (data) {
+-		if (!netif->can_sg)
+-			return -ENOSYS;
+-		netif->features_disabled &= ~NETIF_F_SG;
+-	} else {
+-		netif->features_disabled |= NETIF_F_SG;
+-	}
+-
+-	netif_set_features(netif);
+-	return 0;
+-}
+-
+-static int netbk_set_tso(struct net_device *dev, u32 data)
+-{
+-	struct xen_netif *netif = netdev_priv(dev);
+-	if (data) {
+-		if (!netif->gso && !netif->gso_prefix)
+-			return -ENOSYS;
+-		netif->features_disabled &= ~NETIF_F_TSO;
+-	} else {
+-		netif->features_disabled |= NETIF_F_TSO;
+-	}
+-
+-	netif_set_features(netif);
+-	return 0;
+-}
+-
+-static void netbk_get_drvinfo(struct net_device *dev,
+-			      struct ethtool_drvinfo *info)
+-{
+-	strcpy(info->driver, "netbk");
+-	strcpy(info->bus_info, dev_name(dev->dev.parent));
+-}
+-
+-static const struct netif_stat {
+-	char name[ETH_GSTRING_LEN];
+-	u16 offset;
+-} netbk_stats[] = {
+-	{ "copied_skbs", offsetof(struct xen_netif, nr_copied_skbs) },
+-};
+-
+-static int netbk_get_sset_count(struct net_device *dev, int string_set)
+-{
+-	switch (string_set) {
+-	case ETH_SS_STATS:
+-		return ARRAY_SIZE(netbk_stats);
+-	default:
+-		return -EINVAL;
+-	}
+-}
+-
+-static void netbk_get_ethtool_stats(struct net_device *dev,
+-				   struct ethtool_stats *stats, u64 * data)
+-{
+-	void *netif = netdev_priv(dev);
+-	int i;
+-
+-	for (i = 0; i < ARRAY_SIZE(netbk_stats); i++)
+-		data[i] = *(int *)(netif + netbk_stats[i].offset);
+-}
+-
+-static void netbk_get_strings(struct net_device *dev, u32 stringset, u8 * data)
+-{
+-	int i;
+-
+-	switch (stringset) {
+-	case ETH_SS_STATS:
+-		for (i = 0; i < ARRAY_SIZE(netbk_stats); i++)
+-			memcpy(data + i * ETH_GSTRING_LEN,
+-			       netbk_stats[i].name, ETH_GSTRING_LEN);
+-		break;
+-	}
+-}
+-
+-static struct ethtool_ops network_ethtool_ops = {
+-	.get_drvinfo = netbk_get_drvinfo,
+-
+-	.get_tx_csum = ethtool_op_get_tx_csum,
+-	.set_tx_csum = netbk_set_tx_csum,
+-	.get_sg = ethtool_op_get_sg,
+-	.set_sg = netbk_set_sg,
+-	.get_tso = ethtool_op_get_tso,
+-	.set_tso = netbk_set_tso,
+-	.get_link = ethtool_op_get_link,
+-
+-	.get_sset_count = netbk_get_sset_count,
+-	.get_ethtool_stats = netbk_get_ethtool_stats,
+-	.get_strings = netbk_get_strings,
+-};
+-
+-static struct net_device_ops netback_ops = {
+-	.ndo_start_xmit	= netif_be_start_xmit,
+-	.ndo_get_stats	= netif_be_get_stats,
+-	.ndo_open	= net_open,
+-	.ndo_stop	= net_close,
+-	.ndo_change_mtu	= netbk_change_mtu,
+-};
+-
+-struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
+-			      unsigned int handle)
+-{
+-	int err = 0;
+-	struct net_device *dev;
+-	struct xen_netif *netif;
+-	char name[IFNAMSIZ] = {};
+-
+-	snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
+-	dev = alloc_netdev(sizeof(struct xen_netif), name, ether_setup);
+-	if (dev == NULL) {
+-		pr_debug("Could not allocate netdev\n");
+-		return ERR_PTR(-ENOMEM);
+-	}
+-
+-	SET_NETDEV_DEV(dev, parent);
+-
+-	netif = netdev_priv(dev);
+-	memset(netif, 0, sizeof(*netif));
+-	netif->domid  = domid;
+-	netif->group  = -1;
+-	netif->handle = handle;
+-	netif->can_sg = 1;
+-	netif->csum = 1;
+-	atomic_set(&netif->refcnt, 1);
+-	init_waitqueue_head(&netif->waiting_to_free);
+-	netif->dev = dev;
+-	INIT_LIST_HEAD(&netif->list);
+-
+-	netback_carrier_off(netif);
+-
+-	netif->credit_bytes = netif->remaining_credit = ~0UL;
+-	netif->credit_usec  = 0UL;
+-	init_timer(&netif->credit_timeout);
+-	/* Initialize 'expires' now: it's used to track the credit window. */
+-	netif->credit_timeout.expires = jiffies;
+-
+-	dev->netdev_ops	= &netback_ops;
+-	netif_set_features(netif);
+-	SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
+-
+-	dev->tx_queue_len = netbk_queue_length;
+-
+-	/*
+-	 * Initialise a dummy MAC address. We choose the numerically
+-	 * largest non-broadcast address to prevent the address getting
+-	 * stolen by an Ethernet bridge for STP purposes.
+-	 * (FE:FF:FF:FF:FF:FF)
+-	 */
+-	memset(dev->dev_addr, 0xFF, ETH_ALEN);
+-	dev->dev_addr[0] &= ~0x01;
+-
+-	rtnl_lock();
+-	err = register_netdevice(dev);
+-	rtnl_unlock();
+-	if (err) {
+-		pr_debug("Could not register new net device %s: err=%d\n",
+-			 dev->name, err);
+-		free_netdev(dev);
+-		return ERR_PTR(err);
+-	}
+-
+-	pr_debug("Successfully created netif\n");
+-	return netif;
+-}
+-
+-static int map_frontend_pages(struct xen_netif *netif,
+-			      grant_ref_t tx_ring_ref,
+-			      grant_ref_t rx_ring_ref)
+-{
+-	struct gnttab_map_grant_ref op;
+-
+-	gnttab_set_map_op(&op, (unsigned long)netif->tx_comms_area->addr,
+-			  GNTMAP_host_map, tx_ring_ref, netif->domid);
+-
+-	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
+-		BUG();
+-
+-	if (op.status) {
+-		pr_debug("Gnttab failure mapping tx_ring_ref!\n");
+-		return op.status;
+-	}
+-
+-	netif->tx_shmem_ref    = tx_ring_ref;
+-	netif->tx_shmem_handle = op.handle;
+-
+-	gnttab_set_map_op(&op, (unsigned long)netif->rx_comms_area->addr,
+-			  GNTMAP_host_map, rx_ring_ref, netif->domid);
+-
+-	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
+-		BUG();
+-
+-	if (op.status) {
+-		struct gnttab_unmap_grant_ref unop;
+-
+-		gnttab_set_unmap_op(&unop,
+-				    (unsigned long)netif->tx_comms_area->addr,
+-				    GNTMAP_host_map, netif->tx_shmem_handle);
+-		HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unop, 1);
+-		pr_debug("Gnttab failure mapping rx_ring_ref!\n");
+-		return op.status;
+-	}
+-
+-	netif->rx_shmem_ref    = rx_ring_ref;
+-	netif->rx_shmem_handle = op.handle;
+-
+-	return 0;
+-}
+-
+-static void unmap_frontend_pages(struct xen_netif *netif)
+-{
+-	struct gnttab_unmap_grant_ref op;
+-
+-	gnttab_set_unmap_op(&op, (unsigned long)netif->tx_comms_area->addr,
+-			    GNTMAP_host_map, netif->tx_shmem_handle);
+-
+-	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
+-		BUG();
+-
+-	gnttab_set_unmap_op(&op, (unsigned long)netif->rx_comms_area->addr,
+-			    GNTMAP_host_map, netif->rx_shmem_handle);
+-
+-	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
+-		BUG();
+-}
+-
+-int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
+-	      unsigned long rx_ring_ref, unsigned int evtchn)
+-{
+-	int err = -ENOMEM;
+-	struct xen_netif_tx_sring *txs;
+-	struct xen_netif_rx_sring *rxs;
+-
+-	/* Already connected through? */
+-	if (netif->irq)
+-		return 0;
+-
+-	netif->tx_comms_area = alloc_vm_area(PAGE_SIZE);
+-	if (netif->tx_comms_area == NULL)
+-		return -ENOMEM;
+-	netif->rx_comms_area = alloc_vm_area(PAGE_SIZE);
+-	if (netif->rx_comms_area == NULL)
+-		goto err_rx;
+-
+-	err = map_frontend_pages(netif, tx_ring_ref, rx_ring_ref);
+-	if (err)
+-		goto err_map;
+-
+-	err = bind_interdomain_evtchn_to_irqhandler(
+-		netif->domid, evtchn, netif_be_int, 0,
+-		netif->dev->name, netif);
+-	if (err < 0)
+-		goto err_hypervisor;
+-	netif->irq = err;
+-	disable_irq(netif->irq);
+-
+-	txs = (struct xen_netif_tx_sring *)netif->tx_comms_area->addr;
+-	BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE);
+-
+-	rxs = (struct xen_netif_rx_sring *)
+-		((char *)netif->rx_comms_area->addr);
+-	BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE);
+-
+-	netif->rx_req_cons_peek = 0;
+-
+-	netif_get(netif);
+-
+-	rtnl_lock();
+-	netback_carrier_on(netif);
+-	if (netif_running(netif->dev))
+-		__netif_up(netif);
+-	rtnl_unlock();
+-
+-	return 0;
+-err_hypervisor:
+-	unmap_frontend_pages(netif);
+-err_map:
+-	free_vm_area(netif->rx_comms_area);
+-err_rx:
+-	free_vm_area(netif->tx_comms_area);
+-	return err;
+-}
+-
+-void netif_disconnect(struct xen_netif *netif)
+-{
+-	if (netback_carrier_ok(netif)) {
+-		rtnl_lock();
+-		netback_carrier_off(netif);
+-		netif_carrier_off(netif->dev); /* discard queued packets */
+-		if (netif_running(netif->dev))
+-			__netif_down(netif);
+-		rtnl_unlock();
+-		netif_put(netif);
+-	}
+-
+-	atomic_dec(&netif->refcnt);
+-	wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0);
+-
+-	del_timer_sync(&netif->credit_timeout);
+-
+-	if (netif->irq)
+-		unbind_from_irqhandler(netif->irq, netif);
+-
+-	unregister_netdev(netif->dev);
+-
+-	if (netif->tx.sring) {
+-		unmap_frontend_pages(netif);
+-		free_vm_area(netif->tx_comms_area);
+-		free_vm_area(netif->rx_comms_area);
+-	}
+-
+-	free_netdev(netif->dev);
+-}
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+deleted file mode 100644
+index e0ca232..0000000
+--- a/drivers/xen/netback/netback.c
++++ /dev/null
+@@ -1,1909 +0,0 @@
+-/*
+- * Back-end of the driver for virtual network devices. This portion of the
+- * driver exports a 'unified' network-device interface that can be accessed
+- * by any operating system that implements a compatible front end. A
+- * reference front-end implementation can be found in:
+- *  drivers/net/xen-netfront.c
+- *
+- * Copyright (c) 2002-2005, K A Fraser
+- *
+- * This program is free software; you can redistribute it and/or
+- * modify it under the terms of the GNU General Public License version 2
+- * as published by the Free Software Foundation; or, when distributed
+- * separately from the Linux kernel or incorporated into other
+- * software packages, subject to the following license:
+- *
+- * Permission is hereby granted, free of charge, to any person obtaining a copy
+- * of this source file (the "Software"), to deal in the Software without
+- * restriction, including without limitation the rights to use, copy, modify,
+- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+- * and to permit persons to whom the Software is furnished to do so, subject to
+- * the following conditions:
+- *
+- * The above copyright notice and this permission notice shall be included in
+- * all copies or substantial portions of the Software.
+- *
+- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+- * IN THE SOFTWARE.
+- */
+-
+-#include "common.h"
+-
+-#include <linux/kthread.h>
+-#include <linux/if_vlan.h>
+-#include <linux/udp.h>
+-
+-#include <net/tcp.h>
+-
+-#include <xen/balloon.h>
+-#include <xen/events.h>
+-#include <xen/interface/memory.h>
+-
+-#include <asm/xen/hypercall.h>
+-#include <asm/xen/page.h>
+-
+-/*define NETBE_DEBUG_INTERRUPT*/
+-
+-struct xen_netbk *xen_netbk;
+-int xen_netbk_group_nr;
+-
+-static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx);
+-static void make_tx_response(struct xen_netif *netif,
+-			     struct xen_netif_tx_request *txp,
+-			     s8       st);
+-static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
+-					     u16      id,
+-					     s8       st,
+-					     u16      offset,
+-					     u16      size,
+-					     u16      flags);
+-
+-static void net_tx_action(unsigned long data);
+-
+-static void net_rx_action(unsigned long data);
+-
+-static inline unsigned long idx_to_pfn(struct xen_netbk *netbk,
+-				       unsigned int idx)
+-{
+-	return page_to_pfn(netbk->mmap_pages[idx]);
+-}
+-
+-static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
+-					 unsigned int idx)
+-{
+-	return (unsigned long)pfn_to_kaddr(idx_to_pfn(netbk, idx));
+-}
+-
+-/* extra field used in struct page */
+-static inline void netif_set_page_ext(struct page *pg,
+-				      unsigned int group, unsigned int idx)
+-{
+-	union page_ext ext = { .e = { .group = group + 1, .idx = idx } };
+-
+-	BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping));
+-	pg->mapping = ext.mapping;
+-}
+-
+-static int netif_get_page_ext(struct page *pg,
+-			      unsigned int *_group, unsigned int *_idx)
+-{
+-	union page_ext ext = { .mapping = pg->mapping };
+-	struct xen_netbk *netbk;
+-	unsigned int group, idx;
+-
+-	if (!PageForeign(pg))
+-		return 0;
+-
+-	group = ext.e.group - 1;
+-
+-	if (group < 0 || group >= xen_netbk_group_nr)
+-		return 0;
+-
+-	netbk = &xen_netbk[group];
+-
+-	if (netbk->mmap_pages == NULL)
+-		return 0;
+-
+-	idx = ext.e.idx;
+-
+-	if ((idx < 0) || (idx >= MAX_PENDING_REQS))
+-		return 0;
+-
+-	if (netbk->mmap_pages[idx] != pg)
+-		return 0;
+-
+-	*_group = group;
+-	*_idx = idx;
+-
+-	return 1;
+-}
+-
+-/*
+- * This is the amount of packet we copy rather than map, so that the
+- * guest can't fiddle with the contents of the headers while we do
+- * packet processing on them (netfilter, routing, etc).
+- */
+-#define PKT_PROT_LEN    (ETH_HLEN + \
+-			 VLAN_HLEN + \
+-			 sizeof(struct iphdr) + MAX_IPOPTLEN + \
+-			 sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE)
+-
+-static inline pending_ring_idx_t pending_index(unsigned i)
+-{
+-	return i & (MAX_PENDING_REQS-1);
+-}
+-
+-static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk)
+-{
+-	return MAX_PENDING_REQS -
+-		netbk->pending_prod + netbk->pending_cons;
+-}
+-
+-/* Setting this allows the safe use of this driver without netloop. */
+-static int MODPARM_copy_skb = 1;
+-module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
+-MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
+-
+-int netbk_copy_skb_mode;
+-
+-static int MODPARM_netback_kthread;
+-module_param_named(netback_kthread, MODPARM_netback_kthread, bool, 0);
+-MODULE_PARM_DESC(netback_kthread, "Use kernel thread to replace tasklet");
+-
+-/*
+- * Netback bottom half handler.
+- * dir indicates the data direction.
+- * rx: 1, tx: 0.
+- */
+-static inline void xen_netbk_bh_handler(struct xen_netbk *netbk, int dir)
+-{
+-	if (MODPARM_netback_kthread)
+-		wake_up(&netbk->kthread.netbk_action_wq);
+-	else if (dir)
+-		tasklet_schedule(&netbk->tasklet.net_rx_tasklet);
+-	else
+-		tasklet_schedule(&netbk->tasklet.net_tx_tasklet);
+-}
+-
+-static inline void maybe_schedule_tx_action(struct xen_netbk *netbk)
+-{
+-	smp_mb();
+-	if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
+-	    !list_empty(&netbk->net_schedule_list))
+-		xen_netbk_bh_handler(netbk, 0);
+-}
+-
+-static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
+-{
+-	struct skb_shared_info *ninfo;
+-	struct sk_buff *nskb;
+-	unsigned long offset;
+-	int ret;
+-	int len;
+-	int headlen;
+-
+-	BUG_ON(skb_shinfo(skb)->frag_list != NULL);
+-
+-	nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN);
+-	if (unlikely(!nskb))
+-		goto err;
+-
+-	skb_reserve(nskb, NET_SKB_PAD + NET_IP_ALIGN);
+-	headlen = skb_end_pointer(nskb) - nskb->data;
+-	if (headlen > skb_headlen(skb))
+-		headlen = skb_headlen(skb);
+-	ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
+-	BUG_ON(ret);
+-
+-	ninfo = skb_shinfo(nskb);
+-	ninfo->gso_size = skb_shinfo(skb)->gso_size;
+-	ninfo->gso_type = skb_shinfo(skb)->gso_type;
+-
+-	offset = headlen;
+-	len = skb->len - headlen;
+-
+-	nskb->len = skb->len;
+-	nskb->data_len = len;
+-	nskb->truesize += len;
+-
+-	while (len) {
+-		struct page *page;
+-		int copy;
+-		int zero;
+-
+-		if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
+-			dump_stack();
+-			goto err_free;
+-		}
+-
+-		copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
+-		zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
+-
+-		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
+-		if (unlikely(!page))
+-			goto err_free;
+-
+-		ret = skb_copy_bits(skb, offset, page_address(page), copy);
+-		BUG_ON(ret);
+-
+-		ninfo->frags[ninfo->nr_frags].page = page;
+-		ninfo->frags[ninfo->nr_frags].page_offset = 0;
+-		ninfo->frags[ninfo->nr_frags].size = copy;
+-		ninfo->nr_frags++;
+-
+-		offset += copy;
+-		len -= copy;
+-	}
+-
+-#ifdef NET_SKBUFF_DATA_USES_OFFSET
+-	offset = 0;
+-#else
+-	offset = nskb->data - skb->data;
+-#endif
+-
+-	nskb->transport_header = skb->transport_header + offset;
+-	nskb->network_header = skb->network_header + offset;
+-	nskb->mac_header = skb->mac_header + offset;
+-
+-	return nskb;
+-
+- err_free:
+-	kfree_skb(nskb);
+- err:
+-	return NULL;
+-}
+-
+-static inline int netbk_max_required_rx_slots(struct xen_netif *netif)
+-{
+-	if (netif->can_sg || netif->gso || netif->gso_prefix)
+-		return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
+-	return 1; /* all in one */
+-}
+-
+-static inline int netbk_queue_full(struct xen_netif *netif)
+-{
+-	RING_IDX peek   = netif->rx_req_cons_peek;
+-	RING_IDX needed = netbk_max_required_rx_slots(netif);
+-
+-	return ((netif->rx.sring->req_prod - peek) < needed) ||
+-	       ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
+-}
+-
+-/*
+- * Returns true if we should start a new receive buffer instead of
+- * adding 'size' bytes to a buffer which currently contains 'offset'
+- * bytes.
+- */
+-static bool start_new_rx_buffer(int offset, unsigned long size, int head)
+-{
+-	/* simple case: we have completely filled the current buffer. */
+-	if (offset == MAX_BUFFER_OFFSET)
+-		return true;
+-
+-	/*
+-	 * complex case: start a fresh buffer if the current frag
+-	 * would overflow the current buffer but only if:
+-	 *     (i)   this frag would fit completely in the next buffer
+-	 * and (ii)  there is already some data in the current buffer
+-	 * and (iii) this is not the head buffer.
+-	 *
+-	 * Where:
+-	 * - (i) stops us splitting a frag into two copies
+-	 *   unless the frag is too large for a single buffer.
+-	 * - (ii) stops us from leaving a buffer pointlessly empty.
+-	 * - (iii) stops us leaving the first buffer
+-	 *   empty. Strictly speaking this is already covered
+-	 *   by (ii) but is explicitly checked because
+-	 *   netfront relies on the first buffer being
+-	 *   non-empty and can crash otherwise.
+-	 *
+-	 * This means we will effectively linearise small
+-	 * frags but do not needlessly split large buffers
+-	 * into multiple copies tend to give large frags their
+-	 * own buffers as before.
+-	 */
+-	if ((offset + size > MAX_BUFFER_OFFSET) &&
+-	    (size <= MAX_BUFFER_OFFSET) && offset && !head)
+-		return true;
+-
+-	return false;
+-}
+-
+-/*
+- * Figure out how many ring slots we're going to need to send @skb to
+- * the guest. This function is essentially a dry run of
+- * netbk_gop_frag_copy.
+- */
+-static unsigned int count_skb_slots(struct sk_buff *skb, struct xen_netif *netif)
+-{
+-	unsigned int count = 1;
+-	int i, copy_off = 0;
+-
+-	BUG_ON(offset_in_page(skb->data)+skb_headlen(skb) > MAX_BUFFER_OFFSET);
+-
+-	copy_off = skb_headlen(skb);
+-
+-	if (skb_shinfo(skb)->gso_size)
+-		count++;
+-
+-	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+-		unsigned long size = skb_shinfo(skb)->frags[i].size;
+-		unsigned long bytes;
+-		while (size > 0) {
+-			BUG_ON(copy_off > MAX_BUFFER_OFFSET);
+-
+-			if (start_new_rx_buffer(copy_off, size, 0)) {
+-				count++;
+-				copy_off = 0;
+-			}
+-
+-			bytes = size;
+-			if (copy_off + bytes > MAX_BUFFER_OFFSET)
+-				bytes = MAX_BUFFER_OFFSET - copy_off;
+-
+-			copy_off += bytes;
+-			size -= bytes;
+-		}
+-	}
+-	return count;
+-}
+-
+-int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+-{
+-	struct xen_netif *netif = netdev_priv(dev);
+-	struct xen_netbk *netbk;
+-
+-	BUG_ON(skb->dev != dev);
+-
+-	if (netif->group == -1)
+-		goto drop;
+-
+-	netbk = &xen_netbk[netif->group];
+-
+-	/* Drop the packet if the target domain has no receive buffers. */
+-	if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
+-		goto drop;
+-
+-	/*
+-	 * XXX For now we also copy skbuffs whose head crosses a page
+-	 * boundary, because netbk_gop_skb can't handle them.
+-	 */
+-	if ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE) {
+-		struct sk_buff *nskb = netbk_copy_skb(skb);
+-		if (unlikely(nskb == NULL))
+-			goto drop;
+-		/* Copy only the header fields we use in this driver. */
+-		nskb->dev = skb->dev;
+-		nskb->ip_summed = skb->ip_summed;
+-		dev_kfree_skb(skb);
+-		skb = nskb;
+-	}
+-
+-	/* Reserve ring slots for the worst-case number of fragments. */
+-	netif->rx_req_cons_peek += count_skb_slots(skb, netif);
+-	netif_get(netif);
+-
+-	if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
+-		netif->rx.sring->req_event = netif->rx_req_cons_peek +
+-			netbk_max_required_rx_slots(netif);
+-		mb(); /* request notification /then/ check & stop the queue */
+-		if (netbk_queue_full(netif))
+-			netif_stop_queue(dev);
+-	}
+-	skb_queue_tail(&netbk->rx_queue, skb);
+-
+-	xen_netbk_bh_handler(netbk, 1);
+-
+-	return 0;
+-
+- drop:
+-	netif->stats.tx_dropped++;
+-	dev_kfree_skb(skb);
+-	return 0;
+-}
+-
+-struct netrx_pending_operations {
+-	unsigned copy_prod, copy_cons;
+-	unsigned meta_prod, meta_cons;
+-	struct gnttab_copy *copy;
+-	struct netbk_rx_meta *meta;
+-	int copy_off;
+-	grant_ref_t copy_gref;
+-};
+-
+-static struct netbk_rx_meta *get_next_rx_buffer(struct xen_netif *netif,
+-						struct netrx_pending_operations *npo)
+-{
+-	struct netbk_rx_meta *meta;
+-	struct xen_netif_rx_request *req;
+-
+-	req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
+-
+-	meta = npo->meta + npo->meta_prod++;
+-	meta->gso_size = 0;
+-	meta->size = 0;
+-	meta->id = req->id;
+-
+-	npo->copy_off = 0;
+-	npo->copy_gref = req->gref;
+-
+-	return meta;
+-}
+-
+-/*
+- * Set up the grant operations for this fragment. If it's a flipping
+- * interface, we also set up the unmap request from here.
+- */
+-static void netbk_gop_frag_copy(struct xen_netif *netif,
+-				struct netrx_pending_operations *npo,
+-				struct page *page, unsigned long size,
+-				unsigned long offset, int head)
+-{
+-	struct gnttab_copy *copy_gop;
+-	struct netbk_rx_meta *meta;
+-	/*
+-	 * These variables a used iff netif_get_page_ext returns true,
+-	 * in which case they are guaranteed to be initialized.
+-	 */
+-	unsigned int uninitialized_var(group), uninitialized_var(idx);
+-	int foreign = netif_get_page_ext(page, &group, &idx);
+-	unsigned long bytes;
+-
+-	/* Data must not cross a page boundary. */
+-	BUG_ON(size + offset > PAGE_SIZE);
+-
+-	meta = npo->meta + npo->meta_prod - 1;
+-
+-	while (size > 0) {
+-		BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
+-
+-		if (start_new_rx_buffer(npo->copy_off, size, head)) {
+-			/*
+-			 * Netfront requires there to be some data in
+-			 * the head buffer.
+-			 */
+-			BUG_ON(head);
+-
+-			meta = get_next_rx_buffer(netif, npo);
+-		}
+-
+-		bytes = size;
+-		if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
+-			bytes = MAX_BUFFER_OFFSET - npo->copy_off;
+-
+-		copy_gop = npo->copy + npo->copy_prod++;
+-		copy_gop->flags = GNTCOPY_dest_gref;
+-		if (foreign) {
+-			struct xen_netbk *netbk = &xen_netbk[group];
+-			struct pending_tx_info *src_pend;
+-
+-			src_pend = &netbk->pending_tx_info[idx];
+-
+-			copy_gop->source.domid = src_pend->netif->domid;
+-			copy_gop->source.u.ref = src_pend->req.gref;
+-			copy_gop->flags |= GNTCOPY_source_gref;
+-		} else {
+-			void *vaddr = page_address(page);
+-			copy_gop->source.domid = DOMID_SELF;
+-			copy_gop->source.u.gmfn = virt_to_mfn(vaddr);
+-		}
+-		copy_gop->source.offset = offset;
+-		copy_gop->dest.domid = netif->domid;
+-
+-		copy_gop->dest.offset = npo->copy_off;
+-		copy_gop->dest.u.ref = npo->copy_gref;
+-		copy_gop->len = bytes;
+-
+-		npo->copy_off += bytes;
+-		meta->size += bytes;
+-
+-		offset += bytes;
+-		size -= bytes;
+-		head = 0; /* There must be something in this buffer now. */
+-	}
+-}
+-
+-/*
+- * Prepare an SKB to be transmitted to the frontend.
+- *
+- * This function is responsible for allocating grant operations, meta
+- * structures, etc.
+- *
+- * It returns the number of meta structures consumed. The number of
+- * ring slots used is always equal to the number of meta slots used
+- * plus the number of GSO descriptors used. Currently, we use either
+- * zero GSO descriptors (for non-GSO packets) or one descriptor (for
+- * frontend-side LRO).
+- */
+-static int netbk_gop_skb(struct sk_buff *skb,
+-			 struct netrx_pending_operations *npo)
+-{
+-	struct xen_netif *netif = netdev_priv(skb->dev);
+-	int nr_frags = skb_shinfo(skb)->nr_frags;
+-	int i;
+-	struct xen_netif_rx_request *req;
+-	struct netbk_rx_meta *meta;
+-	int old_meta_prod;
+-
+-	old_meta_prod = npo->meta_prod;
+-
+-	/* Set up a GSO prefix descriptor, if necessary */
+-	if (skb_shinfo(skb)->gso_size && netif->gso_prefix) {
+-		req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
+-		meta = npo->meta + npo->meta_prod++;
+-		meta->gso_size = skb_shinfo(skb)->gso_size;
+-		meta->size = 0;
+-		meta->id = req->id;
+-	}
+-
+-	req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
+-	meta = npo->meta + npo->meta_prod++;
+-
+-	if (!netif->gso_prefix)
+-		meta->gso_size = skb_shinfo(skb)->gso_size;
+-	else
+-		meta->gso_size = 0;
+-
+-	meta->size = 0;
+-	meta->id = req->id;
+-	npo->copy_off = 0;
+-	npo->copy_gref = req->gref;
+-
+-	netbk_gop_frag_copy(netif,
+-			    npo, virt_to_page(skb->data),
+-			    skb_headlen(skb),
+-			    offset_in_page(skb->data), 1);
+-
+-	/* Leave a gap for the GSO descriptor. */
+-	if (skb_shinfo(skb)->gso_size && !netif->gso_prefix)
+-		netif->rx.req_cons++;
+-
+-	for (i = 0; i < nr_frags; i++) {
+-		netbk_gop_frag_copy(netif, npo,
+-				    skb_shinfo(skb)->frags[i].page,
+-				    skb_shinfo(skb)->frags[i].size,
+-				    skb_shinfo(skb)->frags[i].page_offset,
+-				    0);
+-	}
+-
+-	return npo->meta_prod - old_meta_prod;
+-}
+-
+-/*
+- * This is a twin to netbk_gop_skb.  Assume that netbk_gop_skb was
+- * used to set up the operations on the top of
+- * netrx_pending_operations, which have since been done.  Check that
+- * they didn't give any errors and advance over them.
+- */
+-static int netbk_check_gop(int nr_meta_slots, domid_t domid,
+-			   struct netrx_pending_operations *npo)
+-{
+-	struct gnttab_copy     *copy_op;
+-	int status = NETIF_RSP_OKAY;
+-	int i;
+-
+-	for (i = 0; i < nr_meta_slots; i++) {
+-		copy_op = npo->copy + npo->copy_cons++;
+-		if (copy_op->status != GNTST_okay) {
+-				pr_debug("Bad status %d from copy to DOM%d.\n",
+-					 copy_op->status, domid);
+-				status = NETIF_RSP_ERROR;
+-			}
+-	}
+-
+-	return status;
+-}
+-
+-static void netbk_add_frag_responses(struct xen_netif *netif, int status,
+-				     struct netbk_rx_meta *meta,
+-				     int nr_meta_slots)
+-{
+-	int i;
+-	unsigned long offset;
+-
+-	/* No fragments used */
+-	if (nr_meta_slots <= 1)
+-		return;
+-
+-	nr_meta_slots--;
+-
+-	for (i = 0; i < nr_meta_slots; i++) {
+-		int flags;
+-		if (i == nr_meta_slots - 1)
+-			flags = 0;
+-		else
+-			flags = NETRXF_more_data;
+-
+-		offset = 0;
+-		make_rx_response(netif, meta[i].id, status, offset,
+-				 meta[i].size, flags);
+-	}
+-}
+-
+-struct skb_cb_overlay {
+-	int meta_slots_used;
+-};
+-
+-static void net_rx_action(unsigned long data)
+-{
+-	struct xen_netif *netif = NULL;
+-	struct xen_netbk *netbk = (struct xen_netbk *)data;
+-	s8 status;
+-	u16 irq, flags;
+-	struct xen_netif_rx_response *resp;
+-	struct sk_buff_head rxq;
+-	struct sk_buff *skb;
+-	int notify_nr = 0;
+-	int ret;
+-	int nr_frags;
+-	int count;
+-	unsigned long offset;
+-	struct skb_cb_overlay *sco;
+-
+-	struct netrx_pending_operations npo = {
+-		.copy  = netbk->grant_copy_op,
+-		.meta  = netbk->meta,
+-	};
+-
+-	skb_queue_head_init(&rxq);
+-
+-	count = 0;
+-
+-	while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) {
+-		netif = netdev_priv(skb->dev);
+-		nr_frags = skb_shinfo(skb)->nr_frags;
+-
+-		sco = (struct skb_cb_overlay *)skb->cb;
+-		sco->meta_slots_used = netbk_gop_skb(skb, &npo);
+-
+-		count += nr_frags + 1;
+-
+-		__skb_queue_tail(&rxq, skb);
+-
+-		/* Filled the batch queue? */
+-		if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE)
+-			break;
+-	}
+-
+-	BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
+-
+-	if (!npo.copy_prod)
+-		return;
+-
+-	BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
+-	ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, &netbk->grant_copy_op,
+-					npo.copy_prod);
+-	BUG_ON(ret != 0);
+-
+-	while ((skb = __skb_dequeue(&rxq)) != NULL) {
+-		sco = (struct skb_cb_overlay *)skb->cb;
+-
+-		netif = netdev_priv(skb->dev);
+-
+-		if (netbk->meta[npo.meta_cons].gso_size && netif->gso_prefix) {
+-			resp = RING_GET_RESPONSE(&netif->rx,
+-						netif->rx.rsp_prod_pvt++);
+-
+-			resp->flags = NETRXF_gso_prefix | NETRXF_more_data;
+-
+-			resp->offset = netbk->meta[npo.meta_cons].gso_size;
+-			resp->id = netbk->meta[npo.meta_cons].id;
+-			resp->status = sco->meta_slots_used;
+-
+-			npo.meta_cons++;
+-			sco->meta_slots_used--;
+-		}
+-
+-
+-		netif->stats.tx_bytes += skb->len;
+-		netif->stats.tx_packets++;
+-
+-		status = netbk_check_gop(sco->meta_slots_used,
+-					 netif->domid, &npo);
+-
+-		if (sco->meta_slots_used == 1)
+-			flags = 0;
+-		else
+-			flags = NETRXF_more_data;
+-
+-		if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
+-			flags |= NETRXF_csum_blank | NETRXF_data_validated;
+-		else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
+-			/* remote but checksummed. */
+-			flags |= NETRXF_data_validated;
+-
+-		offset = 0;
+-		resp = make_rx_response(netif, netbk->meta[npo.meta_cons].id,
+-					status, offset,
+-					netbk->meta[npo.meta_cons].size,
+-					flags);
+-
+-		if (netbk->meta[npo.meta_cons].gso_size && !netif->gso_prefix) {
+-			struct xen_netif_extra_info *gso =
+-				(struct xen_netif_extra_info *)
+-				RING_GET_RESPONSE(&netif->rx,
+-						  netif->rx.rsp_prod_pvt++);
+-
+-			resp->flags |= NETRXF_extra_info;
+-
+-			gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size;
+-			gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
+-			gso->u.gso.pad = 0;
+-			gso->u.gso.features = 0;
+-
+-			gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
+-			gso->flags = 0;
+-		}
+-
+-		netbk_add_frag_responses(netif, status,
+-					 netbk->meta + npo.meta_cons + 1,
+-					 sco->meta_slots_used);
+-
+-		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
+-		irq = netif->irq;
+-		if (ret && !netbk->rx_notify[irq]) {
+-			netbk->rx_notify[irq] = 1;
+-			netbk->notify_list[notify_nr++] = irq;
+-		}
+-
+-		if (netif_queue_stopped(netif->dev) &&
+-		    netif_schedulable(netif) &&
+-		    !netbk_queue_full(netif))
+-			netif_wake_queue(netif->dev);
+-
+-		netif_put(netif);
+-		npo.meta_cons += sco->meta_slots_used;
+-		dev_kfree_skb(skb);
+-	}
+-
+-	while (notify_nr != 0) {
+-		irq = netbk->notify_list[--notify_nr];
+-		netbk->rx_notify[irq] = 0;
+-		notify_remote_via_irq(irq);
+-	}
+-
+-	/* More work to do? */
+-	if (!skb_queue_empty(&netbk->rx_queue) &&
+-			!timer_pending(&netbk->net_timer))
+-		xen_netbk_bh_handler(netbk, 1);
+-}
+-
+-static void net_alarm(unsigned long data)
+-{
+-	struct xen_netbk *netbk = (struct xen_netbk *)data;
+-	xen_netbk_bh_handler(netbk, 1);
+-}
+-
+-static void netbk_tx_pending_timeout(unsigned long data)
+-{
+-	struct xen_netbk *netbk = (struct xen_netbk *)data;
+-	xen_netbk_bh_handler(netbk, 0);
+-}
+-
+-struct net_device_stats *netif_be_get_stats(struct net_device *dev)
+-{
+-	struct xen_netif *netif = netdev_priv(dev);
+-	return &netif->stats;
+-}
+-
+-static int __on_net_schedule_list(struct xen_netif *netif)
+-{
+-	return !list_empty(&netif->list);
+-}
+-
+-/* Must be called with net_schedule_list_lock held */
+-static void remove_from_net_schedule_list(struct xen_netif *netif)
+-{
+-	if (likely(__on_net_schedule_list(netif))) {
+-		list_del_init(&netif->list);
+-		netif_put(netif);
+-	}
+-}
+-
+-static struct xen_netif *poll_net_schedule_list(struct xen_netbk *netbk)
+-{
+-	struct xen_netif *netif = NULL;
+-
+-	spin_lock_irq(&netbk->net_schedule_list_lock);
+-	if (list_empty(&netbk->net_schedule_list))
+-		goto out;
+-
+-	netif = list_first_entry(&netbk->net_schedule_list,
+-				 struct xen_netif, list);
+-	if (!netif)
+-		goto out;
+-
+-	netif_get(netif);
+-
+-	remove_from_net_schedule_list(netif);
+-out:
+-	spin_unlock_irq(&netbk->net_schedule_list_lock);
+-	return netif;
+-}
+-
+-static void add_to_net_schedule_list_tail(struct xen_netif *netif)
+-{
+-	unsigned long flags;
+-
+-	struct xen_netbk *netbk = &xen_netbk[netif->group];
+-	if (__on_net_schedule_list(netif))
+-		return;
+-
+-	spin_lock_irqsave(&netbk->net_schedule_list_lock, flags);
+-	if (!__on_net_schedule_list(netif) &&
+-	    likely(netif_schedulable(netif))) {
+-		list_add_tail(&netif->list, &netbk->net_schedule_list);
+-		netif_get(netif);
+-	}
+-	spin_unlock_irqrestore(&netbk->net_schedule_list_lock, flags);
+-}
+-
+-void netif_schedule_work(struct xen_netif *netif)
+-{
+-	struct xen_netbk *netbk = &xen_netbk[netif->group];
+-	int more_to_do;
+-
+-	RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
+-
+-	if (more_to_do) {
+-		add_to_net_schedule_list_tail(netif);
+-		maybe_schedule_tx_action(netbk);
+-	}
+-}
+-
+-void netif_deschedule_work(struct xen_netif *netif)
+-{
+-	struct xen_netbk *netbk = &xen_netbk[netif->group];
+-	spin_lock_irq(&netbk->net_schedule_list_lock);
+-	remove_from_net_schedule_list(netif);
+-	spin_unlock_irq(&netbk->net_schedule_list_lock);
+-}
+-
+-
+-static void tx_add_credit(struct xen_netif *netif)
+-{
+-	unsigned long max_burst, max_credit;
+-
+-	/*
+-	 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
+-	 * Otherwise the interface can seize up due to insufficient credit.
+-	 */
+-	max_burst = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size;
+-	max_burst = min(max_burst, 131072UL);
+-	max_burst = max(max_burst, netif->credit_bytes);
+-
+-	/* Take care that adding a new chunk of credit doesn't wrap to zero. */
+-	max_credit = netif->remaining_credit + netif->credit_bytes;
+-	if (max_credit < netif->remaining_credit)
+-		max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
+-
+-	netif->remaining_credit = min(max_credit, max_burst);
+-}
+-
+-static void tx_credit_callback(unsigned long data)
+-{
+-	struct xen_netif *netif = (struct xen_netif *)data;
+-	tx_add_credit(netif);
+-	netif_schedule_work(netif);
+-}
+-
+-static inline int copy_pending_req(struct xen_netbk *netbk,
+-				   pending_ring_idx_t pending_idx)
+-{
+-	return gnttab_copy_grant_page(
+-			netbk->grant_tx_handle[pending_idx],
+-			&netbk->mmap_pages[pending_idx]);
+-}
+-
+-static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
+-{
+-	struct netbk_tx_pending_inuse *inuse, *n;
+-	struct gnttab_unmap_grant_ref *gop;
+-	u16 pending_idx;
+-	pending_ring_idx_t dc, dp;
+-	struct xen_netif *netif;
+-	int ret;
+-	LIST_HEAD(list);
+-
+-	dc = netbk->dealloc_cons;
+-	gop = netbk->tx_unmap_ops;
+-
+-	/* Free up any grants we have finished using. */
+-	do {
+-		dp = netbk->dealloc_prod;
+-
+-		/* Ensure we see all indices enqueued by netif_idx_release(). */
+-		smp_rmb();
+-
+-		while (dc != dp) {
+-			unsigned long pfn;
+-			struct netbk_tx_pending_inuse *pending_inuse =
+-					netbk->pending_inuse;
+-
+-			pending_idx = netbk->dealloc_ring[pending_index(dc++)];
+-			list_move_tail(&pending_inuse[pending_idx].list, &list);
+-
+-			pfn = idx_to_pfn(netbk, pending_idx);
+-			/* Already unmapped? */
+-			if (!phys_to_machine_mapping_valid(pfn))
+-				continue;
+-
+-			gnttab_set_unmap_op(gop,
+-					idx_to_kaddr(netbk, pending_idx),
+-					GNTMAP_host_map,
+-					netbk->grant_tx_handle[pending_idx]);
+-			gop++;
+-		}
+-
+-	} while (dp != netbk->dealloc_prod);
+-
+-	netbk->dealloc_cons = dc;
+-
+-	ret = HYPERVISOR_grant_table_op(
+-		GNTTABOP_unmap_grant_ref, netbk->tx_unmap_ops,
+-		gop - netbk->tx_unmap_ops);
+-	BUG_ON(ret);
+-
+-	/*
+-	 * Copy any entries that have been pending for too long
+-	 */
+-	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
+-	    !list_empty(&netbk->pending_inuse_head)) {
+-		list_for_each_entry_safe(inuse, n,
+-				&netbk->pending_inuse_head, list) {
+-			struct pending_tx_info *pending_tx_info;
+-			pending_tx_info = netbk->pending_tx_info;
+-
+-			if (time_after(inuse->alloc_time + HZ / 2, jiffies))
+-				break;
+-
+-			pending_idx = inuse - netbk->pending_inuse;
+-
+-			pending_tx_info[pending_idx].netif->nr_copied_skbs++;
+-
+-			switch (copy_pending_req(netbk, pending_idx)) {
+-			case 0:
+-				list_move_tail(&inuse->list, &list);
+-				continue;
+-			case -EBUSY:
+-				list_del_init(&inuse->list);
+-				continue;
+-			case -ENOENT:
+-				continue;
+-			}
+-
+-			break;
+-		}
+-	}
+-
+-	list_for_each_entry_safe(inuse, n, &list, list) {
+-		struct pending_tx_info *pending_tx_info;
+-		pending_ring_idx_t index;
+-
+-		pending_tx_info = netbk->pending_tx_info;
+-		pending_idx = inuse - netbk->pending_inuse;
+-
+-		netif = pending_tx_info[pending_idx].netif;
+-
+-		make_tx_response(netif, &pending_tx_info[pending_idx].req,
+-				 NETIF_RSP_OKAY);
+-
+-		/* Ready for next use. */
+-		gnttab_reset_grant_page(netbk->mmap_pages[pending_idx]);
+-
+-		index = pending_index(netbk->pending_prod++);
+-		netbk->pending_ring[index] = pending_idx;
+-
+-		netif_put(netif);
+-
+-		list_del_init(&inuse->list);
+-	}
+-}
+-
+-static void netbk_tx_err(struct xen_netif *netif,
+-		struct xen_netif_tx_request *txp, RING_IDX end)
+-{
+-	RING_IDX cons = netif->tx.req_cons;
+-
+-	do {
+-		make_tx_response(netif, txp, NETIF_RSP_ERROR);
+-		if (cons >= end)
+-			break;
+-		txp = RING_GET_REQUEST(&netif->tx, cons++);
+-	} while (1);
+-	netif->tx.req_cons = cons;
+-	netif_schedule_work(netif);
+-	netif_put(netif);
+-}
+-
+-static int netbk_count_requests(struct xen_netif *netif,
+-				struct xen_netif_tx_request *first,
+-				struct xen_netif_tx_request *txp,
+-				int work_to_do)
+-{
+-	RING_IDX cons = netif->tx.req_cons;
+-	int frags = 0;
+-
+-	if (!(first->flags & NETTXF_more_data))
+-		return 0;
+-
+-	do {
+-		if (frags >= work_to_do) {
+-			DPRINTK("Need more frags\n");
+-			return -frags;
+-		}
+-
+-		if (unlikely(frags >= MAX_SKB_FRAGS)) {
+-			DPRINTK("Too many frags\n");
+-			return -frags;
+-		}
+-
+-		memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags),
+-		       sizeof(*txp));
+-		if (txp->size > first->size) {
+-			DPRINTK("Frags galore\n");
+-			return -frags;
+-		}
+-
+-		first->size -= txp->size;
+-		frags++;
+-
+-		if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
+-			DPRINTK("txp->offset: %x, size: %u\n",
+-				txp->offset, txp->size);
+-			return -frags;
+-		}
+-	} while ((txp++)->flags & NETTXF_more_data);
+-
+-	return frags;
+-}
+-
+-static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk,
+-						       struct xen_netif *netif,
+-						       struct sk_buff *skb,
+-						       struct xen_netif_tx_request *txp,
+-						       struct gnttab_map_grant_ref *mop)
+-{
+-	struct skb_shared_info *shinfo = skb_shinfo(skb);
+-	skb_frag_t *frags = shinfo->frags;
+-	unsigned long pending_idx = *((u16 *)skb->data);
+-	int i, start;
+-
+-	/* Skip first skb fragment if it is on same page as header fragment. */
+-	start = ((unsigned long)shinfo->frags[0].page == pending_idx);
+-
+-	for (i = start; i < shinfo->nr_frags; i++, txp++) {
+-		pending_ring_idx_t index;
+-		struct pending_tx_info *pending_tx_info =
+-			netbk->pending_tx_info;
+-
+-		index = pending_index(netbk->pending_cons++);
+-		pending_idx = netbk->pending_ring[index];
+-
+-		gnttab_set_map_op(mop++, idx_to_kaddr(netbk, pending_idx),
+-				  GNTMAP_host_map | GNTMAP_readonly,
+-				  txp->gref, netif->domid);
+-
+-		memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
+-		netif_get(netif);
+-		pending_tx_info[pending_idx].netif = netif;
+-		frags[i].page = (void *)pending_idx;
+-	}
+-
+-	return mop;
+-}
+-
+-static int netbk_tx_check_mop(struct xen_netbk *netbk,
+-			      struct sk_buff *skb,
+-			      struct gnttab_map_grant_ref **mopp)
+-{
+-	struct gnttab_map_grant_ref *mop = *mopp;
+-	int pending_idx = *((u16 *)skb->data);
+-	struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
+-	struct xen_netif *netif = pending_tx_info[pending_idx].netif;
+-	struct xen_netif_tx_request *txp;
+-	struct skb_shared_info *shinfo = skb_shinfo(skb);
+-	int nr_frags = shinfo->nr_frags;
+-	int i, err, start;
+-
+-	/* Check status of header. */
+-	err = mop->status;
+-	if (unlikely(err)) {
+-		pending_ring_idx_t index;
+-		index = pending_index(netbk->pending_prod++);
+-		txp = &pending_tx_info[pending_idx].req;
+-		make_tx_response(netif, txp, NETIF_RSP_ERROR);
+-		netbk->pending_ring[index] = pending_idx;
+-		netif_put(netif);
+-	} else {
+-		set_phys_to_machine(
+-			__pa(idx_to_kaddr(netbk, pending_idx)) >> PAGE_SHIFT,
+-			FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
+-		netbk->grant_tx_handle[pending_idx] = mop->handle;
+-	}
+-
+-	/* Skip first skb fragment if it is on same page as header fragment. */
+-	start = ((unsigned long)shinfo->frags[0].page == pending_idx);
+-
+-	for (i = start; i < nr_frags; i++) {
+-		int j, newerr;
+-		pending_ring_idx_t index;
+-
+-		pending_idx = (unsigned long)shinfo->frags[i].page;
+-
+-		/* Check error status: if okay then remember grant handle. */
+-		newerr = (++mop)->status;
+-		if (likely(!newerr)) {
+-			unsigned long addr;
+-			addr = idx_to_kaddr(netbk, pending_idx);
+-			set_phys_to_machine(
+-				__pa(addr)>>PAGE_SHIFT,
+-				FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
+-			netbk->grant_tx_handle[pending_idx] = mop->handle;
+-			/* Had a previous error? Invalidate this fragment. */
+-			if (unlikely(err))
+-				netif_idx_release(netbk, pending_idx);
+-			continue;
+-		}
+-
+-		/* Error on this fragment: respond to client with an error. */
+-		txp = &netbk->pending_tx_info[pending_idx].req;
+-		make_tx_response(netif, txp, NETIF_RSP_ERROR);
+-		index = pending_index(netbk->pending_prod++);
+-		netbk->pending_ring[index] = pending_idx;
+-		netif_put(netif);
+-
+-		/* Not the first error? Preceding frags already invalidated. */
+-		if (err)
+-			continue;
+-
+-		/* First error: invalidate header and preceding fragments. */
+-		pending_idx = *((u16 *)skb->data);
+-		netif_idx_release(netbk, pending_idx);
+-		for (j = start; j < i; j++) {
+-			pending_idx = (unsigned long)shinfo->frags[i].page;
+-			netif_idx_release(netbk, pending_idx);
+-		}
+-
+-		/* Remember the error: invalidate all subsequent fragments. */
+-		err = newerr;
+-	}
+-
+-	*mopp = mop + 1;
+-	return err;
+-}
+-
+-static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
+-{
+-	struct skb_shared_info *shinfo = skb_shinfo(skb);
+-	int nr_frags = shinfo->nr_frags;
+-	int i;
+-
+-	for (i = 0; i < nr_frags; i++) {
+-		skb_frag_t *frag = shinfo->frags + i;
+-		struct xen_netif_tx_request *txp;
+-		unsigned long pending_idx;
+-
+-		pending_idx = (unsigned long)frag->page;
+-
+-		netbk->pending_inuse[pending_idx].alloc_time = jiffies;
+-		list_add_tail(&netbk->pending_inuse[pending_idx].list,
+-			      &netbk->pending_inuse_head);
+-
+-		txp = &netbk->pending_tx_info[pending_idx].req;
+-		frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx));
+-		frag->size = txp->size;
+-		frag->page_offset = txp->offset;
+-
+-		skb->len += txp->size;
+-		skb->data_len += txp->size;
+-		skb->truesize += txp->size;
+-	}
+-}
+-
+-int netbk_get_extras(struct xen_netif *netif,
+-		     struct xen_netif_extra_info *extras,
+-		     int work_to_do)
+-{
+-	struct xen_netif_extra_info extra;
+-	RING_IDX cons = netif->tx.req_cons;
+-
+-	do {
+-		if (unlikely(work_to_do-- <= 0)) {
+-			pr_debug("Missing extra info\n");
+-			return -EBADR;
+-		}
+-
+-		memcpy(&extra, RING_GET_REQUEST(&netif->tx, cons),
+-		       sizeof(extra));
+-		if (unlikely(!extra.type ||
+-			     extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
+-			netif->tx.req_cons = ++cons;
+-			pr_debug("Invalid extra type: %d\n", extra.type);
+-			return -EINVAL;
+-		}
+-
+-		memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
+-		netif->tx.req_cons = ++cons;
+-	} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
+-
+-	return work_to_do;
+-}
+-
+-static int netbk_set_skb_gso(struct sk_buff *skb,
+-			     struct xen_netif_extra_info *gso)
+-{
+-	if (!gso->u.gso.size) {
+-		pr_debug("GSO size must not be zero.\n");
+-		return -EINVAL;
+-	}
+-
+-	/* Currently only TCPv4 S.O. is supported. */
+-	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
+-		pr_debug("Bad GSO type %d.\n", gso->u.gso.type);
+-		return -EINVAL;
+-	}
+-
+-	skb_shinfo(skb)->gso_size = gso->u.gso.size;
+-	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+-
+-	/* Header must be checked, and gso_segs computed. */
+-	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
+-	skb_shinfo(skb)->gso_segs = 0;
+-
+-	return 0;
+-}
+-
+-static int skb_checksum_setup(struct sk_buff *skb)
+-{
+-	struct iphdr *iph;
+-	unsigned char *th;
+-	int err = -EPROTO;
+-
+-	if (skb->protocol != htons(ETH_P_IP))
+-		goto out;
+-
+-	iph = (void *)skb->data;
+-	th = skb->data + 4 * iph->ihl;
+-	if (th >= skb_tail_pointer(skb))
+-		goto out;
+-
+-	skb->csum_start = th - skb->head;
+-	switch (iph->protocol) {
+-	case IPPROTO_TCP:
+-		skb->csum_offset = offsetof(struct tcphdr, check);
+-		break;
+-	case IPPROTO_UDP:
+-		skb->csum_offset = offsetof(struct udphdr, check);
+-		break;
+-	default:
+-		if (net_ratelimit())
+-			printk(KERN_ERR "Attempting to checksum a non-"
+-			       "TCP/UDP packet, dropping a protocol"
+-			       " %d packet", iph->protocol);
+-		goto out;
+-	}
+-
+-	if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
+-		goto out;
+-
+-	err = 0;
+-
+-out:
+-	return err;
+-}
+-
+-static bool tx_credit_exceeded(struct xen_netif *netif, unsigned size)
+-{
+-	unsigned long now = jiffies;
+-	unsigned long next_credit =
+-		netif->credit_timeout.expires +
+-		msecs_to_jiffies(netif->credit_usec / 1000);
+-
+-	/* Timer could already be pending in rare cases. */
+-	if (timer_pending(&netif->credit_timeout))
+-		return true;
+-
+-	/* Passed the point where we can replenish credit? */
+-	if (time_after_eq(now, next_credit)) {
+-		netif->credit_timeout.expires = now;
+-		tx_add_credit(netif);
+-	}
+-
+-	/* Still too big to send right now? Set a callback. */
+-	if (size > netif->remaining_credit) {
+-		netif->credit_timeout.data     =
+-			(unsigned long)netif;
+-		netif->credit_timeout.function =
+-			tx_credit_callback;
+-		mod_timer(&netif->credit_timeout,
+-			  next_credit);
+-
+-		return true;
+-	}
+-
+-	return false;
+-}
+-
+-static unsigned net_tx_build_mops(struct xen_netbk *netbk)
+-{
+-	struct gnttab_map_grant_ref *mop;
+-	struct sk_buff *skb;
+-	int ret;
+-
+-	mop = netbk->tx_map_ops;
+-	while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
+-		!list_empty(&netbk->net_schedule_list)) {
+-		struct xen_netif *netif;
+-		struct xen_netif_tx_request txreq;
+-		struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
+-		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
+-		u16 pending_idx;
+-		RING_IDX idx;
+-		int work_to_do;
+-		unsigned int data_len;
+-		pending_ring_idx_t index;
+-
+-		/* Get a netif from the list with work to do. */
+-		netif = poll_net_schedule_list(netbk);
+-		if (!netif)
+-			continue;
+-
+-		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
+-		if (!work_to_do) {
+-			netif_put(netif);
+-			continue;
+-		}
+-
+-		idx = netif->tx.req_cons;
+-		rmb(); /* Ensure that we see the request before we copy it. */
+-		memcpy(&txreq, RING_GET_REQUEST(&netif->tx, idx), sizeof(txreq));
+-
+-		/* Credit-based scheduling. */
+-		if (txreq.size > netif->remaining_credit &&
+-		    tx_credit_exceeded(netif, txreq.size)) {
+-			netif_put(netif);
+-			continue;
+-		}
+-
+-		netif->remaining_credit -= txreq.size;
+-
+-		work_to_do--;
+-		netif->tx.req_cons = ++idx;
+-
+-		memset(extras, 0, sizeof(extras));
+-		if (txreq.flags & NETTXF_extra_info) {
+-			work_to_do = netbk_get_extras(netif, extras,
+-						      work_to_do);
+-			idx = netif->tx.req_cons;
+-			if (unlikely(work_to_do < 0)) {
+-				netbk_tx_err(netif, &txreq, idx);
+-				continue;
+-			}
+-		}
+-
+-		ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do);
+-		if (unlikely(ret < 0)) {
+-			netbk_tx_err(netif, &txreq, idx - ret);
+-			continue;
+-		}
+-		idx += ret;
+-
+-		if (unlikely(txreq.size < ETH_HLEN)) {
+-			pr_debug("Bad packet size: %d\n", txreq.size);
+-			netbk_tx_err(netif, &txreq, idx);
+-			continue;
+-		}
+-
+-		/* No crossing a page as the payload mustn't fragment. */
+-		if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
+-			pr_debug("txreq.offset: %x, size: %u, end: %lu\n",
+-				 txreq.offset, txreq.size,
+-				 (txreq.offset&~PAGE_MASK) + txreq.size);
+-			netbk_tx_err(netif, &txreq, idx);
+-			continue;
+-		}
+-
+-		index = pending_index(netbk->pending_cons);
+-		pending_idx = netbk->pending_ring[index];
+-
+-		data_len = (txreq.size > PKT_PROT_LEN &&
+-			    ret < MAX_SKB_FRAGS) ?
+-			PKT_PROT_LEN : txreq.size;
+-
+-		skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
+-				GFP_ATOMIC | __GFP_NOWARN);
+-		if (unlikely(skb == NULL)) {
+-			pr_debug("Can't allocate a skb in start_xmit.\n");
+-			netbk_tx_err(netif, &txreq, idx);
+-			break;
+-		}
+-
+-		/* Packets passed to netif_rx() must have some headroom. */
+-		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
+-
+-		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
+-			struct xen_netif_extra_info *gso;
+-			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
+-
+-			if (netbk_set_skb_gso(skb, gso)) {
+-				kfree_skb(skb);
+-				netbk_tx_err(netif, &txreq, idx);
+-				continue;
+-			}
+-		}
+-
+-		gnttab_set_map_op(mop, idx_to_kaddr(netbk, pending_idx),
+-				  GNTMAP_host_map | GNTMAP_readonly,
+-				  txreq.gref, netif->domid);
+-		mop++;
+-
+-		memcpy(&netbk->pending_tx_info[pending_idx].req,
+-		       &txreq, sizeof(txreq));
+-		netbk->pending_tx_info[pending_idx].netif = netif;
+-		*((u16 *)skb->data) = pending_idx;
+-
+-		__skb_put(skb, data_len);
+-
+-		skb_shinfo(skb)->nr_frags = ret;
+-		if (data_len < txreq.size) {
+-			skb_shinfo(skb)->nr_frags++;
+-			skb_shinfo(skb)->frags[0].page =
+-				(void *)(unsigned long)pending_idx;
+-		} else {
+-			/* Discriminate from any valid pending_idx value. */
+-			skb_shinfo(skb)->frags[0].page = (void *)~0UL;
+-		}
+-
+-		__skb_queue_tail(&netbk->tx_queue, skb);
+-
+-		netbk->pending_cons++;
+-
+-		mop = netbk_get_requests(netbk, netif, skb, txfrags, mop);
+-
+-		netif->tx.req_cons = idx;
+-		netif_schedule_work(netif);
+-
+-		if ((mop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops))
+-			break;
+-	}
+-
+-	return mop - netbk->tx_map_ops;
+-}
+-
+-static void net_tx_submit(struct xen_netbk *netbk)
+-{
+-	struct gnttab_map_grant_ref *mop;
+-	struct sk_buff *skb;
+-
+-	mop = netbk->tx_map_ops;
+-	while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
+-		struct xen_netif_tx_request *txp;
+-		struct xen_netif *netif;
+-		u16 pending_idx;
+-		unsigned data_len;
+-
+-		pending_idx = *((u16 *)skb->data);
+-		netif = netbk->pending_tx_info[pending_idx].netif;
+-		txp = &netbk->pending_tx_info[pending_idx].req;
+-
+-		/* Check the remap error code. */
+-		if (unlikely(netbk_tx_check_mop(netbk, skb, &mop))) {
+-			pr_debug("netback grant failed.\n");
+-			skb_shinfo(skb)->nr_frags = 0;
+-			kfree_skb(skb);
+-			continue;
+-		}
+-
+-		data_len = skb->len;
+-		memcpy(skb->data,
+-		       (void *)(idx_to_kaddr(netbk, pending_idx)|txp->offset),
+-		       data_len);
+-		if (data_len < txp->size) {
+-			/* Append the packet payload as a fragment. */
+-			txp->offset += data_len;
+-			txp->size -= data_len;
+-		} else {
+-			/* Schedule a response immediately. */
+-			netif_idx_release(netbk, pending_idx);
+-		}
+-
+-		if (txp->flags & NETTXF_csum_blank)
+-			skb->ip_summed = CHECKSUM_PARTIAL;
+-		else if (txp->flags & NETTXF_data_validated)
+-			skb->ip_summed = CHECKSUM_UNNECESSARY;
+-
+-		netbk_fill_frags(netbk, skb);
+-
+-		/*
+-		 * If the initial fragment was < PKT_PROT_LEN then
+-		 * pull through some bytes from the other fragments to
+-		 * increase the linear region to PKT_PROT_LEN bytes.
+-		 */
+-		if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) {
+-			int target = min_t(int, skb->len, PKT_PROT_LEN);
+-			__pskb_pull_tail(skb, target - skb_headlen(skb));
+-		}
+-
+-		skb->dev      = netif->dev;
+-		skb->protocol = eth_type_trans(skb, skb->dev);
+-
+-		if (skb->ip_summed == CHECKSUM_PARTIAL) {
+-			if (skb_checksum_setup(skb)) {
+-				pr_debug("skb_checksum_setup failed\n");
+-				kfree_skb(skb);
+-				continue;
+-			}
+-		} else if (skb_is_gso(skb)) {
+-			pr_debug("GSO SKB checksum is not partial\n");
+-			kfree_skb(skb);
+-			continue;
+-		}
+-
+-		if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
+-		    unlikely(skb_linearize(skb))) {
+-			DPRINTK("Can't linearize skb in net_tx_action.\n");
+-			kfree_skb(skb);
+-			continue;
+-		}
+-
+-		netif->stats.rx_bytes += skb->len;
+-		netif->stats.rx_packets++;
+-
+-		netif_rx_ni(skb);
+-		netif->dev->last_rx = jiffies;
+-	}
+-}
+-
+-/* Called after netfront has transmitted */
+-static void net_tx_action(unsigned long data)
+-{
+-	struct xen_netbk *netbk = (struct xen_netbk *)data;
+-	unsigned nr_mops;
+-	int ret;
+-
+-	net_tx_action_dealloc(netbk);
+-
+-	nr_mops = net_tx_build_mops(netbk);
+-
+-	if (nr_mops == 0)
+-		goto out;
+-
+-	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+-					netbk->tx_map_ops, nr_mops);
+-	BUG_ON(ret);
+-
+-	net_tx_submit(netbk);
+-out:
+-	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
+-	    !list_empty(&netbk->pending_inuse_head)) {
+-		struct netbk_tx_pending_inuse *oldest;
+-
+-		oldest = list_entry(netbk->pending_inuse_head.next,
+-				    struct netbk_tx_pending_inuse, list);
+-		mod_timer(&netbk->netbk_tx_pending_timer,
+-				oldest->alloc_time + HZ);
+-	}
+-}
+-
+-static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
+-{
+-	static DEFINE_SPINLOCK(_lock);
+-	unsigned long flags;
+-	pending_ring_idx_t index;
+-
+-	spin_lock_irqsave(&_lock, flags);
+-	index = pending_index(netbk->dealloc_prod);
+-	netbk->dealloc_ring[index] = pending_idx;
+-	/* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
+-	smp_wmb();
+-	netbk->dealloc_prod++;
+-	spin_unlock_irqrestore(&_lock, flags);
+-
+-	xen_netbk_bh_handler(netbk, 0);
+-}
+-
+-static void netif_page_release(struct page *page, unsigned int order)
+-{
+-	unsigned int group, idx;
+-	int foreign = netif_get_page_ext(page, &group, &idx);
+-
+-	BUG_ON(!foreign);
+-	BUG_ON(order);
+-
+-	netif_idx_release(&xen_netbk[group], idx);
+-}
+-
+-irqreturn_t netif_be_int(int irq, void *dev_id)
+-{
+-	struct xen_netif *netif = dev_id;
+-	struct xen_netbk *netbk;
+-
+-	if (netif->group == -1)
+-		return IRQ_NONE;
+-
+-	netbk = &xen_netbk[netif->group];
+-
+-	add_to_net_schedule_list_tail(netif);
+-	maybe_schedule_tx_action(netbk);
+-
+-	if (netif_schedulable(netif) && !netbk_queue_full(netif))
+-		netif_wake_queue(netif->dev);
+-
+-	return IRQ_HANDLED;
+-}
+-
+-static void make_tx_response(struct xen_netif *netif,
+-			     struct xen_netif_tx_request *txp,
+-			     s8       st)
+-{
+-	RING_IDX i = netif->tx.rsp_prod_pvt;
+-	struct xen_netif_tx_response *resp;
+-	int notify;
+-
+-	resp = RING_GET_RESPONSE(&netif->tx, i);
+-	resp->id     = txp->id;
+-	resp->status = st;
+-
+-	if (txp->flags & NETTXF_extra_info)
+-		RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL;
+-
+-	netif->tx.rsp_prod_pvt = ++i;
+-	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
+-	if (notify)
+-		notify_remote_via_irq(netif->irq);
+-}
+-
+-static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
+-					     u16      id,
+-					     s8       st,
+-					     u16      offset,
+-					     u16      size,
+-					     u16      flags)
+-{
+-	RING_IDX i = netif->rx.rsp_prod_pvt;
+-	struct xen_netif_rx_response *resp;
+-
+-	resp = RING_GET_RESPONSE(&netif->rx, i);
+-	resp->offset     = offset;
+-	resp->flags      = flags;
+-	resp->id         = id;
+-	resp->status     = (s16)size;
+-	if (st < 0)
+-		resp->status = (s16)st;
+-
+-	netif->rx.rsp_prod_pvt = ++i;
+-
+-	return resp;
+-}
+-
+-#ifdef NETBE_DEBUG_INTERRUPT
+-static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
+-{
+-	struct list_head *ent;
+-	struct xen_netif *netif;
+-	int i = 0;
+-	int group = 0;
+-
+-	printk(KERN_ALERT "netif_schedule_list:\n");
+-
+-	for (group = 0; group < xen_netbk_group_nr; group++) {
+-		struct xen_netbk *netbk = &xen_netbk[group];
+-		spin_lock_irq(&netbk->net_schedule_list_lock);
+-		printk(KERN_ALERT "xen_netback group number: %d\n", group);
+-		list_for_each(ent, &netbk->net_schedule_list) {
+-			netif = list_entry(ent, struct xen_netif, list);
+-			printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
+-				"rx_resp_prod=%08x\n",
+-				i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
+-			printk(KERN_ALERT
+-				"   tx_req_cons=%08x, tx_resp_prod=%08x)\n",
+-				netif->tx.req_cons, netif->tx.rsp_prod_pvt);
+-			printk(KERN_ALERT
+-				"   shared(rx_req_prod=%08x "
+-				"rx_resp_prod=%08x\n",
+-				netif->rx.sring->req_prod,
+-				netif->rx.sring->rsp_prod);
+-			printk(KERN_ALERT
+-				"   rx_event=%08x, tx_req_prod=%08x\n",
+-				netif->rx.sring->rsp_event,
+-				netif->tx.sring->req_prod);
+-			printk(KERN_ALERT
+-				"   tx_resp_prod=%08x, tx_event=%08x)\n",
+-				netif->tx.sring->rsp_prod,
+-				netif->tx.sring->rsp_event);
+-			i++;
+-		}
+-		spin_unlock_irq(&netbk->net_schedule_list_lock);
+-	}
+-
+-	printk(KERN_ALERT " ** End of netif_schedule_list **\n");
+-
+-	return IRQ_HANDLED;
+-}
+-#endif
+-
+-static inline int rx_work_todo(struct xen_netbk *netbk)
+-{
+-	return !skb_queue_empty(&netbk->rx_queue);
+-}
+-
+-static inline int tx_work_todo(struct xen_netbk *netbk)
+-{
+-	if (netbk->dealloc_cons != netbk->dealloc_prod)
+-		return 1;
+-
+-	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
+-	    !list_empty(&netbk->pending_inuse_head))
+-		return 1;
+-
+-	if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
+-			!list_empty(&netbk->net_schedule_list))
+-		return 1;
+-
+-	return 0;
+-}
+-
+-static int netbk_action_thread(void *data)
+-{
+-	struct xen_netbk *netbk = (struct xen_netbk *)data;
+-	while (!kthread_should_stop()) {
+-		wait_event_interruptible(netbk->kthread.netbk_action_wq,
+-				rx_work_todo(netbk)
+-				|| tx_work_todo(netbk)
+-				|| kthread_should_stop());
+-		cond_resched();
+-
+-		if (kthread_should_stop())
+-			break;
+-
+-		if (rx_work_todo(netbk))
+-			net_rx_action((unsigned long)netbk);
+-
+-		if (tx_work_todo(netbk))
+-			net_tx_action((unsigned long)netbk);
+-	}
+-
+-	return 0;
+-}
+-
+-static int __init netback_init(void)
+-{
+-	int i;
+-	struct page *page;
+-	int rc = 0;
+-	int group;
+-
+-	if (!xen_pv_domain())
+-		return -ENODEV;
+-
+-	xen_netbk_group_nr = num_online_cpus();
+-	xen_netbk = vmalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);
+-	if (!xen_netbk) {
+-		printk(KERN_ALERT "%s: out of memory\n", __func__);
+-		return -ENOMEM;
+-	}
+-	memset(xen_netbk, 0, sizeof(struct xen_netbk) * xen_netbk_group_nr);
+-
+-	for (group = 0; group < xen_netbk_group_nr; group++) {
+-		struct xen_netbk *netbk = &xen_netbk[group];
+-		skb_queue_head_init(&netbk->rx_queue);
+-		skb_queue_head_init(&netbk->tx_queue);
+-
+-		init_timer(&netbk->net_timer);
+-		netbk->net_timer.data = (unsigned long)netbk;
+-		netbk->net_timer.function = net_alarm;
+-
+-		init_timer(&netbk->netbk_tx_pending_timer);
+-		netbk->netbk_tx_pending_timer.data = (unsigned long)netbk;
+-		netbk->netbk_tx_pending_timer.function =
+-			netbk_tx_pending_timeout;
+-
+-		netbk->mmap_pages =
+-			alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
+-		if (!netbk->mmap_pages) {
+-			printk(KERN_ALERT "%s: out of memory\n", __func__);
+-			del_timer(&netbk->netbk_tx_pending_timer);
+-			del_timer(&netbk->net_timer);
+-			rc = -ENOMEM;
+-			goto failed_init;
+-		}
+-
+-		for (i = 0; i < MAX_PENDING_REQS; i++) {
+-			page = netbk->mmap_pages[i];
+-			SetPageForeign(page, netif_page_release);
+-			netif_set_page_ext(page, group, i);
+-			INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
+-		}
+-
+-		netbk->pending_cons = 0;
+-		netbk->pending_prod = MAX_PENDING_REQS;
+-		for (i = 0; i < MAX_PENDING_REQS; i++)
+-			netbk->pending_ring[i] = i;
+-
+-		if (MODPARM_netback_kthread) {
+-			init_waitqueue_head(&netbk->kthread.netbk_action_wq);
+-			netbk->kthread.task =
+-				kthread_create(netbk_action_thread,
+-					       (void *)netbk,
+-					       "netback/%u", group);
+-
+-			if (!IS_ERR(netbk->kthread.task)) {
+-				kthread_bind(netbk->kthread.task, group);
+-			} else {
+-				printk(KERN_ALERT
+-					"kthread_run() fails at netback\n");
+-				free_empty_pages_and_pagevec(netbk->mmap_pages,
+-						MAX_PENDING_REQS);
+-				del_timer(&netbk->netbk_tx_pending_timer);
+-				del_timer(&netbk->net_timer);
+-				rc = PTR_ERR(netbk->kthread.task);
+-				goto failed_init;
+-			}
+-		} else {
+-			tasklet_init(&netbk->tasklet.net_tx_tasklet,
+-				     net_tx_action,
+-				     (unsigned long)netbk);
+-			tasklet_init(&netbk->tasklet.net_rx_tasklet,
+-				     net_rx_action,
+-				     (unsigned long)netbk);
+-		}
+-
+-		INIT_LIST_HEAD(&netbk->pending_inuse_head);
+-		INIT_LIST_HEAD(&netbk->net_schedule_list);
+-
+-		spin_lock_init(&netbk->net_schedule_list_lock);
+-
+-		atomic_set(&netbk->netfront_count, 0);
+-
+-		if (MODPARM_netback_kthread)
+-			wake_up_process(netbk->kthread.task);
+-	}
+-
+-	netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
+-	if (MODPARM_copy_skb) {
+-		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
+-					      NULL, 0))
+-			netbk_copy_skb_mode = NETBK_ALWAYS_COPY_SKB;
+-		else
+-			netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
+-	}
+-
+-	rc = netif_xenbus_init();
+-	if (rc)
+-		goto failed_init;
+-
+-#ifdef NETBE_DEBUG_INTERRUPT
+-	(void)bind_virq_to_irqhandler(VIRQ_DEBUG,
+-				      0,
+-				      netif_be_dbg,
+-				      IRQF_SHARED,
+-				      "net-be-dbg",
+-				      &netif_be_dbg);
+-#endif
+-
+-	return 0;
+-
+-failed_init:
+-	for (i = 0; i < group; i++) {
+-		struct xen_netbk *netbk = &xen_netbk[i];
+-		free_empty_pages_and_pagevec(netbk->mmap_pages,
+-				MAX_PENDING_REQS);
+-		del_timer(&netbk->netbk_tx_pending_timer);
+-		del_timer(&netbk->net_timer);
+-		if (MODPARM_netback_kthread)
+-			kthread_stop(netbk->kthread.task);
+-	}
+-	vfree(xen_netbk);
+-	return rc;
+-
+-}
+-
+-module_init(netback_init);
+-
+-MODULE_LICENSE("Dual BSD/GPL");
+diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
+deleted file mode 100644
+index 640c696..0000000
+--- a/drivers/xen/netback/xenbus.c
++++ /dev/null
+@@ -1,487 +0,0 @@
+-/*  Xenbus code for netif backend
+- * Copyright (C) 2005 Rusty Russell <rusty at rustcorp.com.au>
+- * Copyright (C) 2005 XenSource Ltd
+- *
+- * This program is free software; you can redistribute it and/or modify
+- * it under the terms of the GNU General Public License as published by
+- * the Free Software Foundation; either version 2 of the License, or
+- * (at your option) any later version.
+- *
+- * This program is distributed in the hope that it will be useful,
+- * but WITHOUT ANY WARRANTY; without even the implied warranty of
+- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+- * GNU General Public License for more details.
+- *
+- * You should have received a copy of the GNU General Public License
+- * along with this program; if not, write to the Free Software
+- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-*/
+-
+-#include "common.h"
+-
+-static int connect_rings(struct backend_info *);
+-static void connect(struct backend_info *);
+-static void backend_create_netif(struct backend_info *be);
+-static void unregister_hotplug_status_watch(struct backend_info *be);
+-
+-static int netback_remove(struct xenbus_device *dev)
+-{
+-	struct backend_info *be = dev_get_drvdata(&dev->dev);
+-
+-	unregister_hotplug_status_watch(be);
+-	if (be->netif) {
+-		kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
+-		xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
+-		netif_disconnect(be->netif);
+-		be->netif = NULL;
+-	}
+-	kfree(be);
+-	dev_set_drvdata(&dev->dev, NULL);
+-	return 0;
+-}
+-
+-
+-/**
+- * Entry point to this code when a new device is created.  Allocate the basic
+- * structures and switch to InitWait.
+- */
+-static int netback_probe(struct xenbus_device *dev,
+-			 const struct xenbus_device_id *id)
+-{
+-	const char *message;
+-	struct xenbus_transaction xbt;
+-	int err;
+-	int sg;
+-	struct backend_info *be = kzalloc(sizeof(struct backend_info),
+-					  GFP_KERNEL);
+-	if (!be) {
+-		xenbus_dev_fatal(dev, -ENOMEM,
+-				 "allocating backend structure");
+-		return -ENOMEM;
+-	}
+-
+-	be->dev = dev;
+-	dev_set_drvdata(&dev->dev, be);
+-
+-	sg = 1;
+-	if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB)
+-		sg = 0;
+-
+-	do {
+-		err = xenbus_transaction_start(&xbt);
+-		if (err) {
+-			xenbus_dev_fatal(dev, err, "starting transaction");
+-			goto fail;
+-		}
+-
+-		err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg);
+-		if (err) {
+-			message = "writing feature-sg";
+-			goto abort_transaction;
+-		}
+-
+-		err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4",
+-				    "%d", sg);
+-		if (err) {
+-			message = "writing feature-gso-tcpv4";
+-			goto abort_transaction;
+-		}
+-
+-		/* We support rx-copy path. */
+-		err = xenbus_printf(xbt, dev->nodename,
+-				    "feature-rx-copy", "%d", 1);
+-		if (err) {
+-			message = "writing feature-rx-copy";
+-			goto abort_transaction;
+-		}
+-
+-		/*
+-		 * We don't support rx-flip path (except old guests who don't
+-		 * grok this feature flag).
+-		 */
+-		err = xenbus_printf(xbt, dev->nodename,
+-				    "feature-rx-flip", "%d", 0);
+-		if (err) {
+-			message = "writing feature-rx-flip";
+-			goto abort_transaction;
+-		}
+-
+-		err = xenbus_transaction_end(xbt, 0);
+-	} while (err == -EAGAIN);
+-
+-	if (err) {
+-		xenbus_dev_fatal(dev, err, "completing transaction");
+-		goto fail;
+-	}
+-
+-	err = xenbus_switch_state(dev, XenbusStateInitWait);
+-	if (err)
+-		goto fail;
+-
+-	/* This kicks hotplug scripts, so do it immediately. */
+-	backend_create_netif(be);
+-
+-	return 0;
+-
+-abort_transaction:
+-	xenbus_transaction_end(xbt, 1);
+-	xenbus_dev_fatal(dev, err, "%s", message);
+-fail:
+-	pr_debug("failed");
+-	netback_remove(dev);
+-	return err;
+-}
+-
+-
+-/*
+- * Handle the creation of the hotplug script environment.  We add the script
+- * and vif variables to the environment, for the benefit of the vif-* hotplug
+- * scripts.
+- */
+-static int netback_uevent(struct xenbus_device *xdev,
+-			  struct kobj_uevent_env *env)
+-{
+-	struct backend_info *be = dev_get_drvdata(&xdev->dev);
+-	char *val;
+-
+-	val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
+-	if (IS_ERR(val)) {
+-		int err = PTR_ERR(val);
+-		xenbus_dev_fatal(xdev, err, "reading script");
+-		return err;
+-	} else {
+-		if (add_uevent_var(env, "script=%s", val)) {
+-			kfree(val);
+-			return -ENOMEM;
+-		}
+-		kfree(val);
+-	}
+-
+-	if (!be || !be->netif)
+-		return 0;
+-
+-	return add_uevent_var(env, "vif=%s", be->netif->dev->name);
+-}
+-
+-
+-static void backend_create_netif(struct backend_info *be)
+-{
+-	int err;
+-	long handle;
+-	struct xenbus_device *dev = be->dev;
+-
+-	if (be->netif != NULL)
+-		return;
+-
+-	err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%li", &handle);
+-	if (err != 1) {
+-		xenbus_dev_fatal(dev, err, "reading handle");
+-		return;
+-	}
+-
+-	be->netif = netif_alloc(&dev->dev, dev->otherend_id, handle);
+-	if (IS_ERR(be->netif)) {
+-		err = PTR_ERR(be->netif);
+-		be->netif = NULL;
+-		xenbus_dev_fatal(dev, err, "creating interface");
+-		return;
+-	}
+-
+-	kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
+-}
+-
+-
+-static void disconnect_backend(struct xenbus_device *dev)
+-{
+-	struct backend_info *be = dev_get_drvdata(&dev->dev);
+-
+-	if (be->netif) {
+-		xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
+-		netif_disconnect(be->netif);
+-		be->netif = NULL;
+-	}
+-}
+-
+-/**
+- * Callback received when the frontend's state changes.
+- */
+-static void frontend_changed(struct xenbus_device *dev,
+-			     enum xenbus_state frontend_state)
+-{
+-	struct backend_info *be = dev_get_drvdata(&dev->dev);
+-
+-	pr_debug("frontend state %s", xenbus_strstate(frontend_state));
+-
+-	be->frontend_state = frontend_state;
+-
+-	switch (frontend_state) {
+-	case XenbusStateInitialising:
+-		if (dev->state == XenbusStateClosed) {
+-			printk(KERN_INFO "%s: %s: prepare for reconnect\n",
+-			       __func__, dev->nodename);
+-			xenbus_switch_state(dev, XenbusStateInitWait);
+-		}
+-		break;
+-
+-	case XenbusStateInitialised:
+-		break;
+-
+-	case XenbusStateConnected:
+-		if (dev->state == XenbusStateConnected)
+-			break;
+-		backend_create_netif(be);
+-		if (be->netif)
+-			connect(be);
+-		break;
+-
+-	case XenbusStateClosing:
+-		if (be->netif)
+-			kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
+-		disconnect_backend(dev);
+-		xenbus_switch_state(dev, XenbusStateClosing);
+-		break;
+-
+-	case XenbusStateClosed:
+-		xenbus_switch_state(dev, XenbusStateClosed);
+-		if (xenbus_dev_is_online(dev))
+-			break;
+-		/* fall through if not online */
+-	case XenbusStateUnknown:
+-		device_unregister(&dev->dev);
+-		break;
+-
+-	default:
+-		xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
+-				 frontend_state);
+-		break;
+-	}
+-}
+-
+-
+-static void xen_net_read_rate(struct xenbus_device *dev,
+-			      unsigned long *bytes, unsigned long *usec)
+-{
+-	char *s, *e;
+-	unsigned long b, u;
+-	char *ratestr;
+-
+-	/* Default to unlimited bandwidth. */
+-	*bytes = ~0UL;
+-	*usec = 0;
+-
+-	ratestr = xenbus_read(XBT_NIL, dev->nodename, "rate", NULL);
+-	if (IS_ERR(ratestr))
+-		return;
+-
+-	s = ratestr;
+-	b = simple_strtoul(s, &e, 10);
+-	if ((s == e) || (*e != ','))
+-		goto fail;
+-
+-	s = e + 1;
+-	u = simple_strtoul(s, &e, 10);
+-	if ((s == e) || (*e != '\0'))
+-		goto fail;
+-
+-	*bytes = b;
+-	*usec = u;
+-
+-	kfree(ratestr);
+-	return;
+-
+- fail:
+-	pr_warn("Failed to parse network rate limit. Traffic unlimited.\n");
+-	kfree(ratestr);
+-}
+-
+-static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
+-{
+-	char *s, *e, *macstr;
+-	int i;
+-
+-	macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
+-	if (IS_ERR(macstr))
+-		return PTR_ERR(macstr);
+-
+-	for (i = 0; i < ETH_ALEN; i++) {
+-		mac[i] = simple_strtoul(s, &e, 16);
+-		if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
+-			kfree(macstr);
+-			return -ENOENT;
+-		}
+-		s = e+1;
+-	}
+-
+-	kfree(macstr);
+-	return 0;
+-}
+-
+-static void unregister_hotplug_status_watch(struct backend_info *be)
+-{
+-	if (be->have_hotplug_status_watch) {
+-		unregister_xenbus_watch(&be->hotplug_status_watch);
+-		kfree(be->hotplug_status_watch.node);
+-	}
+-	be->have_hotplug_status_watch = 0;
+-}
+-
+-static void hotplug_status_changed(struct xenbus_watch *watch,
+-				   const char **vec,
+-				   unsigned int vec_size)
+-{
+-	struct backend_info *be = container_of(watch,
+-					       struct backend_info,
+-					       hotplug_status_watch);
+-	char *str;
+-	unsigned int len;
+-
+-	str = xenbus_read(XBT_NIL, be->dev->nodename, "hotplug-status", &len);
+-	if (IS_ERR(str))
+-		return;
+-	if (len == sizeof("connected")-1 && !memcmp(str, "connected", len)) {
+-		xenbus_switch_state(be->dev, XenbusStateConnected);
+-		/* Not interested in this watch anymore. */
+-		unregister_hotplug_status_watch(be);
+-	}
+-	kfree(str);
+-}
+-
+-static void connect(struct backend_info *be)
+-{
+-	int err;
+-	struct xenbus_device *dev = be->dev;
+-
+-	err = connect_rings(be);
+-	if (err)
+-		return;
+-
+-	err = xen_net_read_mac(dev, be->netif->fe_dev_addr);
+-	if (err) {
+-		xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
+-		return;
+-	}
+-
+-	xen_net_read_rate(dev, &be->netif->credit_bytes,
+-			  &be->netif->credit_usec);
+-	be->netif->remaining_credit = be->netif->credit_bytes;
+-
+-	unregister_hotplug_status_watch(be);
+-	err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch,
+-				   hotplug_status_changed,
+-				   "%s/%s", dev->nodename, "hotplug-status");
+-	if (err) {
+-		/* Switch now, since we can't do a watch. */
+-		xenbus_switch_state(dev, XenbusStateConnected);
+-	} else {
+-		be->have_hotplug_status_watch = 1;
+-	}
+-
+-	netif_wake_queue(be->netif->dev);
+-}
+-
+-
+-static int connect_rings(struct backend_info *be)
+-{
+-	struct xen_netif *netif = be->netif;
+-	struct xenbus_device *dev = be->dev;
+-	unsigned long tx_ring_ref, rx_ring_ref;
+-	unsigned int evtchn, rx_copy;
+-	int err;
+-	int val;
+-
+-	err = xenbus_gather(XBT_NIL, dev->otherend,
+-			    "tx-ring-ref", "%lu", &tx_ring_ref,
+-			    "rx-ring-ref", "%lu", &rx_ring_ref,
+-			    "event-channel", "%u", &evtchn, NULL);
+-	if (err) {
+-		xenbus_dev_fatal(dev, err,
+-				 "reading %s/ring-ref and event-channel",
+-				 dev->otherend);
+-		return err;
+-	}
+-
+-	err = xenbus_scanf(XBT_NIL, dev->otherend, "request-rx-copy", "%u",
+-			   &rx_copy);
+-	if (err == -ENOENT) {
+-		err = 0;
+-		rx_copy = 0;
+-	}
+-	if (err < 0) {
+-		xenbus_dev_fatal(dev, err, "reading %s/request-rx-copy",
+-				 dev->otherend);
+-		return err;
+-	}
+-	if (!rx_copy)
+-		return -EOPNOTSUPP;
+-
+-	if (netif->dev->tx_queue_len != 0) {
+-		if (xenbus_scanf(XBT_NIL, dev->otherend,
+-				 "feature-rx-notify", "%d", &val) < 0)
+-			val = 0;
+-		if (val)
+-			netif->can_queue = 1;
+-		else
+-			/* Must be non-zero for pfifo_fast to work. */
+-			netif->dev->tx_queue_len = 1;
+-	}
+-
+-	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg",
+-			 "%d", &val) < 0)
+-		val = 0;
+-	netif->can_sg = !!val;
+-
+-	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4",
+-			 "%d", &val) < 0)
+-		val = 0;
+-	netif->gso = !!val;
+-
+-	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix",
+-			 "%d", &val) < 0)
+-		val = 0;
+-	netif->gso_prefix = !!val;
+-
+-	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
+-			 "%d", &val) < 0)
+-		val = 0;
+-	netif->csum = !val;
+-
+-	/* Set dev->features */
+-	netif_set_features(netif);
+-
+-	/* Map the shared frame, irq etc. */
+-	err = netif_map(netif, tx_ring_ref, rx_ring_ref, evtchn);
+-	if (err) {
+-		xenbus_dev_fatal(dev, err,
+-				 "mapping shared-frames %lu/%lu port %u",
+-				 tx_ring_ref, rx_ring_ref, evtchn);
+-		return err;
+-	}
+-	return 0;
+-}
+-
+-
+-/* ** Driver Registration ** */
+-
+-
+-static const struct xenbus_device_id netback_ids[] = {
+-	{ "vif" },
+-	{ "" }
+-};
+-
+-
+-static struct xenbus_driver netback = {
+-	.name = "vif",
+-	.owner = THIS_MODULE,
+-	.ids = netback_ids,
+-	.probe = netback_probe,
+-	.remove = netback_remove,
+-	.uevent = netback_uevent,
+-	.otherend_changed = frontend_changed,
+-};
+-
+-
+-int netif_xenbus_init(void)
+-{
+-	printk(KERN_CRIT "registering netback\n");
+-	return xenbus_register_backend(&netback);
+-}
+-- 
+1.7.3.4
+
+
+From 01d93054b9f5223c8ed9a3c11ea5a89ce7db442c Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Sat, 11 Dec 2010 10:15:50 +0000
+Subject: [PATCH 123/139] xen: netback: Make dependency on PageForeign conditional
+
+When PageForeign is not available we fall back to a copying TX mode.
+
+All uses of PageForeign are now gated with HAVE_XEN_PAGE_FOREIGN; this should
+allow for easier removal of the dependency for upstream, e.g. using unifdef.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/net/xen-netback/common.h    |   21 +++
+ drivers/net/xen-netback/interface.c |    4 +
+ drivers/net/xen-netback/netback.c   |  272 +++++++++++++++++++++++++++++-----
+ drivers/net/xen-netback/xenbus.c    |    2 +
+ 4 files changed, 259 insertions(+), 40 deletions(-)
+
+diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
+index 079e1de..f45bac8 100644
+--- a/drivers/net/xen-netback/common.h
++++ b/drivers/net/xen-netback/common.h
+@@ -93,8 +93,10 @@ struct xen_netif {
+ 	unsigned long   remaining_credit;
+ 	struct timer_list credit_timeout;
+ 
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ 	/* Statistics */
+ 	int nr_copied_skbs;
++#endif
+ 
+ 	/* Miscellaneous private stuff. */
+ 	struct list_head list;  /* scheduling list */
+@@ -117,6 +119,7 @@ struct xen_netif {
+ #define netback_carrier_off(netif)	((netif)->carrier = 0)
+ #define netback_carrier_ok(netif)	((netif)->carrier)
+ 
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ enum {
+ 	NETBK_DONT_COPY_SKB,
+ 	NETBK_DELAYED_COPY_SKB,
+@@ -124,6 +127,7 @@ enum {
+ };
+ 
+ extern int netbk_copy_skb_mode;
++#endif
+ 
+ struct backend_info {
+ 	struct xenbus_device *dev;
+@@ -191,10 +195,12 @@ struct netbk_rx_meta {
+ 	int gso_size;
+ };
+ 
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ struct netbk_tx_pending_inuse {
+ 	struct list_head list;
+ 	unsigned long alloc_time;
+ };
++#endif
+ 
+ #define MAX_PENDING_REQS 256
+ 
+@@ -232,16 +238,24 @@ struct xen_netbk {
+ 	struct sk_buff_head tx_queue;
+ 
+ 	struct timer_list net_timer;
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ 	struct timer_list netbk_tx_pending_timer;
++#endif
+ 
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ 	struct page **mmap_pages;
++#else
++	struct page *mmap_pages[MAX_PENDING_REQS];
++#endif
+ 
+ 	pending_ring_idx_t pending_prod;
+ 	pending_ring_idx_t pending_cons;
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ 	pending_ring_idx_t dealloc_prod;
+ 	pending_ring_idx_t dealloc_cons;
+ 
+ 	struct list_head pending_inuse_head;
++#endif
+ 	struct list_head net_schedule_list;
+ 
+ 	/* Protect the net_schedule_list in netif. */
+@@ -250,13 +264,20 @@ struct xen_netbk {
+ 	atomic_t netfront_count;
+ 
+ 	struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ 	struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
+ 	struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
+ 	struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
+ 
+ 	grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
++#else
++	struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS];
++#endif
++
+ 	u16 pending_ring[MAX_PENDING_REQS];
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ 	u16 dealloc_ring[MAX_PENDING_REQS];
++#endif
+ 
+ 	/*
+ 	 * Each head or fragment can be up to 4096 bytes. Given
+diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
+index c36db26..3ff3aff 100644
+--- a/drivers/net/xen-netback/interface.c
++++ b/drivers/net/xen-netback/interface.c
+@@ -186,6 +186,7 @@ static void netbk_get_drvinfo(struct net_device *dev,
+ 	strcpy(info->bus_info, dev_name(dev->dev.parent));
+ }
+ 
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ static const struct netif_stat {
+ 	char name[ETH_GSTRING_LEN];
+ 	u16 offset;
+@@ -225,6 +226,7 @@ static void netbk_get_strings(struct net_device *dev, u32 stringset, u8 * data)
+ 		break;
+ 	}
+ }
++#endif
+ 
+ static struct ethtool_ops network_ethtool_ops = {
+ 	.get_drvinfo = netbk_get_drvinfo,
+@@ -237,9 +239,11 @@ static struct ethtool_ops network_ethtool_ops = {
+ 	.set_tso = netbk_set_tso,
+ 	.get_link = ethtool_op_get_link,
+ 
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ 	.get_sset_count = netbk_get_sset_count,
+ 	.get_ethtool_stats = netbk_get_ethtool_stats,
+ 	.get_strings = netbk_get_strings,
++#endif
+ };
+ 
+ static struct net_device_ops netback_ops = {
+diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
+index e0ca232..6a1aa5c 100644
+--- a/drivers/net/xen-netback/netback.c
++++ b/drivers/net/xen-netback/netback.c
+@@ -40,7 +40,9 @@
+ 
+ #include <net/tcp.h>
+ 
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ #include <xen/balloon.h>
++#endif
+ #include <xen/events.h>
+ #include <xen/interface/memory.h>
+ 
+@@ -80,9 +82,10 @@ static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
+ }
+ 
+ /* extra field used in struct page */
+-static inline void netif_set_page_ext(struct page *pg,
+-				      unsigned int group, unsigned int idx)
++static inline void netif_set_page_ext(struct page *pg, struct xen_netbk *netbk,
++		unsigned int idx)
+ {
++	unsigned int group = netbk - xen_netbk;
+ 	union page_ext ext = { .e = { .group = group + 1, .idx = idx } };
+ 
+ 	BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping));
+@@ -96,8 +99,10 @@ static int netif_get_page_ext(struct page *pg,
+ 	struct xen_netbk *netbk;
+ 	unsigned int group, idx;
+ 
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ 	if (!PageForeign(pg))
+ 		return 0;
++#endif
+ 
+ 	group = ext.e.group - 1;
+ 
+@@ -106,8 +111,10 @@ static int netif_get_page_ext(struct page *pg,
+ 
+ 	netbk = &xen_netbk[group];
+ 
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ 	if (netbk->mmap_pages == NULL)
+ 		return 0;
++#endif
+ 
+ 	idx = ext.e.idx;
+ 
+@@ -144,12 +151,14 @@ static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk)
+ 		netbk->pending_prod + netbk->pending_cons;
+ }
+ 
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ /* Setting this allows the safe use of this driver without netloop. */
+ static int MODPARM_copy_skb = 1;
+ module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
+ MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
+ 
+ int netbk_copy_skb_mode;
++#endif
+ 
+ static int MODPARM_netback_kthread;
+ module_param_named(netback_kthread, MODPARM_netback_kthread, bool, 0);
+@@ -779,11 +788,13 @@ static void net_alarm(unsigned long data)
+ 	xen_netbk_bh_handler(netbk, 1);
+ }
+ 
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ static void netbk_tx_pending_timeout(unsigned long data)
+ {
+ 	struct xen_netbk *netbk = (struct xen_netbk *)data;
+ 	xen_netbk_bh_handler(netbk, 0);
+ }
++#endif
+ 
+ struct net_device_stats *netif_be_get_stats(struct net_device *dev)
+ {
+@@ -892,6 +903,7 @@ static void tx_credit_callback(unsigned long data)
+ 	netif_schedule_work(netif);
+ }
+ 
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ static inline int copy_pending_req(struct xen_netbk *netbk,
+ 				   pending_ring_idx_t pending_idx)
+ {
+@@ -899,7 +911,9 @@ static inline int copy_pending_req(struct xen_netbk *netbk,
+ 			netbk->grant_tx_handle[pending_idx],
+ 			&netbk->mmap_pages[pending_idx]);
+ }
++#endif
+ 
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
+ {
+ 	struct netbk_tx_pending_inuse *inuse, *n;
+@@ -1004,6 +1018,7 @@ static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
+ 		list_del_init(&inuse->list);
+ 	}
+ }
++#endif
+ 
+ static void netbk_tx_err(struct xen_netif *netif,
+ 		struct xen_netif_tx_request *txp, RING_IDX end)
+@@ -1034,19 +1049,19 @@ static int netbk_count_requests(struct xen_netif *netif,
+ 
+ 	do {
+ 		if (frags >= work_to_do) {
+-			DPRINTK("Need more frags\n");
++			printk(KERN_CRIT "Need more frags\n");
+ 			return -frags;
+ 		}
+ 
+ 		if (unlikely(frags >= MAX_SKB_FRAGS)) {
+-			DPRINTK("Too many frags\n");
++			printk(KERN_CRIT "Too many frags\n");
+ 			return -frags;
+ 		}
+ 
+ 		memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags),
+ 		       sizeof(*txp));
+ 		if (txp->size > first->size) {
+-			DPRINTK("Frags galore\n");
++			printk(KERN_CRIT "Frags galore\n");
+ 			return -frags;
+ 		}
+ 
+@@ -1054,20 +1069,42 @@ static int netbk_count_requests(struct xen_netif *netif,
+ 		frags++;
+ 
+ 		if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
+-			DPRINTK("txp->offset: %x, size: %u\n",
++			printk(KERN_CRIT "txp->offset: %x, size: %u\n",
+ 				txp->offset, txp->size);
+ 			return -frags;
+ 		}
+ 	} while ((txp++)->flags & NETTXF_more_data);
+-
+ 	return frags;
+ }
+ 
++#ifndef HAVE_XEN_PAGE_FOREIGN
++static struct page *netif_alloc_page(struct xen_netbk *netbk,
++				     struct sk_buff *skb,
++				     unsigned long pending_idx)
++{
++	struct page *page;
++	page = alloc_page(GFP_KERNEL|__GFP_COLD);
++	if (!page)
++		return NULL;
++	netif_set_page_ext(page, netbk, pending_idx);
++	netbk->mmap_pages[pending_idx] = page;
++	return page;
++}
++#endif
++
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk,
+ 						       struct xen_netif *netif,
+ 						       struct sk_buff *skb,
+ 						       struct xen_netif_tx_request *txp,
+-						       struct gnttab_map_grant_ref *mop)
++						       struct gnttab_map_grant_ref *gop)
++#else
++static struct gnttab_copy *netbk_get_requests(struct xen_netbk *netbk,
++					      struct xen_netif *netif,
++					      struct sk_buff *skb,
++					      struct xen_netif_tx_request *txp,
++					      struct gnttab_copy *gop)
++#endif
+ {
+ 	struct skb_shared_info *shinfo = skb_shinfo(skb);
+ 	skb_frag_t *frags = shinfo->frags;
+@@ -1078,16 +1115,39 @@ static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk,
+ 	start = ((unsigned long)shinfo->frags[0].page == pending_idx);
+ 
+ 	for (i = start; i < shinfo->nr_frags; i++, txp++) {
++#ifndef HAVE_XEN_PAGE_FOREIGN
++		struct page *page;
++#endif
+ 		pending_ring_idx_t index;
+ 		struct pending_tx_info *pending_tx_info =
+ 			netbk->pending_tx_info;
+ 
+ 		index = pending_index(netbk->pending_cons++);
+ 		pending_idx = netbk->pending_ring[index];
+-
+-		gnttab_set_map_op(mop++, idx_to_kaddr(netbk, pending_idx),
++#ifdef HAVE_XEN_PAGE_FOREIGN
++		gnttab_set_map_op(gop++, idx_to_kaddr(netbk, pending_idx),
+ 				  GNTMAP_host_map | GNTMAP_readonly,
+ 				  txp->gref, netif->domid);
++#else
++		page = netif_alloc_page(netbk, skb, pending_idx);
++		if (!page)
++			return NULL;
++
++		netbk->mmap_pages[pending_idx] = page;
++
++		gop->source.u.ref = txp->gref;
++		gop->source.domid = netif->domid;
++		gop->source.offset = txp->offset;
++
++		gop->dest.u.gmfn = virt_to_mfn(page_address(page));
++		gop->dest.domid = DOMID_SELF;
++		gop->dest.offset = txp->offset;
++
++		gop->len = txp->size;
++		gop->flags = GNTCOPY_source_gref;
++
++		gop++;
++#endif
+ 
+ 		memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
+ 		netif_get(netif);
+@@ -1095,14 +1155,24 @@ static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk,
+ 		frags[i].page = (void *)pending_idx;
+ 	}
+ 
+-	return mop;
++	return gop;
+ }
+ 
+-static int netbk_tx_check_mop(struct xen_netbk *netbk,
++#ifdef HAVE_XEN_PAGE_FOREIGN
++static int netbk_tx_check_gop(struct xen_netbk *netbk,
+ 			      struct sk_buff *skb,
+-			      struct gnttab_map_grant_ref **mopp)
++			      struct gnttab_map_grant_ref **gopp)
++#else
++static int netbk_tx_check_gop(struct xen_netbk *netbk,
++			      struct sk_buff *skb,
++			      struct gnttab_copy **gopp)
++#endif
+ {
+-	struct gnttab_map_grant_ref *mop = *mopp;
++#ifdef HAVE_XEN_PAGE_FOREIGN
++	struct gnttab_map_grant_ref *gop = *gopp;
++#else
++	struct gnttab_copy *gop = *gopp;
++#endif
+ 	int pending_idx = *((u16 *)skb->data);
+ 	struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
+ 	struct xen_netif *netif = pending_tx_info[pending_idx].netif;
+@@ -1112,7 +1182,7 @@ static int netbk_tx_check_mop(struct xen_netbk *netbk,
+ 	int i, err, start;
+ 
+ 	/* Check status of header. */
+-	err = mop->status;
++	err = gop->status;
+ 	if (unlikely(err)) {
+ 		pending_ring_idx_t index;
+ 		index = pending_index(netbk->pending_prod++);
+@@ -1120,11 +1190,13 @@ static int netbk_tx_check_mop(struct xen_netbk *netbk,
+ 		make_tx_response(netif, txp, NETIF_RSP_ERROR);
+ 		netbk->pending_ring[index] = pending_idx;
+ 		netif_put(netif);
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ 	} else {
+ 		set_phys_to_machine(
+ 			__pa(idx_to_kaddr(netbk, pending_idx)) >> PAGE_SHIFT,
+-			FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
+-		netbk->grant_tx_handle[pending_idx] = mop->handle;
++			FOREIGN_FRAME(gop->dev_bus_addr >> PAGE_SHIFT));
++		netbk->grant_tx_handle[pending_idx] = gop->handle;
++#endif
+ 	}
+ 
+ 	/* Skip first skb fragment if it is on same page as header fragment. */
+@@ -1137,14 +1209,16 @@ static int netbk_tx_check_mop(struct xen_netbk *netbk,
+ 		pending_idx = (unsigned long)shinfo->frags[i].page;
+ 
+ 		/* Check error status: if okay then remember grant handle. */
+-		newerr = (++mop)->status;
++		newerr = (++gop)->status;
+ 		if (likely(!newerr)) {
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ 			unsigned long addr;
+ 			addr = idx_to_kaddr(netbk, pending_idx);
+ 			set_phys_to_machine(
+ 				__pa(addr)>>PAGE_SHIFT,
+-				FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
+-			netbk->grant_tx_handle[pending_idx] = mop->handle;
++				FOREIGN_FRAME(gop->dev_bus_addr>>PAGE_SHIFT));
++			netbk->grant_tx_handle[pending_idx] = gop->handle;
++#endif
+ 			/* Had a previous error? Invalidate this fragment. */
+ 			if (unlikely(err))
+ 				netif_idx_release(netbk, pending_idx);
+@@ -1174,7 +1248,7 @@ static int netbk_tx_check_mop(struct xen_netbk *netbk,
+ 		err = newerr;
+ 	}
+ 
+-	*mopp = mop + 1;
++	*gopp = gop + 1;
+ 	return err;
+ }
+ 
+@@ -1190,10 +1264,11 @@ static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
+ 		unsigned long pending_idx;
+ 
+ 		pending_idx = (unsigned long)frag->page;
+-
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ 		netbk->pending_inuse[pending_idx].alloc_time = jiffies;
+ 		list_add_tail(&netbk->pending_inuse[pending_idx].list,
+ 			      &netbk->pending_inuse_head);
++#endif
+ 
+ 		txp = &netbk->pending_tx_info[pending_idx].req;
+ 		frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx));
+@@ -1203,6 +1278,10 @@ static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
+ 		skb->len += txp->size;
+ 		skb->data_len += txp->size;
+ 		skb->truesize += txp->size;
++
++		/* Take an extra reference to offset netif_idx_release */
++		get_page(netbk->mmap_pages[pending_idx]);
++		netif_idx_release(netbk, pending_idx);
+ 	}
+ }
+ 
+@@ -1330,18 +1409,24 @@ static bool tx_credit_exceeded(struct xen_netif *netif, unsigned size)
+ 	return false;
+ }
+ 
+-static unsigned net_tx_build_mops(struct xen_netbk *netbk)
++static unsigned net_tx_build_gops(struct xen_netbk *netbk)
+ {
+-	struct gnttab_map_grant_ref *mop;
++#ifdef HAVE_XEN_PAGE_FOREIGN
++	struct gnttab_map_grant_ref *gop = netbk->tx_map_ops, *request_gop;
++#else
++	struct gnttab_copy *gop = netbk->tx_copy_ops, *request_gop;
++#endif
+ 	struct sk_buff *skb;
+ 	int ret;
+ 
+-	mop = netbk->tx_map_ops;
+ 	while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
+ 		!list_empty(&netbk->net_schedule_list)) {
+ 		struct xen_netif *netif;
+ 		struct xen_netif_tx_request txreq;
+ 		struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
++#ifndef HAVE_XEN_PAGE_FOREIGN
++		struct page *page;
++#endif
+ 		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
+ 		u16 pending_idx;
+ 		RING_IDX idx;
+@@ -1438,10 +1523,35 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk)
+ 			}
+ 		}
+ 
+-		gnttab_set_map_op(mop, idx_to_kaddr(netbk, pending_idx),
++#ifdef HAVE_XEN_PAGE_FOREIGN
++		gnttab_set_map_op(gop, idx_to_kaddr(netbk, pending_idx),
+ 				  GNTMAP_host_map | GNTMAP_readonly,
+ 				  txreq.gref, netif->domid);
+-		mop++;
++		gop++;
++#else
++		/* XXX could copy straight to head */
++		page = netif_alloc_page(netbk, skb, pending_idx);
++		if (!page) {
++			kfree_skb(skb);
++			netbk_tx_err(netif, &txreq, idx);
++			continue;
++		}
++
++		netbk->mmap_pages[pending_idx] = page;
++
++		gop->source.u.ref = txreq.gref;
++		gop->source.domid = netif->domid;
++		gop->source.offset = txreq.offset;
++
++		gop->dest.u.gmfn = virt_to_mfn(page_address(page));
++		gop->dest.domid = DOMID_SELF;
++		gop->dest.offset = txreq.offset;
++
++		gop->len = txreq.size;
++		gop->flags = GNTCOPY_source_gref;
++
++		gop++;
++#endif
+ 
+ 		memcpy(&netbk->pending_tx_info[pending_idx].req,
+ 		       &txreq, sizeof(txreq));
+@@ -1464,24 +1574,43 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk)
+ 
+ 		netbk->pending_cons++;
+ 
+-		mop = netbk_get_requests(netbk, netif, skb, txfrags, mop);
++		request_gop = netbk_get_requests(netbk, netif,
++						 skb, txfrags, gop);
++		if (request_gop == NULL) {
++			kfree_skb(skb);
++			netbk_tx_err(netif, &txreq, idx);
++			continue;
++		}
++		gop = request_gop;
+ 
+ 		netif->tx.req_cons = idx;
+ 		netif_schedule_work(netif);
+ 
+-		if ((mop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops))
++#ifdef HAVE_XEN_PAGE_FOREIGN
++		if ((gop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops))
+ 			break;
++#else
++		if ((gop-netbk->tx_copy_ops) >= ARRAY_SIZE(netbk->tx_copy_ops))
++			break;
++#endif
+ 	}
+ 
+-	return mop - netbk->tx_map_ops;
++#ifdef HAVE_XEN_PAGE_FOREIGN
++	return gop - netbk->tx_map_ops;
++#else
++	return gop - netbk->tx_copy_ops;
++#endif
+ }
+ 
+ static void net_tx_submit(struct xen_netbk *netbk)
+ {
+-	struct gnttab_map_grant_ref *mop;
++#ifdef HAVE_XEN_PAGE_FOREIGN
++	struct gnttab_map_grant_ref *gop = netbk->tx_map_ops;
++#else
++	struct gnttab_copy *gop = netbk->tx_copy_ops;
++#endif
+ 	struct sk_buff *skb;
+ 
+-	mop = netbk->tx_map_ops;
+ 	while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
+ 		struct xen_netif_tx_request *txp;
+ 		struct xen_netif *netif;
+@@ -1493,7 +1622,7 @@ static void net_tx_submit(struct xen_netbk *netbk)
+ 		txp = &netbk->pending_tx_info[pending_idx].req;
+ 
+ 		/* Check the remap error code. */
+-		if (unlikely(netbk_tx_check_mop(netbk, skb, &mop))) {
++		if (unlikely(netbk_tx_check_gop(netbk, skb, &gop))) {
+ 			pr_debug("netback grant failed.\n");
+ 			skb_shinfo(skb)->nr_frags = 0;
+ 			kfree_skb(skb);
+@@ -1545,12 +1674,14 @@ static void net_tx_submit(struct xen_netbk *netbk)
+ 			continue;
+ 		}
+ 
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ 		if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
+ 		    unlikely(skb_linearize(skb))) {
+ 			DPRINTK("Can't linearize skb in net_tx_action.\n");
+ 			kfree_skb(skb);
+ 			continue;
+ 		}
++#endif
+ 
+ 		netif->stats.rx_bytes += skb->len;
+ 		netif->stats.rx_packets++;
+@@ -1564,21 +1695,31 @@ static void net_tx_submit(struct xen_netbk *netbk)
+ static void net_tx_action(unsigned long data)
+ {
+ 	struct xen_netbk *netbk = (struct xen_netbk *)data;
+-	unsigned nr_mops;
++	unsigned nr_gops;
+ 	int ret;
+ 
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ 	net_tx_action_dealloc(netbk);
++#endif
+ 
+-	nr_mops = net_tx_build_mops(netbk);
++	nr_gops = net_tx_build_gops(netbk);
+ 
+-	if (nr_mops == 0)
++#ifdef HAVE_XEN_PAGE_FOREIGN
++	if (nr_gops == 0)
+ 		goto out;
+-
+ 	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+-					netbk->tx_map_ops, nr_mops);
++					netbk->tx_map_ops, nr_gops);
++#else
++	if (nr_gops == 0)
++		return;
++	ret = HYPERVISOR_grant_table_op(GNTTABOP_copy,
++					netbk->tx_copy_ops, nr_gops);
++#endif
+ 	BUG_ON(ret);
+ 
+ 	net_tx_submit(netbk);
++
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ out:
+ 	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
+ 	    !list_empty(&netbk->pending_inuse_head)) {
+@@ -1589,8 +1730,10 @@ out:
+ 		mod_timer(&netbk->netbk_tx_pending_timer,
+ 				oldest->alloc_time + HZ);
+ 	}
++#endif
+ }
+ 
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
+ {
+ 	static DEFINE_SPINLOCK(_lock);
+@@ -1618,6 +1761,34 @@ static void netif_page_release(struct page *page, unsigned int order)
+ 
+ 	netif_idx_release(&xen_netbk[group], idx);
+ }
++#else
++static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
++{
++	struct xen_netif *netif;
++	struct pending_tx_info *pending_tx_info;
++	pending_ring_idx_t index;
++
++	/* Already complete? */
++	if (netbk->mmap_pages[pending_idx] == NULL)
++		return;
++
++	pending_tx_info = &netbk->pending_tx_info[pending_idx];
++
++	netif = pending_tx_info->netif;
++
++	make_tx_response(netif, &pending_tx_info->req,
++			 NETIF_RSP_OKAY);
++
++	index = pending_index(netbk->pending_prod++);
++	netbk->pending_ring[index] = pending_idx;
++
++	netif_put(netif);
++
++	netbk->mmap_pages[pending_idx]->mapping = 0;
++	put_page(netbk->mmap_pages[pending_idx]);
++	netbk->mmap_pages[pending_idx] = NULL;
++}
++#endif
+ 
+ irqreturn_t netif_be_int(int irq, void *dev_id)
+ {
+@@ -1735,12 +1906,14 @@ static inline int rx_work_todo(struct xen_netbk *netbk)
+ 
+ static inline int tx_work_todo(struct xen_netbk *netbk)
+ {
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ 	if (netbk->dealloc_cons != netbk->dealloc_prod)
+ 		return 1;
+ 
+ 	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
+ 	    !list_empty(&netbk->pending_inuse_head))
+ 		return 1;
++#endif
+ 
+ 	if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
+ 			!list_empty(&netbk->net_schedule_list))
+@@ -1775,7 +1948,9 @@ static int netbk_action_thread(void *data)
+ static int __init netback_init(void)
+ {
+ 	int i;
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ 	struct page *page;
++#endif
+ 	int rc = 0;
+ 	int group;
+ 
+@@ -1799,11 +1974,14 @@ static int __init netback_init(void)
+ 		netbk->net_timer.data = (unsigned long)netbk;
+ 		netbk->net_timer.function = net_alarm;
+ 
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ 		init_timer(&netbk->netbk_tx_pending_timer);
+ 		netbk->netbk_tx_pending_timer.data = (unsigned long)netbk;
+ 		netbk->netbk_tx_pending_timer.function =
+ 			netbk_tx_pending_timeout;
++#endif
+ 
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ 		netbk->mmap_pages =
+ 			alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
+ 		if (!netbk->mmap_pages) {
+@@ -1817,9 +1995,10 @@ static int __init netback_init(void)
+ 		for (i = 0; i < MAX_PENDING_REQS; i++) {
+ 			page = netbk->mmap_pages[i];
+ 			SetPageForeign(page, netif_page_release);
+-			netif_set_page_ext(page, group, i);
++			netif_set_page_ext(page, netbk, i);
+ 			INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
+ 		}
++#endif
+ 
+ 		netbk->pending_cons = 0;
+ 		netbk->pending_prod = MAX_PENDING_REQS;
+@@ -1838,9 +2017,11 @@ static int __init netback_init(void)
+ 			} else {
+ 				printk(KERN_ALERT
+ 					"kthread_run() fails at netback\n");
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ 				free_empty_pages_and_pagevec(netbk->mmap_pages,
+ 						MAX_PENDING_REQS);
+ 				del_timer(&netbk->netbk_tx_pending_timer);
++#endif
+ 				del_timer(&netbk->net_timer);
+ 				rc = PTR_ERR(netbk->kthread.task);
+ 				goto failed_init;
+@@ -1854,17 +2035,19 @@ static int __init netback_init(void)
+ 				     (unsigned long)netbk);
+ 		}
+ 
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ 		INIT_LIST_HEAD(&netbk->pending_inuse_head);
++#endif
+ 		INIT_LIST_HEAD(&netbk->net_schedule_list);
+ 
+ 		spin_lock_init(&netbk->net_schedule_list_lock);
+ 
+ 		atomic_set(&netbk->netfront_count, 0);
+-
+ 		if (MODPARM_netback_kthread)
+ 			wake_up_process(netbk->kthread.task);
+ 	}
+ 
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ 	netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
+ 	if (MODPARM_copy_skb) {
+ 		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
+@@ -1873,6 +2056,7 @@ static int __init netback_init(void)
+ 		else
+ 			netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
+ 	}
++#endif
+ 
+ 	rc = netif_xenbus_init();
+ 	if (rc)
+@@ -1892,9 +2076,17 @@ static int __init netback_init(void)
+ failed_init:
+ 	for (i = 0; i < group; i++) {
+ 		struct xen_netbk *netbk = &xen_netbk[i];
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ 		free_empty_pages_and_pagevec(netbk->mmap_pages,
+ 				MAX_PENDING_REQS);
+ 		del_timer(&netbk->netbk_tx_pending_timer);
++#else
++		int j;
++		for (j = 0; j < MAX_PENDING_REQS; j++) {
++			if (netbk->mmap_pages[i])
++				__free_page(netbk->mmap_pages[i]);
++		}
++#endif
+ 		del_timer(&netbk->net_timer);
+ 		if (MODPARM_netback_kthread)
+ 			kthread_stop(netbk->kthread.task);
+diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
+index 640c696..f6bf50e 100644
+--- a/drivers/net/xen-netback/xenbus.c
++++ b/drivers/net/xen-netback/xenbus.c
+@@ -64,8 +64,10 @@ static int netback_probe(struct xenbus_device *dev,
+ 	dev_set_drvdata(&dev->dev, be);
+ 
+ 	sg = 1;
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ 	if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB)
+ 		sg = 0;
++#endif
+ 
+ 	do {
+ 		err = xenbus_transaction_start(&xbt);
+-- 
+1.7.3.4
+
+
+From 03ff29cd511480cae999d204ec068ee72075edcc Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Wed, 19 Jan 2011 09:43:44 +0000
+Subject: [PATCH 124/139] xen: netback: completely drop foreign page support
+
+for i in drivers/net/xen-netback/*.[ch] ; do
+	echo $i
+	./scripts/unifdef -B -UHAVE_XEN_PAGE_FOREIGN $i > $i.unifdef
+	mv $i.unifdef $i
+done
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/net/xen-netback/common.h    |   46 -----
+ drivers/net/xen-netback/interface.c |   47 -----
+ drivers/net/xen-netback/netback.c   |  345 -----------------------------------
+ drivers/net/xen-netback/xenbus.c    |    4 -
+ 4 files changed, 0 insertions(+), 442 deletions(-)
+
+diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
+index f45bac8..2d55ed6 100644
+--- a/drivers/net/xen-netback/common.h
++++ b/drivers/net/xen-netback/common.h
+@@ -93,11 +93,6 @@ struct xen_netif {
+ 	unsigned long   remaining_credit;
+ 	struct timer_list credit_timeout;
+ 
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-	/* Statistics */
+-	int nr_copied_skbs;
+-#endif
+-
+ 	/* Miscellaneous private stuff. */
+ 	struct list_head list;  /* scheduling list */
+ 	atomic_t         refcnt;
+@@ -119,16 +114,6 @@ struct xen_netif {
+ #define netback_carrier_off(netif)	((netif)->carrier = 0)
+ #define netback_carrier_ok(netif)	((netif)->carrier)
+ 
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-enum {
+-	NETBK_DONT_COPY_SKB,
+-	NETBK_DELAYED_COPY_SKB,
+-	NETBK_ALWAYS_COPY_SKB,
+-};
+-
+-extern int netbk_copy_skb_mode;
+-#endif
+-
+ struct backend_info {
+ 	struct xenbus_device *dev;
+ 	struct xen_netif *netif;
+@@ -195,13 +180,6 @@ struct netbk_rx_meta {
+ 	int gso_size;
+ };
+ 
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-struct netbk_tx_pending_inuse {
+-	struct list_head list;
+-	unsigned long alloc_time;
+-};
+-#endif
+-
+ #define MAX_PENDING_REQS 256
+ 
+ #define MAX_BUFFER_OFFSET PAGE_SIZE
+@@ -238,24 +216,11 @@ struct xen_netbk {
+ 	struct sk_buff_head tx_queue;
+ 
+ 	struct timer_list net_timer;
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-	struct timer_list netbk_tx_pending_timer;
+-#endif
+ 
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-	struct page **mmap_pages;
+-#else
+ 	struct page *mmap_pages[MAX_PENDING_REQS];
+-#endif
+ 
+ 	pending_ring_idx_t pending_prod;
+ 	pending_ring_idx_t pending_cons;
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-	pending_ring_idx_t dealloc_prod;
+-	pending_ring_idx_t dealloc_cons;
+-
+-	struct list_head pending_inuse_head;
+-#endif
+ 	struct list_head net_schedule_list;
+ 
+ 	/* Protect the net_schedule_list in netif. */
+@@ -264,20 +229,9 @@ struct xen_netbk {
+ 	atomic_t netfront_count;
+ 
+ 	struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-	struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
+-	struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
+-	struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
+-
+-	grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
+-#else
+ 	struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS];
+-#endif
+ 
+ 	u16 pending_ring[MAX_PENDING_REQS];
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-	u16 dealloc_ring[MAX_PENDING_REQS];
+-#endif
+ 
+ 	/*
+ 	 * Each head or fragment can be up to 4096 bytes. Given
+diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
+index 3ff3aff..eae5cf8 100644
+--- a/drivers/net/xen-netback/interface.c
++++ b/drivers/net/xen-netback/interface.c
+@@ -186,48 +186,6 @@ static void netbk_get_drvinfo(struct net_device *dev,
+ 	strcpy(info->bus_info, dev_name(dev->dev.parent));
+ }
+ 
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-static const struct netif_stat {
+-	char name[ETH_GSTRING_LEN];
+-	u16 offset;
+-} netbk_stats[] = {
+-	{ "copied_skbs", offsetof(struct xen_netif, nr_copied_skbs) },
+-};
+-
+-static int netbk_get_sset_count(struct net_device *dev, int string_set)
+-{
+-	switch (string_set) {
+-	case ETH_SS_STATS:
+-		return ARRAY_SIZE(netbk_stats);
+-	default:
+-		return -EINVAL;
+-	}
+-}
+-
+-static void netbk_get_ethtool_stats(struct net_device *dev,
+-				   struct ethtool_stats *stats, u64 * data)
+-{
+-	void *netif = netdev_priv(dev);
+-	int i;
+-
+-	for (i = 0; i < ARRAY_SIZE(netbk_stats); i++)
+-		data[i] = *(int *)(netif + netbk_stats[i].offset);
+-}
+-
+-static void netbk_get_strings(struct net_device *dev, u32 stringset, u8 * data)
+-{
+-	int i;
+-
+-	switch (stringset) {
+-	case ETH_SS_STATS:
+-		for (i = 0; i < ARRAY_SIZE(netbk_stats); i++)
+-			memcpy(data + i * ETH_GSTRING_LEN,
+-			       netbk_stats[i].name, ETH_GSTRING_LEN);
+-		break;
+-	}
+-}
+-#endif
+-
+ static struct ethtool_ops network_ethtool_ops = {
+ 	.get_drvinfo = netbk_get_drvinfo,
+ 
+@@ -239,11 +197,6 @@ static struct ethtool_ops network_ethtool_ops = {
+ 	.set_tso = netbk_set_tso,
+ 	.get_link = ethtool_op_get_link,
+ 
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-	.get_sset_count = netbk_get_sset_count,
+-	.get_ethtool_stats = netbk_get_ethtool_stats,
+-	.get_strings = netbk_get_strings,
+-#endif
+ };
+ 
+ static struct net_device_ops netback_ops = {
+diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
+index 6a1aa5c..b7b9341 100644
+--- a/drivers/net/xen-netback/netback.c
++++ b/drivers/net/xen-netback/netback.c
+@@ -40,9 +40,6 @@
+ 
+ #include <net/tcp.h>
+ 
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-#include <xen/balloon.h>
+-#endif
+ #include <xen/events.h>
+ #include <xen/interface/memory.h>
+ 
+@@ -99,11 +96,6 @@ static int netif_get_page_ext(struct page *pg,
+ 	struct xen_netbk *netbk;
+ 	unsigned int group, idx;
+ 
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-	if (!PageForeign(pg))
+-		return 0;
+-#endif
+-
+ 	group = ext.e.group - 1;
+ 
+ 	if (group < 0 || group >= xen_netbk_group_nr)
+@@ -111,11 +103,6 @@ static int netif_get_page_ext(struct page *pg,
+ 
+ 	netbk = &xen_netbk[group];
+ 
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-	if (netbk->mmap_pages == NULL)
+-		return 0;
+-#endif
+-
+ 	idx = ext.e.idx;
+ 
+ 	if ((idx < 0) || (idx >= MAX_PENDING_REQS))
+@@ -151,15 +138,6 @@ static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk)
+ 		netbk->pending_prod + netbk->pending_cons;
+ }
+ 
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-/* Setting this allows the safe use of this driver without netloop. */
+-static int MODPARM_copy_skb = 1;
+-module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
+-MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
+-
+-int netbk_copy_skb_mode;
+-#endif
+-
+ static int MODPARM_netback_kthread;
+ module_param_named(netback_kthread, MODPARM_netback_kthread, bool, 0);
+ MODULE_PARM_DESC(netback_kthread, "Use kernel thread to replace tasklet");
+@@ -788,14 +766,6 @@ static void net_alarm(unsigned long data)
+ 	xen_netbk_bh_handler(netbk, 1);
+ }
+ 
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-static void netbk_tx_pending_timeout(unsigned long data)
+-{
+-	struct xen_netbk *netbk = (struct xen_netbk *)data;
+-	xen_netbk_bh_handler(netbk, 0);
+-}
+-#endif
+-
+ struct net_device_stats *netif_be_get_stats(struct net_device *dev)
+ {
+ 	struct xen_netif *netif = netdev_priv(dev);
+@@ -903,123 +873,6 @@ static void tx_credit_callback(unsigned long data)
+ 	netif_schedule_work(netif);
+ }
+ 
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-static inline int copy_pending_req(struct xen_netbk *netbk,
+-				   pending_ring_idx_t pending_idx)
+-{
+-	return gnttab_copy_grant_page(
+-			netbk->grant_tx_handle[pending_idx],
+-			&netbk->mmap_pages[pending_idx]);
+-}
+-#endif
+-
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
+-{
+-	struct netbk_tx_pending_inuse *inuse, *n;
+-	struct gnttab_unmap_grant_ref *gop;
+-	u16 pending_idx;
+-	pending_ring_idx_t dc, dp;
+-	struct xen_netif *netif;
+-	int ret;
+-	LIST_HEAD(list);
+-
+-	dc = netbk->dealloc_cons;
+-	gop = netbk->tx_unmap_ops;
+-
+-	/* Free up any grants we have finished using. */
+-	do {
+-		dp = netbk->dealloc_prod;
+-
+-		/* Ensure we see all indices enqueued by netif_idx_release(). */
+-		smp_rmb();
+-
+-		while (dc != dp) {
+-			unsigned long pfn;
+-			struct netbk_tx_pending_inuse *pending_inuse =
+-					netbk->pending_inuse;
+-
+-			pending_idx = netbk->dealloc_ring[pending_index(dc++)];
+-			list_move_tail(&pending_inuse[pending_idx].list, &list);
+-
+-			pfn = idx_to_pfn(netbk, pending_idx);
+-			/* Already unmapped? */
+-			if (!phys_to_machine_mapping_valid(pfn))
+-				continue;
+-
+-			gnttab_set_unmap_op(gop,
+-					idx_to_kaddr(netbk, pending_idx),
+-					GNTMAP_host_map,
+-					netbk->grant_tx_handle[pending_idx]);
+-			gop++;
+-		}
+-
+-	} while (dp != netbk->dealloc_prod);
+-
+-	netbk->dealloc_cons = dc;
+-
+-	ret = HYPERVISOR_grant_table_op(
+-		GNTTABOP_unmap_grant_ref, netbk->tx_unmap_ops,
+-		gop - netbk->tx_unmap_ops);
+-	BUG_ON(ret);
+-
+-	/*
+-	 * Copy any entries that have been pending for too long
+-	 */
+-	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
+-	    !list_empty(&netbk->pending_inuse_head)) {
+-		list_for_each_entry_safe(inuse, n,
+-				&netbk->pending_inuse_head, list) {
+-			struct pending_tx_info *pending_tx_info;
+-			pending_tx_info = netbk->pending_tx_info;
+-
+-			if (time_after(inuse->alloc_time + HZ / 2, jiffies))
+-				break;
+-
+-			pending_idx = inuse - netbk->pending_inuse;
+-
+-			pending_tx_info[pending_idx].netif->nr_copied_skbs++;
+-
+-			switch (copy_pending_req(netbk, pending_idx)) {
+-			case 0:
+-				list_move_tail(&inuse->list, &list);
+-				continue;
+-			case -EBUSY:
+-				list_del_init(&inuse->list);
+-				continue;
+-			case -ENOENT:
+-				continue;
+-			}
+-
+-			break;
+-		}
+-	}
+-
+-	list_for_each_entry_safe(inuse, n, &list, list) {
+-		struct pending_tx_info *pending_tx_info;
+-		pending_ring_idx_t index;
+-
+-		pending_tx_info = netbk->pending_tx_info;
+-		pending_idx = inuse - netbk->pending_inuse;
+-
+-		netif = pending_tx_info[pending_idx].netif;
+-
+-		make_tx_response(netif, &pending_tx_info[pending_idx].req,
+-				 NETIF_RSP_OKAY);
+-
+-		/* Ready for next use. */
+-		gnttab_reset_grant_page(netbk->mmap_pages[pending_idx]);
+-
+-		index = pending_index(netbk->pending_prod++);
+-		netbk->pending_ring[index] = pending_idx;
+-
+-		netif_put(netif);
+-
+-		list_del_init(&inuse->list);
+-	}
+-}
+-#endif
+-
+ static void netbk_tx_err(struct xen_netif *netif,
+ 		struct xen_netif_tx_request *txp, RING_IDX end)
+ {
+@@ -1077,7 +930,6 @@ static int netbk_count_requests(struct xen_netif *netif,
+ 	return frags;
+ }
+ 
+-#ifndef HAVE_XEN_PAGE_FOREIGN
+ static struct page *netif_alloc_page(struct xen_netbk *netbk,
+ 				     struct sk_buff *skb,
+ 				     unsigned long pending_idx)
+@@ -1090,21 +942,12 @@ static struct page *netif_alloc_page(struct xen_netbk *netbk,
+ 	netbk->mmap_pages[pending_idx] = page;
+ 	return page;
+ }
+-#endif
+ 
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk,
+-						       struct xen_netif *netif,
+-						       struct sk_buff *skb,
+-						       struct xen_netif_tx_request *txp,
+-						       struct gnttab_map_grant_ref *gop)
+-#else
+ static struct gnttab_copy *netbk_get_requests(struct xen_netbk *netbk,
+ 					      struct xen_netif *netif,
+ 					      struct sk_buff *skb,
+ 					      struct xen_netif_tx_request *txp,
+ 					      struct gnttab_copy *gop)
+-#endif
+ {
+ 	struct skb_shared_info *shinfo = skb_shinfo(skb);
+ 	skb_frag_t *frags = shinfo->frags;
+@@ -1115,20 +958,13 @@ static struct gnttab_copy *netbk_get_requests(struct xen_netbk *netbk,
+ 	start = ((unsigned long)shinfo->frags[0].page == pending_idx);
+ 
+ 	for (i = start; i < shinfo->nr_frags; i++, txp++) {
+-#ifndef HAVE_XEN_PAGE_FOREIGN
+ 		struct page *page;
+-#endif
+ 		pending_ring_idx_t index;
+ 		struct pending_tx_info *pending_tx_info =
+ 			netbk->pending_tx_info;
+ 
+ 		index = pending_index(netbk->pending_cons++);
+ 		pending_idx = netbk->pending_ring[index];
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-		gnttab_set_map_op(gop++, idx_to_kaddr(netbk, pending_idx),
+-				  GNTMAP_host_map | GNTMAP_readonly,
+-				  txp->gref, netif->domid);
+-#else
+ 		page = netif_alloc_page(netbk, skb, pending_idx);
+ 		if (!page)
+ 			return NULL;
+@@ -1147,7 +983,6 @@ static struct gnttab_copy *netbk_get_requests(struct xen_netbk *netbk,
+ 		gop->flags = GNTCOPY_source_gref;
+ 
+ 		gop++;
+-#endif
+ 
+ 		memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
+ 		netif_get(netif);
+@@ -1158,21 +993,11 @@ static struct gnttab_copy *netbk_get_requests(struct xen_netbk *netbk,
+ 	return gop;
+ }
+ 
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-static int netbk_tx_check_gop(struct xen_netbk *netbk,
+-			      struct sk_buff *skb,
+-			      struct gnttab_map_grant_ref **gopp)
+-#else
+ static int netbk_tx_check_gop(struct xen_netbk *netbk,
+ 			      struct sk_buff *skb,
+ 			      struct gnttab_copy **gopp)
+-#endif
+ {
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-	struct gnttab_map_grant_ref *gop = *gopp;
+-#else
+ 	struct gnttab_copy *gop = *gopp;
+-#endif
+ 	int pending_idx = *((u16 *)skb->data);
+ 	struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
+ 	struct xen_netif *netif = pending_tx_info[pending_idx].netif;
+@@ -1190,13 +1015,6 @@ static int netbk_tx_check_gop(struct xen_netbk *netbk,
+ 		make_tx_response(netif, txp, NETIF_RSP_ERROR);
+ 		netbk->pending_ring[index] = pending_idx;
+ 		netif_put(netif);
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-	} else {
+-		set_phys_to_machine(
+-			__pa(idx_to_kaddr(netbk, pending_idx)) >> PAGE_SHIFT,
+-			FOREIGN_FRAME(gop->dev_bus_addr >> PAGE_SHIFT));
+-		netbk->grant_tx_handle[pending_idx] = gop->handle;
+-#endif
+ 	}
+ 
+ 	/* Skip first skb fragment if it is on same page as header fragment. */
+@@ -1211,14 +1029,6 @@ static int netbk_tx_check_gop(struct xen_netbk *netbk,
+ 		/* Check error status: if okay then remember grant handle. */
+ 		newerr = (++gop)->status;
+ 		if (likely(!newerr)) {
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-			unsigned long addr;
+-			addr = idx_to_kaddr(netbk, pending_idx);
+-			set_phys_to_machine(
+-				__pa(addr)>>PAGE_SHIFT,
+-				FOREIGN_FRAME(gop->dev_bus_addr>>PAGE_SHIFT));
+-			netbk->grant_tx_handle[pending_idx] = gop->handle;
+-#endif
+ 			/* Had a previous error? Invalidate this fragment. */
+ 			if (unlikely(err))
+ 				netif_idx_release(netbk, pending_idx);
+@@ -1264,11 +1074,6 @@ static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
+ 		unsigned long pending_idx;
+ 
+ 		pending_idx = (unsigned long)frag->page;
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-		netbk->pending_inuse[pending_idx].alloc_time = jiffies;
+-		list_add_tail(&netbk->pending_inuse[pending_idx].list,
+-			      &netbk->pending_inuse_head);
+-#endif
+ 
+ 		txp = &netbk->pending_tx_info[pending_idx].req;
+ 		frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx));
+@@ -1411,11 +1216,7 @@ static bool tx_credit_exceeded(struct xen_netif *netif, unsigned size)
+ 
+ static unsigned net_tx_build_gops(struct xen_netbk *netbk)
+ {
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-	struct gnttab_map_grant_ref *gop = netbk->tx_map_ops, *request_gop;
+-#else
+ 	struct gnttab_copy *gop = netbk->tx_copy_ops, *request_gop;
+-#endif
+ 	struct sk_buff *skb;
+ 	int ret;
+ 
+@@ -1424,9 +1225,7 @@ static unsigned net_tx_build_gops(struct xen_netbk *netbk)
+ 		struct xen_netif *netif;
+ 		struct xen_netif_tx_request txreq;
+ 		struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
+-#ifndef HAVE_XEN_PAGE_FOREIGN
+ 		struct page *page;
+-#endif
+ 		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
+ 		u16 pending_idx;
+ 		RING_IDX idx;
+@@ -1523,12 +1322,6 @@ static unsigned net_tx_build_gops(struct xen_netbk *netbk)
+ 			}
+ 		}
+ 
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-		gnttab_set_map_op(gop, idx_to_kaddr(netbk, pending_idx),
+-				  GNTMAP_host_map | GNTMAP_readonly,
+-				  txreq.gref, netif->domid);
+-		gop++;
+-#else
+ 		/* XXX could copy straight to head */
+ 		page = netif_alloc_page(netbk, skb, pending_idx);
+ 		if (!page) {
+@@ -1551,7 +1344,6 @@ static unsigned net_tx_build_gops(struct xen_netbk *netbk)
+ 		gop->flags = GNTCOPY_source_gref;
+ 
+ 		gop++;
+-#endif
+ 
+ 		memcpy(&netbk->pending_tx_info[pending_idx].req,
+ 		       &txreq, sizeof(txreq));
+@@ -1586,29 +1378,16 @@ static unsigned net_tx_build_gops(struct xen_netbk *netbk)
+ 		netif->tx.req_cons = idx;
+ 		netif_schedule_work(netif);
+ 
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-		if ((gop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops))
+-			break;
+-#else
+ 		if ((gop-netbk->tx_copy_ops) >= ARRAY_SIZE(netbk->tx_copy_ops))
+ 			break;
+-#endif
+ 	}
+ 
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-	return gop - netbk->tx_map_ops;
+-#else
+ 	return gop - netbk->tx_copy_ops;
+-#endif
+ }
+ 
+ static void net_tx_submit(struct xen_netbk *netbk)
+ {
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-	struct gnttab_map_grant_ref *gop = netbk->tx_map_ops;
+-#else
+ 	struct gnttab_copy *gop = netbk->tx_copy_ops;
+-#endif
+ 	struct sk_buff *skb;
+ 
+ 	while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
+@@ -1674,15 +1453,6 @@ static void net_tx_submit(struct xen_netbk *netbk)
+ 			continue;
+ 		}
+ 
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-		if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
+-		    unlikely(skb_linearize(skb))) {
+-			DPRINTK("Can't linearize skb in net_tx_action.\n");
+-			kfree_skb(skb);
+-			continue;
+-		}
+-#endif
+-
+ 		netif->stats.rx_bytes += skb->len;
+ 		netif->stats.rx_packets++;
+ 
+@@ -1698,70 +1468,18 @@ static void net_tx_action(unsigned long data)
+ 	unsigned nr_gops;
+ 	int ret;
+ 
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-	net_tx_action_dealloc(netbk);
+-#endif
+-
+ 	nr_gops = net_tx_build_gops(netbk);
+ 
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-	if (nr_gops == 0)
+-		goto out;
+-	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+-					netbk->tx_map_ops, nr_gops);
+-#else
+ 	if (nr_gops == 0)
+ 		return;
+ 	ret = HYPERVISOR_grant_table_op(GNTTABOP_copy,
+ 					netbk->tx_copy_ops, nr_gops);
+-#endif
+ 	BUG_ON(ret);
+ 
+ 	net_tx_submit(netbk);
+ 
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-out:
+-	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
+-	    !list_empty(&netbk->pending_inuse_head)) {
+-		struct netbk_tx_pending_inuse *oldest;
+-
+-		oldest = list_entry(netbk->pending_inuse_head.next,
+-				    struct netbk_tx_pending_inuse, list);
+-		mod_timer(&netbk->netbk_tx_pending_timer,
+-				oldest->alloc_time + HZ);
+-	}
+-#endif
+ }
+ 
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
+-{
+-	static DEFINE_SPINLOCK(_lock);
+-	unsigned long flags;
+-	pending_ring_idx_t index;
+-
+-	spin_lock_irqsave(&_lock, flags);
+-	index = pending_index(netbk->dealloc_prod);
+-	netbk->dealloc_ring[index] = pending_idx;
+-	/* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
+-	smp_wmb();
+-	netbk->dealloc_prod++;
+-	spin_unlock_irqrestore(&_lock, flags);
+-
+-	xen_netbk_bh_handler(netbk, 0);
+-}
+-
+-static void netif_page_release(struct page *page, unsigned int order)
+-{
+-	unsigned int group, idx;
+-	int foreign = netif_get_page_ext(page, &group, &idx);
+-
+-	BUG_ON(!foreign);
+-	BUG_ON(order);
+-
+-	netif_idx_release(&xen_netbk[group], idx);
+-}
+-#else
+ static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
+ {
+ 	struct xen_netif *netif;
+@@ -1788,7 +1506,6 @@ static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
+ 	put_page(netbk->mmap_pages[pending_idx]);
+ 	netbk->mmap_pages[pending_idx] = NULL;
+ }
+-#endif
+ 
+ irqreturn_t netif_be_int(int irq, void *dev_id)
+ {
+@@ -1906,14 +1623,6 @@ static inline int rx_work_todo(struct xen_netbk *netbk)
+ 
+ static inline int tx_work_todo(struct xen_netbk *netbk)
+ {
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-	if (netbk->dealloc_cons != netbk->dealloc_prod)
+-		return 1;
+-
+-	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
+-	    !list_empty(&netbk->pending_inuse_head))
+-		return 1;
+-#endif
+ 
+ 	if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
+ 			!list_empty(&netbk->net_schedule_list))
+@@ -1948,9 +1657,6 @@ static int netbk_action_thread(void *data)
+ static int __init netback_init(void)
+ {
+ 	int i;
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-	struct page *page;
+-#endif
+ 	int rc = 0;
+ 	int group;
+ 
+@@ -1974,32 +1680,6 @@ static int __init netback_init(void)
+ 		netbk->net_timer.data = (unsigned long)netbk;
+ 		netbk->net_timer.function = net_alarm;
+ 
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-		init_timer(&netbk->netbk_tx_pending_timer);
+-		netbk->netbk_tx_pending_timer.data = (unsigned long)netbk;
+-		netbk->netbk_tx_pending_timer.function =
+-			netbk_tx_pending_timeout;
+-#endif
+-
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-		netbk->mmap_pages =
+-			alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
+-		if (!netbk->mmap_pages) {
+-			printk(KERN_ALERT "%s: out of memory\n", __func__);
+-			del_timer(&netbk->netbk_tx_pending_timer);
+-			del_timer(&netbk->net_timer);
+-			rc = -ENOMEM;
+-			goto failed_init;
+-		}
+-
+-		for (i = 0; i < MAX_PENDING_REQS; i++) {
+-			page = netbk->mmap_pages[i];
+-			SetPageForeign(page, netif_page_release);
+-			netif_set_page_ext(page, netbk, i);
+-			INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
+-		}
+-#endif
+-
+ 		netbk->pending_cons = 0;
+ 		netbk->pending_prod = MAX_PENDING_REQS;
+ 		for (i = 0; i < MAX_PENDING_REQS; i++)
+@@ -2017,11 +1697,6 @@ static int __init netback_init(void)
+ 			} else {
+ 				printk(KERN_ALERT
+ 					"kthread_run() fails at netback\n");
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-				free_empty_pages_and_pagevec(netbk->mmap_pages,
+-						MAX_PENDING_REQS);
+-				del_timer(&netbk->netbk_tx_pending_timer);
+-#endif
+ 				del_timer(&netbk->net_timer);
+ 				rc = PTR_ERR(netbk->kthread.task);
+ 				goto failed_init;
+@@ -2035,9 +1710,6 @@ static int __init netback_init(void)
+ 				     (unsigned long)netbk);
+ 		}
+ 
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-		INIT_LIST_HEAD(&netbk->pending_inuse_head);
+-#endif
+ 		INIT_LIST_HEAD(&netbk->net_schedule_list);
+ 
+ 		spin_lock_init(&netbk->net_schedule_list_lock);
+@@ -2047,17 +1719,6 @@ static int __init netback_init(void)
+ 			wake_up_process(netbk->kthread.task);
+ 	}
+ 
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-	netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
+-	if (MODPARM_copy_skb) {
+-		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
+-					      NULL, 0))
+-			netbk_copy_skb_mode = NETBK_ALWAYS_COPY_SKB;
+-		else
+-			netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
+-	}
+-#endif
+-
+ 	rc = netif_xenbus_init();
+ 	if (rc)
+ 		goto failed_init;
+@@ -2076,17 +1737,11 @@ static int __init netback_init(void)
+ failed_init:
+ 	for (i = 0; i < group; i++) {
+ 		struct xen_netbk *netbk = &xen_netbk[i];
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-		free_empty_pages_and_pagevec(netbk->mmap_pages,
+-				MAX_PENDING_REQS);
+-		del_timer(&netbk->netbk_tx_pending_timer);
+-#else
+ 		int j;
+ 		for (j = 0; j < MAX_PENDING_REQS; j++) {
+ 			if (netbk->mmap_pages[i])
+ 				__free_page(netbk->mmap_pages[i]);
+ 		}
+-#endif
+ 		del_timer(&netbk->net_timer);
+ 		if (MODPARM_netback_kthread)
+ 			kthread_stop(netbk->kthread.task);
+diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
+index f6bf50e..0aa3366 100644
+--- a/drivers/net/xen-netback/xenbus.c
++++ b/drivers/net/xen-netback/xenbus.c
+@@ -64,10 +64,6 @@ static int netback_probe(struct xenbus_device *dev,
+ 	dev_set_drvdata(&dev->dev, be);
+ 
+ 	sg = 1;
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-	if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB)
+-		sg = 0;
+-#endif
+ 
+ 	do {
+ 		err = xenbus_transaction_start(&xbt);
+-- 
+1.7.3.4
+
+
+From 7f1492ef6a8026cba4c1b49b7a2030802f76ec83 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Wed, 19 Jan 2011 14:41:55 +0000
+Subject: [PATCH 125/139] xen: netback: drop ethtool drvinfo callback
+
+The default provided by the network core is sufficient for our needs.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/net/xen-netback/interface.c |    9 ---------
+ 1 files changed, 0 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
+index eae5cf8..295ab3d 100644
+--- a/drivers/net/xen-netback/interface.c
++++ b/drivers/net/xen-netback/interface.c
+@@ -179,16 +179,7 @@ static int netbk_set_tso(struct net_device *dev, u32 data)
+ 	return 0;
+ }
+ 
+-static void netbk_get_drvinfo(struct net_device *dev,
+-			      struct ethtool_drvinfo *info)
+-{
+-	strcpy(info->driver, "netbk");
+-	strcpy(info->bus_info, dev_name(dev->dev.parent));
+-}
+-
+ static struct ethtool_ops network_ethtool_ops = {
+-	.get_drvinfo = netbk_get_drvinfo,
+-
+ 	.get_tx_csum = ethtool_op_get_tx_csum,
+ 	.set_tx_csum = netbk_set_tx_csum,
+ 	.get_sg = ethtool_op_get_sg,
+-- 
+1.7.3.4
+
+
+From 83381aa69cf38fc26125019479527e0710fe27cd Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Wed, 15 Dec 2010 13:31:03 -0500
+Subject: [PATCH 126/139] ttm: Set VM_IO only on pages with TTM_MEMTYPE_FLAG_FIXED set.
+
+This patch is based on "[Patch RFC] ttm: nouveau accelerated on Xen
+pv-ops kernel"
+http://lists.freedesktop.org/archives/nouveau/2010-March/005326.html
+
+Under Xen, the PFN of a page is virtualized. The physical addresses used
+for DMA programming need to be Machine Frame Numbers (MFNs).
+Xen transparently does the correct translation using the _PAGE_IOMAP
+PTE bit. If the bit is set, Xen assumes that the backing memory is in
+the IOMEM space, and PFN equals MFN. If not set, page_to_pfn() returns
+a phantom MFN.
+
+The patch enables the ttm_bo_vm_fault() handler to behave correctly
+under Xen, and has no side-effects on normal (not under Xen) operations.
+
+The use of TTM_MEMTYPE_FLAG_FIXED in the check assumes that
+only pages which have this flag are backed by device memory or IO.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Arvind R <arvino55 at gmail.com>
+
+Conflicts:
+
+	drivers/gpu/drm/ttm/ttm_bo_vm.c
+---
+ drivers/gpu/drm/ttm/ttm_bo_vm.c |    7 ++++++-
+ 1 files changed, 6 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
+index 221b924..bb24374 100644
+--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
++++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
+@@ -247,6 +247,7 @@ int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma,
+ {
+ 	struct ttm_bo_driver *driver;
+ 	struct ttm_buffer_object *bo;
++	struct ttm_mem_type_manager *man;
+ 	int ret;
+ 
+ 	read_lock(&bdev->vm_lock);
+@@ -279,7 +280,11 @@ int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma,
+ 	 */
+ 
+ 	vma->vm_private_data = bo;
+-	vma->vm_flags |= VM_RESERVED | VM_IO | VM_MIXEDMAP | VM_DONTEXPAND;
++	vma->vm_flags |= VM_RESERVED | VM_MIXEDMAP | VM_DONTEXPAND;
++	man = &bdev->man[bo->mem.mem_type];
++	if (man->flags & TTM_MEMTYPE_FLAG_FIXED)
++		vma->vm_flags |= VM_IO;
++	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+ 	return 0;
+ out_unref:
+ 	ttm_bo_unref(&bo);
+-- 
+1.7.3.4
+
+
+From 9c2e85765d147fc77ae27cb81a7091942f22a584 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Wed, 15 Dec 2010 13:32:15 -0500
+Subject: [PATCH 127/139] drm: recompute vma->vm_page_prot after changing vm_flags
+
+vm_get_page_prot() computes vm_page_prot depending on vm_flags, so
+we need to re-call it if we change flags.
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+
+Conflicts:
+
+	drivers/gpu/drm/ttm/ttm_bo_vm.c
+---
+ drivers/gpu/drm/ttm/ttm_bo_vm.c |    1 +
+ 1 files changed, 1 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
+index bb24374..735dc1d 100644
+--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
++++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
+@@ -300,6 +300,7 @@ int ttm_fbdev_mmap(struct vm_area_struct *vma, struct ttm_buffer_object *bo)
+ 	vma->vm_ops = &ttm_bo_vm_ops;
+ 	vma->vm_private_data = ttm_bo_reference(bo);
+ 	vma->vm_flags |= VM_RESERVED | VM_IO | VM_MIXEDMAP | VM_DONTEXPAND;
++	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+ 	return 0;
+ }
+ EXPORT_SYMBOL(ttm_fbdev_mmap);
+-- 
+1.7.3.4
+
+
+From 74632f8e51618dc31beba712d03dd0f1168cc241 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Fri, 20 Feb 2009 15:58:42 -0500
+Subject: [PATCH 128/139] x86: define arch_vm_get_page_prot to set _PAGE_IOMAP on VM_IO vmas
+
+Set _PAGE_IOMAP in ptes mapping a VM_IO vma.  This says that the mapping
+is of a real piece of physical hardware, and not just system memory.
+
+Xen, in particular, uses this to inhibit the pfn->mfn conversion that
+would normally happen - in other words, to treat the address directly
+as a machine physical address without converting it from pseudo-physical.
+
+[ Impact: make VM_IO mappings map the right thing under Xen ]
+[ v2: rebased on v2.6.37-rc1]
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ arch/x86/include/asm/pgtable.h |    3 +++
+ arch/x86/mm/pgtable.c          |   10 ++++++++++
+ 2 files changed, 13 insertions(+), 0 deletions(-)
+
+diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
+index 18601c8..284ee01 100644
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -485,6 +485,9 @@ static inline unsigned long pages_to_mb(unsigned long npg)
+ #define io_remap_pfn_range(vma, vaddr, pfn, size, prot)	\
+ 	remap_pfn_range(vma, vaddr, pfn, size, prot)
+ 
++#define arch_vm_get_page_prot arch_vm_get_page_prot
++extern pgprot_t arch_vm_get_page_prot(unsigned vm_flags);
++
+ #if PAGETABLE_LEVELS > 2
+ static inline int pud_none(pud_t pud)
+ {
+diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
+index 500242d..1e72207 100644
+--- a/arch/x86/mm/pgtable.c
++++ b/arch/x86/mm/pgtable.c
+@@ -15,6 +15,16 @@
+ 
+ gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
+ 
++pgprot_t arch_vm_get_page_prot(unsigned vm_flags)
++{
++	pgprot_t ret = __pgprot(0);
++
++	if (vm_flags & VM_IO)
++		ret = __pgprot(_PAGE_IOMAP);
++
++	return ret;
++}
++
+ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+ {
+ 	return (pte_t *)__get_free_page(PGALLOC_GFP);
+-- 
+1.7.3.4
+
+
+From 81ec0e742ce919124909640039c05baa29b1568a Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Wed, 8 Dec 2010 11:03:27 -0800
+Subject: [PATCH 129/139] mm: remove unused "token" argument from apply_to_page_range callback.
+
+The argument is basically the struct page of the pte_t * passed into
+the callback.  But there's no need to pass that, since it can be fairly
+easily derived from the pte_t * itself if needed (and no current users
+need to do that anyway).
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ arch/x86/xen/grant-table.c |    6 ++----
+ arch/x86/xen/mmu.c         |    3 +--
+ include/linux/mm.h         |    3 +--
+ mm/memory.c                |    2 +-
+ mm/vmalloc.c               |    2 +-
+ 5 files changed, 6 insertions(+), 10 deletions(-)
+
+diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
+index 49ba9b5..5bf892a 100644
+--- a/arch/x86/xen/grant-table.c
++++ b/arch/x86/xen/grant-table.c
+@@ -44,8 +44,7 @@
+ 
+ #include <asm/pgtable.h>
+ 
+-static int map_pte_fn(pte_t *pte, struct page *pmd_page,
+-		      unsigned long addr, void *data)
++static int map_pte_fn(pte_t *pte, unsigned long addr, void *data)
+ {
+ 	unsigned long **frames = (unsigned long **)data;
+ 
+@@ -54,8 +53,7 @@ static int map_pte_fn(pte_t *pte, struct page *pmd_page,
+ 	return 0;
+ }
+ 
+-static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
+-			unsigned long addr, void *data)
++static int unmap_pte_fn(pte_t *pte, unsigned long addr, void *data)
+ {
+ 
+ 	set_pte_at(&init_mm, addr, pte, __pte(0));
+diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
+index 5e92b61..38ba804 100644
+--- a/arch/x86/xen/mmu.c
++++ b/arch/x86/xen/mmu.c
+@@ -2292,8 +2292,7 @@ struct remap_data {
+ 	struct mmu_update *mmu_update;
+ };
+ 
+-static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
+-				 unsigned long addr, void *data)
++static int remap_area_mfn_pte_fn(pte_t *ptep, unsigned long addr, void *data)
+ {
+ 	struct remap_data *rmd = data;
+ 	pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot));
+diff --git a/include/linux/mm.h b/include/linux/mm.h
+index 956a355..bb898ec 100644
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -1529,8 +1529,7 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address,
+ #define FOLL_MLOCK	0x40	/* mark page as mlocked */
+ #define FOLL_SPLIT	0x80	/* don't return transhuge pages, split them */
+ 
+-typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
+-			void *data);
++typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data);
+ extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
+ 			       unsigned long size, pte_fn_t fn, void *data);
+ 
+diff --git a/mm/memory.c b/mm/memory.c
+index 31250fa..740470c 100644
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -2032,7 +2032,7 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
+ 	token = pmd_pgtable(*pmd);
+ 
+ 	do {
+-		err = fn(pte++, token, addr, data);
++		err = fn(pte++, addr, data);
+ 		if (err)
+ 			break;
+ 	} while (addr += PAGE_SIZE, addr != end);
+diff --git a/mm/vmalloc.c b/mm/vmalloc.c
+index f9b1667..5ddbdfe 100644
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -2061,7 +2061,7 @@ void  __attribute__((weak)) vmalloc_sync_all(void)
+ }
+ 
+ 
+-static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data)
++static int f(pte_t *pte, unsigned long addr, void *data)
+ {
+ 	/* apply_to_page_range() does all the hard work. */
+ 	return 0;
+-- 
+1.7.3.4
+
+
+From 7f635db45f8e921c9203fdfb904d0095b7af6480 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Tue, 30 Nov 2010 10:03:44 -0800
+Subject: [PATCH 130/139] mm: add apply_to_page_range_batch()
+
+apply_to_page_range() calls its callback function once for each pte, which
+is pretty inefficient since it will almost always be operating on a batch
+of adjacent ptes.  apply_to_page_range_batch() calls its callback
+with both a pte_t * and a count, so it can operate on multiple ptes at
+once.
+
+The callback is expected to handle all its ptes, or return an error.  For
+both apply_to_page_range and apply_to_page_range_batch, it is up to
+the caller to work out how much progress was made if either fails with
+an error.
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ include/linux/mm.h |    6 +++++
+ mm/memory.c        |   57 +++++++++++++++++++++++++++++++++++++--------------
+ 2 files changed, 47 insertions(+), 16 deletions(-)
+
+diff --git a/include/linux/mm.h b/include/linux/mm.h
+index bb898ec..5a32a8a 100644
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -1533,6 +1533,12 @@ typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data);
+ extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
+ 			       unsigned long size, pte_fn_t fn, void *data);
+ 
++typedef int (*pte_batch_fn_t)(pte_t *pte, unsigned count,
++			      unsigned long addr, void *data);
++extern int apply_to_page_range_batch(struct mm_struct *mm,
++				     unsigned long address, unsigned long size,
++				     pte_batch_fn_t fn, void *data);
++
+ #ifdef CONFIG_PROC_FS
+ void vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);
+ #else
+diff --git a/mm/memory.c b/mm/memory.c
+index 740470c..496e4e6 100644
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -2012,11 +2012,10 @@ EXPORT_SYMBOL(remap_pfn_range);
+ 
+ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
+ 				     unsigned long addr, unsigned long end,
+-				     pte_fn_t fn, void *data)
++				     pte_batch_fn_t fn, void *data)
+ {
+ 	pte_t *pte;
+ 	int err;
+-	pgtable_t token;
+ 	spinlock_t *uninitialized_var(ptl);
+ 
+ 	pte = (mm == &init_mm) ?
+@@ -2028,25 +2027,17 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
+ 	BUG_ON(pmd_huge(*pmd));
+ 
+ 	arch_enter_lazy_mmu_mode();
+-
+-	token = pmd_pgtable(*pmd);
+-
+-	do {
+-		err = fn(pte++, addr, data);
+-		if (err)
+-			break;
+-	} while (addr += PAGE_SIZE, addr != end);
+-
++	err = fn(pte, (end - addr) / PAGE_SIZE, addr, data);
+ 	arch_leave_lazy_mmu_mode();
+ 
+ 	if (mm != &init_mm)
+-		pte_unmap_unlock(pte-1, ptl);
++		pte_unmap_unlock(pte, ptl);
+ 	return err;
+ }
+ 
+ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
+ 				     unsigned long addr, unsigned long end,
+-				     pte_fn_t fn, void *data)
++				     pte_batch_fn_t fn, void *data)
+ {
+ 	pmd_t *pmd;
+ 	unsigned long next;
+@@ -2068,7 +2059,7 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
+ 
+ static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd,
+ 				     unsigned long addr, unsigned long end,
+-				     pte_fn_t fn, void *data)
++				     pte_batch_fn_t fn, void *data)
+ {
+ 	pud_t *pud;
+ 	unsigned long next;
+@@ -2090,8 +2081,9 @@ static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd,
+  * Scan a region of virtual memory, filling in page tables as necessary
+  * and calling a provided function on each leaf page table.
+  */
+-int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
+-			unsigned long size, pte_fn_t fn, void *data)
++int apply_to_page_range_batch(struct mm_struct *mm,
++			      unsigned long addr, unsigned long size,
++			      pte_batch_fn_t fn, void *data)
+ {
+ 	pgd_t *pgd;
+ 	unsigned long next;
+@@ -2109,6 +2101,39 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
+ 
+ 	return err;
+ }
++EXPORT_SYMBOL_GPL(apply_to_page_range_batch);
++
++struct pte_single_fn
++{
++	pte_fn_t fn;
++	void *data;
++};
++
++static int apply_pte_batch(pte_t *pte, unsigned count,
++			   unsigned long addr, void *data)
++{
++	struct pte_single_fn *single = data;
++	int err = 0;
++
++	while (count--) {
++		err = single->fn(pte, addr, single->data);
++		if (err)
++			break;
++
++		addr += PAGE_SIZE;
++		pte++;
++	}
++
++	return err;
++}
++
++int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
++			unsigned long size, pte_fn_t fn, void *data)
++{
++	struct pte_single_fn single = { .fn = fn, .data = data };
++	return apply_to_page_range_batch(mm, addr, size,
++					 apply_pte_batch, &single);
++}
+ EXPORT_SYMBOL_GPL(apply_to_page_range);
+ 
+ /*
+-- 
+1.7.3.4
+
+
+From dea51c0fa49689f6a489205c00ebf83c8e78f6cd Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Mon, 6 Dec 2010 12:26:53 -0800
+Subject: [PATCH 131/139] ioremap: use apply_to_page_range_batch() for ioremap_page_range()
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ lib/ioremap.c |   85 +++++++++++++++------------------------------------------
+ 1 files changed, 22 insertions(+), 63 deletions(-)
+
+diff --git a/lib/ioremap.c b/lib/ioremap.c
+index da4e2ad..e75d0d1 100644
+--- a/lib/ioremap.c
++++ b/lib/ioremap.c
+@@ -13,81 +13,40 @@
+ #include <asm/cacheflush.h>
+ #include <asm/pgtable.h>
+ 
+-static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
+-		unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
++struct ioremap_data
+ {
+-	pte_t *pte;
++	phys_addr_t phys_addr;
++	pgprot_t prot;
++};
++
++static int ioremap_pte_range(pte_t *pte, unsigned count,
++			     unsigned long addr, void *v)
++{
++	struct ioremap_data *data = v;
+ 	u64 pfn;
+ 
+-	pfn = phys_addr >> PAGE_SHIFT;
+-	pte = pte_alloc_kernel(pmd, addr);
+-	if (!pte)
+-		return -ENOMEM;
+-	do {
+-		BUG_ON(!pte_none(*pte));
+-		set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot));
+-		pfn++;
+-	} while (pte++, addr += PAGE_SIZE, addr != end);
+-	return 0;
+-}
++	pfn = data->phys_addr >> PAGE_SHIFT;
++	data->phys_addr += count * PAGE_SIZE;
+ 
+-static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
+-		unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
+-{
+-	pmd_t *pmd;
+-	unsigned long next;
++	while (count--) {
++		BUG_ON(!pte_none(*pte));
+ 
+-	phys_addr -= addr;
+-	pmd = pmd_alloc(&init_mm, pud, addr);
+-	if (!pmd)
+-		return -ENOMEM;
+-	do {
+-		next = pmd_addr_end(addr, end);
+-		if (ioremap_pte_range(pmd, addr, next, phys_addr + addr, prot))
+-			return -ENOMEM;
+-	} while (pmd++, addr = next, addr != end);
+-	return 0;
+-}
++		set_pte_at(&init_mm, addr, pte++, pfn_pte(pfn++, data->prot));
+ 
+-static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
+-		unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
+-{
+-	pud_t *pud;
+-	unsigned long next;
++		addr += PAGE_SIZE;
++	}
+ 
+-	phys_addr -= addr;
+-	pud = pud_alloc(&init_mm, pgd, addr);
+-	if (!pud)
+-		return -ENOMEM;
+-	do {
+-		next = pud_addr_end(addr, end);
+-		if (ioremap_pmd_range(pud, addr, next, phys_addr + addr, prot))
+-			return -ENOMEM;
+-	} while (pud++, addr = next, addr != end);
+ 	return 0;
+ }
+ 
+-int ioremap_page_range(unsigned long addr,
+-		       unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
++int ioremap_page_range(unsigned long addr, unsigned long end,
++		       phys_addr_t phys_addr, pgprot_t prot)
+ {
+-	pgd_t *pgd;
+-	unsigned long start;
+-	unsigned long next;
+-	int err;
+-
+-	BUG_ON(addr >= end);
+-
+-	start = addr;
+-	phys_addr -= addr;
+-	pgd = pgd_offset_k(addr);
+-	do {
+-		next = pgd_addr_end(addr, end);
+-		err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, prot);
+-		if (err)
+-			break;
+-	} while (pgd++, addr = next, addr != end);
++	struct ioremap_data data = { .phys_addr = phys_addr, .prot = prot };
++	int err = apply_to_page_range_batch(&init_mm, addr, end - addr,
++					    ioremap_pte_range, &data);
+ 
+-	flush_cache_vmap(start, end);
++	flush_cache_vmap(addr, end);
+ 
+ 	return err;
+ }
+-- 
+1.7.3.4
+
+
+From 7a064a31021ba0b4adfc90061d7da2daa9b3d27e Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Mon, 29 Nov 2010 12:22:24 -0800
+Subject: [PATCH 132/139] vmalloc: use plain pte_clear() for unmaps
+
+ptep_get_and_clear() is potentially moderately expensive (at least
+an atomic operation, or potentially a trap-and-fault when virtualized),
+so use a plain pte_clear().
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ mm/vmalloc.c |    3 ++-
+ 1 files changed, 2 insertions(+), 1 deletions(-)
+
+diff --git a/mm/vmalloc.c b/mm/vmalloc.c
+index 5ddbdfe..c06dc1e 100644
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -39,8 +39,9 @@ static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
+ 
+ 	pte = pte_offset_kernel(pmd, addr);
+ 	do {
+-		pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
++		pte_t ptent = *pte;
+ 		WARN_ON(!pte_none(ptent) && !pte_present(ptent));
++		pte_clear(&init_mm, addr, pte);
+ 	} while (pte++, addr += PAGE_SIZE, addr != end);
+ }
+ 
+-- 
+1.7.3.4
+
+
+From 334c14835ef823ce665eeebf6aad467064f47e47 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Mon, 29 Nov 2010 11:06:19 -0800
+Subject: [PATCH 133/139] vmalloc: use apply_to_page_range_batch() for vunmap_page_range()
+
+There's no need to open-code it when there's a helpful utility function
+to do the job.
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Cc: Nick Piggin <npiggin at kernel.dk>
+---
+ mm/vmalloc.c |   53 +++++++++--------------------------------------------
+ 1 files changed, 9 insertions(+), 44 deletions(-)
+
+diff --git a/mm/vmalloc.c b/mm/vmalloc.c
+index c06dc1e..e99aa3b 100644
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -33,59 +33,24 @@
+ 
+ /*** Page table manipulation functions ***/
+ 
+-static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
++static int vunmap_pte(pte_t *pte, unsigned count,
++		      unsigned long addr, void *data)
+ {
+-	pte_t *pte;
+-
+-	pte = pte_offset_kernel(pmd, addr);
+-	do {
++	while (count--) {
+ 		pte_t ptent = *pte;
+-		WARN_ON(!pte_none(ptent) && !pte_present(ptent));
+-		pte_clear(&init_mm, addr, pte);
+-	} while (pte++, addr += PAGE_SIZE, addr != end);
+-}
+-
+-static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
+-{
+-	pmd_t *pmd;
+-	unsigned long next;
+ 
+-	pmd = pmd_offset(pud, addr);
+-	do {
+-		next = pmd_addr_end(addr, end);
+-		if (pmd_none_or_clear_bad(pmd))
+-			continue;
+-		vunmap_pte_range(pmd, addr, next);
+-	} while (pmd++, addr = next, addr != end);
+-}
++		WARN_ON(!pte_none(ptent) && !pte_present(ptent));
+ 
+-static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end)
+-{
+-	pud_t *pud;
+-	unsigned long next;
++		pte_clear(&init_mm, addr, pte++);
++		addr += PAGE_SIZE;
++	}
+ 
+-	pud = pud_offset(pgd, addr);
+-	do {
+-		next = pud_addr_end(addr, end);
+-		if (pud_none_or_clear_bad(pud))
+-			continue;
+-		vunmap_pmd_range(pud, addr, next);
+-	} while (pud++, addr = next, addr != end);
++	return 0;
+ }
+ 
+ static void vunmap_page_range(unsigned long addr, unsigned long end)
+ {
+-	pgd_t *pgd;
+-	unsigned long next;
+-
+-	BUG_ON(addr >= end);
+-	pgd = pgd_offset_k(addr);
+-	do {
+-		next = pgd_addr_end(addr, end);
+-		if (pgd_none_or_clear_bad(pgd))
+-			continue;
+-		vunmap_pud_range(pgd, addr, next);
+-	} while (pgd++, addr = next, addr != end);
++	apply_to_page_range_batch(&init_mm, addr, end - addr, vunmap_pte, NULL);
+ }
+ 
+ static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
+-- 
+1.7.3.4
+
+
+From 937b74f8d19f7e62d63d4e82c2cf21f3bd636d9e Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Mon, 29 Nov 2010 11:11:45 -0800
+Subject: [PATCH 134/139] vmalloc: use apply_to_page_range_batch() for vmap_page_range_noflush()
+
+There's no need to open-code it when there's a helpful utility
+function.
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Cc: Nick Piggin <npiggin at kernel.dk>
+---
+ mm/vmalloc.c |   92 ++++++++++++++++++---------------------------------------
+ 1 files changed, 29 insertions(+), 63 deletions(-)
+
+diff --git a/mm/vmalloc.c b/mm/vmalloc.c
+index e99aa3b..cf4e705 100644
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -53,63 +53,34 @@ static void vunmap_page_range(unsigned long addr, unsigned long end)
+ 	apply_to_page_range_batch(&init_mm, addr, end - addr, vunmap_pte, NULL);
+ }
+ 
+-static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
+-		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
++struct vmap_data
+ {
+-	pte_t *pte;
++	struct page **pages;
++	unsigned index;
++	pgprot_t prot;
++};
+ 
+-	/*
+-	 * nr is a running index into the array which helps higher level
+-	 * callers keep track of where we're up to.
+-	 */
++static int vmap_pte(pte_t *pte, unsigned count,
++		    unsigned long addr, void *data)
++{
++	struct vmap_data *vmap = data;
+ 
+-	pte = pte_alloc_kernel(pmd, addr);
+-	if (!pte)
+-		return -ENOMEM;
+-	do {
+-		struct page *page = pages[*nr];
++	while (count--) {
++		struct page *page = vmap->pages[vmap->index];
+ 
+ 		if (WARN_ON(!pte_none(*pte)))
+ 			return -EBUSY;
++
+ 		if (WARN_ON(!page))
+ 			return -ENOMEM;
+-		set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
+-		(*nr)++;
+-	} while (pte++, addr += PAGE_SIZE, addr != end);
+-	return 0;
+-}
+ 
+-static int vmap_pmd_range(pud_t *pud, unsigned long addr,
+-		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
+-{
+-	pmd_t *pmd;
+-	unsigned long next;
+-
+-	pmd = pmd_alloc(&init_mm, pud, addr);
+-	if (!pmd)
+-		return -ENOMEM;
+-	do {
+-		next = pmd_addr_end(addr, end);
+-		if (vmap_pte_range(pmd, addr, next, prot, pages, nr))
+-			return -ENOMEM;
+-	} while (pmd++, addr = next, addr != end);
+-	return 0;
+-}
++		set_pte_at(&init_mm, addr, pte, mk_pte(page, vmap->prot));
+ 
+-static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
+-		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
+-{
+-	pud_t *pud;
+-	unsigned long next;
++		pte++;
++		addr += PAGE_SIZE;
++		vmap->index++;
++	}
+ 
+-	pud = pud_alloc(&init_mm, pgd, addr);
+-	if (!pud)
+-		return -ENOMEM;
+-	do {
+-		next = pud_addr_end(addr, end);
+-		if (vmap_pmd_range(pud, addr, next, prot, pages, nr))
+-			return -ENOMEM;
+-	} while (pud++, addr = next, addr != end);
+ 	return 0;
+ }
+ 
+@@ -122,22 +93,17 @@ static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
+ static int vmap_page_range_noflush(unsigned long start, unsigned long end,
+ 				   pgprot_t prot, struct page **pages)
+ {
+-	pgd_t *pgd;
+-	unsigned long next;
+-	unsigned long addr = start;
+-	int err = 0;
+-	int nr = 0;
+-
+-	BUG_ON(addr >= end);
+-	pgd = pgd_offset_k(addr);
+-	do {
+-		next = pgd_addr_end(addr, end);
+-		err = vmap_pud_range(pgd, addr, next, prot, pages, &nr);
+-		if (err)
+-			return err;
+-	} while (pgd++, addr = next, addr != end);
+-
+-	return nr;
++	int err;
++	struct vmap_data vmap = {
++		.pages = pages,
++		.index = 0,
++		.prot = prot
++	};
++	
++	err = apply_to_page_range_batch(&init_mm, start, end - start,
++					vmap_pte, &vmap);
++	
++	return err ? err : vmap.index;
+ }
+ 
+ static int vmap_page_range(unsigned long start, unsigned long end,
+-- 
+1.7.3.4
+
+
+From d4205306bb6609275ad93a8d1bfb4de3d06d0eb5 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Wed, 1 Dec 2010 15:45:21 -0800
+Subject: [PATCH 135/139] vmalloc: use apply_to_page_range_batch() in alloc_vm_area()
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ mm/vmalloc.c |    8 ++++----
+ 1 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/mm/vmalloc.c b/mm/vmalloc.c
+index cf4e705..64d395f 100644
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -1993,9 +1993,9 @@ void  __attribute__((weak)) vmalloc_sync_all(void)
+ }
+ 
+ 
+-static int f(pte_t *pte, unsigned long addr, void *data)
++static int f(pte_t *pte, unsigned count, unsigned long addr, void *data)
+ {
+-	/* apply_to_page_range() does all the hard work. */
++	/* apply_to_page_range_batch() does all the hard work. */
+ 	return 0;
+ }
+ 
+@@ -2024,8 +2024,8 @@ struct vm_struct *alloc_vm_area(size_t size)
+ 	 * This ensures that page tables are constructed for this region
+ 	 * of kernel virtual address space and mapped into init_mm.
+ 	 */
+-	if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
+-				area->size, f, NULL)) {
++	if (apply_to_page_range_batch(&init_mm, (unsigned long)area->addr,
++				      area->size, f, NULL)) {
+ 		free_vm_area(area);
+ 		return NULL;
+ 	}
+-- 
+1.7.3.4
+
+
+From e35361f09bf25ecb5ba6877e44319de315b76f5e Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Wed, 1 Dec 2010 15:44:04 -0800
+Subject: [PATCH 136/139] xen/mmu: use apply_to_page_range_batch() in xen_remap_domain_mfn_range()
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ arch/x86/xen/mmu.c |   19 ++++++++++++-------
+ 1 files changed, 12 insertions(+), 7 deletions(-)
+
+diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
+index 38ba804..25da278 100644
+--- a/arch/x86/xen/mmu.c
++++ b/arch/x86/xen/mmu.c
+@@ -2292,14 +2292,19 @@ struct remap_data {
+ 	struct mmu_update *mmu_update;
+ };
+ 
+-static int remap_area_mfn_pte_fn(pte_t *ptep, unsigned long addr, void *data)
++static int remap_area_mfn_pte_fn(pte_t *ptep, unsigned count,
++				 unsigned long addr, void *data)
+ {
+ 	struct remap_data *rmd = data;
+-	pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot));
+ 
+-	rmd->mmu_update->ptr = arbitrary_virt_to_machine(ptep).maddr;
+-	rmd->mmu_update->val = pte_val_ma(pte);
+-	rmd->mmu_update++;
++	while (count--) {
++		pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot));
++
++		rmd->mmu_update->ptr = arbitrary_virt_to_machine(ptep).maddr;
++		rmd->mmu_update->val = pte_val_ma(pte);
++		rmd->mmu_update++;
++		ptep++;
++	}
+ 
+ 	return 0;
+ }
+@@ -2328,8 +2333,8 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
+ 		range = (unsigned long)batch << PAGE_SHIFT;
+ 
+ 		rmd.mmu_update = mmu_update;
+-		err = apply_to_page_range(vma->vm_mm, addr, range,
+-					  remap_area_mfn_pte_fn, &rmd);
++		err = apply_to_page_range_batch(vma->vm_mm, addr, range,
++						remap_area_mfn_pte_fn, &rmd);
+ 		if (err)
+ 			goto out;
+ 
+-- 
+1.7.3.4
+
+
+From 02533b01d70f7cbbe3cf47de3f27740ab334a11f Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Wed, 1 Dec 2010 15:50:12 -0800
+Subject: [PATCH 137/139] xen/grant-table: use apply_to_page_range_batch()
+
+No need to call the callback per-pte.
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ arch/x86/xen/grant-table.c |   28 ++++++++++++++++++----------
+ 1 files changed, 18 insertions(+), 10 deletions(-)
+
+diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
+index 5bf892a..11a8a45 100644
+--- a/arch/x86/xen/grant-table.c
++++ b/arch/x86/xen/grant-table.c
+@@ -44,19 +44,27 @@
+ 
+ #include <asm/pgtable.h>
+ 
+-static int map_pte_fn(pte_t *pte, unsigned long addr, void *data)
++static int map_pte_fn(pte_t *pte, unsigned count, unsigned long addr, void *data)
+ {
+ 	unsigned long **frames = (unsigned long **)data;
+ 
+-	set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
+-	(*frames)++;
++	while (count--) {
++		set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
++		(*frames)++;
++		pte++;
++		addr += PAGE_SIZE;
++	}
+ 	return 0;
+ }
+ 
+-static int unmap_pte_fn(pte_t *pte, unsigned long addr, void *data)
++static int unmap_pte_fn(pte_t *pte, unsigned count, unsigned long addr, void *data)
+ {
++	while (count--) {
++		pte_clear(&init_mm, addr, pte);
++		addr += PAGE_SIZE;
++		pte++;
++	}
+ 
+-	set_pte_at(&init_mm, addr, pte, __pte(0));
+ 	return 0;
+ }
+ 
+@@ -75,15 +83,15 @@ int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
+ 		*__shared = shared;
+ 	}
+ 
+-	rc = apply_to_page_range(&init_mm, (unsigned long)shared,
+-				 PAGE_SIZE * nr_gframes,
+-				 map_pte_fn, &frames);
++	rc = apply_to_page_range_batch(&init_mm, (unsigned long)shared,
++				       PAGE_SIZE * nr_gframes,
++				       map_pte_fn, &frames);
+ 	return rc;
+ }
+ 
+ void arch_gnttab_unmap_shared(struct grant_entry *shared,
+ 			      unsigned long nr_gframes)
+ {
+-	apply_to_page_range(&init_mm, (unsigned long)shared,
+-			    PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL);
++	apply_to_page_range_batch(&init_mm, (unsigned long)shared,
++				  PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL);
+ }
+-- 
+1.7.3.4
+
+
+From cb3172f5566fe75b749b0873deedc42687c39064 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Wed, 19 Jan 2011 18:41:03 -0500
+Subject: [PATCH 138/139] x86/nx: Made .bss be HPAGE_ALIGNED.
+
+That makes it boot under Xen.
+---
+ arch/x86/kernel/vmlinux.lds.S |    2 +-
+ 1 files changed, 1 insertions(+), 1 deletions(-)
+
+diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
+index b34ab80..e37d10f 100644
+--- a/arch/x86/kernel/vmlinux.lds.S
++++ b/arch/x86/kernel/vmlinux.lds.S
+@@ -341,7 +341,7 @@ SECTIONS
+ #endif
+ 
+ 	/* BSS */
+-	. = ALIGN(PAGE_SIZE);
++	. = ALIGN(HPAGE_SIZE);
+ 	.bss : AT(ADDR(.bss) - LOAD_OFFSET) {
+ 		__bss_start = .;
+ 		*(.bss..page_aligned)
+-- 
+1.7.3.4
+
+
+From 8751f3b0fd2ca59c410052d1faecc2297bb91d62 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Mon, 24 Jan 2011 17:25:29 -0800
+Subject: [PATCH 139/139] xen/gntdev: remove token argument from find_grant_ptes
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/gntdev.c |    3 +--
+ 1 files changed, 1 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
+index 1e31cdc..2b777c0 100644
+--- a/drivers/xen/gntdev.c
++++ b/drivers/xen/gntdev.c
+@@ -226,8 +226,7 @@ static void gntdev_free_map(struct grant_map *map)
+ 
+ /* ------------------------------------------------------------------ */
+ 
+-static int find_grant_ptes(pte_t *pte, pgtable_t token,
+-		unsigned long addr, void *data)
++static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
+ {
+ 	struct grant_map *map = data;
+ 	unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
+-- 
+1.7.3.4
+
diff --git a/xen.pvhvm.fixes.patch b/xen.pvhvm.fixes.patch
new file mode 100644
index 0000000..cf6dc95
--- /dev/null
+++ b/xen.pvhvm.fixes.patch
@@ -0,0 +1,197 @@
+From bb89b5a4444a7514f3cf6cadb4f613832d7a9887 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Fri, 14 Jan 2011 17:55:44 -0500
+Subject: [PATCH 1/4] m2p: No need to catch exceptions when we know that there is no RAM
+
+.. beyond what we think is the end of memory. However, there might
+be more System RAM - but assigned to a guest. Hence jump to the
+M2P override check and consult it.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ arch/x86/include/asm/xen/page.h |    6 +++++-
+ 1 files changed, 5 insertions(+), 1 deletions(-)
+
+diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
+index f25bdf2..74a8559 100644
+--- a/arch/x86/include/asm/xen/page.h
++++ b/arch/x86/include/asm/xen/page.h
+@@ -77,6 +77,10 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
+ 	if (xen_feature(XENFEAT_auto_translated_physmap))
+ 		return mfn;
+ 
++	if (unlikely((mfn >> machine_to_phys_order) != 0)) {
++		pfn = ~0;
++		goto try_override;
++	}
+ 	pfn = 0;
+ 	/*
+ 	 * The array access can fail (e.g., device space beyond end of RAM).
+@@ -84,7 +88,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
+ 	 * but we must handle the fault without crashing!
+ 	 */
+ 	__get_user(pfn, &machine_to_phys_mapping[mfn]);
+-
++try_override:
+ 	/*
+ 	 * If this appears to be a foreign mfn (because the pfn
+ 	 * doesn't map back to the mfn), then check the local override
+-- 
+1.7.3.4
+
+
+From a7cdabcd381dffb5db72a31c78b65a2bcdec2a04 Mon Sep 17 00:00:00 2001
+From: Stefan Bader <stefan.bader at canonical.com>
+Date: Thu, 20 Jan 2011 15:19:46 +0000
+Subject: [PATCH 2/4] xen: p2m: correctly initialize partial p2m leaf
+
+After changing the p2m mapping to a tree by
+
+  commit 58e05027b530ff081ecea68e38de8d59db8f87e0
+    xen: convert p2m to a 3 level tree
+
+and trying to boot a DomU with 615MB of memory, the following crash was
+observed in the dump:
+
+kernel direct mapping tables up to 26f00000 @ 1ec4000-1fff000
+BUG: unable to handle kernel NULL pointer dereference at (null)
+IP: [<c0107397>] xen_set_pte+0x27/0x60
+*pdpt = 0000000000000000 *pde = 0000000000000000
+
+Adding further debug statements showed that when trying to set up
+pfn=0x26700 the returned mapping was invalid.
+
+pfn=0x266ff calling set_pte(0xc1fe77f8, 0x6b3003)
+pfn=0x26700 calling set_pte(0xc1fe7800, 0x3)
+
+Although the last_pfn obtained from the startup info is 0x26700, which
+should in turn not be hit, the additional 8MB which are added as extra
+memory normally seem to be ok. This led to looking into the initial
+p2m tree construction, which uses the smaller value, assuming that
+there is other code handling the extra memory.
+
+When the p2m tree is set up, the leaves are pointed directly at the
+array which the domain builder set up. But if the mapping is not on a
+boundary that fits into one p2m page, this will result in the last leaf
+being only partially valid. And as the invalid entries are not
+initialized in that case, things go badly wrong.
+
+I am trying to fix that by checking whether the current leaf is a
+complete map and, if not, allocating a completely new page and copying only
+the valid pointers there. This may not be the most efficient or elegant
+solution, but at least it seems to allow me to boot DomUs with memory
+assignments all over the range.
+
+Signed-off-by: Stefan Bader <stefan.bader at canonical.com>
+---
+ arch/x86/xen/p2m.c |   20 +++++++++++++++++++-
+ 1 files changed, 19 insertions(+), 1 deletions(-)
+
+diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
+index 8f2251d..c9307ec 100644
+--- a/arch/x86/xen/p2m.c
++++ b/arch/x86/xen/p2m.c
+@@ -237,7 +237,25 @@ void __init xen_build_dynamic_phys_to_machine(void)
+ 			p2m_top[topidx] = mid;
+ 		}
+ 
+-		p2m_top[topidx][mididx] = &mfn_list[pfn];
++		/*
++		 * As long as the mfn_list has enough entries to completely
++		 * fill a p2m page, pointing into the array is ok. But if
++		 * not the entries beyond the last pfn will be undefined.
++		 * And guessing that the 'what-ever-there-is' does not take it
++		 * too kindly when changing it to invalid markers, a new page
++		 * is allocated, initialized and filled with the valid part.
++		 */
++		if (unlikely(pfn + P2M_PER_PAGE > max_pfn)) {
++			unsigned long p2midx;
++			unsigned long **p2m = extend_brk(PAGE_SIZE, PAGE_SIZE);
++			p2m_init(p2m);
++
++			for (p2midx = 0; pfn + p2midx < max_pfn; p2midx++) {
++				p2m[p2midx] = mfn_list[pfn + p2midx];
++			}
++			p2m_top[topidx][mididx] = p2m;
++		} else
++			p2m_top[topidx][mididx] = &mfn_list[pfn];
+ 	}
+ 
+ 	m2p_override_init();
+-- 
+1.7.3.4
+
+
+From b84683ad1e704c2a296d08ff0cbe29db936f94a7 Mon Sep 17 00:00:00 2001
+From: Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+Date: Tue, 25 Jan 2011 12:03:42 +0000
+Subject: [PATCH 3/4] xen: make sure the e820 memory regions end at a page boundary
+
+Signed-off-by: M A Young <m.a.young at durham.ac.uk>
+Signed-off-by: Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+---
+ arch/x86/xen/setup.c |    5 ++++-
+ 1 files changed, 4 insertions(+), 1 deletions(-)
+
+diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
+index b5a7f92..a3d28a1 100644
+--- a/arch/x86/xen/setup.c
++++ b/arch/x86/xen/setup.c
+@@ -179,7 +179,10 @@ char * __init xen_memory_setup(void)
+ 	e820.nr_map = 0;
+ 	xen_extra_mem_start = mem_end;
+ 	for (i = 0; i < memmap.nr_entries; i++) {
+-		unsigned long long end = map[i].addr + map[i].size;
++		unsigned long long end;
++		if (map[i].type == E820_RAM)
++			map[i].size -= (map[i].size + map[i].addr) % PAGE_SIZE;
++		end = map[i].addr + map[i].size;
+ 
+ 		if (map[i].type == E820_RAM && end > mem_end) {
+ 			/* RAM off the end - may be partially included */
+-- 
+1.7.3.4
+
+
+From daed469760dabbf6ae81c9869a263535fb587e63 Mon Sep 17 00:00:00 2001
+From: Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+Date: Tue, 25 Jan 2011 12:05:11 +0000
+Subject: [PATCH 4/4] When destroying mappings between _brk_end and _end, do not go over _end
+
+Signed-off-by: Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+---
+ arch/x86/mm/init.c |   15 ++++++++++++++-
+ 1 files changed, 14 insertions(+), 1 deletions(-)
+
+diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
+index 947f42a..ebc0221 100644
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -291,10 +291,23 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
+ 		 * located on different 2M pages. cleanup_highmap(), however,
+ 		 * can only consider _end when it runs, so destroy any
+ 		 * mappings beyond _brk_end here.
++		 * Be careful not to go over _end.
+ 		 */
+ 		pud = pud_offset(pgd_offset_k(_brk_end), _brk_end);
+ 		pmd = pmd_offset(pud, _brk_end - 1);
+-		while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1))
++		while (++pmd < pmd_offset(pud, (unsigned long)_end - 1))
++			pmd_clear(pmd);
++		if (((unsigned long)_end) & ~PMD_MASK) {
++			pte_t *pte;
++			unsigned long addr;
++			for (addr = ((unsigned long)_end) & PMD_MASK;
++					addr < ((unsigned long)_end);
++					addr += PAGE_SIZE) {
++				pte = pte_offset_map(pmd, addr);
++				pte_clear(&init_mm, addr, pte);
++				pte_unmap(pte);
++			}
++		} else
+ 			pmd_clear(pmd);
+ 	}
+ #endif
+-- 
+1.7.3.4
+

