[kernel/rawhide/user/myoung/xendom0: 4/4] Switch to xen/next-2.6.38, which adds net and pci backends; add some memory fixes
myoung
myoung at fedoraproject.org
Tue Jan 25 21:28:41 UTC 2011
commit 1f6c23a1b72c669a4d9f5713ed4bea0b1ab72505
Author: Michael Young <m.a.young at durham.ac.uk>
Date: Tue Jan 25 21:25:53 2011 +0000
Switch to xen/next-2.6.38, which adds net and pci backends;
add some memory fixes
config-local | 3 +
kernel.spec | 17 +-
xen.next-2.6.38.patch |30821 +++++++++++++++++++++++++++++++++++++++++++++++++
xen.pvhvm.fixes.patch | 197 +
4 files changed, 31033 insertions(+), 5 deletions(-)
---
diff --git a/config-local b/config-local
index 8c32be5..8f91cda 100644
--- a/config-local
+++ b/config-local
@@ -1,2 +1,5 @@
# This file is intentionally left empty in the stock kernel. Its a nicety
# added for those wanting to do custom rebuilds with altered config opts.
+CONFIG_XEN_NETDEV_BACKEND=m
+CONFIG_XEN_PCIDEV_BACKEND=m
+CONFIG_XEN_PCIDEV_BE_DEBUG=n
diff --git a/kernel.spec b/kernel.spec
index b6050f0..4bd59bb 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -737,12 +737,12 @@ Patch12421: fs-call-security_d_instantiate-in-d_obtain_alias.patch
# Xen patches
# git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git branches
-Patch20000: xen.next-2.6.37.patch
+Patch20000: xen.next-2.6.38.patch
#Patch20001: xen.upstream.core.patch
# git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen.git branches
-Patch20005: xen.pcifront.fixes.patch
+#Patch20005: xen.pcifront.fixes.patch
# git://xenbits.xen.org/people/sstabellini/linux-pvhvm branches
-#Patch20010: xen.pvhvm.fixes.patch
+Patch20010: xen.pvhvm.fixes.patch
%endif
@@ -1357,9 +1357,9 @@ ApplyPatch dmar-disable-when-ricoh-multifunction.patch
ApplyPatch fs-call-security_d_instantiate-in-d_obtain_alias.patch
# Xen patches
-ApplyPatch xen.next-2.6.37.patch
+ApplyPatch xen.next-2.6.38.patch
#ApplyPatch xen.upstream.core.patch
-ApplyPatch xen.pcifront.fixes.patch
+#ApplyPatch xen.pcifront.fixes.patch
#ApplyPatch xen.pvhvm.fixes.patch
# END OF PATCH APPLICATIONS
@@ -1974,6 +1974,13 @@ fi
# ||----w |
# || ||
%changelog
+* Tue Jan 25 2011 Michael Young <m.a.young at durham.ac.uk>
+- Switch from xen/next-2.6.37 to xen/next-2.6.38
+ which adds XEN_NETDEV_BACKEND and XEN_PCIDEV_BACKEND
+- comment out xen.pcifront.fixes.patch (patches are in next-2.6.38)
+- put 2.6.38-rc1-memory-fixes branch in xen.pvhvm.fixes.patch
+ for some memory fixes including a later version of the crash on boot patch
+
* Tue Jan 25 2011 Kyle McMartin <kmcmartin at redhat.com> 2.6.38-0.rc2.git3.1
- Linux 2.6.38-rc2-git3
- perf-gcc460-build-fixes.patch: fix context from [9486aa38]
diff --git a/xen.next-2.6.38.patch b/xen.next-2.6.38.patch
new file mode 100644
index 0000000..c8a28e1
--- /dev/null
+++ b/xen.next-2.6.38.patch
@@ -0,0 +1,30821 @@
+From 1e13f505ecbc011465783283ebfa05a42f7ce18f Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ijc at hellion.org.uk>
+Date: Thu, 3 Dec 2009 22:04:06 +0000
+Subject: [PATCH 001/139] xen: export xen_gsi_from_irq, it is required by modular pciback
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Cc: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Cc: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/events.c | 1 +
+ 1 files changed, 1 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/events.c b/drivers/xen/events.c
+index 97612f5..a04da4b 100644
+--- a/drivers/xen/events.c
++++ b/drivers/xen/events.c
+@@ -778,6 +778,7 @@ int xen_gsi_from_irq(unsigned irq)
+ {
+ return gsi_from_irq(irq);
+ }
++EXPORT_SYMBOL_GPL(xen_gsi_from_irq);
+
+ int xen_irq_from_pirq(unsigned pirq)
+ {
+--
+1.7.3.4
+
+
+From f0885b9401a859bc7bed849925a703c03d00119b Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Mon, 8 Nov 2010 14:13:35 -0500
+Subject: [PATCH 002/139] xen/pci: Add xen_[find|register|unregister]_device_domain_owner functions.
+
+Xen PCI backend performs ownership (MSI/MSI-X) changes on the behalf of
+the guest. This means we need some mechanism to find, set and unset
+the domain id of the guest.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ arch/x86/include/asm/xen/pci.h | 16 +++++++++
+ arch/x86/pci/xen.c | 73 ++++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 89 insertions(+), 0 deletions(-)
+
+diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h
+index 2329b3e..8474b4b 100644
+--- a/arch/x86/include/asm/xen/pci.h
++++ b/arch/x86/include/asm/xen/pci.h
+@@ -15,10 +15,26 @@ static inline int pci_xen_hvm_init(void)
+ #endif
+ #if defined(CONFIG_XEN_DOM0)
+ void __init xen_setup_pirqs(void);
++int xen_find_device_domain_owner(struct pci_dev *dev);
++int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain);
++int xen_unregister_device_domain_owner(struct pci_dev *dev);
+ #else
+ static inline void __init xen_setup_pirqs(void)
+ {
+ }
++static inline int xen_find_device_domain_owner(struct pci_dev *dev)
++{
++ return -1;
++}
++static inline int xen_register_device_domain_owner(struct pci_dev *dev,
++ uint16_t domain)
++{
++ return -1;
++}
++static inline int xen_unregister_device_domain_owner(struct pci_dev *dev)
++{
++ return -1;
++}
+ #endif
+
+ #if defined(CONFIG_PCI_MSI)
+diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
+index 117f5b8..6d2a986 100644
+--- a/arch/x86/pci/xen.c
++++ b/arch/x86/pci/xen.c
+@@ -412,3 +412,76 @@ void __init xen_setup_pirqs(void)
+ }
+ }
+ #endif
++
++struct xen_device_domain_owner {
++ domid_t domain;
++ struct pci_dev *dev;
++ struct list_head list;
++};
++
++static DEFINE_SPINLOCK(dev_domain_list_spinlock);
++static struct list_head dev_domain_list = LIST_HEAD_INIT(dev_domain_list);
++
++static struct xen_device_domain_owner *find_device(struct pci_dev *dev)
++{
++ struct xen_device_domain_owner *owner;
++
++ list_for_each_entry(owner, &dev_domain_list, list) {
++ if (owner->dev == dev)
++ return owner;
++ }
++ return NULL;
++}
++
++int xen_find_device_domain_owner(struct pci_dev *dev)
++{
++ struct xen_device_domain_owner *owner;
++ int domain = -ENODEV;
++
++ spin_lock(&dev_domain_list_spinlock);
++ owner = find_device(dev);
++ if (owner)
++ domain = owner->domain;
++ spin_unlock(&dev_domain_list_spinlock);
++ return domain;
++}
++EXPORT_SYMBOL(xen_find_device_domain_owner);
++
++int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain)
++{
++ struct xen_device_domain_owner *owner;
++
++ owner = kzalloc(sizeof(struct xen_device_domain_owner), GFP_KERNEL);
++ if (!owner)
++ return -ENODEV;
++
++ spin_lock(&dev_domain_list_spinlock);
++ if (find_device(dev)) {
++ spin_unlock(&dev_domain_list_spinlock);
++ kfree(owner);
++ return -EEXIST;
++ }
++ owner->domain = domain;
++ owner->dev = dev;
++ list_add_tail(&owner->list, &dev_domain_list);
++ spin_unlock(&dev_domain_list_spinlock);
++ return 0;
++}
++EXPORT_SYMBOL(xen_register_device_domain_owner);
++
++int xen_unregister_device_domain_owner(struct pci_dev *dev)
++{
++ struct xen_device_domain_owner *owner;
++
++ spin_lock(&dev_domain_list_spinlock);
++ owner = find_device(dev);
++ if (!owner) {
++ spin_unlock(&dev_domain_list_spinlock);
++ return -ENODEV;
++ }
++ list_del(&owner->list);
++ spin_unlock(&dev_domain_list_spinlock);
++ kfree(owner);
++ return 0;
++}
++EXPORT_SYMBOL(xen_unregister_device_domain_owner);
+--
+1.7.3.4
+
+
+From da24916fdf04d7b4a32c5b9d2c09e47775496e1d Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Mon, 8 Nov 2010 14:23:17 -0500
+Subject: [PATCH 003/139] xen: Check if the PCI device is owned by a domain different than DOMID_SELF.
+
+We check if there is a domain owner for the PCI device. In case of failure
+(meaning no domain has registered for this device) we make
+DOMID_SELF the owner.
+
+[v2: deal with rebasing on v2.6.37-1]
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Acked-by: Xiantao Zhang <xiantao.zhang at intel.com>
+---
+ drivers/xen/events.c | 16 +++++++++++++---
+ 1 files changed, 13 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/xen/events.c b/drivers/xen/events.c
+index a04da4b..96c93e7 100644
+--- a/drivers/xen/events.c
++++ b/drivers/xen/events.c
+@@ -40,6 +40,7 @@
+ #include <asm/xen/pci.h>
+ #include <asm/xen/hypercall.h>
+ #include <asm/xen/hypervisor.h>
++#include <asm/xen/pci.h>
+
+ #include <xen/xen.h>
+ #include <xen/hvm.h>
+@@ -97,6 +98,7 @@ struct irq_info
+ unsigned short gsi;
+ unsigned char vector;
+ unsigned char flags;
++ uint16_t domid;
+ } pirq;
+ } u;
+ };
+@@ -158,7 +160,8 @@ static struct irq_info mk_pirq_info(unsigned short evtchn, unsigned short pirq,
+ {
+ return (struct irq_info) { .type = IRQT_PIRQ, .evtchn = evtchn,
+ .cpu = 0,
+- .u.pirq = { .pirq = pirq, .gsi = gsi, .vector = vector } };
++ .u.pirq = { .pirq = pirq, .gsi = gsi,
++ .vector = vector, .domid = DOMID_SELF } };
+ }
+
+ /*
+@@ -688,11 +691,16 @@ int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
+ int irq = -1;
+ struct physdev_map_pirq map_irq;
+ int rc;
++ domid_t domid;
+ int pos;
+ u32 table_offset, bir;
+
++ domid = rc = xen_find_device_domain_owner(dev);
++ if (rc < 0)
++ domid = DOMID_SELF;
++
+ memset(&map_irq, 0, sizeof(map_irq));
+- map_irq.domid = DOMID_SELF;
++ map_irq.domid = domid;
+ map_irq.type = MAP_PIRQ_TYPE_MSI;
+ map_irq.index = -1;
+ map_irq.pirq = -1;
+@@ -727,6 +735,8 @@ int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
+ goto out;
+ }
+ irq_info[irq] = mk_pirq_info(0, map_irq.pirq, 0, map_irq.index);
++ if (domid)
++ irq_info[irq].u.pirq.domid = domid;
+
+ set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
+ handle_level_irq,
+@@ -753,7 +763,7 @@ int xen_destroy_irq(int irq)
+
+ if (xen_initial_domain()) {
+ unmap_irq.pirq = info->u.pirq.pirq;
+- unmap_irq.domid = DOMID_SELF;
++ unmap_irq.domid = info->u.pirq.domid;
+ rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq);
+ if (rc) {
+ printk(KERN_WARNING "unmap irq failed %d\n", rc);
+--
+1.7.3.4
+
+
+From 30fecb8166bdd163bdaab795b573cf988f60fbbe Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Mon, 8 Nov 2010 14:26:36 -0500
+Subject: [PATCH 004/139] xen: Add support to check if IRQ line is shared with other domains.
+
+We do this via the PHYSDEVOP_irq_status_query support hypervisor call.
+We will get a positive value if another domain has binded its
+PIRQ to the specified IRQ line.
+
+[v2: Deal with v2.6.37-rc1 rebase fallout]
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ drivers/xen/events.c | 13 +++++++++++++
+ include/xen/events.h | 3 +++
+ 2 files changed, 16 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/events.c b/drivers/xen/events.c
+index 96c93e7..690dfad 100644
+--- a/drivers/xen/events.c
++++ b/drivers/xen/events.c
+@@ -1398,6 +1398,19 @@ void xen_poll_irq(int irq)
+ xen_poll_irq_timeout(irq, 0 /* no timeout */);
+ }
+
++/* Check whether the IRQ line is shared with other guests. */
++int xen_ignore_irq(int irq)
++{
++ struct irq_info *info = info_for_irq(irq);
++ struct physdev_irq_status_query irq_status = { .irq =
++ info->u.pirq.gsi };
++
++ if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
++ return 0;
++ return !(irq_status.flags & XENIRQSTAT_shared);
++}
++EXPORT_SYMBOL_GPL(xen_ignore_irq);
++
+ void xen_irq_resume(void)
+ {
+ unsigned int cpu, irq, evtchn;
+diff --git a/include/xen/events.h b/include/xen/events.h
+index 646dd17..553c664 100644
+--- a/include/xen/events.h
++++ b/include/xen/events.h
+@@ -89,4 +89,7 @@ int xen_vector_from_irq(unsigned pirq);
+ /* Return irq from pirq */
+ int xen_irq_from_pirq(unsigned pirq);
+
++/* Determine whether to ignore this IRQ if passed to a guest. */
++int xen_ignore_irq(int irq);
++
+ #endif /* _XEN_EVENTS_H */
+--
+1.7.3.4
+
+
+From 909e45104de4414897cefce2f6bbed07fc4de4b3 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Mon, 9 Feb 2009 12:05:50 -0800
+Subject: [PATCH 005/139] xen: implement bind_interdomain_evtchn_to_irqhandler for backend drivers
+
+Impact: new Xen-internal API
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/events.c | 38 ++++++++++++++++++++++++++++++++++++++
+ include/xen/events.h | 6 ++++++
+ 2 files changed, 44 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/events.c b/drivers/xen/events.c
+index 690dfad..95eea13 100644
+--- a/drivers/xen/events.c
++++ b/drivers/xen/events.c
+@@ -849,6 +849,21 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
+ return irq;
+ }
+
++static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
++ unsigned int remote_port)
++{
++ struct evtchn_bind_interdomain bind_interdomain;
++ int err;
++
++ bind_interdomain.remote_dom = remote_domain;
++ bind_interdomain.remote_port = remote_port;
++
++ err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
++ &bind_interdomain);
++
++ return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
++}
++
+
+ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
+ {
+@@ -944,6 +959,29 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn,
+ }
+ EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
+
++int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
++ unsigned int remote_port,
++ irq_handler_t handler,
++ unsigned long irqflags,
++ const char *devname,
++ void *dev_id)
++{
++ int irq, retval;
++
++ irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port);
++ if (irq < 0)
++ return irq;
++
++ retval = request_irq(irq, handler, irqflags, devname, dev_id);
++ if (retval != 0) {
++ unbind_from_irq(irq);
++ return retval;
++ }
++
++ return irq;
++}
++EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler);
++
+ int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
+ irq_handler_t handler,
+ unsigned long irqflags, const char *devname, void *dev_id)
+diff --git a/include/xen/events.h b/include/xen/events.h
+index 553c664..2fe1644 100644
+--- a/include/xen/events.h
++++ b/include/xen/events.h
+@@ -23,6 +23,12 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
+ unsigned long irqflags,
+ const char *devname,
+ void *dev_id);
++int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
++ unsigned int remote_port,
++ irq_handler_t handler,
++ unsigned long irqflags,
++ const char *devname,
++ void *dev_id);
+
+ /*
+ * Common unbind function for all event sources. Takes IRQ to unbind from.
+--
+1.7.3.4
+
+
+From b4f664c8de09ab8537e1cd194df29056f803062e Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Mon, 8 Nov 2010 14:46:33 -0500
+Subject: [PATCH 006/139] pci/xen: Make xen_[find|register|unregister]_domain_owner be _GPL
+
+EXPORT_SYMBOL -> EXPORT_SYMBOL_GPL.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ arch/x86/pci/xen.c | 6 +++---
+ 1 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
+index 6d2a986..0fa23c8 100644
+--- a/arch/x86/pci/xen.c
++++ b/arch/x86/pci/xen.c
+@@ -445,7 +445,7 @@ int xen_find_device_domain_owner(struct pci_dev *dev)
+ spin_unlock(&dev_domain_list_spinlock);
+ return domain;
+ }
+-EXPORT_SYMBOL(xen_find_device_domain_owner);
++EXPORT_SYMBOL_GPL(xen_find_device_domain_owner);
+
+ int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain)
+ {
+@@ -467,7 +467,7 @@ int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain)
+ spin_unlock(&dev_domain_list_spinlock);
+ return 0;
+ }
+-EXPORT_SYMBOL(xen_register_device_domain_owner);
++EXPORT_SYMBOL_GPL(xen_register_device_domain_owner);
+
+ int xen_unregister_device_domain_owner(struct pci_dev *dev)
+ {
+@@ -484,4 +484,4 @@ int xen_unregister_device_domain_owner(struct pci_dev *dev)
+ kfree(owner);
+ return 0;
+ }
+-EXPORT_SYMBOL(xen_unregister_device_domain_owner);
++EXPORT_SYMBOL_GPL(xen_unregister_device_domain_owner);
+--
+1.7.3.4
+
+
+From 443b2aafbdb509f218fcb8f4665f063e3a5e1a92 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Tue, 13 Oct 2009 17:22:20 -0400
+Subject: [PATCH 007/139] xen-pciback: Initial copy from linux-2.6.18.hg off pciback driver.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/pciback/Makefile | 17 +
+ drivers/xen/pciback/conf_space.c | 435 ++++++++
+ drivers/xen/pciback/conf_space.h | 126 +++
+ drivers/xen/pciback/conf_space_capability.c | 69 ++
+ drivers/xen/pciback/conf_space_capability.h | 23 +
+ drivers/xen/pciback/conf_space_capability_msi.c | 79 ++
+ drivers/xen/pciback/conf_space_capability_pm.c | 126 +++
+ drivers/xen/pciback/conf_space_capability_vpd.c | 40 +
+ drivers/xen/pciback/conf_space_header.c | 317 ++++++
+ drivers/xen/pciback/conf_space_quirks.c | 138 +++
+ drivers/xen/pciback/conf_space_quirks.h | 35 +
+ drivers/xen/pciback/controller.c | 443 ++++++++
+ drivers/xen/pciback/passthrough.c | 176 +++
+ drivers/xen/pciback/pci_stub.c | 1316 +++++++++++++++++++++++
+ drivers/xen/pciback/pciback.h | 126 +++
+ drivers/xen/pciback/pciback_ops.c | 134 +++
+ drivers/xen/pciback/slot.c | 187 ++++
+ drivers/xen/pciback/vpci.c | 242 +++++
+ drivers/xen/pciback/xenbus.c | 710 ++++++++++++
+ 19 files changed, 4739 insertions(+), 0 deletions(-)
+ create mode 100644 drivers/xen/pciback/Makefile
+ create mode 100644 drivers/xen/pciback/conf_space.c
+ create mode 100644 drivers/xen/pciback/conf_space.h
+ create mode 100644 drivers/xen/pciback/conf_space_capability.c
+ create mode 100644 drivers/xen/pciback/conf_space_capability.h
+ create mode 100644 drivers/xen/pciback/conf_space_capability_msi.c
+ create mode 100644 drivers/xen/pciback/conf_space_capability_pm.c
+ create mode 100644 drivers/xen/pciback/conf_space_capability_vpd.c
+ create mode 100644 drivers/xen/pciback/conf_space_header.c
+ create mode 100644 drivers/xen/pciback/conf_space_quirks.c
+ create mode 100644 drivers/xen/pciback/conf_space_quirks.h
+ create mode 100644 drivers/xen/pciback/controller.c
+ create mode 100644 drivers/xen/pciback/passthrough.c
+ create mode 100644 drivers/xen/pciback/pci_stub.c
+ create mode 100644 drivers/xen/pciback/pciback.h
+ create mode 100644 drivers/xen/pciback/pciback_ops.c
+ create mode 100644 drivers/xen/pciback/slot.c
+ create mode 100644 drivers/xen/pciback/vpci.c
+ create mode 100644 drivers/xen/pciback/xenbus.c
+
+diff --git a/drivers/xen/pciback/Makefile b/drivers/xen/pciback/Makefile
+new file mode 100644
+index 0000000..106dae7
+--- /dev/null
++++ b/drivers/xen/pciback/Makefile
+@@ -0,0 +1,17 @@
++obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback.o
++
++pciback-y := pci_stub.o pciback_ops.o xenbus.o
++pciback-y += conf_space.o conf_space_header.o \
++ conf_space_capability.o \
++ conf_space_capability_vpd.o \
++ conf_space_capability_pm.o \
++ conf_space_quirks.o
++pciback-$(CONFIG_PCI_MSI) += conf_space_capability_msi.o
++pciback-$(CONFIG_XEN_PCIDEV_BACKEND_VPCI) += vpci.o
++pciback-$(CONFIG_XEN_PCIDEV_BACKEND_SLOT) += slot.o
++pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o
++pciback-$(CONFIG_XEN_PCIDEV_BACKEND_CONTROLLER) += controller.o
++
++ifeq ($(CONFIG_XEN_PCIDEV_BE_DEBUG),y)
++EXTRA_CFLAGS += -DDEBUG
++endif
+diff --git a/drivers/xen/pciback/conf_space.c b/drivers/xen/pciback/conf_space.c
+new file mode 100644
+index 0000000..0c76db1
+--- /dev/null
++++ b/drivers/xen/pciback/conf_space.c
+@@ -0,0 +1,435 @@
++/*
++ * PCI Backend - Functions for creating a virtual configuration space for
++ * exported PCI Devices.
++ * It's dangerous to allow PCI Driver Domains to change their
++ * device's resources (memory, i/o ports, interrupts). We need to
++ * restrict changes to certain PCI Configuration registers:
++ * BARs, INTERRUPT_PIN, most registers in the header...
++ *
++ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ */
++
++#include <linux/kernel.h>
++#include <linux/pci.h>
++#include "pciback.h"
++#include "conf_space.h"
++#include "conf_space_quirks.h"
++
++static int permissive;
++module_param(permissive, bool, 0644);
++
++#define DEFINE_PCI_CONFIG(op,size,type) \
++int pciback_##op##_config_##size \
++(struct pci_dev *dev, int offset, type value, void *data) \
++{ \
++ return pci_##op##_config_##size (dev, offset, value); \
++}
++
++DEFINE_PCI_CONFIG(read, byte, u8 *)
++DEFINE_PCI_CONFIG(read, word, u16 *)
++DEFINE_PCI_CONFIG(read, dword, u32 *)
++
++DEFINE_PCI_CONFIG(write, byte, u8)
++DEFINE_PCI_CONFIG(write, word, u16)
++DEFINE_PCI_CONFIG(write, dword, u32)
++
++static int conf_space_read(struct pci_dev *dev,
++ const struct config_field_entry *entry,
++ int offset, u32 *value)
++{
++ int ret = 0;
++ const struct config_field *field = entry->field;
++
++ *value = 0;
++
++ switch (field->size) {
++ case 1:
++ if (field->u.b.read)
++ ret = field->u.b.read(dev, offset, (u8 *) value,
++ entry->data);
++ break;
++ case 2:
++ if (field->u.w.read)
++ ret = field->u.w.read(dev, offset, (u16 *) value,
++ entry->data);
++ break;
++ case 4:
++ if (field->u.dw.read)
++ ret = field->u.dw.read(dev, offset, value, entry->data);
++ break;
++ }
++ return ret;
++}
++
++static int conf_space_write(struct pci_dev *dev,
++ const struct config_field_entry *entry,
++ int offset, u32 value)
++{
++ int ret = 0;
++ const struct config_field *field = entry->field;
++
++ switch (field->size) {
++ case 1:
++ if (field->u.b.write)
++ ret = field->u.b.write(dev, offset, (u8) value,
++ entry->data);
++ break;
++ case 2:
++ if (field->u.w.write)
++ ret = field->u.w.write(dev, offset, (u16) value,
++ entry->data);
++ break;
++ case 4:
++ if (field->u.dw.write)
++ ret = field->u.dw.write(dev, offset, value,
++ entry->data);
++ break;
++ }
++ return ret;
++}
++
++static inline u32 get_mask(int size)
++{
++ if (size == 1)
++ return 0xff;
++ else if (size == 2)
++ return 0xffff;
++ else
++ return 0xffffffff;
++}
++
++static inline int valid_request(int offset, int size)
++{
++ /* Validate request (no un-aligned requests) */
++ if ((size == 1 || size == 2 || size == 4) && (offset % size) == 0)
++ return 1;
++ return 0;
++}
++
++static inline u32 merge_value(u32 val, u32 new_val, u32 new_val_mask,
++ int offset)
++{
++ if (offset >= 0) {
++ new_val_mask <<= (offset * 8);
++ new_val <<= (offset * 8);
++ } else {
++ new_val_mask >>= (offset * -8);
++ new_val >>= (offset * -8);
++ }
++ val = (val & ~new_val_mask) | (new_val & new_val_mask);
++
++ return val;
++}
++
++static int pcibios_err_to_errno(int err)
++{
++ switch (err) {
++ case PCIBIOS_SUCCESSFUL:
++ return XEN_PCI_ERR_success;
++ case PCIBIOS_DEVICE_NOT_FOUND:
++ return XEN_PCI_ERR_dev_not_found;
++ case PCIBIOS_BAD_REGISTER_NUMBER:
++ return XEN_PCI_ERR_invalid_offset;
++ case PCIBIOS_FUNC_NOT_SUPPORTED:
++ return XEN_PCI_ERR_not_implemented;
++ case PCIBIOS_SET_FAILED:
++ return XEN_PCI_ERR_access_denied;
++ }
++ return err;
++}
++
++int pciback_config_read(struct pci_dev *dev, int offset, int size,
++ u32 * ret_val)
++{
++ int err = 0;
++ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
++ const struct config_field_entry *cfg_entry;
++ const struct config_field *field;
++ int req_start, req_end, field_start, field_end;
++ /* if read fails for any reason, return 0 (as if device didn't respond) */
++ u32 value = 0, tmp_val;
++
++ if (unlikely(verbose_request))
++ printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x\n",
++ pci_name(dev), size, offset);
++
++ if (!valid_request(offset, size)) {
++ err = XEN_PCI_ERR_invalid_offset;
++ goto out;
++ }
++
++ /* Get the real value first, then modify as appropriate */
++ switch (size) {
++ case 1:
++ err = pci_read_config_byte(dev, offset, (u8 *) & value);
++ break;
++ case 2:
++ err = pci_read_config_word(dev, offset, (u16 *) & value);
++ break;
++ case 4:
++ err = pci_read_config_dword(dev, offset, &value);
++ break;
++ }
++
++ list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
++ field = cfg_entry->field;
++
++ req_start = offset;
++ req_end = offset + size;
++ field_start = OFFSET(cfg_entry);
++ field_end = OFFSET(cfg_entry) + field->size;
++
++ if ((req_start >= field_start && req_start < field_end)
++ || (req_end > field_start && req_end <= field_end)) {
++ err = conf_space_read(dev, cfg_entry, field_start,
++ &tmp_val);
++ if (err)
++ goto out;
++
++ value = merge_value(value, tmp_val,
++ get_mask(field->size),
++ field_start - req_start);
++ }
++ }
++
++ out:
++ if (unlikely(verbose_request))
++ printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x = %x\n",
++ pci_name(dev), size, offset, value);
++
++ *ret_val = value;
++ return pcibios_err_to_errno(err);
++}
++
++int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value)
++{
++ int err = 0, handled = 0;
++ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
++ const struct config_field_entry *cfg_entry;
++ const struct config_field *field;
++ u32 tmp_val;
++ int req_start, req_end, field_start, field_end;
++
++ if (unlikely(verbose_request))
++ printk(KERN_DEBUG
++ "pciback: %s: write request %d bytes at 0x%x = %x\n",
++ pci_name(dev), size, offset, value);
++
++ if (!valid_request(offset, size))
++ return XEN_PCI_ERR_invalid_offset;
++
++ list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
++ field = cfg_entry->field;
++
++ req_start = offset;
++ req_end = offset + size;
++ field_start = OFFSET(cfg_entry);
++ field_end = OFFSET(cfg_entry) + field->size;
++
++ if ((req_start >= field_start && req_start < field_end)
++ || (req_end > field_start && req_end <= field_end)) {
++ tmp_val = 0;
++
++ err = pciback_config_read(dev, field_start,
++ field->size, &tmp_val);
++ if (err)
++ break;
++
++ tmp_val = merge_value(tmp_val, value, get_mask(size),
++ req_start - field_start);
++
++ err = conf_space_write(dev, cfg_entry, field_start,
++ tmp_val);
++
++ /* handled is set true here, but not every byte
++ * may have been written! Properly detecting if
++ * every byte is handled is unnecessary as the
++ * flag is used to detect devices that need
++ * special helpers to work correctly.
++ */
++ handled = 1;
++ }
++ }
++
++ if (!handled && !err) {
++ /* By default, anything not specificially handled above is
++ * read-only. The permissive flag changes this behavior so
++ * that anything not specifically handled above is writable.
++ * This means that some fields may still be read-only because
++ * they have entries in the config_field list that intercept
++ * the write and do nothing. */
++ if (dev_data->permissive || permissive) {
++ switch (size) {
++ case 1:
++ err = pci_write_config_byte(dev, offset,
++ (u8) value);
++ break;
++ case 2:
++ err = pci_write_config_word(dev, offset,
++ (u16) value);
++ break;
++ case 4:
++ err = pci_write_config_dword(dev, offset,
++ (u32) value);
++ break;
++ }
++ } else if (!dev_data->warned_on_write) {
++ dev_data->warned_on_write = 1;
++ dev_warn(&dev->dev, "Driver tried to write to a "
++ "read-only configuration space field at offset "
++ "0x%x, size %d. This may be harmless, but if "
++ "you have problems with your device:\n"
++ "1) see permissive attribute in sysfs\n"
++ "2) report problems to the xen-devel "
++ "mailing list along with details of your "
++ "device obtained from lspci.\n", offset, size);
++ }
++ }
++
++ return pcibios_err_to_errno(err);
++}
++
++void pciback_config_free_dyn_fields(struct pci_dev *dev)
++{
++ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
++ struct config_field_entry *cfg_entry, *t;
++ const struct config_field *field;
++
++ dev_dbg(&dev->dev,
++ "free-ing dynamically allocated virtual configuration space fields\n");
++ if (!dev_data)
++ return;
++
++ list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) {
++ field = cfg_entry->field;
++
++ if (field->clean) {
++ field->clean((struct config_field *)field);
++
++ if (cfg_entry->data)
++ kfree(cfg_entry->data);
++
++ list_del(&cfg_entry->list);
++ kfree(cfg_entry);
++ }
++
++ }
++}
++
++void pciback_config_reset_dev(struct pci_dev *dev)
++{
++ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
++ const struct config_field_entry *cfg_entry;
++ const struct config_field *field;
++
++ dev_dbg(&dev->dev, "resetting virtual configuration space\n");
++ if (!dev_data)
++ return;
++
++ list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
++ field = cfg_entry->field;
++
++ if (field->reset)
++ field->reset(dev, OFFSET(cfg_entry), cfg_entry->data);
++ }
++}
++
++void pciback_config_free_dev(struct pci_dev *dev)
++{
++ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
++ struct config_field_entry *cfg_entry, *t;
++ const struct config_field *field;
++
++ dev_dbg(&dev->dev, "free-ing virtual configuration space fields\n");
++ if (!dev_data)
++ return;
++
++ list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) {
++ list_del(&cfg_entry->list);
++
++ field = cfg_entry->field;
++
++ if (field->release)
++ field->release(dev, OFFSET(cfg_entry), cfg_entry->data);
++
++ kfree(cfg_entry);
++ }
++}
++
++int pciback_config_add_field_offset(struct pci_dev *dev,
++ const struct config_field *field,
++ unsigned int base_offset)
++{
++ int err = 0;
++ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
++ struct config_field_entry *cfg_entry;
++ void *tmp;
++
++ cfg_entry = kmalloc(sizeof(*cfg_entry), GFP_KERNEL);
++ if (!cfg_entry) {
++ err = -ENOMEM;
++ goto out;
++ }
++
++ cfg_entry->data = NULL;
++ cfg_entry->field = field;
++ cfg_entry->base_offset = base_offset;
++
++ /* silently ignore duplicate fields */
++ err = pciback_field_is_dup(dev,OFFSET(cfg_entry));
++ if (err)
++ goto out;
++
++ if (field->init) {
++ tmp = field->init(dev, OFFSET(cfg_entry));
++
++ if (IS_ERR(tmp)) {
++ err = PTR_ERR(tmp);
++ goto out;
++ }
++
++ cfg_entry->data = tmp;
++ }
++
++ dev_dbg(&dev->dev, "added config field at offset 0x%02x\n",
++ OFFSET(cfg_entry));
++ list_add_tail(&cfg_entry->list, &dev_data->config_fields);
++
++ out:
++ if (err)
++ kfree(cfg_entry);
++
++ return err;
++}
++
++/* This sets up the device's virtual configuration space to keep track of
++ * certain registers (like the base address registers (BARs) so that we can
++ * keep the client from manipulating them directly.
++ */
++int pciback_config_init_dev(struct pci_dev *dev)
++{
++ int err = 0;
++ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
++
++ dev_dbg(&dev->dev, "initializing virtual configuration space\n");
++
++ INIT_LIST_HEAD(&dev_data->config_fields);
++
++ err = pciback_config_header_add_fields(dev);
++ if (err)
++ goto out;
++
++ err = pciback_config_capability_add_fields(dev);
++ if (err)
++ goto out;
++
++ err = pciback_config_quirks_init(dev);
++
++ out:
++ return err;
++}
++
++int pciback_config_init(void)
++{
++ return pciback_config_capability_init();
++}
+diff --git a/drivers/xen/pciback/conf_space.h b/drivers/xen/pciback/conf_space.h
+new file mode 100644
+index 0000000..fe746ef
+--- /dev/null
++++ b/drivers/xen/pciback/conf_space.h
+@@ -0,0 +1,126 @@
++/*
++ * PCI Backend - Common data structures for overriding the configuration space
++ *
++ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ */
++
++#ifndef __XEN_PCIBACK_CONF_SPACE_H__
++#define __XEN_PCIBACK_CONF_SPACE_H__
++
++#include <linux/list.h>
++#include <linux/err.h>
++
++/* conf_field_init can return an errno in a ptr with ERR_PTR() */
++typedef void *(*conf_field_init) (struct pci_dev * dev, int offset);
++typedef void (*conf_field_reset) (struct pci_dev * dev, int offset, void *data);
++typedef void (*conf_field_free) (struct pci_dev * dev, int offset, void *data);
++
++typedef int (*conf_dword_write) (struct pci_dev * dev, int offset, u32 value,
++ void *data);
++typedef int (*conf_word_write) (struct pci_dev * dev, int offset, u16 value,
++ void *data);
++typedef int (*conf_byte_write) (struct pci_dev * dev, int offset, u8 value,
++ void *data);
++typedef int (*conf_dword_read) (struct pci_dev * dev, int offset, u32 * value,
++ void *data);
++typedef int (*conf_word_read) (struct pci_dev * dev, int offset, u16 * value,
++ void *data);
++typedef int (*conf_byte_read) (struct pci_dev * dev, int offset, u8 * value,
++ void *data);
++
++/* These are the fields within the configuration space which we
++ * are interested in intercepting reads/writes to and changing their
++ * values.
++ */
++struct config_field {
++ unsigned int offset;
++ unsigned int size;
++ unsigned int mask;
++ conf_field_init init;
++ conf_field_reset reset;
++ conf_field_free release;
++ void (*clean) (struct config_field * field);
++ union {
++ struct {
++ conf_dword_write write;
++ conf_dword_read read;
++ } dw;
++ struct {
++ conf_word_write write;
++ conf_word_read read;
++ } w;
++ struct {
++ conf_byte_write write;
++ conf_byte_read read;
++ } b;
++ } u;
++ struct list_head list;
++};
++
++struct config_field_entry {
++ struct list_head list;
++ const struct config_field *field;
++ unsigned int base_offset;
++ void *data;
++};
++
++#define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset)
++
++/* Add fields to a device - the add_fields macro expects to get a pointer to
++ * the first entry in an array (of which the ending is marked by size==0)
++ */
++int pciback_config_add_field_offset(struct pci_dev *dev,
++ const struct config_field *field,
++ unsigned int offset);
++
++static inline int pciback_config_add_field(struct pci_dev *dev,
++ const struct config_field *field)
++{
++ return pciback_config_add_field_offset(dev, field, 0);
++}
++
++static inline int pciback_config_add_fields(struct pci_dev *dev,
++ const struct config_field *field)
++{
++ int i, err = 0;
++ for (i = 0; field[i].size != 0; i++) {
++ err = pciback_config_add_field(dev, &field[i]);
++ if (err)
++ break;
++ }
++ return err;
++}
++
++static inline int pciback_config_add_fields_offset(struct pci_dev *dev,
++ const struct config_field *field,
++ unsigned int offset)
++{
++ int i, err = 0;
++ for (i = 0; field[i].size != 0; i++) {
++ err = pciback_config_add_field_offset(dev, &field[i], offset);
++ if (err)
++ break;
++ }
++ return err;
++}
++
++/* Read/Write the real configuration space */
++int pciback_read_config_byte(struct pci_dev *dev, int offset, u8 * value,
++ void *data);
++int pciback_read_config_word(struct pci_dev *dev, int offset, u16 * value,
++ void *data);
++int pciback_read_config_dword(struct pci_dev *dev, int offset, u32 * value,
++ void *data);
++int pciback_write_config_byte(struct pci_dev *dev, int offset, u8 value,
++ void *data);
++int pciback_write_config_word(struct pci_dev *dev, int offset, u16 value,
++ void *data);
++int pciback_write_config_dword(struct pci_dev *dev, int offset, u32 value,
++ void *data);
++
++int pciback_config_capability_init(void);
++
++int pciback_config_header_add_fields(struct pci_dev *dev);
++int pciback_config_capability_add_fields(struct pci_dev *dev);
++
++#endif /* __XEN_PCIBACK_CONF_SPACE_H__ */
+diff --git a/drivers/xen/pciback/conf_space_capability.c b/drivers/xen/pciback/conf_space_capability.c
+new file mode 100644
+index 0000000..50efca4
+--- /dev/null
++++ b/drivers/xen/pciback/conf_space_capability.c
+@@ -0,0 +1,69 @@
++/*
++ * PCI Backend - Handles the virtual fields found on the capability lists
++ * in the configuration space.
++ *
++ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ */
++
++#include <linux/kernel.h>
++#include <linux/pci.h>
++#include "pciback.h"
++#include "conf_space.h"
++#include "conf_space_capability.h"
++
++static LIST_HEAD(capabilities);
++
++static const struct config_field caplist_header[] = {
++ {
++ .offset = PCI_CAP_LIST_ID,
++ .size = 2, /* encompass PCI_CAP_LIST_ID & PCI_CAP_LIST_NEXT */
++ .u.w.read = pciback_read_config_word,
++ .u.w.write = NULL,
++ },
++ {}
++};
++
++static inline void register_capability(struct pciback_config_capability *cap)
++{
++ list_add_tail(&cap->cap_list, &capabilities);
++}
++
++int pciback_config_capability_add_fields(struct pci_dev *dev)
++{
++ int err = 0;
++ struct pciback_config_capability *cap;
++ int cap_offset;
++
++ list_for_each_entry(cap, &capabilities, cap_list) {
++ cap_offset = pci_find_capability(dev, cap->capability);
++ if (cap_offset) {
++ dev_dbg(&dev->dev, "Found capability 0x%x at 0x%x\n",
++ cap->capability, cap_offset);
++
++ err = pciback_config_add_fields_offset(dev,
++ caplist_header,
++ cap_offset);
++ if (err)
++ goto out;
++ err = pciback_config_add_fields_offset(dev,
++ cap->fields,
++ cap_offset);
++ if (err)
++ goto out;
++ }
++ }
++
++ out:
++ return err;
++}
++
++extern struct pciback_config_capability pciback_config_capability_vpd;
++extern struct pciback_config_capability pciback_config_capability_pm;
++
++int pciback_config_capability_init(void)
++{
++ register_capability(&pciback_config_capability_vpd);
++ register_capability(&pciback_config_capability_pm);
++
++ return 0;
++}
+diff --git a/drivers/xen/pciback/conf_space_capability.h b/drivers/xen/pciback/conf_space_capability.h
+new file mode 100644
+index 0000000..823392e
+--- /dev/null
++++ b/drivers/xen/pciback/conf_space_capability.h
+@@ -0,0 +1,23 @@
++/*
++ * PCI Backend - Data structures for special overlays for structures on
++ * the capability list.
++ *
++ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ */
++
++#ifndef __PCIBACK_CONFIG_CAPABILITY_H__
++#define __PCIBACK_CONFIG_CAPABILITY_H__
++
++#include <linux/pci.h>
++#include <linux/list.h>
++
++struct pciback_config_capability {
++ struct list_head cap_list; /* link on the global capabilities list */
++
++ int capability; /* PCI_CAP_ID_* value this overlay matches */
++
++ /* If the device has the capability found above, add these fields */
++ const struct config_field *fields; /* array terminated by size == 0 */
++};
++
++#endif /* __PCIBACK_CONFIG_CAPABILITY_H__ */
+diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c
+new file mode 100644
+index 0000000..762e396
+--- /dev/null
++++ b/drivers/xen/pciback/conf_space_capability_msi.c
+@@ -0,0 +1,79 @@
++/*
++ * PCI Backend -- Configuration overlay for MSI capability
++ */
++#include <linux/pci.h>
++#include <linux/slab.h>
++#include "conf_space.h"
++#include "conf_space_capability.h"
++#include <xen/interface/io/pciif.h>
++#include "pciback.h"
++
++int pciback_enable_msi(struct pciback_device *pdev,
++ struct pci_dev *dev, struct xen_pci_op *op)
++{
++ int otherend = pdev->xdev->otherend_id;
++ int status;
++
++ status = pci_enable_msi(dev);
++
++ if (status) {
++ printk(KERN_ERR "error enable msi for guest %x status %x\n", otherend, status);
++ op->value = 0;
++ return XEN_PCI_ERR_op_failed;
++ }
++
++ op->value = dev->irq;
++ return 0;
++}
++
++int pciback_disable_msi(struct pciback_device *pdev,
++ struct pci_dev *dev, struct xen_pci_op *op)
++{
++ pci_disable_msi(dev);
++
++ op->value = dev->irq;
++ return 0;
++}
++
++int pciback_enable_msix(struct pciback_device *pdev,
++ struct pci_dev *dev, struct xen_pci_op *op)
++{
++ int i, result;
++ struct msix_entry *entries;
++
++ if (op->value > SH_INFO_MAX_VEC)
++ return -EINVAL;
++
++ entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL);
++ if (entries == NULL)
++ return -ENOMEM;
++
++ for (i = 0; i < op->value; i++) {
++ entries[i].entry = op->msix_entries[i].entry;
++ entries[i].vector = op->msix_entries[i].vector;
++ }
++
++ result = pci_enable_msix(dev, entries, op->value);
++
++ for (i = 0; i < op->value; i++) {
++ op->msix_entries[i].entry = entries[i].entry;
++ op->msix_entries[i].vector = entries[i].vector;
++ }
++
++ kfree(entries);
++
++ op->value = result;
++
++ return result;
++}
++
++int pciback_disable_msix(struct pciback_device *pdev,
++ struct pci_dev *dev, struct xen_pci_op *op)
++{
++
++ pci_disable_msix(dev);
++
++ op->value = dev->irq;
++ return 0;
++}
++
+diff --git a/drivers/xen/pciback/conf_space_capability_pm.c b/drivers/xen/pciback/conf_space_capability_pm.c
+new file mode 100644
+index 0000000..e2f99c7
+--- /dev/null
++++ b/drivers/xen/pciback/conf_space_capability_pm.c
+@@ -0,0 +1,126 @@
++/*
++ * PCI Backend - Configuration space overlay for power management
++ *
++ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ */
++
++#include <linux/pci.h>
++#include "conf_space.h"
++#include "conf_space_capability.h"
++
++static int pm_caps_read(struct pci_dev *dev, int offset, u16 *value,
++ void *data)
++{
++ int err;
++ u16 real_value;
++
++ err = pci_read_config_word(dev, offset, &real_value);
++ if (err)
++ goto out;
++
++ *value = real_value & ~PCI_PM_CAP_PME_MASK;
++
++ out:
++ return err;
++}
++
++/* PM_OK_BITS specifies the bits that the driver domain is allowed to change.
++ * Can't allow driver domain to enable PMEs - they're shared */
++#define PM_OK_BITS (PCI_PM_CTRL_PME_STATUS|PCI_PM_CTRL_DATA_SEL_MASK)
++
++static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value,
++ void *data)
++{
++ int err;
++ u16 old_value;
++ pci_power_t new_state, old_state;
++
++ err = pci_read_config_word(dev, offset, &old_value);
++ if (err)
++ goto out;
++
++ old_state = (pci_power_t)(old_value & PCI_PM_CTRL_STATE_MASK);
++ new_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK);
++
++ new_value &= PM_OK_BITS;
++ if ((old_value & PM_OK_BITS) != new_value) {
++ new_value = (old_value & ~PM_OK_BITS) | new_value;
++ err = pci_write_config_word(dev, offset, new_value);
++ if (err)
++ goto out;
++ }
++
++ /* Let pci core handle the power management change */
++ dev_dbg(&dev->dev, "set power state to %x\n", new_state);
++ err = pci_set_power_state(dev, new_state);
++ if (err) {
++ err = PCIBIOS_SET_FAILED;
++ goto out;
++ }
++
++ /*
++ * Device may lose PCI config info on D3->D0 transition. This
++ * is a problem for some guests which will not reset BARs. Even
++ * those that have a go will be foiled by our BAR-write handler
++ * which will discard the write! Since Linux won't re-init
++ * the config space automatically in all cases, we do it here.
++ * Future: Should we re-initialise all first 64 bytes of config space?
++ */
++ if (new_state == PCI_D0 &&
++ (old_state == PCI_D3hot || old_state == PCI_D3cold) &&
++ !(old_value & PCI_PM_CTRL_NO_SOFT_RESET))
++ pci_restore_bars(dev);
++
++ out:
++ return err;
++}
++
++/* Ensure PMEs are disabled */
++static void *pm_ctrl_init(struct pci_dev *dev, int offset)
++{
++ int err;
++ u16 value;
++
++ err = pci_read_config_word(dev, offset, &value);
++ if (err)
++ goto out;
++
++ if (value & PCI_PM_CTRL_PME_ENABLE) {
++ value &= ~PCI_PM_CTRL_PME_ENABLE;
++ err = pci_write_config_word(dev, offset, value);
++ }
++
++ out:
++ return ERR_PTR(err); /* ERR_PTR(0) == NULL: success, no per-field data */
++}
++
++static const struct config_field caplist_pm[] = {
++ {
++ .offset = PCI_PM_PMC,
++ .size = 2,
++ .u.w.read = pm_caps_read,
++ },
++ {
++ .offset = PCI_PM_CTRL,
++ .size = 2,
++ .init = pm_ctrl_init,
++ .u.w.read = pciback_read_config_word,
++ .u.w.write = pm_ctrl_write,
++ },
++ {
++ .offset = PCI_PM_PPB_EXTENSIONS,
++ .size = 1,
++ .u.b.read = pciback_read_config_byte,
++ },
++ {
++ .offset = PCI_PM_DATA_REGISTER,
++ .size = 1,
++ .u.b.read = pciback_read_config_byte,
++ },
++ {}
++};
++
++struct pciback_config_capability pciback_config_capability_pm = {
++ .capability = PCI_CAP_ID_PM,
++ .fields = caplist_pm,
++};
+diff --git a/drivers/xen/pciback/conf_space_capability_vpd.c b/drivers/xen/pciback/conf_space_capability_vpd.c
+new file mode 100644
+index 0000000..920cb4a
+--- /dev/null
++++ b/drivers/xen/pciback/conf_space_capability_vpd.c
+@@ -0,0 +1,40 @@
++/*
++ * PCI Backend - Configuration space overlay for Vital Product Data
++ *
++ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ */
++
++#include <linux/pci.h>
++#include "conf_space.h"
++#include "conf_space_capability.h"
++
++static int vpd_address_write(struct pci_dev *dev, int offset, u16 value,
++ void *data)
++{
++ /* Disallow writes to the vital product data */
++ if (value & PCI_VPD_ADDR_F)
++ return PCIBIOS_SET_FAILED;
++ else
++ return pci_write_config_word(dev, offset, value);
++}
++
++static const struct config_field caplist_vpd[] = {
++ {
++ .offset = PCI_VPD_ADDR,
++ .size = 2,
++ .u.w.read = pciback_read_config_word,
++ .u.w.write = vpd_address_write,
++ },
++ {
++ .offset = PCI_VPD_DATA,
++ .size = 4,
++ .u.dw.read = pciback_read_config_dword,
++ .u.dw.write = NULL,
++ },
++ {}
++};
++
++struct pciback_config_capability pciback_config_capability_vpd = {
++ .capability = PCI_CAP_ID_VPD,
++ .fields = caplist_vpd,
++};
+diff --git a/drivers/xen/pciback/conf_space_header.c b/drivers/xen/pciback/conf_space_header.c
+new file mode 100644
+index 0000000..f794e12
+--- /dev/null
++++ b/drivers/xen/pciback/conf_space_header.c
+@@ -0,0 +1,317 @@
++/*
++ * PCI Backend - Handles the virtual fields in the configuration space headers.
++ *
++ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ */
++
++#include <linux/kernel.h>
++#include <linux/pci.h>
++#include "pciback.h"
++#include "conf_space.h"
++
++struct pci_bar_info {
++ u32 val;
++ u32 len_val;
++ int which;
++};
++
++#define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO))
++#define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER)
++
++static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
++{
++ int err;
++
++ if (!dev->is_enabled && is_enable_cmd(value)) {
++ if (unlikely(verbose_request))
++ printk(KERN_DEBUG "pciback: %s: enable\n",
++ pci_name(dev));
++ err = pci_enable_device(dev);
++ if (err)
++ return err;
++ } else if (dev->is_enabled && !is_enable_cmd(value)) {
++ if (unlikely(verbose_request))
++ printk(KERN_DEBUG "pciback: %s: disable\n",
++ pci_name(dev));
++ pci_disable_device(dev);
++ }
++
++ if (!dev->is_busmaster && is_master_cmd(value)) {
++ if (unlikely(verbose_request))
++ printk(KERN_DEBUG "pciback: %s: set bus master\n",
++ pci_name(dev));
++ pci_set_master(dev);
++ }
++
++ if (value & PCI_COMMAND_INVALIDATE) {
++ if (unlikely(verbose_request))
++ printk(KERN_DEBUG
++ "pciback: %s: enable memory-write-invalidate\n",
++ pci_name(dev));
++ err = pci_set_mwi(dev);
++ if (err) {
++ printk(KERN_WARNING
++ "pciback: %s: cannot enable memory-write-invalidate (%d)\n",
++ pci_name(dev), err);
++ value &= ~PCI_COMMAND_INVALIDATE;
++ }
++ }
++
++ return pci_write_config_word(dev, offset, value);
++}
++
++static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data)
++{
++ struct pci_bar_info *bar = data;
++
++ if (unlikely(!bar)) {
++ printk(KERN_WARNING "pciback: driver data not found for %s\n",
++ pci_name(dev));
++ return XEN_PCI_ERR_op_failed;
++ }
++
++ /* A write to obtain the length must happen as a 32-bit write.
++ * This does not (yet) support writing individual bytes
++ */
++ if (value == ~PCI_ROM_ADDRESS_ENABLE)
++ bar->which = 1;
++ else {
++ u32 tmpval;
++ pci_read_config_dword(dev, offset, &tmpval);
++ if (tmpval != bar->val && value == bar->val) {
++ /* Allow restoration of bar value. */
++ pci_write_config_dword(dev, offset, bar->val);
++ }
++ bar->which = 0;
++ }
++
++ /* Do we need to support enabling/disabling the rom address here? */
++
++ return 0;
++}
++
++/* For the BARs, only allow writes which write ~0 or
++ * the correct resource information
++ * (Needed for when the driver probes the resource usage)
++ */
++static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data)
++{
++ struct pci_bar_info *bar = data;
++
++ if (unlikely(!bar)) {
++ printk(KERN_WARNING "pciback: driver data not found for %s\n",
++ pci_name(dev));
++ return XEN_PCI_ERR_op_failed;
++ }
++
++ /* A write to obtain the length must happen as a 32-bit write.
++ * This does not (yet) support writing individual bytes
++ */
++ if (value == ~0)
++ bar->which = 1;
++ else {
++ u32 tmpval;
++ pci_read_config_dword(dev, offset, &tmpval);
++ if (tmpval != bar->val && value == bar->val) {
++ /* Allow restoration of bar value. */
++ pci_write_config_dword(dev, offset, bar->val);
++ }
++ bar->which = 0;
++ }
++
++ return 0;
++}
++
++static int bar_read(struct pci_dev *dev, int offset, u32 * value, void *data)
++{
++ struct pci_bar_info *bar = data;
++
++ if (unlikely(!bar)) {
++ printk(KERN_WARNING "pciback: driver data not found for %s\n",
++ pci_name(dev));
++ return XEN_PCI_ERR_op_failed;
++ }
++
++ *value = bar->which ? bar->len_val : bar->val;
++
++ return 0;
++}
++
++static inline void read_dev_bar(struct pci_dev *dev,
++ struct pci_bar_info *bar_info, int offset,
++ u32 len_mask)
++{
++ pci_read_config_dword(dev, offset, &bar_info->val); /* save current BAR */
++ pci_write_config_dword(dev, offset, len_mask); /* write mask to size the BAR */
++ pci_read_config_dword(dev, offset, &bar_info->len_val); /* read size bits back */
++ pci_write_config_dword(dev, offset, bar_info->val); /* restore original value */
++}
++
++static void *bar_init(struct pci_dev *dev, int offset)
++{
++ struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL);
++
++ if (!bar)
++ return ERR_PTR(-ENOMEM);
++
++ read_dev_bar(dev, bar, offset, ~0);
++ bar->which = 0;
++
++ return bar;
++}
++
++static void *rom_init(struct pci_dev *dev, int offset)
++{
++ struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL);
++
++ if (!bar)
++ return ERR_PTR(-ENOMEM);
++
++ read_dev_bar(dev, bar, offset, ~PCI_ROM_ADDRESS_ENABLE);
++ bar->which = 0;
++
++ return bar;
++}
++
++static void bar_reset(struct pci_dev *dev, int offset, void *data)
++{
++ struct pci_bar_info *bar = data;
++
++ bar->which = 0;
++}
++
++static void bar_release(struct pci_dev *dev, int offset, void *data)
++{
++ kfree(data);
++}
++
++static int interrupt_read(struct pci_dev *dev, int offset, u8 * value,
++ void *data)
++{
++ *value = (u8) dev->irq;
++
++ return 0;
++}
++
++static int bist_write(struct pci_dev *dev, int offset, u8 value, void *data)
++{
++ u8 cur_value;
++ int err;
++
++ err = pci_read_config_byte(dev, offset, &cur_value);
++ if (err)
++ goto out;
++
++ if ((cur_value & ~PCI_BIST_START) == (value & ~PCI_BIST_START)
++ || value == PCI_BIST_START)
++ err = pci_write_config_byte(dev, offset, value);
++
++ out:
++ return err;
++}
++
++static const struct config_field header_common[] = {
++ {
++ .offset = PCI_COMMAND,
++ .size = 2,
++ .u.w.read = pciback_read_config_word,
++ .u.w.write = command_write,
++ },
++ {
++ .offset = PCI_INTERRUPT_LINE,
++ .size = 1,
++ .u.b.read = interrupt_read,
++ },
++ {
++ .offset = PCI_INTERRUPT_PIN,
++ .size = 1,
++ .u.b.read = pciback_read_config_byte,
++ },
++ {
++ /* Any side effects of letting driver domain control cache line? */
++ .offset = PCI_CACHE_LINE_SIZE,
++ .size = 1,
++ .u.b.read = pciback_read_config_byte,
++ .u.b.write = pciback_write_config_byte,
++ },
++ {
++ .offset = PCI_LATENCY_TIMER,
++ .size = 1,
++ .u.b.read = pciback_read_config_byte,
++ },
++ {
++ .offset = PCI_BIST,
++ .size = 1,
++ .u.b.read = pciback_read_config_byte,
++ .u.b.write = bist_write,
++ },
++ {}
++};
++
++#define CFG_FIELD_BAR(reg_offset) \
++ { \
++ .offset = reg_offset, \
++ .size = 4, \
++ .init = bar_init, \
++ .reset = bar_reset, \
++ .release = bar_release, \
++ .u.dw.read = bar_read, \
++ .u.dw.write = bar_write, \
++ }
++
++#define CFG_FIELD_ROM(reg_offset) \
++ { \
++ .offset = reg_offset, \
++ .size = 4, \
++ .init = rom_init, \
++ .reset = bar_reset, \
++ .release = bar_release, \
++ .u.dw.read = bar_read, \
++ .u.dw.write = rom_write, \
++ }
++
++static const struct config_field header_0[] = {
++ CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
++ CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
++ CFG_FIELD_BAR(PCI_BASE_ADDRESS_2),
++ CFG_FIELD_BAR(PCI_BASE_ADDRESS_3),
++ CFG_FIELD_BAR(PCI_BASE_ADDRESS_4),
++ CFG_FIELD_BAR(PCI_BASE_ADDRESS_5),
++ CFG_FIELD_ROM(PCI_ROM_ADDRESS),
++ {}
++};
++
++static const struct config_field header_1[] = {
++ CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
++ CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
++ CFG_FIELD_ROM(PCI_ROM_ADDRESS1),
++ {}
++};
++
++int pciback_config_header_add_fields(struct pci_dev *dev)
++{
++ int err;
++
++ err = pciback_config_add_fields(dev, header_common);
++ if (err)
++ goto out;
++
++ switch (dev->hdr_type) {
++ case PCI_HEADER_TYPE_NORMAL:
++ err = pciback_config_add_fields(dev, header_0);
++ break;
++
++ case PCI_HEADER_TYPE_BRIDGE:
++ err = pciback_config_add_fields(dev, header_1);
++ break;
++
++ default:
++ err = -EINVAL;
++ printk(KERN_ERR "pciback: %s: Unsupported header type %d!\n",
++ pci_name(dev), dev->hdr_type);
++ break;
++ }
++
++ out:
++ return err;
++}
+diff --git a/drivers/xen/pciback/conf_space_quirks.c b/drivers/xen/pciback/conf_space_quirks.c
+new file mode 100644
+index 0000000..244a438
+--- /dev/null
++++ b/drivers/xen/pciback/conf_space_quirks.c
+@@ -0,0 +1,138 @@
++/*
++ * PCI Backend - Handle special overlays for broken devices.
++ *
++ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ * Author: Chris Bookholt <hap10 at epoch.ncsc.mil>
++ */
++
++#include <linux/kernel.h>
++#include <linux/pci.h>
++#include "pciback.h"
++#include "conf_space.h"
++#include "conf_space_quirks.h"
++
++LIST_HEAD(pciback_quirks);
++
++static inline const struct pci_device_id *
++match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
++{
++ if ((id->vendor == PCI_ANY_ID || id->vendor == dev->vendor) &&
++ (id->device == PCI_ANY_ID || id->device == dev->device) &&
++ (id->subvendor == PCI_ANY_ID || id->subvendor == dev->subsystem_vendor) &&
++ (id->subdevice == PCI_ANY_ID || id->subdevice == dev->subsystem_device) &&
++ !((id->class ^ dev->class) & id->class_mask))
++ return id;
++ return NULL;
++}
++
++struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev)
++{
++ struct pciback_config_quirk *tmp_quirk;
++
++ list_for_each_entry(tmp_quirk, &pciback_quirks, quirks_list)
++ if (match_one_device(&tmp_quirk->devid, dev) != NULL)
++ goto out;
++ tmp_quirk = NULL;
++ printk(KERN_DEBUG
++ "quirk didn't match any device pciback knows about\n");
++ out:
++ return tmp_quirk;
++}
++
++static inline void register_quirk(struct pciback_config_quirk *quirk)
++{
++ list_add_tail(&quirk->quirks_list, &pciback_quirks);
++}
++
++int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg)
++{
++ int ret = 0;
++ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
++ struct config_field_entry *cfg_entry;
++
++ list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
++ if (OFFSET(cfg_entry) == reg) {
++ ret = 1;
++ break;
++ }
++ }
++ return ret;
++}
++
++int pciback_config_quirks_add_field(struct pci_dev *dev, struct config_field
++ *field)
++{
++ int err = 0;
++
++ switch (field->size) {
++ case 1:
++ field->u.b.read = pciback_read_config_byte;
++ field->u.b.write = pciback_write_config_byte;
++ break;
++ case 2:
++ field->u.w.read = pciback_read_config_word;
++ field->u.w.write = pciback_write_config_word;
++ break;
++ case 4:
++ field->u.dw.read = pciback_read_config_dword;
++ field->u.dw.write = pciback_write_config_dword;
++ break;
++ default:
++ err = -EINVAL;
++ goto out;
++ }
++
++ err = pciback_config_add_field(dev, field);
++
++out:
++ return err;
++}
++
++int pciback_config_quirks_init(struct pci_dev *dev)
++{
++ struct pciback_config_quirk *quirk;
++ int ret = 0;
++
++ quirk = kzalloc(sizeof(*quirk), GFP_ATOMIC);
++ if (!quirk) {
++ ret = -ENOMEM;
++ goto out;
++ }
++
++ quirk->devid.vendor = dev->vendor;
++ quirk->devid.device = dev->device;
++ quirk->devid.subvendor = dev->subsystem_vendor;
++ quirk->devid.subdevice = dev->subsystem_device;
++ quirk->devid.class = 0;
++ quirk->devid.class_mask = 0;
++ quirk->devid.driver_data = 0UL;
++
++ quirk->pdev = dev;
++
++ register_quirk(quirk);
++ out:
++ return ret;
++}
++
++void pciback_config_field_free(struct config_field *field)
++{
++ kfree(field);
++}
++
++int pciback_config_quirk_release(struct pci_dev *dev)
++{
++ struct pciback_config_quirk *quirk;
++ int ret = 0;
++
++ quirk = pciback_find_quirk(dev);
++ if (!quirk) {
++ ret = -ENXIO;
++ goto out;
++ }
++
++ list_del(&quirk->quirks_list);
++ kfree(quirk);
++
++ out:
++ return ret;
++}
+diff --git a/drivers/xen/pciback/conf_space_quirks.h b/drivers/xen/pciback/conf_space_quirks.h
+new file mode 100644
+index 0000000..acd0e1a
+--- /dev/null
++++ b/drivers/xen/pciback/conf_space_quirks.h
+@@ -0,0 +1,35 @@
++/*
++ * PCI Backend - Data structures for special overlays for broken devices.
++ *
++ * Ryan Wilson <hap9 at epoch.ncsc.mil>
++ * Chris Bookholt <hap10 at epoch.ncsc.mil>
++ */
++
++#ifndef __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__
++#define __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__
++
++#include <linux/pci.h>
++#include <linux/list.h>
++
++struct pciback_config_quirk {
++ struct list_head quirks_list; /* link on the global pciback_quirks list */
++ struct pci_device_id devid; /* vendor/device/subsystem IDs this quirk matches */
++ struct pci_dev *pdev; /* device this quirk entry was registered for */
++};
++
++struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev);
++
++int pciback_config_quirks_add_field(struct pci_dev *dev, struct config_field
++ *field);
++
++int pciback_config_quirks_remove_field(struct pci_dev *dev, int reg);
++
++int pciback_config_quirks_init(struct pci_dev *dev);
++
++void pciback_config_field_free(struct config_field *field);
++
++int pciback_config_quirk_release(struct pci_dev *dev);
++
++int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg);
++
++#endif /* __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__ */
+diff --git a/drivers/xen/pciback/controller.c b/drivers/xen/pciback/controller.c
+new file mode 100644
+index 0000000..294e48f
+--- /dev/null
++++ b/drivers/xen/pciback/controller.c
+@@ -0,0 +1,443 @@
++/*
++ * Copyright (C) 2007 Hewlett-Packard Development Company, L.P.
++ * Alex Williamson <alex.williamson at hp.com>
++ *
++ * PCI "Controller" Backend - virtualize PCI bus topology based on PCI
++ * controllers. Devices under the same PCI controller are exposed on the
++ * same virtual domain:bus. Within a bus, device slots are virtualized
++ * to compact the bus.
++ *
++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++ */
++
++#include <linux/acpi.h>
++#include <linux/list.h>
++#include <linux/pci.h>
++#include <linux/spinlock.h>
++#include "pciback.h"
++
++#define PCI_MAX_BUSSES 255
++#define PCI_MAX_SLOTS 32
++
++struct controller_dev_entry {
++ struct list_head list;
++ struct pci_dev *dev;
++ unsigned int devfn;
++};
++
++struct controller_list_entry {
++ struct list_head list;
++ struct pci_controller *controller;
++ unsigned int domain;
++ unsigned int bus;
++ unsigned int next_devfn;
++ struct list_head dev_list;
++};
++
++struct controller_dev_data {
++ struct list_head list;
++ unsigned int next_domain;
++ unsigned int next_bus;
++ spinlock_t lock;
++};
++
++struct walk_info {
++ struct pciback_device *pdev;
++ int resource_count;
++ int root_num;
++};
++
++struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
++ unsigned int domain, unsigned int bus,
++ unsigned int devfn)
++{
++ struct controller_dev_data *dev_data = pdev->pci_dev_data;
++ struct controller_dev_entry *dev_entry;
++ struct controller_list_entry *cntrl_entry;
++ struct pci_dev *dev = NULL;
++ unsigned long flags;
++
++ spin_lock_irqsave(&dev_data->lock, flags);
++
++ list_for_each_entry(cntrl_entry, &dev_data->list, list) {
++ if (cntrl_entry->domain != domain ||
++ cntrl_entry->bus != bus)
++ continue;
++
++ list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) {
++ if (devfn == dev_entry->devfn) {
++ dev = dev_entry->dev;
++ goto found;
++ }
++ }
++ }
++found:
++ spin_unlock_irqrestore(&dev_data->lock, flags);
++
++ return dev;
++}
++
++int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
++ int devid, publish_pci_dev_cb publish_cb)
++{
++ struct controller_dev_data *dev_data = pdev->pci_dev_data;
++ struct controller_dev_entry *dev_entry;
++ struct controller_list_entry *cntrl_entry;
++ struct pci_controller *dev_controller = PCI_CONTROLLER(dev);
++ unsigned long flags;
++ int ret = 0, found = 0;
++
++ spin_lock_irqsave(&dev_data->lock, flags);
++
++ /* Look to see if we already have a domain:bus for this controller */
++ list_for_each_entry(cntrl_entry, &dev_data->list, list) {
++ if (cntrl_entry->controller == dev_controller) {
++ found = 1;
++ break;
++ }
++ }
++
++ if (!found) {
++ cntrl_entry = kmalloc(sizeof(*cntrl_entry), GFP_ATOMIC);
++ if (!cntrl_entry) {
++ ret = -ENOMEM;
++ goto out;
++ }
++
++ cntrl_entry->controller = dev_controller;
++ cntrl_entry->next_devfn = PCI_DEVFN(0, 0);
++
++ cntrl_entry->domain = dev_data->next_domain;
++ cntrl_entry->bus = dev_data->next_bus++;
++ if (dev_data->next_bus > PCI_MAX_BUSSES) {
++ dev_data->next_domain++;
++ dev_data->next_bus = 0;
++ }
++
++ INIT_LIST_HEAD(&cntrl_entry->dev_list);
++
++ list_add_tail(&cntrl_entry->list, &dev_data->list);
++ }
++
++ if (PCI_SLOT(cntrl_entry->next_devfn) > PCI_MAX_SLOTS) {
++ /*
++ * While it seems unlikely, this can actually happen if
++ * a controller has P2P bridges under it.
++ */
++ xenbus_dev_fatal(pdev->xdev, -ENOSPC, "Virtual bus %04x:%02x "
++ "is full, no room to export %04x:%02x:%02x.%x",
++ cntrl_entry->domain, cntrl_entry->bus,
++ pci_domain_nr(dev->bus), dev->bus->number,
++ PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
++ ret = -ENOSPC;
++ goto out;
++ }
++
++ dev_entry = kmalloc(sizeof(*dev_entry), GFP_ATOMIC);
++ if (!dev_entry) {
++ if (list_empty(&cntrl_entry->dev_list)) {
++ list_del(&cntrl_entry->list);
++ kfree(cntrl_entry);
++ }
++ ret = -ENOMEM;
++ goto out;
++ }
++
++ dev_entry->dev = dev;
++ dev_entry->devfn = cntrl_entry->next_devfn;
++
++ list_add_tail(&dev_entry->list, &cntrl_entry->dev_list);
++
++ cntrl_entry->next_devfn += PCI_DEVFN(1, 0);
++
++out:
++ spin_unlock_irqrestore(&dev_data->lock, flags);
++
++ /* TODO: Publish virtual domain:bus:slot.func here. */
++
++ return ret;
++}
++
++void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
++{
++ struct controller_dev_data *dev_data = pdev->pci_dev_data;
++ struct controller_list_entry *cntrl_entry;
++ struct controller_dev_entry *dev_entry = NULL;
++ struct pci_dev *found_dev = NULL;
++ unsigned long flags;
++
++ spin_lock_irqsave(&dev_data->lock, flags);
++
++ list_for_each_entry(cntrl_entry, &dev_data->list, list) {
++ if (cntrl_entry->controller != PCI_CONTROLLER(dev))
++ continue;
++ list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) {
++ if (dev_entry->dev == dev) {
++ found_dev = dev_entry->dev;
++ goto found; /* stop: keep both iterators valid */
++ }
++ }
++ }
++found:
++ if (!found_dev) {
++ spin_unlock_irqrestore(&dev_data->lock, flags);
++ return;
++ }
++
++ list_del(&dev_entry->list);
++ kfree(dev_entry);
++
++ if (list_empty(&cntrl_entry->dev_list)) {
++ list_del(&cntrl_entry->list);
++ kfree(cntrl_entry);
++ }
++
++ spin_unlock_irqrestore(&dev_data->lock, flags);
++ pcistub_put_pci_dev(found_dev);
++}
++
++int pciback_init_devices(struct pciback_device *pdev)
++{
++ struct controller_dev_data *dev_data;
++
++ dev_data = kmalloc(sizeof(*dev_data), GFP_KERNEL);
++ if (!dev_data)
++ return -ENOMEM;
++
++ spin_lock_init(&dev_data->lock);
++
++ INIT_LIST_HEAD(&dev_data->list);
++
++ /* Starting domain:bus numbers */
++ dev_data->next_domain = 0;
++ dev_data->next_bus = 0;
++
++ pdev->pci_dev_data = dev_data;
++
++ return 0;
++}
++
++static acpi_status write_xenbus_resource(struct acpi_resource *res, void *data)
++{
++ struct walk_info *info = data;
++ struct acpi_resource_address64 addr;
++ acpi_status status;
++ int i, len, err;
++ char str[32], tmp[3];
++ unsigned char *ptr, *buf;
++
++ status = acpi_resource_to_address64(res, &addr);
++
++ /* Do we care about this range? Let's check. */
++ if (!ACPI_SUCCESS(status) ||
++ !(addr.resource_type == ACPI_MEMORY_RANGE ||
++ addr.resource_type == ACPI_IO_RANGE) ||
++ !addr.address_length || addr.producer_consumer != ACPI_PRODUCER)
++ return AE_OK;
++
++ /*
++ * Furthermore, we really only care to tell the guest about
++ * address ranges that require address translation of some sort.
++ */
++ if (!(addr.resource_type == ACPI_MEMORY_RANGE &&
++ addr.info.mem.translation) &&
++ !(addr.resource_type == ACPI_IO_RANGE &&
++ addr.info.io.translation))
++ return AE_OK;
++
++ /* Store the resource in xenbus for the guest */
++ len = snprintf(str, sizeof(str), "root-%d-resource-%d",
++ info->root_num, info->resource_count);
++ if (unlikely(len >= (sizeof(str) - 1)))
++ return AE_OK;
++
++ buf = kzalloc((sizeof(*res) * 2) + 1, GFP_KERNEL);
++ if (!buf)
++ return AE_OK;
++
++ /* Clean out resource_source */
++ res->data.address64.resource_source.index = 0xFF;
++ res->data.address64.resource_source.string_length = 0;
++ res->data.address64.resource_source.string_ptr = NULL;
++
++ ptr = (unsigned char *)res;
++
++ /* Turn the acpi_resource into an ASCII byte stream */
++ for (i = 0; i < sizeof(*res); i++) {
++ snprintf(tmp, sizeof(tmp), "%02x", ptr[i]);
++ strncat(buf, tmp, 2);
++ }
++
++ err = xenbus_printf(XBT_NIL, info->pdev->xdev->nodename,
++ str, "%s", buf);
++
++ if (!err)
++ info->resource_count++;
++
++ kfree(buf);
++
++ return AE_OK;
++}
++
++int pciback_publish_pci_roots(struct pciback_device *pdev,
++ publish_pci_root_cb publish_root_cb)
++{
++ struct controller_dev_data *dev_data = pdev->pci_dev_data;
++ struct controller_list_entry *cntrl_entry;
++ int i, root_num, len, err = 0;
++ unsigned int domain, bus;
++ char str[64];
++ struct walk_info info;
++
++ spin_lock(&dev_data->lock);
++
++ list_for_each_entry(cntrl_entry, &dev_data->list, list) {
++ /* First publish all the domain:bus info */
++ err = publish_root_cb(pdev, cntrl_entry->domain,
++ cntrl_entry->bus);
++ if (err)
++ goto out;
++
++ /*
++ * Now figure out which root-%d this belongs to
++ * so we can associate resources with it.
++ */
++ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
++ "root_num", "%d", &root_num);
++
++ if (err != 1)
++ goto out;
++
++ for (i = 0; i < root_num; i++) {
++ len = snprintf(str, sizeof(str), "root-%d", i);
++ if (unlikely(len >= (sizeof(str) - 1))) {
++ err = -ENOMEM;
++ goto out;
++ }
++
++ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
++ str, "%x:%x", &domain, &bus);
++ if (err != 2)
++ goto out;
++
++ /* Is this the one we just published? */
++ if (domain == cntrl_entry->domain &&
++ bus == cntrl_entry->bus)
++ break;
++ }
++
++ if (i == root_num)
++ goto out;
++
++ info.pdev = pdev;
++ info.resource_count = 0;
++ info.root_num = i;
++
++ /* Let ACPI do the heavy lifting on decoding resources */
++ acpi_walk_resources(cntrl_entry->controller->acpi_handle,
++ METHOD_NAME__CRS, write_xenbus_resource,
++ &info);
++
++ /* No resources. OK. On to the next one */
++ if (!info.resource_count)
++ continue;
++
++ /* Store the number of resources we wrote for this root-%d */
++ len = snprintf(str, sizeof(str), "root-%d-resources", i);
++ if (unlikely(len >= (sizeof(str) - 1))) {
++ err = -ENOMEM;
++ goto out;
++ }
++
++ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
++ "%d", info.resource_count);
++ if (err)
++ goto out;
++ }
++
++ /* Finally, write some magic to synchronize with the guest. */
++ len = snprintf(str, sizeof(str), "root-resource-magic");
++ if (unlikely(len >= (sizeof(str) - 1))) {
++ err = -ENOMEM;
++ goto out;
++ }
++
++ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
++ "%lx", (sizeof(struct acpi_resource) * 2) + 1);
++
++out:
++ spin_unlock(&dev_data->lock);
++
++ return err;
++}
++
++void pciback_release_devices(struct pciback_device *pdev)
++{
++ struct controller_dev_data *dev_data = pdev->pci_dev_data;
++ struct controller_list_entry *cntrl_entry, *c;
++ struct controller_dev_entry *dev_entry, *d;
++
++ list_for_each_entry_safe(cntrl_entry, c, &dev_data->list, list) {
++ list_for_each_entry_safe(dev_entry, d,
++ &cntrl_entry->dev_list, list) {
++ list_del(&dev_entry->list);
++ pcistub_put_pci_dev(dev_entry->dev);
++ kfree(dev_entry);
++ }
++ list_del(&cntrl_entry->list);
++ kfree(cntrl_entry);
++ }
++
++ kfree(dev_data);
++ pdev->pci_dev_data = NULL;
++}
++
++int pciback_get_pcifront_dev(struct pci_dev *pcidev,
++ struct pciback_device *pdev,
++ unsigned int *domain, unsigned int *bus, unsigned int *devfn)
++{
++ struct controller_dev_data *dev_data = pdev->pci_dev_data;
++ struct controller_dev_entry *dev_entry;
++ struct controller_list_entry *cntrl_entry;
++ unsigned long flags;
++ int found = 0;
++ spin_lock_irqsave(&dev_data->lock, flags);
++
++ list_for_each_entry(cntrl_entry, &dev_data->list, list) {
++ list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) {
++ if ( (dev_entry->dev->bus->number ==
++ pcidev->bus->number) &&
++ (dev_entry->dev->devfn ==
++ pcidev->devfn) &&
++ (pci_domain_nr(dev_entry->dev->bus) ==
++ pci_domain_nr(pcidev->bus)))
++ {
++ found = 1;
++ *domain = cntrl_entry->domain;
++ *bus = cntrl_entry->bus;
++ *devfn = dev_entry->devfn;
++ goto out;
++ }
++ }
++ }
++out:
++ spin_unlock_irqrestore(&dev_data->lock, flags);
++ return found;
++
++}
++
+diff --git a/drivers/xen/pciback/passthrough.c b/drivers/xen/pciback/passthrough.c
+new file mode 100644
+index 0000000..9e7a0c4
+--- /dev/null
++++ b/drivers/xen/pciback/passthrough.c
+@@ -0,0 +1,176 @@
++/*
++ * PCI Backend - Provides restricted access to the real PCI bus topology
++ * to the frontend
++ *
++ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ */
++
++#include <linux/list.h>
++#include <linux/pci.h>
++#include <linux/spinlock.h>
++#include "pciback.h"
++
++struct passthrough_dev_data {
++ /* Access to dev_list must be protected by lock */
++ struct list_head dev_list;
++ spinlock_t lock;
++};
++
++struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
++ unsigned int domain, unsigned int bus,
++ unsigned int devfn)
++{
++ struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
++ struct pci_dev_entry *dev_entry;
++ struct pci_dev *dev = NULL;
++ unsigned long flags;
++
++ spin_lock_irqsave(&dev_data->lock, flags);
++
++ list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
++ if (domain == (unsigned int)pci_domain_nr(dev_entry->dev->bus)
++ && bus == (unsigned int)dev_entry->dev->bus->number
++ && devfn == dev_entry->dev->devfn) {
++ dev = dev_entry->dev;
++ break;
++ }
++ }
++
++ spin_unlock_irqrestore(&dev_data->lock, flags);
++
++ return dev;
++}
++
++int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
++ int devid, publish_pci_dev_cb publish_cb)
++{
++ struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
++ struct pci_dev_entry *dev_entry;
++ unsigned long flags;
++ unsigned int domain, bus, devfn;
++ int err;
++
++ dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL);
++ if (!dev_entry)
++ return -ENOMEM;
++ dev_entry->dev = dev;
++
++ spin_lock_irqsave(&dev_data->lock, flags);
++ list_add_tail(&dev_entry->list, &dev_data->dev_list);
++ spin_unlock_irqrestore(&dev_data->lock, flags);
++
++ /* Publish this device. */
++ domain = (unsigned int)pci_domain_nr(dev->bus);
++ bus = (unsigned int)dev->bus->number;
++ devfn = dev->devfn;
++ err = publish_cb(pdev, domain, bus, devfn, devid);
++
++ return err;
++}
++
++void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
++{
++ struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
++ struct pci_dev_entry *dev_entry, *t;
++ struct pci_dev *found_dev = NULL;
++ unsigned long flags;
++
++ spin_lock_irqsave(&dev_data->lock, flags);
++
++ list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
++ if (dev_entry->dev == dev) {
++ list_del(&dev_entry->list);
++ found_dev = dev_entry->dev;
++ kfree(dev_entry);
++ }
++ }
++
++ spin_unlock_irqrestore(&dev_data->lock, flags);
++
++ if (found_dev)
++ pcistub_put_pci_dev(found_dev);
++}
++
++int pciback_init_devices(struct pciback_device *pdev)
++{
++ struct passthrough_dev_data *dev_data;
++
++ dev_data = kmalloc(sizeof(*dev_data), GFP_KERNEL);
++ if (!dev_data)
++ return -ENOMEM;
++
++ spin_lock_init(&dev_data->lock);
++
++ INIT_LIST_HEAD(&dev_data->dev_list);
++
++ pdev->pci_dev_data = dev_data;
++
++ return 0;
++}
++
++int pciback_publish_pci_roots(struct pciback_device *pdev,
++ publish_pci_root_cb publish_root_cb)
++{
++ int err = 0;
++ struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
++ struct pci_dev_entry *dev_entry, *e;
++ struct pci_dev *dev;
++ int found;
++ unsigned int domain, bus;
++
++ spin_lock(&dev_data->lock);
++
++ list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
++ /* Only publish this device as a root if none of its
++ * parent bridges are exported
++ */
++ found = 0;
++ dev = dev_entry->dev->bus->self;
++ for (; !found && dev != NULL; dev = dev->bus->self) {
++ list_for_each_entry(e, &dev_data->dev_list, list) {
++ if (dev == e->dev) {
++ found = 1;
++ break;
++ }
++ }
++ }
++
++ domain = (unsigned int)pci_domain_nr(dev_entry->dev->bus);
++ bus = (unsigned int)dev_entry->dev->bus->number;
++
++ if (!found) {
++ err = publish_root_cb(pdev, domain, bus);
++ if (err)
++ break;
++ }
++ }
++
++ spin_unlock(&dev_data->lock);
++
++ return err;
++}
++
++void pciback_release_devices(struct pciback_device *pdev)
++{
++ struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
++ struct pci_dev_entry *dev_entry, *t;
++
++ list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
++ list_del(&dev_entry->list);
++ pcistub_put_pci_dev(dev_entry->dev);
++ kfree(dev_entry);
++ }
++
++ kfree(dev_data);
++ pdev->pci_dev_data = NULL;
++}
++
++int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev,
++ unsigned int *domain, unsigned int *bus, unsigned int *devfn)
++
++{
++ *domain = pci_domain_nr(pcidev->bus);
++ *bus = pcidev->bus->number;
++ *devfn = pcidev->devfn;
++ return 1;
++}
+diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
+new file mode 100644
+index 0000000..c481a73
+--- /dev/null
++++ b/drivers/xen/pciback/pci_stub.c
+@@ -0,0 +1,1316 @@
++/*
++ * PCI Stub Driver - Grabs devices in backend to be exported later
++ *
++ * Ryan Wilson <hap9 at epoch.ncsc.mil>
++ * Chris Bookholt <hap10 at epoch.ncsc.mil>
++ */
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/rwsem.h>
++#include <linux/list.h>
++#include <linux/spinlock.h>
++#include <linux/kref.h>
++#include <linux/pci.h>
++#include <linux/wait.h>
++#include <asm/atomic.h>
++#include <xen/evtchn.h>
++#include "pciback.h"
++#include "conf_space.h"
++#include "conf_space_quirks.h"
++
++static char *pci_devs_to_hide = NULL;
++wait_queue_head_t aer_wait_queue;
++/* Semaphore to synchronize AER handling with pciback remove/reconfigure ops;
++* we want to avoid a pciback device being removed in the middle of AER ops.
++*/
++static DECLARE_RWSEM(pcistub_sem);
++module_param_named(hide, pci_devs_to_hide, charp, 0444);
++
++struct pcistub_device_id {
++ struct list_head slot_list;
++ int domain;
++ unsigned char bus;
++ unsigned int devfn;
++};
++static LIST_HEAD(pcistub_device_ids);
++static DEFINE_SPINLOCK(device_ids_lock);
++
++struct pcistub_device {
++ struct kref kref;
++ struct list_head dev_list;
++ spinlock_t lock;
++
++ struct pci_dev *dev;
++ struct pciback_device *pdev; /* non-NULL if struct pci_dev is in use */
++};
++
++/* Access to pcistub_devices & seized_devices lists and the initialize_devices
++ * flag must be locked with pcistub_devices_lock
++ */
++static DEFINE_SPINLOCK(pcistub_devices_lock);
++static LIST_HEAD(pcistub_devices);
++
++/* wait for device_initcall before initializing our devices
++ * (see pcistub_init_devices_late)
++ */
++static int initialize_devices = 0;
++static LIST_HEAD(seized_devices);
++
++static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev)
++{
++ struct pcistub_device *psdev;
++
++ dev_dbg(&dev->dev, "pcistub_device_alloc\n");
++
++ psdev = kzalloc(sizeof(*psdev), GFP_ATOMIC);
++ if (!psdev)
++ return NULL;
++
++ psdev->dev = pci_dev_get(dev);
++ if (!psdev->dev) {
++ kfree(psdev);
++ return NULL;
++ }
++
++ kref_init(&psdev->kref);
++ spin_lock_init(&psdev->lock);
++
++ return psdev;
++}
++
++/* Don't call this directly as it's called by pcistub_device_put */
++static void pcistub_device_release(struct kref *kref)
++{
++ struct pcistub_device *psdev;
++
++ psdev = container_of(kref, struct pcistub_device, kref);
++
++ dev_dbg(&psdev->dev->dev, "pcistub_device_release\n");
++
++ /* Clean-up the device */
++ pciback_reset_device(psdev->dev);
++ pciback_config_free_dyn_fields(psdev->dev);
++ pciback_config_free_dev(psdev->dev);
++ kfree(pci_get_drvdata(psdev->dev));
++ pci_set_drvdata(psdev->dev, NULL);
++
++ pci_dev_put(psdev->dev);
++
++ kfree(psdev);
++}
++
++static inline void pcistub_device_get(struct pcistub_device *psdev)
++{
++ kref_get(&psdev->kref);
++}
++
++static inline void pcistub_device_put(struct pcistub_device *psdev)
++{
++ kref_put(&psdev->kref, pcistub_device_release);
++}
++
++static struct pcistub_device *pcistub_device_find(int domain, int bus,
++ int slot, int func)
++{
++ struct pcistub_device *psdev = NULL;
++ unsigned long flags;
++
++ spin_lock_irqsave(&pcistub_devices_lock, flags);
++
++ list_for_each_entry(psdev, &pcistub_devices, dev_list) {
++ if (psdev->dev != NULL
++ && domain == pci_domain_nr(psdev->dev->bus)
++ && bus == psdev->dev->bus->number
++ && PCI_DEVFN(slot, func) == psdev->dev->devfn) {
++ pcistub_device_get(psdev);
++ goto out;
++ }
++ }
++
++ /* didn't find it */
++ psdev = NULL;
++
++ out:
++ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++ return psdev;
++}
++
++static struct pci_dev *pcistub_device_get_pci_dev(struct pciback_device *pdev,
++ struct pcistub_device *psdev)
++{
++ struct pci_dev *pci_dev = NULL;
++ unsigned long flags;
++
++ pcistub_device_get(psdev);
++
++ spin_lock_irqsave(&psdev->lock, flags);
++ if (!psdev->pdev) {
++ psdev->pdev = pdev;
++ pci_dev = psdev->dev;
++ }
++ spin_unlock_irqrestore(&psdev->lock, flags);
++
++ if (!pci_dev)
++ pcistub_device_put(psdev);
++
++ return pci_dev;
++}
++
++struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev,
++ int domain, int bus,
++ int slot, int func)
++{
++ struct pcistub_device *psdev;
++ struct pci_dev *found_dev = NULL;
++ unsigned long flags;
++
++ spin_lock_irqsave(&pcistub_devices_lock, flags);
++
++ list_for_each_entry(psdev, &pcistub_devices, dev_list) {
++ if (psdev->dev != NULL
++ && domain == pci_domain_nr(psdev->dev->bus)
++ && bus == psdev->dev->bus->number
++ && PCI_DEVFN(slot, func) == psdev->dev->devfn) {
++ found_dev = pcistub_device_get_pci_dev(pdev, psdev);
++ break;
++ }
++ }
++
++ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++ return found_dev;
++}
++
++struct pci_dev *pcistub_get_pci_dev(struct pciback_device *pdev,
++ struct pci_dev *dev)
++{
++ struct pcistub_device *psdev;
++ struct pci_dev *found_dev = NULL;
++ unsigned long flags;
++
++ spin_lock_irqsave(&pcistub_devices_lock, flags);
++
++ list_for_each_entry(psdev, &pcistub_devices, dev_list) {
++ if (psdev->dev == dev) {
++ found_dev = pcistub_device_get_pci_dev(pdev, psdev);
++ break;
++ }
++ }
++
++ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++ return found_dev;
++}
++
++void pcistub_put_pci_dev(struct pci_dev *dev)
++{
++ struct pcistub_device *psdev, *found_psdev = NULL;
++ unsigned long flags;
++
++ spin_lock_irqsave(&pcistub_devices_lock, flags);
++
++ list_for_each_entry(psdev, &pcistub_devices, dev_list) {
++ if (psdev->dev == dev) {
++ found_psdev = psdev;
++ break;
++ }
++ }
++
++ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++
++ /* Hold this lock to avoid breaking the link between
++ * pcistub and pciback while AER is in progress.
++ */
++ down_write(&pcistub_sem);
++ /* Cleanup our device
++ * (so it's ready for the next domain)
++ */
++ pciback_reset_device(found_psdev->dev);
++ pciback_config_free_dyn_fields(found_psdev->dev);
++ pciback_config_reset_dev(found_psdev->dev);
++
++ spin_lock_irqsave(&found_psdev->lock, flags);
++ found_psdev->pdev = NULL;
++ spin_unlock_irqrestore(&found_psdev->lock, flags);
++
++ pcistub_device_put(found_psdev);
++ up_write(&pcistub_sem);
++}
++
++static int __devinit pcistub_match_one(struct pci_dev *dev,
++ struct pcistub_device_id *pdev_id)
++{
++ /* Match the specified device by domain, bus, slot, func and also if
++ * any of the device's parent bridges match.
++ */
++ for (; dev != NULL; dev = dev->bus->self) {
++ if (pci_domain_nr(dev->bus) == pdev_id->domain
++ && dev->bus->number == pdev_id->bus
++ && dev->devfn == pdev_id->devfn)
++ return 1;
++
++ /* Sometimes topmost bridge links to itself. */
++ if (dev == dev->bus->self)
++ break;
++ }
++
++ return 0;
++}
++
++static int __devinit pcistub_match(struct pci_dev *dev)
++{
++ struct pcistub_device_id *pdev_id;
++ unsigned long flags;
++ int found = 0;
++
++ spin_lock_irqsave(&device_ids_lock, flags);
++ list_for_each_entry(pdev_id, &pcistub_device_ids, slot_list) {
++ if (pcistub_match_one(dev, pdev_id)) {
++ found = 1;
++ break;
++ }
++ }
++ spin_unlock_irqrestore(&device_ids_lock, flags);
++
++ return found;
++}
++
++static int __devinit pcistub_init_device(struct pci_dev *dev)
++{
++ struct pciback_dev_data *dev_data;
++ int err = 0;
++
++ dev_dbg(&dev->dev, "initializing...\n");
++
++ /* The PCI backend is not intended to be a module (or to work with
++ * removable PCI devices (yet). If it were, pciback_config_free()
++ * would need to be called somewhere to free the memory allocated
++ * here and then to call kfree(pci_get_drvdata(psdev->dev)).
++ */
++ dev_data = kzalloc(sizeof(*dev_data), GFP_ATOMIC);
++ if (!dev_data) {
++ err = -ENOMEM;
++ goto out;
++ }
++ pci_set_drvdata(dev, dev_data);
++
++ dev_dbg(&dev->dev, "initializing config\n");
++
++ init_waitqueue_head(&aer_wait_queue);
++ err = pciback_config_init_dev(dev);
++ if (err)
++ goto out;
++
++ /* HACK: Force device (& ACPI) to determine what IRQ it's on - we
++ * must do this here because pcibios_enable_device may specify
++ * the pci device's true irq (and possibly its other resources)
++ * if they differ from what's in the configuration space.
++ * This makes the assumption that the device's resources won't
++ * change after this point (otherwise this code may break!)
++ */
++ dev_dbg(&dev->dev, "enabling device\n");
++ err = pci_enable_device(dev);
++ if (err)
++ goto config_release;
++
++ /* Now disable the device (this also ensures some private device
++ * data is setup before we export)
++ */
++ dev_dbg(&dev->dev, "reset device\n");
++ pciback_reset_device(dev);
++
++ return 0;
++
++ config_release:
++ pciback_config_free_dev(dev);
++
++ out:
++ pci_set_drvdata(dev, NULL);
++ kfree(dev_data);
++ return err;
++}
++
++/*
++ * Because some initialization still happens on
++ * devices during fs_initcall, we need to defer
++ * full initialization of our devices until
++ * device_initcall.
++ */
++static int __init pcistub_init_devices_late(void)
++{
++ struct pcistub_device *psdev;
++ unsigned long flags;
++ int err = 0;
++
++ pr_debug("pciback: pcistub_init_devices_late\n");
++
++ spin_lock_irqsave(&pcistub_devices_lock, flags);
++
++ while (!list_empty(&seized_devices)) {
++ psdev = container_of(seized_devices.next,
++ struct pcistub_device, dev_list);
++ list_del(&psdev->dev_list);
++
++ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++
++ err = pcistub_init_device(psdev->dev);
++ if (err) {
++ dev_err(&psdev->dev->dev,
++ "error %d initializing device\n", err);
++ kfree(psdev);
++ psdev = NULL;
++ }
++
++ spin_lock_irqsave(&pcistub_devices_lock, flags);
++
++ if (psdev)
++ list_add_tail(&psdev->dev_list, &pcistub_devices);
++ }
++
++ initialize_devices = 1;
++
++ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++
++ return 0;
++}
++
++static int __devinit pcistub_seize(struct pci_dev *dev)
++{
++ struct pcistub_device *psdev;
++ unsigned long flags;
++ int err = 0;
++
++ psdev = pcistub_device_alloc(dev);
++ if (!psdev)
++ return -ENOMEM;
++
++ spin_lock_irqsave(&pcistub_devices_lock, flags);
++
++ if (initialize_devices) {
++ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++
++ /* don't want irqs disabled when calling pcistub_init_device */
++ err = pcistub_init_device(psdev->dev);
++
++ spin_lock_irqsave(&pcistub_devices_lock, flags);
++
++ if (!err)
++ list_add(&psdev->dev_list, &pcistub_devices);
++ } else {
++ dev_dbg(&dev->dev, "deferring initialization\n");
++ list_add(&psdev->dev_list, &seized_devices);
++ }
++
++ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++
++ if (err)
++ pcistub_device_put(psdev);
++
++ return err;
++}
++
++static int __devinit pcistub_probe(struct pci_dev *dev,
++ const struct pci_device_id *id)
++{
++ int err = 0;
++
++ dev_dbg(&dev->dev, "probing...\n");
++
++ if (pcistub_match(dev)) {
++
++ if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL
++ && dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
++ dev_err(&dev->dev, "can't export pci devices that "
++ "don't have a normal (0) or bridge (1) "
++ "header type!\n");
++ err = -ENODEV;
++ goto out;
++ }
++
++ dev_info(&dev->dev, "seizing device\n");
++ err = pcistub_seize(dev);
++#ifdef CONFIG_PCI_GUESTDEV
++ } else if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
++ if (!pci_is_guestdev(dev)) {
++ err = -ENODEV;
++ goto out;
++ }
++
++ dev_info(&dev->dev, "seizing device\n");
++ err = pcistub_seize(dev);
++#endif /* CONFIG_PCI_GUESTDEV */
++ } else
++ /* Didn't find the device */
++ err = -ENODEV;
++
++ out:
++ return err;
++}
++
++static void pcistub_remove(struct pci_dev *dev)
++{
++ struct pcistub_device *psdev, *found_psdev = NULL;
++ unsigned long flags;
++
++ dev_dbg(&dev->dev, "removing\n");
++
++ spin_lock_irqsave(&pcistub_devices_lock, flags);
++
++ pciback_config_quirk_release(dev);
++
++ list_for_each_entry(psdev, &pcistub_devices, dev_list) {
++ if (psdev->dev == dev) {
++ found_psdev = psdev;
++ break;
++ }
++ }
++
++ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++
++ if (found_psdev) {
++ dev_dbg(&dev->dev, "found device to remove - in use? %p\n",
++ found_psdev->pdev);
++
++ if (found_psdev->pdev) {
++ printk(KERN_WARNING "pciback: ****** removing device "
++ "%s while still in-use! ******\n",
++ pci_name(found_psdev->dev));
++ printk(KERN_WARNING "pciback: ****** driver domain may "
++ "still access this device's i/o resources!\n");
++ printk(KERN_WARNING "pciback: ****** shutdown driver "
++ "domain before binding device\n");
++ printk(KERN_WARNING "pciback: ****** to other drivers "
++ "or domains\n");
++
++ pciback_release_pci_dev(found_psdev->pdev,
++ found_psdev->dev);
++ }
++
++ spin_lock_irqsave(&pcistub_devices_lock, flags);
++ list_del(&found_psdev->dev_list);
++ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++
++ /* the final put for releasing from the list */
++ pcistub_device_put(found_psdev);
++ }
++}
++
++static const struct pci_device_id pcistub_ids[] = {
++ {
++ .vendor = PCI_ANY_ID,
++ .device = PCI_ANY_ID,
++ .subvendor = PCI_ANY_ID,
++ .subdevice = PCI_ANY_ID,
++ },
++ {0,},
++};
++
++static void kill_domain_by_device(struct pcistub_device *psdev)
++{
++ struct xenbus_transaction xbt;
++ int err;
++ char nodename[1024];
++
++ if (!psdev)
++ dev_err(&psdev->dev->dev,
++ "device is NULL when do AER recovery/kill_domain\n");
++ sprintf(nodename, "/local/domain/0/backend/pci/%d/0",
++ psdev->pdev->xdev->otherend_id);
++ nodename[strlen(nodename)] = '\0';
++
++again:
++ err = xenbus_transaction_start(&xbt);
++ if (err)
++ {
++ dev_err(&psdev->dev->dev,
++ "error %d when start xenbus transaction\n", err);
++ return;
++ }
++ /*PV AER handlers will set this flag*/
++ xenbus_printf(xbt, nodename, "aerState" , "aerfail" );
++ err = xenbus_transaction_end(xbt, 0);
++ if (err)
++ {
++ if (err == -EAGAIN)
++ goto again;
++ dev_err(&psdev->dev->dev,
++ "error %d when end xenbus transaction\n", err);
++ return;
++ }
++}
++
++/* For each aer recovery step error_detected, mmio_enabled, etc, front_end and
++ * backend need to have cooperation. In pciback, those steps will do similar
++ * jobs: send service request and waiting for front_end response.
++*/
++static pci_ers_result_t common_process(struct pcistub_device *psdev,
++ pci_channel_state_t state, int aer_cmd, pci_ers_result_t result)
++{
++ pci_ers_result_t res = result;
++ struct xen_pcie_aer_op *aer_op;
++ int ret;
++
++ /*with PV AER drivers*/
++ aer_op = &(psdev->pdev->sh_info->aer_op);
++ aer_op->cmd = aer_cmd ;
++ /*useful for error_detected callback*/
++ aer_op->err = state;
++ /*pcifront_end BDF*/
++ ret = pciback_get_pcifront_dev(psdev->dev, psdev->pdev,
++ &aer_op->domain, &aer_op->bus, &aer_op->devfn);
++ if (!ret) {
++ dev_err(&psdev->dev->dev,
++ "pciback: failed to get pcifront device\n");
++ return PCI_ERS_RESULT_NONE;
++ }
++ wmb();
++
++ dev_dbg(&psdev->dev->dev,
++ "pciback: aer_op %x dom %x bus %x devfn %x\n",
++ aer_cmd, aer_op->domain, aer_op->bus, aer_op->devfn);
++ /* Local flag marking an outstanding AER request; the pciback callback
++ * uses this flag to decide whether to check for the AER service
++ * ack signal from pcifront.
++ */
++ set_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);
++
++ /* It is possible that a pcifront conf_read_write ops request invokes
++ * the callback, which causes a spurious execution of wake_up.
++ * Yet it is harmless and better than a spinlock here.
++ */
++ set_bit(_XEN_PCIB_active,
++ (unsigned long *)&psdev->pdev->sh_info->flags);
++ wmb();
++ notify_remote_via_irq(psdev->pdev->evtchn_irq);
++
++ ret = wait_event_timeout(aer_wait_queue, !(test_bit(_XEN_PCIB_active,
++ (unsigned long *)&psdev->pdev->sh_info->flags)), 300*HZ);
++
++ if (!ret) {
++ if (test_bit(_XEN_PCIB_active,
++ (unsigned long *)&psdev->pdev->sh_info->flags)) {
++ dev_err(&psdev->dev->dev,
++ "pcifront aer process not responding!\n");
++ clear_bit(_XEN_PCIB_active,
++ (unsigned long *)&psdev->pdev->sh_info->flags);
++ aer_op->err = PCI_ERS_RESULT_NONE;
++ return res;
++ }
++ }
++ clear_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);
++
++ if ( test_bit( _XEN_PCIF_active,
++ (unsigned long*)&psdev->pdev->sh_info->flags)) {
++ dev_dbg(&psdev->dev->dev,
++ "schedule pci_conf service in pciback \n");
++ test_and_schedule_op(psdev->pdev);
++ }
++
++ res = (pci_ers_result_t)aer_op->err;
++ return res;
++}
++
++/*
++* pciback_slot_reset: it will send the slot_reset request to pcifront in case
++* of the device driver could provide this service, and then wait for pcifront
++* ack.
++* @dev: pointer to PCI devices
++* return value is used by aer_core do_recovery policy
++*/
++static pci_ers_result_t pciback_slot_reset(struct pci_dev *dev)
++{
++ struct pcistub_device *psdev;
++ pci_ers_result_t result;
++
++ result = PCI_ERS_RESULT_RECOVERED;
++ dev_dbg(&dev->dev, "pciback_slot_reset(bus:%x,devfn:%x)\n",
++ dev->bus->number, dev->devfn);
++
++ down_write(&pcistub_sem);
++ psdev = pcistub_device_find(pci_domain_nr(dev->bus),
++ dev->bus->number,
++ PCI_SLOT(dev->devfn),
++ PCI_FUNC(dev->devfn));
++
++ if ( !psdev || !psdev->pdev )
++ {
++ dev_err(&dev->dev,
++ "pciback device is not found/assigned\n");
++ goto end;
++ }
++
++ if ( !psdev->pdev->sh_info )
++ {
++ dev_err(&dev->dev, "pciback device is not connected or owned"
++ " by HVM, kill it\n");
++ kill_domain_by_device(psdev);
++ goto release;
++ }
++
++ if ( !test_bit(_XEN_PCIB_AERHANDLER,
++ (unsigned long *)&psdev->pdev->sh_info->flags) ) {
++ dev_err(&dev->dev,
++ "guest with no AER driver should have been killed\n");
++ goto release;
++ }
++ result = common_process(psdev, 1, XEN_PCI_OP_aer_slotreset, result);
++
++ if (result == PCI_ERS_RESULT_NONE ||
++ result == PCI_ERS_RESULT_DISCONNECT) {
++ dev_dbg(&dev->dev,
++ "No AER slot_reset service or disconnected!\n");
++ kill_domain_by_device(psdev);
++ }
++release:
++ pcistub_device_put(psdev);
++end:
++ up_write(&pcistub_sem);
++ return result;
++
++}
++
++
++/*pciback_mmio_enabled: it will send the mmio_enabled request to pcifront
++* in case of the device driver could provide this service, and then wait
++* for pcifront ack.
++* @dev: pointer to PCI devices
++* return value is used by aer_core do_recovery policy
++*/
++
++static pci_ers_result_t pciback_mmio_enabled(struct pci_dev *dev)
++{
++ struct pcistub_device *psdev;
++ pci_ers_result_t result;
++
++ result = PCI_ERS_RESULT_RECOVERED;
++ dev_dbg(&dev->dev, "pciback_mmio_enabled(bus:%x,devfn:%x)\n",
++ dev->bus->number, dev->devfn);
++
++ down_write(&pcistub_sem);
++ psdev = pcistub_device_find(pci_domain_nr(dev->bus),
++ dev->bus->number,
++ PCI_SLOT(dev->devfn),
++ PCI_FUNC(dev->devfn));
++
++ if ( !psdev || !psdev->pdev )
++ {
++ dev_err(&dev->dev,
++ "pciback device is not found/assigned\n");
++ goto end;
++ }
++
++ if ( !psdev->pdev->sh_info )
++ {
++ dev_err(&dev->dev, "pciback device is not connected or owned"
++ " by HVM, kill it\n");
++ kill_domain_by_device(psdev);
++ goto release;
++ }
++
++ if ( !test_bit(_XEN_PCIB_AERHANDLER,
++ (unsigned long *)&psdev->pdev->sh_info->flags) ) {
++ dev_err(&dev->dev,
++ "guest with no AER driver should have been killed\n");
++ goto release;
++ }
++ result = common_process(psdev, 1, XEN_PCI_OP_aer_mmio, result);
++
++ if (result == PCI_ERS_RESULT_NONE ||
++ result == PCI_ERS_RESULT_DISCONNECT) {
++ dev_dbg(&dev->dev,
++ "No AER mmio_enabled service or disconnected!\n");
++ kill_domain_by_device(psdev);
++ }
++release:
++ pcistub_device_put(psdev);
++end:
++ up_write(&pcistub_sem);
++ return result;
++}
++
++/*pciback_error_detected: it will send the error_detected request to pcifront
++* in case of the device driver could provide this service, and then wait
++* for pcifront ack.
++* @dev: pointer to PCI devices
++* @error: the current PCI connection state
++* return value is used by aer_core do_recovery policy
++*/
++
++static pci_ers_result_t pciback_error_detected(struct pci_dev *dev,
++ pci_channel_state_t error)
++{
++ struct pcistub_device *psdev;
++ pci_ers_result_t result;
++
++ result = PCI_ERS_RESULT_CAN_RECOVER;
++ dev_dbg(&dev->dev, "pciback_error_detected(bus:%x,devfn:%x)\n",
++ dev->bus->number, dev->devfn);
++
++ down_write(&pcistub_sem);
++ psdev = pcistub_device_find(pci_domain_nr(dev->bus),
++ dev->bus->number,
++ PCI_SLOT(dev->devfn),
++ PCI_FUNC(dev->devfn));
++
++ if ( !psdev || !psdev->pdev )
++ {
++ dev_err(&dev->dev,
++ "pciback device is not found/assigned\n");
++ goto end;
++ }
++
++ if ( !psdev->pdev->sh_info )
++ {
++ dev_err(&dev->dev, "pciback device is not connected or owned"
++ " by HVM, kill it\n");
++ kill_domain_by_device(psdev);
++ goto release;
++ }
++
++ /* Guest owns the device yet no AER handler registered; kill guest */
++ if ( !test_bit(_XEN_PCIB_AERHANDLER,
++ (unsigned long *)&psdev->pdev->sh_info->flags) ) {
++ dev_dbg(&dev->dev, "guest may have no aer driver, kill it\n");
++ kill_domain_by_device(psdev);
++ goto release;
++ }
++ result = common_process(psdev, error, XEN_PCI_OP_aer_detected, result);
++
++ if (result == PCI_ERS_RESULT_NONE ||
++ result == PCI_ERS_RESULT_DISCONNECT) {
++ dev_dbg(&dev->dev,
++ "No AER error_detected service or disconnected!\n");
++ kill_domain_by_device(psdev);
++ }
++release:
++ pcistub_device_put(psdev);
++end:
++ up_write(&pcistub_sem);
++ return result;
++}
++
++/*pciback_error_resume: it will send the error_resume request to pcifront
++* in case of the device driver could provide this service, and then wait
++* for pcifront ack.
++* @dev: pointer to PCI devices
++*/
++
++static void pciback_error_resume(struct pci_dev *dev)
++{
++ struct pcistub_device *psdev;
++
++ dev_dbg(&dev->dev, "pciback_error_resume(bus:%x,devfn:%x)\n",
++ dev->bus->number, dev->devfn);
++
++ down_write(&pcistub_sem);
++ psdev = pcistub_device_find(pci_domain_nr(dev->bus),
++ dev->bus->number,
++ PCI_SLOT(dev->devfn),
++ PCI_FUNC(dev->devfn));
++
++ if ( !psdev || !psdev->pdev )
++ {
++ dev_err(&dev->dev,
++ "pciback device is not found/assigned\n");
++ goto end;
++ }
++
++ if ( !psdev->pdev->sh_info )
++ {
++ dev_err(&dev->dev, "pciback device is not connected or owned"
++ " by HVM, kill it\n");
++ kill_domain_by_device(psdev);
++ goto release;
++ }
++
++ if ( !test_bit(_XEN_PCIB_AERHANDLER,
++ (unsigned long *)&psdev->pdev->sh_info->flags) ) {
++ dev_err(&dev->dev,
++ "guest with no AER driver should have been killed\n");
++ kill_domain_by_device(psdev);
++ goto release;
++ }
++ common_process(psdev, 1, XEN_PCI_OP_aer_resume, PCI_ERS_RESULT_RECOVERED);
++release:
++ pcistub_device_put(psdev);
++end:
++ up_write(&pcistub_sem);
++ return;
++}
++
++/*add pciback AER handling*/
++static struct pci_error_handlers pciback_error_handler = {
++ .error_detected = pciback_error_detected,
++ .mmio_enabled = pciback_mmio_enabled,
++ .slot_reset = pciback_slot_reset,
++ .resume = pciback_error_resume,
++};
++
++/*
++ * Note: There is no MODULE_DEVICE_TABLE entry here because this isn't
++ * for a normal device. I don't want it to be loaded automatically.
++ */
++
++static struct pci_driver pciback_pci_driver = {
++ .name = "pciback",
++ .id_table = pcistub_ids,
++ .probe = pcistub_probe,
++ .remove = pcistub_remove,
++ .err_handler = &pciback_error_handler,
++};
++
++static inline int str_to_slot(const char *buf, int *domain, int *bus,
++ int *slot, int *func)
++{
++ int err;
++
++ err = sscanf(buf, " %x:%x:%x.%x", domain, bus, slot, func);
++ if (err == 4)
++ return 0;
++ else if (err < 0)
++ return -EINVAL;
++
++ /* try again without domain */
++ *domain = 0;
++ err = sscanf(buf, " %x:%x.%x", bus, slot, func);
++ if (err == 3)
++ return 0;
++
++ return -EINVAL;
++}
++
++static inline int str_to_quirk(const char *buf, int *domain, int *bus, int
++ *slot, int *func, int *reg, int *size, int *mask)
++{
++ int err;
++
++ err =
++ sscanf(buf, " %04x:%02x:%02x.%1x-%08x:%1x:%08x", domain, bus, slot,
++ func, reg, size, mask);
++ if (err == 7)
++ return 0;
++ return -EINVAL;
++}
++
++static int pcistub_device_id_add(int domain, int bus, int slot, int func)
++{
++ struct pcistub_device_id *pci_dev_id;
++ unsigned long flags;
++
++ pci_dev_id = kmalloc(sizeof(*pci_dev_id), GFP_KERNEL);
++ if (!pci_dev_id)
++ return -ENOMEM;
++
++ pci_dev_id->domain = domain;
++ pci_dev_id->bus = bus;
++ pci_dev_id->devfn = PCI_DEVFN(slot, func);
++
++ pr_debug("pciback: wants to seize %04x:%02x:%02x.%01x\n",
++ domain, bus, slot, func);
++
++ spin_lock_irqsave(&device_ids_lock, flags);
++ list_add_tail(&pci_dev_id->slot_list, &pcistub_device_ids);
++ spin_unlock_irqrestore(&device_ids_lock, flags);
++
++ return 0;
++}
++
++static int pcistub_device_id_remove(int domain, int bus, int slot, int func)
++{
++ struct pcistub_device_id *pci_dev_id, *t;
++ int devfn = PCI_DEVFN(slot, func);
++ int err = -ENOENT;
++ unsigned long flags;
++
++ spin_lock_irqsave(&device_ids_lock, flags);
++ list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids, slot_list) {
++
++ if (pci_dev_id->domain == domain
++ && pci_dev_id->bus == bus && pci_dev_id->devfn == devfn) {
++ /* Don't break; here because it's possible the same
++ * slot could be in the list more than once
++ */
++ list_del(&pci_dev_id->slot_list);
++ kfree(pci_dev_id);
++
++ err = 0;
++
++ pr_debug("pciback: removed %04x:%02x:%02x.%01x from "
++ "seize list\n", domain, bus, slot, func);
++ }
++ }
++ spin_unlock_irqrestore(&device_ids_lock, flags);
++
++ return err;
++}
++
++static int pcistub_reg_add(int domain, int bus, int slot, int func, int reg,
++ int size, int mask)
++{
++ int err = 0;
++ struct pcistub_device *psdev;
++ struct pci_dev *dev;
++ struct config_field *field;
++
++ psdev = pcistub_device_find(domain, bus, slot, func);
++ if (!psdev || !psdev->dev) {
++ err = -ENODEV;
++ goto out;
++ }
++ dev = psdev->dev;
++
++ field = kzalloc(sizeof(*field), GFP_ATOMIC);
++ if (!field) {
++ err = -ENOMEM;
++ goto out;
++ }
++
++ field->offset = reg;
++ field->size = size;
++ field->mask = mask;
++ field->init = NULL;
++ field->reset = NULL;
++ field->release = NULL;
++ field->clean = pciback_config_field_free;
++
++ err = pciback_config_quirks_add_field(dev, field);
++ if (err)
++ kfree(field);
++ out:
++ return err;
++}
++
++static ssize_t pcistub_slot_add(struct device_driver *drv, const char *buf,
++ size_t count)
++{
++ int domain, bus, slot, func;
++ int err;
++
++ err = str_to_slot(buf, &domain, &bus, &slot, &func);
++ if (err)
++ goto out;
++
++ err = pcistub_device_id_add(domain, bus, slot, func);
++
++ out:
++ if (!err)
++ err = count;
++ return err;
++}
++
++DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add);
++
++static ssize_t pcistub_slot_remove(struct device_driver *drv, const char *buf,
++ size_t count)
++{
++ int domain, bus, slot, func;
++ int err;
++
++ err = str_to_slot(buf, &domain, &bus, &slot, &func);
++ if (err)
++ goto out;
++
++ err = pcistub_device_id_remove(domain, bus, slot, func);
++
++ out:
++ if (!err)
++ err = count;
++ return err;
++}
++
++DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove);
++
++static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf)
++{
++ struct pcistub_device_id *pci_dev_id;
++ size_t count = 0;
++ unsigned long flags;
++
++ spin_lock_irqsave(&device_ids_lock, flags);
++ list_for_each_entry(pci_dev_id, &pcistub_device_ids, slot_list) {
++ if (count >= PAGE_SIZE)
++ break;
++
++ count += scnprintf(buf + count, PAGE_SIZE - count,
++ "%04x:%02x:%02x.%01x\n",
++ pci_dev_id->domain, pci_dev_id->bus,
++ PCI_SLOT(pci_dev_id->devfn),
++ PCI_FUNC(pci_dev_id->devfn));
++ }
++ spin_unlock_irqrestore(&device_ids_lock, flags);
++
++ return count;
++}
++
++DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL);
++
++static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf,
++				 size_t count)
++{
++	int domain, bus, slot, func, reg, size, mask;
++	int err;
++
++	err = str_to_quirk(buf, &domain, &bus, &slot, &func, &reg, &size,
++			   &mask);
++	if (err)
++		goto out;
++
++	err = pcistub_reg_add(domain, bus, slot, func, reg, size, mask);
++
++      out:
++	if (!err)
++		err = count;
++	return err;
++}
++
++static ssize_t pcistub_quirk_show(struct device_driver *drv, char *buf)
++{
++ int count = 0;
++ unsigned long flags;
++ extern struct list_head pciback_quirks;
++ struct pciback_config_quirk *quirk;
++ struct pciback_dev_data *dev_data;
++ const struct config_field *field;
++ const struct config_field_entry *cfg_entry;
++
++ spin_lock_irqsave(&device_ids_lock, flags);
++ list_for_each_entry(quirk, &pciback_quirks, quirks_list) {
++ if (count >= PAGE_SIZE)
++ goto out;
++
++ count += scnprintf(buf + count, PAGE_SIZE - count,
++ "%02x:%02x.%01x\n\t%04x:%04x:%04x:%04x\n",
++ quirk->pdev->bus->number,
++ PCI_SLOT(quirk->pdev->devfn),
++ PCI_FUNC(quirk->pdev->devfn),
++ quirk->devid.vendor, quirk->devid.device,
++ quirk->devid.subvendor,
++ quirk->devid.subdevice);
++
++ dev_data = pci_get_drvdata(quirk->pdev);
++
++ list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
++ field = cfg_entry->field;
++ if (count >= PAGE_SIZE)
++ goto out;
++
++ count += scnprintf(buf + count, PAGE_SIZE - count,
++ "\t\t%08x:%01x:%08x\n",
++ cfg_entry->base_offset + field->offset,
++ field->size, field->mask);
++ }
++ }
++
++ out:
++ spin_unlock_irqrestore(&device_ids_lock, flags);
++
++ return count;
++}
++
++DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show, pcistub_quirk_add);
++
++static ssize_t permissive_add(struct device_driver *drv, const char *buf,
++ size_t count)
++{
++ int domain, bus, slot, func;
++ int err;
++ struct pcistub_device *psdev;
++ struct pciback_dev_data *dev_data;
++ err = str_to_slot(buf, &domain, &bus, &slot, &func);
++ if (err)
++ goto out;
++ psdev = pcistub_device_find(domain, bus, slot, func);
++ if (!psdev) {
++ err = -ENODEV;
++ goto out;
++ }
++ if (!psdev->dev) {
++ err = -ENODEV;
++ goto release;
++ }
++ dev_data = pci_get_drvdata(psdev->dev);
++ /* the driver data for a device should never be null at this point */
++ if (!dev_data) {
++ err = -ENXIO;
++ goto release;
++ }
++ if (!dev_data->permissive) {
++ dev_data->permissive = 1;
++ /* Let user know that what they're doing could be unsafe */
++ dev_warn(&psdev->dev->dev,
++ "enabling permissive mode configuration space accesses!\n");
++ dev_warn(&psdev->dev->dev,
++ "permissive mode is potentially unsafe!\n");
++ }
++ release:
++ pcistub_device_put(psdev);
++ out:
++ if (!err)
++ err = count;
++ return err;
++}
++
++static ssize_t permissive_show(struct device_driver *drv, char *buf)
++{
++ struct pcistub_device *psdev;
++ struct pciback_dev_data *dev_data;
++ size_t count = 0;
++ unsigned long flags;
++ spin_lock_irqsave(&pcistub_devices_lock, flags);
++ list_for_each_entry(psdev, &pcistub_devices, dev_list) {
++ if (count >= PAGE_SIZE)
++ break;
++ if (!psdev->dev)
++ continue;
++ dev_data = pci_get_drvdata(psdev->dev);
++ if (!dev_data || !dev_data->permissive)
++ continue;
++ count +=
++ scnprintf(buf + count, PAGE_SIZE - count, "%s\n",
++ pci_name(psdev->dev));
++ }
++ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++ return count;
++}
++
++DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, permissive_add);
++
++#ifdef CONFIG_PCI_MSI
++
++int pciback_get_owner(struct pci_dev *dev)
++{
++ struct pcistub_device *psdev;
++
++ psdev = pcistub_device_find(pci_domain_nr(dev->bus), dev->bus->number,
++ PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
++
++ if (!psdev || !psdev->pdev)
++ return -1;
++
++ return psdev->pdev->xdev->otherend_id;
++}
++#endif
++
++static void pcistub_exit(void)
++{
++ driver_remove_file(&pciback_pci_driver.driver, &driver_attr_new_slot);
++ driver_remove_file(&pciback_pci_driver.driver,
++ &driver_attr_remove_slot);
++ driver_remove_file(&pciback_pci_driver.driver, &driver_attr_slots);
++ driver_remove_file(&pciback_pci_driver.driver, &driver_attr_quirks);
++ driver_remove_file(&pciback_pci_driver.driver, &driver_attr_permissive);
++
++ pci_unregister_driver(&pciback_pci_driver);
++ WARN_ON(unregister_msi_get_owner(pciback_get_owner));
++}
++
++static int __init pcistub_init(void)
++{
++ int pos = 0;
++ int err = 0;
++ int domain, bus, slot, func;
++ int parsed;
++
++ if (pci_devs_to_hide && *pci_devs_to_hide) {
++ do {
++ parsed = 0;
++
++ err = sscanf(pci_devs_to_hide + pos,
++ " (%x:%x:%x.%x) %n",
++ &domain, &bus, &slot, &func, &parsed);
++ if (err != 4) {
++ domain = 0;
++ err = sscanf(pci_devs_to_hide + pos,
++ " (%x:%x.%x) %n",
++ &bus, &slot, &func, &parsed);
++ if (err != 3)
++ goto parse_error;
++ }
++
++ err = pcistub_device_id_add(domain, bus, slot, func);
++ if (err)
++ goto out;
++
++ /* if parsed<=0, we've reached the end of the string */
++ pos += parsed;
++ } while (parsed > 0 && pci_devs_to_hide[pos]);
++ }
++
++ /* If we're the first PCI Device Driver to register, we're the
++ * first one to get offered PCI devices as they become
++ * available (and thus we can be the first to grab them)
++ */
++ err = pci_register_driver(&pciback_pci_driver);
++ if (err < 0)
++ goto out;
++
++ err = driver_create_file(&pciback_pci_driver.driver,
++ &driver_attr_new_slot);
++ if (!err)
++ err = driver_create_file(&pciback_pci_driver.driver,
++ &driver_attr_remove_slot);
++ if (!err)
++ err = driver_create_file(&pciback_pci_driver.driver,
++ &driver_attr_slots);
++ if (!err)
++ err = driver_create_file(&pciback_pci_driver.driver,
++ &driver_attr_quirks);
++ if (!err)
++ err = driver_create_file(&pciback_pci_driver.driver,
++ &driver_attr_permissive);
++
++ if (!err)
++ err = register_msi_get_owner(pciback_get_owner);
++ if (err)
++ pcistub_exit();
++
++ out:
++ return err;
++
++ parse_error:
++ printk(KERN_ERR "pciback: Error parsing pci_devs_to_hide at \"%s\"\n",
++ pci_devs_to_hide + pos);
++ return -EINVAL;
++}
++
++#ifndef MODULE
++/*
++ * fs_initcall happens before device_initcall
++ * so pciback *should* get called first (b/c we
++ * want to suck up any device before other drivers
++ * get a chance by being the first pci device
++ * driver to register)
++ */
++fs_initcall(pcistub_init);
++#endif
++
++static int __init pciback_init(void)
++{
++ int err;
++
++ err = pciback_config_init();
++ if (err)
++ return err;
++
++#ifdef MODULE
++ err = pcistub_init();
++ if (err < 0)
++ return err;
++#endif
++
++ pcistub_init_devices_late();
++ err = pciback_xenbus_register();
++ if (err)
++ pcistub_exit();
++
++ return err;
++}
++
++static void __exit pciback_cleanup(void)
++{
++ pciback_xenbus_unregister();
++ pcistub_exit();
++}
++
++module_init(pciback_init);
++module_exit(pciback_cleanup);
++
++MODULE_LICENSE("Dual BSD/GPL");
+diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h
+new file mode 100644
+index 0000000..6744f45
+--- /dev/null
++++ b/drivers/xen/pciback/pciback.h
+@@ -0,0 +1,126 @@
++/*
++ * PCI Backend Common Data Structures & Function Declarations
++ *
++ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ */
++#ifndef __XEN_PCIBACK_H__
++#define __XEN_PCIBACK_H__
++
++#include <linux/pci.h>
++#include <linux/interrupt.h>
++#include <xen/xenbus.h>
++#include <linux/list.h>
++#include <linux/spinlock.h>
++#include <linux/workqueue.h>
++#include <asm/atomic.h>
++#include <xen/interface/io/pciif.h>
++
++struct pci_dev_entry {
++ struct list_head list;
++ struct pci_dev *dev;
++};
++
++#define _PDEVF_op_active (0)
++#define PDEVF_op_active (1<<(_PDEVF_op_active))
++#define _PCIB_op_pending (1)
++#define PCIB_op_pending (1<<(_PCIB_op_pending))
++
++struct pciback_device {
++ void *pci_dev_data;
++ spinlock_t dev_lock;
++
++ struct xenbus_device *xdev;
++
++ struct xenbus_watch be_watch;
++ u8 be_watching;
++
++ int evtchn_irq;
++
++ struct vm_struct *sh_area;
++ struct xen_pci_sharedinfo *sh_info;
++
++ unsigned long flags;
++
++ struct work_struct op_work;
++};
++
++struct pciback_dev_data {
++ struct list_head config_fields;
++ int permissive;
++ int warned_on_write;
++};
++
++/* Get/Put PCI Devices that are hidden from the PCI Backend Domain */
++struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev,
++ int domain, int bus,
++ int slot, int func);
++struct pci_dev *pcistub_get_pci_dev(struct pciback_device *pdev,
++ struct pci_dev *dev);
++void pcistub_put_pci_dev(struct pci_dev *dev);
++
++/* Ensure a device is turned off or reset */
++void pciback_reset_device(struct pci_dev *pdev);
++
++/* Access a virtual configuration space for a PCI device */
++int pciback_config_init(void);
++int pciback_config_init_dev(struct pci_dev *dev);
++void pciback_config_free_dyn_fields(struct pci_dev *dev);
++void pciback_config_reset_dev(struct pci_dev *dev);
++void pciback_config_free_dev(struct pci_dev *dev);
++int pciback_config_read(struct pci_dev *dev, int offset, int size,
++ u32 * ret_val);
++int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value);
++
++/* Handle requests for specific devices from the frontend */
++typedef int (*publish_pci_dev_cb) (struct pciback_device *pdev,
++ unsigned int domain, unsigned int bus,
++ unsigned int devfn, unsigned int devid);
++typedef int (*publish_pci_root_cb) (struct pciback_device * pdev,
++ unsigned int domain, unsigned int bus);
++int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
++ int devid, publish_pci_dev_cb publish_cb);
++void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev);
++struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
++ unsigned int domain, unsigned int bus,
++ unsigned int devfn);
++
++/**
++* Add for domain0 PCIE-AER handling. Get guest domain/bus/devfn in pciback
++* before sending aer request to pcifront, so that guest could identify
++* device, coopearte with pciback to finish aer recovery job if device driver
++* has the capability
++*/
++
++int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev,
++ unsigned int *domain, unsigned int *bus, unsigned int *devfn);
++int pciback_init_devices(struct pciback_device *pdev);
++int pciback_publish_pci_roots(struct pciback_device *pdev,
++ publish_pci_root_cb cb);
++void pciback_release_devices(struct pciback_device *pdev);
++
++/* Handles events from front-end */
++irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs);
++void pciback_do_op(void *data);
++
++int pciback_xenbus_register(void);
++void pciback_xenbus_unregister(void);
++
++#ifdef CONFIG_PCI_MSI
++int pciback_enable_msi(struct pciback_device *pdev,
++ struct pci_dev *dev, struct xen_pci_op *op);
++
++int pciback_disable_msi(struct pciback_device *pdev,
++ struct pci_dev *dev, struct xen_pci_op *op);
++
++
++int pciback_enable_msix(struct pciback_device *pdev,
++ struct pci_dev *dev, struct xen_pci_op *op);
++
++int pciback_disable_msix(struct pciback_device *pdev,
++ struct pci_dev *dev, struct xen_pci_op *op);
++#endif
++extern int verbose_request;
++
++void test_and_schedule_op(struct pciback_device *pdev);
++#endif
++
+diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
+new file mode 100644
+index 0000000..b85b2db
+--- /dev/null
++++ b/drivers/xen/pciback/pciback_ops.c
+@@ -0,0 +1,134 @@
++/*
++ * PCI Backend Operations - respond to PCI requests from Frontend
++ *
++ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ */
++#include <linux/module.h>
++#include <linux/wait.h>
++#include <asm/bitops.h>
++#include <xen/evtchn.h>
++#include "pciback.h"
++
++int verbose_request = 0;
++module_param(verbose_request, int, 0644);
++
++/* Ensure a device is "turned off" and ready to be exported.
++ * (Also see pciback_config_reset to ensure virtual configuration space is
++ * ready to be re-exported)
++ */
++void pciback_reset_device(struct pci_dev *dev)
++{
++ u16 cmd;
++
++ /* Disable devices (but not bridges) */
++ if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
++ pci_disable_device(dev);
++
++ pci_write_config_word(dev, PCI_COMMAND, 0);
++
++ dev->is_enabled = 0;
++ dev->is_busmaster = 0;
++ } else {
++ pci_read_config_word(dev, PCI_COMMAND, &cmd);
++ if (cmd & (PCI_COMMAND_INVALIDATE)) {
++ cmd &= ~(PCI_COMMAND_INVALIDATE);
++ pci_write_config_word(dev, PCI_COMMAND, cmd);
++
++ dev->is_busmaster = 0;
++ }
++ }
++}
++extern wait_queue_head_t aer_wait_queue;
++extern struct workqueue_struct *pciback_wq;
++/*
++* Now the same evtchn is used for both pcifront conf_read_write request
++* as well as pcie aer front end ack. We use a new work_queue to schedule
++* pciback conf_read_write service for avoiding confict with aer_core
++* do_recovery job which also use the system default work_queue
++*/
++void test_and_schedule_op(struct pciback_device *pdev)
++{
++ /* Check that frontend is requesting an operation and that we are not
++ * already processing a request */
++ if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags)
++ && !test_and_set_bit(_PDEVF_op_active, &pdev->flags))
++ {
++ queue_work(pciback_wq, &pdev->op_work);
++ }
++ /*_XEN_PCIB_active should have been cleared by pcifront. And also make
++ sure pciback is waiting for ack by checking _PCIB_op_pending*/
++ if (!test_bit(_XEN_PCIB_active,(unsigned long *)&pdev->sh_info->flags)
++ &&test_bit(_PCIB_op_pending, &pdev->flags)) {
++ wake_up(&aer_wait_queue);
++ }
++}
++
++/* Performing the configuration space reads/writes must not be done in atomic
++ * context because some of the pci_* functions can sleep (mostly due to ACPI
++ * use of semaphores). This function is intended to be called from a work
++ * queue in process context taking a struct pciback_device as a parameter */
++void pciback_do_op(void *data)
++{
++ struct pciback_device *pdev = data;
++ struct pci_dev *dev;
++ struct xen_pci_op *op = &pdev->sh_info->op;
++
++ dev = pciback_get_pci_dev(pdev, op->domain, op->bus, op->devfn);
++
++ if (dev == NULL)
++ op->err = XEN_PCI_ERR_dev_not_found;
++ else
++ {
++ switch (op->cmd)
++ {
++ case XEN_PCI_OP_conf_read:
++ op->err = pciback_config_read(dev,
++ op->offset, op->size, &op->value);
++ break;
++ case XEN_PCI_OP_conf_write:
++ op->err = pciback_config_write(dev,
++ op->offset, op->size, op->value);
++ break;
++#ifdef CONFIG_PCI_MSI
++ case XEN_PCI_OP_enable_msi:
++ op->err = pciback_enable_msi(pdev, dev, op);
++ break;
++ case XEN_PCI_OP_disable_msi:
++ op->err = pciback_disable_msi(pdev, dev, op);
++ break;
++ case XEN_PCI_OP_enable_msix:
++ op->err = pciback_enable_msix(pdev, dev, op);
++ break;
++ case XEN_PCI_OP_disable_msix:
++ op->err = pciback_disable_msix(pdev, dev, op);
++ break;
++#endif
++ default:
++ op->err = XEN_PCI_ERR_not_implemented;
++ break;
++ }
++ }
++ /* Tell the driver domain that we're done. */
++ wmb();
++ clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
++ notify_remote_via_irq(pdev->evtchn_irq);
++
++ /* Mark that we're done. */
++ smp_mb__before_clear_bit(); /* /after/ clearing PCIF_active */
++ clear_bit(_PDEVF_op_active, &pdev->flags);
++ smp_mb__after_clear_bit(); /* /before/ final check for work */
++
++ /* Check to see if the driver domain tried to start another request in
++ * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active.
++ */
++ test_and_schedule_op(pdev);
++}
++
++irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs)
++{
++ struct pciback_device *pdev = dev_id;
++
++ test_and_schedule_op(pdev);
++
++ return IRQ_HANDLED;
++}
+diff --git a/drivers/xen/pciback/slot.c b/drivers/xen/pciback/slot.c
+new file mode 100644
+index 0000000..105a8b6
+--- /dev/null
++++ b/drivers/xen/pciback/slot.c
+@@ -0,0 +1,187 @@
++/*
++ * PCI Backend - Provides a Virtual PCI bus (with real devices)
++ * to the frontend
++ *
++ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil> (vpci.c)
++ * Author: Tristan Gingold <tristan.gingold at bull.net>, from vpci.c
++ */
++
++#include <linux/list.h>
++#include <linux/slab.h>
++#include <linux/pci.h>
++#include <linux/spinlock.h>
++#include "pciback.h"
++
++/* There are at most 32 slots in a pci bus. */
++#define PCI_SLOT_MAX 32
++
++#define PCI_BUS_NBR 2
++
++struct slot_dev_data {
++ /* Access to dev_list must be protected by lock */
++ struct pci_dev *slots[PCI_BUS_NBR][PCI_SLOT_MAX];
++ spinlock_t lock;
++};
++
++struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
++ unsigned int domain, unsigned int bus,
++ unsigned int devfn)
++{
++ struct pci_dev *dev = NULL;
++ struct slot_dev_data *slot_dev = pdev->pci_dev_data;
++ unsigned long flags;
++
++ if (domain != 0 || PCI_FUNC(devfn) != 0)
++ return NULL;
++
++ if (PCI_SLOT(devfn) >= PCI_SLOT_MAX || bus >= PCI_BUS_NBR)
++ return NULL;
++
++ spin_lock_irqsave(&slot_dev->lock, flags);
++ dev = slot_dev->slots[bus][PCI_SLOT(devfn)];
++ spin_unlock_irqrestore(&slot_dev->lock, flags);
++
++ return dev;
++}
++
++int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
++ int devid, publish_pci_dev_cb publish_cb)
++{
++ int err = 0, slot, bus;
++ struct slot_dev_data *slot_dev = pdev->pci_dev_data;
++ unsigned long flags;
++
++ if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) {
++ err = -EFAULT;
++ xenbus_dev_fatal(pdev->xdev, err,
++ "Can't export bridges on the virtual PCI bus");
++ goto out;
++ }
++
++ spin_lock_irqsave(&slot_dev->lock, flags);
++
++ /* Assign to a new slot on the virtual PCI bus */
++ for (bus = 0; bus < PCI_BUS_NBR; bus++)
++ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++ if (slot_dev->slots[bus][slot] == NULL) {
++ printk(KERN_INFO
++ "pciback: slot: %s: assign to virtual slot %d, bus %d\n",
++ pci_name(dev), slot, bus);
++ slot_dev->slots[bus][slot] = dev;
++ goto unlock;
++ }
++ }
++
++ err = -ENOMEM;
++ xenbus_dev_fatal(pdev->xdev, err,
++ "No more space on root virtual PCI bus");
++
++ unlock:
++ spin_unlock_irqrestore(&slot_dev->lock, flags);
++
++ /* Publish this device. */
++ if(!err)
++ err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, 0), devid);
++
++ out:
++ return err;
++}
++
++void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
++{
++ int slot, bus;
++ struct slot_dev_data *slot_dev = pdev->pci_dev_data;
++ struct pci_dev *found_dev = NULL;
++ unsigned long flags;
++
++ spin_lock_irqsave(&slot_dev->lock, flags);
++
++ for (bus = 0; bus < PCI_BUS_NBR; bus++)
++ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++ if (slot_dev->slots[bus][slot] == dev) {
++ slot_dev->slots[bus][slot] = NULL;
++ found_dev = dev;
++ goto out;
++ }
++ }
++
++ out:
++ spin_unlock_irqrestore(&slot_dev->lock, flags);
++
++ if (found_dev)
++ pcistub_put_pci_dev(found_dev);
++}
++
++int pciback_init_devices(struct pciback_device *pdev)
++{
++ int slot, bus;
++ struct slot_dev_data *slot_dev;
++
++ slot_dev = kmalloc(sizeof(*slot_dev), GFP_KERNEL);
++ if (!slot_dev)
++ return -ENOMEM;
++
++ spin_lock_init(&slot_dev->lock);
++
++ for (bus = 0; bus < PCI_BUS_NBR; bus++)
++ for (slot = 0; slot < PCI_SLOT_MAX; slot++)
++ slot_dev->slots[bus][slot] = NULL;
++
++ pdev->pci_dev_data = slot_dev;
++
++ return 0;
++}
++
++int pciback_publish_pci_roots(struct pciback_device *pdev,
++ publish_pci_root_cb publish_cb)
++{
++ /* The Virtual PCI bus has only one root */
++ return publish_cb(pdev, 0, 0);
++}
++
++void pciback_release_devices(struct pciback_device *pdev)
++{
++ int slot, bus;
++ struct slot_dev_data *slot_dev = pdev->pci_dev_data;
++ struct pci_dev *dev;
++
++ for (bus = 0; bus < PCI_BUS_NBR; bus++)
++ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++ dev = slot_dev->slots[bus][slot];
++ if (dev != NULL)
++ pcistub_put_pci_dev(dev);
++ }
++
++ kfree(slot_dev);
++ pdev->pci_dev_data = NULL;
++}
++
++int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev,
++		unsigned int *domain, unsigned int *bus, unsigned int *devfn)
++{
++	int slot, busnr;
++	struct slot_dev_data *slot_dev = pdev->pci_dev_data;
++	struct pci_dev *dev;
++	int found = 0;
++	unsigned long flags;
++
++	spin_lock_irqsave(&slot_dev->lock, flags);
++
++	for (busnr = 0; busnr < PCI_BUS_NBR; busnr++)
++		for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++			dev = slot_dev->slots[busnr][slot];
++			if (dev && dev->bus->number == pcidev->bus->number
++				&& dev->devfn == pcidev->devfn
++				&& pci_domain_nr(dev->bus) == pci_domain_nr(pcidev->bus)) {
++				found = 1;
++				*domain = 0;
++				*bus = busnr;
++				*devfn = PCI_DEVFN(slot,0);
++				goto out;
++			}
++		}
++out:
++	spin_unlock_irqrestore(&slot_dev->lock, flags);
++	return found;
++
++}
+diff --git a/drivers/xen/pciback/vpci.c b/drivers/xen/pciback/vpci.c
+new file mode 100644
+index 0000000..a5b7ece
+--- /dev/null
++++ b/drivers/xen/pciback/vpci.c
+@@ -0,0 +1,242 @@
++/*
++ * PCI Backend - Provides a Virtual PCI bus (with real devices)
++ * to the frontend
++ *
++ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ */
++
++#include <linux/list.h>
++#include <linux/slab.h>
++#include <linux/pci.h>
++#include <linux/spinlock.h>
++#include "pciback.h"
++
++#define PCI_SLOT_MAX 32
++
++struct vpci_dev_data {
++ /* Access to dev_list must be protected by lock */
++ struct list_head dev_list[PCI_SLOT_MAX];
++ spinlock_t lock;
++};
++
++static inline struct list_head *list_first(struct list_head *head)
++{
++ return head->next;
++}
++
++struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
++ unsigned int domain, unsigned int bus,
++ unsigned int devfn)
++{
++ struct pci_dev_entry *entry;
++ struct pci_dev *dev = NULL;
++ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
++ unsigned long flags;
++
++ if (domain != 0 || bus != 0)
++ return NULL;
++
++ if (PCI_SLOT(devfn) < PCI_SLOT_MAX) {
++ spin_lock_irqsave(&vpci_dev->lock, flags);
++
++ list_for_each_entry(entry,
++ &vpci_dev->dev_list[PCI_SLOT(devfn)],
++ list) {
++ if (PCI_FUNC(entry->dev->devfn) == PCI_FUNC(devfn)) {
++ dev = entry->dev;
++ break;
++ }
++ }
++
++ spin_unlock_irqrestore(&vpci_dev->lock, flags);
++ }
++ return dev;
++}
++
++static inline int match_slot(struct pci_dev *l, struct pci_dev *r)
++{
++ if (pci_domain_nr(l->bus) == pci_domain_nr(r->bus)
++ && l->bus == r->bus && PCI_SLOT(l->devfn) == PCI_SLOT(r->devfn))
++ return 1;
++
++ return 0;
++}
++
++int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
++ int devid, publish_pci_dev_cb publish_cb)
++{
++ int err = 0, slot, func;
++ struct pci_dev_entry *t, *dev_entry;
++ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
++ unsigned long flags;
++
++ if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) {
++ err = -EFAULT;
++ xenbus_dev_fatal(pdev->xdev, err,
++ "Can't export bridges on the virtual PCI bus");
++ goto out;
++ }
++
++ dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL);
++ if (!dev_entry) {
++ err = -ENOMEM;
++ xenbus_dev_fatal(pdev->xdev, err,
++ "Error adding entry to virtual PCI bus");
++ goto out;
++ }
++
++ dev_entry->dev = dev;
++
++ spin_lock_irqsave(&vpci_dev->lock, flags);
++
++ /* Keep multi-function devices together on the virtual PCI bus */
++ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++ if (!list_empty(&vpci_dev->dev_list[slot])) {
++ t = list_entry(list_first(&vpci_dev->dev_list[slot]),
++ struct pci_dev_entry, list);
++
++ if (match_slot(dev, t->dev)) {
++ pr_info("pciback: vpci: %s: "
++ "assign to virtual slot %d func %d\n",
++ pci_name(dev), slot,
++ PCI_FUNC(dev->devfn));
++ list_add_tail(&dev_entry->list,
++ &vpci_dev->dev_list[slot]);
++ func = PCI_FUNC(dev->devfn);
++ goto unlock;
++ }
++ }
++ }
++
++ /* Assign to a new slot on the virtual PCI bus */
++ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++ if (list_empty(&vpci_dev->dev_list[slot])) {
++ printk(KERN_INFO
++ "pciback: vpci: %s: assign to virtual slot %d\n",
++ pci_name(dev), slot);
++ list_add_tail(&dev_entry->list,
++ &vpci_dev->dev_list[slot]);
++ func = PCI_FUNC(dev->devfn);
++ goto unlock;
++ }
++ }
++
++ err = -ENOMEM;
++ xenbus_dev_fatal(pdev->xdev, err,
++ "No more space on root virtual PCI bus");
++
++ unlock:
++ spin_unlock_irqrestore(&vpci_dev->lock, flags);
++
++ /* Publish this device. */
++ if(!err)
++ err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, func), devid);
++
++ out:
++ return err;
++}
++
++void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
++{
++ int slot;
++ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
++ struct pci_dev *found_dev = NULL;
++ unsigned long flags;
++
++ spin_lock_irqsave(&vpci_dev->lock, flags);
++
++ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++ struct pci_dev_entry *e, *tmp;
++ list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
++ list) {
++ if (e->dev == dev) {
++ list_del(&e->list);
++ found_dev = e->dev;
++ kfree(e);
++ goto out;
++ }
++ }
++ }
++
++ out:
++ spin_unlock_irqrestore(&vpci_dev->lock, flags);
++
++ if (found_dev)
++ pcistub_put_pci_dev(found_dev);
++}
++
++int pciback_init_devices(struct pciback_device *pdev)
++{
++ int slot;
++ struct vpci_dev_data *vpci_dev;
++
++ vpci_dev = kmalloc(sizeof(*vpci_dev), GFP_KERNEL);
++ if (!vpci_dev)
++ return -ENOMEM;
++
++ spin_lock_init(&vpci_dev->lock);
++
++ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++ INIT_LIST_HEAD(&vpci_dev->dev_list[slot]);
++ }
++
++ pdev->pci_dev_data = vpci_dev;
++
++ return 0;
++}
++
++int pciback_publish_pci_roots(struct pciback_device *pdev,
++ publish_pci_root_cb publish_cb)
++{
++ /* The Virtual PCI bus has only one root */
++ return publish_cb(pdev, 0, 0);
++}
++
++void pciback_release_devices(struct pciback_device *pdev)
++{
++ int slot;
++ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
++
++ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++ struct pci_dev_entry *e, *tmp;
++ list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
++ list) {
++ list_del(&e->list);
++ pcistub_put_pci_dev(e->dev);
++ kfree(e);
++ }
++ }
++
++ kfree(vpci_dev);
++ pdev->pci_dev_data = NULL;
++}
++
++int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev,
++ unsigned int *domain, unsigned int *bus, unsigned int *devfn)
++{
++ struct pci_dev_entry *entry;
++ struct pci_dev *dev = NULL;
++ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
++ unsigned long flags;
++ int found = 0, slot;
++
++ spin_lock_irqsave(&vpci_dev->lock, flags);
++ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++ list_for_each_entry(entry,
++ &vpci_dev->dev_list[slot],
++ list) {
++ dev = entry->dev;
++ if (dev && dev->bus->number == pcidev->bus->number
++ && pci_domain_nr(dev->bus) == pci_domain_nr(pcidev->bus)
++ && dev->devfn == pcidev->devfn)
++ {
++ found = 1;
++ *domain = 0;
++ *bus = 0;
++ *devfn = PCI_DEVFN(slot, PCI_FUNC(pcidev->devfn));
++ }
++ }
++ }
++ spin_unlock_irqrestore(&vpci_dev->lock, flags);
++ return found;
++}
+diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
+new file mode 100644
+index 0000000..4d56c45
+--- /dev/null
++++ b/drivers/xen/pciback/xenbus.c
+@@ -0,0 +1,710 @@
++/*
++ * PCI Backend Xenbus Setup - handles setup with frontend and xend
++ *
++ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ */
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/list.h>
++#include <linux/vmalloc.h>
++#include <xen/xenbus.h>
++#include <xen/evtchn.h>
++#include "pciback.h"
++
++#define INVALID_EVTCHN_IRQ (-1)
++struct workqueue_struct *pciback_wq;
++
++static struct pciback_device *alloc_pdev(struct xenbus_device *xdev)
++{
++ struct pciback_device *pdev;
++
++ pdev = kzalloc(sizeof(struct pciback_device), GFP_KERNEL);
++ if (pdev == NULL)
++ goto out;
++ dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev);
++
++ pdev->xdev = xdev;
++ xdev->dev.driver_data = pdev;
++
++ spin_lock_init(&pdev->dev_lock);
++
++ pdev->sh_area = NULL;
++ pdev->sh_info = NULL;
++ pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
++ pdev->be_watching = 0;
++
++ INIT_WORK(&pdev->op_work, pciback_do_op, pdev);
++
++ if (pciback_init_devices(pdev)) {
++ kfree(pdev);
++ pdev = NULL;
++ }
++ out:
++ return pdev;
++}
++
++static void pciback_disconnect(struct pciback_device *pdev)
++{
++ spin_lock(&pdev->dev_lock);
++
++ /* Ensure the guest can't trigger our handler before removing devices */
++ if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) {
++ unbind_from_irqhandler(pdev->evtchn_irq, pdev);
++ pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
++ }
++
++ /* If the driver domain started an op, make sure we complete it
++ * before releasing the shared memory */
++ flush_workqueue(pciback_wq);
++
++ if (pdev->sh_info != NULL) {
++ xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_area);
++ pdev->sh_info = NULL;
++ }
++
++ spin_unlock(&pdev->dev_lock);
++}
++
++static void free_pdev(struct pciback_device *pdev)
++{
++ if (pdev->be_watching)
++ unregister_xenbus_watch(&pdev->be_watch);
++
++ pciback_disconnect(pdev);
++
++ pciback_release_devices(pdev);
++
++ pdev->xdev->dev.driver_data = NULL;
++ pdev->xdev = NULL;
++
++ kfree(pdev);
++}
++
++static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref,
++ int remote_evtchn)
++{
++ int err = 0;
++ struct vm_struct *area;
++
++ dev_dbg(&pdev->xdev->dev,
++ "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
++ gnt_ref, remote_evtchn);
++
++ area = xenbus_map_ring_valloc(pdev->xdev, gnt_ref);
++ if (IS_ERR(area)) {
++ err = PTR_ERR(area);
++ goto out;
++ }
++ pdev->sh_area = area;
++ pdev->sh_info = area->addr;
++
++ err = bind_interdomain_evtchn_to_irqhandler(
++ pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event,
++ SA_SAMPLE_RANDOM, "pciback", pdev);
++ if (err < 0) {
++ xenbus_dev_fatal(pdev->xdev, err,
++ "Error binding event channel to IRQ");
++ goto out;
++ }
++ pdev->evtchn_irq = err;
++ err = 0;
++
++ dev_dbg(&pdev->xdev->dev, "Attached!\n");
++ out:
++ return err;
++}
++
++static int pciback_attach(struct pciback_device *pdev)
++{
++ int err = 0;
++ int gnt_ref, remote_evtchn;
++ char *magic = NULL;
++
++ spin_lock(&pdev->dev_lock);
++
++ /* Make sure we only do this setup once */
++ if (xenbus_read_driver_state(pdev->xdev->nodename) !=
++ XenbusStateInitialised)
++ goto out;
++
++ /* Wait for frontend to state that it has published the configuration */
++ if (xenbus_read_driver_state(pdev->xdev->otherend) !=
++ XenbusStateInitialised)
++ goto out;
++
++ dev_dbg(&pdev->xdev->dev, "Reading frontend config\n");
++
++ err = xenbus_gather(XBT_NIL, pdev->xdev->otherend,
++ "pci-op-ref", "%u", &gnt_ref,
++ "event-channel", "%u", &remote_evtchn,
++ "magic", NULL, &magic, NULL);
++ if (err) {
++ /* If configuration didn't get read correctly, wait longer */
++ xenbus_dev_fatal(pdev->xdev, err,
++ "Error reading configuration from frontend");
++ goto out;
++ }
++
++ if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) {
++ xenbus_dev_fatal(pdev->xdev, -EFAULT,
++ "version mismatch (%s/%s) with pcifront - "
++ "halting pciback",
++ magic, XEN_PCI_MAGIC);
++ goto out;
++ }
++
++ err = pciback_do_attach(pdev, gnt_ref, remote_evtchn);
++ if (err)
++ goto out;
++
++ dev_dbg(&pdev->xdev->dev, "Connecting...\n");
++
++ err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);
++ if (err)
++ xenbus_dev_fatal(pdev->xdev, err,
++ "Error switching to connected state!");
++
++ dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
++ out:
++ spin_unlock(&pdev->dev_lock);
++
++ if (magic)
++ kfree(magic);
++
++ return err;
++}
++
++static int pciback_publish_pci_dev(struct pciback_device *pdev,
++ unsigned int domain, unsigned int bus,
++ unsigned int devfn, unsigned int devid)
++{
++ int err;
++ int len;
++ char str[64];
++
++ len = snprintf(str, sizeof(str), "vdev-%d", devid);
++ if (unlikely(len >= (sizeof(str) - 1))) {
++ err = -ENOMEM;
++ goto out;
++ }
++
++ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
++ "%04x:%02x:%02x.%02x", domain, bus,
++ PCI_SLOT(devfn), PCI_FUNC(devfn));
++
++ out:
++ return err;
++}
++
++static int pciback_export_device(struct pciback_device *pdev,
++ int domain, int bus, int slot, int func,
++ int devid)
++{
++ struct pci_dev *dev;
++ int err = 0;
++
++ dev_dbg(&pdev->xdev->dev, "exporting dom %x bus %x slot %x func %x\n",
++ domain, bus, slot, func);
++
++ dev = pcistub_get_pci_dev_by_slot(pdev, domain, bus, slot, func);
++ if (!dev) {
++ err = -EINVAL;
++ xenbus_dev_fatal(pdev->xdev, err,
++ "Couldn't locate PCI device "
++ "(%04x:%02x:%02x.%01x)! "
++ "perhaps already in-use?",
++ domain, bus, slot, func);
++ goto out;
++ }
++
++ err = pciback_add_pci_dev(pdev, dev, devid, pciback_publish_pci_dev);
++ if (err)
++ goto out;
++
++ /* TODO: It'd be nice to export a bridge and have all of its children
++ * get exported with it. This may be best done in xend (which will
++ * have to calculate resource usage anyway) but we probably want to
++ * put something in here to ensure that if a bridge gets given to a
++ * driver domain, that all devices under that bridge are not given
++ * to other driver domains (as he who controls the bridge can disable
++ * it and stop the other devices from working).
++ */
++ out:
++ return err;
++}
++
++static int pciback_remove_device(struct pciback_device *pdev,
++ int domain, int bus, int slot, int func)
++{
++ int err = 0;
++ struct pci_dev *dev;
++
++ dev_dbg(&pdev->xdev->dev, "removing dom %x bus %x slot %x func %x\n",
++ domain, bus, slot, func);
++
++ dev = pciback_get_pci_dev(pdev, domain, bus, PCI_DEVFN(slot, func));
++ if (!dev) {
++ err = -EINVAL;
++ dev_dbg(&pdev->xdev->dev, "Couldn't locate PCI device "
++ "(%04x:%02x:%02x.%01x)! not owned by this domain\n",
++ domain, bus, slot, func);
++ goto out;
++ }
++
++ pciback_release_pci_dev(pdev, dev);
++
++ out:
++ return err;
++}
++
++static int pciback_publish_pci_root(struct pciback_device *pdev,
++ unsigned int domain, unsigned int bus)
++{
++ unsigned int d, b;
++ int i, root_num, len, err;
++ char str[64];
++
++ dev_dbg(&pdev->xdev->dev, "Publishing pci roots\n");
++
++ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
++ "root_num", "%d", &root_num);
++ if (err == 0 || err == -ENOENT)
++ root_num = 0;
++ else if (err < 0)
++ goto out;
++
++ /* Verify that we haven't already published this pci root */
++ for (i = 0; i < root_num; i++) {
++ len = snprintf(str, sizeof(str), "root-%d", i);
++ if (unlikely(len >= (sizeof(str) - 1))) {
++ err = -ENOMEM;
++ goto out;
++ }
++
++ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
++ str, "%x:%x", &d, &b);
++ if (err < 0)
++ goto out;
++ if (err != 2) {
++ err = -EINVAL;
++ goto out;
++ }
++
++ if (d == domain && b == bus) {
++ err = 0;
++ goto out;
++ }
++ }
++
++ len = snprintf(str, sizeof(str), "root-%d", root_num);
++ if (unlikely(len >= (sizeof(str) - 1))) {
++ err = -ENOMEM;
++ goto out;
++ }
++
++ dev_dbg(&pdev->xdev->dev, "writing root %d at %04x:%02x\n",
++ root_num, domain, bus);
++
++ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
++ "%04x:%02x", domain, bus);
++ if (err)
++ goto out;
++
++ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
++ "root_num", "%d", (root_num + 1));
++
++ out:
++ return err;
++}
++
++static int pciback_reconfigure(struct pciback_device *pdev)
++{
++ int err = 0;
++ int num_devs;
++ int domain, bus, slot, func;
++ int substate;
++ int i, len;
++ char state_str[64];
++ char dev_str[64];
++
++ spin_lock(&pdev->dev_lock);
++
++ dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n");
++
++ /* Make sure we only reconfigure once */
++ if (xenbus_read_driver_state(pdev->xdev->nodename) !=
++ XenbusStateReconfiguring)
++ goto out;
++
++ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
++ &num_devs);
++ if (err != 1) {
++ if (err >= 0)
++ err = -EINVAL;
++ xenbus_dev_fatal(pdev->xdev, err,
++ "Error reading number of devices");
++ goto out;
++ }
++
++ for (i = 0; i < num_devs; i++) {
++ len = snprintf(state_str, sizeof(state_str), "state-%d", i);
++ if (unlikely(len >= (sizeof(state_str) - 1))) {
++ err = -ENOMEM;
++ xenbus_dev_fatal(pdev->xdev, err,
++ "String overflow while reading "
++ "configuration");
++ goto out;
++ }
++ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, state_str,
++ "%d", &substate);
++ if (err != 1)
++ substate = XenbusStateUnknown;
++
++ switch (substate) {
++ case XenbusStateInitialising:
++ dev_dbg(&pdev->xdev->dev, "Attaching dev-%d ...\n", i);
++
++ len = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
++ if (unlikely(len >= (sizeof(dev_str) - 1))) {
++ err = -ENOMEM;
++ xenbus_dev_fatal(pdev->xdev, err,
++ "String overflow while "
++ "reading configuration");
++ goto out;
++ }
++ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
++ dev_str, "%x:%x:%x.%x",
++ &domain, &bus, &slot, &func);
++ if (err < 0) {
++ xenbus_dev_fatal(pdev->xdev, err,
++ "Error reading device "
++ "configuration");
++ goto out;
++ }
++ if (err != 4) {
++ err = -EINVAL;
++ xenbus_dev_fatal(pdev->xdev, err,
++ "Error parsing pci device "
++ "configuration");
++ goto out;
++ }
++
++ err = pciback_export_device(pdev, domain, bus, slot,
++ func, i);
++ if (err)
++ goto out;
++
++ /* Publish pci roots. */
++ err = pciback_publish_pci_roots(pdev, pciback_publish_pci_root);
++ if (err) {
++ xenbus_dev_fatal(pdev->xdev, err,
++ "Error while publish PCI root"
++ "buses for frontend");
++ goto out;
++ }
++
++ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
++ state_str, "%d",
++ XenbusStateInitialised);
++ if (err) {
++ xenbus_dev_fatal(pdev->xdev, err,
++ "Error switching substate of "
++ "dev-%d\n", i);
++ goto out;
++ }
++ break;
++
++ case XenbusStateClosing:
++ dev_dbg(&pdev->xdev->dev, "Detaching dev-%d ...\n", i);
++
++ len = snprintf(dev_str, sizeof(dev_str), "vdev-%d", i);
++ if (unlikely(len >= (sizeof(dev_str) - 1))) {
++ err = -ENOMEM;
++ xenbus_dev_fatal(pdev->xdev, err,
++ "String overflow while "
++ "reading configuration");
++ goto out;
++ }
++ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
++ dev_str, "%x:%x:%x.%x",
++ &domain, &bus, &slot, &func);
++ if (err < 0) {
++ xenbus_dev_fatal(pdev->xdev, err,
++ "Error reading device "
++ "configuration");
++ goto out;
++ }
++ if (err != 4) {
++ err = -EINVAL;
++ xenbus_dev_fatal(pdev->xdev, err,
++ "Error parsing pci device "
++ "configuration");
++ goto out;
++ }
++
++ err = pciback_remove_device(pdev, domain, bus, slot,
++ func);
++ if(err)
++ goto out;
++
++ /* TODO: If at some point we implement support for pci
++ * root hot-remove on pcifront side, we'll need to
++ * remove unnecessary xenstore nodes of pci roots here.
++ */
++
++ break;
++
++ default:
++ break;
++ }
++ }
++
++ err = xenbus_switch_state(pdev->xdev, XenbusStateReconfigured);
++ if (err) {
++ xenbus_dev_fatal(pdev->xdev, err,
++ "Error switching to reconfigured state!");
++ goto out;
++ }
++
++ out:
++ spin_unlock(&pdev->dev_lock);
++
++ return 0;
++}
++
++static void pciback_frontend_changed(struct xenbus_device *xdev,
++ enum xenbus_state fe_state)
++{
++ struct pciback_device *pdev = xdev->dev.driver_data;
++
++ dev_dbg(&xdev->dev, "fe state changed %d\n", fe_state);
++
++ switch (fe_state) {
++ case XenbusStateInitialised:
++ pciback_attach(pdev);
++ break;
++
++ case XenbusStateReconfiguring:
++ pciback_reconfigure(pdev);
++ break;
++
++ case XenbusStateConnected:
++ /* pcifront switched its state from reconfiguring to connected.
++ * Then switch to connected state.
++ */
++ xenbus_switch_state(xdev, XenbusStateConnected);
++ break;
++
++ case XenbusStateClosing:
++ pciback_disconnect(pdev);
++ xenbus_switch_state(xdev, XenbusStateClosing);
++ break;
++
++ case XenbusStateClosed:
++ pciback_disconnect(pdev);
++ xenbus_switch_state(xdev, XenbusStateClosed);
++ if (xenbus_dev_is_online(xdev))
++ break;
++ /* fall through if not online */
++ case XenbusStateUnknown:
++ dev_dbg(&xdev->dev, "frontend is gone! unregister device\n");
++ device_unregister(&xdev->dev);
++ break;
++
++ default:
++ break;
++ }
++}
++
++static int pciback_setup_backend(struct pciback_device *pdev)
++{
++ /* Get configuration from xend (if available now) */
++ int domain, bus, slot, func;
++ int err = 0;
++ int i, num_devs;
++ char dev_str[64];
++ char state_str[64];
++
++ spin_lock(&pdev->dev_lock);
++
++ /* It's possible we could get the call to setup twice, so make sure
++ * we're not already connected.
++ */
++ if (xenbus_read_driver_state(pdev->xdev->nodename) !=
++ XenbusStateInitWait)
++ goto out;
++
++ dev_dbg(&pdev->xdev->dev, "getting be setup\n");
++
++ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
++ &num_devs);
++ if (err != 1) {
++ if (err >= 0)
++ err = -EINVAL;
++ xenbus_dev_fatal(pdev->xdev, err,
++ "Error reading number of devices");
++ goto out;
++ }
++
++ for (i = 0; i < num_devs; i++) {
++ int l = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
++ if (unlikely(l >= (sizeof(dev_str) - 1))) {
++ err = -ENOMEM;
++ xenbus_dev_fatal(pdev->xdev, err,
++ "String overflow while reading "
++ "configuration");
++ goto out;
++ }
++
++ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, dev_str,
++ "%x:%x:%x.%x", &domain, &bus, &slot, &func);
++ if (err < 0) {
++ xenbus_dev_fatal(pdev->xdev, err,
++ "Error reading device configuration");
++ goto out;
++ }
++ if (err != 4) {
++ err = -EINVAL;
++ xenbus_dev_fatal(pdev->xdev, err,
++ "Error parsing pci device "
++ "configuration");
++ goto out;
++ }
++
++ err = pciback_export_device(pdev, domain, bus, slot, func, i);
++ if (err)
++ goto out;
++
++ /* Switch substate of this device. */
++ l = snprintf(state_str, sizeof(state_str), "state-%d", i);
++ if (unlikely(l >= (sizeof(state_str) - 1))) {
++ err = -ENOMEM;
++ xenbus_dev_fatal(pdev->xdev, err,
++ "String overflow while reading "
++ "configuration");
++ goto out;
++ }
++ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, state_str,
++ "%d", XenbusStateInitialised);
++ if (err) {
++ xenbus_dev_fatal(pdev->xdev, err, "Error switching "
++ "substate of dev-%d\n", i);
++ goto out;
++ }
++ }
++
++ err = pciback_publish_pci_roots(pdev, pciback_publish_pci_root);
++ if (err) {
++ xenbus_dev_fatal(pdev->xdev, err,
++ "Error while publish PCI root buses "
++ "for frontend");
++ goto out;
++ }
++
++ err = xenbus_switch_state(pdev->xdev, XenbusStateInitialised);
++ if (err)
++ xenbus_dev_fatal(pdev->xdev, err,
++ "Error switching to initialised state!");
++
++ out:
++ spin_unlock(&pdev->dev_lock);
++
++ if (!err)
++ /* see if pcifront is already configured (if not, we'll wait) */
++ pciback_attach(pdev);
++
++ return err;
++}
++
++static void pciback_be_watch(struct xenbus_watch *watch,
++ const char **vec, unsigned int len)
++{
++ struct pciback_device *pdev =
++ container_of(watch, struct pciback_device, be_watch);
++
++ switch (xenbus_read_driver_state(pdev->xdev->nodename)) {
++ case XenbusStateInitWait:
++ pciback_setup_backend(pdev);
++ break;
++
++ default:
++ break;
++ }
++}
++
++static int pciback_xenbus_probe(struct xenbus_device *dev,
++ const struct xenbus_device_id *id)
++{
++ int err = 0;
++ struct pciback_device *pdev = alloc_pdev(dev);
++
++ if (pdev == NULL) {
++ err = -ENOMEM;
++ xenbus_dev_fatal(dev, err,
++ "Error allocating pciback_device struct");
++ goto out;
++ }
++
++ /* wait for xend to configure us */
++ err = xenbus_switch_state(dev, XenbusStateInitWait);
++ if (err)
++ goto out;
++
++ /* watch the backend node for backend configuration information */
++ err = xenbus_watch_path(dev, dev->nodename, &pdev->be_watch,
++ pciback_be_watch);
++ if (err)
++ goto out;
++ pdev->be_watching = 1;
++
++ /* We need to force a call to our callback here in case
++ * xend already configured us!
++ */
++ pciback_be_watch(&pdev->be_watch, NULL, 0);
++
++ out:
++ return err;
++}
++
++static int pciback_xenbus_remove(struct xenbus_device *dev)
++{
++ struct pciback_device *pdev = dev->dev.driver_data;
++
++ if (pdev != NULL)
++ free_pdev(pdev);
++
++ return 0;
++}
++
++static const struct xenbus_device_id xenpci_ids[] = {
++ {"pci"},
++ {{0}},
++};
++
++static struct xenbus_driver xenbus_pciback_driver = {
++ .name = "pciback",
++ .owner = THIS_MODULE,
++ .ids = xenpci_ids,
++ .probe = pciback_xenbus_probe,
++ .remove = pciback_xenbus_remove,
++ .otherend_changed = pciback_frontend_changed,
++};
++
++int __init pciback_xenbus_register(void)
++{
++ if (!is_running_on_xen())
++ return -ENODEV;
++ pciback_wq = create_workqueue("pciback_workqueue");
++ if (!pciback_wq) {
++ printk(KERN_ERR "pciback_xenbus_register: create"
++ "pciback_workqueue failed\n");
++ return -EFAULT;
++ }
++ return xenbus_register_backend(&xenbus_pciback_driver);
++}
++
++void __exit pciback_xenbus_unregister(void)
++{
++ destroy_workqueue(pciback_wq);
++ xenbus_unregister_driver(&xenbus_pciback_driver);
++}
+--
+1.7.3.4
+
+
+From cf2a64556286b762ce6a3a9b408ba7ecdcaea03a Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Tue, 13 Oct 2009 17:22:22 -0400
+Subject: [PATCH 008/139] xen-pciback: Fix include header name change (evtchn.h is now events.h)
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/pciback/pci_stub.c | 2 +-
+ drivers/xen/pciback/pciback_ops.c | 2 +-
+ drivers/xen/pciback/xenbus.c | 2 +-
+ 3 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
+index c481a73..c02f21f 100644
+--- a/drivers/xen/pciback/pci_stub.c
++++ b/drivers/xen/pciback/pci_stub.c
+@@ -13,7 +13,7 @@
+ #include <linux/pci.h>
+ #include <linux/wait.h>
+ #include <asm/atomic.h>
+-#include <xen/evtchn.h>
++#include <xen/events.h>
+ #include "pciback.h"
+ #include "conf_space.h"
+ #include "conf_space_quirks.h"
+diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
+index b85b2db..58d09eb 100644
+--- a/drivers/xen/pciback/pciback_ops.c
++++ b/drivers/xen/pciback/pciback_ops.c
+@@ -6,7 +6,7 @@
+ #include <linux/module.h>
+ #include <linux/wait.h>
+ #include <asm/bitops.h>
+-#include <xen/evtchn.h>
++#include <xen/events.h>
+ #include "pciback.h"
+
+ int verbose_request = 0;
+diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
+index 4d56c45..bbca3fe 100644
+--- a/drivers/xen/pciback/xenbus.c
++++ b/drivers/xen/pciback/xenbus.c
+@@ -8,7 +8,7 @@
+ #include <linux/list.h>
+ #include <linux/vmalloc.h>
+ #include <xen/xenbus.h>
+-#include <xen/evtchn.h>
++#include <xen/events.h>
+ #include "pciback.h"
+
+ #define INVALID_EVTCHN_IRQ (-1)
+--
+1.7.3.4
+
+
+From f6222ae41f2fee3f67983f833ee8dcba2c7a1362 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Tue, 13 Oct 2009 17:22:24 -0400
+Subject: [PATCH 009/139] xen-pciback: Use pci_is_enabled() instead of is_enabled.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/pciback/conf_space_header.c | 4 ++--
+ drivers/xen/pciback/pciback_ops.c | 1 -
+ 2 files changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/xen/pciback/conf_space_header.c b/drivers/xen/pciback/conf_space_header.c
+index f794e12..5a9e028 100644
+--- a/drivers/xen/pciback/conf_space_header.c
++++ b/drivers/xen/pciback/conf_space_header.c
+@@ -22,14 +22,14 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
+ {
+ int err;
+
+- if (!dev->is_enabled && is_enable_cmd(value)) {
++ if (!pci_is_enabled(dev) && is_enable_cmd(value)) {
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG "pciback: %s: enable\n",
+ pci_name(dev));
+ err = pci_enable_device(dev);
+ if (err)
+ return err;
+- } else if (dev->is_enabled && !is_enable_cmd(value)) {
++ } else if (pci_is_enabled(dev) && !is_enable_cmd(value)) {
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG "pciback: %s: disable\n",
+ pci_name(dev));
+diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
+index 58d09eb..2d570e7 100644
+--- a/drivers/xen/pciback/pciback_ops.c
++++ b/drivers/xen/pciback/pciback_ops.c
+@@ -26,7 +26,6 @@ void pciback_reset_device(struct pci_dev *dev)
+
+ pci_write_config_word(dev, PCI_COMMAND, 0);
+
+- dev->is_enabled = 0;
+ dev->is_busmaster = 0;
+ } else {
+ pci_read_config_word(dev, PCI_COMMAND, &cmd);
+--
+1.7.3.4
+
+
+From 0d379d03a3284e4b4d890b7e1b8163d485cc72e6 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Tue, 13 Oct 2009 17:22:25 -0400
+Subject: [PATCH 010/139] xen-pciback: Fix usage of INIT_WORK.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/pciback/pciback.h | 4 ++--
+ drivers/xen/pciback/pciback_ops.c | 7 ++++---
+ drivers/xen/pciback/xenbus.c | 3 ++-
+ 3 files changed, 8 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h
+index 6744f45..4fb8c05 100644
+--- a/drivers/xen/pciback/pciback.h
++++ b/drivers/xen/pciback/pciback.h
+@@ -99,8 +99,8 @@ int pciback_publish_pci_roots(struct pciback_device *pdev,
+ void pciback_release_devices(struct pciback_device *pdev);
+
+ /* Handles events from front-end */
+-irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs);
+-void pciback_do_op(void *data);
++irqreturn_t pciback_handle_event(int irq, void *dev_id);
++void pciback_do_op(struct work_struct *data);
+
+ int pciback_xenbus_register(void);
+ void pciback_xenbus_unregister(void);
+diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
+index 2d570e7..6624faf 100644
+--- a/drivers/xen/pciback/pciback_ops.c
++++ b/drivers/xen/pciback/pciback_ops.c
+@@ -66,9 +66,10 @@ void test_and_schedule_op(struct pciback_device *pdev)
+ * context because some of the pci_* functions can sleep (mostly due to ACPI
+ * use of semaphores). This function is intended to be called from a work
+ * queue in process context taking a struct pciback_device as a parameter */
+-void pciback_do_op(void *data)
++
++void pciback_do_op(struct work_struct *data)
+ {
+- struct pciback_device *pdev = data;
++ struct pciback_device *pdev = container_of(data, struct pciback_device, op_work);
+ struct pci_dev *dev;
+ struct xen_pci_op *op = &pdev->sh_info->op;
+
+@@ -123,7 +124,7 @@ void pciback_do_op(void *data)
+ test_and_schedule_op(pdev);
+ }
+
+-irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs)
++irqreturn_t pciback_handle_event(int irq, void *dev_id)
+ {
+ struct pciback_device *pdev = dev_id;
+
+diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
+index bbca3fe..bd52289 100644
+--- a/drivers/xen/pciback/xenbus.c
++++ b/drivers/xen/pciback/xenbus.c
+@@ -9,6 +9,7 @@
+ #include <linux/vmalloc.h>
+ #include <xen/xenbus.h>
+ #include <xen/events.h>
++#include <linux/workqueue.h>
+ #include "pciback.h"
+
+ #define INVALID_EVTCHN_IRQ (-1)
+@@ -33,7 +34,7 @@ static struct pciback_device *alloc_pdev(struct xenbus_device *xdev)
+ pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
+ pdev->be_watching = 0;
+
+- INIT_WORK(&pdev->op_work, pciback_do_op, pdev);
++ INIT_WORK(&pdev->op_work, pciback_do_op);
+
+ if (pciback_init_devices(pdev)) {
+ kfree(pdev);
+--
+1.7.3.4
+
+
+From 57f6c49d0f428f96cca49147d68b0bb6156613a6 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Tue, 13 Oct 2009 17:22:26 -0400
+Subject: [PATCH 011/139] xen-pciback: Update the calling mechanism for xenbus_[map|unmap]_ring_valloc functions.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/pciback/pciback.h | 1 -
+ drivers/xen/pciback/xenbus.c | 18 +++++++++---------
+ 2 files changed, 9 insertions(+), 10 deletions(-)
+
+diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h
+index 4fb8c05..5e8e14e 100644
+--- a/drivers/xen/pciback/pciback.h
++++ b/drivers/xen/pciback/pciback.h
+@@ -36,7 +36,6 @@ struct pciback_device {
+
+ int evtchn_irq;
+
+- struct vm_struct *sh_area;
+ struct xen_pci_sharedinfo *sh_info;
+
+ unsigned long flags;
+diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
+index bd52289..5be1350 100644
+--- a/drivers/xen/pciback/xenbus.c
++++ b/drivers/xen/pciback/xenbus.c
+@@ -7,6 +7,7 @@
+ #include <linux/init.h>
+ #include <linux/list.h>
+ #include <linux/vmalloc.h>
++#include <linux/workqueue.h>
+ #include <xen/xenbus.h>
+ #include <xen/events.h>
+ #include <linux/workqueue.h>
+@@ -29,7 +30,6 @@ static struct pciback_device *alloc_pdev(struct xenbus_device *xdev)
+
+ spin_lock_init(&pdev->dev_lock);
+
+- pdev->sh_area = NULL;
+ pdev->sh_info = NULL;
+ pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
+ pdev->be_watching = 0;
+@@ -59,7 +59,7 @@ static void pciback_disconnect(struct pciback_device *pdev)
+ flush_workqueue(pciback_wq);
+
+ if (pdev->sh_info != NULL) {
+- xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_area);
++ xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info);
+ pdev->sh_info = NULL;
+ }
+
+@@ -85,23 +85,23 @@ static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref,
+ int remote_evtchn)
+ {
+ int err = 0;
+- struct vm_struct *area;
++ void *vaddr;
+
+ dev_dbg(&pdev->xdev->dev,
+ "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
+ gnt_ref, remote_evtchn);
+
+- area = xenbus_map_ring_valloc(pdev->xdev, gnt_ref);
+- if (IS_ERR(area)) {
+- err = PTR_ERR(area);
++ err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr);
++ if (err < 0) {
++ xenbus_dev_fatal(pdev->xdev, err,
++ "Error mapping other domain page in ours.");
+ goto out;
+ }
+- pdev->sh_area = area;
+- pdev->sh_info = area->addr;
++ pdev->sh_info = vaddr;
+
+ err = bind_interdomain_evtchn_to_irqhandler(
+ pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event,
+- SA_SAMPLE_RANDOM, "pciback", pdev);
++ 0, "pciback", pdev);
+ if (err < 0) {
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error binding event channel to IRQ");
+--
+1.7.3.4
+
+
+From 6e86fcb926e41fb55f512972603e5aaf77e2efb8 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Tue, 13 Oct 2009 17:22:30 -0400
+Subject: [PATCH 012/139] xen-pciback: Add check to load only under privileged domain.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/pciback/pci_stub.c | 5 +++++
+ drivers/xen/pciback/xenbus.c | 2 --
+ 2 files changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
+index c02f21f..d97dac5 100644
+--- a/drivers/xen/pciback/pci_stub.c
++++ b/drivers/xen/pciback/pci_stub.c
+@@ -14,6 +14,8 @@
+ #include <linux/wait.h>
+ #include <asm/atomic.h>
+ #include <xen/events.h>
++#include <asm/xen/pci.h>
++#include <asm/xen/hypervisor.h>
+ #include "pciback.h"
+ #include "conf_space.h"
+ #include "conf_space_quirks.h"
+@@ -1286,6 +1288,9 @@ static int __init pciback_init(void)
+ {
+ int err;
+
++ if (!xen_initial_domain())
++ return -ENODEV;
++
+ err = pciback_config_init();
+ if (err)
+ return err;
+diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
+index 5be1350..a85c413 100644
+--- a/drivers/xen/pciback/xenbus.c
++++ b/drivers/xen/pciback/xenbus.c
+@@ -693,8 +693,6 @@ static struct xenbus_driver xenbus_pciback_driver = {
+
+ int __init pciback_xenbus_register(void)
+ {
+- if (!is_running_on_xen())
+- return -ENODEV;
+ pciback_wq = create_workqueue("pciback_workqueue");
+ if (!pciback_wq) {
+ printk(KERN_ERR "pciback_xenbus_register: create"
+--
+1.7.3.4
+
+
+From c1139f912c1336538e51966d56e5905954052cba Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Tue, 13 Oct 2009 17:22:31 -0400
+Subject: [PATCH 013/139] xen-pciback: Remove usage of pci_restore_bars() as Linux handles the power-up states correctly now.
+
+Originally this code was pulled from the upstream kernel, and stuck
+in the linux-2.6-sparse tree. At that point of time, the Linux tree (2.6.16?)
+did not know how to handle this. Nowadays the pci_set_power_state routine
+handles this case so we do not need this anymore.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/pciback/conf_space_capability_pm.c | 13 -------------
+ 1 files changed, 0 insertions(+), 13 deletions(-)
+
+diff --git a/drivers/xen/pciback/conf_space_capability_pm.c b/drivers/xen/pciback/conf_space_capability_pm.c
+index e2f99c7..e1d3af4 100644
+--- a/drivers/xen/pciback/conf_space_capability_pm.c
++++ b/drivers/xen/pciback/conf_space_capability_pm.c
+@@ -58,19 +58,6 @@ static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value,
+ goto out;
+ }
+
+- /*
+- * Device may lose PCI config info on D3->D0 transition. This
+- * is a problem for some guests which will not reset BARs. Even
+- * those that have a go will be foiled by our BAR-write handler
+- * which will discard the write! Since Linux won't re-init
+- * the config space automatically in all cases, we do it here.
+- * Future: Should we re-initialise all first 64 bytes of config space?
+- */
+- if (new_state == PCI_D0 &&
+- (old_state == PCI_D3hot || old_state == PCI_D3cold) &&
+- !(old_value & PCI_PM_CTRL_NO_SOFT_RESET))
+- pci_restore_bars(dev);
+-
+ out:
+ return err;
+ }
+--
+1.7.3.4
+
+
+From 721657d92623cfcf2f6f68c14abf97eb40fa6b20 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Tue, 13 Oct 2009 17:22:32 -0400
+Subject: [PATCH 014/139] xen-pciback: Enable Xen-PCI-back to be compiled.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/Kconfig | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++
+ drivers/xen/Makefile | 1 +
+ 2 files changed, 66 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
+index 6e6180c..d874453 100644
+--- a/drivers/xen/Kconfig
++++ b/drivers/xen/Kconfig
+@@ -29,6 +29,71 @@ config XEN_DEV_EVTCHN
+ Support for backend device drivers that provide I/O services
+ to other virtual machines.
+
++config XEN_PCIDEV_BACKEND
++ tristate "PCI-device backend driver"
++ depends on PCI
++ depends on XEN_BACKEND
++ help
++ The PCI device backend driver allows the kernel to export arbitrary
++ PCI devices to other guests. If you select this to be a module, you
++ will need to make sure no other driver has bound to the device(s)
++ you want to make visible to other guests.
++
++choice
++ prompt "PCI Backend Mode"
++ depends on XEN_PCIDEV_BACKEND
++ default XEN_PCIDEV_BACKEND_VPCI if !IA64
++ default XEN_PCIDEV_BACKEND_CONTROLLER if IA64
++
++config XEN_PCIDEV_BACKEND_VPCI
++ bool "Virtual PCI"
++ ---help---
++ This PCI Backend hides the true PCI topology and makes the frontend
++ think there is a single PCI bus with only the exported devices on it.
++ For example, a device at 03:05.0 will be re-assigned to 00:00.0. A
++ second device at 02:1a.1 will be re-assigned to 00:01.1.
++
++config XEN_PCIDEV_BACKEND_PASS
++ bool "Passthrough"
++ ---help---
++ This PCI Backend provides a real view of the PCI topology to the
++ frontend (for example, a device at 06:01.b will still appear at
++ 06:01.b to the frontend). This is similar to how Xen 2.0.x exposed
++ PCI devices to its driver domains. This may be required for drivers
++ which depend on finding their hardware in certain bus/slot
++ locations.
++
++config XEN_PCIDEV_BACKEND_SLOT
++ bool "Slot"
++ ---help---
++ This PCI Backend hides the true PCI topology and makes the frontend
++ think there is a single PCI bus with only the exported devices on it.
++ Contrary to the virtual PCI backend, a function becomes a new slot.
++ For example, a device at 03:05.2 will be re-assigned to 00:00.0. A
++ second device at 02:1a.1 will be re-assigned to 00:01.0.
++
++config XEN_PCIDEV_BACKEND_CONTROLLER
++ bool "Controller"
++ depends on IA64
++ ---help---
++ This PCI backend virtualizes the PCI bus topology by providing a
++ virtual bus per PCI root device. Devices which are physically under
++ the same root bus will appear on the same virtual bus. For systems
++ with complex I/O addressing, this is the only backend which supports
++ extended I/O port spaces and MMIO translation offsets. This backend
++ also supports slot virtualization. For example, a device at
++ 0000:01:02.1 will be re-assigned to 0000:00:00.0. A second device
++ at 0000:02:05.0 (behind a P2P bridge on bus 0000:01) will be
++ re-assigned to 0000:00:01.0. A third device at 0000:16:05.0 (under
++ a different PCI root bus) will be re-assigned to 0000:01:00.0.
++
++endchoice
++
++config XEN_PCIDEV_BE_DEBUG
++ bool "PCI Backend Debugging"
++ depends on XEN_PCIDEV_BACKEND
++
++
+ config XENFS
+ tristate "Xen filesystem"
+ default y
+diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
+index eb8a78d..3737dee 100644
+--- a/drivers/xen/Makefile
++++ b/drivers/xen/Makefile
+@@ -9,6 +9,7 @@ obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
+ obj-$(CONFIG_XEN_BALLOON) += balloon.o
+ obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o
+ obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o
++obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback/
+ obj-$(CONFIG_XENFS) += xenfs/
+ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
+ obj-$(CONFIG_XEN_PLATFORM_PCI) += platform-pci.o
+--
+1.7.3.4
+
+
+From c164cd8577017d1c4e001b475fadddc7d2ff5c78 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Thu, 5 Nov 2009 15:25:43 -0500
+Subject: [PATCH 015/139] xen-pciback: Return the physical IRQ number instead of the allocated IRQ number to pcifront.
+
+The allocation of IRQ numbers in Linux privileged domains is based
+on finding the first unbound IRQ number. After the allocation is done
+a HYPERCALL to Xen is done, which allocates a PIRQ globally.
+That PIRQ->IRQ binding is saved in data structures that are used
+during ISR executions.
+
+Before this patch, for non-privileged domains we would return the local
+IRQ number instead of the PIRQ. The non-privileged domains require the
+PIRQ so that they can attach their own interrupt handler to it.
+Fortunately there is a function, 'xen_gsi_from_irq' that returns
+that global IRQ number.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/pciback/conf_space_capability_msi.c | 12 ++++++++----
+ 1 files changed, 8 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c
+index 762e396..7fb5371 100644
+--- a/drivers/xen/pciback/conf_space_capability_msi.c
++++ b/drivers/xen/pciback/conf_space_capability_msi.c
+@@ -6,6 +6,7 @@
+ #include "conf_space.h"
+ #include "conf_space_capability.h"
+ #include <xen/interface/io/pciif.h>
++#include <xen/events.h>
+ #include "pciback.h"
+
+ int pciback_enable_msi(struct pciback_device *pdev,
+@@ -22,7 +23,9 @@ int pciback_enable_msi(struct pciback_device *pdev,
+ return XEN_PCI_ERR_op_failed;
+ }
+
+- op->value = dev->irq;
++ /* The value the guest needs is actually the IDT vector, not the
++ * the local domain's IRQ number. */
++ op->value = xen_gsi_from_irq(dev->irq);
+ return 0;
+ }
+
+@@ -31,7 +34,7 @@ int pciback_disable_msi(struct pciback_device *pdev,
+ {
+ pci_disable_msi(dev);
+
+- op->value = dev->irq;
++ op->value = xen_gsi_from_irq(dev->irq);
+ return 0;
+ }
+
+@@ -57,7 +60,8 @@ int pciback_enable_msix(struct pciback_device *pdev,
+
+ for (i = 0; i < op->value; i++) {
+ op->msix_entries[i].entry = entries[i].entry;
+- op->msix_entries[i].vector = entries[i].vector;
++ op->msix_entries[i].vector =
++ xen_gsi_from_irq(entries[i].vector);
+ }
+
+ kfree(entries);
+@@ -73,7 +77,7 @@ int pciback_disable_msix(struct pciback_device *pdev,
+
+ pci_disable_msix(dev);
+
+- op->value = dev->irq;
++ op->value = xen_gsi_from_irq(dev->irq);
+ return 0;
+ }
+
+--
+1.7.3.4
+
+
+From b0b035f1de3282aa96a6dc28007d513e8fce793d Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Thu, 5 Nov 2009 15:25:44 -0500
+Subject: [PATCH 016/139] xen-pciback: Fix checkpatch warnings and errors for pciback/ directory.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/pciback/conf_space.c | 34 +++---
+ drivers/xen/pciback/conf_space.h | 30 ++--
+ drivers/xen/pciback/conf_space_capability.c | 5 +-
+ drivers/xen/pciback/conf_space_capability.h | 3 +
+ drivers/xen/pciback/conf_space_capability_msi.c | 3 +-
+ drivers/xen/pciback/conf_space_capability_pm.c | 4 +-
+ drivers/xen/pciback/conf_space_capability_vpd.c | 2 +-
+ drivers/xen/pciback/conf_space_header.c | 7 +-
+ drivers/xen/pciback/conf_space_quirks.c | 16 ++-
+ drivers/xen/pciback/controller.c | 15 +-
+ drivers/xen/pciback/passthrough.c | 6 +-
+ drivers/xen/pciback/pci_stub.c | 165 +++++++++++------------
+ drivers/xen/pciback/pciback.h | 28 +++--
+ drivers/xen/pciback/pciback_ops.c | 74 +++++------
+ drivers/xen/pciback/slot.c | 22 ++--
+ drivers/xen/pciback/vpci.c | 28 ++--
+ drivers/xen/pciback/xenbus.c | 42 +++---
+ 17 files changed, 245 insertions(+), 239 deletions(-)
+
+diff --git a/drivers/xen/pciback/conf_space.c b/drivers/xen/pciback/conf_space.c
+index 0c76db1..370c18e 100644
+--- a/drivers/xen/pciback/conf_space.c
++++ b/drivers/xen/pciback/conf_space.c
+@@ -18,11 +18,11 @@
+ static int permissive;
+ module_param(permissive, bool, 0644);
+
+-#define DEFINE_PCI_CONFIG(op,size,type) \
++#define DEFINE_PCI_CONFIG(op, size, type) \
+ int pciback_##op##_config_##size \
+ (struct pci_dev *dev, int offset, type value, void *data) \
+ { \
+- return pci_##op##_config_##size (dev, offset, value); \
++ return pci_##op##_config_##size(dev, offset, value); \
+ }
+
+ DEFINE_PCI_CONFIG(read, byte, u8 *)
+@@ -139,14 +139,15 @@ static int pcibios_err_to_errno(int err)
+ }
+
+ int pciback_config_read(struct pci_dev *dev, int offset, int size,
+- u32 * ret_val)
++ u32 *ret_val)
+ {
+ int err = 0;
+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
+ const struct config_field_entry *cfg_entry;
+ const struct config_field *field;
+ int req_start, req_end, field_start, field_end;
+- /* if read fails for any reason, return 0 (as if device didn't respond) */
++ /* if read fails for any reason, return 0
++ * (as if device didn't respond) */
+ u32 value = 0, tmp_val;
+
+ if (unlikely(verbose_request))
+@@ -161,10 +162,10 @@ int pciback_config_read(struct pci_dev *dev, int offset, int size,
+ /* Get the real value first, then modify as appropriate */
+ switch (size) {
+ case 1:
+- err = pci_read_config_byte(dev, offset, (u8 *) & value);
++ err = pci_read_config_byte(dev, offset, (u8 *) &value);
+ break;
+ case 2:
+- err = pci_read_config_word(dev, offset, (u16 *) & value);
++ err = pci_read_config_word(dev, offset, (u16 *) &value);
+ break;
+ case 4:
+ err = pci_read_config_dword(dev, offset, &value);
+@@ -192,7 +193,7 @@ int pciback_config_read(struct pci_dev *dev, int offset, int size,
+ }
+ }
+
+- out:
++out:
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x = %x\n",
+ pci_name(dev), size, offset, value);
+@@ -276,8 +277,8 @@ int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value)
+ } else if (!dev_data->warned_on_write) {
+ dev_data->warned_on_write = 1;
+ dev_warn(&dev->dev, "Driver tried to write to a "
+- "read-only configuration space field at offset "
+- "0x%x, size %d. This may be harmless, but if "
++ "read-only configuration space field at offset"
++ " 0x%x, size %d. This may be harmless, but if "
+ "you have problems with your device:\n"
+ "1) see permissive attribute in sysfs\n"
+ "2) report problems to the xen-devel "
+@@ -295,8 +296,8 @@ void pciback_config_free_dyn_fields(struct pci_dev *dev)
+ struct config_field_entry *cfg_entry, *t;
+ const struct config_field *field;
+
+- dev_dbg(&dev->dev,
+- "free-ing dynamically allocated virtual configuration space fields\n");
++ dev_dbg(&dev->dev, "free-ing dynamically allocated virtual "
++ "configuration space fields\n");
+ if (!dev_data)
+ return;
+
+@@ -306,8 +307,7 @@ void pciback_config_free_dyn_fields(struct pci_dev *dev)
+ if (field->clean) {
+ field->clean((struct config_field *)field);
+
+- if (cfg_entry->data)
+- kfree(cfg_entry->data);
++ kfree(cfg_entry->data);
+
+ list_del(&cfg_entry->list);
+ kfree(cfg_entry);
+@@ -376,7 +376,7 @@ int pciback_config_add_field_offset(struct pci_dev *dev,
+ cfg_entry->base_offset = base_offset;
+
+ /* silently ignore duplicate fields */
+- err = pciback_field_is_dup(dev,OFFSET(cfg_entry));
++ err = pciback_field_is_dup(dev, OFFSET(cfg_entry));
+ if (err)
+ goto out;
+
+@@ -395,14 +395,14 @@ int pciback_config_add_field_offset(struct pci_dev *dev,
+ OFFSET(cfg_entry));
+ list_add_tail(&cfg_entry->list, &dev_data->config_fields);
+
+- out:
++out:
+ if (err)
+ kfree(cfg_entry);
+
+ return err;
+ }
+
+-/* This sets up the device's virtual configuration space to keep track of
++/* This sets up the device's virtual configuration space to keep track of
+ * certain registers (like the base address registers (BARs) so that we can
+ * keep the client from manipulating them directly.
+ */
+@@ -425,7 +425,7 @@ int pciback_config_init_dev(struct pci_dev *dev)
+
+ err = pciback_config_quirks_init(dev);
+
+- out:
++out:
+ return err;
+ }
+
+diff --git a/drivers/xen/pciback/conf_space.h b/drivers/xen/pciback/conf_space.h
+index fe746ef..50ebef2 100644
+--- a/drivers/xen/pciback/conf_space.h
++++ b/drivers/xen/pciback/conf_space.h
+@@ -11,21 +11,21 @@
+ #include <linux/err.h>
+
+ /* conf_field_init can return an errno in a ptr with ERR_PTR() */
+-typedef void *(*conf_field_init) (struct pci_dev * dev, int offset);
+-typedef void (*conf_field_reset) (struct pci_dev * dev, int offset, void *data);
+-typedef void (*conf_field_free) (struct pci_dev * dev, int offset, void *data);
++typedef void *(*conf_field_init) (struct pci_dev *dev, int offset);
++typedef void (*conf_field_reset) (struct pci_dev *dev, int offset, void *data);
++typedef void (*conf_field_free) (struct pci_dev *dev, int offset, void *data);
+
+-typedef int (*conf_dword_write) (struct pci_dev * dev, int offset, u32 value,
++typedef int (*conf_dword_write) (struct pci_dev *dev, int offset, u32 value,
+ void *data);
+-typedef int (*conf_word_write) (struct pci_dev * dev, int offset, u16 value,
++typedef int (*conf_word_write) (struct pci_dev *dev, int offset, u16 value,
+ void *data);
+-typedef int (*conf_byte_write) (struct pci_dev * dev, int offset, u8 value,
++typedef int (*conf_byte_write) (struct pci_dev *dev, int offset, u8 value,
+ void *data);
+-typedef int (*conf_dword_read) (struct pci_dev * dev, int offset, u32 * value,
++typedef int (*conf_dword_read) (struct pci_dev *dev, int offset, u32 *value,
+ void *data);
+-typedef int (*conf_word_read) (struct pci_dev * dev, int offset, u16 * value,
++typedef int (*conf_word_read) (struct pci_dev *dev, int offset, u16 *value,
+ void *data);
+-typedef int (*conf_byte_read) (struct pci_dev * dev, int offset, u8 * value,
++typedef int (*conf_byte_read) (struct pci_dev *dev, int offset, u8 *value,
+ void *data);
+
+ /* These are the fields within the configuration space which we
+@@ -39,7 +39,7 @@ struct config_field {
+ conf_field_init init;
+ conf_field_reset reset;
+ conf_field_free release;
+- void (*clean) (struct config_field * field);
++ void (*clean) (struct config_field *field);
+ union {
+ struct {
+ conf_dword_write write;
+@@ -92,8 +92,8 @@ static inline int pciback_config_add_fields(struct pci_dev *dev,
+ }
+
+ static inline int pciback_config_add_fields_offset(struct pci_dev *dev,
+- const struct config_field *field,
+- unsigned int offset)
++ const struct config_field *field,
++ unsigned int offset)
+ {
+ int i, err = 0;
+ for (i = 0; field[i].size != 0; i++) {
+@@ -105,11 +105,11 @@ static inline int pciback_config_add_fields_offset(struct pci_dev *dev,
+ }
+
+ /* Read/Write the real configuration space */
+-int pciback_read_config_byte(struct pci_dev *dev, int offset, u8 * value,
++int pciback_read_config_byte(struct pci_dev *dev, int offset, u8 *value,
+ void *data);
+-int pciback_read_config_word(struct pci_dev *dev, int offset, u16 * value,
++int pciback_read_config_word(struct pci_dev *dev, int offset, u16 *value,
+ void *data);
+-int pciback_read_config_dword(struct pci_dev *dev, int offset, u32 * value,
++int pciback_read_config_dword(struct pci_dev *dev, int offset, u32 *value,
+ void *data);
+ int pciback_write_config_byte(struct pci_dev *dev, int offset, u8 value,
+ void *data);
+diff --git a/drivers/xen/pciback/conf_space_capability.c b/drivers/xen/pciback/conf_space_capability.c
+index 50efca4..0ea84d6 100644
+--- a/drivers/xen/pciback/conf_space_capability.c
++++ b/drivers/xen/pciback/conf_space_capability.c
+@@ -53,13 +53,10 @@ int pciback_config_capability_add_fields(struct pci_dev *dev)
+ }
+ }
+
+- out:
++out:
+ return err;
+ }
+
+-extern struct pciback_config_capability pciback_config_capability_vpd;
+-extern struct pciback_config_capability pciback_config_capability_pm;
+-
+ int pciback_config_capability_init(void)
+ {
+ register_capability(&pciback_config_capability_vpd);
+diff --git a/drivers/xen/pciback/conf_space_capability.h b/drivers/xen/pciback/conf_space_capability.h
+index 823392e..8da3ac4 100644
+--- a/drivers/xen/pciback/conf_space_capability.h
++++ b/drivers/xen/pciback/conf_space_capability.h
+@@ -20,4 +20,7 @@ struct pciback_config_capability {
+ const struct config_field *fields;
+ };
+
++extern struct pciback_config_capability pciback_config_capability_vpd;
++extern struct pciback_config_capability pciback_config_capability_pm;
++
+ #endif
+diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c
+index 7fb5371..b70ea8b 100644
+--- a/drivers/xen/pciback/conf_space_capability_msi.c
++++ b/drivers/xen/pciback/conf_space_capability_msi.c
+@@ -18,7 +18,8 @@ int pciback_enable_msi(struct pciback_device *pdev,
+ status = pci_enable_msi(dev);
+
+ if (status) {
+- printk("error enable msi for guest %x status %x\n", otherend, status);
++ printk(KERN_ERR "error enable msi for guest %x status %x\n",
++ otherend, status);
+ op->value = 0;
+ return XEN_PCI_ERR_op_failed;
+ }
+diff --git a/drivers/xen/pciback/conf_space_capability_pm.c b/drivers/xen/pciback/conf_space_capability_pm.c
+index e1d3af4..0442616 100644
+--- a/drivers/xen/pciback/conf_space_capability_pm.c
++++ b/drivers/xen/pciback/conf_space_capability_pm.c
+@@ -20,7 +20,7 @@ static int pm_caps_read(struct pci_dev *dev, int offset, u16 *value,
+
+ *value = real_value & ~PCI_PM_CAP_PME_MASK;
+
+- out:
++out:
+ return err;
+ }
+
+@@ -77,7 +77,7 @@ static void *pm_ctrl_init(struct pci_dev *dev, int offset)
+ err = pci_write_config_word(dev, offset, value);
+ }
+
+- out:
++out:
+ return ERR_PTR(err);
+ }
+
+diff --git a/drivers/xen/pciback/conf_space_capability_vpd.c b/drivers/xen/pciback/conf_space_capability_vpd.c
+index 920cb4a..e7b4d66 100644
+--- a/drivers/xen/pciback/conf_space_capability_vpd.c
++++ b/drivers/xen/pciback/conf_space_capability_vpd.c
+@@ -33,7 +33,7 @@ static const struct config_field caplist_vpd[] = {
+ },
+ {}
+ };
+-
++
+ struct pciback_config_capability pciback_config_capability_vpd = {
+ .capability = PCI_CAP_ID_VPD,
+ .fields = caplist_vpd,
+diff --git a/drivers/xen/pciback/conf_space_header.c b/drivers/xen/pciback/conf_space_header.c
+index 5a9e028..3ae7da1 100644
+--- a/drivers/xen/pciback/conf_space_header.c
++++ b/drivers/xen/pciback/conf_space_header.c
+@@ -51,7 +51,8 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
+ err = pci_set_mwi(dev);
+ if (err) {
+ printk(KERN_WARNING
+- "pciback: %s: cannot enable memory-write-invalidate (%d)\n",
++ "pciback: %s: cannot enable "
++ "memory-write-invalidate (%d)\n",
+ pci_name(dev), err);
+ value &= ~PCI_COMMAND_INVALIDATE;
+ }
+@@ -206,7 +207,7 @@ static int bist_write(struct pci_dev *dev, int offset, u8 value, void *data)
+ || value == PCI_BIST_START)
+ err = pci_write_config_byte(dev, offset, value);
+
+- out:
++out:
+ return err;
+ }
+
+@@ -312,6 +313,6 @@ int pciback_config_header_add_fields(struct pci_dev *dev)
+ break;
+ }
+
+- out:
++out:
+ return err;
+ }
+diff --git a/drivers/xen/pciback/conf_space_quirks.c b/drivers/xen/pciback/conf_space_quirks.c
+index 244a438..45c31fb 100644
+--- a/drivers/xen/pciback/conf_space_quirks.c
++++ b/drivers/xen/pciback/conf_space_quirks.c
+@@ -18,8 +18,10 @@ match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
+ {
+ if ((id->vendor == PCI_ANY_ID || id->vendor == dev->vendor) &&
+ (id->device == PCI_ANY_ID || id->device == dev->device) &&
+- (id->subvendor == PCI_ANY_ID || id->subvendor == dev->subsystem_vendor) &&
+- (id->subdevice == PCI_ANY_ID || id->subdevice == dev->subsystem_device) &&
++ (id->subvendor == PCI_ANY_ID ||
++ id->subvendor == dev->subsystem_vendor) &&
++ (id->subdevice == PCI_ANY_ID ||
++ id->subdevice == dev->subsystem_device) &&
+ !((id->class ^ dev->class) & id->class_mask))
+ return id;
+ return NULL;
+@@ -35,7 +37,7 @@ struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev)
+ tmp_quirk = NULL;
+ printk(KERN_DEBUG
+ "quirk didn't match any device pciback knows about\n");
+- out:
++out:
+ return tmp_quirk;
+ }
+
+@@ -51,7 +53,7 @@ int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg)
+ struct config_field_entry *cfg_entry;
+
+ list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
+- if ( OFFSET(cfg_entry) == reg) {
++ if (OFFSET(cfg_entry) == reg) {
+ ret = 1;
+ break;
+ }
+@@ -84,7 +86,7 @@ int pciback_config_quirks_add_field(struct pci_dev *dev, struct config_field
+
+ pciback_config_add_field(dev, field);
+
+- out:
++out:
+ return err;
+ }
+
+@@ -110,7 +112,7 @@ int pciback_config_quirks_init(struct pci_dev *dev)
+ quirk->pdev = dev;
+
+ register_quirk(quirk);
+- out:
++out:
+ return ret;
+ }
+
+@@ -133,6 +135,6 @@ int pciback_config_quirk_release(struct pci_dev *dev)
+ list_del(&quirk->quirks_list);
+ kfree(quirk);
+
+- out:
++out:
+ return ret;
+ }
+diff --git a/drivers/xen/pciback/controller.c b/drivers/xen/pciback/controller.c
+index 294e48f..7f04f11 100644
+--- a/drivers/xen/pciback/controller.c
++++ b/drivers/xen/pciback/controller.c
+@@ -259,7 +259,7 @@ static acpi_status write_xenbus_resource(struct acpi_resource *res, void *data)
+ !(addr.resource_type == ACPI_IO_RANGE &&
+ addr.info.io.translation))
+ return AE_OK;
+-
++
+ /* Store the resource in xenbus for the guest */
+ len = snprintf(str, sizeof(str), "root-%d-resource-%d",
+ info->root_num, info->resource_count);
+@@ -314,7 +314,7 @@ int pciback_publish_pci_roots(struct pciback_device *pdev,
+ goto out;
+
+ /*
+- * Now figure out which root-%d this belongs to
++ * Now figure out which root-%d this belongs to
+ * so we can associate resources with it.
+ */
+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
+@@ -407,8 +407,8 @@ void pciback_release_devices(struct pciback_device *pdev)
+ pdev->pci_dev_data = NULL;
+ }
+
+-int pciback_get_pcifront_dev(struct pci_dev *pcidev,
+- struct pciback_device *pdev,
++int pciback_get_pcifront_dev(struct pci_dev *pcidev,
++ struct pciback_device *pdev,
+ unsigned int *domain, unsigned int *bus, unsigned int *devfn)
+ {
+ struct controller_dev_data *dev_data = pdev->pci_dev_data;
+@@ -420,13 +420,12 @@ int pciback_get_pcifront_dev(struct pci_dev *pcidev,
+
+ list_for_each_entry(cntrl_entry, &dev_data->list, list) {
+ list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) {
+- if ( (dev_entry->dev->bus->number ==
++ if ((dev_entry->dev->bus->number ==
+ pcidev->bus->number) &&
+- (dev_entry->dev->devfn ==
++ (dev_entry->dev->devfn ==
+ pcidev->devfn) &&
+ (pci_domain_nr(dev_entry->dev->bus) ==
+- pci_domain_nr(pcidev->bus)))
+- {
++ pci_domain_nr(pcidev->bus))) {
+ found = 1;
+ *domain = cntrl_entry->domain;
+ *bus = cntrl_entry->bus;
+diff --git a/drivers/xen/pciback/passthrough.c b/drivers/xen/pciback/passthrough.c
+index 9e7a0c4..5386bebf 100644
+--- a/drivers/xen/pciback/passthrough.c
++++ b/drivers/xen/pciback/passthrough.c
+@@ -165,8 +165,10 @@ void pciback_release_devices(struct pciback_device *pdev)
+ pdev->pci_dev_data = NULL;
+ }
+
+-int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev,
+- unsigned int *domain, unsigned int *bus, unsigned int *devfn)
++int pciback_get_pcifront_dev(struct pci_dev *pcidev,
++ struct pciback_device *pdev,
++ unsigned int *domain, unsigned int *bus,
++ unsigned int *devfn)
+
+ {
+ *domain = pci_domain_nr(pcidev->bus);
+diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
+index d97dac5..28222ee 100644
+--- a/drivers/xen/pciback/pci_stub.c
++++ b/drivers/xen/pciback/pci_stub.c
+@@ -20,7 +20,7 @@
+ #include "conf_space.h"
+ #include "conf_space_quirks.h"
+
+-static char *pci_devs_to_hide = NULL;
++static char *pci_devs_to_hide;
+ wait_queue_head_t aer_wait_queue;
+ /*Add sem for sync AER handling and pciback remove/reconfigue ops,
+ * We want to avoid in middle of AER ops, pciback devices is being removed
+@@ -43,7 +43,7 @@ struct pcistub_device {
+ spinlock_t lock;
+
+ struct pci_dev *dev;
+- struct pciback_device *pdev; /* non-NULL if struct pci_dev is in use */
++ struct pciback_device *pdev;/* non-NULL if struct pci_dev is in use */
+ };
+
+ /* Access to pcistub_devices & seized_devices lists and the initialize_devices
+@@ -55,7 +55,7 @@ static LIST_HEAD(pcistub_devices);
+ /* wait for device_initcall before initializing our devices
+ * (see pcistub_init_devices_late)
+ */
+-static int initialize_devices = 0;
++static int initialize_devices;
+ static LIST_HEAD(seized_devices);
+
+ static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev)
+@@ -132,7 +132,7 @@ static struct pcistub_device *pcistub_device_find(int domain, int bus,
+ /* didn't find it */
+ psdev = NULL;
+
+- out:
++out:
+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+ return psdev;
+ }
+@@ -321,10 +321,10 @@ static int __devinit pcistub_init_device(struct pci_dev *dev)
+
+ return 0;
+
+- config_release:
++config_release:
+ pciback_config_free_dev(dev);
+
+- out:
++out:
+ pci_set_drvdata(dev, NULL);
+ kfree(dev_data);
+ return err;
+@@ -443,7 +443,7 @@ static int __devinit pcistub_probe(struct pci_dev *dev,
+ /* Didn't find the device */
+ err = -ENODEV;
+
+- out:
++out:
+ return err;
+ }
+
+@@ -511,26 +511,24 @@ static void kill_domain_by_device(struct pcistub_device *psdev)
+ int err;
+ char nodename[1024];
+
+- if (!psdev)
++ if (!psdev)
+ dev_err(&psdev->dev->dev,
+ "device is NULL when do AER recovery/kill_domain\n");
+- sprintf(nodename, "/local/domain/0/backend/pci/%d/0",
++ sprintf(nodename, "/local/domain/0/backend/pci/%d/0",
+ psdev->pdev->xdev->otherend_id);
+ nodename[strlen(nodename)] = '\0';
+
+ again:
+ err = xenbus_transaction_start(&xbt);
+- if (err)
+- {
++ if (err) {
+ dev_err(&psdev->dev->dev,
+ "error %d when start xenbus transaction\n", err);
+ return;
+ }
+ /*PV AER handlers will set this flag*/
+- xenbus_printf(xbt, nodename, "aerState" , "aerfail" );
++ xenbus_printf(xbt, nodename, "aerState" , "aerfail");
+ err = xenbus_transaction_end(xbt, 0);
+- if (err)
+- {
++ if (err) {
+ if (err == -EAGAIN)
+ goto again;
+ dev_err(&psdev->dev->dev,
+@@ -541,9 +539,9 @@ again:
+
+ /* For each aer recovery step error_detected, mmio_enabled, etc, front_end and
+ * backend need to have cooperation. In pciback, those steps will do similar
+- * jobs: send service request and waiting for front_end response.
++ * jobs: send service request and waiting for front_end response.
+ */
+-static pci_ers_result_t common_process(struct pcistub_device *psdev,
++static pci_ers_result_t common_process(struct pcistub_device *psdev,
+ pci_channel_state_t state, int aer_cmd, pci_ers_result_t result)
+ {
+ pci_ers_result_t res = result;
+@@ -561,12 +559,12 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
+ if (!ret) {
+ dev_err(&psdev->dev->dev,
+ "pciback: failed to get pcifront device\n");
+- return PCI_ERS_RESULT_NONE;
++ return PCI_ERS_RESULT_NONE;
+ }
+ wmb();
+
+- dev_dbg(&psdev->dev->dev,
+- "pciback: aer_op %x dom %x bus %x devfn %x\n",
++ dev_dbg(&psdev->dev->dev,
++ "pciback: aer_op %x dom %x bus %x devfn %x\n",
+ aer_cmd, aer_op->domain, aer_op->bus, aer_op->devfn);
+ /*local flag to mark there's aer request, pciback callback will use this
+ * flag to judge whether we need to check pci-front give aer service
+@@ -575,21 +573,21 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
+ set_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);
+
+ /*It is possible that a pcifront conf_read_write ops request invokes
+- * the callback which cause the spurious execution of wake_up.
++ * the callback which cause the spurious execution of wake_up.
+ * Yet it is harmless and better than a spinlock here
+ */
+- set_bit(_XEN_PCIB_active,
++ set_bit(_XEN_PCIB_active,
+ (unsigned long *)&psdev->pdev->sh_info->flags);
+ wmb();
+ notify_remote_via_irq(psdev->pdev->evtchn_irq);
+
+ ret = wait_event_timeout(aer_wait_queue, !(test_bit(_XEN_PCIB_active,
+- (unsigned long *)&psdev->pdev->sh_info->flags)), 300*HZ);
++ (unsigned long *)&psdev->pdev->sh_info->flags)), 300*HZ);
+
+ if (!ret) {
+- if (test_bit(_XEN_PCIB_active,
++ if (test_bit(_XEN_PCIB_active,
+ (unsigned long *)&psdev->pdev->sh_info->flags)) {
+- dev_err(&psdev->dev->dev,
++ dev_err(&psdev->dev->dev,
+ "pcifront aer process not responding!\n");
+ clear_bit(_XEN_PCIB_active,
+ (unsigned long *)&psdev->pdev->sh_info->flags);
+@@ -599,16 +597,16 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
+ }
+ clear_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);
+
+- if ( test_bit( _XEN_PCIF_active,
+- (unsigned long*)&psdev->pdev->sh_info->flags)) {
+- dev_dbg(&psdev->dev->dev,
++ if (test_bit(_XEN_PCIF_active,
++ (unsigned long *)&psdev->pdev->sh_info->flags)) {
++ dev_dbg(&psdev->dev->dev,
+ "schedule pci_conf service in pciback \n");
+ test_and_schedule_op(psdev->pdev);
+ }
+
+ res = (pci_ers_result_t)aer_op->err;
+ return res;
+-}
++}
+
+ /*
+ * pciback_slot_reset: it will send the slot_reset request to pcifront in case
+@@ -632,24 +630,22 @@ static pci_ers_result_t pciback_slot_reset(struct pci_dev *dev)
+ PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn));
+
+- if ( !psdev || !psdev->pdev )
+- {
+- dev_err(&dev->dev,
++ if (!psdev || !psdev->pdev) {
++ dev_err(&dev->dev,
+ "pciback device is not found/assigned\n");
+ goto end;
+ }
+
+- if ( !psdev->pdev->sh_info )
+- {
++ if (!psdev->pdev->sh_info) {
+ dev_err(&dev->dev, "pciback device is not connected or owned"
+ " by HVM, kill it\n");
+ kill_domain_by_device(psdev);
+ goto release;
+ }
+
+- if ( !test_bit(_XEN_PCIB_AERHANDLER,
+- (unsigned long *)&psdev->pdev->sh_info->flags) ) {
+- dev_err(&dev->dev,
++ if (!test_bit(_XEN_PCIB_AERHANDLER,
++ (unsigned long *)&psdev->pdev->sh_info->flags)) {
++ dev_err(&dev->dev,
+ "guest with no AER driver should have been killed\n");
+ goto release;
+ }
+@@ -657,7 +653,7 @@ static pci_ers_result_t pciback_slot_reset(struct pci_dev *dev)
+
+ if (result == PCI_ERS_RESULT_NONE ||
+ result == PCI_ERS_RESULT_DISCONNECT) {
+- dev_dbg(&dev->dev,
++ dev_dbg(&dev->dev,
+ "No AER slot_reset service or disconnected!\n");
+ kill_domain_by_device(psdev);
+ }
+@@ -670,9 +666,9 @@ end:
+ }
+
+
+-/*pciback_mmio_enabled: it will send the mmio_enabled request to pcifront
+-* in case of the device driver could provide this service, and then wait
+-* for pcifront ack.
++/*pciback_mmio_enabled: it will send the mmio_enabled request to pcifront
++* in case of the device driver could provide this service, and then wait
++* for pcifront ack
+ * @dev: pointer to PCI devices
+ * return value is used by aer_core do_recovery policy
+ */
+@@ -692,24 +688,22 @@ static pci_ers_result_t pciback_mmio_enabled(struct pci_dev *dev)
+ PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn));
+
+- if ( !psdev || !psdev->pdev )
+- {
+- dev_err(&dev->dev,
++ if (!psdev || !psdev->pdev) {
++ dev_err(&dev->dev,
+ "pciback device is not found/assigned\n");
+ goto end;
+ }
+
+- if ( !psdev->pdev->sh_info )
+- {
++ if (!psdev->pdev->sh_info) {
+ dev_err(&dev->dev, "pciback device is not connected or owned"
+ " by HVM, kill it\n");
+ kill_domain_by_device(psdev);
+ goto release;
+ }
+
+- if ( !test_bit(_XEN_PCIB_AERHANDLER,
+- (unsigned long *)&psdev->pdev->sh_info->flags) ) {
+- dev_err(&dev->dev,
++ if (!test_bit(_XEN_PCIB_AERHANDLER,
++ (unsigned long *)&psdev->pdev->sh_info->flags)) {
++ dev_err(&dev->dev,
+ "guest with no AER driver should have been killed\n");
+ goto release;
+ }
+@@ -717,7 +711,7 @@ static pci_ers_result_t pciback_mmio_enabled(struct pci_dev *dev)
+
+ if (result == PCI_ERS_RESULT_NONE ||
+ result == PCI_ERS_RESULT_DISCONNECT) {
+- dev_dbg(&dev->dev,
++ dev_dbg(&dev->dev,
+ "No AER mmio_enabled service or disconnected!\n");
+ kill_domain_by_device(psdev);
+ }
+@@ -728,8 +722,8 @@ end:
+ return result;
+ }
+
+-/*pciback_error_detected: it will send the error_detected request to pcifront
+-* in case of the device driver could provide this service, and then wait
++/*pciback_error_detected: it will send the error_detected request to pcifront
++* in case of the device driver could provide this service, and then wait
+ * for pcifront ack.
+ * @dev: pointer to PCI devices
+ * @error: the current PCI connection state
+@@ -752,15 +746,13 @@ static pci_ers_result_t pciback_error_detected(struct pci_dev *dev,
+ PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn));
+
+- if ( !psdev || !psdev->pdev )
+- {
+- dev_err(&dev->dev,
++ if (!psdev || !psdev->pdev) {
++ dev_err(&dev->dev,
+ "pciback device is not found/assigned\n");
+ goto end;
+ }
+
+- if ( !psdev->pdev->sh_info )
+- {
++ if (!psdev->pdev->sh_info) {
+ dev_err(&dev->dev, "pciback device is not connected or owned"
+ " by HVM, kill it\n");
+ kill_domain_by_device(psdev);
+@@ -768,8 +760,8 @@ static pci_ers_result_t pciback_error_detected(struct pci_dev *dev,
+ }
+
+ /*Guest owns the device yet no aer handler regiested, kill guest*/
+- if ( !test_bit(_XEN_PCIB_AERHANDLER,
+- (unsigned long *)&psdev->pdev->sh_info->flags) ) {
++ if (!test_bit(_XEN_PCIB_AERHANDLER,
++ (unsigned long *)&psdev->pdev->sh_info->flags)) {
+ dev_dbg(&dev->dev, "guest may have no aer driver, kill it\n");
+ kill_domain_by_device(psdev);
+ goto release;
+@@ -778,7 +770,7 @@ static pci_ers_result_t pciback_error_detected(struct pci_dev *dev,
+
+ if (result == PCI_ERS_RESULT_NONE ||
+ result == PCI_ERS_RESULT_DISCONNECT) {
+- dev_dbg(&dev->dev,
++ dev_dbg(&dev->dev,
+ "No AER error_detected service or disconnected!\n");
+ kill_domain_by_device(psdev);
+ }
+@@ -789,8 +781,8 @@ end:
+ return result;
+ }
+
+-/*pciback_error_resume: it will send the error_resume request to pcifront
+-* in case of the device driver could provide this service, and then wait
++/*pciback_error_resume: it will send the error_resume request to pcifront
++* in case of the device driver could provide this service, and then wait
+ * for pcifront ack.
+ * @dev: pointer to PCI devices
+ */
+@@ -808,29 +800,28 @@ static void pciback_error_resume(struct pci_dev *dev)
+ PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn));
+
+- if ( !psdev || !psdev->pdev )
+- {
+- dev_err(&dev->dev,
++ if (!psdev || !psdev->pdev) {
++ dev_err(&dev->dev,
+ "pciback device is not found/assigned\n");
+ goto end;
+ }
+
+- if ( !psdev->pdev->sh_info )
+- {
++ if (!psdev->pdev->sh_info) {
+ dev_err(&dev->dev, "pciback device is not connected or owned"
+ " by HVM, kill it\n");
+ kill_domain_by_device(psdev);
+ goto release;
+ }
+
+- if ( !test_bit(_XEN_PCIB_AERHANDLER,
+- (unsigned long *)&psdev->pdev->sh_info->flags) ) {
+- dev_err(&dev->dev,
++ if (!test_bit(_XEN_PCIB_AERHANDLER,
++ (unsigned long *)&psdev->pdev->sh_info->flags)) {
++ dev_err(&dev->dev,
+ "guest with no AER driver should have been killed\n");
+ kill_domain_by_device(psdev);
+ goto release;
+ }
+- common_process(psdev, 1, XEN_PCI_OP_aer_resume, PCI_ERS_RESULT_RECOVERED);
++ common_process(psdev, 1, XEN_PCI_OP_aer_resume,
++ PCI_ERS_RESULT_RECOVERED);
+ release:
+ pcistub_device_put(psdev);
+ end:
+@@ -923,8 +914,8 @@ static int pcistub_device_id_remove(int domain, int bus, int slot, int func)
+ unsigned long flags;
+
+ spin_lock_irqsave(&device_ids_lock, flags);
+- list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids, slot_list) {
+-
++ list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids,
++ slot_list) {
+ if (pci_dev_id->domain == domain
+ && pci_dev_id->bus == bus && pci_dev_id->devfn == devfn) {
+ /* Don't break; here because it's possible the same
+@@ -976,7 +967,7 @@ static int pcistub_reg_add(int domain, int bus, int slot, int func, int reg,
+ err = pciback_config_quirks_add_field(dev, field);
+ if (err)
+ kfree(field);
+- out:
++out:
+ return err;
+ }
+
+@@ -992,7 +983,7 @@ static ssize_t pcistub_slot_add(struct device_driver *drv, const char *buf,
+
+ err = pcistub_device_id_add(domain, bus, slot, func);
+
+- out:
++out:
+ if (!err)
+ err = count;
+ return err;
+@@ -1012,7 +1003,7 @@ static ssize_t pcistub_slot_remove(struct device_driver *drv, const char *buf,
+
+ err = pcistub_device_id_remove(domain, bus, slot, func);
+
+- out:
++out:
+ if (!err)
+ err = count;
+ return err;
+@@ -1057,7 +1048,7 @@ static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf,
+
+ err = pcistub_reg_add(domain, bus, slot, func, reg, size, mask);
+
+- out:
++out:
+ if (!err)
+ err = count;
+ return err;
+@@ -1067,7 +1058,6 @@ static ssize_t pcistub_quirk_show(struct device_driver *drv, char *buf)
+ {
+ int count = 0;
+ unsigned long flags;
+- extern struct list_head pciback_quirks;
+ struct pciback_config_quirk *quirk;
+ struct pciback_dev_data *dev_data;
+ const struct config_field *field;
+@@ -1096,12 +1086,13 @@ static ssize_t pcistub_quirk_show(struct device_driver *drv, char *buf)
+
+ count += scnprintf(buf + count, PAGE_SIZE - count,
+ "\t\t%08x:%01x:%08x\n",
+- cfg_entry->base_offset + field->offset,
+- field->size, field->mask);
++ cfg_entry->base_offset +
++ field->offset, field->size,
++ field->mask);
+ }
+ }
+
+- out:
++out:
+ spin_unlock_irqrestore(&device_ids_lock, flags);
+
+ return count;
+@@ -1137,14 +1128,14 @@ static ssize_t permissive_add(struct device_driver *drv, const char *buf,
+ if (!dev_data->permissive) {
+ dev_data->permissive = 1;
+ /* Let user know that what they're doing could be unsafe */
+- dev_warn(&psdev->dev->dev,
+- "enabling permissive mode configuration space accesses!\n");
++ dev_warn(&psdev->dev->dev, "enabling permissive mode "
++ "configuration space accesses!\n");
+ dev_warn(&psdev->dev->dev,
+ "permissive mode is potentially unsafe!\n");
+ }
+- release:
++release:
+ pcistub_device_put(psdev);
+- out:
++out:
+ if (!err)
+ err = count;
+ return err;
+@@ -1264,10 +1255,10 @@ static int __init pcistub_init(void)
+ if (err)
+ pcistub_exit();
+
+- out:
++out:
+ return err;
+
+- parse_error:
++parse_error:
+ printk(KERN_ERR "pciback: Error parsing pci_devs_to_hide at \"%s\"\n",
+ pci_devs_to_hide + pos);
+ return -EINVAL;
+@@ -1276,7 +1267,7 @@ static int __init pcistub_init(void)
+ #ifndef MODULE
+ /*
+ * fs_initcall happens before device_initcall
+- * so pciback *should* get called first (b/c we
++ * so pciback *should* get called first (b/c we
+ * want to suck up any device before other drivers
+ * get a chance by being the first pci device
+ * driver to register)
+diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h
+index 5e8e14e..98e2912 100644
+--- a/drivers/xen/pciback/pciback.h
++++ b/drivers/xen/pciback/pciback.h
+@@ -49,6 +49,12 @@ struct pciback_dev_data {
+ int warned_on_write;
+ };
+
++/* Used by XenBus and pciback_ops.c */
++extern wait_queue_head_t aer_wait_queue;
++extern struct workqueue_struct *pciback_wq;
++/* Used by pcistub.c and conf_space_quirks.c */
++extern struct list_head pciback_quirks;
++
+ /* Get/Put PCI Devices that are hidden from the PCI Backend Domain */
+ struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev,
+ int domain, int bus,
+@@ -67,14 +73,14 @@ void pciback_config_free_dyn_fields(struct pci_dev *dev);
+ void pciback_config_reset_dev(struct pci_dev *dev);
+ void pciback_config_free_dev(struct pci_dev *dev);
+ int pciback_config_read(struct pci_dev *dev, int offset, int size,
+- u32 * ret_val);
++ u32 *ret_val);
+ int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value);
+
+ /* Handle requests for specific devices from the frontend */
+ typedef int (*publish_pci_dev_cb) (struct pciback_device *pdev,
+ unsigned int domain, unsigned int bus,
+ unsigned int devfn, unsigned int devid);
+-typedef int (*publish_pci_root_cb) (struct pciback_device * pdev,
++typedef int (*publish_pci_root_cb) (struct pciback_device *pdev,
+ unsigned int domain, unsigned int bus);
+ int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
+ int devid, publish_pci_dev_cb publish_cb);
+@@ -83,15 +89,17 @@ struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
+ unsigned int domain, unsigned int bus,
+ unsigned int devfn);
+
+-/**
++/**
+ * Add for domain0 PCIE-AER handling. Get guest domain/bus/devfn in pciback
+-* before sending aer request to pcifront, so that guest could identify
++* before sending aer request to pcifront, so that guest could identify
+ * device, coopearte with pciback to finish aer recovery job if device driver
+ * has the capability
+ */
+
+-int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev,
+- unsigned int *domain, unsigned int *bus, unsigned int *devfn);
++int pciback_get_pcifront_dev(struct pci_dev *pcidev,
++ struct pciback_device *pdev,
++ unsigned int *domain, unsigned int *bus,
++ unsigned int *devfn);
+ int pciback_init_devices(struct pciback_device *pdev);
+ int pciback_publish_pci_roots(struct pciback_device *pdev,
+ publish_pci_root_cb cb);
+@@ -106,17 +114,17 @@ void pciback_xenbus_unregister(void);
+
+ #ifdef CONFIG_PCI_MSI
+ int pciback_enable_msi(struct pciback_device *pdev,
+- struct pci_dev *dev, struct xen_pci_op *op);
++ struct pci_dev *dev, struct xen_pci_op *op);
+
+ int pciback_disable_msi(struct pciback_device *pdev,
+- struct pci_dev *dev, struct xen_pci_op *op);
++ struct pci_dev *dev, struct xen_pci_op *op);
+
+
+ int pciback_enable_msix(struct pciback_device *pdev,
+- struct pci_dev *dev, struct xen_pci_op *op);
++ struct pci_dev *dev, struct xen_pci_op *op);
+
+ int pciback_disable_msix(struct pciback_device *pdev,
+- struct pci_dev *dev, struct xen_pci_op *op);
++ struct pci_dev *dev, struct xen_pci_op *op);
+ #endif
+ extern int verbose_request;
+
+diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
+index 6624faf..bf83dca 100644
+--- a/drivers/xen/pciback/pciback_ops.c
++++ b/drivers/xen/pciback/pciback_ops.c
+@@ -5,11 +5,11 @@
+ */
+ #include <linux/module.h>
+ #include <linux/wait.h>
+-#include <asm/bitops.h>
++#include <linux/bitops.h>
+ #include <xen/events.h>
+ #include "pciback.h"
+
+-int verbose_request = 0;
++int verbose_request;
+ module_param(verbose_request, int, 0644);
+
+ /* Ensure a device is "turned off" and ready to be exported.
+@@ -37,12 +37,10 @@ void pciback_reset_device(struct pci_dev *dev)
+ }
+ }
+ }
+-extern wait_queue_head_t aer_wait_queue;
+-extern struct workqueue_struct *pciback_wq;
+ /*
+ * Now the same evtchn is used for both pcifront conf_read_write request
+ * as well as pcie aer front end ack. We use a new work_queue to schedule
+-* pciback conf_read_write service for avoiding confict with aer_core
++* pciback conf_read_write service for avoiding confict with aer_core
+ * do_recovery job which also use the system default work_queue
+ */
+ void test_and_schedule_op(struct pciback_device *pdev)
+@@ -50,14 +48,13 @@ void test_and_schedule_op(struct pciback_device *pdev)
+ /* Check that frontend is requesting an operation and that we are not
+ * already processing a request */
+ if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags)
+- && !test_and_set_bit(_PDEVF_op_active, &pdev->flags))
+- {
++ && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) {
+ queue_work(pciback_wq, &pdev->op_work);
+ }
+ /*_XEN_PCIB_active should have been cleared by pcifront. And also make
+ sure pciback is waiting for ack by checking _PCIB_op_pending*/
+- if (!test_bit(_XEN_PCIB_active,(unsigned long *)&pdev->sh_info->flags)
+- &&test_bit(_PCIB_op_pending, &pdev->flags)) {
++ if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags)
++ && test_bit(_PCIB_op_pending, &pdev->flags)) {
+ wake_up(&aer_wait_queue);
+ }
+ }
+@@ -69,7 +66,8 @@ void test_and_schedule_op(struct pciback_device *pdev)
+
+ void pciback_do_op(struct work_struct *data)
+ {
+- struct pciback_device *pdev = container_of(data, struct pciback_device, op_work);
++ struct pciback_device *pdev =
++ container_of(data, struct pciback_device, op_work);
+ struct pci_dev *dev;
+ struct xen_pci_op *op = &pdev->sh_info->op;
+
+@@ -77,38 +75,36 @@ void pciback_do_op(struct work_struct *data)
+
+ if (dev == NULL)
+ op->err = XEN_PCI_ERR_dev_not_found;
+- else
+- {
+- switch (op->cmd)
+- {
+- case XEN_PCI_OP_conf_read:
+- op->err = pciback_config_read(dev,
+- op->offset, op->size, &op->value);
+- break;
+- case XEN_PCI_OP_conf_write:
+- op->err = pciback_config_write(dev,
+- op->offset, op->size, op->value);
+- break;
++ else {
++ switch (op->cmd) {
++ case XEN_PCI_OP_conf_read:
++ op->err = pciback_config_read(dev,
++ op->offset, op->size, &op->value);
++ break;
++ case XEN_PCI_OP_conf_write:
++ op->err = pciback_config_write(dev,
++ op->offset, op->size, op->value);
++ break;
+ #ifdef CONFIG_PCI_MSI
+- case XEN_PCI_OP_enable_msi:
+- op->err = pciback_enable_msi(pdev, dev, op);
+- break;
+- case XEN_PCI_OP_disable_msi:
+- op->err = pciback_disable_msi(pdev, dev, op);
+- break;
+- case XEN_PCI_OP_enable_msix:
+- op->err = pciback_enable_msix(pdev, dev, op);
+- break;
+- case XEN_PCI_OP_disable_msix:
+- op->err = pciback_disable_msix(pdev, dev, op);
+- break;
++ case XEN_PCI_OP_enable_msi:
++ op->err = pciback_enable_msi(pdev, dev, op);
++ break;
++ case XEN_PCI_OP_disable_msi:
++ op->err = pciback_disable_msi(pdev, dev, op);
++ break;
++ case XEN_PCI_OP_enable_msix:
++ op->err = pciback_enable_msix(pdev, dev, op);
++ break;
++ case XEN_PCI_OP_disable_msix:
++ op->err = pciback_disable_msix(pdev, dev, op);
++ break;
+ #endif
+- default:
+- op->err = XEN_PCI_ERR_not_implemented;
+- break;
++ default:
++ op->err = XEN_PCI_ERR_not_implemented;
++ break;
+ }
+ }
+- /* Tell the driver domain that we're done. */
++ /* Tell the driver domain that we're done. */
+ wmb();
+ clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
+ notify_remote_via_irq(pdev->evtchn_irq);
+@@ -119,7 +115,7 @@ void pciback_do_op(struct work_struct *data)
+ smp_mb__after_clear_bit(); /* /before/ final check for work */
+
+ /* Check to see if the driver domain tried to start another request in
+- * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active.
++ * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active.
+ */
+ test_and_schedule_op(pdev);
+ }
+diff --git a/drivers/xen/pciback/slot.c b/drivers/xen/pciback/slot.c
+index 105a8b6..efb922d 100644
+--- a/drivers/xen/pciback/slot.c
++++ b/drivers/xen/pciback/slot.c
+@@ -65,7 +65,8 @@ int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
+ if (slot_dev->slots[bus][slot] == NULL) {
+ printk(KERN_INFO
+- "pciback: slot: %s: assign to virtual slot %d, bus %d\n",
++ "pciback: slot: %s: assign to virtual "
++ "slot %d, bus %d\n",
+ pci_name(dev), slot, bus);
+ slot_dev->slots[bus][slot] = dev;
+ goto unlock;
+@@ -76,14 +77,14 @@ int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
+ xenbus_dev_fatal(pdev->xdev, err,
+ "No more space on root virtual PCI bus");
+
+- unlock:
++unlock:
+ spin_unlock_irqrestore(&slot_dev->lock, flags);
+
+ /* Publish this device. */
+- if(!err)
++ if (!err)
+ err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, 0), devid);
+
+- out:
++out:
+ return err;
+ }
+
+@@ -105,7 +106,7 @@ void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
+ }
+ }
+
+- out:
++out:
+ spin_unlock_irqrestore(&slot_dev->lock, flags);
+
+ if (found_dev)
+@@ -156,8 +157,10 @@ void pciback_release_devices(struct pciback_device *pdev)
+ pdev->pci_dev_data = NULL;
+ }
+
+-int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev,
+- unsigned int *domain, unsigned int *bus, unsigned int *devfn)
++int pciback_get_pcifront_dev(struct pci_dev *pcidev,
++ struct pciback_device *pdev,
++ unsigned int *domain, unsigned int *bus,
++ unsigned int *devfn)
+ {
+ int slot, busnr;
+ struct slot_dev_data *slot_dev = pdev->pci_dev_data;
+@@ -172,11 +175,12 @@ int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev
+ dev = slot_dev->slots[busnr][slot];
+ if (dev && dev->bus->number == pcidev->bus->number
+ && dev->devfn == pcidev->devfn
+- && pci_domain_nr(dev->bus) == pci_domain_nr(pcidev->bus)) {
++ && pci_domain_nr(dev->bus) ==
++ pci_domain_nr(pcidev->bus)) {
+ found = 1;
+ *domain = 0;
+ *bus = busnr;
+- *devfn = PCI_DEVFN(slot,0);
++ *devfn = PCI_DEVFN(slot, 0);
+ goto out;
+ }
+ }
+diff --git a/drivers/xen/pciback/vpci.c b/drivers/xen/pciback/vpci.c
+index a5b7ece..721b81b 100644
+--- a/drivers/xen/pciback/vpci.c
++++ b/drivers/xen/pciback/vpci.c
+@@ -125,14 +125,14 @@ int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
+ xenbus_dev_fatal(pdev->xdev, err,
+ "No more space on root virtual PCI bus");
+
+- unlock:
++unlock:
+ spin_unlock_irqrestore(&vpci_dev->lock, flags);
+
+ /* Publish this device. */
+- if(!err)
++ if (!err)
+ err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, func), devid);
+
+- out:
++out:
+ return err;
+ }
+
+@@ -158,7 +158,7 @@ void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
+ }
+ }
+
+- out:
++out:
+ spin_unlock_irqrestore(&vpci_dev->lock, flags);
+
+ if (found_dev)
+@@ -176,9 +176,8 @@ int pciback_init_devices(struct pciback_device *pdev)
+
+ spin_lock_init(&vpci_dev->lock);
+
+- for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++ for (slot = 0; slot < PCI_SLOT_MAX; slot++)
+ INIT_LIST_HEAD(&vpci_dev->dev_list[slot]);
+- }
+
+ pdev->pci_dev_data = vpci_dev;
+
+@@ -211,8 +210,10 @@ void pciback_release_devices(struct pciback_device *pdev)
+ pdev->pci_dev_data = NULL;
+ }
+
+-int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev,
+- unsigned int *domain, unsigned int *bus, unsigned int *devfn)
++int pciback_get_pcifront_dev(struct pci_dev *pcidev,
++ struct pciback_device *pdev,
++ unsigned int *domain, unsigned int *bus,
++ unsigned int *devfn)
+ {
+ struct pci_dev_entry *entry;
+ struct pci_dev *dev = NULL;
+@@ -227,15 +228,16 @@ int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev
+ list) {
+ dev = entry->dev;
+ if (dev && dev->bus->number == pcidev->bus->number
+- && pci_domain_nr(dev->bus) == pci_domain_nr(pcidev->bus)
+- && dev->devfn == pcidev->devfn)
+- {
++ && pci_domain_nr(dev->bus) ==
++ pci_domain_nr(pcidev->bus)
++ && dev->devfn == pcidev->devfn) {
+ found = 1;
+ *domain = 0;
+ *bus = 0;
+- *devfn = PCI_DEVFN(slot, PCI_FUNC(pcidev->devfn));
++ *devfn = PCI_DEVFN(slot,
++ PCI_FUNC(pcidev->devfn));
+ }
+- }
++ }
+ }
+ spin_unlock_irqrestore(&vpci_dev->lock, flags);
+ return found;
+diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
+index a85c413..efec585 100644
+--- a/drivers/xen/pciback/xenbus.c
++++ b/drivers/xen/pciback/xenbus.c
+@@ -40,7 +40,7 @@ static struct pciback_device *alloc_pdev(struct xenbus_device *xdev)
+ kfree(pdev);
+ pdev = NULL;
+ }
+- out:
++out:
+ return pdev;
+ }
+
+@@ -111,7 +111,7 @@ static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref,
+ err = 0;
+
+ dev_dbg(&pdev->xdev->dev, "Attached!\n");
+- out:
++out:
+ return err;
+ }
+
+@@ -166,11 +166,10 @@ static int pciback_attach(struct pciback_device *pdev)
+ "Error switching to connected state!");
+
+ dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
+- out:
++out:
+ spin_unlock(&pdev->dev_lock);
+
+- if (magic)
+- kfree(magic);
++ kfree(magic);
+
+ return err;
+ }
+@@ -193,7 +192,7 @@ static int pciback_publish_pci_dev(struct pciback_device *pdev,
+ "%04x:%02x:%02x.%02x", domain, bus,
+ PCI_SLOT(devfn), PCI_FUNC(devfn));
+
+- out:
++out:
+ return err;
+ }
+
+@@ -230,7 +229,7 @@ static int pciback_export_device(struct pciback_device *pdev,
+ * to other driver domains (as he who controls the bridge can disable
+ * it and stop the other devices from working).
+ */
+- out:
++out:
+ return err;
+ }
+
+@@ -253,8 +252,8 @@ static int pciback_remove_device(struct pciback_device *pdev,
+ }
+
+ pciback_release_pci_dev(pdev, dev);
+-
+- out:
++
++out:
+ return err;
+ }
+
+@@ -314,7 +313,7 @@ static int pciback_publish_pci_root(struct pciback_device *pdev,
+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
+ "root_num", "%d", (root_num + 1));
+
+- out:
++out:
+ return err;
+ }
+
+@@ -358,7 +357,7 @@ static int pciback_reconfigure(struct pciback_device *pdev)
+ }
+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, state_str,
+ "%d", &substate);
+- if (err != 1)
++ if (err != 1)
+ substate = XenbusStateUnknown;
+
+ switch (substate) {
+@@ -389,14 +388,15 @@ static int pciback_reconfigure(struct pciback_device *pdev)
+ "configuration");
+ goto out;
+ }
+-
++
+ err = pciback_export_device(pdev, domain, bus, slot,
+ func, i);
+ if (err)
+ goto out;
+
+ /* Publish pci roots. */
+- err = pciback_publish_pci_roots(pdev, pciback_publish_pci_root);
++ err = pciback_publish_pci_roots(pdev,
++ pciback_publish_pci_root);
+ if (err) {
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error while publish PCI root"
+@@ -412,7 +412,7 @@ static int pciback_reconfigure(struct pciback_device *pdev)
+ "Error switching substate of "
+ "dev-%d\n", i);
+ goto out;
+- }
++ }
+ break;
+
+ case XenbusStateClosing:
+@@ -445,7 +445,7 @@ static int pciback_reconfigure(struct pciback_device *pdev)
+
+ err = pciback_remove_device(pdev, domain, bus, slot,
+ func);
+- if(err)
++ if (err)
+ goto out;
+
+ /* TODO: If at some point we implement support for pci
+@@ -466,8 +466,8 @@ static int pciback_reconfigure(struct pciback_device *pdev)
+ "Error switching to reconfigured state!");
+ goto out;
+ }
+-
+- out:
++
++out:
+ spin_unlock(&pdev->dev_lock);
+
+ return 0;
+@@ -591,7 +591,7 @@ static int pciback_setup_backend(struct pciback_device *pdev)
+ xenbus_dev_fatal(pdev->xdev, err, "Error switching "
+ "substate of dev-%d\n", i);
+ goto out;
+- }
++ }
+ }
+
+ err = pciback_publish_pci_roots(pdev, pciback_publish_pci_root);
+@@ -607,7 +607,7 @@ static int pciback_setup_backend(struct pciback_device *pdev)
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error switching to initialised state!");
+
+- out:
++out:
+ spin_unlock(&pdev->dev_lock);
+
+ if (!err)
+@@ -663,7 +663,7 @@ static int pciback_xenbus_probe(struct xenbus_device *dev,
+ */
+ pciback_be_watch(&pdev->be_watch, NULL, 0);
+
+- out:
++out:
+ return err;
+ }
+
+@@ -679,7 +679,7 @@ static int pciback_xenbus_remove(struct xenbus_device *dev)
+
+ static const struct xenbus_device_id xenpci_ids[] = {
+ {"pci"},
+- {{0}},
++ {""},
+ };
+
+ static struct xenbus_driver xenbus_pciback_driver = {
+--
+1.7.3.4
+
+
+From ca1ee0c25b425d9739b1a24cf911de2e041a2514 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Mon, 8 Mar 2010 18:39:15 -0500
+Subject: [PATCH 017/139] xen-pciback: remove driver_data direct access to struct device
+
+The driver core is going to not allow direct access to the
+driver_data pointer in struct device. Instead, the functions
+dev_get_drvdata() and dev_set_drvdata() should be used.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ drivers/xen/pciback/xenbus.c | 8 ++++----
+ 1 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
+index efec585..af6c25a 100644
+--- a/drivers/xen/pciback/xenbus.c
++++ b/drivers/xen/pciback/xenbus.c
+@@ -26,7 +26,7 @@ static struct pciback_device *alloc_pdev(struct xenbus_device *xdev)
+ dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev);
+
+ pdev->xdev = xdev;
+- xdev->dev.driver_data = pdev;
++ dev_set_drvdata(&xdev->dev, pdev);
+
+ spin_lock_init(&pdev->dev_lock);
+
+@@ -75,7 +75,7 @@ static void free_pdev(struct pciback_device *pdev)
+
+ pciback_release_devices(pdev);
+
+- pdev->xdev->dev.driver_data = NULL;
++ dev_set_drvdata(&pdev->xdev->dev, NULL);
+ pdev->xdev = NULL;
+
+ kfree(pdev);
+@@ -476,7 +476,7 @@ out:
+ static void pciback_frontend_changed(struct xenbus_device *xdev,
+ enum xenbus_state fe_state)
+ {
+- struct pciback_device *pdev = xdev->dev.driver_data;
++ struct pciback_device *pdev = dev_get_drvdata(&xdev->dev);
+
+ dev_dbg(&xdev->dev, "fe state changed %d\n", fe_state);
+
+@@ -669,7 +669,7 @@ out:
+
+ static int pciback_xenbus_remove(struct xenbus_device *dev)
+ {
+- struct pciback_device *pdev = dev->dev.driver_data;
++ struct pciback_device *pdev = dev_get_drvdata(&dev->dev);
+
+ if (pdev != NULL)
+ free_pdev(pdev);
+--
+1.7.3.4
+
+
+From 585f088e6aec3e4514ac2563852961f71c74e47e Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Mon, 8 Mar 2010 18:47:55 -0500
+Subject: [PATCH 018/139] xen-pciback: Fix compile error: 'TASK_NORMAL' undeclared.
+
+Both files were missing the #include <linux/sched.h>
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ drivers/xen/pciback/pci_stub.c | 1 +
+ drivers/xen/pciback/pciback_ops.c | 1 +
+ 2 files changed, 2 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
+index 28222ee..6fc0b6e 100644
+--- a/drivers/xen/pciback/pci_stub.c
++++ b/drivers/xen/pciback/pci_stub.c
+@@ -12,6 +12,7 @@
+ #include <linux/kref.h>
+ #include <linux/pci.h>
+ #include <linux/wait.h>
++#include <linux/sched.h>
+ #include <asm/atomic.h>
+ #include <xen/events.h>
+ #include <asm/xen/pci.h>
+diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
+index bf83dca..2b9a93e 100644
+--- a/drivers/xen/pciback/pciback_ops.c
++++ b/drivers/xen/pciback/pciback_ops.c
+@@ -7,6 +7,7 @@
+ #include <linux/wait.h>
+ #include <linux/bitops.h>
+ #include <xen/events.h>
++#include <linux/sched.h>
+ #include "pciback.h"
+
+ int verbose_request;
+--
+1.7.3.4
+
+
+From 03dd111c81bad8e69cdb8b5d67381702adb24593 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Wed, 9 Dec 2009 17:43:16 -0500
+Subject: [PATCH 019/139] xen-pciback: Remove the vestiges of CONFIG_PCI_GUESTDEV.
+
+The same functionality for this (that used to be called
+pci_is_guestdev) is now via: "pci=resource_alignment="
+command line argument.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/pciback/pci_stub.c | 10 ----------
+ 1 files changed, 0 insertions(+), 10 deletions(-)
+
+diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
+index 6fc0b6e..d30aa7c 100644
+--- a/drivers/xen/pciback/pci_stub.c
++++ b/drivers/xen/pciback/pci_stub.c
+@@ -430,16 +430,6 @@ static int __devinit pcistub_probe(struct pci_dev *dev,
+
+ dev_info(&dev->dev, "seizing device\n");
+ err = pcistub_seize(dev);
+-#ifdef CONFIG_PCI_GUESTDEV
+- } else if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
+- if (!pci_is_guestdev(dev)) {
+- err = -ENODEV;
+- goto out;
+- }
+-
+- dev_info(&dev->dev, "seizing device\n");
+- err = pcistub_seize(dev);
+-#endif /* CONFIG_PCI_GUESTDEV */
+ } else
+ /* Didn't find the device */
+ err = -ENODEV;
+--
+1.7.3.4
+
+
+From 30acb3491495a43b59a64612ad92a7a290c59e82 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Wed, 9 Dec 2009 17:43:17 -0500
+Subject: [PATCH 020/139] xen-pciback: Remove deprecated routine to find domain owner of PCI device.
+
+In linux-2.6.18.hg tree the mechanism to find the domain owner was
+for the MSI driver (msi-xen.c) to call in this function to retrieve
+the domain number. This is not the way anymore.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/pciback/pci_stub.c | 19 -------------------
+ 1 files changed, 0 insertions(+), 19 deletions(-)
+
+diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
+index d30aa7c..30e7b59 100644
+--- a/drivers/xen/pciback/pci_stub.c
++++ b/drivers/xen/pciback/pci_stub.c
+@@ -1157,22 +1157,6 @@ static ssize_t permissive_show(struct device_driver *drv, char *buf)
+
+ DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, permissive_add);
+
+-#ifdef CONFIG_PCI_MSI
+-
+-int pciback_get_owner(struct pci_dev *dev)
+-{
+- struct pcistub_device *psdev;
+-
+- psdev = pcistub_device_find(pci_domain_nr(dev->bus), dev->bus->number,
+- PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
+-
+- if (!psdev || !psdev->pdev)
+- return -1;
+-
+- return psdev->pdev->xdev->otherend_id;
+-}
+-#endif
+-
+ static void pcistub_exit(void)
+ {
+ driver_remove_file(&pciback_pci_driver.driver, &driver_attr_new_slot);
+@@ -1183,7 +1167,6 @@ static void pcistub_exit(void)
+ driver_remove_file(&pciback_pci_driver.driver, &driver_attr_permissive);
+
+ pci_unregister_driver(&pciback_pci_driver);
+- WARN_ON(unregister_msi_get_owner(pciback_get_owner));
+ }
+
+ static int __init pcistub_init(void)
+@@ -1241,8 +1224,6 @@ static int __init pcistub_init(void)
+ err = driver_create_file(&pciback_pci_driver.driver,
+ &driver_attr_permissive);
+
+- if (!err)
+- err = register_msi_get_owner(pciback_get_owner);
+ if (err)
+ pcistub_exit();
+
+--
+1.7.3.4
+
+
+From da36c7662d9738ce44c37b4f1f41c045c64d6914 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Tue, 22 Dec 2009 13:53:41 -0500
+Subject: [PATCH 021/139] xen-pciback: Fix compiler warning in pci_stub.c.
+
+warning: the frame size of 1036 bytes is larger than 1024 bytes
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/pciback/pci_stub.c | 5 +++--
+ 1 files changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
+index 30e7b59..0b5a16b 100644
+--- a/drivers/xen/pciback/pci_stub.c
++++ b/drivers/xen/pciback/pci_stub.c
+@@ -496,16 +496,17 @@ static const struct pci_device_id pcistub_ids[] = {
+ {0,},
+ };
+
++#define PCI_NODENAME_MAX 40
+ static void kill_domain_by_device(struct pcistub_device *psdev)
+ {
+ struct xenbus_transaction xbt;
+ int err;
+- char nodename[1024];
++ char nodename[PCI_NODENAME_MAX];
+
+ if (!psdev)
+ dev_err(&psdev->dev->dev,
+ "device is NULL when do AER recovery/kill_domain\n");
+- sprintf(nodename, "/local/domain/0/backend/pci/%d/0",
++ snprintf(nodename, PCI_NODENAME_MAX, "/local/domain/0/backend/pci/%d/0",
+ psdev->pdev->xdev->otherend_id);
+ nodename[strlen(nodename)] = '\0';
+
+--
+1.7.3.4
+
+
+From 83484f34b2cc42807c71514fbabbd40e281ec094 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Tue, 22 Dec 2009 13:53:42 -0500
+Subject: [PATCH 022/139] xen-pciback: Fix compile warning in vpci.c
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+warning: ‘func’ may be used uninitialized in this function
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/pciback/vpci.c | 2 +-
+ 1 files changed, 1 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/xen/pciback/vpci.c b/drivers/xen/pciback/vpci.c
+index 721b81b..2857ab8 100644
+--- a/drivers/xen/pciback/vpci.c
++++ b/drivers/xen/pciback/vpci.c
+@@ -65,7 +65,7 @@ static inline int match_slot(struct pci_dev *l, struct pci_dev *r)
+ int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
+ int devid, publish_pci_dev_cb publish_cb)
+ {
+- int err = 0, slot, func;
++ int err = 0, slot, func = -1;
+ struct pci_dev_entry *t, *dev_entry;
+ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
+ unsigned long flags;
+--
+1.7.3.4
+
+
+From 5612e6358835700c49d8be5671823614ace30c94 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ijc at hellion.org.uk>
+Date: Thu, 3 Dec 2009 21:56:20 +0000
+Subject: [PATCH 023/139] xen: rename pciback module to xen-pciback.
+
+pciback is rather generic for a modular distro style kernel.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Cc: Jeremy Fitzhardinge <jeremy at goop.org>
+Cc: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/pciback/Makefile | 24 ++++++++++++------------
+ 1 files changed, 12 insertions(+), 12 deletions(-)
+
+diff --git a/drivers/xen/pciback/Makefile b/drivers/xen/pciback/Makefile
+index 106dae7..38bc123 100644
+--- a/drivers/xen/pciback/Makefile
++++ b/drivers/xen/pciback/Makefile
+@@ -1,16 +1,16 @@
+-obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback.o
++obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback.o
+
+-pciback-y := pci_stub.o pciback_ops.o xenbus.o
+-pciback-y += conf_space.o conf_space_header.o \
+- conf_space_capability.o \
+- conf_space_capability_vpd.o \
+- conf_space_capability_pm.o \
+- conf_space_quirks.o
+-pciback-$(CONFIG_PCI_MSI) += conf_space_capability_msi.o
+-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_VPCI) += vpci.o
+-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_SLOT) += slot.o
+-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o
+-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_CONTROLLER) += controller.o
++xen-pciback-y := pci_stub.o pciback_ops.o xenbus.o
++xen-pciback-y += conf_space.o conf_space_header.o \
++ conf_space_capability.o \
++ conf_space_capability_vpd.o \
++ conf_space_capability_pm.o \
++ conf_space_quirks.o
++xen-pciback-$(CONFIG_PCI_MSI) += conf_space_capability_msi.o
++xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_VPCI) += vpci.o
++xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_SLOT) += slot.o
++xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o
++xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_CONTROLLER) += controller.o
+
+ ifeq ($(CONFIG_XEN_PCIDEV_BE_DEBUG),y)
+ EXTRA_CFLAGS += -DDEBUG
+--
+1.7.3.4
+
+
+From 245a9ec5ef1f9c8a6bc6b5c0ac1bb616c3c8c979 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Wed, 9 Dec 2009 17:43:15 -0500
+Subject: [PATCH 024/139] xen-pciback: Register the owner (domain) of the PCI device.
+
+When the front-end and back-end start negotiating we register
+the domain that will use the PCI device. Furthermore during shutdown
+of guest or unbinding of the PCI device (and unloading of module)
+from pciback we unregister the domain owner.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/pciback/pci_stub.c | 2 ++
+ drivers/xen/pciback/xenbus.c | 13 +++++++++++++
+ 2 files changed, 15 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
+index 0b5a16b..02178e2 100644
+--- a/drivers/xen/pciback/pci_stub.c
++++ b/drivers/xen/pciback/pci_stub.c
+@@ -90,6 +90,8 @@ static void pcistub_device_release(struct kref *kref)
+
+ dev_dbg(&psdev->dev->dev, "pcistub_device_release\n");
+
++ xen_unregister_device_domain_owner(psdev->dev);
++
+ /* Clean-up the device */
+ pciback_reset_device(psdev->dev);
+ pciback_config_free_dyn_fields(psdev->dev);
+diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
+index af6c25a..d448bf5 100644
+--- a/drivers/xen/pciback/xenbus.c
++++ b/drivers/xen/pciback/xenbus.c
+@@ -10,6 +10,7 @@
+ #include <linux/workqueue.h>
+ #include <xen/xenbus.h>
+ #include <xen/events.h>
++#include <asm/xen/pci.h>
+ #include <linux/workqueue.h>
+ #include "pciback.h"
+
+@@ -221,6 +222,15 @@ static int pciback_export_device(struct pciback_device *pdev,
+ if (err)
+ goto out;
+
++ dev_dbg(&dev->dev, "registering for %d\n", pdev->xdev->otherend_id);
++ if (xen_register_device_domain_owner(dev,
++ pdev->xdev->otherend_id) != 0) {
++ dev_err(&dev->dev, "device has been assigned to another " \
++ "domain! Over-writting the ownership, but beware.\n");
++ xen_unregister_device_domain_owner(dev);
++ xen_register_device_domain_owner(dev, pdev->xdev->otherend_id);
++ }
++
+ /* TODO: It'd be nice to export a bridge and have all of its children
+ * get exported with it. This may be best done in xend (which will
+ * have to calculate resource usage anyway) but we probably want to
+@@ -251,6 +261,9 @@ static int pciback_remove_device(struct pciback_device *pdev,
+ goto out;
+ }
+
++ dev_dbg(&dev->dev, "unregistering for %d\n", pdev->xdev->otherend_id);
++ xen_unregister_device_domain_owner(dev);
++
+ pciback_release_pci_dev(pdev, dev);
+
+ out:
+--
+1.7.3.4
+
+
+From cb6c976606d16119e8608c8bcc1ef9265881dd7f Mon Sep 17 00:00:00 2001
+From: Zhao, Yu <yu.zhao at intel.com>
+Date: Wed, 3 Mar 2010 13:27:55 -0500
+Subject: [PATCH 025/139] xen-pciback: guest SR-IOV support for PV guest
+
+These changes are for PV guest to use Virtual Function. Because the VF's
+vendor, device registers in cfg space are 0xffff, which are invalid and
+ignored by PCI device scan. Values in 'struct pci_dev' are fixed up by
+SR-IOV code, and using these values will present correct VID and DID to
+PV guest kernel.
+
+And command registers in the cfg space are read only 0, which means we
+have to emulate MMIO enable bit (VF only uses MMIO resource) so PV
+kernel can work properly.
+
+Acked-by: jbeulich at novell.com
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ drivers/xen/pciback/conf_space_header.c | 71 ++++++++++++++++++++++++++++--
+ 1 files changed, 66 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/xen/pciback/conf_space_header.c b/drivers/xen/pciback/conf_space_header.c
+index 3ae7da1..1f4f86e 100644
+--- a/drivers/xen/pciback/conf_space_header.c
++++ b/drivers/xen/pciback/conf_space_header.c
+@@ -18,6 +18,25 @@ struct pci_bar_info {
+ #define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO))
+ #define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER)
+
++static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data)
++{
++ int i;
++ int ret;
++
++ ret = pciback_read_config_word(dev, offset, value, data);
++ if (!atomic_read(&dev->enable_cnt))
++ return ret;
++
++ for (i = 0; i < PCI_ROM_RESOURCE; i++) {
++ if (dev->resource[i].flags & IORESOURCE_IO)
++ *value |= PCI_COMMAND_IO;
++ if (dev->resource[i].flags & IORESOURCE_MEM)
++ *value |= PCI_COMMAND_MEMORY;
++ }
++
++ return ret;
++}
++
+ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
+ {
+ int err;
+@@ -142,10 +161,26 @@ static inline void read_dev_bar(struct pci_dev *dev,
+ struct pci_bar_info *bar_info, int offset,
+ u32 len_mask)
+ {
+- pci_read_config_dword(dev, offset, &bar_info->val);
+- pci_write_config_dword(dev, offset, len_mask);
+- pci_read_config_dword(dev, offset, &bar_info->len_val);
+- pci_write_config_dword(dev, offset, bar_info->val);
++ int pos;
++ struct resource *res = dev->resource;
++
++ if (offset == PCI_ROM_ADDRESS || offset == PCI_ROM_ADDRESS1)
++ pos = PCI_ROM_RESOURCE;
++ else {
++ pos = (offset - PCI_BASE_ADDRESS_0) / 4;
++ if (pos && ((res[pos - 1].flags & (PCI_BASE_ADDRESS_SPACE |
++ PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
++ (PCI_BASE_ADDRESS_SPACE_MEMORY |
++ PCI_BASE_ADDRESS_MEM_TYPE_64))) {
++ bar_info->val = res[pos - 1].start >> 32;
++ bar_info->len_val = res[pos - 1].end >> 32;
++ return;
++ }
++ }
++
++ bar_info->val = res[pos].start |
++ (res[pos].flags & PCI_REGION_FLAG_MASK);
++ bar_info->len_val = res[pos].end - res[pos].start + 1;
+ }
+
+ static void *bar_init(struct pci_dev *dev, int offset)
+@@ -186,6 +221,22 @@ static void bar_release(struct pci_dev *dev, int offset, void *data)
+ kfree(data);
+ }
+
++static int pciback_read_vendor(struct pci_dev *dev, int offset,
++ u16 *value, void *data)
++{
++ *value = dev->vendor;
++
++ return 0;
++}
++
++static int pciback_read_device(struct pci_dev *dev, int offset,
++ u16 *value, void *data)
++{
++ *value = dev->device;
++
++ return 0;
++}
++
+ static int interrupt_read(struct pci_dev *dev, int offset, u8 * value,
+ void *data)
+ {
+@@ -213,9 +264,19 @@ out:
+
+ static const struct config_field header_common[] = {
+ {
++ .offset = PCI_VENDOR_ID,
++ .size = 2,
++ .u.w.read = pciback_read_vendor,
++ },
++ {
++ .offset = PCI_DEVICE_ID,
++ .size = 2,
++ .u.w.read = pciback_read_device,
++ },
++ {
+ .offset = PCI_COMMAND,
+ .size = 2,
+- .u.w.read = pciback_read_config_word,
++ .u.w.read = command_read,
+ .u.w.write = command_write,
+ },
+ {
+--
+1.7.3.4
+
+
+From 1d77305c7900f3b6ec5d403d9aba6f0034b0112e Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Wed, 3 Mar 2010 13:38:43 -0500
+Subject: [PATCH 026/139] xen-pciback: Disable MSI/MSI-X when resetting device
+
+In cases where the guest is abruptly killed and has not disabled
+MSI/MSI-X interrupts we want to do that.
+
+Otherwise when the guest is started up and enables MSI, we would
+get a WARN() that the device already had been enabled.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ drivers/xen/pciback/pciback_ops.c | 8 ++++++++
+ 1 files changed, 8 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
+index 2b9a93e..011db67 100644
+--- a/drivers/xen/pciback/pciback_ops.c
++++ b/drivers/xen/pciback/pciback_ops.c
+@@ -23,6 +23,14 @@ void pciback_reset_device(struct pci_dev *dev)
+
+ /* Disable devices (but not bridges) */
+ if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
++#ifdef CONFIG_PCI_MSI
++ /* The guest could have been abruptly killed without
++ * disabling MSI/MSI-X interrupts.*/
++ if (dev->msix_enabled)
++ pci_disable_msix(dev);
++ if (dev->msi_enabled)
++ pci_disable_msi(dev);
++#endif
+ pci_disable_device(dev);
+
+ pci_write_config_word(dev, PCI_COMMAND, 0);
+--
+1.7.3.4
+
+
+From c89edb63b60166fe354493dd465cf5662b2c077d Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Mon, 12 Apr 2010 11:46:00 -0400
+Subject: [PATCH 027/139] xen-pciback: Allocate IRQ handler for device that is shared with guest.
+
+If the pciback module is loaded with fake_irq_handler=1 we install
+for all devices that are to be passed to the guest domain a IRQ handler.
+The IRQ handler will return IRQ_HANDLED or IRQ_NONE depending on
+on the ack_intr flag.
+
+The trigger to install this IRQ handler is when the enable_isr flag
+is set.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ drivers/xen/pciback/pci_stub.c | 13 ++++-
+ drivers/xen/pciback/pciback.h | 12 ++++-
+ drivers/xen/pciback/pciback_ops.c | 95 ++++++++++++++++++++++++++++++++++++-
+ 3 files changed, 115 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
+index 02178e2..45bbe99 100644
+--- a/drivers/xen/pciback/pci_stub.c
++++ b/drivers/xen/pciback/pci_stub.c
+@@ -21,6 +21,8 @@
+ #include "conf_space.h"
+ #include "conf_space_quirks.h"
+
++#define DRV_NAME "pciback"
++
+ static char *pci_devs_to_hide;
+ wait_queue_head_t aer_wait_queue;
+ /*Add sem for sync AER handling and pciback remove/reconfigue ops,
+@@ -290,13 +292,20 @@ static int __devinit pcistub_init_device(struct pci_dev *dev)
+ * would need to be called somewhere to free the memory allocated
+ * here and then to call kfree(pci_get_drvdata(psdev->dev)).
+ */
+- dev_data = kzalloc(sizeof(*dev_data), GFP_ATOMIC);
++ dev_data = kzalloc(sizeof(*dev_data) + strlen(DRV_NAME "[]")
++ + strlen(pci_name(dev)) + 1, GFP_ATOMIC);
+ if (!dev_data) {
+ err = -ENOMEM;
+ goto out;
+ }
+ pci_set_drvdata(dev, dev_data);
+
++ /*
++ * Setup name for fake IRQ handler. It will only be enabled
++ * once the device is turned on by the guest.
++ */
++ sprintf(dev_data->irq_name, DRV_NAME "[%s]", pci_name(dev));
++
+ dev_dbg(&dev->dev, "initializing config\n");
+
+ init_waitqueue_head(&aer_wait_queue);
+@@ -837,7 +846,7 @@ static struct pci_error_handlers pciback_error_handler = {
+ */
+
+ static struct pci_driver pciback_pci_driver = {
+- .name = "pciback",
++ .name = DRV_NAME,
+ .id_table = pcistub_ids,
+ .probe = pcistub_probe,
+ .remove = pcistub_remove,
+diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h
+index 98e2912..9d1b0a6 100644
+--- a/drivers/xen/pciback/pciback.h
++++ b/drivers/xen/pciback/pciback.h
+@@ -45,8 +45,13 @@ struct pciback_device {
+
+ struct pciback_dev_data {
+ struct list_head config_fields;
+- int permissive;
+- int warned_on_write;
++ unsigned int permissive : 1;
++ unsigned int warned_on_write : 1;
++ unsigned int enable_intx : 1;
++ unsigned int isr_on : 1; /* Whether the IRQ handler is installed. */
++ unsigned int ack_intr : 1; /* .. and ACK-ing */
++ unsigned int irq; /* Saved in case device transitions to MSI/MSI-X */
++ char irq_name[0]; /* pciback[000:04:00.0] */
+ };
+
+ /* Used by XenBus and pciback_ops.c */
+@@ -131,3 +136,6 @@ extern int verbose_request;
+ void test_and_schedule_op(struct pciback_device *pdev);
+ #endif
+
++/* Handles shared IRQs that can to device domain and control domain. */
++void pciback_irq_handler(struct pci_dev *dev, int reset);
++irqreturn_t pciback_guest_interrupt(int irq, void *dev_id);
+diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
+index 011db67..cb54893 100644
+--- a/drivers/xen/pciback/pciback_ops.c
++++ b/drivers/xen/pciback/pciback_ops.c
+@@ -13,6 +13,78 @@
+ int verbose_request;
+ module_param(verbose_request, int, 0644);
+
++/* Ensure a device is has the fake IRQ handler "turned on/off" and is
++ * ready to be exported. This MUST be run after pciback_reset_device
++ * which does the actual PCI device enable/disable.
++ */
++void pciback_control_isr(struct pci_dev *dev, int reset)
++{
++ struct pciback_dev_data *dev_data;
++ int rc;
++ int enable = 0;
++
++ dev_data = pci_get_drvdata(dev);
++ if (!dev_data)
++ return;
++
++ /* We don't deal with bridges */
++ if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
++ return;
++
++ if (reset) {
++ dev_data->enable_intx = 0;
++ dev_data->ack_intr = 0;
++ }
++ enable = dev_data->enable_intx;
++
++ /* Asked to disable, but ISR isn't runnig */
++ if (!enable && !dev_data->isr_on)
++ return;
++
++ /* Squirrel away the IRQs in the dev_data. We need this
++ * b/c when device transitions to MSI, the dev->irq is
++ * overwritten with the MSI vector.
++ */
++ if (enable)
++ dev_data->irq = dev->irq;
++
++ dev_dbg(&dev->dev, "%s: #%d %s %s%s %s-> %s\n",
++ dev_data->irq_name,
++ dev_data->irq,
++ pci_is_enabled(dev) ? "on" : "off",
++ dev->msi_enabled ? "MSI" : "",
++ dev->msix_enabled ? "MSI/X" : "",
++ dev_data->isr_on ? "enable" : "disable",
++ enable ? "enable" : "disable");
++
++ if (enable) {
++ rc = request_irq(dev_data->irq,
++ pciback_guest_interrupt, IRQF_SHARED,
++ dev_data->irq_name, dev);
++ if (rc) {
++ dev_err(&dev->dev, "%s: failed to install fake IRQ " \
++ "handler for IRQ %d! (rc:%d)\n", dev_data->irq_name,
++ dev_data->irq, rc);
++ goto out;
++ }
++ }
++ else {
++ free_irq(dev_data->irq, dev);
++ dev_data->irq = 0;
++ }
++ dev_data->isr_on = enable;
++ dev_data->ack_intr = enable;
++out:
++ dev_dbg(&dev->dev, "%s: #%d %s %s%s %s\n",
++ dev_data->irq_name,
++ dev_data->irq,
++ pci_is_enabled(dev) ? "on" : "off",
++ dev->msi_enabled ? "MSI" : "",
++ dev->msix_enabled ? "MSI/X" : "",
++ enable ? (dev_data->isr_on ? "enabled" : "failed to enable") :
++ (dev_data->isr_on ? "failed to disable" : "disabled"));
++}
++
+ /* Ensure a device is "turned off" and ready to be exported.
+ * (Also see pciback_config_reset to ensure virtual configuration space is
+ * ready to be re-exported)
+@@ -21,6 +93,8 @@ void pciback_reset_device(struct pci_dev *dev)
+ {
+ u16 cmd;
+
++ pciback_control_isr(dev, 1 /* reset device */);
++
+ /* Disable devices (but not bridges) */
+ if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
+ #ifdef CONFIG_PCI_MSI
+@@ -78,13 +152,18 @@ void pciback_do_op(struct work_struct *data)
+ struct pciback_device *pdev =
+ container_of(data, struct pciback_device, op_work);
+ struct pci_dev *dev;
++ struct pciback_dev_data *dev_data = NULL;
+ struct xen_pci_op *op = &pdev->sh_info->op;
++ int test_intx = 0;
+
+ dev = pciback_get_pci_dev(pdev, op->domain, op->bus, op->devfn);
+
+ if (dev == NULL)
+ op->err = XEN_PCI_ERR_dev_not_found;
+ else {
++ dev_data = pci_get_drvdata(dev);
++ if (dev_data)
++ test_intx = dev_data->enable_intx;
+ switch (op->cmd) {
+ case XEN_PCI_OP_conf_read:
+ op->err = pciback_config_read(dev,
+@@ -109,10 +188,15 @@ void pciback_do_op(struct work_struct *data)
+ break;
+ #endif
+ default:
+- op->err = XEN_PCI_ERR_not_implemented;
++ op->err = XEN_PCI_ERR_not_implemented;
+ break;
+ }
+ }
++ if (!op->err && dev && dev_data) {
++ /* Transition detected */
++ if ((dev_data->enable_intx != test_intx))
++ pciback_control_isr(dev, 0 /* no reset */);
++ }
+ /* Tell the driver domain that we're done. */
+ wmb();
+ clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
+@@ -137,3 +221,12 @@ irqreturn_t pciback_handle_event(int irq, void *dev_id)
+
+ return IRQ_HANDLED;
+ }
++irqreturn_t pciback_guest_interrupt(int irq, void *dev_id)
++{
++ struct pci_dev *dev = (struct pci_dev *)dev_id;
++ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
++
++ if (dev_data->isr_on && dev_data->ack_intr)
++ return IRQ_HANDLED;
++ return IRQ_NONE;
++}
+--
+1.7.3.4
+
+
+From 29a451f41647deedc2fa535520e648c76755568c Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Mon, 12 Apr 2010 11:47:15 -0400
+Subject: [PATCH 028/139] xen-pciback: Add SysFS instrumentation for the fake IRQ handler.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ drivers/xen/pciback/pci_stub.c | 75 +++++++++++++++++++++++++++++++++++++++-
+ 1 files changed, 74 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
+index 45bbe99..ee2cd68 100644
+--- a/drivers/xen/pciback/pci_stub.c
++++ b/drivers/xen/pciback/pci_stub.c
+@@ -1038,6 +1038,70 @@ static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf)
+
+ DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL);
+
++static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf)
++{
++ struct pcistub_device *psdev;
++ struct pciback_dev_data *dev_data;
++ size_t count = 0;
++ unsigned long flags;
++
++ spin_lock_irqsave(&pcistub_devices_lock, flags);
++ list_for_each_entry(psdev, &pcistub_devices, dev_list) {
++ if (count >= PAGE_SIZE)
++ break;
++ if (!psdev->dev)
++ continue;
++ dev_data = pci_get_drvdata(psdev->dev);
++ if (!dev_data)
++ continue;
++ count +=
++ scnprintf(buf + count, PAGE_SIZE - count, "%s:%s:%sing\n",
++ pci_name(psdev->dev),
++ dev_data->isr_on ? "on" : "off",
++ dev_data->ack_intr ? "ack" : "not ack");
++ }
++ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++ return count;
++}
++
++DRIVER_ATTR(irq_handlers, S_IRUSR, pcistub_irq_handler_show, NULL);
++
++static ssize_t pcistub_irq_handler_switch(struct device_driver *drv,
++ const char *buf,
++ size_t count)
++{
++ struct pcistub_device *psdev;
++ struct pciback_dev_data *dev_data;
++ int domain, bus, slot, func;
++ int err = -ENOENT;
++
++ err = str_to_slot(buf, &domain, &bus, &slot, &func);
++ if (err)
++ goto out;
++
++ psdev = pcistub_device_find(domain, bus, slot, func);
++
++ if (!psdev)
++ goto out;
++
++ dev_data = pci_get_drvdata(psdev->dev);
++ if (!dev_data)
++ goto out;
++
++ dev_dbg(&psdev->dev->dev, "%s fake irq handler: %d->%d\n",
++ dev_data->irq_name, dev_data->isr_on,
++ !dev_data->isr_on);
++
++ dev_data->isr_on = !(dev_data->isr_on);
++ if (dev_data->isr_on)
++ dev_data->ack_intr = 1;
++out:
++ if (!err)
++ err = count;
++ return err;
++}
++DRIVER_ATTR(irq_handler_state, S_IWUSR, NULL, pcistub_irq_handler_switch);
++
+ static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf,
+ size_t count)
+ {
+@@ -1177,7 +1241,10 @@ static void pcistub_exit(void)
+ driver_remove_file(&pciback_pci_driver.driver, &driver_attr_slots);
+ driver_remove_file(&pciback_pci_driver.driver, &driver_attr_quirks);
+ driver_remove_file(&pciback_pci_driver.driver, &driver_attr_permissive);
+-
++ driver_remove_file(&pciback_pci_driver.driver,
++ &driver_attr_irq_handlers);
++ driver_remove_file(&pciback_pci_driver.driver,
++ &driver_attr_irq_handler_state);
+ pci_unregister_driver(&pciback_pci_driver);
+ }
+
+@@ -1236,6 +1303,12 @@ static int __init pcistub_init(void)
+ err = driver_create_file(&pciback_pci_driver.driver,
+ &driver_attr_permissive);
+
++ if (!err)
++ err = driver_create_file(&pciback_pci_driver.driver,
++ &driver_attr_irq_handlers);
++ if (!err)
++ err = driver_create_file(&pciback_pci_driver.driver,
++ &driver_attr_irq_handler_state);
+ if (err)
+ pcistub_exit();
+
+--
+1.7.3.4
+
+
+From 6c7c36d411eeab67192fe0ed96ac1e048b4a1755 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Mon, 19 Apr 2010 14:39:10 -0400
+Subject: [PATCH 029/139] xen-pciback: When device transitions to MSI/MSI-X stop ACK-ing on the
+ legacy interrupt.
+
+But don't remove the irq handler from the legacy interrupt. The device
+might still transition back to the legacy interrupts.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ drivers/xen/pciback/conf_space_capability_msi.c | 17 ++++++++++++++++-
+ 1 files changed, 16 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c
+index b70ea8b..a236e2d 100644
+--- a/drivers/xen/pciback/conf_space_capability_msi.c
++++ b/drivers/xen/pciback/conf_space_capability_msi.c
+@@ -12,6 +12,7 @@
+ int pciback_enable_msi(struct pciback_device *pdev,
+ struct pci_dev *dev, struct xen_pci_op *op)
+ {
++ struct pciback_dev_data *dev_data;
+ int otherend = pdev->xdev->otherend_id;
+ int status;
+
+@@ -27,21 +28,29 @@ int pciback_enable_msi(struct pciback_device *pdev,
+ /* The value the guest needs is actually the IDT vector, not the
+ * the local domain's IRQ number. */
+ op->value = xen_gsi_from_irq(dev->irq);
++ dev_data = pci_get_drvdata(dev);
++ if (dev_data)
++ dev_data->ack_intr = 0;
+ return 0;
+ }
+
+ int pciback_disable_msi(struct pciback_device *pdev,
+ struct pci_dev *dev, struct xen_pci_op *op)
+ {
++ struct pciback_dev_data *dev_data;
+ pci_disable_msi(dev);
+
+ op->value = xen_gsi_from_irq(dev->irq);
++ dev_data = pci_get_drvdata(dev);
++ if (dev_data)
++ dev_data->ack_intr = 1;
+ return 0;
+ }
+
+ int pciback_enable_msix(struct pciback_device *pdev,
+ struct pci_dev *dev, struct xen_pci_op *op)
+ {
++ struct pciback_dev_data *dev_data;
+ int i, result;
+ struct msix_entry *entries;
+
+@@ -68,6 +77,9 @@ int pciback_enable_msix(struct pciback_device *pdev,
+ kfree(entries);
+
+ op->value = result;
++ dev_data = pci_get_drvdata(dev);
++ if (dev_data)
++ dev_data->ack_intr = 0;
+
+ return result;
+ }
+@@ -75,10 +87,13 @@ int pciback_enable_msix(struct pciback_device *pdev,
+ int pciback_disable_msix(struct pciback_device *pdev,
+ struct pci_dev *dev, struct xen_pci_op *op)
+ {
+-
++ struct pciback_dev_data *dev_data;
+ pci_disable_msix(dev);
+
+ op->value = xen_gsi_from_irq(dev->irq);
++ dev_data = pci_get_drvdata(dev);
++ if (dev_data)
++ dev_data->ack_intr = 1;
+ return 0;
+ }
+
+--
+1.7.3.4
+
+
+From c1cc36c68f096f2b1e796ba84d9c583009939d91 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Mon, 19 Apr 2010 14:40:38 -0400
+Subject: [PATCH 030/139] xen-pciback: Enable interrupt handler when device is enabled.
+
+And also request it to be disabled when the device has been
+disabled.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ drivers/xen/pciback/conf_space_header.c | 6 ++++++
+ 1 files changed, 6 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/pciback/conf_space_header.c b/drivers/xen/pciback/conf_space_header.c
+index 1f4f86e..cb450f4 100644
+--- a/drivers/xen/pciback/conf_space_header.c
++++ b/drivers/xen/pciback/conf_space_header.c
+@@ -39,8 +39,10 @@ static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data)
+
+ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
+ {
++ struct pciback_dev_data *dev_data;
+ int err;
+
++ dev_data = pci_get_drvdata(dev);
+ if (!pci_is_enabled(dev) && is_enable_cmd(value)) {
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG "pciback: %s: enable\n",
+@@ -48,11 +50,15 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
+ err = pci_enable_device(dev);
+ if (err)
+ return err;
++ if (dev_data)
++ dev_data->enable_intx = 1;
+ } else if (pci_is_enabled(dev) && !is_enable_cmd(value)) {
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG "pciback: %s: disable\n",
+ pci_name(dev));
+ pci_disable_device(dev);
++ if (dev_data)
++ dev_data->enable_intx = 0;
+ }
+
+ if (!dev->is_busmaster && is_master_cmd(value)) {
+--
+1.7.3.4
+
+
+From a732e3d6ed4831c460586bd7a16ef7f6b7d28936 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Mon, 19 Apr 2010 16:23:06 -0400
+Subject: [PATCH 031/139] xen-pciback: Probe the IRQ line to check if it is not shared.
+
+If it is not shared, we stop ACK-ing the IRQ line as there is
+no need for this irq handler to return IRQ_HANDLED.
+
+We have to do this check much later than the point when the pciback
+and pcifront have started talking as guests doing the hypercall
+that would notify the other guest that the IRQ line is shared
+is done asynchronously.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ drivers/xen/pciback/pci_stub.c | 5 +++--
+ drivers/xen/pciback/pciback.h | 1 +
+ drivers/xen/pciback/pciback_ops.c | 12 +++++++++++-
+ 3 files changed, 15 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
+index ee2cd68..88c7ca1 100644
+--- a/drivers/xen/pciback/pci_stub.c
++++ b/drivers/xen/pciback/pci_stub.c
+@@ -1055,10 +1055,11 @@ static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf)
+ if (!dev_data)
+ continue;
+ count +=
+- scnprintf(buf + count, PAGE_SIZE - count, "%s:%s:%sing\n",
++ scnprintf(buf + count, PAGE_SIZE - count, "%s:%s:%sing:%ld\n",
+ pci_name(psdev->dev),
+ dev_data->isr_on ? "on" : "off",
+- dev_data->ack_intr ? "ack" : "not ack");
++ dev_data->ack_intr ? "ack" : "not ack",
++ dev_data->handled);
+ }
+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+ return count;
+diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h
+index 9d1b0a6..fc31052 100644
+--- a/drivers/xen/pciback/pciback.h
++++ b/drivers/xen/pciback/pciback.h
+@@ -50,6 +50,7 @@ struct pciback_dev_data {
+ unsigned int enable_intx : 1;
+ unsigned int isr_on : 1; /* Whether the IRQ handler is installed. */
+ unsigned int ack_intr : 1; /* .. and ACK-ing */
++ unsigned long handled;
+ unsigned int irq; /* Saved in case device transitions to MSI/MSI-X */
+ char irq_name[0]; /* pciback[000:04:00.0] */
+ };
+diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
+index cb54893..5543881 100644
+--- a/drivers/xen/pciback/pciback_ops.c
++++ b/drivers/xen/pciback/pciback_ops.c
+@@ -226,7 +226,17 @@ irqreturn_t pciback_guest_interrupt(int irq, void *dev_id)
+ struct pci_dev *dev = (struct pci_dev *)dev_id;
+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
+
+- if (dev_data->isr_on && dev_data->ack_intr)
++ if (dev_data->isr_on && dev_data->ack_intr) {
++ dev_data->handled++;
++ if ((dev_data->handled % 1000) == 0) {
++ if (xen_ignore_irq(irq)) {
++ printk(KERN_INFO "%s IRQ line is not shared "
++ "with other domains. Turning ISR off\n",
++ dev_data->irq_name);
++ dev_data->ack_intr = 0;
++ }
++ }
+ return IRQ_HANDLED;
++ }
+ return IRQ_NONE;
+ }
+--
+1.7.3.4
+
+
+From 3312c11c3f9c857b2457c293e6b6e15928a32f32 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Tue, 20 Apr 2010 20:22:40 -0400
+Subject: [PATCH 032/139] xen-pciback: Add debug statements for the MSI/MSI-X configuration module.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ drivers/xen/pciback/conf_space_capability_msi.c | 11 +++++++++++
+ 1 files changed, 11 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c
+index a236e2d..b15131e 100644
+--- a/drivers/xen/pciback/conf_space_capability_msi.c
++++ b/drivers/xen/pciback/conf_space_capability_msi.c
+@@ -16,6 +16,9 @@ int pciback_enable_msi(struct pciback_device *pdev,
+ int otherend = pdev->xdev->otherend_id;
+ int status;
+
++ if (unlikely(verbose_request))
++ printk(KERN_DEBUG "pciback: %s: enable MSI\n", pci_name(dev));
++
+ status = pci_enable_msi(dev);
+
+ if (status) {
+@@ -31,6 +34,7 @@ int pciback_enable_msi(struct pciback_device *pdev,
+ dev_data = pci_get_drvdata(dev);
+ if (dev_data)
+ dev_data->ack_intr = 0;
++
+ return 0;
+ }
+
+@@ -38,6 +42,9 @@ int pciback_disable_msi(struct pciback_device *pdev,
+ struct pci_dev *dev, struct xen_pci_op *op)
+ {
+ struct pciback_dev_data *dev_data;
++
++ if (unlikely(verbose_request))
++ printk(KERN_DEBUG "pciback: %s: disable MSI\n", pci_name(dev));
+ pci_disable_msi(dev);
+
+ op->value = xen_gsi_from_irq(dev->irq);
+@@ -54,6 +61,8 @@ int pciback_enable_msix(struct pciback_device *pdev,
+ int i, result;
+ struct msix_entry *entries;
+
++ if (unlikely(verbose_request))
++ printk(KERN_DEBUG "pciback: %s: enable MSI-X\n", pci_name(dev));
+ if (op->value > SH_INFO_MAX_VEC)
+ return -EINVAL;
+
+@@ -88,6 +97,8 @@ int pciback_disable_msix(struct pciback_device *pdev,
+ struct pci_dev *dev, struct xen_pci_op *op)
+ {
+ struct pciback_dev_data *dev_data;
++ if (unlikely(verbose_request))
++ printk(KERN_DEBUG "pciback: %s: disable MSI-X\n", pci_name(dev));
+ pci_disable_msix(dev);
+
+ op->value = xen_gsi_from_irq(dev->irq);
+--
+1.7.3.4
+
+
+From 52257d7ad18bd91fd614df5ef960a88af3ed5200 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Fri, 23 Jul 2010 14:35:47 -0400
+Subject: [PATCH 033/139] xen-pciback: Redo spinlock usage.
+
+We were using coarse spinlocks that could end up with a deadlock.
+This patch fixes that and makes the spinlocks much more fine-grained.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ drivers/xen/pciback/xenbus.c | 34 +++++++++++++++++++++-------------
+ 1 files changed, 21 insertions(+), 13 deletions(-)
+
+diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
+index d448bf5..f0d5426 100644
+--- a/drivers/xen/pciback/xenbus.c
++++ b/drivers/xen/pciback/xenbus.c
+@@ -54,23 +54,31 @@ static void pciback_disconnect(struct pciback_device *pdev)
+ unbind_from_irqhandler(pdev->evtchn_irq, pdev);
+ pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
+ }
++ spin_unlock(&pdev->dev_lock);
+
+ /* If the driver domain started an op, make sure we complete it
+ * before releasing the shared memory */
++
++ /* Note, the workqueue does not use spinlocks at all.*/
+ flush_workqueue(pciback_wq);
+
++ spin_lock(&pdev->dev_lock);
+ if (pdev->sh_info != NULL) {
+ xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info);
+ pdev->sh_info = NULL;
+ }
+-
+ spin_unlock(&pdev->dev_lock);
++
+ }
+
+ static void free_pdev(struct pciback_device *pdev)
+ {
+- if (pdev->be_watching)
++ spin_lock(&pdev->dev_lock);
++ if (pdev->be_watching) {
+ unregister_xenbus_watch(&pdev->be_watch);
++ pdev->be_watching = 0;
++ }
++ spin_unlock(&pdev->dev_lock);
+
+ pciback_disconnect(pdev);
+
+@@ -98,7 +106,10 @@ static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref,
+ "Error mapping other domain page in ours.");
+ goto out;
+ }
++
++ spin_lock(&pdev->dev_lock);
+ pdev->sh_info = vaddr;
++ spin_unlock(&pdev->dev_lock);
+
+ err = bind_interdomain_evtchn_to_irqhandler(
+ pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event,
+@@ -108,7 +119,10 @@ static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref,
+ "Error binding event channel to IRQ");
+ goto out;
+ }
++
++ spin_lock(&pdev->dev_lock);
+ pdev->evtchn_irq = err;
++ spin_unlock(&pdev->dev_lock);
+ err = 0;
+
+ dev_dbg(&pdev->xdev->dev, "Attached!\n");
+@@ -122,7 +136,6 @@ static int pciback_attach(struct pciback_device *pdev)
+ int gnt_ref, remote_evtchn;
+ char *magic = NULL;
+
+- spin_lock(&pdev->dev_lock);
+
+ /* Make sure we only do this setup once */
+ if (xenbus_read_driver_state(pdev->xdev->nodename) !=
+@@ -168,7 +181,6 @@ static int pciback_attach(struct pciback_device *pdev)
+
+ dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
+ out:
+- spin_unlock(&pdev->dev_lock);
+
+ kfree(magic);
+
+@@ -340,7 +352,6 @@ static int pciback_reconfigure(struct pciback_device *pdev)
+ char state_str[64];
+ char dev_str[64];
+
+- spin_lock(&pdev->dev_lock);
+
+ dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n");
+
+@@ -481,8 +492,6 @@ static int pciback_reconfigure(struct pciback_device *pdev)
+ }
+
+ out:
+- spin_unlock(&pdev->dev_lock);
+-
+ return 0;
+ }
+
+@@ -539,8 +548,6 @@ static int pciback_setup_backend(struct pciback_device *pdev)
+ char dev_str[64];
+ char state_str[64];
+
+- spin_lock(&pdev->dev_lock);
+-
+ /* It's possible we could get the call to setup twice, so make sure
+ * we're not already connected.
+ */
+@@ -621,8 +628,6 @@ static int pciback_setup_backend(struct pciback_device *pdev)
+ "Error switching to initialised state!");
+
+ out:
+- spin_unlock(&pdev->dev_lock);
+-
+ if (!err)
+ /* see if pcifront is already configured (if not, we'll wait) */
+ pciback_attach(pdev);
+@@ -669,7 +674,10 @@ static int pciback_xenbus_probe(struct xenbus_device *dev,
+ pciback_be_watch);
+ if (err)
+ goto out;
++
++ spin_lock(&pdev->dev_lock);
+ pdev->be_watching = 1;
++ spin_unlock(&pdev->dev_lock);
+
+ /* We need to force a call to our callback here in case
+ * xend already configured us!
+@@ -708,8 +716,8 @@ int __init pciback_xenbus_register(void)
+ {
+ pciback_wq = create_workqueue("pciback_workqueue");
+ if (!pciback_wq) {
+- printk(KERN_ERR "pciback_xenbus_register: create"
+- "pciback_workqueue failed\n");
++ printk(KERN_ERR "%s: create"
++ "pciback_workqueue failed\n",__FUNCTION__);
+ return -EFAULT;
+ }
+ return xenbus_register_backend(&xenbus_pciback_driver);
+--
+1.7.3.4
+
+
+From a9e0cfab0577730e74787b701edc727756a52b11 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Wed, 28 Jul 2010 13:28:34 -0400
+Subject: [PATCH 034/139] xen-pciback: Remove spinlock for be->watching state.
+
+There is no need to guard this with a spinlock. It
+is already guarded by the xenwatch_thread against multiple
+customers.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ drivers/xen/pciback/xenbus.c | 4 ----
+ 1 files changed, 0 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
+index f0d5426..993b659 100644
+--- a/drivers/xen/pciback/xenbus.c
++++ b/drivers/xen/pciback/xenbus.c
+@@ -73,12 +73,10 @@ static void pciback_disconnect(struct pciback_device *pdev)
+
+ static void free_pdev(struct pciback_device *pdev)
+ {
+- spin_lock(&pdev->dev_lock);
+ if (pdev->be_watching) {
+ unregister_xenbus_watch(&pdev->be_watch);
+ pdev->be_watching = 0;
+ }
+- spin_unlock(&pdev->dev_lock);
+
+ pciback_disconnect(pdev);
+
+@@ -675,9 +673,7 @@ static int pciback_xenbus_probe(struct xenbus_device *dev,
+ if (err)
+ goto out;
+
+- spin_lock(&pdev->dev_lock);
+ pdev->be_watching = 1;
+- spin_unlock(&pdev->dev_lock);
+
+ /* We need to force a call to our callback here in case
+ * xend already configured us!
+--
+1.7.3.4
+
+
+From c0cae0b36c43e75d4d69c60f5319e6ba802b2233 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Mon, 13 Dec 2010 11:06:36 -0500
+Subject: [PATCH 035/139] xen/pciback: Fix checkpatch warnings and errors.
+
+Checkpatch found some extra warnings and errors. This mega
+patch fixes all of them in one swoop.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ arch/x86/include/asm/xen/pci.h | 2 +-
+ drivers/xen/events.c | 38 ++++++++++----------
+ drivers/xen/pciback/conf_space.c | 4 +-
+ drivers/xen/pciback/conf_space_capability_msi.c | 11 +++---
+ drivers/xen/pciback/conf_space_header.c | 42 +++++++++++-----------
+ drivers/xen/pciback/controller.c | 2 +-
+ drivers/xen/pciback/pci_stub.c | 7 ++--
+ drivers/xen/pciback/pciback.h | 16 ++++----
+ drivers/xen/pciback/pciback_ops.c | 9 ++---
+ drivers/xen/pciback/xenbus.c | 14 ++++----
+ 10 files changed, 73 insertions(+), 72 deletions(-)
+
+diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h
+index 8474b4b..7e61d78 100644
+--- a/arch/x86/include/asm/xen/pci.h
++++ b/arch/x86/include/asm/xen/pci.h
+@@ -27,7 +27,7 @@ static inline int xen_find_device_domain_owner(struct pci_dev *dev)
+ return -1;
+ }
+ static inline int xen_register_device_domain_owner(struct pci_dev *dev,
+- uint16_t domain)
++ uint16_t domain)
+ {
+ return -1;
+ }
+diff --git a/drivers/xen/events.c b/drivers/xen/events.c
+index 95eea13..3929c20 100644
+--- a/drivers/xen/events.c
++++ b/drivers/xen/events.c
+@@ -698,7 +698,7 @@ int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
+ domid = rc = xen_find_device_domain_owner(dev);
+ if (rc < 0)
+ domid = DOMID_SELF;
+-
++
+ memset(&map_irq, 0, sizeof(map_irq));
+ map_irq.domid = domid;
+ map_irq.type = MAP_PIRQ_TYPE_MSI;
+@@ -850,18 +850,18 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
+ }
+
+ static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
+- unsigned int remote_port)
++ unsigned int remote_port)
+ {
+- struct evtchn_bind_interdomain bind_interdomain;
+- int err;
++ struct evtchn_bind_interdomain bind_interdomain;
++ int err;
+
+- bind_interdomain.remote_dom = remote_domain;
+- bind_interdomain.remote_port = remote_port;
++ bind_interdomain.remote_dom = remote_domain;
++ bind_interdomain.remote_port = remote_port;
+
+- err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
+- &bind_interdomain);
++ err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
++ &bind_interdomain);
+
+- return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
++ return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
+ }
+
+
+@@ -966,19 +966,19 @@ int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
+ const char *devname,
+ void *dev_id)
+ {
+- int irq, retval;
++ int irq, retval;
+
+- irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port);
+- if (irq < 0)
+- return irq;
++ irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port);
++ if (irq < 0)
++ return irq;
+
+- retval = request_irq(irq, handler, irqflags, devname, dev_id);
+- if (retval != 0) {
+- unbind_from_irq(irq);
+- return retval;
+- }
++ retval = request_irq(irq, handler, irqflags, devname, dev_id);
++ if (retval != 0) {
++ unbind_from_irq(irq);
++ return retval;
++ }
+
+- return irq;
++ return irq;
+ }
+ EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler);
+
+diff --git a/drivers/xen/pciback/conf_space.c b/drivers/xen/pciback/conf_space.c
+index 370c18e..eb6bba0 100644
+--- a/drivers/xen/pciback/conf_space.c
++++ b/drivers/xen/pciback/conf_space.c
+@@ -18,8 +18,8 @@
+ static int permissive;
+ module_param(permissive, bool, 0644);
+
+-#define DEFINE_PCI_CONFIG(op, size, type) \
+-int pciback_##op##_config_##size \
++#define DEFINE_PCI_CONFIG(op, size, type) \
++int pciback_##op##_config_##size \
+ (struct pci_dev *dev, int offset, type value, void *data) \
+ { \
+ return pci_##op##_config_##size(dev, offset, value); \
+diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c
+index b15131e..3acda69 100644
+--- a/drivers/xen/pciback/conf_space_capability_msi.c
++++ b/drivers/xen/pciback/conf_space_capability_msi.c
+@@ -16,7 +16,7 @@ int pciback_enable_msi(struct pciback_device *pdev,
+ int otherend = pdev->xdev->otherend_id;
+ int status;
+
+- if (unlikely(verbose_request))
++ if (unlikely(verbose_request))
+ printk(KERN_DEBUG "pciback: %s: enable MSI\n", pci_name(dev));
+
+ status = pci_enable_msi(dev);
+@@ -43,7 +43,7 @@ int pciback_disable_msi(struct pciback_device *pdev,
+ {
+ struct pciback_dev_data *dev_data;
+
+- if (unlikely(verbose_request))
++ if (unlikely(verbose_request))
+ printk(KERN_DEBUG "pciback: %s: disable MSI\n", pci_name(dev));
+ pci_disable_msi(dev);
+
+@@ -61,7 +61,7 @@ int pciback_enable_msix(struct pciback_device *pdev,
+ int i, result;
+ struct msix_entry *entries;
+
+- if (unlikely(verbose_request))
++ if (unlikely(verbose_request))
+ printk(KERN_DEBUG "pciback: %s: enable MSI-X\n", pci_name(dev));
+ if (op->value > SH_INFO_MAX_VEC)
+ return -EINVAL;
+@@ -97,8 +97,9 @@ int pciback_disable_msix(struct pciback_device *pdev,
+ struct pci_dev *dev, struct xen_pci_op *op)
+ {
+ struct pciback_dev_data *dev_data;
+- if (unlikely(verbose_request))
+- printk(KERN_DEBUG "pciback: %s: disable MSI-X\n", pci_name(dev));
++ if (unlikely(verbose_request))
++ printk(KERN_DEBUG "pciback: %s: disable MSI-X\n",
++ pci_name(dev));
+ pci_disable_msix(dev);
+
+ op->value = xen_gsi_from_irq(dev->irq);
+diff --git a/drivers/xen/pciback/conf_space_header.c b/drivers/xen/pciback/conf_space_header.c
+index cb450f4..22ad0f5 100644
+--- a/drivers/xen/pciback/conf_space_header.c
++++ b/drivers/xen/pciback/conf_space_header.c
+@@ -316,27 +316,27 @@ static const struct config_field header_common[] = {
+ {}
+ };
+
+-#define CFG_FIELD_BAR(reg_offset) \
+- { \
+- .offset = reg_offset, \
+- .size = 4, \
+- .init = bar_init, \
+- .reset = bar_reset, \
+- .release = bar_release, \
+- .u.dw.read = bar_read, \
+- .u.dw.write = bar_write, \
+- }
+-
+-#define CFG_FIELD_ROM(reg_offset) \
+- { \
+- .offset = reg_offset, \
+- .size = 4, \
+- .init = rom_init, \
+- .reset = bar_reset, \
+- .release = bar_release, \
+- .u.dw.read = bar_read, \
+- .u.dw.write = rom_write, \
+- }
++#define CFG_FIELD_BAR(reg_offset) \
++ { \
++ .offset = reg_offset, \
++ .size = 4, \
++ .init = bar_init, \
++ .reset = bar_reset, \
++ .release = bar_release, \
++ .u.dw.read = bar_read, \
++ .u.dw.write = bar_write, \
++ }
++
++#define CFG_FIELD_ROM(reg_offset) \
++ { \
++ .offset = reg_offset, \
++ .size = 4, \
++ .init = rom_init, \
++ .reset = bar_reset, \
++ .release = bar_release, \
++ .u.dw.read = bar_read, \
++ .u.dw.write = rom_write, \
++ }
+
+ static const struct config_field header_0[] = {
+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
+diff --git a/drivers/xen/pciback/controller.c b/drivers/xen/pciback/controller.c
+index 7f04f11..5a7e4cc 100644
+--- a/drivers/xen/pciback/controller.c
++++ b/drivers/xen/pciback/controller.c
+@@ -378,7 +378,7 @@ int pciback_publish_pci_roots(struct pciback_device *pdev,
+ }
+
+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
+- "%lx", (sizeof(struct acpi_resource) * 2) + 1);
++ "%lx", (sizeof(struct acpi_resource) *2) + 1);
+
+ out:
+ spin_unlock(&dev_data->lock);
+diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
+index 88c7ca1..c8f6f29 100644
+--- a/drivers/xen/pciback/pci_stub.c
++++ b/drivers/xen/pciback/pci_stub.c
+@@ -13,7 +13,7 @@
+ #include <linux/pci.h>
+ #include <linux/wait.h>
+ #include <linux/sched.h>
+-#include <asm/atomic.h>
++#include <linux/atomic.h>
+ #include <xen/events.h>
+ #include <asm/xen/pci.h>
+ #include <asm/xen/hypervisor.h>
+@@ -603,7 +603,7 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
+ if (test_bit(_XEN_PCIF_active,
+ (unsigned long *)&psdev->pdev->sh_info->flags)) {
+ dev_dbg(&psdev->dev->dev,
+- "schedule pci_conf service in pciback \n");
++ "schedule pci_conf service in pciback\n");
+ test_and_schedule_op(psdev->pdev);
+ }
+
+@@ -1055,7 +1055,8 @@ static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf)
+ if (!dev_data)
+ continue;
+ count +=
+- scnprintf(buf + count, PAGE_SIZE - count, "%s:%s:%sing:%ld\n",
++ scnprintf(buf + count, PAGE_SIZE - count,
++ "%s:%s:%sing:%ld\n",
+ pci_name(psdev->dev),
+ dev_data->isr_on ? "on" : "off",
+ dev_data->ack_intr ? "ack" : "not ack",
+diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h
+index fc31052..5c14020 100644
+--- a/drivers/xen/pciback/pciback.h
++++ b/drivers/xen/pciback/pciback.h
+@@ -12,7 +12,7 @@
+ #include <linux/list.h>
+ #include <linux/spinlock.h>
+ #include <linux/workqueue.h>
+-#include <asm/atomic.h>
++#include <linux/atomic.h>
+ #include <xen/interface/io/pciif.h>
+
+ struct pci_dev_entry {
+@@ -20,8 +20,8 @@ struct pci_dev_entry {
+ struct pci_dev *dev;
+ };
+
+-#define _PDEVF_op_active (0)
+-#define PDEVF_op_active (1<<(_PDEVF_op_active))
++#define _PDEVF_op_active (0)
++#define PDEVF_op_active (1<<(_PDEVF_op_active))
+ #define _PCIB_op_pending (1)
+ #define PCIB_op_pending (1<<(_PCIB_op_pending))
+
+@@ -45,11 +45,11 @@ struct pciback_device {
+
+ struct pciback_dev_data {
+ struct list_head config_fields;
+- unsigned int permissive : 1;
+- unsigned int warned_on_write : 1;
+- unsigned int enable_intx : 1;
+- unsigned int isr_on : 1; /* Whether the IRQ handler is installed. */
+- unsigned int ack_intr : 1; /* .. and ACK-ing */
++ unsigned int permissive:1;
++ unsigned int warned_on_write:1;
++ unsigned int enable_intx:1;
++ unsigned int isr_on:1; /* Whether the IRQ handler is installed. */
++ unsigned int ack_intr:1; /* .. and ACK-ing */
+ unsigned long handled;
+ unsigned int irq; /* Saved in case device transitions to MSI/MSI-X */
+ char irq_name[0]; /* pciback[000:04:00.0] */
+diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
+index 5543881..9a465e9 100644
+--- a/drivers/xen/pciback/pciback_ops.c
++++ b/drivers/xen/pciback/pciback_ops.c
+@@ -63,12 +63,11 @@ void pciback_control_isr(struct pci_dev *dev, int reset)
+ dev_data->irq_name, dev);
+ if (rc) {
+ dev_err(&dev->dev, "%s: failed to install fake IRQ " \
+- "handler for IRQ %d! (rc:%d)\n", dev_data->irq_name,
+- dev_data->irq, rc);
++ "handler for IRQ %d! (rc:%d)\n",
++ dev_data->irq_name, dev_data->irq, rc);
+ goto out;
+ }
+- }
+- else {
++ } else {
+ free_irq(dev_data->irq, dev);
+ dev_data->irq = 0;
+ }
+@@ -188,7 +187,7 @@ void pciback_do_op(struct work_struct *data)
+ break;
+ #endif
+ default:
+- op->err = XEN_PCI_ERR_not_implemented;
++ op->err = XEN_PCI_ERR_not_implemented;
+ break;
+ }
+ }
+diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
+index 993b659..70030c4 100644
+--- a/drivers/xen/pciback/xenbus.c
++++ b/drivers/xen/pciback/xenbus.c
+@@ -700,12 +700,12 @@ static const struct xenbus_device_id xenpci_ids[] = {
+ };
+
+ static struct xenbus_driver xenbus_pciback_driver = {
+- .name = "pciback",
+- .owner = THIS_MODULE,
+- .ids = xenpci_ids,
+- .probe = pciback_xenbus_probe,
+- .remove = pciback_xenbus_remove,
+- .otherend_changed = pciback_frontend_changed,
++ .name = "pciback",
++ .owner = THIS_MODULE,
++ .ids = xenpci_ids,
++ .probe = pciback_xenbus_probe,
++ .remove = pciback_xenbus_remove,
++ .otherend_changed = pciback_frontend_changed,
+ };
+
+ int __init pciback_xenbus_register(void)
+@@ -713,7 +713,7 @@ int __init pciback_xenbus_register(void)
+ pciback_wq = create_workqueue("pciback_workqueue");
+ if (!pciback_wq) {
+ printk(KERN_ERR "%s: create"
+- "pciback_workqueue failed\n",__FUNCTION__);
++ "pciback_workqueue failed\n", __func__);
+ return -EFAULT;
+ }
+ return xenbus_register_backend(&xenbus_pciback_driver);
+--
+1.7.3.4
+
+
+From 83d24d8dbd9e52a7ac94deae2d9fff6681ce8761 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Mon, 13 Dec 2010 11:30:29 -0500
+Subject: [PATCH 036/139] xen/xen-pciback: Swap over to DEFINE_PCI_DEVICE_TABLE
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ drivers/xen/pciback/pci_stub.c | 2 +-
+ 1 files changed, 1 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
+index c8f6f29..09dd60c 100644
+--- a/drivers/xen/pciback/pci_stub.c
++++ b/drivers/xen/pciback/pci_stub.c
+@@ -497,7 +497,7 @@ static void pcistub_remove(struct pci_dev *dev)
+ }
+ }
+
+-static const struct pci_device_id pcistub_ids[] = {
++DEFINE_PCI_DEVICE_TABLE(pcistub_ids) = {
+ {
+ .vendor = PCI_ANY_ID,
+ .device = PCI_ANY_ID,
+--
+1.7.3.4
+
+
+From 105aad363797212fbd2a4c887b723407c5851175 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Thu, 9 Dec 2010 15:01:11 -0500
+Subject: [PATCH 038/139] xen/irq: Don't fall over when nr_irqs_gsi > nr_irqs.
+
+This scenario, where nr_irqs_gsi is greater than nr_irqs,
+is rather strange, but let's still try to survive. Make sure
+to print a warning so the user won't be surprised in case
+things don't work.
+
+Solves a bootup-crash when booting Xen and Linux under QEMU.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ drivers/xen/events.c | 9 +++++++++
+ 1 files changed, 9 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/events.c b/drivers/xen/events.c
+index 4d4a23d..98b7220 100644
+--- a/drivers/xen/events.c
++++ b/drivers/xen/events.c
+@@ -411,6 +411,7 @@ static int find_unbound_irq(void)
+ if (bottom == nr_irqs)
+ goto no_irqs;
+
++retry:
+ /* This loop starts from the top of IRQ space and goes down.
+ * We need this b/c if we have a PCI device in a Xen PV guest
+ * we do not have an IO-APIC (though the backend might have them)
+@@ -434,6 +435,14 @@ static int find_unbound_irq(void)
+ goto no_irqs;
+
+ res = irq_alloc_desc_at(irq, -1);
++ if (res == -EEXIST) {
++ top--;
++ if (bottom > top)
++ printk(KERN_ERR "Eating in GSI/MSI space (%d)!" \
++ " Your PCI device might not work!\n", top);
++ if (top > NR_IRQS_LEGACY)
++ goto retry;
++ }
+
+ if (WARN_ON(res != irq))
+ return -1;
+--
+1.7.3.4
+
+
+From bfb56cd0b9304ddc6cfb411315bf7e5fea3e8bc7 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Tue, 11 Jan 2011 17:20:13 +0000
+Subject: [PATCH 039/139] xen: handled remapped IRQs when enabling a pcifront PCI device.
+
+This happens to not be an issue currently because we take pains to try
+to ensure that the GSI-IRQ mapping is 1-1 in a PV guest and that
+regular event channels do not clash. However a subsequent patch is
+going to break this 1-1 mapping.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Cc: Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+Cc: Jeremy Fitzhardinge <jeremy at goop.org>
+---
+ arch/x86/pci/xen.c | 22 ++++++++++++++--------
+ 1 files changed, 14 insertions(+), 8 deletions(-)
+
+diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
+index 25cd4a0..2a12f3d 100644
+--- a/arch/x86/pci/xen.c
++++ b/arch/x86/pci/xen.c
+@@ -226,21 +226,27 @@ static int xen_pcifront_enable_irq(struct pci_dev *dev)
+ {
+ int rc;
+ int share = 1;
++ u8 gsi;
+
+- dev_info(&dev->dev, "Xen PCI enabling IRQ: %d\n", dev->irq);
+-
+- if (dev->irq < 0)
+- return -EINVAL;
++ rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi);
++ if (rc < 0) {
++ dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n",
++ rc);
++ return rc;
++ }
+
+- if (dev->irq < NR_IRQS_LEGACY)
++ if (gsi < NR_IRQS_LEGACY)
+ share = 0;
+
+- rc = xen_allocate_pirq(dev->irq, share, "pcifront");
++ rc = xen_allocate_pirq(gsi, share, "pcifront");
+ if (rc < 0) {
+- dev_warn(&dev->dev, "Xen PCI IRQ: %d, failed to register:%d\n",
+- dev->irq, rc);
++ dev_warn(&dev->dev, "Xen PCI: failed to register GSI%d: %d\n",
++ gsi, rc);
+ return rc;
+ }
++
++ dev->irq = rc;
++ dev_info(&dev->dev, "Xen PCI mapped GSI%d to IRQ%d\n", gsi, dev->irq);
+ return 0;
+ }
+
+--
+1.7.3.4
+
+
+From 80b3b503bba489dcbdd808c5dd50a6be3aa06949 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Tue, 11 Jan 2011 17:20:14 +0000
+Subject: [PATCH 040/139] xen:events: move find_unbound_irq inside CONFIG_PCI_MSI
+
+The only caller is xen_allocate_pirq_msi which is also under this
+ifdef so this fixes:
+ drivers/xen/events.c:377: warning: 'find_unbound_pirq' defined but not used
+when CONFIG_PCI_MSI=n
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Cc: Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+Cc: Jeremy Fitzhardinge <jeremy at goop.org>
+---
+ drivers/xen/events.c | 34 +++++++++++++++++-----------------
+ 1 files changed, 17 insertions(+), 17 deletions(-)
+
+diff --git a/drivers/xen/events.c b/drivers/xen/events.c
+index 98b7220..ae8d45d 100644
+--- a/drivers/xen/events.c
++++ b/drivers/xen/events.c
+@@ -384,23 +384,6 @@ static int get_nr_hw_irqs(void)
+ return ret;
+ }
+
+-static int find_unbound_pirq(int type)
+-{
+- int rc, i;
+- struct physdev_get_free_pirq op_get_free_pirq;
+- op_get_free_pirq.type = type;
+-
+- rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
+- if (!rc)
+- return op_get_free_pirq.pirq;
+-
+- for (i = 0; i < nr_irqs; i++) {
+- if (pirq_to_irq[i] < 0)
+- return i;
+- }
+- return -1;
+-}
+-
+ static int find_unbound_irq(void)
+ {
+ struct irq_data *data;
+@@ -683,6 +666,23 @@ out:
+ #include <linux/msi.h>
+ #include "../pci/msi.h"
+
++static int find_unbound_pirq(int type)
++{
++ int rc, i;
++ struct physdev_get_free_pirq op_get_free_pirq;
++ op_get_free_pirq.type = type;
++
++ rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
++ if (!rc)
++ return op_get_free_pirq.pirq;
++
++ for (i = 0; i < nr_irqs; i++) {
++ if (pirq_to_irq[i] < 0)
++ return i;
++ }
++ return -1;
++}
++
+ void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc)
+ {
+ spin_lock(&irq_mapping_update_lock);
+--
+1.7.3.4
+
+
+From c514d00c80574e839d34c239363153b90bb8efcc Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Tue, 11 Jan 2011 17:20:15 +0000
+Subject: [PATCH 041/139] xen: events: add xen_allocate_irq_{dynamic, gsi} and xen_free_irq
+
+This is neater than open-coded calls to irq_alloc_desc_at and
+irq_free_desc.
+
+No intended behavioural change.
+
+Note that we previously were not checking the return value of
+irq_alloc_desc_at which would be failing for GSI<NR_IRQS_LEGACY
+because the core architecture code has already allocated those for
+us. Hence the additional check against NR_IRQS_LEGACY in
+xen_allocate_irq_gsi.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Cc: Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+Cc: Jeremy Fitzhardinge <jeremy at goop.org>
+---
+ drivers/xen/events.c | 53 +++++++++++++++++++++++++++++++++-----------------
+ 1 files changed, 35 insertions(+), 18 deletions(-)
+
+diff --git a/drivers/xen/events.c b/drivers/xen/events.c
+index ae8d45d..74fb216 100644
+--- a/drivers/xen/events.c
++++ b/drivers/xen/events.c
+@@ -384,7 +384,7 @@ static int get_nr_hw_irqs(void)
+ return ret;
+ }
+
+-static int find_unbound_irq(void)
++static int xen_allocate_irq_dynamic(void)
+ {
+ struct irq_data *data;
+ int irq, res;
+@@ -442,6 +442,30 @@ static bool identity_mapped_irq(unsigned irq)
+ return irq < get_nr_hw_irqs();
+ }
+
++static int xen_allocate_irq_gsi(unsigned gsi)
++{
++ int irq;
++
++ if (!identity_mapped_irq(gsi) &&
++ (xen_initial_domain() || !xen_pv_domain()))
++ return xen_allocate_irq_dynamic();
++
++ /* Legacy IRQ descriptors are already allocated by the arch. */
++ if (gsi < NR_IRQS_LEGACY)
++ return gsi;
++
++ irq = irq_alloc_desc_at(gsi, -1);
++ if (irq < 0)
++ panic("Unable to allocate to IRQ%d (%d)\n", gsi, irq);
++
++ return irq;
++}
++
++static void xen_free_irq(unsigned irq)
++{
++ irq_free_desc(irq);
++}
++
+ static void pirq_unmask_notify(int irq)
+ {
+ struct physdev_eoi eoi = { .irq = pirq_from_irq(irq) };
+@@ -627,14 +651,7 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name)
+ goto out; /* XXX need refcount? */
+ }
+
+- /* If we are a PV guest, we don't have GSIs (no ACPI passed). Therefore
+- * we are using the !xen_initial_domain() to drop in the function.*/
+- if (identity_mapped_irq(gsi) || (!xen_initial_domain() &&
+- xen_pv_domain())) {
+- irq = gsi;
+- irq_alloc_desc_at(irq, -1);
+- } else
+- irq = find_unbound_irq();
++ irq = xen_allocate_irq_gsi(gsi);
+
+ set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
+ handle_level_irq, name);
+@@ -647,7 +664,7 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name)
+ * this in the priv domain. */
+ if (xen_initial_domain() &&
+ HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
+- irq_free_desc(irq);
++ xen_free_irq(irq);
+ irq = -ENOSPC;
+ goto out;
+ }
+@@ -688,7 +705,7 @@ void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc)
+ spin_lock(&irq_mapping_update_lock);
+
+ if (alloc & XEN_ALLOC_IRQ) {
+- *irq = find_unbound_irq();
++ *irq = xen_allocate_irq_dynamic();
+ if (*irq == -1)
+ goto out;
+ }
+@@ -738,7 +755,7 @@ int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
+
+ spin_lock(&irq_mapping_update_lock);
+
+- irq = find_unbound_irq();
++ irq = xen_allocate_irq_dynamic();
+
+ if (irq == -1)
+ goto out;
+@@ -747,7 +764,7 @@ int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
+ if (rc) {
+ printk(KERN_WARNING "xen map irq failed %d\n", rc);
+
+- irq_free_desc(irq);
++ xen_free_irq(irq);
+
+ irq = -1;
+ goto out;
+@@ -789,7 +806,7 @@ int xen_destroy_irq(int irq)
+ }
+ irq_info[irq] = mk_unbound_info();
+
+- irq_free_desc(irq);
++ xen_free_irq(irq);
+
+ out:
+ spin_unlock(&irq_mapping_update_lock);
+@@ -820,7 +837,7 @@ int bind_evtchn_to_irq(unsigned int evtchn)
+ irq = evtchn_to_irq[evtchn];
+
+ if (irq == -1) {
+- irq = find_unbound_irq();
++ irq = xen_allocate_irq_dynamic();
+
+ set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
+ handle_fasteoi_irq, "event");
+@@ -845,7 +862,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
+ irq = per_cpu(ipi_to_irq, cpu)[ipi];
+
+ if (irq == -1) {
+- irq = find_unbound_irq();
++ irq = xen_allocate_irq_dynamic();
+ if (irq < 0)
+ goto out;
+
+@@ -881,7 +898,7 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
+ irq = per_cpu(virq_to_irq, cpu)[virq];
+
+ if (irq == -1) {
+- irq = find_unbound_irq();
++ irq = xen_allocate_irq_dynamic();
+
+ set_irq_chip_and_handler_name(irq, &xen_percpu_chip,
+ handle_percpu_irq, "virq");
+@@ -940,7 +957,7 @@ static void unbind_from_irq(unsigned int irq)
+ if (irq_info[irq].type != IRQT_UNBOUND) {
+ irq_info[irq] = mk_unbound_info();
+
+- irq_free_desc(irq);
++ xen_free_irq(irq);
+ }
+
+ spin_unlock(&irq_mapping_update_lock);
+--
+1.7.3.4
+
+
+From 323430f2697268e6261b673fa2b86d6f3f3c7cff Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Tue, 11 Jan 2011 17:20:16 +0000
+Subject: [PATCH 042/139] xen: events: allocate GSIs and dynamic IRQs from separate IRQ ranges.
+
+There are three cases which we need to care about, PV guest, PV domain
+0 and HVM guest.
+
+The PV guest case is simple since it has no access to ACPI or real
+APICs and therefore has no GSIs therefore we simply dynamically
+allocate all IRQs. The potentially interesting case here is PIRQ type
+event channels associated with passed through PCI devices. However
+even in this case the guest has no direct interaction with the
+physical GSI since that happens in the PCI backend.
+
+The PV domain 0 and HVM guest cases are actually the same. In domain 0
+case the kernel sees the host ACPI and GSIs (although it only sees the
+APIC indirectly via the hypervisor) and in the HVM guest case it sees
+the virtualised ACPI and emulated APICs. In these cases we start
+allocating dynamic IRQs at nr_irqs_gsi so that they cannot clash with
+any GSI.
+
+Currently xen_allocate_irq_dynamic starts at nr_irqs and works
+backwards looking for a free IRQ in order to (try and) avoid clashing
+with GSIs used in domain 0 and in HVM guests. This change avoids that
+although we retain the behaviour of allowing dynamic IRQs to encroach
+on the GSI range if no suitable IRQs are available since a future IRQ
+clash is deemed preferable to failure right now.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Cc: Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+Cc: Jeremy Fitzhardinge <jeremy at goop.org>
+---
+ drivers/xen/events.c | 84 +++++++++++++++----------------------------------
+ 1 files changed, 26 insertions(+), 58 deletions(-)
+
+diff --git a/drivers/xen/events.c b/drivers/xen/events.c
+index 74fb216..a7b60f6 100644
+--- a/drivers/xen/events.c
++++ b/drivers/xen/events.c
+@@ -373,81 +373,49 @@ static void unmask_evtchn(int port)
+ put_cpu();
+ }
+
+-static int get_nr_hw_irqs(void)
++static int xen_allocate_irq_dynamic(void)
+ {
+- int ret = 1;
++ int first = 0;
++ int irq;
+
+ #ifdef CONFIG_X86_IO_APIC
+- ret = get_nr_irqs_gsi();
++ /*
++ * For an HVM guest or domain 0 which see "real" (emulated or
++ * actual respectively) GSIs we allocate dynamic IRQs
++ * e.g. those corresponding to event channels or MSIs
++ * etc. from the range above those "real" GSIs to avoid
++ * collisions.
++ */
++ if (xen_initial_domain() || xen_hvm_domain())
++ first = get_nr_irqs_gsi();
+ #endif
+
+- return ret;
+-}
+-
+-static int xen_allocate_irq_dynamic(void)
+-{
+- struct irq_data *data;
+- int irq, res;
+- int bottom = get_nr_hw_irqs();
+- int top = nr_irqs-1;
+-
+- if (bottom == nr_irqs)
+- goto no_irqs;
+-
+ retry:
+- /* This loop starts from the top of IRQ space and goes down.
+- * We need this b/c if we have a PCI device in a Xen PV guest
+- * we do not have an IO-APIC (though the backend might have them)
+- * mapped in. To not have a collision of physical IRQs with the Xen
+- * event channels start at the top of the IRQ space for virtual IRQs.
+- */
+- for (irq = top; irq > bottom; irq--) {
+- data = irq_get_irq_data(irq);
+- /* only 15->0 have init'd desc; handle irq > 16 */
+- if (!data)
+- break;
+- if (data->chip == &no_irq_chip)
+- break;
+- if (data->chip != &xen_dynamic_chip)
+- continue;
+- if (irq_info[irq].type == IRQT_UNBOUND)
+- return irq;
+- }
++ irq = irq_alloc_desc_from(first, -1);
+
+- if (irq == bottom)
+- goto no_irqs;
+-
+- res = irq_alloc_desc_at(irq, -1);
+- if (res == -EEXIST) {
+- top--;
+- if (bottom > top)
+- printk(KERN_ERR "Eating in GSI/MSI space (%d)!" \
+- " Your PCI device might not work!\n", top);
+- if (top > NR_IRQS_LEGACY)
+- goto retry;
++ if (irq == -ENOMEM && first > NR_IRQS_LEGACY) {
++ printk(KERN_ERR "Out of dynamic IRQ space and eating into GSI space. You should increase nr_irqs\n");
++ first = max(NR_IRQS_LEGACY, first - NR_IRQS_LEGACY);
++ goto retry;
+ }
+
+- if (WARN_ON(res != irq))
+- return -1;
++ if (irq < 0)
++ panic("No available IRQ to bind to: increase nr_irqs!\n");
+
+ return irq;
+-
+-no_irqs:
+- panic("No available IRQ to bind to: increase nr_irqs!\n");
+-}
+-
+-static bool identity_mapped_irq(unsigned irq)
+-{
+- /* identity map all the hardware irqs */
+- return irq < get_nr_hw_irqs();
+ }
+
+ static int xen_allocate_irq_gsi(unsigned gsi)
+ {
+ int irq;
+
+- if (!identity_mapped_irq(gsi) &&
+- (xen_initial_domain() || !xen_pv_domain()))
++ /*
++ * A PV guest has no concept of a GSI (since it has no ACPI
++ * nor access to/knowledge of the physical APICs). Therefore
++ * all IRQs are dynamically allocated from the entire IRQ
++ * space.
++ */
++ if (xen_pv_domain() && !xen_initial_domain())
+ return xen_allocate_irq_dynamic();
+
+ /* Legacy IRQ descriptors are already allocated by the arch. */
+--
+1.7.3.4
+
+
+From c986ab83cede3fc02d9f73c65dd83c20ebdf3d0e Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Mon, 29 Nov 2010 13:52:18 -0500
+Subject: [PATCH 043/139] ttm: Introduce a placeholder for DMA (bus) addresses.
+
+This is right now limited to only non-pool constructs.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Tested-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/gpu/drm/ttm/ttm_page_alloc.c | 9 ++++++---
+ drivers/gpu/drm/ttm/ttm_tt.c | 10 ++++++++--
+ include/drm/ttm/ttm_bo_driver.h | 2 ++
+ include/drm/ttm/ttm_page_alloc.h | 8 ++++++--
+ 4 files changed, 22 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
+index b1e02ff..6859288 100644
+--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
++++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
+@@ -38,6 +38,7 @@
+ #include <linux/mm.h>
+ #include <linux/seq_file.h> /* for seq_printf */
+ #include <linux/slab.h>
++#include <linux/dma-mapping.h>
+
+ #include <asm/atomic.h>
+
+@@ -662,7 +663,8 @@ out:
+ * cached pages.
+ */
+ int ttm_get_pages(struct list_head *pages, int flags,
+- enum ttm_caching_state cstate, unsigned count)
++ enum ttm_caching_state cstate, unsigned count,
++ dma_addr_t *dma_address)
+ {
+ struct ttm_page_pool *pool = ttm_get_pool(flags, cstate);
+ struct page *p = NULL;
+@@ -720,7 +722,7 @@ int ttm_get_pages(struct list_head *pages, int flags,
+ printk(KERN_ERR TTM_PFX
+ "Failed to allocate extra pages "
+ "for large request.");
+- ttm_put_pages(pages, 0, flags, cstate);
++ ttm_put_pages(pages, 0, flags, cstate, NULL);
+ return r;
+ }
+ }
+@@ -731,7 +733,8 @@ int ttm_get_pages(struct list_head *pages, int flags,
+
+ /* Put all pages in pages list to correct pool to wait for reuse */
+ void ttm_put_pages(struct list_head *pages, unsigned page_count, int flags,
+- enum ttm_caching_state cstate)
++ enum ttm_caching_state cstate,
++ dma_addr_t *dma_address)
+ {
+ unsigned long irq_flags;
+ struct ttm_page_pool *pool = ttm_get_pool(flags, cstate);
+diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
+index af789dc..0d39001 100644
+--- a/drivers/gpu/drm/ttm/ttm_tt.c
++++ b/drivers/gpu/drm/ttm/ttm_tt.c
+@@ -49,12 +49,16 @@ static int ttm_tt_swapin(struct ttm_tt *ttm);
+ static void ttm_tt_alloc_page_directory(struct ttm_tt *ttm)
+ {
+ ttm->pages = drm_calloc_large(ttm->num_pages, sizeof(*ttm->pages));
++ ttm->dma_address = drm_calloc_large(ttm->num_pages,
++ sizeof(*ttm->dma_address));
+ }
+
+ static void ttm_tt_free_page_directory(struct ttm_tt *ttm)
+ {
+ drm_free_large(ttm->pages);
+ ttm->pages = NULL;
++ drm_free_large(ttm->dma_address);
++ ttm->dma_address = NULL;
+ }
+
+ static void ttm_tt_free_user_pages(struct ttm_tt *ttm)
+@@ -105,7 +109,8 @@ static struct page *__ttm_tt_get_page(struct ttm_tt *ttm, int index)
+
+ INIT_LIST_HEAD(&h);
+
+- ret = ttm_get_pages(&h, ttm->page_flags, ttm->caching_state, 1);
++ ret = ttm_get_pages(&h, ttm->page_flags, ttm->caching_state, 1,
++ &ttm->dma_address[index]);
+
+ if (ret != 0)
+ return NULL;
+@@ -298,7 +303,8 @@ static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm)
+ count++;
+ }
+ }
+- ttm_put_pages(&h, count, ttm->page_flags, ttm->caching_state);
++ ttm_put_pages(&h, count, ttm->page_flags, ttm->caching_state,
++ ttm->dma_address);
+ ttm->state = tt_unpopulated;
+ ttm->first_himem_page = ttm->num_pages;
+ ttm->last_lomem_page = -1;
+diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
+index 8e0c848..6dc4fcc 100644
+--- a/include/drm/ttm/ttm_bo_driver.h
++++ b/include/drm/ttm/ttm_bo_driver.h
+@@ -149,6 +149,7 @@ enum ttm_caching_state {
+ * @swap_storage: Pointer to shmem struct file for swap storage.
+ * @caching_state: The current caching state of the pages.
+ * @state: The current binding state of the pages.
++ * @dma_address: The DMA (bus) addresses of the pages (if TTM_PAGE_FLAG_DMA32)
+ *
+ * This is a structure holding the pages, caching- and aperture binding
+ * status for a buffer object that isn't backed by fixed (VRAM / AGP)
+@@ -173,6 +174,7 @@ struct ttm_tt {
+ tt_unbound,
+ tt_unpopulated,
+ } state;
++ dma_addr_t *dma_address;
+ };
+
+ #define TTM_MEMTYPE_FLAG_FIXED (1 << 0) /* Fixed (on-card) PCI memory */
+diff --git a/include/drm/ttm/ttm_page_alloc.h b/include/drm/ttm/ttm_page_alloc.h
+index 1168214..8062890 100644
+--- a/include/drm/ttm/ttm_page_alloc.h
++++ b/include/drm/ttm/ttm_page_alloc.h
+@@ -36,11 +36,13 @@
+ * @flags: ttm flags for page allocation.
+ * @cstate: ttm caching state for the page.
+ * @count: number of pages to allocate.
++ * @dma_address: The DMA (bus) address of pages (if TTM_PAGE_FLAG_DMA32 set).
+ */
+ int ttm_get_pages(struct list_head *pages,
+ int flags,
+ enum ttm_caching_state cstate,
+- unsigned count);
++ unsigned count,
++ dma_addr_t *dma_address);
+ /**
+ * Put linked list of pages to pool.
+ *
+@@ -49,11 +51,13 @@ int ttm_get_pages(struct list_head *pages,
+ * count.
+ * @flags: ttm flags for page allocation.
+ * @cstate: ttm caching state.
++ * @dma_address: The DMA (bus) address of pages (if TTM_PAGE_FLAG_DMA32 set).
+ */
+ void ttm_put_pages(struct list_head *pages,
+ unsigned page_count,
+ int flags,
+- enum ttm_caching_state cstate);
++ enum ttm_caching_state cstate,
++ dma_addr_t *dma_address);
+ /**
+ * Initialize pool allocator.
+ */
+--
+1.7.3.4
+
+
+From ed805774fd3e3a8a0baa003d6419c5bd07d8cc5a Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Mon, 29 Nov 2010 14:03:30 -0500
+Subject: [PATCH 044/139] ttm: Utilize the dma_addr_t array for pages that are to in DMA32 pool.
+
+We only use the "if (pool == NULL)" path for right now.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Tested-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/gpu/drm/ttm/ttm_page_alloc.c | 26 +++++++++++++++++++++++---
+ 1 files changed, 23 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
+index 6859288..5d09677 100644
+--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
++++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
+@@ -683,14 +683,22 @@ int ttm_get_pages(struct list_head *pages, int flags,
+ gfp_flags |= GFP_HIGHUSER;
+
+ for (r = 0; r < count; ++r) {
+- p = alloc_page(gfp_flags);
++ if ((flags & TTM_PAGE_FLAG_DMA32) && dma_address) {
++ void *addr;
++ addr = dma_alloc_coherent(NULL, PAGE_SIZE,
++ &dma_address[r],
++ gfp_flags);
++ if (addr == NULL)
++ return -ENOMEM;
++ p = virt_to_page(addr);
++ } else
++ p = alloc_page(gfp_flags);
+ if (!p) {
+
+ printk(KERN_ERR TTM_PFX
+ "Unable to allocate page.");
+ return -ENOMEM;
+ }
+-
+ list_add(&p->lru, pages);
+ }
+ return 0;
+@@ -739,12 +747,24 @@ void ttm_put_pages(struct list_head *pages, unsigned page_count, int flags,
+ unsigned long irq_flags;
+ struct ttm_page_pool *pool = ttm_get_pool(flags, cstate);
+ struct page *p, *tmp;
++ unsigned r;
+
+ if (pool == NULL) {
+ /* No pool for this memory type so free the pages */
+
++ r = page_count-1;
+ list_for_each_entry_safe(p, tmp, pages, lru) {
+- __free_page(p);
++ if ((flags & TTM_PAGE_FLAG_DMA32) && dma_address) {
++ void *addr = page_address(p);
++ WARN_ON(!addr || !dma_address[r]);
++ if (addr)
++ dma_free_coherent(NULL, PAGE_SIZE,
++ addr,
++ dma_address[r]);
++ dma_address[r] = 0;
++ } else
++ __free_page(p);
++ r--;
+ }
+ /* Make the pages list empty */
+ INIT_LIST_HEAD(pages);
+--
+1.7.3.4
+
+
+From c779160e3b0246d7de606eafb855df9b283a5c2a Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Thu, 2 Dec 2010 10:24:13 -0500
+Subject: [PATCH 045/139] ttm: Expand (*populate) to support an array of DMA addresses.
+
+We pass in the array of ttm pages to be populated in the GART/MM
+of the card (or AGP). Patch titled: "ttm: Utilize the dma_addr_t array
+for pages that are to in DMA32 pool." uses the DMA API to make those
+pages have proper DMA addresses (in the situation where
+page_to_phys or virt_to_phys do not give us the DMA (bus) address).
+
+Since we are using the DMA API on those pages, we should pass in the
+DMA address to this function so it can save it in its proper fields
+(later patches use it).
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Tested-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/gpu/drm/nouveau/nouveau_sgdma.c | 3 ++-
+ drivers/gpu/drm/radeon/radeon_ttm.c | 3 ++-
+ drivers/gpu/drm/ttm/ttm_agp_backend.c | 3 ++-
+ drivers/gpu/drm/ttm/ttm_tt.c | 2 +-
+ drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c | 3 ++-
+ include/drm/ttm/ttm_bo_driver.h | 4 +++-
+ 6 files changed, 12 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
+index 288baca..edc140a 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
++++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
+@@ -20,7 +20,8 @@ struct nouveau_sgdma_be {
+
+ static int
+ nouveau_sgdma_populate(struct ttm_backend *be, unsigned long num_pages,
+- struct page **pages, struct page *dummy_read_page)
++ struct page **pages, struct page *dummy_read_page,
++ dma_addr_t *dma_addrs)
+ {
+ struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
+ struct drm_device *dev = nvbe->dev;
+diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
+index 01c2c73..6f156e9 100644
+--- a/drivers/gpu/drm/radeon/radeon_ttm.c
++++ b/drivers/gpu/drm/radeon/radeon_ttm.c
+@@ -655,7 +655,8 @@ struct radeon_ttm_backend {
+ static int radeon_ttm_backend_populate(struct ttm_backend *backend,
+ unsigned long num_pages,
+ struct page **pages,
+- struct page *dummy_read_page)
++ struct page *dummy_read_page,
++ dma_addr_t *dma_addrs)
+ {
+ struct radeon_ttm_backend *gtt;
+
+diff --git a/drivers/gpu/drm/ttm/ttm_agp_backend.c b/drivers/gpu/drm/ttm/ttm_agp_backend.c
+index f999e36..1c4a72f 100644
+--- a/drivers/gpu/drm/ttm/ttm_agp_backend.c
++++ b/drivers/gpu/drm/ttm/ttm_agp_backend.c
+@@ -47,7 +47,8 @@ struct ttm_agp_backend {
+
+ static int ttm_agp_populate(struct ttm_backend *backend,
+ unsigned long num_pages, struct page **pages,
+- struct page *dummy_read_page)
++ struct page *dummy_read_page,
++ dma_addr_t *dma_addrs)
+ {
+ struct ttm_agp_backend *agp_be =
+ container_of(backend, struct ttm_agp_backend, backend);
+diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
+index 0d39001..86d5b17 100644
+--- a/drivers/gpu/drm/ttm/ttm_tt.c
++++ b/drivers/gpu/drm/ttm/ttm_tt.c
+@@ -169,7 +169,7 @@ int ttm_tt_populate(struct ttm_tt *ttm)
+ }
+
+ be->func->populate(be, ttm->num_pages, ttm->pages,
+- ttm->dummy_read_page);
++ ttm->dummy_read_page, ttm->dma_address);
+ ttm->state = tt_unbound;
+ return 0;
+ }
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+index 80bc37b..87e43e0 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+@@ -102,7 +102,8 @@ struct vmw_ttm_backend {
+
+ static int vmw_ttm_populate(struct ttm_backend *backend,
+ unsigned long num_pages, struct page **pages,
+- struct page *dummy_read_page)
++ struct page *dummy_read_page,
++ dma_addr_t *dma_addrs)
+ {
+ struct vmw_ttm_backend *vmw_be =
+ container_of(backend, struct vmw_ttm_backend, backend);
+diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
+index 6dc4fcc..ebcd3dd 100644
+--- a/include/drm/ttm/ttm_bo_driver.h
++++ b/include/drm/ttm/ttm_bo_driver.h
+@@ -50,13 +50,15 @@ struct ttm_backend_func {
+ * @pages: Array of pointers to ttm pages.
+ * @dummy_read_page: Page to be used instead of NULL pages in the
+ * array @pages.
++ * @dma_addrs: Array of DMA (bus) address of the ttm pages.
+ *
+ * Populate the backend with ttm pages. Depending on the backend,
+ * it may or may not copy the @pages array.
+ */
+ int (*populate) (struct ttm_backend *backend,
+ unsigned long num_pages, struct page **pages,
+- struct page *dummy_read_page);
++ struct page *dummy_read_page,
++ dma_addr_t *dma_addrs);
+ /**
+ * struct ttm_backend_func member clear
+ *
+--
+1.7.3.4
+
+
+From 340dec734021d8600029e8b3178280cc8d3af251 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Thu, 2 Dec 2010 11:04:29 -0500
+Subject: [PATCH 046/139] radeon/ttm/PCIe: Use dma_addr if TTM has set it.
+
+If the TTM layer has used the DMA API to set up pages that are
+TTM_PAGE_FLAG_DMA32 (look at patch titled: "ttm: Utilize the dma_addr_t
+array for pages that are to in DMA32 pool."), let's use it
+when programming the GART in the PCIe type cards.
+
+This patch skips doing the pci_map_page (and pci_unmap_page) if
+there is a DMA address passed in for that page. If the dma_address
+is zero (or DMA_ERROR_CODE), then we continue on with our old
+behaviour.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Tested-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/gpu/drm/radeon/radeon.h | 4 ++-
+ drivers/gpu/drm/radeon/radeon_gart.c | 36 ++++++++++++++++++++++++---------
+ drivers/gpu/drm/radeon/radeon_ttm.c | 5 +++-
+ 3 files changed, 33 insertions(+), 12 deletions(-)
+
+diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
+index 73f600d..c9bbab9 100644
+--- a/drivers/gpu/drm/radeon/radeon.h
++++ b/drivers/gpu/drm/radeon/radeon.h
+@@ -317,6 +317,7 @@ struct radeon_gart {
+ union radeon_gart_table table;
+ struct page **pages;
+ dma_addr_t *pages_addr;
++ bool *ttm_alloced;
+ bool ready;
+ };
+
+@@ -329,7 +330,8 @@ void radeon_gart_fini(struct radeon_device *rdev);
+ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
+ int pages);
+ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
+- int pages, struct page **pagelist);
++ int pages, struct page **pagelist,
++ dma_addr_t *dma_addr);
+
+
+ /*
+diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
+index e65b903..4a5ac4b 100644
+--- a/drivers/gpu/drm/radeon/radeon_gart.c
++++ b/drivers/gpu/drm/radeon/radeon_gart.c
+@@ -149,8 +149,9 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
+ p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);
+ for (i = 0; i < pages; i++, p++) {
+ if (rdev->gart.pages[p]) {
+- pci_unmap_page(rdev->pdev, rdev->gart.pages_addr[p],
+- PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
++ if (!rdev->gart.ttm_alloced[p])
++ pci_unmap_page(rdev->pdev, rdev->gart.pages_addr[p],
++ PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+ rdev->gart.pages[p] = NULL;
+ rdev->gart.pages_addr[p] = rdev->dummy_page.addr;
+ page_base = rdev->gart.pages_addr[p];
+@@ -165,7 +166,7 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
+ }
+
+ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
+- int pages, struct page **pagelist)
++ int pages, struct page **pagelist, dma_addr_t *dma_addr)
+ {
+ unsigned t;
+ unsigned p;
+@@ -180,15 +181,22 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
+ p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);
+
+ for (i = 0; i < pages; i++, p++) {
+- /* we need to support large memory configurations */
+- /* assume that unbind have already been call on the range */
+- rdev->gart.pages_addr[p] = pci_map_page(rdev->pdev, pagelist[i],
++ /* On TTM path, we only use the DMA API if TTM_PAGE_FLAG_DMA32
++ * is requested. */
++ if (dma_addr[i] != DMA_ERROR_CODE) {
++ rdev->gart.ttm_alloced[p] = true;
++ rdev->gart.pages_addr[p] = dma_addr[i];
++ } else {
++ /* we need to support large memory configurations */
++ /* assume that unbind have already been call on the range */
++ rdev->gart.pages_addr[p] = pci_map_page(rdev->pdev, pagelist[i],
+ 0, PAGE_SIZE,
+ PCI_DMA_BIDIRECTIONAL);
+- if (pci_dma_mapping_error(rdev->pdev, rdev->gart.pages_addr[p])) {
+- /* FIXME: failed to map page (return -ENOMEM?) */
+- radeon_gart_unbind(rdev, offset, pages);
+- return -ENOMEM;
++ if (pci_dma_mapping_error(rdev->pdev, rdev->gart.pages_addr[p])) {
++ /* FIXME: failed to map page (return -ENOMEM?) */
++ radeon_gart_unbind(rdev, offset, pages);
++ return -ENOMEM;
++ }
+ }
+ rdev->gart.pages[p] = pagelist[i];
+ page_base = rdev->gart.pages_addr[p];
+@@ -251,6 +259,12 @@ int radeon_gart_init(struct radeon_device *rdev)
+ radeon_gart_fini(rdev);
+ return -ENOMEM;
+ }
++ rdev->gart.ttm_alloced = kzalloc(sizeof(bool) *
++ rdev->gart.num_cpu_pages, GFP_KERNEL);
++ if (rdev->gart.ttm_alloced == NULL) {
++ radeon_gart_fini(rdev);
++ return -ENOMEM;
++ }
+ /* set GART entry to point to the dummy page by default */
+ for (i = 0; i < rdev->gart.num_cpu_pages; i++) {
+ rdev->gart.pages_addr[i] = rdev->dummy_page.addr;
+@@ -267,6 +281,8 @@ void radeon_gart_fini(struct radeon_device *rdev)
+ rdev->gart.ready = false;
+ kfree(rdev->gart.pages);
+ kfree(rdev->gart.pages_addr);
++ kfree(rdev->gart.ttm_alloced);
+ rdev->gart.pages = NULL;
+ rdev->gart.pages_addr = NULL;
++ rdev->gart.ttm_alloced = NULL;
+ }
+diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
+index 6f156e9..ca04505 100644
+--- a/drivers/gpu/drm/radeon/radeon_ttm.c
++++ b/drivers/gpu/drm/radeon/radeon_ttm.c
+@@ -647,6 +647,7 @@ struct radeon_ttm_backend {
+ unsigned long num_pages;
+ struct page **pages;
+ struct page *dummy_read_page;
++ dma_addr_t *dma_addrs;
+ bool populated;
+ bool bound;
+ unsigned offset;
+@@ -662,6 +663,7 @@ static int radeon_ttm_backend_populate(struct ttm_backend *backend,
+
+ gtt = container_of(backend, struct radeon_ttm_backend, backend);
+ gtt->pages = pages;
++ gtt->dma_addrs = dma_addrs;
+ gtt->num_pages = num_pages;
+ gtt->dummy_read_page = dummy_read_page;
+ gtt->populated = true;
+@@ -674,6 +676,7 @@ static void radeon_ttm_backend_clear(struct ttm_backend *backend)
+
+ gtt = container_of(backend, struct radeon_ttm_backend, backend);
+ gtt->pages = NULL;
++ gtt->dma_addrs = NULL;
+ gtt->num_pages = 0;
+ gtt->dummy_read_page = NULL;
+ gtt->populated = false;
+@@ -694,7 +697,7 @@ static int radeon_ttm_backend_bind(struct ttm_backend *backend,
+ gtt->num_pages, bo_mem, backend);
+ }
+ r = radeon_gart_bind(gtt->rdev, gtt->offset,
+- gtt->num_pages, gtt->pages);
++ gtt->num_pages, gtt->pages, gtt->dma_addrs);
+ if (r) {
+ DRM_ERROR("failed to bind %lu pages at 0x%08X\n",
+ gtt->num_pages, gtt->offset);
+--
+1.7.3.4
+
+
+From b4efe20150e5e9a483faf67ba0d947dbface96cd Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Thu, 2 Dec 2010 11:36:24 -0500
+Subject: [PATCH 047/139] nouveau/ttm/PCIe: Use dma_addr if TTM has set it.
+
+If the TTM layer has used the DMA API to set up pages that are
+TTM_PAGE_FLAG_DMA32 (look at patch titled: "ttm: Utilize the dma_addr_t
+array for pages that are to in DMA32 pool."), let's use it
+when programming the GART in the PCIe type cards.
+
+This patch skips doing the pci_map_page (and pci_unmap_page) if
+there is a DMA address passed in for that page. If the dma_address
+is zero (or DMA_ERROR_CODE), then we continue on with our old
+behaviour.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Tested-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/gpu/drm/nouveau/nouveau_sgdma.c | 28 +++++++++++++++++++++-------
+ 1 files changed, 21 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
+index edc140a..bbdd982 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
++++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
+@@ -12,6 +12,7 @@ struct nouveau_sgdma_be {
+ struct drm_device *dev;
+
+ dma_addr_t *pages;
++ bool *ttm_alloced;
+ unsigned nr_pages;
+
+ unsigned pte_start;
+@@ -35,15 +36,25 @@ nouveau_sgdma_populate(struct ttm_backend *be, unsigned long num_pages,
+ if (!nvbe->pages)
+ return -ENOMEM;
+
++ nvbe->ttm_alloced = kmalloc(sizeof(bool) * num_pages, GFP_KERNEL);
++ if (!nvbe->ttm_alloced)
++ return -ENOMEM;
++
+ nvbe->nr_pages = 0;
+ while (num_pages--) {
+- nvbe->pages[nvbe->nr_pages] =
+- pci_map_page(dev->pdev, pages[nvbe->nr_pages], 0,
++ if (dma_addrs[nvbe->nr_pages] != DMA_ERROR_CODE) {
++ nvbe->pages[nvbe->nr_pages] =
++ dma_addrs[nvbe->nr_pages];
++ nvbe->ttm_alloced[nvbe->nr_pages] = true;
++ } else {
++ nvbe->pages[nvbe->nr_pages] =
++ pci_map_page(dev->pdev, pages[nvbe->nr_pages], 0,
+ PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+- if (pci_dma_mapping_error(dev->pdev,
+- nvbe->pages[nvbe->nr_pages])) {
+- be->func->clear(be);
+- return -EFAULT;
++ if (pci_dma_mapping_error(dev->pdev,
++ nvbe->pages[nvbe->nr_pages])) {
++ be->func->clear(be);
++ return -EFAULT;
++ }
+ }
+
+ nvbe->nr_pages++;
+@@ -66,11 +77,14 @@ nouveau_sgdma_clear(struct ttm_backend *be)
+ be->func->unbind(be);
+
+ while (nvbe->nr_pages--) {
+- pci_unmap_page(dev->pdev, nvbe->pages[nvbe->nr_pages],
++ if (!nvbe->ttm_alloced[nvbe->nr_pages])
++ pci_unmap_page(dev->pdev, nvbe->pages[nvbe->nr_pages],
+ PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+ }
+ kfree(nvbe->pages);
++ kfree(nvbe->ttm_alloced);
+ nvbe->pages = NULL;
++ nvbe->ttm_alloced = NULL;
+ nvbe->nr_pages = 0;
+ }
+ }
+--
+1.7.3.4
+
+
+From 03c4949992e2b7e84b7cdeb156d803db3f848b6c Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Mon, 9 Feb 2009 12:05:52 -0800
+Subject: [PATCH 051/139] xen: netback: Initial import of linux-2.6.18-xen.hg netback driver.
+
+This corresponds to 774:107e10e0e07c in that tree.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/Kconfig | 7 +
+ drivers/xen/Makefile | 1 +
+ drivers/xen/netback/Makefile | 3 +
+ drivers/xen/netback/common.h | 217 ++++++
+ drivers/xen/netback/interface.c | 336 ++++++++
+ drivers/xen/netback/netback.c | 1637 +++++++++++++++++++++++++++++++++++++++
+ drivers/xen/netback/xenbus.c | 454 +++++++++++
+ 7 files changed, 2655 insertions(+), 0 deletions(-)
+ create mode 100644 drivers/xen/netback/Makefile
+ create mode 100644 drivers/xen/netback/common.h
+ create mode 100644 drivers/xen/netback/interface.c
+ create mode 100644 drivers/xen/netback/netback.c
+ create mode 100644 drivers/xen/netback/xenbus.c
+
+diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
+index 5a48ce9..7e83d43 100644
+--- a/drivers/xen/Kconfig
++++ b/drivers/xen/Kconfig
+@@ -37,6 +37,13 @@ config XEN_BACKEND
+ depends on XEN_PCIDEV_BACKEND
+
+
++config XEN_NETDEV_BACKEND
++ bool "Xen backend network device"
++ depends on XEN_BACKEND && NET
++ help
++ Implement the network backend driver, which passes packets
++ from the guest domain's frontend drivers to the network.
++
+ config XENFS
+ tristate "Xen filesystem"
+ default y
+diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
+index 533a199..c0e0509 100644
+--- a/drivers/xen/Makefile
++++ b/drivers/xen/Makefile
+@@ -9,6 +9,7 @@ obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
+ obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o
+ obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o
+ obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback/
++obj-$(CONFIG_XEN_NETDEV_BACKEND) += netback/
+ obj-$(CONFIG_XENFS) += xenfs/
+ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
+ obj-$(CONFIG_XEN_PLATFORM_PCI) += platform-pci.o
+diff --git a/drivers/xen/netback/Makefile b/drivers/xen/netback/Makefile
+new file mode 100644
+index 0000000..f4a0c51
+--- /dev/null
++++ b/drivers/xen/netback/Makefile
+@@ -0,0 +1,3 @@
++obj-$(CONFIG_XEN_NETDEV_BACKEND) := netbk.o
++
++netbk-y := netback.o xenbus.o interface.o
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+new file mode 100644
+index 0000000..9a54d57
+--- /dev/null
++++ b/drivers/xen/netback/common.h
+@@ -0,0 +1,217 @@
++/******************************************************************************
++ * arch/xen/drivers/netif/backend/common.h
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#ifndef __NETIF__BACKEND__COMMON_H__
++#define __NETIF__BACKEND__COMMON_H__
++
++#include <linux/version.h>
++#include <linux/module.h>
++#include <linux/interrupt.h>
++#include <linux/slab.h>
++#include <linux/ip.h>
++#include <linux/in.h>
++#include <linux/netdevice.h>
++#include <linux/etherdevice.h>
++#include <linux/wait.h>
++#include <xen/evtchn.h>
++#include <xen/interface/io/netif.h>
++#include <asm/io.h>
++#include <asm/pgalloc.h>
++#include <xen/interface/grant_table.h>
++#include <xen/gnttab.h>
++#include <xen/driver_util.h>
++#include <xen/xenbus.h>
++
++#define DPRINTK(_f, _a...) \
++ pr_debug("(file=%s, line=%d) " _f, \
++ __FILE__ , __LINE__ , ## _a )
++#define IPRINTK(fmt, args...) \
++ printk(KERN_INFO "xen_net: " fmt, ##args)
++#define WPRINTK(fmt, args...) \
++ printk(KERN_WARNING "xen_net: " fmt, ##args)
++
++typedef struct netif_st {
++ /* Unique identifier for this interface. */
++ domid_t domid;
++ unsigned int handle;
++
++ u8 fe_dev_addr[6];
++
++ /* Physical parameters of the comms window. */
++ grant_handle_t tx_shmem_handle;
++ grant_ref_t tx_shmem_ref;
++ grant_handle_t rx_shmem_handle;
++ grant_ref_t rx_shmem_ref;
++ unsigned int irq;
++
++ /* The shared rings and indexes. */
++ netif_tx_back_ring_t tx;
++ netif_rx_back_ring_t rx;
++ struct vm_struct *tx_comms_area;
++ struct vm_struct *rx_comms_area;
++
++ /* Set of features that can be turned on in dev->features. */
++ int features;
++
++ /* Internal feature information. */
++ u8 can_queue:1; /* can queue packets for receiver? */
++ u8 copying_receiver:1; /* copy packets to receiver? */
++
++ /* Allow netif_be_start_xmit() to peek ahead in the rx request ring. */
++ RING_IDX rx_req_cons_peek;
++
++ /* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
++ unsigned long credit_bytes;
++ unsigned long credit_usec;
++ unsigned long remaining_credit;
++ struct timer_list credit_timeout;
++
++ /* Enforce draining of the transmit queue. */
++ struct timer_list tx_queue_timeout;
++
++ /* Miscellaneous private stuff. */
++ struct list_head list; /* scheduling list */
++ atomic_t refcnt;
++ struct net_device *dev;
++ struct net_device_stats stats;
++
++ unsigned int carrier;
++
++ wait_queue_head_t waiting_to_free;
++} netif_t;
++
++/*
++ * Implement our own carrier flag: the network stack's version causes delays
++ * when the carrier is re-enabled (in particular, dev_activate() may not
++ * immediately be called, which can cause packet loss; also the etherbridge
++ * can be rather lazy in activating its port).
++ */
++#define netback_carrier_on(netif) ((netif)->carrier = 1)
++#define netback_carrier_off(netif) ((netif)->carrier = 0)
++#define netback_carrier_ok(netif) ((netif)->carrier)
++
++enum {
++ NETBK_DONT_COPY_SKB,
++ NETBK_DELAYED_COPY_SKB,
++ NETBK_ALWAYS_COPY_SKB,
++};
++
++extern int netbk_copy_skb_mode;
++
++/* Function pointers into netback accelerator plugin modules */
++struct netback_accel_hooks {
++ struct module *owner;
++ int (*probe)(struct xenbus_device *dev);
++ int (*remove)(struct xenbus_device *dev);
++};
++
++/* Structure to track the state of a netback accelerator plugin */
++struct netback_accelerator {
++ struct list_head link;
++ int id;
++ char *eth_name;
++ atomic_t use_count;
++ struct netback_accel_hooks *hooks;
++};
++
++struct backend_info {
++ struct xenbus_device *dev;
++ netif_t *netif;
++ enum xenbus_state frontend_state;
++
++ /* State relating to the netback accelerator */
++ void *netback_accel_priv;
++ /* The accelerator that this backend is currently using */
++ struct netback_accelerator *accelerator;
++};
++
++#define NETBACK_ACCEL_VERSION 0x00010001
++
++/*
++ * Connect an accelerator plugin module to netback. Returns zero on
++ * success, < 0 on error, > 0 (with highest version number supported)
++ * if version mismatch.
++ */
++extern int netback_connect_accelerator(unsigned version,
++ int id, const char *eth_name,
++ struct netback_accel_hooks *hooks);
++/* Disconnect a previously connected accelerator plugin module */
++extern void netback_disconnect_accelerator(int id, const char *eth_name);
++
++
++extern
++void netback_probe_accelerators(struct backend_info *be,
++ struct xenbus_device *dev);
++extern
++void netback_remove_accelerators(struct backend_info *be,
++ struct xenbus_device *dev);
++extern
++void netif_accel_init(void);
++
++
++#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
++#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
++
++void netif_disconnect(netif_t *netif);
++
++netif_t *netif_alloc(domid_t domid, unsigned int handle);
++int netif_map(netif_t *netif, unsigned long tx_ring_ref,
++ unsigned long rx_ring_ref, unsigned int evtchn);
++
++#define netif_get(_b) (atomic_inc(&(_b)->refcnt))
++#define netif_put(_b) \
++ do { \
++ if ( atomic_dec_and_test(&(_b)->refcnt) ) \
++ wake_up(&(_b)->waiting_to_free); \
++ } while (0)
++
++void netif_xenbus_init(void);
++
++#define netif_schedulable(netif) \
++ (netif_running((netif)->dev) && netback_carrier_ok(netif))
++
++void netif_schedule_work(netif_t *netif);
++void netif_deschedule_work(netif_t *netif);
++
++int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
++struct net_device_stats *netif_be_get_stats(struct net_device *dev);
++irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs);
++
++static inline int netbk_can_queue(struct net_device *dev)
++{
++ netif_t *netif = netdev_priv(dev);
++ return netif->can_queue;
++}
++
++static inline int netbk_can_sg(struct net_device *dev)
++{
++ netif_t *netif = netdev_priv(dev);
++ return netif->features & NETIF_F_SG;
++}
++
++#endif /* __NETIF__BACKEND__COMMON_H__ */
+diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
+new file mode 100644
+index 0000000..7e67941
+--- /dev/null
++++ b/drivers/xen/netback/interface.c
+@@ -0,0 +1,336 @@
++/******************************************************************************
++ * arch/xen/drivers/netif/backend/interface.c
++ *
++ * Network-device interface management.
++ *
++ * Copyright (c) 2004-2005, Keir Fraser
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include "common.h"
++#include <linux/ethtool.h>
++#include <linux/rtnetlink.h>
++
++/*
++ * Module parameter 'queue_length':
++ *
++ * Enables queuing in the network stack when a client has run out of receive
++ * descriptors. Although this feature can improve receive bandwidth by avoiding
++ * packet loss, it can also result in packets sitting in the 'tx_queue' for
++ * unbounded time. This is bad if those packets hold onto foreign resources.
++ * For example, consider a packet that holds onto resources belonging to the
++ * guest for which it is queued (e.g., packet received on vif1.0, destined for
++ * vif1.1 which is not activated in the guest): in this situation the guest
++ * will never be destroyed, unless vif1.1 is taken down. To avoid this, we
++ * run a timer (tx_queue_timeout) to drain the queue when the interface is
++ * blocked.
++ */
++static unsigned long netbk_queue_length = 32;
++module_param_named(queue_length, netbk_queue_length, ulong, 0);
++
++/*
++ * Bring the backend interface up: unmask its event-channel IRQ and
++ * schedule any pending TX work.
++ */
++static void __netif_up(netif_t *netif)
++{
++ enable_irq(netif->irq);
++ netif_schedule_work(netif);
++}
++
++/* Quiesce the interface: mask its IRQ and drop it from the TX schedule. */
++static void __netif_down(netif_t *netif)
++{
++ disable_irq(netif->irq);
++ netif_deschedule_work(netif);
++}
++
++/* net_device open hook: only start the queue if the frontend is connected. */
++static int net_open(struct net_device *dev)
++{
++ netif_t *netif = netdev_priv(dev);
++ if (netback_carrier_ok(netif)) {
++ __netif_up(netif);
++ netif_start_queue(dev);
++ }
++ return 0;
++}
++
++/* net_device stop hook: stop the queue; quiesce the IRQ if still connected. */
++static int net_close(struct net_device *dev)
++{
++ netif_t *netif = netdev_priv(dev);
++ if (netback_carrier_ok(netif))
++ __netif_down(netif);
++ netif_stop_queue(dev);
++ return 0;
++}
++
++/*
++ * MTU hook: when the frontend supports scatter-gather we can carry frames
++ * up to 64KiB minus the Ethernet header; otherwise restrict to the
++ * standard Ethernet payload size.
++ */
++static int netbk_change_mtu(struct net_device *dev, int mtu)
++{
++ int max = netbk_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
++
++ if (mtu > max)
++ return -EINVAL;
++ dev->mtu = mtu;
++ return 0;
++}
++
++/* ethtool set_sg hook: allow enabling SG only if the frontend negotiated it. */
++static int netbk_set_sg(struct net_device *dev, u32 data)
++{
++ if (data) {
++ netif_t *netif = netdev_priv(dev);
++
++ if (!(netif->features & NETIF_F_SG))
++ return -ENOSYS;
++ }
++
++ return ethtool_op_set_sg(dev, data);
++}
++
++/* ethtool set_tso hook: allow enabling TSO only if the frontend negotiated it. */
++static int netbk_set_tso(struct net_device *dev, u32 data)
++{
++ if (data) {
++ netif_t *netif = netdev_priv(dev);
++
++ if (!(netif->features & NETIF_F_TSO))
++ return -ENOSYS;
++ }
++
++ return ethtool_op_set_tso(dev, data);
++}
++
++/* ethtool operations; the SG/TSO setters are gated on frontend features. */
++static struct ethtool_ops network_ethtool_ops =
++{
++ .get_tx_csum = ethtool_op_get_tx_csum,
++ .set_tx_csum = ethtool_op_set_tx_csum,
++ .get_sg = ethtool_op_get_sg,
++ .set_sg = netbk_set_sg,
++ .get_tso = ethtool_op_get_tso,
++ .set_tso = netbk_set_tso,
++ .get_link = ethtool_op_get_link,
++};
++
++/*
++ * Allocate and register the vif<domid>.<handle> net device backing one
++ * frontend.  Returns the new netif_t, or an ERR_PTR() on failure.  The
++ * reference count starts at 1; netif_disconnect() drops that final
++ * reference.  The carrier starts off until netif_map() connects the rings.
++ */
++netif_t *netif_alloc(domid_t domid, unsigned int handle)
++{
++ int err = 0;
++ struct net_device *dev;
++ netif_t *netif;
++ char name[IFNAMSIZ] = {};
++
++ snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
++ dev = alloc_netdev(sizeof(netif_t), name, ether_setup);
++ if (dev == NULL) {
++ DPRINTK("Could not create netif: out of memory\n");
++ return ERR_PTR(-ENOMEM);
++ }
++
++ netif = netdev_priv(dev);
++ memset(netif, 0, sizeof(*netif));
++ netif->domid = domid;
++ netif->handle = handle;
++ atomic_set(&netif->refcnt, 1);
++ init_waitqueue_head(&netif->waiting_to_free);
++ netif->dev = dev;
++
++ netback_carrier_off(netif);
++
++ /* Rate limiting disabled by default: effectively infinite credit. */
++ netif->credit_bytes = netif->remaining_credit = ~0UL;
++ netif->credit_usec = 0UL;
++ init_timer(&netif->credit_timeout);
++ /* Initialize 'expires' now: it's used to track the credit window. */
++ netif->credit_timeout.expires = jiffies;
++
++ init_timer(&netif->tx_queue_timeout);
++
++ dev->hard_start_xmit = netif_be_start_xmit;
++ dev->get_stats = netif_be_get_stats;
++ dev->open = net_open;
++ dev->stop = net_close;
++ dev->change_mtu = netbk_change_mtu;
++ dev->features = NETIF_F_IP_CSUM;
++
++ SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
++
++ dev->tx_queue_len = netbk_queue_length;
++
++ /*
++ * Initialise a dummy MAC address. We choose the numerically
++ * largest non-broadcast address to prevent the address getting
++ * stolen by an Ethernet bridge for STP purposes.
++ * (FE:FF:FF:FF:FF:FF)
++ */
++ memset(dev->dev_addr, 0xFF, ETH_ALEN);
++ dev->dev_addr[0] &= ~0x01;
++
++ rtnl_lock();
++ err = register_netdevice(dev);
++ rtnl_unlock();
++ if (err) {
++ DPRINTK("Could not register new net device %s: err=%d\n",
++ dev->name, err);
++ free_netdev(dev);
++ return ERR_PTR(err);
++ }
++
++ DPRINTK("Successfully created netif\n");
++ return netif;
++}
++
++/*
++ * Map the frontend's TX and RX shared-ring grants into the comms areas
++ * allocated by netif_map().  On failure returns the (non-zero) grant
++ * status from Xen.
++ *
++ * NOTE(review): if the TX ring maps but the RX ring mapping fails, the
++ * TX grant is left mapped here and the netif_map() error path only frees
++ * the vm areas -- confirm whether that partial mapping should be unwound.
++ */
++static int map_frontend_pages(
++ netif_t *netif, grant_ref_t tx_ring_ref, grant_ref_t rx_ring_ref)
++{
++ struct gnttab_map_grant_ref op;
++
++ gnttab_set_map_op(&op, (unsigned long)netif->tx_comms_area->addr,
++ GNTMAP_host_map, tx_ring_ref, netif->domid);
++
++ if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
++ BUG();
++
++ if (op.status) {
++ DPRINTK(" Gnttab failure mapping tx_ring_ref!\n");
++ return op.status;
++ }
++
++ netif->tx_shmem_ref = tx_ring_ref;
++ netif->tx_shmem_handle = op.handle;
++
++ gnttab_set_map_op(&op, (unsigned long)netif->rx_comms_area->addr,
++ GNTMAP_host_map, rx_ring_ref, netif->domid);
++
++ if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
++ BUG();
++
++ if (op.status) {
++ DPRINTK(" Gnttab failure mapping rx_ring_ref!\n");
++ return op.status;
++ }
++
++ netif->rx_shmem_ref = rx_ring_ref;
++ netif->rx_shmem_handle = op.handle;
++
++ return 0;
++}
++
++/* Unmap both shared-ring grants mapped by map_frontend_pages(). */
++static void unmap_frontend_pages(netif_t *netif)
++{
++ struct gnttab_unmap_grant_ref op;
++
++ gnttab_set_unmap_op(&op, (unsigned long)netif->tx_comms_area->addr,
++ GNTMAP_host_map, netif->tx_shmem_handle);
++
++ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
++ BUG();
++
++ gnttab_set_unmap_op(&op, (unsigned long)netif->rx_comms_area->addr,
++ GNTMAP_host_map, netif->rx_shmem_handle);
++
++ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
++ BUG();
++}
++
++/*
++ * Connect a backend interface to its frontend: allocate vm areas for the
++ * shared rings, map the ring grants, bind the interdomain event channel,
++ * initialise the BACK_RINGs and raise the carrier.  Idempotent: returns 0
++ * immediately if already connected (netif->irq set).
++ */
++int netif_map(netif_t *netif, unsigned long tx_ring_ref,
++ unsigned long rx_ring_ref, unsigned int evtchn)
++{
++ int err = -ENOMEM;
++ netif_tx_sring_t *txs;
++ netif_rx_sring_t *rxs;
++
++ /* Already connected through? */
++ if (netif->irq)
++ return 0;
++
++ netif->tx_comms_area = alloc_vm_area(PAGE_SIZE);
++ if (netif->tx_comms_area == NULL)
++ return -ENOMEM;
++ netif->rx_comms_area = alloc_vm_area(PAGE_SIZE);
++ if (netif->rx_comms_area == NULL)
++ goto err_rx;
++
++ err = map_frontend_pages(netif, tx_ring_ref, rx_ring_ref);
++ if (err)
++ goto err_map;
++
++ err = bind_interdomain_evtchn_to_irqhandler(
++ netif->domid, evtchn, netif_be_int, 0,
++ netif->dev->name, netif);
++ if (err < 0)
++ goto err_hypervisor;
++ netif->irq = err;
++ /* Keep the IRQ masked until net_open()/__netif_up() enables it. */
++ disable_irq(netif->irq);
++
++ txs = (netif_tx_sring_t *)netif->tx_comms_area->addr;
++ BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE);
++
++ rxs = (netif_rx_sring_t *)
++ ((char *)netif->rx_comms_area->addr);
++ BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE);
++
++ netif->rx_req_cons_peek = 0;
++
++ /* Reference held while connected; dropped in netif_disconnect(). */
++ netif_get(netif);
++
++ rtnl_lock();
++ netback_carrier_on(netif);
++ if (netif_running(netif->dev))
++ __netif_up(netif);
++ rtnl_unlock();
++
++ return 0;
++err_hypervisor:
++ unmap_frontend_pages(netif);
++err_map:
++ free_vm_area(netif->rx_comms_area);
++err_rx:
++ free_vm_area(netif->tx_comms_area);
++ return err;
++}
++
++/*
++ * Tear down a backend interface: take the carrier down, wait for every
++ * outstanding reference to drain, then release the IRQ, the shared rings
++ * and finally the net device.  Drops both the connection reference taken
++ * in netif_map() (via netif_put) and the initial netif_alloc() reference
++ * (via the bare atomic_dec).  May sleep (wait_event, del_timer_sync).
++ */
++void netif_disconnect(netif_t *netif)
++{
++ if (netback_carrier_ok(netif)) {
++ rtnl_lock();
++ netback_carrier_off(netif);
++ netif_carrier_off(netif->dev); /* discard queued packets */
++ if (netif_running(netif->dev))
++ __netif_down(netif);
++ rtnl_unlock();
++ netif_put(netif);
++ }
++
++ atomic_dec(&netif->refcnt);
++ wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0);
++
++ del_timer_sync(&netif->credit_timeout);
++ del_timer_sync(&netif->tx_queue_timeout);
++
++ if (netif->irq)
++ unbind_from_irqhandler(netif->irq, netif);
++
++ unregister_netdev(netif->dev);
++
++ /* Rings are mapped iff netif_map() completed; unwind them. */
++ if (netif->tx.sring) {
++ unmap_frontend_pages(netif);
++ free_vm_area(netif->tx_comms_area);
++ free_vm_area(netif->rx_comms_area);
++ }
++
++ free_netdev(netif->dev);
++}
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+new file mode 100644
+index 0000000..db629d4
+--- /dev/null
++++ b/drivers/xen/netback/netback.c
+@@ -0,0 +1,1637 @@
++/******************************************************************************
++ * drivers/xen/netback/netback.c
++ *
++ * Back-end of the driver for virtual network devices. This portion of the
++ * driver exports a 'unified' network-device interface that can be accessed
++ * by any operating system that implements a compatible front end. A
++ * reference front-end implementation can be found in:
++ * drivers/xen/netfront/netfront.c
++ *
++ * Copyright (c) 2002-2005, K A Fraser
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include "common.h"
++#include <xen/balloon.h>
++#include <xen/interface/memory.h>
++
++/*define NETBE_DEBUG_INTERRUPT*/
++
++/* Per-fragment RX bookkeeping: the fragment, the frontend request id it
++ * answers, and whether it is grant-copied (vs. page-flipped) to the guest. */
++struct netbk_rx_meta {
++ skb_frag_t frag;
++ int id;
++ u8 copy:1;
++};
++
++/* Entry on the list of TX requests whose mapped pages are still in use;
++ * alloc_time lets stale entries be grant-copied after a timeout. */
++struct netbk_tx_pending_inuse {
++ struct list_head list;
++ unsigned long alloc_time;
++};
++
++/* Forward declarations for the response helpers defined below. */
++static void netif_idx_release(u16 pending_idx);
++static void make_tx_response(netif_t *netif,
++ netif_tx_request_t *txp,
++ s8 st);
++static netif_rx_response_t *make_rx_response(netif_t *netif,
++ u16 id,
++ s8 st,
++ u16 offset,
++ u16 size,
++ u16 flags);
++
++/* TX and RX work is driven from tasklets (one CPU at a time each). */
++static void net_tx_action(unsigned long unused);
++static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0);
++
++static void net_rx_action(unsigned long unused);
++static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);
++
++/* net_timer re-kicks RX on memory squeeze; the pending timer re-kicks TX. */
++static struct timer_list net_timer;
++static struct timer_list netbk_tx_pending_timer;
++
++/* Size of the pending/dealloc rings; must be a power of two (see
++ * MASK_PEND_IDX). */
++#define MAX_PENDING_REQS 256
++
++/* Guest-bound packets queued by netif_be_start_xmit, drained by net_rx_action. */
++static struct sk_buff_head rx_queue;
++
++/* One page per pending TX slot, grant-mapped from the sending frontend. */
++static struct page **mmap_pages;
++static inline unsigned long idx_to_pfn(unsigned int idx)
++{
++ return page_to_pfn(mmap_pages[idx]);
++}
++
++static inline unsigned long idx_to_kaddr(unsigned int idx)
++{
++ return (unsigned long)pfn_to_kaddr(idx_to_pfn(idx));
++}
++
++/* extra field used in struct page */
++static inline void netif_set_page_index(struct page *pg, unsigned int index)
++{
++ *(unsigned long *)&pg->mapping = index;
++}
++
++/* Recover the pending-slot index stashed in pg->mapping, or -1 if this is
++ * not one of our foreign pages. */
++static inline int netif_page_index(struct page *pg)
++{
++ unsigned long idx = (unsigned long)pg->mapping;
++
++ if (!PageForeign(pg))
++ return -1;
++
++ if ((idx >= MAX_PENDING_REQS) || (mmap_pages[idx] != pg))
++ return -1;
++
++ return idx;
++}
++
++#define PKT_PROT_LEN 64
++
++/* Per-slot record of the TX request and the netif it arrived on. */
++static struct pending_tx_info {
++ netif_tx_request_t req;
++ netif_t *netif;
++} pending_tx_info[MAX_PENDING_REQS];
++static u16 pending_ring[MAX_PENDING_REQS];
++typedef unsigned int PEND_RING_IDX;
++#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
++static PEND_RING_IDX pending_prod, pending_cons;
++#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
++
++/* Freed TX SKBs get batched on this ring before return to pending_ring. */
++static u16 dealloc_ring[MAX_PENDING_REQS];
++static PEND_RING_IDX dealloc_prod, dealloc_cons;
++
++/* Doubly-linked list of in-use pending entries. */
++static struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
++static LIST_HEAD(pending_inuse_head);
++
++static struct sk_buff_head tx_queue;
++
++/* Scratch arrays for batching grant operations, one slot per pending req. */
++static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
++static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
++static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
++
++/* Interfaces with TX work outstanding, protected by the spinlock. */
++static struct list_head net_schedule_list;
++static spinlock_t net_schedule_list_lock;
++
++/* Small stock of MFNs for page-flipping receivers (see check_mfn). */
++#define MAX_MFN_ALLOC 64
++static unsigned long mfn_list[MAX_MFN_ALLOC];
++static unsigned int alloc_index = 0;
++
++/* Setting this allows the safe use of this driver without netloop. */
++static int MODPARM_copy_skb = 1;
++module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
++MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
++
++int netbk_copy_skb_mode;
++
++/* Pop a pre-stocked MFN; check_mfn() must have ensured availability. */
++static inline unsigned long alloc_mfn(void)
++{
++ BUG_ON(alloc_index == 0);
++ return mfn_list[--alloc_index];
++}
++
++/*
++ * Ensure at least 'nr' MFNs are stocked in mfn_list, topping up from Xen
++ * via XENMEM_increase_reservation if needed.  Returns 0 on success,
++ * -ENOMEM if the hypervisor could not supply enough.
++ */
++static int check_mfn(int nr)
++{
++ struct xen_memory_reservation reservation = {
++ .extent_order = 0,
++ .domid = DOMID_SELF
++ };
++ int rc;
++
++ if (likely(alloc_index >= nr))
++ return 0;
++
++ set_xen_guest_handle(reservation.extent_start, mfn_list + alloc_index);
++ reservation.nr_extents = MAX_MFN_ALLOC - alloc_index;
++ rc = HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation);
++ if (likely(rc > 0))
++ alloc_index += rc;
++
++ return alloc_index >= nr ? 0 : -ENOMEM;
++}
++
++/* Kick the TX tasklet, but only when under half the slots are in flight
++ * and some interface actually has work queued. */
++static inline void maybe_schedule_tx_action(void)
++{
++ smp_mb();
++ if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
++ !list_empty(&net_schedule_list))
++ tasklet_schedule(&net_tx_tasklet);
++}
++
++/*
++ * Copy an skb into a freshly allocated head plus whole-page fragments, so
++ * the result is suitable for page-flipping to a guest (no shared pages;
++ * head does not cross a page boundary).  Returns the new skb, or NULL on
++ * allocation failure (caller drops the packet).  GSO metadata is carried
++ * over; other header fields are copied by the caller.
++ */
++static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
++{
++ struct skb_shared_info *ninfo;
++ struct sk_buff *nskb;
++ unsigned long offset;
++ int ret;
++ int len;
++ int headlen;
++
++ BUG_ON(skb_shinfo(skb)->frag_list != NULL);
++
++ nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN);
++ if (unlikely(!nskb))
++ goto err;
++
++ skb_reserve(nskb, 16 + NET_IP_ALIGN);
++ headlen = nskb->end - nskb->data;
++ if (headlen > skb_headlen(skb))
++ headlen = skb_headlen(skb);
++ ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
++ BUG_ON(ret);
++
++ ninfo = skb_shinfo(nskb);
++ ninfo->gso_size = skb_shinfo(skb)->gso_size;
++ ninfo->gso_type = skb_shinfo(skb)->gso_type;
++
++ offset = headlen;
++ len = skb->len - headlen;
++
++ nskb->len = skb->len;
++ nskb->data_len = len;
++ nskb->truesize += len;
++
++ /* Copy the remainder into page-sized fragments. */
++ while (len) {
++ struct page *page;
++ int copy;
++ int zero;
++
++ if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
++ dump_stack();
++ goto err_free;
++ }
++
++ copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
++ /* Zero the tail of a partial final page before flipping it. */
++ zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
++
++ page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
++ if (unlikely(!page))
++ goto err_free;
++
++ ret = skb_copy_bits(skb, offset, page_address(page), copy);
++ BUG_ON(ret);
++
++ ninfo->frags[ninfo->nr_frags].page = page;
++ ninfo->frags[ninfo->nr_frags].page_offset = 0;
++ ninfo->frags[ninfo->nr_frags].size = copy;
++ ninfo->nr_frags++;
++
++ offset += copy;
++ len -= copy;
++ }
++
++ /* Rebase the protocol header pointers onto the new head buffer. */
++ offset = nskb->data - skb->data;
++
++ nskb->h.raw = skb->h.raw + offset;
++ nskb->nh.raw = skb->nh.raw + offset;
++ nskb->mac.raw = skb->mac.raw + offset;
++
++ return nskb;
++
++ err_free:
++ kfree_skb(nskb);
++ err:
++ return NULL;
++}
++
++/* Worst-case RX ring slots one packet may need on this interface. */
++static inline int netbk_max_required_rx_slots(netif_t *netif)
++{
++ if (netif->features & (NETIF_F_SG|NETIF_F_TSO))
++ return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
++ return 1; /* all in one */
++}
++
++/* True when the frontend cannot currently supply enough RX slots for a
++ * worst-case packet (either no free requests or no response room). */
++static inline int netbk_queue_full(netif_t *netif)
++{
++ RING_IDX peek = netif->rx_req_cons_peek;
++ RING_IDX needed = netbk_max_required_rx_slots(netif);
++
++ return ((netif->rx.sring->req_prod - peek) < needed) ||
++ ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
++}
++
++/* tx_queue_timeout callback: restart a queue stopped for lack of RX slots. */
++static void tx_queue_callback(unsigned long data)
++{
++ netif_t *netif = (netif_t *)data;
++ if (netif_schedulable(netif))
++ netif_wake_queue(netif->dev);
++}
++
++/*
++ * hard_start_xmit hook: queue a guest-bound packet on rx_queue and kick
++ * the RX tasklet.  Packets are dropped (counted in tx_dropped) rather
++ * than queued when the frontend has no receive buffers.  Takes a netif
++ * reference per queued packet, released by net_rx_action().
++ */
++int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
++{
++ netif_t *netif = netdev_priv(dev);
++
++ BUG_ON(skb->dev != dev);
++
++ /* Drop the packet if the target domain has no receive buffers. */
++ if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
++ goto drop;
++
++ /*
++ * Copy the packet here if it's destined for a flipping interface
++ * but isn't flippable (e.g. extra references to data).
++ * XXX For now we also copy skbuffs whose head crosses a page
++ * boundary, because netbk_gop_skb can't handle them.
++ */
++ if (!netif->copying_receiver ||
++ ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE)) {
++ struct sk_buff *nskb = netbk_copy_skb(skb);
++ if ( unlikely(nskb == NULL) )
++ goto drop;
++ /* Copy only the header fields we use in this driver. */
++ nskb->dev = skb->dev;
++ nskb->ip_summed = skb->ip_summed;
++ nskb->proto_data_valid = skb->proto_data_valid;
++ dev_kfree_skb(skb);
++ skb = nskb;
++ }
++
++ /* Reserve the ring slots this packet will consume (frags + header,
++ * plus one extra_info slot when GSO metadata must be sent). */
++ netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 +
++ !!skb_shinfo(skb)->gso_size;
++ netif_get(netif);
++
++ if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
++ netif->rx.sring->req_event = netif->rx_req_cons_peek +
++ netbk_max_required_rx_slots(netif);
++ mb(); /* request notification /then/ check & stop the queue */
++ if (netbk_queue_full(netif)) {
++ netif_stop_queue(dev);
++ /*
++ * Schedule 500ms timeout to restart the queue, thus
++ * ensuring that an inactive queue will be drained.
++ * Packets will be immediately be dropped until more
++ * receive buffers become available (see
++ * netbk_queue_full() check above).
++ */
++ netif->tx_queue_timeout.data = (unsigned long)netif;
++ netif->tx_queue_timeout.function = tx_queue_callback;
++ __mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
++ }
++ }
++
++ skb_queue_tail(&rx_queue, skb);
++ tasklet_schedule(&net_rx_tasklet);
++
++ return 0;
++
++ drop:
++ netif->stats.tx_dropped++;
++ dev_kfree_skb(skb);
++ return 0;
++}
++
++/* Dead code: optional NAPI integration hooks, disabled since forever. */
++#if 0
++static void xen_network_done_notify(void)
++{
++ static struct net_device *eth0_dev = NULL;
++ if (unlikely(eth0_dev == NULL))
++ eth0_dev = __dev_get_by_name("eth0");
++ netif_rx_schedule(eth0_dev);
++}
++/*
++ * Add following to poll() function in NAPI driver (Tigon3 is example):
++ * if ( xen_network_done() )
++ * tg3_enable_ints(tp);
++ */
++int xen_network_done(void)
++{
++ return skb_queue_empty(&rx_queue);
++}
++#endif
++
++/* Producer/consumer cursors into the batched RX operation arrays built by
++ * netbk_gop_skb() and consumed by netbk_check_gop(). */
++struct netrx_pending_operations {
++ unsigned trans_prod, trans_cons;
++ unsigned mmu_prod, mmu_mcl;
++ unsigned mcl_prod, mcl_cons;
++ unsigned copy_prod, copy_cons;
++ unsigned meta_prod, meta_cons;
++ mmu_update_t *mmu;
++ gnttab_transfer_t *trans;
++ gnttab_copy_t *copy;
++ multicall_entry_t *mcl;
++ struct netbk_rx_meta *meta;
++};
++
++/* Set up the grant operations for this fragment. If it's a flipping
++ interface, we also set up the unmap request from here. */
++static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta,
++ int i, struct netrx_pending_operations *npo,
++ struct page *page, unsigned long size,
++ unsigned long offset)
++{
++ mmu_update_t *mmu;
++ gnttab_transfer_t *gop;
++ gnttab_copy_t *copy_gop;
++ multicall_entry_t *mcl;
++ netif_rx_request_t *req;
++ unsigned long old_mfn, new_mfn;
++ int idx = netif_page_index(page);
++
++ old_mfn = virt_to_mfn(page_address(page));
++
++ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
++ if (netif->copying_receiver) {
++ /* The fragment needs to be copied rather than
++ flipped. */
++ meta->copy = 1;
++ copy_gop = npo->copy + npo->copy_prod++;
++ copy_gop->flags = GNTCOPY_dest_gref;
++ if (idx > -1) {
++ /* Page is still grant-mapped from another guest's TX
++ request: copy grant-to-grant rather than from our
++ own pseudophysical address. */
++ struct pending_tx_info *src_pend = &pending_tx_info[idx];
++ copy_gop->source.domid = src_pend->netif->domid;
++ copy_gop->source.u.ref = src_pend->req.gref;
++ copy_gop->flags |= GNTCOPY_source_gref;
++ } else {
++ copy_gop->source.domid = DOMID_SELF;
++ copy_gop->source.u.gmfn = old_mfn;
++ }
++ copy_gop->source.offset = offset;
++ copy_gop->dest.domid = netif->domid;
++ copy_gop->dest.offset = 0;
++ copy_gop->dest.u.ref = req->gref;
++ copy_gop->len = size;
++ } else {
++ meta->copy = 0;
++ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
++ new_mfn = alloc_mfn();
++
++ /*
++ * Set the new P2M table entry before
++ * reassigning the old data page. Heed the
++ * comment in pgtable-2level.h:pte_page(). :-)
++ */
++ set_phys_to_machine(page_to_pfn(page), new_mfn);
++
++ mcl = npo->mcl + npo->mcl_prod++;
++ MULTI_update_va_mapping(mcl,
++ (unsigned long)page_address(page),
++ pfn_pte_ma(new_mfn, PAGE_KERNEL),
++ 0);
++
++ mmu = npo->mmu + npo->mmu_prod++;
++ mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
++ MMU_MACHPHYS_UPDATE;
++ mmu->val = page_to_pfn(page);
++ }
++
++ /* Transfer the old page's MFN to the receiving guest. */
++ gop = npo->trans + npo->trans_prod++;
++ gop->mfn = old_mfn;
++ gop->domid = netif->domid;
++ gop->ref = req->gref;
++ }
++ return req->id;
++}
++
++/*
++ * Queue the grant operations for one skb: GSO metadata goes in the head
++ * meta slot (frag.page_offset/size are reused to carry gso_type/gso_size),
++ * then one op per fragment, then the header last (see comment below).
++ * Advances netif->rx.req_cons past every slot consumed.
++ */
++static void netbk_gop_skb(struct sk_buff *skb,
++ struct netrx_pending_operations *npo)
++{
++ netif_t *netif = netdev_priv(skb->dev);
++ int nr_frags = skb_shinfo(skb)->nr_frags;
++ int i;
++ int extra;
++ struct netbk_rx_meta *head_meta, *meta;
++
++ head_meta = npo->meta + npo->meta_prod++;
++ head_meta->frag.page_offset = skb_shinfo(skb)->gso_type;
++ head_meta->frag.size = skb_shinfo(skb)->gso_size;
++ extra = !!head_meta->frag.size + 1;
++
++ for (i = 0; i < nr_frags; i++) {
++ meta = npo->meta + npo->meta_prod++;
++ meta->frag = skb_shinfo(skb)->frags[i];
++ meta->id = netbk_gop_frag(netif, meta, i + extra, npo,
++ meta->frag.page,
++ meta->frag.size,
++ meta->frag.page_offset);
++ }
++
++ /*
++ * This must occur at the end to ensure that we don't trash skb_shinfo
++ * until we're done. We know that the head doesn't cross a page
++ * boundary because such packets get copied in netif_be_start_xmit.
++ */
++ head_meta->id = netbk_gop_frag(netif, head_meta, 0, npo,
++ virt_to_page(skb->data),
++ skb_headlen(skb),
++ offset_in_page(skb->data));
++
++ netif->rx.req_cons += nr_frags + extra;
++}
++
++/* Drop our references on the fragment pages of a flipped skb. */
++static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
++{
++ int i;
++
++ for (i = 0; i < nr_frags; i++)
++ put_page(meta[i].frag.page);
++}
++
++/* This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was
++ used to set up the operations on the top of
++ netrx_pending_operations, which have since been done. Check that
++ they didn't give any errors and advance over them. */
++static int netbk_check_gop(int nr_frags, domid_t domid,
++ struct netrx_pending_operations *npo)
++{
++ multicall_entry_t *mcl;
++ gnttab_transfer_t *gop;
++ gnttab_copy_t *copy_op;
++ int status = NETIF_RSP_OKAY;
++ int i;
++
++ /* <= because the header uses one slot in addition to the frags. */
++ for (i = 0; i <= nr_frags; i++) {
++ if (npo->meta[npo->meta_cons + i].copy) {
++ copy_op = npo->copy + npo->copy_cons++;
++ if (copy_op->status != GNTST_okay) {
++ DPRINTK("Bad status %d from copy to DOM%d.\n",
++ copy_op->status, domid);
++ status = NETIF_RSP_ERROR;
++ }
++ } else {
++ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
++ mcl = npo->mcl + npo->mcl_cons++;
++ /* The update_va_mapping() must not fail. */
++ BUG_ON(mcl->result != 0);
++ }
++
++ gop = npo->trans + npo->trans_cons++;
++ /* Check the reassignment error code. */
++ if (gop->status != 0) {
++ DPRINTK("Bad status %d from grant transfer to DOM%u\n",
++ gop->status, domid);
++ /*
++ * Page no longer belongs to us unless
++ * GNTST_bad_page, but that should be
++ * a fatal error anyway.
++ */
++ BUG_ON(gop->status == GNTST_bad_page);
++ status = NETIF_RSP_ERROR;
++ }
++ }
++ }
++
++ return status;
++}
++
++/* Emit one RX response per fragment; the last carries no 'more_data'. */
++static void netbk_add_frag_responses(netif_t *netif, int status,
++ struct netbk_rx_meta *meta, int nr_frags)
++{
++ int i;
++ unsigned long offset;
++
++ for (i = 0; i < nr_frags; i++) {
++ int id = meta[i].id;
++ int flags = (i == nr_frags - 1) ? 0 : NETRXF_more_data;
++
++ /* Copied data always lands at offset 0 of the dest grant. */
++ if (meta[i].copy)
++ offset = 0;
++ else
++ offset = meta[i].frag.page_offset;
++ make_rx_response(netif, id, status, offset,
++ meta[i].frag.size, flags);
++ }
++}
++
++/*
++ * RX tasklet: drain rx_queue, build one batch of grant-copy / grant-
++ * transfer / MMU-update multicalls for all dequeued skbs, issue them in a
++ * single hypercall, then push responses to each frontend and notify each
++ * interface's event channel at most once per run.  On MFN shortage the
++ * current skb is requeued and net_timer retries in one second.
++ */
++static void net_rx_action(unsigned long unused)
++{
++ netif_t *netif = NULL;
++ s8 status;
++ u16 id, irq, flags;
++ netif_rx_response_t *resp;
++ multicall_entry_t *mcl;
++ struct sk_buff_head rxq;
++ struct sk_buff *skb;
++ int notify_nr = 0;
++ int ret;
++ int nr_frags;
++ int count;
++ unsigned long offset;
++
++ /*
++ * Putting hundreds of bytes on the stack is considered rude.
++ * Static works because a tasklet can only be on one CPU at any time.
++ */
++ static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+3];
++ static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
++ static gnttab_transfer_t grant_trans_op[NET_RX_RING_SIZE];
++ static gnttab_copy_t grant_copy_op[NET_RX_RING_SIZE];
++ static unsigned char rx_notify[NR_IRQS];
++ static u16 notify_list[NET_RX_RING_SIZE];
++ static struct netbk_rx_meta meta[NET_RX_RING_SIZE];
++
++ struct netrx_pending_operations npo = {
++ mmu: rx_mmu,
++ trans: grant_trans_op,
++ copy: grant_copy_op,
++ mcl: rx_mcl,
++ meta: meta};
++
++ skb_queue_head_init(&rxq);
++
++ count = 0;
++
++ /* Phase 1: dequeue skbs and accumulate their grant operations. */
++ while ((skb = skb_dequeue(&rx_queue)) != NULL) {
++ nr_frags = skb_shinfo(skb)->nr_frags;
++ /* Stash the frag count in skb->cb for phase 2. */
++ *(int *)skb->cb = nr_frags;
++
++ if (!xen_feature(XENFEAT_auto_translated_physmap) &&
++ !((netif_t *)netdev_priv(skb->dev))->copying_receiver &&
++ check_mfn(nr_frags + 1)) {
++ /* Memory squeeze? Back off for an arbitrary while. */
++ if ( net_ratelimit() )
++ WPRINTK("Memory squeeze in netback "
++ "driver.\n");
++ mod_timer(&net_timer, jiffies + HZ);
++ skb_queue_head(&rx_queue, skb);
++ break;
++ }
++
++ netbk_gop_skb(skb, &npo);
++
++ count += nr_frags + 1;
++
++ __skb_queue_tail(&rxq, skb);
++
++ /* Filled the batch queue? */
++ if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE)
++ break;
++ }
++
++ BUG_ON(npo.meta_prod > ARRAY_SIZE(meta));
++
++ /* Append the batched MMU update, transfer and copy multicalls. */
++ npo.mmu_mcl = npo.mcl_prod;
++ if (npo.mcl_prod) {
++ BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
++ BUG_ON(npo.mmu_prod > ARRAY_SIZE(rx_mmu));
++ mcl = npo.mcl + npo.mcl_prod++;
++
++ BUG_ON(mcl[-1].op != __HYPERVISOR_update_va_mapping);
++ mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
++
++ mcl->op = __HYPERVISOR_mmu_update;
++ mcl->args[0] = (unsigned long)rx_mmu;
++ mcl->args[1] = npo.mmu_prod;
++ mcl->args[2] = 0;
++ mcl->args[3] = DOMID_SELF;
++ }
++
++ if (npo.trans_prod) {
++ BUG_ON(npo.trans_prod > ARRAY_SIZE(grant_trans_op));
++ mcl = npo.mcl + npo.mcl_prod++;
++ mcl->op = __HYPERVISOR_grant_table_op;
++ mcl->args[0] = GNTTABOP_transfer;
++ mcl->args[1] = (unsigned long)grant_trans_op;
++ mcl->args[2] = npo.trans_prod;
++ }
++
++ if (npo.copy_prod) {
++ BUG_ON(npo.copy_prod > ARRAY_SIZE(grant_copy_op));
++ mcl = npo.mcl + npo.mcl_prod++;
++ mcl->op = __HYPERVISOR_grant_table_op;
++ mcl->args[0] = GNTTABOP_copy;
++ mcl->args[1] = (unsigned long)grant_copy_op;
++ mcl->args[2] = npo.copy_prod;
++ }
++
++ /* Nothing to do? */
++ if (!npo.mcl_prod)
++ return;
++
++ BUG_ON(npo.mcl_prod > ARRAY_SIZE(rx_mcl));
++
++ /* One hypercall executes the whole batch. */
++ ret = HYPERVISOR_multicall(npo.mcl, npo.mcl_prod);
++ BUG_ON(ret != 0);
++ /* The mmu_machphys_update() must not fail. */
++ BUG_ON(npo.mmu_mcl && npo.mcl[npo.mmu_mcl].result != 0);
++
++ /* Phase 2: check results and push responses per skb. */
++ while ((skb = __skb_dequeue(&rxq)) != NULL) {
++ nr_frags = *(int *)skb->cb;
++
++ netif = netdev_priv(skb->dev);
++ /* We can't rely on skb_release_data to release the
++ pages used by fragments for us, since it tries to
++ touch the pages in the fraglist. If we're in
++ flipping mode, that doesn't work. In copying mode,
++ we still have access to all of the pages, and so
++ it's safe to let release_data deal with it. */
++ /* (Freeing the fragments is safe since we copy
++ non-linear skbs destined for flipping interfaces) */
++ if (!netif->copying_receiver) {
++ atomic_set(&(skb_shinfo(skb)->dataref), 1);
++ skb_shinfo(skb)->frag_list = NULL;
++ skb_shinfo(skb)->nr_frags = 0;
++ netbk_free_pages(nr_frags, meta + npo.meta_cons + 1);
++ }
++
++ netif->stats.tx_bytes += skb->len;
++ netif->stats.tx_packets++;
++
++ status = netbk_check_gop(nr_frags, netif->domid, &npo);
++
++ id = meta[npo.meta_cons].id;
++ flags = nr_frags ? NETRXF_more_data : 0;
++
++ if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
++ flags |= NETRXF_csum_blank | NETRXF_data_validated;
++ else if (skb->proto_data_valid) /* remote but checksummed? */
++ flags |= NETRXF_data_validated;
++
++ if (meta[npo.meta_cons].copy)
++ offset = 0;
++ else
++ offset = offset_in_page(skb->data);
++ resp = make_rx_response(netif, id, status, offset,
++ skb_headlen(skb), flags);
++
++ /* head meta frag.size carries gso_size (see netbk_gop_skb);
++ non-zero means a GSO extra_info slot must follow. */
++ if (meta[npo.meta_cons].frag.size) {
++ struct netif_extra_info *gso =
++ (struct netif_extra_info *)
++ RING_GET_RESPONSE(&netif->rx,
++ netif->rx.rsp_prod_pvt++);
++
++ resp->flags |= NETRXF_extra_info;
++
++ gso->u.gso.size = meta[npo.meta_cons].frag.size;
++ gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
++ gso->u.gso.pad = 0;
++ gso->u.gso.features = 0;
++
++ gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
++ gso->flags = 0;
++ }
++
++ netbk_add_frag_responses(netif, status,
++ meta + npo.meta_cons + 1,
++ nr_frags);
++
++ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
++ irq = netif->irq;
++ /* Coalesce notifications: at most one per IRQ per run. */
++ if (ret && !rx_notify[irq]) {
++ rx_notify[irq] = 1;
++ notify_list[notify_nr++] = irq;
++ }
++
++ if (netif_queue_stopped(netif->dev) &&
++ netif_schedulable(netif) &&
++ !netbk_queue_full(netif))
++ netif_wake_queue(netif->dev);
++
++ netif_put(netif);
++ dev_kfree_skb(skb);
++ npo.meta_cons += nr_frags + 1;
++ }
++
++ while (notify_nr != 0) {
++ irq = notify_list[--notify_nr];
++ rx_notify[irq] = 0;
++ notify_remote_via_irq(irq);
++ }
++
++ /* More work to do? */
++ if (!skb_queue_empty(&rx_queue) && !timer_pending(&net_timer))
++ tasklet_schedule(&net_rx_tasklet);
++#if 0
++ else
++ xen_network_done_notify();
++#endif
++}
++
++/* net_timer callback: retry RX processing after a memory squeeze. */
++static void net_alarm(unsigned long unused)
++{
++ tasklet_schedule(&net_rx_tasklet);
++}
++
++/* netbk_tx_pending_timer callback: re-run TX to reap stuck pending slots. */
++static void netbk_tx_pending_timeout(unsigned long unused)
++{
++ tasklet_schedule(&net_tx_tasklet);
++}
++
++/* get_stats hook: statistics live in the per-interface netif_t. */
++struct net_device_stats *netif_be_get_stats(struct net_device *dev)
++{
++ netif_t *netif = netdev_priv(dev);
++ return &netif->stats;
++}
++
++/* A netif is on the schedule list iff its list.next pointer is non-NULL. */
++static int __on_net_schedule_list(netif_t *netif)
++{
++ return netif->list.next != NULL;
++}
++
++/* Unlink a netif from the TX schedule list, dropping its list reference. */
++static void remove_from_net_schedule_list(netif_t *netif)
++{
++ spin_lock_irq(&net_schedule_list_lock);
++ if (likely(__on_net_schedule_list(netif))) {
++ list_del(&netif->list);
++ netif->list.next = NULL;
++ netif_put(netif);
++ }
++ spin_unlock_irq(&net_schedule_list_lock);
++}
++
++/* Queue a netif for TX processing; takes a reference while listed.
++ * The unlocked early check is an optimisation, re-checked under the lock. */
++static void add_to_net_schedule_list_tail(netif_t *netif)
++{
++ if (__on_net_schedule_list(netif))
++ return;
++
++ spin_lock_irq(&net_schedule_list_lock);
++ if (!__on_net_schedule_list(netif) &&
++ likely(netif_schedulable(netif))) {
++ list_add_tail(&netif->list, &net_schedule_list);
++ netif_get(netif);
++ }
++ spin_unlock_irq(&net_schedule_list_lock);
++}
++
++/*
++ * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER:
++ * If this driver is pipelining transmit requests then we can be very
++ * aggressive in avoiding new-packet notifications -- frontend only needs to
++ * send a notification if there are no outstanding unreceived responses.
++ * If we may be buffer transmit buffers for any reason then we must be rather
++ * more conservative and treat this as the final check for pending work.
++ */
++void netif_schedule_work(netif_t *netif)
++{
++ int more_to_do;
++
++#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
++ more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx);
++#else
++ RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
++#endif
++
++ if (more_to_do) {
++ add_to_net_schedule_list_tail(netif);
++ maybe_schedule_tx_action();
++ }
++}
++
++/* Remove an interface from TX scheduling (e.g. on down or disconnect). */
++void netif_deschedule_work(netif_t *netif)
++{
++ remove_from_net_schedule_list(netif);
++}
++
++
++/* Replenish the rate-limit credit for one interface. */
++static void tx_add_credit(netif_t *netif)
++{
++ unsigned long max_burst, max_credit;
++
++ /*
++ * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
++ * Otherwise the interface can seize up due to insufficient credit.
++ */
++ max_burst = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size;
++ max_burst = min(max_burst, 131072UL);
++ max_burst = max(max_burst, netif->credit_bytes);
++
++ /* Take care that adding a new chunk of credit doesn't wrap to zero. */
++ max_credit = netif->remaining_credit + netif->credit_bytes;
++ if (max_credit < netif->remaining_credit)
++ max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
++
++ netif->remaining_credit = min(max_credit, max_burst);
++}
++
++/* credit_timeout callback: top up credit and resume TX work. */
++static void tx_credit_callback(unsigned long data)
++{
++ netif_t *netif = (netif_t *)data;
++ tx_add_credit(netif);
++ netif_schedule_work(netif);
++}
++
++/* Grant-copy a delayed-copy page so its grant can be released. */
++static inline int copy_pending_req(PEND_RING_IDX pending_idx)
++{
++ return gnttab_copy_grant_page(grant_tx_handle[pending_idx],
++ &mmap_pages[pending_idx]);
++}
++
++/*
++ * Release TX grants the backend has finished with: batch-unmap the pages
++ * queued on dealloc_ring, grant-copy entries that have been in use for
++ * more than 500ms (delayed-copy mode only), then send a TX response for
++ * each released slot and recycle it onto pending_ring.
++ */
++inline static void net_tx_action_dealloc(void)
++{
++ struct netbk_tx_pending_inuse *inuse, *n;
++ gnttab_unmap_grant_ref_t *gop;
++ u16 pending_idx;
++ PEND_RING_IDX dc, dp;
++ netif_t *netif;
++ int ret;
++ LIST_HEAD(list);
++
++ dc = dealloc_cons;
++ gop = tx_unmap_ops;
++
++ /*
++ * Free up any grants we have finished using
++ */
++ do {
++ dp = dealloc_prod;
++
++ /* Ensure we see all indices enqueued by netif_idx_release(). */
++ smp_rmb();
++
++ while (dc != dp) {
++ unsigned long pfn;
++
++ pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
++ list_move_tail(&pending_inuse[pending_idx].list, &list);
++
++ pfn = idx_to_pfn(pending_idx);
++ /* Already unmapped? */
++ if (!phys_to_machine_mapping_valid(pfn))
++ continue;
++
++ gnttab_set_unmap_op(gop, idx_to_kaddr(pending_idx),
++ GNTMAP_host_map,
++ grant_tx_handle[pending_idx]);
++ gop++;
++ }
++
++ if (netbk_copy_skb_mode != NETBK_DELAYED_COPY_SKB ||
++ list_empty(&pending_inuse_head))
++ break;
++
++ /* Copy any entries that have been pending for too long. */
++ list_for_each_entry_safe(inuse, n, &pending_inuse_head, list) {
++ if (time_after(inuse->alloc_time + HZ / 2, jiffies))
++ break;
++
++ switch (copy_pending_req(inuse - pending_inuse)) {
++ case 0:
++ list_move_tail(&inuse->list, &list);
++ continue;
++ case -EBUSY:
++ list_del_init(&inuse->list);
++ continue;
++ case -ENOENT:
++ continue;
++ }
++
++ break;
++ }
++ } while (dp != dealloc_prod);
++
++ dealloc_cons = dc;
++
++ /* Unmap everything gathered above in one grant-table hypercall. */
++ ret = HYPERVISOR_grant_table_op(
++ GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops);
++ BUG_ON(ret);
++
++ /* Respond to the frontend and recycle each released slot. */
++ list_for_each_entry_safe(inuse, n, &list, list) {
++ pending_idx = inuse - pending_inuse;
++
++ netif = pending_tx_info[pending_idx].netif;
++
++ make_tx_response(netif, &pending_tx_info[pending_idx].req,
++ NETIF_RSP_OKAY);
++
++ /* Ready for next use. */
++ gnttab_reset_grant_page(mmap_pages[pending_idx]);
++
++ pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
++
++ netif_put(netif);
++
++ list_del_init(&inuse->list);
++ }
++}
++
++static void netbk_tx_err(netif_t *netif, netif_tx_request_t *txp, RING_IDX end)
++{
++ RING_IDX cons = netif->tx.req_cons;
++
++ do {
++ make_tx_response(netif, txp, NETIF_RSP_ERROR);
++ if (cons >= end)
++ break;
++ txp = RING_GET_REQUEST(&netif->tx, cons++);
++ } while (1);
++ netif->tx.req_cons = cons;
++ netif_schedule_work(netif);
++ netif_put(netif);
++}
++
++static int netbk_count_requests(netif_t *netif, netif_tx_request_t *first,
++ netif_tx_request_t *txp, int work_to_do)
++{
++ RING_IDX cons = netif->tx.req_cons;
++ int frags = 0;
++
++ if (!(first->flags & NETTXF_more_data))
++ return 0;
++
++ do {
++ if (frags >= work_to_do) {
++ DPRINTK("Need more frags\n");
++ return -frags;
++ }
++
++ if (unlikely(frags >= MAX_SKB_FRAGS)) {
++ DPRINTK("Too many frags\n");
++ return -frags;
++ }
++
++ memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags),
++ sizeof(*txp));
++ if (txp->size > first->size) {
++ DPRINTK("Frags galore\n");
++ return -frags;
++ }
++
++ first->size -= txp->size;
++ frags++;
++
++ if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
++ DPRINTK("txp->offset: %x, size: %u\n",
++ txp->offset, txp->size);
++ return -frags;
++ }
++ } while ((txp++)->flags & NETTXF_more_data);
++
++ return frags;
++}
++
++static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif,
++ struct sk_buff *skb,
++ netif_tx_request_t *txp,
++ gnttab_map_grant_ref_t *mop)
++{
++ struct skb_shared_info *shinfo = skb_shinfo(skb);
++ skb_frag_t *frags = shinfo->frags;
++ unsigned long pending_idx = *((u16 *)skb->data);
++ int i, start;
++
++ /* Skip first skb fragment if it is on same page as header fragment. */
++ start = ((unsigned long)shinfo->frags[0].page == pending_idx);
++
++ for (i = start; i < shinfo->nr_frags; i++, txp++) {
++ pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)];
++
++ gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx),
++ GNTMAP_host_map | GNTMAP_readonly,
++ txp->gref, netif->domid);
++
++ memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
++ netif_get(netif);
++ pending_tx_info[pending_idx].netif = netif;
++ frags[i].page = (void *)pending_idx;
++ }
++
++ return mop;
++}
++
++static int netbk_tx_check_mop(struct sk_buff *skb,
++ gnttab_map_grant_ref_t **mopp)
++{
++ gnttab_map_grant_ref_t *mop = *mopp;
++ int pending_idx = *((u16 *)skb->data);
++ netif_t *netif = pending_tx_info[pending_idx].netif;
++ netif_tx_request_t *txp;
++ struct skb_shared_info *shinfo = skb_shinfo(skb);
++ int nr_frags = shinfo->nr_frags;
++ int i, err, start;
++
++ /* Check status of header. */
++ err = mop->status;
++ if (unlikely(err)) {
++ txp = &pending_tx_info[pending_idx].req;
++ make_tx_response(netif, txp, NETIF_RSP_ERROR);
++ pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
++ netif_put(netif);
++ } else {
++ set_phys_to_machine(
++ __pa(idx_to_kaddr(pending_idx)) >> PAGE_SHIFT,
++ FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
++ grant_tx_handle[pending_idx] = mop->handle;
++ }
++
++ /* Skip first skb fragment if it is on same page as header fragment. */
++ start = ((unsigned long)shinfo->frags[0].page == pending_idx);
++
++ for (i = start; i < nr_frags; i++) {
++ int j, newerr;
++
++ pending_idx = (unsigned long)shinfo->frags[i].page;
++
++ /* Check error status: if okay then remember grant handle. */
++ newerr = (++mop)->status;
++ if (likely(!newerr)) {
++ set_phys_to_machine(
++ __pa(idx_to_kaddr(pending_idx))>>PAGE_SHIFT,
++ FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
++ grant_tx_handle[pending_idx] = mop->handle;
++ /* Had a previous error? Invalidate this fragment. */
++ if (unlikely(err))
++ netif_idx_release(pending_idx);
++ continue;
++ }
++
++ /* Error on this fragment: respond to client with an error. */
++ txp = &pending_tx_info[pending_idx].req;
++ make_tx_response(netif, txp, NETIF_RSP_ERROR);
++ pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
++ netif_put(netif);
++
++ /* Not the first error? Preceding frags already invalidated. */
++ if (err)
++ continue;
++
++ /* First error: invalidate header and preceding fragments. */
++ pending_idx = *((u16 *)skb->data);
++ netif_idx_release(pending_idx);
++ for (j = start; j < i; j++) {
++ pending_idx = (unsigned long)shinfo->frags[i].page;
++ netif_idx_release(pending_idx);
++ }
++
++ /* Remember the error: invalidate all subsequent fragments. */
++ err = newerr;
++ }
++
++ *mopp = mop + 1;
++ return err;
++}
++
++static void netbk_fill_frags(struct sk_buff *skb)
++{
++ struct skb_shared_info *shinfo = skb_shinfo(skb);
++ int nr_frags = shinfo->nr_frags;
++ int i;
++
++ for (i = 0; i < nr_frags; i++) {
++ skb_frag_t *frag = shinfo->frags + i;
++ netif_tx_request_t *txp;
++ unsigned long pending_idx;
++
++ pending_idx = (unsigned long)frag->page;
++
++ pending_inuse[pending_idx].alloc_time = jiffies;
++ list_add_tail(&pending_inuse[pending_idx].list,
++ &pending_inuse_head);
++
++ txp = &pending_tx_info[pending_idx].req;
++ frag->page = virt_to_page(idx_to_kaddr(pending_idx));
++ frag->size = txp->size;
++ frag->page_offset = txp->offset;
++
++ skb->len += txp->size;
++ skb->data_len += txp->size;
++ skb->truesize += txp->size;
++ }
++}
++
++int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras,
++ int work_to_do)
++{
++ struct netif_extra_info extra;
++ RING_IDX cons = netif->tx.req_cons;
++
++ do {
++ if (unlikely(work_to_do-- <= 0)) {
++ DPRINTK("Missing extra info\n");
++ return -EBADR;
++ }
++
++ memcpy(&extra, RING_GET_REQUEST(&netif->tx, cons),
++ sizeof(extra));
++ if (unlikely(!extra.type ||
++ extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
++ netif->tx.req_cons = ++cons;
++ DPRINTK("Invalid extra type: %d\n", extra.type);
++ return -EINVAL;
++ }
++
++ memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
++ netif->tx.req_cons = ++cons;
++ } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
++
++ return work_to_do;
++}
++
++static int netbk_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso)
++{
++ if (!gso->u.gso.size) {
++ DPRINTK("GSO size must not be zero.\n");
++ return -EINVAL;
++ }
++
++ /* Currently only TCPv4 S.O. is supported. */
++ if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
++ DPRINTK("Bad GSO type %d.\n", gso->u.gso.type);
++ return -EINVAL;
++ }
++
++ skb_shinfo(skb)->gso_size = gso->u.gso.size;
++ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
++
++ /* Header must be checked, and gso_segs computed. */
++ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
++ skb_shinfo(skb)->gso_segs = 0;
++
++ return 0;
++}
++
++/* Called after netfront has transmitted */
++static void net_tx_action(unsigned long unused)
++{
++ struct list_head *ent;
++ struct sk_buff *skb;
++ netif_t *netif;
++ netif_tx_request_t txreq;
++ netif_tx_request_t txfrags[MAX_SKB_FRAGS];
++ struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
++ u16 pending_idx;
++ RING_IDX i;
++ gnttab_map_grant_ref_t *mop;
++ unsigned int data_len;
++ int ret, work_to_do;
++
++ if (dealloc_cons != dealloc_prod)
++ net_tx_action_dealloc();
++
++ mop = tx_map_ops;
++ while (((NR_PENDING_REQS + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
++ !list_empty(&net_schedule_list)) {
++ /* Get a netif from the list with work to do. */
++ ent = net_schedule_list.next;
++ netif = list_entry(ent, netif_t, list);
++ netif_get(netif);
++ remove_from_net_schedule_list(netif);
++
++ RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
++ if (!work_to_do) {
++ netif_put(netif);
++ continue;
++ }
++
++ i = netif->tx.req_cons;
++ rmb(); /* Ensure that we see the request before we copy it. */
++ memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));
++
++ /* Credit-based scheduling. */
++ if (txreq.size > netif->remaining_credit) {
++ unsigned long now = jiffies;
++ unsigned long next_credit =
++ netif->credit_timeout.expires +
++ msecs_to_jiffies(netif->credit_usec / 1000);
++
++ /* Timer could already be pending in rare cases. */
++ if (timer_pending(&netif->credit_timeout)) {
++ netif_put(netif);
++ continue;
++ }
++
++ /* Passed the point where we can replenish credit? */
++ if (time_after_eq(now, next_credit)) {
++ netif->credit_timeout.expires = now;
++ tx_add_credit(netif);
++ }
++
++ /* Still too big to send right now? Set a callback. */
++ if (txreq.size > netif->remaining_credit) {
++ netif->credit_timeout.data =
++ (unsigned long)netif;
++ netif->credit_timeout.function =
++ tx_credit_callback;
++ __mod_timer(&netif->credit_timeout,
++ next_credit);
++ netif_put(netif);
++ continue;
++ }
++ }
++ netif->remaining_credit -= txreq.size;
++
++ work_to_do--;
++ netif->tx.req_cons = ++i;
++
++ memset(extras, 0, sizeof(extras));
++ if (txreq.flags & NETTXF_extra_info) {
++ work_to_do = netbk_get_extras(netif, extras,
++ work_to_do);
++ i = netif->tx.req_cons;
++ if (unlikely(work_to_do < 0)) {
++ netbk_tx_err(netif, &txreq, i);
++ continue;
++ }
++ }
++
++ ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do);
++ if (unlikely(ret < 0)) {
++ netbk_tx_err(netif, &txreq, i - ret);
++ continue;
++ }
++ i += ret;
++
++ if (unlikely(txreq.size < ETH_HLEN)) {
++ DPRINTK("Bad packet size: %d\n", txreq.size);
++ netbk_tx_err(netif, &txreq, i);
++ continue;
++ }
++
++ /* No crossing a page as the payload mustn't fragment. */
++ if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
++ DPRINTK("txreq.offset: %x, size: %u, end: %lu\n",
++ txreq.offset, txreq.size,
++ (txreq.offset &~PAGE_MASK) + txreq.size);
++ netbk_tx_err(netif, &txreq, i);
++ continue;
++ }
++
++ pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
++
++ data_len = (txreq.size > PKT_PROT_LEN &&
++ ret < MAX_SKB_FRAGS) ?
++ PKT_PROT_LEN : txreq.size;
++
++ skb = alloc_skb(data_len + 16 + NET_IP_ALIGN,
++ GFP_ATOMIC | __GFP_NOWARN);
++ if (unlikely(skb == NULL)) {
++ DPRINTK("Can't allocate a skb in start_xmit.\n");
++ netbk_tx_err(netif, &txreq, i);
++ break;
++ }
++
++ /* Packets passed to netif_rx() must have some headroom. */
++ skb_reserve(skb, 16 + NET_IP_ALIGN);
++
++ if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
++ struct netif_extra_info *gso;
++ gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
++
++ if (netbk_set_skb_gso(skb, gso)) {
++ kfree_skb(skb);
++ netbk_tx_err(netif, &txreq, i);
++ continue;
++ }
++ }
++
++ gnttab_set_map_op(mop, idx_to_kaddr(pending_idx),
++ GNTMAP_host_map | GNTMAP_readonly,
++ txreq.gref, netif->domid);
++ mop++;
++
++ memcpy(&pending_tx_info[pending_idx].req,
++ &txreq, sizeof(txreq));
++ pending_tx_info[pending_idx].netif = netif;
++ *((u16 *)skb->data) = pending_idx;
++
++ __skb_put(skb, data_len);
++
++ skb_shinfo(skb)->nr_frags = ret;
++ if (data_len < txreq.size) {
++ skb_shinfo(skb)->nr_frags++;
++ skb_shinfo(skb)->frags[0].page =
++ (void *)(unsigned long)pending_idx;
++ } else {
++ /* Discriminate from any valid pending_idx value. */
++ skb_shinfo(skb)->frags[0].page = (void *)~0UL;
++ }
++
++ if (skb->data_len < skb_shinfo(skb)->gso_size) {
++ skb_shinfo(skb)->gso_size = 0;
++ skb_shinfo(skb)->gso_type = 0;
++ }
++
++ __skb_queue_tail(&tx_queue, skb);
++
++ pending_cons++;
++
++ mop = netbk_get_requests(netif, skb, txfrags, mop);
++
++ netif->tx.req_cons = i;
++ netif_schedule_work(netif);
++
++ if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
++ break;
++ }
++
++ if (mop == tx_map_ops)
++ return;
++
++ ret = HYPERVISOR_grant_table_op(
++ GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops);
++ BUG_ON(ret);
++
++ mop = tx_map_ops;
++ while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
++ netif_tx_request_t *txp;
++
++ pending_idx = *((u16 *)skb->data);
++ netif = pending_tx_info[pending_idx].netif;
++ txp = &pending_tx_info[pending_idx].req;
++
++ /* Check the remap error code. */
++ if (unlikely(netbk_tx_check_mop(skb, &mop))) {
++ DPRINTK("netback grant failed.\n");
++ skb_shinfo(skb)->nr_frags = 0;
++ kfree_skb(skb);
++ continue;
++ }
++
++ data_len = skb->len;
++ memcpy(skb->data,
++ (void *)(idx_to_kaddr(pending_idx)|txp->offset),
++ data_len);
++ if (data_len < txp->size) {
++ /* Append the packet payload as a fragment. */
++ txp->offset += data_len;
++ txp->size -= data_len;
++ } else {
++ /* Schedule a response immediately. */
++ netif_idx_release(pending_idx);
++ }
++
++ /*
++ * Old frontends do not assert data_validated but we
++ * can infer it from csum_blank so test both flags.
++ */
++ if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank)) {
++ skb->ip_summed = CHECKSUM_UNNECESSARY;
++ skb->proto_data_valid = 1;
++ } else {
++ skb->ip_summed = CHECKSUM_NONE;
++ skb->proto_data_valid = 0;
++ }
++ skb->proto_csum_blank = !!(txp->flags & NETTXF_csum_blank);
++
++ netbk_fill_frags(skb);
++
++ skb->dev = netif->dev;
++ skb->protocol = eth_type_trans(skb, skb->dev);
++
++ netif->stats.rx_bytes += skb->len;
++ netif->stats.rx_packets++;
++
++ if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
++ unlikely(skb_linearize(skb))) {
++ DPRINTK("Can't linearize skb in net_tx_action.\n");
++ kfree_skb(skb);
++ continue;
++ }
++
++ netif_rx(skb);
++ netif->dev->last_rx = jiffies;
++ }
++
++ if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
++ !list_empty(&pending_inuse_head)) {
++ struct netbk_tx_pending_inuse *oldest;
++
++ oldest = list_entry(pending_inuse_head.next,
++ struct netbk_tx_pending_inuse, list);
++ mod_timer(&netbk_tx_pending_timer, oldest->alloc_time + HZ);
++ }
++}
++
++static void netif_idx_release(u16 pending_idx)
++{
++ static DEFINE_SPINLOCK(_lock);
++ unsigned long flags;
++
++ spin_lock_irqsave(&_lock, flags);
++ dealloc_ring[MASK_PEND_IDX(dealloc_prod)] = pending_idx;
++ /* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
++ smp_wmb();
++ dealloc_prod++;
++ spin_unlock_irqrestore(&_lock, flags);
++
++ tasklet_schedule(&net_tx_tasklet);
++}
++
++static void netif_page_release(struct page *page, unsigned int order)
++{
++ int idx = netif_page_index(page);
++ BUG_ON(order);
++ BUG_ON(idx < 0);
++ netif_idx_release(idx);
++}
++
++irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
++{
++ netif_t *netif = dev_id;
++
++ add_to_net_schedule_list_tail(netif);
++ maybe_schedule_tx_action();
++
++ if (netif_schedulable(netif) && !netbk_queue_full(netif))
++ netif_wake_queue(netif->dev);
++
++ return IRQ_HANDLED;
++}
++
++static void make_tx_response(netif_t *netif,
++ netif_tx_request_t *txp,
++ s8 st)
++{
++ RING_IDX i = netif->tx.rsp_prod_pvt;
++ netif_tx_response_t *resp;
++ int notify;
++
++ resp = RING_GET_RESPONSE(&netif->tx, i);
++ resp->id = txp->id;
++ resp->status = st;
++
++ if (txp->flags & NETTXF_extra_info)
++ RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL;
++
++ netif->tx.rsp_prod_pvt = ++i;
++ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
++ if (notify)
++ notify_remote_via_irq(netif->irq);
++
++#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
++ if (i == netif->tx.req_cons) {
++ int more_to_do;
++ RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
++ if (more_to_do)
++ add_to_net_schedule_list_tail(netif);
++ }
++#endif
++}
++
++static netif_rx_response_t *make_rx_response(netif_t *netif,
++ u16 id,
++ s8 st,
++ u16 offset,
++ u16 size,
++ u16 flags)
++{
++ RING_IDX i = netif->rx.rsp_prod_pvt;
++ netif_rx_response_t *resp;
++
++ resp = RING_GET_RESPONSE(&netif->rx, i);
++ resp->offset = offset;
++ resp->flags = flags;
++ resp->id = id;
++ resp->status = (s16)size;
++ if (st < 0)
++ resp->status = (s16)st;
++
++ netif->rx.rsp_prod_pvt = ++i;
++
++ return resp;
++}
++
++#ifdef NETBE_DEBUG_INTERRUPT
++static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
++{
++ struct list_head *ent;
++ netif_t *netif;
++ int i = 0;
++
++ printk(KERN_ALERT "netif_schedule_list:\n");
++ spin_lock_irq(&net_schedule_list_lock);
++
++ list_for_each (ent, &net_schedule_list) {
++ netif = list_entry(ent, netif_t, list);
++ printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
++ "rx_resp_prod=%08x\n",
++ i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
++ printk(KERN_ALERT " tx_req_cons=%08x tx_resp_prod=%08x)\n",
++ netif->tx.req_cons, netif->tx.rsp_prod_pvt);
++ printk(KERN_ALERT " shared(rx_req_prod=%08x "
++ "rx_resp_prod=%08x\n",
++ netif->rx.sring->req_prod, netif->rx.sring->rsp_prod);
++ printk(KERN_ALERT " rx_event=%08x tx_req_prod=%08x\n",
++ netif->rx.sring->rsp_event, netif->tx.sring->req_prod);
++ printk(KERN_ALERT " tx_resp_prod=%08x, tx_event=%08x)\n",
++ netif->tx.sring->rsp_prod, netif->tx.sring->rsp_event);
++ i++;
++ }
++
++ spin_unlock_irq(&net_schedule_list_lock);
++ printk(KERN_ALERT " ** End of netif_schedule_list **\n");
++
++ return IRQ_HANDLED;
++}
++#endif
++
++static int __init netback_init(void)
++{
++ int i;
++ struct page *page;
++
++ if (!is_running_on_xen())
++ return -ENODEV;
++
++ /* We can increase reservation by this much in net_rx_action(). */
++ balloon_update_driver_allowance(NET_RX_RING_SIZE);
++
++ skb_queue_head_init(&rx_queue);
++ skb_queue_head_init(&tx_queue);
++
++ init_timer(&net_timer);
++ net_timer.data = 0;
++ net_timer.function = net_alarm;
++
++ init_timer(&netbk_tx_pending_timer);
++ netbk_tx_pending_timer.data = 0;
++ netbk_tx_pending_timer.function = netbk_tx_pending_timeout;
++
++ mmap_pages = alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
++ if (mmap_pages == NULL) {
++ printk("%s: out of memory\n", __FUNCTION__);
++ return -ENOMEM;
++ }
++
++ for (i = 0; i < MAX_PENDING_REQS; i++) {
++ page = mmap_pages[i];
++ SetPageForeign(page, netif_page_release);
++ netif_set_page_index(page, i);
++ INIT_LIST_HEAD(&pending_inuse[i].list);
++ }
++
++ pending_cons = 0;
++ pending_prod = MAX_PENDING_REQS;
++ for (i = 0; i < MAX_PENDING_REQS; i++)
++ pending_ring[i] = i;
++
++ spin_lock_init(&net_schedule_list_lock);
++ INIT_LIST_HEAD(&net_schedule_list);
++
++ netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
++ if (MODPARM_copy_skb) {
++ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
++ NULL, 0))
++ netbk_copy_skb_mode = NETBK_ALWAYS_COPY_SKB;
++ else
++ netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
++ }
++
++ netif_accel_init();
++
++ netif_xenbus_init();
++
++#ifdef NETBE_DEBUG_INTERRUPT
++ (void)bind_virq_to_irqhandler(VIRQ_DEBUG,
++ 0,
++ netif_be_dbg,
++ SA_SHIRQ,
++ "net-be-dbg",
++ &netif_be_dbg);
++#endif
++
++ return 0;
++}
++
++module_init(netback_init);
++
++MODULE_LICENSE("Dual BSD/GPL");
+diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
+new file mode 100644
+index 0000000..d7faeb6
+--- /dev/null
++++ b/drivers/xen/netback/xenbus.c
+@@ -0,0 +1,454 @@
++/* Xenbus code for netif backend
++ Copyright (C) 2005 Rusty Russell <rusty at rustcorp.com.au>
++ Copyright (C) 2005 XenSource Ltd
++
++ This program is free software; you can redistribute it and/or modify
++ it under the terms of the GNU General Public License as published by
++ the Free Software Foundation; either version 2 of the License, or
++ (at your option) any later version.
++
++ This program is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ GNU General Public License for more details.
++
++ You should have received a copy of the GNU General Public License
++ along with this program; if not, write to the Free Software
++ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
++*/
++
++#include <stdarg.h>
++#include <linux/module.h>
++#include <xen/xenbus.h>
++#include "common.h"
++
++#if 0
++#undef DPRINTK
++#define DPRINTK(fmt, args...) \
++ printk("netback/xenbus (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
++#endif
++
++
++static int connect_rings(struct backend_info *);
++static void connect(struct backend_info *);
++static void backend_create_netif(struct backend_info *be);
++
++static int netback_remove(struct xenbus_device *dev)
++{
++ struct backend_info *be = dev->dev.driver_data;
++
++ netback_remove_accelerators(be, dev);
++
++ if (be->netif) {
++ kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
++ netif_disconnect(be->netif);
++ be->netif = NULL;
++ }
++ kfree(be);
++ dev->dev.driver_data = NULL;
++ return 0;
++}
++
++
++/**
++ * Entry point to this code when a new device is created. Allocate the basic
++ * structures and switch to InitWait.
++ */
++static int netback_probe(struct xenbus_device *dev,
++ const struct xenbus_device_id *id)
++{
++ const char *message;
++ struct xenbus_transaction xbt;
++ int err;
++ int sg;
++ struct backend_info *be = kzalloc(sizeof(struct backend_info),
++ GFP_KERNEL);
++ if (!be) {
++ xenbus_dev_fatal(dev, -ENOMEM,
++ "allocating backend structure");
++ return -ENOMEM;
++ }
++
++ be->dev = dev;
++ dev->dev.driver_data = be;
++
++ sg = 1;
++ if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB)
++ sg = 0;
++
++ do {
++ err = xenbus_transaction_start(&xbt);
++ if (err) {
++ xenbus_dev_fatal(dev, err, "starting transaction");
++ goto fail;
++ }
++
++ err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg);
++ if (err) {
++ message = "writing feature-sg";
++ goto abort_transaction;
++ }
++
++ err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4",
++ "%d", sg);
++ if (err) {
++ message = "writing feature-gso-tcpv4";
++ goto abort_transaction;
++ }
++
++ /* We support rx-copy path. */
++ err = xenbus_printf(xbt, dev->nodename,
++ "feature-rx-copy", "%d", 1);
++ if (err) {
++ message = "writing feature-rx-copy";
++ goto abort_transaction;
++ }
++
++ /*
++ * We don't support rx-flip path (except old guests who don't
++ * grok this feature flag).
++ */
++ err = xenbus_printf(xbt, dev->nodename,
++ "feature-rx-flip", "%d", 0);
++ if (err) {
++ message = "writing feature-rx-flip";
++ goto abort_transaction;
++ }
++
++ err = xenbus_transaction_end(xbt, 0);
++ } while (err == -EAGAIN);
++
++ if (err) {
++ xenbus_dev_fatal(dev, err, "completing transaction");
++ goto fail;
++ }
++
++ netback_probe_accelerators(be, dev);
++
++ err = xenbus_switch_state(dev, XenbusStateInitWait);
++ if (err)
++ goto fail;
++
++ /* This kicks hotplug scripts, so do it immediately. */
++ backend_create_netif(be);
++
++ return 0;
++
++abort_transaction:
++ xenbus_transaction_end(xbt, 1);
++ xenbus_dev_fatal(dev, err, "%s", message);
++fail:
++ DPRINTK("failed");
++ netback_remove(dev);
++ return err;
++}
++
++
++/**
++ * Handle the creation of the hotplug script environment. We add the script
++ * and vif variables to the environment, for the benefit of the vif-* hotplug
++ * scripts.
++ */
++static int netback_uevent(struct xenbus_device *xdev, char **envp,
++ int num_envp, char *buffer, int buffer_size)
++{
++ struct backend_info *be = xdev->dev.driver_data;
++ netif_t *netif = be->netif;
++ int i = 0, length = 0;
++ char *val;
++
++ DPRINTK("netback_uevent");
++
++ val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
++ if (IS_ERR(val)) {
++ int err = PTR_ERR(val);
++ xenbus_dev_fatal(xdev, err, "reading script");
++ return err;
++ }
++ else {
++ add_uevent_var(envp, num_envp, &i, buffer, buffer_size,
++ &length, "script=%s", val);
++ kfree(val);
++ }
++
++ add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
++ "vif=%s", netif->dev->name);
++
++ envp[i] = NULL;
++
++ return 0;
++}
++
++
++static void backend_create_netif(struct backend_info *be)
++{
++ int err;
++ long handle;
++ struct xenbus_device *dev = be->dev;
++
++ if (be->netif != NULL)
++ return;
++
++ err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%li", &handle);
++ if (err != 1) {
++ xenbus_dev_fatal(dev, err, "reading handle");
++ return;
++ }
++
++ be->netif = netif_alloc(dev->otherend_id, handle);
++ if (IS_ERR(be->netif)) {
++ err = PTR_ERR(be->netif);
++ be->netif = NULL;
++ xenbus_dev_fatal(dev, err, "creating interface");
++ return;
++ }
++
++ kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
++}
++
++
++/**
++ * Callback received when the frontend's state changes.
++ */
++static void frontend_changed(struct xenbus_device *dev,
++ enum xenbus_state frontend_state)
++{
++ struct backend_info *be = dev->dev.driver_data;
++
++ DPRINTK("%s", xenbus_strstate(frontend_state));
++
++ be->frontend_state = frontend_state;
++
++ switch (frontend_state) {
++ case XenbusStateInitialising:
++ if (dev->state == XenbusStateClosed) {
++ printk(KERN_INFO "%s: %s: prepare for reconnect\n",
++ __FUNCTION__, dev->nodename);
++ xenbus_switch_state(dev, XenbusStateInitWait);
++ }
++ break;
++
++ case XenbusStateInitialised:
++ break;
++
++ case XenbusStateConnected:
++ if (dev->state == XenbusStateConnected)
++ break;
++ backend_create_netif(be);
++ if (be->netif)
++ connect(be);
++ break;
++
++ case XenbusStateClosing:
++ if (be->netif) {
++ kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
++ netif_disconnect(be->netif);
++ be->netif = NULL;
++ }
++ xenbus_switch_state(dev, XenbusStateClosing);
++ break;
++
++ case XenbusStateClosed:
++ xenbus_switch_state(dev, XenbusStateClosed);
++ if (xenbus_dev_is_online(dev))
++ break;
++ /* fall through if not online */
++ case XenbusStateUnknown:
++ device_unregister(&dev->dev);
++ break;
++
++ default:
++ xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
++ frontend_state);
++ break;
++ }
++}
++
++
++static void xen_net_read_rate(struct xenbus_device *dev,
++ unsigned long *bytes, unsigned long *usec)
++{
++ char *s, *e;
++ unsigned long b, u;
++ char *ratestr;
++
++ /* Default to unlimited bandwidth. */
++ *bytes = ~0UL;
++ *usec = 0;
++
++ ratestr = xenbus_read(XBT_NIL, dev->nodename, "rate", NULL);
++ if (IS_ERR(ratestr))
++ return;
++
++ s = ratestr;
++ b = simple_strtoul(s, &e, 10);
++ if ((s == e) || (*e != ','))
++ goto fail;
++
++ s = e + 1;
++ u = simple_strtoul(s, &e, 10);
++ if ((s == e) || (*e != '\0'))
++ goto fail;
++
++ *bytes = b;
++ *usec = u;
++
++ kfree(ratestr);
++ return;
++
++ fail:
++ WPRINTK("Failed to parse network rate limit. Traffic unlimited.\n");
++ kfree(ratestr);
++}
++
++static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
++{
++ char *s, *e, *macstr;
++ int i;
++
++ macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
++ if (IS_ERR(macstr))
++ return PTR_ERR(macstr);
++
++ for (i = 0; i < ETH_ALEN; i++) {
++ mac[i] = simple_strtoul(s, &e, 16);
++ if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
++ kfree(macstr);
++ return -ENOENT;
++ }
++ s = e+1;
++ }
++
++ kfree(macstr);
++ return 0;
++}
++
++static void connect(struct backend_info *be)
++{
++ int err;
++ struct xenbus_device *dev = be->dev;
++
++ err = connect_rings(be);
++ if (err)
++ return;
++
++ err = xen_net_read_mac(dev, be->netif->fe_dev_addr);
++ if (err) {
++ xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
++ return;
++ }
++
++ xen_net_read_rate(dev, &be->netif->credit_bytes,
++ &be->netif->credit_usec);
++ be->netif->remaining_credit = be->netif->credit_bytes;
++
++ xenbus_switch_state(dev, XenbusStateConnected);
++
++ netif_wake_queue(be->netif->dev);
++}
++
++
++static int connect_rings(struct backend_info *be)
++{
++ struct xenbus_device *dev = be->dev;
++ unsigned long tx_ring_ref, rx_ring_ref;
++ unsigned int evtchn, rx_copy;
++ int err;
++ int val;
++
++ DPRINTK("");
++
++ err = xenbus_gather(XBT_NIL, dev->otherend,
++ "tx-ring-ref", "%lu", &tx_ring_ref,
++ "rx-ring-ref", "%lu", &rx_ring_ref,
++ "event-channel", "%u", &evtchn, NULL);
++ if (err) {
++ xenbus_dev_fatal(dev, err,
++ "reading %s/ring-ref and event-channel",
++ dev->otherend);
++ return err;
++ }
++
++ err = xenbus_scanf(XBT_NIL, dev->otherend, "request-rx-copy", "%u",
++ &rx_copy);
++ if (err == -ENOENT) {
++ err = 0;
++ rx_copy = 0;
++ }
++ if (err < 0) {
++ xenbus_dev_fatal(dev, err, "reading %s/request-rx-copy",
++ dev->otherend);
++ return err;
++ }
++ be->netif->copying_receiver = !!rx_copy;
++
++ if (be->netif->dev->tx_queue_len != 0) {
++ if (xenbus_scanf(XBT_NIL, dev->otherend,
++ "feature-rx-notify", "%d", &val) < 0)
++ val = 0;
++ if (val)
++ be->netif->can_queue = 1;
++ else
++ /* Must be non-zero for pfifo_fast to work. */
++ be->netif->dev->tx_queue_len = 1;
++ }
++
++ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", "%d", &val) < 0)
++ val = 0;
++ if (val) {
++ be->netif->features |= NETIF_F_SG;
++ be->netif->dev->features |= NETIF_F_SG;
++ }
++
++ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4", "%d",
++ &val) < 0)
++ val = 0;
++ if (val) {
++ be->netif->features |= NETIF_F_TSO;
++ be->netif->dev->features |= NETIF_F_TSO;
++ }
++
++ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
++ "%d", &val) < 0)
++ val = 0;
++ if (val) {
++ be->netif->features &= ~NETIF_F_IP_CSUM;
++ be->netif->dev->features &= ~NETIF_F_IP_CSUM;
++ }
++
++ /* Map the shared frame, irq etc. */
++ err = netif_map(be->netif, tx_ring_ref, rx_ring_ref, evtchn);
++ if (err) {
++ xenbus_dev_fatal(dev, err,
++ "mapping shared-frames %lu/%lu port %u",
++ tx_ring_ref, rx_ring_ref, evtchn);
++ return err;
++ }
++ return 0;
++}
++
++
++/* ** Driver Registration ** */
++
++
++static const struct xenbus_device_id netback_ids[] = {
++ { "vif" },
++ { "" }
++};
++
++
++static struct xenbus_driver netback = {
++ .name = "vif",
++ .owner = THIS_MODULE,
++ .ids = netback_ids,
++ .probe = netback_probe,
++ .remove = netback_remove,
++ .uevent = netback_uevent,
++ .otherend_changed = frontend_changed,
++};
++
++
++void netif_xenbus_init(void)
++{
++ xenbus_register_backend(&netback);
++}
+--
+1.7.3.4
+
+
+From 5b30803bf5f58ee980edd8d88a2d73dda995ee93 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Mon, 9 Feb 2009 12:05:52 -0800
+Subject: [PATCH 052/139] xen: netback: first cut at porting to upstream and cleaning up
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/Kconfig | 2 +-
+ drivers/xen/netback/Makefile | 2 +-
+ drivers/xen/netback/common.h | 33 +++---
+ drivers/xen/netback/interface.c | 37 +++---
+ drivers/xen/netback/netback.c | 248 ++++++++++++++++++++++++---------------
+ drivers/xen/netback/xenbus.c | 25 ++--
+ 6 files changed, 201 insertions(+), 146 deletions(-)
+
+diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
+index 7e83d43..30290a8 100644
+--- a/drivers/xen/Kconfig
++++ b/drivers/xen/Kconfig
+@@ -38,7 +38,7 @@ config XEN_BACKEND
+ to other virtual machines.
+
+ config XEN_NETDEV_BACKEND
+- bool "Xen backend network device"
++ tristate "Xen backend network device"
+ depends on XEN_BACKEND && NET
+ help
+ Implement the network backend driver, which passes packets
+diff --git a/drivers/xen/netback/Makefile b/drivers/xen/netback/Makefile
+index f4a0c51..a01a1a3 100644
+--- a/drivers/xen/netback/Makefile
++++ b/drivers/xen/netback/Makefile
+@@ -1,3 +1,3 @@
+ obj-$(CONFIG_XEN_NETDEV_BACKEND) := netbk.o
+
+-netbk-y := netback.o xenbus.o interface.o
++netbk-y := netback.o xenbus.o interface.o
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 9a54d57..65b88f4 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -43,8 +43,7 @@
+ #include <asm/io.h>
+ #include <asm/pgalloc.h>
+ #include <xen/interface/grant_table.h>
+-#include <xen/gnttab.h>
+-#include <xen/driver_util.h>
++#include <xen/grant_table.h>
+ #include <xen/xenbus.h>
+
+ #define DPRINTK(_f, _a...) \
+@@ -55,7 +54,7 @@
+ #define WPRINTK(fmt, args...) \
+ printk(KERN_WARNING "xen_net: " fmt, ##args)
+
+-typedef struct netif_st {
++struct xen_netif {
+ /* Unique identifier for this interface. */
+ domid_t domid;
+ unsigned int handle;
+@@ -70,8 +69,8 @@ typedef struct netif_st {
+ unsigned int irq;
+
+ /* The shared rings and indexes. */
+- netif_tx_back_ring_t tx;
+- netif_rx_back_ring_t rx;
++ struct xen_netif_tx_back_ring tx;
++ struct xen_netif_rx_back_ring rx;
+ struct vm_struct *tx_comms_area;
+ struct vm_struct *rx_comms_area;
+
+@@ -103,7 +102,7 @@ typedef struct netif_st {
+ unsigned int carrier;
+
+ wait_queue_head_t waiting_to_free;
+-} netif_t;
++};
+
+ /*
+ * Implement our own carrier flag: the network stack's version causes delays
+@@ -141,7 +140,7 @@ struct netback_accelerator {
+
+ struct backend_info {
+ struct xenbus_device *dev;
+- netif_t *netif;
++ struct xen_netif *netif;
+ enum xenbus_state frontend_state;
+
+ /* State relating to the netback accelerator */
+@@ -174,13 +173,13 @@ extern
+ void netif_accel_init(void);
+
+
+-#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
+-#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
++#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
++#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
+
+-void netif_disconnect(netif_t *netif);
++void netif_disconnect(struct xen_netif *netif);
+
+-netif_t *netif_alloc(domid_t domid, unsigned int handle);
+-int netif_map(netif_t *netif, unsigned long tx_ring_ref,
++struct xen_netif *netif_alloc(domid_t domid, unsigned int handle);
++int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
+ unsigned long rx_ring_ref, unsigned int evtchn);
+
+ #define netif_get(_b) (atomic_inc(&(_b)->refcnt))
+@@ -195,22 +194,22 @@ void netif_xenbus_init(void);
+ #define netif_schedulable(netif) \
+ (netif_running((netif)->dev) && netback_carrier_ok(netif))
+
+-void netif_schedule_work(netif_t *netif);
+-void netif_deschedule_work(netif_t *netif);
++void netif_schedule_work(struct xen_netif *netif);
++void netif_deschedule_work(struct xen_netif *netif);
+
+ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
+ struct net_device_stats *netif_be_get_stats(struct net_device *dev);
+-irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs);
++irqreturn_t netif_be_int(int irq, void *dev_id);
+
+ static inline int netbk_can_queue(struct net_device *dev)
+ {
+- netif_t *netif = netdev_priv(dev);
++ struct xen_netif *netif = netdev_priv(dev);
+ return netif->can_queue;
+ }
+
+ static inline int netbk_can_sg(struct net_device *dev)
+ {
+- netif_t *netif = netdev_priv(dev);
++ struct xen_netif *netif = netdev_priv(dev);
+ return netif->features & NETIF_F_SG;
+ }
+
+diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
+index 7e67941..d184ad7 100644
+--- a/drivers/xen/netback/interface.c
++++ b/drivers/xen/netback/interface.c
+@@ -34,6 +34,9 @@
+ #include <linux/ethtool.h>
+ #include <linux/rtnetlink.h>
+
++#include <xen/events.h>
++#include <asm/xen/hypercall.h>
++
+ /*
+ * Module parameter 'queue_length':
+ *
+@@ -51,13 +54,13 @@
+ static unsigned long netbk_queue_length = 32;
+ module_param_named(queue_length, netbk_queue_length, ulong, 0);
+
+-static void __netif_up(netif_t *netif)
++static void __netif_up(struct xen_netif *netif)
+ {
+ enable_irq(netif->irq);
+ netif_schedule_work(netif);
+ }
+
+-static void __netif_down(netif_t *netif)
++static void __netif_down(struct xen_netif *netif)
+ {
+ disable_irq(netif->irq);
+ netif_deschedule_work(netif);
+@@ -65,7 +68,7 @@ static void __netif_down(netif_t *netif)
+
+ static int net_open(struct net_device *dev)
+ {
+- netif_t *netif = netdev_priv(dev);
++ struct xen_netif *netif = netdev_priv(dev);
+ if (netback_carrier_ok(netif)) {
+ __netif_up(netif);
+ netif_start_queue(dev);
+@@ -75,7 +78,7 @@ static int net_open(struct net_device *dev)
+
+ static int net_close(struct net_device *dev)
+ {
+- netif_t *netif = netdev_priv(dev);
++ struct xen_netif *netif = netdev_priv(dev);
+ if (netback_carrier_ok(netif))
+ __netif_down(netif);
+ netif_stop_queue(dev);
+@@ -95,7 +98,7 @@ static int netbk_change_mtu(struct net_device *dev, int mtu)
+ static int netbk_set_sg(struct net_device *dev, u32 data)
+ {
+ if (data) {
+- netif_t *netif = netdev_priv(dev);
++ struct xen_netif *netif = netdev_priv(dev);
+
+ if (!(netif->features & NETIF_F_SG))
+ return -ENOSYS;
+@@ -107,7 +110,7 @@ static int netbk_set_sg(struct net_device *dev, u32 data)
+ static int netbk_set_tso(struct net_device *dev, u32 data)
+ {
+ if (data) {
+- netif_t *netif = netdev_priv(dev);
++ struct xen_netif *netif = netdev_priv(dev);
+
+ if (!(netif->features & NETIF_F_TSO))
+ return -ENOSYS;
+@@ -127,15 +130,15 @@ static struct ethtool_ops network_ethtool_ops =
+ .get_link = ethtool_op_get_link,
+ };
+
+-netif_t *netif_alloc(domid_t domid, unsigned int handle)
++struct xen_netif *netif_alloc(domid_t domid, unsigned int handle)
+ {
+ int err = 0;
+ struct net_device *dev;
+- netif_t *netif;
++ struct xen_netif *netif;
+ char name[IFNAMSIZ] = {};
+
+ snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
+- dev = alloc_netdev(sizeof(netif_t), name, ether_setup);
++ dev = alloc_netdev(sizeof(struct xen_netif), name, ether_setup);
+ if (dev == NULL) {
+ DPRINTK("Could not create netif: out of memory\n");
+ return ERR_PTR(-ENOMEM);
+@@ -194,7 +197,7 @@ netif_t *netif_alloc(domid_t domid, unsigned int handle)
+ }
+
+ static int map_frontend_pages(
+- netif_t *netif, grant_ref_t tx_ring_ref, grant_ref_t rx_ring_ref)
++ struct xen_netif *netif, grant_ref_t tx_ring_ref, grant_ref_t rx_ring_ref)
+ {
+ struct gnttab_map_grant_ref op;
+
+@@ -229,7 +232,7 @@ static int map_frontend_pages(
+ return 0;
+ }
+
+-static void unmap_frontend_pages(netif_t *netif)
++static void unmap_frontend_pages(struct xen_netif *netif)
+ {
+ struct gnttab_unmap_grant_ref op;
+
+@@ -246,12 +249,12 @@ static void unmap_frontend_pages(netif_t *netif)
+ BUG();
+ }
+
+-int netif_map(netif_t *netif, unsigned long tx_ring_ref,
++int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
+ unsigned long rx_ring_ref, unsigned int evtchn)
+ {
+ int err = -ENOMEM;
+- netif_tx_sring_t *txs;
+- netif_rx_sring_t *rxs;
++ struct xen_netif_tx_sring *txs;
++ struct xen_netif_rx_sring *rxs;
+
+ /* Already connected through? */
+ if (netif->irq)
+@@ -276,10 +279,10 @@ int netif_map(netif_t *netif, unsigned long tx_ring_ref,
+ netif->irq = err;
+ disable_irq(netif->irq);
+
+- txs = (netif_tx_sring_t *)netif->tx_comms_area->addr;
++ txs = (struct xen_netif_tx_sring *)netif->tx_comms_area->addr;
+ BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE);
+
+- rxs = (netif_rx_sring_t *)
++ rxs = (struct xen_netif_rx_sring *)
+ ((char *)netif->rx_comms_area->addr);
+ BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE);
+
+@@ -303,7 +306,7 @@ err_rx:
+ return err;
+ }
+
+-void netif_disconnect(netif_t *netif)
++void netif_disconnect(struct xen_netif *netif)
+ {
+ if (netback_carrier_ok(netif)) {
+ rtnl_lock();
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index db629d4..c959075 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -35,9 +35,17 @@
+ */
+
+ #include "common.h"
++
++#include <linux/tcp.h>
++#include <linux/udp.h>
++
+ #include <xen/balloon.h>
++#include <xen/events.h>
+ #include <xen/interface/memory.h>
+
++#include <asm/xen/hypercall.h>
++#include <asm/xen/page.h>
++
+ /*define NETBE_DEBUG_INTERRUPT*/
+
+ struct netbk_rx_meta {
+@@ -51,11 +59,12 @@ struct netbk_tx_pending_inuse {
+ unsigned long alloc_time;
+ };
+
++
+ static void netif_idx_release(u16 pending_idx);
+-static void make_tx_response(netif_t *netif,
+- netif_tx_request_t *txp,
++static void make_tx_response(struct xen_netif *netif,
++ struct xen_netif_tx_request *txp,
+ s8 st);
+-static netif_rx_response_t *make_rx_response(netif_t *netif,
++static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
+ u16 id,
+ s8 st,
+ u16 offset,
+@@ -108,8 +117,8 @@ static inline int netif_page_index(struct page *pg)
+ #define PKT_PROT_LEN 64
+
+ static struct pending_tx_info {
+- netif_tx_request_t req;
+- netif_t *netif;
++ struct xen_netif_tx_request req;
++ struct xen_netif *netif;
+ } pending_tx_info[MAX_PENDING_REQS];
+ static u16 pending_ring[MAX_PENDING_REQS];
+ typedef unsigned int PEND_RING_IDX;
+@@ -128,8 +137,8 @@ static LIST_HEAD(pending_inuse_head);
+ static struct sk_buff_head tx_queue;
+
+ static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
+-static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
+-static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
++static struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
++static struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
+
+ static struct list_head net_schedule_list;
+ static spinlock_t net_schedule_list_lock;
+@@ -195,7 +204,7 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
+ goto err;
+
+ skb_reserve(nskb, 16 + NET_IP_ALIGN);
+- headlen = nskb->end - nskb->data;
++ headlen = skb_end_pointer(nskb) - nskb->data;
+ if (headlen > skb_headlen(skb))
+ headlen = skb_headlen(skb);
+ ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
+@@ -243,9 +252,9 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
+
+ offset = nskb->data - skb->data;
+
+- nskb->h.raw = skb->h.raw + offset;
+- nskb->nh.raw = skb->nh.raw + offset;
+- nskb->mac.raw = skb->mac.raw + offset;
++ nskb->transport_header = skb->transport_header + offset;
++ nskb->network_header = skb->network_header + offset;
++ nskb->mac_header = skb->mac_header + offset;
+
+ return nskb;
+
+@@ -255,14 +264,14 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
+ return NULL;
+ }
+
+-static inline int netbk_max_required_rx_slots(netif_t *netif)
++static inline int netbk_max_required_rx_slots(struct xen_netif *netif)
+ {
+ if (netif->features & (NETIF_F_SG|NETIF_F_TSO))
+ return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
+ return 1; /* all in one */
+ }
+
+-static inline int netbk_queue_full(netif_t *netif)
++static inline int netbk_queue_full(struct xen_netif *netif)
+ {
+ RING_IDX peek = netif->rx_req_cons_peek;
+ RING_IDX needed = netbk_max_required_rx_slots(netif);
+@@ -273,14 +282,14 @@ static inline int netbk_queue_full(netif_t *netif)
+
+ static void tx_queue_callback(unsigned long data)
+ {
+- netif_t *netif = (netif_t *)data;
++ struct xen_netif *netif = (struct xen_netif *)data;
+ if (netif_schedulable(netif))
+ netif_wake_queue(netif->dev);
+ }
+
+ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ {
+- netif_t *netif = netdev_priv(dev);
++ struct xen_netif *netif = netdev_priv(dev);
+
+ BUG_ON(skb->dev != dev);
+
+@@ -302,7 +311,6 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ /* Copy only the header fields we use in this driver. */
+ nskb->dev = skb->dev;
+ nskb->ip_summed = skb->ip_summed;
+- nskb->proto_data_valid = skb->proto_data_valid;
+ dev_kfree_skb(skb);
+ skb = nskb;
+ }
+@@ -366,25 +374,25 @@ struct netrx_pending_operations {
+ unsigned mcl_prod, mcl_cons;
+ unsigned copy_prod, copy_cons;
+ unsigned meta_prod, meta_cons;
+- mmu_update_t *mmu;
+- gnttab_transfer_t *trans;
+- gnttab_copy_t *copy;
+- multicall_entry_t *mcl;
++ struct mmu_update *mmu;
++ struct gnttab_transfer *trans;
++ struct gnttab_copy *copy;
++ struct multicall_entry *mcl;
+ struct netbk_rx_meta *meta;
+ };
+
+ /* Set up the grant operations for this fragment. If it's a flipping
+ interface, we also set up the unmap request from here. */
+-static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta,
++static u16 netbk_gop_frag(struct xen_netif *netif, struct netbk_rx_meta *meta,
+ int i, struct netrx_pending_operations *npo,
+ struct page *page, unsigned long size,
+ unsigned long offset)
+ {
+- mmu_update_t *mmu;
+- gnttab_transfer_t *gop;
+- gnttab_copy_t *copy_gop;
+- multicall_entry_t *mcl;
+- netif_rx_request_t *req;
++ struct mmu_update *mmu;
++ struct gnttab_transfer *gop;
++ struct gnttab_copy *copy_gop;
++ struct multicall_entry *mcl;
++ struct xen_netif_rx_request *req;
+ unsigned long old_mfn, new_mfn;
+ int idx = netif_page_index(page);
+
+@@ -426,12 +434,12 @@ static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta,
+ mcl = npo->mcl + npo->mcl_prod++;
+ MULTI_update_va_mapping(mcl,
+ (unsigned long)page_address(page),
+- pfn_pte_ma(new_mfn, PAGE_KERNEL),
++ mfn_pte(new_mfn, PAGE_KERNEL),
+ 0);
+
+ mmu = npo->mmu + npo->mmu_prod++;
+- mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
+- MMU_MACHPHYS_UPDATE;
++ mmu->ptr = ((phys_addr_t)new_mfn << PAGE_SHIFT) |
++ MMU_MACHPHYS_UPDATE;
+ mmu->val = page_to_pfn(page);
+ }
+
+@@ -446,7 +454,7 @@ static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta,
+ static void netbk_gop_skb(struct sk_buff *skb,
+ struct netrx_pending_operations *npo)
+ {
+- netif_t *netif = netdev_priv(skb->dev);
++ struct xen_netif *netif = netdev_priv(skb->dev);
+ int nr_frags = skb_shinfo(skb)->nr_frags;
+ int i;
+ int extra;
+@@ -494,9 +502,9 @@ static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
+ static int netbk_check_gop(int nr_frags, domid_t domid,
+ struct netrx_pending_operations *npo)
+ {
+- multicall_entry_t *mcl;
+- gnttab_transfer_t *gop;
+- gnttab_copy_t *copy_op;
++ struct multicall_entry *mcl;
++ struct gnttab_transfer *gop;
++ struct gnttab_copy *copy_op;
+ int status = NETIF_RSP_OKAY;
+ int i;
+
+@@ -534,7 +542,7 @@ static int netbk_check_gop(int nr_frags, domid_t domid,
+ return status;
+ }
+
+-static void netbk_add_frag_responses(netif_t *netif, int status,
++static void netbk_add_frag_responses(struct xen_netif *netif, int status,
+ struct netbk_rx_meta *meta, int nr_frags)
+ {
+ int i;
+@@ -555,11 +563,11 @@ static void netbk_add_frag_responses(netif_t *netif, int status,
+
+ static void net_rx_action(unsigned long unused)
+ {
+- netif_t *netif = NULL;
++ struct xen_netif *netif = NULL;
+ s8 status;
+ u16 id, irq, flags;
+- netif_rx_response_t *resp;
+- multicall_entry_t *mcl;
++ struct xen_netif_rx_response *resp;
++ struct multicall_entry *mcl;
+ struct sk_buff_head rxq;
+ struct sk_buff *skb;
+ int notify_nr = 0;
+@@ -572,10 +580,10 @@ static void net_rx_action(unsigned long unused)
+ * Putting hundreds of bytes on the stack is considered rude.
+ * Static works because a tasklet can only be on one CPU at any time.
+ */
+- static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+3];
+- static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
+- static gnttab_transfer_t grant_trans_op[NET_RX_RING_SIZE];
+- static gnttab_copy_t grant_copy_op[NET_RX_RING_SIZE];
++ static struct multicall_entry rx_mcl[NET_RX_RING_SIZE+3];
++ static struct mmu_update rx_mmu[NET_RX_RING_SIZE];
++ static struct gnttab_transfer grant_trans_op[NET_RX_RING_SIZE];
++ static struct gnttab_copy grant_copy_op[NET_RX_RING_SIZE];
+ static unsigned char rx_notify[NR_IRQS];
+ static u16 notify_list[NET_RX_RING_SIZE];
+ static struct netbk_rx_meta meta[NET_RX_RING_SIZE];
+@@ -596,7 +604,7 @@ static void net_rx_action(unsigned long unused)
+ *(int *)skb->cb = nr_frags;
+
+ if (!xen_feature(XENFEAT_auto_translated_physmap) &&
+- !((netif_t *)netdev_priv(skb->dev))->copying_receiver &&
++ !((struct xen_netif *)netdev_priv(skb->dev))->copying_receiver &&
+ check_mfn(nr_frags + 1)) {
+ /* Memory squeeze? Back off for an arbitrary while. */
+ if ( net_ratelimit() )
+@@ -692,9 +700,10 @@ static void net_rx_action(unsigned long unused)
+ id = meta[npo.meta_cons].id;
+ flags = nr_frags ? NETRXF_more_data : 0;
+
+- if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
++ if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
+ flags |= NETRXF_csum_blank | NETRXF_data_validated;
+- else if (skb->proto_data_valid) /* remote but checksummed? */
++ else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
++ /* remote but checksummed. */
+ flags |= NETRXF_data_validated;
+
+ if (meta[npo.meta_cons].copy)
+@@ -705,8 +714,8 @@ static void net_rx_action(unsigned long unused)
+ skb_headlen(skb), flags);
+
+ if (meta[npo.meta_cons].frag.size) {
+- struct netif_extra_info *gso =
+- (struct netif_extra_info *)
++ struct xen_netif_extra_info *gso =
++ (struct xen_netif_extra_info *)
+ RING_GET_RESPONSE(&netif->rx,
+ netif->rx.rsp_prod_pvt++);
+
+@@ -769,16 +778,16 @@ static void netbk_tx_pending_timeout(unsigned long unused)
+
+ struct net_device_stats *netif_be_get_stats(struct net_device *dev)
+ {
+- netif_t *netif = netdev_priv(dev);
++ struct xen_netif *netif = netdev_priv(dev);
+ return &netif->stats;
+ }
+
+-static int __on_net_schedule_list(netif_t *netif)
++static int __on_net_schedule_list(struct xen_netif *netif)
+ {
+ return netif->list.next != NULL;
+ }
+
+-static void remove_from_net_schedule_list(netif_t *netif)
++static void remove_from_net_schedule_list(struct xen_netif *netif)
+ {
+ spin_lock_irq(&net_schedule_list_lock);
+ if (likely(__on_net_schedule_list(netif))) {
+@@ -789,7 +798,7 @@ static void remove_from_net_schedule_list(netif_t *netif)
+ spin_unlock_irq(&net_schedule_list_lock);
+ }
+
+-static void add_to_net_schedule_list_tail(netif_t *netif)
++static void add_to_net_schedule_list_tail(struct xen_netif *netif)
+ {
+ if (__on_net_schedule_list(netif))
+ return;
+@@ -811,7 +820,7 @@ static void add_to_net_schedule_list_tail(netif_t *netif)
+ * If we may be buffer transmit buffers for any reason then we must be rather
+ * more conservative and treat this as the final check for pending work.
+ */
+-void netif_schedule_work(netif_t *netif)
++void netif_schedule_work(struct xen_netif *netif)
+ {
+ int more_to_do;
+
+@@ -827,13 +836,13 @@ void netif_schedule_work(netif_t *netif)
+ }
+ }
+
+-void netif_deschedule_work(netif_t *netif)
++void netif_deschedule_work(struct xen_netif *netif)
+ {
+ remove_from_net_schedule_list(netif);
+ }
+
+
+-static void tx_add_credit(netif_t *netif)
++static void tx_add_credit(struct xen_netif *netif)
+ {
+ unsigned long max_burst, max_credit;
+
+@@ -855,7 +864,7 @@ static void tx_add_credit(netif_t *netif)
+
+ static void tx_credit_callback(unsigned long data)
+ {
+- netif_t *netif = (netif_t *)data;
++ struct xen_netif *netif = (struct xen_netif *)data;
+ tx_add_credit(netif);
+ netif_schedule_work(netif);
+ }
+@@ -869,10 +878,10 @@ static inline int copy_pending_req(PEND_RING_IDX pending_idx)
+ inline static void net_tx_action_dealloc(void)
+ {
+ struct netbk_tx_pending_inuse *inuse, *n;
+- gnttab_unmap_grant_ref_t *gop;
++ struct gnttab_unmap_grant_ref *gop;
+ u16 pending_idx;
+ PEND_RING_IDX dc, dp;
+- netif_t *netif;
++ struct xen_netif *netif;
+ int ret;
+ LIST_HEAD(list);
+
+@@ -954,7 +963,7 @@ inline static void net_tx_action_dealloc(void)
+ }
+ }
+
+-static void netbk_tx_err(netif_t *netif, netif_tx_request_t *txp, RING_IDX end)
++static void netbk_tx_err(struct xen_netif *netif, struct xen_netif_tx_request *txp, RING_IDX end)
+ {
+ RING_IDX cons = netif->tx.req_cons;
+
+@@ -969,8 +978,8 @@ static void netbk_tx_err(netif_t *netif, netif_tx_request_t *txp, RING_IDX end)
+ netif_put(netif);
+ }
+
+-static int netbk_count_requests(netif_t *netif, netif_tx_request_t *first,
+- netif_tx_request_t *txp, int work_to_do)
++static int netbk_count_requests(struct xen_netif *netif, struct xen_netif_tx_request *first,
++ struct xen_netif_tx_request *txp, int work_to_do)
+ {
+ RING_IDX cons = netif->tx.req_cons;
+ int frags = 0;
+@@ -1009,10 +1018,10 @@ static int netbk_count_requests(netif_t *netif, netif_tx_request_t *first,
+ return frags;
+ }
+
+-static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif,
++static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netif *netif,
+ struct sk_buff *skb,
+- netif_tx_request_t *txp,
+- gnttab_map_grant_ref_t *mop)
++ struct xen_netif_tx_request *txp,
++ struct gnttab_map_grant_ref *mop)
+ {
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+ skb_frag_t *frags = shinfo->frags;
+@@ -1039,12 +1048,12 @@ static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif,
+ }
+
+ static int netbk_tx_check_mop(struct sk_buff *skb,
+- gnttab_map_grant_ref_t **mopp)
++ struct gnttab_map_grant_ref **mopp)
+ {
+- gnttab_map_grant_ref_t *mop = *mopp;
++ struct gnttab_map_grant_ref *mop = *mopp;
+ int pending_idx = *((u16 *)skb->data);
+- netif_t *netif = pending_tx_info[pending_idx].netif;
+- netif_tx_request_t *txp;
++ struct xen_netif *netif = pending_tx_info[pending_idx].netif;
++ struct xen_netif_tx_request *txp;
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+ int nr_frags = shinfo->nr_frags;
+ int i, err, start;
+@@ -1118,7 +1127,7 @@ static void netbk_fill_frags(struct sk_buff *skb)
+
+ for (i = 0; i < nr_frags; i++) {
+ skb_frag_t *frag = shinfo->frags + i;
+- netif_tx_request_t *txp;
++ struct xen_netif_tx_request *txp;
+ unsigned long pending_idx;
+
+ pending_idx = (unsigned long)frag->page;
+@@ -1138,10 +1147,10 @@ static void netbk_fill_frags(struct sk_buff *skb)
+ }
+ }
+
+-int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras,
++int netbk_get_extras(struct xen_netif *netif, struct xen_netif_extra_info *extras,
+ int work_to_do)
+ {
+- struct netif_extra_info extra;
++ struct xen_netif_extra_info extra;
+ RING_IDX cons = netif->tx.req_cons;
+
+ do {
+@@ -1166,7 +1175,7 @@ int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras,
+ return work_to_do;
+ }
+
+-static int netbk_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso)
++static int netbk_set_skb_gso(struct sk_buff *skb, struct xen_netif_extra_info *gso)
+ {
+ if (!gso->u.gso.size) {
+ DPRINTK("GSO size must not be zero.\n");
+@@ -1189,18 +1198,57 @@ static int netbk_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso)
+ return 0;
+ }
+
++static int skb_checksum_setup(struct sk_buff *skb)
++{
++ struct iphdr *iph;
++ unsigned char *th;
++ int err = -EPROTO;
++
++ if (skb->protocol != htons(ETH_P_IP))
++ goto out;
++
++ iph = (void *)skb->data;
++ th = skb->data + 4 * iph->ihl;
++ if (th >= skb_tail_pointer(skb))
++ goto out;
++
++ skb->csum_start = th - skb->head;
++ switch (iph->protocol) {
++ case IPPROTO_TCP:
++ skb->csum_offset = offsetof(struct tcphdr, check);
++ break;
++ case IPPROTO_UDP:
++ skb->csum_offset = offsetof(struct udphdr, check);
++ break;
++ default:
++ if (net_ratelimit())
++ printk(KERN_ERR "Attempting to checksum a non-"
++ "TCP/UDP packet, dropping a protocol"
++ " %d packet", iph->protocol);
++ goto out;
++ }
++
++ if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
++ goto out;
++
++ err = 0;
++
++out:
++ return err;
++}
++
+ /* Called after netfront has transmitted */
+ static void net_tx_action(unsigned long unused)
+ {
+ struct list_head *ent;
+ struct sk_buff *skb;
+- netif_t *netif;
+- netif_tx_request_t txreq;
+- netif_tx_request_t txfrags[MAX_SKB_FRAGS];
+- struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
++ struct xen_netif *netif;
++ struct xen_netif_tx_request txreq;
++ struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
++ struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
+ u16 pending_idx;
+ RING_IDX i;
+- gnttab_map_grant_ref_t *mop;
++ struct gnttab_map_grant_ref *mop;
+ unsigned int data_len;
+ int ret, work_to_do;
+
+@@ -1212,7 +1260,7 @@ static void net_tx_action(unsigned long unused)
+ !list_empty(&net_schedule_list)) {
+ /* Get a netif from the list with work to do. */
+ ent = net_schedule_list.next;
+- netif = list_entry(ent, netif_t, list);
++ netif = list_entry(ent, struct xen_netif, list);
+ netif_get(netif);
+ remove_from_net_schedule_list(netif);
+
+@@ -1313,7 +1361,7 @@ static void net_tx_action(unsigned long unused)
+ skb_reserve(skb, 16 + NET_IP_ALIGN);
+
+ if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
+- struct netif_extra_info *gso;
++ struct xen_netif_extra_info *gso;
+ gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
+
+ if (netbk_set_skb_gso(skb, gso)) {
+@@ -1372,7 +1420,7 @@ static void net_tx_action(unsigned long unused)
+
+ mop = tx_map_ops;
+ while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
+- netif_tx_request_t *txp;
++ struct xen_netif_tx_request *txp;
+
+ pending_idx = *((u16 *)skb->data);
+ netif = pending_tx_info[pending_idx].netif;
+@@ -1403,14 +1451,10 @@ static void net_tx_action(unsigned long unused)
+ * Old frontends do not assert data_validated but we
+ * can infer it from csum_blank so test both flags.
+ */
+- if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank)) {
+- skb->ip_summed = CHECKSUM_UNNECESSARY;
+- skb->proto_data_valid = 1;
+- } else {
++ if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank))
++ skb->ip_summed = CHECKSUM_PARTIAL;
++ else
+ skb->ip_summed = CHECKSUM_NONE;
+- skb->proto_data_valid = 0;
+- }
+- skb->proto_csum_blank = !!(txp->flags & NETTXF_csum_blank);
+
+ netbk_fill_frags(skb);
+
+@@ -1420,6 +1464,14 @@ static void net_tx_action(unsigned long unused)
+ netif->stats.rx_bytes += skb->len;
+ netif->stats.rx_packets++;
+
++ if (skb->ip_summed == CHECKSUM_PARTIAL) {
++ if (skb_checksum_setup(skb)) {
++ DPRINTK("Can't setup checksum in net_tx_action\n");
++ kfree_skb(skb);
++ continue;
++ }
++ }
++
+ if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
+ unlikely(skb_linearize(skb))) {
+ DPRINTK("Can't linearize skb in net_tx_action.\n");
+@@ -1464,9 +1516,9 @@ static void netif_page_release(struct page *page, unsigned int order)
+ netif_idx_release(idx);
+ }
+
+-irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
++irqreturn_t netif_be_int(int irq, void *dev_id)
+ {
+- netif_t *netif = dev_id;
++ struct xen_netif *netif = dev_id;
+
+ add_to_net_schedule_list_tail(netif);
+ maybe_schedule_tx_action();
+@@ -1477,12 +1529,12 @@ irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
+ return IRQ_HANDLED;
+ }
+
+-static void make_tx_response(netif_t *netif,
+- netif_tx_request_t *txp,
++static void make_tx_response(struct xen_netif *netif,
++ struct xen_netif_tx_request *txp,
+ s8 st)
+ {
+ RING_IDX i = netif->tx.rsp_prod_pvt;
+- netif_tx_response_t *resp;
++ struct xen_netif_tx_response *resp;
+ int notify;
+
+ resp = RING_GET_RESPONSE(&netif->tx, i);
+@@ -1507,7 +1559,7 @@ static void make_tx_response(netif_t *netif,
+ #endif
+ }
+
+-static netif_rx_response_t *make_rx_response(netif_t *netif,
++static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
+ u16 id,
+ s8 st,
+ u16 offset,
+@@ -1515,7 +1567,7 @@ static netif_rx_response_t *make_rx_response(netif_t *netif,
+ u16 flags)
+ {
+ RING_IDX i = netif->rx.rsp_prod_pvt;
+- netif_rx_response_t *resp;
++ struct xen_netif_rx_response *resp;
+
+ resp = RING_GET_RESPONSE(&netif->rx, i);
+ resp->offset = offset;
+@@ -1534,14 +1586,14 @@ static netif_rx_response_t *make_rx_response(netif_t *netif,
+ static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
+ {
+ struct list_head *ent;
+- netif_t *netif;
++ struct xen_netif *netif;
+ int i = 0;
+
+ printk(KERN_ALERT "netif_schedule_list:\n");
+ spin_lock_irq(&net_schedule_list_lock);
+
+ list_for_each (ent, &net_schedule_list) {
+- netif = list_entry(ent, netif_t, list);
++ netif = list_entry(ent, struct xen_netif, list);
+ printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
+ "rx_resp_prod=%08x\n",
+ i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
+@@ -1569,11 +1621,13 @@ static int __init netback_init(void)
+ int i;
+ struct page *page;
+
+- if (!is_running_on_xen())
++ printk(KERN_CRIT "*** netif_init\n");
++
++ if (!xen_domain())
+ return -ENODEV;
+
+ /* We can increase reservation by this much in net_rx_action(). */
+- balloon_update_driver_allowance(NET_RX_RING_SIZE);
++// balloon_update_driver_allowance(NET_RX_RING_SIZE);
+
+ skb_queue_head_init(&rx_queue);
+ skb_queue_head_init(&tx_queue);
+@@ -1616,7 +1670,7 @@ static int __init netback_init(void)
+ netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
+ }
+
+- netif_accel_init();
++ //netif_accel_init();
+
+ netif_xenbus_init();
+
+diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
+index d7faeb6..ed7c006 100644
+--- a/drivers/xen/netback/xenbus.c
++++ b/drivers/xen/netback/xenbus.c
+@@ -37,7 +37,7 @@ static int netback_remove(struct xenbus_device *dev)
+ {
+ struct backend_info *be = dev->dev.driver_data;
+
+- netback_remove_accelerators(be, dev);
++ //netback_remove_accelerators(be, dev);
+
+ if (be->netif) {
+ kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
+@@ -123,7 +123,7 @@ static int netback_probe(struct xenbus_device *dev,
+ goto fail;
+ }
+
+- netback_probe_accelerators(be, dev);
++ //netback_probe_accelerators(be, dev);
+
+ err = xenbus_switch_state(dev, XenbusStateInitWait);
+ if (err)
+@@ -149,12 +149,10 @@ fail:
+ * and vif variables to the environment, for the benefit of the vif-* hotplug
+ * scripts.
+ */
+-static int netback_uevent(struct xenbus_device *xdev, char **envp,
+- int num_envp, char *buffer, int buffer_size)
++static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env)
+ {
+ struct backend_info *be = xdev->dev.driver_data;
+- netif_t *netif = be->netif;
+- int i = 0, length = 0;
++ struct xen_netif *netif = be->netif;
+ char *val;
+
+ DPRINTK("netback_uevent");
+@@ -166,15 +164,15 @@ static int netback_uevent(struct xenbus_device *xdev, char **envp,
+ return err;
+ }
+ else {
+- add_uevent_var(envp, num_envp, &i, buffer, buffer_size,
+- &length, "script=%s", val);
++ if (add_uevent_var(env, "script=%s", val)) {
++ kfree(val);
++ return -ENOMEM;
++ }
+ kfree(val);
+ }
+
+- add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
+- "vif=%s", netif->dev->name);
+-
+- envp[i] = NULL;
++ if (add_uevent_var(env, "vif=%s", netif->dev->name))
++ return -ENOMEM;
+
+ return 0;
+ }
+@@ -450,5 +448,6 @@ static struct xenbus_driver netback = {
+
+ void netif_xenbus_init(void)
+ {
+- xenbus_register_backend(&netback);
++ printk(KERN_CRIT "registering netback\n");
++ (void)xenbus_register_backend(&netback);
+ }
+--
+1.7.3.4
+
+
+From a41a2ab9e1ac4ef8320f69f2719e973e25faff5c Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy at goop.org>
+Date: Mon, 9 Feb 2009 16:39:01 -0800
+Subject: [PATCH 053/139] xen: netback: don't include xen/evtchn.h
+
+It's a usermode header for users of /dev/evtchn
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy at goop.org>
+---
+ drivers/xen/netback/common.h | 1 -
+ 1 files changed, 0 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 65b88f4..5665ed1 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -38,7 +38,6 @@
+ #include <linux/netdevice.h>
+ #include <linux/etherdevice.h>
+ #include <linux/wait.h>
+-#include <xen/evtchn.h>
+ #include <xen/interface/io/netif.h>
+ #include <asm/io.h>
+ #include <asm/pgalloc.h>
+--
+1.7.3.4
+
+
+From f28a7c6148bb979acf99c0cbe3b441d0fb0853d9 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Wed, 18 Feb 2009 15:55:18 -0800
+Subject: [PATCH 054/139] xen: netback: use mod_timer
+
+__mod_timer is no longer a public API.
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c | 4 ++--
+ 1 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index c959075..e920703 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -334,7 +334,7 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ */
+ netif->tx_queue_timeout.data = (unsigned long)netif;
+ netif->tx_queue_timeout.function = tx_queue_callback;
+- __mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
++ mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
+ }
+ }
+
+@@ -1299,7 +1299,7 @@ static void net_tx_action(unsigned long unused)
+ (unsigned long)netif;
+ netif->credit_timeout.function =
+ tx_credit_callback;
+- __mod_timer(&netif->credit_timeout,
++ mod_timer(&netif->credit_timeout,
+ next_credit);
+ netif_put(netif);
+ continue;
+--
+1.7.3.4
+
+
+From 52f97ad360f28762c785343ba5c9f8abb83536f3 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich at novell.com>
+Date: Fri, 6 Mar 2009 08:29:31 +0000
+Subject: [PATCH 055/139] xen: netback: unmap tx ring gref when mapping of rx ring gref failed
+
+[ijc-ported from linux-2.6.18-xen.hg 782:51decc39e5e7]
+Signed-off-by: Jan Beulich <jbeulich at novell.com>
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/interface.c | 6 ++++++
+ 1 files changed, 6 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
+index d184ad7..f3d9ea1 100644
+--- a/drivers/xen/netback/interface.c
++++ b/drivers/xen/netback/interface.c
+@@ -222,6 +222,12 @@ static int map_frontend_pages(
+ BUG();
+
+ if (op.status) {
++ struct gnttab_unmap_grant_ref unop;
++
++ gnttab_set_unmap_op(&unop,
++ (unsigned long)netif->tx_comms_area->addr,
++ GNTMAP_host_map, netif->tx_shmem_handle);
++ HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unop, 1);
+ DPRINTK(" Gnttab failure mapping rx_ring_ref!\n");
+ return op.status;
+ }
+--
+1.7.3.4
+
+
+From f9b63790f1404eb03ac824147b2294a46e485643 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <Ian.Campbell at citrix.com>
+Date: Fri, 6 Mar 2009 08:29:32 +0000
+Subject: [PATCH 056/139] xen: netback: add ethtool stat to track copied skbs.
+
+Copied skbs should be rare but we have no way of verifying that.
+
+[ijc-ported from linux-2.6.18-xen.hg 792:db9857bb0320]
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/common.h | 3 ++
+ drivers/xen/netback/interface.c | 47 +++++++++++++++++++++++++++++++++++++++
+ drivers/xen/netback/netback.c | 6 ++++-
+ 3 files changed, 55 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 5665ed1..6ba804d 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -92,6 +92,9 @@ struct xen_netif {
+ /* Enforce draining of the transmit queue. */
+ struct timer_list tx_queue_timeout;
+
++ /* Statistics */
++ int nr_copied_skbs;
++
+ /* Miscellaneous private stuff. */
+ struct list_head list; /* scheduling list */
+ atomic_t refcnt;
+diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
+index f3d9ea1..1a99c87 100644
+--- a/drivers/xen/netback/interface.c
++++ b/drivers/xen/netback/interface.c
+@@ -119,8 +119,51 @@ static int netbk_set_tso(struct net_device *dev, u32 data)
+ return ethtool_op_set_tso(dev, data);
+ }
+
++static void netbk_get_drvinfo(struct net_device *dev,
++ struct ethtool_drvinfo *info)
++{
++ strcpy(info->driver, "netbk");
++}
++
++static const struct netif_stat {
++ char name[ETH_GSTRING_LEN];
++ u16 offset;
++} netbk_stats[] = {
++ { "copied_skbs", offsetof(struct xen_netif, nr_copied_skbs) },
++};
++
++static int netbk_get_stats_count(struct net_device *dev)
++{
++ return ARRAY_SIZE(netbk_stats);
++}
++
++static void netbk_get_ethtool_stats(struct net_device *dev,
++ struct ethtool_stats *stats, u64 * data)
++{
++ void *netif = netdev_priv(dev);
++ int i;
++
++ for (i = 0; i < ARRAY_SIZE(netbk_stats); i++)
++ data[i] = *(int *)(netif + netbk_stats[i].offset);
++}
++
++static void netbk_get_strings(struct net_device *dev, u32 stringset, u8 * data)
++{
++ int i;
++
++ switch (stringset) {
++ case ETH_SS_STATS:
++ for (i = 0; i < ARRAY_SIZE(netbk_stats); i++)
++ memcpy(data + i * ETH_GSTRING_LEN,
++ netbk_stats[i].name, ETH_GSTRING_LEN);
++ break;
++ }
++}
++
+ static struct ethtool_ops network_ethtool_ops =
+ {
++ .get_drvinfo = netbk_get_drvinfo,
++
+ .get_tx_csum = ethtool_op_get_tx_csum,
+ .set_tx_csum = ethtool_op_set_tx_csum,
+ .get_sg = ethtool_op_get_sg,
+@@ -128,6 +171,10 @@ static struct ethtool_ops network_ethtool_ops =
+ .get_tso = ethtool_op_get_tso,
+ .set_tso = netbk_set_tso,
+ .get_link = ethtool_op_get_link,
++
++ .get_stats_count = netbk_get_stats_count,
++ .get_ethtool_stats = netbk_get_ethtool_stats,
++ .get_strings = netbk_get_strings,
+ };
+
+ struct xen_netif *netif_alloc(domid_t domid, unsigned int handle)
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index e920703..f59fadb 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -923,7 +923,11 @@ inline static void net_tx_action_dealloc(void)
+ if (time_after(inuse->alloc_time + HZ / 2, jiffies))
+ break;
+
+- switch (copy_pending_req(inuse - pending_inuse)) {
++ pending_idx = inuse - pending_inuse;
++
++ pending_tx_info[pending_idx].netif->nr_copied_skbs++;
++
++ switch (copy_pending_req(pending_idx)) {
+ case 0:
+ list_move_tail(&inuse->list, &list);
+ continue;
+--
+1.7.3.4
+
+
+From c41d8da3d853d4e89ba38693b90c1fe512095704 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <Ian.Campbell at citrix.com>
+Date: Fri, 6 Mar 2009 08:29:33 +0000
+Subject: [PATCH 057/139] xen: netback: make queue length parameter writeable in sysfs
+
+Any changes will only take effect for newly created VIFs.
+
+Also hook up the vif devices to their parent and publish bus info via
+ethtool.
+
+[ijc-ported from linux-2.6.18-xen.hg 793:3aa9b8a7876b]
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/interface.c | 3 ++-
+ drivers/xen/netback/xenbus.c | 1 +
+ 2 files changed, 3 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
+index 1a99c87..7706170 100644
+--- a/drivers/xen/netback/interface.c
++++ b/drivers/xen/netback/interface.c
+@@ -52,7 +52,7 @@
+ * blocked.
+ */
+ static unsigned long netbk_queue_length = 32;
+-module_param_named(queue_length, netbk_queue_length, ulong, 0);
++module_param_named(queue_length, netbk_queue_length, ulong, 0644);
+
+ static void __netif_up(struct xen_netif *netif)
+ {
+@@ -123,6 +123,7 @@ static void netbk_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *info)
+ {
+ strcpy(info->driver, "netbk");
++ strcpy(info->bus_info, dev->dev.parent->bus_id);
+ }
+
+ static const struct netif_stat {
+diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
+index ed7c006..dc7b367 100644
+--- a/drivers/xen/netback/xenbus.c
++++ b/drivers/xen/netback/xenbus.c
+@@ -200,6 +200,7 @@ static void backend_create_netif(struct backend_info *be)
+ xenbus_dev_fatal(dev, err, "creating interface");
+ return;
+ }
++ SET_NETDEV_DEV(be->netif->dev, &dev->dev);
+
+ kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
+ }
+--
+1.7.3.4
+
+
+From f204d7567ab11ddb1ff3208ab5ed8921b575af5d Mon Sep 17 00:00:00 2001
+From: Ian Campbell <Ian.Campbell at citrix.com>
+Date: Mon, 16 Mar 2009 22:05:16 +0000
+Subject: [PATCH 058/139] xen: netback: parent sysfs device should be set before registering.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/common.h | 2 +-
+ drivers/xen/netback/interface.c | 4 +++-
+ drivers/xen/netback/xenbus.c | 3 +--
+ 3 files changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 6ba804d..123a169 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -180,7 +180,7 @@ void netif_accel_init(void);
+
+ void netif_disconnect(struct xen_netif *netif);
+
+-struct xen_netif *netif_alloc(domid_t domid, unsigned int handle);
++struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle);
+ int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
+ unsigned long rx_ring_ref, unsigned int evtchn);
+
+diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
+index 7706170..5e0d26d 100644
+--- a/drivers/xen/netback/interface.c
++++ b/drivers/xen/netback/interface.c
+@@ -178,7 +178,7 @@ static struct ethtool_ops network_ethtool_ops =
+ .get_strings = netbk_get_strings,
+ };
+
+-struct xen_netif *netif_alloc(domid_t domid, unsigned int handle)
++struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle)
+ {
+ int err = 0;
+ struct net_device *dev;
+@@ -192,6 +192,8 @@ struct xen_netif *netif_alloc(domid_t domid, unsigned int handle)
+ return ERR_PTR(-ENOMEM);
+ }
+
++ SET_NETDEV_DEV(dev, parent);
++
+ netif = netdev_priv(dev);
+ memset(netif, 0, sizeof(*netif));
+ netif->domid = domid;
+diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
+index dc7b367..749931e 100644
+--- a/drivers/xen/netback/xenbus.c
++++ b/drivers/xen/netback/xenbus.c
+@@ -193,14 +193,13 @@ static void backend_create_netif(struct backend_info *be)
+ return;
+ }
+
+- be->netif = netif_alloc(dev->otherend_id, handle);
++ be->netif = netif_alloc(&dev->dev, dev->otherend_id, handle);
+ if (IS_ERR(be->netif)) {
+ err = PTR_ERR(be->netif);
+ be->netif = NULL;
+ xenbus_dev_fatal(dev, err, "creating interface");
+ return;
+ }
+- SET_NETDEV_DEV(be->netif->dev, &dev->dev);
+
+ kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
+ }
+--
+1.7.3.4
+
+
+From bb606178665ea78b505cb54864899478b6020584 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Thu, 19 Mar 2009 12:42:36 -0700
+Subject: [PATCH 059/139] xen: netback: use NET_SKB_PAD rather than "16"
+
+There's a constant for the default skb headroom.
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c | 6 +++---
+ 1 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index f59fadb..400f398 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -203,7 +203,7 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
+ if (unlikely(!nskb))
+ goto err;
+
+- skb_reserve(nskb, 16 + NET_IP_ALIGN);
++ skb_reserve(nskb, NET_SKB_PAD + NET_IP_ALIGN);
+ headlen = skb_end_pointer(nskb) - nskb->data;
+ if (headlen > skb_headlen(skb))
+ headlen = skb_headlen(skb);
+@@ -1353,7 +1353,7 @@ static void net_tx_action(unsigned long unused)
+ ret < MAX_SKB_FRAGS) ?
+ PKT_PROT_LEN : txreq.size;
+
+- skb = alloc_skb(data_len + 16 + NET_IP_ALIGN,
++ skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
+ GFP_ATOMIC | __GFP_NOWARN);
+ if (unlikely(skb == NULL)) {
+ DPRINTK("Can't allocate a skb in start_xmit.\n");
+@@ -1362,7 +1362,7 @@ static void net_tx_action(unsigned long unused)
+ }
+
+ /* Packets passed to netif_rx() must have some headroom. */
+- skb_reserve(skb, 16 + NET_IP_ALIGN);
++ skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
+
+ if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
+ struct xen_netif_extra_info *gso;
+--
+1.7.3.4
+
+
+From fe41ab031dfa0c6f9821c2667ce821e7f4f635ed Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Thu, 19 Mar 2009 13:31:26 -0700
+Subject: [PATCH 060/139] xen: netback: completely drop flip support
+
+Nobody uses it?
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/common.h | 1 -
+ drivers/xen/netback/netback.c | 245 ++++-------------------------------------
+ drivers/xen/netback/xenbus.c | 3 +-
+ 3 files changed, 22 insertions(+), 227 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 123a169..06f04c1 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -78,7 +78,6 @@ struct xen_netif {
+
+ /* Internal feature information. */
+ u8 can_queue:1; /* can queue packets for receiver? */
+- u8 copying_receiver:1; /* copy packets to receiver? */
+
+ /* Allow netif_be_start_xmit() to peek ahead in the rx request ring. */
+ RING_IDX rx_req_cons_peek;
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 400f398..36bea2b 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -51,7 +51,6 @@
+ struct netbk_rx_meta {
+ skb_frag_t frag;
+ int id;
+- u8 copy:1;
+ };
+
+ struct netbk_tx_pending_inuse {
+@@ -160,26 +159,6 @@ static inline unsigned long alloc_mfn(void)
+ return mfn_list[--alloc_index];
+ }
+
+-static int check_mfn(int nr)
+-{
+- struct xen_memory_reservation reservation = {
+- .extent_order = 0,
+- .domid = DOMID_SELF
+- };
+- int rc;
+-
+- if (likely(alloc_index >= nr))
+- return 0;
+-
+- set_xen_guest_handle(reservation.extent_start, mfn_list + alloc_index);
+- reservation.nr_extents = MAX_MFN_ALLOC - alloc_index;
+- rc = HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation);
+- if (likely(rc > 0))
+- alloc_index += rc;
+-
+- return alloc_index >= nr ? 0 : -ENOMEM;
+-}
+-
+ static inline void maybe_schedule_tx_action(void)
+ {
+ smp_mb();
+@@ -188,82 +167,6 @@ static inline void maybe_schedule_tx_action(void)
+ tasklet_schedule(&net_tx_tasklet);
+ }
+
+-static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
+-{
+- struct skb_shared_info *ninfo;
+- struct sk_buff *nskb;
+- unsigned long offset;
+- int ret;
+- int len;
+- int headlen;
+-
+- BUG_ON(skb_shinfo(skb)->frag_list != NULL);
+-
+- nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN);
+- if (unlikely(!nskb))
+- goto err;
+-
+- skb_reserve(nskb, NET_SKB_PAD + NET_IP_ALIGN);
+- headlen = skb_end_pointer(nskb) - nskb->data;
+- if (headlen > skb_headlen(skb))
+- headlen = skb_headlen(skb);
+- ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
+- BUG_ON(ret);
+-
+- ninfo = skb_shinfo(nskb);
+- ninfo->gso_size = skb_shinfo(skb)->gso_size;
+- ninfo->gso_type = skb_shinfo(skb)->gso_type;
+-
+- offset = headlen;
+- len = skb->len - headlen;
+-
+- nskb->len = skb->len;
+- nskb->data_len = len;
+- nskb->truesize += len;
+-
+- while (len) {
+- struct page *page;
+- int copy;
+- int zero;
+-
+- if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
+- dump_stack();
+- goto err_free;
+- }
+-
+- copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
+- zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
+-
+- page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
+- if (unlikely(!page))
+- goto err_free;
+-
+- ret = skb_copy_bits(skb, offset, page_address(page), copy);
+- BUG_ON(ret);
+-
+- ninfo->frags[ninfo->nr_frags].page = page;
+- ninfo->frags[ninfo->nr_frags].page_offset = 0;
+- ninfo->frags[ninfo->nr_frags].size = copy;
+- ninfo->nr_frags++;
+-
+- offset += copy;
+- len -= copy;
+- }
+-
+- offset = nskb->data - skb->data;
+-
+- nskb->transport_header = skb->transport_header + offset;
+- nskb->network_header = skb->network_header + offset;
+- nskb->mac_header = skb->mac_header + offset;
+-
+- return nskb;
+-
+- err_free:
+- kfree_skb(nskb);
+- err:
+- return NULL;
+-}
+-
+ static inline int netbk_max_required_rx_slots(struct xen_netif *netif)
+ {
+ if (netif->features & (NETIF_F_SG|NETIF_F_TSO))
+@@ -297,24 +200,6 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
+ goto drop;
+
+- /*
+- * Copy the packet here if it's destined for a flipping interface
+- * but isn't flippable (e.g. extra references to data).
+- * XXX For now we also copy skbuffs whose head crosses a page
+- * boundary, because netbk_gop_skb can't handle them.
+- */
+- if (!netif->copying_receiver ||
+- ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE)) {
+- struct sk_buff *nskb = netbk_copy_skb(skb);
+- if ( unlikely(nskb == NULL) )
+- goto drop;
+- /* Copy only the header fields we use in this driver. */
+- nskb->dev = skb->dev;
+- nskb->ip_summed = skb->ip_summed;
+- dev_kfree_skb(skb);
+- skb = nskb;
+- }
+-
+ netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 +
+ !!skb_shinfo(skb)->gso_size;
+ netif_get(netif);
+@@ -388,66 +273,32 @@ static u16 netbk_gop_frag(struct xen_netif *netif, struct netbk_rx_meta *meta,
+ struct page *page, unsigned long size,
+ unsigned long offset)
+ {
+- struct mmu_update *mmu;
+- struct gnttab_transfer *gop;
+ struct gnttab_copy *copy_gop;
+- struct multicall_entry *mcl;
+ struct xen_netif_rx_request *req;
+- unsigned long old_mfn, new_mfn;
++ unsigned long old_mfn;
+ int idx = netif_page_index(page);
+
+ old_mfn = virt_to_mfn(page_address(page));
+
+ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
+- if (netif->copying_receiver) {
+- /* The fragment needs to be copied rather than
+- flipped. */
+- meta->copy = 1;
+- copy_gop = npo->copy + npo->copy_prod++;
+- copy_gop->flags = GNTCOPY_dest_gref;
+- if (idx > -1) {
+- struct pending_tx_info *src_pend = &pending_tx_info[idx];
+- copy_gop->source.domid = src_pend->netif->domid;
+- copy_gop->source.u.ref = src_pend->req.gref;
+- copy_gop->flags |= GNTCOPY_source_gref;
+- } else {
+- copy_gop->source.domid = DOMID_SELF;
+- copy_gop->source.u.gmfn = old_mfn;
+- }
+- copy_gop->source.offset = offset;
+- copy_gop->dest.domid = netif->domid;
+- copy_gop->dest.offset = 0;
+- copy_gop->dest.u.ref = req->gref;
+- copy_gop->len = size;
+- } else {
+- meta->copy = 0;
+- if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+- new_mfn = alloc_mfn();
+-
+- /*
+- * Set the new P2M table entry before
+- * reassigning the old data page. Heed the
+- * comment in pgtable-2level.h:pte_page(). :-)
+- */
+- set_phys_to_machine(page_to_pfn(page), new_mfn);
+-
+- mcl = npo->mcl + npo->mcl_prod++;
+- MULTI_update_va_mapping(mcl,
+- (unsigned long)page_address(page),
+- mfn_pte(new_mfn, PAGE_KERNEL),
+- 0);
+-
+- mmu = npo->mmu + npo->mmu_prod++;
+- mmu->ptr = ((phys_addr_t)new_mfn << PAGE_SHIFT) |
+- MMU_MACHPHYS_UPDATE;
+- mmu->val = page_to_pfn(page);
+- }
+
+- gop = npo->trans + npo->trans_prod++;
+- gop->mfn = old_mfn;
+- gop->domid = netif->domid;
+- gop->ref = req->gref;
++ copy_gop = npo->copy + npo->copy_prod++;
++ copy_gop->flags = GNTCOPY_dest_gref;
++ if (idx > -1) {
++ struct pending_tx_info *src_pend = &pending_tx_info[idx];
++ copy_gop->source.domid = src_pend->netif->domid;
++ copy_gop->source.u.ref = src_pend->req.gref;
++ copy_gop->flags |= GNTCOPY_source_gref;
++ } else {
++ copy_gop->source.domid = DOMID_SELF;
++ copy_gop->source.u.gmfn = old_mfn;
+ }
++ copy_gop->source.offset = offset;
++ copy_gop->dest.domid = netif->domid;
++ copy_gop->dest.offset = 0;
++ copy_gop->dest.u.ref = req->gref;
++ copy_gop->len = size;
++
+ return req->id;
+ }
+
+@@ -502,41 +353,17 @@ static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
+ static int netbk_check_gop(int nr_frags, domid_t domid,
+ struct netrx_pending_operations *npo)
+ {
+- struct multicall_entry *mcl;
+- struct gnttab_transfer *gop;
+ struct gnttab_copy *copy_op;
+ int status = NETIF_RSP_OKAY;
+ int i;
+
+ for (i = 0; i <= nr_frags; i++) {
+- if (npo->meta[npo->meta_cons + i].copy) {
+ copy_op = npo->copy + npo->copy_cons++;
+ if (copy_op->status != GNTST_okay) {
+ DPRINTK("Bad status %d from copy to DOM%d.\n",
+ copy_op->status, domid);
+ status = NETIF_RSP_ERROR;
+ }
+- } else {
+- if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+- mcl = npo->mcl + npo->mcl_cons++;
+- /* The update_va_mapping() must not fail. */
+- BUG_ON(mcl->result != 0);
+- }
+-
+- gop = npo->trans + npo->trans_cons++;
+- /* Check the reassignment error code. */
+- if (gop->status != 0) {
+- DPRINTK("Bad status %d from grant transfer to DOM%u\n",
+- gop->status, domid);
+- /*
+- * Page no longer belongs to us unless
+- * GNTST_bad_page, but that should be
+- * a fatal error anyway.
+- */
+- BUG_ON(gop->status == GNTST_bad_page);
+- status = NETIF_RSP_ERROR;
+- }
+- }
+ }
+
+ return status;
+@@ -551,11 +378,8 @@ static void netbk_add_frag_responses(struct xen_netif *netif, int status,
+ for (i = 0; i < nr_frags; i++) {
+ int id = meta[i].id;
+ int flags = (i == nr_frags - 1) ? 0 : NETRXF_more_data;
+-
+- if (meta[i].copy)
+- offset = 0;
+- else
+- offset = meta[i].frag.page_offset;
++
++ offset = 0;
+ make_rx_response(netif, id, status, offset,
+ meta[i].frag.size, flags);
+ }
+@@ -603,18 +427,6 @@ static void net_rx_action(unsigned long unused)
+ nr_frags = skb_shinfo(skb)->nr_frags;
+ *(int *)skb->cb = nr_frags;
+
+- if (!xen_feature(XENFEAT_auto_translated_physmap) &&
+- !((struct xen_netif *)netdev_priv(skb->dev))->copying_receiver &&
+- check_mfn(nr_frags + 1)) {
+- /* Memory squeeze? Back off for an arbitrary while. */
+- if ( net_ratelimit() )
+- WPRINTK("Memory squeeze in netback "
+- "driver.\n");
+- mod_timer(&net_timer, jiffies + HZ);
+- skb_queue_head(&rx_queue, skb);
+- break;
+- }
+-
+ netbk_gop_skb(skb, &npo);
+
+ count += nr_frags + 1;
+@@ -677,20 +489,6 @@ static void net_rx_action(unsigned long unused)
+ nr_frags = *(int *)skb->cb;
+
+ netif = netdev_priv(skb->dev);
+- /* We can't rely on skb_release_data to release the
+- pages used by fragments for us, since it tries to
+- touch the pages in the fraglist. If we're in
+- flipping mode, that doesn't work. In copying mode,
+- we still have access to all of the pages, and so
+- it's safe to let release_data deal with it. */
+- /* (Freeing the fragments is safe since we copy
+- non-linear skbs destined for flipping interfaces) */
+- if (!netif->copying_receiver) {
+- atomic_set(&(skb_shinfo(skb)->dataref), 1);
+- skb_shinfo(skb)->frag_list = NULL;
+- skb_shinfo(skb)->nr_frags = 0;
+- netbk_free_pages(nr_frags, meta + npo.meta_cons + 1);
+- }
+
+ netif->stats.tx_bytes += skb->len;
+ netif->stats.tx_packets++;
+@@ -706,10 +504,7 @@ static void net_rx_action(unsigned long unused)
+ /* remote but checksummed. */
+ flags |= NETRXF_data_validated;
+
+- if (meta[npo.meta_cons].copy)
+- offset = 0;
+- else
+- offset = offset_in_page(skb->data);
++ offset = 0;
+ resp = make_rx_response(netif, id, status, offset,
+ skb_headlen(skb), flags);
+
+diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
+index 749931e..a492288 100644
+--- a/drivers/xen/netback/xenbus.c
++++ b/drivers/xen/netback/xenbus.c
+@@ -378,7 +378,8 @@ static int connect_rings(struct backend_info *be)
+ dev->otherend);
+ return err;
+ }
+- be->netif->copying_receiver = !!rx_copy;
++ if (!rx_copy)
++ return -EOPNOTSUPP;
+
+ if (be->netif->dev->tx_queue_len != 0) {
+ if (xenbus_scanf(XBT_NIL, dev->otherend,
+--
+1.7.3.4
+
+
+From 17d465234118873ab4f5a7992feb4ce7b5537cf7 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Thu, 19 Mar 2009 15:19:39 -0700
+Subject: [PATCH 061/139] xen: netback: demacro MASK_PEND_IDX
+
+Replace it with a more meaningful inline: pending_index().
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c | 21 +++++++++++++--------
+ 1 files changed, 13 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 36bea2b..4095622 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -121,7 +121,12 @@ static struct pending_tx_info {
+ } pending_tx_info[MAX_PENDING_REQS];
+ static u16 pending_ring[MAX_PENDING_REQS];
+ typedef unsigned int PEND_RING_IDX;
+-#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
++
++static inline PEND_RING_IDX pending_index(unsigned i)
++{
++ return i & (MAX_PENDING_REQS-1);
++}
++
+ static PEND_RING_IDX pending_prod, pending_cons;
+ #define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
+
+@@ -695,7 +700,7 @@ inline static void net_tx_action_dealloc(void)
+ while (dc != dp) {
+ unsigned long pfn;
+
+- pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
++ pending_idx = dealloc_ring[pending_index(dc++)];
+ list_move_tail(&pending_inuse[pending_idx].list, &list);
+
+ pfn = idx_to_pfn(pending_idx);
+@@ -754,7 +759,7 @@ inline static void net_tx_action_dealloc(void)
+ /* Ready for next use. */
+ gnttab_reset_grant_page(mmap_pages[pending_idx]);
+
+- pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
++ pending_ring[pending_index(pending_prod++)] = pending_idx;
+
+ netif_put(netif);
+
+@@ -831,7 +836,7 @@ static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netif *netif,
+ start = ((unsigned long)shinfo->frags[0].page == pending_idx);
+
+ for (i = start; i < shinfo->nr_frags; i++, txp++) {
+- pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)];
++ pending_idx = pending_ring[pending_index(pending_cons++)];
+
+ gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx),
+ GNTMAP_host_map | GNTMAP_readonly,
+@@ -862,7 +867,7 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
+ if (unlikely(err)) {
+ txp = &pending_tx_info[pending_idx].req;
+ make_tx_response(netif, txp, NETIF_RSP_ERROR);
+- pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
++ pending_ring[pending_index(pending_prod++)] = pending_idx;
+ netif_put(netif);
+ } else {
+ set_phys_to_machine(
+@@ -895,7 +900,7 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
+ /* Error on this fragment: respond to client with an error. */
+ txp = &pending_tx_info[pending_idx].req;
+ make_tx_response(netif, txp, NETIF_RSP_ERROR);
+- pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
++ pending_ring[pending_index(pending_prod++)] = pending_idx;
+ netif_put(netif);
+
+ /* Not the first error? Preceding frags already invalidated. */
+@@ -1142,7 +1147,7 @@ static void net_tx_action(unsigned long unused)
+ continue;
+ }
+
+- pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
++ pending_idx = pending_ring[pending_index(pending_cons)];
+
+ data_len = (txreq.size > PKT_PROT_LEN &&
+ ret < MAX_SKB_FRAGS) ?
+@@ -1298,7 +1303,7 @@ static void netif_idx_release(u16 pending_idx)
+ unsigned long flags;
+
+ spin_lock_irqsave(&_lock, flags);
+- dealloc_ring[MASK_PEND_IDX(dealloc_prod)] = pending_idx;
++ dealloc_ring[pending_index(dealloc_prod)] = pending_idx;
+ /* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
+ smp_wmb();
+ dealloc_prod++;
+--
+1.7.3.4
+
+
+From d47af34f87b2d365c75aa3579ad512619ef3d579 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Thu, 19 Mar 2009 15:29:30 -0700
+Subject: [PATCH 062/139] xen: netback: convert PEND_RING_IDX into a proper typedef name
+
+Rename PEND_RING_IDX to pending_ring_idx_t. Its not used that much,
+the extra typing won't kill anyone.
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c | 12 ++++++------
+ 1 files changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 4095622..8292e96 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -120,19 +120,19 @@ static struct pending_tx_info {
+ struct xen_netif *netif;
+ } pending_tx_info[MAX_PENDING_REQS];
+ static u16 pending_ring[MAX_PENDING_REQS];
+-typedef unsigned int PEND_RING_IDX;
++typedef unsigned int pending_ring_idx_t;
+
+-static inline PEND_RING_IDX pending_index(unsigned i)
++static inline pending_ring_idx_t pending_index(unsigned i)
+ {
+ return i & (MAX_PENDING_REQS-1);
+ }
+
+-static PEND_RING_IDX pending_prod, pending_cons;
++static pending_ring_idx_t pending_prod, pending_cons;
+ #define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
+
+ /* Freed TX SKBs get batched on this ring before return to pending_ring. */
+ static u16 dealloc_ring[MAX_PENDING_REQS];
+-static PEND_RING_IDX dealloc_prod, dealloc_cons;
++static pending_ring_idx_t dealloc_prod, dealloc_cons;
+
+ /* Doubly-linked list of in-use pending entries. */
+ static struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
+@@ -669,7 +669,7 @@ static void tx_credit_callback(unsigned long data)
+ netif_schedule_work(netif);
+ }
+
+-static inline int copy_pending_req(PEND_RING_IDX pending_idx)
++static inline int copy_pending_req(pending_ring_idx_t pending_idx)
+ {
+ return gnttab_copy_grant_page(grant_tx_handle[pending_idx],
+ &mmap_pages[pending_idx]);
+@@ -680,7 +680,7 @@ inline static void net_tx_action_dealloc(void)
+ struct netbk_tx_pending_inuse *inuse, *n;
+ struct gnttab_unmap_grant_ref *gop;
+ u16 pending_idx;
+- PEND_RING_IDX dc, dp;
++ pending_ring_idx_t dc, dp;
+ struct xen_netif *netif;
+ int ret;
+ LIST_HEAD(list);
+--
+1.7.3.4
+
+
+From 56727a43f329d50c2a00fed0316ffd87d6c23ebd Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Thu, 19 Mar 2009 15:31:32 -0700
+Subject: [PATCH 063/139] xen: netback: rename NR_PENDING_REQS to nr_pending_reqs()
+
+Use function syntax to show its actually computing a value, rather than
+a constant.
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c | 10 +++++++---
+ 1 files changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 8292e96..5410a68 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -128,7 +128,11 @@ static inline pending_ring_idx_t pending_index(unsigned i)
+ }
+
+ static pending_ring_idx_t pending_prod, pending_cons;
+-#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
++
++static inline pending_ring_idx_t nr_pending_reqs(void)
++{
++ return MAX_PENDING_REQS - pending_prod + pending_cons;
++}
+
+ /* Freed TX SKBs get batched on this ring before return to pending_ring. */
+ static u16 dealloc_ring[MAX_PENDING_REQS];
+@@ -167,7 +171,7 @@ static inline unsigned long alloc_mfn(void)
+ static inline void maybe_schedule_tx_action(void)
+ {
+ smp_mb();
+- if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
++ if ((nr_pending_reqs() < (MAX_PENDING_REQS/2)) &&
+ !list_empty(&net_schedule_list))
+ tasklet_schedule(&net_tx_tasklet);
+ }
+@@ -1060,7 +1064,7 @@ static void net_tx_action(unsigned long unused)
+ net_tx_action_dealloc();
+
+ mop = tx_map_ops;
+- while (((NR_PENDING_REQS + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
++ while (((nr_pending_reqs() + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
+ !list_empty(&net_schedule_list)) {
+ /* Get a netif from the list with work to do. */
+ ent = net_schedule_list.next;
+--
+1.7.3.4
+
+
+From 55b360614f1bd44d0b1395b4aabf41d8f1f13f17 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Thu, 19 Mar 2009 15:45:45 -0700
+Subject: [PATCH 064/139] xen: netback: pre-initialize list and spinlocks; use empty list to indicate not on list
+
+Statically pre-initialize net_schedule_list head and lock.
+
+Use an empty list to mark when a xen_netif is not on the schedule list,
+rather than NULL (which may upset list debugging).
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/interface.c | 1 +
+ drivers/xen/netback/netback.c | 12 ++++--------
+ 2 files changed, 5 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
+index 5e0d26d..dc4fb53 100644
+--- a/drivers/xen/netback/interface.c
++++ b/drivers/xen/netback/interface.c
+@@ -201,6 +201,7 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
+ atomic_set(&netif->refcnt, 1);
+ init_waitqueue_head(&netif->waiting_to_free);
+ netif->dev = dev;
++ INIT_LIST_HEAD(&netif->list);
+
+ netback_carrier_off(netif);
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 5410a68..cbd4b03 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -148,8 +148,8 @@ static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
+ static struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
+ static struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
+
+-static struct list_head net_schedule_list;
+-static spinlock_t net_schedule_list_lock;
++static LIST_HEAD(net_schedule_list);
++static DEFINE_SPINLOCK(net_schedule_list_lock);
+
+ #define MAX_MFN_ALLOC 64
+ static unsigned long mfn_list[MAX_MFN_ALLOC];
+@@ -588,15 +588,14 @@ struct net_device_stats *netif_be_get_stats(struct net_device *dev)
+
+ static int __on_net_schedule_list(struct xen_netif *netif)
+ {
+- return netif->list.next != NULL;
++ return !list_empty(&netif->list);
+ }
+
+ static void remove_from_net_schedule_list(struct xen_netif *netif)
+ {
+ spin_lock_irq(&net_schedule_list_lock);
+ if (likely(__on_net_schedule_list(netif))) {
+- list_del(&netif->list);
+- netif->list.next = NULL;
++ list_del_init(&netif->list);
+ netif_put(netif);
+ }
+ spin_unlock_irq(&net_schedule_list_lock);
+@@ -1466,9 +1465,6 @@ static int __init netback_init(void)
+ for (i = 0; i < MAX_PENDING_REQS; i++)
+ pending_ring[i] = i;
+
+- spin_lock_init(&net_schedule_list_lock);
+- INIT_LIST_HEAD(&net_schedule_list);
+-
+ netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
+ if (MODPARM_copy_skb) {
+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
+--
+1.7.3.4
+
+
+From e12cf57de7a6c20e4c8900ce7bf4e6924a12f49e Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Thu, 19 Mar 2009 15:48:10 -0700
+Subject: [PATCH 065/139] xen: netback: remove CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
+
+Keir says:
+> > Does CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER need to be a config
+> > option? Could/should we always/never set it?
+> It doesn't work well with local delivery into dom0, nor even with IP
+> fragment reassembly. I don't think we would ever turn it on these days.
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c | 21 ---------------------
+ 1 files changed, 0 insertions(+), 21 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index cbd4b03..f00e405 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -615,23 +615,11 @@ static void add_to_net_schedule_list_tail(struct xen_netif *netif)
+ spin_unlock_irq(&net_schedule_list_lock);
+ }
+
+-/*
+- * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER:
+- * If this driver is pipelining transmit requests then we can be very
+- * aggressive in avoiding new-packet notifications -- frontend only needs to
+- * send a notification if there are no outstanding unreceived responses.
+- * If we may be buffer transmit buffers for any reason then we must be rather
+- * more conservative and treat this as the final check for pending work.
+- */
+ void netif_schedule_work(struct xen_netif *netif)
+ {
+ int more_to_do;
+
+-#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
+- more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx);
+-#else
+ RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
+-#endif
+
+ if (more_to_do) {
+ add_to_net_schedule_list_tail(netif);
+@@ -1355,15 +1343,6 @@ static void make_tx_response(struct xen_netif *netif,
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
+ if (notify)
+ notify_remote_via_irq(netif->irq);
+-
+-#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
+- if (i == netif->tx.req_cons) {
+- int more_to_do;
+- RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
+- if (more_to_do)
+- add_to_net_schedule_list_tail(netif);
+- }
+-#endif
+ }
+
+ static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
+--
+1.7.3.4
+
+
+From adf542f9c714e3b7c76fcf9e44e0a89cae21a341 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Thu, 19 Mar 2009 22:28:52 -0700
+Subject: [PATCH 066/139] xen: netback: make netif_get/put inlines
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/common.h | 16 ++++++++++------
+ 1 files changed, 10 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 06f04c1..9056be0 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -183,12 +183,16 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
+ int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
+ unsigned long rx_ring_ref, unsigned int evtchn);
+
+-#define netif_get(_b) (atomic_inc(&(_b)->refcnt))
+-#define netif_put(_b) \
+- do { \
+- if ( atomic_dec_and_test(&(_b)->refcnt) ) \
+- wake_up(&(_b)->waiting_to_free); \
+- } while (0)
++static inline void netif_get(struct xen_netif *netif)
++{
++ atomic_inc(&netif->refcnt);
++}
++
++static inline void netif_put(struct xen_netif *netif)
++{
++ if (atomic_dec_and_test(&netif->refcnt))
++ wake_up(&netif->waiting_to_free);
++}
+
+ void netif_xenbus_init(void);
+
+--
+1.7.3.4
+
+
+From f06459a199f953a68f001f06e54dde54e1e16c87 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Thu, 19 Mar 2009 22:30:24 -0700
+Subject: [PATCH 067/139] xen: netback: move code around
+
+net_tx_action() into several functions; move variables into
+their innermost scopes; rename "i" to "idx".
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c | 158 ++++++++++++++++++++++++-----------------
+ 1 files changed, 94 insertions(+), 64 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index f00e405..4d63ff3 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -773,7 +773,8 @@ static void netbk_tx_err(struct xen_netif *netif, struct xen_netif_tx_request *t
+ netif_put(netif);
+ }
+
+-static int netbk_count_requests(struct xen_netif *netif, struct xen_netif_tx_request *first,
++static int netbk_count_requests(struct xen_netif *netif,
++ struct xen_netif_tx_request *first,
+ struct xen_netif_tx_request *txp, int work_to_do)
+ {
+ RING_IDX cons = netif->tx.req_cons;
+@@ -1032,30 +1033,58 @@ out:
+ return err;
+ }
+
+-/* Called after netfront has transmitted */
+-static void net_tx_action(unsigned long unused)
++static bool tx_credit_exceeded(struct xen_netif *netif, unsigned size)
+ {
+- struct list_head *ent;
+- struct sk_buff *skb;
+- struct xen_netif *netif;
+- struct xen_netif_tx_request txreq;
+- struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
+- struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
+- u16 pending_idx;
+- RING_IDX i;
+- struct gnttab_map_grant_ref *mop;
+- unsigned int data_len;
+- int ret, work_to_do;
++ unsigned long now = jiffies;
++ unsigned long next_credit =
++ netif->credit_timeout.expires +
++ msecs_to_jiffies(netif->credit_usec / 1000);
++
++ /* Timer could already be pending in rare cases. */
++ if (timer_pending(&netif->credit_timeout))
++ return true;
++
++ /* Passed the point where we can replenish credit? */
++ if (time_after_eq(now, next_credit)) {
++ netif->credit_timeout.expires = now;
++ tx_add_credit(netif);
++ }
+
+- if (dealloc_cons != dealloc_prod)
+- net_tx_action_dealloc();
++ /* Still too big to send right now? Set a callback. */
++ if (size > netif->remaining_credit) {
++ netif->credit_timeout.data =
++ (unsigned long)netif;
++ netif->credit_timeout.function =
++ tx_credit_callback;
++ mod_timer(&netif->credit_timeout,
++ next_credit);
++
++ return true;
++ }
++
++ return false;
++}
++
++static unsigned net_tx_build_mops(void)
++{
++ struct gnttab_map_grant_ref *mop;
++ struct sk_buff *skb;
++ int ret;
+
+ mop = tx_map_ops;
+ while (((nr_pending_reqs() + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
+ !list_empty(&net_schedule_list)) {
++ struct xen_netif *netif;
++ struct xen_netif_tx_request txreq;
++ struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
++ struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
++ u16 pending_idx;
++ RING_IDX idx;
++ int work_to_do;
++ unsigned int data_len;
++
+ /* Get a netif from the list with work to do. */
+- ent = net_schedule_list.next;
+- netif = list_entry(ent, struct xen_netif, list);
++ netif = list_first_entry(&net_schedule_list, struct xen_netif, list);
+ netif_get(netif);
+ remove_from_net_schedule_list(netif);
+
+@@ -1065,67 +1094,43 @@ static void net_tx_action(unsigned long unused)
+ continue;
+ }
+
+- i = netif->tx.req_cons;
++ idx = netif->tx.req_cons;
+ rmb(); /* Ensure that we see the request before we copy it. */
+- memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));
++ memcpy(&txreq, RING_GET_REQUEST(&netif->tx, idx), sizeof(txreq));
+
+ /* Credit-based scheduling. */
+- if (txreq.size > netif->remaining_credit) {
+- unsigned long now = jiffies;
+- unsigned long next_credit =
+- netif->credit_timeout.expires +
+- msecs_to_jiffies(netif->credit_usec / 1000);
+-
+- /* Timer could already be pending in rare cases. */
+- if (timer_pending(&netif->credit_timeout)) {
+- netif_put(netif);
+- continue;
+- }
+-
+- /* Passed the point where we can replenish credit? */
+- if (time_after_eq(now, next_credit)) {
+- netif->credit_timeout.expires = now;
+- tx_add_credit(netif);
+- }
+-
+- /* Still too big to send right now? Set a callback. */
+- if (txreq.size > netif->remaining_credit) {
+- netif->credit_timeout.data =
+- (unsigned long)netif;
+- netif->credit_timeout.function =
+- tx_credit_callback;
+- mod_timer(&netif->credit_timeout,
+- next_credit);
+- netif_put(netif);
+- continue;
+- }
++ if (txreq.size > netif->remaining_credit &&
++ tx_credit_exceeded(netif, txreq.size)) {
++ netif_put(netif);
++ continue;
+ }
++
+ netif->remaining_credit -= txreq.size;
+
+ work_to_do--;
+- netif->tx.req_cons = ++i;
++ netif->tx.req_cons = ++idx;
+
+ memset(extras, 0, sizeof(extras));
+ if (txreq.flags & NETTXF_extra_info) {
+ work_to_do = netbk_get_extras(netif, extras,
+ work_to_do);
+- i = netif->tx.req_cons;
++ idx = netif->tx.req_cons;
+ if (unlikely(work_to_do < 0)) {
+- netbk_tx_err(netif, &txreq, i);
++ netbk_tx_err(netif, &txreq, idx);
+ continue;
+ }
+ }
+
+ ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do);
+ if (unlikely(ret < 0)) {
+- netbk_tx_err(netif, &txreq, i - ret);
++ netbk_tx_err(netif, &txreq, idx - ret);
+ continue;
+ }
+- i += ret;
++ idx += ret;
+
+ if (unlikely(txreq.size < ETH_HLEN)) {
+ DPRINTK("Bad packet size: %d\n", txreq.size);
+- netbk_tx_err(netif, &txreq, i);
++ netbk_tx_err(netif, &txreq, idx);
+ continue;
+ }
+
+@@ -1134,7 +1139,7 @@ static void net_tx_action(unsigned long unused)
+ DPRINTK("txreq.offset: %x, size: %u, end: %lu\n",
+ txreq.offset, txreq.size,
+ (txreq.offset &~PAGE_MASK) + txreq.size);
+- netbk_tx_err(netif, &txreq, i);
++ netbk_tx_err(netif, &txreq, idx);
+ continue;
+ }
+
+@@ -1148,7 +1153,7 @@ static void net_tx_action(unsigned long unused)
+ GFP_ATOMIC | __GFP_NOWARN);
+ if (unlikely(skb == NULL)) {
+ DPRINTK("Can't allocate a skb in start_xmit.\n");
+- netbk_tx_err(netif, &txreq, i);
++ netbk_tx_err(netif, &txreq, idx);
+ break;
+ }
+
+@@ -1161,7 +1166,7 @@ static void net_tx_action(unsigned long unused)
+
+ if (netbk_set_skb_gso(skb, gso)) {
+ kfree_skb(skb);
+- netbk_tx_err(netif, &txreq, i);
++ netbk_tx_err(netif, &txreq, idx);
+ continue;
+ }
+ }
+@@ -1199,23 +1204,27 @@ static void net_tx_action(unsigned long unused)
+
+ mop = netbk_get_requests(netif, skb, txfrags, mop);
+
+- netif->tx.req_cons = i;
++ netif->tx.req_cons = idx;
+ netif_schedule_work(netif);
+
+ if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
+ break;
+ }
+
+- if (mop == tx_map_ops)
+- return;
++ return mop - tx_map_ops;
++}
+
+- ret = HYPERVISOR_grant_table_op(
+- GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops);
+- BUG_ON(ret);
++static void net_tx_submit(void)
++{
++ struct gnttab_map_grant_ref *mop;
++ struct sk_buff *skb;
+
+ mop = tx_map_ops;
+ while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
+ struct xen_netif_tx_request *txp;
++ struct xen_netif *netif;
++ u16 pending_idx;
++ unsigned data_len;
+
+ pending_idx = *((u16 *)skb->data);
+ netif = pending_tx_info[pending_idx].netif;
+@@ -1288,6 +1297,27 @@ static void net_tx_action(unsigned long unused)
+ }
+ }
+
++/* Called after netfront has transmitted */
++static void net_tx_action(unsigned long unused)
++{
++ unsigned nr_mops;
++ int ret;
++
++ if (dealloc_cons != dealloc_prod)
++ net_tx_action_dealloc();
++
++ nr_mops = net_tx_build_mops();
++
++ if (nr_mops == 0)
++ return;
++
++ ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
++ tx_map_ops, nr_mops);
++ BUG_ON(ret);
++
++ net_tx_submit();
++}
++
+ static void netif_idx_release(u16 pending_idx)
+ {
+ static DEFINE_SPINLOCK(_lock);
+--
+1.7.3.4
+
+
+From cec84ff81d9f6ca882908572b984215529b4117b Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Fri, 20 Mar 2009 23:18:12 -0700
+Subject: [PATCH 068/139] xen: netback: document PKT_PROT_LEN
+
+Document the rationale for the existence and value of PKT_PROT_LEN.
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c | 9 +++++++++
+ 1 files changed, 9 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 4d63ff3..80b424f 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -113,6 +113,15 @@ static inline int netif_page_index(struct page *pg)
+ return idx;
+ }
+
++/*
++ * This is the amount of packet we copy rather than map, so that the
++ * guest can't fiddle with the contents of the headers while we do
++ * packet processing on them (netfilter, routing, etc). This could
++ * probably do with being larger, since 1) 64-bytes isn't necessarily
++ * long enough to cover a full christmas-tree ip+tcp header, let alone
++ * packet contents, and 2) the data is probably in cache anyway
++ * (though perhaps some other cpu's cache).
++ */
+ #define PKT_PROT_LEN 64
+
+ static struct pending_tx_info {
+--
+1.7.3.4
+
+
+From a9402ee935757e8facebc6e886f9912c2c523da7 Mon Sep 17 00:00:00 2001
+From: Christophe Saout <chtephan at leto.intern.saout.de>
+Date: Sun, 12 Apr 2009 13:40:27 +0200
+Subject: [PATCH 069/139] xen: netback: use dev_name() instead of removed ->bus_id.
+
+Signed-off-by: Christophe Saout <chtephan at leto.intern.saout.de>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/interface.c | 2 +-
+ 1 files changed, 1 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
+index dc4fb53..3bb5c20 100644
+--- a/drivers/xen/netback/interface.c
++++ b/drivers/xen/netback/interface.c
+@@ -123,7 +123,7 @@ static void netbk_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *info)
+ {
+ strcpy(info->driver, "netbk");
+- strcpy(info->bus_info, dev->dev.parent->bus_id);
++ strcpy(info->bus_info, dev_name(dev->dev.parent));
+ }
+
+ static const struct netif_stat {
+--
+1.7.3.4
+
+
+From 35de1701fca19d693e9722bffbe7609caf1d5ac6 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Mon, 29 Jun 2009 14:04:23 -0700
+Subject: [PATCH 070/139] xen: netback: convert to net_device_ops
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/interface.c | 17 +++++++++++------
+ 1 files changed, 11 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
+index 3bb5c20..21c1f95 100644
+--- a/drivers/xen/netback/interface.c
++++ b/drivers/xen/netback/interface.c
+@@ -178,6 +178,15 @@ static struct ethtool_ops network_ethtool_ops =
+ .get_strings = netbk_get_strings,
+ };
+
++static struct net_device_ops netback_ops =
++{
++ .ndo_start_xmit = netif_be_start_xmit,
++ .ndo_get_stats = netif_be_get_stats,
++ .ndo_open = net_open,
++ .ndo_stop = net_close,
++ .ndo_change_mtu = netbk_change_mtu,
++};
++
+ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle)
+ {
+ int err = 0;
+@@ -213,12 +222,8 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
+
+ init_timer(&netif->tx_queue_timeout);
+
+- dev->hard_start_xmit = netif_be_start_xmit;
+- dev->get_stats = netif_be_get_stats;
+- dev->open = net_open;
+- dev->stop = net_close;
+- dev->change_mtu = netbk_change_mtu;
+- dev->features = NETIF_F_IP_CSUM;
++ dev->netdev_ops = &netback_ops;
++ dev->features = NETIF_F_IP_CSUM;
+
+ SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
+
+--
+1.7.3.4
+
+
+From c6f3885ef05e96489025e1c1c7299aac7cf43d87 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Fri, 4 Sep 2009 14:55:43 -0700
+Subject: [PATCH 071/139] xen: netback: reinstate missing code
+
+Change c3219dc868fe3e84070d6da2d0759a834b6f7251, "Completely drop flip
+support" was a bit too aggressive in removing code, and removed a chunk
+which was used for not only flip but if a buffer crossed a page boundary.
+Reinstate that code.
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c | 91 +++++++++++++++++++++++++++++++++++++++++
+ 1 files changed, 91 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 80b424f..7c0f05b 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -185,6 +185,82 @@ static inline void maybe_schedule_tx_action(void)
+ tasklet_schedule(&net_tx_tasklet);
+ }
+
++static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
++{
++ struct skb_shared_info *ninfo;
++ struct sk_buff *nskb;
++ unsigned long offset;
++ int ret;
++ int len;
++ int headlen;
++
++ BUG_ON(skb_shinfo(skb)->frag_list != NULL);
++
++ nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN);
++ if (unlikely(!nskb))
++ goto err;
++
++ skb_reserve(nskb, NET_SKB_PAD + NET_IP_ALIGN);
++ headlen = skb_end_pointer(nskb) - nskb->data;
++ if (headlen > skb_headlen(skb))
++ headlen = skb_headlen(skb);
++ ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
++ BUG_ON(ret);
++
++ ninfo = skb_shinfo(nskb);
++ ninfo->gso_size = skb_shinfo(skb)->gso_size;
++ ninfo->gso_type = skb_shinfo(skb)->gso_type;
++
++ offset = headlen;
++ len = skb->len - headlen;
++
++ nskb->len = skb->len;
++ nskb->data_len = len;
++ nskb->truesize += len;
++
++ while (len) {
++ struct page *page;
++ int copy;
++ int zero;
++
++ if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
++ dump_stack();
++ goto err_free;
++ }
++
++ copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
++ zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
++
++ page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
++ if (unlikely(!page))
++ goto err_free;
++
++ ret = skb_copy_bits(skb, offset, page_address(page), copy);
++ BUG_ON(ret);
++
++ ninfo->frags[ninfo->nr_frags].page = page;
++ ninfo->frags[ninfo->nr_frags].page_offset = 0;
++ ninfo->frags[ninfo->nr_frags].size = copy;
++ ninfo->nr_frags++;
++
++ offset += copy;
++ len -= copy;
++ }
++
++ offset = nskb->data - skb->data;
++
++ nskb->transport_header = skb->transport_header + offset;
++ nskb->network_header = skb->network_header + offset;
++ nskb->mac_header = skb->mac_header + offset;
++
++ return nskb;
++
++ err_free:
++ kfree_skb(nskb);
++ err:
++ return NULL;
++}
++
+ static inline int netbk_max_required_rx_slots(struct xen_netif *netif)
+ {
+ if (netif->features & (NETIF_F_SG|NETIF_F_TSO))
+@@ -218,6 +294,21 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
+ goto drop;
+
++ /*
++ * XXX For now we also copy skbuffs whose head crosses a page
++ * boundary, because netbk_gop_skb can't handle them.
++ */
++ if ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE) {
++ struct sk_buff *nskb = netbk_copy_skb(skb);
++ if ( unlikely(nskb == NULL) )
++ goto drop;
++ /* Copy only the header fields we use in this driver. */
++ nskb->dev = skb->dev;
++ nskb->ip_summed = skb->ip_summed;
++ dev_kfree_skb(skb);
++ skb = nskb;
++ }
++
+ netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 +
+ !!skb_shinfo(skb)->gso_size;
+ netif_get(netif);
+--
+1.7.3.4
+
+
+From 2e290d790877df4368691180f76206ad27a42505 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Wed, 9 Sep 2009 15:19:15 -0700
+Subject: [PATCH 072/139] xen: netback: remove debug noise
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c | 2 --
+ 1 files changed, 0 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 7c0f05b..d7d738e 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -1537,8 +1537,6 @@ static int __init netback_init(void)
+ int i;
+ struct page *page;
+
+- printk(KERN_CRIT "*** netif_init\n");
+-
+ if (!xen_domain())
+ return -ENODEV;
+
+--
+1.7.3.4
+
+
+From 3ba3bb7d563704c3050de6116aa0a761a5791428 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Thu, 8 Oct 2009 13:23:09 -0400
+Subject: [PATCH 073/139] Fix compile warnings: ignoring return value of 'xenbus_register_backend' ..
+
+We neglect to check the return value of xenbus_register_backend
+and take actions when that fails. This patch fixes that and adds
+code to deal with those type of failures.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/common.h | 2 +-
+ drivers/xen/netback/netback.c | 12 +++++++++++-
+ drivers/xen/netback/xenbus.c | 4 ++--
+ 3 files changed, 14 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 9056be0..0675946 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -194,7 +194,7 @@ static inline void netif_put(struct xen_netif *netif)
+ wake_up(&netif->waiting_to_free);
+ }
+
+-void netif_xenbus_init(void);
++int netif_xenbus_init(void);
+
+ #define netif_schedulable(netif) \
+ (netif_running((netif)->dev) && netback_carrier_ok(netif))
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index d7d738e..860c61e 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -1536,6 +1536,7 @@ static int __init netback_init(void)
+ {
+ int i;
+ struct page *page;
++ int rc = 0;
+
+ if (!xen_domain())
+ return -ENODEV;
+@@ -1583,7 +1584,9 @@ static int __init netback_init(void)
+
+ //netif_accel_init();
+
+- netif_xenbus_init();
++ rc = netif_xenbus_init();
++ if (rc)
++ goto failed_init;
+
+ #ifdef NETBE_DEBUG_INTERRUPT
+ (void)bind_virq_to_irqhandler(VIRQ_DEBUG,
+@@ -1595,6 +1598,13 @@ static int __init netback_init(void)
+ #endif
+
+ return 0;
++
++failed_init:
++ free_empty_pages_and_pagevec(mmap_pages, MAX_PENDING_REQS);
++ del_timer(&netbk_tx_pending_timer);
++ del_timer(&net_timer);
++ return rc;
++
+ }
+
+ module_init(netback_init);
+diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
+index a492288..c46b235 100644
+--- a/drivers/xen/netback/xenbus.c
++++ b/drivers/xen/netback/xenbus.c
+@@ -447,8 +447,8 @@ static struct xenbus_driver netback = {
+ };
+
+
+-void netif_xenbus_init(void)
++int netif_xenbus_init(void)
+ {
+ printk(KERN_CRIT "registering netback\n");
+- (void)xenbus_register_backend(&netback);
++ return xenbus_register_backend(&netback);
+ }
+--
+1.7.3.4
+
+
+From 4bc919e07d5dc48cb95b22cc6e90c6110c229343 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Tue, 27 Oct 2009 12:37:50 -0700
+Subject: [PATCH 074/139] xen: netback: don't screw around with packet gso state
+
+These lines were reverted from 2.6.18 netback as the network stack
+was modified to deal with packets shorter than the gso size, so there's
+no need to fiddle with the gso state in netback.
+
+Taken from linux-2.6.18-xen.hg change 8081d19dce89
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c | 5 -----
+ 1 files changed, 0 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 860c61e..9a14976 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -1293,11 +1293,6 @@ static unsigned net_tx_build_mops(void)
+ skb_shinfo(skb)->frags[0].page = (void *)~0UL;
+ }
+
+- if (skb->data_len < skb_shinfo(skb)->gso_size) {
+- skb_shinfo(skb)->gso_size = 0;
+- skb_shinfo(skb)->gso_type = 0;
+- }
+-
+ __skb_queue_tail(&tx_queue, skb);
+
+ pending_cons++;
+--
+1.7.3.4
+
+
+From f2b947783c47a721497e5d325c736234f71501e7 Mon Sep 17 00:00:00 2001
+From: Steven Smith <ssmith at weybridge.uk.xensource.com>
+Date: Fri, 30 Oct 2009 13:55:23 -0700
+Subject: [PATCH 075/139] xen: netback: make sure that pg->mapping is never NULL for a page mapped from a foreign domain.
+
+Otherwise, the foreign maps tracking infrastructure gets confused, and
+thinks that the foreign page is local. This means that you can't
+forward that packet to another foreign domain. This leads to very
+high packet drop, and hence very poor performance.
+
+Signed-off-by: Steven Smith <steven.smith at citrix.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c | 4 ++--
+ 1 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 9a14976..111fec7 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -97,12 +97,12 @@ static inline unsigned long idx_to_kaddr(unsigned int idx)
+ /* extra field used in struct page */
+ static inline void netif_set_page_index(struct page *pg, unsigned int index)
+ {
+- *(unsigned long *)&pg->mapping = index;
++ *(unsigned long *)&pg->mapping = index + 1;
+ }
+
+ static inline int netif_page_index(struct page *pg)
+ {
+- unsigned long idx = (unsigned long)pg->mapping;
++ unsigned long idx = (unsigned long)pg->mapping - 1;
+
+ if (!PageForeign(pg))
+ return -1;
+--
+1.7.3.4
+
+
+From df8b27ea0fb2695842104e06caaecb55780577a7 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ijc at hellion.org.uk>
+Date: Thu, 3 Dec 2009 21:56:19 +0000
+Subject: [PATCH 076/139] xen: rename netbk module xen-netback.
+
+netbk is rather generic for a modular distro style kernel.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Cc: Jeremy Fitzhardinge <jeremy at goop.org>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/Makefile | 4 ++--
+ 1 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/xen/netback/Makefile b/drivers/xen/netback/Makefile
+index a01a1a3..e346e81 100644
+--- a/drivers/xen/netback/Makefile
++++ b/drivers/xen/netback/Makefile
+@@ -1,3 +1,3 @@
+-obj-$(CONFIG_XEN_NETDEV_BACKEND) := netbk.o
++obj-$(CONFIG_XEN_NETDEV_BACKEND) := xen-netback.o
+
+-netbk-y := netback.o xenbus.o interface.o
++xen-netback-y := netback.o xenbus.o interface.o
+--
+1.7.3.4
+
+
+From 279097395ad64ae4df15e206a487cd5fd3be39a8 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Tue, 16 Feb 2010 14:40:37 -0800
+Subject: [PATCH 077/139] xen: netback: use dev_get/set_drvdata() interface
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/xenbus.c | 10 +++++-----
+ 1 files changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
+index c46b235..79e6fb0 100644
+--- a/drivers/xen/netback/xenbus.c
++++ b/drivers/xen/netback/xenbus.c
+@@ -35,7 +35,7 @@ static void backend_create_netif(struct backend_info *be);
+
+ static int netback_remove(struct xenbus_device *dev)
+ {
+- struct backend_info *be = dev->dev.driver_data;
++ struct backend_info *be = dev_get_drvdata(&dev->dev);
+
+ //netback_remove_accelerators(be, dev);
+
+@@ -45,7 +45,7 @@ static int netback_remove(struct xenbus_device *dev)
+ be->netif = NULL;
+ }
+ kfree(be);
+- dev->dev.driver_data = NULL;
++ dev_set_drvdata(&dev->dev, NULL);
+ return 0;
+ }
+
+@@ -70,7 +70,7 @@ static int netback_probe(struct xenbus_device *dev,
+ }
+
+ be->dev = dev;
+- dev->dev.driver_data = be;
++ dev_set_drvdata(&dev->dev, be);
+
+ sg = 1;
+ if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB)
+@@ -151,7 +151,7 @@ fail:
+ */
+ static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env)
+ {
+- struct backend_info *be = xdev->dev.driver_data;
++ struct backend_info *be = dev_get_drvdata(&xdev->dev);
+ struct xen_netif *netif = be->netif;
+ char *val;
+
+@@ -211,7 +211,7 @@ static void backend_create_netif(struct backend_info *be)
+ static void frontend_changed(struct xenbus_device *dev,
+ enum xenbus_state frontend_state)
+ {
+- struct backend_info *be = dev->dev.driver_data;
++ struct backend_info *be = dev_get_drvdata(&dev->dev);
+
+ DPRINTK("%s", xenbus_strstate(frontend_state));
+
+--
+1.7.3.4
+
+
+From 31d0b5f5763faf607e32f3b5a0f6b37a34bbbf09 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Tue, 16 Feb 2010 14:41:12 -0800
+Subject: [PATCH 078/139] xen: netback: include linux/sched.h for TASK_* definitions
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/common.h | 2 ++
+ 1 files changed, 2 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 0675946..d8653d3 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -38,6 +38,8 @@
+ #include <linux/netdevice.h>
+ #include <linux/etherdevice.h>
+ #include <linux/wait.h>
++#include <linux/sched.h>
++
+ #include <xen/interface/io/netif.h>
+ #include <asm/io.h>
+ #include <asm/pgalloc.h>
+--
+1.7.3.4
+
+
+From cdefc88924b3cdfcac64be737a00a4ec5593cfd5 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Tue, 23 Feb 2010 11:52:27 +0000
+Subject: [PATCH 079/139] xen: netback: remove unused xen_network_done code
+
+It has been disabled effectively forever.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/netback.c | 23 -----------------------
+ 1 files changed, 0 insertions(+), 23 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 111fec7..4b24893 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -343,25 +343,6 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ return 0;
+ }
+
+-#if 0
+-static void xen_network_done_notify(void)
+-{
+- static struct net_device *eth0_dev = NULL;
+- if (unlikely(eth0_dev == NULL))
+- eth0_dev = __dev_get_by_name("eth0");
+- netif_rx_schedule(eth0_dev);
+-}
+-/*
+- * Add following to poll() function in NAPI driver (Tigon3 is example):
+- * if ( xen_network_done() )
+- * tg3_enable_ints(tp);
+- */
+-int xen_network_done(void)
+-{
+- return skb_queue_empty(&rx_queue);
+-}
+-#endif
+-
+ struct netrx_pending_operations {
+ unsigned trans_prod, trans_cons;
+ unsigned mmu_prod, mmu_mcl;
+@@ -664,10 +645,6 @@ static void net_rx_action(unsigned long unused)
+ /* More work to do? */
+ if (!skb_queue_empty(&rx_queue) && !timer_pending(&net_timer))
+ tasklet_schedule(&net_rx_tasklet);
+-#if 0
+- else
+- xen_network_done_notify();
+-#endif
+ }
+
+ static void net_alarm(unsigned long unused)
+--
+1.7.3.4
+
+
+From 994be068dd9947cedcee69a7185e54738cda33d4 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Tue, 23 Feb 2010 11:58:26 +0000
+Subject: [PATCH 080/139] xen: netback: factor disconnect from backend into new function.
+
+Makes subsequent patches cleaner.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/xenbus.c | 16 ++++++++++++----
+ 1 files changed, 12 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
+index 79e6fb0..1f36b4d4 100644
+--- a/drivers/xen/netback/xenbus.c
++++ b/drivers/xen/netback/xenbus.c
+@@ -205,6 +205,16 @@ static void backend_create_netif(struct backend_info *be)
+ }
+
+
++static void disconnect_backend(struct xenbus_device *dev)
++{
++ struct backend_info *be = dev_get_drvdata(&dev->dev);
++
++ if (be->netif) {
++ netif_disconnect(be->netif);
++ be->netif = NULL;
++ }
++}
++
+ /**
+ * Callback received when the frontend's state changes.
+ */
+@@ -238,11 +248,9 @@ static void frontend_changed(struct xenbus_device *dev,
+ break;
+
+ case XenbusStateClosing:
+- if (be->netif) {
++ if (be->netif)
+ kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
+- netif_disconnect(be->netif);
+- be->netif = NULL;
+- }
++ disconnect_backend(dev);
+ xenbus_switch_state(dev, XenbusStateClosing);
+ break;
+
+--
+1.7.3.4
+
+
+From 9dcb4c18e5b29d8862cd7783d5b0040913010563 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Tue, 23 Feb 2010 12:10:24 +0000
+Subject: [PATCH 081/139] xen: netback: wait for hotplug scripts to complete before signalling connected to frontend
+
+Avoid the situation where the frontend is sending packets but the
+domain 0 bridging (or whatever) is not yet configured (because the
+hotplug scripts are too slow) and so packets get dropped.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Cc: Steven.Smith at citrix.com
+---
+ drivers/xen/netback/common.h | 2 +
+ drivers/xen/netback/xenbus.c | 45 +++++++++++++++++++++++++++++++++++++++++-
+ 2 files changed, 46 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index d8653d3..1983768 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -145,6 +145,8 @@ struct backend_info {
+ struct xenbus_device *dev;
+ struct xen_netif *netif;
+ enum xenbus_state frontend_state;
++ struct xenbus_watch hotplug_status_watch;
++ int have_hotplug_status_watch:1;
+
+ /* State relating to the netback accelerator */
+ void *netback_accel_priv;
+diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
+index 1f36b4d4..d2407cc 100644
+--- a/drivers/xen/netback/xenbus.c
++++ b/drivers/xen/netback/xenbus.c
+@@ -32,6 +32,7 @@
+ static int connect_rings(struct backend_info *);
+ static void connect(struct backend_info *);
+ static void backend_create_netif(struct backend_info *be);
++static void unregister_hotplug_status_watch(struct backend_info *be);
+
+ static int netback_remove(struct xenbus_device *dev)
+ {
+@@ -39,8 +40,10 @@ static int netback_remove(struct xenbus_device *dev)
+
+ //netback_remove_accelerators(be, dev);
+
++ unregister_hotplug_status_watch(be);
+ if (be->netif) {
+ kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
++ xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
+ netif_disconnect(be->netif);
+ be->netif = NULL;
+ }
+@@ -210,6 +213,7 @@ static void disconnect_backend(struct xenbus_device *dev)
+ struct backend_info *be = dev_get_drvdata(&dev->dev);
+
+ if (be->netif) {
++ xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
+ netif_disconnect(be->netif);
+ be->netif = NULL;
+ }
+@@ -329,6 +333,36 @@ static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
+ return 0;
+ }
+
++static void unregister_hotplug_status_watch(struct backend_info *be)
++{
++ if (be->have_hotplug_status_watch) {
++ unregister_xenbus_watch(&be->hotplug_status_watch);
++ kfree(be->hotplug_status_watch.node);
++ }
++ be->have_hotplug_status_watch = 0;
++}
++
++static void hotplug_status_changed(struct xenbus_watch *watch,
++ const char **vec,
++ unsigned int vec_size)
++{
++ struct backend_info *be = container_of(watch,
++ struct backend_info,
++ hotplug_status_watch);
++ char *str;
++ unsigned int len;
++
++ str = xenbus_read(XBT_NIL, be->dev->nodename, "hotplug-status", &len);
++ if (IS_ERR(str))
++ return;
++ if (len == sizeof("connected")-1 && !memcmp(str, "connected", len)) {
++ xenbus_switch_state(be->dev, XenbusStateConnected);
++ /* Not interested in this watch anymore. */
++ unregister_hotplug_status_watch(be);
++ }
++ kfree(str);
++}
++
+ static void connect(struct backend_info *be)
+ {
+ int err;
+@@ -348,7 +382,16 @@ static void connect(struct backend_info *be)
+ &be->netif->credit_usec);
+ be->netif->remaining_credit = be->netif->credit_bytes;
+
+- xenbus_switch_state(dev, XenbusStateConnected);
++ unregister_hotplug_status_watch(be);
++ err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch,
++ hotplug_status_changed,
++ "%s/%s", dev->nodename, "hotplug-status");
++ if (err) {
++ /* Switch now, since we can't do a watch. */
++ xenbus_switch_state(dev, XenbusStateConnected);
++ } else {
++ be->have_hotplug_status_watch = 1;
++ }
+
+ netif_wake_queue(be->netif->dev);
+ }
+--
+1.7.3.4
+
+
+From 509cc7f20f866277a8f5d5895bb266b5b68aac6d Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Tue, 23 Feb 2010 12:11:51 +0000
+Subject: [PATCH 082/139] xen: netback: Always pull through PKT_PROT_LEN bytes into the linear part of an skb.
+
+Previously PKT_PROT_LEN would only have an effect on the first fragment.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/netback.c | 10 ++++++++++
+ 1 files changed, 10 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 4b24893..d4a7a56 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -1334,6 +1334,16 @@ static void net_tx_submit(void)
+
+ netbk_fill_frags(skb);
+
++ /*
++ * If the initial fragment was < PKT_PROT_LEN then
++ * pull through some bytes from the other fragments to
++ * increase the linear region to PKT_PROT_LEN bytes.
++ */
++ if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) {
++ int target = min_t(int, skb->len, PKT_PROT_LEN);
++ __pskb_pull_tail(skb, target - skb_headlen(skb));
++ }
++
+ skb->dev = netif->dev;
+ skb->protocol = eth_type_trans(skb, skb->dev);
+
+--
+1.7.3.4
+
+
+From 673c82b5110cfffafe1e1978bc07d6d10d111d50 Mon Sep 17 00:00:00 2001
+From: Steven Smith <ssmith at xensource.com>
+Date: Tue, 23 Feb 2010 11:49:26 +0000
+Subject: [PATCH 083/139] xen: netback: try to pull a minimum of 72 bytes into the skb data area
+ when receiving a packet into netback.
+
+The previous number, 64, tended to place a fragment boundary in the middle of
+the TCP header options and led to unnecessary fragmentation in Windows <->
+Windows networking.
+
+Signed-off-by: Steven Smith <ssmith at xensource.com>
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/netback.c | 9 +++------
+ 1 files changed, 3 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index d4a7a56..44357d7 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -116,13 +116,10 @@ static inline int netif_page_index(struct page *pg)
+ /*
+ * This is the amount of packet we copy rather than map, so that the
+ * guest can't fiddle with the contents of the headers while we do
+- * packet processing on them (netfilter, routing, etc). This could
+- * probably do with being larger, since 1) 64-bytes isn't necessarily
+- * long enough to cover a full christmas-tree ip+tcp header, let alone
+- * packet contents, and 2) the data is probably in cache anyway
+- * (though perhaps some other cpu's cache).
++ * packet processing on them (netfilter, routing, etc). 72 is enough
++ * to cover TCP+IP headers including options.
+ */
+-#define PKT_PROT_LEN 64
++#define PKT_PROT_LEN 72
+
+ static struct pending_tx_info {
+ struct xen_netif_tx_request req;
+--
+1.7.3.4
+
+
+From c83bd213efd3ebf700189249c30d987b1cb14d7e Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Tue, 23 Feb 2010 11:54:30 +0000
+Subject: [PATCH 084/139] xen: netback: Allow setting of large MTU before rings have connected.
+
+This allows large MTU to be configured by the VIF hotplug
+script. Previously this would fail because at the point the hotplug
+script runs the VIF features have most likely not been negotiated with
+the frontend and so SG has not yet been enabled. Invert this behaviour
+so that SG is assumed present until negotiations prove otherwise and
+reduce MTU at that point.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/interface.c | 6 +++++-
+ drivers/xen/netback/xenbus.c | 8 +++++---
+ 2 files changed, 10 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
+index 21c1f95..b23b14d 100644
+--- a/drivers/xen/netback/interface.c
++++ b/drivers/xen/netback/interface.c
+@@ -104,6 +104,9 @@ static int netbk_set_sg(struct net_device *dev, u32 data)
+ return -ENOSYS;
+ }
+
++ if (dev->mtu > ETH_DATA_LEN)
++ dev->mtu = ETH_DATA_LEN;
++
+ return ethtool_op_set_sg(dev, data);
+ }
+
+@@ -207,6 +210,7 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
+ memset(netif, 0, sizeof(*netif));
+ netif->domid = domid;
+ netif->handle = handle;
++ netif->features = NETIF_F_SG;
+ atomic_set(&netif->refcnt, 1);
+ init_waitqueue_head(&netif->waiting_to_free);
+ netif->dev = dev;
+@@ -223,7 +227,7 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
+ init_timer(&netif->tx_queue_timeout);
+
+ dev->netdev_ops = &netback_ops;
+- dev->features = NETIF_F_IP_CSUM;
++ dev->features = NETIF_F_IP_CSUM|NETIF_F_SG;
+
+ SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
+
+diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
+index d2407cc..fcd3c34 100644
+--- a/drivers/xen/netback/xenbus.c
++++ b/drivers/xen/netback/xenbus.c
+@@ -445,9 +445,11 @@ static int connect_rings(struct backend_info *be)
+
+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", "%d", &val) < 0)
+ val = 0;
+- if (val) {
+- be->netif->features |= NETIF_F_SG;
+- be->netif->dev->features |= NETIF_F_SG;
++ if (!val) {
++ be->netif->features &= ~NETIF_F_SG;
++ be->netif->dev->features &= ~NETIF_F_SG;
++ if (be->netif->dev->mtu > ETH_DATA_LEN)
++ be->netif->dev->mtu = ETH_DATA_LEN;
+ }
+
+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4", "%d",
+--
+1.7.3.4
+
+
+From e5cd35b00cb63f3a3fa1651260a58d59bbc134b7 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Fri, 19 Mar 2010 13:09:16 -0700
+Subject: [PATCH 085/139] xen: netback: use get_sset_count rather than obsolete get_stats_count
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/interface.c | 11 ++++++++---
+ 1 files changed, 8 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
+index b23b14d..086d939 100644
+--- a/drivers/xen/netback/interface.c
++++ b/drivers/xen/netback/interface.c
+@@ -136,9 +136,14 @@ static const struct netif_stat {
+ { "copied_skbs", offsetof(struct xen_netif, nr_copied_skbs) },
+ };
+
+-static int netbk_get_stats_count(struct net_device *dev)
++static int netbk_get_sset_count(struct net_device *dev, int string_set)
+ {
+- return ARRAY_SIZE(netbk_stats);
++ switch (string_set) {
++ case ETH_SS_STATS:
++ return ARRAY_SIZE(netbk_stats);
++ default:
++ return -EINVAL;
++ }
+ }
+
+ static void netbk_get_ethtool_stats(struct net_device *dev,
+@@ -176,7 +181,7 @@ static struct ethtool_ops network_ethtool_ops =
+ .set_tso = netbk_set_tso,
+ .get_link = ethtool_op_get_link,
+
+- .get_stats_count = netbk_get_stats_count,
++ .get_sset_count = netbk_get_sset_count,
+ .get_ethtool_stats = netbk_get_ethtool_stats,
+ .get_strings = netbk_get_strings,
+ };
+--
+1.7.3.4
+
+
+From 0c34835ee66ad641f01a8077a973b7ec1bfdcd86 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Tue, 11 May 2010 09:33:42 +0100
+Subject: [PATCH 086/139] xen: netback: correctly setup skb->ip_summed on receive
+
+In 2.6.18 CHECKSUM_PARTIAL and CHECKSUM_UNNECESSARY were both synonyms for
+CHECKSUM_HW. This is no longer the case and we need to select the correct one.
+
+ data_validated csum_blank -> ip_summed
+ 0 0 CHECKSUM_NONE
+ 0 1 CHECKSUM_PARTIAL
+ 1 0 CHECKSUM_UNNECESSARY
+ 1 1 CHECKSUM_PARTIAL
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Tested-by: Matej Zary <zary at cvtisr.sk>
+Tested-by: Michael D Labriola <mlabriol at gdeb.com>
+---
+ drivers/xen/netback/netback.c | 10 +++-------
+ 1 files changed, 3 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 44357d7..725da0f 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -1320,14 +1320,10 @@ static void net_tx_submit(void)
+ netif_idx_release(pending_idx);
+ }
+
+- /*
+- * Old frontends do not assert data_validated but we
+- * can infer it from csum_blank so test both flags.
+- */
+- if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank))
++ if (txp->flags & NETTXF_csum_blank)
+ skb->ip_summed = CHECKSUM_PARTIAL;
+- else
+- skb->ip_summed = CHECKSUM_NONE;
++ else if (txp->flags & NETTXF_data_validated)
++ skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+ netbk_fill_frags(skb);
+
+--
+1.7.3.4
+
+
+From 094944631cc5a9d6e623302c987f78117c0bf7ac Mon Sep 17 00:00:00 2001
+From: Dongxiao Xu <dongxiao.xu at intel.com>
+Date: Wed, 19 May 2010 16:58:56 -0700
+Subject: [PATCH 087/139] xen: netback: Move global/static variables into struct xen_netbk.
+
+Bundle a lot of discrete variables into a single structure.
+
+Signed-off-by: Dongxiao Xu <dongxiao.xu at intel.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/common.h | 59 +++++++
+ drivers/xen/netback/netback.c | 360 ++++++++++++++++++++---------------------
+ 2 files changed, 232 insertions(+), 187 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 1983768..00208f4 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -222,4 +222,63 @@ static inline int netbk_can_sg(struct net_device *dev)
+ return netif->features & NETIF_F_SG;
+ }
+
++struct pending_tx_info {
++ struct xen_netif_tx_request req;
++ struct xen_netif *netif;
++};
++typedef unsigned int pending_ring_idx_t;
++
++struct netbk_rx_meta {
++ skb_frag_t frag;
++ int id;
++};
++
++struct netbk_tx_pending_inuse {
++ struct list_head list;
++ unsigned long alloc_time;
++};
++
++#define MAX_PENDING_REQS 256
++
++struct xen_netbk {
++ struct tasklet_struct net_tx_tasklet;
++ struct tasklet_struct net_rx_tasklet;
++
++ struct sk_buff_head rx_queue;
++ struct sk_buff_head tx_queue;
++
++ struct timer_list net_timer;
++ struct timer_list netbk_tx_pending_timer;
++
++ struct page **mmap_pages;
++
++ pending_ring_idx_t pending_prod;
++ pending_ring_idx_t pending_cons;
++ pending_ring_idx_t dealloc_prod;
++ pending_ring_idx_t dealloc_cons;
++
++ struct list_head pending_inuse_head;
++ struct list_head net_schedule_list;
++
++ /* Protect the net_schedule_list in netif. */
++ spinlock_t net_schedule_list_lock;
++
++ struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
++ struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
++ struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
++ struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
++
++ grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
++ u16 pending_ring[MAX_PENDING_REQS];
++ u16 dealloc_ring[MAX_PENDING_REQS];
++
++ struct multicall_entry rx_mcl[NET_RX_RING_SIZE+3];
++ struct mmu_update rx_mmu[NET_RX_RING_SIZE];
++ struct gnttab_transfer grant_trans_op[NET_RX_RING_SIZE];
++ struct gnttab_copy grant_copy_op[NET_RX_RING_SIZE];
++ unsigned char rx_notify[NR_IRQS];
++ u16 notify_list[NET_RX_RING_SIZE];
++ struct netbk_rx_meta meta[NET_RX_RING_SIZE];
++};
++
+ #endif /* __NETIF__BACKEND__COMMON_H__ */
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 725da0f..417f497 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -48,16 +48,7 @@
+
+ /*define NETBE_DEBUG_INTERRUPT*/
+
+-struct netbk_rx_meta {
+- skb_frag_t frag;
+- int id;
+-};
+-
+-struct netbk_tx_pending_inuse {
+- struct list_head list;
+- unsigned long alloc_time;
+-};
+-
++static struct xen_netbk *netbk;
+
+ static void netif_idx_release(u16 pending_idx);
+ static void make_tx_response(struct xen_netif *netif,
+@@ -71,22 +62,12 @@ static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
+ u16 flags);
+
+ static void net_tx_action(unsigned long unused);
+-static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0);
+
+ static void net_rx_action(unsigned long unused);
+-static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);
+-
+-static struct timer_list net_timer;
+-static struct timer_list netbk_tx_pending_timer;
+
+-#define MAX_PENDING_REQS 256
+-
+-static struct sk_buff_head rx_queue;
+-
+-static struct page **mmap_pages;
+ static inline unsigned long idx_to_pfn(unsigned int idx)
+ {
+- return page_to_pfn(mmap_pages[idx]);
++ return page_to_pfn(netbk->mmap_pages[idx]);
+ }
+
+ static inline unsigned long idx_to_kaddr(unsigned int idx)
+@@ -107,7 +88,7 @@ static inline int netif_page_index(struct page *pg)
+ if (!PageForeign(pg))
+ return -1;
+
+- if ((idx >= MAX_PENDING_REQS) || (mmap_pages[idx] != pg))
++ if ((idx >= MAX_PENDING_REQS) || (netbk->mmap_pages[idx] != pg))
+ return -1;
+
+ return idx;
+@@ -121,46 +102,17 @@ static inline int netif_page_index(struct page *pg)
+ */
+ #define PKT_PROT_LEN 72
+
+-static struct pending_tx_info {
+- struct xen_netif_tx_request req;
+- struct xen_netif *netif;
+-} pending_tx_info[MAX_PENDING_REQS];
+-static u16 pending_ring[MAX_PENDING_REQS];
+-typedef unsigned int pending_ring_idx_t;
+-
+ static inline pending_ring_idx_t pending_index(unsigned i)
+ {
+ return i & (MAX_PENDING_REQS-1);
+ }
+
+-static pending_ring_idx_t pending_prod, pending_cons;
+-
+ static inline pending_ring_idx_t nr_pending_reqs(void)
+ {
+- return MAX_PENDING_REQS - pending_prod + pending_cons;
++ return MAX_PENDING_REQS -
++ netbk->pending_prod + netbk->pending_cons;
+ }
+
+-/* Freed TX SKBs get batched on this ring before return to pending_ring. */
+-static u16 dealloc_ring[MAX_PENDING_REQS];
+-static pending_ring_idx_t dealloc_prod, dealloc_cons;
+-
+-/* Doubly-linked list of in-use pending entries. */
+-static struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
+-static LIST_HEAD(pending_inuse_head);
+-
+-static struct sk_buff_head tx_queue;
+-
+-static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
+-static struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
+-static struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
+-
+-static LIST_HEAD(net_schedule_list);
+-static DEFINE_SPINLOCK(net_schedule_list_lock);
+-
+-#define MAX_MFN_ALLOC 64
+-static unsigned long mfn_list[MAX_MFN_ALLOC];
+-static unsigned int alloc_index = 0;
+-
+ /* Setting this allows the safe use of this driver without netloop. */
+ static int MODPARM_copy_skb = 1;
+ module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
+@@ -168,18 +120,12 @@ MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
+
+ int netbk_copy_skb_mode;
+
+-static inline unsigned long alloc_mfn(void)
+-{
+- BUG_ON(alloc_index == 0);
+- return mfn_list[--alloc_index];
+-}
+-
+ static inline void maybe_schedule_tx_action(void)
+ {
+ smp_mb();
+ if ((nr_pending_reqs() < (MAX_PENDING_REQS/2)) &&
+- !list_empty(&net_schedule_list))
+- tasklet_schedule(&net_tx_tasklet);
++ !list_empty(&netbk->net_schedule_list))
++ tasklet_schedule(&netbk->net_tx_tasklet);
+ }
+
+ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
+@@ -328,9 +274,8 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
+ }
+ }
+-
+- skb_queue_tail(&rx_queue, skb);
+- tasklet_schedule(&net_rx_tasklet);
++ skb_queue_tail(&netbk->rx_queue, skb);
++ tasklet_schedule(&netbk->net_rx_tasklet);
+
+ return 0;
+
+@@ -372,7 +317,7 @@ static u16 netbk_gop_frag(struct xen_netif *netif, struct netbk_rx_meta *meta,
+ copy_gop = npo->copy + npo->copy_prod++;
+ copy_gop->flags = GNTCOPY_dest_gref;
+ if (idx > -1) {
+- struct pending_tx_info *src_pend = &pending_tx_info[idx];
++ struct pending_tx_info *src_pend = &netbk->pending_tx_info[idx];
+ copy_gop->source.domid = src_pend->netif->domid;
+ copy_gop->source.u.ref = src_pend->req.gref;
+ copy_gop->flags |= GNTCOPY_source_gref;
+@@ -487,30 +432,19 @@ static void net_rx_action(unsigned long unused)
+ int count;
+ unsigned long offset;
+
+- /*
+- * Putting hundreds of bytes on the stack is considered rude.
+- * Static works because a tasklet can only be on one CPU at any time.
+- */
+- static struct multicall_entry rx_mcl[NET_RX_RING_SIZE+3];
+- static struct mmu_update rx_mmu[NET_RX_RING_SIZE];
+- static struct gnttab_transfer grant_trans_op[NET_RX_RING_SIZE];
+- static struct gnttab_copy grant_copy_op[NET_RX_RING_SIZE];
+- static unsigned char rx_notify[NR_IRQS];
+- static u16 notify_list[NET_RX_RING_SIZE];
+- static struct netbk_rx_meta meta[NET_RX_RING_SIZE];
+-
+ struct netrx_pending_operations npo = {
+- mmu: rx_mmu,
+- trans: grant_trans_op,
+- copy: grant_copy_op,
+- mcl: rx_mcl,
+- meta: meta};
++ .mmu = netbk->rx_mmu,
++ .trans = netbk->grant_trans_op,
++ .copy = netbk->grant_copy_op,
++ .mcl = netbk->rx_mcl,
++ .meta = netbk->meta,
++ };
+
+ skb_queue_head_init(&rxq);
+
+ count = 0;
+
+- while ((skb = skb_dequeue(&rx_queue)) != NULL) {
++ while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) {
+ nr_frags = skb_shinfo(skb)->nr_frags;
+ *(int *)skb->cb = nr_frags;
+
+@@ -525,39 +459,39 @@ static void net_rx_action(unsigned long unused)
+ break;
+ }
+
+- BUG_ON(npo.meta_prod > ARRAY_SIZE(meta));
++ BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
+
+ npo.mmu_mcl = npo.mcl_prod;
+ if (npo.mcl_prod) {
+ BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
+- BUG_ON(npo.mmu_prod > ARRAY_SIZE(rx_mmu));
++ BUG_ON(npo.mmu_prod > ARRAY_SIZE(netbk->rx_mmu));
+ mcl = npo.mcl + npo.mcl_prod++;
+
+ BUG_ON(mcl[-1].op != __HYPERVISOR_update_va_mapping);
+ mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
+
+ mcl->op = __HYPERVISOR_mmu_update;
+- mcl->args[0] = (unsigned long)rx_mmu;
++ mcl->args[0] = (unsigned long)netbk->rx_mmu;
+ mcl->args[1] = npo.mmu_prod;
+ mcl->args[2] = 0;
+ mcl->args[3] = DOMID_SELF;
+ }
+
+ if (npo.trans_prod) {
+- BUG_ON(npo.trans_prod > ARRAY_SIZE(grant_trans_op));
++ BUG_ON(npo.trans_prod > ARRAY_SIZE(netbk->grant_trans_op));
+ mcl = npo.mcl + npo.mcl_prod++;
+ mcl->op = __HYPERVISOR_grant_table_op;
+ mcl->args[0] = GNTTABOP_transfer;
+- mcl->args[1] = (unsigned long)grant_trans_op;
++ mcl->args[1] = (unsigned long)netbk->grant_trans_op;
+ mcl->args[2] = npo.trans_prod;
+ }
+
+ if (npo.copy_prod) {
+- BUG_ON(npo.copy_prod > ARRAY_SIZE(grant_copy_op));
++ BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
+ mcl = npo.mcl + npo.mcl_prod++;
+ mcl->op = __HYPERVISOR_grant_table_op;
+ mcl->args[0] = GNTTABOP_copy;
+- mcl->args[1] = (unsigned long)grant_copy_op;
++ mcl->args[1] = (unsigned long)netbk->grant_copy_op;
+ mcl->args[2] = npo.copy_prod;
+ }
+
+@@ -565,7 +499,7 @@ static void net_rx_action(unsigned long unused)
+ if (!npo.mcl_prod)
+ return;
+
+- BUG_ON(npo.mcl_prod > ARRAY_SIZE(rx_mcl));
++ BUG_ON(npo.mcl_prod > ARRAY_SIZE(netbk->rx_mcl));
+
+ ret = HYPERVISOR_multicall(npo.mcl, npo.mcl_prod);
+ BUG_ON(ret != 0);
+@@ -582,7 +516,7 @@ static void net_rx_action(unsigned long unused)
+
+ status = netbk_check_gop(nr_frags, netif->domid, &npo);
+
+- id = meta[npo.meta_cons].id;
++ id = netbk->meta[npo.meta_cons].id;
+ flags = nr_frags ? NETRXF_more_data : 0;
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
+@@ -595,7 +529,7 @@ static void net_rx_action(unsigned long unused)
+ resp = make_rx_response(netif, id, status, offset,
+ skb_headlen(skb), flags);
+
+- if (meta[npo.meta_cons].frag.size) {
++ if (netbk->meta[npo.meta_cons].frag.size) {
+ struct xen_netif_extra_info *gso =
+ (struct xen_netif_extra_info *)
+ RING_GET_RESPONSE(&netif->rx,
+@@ -603,7 +537,7 @@ static void net_rx_action(unsigned long unused)
+
+ resp->flags |= NETRXF_extra_info;
+
+- gso->u.gso.size = meta[npo.meta_cons].frag.size;
++ gso->u.gso.size = netbk->meta[npo.meta_cons].frag.size;
+ gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
+ gso->u.gso.pad = 0;
+ gso->u.gso.features = 0;
+@@ -613,14 +547,14 @@ static void net_rx_action(unsigned long unused)
+ }
+
+ netbk_add_frag_responses(netif, status,
+- meta + npo.meta_cons + 1,
+- nr_frags);
++ netbk->meta + npo.meta_cons + 1,
++ nr_frags);
+
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
+ irq = netif->irq;
+- if (ret && !rx_notify[irq]) {
+- rx_notify[irq] = 1;
+- notify_list[notify_nr++] = irq;
++ if (ret && !netbk->rx_notify[irq]) {
++ netbk->rx_notify[irq] = 1;
++ netbk->notify_list[notify_nr++] = irq;
+ }
+
+ if (netif_queue_stopped(netif->dev) &&
+@@ -634,24 +568,25 @@ static void net_rx_action(unsigned long unused)
+ }
+
+ while (notify_nr != 0) {
+- irq = notify_list[--notify_nr];
+- rx_notify[irq] = 0;
++ irq = netbk->notify_list[--notify_nr];
++ netbk->rx_notify[irq] = 0;
+ notify_remote_via_irq(irq);
+ }
+
+ /* More work to do? */
+- if (!skb_queue_empty(&rx_queue) && !timer_pending(&net_timer))
+- tasklet_schedule(&net_rx_tasklet);
++ if (!skb_queue_empty(&netbk->rx_queue) &&
++ !timer_pending(&netbk->net_timer))
++ tasklet_schedule(&netbk->net_rx_tasklet);
+ }
+
+ static void net_alarm(unsigned long unused)
+ {
+- tasklet_schedule(&net_rx_tasklet);
++ tasklet_schedule(&netbk->net_rx_tasklet);
+ }
+
+ static void netbk_tx_pending_timeout(unsigned long unused)
+ {
+- tasklet_schedule(&net_tx_tasklet);
++ tasklet_schedule(&netbk->net_tx_tasklet);
+ }
+
+ struct net_device_stats *netif_be_get_stats(struct net_device *dev)
+@@ -667,12 +602,12 @@ static int __on_net_schedule_list(struct xen_netif *netif)
+
+ static void remove_from_net_schedule_list(struct xen_netif *netif)
+ {
+- spin_lock_irq(&net_schedule_list_lock);
++ spin_lock_irq(&netbk->net_schedule_list_lock);
+ if (likely(__on_net_schedule_list(netif))) {
+ list_del_init(&netif->list);
+ netif_put(netif);
+ }
+- spin_unlock_irq(&net_schedule_list_lock);
++ spin_unlock_irq(&netbk->net_schedule_list_lock);
+ }
+
+ static void add_to_net_schedule_list_tail(struct xen_netif *netif)
+@@ -680,13 +615,13 @@ static void add_to_net_schedule_list_tail(struct xen_netif *netif)
+ if (__on_net_schedule_list(netif))
+ return;
+
+- spin_lock_irq(&net_schedule_list_lock);
++ spin_lock_irq(&netbk->net_schedule_list_lock);
+ if (!__on_net_schedule_list(netif) &&
+ likely(netif_schedulable(netif))) {
+- list_add_tail(&netif->list, &net_schedule_list);
++ list_add_tail(&netif->list, &netbk->net_schedule_list);
+ netif_get(netif);
+ }
+- spin_unlock_irq(&net_schedule_list_lock);
++ spin_unlock_irq(&netbk->net_schedule_list_lock);
+ }
+
+ void netif_schedule_work(struct xen_netif *netif)
+@@ -736,8 +671,9 @@ static void tx_credit_callback(unsigned long data)
+
+ static inline int copy_pending_req(pending_ring_idx_t pending_idx)
+ {
+- return gnttab_copy_grant_page(grant_tx_handle[pending_idx],
+- &mmap_pages[pending_idx]);
++ return gnttab_copy_grant_page(
++ netbk->grant_tx_handle[pending_idx],
++ &netbk->mmap_pages[pending_idx]);
+ }
+
+ inline static void net_tx_action_dealloc(void)
+@@ -750,22 +686,24 @@ inline static void net_tx_action_dealloc(void)
+ int ret;
+ LIST_HEAD(list);
+
+- dc = dealloc_cons;
+- gop = tx_unmap_ops;
++ dc = netbk->dealloc_cons;
++ gop = netbk->tx_unmap_ops;
+
+ /*
+ * Free up any grants we have finished using
+ */
+ do {
+- dp = dealloc_prod;
++ dp = netbk->dealloc_prod;
+
+ /* Ensure we see all indices enqueued by netif_idx_release(). */
+ smp_rmb();
+
+ while (dc != dp) {
+ unsigned long pfn;
++ struct netbk_tx_pending_inuse *pending_inuse =
++ netbk->pending_inuse;
+
+- pending_idx = dealloc_ring[pending_index(dc++)];
++ pending_idx = netbk->dealloc_ring[pending_index(dc++)];
+ list_move_tail(&pending_inuse[pending_idx].list, &list);
+
+ pfn = idx_to_pfn(pending_idx);
+@@ -773,22 +711,27 @@ inline static void net_tx_action_dealloc(void)
+ if (!phys_to_machine_mapping_valid(pfn))
+ continue;
+
+- gnttab_set_unmap_op(gop, idx_to_kaddr(pending_idx),
+- GNTMAP_host_map,
+- grant_tx_handle[pending_idx]);
++ gnttab_set_unmap_op(gop,
++ idx_to_kaddr(pending_idx),
++ GNTMAP_host_map,
++ netbk->grant_tx_handle[pending_idx]);
+ gop++;
+ }
+
+ if (netbk_copy_skb_mode != NETBK_DELAYED_COPY_SKB ||
+- list_empty(&pending_inuse_head))
++ list_empty(&netbk->pending_inuse_head))
+ break;
+
+ /* Copy any entries that have been pending for too long. */
+- list_for_each_entry_safe(inuse, n, &pending_inuse_head, list) {
++ list_for_each_entry_safe(inuse, n,
++ &netbk->pending_inuse_head, list) {
++ struct pending_tx_info *pending_tx_info;
++ pending_tx_info = netbk->pending_tx_info;
++
+ if (time_after(inuse->alloc_time + HZ / 2, jiffies))
+ break;
+
+- pending_idx = inuse - pending_inuse;
++ pending_idx = inuse - netbk->pending_inuse;
+
+ pending_tx_info[pending_idx].netif->nr_copied_skbs++;
+
+@@ -805,16 +748,21 @@ inline static void net_tx_action_dealloc(void)
+
+ break;
+ }
+- } while (dp != dealloc_prod);
++ } while (dp != netbk->dealloc_prod);
+
+- dealloc_cons = dc;
++ netbk->dealloc_cons = dc;
+
+ ret = HYPERVISOR_grant_table_op(
+- GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops);
++ GNTTABOP_unmap_grant_ref, netbk->tx_unmap_ops,
++ gop - netbk->tx_unmap_ops);
+ BUG_ON(ret);
+
+ list_for_each_entry_safe(inuse, n, &list, list) {
+- pending_idx = inuse - pending_inuse;
++ struct pending_tx_info *pending_tx_info;
++ pending_ring_idx_t index;
++
++ pending_tx_info = netbk->pending_tx_info;
++ pending_idx = inuse - netbk->pending_inuse;
+
+ netif = pending_tx_info[pending_idx].netif;
+
+@@ -822,9 +770,10 @@ inline static void net_tx_action_dealloc(void)
+ NETIF_RSP_OKAY);
+
+ /* Ready for next use. */
+- gnttab_reset_grant_page(mmap_pages[pending_idx]);
++ gnttab_reset_grant_page(netbk->mmap_pages[pending_idx]);
+
+- pending_ring[pending_index(pending_prod++)] = pending_idx;
++ index = pending_index(netbk->pending_prod++);
++ netbk->pending_ring[index] = pending_idx;
+
+ netif_put(netif);
+
+@@ -832,7 +781,8 @@ inline static void net_tx_action_dealloc(void)
+ }
+ }
+
+-static void netbk_tx_err(struct xen_netif *netif, struct xen_netif_tx_request *txp, RING_IDX end)
++static void netbk_tx_err(struct xen_netif *netif,
++ struct xen_netif_tx_request *txp, RING_IDX end)
+ {
+ RING_IDX cons = netif->tx.req_cons;
+
+@@ -902,7 +852,12 @@ static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netif *netif,
+ start = ((unsigned long)shinfo->frags[0].page == pending_idx);
+
+ for (i = start; i < shinfo->nr_frags; i++, txp++) {
+- pending_idx = pending_ring[pending_index(pending_cons++)];
++ pending_ring_idx_t index;
++ struct pending_tx_info *pending_tx_info =
++ netbk->pending_tx_info;
++
++ index = pending_index(netbk->pending_cons++);
++ pending_idx = netbk->pending_ring[index];
+
+ gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx),
+ GNTMAP_host_map | GNTMAP_readonly,
+@@ -922,6 +877,7 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
+ {
+ struct gnttab_map_grant_ref *mop = *mopp;
+ int pending_idx = *((u16 *)skb->data);
++ struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
+ struct xen_netif *netif = pending_tx_info[pending_idx].netif;
+ struct xen_netif_tx_request *txp;
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+@@ -931,15 +887,17 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
+ /* Check status of header. */
+ err = mop->status;
+ if (unlikely(err)) {
++ pending_ring_idx_t index;
++ index = pending_index(netbk->pending_prod++);
+ txp = &pending_tx_info[pending_idx].req;
+ make_tx_response(netif, txp, NETIF_RSP_ERROR);
+- pending_ring[pending_index(pending_prod++)] = pending_idx;
++ netbk->pending_ring[index] = pending_idx;
+ netif_put(netif);
+ } else {
+ set_phys_to_machine(
+ __pa(idx_to_kaddr(pending_idx)) >> PAGE_SHIFT,
+ FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
+- grant_tx_handle[pending_idx] = mop->handle;
++ netbk->grant_tx_handle[pending_idx] = mop->handle;
+ }
+
+ /* Skip first skb fragment if it is on same page as header fragment. */
+@@ -947,16 +905,19 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
+
+ for (i = start; i < nr_frags; i++) {
+ int j, newerr;
++ pending_ring_idx_t index;
+
+ pending_idx = (unsigned long)shinfo->frags[i].page;
+
+ /* Check error status: if okay then remember grant handle. */
+ newerr = (++mop)->status;
+ if (likely(!newerr)) {
++ unsigned long addr;
++ addr = idx_to_kaddr(pending_idx);
+ set_phys_to_machine(
+- __pa(idx_to_kaddr(pending_idx))>>PAGE_SHIFT,
++ __pa(addr)>>PAGE_SHIFT,
+ FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
+- grant_tx_handle[pending_idx] = mop->handle;
++ netbk->grant_tx_handle[pending_idx] = mop->handle;
+ /* Had a previous error? Invalidate this fragment. */
+ if (unlikely(err))
+ netif_idx_release(pending_idx);
+@@ -964,9 +925,10 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
+ }
+
+ /* Error on this fragment: respond to client with an error. */
+- txp = &pending_tx_info[pending_idx].req;
++ txp = &netbk->pending_tx_info[pending_idx].req;
+ make_tx_response(netif, txp, NETIF_RSP_ERROR);
+- pending_ring[pending_index(pending_prod++)] = pending_idx;
++ index = pending_index(netbk->pending_prod++);
++ netbk->pending_ring[index] = pending_idx;
+ netif_put(netif);
+
+ /* Not the first error? Preceding frags already invalidated. */
+@@ -1002,11 +964,11 @@ static void netbk_fill_frags(struct sk_buff *skb)
+
+ pending_idx = (unsigned long)frag->page;
+
+- pending_inuse[pending_idx].alloc_time = jiffies;
+- list_add_tail(&pending_inuse[pending_idx].list,
+- &pending_inuse_head);
++ netbk->pending_inuse[pending_idx].alloc_time = jiffies;
++ list_add_tail(&netbk->pending_inuse[pending_idx].list,
++ &netbk->pending_inuse_head);
+
+- txp = &pending_tx_info[pending_idx].req;
++ txp = &netbk->pending_tx_info[pending_idx].req;
+ frag->page = virt_to_page(idx_to_kaddr(pending_idx));
+ frag->size = txp->size;
+ frag->page_offset = txp->offset;
+@@ -1145,9 +1107,9 @@ static unsigned net_tx_build_mops(void)
+ struct sk_buff *skb;
+ int ret;
+
+- mop = tx_map_ops;
++ mop = netbk->tx_map_ops;
+ while (((nr_pending_reqs() + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
+- !list_empty(&net_schedule_list)) {
++ !list_empty(&netbk->net_schedule_list)) {
+ struct xen_netif *netif;
+ struct xen_netif_tx_request txreq;
+ struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
+@@ -1156,9 +1118,11 @@ static unsigned net_tx_build_mops(void)
+ RING_IDX idx;
+ int work_to_do;
+ unsigned int data_len;
++ pending_ring_idx_t index;
+
+ /* Get a netif from the list with work to do. */
+- netif = list_first_entry(&net_schedule_list, struct xen_netif, list);
++ netif = list_first_entry(&netbk->net_schedule_list,
++ struct xen_netif, list);
+ netif_get(netif);
+ remove_from_net_schedule_list(netif);
+
+@@ -1217,7 +1181,8 @@ static unsigned net_tx_build_mops(void)
+ continue;
+ }
+
+- pending_idx = pending_ring[pending_index(pending_cons)];
++ index = pending_index(netbk->pending_cons);
++ pending_idx = netbk->pending_ring[index];
+
+ data_len = (txreq.size > PKT_PROT_LEN &&
+ ret < MAX_SKB_FRAGS) ?
+@@ -1250,9 +1215,9 @@ static unsigned net_tx_build_mops(void)
+ txreq.gref, netif->domid);
+ mop++;
+
+- memcpy(&pending_tx_info[pending_idx].req,
++ memcpy(&netbk->pending_tx_info[pending_idx].req,
+ &txreq, sizeof(txreq));
+- pending_tx_info[pending_idx].netif = netif;
++ netbk->pending_tx_info[pending_idx].netif = netif;
+ *((u16 *)skb->data) = pending_idx;
+
+ __skb_put(skb, data_len);
+@@ -1267,20 +1232,20 @@ static unsigned net_tx_build_mops(void)
+ skb_shinfo(skb)->frags[0].page = (void *)~0UL;
+ }
+
+- __skb_queue_tail(&tx_queue, skb);
++ __skb_queue_tail(&netbk->tx_queue, skb);
+
+- pending_cons++;
++ netbk->pending_cons++;
+
+ mop = netbk_get_requests(netif, skb, txfrags, mop);
+
+ netif->tx.req_cons = idx;
+ netif_schedule_work(netif);
+
+- if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
++ if ((mop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops))
+ break;
+ }
+
+- return mop - tx_map_ops;
++ return mop - netbk->tx_map_ops;
+ }
+
+ static void net_tx_submit(void)
+@@ -1288,16 +1253,16 @@ static void net_tx_submit(void)
+ struct gnttab_map_grant_ref *mop;
+ struct sk_buff *skb;
+
+- mop = tx_map_ops;
+- while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
++ mop = netbk->tx_map_ops;
++ while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
+ struct xen_netif_tx_request *txp;
+ struct xen_netif *netif;
+ u16 pending_idx;
+ unsigned data_len;
+
+ pending_idx = *((u16 *)skb->data);
+- netif = pending_tx_info[pending_idx].netif;
+- txp = &pending_tx_info[pending_idx].req;
++ netif = netbk->pending_tx_info[pending_idx].netif;
++ txp = &netbk->pending_tx_info[pending_idx].req;
+
+ /* Check the remap error code. */
+ if (unlikely(netbk_tx_check_mop(skb, &mop))) {
+@@ -1363,12 +1328,13 @@ static void net_tx_submit(void)
+ }
+
+ if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
+- !list_empty(&pending_inuse_head)) {
++ !list_empty(&netbk->pending_inuse_head)) {
+ struct netbk_tx_pending_inuse *oldest;
+
+- oldest = list_entry(pending_inuse_head.next,
++ oldest = list_entry(netbk->pending_inuse_head.next,
+ struct netbk_tx_pending_inuse, list);
+- mod_timer(&netbk_tx_pending_timer, oldest->alloc_time + HZ);
++ mod_timer(&netbk->netbk_tx_pending_timer,
++ oldest->alloc_time + HZ);
+ }
+ }
+
+@@ -1378,7 +1344,7 @@ static void net_tx_action(unsigned long unused)
+ unsigned nr_mops;
+ int ret;
+
+- if (dealloc_cons != dealloc_prod)
++ if (netbk->dealloc_cons != netbk->dealloc_prod)
+ net_tx_action_dealloc();
+
+ nr_mops = net_tx_build_mops();
+@@ -1387,7 +1353,7 @@ static void net_tx_action(unsigned long unused)
+ return;
+
+ ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+- tx_map_ops, nr_mops);
++ netbk->tx_map_ops, nr_mops);
+ BUG_ON(ret);
+
+ net_tx_submit();
+@@ -1397,15 +1363,17 @@ static void netif_idx_release(u16 pending_idx)
+ {
+ static DEFINE_SPINLOCK(_lock);
+ unsigned long flags;
++ pending_ring_idx_t index;
+
+ spin_lock_irqsave(&_lock, flags);
+- dealloc_ring[pending_index(dealloc_prod)] = pending_idx;
++ index = pending_index(netbk->dealloc_prod);
++ netbk->dealloc_ring[index] = pending_idx;
+ /* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
+ smp_wmb();
+- dealloc_prod++;
++ netbk->dealloc_prod++;
+ spin_unlock_irqrestore(&_lock, flags);
+
+- tasklet_schedule(&net_tx_tasklet);
++ tasklet_schedule(&netbk->net_tx_tasklet);
+ }
+
+ static void netif_page_release(struct page *page, unsigned int order)
+@@ -1481,9 +1449,9 @@ static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
+ int i = 0;
+
+ printk(KERN_ALERT "netif_schedule_list:\n");
+- spin_lock_irq(&net_schedule_list_lock);
++ spin_lock_irq(&netbk->net_schedule_list_lock);
+
+- list_for_each (ent, &net_schedule_list) {
++ list_for_each(ent, &netbk->net_schedule_list) {
+ netif = list_entry(ent, struct xen_netif, list);
+ printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
+ "rx_resp_prod=%08x\n",
+@@ -1500,7 +1468,7 @@ static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
+ i++;
+ }
+
+- spin_unlock_irq(&net_schedule_list_lock);
++ spin_unlock_irq(&netbk->net_schedule_list_lock);
+ printk(KERN_ALERT " ** End of netif_schedule_list **\n");
+
+ return IRQ_HANDLED;
+@@ -1516,37 +1484,53 @@ static int __init netback_init(void)
+ if (!xen_domain())
+ return -ENODEV;
+
++ netbk = (struct xen_netbk *)vmalloc(sizeof(struct xen_netbk));
++ if (!netbk) {
++ printk(KERN_ALERT "%s: out of memory\n", __func__);
++ return -ENOMEM;
++ }
++
+ /* We can increase reservation by this much in net_rx_action(). */
+ // balloon_update_driver_allowance(NET_RX_RING_SIZE);
+
+- skb_queue_head_init(&rx_queue);
+- skb_queue_head_init(&tx_queue);
++ skb_queue_head_init(&netbk->rx_queue);
++ skb_queue_head_init(&netbk->tx_queue);
+
+- init_timer(&net_timer);
+- net_timer.data = 0;
+- net_timer.function = net_alarm;
++ init_timer(&netbk->net_timer);
++ netbk->net_timer.data = 0;
++ netbk->net_timer.function = net_alarm;
+
+- init_timer(&netbk_tx_pending_timer);
+- netbk_tx_pending_timer.data = 0;
+- netbk_tx_pending_timer.function = netbk_tx_pending_timeout;
++ init_timer(&netbk->netbk_tx_pending_timer);
++ netbk->netbk_tx_pending_timer.data = 0;
++ netbk->netbk_tx_pending_timer.function = netbk_tx_pending_timeout;
+
+- mmap_pages = alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
+- if (mmap_pages == NULL) {
+- printk("%s: out of memory\n", __FUNCTION__);
+- return -ENOMEM;
++ netbk->mmap_pages =
++ alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
++ if (!netbk->mmap_pages) {
++ printk(KERN_ALERT "%s: out of memory\n", __func__);
++ rc = -ENOMEM;
++ goto failed_init2;
+ }
+
+ for (i = 0; i < MAX_PENDING_REQS; i++) {
+- page = mmap_pages[i];
++ page = netbk->mmap_pages[i];
+ SetPageForeign(page, netif_page_release);
+ netif_set_page_index(page, i);
+- INIT_LIST_HEAD(&pending_inuse[i].list);
++ INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
+ }
+
+- pending_cons = 0;
+- pending_prod = MAX_PENDING_REQS;
++ netbk->pending_cons = 0;
++ netbk->pending_prod = MAX_PENDING_REQS;
+ for (i = 0; i < MAX_PENDING_REQS; i++)
+- pending_ring[i] = i;
++ netbk->pending_ring[i] = i;
++
++ tasklet_init(&netbk->net_tx_tasklet, net_tx_action, 0);
++ tasklet_init(&netbk->net_rx_tasklet, net_rx_action, 0);
++
++ INIT_LIST_HEAD(&netbk->pending_inuse_head);
++ INIT_LIST_HEAD(&netbk->net_schedule_list);
++
++ spin_lock_init(&netbk->net_schedule_list_lock);
+
+ netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
+ if (MODPARM_copy_skb) {
+@@ -1561,7 +1545,7 @@ static int __init netback_init(void)
+
+ rc = netif_xenbus_init();
+ if (rc)
+- goto failed_init;
++ goto failed_init1;
+
+ #ifdef NETBE_DEBUG_INTERRUPT
+ (void)bind_virq_to_irqhandler(VIRQ_DEBUG,
+@@ -1574,10 +1558,12 @@ static int __init netback_init(void)
+
+ return 0;
+
+-failed_init:
+- free_empty_pages_and_pagevec(mmap_pages, MAX_PENDING_REQS);
+- del_timer(&netbk_tx_pending_timer);
+- del_timer(&net_timer);
++failed_init1:
++ free_empty_pages_and_pagevec(netbk->mmap_pages, MAX_PENDING_REQS);
++failed_init2:
++ del_timer(&netbk->netbk_tx_pending_timer);
++ del_timer(&netbk->net_timer);
++ vfree(netbk);
+ return rc;
+
+ }
+--
+1.7.3.4
+
+
+From c099c22d8b1c12fc7d68998982eb4ccd4918e813 Mon Sep 17 00:00:00 2001
+From: Dongxiao Xu <dongxiao.xu at intel.com>
+Date: Wed, 19 May 2010 16:58:57 -0700
+Subject: [PATCH 088/139] xen: netback: Introduce a new struct type page_ext.
+
+struct page_ext is used to store the group and idx information by
+which a specified page could be identified.
+
+Signed-off-by: Dongxiao Xu <dongxiao.xu at intel.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/common.h | 15 +++++++++++++++
+ drivers/xen/netback/netback.c | 28 +++++++++++++++++-----------
+ 2 files changed, 32 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 00208f4..5e0e467 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -240,6 +240,21 @@ struct netbk_tx_pending_inuse {
+
+ #define MAX_PENDING_REQS 256
+
++/* extra field used in struct page */
++union page_ext {
++ struct {
++#if BITS_PER_LONG < 64
++#define IDX_WIDTH 8
++#define GROUP_WIDTH (BITS_PER_LONG - IDX_WIDTH)
++ unsigned int group:GROUP_WIDTH;
++ unsigned int idx:IDX_WIDTH;
++#else
++ unsigned int group, idx;
++#endif
++ } e;
++ void *mapping;
++};
++
+ struct xen_netbk {
+ struct tasklet_struct net_tx_tasklet;
+ struct tasklet_struct net_rx_tasklet;
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 417f497..71ec999 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -76,22 +76,27 @@ static inline unsigned long idx_to_kaddr(unsigned int idx)
+ }
+
+ /* extra field used in struct page */
+-static inline void netif_set_page_index(struct page *pg, unsigned int index)
++static inline void netif_set_page_ext(struct page *pg, unsigned int group,
++ unsigned int idx)
+ {
+- *(unsigned long *)&pg->mapping = index + 1;
++ union page_ext ext = { .e = { .group = group + 1, .idx = idx } };
++
++ BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping));
++ pg->mapping = ext.mapping;
+ }
+
+-static inline int netif_page_index(struct page *pg)
++static inline unsigned int netif_page_group(const struct page *pg)
+ {
+- unsigned long idx = (unsigned long)pg->mapping - 1;
++ union page_ext ext = { .mapping = pg->mapping };
+
+- if (!PageForeign(pg))
+- return -1;
++ return ext.e.group - 1;
++}
+
+- if ((idx >= MAX_PENDING_REQS) || (netbk->mmap_pages[idx] != pg))
+- return -1;
++static inline unsigned int netif_page_index(const struct page *pg)
++{
++ union page_ext ext = { .mapping = pg->mapping };
+
+- return idx;
++ return ext.e.idx;
+ }
+
+ /*
+@@ -1380,7 +1385,8 @@ static void netif_page_release(struct page *page, unsigned int order)
+ {
+ int idx = netif_page_index(page);
+ BUG_ON(order);
+- BUG_ON(idx < 0);
++ BUG_ON(idx < 0 || idx >= MAX_PENDING_REQS);
++ BUG_ON(netbk->mmap_pages[idx] != page);
+ netif_idx_release(idx);
+ }
+
+@@ -1515,7 +1521,7 @@ static int __init netback_init(void)
+ for (i = 0; i < MAX_PENDING_REQS; i++) {
+ page = netbk->mmap_pages[i];
+ SetPageForeign(page, netif_page_release);
+- netif_set_page_index(page, i);
++ netif_set_page_ext(page, 0, i);
+ INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
+ }
+
+--
+1.7.3.4
+
+
+From 9534985c5b9cc3f6238d6cb8bba7d376e82039d3 Mon Sep 17 00:00:00 2001
+From: Dongxiao Xu <dongxiao.xu at intel.com>
+Date: Wed, 19 May 2010 17:08:21 -0700
+Subject: [PATCH 089/139] xen: netback: Multiple tasklets support.
+
+Now netback uses one pair of tasklets for Tx/Rx data transaction.
+Netback tasklet could only run at one CPU at a time, and it is
+used to serve all the netfronts. Therefore it has become a
+performance bottleneck. This patch is to use multiple tasklet
+pairs to replace the current single pair in dom0.
+
+Assuming that Dom0 has CPUNR VCPUs, we define CPUNR kinds of
+tasklets pair (CPUNR for Tx, and CPUNR for Rx). Each pair of
+tasklets serves a specific group of netfronts. Also for those global
+and static variables, we duplicated them for each group in
+order to avoid the spinlock.
+
+Signed-off-by: Dongxiao Xu <dongxiao.xu at intel.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/common.h | 6 +
+ drivers/xen/netback/interface.c | 27 ++++
+ drivers/xen/netback/netback.c | 270 ++++++++++++++++++++++++---------------
+ 3 files changed, 197 insertions(+), 106 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 5e0e467..847ba58 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -58,6 +58,7 @@
+ struct xen_netif {
+ /* Unique identifier for this interface. */
+ domid_t domid;
++ int group;
+ unsigned int handle;
+
+ u8 fe_dev_addr[6];
+@@ -278,6 +279,8 @@ struct xen_netbk {
+ /* Protect the net_schedule_list in netif. */
+ spinlock_t net_schedule_list_lock;
+
++ atomic_t netfront_count;
++
+ struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
+ struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
+ struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
+@@ -296,4 +299,7 @@ struct xen_netbk {
+ struct netbk_rx_meta meta[NET_RX_RING_SIZE];
+ };
+
++extern struct xen_netbk *xen_netbk;
++extern int xen_netbk_group_nr;
++
+ #endif /* __NETIF__BACKEND__COMMON_H__ */
+diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
+index 086d939..172ef4c 100644
+--- a/drivers/xen/netback/interface.c
++++ b/drivers/xen/netback/interface.c
+@@ -54,8 +54,33 @@
+ static unsigned long netbk_queue_length = 32;
+ module_param_named(queue_length, netbk_queue_length, ulong, 0644);
+
++static void netbk_add_netif(struct xen_netbk *netbk, int group_nr,
++ struct xen_netif *netif)
++{
++ int i;
++ int min_netfront_count;
++ int min_group = 0;
++ min_netfront_count = atomic_read(&netbk[0].netfront_count);
++ for (i = 0; i < group_nr; i++) {
++ int netfront_count = atomic_read(&netbk[i].netfront_count);
++ if (netfront_count < min_netfront_count) {
++ min_group = i;
++ min_netfront_count = netfront_count;
++ }
++ }
++
++ netif->group = min_group;
++ atomic_inc(&netbk[netif->group].netfront_count);
++}
++
++static void netbk_remove_netif(struct xen_netbk *netbk, struct xen_netif *netif)
++{
++ atomic_dec(&netbk[netif->group].netfront_count);
++}
++
+ static void __netif_up(struct xen_netif *netif)
+ {
++ netbk_add_netif(xen_netbk, xen_netbk_group_nr, netif);
+ enable_irq(netif->irq);
+ netif_schedule_work(netif);
+ }
+@@ -64,6 +89,7 @@ static void __netif_down(struct xen_netif *netif)
+ {
+ disable_irq(netif->irq);
+ netif_deschedule_work(netif);
++ netbk_remove_netif(xen_netbk, netif);
+ }
+
+ static int net_open(struct net_device *dev)
+@@ -214,6 +240,7 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
+ netif = netdev_priv(dev);
+ memset(netif, 0, sizeof(*netif));
+ netif->domid = domid;
++ netif->group = -1;
+ netif->handle = handle;
+ netif->features = NETIF_F_SG;
+ atomic_set(&netif->refcnt, 1);
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 71ec999..feefb14 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -48,9 +48,10 @@
+
+ /*define NETBE_DEBUG_INTERRUPT*/
+
+-static struct xen_netbk *netbk;
++struct xen_netbk *xen_netbk;
++int xen_netbk_group_nr;
+
+-static void netif_idx_release(u16 pending_idx);
++static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx);
+ static void make_tx_response(struct xen_netif *netif,
+ struct xen_netif_tx_request *txp,
+ s8 st);
+@@ -61,18 +62,20 @@ static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
+ u16 size,
+ u16 flags);
+
+-static void net_tx_action(unsigned long unused);
++static void net_tx_action(unsigned long data);
+
+-static void net_rx_action(unsigned long unused);
++static void net_rx_action(unsigned long data);
+
+-static inline unsigned long idx_to_pfn(unsigned int idx)
++static inline unsigned long idx_to_pfn(struct xen_netbk *netbk,
++ unsigned int idx)
+ {
+ return page_to_pfn(netbk->mmap_pages[idx]);
+ }
+
+-static inline unsigned long idx_to_kaddr(unsigned int idx)
++static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
++ unsigned int idx)
+ {
+- return (unsigned long)pfn_to_kaddr(idx_to_pfn(idx));
++ return (unsigned long)pfn_to_kaddr(idx_to_pfn(netbk, idx));
+ }
+
+ /* extra field used in struct page */
+@@ -112,7 +115,7 @@ static inline pending_ring_idx_t pending_index(unsigned i)
+ return i & (MAX_PENDING_REQS-1);
+ }
+
+-static inline pending_ring_idx_t nr_pending_reqs(void)
++static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk)
+ {
+ return MAX_PENDING_REQS -
+ netbk->pending_prod + netbk->pending_cons;
+@@ -125,10 +128,10 @@ MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
+
+ int netbk_copy_skb_mode;
+
+-static inline void maybe_schedule_tx_action(void)
++static inline void maybe_schedule_tx_action(struct xen_netbk *netbk)
+ {
+ smp_mb();
+- if ((nr_pending_reqs() < (MAX_PENDING_REQS/2)) &&
++ if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
+ !list_empty(&netbk->net_schedule_list))
+ tasklet_schedule(&netbk->net_tx_tasklet);
+ }
+@@ -235,9 +238,15 @@ static void tx_queue_callback(unsigned long data)
+ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ {
+ struct xen_netif *netif = netdev_priv(dev);
++ struct xen_netbk *netbk;
+
+ BUG_ON(skb->dev != dev);
+
++ if (netif->group == -1)
++ goto drop;
++
++ netbk = &xen_netbk[netif->group];
++
+ /* Drop the packet if the target domain has no receive buffers. */
+ if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
+ goto drop;
+@@ -313,6 +322,7 @@ static u16 netbk_gop_frag(struct xen_netif *netif, struct netbk_rx_meta *meta,
+ struct gnttab_copy *copy_gop;
+ struct xen_netif_rx_request *req;
+ unsigned long old_mfn;
++ int group = netif_page_group(page);
+ int idx = netif_page_index(page);
+
+ old_mfn = virt_to_mfn(page_address(page));
+@@ -321,7 +331,8 @@ static u16 netbk_gop_frag(struct xen_netif *netif, struct netbk_rx_meta *meta,
+
+ copy_gop = npo->copy + npo->copy_prod++;
+ copy_gop->flags = GNTCOPY_dest_gref;
+- if (idx > -1) {
++ if (PageForeign(page)) {
++ struct xen_netbk *netbk = &xen_netbk[group];
+ struct pending_tx_info *src_pend = &netbk->pending_tx_info[idx];
+ copy_gop->source.domid = src_pend->netif->domid;
+ copy_gop->source.u.ref = src_pend->req.gref;
+@@ -422,9 +433,10 @@ static void netbk_add_frag_responses(struct xen_netif *netif, int status,
+ }
+ }
+
+-static void net_rx_action(unsigned long unused)
++static void net_rx_action(unsigned long data)
+ {
+ struct xen_netif *netif = NULL;
++ struct xen_netbk *netbk = (struct xen_netbk *)data;
+ s8 status;
+ u16 id, irq, flags;
+ struct xen_netif_rx_response *resp;
+@@ -584,13 +596,15 @@ static void net_rx_action(unsigned long unused)
+ tasklet_schedule(&netbk->net_rx_tasklet);
+ }
+
+-static void net_alarm(unsigned long unused)
++static void net_alarm(unsigned long data)
+ {
++ struct xen_netbk *netbk = (struct xen_netbk *)data;
+ tasklet_schedule(&netbk->net_rx_tasklet);
+ }
+
+-static void netbk_tx_pending_timeout(unsigned long unused)
++static void netbk_tx_pending_timeout(unsigned long data)
+ {
++ struct xen_netbk *netbk = (struct xen_netbk *)data;
+ tasklet_schedule(&netbk->net_tx_tasklet);
+ }
+
+@@ -607,6 +621,7 @@ static int __on_net_schedule_list(struct xen_netif *netif)
+
+ static void remove_from_net_schedule_list(struct xen_netif *netif)
+ {
++ struct xen_netbk *netbk = &xen_netbk[netif->group];
+ spin_lock_irq(&netbk->net_schedule_list_lock);
+ if (likely(__on_net_schedule_list(netif))) {
+ list_del_init(&netif->list);
+@@ -617,6 +632,7 @@ static void remove_from_net_schedule_list(struct xen_netif *netif)
+
+ static void add_to_net_schedule_list_tail(struct xen_netif *netif)
+ {
++ struct xen_netbk *netbk = &xen_netbk[netif->group];
+ if (__on_net_schedule_list(netif))
+ return;
+
+@@ -631,13 +647,14 @@ static void add_to_net_schedule_list_tail(struct xen_netif *netif)
+
+ void netif_schedule_work(struct xen_netif *netif)
+ {
++ struct xen_netbk *netbk = &xen_netbk[netif->group];
+ int more_to_do;
+
+ RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
+
+ if (more_to_do) {
+ add_to_net_schedule_list_tail(netif);
+- maybe_schedule_tx_action();
++ maybe_schedule_tx_action(netbk);
+ }
+ }
+
+@@ -674,14 +691,15 @@ static void tx_credit_callback(unsigned long data)
+ netif_schedule_work(netif);
+ }
+
+-static inline int copy_pending_req(pending_ring_idx_t pending_idx)
++static inline int copy_pending_req(struct xen_netbk *netbk,
++ pending_ring_idx_t pending_idx)
+ {
+ return gnttab_copy_grant_page(
+ netbk->grant_tx_handle[pending_idx],
+ &netbk->mmap_pages[pending_idx]);
+ }
+
+-inline static void net_tx_action_dealloc(void)
++static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
+ {
+ struct netbk_tx_pending_inuse *inuse, *n;
+ struct gnttab_unmap_grant_ref *gop;
+@@ -711,13 +729,13 @@ inline static void net_tx_action_dealloc(void)
+ pending_idx = netbk->dealloc_ring[pending_index(dc++)];
+ list_move_tail(&pending_inuse[pending_idx].list, &list);
+
+- pfn = idx_to_pfn(pending_idx);
++ pfn = idx_to_pfn(netbk, pending_idx);
+ /* Already unmapped? */
+ if (!phys_to_machine_mapping_valid(pfn))
+ continue;
+
+ gnttab_set_unmap_op(gop,
+- idx_to_kaddr(pending_idx),
++ idx_to_kaddr(netbk, pending_idx),
+ GNTMAP_host_map,
+ netbk->grant_tx_handle[pending_idx]);
+ gop++;
+@@ -740,7 +758,7 @@ inline static void net_tx_action_dealloc(void)
+
+ pending_tx_info[pending_idx].netif->nr_copied_skbs++;
+
+- switch (copy_pending_req(pending_idx)) {
++ switch (copy_pending_req(netbk, pending_idx)) {
+ case 0:
+ list_move_tail(&inuse->list, &list);
+ continue;
+@@ -843,7 +861,8 @@ static int netbk_count_requests(struct xen_netif *netif,
+ return frags;
+ }
+
+-static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netif *netif,
++static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk,
++ struct xen_netif *netif,
+ struct sk_buff *skb,
+ struct xen_netif_tx_request *txp,
+ struct gnttab_map_grant_ref *mop)
+@@ -864,7 +883,7 @@ static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netif *netif,
+ index = pending_index(netbk->pending_cons++);
+ pending_idx = netbk->pending_ring[index];
+
+- gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx),
++ gnttab_set_map_op(mop++, idx_to_kaddr(netbk, pending_idx),
+ GNTMAP_host_map | GNTMAP_readonly,
+ txp->gref, netif->domid);
+
+@@ -877,8 +896,9 @@ static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netif *netif,
+ return mop;
+ }
+
+-static int netbk_tx_check_mop(struct sk_buff *skb,
+- struct gnttab_map_grant_ref **mopp)
++static int netbk_tx_check_mop(struct xen_netbk *netbk,
++ struct sk_buff *skb,
++ struct gnttab_map_grant_ref **mopp)
+ {
+ struct gnttab_map_grant_ref *mop = *mopp;
+ int pending_idx = *((u16 *)skb->data);
+@@ -900,7 +920,7 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
+ netif_put(netif);
+ } else {
+ set_phys_to_machine(
+- __pa(idx_to_kaddr(pending_idx)) >> PAGE_SHIFT,
++ __pa(idx_to_kaddr(netbk, pending_idx)) >> PAGE_SHIFT,
+ FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
+ netbk->grant_tx_handle[pending_idx] = mop->handle;
+ }
+@@ -918,14 +938,14 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
+ newerr = (++mop)->status;
+ if (likely(!newerr)) {
+ unsigned long addr;
+- addr = idx_to_kaddr(pending_idx);
++ addr = idx_to_kaddr(netbk, pending_idx);
+ set_phys_to_machine(
+ __pa(addr)>>PAGE_SHIFT,
+ FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
+ netbk->grant_tx_handle[pending_idx] = mop->handle;
+ /* Had a previous error? Invalidate this fragment. */
+ if (unlikely(err))
+- netif_idx_release(pending_idx);
++ netif_idx_release(netbk, pending_idx);
+ continue;
+ }
+
+@@ -942,10 +962,10 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
+
+ /* First error: invalidate header and preceding fragments. */
+ pending_idx = *((u16 *)skb->data);
+- netif_idx_release(pending_idx);
++ netif_idx_release(netbk, pending_idx);
+ for (j = start; j < i; j++) {
+ pending_idx = (unsigned long)shinfo->frags[i].page;
+- netif_idx_release(pending_idx);
++ netif_idx_release(netbk, pending_idx);
+ }
+
+ /* Remember the error: invalidate all subsequent fragments. */
+@@ -956,7 +976,7 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
+ return err;
+ }
+
+-static void netbk_fill_frags(struct sk_buff *skb)
++static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
+ {
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+ int nr_frags = shinfo->nr_frags;
+@@ -974,7 +994,7 @@ static void netbk_fill_frags(struct sk_buff *skb)
+ &netbk->pending_inuse_head);
+
+ txp = &netbk->pending_tx_info[pending_idx].req;
+- frag->page = virt_to_page(idx_to_kaddr(pending_idx));
++ frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx));
+ frag->size = txp->size;
+ frag->page_offset = txp->offset;
+
+@@ -1106,14 +1126,14 @@ static bool tx_credit_exceeded(struct xen_netif *netif, unsigned size)
+ return false;
+ }
+
+-static unsigned net_tx_build_mops(void)
++static unsigned net_tx_build_mops(struct xen_netbk *netbk)
+ {
+ struct gnttab_map_grant_ref *mop;
+ struct sk_buff *skb;
+ int ret;
+
+ mop = netbk->tx_map_ops;
+- while (((nr_pending_reqs() + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
++ while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
+ !list_empty(&netbk->net_schedule_list)) {
+ struct xen_netif *netif;
+ struct xen_netif_tx_request txreq;
+@@ -1215,7 +1235,7 @@ static unsigned net_tx_build_mops(void)
+ }
+ }
+
+- gnttab_set_map_op(mop, idx_to_kaddr(pending_idx),
++ gnttab_set_map_op(mop, idx_to_kaddr(netbk, pending_idx),
+ GNTMAP_host_map | GNTMAP_readonly,
+ txreq.gref, netif->domid);
+ mop++;
+@@ -1241,7 +1261,7 @@ static unsigned net_tx_build_mops(void)
+
+ netbk->pending_cons++;
+
+- mop = netbk_get_requests(netif, skb, txfrags, mop);
++ mop = netbk_get_requests(netbk, netif, skb, txfrags, mop);
+
+ netif->tx.req_cons = idx;
+ netif_schedule_work(netif);
+@@ -1253,7 +1273,7 @@ static unsigned net_tx_build_mops(void)
+ return mop - netbk->tx_map_ops;
+ }
+
+-static void net_tx_submit(void)
++static void net_tx_submit(struct xen_netbk *netbk)
+ {
+ struct gnttab_map_grant_ref *mop;
+ struct sk_buff *skb;
+@@ -1270,7 +1290,7 @@ static void net_tx_submit(void)
+ txp = &netbk->pending_tx_info[pending_idx].req;
+
+ /* Check the remap error code. */
+- if (unlikely(netbk_tx_check_mop(skb, &mop))) {
++ if (unlikely(netbk_tx_check_mop(netbk, skb, &mop))) {
+ DPRINTK("netback grant failed.\n");
+ skb_shinfo(skb)->nr_frags = 0;
+ kfree_skb(skb);
+@@ -1279,7 +1299,7 @@ static void net_tx_submit(void)
+
+ data_len = skb->len;
+ memcpy(skb->data,
+- (void *)(idx_to_kaddr(pending_idx)|txp->offset),
++ (void *)(idx_to_kaddr(netbk, pending_idx)|txp->offset),
+ data_len);
+ if (data_len < txp->size) {
+ /* Append the packet payload as a fragment. */
+@@ -1287,7 +1307,7 @@ static void net_tx_submit(void)
+ txp->size -= data_len;
+ } else {
+ /* Schedule a response immediately. */
+- netif_idx_release(pending_idx);
++ netif_idx_release(netbk, pending_idx);
+ }
+
+ if (txp->flags & NETTXF_csum_blank)
+@@ -1295,7 +1315,7 @@ static void net_tx_submit(void)
+ else if (txp->flags & NETTXF_data_validated)
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+- netbk_fill_frags(skb);
++ netbk_fill_frags(netbk, skb);
+
+ /*
+ * If the initial fragment was < PKT_PROT_LEN then
+@@ -1344,15 +1364,16 @@ static void net_tx_submit(void)
+ }
+
+ /* Called after netfront has transmitted */
+-static void net_tx_action(unsigned long unused)
++static void net_tx_action(unsigned long data)
+ {
++ struct xen_netbk *netbk = (struct xen_netbk *)data;
+ unsigned nr_mops;
+ int ret;
+
+ if (netbk->dealloc_cons != netbk->dealloc_prod)
+- net_tx_action_dealloc();
++ net_tx_action_dealloc(netbk);
+
+- nr_mops = net_tx_build_mops();
++ nr_mops = net_tx_build_mops(netbk);
+
+ if (nr_mops == 0)
+ return;
+@@ -1361,10 +1382,10 @@ static void net_tx_action(unsigned long unused)
+ netbk->tx_map_ops, nr_mops);
+ BUG_ON(ret);
+
+- net_tx_submit();
++ net_tx_submit(netbk);
+ }
+
+-static void netif_idx_release(u16 pending_idx)
++static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
+ {
+ static DEFINE_SPINLOCK(_lock);
+ unsigned long flags;
+@@ -1383,19 +1404,28 @@ static void netif_idx_release(u16 pending_idx)
+
+ static void netif_page_release(struct page *page, unsigned int order)
+ {
++ int group = netif_page_group(page);
+ int idx = netif_page_index(page);
++ struct xen_netbk *netbk = &xen_netbk[group];
+ BUG_ON(order);
++ BUG_ON(group < 0 || group >= xen_netbk_group_nr);
+ BUG_ON(idx < 0 || idx >= MAX_PENDING_REQS);
+ BUG_ON(netbk->mmap_pages[idx] != page);
+- netif_idx_release(idx);
++ netif_idx_release(netbk, idx);
+ }
+
+ irqreturn_t netif_be_int(int irq, void *dev_id)
+ {
+ struct xen_netif *netif = dev_id;
++ struct xen_netbk *netbk;
++
++ if (netif->group == -1)
++ return IRQ_NONE;
++
++ netbk = &xen_netbk[netif->group];
+
+ add_to_net_schedule_list_tail(netif);
+- maybe_schedule_tx_action();
++ maybe_schedule_tx_action(netbk);
+
+ if (netif_schedulable(netif) && !netbk_queue_full(netif))
+ netif_wake_queue(netif->dev);
+@@ -1453,28 +1483,40 @@ static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
+ struct list_head *ent;
+ struct xen_netif *netif;
+ int i = 0;
++ int group = 0;
+
+ printk(KERN_ALERT "netif_schedule_list:\n");
+- spin_lock_irq(&netbk->net_schedule_list_lock);
+
+- list_for_each(ent, &netbk->net_schedule_list) {
+- netif = list_entry(ent, struct xen_netif, list);
+- printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
+- "rx_resp_prod=%08x\n",
+- i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
+- printk(KERN_ALERT " tx_req_cons=%08x tx_resp_prod=%08x)\n",
+- netif->tx.req_cons, netif->tx.rsp_prod_pvt);
+- printk(KERN_ALERT " shared(rx_req_prod=%08x "
+- "rx_resp_prod=%08x\n",
+- netif->rx.sring->req_prod, netif->rx.sring->rsp_prod);
+- printk(KERN_ALERT " rx_event=%08x tx_req_prod=%08x\n",
+- netif->rx.sring->rsp_event, netif->tx.sring->req_prod);
+- printk(KERN_ALERT " tx_resp_prod=%08x, tx_event=%08x)\n",
+- netif->tx.sring->rsp_prod, netif->tx.sring->rsp_event);
+- i++;
++ for (group = 0; group < xen_netbk_group_nr; group++) {
++ struct xen_netbk *netbk = &xen_netbk[group];
++ spin_lock_irq(&netbk->net_schedule_list_lock);
++ printk(KERN_ALERT "xen_netback group number: %d\n", group);
++ list_for_each(ent, &netbk->net_schedule_list) {
++ netif = list_entry(ent, struct xen_netif, list);
++ printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
++ "rx_resp_prod=%08x\n",
++ i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
++ printk(KERN_ALERT
++ " tx_req_cons=%08x, tx_resp_prod=%08x)\n",
++ netif->tx.req_cons, netif->tx.rsp_prod_pvt);
++ printk(KERN_ALERT
++ " shared(rx_req_prod=%08x "
++ "rx_resp_prod=%08x\n",
++ netif->rx.sring->req_prod,
++ netif->rx.sring->rsp_prod);
++ printk(KERN_ALERT
++ " rx_event=%08x, tx_req_prod=%08x\n",
++ netif->rx.sring->rsp_event,
++ netif->tx.sring->req_prod);
++ printk(KERN_ALERT
++ " tx_resp_prod=%08x, tx_event=%08x)\n",
++ netif->tx.sring->rsp_prod,
++ netif->tx.sring->rsp_event);
++ i++;
++ }
++ spin_unlock_irq(&netbk->net_schedule_list_lock);
+ }
+
+- spin_unlock_irq(&netbk->net_schedule_list_lock);
+ printk(KERN_ALERT " ** End of netif_schedule_list **\n");
+
+ return IRQ_HANDLED;
+@@ -1486,12 +1528,15 @@ static int __init netback_init(void)
+ int i;
+ struct page *page;
+ int rc = 0;
++ int group;
+
+ if (!xen_domain())
+ return -ENODEV;
+
+- netbk = (struct xen_netbk *)vmalloc(sizeof(struct xen_netbk));
+- if (!netbk) {
++ xen_netbk_group_nr = num_online_cpus();
++ xen_netbk = (struct xen_netbk *)vmalloc(sizeof(struct xen_netbk) *
++ xen_netbk_group_nr);
++ if (!xen_netbk) {
+ printk(KERN_ALERT "%s: out of memory\n", __func__);
+ return -ENOMEM;
+ }
+@@ -1499,44 +1544,54 @@ static int __init netback_init(void)
+ /* We can increase reservation by this much in net_rx_action(). */
+ // balloon_update_driver_allowance(NET_RX_RING_SIZE);
+
+- skb_queue_head_init(&netbk->rx_queue);
+- skb_queue_head_init(&netbk->tx_queue);
+-
+- init_timer(&netbk->net_timer);
+- netbk->net_timer.data = 0;
+- netbk->net_timer.function = net_alarm;
+-
+- init_timer(&netbk->netbk_tx_pending_timer);
+- netbk->netbk_tx_pending_timer.data = 0;
+- netbk->netbk_tx_pending_timer.function = netbk_tx_pending_timeout;
++ for (group = 0; group < xen_netbk_group_nr; group++) {
++ struct xen_netbk *netbk = &xen_netbk[group];
++ skb_queue_head_init(&netbk->rx_queue);
++ skb_queue_head_init(&netbk->tx_queue);
++
++ init_timer(&netbk->net_timer);
++ netbk->net_timer.data = (unsigned long)netbk;
++ netbk->net_timer.function = net_alarm;
++
++ init_timer(&netbk->netbk_tx_pending_timer);
++ netbk->netbk_tx_pending_timer.data = (unsigned long)netbk;
++ netbk->netbk_tx_pending_timer.function =
++ netbk_tx_pending_timeout;
++
++ netbk->mmap_pages =
++ alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
++ if (!netbk->mmap_pages) {
++ printk(KERN_ALERT "%s: out of memory\n", __func__);
++ del_timer(&netbk->netbk_tx_pending_timer);
++ del_timer(&netbk->net_timer);
++ rc = -ENOMEM;
++ goto failed_init;
++ }
+
+- netbk->mmap_pages =
+- alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
+- if (!netbk->mmap_pages) {
+- printk(KERN_ALERT "%s: out of memory\n", __func__);
+- rc = -ENOMEM;
+- goto failed_init2;
+- }
++ for (i = 0; i < MAX_PENDING_REQS; i++) {
++ page = netbk->mmap_pages[i];
++ SetPageForeign(page, netif_page_release);
++ netif_set_page_ext(page, group, i);
++ INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
++ }
+
+- for (i = 0; i < MAX_PENDING_REQS; i++) {
+- page = netbk->mmap_pages[i];
+- SetPageForeign(page, netif_page_release);
+- netif_set_page_ext(page, 0, i);
+- INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
+- }
++ netbk->pending_cons = 0;
++ netbk->pending_prod = MAX_PENDING_REQS;
++ for (i = 0; i < MAX_PENDING_REQS; i++)
++ netbk->pending_ring[i] = i;
+
+- netbk->pending_cons = 0;
+- netbk->pending_prod = MAX_PENDING_REQS;
+- for (i = 0; i < MAX_PENDING_REQS; i++)
+- netbk->pending_ring[i] = i;
++ tasklet_init(&netbk->net_tx_tasklet, net_tx_action,
++ (unsigned long)netbk);
++ tasklet_init(&netbk->net_rx_tasklet, net_rx_action,
++ (unsigned long)netbk);
+
+- tasklet_init(&netbk->net_tx_tasklet, net_tx_action, 0);
+- tasklet_init(&netbk->net_rx_tasklet, net_rx_action, 0);
++ INIT_LIST_HEAD(&netbk->pending_inuse_head);
++ INIT_LIST_HEAD(&netbk->net_schedule_list);
+
+- INIT_LIST_HEAD(&netbk->pending_inuse_head);
+- INIT_LIST_HEAD(&netbk->net_schedule_list);
++ spin_lock_init(&netbk->net_schedule_list_lock);
+
+- spin_lock_init(&netbk->net_schedule_list_lock);
++ atomic_set(&netbk->netfront_count, 0);
++ }
+
+ netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
+ if (MODPARM_copy_skb) {
+@@ -1551,25 +1606,28 @@ static int __init netback_init(void)
+
+ rc = netif_xenbus_init();
+ if (rc)
+- goto failed_init1;
++ goto failed_init;
+
+ #ifdef NETBE_DEBUG_INTERRUPT
+ (void)bind_virq_to_irqhandler(VIRQ_DEBUG,
+ 0,
+ netif_be_dbg,
+- SA_SHIRQ,
++ IRQF_SHARED,
+ "net-be-dbg",
+ &netif_be_dbg);
+ #endif
+
+ return 0;
+
+-failed_init1:
+- free_empty_pages_and_pagevec(netbk->mmap_pages, MAX_PENDING_REQS);
+-failed_init2:
+- del_timer(&netbk->netbk_tx_pending_timer);
+- del_timer(&netbk->net_timer);
+- vfree(netbk);
++failed_init:
++ for (i = 0; i < group; i++) {
++ struct xen_netbk *netbk = &xen_netbk[i];
++ free_empty_pages_and_pagevec(netbk->mmap_pages,
++ MAX_PENDING_REQS);
++ del_timer(&netbk->netbk_tx_pending_timer);
++ del_timer(&netbk->net_timer);
++ }
++ vfree(xen_netbk);
+ return rc;
+
+ }
+--
+1.7.3.4
+
+
+From e7317b70c0436c109b605bb377939cb2eaff6a6f Mon Sep 17 00:00:00 2001
+From: Dongxiao Xu <dongxiao.xu at intel.com>
+Date: Wed, 19 May 2010 17:08:22 -0700
+Subject: [PATCH 090/139] xen: netback: Use Kernel thread to replace the tasklet.
+
+A kernel thread has more control over QoS, and could improve dom0's
+userspace responsiveness. This option is off by default currently.
+
+Signed-off-by: Dongxiao Xu <dongxiao.xu at intel.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/common.h | 13 ++++-
+ drivers/xen/netback/netback.c | 109 ++++++++++++++++++++++++++++++++++++----
+ 2 files changed, 109 insertions(+), 13 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 847ba58..36cb2b9 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -257,8 +257,17 @@ union page_ext {
+ };
+
+ struct xen_netbk {
+- struct tasklet_struct net_tx_tasklet;
+- struct tasklet_struct net_rx_tasklet;
++ union {
++ struct {
++ struct tasklet_struct net_tx_tasklet;
++ struct tasklet_struct net_rx_tasklet;
++ } tasklet;
++
++ struct {
++ wait_queue_head_t netbk_action_wq;
++ struct task_struct *task;
++ } kthread;
++ };
+
+ struct sk_buff_head rx_queue;
+ struct sk_buff_head tx_queue;
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index feefb14..547dcaa 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -38,6 +38,7 @@
+
+ #include <linux/tcp.h>
+ #include <linux/udp.h>
++#include <linux/kthread.h>
+
+ #include <xen/balloon.h>
+ #include <xen/events.h>
+@@ -128,12 +129,31 @@ MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
+
+ int netbk_copy_skb_mode;
+
++static int MODPARM_netback_kthread;
++module_param_named(netback_kthread, MODPARM_netback_kthread, bool, 0);
++MODULE_PARM_DESC(netback_kthread, "Use kernel thread to replace tasklet");
++
++/*
++ * Netback bottom half handler.
++ * dir indicates the data direction.
++ * rx: 1, tx: 0.
++ */
++static inline void xen_netbk_bh_handler(struct xen_netbk *netbk, int dir)
++{
++ if (MODPARM_netback_kthread)
++ wake_up(&netbk->kthread.netbk_action_wq);
++ else if (dir)
++ tasklet_schedule(&netbk->tasklet.net_rx_tasklet);
++ else
++ tasklet_schedule(&netbk->tasklet.net_tx_tasklet);
++}
++
+ static inline void maybe_schedule_tx_action(struct xen_netbk *netbk)
+ {
+ smp_mb();
+ if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
+ !list_empty(&netbk->net_schedule_list))
+- tasklet_schedule(&netbk->net_tx_tasklet);
++ xen_netbk_bh_handler(netbk, 0);
+ }
+
+ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
+@@ -289,7 +309,8 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ }
+ }
+ skb_queue_tail(&netbk->rx_queue, skb);
+- tasklet_schedule(&netbk->net_rx_tasklet);
++
++ xen_netbk_bh_handler(netbk, 1);
+
+ return 0;
+
+@@ -593,19 +614,19 @@ static void net_rx_action(unsigned long data)
+ /* More work to do? */
+ if (!skb_queue_empty(&netbk->rx_queue) &&
+ !timer_pending(&netbk->net_timer))
+- tasklet_schedule(&netbk->net_rx_tasklet);
++ xen_netbk_bh_handler(netbk, 1);
+ }
+
+ static void net_alarm(unsigned long data)
+ {
+ struct xen_netbk *netbk = (struct xen_netbk *)data;
+- tasklet_schedule(&netbk->net_rx_tasklet);
++ xen_netbk_bh_handler(netbk, 1);
+ }
+
+ static void netbk_tx_pending_timeout(unsigned long data)
+ {
+ struct xen_netbk *netbk = (struct xen_netbk *)data;
+- tasklet_schedule(&netbk->net_tx_tasklet);
++ xen_netbk_bh_handler(netbk, 0);
+ }
+
+ struct net_device_stats *netif_be_get_stats(struct net_device *dev)
+@@ -1348,7 +1369,7 @@ static void net_tx_submit(struct xen_netbk *netbk)
+ continue;
+ }
+
+- netif_rx(skb);
++ netif_rx_ni(skb);
+ netif->dev->last_rx = jiffies;
+ }
+
+@@ -1399,7 +1420,7 @@ static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
+ netbk->dealloc_prod++;
+ spin_unlock_irqrestore(&_lock, flags);
+
+- tasklet_schedule(&netbk->net_tx_tasklet);
++ xen_netbk_bh_handler(netbk, 0);
+ }
+
+ static void netif_page_release(struct page *page, unsigned int order)
+@@ -1523,6 +1544,46 @@ static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
+ }
+ #endif
+
++static inline int rx_work_todo(struct xen_netbk *netbk)
++{
++ return !skb_queue_empty(&netbk->rx_queue);
++}
++
++static inline int tx_work_todo(struct xen_netbk *netbk)
++{
++ if (netbk->dealloc_cons != netbk->dealloc_prod)
++ return 1;
++
++ if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
++ !list_empty(&netbk->net_schedule_list))
++ return 1;
++
++ return 0;
++}
++
++static int netbk_action_thread(void *data)
++{
++ struct xen_netbk *netbk = (struct xen_netbk *)data;
++ while (!kthread_should_stop()) {
++ wait_event_interruptible(netbk->kthread.netbk_action_wq,
++ rx_work_todo(netbk)
++ || tx_work_todo(netbk)
++ || kthread_should_stop());
++ cond_resched();
++
++ if (kthread_should_stop())
++ break;
++
++ if (rx_work_todo(netbk))
++ net_rx_action((unsigned long)netbk);
++
++ if (tx_work_todo(netbk))
++ net_tx_action((unsigned long)netbk);
++ }
++
++ return 0;
++}
++
+ static int __init netback_init(void)
+ {
+ int i;
+@@ -1580,10 +1641,34 @@ static int __init netback_init(void)
+ for (i = 0; i < MAX_PENDING_REQS; i++)
+ netbk->pending_ring[i] = i;
+
+- tasklet_init(&netbk->net_tx_tasklet, net_tx_action,
+- (unsigned long)netbk);
+- tasklet_init(&netbk->net_rx_tasklet, net_rx_action,
+- (unsigned long)netbk);
++ if (MODPARM_netback_kthread) {
++ init_waitqueue_head(&netbk->kthread.netbk_action_wq);
++ netbk->kthread.task =
++ kthread_create(netbk_action_thread,
++ (void *)netbk,
++ "netback/%u", group);
++
++ if (!IS_ERR(netbk->kthread.task)) {
++ kthread_bind(netbk->kthread.task, group);
++ wake_up_process(netbk->kthread.task);
++ } else {
++ printk(KERN_ALERT
++ "kthread_run() fails at netback\n");
++ free_empty_pages_and_pagevec(netbk->mmap_pages,
++ MAX_PENDING_REQS);
++ del_timer(&netbk->netbk_tx_pending_timer);
++ del_timer(&netbk->net_timer);
++ rc = PTR_ERR(netbk->kthread.task);
++ goto failed_init;
++ }
++ } else {
++ tasklet_init(&netbk->tasklet.net_tx_tasklet,
++ net_tx_action,
++ (unsigned long)netbk);
++ tasklet_init(&netbk->tasklet.net_rx_tasklet,
++ net_rx_action,
++ (unsigned long)netbk);
++ }
+
+ INIT_LIST_HEAD(&netbk->pending_inuse_head);
+ INIT_LIST_HEAD(&netbk->net_schedule_list);
+@@ -1626,6 +1711,8 @@ failed_init:
+ MAX_PENDING_REQS);
+ del_timer(&netbk->netbk_tx_pending_timer);
+ del_timer(&netbk->net_timer);
++ if (MODPARM_netback_kthread)
++ kthread_stop(netbk->kthread.task);
+ }
+ vfree(xen_netbk);
+ return rc;
+--
+1.7.3.4
+
+
+From 6359d5939c5d1f59b794cd02e8cdbd36b9f3434d Mon Sep 17 00:00:00 2001
+From: James Harper <james.harper at bendigoit.com.au>
+Date: Fri, 28 May 2010 23:12:56 -0700
+Subject: [PATCH 091/139] xen: netback: avoid null-pointer access in netback_uevent
+
+Check if drvdata has been set up yet and return if it hasn't.
+
+Signed-off-by: James Harper <james.harper at bendigoit.com.au>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/xenbus.c | 9 +++++++--
+ 1 files changed, 7 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
+index fcd3c34..e30b0c7 100644
+--- a/drivers/xen/netback/xenbus.c
++++ b/drivers/xen/netback/xenbus.c
+@@ -154,12 +154,17 @@ fail:
+ */
+ static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env)
+ {
+- struct backend_info *be = dev_get_drvdata(&xdev->dev);
+- struct xen_netif *netif = be->netif;
++ struct backend_info *be;
++ struct xen_netif *netif;
+ char *val;
+
+ DPRINTK("netback_uevent");
+
++ be = dev_get_drvdata(&xdev->dev);
++ if (!be)
++ return 0;
++ netif = be->netif;
++
+ val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
+ if (IS_ERR(val)) {
+ int err = PTR_ERR(val);
+--
+1.7.3.4
+
+
+From 4a818daa044d9d499412e8f6e2e3086c0521e7b3 Mon Sep 17 00:00:00 2001
+From: Keir Fraser <keir.fraser at citrix.com>
+Date: Fri, 11 Jun 2010 11:48:30 +0100
+Subject: [PATCH 092/139] xen: netback: Fixes for delayed copy of tx network packets.
+
+ - Should call net_tx_action_dealloc() even when dealloc ring is
+ empty, as there may in any case be work to do on the
+ pending_inuse list.
+ - Should not exit directly from the middle of the tx_action tasklet,
+ as the tx_pending_timer should always be checked and updated at the
+ end of the tasklet.
+
+Signed-off-by: Keir Fraser <keir.fraser at citrix.com>
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+[picked from linux-2.6.18-xen.hg 959:1a97bd686258, ported across a43e2175 "xen/netback: move code around"]
+---
+ drivers/xen/netback/netback.c | 25 ++++++++++++-------------
+ 1 files changed, 12 insertions(+), 13 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 547dcaa..58dfbd2 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -1372,16 +1372,6 @@ static void net_tx_submit(struct xen_netbk *netbk)
+ netif_rx_ni(skb);
+ netif->dev->last_rx = jiffies;
+ }
+-
+- if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
+- !list_empty(&netbk->pending_inuse_head)) {
+- struct netbk_tx_pending_inuse *oldest;
+-
+- oldest = list_entry(netbk->pending_inuse_head.next,
+- struct netbk_tx_pending_inuse, list);
+- mod_timer(&netbk->netbk_tx_pending_timer,
+- oldest->alloc_time + HZ);
+- }
+ }
+
+ /* Called after netfront has transmitted */
+@@ -1391,19 +1381,28 @@ static void net_tx_action(unsigned long data)
+ unsigned nr_mops;
+ int ret;
+
+- if (netbk->dealloc_cons != netbk->dealloc_prod)
+- net_tx_action_dealloc(netbk);
++ net_tx_action_dealloc(netbk);
+
+ nr_mops = net_tx_build_mops(netbk);
+
+ if (nr_mops == 0)
+- return;
++ goto out;
+
+ ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+ netbk->tx_map_ops, nr_mops);
+ BUG_ON(ret);
+
+ net_tx_submit(netbk);
++out:
++ if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
++ !list_empty(&netbk->pending_inuse_head)) {
++ struct netbk_tx_pending_inuse *oldest;
++
++ oldest = list_entry(netbk->pending_inuse_head.next,
++ struct netbk_tx_pending_inuse, list);
++ mod_timer(&netbk->netbk_tx_pending_timer,
++ oldest->alloc_time + HZ);
++ }
+ }
+
+ static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
+--
+1.7.3.4
+
+
+From 48fa1af97e6c9d304c04f70a75de1340e7d79e18 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Fri, 11 Jun 2010 10:51:01 +0100
+Subject: [PATCH 093/139] xen: netback: handle NET_SKBUFF_DATA_USES_OFFSET correctly
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Cc: Jan Beulich <JBeulich at novell.com>
+---
+ drivers/xen/netback/netback.c | 4 ++++
+ 1 files changed, 4 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 58dfbd2..aa094af 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -218,7 +218,11 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
+ len -= copy;
+ }
+
++#ifdef NET_SKBUFF_DATA_USES_OFFSET
++ offset = 0;
++#else
+ offset = nskb->data - skb->data;
++#endif
+
+ nskb->transport_header = skb->transport_header + offset;
+ nskb->network_header = skb->network_header + offset;
+--
+1.7.3.4
+
+
+From 7d3e6e42251f179e407fa5236f613e5500b3a3ea Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Fri, 11 Jun 2010 10:51:01 +0100
+Subject: [PATCH 094/139] xen: netback: drop frag member from struct netbk_rx_meta
+
+It has been unused since c3219dc "xen/netback: completely drop flip
+support", as has netbk_free_pages().
+
+(Although it now has only a single member struct netbk_rx_meta will
+gain other members in a subsequent patch so there is no point
+reworking to get rid of the struct)
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/common.h | 1 -
+ drivers/xen/netback/netback.c | 8 --------
+ 2 files changed, 0 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 36cb2b9..be4fe91 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -230,7 +230,6 @@ struct pending_tx_info {
+ typedef unsigned int pending_ring_idx_t;
+
+ struct netbk_rx_meta {
+- skb_frag_t frag;
+ int id;
+ };
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index aa094af..9f7e489 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -411,14 +411,6 @@ static void netbk_gop_skb(struct sk_buff *skb,
+ netif->rx.req_cons += nr_frags + extra;
+ }
+
+-static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
+-{
+- int i;
+-
+- for (i = 0; i < nr_frags; i++)
+- put_page(meta[i].frag.page);
+-}
+-
+ /* This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was
+ used to set up the operations on the top of
+ netrx_pending_operations, which have since been done. Check that
+--
+1.7.3.4
+
+
+From 1ced27150d0092c40ebbbbb3896192003d433c0e Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Fri, 11 Jun 2010 10:51:01 +0100
+Subject: [PATCH 095/139] xen: netback: linearise SKBs as we copy them into guest memory on guest-RX.
+
+There's no point in sending lots of little packets to a copying
+receiver if we can instead arrange to copy them all into a single RX
+buffer. We need to copy anyway, so there's no overhead here, and this
+is a little bit easier on the receiving domain's network stack.
+
+Based on a patch by Steven Smith. Fixed to not skip unnecessarily to
+the next buffer which could leave the head fragment of a received
+frame empty if the headlen of an SKB was large (which would crash
+netfront). Instead we only try and pack "small enough" fragments
+together but do not try to coalesce large or whole page fragments.
+
+In previous iterations of this patch we also tried to only include
+2048 bytes per frag because very old netfronts stored other
+information in the second half of the page. It has been determined
+that only frontends which support scatter-gather are going to come
+down this path and that any guest which supports scatter-gather is
+also new enough to allow us to use the full page size for each
+fragment (since this limitation was fixed as part of the SG
+implementation) so we do not need this restriction.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Cc: Steven Smith <Steven.Smith at eu.citrix.com>
+---
+ drivers/xen/netback/common.h | 15 ++-
+ drivers/xen/netback/netback.c | 282 ++++++++++++++++++++++++++++++-----------
+ 2 files changed, 218 insertions(+), 79 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index be4fe91..9c0c048 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -82,7 +82,9 @@ struct xen_netif {
+ /* Internal feature information. */
+ u8 can_queue:1; /* can queue packets for receiver? */
+
+- /* Allow netif_be_start_xmit() to peek ahead in the rx request ring. */
++ /* Allow netif_be_start_xmit() to peek ahead in the rx request
++ * ring. This is a prediction of what rx_req_cons will be once
++ * all queued skbs are put on the ring. */
+ RING_IDX rx_req_cons_peek;
+
+ /* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
+@@ -231,6 +233,8 @@ typedef unsigned int pending_ring_idx_t;
+
+ struct netbk_rx_meta {
+ int id;
++ int size;
++ int gso_size;
+ };
+
+ struct netbk_tx_pending_inuse {
+@@ -240,6 +244,8 @@ struct netbk_tx_pending_inuse {
+
+ #define MAX_PENDING_REQS 256
+
++#define MAX_BUFFER_OFFSET PAGE_SIZE
++
+ /* extra field used in struct page */
+ union page_ext {
+ struct {
+@@ -301,7 +307,12 @@ struct xen_netbk {
+ struct multicall_entry rx_mcl[NET_RX_RING_SIZE+3];
+ struct mmu_update rx_mmu[NET_RX_RING_SIZE];
+ struct gnttab_transfer grant_trans_op[NET_RX_RING_SIZE];
+- struct gnttab_copy grant_copy_op[NET_RX_RING_SIZE];
++ /*
++ * Each head or fragment can be up to 4096 bytes. Given
++ * MAX_BUFFER_OFFSET of 4096 the worst case is that each
++ * head/fragment uses 2 copy operation.
++ */
++ struct gnttab_copy grant_copy_op[2*NET_RX_RING_SIZE];
+ unsigned char rx_notify[NR_IRQS];
+ u16 notify_list[NET_RX_RING_SIZE];
+ struct netbk_rx_meta meta[NET_RX_RING_SIZE];
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 9f7e489..d53d88e 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -259,6 +259,48 @@ static void tx_queue_callback(unsigned long data)
+ netif_wake_queue(netif->dev);
+ }
+
++/* Figure out how many ring slots we're going to need to send @skb to
++ the guest. */
++static unsigned count_skb_slots(struct sk_buff *skb, struct xen_netif *netif)
++{
++ unsigned count;
++ unsigned copy_off;
++ unsigned i;
++
++ copy_off = 0;
++ count = 1;
++
++ BUG_ON(offset_in_page(skb->data) + skb_headlen(skb) > MAX_BUFFER_OFFSET);
++
++ copy_off = skb_headlen(skb);
++
++ if (skb_shinfo(skb)->gso_size)
++ count++;
++
++ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
++ unsigned long size = skb_shinfo(skb)->frags[i].size;
++ unsigned long bytes;
++ while (size > 0) {
++ BUG_ON(copy_off > MAX_BUFFER_OFFSET);
++
++ /* These checks are the same as in netbk_gop_frag_copy */
++ if (copy_off == MAX_BUFFER_OFFSET
++ || ((copy_off + size > MAX_BUFFER_OFFSET) && (size <= MAX_BUFFER_OFFSET) && copy_off)) {
++ count++;
++ copy_off = 0;
++ }
++
++ bytes = size;
++ if (copy_off + bytes > MAX_BUFFER_OFFSET)
++ bytes = MAX_BUFFER_OFFSET - copy_off;
++
++ copy_off += bytes;
++ size -= bytes;
++ }
++ }
++ return count;
++}
++
+ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ {
+ struct xen_netif *netif = netdev_priv(dev);
+@@ -290,8 +332,9 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ skb = nskb;
+ }
+
+- netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 +
+- !!skb_shinfo(skb)->gso_size;
++ /* Reserve ring slots for the worst-case number of
++ * fragments. */
++ netif->rx_req_cons_peek += count_skb_slots(skb, netif);
+ netif_get(netif);
+
+ if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
+@@ -335,96 +378,165 @@ struct netrx_pending_operations {
+ struct gnttab_copy *copy;
+ struct multicall_entry *mcl;
+ struct netbk_rx_meta *meta;
++ int copy_off;
++ grant_ref_t copy_gref;
+ };
+
+ /* Set up the grant operations for this fragment. If it's a flipping
+ interface, we also set up the unmap request from here. */
+-static u16 netbk_gop_frag(struct xen_netif *netif, struct netbk_rx_meta *meta,
+- int i, struct netrx_pending_operations *npo,
+- struct page *page, unsigned long size,
+- unsigned long offset)
++
++static void netbk_gop_frag_copy(struct xen_netif *netif,
++ struct netrx_pending_operations *npo,
++ struct page *page, unsigned long size,
++ unsigned long offset, int head)
+ {
+ struct gnttab_copy *copy_gop;
+- struct xen_netif_rx_request *req;
+- unsigned long old_mfn;
++ struct netbk_rx_meta *meta;
+ int group = netif_page_group(page);
+ int idx = netif_page_index(page);
++ unsigned long bytes;
++
++ /* Data must not cross a page boundary. */
++ BUG_ON(size + offset > PAGE_SIZE);
+
+- old_mfn = virt_to_mfn(page_address(page));
++ meta = npo->meta + npo->meta_prod - 1;
+
+- req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
++ while (size > 0) {
++ BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
+
+- copy_gop = npo->copy + npo->copy_prod++;
+- copy_gop->flags = GNTCOPY_dest_gref;
+- if (PageForeign(page)) {
++ /*
++ * Move to a new receive buffer if:
++ *
++ * simple case: we have completely filled the current buffer.
++ *
++ * complex case: the current frag would overflow
++ * the current buffer but only if:
++ * (i) this frag would fit completely in the next buffer
++ * and (ii) there is already some data in the current buffer
++ * and (iii) this is not the head buffer.
++ *
++ * Where:
++ * - (i) stops us splitting a frag into two copies
++ * unless the frag is too large for a single buffer.
++ * - (ii) stops us from leaving a buffer pointlessly empty.
++ * - (iii) stops us leaving the first buffer
++ * empty. Strictly speaking this is already covered
++ * by (ii) but is explicitly checked because
++ * netfront relies on the first buffer being
++ * non-empty and can crash otherwise.
++ *
++ * This means we will effectively linearise small
++ * frags but do not needlessly split large buffers
++ * into multiple copies tend to give large frags their
++ * own buffers as before.
++ */
++ if (npo->copy_off == MAX_BUFFER_OFFSET
++ || ((npo->copy_off + size > MAX_BUFFER_OFFSET) && (size <= MAX_BUFFER_OFFSET) && npo->copy_off && !head)) {
++ struct xen_netif_rx_request *req;
++
++ BUG_ON(head); /* Netfront requires there to be some data in the head buffer. */
++ /* Overflowed this request, go to the next one */
++ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
++ meta = npo->meta + npo->meta_prod++;
++ meta->size = 0;
++ meta->id = req->id;
++ npo->copy_off = 0;
++ npo->copy_gref = req->gref;
++ }
++
++ bytes = size;
++ if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
++ bytes = MAX_BUFFER_OFFSET - npo->copy_off;
++
++ copy_gop = npo->copy + npo->copy_prod++;
++ copy_gop->flags = GNTCOPY_dest_gref;
++ if (PageForeign(page)) {
+ struct xen_netbk *netbk = &xen_netbk[group];
+ struct pending_tx_info *src_pend = &netbk->pending_tx_info[idx];
+ copy_gop->source.domid = src_pend->netif->domid;
+ copy_gop->source.u.ref = src_pend->req.gref;
+- copy_gop->flags |= GNTCOPY_source_gref;
+- } else {
+- copy_gop->source.domid = DOMID_SELF;
+- copy_gop->source.u.gmfn = old_mfn;
+- }
+- copy_gop->source.offset = offset;
+- copy_gop->dest.domid = netif->domid;
+- copy_gop->dest.offset = 0;
+- copy_gop->dest.u.ref = req->gref;
+- copy_gop->len = size;
++ copy_gop->flags |= GNTCOPY_source_gref;
++ } else {
++ copy_gop->source.domid = DOMID_SELF;
++ copy_gop->source.u.gmfn = virt_to_mfn(page_address(page));
++ }
++ copy_gop->source.offset = offset;
++ copy_gop->dest.domid = netif->domid;
+
+- return req->id;
++ copy_gop->dest.offset = npo->copy_off;
++ copy_gop->dest.u.ref = npo->copy_gref;
++ copy_gop->len = bytes;
++
++ npo->copy_off += bytes;
++ meta->size += bytes;
++
++ offset += bytes;
++ size -= bytes;
++ head = 0; /* Must be something in this buffer now */
++ }
+ }
+
+-static void netbk_gop_skb(struct sk_buff *skb,
+- struct netrx_pending_operations *npo)
++/* Prepare an SKB to be transmitted to the frontend. This is
++ responsible for allocating grant operations, meta structures, etc.
++ It returns the number of meta structures consumed. The number of
++ ring slots used is always equal to the number of meta slots used
++ plus the number of GSO descriptors used. Currently, we use either
++ zero GSO descriptors (for non-GSO packets) or one descriptor (for
++ frontend-side LRO). */
++static int netbk_gop_skb(struct sk_buff *skb,
++ struct netrx_pending_operations *npo)
+ {
+ struct xen_netif *netif = netdev_priv(skb->dev);
+ int nr_frags = skb_shinfo(skb)->nr_frags;
+ int i;
+- int extra;
+- struct netbk_rx_meta *head_meta, *meta;
++ struct xen_netif_rx_request *req;
++ struct netbk_rx_meta *meta;
++ int old_meta_prod;
++
++ old_meta_prod = npo->meta_prod;
+
+- head_meta = npo->meta + npo->meta_prod++;
+- head_meta->frag.page_offset = skb_shinfo(skb)->gso_type;
+- head_meta->frag.size = skb_shinfo(skb)->gso_size;
+- extra = !!head_meta->frag.size + 1;
++ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
++ meta = npo->meta + npo->meta_prod++;
++ meta->gso_size = skb_shinfo(skb)->gso_size;
++ meta->size = 0;
++ meta->id = req->id;
++ npo->copy_off = 0;
++ npo->copy_gref = req->gref;
++
++ netbk_gop_frag_copy(netif,
++ npo, virt_to_page(skb->data),
++ skb_headlen(skb),
++ offset_in_page(skb->data), 1);
++
++ /* Leave a gap for the GSO descriptor. */
++ if (skb_shinfo(skb)->gso_size)
++ netif->rx.req_cons++;
+
+ for (i = 0; i < nr_frags; i++) {
+- meta = npo->meta + npo->meta_prod++;
+- meta->frag = skb_shinfo(skb)->frags[i];
+- meta->id = netbk_gop_frag(netif, meta, i + extra, npo,
+- meta->frag.page,
+- meta->frag.size,
+- meta->frag.page_offset);
++ netbk_gop_frag_copy(netif, npo,
++ skb_shinfo(skb)->frags[i].page,
++ skb_shinfo(skb)->frags[i].size,
++ skb_shinfo(skb)->frags[i].page_offset,
++ 0);
+ }
+
+- /*
+- * This must occur at the end to ensure that we don't trash skb_shinfo
+- * until we're done. We know that the head doesn't cross a page
+- * boundary because such packets get copied in netif_be_start_xmit.
+- */
+- head_meta->id = netbk_gop_frag(netif, head_meta, 0, npo,
+- virt_to_page(skb->data),
+- skb_headlen(skb),
+- offset_in_page(skb->data));
+-
+- netif->rx.req_cons += nr_frags + extra;
++ return npo->meta_prod - old_meta_prod;
+ }
+
+ /* This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was
+ used to set up the operations on the top of
+ netrx_pending_operations, which have since been done. Check that
+ they didn't give any errors and advance over them. */
+-static int netbk_check_gop(int nr_frags, domid_t domid,
++static int netbk_check_gop(int nr_meta_slots, domid_t domid,
+ struct netrx_pending_operations *npo)
+ {
+ struct gnttab_copy *copy_op;
+ int status = NETIF_RSP_OKAY;
+ int i;
+
+- for (i = 0; i <= nr_frags; i++) {
+- copy_op = npo->copy + npo->copy_cons++;
+- if (copy_op->status != GNTST_okay) {
++ for (i = 0; i < nr_meta_slots; i++) {
++ copy_op = npo->copy + npo->copy_cons++;
++ if (copy_op->status != GNTST_okay) {
+ DPRINTK("Bad status %d from copy to DOM%d.\n",
+ copy_op->status, domid);
+ status = NETIF_RSP_ERROR;
+@@ -435,27 +547,35 @@ static int netbk_check_gop(int nr_frags, domid_t domid,
+ }
+
+ static void netbk_add_frag_responses(struct xen_netif *netif, int status,
+- struct netbk_rx_meta *meta, int nr_frags)
++ struct netbk_rx_meta *meta,
++ int nr_meta_slots)
+ {
+ int i;
+ unsigned long offset;
+
+- for (i = 0; i < nr_frags; i++) {
+- int id = meta[i].id;
+- int flags = (i == nr_frags - 1) ? 0 : NETRXF_more_data;
+-
++ for (i = 0; i < nr_meta_slots; i++) {
++ int flags;
++ if (i == nr_meta_slots - 1)
++ flags = 0;
++ else
++ flags = NETRXF_more_data;
++
+ offset = 0;
+- make_rx_response(netif, id, status, offset,
+- meta[i].frag.size, flags);
++ make_rx_response(netif, meta[i].id, status, offset,
++ meta[i].size, flags);
+ }
+ }
+
++struct skb_cb_overlay {
++ int meta_slots_used;
++};
++
+ static void net_rx_action(unsigned long data)
+ {
+ struct xen_netif *netif = NULL;
+ struct xen_netbk *netbk = (struct xen_netbk *)data;
+ s8 status;
+- u16 id, irq, flags;
++ u16 irq, flags;
+ struct xen_netif_rx_response *resp;
+ struct multicall_entry *mcl;
+ struct sk_buff_head rxq;
+@@ -465,6 +585,7 @@ static void net_rx_action(unsigned long data)
+ int nr_frags;
+ int count;
+ unsigned long offset;
++ struct skb_cb_overlay *sco;
+
+ struct netrx_pending_operations npo = {
+ .mmu = netbk->rx_mmu,
+@@ -479,10 +600,11 @@ static void net_rx_action(unsigned long data)
+ count = 0;
+
+ while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) {
++ netif = netdev_priv(skb->dev);
+ nr_frags = skb_shinfo(skb)->nr_frags;
+- *(int *)skb->cb = nr_frags;
+
+- netbk_gop_skb(skb, &npo);
++ sco = (struct skb_cb_overlay *)skb->cb;
++ sco->meta_slots_used = netbk_gop_skb(skb, &npo);
+
+ count += nr_frags + 1;
+
+@@ -541,18 +663,20 @@ static void net_rx_action(unsigned long data)
+ BUG_ON(npo.mmu_mcl && npo.mcl[npo.mmu_mcl].result != 0);
+
+ while ((skb = __skb_dequeue(&rxq)) != NULL) {
+- nr_frags = *(int *)skb->cb;
++ sco = (struct skb_cb_overlay *)skb->cb;
+
+ netif = netdev_priv(skb->dev);
+
+ netif->stats.tx_bytes += skb->len;
+ netif->stats.tx_packets++;
+
+- status = netbk_check_gop(nr_frags, netif->domid, &npo);
+-
+- id = netbk->meta[npo.meta_cons].id;
+- flags = nr_frags ? NETRXF_more_data : 0;
++ status = netbk_check_gop(sco->meta_slots_used,
++ netif->domid, &npo);
+
++ if (sco->meta_slots_used == 1)
++ flags = 0;
++ else
++ flags = NETRXF_more_data;
+ if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
+ flags |= NETRXF_csum_blank | NETRXF_data_validated;
+ else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
+@@ -560,10 +684,12 @@ static void net_rx_action(unsigned long data)
+ flags |= NETRXF_data_validated;
+
+ offset = 0;
+- resp = make_rx_response(netif, id, status, offset,
+- skb_headlen(skb), flags);
++ resp = make_rx_response(netif, netbk->meta[npo.meta_cons].id,
++ status, offset,
++ netbk->meta[npo.meta_cons].size,
++ flags);
+
+- if (netbk->meta[npo.meta_cons].frag.size) {
++ if (netbk->meta[npo.meta_cons].gso_size) {
+ struct xen_netif_extra_info *gso =
+ (struct xen_netif_extra_info *)
+ RING_GET_RESPONSE(&netif->rx,
+@@ -571,7 +697,7 @@ static void net_rx_action(unsigned long data)
+
+ resp->flags |= NETRXF_extra_info;
+
+- gso->u.gso.size = netbk->meta[npo.meta_cons].frag.size;
++ gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size;
+ gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
+ gso->u.gso.pad = 0;
+ gso->u.gso.features = 0;
+@@ -580,9 +706,11 @@ static void net_rx_action(unsigned long data)
+ gso->flags = 0;
+ }
+
+- netbk_add_frag_responses(netif, status,
+- netbk->meta + npo.meta_cons + 1,
+- nr_frags);
++ if (sco->meta_slots_used > 1) {
++ netbk_add_frag_responses(netif, status,
++ netbk->meta + npo.meta_cons + 1,
++ sco->meta_slots_used - 1);
++ }
+
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
+ irq = netif->irq;
+@@ -597,8 +725,8 @@ static void net_rx_action(unsigned long data)
+ netif_wake_queue(netif->dev);
+
+ netif_put(netif);
++ npo.meta_cons += sco->meta_slots_used;
+ dev_kfree_skb(skb);
+- npo.meta_cons += nr_frags + 1;
+ }
+
+ while (notify_nr != 0) {
+--
+1.7.3.4
+
+
+From 96069b28e612232fb739ef48d9c2c5178b19f562 Mon Sep 17 00:00:00 2001
+From: Dongxiao Xu <dongxiao.xu at intel.com>
+Date: Thu, 10 Jun 2010 19:03:15 +0800
+Subject: [PATCH 096/139] xen: netback: Set allocated memory to zero from vmalloc.
+
+This should fix the windows/linux pv driver issue.
+
+Signed-off-by: Dongxiao Xu <dongxiao.xu at intel.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c | 1 +
+ 1 files changed, 1 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index d53d88e..c7024d4 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -1724,6 +1724,7 @@ static int __init netback_init(void)
+ printk(KERN_ALERT "%s: out of memory\n", __func__);
+ return -ENOMEM;
+ }
++ memset(xen_netbk, 0, sizeof(struct xen_netbk) * xen_netbk_group_nr);
+
+ /* We can increase reservation by this much in net_rx_action(). */
+ // balloon_update_driver_allowance(NET_RX_RING_SIZE);
+--
+1.7.3.4
+
+
+From 109a748d1c11b7eeaaacedb08c48bc65640b0bb8 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Mon, 14 Jun 2010 13:23:33 +0100
+Subject: [PATCH 097/139] xen: netback: minor code formatting fixup
+
+Don't include redundant casts from allocation.
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c | 3 +--
+ 1 files changed, 1 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index c7024d4..58e920a 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -1718,8 +1718,7 @@ static int __init netback_init(void)
+ return -ENODEV;
+
+ xen_netbk_group_nr = num_online_cpus();
+- xen_netbk = (struct xen_netbk *)vmalloc(sizeof(struct xen_netbk) *
+- xen_netbk_group_nr);
++ xen_netbk = vmalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);
+ if (!xen_netbk) {
+ printk(KERN_ALERT "%s: out of memory\n", __func__);
+ return -ENOMEM;
+--
+1.7.3.4
+
+
+From 2424b59d68ee6ccdb7e52ab68bdba3a8b742513d Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Wed, 30 Jun 2010 10:12:49 +0100
+Subject: [PATCH 098/139] xen: netback: drop more relics of flipping mode
+
+The mmu_update and gnttab_transfer arrays were only used by flipping
+mode. With those gone the multicall now consists of a single call to
+GNTTABOP_copy so drop the multicall as well and just make the one
+hypercall.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Cc: Jeremy Fitzhardinge <jeremy at goop.org>
+Cc: Paul Durrant <paul.durrant at citrix.com>
+---
+ drivers/xen/netback/common.h | 3 --
+ drivers/xen/netback/netback.c | 55 +++--------------------------------------
+ 2 files changed, 4 insertions(+), 54 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 9c0c048..08e7a0e 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -304,9 +304,6 @@ struct xen_netbk {
+ u16 pending_ring[MAX_PENDING_REQS];
+ u16 dealloc_ring[MAX_PENDING_REQS];
+
+- struct multicall_entry rx_mcl[NET_RX_RING_SIZE+3];
+- struct mmu_update rx_mmu[NET_RX_RING_SIZE];
+- struct gnttab_transfer grant_trans_op[NET_RX_RING_SIZE];
+ /*
+ * Each head or fragment can be up to 4096 bytes. Given
+ * MAX_BUFFER_OFFSET of 4096 the worst case is that each
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 58e920a..ca65840 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -368,15 +368,9 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ }
+
+ struct netrx_pending_operations {
+- unsigned trans_prod, trans_cons;
+- unsigned mmu_prod, mmu_mcl;
+- unsigned mcl_prod, mcl_cons;
+ unsigned copy_prod, copy_cons;
+ unsigned meta_prod, meta_cons;
+- struct mmu_update *mmu;
+- struct gnttab_transfer *trans;
+ struct gnttab_copy *copy;
+- struct multicall_entry *mcl;
+ struct netbk_rx_meta *meta;
+ int copy_off;
+ grant_ref_t copy_gref;
+@@ -577,7 +571,6 @@ static void net_rx_action(unsigned long data)
+ s8 status;
+ u16 irq, flags;
+ struct xen_netif_rx_response *resp;
+- struct multicall_entry *mcl;
+ struct sk_buff_head rxq;
+ struct sk_buff *skb;
+ int notify_nr = 0;
+@@ -588,10 +581,7 @@ static void net_rx_action(unsigned long data)
+ struct skb_cb_overlay *sco;
+
+ struct netrx_pending_operations npo = {
+- .mmu = netbk->rx_mmu,
+- .trans = netbk->grant_trans_op,
+ .copy = netbk->grant_copy_op,
+- .mcl = netbk->rx_mcl,
+ .meta = netbk->meta,
+ };
+
+@@ -617,50 +607,13 @@ static void net_rx_action(unsigned long data)
+
+ BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
+
+- npo.mmu_mcl = npo.mcl_prod;
+- if (npo.mcl_prod) {
+- BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
+- BUG_ON(npo.mmu_prod > ARRAY_SIZE(netbk->rx_mmu));
+- mcl = npo.mcl + npo.mcl_prod++;
+-
+- BUG_ON(mcl[-1].op != __HYPERVISOR_update_va_mapping);
+- mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
+-
+- mcl->op = __HYPERVISOR_mmu_update;
+- mcl->args[0] = (unsigned long)netbk->rx_mmu;
+- mcl->args[1] = npo.mmu_prod;
+- mcl->args[2] = 0;
+- mcl->args[3] = DOMID_SELF;
+- }
+-
+- if (npo.trans_prod) {
+- BUG_ON(npo.trans_prod > ARRAY_SIZE(netbk->grant_trans_op));
+- mcl = npo.mcl + npo.mcl_prod++;
+- mcl->op = __HYPERVISOR_grant_table_op;
+- mcl->args[0] = GNTTABOP_transfer;
+- mcl->args[1] = (unsigned long)netbk->grant_trans_op;
+- mcl->args[2] = npo.trans_prod;
+- }
+-
+- if (npo.copy_prod) {
+- BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
+- mcl = npo.mcl + npo.mcl_prod++;
+- mcl->op = __HYPERVISOR_grant_table_op;
+- mcl->args[0] = GNTTABOP_copy;
+- mcl->args[1] = (unsigned long)netbk->grant_copy_op;
+- mcl->args[2] = npo.copy_prod;
+- }
+-
+- /* Nothing to do? */
+- if (!npo.mcl_prod)
++ if (!npo.copy_prod)
+ return;
+
+- BUG_ON(npo.mcl_prod > ARRAY_SIZE(netbk->rx_mcl));
+-
+- ret = HYPERVISOR_multicall(npo.mcl, npo.mcl_prod);
++ BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
++ ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, &netbk->grant_copy_op,
++ npo.copy_prod);
+ BUG_ON(ret != 0);
+- /* The mmu_machphys_update() must not fail. */
+- BUG_ON(npo.mmu_mcl && npo.mcl[npo.mmu_mcl].result != 0);
+
+ while ((skb = __skb_dequeue(&rxq)) != NULL) {
+ sco = (struct skb_cb_overlay *)skb->cb;
+--
+1.7.3.4
+
+
+From 673a19d9e2d78939c6dc9c49e7e35ee54b54c8c7 Mon Sep 17 00:00:00 2001
+From: Paul Durrant <paul.durrant at citrix.com>
+Date: Fri, 2 Jul 2010 10:28:11 +0100
+Subject: [PATCH 099/139] xen: netback: Fix basic indentation issue
+
+Signed-off-by: Paul Durrant <paul.durrant at citrix.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c | 11 +++++++----
+ 1 files changed, 7 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index ca65840..848503e 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -445,10 +445,13 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
+ copy_gop = npo->copy + npo->copy_prod++;
+ copy_gop->flags = GNTCOPY_dest_gref;
+ if (PageForeign(page)) {
+- struct xen_netbk *netbk = &xen_netbk[group];
+- struct pending_tx_info *src_pend = &netbk->pending_tx_info[idx];
+- copy_gop->source.domid = src_pend->netif->domid;
+- copy_gop->source.u.ref = src_pend->req.gref;
++ struct xen_netbk *netbk = &xen_netbk[group];
++ struct pending_tx_info *src_pend;
++
++ src_pend = &netbk->pending_tx_info[idx];
++
++ copy_gop->source.domid = src_pend->netif->domid;
++ copy_gop->source.u.ref = src_pend->req.gref;
+ copy_gop->flags |= GNTCOPY_source_gref;
+ } else {
+ copy_gop->source.domid = DOMID_SELF;
+--
+1.7.3.4
+
+
+From d08b2d1f2ff4723b335d0fb5b91ffd6cb6a005d3 Mon Sep 17 00:00:00 2001
+From: Paul Durrant <paul.durrant at citrix.com>
+Date: Mon, 5 Jul 2010 11:45:29 +0100
+Subject: [PATCH 100/139] xen: netback: Add a new style of passing GSO packets to frontends.
+
+feature-gso-tcpv4-prefix use precedes the packet data passed to
+the frontend with a ring entry that contains the necessary
+metadata. This style of GSO passing is required for Citrix
+Windows PV Drivers.
+
+Signed-off-by: Paul Durrant <paul.durrant at citrix.com>
+Cc: Ian Campbell <ian.campbell at citrix.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/common.h | 3 ++-
+ drivers/xen/netback/netback.c | 37 ++++++++++++++++++++++++++++++++++---
+ drivers/xen/netback/xenbus.c | 15 ++++++++++++---
+ include/xen/interface/io/netif.h | 4 ++++
+ 4 files changed, 52 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 08e7a0e..78451ab 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -80,7 +80,8 @@ struct xen_netif {
+ int features;
+
+ /* Internal feature information. */
+- u8 can_queue:1; /* can queue packets for receiver? */
++ u8 can_queue:1; /* can queue packets for receiver? */
++ u8 gso_prefix:1; /* use a prefix segment for GSO information */
+
+ /* Allow netif_be_start_xmit() to peek ahead in the rx request
+ * ring. This is a prediction of what rx_req_cons will be once
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 848503e..e93a62e 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -432,6 +432,7 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
+ /* Overflowed this request, go to the next one */
+ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
+ meta = npo->meta + npo->meta_prod++;
++ meta->gso_size = 0;
+ meta->size = 0;
+ meta->id = req->id;
+ npo->copy_off = 0;
+@@ -492,9 +493,23 @@ static int netbk_gop_skb(struct sk_buff *skb,
+
+ old_meta_prod = npo->meta_prod;
+
++ /* Set up a GSO prefix descriptor, if necessary */
++ if (skb_shinfo(skb)->gso_size && netif->gso_prefix) {
++ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
++ meta = npo->meta + npo->meta_prod++;
++ meta->gso_size = skb_shinfo(skb)->gso_size;
++ meta->size = 0;
++ meta->id = req->id;
++ }
++
+ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
+ meta = npo->meta + npo->meta_prod++;
+- meta->gso_size = skb_shinfo(skb)->gso_size;
++
++ if (!netif->gso_prefix)
++ meta->gso_size = skb_shinfo(skb)->gso_size;
++ else
++ meta->gso_size = 0;
++
+ meta->size = 0;
+ meta->id = req->id;
+ npo->copy_off = 0;
+@@ -506,7 +521,7 @@ static int netbk_gop_skb(struct sk_buff *skb,
+ offset_in_page(skb->data), 1);
+
+ /* Leave a gap for the GSO descriptor. */
+- if (skb_shinfo(skb)->gso_size)
++ if (skb_shinfo(skb)->gso_size && !netif->gso_prefix)
+ netif->rx.req_cons++;
+
+ for (i = 0; i < nr_frags; i++) {
+@@ -623,6 +638,21 @@ static void net_rx_action(unsigned long data)
+
+ netif = netdev_priv(skb->dev);
+
++ if (netbk->meta[npo.meta_cons].gso_size && netif->gso_prefix) {
++ resp = RING_GET_RESPONSE(&netif->rx,
++ netif->rx.rsp_prod_pvt++);
++
++ resp->flags = NETRXF_gso_prefix | NETRXF_more_data;
++
++ resp->offset = netbk->meta[npo.meta_cons].gso_size;
++ resp->id = netbk->meta[npo.meta_cons].id;
++ resp->status = sco->meta_slots_used;
++
++ npo.meta_cons++;
++ sco->meta_slots_used--;
++ }
++
++
+ netif->stats.tx_bytes += skb->len;
+ netif->stats.tx_packets++;
+
+@@ -633,6 +663,7 @@ static void net_rx_action(unsigned long data)
+ flags = 0;
+ else
+ flags = NETRXF_more_data;
++
+ if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
+ flags |= NETRXF_csum_blank | NETRXF_data_validated;
+ else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
+@@ -645,7 +676,7 @@ static void net_rx_action(unsigned long data)
+ netbk->meta[npo.meta_cons].size,
+ flags);
+
+- if (netbk->meta[npo.meta_cons].gso_size) {
++ if (netbk->meta[npo.meta_cons].gso_size && !netif->gso_prefix) {
+ struct xen_netif_extra_info *gso =
+ (struct xen_netif_extra_info *)
+ RING_GET_RESPONSE(&netif->rx,
+diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
+index e30b0c7..cda987f 100644
+--- a/drivers/xen/netback/xenbus.c
++++ b/drivers/xen/netback/xenbus.c
+@@ -457,16 +457,25 @@ static int connect_rings(struct backend_info *be)
+ be->netif->dev->mtu = ETH_DATA_LEN;
+ }
+
+- if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4", "%d",
+- &val) < 0)
++ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4",
++ "%d", &val) < 0)
+ val = 0;
+ if (val) {
+ be->netif->features |= NETIF_F_TSO;
+ be->netif->dev->features |= NETIF_F_TSO;
+ }
+
++ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix",
++ "%d", &val) < 0)
++ val = 0;
++ if (val) {
++ be->netif->features |= NETIF_F_TSO;
++ be->netif->dev->features |= NETIF_F_TSO;
++ be->netif->gso_prefix = 1;
++ }
++
+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
+- "%d", &val) < 0)
++ "%d", &val) < 0)
+ val = 0;
+ if (val) {
+ be->netif->features &= ~NETIF_F_IP_CSUM;
+diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h
+index 518481c..8309344 100644
+--- a/include/xen/interface/io/netif.h
++++ b/include/xen/interface/io/netif.h
+@@ -131,6 +131,10 @@ struct xen_netif_rx_request {
+ #define _NETRXF_extra_info (3)
+ #define NETRXF_extra_info (1U<<_NETRXF_extra_info)
+
++/* GSO Prefix descriptor. */
++#define _NETRXF_gso_prefix (4)
++#define NETRXF_gso_prefix (1U<<_NETRXF_gso_prefix)
++
+ struct xen_netif_rx_response {
+ uint16_t id;
+ uint16_t offset; /* Offset in page of start of received packet */
+--
+1.7.3.4
+
+
+From bd910979612331d60a629c16a49ebeb5efa0f035 Mon Sep 17 00:00:00 2001
+From: Paul Durrant <paul.durrant at citrix.com>
+Date: Fri, 2 Jul 2010 10:28:13 +0100
+Subject: [PATCH 101/139] xen: netback: Make frontend features distinct from netback feature flags.
+
+Make sure that if a feature flag is disabled by ethtool on netback
+that we do not gratuitously re-enabled it when we check the frontend
+features during ring connection.
+
+Signed-off-by: Paul Durrant <paul.durrant at citrix.com>
+Cc: Ian Campbell <ian.campbell at citrix.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/common.h | 14 ++++++--
+ drivers/xen/netback/interface.c | 68 ++++++++++++++++++++++++++++++--------
+ drivers/xen/netback/netback.c | 2 +-
+ drivers/xen/netback/xenbus.c | 44 ++++++++++---------------
+ 4 files changed, 81 insertions(+), 47 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 78451ab..a5f3759 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -76,12 +76,17 @@ struct xen_netif {
+ struct vm_struct *tx_comms_area;
+ struct vm_struct *rx_comms_area;
+
+- /* Set of features that can be turned on in dev->features. */
+- int features;
++ /* Flags that must not be set in dev->features */
++ int features_disabled;
++
++ /* Frontend feature information. */
++ u8 can_sg:1;
++ u8 gso:1;
++ u8 gso_prefix:1;
++ u8 csum:1;
+
+ /* Internal feature information. */
+ u8 can_queue:1; /* can queue packets for receiver? */
+- u8 gso_prefix:1; /* use a prefix segment for GSO information */
+
+ /* Allow netif_be_start_xmit() to peek ahead in the rx request
+ * ring. This is a prediction of what rx_req_cons will be once
+@@ -187,6 +192,7 @@ void netif_accel_init(void);
+
+ void netif_disconnect(struct xen_netif *netif);
+
++void netif_set_features(struct xen_netif *netif);
+ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle);
+ int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
+ unsigned long rx_ring_ref, unsigned int evtchn);
+@@ -223,7 +229,7 @@ static inline int netbk_can_queue(struct net_device *dev)
+ static inline int netbk_can_sg(struct net_device *dev)
+ {
+ struct xen_netif *netif = netdev_priv(dev);
+- return netif->features & NETIF_F_SG;
++ return netif->can_sg;
+ }
+
+ struct pending_tx_info {
+diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
+index 172ef4c..2e8508a 100644
+--- a/drivers/xen/netback/interface.c
++++ b/drivers/xen/netback/interface.c
+@@ -121,31 +121,69 @@ static int netbk_change_mtu(struct net_device *dev, int mtu)
+ return 0;
+ }
+
+-static int netbk_set_sg(struct net_device *dev, u32 data)
++void netif_set_features(struct xen_netif *netif)
+ {
+- if (data) {
+- struct xen_netif *netif = netdev_priv(dev);
++ struct net_device *dev = netif->dev;
++ int features = dev->features;
++
++ if (netif->can_sg)
++ features |= NETIF_F_SG;
++ if (netif->gso || netif->gso_prefix)
++ features |= NETIF_F_TSO;
++ if (netif->csum)
++ features |= NETIF_F_IP_CSUM;
++
++ features &= ~(netif->features_disabled);
+
+- if (!(netif->features & NETIF_F_SG))
++ if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN)
++ dev->mtu = ETH_DATA_LEN;
++
++ dev->features = features;
++}
++
++static int netbk_set_tx_csum(struct net_device *dev, u32 data)
++{
++ struct xen_netif *netif = netdev_priv(dev);
++ if (data) {
++ if (!netif->csum)
+ return -ENOSYS;
++ netif->features_disabled &= ~NETIF_F_IP_CSUM;
++ } else {
++ netif->features_disabled |= NETIF_F_IP_CSUM;
+ }
+
+- if (dev->mtu > ETH_DATA_LEN)
+- dev->mtu = ETH_DATA_LEN;
++ netif_set_features(netif);
++ return 0;
++}
+
+- return ethtool_op_set_sg(dev, data);
++static int netbk_set_sg(struct net_device *dev, u32 data)
++{
++ struct xen_netif *netif = netdev_priv(dev);
++ if (data) {
++ if (!netif->can_sg)
++ return -ENOSYS;
++ netif->features_disabled &= ~NETIF_F_SG;
++ } else {
++ netif->features_disabled |= NETIF_F_SG;
++ }
++
++ netif_set_features(netif);
++ return 0;
+ }
+
+ static int netbk_set_tso(struct net_device *dev, u32 data)
+ {
++ struct xen_netif *netif = netdev_priv(dev);
+ if (data) {
+- struct xen_netif *netif = netdev_priv(dev);
+-
+- if (!(netif->features & NETIF_F_TSO))
++ if (!netif->gso && !netif->gso_prefix)
+ return -ENOSYS;
++ netif->features_disabled &= ~NETIF_F_TSO;
++ } else {
++ netif->features_disabled |= NETIF_F_TSO;
+ }
+
+- return ethtool_op_set_tso(dev, data);
++ netif_set_features(netif);
++ return 0;
+ }
+
+ static void netbk_get_drvinfo(struct net_device *dev,
+@@ -200,7 +238,7 @@ static struct ethtool_ops network_ethtool_ops =
+ .get_drvinfo = netbk_get_drvinfo,
+
+ .get_tx_csum = ethtool_op_get_tx_csum,
+- .set_tx_csum = ethtool_op_set_tx_csum,
++ .set_tx_csum = netbk_set_tx_csum,
+ .get_sg = ethtool_op_get_sg,
+ .set_sg = netbk_set_sg,
+ .get_tso = ethtool_op_get_tso,
+@@ -242,7 +280,8 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
+ netif->domid = domid;
+ netif->group = -1;
+ netif->handle = handle;
+- netif->features = NETIF_F_SG;
++ netif->can_sg = 1;
++ netif->csum = 1;
+ atomic_set(&netif->refcnt, 1);
+ init_waitqueue_head(&netif->waiting_to_free);
+ netif->dev = dev;
+@@ -259,8 +298,7 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
+ init_timer(&netif->tx_queue_timeout);
+
+ dev->netdev_ops = &netback_ops;
+- dev->features = NETIF_F_IP_CSUM|NETIF_F_SG;
+-
++ netif_set_features(netif);
+ SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
+
+ dev->tx_queue_len = netbk_queue_length;
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index e93a62e..63a771e 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -238,7 +238,7 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
+
+ static inline int netbk_max_required_rx_slots(struct xen_netif *netif)
+ {
+- if (netif->features & (NETIF_F_SG|NETIF_F_TSO))
++ if (netif->can_sg || netif->gso || netif->gso_prefix)
+ return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
+ return 1; /* all in one */
+ }
+diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
+index cda987f..17ff5cf 100644
+--- a/drivers/xen/netback/xenbus.c
++++ b/drivers/xen/netback/xenbus.c
+@@ -404,6 +404,7 @@ static void connect(struct backend_info *be)
+
+ static int connect_rings(struct backend_info *be)
+ {
++ struct xen_netif *netif = be->netif;
+ struct xenbus_device *dev = be->dev;
+ unsigned long tx_ring_ref, rx_ring_ref;
+ unsigned int evtchn, rx_copy;
+@@ -437,53 +438,42 @@ static int connect_rings(struct backend_info *be)
+ if (!rx_copy)
+ return -EOPNOTSUPP;
+
+- if (be->netif->dev->tx_queue_len != 0) {
++ if (netif->dev->tx_queue_len != 0) {
+ if (xenbus_scanf(XBT_NIL, dev->otherend,
+ "feature-rx-notify", "%d", &val) < 0)
+ val = 0;
+ if (val)
+- be->netif->can_queue = 1;
++ netif->can_queue = 1;
+ else
+ /* Must be non-zero for pfifo_fast to work. */
+- be->netif->dev->tx_queue_len = 1;
++ netif->dev->tx_queue_len = 1;
+ }
+
+- if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", "%d", &val) < 0)
++ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg",
++ "%d", &val) < 0)
+ val = 0;
+- if (!val) {
+- be->netif->features &= ~NETIF_F_SG;
+- be->netif->dev->features &= ~NETIF_F_SG;
+- if (be->netif->dev->mtu > ETH_DATA_LEN)
+- be->netif->dev->mtu = ETH_DATA_LEN;
+- }
++ netif->can_sg = !!val;
+
+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4",
+- "%d", &val) < 0)
++ "%d", &val) < 0)
+ val = 0;
+- if (val) {
+- be->netif->features |= NETIF_F_TSO;
+- be->netif->dev->features |= NETIF_F_TSO;
+- }
++ netif->gso = !!val;
+
+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix",
+- "%d", &val) < 0)
++ "%d", &val) < 0)
+ val = 0;
+- if (val) {
+- be->netif->features |= NETIF_F_TSO;
+- be->netif->dev->features |= NETIF_F_TSO;
+- be->netif->gso_prefix = 1;
+- }
++ netif->gso_prefix = !!val;
+
+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
+- "%d", &val) < 0)
++ "%d", &val) < 0)
+ val = 0;
+- if (val) {
+- be->netif->features &= ~NETIF_F_IP_CSUM;
+- be->netif->dev->features &= ~NETIF_F_IP_CSUM;
+- }
++ netif->csum = !val;
++
++ /* Set dev->features */
++ netif_set_features(netif);
+
+ /* Map the shared frame, irq etc. */
+- err = netif_map(be->netif, tx_ring_ref, rx_ring_ref, evtchn);
++ err = netif_map(netif, tx_ring_ref, rx_ring_ref, evtchn);
+ if (err) {
+ xenbus_dev_fatal(dev, err,
+ "mapping shared-frames %lu/%lu port %u",
+--
+1.7.3.4
+
+
+From cf8c20169427de5829e3ec723712b77de52e64ac Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Thu, 15 Jul 2010 10:46:50 -0700
+Subject: [PATCH 102/139] xen: netback: only initialize for PV domains
+
+HVM domains don't support netback
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c | 2 +-
+ 1 files changed, 1 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 63a771e..911c85b 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -1701,7 +1701,7 @@ static int __init netback_init(void)
+ int rc = 0;
+ int group;
+
+- if (!xen_domain())
++ if (!xen_pv_domain())
+ return -ENODEV;
+
+ xen_netbk_group_nr = num_online_cpus();
+--
+1.7.3.4
+
+
+From 00a5c7eddb919701ac998b33bf4f283efaa06bbc Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Wed, 21 Jul 2010 13:24:26 -0700
+Subject: [PATCH 103/139] xen/rings: make protocol specific usage of shared sring explicit
+
+I don't think protocol specific data really belongs in this header
+but since it is already there and we seem to be stuck with it let's at
+least make the users explicit lest people get caught out by future new
+fields moving the pad field around.
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+(cherry picked from commit df0afd34ec3015e44b8121d0e542d32fb04d438d)
+---
+ include/xen/interface/io/ring.h | 8 +++++++-
+ 1 files changed, 7 insertions(+), 1 deletions(-)
+
+diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h
+index e8cbf43..e2d62cf 100644
+--- a/include/xen/interface/io/ring.h
++++ b/include/xen/interface/io/ring.h
+@@ -73,7 +73,13 @@ union __name##_sring_entry { \
+ struct __name##_sring { \
+ RING_IDX req_prod, req_event; \
+ RING_IDX rsp_prod, rsp_event; \
+- uint8_t pad[48]; \
++ union { \
++ struct { \
++ uint8_t msg; \
++ } tapif_user; \
++ uint8_t pvt_pad[4]; \
++ } private; \
++ uint8_t pad[44]; \
+ union __name##_sring_entry ring[1]; /* variable-length */ \
+ }; \
+ \
+--
+1.7.3.4
+
+
+From 1866aec6117132b4399f9e956994af259ad5cfdb Mon Sep 17 00:00:00 2001
+From: Bastian Blank <waldi at debian.org>
+Date: Thu, 29 Jul 2010 17:30:18 +0200
+Subject: [PATCH 104/139] xen: netback: Fix null-pointer access in netback_uevent
+
+The uevent method of Xen netback does not check if the network
+device is already setup and tries to dereference a null-pointer if not.
+
+Signed-off-by: Bastian Blank <waldi at debian.org>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/xenbus.c | 10 ++--------
+ 1 files changed, 2 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
+index 17ff5cf..1fec65a 100644
+--- a/drivers/xen/netback/xenbus.c
++++ b/drivers/xen/netback/xenbus.c
+@@ -154,17 +154,11 @@ fail:
+ */
+ static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env)
+ {
+- struct backend_info *be;
+- struct xen_netif *netif;
++ struct backend_info *be = dev_get_drvdata(&xdev->dev);
+ char *val;
+
+ DPRINTK("netback_uevent");
+
+- be = dev_get_drvdata(&xdev->dev);
+- if (!be)
+- return 0;
+- netif = be->netif;
+-
+ val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
+ if (IS_ERR(val)) {
+ int err = PTR_ERR(val);
+@@ -179,7 +173,7 @@ static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *en
+ kfree(val);
+ }
+
+- if (add_uevent_var(env, "vif=%s", netif->dev->name))
++ if (be && be->netif && add_uevent_var(env, "vif=%s", be->netif->dev->name))
+ return -ENOMEM;
+
+ return 0;
+--
+1.7.3.4
+
+
+From 7f1732b25d00393131220a0369caa8a28faf46e1 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Fri, 30 Jul 2010 15:16:47 +0100
+Subject: [PATCH 105/139] xen: netback: check if foreign pages are actually netback-created foreign pages.
+
+020ba906 "xen/netback: Multiple tasklets support." changed
+netbk_gop_frag_copy to attempt to lookup a pending_tx_info for any
+foreign page, regardless of whether the page was a netback-foreign
+page.
+
+In the case of non-netback pages this can lead to dereferencing a NULL
+src_pend->netif.
+
+Restore the behaviour of netif_page_index prior to a3031942
+"xen/netback: Introduce a new struct type page_ext" by performing
+tests to ensure that page is a netback page and extend the same checks
+to netif_page_group.
+
+Actually combine netif_page_{index,group} in to a single function
+since they are always called together and it saves duplicating all the
+checks.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Cc: Jeremy Fitzhardinge <jeremy at goop.org>
+Cc: Xu, Dongxiao <dongxiao.xu at intel.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c | 56 ++++++++++++++++++++++++++++------------
+ 1 files changed, 39 insertions(+), 17 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 911c85b..95df223 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -89,18 +89,37 @@ static inline void netif_set_page_ext(struct page *pg, unsigned int group,
+ pg->mapping = ext.mapping;
+ }
+
+-static inline unsigned int netif_page_group(const struct page *pg)
++static inline int netif_get_page_ext(struct page *pg, unsigned int *_group, unsigned int *_idx)
+ {
+ union page_ext ext = { .mapping = pg->mapping };
++ struct xen_netbk *netbk;
++ unsigned int group, idx;
+
+- return ext.e.group - 1;
+-}
++ if (!PageForeign(pg))
++ return 0;
+
+-static inline unsigned int netif_page_index(const struct page *pg)
+-{
+- union page_ext ext = { .mapping = pg->mapping };
++ group = ext.e.group - 1;
++
++ if (group < 0 || group >= xen_netbk_group_nr)
++ return 0;
++
++ netbk = &xen_netbk[group];
++
++ if (netbk->mmap_pages == NULL)
++ return 0;
+
+- return ext.e.idx;
++ idx = ext.e.idx;
++
++ if ((idx < 0) || (idx >= MAX_PENDING_REQS))
++ return 0;
++
++ if (netbk->mmap_pages[idx] != pg)
++ return 0;
++
++ *_group = group;
++ *_idx = idx;
++
++ return 1;
+ }
+
+ /*
+@@ -386,8 +405,12 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
+ {
+ struct gnttab_copy *copy_gop;
+ struct netbk_rx_meta *meta;
+- int group = netif_page_group(page);
+- int idx = netif_page_index(page);
++ /*
++ * These variables a used iff netif_get_page_ext returns true,
++ * in which case they are guaranteed to be initialized.
++ */
++ unsigned int uninitialized_var(group), uninitialized_var(idx);
++ int foreign = netif_get_page_ext(page, &group, &idx);
+ unsigned long bytes;
+
+ /* Data must not cross a page boundary. */
+@@ -445,7 +468,7 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
+
+ copy_gop = npo->copy + npo->copy_prod++;
+ copy_gop->flags = GNTCOPY_dest_gref;
+- if (PageForeign(page)) {
++ if (foreign) {
+ struct xen_netbk *netbk = &xen_netbk[group];
+ struct pending_tx_info *src_pend;
+
+@@ -1535,14 +1558,13 @@ static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
+
+ static void netif_page_release(struct page *page, unsigned int order)
+ {
+- int group = netif_page_group(page);
+- int idx = netif_page_index(page);
+- struct xen_netbk *netbk = &xen_netbk[group];
++ unsigned int group, idx;
++ int foreign = netif_get_page_ext(page, &group, &idx);
++
++ BUG_ON(!foreign);
+ BUG_ON(order);
+- BUG_ON(group < 0 || group >= xen_netbk_group_nr);
+- BUG_ON(idx < 0 || idx >= MAX_PENDING_REQS);
+- BUG_ON(netbk->mmap_pages[idx] != page);
+- netif_idx_release(netbk, idx);
++
++ netif_idx_release(&xen_netbk[group], idx);
+ }
+
+ irqreturn_t netif_be_int(int irq, void *dev_id)
+--
+1.7.3.4
+
+
+From 14a12990d12cd9ee919d5579c1d0c3df74ad66e7 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Fri, 30 Jul 2010 15:16:46 +0100
+Subject: [PATCH 106/139] xen: netback: do not unleash netback threads until initialisation is complete
+
+Otherwise netbk_action_thread can reference &netbk->net_schedule_list
+(via tx_work_todo) before it is initialised. Until now it was zeroed
+which is probably safe but not exactly robust.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Cc: Jeremy Fitzhardinge <jeremy at goop.org>
+Cc: Xu, Dongxiao <dongxiao.xu at intel.com>
+Cc: Paul Durrant <Paul.Durrant at citrix.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c | 4 +++-
+ 1 files changed, 3 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 95df223..2646383 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -1782,7 +1782,6 @@ static int __init netback_init(void)
+
+ if (!IS_ERR(netbk->kthread.task)) {
+ kthread_bind(netbk->kthread.task, group);
+- wake_up_process(netbk->kthread.task);
+ } else {
+ printk(KERN_ALERT
+ "kthread_run() fails at netback\n");
+@@ -1808,6 +1807,9 @@ static int __init netback_init(void)
+ spin_lock_init(&netbk->net_schedule_list_lock);
+
+ atomic_set(&netbk->netfront_count, 0);
++
++ if (MODPARM_netback_kthread)
++ wake_up_process(netbk->kthread.task);
+ }
+
+ netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
+--
+1.7.3.4
+
+
+From 6decec42c631f2e2e268f00ce8841faf38817ca8 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ijc at hellion.org.uk>
+Date: Thu, 2 Sep 2010 14:36:40 +0100
+Subject: [PATCH 108/139] xen: netback: save interrupt state in add_to_net_schedule_list_tail
+
+add_to_net_schedule_list_tail is called from both hard interrupt context
+(netif_be_int) and soft interrupt/process context
+(netif_schedule_work) so use the interrupt state saving spinlock
+variants.
+
+Fixes:
+ ------------[ cut here ]------------
+ WARNING: at kernel/lockdep.c:2323 trace_hardirqs_on_caller+0xef/0x1a0()
+ Hardware name: PowerEdge 860
+ Modules linked in: rtc_cmos rtc_core rtc_lib
+ Pid: 16, comm: xenwatch Not tainted 2.6.32.18-x86_32p-xen0-00850-ge6b9b2c #98
+ Call Trace:
+ [<c103951c>] warn_slowpath_common+0x6c/0xc0
+ [<c1039585>] warn_slowpath_null+0x15/0x20
+ [<c105f60f>] trace_hardirqs_on_caller+0xef/0x1a0
+ [<c105f6cb>] trace_hardirqs_on+0xb/0x10
+ [<c136cc72>] _spin_unlock_irq+0x22/0x40
+ [<c11ab9ef>] add_to_net_schedule_list_tail+0x5f/0xb0
+ [<c11aba6b>] netif_be_int+0x2b/0x120
+ [<c106dd8e>] handle_IRQ_event+0x2e/0xe0
+ [<c106f98e>] handle_level_irq+0x6e/0xf0
+ [<c1197cdf>] __xen_evtchn_do_upcall+0x16f/0x190
+ [<c11981b8>] xen_evtchn_do_upcall+0x28/0x40
+ [<c100b487>] xen_do_upcall+0x7/0xc
+ [<c119bcf9>] xs_talkv+0x59/0x1a0
+ [<c119bf6a>] xs_single+0x3a/0x50
+ [<c119c6f9>] xenbus_read+0x39/0x60
+ [<c11adf77>] frontend_changed+0x3e7/0x6a0
+ [<c119d35a>] xenbus_otherend_changed+0x8a/0xa0
+ [<c119d572>] frontend_changed+0x12/0x20
+ [<c119b9dc>] xenwatch_thread+0x7c/0x140
+ [<c104ea74>] kthread+0x74/0x80
+ [<c100b433>] kernel_thread_helper+0x7/0x10
+ ---[ end trace 48d73949a8e0909a ]---
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/netback.c | 6 ++++--
+ 1 files changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 2646383..1d080f6 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -787,17 +787,19 @@ static void remove_from_net_schedule_list(struct xen_netif *netif)
+
+ static void add_to_net_schedule_list_tail(struct xen_netif *netif)
+ {
++ unsigned long flags;
++
+ struct xen_netbk *netbk = &xen_netbk[netif->group];
+ if (__on_net_schedule_list(netif))
+ return;
+
+- spin_lock_irq(&netbk->net_schedule_list_lock);
++ spin_lock_irqsave(&netbk->net_schedule_list_lock, flags);
+ if (!__on_net_schedule_list(netif) &&
+ likely(netif_schedulable(netif))) {
+ list_add_tail(&netif->list, &netbk->net_schedule_list);
+ netif_get(netif);
+ }
+- spin_unlock_irq(&netbk->net_schedule_list_lock);
++ spin_unlock_irqrestore(&netbk->net_schedule_list_lock, flags);
+ }
+
+ void netif_schedule_work(struct xen_netif *netif)
+--
+1.7.3.4
+
+
+From 0e667d904c6ab6c44cedef51ef00964f9e0559ba Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Fri, 8 Oct 2010 17:11:51 +0100
+Subject: [PATCH 109/139] xen: netback: increase size of rx_meta array.
+
+We can end up needing as many of these as we need grant copy operations so
+increase the array size for the same reason.
+
+Crash observed on XenServer:
+kernel: ------------[ cut here ]------------
+kernel: kernel BUG at drivers/xen/netback/netback.c:834!
+kernel: invalid opcode: 0000 [#1] SMP
+kernel: last sysfs file: /sys/devices/xen-backend/vbd-10-768/statistics/rd_usecs
+kernel: Process netback (pid: 1413, ti=ec8a4000 task=ed0a6b70 task.ti=ec8a4000)
+kernel: Stack: 00000000 00000612 00000001 00000000 00020000 00000000 ecfbe000 00000000
+kernel: ec8a5f80 ec8a5f98 ec8a5fac 00000000 c0537220 c0539220 00000000 c0534220
+kernel: cd7afaa0 cd7afaa0 0000000c 00000014 062de396 00000001 00000001 00000014
+kernel: Call Trace:
+kernel: [<c0285f10>] ? netbk_action_thread+0x0/0x1fe0
+kernel: [<c013daf2>] ? kthread+0x42/0x70
+kernel: [<c013dab0>] ? kthread+0x0/0x70
+kernel: [<c010569b>] ? kernel_thread_helper+0x7/0x10
+kernel: =======================
+kernel: Code: 00 00 c7 42 08 20 82 53 c0 8b 85 e4 fe ff ff c7 42 10 00 00 00 00 \
+ c7 42 14 f0 7f 00 00 89 42 0c 8b 8d ec fe ff ff e9 3e e9 ff ff <0f> \
+ 0b eb fe 0f 0b eb fe 0f 0b eb fe 0f 0b eb fe 31 c0 e8 bf 31
+kernel: EIP: [<c028790a>] netbk_action_thread+0x19fa/0x1fe0 SS:ESP 0069:ec8a5d98
+
+Corresponding to
+ BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/netback/common.h | 2 +-
+ 1 files changed, 1 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index a5f3759..ce0041a 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -319,7 +319,7 @@ struct xen_netbk {
+ struct gnttab_copy grant_copy_op[2*NET_RX_RING_SIZE];
+ unsigned char rx_notify[NR_IRQS];
+ u16 notify_list[NET_RX_RING_SIZE];
+- struct netbk_rx_meta meta[NET_RX_RING_SIZE];
++ struct netbk_rx_meta meta[2*NET_RX_RING_SIZE];
+ };
+
+ extern struct xen_netbk *xen_netbk;
+--
+1.7.3.4
+
+
+From 36713152990836043c908777654ea01ed13ccdf4 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Fri, 15 Oct 2010 13:41:44 +0100
+Subject: [PATCH 110/139] xen: netback: take net_schedule_list_lock when removing entry from net_schedule_list
+
+There is a race in net_tx_build_mops between checking if
+net_schedule_list is empty and actually dequeuing the first entry on
+the list. If another thread dequeues the only entry on the list during
+this window we crash because list_first_entry expects a non-empty
+list, like so:
+
+[ 0.133127] BUG: unable to handle kernel NULL pointer dereference at 00000008
+[ 0.133132] IP: [<c12aae71>] net_tx_build_mops+0x91/0xa70
+[ 0.133142] *pdpt = 0000000000000000 *pde = 000000000000000f
+[ 0.133147] Oops: 0002 1 SMP
+[ 0.133150] last sysfs file:
+[ 0.133152] Modules linked in:
+[ 0.133154]
+[ 0.133156] Pid: 55, comm: netback/1 Not tainted (2.6.32.12-0.7.1 #1) Latitude E4310
+[ 0.133158] EIP: 0061:[<c12aae71>] EFLAGS: 00010202 CPU: 1
+[ 0.133161] EIP is at net_tx_build_mops+0x91/0xa70
+[ 0.133163] EAX: 00000012 EBX: 00000008 ECX: e112b734 EDX: e112b76c
+[ 0.133165] ESI: ffffff30 EDI: 00000000 EBP: e112b734 ESP: dfe85d98
+[ 0.133167] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0069
+[ 0.133169] Process netback/1 (pid: 55, ti=dfe84000 task=dfe83340 task.ti=dfe84000)
+[ 0.133170] Stack:
+[ 0.133172] 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
+[ 0.133177] <0> 00000000 e112b734 e112ec08 e112b7f8 e112ec08 ffffff30 00000000 00000000
+[ 0.133186] <0> 00000000 00000000 00000000 e112b76c dfe85df4 00000001 00000000 aaaaaaaa
+[ 0.133193] Call Trace:
+[ 0.133202] [<c12abc7f>] net_tx_action+0x42f/0xac0
+[ 0.133206] [<c12ac37a>] netbk_action_thread+0x6a/0x1b0
+[ 0.133212] [<c1057444>] kthread+0x74/0x80
+[ 0.133218] [<c10049d7>] kernel_thread_helper+0x7/0x10
+[ 0.133220] Code: c4 00 00 00 89 74 24 58 39 74 24 2c 0f 84 c7 06 00 00 8b 74 24 \
+ 58 8b 5c 24 58 81 ee d0 00 00 00 83 c3 08 89 74 24 34 8b 7c 24 \
+ 58 <f0> ff 47 08 89 f0 e8 b4 f9 ff ff 8b 46 2c 8b 56 34 89 44 24 5c
+[ 0.133261] EIP: [<c12aae71>] net_tx_build_mops+0x91/0xa70 SS:ESP 0069:dfe85d98
+[ 0.133265] CR2: 0000000000000008
+[ 0.133274] --[ end trace e2c5c15f54bd9d93 ]--
+
+Therefore after the initial lock free check for an empty list check
+again with the lock held before dequeueing the entry.
+
+Based on a patch by Tomasz Wroblewski.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Cc: Tomasz Wroblewski <tomasz.wroblewski at citrix.com>
+---
+ drivers/xen/netback/netback.c | 35 ++++++++++++++++++++++++++++-------
+ 1 files changed, 28 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 1d080f6..3b03435 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -774,15 +774,34 @@ static int __on_net_schedule_list(struct xen_netif *netif)
+ return !list_empty(&netif->list);
+ }
+
++/* Must be called with net_schedule_list_lock held */
+ static void remove_from_net_schedule_list(struct xen_netif *netif)
+ {
+- struct xen_netbk *netbk = &xen_netbk[netif->group];
+- spin_lock_irq(&netbk->net_schedule_list_lock);
+ if (likely(__on_net_schedule_list(netif))) {
+ list_del_init(&netif->list);
+ netif_put(netif);
+ }
++}
++
++static struct xen_netif *poll_net_schedule_list(struct xen_netbk *netbk)
++{
++ struct xen_netif *netif = NULL;
++
++ spin_lock_irq(&netbk->net_schedule_list_lock);
++ if (list_empty(&netbk->net_schedule_list))
++ goto out;
++
++ netif = list_first_entry(&netbk->net_schedule_list,
++ struct xen_netif, list);
++ if (!netif)
++ goto out;
++
++ netif_get(netif);
++
++ remove_from_net_schedule_list(netif);
++out:
+ spin_unlock_irq(&netbk->net_schedule_list_lock);
++ return netif;
+ }
+
+ static void add_to_net_schedule_list_tail(struct xen_netif *netif)
+@@ -817,7 +836,10 @@ void netif_schedule_work(struct xen_netif *netif)
+
+ void netif_deschedule_work(struct xen_netif *netif)
+ {
++ struct xen_netbk *netbk = &xen_netbk[netif->group];
++ spin_lock_irq(&netbk->net_schedule_list_lock);
+ remove_from_net_schedule_list(netif);
++ spin_unlock_irq(&netbk->net_schedule_list_lock);
+ }
+
+
+@@ -1301,12 +1323,11 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk)
+ int work_to_do;
+ unsigned int data_len;
+ pending_ring_idx_t index;
+-
++
+ /* Get a netif from the list with work to do. */
+- netif = list_first_entry(&netbk->net_schedule_list,
+- struct xen_netif, list);
+- netif_get(netif);
+- remove_from_net_schedule_list(netif);
++ netif = poll_net_schedule_list(netbk);
++ if (!netif)
++ continue;
+
+ RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
+ if (!work_to_do) {
+--
+1.7.3.4
+
+
+From 6ad4931abe4b111253da13805cc504cc72b0df1c Mon Sep 17 00:00:00 2001
+From: Paul Durrant <paul.durrant at citrix.com>
+Date: Wed, 15 Dec 2010 09:48:12 +0000
+Subject: [PATCH 111/139] xen: netback: Re-define PKT_PROT_LEN to be bigger.
+
+Re-define PKT_PROT_LEN to be big enough to handle maximal IPv4 and TCP options and phrase
+the definition so that it's reasonably obvious that's what it's for.
+
+Signed-off-by: Paul Durrant <paul.durrant at citrix.com>
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/netback.c | 14 +++++++++-----
+ 1 files changed, 9 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 3b03435..9bbd230 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -36,9 +36,11 @@
+
+ #include "common.h"
+
+-#include <linux/tcp.h>
+-#include <linux/udp.h>
+ #include <linux/kthread.h>
++#include <linux/if_vlan.h>
++#include <linux/udp.h>
++
++#include <net/tcp.h>
+
+ #include <xen/balloon.h>
+ #include <xen/events.h>
+@@ -125,10 +127,12 @@ static inline int netif_get_page_ext(struct page *pg, unsigned int *_group, unsi
+ /*
+ * This is the amount of packet we copy rather than map, so that the
+ * guest can't fiddle with the contents of the headers while we do
+- * packet processing on them (netfilter, routing, etc). 72 is enough
+- * to cover TCP+IP headers including options.
++ * packet processing on them (netfilter, routing, etc).
+ */
+-#define PKT_PROT_LEN 72
++#define PKT_PROT_LEN (ETH_HLEN + \
++ VLAN_HLEN + \
++ sizeof(struct iphdr) + MAX_IPOPTLEN + \
++ sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE)
+
+ static inline pending_ring_idx_t pending_index(unsigned i)
+ {
+--
+1.7.3.4
+
+
+From a2d629a773aba2049106bad81596ef88e80a9cd4 Mon Sep 17 00:00:00 2001
+From: Paul Durrant <paul.durrant at citrix.com>
+Date: Tue, 14 Dec 2010 20:35:19 +0000
+Subject: [PATCH 112/139] xen: netback: Don't count packets we don't actually receive.
+
+Make sure we only bump rx_packets when we're definitely going to call netif_rx_ni().
+
+Signed-off-by: Paul Durrant <paul.durrant at citrix.com>
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/netback.c | 6 +++---
+ 1 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 9bbd230..78d3509 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -1512,9 +1512,6 @@ static void net_tx_submit(struct xen_netbk *netbk)
+ skb->dev = netif->dev;
+ skb->protocol = eth_type_trans(skb, skb->dev);
+
+- netif->stats.rx_bytes += skb->len;
+- netif->stats.rx_packets++;
+-
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ if (skb_checksum_setup(skb)) {
+ DPRINTK("Can't setup checksum in net_tx_action\n");
+@@ -1530,6 +1527,9 @@ static void net_tx_submit(struct xen_netbk *netbk)
+ continue;
+ }
+
++ netif->stats.rx_bytes += skb->len;
++ netif->stats.rx_packets++;
++
+ netif_rx_ni(skb);
+ netif->dev->last_rx = jiffies;
+ }
+--
+1.7.3.4
+
+
+From c6efc62e71720df66d9a91d33a3de813f0ec41c2 Mon Sep 17 00:00:00 2001
+From: Paul Durrant <paul.durrant at citrix.com>
+Date: Tue, 14 Dec 2010 20:35:20 +0000
+Subject: [PATCH 113/139] xen: netback: Remove the 500ms timeout to restart the netif queue.
+
+It is generally unhelpful as it results in a massive tail-drop should a guest become
+unresponsive for a relatively short period of time and no back-pressure (other than
+that caused by a higher layer protocol) is applied to the sender.
+
+Signed-off-by: Paul Durrant <paul.durrant at citrix.com>
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/netback.c | 20 +-------------------
+ 1 files changed, 1 insertions(+), 19 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 78d3509..2caa5f8 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -275,13 +275,6 @@ static inline int netbk_queue_full(struct xen_netif *netif)
+ ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
+ }
+
+-static void tx_queue_callback(unsigned long data)
+-{
+- struct xen_netif *netif = (struct xen_netif *)data;
+- if (netif_schedulable(netif))
+- netif_wake_queue(netif->dev);
+-}
+-
+ /* Figure out how many ring slots we're going to need to send @skb to
+ the guest. */
+ static unsigned count_skb_slots(struct sk_buff *skb, struct xen_netif *netif)
+@@ -364,19 +357,8 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ netif->rx.sring->req_event = netif->rx_req_cons_peek +
+ netbk_max_required_rx_slots(netif);
+ mb(); /* request notification /then/ check & stop the queue */
+- if (netbk_queue_full(netif)) {
++ if (netbk_queue_full(netif))
+ netif_stop_queue(dev);
+- /*
+- * Schedule 500ms timeout to restart the queue, thus
+- * ensuring that an inactive queue will be drained.
+- * Packets will be immediately be dropped until more
+- * receive buffers become available (see
+- * netbk_queue_full() check above).
+- */
+- netif->tx_queue_timeout.data = (unsigned long)netif;
+- netif->tx_queue_timeout.function = tx_queue_callback;
+- mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
+- }
+ }
+ skb_queue_tail(&netbk->rx_queue, skb);
+
+--
+1.7.3.4
+
+
+From 0e8da97315f8cc89611f9194097931df4e67efc7 Mon Sep 17 00:00:00 2001
+From: Paul Durrant <paul.durrant at citrix.com>
+Date: Tue, 14 Dec 2010 20:35:21 +0000
+Subject: [PATCH 114/139] xen: netback: Add a missing test to tx_work_todo.
+
+Add a test so that, when netback is using worker threads, net_tx_action()
+gets called in a timely manner when the pending_inuse list is populated.
+
+Signed-off-by: Paul Durrant <paul.durrant at citrix.com>
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/netback.c | 4 ++++
+ 1 files changed, 4 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 2caa5f8..dd52d01 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -1695,6 +1695,10 @@ static inline int tx_work_todo(struct xen_netbk *netbk)
+ if (netbk->dealloc_cons != netbk->dealloc_prod)
+ return 1;
+
++ if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
++ !list_empty(&netbk->pending_inuse_head))
++ return 1;
++
+ if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
+ !list_empty(&netbk->net_schedule_list))
+ return 1;
+--
+1.7.3.4
+
+
+From e2f4dacefdb6cdff5e4e0b380632ff7ca750ba8b Mon Sep 17 00:00:00 2001
+From: Paul Durrant <paul.durrant at citrix.com>
+Date: Tue, 14 Dec 2010 20:35:22 +0000
+Subject: [PATCH 115/139] xen: netback: Re-factor net_tx_action_dealloc() slightly.
+
+There is no need for processing of the pending_inuse list to be within the dealloc_prod/cons
+loop.
+
+Signed-off-by: Paul Durrant <paul.durrant at citrix.com>
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/netback.c | 26 ++++++++++++++------------
+ 1 files changed, 14 insertions(+), 12 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index dd52d01..53b3a0e 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -906,11 +906,20 @@ static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
+ gop++;
+ }
+
+- if (netbk_copy_skb_mode != NETBK_DELAYED_COPY_SKB ||
+- list_empty(&netbk->pending_inuse_head))
+- break;
++ } while (dp != netbk->dealloc_prod);
++
++ netbk->dealloc_cons = dc;
+
+- /* Copy any entries that have been pending for too long. */
++ ret = HYPERVISOR_grant_table_op(
++ GNTTABOP_unmap_grant_ref, netbk->tx_unmap_ops,
++ gop - netbk->tx_unmap_ops);
++ BUG_ON(ret);
++
++ /*
++ * Copy any entries that have been pending for too long
++ */
++ if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
++ !list_empty(&netbk->pending_inuse_head)) {
+ list_for_each_entry_safe(inuse, n,
+ &netbk->pending_inuse_head, list) {
+ struct pending_tx_info *pending_tx_info;
+@@ -936,14 +945,7 @@ static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
+
+ break;
+ }
+- } while (dp != netbk->dealloc_prod);
+-
+- netbk->dealloc_cons = dc;
+-
+- ret = HYPERVISOR_grant_table_op(
+- GNTTABOP_unmap_grant_ref, netbk->tx_unmap_ops,
+- gop - netbk->tx_unmap_ops);
+- BUG_ON(ret);
++ }
+
+ list_for_each_entry_safe(inuse, n, &list, list) {
+ struct pending_tx_info *pending_tx_info;
+--
+1.7.3.4
+
+
+From 082386b4a581b2ba5a125cc8944a57ceb33ff37c Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Wed, 5 Jan 2011 09:57:37 +0000
+Subject: [PATCH 116/139] xen: netback: Drop GSO SKBs which do not have csum_blank.
+
+The Linux network stack expects all GSO SKBs to have ip_summed ==
+CHECKSUM_PARTIAL (which implies that the frame contains a partial
+checksum) and the Xen network ring protocol similarly expects an SKB
+which has GSO set to also have NETRX_csum_blank (which also implies a
+partial checksum). Therefore drop such frames on receive otherwise
+they will trigger the warning in skb_gso_segment.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Cc: Jeremy Fitzhardinge <jeremy at goop.org>
+Cc: xen-devel at lists.xensource.com
+---
+ drivers/xen/netback/netback.c | 4 ++++
+ 1 files changed, 4 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 53b3a0e..2766b93 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -1502,6 +1502,10 @@ static void net_tx_submit(struct xen_netbk *netbk)
+ kfree_skb(skb);
+ continue;
+ }
++ } else if (skb_is_gso(skb)) {
++ DPRINTK("Dropping GSO but not CHECKSUM_PARTIAL skb\n");
++ kfree_skb(skb);
++ continue;
+ }
+
+ if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
+--
+1.7.3.4
+
+
+From 27e6a8538a7e781f4774e4746f67eb113996333d Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Wed, 19 Jan 2011 12:43:38 +0000
+Subject: [PATCH 117/139] xen: netback: completely remove tx_queue_timer
+
+"xen: netback: Remove the 500ms timeout to restart the netif queue." missed
+removing the timer initialisation.
+
+Also remove the related comment which has been obsolete since the default for
+MODPARM_copy_skb was switched to true some time ago.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+Cc: Paul Durrant <Paul.Durrant at citrix.com>
+---
+ drivers/xen/netback/common.h | 3 ---
+ drivers/xen/netback/interface.c | 13 +------------
+ 2 files changed, 1 insertions(+), 15 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index ce0041a..7e03a46 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -99,9 +99,6 @@ struct xen_netif {
+ unsigned long remaining_credit;
+ struct timer_list credit_timeout;
+
+- /* Enforce draining of the transmit queue. */
+- struct timer_list tx_queue_timeout;
+-
+ /* Statistics */
+ int nr_copied_skbs;
+
+diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
+index 2e8508a..efdc21c 100644
+--- a/drivers/xen/netback/interface.c
++++ b/drivers/xen/netback/interface.c
+@@ -41,15 +41,7 @@
+ * Module parameter 'queue_length':
+ *
+ * Enables queuing in the network stack when a client has run out of receive
+- * descriptors. Although this feature can improve receive bandwidth by avoiding
+- * packet loss, it can also result in packets sitting in the 'tx_queue' for
+- * unbounded time. This is bad if those packets hold onto foreign resources.
+- * For example, consider a packet that holds onto resources belonging to the
+- * guest for which it is queued (e.g., packet received on vif1.0, destined for
+- * vif1.1 which is not activated in the guest): in this situation the guest
+- * will never be destroyed, unless vif1.1 is taken down. To avoid this, we
+- * run a timer (tx_queue_timeout) to drain the queue when the interface is
+- * blocked.
++ * descriptors.
+ */
+ static unsigned long netbk_queue_length = 32;
+ module_param_named(queue_length, netbk_queue_length, ulong, 0644);
+@@ -295,8 +287,6 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
+ /* Initialize 'expires' now: it's used to track the credit window. */
+ netif->credit_timeout.expires = jiffies;
+
+- init_timer(&netif->tx_queue_timeout);
+-
+ dev->netdev_ops = &netback_ops;
+ netif_set_features(netif);
+ SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
+@@ -458,7 +448,6 @@ void netif_disconnect(struct xen_netif *netif)
+ wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0);
+
+ del_timer_sync(&netif->credit_timeout);
+- del_timer_sync(&netif->tx_queue_timeout);
+
+ if (netif->irq)
+ unbind_from_irqhandler(netif->irq, netif);
+--
+1.7.3.4
+
+
+From eccfb3db4b10548f9ecc71cc6f79bbec7e594f1d Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Tue, 18 Jan 2011 11:37:12 +0000
+Subject: [PATCH 118/139] xen: netback: rationalise types used in count_skb_slots
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/netback.c | 10 +++-------
+ 1 files changed, 3 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 2766b93..52f1745 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -277,14 +277,10 @@ static inline int netbk_queue_full(struct xen_netif *netif)
+
+ /* Figure out how many ring slots we're going to need to send @skb to
+ the guest. */
+-static unsigned count_skb_slots(struct sk_buff *skb, struct xen_netif *netif)
++static unsigned int count_skb_slots(struct sk_buff *skb, struct xen_netif *netif)
+ {
+- unsigned count;
+- unsigned copy_off;
+- unsigned i;
+-
+- copy_off = 0;
+- count = 1;
++ unsigned int count = 1;
++ int i, copy_off = 0;
+
+ BUG_ON(offset_in_page(skb->data) + skb_headlen(skb) > MAX_BUFFER_OFFSET);
+
+--
+1.7.3.4
+
+
+From 351ea99b9be1dbab49e069a4250740acc4375f6d Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Tue, 18 Jan 2011 11:21:35 +0000
+Subject: [PATCH 119/139] xen: netback: refactor logic for moving to a new receive buffer.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/netback.c | 80 ++++++++++++++++++++++++-----------------
+ 1 files changed, 47 insertions(+), 33 deletions(-)
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 52f1745..ae4821a 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -275,8 +275,51 @@ static inline int netbk_queue_full(struct xen_netif *netif)
+ ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
+ }
+
+-/* Figure out how many ring slots we're going to need to send @skb to
+- the guest. */
++/*
++ * Returns true if we should start a new receive buffer instead of
++ * adding 'size' bytes to a buffer which currently contains 'offset'
++ * bytes.
++ */
++static bool start_new_rx_buffer(int offset, unsigned long size, int head)
++{
++ /* simple case: we have completely filled the current buffer. */
++ if (offset == MAX_BUFFER_OFFSET)
++ return true;
++
++ /*
++ * complex case: start a fresh buffer if the current frag
++ * would overflow the current buffer but only if:
++ * (i) this frag would fit completely in the next buffer
++ * and (ii) there is already some data in the current buffer
++ * and (iii) this is not the head buffer.
++ *
++ * Where:
++ * - (i) stops us splitting a frag into two copies
++ * unless the frag is too large for a single buffer.
++ * - (ii) stops us from leaving a buffer pointlessly empty.
++ * - (iii) stops us leaving the first buffer
++ * empty. Strictly speaking this is already covered
++ * by (ii) but is explicitly checked because
++ * netfront relies on the first buffer being
++ * non-empty and can crash otherwise.
++ *
++ * This means we will effectively linearise small
++ * frags but do not needlessly split large buffers
++ * into multiple copies tend to give large frags their
++ * own buffers as before.
++ */
++ if ((offset + size > MAX_BUFFER_OFFSET) &&
++ (size <= MAX_BUFFER_OFFSET) && offset && !head)
++ return true;
++
++ return false;
++}
++
++/*
++ * Figure out how many ring slots we're going to need to send @skb to
++ * the guest. This function is essentially a dry run of
++ * netbk_gop_frag_copy.
++ */
+ static unsigned int count_skb_slots(struct sk_buff *skb, struct xen_netif *netif)
+ {
+ unsigned int count = 1;
+@@ -295,9 +338,7 @@ static unsigned int count_skb_slots(struct sk_buff *skb, struct xen_netif *netif
+ while (size > 0) {
+ BUG_ON(copy_off > MAX_BUFFER_OFFSET);
+
+- /* These checks are the same as in netbk_gop_frag_copy */
+- if (copy_off == MAX_BUFFER_OFFSET
+- || ((copy_off + size > MAX_BUFFER_OFFSET) && (size <= MAX_BUFFER_OFFSET) && copy_off)) {
++ if (start_new_rx_buffer(copy_off, size, 0)) {
+ count++;
+ copy_off = 0;
+ }
+@@ -403,34 +444,7 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
+ while (size > 0) {
+ BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
+
+- /*
+- * Move to a new receive buffer if:
+- *
+- * simple case: we have completely filled the current buffer.
+- *
+- * complex case: the current frag would overflow
+- * the current buffer but only if:
+- * (i) this frag would fit completely in the next buffer
+- * and (ii) there is already some data in the current buffer
+- * and (iii) this is not the head buffer.
+- *
+- * Where:
+- * - (i) stops us splitting a frag into two copies
+- * unless the frag is too large for a single buffer.
+- * - (ii) stops us from leaving a buffer pointlessly empty.
+- * - (iii) stops us leaving the first buffer
+- * empty. Strictly speaking this is already covered
+- * by (ii) but is explicitly checked because
+- * netfront relies on the first buffer being
+- * non-empty and can crash otherwise.
+- *
+- * This means we will effectively linearise small
+- * frags but do not needlessly split large buffers
+- * into multiple copies tend to give large frags their
+- * own buffers as before.
+- */
+- if (npo->copy_off == MAX_BUFFER_OFFSET
+- || ((npo->copy_off + size > MAX_BUFFER_OFFSET) && (size <= MAX_BUFFER_OFFSET) && npo->copy_off && !head)) {
++ if (start_new_rx_buffer(npo->copy_off, size, head)) {
+ struct xen_netif_rx_request *req;
+
+ BUG_ON(head); /* Netfront requires there to be some data in the head buffer. */
+--
+1.7.3.4
+
+
+From 790bdefa794705301733d53d36c3e8d3a98c811d Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Fri, 24 Dec 2010 13:37:04 +0000
+Subject: [PATCH 120/139] xen: netback: cleanup coding style
+
+Fix checkpatch.pl errors plus manual sweep.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/common.h | 52 +-------------
+ drivers/xen/netback/interface.c | 14 ++--
+ drivers/xen/netback/netback.c | 142 ++++++++++++++++++++++----------------
+ drivers/xen/netback/xenbus.c | 61 +++++++----------
+ 4 files changed, 118 insertions(+), 151 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 7e03a46..77bb3fc 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -35,13 +35,13 @@
+ #include <linux/slab.h>
+ #include <linux/ip.h>
+ #include <linux/in.h>
++#include <linux/io.h>
+ #include <linux/netdevice.h>
+ #include <linux/etherdevice.h>
+ #include <linux/wait.h>
+ #include <linux/sched.h>
+
+ #include <xen/interface/io/netif.h>
+-#include <asm/io.h>
+ #include <asm/pgalloc.h>
+ #include <xen/interface/grant_table.h>
+ #include <xen/grant_table.h>
+@@ -49,7 +49,7 @@
+
+ #define DPRINTK(_f, _a...) \
+ pr_debug("(file=%s, line=%d) " _f, \
+- __FILE__ , __LINE__ , ## _a )
++ __FILE__ , __LINE__ , ## _a)
+ #define IPRINTK(fmt, args...) \
+ printk(KERN_INFO "xen_net: " fmt, ##args)
+ #define WPRINTK(fmt, args...) \
+@@ -131,66 +131,22 @@ enum {
+
+ extern int netbk_copy_skb_mode;
+
+-/* Function pointers into netback accelerator plugin modules */
+-struct netback_accel_hooks {
+- struct module *owner;
+- int (*probe)(struct xenbus_device *dev);
+- int (*remove)(struct xenbus_device *dev);
+-};
+-
+-/* Structure to track the state of a netback accelerator plugin */
+-struct netback_accelerator {
+- struct list_head link;
+- int id;
+- char *eth_name;
+- atomic_t use_count;
+- struct netback_accel_hooks *hooks;
+-};
+-
+ struct backend_info {
+ struct xenbus_device *dev;
+ struct xen_netif *netif;
+ enum xenbus_state frontend_state;
+ struct xenbus_watch hotplug_status_watch;
+ int have_hotplug_status_watch:1;
+-
+- /* State relating to the netback accelerator */
+- void *netback_accel_priv;
+- /* The accelerator that this backend is currently using */
+- struct netback_accelerator *accelerator;
+ };
+
+-#define NETBACK_ACCEL_VERSION 0x00010001
+-
+-/*
+- * Connect an accelerator plugin module to netback. Returns zero on
+- * success, < 0 on error, > 0 (with highest version number supported)
+- * if version mismatch.
+- */
+-extern int netback_connect_accelerator(unsigned version,
+- int id, const char *eth_name,
+- struct netback_accel_hooks *hooks);
+-/* Disconnect a previously connected accelerator plugin module */
+-extern void netback_disconnect_accelerator(int id, const char *eth_name);
+-
+-
+-extern
+-void netback_probe_accelerators(struct backend_info *be,
+- struct xenbus_device *dev);
+-extern
+-void netback_remove_accelerators(struct backend_info *be,
+- struct xenbus_device *dev);
+-extern
+-void netif_accel_init(void);
+-
+-
+ #define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
+ #define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
+
+ void netif_disconnect(struct xen_netif *netif);
+
+ void netif_set_features(struct xen_netif *netif);
+-struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle);
++struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
++ unsigned int handle);
+ int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
+ unsigned long rx_ring_ref, unsigned int evtchn);
+
+diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
+index efdc21c..c66b180 100644
+--- a/drivers/xen/netback/interface.c
++++ b/drivers/xen/netback/interface.c
+@@ -225,8 +225,7 @@ static void netbk_get_strings(struct net_device *dev, u32 stringset, u8 * data)
+ }
+ }
+
+-static struct ethtool_ops network_ethtool_ops =
+-{
++static struct ethtool_ops network_ethtool_ops = {
+ .get_drvinfo = netbk_get_drvinfo,
+
+ .get_tx_csum = ethtool_op_get_tx_csum,
+@@ -242,8 +241,7 @@ static struct ethtool_ops network_ethtool_ops =
+ .get_strings = netbk_get_strings,
+ };
+
+-static struct net_device_ops netback_ops =
+-{
++static struct net_device_ops netback_ops = {
+ .ndo_start_xmit = netif_be_start_xmit,
+ .ndo_get_stats = netif_be_get_stats,
+ .ndo_open = net_open,
+@@ -251,7 +249,8 @@ static struct net_device_ops netback_ops =
+ .ndo_change_mtu = netbk_change_mtu,
+ };
+
+-struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle)
++struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
++ unsigned int handle)
+ {
+ int err = 0;
+ struct net_device *dev;
+@@ -316,8 +315,9 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
+ return netif;
+ }
+
+-static int map_frontend_pages(
+- struct xen_netif *netif, grant_ref_t tx_ring_ref, grant_ref_t rx_ring_ref)
++static int map_frontend_pages(struct xen_netif *netif,
++ grant_ref_t tx_ring_ref,
++ grant_ref_t rx_ring_ref)
+ {
+ struct gnttab_map_grant_ref op;
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index ae4821a..99440fd 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -1,11 +1,9 @@
+-/******************************************************************************
+- * drivers/xen/netback/netback.c
+- *
++/*
+ * Back-end of the driver for virtual network devices. This portion of the
+ * driver exports a 'unified' network-device interface that can be accessed
+ * by any operating system that implements a compatible front end. A
+ * reference front-end implementation can be found in:
+- * drivers/xen/netfront/netfront.c
++ * drivers/net/xen-netfront.c
+ *
+ * Copyright (c) 2002-2005, K A Fraser
+ *
+@@ -82,8 +80,8 @@ static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
+ }
+
+ /* extra field used in struct page */
+-static inline void netif_set_page_ext(struct page *pg, unsigned int group,
+- unsigned int idx)
++static inline void netif_set_page_ext(struct page *pg,
++ unsigned int group, unsigned int idx)
+ {
+ union page_ext ext = { .e = { .group = group + 1, .idx = idx } };
+
+@@ -91,7 +89,8 @@ static inline void netif_set_page_ext(struct page *pg, unsigned int group,
+ pg->mapping = ext.mapping;
+ }
+
+-static inline int netif_get_page_ext(struct page *pg, unsigned int *_group, unsigned int *_idx)
++static int netif_get_page_ext(struct page *pg,
++ unsigned int *_group, unsigned int *_idx)
+ {
+ union page_ext ext = { .mapping = pg->mapping };
+ struct xen_netbk *netbk;
+@@ -325,7 +324,7 @@ static unsigned int count_skb_slots(struct sk_buff *skb, struct xen_netif *netif
+ unsigned int count = 1;
+ int i, copy_off = 0;
+
+- BUG_ON(offset_in_page(skb->data) + skb_headlen(skb) > MAX_BUFFER_OFFSET);
++ BUG_ON(offset_in_page(skb->data)+skb_headlen(skb) > MAX_BUFFER_OFFSET);
+
+ copy_off = skb_headlen(skb);
+
+@@ -376,7 +375,7 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ */
+ if ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE) {
+ struct sk_buff *nskb = netbk_copy_skb(skb);
+- if ( unlikely(nskb == NULL) )
++ if (unlikely(nskb == NULL))
+ goto drop;
+ /* Copy only the header fields we use in this driver. */
+ nskb->dev = skb->dev;
+@@ -385,8 +384,7 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ skb = nskb;
+ }
+
+- /* Reserve ring slots for the worst-case number of
+- * fragments. */
++ /* Reserve ring slots for the worst-case number of fragments. */
+ netif->rx_req_cons_peek += count_skb_slots(skb, netif);
+ netif_get(netif);
+
+@@ -418,9 +416,29 @@ struct netrx_pending_operations {
+ grant_ref_t copy_gref;
+ };
+
+-/* Set up the grant operations for this fragment. If it's a flipping
+- interface, we also set up the unmap request from here. */
++static struct netbk_rx_meta *get_next_rx_buffer(struct xen_netif *netif,
++ struct netrx_pending_operations *npo)
++{
++ struct netbk_rx_meta *meta;
++ struct xen_netif_rx_request *req;
+
++ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
++
++ meta = npo->meta + npo->meta_prod++;
++ meta->gso_size = 0;
++ meta->size = 0;
++ meta->id = req->id;
++
++ npo->copy_off = 0;
++ npo->copy_gref = req->gref;
++
++ return meta;
++}
++
++/*
++ * Set up the grant operations for this fragment. If it's a flipping
++ * interface, we also set up the unmap request from here.
++ */
+ static void netbk_gop_frag_copy(struct xen_netif *netif,
+ struct netrx_pending_operations *npo,
+ struct page *page, unsigned long size,
+@@ -431,7 +449,7 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
+ /*
+ * These variables a used iff netif_get_page_ext returns true,
+ * in which case they are guaranteed to be initialized.
+- */
++ */
+ unsigned int uninitialized_var(group), uninitialized_var(idx);
+ int foreign = netif_get_page_ext(page, &group, &idx);
+ unsigned long bytes;
+@@ -445,17 +463,13 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
+ BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
+
+ if (start_new_rx_buffer(npo->copy_off, size, head)) {
+- struct xen_netif_rx_request *req;
+-
+- BUG_ON(head); /* Netfront requires there to be some data in the head buffer. */
+- /* Overflowed this request, go to the next one */
+- req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
+- meta = npo->meta + npo->meta_prod++;
+- meta->gso_size = 0;
+- meta->size = 0;
+- meta->id = req->id;
+- npo->copy_off = 0;
+- npo->copy_gref = req->gref;
++ /*
++ * Netfront requires there to be some data in
++ * the head buffer.
++ */
++ BUG_ON(head);
++
++ meta = get_next_rx_buffer(netif, npo);
+ }
+
+ bytes = size;
+@@ -474,8 +488,9 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
+ copy_gop->source.u.ref = src_pend->req.gref;
+ copy_gop->flags |= GNTCOPY_source_gref;
+ } else {
++ void *vaddr = page_address(page);
+ copy_gop->source.domid = DOMID_SELF;
+- copy_gop->source.u.gmfn = virt_to_mfn(page_address(page));
++ copy_gop->source.u.gmfn = virt_to_mfn(vaddr);
+ }
+ copy_gop->source.offset = offset;
+ copy_gop->dest.domid = netif->domid;
+@@ -489,17 +504,22 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
+
+ offset += bytes;
+ size -= bytes;
+- head = 0; /* Must be something in this buffer now */
++ head = 0; /* There must be something in this buffer now. */
+ }
+ }
+
+-/* Prepare an SKB to be transmitted to the frontend. This is
+- responsible for allocating grant operations, meta structures, etc.
+- It returns the number of meta structures consumed. The number of
+- ring slots used is always equal to the number of meta slots used
+- plus the number of GSO descriptors used. Currently, we use either
+- zero GSO descriptors (for non-GSO packets) or one descriptor (for
+- frontend-side LRO). */
++/*
++ * Prepare an SKB to be transmitted to the frontend.
++ *
++ * This function is responsible for allocating grant operations, meta
++ * structures, etc.
++ *
++ * It returns the number of meta structures consumed. The number of
++ * ring slots used is always equal to the number of meta slots used
++ * plus the number of GSO descriptors used. Currently, we use either
++ * zero GSO descriptors (for non-GSO packets) or one descriptor (for
++ * frontend-side LRO).
++ */
+ static int netbk_gop_skb(struct sk_buff *skb,
+ struct netrx_pending_operations *npo)
+ {
+@@ -554,10 +574,12 @@ static int netbk_gop_skb(struct sk_buff *skb,
+ return npo->meta_prod - old_meta_prod;
+ }
+
+-/* This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was
+- used to set up the operations on the top of
+- netrx_pending_operations, which have since been done. Check that
+- they didn't give any errors and advance over them. */
++/*
++ * This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was
++ * used to set up the operations on the top of
++ * netrx_pending_operations, which have since been done. Check that
++ * they didn't give any errors and advance over them.
++ */
+ static int netbk_check_gop(int nr_meta_slots, domid_t domid,
+ struct netrx_pending_operations *npo)
+ {
+@@ -584,6 +606,12 @@ static void netbk_add_frag_responses(struct xen_netif *netif, int status,
+ int i;
+ unsigned long offset;
+
++ /* No fragments used */
++ if (nr_meta_slots <= 1)
++ return;
++
++ nr_meta_slots--;
++
+ for (i = 0; i < nr_meta_slots; i++) {
+ int flags;
+ if (i == nr_meta_slots - 1)
+@@ -712,11 +740,9 @@ static void net_rx_action(unsigned long data)
+ gso->flags = 0;
+ }
+
+- if (sco->meta_slots_used > 1) {
+- netbk_add_frag_responses(netif, status,
+- netbk->meta + npo.meta_cons + 1,
+- sco->meta_slots_used - 1);
+- }
++ netbk_add_frag_responses(netif, status,
++ netbk->meta + npo.meta_cons + 1,
++ sco->meta_slots_used);
+
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
+ irq = netif->irq;
+@@ -887,9 +913,7 @@ static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
+ dc = netbk->dealloc_cons;
+ gop = netbk->tx_unmap_ops;
+
+- /*
+- * Free up any grants we have finished using
+- */
++ /* Free up any grants we have finished using. */
+ do {
+ dp = netbk->dealloc_prod;
+
+@@ -999,7 +1023,8 @@ static void netbk_tx_err(struct xen_netif *netif,
+
+ static int netbk_count_requests(struct xen_netif *netif,
+ struct xen_netif_tx_request *first,
+- struct xen_netif_tx_request *txp, int work_to_do)
++ struct xen_netif_tx_request *txp,
++ int work_to_do)
+ {
+ RING_IDX cons = netif->tx.req_cons;
+ int frags = 0;
+@@ -1039,10 +1064,10 @@ static int netbk_count_requests(struct xen_netif *netif,
+ }
+
+ static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk,
+- struct xen_netif *netif,
+- struct sk_buff *skb,
+- struct xen_netif_tx_request *txp,
+- struct gnttab_map_grant_ref *mop)
++ struct xen_netif *netif,
++ struct sk_buff *skb,
++ struct xen_netif_tx_request *txp,
++ struct gnttab_map_grant_ref *mop)
+ {
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+ skb_frag_t *frags = shinfo->frags;
+@@ -1181,7 +1206,8 @@ static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
+ }
+ }
+
+-int netbk_get_extras(struct xen_netif *netif, struct xen_netif_extra_info *extras,
++int netbk_get_extras(struct xen_netif *netif,
++ struct xen_netif_extra_info *extras,
+ int work_to_do)
+ {
+ struct xen_netif_extra_info extra;
+@@ -1209,7 +1235,8 @@ int netbk_get_extras(struct xen_netif *netif, struct xen_netif_extra_info *extra
+ return work_to_do;
+ }
+
+-static int netbk_set_skb_gso(struct sk_buff *skb, struct xen_netif_extra_info *gso)
++static int netbk_set_skb_gso(struct sk_buff *skb,
++ struct xen_netif_extra_info *gso)
+ {
+ if (!gso->u.gso.size) {
+ DPRINTK("GSO size must not be zero.\n");
+@@ -1315,7 +1342,7 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk)
+ struct xen_netif *netif;
+ struct xen_netif_tx_request txreq;
+ struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
+- struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
++ struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
+ u16 pending_idx;
+ RING_IDX idx;
+ int work_to_do;
+@@ -1377,7 +1404,7 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk)
+ if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
+ DPRINTK("txreq.offset: %x, size: %u, end: %lu\n",
+ txreq.offset, txreq.size,
+- (txreq.offset &~PAGE_MASK) + txreq.size);
++ (txreq.offset&~PAGE_MASK) + txreq.size);
+ netbk_tx_err(netif, &txreq, idx);
+ continue;
+ }
+@@ -1763,9 +1790,6 @@ static int __init netback_init(void)
+ }
+ memset(xen_netbk, 0, sizeof(struct xen_netbk) * xen_netbk_group_nr);
+
+- /* We can increase reservation by this much in net_rx_action(). */
+-// balloon_update_driver_allowance(NET_RX_RING_SIZE);
+-
+ for (group = 0; group < xen_netbk_group_nr; group++) {
+ struct xen_netbk *netbk = &xen_netbk[group];
+ skb_queue_head_init(&netbk->rx_queue);
+@@ -1850,8 +1874,6 @@ static int __init netback_init(void)
+ netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
+ }
+
+- //netif_accel_init();
+-
+ rc = netif_xenbus_init();
+ if (rc)
+ goto failed_init;
+diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
+index 1fec65a..e2095fc 100644
+--- a/drivers/xen/netback/xenbus.c
++++ b/drivers/xen/netback/xenbus.c
+@@ -1,20 +1,20 @@
+ /* Xenbus code for netif backend
+- Copyright (C) 2005 Rusty Russell <rusty at rustcorp.com.au>
+- Copyright (C) 2005 XenSource Ltd
+-
+- This program is free software; you can redistribute it and/or modify
+- it under the terms of the GNU General Public License as published by
+- the Free Software Foundation; either version 2 of the License, or
+- (at your option) any later version.
+-
+- This program is distributed in the hope that it will be useful,
+- but WITHOUT ANY WARRANTY; without even the implied warranty of
+- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+- GNU General Public License for more details.
+-
+- You should have received a copy of the GNU General Public License
+- along with this program; if not, write to the Free Software
+- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
++ * Copyright (C) 2005 Rusty Russell <rusty at rustcorp.com.au>
++ * Copyright (C) 2005 XenSource Ltd
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+ #include <stdarg.h>
+@@ -22,13 +22,6 @@
+ #include <xen/xenbus.h>
+ #include "common.h"
+
+-#if 0
+-#undef DPRINTK
+-#define DPRINTK(fmt, args...) \
+- printk("netback/xenbus (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
+-#endif
+-
+-
+ static int connect_rings(struct backend_info *);
+ static void connect(struct backend_info *);
+ static void backend_create_netif(struct backend_info *be);
+@@ -36,9 +29,7 @@ static void unregister_hotplug_status_watch(struct backend_info *be);
+
+ static int netback_remove(struct xenbus_device *dev)
+ {
+- struct backend_info *be = dev_get_drvdata(&dev->dev);
+-
+- //netback_remove_accelerators(be, dev);
++ struct backend_info *be = dev_get_drvdata(&dev->dev);
+
+ unregister_hotplug_status_watch(be);
+ if (be->netif) {
+@@ -126,8 +117,6 @@ static int netback_probe(struct xenbus_device *dev,
+ goto fail;
+ }
+
+- //netback_probe_accelerators(be, dev);
+-
+ err = xenbus_switch_state(dev, XenbusStateInitWait);
+ if (err)
+ goto fail;
+@@ -147,12 +136,13 @@ fail:
+ }
+
+
+-/**
++/*
+ * Handle the creation of the hotplug script environment. We add the script
+ * and vif variables to the environment, for the benefit of the vif-* hotplug
+ * scripts.
+ */
+-static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env)
++static int netback_uevent(struct xenbus_device *xdev,
++ struct kobj_uevent_env *env)
+ {
+ struct backend_info *be = dev_get_drvdata(&xdev->dev);
+ char *val;
+@@ -164,8 +154,7 @@ static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *en
+ int err = PTR_ERR(val);
+ xenbus_dev_fatal(xdev, err, "reading script");
+ return err;
+- }
+- else {
++ } else {
+ if (add_uevent_var(env, "script=%s", val)) {
+ kfree(val);
+ return -ENOMEM;
+@@ -173,10 +162,10 @@ static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *en
+ kfree(val);
+ }
+
+- if (be && be->netif && add_uevent_var(env, "vif=%s", be->netif->dev->name))
+- return -ENOMEM;
++ if (!be || !be->netif)
++ return 0;
+
+- return 0;
++ return add_uevent_var(env, "vif=%s", be->netif->dev->name);
+ }
+
+
+@@ -234,7 +223,7 @@ static void frontend_changed(struct xenbus_device *dev,
+ case XenbusStateInitialising:
+ if (dev->state == XenbusStateClosed) {
+ printk(KERN_INFO "%s: %s: prepare for reconnect\n",
+- __FUNCTION__, dev->nodename);
++ __func__, dev->nodename);
+ xenbus_switch_state(dev, XenbusStateInitWait);
+ }
+ break;
+--
+1.7.3.4
+
+
+From 0b0514b47e27f4c1b438b30972011aa27ac1ee8f Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Wed, 19 Jan 2011 10:51:45 +0000
+Subject: [PATCH 121/139] xen: netback: drop private ?PRINTK macros in favour of pr_*
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/xen/netback/common.h | 10 ++--------
+ drivers/xen/netback/interface.c | 13 +++++++------
+ drivers/xen/netback/netback.c | 28 ++++++++++++++--------------
+ drivers/xen/netback/xenbus.c | 13 +++----------
+ 4 files changed, 26 insertions(+), 38 deletions(-)
+
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+index 77bb3fc..079e1de 100644
+--- a/drivers/xen/netback/common.h
++++ b/drivers/xen/netback/common.h
+@@ -29,6 +29,8 @@
+ #ifndef __NETIF__BACKEND__COMMON_H__
+ #define __NETIF__BACKEND__COMMON_H__
+
++#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__
++
+ #include <linux/version.h>
+ #include <linux/module.h>
+ #include <linux/interrupt.h>
+@@ -47,14 +49,6 @@
+ #include <xen/grant_table.h>
+ #include <xen/xenbus.h>
+
+-#define DPRINTK(_f, _a...) \
+- pr_debug("(file=%s, line=%d) " _f, \
+- __FILE__ , __LINE__ , ## _a)
+-#define IPRINTK(fmt, args...) \
+- printk(KERN_INFO "xen_net: " fmt, ##args)
+-#define WPRINTK(fmt, args...) \
+- printk(KERN_WARNING "xen_net: " fmt, ##args)
+-
+ struct xen_netif {
+ /* Unique identifier for this interface. */
+ domid_t domid;
+diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
+index c66b180..c36db26 100644
+--- a/drivers/xen/netback/interface.c
++++ b/drivers/xen/netback/interface.c
+@@ -31,6 +31,7 @@
+ */
+
+ #include "common.h"
++
+ #include <linux/ethtool.h>
+ #include <linux/rtnetlink.h>
+
+@@ -260,7 +261,7 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
+ snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
+ dev = alloc_netdev(sizeof(struct xen_netif), name, ether_setup);
+ if (dev == NULL) {
+- DPRINTK("Could not create netif: out of memory\n");
++ pr_debug("Could not allocate netdev\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+@@ -305,13 +306,13 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
+ err = register_netdevice(dev);
+ rtnl_unlock();
+ if (err) {
+- DPRINTK("Could not register new net device %s: err=%d\n",
+- dev->name, err);
++ pr_debug("Could not register new net device %s: err=%d\n",
++ dev->name, err);
+ free_netdev(dev);
+ return ERR_PTR(err);
+ }
+
+- DPRINTK("Successfully created netif\n");
++ pr_debug("Successfully created netif\n");
+ return netif;
+ }
+
+@@ -328,7 +329,7 @@ static int map_frontend_pages(struct xen_netif *netif,
+ BUG();
+
+ if (op.status) {
+- DPRINTK(" Gnttab failure mapping tx_ring_ref!\n");
++ pr_debug("Gnttab failure mapping tx_ring_ref!\n");
+ return op.status;
+ }
+
+@@ -348,7 +349,7 @@ static int map_frontend_pages(struct xen_netif *netif,
+ (unsigned long)netif->tx_comms_area->addr,
+ GNTMAP_host_map, netif->tx_shmem_handle);
+ HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unop, 1);
+- DPRINTK(" Gnttab failure mapping rx_ring_ref!\n");
++ pr_debug("Gnttab failure mapping rx_ring_ref!\n");
+ return op.status;
+ }
+
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+index 99440fd..e0ca232 100644
+--- a/drivers/xen/netback/netback.c
++++ b/drivers/xen/netback/netback.c
+@@ -590,8 +590,8 @@ static int netbk_check_gop(int nr_meta_slots, domid_t domid,
+ for (i = 0; i < nr_meta_slots; i++) {
+ copy_op = npo->copy + npo->copy_cons++;
+ if (copy_op->status != GNTST_okay) {
+- DPRINTK("Bad status %d from copy to DOM%d.\n",
+- copy_op->status, domid);
++ pr_debug("Bad status %d from copy to DOM%d.\n",
++ copy_op->status, domid);
+ status = NETIF_RSP_ERROR;
+ }
+ }
+@@ -1215,7 +1215,7 @@ int netbk_get_extras(struct xen_netif *netif,
+
+ do {
+ if (unlikely(work_to_do-- <= 0)) {
+- DPRINTK("Missing extra info\n");
++ pr_debug("Missing extra info\n");
+ return -EBADR;
+ }
+
+@@ -1224,7 +1224,7 @@ int netbk_get_extras(struct xen_netif *netif,
+ if (unlikely(!extra.type ||
+ extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
+ netif->tx.req_cons = ++cons;
+- DPRINTK("Invalid extra type: %d\n", extra.type);
++ pr_debug("Invalid extra type: %d\n", extra.type);
+ return -EINVAL;
+ }
+
+@@ -1239,13 +1239,13 @@ static int netbk_set_skb_gso(struct sk_buff *skb,
+ struct xen_netif_extra_info *gso)
+ {
+ if (!gso->u.gso.size) {
+- DPRINTK("GSO size must not be zero.\n");
++ pr_debug("GSO size must not be zero.\n");
+ return -EINVAL;
+ }
+
+ /* Currently only TCPv4 S.O. is supported. */
+ if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
+- DPRINTK("Bad GSO type %d.\n", gso->u.gso.type);
++ pr_debug("Bad GSO type %d.\n", gso->u.gso.type);
+ return -EINVAL;
+ }
+
+@@ -1395,16 +1395,16 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk)
+ idx += ret;
+
+ if (unlikely(txreq.size < ETH_HLEN)) {
+- DPRINTK("Bad packet size: %d\n", txreq.size);
++ pr_debug("Bad packet size: %d\n", txreq.size);
+ netbk_tx_err(netif, &txreq, idx);
+ continue;
+ }
+
+ /* No crossing a page as the payload mustn't fragment. */
+ if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
+- DPRINTK("txreq.offset: %x, size: %u, end: %lu\n",
+- txreq.offset, txreq.size,
+- (txreq.offset&~PAGE_MASK) + txreq.size);
++ pr_debug("txreq.offset: %x, size: %u, end: %lu\n",
++ txreq.offset, txreq.size,
++ (txreq.offset&~PAGE_MASK) + txreq.size);
+ netbk_tx_err(netif, &txreq, idx);
+ continue;
+ }
+@@ -1419,7 +1419,7 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk)
+ skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
+ GFP_ATOMIC | __GFP_NOWARN);
+ if (unlikely(skb == NULL)) {
+- DPRINTK("Can't allocate a skb in start_xmit.\n");
++ pr_debug("Can't allocate a skb in start_xmit.\n");
+ netbk_tx_err(netif, &txreq, idx);
+ break;
+ }
+@@ -1494,7 +1494,7 @@ static void net_tx_submit(struct xen_netbk *netbk)
+
+ /* Check the remap error code. */
+ if (unlikely(netbk_tx_check_mop(netbk, skb, &mop))) {
+- DPRINTK("netback grant failed.\n");
++ pr_debug("netback grant failed.\n");
+ skb_shinfo(skb)->nr_frags = 0;
+ kfree_skb(skb);
+ continue;
+@@ -1535,12 +1535,12 @@ static void net_tx_submit(struct xen_netbk *netbk)
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ if (skb_checksum_setup(skb)) {
+- DPRINTK("Can't setup checksum in net_tx_action\n");
++ pr_debug("skb_checksum_setup failed\n");
+ kfree_skb(skb);
+ continue;
+ }
+ } else if (skb_is_gso(skb)) {
+- DPRINTK("Dropping GSO but not CHECKSUM_PARTIAL skb\n");
++ pr_debug("GSO SKB checksum is not partial\n");
+ kfree_skb(skb);
+ continue;
+ }
+diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
+index e2095fc..640c696 100644
+--- a/drivers/xen/netback/xenbus.c
++++ b/drivers/xen/netback/xenbus.c
+@@ -17,9 +17,6 @@
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+-#include <stdarg.h>
+-#include <linux/module.h>
+-#include <xen/xenbus.h>
+ #include "common.h"
+
+ static int connect_rings(struct backend_info *);
+@@ -130,7 +127,7 @@ abort_transaction:
+ xenbus_transaction_end(xbt, 1);
+ xenbus_dev_fatal(dev, err, "%s", message);
+ fail:
+- DPRINTK("failed");
++ pr_debug("failed");
+ netback_remove(dev);
+ return err;
+ }
+@@ -147,8 +144,6 @@ static int netback_uevent(struct xenbus_device *xdev,
+ struct backend_info *be = dev_get_drvdata(&xdev->dev);
+ char *val;
+
+- DPRINTK("netback_uevent");
+-
+ val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
+ if (IS_ERR(val)) {
+ int err = PTR_ERR(val);
+@@ -215,7 +210,7 @@ static void frontend_changed(struct xenbus_device *dev,
+ {
+ struct backend_info *be = dev_get_drvdata(&dev->dev);
+
+- DPRINTK("%s", xenbus_strstate(frontend_state));
++ pr_debug("frontend state %s", xenbus_strstate(frontend_state));
+
+ be->frontend_state = frontend_state;
+
+@@ -295,7 +290,7 @@ static void xen_net_read_rate(struct xenbus_device *dev,
+ return;
+
+ fail:
+- WPRINTK("Failed to parse network rate limit. Traffic unlimited.\n");
++ pr_warn("Failed to parse network rate limit. Traffic unlimited.\n");
+ kfree(ratestr);
+ }
+
+@@ -394,8 +389,6 @@ static int connect_rings(struct backend_info *be)
+ int err;
+ int val;
+
+- DPRINTK("");
+-
+ err = xenbus_gather(XBT_NIL, dev->otherend,
+ "tx-ring-ref", "%lu", &tx_ring_ref,
+ "rx-ring-ref", "%lu", &rx_ring_ref,
+--
+1.7.3.4
+
+
+From e9124d120cf83945516c4085b32ea40c1bb94ffb Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Tue, 18 Jan 2011 12:54:12 +0000
+Subject: [PATCH 122/139] xen: netback: move under drivers/net/xen-netback/
+
+From the kernel's PoV netback is just another network device driver.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/net/Kconfig | 7 +
+ drivers/net/Makefile | 1 +
+ drivers/net/xen-netback/Makefile | 3 +
+ drivers/net/xen-netback/common.h | 275 +++++
+ drivers/net/xen-netback/interface.c | 465 +++++++++
+ drivers/net/xen-netback/netback.c | 1909 +++++++++++++++++++++++++++++++++++
+ drivers/net/xen-netback/xenbus.c | 487 +++++++++
+ drivers/xen/Kconfig | 7 -
+ drivers/xen/Makefile | 1 -
+ drivers/xen/netback/Makefile | 3 -
+ drivers/xen/netback/common.h | 275 -----
+ drivers/xen/netback/interface.c | 465 ---------
+ drivers/xen/netback/netback.c | 1909 -----------------------------------
+ drivers/xen/netback/xenbus.c | 487 ---------
+ 14 files changed, 3147 insertions(+), 3147 deletions(-)
+ create mode 100644 drivers/net/xen-netback/Makefile
+ create mode 100644 drivers/net/xen-netback/common.h
+ create mode 100644 drivers/net/xen-netback/interface.c
+ create mode 100644 drivers/net/xen-netback/netback.c
+ create mode 100644 drivers/net/xen-netback/xenbus.c
+ delete mode 100644 drivers/xen/netback/Makefile
+ delete mode 100644 drivers/xen/netback/common.h
+ delete mode 100644 drivers/xen/netback/interface.c
+ delete mode 100644 drivers/xen/netback/netback.c
+ delete mode 100644 drivers/xen/netback/xenbus.c
+
+diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
+index cbf0635..5b088f5 100644
+--- a/drivers/net/Kconfig
++++ b/drivers/net/Kconfig
+@@ -2970,6 +2970,13 @@ config XEN_NETDEV_FRONTEND
+ if you are compiling a kernel for a Xen guest, you almost
+ certainly want to enable this.
+
++config XEN_NETDEV_BACKEND
++ tristate "Xen backend network device"
++ depends on XEN_BACKEND
++ help
++ Implement the network backend driver, which passes packets
++ from the guest domain's frontend drivers to the network.
++
+ config ISERIES_VETH
+ tristate "iSeries Virtual Ethernet driver support"
+ depends on PPC_ISERIES
+diff --git a/drivers/net/Makefile b/drivers/net/Makefile
+index b90738d..145dfd7 100644
+--- a/drivers/net/Makefile
++++ b/drivers/net/Makefile
+@@ -171,6 +171,7 @@ obj-$(CONFIG_SLIP) += slip.o
+ obj-$(CONFIG_SLHC) += slhc.o
+
+ obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o
++obj-$(CONFIG_XEN_NETDEV_BACKEND) += xen-netback/
+
+ obj-$(CONFIG_DUMMY) += dummy.o
+ obj-$(CONFIG_IFB) += ifb.o
+diff --git a/drivers/net/xen-netback/Makefile b/drivers/net/xen-netback/Makefile
+new file mode 100644
+index 0000000..e346e81
+--- /dev/null
++++ b/drivers/net/xen-netback/Makefile
+@@ -0,0 +1,3 @@
++obj-$(CONFIG_XEN_NETDEV_BACKEND) := xen-netback.o
++
++xen-netback-y := netback.o xenbus.o interface.o
+diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
+new file mode 100644
+index 0000000..079e1de
+--- /dev/null
++++ b/drivers/net/xen-netback/common.h
+@@ -0,0 +1,275 @@
++/******************************************************************************
++ * arch/xen/drivers/netif/backend/common.h
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#ifndef __NETIF__BACKEND__COMMON_H__
++#define __NETIF__BACKEND__COMMON_H__
++
++#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__
++
++#include <linux/version.h>
++#include <linux/module.h>
++#include <linux/interrupt.h>
++#include <linux/slab.h>
++#include <linux/ip.h>
++#include <linux/in.h>
++#include <linux/io.h>
++#include <linux/netdevice.h>
++#include <linux/etherdevice.h>
++#include <linux/wait.h>
++#include <linux/sched.h>
++
++#include <xen/interface/io/netif.h>
++#include <asm/pgalloc.h>
++#include <xen/interface/grant_table.h>
++#include <xen/grant_table.h>
++#include <xen/xenbus.h>
++
++struct xen_netif {
++ /* Unique identifier for this interface. */
++ domid_t domid;
++ int group;
++ unsigned int handle;
++
++ u8 fe_dev_addr[6];
++
++ /* Physical parameters of the comms window. */
++ grant_handle_t tx_shmem_handle;
++ grant_ref_t tx_shmem_ref;
++ grant_handle_t rx_shmem_handle;
++ grant_ref_t rx_shmem_ref;
++ unsigned int irq;
++
++ /* The shared rings and indexes. */
++ struct xen_netif_tx_back_ring tx;
++ struct xen_netif_rx_back_ring rx;
++ struct vm_struct *tx_comms_area;
++ struct vm_struct *rx_comms_area;
++
++ /* Flags that must not be set in dev->features */
++ int features_disabled;
++
++ /* Frontend feature information. */
++ u8 can_sg:1;
++ u8 gso:1;
++ u8 gso_prefix:1;
++ u8 csum:1;
++
++ /* Internal feature information. */
++ u8 can_queue:1; /* can queue packets for receiver? */
++
++ /* Allow netif_be_start_xmit() to peek ahead in the rx request
++ * ring. This is a prediction of what rx_req_cons will be once
++ * all queued skbs are put on the ring. */
++ RING_IDX rx_req_cons_peek;
++
++ /* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
++ unsigned long credit_bytes;
++ unsigned long credit_usec;
++ unsigned long remaining_credit;
++ struct timer_list credit_timeout;
++
++ /* Statistics */
++ int nr_copied_skbs;
++
++ /* Miscellaneous private stuff. */
++ struct list_head list; /* scheduling list */
++ atomic_t refcnt;
++ struct net_device *dev;
++ struct net_device_stats stats;
++
++ unsigned int carrier;
++
++ wait_queue_head_t waiting_to_free;
++};
++
++/*
++ * Implement our own carrier flag: the network stack's version causes delays
++ * when the carrier is re-enabled (in particular, dev_activate() may not
++ * immediately be called, which can cause packet loss; also the etherbridge
++ * can be rather lazy in activating its port).
++ */
++#define netback_carrier_on(netif) ((netif)->carrier = 1)
++#define netback_carrier_off(netif) ((netif)->carrier = 0)
++#define netback_carrier_ok(netif) ((netif)->carrier)
++
++enum {
++ NETBK_DONT_COPY_SKB,
++ NETBK_DELAYED_COPY_SKB,
++ NETBK_ALWAYS_COPY_SKB,
++};
++
++extern int netbk_copy_skb_mode;
++
++struct backend_info {
++ struct xenbus_device *dev;
++ struct xen_netif *netif;
++ enum xenbus_state frontend_state;
++ struct xenbus_watch hotplug_status_watch;
++ int have_hotplug_status_watch:1;
++};
++
++#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
++#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
++
++void netif_disconnect(struct xen_netif *netif);
++
++void netif_set_features(struct xen_netif *netif);
++struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
++ unsigned int handle);
++int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
++ unsigned long rx_ring_ref, unsigned int evtchn);
++
++static inline void netif_get(struct xen_netif *netif)
++{
++ atomic_inc(&netif->refcnt);
++}
++
++static inline void netif_put(struct xen_netif *netif)
++{
++ if (atomic_dec_and_test(&netif->refcnt))
++ wake_up(&netif->waiting_to_free);
++}
++
++int netif_xenbus_init(void);
++
++#define netif_schedulable(netif) \
++ (netif_running((netif)->dev) && netback_carrier_ok(netif))
++
++void netif_schedule_work(struct xen_netif *netif);
++void netif_deschedule_work(struct xen_netif *netif);
++
++int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
++struct net_device_stats *netif_be_get_stats(struct net_device *dev);
++irqreturn_t netif_be_int(int irq, void *dev_id);
++
++static inline int netbk_can_queue(struct net_device *dev)
++{
++ struct xen_netif *netif = netdev_priv(dev);
++ return netif->can_queue;
++}
++
++static inline int netbk_can_sg(struct net_device *dev)
++{
++ struct xen_netif *netif = netdev_priv(dev);
++ return netif->can_sg;
++}
++
++struct pending_tx_info {
++ struct xen_netif_tx_request req;
++ struct xen_netif *netif;
++};
++typedef unsigned int pending_ring_idx_t;
++
++struct netbk_rx_meta {
++ int id;
++ int size;
++ int gso_size;
++};
++
++struct netbk_tx_pending_inuse {
++ struct list_head list;
++ unsigned long alloc_time;
++};
++
++#define MAX_PENDING_REQS 256
++
++#define MAX_BUFFER_OFFSET PAGE_SIZE
++
++/* extra field used in struct page */
++union page_ext {
++ struct {
++#if BITS_PER_LONG < 64
++#define IDX_WIDTH 8
++#define GROUP_WIDTH (BITS_PER_LONG - IDX_WIDTH)
++ unsigned int group:GROUP_WIDTH;
++ unsigned int idx:IDX_WIDTH;
++#else
++ unsigned int group, idx;
++#endif
++ } e;
++ void *mapping;
++};
++
++struct xen_netbk {
++ union {
++ struct {
++ struct tasklet_struct net_tx_tasklet;
++ struct tasklet_struct net_rx_tasklet;
++ } tasklet;
++
++ struct {
++ wait_queue_head_t netbk_action_wq;
++ struct task_struct *task;
++ } kthread;
++ };
++
++ struct sk_buff_head rx_queue;
++ struct sk_buff_head tx_queue;
++
++ struct timer_list net_timer;
++ struct timer_list netbk_tx_pending_timer;
++
++ struct page **mmap_pages;
++
++ pending_ring_idx_t pending_prod;
++ pending_ring_idx_t pending_cons;
++ pending_ring_idx_t dealloc_prod;
++ pending_ring_idx_t dealloc_cons;
++
++ struct list_head pending_inuse_head;
++ struct list_head net_schedule_list;
++
++ /* Protect the net_schedule_list in netif. */
++ spinlock_t net_schedule_list_lock;
++
++ atomic_t netfront_count;
++
++ struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
++ struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
++ struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
++ struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
++
++ grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
++ u16 pending_ring[MAX_PENDING_REQS];
++ u16 dealloc_ring[MAX_PENDING_REQS];
++
++ /*
++ * Each head or fragment can be up to 4096 bytes. Given
++ * MAX_BUFFER_OFFSET of 4096 the worst case is that each
++ * head/fragment uses 2 copy operations.
++ */
++ struct gnttab_copy grant_copy_op[2*NET_RX_RING_SIZE];
++ unsigned char rx_notify[NR_IRQS];
++ u16 notify_list[NET_RX_RING_SIZE];
++ struct netbk_rx_meta meta[2*NET_RX_RING_SIZE];
++};
++
++extern struct xen_netbk *xen_netbk;
++extern int xen_netbk_group_nr;
++
++#endif /* __NETIF__BACKEND__COMMON_H__ */
+diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
+new file mode 100644
+index 0000000..c36db26
+--- /dev/null
++++ b/drivers/net/xen-netback/interface.c
+@@ -0,0 +1,465 @@
++/******************************************************************************
++ * arch/xen/drivers/netif/backend/interface.c
++ *
++ * Network-device interface management.
++ *
++ * Copyright (c) 2004-2005, Keir Fraser
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include "common.h"
++
++#include <linux/ethtool.h>
++#include <linux/rtnetlink.h>
++
++#include <xen/events.h>
++#include <asm/xen/hypercall.h>
++
++/*
++ * Module parameter 'queue_length':
++ *
++ * Enables queuing in the network stack when a client has run out of receive
++ * descriptors.
++ */
++static unsigned long netbk_queue_length = 32;
++module_param_named(queue_length, netbk_queue_length, ulong, 0644);
++
++static void netbk_add_netif(struct xen_netbk *netbk, int group_nr,
++ struct xen_netif *netif)
++{
++ int i;
++ int min_netfront_count;
++ int min_group = 0;
++ min_netfront_count = atomic_read(&netbk[0].netfront_count);
++ for (i = 0; i < group_nr; i++) {
++ int netfront_count = atomic_read(&netbk[i].netfront_count);
++ if (netfront_count < min_netfront_count) {
++ min_group = i;
++ min_netfront_count = netfront_count;
++ }
++ }
++
++ netif->group = min_group;
++ atomic_inc(&netbk[netif->group].netfront_count);
++}
++
++static void netbk_remove_netif(struct xen_netbk *netbk, struct xen_netif *netif)
++{
++ atomic_dec(&netbk[netif->group].netfront_count);
++}
++
++static void __netif_up(struct xen_netif *netif)
++{
++ netbk_add_netif(xen_netbk, xen_netbk_group_nr, netif);
++ enable_irq(netif->irq);
++ netif_schedule_work(netif);
++}
++
++static void __netif_down(struct xen_netif *netif)
++{
++ disable_irq(netif->irq);
++ netif_deschedule_work(netif);
++ netbk_remove_netif(xen_netbk, netif);
++}
++
++static int net_open(struct net_device *dev)
++{
++ struct xen_netif *netif = netdev_priv(dev);
++ if (netback_carrier_ok(netif)) {
++ __netif_up(netif);
++ netif_start_queue(dev);
++ }
++ return 0;
++}
++
++static int net_close(struct net_device *dev)
++{
++ struct xen_netif *netif = netdev_priv(dev);
++ if (netback_carrier_ok(netif))
++ __netif_down(netif);
++ netif_stop_queue(dev);
++ return 0;
++}
++
++static int netbk_change_mtu(struct net_device *dev, int mtu)
++{
++ int max = netbk_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
++
++ if (mtu > max)
++ return -EINVAL;
++ dev->mtu = mtu;
++ return 0;
++}
++
++void netif_set_features(struct xen_netif *netif)
++{
++ struct net_device *dev = netif->dev;
++ int features = dev->features;
++
++ if (netif->can_sg)
++ features |= NETIF_F_SG;
++ if (netif->gso || netif->gso_prefix)
++ features |= NETIF_F_TSO;
++ if (netif->csum)
++ features |= NETIF_F_IP_CSUM;
++
++ features &= ~(netif->features_disabled);
++
++ if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN)
++ dev->mtu = ETH_DATA_LEN;
++
++ dev->features = features;
++}
++
++static int netbk_set_tx_csum(struct net_device *dev, u32 data)
++{
++ struct xen_netif *netif = netdev_priv(dev);
++ if (data) {
++ if (!netif->csum)
++ return -ENOSYS;
++ netif->features_disabled &= ~NETIF_F_IP_CSUM;
++ } else {
++ netif->features_disabled |= NETIF_F_IP_CSUM;
++ }
++
++ netif_set_features(netif);
++ return 0;
++}
++
++static int netbk_set_sg(struct net_device *dev, u32 data)
++{
++ struct xen_netif *netif = netdev_priv(dev);
++ if (data) {
++ if (!netif->can_sg)
++ return -ENOSYS;
++ netif->features_disabled &= ~NETIF_F_SG;
++ } else {
++ netif->features_disabled |= NETIF_F_SG;
++ }
++
++ netif_set_features(netif);
++ return 0;
++}
++
++static int netbk_set_tso(struct net_device *dev, u32 data)
++{
++ struct xen_netif *netif = netdev_priv(dev);
++ if (data) {
++ if (!netif->gso && !netif->gso_prefix)
++ return -ENOSYS;
++ netif->features_disabled &= ~NETIF_F_TSO;
++ } else {
++ netif->features_disabled |= NETIF_F_TSO;
++ }
++
++ netif_set_features(netif);
++ return 0;
++}
++
++static void netbk_get_drvinfo(struct net_device *dev,
++ struct ethtool_drvinfo *info)
++{
++ strcpy(info->driver, "netbk");
++ strcpy(info->bus_info, dev_name(dev->dev.parent));
++}
++
++static const struct netif_stat {
++ char name[ETH_GSTRING_LEN];
++ u16 offset;
++} netbk_stats[] = {
++ { "copied_skbs", offsetof(struct xen_netif, nr_copied_skbs) },
++};
++
++static int netbk_get_sset_count(struct net_device *dev, int string_set)
++{
++ switch (string_set) {
++ case ETH_SS_STATS:
++ return ARRAY_SIZE(netbk_stats);
++ default:
++ return -EINVAL;
++ }
++}
++
++static void netbk_get_ethtool_stats(struct net_device *dev,
++ struct ethtool_stats *stats, u64 * data)
++{
++ void *netif = netdev_priv(dev);
++ int i;
++
++ for (i = 0; i < ARRAY_SIZE(netbk_stats); i++)
++ data[i] = *(int *)(netif + netbk_stats[i].offset);
++}
++
++static void netbk_get_strings(struct net_device *dev, u32 stringset, u8 * data)
++{
++ int i;
++
++ switch (stringset) {
++ case ETH_SS_STATS:
++ for (i = 0; i < ARRAY_SIZE(netbk_stats); i++)
++ memcpy(data + i * ETH_GSTRING_LEN,
++ netbk_stats[i].name, ETH_GSTRING_LEN);
++ break;
++ }
++}
++
++static struct ethtool_ops network_ethtool_ops = {
++ .get_drvinfo = netbk_get_drvinfo,
++
++ .get_tx_csum = ethtool_op_get_tx_csum,
++ .set_tx_csum = netbk_set_tx_csum,
++ .get_sg = ethtool_op_get_sg,
++ .set_sg = netbk_set_sg,
++ .get_tso = ethtool_op_get_tso,
++ .set_tso = netbk_set_tso,
++ .get_link = ethtool_op_get_link,
++
++ .get_sset_count = netbk_get_sset_count,
++ .get_ethtool_stats = netbk_get_ethtool_stats,
++ .get_strings = netbk_get_strings,
++};
++
++static struct net_device_ops netback_ops = {
++ .ndo_start_xmit = netif_be_start_xmit,
++ .ndo_get_stats = netif_be_get_stats,
++ .ndo_open = net_open,
++ .ndo_stop = net_close,
++ .ndo_change_mtu = netbk_change_mtu,
++};
++
++struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
++ unsigned int handle)
++{
++ int err = 0;
++ struct net_device *dev;
++ struct xen_netif *netif;
++ char name[IFNAMSIZ] = {};
++
++ snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
++ dev = alloc_netdev(sizeof(struct xen_netif), name, ether_setup);
++ if (dev == NULL) {
++ pr_debug("Could not allocate netdev\n");
++ return ERR_PTR(-ENOMEM);
++ }
++
++ SET_NETDEV_DEV(dev, parent);
++
++ netif = netdev_priv(dev);
++ memset(netif, 0, sizeof(*netif));
++ netif->domid = domid;
++ netif->group = -1;
++ netif->handle = handle;
++ netif->can_sg = 1;
++ netif->csum = 1;
++ atomic_set(&netif->refcnt, 1);
++ init_waitqueue_head(&netif->waiting_to_free);
++ netif->dev = dev;
++ INIT_LIST_HEAD(&netif->list);
++
++ netback_carrier_off(netif);
++
++ netif->credit_bytes = netif->remaining_credit = ~0UL;
++ netif->credit_usec = 0UL;
++ init_timer(&netif->credit_timeout);
++ /* Initialize 'expires' now: it's used to track the credit window. */
++ netif->credit_timeout.expires = jiffies;
++
++ dev->netdev_ops = &netback_ops;
++ netif_set_features(netif);
++ SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
++
++ dev->tx_queue_len = netbk_queue_length;
++
++ /*
++ * Initialise a dummy MAC address. We choose the numerically
++ * largest non-broadcast address to prevent the address getting
++ * stolen by an Ethernet bridge for STP purposes.
++ * (FE:FF:FF:FF:FF:FF)
++ */
++ memset(dev->dev_addr, 0xFF, ETH_ALEN);
++ dev->dev_addr[0] &= ~0x01;
++
++ rtnl_lock();
++ err = register_netdevice(dev);
++ rtnl_unlock();
++ if (err) {
++ pr_debug("Could not register new net device %s: err=%d\n",
++ dev->name, err);
++ free_netdev(dev);
++ return ERR_PTR(err);
++ }
++
++ pr_debug("Successfully created netif\n");
++ return netif;
++}
++
++static int map_frontend_pages(struct xen_netif *netif,
++ grant_ref_t tx_ring_ref,
++ grant_ref_t rx_ring_ref)
++{
++ struct gnttab_map_grant_ref op;
++
++ gnttab_set_map_op(&op, (unsigned long)netif->tx_comms_area->addr,
++ GNTMAP_host_map, tx_ring_ref, netif->domid);
++
++ if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
++ BUG();
++
++ if (op.status) {
++ pr_debug("Gnttab failure mapping tx_ring_ref!\n");
++ return op.status;
++ }
++
++ netif->tx_shmem_ref = tx_ring_ref;
++ netif->tx_shmem_handle = op.handle;
++
++ gnttab_set_map_op(&op, (unsigned long)netif->rx_comms_area->addr,
++ GNTMAP_host_map, rx_ring_ref, netif->domid);
++
++ if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
++ BUG();
++
++ if (op.status) {
++ struct gnttab_unmap_grant_ref unop;
++
++ gnttab_set_unmap_op(&unop,
++ (unsigned long)netif->tx_comms_area->addr,
++ GNTMAP_host_map, netif->tx_shmem_handle);
++ HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unop, 1);
++ pr_debug("Gnttab failure mapping rx_ring_ref!\n");
++ return op.status;
++ }
++
++ netif->rx_shmem_ref = rx_ring_ref;
++ netif->rx_shmem_handle = op.handle;
++
++ return 0;
++}
++
++static void unmap_frontend_pages(struct xen_netif *netif)
++{
++ struct gnttab_unmap_grant_ref op;
++
++ gnttab_set_unmap_op(&op, (unsigned long)netif->tx_comms_area->addr,
++ GNTMAP_host_map, netif->tx_shmem_handle);
++
++ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
++ BUG();
++
++ gnttab_set_unmap_op(&op, (unsigned long)netif->rx_comms_area->addr,
++ GNTMAP_host_map, netif->rx_shmem_handle);
++
++ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
++ BUG();
++}
++
++int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
++ unsigned long rx_ring_ref, unsigned int evtchn)
++{
++ int err = -ENOMEM;
++ struct xen_netif_tx_sring *txs;
++ struct xen_netif_rx_sring *rxs;
++
++ /* Already connected through? */
++ if (netif->irq)
++ return 0;
++
++ netif->tx_comms_area = alloc_vm_area(PAGE_SIZE);
++ if (netif->tx_comms_area == NULL)
++ return -ENOMEM;
++ netif->rx_comms_area = alloc_vm_area(PAGE_SIZE);
++ if (netif->rx_comms_area == NULL)
++ goto err_rx;
++
++ err = map_frontend_pages(netif, tx_ring_ref, rx_ring_ref);
++ if (err)
++ goto err_map;
++
++ err = bind_interdomain_evtchn_to_irqhandler(
++ netif->domid, evtchn, netif_be_int, 0,
++ netif->dev->name, netif);
++ if (err < 0)
++ goto err_hypervisor;
++ netif->irq = err;
++ disable_irq(netif->irq);
++
++ txs = (struct xen_netif_tx_sring *)netif->tx_comms_area->addr;
++ BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE);
++
++ rxs = (struct xen_netif_rx_sring *)
++ ((char *)netif->rx_comms_area->addr);
++ BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE);
++
++ netif->rx_req_cons_peek = 0;
++
++ netif_get(netif);
++
++ rtnl_lock();
++ netback_carrier_on(netif);
++ if (netif_running(netif->dev))
++ __netif_up(netif);
++ rtnl_unlock();
++
++ return 0;
++err_hypervisor:
++ unmap_frontend_pages(netif);
++err_map:
++ free_vm_area(netif->rx_comms_area);
++err_rx:
++ free_vm_area(netif->tx_comms_area);
++ return err;
++}
++
++void netif_disconnect(struct xen_netif *netif)
++{
++ if (netback_carrier_ok(netif)) {
++ rtnl_lock();
++ netback_carrier_off(netif);
++ netif_carrier_off(netif->dev); /* discard queued packets */
++ if (netif_running(netif->dev))
++ __netif_down(netif);
++ rtnl_unlock();
++ netif_put(netif);
++ }
++
++ atomic_dec(&netif->refcnt);
++ wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0);
++
++ del_timer_sync(&netif->credit_timeout);
++
++ if (netif->irq)
++ unbind_from_irqhandler(netif->irq, netif);
++
++ unregister_netdev(netif->dev);
++
++ if (netif->tx.sring) {
++ unmap_frontend_pages(netif);
++ free_vm_area(netif->tx_comms_area);
++ free_vm_area(netif->rx_comms_area);
++ }
++
++ free_netdev(netif->dev);
++}
+diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
+new file mode 100644
+index 0000000..e0ca232
+--- /dev/null
++++ b/drivers/net/xen-netback/netback.c
+@@ -0,0 +1,1909 @@
++/*
++ * Back-end of the driver for virtual network devices. This portion of the
++ * driver exports a 'unified' network-device interface that can be accessed
++ * by any operating system that implements a compatible front end. A
++ * reference front-end implementation can be found in:
++ * drivers/net/xen-netfront.c
++ *
++ * Copyright (c) 2002-2005, K A Fraser
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include "common.h"
++
++#include <linux/kthread.h>
++#include <linux/if_vlan.h>
++#include <linux/udp.h>
++
++#include <net/tcp.h>
++
++#include <xen/balloon.h>
++#include <xen/events.h>
++#include <xen/interface/memory.h>
++
++#include <asm/xen/hypercall.h>
++#include <asm/xen/page.h>
++
++/*define NETBE_DEBUG_INTERRUPT*/
++
++struct xen_netbk *xen_netbk;
++int xen_netbk_group_nr;
++
++static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx);
++static void make_tx_response(struct xen_netif *netif,
++ struct xen_netif_tx_request *txp,
++ s8 st);
++static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
++ u16 id,
++ s8 st,
++ u16 offset,
++ u16 size,
++ u16 flags);
++
++static void net_tx_action(unsigned long data);
++
++static void net_rx_action(unsigned long data);
++
++static inline unsigned long idx_to_pfn(struct xen_netbk *netbk,
++ unsigned int idx)
++{
++ return page_to_pfn(netbk->mmap_pages[idx]);
++}
++
++static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
++ unsigned int idx)
++{
++ return (unsigned long)pfn_to_kaddr(idx_to_pfn(netbk, idx));
++}
++
++/* extra field used in struct page */
++static inline void netif_set_page_ext(struct page *pg,
++ unsigned int group, unsigned int idx)
++{
++ union page_ext ext = { .e = { .group = group + 1, .idx = idx } };
++
++ BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping));
++ pg->mapping = ext.mapping;
++}
++
++static int netif_get_page_ext(struct page *pg,
++ unsigned int *_group, unsigned int *_idx)
++{
++ union page_ext ext = { .mapping = pg->mapping };
++ struct xen_netbk *netbk;
++ unsigned int group, idx;
++
++ if (!PageForeign(pg))
++ return 0;
++
++ group = ext.e.group - 1;
++
++ if (group < 0 || group >= xen_netbk_group_nr)
++ return 0;
++
++ netbk = &xen_netbk[group];
++
++ if (netbk->mmap_pages == NULL)
++ return 0;
++
++ idx = ext.e.idx;
++
++ if ((idx < 0) || (idx >= MAX_PENDING_REQS))
++ return 0;
++
++ if (netbk->mmap_pages[idx] != pg)
++ return 0;
++
++ *_group = group;
++ *_idx = idx;
++
++ return 1;
++}
++
++/*
++ * This is the amount of packet we copy rather than map, so that the
++ * guest can't fiddle with the contents of the headers while we do
++ * packet processing on them (netfilter, routing, etc).
++ */
++#define PKT_PROT_LEN (ETH_HLEN + \
++ VLAN_HLEN + \
++ sizeof(struct iphdr) + MAX_IPOPTLEN + \
++ sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE)
++
++static inline pending_ring_idx_t pending_index(unsigned i)
++{
++ return i & (MAX_PENDING_REQS-1);
++}
++
++static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk)
++{
++ return MAX_PENDING_REQS -
++ netbk->pending_prod + netbk->pending_cons;
++}
++
++/* Setting this allows the safe use of this driver without netloop. */
++static int MODPARM_copy_skb = 1;
++module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
++MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
++
++int netbk_copy_skb_mode;
++
++static int MODPARM_netback_kthread;
++module_param_named(netback_kthread, MODPARM_netback_kthread, bool, 0);
++MODULE_PARM_DESC(netback_kthread, "Use kernel thread to replace tasklet");
++
++/*
++ * Netback bottom half handler.
++ * dir indicates the data direction.
++ * rx: 1, tx: 0.
++ */
++static inline void xen_netbk_bh_handler(struct xen_netbk *netbk, int dir)
++{
++ if (MODPARM_netback_kthread)
++ wake_up(&netbk->kthread.netbk_action_wq);
++ else if (dir)
++ tasklet_schedule(&netbk->tasklet.net_rx_tasklet);
++ else
++ tasklet_schedule(&netbk->tasklet.net_tx_tasklet);
++}
++
++static inline void maybe_schedule_tx_action(struct xen_netbk *netbk)
++{
++ smp_mb();
++ if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
++ !list_empty(&netbk->net_schedule_list))
++ xen_netbk_bh_handler(netbk, 0);
++}
++
++static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
++{
++ struct skb_shared_info *ninfo;
++ struct sk_buff *nskb;
++ unsigned long offset;
++ int ret;
++ int len;
++ int headlen;
++
++ BUG_ON(skb_shinfo(skb)->frag_list != NULL);
++
++ nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN);
++ if (unlikely(!nskb))
++ goto err;
++
++ skb_reserve(nskb, NET_SKB_PAD + NET_IP_ALIGN);
++ headlen = skb_end_pointer(nskb) - nskb->data;
++ if (headlen > skb_headlen(skb))
++ headlen = skb_headlen(skb);
++ ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
++ BUG_ON(ret);
++
++ ninfo = skb_shinfo(nskb);
++ ninfo->gso_size = skb_shinfo(skb)->gso_size;
++ ninfo->gso_type = skb_shinfo(skb)->gso_type;
++
++ offset = headlen;
++ len = skb->len - headlen;
++
++ nskb->len = skb->len;
++ nskb->data_len = len;
++ nskb->truesize += len;
++
++ while (len) {
++ struct page *page;
++ int copy;
++ int zero;
++
++ if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
++ dump_stack();
++ goto err_free;
++ }
++
++ copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
++ zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
++
++ page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
++ if (unlikely(!page))
++ goto err_free;
++
++ ret = skb_copy_bits(skb, offset, page_address(page), copy);
++ BUG_ON(ret);
++
++ ninfo->frags[ninfo->nr_frags].page = page;
++ ninfo->frags[ninfo->nr_frags].page_offset = 0;
++ ninfo->frags[ninfo->nr_frags].size = copy;
++ ninfo->nr_frags++;
++
++ offset += copy;
++ len -= copy;
++ }
++
++#ifdef NET_SKBUFF_DATA_USES_OFFSET
++ offset = 0;
++#else
++ offset = nskb->data - skb->data;
++#endif
++
++ nskb->transport_header = skb->transport_header + offset;
++ nskb->network_header = skb->network_header + offset;
++ nskb->mac_header = skb->mac_header + offset;
++
++ return nskb;
++
++ err_free:
++ kfree_skb(nskb);
++ err:
++ return NULL;
++}
++
++static inline int netbk_max_required_rx_slots(struct xen_netif *netif)
++{
++ if (netif->can_sg || netif->gso || netif->gso_prefix)
++ return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
++ return 1; /* all in one */
++}
++
++static inline int netbk_queue_full(struct xen_netif *netif)
++{
++ RING_IDX peek = netif->rx_req_cons_peek;
++ RING_IDX needed = netbk_max_required_rx_slots(netif);
++
++ return ((netif->rx.sring->req_prod - peek) < needed) ||
++ ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
++}
++
++/*
++ * Returns true if we should start a new receive buffer instead of
++ * adding 'size' bytes to a buffer which currently contains 'offset'
++ * bytes.
++ */
++static bool start_new_rx_buffer(int offset, unsigned long size, int head)
++{
++ /* simple case: we have completely filled the current buffer. */
++ if (offset == MAX_BUFFER_OFFSET)
++ return true;
++
++ /*
++ * complex case: start a fresh buffer if the current frag
++ * would overflow the current buffer but only if:
++ * (i) this frag would fit completely in the next buffer
++ * and (ii) there is already some data in the current buffer
++ * and (iii) this is not the head buffer.
++ *
++ * Where:
++ * - (i) stops us splitting a frag into two copies
++ * unless the frag is too large for a single buffer.
++ * - (ii) stops us from leaving a buffer pointlessly empty.
++ * - (iii) stops us leaving the first buffer
++ * empty. Strictly speaking this is already covered
++ * by (ii) but is explicitly checked because
++ * netfront relies on the first buffer being
++ * non-empty and can crash otherwise.
++ *
++ * This means we will effectively linearise small
++ * frags but do not needlessly split large buffers
++ * into multiple copies, tending to give large frags
++ * their own buffers as before.
++ */
++ if ((offset + size > MAX_BUFFER_OFFSET) &&
++ (size <= MAX_BUFFER_OFFSET) && offset && !head)
++ return true;
++
++ return false;
++}
++
++/*
++ * Figure out how many ring slots we're going to need to send @skb to
++ * the guest. This function is essentially a read-only dry run of
++ * netbk_gop_frag_copy: it mirrors its buffer-splitting decisions.
++ */
++static unsigned int count_skb_slots(struct sk_buff *skb, struct xen_netif *netif)
++{
++	unsigned int count = 1; /* one slot for the linear (head) area */
++	int i, copy_off = 0;
++
++	BUG_ON(offset_in_page(skb->data)+skb_headlen(skb) > MAX_BUFFER_OFFSET);
++
++	copy_off = skb_headlen(skb);
++
++	if (skb_shinfo(skb)->gso_size)
++		count++; /* extra slot for the GSO extra-info descriptor */
++
++	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
++		unsigned long size = skb_shinfo(skb)->frags[i].size;
++		unsigned long bytes;
++		while (size > 0) {
++			BUG_ON(copy_off > MAX_BUFFER_OFFSET);
++
++			if (start_new_rx_buffer(copy_off, size, 0)) { /* 0: never the head here */
++				count++;
++				copy_off = 0;
++			}
++
++			bytes = size;
++			if (copy_off + bytes > MAX_BUFFER_OFFSET)
++				bytes = MAX_BUFFER_OFFSET - copy_off;
++
++			copy_off += bytes;
++			size -= bytes;
++		}
++	}
++	return count;
++}
++
++int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
++{
++	struct xen_netif *netif = netdev_priv(dev);
++	struct xen_netbk *netbk;
++
++	BUG_ON(skb->dev != dev);
++
++	if (netif->group == -1)
++		goto drop; /* interface not yet assigned to a netbk group */
++
++	netbk = &xen_netbk[netif->group];
++
++	/* Drop the packet if the target domain has no receive buffers. */
++	if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
++		goto drop;
++
++	/*
++	 * XXX For now we also copy skbuffs whose head crosses a page
++	 * boundary, because netbk_gop_skb can't handle them.
++	 */
++	if ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE) {
++		struct sk_buff *nskb = netbk_copy_skb(skb);
++		if (unlikely(nskb == NULL))
++			goto drop;
++		/* Copy only the header fields we use in this driver. */
++		nskb->dev = skb->dev;
++		nskb->ip_summed = skb->ip_summed;
++		dev_kfree_skb(skb);
++		skb = nskb;
++	}
++
++	/* Reserve ring slots for the worst-case number of fragments. */
++	netif->rx_req_cons_peek += count_skb_slots(skb, netif);
++	netif_get(netif); /* ref held until net_rx_action completes the skb */
++
++	if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
++		netif->rx.sring->req_event = netif->rx_req_cons_peek +
++			netbk_max_required_rx_slots(netif);
++		mb(); /* request notification /then/ check & stop the queue */
++		if (netbk_queue_full(netif))
++			netif_stop_queue(dev);
++	}
++	skb_queue_tail(&netbk->rx_queue, skb);
++
++	xen_netbk_bh_handler(netbk, 1); /* kick the rx side */
++
++	return 0;
++
++ drop:
++	netif->stats.tx_dropped++;
++	dev_kfree_skb(skb);
++	return 0;
++}
++
++struct netrx_pending_operations {
++	unsigned copy_prod, copy_cons;	/* grant-copy ops produced/checked */
++	unsigned meta_prod, meta_cons;	/* meta slots produced/consumed */
++	struct gnttab_copy *copy;	/* pending grant-copy operations */
++	struct netbk_rx_meta *meta;	/* per-receive-buffer metadata */
++	int copy_off;			/* write offset into current buffer */
++	grant_ref_t copy_gref;		/* grant ref of current buffer */
++};
++
++static struct netbk_rx_meta *get_next_rx_buffer(struct xen_netif *netif,
++						struct netrx_pending_operations *npo)
++{
++	struct netbk_rx_meta *meta;
++	struct xen_netif_rx_request *req;
++
++	req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++); /* consume next rx request */
++
++	meta = npo->meta + npo->meta_prod++;
++	meta->gso_size = 0;
++	meta->size = 0;
++	meta->id = req->id;
++
++	npo->copy_off = 0; /* fresh buffer: start writing at offset 0 */
++	npo->copy_gref = req->gref;
++
++	return meta;
++}
++
++/*
++ * Set up the grant-copy operations for this fragment, starting fresh
++ * receive buffers as needed (see start_new_rx_buffer for the policy).
++ */
++static void netbk_gop_frag_copy(struct xen_netif *netif,
++				struct netrx_pending_operations *npo,
++				struct page *page, unsigned long size,
++				unsigned long offset, int head)
++{
++	struct gnttab_copy *copy_gop;
++	struct netbk_rx_meta *meta;
++	/*
++	 * These variables are used iff netif_get_page_ext returns true,
++	 * in which case they are guaranteed to be initialized.
++	 */
++	unsigned int uninitialized_var(group), uninitialized_var(idx);
++	int foreign = netif_get_page_ext(page, &group, &idx);
++	unsigned long bytes;
++
++	/* Data must not cross a page boundary. */
++	BUG_ON(size + offset > PAGE_SIZE);
++
++	meta = npo->meta + npo->meta_prod - 1; /* continue the current buffer */
++
++	while (size > 0) {
++		BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
++
++		if (start_new_rx_buffer(npo->copy_off, size, head)) {
++			/*
++			 * Netfront requires there to be some data in
++			 * the head buffer.
++			 */
++			BUG_ON(head);
++
++			meta = get_next_rx_buffer(netif, npo);
++		}
++
++		bytes = size;
++		if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
++			bytes = MAX_BUFFER_OFFSET - npo->copy_off;
++
++		copy_gop = npo->copy + npo->copy_prod++;
++		copy_gop->flags = GNTCOPY_dest_gref;
++		if (foreign) {
++			/* Page came from another guest: copy grant-to-grant. */
++			struct xen_netbk *netbk = &xen_netbk[group];
++			struct pending_tx_info *src_pend;
++
++			src_pend = &netbk->pending_tx_info[idx];
++
++			copy_gop->source.domid = src_pend->netif->domid;
++			copy_gop->source.u.ref = src_pend->req.gref;
++			copy_gop->flags |= GNTCOPY_source_gref;
++		} else {
++			void *vaddr = page_address(page);
++			copy_gop->source.domid = DOMID_SELF;
++			copy_gop->source.u.gmfn = virt_to_mfn(vaddr);
++		}
++		copy_gop->source.offset = offset;
++		copy_gop->dest.domid = netif->domid;
++
++		copy_gop->dest.offset = npo->copy_off;
++		copy_gop->dest.u.ref = npo->copy_gref;
++		copy_gop->len = bytes;
++
++		npo->copy_off += bytes;
++		meta->size += bytes;
++
++		offset += bytes;
++		size -= bytes;
++		head = 0; /* There must be something in this buffer now. */
++	}
++}
++
++/*
++ * Prepare an SKB to be transmitted to the frontend.
++ *
++ * This function is responsible for allocating grant operations, meta
++ * structures, etc.
++ *
++ * It returns the number of meta structures consumed. The number of
++ * ring slots used is always equal to the number of meta slots used
++ * plus the number of GSO descriptors used. Currently, we use either
++ * zero GSO descriptors (for non-GSO packets) or one descriptor (for
++ * frontend-side LRO).
++ */
++static int netbk_gop_skb(struct sk_buff *skb,
++			 struct netrx_pending_operations *npo)
++{
++	struct xen_netif *netif = netdev_priv(skb->dev);
++	int nr_frags = skb_shinfo(skb)->nr_frags;
++	int i;
++	struct xen_netif_rx_request *req;
++	struct netbk_rx_meta *meta;
++	int old_meta_prod;
++
++	old_meta_prod = npo->meta_prod;
++
++	/* Set up a GSO prefix descriptor, if necessary */
++	if (skb_shinfo(skb)->gso_size && netif->gso_prefix) {
++		req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
++		meta = npo->meta + npo->meta_prod++;
++		meta->gso_size = skb_shinfo(skb)->gso_size;
++		meta->size = 0; /* prefix slot carries no packet data */
++		meta->id = req->id;
++	}
++
++	req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
++	meta = npo->meta + npo->meta_prod++;
++
++	if (!netif->gso_prefix)
++		meta->gso_size = skb_shinfo(skb)->gso_size;
++	else
++		meta->gso_size = 0; /* gso_size already recorded on prefix slot */
++
++	meta->size = 0;
++	meta->id = req->id;
++	npo->copy_off = 0;
++	npo->copy_gref = req->gref;
++
++	netbk_gop_frag_copy(netif,
++			    npo, virt_to_page(skb->data),
++			    skb_headlen(skb),
++			    offset_in_page(skb->data), 1);
++
++	/* Leave a gap for the GSO descriptor. */
++	if (skb_shinfo(skb)->gso_size && !netif->gso_prefix)
++		netif->rx.req_cons++;
++
++	for (i = 0; i < nr_frags; i++) {
++		netbk_gop_frag_copy(netif, npo,
++				    skb_shinfo(skb)->frags[i].page,
++				    skb_shinfo(skb)->frags[i].size,
++				    skb_shinfo(skb)->frags[i].page_offset,
++				    0);
++	}
++
++	return npo->meta_prod - old_meta_prod;
++}
++
++/*
++ * This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was
++ * used to set up the operations on the top of
++ * netrx_pending_operations, which have since been done. Check that
++ * they didn't give any errors and advance over them.
++ */
++static int netbk_check_gop(int nr_meta_slots, domid_t domid,
++			struct netrx_pending_operations *npo)
++{
++	struct gnttab_copy *copy_op;
++	int status = NETIF_RSP_OKAY;
++	int i;
++
++	for (i = 0; i < nr_meta_slots; i++) { /* NOTE(review): assumes one copy op per meta slot; a frag split across buffers emits more — confirm */
++		copy_op = npo->copy + npo->copy_cons++;
++		if (copy_op->status != GNTST_okay) {
++			pr_debug("Bad status %d from copy to DOM%d.\n",
++				 copy_op->status, domid);
++			status = NETIF_RSP_ERROR; /* any failed copy fails the whole skb */
++		}
++	}
++
++	return status;
++}
++
++static void netbk_add_frag_responses(struct xen_netif *netif, int status,
++				     struct netbk_rx_meta *meta,
++				     int nr_meta_slots)
++{
++	int i;
++	unsigned long offset;
++
++	/* No fragments used */
++	if (nr_meta_slots <= 1)
++		return;
++
++	nr_meta_slots--; /* the head slot was answered by the caller */
++
++	for (i = 0; i < nr_meta_slots; i++) {
++		int flags;
++		if (i == nr_meta_slots - 1)
++			flags = 0; /* last response: no more data */
++		else
++			flags = NETRXF_more_data;
++
++		offset = 0;
++		make_rx_response(netif, meta[i].id, status, offset,
++				 meta[i].size, flags);
++	}
++}
++
++struct skb_cb_overlay {
++	int meta_slots_used; /* stashed in skb->cb between gop and response */
++};
++
++static void net_rx_action(unsigned long data)
++{
++	struct xen_netif *netif = NULL;
++	struct xen_netbk *netbk = (struct xen_netbk *)data;
++	s8 status;
++	u16 irq, flags;
++	struct xen_netif_rx_response *resp;
++	struct sk_buff_head rxq;
++	struct sk_buff *skb;
++	int notify_nr = 0;
++	int ret;
++	int nr_frags;
++	int count;
++	unsigned long offset;
++	struct skb_cb_overlay *sco;
++
++	struct netrx_pending_operations npo = {
++		.copy = netbk->grant_copy_op,
++		.meta = netbk->meta,
++	};
++
++	skb_queue_head_init(&rxq);
++
++	count = 0;
++
++	/* Phase 1: drain rx_queue, building grant-copy ops for each skb. */
++	while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) {
++		netif = netdev_priv(skb->dev);
++		nr_frags = skb_shinfo(skb)->nr_frags;
++
++		sco = (struct skb_cb_overlay *)skb->cb;
++		sco->meta_slots_used = netbk_gop_skb(skb, &npo);
++
++		count += nr_frags + 1;
++
++		__skb_queue_tail(&rxq, skb);
++
++		/* Filled the batch queue? */
++		if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE)
++			break;
++	}
++
++	BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
++
++	if (!npo.copy_prod)
++		return; /* nothing was queued */
++
++	BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
++	ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, &netbk->grant_copy_op,
++					npo.copy_prod);
++	BUG_ON(ret != 0);
++
++	/* Phase 2: copies done; post responses on each frontend's ring. */
++	while ((skb = __skb_dequeue(&rxq)) != NULL) {
++		sco = (struct skb_cb_overlay *)skb->cb;
++
++		netif = netdev_priv(skb->dev);
++
++		if (netbk->meta[npo.meta_cons].gso_size && netif->gso_prefix) {
++			resp = RING_GET_RESPONSE(&netif->rx,
++						netif->rx.rsp_prod_pvt++);
++
++			resp->flags = NETRXF_gso_prefix | NETRXF_more_data;
++
++			/* prefix descriptor: offset/status fields are reused */
++			resp->offset = netbk->meta[npo.meta_cons].gso_size;
++			resp->id = netbk->meta[npo.meta_cons].id;
++			resp->status = sco->meta_slots_used;
++
++			npo.meta_cons++;
++			sco->meta_slots_used--;
++		}
++
++
++		netif->stats.tx_bytes += skb->len;
++		netif->stats.tx_packets++;
++
++		status = netbk_check_gop(sco->meta_slots_used,
++					 netif->domid, &npo);
++
++		if (sco->meta_slots_used == 1)
++			flags = 0;
++		else
++			flags = NETRXF_more_data;
++
++		if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
++			flags |= NETRXF_csum_blank | NETRXF_data_validated;
++		else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
++			/* remote but checksummed. */
++			flags |= NETRXF_data_validated;
++
++		offset = 0;
++		resp = make_rx_response(netif, netbk->meta[npo.meta_cons].id,
++					status, offset,
++					netbk->meta[npo.meta_cons].size,
++					flags);
++
++		if (netbk->meta[npo.meta_cons].gso_size && !netif->gso_prefix) {
++			struct xen_netif_extra_info *gso =
++				(struct xen_netif_extra_info *)
++				RING_GET_RESPONSE(&netif->rx,
++						  netif->rx.rsp_prod_pvt++);
++
++			resp->flags |= NETRXF_extra_info;
++
++			gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size;
++			gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
++			gso->u.gso.pad = 0;
++			gso->u.gso.features = 0;
++
++			gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
++			gso->flags = 0;
++		}
++
++		netbk_add_frag_responses(netif, status,
++					 netbk->meta + npo.meta_cons + 1,
++					 sco->meta_slots_used);
++
++		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
++		irq = netif->irq;
++		if (ret && !netbk->rx_notify[irq]) { /* queue each irq once */
++			netbk->rx_notify[irq] = 1;
++			netbk->notify_list[notify_nr++] = irq;
++		}
++
++		if (netif_queue_stopped(netif->dev) &&
++		    netif_schedulable(netif) &&
++		    !netbk_queue_full(netif))
++			netif_wake_queue(netif->dev);
++
++		netif_put(netif); /* drop the ref taken in netif_be_start_xmit */
++		npo.meta_cons += sco->meta_slots_used;
++		dev_kfree_skb(skb);
++	}
++
++	while (notify_nr != 0) {
++		irq = netbk->notify_list[--notify_nr];
++		netbk->rx_notify[irq] = 0;
++		notify_remote_via_irq(irq);
++	}
++
++	/* More work to do? */
++	if (!skb_queue_empty(&netbk->rx_queue) &&
++	    !timer_pending(&netbk->net_timer))
++		xen_netbk_bh_handler(netbk, 1);
++}
++
++static void net_alarm(unsigned long data)
++{
++	struct xen_netbk *netbk = (struct xen_netbk *)data;
++	xen_netbk_bh_handler(netbk, 1); /* timer: re-kick the rx side */
++}
++
++static void netbk_tx_pending_timeout(unsigned long data)
++{
++	struct xen_netbk *netbk = (struct xen_netbk *)data;
++	xen_netbk_bh_handler(netbk, 0); /* timer: re-kick the tx side */
++}
++
++struct net_device_stats *netif_be_get_stats(struct net_device *dev)
++{
++	struct xen_netif *netif = netdev_priv(dev);
++	return &netif->stats; /* stats live in the per-interface state */
++}
++
++static int __on_net_schedule_list(struct xen_netif *netif)
++{
++	return !list_empty(&netif->list); /* linked iff scheduled */
++}
++
++/* Must be called with net_schedule_list_lock held */
++static void remove_from_net_schedule_list(struct xen_netif *netif)
++{
++	if (likely(__on_net_schedule_list(netif))) {
++		list_del_init(&netif->list);
++		netif_put(netif); /* drop the ref taken when scheduled */
++	}
++}
++
++static struct xen_netif *poll_net_schedule_list(struct xen_netbk *netbk)
++{
++	struct xen_netif *netif = NULL;
++
++	spin_lock_irq(&netbk->net_schedule_list_lock);
++	if (list_empty(&netbk->net_schedule_list))
++		goto out;
++
++	netif = list_first_entry(&netbk->net_schedule_list,
++				 struct xen_netif, list);
++	if (!netif) /* NOTE(review): list_first_entry never returns NULL; check looks redundant — confirm */
++		goto out;
++
++	netif_get(netif); /* caller's reference */
++
++	remove_from_net_schedule_list(netif);
++out:
++	spin_unlock_irq(&netbk->net_schedule_list_lock);
++	return netif;
++}
++
++static void add_to_net_schedule_list_tail(struct xen_netif *netif)
++{
++	unsigned long flags;
++
++	struct xen_netbk *netbk = &xen_netbk[netif->group];
++	if (__on_net_schedule_list(netif))
++		return; /* unlocked fast path; re-checked under the lock below */
++
++	spin_lock_irqsave(&netbk->net_schedule_list_lock, flags);
++	if (!__on_net_schedule_list(netif) &&
++	    likely(netif_schedulable(netif))) {
++		list_add_tail(&netif->list, &netbk->net_schedule_list);
++		netif_get(netif); /* ref held while on the schedule list */
++	}
++	spin_unlock_irqrestore(&netbk->net_schedule_list_lock, flags);
++}
++
++void netif_schedule_work(struct xen_netif *netif)
++{
++	struct xen_netbk *netbk = &xen_netbk[netif->group];
++	int more_to_do;
++
++	RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
++
++	if (more_to_do) {
++		add_to_net_schedule_list_tail(netif);
++		maybe_schedule_tx_action(netbk);
++	}
++}
++
++void netif_deschedule_work(struct xen_netif *netif)
++{
++	struct xen_netbk *netbk = &xen_netbk[netif->group];
++	spin_lock_irq(&netbk->net_schedule_list_lock);
++	remove_from_net_schedule_list(netif);
++	spin_unlock_irq(&netbk->net_schedule_list_lock);
++}
++
++
++static void tx_add_credit(struct xen_netif *netif)
++{
++	unsigned long max_burst, max_credit;
++
++	/*
++	 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
++	 * Otherwise the interface can seize up due to insufficient credit.
++	 */
++	max_burst = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size;
++	max_burst = min(max_burst, 131072UL);
++	max_burst = max(max_burst, netif->credit_bytes);
++
++	/* Take care that adding a new chunk of credit doesn't wrap to zero. */
++	max_credit = netif->remaining_credit + netif->credit_bytes;
++	if (max_credit < netif->remaining_credit)
++		max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
++
++	netif->remaining_credit = min(max_credit, max_burst);
++}
++
++static void tx_credit_callback(unsigned long data)
++{
++	struct xen_netif *netif = (struct xen_netif *)data;
++	tx_add_credit(netif); /* credit period elapsed: replenish and retry */
++	netif_schedule_work(netif);
++}
++
++static inline int copy_pending_req(struct xen_netbk *netbk,
++				   pending_ring_idx_t pending_idx)
++{
++	/* Break the grant mapping by copying the page into a fresh one. */
++	return gnttab_copy_grant_page(
++		netbk->grant_tx_handle[pending_idx],
++		&netbk->mmap_pages[pending_idx]);
++}
++
++static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
++{
++	struct netbk_tx_pending_inuse *inuse, *n;
++	struct gnttab_unmap_grant_ref *gop;
++	u16 pending_idx;
++	pending_ring_idx_t dc, dp;
++	struct xen_netif *netif;
++	int ret;
++	LIST_HEAD(list);
++
++	dc = netbk->dealloc_cons;
++	gop = netbk->tx_unmap_ops;
++
++	/* Free up any grants we have finished using. */
++	do {
++		dp = netbk->dealloc_prod;
++
++		/* Ensure we see all indices enqueued by netif_idx_release(). */
++		smp_rmb();
++
++		while (dc != dp) {
++			unsigned long pfn;
++			struct netbk_tx_pending_inuse *pending_inuse =
++					netbk->pending_inuse;
++
++			pending_idx = netbk->dealloc_ring[pending_index(dc++)];
++			list_move_tail(&pending_inuse[pending_idx].list, &list);
++
++			pfn = idx_to_pfn(netbk, pending_idx);
++			/* Already unmapped? */
++			if (!phys_to_machine_mapping_valid(pfn))
++				continue;
++
++			gnttab_set_unmap_op(gop,
++					idx_to_kaddr(netbk, pending_idx),
++					GNTMAP_host_map,
++					netbk->grant_tx_handle[pending_idx]);
++			gop++;
++		}
++
++	} while (dp != netbk->dealloc_prod); /* re-check: producer may have advanced */
++
++	netbk->dealloc_cons = dc;
++
++	ret = HYPERVISOR_grant_table_op(
++		GNTTABOP_unmap_grant_ref, netbk->tx_unmap_ops,
++		gop - netbk->tx_unmap_ops);
++	BUG_ON(ret);
++
++	/*
++	 * Copy any entries that have been pending for too long
++	 */
++	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
++	    !list_empty(&netbk->pending_inuse_head)) {
++		list_for_each_entry_safe(inuse, n,
++				&netbk->pending_inuse_head, list) {
++			struct pending_tx_info *pending_tx_info;
++			pending_tx_info = netbk->pending_tx_info;
++
++			if (time_after(inuse->alloc_time + HZ / 2, jiffies))
++				break; /* list is age-ordered; rest are younger */
++
++			pending_idx = inuse - netbk->pending_inuse;
++
++			pending_tx_info[pending_idx].netif->nr_copied_skbs++;
++
++			switch (copy_pending_req(netbk, pending_idx)) {
++			case 0:
++				list_move_tail(&inuse->list, &list);
++				continue;
++			case -EBUSY:
++				list_del_init(&inuse->list);
++				continue;
++			case -ENOENT:
++				continue;
++			}
++
++			break; /* unexpected error: stop copying */
++		}
++	}
++
++	/* Everything on @list is finished: respond and recycle the slots. */
++	list_for_each_entry_safe(inuse, n, &list, list) {
++		struct pending_tx_info *pending_tx_info;
++		pending_ring_idx_t index;
++
++		pending_tx_info = netbk->pending_tx_info;
++		pending_idx = inuse - netbk->pending_inuse;
++
++		netif = pending_tx_info[pending_idx].netif;
++
++		make_tx_response(netif, &pending_tx_info[pending_idx].req,
++				 NETIF_RSP_OKAY);
++
++		/* Ready for next use. */
++		gnttab_reset_grant_page(netbk->mmap_pages[pending_idx]);
++
++		index = pending_index(netbk->pending_prod++);
++		netbk->pending_ring[index] = pending_idx;
++
++		netif_put(netif);
++
++		list_del_init(&inuse->list);
++	}
++}
++
++static void netbk_tx_err(struct xen_netif *netif,
++		struct xen_netif_tx_request *txp, RING_IDX end)
++{
++	RING_IDX cons = netif->tx.req_cons;
++
++	/* Fail @txp and every request up to (but excluding) @end. */
++	do {
++		make_tx_response(netif, txp, NETIF_RSP_ERROR);
++		if (cons >= end)
++			break;
++		txp = RING_GET_REQUEST(&netif->tx, cons++);
++	} while (1);
++	netif->tx.req_cons = cons;
++	netif_schedule_work(netif);
++	netif_put(netif);
++}
++
++static int netbk_count_requests(struct xen_netif *netif,
++				struct xen_netif_tx_request *first,
++				struct xen_netif_tx_request *txp,
++				int work_to_do)
++{
++	RING_IDX cons = netif->tx.req_cons;
++	int frags = 0;
++
++	if (!(first->flags & NETTXF_more_data))
++		return 0; /* single-slot packet: no extra frag requests */
++
++	/* Copy the frag requests into @txp, validating as we go.
++	 * A negative return (-frags) tells the caller how many slots
++	 * to consume when reporting the error. */
++	do {
++		if (frags >= work_to_do) {
++			DPRINTK("Need more frags\n");
++			return -frags;
++		}
++
++		if (unlikely(frags >= MAX_SKB_FRAGS)) {
++			DPRINTK("Too many frags\n");
++			return -frags;
++		}
++
++		memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags),
++		       sizeof(*txp));
++		if (txp->size > first->size) {
++			DPRINTK("Frags galore\n");
++			return -frags;
++		}
++
++		first->size -= txp->size; /* first->size becomes the head length */
++		frags++;
++
++		if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
++			DPRINTK("txp->offset: %x, size: %u\n",
++				txp->offset, txp->size);
++			return -frags;
++		}
++	} while ((txp++)->flags & NETTXF_more_data);
++
++	return frags;
++}
++
++static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk,
++						  struct xen_netif *netif,
++						  struct sk_buff *skb,
++						  struct xen_netif_tx_request *txp,
++						  struct gnttab_map_grant_ref *mop)
++{
++	struct skb_shared_info *shinfo = skb_shinfo(skb);
++	skb_frag_t *frags = shinfo->frags;
++	unsigned long pending_idx = *((u16 *)skb->data);
++	int i, start;
++
++	/* Skip first skb fragment if it is on same page as header fragment. */
++	start = ((unsigned long)shinfo->frags[0].page == pending_idx);
++
++	for (i = start; i < shinfo->nr_frags; i++, txp++) {
++		pending_ring_idx_t index;
++		struct pending_tx_info *pending_tx_info =
++			netbk->pending_tx_info;
++
++		index = pending_index(netbk->pending_cons++);
++		pending_idx = netbk->pending_ring[index];
++
++		gnttab_set_map_op(mop++, idx_to_kaddr(netbk, pending_idx),
++				  GNTMAP_host_map | GNTMAP_readonly,
++				  txp->gref, netif->domid);
++
++		memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
++		netif_get(netif); /* one ref per pending frag */
++		pending_tx_info[pending_idx].netif = netif;
++		frags[i].page = (void *)pending_idx; /* stash idx until netbk_fill_frags */
++	}
++
++	return mop;
++}
++
++/* Check the results of the grant-map ops issued for @skb (header plus
++ * frags). On the first failure, respond with an error for that slot and
++ * invalidate the header and all preceding fragments; later failures only
++ * respond, since earlier slots are already invalidated. Returns 0 on
++ * success or the first non-zero grant status. Advances *mopp past the
++ * ops consumed. */
++static int netbk_tx_check_mop(struct xen_netbk *netbk,
++			      struct sk_buff *skb,
++			      struct gnttab_map_grant_ref **mopp)
++{
++	struct gnttab_map_grant_ref *mop = *mopp;
++	int pending_idx = *((u16 *)skb->data);
++	struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
++	struct xen_netif *netif = pending_tx_info[pending_idx].netif;
++	struct xen_netif_tx_request *txp;
++	struct skb_shared_info *shinfo = skb_shinfo(skb);
++	int nr_frags = shinfo->nr_frags;
++	int i, err, start;
++
++	/* Check status of header. */
++	err = mop->status;
++	if (unlikely(err)) {
++		pending_ring_idx_t index;
++		index = pending_index(netbk->pending_prod++);
++		txp = &pending_tx_info[pending_idx].req;
++		make_tx_response(netif, txp, NETIF_RSP_ERROR);
++		netbk->pending_ring[index] = pending_idx;
++		netif_put(netif);
++	} else {
++		set_phys_to_machine(
++			__pa(idx_to_kaddr(netbk, pending_idx)) >> PAGE_SHIFT,
++			FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
++		netbk->grant_tx_handle[pending_idx] = mop->handle;
++	}
++
++	/* Skip first skb fragment if it is on same page as header fragment. */
++	start = ((unsigned long)shinfo->frags[0].page == pending_idx);
++
++	for (i = start; i < nr_frags; i++) {
++		int j, newerr;
++		pending_ring_idx_t index;
++
++		pending_idx = (unsigned long)shinfo->frags[i].page;
++
++		/* Check error status: if okay then remember grant handle. */
++		newerr = (++mop)->status;
++		if (likely(!newerr)) {
++			unsigned long addr;
++			addr = idx_to_kaddr(netbk, pending_idx);
++			set_phys_to_machine(
++				__pa(addr)>>PAGE_SHIFT,
++				FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
++			netbk->grant_tx_handle[pending_idx] = mop->handle;
++			/* Had a previous error? Invalidate this fragment. */
++			if (unlikely(err))
++				netif_idx_release(netbk, pending_idx);
++			continue;
++		}
++
++		/* Error on this fragment: respond to client with an error. */
++		txp = &netbk->pending_tx_info[pending_idx].req;
++		make_tx_response(netif, txp, NETIF_RSP_ERROR);
++		index = pending_index(netbk->pending_prod++);
++		netbk->pending_ring[index] = pending_idx;
++		netif_put(netif);
++
++		/* Not the first error? Preceding frags already invalidated. */
++		if (err)
++			continue;
++
++		/* First error: invalidate header and preceding fragments. */
++		pending_idx = *((u16 *)skb->data);
++		netif_idx_release(netbk, pending_idx);
++		for (j = start; j < i; j++) {
++			pending_idx = (unsigned long)shinfo->frags[j].page; /* j, not i: release each preceding frag */
++			netif_idx_release(netbk, pending_idx);
++		}
++
++		/* Remember the error: invalidate all subsequent fragments. */
++		err = newerr;
++	}
++
++	*mopp = mop + 1;
++	return err;
++}
++
++static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
++{
++	struct skb_shared_info *shinfo = skb_shinfo(skb);
++	int nr_frags = shinfo->nr_frags;
++	int i;
++
++	for (i = 0; i < nr_frags; i++) {
++		skb_frag_t *frag = shinfo->frags + i;
++		struct xen_netif_tx_request *txp;
++		unsigned long pending_idx;
++
++		pending_idx = (unsigned long)frag->page; /* idx stashed by netbk_get_requests */
++
++		netbk->pending_inuse[pending_idx].alloc_time = jiffies;
++		list_add_tail(&netbk->pending_inuse[pending_idx].list,
++			      &netbk->pending_inuse_head);
++
++		txp = &netbk->pending_tx_info[pending_idx].req;
++		frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx));
++		frag->size = txp->size;
++		frag->page_offset = txp->offset;
++
++		skb->len += txp->size;
++		skb->data_len += txp->size;
++		skb->truesize += txp->size;
++	}
++}
++
++int netbk_get_extras(struct xen_netif *netif,
++		     struct xen_netif_extra_info *extras,
++		     int work_to_do)
++{
++	struct xen_netif_extra_info extra;
++	RING_IDX cons = netif->tx.req_cons;
++
++	/* Returns remaining work_to_do, or a negative errno on bad input. */
++	do {
++		if (unlikely(work_to_do-- <= 0)) {
++			pr_debug("Missing extra info\n");
++			return -EBADR;
++		}
++
++		memcpy(&extra, RING_GET_REQUEST(&netif->tx, cons),
++		       sizeof(extra));
++		if (unlikely(!extra.type ||
++			     extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
++			netif->tx.req_cons = ++cons;
++			pr_debug("Invalid extra type: %d\n", extra.type);
++			return -EINVAL;
++		}
++
++		memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
++		netif->tx.req_cons = ++cons;
++	} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
++
++	return work_to_do;
++}
++
++static int netbk_set_skb_gso(struct sk_buff *skb,
++			     struct xen_netif_extra_info *gso)
++{
++	if (!gso->u.gso.size) {
++		pr_debug("GSO size must not be zero.\n");
++		return -EINVAL;
++	}
++
++	/* Currently only TCPv4 S.O. is supported. */
++	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
++		pr_debug("Bad GSO type %d.\n", gso->u.gso.type);
++		return -EINVAL;
++	}
++
++	skb_shinfo(skb)->gso_size = gso->u.gso.size;
++	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
++
++	/* Header must be checked, and gso_segs computed. */
++	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
++	skb_shinfo(skb)->gso_segs = 0;
++
++	return 0;
++}
++
++/* Fill in csum_start/csum_offset for a CHECKSUM_PARTIAL packet from the
++ * guest. Only IPv4 TCP/UDP are supported; returns 0 on success or
++ * -EPROTO for anything we cannot checksum. */
++static int skb_checksum_setup(struct sk_buff *skb)
++{
++	struct iphdr *iph;
++	unsigned char *th;
++	int err = -EPROTO;
++
++	if (skb->protocol != htons(ETH_P_IP))
++		goto out;
++
++	iph = (void *)skb->data;
++	th = skb->data + 4 * iph->ihl; /* ihl counts 32-bit words */
++	if (th >= skb_tail_pointer(skb))
++		goto out;
++
++	skb->csum_start = th - skb->head;
++	switch (iph->protocol) {
++	case IPPROTO_TCP:
++		skb->csum_offset = offsetof(struct tcphdr, check);
++		break;
++	case IPPROTO_UDP:
++		skb->csum_offset = offsetof(struct udphdr, check);
++		break;
++	default:
++		if (net_ratelimit())
++			printk(KERN_ERR "Attempting to checksum a non-"
++			       "TCP/UDP packet, dropping a protocol"
++			       " %d packet\n", iph->protocol);
++		goto out;
++	}
++
++	/* The checksum field itself must lie within the buffer. */
++	if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
++		goto out;
++
++	err = 0;
++
++out:
++	return err;
++}
++
++/* Rate limiting: returns true if @netif lacks credit for @size bytes,
++ * arming a timer to retry once credit is replenished. */
++static bool tx_credit_exceeded(struct xen_netif *netif, unsigned size)
++{
++	unsigned long now = jiffies;
++	unsigned long next_credit =
++		netif->credit_timeout.expires +
++		msecs_to_jiffies(netif->credit_usec / 1000);
++
++	/* Timer could already be pending in rare cases. */
++	if (timer_pending(&netif->credit_timeout))
++		return true;
++
++	/* Passed the point where we can replenish credit? */
++	if (time_after_eq(now, next_credit)) {
++		netif->credit_timeout.expires = now;
++		tx_add_credit(netif);
++	}
++
++	/* Still too big to send right now? Set a callback. */
++	if (size > netif->remaining_credit) {
++		netif->credit_timeout.data =
++			(unsigned long)netif;
++		netif->credit_timeout.function =
++			tx_credit_callback;
++		mod_timer(&netif->credit_timeout,
++			  next_credit);
++
++		return true;
++	}
++
++	return false;
++}
++
++/* Pull tx requests off scheduled interfaces, allocate skbs for them and
++ * build the grant-map operations in netbk->tx_map_ops. Returns the
++ * number of map ops built; net_tx_submit completes the skbs later. */
++static unsigned net_tx_build_mops(struct xen_netbk *netbk)
++{
++	struct gnttab_map_grant_ref *mop;
++	struct sk_buff *skb;
++	int ret;
++
++	mop = netbk->tx_map_ops;
++	while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
++		!list_empty(&netbk->net_schedule_list)) {
++		struct xen_netif *netif;
++		struct xen_netif_tx_request txreq;
++		struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
++		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
++		u16 pending_idx;
++		RING_IDX idx;
++		int work_to_do;
++		unsigned int data_len;
++		pending_ring_idx_t index;
++
++		/* Get a netif from the list with work to do. */
++		netif = poll_net_schedule_list(netbk);
++		if (!netif)
++			continue;
++
++		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
++		if (!work_to_do) {
++			netif_put(netif);
++			continue;
++		}
++
++		idx = netif->tx.req_cons;
++		rmb(); /* Ensure that we see the request before we copy it. */
++		memcpy(&txreq, RING_GET_REQUEST(&netif->tx, idx), sizeof(txreq));
++
++		/* Credit-based scheduling. */
++		if (txreq.size > netif->remaining_credit &&
++		    tx_credit_exceeded(netif, txreq.size)) {
++			netif_put(netif);
++			continue;
++		}
++
++		netif->remaining_credit -= txreq.size;
++
++		work_to_do--;
++		netif->tx.req_cons = ++idx;
++
++		memset(extras, 0, sizeof(extras));
++		if (txreq.flags & NETTXF_extra_info) {
++			work_to_do = netbk_get_extras(netif, extras,
++						      work_to_do);
++			idx = netif->tx.req_cons;
++			if (unlikely(work_to_do < 0)) {
++				netbk_tx_err(netif, &txreq, idx);
++				continue;
++			}
++		}
++
++		ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do);
++		if (unlikely(ret < 0)) {
++			netbk_tx_err(netif, &txreq, idx - ret);
++			continue;
++		}
++		idx += ret; /* skip over the frag requests just validated */
++
++		if (unlikely(txreq.size < ETH_HLEN)) {
++			pr_debug("Bad packet size: %d\n", txreq.size);
++			netbk_tx_err(netif, &txreq, idx);
++			continue;
++		}
++
++		/* No crossing a page as the payload mustn't fragment. */
++		if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
++			pr_debug("txreq.offset: %x, size: %u, end: %lu\n",
++				 txreq.offset, txreq.size,
++				 (txreq.offset&~PAGE_MASK) + txreq.size);
++			netbk_tx_err(netif, &txreq, idx);
++			continue;
++		}
++
++		index = pending_index(netbk->pending_cons);
++		pending_idx = netbk->pending_ring[index];
++
++		data_len = (txreq.size > PKT_PROT_LEN &&
++			    ret < MAX_SKB_FRAGS) ?
++			PKT_PROT_LEN : txreq.size;
++
++		skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
++				GFP_ATOMIC | __GFP_NOWARN);
++		if (unlikely(skb == NULL)) {
++			pr_debug("Can't allocate a skb in start_xmit.\n");
++			netbk_tx_err(netif, &txreq, idx);
++			break; /* out of memory: stop building, retry later */
++		}
++
++		/* Packets passed to netif_rx() must have some headroom. */
++		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
++
++		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
++			struct xen_netif_extra_info *gso;
++			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
++
++			if (netbk_set_skb_gso(skb, gso)) {
++				kfree_skb(skb);
++				netbk_tx_err(netif, &txreq, idx);
++				continue;
++			}
++		}
++
++		gnttab_set_map_op(mop, idx_to_kaddr(netbk, pending_idx),
++				  GNTMAP_host_map | GNTMAP_readonly,
++				  txreq.gref, netif->domid);
++		mop++;
++
++		memcpy(&netbk->pending_tx_info[pending_idx].req,
++		       &txreq, sizeof(txreq));
++		netbk->pending_tx_info[pending_idx].netif = netif;
++		*((u16 *)skb->data) = pending_idx; /* stash idx for net_tx_submit */
++
++		__skb_put(skb, data_len);
++
++		skb_shinfo(skb)->nr_frags = ret;
++		if (data_len < txreq.size) {
++			skb_shinfo(skb)->nr_frags++;
++			skb_shinfo(skb)->frags[0].page =
++				(void *)(unsigned long)pending_idx;
++		} else {
++			/* Discriminate from any valid pending_idx value. */
++			skb_shinfo(skb)->frags[0].page = (void *)~0UL;
++		}
++
++		__skb_queue_tail(&netbk->tx_queue, skb);
++
++		netbk->pending_cons++;
++
++		mop = netbk_get_requests(netbk, netif, skb, txfrags, mop);
++
++		netif->tx.req_cons = idx;
++		netif_schedule_work(netif);
++
++		if ((mop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops))
++			break; /* map-op array full */
++	}
++
++	return mop - netbk->tx_map_ops;
++}
++
++/* Second half of tx processing: the grant maps issued by
++ * net_tx_build_mops are done; check them, finish each skb and pass it
++ * up the host network stack. */
++static void net_tx_submit(struct xen_netbk *netbk)
++{
++	struct gnttab_map_grant_ref *mop;
++	struct sk_buff *skb;
++
++	mop = netbk->tx_map_ops;
++	while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
++		struct xen_netif_tx_request *txp;
++		struct xen_netif *netif;
++		u16 pending_idx;
++		unsigned data_len;
++
++		pending_idx = *((u16 *)skb->data);
++		netif = netbk->pending_tx_info[pending_idx].netif;
++		txp = &netbk->pending_tx_info[pending_idx].req;
++
++		/* Check the remap error code. */
++		if (unlikely(netbk_tx_check_mop(netbk, skb, &mop))) {
++			pr_debug("netback grant failed.\n");
++			skb_shinfo(skb)->nr_frags = 0;
++			kfree_skb(skb);
++			continue;
++		}
++
++		data_len = skb->len;
++		memcpy(skb->data,
++		       (void *)(idx_to_kaddr(netbk, pending_idx)|txp->offset),
++		       data_len);
++		if (data_len < txp->size) {
++			/* Append the packet payload as a fragment. */
++			txp->offset += data_len;
++			txp->size -= data_len;
++		} else {
++			/* Schedule a response immediately. */
++			netif_idx_release(netbk, pending_idx);
++		}
++
++		if (txp->flags & NETTXF_csum_blank)
++			skb->ip_summed = CHECKSUM_PARTIAL;
++		else if (txp->flags & NETTXF_data_validated)
++			skb->ip_summed = CHECKSUM_UNNECESSARY;
++
++		netbk_fill_frags(netbk, skb);
++
++		/*
++		 * If the initial fragment was < PKT_PROT_LEN then
++		 * pull through some bytes from the other fragments to
++		 * increase the linear region to PKT_PROT_LEN bytes.
++		 */
++		if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) {
++			int target = min_t(int, skb->len, PKT_PROT_LEN);
++			__pskb_pull_tail(skb, target - skb_headlen(skb));
++		}
++
++		skb->dev = netif->dev;
++		skb->protocol = eth_type_trans(skb, skb->dev);
++
++		if (skb->ip_summed == CHECKSUM_PARTIAL) {
++			if (skb_checksum_setup(skb)) {
++				pr_debug("skb_checksum_setup failed\n");
++				kfree_skb(skb);
++				continue;
++			}
++		} else if (skb_is_gso(skb)) {
++			pr_debug("GSO SKB checksum is not partial\n");
++			kfree_skb(skb); /* GSO requires partial checksum */
++			continue;
++		}
++
++		if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
++		    unlikely(skb_linearize(skb))) {
++			DPRINTK("Can't linearize skb in net_tx_action.\n");
++			kfree_skb(skb);
++			continue;
++		}
++
++		netif->stats.rx_bytes += skb->len;
++		netif->stats.rx_packets++;
++
++		netif_rx_ni(skb);
++		netif->dev->last_rx = jiffies;
++	}
++}
++
++/* Called after netfront has transmitted */
++static void net_tx_action(unsigned long data)
++{
++ struct xen_netbk *netbk = (struct xen_netbk *)data;
++ unsigned nr_mops;
++ int ret;
++
++ net_tx_action_dealloc(netbk);
++
++ nr_mops = net_tx_build_mops(netbk);
++
++ if (nr_mops == 0)
++ goto out;
++
++ ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
++ netbk->tx_map_ops, nr_mops);
++ BUG_ON(ret);
++
++ net_tx_submit(netbk);
++out:
++ if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
++ !list_empty(&netbk->pending_inuse_head)) {
++ struct netbk_tx_pending_inuse *oldest;
++
++ oldest = list_entry(netbk->pending_inuse_head.next,
++ struct netbk_tx_pending_inuse, list);
++ mod_timer(&netbk->netbk_tx_pending_timer,
++ oldest->alloc_time + HZ);
++ }
++}
++
++static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
++{
++ static DEFINE_SPINLOCK(_lock);
++ unsigned long flags;
++ pending_ring_idx_t index;
++
++ spin_lock_irqsave(&_lock, flags);
++ index = pending_index(netbk->dealloc_prod);
++ netbk->dealloc_ring[index] = pending_idx;
++ /* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
++ smp_wmb();
++ netbk->dealloc_prod++;
++ spin_unlock_irqrestore(&_lock, flags);
++
++ xen_netbk_bh_handler(netbk, 0);
++}
++
++static void netif_page_release(struct page *page, unsigned int order)
++{
++ unsigned int group, idx;
++ int foreign = netif_get_page_ext(page, &group, &idx);
++
++ BUG_ON(!foreign);
++ BUG_ON(order);
++
++ netif_idx_release(&xen_netbk[group], idx);
++}
++
++irqreturn_t netif_be_int(int irq, void *dev_id)
++{
++ struct xen_netif *netif = dev_id;
++ struct xen_netbk *netbk;
++
++ if (netif->group == -1)
++ return IRQ_NONE;
++
++ netbk = &xen_netbk[netif->group];
++
++ add_to_net_schedule_list_tail(netif);
++ maybe_schedule_tx_action(netbk);
++
++ if (netif_schedulable(netif) && !netbk_queue_full(netif))
++ netif_wake_queue(netif->dev);
++
++ return IRQ_HANDLED;
++}
++
++static void make_tx_response(struct xen_netif *netif,
++ struct xen_netif_tx_request *txp,
++ s8 st)
++{
++ RING_IDX i = netif->tx.rsp_prod_pvt;
++ struct xen_netif_tx_response *resp;
++ int notify;
++
++ resp = RING_GET_RESPONSE(&netif->tx, i);
++ resp->id = txp->id;
++ resp->status = st;
++
++ if (txp->flags & NETTXF_extra_info)
++ RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL;
++
++ netif->tx.rsp_prod_pvt = ++i;
++ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
++ if (notify)
++ notify_remote_via_irq(netif->irq);
++}
++
++static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
++ u16 id,
++ s8 st,
++ u16 offset,
++ u16 size,
++ u16 flags)
++{
++ RING_IDX i = netif->rx.rsp_prod_pvt;
++ struct xen_netif_rx_response *resp;
++
++ resp = RING_GET_RESPONSE(&netif->rx, i);
++ resp->offset = offset;
++ resp->flags = flags;
++ resp->id = id;
++ resp->status = (s16)size;
++ if (st < 0)
++ resp->status = (s16)st;
++
++ netif->rx.rsp_prod_pvt = ++i;
++
++ return resp;
++}
++
++#ifdef NETBE_DEBUG_INTERRUPT
++static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
++{
++ struct list_head *ent;
++ struct xen_netif *netif;
++ int i = 0;
++ int group = 0;
++
++ printk(KERN_ALERT "netif_schedule_list:\n");
++
++ for (group = 0; group < xen_netbk_group_nr; group++) {
++ struct xen_netbk *netbk = &xen_netbk[group];
++ spin_lock_irq(&netbk->net_schedule_list_lock);
++ printk(KERN_ALERT "xen_netback group number: %d\n", group);
++ list_for_each(ent, &netbk->net_schedule_list) {
++ netif = list_entry(ent, struct xen_netif, list);
++ printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
++ "rx_resp_prod=%08x\n",
++ i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
++ printk(KERN_ALERT
++ " tx_req_cons=%08x, tx_resp_prod=%08x)\n",
++ netif->tx.req_cons, netif->tx.rsp_prod_pvt);
++ printk(KERN_ALERT
++ " shared(rx_req_prod=%08x "
++ "rx_resp_prod=%08x\n",
++ netif->rx.sring->req_prod,
++ netif->rx.sring->rsp_prod);
++ printk(KERN_ALERT
++ " rx_event=%08x, tx_req_prod=%08x\n",
++ netif->rx.sring->rsp_event,
++ netif->tx.sring->req_prod);
++ printk(KERN_ALERT
++ " tx_resp_prod=%08x, tx_event=%08x)\n",
++ netif->tx.sring->rsp_prod,
++ netif->tx.sring->rsp_event);
++ i++;
++ }
++ spin_unlock_irq(&netbk->net_schedule_list_lock);
++ }
++
++ printk(KERN_ALERT " ** End of netif_schedule_list **\n");
++
++ return IRQ_HANDLED;
++}
++#endif
++
++static inline int rx_work_todo(struct xen_netbk *netbk)
++{
++ return !skb_queue_empty(&netbk->rx_queue);
++}
++
++static inline int tx_work_todo(struct xen_netbk *netbk)
++{
++ if (netbk->dealloc_cons != netbk->dealloc_prod)
++ return 1;
++
++ if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
++ !list_empty(&netbk->pending_inuse_head))
++ return 1;
++
++ if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
++ !list_empty(&netbk->net_schedule_list))
++ return 1;
++
++ return 0;
++}
++
++static int netbk_action_thread(void *data)
++{
++ struct xen_netbk *netbk = (struct xen_netbk *)data;
++ while (!kthread_should_stop()) {
++ wait_event_interruptible(netbk->kthread.netbk_action_wq,
++ rx_work_todo(netbk)
++ || tx_work_todo(netbk)
++ || kthread_should_stop());
++ cond_resched();
++
++ if (kthread_should_stop())
++ break;
++
++ if (rx_work_todo(netbk))
++ net_rx_action((unsigned long)netbk);
++
++ if (tx_work_todo(netbk))
++ net_tx_action((unsigned long)netbk);
++ }
++
++ return 0;
++}
++
++static int __init netback_init(void)
++{
++ int i;
++ struct page *page;
++ int rc = 0;
++ int group;
++
++ if (!xen_pv_domain())
++ return -ENODEV;
++
++ xen_netbk_group_nr = num_online_cpus();
++ xen_netbk = vmalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);
++ if (!xen_netbk) {
++ printk(KERN_ALERT "%s: out of memory\n", __func__);
++ return -ENOMEM;
++ }
++ memset(xen_netbk, 0, sizeof(struct xen_netbk) * xen_netbk_group_nr);
++
++ for (group = 0; group < xen_netbk_group_nr; group++) {
++ struct xen_netbk *netbk = &xen_netbk[group];
++ skb_queue_head_init(&netbk->rx_queue);
++ skb_queue_head_init(&netbk->tx_queue);
++
++ init_timer(&netbk->net_timer);
++ netbk->net_timer.data = (unsigned long)netbk;
++ netbk->net_timer.function = net_alarm;
++
++ init_timer(&netbk->netbk_tx_pending_timer);
++ netbk->netbk_tx_pending_timer.data = (unsigned long)netbk;
++ netbk->netbk_tx_pending_timer.function =
++ netbk_tx_pending_timeout;
++
++ netbk->mmap_pages =
++ alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
++ if (!netbk->mmap_pages) {
++ printk(KERN_ALERT "%s: out of memory\n", __func__);
++ del_timer(&netbk->netbk_tx_pending_timer);
++ del_timer(&netbk->net_timer);
++ rc = -ENOMEM;
++ goto failed_init;
++ }
++
++ for (i = 0; i < MAX_PENDING_REQS; i++) {
++ page = netbk->mmap_pages[i];
++ SetPageForeign(page, netif_page_release);
++ netif_set_page_ext(page, group, i);
++ INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
++ }
++
++ netbk->pending_cons = 0;
++ netbk->pending_prod = MAX_PENDING_REQS;
++ for (i = 0; i < MAX_PENDING_REQS; i++)
++ netbk->pending_ring[i] = i;
++
++ if (MODPARM_netback_kthread) {
++ init_waitqueue_head(&netbk->kthread.netbk_action_wq);
++ netbk->kthread.task =
++ kthread_create(netbk_action_thread,
++ (void *)netbk,
++ "netback/%u", group);
++
++ if (!IS_ERR(netbk->kthread.task)) {
++ kthread_bind(netbk->kthread.task, group);
++ } else {
++ printk(KERN_ALERT
++ "kthread_run() fails at netback\n");
++ free_empty_pages_and_pagevec(netbk->mmap_pages,
++ MAX_PENDING_REQS);
++ del_timer(&netbk->netbk_tx_pending_timer);
++ del_timer(&netbk->net_timer);
++ rc = PTR_ERR(netbk->kthread.task);
++ goto failed_init;
++ }
++ } else {
++ tasklet_init(&netbk->tasklet.net_tx_tasklet,
++ net_tx_action,
++ (unsigned long)netbk);
++ tasklet_init(&netbk->tasklet.net_rx_tasklet,
++ net_rx_action,
++ (unsigned long)netbk);
++ }
++
++ INIT_LIST_HEAD(&netbk->pending_inuse_head);
++ INIT_LIST_HEAD(&netbk->net_schedule_list);
++
++ spin_lock_init(&netbk->net_schedule_list_lock);
++
++ atomic_set(&netbk->netfront_count, 0);
++
++ if (MODPARM_netback_kthread)
++ wake_up_process(netbk->kthread.task);
++ }
++
++ netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
++ if (MODPARM_copy_skb) {
++ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
++ NULL, 0))
++ netbk_copy_skb_mode = NETBK_ALWAYS_COPY_SKB;
++ else
++ netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
++ }
++
++ rc = netif_xenbus_init();
++ if (rc)
++ goto failed_init;
++
++#ifdef NETBE_DEBUG_INTERRUPT
++ (void)bind_virq_to_irqhandler(VIRQ_DEBUG,
++ 0,
++ netif_be_dbg,
++ IRQF_SHARED,
++ "net-be-dbg",
++ &netif_be_dbg);
++#endif
++
++ return 0;
++
++failed_init:
++ for (i = 0; i < group; i++) {
++ struct xen_netbk *netbk = &xen_netbk[i];
++ free_empty_pages_and_pagevec(netbk->mmap_pages,
++ MAX_PENDING_REQS);
++ del_timer(&netbk->netbk_tx_pending_timer);
++ del_timer(&netbk->net_timer);
++ if (MODPARM_netback_kthread)
++ kthread_stop(netbk->kthread.task);
++ }
++ vfree(xen_netbk);
++ return rc;
++
++}
++
++module_init(netback_init);
++
++MODULE_LICENSE("Dual BSD/GPL");
+diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
+new file mode 100644
+index 0000000..640c696
+--- /dev/null
++++ b/drivers/net/xen-netback/xenbus.c
+@@ -0,0 +1,487 @@
++/* Xenbus code for netif backend
++ * Copyright (C) 2005 Rusty Russell <rusty at rustcorp.com.au>
++ * Copyright (C) 2005 XenSource Ltd
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
++*/
++
++#include "common.h"
++
++static int connect_rings(struct backend_info *);
++static void connect(struct backend_info *);
++static void backend_create_netif(struct backend_info *be);
++static void unregister_hotplug_status_watch(struct backend_info *be);
++
++static int netback_remove(struct xenbus_device *dev)
++{
++ struct backend_info *be = dev_get_drvdata(&dev->dev);
++
++ unregister_hotplug_status_watch(be);
++ if (be->netif) {
++ kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
++ xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
++ netif_disconnect(be->netif);
++ be->netif = NULL;
++ }
++ kfree(be);
++ dev_set_drvdata(&dev->dev, NULL);
++ return 0;
++}
++
++
++/**
++ * Entry point to this code when a new device is created. Allocate the basic
++ * structures and switch to InitWait.
++ */
++static int netback_probe(struct xenbus_device *dev,
++ const struct xenbus_device_id *id)
++{
++ const char *message;
++ struct xenbus_transaction xbt;
++ int err;
++ int sg;
++ struct backend_info *be = kzalloc(sizeof(struct backend_info),
++ GFP_KERNEL);
++ if (!be) {
++ xenbus_dev_fatal(dev, -ENOMEM,
++ "allocating backend structure");
++ return -ENOMEM;
++ }
++
++ be->dev = dev;
++ dev_set_drvdata(&dev->dev, be);
++
++ sg = 1;
++ if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB)
++ sg = 0;
++
++ do {
++ err = xenbus_transaction_start(&xbt);
++ if (err) {
++ xenbus_dev_fatal(dev, err, "starting transaction");
++ goto fail;
++ }
++
++ err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg);
++ if (err) {
++ message = "writing feature-sg";
++ goto abort_transaction;
++ }
++
++ err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4",
++ "%d", sg);
++ if (err) {
++ message = "writing feature-gso-tcpv4";
++ goto abort_transaction;
++ }
++
++ /* We support rx-copy path. */
++ err = xenbus_printf(xbt, dev->nodename,
++ "feature-rx-copy", "%d", 1);
++ if (err) {
++ message = "writing feature-rx-copy";
++ goto abort_transaction;
++ }
++
++ /*
++ * We don't support rx-flip path (except old guests who don't
++ * grok this feature flag).
++ */
++ err = xenbus_printf(xbt, dev->nodename,
++ "feature-rx-flip", "%d", 0);
++ if (err) {
++ message = "writing feature-rx-flip";
++ goto abort_transaction;
++ }
++
++ err = xenbus_transaction_end(xbt, 0);
++ } while (err == -EAGAIN);
++
++ if (err) {
++ xenbus_dev_fatal(dev, err, "completing transaction");
++ goto fail;
++ }
++
++ err = xenbus_switch_state(dev, XenbusStateInitWait);
++ if (err)
++ goto fail;
++
++ /* This kicks hotplug scripts, so do it immediately. */
++ backend_create_netif(be);
++
++ return 0;
++
++abort_transaction:
++ xenbus_transaction_end(xbt, 1);
++ xenbus_dev_fatal(dev, err, "%s", message);
++fail:
++ pr_debug("failed");
++ netback_remove(dev);
++ return err;
++}
++
++
++/*
++ * Handle the creation of the hotplug script environment. We add the script
++ * and vif variables to the environment, for the benefit of the vif-* hotplug
++ * scripts.
++ */
++static int netback_uevent(struct xenbus_device *xdev,
++ struct kobj_uevent_env *env)
++{
++ struct backend_info *be = dev_get_drvdata(&xdev->dev);
++ char *val;
++
++ val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
++ if (IS_ERR(val)) {
++ int err = PTR_ERR(val);
++ xenbus_dev_fatal(xdev, err, "reading script");
++ return err;
++ } else {
++ if (add_uevent_var(env, "script=%s", val)) {
++ kfree(val);
++ return -ENOMEM;
++ }
++ kfree(val);
++ }
++
++ if (!be || !be->netif)
++ return 0;
++
++ return add_uevent_var(env, "vif=%s", be->netif->dev->name);
++}
++
++
++static void backend_create_netif(struct backend_info *be)
++{
++ int err;
++ long handle;
++ struct xenbus_device *dev = be->dev;
++
++ if (be->netif != NULL)
++ return;
++
++ err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%li", &handle);
++ if (err != 1) {
++ xenbus_dev_fatal(dev, err, "reading handle");
++ return;
++ }
++
++ be->netif = netif_alloc(&dev->dev, dev->otherend_id, handle);
++ if (IS_ERR(be->netif)) {
++ err = PTR_ERR(be->netif);
++ be->netif = NULL;
++ xenbus_dev_fatal(dev, err, "creating interface");
++ return;
++ }
++
++ kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
++}
++
++
++static void disconnect_backend(struct xenbus_device *dev)
++{
++ struct backend_info *be = dev_get_drvdata(&dev->dev);
++
++ if (be->netif) {
++ xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
++ netif_disconnect(be->netif);
++ be->netif = NULL;
++ }
++}
++
++/**
++ * Callback received when the frontend's state changes.
++ */
++static void frontend_changed(struct xenbus_device *dev,
++ enum xenbus_state frontend_state)
++{
++ struct backend_info *be = dev_get_drvdata(&dev->dev);
++
++ pr_debug("frontend state %s", xenbus_strstate(frontend_state));
++
++ be->frontend_state = frontend_state;
++
++ switch (frontend_state) {
++ case XenbusStateInitialising:
++ if (dev->state == XenbusStateClosed) {
++ printk(KERN_INFO "%s: %s: prepare for reconnect\n",
++ __func__, dev->nodename);
++ xenbus_switch_state(dev, XenbusStateInitWait);
++ }
++ break;
++
++ case XenbusStateInitialised:
++ break;
++
++ case XenbusStateConnected:
++ if (dev->state == XenbusStateConnected)
++ break;
++ backend_create_netif(be);
++ if (be->netif)
++ connect(be);
++ break;
++
++ case XenbusStateClosing:
++ if (be->netif)
++ kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
++ disconnect_backend(dev);
++ xenbus_switch_state(dev, XenbusStateClosing);
++ break;
++
++ case XenbusStateClosed:
++ xenbus_switch_state(dev, XenbusStateClosed);
++ if (xenbus_dev_is_online(dev))
++ break;
++ /* fall through if not online */
++ case XenbusStateUnknown:
++ device_unregister(&dev->dev);
++ break;
++
++ default:
++ xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
++ frontend_state);
++ break;
++ }
++}
++
++
++static void xen_net_read_rate(struct xenbus_device *dev,
++ unsigned long *bytes, unsigned long *usec)
++{
++ char *s, *e;
++ unsigned long b, u;
++ char *ratestr;
++
++ /* Default to unlimited bandwidth. */
++ *bytes = ~0UL;
++ *usec = 0;
++
++ ratestr = xenbus_read(XBT_NIL, dev->nodename, "rate", NULL);
++ if (IS_ERR(ratestr))
++ return;
++
++ s = ratestr;
++ b = simple_strtoul(s, &e, 10);
++ if ((s == e) || (*e != ','))
++ goto fail;
++
++ s = e + 1;
++ u = simple_strtoul(s, &e, 10);
++ if ((s == e) || (*e != '\0'))
++ goto fail;
++
++ *bytes = b;
++ *usec = u;
++
++ kfree(ratestr);
++ return;
++
++ fail:
++ pr_warn("Failed to parse network rate limit. Traffic unlimited.\n");
++ kfree(ratestr);
++}
++
++static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
++{
++ char *s, *e, *macstr;
++ int i;
++
++ macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
++ if (IS_ERR(macstr))
++ return PTR_ERR(macstr);
++
++ for (i = 0; i < ETH_ALEN; i++) {
++ mac[i] = simple_strtoul(s, &e, 16);
++ if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
++ kfree(macstr);
++ return -ENOENT;
++ }
++ s = e+1;
++ }
++
++ kfree(macstr);
++ return 0;
++}
++
++static void unregister_hotplug_status_watch(struct backend_info *be)
++{
++ if (be->have_hotplug_status_watch) {
++ unregister_xenbus_watch(&be->hotplug_status_watch);
++ kfree(be->hotplug_status_watch.node);
++ }
++ be->have_hotplug_status_watch = 0;
++}
++
++static void hotplug_status_changed(struct xenbus_watch *watch,
++ const char **vec,
++ unsigned int vec_size)
++{
++ struct backend_info *be = container_of(watch,
++ struct backend_info,
++ hotplug_status_watch);
++ char *str;
++ unsigned int len;
++
++ str = xenbus_read(XBT_NIL, be->dev->nodename, "hotplug-status", &len);
++ if (IS_ERR(str))
++ return;
++ if (len == sizeof("connected")-1 && !memcmp(str, "connected", len)) {
++ xenbus_switch_state(be->dev, XenbusStateConnected);
++ /* Not interested in this watch anymore. */
++ unregister_hotplug_status_watch(be);
++ }
++ kfree(str);
++}
++
++static void connect(struct backend_info *be)
++{
++ int err;
++ struct xenbus_device *dev = be->dev;
++
++ err = connect_rings(be);
++ if (err)
++ return;
++
++ err = xen_net_read_mac(dev, be->netif->fe_dev_addr);
++ if (err) {
++ xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
++ return;
++ }
++
++ xen_net_read_rate(dev, &be->netif->credit_bytes,
++ &be->netif->credit_usec);
++ be->netif->remaining_credit = be->netif->credit_bytes;
++
++ unregister_hotplug_status_watch(be);
++ err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch,
++ hotplug_status_changed,
++ "%s/%s", dev->nodename, "hotplug-status");
++ if (err) {
++ /* Switch now, since we can't do a watch. */
++ xenbus_switch_state(dev, XenbusStateConnected);
++ } else {
++ be->have_hotplug_status_watch = 1;
++ }
++
++ netif_wake_queue(be->netif->dev);
++}
++
++
++static int connect_rings(struct backend_info *be)
++{
++ struct xen_netif *netif = be->netif;
++ struct xenbus_device *dev = be->dev;
++ unsigned long tx_ring_ref, rx_ring_ref;
++ unsigned int evtchn, rx_copy;
++ int err;
++ int val;
++
++ err = xenbus_gather(XBT_NIL, dev->otherend,
++ "tx-ring-ref", "%lu", &tx_ring_ref,
++ "rx-ring-ref", "%lu", &rx_ring_ref,
++ "event-channel", "%u", &evtchn, NULL);
++ if (err) {
++ xenbus_dev_fatal(dev, err,
++ "reading %s/ring-ref and event-channel",
++ dev->otherend);
++ return err;
++ }
++
++ err = xenbus_scanf(XBT_NIL, dev->otherend, "request-rx-copy", "%u",
++ &rx_copy);
++ if (err == -ENOENT) {
++ err = 0;
++ rx_copy = 0;
++ }
++ if (err < 0) {
++ xenbus_dev_fatal(dev, err, "reading %s/request-rx-copy",
++ dev->otherend);
++ return err;
++ }
++ if (!rx_copy)
++ return -EOPNOTSUPP;
++
++ if (netif->dev->tx_queue_len != 0) {
++ if (xenbus_scanf(XBT_NIL, dev->otherend,
++ "feature-rx-notify", "%d", &val) < 0)
++ val = 0;
++ if (val)
++ netif->can_queue = 1;
++ else
++ /* Must be non-zero for pfifo_fast to work. */
++ netif->dev->tx_queue_len = 1;
++ }
++
++ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg",
++ "%d", &val) < 0)
++ val = 0;
++ netif->can_sg = !!val;
++
++ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4",
++ "%d", &val) < 0)
++ val = 0;
++ netif->gso = !!val;
++
++ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix",
++ "%d", &val) < 0)
++ val = 0;
++ netif->gso_prefix = !!val;
++
++ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
++ "%d", &val) < 0)
++ val = 0;
++ netif->csum = !val;
++
++ /* Set dev->features */
++ netif_set_features(netif);
++
++ /* Map the shared frame, irq etc. */
++ err = netif_map(netif, tx_ring_ref, rx_ring_ref, evtchn);
++ if (err) {
++ xenbus_dev_fatal(dev, err,
++ "mapping shared-frames %lu/%lu port %u",
++ tx_ring_ref, rx_ring_ref, evtchn);
++ return err;
++ }
++ return 0;
++}
++
++
++/* ** Driver Registration ** */
++
++
++static const struct xenbus_device_id netback_ids[] = {
++ { "vif" },
++ { "" }
++};
++
++
++static struct xenbus_driver netback = {
++ .name = "vif",
++ .owner = THIS_MODULE,
++ .ids = netback_ids,
++ .probe = netback_probe,
++ .remove = netback_remove,
++ .uevent = netback_uevent,
++ .otherend_changed = frontend_changed,
++};
++
++
++int netif_xenbus_init(void)
++{
++ printk(KERN_CRIT "registering netback\n");
++ return xenbus_register_backend(&netback);
++}
+diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
+index 30290a8..5a48ce9 100644
+--- a/drivers/xen/Kconfig
++++ b/drivers/xen/Kconfig
+@@ -37,13 +37,6 @@ config XEN_BACKEND
+ depends on XEN_PCIDEV_BACKEND
+
+
+-config XEN_NETDEV_BACKEND
+- tristate "Xen backend network device"
+- depends on XEN_BACKEND && NET
+- help
+- Implement the network backend driver, which passes packets
+- from the guest domain's frontend drivers to the network.
+-
+ config XENFS
+ tristate "Xen filesystem"
+ default y
+diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
+index c0e0509..533a199 100644
+--- a/drivers/xen/Makefile
++++ b/drivers/xen/Makefile
+@@ -9,7 +9,6 @@ obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
+ obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o
+ obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o
+ obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback/
+-obj-$(CONFIG_XEN_NETDEV_BACKEND) += netback/
+ obj-$(CONFIG_XENFS) += xenfs/
+ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
+ obj-$(CONFIG_XEN_PLATFORM_PCI) += platform-pci.o
+diff --git a/drivers/xen/netback/Makefile b/drivers/xen/netback/Makefile
+deleted file mode 100644
+index e346e81..0000000
+--- a/drivers/xen/netback/Makefile
++++ /dev/null
+@@ -1,3 +0,0 @@
+-obj-$(CONFIG_XEN_NETDEV_BACKEND) := xen-netback.o
+-
+-xen-netback-y := netback.o xenbus.o interface.o
+diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
+deleted file mode 100644
+index 079e1de..0000000
+--- a/drivers/xen/netback/common.h
++++ /dev/null
+@@ -1,275 +0,0 @@
+-/******************************************************************************
+- * arch/xen/drivers/netif/backend/common.h
+- *
+- * This program is free software; you can redistribute it and/or
+- * modify it under the terms of the GNU General Public License version 2
+- * as published by the Free Software Foundation; or, when distributed
+- * separately from the Linux kernel or incorporated into other
+- * software packages, subject to the following license:
+- *
+- * Permission is hereby granted, free of charge, to any person obtaining a copy
+- * of this source file (the "Software"), to deal in the Software without
+- * restriction, including without limitation the rights to use, copy, modify,
+- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+- * and to permit persons to whom the Software is furnished to do so, subject to
+- * the following conditions:
+- *
+- * The above copyright notice and this permission notice shall be included in
+- * all copies or substantial portions of the Software.
+- *
+- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+- * IN THE SOFTWARE.
+- */
+-
+-#ifndef __NETIF__BACKEND__COMMON_H__
+-#define __NETIF__BACKEND__COMMON_H__
+-
+-#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__
+-
+-#include <linux/version.h>
+-#include <linux/module.h>
+-#include <linux/interrupt.h>
+-#include <linux/slab.h>
+-#include <linux/ip.h>
+-#include <linux/in.h>
+-#include <linux/io.h>
+-#include <linux/netdevice.h>
+-#include <linux/etherdevice.h>
+-#include <linux/wait.h>
+-#include <linux/sched.h>
+-
+-#include <xen/interface/io/netif.h>
+-#include <asm/pgalloc.h>
+-#include <xen/interface/grant_table.h>
+-#include <xen/grant_table.h>
+-#include <xen/xenbus.h>
+-
+-struct xen_netif {
+- /* Unique identifier for this interface. */
+- domid_t domid;
+- int group;
+- unsigned int handle;
+-
+- u8 fe_dev_addr[6];
+-
+- /* Physical parameters of the comms window. */
+- grant_handle_t tx_shmem_handle;
+- grant_ref_t tx_shmem_ref;
+- grant_handle_t rx_shmem_handle;
+- grant_ref_t rx_shmem_ref;
+- unsigned int irq;
+-
+- /* The shared rings and indexes. */
+- struct xen_netif_tx_back_ring tx;
+- struct xen_netif_rx_back_ring rx;
+- struct vm_struct *tx_comms_area;
+- struct vm_struct *rx_comms_area;
+-
+- /* Flags that must not be set in dev->features */
+- int features_disabled;
+-
+- /* Frontend feature information. */
+- u8 can_sg:1;
+- u8 gso:1;
+- u8 gso_prefix:1;
+- u8 csum:1;
+-
+- /* Internal feature information. */
+- u8 can_queue:1; /* can queue packets for receiver? */
+-
+- /* Allow netif_be_start_xmit() to peek ahead in the rx request
+- * ring. This is a prediction of what rx_req_cons will be once
+- * all queued skbs are put on the ring. */
+- RING_IDX rx_req_cons_peek;
+-
+- /* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
+- unsigned long credit_bytes;
+- unsigned long credit_usec;
+- unsigned long remaining_credit;
+- struct timer_list credit_timeout;
+-
+- /* Statistics */
+- int nr_copied_skbs;
+-
+- /* Miscellaneous private stuff. */
+- struct list_head list; /* scheduling list */
+- atomic_t refcnt;
+- struct net_device *dev;
+- struct net_device_stats stats;
+-
+- unsigned int carrier;
+-
+- wait_queue_head_t waiting_to_free;
+-};
+-
+-/*
+- * Implement our own carrier flag: the network stack's version causes delays
+- * when the carrier is re-enabled (in particular, dev_activate() may not
+- * immediately be called, which can cause packet loss; also the etherbridge
+- * can be rather lazy in activating its port).
+- */
+-#define netback_carrier_on(netif) ((netif)->carrier = 1)
+-#define netback_carrier_off(netif) ((netif)->carrier = 0)
+-#define netback_carrier_ok(netif) ((netif)->carrier)
+-
+-enum {
+- NETBK_DONT_COPY_SKB,
+- NETBK_DELAYED_COPY_SKB,
+- NETBK_ALWAYS_COPY_SKB,
+-};
+-
+-extern int netbk_copy_skb_mode;
+-
+-struct backend_info {
+- struct xenbus_device *dev;
+- struct xen_netif *netif;
+- enum xenbus_state frontend_state;
+- struct xenbus_watch hotplug_status_watch;
+- int have_hotplug_status_watch:1;
+-};
+-
+-#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
+-#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
+-
+-void netif_disconnect(struct xen_netif *netif);
+-
+-void netif_set_features(struct xen_netif *netif);
+-struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
+- unsigned int handle);
+-int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
+- unsigned long rx_ring_ref, unsigned int evtchn);
+-
+-static inline void netif_get(struct xen_netif *netif)
+-{
+- atomic_inc(&netif->refcnt);
+-}
+-
+-static inline void netif_put(struct xen_netif *netif)
+-{
+- if (atomic_dec_and_test(&netif->refcnt))
+- wake_up(&netif->waiting_to_free);
+-}
+-
+-int netif_xenbus_init(void);
+-
+-#define netif_schedulable(netif) \
+- (netif_running((netif)->dev) && netback_carrier_ok(netif))
+-
+-void netif_schedule_work(struct xen_netif *netif);
+-void netif_deschedule_work(struct xen_netif *netif);
+-
+-int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
+-struct net_device_stats *netif_be_get_stats(struct net_device *dev);
+-irqreturn_t netif_be_int(int irq, void *dev_id);
+-
+-static inline int netbk_can_queue(struct net_device *dev)
+-{
+- struct xen_netif *netif = netdev_priv(dev);
+- return netif->can_queue;
+-}
+-
+-static inline int netbk_can_sg(struct net_device *dev)
+-{
+- struct xen_netif *netif = netdev_priv(dev);
+- return netif->can_sg;
+-}
+-
+-struct pending_tx_info {
+- struct xen_netif_tx_request req;
+- struct xen_netif *netif;
+-};
+-typedef unsigned int pending_ring_idx_t;
+-
+-struct netbk_rx_meta {
+- int id;
+- int size;
+- int gso_size;
+-};
+-
+-struct netbk_tx_pending_inuse {
+- struct list_head list;
+- unsigned long alloc_time;
+-};
+-
+-#define MAX_PENDING_REQS 256
+-
+-#define MAX_BUFFER_OFFSET PAGE_SIZE
+-
+-/* extra field used in struct page */
+-union page_ext {
+- struct {
+-#if BITS_PER_LONG < 64
+-#define IDX_WIDTH 8
+-#define GROUP_WIDTH (BITS_PER_LONG - IDX_WIDTH)
+- unsigned int group:GROUP_WIDTH;
+- unsigned int idx:IDX_WIDTH;
+-#else
+- unsigned int group, idx;
+-#endif
+- } e;
+- void *mapping;
+-};
+-
+-struct xen_netbk {
+- union {
+- struct {
+- struct tasklet_struct net_tx_tasklet;
+- struct tasklet_struct net_rx_tasklet;
+- } tasklet;
+-
+- struct {
+- wait_queue_head_t netbk_action_wq;
+- struct task_struct *task;
+- } kthread;
+- };
+-
+- struct sk_buff_head rx_queue;
+- struct sk_buff_head tx_queue;
+-
+- struct timer_list net_timer;
+- struct timer_list netbk_tx_pending_timer;
+-
+- struct page **mmap_pages;
+-
+- pending_ring_idx_t pending_prod;
+- pending_ring_idx_t pending_cons;
+- pending_ring_idx_t dealloc_prod;
+- pending_ring_idx_t dealloc_cons;
+-
+- struct list_head pending_inuse_head;
+- struct list_head net_schedule_list;
+-
+- /* Protect the net_schedule_list in netif. */
+- spinlock_t net_schedule_list_lock;
+-
+- atomic_t netfront_count;
+-
+- struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
+- struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
+- struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
+- struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
+-
+- grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
+- u16 pending_ring[MAX_PENDING_REQS];
+- u16 dealloc_ring[MAX_PENDING_REQS];
+-
+- /*
+- * Each head or fragment can be up to 4096 bytes. Given
+- * MAX_BUFFER_OFFSET of 4096 the worst case is that each
+- * head/fragment uses 2 copy operation.
+- */
+- struct gnttab_copy grant_copy_op[2*NET_RX_RING_SIZE];
+- unsigned char rx_notify[NR_IRQS];
+- u16 notify_list[NET_RX_RING_SIZE];
+- struct netbk_rx_meta meta[2*NET_RX_RING_SIZE];
+-};
+-
+-extern struct xen_netbk *xen_netbk;
+-extern int xen_netbk_group_nr;
+-
+-#endif /* __NETIF__BACKEND__COMMON_H__ */
+diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
+deleted file mode 100644
+index c36db26..0000000
+--- a/drivers/xen/netback/interface.c
++++ /dev/null
+@@ -1,465 +0,0 @@
+-/******************************************************************************
+- * arch/xen/drivers/netif/backend/interface.c
+- *
+- * Network-device interface management.
+- *
+- * Copyright (c) 2004-2005, Keir Fraser
+- *
+- * This program is free software; you can redistribute it and/or
+- * modify it under the terms of the GNU General Public License version 2
+- * as published by the Free Software Foundation; or, when distributed
+- * separately from the Linux kernel or incorporated into other
+- * software packages, subject to the following license:
+- *
+- * Permission is hereby granted, free of charge, to any person obtaining a copy
+- * of this source file (the "Software"), to deal in the Software without
+- * restriction, including without limitation the rights to use, copy, modify,
+- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+- * and to permit persons to whom the Software is furnished to do so, subject to
+- * the following conditions:
+- *
+- * The above copyright notice and this permission notice shall be included in
+- * all copies or substantial portions of the Software.
+- *
+- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+- * IN THE SOFTWARE.
+- */
+-
+-#include "common.h"
+-
+-#include <linux/ethtool.h>
+-#include <linux/rtnetlink.h>
+-
+-#include <xen/events.h>
+-#include <asm/xen/hypercall.h>
+-
+-/*
+- * Module parameter 'queue_length':
+- *
+- * Enables queuing in the network stack when a client has run out of receive
+- * descriptors.
+- */
+-static unsigned long netbk_queue_length = 32;
+-module_param_named(queue_length, netbk_queue_length, ulong, 0644);
+-
+-static void netbk_add_netif(struct xen_netbk *netbk, int group_nr,
+- struct xen_netif *netif)
+-{
+- int i;
+- int min_netfront_count;
+- int min_group = 0;
+- min_netfront_count = atomic_read(&netbk[0].netfront_count);
+- for (i = 0; i < group_nr; i++) {
+- int netfront_count = atomic_read(&netbk[i].netfront_count);
+- if (netfront_count < min_netfront_count) {
+- min_group = i;
+- min_netfront_count = netfront_count;
+- }
+- }
+-
+- netif->group = min_group;
+- atomic_inc(&netbk[netif->group].netfront_count);
+-}
+-
+-static void netbk_remove_netif(struct xen_netbk *netbk, struct xen_netif *netif)
+-{
+- atomic_dec(&netbk[netif->group].netfront_count);
+-}
+-
+-static void __netif_up(struct xen_netif *netif)
+-{
+- netbk_add_netif(xen_netbk, xen_netbk_group_nr, netif);
+- enable_irq(netif->irq);
+- netif_schedule_work(netif);
+-}
+-
+-static void __netif_down(struct xen_netif *netif)
+-{
+- disable_irq(netif->irq);
+- netif_deschedule_work(netif);
+- netbk_remove_netif(xen_netbk, netif);
+-}
+-
+-static int net_open(struct net_device *dev)
+-{
+- struct xen_netif *netif = netdev_priv(dev);
+- if (netback_carrier_ok(netif)) {
+- __netif_up(netif);
+- netif_start_queue(dev);
+- }
+- return 0;
+-}
+-
+-static int net_close(struct net_device *dev)
+-{
+- struct xen_netif *netif = netdev_priv(dev);
+- if (netback_carrier_ok(netif))
+- __netif_down(netif);
+- netif_stop_queue(dev);
+- return 0;
+-}
+-
+-static int netbk_change_mtu(struct net_device *dev, int mtu)
+-{
+- int max = netbk_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
+-
+- if (mtu > max)
+- return -EINVAL;
+- dev->mtu = mtu;
+- return 0;
+-}
+-
+-void netif_set_features(struct xen_netif *netif)
+-{
+- struct net_device *dev = netif->dev;
+- int features = dev->features;
+-
+- if (netif->can_sg)
+- features |= NETIF_F_SG;
+- if (netif->gso || netif->gso_prefix)
+- features |= NETIF_F_TSO;
+- if (netif->csum)
+- features |= NETIF_F_IP_CSUM;
+-
+- features &= ~(netif->features_disabled);
+-
+- if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN)
+- dev->mtu = ETH_DATA_LEN;
+-
+- dev->features = features;
+-}
+-
+-static int netbk_set_tx_csum(struct net_device *dev, u32 data)
+-{
+- struct xen_netif *netif = netdev_priv(dev);
+- if (data) {
+- if (!netif->csum)
+- return -ENOSYS;
+- netif->features_disabled &= ~NETIF_F_IP_CSUM;
+- } else {
+- netif->features_disabled |= NETIF_F_IP_CSUM;
+- }
+-
+- netif_set_features(netif);
+- return 0;
+-}
+-
+-static int netbk_set_sg(struct net_device *dev, u32 data)
+-{
+- struct xen_netif *netif = netdev_priv(dev);
+- if (data) {
+- if (!netif->can_sg)
+- return -ENOSYS;
+- netif->features_disabled &= ~NETIF_F_SG;
+- } else {
+- netif->features_disabled |= NETIF_F_SG;
+- }
+-
+- netif_set_features(netif);
+- return 0;
+-}
+-
+-static int netbk_set_tso(struct net_device *dev, u32 data)
+-{
+- struct xen_netif *netif = netdev_priv(dev);
+- if (data) {
+- if (!netif->gso && !netif->gso_prefix)
+- return -ENOSYS;
+- netif->features_disabled &= ~NETIF_F_TSO;
+- } else {
+- netif->features_disabled |= NETIF_F_TSO;
+- }
+-
+- netif_set_features(netif);
+- return 0;
+-}
+-
+-static void netbk_get_drvinfo(struct net_device *dev,
+- struct ethtool_drvinfo *info)
+-{
+- strcpy(info->driver, "netbk");
+- strcpy(info->bus_info, dev_name(dev->dev.parent));
+-}
+-
+-static const struct netif_stat {
+- char name[ETH_GSTRING_LEN];
+- u16 offset;
+-} netbk_stats[] = {
+- { "copied_skbs", offsetof(struct xen_netif, nr_copied_skbs) },
+-};
+-
+-static int netbk_get_sset_count(struct net_device *dev, int string_set)
+-{
+- switch (string_set) {
+- case ETH_SS_STATS:
+- return ARRAY_SIZE(netbk_stats);
+- default:
+- return -EINVAL;
+- }
+-}
+-
+-static void netbk_get_ethtool_stats(struct net_device *dev,
+- struct ethtool_stats *stats, u64 * data)
+-{
+- void *netif = netdev_priv(dev);
+- int i;
+-
+- for (i = 0; i < ARRAY_SIZE(netbk_stats); i++)
+- data[i] = *(int *)(netif + netbk_stats[i].offset);
+-}
+-
+-static void netbk_get_strings(struct net_device *dev, u32 stringset, u8 * data)
+-{
+- int i;
+-
+- switch (stringset) {
+- case ETH_SS_STATS:
+- for (i = 0; i < ARRAY_SIZE(netbk_stats); i++)
+- memcpy(data + i * ETH_GSTRING_LEN,
+- netbk_stats[i].name, ETH_GSTRING_LEN);
+- break;
+- }
+-}
+-
+-static struct ethtool_ops network_ethtool_ops = {
+- .get_drvinfo = netbk_get_drvinfo,
+-
+- .get_tx_csum = ethtool_op_get_tx_csum,
+- .set_tx_csum = netbk_set_tx_csum,
+- .get_sg = ethtool_op_get_sg,
+- .set_sg = netbk_set_sg,
+- .get_tso = ethtool_op_get_tso,
+- .set_tso = netbk_set_tso,
+- .get_link = ethtool_op_get_link,
+-
+- .get_sset_count = netbk_get_sset_count,
+- .get_ethtool_stats = netbk_get_ethtool_stats,
+- .get_strings = netbk_get_strings,
+-};
+-
+-static struct net_device_ops netback_ops = {
+- .ndo_start_xmit = netif_be_start_xmit,
+- .ndo_get_stats = netif_be_get_stats,
+- .ndo_open = net_open,
+- .ndo_stop = net_close,
+- .ndo_change_mtu = netbk_change_mtu,
+-};
+-
+-struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
+- unsigned int handle)
+-{
+- int err = 0;
+- struct net_device *dev;
+- struct xen_netif *netif;
+- char name[IFNAMSIZ] = {};
+-
+- snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
+- dev = alloc_netdev(sizeof(struct xen_netif), name, ether_setup);
+- if (dev == NULL) {
+- pr_debug("Could not allocate netdev\n");
+- return ERR_PTR(-ENOMEM);
+- }
+-
+- SET_NETDEV_DEV(dev, parent);
+-
+- netif = netdev_priv(dev);
+- memset(netif, 0, sizeof(*netif));
+- netif->domid = domid;
+- netif->group = -1;
+- netif->handle = handle;
+- netif->can_sg = 1;
+- netif->csum = 1;
+- atomic_set(&netif->refcnt, 1);
+- init_waitqueue_head(&netif->waiting_to_free);
+- netif->dev = dev;
+- INIT_LIST_HEAD(&netif->list);
+-
+- netback_carrier_off(netif);
+-
+- netif->credit_bytes = netif->remaining_credit = ~0UL;
+- netif->credit_usec = 0UL;
+- init_timer(&netif->credit_timeout);
+- /* Initialize 'expires' now: it's used to track the credit window. */
+- netif->credit_timeout.expires = jiffies;
+-
+- dev->netdev_ops = &netback_ops;
+- netif_set_features(netif);
+- SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
+-
+- dev->tx_queue_len = netbk_queue_length;
+-
+- /*
+- * Initialise a dummy MAC address. We choose the numerically
+- * largest non-broadcast address to prevent the address getting
+- * stolen by an Ethernet bridge for STP purposes.
+- * (FE:FF:FF:FF:FF:FF)
+- */
+- memset(dev->dev_addr, 0xFF, ETH_ALEN);
+- dev->dev_addr[0] &= ~0x01;
+-
+- rtnl_lock();
+- err = register_netdevice(dev);
+- rtnl_unlock();
+- if (err) {
+- pr_debug("Could not register new net device %s: err=%d\n",
+- dev->name, err);
+- free_netdev(dev);
+- return ERR_PTR(err);
+- }
+-
+- pr_debug("Successfully created netif\n");
+- return netif;
+-}
+-
+-static int map_frontend_pages(struct xen_netif *netif,
+- grant_ref_t tx_ring_ref,
+- grant_ref_t rx_ring_ref)
+-{
+- struct gnttab_map_grant_ref op;
+-
+- gnttab_set_map_op(&op, (unsigned long)netif->tx_comms_area->addr,
+- GNTMAP_host_map, tx_ring_ref, netif->domid);
+-
+- if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
+- BUG();
+-
+- if (op.status) {
+- pr_debug("Gnttab failure mapping tx_ring_ref!\n");
+- return op.status;
+- }
+-
+- netif->tx_shmem_ref = tx_ring_ref;
+- netif->tx_shmem_handle = op.handle;
+-
+- gnttab_set_map_op(&op, (unsigned long)netif->rx_comms_area->addr,
+- GNTMAP_host_map, rx_ring_ref, netif->domid);
+-
+- if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
+- BUG();
+-
+- if (op.status) {
+- struct gnttab_unmap_grant_ref unop;
+-
+- gnttab_set_unmap_op(&unop,
+- (unsigned long)netif->tx_comms_area->addr,
+- GNTMAP_host_map, netif->tx_shmem_handle);
+- HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unop, 1);
+- pr_debug("Gnttab failure mapping rx_ring_ref!\n");
+- return op.status;
+- }
+-
+- netif->rx_shmem_ref = rx_ring_ref;
+- netif->rx_shmem_handle = op.handle;
+-
+- return 0;
+-}
+-
+-static void unmap_frontend_pages(struct xen_netif *netif)
+-{
+- struct gnttab_unmap_grant_ref op;
+-
+- gnttab_set_unmap_op(&op, (unsigned long)netif->tx_comms_area->addr,
+- GNTMAP_host_map, netif->tx_shmem_handle);
+-
+- if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
+- BUG();
+-
+- gnttab_set_unmap_op(&op, (unsigned long)netif->rx_comms_area->addr,
+- GNTMAP_host_map, netif->rx_shmem_handle);
+-
+- if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
+- BUG();
+-}
+-
+-int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
+- unsigned long rx_ring_ref, unsigned int evtchn)
+-{
+- int err = -ENOMEM;
+- struct xen_netif_tx_sring *txs;
+- struct xen_netif_rx_sring *rxs;
+-
+- /* Already connected through? */
+- if (netif->irq)
+- return 0;
+-
+- netif->tx_comms_area = alloc_vm_area(PAGE_SIZE);
+- if (netif->tx_comms_area == NULL)
+- return -ENOMEM;
+- netif->rx_comms_area = alloc_vm_area(PAGE_SIZE);
+- if (netif->rx_comms_area == NULL)
+- goto err_rx;
+-
+- err = map_frontend_pages(netif, tx_ring_ref, rx_ring_ref);
+- if (err)
+- goto err_map;
+-
+- err = bind_interdomain_evtchn_to_irqhandler(
+- netif->domid, evtchn, netif_be_int, 0,
+- netif->dev->name, netif);
+- if (err < 0)
+- goto err_hypervisor;
+- netif->irq = err;
+- disable_irq(netif->irq);
+-
+- txs = (struct xen_netif_tx_sring *)netif->tx_comms_area->addr;
+- BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE);
+-
+- rxs = (struct xen_netif_rx_sring *)
+- ((char *)netif->rx_comms_area->addr);
+- BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE);
+-
+- netif->rx_req_cons_peek = 0;
+-
+- netif_get(netif);
+-
+- rtnl_lock();
+- netback_carrier_on(netif);
+- if (netif_running(netif->dev))
+- __netif_up(netif);
+- rtnl_unlock();
+-
+- return 0;
+-err_hypervisor:
+- unmap_frontend_pages(netif);
+-err_map:
+- free_vm_area(netif->rx_comms_area);
+-err_rx:
+- free_vm_area(netif->tx_comms_area);
+- return err;
+-}
+-
+-void netif_disconnect(struct xen_netif *netif)
+-{
+- if (netback_carrier_ok(netif)) {
+- rtnl_lock();
+- netback_carrier_off(netif);
+- netif_carrier_off(netif->dev); /* discard queued packets */
+- if (netif_running(netif->dev))
+- __netif_down(netif);
+- rtnl_unlock();
+- netif_put(netif);
+- }
+-
+- atomic_dec(&netif->refcnt);
+- wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0);
+-
+- del_timer_sync(&netif->credit_timeout);
+-
+- if (netif->irq)
+- unbind_from_irqhandler(netif->irq, netif);
+-
+- unregister_netdev(netif->dev);
+-
+- if (netif->tx.sring) {
+- unmap_frontend_pages(netif);
+- free_vm_area(netif->tx_comms_area);
+- free_vm_area(netif->rx_comms_area);
+- }
+-
+- free_netdev(netif->dev);
+-}
+diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
+deleted file mode 100644
+index e0ca232..0000000
+--- a/drivers/xen/netback/netback.c
++++ /dev/null
+@@ -1,1909 +0,0 @@
+-/*
+- * Back-end of the driver for virtual network devices. This portion of the
+- * driver exports a 'unified' network-device interface that can be accessed
+- * by any operating system that implements a compatible front end. A
+- * reference front-end implementation can be found in:
+- * drivers/net/xen-netfront.c
+- *
+- * Copyright (c) 2002-2005, K A Fraser
+- *
+- * This program is free software; you can redistribute it and/or
+- * modify it under the terms of the GNU General Public License version 2
+- * as published by the Free Software Foundation; or, when distributed
+- * separately from the Linux kernel or incorporated into other
+- * software packages, subject to the following license:
+- *
+- * Permission is hereby granted, free of charge, to any person obtaining a copy
+- * of this source file (the "Software"), to deal in the Software without
+- * restriction, including without limitation the rights to use, copy, modify,
+- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+- * and to permit persons to whom the Software is furnished to do so, subject to
+- * the following conditions:
+- *
+- * The above copyright notice and this permission notice shall be included in
+- * all copies or substantial portions of the Software.
+- *
+- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+- * IN THE SOFTWARE.
+- */
+-
+-#include "common.h"
+-
+-#include <linux/kthread.h>
+-#include <linux/if_vlan.h>
+-#include <linux/udp.h>
+-
+-#include <net/tcp.h>
+-
+-#include <xen/balloon.h>
+-#include <xen/events.h>
+-#include <xen/interface/memory.h>
+-
+-#include <asm/xen/hypercall.h>
+-#include <asm/xen/page.h>
+-
+-/*define NETBE_DEBUG_INTERRUPT*/
+-
+-struct xen_netbk *xen_netbk;
+-int xen_netbk_group_nr;
+-
+-static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx);
+-static void make_tx_response(struct xen_netif *netif,
+- struct xen_netif_tx_request *txp,
+- s8 st);
+-static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
+- u16 id,
+- s8 st,
+- u16 offset,
+- u16 size,
+- u16 flags);
+-
+-static void net_tx_action(unsigned long data);
+-
+-static void net_rx_action(unsigned long data);
+-
+-static inline unsigned long idx_to_pfn(struct xen_netbk *netbk,
+- unsigned int idx)
+-{
+- return page_to_pfn(netbk->mmap_pages[idx]);
+-}
+-
+-static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
+- unsigned int idx)
+-{
+- return (unsigned long)pfn_to_kaddr(idx_to_pfn(netbk, idx));
+-}
+-
+-/* extra field used in struct page */
+-static inline void netif_set_page_ext(struct page *pg,
+- unsigned int group, unsigned int idx)
+-{
+- union page_ext ext = { .e = { .group = group + 1, .idx = idx } };
+-
+- BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping));
+- pg->mapping = ext.mapping;
+-}
+-
+-static int netif_get_page_ext(struct page *pg,
+- unsigned int *_group, unsigned int *_idx)
+-{
+- union page_ext ext = { .mapping = pg->mapping };
+- struct xen_netbk *netbk;
+- unsigned int group, idx;
+-
+- if (!PageForeign(pg))
+- return 0;
+-
+- group = ext.e.group - 1;
+-
+- if (group < 0 || group >= xen_netbk_group_nr)
+- return 0;
+-
+- netbk = &xen_netbk[group];
+-
+- if (netbk->mmap_pages == NULL)
+- return 0;
+-
+- idx = ext.e.idx;
+-
+- if ((idx < 0) || (idx >= MAX_PENDING_REQS))
+- return 0;
+-
+- if (netbk->mmap_pages[idx] != pg)
+- return 0;
+-
+- *_group = group;
+- *_idx = idx;
+-
+- return 1;
+-}
+-
+-/*
+- * This is the amount of packet we copy rather than map, so that the
+- * guest can't fiddle with the contents of the headers while we do
+- * packet processing on them (netfilter, routing, etc).
+- */
+-#define PKT_PROT_LEN (ETH_HLEN + \
+- VLAN_HLEN + \
+- sizeof(struct iphdr) + MAX_IPOPTLEN + \
+- sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE)
+-
+-static inline pending_ring_idx_t pending_index(unsigned i)
+-{
+- return i & (MAX_PENDING_REQS-1);
+-}
+-
+-static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk)
+-{
+- return MAX_PENDING_REQS -
+- netbk->pending_prod + netbk->pending_cons;
+-}
+-
+-/* Setting this allows the safe use of this driver without netloop. */
+-static int MODPARM_copy_skb = 1;
+-module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
+-MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
+-
+-int netbk_copy_skb_mode;
+-
+-static int MODPARM_netback_kthread;
+-module_param_named(netback_kthread, MODPARM_netback_kthread, bool, 0);
+-MODULE_PARM_DESC(netback_kthread, "Use kernel thread to replace tasklet");
+-
+-/*
+- * Netback bottom half handler.
+- * dir indicates the data direction.
+- * rx: 1, tx: 0.
+- */
+-static inline void xen_netbk_bh_handler(struct xen_netbk *netbk, int dir)
+-{
+- if (MODPARM_netback_kthread)
+- wake_up(&netbk->kthread.netbk_action_wq);
+- else if (dir)
+- tasklet_schedule(&netbk->tasklet.net_rx_tasklet);
+- else
+- tasklet_schedule(&netbk->tasklet.net_tx_tasklet);
+-}
+-
+-static inline void maybe_schedule_tx_action(struct xen_netbk *netbk)
+-{
+- smp_mb();
+- if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
+- !list_empty(&netbk->net_schedule_list))
+- xen_netbk_bh_handler(netbk, 0);
+-}
+-
+-static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
+-{
+- struct skb_shared_info *ninfo;
+- struct sk_buff *nskb;
+- unsigned long offset;
+- int ret;
+- int len;
+- int headlen;
+-
+- BUG_ON(skb_shinfo(skb)->frag_list != NULL);
+-
+- nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN);
+- if (unlikely(!nskb))
+- goto err;
+-
+- skb_reserve(nskb, NET_SKB_PAD + NET_IP_ALIGN);
+- headlen = skb_end_pointer(nskb) - nskb->data;
+- if (headlen > skb_headlen(skb))
+- headlen = skb_headlen(skb);
+- ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
+- BUG_ON(ret);
+-
+- ninfo = skb_shinfo(nskb);
+- ninfo->gso_size = skb_shinfo(skb)->gso_size;
+- ninfo->gso_type = skb_shinfo(skb)->gso_type;
+-
+- offset = headlen;
+- len = skb->len - headlen;
+-
+- nskb->len = skb->len;
+- nskb->data_len = len;
+- nskb->truesize += len;
+-
+- while (len) {
+- struct page *page;
+- int copy;
+- int zero;
+-
+- if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
+- dump_stack();
+- goto err_free;
+- }
+-
+- copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
+- zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
+-
+- page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
+- if (unlikely(!page))
+- goto err_free;
+-
+- ret = skb_copy_bits(skb, offset, page_address(page), copy);
+- BUG_ON(ret);
+-
+- ninfo->frags[ninfo->nr_frags].page = page;
+- ninfo->frags[ninfo->nr_frags].page_offset = 0;
+- ninfo->frags[ninfo->nr_frags].size = copy;
+- ninfo->nr_frags++;
+-
+- offset += copy;
+- len -= copy;
+- }
+-
+-#ifdef NET_SKBUFF_DATA_USES_OFFSET
+- offset = 0;
+-#else
+- offset = nskb->data - skb->data;
+-#endif
+-
+- nskb->transport_header = skb->transport_header + offset;
+- nskb->network_header = skb->network_header + offset;
+- nskb->mac_header = skb->mac_header + offset;
+-
+- return nskb;
+-
+- err_free:
+- kfree_skb(nskb);
+- err:
+- return NULL;
+-}
+-
+-static inline int netbk_max_required_rx_slots(struct xen_netif *netif)
+-{
+- if (netif->can_sg || netif->gso || netif->gso_prefix)
+- return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
+- return 1; /* all in one */
+-}
+-
+-static inline int netbk_queue_full(struct xen_netif *netif)
+-{
+- RING_IDX peek = netif->rx_req_cons_peek;
+- RING_IDX needed = netbk_max_required_rx_slots(netif);
+-
+- return ((netif->rx.sring->req_prod - peek) < needed) ||
+- ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
+-}
+-
+-/*
+- * Returns true if we should start a new receive buffer instead of
+- * adding 'size' bytes to a buffer which currently contains 'offset'
+- * bytes.
+- */
+-static bool start_new_rx_buffer(int offset, unsigned long size, int head)
+-{
+- /* simple case: we have completely filled the current buffer. */
+- if (offset == MAX_BUFFER_OFFSET)
+- return true;
+-
+- /*
+- * complex case: start a fresh buffer if the current frag
+- * would overflow the current buffer but only if:
+- * (i) this frag would fit completely in the next buffer
+- * and (ii) there is already some data in the current buffer
+- * and (iii) this is not the head buffer.
+- *
+- * Where:
+- * - (i) stops us splitting a frag into two copies
+- * unless the frag is too large for a single buffer.
+- * - (ii) stops us from leaving a buffer pointlessly empty.
+- * - (iii) stops us leaving the first buffer
+- * empty. Strictly speaking this is already covered
+- * by (ii) but is explicitly checked because
+- * netfront relies on the first buffer being
+- * non-empty and can crash otherwise.
+- *
+- * This means we will effectively linearise small
+- * frags but do not needlessly split large buffers
+- * into multiple copies tend to give large frags their
+- * own buffers as before.
+- */
+- if ((offset + size > MAX_BUFFER_OFFSET) &&
+- (size <= MAX_BUFFER_OFFSET) && offset && !head)
+- return true;
+-
+- return false;
+-}
+-
+-/*
+- * Figure out how many ring slots we're going to need to send @skb to
+- * the guest. This function is essentially a dry run of
+- * netbk_gop_frag_copy.
+- */
+-static unsigned int count_skb_slots(struct sk_buff *skb, struct xen_netif *netif)
+-{
+- unsigned int count = 1;
+- int i, copy_off = 0;
+-
+- BUG_ON(offset_in_page(skb->data)+skb_headlen(skb) > MAX_BUFFER_OFFSET);
+-
+- copy_off = skb_headlen(skb);
+-
+- if (skb_shinfo(skb)->gso_size)
+- count++;
+-
+- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+- unsigned long size = skb_shinfo(skb)->frags[i].size;
+- unsigned long bytes;
+- while (size > 0) {
+- BUG_ON(copy_off > MAX_BUFFER_OFFSET);
+-
+- if (start_new_rx_buffer(copy_off, size, 0)) {
+- count++;
+- copy_off = 0;
+- }
+-
+- bytes = size;
+- if (copy_off + bytes > MAX_BUFFER_OFFSET)
+- bytes = MAX_BUFFER_OFFSET - copy_off;
+-
+- copy_off += bytes;
+- size -= bytes;
+- }
+- }
+- return count;
+-}
+-
+-int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+-{
+- struct xen_netif *netif = netdev_priv(dev);
+- struct xen_netbk *netbk;
+-
+- BUG_ON(skb->dev != dev);
+-
+- if (netif->group == -1)
+- goto drop;
+-
+- netbk = &xen_netbk[netif->group];
+-
+- /* Drop the packet if the target domain has no receive buffers. */
+- if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
+- goto drop;
+-
+- /*
+- * XXX For now we also copy skbuffs whose head crosses a page
+- * boundary, because netbk_gop_skb can't handle them.
+- */
+- if ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE) {
+- struct sk_buff *nskb = netbk_copy_skb(skb);
+- if (unlikely(nskb == NULL))
+- goto drop;
+- /* Copy only the header fields we use in this driver. */
+- nskb->dev = skb->dev;
+- nskb->ip_summed = skb->ip_summed;
+- dev_kfree_skb(skb);
+- skb = nskb;
+- }
+-
+- /* Reserve ring slots for the worst-case number of fragments. */
+- netif->rx_req_cons_peek += count_skb_slots(skb, netif);
+- netif_get(netif);
+-
+- if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
+- netif->rx.sring->req_event = netif->rx_req_cons_peek +
+- netbk_max_required_rx_slots(netif);
+- mb(); /* request notification /then/ check & stop the queue */
+- if (netbk_queue_full(netif))
+- netif_stop_queue(dev);
+- }
+- skb_queue_tail(&netbk->rx_queue, skb);
+-
+- xen_netbk_bh_handler(netbk, 1);
+-
+- return 0;
+-
+- drop:
+- netif->stats.tx_dropped++;
+- dev_kfree_skb(skb);
+- return 0;
+-}
+-
+-struct netrx_pending_operations {
+- unsigned copy_prod, copy_cons;
+- unsigned meta_prod, meta_cons;
+- struct gnttab_copy *copy;
+- struct netbk_rx_meta *meta;
+- int copy_off;
+- grant_ref_t copy_gref;
+-};
+-
+-static struct netbk_rx_meta *get_next_rx_buffer(struct xen_netif *netif,
+- struct netrx_pending_operations *npo)
+-{
+- struct netbk_rx_meta *meta;
+- struct xen_netif_rx_request *req;
+-
+- req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
+-
+- meta = npo->meta + npo->meta_prod++;
+- meta->gso_size = 0;
+- meta->size = 0;
+- meta->id = req->id;
+-
+- npo->copy_off = 0;
+- npo->copy_gref = req->gref;
+-
+- return meta;
+-}
+-
+-/*
+- * Set up the grant operations for this fragment. If it's a flipping
+- * interface, we also set up the unmap request from here.
+- */
+-static void netbk_gop_frag_copy(struct xen_netif *netif,
+- struct netrx_pending_operations *npo,
+- struct page *page, unsigned long size,
+- unsigned long offset, int head)
+-{
+- struct gnttab_copy *copy_gop;
+- struct netbk_rx_meta *meta;
+- /*
+- * These variables a used iff netif_get_page_ext returns true,
+- * in which case they are guaranteed to be initialized.
+- */
+- unsigned int uninitialized_var(group), uninitialized_var(idx);
+- int foreign = netif_get_page_ext(page, &group, &idx);
+- unsigned long bytes;
+-
+- /* Data must not cross a page boundary. */
+- BUG_ON(size + offset > PAGE_SIZE);
+-
+- meta = npo->meta + npo->meta_prod - 1;
+-
+- while (size > 0) {
+- BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
+-
+- if (start_new_rx_buffer(npo->copy_off, size, head)) {
+- /*
+- * Netfront requires there to be some data in
+- * the head buffer.
+- */
+- BUG_ON(head);
+-
+- meta = get_next_rx_buffer(netif, npo);
+- }
+-
+- bytes = size;
+- if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
+- bytes = MAX_BUFFER_OFFSET - npo->copy_off;
+-
+- copy_gop = npo->copy + npo->copy_prod++;
+- copy_gop->flags = GNTCOPY_dest_gref;
+- if (foreign) {
+- struct xen_netbk *netbk = &xen_netbk[group];
+- struct pending_tx_info *src_pend;
+-
+- src_pend = &netbk->pending_tx_info[idx];
+-
+- copy_gop->source.domid = src_pend->netif->domid;
+- copy_gop->source.u.ref = src_pend->req.gref;
+- copy_gop->flags |= GNTCOPY_source_gref;
+- } else {
+- void *vaddr = page_address(page);
+- copy_gop->source.domid = DOMID_SELF;
+- copy_gop->source.u.gmfn = virt_to_mfn(vaddr);
+- }
+- copy_gop->source.offset = offset;
+- copy_gop->dest.domid = netif->domid;
+-
+- copy_gop->dest.offset = npo->copy_off;
+- copy_gop->dest.u.ref = npo->copy_gref;
+- copy_gop->len = bytes;
+-
+- npo->copy_off += bytes;
+- meta->size += bytes;
+-
+- offset += bytes;
+- size -= bytes;
+- head = 0; /* There must be something in this buffer now. */
+- }
+-}
+-
+-/*
+- * Prepare an SKB to be transmitted to the frontend.
+- *
+- * This function is responsible for allocating grant operations, meta
+- * structures, etc.
+- *
+- * It returns the number of meta structures consumed. The number of
+- * ring slots used is always equal to the number of meta slots used
+- * plus the number of GSO descriptors used. Currently, we use either
+- * zero GSO descriptors (for non-GSO packets) or one descriptor (for
+- * frontend-side LRO).
+- */
+-static int netbk_gop_skb(struct sk_buff *skb,
+- struct netrx_pending_operations *npo)
+-{
+- struct xen_netif *netif = netdev_priv(skb->dev);
+- int nr_frags = skb_shinfo(skb)->nr_frags;
+- int i;
+- struct xen_netif_rx_request *req;
+- struct netbk_rx_meta *meta;
+- int old_meta_prod;
+-
+- old_meta_prod = npo->meta_prod;
+-
+- /* Set up a GSO prefix descriptor, if necessary */
+- if (skb_shinfo(skb)->gso_size && netif->gso_prefix) {
+- req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
+- meta = npo->meta + npo->meta_prod++;
+- meta->gso_size = skb_shinfo(skb)->gso_size;
+- meta->size = 0;
+- meta->id = req->id;
+- }
+-
+- req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
+- meta = npo->meta + npo->meta_prod++;
+-
+- if (!netif->gso_prefix)
+- meta->gso_size = skb_shinfo(skb)->gso_size;
+- else
+- meta->gso_size = 0;
+-
+- meta->size = 0;
+- meta->id = req->id;
+- npo->copy_off = 0;
+- npo->copy_gref = req->gref;
+-
+- netbk_gop_frag_copy(netif,
+- npo, virt_to_page(skb->data),
+- skb_headlen(skb),
+- offset_in_page(skb->data), 1);
+-
+- /* Leave a gap for the GSO descriptor. */
+- if (skb_shinfo(skb)->gso_size && !netif->gso_prefix)
+- netif->rx.req_cons++;
+-
+- for (i = 0; i < nr_frags; i++) {
+- netbk_gop_frag_copy(netif, npo,
+- skb_shinfo(skb)->frags[i].page,
+- skb_shinfo(skb)->frags[i].size,
+- skb_shinfo(skb)->frags[i].page_offset,
+- 0);
+- }
+-
+- return npo->meta_prod - old_meta_prod;
+-}
+-
+-/*
+- * This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was
+- * used to set up the operations on the top of
+- * netrx_pending_operations, which have since been done. Check that
+- * they didn't give any errors and advance over them.
+- */
+-static int netbk_check_gop(int nr_meta_slots, domid_t domid,
+- struct netrx_pending_operations *npo)
+-{
+- struct gnttab_copy *copy_op;
+- int status = NETIF_RSP_OKAY;
+- int i;
+-
+- for (i = 0; i < nr_meta_slots; i++) {
+- copy_op = npo->copy + npo->copy_cons++;
+- if (copy_op->status != GNTST_okay) {
+- pr_debug("Bad status %d from copy to DOM%d.\n",
+- copy_op->status, domid);
+- status = NETIF_RSP_ERROR;
+- }
+- }
+-
+- return status;
+-}
+-
+-static void netbk_add_frag_responses(struct xen_netif *netif, int status,
+- struct netbk_rx_meta *meta,
+- int nr_meta_slots)
+-{
+- int i;
+- unsigned long offset;
+-
+- /* No fragments used */
+- if (nr_meta_slots <= 1)
+- return;
+-
+- nr_meta_slots--;
+-
+- for (i = 0; i < nr_meta_slots; i++) {
+- int flags;
+- if (i == nr_meta_slots - 1)
+- flags = 0;
+- else
+- flags = NETRXF_more_data;
+-
+- offset = 0;
+- make_rx_response(netif, meta[i].id, status, offset,
+- meta[i].size, flags);
+- }
+-}
+-
+-struct skb_cb_overlay {
+- int meta_slots_used;
+-};
+-
+-static void net_rx_action(unsigned long data)
+-{
+- struct xen_netif *netif = NULL;
+- struct xen_netbk *netbk = (struct xen_netbk *)data;
+- s8 status;
+- u16 irq, flags;
+- struct xen_netif_rx_response *resp;
+- struct sk_buff_head rxq;
+- struct sk_buff *skb;
+- int notify_nr = 0;
+- int ret;
+- int nr_frags;
+- int count;
+- unsigned long offset;
+- struct skb_cb_overlay *sco;
+-
+- struct netrx_pending_operations npo = {
+- .copy = netbk->grant_copy_op,
+- .meta = netbk->meta,
+- };
+-
+- skb_queue_head_init(&rxq);
+-
+- count = 0;
+-
+- while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) {
+- netif = netdev_priv(skb->dev);
+- nr_frags = skb_shinfo(skb)->nr_frags;
+-
+- sco = (struct skb_cb_overlay *)skb->cb;
+- sco->meta_slots_used = netbk_gop_skb(skb, &npo);
+-
+- count += nr_frags + 1;
+-
+- __skb_queue_tail(&rxq, skb);
+-
+- /* Filled the batch queue? */
+- if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE)
+- break;
+- }
+-
+- BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
+-
+- if (!npo.copy_prod)
+- return;
+-
+- BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
+- ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, &netbk->grant_copy_op,
+- npo.copy_prod);
+- BUG_ON(ret != 0);
+-
+- while ((skb = __skb_dequeue(&rxq)) != NULL) {
+- sco = (struct skb_cb_overlay *)skb->cb;
+-
+- netif = netdev_priv(skb->dev);
+-
+- if (netbk->meta[npo.meta_cons].gso_size && netif->gso_prefix) {
+- resp = RING_GET_RESPONSE(&netif->rx,
+- netif->rx.rsp_prod_pvt++);
+-
+- resp->flags = NETRXF_gso_prefix | NETRXF_more_data;
+-
+- resp->offset = netbk->meta[npo.meta_cons].gso_size;
+- resp->id = netbk->meta[npo.meta_cons].id;
+- resp->status = sco->meta_slots_used;
+-
+- npo.meta_cons++;
+- sco->meta_slots_used--;
+- }
+-
+-
+- netif->stats.tx_bytes += skb->len;
+- netif->stats.tx_packets++;
+-
+- status = netbk_check_gop(sco->meta_slots_used,
+- netif->domid, &npo);
+-
+- if (sco->meta_slots_used == 1)
+- flags = 0;
+- else
+- flags = NETRXF_more_data;
+-
+- if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
+- flags |= NETRXF_csum_blank | NETRXF_data_validated;
+- else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
+- /* remote but checksummed. */
+- flags |= NETRXF_data_validated;
+-
+- offset = 0;
+- resp = make_rx_response(netif, netbk->meta[npo.meta_cons].id,
+- status, offset,
+- netbk->meta[npo.meta_cons].size,
+- flags);
+-
+- if (netbk->meta[npo.meta_cons].gso_size && !netif->gso_prefix) {
+- struct xen_netif_extra_info *gso =
+- (struct xen_netif_extra_info *)
+- RING_GET_RESPONSE(&netif->rx,
+- netif->rx.rsp_prod_pvt++);
+-
+- resp->flags |= NETRXF_extra_info;
+-
+- gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size;
+- gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
+- gso->u.gso.pad = 0;
+- gso->u.gso.features = 0;
+-
+- gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
+- gso->flags = 0;
+- }
+-
+- netbk_add_frag_responses(netif, status,
+- netbk->meta + npo.meta_cons + 1,
+- sco->meta_slots_used);
+-
+- RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
+- irq = netif->irq;
+- if (ret && !netbk->rx_notify[irq]) {
+- netbk->rx_notify[irq] = 1;
+- netbk->notify_list[notify_nr++] = irq;
+- }
+-
+- if (netif_queue_stopped(netif->dev) &&
+- netif_schedulable(netif) &&
+- !netbk_queue_full(netif))
+- netif_wake_queue(netif->dev);
+-
+- netif_put(netif);
+- npo.meta_cons += sco->meta_slots_used;
+- dev_kfree_skb(skb);
+- }
+-
+- while (notify_nr != 0) {
+- irq = netbk->notify_list[--notify_nr];
+- netbk->rx_notify[irq] = 0;
+- notify_remote_via_irq(irq);
+- }
+-
+- /* More work to do? */
+- if (!skb_queue_empty(&netbk->rx_queue) &&
+- !timer_pending(&netbk->net_timer))
+- xen_netbk_bh_handler(netbk, 1);
+-}
+-
+-static void net_alarm(unsigned long data)
+-{
+- struct xen_netbk *netbk = (struct xen_netbk *)data;
+- xen_netbk_bh_handler(netbk, 1);
+-}
+-
+-static void netbk_tx_pending_timeout(unsigned long data)
+-{
+- struct xen_netbk *netbk = (struct xen_netbk *)data;
+- xen_netbk_bh_handler(netbk, 0);
+-}
+-
+-struct net_device_stats *netif_be_get_stats(struct net_device *dev)
+-{
+- struct xen_netif *netif = netdev_priv(dev);
+- return &netif->stats;
+-}
+-
+-static int __on_net_schedule_list(struct xen_netif *netif)
+-{
+- return !list_empty(&netif->list);
+-}
+-
+-/* Must be called with net_schedule_list_lock held */
+-static void remove_from_net_schedule_list(struct xen_netif *netif)
+-{
+- if (likely(__on_net_schedule_list(netif))) {
+- list_del_init(&netif->list);
+- netif_put(netif);
+- }
+-}
+-
+-static struct xen_netif *poll_net_schedule_list(struct xen_netbk *netbk)
+-{
+- struct xen_netif *netif = NULL;
+-
+- spin_lock_irq(&netbk->net_schedule_list_lock);
+- if (list_empty(&netbk->net_schedule_list))
+- goto out;
+-
+- netif = list_first_entry(&netbk->net_schedule_list,
+- struct xen_netif, list);
+- if (!netif)
+- goto out;
+-
+- netif_get(netif);
+-
+- remove_from_net_schedule_list(netif);
+-out:
+- spin_unlock_irq(&netbk->net_schedule_list_lock);
+- return netif;
+-}
+-
+-static void add_to_net_schedule_list_tail(struct xen_netif *netif)
+-{
+- unsigned long flags;
+-
+- struct xen_netbk *netbk = &xen_netbk[netif->group];
+- if (__on_net_schedule_list(netif))
+- return;
+-
+- spin_lock_irqsave(&netbk->net_schedule_list_lock, flags);
+- if (!__on_net_schedule_list(netif) &&
+- likely(netif_schedulable(netif))) {
+- list_add_tail(&netif->list, &netbk->net_schedule_list);
+- netif_get(netif);
+- }
+- spin_unlock_irqrestore(&netbk->net_schedule_list_lock, flags);
+-}
+-
+-void netif_schedule_work(struct xen_netif *netif)
+-{
+- struct xen_netbk *netbk = &xen_netbk[netif->group];
+- int more_to_do;
+-
+- RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
+-
+- if (more_to_do) {
+- add_to_net_schedule_list_tail(netif);
+- maybe_schedule_tx_action(netbk);
+- }
+-}
+-
+-void netif_deschedule_work(struct xen_netif *netif)
+-{
+- struct xen_netbk *netbk = &xen_netbk[netif->group];
+- spin_lock_irq(&netbk->net_schedule_list_lock);
+- remove_from_net_schedule_list(netif);
+- spin_unlock_irq(&netbk->net_schedule_list_lock);
+-}
+-
+-
+-static void tx_add_credit(struct xen_netif *netif)
+-{
+- unsigned long max_burst, max_credit;
+-
+- /*
+- * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
+- * Otherwise the interface can seize up due to insufficient credit.
+- */
+- max_burst = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size;
+- max_burst = min(max_burst, 131072UL);
+- max_burst = max(max_burst, netif->credit_bytes);
+-
+- /* Take care that adding a new chunk of credit doesn't wrap to zero. */
+- max_credit = netif->remaining_credit + netif->credit_bytes;
+- if (max_credit < netif->remaining_credit)
+- max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
+-
+- netif->remaining_credit = min(max_credit, max_burst);
+-}
+-
+-static void tx_credit_callback(unsigned long data)
+-{
+- struct xen_netif *netif = (struct xen_netif *)data;
+- tx_add_credit(netif);
+- netif_schedule_work(netif);
+-}
+-
+-static inline int copy_pending_req(struct xen_netbk *netbk,
+- pending_ring_idx_t pending_idx)
+-{
+- return gnttab_copy_grant_page(
+- netbk->grant_tx_handle[pending_idx],
+- &netbk->mmap_pages[pending_idx]);
+-}
+-
+-static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
+-{
+- struct netbk_tx_pending_inuse *inuse, *n;
+- struct gnttab_unmap_grant_ref *gop;
+- u16 pending_idx;
+- pending_ring_idx_t dc, dp;
+- struct xen_netif *netif;
+- int ret;
+- LIST_HEAD(list);
+-
+- dc = netbk->dealloc_cons;
+- gop = netbk->tx_unmap_ops;
+-
+- /* Free up any grants we have finished using. */
+- do {
+- dp = netbk->dealloc_prod;
+-
+- /* Ensure we see all indices enqueued by netif_idx_release(). */
+- smp_rmb();
+-
+- while (dc != dp) {
+- unsigned long pfn;
+- struct netbk_tx_pending_inuse *pending_inuse =
+- netbk->pending_inuse;
+-
+- pending_idx = netbk->dealloc_ring[pending_index(dc++)];
+- list_move_tail(&pending_inuse[pending_idx].list, &list);
+-
+- pfn = idx_to_pfn(netbk, pending_idx);
+- /* Already unmapped? */
+- if (!phys_to_machine_mapping_valid(pfn))
+- continue;
+-
+- gnttab_set_unmap_op(gop,
+- idx_to_kaddr(netbk, pending_idx),
+- GNTMAP_host_map,
+- netbk->grant_tx_handle[pending_idx]);
+- gop++;
+- }
+-
+- } while (dp != netbk->dealloc_prod);
+-
+- netbk->dealloc_cons = dc;
+-
+- ret = HYPERVISOR_grant_table_op(
+- GNTTABOP_unmap_grant_ref, netbk->tx_unmap_ops,
+- gop - netbk->tx_unmap_ops);
+- BUG_ON(ret);
+-
+- /*
+- * Copy any entries that have been pending for too long
+- */
+- if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
+- !list_empty(&netbk->pending_inuse_head)) {
+- list_for_each_entry_safe(inuse, n,
+- &netbk->pending_inuse_head, list) {
+- struct pending_tx_info *pending_tx_info;
+- pending_tx_info = netbk->pending_tx_info;
+-
+- if (time_after(inuse->alloc_time + HZ / 2, jiffies))
+- break;
+-
+- pending_idx = inuse - netbk->pending_inuse;
+-
+- pending_tx_info[pending_idx].netif->nr_copied_skbs++;
+-
+- switch (copy_pending_req(netbk, pending_idx)) {
+- case 0:
+- list_move_tail(&inuse->list, &list);
+- continue;
+- case -EBUSY:
+- list_del_init(&inuse->list);
+- continue;
+- case -ENOENT:
+- continue;
+- }
+-
+- break;
+- }
+- }
+-
+- list_for_each_entry_safe(inuse, n, &list, list) {
+- struct pending_tx_info *pending_tx_info;
+- pending_ring_idx_t index;
+-
+- pending_tx_info = netbk->pending_tx_info;
+- pending_idx = inuse - netbk->pending_inuse;
+-
+- netif = pending_tx_info[pending_idx].netif;
+-
+- make_tx_response(netif, &pending_tx_info[pending_idx].req,
+- NETIF_RSP_OKAY);
+-
+- /* Ready for next use. */
+- gnttab_reset_grant_page(netbk->mmap_pages[pending_idx]);
+-
+- index = pending_index(netbk->pending_prod++);
+- netbk->pending_ring[index] = pending_idx;
+-
+- netif_put(netif);
+-
+- list_del_init(&inuse->list);
+- }
+-}
+-
+-static void netbk_tx_err(struct xen_netif *netif,
+- struct xen_netif_tx_request *txp, RING_IDX end)
+-{
+- RING_IDX cons = netif->tx.req_cons;
+-
+- do {
+- make_tx_response(netif, txp, NETIF_RSP_ERROR);
+- if (cons >= end)
+- break;
+- txp = RING_GET_REQUEST(&netif->tx, cons++);
+- } while (1);
+- netif->tx.req_cons = cons;
+- netif_schedule_work(netif);
+- netif_put(netif);
+-}
+-
+-static int netbk_count_requests(struct xen_netif *netif,
+- struct xen_netif_tx_request *first,
+- struct xen_netif_tx_request *txp,
+- int work_to_do)
+-{
+- RING_IDX cons = netif->tx.req_cons;
+- int frags = 0;
+-
+- if (!(first->flags & NETTXF_more_data))
+- return 0;
+-
+- do {
+- if (frags >= work_to_do) {
+- DPRINTK("Need more frags\n");
+- return -frags;
+- }
+-
+- if (unlikely(frags >= MAX_SKB_FRAGS)) {
+- DPRINTK("Too many frags\n");
+- return -frags;
+- }
+-
+- memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags),
+- sizeof(*txp));
+- if (txp->size > first->size) {
+- DPRINTK("Frags galore\n");
+- return -frags;
+- }
+-
+- first->size -= txp->size;
+- frags++;
+-
+- if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
+- DPRINTK("txp->offset: %x, size: %u\n",
+- txp->offset, txp->size);
+- return -frags;
+- }
+- } while ((txp++)->flags & NETTXF_more_data);
+-
+- return frags;
+-}
+-
+-static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk,
+- struct xen_netif *netif,
+- struct sk_buff *skb,
+- struct xen_netif_tx_request *txp,
+- struct gnttab_map_grant_ref *mop)
+-{
+- struct skb_shared_info *shinfo = skb_shinfo(skb);
+- skb_frag_t *frags = shinfo->frags;
+- unsigned long pending_idx = *((u16 *)skb->data);
+- int i, start;
+-
+- /* Skip first skb fragment if it is on same page as header fragment. */
+- start = ((unsigned long)shinfo->frags[0].page == pending_idx);
+-
+- for (i = start; i < shinfo->nr_frags; i++, txp++) {
+- pending_ring_idx_t index;
+- struct pending_tx_info *pending_tx_info =
+- netbk->pending_tx_info;
+-
+- index = pending_index(netbk->pending_cons++);
+- pending_idx = netbk->pending_ring[index];
+-
+- gnttab_set_map_op(mop++, idx_to_kaddr(netbk, pending_idx),
+- GNTMAP_host_map | GNTMAP_readonly,
+- txp->gref, netif->domid);
+-
+- memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
+- netif_get(netif);
+- pending_tx_info[pending_idx].netif = netif;
+- frags[i].page = (void *)pending_idx;
+- }
+-
+- return mop;
+-}
+-
+-static int netbk_tx_check_mop(struct xen_netbk *netbk,
+- struct sk_buff *skb,
+- struct gnttab_map_grant_ref **mopp)
+-{
+- struct gnttab_map_grant_ref *mop = *mopp;
+- int pending_idx = *((u16 *)skb->data);
+- struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
+- struct xen_netif *netif = pending_tx_info[pending_idx].netif;
+- struct xen_netif_tx_request *txp;
+- struct skb_shared_info *shinfo = skb_shinfo(skb);
+- int nr_frags = shinfo->nr_frags;
+- int i, err, start;
+-
+- /* Check status of header. */
+- err = mop->status;
+- if (unlikely(err)) {
+- pending_ring_idx_t index;
+- index = pending_index(netbk->pending_prod++);
+- txp = &pending_tx_info[pending_idx].req;
+- make_tx_response(netif, txp, NETIF_RSP_ERROR);
+- netbk->pending_ring[index] = pending_idx;
+- netif_put(netif);
+- } else {
+- set_phys_to_machine(
+- __pa(idx_to_kaddr(netbk, pending_idx)) >> PAGE_SHIFT,
+- FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
+- netbk->grant_tx_handle[pending_idx] = mop->handle;
+- }
+-
+- /* Skip first skb fragment if it is on same page as header fragment. */
+- start = ((unsigned long)shinfo->frags[0].page == pending_idx);
+-
+- for (i = start; i < nr_frags; i++) {
+- int j, newerr;
+- pending_ring_idx_t index;
+-
+- pending_idx = (unsigned long)shinfo->frags[i].page;
+-
+- /* Check error status: if okay then remember grant handle. */
+- newerr = (++mop)->status;
+- if (likely(!newerr)) {
+- unsigned long addr;
+- addr = idx_to_kaddr(netbk, pending_idx);
+- set_phys_to_machine(
+- __pa(addr)>>PAGE_SHIFT,
+- FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
+- netbk->grant_tx_handle[pending_idx] = mop->handle;
+- /* Had a previous error? Invalidate this fragment. */
+- if (unlikely(err))
+- netif_idx_release(netbk, pending_idx);
+- continue;
+- }
+-
+- /* Error on this fragment: respond to client with an error. */
+- txp = &netbk->pending_tx_info[pending_idx].req;
+- make_tx_response(netif, txp, NETIF_RSP_ERROR);
+- index = pending_index(netbk->pending_prod++);
+- netbk->pending_ring[index] = pending_idx;
+- netif_put(netif);
+-
+- /* Not the first error? Preceding frags already invalidated. */
+- if (err)
+- continue;
+-
+- /* First error: invalidate header and preceding fragments. */
+- pending_idx = *((u16 *)skb->data);
+- netif_idx_release(netbk, pending_idx);
+- for (j = start; j < i; j++) {
+- pending_idx = (unsigned long)shinfo->frags[i].page;
+- netif_idx_release(netbk, pending_idx);
+- }
+-
+- /* Remember the error: invalidate all subsequent fragments. */
+- err = newerr;
+- }
+-
+- *mopp = mop + 1;
+- return err;
+-}
+-
+-static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
+-{
+- struct skb_shared_info *shinfo = skb_shinfo(skb);
+- int nr_frags = shinfo->nr_frags;
+- int i;
+-
+- for (i = 0; i < nr_frags; i++) {
+- skb_frag_t *frag = shinfo->frags + i;
+- struct xen_netif_tx_request *txp;
+- unsigned long pending_idx;
+-
+- pending_idx = (unsigned long)frag->page;
+-
+- netbk->pending_inuse[pending_idx].alloc_time = jiffies;
+- list_add_tail(&netbk->pending_inuse[pending_idx].list,
+- &netbk->pending_inuse_head);
+-
+- txp = &netbk->pending_tx_info[pending_idx].req;
+- frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx));
+- frag->size = txp->size;
+- frag->page_offset = txp->offset;
+-
+- skb->len += txp->size;
+- skb->data_len += txp->size;
+- skb->truesize += txp->size;
+- }
+-}
+-
+-int netbk_get_extras(struct xen_netif *netif,
+- struct xen_netif_extra_info *extras,
+- int work_to_do)
+-{
+- struct xen_netif_extra_info extra;
+- RING_IDX cons = netif->tx.req_cons;
+-
+- do {
+- if (unlikely(work_to_do-- <= 0)) {
+- pr_debug("Missing extra info\n");
+- return -EBADR;
+- }
+-
+- memcpy(&extra, RING_GET_REQUEST(&netif->tx, cons),
+- sizeof(extra));
+- if (unlikely(!extra.type ||
+- extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
+- netif->tx.req_cons = ++cons;
+- pr_debug("Invalid extra type: %d\n", extra.type);
+- return -EINVAL;
+- }
+-
+- memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
+- netif->tx.req_cons = ++cons;
+- } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
+-
+- return work_to_do;
+-}
+-
+-static int netbk_set_skb_gso(struct sk_buff *skb,
+- struct xen_netif_extra_info *gso)
+-{
+- if (!gso->u.gso.size) {
+- pr_debug("GSO size must not be zero.\n");
+- return -EINVAL;
+- }
+-
+- /* Currently only TCPv4 S.O. is supported. */
+- if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
+- pr_debug("Bad GSO type %d.\n", gso->u.gso.type);
+- return -EINVAL;
+- }
+-
+- skb_shinfo(skb)->gso_size = gso->u.gso.size;
+- skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+-
+- /* Header must be checked, and gso_segs computed. */
+- skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
+- skb_shinfo(skb)->gso_segs = 0;
+-
+- return 0;
+-}
+-
+-static int skb_checksum_setup(struct sk_buff *skb)
+-{
+- struct iphdr *iph;
+- unsigned char *th;
+- int err = -EPROTO;
+-
+- if (skb->protocol != htons(ETH_P_IP))
+- goto out;
+-
+- iph = (void *)skb->data;
+- th = skb->data + 4 * iph->ihl;
+- if (th >= skb_tail_pointer(skb))
+- goto out;
+-
+- skb->csum_start = th - skb->head;
+- switch (iph->protocol) {
+- case IPPROTO_TCP:
+- skb->csum_offset = offsetof(struct tcphdr, check);
+- break;
+- case IPPROTO_UDP:
+- skb->csum_offset = offsetof(struct udphdr, check);
+- break;
+- default:
+- if (net_ratelimit())
+- printk(KERN_ERR "Attempting to checksum a non-"
+- "TCP/UDP packet, dropping a protocol"
+- " %d packet", iph->protocol);
+- goto out;
+- }
+-
+- if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
+- goto out;
+-
+- err = 0;
+-
+-out:
+- return err;
+-}
+-
+-static bool tx_credit_exceeded(struct xen_netif *netif, unsigned size)
+-{
+- unsigned long now = jiffies;
+- unsigned long next_credit =
+- netif->credit_timeout.expires +
+- msecs_to_jiffies(netif->credit_usec / 1000);
+-
+- /* Timer could already be pending in rare cases. */
+- if (timer_pending(&netif->credit_timeout))
+- return true;
+-
+- /* Passed the point where we can replenish credit? */
+- if (time_after_eq(now, next_credit)) {
+- netif->credit_timeout.expires = now;
+- tx_add_credit(netif);
+- }
+-
+- /* Still too big to send right now? Set a callback. */
+- if (size > netif->remaining_credit) {
+- netif->credit_timeout.data =
+- (unsigned long)netif;
+- netif->credit_timeout.function =
+- tx_credit_callback;
+- mod_timer(&netif->credit_timeout,
+- next_credit);
+-
+- return true;
+- }
+-
+- return false;
+-}
+-
+-static unsigned net_tx_build_mops(struct xen_netbk *netbk)
+-{
+- struct gnttab_map_grant_ref *mop;
+- struct sk_buff *skb;
+- int ret;
+-
+- mop = netbk->tx_map_ops;
+- while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
+- !list_empty(&netbk->net_schedule_list)) {
+- struct xen_netif *netif;
+- struct xen_netif_tx_request txreq;
+- struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
+- struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
+- u16 pending_idx;
+- RING_IDX idx;
+- int work_to_do;
+- unsigned int data_len;
+- pending_ring_idx_t index;
+-
+- /* Get a netif from the list with work to do. */
+- netif = poll_net_schedule_list(netbk);
+- if (!netif)
+- continue;
+-
+- RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
+- if (!work_to_do) {
+- netif_put(netif);
+- continue;
+- }
+-
+- idx = netif->tx.req_cons;
+- rmb(); /* Ensure that we see the request before we copy it. */
+- memcpy(&txreq, RING_GET_REQUEST(&netif->tx, idx), sizeof(txreq));
+-
+- /* Credit-based scheduling. */
+- if (txreq.size > netif->remaining_credit &&
+- tx_credit_exceeded(netif, txreq.size)) {
+- netif_put(netif);
+- continue;
+- }
+-
+- netif->remaining_credit -= txreq.size;
+-
+- work_to_do--;
+- netif->tx.req_cons = ++idx;
+-
+- memset(extras, 0, sizeof(extras));
+- if (txreq.flags & NETTXF_extra_info) {
+- work_to_do = netbk_get_extras(netif, extras,
+- work_to_do);
+- idx = netif->tx.req_cons;
+- if (unlikely(work_to_do < 0)) {
+- netbk_tx_err(netif, &txreq, idx);
+- continue;
+- }
+- }
+-
+- ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do);
+- if (unlikely(ret < 0)) {
+- netbk_tx_err(netif, &txreq, idx - ret);
+- continue;
+- }
+- idx += ret;
+-
+- if (unlikely(txreq.size < ETH_HLEN)) {
+- pr_debug("Bad packet size: %d\n", txreq.size);
+- netbk_tx_err(netif, &txreq, idx);
+- continue;
+- }
+-
+- /* No crossing a page as the payload mustn't fragment. */
+- if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
+- pr_debug("txreq.offset: %x, size: %u, end: %lu\n",
+- txreq.offset, txreq.size,
+- (txreq.offset&~PAGE_MASK) + txreq.size);
+- netbk_tx_err(netif, &txreq, idx);
+- continue;
+- }
+-
+- index = pending_index(netbk->pending_cons);
+- pending_idx = netbk->pending_ring[index];
+-
+- data_len = (txreq.size > PKT_PROT_LEN &&
+- ret < MAX_SKB_FRAGS) ?
+- PKT_PROT_LEN : txreq.size;
+-
+- skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
+- GFP_ATOMIC | __GFP_NOWARN);
+- if (unlikely(skb == NULL)) {
+- pr_debug("Can't allocate a skb in start_xmit.\n");
+- netbk_tx_err(netif, &txreq, idx);
+- break;
+- }
+-
+- /* Packets passed to netif_rx() must have some headroom. */
+- skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
+-
+- if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
+- struct xen_netif_extra_info *gso;
+- gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
+-
+- if (netbk_set_skb_gso(skb, gso)) {
+- kfree_skb(skb);
+- netbk_tx_err(netif, &txreq, idx);
+- continue;
+- }
+- }
+-
+- gnttab_set_map_op(mop, idx_to_kaddr(netbk, pending_idx),
+- GNTMAP_host_map | GNTMAP_readonly,
+- txreq.gref, netif->domid);
+- mop++;
+-
+- memcpy(&netbk->pending_tx_info[pending_idx].req,
+- &txreq, sizeof(txreq));
+- netbk->pending_tx_info[pending_idx].netif = netif;
+- *((u16 *)skb->data) = pending_idx;
+-
+- __skb_put(skb, data_len);
+-
+- skb_shinfo(skb)->nr_frags = ret;
+- if (data_len < txreq.size) {
+- skb_shinfo(skb)->nr_frags++;
+- skb_shinfo(skb)->frags[0].page =
+- (void *)(unsigned long)pending_idx;
+- } else {
+- /* Discriminate from any valid pending_idx value. */
+- skb_shinfo(skb)->frags[0].page = (void *)~0UL;
+- }
+-
+- __skb_queue_tail(&netbk->tx_queue, skb);
+-
+- netbk->pending_cons++;
+-
+- mop = netbk_get_requests(netbk, netif, skb, txfrags, mop);
+-
+- netif->tx.req_cons = idx;
+- netif_schedule_work(netif);
+-
+- if ((mop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops))
+- break;
+- }
+-
+- return mop - netbk->tx_map_ops;
+-}
+-
+-static void net_tx_submit(struct xen_netbk *netbk)
+-{
+- struct gnttab_map_grant_ref *mop;
+- struct sk_buff *skb;
+-
+- mop = netbk->tx_map_ops;
+- while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
+- struct xen_netif_tx_request *txp;
+- struct xen_netif *netif;
+- u16 pending_idx;
+- unsigned data_len;
+-
+- pending_idx = *((u16 *)skb->data);
+- netif = netbk->pending_tx_info[pending_idx].netif;
+- txp = &netbk->pending_tx_info[pending_idx].req;
+-
+- /* Check the remap error code. */
+- if (unlikely(netbk_tx_check_mop(netbk, skb, &mop))) {
+- pr_debug("netback grant failed.\n");
+- skb_shinfo(skb)->nr_frags = 0;
+- kfree_skb(skb);
+- continue;
+- }
+-
+- data_len = skb->len;
+- memcpy(skb->data,
+- (void *)(idx_to_kaddr(netbk, pending_idx)|txp->offset),
+- data_len);
+- if (data_len < txp->size) {
+- /* Append the packet payload as a fragment. */
+- txp->offset += data_len;
+- txp->size -= data_len;
+- } else {
+- /* Schedule a response immediately. */
+- netif_idx_release(netbk, pending_idx);
+- }
+-
+- if (txp->flags & NETTXF_csum_blank)
+- skb->ip_summed = CHECKSUM_PARTIAL;
+- else if (txp->flags & NETTXF_data_validated)
+- skb->ip_summed = CHECKSUM_UNNECESSARY;
+-
+- netbk_fill_frags(netbk, skb);
+-
+- /*
+- * If the initial fragment was < PKT_PROT_LEN then
+- * pull through some bytes from the other fragments to
+- * increase the linear region to PKT_PROT_LEN bytes.
+- */
+- if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) {
+- int target = min_t(int, skb->len, PKT_PROT_LEN);
+- __pskb_pull_tail(skb, target - skb_headlen(skb));
+- }
+-
+- skb->dev = netif->dev;
+- skb->protocol = eth_type_trans(skb, skb->dev);
+-
+- if (skb->ip_summed == CHECKSUM_PARTIAL) {
+- if (skb_checksum_setup(skb)) {
+- pr_debug("skb_checksum_setup failed\n");
+- kfree_skb(skb);
+- continue;
+- }
+- } else if (skb_is_gso(skb)) {
+- pr_debug("GSO SKB checksum is not partial\n");
+- kfree_skb(skb);
+- continue;
+- }
+-
+- if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
+- unlikely(skb_linearize(skb))) {
+- DPRINTK("Can't linearize skb in net_tx_action.\n");
+- kfree_skb(skb);
+- continue;
+- }
+-
+- netif->stats.rx_bytes += skb->len;
+- netif->stats.rx_packets++;
+-
+- netif_rx_ni(skb);
+- netif->dev->last_rx = jiffies;
+- }
+-}
+-
+-/* Called after netfront has transmitted */
+-static void net_tx_action(unsigned long data)
+-{
+- struct xen_netbk *netbk = (struct xen_netbk *)data;
+- unsigned nr_mops;
+- int ret;
+-
+- net_tx_action_dealloc(netbk);
+-
+- nr_mops = net_tx_build_mops(netbk);
+-
+- if (nr_mops == 0)
+- goto out;
+-
+- ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+- netbk->tx_map_ops, nr_mops);
+- BUG_ON(ret);
+-
+- net_tx_submit(netbk);
+-out:
+- if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
+- !list_empty(&netbk->pending_inuse_head)) {
+- struct netbk_tx_pending_inuse *oldest;
+-
+- oldest = list_entry(netbk->pending_inuse_head.next,
+- struct netbk_tx_pending_inuse, list);
+- mod_timer(&netbk->netbk_tx_pending_timer,
+- oldest->alloc_time + HZ);
+- }
+-}
+-
+-static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
+-{
+- static DEFINE_SPINLOCK(_lock);
+- unsigned long flags;
+- pending_ring_idx_t index;
+-
+- spin_lock_irqsave(&_lock, flags);
+- index = pending_index(netbk->dealloc_prod);
+- netbk->dealloc_ring[index] = pending_idx;
+- /* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
+- smp_wmb();
+- netbk->dealloc_prod++;
+- spin_unlock_irqrestore(&_lock, flags);
+-
+- xen_netbk_bh_handler(netbk, 0);
+-}
+-
+-static void netif_page_release(struct page *page, unsigned int order)
+-{
+- unsigned int group, idx;
+- int foreign = netif_get_page_ext(page, &group, &idx);
+-
+- BUG_ON(!foreign);
+- BUG_ON(order);
+-
+- netif_idx_release(&xen_netbk[group], idx);
+-}
+-
+-irqreturn_t netif_be_int(int irq, void *dev_id)
+-{
+- struct xen_netif *netif = dev_id;
+- struct xen_netbk *netbk;
+-
+- if (netif->group == -1)
+- return IRQ_NONE;
+-
+- netbk = &xen_netbk[netif->group];
+-
+- add_to_net_schedule_list_tail(netif);
+- maybe_schedule_tx_action(netbk);
+-
+- if (netif_schedulable(netif) && !netbk_queue_full(netif))
+- netif_wake_queue(netif->dev);
+-
+- return IRQ_HANDLED;
+-}
+-
+-static void make_tx_response(struct xen_netif *netif,
+- struct xen_netif_tx_request *txp,
+- s8 st)
+-{
+- RING_IDX i = netif->tx.rsp_prod_pvt;
+- struct xen_netif_tx_response *resp;
+- int notify;
+-
+- resp = RING_GET_RESPONSE(&netif->tx, i);
+- resp->id = txp->id;
+- resp->status = st;
+-
+- if (txp->flags & NETTXF_extra_info)
+- RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL;
+-
+- netif->tx.rsp_prod_pvt = ++i;
+- RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
+- if (notify)
+- notify_remote_via_irq(netif->irq);
+-}
+-
+-static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
+- u16 id,
+- s8 st,
+- u16 offset,
+- u16 size,
+- u16 flags)
+-{
+- RING_IDX i = netif->rx.rsp_prod_pvt;
+- struct xen_netif_rx_response *resp;
+-
+- resp = RING_GET_RESPONSE(&netif->rx, i);
+- resp->offset = offset;
+- resp->flags = flags;
+- resp->id = id;
+- resp->status = (s16)size;
+- if (st < 0)
+- resp->status = (s16)st;
+-
+- netif->rx.rsp_prod_pvt = ++i;
+-
+- return resp;
+-}
+-
+-#ifdef NETBE_DEBUG_INTERRUPT
+-static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
+-{
+- struct list_head *ent;
+- struct xen_netif *netif;
+- int i = 0;
+- int group = 0;
+-
+- printk(KERN_ALERT "netif_schedule_list:\n");
+-
+- for (group = 0; group < xen_netbk_group_nr; group++) {
+- struct xen_netbk *netbk = &xen_netbk[group];
+- spin_lock_irq(&netbk->net_schedule_list_lock);
+- printk(KERN_ALERT "xen_netback group number: %d\n", group);
+- list_for_each(ent, &netbk->net_schedule_list) {
+- netif = list_entry(ent, struct xen_netif, list);
+- printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
+- "rx_resp_prod=%08x\n",
+- i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
+- printk(KERN_ALERT
+- " tx_req_cons=%08x, tx_resp_prod=%08x)\n",
+- netif->tx.req_cons, netif->tx.rsp_prod_pvt);
+- printk(KERN_ALERT
+- " shared(rx_req_prod=%08x "
+- "rx_resp_prod=%08x\n",
+- netif->rx.sring->req_prod,
+- netif->rx.sring->rsp_prod);
+- printk(KERN_ALERT
+- " rx_event=%08x, tx_req_prod=%08x\n",
+- netif->rx.sring->rsp_event,
+- netif->tx.sring->req_prod);
+- printk(KERN_ALERT
+- " tx_resp_prod=%08x, tx_event=%08x)\n",
+- netif->tx.sring->rsp_prod,
+- netif->tx.sring->rsp_event);
+- i++;
+- }
+- spin_unlock_irq(&netbk->net_schedule_list_lock);
+- }
+-
+- printk(KERN_ALERT " ** End of netif_schedule_list **\n");
+-
+- return IRQ_HANDLED;
+-}
+-#endif
+-
+-static inline int rx_work_todo(struct xen_netbk *netbk)
+-{
+- return !skb_queue_empty(&netbk->rx_queue);
+-}
+-
+-static inline int tx_work_todo(struct xen_netbk *netbk)
+-{
+- if (netbk->dealloc_cons != netbk->dealloc_prod)
+- return 1;
+-
+- if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
+- !list_empty(&netbk->pending_inuse_head))
+- return 1;
+-
+- if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
+- !list_empty(&netbk->net_schedule_list))
+- return 1;
+-
+- return 0;
+-}
+-
+-static int netbk_action_thread(void *data)
+-{
+- struct xen_netbk *netbk = (struct xen_netbk *)data;
+- while (!kthread_should_stop()) {
+- wait_event_interruptible(netbk->kthread.netbk_action_wq,
+- rx_work_todo(netbk)
+- || tx_work_todo(netbk)
+- || kthread_should_stop());
+- cond_resched();
+-
+- if (kthread_should_stop())
+- break;
+-
+- if (rx_work_todo(netbk))
+- net_rx_action((unsigned long)netbk);
+-
+- if (tx_work_todo(netbk))
+- net_tx_action((unsigned long)netbk);
+- }
+-
+- return 0;
+-}
+-
+-static int __init netback_init(void)
+-{
+- int i;
+- struct page *page;
+- int rc = 0;
+- int group;
+-
+- if (!xen_pv_domain())
+- return -ENODEV;
+-
+- xen_netbk_group_nr = num_online_cpus();
+- xen_netbk = vmalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);
+- if (!xen_netbk) {
+- printk(KERN_ALERT "%s: out of memory\n", __func__);
+- return -ENOMEM;
+- }
+- memset(xen_netbk, 0, sizeof(struct xen_netbk) * xen_netbk_group_nr);
+-
+- for (group = 0; group < xen_netbk_group_nr; group++) {
+- struct xen_netbk *netbk = &xen_netbk[group];
+- skb_queue_head_init(&netbk->rx_queue);
+- skb_queue_head_init(&netbk->tx_queue);
+-
+- init_timer(&netbk->net_timer);
+- netbk->net_timer.data = (unsigned long)netbk;
+- netbk->net_timer.function = net_alarm;
+-
+- init_timer(&netbk->netbk_tx_pending_timer);
+- netbk->netbk_tx_pending_timer.data = (unsigned long)netbk;
+- netbk->netbk_tx_pending_timer.function =
+- netbk_tx_pending_timeout;
+-
+- netbk->mmap_pages =
+- alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
+- if (!netbk->mmap_pages) {
+- printk(KERN_ALERT "%s: out of memory\n", __func__);
+- del_timer(&netbk->netbk_tx_pending_timer);
+- del_timer(&netbk->net_timer);
+- rc = -ENOMEM;
+- goto failed_init;
+- }
+-
+- for (i = 0; i < MAX_PENDING_REQS; i++) {
+- page = netbk->mmap_pages[i];
+- SetPageForeign(page, netif_page_release);
+- netif_set_page_ext(page, group, i);
+- INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
+- }
+-
+- netbk->pending_cons = 0;
+- netbk->pending_prod = MAX_PENDING_REQS;
+- for (i = 0; i < MAX_PENDING_REQS; i++)
+- netbk->pending_ring[i] = i;
+-
+- if (MODPARM_netback_kthread) {
+- init_waitqueue_head(&netbk->kthread.netbk_action_wq);
+- netbk->kthread.task =
+- kthread_create(netbk_action_thread,
+- (void *)netbk,
+- "netback/%u", group);
+-
+- if (!IS_ERR(netbk->kthread.task)) {
+- kthread_bind(netbk->kthread.task, group);
+- } else {
+- printk(KERN_ALERT
+- "kthread_run() fails at netback\n");
+- free_empty_pages_and_pagevec(netbk->mmap_pages,
+- MAX_PENDING_REQS);
+- del_timer(&netbk->netbk_tx_pending_timer);
+- del_timer(&netbk->net_timer);
+- rc = PTR_ERR(netbk->kthread.task);
+- goto failed_init;
+- }
+- } else {
+- tasklet_init(&netbk->tasklet.net_tx_tasklet,
+- net_tx_action,
+- (unsigned long)netbk);
+- tasklet_init(&netbk->tasklet.net_rx_tasklet,
+- net_rx_action,
+- (unsigned long)netbk);
+- }
+-
+- INIT_LIST_HEAD(&netbk->pending_inuse_head);
+- INIT_LIST_HEAD(&netbk->net_schedule_list);
+-
+- spin_lock_init(&netbk->net_schedule_list_lock);
+-
+- atomic_set(&netbk->netfront_count, 0);
+-
+- if (MODPARM_netback_kthread)
+- wake_up_process(netbk->kthread.task);
+- }
+-
+- netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
+- if (MODPARM_copy_skb) {
+- if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
+- NULL, 0))
+- netbk_copy_skb_mode = NETBK_ALWAYS_COPY_SKB;
+- else
+- netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
+- }
+-
+- rc = netif_xenbus_init();
+- if (rc)
+- goto failed_init;
+-
+-#ifdef NETBE_DEBUG_INTERRUPT
+- (void)bind_virq_to_irqhandler(VIRQ_DEBUG,
+- 0,
+- netif_be_dbg,
+- IRQF_SHARED,
+- "net-be-dbg",
+- &netif_be_dbg);
+-#endif
+-
+- return 0;
+-
+-failed_init:
+- for (i = 0; i < group; i++) {
+- struct xen_netbk *netbk = &xen_netbk[i];
+- free_empty_pages_and_pagevec(netbk->mmap_pages,
+- MAX_PENDING_REQS);
+- del_timer(&netbk->netbk_tx_pending_timer);
+- del_timer(&netbk->net_timer);
+- if (MODPARM_netback_kthread)
+- kthread_stop(netbk->kthread.task);
+- }
+- vfree(xen_netbk);
+- return rc;
+-
+-}
+-
+-module_init(netback_init);
+-
+-MODULE_LICENSE("Dual BSD/GPL");
+diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
+deleted file mode 100644
+index 640c696..0000000
+--- a/drivers/xen/netback/xenbus.c
++++ /dev/null
+@@ -1,487 +0,0 @@
+-/* Xenbus code for netif backend
+- * Copyright (C) 2005 Rusty Russell <rusty at rustcorp.com.au>
+- * Copyright (C) 2005 XenSource Ltd
+- *
+- * This program is free software; you can redistribute it and/or modify
+- * it under the terms of the GNU General Public License as published by
+- * the Free Software Foundation; either version 2 of the License, or
+- * (at your option) any later version.
+- *
+- * This program is distributed in the hope that it will be useful,
+- * but WITHOUT ANY WARRANTY; without even the implied warranty of
+- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+- * GNU General Public License for more details.
+- *
+- * You should have received a copy of the GNU General Public License
+- * along with this program; if not, write to the Free Software
+- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-*/
+-
+-#include "common.h"
+-
+-static int connect_rings(struct backend_info *);
+-static void connect(struct backend_info *);
+-static void backend_create_netif(struct backend_info *be);
+-static void unregister_hotplug_status_watch(struct backend_info *be);
+-
+-static int netback_remove(struct xenbus_device *dev)
+-{
+- struct backend_info *be = dev_get_drvdata(&dev->dev);
+-
+- unregister_hotplug_status_watch(be);
+- if (be->netif) {
+- kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
+- xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
+- netif_disconnect(be->netif);
+- be->netif = NULL;
+- }
+- kfree(be);
+- dev_set_drvdata(&dev->dev, NULL);
+- return 0;
+-}
+-
+-
+-/**
+- * Entry point to this code when a new device is created. Allocate the basic
+- * structures and switch to InitWait.
+- */
+-static int netback_probe(struct xenbus_device *dev,
+- const struct xenbus_device_id *id)
+-{
+- const char *message;
+- struct xenbus_transaction xbt;
+- int err;
+- int sg;
+- struct backend_info *be = kzalloc(sizeof(struct backend_info),
+- GFP_KERNEL);
+- if (!be) {
+- xenbus_dev_fatal(dev, -ENOMEM,
+- "allocating backend structure");
+- return -ENOMEM;
+- }
+-
+- be->dev = dev;
+- dev_set_drvdata(&dev->dev, be);
+-
+- sg = 1;
+- if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB)
+- sg = 0;
+-
+- do {
+- err = xenbus_transaction_start(&xbt);
+- if (err) {
+- xenbus_dev_fatal(dev, err, "starting transaction");
+- goto fail;
+- }
+-
+- err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg);
+- if (err) {
+- message = "writing feature-sg";
+- goto abort_transaction;
+- }
+-
+- err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4",
+- "%d", sg);
+- if (err) {
+- message = "writing feature-gso-tcpv4";
+- goto abort_transaction;
+- }
+-
+- /* We support rx-copy path. */
+- err = xenbus_printf(xbt, dev->nodename,
+- "feature-rx-copy", "%d", 1);
+- if (err) {
+- message = "writing feature-rx-copy";
+- goto abort_transaction;
+- }
+-
+- /*
+- * We don't support rx-flip path (except old guests who don't
+- * grok this feature flag).
+- */
+- err = xenbus_printf(xbt, dev->nodename,
+- "feature-rx-flip", "%d", 0);
+- if (err) {
+- message = "writing feature-rx-flip";
+- goto abort_transaction;
+- }
+-
+- err = xenbus_transaction_end(xbt, 0);
+- } while (err == -EAGAIN);
+-
+- if (err) {
+- xenbus_dev_fatal(dev, err, "completing transaction");
+- goto fail;
+- }
+-
+- err = xenbus_switch_state(dev, XenbusStateInitWait);
+- if (err)
+- goto fail;
+-
+- /* This kicks hotplug scripts, so do it immediately. */
+- backend_create_netif(be);
+-
+- return 0;
+-
+-abort_transaction:
+- xenbus_transaction_end(xbt, 1);
+- xenbus_dev_fatal(dev, err, "%s", message);
+-fail:
+- pr_debug("failed");
+- netback_remove(dev);
+- return err;
+-}
+-
+-
+-/*
+- * Handle the creation of the hotplug script environment. We add the script
+- * and vif variables to the environment, for the benefit of the vif-* hotplug
+- * scripts.
+- */
+-static int netback_uevent(struct xenbus_device *xdev,
+- struct kobj_uevent_env *env)
+-{
+- struct backend_info *be = dev_get_drvdata(&xdev->dev);
+- char *val;
+-
+- val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
+- if (IS_ERR(val)) {
+- int err = PTR_ERR(val);
+- xenbus_dev_fatal(xdev, err, "reading script");
+- return err;
+- } else {
+- if (add_uevent_var(env, "script=%s", val)) {
+- kfree(val);
+- return -ENOMEM;
+- }
+- kfree(val);
+- }
+-
+- if (!be || !be->netif)
+- return 0;
+-
+- return add_uevent_var(env, "vif=%s", be->netif->dev->name);
+-}
+-
+-
+-static void backend_create_netif(struct backend_info *be)
+-{
+- int err;
+- long handle;
+- struct xenbus_device *dev = be->dev;
+-
+- if (be->netif != NULL)
+- return;
+-
+- err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%li", &handle);
+- if (err != 1) {
+- xenbus_dev_fatal(dev, err, "reading handle");
+- return;
+- }
+-
+- be->netif = netif_alloc(&dev->dev, dev->otherend_id, handle);
+- if (IS_ERR(be->netif)) {
+- err = PTR_ERR(be->netif);
+- be->netif = NULL;
+- xenbus_dev_fatal(dev, err, "creating interface");
+- return;
+- }
+-
+- kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
+-}
+-
+-
+-static void disconnect_backend(struct xenbus_device *dev)
+-{
+- struct backend_info *be = dev_get_drvdata(&dev->dev);
+-
+- if (be->netif) {
+- xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
+- netif_disconnect(be->netif);
+- be->netif = NULL;
+- }
+-}
+-
+-/**
+- * Callback received when the frontend's state changes.
+- */
+-static void frontend_changed(struct xenbus_device *dev,
+- enum xenbus_state frontend_state)
+-{
+- struct backend_info *be = dev_get_drvdata(&dev->dev);
+-
+- pr_debug("frontend state %s", xenbus_strstate(frontend_state));
+-
+- be->frontend_state = frontend_state;
+-
+- switch (frontend_state) {
+- case XenbusStateInitialising:
+- if (dev->state == XenbusStateClosed) {
+- printk(KERN_INFO "%s: %s: prepare for reconnect\n",
+- __func__, dev->nodename);
+- xenbus_switch_state(dev, XenbusStateInitWait);
+- }
+- break;
+-
+- case XenbusStateInitialised:
+- break;
+-
+- case XenbusStateConnected:
+- if (dev->state == XenbusStateConnected)
+- break;
+- backend_create_netif(be);
+- if (be->netif)
+- connect(be);
+- break;
+-
+- case XenbusStateClosing:
+- if (be->netif)
+- kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
+- disconnect_backend(dev);
+- xenbus_switch_state(dev, XenbusStateClosing);
+- break;
+-
+- case XenbusStateClosed:
+- xenbus_switch_state(dev, XenbusStateClosed);
+- if (xenbus_dev_is_online(dev))
+- break;
+- /* fall through if not online */
+- case XenbusStateUnknown:
+- device_unregister(&dev->dev);
+- break;
+-
+- default:
+- xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
+- frontend_state);
+- break;
+- }
+-}
+-
+-
+-static void xen_net_read_rate(struct xenbus_device *dev,
+- unsigned long *bytes, unsigned long *usec)
+-{
+- char *s, *e;
+- unsigned long b, u;
+- char *ratestr;
+-
+- /* Default to unlimited bandwidth. */
+- *bytes = ~0UL;
+- *usec = 0;
+-
+- ratestr = xenbus_read(XBT_NIL, dev->nodename, "rate", NULL);
+- if (IS_ERR(ratestr))
+- return;
+-
+- s = ratestr;
+- b = simple_strtoul(s, &e, 10);
+- if ((s == e) || (*e != ','))
+- goto fail;
+-
+- s = e + 1;
+- u = simple_strtoul(s, &e, 10);
+- if ((s == e) || (*e != '\0'))
+- goto fail;
+-
+- *bytes = b;
+- *usec = u;
+-
+- kfree(ratestr);
+- return;
+-
+- fail:
+- pr_warn("Failed to parse network rate limit. Traffic unlimited.\n");
+- kfree(ratestr);
+-}
+-
+-static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
+-{
+- char *s, *e, *macstr;
+- int i;
+-
+- macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
+- if (IS_ERR(macstr))
+- return PTR_ERR(macstr);
+-
+- for (i = 0; i < ETH_ALEN; i++) {
+- mac[i] = simple_strtoul(s, &e, 16);
+- if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
+- kfree(macstr);
+- return -ENOENT;
+- }
+- s = e+1;
+- }
+-
+- kfree(macstr);
+- return 0;
+-}
+-
+-static void unregister_hotplug_status_watch(struct backend_info *be)
+-{
+- if (be->have_hotplug_status_watch) {
+- unregister_xenbus_watch(&be->hotplug_status_watch);
+- kfree(be->hotplug_status_watch.node);
+- }
+- be->have_hotplug_status_watch = 0;
+-}
+-
+-static void hotplug_status_changed(struct xenbus_watch *watch,
+- const char **vec,
+- unsigned int vec_size)
+-{
+- struct backend_info *be = container_of(watch,
+- struct backend_info,
+- hotplug_status_watch);
+- char *str;
+- unsigned int len;
+-
+- str = xenbus_read(XBT_NIL, be->dev->nodename, "hotplug-status", &len);
+- if (IS_ERR(str))
+- return;
+- if (len == sizeof("connected")-1 && !memcmp(str, "connected", len)) {
+- xenbus_switch_state(be->dev, XenbusStateConnected);
+- /* Not interested in this watch anymore. */
+- unregister_hotplug_status_watch(be);
+- }
+- kfree(str);
+-}
+-
+-static void connect(struct backend_info *be)
+-{
+- int err;
+- struct xenbus_device *dev = be->dev;
+-
+- err = connect_rings(be);
+- if (err)
+- return;
+-
+- err = xen_net_read_mac(dev, be->netif->fe_dev_addr);
+- if (err) {
+- xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
+- return;
+- }
+-
+- xen_net_read_rate(dev, &be->netif->credit_bytes,
+- &be->netif->credit_usec);
+- be->netif->remaining_credit = be->netif->credit_bytes;
+-
+- unregister_hotplug_status_watch(be);
+- err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch,
+- hotplug_status_changed,
+- "%s/%s", dev->nodename, "hotplug-status");
+- if (err) {
+- /* Switch now, since we can't do a watch. */
+- xenbus_switch_state(dev, XenbusStateConnected);
+- } else {
+- be->have_hotplug_status_watch = 1;
+- }
+-
+- netif_wake_queue(be->netif->dev);
+-}
+-
+-
+-static int connect_rings(struct backend_info *be)
+-{
+- struct xen_netif *netif = be->netif;
+- struct xenbus_device *dev = be->dev;
+- unsigned long tx_ring_ref, rx_ring_ref;
+- unsigned int evtchn, rx_copy;
+- int err;
+- int val;
+-
+- err = xenbus_gather(XBT_NIL, dev->otherend,
+- "tx-ring-ref", "%lu", &tx_ring_ref,
+- "rx-ring-ref", "%lu", &rx_ring_ref,
+- "event-channel", "%u", &evtchn, NULL);
+- if (err) {
+- xenbus_dev_fatal(dev, err,
+- "reading %s/ring-ref and event-channel",
+- dev->otherend);
+- return err;
+- }
+-
+- err = xenbus_scanf(XBT_NIL, dev->otherend, "request-rx-copy", "%u",
+- &rx_copy);
+- if (err == -ENOENT) {
+- err = 0;
+- rx_copy = 0;
+- }
+- if (err < 0) {
+- xenbus_dev_fatal(dev, err, "reading %s/request-rx-copy",
+- dev->otherend);
+- return err;
+- }
+- if (!rx_copy)
+- return -EOPNOTSUPP;
+-
+- if (netif->dev->tx_queue_len != 0) {
+- if (xenbus_scanf(XBT_NIL, dev->otherend,
+- "feature-rx-notify", "%d", &val) < 0)
+- val = 0;
+- if (val)
+- netif->can_queue = 1;
+- else
+- /* Must be non-zero for pfifo_fast to work. */
+- netif->dev->tx_queue_len = 1;
+- }
+-
+- if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg",
+- "%d", &val) < 0)
+- val = 0;
+- netif->can_sg = !!val;
+-
+- if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4",
+- "%d", &val) < 0)
+- val = 0;
+- netif->gso = !!val;
+-
+- if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix",
+- "%d", &val) < 0)
+- val = 0;
+- netif->gso_prefix = !!val;
+-
+- if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
+- "%d", &val) < 0)
+- val = 0;
+- netif->csum = !val;
+-
+- /* Set dev->features */
+- netif_set_features(netif);
+-
+- /* Map the shared frame, irq etc. */
+- err = netif_map(netif, tx_ring_ref, rx_ring_ref, evtchn);
+- if (err) {
+- xenbus_dev_fatal(dev, err,
+- "mapping shared-frames %lu/%lu port %u",
+- tx_ring_ref, rx_ring_ref, evtchn);
+- return err;
+- }
+- return 0;
+-}
+-
+-
+-/* ** Driver Registration ** */
+-
+-
+-static const struct xenbus_device_id netback_ids[] = {
+- { "vif" },
+- { "" }
+-};
+-
+-
+-static struct xenbus_driver netback = {
+- .name = "vif",
+- .owner = THIS_MODULE,
+- .ids = netback_ids,
+- .probe = netback_probe,
+- .remove = netback_remove,
+- .uevent = netback_uevent,
+- .otherend_changed = frontend_changed,
+-};
+-
+-
+-int netif_xenbus_init(void)
+-{
+- printk(KERN_CRIT "registering netback\n");
+- return xenbus_register_backend(&netback);
+-}
+--
+1.7.3.4
+
+
+From 01d93054b9f5223c8ed9a3c11ea5a89ce7db442c Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Sat, 11 Dec 2010 10:15:50 +0000
+Subject: [PATCH 123/139] xen: netback: Make dependency on PageForeign conditional
+
+When PageForeign is not available we fallback to a copying TX mode.
+
+All uses of PageForeign are now gated with HAVE_XEN_PAGE_FOREIGN, this should
+allow for easier removal of the dependency for upstream, e.g. using unifdef.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/net/xen-netback/common.h | 21 +++
+ drivers/net/xen-netback/interface.c | 4 +
+ drivers/net/xen-netback/netback.c | 272 +++++++++++++++++++++++++++++-----
+ drivers/net/xen-netback/xenbus.c | 2 +
+ 4 files changed, 259 insertions(+), 40 deletions(-)
+
+diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
+index 079e1de..f45bac8 100644
+--- a/drivers/net/xen-netback/common.h
++++ b/drivers/net/xen-netback/common.h
+@@ -93,8 +93,10 @@ struct xen_netif {
+ unsigned long remaining_credit;
+ struct timer_list credit_timeout;
+
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ /* Statistics */
+ int nr_copied_skbs;
++#endif
+
+ /* Miscellaneous private stuff. */
+ struct list_head list; /* scheduling list */
+@@ -117,6 +119,7 @@ struct xen_netif {
+ #define netback_carrier_off(netif) ((netif)->carrier = 0)
+ #define netback_carrier_ok(netif) ((netif)->carrier)
+
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ enum {
+ NETBK_DONT_COPY_SKB,
+ NETBK_DELAYED_COPY_SKB,
+@@ -124,6 +127,7 @@ enum {
+ };
+
+ extern int netbk_copy_skb_mode;
++#endif
+
+ struct backend_info {
+ struct xenbus_device *dev;
+@@ -191,10 +195,12 @@ struct netbk_rx_meta {
+ int gso_size;
+ };
+
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ struct netbk_tx_pending_inuse {
+ struct list_head list;
+ unsigned long alloc_time;
+ };
++#endif
+
+ #define MAX_PENDING_REQS 256
+
+@@ -232,16 +238,24 @@ struct xen_netbk {
+ struct sk_buff_head tx_queue;
+
+ struct timer_list net_timer;
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ struct timer_list netbk_tx_pending_timer;
++#endif
+
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ struct page **mmap_pages;
++#else
++ struct page *mmap_pages[MAX_PENDING_REQS];
++#endif
+
+ pending_ring_idx_t pending_prod;
+ pending_ring_idx_t pending_cons;
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ pending_ring_idx_t dealloc_prod;
+ pending_ring_idx_t dealloc_cons;
+
+ struct list_head pending_inuse_head;
++#endif
+ struct list_head net_schedule_list;
+
+ /* Protect the net_schedule_list in netif. */
+@@ -250,13 +264,20 @@ struct xen_netbk {
+ atomic_t netfront_count;
+
+ struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
+ struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
+ struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
+
+ grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
++#else
++ struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS];
++#endif
++
+ u16 pending_ring[MAX_PENDING_REQS];
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ u16 dealloc_ring[MAX_PENDING_REQS];
++#endif
+
+ /*
+ * Each head or fragment can be up to 4096 bytes. Given
+diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
+index c36db26..3ff3aff 100644
+--- a/drivers/net/xen-netback/interface.c
++++ b/drivers/net/xen-netback/interface.c
+@@ -186,6 +186,7 @@ static void netbk_get_drvinfo(struct net_device *dev,
+ strcpy(info->bus_info, dev_name(dev->dev.parent));
+ }
+
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ static const struct netif_stat {
+ char name[ETH_GSTRING_LEN];
+ u16 offset;
+@@ -225,6 +226,7 @@ static void netbk_get_strings(struct net_device *dev, u32 stringset, u8 * data)
+ break;
+ }
+ }
++#endif
+
+ static struct ethtool_ops network_ethtool_ops = {
+ .get_drvinfo = netbk_get_drvinfo,
+@@ -237,9 +239,11 @@ static struct ethtool_ops network_ethtool_ops = {
+ .set_tso = netbk_set_tso,
+ .get_link = ethtool_op_get_link,
+
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ .get_sset_count = netbk_get_sset_count,
+ .get_ethtool_stats = netbk_get_ethtool_stats,
+ .get_strings = netbk_get_strings,
++#endif
+ };
+
+ static struct net_device_ops netback_ops = {
+diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
+index e0ca232..6a1aa5c 100644
+--- a/drivers/net/xen-netback/netback.c
++++ b/drivers/net/xen-netback/netback.c
+@@ -40,7 +40,9 @@
+
+ #include <net/tcp.h>
+
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ #include <xen/balloon.h>
++#endif
+ #include <xen/events.h>
+ #include <xen/interface/memory.h>
+
+@@ -80,9 +82,10 @@ static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
+ }
+
+ /* extra field used in struct page */
+-static inline void netif_set_page_ext(struct page *pg,
+- unsigned int group, unsigned int idx)
++static inline void netif_set_page_ext(struct page *pg, struct xen_netbk *netbk,
++ unsigned int idx)
+ {
++ unsigned int group = netbk - xen_netbk;
+ union page_ext ext = { .e = { .group = group + 1, .idx = idx } };
+
+ BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping));
+@@ -96,8 +99,10 @@ static int netif_get_page_ext(struct page *pg,
+ struct xen_netbk *netbk;
+ unsigned int group, idx;
+
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ if (!PageForeign(pg))
+ return 0;
++#endif
+
+ group = ext.e.group - 1;
+
+@@ -106,8 +111,10 @@ static int netif_get_page_ext(struct page *pg,
+
+ netbk = &xen_netbk[group];
+
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ if (netbk->mmap_pages == NULL)
+ return 0;
++#endif
+
+ idx = ext.e.idx;
+
+@@ -144,12 +151,14 @@ static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk)
+ netbk->pending_prod + netbk->pending_cons;
+ }
+
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ /* Setting this allows the safe use of this driver without netloop. */
+ static int MODPARM_copy_skb = 1;
+ module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
+ MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
+
+ int netbk_copy_skb_mode;
++#endif
+
+ static int MODPARM_netback_kthread;
+ module_param_named(netback_kthread, MODPARM_netback_kthread, bool, 0);
+@@ -779,11 +788,13 @@ static void net_alarm(unsigned long data)
+ xen_netbk_bh_handler(netbk, 1);
+ }
+
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ static void netbk_tx_pending_timeout(unsigned long data)
+ {
+ struct xen_netbk *netbk = (struct xen_netbk *)data;
+ xen_netbk_bh_handler(netbk, 0);
+ }
++#endif
+
+ struct net_device_stats *netif_be_get_stats(struct net_device *dev)
+ {
+@@ -892,6 +903,7 @@ static void tx_credit_callback(unsigned long data)
+ netif_schedule_work(netif);
+ }
+
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ static inline int copy_pending_req(struct xen_netbk *netbk,
+ pending_ring_idx_t pending_idx)
+ {
+@@ -899,7 +911,9 @@ static inline int copy_pending_req(struct xen_netbk *netbk,
+ netbk->grant_tx_handle[pending_idx],
+ &netbk->mmap_pages[pending_idx]);
+ }
++#endif
+
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
+ {
+ struct netbk_tx_pending_inuse *inuse, *n;
+@@ -1004,6 +1018,7 @@ static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
+ list_del_init(&inuse->list);
+ }
+ }
++#endif
+
+ static void netbk_tx_err(struct xen_netif *netif,
+ struct xen_netif_tx_request *txp, RING_IDX end)
+@@ -1034,19 +1049,19 @@ static int netbk_count_requests(struct xen_netif *netif,
+
+ do {
+ if (frags >= work_to_do) {
+- DPRINTK("Need more frags\n");
++ printk(KERN_CRIT "Need more frags\n");
+ return -frags;
+ }
+
+ if (unlikely(frags >= MAX_SKB_FRAGS)) {
+- DPRINTK("Too many frags\n");
++ printk(KERN_CRIT "Too many frags\n");
+ return -frags;
+ }
+
+ memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags),
+ sizeof(*txp));
+ if (txp->size > first->size) {
+- DPRINTK("Frags galore\n");
++ printk(KERN_CRIT "Frags galore\n");
+ return -frags;
+ }
+
+@@ -1054,20 +1069,42 @@ static int netbk_count_requests(struct xen_netif *netif,
+ frags++;
+
+ if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
+- DPRINTK("txp->offset: %x, size: %u\n",
++ printk(KERN_CRIT "txp->offset: %x, size: %u\n",
+ txp->offset, txp->size);
+ return -frags;
+ }
+ } while ((txp++)->flags & NETTXF_more_data);
+-
+ return frags;
+ }
+
++#ifndef HAVE_XEN_PAGE_FOREIGN
++static struct page *netif_alloc_page(struct xen_netbk *netbk,
++ struct sk_buff *skb,
++ unsigned long pending_idx)
++{
++ struct page *page;
++ page = alloc_page(GFP_KERNEL|__GFP_COLD);
++ if (!page)
++ return NULL;
++ netif_set_page_ext(page, netbk, pending_idx);
++ netbk->mmap_pages[pending_idx] = page;
++ return page;
++}
++#endif
++
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk,
+ struct xen_netif *netif,
+ struct sk_buff *skb,
+ struct xen_netif_tx_request *txp,
+- struct gnttab_map_grant_ref *mop)
++ struct gnttab_map_grant_ref *gop)
++#else
++static struct gnttab_copy *netbk_get_requests(struct xen_netbk *netbk,
++ struct xen_netif *netif,
++ struct sk_buff *skb,
++ struct xen_netif_tx_request *txp,
++ struct gnttab_copy *gop)
++#endif
+ {
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+ skb_frag_t *frags = shinfo->frags;
+@@ -1078,16 +1115,39 @@ static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk,
+ start = ((unsigned long)shinfo->frags[0].page == pending_idx);
+
+ for (i = start; i < shinfo->nr_frags; i++, txp++) {
++#ifndef HAVE_XEN_PAGE_FOREIGN
++ struct page *page;
++#endif
+ pending_ring_idx_t index;
+ struct pending_tx_info *pending_tx_info =
+ netbk->pending_tx_info;
+
+ index = pending_index(netbk->pending_cons++);
+ pending_idx = netbk->pending_ring[index];
+-
+- gnttab_set_map_op(mop++, idx_to_kaddr(netbk, pending_idx),
++#ifdef HAVE_XEN_PAGE_FOREIGN
++ gnttab_set_map_op(gop++, idx_to_kaddr(netbk, pending_idx),
+ GNTMAP_host_map | GNTMAP_readonly,
+ txp->gref, netif->domid);
++#else
++ page = netif_alloc_page(netbk, skb, pending_idx);
++ if (!page)
++ return NULL;
++
++ netbk->mmap_pages[pending_idx] = page;
++
++ gop->source.u.ref = txp->gref;
++ gop->source.domid = netif->domid;
++ gop->source.offset = txp->offset;
++
++ gop->dest.u.gmfn = virt_to_mfn(page_address(page));
++ gop->dest.domid = DOMID_SELF;
++ gop->dest.offset = txp->offset;
++
++ gop->len = txp->size;
++ gop->flags = GNTCOPY_source_gref;
++
++ gop++;
++#endif
+
+ memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
+ netif_get(netif);
+@@ -1095,14 +1155,24 @@ static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk,
+ frags[i].page = (void *)pending_idx;
+ }
+
+- return mop;
++ return gop;
+ }
+
+-static int netbk_tx_check_mop(struct xen_netbk *netbk,
++#ifdef HAVE_XEN_PAGE_FOREIGN
++static int netbk_tx_check_gop(struct xen_netbk *netbk,
+ struct sk_buff *skb,
+- struct gnttab_map_grant_ref **mopp)
++ struct gnttab_map_grant_ref **gopp)
++#else
++static int netbk_tx_check_gop(struct xen_netbk *netbk,
++ struct sk_buff *skb,
++ struct gnttab_copy **gopp)
++#endif
+ {
+- struct gnttab_map_grant_ref *mop = *mopp;
++#ifdef HAVE_XEN_PAGE_FOREIGN
++ struct gnttab_map_grant_ref *gop = *gopp;
++#else
++ struct gnttab_copy *gop = *gopp;
++#endif
+ int pending_idx = *((u16 *)skb->data);
+ struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
+ struct xen_netif *netif = pending_tx_info[pending_idx].netif;
+@@ -1112,7 +1182,7 @@ static int netbk_tx_check_mop(struct xen_netbk *netbk,
+ int i, err, start;
+
+ /* Check status of header. */
+- err = mop->status;
++ err = gop->status;
+ if (unlikely(err)) {
+ pending_ring_idx_t index;
+ index = pending_index(netbk->pending_prod++);
+@@ -1120,11 +1190,13 @@ static int netbk_tx_check_mop(struct xen_netbk *netbk,
+ make_tx_response(netif, txp, NETIF_RSP_ERROR);
+ netbk->pending_ring[index] = pending_idx;
+ netif_put(netif);
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ } else {
+ set_phys_to_machine(
+ __pa(idx_to_kaddr(netbk, pending_idx)) >> PAGE_SHIFT,
+- FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
+- netbk->grant_tx_handle[pending_idx] = mop->handle;
++ FOREIGN_FRAME(gop->dev_bus_addr >> PAGE_SHIFT));
++ netbk->grant_tx_handle[pending_idx] = gop->handle;
++#endif
+ }
+
+ /* Skip first skb fragment if it is on same page as header fragment. */
+@@ -1137,14 +1209,16 @@ static int netbk_tx_check_mop(struct xen_netbk *netbk,
+ pending_idx = (unsigned long)shinfo->frags[i].page;
+
+ /* Check error status: if okay then remember grant handle. */
+- newerr = (++mop)->status;
++ newerr = (++gop)->status;
+ if (likely(!newerr)) {
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ unsigned long addr;
+ addr = idx_to_kaddr(netbk, pending_idx);
+ set_phys_to_machine(
+ __pa(addr)>>PAGE_SHIFT,
+- FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
+- netbk->grant_tx_handle[pending_idx] = mop->handle;
++ FOREIGN_FRAME(gop->dev_bus_addr>>PAGE_SHIFT));
++ netbk->grant_tx_handle[pending_idx] = gop->handle;
++#endif
+ /* Had a previous error? Invalidate this fragment. */
+ if (unlikely(err))
+ netif_idx_release(netbk, pending_idx);
+@@ -1174,7 +1248,7 @@ static int netbk_tx_check_mop(struct xen_netbk *netbk,
+ err = newerr;
+ }
+
+- *mopp = mop + 1;
++ *gopp = gop + 1;
+ return err;
+ }
+
+@@ -1190,10 +1264,11 @@ static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
+ unsigned long pending_idx;
+
+ pending_idx = (unsigned long)frag->page;
+-
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ netbk->pending_inuse[pending_idx].alloc_time = jiffies;
+ list_add_tail(&netbk->pending_inuse[pending_idx].list,
+ &netbk->pending_inuse_head);
++#endif
+
+ txp = &netbk->pending_tx_info[pending_idx].req;
+ frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx));
+@@ -1203,6 +1278,10 @@ static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
+ skb->len += txp->size;
+ skb->data_len += txp->size;
+ skb->truesize += txp->size;
++
++ /* Take an extra reference to offset netif_idx_release */
++ get_page(netbk->mmap_pages[pending_idx]);
++ netif_idx_release(netbk, pending_idx);
+ }
+ }
+
+@@ -1330,18 +1409,24 @@ static bool tx_credit_exceeded(struct xen_netif *netif, unsigned size)
+ return false;
+ }
+
+-static unsigned net_tx_build_mops(struct xen_netbk *netbk)
++static unsigned net_tx_build_gops(struct xen_netbk *netbk)
+ {
+- struct gnttab_map_grant_ref *mop;
++#ifdef HAVE_XEN_PAGE_FOREIGN
++ struct gnttab_map_grant_ref *gop = netbk->tx_map_ops, *request_gop;
++#else
++ struct gnttab_copy *gop = netbk->tx_copy_ops, *request_gop;
++#endif
+ struct sk_buff *skb;
+ int ret;
+
+- mop = netbk->tx_map_ops;
+ while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
+ !list_empty(&netbk->net_schedule_list)) {
+ struct xen_netif *netif;
+ struct xen_netif_tx_request txreq;
+ struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
++#ifndef HAVE_XEN_PAGE_FOREIGN
++ struct page *page;
++#endif
+ struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
+ u16 pending_idx;
+ RING_IDX idx;
+@@ -1438,10 +1523,35 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk)
+ }
+ }
+
+- gnttab_set_map_op(mop, idx_to_kaddr(netbk, pending_idx),
++#ifdef HAVE_XEN_PAGE_FOREIGN
++ gnttab_set_map_op(gop, idx_to_kaddr(netbk, pending_idx),
+ GNTMAP_host_map | GNTMAP_readonly,
+ txreq.gref, netif->domid);
+- mop++;
++ gop++;
++#else
++ /* XXX could copy straight to head */
++ page = netif_alloc_page(netbk, skb, pending_idx);
++ if (!page) {
++ kfree_skb(skb);
++ netbk_tx_err(netif, &txreq, idx);
++ continue;
++ }
++
++ netbk->mmap_pages[pending_idx] = page;
++
++ gop->source.u.ref = txreq.gref;
++ gop->source.domid = netif->domid;
++ gop->source.offset = txreq.offset;
++
++ gop->dest.u.gmfn = virt_to_mfn(page_address(page));
++ gop->dest.domid = DOMID_SELF;
++ gop->dest.offset = txreq.offset;
++
++ gop->len = txreq.size;
++ gop->flags = GNTCOPY_source_gref;
++
++ gop++;
++#endif
+
+ memcpy(&netbk->pending_tx_info[pending_idx].req,
+ &txreq, sizeof(txreq));
+@@ -1464,24 +1574,43 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk)
+
+ netbk->pending_cons++;
+
+- mop = netbk_get_requests(netbk, netif, skb, txfrags, mop);
++ request_gop = netbk_get_requests(netbk, netif,
++ skb, txfrags, gop);
++ if (request_gop == NULL) {
++ kfree_skb(skb);
++ netbk_tx_err(netif, &txreq, idx);
++ continue;
++ }
++ gop = request_gop;
+
+ netif->tx.req_cons = idx;
+ netif_schedule_work(netif);
+
+- if ((mop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops))
++#ifdef HAVE_XEN_PAGE_FOREIGN
++ if ((gop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops))
+ break;
++#else
++ if ((gop-netbk->tx_copy_ops) >= ARRAY_SIZE(netbk->tx_copy_ops))
++ break;
++#endif
+ }
+
+- return mop - netbk->tx_map_ops;
++#ifdef HAVE_XEN_PAGE_FOREIGN
++ return gop - netbk->tx_map_ops;
++#else
++ return gop - netbk->tx_copy_ops;
++#endif
+ }
+
+ static void net_tx_submit(struct xen_netbk *netbk)
+ {
+- struct gnttab_map_grant_ref *mop;
++#ifdef HAVE_XEN_PAGE_FOREIGN
++ struct gnttab_map_grant_ref *gop = netbk->tx_map_ops;
++#else
++ struct gnttab_copy *gop = netbk->tx_copy_ops;
++#endif
+ struct sk_buff *skb;
+
+- mop = netbk->tx_map_ops;
+ while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
+ struct xen_netif_tx_request *txp;
+ struct xen_netif *netif;
+@@ -1493,7 +1622,7 @@ static void net_tx_submit(struct xen_netbk *netbk)
+ txp = &netbk->pending_tx_info[pending_idx].req;
+
+ /* Check the remap error code. */
+- if (unlikely(netbk_tx_check_mop(netbk, skb, &mop))) {
++ if (unlikely(netbk_tx_check_gop(netbk, skb, &gop))) {
+ pr_debug("netback grant failed.\n");
+ skb_shinfo(skb)->nr_frags = 0;
+ kfree_skb(skb);
+@@ -1545,12 +1674,14 @@ static void net_tx_submit(struct xen_netbk *netbk)
+ continue;
+ }
+
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
+ unlikely(skb_linearize(skb))) {
+ DPRINTK("Can't linearize skb in net_tx_action.\n");
+ kfree_skb(skb);
+ continue;
+ }
++#endif
+
+ netif->stats.rx_bytes += skb->len;
+ netif->stats.rx_packets++;
+@@ -1564,21 +1695,31 @@ static void net_tx_submit(struct xen_netbk *netbk)
+ static void net_tx_action(unsigned long data)
+ {
+ struct xen_netbk *netbk = (struct xen_netbk *)data;
+- unsigned nr_mops;
++ unsigned nr_gops;
+ int ret;
+
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ net_tx_action_dealloc(netbk);
++#endif
+
+- nr_mops = net_tx_build_mops(netbk);
++ nr_gops = net_tx_build_gops(netbk);
+
+- if (nr_mops == 0)
++#ifdef HAVE_XEN_PAGE_FOREIGN
++ if (nr_gops == 0)
+ goto out;
+-
+ ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+- netbk->tx_map_ops, nr_mops);
++ netbk->tx_map_ops, nr_gops);
++#else
++ if (nr_gops == 0)
++ return;
++ ret = HYPERVISOR_grant_table_op(GNTTABOP_copy,
++ netbk->tx_copy_ops, nr_gops);
++#endif
+ BUG_ON(ret);
+
+ net_tx_submit(netbk);
++
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ out:
+ if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
+ !list_empty(&netbk->pending_inuse_head)) {
+@@ -1589,8 +1730,10 @@ out:
+ mod_timer(&netbk->netbk_tx_pending_timer,
+ oldest->alloc_time + HZ);
+ }
++#endif
+ }
+
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
+ {
+ static DEFINE_SPINLOCK(_lock);
+@@ -1618,6 +1761,34 @@ static void netif_page_release(struct page *page, unsigned int order)
+
+ netif_idx_release(&xen_netbk[group], idx);
+ }
++#else
++static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
++{
++ struct xen_netif *netif;
++ struct pending_tx_info *pending_tx_info;
++ pending_ring_idx_t index;
++
++ /* Already complete? */
++ if (netbk->mmap_pages[pending_idx] == NULL)
++ return;
++
++ pending_tx_info = &netbk->pending_tx_info[pending_idx];
++
++ netif = pending_tx_info->netif;
++
++ make_tx_response(netif, &pending_tx_info->req,
++ NETIF_RSP_OKAY);
++
++ index = pending_index(netbk->pending_prod++);
++ netbk->pending_ring[index] = pending_idx;
++
++ netif_put(netif);
++
++ netbk->mmap_pages[pending_idx]->mapping = 0;
++ put_page(netbk->mmap_pages[pending_idx]);
++ netbk->mmap_pages[pending_idx] = NULL;
++}
++#endif
+
+ irqreturn_t netif_be_int(int irq, void *dev_id)
+ {
+@@ -1735,12 +1906,14 @@ static inline int rx_work_todo(struct xen_netbk *netbk)
+
+ static inline int tx_work_todo(struct xen_netbk *netbk)
+ {
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ if (netbk->dealloc_cons != netbk->dealloc_prod)
+ return 1;
+
+ if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
+ !list_empty(&netbk->pending_inuse_head))
+ return 1;
++#endif
+
+ if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
+ !list_empty(&netbk->net_schedule_list))
+@@ -1775,7 +1948,9 @@ static int netbk_action_thread(void *data)
+ static int __init netback_init(void)
+ {
+ int i;
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ struct page *page;
++#endif
+ int rc = 0;
+ int group;
+
+@@ -1799,11 +1974,14 @@ static int __init netback_init(void)
+ netbk->net_timer.data = (unsigned long)netbk;
+ netbk->net_timer.function = net_alarm;
+
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ init_timer(&netbk->netbk_tx_pending_timer);
+ netbk->netbk_tx_pending_timer.data = (unsigned long)netbk;
+ netbk->netbk_tx_pending_timer.function =
+ netbk_tx_pending_timeout;
++#endif
+
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ netbk->mmap_pages =
+ alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
+ if (!netbk->mmap_pages) {
+@@ -1817,9 +1995,10 @@ static int __init netback_init(void)
+ for (i = 0; i < MAX_PENDING_REQS; i++) {
+ page = netbk->mmap_pages[i];
+ SetPageForeign(page, netif_page_release);
+- netif_set_page_ext(page, group, i);
++ netif_set_page_ext(page, netbk, i);
+ INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
+ }
++#endif
+
+ netbk->pending_cons = 0;
+ netbk->pending_prod = MAX_PENDING_REQS;
+@@ -1838,9 +2017,11 @@ static int __init netback_init(void)
+ } else {
+ printk(KERN_ALERT
+ "kthread_run() fails at netback\n");
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ free_empty_pages_and_pagevec(netbk->mmap_pages,
+ MAX_PENDING_REQS);
+ del_timer(&netbk->netbk_tx_pending_timer);
++#endif
+ del_timer(&netbk->net_timer);
+ rc = PTR_ERR(netbk->kthread.task);
+ goto failed_init;
+@@ -1854,17 +2035,19 @@ static int __init netback_init(void)
+ (unsigned long)netbk);
+ }
+
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ INIT_LIST_HEAD(&netbk->pending_inuse_head);
++#endif
+ INIT_LIST_HEAD(&netbk->net_schedule_list);
+
+ spin_lock_init(&netbk->net_schedule_list_lock);
+
+ atomic_set(&netbk->netfront_count, 0);
+-
+ if (MODPARM_netback_kthread)
+ wake_up_process(netbk->kthread.task);
+ }
+
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
+ if (MODPARM_copy_skb) {
+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
+@@ -1873,6 +2056,7 @@ static int __init netback_init(void)
+ else
+ netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
+ }
++#endif
+
+ rc = netif_xenbus_init();
+ if (rc)
+@@ -1892,9 +2076,17 @@ static int __init netback_init(void)
+ failed_init:
+ for (i = 0; i < group; i++) {
+ struct xen_netbk *netbk = &xen_netbk[i];
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ free_empty_pages_and_pagevec(netbk->mmap_pages,
+ MAX_PENDING_REQS);
+ del_timer(&netbk->netbk_tx_pending_timer);
++#else
++ int j;
++ for (j = 0; j < MAX_PENDING_REQS; j++) {
++ if (netbk->mmap_pages[i])
++ __free_page(netbk->mmap_pages[i]);
++ }
++#endif
+ del_timer(&netbk->net_timer);
+ if (MODPARM_netback_kthread)
+ kthread_stop(netbk->kthread.task);
+diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
+index 640c696..f6bf50e 100644
+--- a/drivers/net/xen-netback/xenbus.c
++++ b/drivers/net/xen-netback/xenbus.c
+@@ -64,8 +64,10 @@ static int netback_probe(struct xenbus_device *dev,
+ dev_set_drvdata(&dev->dev, be);
+
+ sg = 1;
++#ifdef HAVE_XEN_PAGE_FOREIGN
+ if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB)
+ sg = 0;
++#endif
+
+ do {
+ err = xenbus_transaction_start(&xbt);
+--
+1.7.3.4
+
+
+From 03ff29cd511480cae999d204ec068ee72075edcc Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Wed, 19 Jan 2011 09:43:44 +0000
+Subject: [PATCH 124/139] xen: netback: completely drop foreign page support
+
+for i in drivers/net/xen-netback/*.[ch] ; do
+ echo $i
+ ./scripts/unifdef -B -UHAVE_XEN_PAGE_FOREIGN $i > $i.unifdef
+ mv $i.unifdef $i
+done
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/net/xen-netback/common.h | 46 -----
+ drivers/net/xen-netback/interface.c | 47 -----
+ drivers/net/xen-netback/netback.c | 345 -----------------------------------
+ drivers/net/xen-netback/xenbus.c | 4 -
+ 4 files changed, 0 insertions(+), 442 deletions(-)
+
+diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
+index f45bac8..2d55ed6 100644
+--- a/drivers/net/xen-netback/common.h
++++ b/drivers/net/xen-netback/common.h
+@@ -93,11 +93,6 @@ struct xen_netif {
+ unsigned long remaining_credit;
+ struct timer_list credit_timeout;
+
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+- /* Statistics */
+- int nr_copied_skbs;
+-#endif
+-
+ /* Miscellaneous private stuff. */
+ struct list_head list; /* scheduling list */
+ atomic_t refcnt;
+@@ -119,16 +114,6 @@ struct xen_netif {
+ #define netback_carrier_off(netif) ((netif)->carrier = 0)
+ #define netback_carrier_ok(netif) ((netif)->carrier)
+
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-enum {
+- NETBK_DONT_COPY_SKB,
+- NETBK_DELAYED_COPY_SKB,
+- NETBK_ALWAYS_COPY_SKB,
+-};
+-
+-extern int netbk_copy_skb_mode;
+-#endif
+-
+ struct backend_info {
+ struct xenbus_device *dev;
+ struct xen_netif *netif;
+@@ -195,13 +180,6 @@ struct netbk_rx_meta {
+ int gso_size;
+ };
+
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-struct netbk_tx_pending_inuse {
+- struct list_head list;
+- unsigned long alloc_time;
+-};
+-#endif
+-
+ #define MAX_PENDING_REQS 256
+
+ #define MAX_BUFFER_OFFSET PAGE_SIZE
+@@ -238,24 +216,11 @@ struct xen_netbk {
+ struct sk_buff_head tx_queue;
+
+ struct timer_list net_timer;
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+- struct timer_list netbk_tx_pending_timer;
+-#endif
+
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+- struct page **mmap_pages;
+-#else
+ struct page *mmap_pages[MAX_PENDING_REQS];
+-#endif
+
+ pending_ring_idx_t pending_prod;
+ pending_ring_idx_t pending_cons;
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+- pending_ring_idx_t dealloc_prod;
+- pending_ring_idx_t dealloc_cons;
+-
+- struct list_head pending_inuse_head;
+-#endif
+ struct list_head net_schedule_list;
+
+ /* Protect the net_schedule_list in netif. */
+@@ -264,20 +229,9 @@ struct xen_netbk {
+ atomic_t netfront_count;
+
+ struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+- struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
+- struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
+- struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
+-
+- grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
+-#else
+ struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS];
+-#endif
+
+ u16 pending_ring[MAX_PENDING_REQS];
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+- u16 dealloc_ring[MAX_PENDING_REQS];
+-#endif
+
+ /*
+ * Each head or fragment can be up to 4096 bytes. Given
+diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
+index 3ff3aff..eae5cf8 100644
+--- a/drivers/net/xen-netback/interface.c
++++ b/drivers/net/xen-netback/interface.c
+@@ -186,48 +186,6 @@ static void netbk_get_drvinfo(struct net_device *dev,
+ strcpy(info->bus_info, dev_name(dev->dev.parent));
+ }
+
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-static const struct netif_stat {
+- char name[ETH_GSTRING_LEN];
+- u16 offset;
+-} netbk_stats[] = {
+- { "copied_skbs", offsetof(struct xen_netif, nr_copied_skbs) },
+-};
+-
+-static int netbk_get_sset_count(struct net_device *dev, int string_set)
+-{
+- switch (string_set) {
+- case ETH_SS_STATS:
+- return ARRAY_SIZE(netbk_stats);
+- default:
+- return -EINVAL;
+- }
+-}
+-
+-static void netbk_get_ethtool_stats(struct net_device *dev,
+- struct ethtool_stats *stats, u64 * data)
+-{
+- void *netif = netdev_priv(dev);
+- int i;
+-
+- for (i = 0; i < ARRAY_SIZE(netbk_stats); i++)
+- data[i] = *(int *)(netif + netbk_stats[i].offset);
+-}
+-
+-static void netbk_get_strings(struct net_device *dev, u32 stringset, u8 * data)
+-{
+- int i;
+-
+- switch (stringset) {
+- case ETH_SS_STATS:
+- for (i = 0; i < ARRAY_SIZE(netbk_stats); i++)
+- memcpy(data + i * ETH_GSTRING_LEN,
+- netbk_stats[i].name, ETH_GSTRING_LEN);
+- break;
+- }
+-}
+-#endif
+-
+ static struct ethtool_ops network_ethtool_ops = {
+ .get_drvinfo = netbk_get_drvinfo,
+
+@@ -239,11 +197,6 @@ static struct ethtool_ops network_ethtool_ops = {
+ .set_tso = netbk_set_tso,
+ .get_link = ethtool_op_get_link,
+
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+- .get_sset_count = netbk_get_sset_count,
+- .get_ethtool_stats = netbk_get_ethtool_stats,
+- .get_strings = netbk_get_strings,
+-#endif
+ };
+
+ static struct net_device_ops netback_ops = {
+diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
+index 6a1aa5c..b7b9341 100644
+--- a/drivers/net/xen-netback/netback.c
++++ b/drivers/net/xen-netback/netback.c
+@@ -40,9 +40,6 @@
+
+ #include <net/tcp.h>
+
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-#include <xen/balloon.h>
+-#endif
+ #include <xen/events.h>
+ #include <xen/interface/memory.h>
+
+@@ -99,11 +96,6 @@ static int netif_get_page_ext(struct page *pg,
+ struct xen_netbk *netbk;
+ unsigned int group, idx;
+
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+- if (!PageForeign(pg))
+- return 0;
+-#endif
+-
+ group = ext.e.group - 1;
+
+ if (group < 0 || group >= xen_netbk_group_nr)
+@@ -111,11 +103,6 @@ static int netif_get_page_ext(struct page *pg,
+
+ netbk = &xen_netbk[group];
+
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+- if (netbk->mmap_pages == NULL)
+- return 0;
+-#endif
+-
+ idx = ext.e.idx;
+
+ if ((idx < 0) || (idx >= MAX_PENDING_REQS))
+@@ -151,15 +138,6 @@ static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk)
+ netbk->pending_prod + netbk->pending_cons;
+ }
+
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-/* Setting this allows the safe use of this driver without netloop. */
+-static int MODPARM_copy_skb = 1;
+-module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
+-MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
+-
+-int netbk_copy_skb_mode;
+-#endif
+-
+ static int MODPARM_netback_kthread;
+ module_param_named(netback_kthread, MODPARM_netback_kthread, bool, 0);
+ MODULE_PARM_DESC(netback_kthread, "Use kernel thread to replace tasklet");
+@@ -788,14 +766,6 @@ static void net_alarm(unsigned long data)
+ xen_netbk_bh_handler(netbk, 1);
+ }
+
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-static void netbk_tx_pending_timeout(unsigned long data)
+-{
+- struct xen_netbk *netbk = (struct xen_netbk *)data;
+- xen_netbk_bh_handler(netbk, 0);
+-}
+-#endif
+-
+ struct net_device_stats *netif_be_get_stats(struct net_device *dev)
+ {
+ struct xen_netif *netif = netdev_priv(dev);
+@@ -903,123 +873,6 @@ static void tx_credit_callback(unsigned long data)
+ netif_schedule_work(netif);
+ }
+
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-static inline int copy_pending_req(struct xen_netbk *netbk,
+- pending_ring_idx_t pending_idx)
+-{
+- return gnttab_copy_grant_page(
+- netbk->grant_tx_handle[pending_idx],
+- &netbk->mmap_pages[pending_idx]);
+-}
+-#endif
+-
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
+-{
+- struct netbk_tx_pending_inuse *inuse, *n;
+- struct gnttab_unmap_grant_ref *gop;
+- u16 pending_idx;
+- pending_ring_idx_t dc, dp;
+- struct xen_netif *netif;
+- int ret;
+- LIST_HEAD(list);
+-
+- dc = netbk->dealloc_cons;
+- gop = netbk->tx_unmap_ops;
+-
+- /* Free up any grants we have finished using. */
+- do {
+- dp = netbk->dealloc_prod;
+-
+- /* Ensure we see all indices enqueued by netif_idx_release(). */
+- smp_rmb();
+-
+- while (dc != dp) {
+- unsigned long pfn;
+- struct netbk_tx_pending_inuse *pending_inuse =
+- netbk->pending_inuse;
+-
+- pending_idx = netbk->dealloc_ring[pending_index(dc++)];
+- list_move_tail(&pending_inuse[pending_idx].list, &list);
+-
+- pfn = idx_to_pfn(netbk, pending_idx);
+- /* Already unmapped? */
+- if (!phys_to_machine_mapping_valid(pfn))
+- continue;
+-
+- gnttab_set_unmap_op(gop,
+- idx_to_kaddr(netbk, pending_idx),
+- GNTMAP_host_map,
+- netbk->grant_tx_handle[pending_idx]);
+- gop++;
+- }
+-
+- } while (dp != netbk->dealloc_prod);
+-
+- netbk->dealloc_cons = dc;
+-
+- ret = HYPERVISOR_grant_table_op(
+- GNTTABOP_unmap_grant_ref, netbk->tx_unmap_ops,
+- gop - netbk->tx_unmap_ops);
+- BUG_ON(ret);
+-
+- /*
+- * Copy any entries that have been pending for too long
+- */
+- if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
+- !list_empty(&netbk->pending_inuse_head)) {
+- list_for_each_entry_safe(inuse, n,
+- &netbk->pending_inuse_head, list) {
+- struct pending_tx_info *pending_tx_info;
+- pending_tx_info = netbk->pending_tx_info;
+-
+- if (time_after(inuse->alloc_time + HZ / 2, jiffies))
+- break;
+-
+- pending_idx = inuse - netbk->pending_inuse;
+-
+- pending_tx_info[pending_idx].netif->nr_copied_skbs++;
+-
+- switch (copy_pending_req(netbk, pending_idx)) {
+- case 0:
+- list_move_tail(&inuse->list, &list);
+- continue;
+- case -EBUSY:
+- list_del_init(&inuse->list);
+- continue;
+- case -ENOENT:
+- continue;
+- }
+-
+- break;
+- }
+- }
+-
+- list_for_each_entry_safe(inuse, n, &list, list) {
+- struct pending_tx_info *pending_tx_info;
+- pending_ring_idx_t index;
+-
+- pending_tx_info = netbk->pending_tx_info;
+- pending_idx = inuse - netbk->pending_inuse;
+-
+- netif = pending_tx_info[pending_idx].netif;
+-
+- make_tx_response(netif, &pending_tx_info[pending_idx].req,
+- NETIF_RSP_OKAY);
+-
+- /* Ready for next use. */
+- gnttab_reset_grant_page(netbk->mmap_pages[pending_idx]);
+-
+- index = pending_index(netbk->pending_prod++);
+- netbk->pending_ring[index] = pending_idx;
+-
+- netif_put(netif);
+-
+- list_del_init(&inuse->list);
+- }
+-}
+-#endif
+-
+ static void netbk_tx_err(struct xen_netif *netif,
+ struct xen_netif_tx_request *txp, RING_IDX end)
+ {
+@@ -1077,7 +930,6 @@ static int netbk_count_requests(struct xen_netif *netif,
+ return frags;
+ }
+
+-#ifndef HAVE_XEN_PAGE_FOREIGN
+ static struct page *netif_alloc_page(struct xen_netbk *netbk,
+ struct sk_buff *skb,
+ unsigned long pending_idx)
+@@ -1090,21 +942,12 @@ static struct page *netif_alloc_page(struct xen_netbk *netbk,
+ netbk->mmap_pages[pending_idx] = page;
+ return page;
+ }
+-#endif
+
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk,
+- struct xen_netif *netif,
+- struct sk_buff *skb,
+- struct xen_netif_tx_request *txp,
+- struct gnttab_map_grant_ref *gop)
+-#else
+ static struct gnttab_copy *netbk_get_requests(struct xen_netbk *netbk,
+ struct xen_netif *netif,
+ struct sk_buff *skb,
+ struct xen_netif_tx_request *txp,
+ struct gnttab_copy *gop)
+-#endif
+ {
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+ skb_frag_t *frags = shinfo->frags;
+@@ -1115,20 +958,13 @@ static struct gnttab_copy *netbk_get_requests(struct xen_netbk *netbk,
+ start = ((unsigned long)shinfo->frags[0].page == pending_idx);
+
+ for (i = start; i < shinfo->nr_frags; i++, txp++) {
+-#ifndef HAVE_XEN_PAGE_FOREIGN
+ struct page *page;
+-#endif
+ pending_ring_idx_t index;
+ struct pending_tx_info *pending_tx_info =
+ netbk->pending_tx_info;
+
+ index = pending_index(netbk->pending_cons++);
+ pending_idx = netbk->pending_ring[index];
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+- gnttab_set_map_op(gop++, idx_to_kaddr(netbk, pending_idx),
+- GNTMAP_host_map | GNTMAP_readonly,
+- txp->gref, netif->domid);
+-#else
+ page = netif_alloc_page(netbk, skb, pending_idx);
+ if (!page)
+ return NULL;
+@@ -1147,7 +983,6 @@ static struct gnttab_copy *netbk_get_requests(struct xen_netbk *netbk,
+ gop->flags = GNTCOPY_source_gref;
+
+ gop++;
+-#endif
+
+ memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
+ netif_get(netif);
+@@ -1158,21 +993,11 @@ static struct gnttab_copy *netbk_get_requests(struct xen_netbk *netbk,
+ return gop;
+ }
+
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-static int netbk_tx_check_gop(struct xen_netbk *netbk,
+- struct sk_buff *skb,
+- struct gnttab_map_grant_ref **gopp)
+-#else
+ static int netbk_tx_check_gop(struct xen_netbk *netbk,
+ struct sk_buff *skb,
+ struct gnttab_copy **gopp)
+-#endif
+ {
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+- struct gnttab_map_grant_ref *gop = *gopp;
+-#else
+ struct gnttab_copy *gop = *gopp;
+-#endif
+ int pending_idx = *((u16 *)skb->data);
+ struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
+ struct xen_netif *netif = pending_tx_info[pending_idx].netif;
+@@ -1190,13 +1015,6 @@ static int netbk_tx_check_gop(struct xen_netbk *netbk,
+ make_tx_response(netif, txp, NETIF_RSP_ERROR);
+ netbk->pending_ring[index] = pending_idx;
+ netif_put(netif);
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+- } else {
+- set_phys_to_machine(
+- __pa(idx_to_kaddr(netbk, pending_idx)) >> PAGE_SHIFT,
+- FOREIGN_FRAME(gop->dev_bus_addr >> PAGE_SHIFT));
+- netbk->grant_tx_handle[pending_idx] = gop->handle;
+-#endif
+ }
+
+ /* Skip first skb fragment if it is on same page as header fragment. */
+@@ -1211,14 +1029,6 @@ static int netbk_tx_check_gop(struct xen_netbk *netbk,
+ /* Check error status: if okay then remember grant handle. */
+ newerr = (++gop)->status;
+ if (likely(!newerr)) {
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+- unsigned long addr;
+- addr = idx_to_kaddr(netbk, pending_idx);
+- set_phys_to_machine(
+- __pa(addr)>>PAGE_SHIFT,
+- FOREIGN_FRAME(gop->dev_bus_addr>>PAGE_SHIFT));
+- netbk->grant_tx_handle[pending_idx] = gop->handle;
+-#endif
+ /* Had a previous error? Invalidate this fragment. */
+ if (unlikely(err))
+ netif_idx_release(netbk, pending_idx);
+@@ -1264,11 +1074,6 @@ static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
+ unsigned long pending_idx;
+
+ pending_idx = (unsigned long)frag->page;
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+- netbk->pending_inuse[pending_idx].alloc_time = jiffies;
+- list_add_tail(&netbk->pending_inuse[pending_idx].list,
+- &netbk->pending_inuse_head);
+-#endif
+
+ txp = &netbk->pending_tx_info[pending_idx].req;
+ frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx));
+@@ -1411,11 +1216,7 @@ static bool tx_credit_exceeded(struct xen_netif *netif, unsigned size)
+
+ static unsigned net_tx_build_gops(struct xen_netbk *netbk)
+ {
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+- struct gnttab_map_grant_ref *gop = netbk->tx_map_ops, *request_gop;
+-#else
+ struct gnttab_copy *gop = netbk->tx_copy_ops, *request_gop;
+-#endif
+ struct sk_buff *skb;
+ int ret;
+
+@@ -1424,9 +1225,7 @@ static unsigned net_tx_build_gops(struct xen_netbk *netbk)
+ struct xen_netif *netif;
+ struct xen_netif_tx_request txreq;
+ struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
+-#ifndef HAVE_XEN_PAGE_FOREIGN
+ struct page *page;
+-#endif
+ struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
+ u16 pending_idx;
+ RING_IDX idx;
+@@ -1523,12 +1322,6 @@ static unsigned net_tx_build_gops(struct xen_netbk *netbk)
+ }
+ }
+
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+- gnttab_set_map_op(gop, idx_to_kaddr(netbk, pending_idx),
+- GNTMAP_host_map | GNTMAP_readonly,
+- txreq.gref, netif->domid);
+- gop++;
+-#else
+ /* XXX could copy straight to head */
+ page = netif_alloc_page(netbk, skb, pending_idx);
+ if (!page) {
+@@ -1551,7 +1344,6 @@ static unsigned net_tx_build_gops(struct xen_netbk *netbk)
+ gop->flags = GNTCOPY_source_gref;
+
+ gop++;
+-#endif
+
+ memcpy(&netbk->pending_tx_info[pending_idx].req,
+ &txreq, sizeof(txreq));
+@@ -1586,29 +1378,16 @@ static unsigned net_tx_build_gops(struct xen_netbk *netbk)
+ netif->tx.req_cons = idx;
+ netif_schedule_work(netif);
+
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+- if ((gop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops))
+- break;
+-#else
+ if ((gop-netbk->tx_copy_ops) >= ARRAY_SIZE(netbk->tx_copy_ops))
+ break;
+-#endif
+ }
+
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+- return gop - netbk->tx_map_ops;
+-#else
+ return gop - netbk->tx_copy_ops;
+-#endif
+ }
+
+ static void net_tx_submit(struct xen_netbk *netbk)
+ {
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+- struct gnttab_map_grant_ref *gop = netbk->tx_map_ops;
+-#else
+ struct gnttab_copy *gop = netbk->tx_copy_ops;
+-#endif
+ struct sk_buff *skb;
+
+ while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
+@@ -1674,15 +1453,6 @@ static void net_tx_submit(struct xen_netbk *netbk)
+ continue;
+ }
+
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+- if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
+- unlikely(skb_linearize(skb))) {
+- DPRINTK("Can't linearize skb in net_tx_action.\n");
+- kfree_skb(skb);
+- continue;
+- }
+-#endif
+-
+ netif->stats.rx_bytes += skb->len;
+ netif->stats.rx_packets++;
+
+@@ -1698,70 +1468,18 @@ static void net_tx_action(unsigned long data)
+ unsigned nr_gops;
+ int ret;
+
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+- net_tx_action_dealloc(netbk);
+-#endif
+-
+ nr_gops = net_tx_build_gops(netbk);
+
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+- if (nr_gops == 0)
+- goto out;
+- ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+- netbk->tx_map_ops, nr_gops);
+-#else
+ if (nr_gops == 0)
+ return;
+ ret = HYPERVISOR_grant_table_op(GNTTABOP_copy,
+ netbk->tx_copy_ops, nr_gops);
+-#endif
+ BUG_ON(ret);
+
+ net_tx_submit(netbk);
+
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-out:
+- if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
+- !list_empty(&netbk->pending_inuse_head)) {
+- struct netbk_tx_pending_inuse *oldest;
+-
+- oldest = list_entry(netbk->pending_inuse_head.next,
+- struct netbk_tx_pending_inuse, list);
+- mod_timer(&netbk->netbk_tx_pending_timer,
+- oldest->alloc_time + HZ);
+- }
+-#endif
+ }
+
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+-static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
+-{
+- static DEFINE_SPINLOCK(_lock);
+- unsigned long flags;
+- pending_ring_idx_t index;
+-
+- spin_lock_irqsave(&_lock, flags);
+- index = pending_index(netbk->dealloc_prod);
+- netbk->dealloc_ring[index] = pending_idx;
+- /* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
+- smp_wmb();
+- netbk->dealloc_prod++;
+- spin_unlock_irqrestore(&_lock, flags);
+-
+- xen_netbk_bh_handler(netbk, 0);
+-}
+-
+-static void netif_page_release(struct page *page, unsigned int order)
+-{
+- unsigned int group, idx;
+- int foreign = netif_get_page_ext(page, &group, &idx);
+-
+- BUG_ON(!foreign);
+- BUG_ON(order);
+-
+- netif_idx_release(&xen_netbk[group], idx);
+-}
+-#else
+ static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
+ {
+ struct xen_netif *netif;
+@@ -1788,7 +1506,6 @@ static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
+ put_page(netbk->mmap_pages[pending_idx]);
+ netbk->mmap_pages[pending_idx] = NULL;
+ }
+-#endif
+
+ irqreturn_t netif_be_int(int irq, void *dev_id)
+ {
+@@ -1906,14 +1623,6 @@ static inline int rx_work_todo(struct xen_netbk *netbk)
+
+ static inline int tx_work_todo(struct xen_netbk *netbk)
+ {
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+- if (netbk->dealloc_cons != netbk->dealloc_prod)
+- return 1;
+-
+- if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
+- !list_empty(&netbk->pending_inuse_head))
+- return 1;
+-#endif
+
+ if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
+ !list_empty(&netbk->net_schedule_list))
+@@ -1948,9 +1657,6 @@ static int netbk_action_thread(void *data)
+ static int __init netback_init(void)
+ {
+ int i;
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+- struct page *page;
+-#endif
+ int rc = 0;
+ int group;
+
+@@ -1974,32 +1680,6 @@ static int __init netback_init(void)
+ netbk->net_timer.data = (unsigned long)netbk;
+ netbk->net_timer.function = net_alarm;
+
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+- init_timer(&netbk->netbk_tx_pending_timer);
+- netbk->netbk_tx_pending_timer.data = (unsigned long)netbk;
+- netbk->netbk_tx_pending_timer.function =
+- netbk_tx_pending_timeout;
+-#endif
+-
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+- netbk->mmap_pages =
+- alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
+- if (!netbk->mmap_pages) {
+- printk(KERN_ALERT "%s: out of memory\n", __func__);
+- del_timer(&netbk->netbk_tx_pending_timer);
+- del_timer(&netbk->net_timer);
+- rc = -ENOMEM;
+- goto failed_init;
+- }
+-
+- for (i = 0; i < MAX_PENDING_REQS; i++) {
+- page = netbk->mmap_pages[i];
+- SetPageForeign(page, netif_page_release);
+- netif_set_page_ext(page, netbk, i);
+- INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
+- }
+-#endif
+-
+ netbk->pending_cons = 0;
+ netbk->pending_prod = MAX_PENDING_REQS;
+ for (i = 0; i < MAX_PENDING_REQS; i++)
+@@ -2017,11 +1697,6 @@ static int __init netback_init(void)
+ } else {
+ printk(KERN_ALERT
+ "kthread_run() fails at netback\n");
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+- free_empty_pages_and_pagevec(netbk->mmap_pages,
+- MAX_PENDING_REQS);
+- del_timer(&netbk->netbk_tx_pending_timer);
+-#endif
+ del_timer(&netbk->net_timer);
+ rc = PTR_ERR(netbk->kthread.task);
+ goto failed_init;
+@@ -2035,9 +1710,6 @@ static int __init netback_init(void)
+ (unsigned long)netbk);
+ }
+
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+- INIT_LIST_HEAD(&netbk->pending_inuse_head);
+-#endif
+ INIT_LIST_HEAD(&netbk->net_schedule_list);
+
+ spin_lock_init(&netbk->net_schedule_list_lock);
+@@ -2047,17 +1719,6 @@ static int __init netback_init(void)
+ wake_up_process(netbk->kthread.task);
+ }
+
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+- netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
+- if (MODPARM_copy_skb) {
+- if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
+- NULL, 0))
+- netbk_copy_skb_mode = NETBK_ALWAYS_COPY_SKB;
+- else
+- netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
+- }
+-#endif
+-
+ rc = netif_xenbus_init();
+ if (rc)
+ goto failed_init;
+@@ -2076,17 +1737,11 @@ static int __init netback_init(void)
+ failed_init:
+ for (i = 0; i < group; i++) {
+ struct xen_netbk *netbk = &xen_netbk[i];
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+- free_empty_pages_and_pagevec(netbk->mmap_pages,
+- MAX_PENDING_REQS);
+- del_timer(&netbk->netbk_tx_pending_timer);
+-#else
+ int j;
+ for (j = 0; j < MAX_PENDING_REQS; j++) {
+ if (netbk->mmap_pages[i])
+ __free_page(netbk->mmap_pages[i]);
+ }
+-#endif
+ del_timer(&netbk->net_timer);
+ if (MODPARM_netback_kthread)
+ kthread_stop(netbk->kthread.task);
+diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
+index f6bf50e..0aa3366 100644
+--- a/drivers/net/xen-netback/xenbus.c
++++ b/drivers/net/xen-netback/xenbus.c
+@@ -64,10 +64,6 @@ static int netback_probe(struct xenbus_device *dev,
+ dev_set_drvdata(&dev->dev, be);
+
+ sg = 1;
+-#ifdef HAVE_XEN_PAGE_FOREIGN
+- if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB)
+- sg = 0;
+-#endif
+
+ do {
+ err = xenbus_transaction_start(&xbt);
+--
+1.7.3.4
+
+
+From 7f1492ef6a8026cba4c1b49b7a2030802f76ec83 Mon Sep 17 00:00:00 2001
+From: Ian Campbell <ian.campbell at citrix.com>
+Date: Wed, 19 Jan 2011 14:41:55 +0000
+Subject: [PATCH 125/139] xen: netback: drop ethtool drvinfo callback
+
+The default provided by the network core is sufficient for our needs.
+
+Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
+---
+ drivers/net/xen-netback/interface.c | 9 ---------
+ 1 files changed, 0 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
+index eae5cf8..295ab3d 100644
+--- a/drivers/net/xen-netback/interface.c
++++ b/drivers/net/xen-netback/interface.c
+@@ -179,16 +179,7 @@ static int netbk_set_tso(struct net_device *dev, u32 data)
+ return 0;
+ }
+
+-static void netbk_get_drvinfo(struct net_device *dev,
+- struct ethtool_drvinfo *info)
+-{
+- strcpy(info->driver, "netbk");
+- strcpy(info->bus_info, dev_name(dev->dev.parent));
+-}
+-
+ static struct ethtool_ops network_ethtool_ops = {
+- .get_drvinfo = netbk_get_drvinfo,
+-
+ .get_tx_csum = ethtool_op_get_tx_csum,
+ .set_tx_csum = netbk_set_tx_csum,
+ .get_sg = ethtool_op_get_sg,
+--
+1.7.3.4
+
+
+From 83381aa69cf38fc26125019479527e0710fe27cd Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Wed, 15 Dec 2010 13:31:03 -0500
+Subject: [PATCH 126/139] ttm: Set VM_IO only on pages with TTM_MEMTYPE_FLAG_FIXED set.
+
+This patch is based on "[Patch RFC] ttm: nouveau accelerated on Xen
+pv-ops kernel"
+http://lists.freedesktop.org/archives/nouveau/2010-March/005326.html
+
+Under Xen, the PFN of page is virtualized. The physical addresses used
+for DMA programming needs to be the Machine Frame Number (MFN).
+Xen transparently does the correct translation using the _PAGE_IOMEM
+PTE bit. If the bit is set, Xen assumes that the backing memory is in
+the IOMEM space, and PFN equals MFN. If not set, page_to_pfn() returns
+a phantom MFN.
+
+The patch enables the ttm_bo_vm_fault() handler to behave correctly
+under Xen, and has no side-effects on normal (not under Xen) operations.
+
+The use of TTM_MEMTYPE_FLAG_FIXED in the check assumes that
+only pages which have this flag are backed by device memory or IO.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Signed-off-by: Arvind R <arvino55 at gmail.com>
+
+Conflicts:
+
+ drivers/gpu/drm/ttm/ttm_bo_vm.c
+---
+ drivers/gpu/drm/ttm/ttm_bo_vm.c | 7 ++++++-
+ 1 files changed, 6 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
+index 221b924..bb24374 100644
+--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
++++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
+@@ -247,6 +247,7 @@ int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma,
+ {
+ struct ttm_bo_driver *driver;
+ struct ttm_buffer_object *bo;
++ struct ttm_mem_type_manager *man;
+ int ret;
+
+ read_lock(&bdev->vm_lock);
+@@ -279,7 +280,11 @@ int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma,
+ */
+
+ vma->vm_private_data = bo;
+- vma->vm_flags |= VM_RESERVED | VM_IO | VM_MIXEDMAP | VM_DONTEXPAND;
++ vma->vm_flags |= VM_RESERVED | VM_MIXEDMAP | VM_DONTEXPAND;
++ man = &bdev->man[bo->mem.mem_type];
++ if (man->flags & TTM_MEMTYPE_FLAG_FIXED)
++ vma->vm_flags |= VM_IO;
++ vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+ return 0;
+ out_unref:
+ ttm_bo_unref(&bo);
+--
+1.7.3.4
+
+
+From 9c2e85765d147fc77ae27cb81a7091942f22a584 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Wed, 15 Dec 2010 13:32:15 -0500
+Subject: [PATCH 127/139] drm: recompute vma->vm_page_prot after changing vm_flags
+
+vm_get_page_prot() computes vm_page_prot depending on vm_flags, so
+we need to re-call it if we change flags.
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+
+Conflicts:
+
+ drivers/gpu/drm/ttm/ttm_bo_vm.c
+---
+ drivers/gpu/drm/ttm/ttm_bo_vm.c | 1 +
+ 1 files changed, 1 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
+index bb24374..735dc1d 100644
+--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
++++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
+@@ -300,6 +300,7 @@ int ttm_fbdev_mmap(struct vm_area_struct *vma, struct ttm_buffer_object *bo)
+ vma->vm_ops = &ttm_bo_vm_ops;
+ vma->vm_private_data = ttm_bo_reference(bo);
+ vma->vm_flags |= VM_RESERVED | VM_IO | VM_MIXEDMAP | VM_DONTEXPAND;
++ vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+ return 0;
+ }
+ EXPORT_SYMBOL(ttm_fbdev_mmap);
+--
+1.7.3.4
+
+
+From 74632f8e51618dc31beba712d03dd0f1168cc241 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Fri, 20 Feb 2009 15:58:42 -0500
+Subject: [PATCH 128/139] x86: define arch_vm_get_page_prot to set _PAGE_IOMAP on VM_IO vmas
+
+Set _PAGE_IOMAP in ptes mapping a VM_IO vma. This says that the mapping
+is of a real piece of physical hardware, and not just system memory.
+
+Xen, in particular, uses this to inhibit the normal pfn->mfn conversion
+that would normally happen - in other words, treat the address directly
+as a machine physical address without converting it from pseudo-physical.
+
+[ Impact: make VM_IO mappings map the right thing under Xen ]
+[ v2: rebased on v2.6.37-rc1]
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ arch/x86/include/asm/pgtable.h | 3 +++
+ arch/x86/mm/pgtable.c | 10 ++++++++++
+ 2 files changed, 13 insertions(+), 0 deletions(-)
+
+diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
+index 18601c8..284ee01 100644
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -485,6 +485,9 @@ static inline unsigned long pages_to_mb(unsigned long npg)
+ #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
+ remap_pfn_range(vma, vaddr, pfn, size, prot)
+
++#define arch_vm_get_page_prot arch_vm_get_page_prot
++extern pgprot_t arch_vm_get_page_prot(unsigned vm_flags);
++
+ #if PAGETABLE_LEVELS > 2
+ static inline int pud_none(pud_t pud)
+ {
+diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
+index 500242d..1e72207 100644
+--- a/arch/x86/mm/pgtable.c
++++ b/arch/x86/mm/pgtable.c
+@@ -15,6 +15,16 @@
+
+ gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
+
++pgprot_t arch_vm_get_page_prot(unsigned vm_flags)
++{
++ pgprot_t ret = __pgprot(0);
++
++ if (vm_flags & VM_IO)
++ ret = __pgprot(_PAGE_IOMAP);
++
++ return ret;
++}
++
+ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+ {
+ return (pte_t *)__get_free_page(PGALLOC_GFP);
+--
+1.7.3.4
+
+
+From 81ec0e742ce919124909640039c05baa29b1568a Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Wed, 8 Dec 2010 11:03:27 -0800
+Subject: [PATCH 129/139] mm: remove unused "token" argument from apply_to_page_range callback.
+
+The argument is basically the struct page of the pte_t * passed into
+the callback. But there's no need to pass that, since it can be fairly
+easily derived from the pte_t * itself if needed (and no current users
+need to do that anyway).
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ arch/x86/xen/grant-table.c | 6 ++----
+ arch/x86/xen/mmu.c | 3 +--
+ include/linux/mm.h | 3 +--
+ mm/memory.c | 2 +-
+ mm/vmalloc.c | 2 +-
+ 5 files changed, 6 insertions(+), 10 deletions(-)
+
+diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
+index 49ba9b5..5bf892a 100644
+--- a/arch/x86/xen/grant-table.c
++++ b/arch/x86/xen/grant-table.c
+@@ -44,8 +44,7 @@
+
+ #include <asm/pgtable.h>
+
+-static int map_pte_fn(pte_t *pte, struct page *pmd_page,
+- unsigned long addr, void *data)
++static int map_pte_fn(pte_t *pte, unsigned long addr, void *data)
+ {
+ unsigned long **frames = (unsigned long **)data;
+
+@@ -54,8 +53,7 @@ static int map_pte_fn(pte_t *pte, struct page *pmd_page,
+ return 0;
+ }
+
+-static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
+- unsigned long addr, void *data)
++static int unmap_pte_fn(pte_t *pte, unsigned long addr, void *data)
+ {
+
+ set_pte_at(&init_mm, addr, pte, __pte(0));
+diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
+index 5e92b61..38ba804 100644
+--- a/arch/x86/xen/mmu.c
++++ b/arch/x86/xen/mmu.c
+@@ -2292,8 +2292,7 @@ struct remap_data {
+ struct mmu_update *mmu_update;
+ };
+
+-static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
+- unsigned long addr, void *data)
++static int remap_area_mfn_pte_fn(pte_t *ptep, unsigned long addr, void *data)
+ {
+ struct remap_data *rmd = data;
+ pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot));
+diff --git a/include/linux/mm.h b/include/linux/mm.h
+index 956a355..bb898ec 100644
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -1529,8 +1529,7 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address,
+ #define FOLL_MLOCK 0x40 /* mark page as mlocked */
+ #define FOLL_SPLIT 0x80 /* don't return transhuge pages, split them */
+
+-typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
+- void *data);
++typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data);
+ extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
+ unsigned long size, pte_fn_t fn, void *data);
+
+diff --git a/mm/memory.c b/mm/memory.c
+index 31250fa..740470c 100644
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -2032,7 +2032,7 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
+ token = pmd_pgtable(*pmd);
+
+ do {
+- err = fn(pte++, token, addr, data);
++ err = fn(pte++, addr, data);
+ if (err)
+ break;
+ } while (addr += PAGE_SIZE, addr != end);
+diff --git a/mm/vmalloc.c b/mm/vmalloc.c
+index f9b1667..5ddbdfe 100644
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -2061,7 +2061,7 @@ void __attribute__((weak)) vmalloc_sync_all(void)
+ }
+
+
+-static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data)
++static int f(pte_t *pte, unsigned long addr, void *data)
+ {
+ /* apply_to_page_range() does all the hard work. */
+ return 0;
+--
+1.7.3.4
+
+
+From 7f635db45f8e921c9203fdfb904d0095b7af6480 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Tue, 30 Nov 2010 10:03:44 -0800
+Subject: [PATCH 130/139] mm: add apply_to_page_range_batch()
+
+apply_to_page_range() calls its callback function once for each pte, which
+is pretty inefficient since it will almost always be operating on a batch
+of adjacent ptes. apply_to_page_range_batch() calls its callback
+with both a pte_t * and a count, so it can operate on multiple ptes at
+once.
+
+The callback is expected to handle all its ptes, or return an error. For
+both apply_to_page_range and apply_to_page_range_batch, it is up to
+the caller to work out how much progress was made if either fails with
+an error.
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ include/linux/mm.h | 6 +++++
+ mm/memory.c | 57 +++++++++++++++++++++++++++++++++++++--------------
+ 2 files changed, 47 insertions(+), 16 deletions(-)
+
+diff --git a/include/linux/mm.h b/include/linux/mm.h
+index bb898ec..5a32a8a 100644
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -1533,6 +1533,12 @@ typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data);
+ extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
+ unsigned long size, pte_fn_t fn, void *data);
+
++typedef int (*pte_batch_fn_t)(pte_t *pte, unsigned count,
++ unsigned long addr, void *data);
++extern int apply_to_page_range_batch(struct mm_struct *mm,
++ unsigned long address, unsigned long size,
++ pte_batch_fn_t fn, void *data);
++
+ #ifdef CONFIG_PROC_FS
+ void vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);
+ #else
+diff --git a/mm/memory.c b/mm/memory.c
+index 740470c..496e4e6 100644
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -2012,11 +2012,10 @@ EXPORT_SYMBOL(remap_pfn_range);
+
+ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr, unsigned long end,
+- pte_fn_t fn, void *data)
++ pte_batch_fn_t fn, void *data)
+ {
+ pte_t *pte;
+ int err;
+- pgtable_t token;
+ spinlock_t *uninitialized_var(ptl);
+
+ pte = (mm == &init_mm) ?
+@@ -2028,25 +2027,17 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
+ BUG_ON(pmd_huge(*pmd));
+
+ arch_enter_lazy_mmu_mode();
+-
+- token = pmd_pgtable(*pmd);
+-
+- do {
+- err = fn(pte++, addr, data);
+- if (err)
+- break;
+- } while (addr += PAGE_SIZE, addr != end);
+-
++ err = fn(pte, (end - addr) / PAGE_SIZE, addr, data);
+ arch_leave_lazy_mmu_mode();
+
+ if (mm != &init_mm)
+- pte_unmap_unlock(pte-1, ptl);
++ pte_unmap_unlock(pte, ptl);
+ return err;
+ }
+
+ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
+ unsigned long addr, unsigned long end,
+- pte_fn_t fn, void *data)
++ pte_batch_fn_t fn, void *data)
+ {
+ pmd_t *pmd;
+ unsigned long next;
+@@ -2068,7 +2059,7 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
+
+ static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd,
+ unsigned long addr, unsigned long end,
+- pte_fn_t fn, void *data)
++ pte_batch_fn_t fn, void *data)
+ {
+ pud_t *pud;
+ unsigned long next;
+@@ -2090,8 +2081,9 @@ static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd,
+ * Scan a region of virtual memory, filling in page tables as necessary
+ * and calling a provided function on each leaf page table.
+ */
+-int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
+- unsigned long size, pte_fn_t fn, void *data)
++int apply_to_page_range_batch(struct mm_struct *mm,
++ unsigned long addr, unsigned long size,
++ pte_batch_fn_t fn, void *data)
+ {
+ pgd_t *pgd;
+ unsigned long next;
+@@ -2109,6 +2101,39 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
+
+ return err;
+ }
++EXPORT_SYMBOL_GPL(apply_to_page_range_batch);
++
++struct pte_single_fn
++{
++ pte_fn_t fn;
++ void *data;
++};
++
++static int apply_pte_batch(pte_t *pte, unsigned count,
++ unsigned long addr, void *data)
++{
++ struct pte_single_fn *single = data;
++ int err = 0;
++
++ while (count--) {
++ err = single->fn(pte, addr, single->data);
++ if (err)
++ break;
++
++ addr += PAGE_SIZE;
++ pte++;
++ }
++
++ return err;
++}
++
++int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
++ unsigned long size, pte_fn_t fn, void *data)
++{
++ struct pte_single_fn single = { .fn = fn, .data = data };
++ return apply_to_page_range_batch(mm, addr, size,
++ apply_pte_batch, &single);
++}
+ EXPORT_SYMBOL_GPL(apply_to_page_range);
+
+ /*
+--
+1.7.3.4
+
+
+From dea51c0fa49689f6a489205c00ebf83c8e78f6cd Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Mon, 6 Dec 2010 12:26:53 -0800
+Subject: [PATCH 131/139] ioremap: use apply_to_page_range_batch() for ioremap_page_range()
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ lib/ioremap.c | 85 +++++++++++++++------------------------------------------
+ 1 files changed, 22 insertions(+), 63 deletions(-)
+
+diff --git a/lib/ioremap.c b/lib/ioremap.c
+index da4e2ad..e75d0d1 100644
+--- a/lib/ioremap.c
++++ b/lib/ioremap.c
+@@ -13,81 +13,40 @@
+ #include <asm/cacheflush.h>
+ #include <asm/pgtable.h>
+
+-static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
+- unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
++struct ioremap_data
+ {
+- pte_t *pte;
++ phys_addr_t phys_addr;
++ pgprot_t prot;
++};
++
++static int ioremap_pte_range(pte_t *pte, unsigned count,
++ unsigned long addr, void *v)
++{
++ struct ioremap_data *data = v;
+ u64 pfn;
+
+- pfn = phys_addr >> PAGE_SHIFT;
+- pte = pte_alloc_kernel(pmd, addr);
+- if (!pte)
+- return -ENOMEM;
+- do {
+- BUG_ON(!pte_none(*pte));
+- set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot));
+- pfn++;
+- } while (pte++, addr += PAGE_SIZE, addr != end);
+- return 0;
+-}
++ pfn = data->phys_addr >> PAGE_SHIFT;
++ data->phys_addr += count * PAGE_SIZE;
+
+-static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
+- unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
+-{
+- pmd_t *pmd;
+- unsigned long next;
++ while (count--) {
++ BUG_ON(!pte_none(*pte));
+
+- phys_addr -= addr;
+- pmd = pmd_alloc(&init_mm, pud, addr);
+- if (!pmd)
+- return -ENOMEM;
+- do {
+- next = pmd_addr_end(addr, end);
+- if (ioremap_pte_range(pmd, addr, next, phys_addr + addr, prot))
+- return -ENOMEM;
+- } while (pmd++, addr = next, addr != end);
+- return 0;
+-}
++ set_pte_at(&init_mm, addr, pte++, pfn_pte(pfn++, data->prot));
+
+-static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
+- unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
+-{
+- pud_t *pud;
+- unsigned long next;
++ addr += PAGE_SIZE;
++ }
+
+- phys_addr -= addr;
+- pud = pud_alloc(&init_mm, pgd, addr);
+- if (!pud)
+- return -ENOMEM;
+- do {
+- next = pud_addr_end(addr, end);
+- if (ioremap_pmd_range(pud, addr, next, phys_addr + addr, prot))
+- return -ENOMEM;
+- } while (pud++, addr = next, addr != end);
+ return 0;
+ }
+
+-int ioremap_page_range(unsigned long addr,
+- unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
++int ioremap_page_range(unsigned long addr, unsigned long end,
++ phys_addr_t phys_addr, pgprot_t prot)
+ {
+- pgd_t *pgd;
+- unsigned long start;
+- unsigned long next;
+- int err;
+-
+- BUG_ON(addr >= end);
+-
+- start = addr;
+- phys_addr -= addr;
+- pgd = pgd_offset_k(addr);
+- do {
+- next = pgd_addr_end(addr, end);
+- err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, prot);
+- if (err)
+- break;
+- } while (pgd++, addr = next, addr != end);
++ struct ioremap_data data = { .phys_addr = phys_addr, .prot = prot };
++ int err = apply_to_page_range_batch(&init_mm, addr, end - addr,
++ ioremap_pte_range, &data);
+
+- flush_cache_vmap(start, end);
++ flush_cache_vmap(addr, end);
+
+ return err;
+ }
+--
+1.7.3.4
+
+
+From 7a064a31021ba0b4adfc90061d7da2daa9b3d27e Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Mon, 29 Nov 2010 12:22:24 -0800
+Subject: [PATCH 132/139] vmalloc: use plain pte_clear() for unmaps
+
+ptep_get_and_clear() is potentially moderately expensive (at least
+an atomic operation, or potentially a trap-and-fault when virtualized)
+so use a plain pte_clear().
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ mm/vmalloc.c | 3 ++-
+ 1 files changed, 2 insertions(+), 1 deletions(-)
+
+diff --git a/mm/vmalloc.c b/mm/vmalloc.c
+index 5ddbdfe..c06dc1e 100644
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -39,8 +39,9 @@ static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
+
+ pte = pte_offset_kernel(pmd, addr);
+ do {
+- pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
++ pte_t ptent = *pte;
+ WARN_ON(!pte_none(ptent) && !pte_present(ptent));
++ pte_clear(&init_mm, addr, pte);
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+ }
+
+--
+1.7.3.4
+
+
+From 334c14835ef823ce665eeebf6aad467064f47e47 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Mon, 29 Nov 2010 11:06:19 -0800
+Subject: [PATCH 133/139] vmalloc: use apply_to_page_range_batch() for vunmap_page_range()
+
+There's no need to open-code it when there's helpful utility function
+to do the job.
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Cc: Nick Piggin <npiggin at kernel.dk>
+---
+ mm/vmalloc.c | 53 +++++++++--------------------------------------------
+ 1 files changed, 9 insertions(+), 44 deletions(-)
+
+diff --git a/mm/vmalloc.c b/mm/vmalloc.c
+index c06dc1e..e99aa3b 100644
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -33,59 +33,24 @@
+
+ /*** Page table manipulation functions ***/
+
+-static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
++static int vunmap_pte(pte_t *pte, unsigned count,
++ unsigned long addr, void *data)
+ {
+- pte_t *pte;
+-
+- pte = pte_offset_kernel(pmd, addr);
+- do {
++ while (count--) {
+ pte_t ptent = *pte;
+- WARN_ON(!pte_none(ptent) && !pte_present(ptent));
+- pte_clear(&init_mm, addr, pte);
+- } while (pte++, addr += PAGE_SIZE, addr != end);
+-}
+-
+-static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
+-{
+- pmd_t *pmd;
+- unsigned long next;
+
+- pmd = pmd_offset(pud, addr);
+- do {
+- next = pmd_addr_end(addr, end);
+- if (pmd_none_or_clear_bad(pmd))
+- continue;
+- vunmap_pte_range(pmd, addr, next);
+- } while (pmd++, addr = next, addr != end);
+-}
++ WARN_ON(!pte_none(ptent) && !pte_present(ptent));
+
+-static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end)
+-{
+- pud_t *pud;
+- unsigned long next;
++ pte_clear(&init_mm, addr, pte++);
++ addr += PAGE_SIZE;
++ }
+
+- pud = pud_offset(pgd, addr);
+- do {
+- next = pud_addr_end(addr, end);
+- if (pud_none_or_clear_bad(pud))
+- continue;
+- vunmap_pmd_range(pud, addr, next);
+- } while (pud++, addr = next, addr != end);
++ return 0;
+ }
+
+ static void vunmap_page_range(unsigned long addr, unsigned long end)
+ {
+- pgd_t *pgd;
+- unsigned long next;
+-
+- BUG_ON(addr >= end);
+- pgd = pgd_offset_k(addr);
+- do {
+- next = pgd_addr_end(addr, end);
+- if (pgd_none_or_clear_bad(pgd))
+- continue;
+- vunmap_pud_range(pgd, addr, next);
+- } while (pgd++, addr = next, addr != end);
++ apply_to_page_range_batch(&init_mm, addr, end - addr, vunmap_pte, NULL);
+ }
+
+ static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
+--
+1.7.3.4
+
+
+From 937b74f8d19f7e62d63d4e82c2cf21f3bd636d9e Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Mon, 29 Nov 2010 11:11:45 -0800
+Subject: [PATCH 134/139] vmalloc: use apply_to_page_range_batch() for vmap_page_range_noflush()
+
+There's no need to open-code it when there's a helpful utility
+function.
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Cc: Nick Piggin <npiggin at kernel.dk>
+---
+ mm/vmalloc.c | 92 ++++++++++++++++++---------------------------------------
+ 1 files changed, 29 insertions(+), 63 deletions(-)
+
+diff --git a/mm/vmalloc.c b/mm/vmalloc.c
+index e99aa3b..cf4e705 100644
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -53,63 +53,34 @@ static void vunmap_page_range(unsigned long addr, unsigned long end)
+ apply_to_page_range_batch(&init_mm, addr, end - addr, vunmap_pte, NULL);
+ }
+
+-static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
+- unsigned long end, pgprot_t prot, struct page **pages, int *nr)
++struct vmap_data
+ {
+- pte_t *pte;
++ struct page **pages;
++ unsigned index;
++ pgprot_t prot;
++};
+
+- /*
+- * nr is a running index into the array which helps higher level
+- * callers keep track of where we're up to.
+- */
++static int vmap_pte(pte_t *pte, unsigned count,
++ unsigned long addr, void *data)
++{
++ struct vmap_data *vmap = data;
+
+- pte = pte_alloc_kernel(pmd, addr);
+- if (!pte)
+- return -ENOMEM;
+- do {
+- struct page *page = pages[*nr];
++ while (count--) {
++ struct page *page = vmap->pages[vmap->index];
+
+ if (WARN_ON(!pte_none(*pte)))
+ return -EBUSY;
++
+ if (WARN_ON(!page))
+ return -ENOMEM;
+- set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
+- (*nr)++;
+- } while (pte++, addr += PAGE_SIZE, addr != end);
+- return 0;
+-}
+
+-static int vmap_pmd_range(pud_t *pud, unsigned long addr,
+- unsigned long end, pgprot_t prot, struct page **pages, int *nr)
+-{
+- pmd_t *pmd;
+- unsigned long next;
+-
+- pmd = pmd_alloc(&init_mm, pud, addr);
+- if (!pmd)
+- return -ENOMEM;
+- do {
+- next = pmd_addr_end(addr, end);
+- if (vmap_pte_range(pmd, addr, next, prot, pages, nr))
+- return -ENOMEM;
+- } while (pmd++, addr = next, addr != end);
+- return 0;
+-}
++ set_pte_at(&init_mm, addr, pte, mk_pte(page, vmap->prot));
+
+-static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
+- unsigned long end, pgprot_t prot, struct page **pages, int *nr)
+-{
+- pud_t *pud;
+- unsigned long next;
++ pte++;
++ addr += PAGE_SIZE;
++ vmap->index++;
++ }
+
+- pud = pud_alloc(&init_mm, pgd, addr);
+- if (!pud)
+- return -ENOMEM;
+- do {
+- next = pud_addr_end(addr, end);
+- if (vmap_pmd_range(pud, addr, next, prot, pages, nr))
+- return -ENOMEM;
+- } while (pud++, addr = next, addr != end);
+ return 0;
+ }
+
+@@ -122,22 +93,17 @@ static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
+ static int vmap_page_range_noflush(unsigned long start, unsigned long end,
+ pgprot_t prot, struct page **pages)
+ {
+- pgd_t *pgd;
+- unsigned long next;
+- unsigned long addr = start;
+- int err = 0;
+- int nr = 0;
+-
+- BUG_ON(addr >= end);
+- pgd = pgd_offset_k(addr);
+- do {
+- next = pgd_addr_end(addr, end);
+- err = vmap_pud_range(pgd, addr, next, prot, pages, &nr);
+- if (err)
+- return err;
+- } while (pgd++, addr = next, addr != end);
+-
+- return nr;
++ int err;
++ struct vmap_data vmap = {
++ .pages = pages,
++ .index = 0,
++ .prot = prot
++ };
++
++ err = apply_to_page_range_batch(&init_mm, start, end - start,
++ vmap_pte, &vmap);
++
++ return err ? err : vmap.index;
+ }
+
+ static int vmap_page_range(unsigned long start, unsigned long end,
+--
+1.7.3.4
+
+
+From d4205306bb6609275ad93a8d1bfb4de3d06d0eb5 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Wed, 1 Dec 2010 15:45:21 -0800
+Subject: [PATCH 135/139] vmalloc: use apply_to_page_range_batch() in alloc_vm_area()
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ mm/vmalloc.c | 8 ++++----
+ 1 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/mm/vmalloc.c b/mm/vmalloc.c
+index cf4e705..64d395f 100644
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -1993,9 +1993,9 @@ void __attribute__((weak)) vmalloc_sync_all(void)
+ }
+
+
+-static int f(pte_t *pte, unsigned long addr, void *data)
++static int f(pte_t *pte, unsigned count, unsigned long addr, void *data)
+ {
+- /* apply_to_page_range() does all the hard work. */
++ /* apply_to_page_range_batch() does all the hard work. */
+ return 0;
+ }
+
+@@ -2024,8 +2024,8 @@ struct vm_struct *alloc_vm_area(size_t size)
+ * This ensures that page tables are constructed for this region
+ * of kernel virtual address space and mapped into init_mm.
+ */
+- if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
+- area->size, f, NULL)) {
++ if (apply_to_page_range_batch(&init_mm, (unsigned long)area->addr,
++ area->size, f, NULL)) {
+ free_vm_area(area);
+ return NULL;
+ }
+--
+1.7.3.4
+
+
+From e35361f09bf25ecb5ba6877e44319de315b76f5e Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Wed, 1 Dec 2010 15:44:04 -0800
+Subject: [PATCH 136/139] xen/mmu: use apply_to_page_range_batch() in xen_remap_domain_mfn_range()
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ arch/x86/xen/mmu.c | 19 ++++++++++++-------
+ 1 files changed, 12 insertions(+), 7 deletions(-)
+
+diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
+index 38ba804..25da278 100644
+--- a/arch/x86/xen/mmu.c
++++ b/arch/x86/xen/mmu.c
+@@ -2292,14 +2292,19 @@ struct remap_data {
+ struct mmu_update *mmu_update;
+ };
+
+-static int remap_area_mfn_pte_fn(pte_t *ptep, unsigned long addr, void *data)
++static int remap_area_mfn_pte_fn(pte_t *ptep, unsigned count,
++ unsigned long addr, void *data)
+ {
+ struct remap_data *rmd = data;
+- pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot));
+
+- rmd->mmu_update->ptr = arbitrary_virt_to_machine(ptep).maddr;
+- rmd->mmu_update->val = pte_val_ma(pte);
+- rmd->mmu_update++;
++ while (count--) {
++ pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot));
++
++ rmd->mmu_update->ptr = arbitrary_virt_to_machine(ptep).maddr;
++ rmd->mmu_update->val = pte_val_ma(pte);
++ rmd->mmu_update++;
++ ptep++;
++ }
+
+ return 0;
+ }
+@@ -2328,8 +2333,8 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
+ range = (unsigned long)batch << PAGE_SHIFT;
+
+ rmd.mmu_update = mmu_update;
+- err = apply_to_page_range(vma->vm_mm, addr, range,
+- remap_area_mfn_pte_fn, &rmd);
++ err = apply_to_page_range_batch(vma->vm_mm, addr, range,
++ remap_area_mfn_pte_fn, &rmd);
+ if (err)
+ goto out;
+
+--
+1.7.3.4
+
+
+From 02533b01d70f7cbbe3cf47de3f27740ab334a11f Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Wed, 1 Dec 2010 15:50:12 -0800
+Subject: [PATCH 137/139] xen/grant-table: use apply_to_page_range_batch()
+
+No need to call the callback per-pte.
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ arch/x86/xen/grant-table.c | 28 ++++++++++++++++++----------
+ 1 files changed, 18 insertions(+), 10 deletions(-)
+
+diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
+index 5bf892a..11a8a45 100644
+--- a/arch/x86/xen/grant-table.c
++++ b/arch/x86/xen/grant-table.c
+@@ -44,19 +44,27 @@
+
+ #include <asm/pgtable.h>
+
+-static int map_pte_fn(pte_t *pte, unsigned long addr, void *data)
++static int map_pte_fn(pte_t *pte, unsigned count, unsigned long addr, void *data)
+ {
+ unsigned long **frames = (unsigned long **)data;
+
+- set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
+- (*frames)++;
++ while (count--) {
++ set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
++ (*frames)++;
++ pte++;
++ addr += PAGE_SIZE;
++ }
+ return 0;
+ }
+
+-static int unmap_pte_fn(pte_t *pte, unsigned long addr, void *data)
++static int unmap_pte_fn(pte_t *pte, unsigned count, unsigned long addr, void *data)
+ {
++ while (count--) {
++ pte_clear(&init_mm, addr, pte);
++ addr += PAGE_SIZE;
++ pte++;
++ }
+
+- set_pte_at(&init_mm, addr, pte, __pte(0));
+ return 0;
+ }
+
+@@ -75,15 +83,15 @@ int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
+ *__shared = shared;
+ }
+
+- rc = apply_to_page_range(&init_mm, (unsigned long)shared,
+- PAGE_SIZE * nr_gframes,
+- map_pte_fn, &frames);
++ rc = apply_to_page_range_batch(&init_mm, (unsigned long)shared,
++ PAGE_SIZE * nr_gframes,
++ map_pte_fn, &frames);
+ return rc;
+ }
+
+ void arch_gnttab_unmap_shared(struct grant_entry *shared,
+ unsigned long nr_gframes)
+ {
+- apply_to_page_range(&init_mm, (unsigned long)shared,
+- PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL);
++ apply_to_page_range_batch(&init_mm, (unsigned long)shared,
++ PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL);
+ }
+--
+1.7.3.4
+
+
+From cb3172f5566fe75b749b0873deedc42687c39064 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Wed, 19 Jan 2011 18:41:03 -0500
+Subject: [PATCH 138/139] x86/nx: Made .bss be HPAGE_ALIGNED.
+
+That makes it boot under Xen.
+---
+ arch/x86/kernel/vmlinux.lds.S | 2 +-
+ 1 files changed, 1 insertions(+), 1 deletions(-)
+
+diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
+index b34ab80..e37d10f 100644
+--- a/arch/x86/kernel/vmlinux.lds.S
++++ b/arch/x86/kernel/vmlinux.lds.S
+@@ -341,7 +341,7 @@ SECTIONS
+ #endif
+
+ /* BSS */
+- . = ALIGN(PAGE_SIZE);
++ . = ALIGN(HPAGE_SIZE);
+ .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
+ __bss_start = .;
+ *(.bss..page_aligned)
+--
+1.7.3.4
+
+
+From 8751f3b0fd2ca59c410052d1faecc2297bb91d62 Mon Sep 17 00:00:00 2001
+From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+Date: Mon, 24 Jan 2011 17:25:29 -0800
+Subject: [PATCH 139/139] xen/gntdev: remove token argument from find_grant_ptes
+
+Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
+---
+ drivers/xen/gntdev.c | 3 +--
+ 1 files changed, 1 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
+index 1e31cdc..2b777c0 100644
+--- a/drivers/xen/gntdev.c
++++ b/drivers/xen/gntdev.c
+@@ -226,8 +226,7 @@ static void gntdev_free_map(struct grant_map *map)
+
+ /* ------------------------------------------------------------------ */
+
+-static int find_grant_ptes(pte_t *pte, pgtable_t token,
+- unsigned long addr, void *data)
++static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
+ {
+ struct grant_map *map = data;
+ unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
+--
+1.7.3.4
+
diff --git a/xen.pvhvm.fixes.patch b/xen.pvhvm.fixes.patch
new file mode 100644
index 0000000..cf6dc95
--- /dev/null
+++ b/xen.pvhvm.fixes.patch
@@ -0,0 +1,197 @@
+From bb89b5a4444a7514f3cf6cadb4f613832d7a9887 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Date: Fri, 14 Jan 2011 17:55:44 -0500
+Subject: [PATCH 1/4] m2p: No need to catch exceptions when we know that there is no RAM
+
+.. beyond what we think is the end of memory. However there might
+be more System RAM - but assigned to a guest. Hence jump to the
+M2P override check and consult.
+
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+---
+ arch/x86/include/asm/xen/page.h | 6 +++++-
+ 1 files changed, 5 insertions(+), 1 deletions(-)
+
+diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
+index f25bdf2..74a8559 100644
+--- a/arch/x86/include/asm/xen/page.h
++++ b/arch/x86/include/asm/xen/page.h
+@@ -77,6 +77,10 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return mfn;
+
++ if (unlikely((mfn >> machine_to_phys_order) != 0)) {
++ pfn = ~0;
++ goto try_override;
++ }
+ pfn = 0;
+ /*
+ * The array access can fail (e.g., device space beyond end of RAM).
+@@ -84,7 +88,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
+ * but we must handle the fault without crashing!
+ */
+ __get_user(pfn, &machine_to_phys_mapping[mfn]);
+-
++try_override:
+ /*
+ * If this appears to be a foreign mfn (because the pfn
+ * doesn't map back to the mfn), then check the local override
+--
+1.7.3.4
+
+
+From a7cdabcd381dffb5db72a31c78b65a2bcdec2a04 Mon Sep 17 00:00:00 2001
+From: Stefan Bader <stefan.bader at canonical.com>
+Date: Thu, 20 Jan 2011 15:19:46 +0000
+Subject: [PATCH 2/4] xen: p2m: correctly initialize partial p2m leaf
+
+After changing the p2m mapping to a tree by
+
+ commit 58e05027b530ff081ecea68e38de8d59db8f87e0
+ xen: convert p2m to a 3 level tree
+
+and trying to boot a DomU with 615MB of memory, the following crash was
+observed in the dump:
+
+kernel direct mapping tables up to 26f00000 @ 1ec4000-1fff000
+BUG: unable to handle kernel NULL pointer dereference at (null)
+IP: [<c0107397>] xen_set_pte+0x27/0x60
+*pdpt = 0000000000000000 *pde = 0000000000000000
+
+Adding further debug statements showed that when trying to set up
+pfn=0x26700 the returned mapping was invalid.
+
+pfn=0x266ff calling set_pte(0xc1fe77f8, 0x6b3003)
+pfn=0x26700 calling set_pte(0xc1fe7800, 0x3)
+
+Although the last_pfn obtained from the startup info is 0x26700, which
+should in turn not be hit, the additional 8MB which are added as extra
+memory normally seem to be ok. This led to looking into the initial
+p2m tree construction, which uses the smaller value and assuming that
+there is other code handling the extra memory.
+
+When the p2m tree is set up, the leaves are directly pointed to the
+array which the domain builder set up. But if the mapping is not on a
+boundary that fits into one p2m page, this will result in the last leaf
+being only partially valid. And as the invalid entries are not
+initialized in that case, things go badly wrong.
+
+I am trying to fix that by checking whether the current leaf is a
+complete map and if not, allocate a completely new page and copy only
+the valid pointers there. This may not be the most efficient or elegant
+solution, but at least it seems to allow me booting DomUs with memory
+assignments all over the range.
+
+Signed-off-by: Stefan Bader <stefan.bader at canonical.com>
+---
+ arch/x86/xen/p2m.c | 20 +++++++++++++++++++-
+ 1 files changed, 19 insertions(+), 1 deletions(-)
+
+diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
+index 8f2251d..c9307ec 100644
+--- a/arch/x86/xen/p2m.c
++++ b/arch/x86/xen/p2m.c
+@@ -237,7 +237,25 @@ void __init xen_build_dynamic_phys_to_machine(void)
+ p2m_top[topidx] = mid;
+ }
+
+- p2m_top[topidx][mididx] = &mfn_list[pfn];
++ /*
++ * As long as the mfn_list has enough entries to completely
++ * fill a p2m page, pointing into the array is ok. But if
++ * not the entries beyond the last pfn will be undefined.
++ * And guessing that the 'what-ever-there-is' does not take it
++ * too kindly when changing it to invalid markers, a new page
++ * is allocated, initialized and filled with the valid part.
++ */
++ if (unlikely(pfn + P2M_PER_PAGE > max_pfn)) {
++ unsigned long p2midx;
++ unsigned long **p2m = extend_brk(PAGE_SIZE, PAGE_SIZE);
++ p2m_init(p2m);
++
++ for (p2midx = 0; pfn + p2midx < max_pfn; p2midx++) {
++ p2m[p2midx] = mfn_list[pfn + p2midx];
++ }
++ p2m_top[topidx][mididx] = p2m;
++ } else
++ p2m_top[topidx][mididx] = &mfn_list[pfn];
+ }
+
+ m2p_override_init();
+--
+1.7.3.4
+
+
+From b84683ad1e704c2a296d08ff0cbe29db936f94a7 Mon Sep 17 00:00:00 2001
+From: Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+Date: Tue, 25 Jan 2011 12:03:42 +0000
+Subject: [PATCH 3/4] xen: make sure the e820 memory regions end at page boundary
+
+Signed-off-by: M A Young <m.a.young at durham.ac.uk>
+Signed-off-by: Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+---
+ arch/x86/xen/setup.c | 5 ++++-
+ 1 files changed, 4 insertions(+), 1 deletions(-)
+
+diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
+index b5a7f92..a3d28a1 100644
+--- a/arch/x86/xen/setup.c
++++ b/arch/x86/xen/setup.c
+@@ -179,7 +179,10 @@ char * __init xen_memory_setup(void)
+ e820.nr_map = 0;
+ xen_extra_mem_start = mem_end;
+ for (i = 0; i < memmap.nr_entries; i++) {
+- unsigned long long end = map[i].addr + map[i].size;
++ unsigned long long end;
++ if (map[i].type == E820_RAM)
++ map[i].size -= (map[i].size + map[i].addr) % PAGE_SIZE;
++ end = map[i].addr + map[i].size;
+
+ if (map[i].type == E820_RAM && end > mem_end) {
+ /* RAM off the end - may be partially included */
+--
+1.7.3.4
+
+
+From daed469760dabbf6ae81c9869a263535fb587e63 Mon Sep 17 00:00:00 2001
+From: Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+Date: Tue, 25 Jan 2011 12:05:11 +0000
+Subject: [PATCH 4/4] When destroying mappings between _brk_end and _end, do not go over _end
+
+Signed-off-by: Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+---
+ arch/x86/mm/init.c | 15 ++++++++++++++-
+ 1 files changed, 14 insertions(+), 1 deletions(-)
+
+diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
+index 947f42a..ebc0221 100644
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -291,10 +291,23 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
+ * located on different 2M pages. cleanup_highmap(), however,
+ * can only consider _end when it runs, so destroy any
+ * mappings beyond _brk_end here.
++ * Be careful not to go over _end.
+ */
+ pud = pud_offset(pgd_offset_k(_brk_end), _brk_end);
+ pmd = pmd_offset(pud, _brk_end - 1);
+- while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1))
++ while (++pmd < pmd_offset(pud, (unsigned long)_end - 1))
++ pmd_clear(pmd);
++ if (((unsigned long)_end) & ~PMD_MASK) {
++ pte_t *pte;
++ unsigned long addr;
++ for (addr = ((unsigned long)_end) & PMD_MASK;
++ addr < ((unsigned long)_end);
++ addr += PAGE_SIZE) {
++ pte = pte_offset_map(pmd, addr);
++ pte_clear(&init_mm, addr, pte);
++ pte_unmap(pte);
++ }
++ } else
+ pmd_clear(pmd);
+ }
+ #endif
+--
+1.7.3.4
+
More information about the scm-commits
mailing list