[kernel/rawhide/user/myoung/xendom0: 20/20] switch to single diff for pcifront.next-2.6.38.patch; build new xen-blkback as a module

myoung myoung at fedoraproject.org
Tue Mar 15 20:11:15 UTC 2011


commit eb7b2f0ac955474ff95dc42aa6821aa71820cc22
Author: Michael Young <m.a.young at durham.ac.uk>
Date:   Tue Mar 15 20:09:04 2011 +0000

    switch to single diff for pcifront.next-2.6.38.patch
    build new xen-blkback as a module

 config-local                   |    1 +
 kernel.spec                    |    6 +
 xen.pcifront.next-2.6.38.patch |52239 ++++++++++------------------------------
 3 files changed, 13257 insertions(+), 38989 deletions(-)
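
The changelog below notes that the regenerated single-file patch is the
output of "git diff v2.6.38...3d67ff0c8f180f2cc4ade645f6f9571b26816e5a",
i.e. one combined diff replacing the previous stack of 203 individual
patches, for ease of maintenance. A minimal sketch of how such a patch
can be regenerated follows; the remote name "xen" and its URL are
assumptions for illustration, not part of this commit:

    # Track the Xen development tree in an existing mainline clone
    # (hypothetical remote URL; substitute the tree actually followed).
    git remote add xen git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen.git
    git fetch xen

    # Triple-dot diff: changes from the merge base of v2.6.38 and the
    # quoted commit up to that commit, i.e. only the Xen-side changes.
    git diff v2.6.38...3d67ff0c8f180f2cc4ade645f6f9571b26816e5a \
        > xen.pcifront.next-2.6.38.patch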
---
diff --git a/config-local b/config-local
index 5c787ae..cfbe9b6 100644
--- a/config-local
+++ b/config-local
@@ -5,3 +5,4 @@ CONFIG_XEN_PCIDEV_BACKEND=m
 CONFIG_XEN_PCIDEV_BE_DEBUG=n
 CONFIG_XEN_DEBUG=n
 CONFIG_XEN_GRANT_DEV_ALLOC=m
+CONFIG_XEN_BLKDEV_BACKEND=m
diff --git a/kernel.spec b/kernel.spec
index 64f30a0..7925d2b 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -1993,6 +1993,12 @@ fi
 # and build.
 
 %changelog
+* Tue Mar 15 2011 Michael Young <m.a.young at durham.ac.uk>
+- switch to single diff for pcifront.next-2.6.38.patch
+  for ease of maintenance
+  patch is git diff v2.6.38...3d67ff0c8f180f2cc4ade645f6f9571b26816e5a
+- set new xen-blkback to be a module CONFIG_XEN_BLKDEV_BACKEND=m
+
 * Tue Mar 15 2011 Adam Jackson <ajax at redhat.com>
 - drm-intel-big-hammer.patch: Drop.
 
diff --git a/xen.pcifront.next-2.6.38.patch b/xen.pcifront.next-2.6.38.patch
index c80ccc1..90ad3bd 100644
--- a/xen.pcifront.next-2.6.38.patch
+++ b/xen.pcifront.next-2.6.38.patch
@@ -1,8490 +1,3091 @@
-From 03c4949992e2b7e84b7cdeb156d803db3f848b6c Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Mon, 9 Feb 2009 12:05:52 -0800
-Subject: [PATCH 001/203] xen: netback: Initial import of linux-2.6.18-xen.hg netback driver.
-
-This corresponds to 774:107e10e0e07c in that tree.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/xen/Kconfig             |    7 +
- drivers/xen/Makefile            |    1 +
- drivers/xen/netback/Makefile    |    3 +
- drivers/xen/netback/common.h    |  217 ++++++
- drivers/xen/netback/interface.c |  336 ++++++++
- drivers/xen/netback/netback.c   | 1637 +++++++++++++++++++++++++++++++++++++++
- drivers/xen/netback/xenbus.c    |  454 +++++++++++
- 7 files changed, 2655 insertions(+), 0 deletions(-)
- create mode 100644 drivers/xen/netback/Makefile
- create mode 100644 drivers/xen/netback/common.h
- create mode 100644 drivers/xen/netback/interface.c
- create mode 100644 drivers/xen/netback/netback.c
- create mode 100644 drivers/xen/netback/xenbus.c
-
-diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
-index 5a48ce9..7e83d43 100644
---- a/drivers/xen/Kconfig
-+++ b/drivers/xen/Kconfig
-@@ -37,6 +37,13 @@ config XEN_BACKEND
- 	  Support for backend device drivers that provide I/O services
- 	  to other virtual machines.
+diff --git a/arch/ia64/include/asm/xen/hypercall.h b/arch/ia64/include/asm/xen/hypercall.h
+index 96fc623..ed28bcd 100644
+--- a/arch/ia64/include/asm/xen/hypercall.h
++++ b/arch/ia64/include/asm/xen/hypercall.h
+@@ -107,7 +107,7 @@ extern unsigned long __hypercall(unsigned long a1, unsigned long a2,
+ static inline int
+ xencomm_arch_hypercall_sched_op(int cmd, struct xencomm_handle *arg)
+ {
+-	return _hypercall2(int, sched_op_new, cmd, arg);
++	return _hypercall2(int, sched_op, cmd, arg);
+ }
  
-+config XEN_NETDEV_BACKEND
-+       bool "Xen backend network device"
-+       depends on XEN_BACKEND && NET
-+       help
-+         Implement the network backend driver, which passes packets
-+         from the guest domain's frontend drivers to the network.
-+
- config XENFS
- 	tristate "Xen filesystem"
- 	default y
-diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
-index 533a199..c0e0509 100644
---- a/drivers/xen/Makefile
-+++ b/drivers/xen/Makefile
-@@ -9,6 +9,7 @@ obj-$(CONFIG_HOTPLUG_CPU)	+= cpu_hotplug.o
- obj-$(CONFIG_XEN_BALLOON)	+= balloon.o
- obj-$(CONFIG_XEN_DEV_EVTCHN)	+= xen-evtchn.o
- obj-$(CONFIG_XEN_GNTDEV)	+= xen-gntdev.o
-+obj-$(CONFIG_XEN_NETDEV_BACKEND)	+= netback/
- obj-$(CONFIG_XENFS)		+= xenfs/
- obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
- obj-$(CONFIG_XEN_PLATFORM_PCI)	+= platform-pci.o
-diff --git a/drivers/xen/netback/Makefile b/drivers/xen/netback/Makefile
-new file mode 100644
-index 0000000..f4a0c51
---- /dev/null
-+++ b/drivers/xen/netback/Makefile
-@@ -0,0 +1,3 @@
-+obj-$(CONFIG_XEN_NETDEV_BACKEND) := netbk.o
-+
-+netbk-y   := netback.o xenbus.o interface.o
-diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
-new file mode 100644
-index 0000000..9a54d57
---- /dev/null
-+++ b/drivers/xen/netback/common.h
-@@ -0,0 +1,217 @@
-+/******************************************************************************
-+ * arch/xen/drivers/netif/backend/common.h
-+ *
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License version 2
-+ * as published by the Free Software Foundation; or, when distributed
-+ * separately from the Linux kernel or incorporated into other
-+ * software packages, subject to the following license:
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a copy
-+ * of this source file (the "Software"), to deal in the Software without
-+ * restriction, including without limitation the rights to use, copy, modify,
-+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
-+ * and to permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-+ * IN THE SOFTWARE.
-+ */
-+
-+#ifndef __NETIF__BACKEND__COMMON_H__
-+#define __NETIF__BACKEND__COMMON_H__
-+
-+#include <linux/version.h>
-+#include <linux/module.h>
-+#include <linux/interrupt.h>
-+#include <linux/slab.h>
-+#include <linux/ip.h>
-+#include <linux/in.h>
-+#include <linux/netdevice.h>
-+#include <linux/etherdevice.h>
-+#include <linux/wait.h>
-+#include <xen/evtchn.h>
-+#include <xen/interface/io/netif.h>
-+#include <asm/io.h>
-+#include <asm/pgalloc.h>
-+#include <xen/interface/grant_table.h>
-+#include <xen/gnttab.h>
-+#include <xen/driver_util.h>
-+#include <xen/xenbus.h>
-+
-+#define DPRINTK(_f, _a...)			\
-+	pr_debug("(file=%s, line=%d) " _f,	\
-+		 __FILE__ , __LINE__ , ## _a )
-+#define IPRINTK(fmt, args...)				\
-+	printk(KERN_INFO "xen_net: " fmt, ##args)
-+#define WPRINTK(fmt, args...)				\
-+	printk(KERN_WARNING "xen_net: " fmt, ##args)
-+
-+typedef struct netif_st {
-+	/* Unique identifier for this interface. */
-+	domid_t          domid;
-+	unsigned int     handle;
-+
-+	u8               fe_dev_addr[6];
-+
-+	/* Physical parameters of the comms window. */
-+	grant_handle_t   tx_shmem_handle;
-+	grant_ref_t      tx_shmem_ref;
-+	grant_handle_t   rx_shmem_handle;
-+	grant_ref_t      rx_shmem_ref;
-+	unsigned int     irq;
-+
-+	/* The shared rings and indexes. */
-+	netif_tx_back_ring_t tx;
-+	netif_rx_back_ring_t rx;
-+	struct vm_struct *tx_comms_area;
-+	struct vm_struct *rx_comms_area;
-+
-+	/* Set of features that can be turned on in dev->features. */
-+	int features;
-+
-+	/* Internal feature information. */
-+	u8 can_queue:1;	/* can queue packets for receiver? */
-+	u8 copying_receiver:1;	/* copy packets to receiver?       */
-+
-+	/* Allow netif_be_start_xmit() to peek ahead in the rx request ring. */
-+	RING_IDX rx_req_cons_peek;
-+
-+	/* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
-+	unsigned long   credit_bytes;
-+	unsigned long   credit_usec;
-+	unsigned long   remaining_credit;
-+	struct timer_list credit_timeout;
-+
-+	/* Enforce draining of the transmit queue. */
-+	struct timer_list tx_queue_timeout;
-+
-+	/* Miscellaneous private stuff. */
-+	struct list_head list;  /* scheduling list */
-+	atomic_t         refcnt;
-+	struct net_device *dev;
-+	struct net_device_stats stats;
-+
-+	unsigned int carrier;
-+
-+	wait_queue_head_t waiting_to_free;
-+} netif_t;
-+
-+/*
-+ * Implement our own carrier flag: the network stack's version causes delays
-+ * when the carrier is re-enabled (in particular, dev_activate() may not
-+ * immediately be called, which can cause packet loss; also the etherbridge
-+ * can be rather lazy in activating its port).
-+ */
-+#define netback_carrier_on(netif)	((netif)->carrier = 1)
-+#define netback_carrier_off(netif)	((netif)->carrier = 0)
-+#define netback_carrier_ok(netif)	((netif)->carrier)
-+
-+enum {
-+	NETBK_DONT_COPY_SKB,
-+	NETBK_DELAYED_COPY_SKB,
-+	NETBK_ALWAYS_COPY_SKB,
-+};
-+
-+extern int netbk_copy_skb_mode;
-+
-+/* Function pointers into netback accelerator plugin modules */
-+struct netback_accel_hooks {
-+	struct module *owner;
-+	int  (*probe)(struct xenbus_device *dev);
-+	int (*remove)(struct xenbus_device *dev);
-+};
-+
-+/* Structure to track the state of a netback accelerator plugin */
-+struct netback_accelerator {
-+	struct list_head link;
-+	int id;
-+	char *eth_name;
-+	atomic_t use_count;
-+	struct netback_accel_hooks *hooks;
-+};
-+
-+struct backend_info {
-+	struct xenbus_device *dev;
-+	netif_t *netif;
-+	enum xenbus_state frontend_state;
-+
-+	/* State relating to the netback accelerator */
-+	void *netback_accel_priv;
-+	/* The accelerator that this backend is currently using */
-+	struct netback_accelerator *accelerator;
-+};
-+
-+#define NETBACK_ACCEL_VERSION 0x00010001
-+
-+/*
-+ * Connect an accelerator plugin module to netback.  Returns zero on
-+ * success, < 0 on error, > 0 (with highest version number supported)
-+ * if version mismatch.
-+ */
-+extern int netback_connect_accelerator(unsigned version,
-+				       int id, const char *eth_name,
-+				       struct netback_accel_hooks *hooks);
-+/* Disconnect a previously connected accelerator plugin module */
-+extern void netback_disconnect_accelerator(int id, const char *eth_name);
-+
-+
-+extern
-+void netback_probe_accelerators(struct backend_info *be,
-+				struct xenbus_device *dev);
-+extern
-+void netback_remove_accelerators(struct backend_info *be,
-+				 struct xenbus_device *dev);
-+extern
-+void netif_accel_init(void);
-+
-+
-+#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
-+#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
-+
-+void netif_disconnect(netif_t *netif);
-+
-+netif_t *netif_alloc(domid_t domid, unsigned int handle);
-+int netif_map(netif_t *netif, unsigned long tx_ring_ref,
-+	      unsigned long rx_ring_ref, unsigned int evtchn);
-+
-+#define netif_get(_b) (atomic_inc(&(_b)->refcnt))
-+#define netif_put(_b)						\
-+	do {							\
-+		if ( atomic_dec_and_test(&(_b)->refcnt) )	\
-+			wake_up(&(_b)->waiting_to_free);	\
-+	} while (0)
-+
-+void netif_xenbus_init(void);
-+
-+#define netif_schedulable(netif)				\
-+	(netif_running((netif)->dev) && netback_carrier_ok(netif))
-+
-+void netif_schedule_work(netif_t *netif);
-+void netif_deschedule_work(netif_t *netif);
-+
-+int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
-+struct net_device_stats *netif_be_get_stats(struct net_device *dev);
-+irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs);
-+
-+static inline int netbk_can_queue(struct net_device *dev)
-+{
-+	netif_t *netif = netdev_priv(dev);
-+	return netif->can_queue;
-+}
+ static inline long
+diff --git a/arch/ia64/xen/suspend.c b/arch/ia64/xen/suspend.c
+index fd66b04..419c862 100644
+--- a/arch/ia64/xen/suspend.c
++++ b/arch/ia64/xen/suspend.c
+@@ -37,19 +37,14 @@ xen_mm_unpin_all(void)
+ 	/* nothing */
+ }
+ 
+-void xen_pre_device_suspend(void)
+-{
+-	/* nothing */
+-}
+-
+ void
+-xen_pre_suspend()
++xen_arch_pre_suspend()
+ {
+ 	/* nothing */
+ }
+ 
+ void
+-xen_post_suspend(int suspend_cancelled)
++xen_arch_post_suspend(int suspend_cancelled)
+ {
+ 	if (suspend_cancelled)
+ 		return;
+diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
+index a3c28ae..8508bfe 100644
+--- a/arch/x86/include/asm/xen/hypercall.h
++++ b/arch/x86/include/asm/xen/hypercall.h
+@@ -287,7 +287,7 @@ HYPERVISOR_fpu_taskswitch(int set)
+ static inline int
+ HYPERVISOR_sched_op(int cmd, void *arg)
+ {
+-	return _hypercall2(int, sched_op_new, cmd, arg);
++	return _hypercall2(int, sched_op, cmd, arg);
+ }
+ 
+ static inline long
+@@ -422,10 +422,17 @@ HYPERVISOR_set_segment_base(int reg, unsigned long value)
+ #endif
+ 
+ static inline int
+-HYPERVISOR_suspend(unsigned long srec)
++HYPERVISOR_suspend(unsigned long start_info_mfn)
+ {
+-	return _hypercall3(int, sched_op, SCHEDOP_shutdown,
+-			   SHUTDOWN_suspend, srec);
++	struct sched_shutdown r = { .reason = SHUTDOWN_suspend };
 +
-+static inline int netbk_can_sg(struct net_device *dev)
-+{
-+	netif_t *netif = netdev_priv(dev);
-+	return netif->features & NETIF_F_SG;
-+}
++	/*
++	 * For a PV guest the tools require that the start_info mfn be
++	 * present in rdx/edx when the hypercall is made. Per the
++	 * hypercall calling convention this is the third hypercall
++	 * argument, which is start_info_mfn here.
++	 */
++	return _hypercall3(int, sched_op, SCHEDOP_shutdown, &r, start_info_mfn);
+ }
+ 
+ static inline int
+diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
+index f25bdf2..64a619d 100644
+--- a/arch/x86/include/asm/xen/page.h
++++ b/arch/x86/include/asm/xen/page.h
+@@ -29,8 +29,10 @@ typedef struct xpaddr {
+ 
+ /**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
+ #define INVALID_P2M_ENTRY	(~0UL)
+-#define FOREIGN_FRAME_BIT	(1UL<<31)
++#define FOREIGN_FRAME_BIT	(1UL<<(BITS_PER_LONG-1))
++#define IDENTITY_FRAME_BIT	(1UL<<(BITS_PER_LONG-2))
+ #define FOREIGN_FRAME(m)	((m) | FOREIGN_FRAME_BIT)
++#define IDENTITY_FRAME(m)	((m) | IDENTITY_FRAME_BIT)
+ 
+ /* Maximum amount of memory we can handle in a domain in pages */
+ #define MAX_DOMAIN_PAGES						\
+@@ -41,12 +43,19 @@ extern unsigned int   machine_to_phys_order;
+ 
+ extern unsigned long get_phys_to_machine(unsigned long pfn);
+ extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
++extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
++extern unsigned long set_phys_range_identity(unsigned long pfn_s,
++					     unsigned long pfn_e);
+ 
+-extern int m2p_add_override(unsigned long mfn, struct page *page);
+-extern int m2p_remove_override(struct page *page);
++extern int m2p_add_override(unsigned long mfn, struct page *page,
++			    bool clear_pte);
++extern int m2p_remove_override(struct page *page, bool clear_pte);
+ extern struct page *m2p_find_override(unsigned long mfn);
+ extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
+ 
++#ifdef CONFIG_XEN_DEBUG_FS
++extern int p2m_dump_show(struct seq_file *m, void *v);
++#endif
+ static inline unsigned long pfn_to_mfn(unsigned long pfn)
+ {
+ 	unsigned long mfn;
+@@ -57,7 +66,7 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn)
+ 	mfn = get_phys_to_machine(pfn);
+ 
+ 	if (mfn != INVALID_P2M_ENTRY)
+-		mfn &= ~FOREIGN_FRAME_BIT;
++		mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);
+ 
+ 	return mfn;
+ }
+@@ -73,25 +82,44 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn)
+ static inline unsigned long mfn_to_pfn(unsigned long mfn)
+ {
+ 	unsigned long pfn;
++	int ret = 0;
+ 
+ 	if (xen_feature(XENFEAT_auto_translated_physmap))
+ 		return mfn;
+ 
++	if (unlikely((mfn >> machine_to_phys_order) != 0)) {
++		pfn = ~0;
++		goto try_override;
++	}
+ 	pfn = 0;
+ 	/*
+ 	 * The array access can fail (e.g., device space beyond end of RAM).
+ 	 * In such cases it doesn't matter what we return (we return garbage),
+ 	 * but we must handle the fault without crashing!
+ 	 */
+-	__get_user(pfn, &machine_to_phys_mapping[mfn]);
+-
+-	/*
+-	 * If this appears to be a foreign mfn (because the pfn
+-	 * doesn't map back to the mfn), then check the local override
+-	 * table to see if there's a better pfn to use.
++	ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
++try_override:
++	/* ret might be < 0 if there are no entries in the m2p for mfn */
++	if (ret < 0)
++		pfn = ~0;
++	else if (get_phys_to_machine(pfn) != mfn)
++		/*
++		 * If this appears to be a foreign mfn (because the pfn
++		 * doesn't map back to the mfn), then check the local override
++		 * table to see if there's a better pfn to use.
++		 *
++		 * m2p_find_override_pfn returns ~0 if it doesn't find anything.
++		 */
++		pfn = m2p_find_override_pfn(mfn, ~0);
 +
-+#endif /* __NETIF__BACKEND__COMMON_H__ */
-diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
-new file mode 100644
-index 0000000..7e67941
---- /dev/null
-+++ b/drivers/xen/netback/interface.c
-@@ -0,0 +1,336 @@
-+/******************************************************************************
-+ * arch/xen/drivers/netif/backend/interface.c
-+ *
-+ * Network-device interface management.
-+ *
-+ * Copyright (c) 2004-2005, Keir Fraser
-+ *
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License version 2
-+ * as published by the Free Software Foundation; or, when distributed
-+ * separately from the Linux kernel or incorporated into other
-+ * software packages, subject to the following license:
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a copy
-+ * of this source file (the "Software"), to deal in the Software without
-+ * restriction, including without limitation the rights to use, copy, modify,
-+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
-+ * and to permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-+ * IN THE SOFTWARE.
-+ */
-+
-+#include "common.h"
-+#include <linux/ethtool.h>
-+#include <linux/rtnetlink.h>
-+
-+/*
-+ * Module parameter 'queue_length':
-+ *
-+ * Enables queuing in the network stack when a client has run out of receive
-+ * descriptors. Although this feature can improve receive bandwidth by avoiding
-+ * packet loss, it can also result in packets sitting in the 'tx_queue' for
-+ * unbounded time. This is bad if those packets hold onto foreign resources.
-+ * For example, consider a packet that holds onto resources belonging to the
-+ * guest for which it is queued (e.g., packet received on vif1.0, destined for
-+ * vif1.1 which is not activated in the guest): in this situation the guest
-+ * will never be destroyed, unless vif1.1 is taken down. To avoid this, we
-+ * run a timer (tx_queue_timeout) to drain the queue when the interface is
-+ * blocked.
-+ */
-+static unsigned long netbk_queue_length = 32;
-+module_param_named(queue_length, netbk_queue_length, ulong, 0);
-+
-+static void __netif_up(netif_t *netif)
-+{
-+	enable_irq(netif->irq);
-+	netif_schedule_work(netif);
-+}
-+
-+static void __netif_down(netif_t *netif)
-+{
-+	disable_irq(netif->irq);
-+	netif_deschedule_work(netif);
-+}
-+
-+static int net_open(struct net_device *dev)
-+{
-+	netif_t *netif = netdev_priv(dev);
-+	if (netback_carrier_ok(netif)) {
-+		__netif_up(netif);
-+		netif_start_queue(dev);
-+	}
-+	return 0;
-+}
-+
-+static int net_close(struct net_device *dev)
-+{
-+	netif_t *netif = netdev_priv(dev);
-+	if (netback_carrier_ok(netif))
-+		__netif_down(netif);
-+	netif_stop_queue(dev);
-+	return 0;
-+}
-+
-+static int netbk_change_mtu(struct net_device *dev, int mtu)
-+{
-+	int max = netbk_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
-+
-+	if (mtu > max)
-+		return -EINVAL;
-+	dev->mtu = mtu;
-+	return 0;
-+}
-+
-+static int netbk_set_sg(struct net_device *dev, u32 data)
-+{
-+	if (data) {
-+		netif_t *netif = netdev_priv(dev);
-+
-+		if (!(netif->features & NETIF_F_SG))
-+			return -ENOSYS;
-+	}
-+
-+	return ethtool_op_set_sg(dev, data);
-+}
-+
-+static int netbk_set_tso(struct net_device *dev, u32 data)
-+{
-+	if (data) {
-+		netif_t *netif = netdev_priv(dev);
-+
-+		if (!(netif->features & NETIF_F_TSO))
-+			return -ENOSYS;
-+	}
-+
-+	return ethtool_op_set_tso(dev, data);
-+}
-+
-+static struct ethtool_ops network_ethtool_ops =
-+{
-+	.get_tx_csum = ethtool_op_get_tx_csum,
-+	.set_tx_csum = ethtool_op_set_tx_csum,
-+	.get_sg = ethtool_op_get_sg,
-+	.set_sg = netbk_set_sg,
-+	.get_tso = ethtool_op_get_tso,
-+	.set_tso = netbk_set_tso,
-+	.get_link = ethtool_op_get_link,
-+};
-+
-+netif_t *netif_alloc(domid_t domid, unsigned int handle)
-+{
-+	int err = 0;
-+	struct net_device *dev;
-+	netif_t *netif;
-+	char name[IFNAMSIZ] = {};
-+
-+	snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
-+	dev = alloc_netdev(sizeof(netif_t), name, ether_setup);
-+	if (dev == NULL) {
-+		DPRINTK("Could not create netif: out of memory\n");
-+		return ERR_PTR(-ENOMEM);
-+	}
-+
-+	netif = netdev_priv(dev);
-+	memset(netif, 0, sizeof(*netif));
-+	netif->domid  = domid;
-+	netif->handle = handle;
-+	atomic_set(&netif->refcnt, 1);
-+	init_waitqueue_head(&netif->waiting_to_free);
-+	netif->dev = dev;
-+
-+	netback_carrier_off(netif);
-+
-+	netif->credit_bytes = netif->remaining_credit = ~0UL;
-+	netif->credit_usec  = 0UL;
-+	init_timer(&netif->credit_timeout);
-+	/* Initialize 'expires' now: it's used to track the credit window. */
-+	netif->credit_timeout.expires = jiffies;
-+
-+	init_timer(&netif->tx_queue_timeout);
-+
-+	dev->hard_start_xmit = netif_be_start_xmit;
-+	dev->get_stats       = netif_be_get_stats;
-+	dev->open            = net_open;
-+	dev->stop            = net_close;
-+	dev->change_mtu	     = netbk_change_mtu;
-+	dev->features        = NETIF_F_IP_CSUM;
-+
-+	SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
-+
-+	dev->tx_queue_len = netbk_queue_length;
-+
-+	/*
-+	 * Initialise a dummy MAC address. We choose the numerically
-+	 * largest non-broadcast address to prevent the address getting
-+	 * stolen by an Ethernet bridge for STP purposes.
-+	 * (FE:FF:FF:FF:FF:FF)
-+	 */
-+	memset(dev->dev_addr, 0xFF, ETH_ALEN);
-+	dev->dev_addr[0] &= ~0x01;
-+
-+	rtnl_lock();
-+	err = register_netdevice(dev);
-+	rtnl_unlock();
-+	if (err) {
-+		DPRINTK("Could not register new net device %s: err=%d\n",
-+			dev->name, err);
-+		free_netdev(dev);
-+		return ERR_PTR(err);
-+	}
-+
-+	DPRINTK("Successfully created netif\n");
-+	return netif;
-+}
-+
-+static int map_frontend_pages(
-+	netif_t *netif, grant_ref_t tx_ring_ref, grant_ref_t rx_ring_ref)
-+{
-+	struct gnttab_map_grant_ref op;
-+
-+	gnttab_set_map_op(&op, (unsigned long)netif->tx_comms_area->addr,
-+			  GNTMAP_host_map, tx_ring_ref, netif->domid);
-+
-+	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
-+		BUG();
-+
-+	if (op.status) {
-+		DPRINTK(" Gnttab failure mapping tx_ring_ref!\n");
-+		return op.status;
-+	}
-+
-+	netif->tx_shmem_ref    = tx_ring_ref;
-+	netif->tx_shmem_handle = op.handle;
-+
-+	gnttab_set_map_op(&op, (unsigned long)netif->rx_comms_area->addr,
-+			  GNTMAP_host_map, rx_ring_ref, netif->domid);
-+
-+	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
-+		BUG();
-+
-+	if (op.status) {
-+		DPRINTK(" Gnttab failure mapping rx_ring_ref!\n");
-+		return op.status;
-+	}
-+
-+	netif->rx_shmem_ref    = rx_ring_ref;
-+	netif->rx_shmem_handle = op.handle;
-+
-+	return 0;
-+}
-+
-+static void unmap_frontend_pages(netif_t *netif)
++	/* 
++	 * pfn is ~0 if there are no entries in the m2p for mfn or if the
++	 * entry doesn't map back to the mfn and m2p_override doesn't have a
++	 * valid entry for it.
+ 	 */
+-	if (get_phys_to_machine(pfn) != mfn)
+-		pfn = m2p_find_override_pfn(mfn, pfn);
++	if (pfn == ~0 &&
++			get_phys_to_machine(mfn) == IDENTITY_FRAME(mfn))
++		pfn = mfn;
+ 
+ 	return pfn;
+ }
+diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h
+index 2329b3e..4fbda9a 100644
+--- a/arch/x86/include/asm/xen/pci.h
++++ b/arch/x86/include/asm/xen/pci.h
+@@ -15,10 +15,26 @@ static inline int pci_xen_hvm_init(void)
+ #endif
+ #if defined(CONFIG_XEN_DOM0)
+ void __init xen_setup_pirqs(void);
++int xen_find_device_domain_owner(struct pci_dev *dev);
++int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain);
++int xen_unregister_device_domain_owner(struct pci_dev *dev);
+ #else
+ static inline void __init xen_setup_pirqs(void)
+ {
+ }
++static inline int xen_find_device_domain_owner(struct pci_dev *dev)
 +{
-+	struct gnttab_unmap_grant_ref op;
-+
-+	gnttab_set_unmap_op(&op, (unsigned long)netif->tx_comms_area->addr,
-+			    GNTMAP_host_map, netif->tx_shmem_handle);
-+
-+	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
-+		BUG();
-+
-+	gnttab_set_unmap_op(&op, (unsigned long)netif->rx_comms_area->addr,
-+			    GNTMAP_host_map, netif->rx_shmem_handle);
-+
-+	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
-+		BUG();
++	return -1;
 +}
-+
-+int netif_map(netif_t *netif, unsigned long tx_ring_ref,
-+	      unsigned long rx_ring_ref, unsigned int evtchn)
++static inline int xen_register_device_domain_owner(struct pci_dev *dev,
++						   uint16_t domain)
 +{
-+	int err = -ENOMEM;
-+	netif_tx_sring_t *txs;
-+	netif_rx_sring_t *rxs;
-+
-+	/* Already connected through? */
-+	if (netif->irq)
-+		return 0;
-+
-+	netif->tx_comms_area = alloc_vm_area(PAGE_SIZE);
-+	if (netif->tx_comms_area == NULL)
-+		return -ENOMEM;
-+	netif->rx_comms_area = alloc_vm_area(PAGE_SIZE);
-+	if (netif->rx_comms_area == NULL)
-+		goto err_rx;
-+
-+	err = map_frontend_pages(netif, tx_ring_ref, rx_ring_ref);
-+	if (err)
-+		goto err_map;
-+
-+	err = bind_interdomain_evtchn_to_irqhandler(
-+		netif->domid, evtchn, netif_be_int, 0,
-+		netif->dev->name, netif);
-+	if (err < 0)
-+		goto err_hypervisor;
-+	netif->irq = err;
-+	disable_irq(netif->irq);
-+
-+	txs = (netif_tx_sring_t *)netif->tx_comms_area->addr;
-+	BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE);
-+
-+	rxs = (netif_rx_sring_t *)
-+		((char *)netif->rx_comms_area->addr);
-+	BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE);
-+
-+	netif->rx_req_cons_peek = 0;
-+
-+	netif_get(netif);
-+
-+	rtnl_lock();
-+	netback_carrier_on(netif);
-+	if (netif_running(netif->dev))
-+		__netif_up(netif);
-+	rtnl_unlock();
-+
-+	return 0;
-+err_hypervisor:
-+	unmap_frontend_pages(netif);
-+err_map:
-+	free_vm_area(netif->rx_comms_area);
-+err_rx:
-+	free_vm_area(netif->tx_comms_area);
-+	return err;
++	return -1;
 +}
-+
-+void netif_disconnect(netif_t *netif)
++static inline int xen_unregister_device_domain_owner(struct pci_dev *dev)
 +{
-+	if (netback_carrier_ok(netif)) {
-+		rtnl_lock();
-+		netback_carrier_off(netif);
-+		netif_carrier_off(netif->dev); /* discard queued packets */
-+		if (netif_running(netif->dev))
-+			__netif_down(netif);
-+		rtnl_unlock();
-+		netif_put(netif);
-+	}
-+
-+	atomic_dec(&netif->refcnt);
-+	wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0);
-+
-+	del_timer_sync(&netif->credit_timeout);
-+	del_timer_sync(&netif->tx_queue_timeout);
-+
-+	if (netif->irq)
-+		unbind_from_irqhandler(netif->irq, netif);
-+
-+	unregister_netdev(netif->dev);
-+
-+	if (netif->tx.sring) {
-+		unmap_frontend_pages(netif);
-+		free_vm_area(netif->tx_comms_area);
-+		free_vm_area(netif->rx_comms_area);
-+	}
-+
-+	free_netdev(netif->dev);
++	return -1;
 +}
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-new file mode 100644
-index 0000000..db629d4
---- /dev/null
-+++ b/drivers/xen/netback/netback.c
-@@ -0,0 +1,1637 @@
-+/******************************************************************************
-+ * drivers/xen/netback/netback.c
-+ *
-+ * Back-end of the driver for virtual network devices. This portion of the
-+ * driver exports a 'unified' network-device interface that can be accessed
-+ * by any operating system that implements a compatible front end. A
-+ * reference front-end implementation can be found in:
-+ *  drivers/xen/netfront/netfront.c
-+ *
-+ * Copyright (c) 2002-2005, K A Fraser
-+ *
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License version 2
-+ * as published by the Free Software Foundation; or, when distributed
-+ * separately from the Linux kernel or incorporated into other
-+ * software packages, subject to the following license:
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a copy
-+ * of this source file (the "Software"), to deal in the Software without
-+ * restriction, including without limitation the rights to use, copy, modify,
-+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
-+ * and to permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-+ * IN THE SOFTWARE.
-+ */
-+
-+#include "common.h"
-+#include <xen/balloon.h>
-+#include <xen/interface/memory.h>
-+
-+/*define NETBE_DEBUG_INTERRUPT*/
-+
-+struct netbk_rx_meta {
-+	skb_frag_t frag;
-+	int id;
-+	u8 copy:1;
-+};
-+
-+struct netbk_tx_pending_inuse {
-+	struct list_head list;
-+	unsigned long alloc_time;
-+};
-+
-+static void netif_idx_release(u16 pending_idx);
-+static void make_tx_response(netif_t *netif,
-+			     netif_tx_request_t *txp,
-+			     s8       st);
-+static netif_rx_response_t *make_rx_response(netif_t *netif,
-+					     u16      id,
-+					     s8       st,
-+					     u16      offset,
-+					     u16      size,
-+					     u16      flags);
-+
-+static void net_tx_action(unsigned long unused);
-+static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0);
-+
-+static void net_rx_action(unsigned long unused);
-+static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);
-+
-+static struct timer_list net_timer;
-+static struct timer_list netbk_tx_pending_timer;
-+
-+#define MAX_PENDING_REQS 256
-+
-+static struct sk_buff_head rx_queue;
-+
-+static struct page **mmap_pages;
-+static inline unsigned long idx_to_pfn(unsigned int idx)
-+{
-+	return page_to_pfn(mmap_pages[idx]);
-+}
-+
-+static inline unsigned long idx_to_kaddr(unsigned int idx)
-+{
-+	return (unsigned long)pfn_to_kaddr(idx_to_pfn(idx));
-+}
-+
-+/* extra field used in struct page */
-+static inline void netif_set_page_index(struct page *pg, unsigned int index)
-+{
-+	*(unsigned long *)&pg->mapping = index;
-+}
-+
-+static inline int netif_page_index(struct page *pg)
-+{
-+	unsigned long idx = (unsigned long)pg->mapping;
-+
-+	if (!PageForeign(pg))
-+		return -1;
-+
-+	if ((idx >= MAX_PENDING_REQS) || (mmap_pages[idx] != pg))
-+		return -1;
-+
-+	return idx;
-+}
-+
-+#define PKT_PROT_LEN 64
-+
-+static struct pending_tx_info {
-+	netif_tx_request_t req;
-+	netif_t *netif;
-+} pending_tx_info[MAX_PENDING_REQS];
-+static u16 pending_ring[MAX_PENDING_REQS];
-+typedef unsigned int PEND_RING_IDX;
-+#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
-+static PEND_RING_IDX pending_prod, pending_cons;
-+#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
-+
-+/* Freed TX SKBs get batched on this ring before return to pending_ring. */
-+static u16 dealloc_ring[MAX_PENDING_REQS];
-+static PEND_RING_IDX dealloc_prod, dealloc_cons;
-+
-+/* Doubly-linked list of in-use pending entries. */
-+static struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
-+static LIST_HEAD(pending_inuse_head);
-+
-+static struct sk_buff_head tx_queue;
-+
-+static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
-+static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
-+static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
-+
-+static struct list_head net_schedule_list;
-+static spinlock_t net_schedule_list_lock;
-+
-+#define MAX_MFN_ALLOC 64
-+static unsigned long mfn_list[MAX_MFN_ALLOC];
-+static unsigned int alloc_index = 0;
-+
-+/* Setting this allows the safe use of this driver without netloop. */
-+static int MODPARM_copy_skb = 1;
-+module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
-+MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
-+
-+int netbk_copy_skb_mode;
-+
-+static inline unsigned long alloc_mfn(void)
-+{
-+	BUG_ON(alloc_index == 0);
-+	return mfn_list[--alloc_index];
-+}
-+
-+static int check_mfn(int nr)
-+{
-+	struct xen_memory_reservation reservation = {
-+		.extent_order = 0,
-+		.domid        = DOMID_SELF
-+	};
-+	int rc;
-+
-+	if (likely(alloc_index >= nr))
-+		return 0;
-+
-+	set_xen_guest_handle(reservation.extent_start, mfn_list + alloc_index);
-+	reservation.nr_extents = MAX_MFN_ALLOC - alloc_index;
-+	rc = HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation);
-+	if (likely(rc > 0))
-+		alloc_index += rc;
-+
-+	return alloc_index >= nr ? 0 : -ENOMEM;
-+}
-+
-+static inline void maybe_schedule_tx_action(void)
-+{
-+	smp_mb();
-+	if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
-+	    !list_empty(&net_schedule_list))
-+		tasklet_schedule(&net_tx_tasklet);
-+}
-+
-+static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
-+{
-+	struct skb_shared_info *ninfo;
-+	struct sk_buff *nskb;
-+	unsigned long offset;
-+	int ret;
-+	int len;
-+	int headlen;
-+
-+	BUG_ON(skb_shinfo(skb)->frag_list != NULL);
-+
-+	nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN);
-+	if (unlikely(!nskb))
-+		goto err;
-+
-+	skb_reserve(nskb, 16 + NET_IP_ALIGN);
-+	headlen = nskb->end - nskb->data;
-+	if (headlen > skb_headlen(skb))
-+		headlen = skb_headlen(skb);
-+	ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
-+	BUG_ON(ret);
-+
-+	ninfo = skb_shinfo(nskb);
-+	ninfo->gso_size = skb_shinfo(skb)->gso_size;
-+	ninfo->gso_type = skb_shinfo(skb)->gso_type;
-+
-+	offset = headlen;
-+	len = skb->len - headlen;
-+
-+	nskb->len = skb->len;
-+	nskb->data_len = len;
-+	nskb->truesize += len;
-+
-+	while (len) {
-+		struct page *page;
-+		int copy;
-+		int zero;
-+
-+		if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
-+			dump_stack();
-+			goto err_free;
-+		}
-+
-+		copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
-+		zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
-+
-+		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
-+		if (unlikely(!page))
-+			goto err_free;
-+
-+		ret = skb_copy_bits(skb, offset, page_address(page), copy);
-+		BUG_ON(ret);
-+
-+		ninfo->frags[ninfo->nr_frags].page = page;
-+		ninfo->frags[ninfo->nr_frags].page_offset = 0;
-+		ninfo->frags[ninfo->nr_frags].size = copy;
-+		ninfo->nr_frags++;
-+
-+		offset += copy;
-+		len -= copy;
-+	}
-+
-+	offset = nskb->data - skb->data;
-+
-+	nskb->h.raw = skb->h.raw + offset;
-+	nskb->nh.raw = skb->nh.raw + offset;
-+	nskb->mac.raw = skb->mac.raw + offset;
-+
-+	return nskb;
-+
-+ err_free:
-+	kfree_skb(nskb);
-+ err:
-+	return NULL;
-+}
-+
-+static inline int netbk_max_required_rx_slots(netif_t *netif)
-+{
-+	if (netif->features & (NETIF_F_SG|NETIF_F_TSO))
-+		return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
-+	return 1; /* all in one */
-+}
-+
-+static inline int netbk_queue_full(netif_t *netif)
-+{
-+	RING_IDX peek   = netif->rx_req_cons_peek;
-+	RING_IDX needed = netbk_max_required_rx_slots(netif);
-+
-+	return ((netif->rx.sring->req_prod - peek) < needed) ||
-+	       ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
-+}
-+
-+static void tx_queue_callback(unsigned long data)
-+{
-+	netif_t *netif = (netif_t *)data;
-+	if (netif_schedulable(netif))
-+		netif_wake_queue(netif->dev);
-+}
-+
-+int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
-+{
-+	netif_t *netif = netdev_priv(dev);
-+
-+	BUG_ON(skb->dev != dev);
-+
-+	/* Drop the packet if the target domain has no receive buffers. */
-+	if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
-+		goto drop;
-+
-+	/*
-+	 * Copy the packet here if it's destined for a flipping interface
-+	 * but isn't flippable (e.g. extra references to data).
-+	 * XXX For now we also copy skbuffs whose head crosses a page
-+	 * boundary, because netbk_gop_skb can't handle them.
-+	 */
-+	if (!netif->copying_receiver ||
-+	    ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE)) {
-+		struct sk_buff *nskb = netbk_copy_skb(skb);
-+		if ( unlikely(nskb == NULL) )
-+			goto drop;
-+		/* Copy only the header fields we use in this driver. */
-+		nskb->dev = skb->dev;
-+		nskb->ip_summed = skb->ip_summed;
-+		nskb->proto_data_valid = skb->proto_data_valid;
-+		dev_kfree_skb(skb);
-+		skb = nskb;
-+	}
-+
-+	netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 +
-+				   !!skb_shinfo(skb)->gso_size;
-+	netif_get(netif);
-+
-+	if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
-+		netif->rx.sring->req_event = netif->rx_req_cons_peek +
-+			netbk_max_required_rx_slots(netif);
-+		mb(); /* request notification /then/ check & stop the queue */
-+		if (netbk_queue_full(netif)) {
-+			netif_stop_queue(dev);
-+			/*
-+			 * Schedule 500ms timeout to restart the queue, thus
-+			 * ensuring that an inactive queue will be drained.
-+			 * Packets will be immediately be dropped until more
-+			 * receive buffers become available (see
-+			 * netbk_queue_full() check above).
-+			 */
-+			netif->tx_queue_timeout.data = (unsigned long)netif;
-+			netif->tx_queue_timeout.function = tx_queue_callback;
-+			__mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
-+		}
-+	}
-+
-+	skb_queue_tail(&rx_queue, skb);
-+	tasklet_schedule(&net_rx_tasklet);
-+
-+	return 0;
-+
-+ drop:
-+	netif->stats.tx_dropped++;
-+	dev_kfree_skb(skb);
-+	return 0;
-+}
-+
-+#if 0
-+static void xen_network_done_notify(void)
-+{
-+	static struct net_device *eth0_dev = NULL;
-+	if (unlikely(eth0_dev == NULL))
-+		eth0_dev = __dev_get_by_name("eth0");
-+	netif_rx_schedule(eth0_dev);
-+}
-+/*
-+ * Add following to poll() function in NAPI driver (Tigon3 is example):
-+ *  if ( xen_network_done() )
-+ *      tg3_enable_ints(tp);
-+ */
-+int xen_network_done(void)
-+{
-+	return skb_queue_empty(&rx_queue);
-+}
-+#endif
-+
-+struct netrx_pending_operations {
-+	unsigned trans_prod, trans_cons;
-+	unsigned mmu_prod, mmu_mcl;
-+	unsigned mcl_prod, mcl_cons;
-+	unsigned copy_prod, copy_cons;
-+	unsigned meta_prod, meta_cons;
-+	mmu_update_t *mmu;
-+	gnttab_transfer_t *trans;
-+	gnttab_copy_t *copy;
-+	multicall_entry_t *mcl;
-+	struct netbk_rx_meta *meta;
-+};
-+
-+/* Set up the grant operations for this fragment.  If it's a flipping
-+   interface, we also set up the unmap request from here. */
-+static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta,
-+			  int i, struct netrx_pending_operations *npo,
-+			  struct page *page, unsigned long size,
-+			  unsigned long offset)
-+{
-+	mmu_update_t *mmu;
-+	gnttab_transfer_t *gop;
-+	gnttab_copy_t *copy_gop;
-+	multicall_entry_t *mcl;
-+	netif_rx_request_t *req;
-+	unsigned long old_mfn, new_mfn;
-+	int idx = netif_page_index(page);
-+
-+	old_mfn = virt_to_mfn(page_address(page));
-+
-+	req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
-+	if (netif->copying_receiver) {
-+		/* The fragment needs to be copied rather than
-+		   flipped. */
-+		meta->copy = 1;
-+		copy_gop = npo->copy + npo->copy_prod++;
-+		copy_gop->flags = GNTCOPY_dest_gref;
-+		if (idx > -1) {
-+			struct pending_tx_info *src_pend = &pending_tx_info[idx];
-+			copy_gop->source.domid = src_pend->netif->domid;
-+			copy_gop->source.u.ref = src_pend->req.gref;
-+			copy_gop->flags |= GNTCOPY_source_gref;
-+		} else {
-+			copy_gop->source.domid = DOMID_SELF;
-+			copy_gop->source.u.gmfn = old_mfn;
-+		}
-+		copy_gop->source.offset = offset;
-+		copy_gop->dest.domid = netif->domid;
-+		copy_gop->dest.offset = 0;
-+		copy_gop->dest.u.ref = req->gref;
-+		copy_gop->len = size;
-+	} else {
-+		meta->copy = 0;
-+		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-+			new_mfn = alloc_mfn();
-+
-+			/*
-+			 * Set the new P2M table entry before
-+			 * reassigning the old data page. Heed the
-+			 * comment in pgtable-2level.h:pte_page(). :-)
-+			 */
-+			set_phys_to_machine(page_to_pfn(page), new_mfn);
-+
-+			mcl = npo->mcl + npo->mcl_prod++;
-+			MULTI_update_va_mapping(mcl,
-+					     (unsigned long)page_address(page),
-+					     pfn_pte_ma(new_mfn, PAGE_KERNEL),
-+					     0);
-+
-+			mmu = npo->mmu + npo->mmu_prod++;
-+			mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
-+				MMU_MACHPHYS_UPDATE;
-+			mmu->val = page_to_pfn(page);
-+		}
-+
-+		gop = npo->trans + npo->trans_prod++;
-+		gop->mfn = old_mfn;
-+		gop->domid = netif->domid;
-+		gop->ref = req->gref;
-+	}
-+	return req->id;
-+}
-+
-+static void netbk_gop_skb(struct sk_buff *skb,
-+			  struct netrx_pending_operations *npo)
-+{
-+	netif_t *netif = netdev_priv(skb->dev);
-+	int nr_frags = skb_shinfo(skb)->nr_frags;
-+	int i;
-+	int extra;
-+	struct netbk_rx_meta *head_meta, *meta;
-+
-+	head_meta = npo->meta + npo->meta_prod++;
-+	head_meta->frag.page_offset = skb_shinfo(skb)->gso_type;
-+	head_meta->frag.size = skb_shinfo(skb)->gso_size;
-+	extra = !!head_meta->frag.size + 1;
-+
-+	for (i = 0; i < nr_frags; i++) {
-+		meta = npo->meta + npo->meta_prod++;
-+		meta->frag = skb_shinfo(skb)->frags[i];
-+		meta->id = netbk_gop_frag(netif, meta, i + extra, npo,
-+					  meta->frag.page,
-+					  meta->frag.size,
-+					  meta->frag.page_offset);
-+	}
-+
-+	/*
-+	 * This must occur at the end to ensure that we don't trash skb_shinfo
-+	 * until we're done. We know that the head doesn't cross a page
-+	 * boundary because such packets get copied in netif_be_start_xmit.
-+	 */
-+	head_meta->id = netbk_gop_frag(netif, head_meta, 0, npo,
-+				       virt_to_page(skb->data),
-+				       skb_headlen(skb),
-+				       offset_in_page(skb->data));
-+
-+	netif->rx.req_cons += nr_frags + extra;
-+}
-+
-+static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
-+{
-+	int i;
-+
-+	for (i = 0; i < nr_frags; i++)
-+		put_page(meta[i].frag.page);
-+}
-+
-+/* This is a twin to netbk_gop_skb.  Assume that netbk_gop_skb was
-+   used to set up the operations on the top of
-+   netrx_pending_operations, which have since been done.  Check that
-+   they didn't give any errors and advance over them. */
-+static int netbk_check_gop(int nr_frags, domid_t domid,
-+			   struct netrx_pending_operations *npo)
-+{
-+	multicall_entry_t *mcl;
-+	gnttab_transfer_t *gop;
-+	gnttab_copy_t     *copy_op;
-+	int status = NETIF_RSP_OKAY;
-+	int i;
-+
-+	for (i = 0; i <= nr_frags; i++) {
-+		if (npo->meta[npo->meta_cons + i].copy) {
-+			copy_op = npo->copy + npo->copy_cons++;
-+			if (copy_op->status != GNTST_okay) {
-+				DPRINTK("Bad status %d from copy to DOM%d.\n",
-+					copy_op->status, domid);
-+				status = NETIF_RSP_ERROR;
-+			}
-+		} else {
-+			if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-+				mcl = npo->mcl + npo->mcl_cons++;
-+				/* The update_va_mapping() must not fail. */
-+				BUG_ON(mcl->result != 0);
-+			}
-+
-+			gop = npo->trans + npo->trans_cons++;
-+			/* Check the reassignment error code. */
-+			if (gop->status != 0) {
-+				DPRINTK("Bad status %d from grant transfer to DOM%u\n",
-+					gop->status, domid);
-+				/*
-+				 * Page no longer belongs to us unless
-+				 * GNTST_bad_page, but that should be
-+				 * a fatal error anyway.
-+				 */
-+				BUG_ON(gop->status == GNTST_bad_page);
-+				status = NETIF_RSP_ERROR;
-+			}
+ #endif
+ 
+ #if defined(CONFIG_PCI_MSI)
+@@ -27,16 +43,16 @@ static inline void __init xen_setup_pirqs(void)
+  * its own functions.
+  */
+ struct xen_pci_frontend_ops {
+-	int (*enable_msi)(struct pci_dev *dev, int **vectors);
++	int (*enable_msi)(struct pci_dev *dev, int vectors[]);
+ 	void (*disable_msi)(struct pci_dev *dev);
+-	int (*enable_msix)(struct pci_dev *dev, int **vectors, int nvec);
++	int (*enable_msix)(struct pci_dev *dev, int vectors[], int nvec);
+ 	void (*disable_msix)(struct pci_dev *dev);
+ };
+ 
+ extern struct xen_pci_frontend_ops *xen_pci_frontend;
+ 
+ static inline int xen_pci_frontend_enable_msi(struct pci_dev *dev,
+-					      int **vectors)
++					      int vectors[])
+ {
+ 	if (xen_pci_frontend && xen_pci_frontend->enable_msi)
+ 		return xen_pci_frontend->enable_msi(dev, vectors);
+@@ -48,7 +64,7 @@ static inline void xen_pci_frontend_disable_msi(struct pci_dev *dev)
+ 			xen_pci_frontend->disable_msi(dev);
+ }
+ static inline int xen_pci_frontend_enable_msix(struct pci_dev *dev,
+-					       int **vectors, int nvec)
++					       int vectors[], int nvec)
+ {
+ 	if (xen_pci_frontend && xen_pci_frontend->enable_msix)
+ 		return xen_pci_frontend->enable_msix(dev, vectors, nvec);
+diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
+index 947f42a..66637bd 100644
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -283,6 +283,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
+ 	if (!after_bootmem && !start) {
+ 		pud_t *pud;
+ 		pmd_t *pmd;
++		unsigned long addr;
++		u64 size, memblock_addr;
+ 
+ 		mmu_cr4_features = read_cr4();
+ 
+@@ -291,11 +293,18 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
+ 		 * located on different 2M pages. cleanup_highmap(), however,
+ 		 * can only consider _end when it runs, so destroy any
+ 		 * mappings beyond _brk_end here.
++		 * Respect memblock reserved regions.
+ 		 */
+ 		pud = pud_offset(pgd_offset_k(_brk_end), _brk_end);
+ 		pmd = pmd_offset(pud, _brk_end - 1);
+-		while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1))
+-			pmd_clear(pmd);
++		addr = (_brk_end + PMD_SIZE - 1) & PMD_MASK;
++		while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1)) {
++			memblock_addr = memblock_x86_find_in_range_size(__pa(addr),
++					&size, PMD_SIZE);
++			if (memblock_addr == (u64) __pa(addr) && size >= PMD_SIZE)
++				pmd_clear(pmd);
++			addr += PMD_SIZE;
 +		}
-+	}
-+
-+	return status;
-+}
-+
-+static void netbk_add_frag_responses(netif_t *netif, int status,
-+				     struct netbk_rx_meta *meta, int nr_frags)
-+{
-+	int i;
-+	unsigned long offset;
-+
-+	for (i = 0; i < nr_frags; i++) {
-+		int id = meta[i].id;
-+		int flags = (i == nr_frags - 1) ? 0 : NETRXF_more_data;
-+
-+		if (meta[i].copy)
-+			offset = 0;
-+		else
-+			offset = meta[i].frag.page_offset;
-+		make_rx_response(netif, id, status, offset,
-+				 meta[i].frag.size, flags);
-+	}
-+}
-+
-+static void net_rx_action(unsigned long unused)
-+{
-+	netif_t *netif = NULL;
-+	s8 status;
-+	u16 id, irq, flags;
-+	netif_rx_response_t *resp;
-+	multicall_entry_t *mcl;
-+	struct sk_buff_head rxq;
-+	struct sk_buff *skb;
-+	int notify_nr = 0;
-+	int ret;
-+	int nr_frags;
-+	int count;
-+	unsigned long offset;
+ 	}
+ #endif
+ 	__flush_tlb_all();
+diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
+index 25cd4a0..309c0a0 100644
+--- a/arch/x86/pci/xen.c
++++ b/arch/x86/pci/xen.c
+@@ -20,7 +20,8 @@
+ #include <asm/xen/pci.h>
+ 
+ #ifdef CONFIG_ACPI
+-static int xen_hvm_register_pirq(u32 gsi, int triggering)
++static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
++				 int trigger, int polarity)
+ {
+ 	int rc, irq;
+ 	struct physdev_map_pirq map_irq;
+@@ -41,7 +42,7 @@ static int xen_hvm_register_pirq(u32 gsi, int triggering)
+ 		return -1;
+ 	}
+ 
+-	if (triggering == ACPI_EDGE_SENSITIVE) {
++	if (trigger == ACPI_EDGE_SENSITIVE) {
+ 		shareable = 0;
+ 		name = "ioapic-edge";
+ 	} else {
+@@ -55,12 +56,6 @@ static int xen_hvm_register_pirq(u32 gsi, int triggering)
+ 
+ 	return irq;
+ }
+-
+-static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
+-				 int trigger, int polarity)
+-{
+-	return xen_hvm_register_pirq(gsi, trigger);
+-}
+ #endif
+ 
+ #if defined(CONFIG_PCI_MSI)
+@@ -91,7 +86,7 @@ static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq,
+ 
+ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+ {
+-	int irq, pirq, ret = 0;
++	int irq, pirq;
+ 	struct msi_desc *msidesc;
+ 	struct msi_msg msg;
+ 
+@@ -99,39 +94,33 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+ 		__read_msi_msg(msidesc, &msg);
+ 		pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) |
+ 			((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff);
+-		if (xen_irq_from_pirq(pirq) >= 0 && msg.data == XEN_PIRQ_MSI_DATA) {
+-			xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ?
+-					"msi-x" : "msi", &irq, &pirq, XEN_ALLOC_IRQ);
+-			if (irq < 0)
++		if (msg.data != XEN_PIRQ_MSI_DATA ||
++		    xen_irq_from_pirq(pirq) < 0) {
++			pirq = xen_allocate_pirq_msi(dev, msidesc);
++			if (pirq < 0)
+ 				goto error;
+-			ret = set_irq_msi(irq, msidesc);
+-			if (ret < 0)
+-				goto error_while;
+-			printk(KERN_DEBUG "xen: msi already setup: msi --> irq=%d"
+-					" pirq=%d\n", irq, pirq);
+-			return 0;
++			xen_msi_compose_msg(dev, pirq, &msg);
++			__write_msi_msg(msidesc, &msg);
++			dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq);
++		} else {
++			dev_dbg(&dev->dev,
++				"xen: msi already bound to pirq=%d\n", pirq);
+ 		}
+-		xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ?
+-				"msi-x" : "msi", &irq, &pirq, (XEN_ALLOC_IRQ | XEN_ALLOC_PIRQ));
+-		if (irq < 0 || pirq < 0)
++		irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, 0,
++					       (type == PCI_CAP_ID_MSIX) ?
++					       "msi-x" : "msi",
++					       DOMID_SELF);
++		if (irq < 0)
+ 			goto error;
+-		printk(KERN_DEBUG "xen: msi --> irq=%d, pirq=%d\n", irq, pirq);
+-		xen_msi_compose_msg(dev, pirq, &msg);
+-		ret = set_irq_msi(irq, msidesc);
+-		if (ret < 0)
+-			goto error_while;
+-		write_msi_msg(irq, &msg);
++		dev_dbg(&dev->dev,
++			"xen: msi --> pirq=%d --> irq=%d\n", pirq, irq);
+ 	}
+ 	return 0;
+ 
+-error_while:
+-	unbind_from_irqhandler(irq, NULL);
+ error:
+-	if (ret == -ENODEV)
+-		dev_err(&dev->dev, "Xen PCI frontend has not registered" \
+-				" MSI/MSI-X support!\n");
+-
+-	return ret;
++	dev_err(&dev->dev,
++		"Xen PCI frontend has not registered MSI/MSI-X support!\n");
++	return -ENODEV;
+ }
+ 
+ /*
+@@ -150,35 +139,27 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+ 		return -ENOMEM;
+ 
+ 	if (type == PCI_CAP_ID_MSIX)
+-		ret = xen_pci_frontend_enable_msix(dev, &v, nvec);
++		ret = xen_pci_frontend_enable_msix(dev, v, nvec);
+ 	else
+-		ret = xen_pci_frontend_enable_msi(dev, &v);
++		ret = xen_pci_frontend_enable_msi(dev, v);
+ 	if (ret)
+ 		goto error;
+ 	i = 0;
+ 	list_for_each_entry(msidesc, &dev->msi_list, list) {
+-		irq = xen_allocate_pirq(v[i], 0, /* not sharable */
+-			(type == PCI_CAP_ID_MSIX) ?
+-			"pcifront-msi-x" : "pcifront-msi");
+-		if (irq < 0) {
+-			ret = -1;
++		irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0,
++					       (type == PCI_CAP_ID_MSIX) ?
++					       "pcifront-msi-x" :
++					       "pcifront-msi",
++						DOMID_SELF);
++		if (irq < 0)
+ 			goto free;
+-		}
+-
+-		ret = set_irq_msi(irq, msidesc);
+-		if (ret)
+-			goto error_while;
+ 		i++;
+ 	}
+ 	kfree(v);
+ 	return 0;
+ 
+-error_while:
+-	unbind_from_irqhandler(irq, NULL);
+ error:
+-	if (ret == -ENODEV)
+-		dev_err(&dev->dev, "Xen PCI frontend has not registered" \
+-			" MSI/MSI-X support!\n");
++	dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n");
+ free:
+ 	kfree(v);
+ 	return ret;
+@@ -193,6 +174,9 @@ static void xen_teardown_msi_irqs(struct pci_dev *dev)
+ 		xen_pci_frontend_disable_msix(dev);
+ 	else
+ 		xen_pci_frontend_disable_msi(dev);
 +
-+	/*
-+	 * Putting hundreds of bytes on the stack is considered rude.
-+	 * Static works because a tasklet can only be on one CPU at any time.
-+	 */
-+	static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+3];
-+	static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
-+	static gnttab_transfer_t grant_trans_op[NET_RX_RING_SIZE];
-+	static gnttab_copy_t grant_copy_op[NET_RX_RING_SIZE];
-+	static unsigned char rx_notify[NR_IRQS];
-+	static u16 notify_list[NET_RX_RING_SIZE];
-+	static struct netbk_rx_meta meta[NET_RX_RING_SIZE];
++	/* Free the IRQ's and the msidesc using the generic code. */
++	default_teardown_msi_irqs(dev);
+ }
+ 
+ static void xen_teardown_msi_irq(unsigned int irq)
+@@ -200,47 +184,91 @@ static void xen_teardown_msi_irq(unsigned int irq)
+ 	xen_destroy_irq(irq);
+ }
+ 
++#ifdef CONFIG_XEN_DOM0
+ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+ {
+-	int irq, ret;
++	int ret = 0;
+ 	struct msi_desc *msidesc;
+ 
+ 	list_for_each_entry(msidesc, &dev->msi_list, list) {
+-		irq = xen_create_msi_irq(dev, msidesc, type);
+-		if (irq < 0)
+-			return -1;
++		struct physdev_map_pirq map_irq;
++		domid_t domid;
+ 
+-		ret = set_irq_msi(irq, msidesc);
+-		if (ret)
+-			goto error;
+-	}
+-	return 0;
++		domid = ret = xen_find_device_domain_owner(dev);
++		/* N.B. Casting int's -ENODEV to uint16_t results in 0xFFED,
++		 * hence check ret value for < 0. */
++		if (ret < 0)
++			domid = DOMID_SELF;
+ 
+-error:
+-	xen_destroy_irq(irq);
++		memset(&map_irq, 0, sizeof(map_irq));
++		map_irq.domid = domid;
++		map_irq.type = MAP_PIRQ_TYPE_MSI;
++		map_irq.index = -1;
++		map_irq.pirq = -1;
++		map_irq.bus = dev->bus->number;
++		map_irq.devfn = dev->devfn;
 +
-+	struct netrx_pending_operations npo = {
-+		mmu: rx_mmu,
-+		trans: grant_trans_op,
-+		copy: grant_copy_op,
-+		mcl: rx_mcl,
-+		meta: meta};
++		if (type == PCI_CAP_ID_MSIX) {
++			int pos;
++			u32 table_offset, bir;
 +
-+	skb_queue_head_init(&rxq);
++			pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
 +
-+	count = 0;
++			pci_read_config_dword(dev, pos + PCI_MSIX_TABLE,
++					      &table_offset);
++			bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
 +
-+	while ((skb = skb_dequeue(&rx_queue)) != NULL) {
-+		nr_frags = skb_shinfo(skb)->nr_frags;
-+		*(int *)skb->cb = nr_frags;
-+
-+		if (!xen_feature(XENFEAT_auto_translated_physmap) &&
-+		    !((netif_t *)netdev_priv(skb->dev))->copying_receiver &&
-+		    check_mfn(nr_frags + 1)) {
-+			/* Memory squeeze? Back off for an arbitrary while. */
-+			if ( net_ratelimit() )
-+				WPRINTK("Memory squeeze in netback "
-+					"driver.\n");
-+			mod_timer(&net_timer, jiffies + HZ);
-+			skb_queue_head(&rx_queue, skb);
-+			break;
++			map_irq.table_base = pci_resource_start(dev, bir);
++			map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
 +		}
 +
-+		netbk_gop_skb(skb, &npo);
-+
-+		count += nr_frags + 1;
-+
-+		__skb_queue_tail(&rxq, skb);
-+
-+		/* Filled the batch queue? */
-+		if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE)
-+			break;
-+	}
-+
-+	BUG_ON(npo.meta_prod > ARRAY_SIZE(meta));
-+
-+	npo.mmu_mcl = npo.mcl_prod;
-+	if (npo.mcl_prod) {
-+		BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
-+		BUG_ON(npo.mmu_prod > ARRAY_SIZE(rx_mmu));
-+		mcl = npo.mcl + npo.mcl_prod++;
-+
-+		BUG_ON(mcl[-1].op != __HYPERVISOR_update_va_mapping);
-+		mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
-+
-+		mcl->op = __HYPERVISOR_mmu_update;
-+		mcl->args[0] = (unsigned long)rx_mmu;
-+		mcl->args[1] = npo.mmu_prod;
-+		mcl->args[2] = 0;
-+		mcl->args[3] = DOMID_SELF;
-+	}
-+
-+	if (npo.trans_prod) {
-+		BUG_ON(npo.trans_prod > ARRAY_SIZE(grant_trans_op));
-+		mcl = npo.mcl + npo.mcl_prod++;
-+		mcl->op = __HYPERVISOR_grant_table_op;
-+		mcl->args[0] = GNTTABOP_transfer;
-+		mcl->args[1] = (unsigned long)grant_trans_op;
-+		mcl->args[2] = npo.trans_prod;
-+	}
-+
-+	if (npo.copy_prod) {
-+		BUG_ON(npo.copy_prod > ARRAY_SIZE(grant_copy_op));
-+		mcl = npo.mcl + npo.mcl_prod++;
-+		mcl->op = __HYPERVISOR_grant_table_op;
-+		mcl->args[0] = GNTTABOP_copy;
-+		mcl->args[1] = (unsigned long)grant_copy_op;
-+		mcl->args[2] = npo.copy_prod;
-+	}
-+
-+	/* Nothing to do? */
-+	if (!npo.mcl_prod)
-+		return;
-+
-+	BUG_ON(npo.mcl_prod > ARRAY_SIZE(rx_mcl));
-+
-+	ret = HYPERVISOR_multicall(npo.mcl, npo.mcl_prod);
-+	BUG_ON(ret != 0);
-+	/* The mmu_machphys_update() must not fail. */
-+	BUG_ON(npo.mmu_mcl && npo.mcl[npo.mmu_mcl].result != 0);
-+
-+	while ((skb = __skb_dequeue(&rxq)) != NULL) {
-+		nr_frags = *(int *)skb->cb;
-+
-+		netif = netdev_priv(skb->dev);
-+		/* We can't rely on skb_release_data to release the
-+		   pages used by fragments for us, since it tries to
-+		   touch the pages in the fraglist.  If we're in
-+		   flipping mode, that doesn't work.  In copying mode,
-+		   we still have access to all of the pages, and so
-+		   it's safe to let release_data deal with it. */
-+		/* (Freeing the fragments is safe since we copy
-+		   non-linear skbs destined for flipping interfaces) */
-+		if (!netif->copying_receiver) {
-+			atomic_set(&(skb_shinfo(skb)->dataref), 1);
-+			skb_shinfo(skb)->frag_list = NULL;
-+			skb_shinfo(skb)->nr_frags = 0;
-+			netbk_free_pages(nr_frags, meta + npo.meta_cons + 1);
++		ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
++		if (ret) {
++			dev_warn(&dev->dev, "xen map irq failed %d for domain %d\n",
++				 ret, domid);
++			goto out;
 +		}
 +
-+		netif->stats.tx_bytes += skb->len;
-+		netif->stats.tx_packets++;
-+
-+		status = netbk_check_gop(nr_frags, netif->domid, &npo);
-+
-+		id = meta[npo.meta_cons].id;
-+		flags = nr_frags ? NETRXF_more_data : 0;
-+
-+		if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
-+			flags |= NETRXF_csum_blank | NETRXF_data_validated;
-+		else if (skb->proto_data_valid) /* remote but checksummed? */
-+			flags |= NETRXF_data_validated;
-+
-+		if (meta[npo.meta_cons].copy)
-+			offset = 0;
-+		else
-+			offset = offset_in_page(skb->data);
-+		resp = make_rx_response(netif, id, status, offset,
-+					skb_headlen(skb), flags);
-+
-+		if (meta[npo.meta_cons].frag.size) {
-+			struct netif_extra_info *gso =
-+				(struct netif_extra_info *)
-+				RING_GET_RESPONSE(&netif->rx,
-+						  netif->rx.rsp_prod_pvt++);
-+
-+			resp->flags |= NETRXF_extra_info;
-+
-+			gso->u.gso.size = meta[npo.meta_cons].frag.size;
-+			gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
-+			gso->u.gso.pad = 0;
-+			gso->u.gso.features = 0;
-+
-+			gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
-+			gso->flags = 0;
-+		}
-+
-+		netbk_add_frag_responses(netif, status,
-+					 meta + npo.meta_cons + 1,
-+					 nr_frags);
-+
-+		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
-+		irq = netif->irq;
-+		if (ret && !rx_notify[irq]) {
-+			rx_notify[irq] = 1;
-+			notify_list[notify_nr++] = irq;
-+		}
-+
-+		if (netif_queue_stopped(netif->dev) &&
-+		    netif_schedulable(netif) &&
-+		    !netbk_queue_full(netif))
-+			netif_wake_queue(netif->dev);
-+
-+		netif_put(netif);
-+		dev_kfree_skb(skb);
-+		npo.meta_cons += nr_frags + 1;
-+	}
-+
-+	while (notify_nr != 0) {
-+		irq = notify_list[--notify_nr];
-+		rx_notify[irq] = 0;
-+		notify_remote_via_irq(irq);
++		ret = xen_bind_pirq_msi_to_irq(dev, msidesc,
++					       map_irq.pirq, map_irq.index,
++					       (type == PCI_CAP_ID_MSIX) ?
++					       "msi-x" : "msi",
++						domid);
++		if (ret < 0)
++			goto out;
 +	}
-+
-+	/* More work to do? */
-+	if (!skb_queue_empty(&rx_queue) && !timer_pending(&net_timer))
-+		tasklet_schedule(&net_rx_tasklet);
-+#if 0
-+	else
-+		xen_network_done_notify();
++	ret = 0;
++out:
+ 	return ret;
+ }
+ #endif
 +#endif
-+}
+ 
+ static int xen_pcifront_enable_irq(struct pci_dev *dev)
+ {
+ 	int rc;
+ 	int share = 1;
++	u8 gsi;
+ 
+-	dev_info(&dev->dev, "Xen PCI enabling IRQ: %d\n", dev->irq);
+-
+-	if (dev->irq < 0)
+-		return -EINVAL;
++	rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi);
++	if (rc < 0) {
++		dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n",
++			 rc);
++		return rc;
++	}
+ 
+-	if (dev->irq < NR_IRQS_LEGACY)
++	if (gsi < NR_IRQS_LEGACY)
+ 		share = 0;
+ 
+-	rc = xen_allocate_pirq(dev->irq, share, "pcifront");
++	rc = xen_allocate_pirq(gsi, share, "pcifront");
+ 	if (rc < 0) {
+-		dev_warn(&dev->dev, "Xen PCI IRQ: %d, failed to register:%d\n",
+-			 dev->irq, rc);
++		dev_warn(&dev->dev, "Xen PCI: failed to register GSI%d: %d\n",
++			 gsi, rc);
+ 		return rc;
+ 	}
 +
-+static void net_alarm(unsigned long unused)
-+{
-+	tasklet_schedule(&net_rx_tasklet);
-+}
++	dev->irq = rc;
++	dev_info(&dev->dev, "Xen PCI mapped GSI%d to IRQ%d\n", gsi, dev->irq);
+ 	return 0;
+ }
+ 
+@@ -427,3 +455,76 @@ void __init xen_setup_pirqs(void)
+ 	}
+ }
+ #endif
 +
-+static void netbk_tx_pending_timeout(unsigned long unused)
-+{
-+	tasklet_schedule(&net_tx_tasklet);
-+}
++struct xen_device_domain_owner {
++	domid_t domain;
++	struct pci_dev *dev;
++	struct list_head list;
++};
 +
-+struct net_device_stats *netif_be_get_stats(struct net_device *dev)
-+{
-+	netif_t *netif = netdev_priv(dev);
-+	return &netif->stats;
-+}
++static DEFINE_SPINLOCK(dev_domain_list_spinlock);
++static struct list_head dev_domain_list = LIST_HEAD_INIT(dev_domain_list);
 +
-+static int __on_net_schedule_list(netif_t *netif)
++static struct xen_device_domain_owner *find_device(struct pci_dev *dev)
 +{
-+	return netif->list.next != NULL;
-+}
++	struct xen_device_domain_owner *owner;
 +
-+static void remove_from_net_schedule_list(netif_t *netif)
-+{
-+	spin_lock_irq(&net_schedule_list_lock);
-+	if (likely(__on_net_schedule_list(netif))) {
-+		list_del(&netif->list);
-+		netif->list.next = NULL;
-+		netif_put(netif);
++	list_for_each_entry(owner, &dev_domain_list, list) {
++		if (owner->dev == dev)
++			return owner;
 +	}
-+	spin_unlock_irq(&net_schedule_list_lock);
++	return NULL;
 +}
 +
-+static void add_to_net_schedule_list_tail(netif_t *netif)
++int xen_find_device_domain_owner(struct pci_dev *dev)
 +{
-+	if (__on_net_schedule_list(netif))
-+		return;
++	struct xen_device_domain_owner *owner;
++	int domain = -ENODEV;
 +
-+	spin_lock_irq(&net_schedule_list_lock);
-+	if (!__on_net_schedule_list(netif) &&
-+	    likely(netif_schedulable(netif))) {
-+		list_add_tail(&netif->list, &net_schedule_list);
-+		netif_get(netif);
-+	}
-+	spin_unlock_irq(&net_schedule_list_lock);
++	spin_lock(&dev_domain_list_spinlock);
++	owner = find_device(dev);
++	if (owner)
++		domain = owner->domain;
++	spin_unlock(&dev_domain_list_spinlock);
++	return domain;
 +}
++EXPORT_SYMBOL_GPL(xen_find_device_domain_owner);
 +
-+/*
-+ * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER:
-+ * If this driver is pipelining transmit requests then we can be very
-+ * aggressive in avoiding new-packet notifications -- frontend only needs to
-+ * send a notification if there are no outstanding unreceived responses.
-+ * If we may be buffer transmit buffers for any reason then we must be rather
-+ * more conservative and treat this as the final check for pending work.
-+ */
-+void netif_schedule_work(netif_t *netif)
++int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain)
 +{
-+	int more_to_do;
++	struct xen_device_domain_owner *owner;
 +
-+#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
-+	more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx);
-+#else
-+	RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
-+#endif
++	owner = kzalloc(sizeof(struct xen_device_domain_owner), GFP_KERNEL);
++	if (!owner)
++		return -ENODEV;
 +
-+	if (more_to_do) {
-+		add_to_net_schedule_list_tail(netif);
-+		maybe_schedule_tx_action();
++	spin_lock(&dev_domain_list_spinlock);
++	if (find_device(dev)) {
++		spin_unlock(&dev_domain_list_spinlock);
++		kfree(owner);
++		return -EEXIST;
 +	}
++	owner->domain = domain;
++	owner->dev = dev;
++	list_add_tail(&owner->list, &dev_domain_list);
++	spin_unlock(&dev_domain_list_spinlock);
++	return 0;
 +}
++EXPORT_SYMBOL_GPL(xen_register_device_domain_owner);
 +
-+void netif_deschedule_work(netif_t *netif)
++int xen_unregister_device_domain_owner(struct pci_dev *dev)
 +{
-+	remove_from_net_schedule_list(netif);
-+}
++	struct xen_device_domain_owner *owner;
 +
++	spin_lock(&dev_domain_list_spinlock);
++	owner = find_device(dev);
++	if (!owner) {
++		spin_unlock(&dev_domain_list_spinlock);
++		return -ENODEV;
++	}
++	list_del(&owner->list);
++	spin_unlock(&dev_domain_list_spinlock);
++	kfree(owner);
++	return 0;
++}
++EXPORT_SYMBOL_GPL(xen_unregister_device_domain_owner);
+diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
+index 5b54892..e4343fe 100644
+--- a/arch/x86/xen/Kconfig
++++ b/arch/x86/xen/Kconfig
+@@ -48,3 +48,11 @@ config XEN_DEBUG_FS
+ 	help
+ 	  Enable statistics output and various tuning options in debugfs.
+ 	  Enabling this option may incur a significant performance overhead.
 +
-+static void tx_add_credit(netif_t *netif)
++config XEN_DEBUG
++	bool "Enable Xen debug checks"
++	depends on XEN
++	default n
++	help
++	  Enable various WARN_ON checks in the Xen MMU code.
++	  Enabling this option WILL incur a significant performance overhead.
+diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
+index 50542ef..49dbd78 100644
+--- a/arch/x86/xen/enlighten.c
++++ b/arch/x86/xen/enlighten.c
+@@ -1284,15 +1284,14 @@ static int init_hvm_pv_info(int *major, int *minor)
+ 
+ 	xen_setup_features();
+ 
+-	pv_info = xen_info;
+-	pv_info.kernel_rpl = 0;
++	pv_info.name = "Xen HVM";
+ 
+ 	xen_domain_type = XEN_HVM_DOMAIN;
+ 
+ 	return 0;
+ }
+ 
+-void xen_hvm_init_shared_info(void)
++void __ref xen_hvm_init_shared_info(void)
+ {
+ 	int cpu;
+ 	struct xen_add_to_physmap xatp;
+@@ -1331,6 +1330,8 @@ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
+ 	switch (action) {
+ 	case CPU_UP_PREPARE:
+ 		per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
++		if (xen_have_vector_callback)
++			xen_init_lock_cpu(cpu);
+ 		break;
+ 	default:
+ 		break;
+@@ -1355,6 +1356,7 @@ static void __init xen_hvm_guest_init(void)
+ 
+ 	if (xen_feature(XENFEAT_hvm_callback_vector))
+ 		xen_have_vector_callback = 1;
++	xen_hvm_smp_init();
+ 	register_cpu_notifier(&xen_hvm_cpu_notifier);
+ 	xen_unplug_emulated_devices();
+ 	have_vcpu_info_placement = 0;
+diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
+index 5e92b61..0c376a2 100644
+--- a/arch/x86/xen/mmu.c
++++ b/arch/x86/xen/mmu.c
+@@ -46,6 +46,7 @@
+ #include <linux/module.h>
+ #include <linux/gfp.h>
+ #include <linux/memblock.h>
++#include <linux/seq_file.h>
+ 
+ #include <asm/pgtable.h>
+ #include <asm/tlbflush.h>
+@@ -416,8 +417,12 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
+ 	if (val & _PAGE_PRESENT) {
+ 		unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
+ 		pteval_t flags = val & PTE_FLAGS_MASK;
+-		unsigned long mfn = pfn_to_mfn(pfn);
++		unsigned long mfn;
+ 
++		if (!xen_feature(XENFEAT_auto_translated_physmap))
++			mfn = get_phys_to_machine(pfn);
++		else
++			mfn = pfn;
+ 		/*
+ 		 * If there's no mfn for the pfn, then just create an
+ 		 * empty non-present pte.  Unfortunately this loses
+@@ -427,8 +432,18 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
+ 		if (unlikely(mfn == INVALID_P2M_ENTRY)) {
+ 			mfn = 0;
+ 			flags = 0;
++		} else {
++			/*
++			 * It is paramount to do this test _after_ the
++			 * INVALID_P2M_ENTRY check, as INVALID_P2M_ENTRY &
++			 * IDENTITY_FRAME_BIT also resolves to true.
++			 */
++			mfn &= ~FOREIGN_FRAME_BIT;
++			if (mfn & IDENTITY_FRAME_BIT) {
++				mfn &= ~IDENTITY_FRAME_BIT;
++				flags |= _PAGE_IOMAP;
++			}
+ 		}
+-
+ 		val = ((pteval_t)mfn << PAGE_SHIFT) | flags;
+ 	}
+ 
+@@ -532,6 +547,41 @@ pte_t xen_make_pte(pteval_t pte)
+ }
+ PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
+ 
++#ifdef CONFIG_XEN_DEBUG
++pte_t xen_make_pte_debug(pteval_t pte)
 +{
-+	unsigned long max_burst, max_credit;
++	phys_addr_t addr = (pte & PTE_PFN_MASK);
++	phys_addr_t other_addr;
++	bool io_page = false;
++	pte_t _pte;
 +
-+	/*
-+	 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
-+	 * Otherwise the interface can seize up due to insufficient credit.
-+	 */
-+	max_burst = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size;
-+	max_burst = min(max_burst, 131072UL);
-+	max_burst = max(max_burst, netif->credit_bytes);
++	if (pte & _PAGE_IOMAP)
++		io_page = true;
 +
-+	/* Take care that adding a new chunk of credit doesn't wrap to zero. */
-+	max_credit = netif->remaining_credit + netif->credit_bytes;
-+	if (max_credit < netif->remaining_credit)
-+		max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
++	_pte = xen_make_pte(pte);
 +
-+	netif->remaining_credit = min(max_credit, max_burst);
-+}
++	if (!addr)
++		return _pte;
 +
-+static void tx_credit_callback(unsigned long data)
-+{
-+	netif_t *netif = (netif_t *)data;
-+	tx_add_credit(netif);
-+	netif_schedule_work(netif);
-+}
++	if (io_page &&
++	    (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
++		other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT;
++		WARN(addr != other_addr,
++			"0x%lx is using VM_IO, but it is 0x%lx!\n",
++			(unsigned long)addr, (unsigned long)other_addr);
++	} else {
++		pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP;
++		other_addr = (_pte.pte & PTE_PFN_MASK);
++		WARN((addr == other_addr) && (!io_page) && (!iomap_set),
++			"0x%lx is missing VM_IO (and wasn't fixed)!\n",
++			(unsigned long)addr);
++	}
 +
-+static inline int copy_pending_req(PEND_RING_IDX pending_idx)
-+{
-+	return gnttab_copy_grant_page(grant_tx_handle[pending_idx],
-+				      &mmap_pages[pending_idx]);
++	return _pte;
 +}
++PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_debug);
++#endif
 +
-+inline static void net_tx_action_dealloc(void)
-+{
-+	struct netbk_tx_pending_inuse *inuse, *n;
-+	gnttab_unmap_grant_ref_t *gop;
-+	u16 pending_idx;
-+	PEND_RING_IDX dc, dp;
-+	netif_t *netif;
-+	int ret;
-+	LIST_HEAD(list);
-+
-+	dc = dealloc_cons;
-+	gop = tx_unmap_ops;
-+
-+	/*
-+	 * Free up any grants we have finished using
-+	 */
-+	do {
-+		dp = dealloc_prod;
-+
-+		/* Ensure we see all indices enqueued by netif_idx_release(). */
-+		smp_rmb();
-+
-+		while (dc != dp) {
-+			unsigned long pfn;
-+
-+			pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
-+			list_move_tail(&pending_inuse[pending_idx].list, &list);
-+
-+			pfn = idx_to_pfn(pending_idx);
-+			/* Already unmapped? */
-+			if (!phys_to_machine_mapping_valid(pfn))
-+				continue;
+ pgd_t xen_make_pgd(pgdval_t pgd)
+ {
+ 	pgd = pte_pfn_to_mfn(pgd);
+@@ -1942,6 +1992,9 @@ __init void xen_ident_map_ISA(void)
+ 
+ static __init void xen_post_allocator_init(void)
+ {
++#ifdef CONFIG_XEN_DEBUG
++	pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
++#endif
+ 	pv_mmu_ops.set_pte = xen_set_pte;
+ 	pv_mmu_ops.set_pmd = xen_set_pmd;
+ 	pv_mmu_ops.set_pud = xen_set_pud;
+@@ -2074,7 +2127,7 @@ static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
+ 			in_frames[i] = virt_to_mfn(vaddr);
+ 
+ 		MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0);
+-		set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);
++		__set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);
+ 
+ 		if (out_frames)
+ 			out_frames[i] = virt_to_pfn(vaddr);
+@@ -2353,6 +2406,18 @@ EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
+ 
+ #ifdef CONFIG_XEN_DEBUG_FS
+ 
++static int p2m_dump_open(struct inode *inode, struct file *filp)
++{
++	return single_open(filp, p2m_dump_show, NULL);
++}
 +
-+			gnttab_set_unmap_op(gop, idx_to_kaddr(pending_idx),
-+					    GNTMAP_host_map,
-+					    grant_tx_handle[pending_idx]);
-+			gop++;
-+		}
++static const struct file_operations p2m_dump_fops = {
++	.open		= p2m_dump_open,
++	.read		= seq_read,
++	.llseek		= seq_lseek,
++	.release	= single_release,
++};
 +
-+		if (netbk_copy_skb_mode != NETBK_DELAYED_COPY_SKB ||
-+		    list_empty(&pending_inuse_head))
-+			break;
+ static struct dentry *d_mmu_debug;
+ 
+ static int __init xen_mmu_debugfs(void)
+@@ -2408,6 +2473,7 @@ static int __init xen_mmu_debugfs(void)
+ 	debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug,
+ 			   &mmu_stats.prot_commit_batched);
+ 
++	debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops);
+ 	return 0;
+ }
+ fs_initcall(xen_mmu_debugfs);
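
The IDENTITY_FRAME decoding that pte_pfn_to_mfn() gains above is easier to
follow with the bit layout spelled out. A stand-alone sketch (illustrative
only; the bit positions are assumed to mirror what this series defines in
arch/x86/include/asm/xen/page.h):

	#include <stdio.h>

	#define BITS_PER_LONG      (sizeof(unsigned long) * 8)
	#define FOREIGN_FRAME_BIT  (1UL << (BITS_PER_LONG - 1))
	#define IDENTITY_FRAME_BIT (1UL << (BITS_PER_LONG - 2))
	#define IDENTITY_FRAME(m)  ((m) | IDENTITY_FRAME_BIT)
	#define INVALID_P2M_ENTRY  (~0UL)

	int main(void)
	{
		unsigned long pfn = 0xfee00;	/* a typical MMIO frame */
		unsigned long mfn = IDENTITY_FRAME(pfn);

		/* Order matters: INVALID_P2M_ENTRY has every bit set, so testing
		 * IDENTITY_FRAME_BIT first would misclassify invalid entries. */
		if (mfn == INVALID_P2M_ENTRY) {
			printf("invalid entry\n");
			return 0;
		}
		mfn &= ~FOREIGN_FRAME_BIT;
		if (mfn & IDENTITY_FRAME_BIT) {
			mfn &= ~IDENTITY_FRAME_BIT;
			printf("identity: pfn %#lx -> mfn %#lx, pte gets _PAGE_IOMAP\n",
			       pfn, mfn);
		}
		return 0;
	}

With CONFIG_XEN_DEBUG enabled, xen_make_pte_debug() re-derives exactly this
classification for every PTE created, which is why the new Kconfig help text
warns that the option will cost real performance.
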
+diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
+index fd12d7c..dd5e735 100644
+--- a/arch/x86/xen/p2m.c
++++ b/arch/x86/xen/p2m.c
+@@ -30,6 +30,7 @@
+ #include <linux/list.h>
+ #include <linux/hash.h>
+ #include <linux/sched.h>
++#include <linux/seq_file.h>
+ 
+ #include <asm/cache.h>
+ #include <asm/setup.h>
+@@ -59,9 +60,15 @@ static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE);
+ static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE);
+ static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE);
+ 
++static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE);
 +
-+		/* Copy any entries that have been pending for too long. */
-+		list_for_each_entry_safe(inuse, n, &pending_inuse_head, list) {
-+			if (time_after(inuse->alloc_time + HZ / 2, jiffies))
-+				break;
+ RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
+ RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
+ 
++/* We might hit two boundary violations at the start and end; at most, each
++ * boundary violation will require three middle nodes. */
++RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3);
 +
-+			switch (copy_pending_req(inuse - pending_inuse)) {
-+			case 0:
-+				list_move_tail(&inuse->list, &list);
-+				continue;
-+			case -EBUSY:
-+				list_del_init(&inuse->list);
-+				continue;
-+			case -ENOENT:
-+				continue;
-+			}
+ static inline unsigned p2m_top_index(unsigned long pfn)
+ {
+ 	BUG_ON(pfn >= MAX_P2M_PFN);
+@@ -136,7 +143,7 @@ static void p2m_init(unsigned long *p2m)
+  * - After resume we're called from within stop_machine, but the mfn
+  *   tree should already be completely allocated.
+  */
+-void xen_build_mfn_list_list(void)
++void __ref xen_build_mfn_list_list(void)
+ {
+ 	unsigned long pfn;
+ 
+@@ -221,6 +228,9 @@ void __init xen_build_dynamic_phys_to_machine(void)
+ 	p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE);
+ 	p2m_top_init(p2m_top);
+ 
++	p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE);
++	p2m_init(p2m_identity);
 +
-+			break;
-+		}
-+	} while (dp != dealloc_prod);
+ 	/*
+ 	 * The domain builder gives us a pre-constructed p2m array in
+ 	 * mfn_list for all the pages initially given to us, so we just
+@@ -266,6 +276,14 @@ unsigned long get_phys_to_machine(unsigned long pfn)
+ 	mididx = p2m_mid_index(pfn);
+ 	idx = p2m_index(pfn);
+ 
++	/*
++	 * INVALID_P2M_ENTRY is filled in both p2m_*identity
++	 * and p2m_*missing, so returning INVALID_P2M_ENTRY
++	 * here would be wrong.
++	 */
++	if (p2m_top[topidx][mididx] == p2m_identity)
++		return IDENTITY_FRAME(pfn);
 +
-+	dealloc_cons = dc;
+ 	return p2m_top[topidx][mididx][idx];
+ }
+ EXPORT_SYMBOL_GPL(get_phys_to_machine);
+@@ -335,9 +353,11 @@ static bool alloc_p2m(unsigned long pfn)
+ 			p2m_top_mfn_p[topidx] = mid_mfn;
+ 	}
+ 
+-	if (p2m_top[topidx][mididx] == p2m_missing) {
++	if (p2m_top[topidx][mididx] == p2m_identity ||
++	    p2m_top[topidx][mididx] == p2m_missing) {
+ 		/* p2m leaf page is missing */
+ 		unsigned long *p2m;
++		unsigned long *p2m_orig = p2m_top[topidx][mididx];
+ 
+ 		p2m = alloc_p2m_page();
+ 		if (!p2m)
+@@ -345,7 +365,7 @@ static bool alloc_p2m(unsigned long pfn)
+ 
+ 		p2m_init(p2m);
+ 
+-		if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing)
++		if (cmpxchg(&mid[mididx], p2m_orig, p2m) != p2m_orig)
+ 			free_p2m_page(p2m);
+ 		else
+ 			mid_mfn[mididx] = virt_to_mfn(p2m);
+@@ -354,11 +374,91 @@ static bool alloc_p2m(unsigned long pfn)
+ 	return true;
+ }
+ 
++bool __early_alloc_p2m(unsigned long pfn)
++{
++	unsigned topidx, mididx, idx;
 +
-+	ret = HYPERVISOR_grant_table_op(
-+		GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops);
-+	BUG_ON(ret);
++	topidx = p2m_top_index(pfn);
++	mididx = p2m_mid_index(pfn);
++	idx = p2m_index(pfn);
 +
-+	list_for_each_entry_safe(inuse, n, &list, list) {
-+		pending_idx = inuse - pending_inuse;
++	/* Pfff.. No boundary cross-over, let's get out. */
++	if (!idx)
++		return false;
 +
-+		netif = pending_tx_info[pending_idx].netif;
++	WARN(p2m_top[topidx][mididx] == p2m_identity,
++		"P2M[%d][%d] == IDENTITY, should be MISSING (or alloced)!\n",
++		topidx, mididx);
 +
-+		make_tx_response(netif, &pending_tx_info[pending_idx].req,
-+				 NETIF_RSP_OKAY);
++	/*
++	 * Could be done by xen_build_dynamic_phys_to_machine..
++	 */
++	if (p2m_top[topidx][mididx] != p2m_missing)
++		return false;
 +
-+		/* Ready for next use. */
-+		gnttab_reset_grant_page(mmap_pages[pending_idx]);
++	/* Boundary cross-over for the edges: */
++	if (idx) {
++		unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE);
 +
-+		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
++		p2m_init(p2m);
 +
-+		netif_put(netif);
++		p2m_top[topidx][mididx] = p2m;
 +
-+		list_del_init(&inuse->list);
 +	}
++	return idx != 0;
 +}
-+
-+static void netbk_tx_err(netif_t *netif, netif_tx_request_t *txp, RING_IDX end)
-+{
-+	RING_IDX cons = netif->tx.req_cons;
-+
-+	do {
-+		make_tx_response(netif, txp, NETIF_RSP_ERROR);
-+		if (cons >= end)
-+			break;
-+		txp = RING_GET_REQUEST(&netif->tx, cons++);
-+	} while (1);
-+	netif->tx.req_cons = cons;
-+	netif_schedule_work(netif);
-+	netif_put(netif);
-+}
-+
-+static int netbk_count_requests(netif_t *netif, netif_tx_request_t *first,
-+				netif_tx_request_t *txp, int work_to_do)
++unsigned long set_phys_range_identity(unsigned long pfn_s,
++				      unsigned long pfn_e)
 +{
-+	RING_IDX cons = netif->tx.req_cons;
-+	int frags = 0;
++	unsigned long pfn;
 +
-+	if (!(first->flags & NETTXF_more_data))
++	if (unlikely(pfn_s >= MAX_P2M_PFN || pfn_e >= MAX_P2M_PFN))
 +		return 0;
 +
-+	do {
-+		if (frags >= work_to_do) {
-+			DPRINTK("Need more frags\n");
-+			return -frags;
-+		}
++	if (unlikely(xen_feature(XENFEAT_auto_translated_physmap)))
++		return pfn_e - pfn_s;
 +
-+		if (unlikely(frags >= MAX_SKB_FRAGS)) {
-+			DPRINTK("Too many frags\n");
-+			return -frags;
-+		}
++	if (pfn_s > pfn_e)
++		return 0;
 +
-+		memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags),
-+		       sizeof(*txp));
-+		if (txp->size > first->size) {
-+			DPRINTK("Frags galore\n");
-+			return -frags;
-+		}
++	for (pfn = (pfn_s & ~(P2M_MID_PER_PAGE * P2M_PER_PAGE - 1));
++		pfn < ALIGN(pfn_e, (P2M_MID_PER_PAGE * P2M_PER_PAGE));
++		pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE)
++	{
++		unsigned topidx = p2m_top_index(pfn);
++		if (p2m_top[topidx] == p2m_mid_missing) {
++			unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
 +
-+		first->size -= txp->size;
-+		frags++;
++			p2m_mid_init(mid);
 +
-+		if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
-+			DPRINTK("txp->offset: %x, size: %u\n",
-+				txp->offset, txp->size);
-+			return -frags;
++			p2m_top[topidx] = mid;
 +		}
-+	} while ((txp++)->flags & NETTXF_more_data);
-+
-+	return frags;
-+}
-+
-+static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif,
-+						  struct sk_buff *skb,
-+						  netif_tx_request_t *txp,
-+						  gnttab_map_grant_ref_t *mop)
-+{
-+	struct skb_shared_info *shinfo = skb_shinfo(skb);
-+	skb_frag_t *frags = shinfo->frags;
-+	unsigned long pending_idx = *((u16 *)skb->data);
-+	int i, start;
-+
-+	/* Skip first skb fragment if it is on same page as header fragment. */
-+	start = ((unsigned long)shinfo->frags[0].page == pending_idx);
++	}
 +
-+	for (i = start; i < shinfo->nr_frags; i++, txp++) {
-+		pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)];
++	__early_alloc_p2m(pfn_s);
++	__early_alloc_p2m(pfn_e);
 +
-+		gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx),
-+				  GNTMAP_host_map | GNTMAP_readonly,
-+				  txp->gref, netif->domid);
++	for (pfn = pfn_s; pfn < pfn_e; pfn++)
++		if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn)))
++			break;
 +
-+		memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
-+		netif_get(netif);
-+		pending_tx_info[pending_idx].netif = netif;
-+		frags[i].page = (void *)pending_idx;
-+	}
++	if (!WARN((pfn - pfn_s) != (pfn_e - pfn_s),
++		"Identity mapping failed. We are %ld short of 1-1 mappings!\n",
++		(pfn_e - pfn_s) - (pfn - pfn_s)))
++		printk(KERN_DEBUG "1-1 mapping on %lx->%lx\n", pfn_s, pfn);
 +
-+	return mop;
++	return pfn - pfn_s;
 +}
 +
-+static int netbk_tx_check_mop(struct sk_buff *skb,
-+			       gnttab_map_grant_ref_t **mopp)
-+{
-+	gnttab_map_grant_ref_t *mop = *mopp;
-+	int pending_idx = *((u16 *)skb->data);
-+	netif_t *netif = pending_tx_info[pending_idx].netif;
-+	netif_tx_request_t *txp;
-+	struct skb_shared_info *shinfo = skb_shinfo(skb);
-+	int nr_frags = shinfo->nr_frags;
-+	int i, err, start;
-+
-+	/* Check status of header. */
-+	err = mop->status;
-+	if (unlikely(err)) {
-+		txp = &pending_tx_info[pending_idx].req;
-+		make_tx_response(netif, txp, NETIF_RSP_ERROR);
-+		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
-+		netif_put(netif);
-+	} else {
-+		set_phys_to_machine(
-+			__pa(idx_to_kaddr(pending_idx)) >> PAGE_SHIFT,
-+			FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
-+		grant_tx_handle[pending_idx] = mop->handle;
+ /* Try to install p2m mapping; fail if intermediate bits missing */
+ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+ {
+ 	unsigned topidx, mididx, idx;
+ 
++	if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
++		BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
++		return true;
 +	}
+ 	if (unlikely(pfn >= MAX_P2M_PFN)) {
+ 		BUG_ON(mfn != INVALID_P2M_ENTRY);
+ 		return true;
+@@ -368,6 +468,21 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+ 	mididx = p2m_mid_index(pfn);
+ 	idx = p2m_index(pfn);
+ 
++	/* For sparse holes where the p2m leaf has a real PFN along with
++	 * PCI holes, stick in the PFN as the MFN value.
++	 */
++	if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) {
++		if (p2m_top[topidx][mididx] == p2m_identity)
++			return true;
 +
-+	/* Skip first skb fragment if it is on same page as header fragment. */
-+	start = ((unsigned long)shinfo->frags[0].page == pending_idx);
-+
-+	for (i = start; i < nr_frags; i++) {
-+		int j, newerr;
-+
-+		pending_idx = (unsigned long)shinfo->frags[i].page;
-+
-+		/* Check error status: if okay then remember grant handle. */
-+		newerr = (++mop)->status;
-+		if (likely(!newerr)) {
-+			set_phys_to_machine(
-+				__pa(idx_to_kaddr(pending_idx))>>PAGE_SHIFT,
-+				FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
-+			grant_tx_handle[pending_idx] = mop->handle;
-+			/* Had a previous error? Invalidate this fragment. */
-+			if (unlikely(err))
-+				netif_idx_release(pending_idx);
-+			continue;
-+		}
-+
-+		/* Error on this fragment: respond to client with an error. */
-+		txp = &pending_tx_info[pending_idx].req;
-+		make_tx_response(netif, txp, NETIF_RSP_ERROR);
-+		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
-+		netif_put(netif);
-+
-+		/* Not the first error? Preceding frags already invalidated. */
-+		if (err)
-+			continue;
-+
-+		/* First error: invalidate header and preceding fragments. */
-+		pending_idx = *((u16 *)skb->data);
-+		netif_idx_release(pending_idx);
-+		for (j = start; j < i; j++) {
-+			pending_idx = (unsigned long)shinfo->frags[i].page;
-+			netif_idx_release(pending_idx);
++		/* Swap over from MISSING to IDENTITY if needed. */
++		if (p2m_top[topidx][mididx] == p2m_missing) {
++			WARN_ON(cmpxchg(&p2m_top[topidx][mididx], p2m_missing,
++				p2m_identity) != p2m_missing);
++			return true;
 +		}
-+
-+		/* Remember the error: invalidate all subsequent fragments. */
-+		err = newerr;
 +	}
 +
-+	*mopp = mop + 1;
-+	return err;
-+}
-+
-+static void netbk_fill_frags(struct sk_buff *skb)
-+{
-+	struct skb_shared_info *shinfo = skb_shinfo(skb);
-+	int nr_frags = shinfo->nr_frags;
-+	int i;
+ 	if (p2m_top[topidx][mididx] == p2m_missing)
+ 		return mfn == INVALID_P2M_ENTRY;
+ 
+@@ -378,11 +493,6 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+ 
+ bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+ {
+-	if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
+-		BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
+-		return true;
+-	}
+-
+ 	if (unlikely(!__set_phys_to_machine(pfn, mfn)))  {
+ 		if (!alloc_p2m(pfn))
+ 			return false;
+@@ -417,11 +527,11 @@ static unsigned long mfn_hash(unsigned long mfn)
+ }
+ 
+ /* Add an MFN override for a particular page */
+-int m2p_add_override(unsigned long mfn, struct page *page)
++int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte)
+ {
+ 	unsigned long flags;
+ 	unsigned long pfn;
+-	unsigned long address;
++	unsigned long uninitialized_var(address);
+ 	unsigned level;
+ 	pte_t *ptep = NULL;
+ 
+@@ -429,7 +539,6 @@ int m2p_add_override(unsigned long mfn, struct page *page)
+ 	if (!PageHighMem(page)) {
+ 		address = (unsigned long)__va(pfn << PAGE_SHIFT);
+ 		ptep = lookup_address(address, &level);
+-
+ 		if (WARN(ptep == NULL || level != PG_LEVEL_4K,
+ 					"m2p_add_override: pfn %lx not mapped", pfn))
+ 			return -EINVAL;
+@@ -439,10 +548,9 @@ int m2p_add_override(unsigned long mfn, struct page *page)
+ 	page->index = pfn_to_mfn(pfn);
+ 
+ 	__set_phys_to_machine(pfn, FOREIGN_FRAME(mfn));
+-	if (!PageHighMem(page))
++	if (clear_pte && !PageHighMem(page))
+ 		/* Just zap old mapping for now */
+ 		pte_clear(&init_mm, address, ptep);
+-
+ 	spin_lock_irqsave(&m2p_override_lock, flags);
+ 	list_add(&page->lru,  &m2p_overrides[mfn_hash(mfn)]);
+ 	spin_unlock_irqrestore(&m2p_override_lock, flags);
+@@ -450,12 +558,12 @@ int m2p_add_override(unsigned long mfn, struct page *page)
+ 	return 0;
+ }
+ 
+-int m2p_remove_override(struct page *page)
++int m2p_remove_override(struct page *page, bool clear_pte)
+ {
+ 	unsigned long flags;
+ 	unsigned long mfn;
+ 	unsigned long pfn;
+-	unsigned long address;
++	unsigned long uninitialized_var(address);
+ 	unsigned level;
+ 	pte_t *ptep = NULL;
+ 
+@@ -478,7 +586,7 @@ int m2p_remove_override(struct page *page)
+ 	spin_unlock_irqrestore(&m2p_override_lock, flags);
+ 	__set_phys_to_machine(pfn, page->index);
+ 
+-	if (!PageHighMem(page))
++	if (clear_pte && !PageHighMem(page))
+ 		set_pte_at(&init_mm, address, ptep,
+ 				pfn_pte(pfn, PAGE_KERNEL));
+ 		/* No tlb flush necessary because the caller already
+@@ -520,3 +628,80 @@ unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
+ 	return ret;
+ }
+ EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
 +
-+	for (i = 0; i < nr_frags; i++) {
-+		skb_frag_t *frag = shinfo->frags + i;
-+		netif_tx_request_t *txp;
-+		unsigned long pending_idx;
++#ifdef CONFIG_XEN_DEBUG_FS
 +
-+		pending_idx = (unsigned long)frag->page;
++int p2m_dump_show(struct seq_file *m, void *v)
++{
++	static const char * const level_name[] = { "top", "middle",
++						"entry", "abnormal" };
++	static const char * const type_name[] = { "identity", "missing",
++						"pfn", "abnormal"};
++#define TYPE_IDENTITY 0
++#define TYPE_MISSING 1
++#define TYPE_PFN 2
++#define TYPE_UNKNOWN 3
++	unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0;
++	unsigned int uninitialized_var(prev_level);
++	unsigned int uninitialized_var(prev_type);
 +
-+		pending_inuse[pending_idx].alloc_time = jiffies;
-+		list_add_tail(&pending_inuse[pending_idx].list,
-+			      &pending_inuse_head);
++	if (!p2m_top)
++		return 0;
 +
-+		txp = &pending_tx_info[pending_idx].req;
-+		frag->page = virt_to_page(idx_to_kaddr(pending_idx));
-+		frag->size = txp->size;
-+		frag->page_offset = txp->offset;
++	for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn++) {
++		unsigned topidx = p2m_top_index(pfn);
++		unsigned mididx = p2m_mid_index(pfn);
++		unsigned idx = p2m_index(pfn);
++		unsigned lvl, type;
 +
-+		skb->len += txp->size;
-+		skb->data_len += txp->size;
-+		skb->truesize += txp->size;
++		lvl = 4;
++		type = TYPE_UNKNOWN;
++		if (p2m_top[topidx] == p2m_mid_missing) {
++			lvl = 0; type = TYPE_MISSING;
++		} else if (p2m_top[topidx] == NULL) {
++			lvl = 0; type = TYPE_UNKNOWN;
++		} else if (p2m_top[topidx][mididx] == NULL) {
++			lvl = 1; type = TYPE_UNKNOWN;
++		} else if (p2m_top[topidx][mididx] == p2m_identity) {
++			lvl = 1; type = TYPE_IDENTITY;
++		} else if (p2m_top[topidx][mididx] == p2m_missing) {
++			lvl = 1; type = TYPE_MISSING;
++		} else if (p2m_top[topidx][mididx][idx] == 0) {
++			lvl = 2; type = TYPE_UNKNOWN;
++		} else if (p2m_top[topidx][mididx][idx] == IDENTITY_FRAME(pfn)) {
++			lvl = 2; type = TYPE_IDENTITY;
++		} else if (p2m_top[topidx][mididx][idx] == INVALID_P2M_ENTRY) {
++			lvl = 2; type = TYPE_MISSING;
++		} else if (p2m_top[topidx][mididx][idx] == pfn) {
++			lvl = 2; type = TYPE_PFN;
++		} else if (p2m_top[topidx][mididx][idx] != pfn) {
++			lvl = 2; type = TYPE_PFN;
++		}
++		if (pfn == 0) {
++			prev_level = lvl;
++			prev_type = type;
++		}
++		if (pfn == MAX_DOMAIN_PAGES-1) {
++			lvl = 3;
++			type = TYPE_UNKNOWN;
++		}
++		if (prev_type != type) {
++			seq_printf(m, " [0x%lx->0x%lx] %s\n",
++				prev_pfn_type, pfn, type_name[prev_type]);
++			prev_pfn_type = pfn;
++			prev_type = type;
++		}
++		if (prev_level != lvl) {
++			seq_printf(m, " [0x%lx->0x%lx] level %s\n",
++				prev_pfn_level, pfn, level_name[prev_level]);
++			prev_pfn_level = pfn;
++			prev_level = lvl;
++		}
 +	}
++	return 0;
++#undef TYPE_IDENTITY
++#undef TYPE_MISSING
++#undef TYPE_PFN
++#undef TYPE_UNKNOWN
 +}
++#endif
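
For readers new to the three-level p2m, the arithmetic behind
set_phys_range_identity() and __early_alloc_p2m() is worth a worked example.
A user-space sketch of the index math (illustrative only; 64-bit sizes
assumed, matching the 512-entries-per-level layout in p2m.c):

	#include <stdio.h>

	#define P2M_PER_PAGE     512UL	/* 4096-byte page / 8-byte entry */
	#define P2M_MID_PER_PAGE 512UL

	static void classify(unsigned long pfn)
	{
		unsigned long topidx = pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE);
		unsigned long mididx = (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE;
		unsigned long idx    = pfn % P2M_PER_PAGE;

		printf("pfn %#7lx -> top %3lu mid %3lu idx %3lu%s\n",
		       pfn, topidx, mididx, idx,
		       idx ? "  (mid-leaf: needs a private p2m page)" : "");
	}

	int main(void)
	{
		classify(0xa0);		/* legacy VGA hole starts mid-leaf */
		classify(0x200);	/* leaf-aligned: shared identity leaf suffices */
		classify(0xfee00);	/* local APIC page, also leaf-aligned */
		return 0;
	}

Leaves lying wholly inside a range can simply point at the shared
p2m_identity page; only a range whose start or end falls mid-leaf needs a
private leaf, which is what the two __early_alloc_p2m() calls and the
RESERVE_BRK of 2 * 3 pages budget for. With CONFIG_XEN_DEBUG_FS the
resulting layout can be inspected through the new p2m file in the xen/mmu
debugfs directory.
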
+diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
+index a8a66a5..edeaff2 100644
+--- a/arch/x86/xen/setup.c
++++ b/arch/x86/xen/setup.c
+@@ -52,6 +52,8 @@ phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
+ 
+ static __init void xen_add_extra_mem(unsigned long pages)
+ {
++	unsigned long pfn;
++
+ 	u64 size = (u64)pages * PAGE_SIZE;
+ 	u64 extra_start = xen_extra_mem_start + xen_extra_mem_size;
+ 
+@@ -66,6 +68,9 @@ static __init void xen_add_extra_mem(unsigned long pages)
+ 	xen_extra_mem_size += size;
+ 
+ 	xen_max_p2m_pfn = PFN_DOWN(extra_start + size);
 +
-+int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras,
-+		     int work_to_do)
++	for (pfn = PFN_DOWN(extra_start); pfn <= xen_max_p2m_pfn; pfn++)
++		__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+ }
+ 
+ static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
+@@ -104,7 +109,7 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
+ 		WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n",
+ 		     start, end, ret);
+ 		if (ret == 1) {
+-			set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
++			__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+ 			len++;
+ 		}
+ 	}
+@@ -138,12 +143,55 @@ static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
+ 	return released;
+ }
+ 
++static unsigned long __init xen_set_identity(const struct e820entry *list,
++					     ssize_t map_size)
 +{
-+	struct netif_extra_info extra;
-+	RING_IDX cons = netif->tx.req_cons;
++	phys_addr_t last = xen_initial_domain() ? 0 : ISA_END_ADDRESS;
++	phys_addr_t start_pci = last;
++	const struct e820entry *entry;
++	unsigned long identity = 0;
++	int i;
 +
-+	do {
-+		if (unlikely(work_to_do-- <= 0)) {
-+			DPRINTK("Missing extra info\n");
-+			return -EBADR;
-+		}
++	for (i = 0, entry = list; i < map_size; i++, entry++) {
++		phys_addr_t start = entry->addr;
++		phys_addr_t end = start + entry->size;
 +
-+		memcpy(&extra, RING_GET_REQUEST(&netif->tx, cons),
-+		       sizeof(extra));
-+		if (unlikely(!extra.type ||
-+			     extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
-+			netif->tx.req_cons = ++cons;
-+			DPRINTK("Invalid extra type: %d\n", extra.type);
-+			return -EINVAL;
-+		}
++		if (start < last)
++			start = last;
 +
-+		memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
-+		netif->tx.req_cons = ++cons;
-+	} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
++		if (end <= start)
++			continue;
 +
-+	return work_to_do;
-+}
++		/* Skip over the 1MB region. */
++		if (last > end)
++			continue;
 +
-+static int netbk_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso)
-+{
-+	if (!gso->u.gso.size) {
-+		DPRINTK("GSO size must not be zero.\n");
-+		return -EINVAL;
-+	}
++		if (entry->type == E820_RAM) {
++			if (start > start_pci)
++				identity += set_phys_range_identity(
++						PFN_UP(start_pci), PFN_DOWN(start));
 +
-+	/* Currently only TCPv4 S.O. is supported. */
-+	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
-+		DPRINTK("Bad GSO type %d.\n", gso->u.gso.type);
-+		return -EINVAL;
++			/* Without saving 'last' we would also gobble RAM
++			 * at the end of the loop. */
++			last = end;
++			start_pci = end;
++			continue;
++		}
++		start_pci = min(start, start_pci);
++		last = end;
 +	}
-+
-+	skb_shinfo(skb)->gso_size = gso->u.gso.size;
-+	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
-+
-+	/* Header must be checked, and gso_segs computed. */
-+	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
-+	skb_shinfo(skb)->gso_segs = 0;
-+
-+	return 0;
++	if (last > start_pci)
++		identity += set_phys_range_identity(
++					PFN_UP(start_pci), PFN_DOWN(last));
++	return identity;
 +}
+ /**
+  * machine_specific_memory_setup - Hook for machine specific memory setup.
+  **/
+ char * __init xen_memory_setup(void)
+ {
+ 	static struct e820entry map[E820MAX] __initdata;
++	static struct e820entry map_raw[E820MAX] __initdata;
+ 
+ 	unsigned long max_pfn = xen_start_info->nr_pages;
+ 	unsigned long long mem_end;
+@@ -151,6 +199,7 @@ char * __init xen_memory_setup(void)
+ 	struct xen_memory_map memmap;
+ 	unsigned long extra_pages = 0;
+ 	unsigned long extra_limit;
++	unsigned long identity_pages = 0;
+ 	int i;
+ 	int op;
+ 
+@@ -176,6 +225,7 @@ char * __init xen_memory_setup(void)
+ 	}
+ 	BUG_ON(rc);
+ 
++	memcpy(map_raw, map, sizeof(map));
+ 	e820.nr_map = 0;
+ 	xen_extra_mem_start = mem_end;
+ 	for (i = 0; i < memmap.nr_entries; i++) {
+@@ -194,6 +244,14 @@ char * __init xen_memory_setup(void)
+ 			end -= delta;
+ 
+ 			extra_pages += PFN_DOWN(delta);
++			/*
++			 * Set RAM below 4GB that is not for us to be unusable.
++			 * This prevents "System RAM" address space from being
++			 * used as potential resource for I/O address (happens
++			 * when 'allocate_resource' is called).
++			 */
++			if (delta && end < 0x100000000UL)
++				e820_add_region(end, delta, E820_UNUSABLE);
+ 		}
+ 
+ 		if (map[i].size > 0 && end > xen_extra_mem_start)
+@@ -251,6 +309,13 @@ char * __init xen_memory_setup(void)
+ 
+ 	xen_add_extra_mem(extra_pages);
+ 
++	/*
++	 * Set P2M for all non-RAM pages and E820 gaps to be identity
++	 * type PFNs. We supply it with the non-sanitized version
++	 * of the E820.
++	 */
++	identity_pages = xen_set_identity(map_raw, memmap.nr_entries);
++	printk(KERN_INFO "Set %ld page(s) to 1-1 mapping.\n", identity_pages);
+ 	return "Xen";
+ }
+ 
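
The e820 walk in xen_set_identity() above can be hard to picture; here is a
deliberately simplified user-space model (illustrative only: it ignores the
overlap clamping, the sub-1MB special case, and the dom0-vs-domU starting
point, and the map is made up):

	#include <stdio.h>

	#define E820_RAM 1
	#define PFN_UP(x)   (((x) + 4095ULL) >> 12)
	#define PFN_DOWN(x) ((x) >> 12)

	struct e820entry { unsigned long long addr, size; unsigned type; };

	int main(void)
	{
		/* Low RAM, a reserved BIOS hole, then high RAM. */
		const struct e820entry map[] = {
			{ 0x00000000ULL, 0x0009f000ULL, E820_RAM },
			{ 0x0009f000ULL, 0x00061000ULL, 2 /* reserved */ },
			{ 0x00100000ULL, 0x7ff00000ULL, E820_RAM },
		};
		unsigned long long cursor = 0;
		unsigned i;

		for (i = 0; i < sizeof(map) / sizeof(map[0]); i++) {
			if (map[i].type != E820_RAM)
				continue;
			if (map[i].addr > cursor)	/* gap or non-RAM: 1-1 map it */
				printf("identity pfns [%#llx, %#llx)\n",
				       PFN_UP(cursor), PFN_DOWN(map[i].addr));
			cursor = map[i].addr + map[i].size;
		}
		printf("identity pfns [%#llx, ...) above the top of RAM\n",
		       PFN_UP(cursor));
		return 0;
	}

The model also shows why the raw map_raw copy is passed in: the walk needs
to see the non-RAM entries and the gaps exactly as the hypervisor reported
them, before sanitizing merges or clips them.
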
+diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
+index 72a4c79..3061244 100644
+--- a/arch/x86/xen/smp.c
++++ b/arch/x86/xen/smp.c
+@@ -509,3 +509,41 @@ void __init xen_smp_init(void)
+ 	xen_fill_possible_map();
+ 	xen_init_spinlocks();
+ }
 +
-+/* Called after netfront has transmitted */
-+static void net_tx_action(unsigned long unused)
++static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
 +{
-+	struct list_head *ent;
-+	struct sk_buff *skb;
-+	netif_t *netif;
-+	netif_tx_request_t txreq;
-+	netif_tx_request_t txfrags[MAX_SKB_FRAGS];
-+	struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
-+	u16 pending_idx;
-+	RING_IDX i;
-+	gnttab_map_grant_ref_t *mop;
-+	unsigned int data_len;
-+	int ret, work_to_do;
-+
-+	if (dealloc_cons != dealloc_prod)
-+		net_tx_action_dealloc();
-+
-+	mop = tx_map_ops;
-+	while (((NR_PENDING_REQS + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
-+		!list_empty(&net_schedule_list)) {
-+		/* Get a netif from the list with work to do. */
-+		ent = net_schedule_list.next;
-+		netif = list_entry(ent, netif_t, list);
-+		netif_get(netif);
-+		remove_from_net_schedule_list(netif);
-+
-+		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
-+		if (!work_to_do) {
-+			netif_put(netif);
-+			continue;
-+		}
-+
-+		i = netif->tx.req_cons;
-+		rmb(); /* Ensure that we see the request before we copy it. */
-+		memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));
-+
-+		/* Credit-based scheduling. */
-+		if (txreq.size > netif->remaining_credit) {
-+			unsigned long now = jiffies;
-+			unsigned long next_credit =
-+				netif->credit_timeout.expires +
-+				msecs_to_jiffies(netif->credit_usec / 1000);
-+
-+			/* Timer could already be pending in rare cases. */
-+			if (timer_pending(&netif->credit_timeout)) {
-+				netif_put(netif);
-+				continue;
-+			}
-+
-+			/* Passed the point where we can replenish credit? */
-+			if (time_after_eq(now, next_credit)) {
-+				netif->credit_timeout.expires = now;
-+				tx_add_credit(netif);
-+			}
-+
-+			/* Still too big to send right now? Set a callback. */
-+			if (txreq.size > netif->remaining_credit) {
-+				netif->credit_timeout.data     =
-+					(unsigned long)netif;
-+				netif->credit_timeout.function =
-+					tx_credit_callback;
-+				__mod_timer(&netif->credit_timeout,
-+					    next_credit);
-+				netif_put(netif);
-+				continue;
-+			}
-+		}
-+		netif->remaining_credit -= txreq.size;
-+
-+		work_to_do--;
-+		netif->tx.req_cons = ++i;
-+
-+		memset(extras, 0, sizeof(extras));
-+		if (txreq.flags & NETTXF_extra_info) {
-+			work_to_do = netbk_get_extras(netif, extras,
-+						      work_to_do);
-+			i = netif->tx.req_cons;
-+			if (unlikely(work_to_do < 0)) {
-+				netbk_tx_err(netif, &txreq, i);
-+				continue;
-+			}
-+		}
-+
-+		ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do);
-+		if (unlikely(ret < 0)) {
-+			netbk_tx_err(netif, &txreq, i - ret);
-+			continue;
-+		}
-+		i += ret;
-+
-+		if (unlikely(txreq.size < ETH_HLEN)) {
-+			DPRINTK("Bad packet size: %d\n", txreq.size);
-+			netbk_tx_err(netif, &txreq, i);
-+			continue;
-+		}
-+
-+		/* No crossing a page as the payload mustn't fragment. */
-+		if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
-+			DPRINTK("txreq.offset: %x, size: %u, end: %lu\n",
-+				txreq.offset, txreq.size,
-+				(txreq.offset &~PAGE_MASK) + txreq.size);
-+			netbk_tx_err(netif, &txreq, i);
-+			continue;
-+		}
-+
-+		pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
-+
-+		data_len = (txreq.size > PKT_PROT_LEN &&
-+			    ret < MAX_SKB_FRAGS) ?
-+			PKT_PROT_LEN : txreq.size;
-+
-+		skb = alloc_skb(data_len + 16 + NET_IP_ALIGN,
-+				GFP_ATOMIC | __GFP_NOWARN);
-+		if (unlikely(skb == NULL)) {
-+			DPRINTK("Can't allocate a skb in start_xmit.\n");
-+			netbk_tx_err(netif, &txreq, i);
-+			break;
-+		}
-+
-+		/* Packets passed to netif_rx() must have some headroom. */
-+		skb_reserve(skb, 16 + NET_IP_ALIGN);
-+
-+		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
-+			struct netif_extra_info *gso;
-+			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
-+
-+			if (netbk_set_skb_gso(skb, gso)) {
-+				kfree_skb(skb);
-+				netbk_tx_err(netif, &txreq, i);
-+				continue;
-+			}
-+		}
-+
-+		gnttab_set_map_op(mop, idx_to_kaddr(pending_idx),
-+				  GNTMAP_host_map | GNTMAP_readonly,
-+				  txreq.gref, netif->domid);
-+		mop++;
-+
-+		memcpy(&pending_tx_info[pending_idx].req,
-+		       &txreq, sizeof(txreq));
-+		pending_tx_info[pending_idx].netif = netif;
-+		*((u16 *)skb->data) = pending_idx;
-+
-+		__skb_put(skb, data_len);
-+
-+		skb_shinfo(skb)->nr_frags = ret;
-+		if (data_len < txreq.size) {
-+			skb_shinfo(skb)->nr_frags++;
-+			skb_shinfo(skb)->frags[0].page =
-+				(void *)(unsigned long)pending_idx;
-+		} else {
-+			/* Discriminate from any valid pending_idx value. */
-+			skb_shinfo(skb)->frags[0].page = (void *)~0UL;
-+		}
-+
-+		if (skb->data_len < skb_shinfo(skb)->gso_size) {
-+			skb_shinfo(skb)->gso_size = 0;
-+			skb_shinfo(skb)->gso_type = 0;
-+		}
-+
-+		__skb_queue_tail(&tx_queue, skb);
-+
-+		pending_cons++;
-+
-+		mop = netbk_get_requests(netif, skb, txfrags, mop);
-+
-+		netif->tx.req_cons = i;
-+		netif_schedule_work(netif);
-+
-+		if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
-+			break;
-+	}
++	native_smp_prepare_cpus(max_cpus);
++	WARN_ON(xen_smp_intr_init(0));
 +
-+	if (mop == tx_map_ops)
++	if (!xen_have_vector_callback)
 +		return;
-+
-+	ret = HYPERVISOR_grant_table_op(
-+		GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops);
-+	BUG_ON(ret);
-+
-+	mop = tx_map_ops;
-+	while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
-+		netif_tx_request_t *txp;
-+
-+		pending_idx = *((u16 *)skb->data);
-+		netif       = pending_tx_info[pending_idx].netif;
-+		txp         = &pending_tx_info[pending_idx].req;
-+
-+		/* Check the remap error code. */
-+		if (unlikely(netbk_tx_check_mop(skb, &mop))) {
-+			DPRINTK("netback grant failed.\n");
-+			skb_shinfo(skb)->nr_frags = 0;
-+			kfree_skb(skb);
-+			continue;
-+		}
-+
-+		data_len = skb->len;
-+		memcpy(skb->data,
-+		       (void *)(idx_to_kaddr(pending_idx)|txp->offset),
-+		       data_len);
-+		if (data_len < txp->size) {
-+			/* Append the packet payload as a fragment. */
-+			txp->offset += data_len;
-+			txp->size -= data_len;
-+		} else {
-+			/* Schedule a response immediately. */
-+			netif_idx_release(pending_idx);
-+		}
-+
-+		/*
-+		 * Old frontends do not assert data_validated but we
-+		 * can infer it from csum_blank so test both flags.
-+		 */
-+		if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank)) {
-+			skb->ip_summed = CHECKSUM_UNNECESSARY;
-+			skb->proto_data_valid = 1;
-+		} else {
-+			skb->ip_summed = CHECKSUM_NONE;
-+			skb->proto_data_valid = 0;
-+		}
-+		skb->proto_csum_blank = !!(txp->flags & NETTXF_csum_blank);
-+
-+		netbk_fill_frags(skb);
-+
-+		skb->dev      = netif->dev;
-+		skb->protocol = eth_type_trans(skb, skb->dev);
-+
-+		netif->stats.rx_bytes += skb->len;
-+		netif->stats.rx_packets++;
-+
-+		if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
-+		    unlikely(skb_linearize(skb))) {
-+			DPRINTK("Can't linearize skb in net_tx_action.\n");
-+			kfree_skb(skb);
-+			continue;
-+		}
-+
-+		netif_rx(skb);
-+		netif->dev->last_rx = jiffies;
-+	}
-+
-+	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
-+	    !list_empty(&pending_inuse_head)) {
-+		struct netbk_tx_pending_inuse *oldest;
-+
-+		oldest = list_entry(pending_inuse_head.next,
-+				    struct netbk_tx_pending_inuse, list);
-+		mod_timer(&netbk_tx_pending_timer, oldest->alloc_time + HZ);
-+	}
-+}
-+
-+static void netif_idx_release(u16 pending_idx)
-+{
-+	static DEFINE_SPINLOCK(_lock);
-+	unsigned long flags;
-+
-+	spin_lock_irqsave(&_lock, flags);
-+	dealloc_ring[MASK_PEND_IDX(dealloc_prod)] = pending_idx;
-+	/* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
-+	smp_wmb();
-+	dealloc_prod++;
-+	spin_unlock_irqrestore(&_lock, flags);
-+
-+	tasklet_schedule(&net_tx_tasklet);
-+}
-+
-+static void netif_page_release(struct page *page, unsigned int order)
-+{
-+	int idx = netif_page_index(page);
-+	BUG_ON(order);
-+	BUG_ON(idx < 0);
-+	netif_idx_release(idx);
-+}
-+
-+irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
-+{
-+	netif_t *netif = dev_id;
-+
-+	add_to_net_schedule_list_tail(netif);
-+	maybe_schedule_tx_action();
-+
-+	if (netif_schedulable(netif) && !netbk_queue_full(netif))
-+		netif_wake_queue(netif->dev);
-+
-+	return IRQ_HANDLED;
++	xen_init_lock_cpu(0);
++	xen_init_spinlocks();
 +}
 +
-+static void make_tx_response(netif_t *netif,
-+			     netif_tx_request_t *txp,
-+			     s8       st)
++static int __cpuinit xen_hvm_cpu_up(unsigned int cpu)
 +{
-+	RING_IDX i = netif->tx.rsp_prod_pvt;
-+	netif_tx_response_t *resp;
-+	int notify;
-+
-+	resp = RING_GET_RESPONSE(&netif->tx, i);
-+	resp->id     = txp->id;
-+	resp->status = st;
-+
-+	if (txp->flags & NETTXF_extra_info)
-+		RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL;
-+
-+	netif->tx.rsp_prod_pvt = ++i;
-+	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
-+	if (notify)
-+		notify_remote_via_irq(netif->irq);
-+
-+#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
-+	if (i == netif->tx.req_cons) {
-+		int more_to_do;
-+		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
-+		if (more_to_do)
-+			add_to_net_schedule_list_tail(netif);
-+	}
-+#endif
++	int rc;
++	rc = native_cpu_up(cpu);
++	WARN_ON(xen_smp_intr_init(cpu));
++	return rc;
 +}
 +
-+static netif_rx_response_t *make_rx_response(netif_t *netif,
-+					     u16      id,
-+					     s8       st,
-+					     u16      offset,
-+					     u16      size,
-+					     u16      flags)
++static void xen_hvm_cpu_die(unsigned int cpu)
 +{
-+	RING_IDX i = netif->rx.rsp_prod_pvt;
-+	netif_rx_response_t *resp;
-+
-+	resp = RING_GET_RESPONSE(&netif->rx, i);
-+	resp->offset     = offset;
-+	resp->flags      = flags;
-+	resp->id         = id;
-+	resp->status     = (s16)size;
-+	if (st < 0)
-+		resp->status = (s16)st;
-+
-+	netif->rx.rsp_prod_pvt = ++i;
-+
-+	return resp;
++	unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL);
++	unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
++	unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
++	unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL);
++	native_cpu_die(cpu);
 +}
 +
-+#ifdef NETBE_DEBUG_INTERRUPT
-+static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
++void __init xen_hvm_smp_init(void)
 +{
-+	struct list_head *ent;
-+	netif_t *netif;
-+	int i = 0;
-+
-+	printk(KERN_ALERT "netif_schedule_list:\n");
-+	spin_lock_irq(&net_schedule_list_lock);
-+
-+	list_for_each (ent, &net_schedule_list) {
-+		netif = list_entry(ent, netif_t, list);
-+		printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
-+		       "rx_resp_prod=%08x\n",
-+		       i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
-+		printk(KERN_ALERT "   tx_req_cons=%08x tx_resp_prod=%08x)\n",
-+		       netif->tx.req_cons, netif->tx.rsp_prod_pvt);
-+		printk(KERN_ALERT "   shared(rx_req_prod=%08x "
-+		       "rx_resp_prod=%08x\n",
-+		       netif->rx.sring->req_prod, netif->rx.sring->rsp_prod);
-+		printk(KERN_ALERT "   rx_event=%08x tx_req_prod=%08x\n",
-+		       netif->rx.sring->rsp_event, netif->tx.sring->req_prod);
-+		printk(KERN_ALERT "   tx_resp_prod=%08x, tx_event=%08x)\n",
-+		       netif->tx.sring->rsp_prod, netif->tx.sring->rsp_event);
-+		i++;
-+	}
-+
-+	spin_unlock_irq(&net_schedule_list_lock);
-+	printk(KERN_ALERT " ** End of netif_schedule_list **\n");
-+
-+	return IRQ_HANDLED;
++	smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
++	smp_ops.smp_send_reschedule = xen_smp_send_reschedule;
++	smp_ops.cpu_up = xen_hvm_cpu_up;
++	smp_ops.cpu_die = xen_hvm_cpu_die;
++	smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi;
++	smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi;
 +}
+diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
+index 9bbd63a..45329c8 100644
+--- a/arch/x86/xen/suspend.c
++++ b/arch/x86/xen/suspend.c
+@@ -12,7 +12,7 @@
+ #include "xen-ops.h"
+ #include "mmu.h"
+ 
+-void xen_pre_suspend(void)
++void xen_arch_pre_suspend(void)
+ {
+ 	xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
+ 	xen_start_info->console.domU.mfn =
+@@ -26,8 +26,9 @@ void xen_pre_suspend(void)
+ 		BUG();
+ }
+ 
+-void xen_hvm_post_suspend(int suspend_cancelled)
++void xen_arch_hvm_post_suspend(int suspend_cancelled)
+ {
++#ifdef CONFIG_XEN_PVHVM
+ 	int cpu;
+ 	xen_hvm_init_shared_info();
+ 	xen_callback_vector();
+@@ -37,9 +38,10 @@ void xen_hvm_post_suspend(int suspend_cancelled)
+ 			xen_setup_runstate_info(cpu);
+ 		}
+ 	}
 +#endif
-+
-+static int __init netback_init(void)
-+{
-+	int i;
-+	struct page *page;
-+
-+	if (!is_running_on_xen())
-+		return -ENODEV;
-+
-+	/* We can increase reservation by this much in net_rx_action(). */
-+	balloon_update_driver_allowance(NET_RX_RING_SIZE);
-+
-+	skb_queue_head_init(&rx_queue);
-+	skb_queue_head_init(&tx_queue);
-+
-+	init_timer(&net_timer);
-+	net_timer.data = 0;
-+	net_timer.function = net_alarm;
-+
-+	init_timer(&netbk_tx_pending_timer);
-+	netbk_tx_pending_timer.data = 0;
-+	netbk_tx_pending_timer.function = netbk_tx_pending_timeout;
-+
-+	mmap_pages = alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
-+	if (mmap_pages == NULL) {
-+		printk("%s: out of memory\n", __FUNCTION__);
-+		return -ENOMEM;
-+	}
-+
-+	for (i = 0; i < MAX_PENDING_REQS; i++) {
-+		page = mmap_pages[i];
-+		SetPageForeign(page, netif_page_release);
-+		netif_set_page_index(page, i);
-+		INIT_LIST_HEAD(&pending_inuse[i].list);
-+	}
-+
-+	pending_cons = 0;
-+	pending_prod = MAX_PENDING_REQS;
-+	for (i = 0; i < MAX_PENDING_REQS; i++)
-+		pending_ring[i] = i;
-+
-+	spin_lock_init(&net_schedule_list_lock);
-+	INIT_LIST_HEAD(&net_schedule_list);
-+
-+	netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
-+	if (MODPARM_copy_skb) {
-+		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
-+					      NULL, 0))
-+			netbk_copy_skb_mode = NETBK_ALWAYS_COPY_SKB;
-+		else
-+			netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
-+	}
-+
-+	netif_accel_init();
-+
-+	netif_xenbus_init();
-+
-+#ifdef NETBE_DEBUG_INTERRUPT
-+	(void)bind_virq_to_irqhandler(VIRQ_DEBUG,
-+				      0,
-+				      netif_be_dbg,
-+				      SA_SHIRQ,
-+				      "net-be-dbg",
-+				      &netif_be_dbg);
-+#endif
-+
-+	return 0;
-+}
-+
-+module_init(netback_init);
-+
-+MODULE_LICENSE("Dual BSD/GPL");
-diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
-new file mode 100644
-index 0000000..d7faeb6
---- /dev/null
-+++ b/drivers/xen/netback/xenbus.c
-@@ -0,0 +1,454 @@
-+/*  Xenbus code for netif backend
-+    Copyright (C) 2005 Rusty Russell <rusty at rustcorp.com.au>
-+    Copyright (C) 2005 XenSource Ltd
-+
-+    This program is free software; you can redistribute it and/or modify
-+    it under the terms of the GNU General Public License as published by
-+    the Free Software Foundation; either version 2 of the License, or
-+    (at your option) any later version.
-+
-+    This program is distributed in the hope that it will be useful,
-+    but WITHOUT ANY WARRANTY; without even the implied warranty of
-+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-+    GNU General Public License for more details.
-+
-+    You should have received a copy of the GNU General Public License
-+    along with this program; if not, write to the Free Software
-+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-+*/
-+
-+#include <stdarg.h>
-+#include <linux/module.h>
-+#include <xen/xenbus.h>
-+#include "common.h"
-+
-+#if 0
-+#undef DPRINTK
-+#define DPRINTK(fmt, args...) \
-+    printk("netback/xenbus (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
-+#endif
-+
-+
-+static int connect_rings(struct backend_info *);
-+static void connect(struct backend_info *);
-+static void backend_create_netif(struct backend_info *be);
-+
-+static int netback_remove(struct xenbus_device *dev)
+ }
+ 
+-void xen_post_suspend(int suspend_cancelled)
++void xen_arch_post_suspend(int suspend_cancelled)
+ {
+ 	xen_build_mfn_list_list();
+ 
+diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
+index 067759e..2e2d370 100644
+--- a/arch/x86/xen/time.c
++++ b/arch/x86/xen/time.c
+@@ -397,7 +397,9 @@ void xen_setup_timer(int cpu)
+ 		name = "<timer kasprintf failed>";
+ 
+ 	irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
+-				      IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER,
++				      IRQF_DISABLED|IRQF_PERCPU|
++				      IRQF_NOBALANCING|IRQF_TIMER|
++				      IRQF_FORCE_RESUME,
+ 				      name, NULL);
+ 
+ 	evt = &per_cpu(xen_clock_events, cpu);
+diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
+index 9d41bf9..3112f55 100644
+--- a/arch/x86/xen/xen-ops.h
++++ b/arch/x86/xen/xen-ops.h
+@@ -64,10 +64,12 @@ void xen_setup_vcpu_info_placement(void);
+ 
+ #ifdef CONFIG_SMP
+ void xen_smp_init(void);
++void __init xen_hvm_smp_init(void);
+ 
+ extern cpumask_var_t xen_cpu_initialized_map;
+ #else
+ static inline void xen_smp_init(void) {}
++static inline void xen_hvm_smp_init(void) {}
+ #endif
+ 
+ #ifdef CONFIG_PARAVIRT_SPINLOCKS
+diff --git a/block/blk-core.c b/block/blk-core.c
+index 2f4002f..77836fc 100644
+--- a/block/blk-core.c
++++ b/block/blk-core.c
+@@ -455,6 +455,7 @@ void blk_put_queue(struct request_queue *q)
+ {
+ 	kobject_put(&q->kobj);
+ }
++EXPORT_SYMBOL_GPL(blk_put_queue);
+ 
+ void blk_cleanup_queue(struct request_queue *q)
+ {
+@@ -662,6 +663,7 @@ int blk_get_queue(struct request_queue *q)
+ 
+ 	return 1;
+ }
++EXPORT_SYMBOL_GPL(blk_get_queue);
+ 
+ static inline void blk_free_request(struct request_queue *q, struct request *rq)
+ {
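
These two exports look incidental, but they are what lets a block backend
live in a module; presumably they are here for the modular xen-blkback this
update enables. A hypothetical sketch of the usage pattern (illustrative
module code, not from the patch; note that blk_get_queue() returns 0 on
success and non-zero if the queue is already being torn down):

	#include <linux/blkdev.h>

	/* Pin a request queue for the lifetime of a backend instance. */
	static struct request_queue *backend_q;

	static int backend_attach(struct request_queue *q)
	{
		if (blk_get_queue(q))		/* non-zero: queue already dying */
			return -ENODEV;
		backend_q = q;
		return 0;
	}

	static void backend_detach(void)
	{
		blk_put_queue(backend_q);	/* drop the reference taken above */
		backend_q = NULL;
	}
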
+diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
+index d7aa39e..9cb8668 100644
+--- a/drivers/block/xen-blkfront.c
++++ b/drivers/block/xen-blkfront.c
+@@ -120,6 +120,10 @@ static DEFINE_SPINLOCK(minor_lock);
+ #define EXTENDED (1<<EXT_SHIFT)
+ #define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED))
+ #define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
++#define EMULATED_HD_DISK_MINOR_OFFSET (0)
++#define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256)
++#define EMULATED_SD_DISK_MINOR_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET + (4 * 16))
++#define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_HD_DISK_NAME_OFFSET + 4)
+ 
+ #define DEV_NAME	"xvd"	/* name in /dev */
+ 
+@@ -281,7 +285,7 @@ static int blkif_queue_request(struct request *req)
+ 	info->shadow[id].request = req;
+ 
+ 	ring_req->id = id;
+-	ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req);
++	ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
+ 	ring_req->handle = info->handle;
+ 
+ 	ring_req->operation = rq_data_dir(req) ?
+@@ -317,7 +321,7 @@ static int blkif_queue_request(struct request *req)
+ 				rq_data_dir(req) );
+ 
+ 		info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
+-		ring_req->seg[i] =
++		ring_req->u.rw.seg[i] =
+ 				(struct blkif_request_segment) {
+ 					.gref       = ref,
+ 					.first_sect = fsect,
+@@ -434,6 +438,65 @@ static void xlvbd_flush(struct blkfront_info *info)
+ 	       info->feature_flush ? "enabled" : "disabled");
+ }
+ 
++static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
 +{
-+	struct backend_info *be = dev->dev.driver_data;
-+
-+	netback_remove_accelerators(be, dev);
-+
-+	if (be->netif) {
-+		kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
-+		netif_disconnect(be->netif);
-+		be->netif = NULL;
++	int major;
++	major = BLKIF_MAJOR(vdevice);
++	*minor = BLKIF_MINOR(vdevice);
++	switch (major) {
++		case XEN_IDE0_MAJOR:
++			*offset = (*minor / 64) + EMULATED_HD_DISK_NAME_OFFSET;
++			*minor = ((*minor / 64) * PARTS_PER_DISK) +
++				EMULATED_HD_DISK_MINOR_OFFSET;
++			break;
++		case XEN_IDE1_MAJOR:
++			*offset = (*minor / 64) + 2 + EMULATED_HD_DISK_NAME_OFFSET;
++			*minor = (((*minor / 64) + 2) * PARTS_PER_DISK) +
++				EMULATED_HD_DISK_MINOR_OFFSET;
++			break;
++		case XEN_SCSI_DISK0_MAJOR:
++			*offset = (*minor / PARTS_PER_DISK) + EMULATED_SD_DISK_NAME_OFFSET;
++			*minor = *minor + EMULATED_SD_DISK_MINOR_OFFSET;
++			break;
++		case XEN_SCSI_DISK1_MAJOR:
++		case XEN_SCSI_DISK2_MAJOR:
++		case XEN_SCSI_DISK3_MAJOR:
++		case XEN_SCSI_DISK4_MAJOR:
++		case XEN_SCSI_DISK5_MAJOR:
++		case XEN_SCSI_DISK6_MAJOR:
++		case XEN_SCSI_DISK7_MAJOR:
++			*offset = (*minor / PARTS_PER_DISK) +
++				((major - XEN_SCSI_DISK1_MAJOR + 1) * 16) +
++				EMULATED_SD_DISK_NAME_OFFSET;
++			*minor = *minor +
++				((major - XEN_SCSI_DISK1_MAJOR + 1) * 16 * PARTS_PER_DISK) +
++				EMULATED_SD_DISK_MINOR_OFFSET;
++			break;
++		case XEN_SCSI_DISK8_MAJOR:
++		case XEN_SCSI_DISK9_MAJOR:
++		case XEN_SCSI_DISK10_MAJOR:
++		case XEN_SCSI_DISK11_MAJOR:
++		case XEN_SCSI_DISK12_MAJOR:
++		case XEN_SCSI_DISK13_MAJOR:
++		case XEN_SCSI_DISK14_MAJOR:
++		case XEN_SCSI_DISK15_MAJOR:
++			*offset = (*minor / PARTS_PER_DISK) +
++				((major - XEN_SCSI_DISK8_MAJOR + 8) * 16) +
++				EMULATED_SD_DISK_NAME_OFFSET;
++			*minor = *minor +
++				((major - XEN_SCSI_DISK8_MAJOR + 8) * 16 * PARTS_PER_DISK) +
++				EMULATED_SD_DISK_MINOR_OFFSET;
++			break;
++		case XENVBD_MAJOR:
++			*offset = *minor / PARTS_PER_DISK;
++			break;
++		default:
++			printk(KERN_WARNING "blkfront: your disk configuration is "
++					"incorrect, please use an xvd device instead\n");
++			return -ENODEV;
 +	}
-+	kfree(be);
-+	dev->dev.driver_data = NULL;
 +	return 0;
 +}
-+
-+
-+/**
-+ * Entry point to this code when a new device is created.  Allocate the basic
-+ * structures and switch to InitWait.
-+ */
-+static int netback_probe(struct xenbus_device *dev,
-+			 const struct xenbus_device_id *id)
-+{
-+	const char *message;
-+	struct xenbus_transaction xbt;
+ 
+ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
+ 			       struct blkfront_info *info,
+@@ -441,7 +504,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
+ {
+ 	struct gendisk *gd;
+ 	int nr_minors = 1;
+-	int err = -ENODEV;
 +	int err;
-+	int sg;
-+	struct backend_info *be = kzalloc(sizeof(struct backend_info),
-+					  GFP_KERNEL);
-+	if (!be) {
-+		xenbus_dev_fatal(dev, -ENOMEM,
-+				 "allocating backend structure");
+ 	unsigned int offset;
+ 	int minor;
+ 	int nr_parts;
+@@ -456,12 +519,20 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
+ 	}
+ 
+ 	if (!VDEV_IS_EXTENDED(info->vdevice)) {
+-		minor = BLKIF_MINOR(info->vdevice);
+-		nr_parts = PARTS_PER_DISK;
++		err = xen_translate_vdev(info->vdevice, &minor, &offset);
++		if (err)
++			return err;
++		nr_parts = PARTS_PER_DISK;
+ 	} else {
+ 		minor = BLKIF_MINOR_EXT(info->vdevice);
+ 		nr_parts = PARTS_PER_EXT_DISK;
++		offset = minor / nr_parts;
++		if (xen_hvm_domain() && offset <= EMULATED_HD_DISK_NAME_OFFSET + 4)
++			printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with "
++					"emulated IDE disks,\n\t choose an xvd device name "
++					"from xvde on\n", info->vdevice);
+ 	}
++	err = -ENODEV;
+ 
+ 	if ((minor % nr_parts) == 0)
+ 		nr_minors = nr_parts;
+@@ -475,8 +546,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
+ 	if (gd == NULL)
+ 		goto release;
+ 
+-	offset = minor / nr_parts;
+-
+ 	if (nr_minors > 1) {
+ 		if (offset < 26)
+ 			sprintf(gd->disk_name, "%s%c", DEV_NAME, 'a' + offset);
+@@ -615,7 +684,7 @@ static void blkif_completion(struct blk_shadow *s)
+ {
+ 	int i;
+ 	for (i = 0; i < s->req.nr_segments; i++)
+-		gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL);
++		gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL);
+ }
+ 
+ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
+@@ -932,7 +1001,7 @@ static int blkif_recover(struct blkfront_info *info)
+ 		/* Rewrite any grant references invalidated by susp/resume. */
+ 		for (j = 0; j < req->nr_segments; j++)
+ 			gnttab_grant_foreign_access_ref(
+-				req->seg[j].gref,
++				req->u.rw.seg[j].gref,
+ 				info->xbdev->otherend_id,
+ 				pfn_to_mfn(info->shadow[req->id].frame[j]),
+ 				rq_data_dir(info->shadow[req->id].request));
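
A worked example of the xen_translate_vdev() mapping above, using
PARTS_PER_DISK = 16 from this driver (so EMULATED_SD_DISK_MINOR_OFFSET
= 4 * 16 = 64 and EMULATED_SD_DISK_NAME_OFFSET = 0 + 4 = 4):

	/dev/hda (IDE0,  minor  0): offset = 0/64  + 0 = 0 -> xvda, minor 0
	/dev/hdb (IDE0,  minor 64): offset = 64/64 + 0 = 1 -> xvdb, minor 16
	/dev/sda (SCSI0, minor  0): offset = 0/16  + 4 = 4 -> xvde, minor 64

Emulated IDE disks therefore occupy xvda through xvdd and emulated SCSI
disks begin at xvde, which is what the warning in xlvbd_alloc_gendisk()
above guards against for extended vdevice numbers in HVM guests.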
+diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
+index 26347b7..3706156 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
++++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
+@@ -412,7 +412,8 @@ nouveau_mem_vram_init(struct drm_device *dev)
+ 	ret = ttm_bo_device_init(&dev_priv->ttm.bdev,
+ 				 dev_priv->ttm.bo_global_ref.ref.object,
+ 				 &nouveau_bo_driver, DRM_FILE_PAGE_OFFSET,
+-				 dma_bits <= 32 ? true : false);
++				 dma_bits <= 32 ? true : false,
++				 dev->dev);
+ 	if (ret) {
+ 		NV_ERROR(dev, "Error initialising bo driver: %d\n", ret);
+ 		return ret;
+diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
+index 9a250eb..07b1151 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
++++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
+@@ -12,6 +12,7 @@ struct nouveau_sgdma_be {
+ 	struct drm_device *dev;
+ 
+ 	dma_addr_t *pages;
++	bool *ttm_alloced;
+ 	unsigned nr_pages;
+ 
+ 	u64 offset;
+@@ -20,7 +21,8 @@ struct nouveau_sgdma_be {
+ 
+ static int
+ nouveau_sgdma_populate(struct ttm_backend *be, unsigned long num_pages,
+-		       struct page **pages, struct page *dummy_read_page)
++		       struct page **pages, struct page *dummy_read_page,
++		       dma_addr_t *dma_addrs)
+ {
+ 	struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
+ 	struct drm_device *dev = nvbe->dev;
+@@ -34,15 +36,25 @@ nouveau_sgdma_populate(struct ttm_backend *be, unsigned long num_pages,
+ 	if (!nvbe->pages)
+ 		return -ENOMEM;
+ 
++	nvbe->ttm_alloced = kmalloc(sizeof(bool) * num_pages, GFP_KERNEL);
++	if (!nvbe->ttm_alloced)
 +		return -ENOMEM;
-+	}
-+
-+	be->dev = dev;
-+	dev->dev.driver_data = be;
-+
-+	sg = 1;
-+	if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB)
-+		sg = 0;
-+
-+	do {
-+		err = xenbus_transaction_start(&xbt);
-+		if (err) {
-+			xenbus_dev_fatal(dev, err, "starting transaction");
-+			goto fail;
-+		}
-+
-+		err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg);
-+		if (err) {
-+			message = "writing feature-sg";
-+			goto abort_transaction;
-+		}
-+
-+		err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4",
-+				    "%d", sg);
-+		if (err) {
-+			message = "writing feature-gso-tcpv4";
-+			goto abort_transaction;
-+		}
-+
-+		/* We support rx-copy path. */
-+		err = xenbus_printf(xbt, dev->nodename,
-+				    "feature-rx-copy", "%d", 1);
-+		if (err) {
-+			message = "writing feature-rx-copy";
-+			goto abort_transaction;
-+		}
-+
-+		/*
-+		 * We don't support rx-flip path (except old guests who don't
-+		 * grok this feature flag).
-+		 */
-+		err = xenbus_printf(xbt, dev->nodename,
-+				    "feature-rx-flip", "%d", 0);
-+		if (err) {
-+			message = "writing feature-rx-flip";
-+			goto abort_transaction;
-+		}
-+
-+		err = xenbus_transaction_end(xbt, 0);
-+	} while (err == -EAGAIN);
-+
-+	if (err) {
-+		xenbus_dev_fatal(dev, err, "completing transaction");
-+		goto fail;
-+	}
 +
-+	netback_probe_accelerators(be, dev);
-+
-+	err = xenbus_switch_state(dev, XenbusStateInitWait);
-+	if (err)
-+		goto fail;
-+
-+	/* This kicks hotplug scripts, so do it immediately. */
-+	backend_create_netif(be);
-+
-+	return 0;
-+
-+abort_transaction:
-+	xenbus_transaction_end(xbt, 1);
-+	xenbus_dev_fatal(dev, err, "%s", message);
-+fail:
-+	DPRINTK("failed");
-+	netback_remove(dev);
-+	return err;
-+}
-+
-+
-+/**
-+ * Handle the creation of the hotplug script environment.  We add the script
-+ * and vif variables to the environment, for the benefit of the vif-* hotplug
-+ * scripts.
-+ */
-+static int netback_uevent(struct xenbus_device *xdev, char **envp,
-+			  int num_envp, char *buffer, int buffer_size)
-+{
-+	struct backend_info *be = xdev->dev.driver_data;
-+	netif_t *netif = be->netif;
-+	int i = 0, length = 0;
-+	char *val;
-+
-+	DPRINTK("netback_uevent");
-+
-+	val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
-+	if (IS_ERR(val)) {
-+		int err = PTR_ERR(val);
-+		xenbus_dev_fatal(xdev, err, "reading script");
-+		return err;
-+	}
-+	else {
-+		add_uevent_var(envp, num_envp, &i, buffer, buffer_size,
-+			       &length, "script=%s", val);
-+		kfree(val);
-+	}
-+
-+	add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
-+		       "vif=%s", netif->dev->name);
-+
-+	envp[i] = NULL;
-+
-+	return 0;
-+}
-+
-+
-+static void backend_create_netif(struct backend_info *be)
-+{
-+	int err;
-+	long handle;
-+	struct xenbus_device *dev = be->dev;
-+
-+	if (be->netif != NULL)
-+		return;
-+
-+	err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%li", &handle);
-+	if (err != 1) {
-+		xenbus_dev_fatal(dev, err, "reading handle");
-+		return;
-+	}
-+
-+	be->netif = netif_alloc(dev->otherend_id, handle);
-+	if (IS_ERR(be->netif)) {
-+		err = PTR_ERR(be->netif);
-+		be->netif = NULL;
-+		xenbus_dev_fatal(dev, err, "creating interface");
-+		return;
+ 	nvbe->nr_pages = 0;
+ 	while (num_pages--) {
+-		nvbe->pages[nvbe->nr_pages] =
+-			pci_map_page(dev->pdev, pages[nvbe->nr_pages], 0,
++		if (dma_addrs[nvbe->nr_pages] != DMA_ERROR_CODE) {
++			nvbe->pages[nvbe->nr_pages] =
++					dma_addrs[nvbe->nr_pages];
++			nvbe->ttm_alloced[nvbe->nr_pages] = true;
++		} else {
++			nvbe->pages[nvbe->nr_pages] =
++				pci_map_page(dev->pdev, pages[nvbe->nr_pages], 0,
+ 				     PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+-		if (pci_dma_mapping_error(dev->pdev,
+-					  nvbe->pages[nvbe->nr_pages])) {
+-			be->func->clear(be);
+-			return -EFAULT;
++			if (pci_dma_mapping_error(dev->pdev,
++						  nvbe->pages[nvbe->nr_pages])) {
++				be->func->clear(be);
++				return -EFAULT;
++			}
+ 		}
+ 
+ 		nvbe->nr_pages++;
+@@ -65,11 +77,14 @@ nouveau_sgdma_clear(struct ttm_backend *be)
+ 			be->func->unbind(be);
+ 
+ 		while (nvbe->nr_pages--) {
+-			pci_unmap_page(dev->pdev, nvbe->pages[nvbe->nr_pages],
++			if (!nvbe->ttm_alloced[nvbe->nr_pages])
++				pci_unmap_page(dev->pdev, nvbe->pages[nvbe->nr_pages],
+ 				       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+ 		}
+ 		kfree(nvbe->pages);
++		kfree(nvbe->ttm_alloced);
+ 		nvbe->pages = NULL;
++		nvbe->ttm_alloced = NULL;
+ 		nvbe->nr_pages = 0;
+ 	}
+ }
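
The pattern introduced in this populate/clear pair (and repeated in the
radeon hunks below) deserves a summary: populate() now receives a
dma_addrs[] array from TTM, where any entry other than DMA_ERROR_CODE
means TTM already allocated that page coherently, so the driver must
adopt the existing mapping and skip pci_unmap_page() on teardown. A
condensed sketch of the per-page decision:

	if (dma_addrs[i] != DMA_ERROR_CODE) {
		pages_dma[i] = dma_addrs[i];	/* TTM mapped it */
		ttm_alloced[i] = true;		/* no pci_unmap later */
	} else {
		pages_dma[i] = pci_map_page(pdev, pages[i], 0, PAGE_SIZE,
					    PCI_DMA_BIDIRECTIONAL);
		ttm_alloced[i] = false;
	}
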
+diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
+index d270b3f..f643133 100644
+--- a/drivers/gpu/drm/radeon/evergreen.c
++++ b/drivers/gpu/drm/radeon/evergreen.c
+@@ -3048,9 +3048,6 @@ int evergreen_init(struct radeon_device *rdev)
+ {
+ 	int r;
+ 
+-	r = radeon_dummy_page_init(rdev);
+-	if (r)
+-		return r;
+ 	/* This don't do much */
+ 	r = radeon_gem_init(rdev);
+ 	if (r)
+@@ -3162,7 +3159,6 @@ void evergreen_fini(struct radeon_device *rdev)
+ 	radeon_atombios_fini(rdev);
+ 	kfree(rdev->bios);
+ 	rdev->bios = NULL;
+-	radeon_dummy_page_fini(rdev);
+ }
+ 
+ static void evergreen_pcie_gen2_enable(struct radeon_device *rdev)
+diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
+index de88624..36efc45 100644
+--- a/drivers/gpu/drm/radeon/r600.c
++++ b/drivers/gpu/drm/radeon/r600.c
+@@ -2509,9 +2509,6 @@ int r600_init(struct radeon_device *rdev)
+ {
+ 	int r;
+ 
+-	r = radeon_dummy_page_init(rdev);
+-	if (r)
+-		return r;
+ 	if (r600_debugfs_mc_info_init(rdev)) {
+ 		DRM_ERROR("Failed to register debugfs file for mc !\n");
+ 	}
+@@ -2625,7 +2622,6 @@ void r600_fini(struct radeon_device *rdev)
+ 	radeon_atombios_fini(rdev);
+ 	kfree(rdev->bios);
+ 	rdev->bios = NULL;
+-	radeon_dummy_page_fini(rdev);
+ }
+ 
+ 
+diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
+index 56c48b6..c5955d3 100644
+--- a/drivers/gpu/drm/radeon/radeon.h
++++ b/drivers/gpu/drm/radeon/radeon.h
+@@ -319,6 +319,7 @@ struct radeon_gart {
+ 	union radeon_gart_table		table;
+ 	struct page			**pages;
+ 	dma_addr_t			*pages_addr;
++	bool				*ttm_alloced;
+ 	bool				ready;
+ };
+ 
+@@ -331,7 +332,8 @@ void radeon_gart_fini(struct radeon_device *rdev);
+ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
+ 			int pages);
+ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
+-		     int pages, struct page **pagelist);
++		     int pages, struct page **pagelist,
++		     dma_addr_t *dma_addr);
+ 
+ 
+ /*
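
radeon mirrors the nouveau change: struct radeon_gart grows a
ttm_alloced[] bookkeeping array and radeon_gart_bind() takes the
per-page DMA addresses. One editorial aside on the radeon_gart.c hunk
below: the bookkeeping array is allocated with
kzalloc(sizeof(bool) * num_cpu_pages, ...); kcalloc would be the
overflow-checked idiom for an array allocation. A sketch of that
alternative (a suggestion, not what the patch does):

	rdev->gart.ttm_alloced = kcalloc(rdev->gart.num_cpu_pages,
					 sizeof(bool), GFP_KERNEL);
	if (rdev->gart.ttm_alloced == NULL) {
		radeon_gart_fini(rdev);
		return -ENOMEM;
	}
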
+diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
+index 6501611..de4a86f 100644
+--- a/drivers/gpu/drm/radeon/radeon_gart.c
++++ b/drivers/gpu/drm/radeon/radeon_gart.c
+@@ -149,8 +149,9 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
+ 	p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);
+ 	for (i = 0; i < pages; i++, p++) {
+ 		if (rdev->gart.pages[p]) {
+-			pci_unmap_page(rdev->pdev, rdev->gart.pages_addr[p],
+-				       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
++			if (!rdev->gart.ttm_alloced[p])
++				pci_unmap_page(rdev->pdev, rdev->gart.pages_addr[p],
++				       		PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+ 			rdev->gart.pages[p] = NULL;
+ 			rdev->gart.pages_addr[p] = rdev->dummy_page.addr;
+ 			page_base = rdev->gart.pages_addr[p];
+@@ -165,7 +166,7 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
+ }
+ 
+ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
+-		     int pages, struct page **pagelist)
++		     int pages, struct page **pagelist, dma_addr_t *dma_addr)
+ {
+ 	unsigned t;
+ 	unsigned p;
+@@ -180,15 +181,22 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
+ 	p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);
+ 
+ 	for (i = 0; i < pages; i++, p++) {
+-		/* we need to support large memory configurations */
+-		/* assume that unbind have already been call on the range */
+-		rdev->gart.pages_addr[p] = pci_map_page(rdev->pdev, pagelist[i],
++		/* On TTM path, we only use the DMA API if TTM_PAGE_FLAG_DMA32
++		 * is requested. */
++		if (dma_addr[i] != DMA_ERROR_CODE) {
++			rdev->gart.ttm_alloced[p] = true;
++			rdev->gart.pages_addr[p] = dma_addr[i];
++		} else {
++			/* we need to support large memory configurations */
++			/* assume that unbind has already been called on the range */
++			rdev->gart.pages_addr[p] = pci_map_page(rdev->pdev, pagelist[i],
+ 							0, PAGE_SIZE,
+ 							PCI_DMA_BIDIRECTIONAL);
+-		if (pci_dma_mapping_error(rdev->pdev, rdev->gart.pages_addr[p])) {
+-			/* FIXME: failed to map page (return -ENOMEM?) */
+-			radeon_gart_unbind(rdev, offset, pages);
+-			return -ENOMEM;
++			if (pci_dma_mapping_error(rdev->pdev, rdev->gart.pages_addr[p])) {
++				/* FIXME: failed to map page (return -ENOMEM?) */
++				radeon_gart_unbind(rdev, offset, pages);
++				return -ENOMEM;
++			}
+ 		}
+ 		rdev->gart.pages[p] = pagelist[i];
+ 		page_base = rdev->gart.pages_addr[p];
+@@ -251,6 +259,12 @@ int radeon_gart_init(struct radeon_device *rdev)
+ 		radeon_gart_fini(rdev);
+ 		return -ENOMEM;
+ 	}
++	rdev->gart.ttm_alloced = kzalloc(sizeof(bool) *
++					 rdev->gart.num_cpu_pages, GFP_KERNEL);
++	if (rdev->gart.ttm_alloced == NULL) {
++		radeon_gart_fini(rdev);
++		return -ENOMEM;
 +	}
-+
-+	kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
-+}
-+
-+
-+/**
-+ * Callback received when the frontend's state changes.
-+ */
-+static void frontend_changed(struct xenbus_device *dev,
-+			     enum xenbus_state frontend_state)
-+{
-+	struct backend_info *be = dev->dev.driver_data;
-+
-+	DPRINTK("%s", xenbus_strstate(frontend_state));
-+
-+	be->frontend_state = frontend_state;
-+
-+	switch (frontend_state) {
-+	case XenbusStateInitialising:
-+		if (dev->state == XenbusStateClosed) {
-+			printk(KERN_INFO "%s: %s: prepare for reconnect\n",
-+			       __FUNCTION__, dev->nodename);
-+			xenbus_switch_state(dev, XenbusStateInitWait);
-+		}
-+		break;
-+
-+	case XenbusStateInitialised:
-+		break;
-+
-+	case XenbusStateConnected:
-+		if (dev->state == XenbusStateConnected)
-+			break;
-+		backend_create_netif(be);
-+		if (be->netif)
-+			connect(be);
-+		break;
-+
-+	case XenbusStateClosing:
-+		if (be->netif) {
-+			kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
-+			netif_disconnect(be->netif);
-+			be->netif = NULL;
-+		}
-+		xenbus_switch_state(dev, XenbusStateClosing);
-+		break;
-+
-+	case XenbusStateClosed:
-+		xenbus_switch_state(dev, XenbusStateClosed);
-+		if (xenbus_dev_is_online(dev))
-+			break;
-+		/* fall through if not online */
-+	case XenbusStateUnknown:
-+		device_unregister(&dev->dev);
-+		break;
-+
-+	default:
-+		xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
-+				 frontend_state);
-+		break;
-+	}
-+}
-+
-+
-+static void xen_net_read_rate(struct xenbus_device *dev,
-+			      unsigned long *bytes, unsigned long *usec)
-+{
-+	char *s, *e;
-+	unsigned long b, u;
-+	char *ratestr;
-+
-+	/* Default to unlimited bandwidth. */
-+	*bytes = ~0UL;
-+	*usec = 0;
-+
-+	ratestr = xenbus_read(XBT_NIL, dev->nodename, "rate", NULL);
-+	if (IS_ERR(ratestr))
-+		return;
-+
-+	s = ratestr;
-+	b = simple_strtoul(s, &e, 10);
-+	if ((s == e) || (*e != ','))
-+		goto fail;
-+
-+	s = e + 1;
-+	u = simple_strtoul(s, &e, 10);
-+	if ((s == e) || (*e != '\0'))
-+		goto fail;
-+
-+	*bytes = b;
-+	*usec = u;
-+
-+	kfree(ratestr);
-+	return;
-+
-+ fail:
-+	WPRINTK("Failed to parse network rate limit. Traffic unlimited.\n");
-+	kfree(ratestr);
-+}
-+
-+static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
-+{
-+	char *s, *e, *macstr;
-+	int i;
-+
-+	macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
-+	if (IS_ERR(macstr))
-+		return PTR_ERR(macstr);
-+
-+	for (i = 0; i < ETH_ALEN; i++) {
-+		mac[i] = simple_strtoul(s, &e, 16);
-+		if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
-+			kfree(macstr);
-+			return -ENOENT;
-+		}
-+		s = e+1;
-+	}
-+
-+	kfree(macstr);
-+	return 0;
-+}
-+
-+static void connect(struct backend_info *be)
-+{
-+	int err;
-+	struct xenbus_device *dev = be->dev;
-+
-+	err = connect_rings(be);
-+	if (err)
-+		return;
-+
-+	err = xen_net_read_mac(dev, be->netif->fe_dev_addr);
-+	if (err) {
-+		xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
-+		return;
-+	}
-+
-+	xen_net_read_rate(dev, &be->netif->credit_bytes,
-+			  &be->netif->credit_usec);
-+	be->netif->remaining_credit = be->netif->credit_bytes;
-+
-+	xenbus_switch_state(dev, XenbusStateConnected);
-+
-+	netif_wake_queue(be->netif->dev);
-+}
-+
-+
-+static int connect_rings(struct backend_info *be)
-+{
-+	struct xenbus_device *dev = be->dev;
-+	unsigned long tx_ring_ref, rx_ring_ref;
-+	unsigned int evtchn, rx_copy;
-+	int err;
-+	int val;
-+
-+	DPRINTK("");
-+
-+	err = xenbus_gather(XBT_NIL, dev->otherend,
-+			    "tx-ring-ref", "%lu", &tx_ring_ref,
-+			    "rx-ring-ref", "%lu", &rx_ring_ref,
-+			    "event-channel", "%u", &evtchn, NULL);
-+	if (err) {
-+		xenbus_dev_fatal(dev, err,
-+				 "reading %s/ring-ref and event-channel",
-+				 dev->otherend);
-+		return err;
-+	}
-+
-+	err = xenbus_scanf(XBT_NIL, dev->otherend, "request-rx-copy", "%u",
-+			   &rx_copy);
-+	if (err == -ENOENT) {
-+		err = 0;
-+		rx_copy = 0;
-+	}
-+	if (err < 0) {
-+		xenbus_dev_fatal(dev, err, "reading %s/request-rx-copy",
-+				 dev->otherend);
-+		return err;
-+	}
-+	be->netif->copying_receiver = !!rx_copy;
-+
-+	if (be->netif->dev->tx_queue_len != 0) {
-+		if (xenbus_scanf(XBT_NIL, dev->otherend,
-+				 "feature-rx-notify", "%d", &val) < 0)
-+			val = 0;
-+		if (val)
-+			be->netif->can_queue = 1;
-+		else
-+			/* Must be non-zero for pfifo_fast to work. */
-+			be->netif->dev->tx_queue_len = 1;
-+	}
-+
-+	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", "%d", &val) < 0)
-+		val = 0;
-+	if (val) {
-+		be->netif->features |= NETIF_F_SG;
-+		be->netif->dev->features |= NETIF_F_SG;
-+	}
-+
-+	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4", "%d",
-+			 &val) < 0)
-+		val = 0;
-+	if (val) {
-+		be->netif->features |= NETIF_F_TSO;
-+		be->netif->dev->features |= NETIF_F_TSO;
-+	}
-+
-+	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
-+			 "%d", &val) < 0)
-+		val = 0;
-+	if (val) {
-+		be->netif->features &= ~NETIF_F_IP_CSUM;
-+		be->netif->dev->features &= ~NETIF_F_IP_CSUM;
-+	}
-+
-+	/* Map the shared frame, irq etc. */
-+	err = netif_map(be->netif, tx_ring_ref, rx_ring_ref, evtchn);
-+	if (err) {
-+		xenbus_dev_fatal(dev, err,
-+				 "mapping shared-frames %lu/%lu port %u",
-+				 tx_ring_ref, rx_ring_ref, evtchn);
-+		return err;
-+	}
-+	return 0;
-+}
-+
-+
-+/* ** Driver Registration ** */
-+
-+
-+static const struct xenbus_device_id netback_ids[] = {
-+	{ "vif" },
-+	{ "" }
-+};
-+
-+
-+static struct xenbus_driver netback = {
-+	.name = "vif",
-+	.owner = THIS_MODULE,
-+	.ids = netback_ids,
-+	.probe = netback_probe,
-+	.remove = netback_remove,
-+	.uevent = netback_uevent,
-+	.otherend_changed = frontend_changed,
-+};
-+
-+
-+void netif_xenbus_init(void)
-+{
-+	xenbus_register_backend(&netback);
-+}
--- 
-1.7.4
-
-
-From 5b30803bf5f58ee980edd8d88a2d73dda995ee93 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Mon, 9 Feb 2009 12:05:52 -0800
-Subject: [PATCH 002/203] xen: netback: first cut at porting to upstream and cleaning up
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/xen/Kconfig             |    2 +-
- drivers/xen/netback/Makefile    |    2 +-
- drivers/xen/netback/common.h    |   33 +++---
- drivers/xen/netback/interface.c |   37 +++---
- drivers/xen/netback/netback.c   |  248 ++++++++++++++++++++++++---------------
- drivers/xen/netback/xenbus.c    |   25 ++--
- 6 files changed, 201 insertions(+), 146 deletions(-)
-
-diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
-index 7e83d43..30290a8 100644
---- a/drivers/xen/Kconfig
-+++ b/drivers/xen/Kconfig
-@@ -38,7 +38,7 @@ config XEN_BACKEND
- 	  to other virtual machines.
- 
- config XEN_NETDEV_BACKEND
--       bool "Xen backend network device"
-+       tristate "Xen backend network device"
-        depends on XEN_BACKEND && NET
-        help
-          Implement the network backend driver, which passes packets
-diff --git a/drivers/xen/netback/Makefile b/drivers/xen/netback/Makefile
-index f4a0c51..a01a1a3 100644
---- a/drivers/xen/netback/Makefile
-+++ b/drivers/xen/netback/Makefile
-@@ -1,3 +1,3 @@
- obj-$(CONFIG_XEN_NETDEV_BACKEND) := netbk.o
- 
--netbk-y   := netback.o xenbus.o interface.o
-+netbk-y := netback.o xenbus.o interface.o
-diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
-index 9a54d57..65b88f4 100644
---- a/drivers/xen/netback/common.h
-+++ b/drivers/xen/netback/common.h
-@@ -43,8 +43,7 @@
- #include <asm/io.h>
- #include <asm/pgalloc.h>
- #include <xen/interface/grant_table.h>
--#include <xen/gnttab.h>
--#include <xen/driver_util.h>
-+#include <xen/grant_table.h>
- #include <xen/xenbus.h>
- 
- #define DPRINTK(_f, _a...)			\
-@@ -55,7 +54,7 @@
- #define WPRINTK(fmt, args...)				\
- 	printk(KERN_WARNING "xen_net: " fmt, ##args)
- 
--typedef struct netif_st {
-+struct xen_netif {
- 	/* Unique identifier for this interface. */
- 	domid_t          domid;
- 	unsigned int     handle;
-@@ -70,8 +69,8 @@ typedef struct netif_st {
- 	unsigned int     irq;
- 
- 	/* The shared rings and indexes. */
--	netif_tx_back_ring_t tx;
--	netif_rx_back_ring_t rx;
-+	struct xen_netif_tx_back_ring tx;
-+	struct xen_netif_rx_back_ring rx;
- 	struct vm_struct *tx_comms_area;
- 	struct vm_struct *rx_comms_area;
- 
-@@ -103,7 +102,7 @@ typedef struct netif_st {
- 	unsigned int carrier;
- 
- 	wait_queue_head_t waiting_to_free;
--} netif_t;
-+};
- 
- /*
-  * Implement our own carrier flag: the network stack's version causes delays
-@@ -141,7 +140,7 @@ struct netback_accelerator {
- 
- struct backend_info {
- 	struct xenbus_device *dev;
--	netif_t *netif;
-+	struct xen_netif *netif;
- 	enum xenbus_state frontend_state;
- 
- 	/* State relating to the netback accelerator */
-@@ -174,13 +173,13 @@ extern
- void netif_accel_init(void);
- 
- 
--#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
--#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
-+#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
-+#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
- 
--void netif_disconnect(netif_t *netif);
-+void netif_disconnect(struct xen_netif *netif);
- 
--netif_t *netif_alloc(domid_t domid, unsigned int handle);
--int netif_map(netif_t *netif, unsigned long tx_ring_ref,
-+struct xen_netif *netif_alloc(domid_t domid, unsigned int handle);
-+int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
- 	      unsigned long rx_ring_ref, unsigned int evtchn);
- 
- #define netif_get(_b) (atomic_inc(&(_b)->refcnt))
-@@ -195,22 +194,22 @@ void netif_xenbus_init(void);
- #define netif_schedulable(netif)				\
- 	(netif_running((netif)->dev) && netback_carrier_ok(netif))
- 
--void netif_schedule_work(netif_t *netif);
--void netif_deschedule_work(netif_t *netif);
-+void netif_schedule_work(struct xen_netif *netif);
-+void netif_deschedule_work(struct xen_netif *netif);
- 
- int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
- struct net_device_stats *netif_be_get_stats(struct net_device *dev);
--irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs);
-+irqreturn_t netif_be_int(int irq, void *dev_id);
- 
- static inline int netbk_can_queue(struct net_device *dev)
- {
--	netif_t *netif = netdev_priv(dev);
-+	struct xen_netif *netif = netdev_priv(dev);
- 	return netif->can_queue;
- }
- 
- static inline int netbk_can_sg(struct net_device *dev)
- {
--	netif_t *netif = netdev_priv(dev);
-+	struct xen_netif *netif = netdev_priv(dev);
- 	return netif->features & NETIF_F_SG;
+ 	/* set GART entry to point to the dummy page by default */
+ 	for (i = 0; i < rdev->gart.num_cpu_pages; i++) {
+ 		rdev->gart.pages_addr[i] = rdev->dummy_page.addr;
+@@ -267,6 +281,9 @@ void radeon_gart_fini(struct radeon_device *rdev)
+ 	rdev->gart.ready = false;
+ 	kfree(rdev->gart.pages);
+ 	kfree(rdev->gart.pages_addr);
++	kfree(rdev->gart.ttm_alloced);
+ 	rdev->gart.pages = NULL;
+ 	rdev->gart.pages_addr = NULL;
++	rdev->gart.ttm_alloced = NULL;
++	radeon_dummy_page_fini(rdev);
  }
- 
-diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
-index 7e67941..d184ad7 100644
---- a/drivers/xen/netback/interface.c
-+++ b/drivers/xen/netback/interface.c
-@@ -34,6 +34,9 @@
- #include <linux/ethtool.h>
- #include <linux/rtnetlink.h>
- 
-+#include <xen/events.h>
-+#include <asm/xen/hypercall.h>
-+
- /*
-  * Module parameter 'queue_length':
-  *
-@@ -51,13 +54,13 @@
- static unsigned long netbk_queue_length = 32;
- module_param_named(queue_length, netbk_queue_length, ulong, 0);
- 
--static void __netif_up(netif_t *netif)
-+static void __netif_up(struct xen_netif *netif)
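
The radeon_ttm.c hunk below, together with the ttm_agp_backend.c and
vmwgfx hunks further on, updates the same driver-facing callback: the
TTM backend populate() hook gains a dma_addr_t array so backends can
reuse mappings created by the TTM page allocator. The revised contract
in sketch form (assembled from the hunks in this patch, not quoted from
a header):

	int (*populate)(struct ttm_backend *backend,
			unsigned long num_pages, struct page **pages,
			struct page *dummy_read_page,
			dma_addr_t *dma_addrs);
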
+diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
+index e5b2cf1..371890c 100644
+--- a/drivers/gpu/drm/radeon/radeon_ttm.c
++++ b/drivers/gpu/drm/radeon/radeon_ttm.c
+@@ -517,7 +517,8 @@ int radeon_ttm_init(struct radeon_device *rdev)
+ 	r = ttm_bo_device_init(&rdev->mman.bdev,
+ 			       rdev->mman.bo_global_ref.ref.object,
+ 			       &radeon_bo_driver, DRM_FILE_PAGE_OFFSET,
+-			       rdev->need_dma32);
++			       rdev->need_dma32,
++			       rdev->dev);
+ 	if (r) {
+ 		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
+ 		return r;
+@@ -647,6 +648,7 @@ struct radeon_ttm_backend {
+ 	unsigned long			num_pages;
+ 	struct page			**pages;
+ 	struct page			*dummy_read_page;
++	dma_addr_t			*dma_addrs;
+ 	bool				populated;
+ 	bool				bound;
+ 	unsigned			offset;
+@@ -655,12 +657,14 @@ struct radeon_ttm_backend {
+ static int radeon_ttm_backend_populate(struct ttm_backend *backend,
+ 				       unsigned long num_pages,
+ 				       struct page **pages,
+-				       struct page *dummy_read_page)
++				       struct page *dummy_read_page,
++				       dma_addr_t *dma_addrs)
  {
- 	enable_irq(netif->irq);
- 	netif_schedule_work(netif);
- }
+ 	struct radeon_ttm_backend *gtt;
  
--static void __netif_down(netif_t *netif)
-+static void __netif_down(struct xen_netif *netif)
- {
- 	disable_irq(netif->irq);
- 	netif_deschedule_work(netif);
-@@ -65,7 +68,7 @@ static void __netif_down(netif_t *netif)
+ 	gtt = container_of(backend, struct radeon_ttm_backend, backend);
+ 	gtt->pages = pages;
++	gtt->dma_addrs = dma_addrs;
+ 	gtt->num_pages = num_pages;
+ 	gtt->dummy_read_page = dummy_read_page;
+ 	gtt->populated = true;
+@@ -673,6 +677,7 @@ static void radeon_ttm_backend_clear(struct ttm_backend *backend)
  
- static int net_open(struct net_device *dev)
- {
--	netif_t *netif = netdev_priv(dev);
-+	struct xen_netif *netif = netdev_priv(dev);
- 	if (netback_carrier_ok(netif)) {
- 		__netif_up(netif);
- 		netif_start_queue(dev);
-@@ -75,7 +78,7 @@ static int net_open(struct net_device *dev)
- 
- static int net_close(struct net_device *dev)
- {
--	netif_t *netif = netdev_priv(dev);
-+	struct xen_netif *netif = netdev_priv(dev);
- 	if (netback_carrier_ok(netif))
- 		__netif_down(netif);
- 	netif_stop_queue(dev);
-@@ -95,7 +98,7 @@ static int netbk_change_mtu(struct net_device *dev, int mtu)
- static int netbk_set_sg(struct net_device *dev, u32 data)
- {
- 	if (data) {
--		netif_t *netif = netdev_priv(dev);
-+		struct xen_netif *netif = netdev_priv(dev);
- 
- 		if (!(netif->features & NETIF_F_SG))
- 			return -ENOSYS;
-@@ -107,7 +110,7 @@ static int netbk_set_sg(struct net_device *dev, u32 data)
- static int netbk_set_tso(struct net_device *dev, u32 data)
+ 	gtt = container_of(backend, struct radeon_ttm_backend, backend);
+ 	gtt->pages = NULL;
++	gtt->dma_addrs = NULL;
+ 	gtt->num_pages = 0;
+ 	gtt->dummy_read_page = NULL;
+ 	gtt->populated = false;
+@@ -693,7 +698,7 @@ static int radeon_ttm_backend_bind(struct ttm_backend *backend,
+ 		     gtt->num_pages, bo_mem, backend);
+ 	}
+ 	r = radeon_gart_bind(gtt->rdev, gtt->offset,
+-			     gtt->num_pages, gtt->pages);
++			     gtt->num_pages, gtt->pages, gtt->dma_addrs);
+ 	if (r) {
+ 		DRM_ERROR("failed to bind %lu pages at 0x%08X\n",
+ 			  gtt->num_pages, gtt->offset);
+diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
+index d8ba676..6a312e6 100644
+--- a/drivers/gpu/drm/radeon/rv770.c
++++ b/drivers/gpu/drm/radeon/rv770.c
+@@ -1256,9 +1256,6 @@ int rv770_init(struct radeon_device *rdev)
  {
- 	if (data) {
--		netif_t *netif = netdev_priv(dev);
-+		struct xen_netif *netif = netdev_priv(dev);
- 
- 		if (!(netif->features & NETIF_F_TSO))
- 			return -ENOSYS;
-@@ -127,15 +130,15 @@ static struct ethtool_ops network_ethtool_ops =
- 	.get_link = ethtool_op_get_link,
- };
+ 	int r;
  
--netif_t *netif_alloc(domid_t domid, unsigned int handle)
-+struct xen_netif *netif_alloc(domid_t domid, unsigned int handle)
- {
- 	int err = 0;
- 	struct net_device *dev;
--	netif_t *netif;
-+	struct xen_netif *netif;
- 	char name[IFNAMSIZ] = {};
- 
- 	snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
--	dev = alloc_netdev(sizeof(netif_t), name, ether_setup);
-+	dev = alloc_netdev(sizeof(struct xen_netif), name, ether_setup);
- 	if (dev == NULL) {
- 		DPRINTK("Could not create netif: out of memory\n");
- 		return ERR_PTR(-ENOMEM);
-@@ -194,7 +197,7 @@ netif_t *netif_alloc(domid_t domid, unsigned int handle)
+-	r = radeon_dummy_page_init(rdev);
+-	if (r)
+-		return r;
+ 	/* This don't do much */
+ 	r = radeon_gem_init(rdev);
+ 	if (r)
+@@ -1373,7 +1370,6 @@ void rv770_fini(struct radeon_device *rdev)
+ 	radeon_atombios_fini(rdev);
+ 	kfree(rdev->bios);
+ 	rdev->bios = NULL;
+-	radeon_dummy_page_fini(rdev);
  }
  
- static int map_frontend_pages(
--	netif_t *netif, grant_ref_t tx_ring_ref, grant_ref_t rx_ring_ref)
-+	struct xen_netif *netif, grant_ref_t tx_ring_ref, grant_ref_t rx_ring_ref)
- {
- 	struct gnttab_map_grant_ref op;
- 
-@@ -229,7 +232,7 @@ static int map_frontend_pages(
- 	return 0;
- }
+ static void rv770_pcie_gen2_enable(struct radeon_device *rdev)
+diff --git a/drivers/gpu/drm/ttm/ttm_agp_backend.c b/drivers/gpu/drm/ttm/ttm_agp_backend.c
+index f999e36..1c4a72f 100644
+--- a/drivers/gpu/drm/ttm/ttm_agp_backend.c
++++ b/drivers/gpu/drm/ttm/ttm_agp_backend.c
+@@ -47,7 +47,8 @@ struct ttm_agp_backend {
  
--static void unmap_frontend_pages(netif_t *netif)
-+static void unmap_frontend_pages(struct xen_netif *netif)
+ static int ttm_agp_populate(struct ttm_backend *backend,
+ 			    unsigned long num_pages, struct page **pages,
+-			    struct page *dummy_read_page)
++			    struct page *dummy_read_page,
++			    dma_addr_t *dma_addrs)
  {
- 	struct gnttab_unmap_grant_ref op;
- 
-@@ -246,12 +249,12 @@ static void unmap_frontend_pages(netif_t *netif)
- 		BUG();
- }
- 
--int netif_map(netif_t *netif, unsigned long tx_ring_ref,
-+int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
- 	      unsigned long rx_ring_ref, unsigned int evtchn)
+ 	struct ttm_agp_backend *agp_be =
+ 	    container_of(backend, struct ttm_agp_backend, backend);
+diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
+index af61fc2..278a2d3 100644
+--- a/drivers/gpu/drm/ttm/ttm_bo.c
++++ b/drivers/gpu/drm/ttm/ttm_bo.c
+@@ -1526,12 +1526,14 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev,
+ 		       struct ttm_bo_global *glob,
+ 		       struct ttm_bo_driver *driver,
+ 		       uint64_t file_page_offset,
+-		       bool need_dma32)
++		       bool need_dma32,
++		       struct device *dev)
  {
- 	int err = -ENOMEM;
--	netif_tx_sring_t *txs;
--	netif_rx_sring_t *rxs;
-+	struct xen_netif_tx_sring *txs;
-+	struct xen_netif_rx_sring *rxs;
+ 	int ret = -EINVAL;
  
- 	/* Already connected through? */
- 	if (netif->irq)
-@@ -276,10 +279,10 @@ int netif_map(netif_t *netif, unsigned long tx_ring_ref,
- 	netif->irq = err;
- 	disable_irq(netif->irq);
+ 	rwlock_init(&bdev->vm_lock);
+ 	bdev->driver = driver;
++	bdev->dev = dev;
  
--	txs = (netif_tx_sring_t *)netif->tx_comms_area->addr;
-+	txs = (struct xen_netif_tx_sring *)netif->tx_comms_area->addr;
- 	BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE);
+ 	memset(bdev->man, 0, sizeof(bdev->man));
  
--	rxs = (netif_rx_sring_t *)
-+	rxs = (struct xen_netif_rx_sring *)
- 		((char *)netif->rx_comms_area->addr);
- 	BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE);
+diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
+index b1e02ff..35849db 100644
+--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
++++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
+@@ -38,6 +38,7 @@
+ #include <linux/mm.h>
+ #include <linux/seq_file.h> /* for seq_printf */
+ #include <linux/slab.h>
++#include <linux/dma-mapping.h>
  
-@@ -303,7 +306,7 @@ err_rx:
- 	return err;
- }
+ #include <asm/atomic.h>
  
--void netif_disconnect(netif_t *netif)
-+void netif_disconnect(struct xen_netif *netif)
- {
- 	if (netback_carrier_ok(netif)) {
- 		rtnl_lock();
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index db629d4..c959075 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -35,9 +35,17 @@
+@@ -662,7 +663,8 @@ out:
+  * cached pages.
   */
- 
- #include "common.h"
-+
-+#include <linux/tcp.h>
-+#include <linux/udp.h>
-+
- #include <xen/balloon.h>
-+#include <xen/events.h>
- #include <xen/interface/memory.h>
- 
-+#include <asm/xen/hypercall.h>
-+#include <asm/xen/page.h>
-+
- /*define NETBE_DEBUG_INTERRUPT*/
- 
- struct netbk_rx_meta {
-@@ -51,11 +59,12 @@ struct netbk_tx_pending_inuse {
- 	unsigned long alloc_time;
- };
- 
-+
- static void netif_idx_release(u16 pending_idx);
--static void make_tx_response(netif_t *netif,
--			     netif_tx_request_t *txp,
-+static void make_tx_response(struct xen_netif *netif,
-+			     struct xen_netif_tx_request *txp,
- 			     s8       st);
--static netif_rx_response_t *make_rx_response(netif_t *netif,
-+static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
- 					     u16      id,
- 					     s8       st,
- 					     u16      offset,
-@@ -108,8 +117,8 @@ static inline int netif_page_index(struct page *pg)
- #define PKT_PROT_LEN 64
- 
- static struct pending_tx_info {
--	netif_tx_request_t req;
--	netif_t *netif;
-+	struct xen_netif_tx_request req;
-+	struct xen_netif *netif;
- } pending_tx_info[MAX_PENDING_REQS];
- static u16 pending_ring[MAX_PENDING_REQS];
- typedef unsigned int PEND_RING_IDX;
-@@ -128,8 +137,8 @@ static LIST_HEAD(pending_inuse_head);
- static struct sk_buff_head tx_queue;
- 
- static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
--static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
--static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
-+static struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
-+static struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
- 
- static struct list_head net_schedule_list;
- static spinlock_t net_schedule_list_lock;
-@@ -195,7 +204,7 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
- 		goto err;
- 
- 	skb_reserve(nskb, 16 + NET_IP_ALIGN);
--	headlen = nskb->end - nskb->data;
-+	headlen = skb_end_pointer(nskb) - nskb->data;
- 	if (headlen > skb_headlen(skb))
- 		headlen = skb_headlen(skb);
- 	ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
-@@ -243,9 +252,9 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
- 
- 	offset = nskb->data - skb->data;
- 
--	nskb->h.raw = skb->h.raw + offset;
--	nskb->nh.raw = skb->nh.raw + offset;
--	nskb->mac.raw = skb->mac.raw + offset;
-+	nskb->transport_header = skb->transport_header + offset;
-+	nskb->network_header = skb->network_header + offset;
-+	nskb->mac_header = skb->mac_header + offset;
- 
- 	return nskb;
- 
-@@ -255,14 +264,14 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
- 	return NULL;
- }
- 
--static inline int netbk_max_required_rx_slots(netif_t *netif)
-+static inline int netbk_max_required_rx_slots(struct xen_netif *netif)
- {
- 	if (netif->features & (NETIF_F_SG|NETIF_F_TSO))
- 		return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
- 	return 1; /* all in one */
- }
- 
--static inline int netbk_queue_full(netif_t *netif)
-+static inline int netbk_queue_full(struct xen_netif *netif)
- {
- 	RING_IDX peek   = netif->rx_req_cons_peek;
- 	RING_IDX needed = netbk_max_required_rx_slots(netif);
-@@ -273,14 +282,14 @@ static inline int netbk_queue_full(netif_t *netif)
- 
- static void tx_queue_callback(unsigned long data)
+ int ttm_get_pages(struct list_head *pages, int flags,
+-		enum ttm_caching_state cstate, unsigned count)
++		  enum ttm_caching_state cstate, unsigned count,
++		  dma_addr_t *dma_address, struct device *dev)
  {
--	netif_t *netif = (netif_t *)data;
-+	struct xen_netif *netif = (struct xen_netif *)data;
- 	if (netif_schedulable(netif))
- 		netif_wake_queue(netif->dev);
- }
+ 	struct ttm_page_pool *pool = ttm_get_pool(flags, cstate);
+ 	struct page *p = NULL;
+@@ -681,14 +683,22 @@ int ttm_get_pages(struct list_head *pages, int flags,
+ 			gfp_flags |= GFP_HIGHUSER;
  
- int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
- {
--	netif_t *netif = netdev_priv(dev);
-+	struct xen_netif *netif = netdev_priv(dev);
- 
- 	BUG_ON(skb->dev != dev);
- 
-@@ -302,7 +311,6 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
- 		/* Copy only the header fields we use in this driver. */
- 		nskb->dev = skb->dev;
- 		nskb->ip_summed = skb->ip_summed;
--		nskb->proto_data_valid = skb->proto_data_valid;
- 		dev_kfree_skb(skb);
- 		skb = nskb;
- 	}
-@@ -366,25 +374,25 @@ struct netrx_pending_operations {
- 	unsigned mcl_prod, mcl_cons;
- 	unsigned copy_prod, copy_cons;
- 	unsigned meta_prod, meta_cons;
--	mmu_update_t *mmu;
--	gnttab_transfer_t *trans;
--	gnttab_copy_t *copy;
--	multicall_entry_t *mcl;
-+	struct mmu_update *mmu;
-+	struct gnttab_transfer *trans;
-+	struct gnttab_copy *copy;
-+	struct multicall_entry *mcl;
- 	struct netbk_rx_meta *meta;
- };
+ 		for (r = 0; r < count; ++r) {
+-			p = alloc_page(gfp_flags);
++			if ((flags & TTM_PAGE_FLAG_DMA32) && dma_address) {
++				void *addr;
++				addr = dma_alloc_coherent(dev, PAGE_SIZE,
++							  &dma_address[r],
++							  gfp_flags);
++				if (addr == NULL)
++					return -ENOMEM;
++				p = virt_to_page(addr);
++			} else
++				p = alloc_page(gfp_flags);
+ 			if (!p) {
  
- /* Set up the grant operations for this fragment.  If it's a flipping
-    interface, we also set up the unmap request from here. */
--static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta,
-+static u16 netbk_gop_frag(struct xen_netif *netif, struct netbk_rx_meta *meta,
- 			  int i, struct netrx_pending_operations *npo,
- 			  struct page *page, unsigned long size,
- 			  unsigned long offset)
- {
--	mmu_update_t *mmu;
--	gnttab_transfer_t *gop;
--	gnttab_copy_t *copy_gop;
--	multicall_entry_t *mcl;
--	netif_rx_request_t *req;
-+	struct mmu_update *mmu;
-+	struct gnttab_transfer *gop;
-+	struct gnttab_copy *copy_gop;
-+	struct multicall_entry *mcl;
-+	struct xen_netif_rx_request *req;
- 	unsigned long old_mfn, new_mfn;
- 	int idx = netif_page_index(page);
- 
-@@ -426,12 +434,12 @@ static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta,
- 			mcl = npo->mcl + npo->mcl_prod++;
- 			MULTI_update_va_mapping(mcl,
- 					     (unsigned long)page_address(page),
--					     pfn_pte_ma(new_mfn, PAGE_KERNEL),
-+					     mfn_pte(new_mfn, PAGE_KERNEL),
- 					     0);
- 
- 			mmu = npo->mmu + npo->mmu_prod++;
--			mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
--				MMU_MACHPHYS_UPDATE;
-+			mmu->ptr = ((phys_addr_t)new_mfn << PAGE_SHIFT) |
-+				    MMU_MACHPHYS_UPDATE;
- 			mmu->val = page_to_pfn(page);
+ 				printk(KERN_ERR TTM_PFX
+ 				       "Unable to allocate page.");
+ 				return -ENOMEM;
+ 			}
+-
+ 			list_add(&p->lru, pages);
  		}
+ 		return 0;
+@@ -720,7 +730,7 @@ int ttm_get_pages(struct list_head *pages, int flags,
+ 			printk(KERN_ERR TTM_PFX
+ 			       "Failed to allocate extra pages "
+ 			       "for large request.");
+-			ttm_put_pages(pages, 0, flags, cstate);
++			ttm_put_pages(pages, 0, flags, cstate, NULL, NULL);
+ 			return r;
+ 		}
+ 	}
+@@ -731,17 +741,30 @@ int ttm_get_pages(struct list_head *pages, int flags,
  
-@@ -446,7 +454,7 @@ static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta,
- static void netbk_gop_skb(struct sk_buff *skb,
- 			  struct netrx_pending_operations *npo)
- {
--	netif_t *netif = netdev_priv(skb->dev);
-+	struct xen_netif *netif = netdev_priv(skb->dev);
- 	int nr_frags = skb_shinfo(skb)->nr_frags;
- 	int i;
- 	int extra;
-@@ -494,9 +502,9 @@ static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
- static int netbk_check_gop(int nr_frags, domid_t domid,
- 			   struct netrx_pending_operations *npo)
- {
--	multicall_entry_t *mcl;
--	gnttab_transfer_t *gop;
--	gnttab_copy_t     *copy_op;
-+	struct multicall_entry *mcl;
-+	struct gnttab_transfer *gop;
-+	struct gnttab_copy     *copy_op;
- 	int status = NETIF_RSP_OKAY;
- 	int i;
- 
-@@ -534,7 +542,7 @@ static int netbk_check_gop(int nr_frags, domid_t domid,
- 	return status;
- }
- 
--static void netbk_add_frag_responses(netif_t *netif, int status,
-+static void netbk_add_frag_responses(struct xen_netif *netif, int status,
- 				     struct netbk_rx_meta *meta, int nr_frags)
- {
- 	int i;
-@@ -555,11 +563,11 @@ static void netbk_add_frag_responses(netif_t *netif, int status,
- 
- static void net_rx_action(unsigned long unused)
+ /* Put all pages in pages list to correct pool to wait for reuse */
+ void ttm_put_pages(struct list_head *pages, unsigned page_count, int flags,
+-		enum ttm_caching_state cstate)
++		   enum ttm_caching_state cstate, dma_addr_t *dma_address,
++		   struct device *dev)
  {
--	netif_t *netif = NULL;
-+	struct xen_netif *netif = NULL;
- 	s8 status;
- 	u16 id, irq, flags;
--	netif_rx_response_t *resp;
--	multicall_entry_t *mcl;
-+	struct xen_netif_rx_response *resp;
-+	struct multicall_entry *mcl;
- 	struct sk_buff_head rxq;
- 	struct sk_buff *skb;
- 	int notify_nr = 0;
-@@ -572,10 +580,10 @@ static void net_rx_action(unsigned long unused)
- 	 * Putting hundreds of bytes on the stack is considered rude.
- 	 * Static works because a tasklet can only be on one CPU at any time.
- 	 */
--	static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+3];
--	static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
--	static gnttab_transfer_t grant_trans_op[NET_RX_RING_SIZE];
--	static gnttab_copy_t grant_copy_op[NET_RX_RING_SIZE];
-+	static struct multicall_entry rx_mcl[NET_RX_RING_SIZE+3];
-+	static struct mmu_update rx_mmu[NET_RX_RING_SIZE];
-+	static struct gnttab_transfer grant_trans_op[NET_RX_RING_SIZE];
-+	static struct gnttab_copy grant_copy_op[NET_RX_RING_SIZE];
- 	static unsigned char rx_notify[NR_IRQS];
- 	static u16 notify_list[NET_RX_RING_SIZE];
- 	static struct netbk_rx_meta meta[NET_RX_RING_SIZE];
-@@ -596,7 +604,7 @@ static void net_rx_action(unsigned long unused)
- 		*(int *)skb->cb = nr_frags;
- 
- 		if (!xen_feature(XENFEAT_auto_translated_physmap) &&
--		    !((netif_t *)netdev_priv(skb->dev))->copying_receiver &&
-+		    !((struct xen_netif *)netdev_priv(skb->dev))->copying_receiver &&
- 		    check_mfn(nr_frags + 1)) {
- 			/* Memory squeeze? Back off for an arbitrary while. */
- 			if ( net_ratelimit() )
-@@ -692,9 +700,10 @@ static void net_rx_action(unsigned long unused)
- 		id = meta[npo.meta_cons].id;
- 		flags = nr_frags ? NETRXF_more_data : 0;
- 
--		if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
-+		if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
- 			flags |= NETRXF_csum_blank | NETRXF_data_validated;
--		else if (skb->proto_data_valid) /* remote but checksummed? */
-+		else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
-+			/* remote but checksummed. */
- 			flags |= NETRXF_data_validated;
- 
- 		if (meta[npo.meta_cons].copy)
-@@ -705,8 +714,8 @@ static void net_rx_action(unsigned long unused)
- 					skb_headlen(skb), flags);
- 
- 		if (meta[npo.meta_cons].frag.size) {
--			struct netif_extra_info *gso =
--				(struct netif_extra_info *)
-+			struct xen_netif_extra_info *gso =
-+				(struct xen_netif_extra_info *)
- 				RING_GET_RESPONSE(&netif->rx,
- 						  netif->rx.rsp_prod_pvt++);
+ 	unsigned long irq_flags;
+ 	struct ttm_page_pool *pool = ttm_get_pool(flags, cstate);
+ 	struct page *p, *tmp;
++	unsigned r;
  
-@@ -769,16 +778,16 @@ static void netbk_tx_pending_timeout(unsigned long unused)
+ 	if (pool == NULL) {
+ 		/* No pool for this memory type so free the pages */
  
- struct net_device_stats *netif_be_get_stats(struct net_device *dev)
++		r = page_count-1;
+ 		list_for_each_entry_safe(p, tmp, pages, lru) {
+-			__free_page(p);
++			if ((flags & TTM_PAGE_FLAG_DMA32) && dma_address) {
++				void *addr = page_address(p);
++				WARN_ON(!addr || !dma_address[r]);
++				if (addr)
++					dma_free_coherent(dev, PAGE_SIZE,
++							  addr,
++							  dma_address[r]);
++				dma_address[r] = 0;
++			} else
++				__free_page(p);
++			r--;
+ 		}
+ 		/* Make the pages list empty */
+ 		INIT_LIST_HEAD(pages);
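
ttm_get_pages()/ttm_put_pages() now carry the per-page dma_address[]
array plus the struct device that ttm_bo_device_init() stores, and use
dma_alloc_coherent()/dma_free_coherent() for TTM_PAGE_FLAG_DMA32
requests. Worth noting, hedged since it goes beyond the hunk itself: on
x86 of this era DMA_ERROR_CODE is 0, so the zeroed dma_address[] array
that ttm_tt allocates naturally reads as "not coherently allocated". A
minimal sketch of the caller contract:

	LIST_HEAD(h);
	dma_addr_t dma_addr = DMA_ERROR_CODE;	/* not yet mapped */
	int ret;

	ret = ttm_get_pages(&h, TTM_PAGE_FLAG_DMA32, tt_cached, 1,
			    &dma_addr, dev);
	if (ret == 0)
		ttm_put_pages(&h, 1, TTM_PAGE_FLAG_DMA32, tt_cached,
			      &dma_addr, dev);
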
+diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
+index af789dc..354f9d9 100644
+--- a/drivers/gpu/drm/ttm/ttm_tt.c
++++ b/drivers/gpu/drm/ttm/ttm_tt.c
+@@ -49,12 +49,16 @@ static int ttm_tt_swapin(struct ttm_tt *ttm);
+ static void ttm_tt_alloc_page_directory(struct ttm_tt *ttm)
  {
--	netif_t *netif = netdev_priv(dev);
-+	struct xen_netif *netif = netdev_priv(dev);
- 	return &netif->stats;
+ 	ttm->pages = drm_calloc_large(ttm->num_pages, sizeof(*ttm->pages));
++	ttm->dma_address = drm_calloc_large(ttm->num_pages,
++					    sizeof(*ttm->dma_address));
  }
  
--static int __on_net_schedule_list(netif_t *netif)
-+static int __on_net_schedule_list(struct xen_netif *netif)
+ static void ttm_tt_free_page_directory(struct ttm_tt *ttm)
  {
- 	return netif->list.next != NULL;
+ 	drm_free_large(ttm->pages);
+ 	ttm->pages = NULL;
++	drm_free_large(ttm->dma_address);
++	ttm->dma_address = NULL;
  }
  
--static void remove_from_net_schedule_list(netif_t *netif)
-+static void remove_from_net_schedule_list(struct xen_netif *netif)
- {
- 	spin_lock_irq(&net_schedule_list_lock);
- 	if (likely(__on_net_schedule_list(netif))) {
-@@ -789,7 +798,7 @@ static void remove_from_net_schedule_list(netif_t *netif)
- 	spin_unlock_irq(&net_schedule_list_lock);
- }
+ static void ttm_tt_free_user_pages(struct ttm_tt *ttm)
+@@ -105,7 +109,8 @@ static struct page *__ttm_tt_get_page(struct ttm_tt *ttm, int index)
  
--static void add_to_net_schedule_list_tail(netif_t *netif)
-+static void add_to_net_schedule_list_tail(struct xen_netif *netif)
- {
- 	if (__on_net_schedule_list(netif))
- 		return;
-@@ -811,7 +820,7 @@ static void add_to_net_schedule_list_tail(netif_t *netif)
-  * If we may be buffer transmit buffers for any reason then we must be rather
-  * more conservative and treat this as the final check for pending work.
-  */
--void netif_schedule_work(netif_t *netif)
-+void netif_schedule_work(struct xen_netif *netif)
- {
- 	int more_to_do;
+ 		INIT_LIST_HEAD(&h);
+ 
+-		ret = ttm_get_pages(&h, ttm->page_flags, ttm->caching_state, 1);
++		ret = ttm_get_pages(&h, ttm->page_flags, ttm->caching_state, 1,
++				    &ttm->dma_address[index], ttm->dev);
  
-@@ -827,13 +836,13 @@ void netif_schedule_work(netif_t *netif)
+ 		if (ret != 0)
+ 			return NULL;
+@@ -164,7 +169,7 @@ int ttm_tt_populate(struct ttm_tt *ttm)
  	}
- }
  
--void netif_deschedule_work(netif_t *netif)
-+void netif_deschedule_work(struct xen_netif *netif)
- {
- 	remove_from_net_schedule_list(netif);
+ 	be->func->populate(be, ttm->num_pages, ttm->pages,
+-			   ttm->dummy_read_page);
++			   ttm->dummy_read_page, ttm->dma_address);
+ 	ttm->state = tt_unbound;
+ 	return 0;
  }
+@@ -298,7 +303,8 @@ static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm)
+ 			count++;
+ 		}
+ 	}
+-	ttm_put_pages(&h, count, ttm->page_flags, ttm->caching_state);
++	ttm_put_pages(&h, count, ttm->page_flags, ttm->caching_state,
++		      ttm->dma_address, ttm->dev);
+ 	ttm->state = tt_unpopulated;
+ 	ttm->first_himem_page = ttm->num_pages;
+ 	ttm->last_lomem_page = -1;
+@@ -391,6 +397,7 @@ struct ttm_tt *ttm_tt_create(struct ttm_bo_device *bdev, unsigned long size,
+ 	ttm->last_lomem_page = -1;
+ 	ttm->caching_state = tt_cached;
+ 	ttm->page_flags = page_flags;
++	ttm->dev = bdev->dev;
  
+ 	ttm->dummy_read_page = dummy_read_page;
  
--static void tx_add_credit(netif_t *netif)
-+static void tx_add_credit(struct xen_netif *netif)
- {
- 	unsigned long max_burst, max_credit;
- 
-@@ -855,7 +864,7 @@ static void tx_add_credit(netif_t *netif)
- 
- static void tx_credit_callback(unsigned long data)
- {
--	netif_t *netif = (netif_t *)data;
-+	struct xen_netif *netif = (struct xen_netif *)data;
- 	tx_add_credit(netif);
- 	netif_schedule_work(netif);
- }
-@@ -869,10 +878,10 @@ static inline int copy_pending_req(PEND_RING_IDX pending_idx)
- inline static void net_tx_action_dealloc(void)
- {
- 	struct netbk_tx_pending_inuse *inuse, *n;
--	gnttab_unmap_grant_ref_t *gop;
-+	struct gnttab_unmap_grant_ref *gop;
- 	u16 pending_idx;
- 	PEND_RING_IDX dc, dp;
--	netif_t *netif;
-+	struct xen_netif *netif;
- 	int ret;
- 	LIST_HEAD(list);
- 
-@@ -954,7 +963,7 @@ inline static void net_tx_action_dealloc(void)
- 	}
- }
- 
--static void netbk_tx_err(netif_t *netif, netif_tx_request_t *txp, RING_IDX end)
-+static void netbk_tx_err(struct xen_netif *netif, struct xen_netif_tx_request *txp, RING_IDX end)
- {
- 	RING_IDX cons = netif->tx.req_cons;
- 
-@@ -969,8 +978,8 @@ static void netbk_tx_err(netif_t *netif, netif_tx_request_t *txp, RING_IDX end)
- 	netif_put(netif);
- }
- 
--static int netbk_count_requests(netif_t *netif, netif_tx_request_t *first,
--				netif_tx_request_t *txp, int work_to_do)
-+static int netbk_count_requests(struct xen_netif *netif, struct xen_netif_tx_request *first,
-+				struct xen_netif_tx_request *txp, int work_to_do)
- {
- 	RING_IDX cons = netif->tx.req_cons;
- 	int frags = 0;
-@@ -1009,10 +1018,10 @@ static int netbk_count_requests(netif_t *netif, netif_tx_request_t *first,
- 	return frags;
- }
- 
--static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif,
-+static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netif *netif,
- 						  struct sk_buff *skb,
--						  netif_tx_request_t *txp,
--						  gnttab_map_grant_ref_t *mop)
-+						  struct xen_netif_tx_request *txp,
-+						  struct gnttab_map_grant_ref *mop)
- {
- 	struct skb_shared_info *shinfo = skb_shinfo(skb);
- 	skb_frag_t *frags = shinfo->frags;
-@@ -1039,12 +1048,12 @@ static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif,
- }
- 
- static int netbk_tx_check_mop(struct sk_buff *skb,
--			       gnttab_map_grant_ref_t **mopp)
-+			       struct gnttab_map_grant_ref **mopp)
- {
--	gnttab_map_grant_ref_t *mop = *mopp;
-+	struct gnttab_map_grant_ref *mop = *mopp;
- 	int pending_idx = *((u16 *)skb->data);
--	netif_t *netif = pending_tx_info[pending_idx].netif;
--	netif_tx_request_t *txp;
-+	struct xen_netif *netif = pending_tx_info[pending_idx].netif;
-+	struct xen_netif_tx_request *txp;
- 	struct skb_shared_info *shinfo = skb_shinfo(skb);
- 	int nr_frags = shinfo->nr_frags;
- 	int i, err, start;
-@@ -1118,7 +1127,7 @@ static void netbk_fill_frags(struct sk_buff *skb)
- 
- 	for (i = 0; i < nr_frags; i++) {
- 		skb_frag_t *frag = shinfo->frags + i;
--		netif_tx_request_t *txp;
-+		struct xen_netif_tx_request *txp;
- 		unsigned long pending_idx;
- 
- 		pending_idx = (unsigned long)frag->page;
-@@ -1138,10 +1147,10 @@ static void netbk_fill_frags(struct sk_buff *skb)
- 	}
- }
- 
--int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras,
-+int netbk_get_extras(struct xen_netif *netif, struct xen_netif_extra_info *extras,
- 		     int work_to_do)
- {
--	struct netif_extra_info extra;
-+	struct xen_netif_extra_info extra;
- 	RING_IDX cons = netif->tx.req_cons;
- 
- 	do {
-@@ -1166,7 +1175,7 @@ int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras,
- 	return work_to_do;
- }
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+index 80bc37b..87e43e0 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+@@ -102,7 +102,8 @@ struct vmw_ttm_backend {
  
--static int netbk_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso)
-+static int netbk_set_skb_gso(struct sk_buff *skb, struct xen_netif_extra_info *gso)
+ static int vmw_ttm_populate(struct ttm_backend *backend,
+ 			    unsigned long num_pages, struct page **pages,
+-			    struct page *dummy_read_page)
++			    struct page *dummy_read_page,
++			    dma_addr_t *dma_addrs)
  {
- 	if (!gso->u.gso.size) {
- 		DPRINTK("GSO size must not be zero.\n");
-@@ -1189,18 +1198,57 @@ static int netbk_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso)
- 	return 0;
- }
+ 	struct vmw_ttm_backend *vmw_be =
+ 	    container_of(backend, struct vmw_ttm_backend, backend);
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+index 10ca97e..803d979 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+@@ -322,11 +322,11 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
+ 	ttm_lock_set_kill(&dev_priv->fbdev_master.lock, false, SIGTERM);
+ 	dev_priv->active_master = &dev_priv->fbdev_master;
  
-+static int skb_checksum_setup(struct sk_buff *skb)
-+{
-+	struct iphdr *iph;
-+	unsigned char *th;
-+	int err = -EPROTO;
-+
-+	if (skb->protocol != htons(ETH_P_IP))
-+		goto out;
+-
+ 	ret = ttm_bo_device_init(&dev_priv->bdev,
+ 				 dev_priv->bo_global_ref.ref.object,
+ 				 &vmw_bo_driver, VMWGFX_FILE_PAGE_OFFSET,
+-				 false);
++				 false,
++				 dev->dev);
+ 	if (unlikely(ret != 0)) {
+ 		DRM_ERROR("Failed initializing TTM buffer object driver.\n");
+ 		goto out_err1;
+diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
+index 0382332..1826d5d 100644
+--- a/drivers/net/Kconfig
++++ b/drivers/net/Kconfig
+@@ -2966,12 +2966,38 @@ config XEN_NETDEV_FRONTEND
+ 	select XEN_XENBUS_FRONTEND
+ 	default y
+ 	help
+-	  The network device frontend driver allows the kernel to
+-	  access network devices exported exported by a virtual
+-	  machine containing a physical network device driver. The
+-	  frontend driver is intended for unprivileged guest domains;
+-	  if you are compiling a kernel for a Xen guest, you almost
+-	  certainly want to enable this.
++	  This driver provides support for Xen paravirtual network
++	  devices exported by a Xen network driver domain (often
++	  domain 0).
 +
-+	iph = (void *)skb->data;
-+	th = skb->data + 4 * iph->ihl;
-+	if (th >= skb_tail_pointer(skb))
-+		goto out;
++	  The corresponding Linux backend driver is enabled by the
++	  CONFIG_XEN_NETDEV_BACKEND option.
 +
-+	skb->csum_start = th - skb->head;
-+	switch (iph->protocol) {
-+	case IPPROTO_TCP:
-+		skb->csum_offset = offsetof(struct tcphdr, check);
-+		break;
-+	case IPPROTO_UDP:
-+		skb->csum_offset = offsetof(struct udphdr, check);
-+		break;
-+	default:
-+		if (net_ratelimit())
-+			printk(KERN_ERR "Attempting to checksum a non-"
-+			       "TCP/UDP packet, dropping a protocol"
-+			       " %d packet", iph->protocol);
-+		goto out;
-+	}
++	  If you are compiling a kernel for use as a Xen guest, you
++	  should say Y here. To compile this driver as a module, choose
++	  M here: the module will be called xen-netfront.
 +
-+	if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
-+		goto out;
++config XEN_NETDEV_BACKEND
++	tristate "Xen backend network device"
++	depends on XEN_BACKEND
++	help
++	  This driver allows the kernel to act as a Xen network driver
++	  domain which exports paravirtual network devices to other
++	  Xen domains. These devices can be accessed by any operating
++	  system that implements a compatible front end.
 +
-+	err = 0;
++	  The corresponding Linux frontend driver is enabled by the
++	  CONFIG_XEN_NETDEV_FRONTEND configuration option.
 +
-+out:
-+	return err;
-+}
++	  The backend driver presents a standard network device
++	  endpoint for each paravirtual network device to the driver
++	  domain network stack. These can then be bridged, routed,
++	  etc., in order to provide full network connectivity.
 +
- /* Called after netfront has transmitted */
- static void net_tx_action(unsigned long unused)
- {
- 	struct list_head *ent;
- 	struct sk_buff *skb;
--	netif_t *netif;
--	netif_tx_request_t txreq;
--	netif_tx_request_t txfrags[MAX_SKB_FRAGS];
--	struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
-+	struct xen_netif *netif;
-+	struct xen_netif_tx_request txreq;
-+	struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
-+	struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
- 	u16 pending_idx;
- 	RING_IDX i;
--	gnttab_map_grant_ref_t *mop;
-+	struct gnttab_map_grant_ref *mop;
- 	unsigned int data_len;
- 	int ret, work_to_do;
- 
-@@ -1212,7 +1260,7 @@ static void net_tx_action(unsigned long unused)
- 		!list_empty(&net_schedule_list)) {
- 		/* Get a netif from the list with work to do. */
- 		ent = net_schedule_list.next;
--		netif = list_entry(ent, netif_t, list);
-+		netif = list_entry(ent, struct xen_netif, list);
- 		netif_get(netif);
- 		remove_from_net_schedule_list(netif);
- 
-@@ -1313,7 +1361,7 @@ static void net_tx_action(unsigned long unused)
- 		skb_reserve(skb, 16 + NET_IP_ALIGN);
- 
- 		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
--			struct netif_extra_info *gso;
-+			struct xen_netif_extra_info *gso;
- 			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
- 
- 			if (netbk_set_skb_gso(skb, gso)) {
-@@ -1372,7 +1420,7 @@ static void net_tx_action(unsigned long unused)
- 
- 	mop = tx_map_ops;
- 	while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
--		netif_tx_request_t *txp;
-+		struct xen_netif_tx_request *txp;
- 
- 		pending_idx = *((u16 *)skb->data);
- 		netif       = pending_tx_info[pending_idx].netif;
-@@ -1403,14 +1451,10 @@ static void net_tx_action(unsigned long unused)
- 		 * Old frontends do not assert data_validated but we
- 		 * can infer it from csum_blank so test both flags.
- 		 */
--		if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank)) {
--			skb->ip_summed = CHECKSUM_UNNECESSARY;
--			skb->proto_data_valid = 1;
--		} else {
-+		if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank))
-+			skb->ip_summed = CHECKSUM_PARTIAL;
-+		else
- 			skb->ip_summed = CHECKSUM_NONE;
--			skb->proto_data_valid = 0;
--		}
--		skb->proto_csum_blank = !!(txp->flags & NETTXF_csum_blank);
++	  If you are compiling a kernel to run in a Xen network driver
++	  domain (often this is domain 0), you should say Y here. To
++	  compile this driver as a module, choose M here: the module
++	  will be called xen-netback.
  
- 		netbk_fill_frags(skb);
+ config ISERIES_VETH
+ 	tristate "iSeries Virtual Ethernet driver support"
+diff --git a/drivers/net/Makefile b/drivers/net/Makefile
+index b90738d..145dfd7 100644
+--- a/drivers/net/Makefile
++++ b/drivers/net/Makefile
+@@ -171,6 +171,7 @@ obj-$(CONFIG_SLIP) += slip.o
+ obj-$(CONFIG_SLHC) += slhc.o
  
-@@ -1420,6 +1464,14 @@ static void net_tx_action(unsigned long unused)
- 		netif->stats.rx_bytes += skb->len;
- 		netif->stats.rx_packets++;
+ obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o
++obj-$(CONFIG_XEN_NETDEV_BACKEND) += xen-netback/
  
-+		if (skb->ip_summed == CHECKSUM_PARTIAL) {
-+			if (skb_checksum_setup(skb)) {
-+				DPRINTK("Can't setup checksum in net_tx_action\n");
-+				kfree_skb(skb);
-+				continue;
-+			}
-+		}
+ obj-$(CONFIG_DUMMY) += dummy.o
+ obj-$(CONFIG_IFB) += ifb.o
+diff --git a/drivers/net/xen-netback/Makefile b/drivers/net/xen-netback/Makefile
+new file mode 100644
+index 0000000..e346e81
+--- /dev/null
++++ b/drivers/net/xen-netback/Makefile
+@@ -0,0 +1,3 @@
++obj-$(CONFIG_XEN_NETDEV_BACKEND) := xen-netback.o
 +
- 		if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
- 		    unlikely(skb_linearize(skb))) {
- 			DPRINTK("Can't linearize skb in net_tx_action.\n");
-@@ -1464,9 +1516,9 @@ static void netif_page_release(struct page *page, unsigned int order)
- 	netif_idx_release(idx);
- }
- 
--irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
-+irqreturn_t netif_be_int(int irq, void *dev_id)
- {
--	netif_t *netif = dev_id;
-+	struct xen_netif *netif = dev_id;
- 
- 	add_to_net_schedule_list_tail(netif);
- 	maybe_schedule_tx_action();
-@@ -1477,12 +1529,12 @@ irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
- 	return IRQ_HANDLED;
- }
- 
--static void make_tx_response(netif_t *netif,
--			     netif_tx_request_t *txp,
-+static void make_tx_response(struct xen_netif *netif,
-+			     struct xen_netif_tx_request *txp,
- 			     s8       st)
- {
- 	RING_IDX i = netif->tx.rsp_prod_pvt;
--	netif_tx_response_t *resp;
-+	struct xen_netif_tx_response *resp;
- 	int notify;
- 
- 	resp = RING_GET_RESPONSE(&netif->tx, i);
-@@ -1507,7 +1559,7 @@ static void make_tx_response(netif_t *netif,
- #endif
- }
- 
--static netif_rx_response_t *make_rx_response(netif_t *netif,
-+static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
- 					     u16      id,
- 					     s8       st,
- 					     u16      offset,
-@@ -1515,7 +1567,7 @@ static netif_rx_response_t *make_rx_response(netif_t *netif,
- 					     u16      flags)
- {
- 	RING_IDX i = netif->rx.rsp_prod_pvt;
--	netif_rx_response_t *resp;
-+	struct xen_netif_rx_response *resp;
- 
- 	resp = RING_GET_RESPONSE(&netif->rx, i);
- 	resp->offset     = offset;
-@@ -1534,14 +1586,14 @@ static netif_rx_response_t *make_rx_response(netif_t *netif,
- static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
- {
- 	struct list_head *ent;
--	netif_t *netif;
-+	struct xen_netif *netif;
- 	int i = 0;
- 
- 	printk(KERN_ALERT "netif_schedule_list:\n");
- 	spin_lock_irq(&net_schedule_list_lock);
- 
- 	list_for_each (ent, &net_schedule_list) {
--		netif = list_entry(ent, netif_t, list);
-+		netif = list_entry(ent, struct xen_netif, list);
- 		printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
- 		       "rx_resp_prod=%08x\n",
- 		       i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
-@@ -1569,11 +1621,13 @@ static int __init netback_init(void)
- 	int i;
- 	struct page *page;
- 
--	if (!is_running_on_xen())
-+	printk(KERN_CRIT "*** netif_init\n");
++xen-netback-y := netback.o xenbus.o interface.o
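
Since XEN_NETDEV_BACKEND is now a tristate, the backend can be built
in (=y) or built as the xen-netback module (=m). As a minimal sketch,
assuming only the standard Kconfig convention that a tristate option
generates CONFIG_FOO for =y and CONFIG_FOO_MODULE for =m (nothing
below is defined by this patch itself), code that must cope with both
cases would test:

/*
 * Sketch, not part of the patch: a tristate option defines
 * CONFIG_XEN_NETDEV_BACKEND when built in and
 * CONFIG_XEN_NETDEV_BACKEND_MODULE when built as a module,
 * so an availability check must accept either macro.
 */
#if defined(CONFIG_XEN_NETDEV_BACKEND) || \
    defined(CONFIG_XEN_NETDEV_BACKEND_MODULE)
/* xen-netback is present in this configuration */
#endif
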
+diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
+new file mode 100644
+index 0000000..21f4c0c
+--- /dev/null
++++ b/drivers/net/xen-netback/common.h
+@@ -0,0 +1,162 @@
++/*
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
 +
-+	if (!xen_domain())
- 		return -ENODEV;
- 
- 	/* We can increase reservation by this much in net_rx_action(). */
--	balloon_update_driver_allowance(NET_RX_RING_SIZE);
-+//	balloon_update_driver_allowance(NET_RX_RING_SIZE);
- 
- 	skb_queue_head_init(&rx_queue);
- 	skb_queue_head_init(&tx_queue);
-@@ -1616,7 +1670,7 @@ static int __init netback_init(void)
- 			netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
- 	}
- 
--	netif_accel_init();
-+	//netif_accel_init();
- 
- 	netif_xenbus_init();
- 
-diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
-index d7faeb6..ed7c006 100644
---- a/drivers/xen/netback/xenbus.c
-+++ b/drivers/xen/netback/xenbus.c
-@@ -37,7 +37,7 @@ static int netback_remove(struct xenbus_device *dev)
- {
- 	struct backend_info *be = dev->dev.driver_data;
- 
--	netback_remove_accelerators(be, dev);
-+	//netback_remove_accelerators(be, dev);
- 
- 	if (be->netif) {
- 		kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
-@@ -123,7 +123,7 @@ static int netback_probe(struct xenbus_device *dev,
- 		goto fail;
- 	}
- 
--	netback_probe_accelerators(be, dev);
-+	//netback_probe_accelerators(be, dev);
- 
- 	err = xenbus_switch_state(dev, XenbusStateInitWait);
- 	if (err)
-@@ -149,12 +149,10 @@ fail:
-  * and vif variables to the environment, for the benefit of the vif-* hotplug
-  * scripts.
-  */
--static int netback_uevent(struct xenbus_device *xdev, char **envp,
--			  int num_envp, char *buffer, int buffer_size)
-+static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env)
- {
- 	struct backend_info *be = xdev->dev.driver_data;
--	netif_t *netif = be->netif;
--	int i = 0, length = 0;
-+	struct xen_netif *netif = be->netif;
- 	char *val;
- 
- 	DPRINTK("netback_uevent");
-@@ -166,15 +164,15 @@ static int netback_uevent(struct xenbus_device *xdev, char **envp,
- 		return err;
- 	}
- 	else {
--		add_uevent_var(envp, num_envp, &i, buffer, buffer_size,
--			       &length, "script=%s", val);
-+		if (add_uevent_var(env, "script=%s", val)) {
-+			kfree(val);
-+			return -ENOMEM;
-+		}
- 		kfree(val);
- 	}
- 
--	add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
--		       "vif=%s", netif->dev->name);
--
--	envp[i] = NULL;
-+	if (add_uevent_var(env, "vif=%s", netif->dev->name))
-+		return -ENOMEM;
- 
- 	return 0;
- }
-@@ -450,5 +448,6 @@ static struct xenbus_driver netback = {
- 
- void netif_xenbus_init(void)
- {
--	xenbus_register_backend(&netback);
-+	printk(KERN_CRIT "registering netback\n");
-+	(void)xenbus_register_backend(&netback);
- }
--- 
-1.7.4
-
-
-From a41a2ab9e1ac4ef8320f69f2719e973e25faff5c Mon Sep 17 00:00:00 2001
-From: Jeremy Fitzhardinge <jeremy at goop.org>
-Date: Mon, 9 Feb 2009 16:39:01 -0800
-Subject: [PATCH 003/203] xen: netback: don't include xen/evtchn.h
-
-Its a usermode header for users of /dev/evtchn
-
-Signed-off-by: Jeremy Fitzhardinge <jeremy at goop.org>
----
- drivers/xen/netback/common.h |    1 -
- 1 files changed, 0 insertions(+), 1 deletions(-)
-
-diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
-index 65b88f4..5665ed1 100644
---- a/drivers/xen/netback/common.h
-+++ b/drivers/xen/netback/common.h
-@@ -38,7 +38,6 @@
- #include <linux/netdevice.h>
- #include <linux/etherdevice.h>
- #include <linux/wait.h>
--#include <xen/evtchn.h>
- #include <xen/interface/io/netif.h>
- #include <asm/io.h>
- #include <asm/pgalloc.h>
--- 
-1.7.4
-
-
-From f28a7c6148bb979acf99c0cbe3b441d0fb0853d9 Mon Sep 17 00:00:00 2001
-From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
-Date: Wed, 18 Feb 2009 15:55:18 -0800
-Subject: [PATCH 004/203] xen: netback: use mod_timer
-
-__mod_timer is no longer a public API.
-
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/netback.c |    4 ++--
- 1 files changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index c959075..e920703 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -334,7 +334,7 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
- 			 */
- 			netif->tx_queue_timeout.data = (unsigned long)netif;
- 			netif->tx_queue_timeout.function = tx_queue_callback;
--			__mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
-+			mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
- 		}
- 	}
- 
-@@ -1299,7 +1299,7 @@ static void net_tx_action(unsigned long unused)
- 					(unsigned long)netif;
- 				netif->credit_timeout.function =
- 					tx_credit_callback;
--				__mod_timer(&netif->credit_timeout,
-+				mod_timer(&netif->credit_timeout,
- 					    next_credit);
- 				netif_put(netif);
- 				continue;
--- 
-1.7.4
-
-
-From 52f97ad360f28762c785343ba5c9f8abb83536f3 Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich at novell.com>
-Date: Fri, 6 Mar 2009 08:29:31 +0000
-Subject: [PATCH 005/203] xen: netback: unmap tx ring gref when mapping of rx ring gref failed
-
-[ijc-ported from linux-2.6.18-xen.hg 782:51decc39e5e7]
-Signed-off-by: Jan Beulich <jbeulich at novell.com>
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/xen/netback/interface.c |    6 ++++++
- 1 files changed, 6 insertions(+), 0 deletions(-)
-
-diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
-index d184ad7..f3d9ea1 100644
---- a/drivers/xen/netback/interface.c
-+++ b/drivers/xen/netback/interface.c
-@@ -222,6 +222,12 @@ static int map_frontend_pages(
- 		BUG();
- 
- 	if (op.status) {
-+		struct gnttab_unmap_grant_ref unop;
++#ifndef __XEN_NETBACK__COMMON_H__
++#define __XEN_NETBACK__COMMON_H__
 +
-+		gnttab_set_unmap_op(&unop,
-+				    (unsigned long)netif->tx_comms_area->addr,
-+				    GNTMAP_host_map, netif->tx_shmem_handle);
-+		HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unop, 1);
- 		DPRINTK(" Gnttab failure mapping rx_ring_ref!\n");
- 		return op.status;
- 	}
--- 
-1.7.4
-
-
-From f9b63790f1404eb03ac824147b2294a46e485643 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <Ian.Campbell at citrix.com>
-Date: Fri, 6 Mar 2009 08:29:32 +0000
-Subject: [PATCH 006/203] xen: netback: add ethtool stat to track copied skbs.
-
-Copied skbs should be rare but we have no way of verifying that.
-
-[ijc-ported from linux-2.6.18-xen.hg 792:db9857bb0320]
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/xen/netback/common.h    |    3 ++
- drivers/xen/netback/interface.c |   47 +++++++++++++++++++++++++++++++++++++++
- drivers/xen/netback/netback.c   |    6 ++++-
- 3 files changed, 55 insertions(+), 1 deletions(-)
-
-diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
-index 5665ed1..6ba804d 100644
---- a/drivers/xen/netback/common.h
-+++ b/drivers/xen/netback/common.h
-@@ -92,6 +92,9 @@ struct xen_netif {
- 	/* Enforce draining of the transmit queue. */
- 	struct timer_list tx_queue_timeout;
- 
-+	/* Statistics */
-+	int nr_copied_skbs;
-+
- 	/* Miscellaneous private stuff. */
- 	struct list_head list;  /* scheduling list */
- 	atomic_t         refcnt;
-diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
-index f3d9ea1..1a99c87 100644
---- a/drivers/xen/netback/interface.c
-+++ b/drivers/xen/netback/interface.c
-@@ -119,8 +119,51 @@ static int netbk_set_tso(struct net_device *dev, u32 data)
- 	return ethtool_op_set_tso(dev, data);
- }
- 
-+static void netbk_get_drvinfo(struct net_device *dev,
-+			      struct ethtool_drvinfo *info)
-+{
-+	strcpy(info->driver, "netbk");
-+}
++#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__
 +
-+static const struct netif_stat {
-+	char name[ETH_GSTRING_LEN];
-+	u16 offset;
-+} netbk_stats[] = {
-+	{ "copied_skbs", offsetof(struct xen_netif, nr_copied_skbs) },
++#include <linux/module.h>
++#include <linux/interrupt.h>
++#include <linux/slab.h>
++#include <linux/ip.h>
++#include <linux/in.h>
++#include <linux/io.h>
++#include <linux/netdevice.h>
++#include <linux/etherdevice.h>
++#include <linux/wait.h>
++#include <linux/sched.h>
++
++#include <xen/interface/io/netif.h>
++#include <xen/interface/grant_table.h>
++#include <xen/grant_table.h>
++#include <xen/xenbus.h>
++
++struct xen_netbk;
++
++struct xenvif {
++	/* Unique identifier for this interface. */
++	domid_t          domid;
++	unsigned int     handle;
++
++	/* Reference to netback processing backend. */
++	struct xen_netbk *netbk;
++
++	u8               fe_dev_addr[6];
++
++	/* Physical parameters of the comms window. */
++	grant_handle_t   tx_shmem_handle;
++	grant_ref_t      tx_shmem_ref;
++	grant_handle_t   rx_shmem_handle;
++	grant_ref_t      rx_shmem_ref;
++	unsigned int     irq;
++
++	/* List of frontends to notify after a batch of frames sent. */
++	struct list_head notify_list;
++
++	/* The shared rings and indexes. */
++	struct xen_netif_tx_back_ring tx;
++	struct xen_netif_rx_back_ring rx;
++	struct vm_struct *tx_comms_area;
++	struct vm_struct *rx_comms_area;
++
++	/* Flags that must not be set in dev->features */
++	int features_disabled;
++
++	/* Frontend feature information. */
++	u8 can_sg:1;
++	u8 gso:1;
++	u8 gso_prefix:1;
++	u8 csum:1;
++
++	/* Internal feature information. */
++	u8 can_queue:1;	    /* can queue packets for receiver? */
++
++	/*
++	 * Allow xenvif_start_xmit() to peek ahead in the rx request
++	 * ring.  This is a prediction of what rx_req_cons will be
++	 * once all queued skbs are put on the ring.
++	 */
++	RING_IDX rx_req_cons_peek;
++
++	/* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
++	unsigned long   credit_bytes;
++	unsigned long   credit_usec;
++	unsigned long   remaining_credit;
++	struct timer_list credit_timeout;
++
++	/* Statistics */
++	int rx_gso_checksum_fixup;
++
++	/* Miscellaneous private stuff. */
++	struct list_head schedule_list;
++	atomic_t         refcnt;
++	struct net_device *dev;
++	struct net_device_stats stats;
++
++	wait_queue_head_t waiting_to_free;
 +};
 +
-+static int netbk_get_stats_count(struct net_device *dev)
++#define XEN_NETIF_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
++#define XEN_NETIF_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
++
++struct xenvif *xenvif_alloc(struct device *parent,
++			    domid_t domid,
++			    unsigned int handle);
++
++int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
++		   unsigned long rx_ring_ref, unsigned int evtchn);
++void xenvif_disconnect(struct xenvif *vif);
++
++void xenvif_get(struct xenvif *vif);
++void xenvif_put(struct xenvif *vif);
++
++int xenvif_xenbus_init(void);
++
++int xenvif_schedulable(struct xenvif *vif);
++
++int xen_netbk_rx_ring_full(struct xenvif *vif);
++
++int xen_netbk_must_stop_queue(struct xenvif *vif);
++
++/* (Un)Map communication rings. */
++void xen_netbk_unmap_frontend_rings(struct xenvif *vif);
++int xen_netbk_map_frontend_rings(struct xenvif *vif,
++				 grant_ref_t tx_ring_ref,
++				 grant_ref_t rx_ring_ref);
++
++/* (De)Register a xenvif with the netback backend. */
++void xen_netbk_add_xenvif(struct xenvif *vif);
++void xen_netbk_remove_xenvif(struct xenvif *vif);
++
++/* (De)Schedule backend processing for a xenvif */
++void xen_netbk_schedule_xenvif(struct xenvif *vif);
++void xen_netbk_deschedule_xenvif(struct xenvif *vif);
++
++/* Check for SKBs from frontend and schedule backend processing */
++void xen_netbk_check_rx_xenvif(struct xenvif *vif);
++/* Receive an SKB from the frontend */
++void xenvif_receive_skb(struct xenvif *vif, struct sk_buff *skb);
++
++/* Queue an SKB for transmission to the frontend */
++void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb);
++/* Notify xenvif that ring now has space to send an skb to the frontend */
++void xenvif_notify_tx_completion(struct xenvif *vif);
++
++/* Returns number of ring slots required to send an skb to the frontend */
++unsigned int xen_netbk_count_skb_slots(struct xenvif *vif, struct sk_buff *skb);
++
++#endif /* __XEN_NETBACK__COMMON_H__ */
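
For orientation, here is a hedged sketch of how a backend (such as
the xenbus code added later in this patch) might drive the lifecycle
declared above. The domid, handle, ring references and event channel
are placeholders that would really come from xenstore; error handling
is trimmed, and xenvif_alloc() is assumed to report failure via
ERR_PTR(), as the old netif_alloc() did:

/* Sketch, not part of the patch: assumed usage of the API above. */
#include <linux/err.h>
#include "common.h"

static int example_connect_vif(struct device *parent, domid_t domid,
			       unsigned int handle,
			       unsigned long tx_ring_ref,
			       unsigned long rx_ring_ref,
			       unsigned int evtchn)
{
	struct xenvif *vif;
	int err;

	/* Allocate and register the backend net_device for this domain. */
	vif = xenvif_alloc(parent, domid, handle);
	if (IS_ERR(vif))
		return PTR_ERR(vif);

	/* Map the frontend's shared rings and bind its event channel. */
	err = xenvif_connect(vif, tx_ring_ref, rx_ring_ref, evtchn);
	if (err)
		return err; /* xenvif_disconnect() cleans up at teardown */

	return 0;
}
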
+diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
+new file mode 100644
+index 0000000..1614ba5
+--- /dev/null
++++ b/drivers/net/xen-netback/interface.c
+@@ -0,0 +1,424 @@
++/*
++ * Network-device interface management.
++ *
++ * Copyright (c) 2004-2005, Keir Fraser
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include "common.h"
++
++#include <linux/ethtool.h>
++#include <linux/rtnetlink.h>
++
++#include <xen/events.h>
++#include <asm/xen/hypercall.h>
++
++#define XENVIF_QUEUE_LENGTH 32
++
++void xenvif_get(struct xenvif *vif)
 +{
-+	return ARRAY_SIZE(netbk_stats);
++	atomic_inc(&vif->refcnt);
 +}
 +
-+static void netbk_get_ethtool_stats(struct net_device *dev,
-+				   struct ethtool_stats *stats, u64 * data)
++void xenvif_put(struct xenvif *vif)
 +{
-+	void *netif = netdev_priv(dev);
-+	int i;
++	if (atomic_dec_and_test(&vif->refcnt))
++		wake_up(&vif->waiting_to_free);
++}
++
++int xenvif_schedulable(struct xenvif *vif)
++{
++	return netif_running(vif->dev) && netif_carrier_ok(vif->dev);
++}
 +
-+	for (i = 0; i < ARRAY_SIZE(netbk_stats); i++)
-+		data[i] = *(int *)(netif + netbk_stats[i].offset);
++static int xenvif_rx_schedulable(struct xenvif *vif)
++{
++	return xenvif_schedulable(vif) && !xen_netbk_rx_ring_full(vif);
 +}
 +
-+static void netbk_get_strings(struct net_device *dev, u32 stringset, u8 * data)
++static irqreturn_t xenvif_interrupt(int irq, void *dev_id)
 +{
-+	int i;
++	struct xenvif *vif = dev_id;
 +
-+	switch (stringset) {
-+	case ETH_SS_STATS:
-+		for (i = 0; i < ARRAY_SIZE(netbk_stats); i++)
-+			memcpy(data + i * ETH_GSTRING_LEN,
-+			       netbk_stats[i].name, ETH_GSTRING_LEN);
-+		break;
-+	}
++	if (vif->netbk == NULL)
++		return IRQ_NONE;
++
++	xen_netbk_schedule_xenvif(vif);
++
++	if (xenvif_rx_schedulable(vif))
++		netif_wake_queue(vif->dev);
++
++	return IRQ_HANDLED;
 +}
 +
- static struct ethtool_ops network_ethtool_ops =
- {
-+	.get_drvinfo = netbk_get_drvinfo,
-+
- 	.get_tx_csum = ethtool_op_get_tx_csum,
- 	.set_tx_csum = ethtool_op_set_tx_csum,
- 	.get_sg = ethtool_op_get_sg,
-@@ -128,6 +171,10 @@ static struct ethtool_ops network_ethtool_ops =
- 	.get_tso = ethtool_op_get_tso,
- 	.set_tso = netbk_set_tso,
- 	.get_link = ethtool_op_get_link,
-+
-+	.get_stats_count = netbk_get_stats_count,
-+	.get_ethtool_stats = netbk_get_ethtool_stats,
-+	.get_strings = netbk_get_strings,
- };
- 
- struct xen_netif *netif_alloc(domid_t domid, unsigned int handle)
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index e920703..f59fadb 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -923,7 +923,11 @@ inline static void net_tx_action_dealloc(void)
- 			if (time_after(inuse->alloc_time + HZ / 2, jiffies))
- 				break;
- 
--			switch (copy_pending_req(inuse - pending_inuse)) {
-+			pending_idx = inuse - pending_inuse;
++static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
++{
++	struct xenvif *vif = netdev_priv(dev);
 +
-+			pending_tx_info[pending_idx].netif->nr_copied_skbs++;
++	BUG_ON(skb->dev != dev);
 +
-+			switch (copy_pending_req(pending_idx)) {
- 			case 0:
- 				list_move_tail(&inuse->list, &list);
- 				continue;
--- 
-1.7.4
-
-
-From c41d8da3d853d4e89ba38693b90c1fe512095704 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <Ian.Campbell at citrix.com>
-Date: Fri, 6 Mar 2009 08:29:33 +0000
-Subject: [PATCH 007/203] xen: netback: make queue length parameter writeable in sysfs
-
-Any changes will only take effect for newly created VIFs.
-
-Also hook up the vif devices to their parent and publish bus info via
-ethtool.
-
-[ijc-ported from linux-2.6.18-xen.hg 793:3aa9b8a7876b]
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/xen/netback/interface.c |    3 ++-
- drivers/xen/netback/xenbus.c    |    1 +
- 2 files changed, 3 insertions(+), 1 deletions(-)
-
-diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
-index 1a99c87..7706170 100644
---- a/drivers/xen/netback/interface.c
-+++ b/drivers/xen/netback/interface.c
-@@ -52,7 +52,7 @@
-  * blocked.
-  */
- static unsigned long netbk_queue_length = 32;
--module_param_named(queue_length, netbk_queue_length, ulong, 0);
-+module_param_named(queue_length, netbk_queue_length, ulong, 0644);
- 
- static void __netif_up(struct xen_netif *netif)
- {
-@@ -123,6 +123,7 @@ static void netbk_get_drvinfo(struct net_device *dev,
- 			      struct ethtool_drvinfo *info)
- {
- 	strcpy(info->driver, "netbk");
-+	strcpy(info->bus_info, dev->dev.parent->bus_id);
- }
- 
- static const struct netif_stat {
-diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
-index ed7c006..dc7b367 100644
---- a/drivers/xen/netback/xenbus.c
-+++ b/drivers/xen/netback/xenbus.c
-@@ -200,6 +200,7 @@ static void backend_create_netif(struct backend_info *be)
- 		xenbus_dev_fatal(dev, err, "creating interface");
- 		return;
- 	}
-+	SET_NETDEV_DEV(be->netif->dev, &dev->dev);
- 
- 	kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
- }
--- 
-1.7.4
-
-
-From f204d7567ab11ddb1ff3208ab5ed8921b575af5d Mon Sep 17 00:00:00 2001
-From: Ian Campbell <Ian.Campbell at citrix.com>
-Date: Mon, 16 Mar 2009 22:05:16 +0000
-Subject: [PATCH 008/203] xen: netback: parent sysfs device should be set before registering.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/xen/netback/common.h    |    2 +-
- drivers/xen/netback/interface.c |    4 +++-
- drivers/xen/netback/xenbus.c    |    3 +--
- 3 files changed, 5 insertions(+), 4 deletions(-)
-
-diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
-index 6ba804d..123a169 100644
---- a/drivers/xen/netback/common.h
-+++ b/drivers/xen/netback/common.h
-@@ -180,7 +180,7 @@ void netif_accel_init(void);
- 
- void netif_disconnect(struct xen_netif *netif);
- 
--struct xen_netif *netif_alloc(domid_t domid, unsigned int handle);
-+struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle);
- int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
- 	      unsigned long rx_ring_ref, unsigned int evtchn);
- 
-diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
-index 7706170..5e0d26d 100644
---- a/drivers/xen/netback/interface.c
-+++ b/drivers/xen/netback/interface.c
-@@ -178,7 +178,7 @@ static struct ethtool_ops network_ethtool_ops =
- 	.get_strings = netbk_get_strings,
- };
- 
--struct xen_netif *netif_alloc(domid_t domid, unsigned int handle)
-+struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle)
- {
- 	int err = 0;
- 	struct net_device *dev;
-@@ -192,6 +192,8 @@ struct xen_netif *netif_alloc(domid_t domid, unsigned int handle)
- 		return ERR_PTR(-ENOMEM);
- 	}
- 
-+	SET_NETDEV_DEV(dev, parent);
++	if (vif->netbk == NULL)
++		goto drop;
 +
- 	netif = netdev_priv(dev);
- 	memset(netif, 0, sizeof(*netif));
- 	netif->domid  = domid;
-diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
-index dc7b367..749931e 100644
---- a/drivers/xen/netback/xenbus.c
-+++ b/drivers/xen/netback/xenbus.c
-@@ -193,14 +193,13 @@ static void backend_create_netif(struct backend_info *be)
- 		return;
- 	}
- 
--	be->netif = netif_alloc(dev->otherend_id, handle);
-+	be->netif = netif_alloc(&dev->dev, dev->otherend_id, handle);
- 	if (IS_ERR(be->netif)) {
- 		err = PTR_ERR(be->netif);
- 		be->netif = NULL;
- 		xenbus_dev_fatal(dev, err, "creating interface");
- 		return;
- 	}
--	SET_NETDEV_DEV(be->netif->dev, &dev->dev);
- 
- 	kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
- }
--- 
-1.7.4
-
-
-From bb606178665ea78b505cb54864899478b6020584 Mon Sep 17 00:00:00 2001
-From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
-Date: Thu, 19 Mar 2009 12:42:36 -0700
-Subject: [PATCH 009/203] xen: netback: use NET_SKB_PAD rather than "16"
-
-There's a constant for the default skb headroom.
-
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/netback.c |    6 +++---
- 1 files changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index f59fadb..400f398 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -203,7 +203,7 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
- 	if (unlikely(!nskb))
- 		goto err;
- 
--	skb_reserve(nskb, 16 + NET_IP_ALIGN);
-+	skb_reserve(nskb, NET_SKB_PAD + NET_IP_ALIGN);
- 	headlen = skb_end_pointer(nskb) - nskb->data;
- 	if (headlen > skb_headlen(skb))
- 		headlen = skb_headlen(skb);
-@@ -1353,7 +1353,7 @@ static void net_tx_action(unsigned long unused)
- 			    ret < MAX_SKB_FRAGS) ?
- 			PKT_PROT_LEN : txreq.size;
- 
--		skb = alloc_skb(data_len + 16 + NET_IP_ALIGN,
-+		skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
- 				GFP_ATOMIC | __GFP_NOWARN);
- 		if (unlikely(skb == NULL)) {
- 			DPRINTK("Can't allocate a skb in start_xmit.\n");
-@@ -1362,7 +1362,7 @@ static void net_tx_action(unsigned long unused)
- 		}
- 
- 		/* Packets passed to netif_rx() must have some headroom. */
--		skb_reserve(skb, 16 + NET_IP_ALIGN);
-+		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
- 
- 		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
- 			struct xen_netif_extra_info *gso;
--- 
-1.7.4
-
-
-From fe41ab031dfa0c6f9821c2667ce821e7f4f635ed Mon Sep 17 00:00:00 2001
-From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
-Date: Thu, 19 Mar 2009 13:31:26 -0700
-Subject: [PATCH 010/203] xen: netback: completely drop flip support
-
-Nobody uses it?
-
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/common.h  |    1 -
- drivers/xen/netback/netback.c |  245 ++++-------------------------------------
- drivers/xen/netback/xenbus.c  |    3 +-
- 3 files changed, 22 insertions(+), 227 deletions(-)
-
-diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
-index 123a169..06f04c1 100644
---- a/drivers/xen/netback/common.h
-+++ b/drivers/xen/netback/common.h
-@@ -78,7 +78,6 @@ struct xen_netif {
- 
- 	/* Internal feature information. */
- 	u8 can_queue:1;	/* can queue packets for receiver? */
--	u8 copying_receiver:1;	/* copy packets to receiver?       */
- 
- 	/* Allow netif_be_start_xmit() to peek ahead in the rx request ring. */
- 	RING_IDX rx_req_cons_peek;
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 400f398..36bea2b 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -51,7 +51,6 @@
- struct netbk_rx_meta {
- 	skb_frag_t frag;
- 	int id;
--	u8 copy:1;
- };
- 
- struct netbk_tx_pending_inuse {
-@@ -160,26 +159,6 @@ static inline unsigned long alloc_mfn(void)
- 	return mfn_list[--alloc_index];
- }
- 
--static int check_mfn(int nr)
--{
--	struct xen_memory_reservation reservation = {
--		.extent_order = 0,
--		.domid        = DOMID_SELF
--	};
--	int rc;
--
--	if (likely(alloc_index >= nr))
--		return 0;
--
--	set_xen_guest_handle(reservation.extent_start, mfn_list + alloc_index);
--	reservation.nr_extents = MAX_MFN_ALLOC - alloc_index;
--	rc = HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation);
--	if (likely(rc > 0))
--		alloc_index += rc;
--
--	return alloc_index >= nr ? 0 : -ENOMEM;
--}
--
- static inline void maybe_schedule_tx_action(void)
- {
- 	smp_mb();
-@@ -188,82 +167,6 @@ static inline void maybe_schedule_tx_action(void)
- 		tasklet_schedule(&net_tx_tasklet);
- }
- 
--static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
--{
--	struct skb_shared_info *ninfo;
--	struct sk_buff *nskb;
--	unsigned long offset;
--	int ret;
--	int len;
--	int headlen;
--
--	BUG_ON(skb_shinfo(skb)->frag_list != NULL);
--
--	nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN);
--	if (unlikely(!nskb))
--		goto err;
--
--	skb_reserve(nskb, NET_SKB_PAD + NET_IP_ALIGN);
--	headlen = skb_end_pointer(nskb) - nskb->data;
--	if (headlen > skb_headlen(skb))
--		headlen = skb_headlen(skb);
--	ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
--	BUG_ON(ret);
--
--	ninfo = skb_shinfo(nskb);
--	ninfo->gso_size = skb_shinfo(skb)->gso_size;
--	ninfo->gso_type = skb_shinfo(skb)->gso_type;
--
--	offset = headlen;
--	len = skb->len - headlen;
--
--	nskb->len = skb->len;
--	nskb->data_len = len;
--	nskb->truesize += len;
--
--	while (len) {
--		struct page *page;
--		int copy;
--		int zero;
--
--		if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
--			dump_stack();
--			goto err_free;
--		}
--
--		copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
--		zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
--
--		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
--		if (unlikely(!page))
--			goto err_free;
--
--		ret = skb_copy_bits(skb, offset, page_address(page), copy);
--		BUG_ON(ret);
--
--		ninfo->frags[ninfo->nr_frags].page = page;
--		ninfo->frags[ninfo->nr_frags].page_offset = 0;
--		ninfo->frags[ninfo->nr_frags].size = copy;
--		ninfo->nr_frags++;
--
--		offset += copy;
--		len -= copy;
--	}
--
--	offset = nskb->data - skb->data;
--
--	nskb->transport_header = skb->transport_header + offset;
--	nskb->network_header = skb->network_header + offset;
--	nskb->mac_header = skb->mac_header + offset;
--
--	return nskb;
--
-- err_free:
--	kfree_skb(nskb);
-- err:
--	return NULL;
--}
--
- static inline int netbk_max_required_rx_slots(struct xen_netif *netif)
- {
- 	if (netif->features & (NETIF_F_SG|NETIF_F_TSO))
-@@ -297,24 +200,6 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
- 	if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
- 		goto drop;
- 
--	/*
--	 * Copy the packet here if it's destined for a flipping interface
--	 * but isn't flippable (e.g. extra references to data).
--	 * XXX For now we also copy skbuffs whose head crosses a page
--	 * boundary, because netbk_gop_skb can't handle them.
--	 */
--	if (!netif->copying_receiver ||
--	    ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE)) {
--		struct sk_buff *nskb = netbk_copy_skb(skb);
--		if ( unlikely(nskb == NULL) )
--			goto drop;
--		/* Copy only the header fields we use in this driver. */
--		nskb->dev = skb->dev;
--		nskb->ip_summed = skb->ip_summed;
--		dev_kfree_skb(skb);
--		skb = nskb;
--	}
--
- 	netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 +
- 				   !!skb_shinfo(skb)->gso_size;
- 	netif_get(netif);
-@@ -388,66 +273,32 @@ static u16 netbk_gop_frag(struct xen_netif *netif, struct netbk_rx_meta *meta,
- 			  struct page *page, unsigned long size,
- 			  unsigned long offset)
- {
--	struct mmu_update *mmu;
--	struct gnttab_transfer *gop;
- 	struct gnttab_copy *copy_gop;
--	struct multicall_entry *mcl;
- 	struct xen_netif_rx_request *req;
--	unsigned long old_mfn, new_mfn;
-+	unsigned long old_mfn;
- 	int idx = netif_page_index(page);
- 
- 	old_mfn = virt_to_mfn(page_address(page));
- 
- 	req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
--	if (netif->copying_receiver) {
--		/* The fragment needs to be copied rather than
--		   flipped. */
--		meta->copy = 1;
--		copy_gop = npo->copy + npo->copy_prod++;
--		copy_gop->flags = GNTCOPY_dest_gref;
--		if (idx > -1) {
--			struct pending_tx_info *src_pend = &pending_tx_info[idx];
--			copy_gop->source.domid = src_pend->netif->domid;
--			copy_gop->source.u.ref = src_pend->req.gref;
--			copy_gop->flags |= GNTCOPY_source_gref;
--		} else {
--			copy_gop->source.domid = DOMID_SELF;
--			copy_gop->source.u.gmfn = old_mfn;
--		}
--		copy_gop->source.offset = offset;
--		copy_gop->dest.domid = netif->domid;
--		copy_gop->dest.offset = 0;
--		copy_gop->dest.u.ref = req->gref;
--		copy_gop->len = size;
--	} else {
--		meta->copy = 0;
--		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
--			new_mfn = alloc_mfn();
--
--			/*
--			 * Set the new P2M table entry before
--			 * reassigning the old data page. Heed the
--			 * comment in pgtable-2level.h:pte_page(). :-)
--			 */
--			set_phys_to_machine(page_to_pfn(page), new_mfn);
--
--			mcl = npo->mcl + npo->mcl_prod++;
--			MULTI_update_va_mapping(mcl,
--					     (unsigned long)page_address(page),
--					     mfn_pte(new_mfn, PAGE_KERNEL),
--					     0);
--
--			mmu = npo->mmu + npo->mmu_prod++;
--			mmu->ptr = ((phys_addr_t)new_mfn << PAGE_SHIFT) |
--				    MMU_MACHPHYS_UPDATE;
--			mmu->val = page_to_pfn(page);
--		}
- 
--		gop = npo->trans + npo->trans_prod++;
--		gop->mfn = old_mfn;
--		gop->domid = netif->domid;
--		gop->ref = req->gref;
-+	copy_gop = npo->copy + npo->copy_prod++;
-+	copy_gop->flags = GNTCOPY_dest_gref;
-+	if (idx > -1) {
-+		struct pending_tx_info *src_pend = &pending_tx_info[idx];
-+		copy_gop->source.domid = src_pend->netif->domid;
-+		copy_gop->source.u.ref = src_pend->req.gref;
-+		copy_gop->flags |= GNTCOPY_source_gref;
-+	} else {
-+		copy_gop->source.domid = DOMID_SELF;
-+		copy_gop->source.u.gmfn = old_mfn;
- 	}
-+	copy_gop->source.offset = offset;
-+	copy_gop->dest.domid = netif->domid;
-+	copy_gop->dest.offset = 0;
-+	copy_gop->dest.u.ref = req->gref;
-+	copy_gop->len = size;
++	/* Drop the packet if the target domain has no receive buffers. */
++	if (!xenvif_rx_schedulable(vif))
++		goto drop;
 +
- 	return req->id;
- }
- 
-@@ -502,41 +353,17 @@ static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
- static int netbk_check_gop(int nr_frags, domid_t domid,
- 			   struct netrx_pending_operations *npo)
- {
--	struct multicall_entry *mcl;
--	struct gnttab_transfer *gop;
- 	struct gnttab_copy     *copy_op;
- 	int status = NETIF_RSP_OKAY;
- 	int i;
- 
- 	for (i = 0; i <= nr_frags; i++) {
--		if (npo->meta[npo->meta_cons + i].copy) {
- 			copy_op = npo->copy + npo->copy_cons++;
- 			if (copy_op->status != GNTST_okay) {
- 				DPRINTK("Bad status %d from copy to DOM%d.\n",
- 					copy_op->status, domid);
- 				status = NETIF_RSP_ERROR;
- 			}
--		} else {
--			if (!xen_feature(XENFEAT_auto_translated_physmap)) {
--				mcl = npo->mcl + npo->mcl_cons++;
--				/* The update_va_mapping() must not fail. */
--				BUG_ON(mcl->result != 0);
--			}
--
--			gop = npo->trans + npo->trans_cons++;
--			/* Check the reassignment error code. */
--			if (gop->status != 0) {
--				DPRINTK("Bad status %d from grant transfer to DOM%u\n",
--					gop->status, domid);
--				/*
--				 * Page no longer belongs to us unless
--				 * GNTST_bad_page, but that should be
--				 * a fatal error anyway.
--				 */
--				BUG_ON(gop->status == GNTST_bad_page);
--				status = NETIF_RSP_ERROR;
--			}
--		}
- 	}
- 
- 	return status;
-@@ -551,11 +378,8 @@ static void netbk_add_frag_responses(struct xen_netif *netif, int status,
- 	for (i = 0; i < nr_frags; i++) {
- 		int id = meta[i].id;
- 		int flags = (i == nr_frags - 1) ? 0 : NETRXF_more_data;
--
--		if (meta[i].copy)
--			offset = 0;
--		else
--			offset = meta[i].frag.page_offset;
-+		
-+		offset = 0;
- 		make_rx_response(netif, id, status, offset,
- 				 meta[i].frag.size, flags);
- 	}
-@@ -603,18 +427,6 @@ static void net_rx_action(unsigned long unused)
- 		nr_frags = skb_shinfo(skb)->nr_frags;
- 		*(int *)skb->cb = nr_frags;
- 
--		if (!xen_feature(XENFEAT_auto_translated_physmap) &&
--		    !((struct xen_netif *)netdev_priv(skb->dev))->copying_receiver &&
--		    check_mfn(nr_frags + 1)) {
--			/* Memory squeeze? Back off for an arbitrary while. */
--			if ( net_ratelimit() )
--				WPRINTK("Memory squeeze in netback "
--					"driver.\n");
--			mod_timer(&net_timer, jiffies + HZ);
--			skb_queue_head(&rx_queue, skb);
--			break;
--		}
--
- 		netbk_gop_skb(skb, &npo);
- 
- 		count += nr_frags + 1;
-@@ -677,20 +489,6 @@ static void net_rx_action(unsigned long unused)
- 		nr_frags = *(int *)skb->cb;
- 
- 		netif = netdev_priv(skb->dev);
--		/* We can't rely on skb_release_data to release the
--		   pages used by fragments for us, since it tries to
--		   touch the pages in the fraglist.  If we're in
--		   flipping mode, that doesn't work.  In copying mode,
--		   we still have access to all of the pages, and so
--		   it's safe to let release_data deal with it. */
--		/* (Freeing the fragments is safe since we copy
--		   non-linear skbs destined for flipping interfaces) */
--		if (!netif->copying_receiver) {
--			atomic_set(&(skb_shinfo(skb)->dataref), 1);
--			skb_shinfo(skb)->frag_list = NULL;
--			skb_shinfo(skb)->nr_frags = 0;
--			netbk_free_pages(nr_frags, meta + npo.meta_cons + 1);
--		}
- 
- 		netif->stats.tx_bytes += skb->len;
- 		netif->stats.tx_packets++;
-@@ -706,10 +504,7 @@ static void net_rx_action(unsigned long unused)
- 			/* remote but checksummed. */
- 			flags |= NETRXF_data_validated;
- 
--		if (meta[npo.meta_cons].copy)
--			offset = 0;
--		else
--			offset = offset_in_page(skb->data);
-+		offset = 0;
- 		resp = make_rx_response(netif, id, status, offset,
- 					skb_headlen(skb), flags);
- 
-diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
-index 749931e..a492288 100644
---- a/drivers/xen/netback/xenbus.c
-+++ b/drivers/xen/netback/xenbus.c
-@@ -378,7 +378,8 @@ static int connect_rings(struct backend_info *be)
- 				 dev->otherend);
- 		return err;
- 	}
--	be->netif->copying_receiver = !!rx_copy;
-+	if (!rx_copy)
-+		return -EOPNOTSUPP;
- 
- 	if (be->netif->dev->tx_queue_len != 0) {
- 		if (xenbus_scanf(XBT_NIL, dev->otherend,
--- 
-1.7.4
-
-
-From 17d465234118873ab4f5a7992feb4ce7b5537cf7 Mon Sep 17 00:00:00 2001
-From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
-Date: Thu, 19 Mar 2009 15:19:39 -0700
-Subject: [PATCH 011/203] xen: netback: demacro MASK_PEND_IDX
-
-Replace it with a more meaningful inline: pending_index().
-
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/netback.c |   21 +++++++++++++--------
- 1 files changed, 13 insertions(+), 8 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 36bea2b..4095622 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -121,7 +121,12 @@ static struct pending_tx_info {
- } pending_tx_info[MAX_PENDING_REQS];
- static u16 pending_ring[MAX_PENDING_REQS];
- typedef unsigned int PEND_RING_IDX;
--#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
-+
-+static inline PEND_RING_IDX pending_index(unsigned i)
++	/* Reserve ring slots for the worst-case number of fragments. */
++	vif->rx_req_cons_peek += xen_netbk_count_skb_slots(vif, skb);
++	xenvif_get(vif);
++
++	if (vif->can_queue && xen_netbk_must_stop_queue(vif))
++		netif_stop_queue(dev);
++
++	xen_netbk_queue_tx_skb(vif, skb);
++
++	return NETDEV_TX_OK;
++
++ drop:
++	vif->stats.tx_dropped++;
++	dev_kfree_skb(skb);
++	return NETDEV_TX_OK;
++}
++
++void xenvif_receive_skb(struct xenvif *vif, struct sk_buff *skb)
 +{
-+	return i & (MAX_PENDING_REQS-1);
++	netif_rx_ni(skb);
++	vif->dev->last_rx = jiffies;
 +}
 +
- static PEND_RING_IDX pending_prod, pending_cons;
- #define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
- 
-@@ -695,7 +700,7 @@ inline static void net_tx_action_dealloc(void)
- 		while (dc != dp) {
- 			unsigned long pfn;
- 
--			pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
-+			pending_idx = dealloc_ring[pending_index(dc++)];
- 			list_move_tail(&pending_inuse[pending_idx].list, &list);
- 
- 			pfn = idx_to_pfn(pending_idx);
-@@ -754,7 +759,7 @@ inline static void net_tx_action_dealloc(void)
- 		/* Ready for next use. */
- 		gnttab_reset_grant_page(mmap_pages[pending_idx]);
- 
--		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
-+		pending_ring[pending_index(pending_prod++)] = pending_idx;
- 
- 		netif_put(netif);
- 
-@@ -831,7 +836,7 @@ static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netif *netif,
- 	start = ((unsigned long)shinfo->frags[0].page == pending_idx);
- 
- 	for (i = start; i < shinfo->nr_frags; i++, txp++) {
--		pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)];
-+		pending_idx = pending_ring[pending_index(pending_cons++)];
- 
- 		gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx),
- 				  GNTMAP_host_map | GNTMAP_readonly,
-@@ -862,7 +867,7 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
- 	if (unlikely(err)) {
- 		txp = &pending_tx_info[pending_idx].req;
- 		make_tx_response(netif, txp, NETIF_RSP_ERROR);
--		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
-+		pending_ring[pending_index(pending_prod++)] = pending_idx;
- 		netif_put(netif);
- 	} else {
- 		set_phys_to_machine(
-@@ -895,7 +900,7 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
- 		/* Error on this fragment: respond to client with an error. */
- 		txp = &pending_tx_info[pending_idx].req;
- 		make_tx_response(netif, txp, NETIF_RSP_ERROR);
--		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
-+		pending_ring[pending_index(pending_prod++)] = pending_idx;
- 		netif_put(netif);
- 
- 		/* Not the first error? Preceding frags already invalidated. */
-@@ -1142,7 +1147,7 @@ static void net_tx_action(unsigned long unused)
- 			continue;
- 		}
- 
--		pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
-+		pending_idx = pending_ring[pending_index(pending_cons)];
- 
- 		data_len = (txreq.size > PKT_PROT_LEN &&
- 			    ret < MAX_SKB_FRAGS) ?
-@@ -1298,7 +1303,7 @@ static void netif_idx_release(u16 pending_idx)
- 	unsigned long flags;
- 
- 	spin_lock_irqsave(&_lock, flags);
--	dealloc_ring[MASK_PEND_IDX(dealloc_prod)] = pending_idx;
-+	dealloc_ring[pending_index(dealloc_prod)] = pending_idx;
- 	/* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
- 	smp_wmb();
- 	dealloc_prod++;
--- 
-1.7.4
-
-
-From d47af34f87b2d365c75aa3579ad512619ef3d579 Mon Sep 17 00:00:00 2001
-From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
-Date: Thu, 19 Mar 2009 15:29:30 -0700
-Subject: [PATCH 012/203] xen: netback: convert PEND_RING_IDX into a proper typedef name
-
-Rename PEND_RING_IDX to pending_ring_idx_t.  Its not used that much,
-the extra typing won't kill anyone.
-
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/netback.c |   12 ++++++------
- 1 files changed, 6 insertions(+), 6 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 4095622..8292e96 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -120,19 +120,19 @@ static struct pending_tx_info {
- 	struct xen_netif *netif;
- } pending_tx_info[MAX_PENDING_REQS];
- static u16 pending_ring[MAX_PENDING_REQS];
--typedef unsigned int PEND_RING_IDX;
-+typedef unsigned int pending_ring_idx_t;
- 
--static inline PEND_RING_IDX pending_index(unsigned i)
-+static inline pending_ring_idx_t pending_index(unsigned i)
- {
- 	return i & (MAX_PENDING_REQS-1);
- }
- 
--static PEND_RING_IDX pending_prod, pending_cons;
-+static pending_ring_idx_t pending_prod, pending_cons;
- #define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
- 
- /* Freed TX SKBs get batched on this ring before return to pending_ring. */
- static u16 dealloc_ring[MAX_PENDING_REQS];
--static PEND_RING_IDX dealloc_prod, dealloc_cons;
-+static pending_ring_idx_t dealloc_prod, dealloc_cons;
- 
- /* Doubly-linked list of in-use pending entries. */
- static struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
-@@ -669,7 +669,7 @@ static void tx_credit_callback(unsigned long data)
- 	netif_schedule_work(netif);
- }
- 
--static inline int copy_pending_req(PEND_RING_IDX pending_idx)
-+static inline int copy_pending_req(pending_ring_idx_t pending_idx)
- {
- 	return gnttab_copy_grant_page(grant_tx_handle[pending_idx],
- 				      &mmap_pages[pending_idx]);
-@@ -680,7 +680,7 @@ inline static void net_tx_action_dealloc(void)
- 	struct netbk_tx_pending_inuse *inuse, *n;
- 	struct gnttab_unmap_grant_ref *gop;
- 	u16 pending_idx;
--	PEND_RING_IDX dc, dp;
-+	pending_ring_idx_t dc, dp;
- 	struct xen_netif *netif;
- 	int ret;
- 	LIST_HEAD(list);
--- 
-1.7.4
-
-
-From 56727a43f329d50c2a00fed0316ffd87d6c23ebd Mon Sep 17 00:00:00 2001
-From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
-Date: Thu, 19 Mar 2009 15:31:32 -0700
-Subject: [PATCH 013/203] xen: netback: rename NR_PENDING_REQS to nr_pending_reqs()
-
-Use function syntax to show its actually computing a value, rather than
-a constant.
-
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/netback.c |   10 +++++++---
- 1 files changed, 7 insertions(+), 3 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 8292e96..5410a68 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -128,7 +128,11 @@ static inline pending_ring_idx_t pending_index(unsigned i)
- }
- 
- static pending_ring_idx_t pending_prod, pending_cons;
--#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
++void xenvif_notify_tx_completion(struct xenvif *vif)
++{
++	if (netif_queue_stopped(vif->dev) && xenvif_rx_schedulable(vif))
++		netif_wake_queue(vif->dev);
++}
 +
-+static inline pending_ring_idx_t nr_pending_reqs(void)
++static struct net_device_stats *xenvif_get_stats(struct net_device *dev)
 +{
-+	return MAX_PENDING_REQS - pending_prod + pending_cons;
++	struct xenvif *vif = netdev_priv(dev);
++	return &vif->stats;
 +}
- 
- /* Freed TX SKBs get batched on this ring before return to pending_ring. */
- static u16 dealloc_ring[MAX_PENDING_REQS];
-@@ -167,7 +171,7 @@ static inline unsigned long alloc_mfn(void)
- static inline void maybe_schedule_tx_action(void)
- {
- 	smp_mb();
--	if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
-+	if ((nr_pending_reqs() < (MAX_PENDING_REQS/2)) &&
- 	    !list_empty(&net_schedule_list))
- 		tasklet_schedule(&net_tx_tasklet);
- }
-@@ -1060,7 +1064,7 @@ static void net_tx_action(unsigned long unused)
- 		net_tx_action_dealloc();
- 
- 	mop = tx_map_ops;
--	while (((NR_PENDING_REQS + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
-+	while (((nr_pending_reqs() + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
- 		!list_empty(&net_schedule_list)) {
- 		/* Get a netif from the list with work to do. */
- 		ent = net_schedule_list.next;
--- 
-1.7.4
-
-
-From 55b360614f1bd44d0b1395b4aabf41d8f1f13f17 Mon Sep 17 00:00:00 2001
-From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
-Date: Thu, 19 Mar 2009 15:45:45 -0700
-Subject: [PATCH 014/203] xen: netback: pre-initialize list and spinlocks; use empty list to indicate not on list
-
-Statically pre-initialize net_schedule_list head and lock.
-
-Use an empty list to mark when a xen_netif is not on the schedule list,
-rather than NULL (which may upset list debugging).
-
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/interface.c |    1 +
- drivers/xen/netback/netback.c   |   12 ++++--------
- 2 files changed, 5 insertions(+), 8 deletions(-)
-
-diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
-index 5e0d26d..dc4fb53 100644
---- a/drivers/xen/netback/interface.c
-+++ b/drivers/xen/netback/interface.c
-@@ -201,6 +201,7 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
- 	atomic_set(&netif->refcnt, 1);
- 	init_waitqueue_head(&netif->waiting_to_free);
- 	netif->dev = dev;
-+	INIT_LIST_HEAD(&netif->list);
- 
- 	netback_carrier_off(netif);
- 
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 5410a68..cbd4b03 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -148,8 +148,8 @@ static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
- static struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
- static struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
- 
--static struct list_head net_schedule_list;
--static spinlock_t net_schedule_list_lock;
-+static LIST_HEAD(net_schedule_list);
-+static DEFINE_SPINLOCK(net_schedule_list_lock);
- 
- #define MAX_MFN_ALLOC 64
- static unsigned long mfn_list[MAX_MFN_ALLOC];
-@@ -588,15 +588,14 @@ struct net_device_stats *netif_be_get_stats(struct net_device *dev)
- 
- static int __on_net_schedule_list(struct xen_netif *netif)
- {
--	return netif->list.next != NULL;
-+	return !list_empty(&netif->list);
- }
- 
- static void remove_from_net_schedule_list(struct xen_netif *netif)
- {
- 	spin_lock_irq(&net_schedule_list_lock);
- 	if (likely(__on_net_schedule_list(netif))) {
--		list_del(&netif->list);
--		netif->list.next = NULL;
-+		list_del_init(&netif->list);
- 		netif_put(netif);
- 	}
- 	spin_unlock_irq(&net_schedule_list_lock);
-@@ -1466,9 +1465,6 @@ static int __init netback_init(void)
- 	for (i = 0; i < MAX_PENDING_REQS; i++)
- 		pending_ring[i] = i;
- 
--	spin_lock_init(&net_schedule_list_lock);
--	INIT_LIST_HEAD(&net_schedule_list);
--
- 	netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
- 	if (MODPARM_copy_skb) {
- 		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
--- 
-1.7.4
-
-
-From e12cf57de7a6c20e4c8900ce7bf4e6924a12f49e Mon Sep 17 00:00:00 2001
-From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
-Date: Thu, 19 Mar 2009 15:48:10 -0700
-Subject: [PATCH 015/203] xen: netback: remove CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
-
-Keir says:
-> > Does CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER need to be a config
-> > option?  Could/should we always/never set it?
-> It doesn't work well with local delivery into dom0, nor even with IP
-> fragment reassembly. I don't think we would ever turn it on these days.
-
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/netback.c |   21 ---------------------
- 1 files changed, 0 insertions(+), 21 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index cbd4b03..f00e405 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -615,23 +615,11 @@ static void add_to_net_schedule_list_tail(struct xen_netif *netif)
- 	spin_unlock_irq(&net_schedule_list_lock);
- }
- 
--/*
-- * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER:
-- * If this driver is pipelining transmit requests then we can be very
-- * aggressive in avoiding new-packet notifications -- frontend only needs to
-- * send a notification if there are no outstanding unreceived responses.
-- * If we may be buffering transmit buffers for any reason then we must be rather
-- * more conservative and treat this as the final check for pending work.
-- */
- void netif_schedule_work(struct xen_netif *netif)
- {
- 	int more_to_do;
- 
--#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
--	more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx);
--#else
- 	RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
--#endif
- 
- 	if (more_to_do) {
- 		add_to_net_schedule_list_tail(netif);
-@@ -1355,15 +1343,6 @@ static void make_tx_response(struct xen_netif *netif,
- 	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
- 	if (notify)
- 		notify_remote_via_irq(netif->irq);
--
--#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
--	if (i == netif->tx.req_cons) {
--		int more_to_do;
--		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
--		if (more_to_do)
--			add_to_net_schedule_list_tail(netif);
--	}
--#endif
- }
- 
- static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
--- 
-1.7.4
-
-
-From adf542f9c714e3b7c76fcf9e44e0a89cae21a341 Mon Sep 17 00:00:00 2001
-From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
-Date: Thu, 19 Mar 2009 22:28:52 -0700
-Subject: [PATCH 016/203] xen: netback: make netif_get/put inlines
-
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/common.h |   16 ++++++++++------
- 1 files changed, 10 insertions(+), 6 deletions(-)
-
-diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
-index 06f04c1..9056be0 100644
---- a/drivers/xen/netback/common.h
-+++ b/drivers/xen/netback/common.h
-@@ -183,12 +183,16 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
- int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
- 	      unsigned long rx_ring_ref, unsigned int evtchn);
- 
--#define netif_get(_b) (atomic_inc(&(_b)->refcnt))
--#define netif_put(_b)						\
--	do {							\
--		if ( atomic_dec_and_test(&(_b)->refcnt) )	\
--			wake_up(&(_b)->waiting_to_free);	\
--	} while (0)
-+static inline void netif_get(struct xen_netif *netif)
-+{
-+	atomic_inc(&netif->refcnt);
-+}
-+
-+static inline void netif_put(struct xen_netif *netif)
-+{
-+	if (atomic_dec_and_test(&netif->refcnt))
-+		wake_up(&netif->waiting_to_free);
-+}
- 
- void netif_xenbus_init(void);
- 
--- 
-1.7.4
-
-
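The inlined get/put pair is a plain reference count whose final put wakes whoever is waiting to free the interface. A rough userspace analogue using C11 atomics, with the wake_up() reduced to a flag (assumed names, not driver API):

#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>

/* Userspace analogue of netif_get/netif_put: the last put "wakes" a waiter. */
struct vif {
    atomic_int refcnt;
    bool freed_wakeup;  /* stands in for wake_up(&waiting_to_free) */
};

static void vif_get(struct vif *v)
{
    atomic_fetch_add(&v->refcnt, 1);
}

static void vif_put(struct vif *v)
{
    /* atomic_dec_and_test(): true when the count hits zero */
    if (atomic_fetch_sub(&v->refcnt, 1) == 1)
        v->freed_wakeup = true;
}

int main(void)
{
    struct vif v = { .refcnt = 1, .freed_wakeup = false };

    vif_get(&v);    /* a second user takes a reference */
    vif_put(&v);    /* ... and drops it: no wakeup yet */
    assert(!v.freed_wakeup);

    vif_put(&v);    /* last reference gone */
    assert(v.freed_wakeup);
    return 0;
}
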
-From f06459a199f953a68f001f06e54dde54e1e16c87 Mon Sep 17 00:00:00 2001
-From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
-Date: Thu, 19 Mar 2009 22:30:24 -0700
-Subject: [PATCH 017/203] xen: netback: move code around
-
-Split net_tx_action() into several functions; move variables into
-their innermost scopes; rename "i" to "idx".
-
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/netback.c |  158 ++++++++++++++++++++++++-----------------
- 1 files changed, 94 insertions(+), 64 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index f00e405..4d63ff3 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -773,7 +773,8 @@ static void netbk_tx_err(struct xen_netif *netif, struct xen_netif_tx_request *t
- 	netif_put(netif);
- }
- 
--static int netbk_count_requests(struct xen_netif *netif, struct xen_netif_tx_request *first,
-+static int netbk_count_requests(struct xen_netif *netif,
-+				struct xen_netif_tx_request *first,
- 				struct xen_netif_tx_request *txp, int work_to_do)
- {
- 	RING_IDX cons = netif->tx.req_cons;
-@@ -1032,30 +1033,58 @@ out:
- 	return err;
- }
- 
--/* Called after netfront has transmitted */
--static void net_tx_action(unsigned long unused)
-+static bool tx_credit_exceeded(struct xen_netif *netif, unsigned size)
- {
--	struct list_head *ent;
--	struct sk_buff *skb;
--	struct xen_netif *netif;
--	struct xen_netif_tx_request txreq;
--	struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
--	struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
--	u16 pending_idx;
--	RING_IDX i;
--	struct gnttab_map_grant_ref *mop;
--	unsigned int data_len;
--	int ret, work_to_do;
-+	unsigned long now = jiffies;
-+	unsigned long next_credit =
-+		netif->credit_timeout.expires +
-+		msecs_to_jiffies(netif->credit_usec / 1000);
 +
-+	/* Timer could already be pending in rare cases. */
-+	if (timer_pending(&netif->credit_timeout))
-+		return true;
++static void xenvif_up(struct xenvif *vif)
++{
++	xen_netbk_add_xenvif(vif);
++	enable_irq(vif->irq);
++	xen_netbk_check_rx_xenvif(vif);
++}
 +
-+	/* Passed the point where we can replenish credit? */
-+	if (time_after_eq(now, next_credit)) {
-+		netif->credit_timeout.expires = now;
-+		tx_add_credit(netif);
-+	}
- 
--	if (dealloc_cons != dealloc_prod)
--		net_tx_action_dealloc();
-+	/* Still too big to send right now? Set a callback. */
-+	if (size > netif->remaining_credit) {
-+		netif->credit_timeout.data     =
-+			(unsigned long)netif;
-+		netif->credit_timeout.function =
-+			tx_credit_callback;
-+		mod_timer(&netif->credit_timeout,
-+			  next_credit);
++static void xenvif_down(struct xenvif *vif)
++{
++	disable_irq(vif->irq);
++	xen_netbk_deschedule_xenvif(vif);
++	xen_netbk_remove_xenvif(vif);
++}
 +
-+		return true;
-+	}
++static int xenvif_open(struct net_device *dev)
++{
++	struct xenvif *vif = netdev_priv(dev);
++	if (netif_carrier_ok(dev))
++		xenvif_up(vif);
++	netif_start_queue(dev);
++	return 0;
++}
 +
-+	return false;
++static int xenvif_close(struct net_device *dev)
++{
++	struct xenvif *vif = netdev_priv(dev);
++	if (netif_carrier_ok(dev))
++		xenvif_down(vif);
++	netif_stop_queue(dev);
++	return 0;
 +}
 +
-+static unsigned net_tx_build_mops(void)
++static int xenvif_change_mtu(struct net_device *dev, int mtu)
 +{
-+	struct gnttab_map_grant_ref *mop;
-+	struct sk_buff *skb;
-+	int ret;
- 
- 	mop = tx_map_ops;
- 	while (((nr_pending_reqs() + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
- 		!list_empty(&net_schedule_list)) {
-+		struct xen_netif *netif;
-+		struct xen_netif_tx_request txreq;
-+		struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
-+		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
-+		u16 pending_idx;
-+		RING_IDX idx;
-+		int work_to_do;
-+		unsigned int data_len;
-+	
- 		/* Get a netif from the list with work to do. */
--		ent = net_schedule_list.next;
--		netif = list_entry(ent, struct xen_netif, list);
-+		netif = list_first_entry(&net_schedule_list, struct xen_netif, list);
- 		netif_get(netif);
- 		remove_from_net_schedule_list(netif);
- 
-@@ -1065,67 +1094,43 @@ static void net_tx_action(unsigned long unused)
- 			continue;
- 		}
- 
--		i = netif->tx.req_cons;
-+		idx = netif->tx.req_cons;
- 		rmb(); /* Ensure that we see the request before we copy it. */
--		memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));
-+		memcpy(&txreq, RING_GET_REQUEST(&netif->tx, idx), sizeof(txreq));
- 
- 		/* Credit-based scheduling. */
--		if (txreq.size > netif->remaining_credit) {
--			unsigned long now = jiffies;
--			unsigned long next_credit =
--				netif->credit_timeout.expires +
--				msecs_to_jiffies(netif->credit_usec / 1000);
--
--			/* Timer could already be pending in rare cases. */
--			if (timer_pending(&netif->credit_timeout)) {
--				netif_put(netif);
--				continue;
--			}
--
--			/* Passed the point where we can replenish credit? */
--			if (time_after_eq(now, next_credit)) {
--				netif->credit_timeout.expires = now;
--				tx_add_credit(netif);
--			}
--
--			/* Still too big to send right now? Set a callback. */
--			if (txreq.size > netif->remaining_credit) {
--				netif->credit_timeout.data     =
--					(unsigned long)netif;
--				netif->credit_timeout.function =
--					tx_credit_callback;
--				mod_timer(&netif->credit_timeout,
--					    next_credit);
--				netif_put(netif);
--				continue;
--			}
-+		if (txreq.size > netif->remaining_credit &&
-+		    tx_credit_exceeded(netif, txreq.size)) {
-+			netif_put(netif);
-+			continue;
- 		}
++	struct xenvif *vif = netdev_priv(dev);
++	int max = vif->can_sg ? 65535 - ETH_HLEN : ETH_DATA_LEN;
 +
- 		netif->remaining_credit -= txreq.size;
- 
- 		work_to_do--;
--		netif->tx.req_cons = ++i;
-+		netif->tx.req_cons = ++idx;
- 
- 		memset(extras, 0, sizeof(extras));
- 		if (txreq.flags & NETTXF_extra_info) {
- 			work_to_do = netbk_get_extras(netif, extras,
- 						      work_to_do);
--			i = netif->tx.req_cons;
-+			idx = netif->tx.req_cons;
- 			if (unlikely(work_to_do < 0)) {
--				netbk_tx_err(netif, &txreq, i);
-+				netbk_tx_err(netif, &txreq, idx);
- 				continue;
- 			}
- 		}
- 
- 		ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do);
- 		if (unlikely(ret < 0)) {
--			netbk_tx_err(netif, &txreq, i - ret);
-+			netbk_tx_err(netif, &txreq, idx - ret);
- 			continue;
- 		}
--		i += ret;
-+		idx += ret;
- 
- 		if (unlikely(txreq.size < ETH_HLEN)) {
- 			DPRINTK("Bad packet size: %d\n", txreq.size);
--			netbk_tx_err(netif, &txreq, i);
-+			netbk_tx_err(netif, &txreq, idx);
- 			continue;
- 		}
- 
-@@ -1134,7 +1139,7 @@ static void net_tx_action(unsigned long unused)
- 			DPRINTK("txreq.offset: %x, size: %u, end: %lu\n",
- 				txreq.offset, txreq.size,
- 				(txreq.offset &~PAGE_MASK) + txreq.size);
--			netbk_tx_err(netif, &txreq, i);
-+			netbk_tx_err(netif, &txreq, idx);
- 			continue;
- 		}
- 
-@@ -1148,7 +1153,7 @@ static void net_tx_action(unsigned long unused)
- 				GFP_ATOMIC | __GFP_NOWARN);
- 		if (unlikely(skb == NULL)) {
- 			DPRINTK("Can't allocate a skb in start_xmit.\n");
--			netbk_tx_err(netif, &txreq, i);
-+			netbk_tx_err(netif, &txreq, idx);
- 			break;
- 		}
- 
-@@ -1161,7 +1166,7 @@ static void net_tx_action(unsigned long unused)
- 
- 			if (netbk_set_skb_gso(skb, gso)) {
- 				kfree_skb(skb);
--				netbk_tx_err(netif, &txreq, i);
-+				netbk_tx_err(netif, &txreq, idx);
- 				continue;
- 			}
- 		}
-@@ -1199,23 +1204,27 @@ static void net_tx_action(unsigned long unused)
- 
- 		mop = netbk_get_requests(netif, skb, txfrags, mop);
- 
--		netif->tx.req_cons = i;
-+		netif->tx.req_cons = idx;
- 		netif_schedule_work(netif);
- 
- 		if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
- 			break;
- 	}
- 
--	if (mop == tx_map_ops)
--		return;
-+	return mop - tx_map_ops;
++	if (mtu > max)
++		return -EINVAL;
++	dev->mtu = mtu;
++	return 0;
 +}
- 
--	ret = HYPERVISOR_grant_table_op(
--		GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops);
--	BUG_ON(ret);
-+static void net_tx_submit(void)
-+{
-+	struct gnttab_map_grant_ref *mop;
-+	struct sk_buff *skb;
- 
- 	mop = tx_map_ops;
- 	while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
- 		struct xen_netif_tx_request *txp;
-+		struct xen_netif *netif;
-+		u16 pending_idx;
-+		unsigned data_len;
- 
- 		pending_idx = *((u16 *)skb->data);
- 		netif       = pending_tx_info[pending_idx].netif;
-@@ -1288,6 +1297,27 @@ static void net_tx_action(unsigned long unused)
- 	}
- }
- 
-+/* Called after netfront has transmitted */
-+static void net_tx_action(unsigned long unused)
-+{
-+	unsigned nr_mops;
-+	int ret;
 +
-+	if (dealloc_cons != dealloc_prod)
-+		net_tx_action_dealloc();
++static void xenvif_set_features(struct xenvif *vif)
++{
++	struct net_device *dev = vif->dev;
++	int features = dev->features;
 +
-+	nr_mops = net_tx_build_mops();
++	if (vif->can_sg)
++		features |= NETIF_F_SG;
++	if (vif->gso || vif->gso_prefix)
++		features |= NETIF_F_TSO;
++	if (vif->csum)
++		features |= NETIF_F_IP_CSUM;
 +
-+	if (nr_mops == 0)
-+		return;
++	features &= ~(vif->features_disabled);
 +
-+	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
-+					tx_map_ops, nr_mops);
-+	BUG_ON(ret);
++	if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN)
++		dev->mtu = ETH_DATA_LEN;
 +
-+	net_tx_submit();
++	dev->features = features;
 +}
 +
- static void netif_idx_release(u16 pending_idx)
- {
- 	static DEFINE_SPINLOCK(_lock);
--- 
-1.7.4
-
-
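tx_credit_exceeded() above is a per-interface token bucket: credit is replenished once per window, and a request larger than the remaining credit arms a timer and defers the packet. A standalone sketch of the same arithmetic, using milliseconds in place of jiffies (names and values are illustrative):

#include <assert.h>
#include <stdbool.h>

struct vif_credit {
    unsigned long bytes;        /* credit added per window */
    unsigned long remaining;    /* credit left in this window */
    unsigned long window_ms;    /* replenish interval */
    unsigned long expires_ms;   /* start of current window */
};

static bool tx_credit_exceeded(struct vif_credit *c, unsigned long now_ms,
                               unsigned long size)
{
    /* Passed the point where we can replenish credit? */
    if (now_ms >= c->expires_ms + c->window_ms) {
        c->expires_ms = now_ms;
        c->remaining = c->bytes;
    }

    /* Still too big to send right now?  The driver would arm a timer. */
    return size > c->remaining;
}

int main(void)
{
    struct vif_credit c = {
        .bytes = 1000, .remaining = 1000,
        .window_ms = 100, .expires_ms = 0,
    };

    assert(!tx_credit_exceeded(&c, 0, 600));
    c.remaining -= 600;                        /* caller debits on success */
    assert(tx_credit_exceeded(&c, 50, 600));   /* window not over yet */
    assert(!tx_credit_exceeded(&c, 150, 600)); /* replenished */
    return 0;
}
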
-From cec84ff81d9f6ca882908572b984215529b4117b Mon Sep 17 00:00:00 2001
-From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
-Date: Fri, 20 Mar 2009 23:18:12 -0700
-Subject: [PATCH 018/203] xen: netback: document PKT_PROT_LEN
-
-Document the rationale for the existence and value of PKT_PROT_LEN.
-
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/netback.c |    9 +++++++++
- 1 files changed, 9 insertions(+), 0 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 4d63ff3..80b424f 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -113,6 +113,15 @@ static inline int netif_page_index(struct page *pg)
- 	return idx;
- }
- 
-+/*
-+ * This is the amount of packet we copy rather than map, so that the
-+ * guest can't fiddle with the contents of the headers while we do
-+ * packet processing on them (netfilter, routing, etc).  This could
-+ * probably do with being larger, since 1) 64-bytes isn't necessarily
-+ * long enough to cover a full christmas-tree ip+tcp header, let alone
-+ * packet contents, and 2) the data is probably in cache anyway
-+ * (though perhaps some other cpu's cache).
-+ */
- #define PKT_PROT_LEN 64
- 
- static struct pending_tx_info {
--- 
-1.7.4
-
-
-From a9402ee935757e8facebc6e886f9912c2c523da7 Mon Sep 17 00:00:00 2001
-From: Christophe Saout <chtephan at leto.intern.saout.de>
-Date: Sun, 12 Apr 2009 13:40:27 +0200
-Subject: [PATCH 019/203] xen: netback: use dev_name() instead of removed ->bus_id.
-
-Signed-off-by: Christophe Saout <chtephan at leto.intern.saout.de>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/interface.c |    2 +-
- 1 files changed, 1 insertions(+), 1 deletions(-)
-
-diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
-index dc4fb53..3bb5c20 100644
---- a/drivers/xen/netback/interface.c
-+++ b/drivers/xen/netback/interface.c
-@@ -123,7 +123,7 @@ static void netbk_get_drvinfo(struct net_device *dev,
- 			      struct ethtool_drvinfo *info)
- {
- 	strcpy(info->driver, "netbk");
--	strcpy(info->bus_info, dev->dev.parent->bus_id);
-+	strcpy(info->bus_info, dev_name(dev->dev.parent));
- }
- 
- static const struct netif_stat {
--- 
-1.7.4
-
-
-From 35de1701fca19d693e9722bffbe7609caf1d5ac6 Mon Sep 17 00:00:00 2001
-From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
-Date: Mon, 29 Jun 2009 14:04:23 -0700
-Subject: [PATCH 020/203] xen: netback: convert to net_device_ops
-
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/interface.c |   17 +++++++++++------
- 1 files changed, 11 insertions(+), 6 deletions(-)
-
-diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
-index 3bb5c20..21c1f95 100644
---- a/drivers/xen/netback/interface.c
-+++ b/drivers/xen/netback/interface.c
-@@ -178,6 +178,15 @@ static struct ethtool_ops network_ethtool_ops =
- 	.get_strings = netbk_get_strings,
- };
- 
-+static struct net_device_ops netback_ops =
++static int xenvif_set_tx_csum(struct net_device *dev, u32 data)
 +{
-+	.ndo_start_xmit	= netif_be_start_xmit,
-+	.ndo_get_stats	= netif_be_get_stats,
-+	.ndo_open	= net_open,
-+	.ndo_stop	= net_close,
-+	.ndo_change_mtu	= netbk_change_mtu,
-+};
-+
- struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle)
- {
- 	int err = 0;
-@@ -213,12 +222,8 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
- 
- 	init_timer(&netif->tx_queue_timeout);
- 
--	dev->hard_start_xmit = netif_be_start_xmit;
--	dev->get_stats       = netif_be_get_stats;
--	dev->open            = net_open;
--	dev->stop            = net_close;
--	dev->change_mtu	     = netbk_change_mtu;
--	dev->features        = NETIF_F_IP_CSUM;
-+	dev->netdev_ops	= &netback_ops;
-+	dev->features	= NETIF_F_IP_CSUM;
- 
- 	SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
- 
--- 
-1.7.4
-
-
-From c6f3885ef05e96489025e1c1c7299aac7cf43d87 Mon Sep 17 00:00:00 2001
-From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
-Date: Fri, 4 Sep 2009 14:55:43 -0700
-Subject: [PATCH 021/203] xen: netback: reinstate missing code
-
-Change c3219dc868fe3e84070d6da2d0759a834b6f7251, "Completely drop flip
-support" was a bit too aggressive in removing code, and removed a chunk
-which was used not only for flip but also when a buffer crossed a page boundary.
-Reinstate that code.
-
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/netback.c |   91 +++++++++++++++++++++++++++++++++++++++++
- 1 files changed, 91 insertions(+), 0 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 80b424f..7c0f05b 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -185,6 +185,82 @@ static inline void maybe_schedule_tx_action(void)
- 		tasklet_schedule(&net_tx_tasklet);
- }
- 
-+static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
++	struct xenvif *vif = netdev_priv(dev);
++	if (data) {
++		if (!vif->csum)
++			return -EOPNOTSUPP;
++		vif->features_disabled &= ~NETIF_F_IP_CSUM;
++	} else {
++		vif->features_disabled |= NETIF_F_IP_CSUM;
++	}
++
++	xenvif_set_features(vif);
++	return 0;
++}
++
++static int xenvif_set_sg(struct net_device *dev, u32 data)
 +{
-+	struct skb_shared_info *ninfo;
-+	struct sk_buff *nskb;
-+	unsigned long offset;
-+	int ret;
-+	int len;
-+	int headlen;
++	struct xenvif *vif = netdev_priv(dev);
++	if (data) {
++		if (!vif->can_sg)
++			return -EOPNOTSUPP;
++		vif->features_disabled &= ~NETIF_F_SG;
++	} else {
++		vif->features_disabled |= NETIF_F_SG;
++	}
++
++	xenvif_set_features(vif);
++	return 0;
++}
 +
-+	BUG_ON(skb_shinfo(skb)->frag_list != NULL);
++static int xenvif_set_tso(struct net_device *dev, u32 data)
++{
++	struct xenvif *vif = netdev_priv(dev);
++	if (data) {
++		if (!vif->gso && !vif->gso_prefix)
++			return -EOPNOTSUPP;
++		vif->features_disabled &= ~NETIF_F_TSO;
++	} else {
++		vif->features_disabled |= NETIF_F_TSO;
++	}
 +
-+	nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN);
-+	if (unlikely(!nskb))
-+		goto err;
++	xenvif_set_features(vif);
++	return 0;
++}
 +
-+	skb_reserve(nskb, NET_SKB_PAD + NET_IP_ALIGN);
-+	headlen = skb_end_pointer(nskb) - nskb->data;
-+	if (headlen > skb_headlen(skb))
-+		headlen = skb_headlen(skb);
-+	ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
-+	BUG_ON(ret);
++static const struct xenvif_stat {
++	char name[ETH_GSTRING_LEN];
++	u16 offset;
++} xenvif_stats[] = {
++	{
++		"rx_gso_checksum_fixup",
++		offsetof(struct xenvif, rx_gso_checksum_fixup)
++	},
++};
 +
-+	ninfo = skb_shinfo(nskb);
-+	ninfo->gso_size = skb_shinfo(skb)->gso_size;
-+	ninfo->gso_type = skb_shinfo(skb)->gso_type;
++static int xenvif_get_sset_count(struct net_device *dev, int string_set)
++{
++	switch (string_set) {
++	case ETH_SS_STATS:
++		return ARRAY_SIZE(xenvif_stats);
++	default:
++		return -EINVAL;
++	}
++}
 +
-+	offset = headlen;
-+	len = skb->len - headlen;
++static void xenvif_get_ethtool_stats(struct net_device *dev,
++				     struct ethtool_stats *stats, u64 * data)
++{
++	void *vif = netdev_priv(dev);
++	int i;
 +
-+	nskb->len = skb->len;
-+	nskb->data_len = len;
-+	nskb->truesize += len;
++	for (i = 0; i < ARRAY_SIZE(xenvif_stats); i++)
++		data[i] = *(int *)(vif + xenvif_stats[i].offset);
++}
 +
-+	while (len) {
-+		struct page *page;
-+		int copy;
-+		int zero;
++static void xenvif_get_strings(struct net_device *dev, u32 stringset, u8 * data)
++{
++	int i;
 +
-+		if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
-+			dump_stack();
-+			goto err_free;
-+		}
++	switch (stringset) {
++	case ETH_SS_STATS:
++		for (i = 0; i < ARRAY_SIZE(xenvif_stats); i++)
++			memcpy(data + i * ETH_GSTRING_LEN,
++			       xenvif_stats[i].name, ETH_GSTRING_LEN);
++		break;
++	}
++}
 +
-+		copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
-+		zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
++static struct ethtool_ops xenvif_ethtool_ops = {
++	.get_tx_csum	= ethtool_op_get_tx_csum,
++	.set_tx_csum	= xenvif_set_tx_csum,
++	.get_sg		= ethtool_op_get_sg,
++	.set_sg		= xenvif_set_sg,
++	.get_tso	= ethtool_op_get_tso,
++	.set_tso	= xenvif_set_tso,
++	.get_link	= ethtool_op_get_link,
 +
-+		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
-+		if (unlikely(!page))
-+			goto err_free;
++	.get_sset_count = xenvif_get_sset_count,
++	.get_ethtool_stats = xenvif_get_ethtool_stats,
++	.get_strings = xenvif_get_strings,
++};
 +
-+		ret = skb_copy_bits(skb, offset, page_address(page), copy);
-+		BUG_ON(ret);
++static struct net_device_ops xenvif_netdev_ops = {
++	.ndo_start_xmit	= xenvif_start_xmit,
++	.ndo_get_stats	= xenvif_get_stats,
++	.ndo_open	= xenvif_open,
++	.ndo_stop	= xenvif_close,
++	.ndo_change_mtu	= xenvif_change_mtu,
++};
 +
-+		ninfo->frags[ninfo->nr_frags].page = page;
-+		ninfo->frags[ninfo->nr_frags].page_offset = 0;
-+		ninfo->frags[ninfo->nr_frags].size = copy;
-+		ninfo->nr_frags++;
++struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
++			    unsigned int handle)
++{
++	int err;
++	struct net_device *dev;
++	struct xenvif *vif;
++	char name[IFNAMSIZ] = {};
 +
-+		offset += copy;
-+		len -= copy;
++	snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
++	dev = alloc_netdev(sizeof(struct xenvif), name, ether_setup);
++	if (dev == NULL) {
++		pr_warn("Could not allocate netdev\n");
++		return ERR_PTR(-ENOMEM);
 +	}
 +
-+	offset = nskb->data - skb->data;
++	SET_NETDEV_DEV(dev, parent);
++
++	vif = netdev_priv(dev);
++	vif->domid  = domid;
++	vif->handle = handle;
++	vif->netbk  = NULL;
++	vif->can_sg = 1;
++	vif->csum = 1;
++	atomic_set(&vif->refcnt, 1);
++	init_waitqueue_head(&vif->waiting_to_free);
++	vif->dev = dev;
++	INIT_LIST_HEAD(&vif->schedule_list);
++	INIT_LIST_HEAD(&vif->notify_list);
 +
-+	nskb->transport_header = skb->transport_header + offset;
-+	nskb->network_header = skb->network_header + offset;
-+	nskb->mac_header = skb->mac_header + offset;
++	vif->credit_bytes = vif->remaining_credit = ~0UL;
++	vif->credit_usec  = 0UL;
++	init_timer(&vif->credit_timeout);
++	/* Initialize 'expires' now: it's used to track the credit window. */
++	vif->credit_timeout.expires = jiffies;
 +
-+	return nskb;
++	dev->netdev_ops	= &xenvif_netdev_ops;
++	xenvif_set_features(vif);
++	SET_ETHTOOL_OPS(dev, &xenvif_ethtool_ops);
 +
-+ err_free:
-+	kfree_skb(nskb);
-+ err:
-+	return NULL;
-+}
++	dev->tx_queue_len = XENVIF_QUEUE_LENGTH;
 +
- static inline int netbk_max_required_rx_slots(struct xen_netif *netif)
- {
- 	if (netif->features & (NETIF_F_SG|NETIF_F_TSO))
-@@ -218,6 +294,21 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
- 	if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
- 		goto drop;
- 
 +	/*
-+	 * XXX For now we also copy skbuffs whose head crosses a page
-+	 * boundary, because netbk_gop_skb can't handle them.
++	 * Initialise a dummy MAC address. We choose the numerically
++	 * largest non-broadcast address to prevent the address getting
++	 * stolen by an Ethernet bridge for STP purposes.
++	 * (FE:FF:FF:FF:FF:FF)
 +	 */
-+	if ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE) {
-+		struct sk_buff *nskb = netbk_copy_skb(skb);
-+		if ( unlikely(nskb == NULL) )
-+			goto drop;
-+		/* Copy only the header fields we use in this driver. */
-+		nskb->dev = skb->dev;
-+		nskb->ip_summed = skb->ip_summed;
-+		dev_kfree_skb(skb);
-+		skb = nskb;
-+	}
-+
- 	netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 +
- 				   !!skb_shinfo(skb)->gso_size;
- 	netif_get(netif);
--- 
-1.7.4
-
-
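netbk_copy_skb() above rebuilds a packet by copying the head into a fresh linear area and the remainder into newly allocated page-sized fragments, bailing out if MAX_SKB_FRAGS would be exceeded. A simplified userspace sketch of that fragment-copy loop (buffer splitting only; all names are mine):

#include <assert.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SZ   4096  /* stand-in for PAGE_SIZE */
#define HEADLEN    256  /* stand-in for the copied linear area */
#define MAX_FRAGS   18  /* stand-in for MAX_SKB_FRAGS */

/* Copy `len` bytes starting at `src` into freshly allocated page-sized
 * fragments, the way netbk_copy_skb() handles data past the head.
 * Returns the number of fragments, or -1 on overflow/allocation failure. */
static int copy_to_frags(const unsigned char *src, size_t len,
                         unsigned char *frags[MAX_FRAGS],
                         size_t sizes[MAX_FRAGS])
{
    int n = 0;

    while (len) {
        size_t copy = len >= PAGE_SZ ? PAGE_SZ : len;

        if (n >= MAX_FRAGS)
            return -1;  /* driver dumps stack and drops the packet */
        frags[n] = malloc(PAGE_SZ);
        if (!frags[n])
            return -1;
        memcpy(frags[n], src, copy);
        sizes[n++] = copy;
        src += copy;
        len -= copy;
    }
    return n;
}

int main(void)
{
    unsigned char pkt[PAGE_SZ + 100] = { [0] = 0xAA };
    unsigned char *frags[MAX_FRAGS];
    size_t sizes[MAX_FRAGS];

    /* Everything past the copied header lands in the fragments. */
    int n = copy_to_frags(pkt + HEADLEN, sizeof(pkt) - HEADLEN, frags, sizes);
    assert(n == 1);     /* 3940 bytes fit in a single page here */
    for (int i = 0; i < n; i++)
        free(frags[i]);
    return 0;
}
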
-From 2e290d790877df4368691180f76206ad27a42505 Mon Sep 17 00:00:00 2001
-From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
-Date: Wed, 9 Sep 2009 15:19:15 -0700
-Subject: [PATCH 022/203] xen: netback: remove debug noise
-
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/netback.c |    2 --
- 1 files changed, 0 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 7c0f05b..d7d738e 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -1537,8 +1537,6 @@ static int __init netback_init(void)
- 	int i;
- 	struct page *page;
- 
--	printk(KERN_CRIT "*** netif_init\n");
--
- 	if (!xen_domain())
- 		return -ENODEV;
- 
--- 
-1.7.4
-
-
-From 3ba3bb7d563704c3050de6116aa0a761a5791428 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Thu, 8 Oct 2009 13:23:09 -0400
-Subject: [PATCH 023/203] Fix compile warnings: ignoring return value of 'xenbus_register_backend' ..
-
-We neglect to check the return value of xenbus_register_backend
-and take actions when that fails. This patch fixes that and adds
-code to deal with those type of failures.
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/common.h  |    2 +-
- drivers/xen/netback/netback.c |   12 +++++++++++-
- drivers/xen/netback/xenbus.c  |    4 ++--
- 3 files changed, 14 insertions(+), 4 deletions(-)
-
-diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
-index 9056be0..0675946 100644
---- a/drivers/xen/netback/common.h
-+++ b/drivers/xen/netback/common.h
-@@ -194,7 +194,7 @@ static inline void  netif_put(struct xen_netif *netif)
- 		wake_up(&netif->waiting_to_free);
- }
- 
--void netif_xenbus_init(void);
-+int netif_xenbus_init(void);
- 
- #define netif_schedulable(netif)				\
- 	(netif_running((netif)->dev) && netback_carrier_ok(netif))
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index d7d738e..860c61e 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -1536,6 +1536,7 @@ static int __init netback_init(void)
- {
- 	int i;
- 	struct page *page;
-+	int rc = 0;
- 
- 	if (!xen_domain())
- 		return -ENODEV;
-@@ -1583,7 +1584,9 @@ static int __init netback_init(void)
- 
- 	//netif_accel_init();
- 
--	netif_xenbus_init();
-+	rc = netif_xenbus_init();
-+	if (rc)
-+		goto failed_init;
- 
- #ifdef NETBE_DEBUG_INTERRUPT
- 	(void)bind_virq_to_irqhandler(VIRQ_DEBUG,
-@@ -1595,6 +1598,13 @@ static int __init netback_init(void)
- #endif
- 
- 	return 0;
++	memset(dev->dev_addr, 0xFF, ETH_ALEN);
++	dev->dev_addr[0] &= ~0x01;
 +
-+failed_init:
-+	free_empty_pages_and_pagevec(mmap_pages, MAX_PENDING_REQS);
-+	del_timer(&netbk_tx_pending_timer);
-+	del_timer(&net_timer);
-+	return rc;
++	netif_carrier_off(dev);
 +
- }
- 
- module_init(netback_init);
-diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
-index a492288..c46b235 100644
---- a/drivers/xen/netback/xenbus.c
-+++ b/drivers/xen/netback/xenbus.c
-@@ -447,8 +447,8 @@ static struct xenbus_driver netback = {
- };
- 
- 
--void netif_xenbus_init(void)
-+int netif_xenbus_init(void)
- {
- 	printk(KERN_CRIT "registering netback\n");
--	(void)xenbus_register_backend(&netback);
-+	return xenbus_register_backend(&netback);
- }
--- 
-1.7.4
-
-
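The fix follows the usual module-init pattern: any step that can fail jumps to a label that unwinds everything set up before it, in reverse order. A toy sketch of the shape, with mallocs standing in for the pages and timers (hypothetical names, forced failure):

#include <stdio.h>
#include <stdlib.h>

static void *pages, *timer_a, *timer_b;

static int fake_register_backend(void) { return -1; /* force the error path */ }

static int module_init_example(void)
{
    int rc;

    pages = malloc(64);
    timer_a = malloc(16);
    timer_b = malloc(16);
    if (!pages || !timer_a || !timer_b) {
        rc = -12;       /* -ENOMEM */
        goto failed_init;
    }

    rc = fake_register_backend();
    if (rc)
        goto failed_init;

    return 0;

failed_init:
    free(timer_b);      /* del_timer(&netbk_tx_pending_timer) */
    free(timer_a);      /* del_timer(&net_timer) */
    free(pages);        /* free_empty_pages_and_pagevec(...) */
    return rc;
}

int main(void)
{
    printf("init: %d\n", module_init_example());
    return 0;
}
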
-From 4bc919e07d5dc48cb95b22cc6e90c6110c229343 Mon Sep 17 00:00:00 2001
-From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
-Date: Tue, 27 Oct 2009 12:37:50 -0700
-Subject: [PATCH 024/203] xen: netback: don't screw around with packet gso state
-
-These lines were reverted from 2.6.18 netback as the network stack
-was modified to deal with packets shorter than the gso size, so there's
-no need to fiddle with the gso state in netback.
-
-Taken from linux-2.6.18-xen.hg change 8081d19dce89
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/netback.c |    5 -----
- 1 files changed, 0 insertions(+), 5 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 860c61e..9a14976 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -1293,11 +1293,6 @@ static unsigned net_tx_build_mops(void)
- 			skb_shinfo(skb)->frags[0].page = (void *)~0UL;
- 		}
- 
--		if (skb->data_len < skb_shinfo(skb)->gso_size) {
--			skb_shinfo(skb)->gso_size = 0;
--			skb_shinfo(skb)->gso_type = 0;
--		}
--
- 		__skb_queue_tail(&tx_queue, skb);
- 
- 		pending_cons++;
--- 
-1.7.4
-
-
-From f2b947783c47a721497e5d325c736234f71501e7 Mon Sep 17 00:00:00 2001
-From: Steven Smith <ssmith at weybridge.uk.xensource.com>
-Date: Fri, 30 Oct 2009 13:55:23 -0700
-Subject: [PATCH 025/203] xen: netback: make sure that pg->mapping is never NULL for a page mapped from a foreign domain.
-
-Otherwise, the foreign maps tracking infrastructure gets confused, and
-thinks that the foreign page is local.  This means that you can't
-forward that packet to another foreign domain.  This leads to very
-high packet drop, and hence very poor performance.
-
-Signed-off-by: Steven Smith <steven.smith at citrix.com>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/netback.c |    4 ++--
- 1 files changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 9a14976..111fec7 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -97,12 +97,12 @@ static inline unsigned long idx_to_kaddr(unsigned int idx)
- /* extra field used in struct page */
- static inline void netif_set_page_index(struct page *pg, unsigned int index)
- {
--	*(unsigned long *)&pg->mapping = index;
-+	*(unsigned long *)&pg->mapping = index + 1;
- }
- 
- static inline int netif_page_index(struct page *pg)
- {
--	unsigned long idx = (unsigned long)pg->mapping;
-+	unsigned long idx = (unsigned long)pg->mapping - 1;
- 
- 	if (!PageForeign(pg))
- 		return -1;
--- 
-1.7.4
-
-
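The trick is to bias the stashed index by one: page->mapping then can never be zero/NULL for a foreign page, so index 0 stays representable and an untouched page is still recognisable as "not ours". Sketched standalone (a plain variable stands in for page->mapping):

#include <assert.h>

static unsigned long mapping;   /* zeroed, like a fresh struct page */

static void set_page_index(unsigned int index)
{
    mapping = index + 1;
}

static int page_index(void)
{
    if (mapping == 0)
        return -1;      /* never set: not one of ours */
    return (int)(mapping - 1);
}

int main(void)
{
    assert(page_index() == -1); /* mapping == 0 means "no index" */
    set_page_index(0);          /* index 0 is now representable... */
    assert(page_index() == 0);  /* ...and distinguishable from unset */
    return 0;
}
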
-From df8b27ea0fb2695842104e06caaecb55780577a7 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ijc at hellion.org.uk>
-Date: Thu, 3 Dec 2009 21:56:19 +0000
-Subject: [PATCH 026/203] xen: rename netbk module xen-netback.
-
-netbk is rather generic for a modular distro style kernel.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Cc: Jeremy Fitzhardinge <jeremy at goop.org>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/Makefile |    4 ++--
- 1 files changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/xen/netback/Makefile b/drivers/xen/netback/Makefile
-index a01a1a3..e346e81 100644
---- a/drivers/xen/netback/Makefile
-+++ b/drivers/xen/netback/Makefile
-@@ -1,3 +1,3 @@
--obj-$(CONFIG_XEN_NETDEV_BACKEND) := netbk.o
-+obj-$(CONFIG_XEN_NETDEV_BACKEND) := xen-netback.o
- 
--netbk-y := netback.o xenbus.o interface.o
-+xen-netback-y := netback.o xenbus.o interface.o
--- 
-1.7.4
-
-
-From 279097395ad64ae4df15e206a487cd5fd3be39a8 Mon Sep 17 00:00:00 2001
-From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
-Date: Tue, 16 Feb 2010 14:40:37 -0800
-Subject: [PATCH 027/203] xen: netback: use dev_get/set_drvdata() interface
-
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/xenbus.c |   10 +++++-----
- 1 files changed, 5 insertions(+), 5 deletions(-)
-
-diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
-index c46b235..79e6fb0 100644
---- a/drivers/xen/netback/xenbus.c
-+++ b/drivers/xen/netback/xenbus.c
-@@ -35,7 +35,7 @@ static void backend_create_netif(struct backend_info *be);
- 
- static int netback_remove(struct xenbus_device *dev)
- {
--	struct backend_info *be = dev->dev.driver_data;
-+	struct backend_info *be = dev_get_drvdata(&dev->dev);
- 
- 	//netback_remove_accelerators(be, dev);
- 
-@@ -45,7 +45,7 @@ static int netback_remove(struct xenbus_device *dev)
- 		be->netif = NULL;
- 	}
- 	kfree(be);
--	dev->dev.driver_data = NULL;
-+	dev_set_drvdata(&dev->dev, NULL);
- 	return 0;
- }
- 
-@@ -70,7 +70,7 @@ static int netback_probe(struct xenbus_device *dev,
- 	}
- 
- 	be->dev = dev;
--	dev->dev.driver_data = be;
-+	dev_set_drvdata(&dev->dev, be);
- 
- 	sg = 1;
- 	if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB)
-@@ -151,7 +151,7 @@ fail:
-  */
- static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env)
- {
--	struct backend_info *be = xdev->dev.driver_data;
-+	struct backend_info *be = dev_get_drvdata(&xdev->dev);
- 	struct xen_netif *netif = be->netif;
- 	char *val;
- 
-@@ -211,7 +211,7 @@ static void backend_create_netif(struct backend_info *be)
- static void frontend_changed(struct xenbus_device *dev,
- 			     enum xenbus_state frontend_state)
- {
--	struct backend_info *be = dev->dev.driver_data;
-+	struct backend_info *be = dev_get_drvdata(&dev->dev);
- 
- 	DPRINTK("%s", xenbus_strstate(frontend_state));
- 
--- 
-1.7.4
-
-
-From 31d0b5f5763faf607e32f3b5a0f6b37a34bbbf09 Mon Sep 17 00:00:00 2001
-From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
-Date: Tue, 16 Feb 2010 14:41:12 -0800
-Subject: [PATCH 028/203] xen: netback: include linux/sched.h for TASK_* definitions
-
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/common.h |    2 ++
- 1 files changed, 2 insertions(+), 0 deletions(-)
-
-diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
-index 0675946..d8653d3 100644
---- a/drivers/xen/netback/common.h
-+++ b/drivers/xen/netback/common.h
-@@ -38,6 +38,8 @@
- #include <linux/netdevice.h>
- #include <linux/etherdevice.h>
- #include <linux/wait.h>
-+#include <linux/sched.h>
++	err = register_netdev(dev);
++	if (err) {
++		netdev_warn(dev, "Could not register device: err=%d\n", err);
++		free_netdev(dev);
++		return ERR_PTR(err);
++	}
 +
- #include <xen/interface/io/netif.h>
- #include <asm/io.h>
- #include <asm/pgalloc.h>
--- 
-1.7.4
-
-
-From cdefc88924b3cdfcac64be737a00a4ec5593cfd5 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Tue, 23 Feb 2010 11:52:27 +0000
-Subject: [PATCH 029/203] xen: netback: remove unused xen_network_done code
-
-It has been disabled effectively forever.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/xen/netback/netback.c |   23 -----------------------
- 1 files changed, 0 insertions(+), 23 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 111fec7..4b24893 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -343,25 +343,6 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
- 	return 0;
- }
- 
--#if 0
--static void xen_network_done_notify(void)
--{
--	static struct net_device *eth0_dev = NULL;
--	if (unlikely(eth0_dev == NULL))
--		eth0_dev = __dev_get_by_name("eth0");
--	netif_rx_schedule(eth0_dev);
--}
--/*
-- * Add following to poll() function in NAPI driver (Tigon3 is example):
-- *  if ( xen_network_done() )
-- *      tg3_enable_ints(tp);
-- */
--int xen_network_done(void)
--{
--	return skb_queue_empty(&rx_queue);
--}
--#endif
--
- struct netrx_pending_operations {
- 	unsigned trans_prod, trans_cons;
- 	unsigned mmu_prod, mmu_mcl;
-@@ -664,10 +645,6 @@ static void net_rx_action(unsigned long unused)
- 	/* More work to do? */
- 	if (!skb_queue_empty(&rx_queue) && !timer_pending(&net_timer))
- 		tasklet_schedule(&net_rx_tasklet);
--#if 0
--	else
--		xen_network_done_notify();
--#endif
- }
- 
- static void net_alarm(unsigned long unused)
--- 
-1.7.4
-
-
-From 994be068dd9947cedcee69a7185e54738cda33d4 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Tue, 23 Feb 2010 11:58:26 +0000
-Subject: [PATCH 030/203] xen: netback: factor disconnect from backend into new function.
-
-Makes subsequent patches cleaner.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/xen/netback/xenbus.c |   16 ++++++++++++----
- 1 files changed, 12 insertions(+), 4 deletions(-)
-
-diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
-index 79e6fb0..1f36b4d4 100644
---- a/drivers/xen/netback/xenbus.c
-+++ b/drivers/xen/netback/xenbus.c
-@@ -205,6 +205,16 @@ static void backend_create_netif(struct backend_info *be)
- }
- 
- 
-+static void disconnect_backend(struct xenbus_device *dev)
++	netdev_dbg(dev, "Successfully created xenvif\n");
++	return vif;
++}
++
++int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
++		   unsigned long rx_ring_ref, unsigned int evtchn)
 +{
-+	struct backend_info *be = dev_get_drvdata(&dev->dev);
++	int err = -ENOMEM;
 +
-+	if (be->netif) {
-+		netif_disconnect(be->netif);
-+		be->netif = NULL;
-+	}
-+}
++	/* Already connected through? */
++	if (vif->irq)
++		return 0;
 +
- /**
-  * Callback received when the frontend's state changes.
-  */
-@@ -238,11 +248,9 @@ static void frontend_changed(struct xenbus_device *dev,
- 		break;
- 
- 	case XenbusStateClosing:
--		if (be->netif) {
-+		if (be->netif)
- 			kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
--			netif_disconnect(be->netif);
--			be->netif = NULL;
--		}
-+		disconnect_backend(dev);
- 		xenbus_switch_state(dev, XenbusStateClosing);
- 		break;
- 
--- 
-1.7.4
-
-
-From 9dcb4c18e5b29d8862cd7783d5b0040913010563 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Tue, 23 Feb 2010 12:10:24 +0000
-Subject: [PATCH 031/203] xen: netback: wait for hotplug scripts to complete before signalling connected to frontend
-
-Avoid the situation where the frontend is sending packets but the
-domain 0 bridging (or whatever) is not yet configured (because the
-hotplug scripts are too slow) and so packets get dropped.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Cc: Steven.Smith at citrix.com
----
- drivers/xen/netback/common.h |    2 +
- drivers/xen/netback/xenbus.c |   45 +++++++++++++++++++++++++++++++++++++++++-
- 2 files changed, 46 insertions(+), 1 deletions(-)
-
-diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
-index d8653d3..1983768 100644
---- a/drivers/xen/netback/common.h
-+++ b/drivers/xen/netback/common.h
-@@ -145,6 +145,8 @@ struct backend_info {
- 	struct xenbus_device *dev;
- 	struct xen_netif *netif;
- 	enum xenbus_state frontend_state;
-+	struct xenbus_watch hotplug_status_watch;
-+	int have_hotplug_status_watch:1;
- 
- 	/* State relating to the netback accelerator */
- 	void *netback_accel_priv;
-diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
-index 1f36b4d4..d2407cc 100644
---- a/drivers/xen/netback/xenbus.c
-+++ b/drivers/xen/netback/xenbus.c
-@@ -32,6 +32,7 @@
- static int connect_rings(struct backend_info *);
- static void connect(struct backend_info *);
- static void backend_create_netif(struct backend_info *be);
-+static void unregister_hotplug_status_watch(struct backend_info *be);
- 
- static int netback_remove(struct xenbus_device *dev)
- {
-@@ -39,8 +40,10 @@ static int netback_remove(struct xenbus_device *dev)
- 
- 	//netback_remove_accelerators(be, dev);
- 
-+	unregister_hotplug_status_watch(be);
- 	if (be->netif) {
- 		kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
-+		xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
- 		netif_disconnect(be->netif);
- 		be->netif = NULL;
- 	}
-@@ -210,6 +213,7 @@ static void disconnect_backend(struct xenbus_device *dev)
- 	struct backend_info *be = dev_get_drvdata(&dev->dev);
- 
- 	if (be->netif) {
-+		xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
- 		netif_disconnect(be->netif);
- 		be->netif = NULL;
- 	}
-@@ -329,6 +333,36 @@ static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
- 	return 0;
- }
- 
-+static void unregister_hotplug_status_watch(struct backend_info *be)
-+{
-+	if (be->have_hotplug_status_watch) {
-+		unregister_xenbus_watch(&be->hotplug_status_watch);
-+		kfree(be->hotplug_status_watch.node);
-+	}
-+	be->have_hotplug_status_watch = 0;
++	xenvif_set_features(vif);
++
++	err = xen_netbk_map_frontend_rings(vif, tx_ring_ref, rx_ring_ref);
++	if (err < 0)
++		goto err;
++
++	err = bind_interdomain_evtchn_to_irqhandler(
++		vif->domid, evtchn, xenvif_interrupt, 0,
++		vif->dev->name, vif);
++	if (err < 0)
++		goto err_unmap;
++	vif->irq = err;
++	disable_irq(vif->irq);
++
++	xenvif_get(vif);
++
++	rtnl_lock();
++	netif_carrier_on(vif->dev);
++	if (netif_running(vif->dev))
++		xenvif_up(vif);
++	rtnl_unlock();
++
++	return 0;
++err_unmap:
++	xen_netbk_unmap_frontend_rings(vif);
++err:
++	return err;
 +}
 +
-+static void hotplug_status_changed(struct xenbus_watch *watch,
-+				   const char **vec,
-+				   unsigned int vec_size)
++void xenvif_disconnect(struct xenvif *vif)
 +{
-+	struct backend_info *be = container_of(watch,
-+					       struct backend_info,
-+					       hotplug_status_watch);
-+	char *str;
-+	unsigned int len;
-+
-+	str = xenbus_read(XBT_NIL, be->dev->nodename, "hotplug-status", &len);
-+	if (IS_ERR(str))
-+		return;
-+	if (len == sizeof("connected")-1 && !memcmp(str, "connected", len)) {
-+		xenbus_switch_state(be->dev, XenbusStateConnected);
-+		/* Not interested in this watch anymore. */
-+		unregister_hotplug_status_watch(be);
++	struct net_device *dev = vif->dev;
++	if (netif_carrier_ok(dev)) {
++		rtnl_lock();
++		netif_carrier_off(dev); /* discard queued packets */
++		if (netif_running(dev))
++			xenvif_down(vif);
++		rtnl_unlock();
++		xenvif_put(vif);
 +	}
-+	kfree(str);
++
++	atomic_dec(&vif->refcnt);
++	wait_event(vif->waiting_to_free, atomic_read(&vif->refcnt) == 0);
++
++	del_timer_sync(&vif->credit_timeout);
++
++	if (vif->irq)
++		unbind_from_irqhandler(vif->irq, vif);
++
++	unregister_netdev(vif->dev);
++
++	xen_netbk_unmap_frontend_rings(vif);
++
++	free_netdev(vif->dev);
 +}
+diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
+new file mode 100644
+index 0000000..c2669b8
+--- /dev/null
++++ b/drivers/net/xen-netback/netback.c
+@@ -0,0 +1,1745 @@
++/*
++ * Back-end of the driver for virtual network devices. This portion of the
++ * driver exports a 'unified' network-device interface that can be accessed
++ * by any operating system that implements a compatible front end. A
++ * reference front-end implementation can be found in:
++ *  drivers/net/xen-netfront.c
++ *
++ * Copyright (c) 2002-2005, K A Fraser
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
 +
- static void connect(struct backend_info *be)
- {
- 	int err;
-@@ -348,7 +382,16 @@ static void connect(struct backend_info *be)
- 			  &be->netif->credit_usec);
- 	be->netif->remaining_credit = be->netif->credit_bytes;
- 
--	xenbus_switch_state(dev, XenbusStateConnected);
-+	unregister_hotplug_status_watch(be);
-+	err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch,
-+				   hotplug_status_changed,
-+				   "%s/%s", dev->nodename, "hotplug-status");
-+	if (err) {
-+		/* Switch now, since we can't do a watch. */
-+		xenbus_switch_state(dev, XenbusStateConnected);
-+	} else {
-+		be->have_hotplug_status_watch = 1;
-+	}
- 
- 	netif_wake_queue(be->netif->dev);
- }
--- 
-1.7.4
-
-
-From 509cc7f20f866277a8f5d5895bb266b5b68aac6d Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Tue, 23 Feb 2010 12:11:51 +0000
-Subject: [PATCH 032/203] xen: netback: Always pull through PKT_PROT_LEN bytes into the linear part of an skb.
-
-Previously PKT_PROT_LEN would only have an effect on the first fragment.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/xen/netback/netback.c |   10 ++++++++++
- 1 files changed, 10 insertions(+), 0 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 4b24893..d4a7a56 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -1334,6 +1334,16 @@ static void net_tx_submit(void)
- 
- 		netbk_fill_frags(skb);
- 
-+		/*
-+		 * If the initial fragment was < PKT_PROT_LEN then
-+		 * pull through some bytes from the other fragments to
-+		 * increase the linear region to PKT_PROT_LEN bytes.
-+		 */
-+		if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) {
-+			int target = min_t(int, skb->len, PKT_PROT_LEN);
-+			__pskb_pull_tail(skb, target - skb_headlen(skb));
-+		}
++#include "common.h"
 +
- 		skb->dev      = netif->dev;
- 		skb->protocol = eth_type_trans(skb, skb->dev);
- 
--- 
-1.7.4
-
-
-From 673c82b5110cfffafe1e1978bc07d6d10d111d50 Mon Sep 17 00:00:00 2001
-From: Steven Smith <ssmith at xensource.com>
-Date: Tue, 23 Feb 2010 11:49:26 +0000
-Subject: [PATCH 033/203] xen: netback: try to pull a minimum of 72 bytes into the skb data area
- when receiving a packet into netback.
-
-The previous number, 64, tended to place a fragment boundary in the middle of
-the TCP header options and led to unnecessary fragmentation in Windows <->
-Windows networking.
-
-Signed-off-by: Steven Smith <ssmith at xensource.com>
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/xen/netback/netback.c |    9 +++------
- 1 files changed, 3 insertions(+), 6 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index d4a7a56..44357d7 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -116,13 +116,10 @@ static inline int netif_page_index(struct page *pg)
- /*
-  * This is the amount of packet we copy rather than map, so that the
-  * guest can't fiddle with the contents of the headers while we do
-- * packet processing on them (netfilter, routing, etc).  This could
-- * probably do with being larger, since 1) 64-bytes isn't necessarily
-- * long enough to cover a full christmas-tree ip+tcp header, let alone
-- * packet contents, and 2) the data is probably in cache anyway
-- * (though perhaps some other cpu's cache).
-+ * packet processing on them (netfilter, routing, etc). 72 is enough
-+ * to cover TCP+IP headers including options.
-  */
--#define PKT_PROT_LEN 64
-+#define PKT_PROT_LEN 72
- 
- static struct pending_tx_info {
- 	struct xen_netif_tx_request req;
--- 
-1.7.4
-
-
-From c83bd213efd3ebf700189249c30d987b1cb14d7e Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Tue, 23 Feb 2010 11:54:30 +0000
-Subject: [PATCH 034/203] xen: netback: Allow setting of large MTU before rings have connected.
-
-This allows large MTU to be configured by the VIF hotplug
-script. Previously this would fail because at the point the hotplug
-script runs the VIF features have most likely not been negotiated with
-the frontend and so SG has not yet been enabled. Invert this behaviour
-so that SG is assumed present until negotiations prove otherwise and
-reduce MTU at that point.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/xen/netback/interface.c |    6 +++++-
- drivers/xen/netback/xenbus.c    |    8 +++++---
- 2 files changed, 10 insertions(+), 4 deletions(-)
-
-diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
-index 21c1f95..b23b14d 100644
---- a/drivers/xen/netback/interface.c
-+++ b/drivers/xen/netback/interface.c
-@@ -104,6 +104,9 @@ static int netbk_set_sg(struct net_device *dev, u32 data)
- 			return -ENOSYS;
- 	}
- 
-+	if (dev->mtu > ETH_DATA_LEN)
-+		dev->mtu = ETH_DATA_LEN;
++#include <linux/kthread.h>
++#include <linux/if_vlan.h>
++#include <linux/udp.h>
++
++#include <net/tcp.h>
++
++#include <xen/events.h>
++#include <xen/interface/memory.h>
++
++#include <asm/xen/hypercall.h>
++#include <asm/xen/page.h>
 +
- 	return ethtool_op_set_sg(dev, data);
- }
- 
-@@ -207,6 +210,7 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
- 	memset(netif, 0, sizeof(*netif));
- 	netif->domid  = domid;
- 	netif->handle = handle;
-+	netif->features = NETIF_F_SG;
- 	atomic_set(&netif->refcnt, 1);
- 	init_waitqueue_head(&netif->waiting_to_free);
- 	netif->dev = dev;
-@@ -223,7 +227,7 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
- 	init_timer(&netif->tx_queue_timeout);
- 
- 	dev->netdev_ops	= &netback_ops;
--	dev->features	= NETIF_F_IP_CSUM;
-+	dev->features   = NETIF_F_IP_CSUM|NETIF_F_SG;
- 
- 	SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
- 
-diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
-index d2407cc..fcd3c34 100644
---- a/drivers/xen/netback/xenbus.c
-+++ b/drivers/xen/netback/xenbus.c
-@@ -445,9 +445,11 @@ static int connect_rings(struct backend_info *be)
- 
- 	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", "%d", &val) < 0)
- 		val = 0;
--	if (val) {
--		be->netif->features |= NETIF_F_SG;
--		be->netif->dev->features |= NETIF_F_SG;
-+	if (!val) {
-+		be->netif->features &= ~NETIF_F_SG;
-+		be->netif->dev->features &= ~NETIF_F_SG;
-+		if (be->netif->dev->mtu > ETH_DATA_LEN)
-+			be->netif->dev->mtu = ETH_DATA_LEN;
- 	}
- 
- 	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4", "%d",
--- 
-1.7.4
-
-
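The resulting rule: assume SG until the frontend says otherwise, allow an MTU up to 65535 - ETH_HLEN while SG is on, and clamp back to ETH_DATA_LEN the moment SG is negotiated away. A standalone check mirroring the change_mtu logic (simplified, not the driver's exact code):

#include <assert.h>

#define ETH_HLEN       14
#define ETH_DATA_LEN 1500

static int change_mtu(int can_sg, int *mtu, int new_mtu)
{
    int max = can_sg ? 65535 - ETH_HLEN : ETH_DATA_LEN;

    if (new_mtu > max)
        return -22;     /* -EINVAL */
    *mtu = new_mtu;
    return 0;
}

int main(void)
{
    int mtu = ETH_DATA_LEN;

    assert(change_mtu(1, &mtu, 9000) == 0);   /* jumbo ok while SG assumed */
    assert(change_mtu(0, &mtu, 9000) == -22); /* rejected once SG negotiated away */
    return 0;
}
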
-From e5cd35b00cb63f3a3fa1651260a58d59bbc134b7 Mon Sep 17 00:00:00 2001
-From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
-Date: Fri, 19 Mar 2010 13:09:16 -0700
-Subject: [PATCH 035/203] xen: netback: use get_sset_count rather than obsolete get_stats_count
-
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/interface.c |   11 ++++++++---
- 1 files changed, 8 insertions(+), 3 deletions(-)
-
-diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
-index b23b14d..086d939 100644
---- a/drivers/xen/netback/interface.c
-+++ b/drivers/xen/netback/interface.c
-@@ -136,9 +136,14 @@ static const struct netif_stat {
- 	{ "copied_skbs", offsetof(struct xen_netif, nr_copied_skbs) },
- };
- 
--static int netbk_get_stats_count(struct net_device *dev)
-+static int netbk_get_sset_count(struct net_device *dev, int string_set)
- {
--	return ARRAY_SIZE(netbk_stats);
-+	switch (string_set) {
-+	case ETH_SS_STATS:
-+		return ARRAY_SIZE(netbk_stats);
-+	default:
-+		return -EINVAL;
-+	}
- }
- 
- static void netbk_get_ethtool_stats(struct net_device *dev,
-@@ -176,7 +181,7 @@ static struct ethtool_ops network_ethtool_ops =
- 	.set_tso = netbk_set_tso,
- 	.get_link = ethtool_op_get_link,
- 
--	.get_stats_count = netbk_get_stats_count,
-+	.get_sset_count = netbk_get_sset_count,
- 	.get_ethtool_stats = netbk_get_ethtool_stats,
- 	.get_strings = netbk_get_strings,
- };
--- 
-1.7.4
-
-
-From 0c34835ee66ad641f01a8077a973b7ec1bfdcd86 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Tue, 11 May 2010 09:33:42 +0100
-Subject: [PATCH 036/203] xen: netback: correctly setup skb->ip_summed on receive
-
-In 2.6.18 CHECKSUM_PARTIAL and CHECKSUM_UNNECESSARY were both synonyms for
-CHECKSUM_HW. This is no longer the case and we need to select the correct one.
-
-  data_validated csum_blank -> ip_summed
-  0              0             CHECKSUM_NONE
-  0              1             CHECKSUM_PARTIAL
-  1              0             CHECKSUM_UNNECESSARY
-  1              1             CHECKSUM_PARTIAL
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Tested-by: Matej Zary <zary at cvtisr.sk>
-Tested-by: Michael D Labriola <mlabriol at gdeb.com>
----
- drivers/xen/netback/netback.c |   10 +++-------
- 1 files changed, 3 insertions(+), 7 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 44357d7..725da0f 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -1320,14 +1320,10 @@ static void net_tx_submit(void)
- 			netif_idx_release(pending_idx);
- 		}
- 
--		/*
--		 * Old frontends do not assert data_validated but we
--		 * can infer it from csum_blank so test both flags.
--		 */
--		if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank))
-+		if (txp->flags & NETTXF_csum_blank)
- 			skb->ip_summed = CHECKSUM_PARTIAL;
--		else
--			skb->ip_summed = CHECKSUM_NONE;
-+		else if (txp->flags & NETTXF_data_validated)
-+			skb->ip_summed = CHECKSUM_UNNECESSARY;
- 
- 		netbk_fill_frags(skb);
- 
--- 
-1.7.4
-
-
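The truth table in the message reduces to: csum_blank wins, otherwise data_validated means the checksum is already good. As a small self-contained function (bit positions chosen for illustration):

#include <assert.h>

enum ip_summed { CHECKSUM_NONE, CHECKSUM_UNNECESSARY, CHECKSUM_PARTIAL };

#define NETTXF_csum_blank       (1 << 0)
#define NETTXF_data_validated   (1 << 1)

static enum ip_summed map_flags(unsigned flags)
{
    if (flags & NETTXF_csum_blank)
        return CHECKSUM_PARTIAL;
    if (flags & NETTXF_data_validated)
        return CHECKSUM_UNNECESSARY;
    return CHECKSUM_NONE;
}

int main(void)
{
    assert(map_flags(0) == CHECKSUM_NONE);
    assert(map_flags(NETTXF_csum_blank) == CHECKSUM_PARTIAL);
    assert(map_flags(NETTXF_data_validated) == CHECKSUM_UNNECESSARY);
    assert(map_flags(NETTXF_csum_blank | NETTXF_data_validated)
           == CHECKSUM_PARTIAL);
    return 0;
}
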
-From 094944631cc5a9d6e623302c987f78117c0bf7ac Mon Sep 17 00:00:00 2001
-From: Dongxiao Xu <dongxiao.xu at intel.com>
-Date: Wed, 19 May 2010 16:58:56 -0700
-Subject: [PATCH 037/203] xen: netback: Move global/static variables into struct xen_netbk.
-
-Bundle a lot of discrete variables into a single structure.
-
-Signed-off-by: Dongxiao Xu <dongxiao.xu at intel.com>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/common.h  |   59 +++++++
- drivers/xen/netback/netback.c |  360 ++++++++++++++++++++---------------------
- 2 files changed, 232 insertions(+), 187 deletions(-)
-
-diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
-index 1983768..00208f4 100644
---- a/drivers/xen/netback/common.h
-+++ b/drivers/xen/netback/common.h
-@@ -222,4 +222,63 @@ static inline int netbk_can_sg(struct net_device *dev)
- 	return netif->features & NETIF_F_SG;
- }
- 
 +struct pending_tx_info {
 +	struct xen_netif_tx_request req;
-+	struct xen_netif *netif;
++	struct xenvif *vif;
 +};
 +typedef unsigned int pending_ring_idx_t;
 +
 +struct netbk_rx_meta {
-+	skb_frag_t frag;
 +	int id;
-+};
-+
-+struct netbk_tx_pending_inuse {
-+	struct list_head list;
-+	unsigned long alloc_time;
++	int size;
++	int gso_size;
 +};
 +
 +#define MAX_PENDING_REQS 256
 +
++#define MAX_BUFFER_OFFSET PAGE_SIZE
++
++/* extra field used in struct page */
++union page_ext {
++	struct {
++#if BITS_PER_LONG < 64
++#define IDX_WIDTH   8
++#define GROUP_WIDTH (BITS_PER_LONG - IDX_WIDTH)
++		unsigned int group:GROUP_WIDTH;
++		unsigned int idx:IDX_WIDTH;
++#else
++		unsigned int group, idx;
++#endif
++	} e;
++	void *mapping;
++};
++
 +struct xen_netbk {
-+	struct tasklet_struct net_tx_tasklet;
-+	struct tasklet_struct net_rx_tasklet;
++	wait_queue_head_t wq;
++	struct task_struct *task;
 +
 +	struct sk_buff_head rx_queue;
 +	struct sk_buff_head tx_queue;
 +
 +	struct timer_list net_timer;
-+	struct timer_list netbk_tx_pending_timer;
 +
-+	struct page **mmap_pages;
++	struct page *mmap_pages[MAX_PENDING_REQS];
 +
 +	pending_ring_idx_t pending_prod;
 +	pending_ring_idx_t pending_cons;
-+	pending_ring_idx_t dealloc_prod;
-+	pending_ring_idx_t dealloc_cons;
-+
-+	struct list_head pending_inuse_head;
 +	struct list_head net_schedule_list;
 +
 +	/* Protect the net_schedule_list in netif. */
 +	spinlock_t net_schedule_list_lock;
 +
++	atomic_t netfront_count;
++
 +	struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
-+	struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
-+	struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
-+	struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
++	struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS];
 +
-+	grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
 +	u16 pending_ring[MAX_PENDING_REQS];
-+	u16 dealloc_ring[MAX_PENDING_REQS];
-+
-+	struct multicall_entry rx_mcl[NET_RX_RING_SIZE+3];
-+	struct mmu_update rx_mmu[NET_RX_RING_SIZE];
-+	struct gnttab_transfer grant_trans_op[NET_RX_RING_SIZE];
-+	struct gnttab_copy grant_copy_op[NET_RX_RING_SIZE];
-+	unsigned char rx_notify[NR_IRQS];
-+	u16 notify_list[NET_RX_RING_SIZE];
-+	struct netbk_rx_meta meta[NET_RX_RING_SIZE];
++
++	/*
++	 * Given MAX_BUFFER_OFFSET of 4096 the worst case is that each
++	 * head/fragment page uses 2 copy operations because it
++	 * straddles two buffers in the frontend.
++	 */
++	struct gnttab_copy grant_copy_op[2*XEN_NETIF_RX_RING_SIZE];
++	struct netbk_rx_meta meta[2*XEN_NETIF_RX_RING_SIZE];
 +};
 +
- #endif /* __NETIF__BACKEND__COMMON_H__ */
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 725da0f..417f497 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -48,16 +48,7 @@
- 
- /*define NETBE_DEBUG_INTERRUPT*/
- 
--struct netbk_rx_meta {
--	skb_frag_t frag;
--	int id;
--};
--
--struct netbk_tx_pending_inuse {
--	struct list_head list;
--	unsigned long alloc_time;
--};
--
-+static struct xen_netbk *netbk;
- 
- static void netif_idx_release(u16 pending_idx);
- static void make_tx_response(struct xen_netif *netif,
-@@ -71,22 +62,12 @@ static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
- 					     u16      flags);
- 
- static void net_tx_action(unsigned long unused);
--static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0);
- 
- static void net_rx_action(unsigned long unused);
--static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);
--
--static struct timer_list net_timer;
--static struct timer_list netbk_tx_pending_timer;
- 
--#define MAX_PENDING_REQS 256
--
--static struct sk_buff_head rx_queue;
--
--static struct page **mmap_pages;
- static inline unsigned long idx_to_pfn(unsigned int idx)
- {
--	return page_to_pfn(mmap_pages[idx]);
-+	return page_to_pfn(netbk->mmap_pages[idx]);
- }
- 
- static inline unsigned long idx_to_kaddr(unsigned int idx)
-@@ -107,7 +88,7 @@ static inline int netif_page_index(struct page *pg)
- 	if (!PageForeign(pg))
- 		return -1;
- 
--	if ((idx >= MAX_PENDING_REQS) || (mmap_pages[idx] != pg))
-+	if ((idx >= MAX_PENDING_REQS) || (netbk->mmap_pages[idx] != pg))
- 		return -1;
- 
- 	return idx;
-@@ -121,46 +102,17 @@ static inline int netif_page_index(struct page *pg)
-  */
- #define PKT_PROT_LEN 72
- 
--static struct pending_tx_info {
--	struct xen_netif_tx_request req;
--	struct xen_netif *netif;
--} pending_tx_info[MAX_PENDING_REQS];
--static u16 pending_ring[MAX_PENDING_REQS];
--typedef unsigned int pending_ring_idx_t;
--
- static inline pending_ring_idx_t pending_index(unsigned i)
- {
- 	return i & (MAX_PENDING_REQS-1);
- }
- 
--static pending_ring_idx_t pending_prod, pending_cons;
--
- static inline pending_ring_idx_t nr_pending_reqs(void)
- {
--	return MAX_PENDING_REQS - pending_prod + pending_cons;
-+	return MAX_PENDING_REQS -
-+		netbk->pending_prod + netbk->pending_cons;
- }
- 
--/* Freed TX SKBs get batched on this ring before return to pending_ring. */
--static u16 dealloc_ring[MAX_PENDING_REQS];
--static pending_ring_idx_t dealloc_prod, dealloc_cons;
--
--/* Doubly-linked list of in-use pending entries. */
--static struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
--static LIST_HEAD(pending_inuse_head);
--
--static struct sk_buff_head tx_queue;
--
--static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
--static struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
--static struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
--
--static LIST_HEAD(net_schedule_list);
--static DEFINE_SPINLOCK(net_schedule_list_lock);
--
--#define MAX_MFN_ALLOC 64
--static unsigned long mfn_list[MAX_MFN_ALLOC];
--static unsigned int alloc_index = 0;
--
- /* Setting this allows the safe use of this driver without netloop. */
- static int MODPARM_copy_skb = 1;
- module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
-@@ -168,18 +120,12 @@ MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
- 
- int netbk_copy_skb_mode;
- 
--static inline unsigned long alloc_mfn(void)
--{
--	BUG_ON(alloc_index == 0);
--	return mfn_list[--alloc_index];
--}
--
- static inline void maybe_schedule_tx_action(void)
- {
- 	smp_mb();
- 	if ((nr_pending_reqs() < (MAX_PENDING_REQS/2)) &&
--	    !list_empty(&net_schedule_list))
--		tasklet_schedule(&net_tx_tasklet);
-+	    !list_empty(&netbk->net_schedule_list))
-+		tasklet_schedule(&netbk->net_tx_tasklet);
- }
- 
- static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
-@@ -328,9 +274,8 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
- 			mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
- 		}
- 	}
--
--	skb_queue_tail(&rx_queue, skb);
--	tasklet_schedule(&net_rx_tasklet);
-+	skb_queue_tail(&netbk->rx_queue, skb);
-+	tasklet_schedule(&netbk->net_rx_tasklet);
- 
- 	return 0;
- 
-@@ -372,7 +317,7 @@ static u16 netbk_gop_frag(struct xen_netif *netif, struct netbk_rx_meta *meta,
- 	copy_gop = npo->copy + npo->copy_prod++;
- 	copy_gop->flags = GNTCOPY_dest_gref;
- 	if (idx > -1) {
--		struct pending_tx_info *src_pend = &pending_tx_info[idx];
-+		struct pending_tx_info *src_pend = &netbk->pending_tx_info[idx];
- 		copy_gop->source.domid = src_pend->netif->domid;
- 		copy_gop->source.u.ref = src_pend->req.gref;
- 		copy_gop->flags |= GNTCOPY_source_gref;
-@@ -487,30 +432,19 @@ static void net_rx_action(unsigned long unused)
- 	int count;
- 	unsigned long offset;
- 
--	/*
--	 * Putting hundreds of bytes on the stack is considered rude.
--	 * Static works because a tasklet can only be on one CPU at any time.
--	 */
--	static struct multicall_entry rx_mcl[NET_RX_RING_SIZE+3];
--	static struct mmu_update rx_mmu[NET_RX_RING_SIZE];
--	static struct gnttab_transfer grant_trans_op[NET_RX_RING_SIZE];
--	static struct gnttab_copy grant_copy_op[NET_RX_RING_SIZE];
--	static unsigned char rx_notify[NR_IRQS];
--	static u16 notify_list[NET_RX_RING_SIZE];
--	static struct netbk_rx_meta meta[NET_RX_RING_SIZE];
--
- 	struct netrx_pending_operations npo = {
--		mmu: rx_mmu,
--		trans: grant_trans_op,
--		copy: grant_copy_op,
--		mcl: rx_mcl,
--		meta: meta};
-+		.mmu   = netbk->rx_mmu,
-+		.trans = netbk->grant_trans_op,
-+		.copy  = netbk->grant_copy_op,
-+		.mcl   = netbk->rx_mcl,
-+		.meta  = netbk->meta,
-+	};
- 
- 	skb_queue_head_init(&rxq);
- 
- 	count = 0;
- 
--	while ((skb = skb_dequeue(&rx_queue)) != NULL) {
-+	while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) {
- 		nr_frags = skb_shinfo(skb)->nr_frags;
- 		*(int *)skb->cb = nr_frags;
- 
-@@ -525,39 +459,39 @@ static void net_rx_action(unsigned long unused)
- 			break;
- 	}
- 
--	BUG_ON(npo.meta_prod > ARRAY_SIZE(meta));
-+	BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
- 
- 	npo.mmu_mcl = npo.mcl_prod;
- 	if (npo.mcl_prod) {
- 		BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
--		BUG_ON(npo.mmu_prod > ARRAY_SIZE(rx_mmu));
-+		BUG_ON(npo.mmu_prod > ARRAY_SIZE(netbk->rx_mmu));
- 		mcl = npo.mcl + npo.mcl_prod++;
- 
- 		BUG_ON(mcl[-1].op != __HYPERVISOR_update_va_mapping);
- 		mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
- 
- 		mcl->op = __HYPERVISOR_mmu_update;
--		mcl->args[0] = (unsigned long)rx_mmu;
-+		mcl->args[0] = (unsigned long)netbk->rx_mmu;
- 		mcl->args[1] = npo.mmu_prod;
- 		mcl->args[2] = 0;
- 		mcl->args[3] = DOMID_SELF;
- 	}
- 
- 	if (npo.trans_prod) {
--		BUG_ON(npo.trans_prod > ARRAY_SIZE(grant_trans_op));
-+		BUG_ON(npo.trans_prod > ARRAY_SIZE(netbk->grant_trans_op));
- 		mcl = npo.mcl + npo.mcl_prod++;
- 		mcl->op = __HYPERVISOR_grant_table_op;
- 		mcl->args[0] = GNTTABOP_transfer;
--		mcl->args[1] = (unsigned long)grant_trans_op;
-+		mcl->args[1] = (unsigned long)netbk->grant_trans_op;
- 		mcl->args[2] = npo.trans_prod;
- 	}
- 
- 	if (npo.copy_prod) {
--		BUG_ON(npo.copy_prod > ARRAY_SIZE(grant_copy_op));
-+		BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
- 		mcl = npo.mcl + npo.mcl_prod++;
- 		mcl->op = __HYPERVISOR_grant_table_op;
- 		mcl->args[0] = GNTTABOP_copy;
--		mcl->args[1] = (unsigned long)grant_copy_op;
-+		mcl->args[1] = (unsigned long)netbk->grant_copy_op;
- 		mcl->args[2] = npo.copy_prod;
- 	}
- 
-@@ -565,7 +499,7 @@ static void net_rx_action(unsigned long unused)
- 	if (!npo.mcl_prod)
- 		return;
- 
--	BUG_ON(npo.mcl_prod > ARRAY_SIZE(rx_mcl));
-+	BUG_ON(npo.mcl_prod > ARRAY_SIZE(netbk->rx_mcl));
- 
- 	ret = HYPERVISOR_multicall(npo.mcl, npo.mcl_prod);
- 	BUG_ON(ret != 0);
-@@ -582,7 +516,7 @@ static void net_rx_action(unsigned long unused)
- 
- 		status = netbk_check_gop(nr_frags, netif->domid, &npo);
- 
--		id = meta[npo.meta_cons].id;
-+		id = netbk->meta[npo.meta_cons].id;
- 		flags = nr_frags ? NETRXF_more_data : 0;
- 
- 		if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
-@@ -595,7 +529,7 @@ static void net_rx_action(unsigned long unused)
- 		resp = make_rx_response(netif, id, status, offset,
- 					skb_headlen(skb), flags);
- 
--		if (meta[npo.meta_cons].frag.size) {
-+		if (netbk->meta[npo.meta_cons].frag.size) {
- 			struct xen_netif_extra_info *gso =
- 				(struct xen_netif_extra_info *)
- 				RING_GET_RESPONSE(&netif->rx,
-@@ -603,7 +537,7 @@ static void net_rx_action(unsigned long unused)
- 
- 			resp->flags |= NETRXF_extra_info;
- 
--			gso->u.gso.size = meta[npo.meta_cons].frag.size;
-+			gso->u.gso.size = netbk->meta[npo.meta_cons].frag.size;
- 			gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
- 			gso->u.gso.pad = 0;
- 			gso->u.gso.features = 0;
-@@ -613,14 +547,14 @@ static void net_rx_action(unsigned long unused)
- 		}
- 
- 		netbk_add_frag_responses(netif, status,
--					 meta + npo.meta_cons + 1,
--					 nr_frags);
-+				netbk->meta + npo.meta_cons + 1,
-+				nr_frags);
- 
- 		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
- 		irq = netif->irq;
--		if (ret && !rx_notify[irq]) {
--			rx_notify[irq] = 1;
--			notify_list[notify_nr++] = irq;
-+		if (ret && !netbk->rx_notify[irq]) {
-+			netbk->rx_notify[irq] = 1;
-+			netbk->notify_list[notify_nr++] = irq;
- 		}
- 
- 		if (netif_queue_stopped(netif->dev) &&
-@@ -634,24 +568,25 @@ static void net_rx_action(unsigned long unused)
- 	}
- 
- 	while (notify_nr != 0) {
--		irq = notify_list[--notify_nr];
--		rx_notify[irq] = 0;
-+		irq = netbk->notify_list[--notify_nr];
-+		netbk->rx_notify[irq] = 0;
- 		notify_remote_via_irq(irq);
- 	}
- 
- 	/* More work to do? */
--	if (!skb_queue_empty(&rx_queue) && !timer_pending(&net_timer))
--		tasklet_schedule(&net_rx_tasklet);
-+	if (!skb_queue_empty(&netbk->rx_queue) &&
-+			!timer_pending(&netbk->net_timer))
-+		tasklet_schedule(&netbk->net_rx_tasklet);
- }
- 
- static void net_alarm(unsigned long unused)
- {
--	tasklet_schedule(&net_rx_tasklet);
-+	tasklet_schedule(&netbk->net_rx_tasklet);
- }
- 
- static void netbk_tx_pending_timeout(unsigned long unused)
- {
--	tasklet_schedule(&net_tx_tasklet);
-+	tasklet_schedule(&netbk->net_tx_tasklet);
- }
- 
- struct net_device_stats *netif_be_get_stats(struct net_device *dev)
-@@ -667,12 +602,12 @@ static int __on_net_schedule_list(struct xen_netif *netif)
- 
- static void remove_from_net_schedule_list(struct xen_netif *netif)
- {
--	spin_lock_irq(&net_schedule_list_lock);
-+	spin_lock_irq(&netbk->net_schedule_list_lock);
- 	if (likely(__on_net_schedule_list(netif))) {
- 		list_del_init(&netif->list);
- 		netif_put(netif);
- 	}
--	spin_unlock_irq(&net_schedule_list_lock);
-+	spin_unlock_irq(&netbk->net_schedule_list_lock);
- }
- 
- static void add_to_net_schedule_list_tail(struct xen_netif *netif)
-@@ -680,13 +615,13 @@ static void add_to_net_schedule_list_tail(struct xen_netif *netif)
- 	if (__on_net_schedule_list(netif))
- 		return;
- 
--	spin_lock_irq(&net_schedule_list_lock);
-+	spin_lock_irq(&netbk->net_schedule_list_lock);
- 	if (!__on_net_schedule_list(netif) &&
- 	    likely(netif_schedulable(netif))) {
--		list_add_tail(&netif->list, &net_schedule_list);
-+		list_add_tail(&netif->list, &netbk->net_schedule_list);
- 		netif_get(netif);
- 	}
--	spin_unlock_irq(&net_schedule_list_lock);
-+	spin_unlock_irq(&netbk->net_schedule_list_lock);
- }
- 
- void netif_schedule_work(struct xen_netif *netif)
-@@ -736,8 +671,9 @@ static void tx_credit_callback(unsigned long data)
- 
- static inline int copy_pending_req(pending_ring_idx_t pending_idx)
- {
--	return gnttab_copy_grant_page(grant_tx_handle[pending_idx],
--				      &mmap_pages[pending_idx]);
-+	return gnttab_copy_grant_page(
-+			netbk->grant_tx_handle[pending_idx],
-+			&netbk->mmap_pages[pending_idx]);
- }
- 
- inline static void net_tx_action_dealloc(void)
-@@ -750,22 +686,24 @@ inline static void net_tx_action_dealloc(void)
- 	int ret;
- 	LIST_HEAD(list);
- 
--	dc = dealloc_cons;
--	gop = tx_unmap_ops;
-+	dc = netbk->dealloc_cons;
-+	gop = netbk->tx_unmap_ops;
- 
- 	/*
- 	 * Free up any grants we have finished using
- 	 */
- 	do {
--		dp = dealloc_prod;
-+		dp = netbk->dealloc_prod;
- 
- 		/* Ensure we see all indices enqueued by netif_idx_release(). */
- 		smp_rmb();
- 
- 		while (dc != dp) {
- 			unsigned long pfn;
-+			struct netbk_tx_pending_inuse *pending_inuse =
-+					netbk->pending_inuse;
- 
--			pending_idx = dealloc_ring[pending_index(dc++)];
-+			pending_idx = netbk->dealloc_ring[pending_index(dc++)];
- 			list_move_tail(&pending_inuse[pending_idx].list, &list);
- 
- 			pfn = idx_to_pfn(pending_idx);
-@@ -773,22 +711,27 @@ inline static void net_tx_action_dealloc(void)
- 			if (!phys_to_machine_mapping_valid(pfn))
- 				continue;
- 
--			gnttab_set_unmap_op(gop, idx_to_kaddr(pending_idx),
--					    GNTMAP_host_map,
--					    grant_tx_handle[pending_idx]);
-+			gnttab_set_unmap_op(gop,
-+					idx_to_kaddr(pending_idx),
-+					GNTMAP_host_map,
-+					netbk->grant_tx_handle[pending_idx]);
- 			gop++;
- 		}
- 
- 		if (netbk_copy_skb_mode != NETBK_DELAYED_COPY_SKB ||
--		    list_empty(&pending_inuse_head))
-+		    list_empty(&netbk->pending_inuse_head))
- 			break;
- 
- 		/* Copy any entries that have been pending for too long. */
--		list_for_each_entry_safe(inuse, n, &pending_inuse_head, list) {
-+		list_for_each_entry_safe(inuse, n,
-+				&netbk->pending_inuse_head, list) {
-+			struct pending_tx_info *pending_tx_info;
-+			pending_tx_info = netbk->pending_tx_info;
-+
- 			if (time_after(inuse->alloc_time + HZ / 2, jiffies))
- 				break;
- 
--			pending_idx = inuse - pending_inuse;
-+			pending_idx = inuse - netbk->pending_inuse;
- 
- 			pending_tx_info[pending_idx].netif->nr_copied_skbs++;
- 
-@@ -805,16 +748,21 @@ inline static void net_tx_action_dealloc(void)
- 
- 			break;
- 		}
--	} while (dp != dealloc_prod);
-+	} while (dp != netbk->dealloc_prod);
- 
--	dealloc_cons = dc;
-+	netbk->dealloc_cons = dc;
- 
- 	ret = HYPERVISOR_grant_table_op(
--		GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops);
-+		GNTTABOP_unmap_grant_ref, netbk->tx_unmap_ops,
-+		gop - netbk->tx_unmap_ops);
- 	BUG_ON(ret);
- 
- 	list_for_each_entry_safe(inuse, n, &list, list) {
--		pending_idx = inuse - pending_inuse;
-+		struct pending_tx_info *pending_tx_info;
-+		pending_ring_idx_t index;
++static struct xen_netbk *xen_netbk;
++static int xen_netbk_group_nr;
 +
-+		pending_tx_info = netbk->pending_tx_info;
-+		pending_idx = inuse - netbk->pending_inuse;
- 
- 		netif = pending_tx_info[pending_idx].netif;
- 
-@@ -822,9 +770,10 @@ inline static void net_tx_action_dealloc(void)
- 				 NETIF_RSP_OKAY);
- 
- 		/* Ready for next use. */
--		gnttab_reset_grant_page(mmap_pages[pending_idx]);
-+		gnttab_reset_grant_page(netbk->mmap_pages[pending_idx]);
- 
--		pending_ring[pending_index(pending_prod++)] = pending_idx;
-+		index = pending_index(netbk->pending_prod++);
-+		netbk->pending_ring[index] = pending_idx;
- 
- 		netif_put(netif);
- 
-@@ -832,7 +781,8 @@ inline static void net_tx_action_dealloc(void)
- 	}
- }
- 
--static void netbk_tx_err(struct xen_netif *netif, struct xen_netif_tx_request *txp, RING_IDX end)
-+static void netbk_tx_err(struct xen_netif *netif,
-+		struct xen_netif_tx_request *txp, RING_IDX end)
- {
- 	RING_IDX cons = netif->tx.req_cons;
- 
-@@ -902,7 +852,12 @@ static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netif *netif,
- 	start = ((unsigned long)shinfo->frags[0].page == pending_idx);
- 
- 	for (i = start; i < shinfo->nr_frags; i++, txp++) {
--		pending_idx = pending_ring[pending_index(pending_cons++)];
-+		pending_ring_idx_t index;
-+		struct pending_tx_info *pending_tx_info =
-+			netbk->pending_tx_info;
++void xen_netbk_add_xenvif(struct xenvif *vif)
++{
++	int i;
++	int min_netfront_count;
++	int min_group = 0;
++	struct xen_netbk *netbk;
 +
-+		index = pending_index(netbk->pending_cons++);
-+		pending_idx = netbk->pending_ring[index];
- 
- 		gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx),
- 				  GNTMAP_host_map | GNTMAP_readonly,
-@@ -922,6 +877,7 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
- {
- 	struct gnttab_map_grant_ref *mop = *mopp;
- 	int pending_idx = *((u16 *)skb->data);
-+	struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
- 	struct xen_netif *netif = pending_tx_info[pending_idx].netif;
- 	struct xen_netif_tx_request *txp;
- 	struct skb_shared_info *shinfo = skb_shinfo(skb);
-@@ -931,15 +887,17 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
- 	/* Check status of header. */
- 	err = mop->status;
- 	if (unlikely(err)) {
-+		pending_ring_idx_t index;
-+		index = pending_index(netbk->pending_prod++);
- 		txp = &pending_tx_info[pending_idx].req;
- 		make_tx_response(netif, txp, NETIF_RSP_ERROR);
--		pending_ring[pending_index(pending_prod++)] = pending_idx;
-+		netbk->pending_ring[index] = pending_idx;
- 		netif_put(netif);
- 	} else {
- 		set_phys_to_machine(
- 			__pa(idx_to_kaddr(pending_idx)) >> PAGE_SHIFT,
- 			FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
--		grant_tx_handle[pending_idx] = mop->handle;
-+		netbk->grant_tx_handle[pending_idx] = mop->handle;
- 	}
- 
- 	/* Skip first skb fragment if it is on same page as header fragment. */
-@@ -947,16 +905,19 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
- 
- 	for (i = start; i < nr_frags; i++) {
- 		int j, newerr;
-+		pending_ring_idx_t index;
- 
- 		pending_idx = (unsigned long)shinfo->frags[i].page;
- 
- 		/* Check error status: if okay then remember grant handle. */
- 		newerr = (++mop)->status;
- 		if (likely(!newerr)) {
-+			unsigned long addr;
-+			addr = idx_to_kaddr(pending_idx);
- 			set_phys_to_machine(
--				__pa(idx_to_kaddr(pending_idx))>>PAGE_SHIFT,
-+				__pa(addr)>>PAGE_SHIFT,
- 				FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
--			grant_tx_handle[pending_idx] = mop->handle;
-+			netbk->grant_tx_handle[pending_idx] = mop->handle;
- 			/* Had a previous error? Invalidate this fragment. */
- 			if (unlikely(err))
- 				netif_idx_release(pending_idx);
-@@ -964,9 +925,10 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
- 		}
- 
- 		/* Error on this fragment: respond to client with an error. */
--		txp = &pending_tx_info[pending_idx].req;
-+		txp = &netbk->pending_tx_info[pending_idx].req;
- 		make_tx_response(netif, txp, NETIF_RSP_ERROR);
--		pending_ring[pending_index(pending_prod++)] = pending_idx;
-+		index = pending_index(netbk->pending_prod++);
-+		netbk->pending_ring[index] = pending_idx;
- 		netif_put(netif);
- 
- 		/* Not the first error? Preceding frags already invalidated. */
-@@ -1002,11 +964,11 @@ static void netbk_fill_frags(struct sk_buff *skb)
- 
- 		pending_idx = (unsigned long)frag->page;
- 
--		pending_inuse[pending_idx].alloc_time = jiffies;
--		list_add_tail(&pending_inuse[pending_idx].list,
--			      &pending_inuse_head);
-+		netbk->pending_inuse[pending_idx].alloc_time = jiffies;
-+		list_add_tail(&netbk->pending_inuse[pending_idx].list,
-+			      &netbk->pending_inuse_head);
- 
--		txp = &pending_tx_info[pending_idx].req;
-+		txp = &netbk->pending_tx_info[pending_idx].req;
- 		frag->page = virt_to_page(idx_to_kaddr(pending_idx));
- 		frag->size = txp->size;
- 		frag->page_offset = txp->offset;
-@@ -1145,9 +1107,9 @@ static unsigned net_tx_build_mops(void)
- 	struct sk_buff *skb;
- 	int ret;
- 
--	mop = tx_map_ops;
-+	mop = netbk->tx_map_ops;
- 	while (((nr_pending_reqs() + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
--		!list_empty(&net_schedule_list)) {
-+		!list_empty(&netbk->net_schedule_list)) {
- 		struct xen_netif *netif;
- 		struct xen_netif_tx_request txreq;
- 		struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
-@@ -1156,9 +1118,11 @@ static unsigned net_tx_build_mops(void)
- 		RING_IDX idx;
- 		int work_to_do;
- 		unsigned int data_len;
-+		pending_ring_idx_t index;
- 	
- 		/* Get a netif from the list with work to do. */
--		netif = list_first_entry(&net_schedule_list, struct xen_netif, list);
-+		netif = list_first_entry(&netbk->net_schedule_list,
-+				struct xen_netif, list);
- 		netif_get(netif);
- 		remove_from_net_schedule_list(netif);
- 
-@@ -1217,7 +1181,8 @@ static unsigned net_tx_build_mops(void)
- 			continue;
- 		}
- 
--		pending_idx = pending_ring[pending_index(pending_cons)];
-+		index = pending_index(netbk->pending_cons);
-+		pending_idx = netbk->pending_ring[index];
- 
- 		data_len = (txreq.size > PKT_PROT_LEN &&
- 			    ret < MAX_SKB_FRAGS) ?
-@@ -1250,9 +1215,9 @@ static unsigned net_tx_build_mops(void)
- 				  txreq.gref, netif->domid);
- 		mop++;
- 
--		memcpy(&pending_tx_info[pending_idx].req,
-+		memcpy(&netbk->pending_tx_info[pending_idx].req,
- 		       &txreq, sizeof(txreq));
--		pending_tx_info[pending_idx].netif = netif;
-+		netbk->pending_tx_info[pending_idx].netif = netif;
- 		*((u16 *)skb->data) = pending_idx;
- 
- 		__skb_put(skb, data_len);
-@@ -1267,20 +1232,20 @@ static unsigned net_tx_build_mops(void)
- 			skb_shinfo(skb)->frags[0].page = (void *)~0UL;
- 		}
- 
--		__skb_queue_tail(&tx_queue, skb);
-+		__skb_queue_tail(&netbk->tx_queue, skb);
- 
--		pending_cons++;
-+		netbk->pending_cons++;
- 
- 		mop = netbk_get_requests(netif, skb, txfrags, mop);
- 
- 		netif->tx.req_cons = idx;
- 		netif_schedule_work(netif);
- 
--		if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
-+		if ((mop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops))
- 			break;
- 	}
- 
--	return mop - tx_map_ops;
-+	return mop - netbk->tx_map_ops;
- }
- 
- static void net_tx_submit(void)
-@@ -1288,16 +1253,16 @@ static void net_tx_submit(void)
- 	struct gnttab_map_grant_ref *mop;
- 	struct sk_buff *skb;
- 
--	mop = tx_map_ops;
--	while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
-+	mop = netbk->tx_map_ops;
-+	while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
- 		struct xen_netif_tx_request *txp;
- 		struct xen_netif *netif;
- 		u16 pending_idx;
- 		unsigned data_len;
- 
- 		pending_idx = *((u16 *)skb->data);
--		netif       = pending_tx_info[pending_idx].netif;
--		txp         = &pending_tx_info[pending_idx].req;
-+		netif = netbk->pending_tx_info[pending_idx].netif;
-+		txp = &netbk->pending_tx_info[pending_idx].req;
- 
- 		/* Check the remap error code. */
- 		if (unlikely(netbk_tx_check_mop(skb, &mop))) {
-@@ -1363,12 +1328,13 @@ static void net_tx_submit(void)
- 	}
- 
- 	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
--	    !list_empty(&pending_inuse_head)) {
-+	    !list_empty(&netbk->pending_inuse_head)) {
- 		struct netbk_tx_pending_inuse *oldest;
- 
--		oldest = list_entry(pending_inuse_head.next,
-+		oldest = list_entry(netbk->pending_inuse_head.next,
- 				    struct netbk_tx_pending_inuse, list);
--		mod_timer(&netbk_tx_pending_timer, oldest->alloc_time + HZ);
-+		mod_timer(&netbk->netbk_tx_pending_timer,
-+				oldest->alloc_time + HZ);
- 	}
- }
- 
-@@ -1378,7 +1344,7 @@ static void net_tx_action(unsigned long unused)
- 	unsigned nr_mops;
- 	int ret;
- 
--	if (dealloc_cons != dealloc_prod)
-+	if (netbk->dealloc_cons != netbk->dealloc_prod)
- 		net_tx_action_dealloc();
- 
- 	nr_mops = net_tx_build_mops();
-@@ -1387,7 +1353,7 @@ static void net_tx_action(unsigned long unused)
- 		return;
- 
- 	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
--					tx_map_ops, nr_mops);
-+					netbk->tx_map_ops, nr_mops);
- 	BUG_ON(ret);
- 
- 	net_tx_submit();
-@@ -1397,15 +1363,17 @@ static void netif_idx_release(u16 pending_idx)
- {
- 	static DEFINE_SPINLOCK(_lock);
- 	unsigned long flags;
-+	pending_ring_idx_t index;
- 
- 	spin_lock_irqsave(&_lock, flags);
--	dealloc_ring[pending_index(dealloc_prod)] = pending_idx;
-+	index = pending_index(netbk->dealloc_prod);
-+	netbk->dealloc_ring[index] = pending_idx;
- 	/* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
- 	smp_wmb();
--	dealloc_prod++;
-+	netbk->dealloc_prod++;
- 	spin_unlock_irqrestore(&_lock, flags);
- 
--	tasklet_schedule(&net_tx_tasklet);
-+	tasklet_schedule(&netbk->net_tx_tasklet);
- }
- 
- static void netif_page_release(struct page *page, unsigned int order)
-@@ -1481,9 +1449,9 @@ static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
- 	int i = 0;
- 
- 	printk(KERN_ALERT "netif_schedule_list:\n");
--	spin_lock_irq(&net_schedule_list_lock);
-+	spin_lock_irq(&netbk->net_schedule_list_lock);
- 
--	list_for_each (ent, &net_schedule_list) {
-+	list_for_each(ent, &netbk->net_schedule_list) {
- 		netif = list_entry(ent, struct xen_netif, list);
- 		printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
- 		       "rx_resp_prod=%08x\n",
-@@ -1500,7 +1468,7 @@ static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
- 		i++;
- 	}
- 
--	spin_unlock_irq(&net_schedule_list_lock);
-+	spin_unlock_irq(&netbk->net_schedule_list_lock);
- 	printk(KERN_ALERT " ** End of netif_schedule_list **\n");
- 
- 	return IRQ_HANDLED;
-@@ -1516,37 +1484,53 @@ static int __init netback_init(void)
- 	if (!xen_domain())
- 		return -ENODEV;
- 
-+	netbk = (struct xen_netbk *)vmalloc(sizeof(struct xen_netbk));
-+	if (!netbk) {
-+		printk(KERN_ALERT "%s: out of memory\n", __func__);
-+		return -ENOMEM;
++	min_netfront_count = atomic_read(&xen_netbk[0].netfront_count);
++	for (i = 0; i < xen_netbk_group_nr; i++) {
++		int netfront_count = atomic_read(&xen_netbk[i].netfront_count);
++		if (netfront_count < min_netfront_count) {
++			min_group = i;
++			min_netfront_count = netfront_count;
++		}
 +	}
 +
- 	/* We can increase reservation by this much in net_rx_action(). */
- //	balloon_update_driver_allowance(NET_RX_RING_SIZE);
- 
--	skb_queue_head_init(&rx_queue);
--	skb_queue_head_init(&tx_queue);
-+	skb_queue_head_init(&netbk->rx_queue);
-+	skb_queue_head_init(&netbk->tx_queue);
- 
--	init_timer(&net_timer);
--	net_timer.data = 0;
--	net_timer.function = net_alarm;
-+	init_timer(&netbk->net_timer);
-+	netbk->net_timer.data = 0;
-+	netbk->net_timer.function = net_alarm;
- 
--	init_timer(&netbk_tx_pending_timer);
--	netbk_tx_pending_timer.data = 0;
--	netbk_tx_pending_timer.function = netbk_tx_pending_timeout;
-+	init_timer(&netbk->netbk_tx_pending_timer);
-+	netbk->netbk_tx_pending_timer.data = 0;
-+	netbk->netbk_tx_pending_timer.function = netbk_tx_pending_timeout;
- 
--	mmap_pages = alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
--	if (mmap_pages == NULL) {
--		printk("%s: out of memory\n", __FUNCTION__);
--		return -ENOMEM;
-+	netbk->mmap_pages =
-+		alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
-+	if (!netbk->mmap_pages) {
-+		printk(KERN_ALERT "%s: out of memory\n", __func__);
-+		rc = -ENOMEM;
-+		goto failed_init2;
- 	}
- 
- 	for (i = 0; i < MAX_PENDING_REQS; i++) {
--		page = mmap_pages[i];
-+		page = netbk->mmap_pages[i];
- 		SetPageForeign(page, netif_page_release);
- 		netif_set_page_index(page, i);
--		INIT_LIST_HEAD(&pending_inuse[i].list);
-+		INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
- 	}
- 
--	pending_cons = 0;
--	pending_prod = MAX_PENDING_REQS;
-+	netbk->pending_cons = 0;
-+	netbk->pending_prod = MAX_PENDING_REQS;
- 	for (i = 0; i < MAX_PENDING_REQS; i++)
--		pending_ring[i] = i;
-+		netbk->pending_ring[i] = i;
++	netbk = &xen_netbk[min_group];
 +
-+	tasklet_init(&netbk->net_tx_tasklet, net_tx_action, 0);
-+	tasklet_init(&netbk->net_rx_tasklet, net_rx_action, 0);
++	vif->netbk = netbk;
++	atomic_inc(&netbk->netfront_count);
++}
 +
-+	INIT_LIST_HEAD(&netbk->pending_inuse_head);
-+	INIT_LIST_HEAD(&netbk->net_schedule_list);
++void xen_netbk_remove_xenvif(struct xenvif *vif)
++{
++	struct xen_netbk *netbk = vif->netbk;
++	vif->netbk = NULL;
++	atomic_dec(&netbk->netfront_count);
++}
 +
-+	spin_lock_init(&netbk->net_schedule_list_lock);
- 
- 	netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
- 	if (MODPARM_copy_skb) {
-@@ -1561,7 +1545,7 @@ static int __init netback_init(void)
- 
- 	rc = netif_xenbus_init();
- 	if (rc)
--		goto failed_init;
-+		goto failed_init1;
- 
- #ifdef NETBE_DEBUG_INTERRUPT
- 	(void)bind_virq_to_irqhandler(VIRQ_DEBUG,
-@@ -1574,10 +1558,12 @@ static int __init netback_init(void)
- 
- 	return 0;
- 
--failed_init:
--	free_empty_pages_and_pagevec(mmap_pages, MAX_PENDING_REQS);
--	del_timer(&netbk_tx_pending_timer);
--	del_timer(&net_timer);
-+failed_init1:
-+	free_empty_pages_and_pagevec(netbk->mmap_pages, MAX_PENDING_REQS);
-+failed_init2:
-+	del_timer(&netbk->netbk_tx_pending_timer);
-+	del_timer(&netbk->net_timer);
-+	vfree(netbk);
- 	return rc;
- 
- }
--- 
-1.7.4
-
-
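A compressed sketch of the refactoring pattern the patch above
applies (fields abbreviated from the real structure): file-scope
state that used to exist as exactly one implicit instance is bundled
into a struct reached through a single pointer, which is what makes
multiple instances possible in the follow-on patches.

    /* Before: discrete globals, one implicit instance. */
    static struct sk_buff_head rx_queue;
    static u16 pending_ring[MAX_PENDING_REQS];

    /* After: the same state bundled into one structure and
     * reached via a pointer, so code says netbk->rx_queue etc. */
    struct xen_netbk {
            struct sk_buff_head rx_queue;
            u16 pending_ring[MAX_PENDING_REQS];
            /* ...timers, grant-op arrays, rings, locks... */
    };
    static struct xen_netbk *netbk;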
-From c099c22d8b1c12fc7d68998982eb4ccd4918e813 Mon Sep 17 00:00:00 2001
-From: Dongxiao Xu <dongxiao.xu at intel.com>
-Date: Wed, 19 May 2010 16:58:57 -0700
-Subject: [PATCH 038/203] xen: netback: Introduce a new struct type page_ext.
-
-struct page_ext stores the group and idx information by which a
-given page can be identified (a stand-alone demo of the encoding
-follows this patch).
-
-Signed-off-by: Dongxiao Xu <dongxiao.xu at intel.com>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/common.h  |   15 +++++++++++++++
- drivers/xen/netback/netback.c |   28 +++++++++++++++++-----------
- 2 files changed, 32 insertions(+), 11 deletions(-)
-
-diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
-index 00208f4..5e0e467 100644
---- a/drivers/xen/netback/common.h
-+++ b/drivers/xen/netback/common.h
-@@ -240,6 +240,21 @@ struct netbk_tx_pending_inuse {
- 
- #define MAX_PENDING_REQS 256
- 
-+/* extra field used in struct page */
-+union page_ext {
-+	struct {
-+#if BITS_PER_LONG < 64
-+#define IDX_WIDTH   8
-+#define GROUP_WIDTH (BITS_PER_LONG - IDX_WIDTH)
-+		unsigned int group:GROUP_WIDTH;
-+		unsigned int idx:IDX_WIDTH;
-+#else
-+		unsigned int group, idx;
-+#endif
-+	} e;
-+	void *mapping;
-+};
++static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx);
++static void make_tx_response(struct xenvif *vif,
++			     struct xen_netif_tx_request *txp,
++			     s8       st);
++static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
++					     u16      id,
++					     s8       st,
++					     u16      offset,
++					     u16      size,
++					     u16      flags);
 +
- struct xen_netbk {
- 	struct tasklet_struct net_tx_tasklet;
- 	struct tasklet_struct net_rx_tasklet;
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 417f497..71ec999 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -76,22 +76,27 @@ static inline unsigned long idx_to_kaddr(unsigned int idx)
- }
- 
- /* extra field used in struct page */
--static inline void netif_set_page_index(struct page *pg, unsigned int index)
-+static inline void netif_set_page_ext(struct page *pg, unsigned int group,
-+		unsigned int idx)
- {
--	*(unsigned long *)&pg->mapping = index + 1;
++static inline unsigned long idx_to_pfn(struct xen_netbk *netbk,
++				       unsigned int idx)
++{
++	return page_to_pfn(netbk->mmap_pages[idx]);
++}
++
++static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
++					 unsigned int idx)
++{
++	return (unsigned long)pfn_to_kaddr(idx_to_pfn(netbk, idx));
++}
++
++/* extra field used in struct page */
++static inline void set_page_ext(struct page *pg, struct xen_netbk *netbk,
++				unsigned int idx)
++{
++	unsigned int group = netbk - xen_netbk;
 +	union page_ext ext = { .e = { .group = group + 1, .idx = idx } };
 +
 +	BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping));
 +	pg->mapping = ext.mapping;
- }
- 
--static inline int netif_page_index(struct page *pg)
-+static inline unsigned int netif_page_group(const struct page *pg)
- {
--	unsigned long idx = (unsigned long)pg->mapping - 1;
-+	union page_ext ext = { .mapping = pg->mapping };
- 
--	if (!PageForeign(pg))
--		return -1;
-+	return ext.e.group - 1;
 +}
- 
--	if ((idx >= MAX_PENDING_REQS) || (netbk->mmap_pages[idx] != pg))
--		return -1;
-+static inline unsigned int netif_page_index(const struct page *pg)
++
++static int get_page_ext(struct page *pg,
++			unsigned int *pgroup, unsigned int *pidx)
 +{
 +	union page_ext ext = { .mapping = pg->mapping };
- 
--	return idx;
-+	return ext.e.idx;
- }
- 
- /*
-@@ -1380,7 +1385,8 @@ static void netif_page_release(struct page *page, unsigned int order)
- {
- 	int idx = netif_page_index(page);
- 	BUG_ON(order);
--	BUG_ON(idx < 0);
-+	BUG_ON(idx < 0 || idx >= MAX_PENDING_REQS);
-+	BUG_ON(netbk->mmap_pages[idx] != page);
- 	netif_idx_release(idx);
- }
- 
-@@ -1515,7 +1521,7 @@ static int __init netback_init(void)
- 	for (i = 0; i < MAX_PENDING_REQS; i++) {
- 		page = netbk->mmap_pages[i];
- 		SetPageForeign(page, netif_page_release);
--		netif_set_page_index(page, i);
-+		netif_set_page_ext(page, 0, i);
- 		INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
- 	}
- 
--- 
-1.7.4
-
-
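The union above works because page->mapping is otherwise unused for
these foreign pages, so it can carry a packed (group, idx) pair
instead of a pointer. A stand-alone demo of the narrow encoding,
assuming a 32-bit build (BITS_PER_LONG == 32; 64-bit builds simply
use two full ints); the +1 bias on group lets an all-zero mapping
mean "no owner":

    #include <stdio.h>

    #define IDX_WIDTH   8
    #define GROUP_WIDTH (32 - IDX_WIDTH)

    union page_ext {
            struct {
                    unsigned int group:GROUP_WIDTH;
                    unsigned int idx:IDX_WIDTH;
            } e;
            void *mapping;
    };

    int main(void)
    {
            union page_ext ext = { .e = { .group = 3 + 1, .idx = 42 } };
            union page_ext back = { .mapping = ext.mapping };

            printf("group=%u idx=%u\n", back.e.group - 1, back.e.idx);
            return 0;
    }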
-From 9534985c5b9cc3f6238d6cb8bba7d376e82039d3 Mon Sep 17 00:00:00 2001
-From: Dongxiao Xu <dongxiao.xu at intel.com>
-Date: Wed, 19 May 2010 17:08:21 -0700
-Subject: [PATCH 039/203] xen: netback: Multiple tasklets support.
-
-Now netback uses one pair of tasklets for Tx/Rx data transaction.
-Netback tasklet could only run at one CPU at a time, and it is
-used to serve all the netfronts. Therefore it has become a
-performance bottle neck. This patch is to use multiple tasklet
-pairs to replace the current single pair in dom0.
-
-Assuming that Dom0 has CPUNR VCPUs, we define CPUNR kinds of
-tasklets pair (CPUNR for Tx, and CPUNR for Rx). Each pare of
-tasklets serve specific group of netfronts. Also for those global
-and static variables, we duplicated them for each group in
-order to avoid the spinlock.
-
-Signed-off-by: Dongxiao Xu <dongxiao.xu at intel.com>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/common.h    |    6 +
- drivers/xen/netback/interface.c |   27 ++++
- drivers/xen/netback/netback.c   |  270 ++++++++++++++++++++++++---------------
- 3 files changed, 197 insertions(+), 106 deletions(-)
-
-diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
-index 5e0e467..847ba58 100644
---- a/drivers/xen/netback/common.h
-+++ b/drivers/xen/netback/common.h
-@@ -58,6 +58,7 @@
- struct xen_netif {
- 	/* Unique identifier for this interface. */
- 	domid_t          domid;
-+	int              group;
- 	unsigned int     handle;
- 
- 	u8               fe_dev_addr[6];
-@@ -278,6 +279,8 @@ struct xen_netbk {
- 	/* Protect the net_schedule_list in netif. */
- 	spinlock_t net_schedule_list_lock;
- 
-+	atomic_t netfront_count;
++	struct xen_netbk *netbk;
++	unsigned int group, idx;
 +
- 	struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
- 	struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
- 	struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
-@@ -296,4 +299,7 @@ struct xen_netbk {
- 	struct netbk_rx_meta meta[NET_RX_RING_SIZE];
- };
- 
-+extern struct xen_netbk *xen_netbk;
-+extern int xen_netbk_group_nr;
++	group = ext.e.group - 1;
 +
- #endif /* __NETIF__BACKEND__COMMON_H__ */
-diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
-index 086d939..172ef4c 100644
---- a/drivers/xen/netback/interface.c
-+++ b/drivers/xen/netback/interface.c
-@@ -54,8 +54,33 @@
- static unsigned long netbk_queue_length = 32;
- module_param_named(queue_length, netbk_queue_length, ulong, 0644);
- 
-+static void netbk_add_netif(struct xen_netbk *netbk, int group_nr,
-+			   struct xen_netif *netif)
-+{
-+	int i;
-+	int min_netfront_count;
-+	int min_group = 0;
-+	min_netfront_count = atomic_read(&netbk[0].netfront_count);
-+	for (i = 0; i < group_nr; i++) {
-+		int netfront_count = atomic_read(&netbk[i].netfront_count);
-+		if (netfront_count < min_netfront_count) {
-+			min_group = i;
-+			min_netfront_count = netfront_count;
-+		}
-+	}
++	if (group < 0 || group >= xen_netbk_group_nr)
++		return 0;
++
++	netbk = &xen_netbk[group];
 +
-+	netif->group = min_group;
-+	atomic_inc(&netbk[netif->group].netfront_count);
++	idx = ext.e.idx;
++
++	if ((idx < 0) || (idx >= MAX_PENDING_REQS))
++		return 0;
++
++	if (netbk->mmap_pages[idx] != pg)
++		return 0;
++
++	*pgroup = group;
++	*pidx = idx;
++
++	return 1;
 +}
 +
-+static void netbk_remove_netif(struct xen_netbk *netbk, struct xen_netif *netif)
++/*
++ * This is the amount of packet we copy rather than map, so that the
++ * guest can't fiddle with the contents of the headers while we do
++ * packet processing on them (netfilter, routing, etc).
++ */
++#define PKT_PROT_LEN    (ETH_HLEN + \
++			 VLAN_HLEN + \
++			 sizeof(struct iphdr) + MAX_IPOPTLEN + \
++			 sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE)
++
++static inline pending_ring_idx_t pending_index(unsigned i)
 +{
-+	atomic_dec(&netbk[netif->group].netfront_count);
++	return i & (MAX_PENDING_REQS-1);
 +}
 +
- static void __netif_up(struct xen_netif *netif)
- {
-+	netbk_add_netif(xen_netbk, xen_netbk_group_nr, netif);
- 	enable_irq(netif->irq);
- 	netif_schedule_work(netif);
- }
-@@ -64,6 +89,7 @@ static void __netif_down(struct xen_netif *netif)
- {
- 	disable_irq(netif->irq);
- 	netif_deschedule_work(netif);
-+	netbk_remove_netif(xen_netbk, netif);
- }
- 
- static int net_open(struct net_device *dev)
-@@ -214,6 +240,7 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
- 	netif = netdev_priv(dev);
- 	memset(netif, 0, sizeof(*netif));
- 	netif->domid  = domid;
-+	netif->group  = -1;
- 	netif->handle = handle;
- 	netif->features = NETIF_F_SG;
- 	atomic_set(&netif->refcnt, 1);
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 71ec999..feefb14 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -48,9 +48,10 @@
- 
- /*define NETBE_DEBUG_INTERRUPT*/
- 
--static struct xen_netbk *netbk;
-+struct xen_netbk *xen_netbk;
-+int xen_netbk_group_nr;
- 
--static void netif_idx_release(u16 pending_idx);
-+static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx);
- static void make_tx_response(struct xen_netif *netif,
- 			     struct xen_netif_tx_request *txp,
- 			     s8       st);
-@@ -61,18 +62,20 @@ static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
- 					     u16      size,
- 					     u16      flags);
- 
--static void net_tx_action(unsigned long unused);
-+static void net_tx_action(unsigned long data);
- 
--static void net_rx_action(unsigned long unused);
-+static void net_rx_action(unsigned long data);
- 
--static inline unsigned long idx_to_pfn(unsigned int idx)
-+static inline unsigned long idx_to_pfn(struct xen_netbk *netbk,
-+				       unsigned int idx)
- {
- 	return page_to_pfn(netbk->mmap_pages[idx]);
- }
- 
--static inline unsigned long idx_to_kaddr(unsigned int idx)
-+static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
-+					 unsigned int idx)
- {
--	return (unsigned long)pfn_to_kaddr(idx_to_pfn(idx));
-+	return (unsigned long)pfn_to_kaddr(idx_to_pfn(netbk, idx));
- }
- 
- /* extra field used in struct page */
-@@ -112,7 +115,7 @@ static inline pending_ring_idx_t pending_index(unsigned i)
- 	return i & (MAX_PENDING_REQS-1);
- }
- 
--static inline pending_ring_idx_t nr_pending_reqs(void)
 +static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk)
- {
- 	return MAX_PENDING_REQS -
- 		netbk->pending_prod + netbk->pending_cons;
-@@ -125,10 +128,10 @@ MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
- 
- int netbk_copy_skb_mode;
- 
--static inline void maybe_schedule_tx_action(void)
-+static inline void maybe_schedule_tx_action(struct xen_netbk *netbk)
- {
- 	smp_mb();
--	if ((nr_pending_reqs() < (MAX_PENDING_REQS/2)) &&
-+	if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
- 	    !list_empty(&netbk->net_schedule_list))
- 		tasklet_schedule(&netbk->net_tx_tasklet);
- }
-@@ -235,9 +238,15 @@ static void tx_queue_callback(unsigned long data)
- int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
- {
- 	struct xen_netif *netif = netdev_priv(dev);
-+	struct xen_netbk *netbk;
- 
- 	BUG_ON(skb->dev != dev);
- 
-+	if (netif->group == -1)
-+		goto drop;
++{
++	return MAX_PENDING_REQS -
++		netbk->pending_prod + netbk->pending_cons;
++}
 +
-+	netbk = &xen_netbk[netif->group];
-+
- 	/* Drop the packet if the target domain has no receive buffers. */
- 	if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
- 		goto drop;
-@@ -313,6 +322,7 @@ static u16 netbk_gop_frag(struct xen_netif *netif, struct netbk_rx_meta *meta,
- 	struct gnttab_copy *copy_gop;
- 	struct xen_netif_rx_request *req;
- 	unsigned long old_mfn;
-+	int group = netif_page_group(page);
- 	int idx = netif_page_index(page);
- 
- 	old_mfn = virt_to_mfn(page_address(page));
-@@ -321,7 +331,8 @@ static u16 netbk_gop_frag(struct xen_netif *netif, struct netbk_rx_meta *meta,
- 
- 	copy_gop = npo->copy + npo->copy_prod++;
- 	copy_gop->flags = GNTCOPY_dest_gref;
--	if (idx > -1) {
-+	if (PageForeign(page)) {
-+		struct xen_netbk *netbk = &xen_netbk[group];
- 		struct pending_tx_info *src_pend = &netbk->pending_tx_info[idx];
- 		copy_gop->source.domid = src_pend->netif->domid;
- 		copy_gop->source.u.ref = src_pend->req.gref;
-@@ -422,9 +433,10 @@ static void netbk_add_frag_responses(struct xen_netif *netif, int status,
- 	}
- }
- 
--static void net_rx_action(unsigned long unused)
-+static void net_rx_action(unsigned long data)
- {
- 	struct xen_netif *netif = NULL;
-+	struct xen_netbk *netbk = (struct xen_netbk *)data;
- 	s8 status;
- 	u16 id, irq, flags;
- 	struct xen_netif_rx_response *resp;
-@@ -584,13 +596,15 @@ static void net_rx_action(unsigned long unused)
- 		tasklet_schedule(&netbk->net_rx_tasklet);
- }
- 
--static void net_alarm(unsigned long unused)
-+static void net_alarm(unsigned long data)
- {
-+	struct xen_netbk *netbk = (struct xen_netbk *)data;
- 	tasklet_schedule(&netbk->net_rx_tasklet);
- }
- 
--static void netbk_tx_pending_timeout(unsigned long unused)
-+static void netbk_tx_pending_timeout(unsigned long data)
- {
-+	struct xen_netbk *netbk = (struct xen_netbk *)data;
- 	tasklet_schedule(&netbk->net_tx_tasklet);
- }
- 
-@@ -607,6 +621,7 @@ static int __on_net_schedule_list(struct xen_netif *netif)
- 
- static void remove_from_net_schedule_list(struct xen_netif *netif)
- {
-+	struct xen_netbk *netbk = &xen_netbk[netif->group];
- 	spin_lock_irq(&netbk->net_schedule_list_lock);
- 	if (likely(__on_net_schedule_list(netif))) {
- 		list_del_init(&netif->list);
-@@ -617,6 +632,7 @@ static void remove_from_net_schedule_list(struct xen_netif *netif)
- 
- static void add_to_net_schedule_list_tail(struct xen_netif *netif)
- {
-+	struct xen_netbk *netbk = &xen_netbk[netif->group];
- 	if (__on_net_schedule_list(netif))
- 		return;
- 
-@@ -631,13 +647,14 @@ static void add_to_net_schedule_list_tail(struct xen_netif *netif)
- 
- void netif_schedule_work(struct xen_netif *netif)
- {
-+	struct xen_netbk *netbk = &xen_netbk[netif->group];
- 	int more_to_do;
- 
- 	RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
- 
- 	if (more_to_do) {
- 		add_to_net_schedule_list_tail(netif);
--		maybe_schedule_tx_action();
-+		maybe_schedule_tx_action(netbk);
- 	}
- }
- 
-@@ -674,14 +691,15 @@ static void tx_credit_callback(unsigned long data)
- 	netif_schedule_work(netif);
- }
- 
--static inline int copy_pending_req(pending_ring_idx_t pending_idx)
-+static inline int copy_pending_req(struct xen_netbk *netbk,
-+				   pending_ring_idx_t pending_idx)
- {
- 	return gnttab_copy_grant_page(
- 			netbk->grant_tx_handle[pending_idx],
- 			&netbk->mmap_pages[pending_idx]);
- }
- 
--inline static void net_tx_action_dealloc(void)
-+static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
- {
- 	struct netbk_tx_pending_inuse *inuse, *n;
- 	struct gnttab_unmap_grant_ref *gop;
-@@ -711,13 +729,13 @@ inline static void net_tx_action_dealloc(void)
- 			pending_idx = netbk->dealloc_ring[pending_index(dc++)];
- 			list_move_tail(&pending_inuse[pending_idx].list, &list);
- 
--			pfn = idx_to_pfn(pending_idx);
-+			pfn = idx_to_pfn(netbk, pending_idx);
- 			/* Already unmapped? */
- 			if (!phys_to_machine_mapping_valid(pfn))
- 				continue;
- 
- 			gnttab_set_unmap_op(gop,
--					idx_to_kaddr(pending_idx),
-+					idx_to_kaddr(netbk, pending_idx),
- 					GNTMAP_host_map,
- 					netbk->grant_tx_handle[pending_idx]);
- 			gop++;
-@@ -740,7 +758,7 @@ inline static void net_tx_action_dealloc(void)
- 
- 			pending_tx_info[pending_idx].netif->nr_copied_skbs++;
- 
--			switch (copy_pending_req(pending_idx)) {
-+			switch (copy_pending_req(netbk, pending_idx)) {
- 			case 0:
- 				list_move_tail(&inuse->list, &list);
- 				continue;
-@@ -843,7 +861,8 @@ static int netbk_count_requests(struct xen_netif *netif,
- 	return frags;
- }
- 
--static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netif *netif,
-+static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk,
-+						  struct xen_netif *netif,
- 						  struct sk_buff *skb,
- 						  struct xen_netif_tx_request *txp,
- 						  struct gnttab_map_grant_ref *mop)
-@@ -864,7 +883,7 @@ static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netif *netif,
- 		index = pending_index(netbk->pending_cons++);
- 		pending_idx = netbk->pending_ring[index];
- 
--		gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx),
-+		gnttab_set_map_op(mop++, idx_to_kaddr(netbk, pending_idx),
- 				  GNTMAP_host_map | GNTMAP_readonly,
- 				  txp->gref, netif->domid);
- 
-@@ -877,8 +896,9 @@ static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netif *netif,
- 	return mop;
- }
- 
--static int netbk_tx_check_mop(struct sk_buff *skb,
--			       struct gnttab_map_grant_ref **mopp)
-+static int netbk_tx_check_mop(struct xen_netbk *netbk,
-+			      struct sk_buff *skb,
-+			      struct gnttab_map_grant_ref **mopp)
- {
- 	struct gnttab_map_grant_ref *mop = *mopp;
- 	int pending_idx = *((u16 *)skb->data);
-@@ -900,7 +920,7 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
- 		netif_put(netif);
- 	} else {
- 		set_phys_to_machine(
--			__pa(idx_to_kaddr(pending_idx)) >> PAGE_SHIFT,
-+			__pa(idx_to_kaddr(netbk, pending_idx)) >> PAGE_SHIFT,
- 			FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
- 		netbk->grant_tx_handle[pending_idx] = mop->handle;
- 	}
-@@ -918,14 +938,14 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
- 		newerr = (++mop)->status;
- 		if (likely(!newerr)) {
- 			unsigned long addr;
--			addr = idx_to_kaddr(pending_idx);
-+			addr = idx_to_kaddr(netbk, pending_idx);
- 			set_phys_to_machine(
- 				__pa(addr)>>PAGE_SHIFT,
- 				FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
- 			netbk->grant_tx_handle[pending_idx] = mop->handle;
- 			/* Had a previous error? Invalidate this fragment. */
- 			if (unlikely(err))
--				netif_idx_release(pending_idx);
-+				netif_idx_release(netbk, pending_idx);
- 			continue;
- 		}
- 
-@@ -942,10 +962,10 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
- 
- 		/* First error: invalidate header and preceding fragments. */
- 		pending_idx = *((u16 *)skb->data);
--		netif_idx_release(pending_idx);
-+		netif_idx_release(netbk, pending_idx);
- 		for (j = start; j < i; j++) {
- 			pending_idx = (unsigned long)shinfo->frags[i].page;
--			netif_idx_release(pending_idx);
-+			netif_idx_release(netbk, pending_idx);
- 		}
- 
- 		/* Remember the error: invalidate all subsequent fragments. */
-@@ -956,7 +976,7 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
- 	return err;
- }
- 
--static void netbk_fill_frags(struct sk_buff *skb)
-+static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
- {
- 	struct skb_shared_info *shinfo = skb_shinfo(skb);
- 	int nr_frags = shinfo->nr_frags;
-@@ -974,7 +994,7 @@ static void netbk_fill_frags(struct sk_buff *skb)
- 			      &netbk->pending_inuse_head);
- 
- 		txp = &netbk->pending_tx_info[pending_idx].req;
--		frag->page = virt_to_page(idx_to_kaddr(pending_idx));
-+		frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx));
- 		frag->size = txp->size;
- 		frag->page_offset = txp->offset;
- 
-@@ -1106,14 +1126,14 @@ static bool tx_credit_exceeded(struct xen_netif *netif, unsigned size)
- 	return false;
- }
- 
--static unsigned net_tx_build_mops(void)
-+static unsigned net_tx_build_mops(struct xen_netbk *netbk)
- {
- 	struct gnttab_map_grant_ref *mop;
- 	struct sk_buff *skb;
- 	int ret;
- 
- 	mop = netbk->tx_map_ops;
--	while (((nr_pending_reqs() + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
-+	while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
- 		!list_empty(&netbk->net_schedule_list)) {
- 		struct xen_netif *netif;
- 		struct xen_netif_tx_request txreq;
-@@ -1215,7 +1235,7 @@ static unsigned net_tx_build_mops(void)
- 			}
- 		}
- 
--		gnttab_set_map_op(mop, idx_to_kaddr(pending_idx),
-+		gnttab_set_map_op(mop, idx_to_kaddr(netbk, pending_idx),
- 				  GNTMAP_host_map | GNTMAP_readonly,
- 				  txreq.gref, netif->domid);
- 		mop++;
-@@ -1241,7 +1261,7 @@ static unsigned net_tx_build_mops(void)
- 
- 		netbk->pending_cons++;
- 
--		mop = netbk_get_requests(netif, skb, txfrags, mop);
-+		mop = netbk_get_requests(netbk, netif, skb, txfrags, mop);
- 
- 		netif->tx.req_cons = idx;
- 		netif_schedule_work(netif);
-@@ -1253,7 +1273,7 @@ static unsigned net_tx_build_mops(void)
- 	return mop - netbk->tx_map_ops;
- }
- 
--static void net_tx_submit(void)
-+static void net_tx_submit(struct xen_netbk *netbk)
- {
- 	struct gnttab_map_grant_ref *mop;
- 	struct sk_buff *skb;
-@@ -1270,7 +1290,7 @@ static void net_tx_submit(void)
- 		txp = &netbk->pending_tx_info[pending_idx].req;
- 
- 		/* Check the remap error code. */
--		if (unlikely(netbk_tx_check_mop(skb, &mop))) {
-+		if (unlikely(netbk_tx_check_mop(netbk, skb, &mop))) {
- 			DPRINTK("netback grant failed.\n");
- 			skb_shinfo(skb)->nr_frags = 0;
- 			kfree_skb(skb);
-@@ -1279,7 +1299,7 @@ static void net_tx_submit(void)
- 
- 		data_len = skb->len;
- 		memcpy(skb->data,
--		       (void *)(idx_to_kaddr(pending_idx)|txp->offset),
-+		       (void *)(idx_to_kaddr(netbk, pending_idx)|txp->offset),
- 		       data_len);
- 		if (data_len < txp->size) {
- 			/* Append the packet payload as a fragment. */
-@@ -1287,7 +1307,7 @@ static void net_tx_submit(void)
- 			txp->size -= data_len;
- 		} else {
- 			/* Schedule a response immediately. */
--			netif_idx_release(pending_idx);
-+			netif_idx_release(netbk, pending_idx);
- 		}
- 
- 		if (txp->flags & NETTXF_csum_blank)
-@@ -1295,7 +1315,7 @@ static void net_tx_submit(void)
- 		else if (txp->flags & NETTXF_data_validated)
- 			skb->ip_summed = CHECKSUM_UNNECESSARY;
- 
--		netbk_fill_frags(skb);
-+		netbk_fill_frags(netbk, skb);
- 
- 		/*
- 		 * If the initial fragment was < PKT_PROT_LEN then
-@@ -1344,15 +1364,16 @@ static void net_tx_submit(void)
- }
- 
- /* Called after netfront has transmitted */
--static void net_tx_action(unsigned long unused)
-+static void net_tx_action(unsigned long data)
- {
-+	struct xen_netbk *netbk = (struct xen_netbk *)data;
- 	unsigned nr_mops;
- 	int ret;
- 
- 	if (netbk->dealloc_cons != netbk->dealloc_prod)
--		net_tx_action_dealloc();
-+		net_tx_action_dealloc(netbk);
- 
--	nr_mops = net_tx_build_mops();
-+	nr_mops = net_tx_build_mops(netbk);
- 
- 	if (nr_mops == 0)
- 		return;
-@@ -1361,10 +1382,10 @@ static void net_tx_action(unsigned long unused)
- 					netbk->tx_map_ops, nr_mops);
- 	BUG_ON(ret);
- 
--	net_tx_submit();
-+	net_tx_submit(netbk);
- }
- 
--static void netif_idx_release(u16 pending_idx)
-+static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
- {
- 	static DEFINE_SPINLOCK(_lock);
- 	unsigned long flags;
-@@ -1383,19 +1404,28 @@ static void netif_idx_release(u16 pending_idx)
- 
- static void netif_page_release(struct page *page, unsigned int order)
- {
-+	int group = netif_page_group(page);
- 	int idx = netif_page_index(page);
-+	struct xen_netbk *netbk = &xen_netbk[group];
- 	BUG_ON(order);
-+	BUG_ON(group < 0 || group >= xen_netbk_group_nr);
- 	BUG_ON(idx < 0 || idx >= MAX_PENDING_REQS);
- 	BUG_ON(netbk->mmap_pages[idx] != page);
--	netif_idx_release(idx);
-+	netif_idx_release(netbk, idx);
- }
- 
- irqreturn_t netif_be_int(int irq, void *dev_id)
- {
- 	struct xen_netif *netif = dev_id;
-+	struct xen_netbk *netbk;
++static void xen_netbk_kick_thread(struct xen_netbk *netbk)
++{
++	wake_up(&netbk->wq);
++}
 +
-+	if (netif->group == -1)
-+		return IRQ_NONE;
++static int max_required_rx_slots(struct xenvif *vif)
++{
++	int max = DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE);
 +
-+	netbk = &xen_netbk[netif->group];
- 
- 	add_to_net_schedule_list_tail(netif);
--	maybe_schedule_tx_action();
-+	maybe_schedule_tx_action(netbk);
- 
- 	if (netif_schedulable(netif) && !netbk_queue_full(netif))
- 		netif_wake_queue(netif->dev);
-@@ -1453,28 +1483,40 @@ static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
- 	struct list_head *ent;
- 	struct xen_netif *netif;
- 	int i = 0;
-+	int group = 0;
- 
- 	printk(KERN_ALERT "netif_schedule_list:\n");
--	spin_lock_irq(&netbk->net_schedule_list_lock);
- 
--	list_for_each(ent, &netbk->net_schedule_list) {
--		netif = list_entry(ent, struct xen_netif, list);
--		printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
--		       "rx_resp_prod=%08x\n",
--		       i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
--		printk(KERN_ALERT "   tx_req_cons=%08x tx_resp_prod=%08x)\n",
--		       netif->tx.req_cons, netif->tx.rsp_prod_pvt);
--		printk(KERN_ALERT "   shared(rx_req_prod=%08x "
--		       "rx_resp_prod=%08x\n",
--		       netif->rx.sring->req_prod, netif->rx.sring->rsp_prod);
--		printk(KERN_ALERT "   rx_event=%08x tx_req_prod=%08x\n",
--		       netif->rx.sring->rsp_event, netif->tx.sring->req_prod);
--		printk(KERN_ALERT "   tx_resp_prod=%08x, tx_event=%08x)\n",
--		       netif->tx.sring->rsp_prod, netif->tx.sring->rsp_event);
--		i++;
-+	for (group = 0; group < xen_netbk_group_nr; group++) {
-+		struct xen_netbk *netbk = &xen_netbk[group];
-+		spin_lock_irq(&netbk->net_schedule_list_lock);
-+		printk(KERN_ALERT "xen_netback group number: %d\n", group);
-+		list_for_each(ent, &netbk->net_schedule_list) {
-+			netif = list_entry(ent, struct xen_netif, list);
-+			printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
-+				"rx_resp_prod=%08x\n",
-+				i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
-+			printk(KERN_ALERT
-+				"   tx_req_cons=%08x, tx_resp_prod=%08x)\n",
-+				netif->tx.req_cons, netif->tx.rsp_prod_pvt);
-+			printk(KERN_ALERT
-+				"   shared(rx_req_prod=%08x "
-+				"rx_resp_prod=%08x\n",
-+				netif->rx.sring->req_prod,
-+				netif->rx.sring->rsp_prod);
-+			printk(KERN_ALERT
-+				"   rx_event=%08x, tx_req_prod=%08x\n",
-+				netif->rx.sring->rsp_event,
-+				netif->tx.sring->req_prod);
-+			printk(KERN_ALERT
-+				"   tx_resp_prod=%08x, tx_event=%08x)\n",
-+				netif->tx.sring->rsp_prod,
-+				netif->tx.sring->rsp_event);
-+			i++;
-+		}
-+		spin_unlock_irq(&netbk->net_schedule_list_lock);
- 	}
- 
--	spin_unlock_irq(&netbk->net_schedule_list_lock);
- 	printk(KERN_ALERT " ** End of netif_schedule_list **\n");
- 
- 	return IRQ_HANDLED;
-@@ -1486,12 +1528,15 @@ static int __init netback_init(void)
- 	int i;
- 	struct page *page;
- 	int rc = 0;
-+	int group;
- 
- 	if (!xen_domain())
- 		return -ENODEV;
- 
--	netbk = (struct xen_netbk *)vmalloc(sizeof(struct xen_netbk));
--	if (!netbk) {
-+	xen_netbk_group_nr = num_online_cpus();
-+	xen_netbk = (struct xen_netbk *)vmalloc(sizeof(struct xen_netbk) *
-+					    xen_netbk_group_nr);
-+	if (!xen_netbk) {
- 		printk(KERN_ALERT "%s: out of memory\n", __func__);
- 		return -ENOMEM;
- 	}
-@@ -1499,44 +1544,54 @@ static int __init netback_init(void)
- 	/* We can increase reservation by this much in net_rx_action(). */
- //	balloon_update_driver_allowance(NET_RX_RING_SIZE);
- 
--	skb_queue_head_init(&netbk->rx_queue);
--	skb_queue_head_init(&netbk->tx_queue);
--
--	init_timer(&netbk->net_timer);
--	netbk->net_timer.data = 0;
--	netbk->net_timer.function = net_alarm;
--
--	init_timer(&netbk->netbk_tx_pending_timer);
--	netbk->netbk_tx_pending_timer.data = 0;
--	netbk->netbk_tx_pending_timer.function = netbk_tx_pending_timeout;
-+	for (group = 0; group < xen_netbk_group_nr; group++) {
-+		struct xen_netbk *netbk = &xen_netbk[group];
-+		skb_queue_head_init(&netbk->rx_queue);
-+		skb_queue_head_init(&netbk->tx_queue);
-+
-+		init_timer(&netbk->net_timer);
-+		netbk->net_timer.data = (unsigned long)netbk;
-+		netbk->net_timer.function = net_alarm;
-+
-+		init_timer(&netbk->netbk_tx_pending_timer);
-+		netbk->netbk_tx_pending_timer.data = (unsigned long)netbk;
-+		netbk->netbk_tx_pending_timer.function =
-+			netbk_tx_pending_timeout;
-+
-+		netbk->mmap_pages =
-+			alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
-+		if (!netbk->mmap_pages) {
-+			printk(KERN_ALERT "%s: out of memory\n", __func__);
-+			del_timer(&netbk->netbk_tx_pending_timer);
-+			del_timer(&netbk->net_timer);
-+			rc = -ENOMEM;
-+			goto failed_init;
-+		}
- 
--	netbk->mmap_pages =
--		alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
--	if (!netbk->mmap_pages) {
--		printk(KERN_ALERT "%s: out of memory\n", __func__);
--		rc = -ENOMEM;
--		goto failed_init2;
--	}
-+		for (i = 0; i < MAX_PENDING_REQS; i++) {
-+			page = netbk->mmap_pages[i];
-+			SetPageForeign(page, netif_page_release);
-+			netif_set_page_ext(page, group, i);
-+			INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
-+		}
- 
--	for (i = 0; i < MAX_PENDING_REQS; i++) {
--		page = netbk->mmap_pages[i];
--		SetPageForeign(page, netif_page_release);
--		netif_set_page_ext(page, 0, i);
--		INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
--	}
-+		netbk->pending_cons = 0;
-+		netbk->pending_prod = MAX_PENDING_REQS;
-+		for (i = 0; i < MAX_PENDING_REQS; i++)
-+			netbk->pending_ring[i] = i;
- 
--	netbk->pending_cons = 0;
--	netbk->pending_prod = MAX_PENDING_REQS;
--	for (i = 0; i < MAX_PENDING_REQS; i++)
--		netbk->pending_ring[i] = i;
-+		tasklet_init(&netbk->net_tx_tasklet, net_tx_action,
-+				(unsigned long)netbk);
-+		tasklet_init(&netbk->net_rx_tasklet, net_rx_action,
-+				(unsigned long)netbk);
- 
--	tasklet_init(&netbk->net_tx_tasklet, net_tx_action, 0);
--	tasklet_init(&netbk->net_rx_tasklet, net_rx_action, 0);
-+		INIT_LIST_HEAD(&netbk->pending_inuse_head);
-+		INIT_LIST_HEAD(&netbk->net_schedule_list);
- 
--	INIT_LIST_HEAD(&netbk->pending_inuse_head);
--	INIT_LIST_HEAD(&netbk->net_schedule_list);
-+		spin_lock_init(&netbk->net_schedule_list_lock);
- 
--	spin_lock_init(&netbk->net_schedule_list_lock);
-+		atomic_set(&netbk->netfront_count, 0);
-+	}
- 
- 	netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
- 	if (MODPARM_copy_skb) {
-@@ -1551,25 +1606,28 @@ static int __init netback_init(void)
- 
- 	rc = netif_xenbus_init();
- 	if (rc)
--		goto failed_init1;
-+		goto failed_init;
- 
- #ifdef NETBE_DEBUG_INTERRUPT
- 	(void)bind_virq_to_irqhandler(VIRQ_DEBUG,
- 				      0,
- 				      netif_be_dbg,
--				      SA_SHIRQ,
-+				      IRQF_SHARED,
- 				      "net-be-dbg",
- 				      &netif_be_dbg);
- #endif
- 
- 	return 0;
- 
--failed_init1:
--	free_empty_pages_and_pagevec(netbk->mmap_pages, MAX_PENDING_REQS);
--failed_init2:
--	del_timer(&netbk->netbk_tx_pending_timer);
--	del_timer(&netbk->net_timer);
--	vfree(netbk);
-+failed_init:
-+	for (i = 0; i < group; i++) {
-+		struct xen_netbk *netbk = &xen_netbk[i];
-+		free_empty_pages_and_pagevec(netbk->mmap_pages,
-+				MAX_PENDING_REQS);
-+		del_timer(&netbk->netbk_tx_pending_timer);
-+		del_timer(&netbk->net_timer);
-+	}
-+	vfree(xen_netbk);
- 	return rc;
- 
- }
--- 
-1.7.4
-
-
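The failure path above unwinds only the groups whose pages and timers were
fully set up, then frees the array itself. A minimal user-space sketch of
that partial-unwind pattern (all names illustrative; malloc stands in for
alloc_empty_pages_and_pagevec and the timers):

    #include <stdlib.h>

    struct group { void *pages; };

    static int init_groups(struct group *g, int nr)
    {
        int i, group;

        for (group = 0; group < nr; group++) {
            g[group].pages = malloc(4096);  /* stand-in for mmap_pages */
            if (!g[group].pages)
                goto failed_init;
        }
        return 0;

    failed_init:
        /* free only groups [0, group): the failing one allocated nothing */
        for (i = 0; i < group; i++)
            free(g[i].pages);
        return -1;                          /* -ENOMEM in the kernel */
    }

    int main(void)
    {
        struct group g[4];
        return init_groups(g, 4) ? EXIT_FAILURE : EXIT_SUCCESS;
    }

The loop deliberately stops short of the group that failed, since its own
allocations never succeeded.
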
-From e7317b70c0436c109b605bb377939cb2eaff6a6f Mon Sep 17 00:00:00 2001
-From: Dongxiao Xu <dongxiao.xu at intel.com>
-Date: Wed, 19 May 2010 17:08:22 -0700
-Subject: [PATCH 040/203] xen: netback: Use Kernel thread to replace the tasklet.
-
-Kernel thread has more control over QoS, and could improve dom0's
-userspace responsiveness. This option is currently off by default.
-
-Signed-off-by: Dongxiao Xu <dongxiao.xu at intel.com>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/common.h  |   13 ++++-
- drivers/xen/netback/netback.c |  109 ++++++++++++++++++++++++++++++++++++----
- 2 files changed, 109 insertions(+), 13 deletions(-)
-
-diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
-index 847ba58..36cb2b9 100644
---- a/drivers/xen/netback/common.h
-+++ b/drivers/xen/netback/common.h
-@@ -257,8 +257,17 @@ union page_ext {
- };
- 
- struct xen_netbk {
--	struct tasklet_struct net_tx_tasklet;
--	struct tasklet_struct net_rx_tasklet;
-+	union {
-+		struct {
-+			struct tasklet_struct net_tx_tasklet;
-+			struct tasklet_struct net_rx_tasklet;
-+		} tasklet;
-+
-+		struct {
-+			wait_queue_head_t netbk_action_wq;
-+			struct task_struct *task;
-+		} kthread;
-+	};
- 
- 	struct sk_buff_head rx_queue;
- 	struct sk_buff_head tx_queue;
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index feefb14..547dcaa 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -38,6 +38,7 @@
- 
- #include <linux/tcp.h>
- #include <linux/udp.h>
-+#include <linux/kthread.h>
- 
- #include <xen/balloon.h>
- #include <xen/events.h>
-@@ -128,12 +129,31 @@ MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
- 
- int netbk_copy_skb_mode;
- 
-+static int MODPARM_netback_kthread;
-+module_param_named(netback_kthread, MODPARM_netback_kthread, bool, 0);
-+MODULE_PARM_DESC(netback_kthread, "Use kernel thread to replace tasklet");
++	if (vif->can_sg || vif->gso || vif->gso_prefix)
++		max += MAX_SKB_FRAGS + 1; /* extra_info + frags */
 +
-+/*
-+ * Netback bottom half handler.
-+ * dir indicates the data direction.
-+ * rx: 1, tx: 0.
-+ */
-+static inline void xen_netbk_bh_handler(struct xen_netbk *netbk, int dir)
-+{
-+	if (MODPARM_netback_kthread)
-+		wake_up(&netbk->kthread.netbk_action_wq);
-+	else if (dir)
-+		tasklet_schedule(&netbk->tasklet.net_rx_tasklet);
-+	else
-+		tasklet_schedule(&netbk->tasklet.net_tx_tasklet);
++	return max;
 +}
 +
- static inline void maybe_schedule_tx_action(struct xen_netbk *netbk)
- {
- 	smp_mb();
- 	if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
- 	    !list_empty(&netbk->net_schedule_list))
--		tasklet_schedule(&netbk->net_tx_tasklet);
-+		xen_netbk_bh_handler(netbk, 0);
- }
- 
- static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
-@@ -289,7 +309,8 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
- 		}
- 	}
- 	skb_queue_tail(&netbk->rx_queue, skb);
--	tasklet_schedule(&netbk->net_rx_tasklet);
-+
-+	xen_netbk_bh_handler(netbk, 1);
- 
- 	return 0;
- 
-@@ -593,19 +614,19 @@ static void net_rx_action(unsigned long data)
- 	/* More work to do? */
- 	if (!skb_queue_empty(&netbk->rx_queue) &&
- 			!timer_pending(&netbk->net_timer))
--		tasklet_schedule(&netbk->net_rx_tasklet);
-+		xen_netbk_bh_handler(netbk, 1);
- }
- 
- static void net_alarm(unsigned long data)
- {
- 	struct xen_netbk *netbk = (struct xen_netbk *)data;
--	tasklet_schedule(&netbk->net_rx_tasklet);
-+	xen_netbk_bh_handler(netbk, 1);
- }
- 
- static void netbk_tx_pending_timeout(unsigned long data)
- {
- 	struct xen_netbk *netbk = (struct xen_netbk *)data;
--	tasklet_schedule(&netbk->net_tx_tasklet);
-+	xen_netbk_bh_handler(netbk, 0);
- }
- 
- struct net_device_stats *netif_be_get_stats(struct net_device *dev)
-@@ -1348,7 +1369,7 @@ static void net_tx_submit(struct xen_netbk *netbk)
- 			continue;
- 		}
- 
--		netif_rx(skb);
-+		netif_rx_ni(skb);
- 		netif->dev->last_rx = jiffies;
- 	}
- 
-@@ -1399,7 +1420,7 @@ static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
- 	netbk->dealloc_prod++;
- 	spin_unlock_irqrestore(&_lock, flags);
- 
--	tasklet_schedule(&netbk->net_tx_tasklet);
-+	xen_netbk_bh_handler(netbk, 0);
- }
- 
- static void netif_page_release(struct page *page, unsigned int order)
-@@ -1523,6 +1544,46 @@ static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
- }
- #endif
- 
-+static inline int rx_work_todo(struct xen_netbk *netbk)
++int xen_netbk_rx_ring_full(struct xenvif *vif)
 +{
-+	return !skb_queue_empty(&netbk->rx_queue);
++	RING_IDX peek   = vif->rx_req_cons_peek;
++	RING_IDX needed = max_required_rx_slots(vif);
++
++	return ((vif->rx.sring->req_prod - peek) < needed) ||
++	       ((vif->rx.rsp_prod_pvt + XEN_NETIF_RX_RING_SIZE - peek) < needed);
 +}
 +
-+static inline int tx_work_todo(struct xen_netbk *netbk)
++int xen_netbk_must_stop_queue(struct xenvif *vif)
 +{
-+	if (netbk->dealloc_cons != netbk->dealloc_prod)
-+		return 1;
++	if (!xen_netbk_rx_ring_full(vif))
++		return 0;
 +
-+	if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
-+			!list_empty(&netbk->net_schedule_list))
-+		return 1;
++	vif->rx.sring->req_event = vif->rx_req_cons_peek +
++		max_required_rx_slots(vif);
++	mb(); /* request notification /then/ check the queue */
 +
-+	return 0;
++	return xen_netbk_rx_ring_full(vif);
 +}
 +
-+static int netbk_action_thread(void *data)
++/*
++ * Returns true if we should start a new receive buffer instead of
++ * adding 'size' bytes to a buffer which currently contains 'offset'
++ * bytes.
++ */
++static bool start_new_rx_buffer(int offset, unsigned long size, int head)
 +{
-+	struct xen_netbk *netbk = (struct xen_netbk *)data;
-+	while (!kthread_should_stop()) {
-+		wait_event_interruptible(netbk->kthread.netbk_action_wq,
-+				rx_work_todo(netbk)
-+				|| tx_work_todo(netbk)
-+				|| kthread_should_stop());
-+		cond_resched();
-+
-+		if (kthread_should_stop())
-+			break;
-+
-+		if (rx_work_todo(netbk))
-+			net_rx_action((unsigned long)netbk);
++	/* simple case: we have completely filled the current buffer. */
++	if (offset == MAX_BUFFER_OFFSET)
++		return true;
 +
-+		if (tx_work_todo(netbk))
-+			net_tx_action((unsigned long)netbk);
-+	}
++	/*
++	 * complex case: start a fresh buffer if the current frag
++	 * would overflow the current buffer but only if:
++	 *     (i)   this frag would fit completely in the next buffer
++	 * and (ii)  there is already some data in the current buffer
++	 * and (iii) this is not the head buffer.
++	 *
++	 * Where:
++	 * - (i) stops us splitting a frag into two copies
++	 *   unless the frag is too large for a single buffer.
++	 * - (ii) stops us from leaving a buffer pointlessly empty.
++	 * - (iii) stops us leaving the first buffer
++	 *   empty. Strictly speaking this is already covered
++	 *   by (ii) but is explicitly checked because
++	 *   netfront relies on the first buffer being
++	 *   non-empty and can crash otherwise.
++	 *
++	 * This means we will effectively linearise small
++	 * frags but do not needlessly split large buffers
++	 * into multiple copies, and tend to give large frags their
++	 * own buffers as before.
++	 */
++	if ((offset + size > MAX_BUFFER_OFFSET) &&
++	    (size <= MAX_BUFFER_OFFSET) && offset && !head)
++		return true;
 +
-+	return 0;
++	return false;
 +}
 +
- static int __init netback_init(void)
- {
- 	int i;
-@@ -1580,10 +1641,34 @@ static int __init netback_init(void)
- 		for (i = 0; i < MAX_PENDING_REQS; i++)
- 			netbk->pending_ring[i] = i;
- 
--		tasklet_init(&netbk->net_tx_tasklet, net_tx_action,
--				(unsigned long)netbk);
--		tasklet_init(&netbk->net_rx_tasklet, net_rx_action,
--				(unsigned long)netbk);
-+		if (MODPARM_netback_kthread) {
-+			init_waitqueue_head(&netbk->kthread.netbk_action_wq);
-+			netbk->kthread.task =
-+				kthread_create(netbk_action_thread,
-+					       (void *)netbk,
-+					       "netback/%u", group);
-+
-+			if (!IS_ERR(netbk->kthread.task)) {
-+				kthread_bind(netbk->kthread.task, group);
-+				wake_up_process(netbk->kthread.task);
-+			} else {
-+				printk(KERN_ALERT
-+					"kthread_run() fails at netback\n");
-+				free_empty_pages_and_pagevec(netbk->mmap_pages,
-+						MAX_PENDING_REQS);
-+				del_timer(&netbk->netbk_tx_pending_timer);
-+				del_timer(&netbk->net_timer);
-+				rc = PTR_ERR(netbk->kthread.task);
-+				goto failed_init;
-+			}
-+		} else {
-+			tasklet_init(&netbk->tasklet.net_tx_tasklet,
-+				     net_tx_action,
-+				     (unsigned long)netbk);
-+			tasklet_init(&netbk->tasklet.net_rx_tasklet,
-+				     net_rx_action,
-+				     (unsigned long)netbk);
-+		}
- 
- 		INIT_LIST_HEAD(&netbk->pending_inuse_head);
- 		INIT_LIST_HEAD(&netbk->net_schedule_list);
-@@ -1626,6 +1711,8 @@ failed_init:
- 				MAX_PENDING_REQS);
- 		del_timer(&netbk->netbk_tx_pending_timer);
- 		del_timer(&netbk->net_timer);
-+		if (MODPARM_netback_kthread)
-+			kthread_stop(netbk->kthread.task);
- 	}
- 	vfree(xen_netbk);
- 	return rc;
--- 
-1.7.4
-
-
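The kthread mode introduced above is essentially a worker loop: sleep until
there is rx or tx work, or a stop request, then run the matching action. A
compilable user-space analogue using pthreads (names hypothetical; the real
driver uses wait_event_interruptible and kthread_should_stop):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  wq   = PTHREAD_COND_INITIALIZER;
    static bool rx_work, tx_work, should_stop;

    static void *action_thread(void *arg)
    {
        pthread_mutex_lock(&lock);
        for (;;) {
            /* analogue of wait_event_interruptible(...) */
            while (!rx_work && !tx_work && !should_stop)
                pthread_cond_wait(&wq, &lock);
            if (should_stop)
                break;
            if (rx_work) { rx_work = false; puts("rx action"); }
            if (tx_work) { tx_work = false; puts("tx action"); }
        }
        pthread_mutex_unlock(&lock);
        return NULL;
    }

    int main(void)
    {
        pthread_t t;
        pthread_create(&t, NULL, action_thread, NULL);

        pthread_mutex_lock(&lock);
        rx_work = true;            /* analogue of xen_netbk_bh_handler() */
        pthread_cond_signal(&wq);
        pthread_mutex_unlock(&lock);

        pthread_mutex_lock(&lock);
        should_stop = true;        /* analogue of kthread_stop() */
        pthread_cond_signal(&wq);
        pthread_mutex_unlock(&lock);

        pthread_join(t, NULL);
        return 0;
    }

Checking the predicate under the lock before sleeping is what prevents the
lost-wakeup race that a bare signal/wait pairing would have.
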
-From 6359d5939c5d1f59b794cd02e8cdbd36b9f3434d Mon Sep 17 00:00:00 2001
-From: James Harper <james.harper at bendigoit.com.au>
-Date: Fri, 28 May 2010 23:12:56 -0700
-Subject: [PATCH 041/203] xen: netback: avoid null-pointer access in netback_uevent
-
-Check if drvdata has been set up yet and return if it hasn't.
-
-Signed-off-by: James Harper <james.harper at bendigoit.com.au>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/xenbus.c |    9 +++++++--
- 1 files changed, 7 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
-index fcd3c34..e30b0c7 100644
---- a/drivers/xen/netback/xenbus.c
-+++ b/drivers/xen/netback/xenbus.c
-@@ -154,12 +154,17 @@ fail:
-  */
- static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env)
- {
--	struct backend_info *be = dev_get_drvdata(&xdev->dev);
--	struct xen_netif *netif = be->netif;
-+	struct backend_info *be;
-+	struct xen_netif *netif;
- 	char *val;
- 
- 	DPRINTK("netback_uevent");
- 
-+	be = dev_get_drvdata(&xdev->dev);
-+	if (!be)
-+		return 0;
-+	netif = be->netif;
-+
- 	val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
- 	if (IS_ERR(val)) {
- 		int err = PTR_ERR(val);
--- 
-1.7.4
-
-
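The fix above is the usual defensive-callback pattern: a uevent can fire
before probe has attached the driver data, so the handler must tolerate a
NULL pointer. A small sketch under that assumption (types illustrative):

    #include <stdio.h>

    struct backend_info { const char *script; };

    static int uevent(struct backend_info *be)
    {
        if (!be)                /* not set up yet: nothing to report */
            return 0;
        printf("script=%s\n", be->script);
        return 0;
    }

    int main(void)
    {
        struct backend_info be = { .script = "/etc/xen/scripts/vif" };
        uevent(NULL);           /* early call: harmless */
        uevent(&be);
        return 0;
    }
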
-From 4a818daa044d9d499412e8f6e2e3086c0521e7b3 Mon Sep 17 00:00:00 2001
-From: Keir Fraser <keir.fraser at citrix.com>
-Date: Fri, 11 Jun 2010 11:48:30 +0100
-Subject: [PATCH 042/203] xen: netback: Fixes for delayed copy of tx network packets.
-
- - Should call net_tx_action_dealloc() even when dealloc ring is
-   empty, as there may in any case be work to do on the
-   pending_inuse list.
- - Should not exit directly from the middle of the tx_action tasklet,
-   as the tx_pending_timer should always be checked and updated at the
-   end of the tasklet.
-
-Signed-off-by: Keir Fraser <keir.fraser at citrix.com>
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-[picked from linux-2.6.18-xen.hg 959:1a97bd686258, ported across a43e2175 "xen/netback: move code around"]
----
- drivers/xen/netback/netback.c |   25 ++++++++++++-------------
- 1 files changed, 12 insertions(+), 13 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 547dcaa..58dfbd2 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -1372,16 +1372,6 @@ static void net_tx_submit(struct xen_netbk *netbk)
- 		netif_rx_ni(skb);
- 		netif->dev->last_rx = jiffies;
- 	}
--
--	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
--	    !list_empty(&netbk->pending_inuse_head)) {
--		struct netbk_tx_pending_inuse *oldest;
--
--		oldest = list_entry(netbk->pending_inuse_head.next,
--				    struct netbk_tx_pending_inuse, list);
--		mod_timer(&netbk->netbk_tx_pending_timer,
--				oldest->alloc_time + HZ);
--	}
- }
- 
- /* Called after netfront has transmitted */
-@@ -1391,19 +1381,28 @@ static void net_tx_action(unsigned long data)
- 	unsigned nr_mops;
- 	int ret;
- 
--	if (netbk->dealloc_cons != netbk->dealloc_prod)
--		net_tx_action_dealloc(netbk);
-+	net_tx_action_dealloc(netbk);
- 
- 	nr_mops = net_tx_build_mops(netbk);
- 
- 	if (nr_mops == 0)
--		return;
-+		goto out;
- 
- 	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
- 					netbk->tx_map_ops, nr_mops);
- 	BUG_ON(ret);
- 
- 	net_tx_submit(netbk);
-+out:
-+	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
-+	    !list_empty(&netbk->pending_inuse_head)) {
-+		struct netbk_tx_pending_inuse *oldest;
-+
-+		oldest = list_entry(netbk->pending_inuse_head.next,
-+				    struct netbk_tx_pending_inuse, list);
-+		mod_timer(&netbk->netbk_tx_pending_timer,
-+				oldest->alloc_time + HZ);
-+	}
- }
- 
- static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
--- 
-1.7.4
-
-
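The restructuring above replaces an early return with a single exit label so
the pending-inuse timer is re-armed on every pass. A stubbed sketch of that
control flow (the stubs stand in for the real dealloc/map-op/timer
machinery):

    #include <stdio.h>

    static void dealloc(void)     { puts("dealloc"); }
    static int  build_mops(void)  { return 0; }  /* no map ops this pass */
    static void submit(void)      { puts("submit"); }
    static void rearm_timer(void) { puts("timer re-armed"); }

    static void tx_action(void)
    {
        int nr_mops;

        dealloc();          /* always: pending_inuse list may have work */
        nr_mops = build_mops();
        if (nr_mops == 0)
            goto out;       /* was "return", which skipped the timer */
        submit();
    out:
        rearm_timer();
    }

    int main(void) { tx_action(); return 0; }
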
-From 48fa1af97e6c9d304c04f70a75de1340e7d79e18 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Fri, 11 Jun 2010 10:51:01 +0100
-Subject: [PATCH 043/203] xen: netback: handle NET_SKBUFF_DATA_USES_OFFSET correctly
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Cc: Jan Beulich <JBeulich at novell.com>
----
- drivers/xen/netback/netback.c |    4 ++++
- 1 files changed, 4 insertions(+), 0 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 58dfbd2..aa094af 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -218,7 +218,11 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
- 		len -= copy;
- 	}
- 
-+#ifdef NET_SKBUFF_DATA_USES_OFFSET
-+	offset = 0;
-+#else
- 	offset = nskb->data - skb->data;
-+#endif
- 
- 	nskb->transport_header = skb->transport_header + offset;
- 	nskb->network_header = skb->network_header + offset;
--- 
-1.7.4
-
-
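The #ifdef above matters because skb header fields can be stored either as
offsets from a base pointer or as raw pointers: offsets copy across buffers
unchanged (a delta of 0), while pointers must be rebased. An illustrative
sketch, with both areas carved from one arena so the pointer arithmetic is
well defined:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        char mem[128];              /* one arena, two "skb" data areas */
        char *old_buf = mem, *new_buf = mem + 64;

        memcpy(old_buf, "....IPv4hdr", 12);
        memcpy(new_buf, old_buf, 64);

        /* offset representation: valid in both buffers as-is */
        size_t net_off = 4;
        printf("offset form: %zu stays %zu\n", net_off, net_off);

        /* pointer representation: must be rebased into new_buf */
        char *net_hdr = old_buf + 4;
        printf("pointer form: rebased by %td bytes\n", new_buf - old_buf);
        (void)net_hdr;
        return 0;
    }
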
-From 7d3e6e42251f179e407fa5236f613e5500b3a3ea Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Fri, 11 Jun 2010 10:51:01 +0100
-Subject: [PATCH 044/203] xen: netback: drop frag member from struct netbk_rx_meta
-
-It has been unused since c3219dc "xen/netback: completely drop flip
-support", as has netbk_free_pages().
-
-(Although it now has only a single member, struct netbk_rx_meta will
-gain other members in a subsequent patch, so there is no point in
-reworking to get rid of the struct.)
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/xen/netback/common.h  |    1 -
- drivers/xen/netback/netback.c |    8 --------
- 2 files changed, 0 insertions(+), 9 deletions(-)
-
-diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
-index 36cb2b9..be4fe91 100644
---- a/drivers/xen/netback/common.h
-+++ b/drivers/xen/netback/common.h
-@@ -230,7 +230,6 @@ struct pending_tx_info {
- typedef unsigned int pending_ring_idx_t;
- 
- struct netbk_rx_meta {
--	skb_frag_t frag;
- 	int id;
- };
- 
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index aa094af..9f7e489 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -411,14 +411,6 @@ static void netbk_gop_skb(struct sk_buff *skb,
- 	netif->rx.req_cons += nr_frags + extra;
- }
- 
--static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
--{
--	int i;
--
--	for (i = 0; i < nr_frags; i++)
--		put_page(meta[i].frag.page);
--}
--
- /* This is a twin to netbk_gop_skb.  Assume that netbk_gop_skb was
-    used to set up the operations on the top of
-    netrx_pending_operations, which have since been done.  Check that
--- 
-1.7.4
-
-
-From 1ced27150d0092c40ebbbbb3896192003d433c0e Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Fri, 11 Jun 2010 10:51:01 +0100
-Subject: [PATCH 045/203] xen: netback: linearise SKBs as we copy them into guest memory on guest-RX.
-
-There's no point in sending lots of little packets to a copying
-receiver if we can instead arrange to copy them all into a single RX
-buffer.  We need to copy anyway, so there's no overhead here, and this
-is a little bit easier on the receiving domain's network stack.
-
-Based on a patch by Steven Smith. Fixed to not skip unnecessarily to
-the next buffer, which could leave the head fragment of a received
-frame empty if the headlen of an SKB was large (which would crash
-netfront). Instead we only try and pack "small enough" fragments
-together but do not try to coalesce large or whole page fragments.
-
-In previous iterations of this patch we also tried to only include
-2048 bytes per frag because very old netfronts stored other
-information in the second half of the page. It has been determined
-that only frontends which support scatter-gather are going to come
-down this path and that any guest which supports scatter-gather is
-also new enough to allow us to use the full page size for each
-fragment (since this limitation which fixed as part of the SG
-implementation) so we do not need this restriction.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Cc: Steven Smith <Steven.Smith at eu.citrix.com>
----
- drivers/xen/netback/common.h  |   15 ++-
- drivers/xen/netback/netback.c |  282 ++++++++++++++++++++++++++++++-----------
- 2 files changed, 218 insertions(+), 79 deletions(-)
-
-diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
-index be4fe91..9c0c048 100644
---- a/drivers/xen/netback/common.h
-+++ b/drivers/xen/netback/common.h
-@@ -82,7 +82,9 @@ struct xen_netif {
- 	/* Internal feature information. */
- 	u8 can_queue:1;	/* can queue packets for receiver? */
- 
--	/* Allow netif_be_start_xmit() to peek ahead in the rx request ring. */
-+	/* Allow netif_be_start_xmit() to peek ahead in the rx request
-+	 * ring.  This is a prediction of what rx_req_cons will be once
-+	 * all queued skbs are put on the ring. */
- 	RING_IDX rx_req_cons_peek;
- 
- 	/* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
-@@ -231,6 +233,8 @@ typedef unsigned int pending_ring_idx_t;
- 
- struct netbk_rx_meta {
- 	int id;
-+	int size;
-+	int gso_size;
- };
- 
- struct netbk_tx_pending_inuse {
-@@ -240,6 +244,8 @@ struct netbk_tx_pending_inuse {
- 
- #define MAX_PENDING_REQS 256
- 
-+#define MAX_BUFFER_OFFSET PAGE_SIZE
-+
- /* extra field used in struct page */
- union page_ext {
- 	struct {
-@@ -301,7 +307,12 @@ struct xen_netbk {
- 	struct multicall_entry rx_mcl[NET_RX_RING_SIZE+3];
- 	struct mmu_update rx_mmu[NET_RX_RING_SIZE];
- 	struct gnttab_transfer grant_trans_op[NET_RX_RING_SIZE];
--	struct gnttab_copy grant_copy_op[NET_RX_RING_SIZE];
-+	/*
-+	 * Each head or fragment can be up to 4096 bytes. Given
-+	 * MAX_BUFFER_OFFSET of 4096 the worst case is that each
-+	 * head/fragment uses 2 copy operation.
-+	 */
-+	struct gnttab_copy grant_copy_op[2*NET_RX_RING_SIZE];
- 	unsigned char rx_notify[NR_IRQS];
- 	u16 notify_list[NET_RX_RING_SIZE];
- 	struct netbk_rx_meta meta[NET_RX_RING_SIZE];
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 9f7e489..d53d88e 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -259,6 +259,48 @@ static void tx_queue_callback(unsigned long data)
- 		netif_wake_queue(netif->dev);
- }
- 
-+/* Figure out how many ring slots we're going to need to send @skb to
-+   the guest. */
-+static unsigned count_skb_slots(struct sk_buff *skb, struct xen_netif *netif)
++/*
++ * Figure out how many ring slots we're going to need to send @skb to
++ * the guest. This function is essentially a dry run of
++ * netbk_gop_frag_copy.
++ */
++unsigned int xen_netbk_count_skb_slots(struct xenvif *vif, struct sk_buff *skb)
 +{
-+	unsigned count;
-+	unsigned copy_off;
-+	unsigned i;
-+
-+	copy_off = 0;
-+	count = 1;
++	unsigned int count;
++	int i, copy_off;
 +
-+	BUG_ON(offset_in_page(skb->data) + skb_headlen(skb) > MAX_BUFFER_OFFSET);
++	count = DIV_ROUND_UP(
++			offset_in_page(skb->data)+skb_headlen(skb), PAGE_SIZE);
 +
-+	copy_off = skb_headlen(skb);
++	copy_off = skb_headlen(skb) % PAGE_SIZE;
 +
 +	if (skb_shinfo(skb)->gso_size)
 +		count++;
@@ -8495,9 +3096,7 @@ index 9f7e489..d53d88e 100644
 +		while (size > 0) {
 +			BUG_ON(copy_off > MAX_BUFFER_OFFSET);
 +
-+			/* These checks are the same as in netbk_gop_frag_copy */
-+			if (copy_off == MAX_BUFFER_OFFSET
-+			    || ((copy_off + size > MAX_BUFFER_OFFSET) && (size <= MAX_BUFFER_OFFSET) && copy_off)) {
++			if (start_new_rx_buffer(copy_off, size, 0)) {
 +				count++;
 +				copy_off = 0;
 +			}
@@ -8513,100 +3112,69 @@ index 9f7e489..d53d88e 100644
 +	return count;
 +}
 +
- int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
- {
- 	struct xen_netif *netif = netdev_priv(dev);
-@@ -290,8 +332,9 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
- 		skb = nskb;
- 	}
- 
--	netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 +
--				   !!skb_shinfo(skb)->gso_size;
-+	/* Reserve ring slots for the worst-case number of
-+	 * fragments. */
-+	netif->rx_req_cons_peek += count_skb_slots(skb, netif);
- 	netif_get(netif);
- 
- 	if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
-@@ -335,96 +378,165 @@ struct netrx_pending_operations {
- 	struct gnttab_copy *copy;
- 	struct multicall_entry *mcl;
- 	struct netbk_rx_meta *meta;
++struct netrx_pending_operations {
++	unsigned copy_prod, copy_cons;
++	unsigned meta_prod, meta_cons;
++	struct gnttab_copy *copy;
++	struct netbk_rx_meta *meta;
 +	int copy_off;
 +	grant_ref_t copy_gref;
- };
- 
- /* Set up the grant operations for this fragment.  If it's a flipping
-    interface, we also set up the unmap request from here. */
--static u16 netbk_gop_frag(struct xen_netif *netif, struct netbk_rx_meta *meta,
--			  int i, struct netrx_pending_operations *npo,
--			  struct page *page, unsigned long size,
--			  unsigned long offset)
++};
 +
-+static void netbk_gop_frag_copy(struct xen_netif *netif,
-+				struct netrx_pending_operations *npo,
-+				struct page *page, unsigned long size,
-+				unsigned long offset, int head)
- {
- 	struct gnttab_copy *copy_gop;
--	struct xen_netif_rx_request *req;
--	unsigned long old_mfn;
++static struct netbk_rx_meta *get_next_rx_buffer(struct xenvif *vif,
++						struct netrx_pending_operations *npo)
++{
 +	struct netbk_rx_meta *meta;
- 	int group = netif_page_group(page);
- 	int idx = netif_page_index(page);
-+	unsigned long bytes;
++	struct xen_netif_rx_request *req;
 +
-+	/* Data must not cross a page boundary. */
-+	BUG_ON(size + offset > PAGE_SIZE);
- 
--	old_mfn = virt_to_mfn(page_address(page));
-+	meta = npo->meta + npo->meta_prod - 1;
- 
--	req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
-+	while (size > 0) {
-+		BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
- 
--	copy_gop = npo->copy + npo->copy_prod++;
--	copy_gop->flags = GNTCOPY_dest_gref;
--	if (PageForeign(page)) {
-+		/*
-+		 * Move to a new receive buffer if:
-+		 *
-+		 * simple case: we have completely filled the current buffer.
-+		 *
-+		 * complex case: the current frag would overflow
-+		 * the current buffer but only if:
-+		 *     (i)   this frag would fit completely in the next buffer
-+		 * and (ii)  there is already some data in the current buffer
-+		 * and (iii) this is not the head buffer.
-+		 *
-+		 * Where:
-+		 * - (i) stops us splitting a frag into two copies
-+		 *   unless the frag is too large for a single buffer.
-+		 * - (ii) stops us from leaving a buffer pointlessly empty.
-+		 * - (iii) stops us leaving the first buffer
-+		 *   empty. Strictly speaking this is already covered
-+		 *   by (ii) but is explicitly checked because
-+		 *   netfront relies on the first buffer being
-+		 *   non-empty and can crash otherwise.
-+		 *
-+		 * This means we will effectively linearise small
-+		 * frags but do not needlessly split large buffers
-+		 * into multiple copies, and tend to give large frags their
-+		 * own buffers as before.
-+		 */
-+		if (npo->copy_off == MAX_BUFFER_OFFSET
-+		    || ((npo->copy_off + size > MAX_BUFFER_OFFSET) && (size <= MAX_BUFFER_OFFSET) && npo->copy_off && !head)) {
-+			struct xen_netif_rx_request *req;
-+
-+			BUG_ON(head); /* Netfront requires there to be some data in the head buffer. */
-+			/* Overflowed this request, go to the next one */
-+			req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
-+			meta = npo->meta + npo->meta_prod++;
-+			meta->size = 0;
-+			meta->id = req->id;
-+			npo->copy_off = 0;
-+			npo->copy_gref = req->gref;
++	req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
++
++	meta = npo->meta + npo->meta_prod++;
++	meta->gso_size = 0;
++	meta->size = 0;
++	meta->id = req->id;
++
++	npo->copy_off = 0;
++	npo->copy_gref = req->gref;
++
++	return meta;
++}
++
++/*
++ * Set up the grant operations for this fragment. If it's a flipping
++ * interface, we also set up the unmap request from here.
++ */
++static void netbk_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
++				struct netrx_pending_operations *npo,
++				struct page *page, unsigned long size,
++				unsigned long offset, int *head)
++{
++	struct gnttab_copy *copy_gop;
++	struct netbk_rx_meta *meta;
++	/*
++	 * These variables are used iff get_page_ext returns true,
++	 * in which case they are guaranteed to be initialized.
++	 */
++	unsigned int uninitialized_var(group), uninitialized_var(idx);
++	int foreign = get_page_ext(page, &group, &idx);
++	unsigned long bytes;
++
++	/* Data must not cross a page boundary. */
++	BUG_ON(size + offset > PAGE_SIZE);
++
++	meta = npo->meta + npo->meta_prod - 1;
++
++	while (size > 0) {
++		BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
++
++		if (start_new_rx_buffer(npo->copy_off, size, *head)) {
++			/*
++			 * Netfront requires there to be some data in the head
++			 * buffer.
++			 */
++			BUG_ON(*head);
++
++			meta = get_next_rx_buffer(vif, npo);
 +		}
 +
 +		bytes = size;
@@ -8615,30 +3183,23 @@ index 9f7e489..d53d88e 100644
 +
 +		copy_gop = npo->copy + npo->copy_prod++;
 +		copy_gop->flags = GNTCOPY_dest_gref;
-+		if (PageForeign(page)) {
- 		struct xen_netbk *netbk = &xen_netbk[group];
- 		struct pending_tx_info *src_pend = &netbk->pending_tx_info[idx];
- 		copy_gop->source.domid = src_pend->netif->domid;
- 		copy_gop->source.u.ref = src_pend->req.gref;
--		copy_gop->flags |= GNTCOPY_source_gref;
--	} else {
--		copy_gop->source.domid = DOMID_SELF;
--		copy_gop->source.u.gmfn = old_mfn;
--	}
--	copy_gop->source.offset = offset;
--	copy_gop->dest.domid = netif->domid;
--	copy_gop->dest.offset = 0;
--	copy_gop->dest.u.ref = req->gref;
--	copy_gop->len = size;
++		if (foreign) {
++			struct xen_netbk *netbk = &xen_netbk[group];
++			struct pending_tx_info *src_pend;
++
++			src_pend = &netbk->pending_tx_info[idx];
++
++			copy_gop->source.domid = src_pend->vif->domid;
++			copy_gop->source.u.ref = src_pend->req.gref;
 +			copy_gop->flags |= GNTCOPY_source_gref;
 +		} else {
++			void *vaddr = page_address(page);
 +			copy_gop->source.domid = DOMID_SELF;
-+			copy_gop->source.u.gmfn = virt_to_mfn(page_address(page));
++			copy_gop->source.u.gmfn = virt_to_mfn(vaddr);
 +		}
 +		copy_gop->source.offset = offset;
-+		copy_gop->dest.domid = netif->domid;
- 
--	return req->id;
++		copy_gop->dest.domid = vif->domid;
++
 +		copy_gop->dest.offset = npo->copy_off;
 +		copy_gop->dest.u.ref = npo->copy_gref;
 +		copy_gop->len = bytes;
@@ -8648,564 +3209,204 @@ index 9f7e489..d53d88e 100644
 +
 +		offset += bytes;
 +		size -= bytes;
-+		head = 0; /* Must be something in this buffer now */
++
++		/* Leave a gap for the GSO descriptor. */
++		if (*head && skb_shinfo(skb)->gso_size && !vif->gso_prefix)
++			vif->rx.req_cons++;
++
++		*head = 0; /* There must be something in this buffer now. */
++
 +	}
- }
- 
--static void netbk_gop_skb(struct sk_buff *skb,
--			  struct netrx_pending_operations *npo)
-+/* Prepare an SKB to be transmitted to the frontend.  This is
-+   responsible for allocating grant operations, meta structures, etc.
-+   It returns the number of meta structures consumed.  The number of
-+   ring slots used is always equal to the number of meta slots used
-+   plus the number of GSO descriptors used.  Currently, we use either
-+   zero GSO descriptors (for non-GSO packets) or one descriptor (for
-+   frontend-side LRO). */
++}
++
++/*
++ * Prepare an SKB to be transmitted to the frontend.
++ *
++ * This function is responsible for allocating grant operations, meta
++ * structures, etc.
++ *
++ * It returns the number of meta structures consumed. The number of
++ * ring slots used is always equal to the number of meta slots used
++ * plus the number of GSO descriptors used. Currently, we use either
++ * zero GSO descriptors (for non-GSO packets) or one descriptor (for
++ * frontend-side LRO).
++ */
 +static int netbk_gop_skb(struct sk_buff *skb,
 +			 struct netrx_pending_operations *npo)
- {
- 	struct xen_netif *netif = netdev_priv(skb->dev);
- 	int nr_frags = skb_shinfo(skb)->nr_frags;
- 	int i;
--	int extra;
--	struct netbk_rx_meta *head_meta, *meta;
++{
++	struct xenvif *vif = netdev_priv(skb->dev);
++	int nr_frags = skb_shinfo(skb)->nr_frags;
++	int i;
 +	struct xen_netif_rx_request *req;
 +	struct netbk_rx_meta *meta;
++	unsigned char *data;
++	int head = 1;
 +	int old_meta_prod;
 +
 +	old_meta_prod = npo->meta_prod;
- 
--	head_meta = npo->meta + npo->meta_prod++;
--	head_meta->frag.page_offset = skb_shinfo(skb)->gso_type;
--	head_meta->frag.size = skb_shinfo(skb)->gso_size;
--	extra = !!head_meta->frag.size + 1;
-+	req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
++
++	/* Set up a GSO prefix descriptor, if necessary */
++	if (skb_shinfo(skb)->gso_size && vif->gso_prefix) {
++		req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
++		meta = npo->meta + npo->meta_prod++;
++		meta->gso_size = skb_shinfo(skb)->gso_size;
++		meta->size = 0;
++		meta->id = req->id;
++	}
++
++	req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
 +	meta = npo->meta + npo->meta_prod++;
-+	meta->gso_size = skb_shinfo(skb)->gso_size;
++
++	if (!vif->gso_prefix)
++		meta->gso_size = skb_shinfo(skb)->gso_size;
++	else
++		meta->gso_size = 0;
++
 +	meta->size = 0;
 +	meta->id = req->id;
 +	npo->copy_off = 0;
 +	npo->copy_gref = req->gref;
 +
-+	netbk_gop_frag_copy(netif,
-+			    npo, virt_to_page(skb->data),
-+			    skb_headlen(skb),
-+			    offset_in_page(skb->data), 1);
++	data = skb->data;
++	while (data < skb_tail_pointer(skb)) {
++		unsigned int offset = offset_in_page(data);
++		unsigned int len = PAGE_SIZE - offset;
++
++		if (data + len > skb_tail_pointer(skb))
++			len = skb_tail_pointer(skb) - data;
 +
-+	/* Leave a gap for the GSO descriptor. */
-+	if (skb_shinfo(skb)->gso_size)
-+		netif->rx.req_cons++;
- 
- 	for (i = 0; i < nr_frags; i++) {
--		meta = npo->meta + npo->meta_prod++;
--		meta->frag = skb_shinfo(skb)->frags[i];
--		meta->id = netbk_gop_frag(netif, meta, i + extra, npo,
--					  meta->frag.page,
--					  meta->frag.size,
--					  meta->frag.page_offset);
-+		netbk_gop_frag_copy(netif, npo,
++		netbk_gop_frag_copy(vif, skb, npo,
++				    virt_to_page(data), len, offset, &head);
++		data += len;
++	}
++
++	for (i = 0; i < nr_frags; i++) {
++		netbk_gop_frag_copy(vif, skb, npo,
 +				    skb_shinfo(skb)->frags[i].page,
 +				    skb_shinfo(skb)->frags[i].size,
 +				    skb_shinfo(skb)->frags[i].page_offset,
-+				    0);
- 	}
- 
--	/*
--	 * This must occur at the end to ensure that we don't trash skb_shinfo
--	 * until we're done. We know that the head doesn't cross a page
--	 * boundary because such packets get copied in netif_be_start_xmit.
--	 */
--	head_meta->id = netbk_gop_frag(netif, head_meta, 0, npo,
--				       virt_to_page(skb->data),
--				       skb_headlen(skb),
--				       offset_in_page(skb->data));
--
--	netif->rx.req_cons += nr_frags + extra;
++				    &head);
++	}
++
 +	return npo->meta_prod - old_meta_prod;
- }
- 
- /* This is a twin to netbk_gop_skb.  Assume that netbk_gop_skb was
-    used to set up the operations on the top of
-    netrx_pending_operations, which have since been done.  Check that
-    they didn't give any errors and advance over them. */
--static int netbk_check_gop(int nr_frags, domid_t domid,
-+static int netbk_check_gop(int nr_meta_slots, domid_t domid,
- 			   struct netrx_pending_operations *npo)
- {
- 	struct gnttab_copy     *copy_op;
- 	int status = NETIF_RSP_OKAY;
- 	int i;
- 
--	for (i = 0; i <= nr_frags; i++) {
--			copy_op = npo->copy + npo->copy_cons++;
--			if (copy_op->status != GNTST_okay) {
++}
++
++/*
++ * This is a twin to netbk_gop_skb.  Assume that netbk_gop_skb was
++ * used to set up the operations on the top of
++ * netrx_pending_operations, which have since been done.  Check that
++ * they didn't give any errors and advance over them.
++ */
++static int netbk_check_gop(struct xenvif *vif, int nr_meta_slots,
++			   struct netrx_pending_operations *npo)
++{
++	struct gnttab_copy     *copy_op;
++	int status = XEN_NETIF_RSP_OKAY;
++	int i;
++
 +	for (i = 0; i < nr_meta_slots; i++) {
 +		copy_op = npo->copy + npo->copy_cons++;
 +		if (copy_op->status != GNTST_okay) {
- 				DPRINTK("Bad status %d from copy to DOM%d.\n",
- 					copy_op->status, domid);
- 				status = NETIF_RSP_ERROR;
-@@ -435,27 +547,35 @@ static int netbk_check_gop(int nr_frags, domid_t domid,
- }
- 
- static void netbk_add_frag_responses(struct xen_netif *netif, int status,
--				     struct netbk_rx_meta *meta, int nr_frags)
++			netdev_dbg(vif->dev,
++				   "Bad status %d from copy to DOM%d.\n",
++				   copy_op->status, vif->domid);
++			status = XEN_NETIF_RSP_ERROR;
++		}
++	}
++
++	return status;
++}
++
++static void netbk_add_frag_responses(struct xenvif *vif, int status,
 +				     struct netbk_rx_meta *meta,
 +				     int nr_meta_slots)
- {
- 	int i;
- 	unsigned long offset;
- 
--	for (i = 0; i < nr_frags; i++) {
--		int id = meta[i].id;
--		int flags = (i == nr_frags - 1) ? 0 : NETRXF_more_data;
--		
++{
++	int i;
++	unsigned long offset;
++
++	/* No fragments used */
++	if (nr_meta_slots <= 1)
++		return;
++
++	nr_meta_slots--;
++
 +	for (i = 0; i < nr_meta_slots; i++) {
 +		int flags;
 +		if (i == nr_meta_slots - 1)
 +			flags = 0;
 +		else
-+			flags = NETRXF_more_data;
++			flags = XEN_NETRXF_more_data;
 +
- 		offset = 0;
--		make_rx_response(netif, id, status, offset,
--				 meta[i].frag.size, flags);
-+		make_rx_response(netif, meta[i].id, status, offset,
++		offset = 0;
++		make_rx_response(vif, meta[i].id, status, offset,
 +				 meta[i].size, flags);
- 	}
- }
- 
++	}
++}
++
 +struct skb_cb_overlay {
 +	int meta_slots_used;
 +};
 +
- static void net_rx_action(unsigned long data)
- {
- 	struct xen_netif *netif = NULL;
- 	struct xen_netbk *netbk = (struct xen_netbk *)data;
- 	s8 status;
--	u16 id, irq, flags;
++static void xen_netbk_rx_action(struct xen_netbk *netbk)
++{
++	struct xenvif *vif = NULL, *tmp;
++	s8 status;
 +	u16 irq, flags;
- 	struct xen_netif_rx_response *resp;
- 	struct multicall_entry *mcl;
- 	struct sk_buff_head rxq;
-@@ -465,6 +585,7 @@ static void net_rx_action(unsigned long data)
- 	int nr_frags;
- 	int count;
- 	unsigned long offset;
++	struct xen_netif_rx_response *resp;
++	struct sk_buff_head rxq;
++	struct sk_buff *skb;
++	LIST_HEAD(notify);
++	int ret;
++	int nr_frags;
++	int count;
++	unsigned long offset;
 +	struct skb_cb_overlay *sco;
- 
- 	struct netrx_pending_operations npo = {
- 		.mmu   = netbk->rx_mmu,
-@@ -479,10 +600,11 @@ static void net_rx_action(unsigned long data)
- 	count = 0;
- 
- 	while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) {
-+		netif = netdev_priv(skb->dev);
- 		nr_frags = skb_shinfo(skb)->nr_frags;
--		*(int *)skb->cb = nr_frags;
- 
--		netbk_gop_skb(skb, &npo);
++
++	struct netrx_pending_operations npo = {
++		.copy  = netbk->grant_copy_op,
++		.meta  = netbk->meta,
++	};
++
++	skb_queue_head_init(&rxq);
++
++	count = 0;
++
++	while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) {
++		vif = netdev_priv(skb->dev);
++		nr_frags = skb_shinfo(skb)->nr_frags;
++
 +		sco = (struct skb_cb_overlay *)skb->cb;
 +		sco->meta_slots_used = netbk_gop_skb(skb, &npo);
- 
- 		count += nr_frags + 1;
- 
-@@ -541,18 +663,20 @@ static void net_rx_action(unsigned long data)
- 	BUG_ON(npo.mmu_mcl && npo.mcl[npo.mmu_mcl].result != 0);
- 
- 	while ((skb = __skb_dequeue(&rxq)) != NULL) {
--		nr_frags = *(int *)skb->cb;
++
++		count += nr_frags + 1;
++
++		__skb_queue_tail(&rxq, skb);
++
++		/* Filled the batch queue? */
++		if (count + MAX_SKB_FRAGS >= XEN_NETIF_RX_RING_SIZE)
++			break;
++	}
++
++	BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
++
++	if (!npo.copy_prod)
++		return;
++
++	BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
++	ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, &netbk->grant_copy_op,
++					npo.copy_prod);
++	BUG_ON(ret != 0);
++
++	while ((skb = __skb_dequeue(&rxq)) != NULL) {
 +		sco = (struct skb_cb_overlay *)skb->cb;
- 
- 		netif = netdev_priv(skb->dev);
- 
- 		netif->stats.tx_bytes += skb->len;
- 		netif->stats.tx_packets++;
- 
--		status = netbk_check_gop(nr_frags, netif->domid, &npo);
--
--		id = netbk->meta[npo.meta_cons].id;
--		flags = nr_frags ? NETRXF_more_data : 0;
-+		status = netbk_check_gop(sco->meta_slots_used,
-+					 netif->domid, &npo);
- 
-+		if (sco->meta_slots_used == 1)
-+			flags = 0;
-+		else
-+			flags = NETRXF_more_data;
- 		if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
- 			flags |= NETRXF_csum_blank | NETRXF_data_validated;
- 		else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
-@@ -560,10 +684,12 @@ static void net_rx_action(unsigned long data)
- 			flags |= NETRXF_data_validated;
- 
- 		offset = 0;
--		resp = make_rx_response(netif, id, status, offset,
--					skb_headlen(skb), flags);
-+		resp = make_rx_response(netif, netbk->meta[npo.meta_cons].id,
-+					status, offset,
-+					netbk->meta[npo.meta_cons].size,
-+					flags);
- 
--		if (netbk->meta[npo.meta_cons].frag.size) {
-+		if (netbk->meta[npo.meta_cons].gso_size) {
- 			struct xen_netif_extra_info *gso =
- 				(struct xen_netif_extra_info *)
- 				RING_GET_RESPONSE(&netif->rx,
-@@ -571,7 +697,7 @@ static void net_rx_action(unsigned long data)
- 
- 			resp->flags |= NETRXF_extra_info;
- 
--			gso->u.gso.size = netbk->meta[npo.meta_cons].frag.size;
-+			gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size;
- 			gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
- 			gso->u.gso.pad = 0;
- 			gso->u.gso.features = 0;
-@@ -580,9 +706,11 @@ static void net_rx_action(unsigned long data)
- 			gso->flags = 0;
- 		}
- 
--		netbk_add_frag_responses(netif, status,
--				netbk->meta + npo.meta_cons + 1,
--				nr_frags);
-+		if (sco->meta_slots_used > 1) {
-+			netbk_add_frag_responses(netif, status,
-+						 netbk->meta + npo.meta_cons + 1,
-+						 sco->meta_slots_used - 1);
-+		}
- 
- 		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
- 		irq = netif->irq;
-@@ -597,8 +725,8 @@ static void net_rx_action(unsigned long data)
- 			netif_wake_queue(netif->dev);
- 
- 		netif_put(netif);
-+		npo.meta_cons += sco->meta_slots_used;
- 		dev_kfree_skb(skb);
--		npo.meta_cons += nr_frags + 1;
- 	}
- 
- 	while (notify_nr != 0) {
--- 
-1.7.4
-
-
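The slot-counting and copy logic added above follow one packing rule: keep
filling the current buffer unless the fragment would overflow it, fits whole
in the next one, and the current buffer already holds data. A standalone
dry-run sketch of that rule (head handling simplified; the
MAX_BUFFER_OFFSET of 4096 mirrors the patch):

    #include <stdbool.h>
    #include <stdio.h>

    #define MAX_BUFFER_OFFSET 4096UL

    static bool start_new_rx_buffer(unsigned long offset,
                                    unsigned long size, bool head)
    {
        if (offset == MAX_BUFFER_OFFSET)
            return true;    /* current buffer is completely full */
        /* frag overflows, fits whole in the next buffer, buffer is
         * non-empty, and this is not the head buffer */
        if (offset + size > MAX_BUFFER_OFFSET &&
            size <= MAX_BUFFER_OFFSET && offset && !head)
            return true;
        return false;
    }

    static unsigned count_slots(const unsigned long *frags, int n)
    {
        unsigned long copy_off = 0;
        unsigned count = 1;         /* the head buffer */

        for (int i = 0; i < n; i++) {
            unsigned long size = frags[i];
            while (size > 0) {
                unsigned long bytes;

                if (start_new_rx_buffer(copy_off, size, false)) {
                    count++;
                    copy_off = 0;
                }
                bytes = size;
                if (copy_off + bytes > MAX_BUFFER_OFFSET)
                    bytes = MAX_BUFFER_OFFSET - copy_off;
                copy_off += bytes;
                size -= bytes;
            }
        }
        return count;
    }

    int main(void)
    {
        unsigned long frags[] = { 1000, 1000, 1000, 8192 };
        printf("slots needed: %u\n", count_slots(frags, 4));
        return 0;
    }

With these sample sizes the three small frags are linearised into the first
buffer and the 8192-byte frag spills across two more, giving 3 slots.
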
-From 96069b28e612232fb739ef48d9c2c5178b19f562 Mon Sep 17 00:00:00 2001
-From: Dongxiao Xu <dongxiao.xu at intel.com>
-Date: Thu, 10 Jun 2010 19:03:15 +0800
-Subject: [PATCH 046/203] xen: netback: Set allocated memory to zero from vmalloc.
-
-This should fix the windows/linux pv driver issue.
-
-Signed-off-by: Dongxiao Xu <dongxiao.xu at intel.com>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/netback.c |    1 +
- 1 files changed, 1 insertions(+), 0 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index d53d88e..c7024d4 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -1724,6 +1724,7 @@ static int __init netback_init(void)
- 		printk(KERN_ALERT "%s: out of memory\n", __func__);
- 		return -ENOMEM;
- 	}
-+	memset(xen_netbk, 0, sizeof(struct xen_netbk) * xen_netbk_group_nr);
- 
- 	/* We can increase reservation by this much in net_rx_action(). */
- //	balloon_update_driver_allowance(NET_RX_RING_SIZE);
--- 
-1.7.4
-
-
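The one-line fix above works because vmalloc(), like malloc(), returns
uninitialized memory; any state a guest may observe has to be zeroed
explicitly. A user-space sketch of the same idea:

    #include <stdlib.h>
    #include <string.h>

    struct netbk_like { unsigned long state[32]; };

    int main(void)
    {
        int nr = 4;
        struct netbk_like *nb = malloc(nr * sizeof(*nb));

        if (!nb)
            return EXIT_FAILURE;
        memset(nb, 0, nr * sizeof(*nb));  /* the added memset */
        /* calloc(nr, sizeof(*nb)) would combine both steps */
        free(nb);
        return 0;
    }
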
-From 109a748d1c11b7eeaaacedb08c48bc65640b0bb8 Mon Sep 17 00:00:00 2001
-From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
-Date: Mon, 14 Jun 2010 13:23:33 +0100
-Subject: [PATCH 047/203] xen: netback: minor code formatting fixup
-
-Don't include redundant casts from allocation.
-
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/netback.c |    3 +--
- 1 files changed, 1 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index c7024d4..58e920a 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -1718,8 +1718,7 @@ static int __init netback_init(void)
- 		return -ENODEV;
- 
- 	xen_netbk_group_nr = num_online_cpus();
--	xen_netbk = (struct xen_netbk *)vmalloc(sizeof(struct xen_netbk) *
--					    xen_netbk_group_nr);
-+	xen_netbk = vmalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);
- 	if (!xen_netbk) {
- 		printk(KERN_ALERT "%s: out of memory\n", __func__);
- 		return -ENOMEM;
--- 
-1.7.4
-
-
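For reference, the cast being dropped above is redundant in C: void *
converts implicitly to any object pointer type, and a cast can mask a
missing prototype. A tiny sketch:

    #include <stdlib.h>

    struct xen_netbk_like { int dummy; };

    int main(void)
    {
        /* no cast needed on the allocator's void * result */
        struct xen_netbk_like *p = malloc(4 * sizeof(*p));

        free(p);
        return 0;
    }
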
-From 2424b59d68ee6ccdb7e52ab68bdba3a8b742513d Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Wed, 30 Jun 2010 10:12:49 +0100
-Subject: [PATCH 048/203] xen: netback: drop more relics of flipping mode
-
-The mmu_update and gnttab_transfer arrays were only used by flipping
-mode. With those gone the multicall now consists of a single call to
-GNTTABOP_copy so drop the multicall as well and just make the one
-hypercall.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Cc: Jeremy Fitzhardinge <jeremy at goop.org>
-Cc: Paul Durrant <paul.durrant at citrix.com>
----
- drivers/xen/netback/common.h  |    3 --
- drivers/xen/netback/netback.c |   55 +++--------------------------------------
- 2 files changed, 4 insertions(+), 54 deletions(-)
-
-diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
-index 9c0c048..08e7a0e 100644
---- a/drivers/xen/netback/common.h
-+++ b/drivers/xen/netback/common.h
-@@ -304,9 +304,6 @@ struct xen_netbk {
- 	u16 pending_ring[MAX_PENDING_REQS];
- 	u16 dealloc_ring[MAX_PENDING_REQS];
- 
--	struct multicall_entry rx_mcl[NET_RX_RING_SIZE+3];
--	struct mmu_update rx_mmu[NET_RX_RING_SIZE];
--	struct gnttab_transfer grant_trans_op[NET_RX_RING_SIZE];
- 	/*
- 	 * Each head or fragment can be up to 4096 bytes. Given
- 	 * MAX_BUFFER_OFFSET of 4096 the worst case is that each
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 58e920a..ca65840 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -368,15 +368,9 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
- }
- 
- struct netrx_pending_operations {
--	unsigned trans_prod, trans_cons;
--	unsigned mmu_prod, mmu_mcl;
--	unsigned mcl_prod, mcl_cons;
- 	unsigned copy_prod, copy_cons;
- 	unsigned meta_prod, meta_cons;
--	struct mmu_update *mmu;
--	struct gnttab_transfer *trans;
- 	struct gnttab_copy *copy;
--	struct multicall_entry *mcl;
- 	struct netbk_rx_meta *meta;
- 	int copy_off;
- 	grant_ref_t copy_gref;
-@@ -577,7 +571,6 @@ static void net_rx_action(unsigned long data)
- 	s8 status;
- 	u16 irq, flags;
- 	struct xen_netif_rx_response *resp;
--	struct multicall_entry *mcl;
- 	struct sk_buff_head rxq;
- 	struct sk_buff *skb;
- 	int notify_nr = 0;
-@@ -588,10 +581,7 @@ static void net_rx_action(unsigned long data)
- 	struct skb_cb_overlay *sco;
- 
- 	struct netrx_pending_operations npo = {
--		.mmu   = netbk->rx_mmu,
--		.trans = netbk->grant_trans_op,
- 		.copy  = netbk->grant_copy_op,
--		.mcl   = netbk->rx_mcl,
- 		.meta  = netbk->meta,
- 	};
- 
-@@ -617,50 +607,13 @@ static void net_rx_action(unsigned long data)
- 
- 	BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
- 
--	npo.mmu_mcl = npo.mcl_prod;
--	if (npo.mcl_prod) {
--		BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
--		BUG_ON(npo.mmu_prod > ARRAY_SIZE(netbk->rx_mmu));
--		mcl = npo.mcl + npo.mcl_prod++;
--
--		BUG_ON(mcl[-1].op != __HYPERVISOR_update_va_mapping);
--		mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
--
--		mcl->op = __HYPERVISOR_mmu_update;
--		mcl->args[0] = (unsigned long)netbk->rx_mmu;
--		mcl->args[1] = npo.mmu_prod;
--		mcl->args[2] = 0;
--		mcl->args[3] = DOMID_SELF;
--	}
--
--	if (npo.trans_prod) {
--		BUG_ON(npo.trans_prod > ARRAY_SIZE(netbk->grant_trans_op));
--		mcl = npo.mcl + npo.mcl_prod++;
--		mcl->op = __HYPERVISOR_grant_table_op;
--		mcl->args[0] = GNTTABOP_transfer;
--		mcl->args[1] = (unsigned long)netbk->grant_trans_op;
--		mcl->args[2] = npo.trans_prod;
--	}
--
--	if (npo.copy_prod) {
--		BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
--		mcl = npo.mcl + npo.mcl_prod++;
--		mcl->op = __HYPERVISOR_grant_table_op;
--		mcl->args[0] = GNTTABOP_copy;
--		mcl->args[1] = (unsigned long)netbk->grant_copy_op;
--		mcl->args[2] = npo.copy_prod;
--	}
--
--	/* Nothing to do? */
--	if (!npo.mcl_prod)
-+	if (!npo.copy_prod)
- 		return;
- 
--	BUG_ON(npo.mcl_prod > ARRAY_SIZE(netbk->rx_mcl));
--
--	ret = HYPERVISOR_multicall(npo.mcl, npo.mcl_prod);
-+	BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
-+	ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, &netbk->grant_copy_op,
-+					npo.copy_prod);
- 	BUG_ON(ret != 0);
--	/* The mmu_machphys_update() must not fail. */
--	BUG_ON(npo.mmu_mcl && npo.mcl[npo.mmu_mcl].result != 0);
- 
- 	while ((skb = __skb_dequeue(&rxq)) != NULL) {
- 		sco = (struct skb_cb_overlay *)skb->cb;
--- 
-1.7.4
-
-
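With flipping gone, the three-entry multicall above collapses into one
batched grant-copy hypercall guarded by a copy_prod check. A sketch with the
hypercall stubbed out (names illustrative):

    #include <assert.h>
    #include <stdio.h>

    struct gnttab_copy { int status; };

    /* stub for HYPERVISOR_grant_table_op(GNTTABOP_copy, ...) */
    static int grant_table_op_copy(struct gnttab_copy *ops, unsigned count)
    {
        for (unsigned i = 0; i < count; i++)
            ops[i].status = 0;      /* GNTST_okay */
        return 0;
    }

    int main(void)
    {
        struct gnttab_copy ops[8];
        unsigned copy_prod = 3;     /* ops queued by netbk_gop_skb() */
        int ret;

        if (!copy_prod)             /* nothing to do */
            return 0;

        ret = grant_table_op_copy(ops, copy_prod);
        assert(ret == 0);           /* the batch itself must not fail */
        printf("flushed %u copy ops in one call\n", copy_prod);
        return 0;
    }

Per-operation failures are still reported through each entry's status field
and checked later in netbk_check_gop(); only the batch call is asserted.
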
-From 673a19d9e2d78939c6dc9c49e7e35ee54b54c8c7 Mon Sep 17 00:00:00 2001
-From: Paul Durrant <paul.durrant at citrix.com>
-Date: Fri, 2 Jul 2010 10:28:11 +0100
-Subject: [PATCH 049/203] xen: netback: Fix basic indentation issue
-
-Signed-off-by: Paul Durrant <paul.durrant at citrix.com>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/netback.c |   11 +++++++----
- 1 files changed, 7 insertions(+), 4 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index ca65840..848503e 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -445,10 +445,13 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
- 		copy_gop = npo->copy + npo->copy_prod++;
- 		copy_gop->flags = GNTCOPY_dest_gref;
- 		if (PageForeign(page)) {
--		struct xen_netbk *netbk = &xen_netbk[group];
--		struct pending_tx_info *src_pend = &netbk->pending_tx_info[idx];
--		copy_gop->source.domid = src_pend->netif->domid;
--		copy_gop->source.u.ref = src_pend->req.gref;
-+			struct xen_netbk *netbk = &xen_netbk[group];
-+			struct pending_tx_info *src_pend;
-+
-+			src_pend = &netbk->pending_tx_info[idx];
-+
-+			copy_gop->source.domid = src_pend->netif->domid;
-+			copy_gop->source.u.ref = src_pend->req.gref;
- 			copy_gop->flags |= GNTCOPY_source_gref;
- 		} else {
- 			copy_gop->source.domid = DOMID_SELF;
--- 
-1.7.4
-
-
-From d08b2d1f2ff4723b335d0fb5b91ffd6cb6a005d3 Mon Sep 17 00:00:00 2001
-From: Paul Durrant <paul.durrant at citrix.com>
-Date: Mon, 5 Jul 2010 11:45:29 +0100
-Subject: [PATCH 050/203] xen: netback: Add a new style of passing GSO packets to frontends.
-
-With feature-gso-tcpv4-prefix, the packet data passed to the
-frontend is preceded by a ring entry that contains the necessary
-metadata. This style of GSO passing is required for Citrix
-Windows PV Drivers.
-
-Signed-off-by: Paul Durrant <paul.durrant at citrix.com>
-Cc: Ian Campbell <ian.campbell at citrix.com>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/common.h     |    3 ++-
- drivers/xen/netback/netback.c    |   37 ++++++++++++++++++++++++++++++++++---
- drivers/xen/netback/xenbus.c     |   15 ++++++++++++---
- include/xen/interface/io/netif.h |    4 ++++
- 4 files changed, 52 insertions(+), 7 deletions(-)
-
-diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
-index 08e7a0e..78451ab 100644
---- a/drivers/xen/netback/common.h
-+++ b/drivers/xen/netback/common.h
-@@ -80,7 +80,8 @@ struct xen_netif {
- 	int features;
- 
- 	/* Internal feature information. */
--	u8 can_queue:1;	/* can queue packets for receiver? */
-+	u8 can_queue:1;	    /* can queue packets for receiver? */
-+	u8 gso_prefix:1;    /* use a prefix segment for GSO information */
- 
- 	/* Allow netif_be_start_xmit() to peek ahead in the rx request
- 	 * ring.  This is a prediction of what rx_req_cons will be once
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 848503e..e93a62e 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -432,6 +432,7 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
- 			/* Overflowed this request, go to the next one */
- 			req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
- 			meta = npo->meta + npo->meta_prod++;
-+			meta->gso_size = 0;
- 			meta->size = 0;
- 			meta->id = req->id;
- 			npo->copy_off = 0;
-@@ -492,9 +493,23 @@ static int netbk_gop_skb(struct sk_buff *skb,
- 
- 	old_meta_prod = npo->meta_prod;
- 
-+	/* Set up a GSO prefix descriptor, if necessary */
-+	if (skb_shinfo(skb)->gso_size && netif->gso_prefix) {
-+		req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
-+		meta = npo->meta + npo->meta_prod++;
-+		meta->gso_size = skb_shinfo(skb)->gso_size;
-+		meta->size = 0;
-+		meta->id = req->id;
-+	}
-+
- 	req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
- 	meta = npo->meta + npo->meta_prod++;
--	meta->gso_size = skb_shinfo(skb)->gso_size;
 +
-+	if (!netif->gso_prefix)
-+		meta->gso_size = skb_shinfo(skb)->gso_size;
-+	else
-+		meta->gso_size = 0;
++		vif = netdev_priv(skb->dev);
 +
- 	meta->size = 0;
- 	meta->id = req->id;
- 	npo->copy_off = 0;
-@@ -506,7 +521,7 @@ static int netbk_gop_skb(struct sk_buff *skb,
- 			    offset_in_page(skb->data), 1);
- 
- 	/* Leave a gap for the GSO descriptor. */
--	if (skb_shinfo(skb)->gso_size)
-+	if (skb_shinfo(skb)->gso_size && !netif->gso_prefix)
- 		netif->rx.req_cons++;
- 
- 	for (i = 0; i < nr_frags; i++) {
-@@ -623,6 +638,21 @@ static void net_rx_action(unsigned long data)
- 
- 		netif = netdev_priv(skb->dev);
- 
-+		if (netbk->meta[npo.meta_cons].gso_size && netif->gso_prefix) {
-+			resp = RING_GET_RESPONSE(&netif->rx,
-+						netif->rx.rsp_prod_pvt++);
++		if (netbk->meta[npo.meta_cons].gso_size && vif->gso_prefix) {
++			resp = RING_GET_RESPONSE(&vif->rx,
++						vif->rx.rsp_prod_pvt++);
 +
-+			resp->flags = NETRXF_gso_prefix | NETRXF_more_data;
++			resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data;
 +
 +			resp->offset = netbk->meta[npo.meta_cons].gso_size;
 +			resp->id = netbk->meta[npo.meta_cons].id;
@@ -9216,2033 +3417,468 @@ index 848503e..e93a62e 100644
 +		}
 +
 +
- 		netif->stats.tx_bytes += skb->len;
- 		netif->stats.tx_packets++;
- 
-@@ -633,6 +663,7 @@ static void net_rx_action(unsigned long data)
- 			flags = 0;
- 		else
- 			flags = NETRXF_more_data;
-+
- 		if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
- 			flags |= NETRXF_csum_blank | NETRXF_data_validated;
- 		else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
-@@ -645,7 +676,7 @@ static void net_rx_action(unsigned long data)
- 					netbk->meta[npo.meta_cons].size,
- 					flags);
- 
--		if (netbk->meta[npo.meta_cons].gso_size) {
-+		if (netbk->meta[npo.meta_cons].gso_size && !netif->gso_prefix) {
- 			struct xen_netif_extra_info *gso =
- 				(struct xen_netif_extra_info *)
- 				RING_GET_RESPONSE(&netif->rx,
-diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
-index e30b0c7..cda987f 100644
---- a/drivers/xen/netback/xenbus.c
-+++ b/drivers/xen/netback/xenbus.c
-@@ -457,16 +457,25 @@ static int connect_rings(struct backend_info *be)
- 			be->netif->dev->mtu = ETH_DATA_LEN;
- 	}
- 
--	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4", "%d",
--			 &val) < 0)
-+	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4",
-+			"%d", &val) < 0)
- 		val = 0;
- 	if (val) {
- 		be->netif->features |= NETIF_F_TSO;
- 		be->netif->dev->features |= NETIF_F_TSO;
- 	}
- 
-+	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix",
-+			"%d", &val) < 0)
-+		val = 0;
-+	if (val) {
-+		be->netif->features |= NETIF_F_TSO;
-+		be->netif->dev->features |= NETIF_F_TSO;
-+		be->netif->gso_prefix = 1;
-+	}
-+
- 	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
--			 "%d", &val) < 0)
-+			"%d", &val) < 0)
- 		val = 0;
- 	if (val) {
- 		be->netif->features &= ~NETIF_F_IP_CSUM;
-diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h
-index 518481c..8309344 100644
---- a/include/xen/interface/io/netif.h
-+++ b/include/xen/interface/io/netif.h
-@@ -131,6 +131,10 @@ struct xen_netif_rx_request {
- #define _NETRXF_extra_info     (3)
- #define  NETRXF_extra_info     (1U<<_NETRXF_extra_info)
- 
-+/* GSO Prefix descriptor. */
-+#define _NETRXF_gso_prefix     (4)
-+#define  NETRXF_gso_prefix     (1U<<_NETRXF_gso_prefix)
++		vif->stats.tx_bytes += skb->len;
++		vif->stats.tx_packets++;
 +
- struct xen_netif_rx_response {
-     uint16_t id;
-     uint16_t offset;       /* Offset in page of start of received packet  */
--- 
-1.7.4
-
-
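In the prefix style added above, the GSO metadata occupies its own ring slot
placed before the packet data, rather than an extra-info segment after the
first data slot. A simplified sketch of that layout (structures are
stand-ins for the real ring entries):

    #include <stdio.h>

    struct rx_slot {
        int is_gso_prefix;      /* NETRXF_gso_prefix analogue */
        unsigned gso_size;      /* meaningful only in a prefix slot */
        unsigned len;           /* data bytes in this slot */
    };

    int main(void)
    {
        /* prefix slot first, then the data slots of the frame */
        struct rx_slot ring[3] = {
            { .is_gso_prefix = 1, .gso_size = 1448, .len = 0 },
            { .len = 4096 },
            { .len = 2048 },
        };

        for (int i = 0; i < 3; i++) {
            if (ring[i].is_gso_prefix)
                printf("slot %d: GSO prefix, gso_size=%u\n",
                       i, ring[i].gso_size);
            else
                printf("slot %d: %u bytes of data\n", i, ring[i].len);
        }
        return 0;
    }
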
-From bd910979612331d60a629c16a49ebeb5efa0f035 Mon Sep 17 00:00:00 2001
-From: Paul Durrant <paul.durrant at citrix.com>
-Date: Fri, 2 Jul 2010 10:28:13 +0100
-Subject: [PATCH 051/203] xen: netback: Make frontend features distinct from netback feature flags.
-
-Make sure that if a feature flag is disabled by ethtool on netback
-we do not gratuitously re-enable it when we check the frontend
-features during ring connection.
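
The mechanism boils down to a two-mask intersection. As a rough
standalone sketch (plain C; the flag bits here are invented, the real
code uses the NETIF_F_* flags and the new netif->features_disabled
field), recomputing the intersection on every reconnect is what keeps
the ethtool setting sticky:

#include <stdio.h>

/* Illustrative sketch only -- hypothetical flag values. */
#define F_SG       (1u << 0)
#define F_TSO      (1u << 1)
#define F_IP_CSUM  (1u << 2)

int main(void)
{
	unsigned int frontend = F_SG | F_TSO | F_IP_CSUM; /* offered by guest */
	unsigned int disabled = F_TSO;		/* switched off via ethtool */

	/* The frontend can only offer features; the disable mask wins. */
	unsigned int features = frontend & ~disabled;

	printf("dev->features = %#x\n", features);	/* F_SG | F_IP_CSUM */
	return 0;
}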
-
-Signed-off-by: Paul Durrant <paul.durrant at citrix.com>
-Cc: Ian Campbell <ian.campbell at citrix.com>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/common.h    |   14 ++++++--
- drivers/xen/netback/interface.c |   68 ++++++++++++++++++++++++++++++--------
- drivers/xen/netback/netback.c   |    2 +-
- drivers/xen/netback/xenbus.c    |   44 ++++++++++---------------
- 4 files changed, 81 insertions(+), 47 deletions(-)
-
-diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
-index 78451ab..a5f3759 100644
---- a/drivers/xen/netback/common.h
-+++ b/drivers/xen/netback/common.h
-@@ -76,12 +76,17 @@ struct xen_netif {
- 	struct vm_struct *tx_comms_area;
- 	struct vm_struct *rx_comms_area;
- 
--	/* Set of features that can be turned on in dev->features. */
--	int features;
-+	/* Flags that must not be set in dev->features */
-+	int features_disabled;
++		status = netbk_check_gop(vif, sco->meta_slots_used, &npo);
 +
-+	/* Frontend feature information. */
-+	u8 can_sg:1;
-+	u8 gso:1;
-+	u8 gso_prefix:1;
-+	u8 csum:1;
- 
- 	/* Internal feature information. */
- 	u8 can_queue:1;	    /* can queue packets for receiver? */
--	u8 gso_prefix:1;    /* use a prefix segment for GSO information */
- 
- 	/* Allow netif_be_start_xmit() to peek ahead in the rx request
- 	 * ring.  This is a prediction of what rx_req_cons will be once
-@@ -187,6 +192,7 @@ void netif_accel_init(void);
- 
- void netif_disconnect(struct xen_netif *netif);
- 
-+void netif_set_features(struct xen_netif *netif);
- struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle);
- int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
- 	      unsigned long rx_ring_ref, unsigned int evtchn);
-@@ -223,7 +229,7 @@ static inline int netbk_can_queue(struct net_device *dev)
- static inline int netbk_can_sg(struct net_device *dev)
- {
- 	struct xen_netif *netif = netdev_priv(dev);
--	return netif->features & NETIF_F_SG;
-+	return netif->can_sg;
- }
- 
- struct pending_tx_info {
-diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
-index 172ef4c..2e8508a 100644
---- a/drivers/xen/netback/interface.c
-+++ b/drivers/xen/netback/interface.c
-@@ -121,31 +121,69 @@ static int netbk_change_mtu(struct net_device *dev, int mtu)
- 	return 0;
- }
- 
--static int netbk_set_sg(struct net_device *dev, u32 data)
-+void netif_set_features(struct xen_netif *netif)
- {
--	if (data) {
--		struct xen_netif *netif = netdev_priv(dev);
-+	struct net_device *dev = netif->dev;
-+	int features = dev->features;
++		if (sco->meta_slots_used == 1)
++			flags = 0;
++		else
++			flags = XEN_NETRXF_more_data;
 +
-+	if (netif->can_sg)
-+		features |= NETIF_F_SG;
-+	if (netif->gso || netif->gso_prefix)
-+		features |= NETIF_F_TSO;
-+	if (netif->csum)
-+		features |= NETIF_F_IP_CSUM;
++		if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
++			flags |= XEN_NETRXF_csum_blank | XEN_NETRXF_data_validated;
++		else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
++			/* remote but checksummed. */
++			flags |= XEN_NETRXF_data_validated;
 +
-+	features &= ~(netif->features_disabled);
- 
--		if (!(netif->features & NETIF_F_SG))
-+	if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN)
-+		dev->mtu = ETH_DATA_LEN;
++		offset = 0;
++		resp = make_rx_response(vif, netbk->meta[npo.meta_cons].id,
++					status, offset,
++					netbk->meta[npo.meta_cons].size,
++					flags);
 +
-+	dev->features = features;
++		if (netbk->meta[npo.meta_cons].gso_size && !vif->gso_prefix) {
++			struct xen_netif_extra_info *gso =
++				(struct xen_netif_extra_info *)
++				RING_GET_RESPONSE(&vif->rx,
++						  vif->rx.rsp_prod_pvt++);
++
++			resp->flags |= XEN_NETRXF_extra_info;
++
++			gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size;
++			gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
++			gso->u.gso.pad = 0;
++			gso->u.gso.features = 0;
++
++			gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
++			gso->flags = 0;
++		}
++
++		netbk_add_frag_responses(vif, status,
++					 netbk->meta + npo.meta_cons + 1,
++					 sco->meta_slots_used);
++
++		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret);
++		irq = vif->irq;
++		if (ret && list_empty(&vif->notify_list))
++			list_add_tail(&vif->notify_list, &notify);
++
++		xenvif_notify_tx_completion(vif);
++
++		xenvif_put(vif);
++		npo.meta_cons += sco->meta_slots_used;
++		dev_kfree_skb(skb);
++	}
++
++	list_for_each_entry_safe(vif, tmp, &notify, notify_list) {
++		notify_remote_via_irq(vif->irq);
++		list_del_init(&vif->notify_list);
++	}
++
++	/* More work to do? */
++	if (!skb_queue_empty(&netbk->rx_queue) &&
++			!timer_pending(&netbk->net_timer))
++		xen_netbk_kick_thread(netbk);
 +}
 +
-+static int netbk_set_tx_csum(struct net_device *dev, u32 data)
++void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb)
 +{
-+	struct xen_netif *netif = netdev_priv(dev);
-+	if (data) {
-+		if (!netif->csum)
- 			return -ENOSYS;
-+		netif->features_disabled &= ~NETIF_F_IP_CSUM;
-+	} else {
-+		netif->features_disabled |= NETIF_F_IP_CSUM;
- 	}
- 
--	if (dev->mtu > ETH_DATA_LEN)
--		dev->mtu = ETH_DATA_LEN;
-+	netif_set_features(netif);
-+	return 0;
++	struct xen_netbk *netbk = vif->netbk;
++
++	skb_queue_tail(&netbk->rx_queue, skb);
++
++	xen_netbk_kick_thread(netbk);
 +}
- 
--	return ethtool_op_set_sg(dev, data);
-+static int netbk_set_sg(struct net_device *dev, u32 data)
++
++static void xen_netbk_alarm(unsigned long data)
 +{
-+	struct xen_netif *netif = netdev_priv(dev);
-+	if (data) {
-+		if (!netif->can_sg)
-+			return -ENOSYS;
-+		netif->features_disabled &= ~NETIF_F_SG;
-+	} else {
-+		netif->features_disabled |= NETIF_F_SG;
++	struct xen_netbk *netbk = (struct xen_netbk *)data;
++	xen_netbk_kick_thread(netbk);
++}
++
++static int __on_net_schedule_list(struct xenvif *vif)
++{
++	return !list_empty(&vif->schedule_list);
++}
++
++/* Must be called with net_schedule_list_lock held */
++static void remove_from_net_schedule_list(struct xenvif *vif)
++{
++	if (likely(__on_net_schedule_list(vif))) {
++		list_del_init(&vif->schedule_list);
++		xenvif_put(vif);
 +	}
++}
 +
-+	netif_set_features(netif);
-+	return 0;
- }
- 
- static int netbk_set_tso(struct net_device *dev, u32 data)
- {
-+	struct xen_netif *netif = netdev_priv(dev);
- 	if (data) {
--		struct xen_netif *netif = netdev_priv(dev);
--
--		if (!(netif->features & NETIF_F_TSO))
-+		if (!netif->gso && !netif->gso_prefix)
- 			return -ENOSYS;
-+		netif->features_disabled &= ~NETIF_F_TSO;
-+	} else {
-+		netif->features_disabled |= NETIF_F_TSO;
- 	}
- 
--	return ethtool_op_set_tso(dev, data);
-+	netif_set_features(netif);
-+	return 0;
- }
- 
- static void netbk_get_drvinfo(struct net_device *dev,
-@@ -200,7 +238,7 @@ static struct ethtool_ops network_ethtool_ops =
- 	.get_drvinfo = netbk_get_drvinfo,
- 
- 	.get_tx_csum = ethtool_op_get_tx_csum,
--	.set_tx_csum = ethtool_op_set_tx_csum,
-+	.set_tx_csum = netbk_set_tx_csum,
- 	.get_sg = ethtool_op_get_sg,
- 	.set_sg = netbk_set_sg,
- 	.get_tso = ethtool_op_get_tso,
-@@ -242,7 +280,8 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
- 	netif->domid  = domid;
- 	netif->group  = -1;
- 	netif->handle = handle;
--	netif->features = NETIF_F_SG;
-+	netif->can_sg = 1;
-+	netif->csum = 1;
- 	atomic_set(&netif->refcnt, 1);
- 	init_waitqueue_head(&netif->waiting_to_free);
- 	netif->dev = dev;
-@@ -259,8 +298,7 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
- 	init_timer(&netif->tx_queue_timeout);
- 
- 	dev->netdev_ops	= &netback_ops;
--	dev->features   = NETIF_F_IP_CSUM|NETIF_F_SG;
--
-+	netif_set_features(netif);
- 	SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
- 
- 	dev->tx_queue_len = netbk_queue_length;
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index e93a62e..63a771e 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -238,7 +238,7 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
- 
- static inline int netbk_max_required_rx_slots(struct xen_netif *netif)
- {
--	if (netif->features & (NETIF_F_SG|NETIF_F_TSO))
-+	if (netif->can_sg || netif->gso || netif->gso_prefix)
- 		return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
- 	return 1; /* all in one */
- }
-diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
-index cda987f..17ff5cf 100644
---- a/drivers/xen/netback/xenbus.c
-+++ b/drivers/xen/netback/xenbus.c
-@@ -404,6 +404,7 @@ static void connect(struct backend_info *be)
- 
- static int connect_rings(struct backend_info *be)
- {
-+	struct xen_netif *netif = be->netif;
- 	struct xenbus_device *dev = be->dev;
- 	unsigned long tx_ring_ref, rx_ring_ref;
- 	unsigned int evtchn, rx_copy;
-@@ -437,53 +438,42 @@ static int connect_rings(struct backend_info *be)
- 	if (!rx_copy)
- 		return -EOPNOTSUPP;
- 
--	if (be->netif->dev->tx_queue_len != 0) {
-+	if (netif->dev->tx_queue_len != 0) {
- 		if (xenbus_scanf(XBT_NIL, dev->otherend,
- 				 "feature-rx-notify", "%d", &val) < 0)
- 			val = 0;
- 		if (val)
--			be->netif->can_queue = 1;
-+			netif->can_queue = 1;
- 		else
- 			/* Must be non-zero for pfifo_fast to work. */
--			be->netif->dev->tx_queue_len = 1;
-+			netif->dev->tx_queue_len = 1;
- 	}
- 
--	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", "%d", &val) < 0)
-+	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg",
-+			 "%d", &val) < 0)
- 		val = 0;
--	if (!val) {
--		be->netif->features &= ~NETIF_F_SG;
--		be->netif->dev->features &= ~NETIF_F_SG;
--		if (be->netif->dev->mtu > ETH_DATA_LEN)
--			be->netif->dev->mtu = ETH_DATA_LEN;
--	}
-+	netif->can_sg = !!val;
- 
- 	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4",
--			"%d", &val) < 0)
-+			 "%d", &val) < 0)
- 		val = 0;
--	if (val) {
--		be->netif->features |= NETIF_F_TSO;
--		be->netif->dev->features |= NETIF_F_TSO;
--	}
-+	netif->gso = !!val;
- 
- 	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix",
--			"%d", &val) < 0)
-+			 "%d", &val) < 0)
- 		val = 0;
--	if (val) {
--		be->netif->features |= NETIF_F_TSO;
--		be->netif->dev->features |= NETIF_F_TSO;
--		be->netif->gso_prefix = 1;
--	}
-+	netif->gso_prefix = !!val;
- 
- 	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
--			"%d", &val) < 0)
-+			 "%d", &val) < 0)
- 		val = 0;
--	if (val) {
--		be->netif->features &= ~NETIF_F_IP_CSUM;
--		be->netif->dev->features &= ~NETIF_F_IP_CSUM;
--	}
-+	netif->csum = !val;
++static struct xenvif *poll_net_schedule_list(struct xen_netbk *netbk)
++{
++	struct xenvif *vif = NULL;
 +
-+	/* Set dev->features */
-+	netif_set_features(netif);
- 
- 	/* Map the shared frame, irq etc. */
--	err = netif_map(be->netif, tx_ring_ref, rx_ring_ref, evtchn);
-+	err = netif_map(netif, tx_ring_ref, rx_ring_ref, evtchn);
- 	if (err) {
- 		xenbus_dev_fatal(dev, err,
- 				 "mapping shared-frames %lu/%lu port %u",
--- 
-1.7.4
-
-
-From cf8c20169427de5829e3ec723712b77de52e64ac Mon Sep 17 00:00:00 2001
-From: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
-Date: Thu, 15 Jul 2010 10:46:50 -0700
-Subject: [PATCH 052/203] xen: netback: only initialize for PV domains
-
-HVM domains don't support netback
-
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/netback.c |    2 +-
- 1 files changed, 1 insertions(+), 1 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 63a771e..911c85b 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -1701,7 +1701,7 @@ static int __init netback_init(void)
- 	int rc = 0;
- 	int group;
- 
--	if (!xen_domain())
-+	if (!xen_pv_domain())
- 		return -ENODEV;
- 
- 	xen_netbk_group_nr = num_online_cpus();
--- 
-1.7.4
-
-
-From dfa4906cfade8541573814c34be82ba02c348317 Mon Sep 17 00:00:00 2001
-From: Owen Smith <owen.smith at citrix.com>
-Date: Wed, 22 Dec 2010 15:05:00 +0000
-Subject: [PATCH 053/203] Union the blkif_request request specific fields
-
-Prepare for extending the block device ring to allow request-specific
-fields by moving the request-specific fields for reads, writes and
-barrier requests to a union member.
-
-Signed-off-by: Owen Smith <owen.smith at citrix.com>
----
- drivers/block/xen-blkfront.c     |    8 ++++----
- include/xen/interface/io/blkif.h |   16 +++++++++++-----
- 2 files changed, 15 insertions(+), 9 deletions(-)
-
-diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
-index d7aa39e..cc4514c 100644
---- a/drivers/block/xen-blkfront.c
-+++ b/drivers/block/xen-blkfront.c
-@@ -281,7 +281,7 @@ static int blkif_queue_request(struct request *req)
- 	info->shadow[id].request = req;
- 
- 	ring_req->id = id;
--	ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req);
-+	ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
- 	ring_req->handle = info->handle;
- 
- 	ring_req->operation = rq_data_dir(req) ?
-@@ -317,7 +317,7 @@ static int blkif_queue_request(struct request *req)
- 				rq_data_dir(req) );
- 
- 		info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
--		ring_req->seg[i] =
-+		ring_req->u.rw.seg[i] =
- 				(struct blkif_request_segment) {
- 					.gref       = ref,
- 					.first_sect = fsect,
-@@ -615,7 +615,7 @@ static void blkif_completion(struct blk_shadow *s)
- {
- 	int i;
- 	for (i = 0; i < s->req.nr_segments; i++)
--		gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL);
-+		gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL);
- }
- 
- static irqreturn_t blkif_interrupt(int irq, void *dev_id)
-@@ -932,7 +932,7 @@ static int blkif_recover(struct blkfront_info *info)
- 		/* Rewrite any grant references invalidated by susp/resume. */
- 		for (j = 0; j < req->nr_segments; j++)
- 			gnttab_grant_foreign_access_ref(
--				req->seg[j].gref,
-+				req->u.rw.seg[j].gref,
- 				info->xbdev->otherend_id,
- 				pfn_to_mfn(info->shadow[req->id].frame[j]),
- 				rq_data_dir(info->shadow[req->id].request));
-diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h
-index c2d1fa4..e4f743c 100644
---- a/include/xen/interface/io/blkif.h
-+++ b/include/xen/interface/io/blkif.h
-@@ -51,11 +51,7 @@ typedef uint64_t blkif_sector_t;
-  */
- #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
- 
--struct blkif_request {
--	uint8_t        operation;    /* BLKIF_OP_???                         */
--	uint8_t        nr_segments;  /* number of segments                   */
--	blkif_vdev_t   handle;       /* only for read/write requests         */
--	uint64_t       id;           /* private guest value, echoed in resp  */
-+struct blkif_request_rw {
- 	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
- 	struct blkif_request_segment {
- 		grant_ref_t gref;        /* reference to I/O buffer frame        */
-@@ -65,6 +61,16 @@ struct blkif_request {
- 	} seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
- };
- 
-+struct blkif_request {
-+	uint8_t        operation;    /* BLKIF_OP_???                         */
-+	uint8_t        nr_segments;  /* number of segments                   */
-+	blkif_vdev_t   handle;       /* only for read/write requests         */
-+	uint64_t       id;           /* private guest value, echoed in resp  */
-+	union {
-+		struct blkif_request_rw rw;
-+	} u;
-+};
++	spin_lock_irq(&netbk->net_schedule_list_lock);
++	if (list_empty(&netbk->net_schedule_list))
++		goto out;
 +
- struct blkif_response {
- 	uint64_t        id;              /* copied from request */
- 	uint8_t         operation;       /* copied from request */
--- 
-1.7.4
-
-
-From e8bc588ab4c297e3f3d8f61f205b2d2db258907b Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Tue, 18 Jan 2011 20:09:41 -0500
-Subject: [PATCH 054/203] xen: Mark all initial reserved pages for the balloon as INVALID_P2M_ENTRY.
-
-With this patch, we diligently set regions that will be used by the
-balloon driver to be INVALID_P2M_ENTRY and under the ownership
-of the balloon driver. We are OK using the __set_phys_to_machine
-as we do not expect to be allocating any P2M middle or entries pages.
-The set_phys_to_machine has the side-effect of potentially allocating
-new pages and we do not want that at this stage.
-
-We can do this because xen_build_mfn_list_list will have already
-allocated all such pages up to xen_max_p2m_pfn.
-
-We also move the check for auto translated physmap down the
-stack so it is present in __set_phys_to_machine.
-
-[v2: Rebased with mmu->p2m code split]
-Reviewed-by: Ian Campbell <ian.campbell at citrix.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- arch/x86/include/asm/xen/page.h |    1 +
- arch/x86/xen/mmu.c              |    2 +-
- arch/x86/xen/p2m.c              |    9 ++++-----
- arch/x86/xen/setup.c            |    7 ++++++-
- drivers/xen/balloon.c           |    2 +-
- 5 files changed, 13 insertions(+), 8 deletions(-)
-
-diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
-index f25bdf2..8ea9772 100644
---- a/arch/x86/include/asm/xen/page.h
-+++ b/arch/x86/include/asm/xen/page.h
-@@ -41,6 +41,7 @@ extern unsigned int   machine_to_phys_order;
- 
- extern unsigned long get_phys_to_machine(unsigned long pfn);
- extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
-+extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
- 
- extern int m2p_add_override(unsigned long mfn, struct page *page);
- extern int m2p_remove_override(struct page *page);
-diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
-index 5e92b61..0180ae8 100644
---- a/arch/x86/xen/mmu.c
-+++ b/arch/x86/xen/mmu.c
-@@ -2074,7 +2074,7 @@ static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
- 			in_frames[i] = virt_to_mfn(vaddr);
- 
- 		MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0);
--		set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);
-+		__set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);
- 
- 		if (out_frames)
- 			out_frames[i] = virt_to_pfn(vaddr);
-diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
-index ddc81a0..df4e367 100644
---- a/arch/x86/xen/p2m.c
-+++ b/arch/x86/xen/p2m.c
-@@ -365,6 +365,10 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
- {
- 	unsigned topidx, mididx, idx;
- 
-+	if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
-+		BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
-+		return true;
++	vif = list_first_entry(&netbk->net_schedule_list,
++			       struct xenvif, schedule_list);
++	if (!vif)
++		goto out;
++
++	xenvif_get(vif);
++
++	remove_from_net_schedule_list(vif);
++out:
++	spin_unlock_irq(&netbk->net_schedule_list_lock);
++	return vif;
++}
++
++void xen_netbk_schedule_xenvif(struct xenvif *vif)
++{
++	unsigned long flags;
++	struct xen_netbk *netbk = vif->netbk;
++
++	if (__on_net_schedule_list(vif))
++		goto kick;
++
++	spin_lock_irqsave(&netbk->net_schedule_list_lock, flags);
++	if (!__on_net_schedule_list(vif) &&
++	    likely(xenvif_schedulable(vif))) {
++		list_add_tail(&vif->schedule_list, &netbk->net_schedule_list);
++		xenvif_get(vif);
 +	}
- 	if (unlikely(pfn >= MAX_P2M_PFN)) {
- 		BUG_ON(mfn != INVALID_P2M_ENTRY);
- 		return true;
-@@ -384,11 +388,6 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
- 
- bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
- {
--	if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
--		BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
--		return true;
--	}
--
- 	if (unlikely(!__set_phys_to_machine(pfn, mfn)))  {
- 		if (!alloc_p2m(pfn))
- 			return false;
-diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
-index b5a7f92..7201800 100644
---- a/arch/x86/xen/setup.c
-+++ b/arch/x86/xen/setup.c
-@@ -52,6 +52,8 @@ phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
- 
- static __init void xen_add_extra_mem(unsigned long pages)
- {
-+	unsigned long pfn;
++	spin_unlock_irqrestore(&netbk->net_schedule_list_lock, flags);
 +
- 	u64 size = (u64)pages * PAGE_SIZE;
- 	u64 extra_start = xen_extra_mem_start + xen_extra_mem_size;
- 
-@@ -66,6 +68,9 @@ static __init void xen_add_extra_mem(unsigned long pages)
- 	xen_extra_mem_size += size;
- 
- 	xen_max_p2m_pfn = PFN_DOWN(extra_start + size);
++kick:
++	smp_mb();
++	if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
++	    !list_empty(&netbk->net_schedule_list))
++		xen_netbk_kick_thread(netbk);
++}
 +
-+	for (pfn = PFN_DOWN(extra_start); pfn <= xen_max_p2m_pfn; pfn++)
-+		__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
- }
- 
- static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
-@@ -104,7 +109,7 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
- 		WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n",
- 		     start, end, ret);
- 		if (ret == 1) {
--			set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
-+			__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
- 			len++;
- 		}
- 	}
-diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
-index 43f9f02..b1661cd 100644
---- a/drivers/xen/balloon.c
-+++ b/drivers/xen/balloon.c
-@@ -296,7 +296,7 @@ static int decrease_reservation(unsigned long nr_pages)
- 	/* No more mappings: invalidate P2M and add to balloon. */
- 	for (i = 0; i < nr_pages; i++) {
- 		pfn = mfn_to_pfn(frame_list[i]);
--		set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
-+		__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
- 		balloon_append(pfn_to_page(pfn));
- 	}
- 
--- 
-1.7.4
-
-
-From 7b2fc719094440d3eacacd95fa6a1f5ac495396b Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Tue, 18 Jan 2011 20:15:21 -0500
-Subject: [PATCH 060/203] xen/mmu: Add the notion of identity (1-1) mapping.
-
-Our P2M tree structure is three levels deep. On the leaf nodes
-we set the Machine Frame Number (MFN) of the PFN. What this means
-is that when one does: pfn_to_mfn(pfn), which is used when creating
-PTE entries, you get the real MFN of the hardware. When Xen sets
-up a guest it initially populates an array which has descending
-(or ascending) MFN values, as so:
-
- idx: 0,  1,       2
- [0x290F, 0x290E, 0x290D, ..]
-
-so pfn_to_mfn(2)==0x290D. If you start and restart many guests, that list
-starts looking quite random.
-
-We graft this structure on our P2M tree structure and stick in
-those MFNs in the leaves. But for all other leaf entries, or for the top
-root, or middle one, for which there is a void entry, we assume it is
-"missing". So
- pfn_to_mfn(0xc0000)=INVALID_P2M_ENTRY.
-
-We add the possibility of setting 1-1 mappings on certain regions, so
-that:
- pfn_to_mfn(0xc0000)=0xc0000
-
-The benefit of this is, that we can assume for non-RAM regions (think
-PCI BARs, or ACPI spaces), we can create mappings easily b/c we
-get the PFN value to match the MFN.
-
-For this to work efficiently we introduce one new page p2m_identity and
-allocate (via reserved_brk) any other pages we need to cover the sides
-(1GB or 4MB boundary violations). All entries in p2m_identity are set to
-INVALID_P2M_ENTRY type (Xen toolstack only recognizes that and MFNs,
-no other fancy value).
-
-On lookup we spot that the entry points to p2m_identity and return the identity
-value instead of dereferencing and returning INVALID_P2M_ENTRY. If the entry
-points to an allocated page, we just proceed as before and return the PFN.
-If the PFN has IDENTITY_FRAME_BIT set we unmask that in appropriate functions
-(pfn_to_mfn).
-
-The reason for having the IDENTITY_FRAME_BIT instead of just returning the
-PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a
-non-identity pfn. To protect ourselves against that, we elect to set (and get) the
-IDENTITY_FRAME_BIT on all identity mapped PFNs.
-
-This simplistic diagram is used to explain the more subtle piece of code.
-There is also a diagram of the P2M at the end that can help.
-Imagine your E820 looking as so:
-
-                   1GB                                           2GB
-/-------------------+---------\/----\         /----------\    /---+-----\
-| System RAM        | Sys RAM ||ACPI|         | reserved |    | Sys RAM |
-\-------------------+---------/\----/         \----------/    \---+-----/
-                              ^- 1029MB                       ^- 2001MB
-
-[1029MB = 263424 (0x40500), 2001MB = 512256 (0x7D100), 2048MB = 524288 (0x80000)]
-
-And dom0_mem=max:3GB,1GB is passed in to the guest, meaning memory past 1GB
-is actually not present (would have to kick the balloon driver to put it in).
-
-When we are told to set the PFNs for identity mapping (see patch: "xen/setup:
-Set identity mapping for non-RAM E820 and E820 gaps.") we pass in the start
-of the PFN and the end PFN (263424 and 512256 respectively). The first step is
-to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page
-covers 512^2 of page estate (1GB) and in case the start or end PFN is not
-aligned on 512^2*PAGE_SIZE (1GB) we loop on aligned 1GB PFNs from start pfn to
-end pfn.  We reserve_brk top leaf pages if they are missing (means they point
-to p2m_mid_missing).
-
-With the E820 example above, 263424 is not 1GB aligned so we allocate a
-reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000.
-Each entry in the allocated page is "missing" (points to p2m_missing).
-
-Next stage is to determine if we need to do a more granular boundary check
-on the 4MB (or 2MB depending on architecture) off the start and end pfn's.
-We check if the start pfn and end pfn violate that boundary check, and if
-so reserve_brk a middle (p2m[x][y]) leaf page. This way we have a much finer
-granularity of setting which PFNs are missing and which ones are identity.
-In our example 263424 and 512256 both fail the check so we reserve_brk two
-pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing" values)
-and assign them to p2m[1][2] and p2m[1][488] respectively.
-
-At this point we would at minimum reserve_brk one page, but could be up to
-three. Each call to set_phys_range_identity has at maximum a three page
-cost. If we were to query the P2M at this stage, all those entries from
-start PFN through end PFN (so 1029MB -> 2001MB) would return INVALID_P2M_ENTRY
-("missing").
-
-The next step is to walk from the start pfn to the end pfn setting
-the IDENTITY_FRAME_BIT on each PFN. This is done in 'set_phys_range_identity'.
-If we find that the middle leaf is pointing to p2m_missing we can swap it over
-to p2m_identity - this way covering 4MB (or 2MB) PFN space.  At this point we
-do not need to worry about boundary alignment (so no need to reserve_brk a middle
-page, figure out which PFNs are "missing" and which ones are identity), as that
-has been done earlier.  If we find that the middle leaf is not occupied by
-p2m_identity or p2m_missing, we dereference that page (which covers
-512 PFNs) and set the appropriate PFN with IDENTITY_FRAME_BIT. In our example
-263424 and 512256 end up there, and we set from p2m[1][2][256->511] and
-p2m[1][488][0->256] with IDENTITY_FRAME_BIT set.
-
-All other regions that are void (or not filled) either point to p2m_missing
-(considered missing) or have the default value of INVALID_P2M_ENTRY (also
-considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511]
-contain the INVALID_P2M_ENTRY value and are considered "missing."
-
-This is what the p2m ends up looking (for the E820 above) with this
-fabulous drawing:
-
-   p2m         /--------------\
- /-----\       | &mfn_list[0],|                           /-----------------\
- |  0  |------>| &mfn_list[1],|    /---------------\      | ~0, ~0, ..      |
- |-----|       |  ..., ~0, ~0 |    | ~0, ~0, [x]---+----->| IDENTITY [@256] |
- |  1  |---\   \--------------/    | [p2m_identity]+\     | IDENTITY [@257] |
- |-----|    \                      | [p2m_identity]+\\    | ....            |
- |  2  |--\  \-------------------->|  ...          | \\   \----------------/
- |-----|   \                       \---------------/  \\
- |  3  |\   \                                          \\  p2m_identity
- |-----| \   \-------------------->/---------------\   /-----------------\
- | ..  +->+                        | [p2m_identity]+-->| ~0, ~0, ~0, ... |
- \-----/ /                         | [p2m_identity]+-->| ..., ~0         |
-        / /---------------\        | ....          |   \-----------------/
-       /  | IDENTITY[@0]  |      /-+-[x], ~0, ~0.. |
-      /   | IDENTITY[@256]|<----/  \---------------/
-     /    | ~0, ~0, ....  |
-    |     \---------------/
-    |
-    p2m_missing             p2m_missing
-/------------------\     /------------\
-| [p2m_mid_missing]+---->| ~0, ~0, ~0 |
-| [p2m_mid_missing]+---->| ..., ~0    |
-\------------------/     \------------/
-
-where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT)
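
To make the arithmetic above concrete, here is a rough standalone
sketch (plain C, userspace; the 512-way fanout and the marker bits
follow the description above, everything else is hypothetical) of how
an identity entry is stored and then unmasked:

#include <stdio.h>

/* Illustrative sketch only -- not the kernel implementation. */
#define BITS_PER_LONG      (sizeof(unsigned long) * 8)
#define FOREIGN_FRAME_BIT  (1UL << (BITS_PER_LONG - 1))
#define IDENTITY_FRAME_BIT (1UL << (BITS_PER_LONG - 2))
#define IDENTITY_FRAME(m)  ((m) | IDENTITY_FRAME_BIT)
#define P2M_PER_PAGE       512UL	/* 4K page / 8-byte entries */

int main(void)
{
	unsigned long pfn = 263424UL;	/* 0x40500, the 1029MB mark above */

	/* Three-level lookup indices, 512-way fanout per level. */
	unsigned long topidx = pfn / (P2M_PER_PAGE * P2M_PER_PAGE);
	unsigned long mididx = (pfn / P2M_PER_PAGE) % P2M_PER_PAGE;
	unsigned long idx    = pfn % P2M_PER_PAGE;

	/* What the leaf stores for an identity-mapped frame ... */
	unsigned long entry = IDENTITY_FRAME(pfn);

	/* ... and what pfn_to_mfn() hands out after masking. */
	unsigned long mfn = entry & ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);

	printf("p2m[%lu][%lu][%lu]: entry=%#lx -> mfn=%#lx (identity=%d)\n",
	       topidx, mididx, idx, entry, mfn,
	       !!(entry & IDENTITY_FRAME_BIT));
	return 0;
}

With this example PFN the lookup lands in p2m[1][2][256], matching the
p2m[1][2][256->511] range quoted above.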
-
-[v5: Changed code to use ranges, added ASCII art]
-[v6: Rebased on top of xen->p2m code split]
-[v4: Squished patches in just this one]
-[v7: Added RESERVE_BRK for potentially allocated pages]
-[v8: Fixed alignment problem]
-[v9: Changed 1<<3X to 1<<BITS_PER_LONG-X]
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- arch/x86/include/asm/xen/page.h |    8 ++-
- arch/x86/xen/p2m.c              |  113 ++++++++++++++++++++++++++++++++++++++-
- 2 files changed, 117 insertions(+), 4 deletions(-)
-
-diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
-index 8ea9772..65fa4f2 100644
---- a/arch/x86/include/asm/xen/page.h
-+++ b/arch/x86/include/asm/xen/page.h
-@@ -29,8 +29,10 @@ typedef struct xpaddr {
- 
- /**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
- #define INVALID_P2M_ENTRY	(~0UL)
--#define FOREIGN_FRAME_BIT	(1UL<<31)
-+#define FOREIGN_FRAME_BIT	(1UL<<(BITS_PER_LONG-1))
-+#define IDENTITY_FRAME_BIT	(1UL<<(BITS_PER_LONG-2))
- #define FOREIGN_FRAME(m)	((m) | FOREIGN_FRAME_BIT)
-+#define IDENTITY_FRAME(m)	((m) | IDENTITY_FRAME_BIT)
- 
- /* Maximum amount of memory we can handle in a domain in pages */
- #define MAX_DOMAIN_PAGES						\
-@@ -42,6 +44,8 @@ extern unsigned int   machine_to_phys_order;
- extern unsigned long get_phys_to_machine(unsigned long pfn);
- extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
- extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
-+extern unsigned long set_phys_range_identity(unsigned long pfn_s,
-+					     unsigned long pfn_e);
- 
- extern int m2p_add_override(unsigned long mfn, struct page *page);
- extern int m2p_remove_override(struct page *page);
-@@ -58,7 +62,7 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn)
- 	mfn = get_phys_to_machine(pfn);
- 
- 	if (mfn != INVALID_P2M_ENTRY)
--		mfn &= ~FOREIGN_FRAME_BIT;
-+		mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);
- 
- 	return mfn;
- }
-diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
-index df4e367..dd30ec8 100644
---- a/arch/x86/xen/p2m.c
-+++ b/arch/x86/xen/p2m.c
-@@ -59,9 +59,15 @@ static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE);
- static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE);
- static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE);
- 
-+static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE);
++void xen_netbk_deschedule_xenvif(struct xenvif *vif)
++{
++	struct xen_netbk *netbk = vif->netbk;
++	spin_lock_irq(&netbk->net_schedule_list_lock);
++	remove_from_net_schedule_list(vif);
++	spin_unlock_irq(&netbk->net_schedule_list_lock);
++}
 +
- RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
- RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
- 
-+/* We might hit two boundary violations at the start and end; at most each
-+ * boundary violation will require three middle nodes. */
-+RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3);
++void xen_netbk_check_rx_xenvif(struct xenvif *vif)
++{
++	int more_to_do;
 +
- static inline unsigned p2m_top_index(unsigned long pfn)
- {
- 	BUG_ON(pfn >= MAX_P2M_PFN);
-@@ -221,6 +227,9 @@ void __init xen_build_dynamic_phys_to_machine(void)
- 	p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE);
- 	p2m_top_init(p2m_top);
- 
-+	p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE);
-+	p2m_init(p2m_identity);
++	RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);
++
++	if (more_to_do)
++		xen_netbk_schedule_xenvif(vif);
++}
++
++static void tx_add_credit(struct xenvif *vif)
++{
++	unsigned long max_burst, max_credit;
 +
- 	/*
- 	 * The domain builder gives us a pre-constructed p2m array in
- 	 * mfn_list for all the pages initially given to us, so we just
-@@ -272,6 +281,14 @@ unsigned long get_phys_to_machine(unsigned long pfn)
- 	mididx = p2m_mid_index(pfn);
- 	idx = p2m_index(pfn);
- 
 +	/*
-+	 * The INVALID_P2M_ENTRY is filled in both p2m_*identity
-+	 * and in p2m_*missing, so returning the INVALID_P2M_ENTRY
-+	 * would be wrong.
++	 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
++	 * Otherwise the interface can seize up due to insufficient credit.
 +	 */
-+	if (p2m_top[topidx][mididx] == p2m_identity)
-+		return IDENTITY_FRAME(pfn);
++	max_burst = RING_GET_REQUEST(&vif->tx, vif->tx.req_cons)->size;
++	max_burst = min(max_burst, 131072UL);
++	max_burst = max(max_burst, vif->credit_bytes);
 +
- 	return p2m_top[topidx][mididx][idx];
- }
- EXPORT_SYMBOL_GPL(get_phys_to_machine);
-@@ -341,9 +358,11 @@ static bool alloc_p2m(unsigned long pfn)
- 			p2m_top_mfn_p[topidx] = mid_mfn;
- 	}
- 
--	if (p2m_top[topidx][mididx] == p2m_missing) {
-+	if (p2m_top[topidx][mididx] == p2m_identity ||
-+	    p2m_top[topidx][mididx] == p2m_missing) {
- 		/* p2m leaf page is missing */
- 		unsigned long *p2m;
-+		unsigned long *p2m_orig = p2m_top[topidx][mididx];
- 
- 		p2m = alloc_p2m_page();
- 		if (!p2m)
-@@ -351,7 +370,7 @@ static bool alloc_p2m(unsigned long pfn)
- 
- 		p2m_init(p2m);
- 
--		if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing)
-+		if (cmpxchg(&mid[mididx], p2m_orig, p2m) != p2m_orig)
- 			free_p2m_page(p2m);
- 		else
- 			mid_mfn[mididx] = virt_to_mfn(p2m);
-@@ -360,6 +379,82 @@ static bool alloc_p2m(unsigned long pfn)
- 	return true;
- }
- 
-+bool __early_alloc_p2m(unsigned long pfn)
++	/* Take care that adding a new chunk of credit doesn't wrap to zero. */
++	max_credit = vif->remaining_credit + vif->credit_bytes;
++	if (max_credit < vif->remaining_credit)
++		max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
++
++	vif->remaining_credit = min(max_credit, max_burst);
++}
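
The clamping in tx_add_credit() above can be exercised on its own; a
minimal sketch (plain C, hypothetical byte counts, with min/max and the
ULONG_MAX clamp spelled out) of the same arithmetic:

#include <limits.h>
#include <stdio.h>

int main(void)
{
	/* Illustrative sketch only -- the values are made up. */
	unsigned long credit_bytes = 100000UL;		/* per-period allowance */
	unsigned long remaining = ULONG_MAX - 50000UL;	/* close to wrapping */
	unsigned long req_size = 60000UL;		/* next request's size */

	unsigned long max_burst = req_size;
	if (max_burst > 131072UL)		/* cap at a 128kB jumbo packet */
		max_burst = 131072UL;
	if (max_burst < credit_bytes)		/* never below the allowance */
		max_burst = credit_bytes;

	unsigned long max_credit = remaining + credit_bytes;
	if (max_credit < remaining)		/* the addition wrapped ... */
		max_credit = ULONG_MAX;		/* ... so clamp, as above */

	remaining = max_credit < max_burst ? max_credit : max_burst;
	printf("remaining_credit = %lu\n", remaining);
	return 0;
}

Running it prints remaining_credit = 100000: the wrap is caught and the
burst cap wins.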
++
++static void tx_credit_callback(unsigned long data)
 +{
-+	unsigned topidx, mididx, idx;
++	struct xenvif *vif = (struct xenvif *)data;
++	tx_add_credit(vif);
++	xen_netbk_check_rx_xenvif(vif);
++}
 +
-+	topidx = p2m_top_index(pfn);
-+	mididx = p2m_mid_index(pfn);
-+	idx = p2m_index(pfn);
++static void netbk_tx_err(struct xenvif *vif,
++			 struct xen_netif_tx_request *txp, RING_IDX end)
++{
++	RING_IDX cons = vif->tx.req_cons;
 +
-+	/* Pfff.. No boundary cross-over, lets get out. */
-+	if (!idx)
-+		return false;
++	do {
++		make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
++		if (cons >= end)
++			break;
++		txp = RING_GET_REQUEST(&vif->tx, cons++);
++	} while (1);
++	vif->tx.req_cons = cons;
++	xen_netbk_check_rx_xenvif(vif);
++	xenvif_put(vif);
++}
 +
-+	WARN(p2m_top[topidx][mididx] == p2m_identity,
-+		"P2M[%d][%d] == IDENTITY, should be MISSING (or alloced)!\n",
-+		topidx, mididx);
++static int netbk_count_requests(struct xenvif *vif,
++				struct xen_netif_tx_request *first,
++				struct xen_netif_tx_request *txp,
++				int work_to_do)
++{
++	RING_IDX cons = vif->tx.req_cons;
++	int frags = 0;
 +
-+	/*
-+	 * Could be done by xen_build_dynamic_phys_to_machine..
-+	 */
-+	if (p2m_top[topidx][mididx] != p2m_missing)
-+		return false;
++	if (!(first->flags & XEN_NETTXF_more_data))
++		return 0;
 +
-+	/* Boundary cross-over for the edges: */
-+	if (idx) {
-+		unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE);
++	do {
++		if (frags >= work_to_do) {
++			netdev_dbg(vif->dev, "Need more frags\n");
++			return -frags;
++		}
 +
-+		p2m_init(p2m);
++		if (unlikely(frags >= MAX_SKB_FRAGS)) {
++			netdev_dbg(vif->dev, "Too many frags\n");
++			return -frags;
++		}
 +
-+		p2m_top[topidx][mididx] = p2m;
++		memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + frags),
++		       sizeof(*txp));
++		if (txp->size > first->size) {
++			netdev_dbg(vif->dev, "Frags galore\n");
++			return -frags;
++		}
 +
-+	}
-+	return idx != 0;
++		first->size -= txp->size;
++		frags++;
++
++		if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
++			netdev_dbg(vif->dev, "txp->offset: %x, size: %u\n",
++				 txp->offset, txp->size);
++			return -frags;
++		}
++	} while ((txp++)->flags & XEN_NETTXF_more_data);
++	return frags;
 +}
-+unsigned long set_phys_range_identity(unsigned long pfn_s,
-+				      unsigned long pfn_e)
++
++static struct page *xen_netbk_alloc_page(struct xen_netbk *netbk,
++					 struct sk_buff *skb,
++					 unsigned long pending_idx)
 +{
-+	unsigned long pfn;
++	struct page *page;
++	page = alloc_page(GFP_KERNEL|__GFP_COLD);
++	if (!page)
++		return NULL;
++	set_page_ext(page, netbk, pending_idx);
++	netbk->mmap_pages[pending_idx] = page;
++	return page;
++}
 +
-+	if (unlikely(pfn_s >= MAX_P2M_PFN || pfn_e >= MAX_P2M_PFN))
-+		return 0;
++static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk,
++						  struct xenvif *vif,
++						  struct sk_buff *skb,
++						  struct xen_netif_tx_request *txp,
++						  struct gnttab_copy *gop)
++{
++	struct skb_shared_info *shinfo = skb_shinfo(skb);
++	skb_frag_t *frags = shinfo->frags;
++	unsigned long pending_idx = *((u16 *)skb->data);
++	int i, start;
 +
-+	if (unlikely(xen_feature(XENFEAT_auto_translated_physmap)))
-+		return pfn_e - pfn_s;
++	/* Skip first skb fragment if it is on same page as header fragment. */
++	start = ((unsigned long)shinfo->frags[0].page == pending_idx);
 +
-+	if (pfn_s > pfn_e)
-+		return 0;
++	for (i = start; i < shinfo->nr_frags; i++, txp++) {
++		struct page *page;
++		pending_ring_idx_t index;
++		struct pending_tx_info *pending_tx_info =
++			netbk->pending_tx_info;
 +
-+	for (pfn = (pfn_s & ~(P2M_MID_PER_PAGE * P2M_PER_PAGE - 1));
-+		pfn < ALIGN(pfn_e, (P2M_MID_PER_PAGE * P2M_PER_PAGE));
-+		pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE)
-+	{
-+		unsigned topidx = p2m_top_index(pfn);
-+		if (p2m_top[topidx] == p2m_mid_missing) {
-+			unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
++		index = pending_index(netbk->pending_cons++);
++		pending_idx = netbk->pending_ring[index];
++		page = xen_netbk_alloc_page(netbk, skb, pending_idx);
++		if (!page)
++			return NULL;
 +
-+			p2m_mid_init(mid);
++		netbk->mmap_pages[pending_idx] = page;
 +
-+			p2m_top[topidx] = mid;
-+		}
-+	}
++		gop->source.u.ref = txp->gref;
++		gop->source.domid = vif->domid;
++		gop->source.offset = txp->offset;
 +
-+	__early_alloc_p2m(pfn_s);
-+	__early_alloc_p2m(pfn_e);
++		gop->dest.u.gmfn = virt_to_mfn(page_address(page));
++		gop->dest.domid = DOMID_SELF;
++		gop->dest.offset = txp->offset;
 +
-+	for (pfn = pfn_s; pfn < pfn_e; pfn++)
-+		if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn)))
-+			break;
++		gop->len = txp->size;
++		gop->flags = GNTCOPY_source_gref;
 +
-+	if (!WARN((pfn - pfn_s) != (pfn_e - pfn_s),
-+		"Identity mapping failed. We are %ld short of 1-1 mappings!\n",
-+		(pfn_e - pfn_s) - (pfn - pfn_s)))
-+		printk(KERN_DEBUG "1-1 mapping on %lx->%lx\n", pfn_s, pfn);
++		gop++;
 +
-+	return pfn - pfn_s;
++		memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
++		xenvif_get(vif);
++		pending_tx_info[pending_idx].vif = vif;
++		frags[i].page = (void *)pending_idx;
++	}
++
++	return gop;
 +}
 +
- /* Try to install p2m mapping; fail if intermediate bits missing */
- bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
- {
-@@ -378,6 +473,20 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
- 	mididx = p2m_mid_index(pfn);
- 	idx = p2m_index(pfn);
- 
-+	/* For sparse holes where the p2m leaf has a real PFN along with
-+	 * PCI holes, stick in the PFN as the MFN value.
-+	 */
-+	if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) {
-+		if (p2m_top[topidx][mididx] == p2m_identity)
-+			return true;
++static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
++				  struct sk_buff *skb,
++				  struct gnttab_copy **gopp)
++{
++	struct gnttab_copy *gop = *gopp;
++	int pending_idx = *((u16 *)skb->data);
++	struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
++	struct xenvif *vif = pending_tx_info[pending_idx].vif;
++	struct xen_netif_tx_request *txp;
++	struct skb_shared_info *shinfo = skb_shinfo(skb);
++	int nr_frags = shinfo->nr_frags;
++	int i, err, start;
 +
-+		/* Swap over from MISSING to IDENTITY if needed. */
-+		if (p2m_top[topidx][mididx] == p2m_missing) {
-+			p2m_top[topidx][mididx] = p2m_identity;
-+			return true;
++	/* Check status of header. */
++	err = gop->status;
++	if (unlikely(err)) {
++		pending_ring_idx_t index;
++		index = pending_index(netbk->pending_prod++);
++		txp = &pending_tx_info[pending_idx].req;
++		make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
++		netbk->pending_ring[index] = pending_idx;
++		xenvif_put(vif);
++	}
++
++	/* Skip first skb fragment if it is on same page as header fragment. */
++	start = ((unsigned long)shinfo->frags[0].page == pending_idx);
++
++	for (i = start; i < nr_frags; i++) {
++		int j, newerr;
++		pending_ring_idx_t index;
++
++		pending_idx = (unsigned long)shinfo->frags[i].page;
++
++		/* Check error status: if okay then remember grant handle. */
++		newerr = (++gop)->status;
++		if (likely(!newerr)) {
++			/* Had a previous error? Invalidate this fragment. */
++			if (unlikely(err))
++				xen_netbk_idx_release(netbk, pending_idx);
++			continue;
++		}
++
++		/* Error on this fragment: respond to client with an error. */
++		txp = &netbk->pending_tx_info[pending_idx].req;
++		make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
++		index = pending_index(netbk->pending_prod++);
++		netbk->pending_ring[index] = pending_idx;
++		xenvif_put(vif);
++
++		/* Not the first error? Preceding frags already invalidated. */
++		if (err)
++			continue;
++
++		/* First error: invalidate header and preceding fragments. */
++		pending_idx = *((u16 *)skb->data);
++		xen_netbk_idx_release(netbk, pending_idx);
++		for (j = start; j < i; j++) {
++			pending_idx = (unsigned long)shinfo->frags[j].page;
++			xen_netbk_idx_release(netbk, pending_idx);
 +		}
++
++		/* Remember the error: invalidate all subsequent fragments. */
++		err = newerr;
 +	}
 +
- 	if (p2m_top[topidx][mididx] == p2m_missing)
- 		return mfn == INVALID_P2M_ENTRY;
- 
--- 
-1.7.4
-
-
-From 57fafbb798ffde486606244c3392ed6f63050222 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Wed, 5 Jan 2011 15:46:31 -0500
-Subject: [PATCH 061/203] xen/mmu: Set _PAGE_IOMAP if PFN is an identity PFN.
-
-If we find that the PFN is within the P2M as an identity
-PFN make sure to tack on the _PAGE_IOMAP flag.
-
-Reviewed-by: Ian Campbell <ian.campbell at citrix.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- arch/x86/xen/mmu.c |   18 ++++++++++++++++--
- 1 files changed, 16 insertions(+), 2 deletions(-)
-
-diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
-index 0180ae8..9c9e076 100644
---- a/arch/x86/xen/mmu.c
-+++ b/arch/x86/xen/mmu.c
-@@ -416,8 +416,12 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
- 	if (val & _PAGE_PRESENT) {
- 		unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
- 		pteval_t flags = val & PTE_FLAGS_MASK;
--		unsigned long mfn = pfn_to_mfn(pfn);
-+		unsigned long mfn;
- 
-+		if (!xen_feature(XENFEAT_auto_translated_physmap))
-+			mfn = get_phys_to_machine(pfn);
-+		else
-+			mfn = pfn;
- 		/*
- 		 * If there's no mfn for the pfn, then just create an
- 		 * empty non-present pte.  Unfortunately this loses
-@@ -427,8 +431,18 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
- 		if (unlikely(mfn == INVALID_P2M_ENTRY)) {
- 			mfn = 0;
- 			flags = 0;
-+		} else {
-+			/*
-+			 * Paramount to do this test _after_ the
-+			 * INVALID_P2M_ENTRY as INVALID_P2M_ENTRY &
-+			 * IDENTITY_FRAME_BIT resolves to true.
-+			 */
-+			mfn &= ~FOREIGN_FRAME_BIT;
-+			if (mfn & IDENTITY_FRAME_BIT) {
-+				mfn &= ~IDENTITY_FRAME_BIT;
-+				flags |= _PAGE_IOMAP;
-+			}
- 		}
--
- 		val = ((pteval_t)mfn << PAGE_SHIFT) | flags;
- 	}
- 
--- 
-1.7.4
-
-
-From e6f1e7aabca9f9f99ef4fc1c752497b3efa7896c Mon Sep 17 00:00:00 2001
-From: Stefano Stabellini <stefano.stabellini at eu.citrix.com>
-Date: Mon, 31 Jan 2011 15:18:10 +0000
-Subject: [PATCH 062/203] x86/mm/init: respect memblock reserved regions when destroying mappings
-
-In init_memory_mapping we are destroying all the mappings between
-_brk_end and _end, no matter if some memory areas in that range have
-been reserved using memblock_x86_reserve_range.
-Besides, if _end is not pmd aligned, we might destroy the
-mappings for valid memory between _end and the following pmd.
-
-In order to avoid this problem, before clearing any pmds we check if the
-corresponding memory area has been reserved and we only destroy the
-mapping if it hasn't.
-
-We found this problem because under Xen we have a valid mapping at _end,
-and if _end is not pmd aligned the current code destroys the initial
-part of it.
-
-In practice this fix does not have any impact on native.
-
-Signed-off-by: Stefano Stabellini <stefano.stabellini at eu.citrix.com>
----
- arch/x86/mm/init.c |   13 +++++++++++--
- 1 files changed, 11 insertions(+), 2 deletions(-)
-
-diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
-index 947f42a..66637bd 100644
---- a/arch/x86/mm/init.c
-+++ b/arch/x86/mm/init.c
-@@ -283,6 +283,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
- 	if (!after_bootmem && !start) {
- 		pud_t *pud;
- 		pmd_t *pmd;
-+		unsigned long addr;
-+		u64 size, memblock_addr;
- 
- 		mmu_cr4_features = read_cr4();
- 
-@@ -291,11 +293,18 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
- 		 * located on different 2M pages. cleanup_highmap(), however,
- 		 * can only consider _end when it runs, so destroy any
- 		 * mappings beyond _brk_end here.
-+		 * Respect memblock reserved regions.
- 		 */
- 		pud = pud_offset(pgd_offset_k(_brk_end), _brk_end);
- 		pmd = pmd_offset(pud, _brk_end - 1);
--		while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1))
--			pmd_clear(pmd);
-+		addr = (_brk_end + PMD_SIZE - 1) & PMD_MASK;
-+		while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1)) {
-+			memblock_addr = memblock_x86_find_in_range_size(__pa(addr),
-+					&size, PMD_SIZE);
-+			if (memblock_addr == (u64) __pa(addr) && size >= PMD_SIZE)
-+				pmd_clear(pmd);
-+			addr += PMD_SIZE;
-+		}
- 	}
- #endif
- 	__flush_tlb_all();
--- 
-1.7.4
-
-
-From 203b40c0915fc78e9ca1524c887623e7dc86422d Mon Sep 17 00:00:00 2001
-From: Bastian Blank <waldi at debian.org>
-Date: Thu, 29 Jul 2010 17:30:18 +0200
-Subject: [PATCH 063/203] xen: netback: Fix null-pointer access in netback_uevent
-
-The uevent method of Xen netback does not check if the network
-device is already set up and tries to dereference a null-pointer if not.
-
-Signed-off-by: Bastian Blank <waldi at debian.org>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/xenbus.c |   10 ++--------
- 1 files changed, 2 insertions(+), 8 deletions(-)
-
-diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
-index 17ff5cf..1fec65a 100644
---- a/drivers/xen/netback/xenbus.c
-+++ b/drivers/xen/netback/xenbus.c
-@@ -154,17 +154,11 @@ fail:
-  */
- static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env)
- {
--	struct backend_info *be;
--	struct xen_netif *netif;
-+	struct backend_info *be = dev_get_drvdata(&xdev->dev);
- 	char *val;
- 
- 	DPRINTK("netback_uevent");
- 
--	be = dev_get_drvdata(&xdev->dev);
--	if (!be)
--		return 0;
--	netif = be->netif;
--
- 	val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
- 	if (IS_ERR(val)) {
- 		int err = PTR_ERR(val);
-@@ -179,7 +173,7 @@ static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *en
- 		kfree(val);
- 	}
- 
--	if (add_uevent_var(env, "vif=%s", netif->dev->name))
-+	if (be && be->netif && add_uevent_var(env, "vif=%s", be->netif->dev->name))
- 		return -ENOMEM;
- 
- 	return 0;
--- 
-1.7.4
-
-
-From bee16269e5ba79706bdae554013cd14285b4f1ad Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Fri, 30 Jul 2010 15:16:47 +0100
-Subject: [PATCH 064/203] xen: netback: check if foreign pages are actually netback-created foreign pages.
-
-020ba906 "xen/netback: Multiple tasklets support." changed
-netbk_gop_frag_copy to attempt to look up a pending_tx_info for any
-foreign page, regardless of whether the page was a netback-foreign
-page.
-
-In the case of non-netback pages this can lead to dereferencing a NULL
-src_pend->netif.
-
-Restore the behaviour of netif_page_index prior to a3031942
-"xen/netback: Introduce a new struct type page_ext" by performing
-tests to ensure that page is a netback page and extend the same checks
-to netif_page_group.
-
-Actually combine netif_page_{index,group} into a single function
-since they are always called together and it saves duplicating all the
-checks.
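
The shape of the combined check is the interesting part: every step
must pass before the decoded (group, idx) pair may be trusted. A rough
standalone sketch (plain C; the token encoding and the sizes are made
up, the real code decodes page->mapping and consults struct xen_netbk):

#include <stdbool.h>
#include <stdio.h>

#define GROUPS   4
#define MAX_REQS 256

static void *mmap_pages[GROUPS][MAX_REQS];	/* hypothetical state */

static bool get_page_ext(unsigned int token, void *pg,
			 unsigned int *group, unsigned int *idx)
{
	unsigned int g = (token >> 16) - 1;	/* group is stored +1 */
	unsigned int i = token & 0xffff;

	if (g >= GROUPS)
		return false;		/* not one of our groups */
	if (i >= MAX_REQS)
		return false;		/* index out of range */
	if (mmap_pages[g][i] != pg)
		return false;		/* not the page we recorded */

	*group = g;
	*idx = i;
	return true;			/* only now is (g, i) usable */
}

int main(void)
{
	int dummy;
	unsigned int group, idx;

	mmap_pages[1][7] = &dummy;
	if (get_page_ext((2u << 16) | 7, &dummy, &group, &idx))
		printf("netback page: group=%u idx=%u\n", group, idx);
	return 0;
}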
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Cc: Jeremy Fitzhardinge <jeremy at goop.org>
-Cc: Xu, Dongxiao <dongxiao.xu at intel.com>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/netback.c |   56 ++++++++++++++++++++++++++++------------
- 1 files changed, 39 insertions(+), 17 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 911c85b..95df223 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -89,18 +89,37 @@ static inline void netif_set_page_ext(struct page *pg, unsigned int group,
- 	pg->mapping = ext.mapping;
- }
- 
--static inline unsigned int netif_page_group(const struct page *pg)
-+static inline int netif_get_page_ext(struct page *pg, unsigned int *_group, unsigned int *_idx)
- {
- 	union page_ext ext = { .mapping = pg->mapping };
-+	struct xen_netbk *netbk;
-+	unsigned int group, idx;
- 
--	return ext.e.group - 1;
--}
-+	if (!PageForeign(pg))
-+		return 0;
- 
--static inline unsigned int netif_page_index(const struct page *pg)
--{
--	union page_ext ext = { .mapping = pg->mapping };
-+	group = ext.e.group - 1;
-+
-+	if (group < 0 || group >= xen_netbk_group_nr)
-+		return 0;
-+
-+	netbk = &xen_netbk[group];
-+
-+	if (netbk->mmap_pages == NULL)
-+		return 0;
- 
--	return ext.e.idx;
-+	idx = ext.e.idx;
-+
-+	if ((idx < 0) || (idx >= MAX_PENDING_REQS))
-+		return 0;
++	*gopp = gop + 1;
++	return err;
++}
 +
-+	if (netbk->mmap_pages[idx] != pg)
-+		return 0;
++static void xen_netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
++{
++	struct skb_shared_info *shinfo = skb_shinfo(skb);
++	int nr_frags = shinfo->nr_frags;
++	int i;
 +
-+	*_group = group;
-+	*_idx = idx;
++	for (i = 0; i < nr_frags; i++) {
++		skb_frag_t *frag = shinfo->frags + i;
++		struct xen_netif_tx_request *txp;
++		unsigned long pending_idx;
 +
-+	return 1;
- }
- 
- /*
-@@ -386,8 +405,12 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
- {
- 	struct gnttab_copy *copy_gop;
- 	struct netbk_rx_meta *meta;
--	int group = netif_page_group(page);
--	int idx = netif_page_index(page);
-+	/*
-+	 * These variables are used iff netif_get_page_ext returns true,
-+	 * in which case they are guaranteed to be initialized.
-+	 */
-+	unsigned int uninitialized_var(group), uninitialized_var(idx);
-+	int foreign = netif_get_page_ext(page, &group, &idx);
- 	unsigned long bytes;
- 
- 	/* Data must not cross a page boundary. */
-@@ -445,7 +468,7 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
- 
- 		copy_gop = npo->copy + npo->copy_prod++;
- 		copy_gop->flags = GNTCOPY_dest_gref;
--		if (PageForeign(page)) {
-+		if (foreign) {
- 			struct xen_netbk *netbk = &xen_netbk[group];
- 			struct pending_tx_info *src_pend;
- 
-@@ -1535,14 +1558,13 @@ static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
- 
- static void netif_page_release(struct page *page, unsigned int order)
- {
--	int group = netif_page_group(page);
--	int idx = netif_page_index(page);
--	struct xen_netbk *netbk = &xen_netbk[group];
-+	unsigned int group, idx;
-+	int foreign = netif_get_page_ext(page, &group, &idx);
++		pending_idx = (unsigned long)frag->page;
 +
-+	BUG_ON(!foreign);
- 	BUG_ON(order);
--	BUG_ON(group < 0 || group >= xen_netbk_group_nr);
--	BUG_ON(idx < 0 || idx >= MAX_PENDING_REQS);
--	BUG_ON(netbk->mmap_pages[idx] != page);
--	netif_idx_release(netbk, idx);
++		txp = &netbk->pending_tx_info[pending_idx].req;
++		frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx));
++		frag->size = txp->size;
++		frag->page_offset = txp->offset;
 +
-+	netif_idx_release(&xen_netbk[group], idx);
- }
- 
- irqreturn_t netif_be_int(int irq, void *dev_id)
--- 
-1.7.4
-
-
-From 3f33055e8af9d74e35670c52438e190d54ac5f9e Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Fri, 30 Jul 2010 15:16:46 +0100
-Subject: [PATCH 065/203] xen: netback: do not unleash netback threads until initialisation is complete
-
-Otherwise netbk_action_thread can reference &netbk->net_schedule_list
-(via tx_work_todo) before it is initialised. Until now it was zeroed,
-which is probably safe but not exactly robust.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Cc: Jeremy Fitzhardinge <jeremy at goop.org>
-Cc: Xu, Dongxiao <dongxiao.xu at intel.com>
-Cc: Paul Durrant <Paul.Durrant at citrix.com>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/netback.c |    4 +++-
- 1 files changed, 3 insertions(+), 1 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 95df223..2646383 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -1782,7 +1782,6 @@ static int __init netback_init(void)
- 
- 			if (!IS_ERR(netbk->kthread.task)) {
- 				kthread_bind(netbk->kthread.task, group);
--				wake_up_process(netbk->kthread.task);
- 			} else {
- 				printk(KERN_ALERT
- 					"kthread_run() fails at netback\n");
-@@ -1808,6 +1807,9 @@ static int __init netback_init(void)
- 		spin_lock_init(&netbk->net_schedule_list_lock);
- 
- 		atomic_set(&netbk->netfront_count, 0);
-+
-+		if (MODPARM_netback_kthread)
-+			wake_up_process(netbk->kthread.task);
- 	}
- 
- 	netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
--- 
-1.7.4
-
-
-From 329a17f3dbb9c127d431d09df0ed63ec28b8c702 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ijc at hellion.org.uk>
-Date: Thu, 2 Sep 2010 14:36:40 +0100
-Subject: [PATCH 066/203] xen: netback: save interrupt state in add_to_net_schedule_list_tail
-
-add_to_net_schedule_list_tail is called from both hard interrupt context
-(netif_be_int) and soft interrupt/process context
-(netif_schedule_work) so use the interrupt state saving spinlock
-variants.
-
-Fixes:
-        ------------[ cut here ]------------
-        WARNING: at kernel/lockdep.c:2323 trace_hardirqs_on_caller+0xef/0x1a0()
-        Hardware name: PowerEdge 860
-        Modules linked in: rtc_cmos rtc_core rtc_lib
-        Pid: 16, comm: xenwatch Not tainted 2.6.32.18-x86_32p-xen0-00850-ge6b9b2c #98
-        Call Trace:
-         [<c103951c>] warn_slowpath_common+0x6c/0xc0
-         [<c1039585>] warn_slowpath_null+0x15/0x20
-         [<c105f60f>] trace_hardirqs_on_caller+0xef/0x1a0
-         [<c105f6cb>] trace_hardirqs_on+0xb/0x10
-         [<c136cc72>] _spin_unlock_irq+0x22/0x40
-         [<c11ab9ef>] add_to_net_schedule_list_tail+0x5f/0xb0
-         [<c11aba6b>] netif_be_int+0x2b/0x120
-         [<c106dd8e>] handle_IRQ_event+0x2e/0xe0
-         [<c106f98e>] handle_level_irq+0x6e/0xf0
-         [<c1197cdf>] __xen_evtchn_do_upcall+0x16f/0x190
-         [<c11981b8>] xen_evtchn_do_upcall+0x28/0x40
-         [<c100b487>] xen_do_upcall+0x7/0xc
-         [<c119bcf9>] xs_talkv+0x59/0x1a0
-         [<c119bf6a>] xs_single+0x3a/0x50
-         [<c119c6f9>] xenbus_read+0x39/0x60
-         [<c11adf77>] frontend_changed+0x3e7/0x6a0
-         [<c119d35a>] xenbus_otherend_changed+0x8a/0xa0
-         [<c119d572>] frontend_changed+0x12/0x20
-         [<c119b9dc>] xenwatch_thread+0x7c/0x140
-         [<c104ea74>] kthread+0x74/0x80
-         [<c100b433>] kernel_thread_helper+0x7/0x10
-        ---[ end trace 48d73949a8e0909a ]---
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/netback.c |    6 ++++--
- 1 files changed, 4 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 2646383..1d080f6 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -787,17 +787,19 @@ static void remove_from_net_schedule_list(struct xen_netif *netif)
- 
- static void add_to_net_schedule_list_tail(struct xen_netif *netif)
- {
-+	unsigned long flags;
++		skb->len += txp->size;
++		skb->data_len += txp->size;
++		skb->truesize += txp->size;
 +
- 	struct xen_netbk *netbk = &xen_netbk[netif->group];
- 	if (__on_net_schedule_list(netif))
- 		return;
- 
--	spin_lock_irq(&netbk->net_schedule_list_lock);
-+	spin_lock_irqsave(&netbk->net_schedule_list_lock, flags);
- 	if (!__on_net_schedule_list(netif) &&
- 	    likely(netif_schedulable(netif))) {
- 		list_add_tail(&netif->list, &netbk->net_schedule_list);
- 		netif_get(netif);
- 	}
--	spin_unlock_irq(&netbk->net_schedule_list_lock);
-+	spin_unlock_irqrestore(&netbk->net_schedule_list_lock, flags);
- }
- 
- void netif_schedule_work(struct xen_netif *netif)
--- 
-1.7.4
-
-
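The rule the fix above applies, shown in isolation: a lock taken from a
hard interrupt handler as well as from process/softirq context must use the
flags-saving variants, because spin_unlock_irq() re-enables interrupts
unconditionally. A self-contained sketch (sched_lock, sched_list and
schedule_entry are illustrative names):

#include <linux/spinlock.h>
#include <linux/list.h>

static DEFINE_SPINLOCK(sched_lock);
static LIST_HEAD(sched_list);

/* Callable from process, softirq *and* hardirq context. */
static void schedule_entry(struct list_head *entry)
{
	unsigned long flags;

	/*
	 * spin_lock_irq()/spin_unlock_irq() would re-enable interrupts
	 * unconditionally on unlock -- wrong when the caller is itself
	 * running with interrupts disabled (e.g. an irq handler, as in
	 * the lockdep trace above). Saving and restoring the flags
	 * preserves the caller's IRQ state instead.
	 */
	spin_lock_irqsave(&sched_lock, flags);
	if (list_empty(entry))	/* detached iff list_del_init()'d */
		list_add_tail(entry, &sched_list);
	spin_unlock_irqrestore(&sched_lock, flags);
}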
-From 7edc1ededaaa953877d2a052886db4ea2f69effe Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Fri, 8 Oct 2010 17:11:51 +0100
-Subject: [PATCH 067/203] xen: netback: increase size of rx_meta array.
-
-We can end up needing as many of these as we need grant copy operations,
-so increase the array size to match.
-
-Crash observed on XenServer:
-kernel: ------------[ cut here ]------------
-kernel: kernel BUG at drivers/xen/netback/netback.c:834!
-kernel: invalid opcode: 0000 [#1] SMP
-kernel: last sysfs file: /sys/devices/xen-backend/vbd-10-768/statistics/rd_usecs
-kernel: Process netback (pid: 1413, ti=ec8a4000 task=ed0a6b70 task.ti=ec8a4000)
-kernel: Stack: 00000000 00000612 00000001 00000000 00020000 00000000 ecfbe000 00000000
-kernel:        ec8a5f80 ec8a5f98 ec8a5fac 00000000 c0537220 c0539220 00000000 c0534220
-kernel:        cd7afaa0 cd7afaa0 0000000c 00000014 062de396 00000001 00000001 00000014
-kernel: Call Trace:
-kernel:  [<c0285f10>] ? netbk_action_thread+0x0/0x1fe0
-kernel:  [<c013daf2>] ? kthread+0x42/0x70
-kernel:  [<c013dab0>] ? kthread+0x0/0x70
-kernel:  [<c010569b>] ? kernel_thread_helper+0x7/0x10
-kernel:  =======================
-kernel: Code: 00 00 c7 42 08 20 82 53 c0 8b 85 e4 fe ff ff c7 42 10 00 00 00 00 \
-              c7 42 14 f0 7f 00 00 89 42 0c 8b 8d ec fe ff ff e9 3e e9 ff ff <0f> \
-              0b eb fe 0f 0b eb fe 0f 0b eb fe 0f 0b eb fe 31 c0 e8 bf 31
-kernel: EIP: [<c028790a>] netbk_action_thread+0x19fa/0x1fe0 SS:ESP 0069:ec8a5d98
-
-Corresponding to
-	BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/netback/common.h |    2 +-
- 1 files changed, 1 insertions(+), 1 deletions(-)
-
-diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
-index a5f3759..ce0041a 100644
---- a/drivers/xen/netback/common.h
-+++ b/drivers/xen/netback/common.h
-@@ -319,7 +319,7 @@ struct xen_netbk {
- 	struct gnttab_copy grant_copy_op[2*NET_RX_RING_SIZE];
- 	unsigned char rx_notify[NR_IRQS];
- 	u16 notify_list[NET_RX_RING_SIZE];
--	struct netbk_rx_meta meta[NET_RX_RING_SIZE];
-+	struct netbk_rx_meta meta[2*NET_RX_RING_SIZE];
- };
- 
- extern struct xen_netbk *xen_netbk;
--- 
-1.7.4
-
-
-From 4fbb266d319fcb3349eff2f9c42cdee51c5fcb5f Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Fri, 15 Oct 2010 13:41:44 +0100
-Subject: [PATCH 068/203] xen: netback: take net_schedule_list_lock when removing entry from net_schedule_list
-
-There is a race in net_tx_build_mops between checking if
-net_schedule_list is empty and actually dequeuing the first entry on
-the list. If another thread dequeues the only entry on the list during
-this window we crash because list_first_entry expects a non-empty
-list, like so:
-
-[ 0.133127] BUG: unable to handle kernel NULL pointer dereference at 00000008
-[ 0.133132] IP: [<c12aae71>] net_tx_build_mops+0x91/0xa70
-[ 0.133142] *pdpt = 0000000000000000 *pde = 000000000000000f
-[ 0.133147] Oops: 0002 1 SMP
-[ 0.133150] last sysfs file:
-[ 0.133152] Modules linked in:
-[ 0.133154]
-[ 0.133156] Pid: 55, comm: netback/1 Not tainted (2.6.32.12-0.7.1 #1) Latitude E4310
-[ 0.133158] EIP: 0061:[<c12aae71>] EFLAGS: 00010202 CPU: 1
-[ 0.133161] EIP is at net_tx_build_mops+0x91/0xa70
-[ 0.133163] EAX: 00000012 EBX: 00000008 ECX: e112b734 EDX: e112b76c
-[ 0.133165] ESI: ffffff30 EDI: 00000000 EBP: e112b734 ESP: dfe85d98
-[ 0.133167] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0069
-[ 0.133169] Process netback/1 (pid: 55, ti=dfe84000 task=dfe83340 task.ti=dfe84000)
-[ 0.133170] Stack:
-[ 0.133172] 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
-[ 0.133177] <0> 00000000 e112b734 e112ec08 e112b7f8 e112ec08 ffffff30 00000000 00000000
-[ 0.133186] <0> 00000000 00000000 00000000 e112b76c dfe85df4 00000001 00000000 aaaaaaaa
-[ 0.133193] Call Trace:
-[ 0.133202] [<c12abc7f>] net_tx_action+0x42f/0xac0
-[ 0.133206] [<c12ac37a>] netbk_action_thread+0x6a/0x1b0
-[ 0.133212] [<c1057444>] kthread+0x74/0x80
-[ 0.133218] [<c10049d7>] kernel_thread_helper+0x7/0x10
-[ 0.133220] Code: c4 00 00 00 89 74 24 58 39 74 24 2c 0f 84 c7 06 00 00 8b 74 24 \
-                  58 8b 5c 24 58 81 ee d0 00 00 00 83 c3 08 89 74 24 34 8b 7c 24 \
-             58 <f0> ff 47 08 89 f0 e8 b4 f9 ff ff 8b 46 2c 8b 56 34 89 44 24 5c
-[ 0.133261] EIP: [<c12aae71>] net_tx_build_mops+0x91/0xa70 SS:ESP 0069:dfe85d98
-[ 0.133265] CR2: 0000000000000008
-[ 0.133274] --[ end trace e2c5c15f54bd9d93 ]--
-
-Therefore, after the initial lock-free check for an empty list, check
-again with the lock held before dequeueing the entry.
-
-Based on a patch by Tomasz Wroblewski.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Cc: Tomasz Wroblewski <tomasz.wroblewski at citrix.com>
----
- drivers/xen/netback/netback.c |   35 ++++++++++++++++++++++++++++-------
- 1 files changed, 28 insertions(+), 7 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 1d080f6..3b03435 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -774,15 +774,34 @@ static int __on_net_schedule_list(struct xen_netif *netif)
- 	return !list_empty(&netif->list);
- }
- 
-+/* Must be called with net_schedule_list_lock held */
- static void remove_from_net_schedule_list(struct xen_netif *netif)
- {
--	struct xen_netbk *netbk = &xen_netbk[netif->group];
--	spin_lock_irq(&netbk->net_schedule_list_lock);
- 	if (likely(__on_net_schedule_list(netif))) {
- 		list_del_init(&netif->list);
- 		netif_put(netif);
- 	}
++		/* Take an extra reference to offset xen_netbk_idx_release */
++		get_page(netbk->mmap_pages[pending_idx]);
++		xen_netbk_idx_release(netbk, pending_idx);
++	}
 +}
 +
-+static struct xen_netif *poll_net_schedule_list(struct xen_netbk *netbk)
++static int xen_netbk_get_extras(struct xenvif *vif,
++				struct xen_netif_extra_info *extras,
++				int work_to_do)
 +{
-+	struct xen_netif *netif = NULL;
++	struct xen_netif_extra_info extra;
++	RING_IDX cons = vif->tx.req_cons;
 +
-+	spin_lock_irq(&netbk->net_schedule_list_lock);
-+	if (list_empty(&netbk->net_schedule_list))
-+		goto out;
++	do {
++		if (unlikely(work_to_do-- <= 0)) {
++			netdev_dbg(vif->dev, "Missing extra info\n");
++			return -EBADR;
++		}
 +
-+	netif = list_first_entry(&netbk->net_schedule_list,
-+				 struct xen_netif, list);
-+	if (!netif)
-+		goto out;
++		memcpy(&extra, RING_GET_REQUEST(&vif->tx, cons),
++		       sizeof(extra));
++		if (unlikely(!extra.type ||
++			     extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
++			vif->tx.req_cons = ++cons;
++			netdev_dbg(vif->dev,
++				   "Invalid extra type: %d\n", extra.type);
++			return -EINVAL;
++		}
 +
-+	netif_get(netif);
++		memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
++		vif->tx.req_cons = ++cons;
++	} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
 +
-+	remove_from_net_schedule_list(netif);
-+out:
- 	spin_unlock_irq(&netbk->net_schedule_list_lock);
-+	return netif;
- }
- 
- static void add_to_net_schedule_list_tail(struct xen_netif *netif)
-@@ -817,7 +836,10 @@ void netif_schedule_work(struct xen_netif *netif)
- 
- void netif_deschedule_work(struct xen_netif *netif)
- {
-+	struct xen_netbk *netbk = &xen_netbk[netif->group];
-+	spin_lock_irq(&netbk->net_schedule_list_lock);
- 	remove_from_net_schedule_list(netif);
-+	spin_unlock_irq(&netbk->net_schedule_list_lock);
- }
- 
- 
-@@ -1301,12 +1323,11 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk)
- 		int work_to_do;
- 		unsigned int data_len;
- 		pending_ring_idx_t index;
--	
++	return work_to_do;
++}
 +
- 		/* Get a netif from the list with work to do. */
--		netif = list_first_entry(&netbk->net_schedule_list,
--				struct xen_netif, list);
--		netif_get(netif);
--		remove_from_net_schedule_list(netif);
-+		netif = poll_net_schedule_list(netbk);
-+		if (!netif)
-+			continue;
- 
- 		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
- 		if (!work_to_do) {
--- 
-1.7.4
-
-
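Distilled, the fix above turns an unsafe check-then-act sequence into a
dequeue that re-validates under the lock. A generic sketch (struct item and
poll_first are illustrative names, mirroring poll_net_schedule_list):

#include <linux/spinlock.h>
#include <linux/list.h>

struct item {
	struct list_head list;
};

/*
 * Dequeue the first item, or return NULL. Never trust a lock-free
 * emptiness check: list_first_entry() is only defined on a non-empty
 * list, so re-check emptiness with the lock held.
 */
static struct item *poll_first(struct list_head *head, spinlock_t *lock)
{
	struct item *it = NULL;

	spin_lock_irq(lock);
	if (!list_empty(head)) {
		it = list_first_entry(head, struct item, list);
		list_del_init(&it->list);
	}
	spin_unlock_irq(lock);
	return it;
}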
-From d58a964195f593879cc0160ac0c6f31bc95cdc6e Mon Sep 17 00:00:00 2001
-From: Paul Durrant <paul.durrant at citrix.com>
-Date: Wed, 15 Dec 2010 09:48:12 +0000
-Subject: [PATCH 069/203] xen: netback: Re-define PKT_PROT_LEN to be bigger.
-
-Re-define PKT_PROT_LEN to be big enough to handle maximal IPv4 and TCP options,
-and phrase the definition so that it's reasonably obvious what it's for.
-
-Signed-off-by: Paul Durrant <paul.durrant at citrix.com>
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/xen/netback/netback.c |   14 +++++++++-----
- 1 files changed, 9 insertions(+), 5 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 3b03435..9bbd230 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -36,9 +36,11 @@
- 
- #include "common.h"
- 
--#include <linux/tcp.h>
--#include <linux/udp.h>
- #include <linux/kthread.h>
-+#include <linux/if_vlan.h>
-+#include <linux/udp.h>
++static int netbk_set_skb_gso(struct xenvif *vif,
++			     struct sk_buff *skb,
++			     struct xen_netif_extra_info *gso)
++{
++	if (!gso->u.gso.size) {
++		netdev_dbg(vif->dev, "GSO size must not be zero.\n");
++		return -EINVAL;
++	}
 +
-+#include <net/tcp.h>
- 
- #include <xen/balloon.h>
- #include <xen/events.h>
-@@ -125,10 +127,12 @@ static inline int netif_get_page_ext(struct page *pg, unsigned int *_group, unsi
- /*
-  * This is the amount of packet we copy rather than map, so that the
-  * guest can't fiddle with the contents of the headers while we do
-- * packet processing on them (netfilter, routing, etc). 72 is enough
-- * to cover TCP+IP headers including options.
-+ * packet processing on them (netfilter, routing, etc).
-  */
--#define PKT_PROT_LEN 72
-+#define PKT_PROT_LEN    (ETH_HLEN + \
-+			 VLAN_HLEN + \
-+			 sizeof(struct iphdr) + MAX_IPOPTLEN + \
-+			 sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE)
- 
- static inline pending_ring_idx_t pending_index(unsigned i)
- {
--- 
-1.7.4
-
-
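Plugging in the usual kernel values for those constants gives a feel for
the change (a worked check assuming the standard definitions: ETH_HLEN 14,
VLAN_HLEN 4, 20-byte base IPv4 and TCP headers, MAX_IPOPTLEN 40,
MAX_TCP_OPTION_SPACE 40):

/*
 *   PKT_PROT_LEN = ETH_HLEN               (14)
 *                + VLAN_HLEN              ( 4)
 *                + sizeof(struct iphdr)   (20)
 *                + MAX_IPOPTLEN           (40)
 *                + sizeof(struct tcphdr)  (20)
 *                + MAX_TCP_OPTION_SPACE   (40)
 *                = 138 bytes
 *
 * versus the previous hard-coded 72, which could not cover maximal
 * IP options plus maximal TCP options (20 + 40 + 20 + 40 = 120).
 */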
-From 7120999e00f068383bc16d03cceae8d6d511c43a Mon Sep 17 00:00:00 2001
-From: Paul Durrant <paul.durrant at citrix.com>
-Date: Tue, 14 Dec 2010 20:35:19 +0000
-Subject: [PATCH 070/203] xen: netback: Don't count packets we don't actually receive.
-
-Make sure we only bump rx_packets when we're definitely going to call netif_rx_ni().
-
-Signed-off-by: Paul Durrant <paul.durrant at citrix.com>
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/xen/netback/netback.c |    6 +++---
- 1 files changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 9bbd230..78d3509 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -1512,9 +1512,6 @@ static void net_tx_submit(struct xen_netbk *netbk)
- 		skb->dev      = netif->dev;
- 		skb->protocol = eth_type_trans(skb, skb->dev);
- 
--		netif->stats.rx_bytes += skb->len;
--		netif->stats.rx_packets++;
--
- 		if (skb->ip_summed == CHECKSUM_PARTIAL) {
- 			if (skb_checksum_setup(skb)) {
- 				DPRINTK("Can't setup checksum in net_tx_action\n");
-@@ -1530,6 +1527,9 @@ static void net_tx_submit(struct xen_netbk *netbk)
- 			continue;
- 		}
- 
-+		netif->stats.rx_bytes += skb->len;
-+		netif->stats.rx_packets++;
++	/* Currently only TCPv4 S.O. is supported. */
++	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
++		netdev_dbg(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type);
++		return -EINVAL;
++	}
 +
- 		netif_rx_ni(skb);
- 		netif->dev->last_rx = jiffies;
- 	}
--- 
-1.7.4
-
-
-From 3c1f462cb05ad5eb5bb8e65cf6feb9a4e4363ebf Mon Sep 17 00:00:00 2001
-From: Paul Durrant <paul.durrant at citrix.com>
-Date: Tue, 14 Dec 2010 20:35:20 +0000
-Subject: [PATCH 071/203] xen: netback: Remove the 500ms timeout to restart the netif queue.
-
-It is generally unhelpful: it results in a massive tail-drop should a guest become
-unresponsive for a relatively short period of time, and no back-pressure (other than
-that caused by a higher-layer protocol) is applied to the sender.
-
-Signed-off-by: Paul Durrant <paul.durrant at citrix.com>
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/xen/netback/netback.c |   20 +-------------------
- 1 files changed, 1 insertions(+), 19 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 78d3509..2caa5f8 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -275,13 +275,6 @@ static inline int netbk_queue_full(struct xen_netif *netif)
- 	       ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
- }
- 
--static void tx_queue_callback(unsigned long data)
--{
--	struct xen_netif *netif = (struct xen_netif *)data;
--	if (netif_schedulable(netif))
--		netif_wake_queue(netif->dev);
--}
--
- /* Figure out how many ring slots we're going to need to send @skb to
-    the guest. */
- static unsigned count_skb_slots(struct sk_buff *skb, struct xen_netif *netif)
-@@ -364,19 +357,8 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
- 		netif->rx.sring->req_event = netif->rx_req_cons_peek +
- 			netbk_max_required_rx_slots(netif);
- 		mb(); /* request notification /then/ check & stop the queue */
--		if (netbk_queue_full(netif)) {
-+		if (netbk_queue_full(netif))
- 			netif_stop_queue(dev);
--			/*
--			 * Schedule 500ms timeout to restart the queue, thus
--			 * ensuring that an inactive queue will be drained.
--			 * Packets will be immediately be dropped until more
--			 * receive buffers become available (see
--			 * netbk_queue_full() check above).
--			 */
--			netif->tx_queue_timeout.data = (unsigned long)netif;
--			netif->tx_queue_timeout.function = tx_queue_callback;
--			mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
--		}
- 	}
- 	skb_queue_tail(&netbk->rx_queue, skb);
- 
--- 
-1.7.4
-
-
-From d70d4906c2736dadc5c287caa36c5880947f8688 Mon Sep 17 00:00:00 2001
-From: Paul Durrant <paul.durrant at citrix.com>
-Date: Tue, 14 Dec 2010 20:35:21 +0000
-Subject: [PATCH 072/203] xen: netback: Add a missing test to tx_work_todo.
-
-Add a test so that, when netback is using worker threads, net_tx_action()
-gets called in a timely manner when the pending_inuse list is populated.
-
-Signed-off-by: Paul Durrant <paul.durrant at citrix.com>
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/xen/netback/netback.c |    4 ++++
- 1 files changed, 4 insertions(+), 0 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 2caa5f8..dd52d01 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -1695,6 +1695,10 @@ static inline int tx_work_todo(struct xen_netbk *netbk)
- 	if (netbk->dealloc_cons != netbk->dealloc_prod)
- 		return 1;
- 
-+	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
-+	    !list_empty(&netbk->pending_inuse_head))
-+		return 1;
++	skb_shinfo(skb)->gso_size = gso->u.gso.size;
++	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
 +
- 	if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
- 			!list_empty(&netbk->net_schedule_list))
- 		return 1;
--- 
-1.7.4
-
-
-From 8811a0cf07ccd1b9f40eaae76de3aa2792c9fd28 Mon Sep 17 00:00:00 2001
-From: Paul Durrant <paul.durrant at citrix.com>
-Date: Tue, 14 Dec 2010 20:35:22 +0000
-Subject: [PATCH 073/203] xen: netback: Re-factor net_tx_action_dealloc() slightly.
-
-There is no need for processing of the pending_inuse list to be within the dealloc_prod/cons
-loop.
-
-Signed-off-by: Paul Durrant <paul.durrant at citrix.com>
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/xen/netback/netback.c |   26 ++++++++++++++------------
- 1 files changed, 14 insertions(+), 12 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index dd52d01..53b3a0e 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -906,11 +906,20 @@ static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
- 			gop++;
- 		}
- 
--		if (netbk_copy_skb_mode != NETBK_DELAYED_COPY_SKB ||
--		    list_empty(&netbk->pending_inuse_head))
--			break;
-+	} while (dp != netbk->dealloc_prod);
++	/* Header must be checked, and gso_segs computed. */
++	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
++	skb_shinfo(skb)->gso_segs = 0;
 +
-+	netbk->dealloc_cons = dc;
- 
--		/* Copy any entries that have been pending for too long. */
-+	ret = HYPERVISOR_grant_table_op(
-+		GNTTABOP_unmap_grant_ref, netbk->tx_unmap_ops,
-+		gop - netbk->tx_unmap_ops);
-+	BUG_ON(ret);
-+
-+	/*
-+	 * Copy any entries that have been pending for too long
-+	 */
-+	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
-+	    !list_empty(&netbk->pending_inuse_head)) {
- 		list_for_each_entry_safe(inuse, n,
- 				&netbk->pending_inuse_head, list) {
- 			struct pending_tx_info *pending_tx_info;
-@@ -936,14 +945,7 @@ static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
- 
- 			break;
- 		}
--	} while (dp != netbk->dealloc_prod);
--
--	netbk->dealloc_cons = dc;
--
--	ret = HYPERVISOR_grant_table_op(
--		GNTTABOP_unmap_grant_ref, netbk->tx_unmap_ops,
--		gop - netbk->tx_unmap_ops);
--	BUG_ON(ret);
-+	}
- 
- 	list_for_each_entry_safe(inuse, n, &list, list) {
- 		struct pending_tx_info *pending_tx_info;
--- 
-1.7.4
-
-
-From 9481475c92f00d15100d0a5083ef338f1b528506 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Wed, 5 Jan 2011 09:57:37 +0000
-Subject: [PATCH 074/203] xen: netback: Drop GSO SKBs which do not have csum_blank.
-
-The Linux network stack expects all GSO SKBs to have ip_summed ==
-CHECKSUM_PARTIAL (which implies that the frame contains a partial
-checksum) and the Xen network ring protocol similarly expects an SKB
-which has GSO set to also have NETRX_csum_blank (which also implies a
-partial checksum). Therefore drop such frames on receive; otherwise
-they will trigger the warning in skb_gso_segment.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Cc: Jeremy Fitzhardinge <jeremy at goop.org>
-Cc: xen-devel at lists.xensource.com
----
- drivers/xen/netback/netback.c |    4 ++++
- 1 files changed, 4 insertions(+), 0 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 53b3a0e..2766b93 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -1502,6 +1502,10 @@ static void net_tx_submit(struct xen_netbk *netbk)
- 				kfree_skb(skb);
- 				continue;
- 			}
-+		} else if (skb_is_gso(skb)) {
-+			DPRINTK("Dropping GSO but not CHECKSUM_PARTIAL skb\n");
-+			kfree_skb(skb);
-+			continue;
- 		}
- 
- 		if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
--- 
-1.7.4
-
-
-From a45f9a0889210fba5c73994ec2fa1c36f82a435f Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Wed, 19 Jan 2011 12:43:38 +0000
-Subject: [PATCH 075/203] xen: netback: completely remove tx_queue_timer
-
-"xen: netback: Remove the 500ms timeout to restart the netif queue." missed
-removing the timer initialisation.
-
-Also remove the related comment which has been obsolete since the default for
-MODPARM_copy_skb was switched to true some time ago.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Cc: Paul Durrant <Paul.Durrant at citrix.com>
----
- drivers/xen/netback/common.h    |    3 ---
- drivers/xen/netback/interface.c |   13 +------------
- 2 files changed, 1 insertions(+), 15 deletions(-)
-
-diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
-index ce0041a..7e03a46 100644
---- a/drivers/xen/netback/common.h
-+++ b/drivers/xen/netback/common.h
-@@ -99,9 +99,6 @@ struct xen_netif {
- 	unsigned long   remaining_credit;
- 	struct timer_list credit_timeout;
- 
--	/* Enforce draining of the transmit queue. */
--	struct timer_list tx_queue_timeout;
--
- 	/* Statistics */
- 	int nr_copied_skbs;
- 
-diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
-index 2e8508a..efdc21c 100644
---- a/drivers/xen/netback/interface.c
-+++ b/drivers/xen/netback/interface.c
-@@ -41,15 +41,7 @@
-  * Module parameter 'queue_length':
-  *
-  * Enables queuing in the network stack when a client has run out of receive
-- * descriptors. Although this feature can improve receive bandwidth by avoiding
-- * packet loss, it can also result in packets sitting in the 'tx_queue' for
-- * unbounded time. This is bad if those packets hold onto foreign resources.
-- * For example, consider a packet that holds onto resources belonging to the
-- * guest for which it is queued (e.g., packet received on vif1.0, destined for
-- * vif1.1 which is not activated in the guest): in this situation the guest
-- * will never be destroyed, unless vif1.1 is taken down. To avoid this, we
-- * run a timer (tx_queue_timeout) to drain the queue when the interface is
-- * blocked.
-+ * descriptors.
-  */
- static unsigned long netbk_queue_length = 32;
- module_param_named(queue_length, netbk_queue_length, ulong, 0644);
-@@ -295,8 +287,6 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
- 	/* Initialize 'expires' now: it's used to track the credit window. */
- 	netif->credit_timeout.expires = jiffies;
- 
--	init_timer(&netif->tx_queue_timeout);
--
- 	dev->netdev_ops	= &netback_ops;
- 	netif_set_features(netif);
- 	SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
-@@ -458,7 +448,6 @@ void netif_disconnect(struct xen_netif *netif)
- 	wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0);
- 
- 	del_timer_sync(&netif->credit_timeout);
--	del_timer_sync(&netif->tx_queue_timeout);
- 
- 	if (netif->irq)
- 		unbind_from_irqhandler(netif->irq, netif);
--- 
-1.7.4
-
-
-From 2794a483aeafab1c0765a0478b760978b361bad9 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Fri, 28 Jan 2011 13:11:45 +0000
-Subject: [PATCH 076/203] Revert "xen: netback: Drop GSO SKBs which do not have csum_blank."
-
-This reverts commit 082386b4a581b2ba5a125cc8944a57ceb33ff37c.
-
-Based on discussion surrounding the netfront equivalent fix
-(http://patchwork.ozlabs.org/patch/80389/) this issue will be fixed
-differently.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/xen/netback/netback.c |    4 ----
- 1 files changed, 0 insertions(+), 4 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 2766b93..53b3a0e 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -1502,10 +1502,6 @@ static void net_tx_submit(struct xen_netbk *netbk)
- 				kfree_skb(skb);
- 				continue;
- 			}
--		} else if (skb_is_gso(skb)) {
--			DPRINTK("Dropping GSO but not CHECKSUM_PARTIAL skb\n");
--			kfree_skb(skb);
--			continue;
- 		}
- 
- 		if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
--- 
-1.7.4
-
-
-From 01cd16474ee82db4d958c9cf8e481be897fa4ca6 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Thu, 27 Jan 2011 15:43:46 +0000
-Subject: [PATCH 077/203] xen: netback: handle incoming GSO SKBs which are not CHECKSUM_PARTIAL
-
-The Linux network stack expects all GSO SKBs to have ip_summed ==
-CHECKSUM_PARTIAL (which implies that the frame contains a partial
-checksum) and the Xen network ring protocol similarly expects an SKB
-which has GSO set to also have NETRX_csum_blank (which also implies a
-partial checksum).
-
-However there have been cases of buggy guests which mark a frame as
-GSO but do not set csum_blank. If we detect that we are receiving such a
-frame (which manifests as ip_summed != PARTIAL && skb_is_gso) then
-force the SKB to partial and recalculate the checksum, since we cannot
-rely on the peer having done so if they have not set csum_blank.
-
-Add an ethtool stat to track occurrences of this event.
-
-A corresponding fix was made to netfront in e0ce4af920eb028f38bfd680b1d733f4c7a0b7cf.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Cc: Jeremy Fitzhardinge <jeremy at goop.org>
-Cc: xen-devel at lists.xensource.com
----
- drivers/xen/netback/common.h    |    1 +
- drivers/xen/netback/interface.c |    9 +++++++-
- drivers/xen/netback/netback.c   |   43 ++++++++++++++++++++++++++++++++------
- 3 files changed, 45 insertions(+), 8 deletions(-)
-
-diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
-index 7e03a46..f660eb5 100644
---- a/drivers/xen/netback/common.h
-+++ b/drivers/xen/netback/common.h
-@@ -101,6 +101,7 @@ struct xen_netif {
- 
- 	/* Statistics */
- 	int nr_copied_skbs;
-+	int rx_gso_checksum_fixup;
- 
- 	/* Miscellaneous private stuff. */
- 	struct list_head list;  /* scheduling list */
-diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
-index efdc21c..d3af68e 100644
---- a/drivers/xen/netback/interface.c
-+++ b/drivers/xen/netback/interface.c
-@@ -189,7 +189,14 @@ static const struct netif_stat {
- 	char name[ETH_GSTRING_LEN];
- 	u16 offset;
- } netbk_stats[] = {
--	{ "copied_skbs", offsetof(struct xen_netif, nr_copied_skbs) },
-+	{
-+		"copied_skbs",
-+		offsetof(struct xen_netif, nr_copied_skbs)
-+	},
-+	{
-+		"rx_gso_checksum_fixup",
-+		offsetof(struct xen_netif, rx_gso_checksum_fixup)
-+	},
- };
- 
- static int netbk_get_sset_count(struct net_device *dev, int string_set)
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 53b3a0e..8189199 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -1222,11 +1222,28 @@ static int netbk_set_skb_gso(struct sk_buff *skb, struct xen_netif_extra_info *g
- 	return 0;
- }
- 
--static int skb_checksum_setup(struct sk_buff *skb)
-+static int checksum_setup(struct xen_netif *netif, struct sk_buff *skb)
- {
- 	struct iphdr *iph;
- 	unsigned char *th;
- 	int err = -EPROTO;
-+	int recalculate_partial_csum = 0;
++	return 0;
++}
++
++static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
++{
++	struct iphdr *iph;
++	unsigned char *th;
++	int err = -EPROTO;
++	int recalculate_partial_csum = 0;
 +
 +	/*
 +	 * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
@@ -11251,7 +3887,7 @@ index 53b3a0e..8189199 100644
 +	 * recalculate the partial checksum.
 +	 */
 +	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
-+		netif->rx_gso_checksum_fixup++;
++		vif->rx_gso_checksum_fixup++;
 +		skb->ip_summed = CHECKSUM_PARTIAL;
 +		recalculate_partial_csum = 1;
 +	}
@@ -11259,13 +3895,19 @@ index 53b3a0e..8189199 100644
 +	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
 +	if (skb->ip_summed != CHECKSUM_PARTIAL)
 +		return 0;
- 
- 	if (skb->protocol != htons(ETH_P_IP))
- 		goto out;
-@@ -1240,9 +1257,23 @@ static int skb_checksum_setup(struct sk_buff *skb)
- 	switch (iph->protocol) {
- 	case IPPROTO_TCP:
- 		skb->csum_offset = offsetof(struct tcphdr, check);
++
++	if (skb->protocol != htons(ETH_P_IP))
++		goto out;
++
++	iph = (void *)skb->data;
++	th = skb->data + 4 * iph->ihl;
++	if (th >= skb_tail_pointer(skb))
++		goto out;
++
++	skb->csum_start = th - skb->head;
++	switch (iph->protocol) {
++	case IPPROTO_TCP:
++		skb->csum_offset = offsetof(struct tcphdr, check);
 +
 +		if (recalculate_partial_csum) {
 +			struct tcphdr *tcph = (struct tcphdr *)th;
@@ -11273,9 +3915,9 @@ index 53b3a0e..8189199 100644
 +							 skb->len - iph->ihl*4,
 +							 IPPROTO_TCP, 0);
 +		}
- 		break;
- 	case IPPROTO_UDP:
- 		skb->csum_offset = offsetof(struct udphdr, check);
++		break;
++	case IPPROTO_UDP:
++		skb->csum_offset = offsetof(struct udphdr, check);
 +
 +		if (recalculate_partial_csum) {
 +			struct udphdr *udph = (struct udphdr *)th;
@@ -11283,27370 +3925,9392 @@ index 53b3a0e..8189199 100644
 +							 skb->len - iph->ihl*4,
 +							 IPPROTO_UDP, 0);
 +		}
- 		break;
- 	default:
- 		if (net_ratelimit())
-@@ -1496,12 +1527,10 @@ static void net_tx_submit(struct xen_netbk *netbk)
- 		skb->dev      = netif->dev;
- 		skb->protocol = eth_type_trans(skb, skb->dev);
- 
--		if (skb->ip_summed == CHECKSUM_PARTIAL) {
--			if (skb_checksum_setup(skb)) {
--				DPRINTK("Can't setup checksum in net_tx_action\n");
--				kfree_skb(skb);
--				continue;
--			}
-+		if (checksum_setup(netif, skb)) {
-+			DPRINTK("Can't setup checksum in net_tx_action\n");
-+			kfree_skb(skb);
-+			continue;
- 		}
- 
- 		if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
--- 
-1.7.4
-
-
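The invariant checksum_setup() restores can be shown in isolation: a
CHECKSUM_PARTIAL skb must carry the pseudo-header sum in the transport
checksum field, with csum_start/csum_offset locating that field for
whoever completes the checksum later. A TCP-only sketch
(prime_partial_csum_tcp is an illustrative name; the IP header length and
packet bounds are assumed already validated, as the real code does):

#include <linux/skbuff.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <net/checksum.h>

static void prime_partial_csum_tcp(struct sk_buff *skb)
{
	struct iphdr *iph = (struct iphdr *)skb->data;
	struct tcphdr *tcph = (struct tcphdr *)(skb->data + 4 * iph->ihl);

	skb->ip_summed = CHECKSUM_PARTIAL;
	/* Where the transport header starts, and where its check field is. */
	skb->csum_start = (unsigned char *)tcph - skb->head;
	skb->csum_offset = offsetof(struct tcphdr, check);
	/* Seed the field with the pseudo-header sum, as the fixup does. */
	tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
					 skb->len - 4 * iph->ihl,
					 IPPROTO_TCP, 0);
}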
-From 929c2cd7541a48c9ab64af1b25b3e53ed396e0c5 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Tue, 18 Jan 2011 11:37:12 +0000
-Subject: [PATCH 078/203] xen: netback: rationalise types used in count_skb_slots
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/xen/netback/netback.c |   10 +++-------
- 1 files changed, 3 insertions(+), 7 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 8189199..22c1fa5 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -277,14 +277,10 @@ static inline int netbk_queue_full(struct xen_netif *netif)
- 
- /* Figure out how many ring slots we're going to need to send @skb to
-    the guest. */
--static unsigned count_skb_slots(struct sk_buff *skb, struct xen_netif *netif)
-+static unsigned int count_skb_slots(struct sk_buff *skb, struct xen_netif *netif)
- {
--	unsigned count;
--	unsigned copy_off;
--	unsigned i;
--
--	copy_off = 0;
--	count = 1;
-+	unsigned int count = 1;
-+	int i, copy_off = 0;
- 
- 	BUG_ON(offset_in_page(skb->data) + skb_headlen(skb) > MAX_BUFFER_OFFSET);
- 
--- 
-1.7.4
-
-
-From 0ded854cc2e58205d857df3d96ef6f9a108a4a66 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Tue, 18 Jan 2011 11:21:35 +0000
-Subject: [PATCH 079/203] xen: netback: refactor logic for moving to a new receive buffer.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/xen/netback/netback.c |   80 ++++++++++++++++++++++++-----------------
- 1 files changed, 47 insertions(+), 33 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 22c1fa5..909e0ef 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -275,8 +275,51 @@ static inline int netbk_queue_full(struct xen_netif *netif)
- 	       ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
- }
- 
--/* Figure out how many ring slots we're going to need to send @skb to
--   the guest. */
-+/*
-+ * Returns true if we should start a new receive buffer instead of
-+ * adding 'size' bytes to a buffer which currently contains 'offset'
-+ * bytes.
-+ */
-+static bool start_new_rx_buffer(int offset, unsigned long size, int head)
++		break;
++	default:
++		if (net_ratelimit())
++			netdev_err(vif->dev,
++				   "Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n",
++				   iph->protocol);
++		goto out;
++	}
++
++	if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
++		goto out;
++
++	err = 0;
++
++out:
++	return err;
++}
++
++static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
 +{
-+	/* simple case: we have completely filled the current buffer. */
-+	if (offset == MAX_BUFFER_OFFSET)
++	unsigned long now = jiffies;
++	unsigned long next_credit =
++		vif->credit_timeout.expires +
++		msecs_to_jiffies(vif->credit_usec / 1000);
++
++	/* Timer could already be pending in rare cases. */
++	if (timer_pending(&vif->credit_timeout))
 +		return true;
 +
-+	/*
-+	 * complex case: start a fresh buffer if the current frag
-+	 * would overflow the current buffer but only if:
-+	 *     (i)   this frag would fit completely in the next buffer
-+	 * and (ii)  there is already some data in the current buffer
-+	 * and (iii) this is not the head buffer.
-+	 *
-+	 * Where:
-+	 * - (i) stops us splitting a frag into two copies
-+	 *   unless the frag is too large for a single buffer.
-+	 * - (ii) stops us from leaving a buffer pointlessly empty.
-+	 * - (iii) stops us leaving the first buffer
-+	 *   empty. Strictly speaking this is already covered
-+	 *   by (ii) but is explicitly checked because
-+	 *   netfront relies on the first buffer being
-+	 *   non-empty and can crash otherwise.
-+	 *
-+	 * This means we will effectively linearise small
-+	 * frags but do not needlessly split large buffers
-+	 * into multiple copies, tending to give large frags
-+	 * their own buffers as before.
-+	 */
-+	if ((offset + size > MAX_BUFFER_OFFSET) &&
-+	    (size <= MAX_BUFFER_OFFSET) && offset && !head)
++	/* Passed the point where we can replenish credit? */
++	if (time_after_eq(now, next_credit)) {
++		vif->credit_timeout.expires = now;
++		tx_add_credit(vif);
++	}
++
++	/* Still too big to send right now? Set a callback. */
++	if (size > vif->remaining_credit) {
++		vif->credit_timeout.data     =
++			(unsigned long)vif;
++		vif->credit_timeout.function =
++			tx_credit_callback;
++		mod_timer(&vif->credit_timeout,
++			  next_credit);
++
 +		return true;
++	}
 +
 +	return false;
 +}
 +
-+/*
-+ * Figure out how many ring slots we're going to need to send @skb to
-+ * the guest. This function is essentially a dry run of
-+ * netbk_gop_frag_copy.
-+ */
- static unsigned int count_skb_slots(struct sk_buff *skb, struct xen_netif *netif)
- {
- 	unsigned int count = 1;
-@@ -295,9 +338,7 @@ static unsigned int count_skb_slots(struct sk_buff *skb, struct xen_netif *netif
- 		while (size > 0) {
- 			BUG_ON(copy_off > MAX_BUFFER_OFFSET);
- 
--			/* These checks are the same as in netbk_gop_frag_copy */
--			if (copy_off == MAX_BUFFER_OFFSET
--			    || ((copy_off + size > MAX_BUFFER_OFFSET) && (size <= MAX_BUFFER_OFFSET) && copy_off)) {
-+			if (start_new_rx_buffer(copy_off, size, 0)) {
- 				count++;
- 				copy_off = 0;
- 			}
-@@ -403,34 +444,7 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
- 	while (size > 0) {
- 		BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
- 
--		/*
--		 * Move to a new receive buffer if:
--		 *
--		 * simple case: we have completely filled the current buffer.
--		 *
--		 * complex case: the current frag would overflow
--		 * the current buffer but only if:
--		 *     (i)   this frag would fit completely in the next buffer
--		 * and (ii)  there is already some data in the current buffer
--		 * and (iii) this is not the head buffer.
--		 *
--		 * Where:
--		 * - (i) stops us splitting a frag into two copies
--		 *   unless the frag is too large for a single buffer.
--		 * - (ii) stops us from leaving a buffer pointlessly empty.
--		 * - (iii) stops us leaving the first buffer
--		 *   empty. Strictly speaking this is already covered
--		 *   by (ii) but is explicitly checked because
--		 *   netfront relies on the first buffer being
--		 *   non-empty and can crash otherwise.
--		 *
--		 * This means we will effectively linearise small
--		 * frags but do not needlessly split large buffers
--		 * into multiple copies tend to give large frags their
--		 * own buffers as before.
--		 */
--		if (npo->copy_off == MAX_BUFFER_OFFSET
--		    || ((npo->copy_off + size > MAX_BUFFER_OFFSET) && (size <= MAX_BUFFER_OFFSET) && npo->copy_off && !head)) {
-+		if (start_new_rx_buffer(npo->copy_off, size, head)) {
- 			struct xen_netif_rx_request *req;
- 
- 			BUG_ON(head); /* Netfront requires there to be some data in the head buffer. */
--- 
-1.7.4
-
-
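A few worked inputs for start_new_rx_buffer() make rules (i)-(iii) from the
comment above concrete (assuming MAX_BUFFER_OFFSET is 4096; the values are
illustrative):

/*
 *  offset  size  head -> result
 *  ------  ----  ---- ---------
 *    4096   any   any    true   (current buffer exactly full)
 *    3000  2000     0    true   (frag fits whole in a fresh buffer)
 *    3000  5000     0    false  (rule (i): too big for any single
 *                                buffer, so split across buffers)
 *    1000  2000     0    false  (still fits; keep filling)
 *    3000  2000     1    false  (rule (iii): the head buffer must
 *                                carry some data)
 */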
-From 9e79b27c9369d25ca86abf66611a700783ce1ef2 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Wed, 2 Feb 2011 11:12:39 +0000
-Subject: [PATCH 080/203] xen: netback: refactor code to get next rx buffer into own function.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/xen/netback/netback.c |   37 ++++++++++++++++++++++++++-----------
- 1 files changed, 26 insertions(+), 11 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index 909e0ef..a8ee1c2 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -418,6 +418,25 @@ struct netrx_pending_operations {
- 	grant_ref_t copy_gref;
- };
- 
-+static struct netbk_rx_meta *get_next_rx_buffer(struct xen_netif *netif,
-+						struct netrx_pending_operations *npo)
++static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
 +{
-+	struct netbk_rx_meta *meta;
-+	struct xen_netif_rx_request *req;
++	struct gnttab_copy *gop = netbk->tx_copy_ops, *request_gop;
++	struct sk_buff *skb;
++	int ret;
 +
-+	req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
++	while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
++		!list_empty(&netbk->net_schedule_list)) {
++		struct xenvif *vif;
++		struct xen_netif_tx_request txreq;
++		struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
++		struct page *page;
++		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
++		u16 pending_idx;
++		RING_IDX idx;
++		int work_to_do;
++		unsigned int data_len;
++		pending_ring_idx_t index;
 +
-+	meta = npo->meta + npo->meta_prod++;
-+	meta->gso_size = 0;
-+	meta->size = 0;
-+	meta->id = req->id;
++		/* Get a netif from the list with work to do. */
++		vif = poll_net_schedule_list(netbk);
++		if (!vif)
++			continue;
 +
-+	npo->copy_off = 0;
-+	npo->copy_gref = req->gref;
++		RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, work_to_do);
++		if (!work_to_do) {
++			xenvif_put(vif);
++			continue;
++		}
 +
-+	return meta;
-+}
++		idx = vif->tx.req_cons;
++		rmb(); /* Ensure that we see the request before we copy it. */
++		memcpy(&txreq, RING_GET_REQUEST(&vif->tx, idx), sizeof(txreq));
 +
- /* Set up the grant operations for this fragment.  If it's a flipping
-    interface, we also set up the unmap request from here. */
- 
-@@ -445,17 +464,13 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
- 		BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
- 
- 		if (start_new_rx_buffer(npo->copy_off, size, head)) {
--			struct xen_netif_rx_request *req;
--
--			BUG_ON(head); /* Netfront requires there to be some data in the head buffer. */
--			/* Overflowed this request, go to the next one */
--			req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
--			meta = npo->meta + npo->meta_prod++;
--			meta->gso_size = 0;
--			meta->size = 0;
--			meta->id = req->id;
--			npo->copy_off = 0;
--			npo->copy_gref = req->gref;
-+			/*
-+			 * Netfront requires there to be some data in the head
-+			 * buffer.
-+			 */
-+			BUG_ON(head);
++		/* Credit-based scheduling. */
++		if (txreq.size > vif->remaining_credit &&
++		    tx_credit_exceeded(vif, txreq.size)) {
++			xenvif_put(vif);
++			continue;
++		}
 +
-+			meta = get_next_rx_buffer(netif, npo);
- 		}
- 
- 		bytes = size;
--- 
-1.7.4
-
-
-From e02c96928e165eb351ded4cc78cce4c615778be3 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Wed, 2 Feb 2011 11:14:17 +0000
-Subject: [PATCH 081/203] xen: netback: simplify use of netbk_add_frag_responses
-
-Move all the logic into the function instead of having some in the caller.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/xen/netback/netback.c |   14 +++++++++-----
- 1 files changed, 9 insertions(+), 5 deletions(-)
-
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index a8ee1c2..cfe7931 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -599,6 +599,12 @@ static void netbk_add_frag_responses(struct xen_netif *netif, int status,
- 	int i;
- 	unsigned long offset;
- 
-+	/* No fragments used */
-+	if (nr_meta_slots <= 1)
-+		return;
++		vif->remaining_credit -= txreq.size;
 +
-+	nr_meta_slots--;
++		work_to_do--;
++		vif->tx.req_cons = ++idx;
 +
- 	for (i = 0; i < nr_meta_slots; i++) {
- 		int flags;
- 		if (i == nr_meta_slots - 1)
-@@ -727,11 +733,9 @@ static void net_rx_action(unsigned long data)
- 			gso->flags = 0;
- 		}
- 
--		if (sco->meta_slots_used > 1) {
--			netbk_add_frag_responses(netif, status,
--						 netbk->meta + npo.meta_cons + 1,
--						 sco->meta_slots_used - 1);
--		}
-+		netbk_add_frag_responses(netif, status,
-+					 netbk->meta + npo.meta_cons + 1,
-+					 sco->meta_slots_used);
- 
- 		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
- 		irq = netif->irq;
--- 
-1.7.4
-
-
-From 9304651128c7fdc2ffdf36e9fd589a13d5aca35d Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Fri, 24 Dec 2010 13:37:04 +0000
-Subject: [PATCH 082/203] xen: netback: cleanup coding style
-
-Fix checkpatch.pl errors plus manual sweep.
-
-Including:
-- remove incorrect and unnecessary filenames from comment headers.
-- do not include <linux/version.h>
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/xen/netback/common.h    |   57 ++----------------------
- drivers/xen/netback/interface.c |   18 +++----
- drivers/xen/netback/netback.c   |   93 ++++++++++++++++++++-------------------
- drivers/xen/netback/xenbus.c    |   65 ++++++++++++---------------
- 4 files changed, 89 insertions(+), 144 deletions(-)
-
-diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
-index f660eb5..a2455a0 100644
---- a/drivers/xen/netback/common.h
-+++ b/drivers/xen/netback/common.h
-@@ -1,6 +1,4 @@
--/******************************************************************************
-- * arch/xen/drivers/netif/backend/common.h
-- *
-+/*
-  * This program is free software; you can redistribute it and/or
-  * modify it under the terms of the GNU General Public License version 2
-  * as published by the Free Software Foundation; or, when distributed
-@@ -29,19 +27,18 @@
- #ifndef __NETIF__BACKEND__COMMON_H__
- #define __NETIF__BACKEND__COMMON_H__
- 
--#include <linux/version.h>
- #include <linux/module.h>
- #include <linux/interrupt.h>
- #include <linux/slab.h>
- #include <linux/ip.h>
- #include <linux/in.h>
-+#include <linux/io.h>
- #include <linux/netdevice.h>
- #include <linux/etherdevice.h>
- #include <linux/wait.h>
- #include <linux/sched.h>
- 
- #include <xen/interface/io/netif.h>
--#include <asm/io.h>
- #include <asm/pgalloc.h>
- #include <xen/interface/grant_table.h>
- #include <xen/grant_table.h>
-@@ -49,7 +46,7 @@
- 
- #define DPRINTK(_f, _a...)			\
- 	pr_debug("(file=%s, line=%d) " _f,	\
--		 __FILE__ , __LINE__ , ## _a )
-+		 __FILE__ , __LINE__ , ## _a)
- #define IPRINTK(fmt, args...)				\
- 	printk(KERN_INFO "xen_net: " fmt, ##args)
- #define WPRINTK(fmt, args...)				\
-@@ -132,66 +129,22 @@ enum {
- 
- extern int netbk_copy_skb_mode;
- 
--/* Function pointers into netback accelerator plugin modules */
--struct netback_accel_hooks {
--	struct module *owner;
--	int  (*probe)(struct xenbus_device *dev);
--	int (*remove)(struct xenbus_device *dev);
--};
--
--/* Structure to track the state of a netback accelerator plugin */
--struct netback_accelerator {
--	struct list_head link;
--	int id;
--	char *eth_name;
--	atomic_t use_count;
--	struct netback_accel_hooks *hooks;
--};
--
- struct backend_info {
- 	struct xenbus_device *dev;
- 	struct xen_netif *netif;
- 	enum xenbus_state frontend_state;
- 	struct xenbus_watch hotplug_status_watch;
- 	int have_hotplug_status_watch:1;
--
--	/* State relating to the netback accelerator */
--	void *netback_accel_priv;
--	/* The accelerator that this backend is currently using */
--	struct netback_accelerator *accelerator;
- };
- 
--#define NETBACK_ACCEL_VERSION 0x00010001
--
--/*
-- * Connect an accelerator plugin module to netback.  Returns zero on
-- * success, < 0 on error, > 0 (with highest version number supported)
-- * if version mismatch.
-- */
--extern int netback_connect_accelerator(unsigned version,
--				       int id, const char *eth_name,
--				       struct netback_accel_hooks *hooks);
--/* Disconnect a previously connected accelerator plugin module */
--extern void netback_disconnect_accelerator(int id, const char *eth_name);
--
--
--extern
--void netback_probe_accelerators(struct backend_info *be,
--				struct xenbus_device *dev);
--extern
--void netback_remove_accelerators(struct backend_info *be,
--				 struct xenbus_device *dev);
--extern
--void netif_accel_init(void);
--
--
- #define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
- #define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
- 
- void netif_disconnect(struct xen_netif *netif);
- 
- void netif_set_features(struct xen_netif *netif);
--struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle);
-+struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
-+			      unsigned int handle);
- int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
- 	      unsigned long rx_ring_ref, unsigned int evtchn);
- 
-diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
-index d3af68e..4622653 100644
---- a/drivers/xen/netback/interface.c
-+++ b/drivers/xen/netback/interface.c
-@@ -1,6 +1,4 @@
--/******************************************************************************
-- * arch/xen/drivers/netif/backend/interface.c
-- *
-+/*
-  * Network-device interface management.
-  *
-  * Copyright (c) 2004-2005, Keir Fraser
-@@ -232,8 +230,7 @@ static void netbk_get_strings(struct net_device *dev, u32 stringset, u8 * data)
- 	}
- }
- 
--static struct ethtool_ops network_ethtool_ops =
--{
-+static struct ethtool_ops network_ethtool_ops = {
- 	.get_drvinfo = netbk_get_drvinfo,
- 
- 	.get_tx_csum = ethtool_op_get_tx_csum,
-@@ -249,8 +246,7 @@ static struct ethtool_ops network_ethtool_ops =
- 	.get_strings = netbk_get_strings,
- };
- 
--static struct net_device_ops netback_ops =
--{
-+static struct net_device_ops netback_ops = {
- 	.ndo_start_xmit	= netif_be_start_xmit,
- 	.ndo_get_stats	= netif_be_get_stats,
- 	.ndo_open	= net_open,
-@@ -258,7 +254,8 @@ static struct net_device_ops netback_ops =
- 	.ndo_change_mtu	= netbk_change_mtu,
- };
- 
--struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle)
-+struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
-+			      unsigned int handle)
- {
- 	int err = 0;
- 	struct net_device *dev;
-@@ -323,8 +320,9 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
- 	return netif;
- }
- 
--static int map_frontend_pages(
--	struct xen_netif *netif, grant_ref_t tx_ring_ref, grant_ref_t rx_ring_ref)
-+static int map_frontend_pages(struct xen_netif *netif,
-+			      grant_ref_t tx_ring_ref,
-+			      grant_ref_t rx_ring_ref)
- {
- 	struct gnttab_map_grant_ref op;
- 
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index cfe7931..d4aa8ac 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -1,11 +1,9 @@
--/******************************************************************************
-- * drivers/xen/netback/netback.c
-- *
-+/*
-  * Back-end of the driver for virtual network devices. This portion of the
-  * driver exports a 'unified' network-device interface that can be accessed
-  * by any operating system that implements a compatible front end. A
-  * reference front-end implementation can be found in:
-- *  drivers/xen/netfront/netfront.c
-+ *  drivers/net/xen-netfront.c
-  *
-  * Copyright (c) 2002-2005, K A Fraser
-  *
-@@ -82,8 +80,8 @@ static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
- }
- 
- /* extra field used in struct page */
--static inline void netif_set_page_ext(struct page *pg, unsigned int group,
--		unsigned int idx)
-+static inline void netif_set_page_ext(struct page *pg,
-+				      unsigned int group, unsigned int idx)
- {
- 	union page_ext ext = { .e = { .group = group + 1, .idx = idx } };
- 
-@@ -91,7 +89,8 @@ static inline void netif_set_page_ext(struct page *pg, unsigned int group,
- 	pg->mapping = ext.mapping;
- }
- 
--static inline int netif_get_page_ext(struct page *pg, unsigned int *_group, unsigned int *_idx)
-+static int netif_get_page_ext(struct page *pg,
-+			      unsigned int *_group, unsigned int *_idx)
- {
- 	union page_ext ext = { .mapping = pg->mapping };
- 	struct xen_netbk *netbk;
-@@ -325,7 +324,7 @@ static unsigned int count_skb_slots(struct sk_buff *skb, struct xen_netif *netif
- 	unsigned int count = 1;
- 	int i, copy_off = 0;
- 
--	BUG_ON(offset_in_page(skb->data) + skb_headlen(skb) > MAX_BUFFER_OFFSET);
-+	BUG_ON(offset_in_page(skb->data)+skb_headlen(skb) > MAX_BUFFER_OFFSET);
- 
- 	copy_off = skb_headlen(skb);
- 
-@@ -376,7 +375,7 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
- 	 */
- 	if ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE) {
- 		struct sk_buff *nskb = netbk_copy_skb(skb);
--		if ( unlikely(nskb == NULL) )
-+		if (unlikely(nskb == NULL))
- 			goto drop;
- 		/* Copy only the header fields we use in this driver. */
- 		nskb->dev = skb->dev;
-@@ -385,8 +384,7 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
- 		skb = nskb;
- 	}
- 
--	/* Reserve ring slots for the worst-case number of
--	 * fragments. */
-+	/* Reserve ring slots for the worst-case number of fragments. */
- 	netif->rx_req_cons_peek += count_skb_slots(skb, netif);
- 	netif_get(netif);
- 
-@@ -437,9 +435,10 @@ static struct netbk_rx_meta *get_next_rx_buffer(struct xen_netif *netif,
- 	return meta;
- }
- 
--/* Set up the grant operations for this fragment.  If it's a flipping
--   interface, we also set up the unmap request from here. */
--
-+/*
-+ * Set up the grant operations for this fragment. If it's a flipping
-+ * interface, we also set up the unmap request from here.
-+ */
- static void netbk_gop_frag_copy(struct xen_netif *netif,
- 				struct netrx_pending_operations *npo,
- 				struct page *page, unsigned long size,
-@@ -450,7 +449,7 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
- 	/*
- 	 * These variables a used iff netif_get_page_ext returns true,
- 	 * in which case they are guaranteed to be initialized.
--         */
-+	 */
- 	unsigned int uninitialized_var(group), uninitialized_var(idx);
- 	int foreign = netif_get_page_ext(page, &group, &idx);
- 	unsigned long bytes;
-@@ -489,8 +488,9 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
- 			copy_gop->source.u.ref = src_pend->req.gref;
- 			copy_gop->flags |= GNTCOPY_source_gref;
- 		} else {
-+			void *vaddr = page_address(page);
- 			copy_gop->source.domid = DOMID_SELF;
--			copy_gop->source.u.gmfn = virt_to_mfn(page_address(page));
-+			copy_gop->source.u.gmfn = virt_to_mfn(vaddr);
- 		}
- 		copy_gop->source.offset = offset;
- 		copy_gop->dest.domid = netif->domid;
-@@ -504,17 +504,22 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
- 
- 		offset += bytes;
- 		size -= bytes;
--		head = 0; /* Must be something in this buffer now */
-+		head = 0; /* There must be something in this buffer now. */
- 	}
- }
- 
--/* Prepare an SKB to be transmitted to the frontend.  This is
--   responsible for allocating grant operations, meta structures, etc.
--   It returns the number of meta structures consumed.  The number of
--   ring slots used is always equal to the number of meta slots used
--   plus the number of GSO descriptors used.  Currently, we use either
--   zero GSO descriptors (for non-GSO packets) or one descriptor (for
--   frontend-side LRO). */
-+/*
-+ * Prepare an SKB to be transmitted to the frontend.
-+ *
-+ * This function is responsible for allocating grant operations, meta
-+ * structures, etc.
-+ *
-+ * It returns the number of meta structures consumed. The number of
-+ * ring slots used is always equal to the number of meta slots used
-+ * plus the number of GSO descriptors used. Currently, we use either
-+ * zero GSO descriptors (for non-GSO packets) or one descriptor (for
-+ * frontend-side LRO).
-+ */
- static int netbk_gop_skb(struct sk_buff *skb,
- 			 struct netrx_pending_operations *npo)
- {
-@@ -569,10 +574,12 @@ static int netbk_gop_skb(struct sk_buff *skb,
- 	return npo->meta_prod - old_meta_prod;
- }
- 
--/* This is a twin to netbk_gop_skb.  Assume that netbk_gop_skb was
--   used to set up the operations on the top of
--   netrx_pending_operations, which have since been done.  Check that
--   they didn't give any errors and advance over them. */
-+/*
-+ * This is a twin to netbk_gop_skb.  Assume that netbk_gop_skb was
-+ * used to set up the operations on the top of
-+ * netrx_pending_operations, which have since been done.  Check that
-+ * they didn't give any errors and advance over them.
-+ */
- static int netbk_check_gop(int nr_meta_slots, domid_t domid,
- 			   struct netrx_pending_operations *npo)
- {
-@@ -906,9 +913,7 @@ static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
- 	dc = netbk->dealloc_cons;
- 	gop = netbk->tx_unmap_ops;
- 
--	/*
--	 * Free up any grants we have finished using
--	 */
-+	/* Free up any grants we have finished using. */
- 	do {
- 		dp = netbk->dealloc_prod;
- 
-@@ -1018,7 +1023,8 @@ static void netbk_tx_err(struct xen_netif *netif,
- 
- static int netbk_count_requests(struct xen_netif *netif,
- 				struct xen_netif_tx_request *first,
--				struct xen_netif_tx_request *txp, int work_to_do)
-+				struct xen_netif_tx_request *txp,
-+				int work_to_do)
- {
- 	RING_IDX cons = netif->tx.req_cons;
- 	int frags = 0;
-@@ -1058,10 +1064,10 @@ static int netbk_count_requests(struct xen_netif *netif,
- }
- 
- static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk,
--						  struct xen_netif *netif,
--						  struct sk_buff *skb,
--						  struct xen_netif_tx_request *txp,
--						  struct gnttab_map_grant_ref *mop)
-+						       struct xen_netif *netif,
-+						       struct sk_buff *skb,
-+						       struct xen_netif_tx_request *txp,
-+						       struct gnttab_map_grant_ref *mop)
- {
- 	struct skb_shared_info *shinfo = skb_shinfo(skb);
- 	skb_frag_t *frags = shinfo->frags;
-@@ -1200,7 +1206,8 @@ static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
- 	}
- }
- 
--int netbk_get_extras(struct xen_netif *netif, struct xen_netif_extra_info *extras,
-+int netbk_get_extras(struct xen_netif *netif,
-+		     struct xen_netif_extra_info *extras,
- 		     int work_to_do)
- {
- 	struct xen_netif_extra_info extra;
-@@ -1228,7 +1235,8 @@ int netbk_get_extras(struct xen_netif *netif, struct xen_netif_extra_info *extra
- 	return work_to_do;
- }
- 
--static int netbk_set_skb_gso(struct sk_buff *skb, struct xen_netif_extra_info *gso)
-+static int netbk_set_skb_gso(struct sk_buff *skb,
-+			     struct xen_netif_extra_info *gso)
- {
- 	if (!gso->u.gso.size) {
- 		DPRINTK("GSO size must not be zero.\n");
-@@ -1365,7 +1373,7 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk)
- 		struct xen_netif *netif;
- 		struct xen_netif_tx_request txreq;
- 		struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
--		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
-+		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
- 		u16 pending_idx;
- 		RING_IDX idx;
- 		int work_to_do;
-@@ -1427,7 +1435,7 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk)
- 		if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
- 			DPRINTK("txreq.offset: %x, size: %u, end: %lu\n",
- 				txreq.offset, txreq.size,
--				(txreq.offset &~PAGE_MASK) + txreq.size);
-+				(txreq.offset&~PAGE_MASK) + txreq.size);
- 			netbk_tx_err(netif, &txreq, idx);
- 			continue;
- 		}
-@@ -1807,9 +1815,6 @@ static int __init netback_init(void)
- 	}
- 	memset(xen_netbk, 0, sizeof(struct xen_netbk) * xen_netbk_group_nr);
- 
--	/* We can increase reservation by this much in net_rx_action(). */
--//	balloon_update_driver_allowance(NET_RX_RING_SIZE);
--
- 	for (group = 0; group < xen_netbk_group_nr; group++) {
- 		struct xen_netbk *netbk = &xen_netbk[group];
- 		skb_queue_head_init(&netbk->rx_queue);
-@@ -1894,8 +1899,6 @@ static int __init netback_init(void)
- 			netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
- 	}
- 
--	//netif_accel_init();
--
- 	rc = netif_xenbus_init();
- 	if (rc)
- 		goto failed_init;
-diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
-index 1fec65a..dd44341 100644
---- a/drivers/xen/netback/xenbus.c
-+++ b/drivers/xen/netback/xenbus.c
-@@ -1,20 +1,22 @@
--/*  Xenbus code for netif backend
--    Copyright (C) 2005 Rusty Russell <rusty at rustcorp.com.au>
--    Copyright (C) 2005 XenSource Ltd
--
--    This program is free software; you can redistribute it and/or modify
--    it under the terms of the GNU General Public License as published by
--    the Free Software Foundation; either version 2 of the License, or
--    (at your option) any later version.
--
--    This program is distributed in the hope that it will be useful,
--    but WITHOUT ANY WARRANTY; without even the implied warranty of
--    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
--    GNU General Public License for more details.
--
--    You should have received a copy of the GNU General Public License
--    along with this program; if not, write to the Free Software
--    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-+/*
-+ * Xenbus code for netif backend
-+ *
-+ * Copyright (C) 2005 Rusty Russell <rusty at rustcorp.com.au>
-+ * Copyright (C) 2005 XenSource Ltd
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or
-+ * (at your option) any later version.
-+ *
-+ * This program is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with this program; if not, write to the Free Software
-+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
- 
- #include <stdarg.h>
-@@ -22,13 +24,6 @@
- #include <xen/xenbus.h>
- #include "common.h"
- 
--#if 0
--#undef DPRINTK
--#define DPRINTK(fmt, args...) \
--    printk("netback/xenbus (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
--#endif
--
--
- static int connect_rings(struct backend_info *);
- static void connect(struct backend_info *);
- static void backend_create_netif(struct backend_info *be);
-@@ -36,9 +31,7 @@ static void unregister_hotplug_status_watch(struct backend_info *be);
- 
- static int netback_remove(struct xenbus_device *dev)
- {
--  struct backend_info *be = dev_get_drvdata(&dev->dev);
--
--	//netback_remove_accelerators(be, dev);
-+	struct backend_info *be = dev_get_drvdata(&dev->dev);
- 
- 	unregister_hotplug_status_watch(be);
- 	if (be->netif) {
-@@ -126,8 +119,6 @@ static int netback_probe(struct xenbus_device *dev,
- 		goto fail;
- 	}
- 
--	//netback_probe_accelerators(be, dev);
--
- 	err = xenbus_switch_state(dev, XenbusStateInitWait);
- 	if (err)
- 		goto fail;
-@@ -147,12 +138,13 @@ fail:
- }
- 
- 
--/**
-+/*
-  * Handle the creation of the hotplug script environment.  We add the script
-  * and vif variables to the environment, for the benefit of the vif-* hotplug
-  * scripts.
-  */
--static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env)
-+static int netback_uevent(struct xenbus_device *xdev,
-+			  struct kobj_uevent_env *env)
- {
- 	struct backend_info *be = dev_get_drvdata(&xdev->dev);
- 	char *val;
-@@ -164,8 +156,7 @@ static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *en
- 		int err = PTR_ERR(val);
- 		xenbus_dev_fatal(xdev, err, "reading script");
- 		return err;
--	}
--	else {
-+	} else {
- 		if (add_uevent_var(env, "script=%s", val)) {
- 			kfree(val);
- 			return -ENOMEM;
-@@ -173,10 +164,10 @@ static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *en
- 		kfree(val);
- 	}
- 
--	if (be && be->netif && add_uevent_var(env, "vif=%s", be->netif->dev->name))
--		return -ENOMEM;
-+	if (!be || !be->netif)
-+		return 0;
- 
--	return 0;
-+	return add_uevent_var(env, "vif=%s", be->netif->dev->name);
- }
- 
- 
-@@ -234,7 +225,7 @@ static void frontend_changed(struct xenbus_device *dev,
- 	case XenbusStateInitialising:
- 		if (dev->state == XenbusStateClosed) {
- 			printk(KERN_INFO "%s: %s: prepare for reconnect\n",
--			       __FUNCTION__, dev->nodename);
-+			       __func__, dev->nodename);
- 			xenbus_switch_state(dev, XenbusStateInitWait);
- 		}
- 		break;
--- 
-1.7.4
-
-
-From 7ab35b88fd3b452d8b9aec972ab7d3e9de710894 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Wed, 19 Jan 2011 10:51:45 +0000
-Subject: [PATCH 083/203] xen: netback: drop private ?PRINTK macros in favour of pr_*
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/xen/netback/common.h    |   10 ++--------
- drivers/xen/netback/interface.c |   13 +++++++------
- drivers/xen/netback/netback.c   |   38 +++++++++++++++++++-------------------
- drivers/xen/netback/xenbus.c    |   13 +++----------
- 4 files changed, 31 insertions(+), 43 deletions(-)
-
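
The conversion below hangs off the pr_fmt() definition added to
common.h, which gives every pr_debug()/pr_warn() call an automatic
"modname:function:" prefix. A minimal userspace sketch of the same
macro trick, with printf() standing in for printk() and a hard-coded
stand-in for KBUILD_MODNAME:

    #include <stdio.h>

    /* same shape as the pr_fmt() this patch adds */
    #define pr_fmt(fmt) "xen_netback" ":%s: " fmt, __func__
    #define pr_debug(fmt, ...) printf(pr_fmt(fmt), ##__VA_ARGS__)

    int main(void)
    {
        pr_debug("Bad packet size: %d\n", 13);
        /* prints: xen_netback:main: Bad packet size: 13 */
        return 0;
    }
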
-diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
-index a2455a0..49dc4cf 100644
---- a/drivers/xen/netback/common.h
-+++ b/drivers/xen/netback/common.h
-@@ -27,6 +27,8 @@
- #ifndef __NETIF__BACKEND__COMMON_H__
- #define __NETIF__BACKEND__COMMON_H__
- 
-+#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__
++		memset(extras, 0, sizeof(extras));
++		if (txreq.flags & XEN_NETTXF_extra_info) {
++			work_to_do = xen_netbk_get_extras(vif, extras,
++							  work_to_do);
++			idx = vif->tx.req_cons;
++			if (unlikely(work_to_do < 0)) {
++				netbk_tx_err(vif, &txreq, idx);
++				continue;
++			}
++		}
 +
- #include <linux/module.h>
- #include <linux/interrupt.h>
- #include <linux/slab.h>
-@@ -44,14 +46,6 @@
- #include <xen/grant_table.h>
- #include <xen/xenbus.h>
- 
--#define DPRINTK(_f, _a...)			\
--	pr_debug("(file=%s, line=%d) " _f,	\
--		 __FILE__ , __LINE__ , ## _a)
--#define IPRINTK(fmt, args...)				\
--	printk(KERN_INFO "xen_net: " fmt, ##args)
--#define WPRINTK(fmt, args...)				\
--	printk(KERN_WARNING "xen_net: " fmt, ##args)
--
- struct xen_netif {
- 	/* Unique identifier for this interface. */
- 	domid_t          domid;
-diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
-index 4622653..b429f8c 100644
---- a/drivers/xen/netback/interface.c
-+++ b/drivers/xen/netback/interface.c
-@@ -29,6 +29,7 @@
-  */
- 
- #include "common.h"
++		ret = netbk_count_requests(vif, &txreq, txfrags, work_to_do);
++		if (unlikely(ret < 0)) {
++			netbk_tx_err(vif, &txreq, idx - ret);
++			continue;
++		}
++		idx += ret;
 +
- #include <linux/ethtool.h>
- #include <linux/rtnetlink.h>
- 
-@@ -265,7 +266,7 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
- 	snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
- 	dev = alloc_netdev(sizeof(struct xen_netif), name, ether_setup);
- 	if (dev == NULL) {
--		DPRINTK("Could not create netif: out of memory\n");
-+		pr_debug("Could not allocate netdev\n");
- 		return ERR_PTR(-ENOMEM);
- 	}
- 
-@@ -310,13 +311,13 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
- 	err = register_netdevice(dev);
- 	rtnl_unlock();
- 	if (err) {
--		DPRINTK("Could not register new net device %s: err=%d\n",
--			dev->name, err);
-+		pr_debug("Could not register new net device %s: err=%d\n",
-+			 dev->name, err);
- 		free_netdev(dev);
- 		return ERR_PTR(err);
- 	}
- 
--	DPRINTK("Successfully created netif\n");
-+	pr_debug("Successfully created netif\n");
- 	return netif;
- }
- 
-@@ -333,7 +334,7 @@ static int map_frontend_pages(struct xen_netif *netif,
- 		BUG();
- 
- 	if (op.status) {
--		DPRINTK(" Gnttab failure mapping tx_ring_ref!\n");
-+		pr_debug("Gnttab failure mapping tx_ring_ref!\n");
- 		return op.status;
- 	}
- 
-@@ -353,7 +354,7 @@ static int map_frontend_pages(struct xen_netif *netif,
- 				    (unsigned long)netif->tx_comms_area->addr,
- 				    GNTMAP_host_map, netif->tx_shmem_handle);
- 		HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unop, 1);
--		DPRINTK(" Gnttab failure mapping rx_ring_ref!\n");
-+		pr_debug("Gnttab failure mapping rx_ring_ref!\n");
- 		return op.status;
- 	}
- 
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-index d4aa8ac..b290525 100644
---- a/drivers/xen/netback/netback.c
-+++ b/drivers/xen/netback/netback.c
-@@ -590,8 +590,8 @@ static int netbk_check_gop(int nr_meta_slots, domid_t domid,
- 	for (i = 0; i < nr_meta_slots; i++) {
- 		copy_op = npo->copy + npo->copy_cons++;
- 		if (copy_op->status != GNTST_okay) {
--				DPRINTK("Bad status %d from copy to DOM%d.\n",
--					copy_op->status, domid);
-+				pr_debug("Bad status %d from copy to DOM%d.\n",
-+					 copy_op->status, domid);
- 				status = NETIF_RSP_ERROR;
- 			}
- 	}
-@@ -1034,19 +1034,19 @@ static int netbk_count_requests(struct xen_netif *netif,
- 
- 	do {
- 		if (frags >= work_to_do) {
--			DPRINTK("Need more frags\n");
-+			pr_debug("Need more frags\n");
- 			return -frags;
- 		}
- 
- 		if (unlikely(frags >= MAX_SKB_FRAGS)) {
--			DPRINTK("Too many frags\n");
-+			pr_debug("Too many frags\n");
- 			return -frags;
- 		}
- 
- 		memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags),
- 		       sizeof(*txp));
- 		if (txp->size > first->size) {
--			DPRINTK("Frags galore\n");
-+			pr_debug("Frags galore\n");
- 			return -frags;
- 		}
- 
-@@ -1054,8 +1054,8 @@ static int netbk_count_requests(struct xen_netif *netif,
- 		frags++;
- 
- 		if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
--			DPRINTK("txp->offset: %x, size: %u\n",
--				txp->offset, txp->size);
-+			pr_debug("txp->offset: %x, size: %u\n",
-+				 txp->offset, txp->size);
- 			return -frags;
- 		}
- 	} while ((txp++)->flags & NETTXF_more_data);
-@@ -1215,7 +1215,7 @@ int netbk_get_extras(struct xen_netif *netif,
- 
- 	do {
- 		if (unlikely(work_to_do-- <= 0)) {
--			DPRINTK("Missing extra info\n");
-+			pr_debug("Missing extra info\n");
- 			return -EBADR;
- 		}
- 
-@@ -1224,7 +1224,7 @@ int netbk_get_extras(struct xen_netif *netif,
- 		if (unlikely(!extra.type ||
- 			     extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
- 			netif->tx.req_cons = ++cons;
--			DPRINTK("Invalid extra type: %d\n", extra.type);
-+			pr_debug("Invalid extra type: %d\n", extra.type);
- 			return -EINVAL;
- 		}
- 
-@@ -1239,13 +1239,13 @@ static int netbk_set_skb_gso(struct sk_buff *skb,
- 			     struct xen_netif_extra_info *gso)
- {
- 	if (!gso->u.gso.size) {
--		DPRINTK("GSO size must not be zero.\n");
-+		pr_debug("GSO size must not be zero.\n");
- 		return -EINVAL;
- 	}
- 
- 	/* Currently only TCPv4 S.O. is supported. */
- 	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
--		DPRINTK("Bad GSO type %d.\n", gso->u.gso.type);
-+		pr_debug("Bad GSO type %d.\n", gso->u.gso.type);
- 		return -EINVAL;
- 	}
- 
-@@ -1426,16 +1426,16 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk)
- 		idx += ret;
- 
- 		if (unlikely(txreq.size < ETH_HLEN)) {
--			DPRINTK("Bad packet size: %d\n", txreq.size);
-+			pr_debug("Bad packet size: %d\n", txreq.size);
- 			netbk_tx_err(netif, &txreq, idx);
- 			continue;
- 		}
- 
- 		/* No crossing a page as the payload mustn't fragment. */
- 		if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
--			DPRINTK("txreq.offset: %x, size: %u, end: %lu\n",
--				txreq.offset, txreq.size,
--				(txreq.offset&~PAGE_MASK) + txreq.size);
-+			pr_debug("txreq.offset: %x, size: %u, end: %lu\n",
-+				 txreq.offset, txreq.size,
-+				 (txreq.offset&~PAGE_MASK) + txreq.size);
- 			netbk_tx_err(netif, &txreq, idx);
- 			continue;
- 		}
-@@ -1450,7 +1450,7 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk)
- 		skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
- 				GFP_ATOMIC | __GFP_NOWARN);
- 		if (unlikely(skb == NULL)) {
--			DPRINTK("Can't allocate a skb in start_xmit.\n");
-+			pr_debug("Can't allocate a skb in start_xmit.\n");
- 			netbk_tx_err(netif, &txreq, idx);
- 			break;
- 		}
-@@ -1525,7 +1525,7 @@ static void net_tx_submit(struct xen_netbk *netbk)
- 
- 		/* Check the remap error code. */
- 		if (unlikely(netbk_tx_check_mop(netbk, skb, &mop))) {
--			DPRINTK("netback grant failed.\n");
-+			pr_debug("netback grant failed.\n");
- 			skb_shinfo(skb)->nr_frags = 0;
- 			kfree_skb(skb);
- 			continue;
-@@ -1565,14 +1565,14 @@ static void net_tx_submit(struct xen_netbk *netbk)
- 		skb->protocol = eth_type_trans(skb, skb->dev);
- 
- 		if (checksum_setup(netif, skb)) {
--			DPRINTK("Can't setup checksum in net_tx_action\n");
-+			pr_debug("Can't setup checksum in net_tx_action\n");
- 			kfree_skb(skb);
- 			continue;
- 		}
- 
- 		if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
- 		    unlikely(skb_linearize(skb))) {
--			DPRINTK("Can't linearize skb in net_tx_action.\n");
-+			pr_debug("Can't linearize skb in net_tx_action.\n");
- 			kfree_skb(skb);
- 			continue;
- 		}
-diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
-index dd44341..867dc25 100644
---- a/drivers/xen/netback/xenbus.c
-+++ b/drivers/xen/netback/xenbus.c
-@@ -19,9 +19,6 @@
-  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
- 
--#include <stdarg.h>
--#include <linux/module.h>
--#include <xen/xenbus.h>
- #include "common.h"
- 
- static int connect_rings(struct backend_info *);
-@@ -132,7 +129,7 @@ abort_transaction:
- 	xenbus_transaction_end(xbt, 1);
- 	xenbus_dev_fatal(dev, err, "%s", message);
- fail:
--	DPRINTK("failed");
-+	pr_debug("failed");
- 	netback_remove(dev);
- 	return err;
- }
-@@ -149,8 +146,6 @@ static int netback_uevent(struct xenbus_device *xdev,
- 	struct backend_info *be = dev_get_drvdata(&xdev->dev);
- 	char *val;
- 
--	DPRINTK("netback_uevent");
--
- 	val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
- 	if (IS_ERR(val)) {
- 		int err = PTR_ERR(val);
-@@ -217,7 +212,7 @@ static void frontend_changed(struct xenbus_device *dev,
- {
- 	struct backend_info *be = dev_get_drvdata(&dev->dev);
- 
--	DPRINTK("%s", xenbus_strstate(frontend_state));
-+	pr_debug("frontend state %s", xenbus_strstate(frontend_state));
- 
- 	be->frontend_state = frontend_state;
- 
-@@ -297,7 +292,7 @@ static void xen_net_read_rate(struct xenbus_device *dev,
- 	return;
- 
-  fail:
--	WPRINTK("Failed to parse network rate limit. Traffic unlimited.\n");
-+	pr_warn("Failed to parse network rate limit. Traffic unlimited.\n");
- 	kfree(ratestr);
- }
- 
-@@ -396,8 +391,6 @@ static int connect_rings(struct backend_info *be)
- 	int err;
- 	int val;
- 
--	DPRINTK("");
--
- 	err = xenbus_gather(XBT_NIL, dev->otherend,
- 			    "tx-ring-ref", "%lu", &tx_ring_ref,
- 			    "rx-ring-ref", "%lu", &rx_ring_ref,
--- 
-1.7.4
-
-
-From 34db20ebe74e4ffacda2d42eb9cd92ae9b770970 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Tue, 18 Jan 2011 12:54:12 +0000
-Subject: [PATCH 084/203] xen: netback: move under drivers/net/xen-netback/
-
-From the kernel's PoV netback is just another network device driver.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/net/Kconfig                 |    7 +
- drivers/net/Makefile                |    1 +
- drivers/net/xen-netback/Makefile    |    3 +
- drivers/net/xen-netback/common.h    |  273 +++++
- drivers/net/xen-netback/interface.c |  470 +++++++++
- drivers/net/xen-netback/netback.c   | 1934 +++++++++++++++++++++++++++++++++++
- drivers/net/xen-netback/xenbus.c    |  489 +++++++++
- drivers/xen/Kconfig                 |    7 -
- drivers/xen/Makefile                |    1 -
- drivers/xen/netback/Makefile        |    3 -
- drivers/xen/netback/common.h        |  273 -----
- drivers/xen/netback/interface.c     |  470 ---------
- drivers/xen/netback/netback.c       | 1934 -----------------------------------
- drivers/xen/netback/xenbus.c        |  489 ---------
- 14 files changed, 3177 insertions(+), 3177 deletions(-)
- create mode 100644 drivers/net/xen-netback/Makefile
- create mode 100644 drivers/net/xen-netback/common.h
- create mode 100644 drivers/net/xen-netback/interface.c
- create mode 100644 drivers/net/xen-netback/netback.c
- create mode 100644 drivers/net/xen-netback/xenbus.c
- delete mode 100644 drivers/xen/netback/Makefile
- delete mode 100644 drivers/xen/netback/common.h
- delete mode 100644 drivers/xen/netback/interface.c
- delete mode 100644 drivers/xen/netback/netback.c
- delete mode 100644 drivers/xen/netback/xenbus.c
-
-diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
-index cbf0635..5b088f5 100644
---- a/drivers/net/Kconfig
-+++ b/drivers/net/Kconfig
-@@ -2970,6 +2970,13 @@ config XEN_NETDEV_FRONTEND
- 	  if you are compiling a kernel for a Xen guest, you almost
- 	  certainly want to enable this.
- 
-+config XEN_NETDEV_BACKEND
-+	tristate "Xen backend network device"
-+	depends on XEN_BACKEND
-+	help
-+	  Implement the network backend driver, which passes packets
-+	  from the guest domain's frontend drivers to the network.
++		if (unlikely(txreq.size < ETH_HLEN)) {
++			netdev_dbg(vif->dev,
++				   "Bad packet size: %d\n", txreq.size);
++			netbk_tx_err(vif, &txreq, idx);
++			continue;
++		}
 +
- config ISERIES_VETH
- 	tristate "iSeries Virtual Ethernet driver support"
- 	depends on PPC_ISERIES
-diff --git a/drivers/net/Makefile b/drivers/net/Makefile
-index b90738d..145dfd7 100644
---- a/drivers/net/Makefile
-+++ b/drivers/net/Makefile
-@@ -171,6 +171,7 @@ obj-$(CONFIG_SLIP) += slip.o
- obj-$(CONFIG_SLHC) += slhc.o
- 
- obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o
-+obj-$(CONFIG_XEN_NETDEV_BACKEND) += xen-netback/
- 
- obj-$(CONFIG_DUMMY) += dummy.o
- obj-$(CONFIG_IFB) += ifb.o
-diff --git a/drivers/net/xen-netback/Makefile b/drivers/net/xen-netback/Makefile
-new file mode 100644
-index 0000000..e346e81
---- /dev/null
-+++ b/drivers/net/xen-netback/Makefile
-@@ -0,0 +1,3 @@
-+obj-$(CONFIG_XEN_NETDEV_BACKEND) := xen-netback.o
-+
-+xen-netback-y := netback.o xenbus.o interface.o
-diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
-new file mode 100644
-index 0000000..2d727a0
---- /dev/null
-+++ b/drivers/net/xen-netback/common.h
-@@ -0,0 +1,273 @@
-+/*
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License version 2
-+ * as published by the Free Software Foundation; or, when distributed
-+ * separately from the Linux kernel or incorporated into other
-+ * software packages, subject to the following license:
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a copy
-+ * of this source file (the "Software"), to deal in the Software without
-+ * restriction, including without limitation the rights to use, copy, modify,
-+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
-+ * and to permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-+ * IN THE SOFTWARE.
-+ */
-+
-+#ifndef __XEN_NETBACK__COMMON_H__
-+#define __XEN_NETBACK__COMMON_H__
-+
-+#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__
-+
-+#include <linux/module.h>
-+#include <linux/interrupt.h>
-+#include <linux/slab.h>
-+#include <linux/ip.h>
-+#include <linux/in.h>
-+#include <linux/io.h>
-+#include <linux/netdevice.h>
-+#include <linux/etherdevice.h>
-+#include <linux/wait.h>
-+#include <linux/sched.h>
++		/* No crossing a page as the payload mustn't fragment. */
++		if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
++			netdev_dbg(vif->dev,
++				   "txreq.offset: %x, size: %u, end: %lu\n",
++				   txreq.offset, txreq.size,
++				   (txreq.offset&~PAGE_MASK) + txreq.size);
++			netbk_tx_err(vif, &txreq, idx);
++			continue;
++		}
 +
-+#include <xen/interface/io/netif.h>
-+#include <asm/pgalloc.h>
-+#include <xen/interface/grant_table.h>
-+#include <xen/grant_table.h>
-+#include <xen/xenbus.h>
++		index = pending_index(netbk->pending_cons);
++		pending_idx = netbk->pending_ring[index];
 +
-+struct xen_netif {
-+	/* Unique identifier for this interface. */
-+	domid_t          domid;
-+	int              group;
-+	unsigned int     handle;
++		data_len = (txreq.size > PKT_PROT_LEN &&
++			    ret < MAX_SKB_FRAGS) ?
++			PKT_PROT_LEN : txreq.size;
 +
-+	u8               fe_dev_addr[6];
++		skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
++				GFP_ATOMIC | __GFP_NOWARN);
++		if (unlikely(skb == NULL)) {
++			netdev_dbg(vif->dev,
++				   "Can't allocate a skb in start_xmit.\n");
++			netbk_tx_err(vif, &txreq, idx);
++			break;
++		}
 +
-+	/* Physical parameters of the comms window. */
-+	grant_handle_t   tx_shmem_handle;
-+	grant_ref_t      tx_shmem_ref;
-+	grant_handle_t   rx_shmem_handle;
-+	grant_ref_t      rx_shmem_ref;
-+	unsigned int     irq;
++		/* Packets passed to netif_rx() must have some headroom. */
++		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
 +
-+	/* The shared rings and indexes. */
-+	struct xen_netif_tx_back_ring tx;
-+	struct xen_netif_rx_back_ring rx;
-+	struct vm_struct *tx_comms_area;
-+	struct vm_struct *rx_comms_area;
++		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
++			struct xen_netif_extra_info *gso;
++			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
 +
-+	/* Flags that must not be set in dev->features */
-+	int features_disabled;
++			if (netbk_set_skb_gso(vif, skb, gso)) {
++				kfree_skb(skb);
++				netbk_tx_err(vif, &txreq, idx);
++				continue;
++			}
++		}
 +
-+	/* Frontend feature information. */
-+	u8 can_sg:1;
-+	u8 gso:1;
-+	u8 gso_prefix:1;
-+	u8 csum:1;
++		/* XXX could copy straight to head */
++		page = xen_netbk_alloc_page(netbk, skb, pending_idx);
++		if (!page) {
++			kfree_skb(skb);
++			netbk_tx_err(vif, &txreq, idx);
++			continue;
++		}
 +
-+	/* Internal feature information. */
-+	u8 can_queue:1;	    /* can queue packets for receiver? */
++		netbk->mmap_pages[pending_idx] = page;
 +
-+	/* Allow netif_be_start_xmit() to peek ahead in the rx request
-+	 * ring.  This is a prediction of what rx_req_cons will be once
-+	 * all queued skbs are put on the ring. */
-+	RING_IDX rx_req_cons_peek;
++		gop->source.u.ref = txreq.gref;
++		gop->source.domid = vif->domid;
++		gop->source.offset = txreq.offset;
 +
-+	/* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
-+	unsigned long   credit_bytes;
-+	unsigned long   credit_usec;
-+	unsigned long   remaining_credit;
-+	struct timer_list credit_timeout;
++		gop->dest.u.gmfn = virt_to_mfn(page_address(page));
++		gop->dest.domid = DOMID_SELF;
++		gop->dest.offset = txreq.offset;
 +
-+	/* Statistics */
-+	int nr_copied_skbs;
-+	int rx_gso_checksum_fixup;
++		gop->len = txreq.size;
++		gop->flags = GNTCOPY_source_gref;
 +
-+	/* Miscellaneous private stuff. */
-+	struct list_head list;  /* scheduling list */
-+	atomic_t         refcnt;
-+	struct net_device *dev;
-+	struct net_device_stats stats;
++		gop++;
 +
-+	unsigned int carrier;
++		memcpy(&netbk->pending_tx_info[pending_idx].req,
++		       &txreq, sizeof(txreq));
++		netbk->pending_tx_info[pending_idx].vif = vif;
++		*((u16 *)skb->data) = pending_idx;
 +
-+	wait_queue_head_t waiting_to_free;
-+};
++		__skb_put(skb, data_len);
 +
-+/*
-+ * Implement our own carrier flag: the network stack's version causes delays
-+ * when the carrier is re-enabled (in particular, dev_activate() may not
-+ * immediately be called, which can cause packet loss; also the etherbridge
-+ * can be rather lazy in activating its port).
-+ */
-+#define netback_carrier_on(netif)	((netif)->carrier = 1)
-+#define netback_carrier_off(netif)	((netif)->carrier = 0)
-+#define netback_carrier_ok(netif)	((netif)->carrier)
-+
-+enum {
-+	NETBK_DONT_COPY_SKB,
-+	NETBK_DELAYED_COPY_SKB,
-+	NETBK_ALWAYS_COPY_SKB,
-+};
++		skb_shinfo(skb)->nr_frags = ret;
++		if (data_len < txreq.size) {
++			skb_shinfo(skb)->nr_frags++;
++			skb_shinfo(skb)->frags[0].page =
++				(void *)(unsigned long)pending_idx;
++		} else {
++			/* Discriminate from any valid pending_idx value. */
++			skb_shinfo(skb)->frags[0].page = (void *)~0UL;
++		}
 +
-+extern int netbk_copy_skb_mode;
++		__skb_queue_tail(&netbk->tx_queue, skb);
 +
-+struct backend_info {
-+	struct xenbus_device *dev;
-+	struct xen_netif *netif;
-+	enum xenbus_state frontend_state;
-+	struct xenbus_watch hotplug_status_watch;
-+	int have_hotplug_status_watch:1;
-+};
++		netbk->pending_cons++;
 +
-+#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
-+#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
++		request_gop = xen_netbk_get_requests(netbk, vif,
++						     skb, txfrags, gop);
++		if (request_gop == NULL) {
++			kfree_skb(skb);
++			netbk_tx_err(vif, &txreq, idx);
++			continue;
++		}
++		gop = request_gop;
 +
-+void netif_disconnect(struct xen_netif *netif);
++		vif->tx.req_cons = idx;
++		xen_netbk_check_rx_xenvif(vif);
 +
-+void netif_set_features(struct xen_netif *netif);
-+struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
-+			      unsigned int handle);
-+int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
-+	      unsigned long rx_ring_ref, unsigned int evtchn);
++		if ((gop-netbk->tx_copy_ops) >= ARRAY_SIZE(netbk->tx_copy_ops))
++			break;
++	}
 +
-+static inline void netif_get(struct xen_netif *netif)
-+{
-+	atomic_inc(&netif->refcnt);
++	return gop - netbk->tx_copy_ops;
 +}
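
One subtlety in the gop-building loop above: while the grant copy is
still in flight there is no real page to hang off the skb, so the code
parks the small integer pending_idx in frags[0].page, with (void *)~0UL
as the "no tail fragment" sentinel, and xen_netbk_fill_frags() (not
visible in this hunk) later swaps the real page back in. The cast
round-trip this relies on, as a standalone sketch:

    #include <stdio.h>

    int main(void)
    {
        unsigned short pending_idx = 42;

        /* stash an index where a page pointer will eventually live */
        void *page = (void *)(unsigned long)pending_idx;

        /* ...and recover it once the grant copy has completed */
        unsigned short back = (unsigned short)(unsigned long)page;

        printf("%u\n", back);   /* 42 */
        return 0;
    }
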
 +
-+static inline void  netif_put(struct xen_netif *netif)
++static void xen_netbk_tx_submit(struct xen_netbk *netbk)
 +{
-+	if (atomic_dec_and_test(&netif->refcnt))
-+		wake_up(&netif->waiting_to_free);
-+}
++	struct gnttab_copy *gop = netbk->tx_copy_ops;
++	struct sk_buff *skb;
 +
-+int netif_xenbus_init(void);
++	while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
++		struct xen_netif_tx_request *txp;
++		struct xenvif *vif;
++		u16 pending_idx;
++		unsigned data_len;
 +
-+#define netif_schedulable(netif)				\
-+	(netif_running((netif)->dev) && netback_carrier_ok(netif))
++		pending_idx = *((u16 *)skb->data);
++		vif = netbk->pending_tx_info[pending_idx].vif;
++		txp = &netbk->pending_tx_info[pending_idx].req;
 +
-+void netif_schedule_work(struct xen_netif *netif);
-+void netif_deschedule_work(struct xen_netif *netif);
++		/* Check the remap error code. */
++		if (unlikely(xen_netbk_tx_check_gop(netbk, skb, &gop))) {
++			netdev_dbg(vif->dev, "netback grant failed.\n");
++			skb_shinfo(skb)->nr_frags = 0;
++			kfree_skb(skb);
++			continue;
++		}
 +
-+int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
-+struct net_device_stats *netif_be_get_stats(struct net_device *dev);
-+irqreturn_t netif_be_int(int irq, void *dev_id);
++		data_len = skb->len;
++		memcpy(skb->data,
++		       (void *)(idx_to_kaddr(netbk, pending_idx)|txp->offset),
++		       data_len);
++		if (data_len < txp->size) {
++			/* Append the packet payload as a fragment. */
++			txp->offset += data_len;
++			txp->size -= data_len;
++		} else {
++			/* Schedule a response immediately. */
++			xen_netbk_idx_release(netbk, pending_idx);
++		}
 +
-+static inline int netbk_can_queue(struct net_device *dev)
-+{
-+	struct xen_netif *netif = netdev_priv(dev);
-+	return netif->can_queue;
-+}
++		if (txp->flags & XEN_NETTXF_csum_blank)
++			skb->ip_summed = CHECKSUM_PARTIAL;
++		else if (txp->flags & XEN_NETTXF_data_validated)
++			skb->ip_summed = CHECKSUM_UNNECESSARY;
 +
-+static inline int netbk_can_sg(struct net_device *dev)
-+{
-+	struct xen_netif *netif = netdev_priv(dev);
-+	return netif->can_sg;
-+}
++		xen_netbk_fill_frags(netbk, skb);
 +
-+struct pending_tx_info {
-+	struct xen_netif_tx_request req;
-+	struct xen_netif *netif;
-+};
-+typedef unsigned int pending_ring_idx_t;
++		/*
++		 * If the initial fragment was < PKT_PROT_LEN then
++		 * pull through some bytes from the other fragments to
++		 * increase the linear region to PKT_PROT_LEN bytes.
++		 */
++		if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) {
++			int target = min_t(int, skb->len, PKT_PROT_LEN);
++			__pskb_pull_tail(skb, target - skb_headlen(skb));
++		}
 +
-+struct netbk_rx_meta {
-+	int id;
-+	int size;
-+	int gso_size;
-+};
++		skb->dev      = vif->dev;
++		skb->protocol = eth_type_trans(skb, skb->dev);
 +
-+struct netbk_tx_pending_inuse {
-+	struct list_head list;
-+	unsigned long alloc_time;
-+};
++		if (checksum_setup(vif, skb)) {
++			netdev_dbg(vif->dev,
++				   "Can't setup checksum in net_tx_action\n");
++			kfree_skb(skb);
++			continue;
++		}
 +
-+#define MAX_PENDING_REQS 256
++		vif->stats.rx_bytes += skb->len;
++		vif->stats.rx_packets++;
 +
-+#define MAX_BUFFER_OFFSET PAGE_SIZE
++		xenvif_receive_skb(vif, skb);
++	}
++}
 +
-+/* extra field used in struct page */
-+union page_ext {
-+	struct {
-+#if BITS_PER_LONG < 64
-+#define IDX_WIDTH   8
-+#define GROUP_WIDTH (BITS_PER_LONG - IDX_WIDTH)
-+		unsigned int group:GROUP_WIDTH;
-+		unsigned int idx:IDX_WIDTH;
-+#else
-+		unsigned int group, idx;
-+#endif
-+	} e;
-+	void *mapping;
-+};
-+
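
This union is the netback trick of borrowing the otherwise-unused
page->mapping field of a foreign page to record which netbk group and
which pending-ring slot the page belongs to; netif_set_page_ext() and
netif_get_page_ext() further down in this listing do the packing and
the validated unpacking. A standalone sketch of the encoding, using
the widths from the 32-bit branch above (note the group is stored
biased by one, so a cleared mapping never decodes as a valid group):

    #include <stdio.h>

    #define IDX_WIDTH   8
    #define GROUP_WIDTH (32 - IDX_WIDTH)

    union page_ext {
        struct {
            unsigned int group:GROUP_WIDTH;
            unsigned int idx:IDX_WIDTH;
        } e;
        void *mapping;
    };

    int main(void)
    {
        union page_ext ext = { .mapping = NULL };
        union page_ext back;

        /* encode group 2, ring slot 117 (group biased by +1) */
        ext.e.group = 2 + 1;
        ext.e.idx = 117;

        /* decode from the pointer-sized value */
        back.mapping = ext.mapping;
        printf("group=%u idx=%u\n", back.e.group - 1, back.e.idx);
        return 0;
    }
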
-+struct xen_netbk {
-+	union {
-+		struct {
-+			struct tasklet_struct net_tx_tasklet;
-+			struct tasklet_struct net_rx_tasklet;
-+		} tasklet;
-+
-+		struct {
-+			wait_queue_head_t netbk_action_wq;
-+			struct task_struct *task;
-+		} kthread;
-+	};
-+
-+	struct sk_buff_head rx_queue;
-+	struct sk_buff_head tx_queue;
-+
-+	struct timer_list net_timer;
-+	struct timer_list netbk_tx_pending_timer;
++/* Called after netfront has transmitted */
++static void xen_netbk_tx_action(struct xen_netbk *netbk)
++{
++	unsigned nr_gops;
++	int ret;
 +
-+	struct page **mmap_pages;
++	nr_gops = xen_netbk_tx_build_gops(netbk);
 +
-+	pending_ring_idx_t pending_prod;
-+	pending_ring_idx_t pending_cons;
-+	pending_ring_idx_t dealloc_prod;
-+	pending_ring_idx_t dealloc_cons;
++	if (nr_gops == 0)
++		return;
++	ret = HYPERVISOR_grant_table_op(GNTTABOP_copy,
++					netbk->tx_copy_ops, nr_gops);
++	BUG_ON(ret);
 +
-+	struct list_head pending_inuse_head;
-+	struct list_head net_schedule_list;
++	xen_netbk_tx_submit(netbk);
 +
-+	/* Protect the net_schedule_list in netif. */
-+	spinlock_t net_schedule_list_lock;
++}
 +
-+	atomic_t netfront_count;
++static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx)
++{
++	struct xenvif *vif;
++	struct pending_tx_info *pending_tx_info;
++	pending_ring_idx_t index;
 +
-+	struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
-+	struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
-+	struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
-+	struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
++	/* Already complete? */
++	if (netbk->mmap_pages[pending_idx] == NULL)
++		return;
 +
-+	grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
-+	u16 pending_ring[MAX_PENDING_REQS];
-+	u16 dealloc_ring[MAX_PENDING_REQS];
++	pending_tx_info = &netbk->pending_tx_info[pending_idx];
 +
-+	/*
-+	 * Each head or fragment can be up to 4096 bytes. Given
-+	 * MAX_BUFFER_OFFSET of 4096 the worst case is that each
-+	 * head/fragment uses 2 copy operations.
-+	 */
-+	struct gnttab_copy grant_copy_op[2*NET_RX_RING_SIZE];
-+	unsigned char rx_notify[NR_IRQS];
-+	u16 notify_list[NET_RX_RING_SIZE];
-+	struct netbk_rx_meta meta[2*NET_RX_RING_SIZE];
-+};
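
To unpack the sizing comment on grant_copy_op[] above: receive buffers
are MAX_BUFFER_OFFSET (= PAGE_SIZE = 4096) bytes, so a head or fragment
of up to 4096 bytes that lands at a non-zero offset in its first buffer
spills into a second one. For example, 4096 bytes arriving at buffer
offset 2048 fill 2048 bytes in each of two buffers, i.e. two copy
operations; hence at most two gnttab_copy slots per RX ring entry and
2*NET_RX_RING_SIZE slots in total.
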
++	vif = pending_tx_info->vif;
 +
-+extern struct xen_netbk *xen_netbk;
-+extern int xen_netbk_group_nr;
++	make_tx_response(vif, &pending_tx_info->req, XEN_NETIF_RSP_OKAY);
 +
-+#endif /* __XEN_NETBACK__COMMON_H__ */
-diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
-new file mode 100644
-index 0000000..b429f8c
---- /dev/null
-+++ b/drivers/net/xen-netback/interface.c
-@@ -0,0 +1,470 @@
-+/*
-+ * Network-device interface management.
-+ *
-+ * Copyright (c) 2004-2005, Keir Fraser
-+ *
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License version 2
-+ * as published by the Free Software Foundation; or, when distributed
-+ * separately from the Linux kernel or incorporated into other
-+ * software packages, subject to the following license:
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a copy
-+ * of this source file (the "Software"), to deal in the Software without
-+ * restriction, including without limitation the rights to use, copy, modify,
-+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
-+ * and to permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-+ * IN THE SOFTWARE.
-+ */
++	index = pending_index(netbk->pending_prod++);
++	netbk->pending_ring[index] = pending_idx;
 +
-+#include "common.h"
++	xenvif_put(vif);
 +
-+#include <linux/ethtool.h>
-+#include <linux/rtnetlink.h>
++	netbk->mmap_pages[pending_idx]->mapping = 0;
++	put_page(netbk->mmap_pages[pending_idx]);
++	netbk->mmap_pages[pending_idx] = NULL;
++}
 +
-+#include <xen/events.h>
-+#include <asm/xen/hypercall.h>
++static void make_tx_response(struct xenvif *vif,
++			     struct xen_netif_tx_request *txp,
++			     s8       st)
++{
++	RING_IDX i = vif->tx.rsp_prod_pvt;
++	struct xen_netif_tx_response *resp;
++	int notify;
 +
-+/*
-+ * Module parameter 'queue_length':
-+ *
-+ * Enables queuing in the network stack when a client has run out of receive
-+ * descriptors.
-+ */
-+static unsigned long netbk_queue_length = 32;
-+module_param_named(queue_length, netbk_queue_length, ulong, 0644);
++	resp = RING_GET_RESPONSE(&vif->tx, i);
++	resp->id     = txp->id;
++	resp->status = st;
 +
-+static void netbk_add_netif(struct xen_netbk *netbk, int group_nr,
-+			   struct xen_netif *netif)
-+{
-+	int i;
-+	int min_netfront_count;
-+	int min_group = 0;
-+	min_netfront_count = atomic_read(&netbk[0].netfront_count);
-+	for (i = 0; i < group_nr; i++) {
-+		int netfront_count = atomic_read(&netbk[i].netfront_count);
-+		if (netfront_count < min_netfront_count) {
-+			min_group = i;
-+			min_netfront_count = netfront_count;
-+		}
-+	}
++	if (txp->flags & XEN_NETTXF_extra_info)
++		RING_GET_RESPONSE(&vif->tx, ++i)->status = XEN_NETIF_RSP_NULL;
 +
-+	netif->group = min_group;
-+	atomic_inc(&netbk[netif->group].netfront_count);
++	vif->tx.rsp_prod_pvt = ++i;
++	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->tx, notify);
++	if (notify)
++		notify_remote_via_irq(vif->irq);
 +}
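
The notify flag here is computed by RING_PUSH_RESPONSES_AND_CHECK_NOTIFY,
which publishes the new rsp_prod and asks for an event-channel kick only
if the frontend's rsp_event marker falls inside the batch of responses
just pushed (see xen/interface/io/ring.h for the authoritative macro).
A standalone sketch of that wraparound-safe window test:

    #include <stdio.h>

    typedef unsigned int RING_IDX;

    /* notify iff rsp_event lies in the half-open interval (old, new];
     * the unsigned subtractions make 32-bit wraparound harmless */
    static int check_notify(RING_IDX old, RING_IDX new, RING_IDX rsp_event)
    {
        return (RING_IDX)(new - rsp_event) < (RING_IDX)(new - old);
    }

    int main(void)
    {
        printf("%d\n", check_notify(10, 12, 11)); /* 1: event wanted at 11 */
        printf("%d\n", check_notify(10, 12, 13)); /* 0: frontend still polling */
        return 0;
    }
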
 +
-+static void netbk_remove_netif(struct xen_netbk *netbk, struct xen_netif *netif)
++static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
++					     u16      id,
++					     s8       st,
++					     u16      offset,
++					     u16      size,
++					     u16      flags)
 +{
-+	atomic_dec(&netbk[netif->group].netfront_count);
-+}
++	RING_IDX i = vif->rx.rsp_prod_pvt;
++	struct xen_netif_rx_response *resp;
 +
-+static void __netif_up(struct xen_netif *netif)
-+{
-+	netbk_add_netif(xen_netbk, xen_netbk_group_nr, netif);
-+	enable_irq(netif->irq);
-+	netif_schedule_work(netif);
-+}
++	resp = RING_GET_RESPONSE(&vif->rx, i);
++	resp->offset     = offset;
++	resp->flags      = flags;
++	resp->id         = id;
++	resp->status     = (s16)size;
++	if (st < 0)
++		resp->status = (s16)st;
 +
-+static void __netif_down(struct xen_netif *netif)
-+{
-+	disable_irq(netif->irq);
-+	netif_deschedule_work(netif);
-+	netbk_remove_netif(xen_netbk, netif);
-+}
++	vif->rx.rsp_prod_pvt = ++i;
 +
-+static int net_open(struct net_device *dev)
-+{
-+	struct xen_netif *netif = netdev_priv(dev);
-+	if (netback_carrier_ok(netif)) {
-+		__netif_up(netif);
-+		netif_start_queue(dev);
-+	}
-+	return 0;
++	return resp;
 +}
 +
-+static int net_close(struct net_device *dev)
++static inline int rx_work_todo(struct xen_netbk *netbk)
 +{
-+	struct xen_netif *netif = netdev_priv(dev);
-+	if (netback_carrier_ok(netif))
-+		__netif_down(netif);
-+	netif_stop_queue(dev);
-+	return 0;
++	return !skb_queue_empty(&netbk->rx_queue);
 +}
 +
-+static int netbk_change_mtu(struct net_device *dev, int mtu)
++static inline int tx_work_todo(struct xen_netbk *netbk)
 +{
-+	int max = netbk_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
 +
-+	if (mtu > max)
-+		return -EINVAL;
-+	dev->mtu = mtu;
++	if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
++			!list_empty(&netbk->net_schedule_list))
++		return 1;
++
 +	return 0;
 +}
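
The MAX_SKB_FRAGS headroom in tx_work_todo() is what lets the worker
commit to whole packets: netbk_count_requests() refuses anything using
MAX_SKB_FRAGS or more slots, so admitting work only while
nr_pending_reqs(netbk) + MAX_SKB_FRAGS < MAX_PENDING_REQS guarantees a
worst-case packet can always be completed without overrunning the
pending ring. With MAX_PENDING_REQS = 256 and MAX_SKB_FRAGS = 18 (the
usual value with 4 KiB pages on this kernel) that means new work is
picked up only while at most 237 slots are in use.
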
 +
-+void netif_set_features(struct xen_netif *netif)
++static int xen_netbk_kthread(void *data)
 +{
-+	struct net_device *dev = netif->dev;
-+	int features = dev->features;
-+
-+	if (netif->can_sg)
-+		features |= NETIF_F_SG;
-+	if (netif->gso || netif->gso_prefix)
-+		features |= NETIF_F_TSO;
-+	if (netif->csum)
-+		features |= NETIF_F_IP_CSUM;
-+
-+	features &= ~(netif->features_disabled);
++	struct xen_netbk *netbk = data;
++	while (!kthread_should_stop()) {
++		wait_event_interruptible(netbk->wq,
++				rx_work_todo(netbk) ||
++				tx_work_todo(netbk) ||
++				kthread_should_stop());
++		cond_resched();
 +
-+	if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN)
-+		dev->mtu = ETH_DATA_LEN;
++		if (kthread_should_stop())
++			break;
 +
-+	dev->features = features;
-+}
++		if (rx_work_todo(netbk))
++			xen_netbk_rx_action(netbk);
 +
-+static int netbk_set_tx_csum(struct net_device *dev, u32 data)
-+{
-+	struct xen_netif *netif = netdev_priv(dev);
-+	if (data) {
-+		if (!netif->csum)
-+			return -ENOSYS;
-+		netif->features_disabled &= ~NETIF_F_IP_CSUM;
-+	} else {
-+		netif->features_disabled |= NETIF_F_IP_CSUM;
++		if (tx_work_todo(netbk))
++			xen_netbk_tx_action(netbk);
 +	}
 +
-+	netif_set_features(netif);
 +	return 0;
 +}
 +
-+static int netbk_set_sg(struct net_device *dev, u32 data)
++void xen_netbk_unmap_frontend_rings(struct xenvif *vif)
 +{
-+	struct xen_netif *netif = netdev_priv(dev);
-+	if (data) {
-+		if (!netif->can_sg)
-+			return -ENOSYS;
-+		netif->features_disabled &= ~NETIF_F_SG;
-+	} else {
-+		netif->features_disabled |= NETIF_F_SG;
-+	}
++	struct gnttab_unmap_grant_ref op;
 +
-+	netif_set_features(netif);
-+	return 0;
-+}
++	if (vif->tx.sring) {
++		gnttab_set_unmap_op(&op, (unsigned long)vif->tx_comms_area->addr,
++				    GNTMAP_host_map, vif->tx_shmem_handle);
 +
-+static int netbk_set_tso(struct net_device *dev, u32 data)
-+{
-+	struct xen_netif *netif = netdev_priv(dev);
-+	if (data) {
-+		if (!netif->gso && !netif->gso_prefix)
-+			return -ENOSYS;
-+		netif->features_disabled &= ~NETIF_F_TSO;
-+	} else {
-+		netif->features_disabled |= NETIF_F_TSO;
++		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
++			BUG();
 +	}
 +
-+	netif_set_features(netif);
-+	return 0;
-+}
-+
-+static void netbk_get_drvinfo(struct net_device *dev,
-+			      struct ethtool_drvinfo *info)
-+{
-+	strcpy(info->driver, "netbk");
-+	strcpy(info->bus_info, dev_name(dev->dev.parent));
-+}
-+
-+static const struct netif_stat {
-+	char name[ETH_GSTRING_LEN];
-+	u16 offset;
-+} netbk_stats[] = {
-+	{
-+		"copied_skbs",
-+		offsetof(struct xen_netif, nr_copied_skbs)
-+	},
-+	{
-+		"rx_gso_checksum_fixup",
-+		offsetof(struct xen_netif, rx_gso_checksum_fixup)
-+	},
-+};
++	if (vif->rx.sring) {
++		gnttab_set_unmap_op(&op, (unsigned long)vif->rx_comms_area->addr,
++				    GNTMAP_host_map, vif->rx_shmem_handle);
 +
-+static int netbk_get_sset_count(struct net_device *dev, int string_set)
-+{
-+	switch (string_set) {
-+	case ETH_SS_STATS:
-+		return ARRAY_SIZE(netbk_stats);
-+	default:
-+		return -EINVAL;
++		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
++			BUG();
 +	}
++	if (vif->rx_comms_area)
++		free_vm_area(vif->rx_comms_area);
++	if (vif->tx_comms_area)
++		free_vm_area(vif->tx_comms_area);
 +}
 +
-+static void netbk_get_ethtool_stats(struct net_device *dev,
-+				   struct ethtool_stats *stats, u64 * data)
++int xen_netbk_map_frontend_rings(struct xenvif *vif,
++				 grant_ref_t tx_ring_ref,
++				 grant_ref_t rx_ring_ref)
 +{
-+	void *netif = netdev_priv(dev);
-+	int i;
++	struct gnttab_map_grant_ref op;
++	struct xen_netif_tx_sring *txs;
++	struct xen_netif_rx_sring *rxs;
 +
-+	for (i = 0; i < ARRAY_SIZE(netbk_stats); i++)
-+		data[i] = *(int *)(netif + netbk_stats[i].offset);
-+}
++	int err = -ENOMEM;
 +
-+static void netbk_get_strings(struct net_device *dev, u32 stringset, u8 * data)
-+{
-+	int i;
++	vif->tx_comms_area = alloc_vm_area(PAGE_SIZE);
++	if (vif->tx_comms_area == NULL)
++		goto err;
 +
-+	switch (stringset) {
-+	case ETH_SS_STATS:
-+		for (i = 0; i < ARRAY_SIZE(netbk_stats); i++)
-+			memcpy(data + i * ETH_GSTRING_LEN,
-+			       netbk_stats[i].name, ETH_GSTRING_LEN);
-+		break;
-+	}
-+}
++	vif->rx_comms_area = alloc_vm_area(PAGE_SIZE);
++	if (vif->rx_comms_area == NULL)
++		goto err;
 +
-+static struct ethtool_ops network_ethtool_ops = {
-+	.get_drvinfo = netbk_get_drvinfo,
++	gnttab_set_map_op(&op, (unsigned long)vif->tx_comms_area->addr,
++			  GNTMAP_host_map, tx_ring_ref, vif->domid);
 +
-+	.get_tx_csum = ethtool_op_get_tx_csum,
-+	.set_tx_csum = netbk_set_tx_csum,
-+	.get_sg = ethtool_op_get_sg,
-+	.set_sg = netbk_set_sg,
-+	.get_tso = ethtool_op_get_tso,
-+	.set_tso = netbk_set_tso,
-+	.get_link = ethtool_op_get_link,
++	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
++		BUG();
 +
-+	.get_sset_count = netbk_get_sset_count,
-+	.get_ethtool_stats = netbk_get_ethtool_stats,
-+	.get_strings = netbk_get_strings,
-+};
++	if (op.status) {
++		netdev_warn(vif->dev,
++			    "failed to map tx ring. err=%d status=%d\n",
++			    err, op.status);
++		err = op.status;
++		goto err;
++	}
 +
-+static struct net_device_ops netback_ops = {
-+	.ndo_start_xmit	= netif_be_start_xmit,
-+	.ndo_get_stats	= netif_be_get_stats,
-+	.ndo_open	= net_open,
-+	.ndo_stop	= net_close,
-+	.ndo_change_mtu	= netbk_change_mtu,
-+};
-+
-+struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
-+			      unsigned int handle)
-+{
-+	int err = 0;
-+	struct net_device *dev;
-+	struct xen_netif *netif;
-+	char name[IFNAMSIZ] = {};
-+
-+	snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
-+	dev = alloc_netdev(sizeof(struct xen_netif), name, ether_setup);
-+	if (dev == NULL) {
-+		pr_debug("Could not allocate netdev\n");
-+		return ERR_PTR(-ENOMEM);
-+	}
-+
-+	SET_NETDEV_DEV(dev, parent);
-+
-+	netif = netdev_priv(dev);
-+	memset(netif, 0, sizeof(*netif));
-+	netif->domid  = domid;
-+	netif->group  = -1;
-+	netif->handle = handle;
-+	netif->can_sg = 1;
-+	netif->csum = 1;
-+	atomic_set(&netif->refcnt, 1);
-+	init_waitqueue_head(&netif->waiting_to_free);
-+	netif->dev = dev;
-+	INIT_LIST_HEAD(&netif->list);
-+
-+	netback_carrier_off(netif);
-+
-+	netif->credit_bytes = netif->remaining_credit = ~0UL;
-+	netif->credit_usec  = 0UL;
-+	init_timer(&netif->credit_timeout);
-+	/* Initialize 'expires' now: it's used to track the credit window. */
-+	netif->credit_timeout.expires = jiffies;
-+
-+	dev->netdev_ops	= &netback_ops;
-+	netif_set_features(netif);
-+	SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
-+
-+	dev->tx_queue_len = netbk_queue_length;
-+
-+	/*
-+	 * Initialise a dummy MAC address. We choose the numerically
-+	 * largest non-broadcast address to prevent the address getting
-+	 * stolen by an Ethernet bridge for STP purposes.
-+	 * (FE:FF:FF:FF:FF:FF)
-+	 */
-+	memset(dev->dev_addr, 0xFF, ETH_ALEN);
-+	dev->dev_addr[0] &= ~0x01;
-+
-+	rtnl_lock();
-+	err = register_netdevice(dev);
-+	rtnl_unlock();
-+	if (err) {
-+		pr_debug("Could not register new net device %s: err=%d\n",
-+			 dev->name, err);
-+		free_netdev(dev);
-+		return ERR_PTR(err);
-+	}
-+
-+	pr_debug("Successfully created netif\n");
-+	return netif;
-+}
++	vif->tx_shmem_ref    = tx_ring_ref;
++	vif->tx_shmem_handle = op.handle;
 +
-+static int map_frontend_pages(struct xen_netif *netif,
-+			      grant_ref_t tx_ring_ref,
-+			      grant_ref_t rx_ring_ref)
-+{
-+	struct gnttab_map_grant_ref op;
++	txs = (struct xen_netif_tx_sring *)vif->tx_comms_area->addr;
++	BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);
 +
-+	gnttab_set_map_op(&op, (unsigned long)netif->tx_comms_area->addr,
-+			  GNTMAP_host_map, tx_ring_ref, netif->domid);
++	gnttab_set_map_op(&op, (unsigned long)vif->rx_comms_area->addr,
++			  GNTMAP_host_map, rx_ring_ref, vif->domid);
 +
 +	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
 +		BUG();
 +
 +	if (op.status) {
-+		pr_debug("Gnttab failure mapping tx_ring_ref!\n");
-+		return op.status;
++		netdev_warn(vif->dev,
++			    "failed to map rx ring. err=%d status=%d\n",
++			    err, op.status);
++		err = op.status;
++		goto err;
 +	}
 +
-+	netif->tx_shmem_ref    = tx_ring_ref;
-+	netif->tx_shmem_handle = op.handle;
-+
-+	gnttab_set_map_op(&op, (unsigned long)netif->rx_comms_area->addr,
-+			  GNTMAP_host_map, rx_ring_ref, netif->domid);
-+
-+	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
-+		BUG();
-+
-+	if (op.status) {
-+		struct gnttab_unmap_grant_ref unop;
-+
-+		gnttab_set_unmap_op(&unop,
-+				    (unsigned long)netif->tx_comms_area->addr,
-+				    GNTMAP_host_map, netif->tx_shmem_handle);
-+		HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unop, 1);
-+		pr_debug("Gnttab failure mapping rx_ring_ref!\n");
-+		return op.status;
-+	}
++	vif->rx_shmem_ref     = rx_ring_ref;
++	vif->rx_shmem_handle  = op.handle;
++	vif->rx_req_cons_peek = 0;
 +
-+	netif->rx_shmem_ref    = rx_ring_ref;
-+	netif->rx_shmem_handle = op.handle;
++	rxs = (struct xen_netif_rx_sring *)vif->rx_comms_area->addr;
++	BACK_RING_INIT(&vif->rx, rxs, PAGE_SIZE);
 +
 +	return 0;
-+}
-+
-+static void unmap_frontend_pages(struct xen_netif *netif)
-+{
-+	struct gnttab_unmap_grant_ref op;
-+
-+	gnttab_set_unmap_op(&op, (unsigned long)netif->tx_comms_area->addr,
-+			    GNTMAP_host_map, netif->tx_shmem_handle);
-+
-+	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
-+		BUG();
-+
-+	gnttab_set_unmap_op(&op, (unsigned long)netif->rx_comms_area->addr,
-+			    GNTMAP_host_map, netif->rx_shmem_handle);
 +
-+	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
-+		BUG();
++err:
++	xen_netbk_unmap_frontend_rings(vif);
++	return err;
 +}
 +
-+int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
-+	      unsigned long rx_ring_ref, unsigned int evtchn)
++static int __init netback_init(void)
 +{
-+	int err = -ENOMEM;
-+	struct xen_netif_tx_sring *txs;
-+	struct xen_netif_rx_sring *rxs;
++	int i;
++	int rc = 0;
++	int group;
 +
-+	/* Already connected through? */
-+	if (netif->irq)
-+		return 0;
++	if (!xen_pv_domain())
++		return -ENODEV;
 +
-+	netif->tx_comms_area = alloc_vm_area(PAGE_SIZE);
-+	if (netif->tx_comms_area == NULL)
++	xen_netbk_group_nr = num_online_cpus();
++	xen_netbk = vzalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);
++	if (!xen_netbk) {
++		printk(KERN_ALERT "%s: out of memory\n", __func__);
 +		return -ENOMEM;
-+	netif->rx_comms_area = alloc_vm_area(PAGE_SIZE);
-+	if (netif->rx_comms_area == NULL)
-+		goto err_rx;
-+
-+	err = map_frontend_pages(netif, tx_ring_ref, rx_ring_ref);
-+	if (err)
-+		goto err_map;
++	}
 +
-+	err = bind_interdomain_evtchn_to_irqhandler(
-+		netif->domid, evtchn, netif_be_int, 0,
-+		netif->dev->name, netif);
-+	if (err < 0)
-+		goto err_hypervisor;
-+	netif->irq = err;
-+	disable_irq(netif->irq);
++	for (group = 0; group < xen_netbk_group_nr; group++) {
++		struct xen_netbk *netbk = &xen_netbk[group];
++		skb_queue_head_init(&netbk->rx_queue);
++		skb_queue_head_init(&netbk->tx_queue);
 +
-+	txs = (struct xen_netif_tx_sring *)netif->tx_comms_area->addr;
-+	BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE);
++		init_timer(&netbk->net_timer);
++		netbk->net_timer.data = (unsigned long)netbk;
++		netbk->net_timer.function = xen_netbk_alarm;
 +
-+	rxs = (struct xen_netif_rx_sring *)
-+		((char *)netif->rx_comms_area->addr);
-+	BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE);
++		netbk->pending_cons = 0;
++		netbk->pending_prod = MAX_PENDING_REQS;
++		for (i = 0; i < MAX_PENDING_REQS; i++)
++			netbk->pending_ring[i] = i;
 +
-+	netif->rx_req_cons_peek = 0;
++		init_waitqueue_head(&netbk->wq);
++		netbk->task = kthread_create(xen_netbk_kthread,
++					     (void *)netbk,
++					     "netback/%u", group);
 +
-+	netif_get(netif);
++		if (IS_ERR(netbk->task)) {
++			printk(KERN_ALERT "kthread_run() fails at netback\n");
++			del_timer(&netbk->net_timer);
++			rc = PTR_ERR(netbk->task);
++			goto failed_init;
++		}
 +
-+	rtnl_lock();
-+	netback_carrier_on(netif);
-+	if (netif_running(netif->dev))
-+		__netif_up(netif);
-+	rtnl_unlock();
++		kthread_bind(netbk->task, group);
 +
-+	return 0;
-+err_hypervisor:
-+	unmap_frontend_pages(netif);
-+err_map:
-+	free_vm_area(netif->rx_comms_area);
-+err_rx:
-+	free_vm_area(netif->tx_comms_area);
-+	return err;
-+}
++		INIT_LIST_HEAD(&netbk->net_schedule_list);
 +
-+void netif_disconnect(struct xen_netif *netif)
-+{
-+	if (netback_carrier_ok(netif)) {
-+		rtnl_lock();
-+		netback_carrier_off(netif);
-+		netif_carrier_off(netif->dev); /* discard queued packets */
-+		if (netif_running(netif->dev))
-+			__netif_down(netif);
-+		rtnl_unlock();
-+		netif_put(netif);
-+	}
++		spin_lock_init(&netbk->net_schedule_list_lock);
 +
-+	atomic_dec(&netif->refcnt);
-+	wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0);
++		atomic_set(&netbk->netfront_count, 0);
 +
-+	del_timer_sync(&netif->credit_timeout);
++		wake_up_process(netbk->task);
++	}
 +
-+	if (netif->irq)
-+		unbind_from_irqhandler(netif->irq, netif);
++	rc = xenvif_xenbus_init();
++	if (rc)
++		goto failed_init;
 +
-+	unregister_netdev(netif->dev);
++	return 0;
 +
-+	if (netif->tx.sring) {
-+		unmap_frontend_pages(netif);
-+		free_vm_area(netif->tx_comms_area);
-+		free_vm_area(netif->rx_comms_area);
++failed_init:
++	while (--group >= 0) {
++		struct xen_netbk *netbk = &xen_netbk[group];
++		for (i = 0; i < MAX_PENDING_REQS; i++) {
++			if (netbk->mmap_pages[i])
++				__free_page(netbk->mmap_pages[i]);
++		}
++		del_timer(&netbk->net_timer);
++		kthread_stop(netbk->task);
 +	}
++	vfree(xen_netbk);
++	return rc;
 +
-+	free_netdev(netif->dev);
 +}
-diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
++
++module_init(netback_init);
++
++MODULE_LICENSE("Dual BSD/GPL");
+diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
 new file mode 100644
-index 0000000..b290525
+index 0000000..22b8c35
 --- /dev/null
-+++ b/drivers/net/xen-netback/netback.c
-@@ -0,0 +1,1934 @@
++++ b/drivers/net/xen-netback/xenbus.c
+@@ -0,0 +1,490 @@
 +/*
-+ * Back-end of the driver for virtual network devices. This portion of the
-+ * driver exports a 'unified' network-device interface that can be accessed
-+ * by any operating system that implements a compatible front end. A
-+ * reference front-end implementation can be found in:
-+ *  drivers/net/xen-netfront.c
-+ *
-+ * Copyright (c) 2002-2005, K A Fraser
++ * Xenbus code for netif backend
 + *
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License version 2
-+ * as published by the Free Software Foundation; or, when distributed
-+ * separately from the Linux kernel or incorporated into other
-+ * software packages, subject to the following license:
++ * Copyright (C) 2005 Rusty Russell <rusty at rustcorp.com.au>
++ * Copyright (C) 2005 XenSource Ltd
 + *
-+ * Permission is hereby granted, free of charge, to any person obtaining a copy
-+ * of this source file (the "Software"), to deal in the Software without
-+ * restriction, including without limitation the rights to use, copy, modify,
-+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
-+ * and to permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
 + *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
 + *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-+ * IN THE SOFTWARE.
-+ */
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
++*/
 +
 +#include "common.h"
 +
-+#include <linux/kthread.h>
-+#include <linux/if_vlan.h>
-+#include <linux/udp.h>
-+
-+#include <net/tcp.h>
-+
-+#include <xen/balloon.h>
-+#include <xen/events.h>
-+#include <xen/interface/memory.h>
-+
-+#include <asm/xen/hypercall.h>
-+#include <asm/xen/page.h>
-+
-+/*define NETBE_DEBUG_INTERRUPT*/
-+
-+struct xen_netbk *xen_netbk;
-+int xen_netbk_group_nr;
-+
-+static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx);
-+static void make_tx_response(struct xen_netif *netif,
-+			     struct xen_netif_tx_request *txp,
-+			     s8       st);
-+static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
-+					     u16      id,
-+					     s8       st,
-+					     u16      offset,
-+					     u16      size,
-+					     u16      flags);
-+
-+static void net_tx_action(unsigned long data);
-+
-+static void net_rx_action(unsigned long data);
-+
-+static inline unsigned long idx_to_pfn(struct xen_netbk *netbk,
-+				       unsigned int idx)
-+{
-+	return page_to_pfn(netbk->mmap_pages[idx]);
-+}
-+
-+static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
-+					 unsigned int idx)
-+{
-+	return (unsigned long)pfn_to_kaddr(idx_to_pfn(netbk, idx));
-+}
-+
-+/* extra field used in struct page */
-+static inline void netif_set_page_ext(struct page *pg,
-+				      unsigned int group, unsigned int idx)
-+{
-+	union page_ext ext = { .e = { .group = group + 1, .idx = idx } };
-+
-+	BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping));
-+	pg->mapping = ext.mapping;
-+}
-+
-+static int netif_get_page_ext(struct page *pg,
-+			      unsigned int *_group, unsigned int *_idx)
-+{
-+	union page_ext ext = { .mapping = pg->mapping };
-+	struct xen_netbk *netbk;
-+	unsigned int group, idx;
-+
-+	if (!PageForeign(pg))
-+		return 0;
-+
-+	group = ext.e.group - 1;
-+
-+	if (group < 0 || group >= xen_netbk_group_nr)
-+		return 0;
-+
-+	netbk = &xen_netbk[group];
-+
-+	if (netbk->mmap_pages == NULL)
-+		return 0;
-+
-+	idx = ext.e.idx;
++struct backend_info {
++	struct xenbus_device *dev;
++	struct xenvif *vif;
++	enum xenbus_state frontend_state;
++	struct xenbus_watch hotplug_status_watch;
++	int have_hotplug_status_watch:1;
++};
 +
-+	if ((idx < 0) || (idx >= MAX_PENDING_REQS))
-+		return 0;
-+
-+	if (netbk->mmap_pages[idx] != pg)
-+		return 0;
-+
-+	*_group = group;
-+	*_idx = idx;
-+
-+	return 1;
-+}
-+
-+/*
-+ * This is the amount of packet we copy rather than map, so that the
-+ * guest can't fiddle with the contents of the headers while we do
-+ * packet processing on them (netfilter, routing, etc).
-+ */
-+#define PKT_PROT_LEN    (ETH_HLEN + \
-+			 VLAN_HLEN + \
-+			 sizeof(struct iphdr) + MAX_IPOPTLEN + \
-+			 sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE)
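
With the usual x86 values (ETH_HLEN 14, VLAN_HLEN 4, a 20-byte IP header plus
MAX_IPOPTLEN 40, and a 20-byte TCP header plus MAX_TCP_OPTION_SPACE 40) this
works out to 138 bytes: enough to cover any sane set of protocol headers while
leaving the bulk of the payload mapped rather than copied.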
-+
-+static inline pending_ring_idx_t pending_index(unsigned i)
-+{
-+	return i & (MAX_PENDING_REQS-1);
-+}
-+
-+static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk)
-+{
-+	return MAX_PENDING_REQS -
-+		netbk->pending_prod + netbk->pending_cons;
-+}
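
The masking in pending_index() only works because MAX_PENDING_REQS is a power
of two, and nr_pending_reqs() operates on free-running counters that are
allowed to wrap. A standalone sketch of the arithmetic, with MAX_PENDING_REQS
assumed here to be 256:

	#include <stdio.h>

	#define MAX_PENDING_REQS 256	/* must be a power of two */

	static unsigned pending_index(unsigned i)
	{
		return i & (MAX_PENDING_REQS - 1);	/* cheap i % 256 */
	}

	int main(void)
	{
		/* Producer/consumer of *free* slots; both wrap freely. */
		unsigned pending_prod = 260, pending_cons = 255;

		printf("requests in flight: %u\n",
		       MAX_PENDING_REQS - pending_prod + pending_cons); /* 251 */
		printf("ring slot for cons: %u\n",
		       pending_index(pending_cons));			 /* 255 */
		return 0;
	}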
-+
-+/* Setting this allows the safe use of this driver without netloop. */
-+static int MODPARM_copy_skb = 1;
-+module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
-+MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
-+
-+int netbk_copy_skb_mode;
-+
-+static int MODPARM_netback_kthread;
-+module_param_named(netback_kthread, MODPARM_netback_kthread, bool, 0);
-+MODULE_PARM_DESC(netback_kthread, "Use kernel thread to replace tasklet");
-+
-+/*
-+ * Netback bottom half handler.
-+ * dir indicates the data direction.
-+ * rx: 1, tx: 0.
-+ */
-+static inline void xen_netbk_bh_handler(struct xen_netbk *netbk, int dir)
-+{
-+	if (MODPARM_netback_kthread)
-+		wake_up(&netbk->kthread.netbk_action_wq);
-+	else if (dir)
-+		tasklet_schedule(&netbk->tasklet.net_rx_tasklet);
-+	else
-+		tasklet_schedule(&netbk->tasklet.net_tx_tasklet);
-+}
-+
-+static inline void maybe_schedule_tx_action(struct xen_netbk *netbk)
-+{
-+	smp_mb();
-+	if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
-+	    !list_empty(&netbk->net_schedule_list))
-+		xen_netbk_bh_handler(netbk, 0);
-+}
-+
-+static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
-+{
-+	struct skb_shared_info *ninfo;
-+	struct sk_buff *nskb;
-+	unsigned long offset;
-+	int ret;
-+	int len;
-+	int headlen;
-+
-+	BUG_ON(skb_shinfo(skb)->frag_list != NULL);
-+
-+	nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN);
-+	if (unlikely(!nskb))
-+		goto err;
-+
-+	skb_reserve(nskb, NET_SKB_PAD + NET_IP_ALIGN);
-+	headlen = skb_end_pointer(nskb) - nskb->data;
-+	if (headlen > skb_headlen(skb))
-+		headlen = skb_headlen(skb);
-+	ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
-+	BUG_ON(ret);
-+
-+	ninfo = skb_shinfo(nskb);
-+	ninfo->gso_size = skb_shinfo(skb)->gso_size;
-+	ninfo->gso_type = skb_shinfo(skb)->gso_type;
-+
-+	offset = headlen;
-+	len = skb->len - headlen;
-+
-+	nskb->len = skb->len;
-+	nskb->data_len = len;
-+	nskb->truesize += len;
-+
-+	while (len) {
-+		struct page *page;
-+		int copy;
-+		int zero;
-+
-+		if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
-+			dump_stack();
-+			goto err_free;
-+		}
-+
-+		copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
-+		zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
-+
-+		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
-+		if (unlikely(!page))
-+			goto err_free;
-+
-+		ret = skb_copy_bits(skb, offset, page_address(page), copy);
-+		BUG_ON(ret);
-+
-+		ninfo->frags[ninfo->nr_frags].page = page;
-+		ninfo->frags[ninfo->nr_frags].page_offset = 0;
-+		ninfo->frags[ninfo->nr_frags].size = copy;
-+		ninfo->nr_frags++;
-+
-+		offset += copy;
-+		len -= copy;
-+	}
-+
-+#ifdef NET_SKBUFF_DATA_USES_OFFSET
-+	offset = 0;
-+#else
-+	offset = nskb->data - skb->data;
-+#endif
-+
-+	nskb->transport_header = skb->transport_header + offset;
-+	nskb->network_header = skb->network_header + offset;
-+	nskb->mac_header = skb->mac_header + offset;
-+
-+	return nskb;
-+
-+ err_free:
-+	kfree_skb(nskb);
-+ err:
-+	return NULL;
-+}
-+
-+static inline int netbk_max_required_rx_slots(struct xen_netif *netif)
-+{
-+	if (netif->can_sg || netif->gso || netif->gso_prefix)
-+		return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
-+	return 1; /* all in one */
-+}
-+
-+static inline int netbk_queue_full(struct xen_netif *netif)
-+{
-+	RING_IDX peek   = netif->rx_req_cons_peek;
-+	RING_IDX needed = netbk_max_required_rx_slots(netif);
-+
-+	return ((netif->rx.sring->req_prod - peek) < needed) ||
-+	       ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
-+}
-+
-+/*
-+ * Returns true if we should start a new receive buffer instead of
-+ * adding 'size' bytes to a buffer which currently contains 'offset'
-+ * bytes.
-+ */
-+static bool start_new_rx_buffer(int offset, unsigned long size, int head)
-+{
-+	/* simple case: we have completely filled the current buffer. */
-+	if (offset == MAX_BUFFER_OFFSET)
-+		return true;
-+
-+	/*
-+	 * complex case: start a fresh buffer if the current frag
-+	 * would overflow the current buffer but only if:
-+	 *     (i)   this frag would fit completely in the next buffer
-+	 * and (ii)  there is already some data in the current buffer
-+	 * and (iii) this is not the head buffer.
-+	 *
-+	 * Where:
-+	 * - (i) stops us splitting a frag into two copies
-+	 *   unless the frag is too large for a single buffer.
-+	 * - (ii) stops us from leaving a buffer pointlessly empty.
-+	 * - (iii) stops us leaving the first buffer
-+	 *   empty. Strictly speaking this is already covered
-+	 *   by (ii) but is explicitly checked because
-+	 *   netfront relies on the first buffer being
-+	 *   non-empty and can crash otherwise.
-+	 *
-+	 * This means we will effectively linearise small
-+	 * frags, but we do not needlessly split large buffers
-+	 * into multiple copies; large frags tend to get their
-+	 * own buffers, as before.
-+	 */
-+	if ((offset + size > MAX_BUFFER_OFFSET) &&
-+	    (size <= MAX_BUFFER_OFFSET) && offset && !head)
-+		return true;
-+
-+	return false;
-+}
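
A few concrete cases make the predicate easier to check. This standalone copy
of the logic assumes MAX_BUFFER_OFFSET is PAGE_SIZE (4096), as on x86:

	#include <stdbool.h>
	#include <stdio.h>

	#define MAX_BUFFER_OFFSET 4096	/* PAGE_SIZE assumed */

	/* Same logic as start_new_rx_buffer() above. */
	static bool start_new_rx_buffer(int offset, unsigned long size, int head)
	{
		if (offset == MAX_BUFFER_OFFSET)
			return true;
		if ((offset + size > MAX_BUFFER_OFFSET) &&
		    (size <= MAX_BUFFER_OFFSET) && offset && !head)
			return true;
		return false;
	}

	int main(void)
	{
		/* 1500-byte frag, 3000 bytes already buffered: start fresh (1). */
		printf("%d\n", start_new_rx_buffer(3000, 1500, 0));
		/* Same frag but this is the head buffer: keep filling it (0). */
		printf("%d\n", start_new_rx_buffer(3000, 1500, 1));
		/* A 6000-byte frag can never fit whole, so split in place (0). */
		printf("%d\n", start_new_rx_buffer(3000, 6000, 0));
		return 0;
	}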
-+
-+/*
-+ * Figure out how many ring slots we're going to need to send @skb to
-+ * the guest. This function is essentially a dry run of
-+ * netbk_gop_frag_copy.
-+ */
-+static unsigned int count_skb_slots(struct sk_buff *skb, struct xen_netif *netif)
-+{
-+	unsigned int count = 1;
-+	int i, copy_off = 0;
-+
-+	BUG_ON(offset_in_page(skb->data)+skb_headlen(skb) > MAX_BUFFER_OFFSET);
-+
-+	copy_off = skb_headlen(skb);
-+
-+	if (skb_shinfo(skb)->gso_size)
-+		count++;
-+
-+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-+		unsigned long size = skb_shinfo(skb)->frags[i].size;
-+		unsigned long bytes;
-+		while (size > 0) {
-+			BUG_ON(copy_off > MAX_BUFFER_OFFSET);
-+
-+			if (start_new_rx_buffer(copy_off, size, 0)) {
-+				count++;
-+				copy_off = 0;
-+			}
-+
-+			bytes = size;
-+			if (copy_off + bytes > MAX_BUFFER_OFFSET)
-+				bytes = MAX_BUFFER_OFFSET - copy_off;
-+
-+			copy_off += bytes;
-+			size -= bytes;
-+		}
-+	}
-+	return count;
-+}
-+
-+int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
-+{
-+	struct xen_netif *netif = netdev_priv(dev);
-+	struct xen_netbk *netbk;
-+
-+	BUG_ON(skb->dev != dev);
-+
-+	if (netif->group == -1)
-+		goto drop;
-+
-+	netbk = &xen_netbk[netif->group];
-+
-+	/* Drop the packet if the target domain has no receive buffers. */
-+	if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
-+		goto drop;
-+
-+	/*
-+	 * XXX For now we also copy skbuffs whose head crosses a page
-+	 * boundary, because netbk_gop_skb can't handle them.
-+	 */
-+	if ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE) {
-+		struct sk_buff *nskb = netbk_copy_skb(skb);
-+		if (unlikely(nskb == NULL))
-+			goto drop;
-+		/* Copy only the header fields we use in this driver. */
-+		nskb->dev = skb->dev;
-+		nskb->ip_summed = skb->ip_summed;
-+		dev_kfree_skb(skb);
-+		skb = nskb;
-+	}
-+
-+	/* Reserve ring slots for the worst-case number of fragments. */
-+	netif->rx_req_cons_peek += count_skb_slots(skb, netif);
-+	netif_get(netif);
-+
-+	if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
-+		netif->rx.sring->req_event = netif->rx_req_cons_peek +
-+			netbk_max_required_rx_slots(netif);
-+		mb(); /* request notification /then/ check & stop the queue */
-+		if (netbk_queue_full(netif))
-+			netif_stop_queue(dev);
-+	}
-+	skb_queue_tail(&netbk->rx_queue, skb);
-+
-+	xen_netbk_bh_handler(netbk, 1);
-+
-+	return 0;
-+
-+ drop:
-+	netif->stats.tx_dropped++;
-+	dev_kfree_skb(skb);
-+	return 0;
-+}
-+
-+struct netrx_pending_operations {
-+	unsigned copy_prod, copy_cons;
-+	unsigned meta_prod, meta_cons;
-+	struct gnttab_copy *copy;
-+	struct netbk_rx_meta *meta;
-+	int copy_off;
-+	grant_ref_t copy_gref;
-+};
-+
-+static struct netbk_rx_meta *get_next_rx_buffer(struct xen_netif *netif,
-+						struct netrx_pending_operations *npo)
-+{
-+	struct netbk_rx_meta *meta;
-+	struct xen_netif_rx_request *req;
-+
-+	req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
-+
-+	meta = npo->meta + npo->meta_prod++;
-+	meta->gso_size = 0;
-+	meta->size = 0;
-+	meta->id = req->id;
-+
-+	npo->copy_off = 0;
-+	npo->copy_gref = req->gref;
-+
-+	return meta;
-+}
-+
-+/*
-+ * Set up the grant operations for this fragment. If it's a flipping
-+ * interface, we also set up the unmap request from here.
-+ */
-+static void netbk_gop_frag_copy(struct xen_netif *netif,
-+				struct netrx_pending_operations *npo,
-+				struct page *page, unsigned long size,
-+				unsigned long offset, int head)
-+{
-+	struct gnttab_copy *copy_gop;
-+	struct netbk_rx_meta *meta;
-+	/*
-+	 * These variables are used iff netif_get_page_ext returns true,
-+	 * in which case they are guaranteed to be initialized.
-+	 */
-+	unsigned int uninitialized_var(group), uninitialized_var(idx);
-+	int foreign = netif_get_page_ext(page, &group, &idx);
-+	unsigned long bytes;
-+
-+	/* Data must not cross a page boundary. */
-+	BUG_ON(size + offset > PAGE_SIZE);
-+
-+	meta = npo->meta + npo->meta_prod - 1;
-+
-+	while (size > 0) {
-+		BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
-+
-+		if (start_new_rx_buffer(npo->copy_off, size, head)) {
-+			/*
-+			 * Netfront requires there to be some data in the head
-+			 * buffer.
-+			 */
-+			BUG_ON(head);
-+
-+			meta = get_next_rx_buffer(netif, npo);
-+		}
-+
-+		bytes = size;
-+		if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
-+			bytes = MAX_BUFFER_OFFSET - npo->copy_off;
-+
-+		copy_gop = npo->copy + npo->copy_prod++;
-+		copy_gop->flags = GNTCOPY_dest_gref;
-+		if (foreign) {
-+			struct xen_netbk *netbk = &xen_netbk[group];
-+			struct pending_tx_info *src_pend;
-+
-+			src_pend = &netbk->pending_tx_info[idx];
-+
-+			copy_gop->source.domid = src_pend->netif->domid;
-+			copy_gop->source.u.ref = src_pend->req.gref;
-+			copy_gop->flags |= GNTCOPY_source_gref;
-+		} else {
-+			void *vaddr = page_address(page);
-+			copy_gop->source.domid = DOMID_SELF;
-+			copy_gop->source.u.gmfn = virt_to_mfn(vaddr);
-+		}
-+		copy_gop->source.offset = offset;
-+		copy_gop->dest.domid = netif->domid;
-+
-+		copy_gop->dest.offset = npo->copy_off;
-+		copy_gop->dest.u.ref = npo->copy_gref;
-+		copy_gop->len = bytes;
-+
-+		npo->copy_off += bytes;
-+		meta->size += bytes;
-+
-+		offset += bytes;
-+		size -= bytes;
-+		head = 0; /* There must be something in this buffer now. */
-+	}
-+}
-+
-+/*
-+ * Prepare an SKB to be transmitted to the frontend.
-+ *
-+ * This function is responsible for allocating grant operations, meta
-+ * structures, etc.
-+ *
-+ * It returns the number of meta structures consumed. The number of
-+ * ring slots used is always equal to the number of meta slots used
-+ * plus the number of GSO descriptors used. Currently, we use either
-+ * zero GSO descriptors (for non-GSO packets) or one descriptor (for
-+ * frontend-side LRO).
-+ */
-+static int netbk_gop_skb(struct sk_buff *skb,
-+			 struct netrx_pending_operations *npo)
-+{
-+	struct xen_netif *netif = netdev_priv(skb->dev);
-+	int nr_frags = skb_shinfo(skb)->nr_frags;
-+	int i;
-+	struct xen_netif_rx_request *req;
-+	struct netbk_rx_meta *meta;
-+	int old_meta_prod;
-+
-+	old_meta_prod = npo->meta_prod;
-+
-+	/* Set up a GSO prefix descriptor, if necessary */
-+	if (skb_shinfo(skb)->gso_size && netif->gso_prefix) {
-+		req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
-+		meta = npo->meta + npo->meta_prod++;
-+		meta->gso_size = skb_shinfo(skb)->gso_size;
-+		meta->size = 0;
-+		meta->id = req->id;
-+	}
-+
-+	req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
-+	meta = npo->meta + npo->meta_prod++;
-+
-+	if (!netif->gso_prefix)
-+		meta->gso_size = skb_shinfo(skb)->gso_size;
-+	else
-+		meta->gso_size = 0;
-+
-+	meta->size = 0;
-+	meta->id = req->id;
-+	npo->copy_off = 0;
-+	npo->copy_gref = req->gref;
-+
-+	netbk_gop_frag_copy(netif,
-+			    npo, virt_to_page(skb->data),
-+			    skb_headlen(skb),
-+			    offset_in_page(skb->data), 1);
-+
-+	/* Leave a gap for the GSO descriptor. */
-+	if (skb_shinfo(skb)->gso_size && !netif->gso_prefix)
-+		netif->rx.req_cons++;
-+
-+	for (i = 0; i < nr_frags; i++) {
-+		netbk_gop_frag_copy(netif, npo,
-+				    skb_shinfo(skb)->frags[i].page,
-+				    skb_shinfo(skb)->frags[i].size,
-+				    skb_shinfo(skb)->frags[i].page_offset,
-+				    0);
-+	}
-+
-+	return npo->meta_prod - old_meta_prod;
-+}
-+
-+/*
-+ * This is a twin to netbk_gop_skb.  Assume that netbk_gop_skb was
-+ * used to set up the operations on the top of
-+ * netrx_pending_operations, which have since been done.  Check that
-+ * they didn't give any errors and advance over them.
-+ */
-+static int netbk_check_gop(int nr_meta_slots, domid_t domid,
-+			   struct netrx_pending_operations *npo)
-+{
-+	struct gnttab_copy     *copy_op;
-+	int status = NETIF_RSP_OKAY;
-+	int i;
-+
-+	for (i = 0; i < nr_meta_slots; i++) {
-+		copy_op = npo->copy + npo->copy_cons++;
-+		if (copy_op->status != GNTST_okay) {
-+			pr_debug("Bad status %d from copy to DOM%d.\n",
-+				 copy_op->status, domid);
-+			status = NETIF_RSP_ERROR;
-+		}
-+	}
-+
-+	return status;
-+}
-+
-+static void netbk_add_frag_responses(struct xen_netif *netif, int status,
-+				     struct netbk_rx_meta *meta,
-+				     int nr_meta_slots)
-+{
-+	int i;
-+	unsigned long offset;
-+
-+	/* No fragments used */
-+	if (nr_meta_slots <= 1)
-+		return;
-+
-+	nr_meta_slots--;
-+
-+	for (i = 0; i < nr_meta_slots; i++) {
-+		int flags;
-+		if (i == nr_meta_slots - 1)
-+			flags = 0;
-+		else
-+			flags = NETRXF_more_data;
-+
-+		offset = 0;
-+		make_rx_response(netif, meta[i].id, status, offset,
-+				 meta[i].size, flags);
-+	}
-+}
-+
-+struct skb_cb_overlay {
-+	int meta_slots_used;
-+};
-+
-+static void net_rx_action(unsigned long data)
-+{
-+	struct xen_netif *netif = NULL;
-+	struct xen_netbk *netbk = (struct xen_netbk *)data;
-+	s8 status;
-+	u16 irq, flags;
-+	struct xen_netif_rx_response *resp;
-+	struct sk_buff_head rxq;
-+	struct sk_buff *skb;
-+	int notify_nr = 0;
-+	int ret;
-+	int nr_frags;
-+	int count;
-+	unsigned long offset;
-+	struct skb_cb_overlay *sco;
-+
-+	struct netrx_pending_operations npo = {
-+		.copy  = netbk->grant_copy_op,
-+		.meta  = netbk->meta,
-+	};
-+
-+	skb_queue_head_init(&rxq);
-+
-+	count = 0;
-+
-+	while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) {
-+		netif = netdev_priv(skb->dev);
-+		nr_frags = skb_shinfo(skb)->nr_frags;
-+
-+		sco = (struct skb_cb_overlay *)skb->cb;
-+		sco->meta_slots_used = netbk_gop_skb(skb, &npo);
-+
-+		count += nr_frags + 1;
-+
-+		__skb_queue_tail(&rxq, skb);
-+
-+		/* Filled the batch queue? */
-+		if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE)
-+			break;
-+	}
-+
-+	BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
-+
-+	if (!npo.copy_prod)
-+		return;
-+
-+	BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
-+	ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, &netbk->grant_copy_op,
-+					npo.copy_prod);
-+	BUG_ON(ret != 0);
-+
-+	while ((skb = __skb_dequeue(&rxq)) != NULL) {
-+		sco = (struct skb_cb_overlay *)skb->cb;
-+
-+		netif = netdev_priv(skb->dev);
-+
-+		if (netbk->meta[npo.meta_cons].gso_size && netif->gso_prefix) {
-+			resp = RING_GET_RESPONSE(&netif->rx,
-+						netif->rx.rsp_prod_pvt++);
-+
-+			resp->flags = NETRXF_gso_prefix | NETRXF_more_data;
-+
-+			resp->offset = netbk->meta[npo.meta_cons].gso_size;
-+			resp->id = netbk->meta[npo.meta_cons].id;
-+			resp->status = sco->meta_slots_used;
-+
-+			npo.meta_cons++;
-+			sco->meta_slots_used--;
-+		}
-+
-+
-+		netif->stats.tx_bytes += skb->len;
-+		netif->stats.tx_packets++;
-+
-+		status = netbk_check_gop(sco->meta_slots_used,
-+					 netif->domid, &npo);
-+
-+		if (sco->meta_slots_used == 1)
-+			flags = 0;
-+		else
-+			flags = NETRXF_more_data;
-+
-+		if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
-+			flags |= NETRXF_csum_blank | NETRXF_data_validated;
-+		else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
-+			/* remote but checksummed. */
-+			flags |= NETRXF_data_validated;
-+
-+		offset = 0;
-+		resp = make_rx_response(netif, netbk->meta[npo.meta_cons].id,
-+					status, offset,
-+					netbk->meta[npo.meta_cons].size,
-+					flags);
-+
-+		if (netbk->meta[npo.meta_cons].gso_size && !netif->gso_prefix) {
-+			struct xen_netif_extra_info *gso =
-+				(struct xen_netif_extra_info *)
-+				RING_GET_RESPONSE(&netif->rx,
-+						  netif->rx.rsp_prod_pvt++);
-+
-+			resp->flags |= NETRXF_extra_info;
-+
-+			gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size;
-+			gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
-+			gso->u.gso.pad = 0;
-+			gso->u.gso.features = 0;
-+
-+			gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
-+			gso->flags = 0;
-+		}
-+
-+		netbk_add_frag_responses(netif, status,
-+					 netbk->meta + npo.meta_cons + 1,
-+					 sco->meta_slots_used);
-+
-+		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
-+		irq = netif->irq;
-+		if (ret && !netbk->rx_notify[irq]) {
-+			netbk->rx_notify[irq] = 1;
-+			netbk->notify_list[notify_nr++] = irq;
-+		}
-+
-+		if (netif_queue_stopped(netif->dev) &&
-+		    netif_schedulable(netif) &&
-+		    !netbk_queue_full(netif))
-+			netif_wake_queue(netif->dev);
-+
-+		netif_put(netif);
-+		npo.meta_cons += sco->meta_slots_used;
-+		dev_kfree_skb(skb);
-+	}
-+
-+	while (notify_nr != 0) {
-+		irq = netbk->notify_list[--notify_nr];
-+		netbk->rx_notify[irq] = 0;
-+		notify_remote_via_irq(irq);
-+	}
-+
-+	/* More work to do? */
-+	if (!skb_queue_empty(&netbk->rx_queue) &&
-+			!timer_pending(&netbk->net_timer))
-+		xen_netbk_bh_handler(netbk, 1);
-+}
-+
-+static void net_alarm(unsigned long data)
-+{
-+	struct xen_netbk *netbk = (struct xen_netbk *)data;
-+	xen_netbk_bh_handler(netbk, 1);
-+}
-+
-+static void netbk_tx_pending_timeout(unsigned long data)
-+{
-+	struct xen_netbk *netbk = (struct xen_netbk *)data;
-+	xen_netbk_bh_handler(netbk, 0);
-+}
-+
-+struct net_device_stats *netif_be_get_stats(struct net_device *dev)
-+{
-+	struct xen_netif *netif = netdev_priv(dev);
-+	return &netif->stats;
-+}
-+
-+static int __on_net_schedule_list(struct xen_netif *netif)
-+{
-+	return !list_empty(&netif->list);
-+}
-+
-+/* Must be called with net_schedule_list_lock held */
-+static void remove_from_net_schedule_list(struct xen_netif *netif)
-+{
-+	if (likely(__on_net_schedule_list(netif))) {
-+		list_del_init(&netif->list);
-+		netif_put(netif);
-+	}
-+}
-+
-+static struct xen_netif *poll_net_schedule_list(struct xen_netbk *netbk)
-+{
-+	struct xen_netif *netif = NULL;
-+
-+	spin_lock_irq(&netbk->net_schedule_list_lock);
-+	if (list_empty(&netbk->net_schedule_list))
-+		goto out;
-+
-+	netif = list_first_entry(&netbk->net_schedule_list,
-+				 struct xen_netif, list);
-+	if (!netif)
-+		goto out;
-+
-+	netif_get(netif);
-+
-+	remove_from_net_schedule_list(netif);
-+out:
-+	spin_unlock_irq(&netbk->net_schedule_list_lock);
-+	return netif;
-+}
-+
-+static void add_to_net_schedule_list_tail(struct xen_netif *netif)
-+{
-+	unsigned long flags;
-+
-+	struct xen_netbk *netbk = &xen_netbk[netif->group];
-+	if (__on_net_schedule_list(netif))
-+		return;
-+
-+	spin_lock_irqsave(&netbk->net_schedule_list_lock, flags);
-+	if (!__on_net_schedule_list(netif) &&
-+	    likely(netif_schedulable(netif))) {
-+		list_add_tail(&netif->list, &netbk->net_schedule_list);
-+		netif_get(netif);
-+	}
-+	spin_unlock_irqrestore(&netbk->net_schedule_list_lock, flags);
-+}
-+
-+void netif_schedule_work(struct xen_netif *netif)
-+{
-+	struct xen_netbk *netbk = &xen_netbk[netif->group];
-+	int more_to_do;
-+
-+	RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
-+
-+	if (more_to_do) {
-+		add_to_net_schedule_list_tail(netif);
-+		maybe_schedule_tx_action(netbk);
-+	}
-+}
-+
-+void netif_deschedule_work(struct xen_netif *netif)
-+{
-+	struct xen_netbk *netbk = &xen_netbk[netif->group];
-+	spin_lock_irq(&netbk->net_schedule_list_lock);
-+	remove_from_net_schedule_list(netif);
-+	spin_unlock_irq(&netbk->net_schedule_list_lock);
-+}
-+
-+
-+static void tx_add_credit(struct xen_netif *netif)
-+{
-+	unsigned long max_burst, max_credit;
-+
-+	/*
-+	 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
-+	 * Otherwise the interface can seize up due to insufficient credit.
-+	 */
-+	max_burst = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size;
-+	max_burst = min(max_burst, 131072UL);
-+	max_burst = max(max_burst, netif->credit_bytes);
-+
-+	/* Take care that adding a new chunk of credit doesn't wrap to zero. */
-+	max_credit = netif->remaining_credit + netif->credit_bytes;
-+	if (max_credit < netif->remaining_credit)
-+		max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
-+
-+	netif->remaining_credit = min(max_credit, max_burst);
-+}
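
One replenish step is easiest to see with numbers. A standalone sketch using
made-up values (100000 credit bytes per window, 20000 bytes carried over, a
64kB packet at the head of the ring):

	#include <stdio.h>

	int main(void)
	{
		unsigned long credit_bytes = 100000;	/* per-window allowance */
		unsigned long remaining = 20000;	/* left from last window */
		unsigned long next_pkt = 65536;		/* head-of-line packet */

		unsigned long max_burst = next_pkt;
		if (max_burst > 131072UL)
			max_burst = 131072UL;		/* 128kB jumbo ceiling */
		if (max_burst < credit_bytes)
			max_burst = credit_bytes;

		unsigned long max_credit = remaining + credit_bytes;
		if (max_credit < remaining)
			max_credit = ~0UL;		/* overflow clamp */

		remaining = max_credit < max_burst ? max_credit : max_burst;
		printf("%lu\n", remaining);		/* 100000 */
		return 0;
	}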
-+
-+static void tx_credit_callback(unsigned long data)
-+{
-+	struct xen_netif *netif = (struct xen_netif *)data;
-+	tx_add_credit(netif);
-+	netif_schedule_work(netif);
-+}
-+
-+static inline int copy_pending_req(struct xen_netbk *netbk,
-+				   pending_ring_idx_t pending_idx)
-+{
-+	return gnttab_copy_grant_page(
-+			netbk->grant_tx_handle[pending_idx],
-+			&netbk->mmap_pages[pending_idx]);
-+}
-+
-+static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
-+{
-+	struct netbk_tx_pending_inuse *inuse, *n;
-+	struct gnttab_unmap_grant_ref *gop;
-+	u16 pending_idx;
-+	pending_ring_idx_t dc, dp;
-+	struct xen_netif *netif;
-+	int ret;
-+	LIST_HEAD(list);
-+
-+	dc = netbk->dealloc_cons;
-+	gop = netbk->tx_unmap_ops;
-+
-+	/* Free up any grants we have finished using. */
-+	do {
-+		dp = netbk->dealloc_prod;
-+
-+		/* Ensure we see all indices enqueued by netif_idx_release(). */
-+		smp_rmb();
-+
-+		while (dc != dp) {
-+			unsigned long pfn;
-+			struct netbk_tx_pending_inuse *pending_inuse =
-+					netbk->pending_inuse;
-+
-+			pending_idx = netbk->dealloc_ring[pending_index(dc++)];
-+			list_move_tail(&pending_inuse[pending_idx].list, &list);
-+
-+			pfn = idx_to_pfn(netbk, pending_idx);
-+			/* Already unmapped? */
-+			if (!phys_to_machine_mapping_valid(pfn))
-+				continue;
-+
-+			gnttab_set_unmap_op(gop,
-+					idx_to_kaddr(netbk, pending_idx),
-+					GNTMAP_host_map,
-+					netbk->grant_tx_handle[pending_idx]);
-+			gop++;
-+		}
-+
-+	} while (dp != netbk->dealloc_prod);
-+
-+	netbk->dealloc_cons = dc;
-+
-+	ret = HYPERVISOR_grant_table_op(
-+		GNTTABOP_unmap_grant_ref, netbk->tx_unmap_ops,
-+		gop - netbk->tx_unmap_ops);
-+	BUG_ON(ret);
-+
-+	/*
-+	 * Copy any entries that have been pending for too long
-+	 */
-+	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
-+	    !list_empty(&netbk->pending_inuse_head)) {
-+		list_for_each_entry_safe(inuse, n,
-+				&netbk->pending_inuse_head, list) {
-+			struct pending_tx_info *pending_tx_info;
-+			pending_tx_info = netbk->pending_tx_info;
-+
-+			if (time_after(inuse->alloc_time + HZ / 2, jiffies))
-+				break;
-+
-+			pending_idx = inuse - netbk->pending_inuse;
-+
-+			pending_tx_info[pending_idx].netif->nr_copied_skbs++;
-+
-+			switch (copy_pending_req(netbk, pending_idx)) {
-+			case 0:
-+				list_move_tail(&inuse->list, &list);
-+				continue;
-+			case -EBUSY:
-+				list_del_init(&inuse->list);
-+				continue;
-+			case -ENOENT:
-+				continue;
-+			}
-+
-+			break;
-+		}
-+	}
-+
-+	list_for_each_entry_safe(inuse, n, &list, list) {
-+		struct pending_tx_info *pending_tx_info;
-+		pending_ring_idx_t index;
-+
-+		pending_tx_info = netbk->pending_tx_info;
-+		pending_idx = inuse - netbk->pending_inuse;
-+
-+		netif = pending_tx_info[pending_idx].netif;
-+
-+		make_tx_response(netif, &pending_tx_info[pending_idx].req,
-+				 NETIF_RSP_OKAY);
-+
-+		/* Ready for next use. */
-+		gnttab_reset_grant_page(netbk->mmap_pages[pending_idx]);
-+
-+		index = pending_index(netbk->pending_prod++);
-+		netbk->pending_ring[index] = pending_idx;
-+
-+		netif_put(netif);
-+
-+		list_del_init(&inuse->list);
-+	}
-+}
-+
-+static void netbk_tx_err(struct xen_netif *netif,
-+		struct xen_netif_tx_request *txp, RING_IDX end)
-+{
-+	RING_IDX cons = netif->tx.req_cons;
-+
-+	do {
-+		make_tx_response(netif, txp, NETIF_RSP_ERROR);
-+		if (cons >= end)
-+			break;
-+		txp = RING_GET_REQUEST(&netif->tx, cons++);
-+	} while (1);
-+	netif->tx.req_cons = cons;
-+	netif_schedule_work(netif);
-+	netif_put(netif);
-+}
-+
-+static int netbk_count_requests(struct xen_netif *netif,
-+				struct xen_netif_tx_request *first,
-+				struct xen_netif_tx_request *txp,
-+				int work_to_do)
-+{
-+	RING_IDX cons = netif->tx.req_cons;
-+	int frags = 0;
-+
-+	if (!(first->flags & NETTXF_more_data))
-+		return 0;
-+
-+	do {
-+		if (frags >= work_to_do) {
-+			pr_debug("Need more frags\n");
-+			return -frags;
-+		}
-+
-+		if (unlikely(frags >= MAX_SKB_FRAGS)) {
-+			pr_debug("Too many frags\n");
-+			return -frags;
-+		}
-+
-+		memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags),
-+		       sizeof(*txp));
-+		if (txp->size > first->size) {
-+			pr_debug("Frags galore\n");
-+			return -frags;
-+		}
-+
-+		first->size -= txp->size;
-+		frags++;
-+
-+		if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
-+			pr_debug("txp->offset: %x, size: %u\n",
-+				 txp->offset, txp->size);
-+			return -frags;
-+		}
-+	} while ((txp++)->flags & NETTXF_more_data);
-+
-+	return frags;
-+}
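
Note the error convention: a negative return is -(number of slots consumed so
far), which lets the caller in net_tx_build_mops() pass idx - ret to
netbk_tx_err() and thereby respond to every request it has already looked at.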
-+
-+static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk,
-+						       struct xen_netif *netif,
-+						       struct sk_buff *skb,
-+						       struct xen_netif_tx_request *txp,
-+						       struct gnttab_map_grant_ref *mop)
-+{
-+	struct skb_shared_info *shinfo = skb_shinfo(skb);
-+	skb_frag_t *frags = shinfo->frags;
-+	unsigned long pending_idx = *((u16 *)skb->data);
-+	int i, start;
-+
-+	/* Skip first skb fragment if it is on same page as header fragment. */
-+	start = ((unsigned long)shinfo->frags[0].page == pending_idx);
-+
-+	for (i = start; i < shinfo->nr_frags; i++, txp++) {
-+		pending_ring_idx_t index;
-+		struct pending_tx_info *pending_tx_info =
-+			netbk->pending_tx_info;
-+
-+		index = pending_index(netbk->pending_cons++);
-+		pending_idx = netbk->pending_ring[index];
-+
-+		gnttab_set_map_op(mop++, idx_to_kaddr(netbk, pending_idx),
-+				  GNTMAP_host_map | GNTMAP_readonly,
-+				  txp->gref, netif->domid);
-+
-+		memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
-+		netif_get(netif);
-+		pending_tx_info[pending_idx].netif = netif;
-+		frags[i].page = (void *)pending_idx;
-+	}
-+
-+	return mop;
-+}
-+
-+static int netbk_tx_check_mop(struct xen_netbk *netbk,
-+			      struct sk_buff *skb,
-+			      struct gnttab_map_grant_ref **mopp)
-+{
-+	struct gnttab_map_grant_ref *mop = *mopp;
-+	int pending_idx = *((u16 *)skb->data);
-+	struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
-+	struct xen_netif *netif = pending_tx_info[pending_idx].netif;
-+	struct xen_netif_tx_request *txp;
-+	struct skb_shared_info *shinfo = skb_shinfo(skb);
-+	int nr_frags = shinfo->nr_frags;
-+	int i, err, start;
-+
-+	/* Check status of header. */
-+	err = mop->status;
-+	if (unlikely(err)) {
-+		pending_ring_idx_t index;
-+		index = pending_index(netbk->pending_prod++);
-+		txp = &pending_tx_info[pending_idx].req;
-+		make_tx_response(netif, txp, NETIF_RSP_ERROR);
-+		netbk->pending_ring[index] = pending_idx;
-+		netif_put(netif);
-+	} else {
-+		set_phys_to_machine(
-+			__pa(idx_to_kaddr(netbk, pending_idx)) >> PAGE_SHIFT,
-+			FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
-+		netbk->grant_tx_handle[pending_idx] = mop->handle;
-+	}
-+
-+	/* Skip first skb fragment if it is on same page as header fragment. */
-+	start = ((unsigned long)shinfo->frags[0].page == pending_idx);
-+
-+	for (i = start; i < nr_frags; i++) {
-+		int j, newerr;
-+		pending_ring_idx_t index;
-+
-+		pending_idx = (unsigned long)shinfo->frags[i].page;
-+
-+		/* Check error status: if okay then remember grant handle. */
-+		newerr = (++mop)->status;
-+		if (likely(!newerr)) {
-+			unsigned long addr;
-+			addr = idx_to_kaddr(netbk, pending_idx);
-+			set_phys_to_machine(
-+				__pa(addr)>>PAGE_SHIFT,
-+				FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
-+			netbk->grant_tx_handle[pending_idx] = mop->handle;
-+			/* Had a previous error? Invalidate this fragment. */
-+			if (unlikely(err))
-+				netif_idx_release(netbk, pending_idx);
-+			continue;
-+		}
-+
-+		/* Error on this fragment: respond to client with an error. */
-+		txp = &netbk->pending_tx_info[pending_idx].req;
-+		make_tx_response(netif, txp, NETIF_RSP_ERROR);
-+		index = pending_index(netbk->pending_prod++);
-+		netbk->pending_ring[index] = pending_idx;
-+		netif_put(netif);
-+
-+		/* Not the first error? Preceding frags already invalidated. */
-+		if (err)
-+			continue;
-+
-+		/* First error: invalidate header and preceding fragments. */
-+		pending_idx = *((u16 *)skb->data);
-+		netif_idx_release(netbk, pending_idx);
-+		for (j = start; j < i; j++) {
-+			pending_idx = (unsigned long)shinfo->frags[j].page;
-+			netif_idx_release(netbk, pending_idx);
-+		}
-+
-+		/* Remember the error: invalidate all subsequent fragments. */
-+		err = newerr;
-+	}
-+
-+	*mopp = mop + 1;
-+	return err;
-+}
-+
-+static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
-+{
-+	struct skb_shared_info *shinfo = skb_shinfo(skb);
-+	int nr_frags = shinfo->nr_frags;
-+	int i;
-+
-+	for (i = 0; i < nr_frags; i++) {
-+		skb_frag_t *frag = shinfo->frags + i;
-+		struct xen_netif_tx_request *txp;
-+		unsigned long pending_idx;
-+
-+		pending_idx = (unsigned long)frag->page;
-+
-+		netbk->pending_inuse[pending_idx].alloc_time = jiffies;
-+		list_add_tail(&netbk->pending_inuse[pending_idx].list,
-+			      &netbk->pending_inuse_head);
-+
-+		txp = &netbk->pending_tx_info[pending_idx].req;
-+		frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx));
-+		frag->size = txp->size;
-+		frag->page_offset = txp->offset;
-+
-+		skb->len += txp->size;
-+		skb->data_len += txp->size;
-+		skb->truesize += txp->size;
-+	}
-+}
-+
-+int netbk_get_extras(struct xen_netif *netif,
-+		     struct xen_netif_extra_info *extras,
-+		     int work_to_do)
-+{
-+	struct xen_netif_extra_info extra;
-+	RING_IDX cons = netif->tx.req_cons;
-+
-+	do {
-+		if (unlikely(work_to_do-- <= 0)) {
-+			pr_debug("Missing extra info\n");
-+			return -EBADR;
-+		}
-+
-+		memcpy(&extra, RING_GET_REQUEST(&netif->tx, cons),
-+		       sizeof(extra));
-+		if (unlikely(!extra.type ||
-+			     extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
-+			netif->tx.req_cons = ++cons;
-+			pr_debug("Invalid extra type: %d\n", extra.type);
-+			return -EINVAL;
-+		}
-+
-+		memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
-+		netif->tx.req_cons = ++cons;
-+	} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
-+
-+	return work_to_do;
-+}
-+
-+static int netbk_set_skb_gso(struct sk_buff *skb,
-+			     struct xen_netif_extra_info *gso)
-+{
-+	if (!gso->u.gso.size) {
-+		pr_debug("GSO size must not be zero.\n");
-+		return -EINVAL;
-+	}
-+
-+	/* Currently only TCPv4 S.O. is supported. */
-+	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
-+		pr_debug("Bad GSO type %d.\n", gso->u.gso.type);
-+		return -EINVAL;
-+	}
-+
-+	skb_shinfo(skb)->gso_size = gso->u.gso.size;
-+	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
-+
-+	/* Header must be checked, and gso_segs computed. */
-+	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
-+	skb_shinfo(skb)->gso_segs = 0;
-+
-+	return 0;
-+}
-+
-+static int checksum_setup(struct xen_netif *netif, struct sk_buff *skb)
-+{
-+	struct iphdr *iph;
-+	unsigned char *th;
-+	int err = -EPROTO;
-+	int recalculate_partial_csum = 0;
-+
-+	/*
-+	 * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
-+	 * peers can fail to set NETRXF_csum_blank when sending a GSO
-+	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
-+	 * recalculate the partial checksum.
-+	 */
-+	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
-+		netif->rx_gso_checksum_fixup++;
-+		skb->ip_summed = CHECKSUM_PARTIAL;
-+		recalculate_partial_csum = 1;
-+	}
-+
-+	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
-+	if (skb->ip_summed != CHECKSUM_PARTIAL)
-+		return 0;
-+
-+	if (skb->protocol != htons(ETH_P_IP))
-+		goto out;
-+
-+	iph = (void *)skb->data;
-+	th = skb->data + 4 * iph->ihl;
-+	if (th >= skb_tail_pointer(skb))
-+		goto out;
-+
-+	skb->csum_start = th - skb->head;
-+	switch (iph->protocol) {
-+	case IPPROTO_TCP:
-+		skb->csum_offset = offsetof(struct tcphdr, check);
-+
-+		if (recalculate_partial_csum) {
-+			struct tcphdr *tcph = (struct tcphdr *)th;
-+			tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-+							 skb->len - iph->ihl*4,
-+							 IPPROTO_TCP, 0);
-+		}
-+		break;
-+	case IPPROTO_UDP:
-+		skb->csum_offset = offsetof(struct udphdr, check);
-+
-+		if (recalculate_partial_csum) {
-+			struct udphdr *udph = (struct udphdr *)th;
-+			udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-+							 skb->len - iph->ihl*4,
-+							 IPPROTO_UDP, 0);
-+		}
-+		break;
-+	default:
-+		if (net_ratelimit())
-+			printk(KERN_ERR "Attempting to checksum a non-"
-+			       "TCP/UDP packet, dropping a protocol"
-+			       " %d packet\n", iph->protocol);
-+		goto out;
-+	}
-+
-+	if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
-+		goto out;
-+
-+	err = 0;
-+
-+out:
-+	return err;
-+}
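
What "recalculate the partial checksum" means concretely: for CHECKSUM_PARTIAL
the transport check field must hold the one's-complement sum of the IPv4
pseudo header (addresses, length, protocol); whoever completes the checksum
later folds that seed together with the payload sum. A host-order userspace
sketch of the seed (the kernel's csum_tcpudp_magic() works on wire-order
values and returns the folded complement, which the code above then inverts):

	#include <stdint.h>
	#include <stdio.h>

	/* One's-complement sum of the IPv4 pseudo header, host order. */
	static uint16_t pseudo_hdr_sum(uint32_t saddr, uint32_t daddr,
				       uint16_t len, uint8_t proto)
	{
		uint32_t sum = 0;

		sum += (saddr >> 16) + (saddr & 0xffff);
		sum += (daddr >> 16) + (daddr & 0xffff);
		sum += proto;
		sum += len;
		while (sum >> 16)		/* end-around carry */
			sum = (sum & 0xffff) + (sum >> 16);
		return (uint16_t)sum;
	}

	int main(void)
	{
		/* 10.0.0.1 -> 10.0.0.2, 1460 bytes of TCP (proto 6). */
		printf("0x%04x\n",
		       pseudo_hdr_sum(0x0a000001, 0x0a000002, 1460, 6));
		return 0;
	}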
-+
-+static bool tx_credit_exceeded(struct xen_netif *netif, unsigned size)
-+{
-+	unsigned long now = jiffies;
-+	unsigned long next_credit =
-+		netif->credit_timeout.expires +
-+		msecs_to_jiffies(netif->credit_usec / 1000);
-+
-+	/* Timer could already be pending in rare cases. */
-+	if (timer_pending(&netif->credit_timeout))
-+		return true;
-+
-+	/* Passed the point where we can replenish credit? */
-+	if (time_after_eq(now, next_credit)) {
-+		netif->credit_timeout.expires = now;
-+		tx_add_credit(netif);
-+	}
-+
-+	/* Still too big to send right now? Set a callback. */
-+	if (size > netif->remaining_credit) {
-+		netif->credit_timeout.data     =
-+			(unsigned long)netif;
-+		netif->credit_timeout.function =
-+			tx_credit_callback;
-+		mod_timer(&netif->credit_timeout,
-+			  next_credit);
-+
-+		return true;
-+	}
-+
-+	return false;
-+}
-+
-+static unsigned net_tx_build_mops(struct xen_netbk *netbk)
-+{
-+	struct gnttab_map_grant_ref *mop;
-+	struct sk_buff *skb;
-+	int ret;
-+
-+	mop = netbk->tx_map_ops;
-+	while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
-+		!list_empty(&netbk->net_schedule_list)) {
-+		struct xen_netif *netif;
-+		struct xen_netif_tx_request txreq;
-+		struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
-+		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
-+		u16 pending_idx;
-+		RING_IDX idx;
-+		int work_to_do;
-+		unsigned int data_len;
-+		pending_ring_idx_t index;
-+
-+		/* Get a netif from the list with work to do. */
-+		netif = poll_net_schedule_list(netbk);
-+		if (!netif)
-+			continue;
-+
-+		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
-+		if (!work_to_do) {
-+			netif_put(netif);
-+			continue;
-+		}
-+
-+		idx = netif->tx.req_cons;
-+		rmb(); /* Ensure that we see the request before we copy it. */
-+		memcpy(&txreq, RING_GET_REQUEST(&netif->tx, idx), sizeof(txreq));
-+
-+		/* Credit-based scheduling. */
-+		if (txreq.size > netif->remaining_credit &&
-+		    tx_credit_exceeded(netif, txreq.size)) {
-+			netif_put(netif);
-+			continue;
-+		}
-+
-+		netif->remaining_credit -= txreq.size;
-+
-+		work_to_do--;
-+		netif->tx.req_cons = ++idx;
-+
-+		memset(extras, 0, sizeof(extras));
-+		if (txreq.flags & NETTXF_extra_info) {
-+			work_to_do = netbk_get_extras(netif, extras,
-+						      work_to_do);
-+			idx = netif->tx.req_cons;
-+			if (unlikely(work_to_do < 0)) {
-+				netbk_tx_err(netif, &txreq, idx);
-+				continue;
-+			}
-+		}
-+
-+		ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do);
-+		if (unlikely(ret < 0)) {
-+			netbk_tx_err(netif, &txreq, idx - ret);
-+			continue;
-+		}
-+		idx += ret;
-+
-+		if (unlikely(txreq.size < ETH_HLEN)) {
-+			pr_debug("Bad packet size: %d\n", txreq.size);
-+			netbk_tx_err(netif, &txreq, idx);
-+			continue;
-+		}
-+
-+		/* Payload must not cross a page boundary: it can't be fragmented. */
-+		if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
-+			pr_debug("txreq.offset: %x, size: %u, end: %lu\n",
-+				 txreq.offset, txreq.size,
-+				 (txreq.offset&~PAGE_MASK) + txreq.size);
-+			netbk_tx_err(netif, &txreq, idx);
-+			continue;
-+		}
-+
-+		index = pending_index(netbk->pending_cons);
-+		pending_idx = netbk->pending_ring[index];
-+
-+		data_len = (txreq.size > PKT_PROT_LEN &&
-+			    ret < MAX_SKB_FRAGS) ?
-+			PKT_PROT_LEN : txreq.size;
-+
-+		skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
-+				GFP_ATOMIC | __GFP_NOWARN);
-+		if (unlikely(skb == NULL)) {
-+			pr_debug("Can't allocate a skb in start_xmit.\n");
-+			netbk_tx_err(netif, &txreq, idx);
-+			break;
-+		}
-+
-+		/* Packets passed to netif_rx() must have some headroom. */
-+		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
-+
-+		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
-+			struct xen_netif_extra_info *gso;
-+			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
-+
-+			if (netbk_set_skb_gso(skb, gso)) {
-+				kfree_skb(skb);
-+				netbk_tx_err(netif, &txreq, idx);
-+				continue;
-+			}
-+		}
-+
-+		gnttab_set_map_op(mop, idx_to_kaddr(netbk, pending_idx),
-+				  GNTMAP_host_map | GNTMAP_readonly,
-+				  txreq.gref, netif->domid);
-+		mop++;
-+
-+		memcpy(&netbk->pending_tx_info[pending_idx].req,
-+		       &txreq, sizeof(txreq));
-+		netbk->pending_tx_info[pending_idx].netif = netif;
-+		*((u16 *)skb->data) = pending_idx;
-+
-+		__skb_put(skb, data_len);
-+
-+		skb_shinfo(skb)->nr_frags = ret;
-+		if (data_len < txreq.size) {
-+			skb_shinfo(skb)->nr_frags++;
-+			skb_shinfo(skb)->frags[0].page =
-+				(void *)(unsigned long)pending_idx;
-+		} else {
-+			/* Discriminate from any valid pending_idx value. */
-+			skb_shinfo(skb)->frags[0].page = (void *)~0UL;
-+		}
-+
-+		__skb_queue_tail(&netbk->tx_queue, skb);
-+
-+		netbk->pending_cons++;
-+
-+		mop = netbk_get_requests(netbk, netif, skb, txfrags, mop);
-+
-+		netif->tx.req_cons = idx;
-+		netif_schedule_work(netif);
-+
-+		if ((mop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops))
-+			break;
-+	}
-+
-+	return mop - netbk->tx_map_ops;
-+}
-+
-+static void net_tx_submit(struct xen_netbk *netbk)
-+{
-+	struct gnttab_map_grant_ref *mop;
-+	struct sk_buff *skb;
-+
-+	mop = netbk->tx_map_ops;
-+	while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
-+		struct xen_netif_tx_request *txp;
-+		struct xen_netif *netif;
-+		u16 pending_idx;
-+		unsigned data_len;
-+
-+		pending_idx = *((u16 *)skb->data);
-+		netif = netbk->pending_tx_info[pending_idx].netif;
-+		txp = &netbk->pending_tx_info[pending_idx].req;
-+
-+		/* Check the remap error code. */
-+		if (unlikely(netbk_tx_check_mop(netbk, skb, &mop))) {
-+			pr_debug("netback grant failed.\n");
-+			skb_shinfo(skb)->nr_frags = 0;
-+			kfree_skb(skb);
-+			continue;
-+		}
-+
-+		data_len = skb->len;
-+		memcpy(skb->data,
-+		       (void *)(idx_to_kaddr(netbk, pending_idx)|txp->offset),
-+		       data_len);
-+		if (data_len < txp->size) {
-+			/* Append the packet payload as a fragment. */
-+			txp->offset += data_len;
-+			txp->size -= data_len;
-+		} else {
-+			/* Schedule a response immediately. */
-+			netif_idx_release(netbk, pending_idx);
-+		}
-+
-+		if (txp->flags & NETTXF_csum_blank)
-+			skb->ip_summed = CHECKSUM_PARTIAL;
-+		else if (txp->flags & NETTXF_data_validated)
-+			skb->ip_summed = CHECKSUM_UNNECESSARY;
-+
-+		netbk_fill_frags(netbk, skb);
-+
-+		/*
-+		 * If the initial fragment was < PKT_PROT_LEN then
-+		 * pull through some bytes from the other fragments to
-+		 * increase the linear region to PKT_PROT_LEN bytes.
-+		 */
-+		if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) {
-+			int target = min_t(int, skb->len, PKT_PROT_LEN);
-+			__pskb_pull_tail(skb, target - skb_headlen(skb));
-+		}
-+
-+		skb->dev      = netif->dev;
-+		skb->protocol = eth_type_trans(skb, skb->dev);
-+
-+		if (checksum_setup(netif, skb)) {
-+			pr_debug("Can't setup checksum in net_tx_action\n");
-+			kfree_skb(skb);
-+			continue;
-+		}
-+
-+		if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
-+		    unlikely(skb_linearize(skb))) {
-+			pr_debug("Can't linearize skb in net_tx_action.\n");
-+			kfree_skb(skb);
-+			continue;
-+		}
-+
-+		netif->stats.rx_bytes += skb->len;
-+		netif->stats.rx_packets++;
-+
-+		netif_rx_ni(skb);
-+		netif->dev->last_rx = jiffies;
-+	}
-+}
-+
-+/* Called after netfront has transmitted */
-+static void net_tx_action(unsigned long data)
-+{
-+	struct xen_netbk *netbk = (struct xen_netbk *)data;
-+	unsigned nr_mops;
-+	int ret;
-+
-+	net_tx_action_dealloc(netbk);
-+
-+	nr_mops = net_tx_build_mops(netbk);
-+
-+	if (nr_mops == 0)
-+		goto out;
-+
-+	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
-+					netbk->tx_map_ops, nr_mops);
-+	BUG_ON(ret);
-+
-+	net_tx_submit(netbk);
-+out:
-+	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
-+	    !list_empty(&netbk->pending_inuse_head)) {
-+		struct netbk_tx_pending_inuse *oldest;
-+
-+		oldest = list_entry(netbk->pending_inuse_head.next,
-+				    struct netbk_tx_pending_inuse, list);
-+		mod_timer(&netbk->netbk_tx_pending_timer,
-+				oldest->alloc_time + HZ);
-+	}
-+}
-+
-+static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
-+{
-+	static DEFINE_SPINLOCK(_lock);
-+	unsigned long flags;
-+	pending_ring_idx_t index;
-+
-+	spin_lock_irqsave(&_lock, flags);
-+	index = pending_index(netbk->dealloc_prod);
-+	netbk->dealloc_ring[index] = pending_idx;
-+	/* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
-+	smp_wmb();
-+	netbk->dealloc_prod++;
-+	spin_unlock_irqrestore(&_lock, flags);
-+
-+	xen_netbk_bh_handler(netbk, 0);
-+}
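
The smp_wmb() here pairs with the smp_rmb() in net_tx_action_dealloc(): fill
the ring slot, then publish the new producer index, and on the reader side
load the index before the slots. A minimal C11 analogy of that pattern — an
illustrative single-producer/single-consumer sketch, not the driver's code:

	#include <stdatomic.h>
	#include <stdio.h>

	#define RING_SIZE 256		/* power of two, as above */

	static unsigned short ring[RING_SIZE];
	static atomic_uint prod;

	static void publish(unsigned short idx)
	{
		unsigned p = atomic_load_explicit(&prod, memory_order_relaxed);

		ring[p & (RING_SIZE - 1)] = idx;	   /* fill slot...    */
		atomic_thread_fence(memory_order_release); /* like smp_wmb() */
		atomic_store_explicit(&prod, p + 1,	   /* ...then publish */
				      memory_order_relaxed);
	}

	static int consume(unsigned short *out)
	{
		static unsigned cons;
		unsigned p = atomic_load_explicit(&prod, memory_order_relaxed);

		if (cons == p)
			return 0;
		atomic_thread_fence(memory_order_acquire); /* like smp_rmb() */
		*out = ring[cons++ & (RING_SIZE - 1)];
		return 1;
	}

	int main(void)
	{
		unsigned short v = 0;
		int ok;

		publish(7);
		ok = consume(&v);
		printf("%d %u\n", ok, v);	/* 1 7 */
		return 0;
	}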
-+
-+static void netif_page_release(struct page *page, unsigned int order)
-+{
-+	unsigned int group, idx;
-+	int foreign = netif_get_page_ext(page, &group, &idx);
-+
-+	BUG_ON(!foreign);
-+	BUG_ON(order);
-+
-+	netif_idx_release(&xen_netbk[group], idx);
-+}
-+
-+irqreturn_t netif_be_int(int irq, void *dev_id)
-+{
-+	struct xen_netif *netif = dev_id;
-+	struct xen_netbk *netbk;
-+
-+	if (netif->group == -1)
-+		return IRQ_NONE;
-+
-+	netbk = &xen_netbk[netif->group];
-+
-+	add_to_net_schedule_list_tail(netif);
-+	maybe_schedule_tx_action(netbk);
-+
-+	if (netif_schedulable(netif) && !netbk_queue_full(netif))
-+		netif_wake_queue(netif->dev);
-+
-+	return IRQ_HANDLED;
-+}
-+
-+static void make_tx_response(struct xen_netif *netif,
-+			     struct xen_netif_tx_request *txp,
-+			     s8       st)
-+{
-+	RING_IDX i = netif->tx.rsp_prod_pvt;
-+	struct xen_netif_tx_response *resp;
-+	int notify;
-+
-+	resp = RING_GET_RESPONSE(&netif->tx, i);
-+	resp->id     = txp->id;
-+	resp->status = st;
-+
-+	if (txp->flags & NETTXF_extra_info)
-+		RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL;
-+
-+	netif->tx.rsp_prod_pvt = ++i;
-+	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
-+	if (notify)
-+		notify_remote_via_irq(netif->irq);
-+}
-+
-+static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
-+					     u16      id,
-+					     s8       st,
-+					     u16      offset,
-+					     u16      size,
-+					     u16      flags)
-+{
-+	RING_IDX i = netif->rx.rsp_prod_pvt;
-+	struct xen_netif_rx_response *resp;
-+
-+	resp = RING_GET_RESPONSE(&netif->rx, i);
-+	resp->offset     = offset;
-+	resp->flags      = flags;
-+	resp->id         = id;
-+	resp->status     = (s16)size;
-+	if (st < 0)
-+		resp->status = (s16)st;
-+
-+	netif->rx.rsp_prod_pvt = ++i;
-+
-+	return resp;
-+}
-+
-+#ifdef NETBE_DEBUG_INTERRUPT
-+static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
-+{
-+	struct list_head *ent;
-+	struct xen_netif *netif;
-+	int i = 0;
-+	int group = 0;
-+
-+	printk(KERN_ALERT "netif_schedule_list:\n");
-+
-+	for (group = 0; group < xen_netbk_group_nr; group++) {
-+		struct xen_netbk *netbk = &xen_netbk[group];
-+		spin_lock_irq(&netbk->net_schedule_list_lock);
-+		printk(KERN_ALERT "xen_netback group number: %d\n", group);
-+		list_for_each(ent, &netbk->net_schedule_list) {
-+			netif = list_entry(ent, struct xen_netif, list);
-+			printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
-+				"rx_resp_prod=%08x\n",
-+				i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
-+			printk(KERN_ALERT
-+				"   tx_req_cons=%08x, tx_resp_prod=%08x)\n",
-+				netif->tx.req_cons, netif->tx.rsp_prod_pvt);
-+			printk(KERN_ALERT
-+				"   shared(rx_req_prod=%08x "
-+				"rx_resp_prod=%08x\n",
-+				netif->rx.sring->req_prod,
-+				netif->rx.sring->rsp_prod);
-+			printk(KERN_ALERT
-+				"   rx_event=%08x, tx_req_prod=%08x\n",
-+				netif->rx.sring->rsp_event,
-+				netif->tx.sring->req_prod);
-+			printk(KERN_ALERT
-+				"   tx_resp_prod=%08x, tx_event=%08x)\n",
-+				netif->tx.sring->rsp_prod,
-+				netif->tx.sring->rsp_event);
-+			i++;
-+		}
-+		spin_unlock_irq(&netbk->net_schedule_list_lock);
-+	}
-+
-+	printk(KERN_ALERT " ** End of netif_schedule_list **\n");
-+
-+	return IRQ_HANDLED;
-+}
-+#endif
-+
-+static inline int rx_work_todo(struct xen_netbk *netbk)
-+{
-+	return !skb_queue_empty(&netbk->rx_queue);
-+}
-+
-+static inline int tx_work_todo(struct xen_netbk *netbk)
-+{
-+	if (netbk->dealloc_cons != netbk->dealloc_prod)
-+		return 1;
-+
-+	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
-+	    !list_empty(&netbk->pending_inuse_head))
-+		return 1;
-+
-+	if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
-+			!list_empty(&netbk->net_schedule_list))
-+		return 1;
-+
-+	return 0;
-+}
-+
-+static int netbk_action_thread(void *data)
-+{
-+	struct xen_netbk *netbk = (struct xen_netbk *)data;
-+	while (!kthread_should_stop()) {
-+		wait_event_interruptible(netbk->kthread.netbk_action_wq,
-+				rx_work_todo(netbk)
-+				|| tx_work_todo(netbk)
-+				|| kthread_should_stop());
-+		cond_resched();
-+
-+		if (kthread_should_stop())
-+			break;
-+
-+		if (rx_work_todo(netbk))
-+			net_rx_action((unsigned long)netbk);
-+
-+		if (tx_work_todo(netbk))
-+			net_tx_action((unsigned long)netbk);
-+	}
-+
-+	return 0;
-+}
-+
-+static int __init netback_init(void)
-+{
-+	int i;
-+	struct page *page;
-+	int rc = 0;
-+	int group;
-+
-+	if (!xen_pv_domain())
-+		return -ENODEV;
-+
-+	xen_netbk_group_nr = num_online_cpus();
-+	xen_netbk = vmalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);
-+	if (!xen_netbk) {
-+		printk(KERN_ALERT "%s: out of memory\n", __func__);
-+		return -ENOMEM;
-+	}
-+	memset(xen_netbk, 0, sizeof(struct xen_netbk) * xen_netbk_group_nr);
-+
-+	for (group = 0; group < xen_netbk_group_nr; group++) {
-+		struct xen_netbk *netbk = &xen_netbk[group];
-+		skb_queue_head_init(&netbk->rx_queue);
-+		skb_queue_head_init(&netbk->tx_queue);
-+
-+		init_timer(&netbk->net_timer);
-+		netbk->net_timer.data = (unsigned long)netbk;
-+		netbk->net_timer.function = net_alarm;
-+
-+		init_timer(&netbk->netbk_tx_pending_timer);
-+		netbk->netbk_tx_pending_timer.data = (unsigned long)netbk;
-+		netbk->netbk_tx_pending_timer.function =
-+			netbk_tx_pending_timeout;
-+
-+		netbk->mmap_pages =
-+			alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
-+		if (!netbk->mmap_pages) {
-+			printk(KERN_ALERT "%s: out of memory\n", __func__);
-+			del_timer(&netbk->netbk_tx_pending_timer);
-+			del_timer(&netbk->net_timer);
-+			rc = -ENOMEM;
-+			goto failed_init;
-+		}
-+
-+		for (i = 0; i < MAX_PENDING_REQS; i++) {
-+			page = netbk->mmap_pages[i];
-+			SetPageForeign(page, netif_page_release);
-+			netif_set_page_ext(page, group, i);
-+			INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
-+		}
-+
-+		netbk->pending_cons = 0;
-+		netbk->pending_prod = MAX_PENDING_REQS;
-+		for (i = 0; i < MAX_PENDING_REQS; i++)
-+			netbk->pending_ring[i] = i;
-+
-+		if (MODPARM_netback_kthread) {
-+			init_waitqueue_head(&netbk->kthread.netbk_action_wq);
-+			netbk->kthread.task =
-+				kthread_create(netbk_action_thread,
-+					       (void *)netbk,
-+					       "netback/%u", group);
-+
-+			if (!IS_ERR(netbk->kthread.task)) {
-+				kthread_bind(netbk->kthread.task, group);
-+			} else {
-+				printk(KERN_ALERT
-+					"kthread_create() failed at netback\n");
-+				free_empty_pages_and_pagevec(netbk->mmap_pages,
-+						MAX_PENDING_REQS);
-+				del_timer(&netbk->netbk_tx_pending_timer);
-+				del_timer(&netbk->net_timer);
-+				rc = PTR_ERR(netbk->kthread.task);
-+				goto failed_init;
-+			}
-+		} else {
-+			tasklet_init(&netbk->tasklet.net_tx_tasklet,
-+				     net_tx_action,
-+				     (unsigned long)netbk);
-+			tasklet_init(&netbk->tasklet.net_rx_tasklet,
-+				     net_rx_action,
-+				     (unsigned long)netbk);
-+		}
-+
-+		INIT_LIST_HEAD(&netbk->pending_inuse_head);
-+		INIT_LIST_HEAD(&netbk->net_schedule_list);
-+
-+		spin_lock_init(&netbk->net_schedule_list_lock);
-+
-+		atomic_set(&netbk->netfront_count, 0);
-+
-+		if (MODPARM_netback_kthread)
-+			wake_up_process(netbk->kthread.task);
-+	}
-+
-+	netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
-+	if (MODPARM_copy_skb) {
-+		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
-+					      NULL, 0))
-+			netbk_copy_skb_mode = NETBK_ALWAYS_COPY_SKB;
-+		else
-+			netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
-+	}
-+
-+	rc = netif_xenbus_init();
-+	if (rc)
-+		goto failed_init;
-+
-+#ifdef NETBE_DEBUG_INTERRUPT
-+	(void)bind_virq_to_irqhandler(VIRQ_DEBUG,
-+				      0,
-+				      netif_be_dbg,
-+				      IRQF_SHARED,
-+				      "net-be-dbg",
-+				      &netif_be_dbg);
-+#endif
-+
-+	return 0;
-+
-+failed_init:
-+	for (i = 0; i < group; i++) {
-+		struct xen_netbk *netbk = &xen_netbk[i];
-+		free_empty_pages_and_pagevec(netbk->mmap_pages,
-+				MAX_PENDING_REQS);
-+		del_timer(&netbk->netbk_tx_pending_timer);
-+		del_timer(&netbk->net_timer);
-+		if (MODPARM_netback_kthread)
-+			kthread_stop(netbk->kthread.task);
-+	}
-+	vfree(xen_netbk);
-+	return rc;
-+}
-+
-+module_init(netback_init);
-+
-+MODULE_LICENSE("Dual BSD/GPL");
-diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
-new file mode 100644
-index 0000000..867dc25
---- /dev/null
-+++ b/drivers/net/xen-netback/xenbus.c
-@@ -0,0 +1,489 @@
-+/*
-+ * Xenbus code for netif backend
-+ *
-+ * Copyright (C) 2005 Rusty Russell <rusty at rustcorp.com.au>
-+ * Copyright (C) 2005 XenSource Ltd
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or
-+ * (at your option) any later version.
-+ *
-+ * This program is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with this program; if not, write to the Free Software
-+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-+*/
-+
-+#include "common.h"
-+
-+static int connect_rings(struct backend_info *);
-+static void connect(struct backend_info *);
-+static void backend_create_netif(struct backend_info *be);
-+static void unregister_hotplug_status_watch(struct backend_info *be);
-+
-+static int netback_remove(struct xenbus_device *dev)
-+{
-+	struct backend_info *be = dev_get_drvdata(&dev->dev);
-+
-+	unregister_hotplug_status_watch(be);
-+	if (be->netif) {
-+		kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
-+		xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
-+		netif_disconnect(be->netif);
-+		be->netif = NULL;
-+	}
-+	kfree(be);
-+	dev_set_drvdata(&dev->dev, NULL);
-+	return 0;
-+}
-+
-+
-+/**
-+ * Entry point to this code when a new device is created.  Allocate the basic
-+ * structures and switch to InitWait.
-+ */
-+static int netback_probe(struct xenbus_device *dev,
-+			 const struct xenbus_device_id *id)
-+{
-+	const char *message;
-+	struct xenbus_transaction xbt;
-+	int err;
-+	int sg;
-+	struct backend_info *be = kzalloc(sizeof(struct backend_info),
-+					  GFP_KERNEL);
-+	if (!be) {
-+		xenbus_dev_fatal(dev, -ENOMEM,
-+				 "allocating backend structure");
-+		return -ENOMEM;
-+	}
-+
-+	be->dev = dev;
-+	dev_set_drvdata(&dev->dev, be);
-+
-+	sg = 1;
-+	if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB)
-+		sg = 0;
-+
-+	do {
-+		err = xenbus_transaction_start(&xbt);
-+		if (err) {
-+			xenbus_dev_fatal(dev, err, "starting transaction");
-+			goto fail;
-+		}
-+
-+		err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg);
-+		if (err) {
-+			message = "writing feature-sg";
-+			goto abort_transaction;
-+		}
-+
-+		err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4",
-+				    "%d", sg);
-+		if (err) {
-+			message = "writing feature-gso-tcpv4";
-+			goto abort_transaction;
-+		}
-+
-+		/* We support rx-copy path. */
-+		err = xenbus_printf(xbt, dev->nodename,
-+				    "feature-rx-copy", "%d", 1);
-+		if (err) {
-+			message = "writing feature-rx-copy";
-+			goto abort_transaction;
-+		}
-+
-+		/*
-+		 * We don't support rx-flip path (except old guests who don't
-+		 * grok this feature flag).
-+		 */
-+		err = xenbus_printf(xbt, dev->nodename,
-+				    "feature-rx-flip", "%d", 0);
-+		if (err) {
-+			message = "writing feature-rx-flip";
-+			goto abort_transaction;
-+		}
-+
-+		err = xenbus_transaction_end(xbt, 0);
-+	} while (err == -EAGAIN);
-+
-+	if (err) {
-+		xenbus_dev_fatal(dev, err, "completing transaction");
-+		goto fail;
-+	}
-+
-+	err = xenbus_switch_state(dev, XenbusStateInitWait);
-+	if (err)
-+		goto fail;
-+
-+	/* This kicks hotplug scripts, so do it immediately. */
-+	backend_create_netif(be);
-+
-+	return 0;
-+
-+abort_transaction:
-+	xenbus_transaction_end(xbt, 1);
-+	xenbus_dev_fatal(dev, err, "%s", message);
-+fail:
-+	pr_debug("failed");
-+	netback_remove(dev);
-+	return err;
-+}
-+
-+
-+/*
-+ * Handle the creation of the hotplug script environment.  We add the script
-+ * and vif variables to the environment, for the benefit of the vif-* hotplug
-+ * scripts.
-+ */
-+static int netback_uevent(struct xenbus_device *xdev,
-+			  struct kobj_uevent_env *env)
-+{
-+	struct backend_info *be = dev_get_drvdata(&xdev->dev);
-+	char *val;
-+
-+	val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
-+	if (IS_ERR(val)) {
-+		int err = PTR_ERR(val);
-+		xenbus_dev_fatal(xdev, err, "reading script");
-+		return err;
-+	} else {
-+		if (add_uevent_var(env, "script=%s", val)) {
-+			kfree(val);
-+			return -ENOMEM;
-+		}
-+		kfree(val);
-+	}
-+
-+	if (!be || !be->netif)
-+		return 0;
-+
-+	return add_uevent_var(env, "vif=%s", be->netif->dev->name);
-+}
-+
-+
-+static void backend_create_netif(struct backend_info *be)
-+{
-+	int err;
-+	long handle;
-+	struct xenbus_device *dev = be->dev;
-+
-+	if (be->netif != NULL)
-+		return;
-+
-+	err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%li", &handle);
-+	if (err != 1) {
-+		xenbus_dev_fatal(dev, err, "reading handle");
-+		return;
-+	}
-+
-+	be->netif = netif_alloc(&dev->dev, dev->otherend_id, handle);
-+	if (IS_ERR(be->netif)) {
-+		err = PTR_ERR(be->netif);
-+		be->netif = NULL;
-+		xenbus_dev_fatal(dev, err, "creating interface");
-+		return;
-+	}
-+
-+	kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
-+}
-+
-+
-+static void disconnect_backend(struct xenbus_device *dev)
-+{
-+	struct backend_info *be = dev_get_drvdata(&dev->dev);
-+
-+	if (be->netif) {
-+		xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
-+		netif_disconnect(be->netif);
-+		be->netif = NULL;
-+	}
-+}
-+
-+/**
-+ * Callback received when the frontend's state changes.
-+ */
-+static void frontend_changed(struct xenbus_device *dev,
-+			     enum xenbus_state frontend_state)
-+{
-+	struct backend_info *be = dev_get_drvdata(&dev->dev);
-+
-+	pr_debug("frontend state %s", xenbus_strstate(frontend_state));
-+
-+	be->frontend_state = frontend_state;
-+
-+	switch (frontend_state) {
-+	case XenbusStateInitialising:
-+		if (dev->state == XenbusStateClosed) {
-+			printk(KERN_INFO "%s: %s: prepare for reconnect\n",
-+			       __func__, dev->nodename);
-+			xenbus_switch_state(dev, XenbusStateInitWait);
-+		}
-+		break;
-+
-+	case XenbusStateInitialised:
-+		break;
-+
-+	case XenbusStateConnected:
-+		if (dev->state == XenbusStateConnected)
-+			break;
-+		backend_create_netif(be);
-+		if (be->netif)
-+			connect(be);
-+		break;
-+
-+	case XenbusStateClosing:
-+		if (be->netif)
-+			kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
-+		disconnect_backend(dev);
-+		xenbus_switch_state(dev, XenbusStateClosing);
-+		break;
-+
-+	case XenbusStateClosed:
-+		xenbus_switch_state(dev, XenbusStateClosed);
-+		if (xenbus_dev_is_online(dev))
-+			break;
-+		/* fall through if not online */
-+	case XenbusStateUnknown:
-+		device_unregister(&dev->dev);
-+		break;
-+
-+	default:
-+		xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
-+				 frontend_state);
-+		break;
-+	}
-+}
-+
-+
-+static void xen_net_read_rate(struct xenbus_device *dev,
-+			      unsigned long *bytes, unsigned long *usec)
-+{
-+	char *s, *e;
-+	unsigned long b, u;
-+	char *ratestr;
-+
-+	/* Default to unlimited bandwidth. */
-+	*bytes = ~0UL;
-+	*usec = 0;
-+
-+	ratestr = xenbus_read(XBT_NIL, dev->nodename, "rate", NULL);
-+	if (IS_ERR(ratestr))
-+		return;
-+
-+	s = ratestr;
-+	b = simple_strtoul(s, &e, 10);
-+	if ((s == e) || (*e != ','))
-+		goto fail;
-+
-+	s = e + 1;
-+	u = simple_strtoul(s, &e, 10);
-+	if ((s == e) || (*e != '\0'))
-+		goto fail;
-+
-+	*bytes = b;
-+	*usec = u;
-+
-+	kfree(ratestr);
-+	return;
-+
-+ fail:
-+	pr_warn("Failed to parse network rate limit. Traffic unlimited.\n");
-+	kfree(ratestr);
-+}
-+
-+static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
-+{
-+	char *s, *e, *macstr;
-+	int i;
-+
-+	macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
-+	if (IS_ERR(macstr))
-+		return PTR_ERR(macstr);
-+
-+	for (i = 0; i < ETH_ALEN; i++) {
-+		mac[i] = simple_strtoul(s, &e, 16);
-+		if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
-+			kfree(macstr);
-+			return -ENOENT;
-+		}
-+		s = e+1;
-+	}
-+
-+	kfree(macstr);
-+	return 0;
-+}
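
Both parsers above consume simple xenstore strings: "rate" is "<bytes>,<usec>" and "mac" is six colon-separated hex octets. A minimal stand-alone sketch of the same formats, using libc strtoul in place of the kernel's simple_strtoul (the sample values are hypothetical):

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	/* "rate" format: allow <bytes> of traffic every <usec>. */
	const char *rate = "1000000,20000";	/* hypothetical value */
	char *e;
	unsigned long bytes = strtoul(rate, &e, 10);
	unsigned long usec = (*e == ',') ? strtoul(e + 1, &e, 10) : 0;

	/* "mac" format: six colon-separated hex octets. */
	const char *mac = "00:16:3e:5a:12:34";	/* hypothetical value */
	const char *s = mac;
	unsigned char addr[6];
	int i;

	for (i = 0; i < 6; i++) {
		addr[i] = (unsigned char)strtoul(s, &e, 16);
		s = e + 1;	/* step past the ':' (or the final NUL) */
	}

	printf("%lu bytes per %lu usec; mac ends %02x\n", bytes, usec, addr[5]);
	return 0;
}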
-+
-+static void unregister_hotplug_status_watch(struct backend_info *be)
-+{
-+	if (be->have_hotplug_status_watch) {
-+		unregister_xenbus_watch(&be->hotplug_status_watch);
-+		kfree(be->hotplug_status_watch.node);
-+	}
-+	be->have_hotplug_status_watch = 0;
-+}
-+
-+static void hotplug_status_changed(struct xenbus_watch *watch,
-+				   const char **vec,
-+				   unsigned int vec_size)
-+{
-+	struct backend_info *be = container_of(watch,
-+					       struct backend_info,
-+					       hotplug_status_watch);
-+	char *str;
-+	unsigned int len;
-+
-+	str = xenbus_read(XBT_NIL, be->dev->nodename, "hotplug-status", &len);
-+	if (IS_ERR(str))
-+		return;
-+	if (len == sizeof("connected")-1 && !memcmp(str, "connected", len)) {
-+		xenbus_switch_state(be->dev, XenbusStateConnected);
-+		/* Not interested in this watch anymore. */
-+		unregister_hotplug_status_watch(be);
-+	}
-+	kfree(str);
-+}
-+
-+static void connect(struct backend_info *be)
-+{
-+	int err;
-+	struct xenbus_device *dev = be->dev;
-+
-+	err = connect_rings(be);
-+	if (err)
-+		return;
-+
-+	err = xen_net_read_mac(dev, be->netif->fe_dev_addr);
-+	if (err) {
-+		xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
-+		return;
-+	}
-+
-+	xen_net_read_rate(dev, &be->netif->credit_bytes,
-+			  &be->netif->credit_usec);
-+	be->netif->remaining_credit = be->netif->credit_bytes;
-+
-+	unregister_hotplug_status_watch(be);
-+	err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch,
-+				   hotplug_status_changed,
-+				   "%s/%s", dev->nodename, "hotplug-status");
-+	if (err) {
-+		/* Switch now, since we can't do a watch. */
-+		xenbus_switch_state(dev, XenbusStateConnected);
-+	} else {
-+		be->have_hotplug_status_watch = 1;
-+	}
-+
-+	netif_wake_queue(be->netif->dev);
-+}
-+
-+
-+static int connect_rings(struct backend_info *be)
-+{
-+	struct xen_netif *netif = be->netif;
-+	struct xenbus_device *dev = be->dev;
-+	unsigned long tx_ring_ref, rx_ring_ref;
-+	unsigned int evtchn, rx_copy;
-+	int err;
-+	int val;
-+
-+	err = xenbus_gather(XBT_NIL, dev->otherend,
-+			    "tx-ring-ref", "%lu", &tx_ring_ref,
-+			    "rx-ring-ref", "%lu", &rx_ring_ref,
-+			    "event-channel", "%u", &evtchn, NULL);
-+	if (err) {
-+		xenbus_dev_fatal(dev, err,
-+				 "reading %s/ring-ref and event-channel",
-+				 dev->otherend);
-+		return err;
-+	}
-+
-+	err = xenbus_scanf(XBT_NIL, dev->otherend, "request-rx-copy", "%u",
-+			   &rx_copy);
-+	if (err == -ENOENT) {
-+		err = 0;
-+		rx_copy = 0;
-+	}
-+	if (err < 0) {
-+		xenbus_dev_fatal(dev, err, "reading %s/request-rx-copy",
-+				 dev->otherend);
-+		return err;
-+	}
-+	if (!rx_copy)
-+		return -EOPNOTSUPP;
-+
-+	if (netif->dev->tx_queue_len != 0) {
-+		if (xenbus_scanf(XBT_NIL, dev->otherend,
-+				 "feature-rx-notify", "%d", &val) < 0)
-+			val = 0;
-+		if (val)
-+			netif->can_queue = 1;
-+		else
-+			/* Must be non-zero for pfifo_fast to work. */
-+			netif->dev->tx_queue_len = 1;
-+	}
-+
-+	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg",
-+			 "%d", &val) < 0)
-+		val = 0;
-+	netif->can_sg = !!val;
-+
-+	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4",
-+			 "%d", &val) < 0)
-+		val = 0;
-+	netif->gso = !!val;
-+
-+	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix",
-+			 "%d", &val) < 0)
-+		val = 0;
-+	netif->gso_prefix = !!val;
-+
-+	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
-+			 "%d", &val) < 0)
-+		val = 0;
-+	netif->csum = !val;
-+
-+	/* Set dev->features */
-+	netif_set_features(netif);
-+
-+	/* Map the shared frame, irq etc. */
-+	err = netif_map(netif, tx_ring_ref, rx_ring_ref, evtchn);
-+	if (err) {
-+		xenbus_dev_fatal(dev, err,
-+				 "mapping shared-frames %lu/%lu port %u",
-+				 tx_ring_ref, rx_ring_ref, evtchn);
-+		return err;
-+	}
-+	return 0;
-+}
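
Each feature probe above follows the same pattern: read an optional boolean key from the frontend's xenstore directory and default to 0 when the key is absent or malformed. A hypothetical helper (not in this patch) capturing that pattern, using only the xenbus calls already seen above:

static int xenbus_read_feature(struct xenbus_device *dev, const char *key)
{
	int val;

	/* xenbus_scanf() returns < 0 if the key is missing or malformed;
	 * treat either case as "feature not advertised". */
	if (xenbus_scanf(XBT_NIL, dev->otherend, key, "%d", &val) < 0)
		val = 0;
	return !!val;
}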
-+
-+
-+/* ** Driver Registration ** */
-+
-+
-+static const struct xenbus_device_id netback_ids[] = {
-+	{ "vif" },
-+	{ "" }
-+};
-+
-+
-+static struct xenbus_driver netback = {
-+	.name = "vif",
-+	.owner = THIS_MODULE,
-+	.ids = netback_ids,
-+	.probe = netback_probe,
-+	.remove = netback_remove,
-+	.uevent = netback_uevent,
-+	.otherend_changed = frontend_changed,
-+};
-+
-+
-+int netif_xenbus_init(void)
-+{
-+	printk(KERN_CRIT "registering netback\n");
-+	return xenbus_register_backend(&netback);
-+}
-diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
-index 30290a8..5a48ce9 100644
---- a/drivers/xen/Kconfig
-+++ b/drivers/xen/Kconfig
-@@ -37,13 +37,6 @@ config XEN_BACKEND
- 	  Support for backend device drivers that provide I/O services
- 	  to other virtual machines.
- 
--config XEN_NETDEV_BACKEND
--       tristate "Xen backend network device"
--       depends on XEN_BACKEND && NET
--       help
--         Implement the network backend driver, which passes packets
--         from the guest domain's frontend drivers to the network.
--
- config XENFS
- 	tristate "Xen filesystem"
- 	default y
-diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
-index c0e0509..533a199 100644
---- a/drivers/xen/Makefile
-+++ b/drivers/xen/Makefile
-@@ -9,7 +9,6 @@ obj-$(CONFIG_HOTPLUG_CPU)	+= cpu_hotplug.o
- obj-$(CONFIG_XEN_BALLOON)	+= balloon.o
- obj-$(CONFIG_XEN_DEV_EVTCHN)	+= xen-evtchn.o
- obj-$(CONFIG_XEN_GNTDEV)	+= xen-gntdev.o
--obj-$(CONFIG_XEN_NETDEV_BACKEND)	+= netback/
- obj-$(CONFIG_XENFS)		+= xenfs/
- obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
- obj-$(CONFIG_XEN_PLATFORM_PCI)	+= platform-pci.o
-diff --git a/drivers/xen/netback/Makefile b/drivers/xen/netback/Makefile
-deleted file mode 100644
-index e346e81..0000000
---- a/drivers/xen/netback/Makefile
-+++ /dev/null
-@@ -1,3 +0,0 @@
--obj-$(CONFIG_XEN_NETDEV_BACKEND) := xen-netback.o
--
--xen-netback-y := netback.o xenbus.o interface.o
-diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
-deleted file mode 100644
-index 49dc4cf..0000000
---- a/drivers/xen/netback/common.h
-+++ /dev/null
-@@ -1,273 +0,0 @@
--/*
-- * This program is free software; you can redistribute it and/or
-- * modify it under the terms of the GNU General Public License version 2
-- * as published by the Free Software Foundation; or, when distributed
-- * separately from the Linux kernel or incorporated into other
-- * software packages, subject to the following license:
-- *
-- * Permission is hereby granted, free of charge, to any person obtaining a copy
-- * of this source file (the "Software"), to deal in the Software without
-- * restriction, including without limitation the rights to use, copy, modify,
-- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
-- * and to permit persons to whom the Software is furnished to do so, subject to
-- * the following conditions:
-- *
-- * The above copyright notice and this permission notice shall be included in
-- * all copies or substantial portions of the Software.
-- *
-- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-- * IN THE SOFTWARE.
-- */
--
--#ifndef __NETIF__BACKEND__COMMON_H__
--#define __NETIF__BACKEND__COMMON_H__
--
--#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__
--
--#include <linux/module.h>
--#include <linux/interrupt.h>
--#include <linux/slab.h>
--#include <linux/ip.h>
--#include <linux/in.h>
--#include <linux/io.h>
--#include <linux/netdevice.h>
--#include <linux/etherdevice.h>
--#include <linux/wait.h>
--#include <linux/sched.h>
--
--#include <xen/interface/io/netif.h>
--#include <asm/pgalloc.h>
--#include <xen/interface/grant_table.h>
--#include <xen/grant_table.h>
--#include <xen/xenbus.h>
--
--struct xen_netif {
--	/* Unique identifier for this interface. */
--	domid_t          domid;
--	int              group;
--	unsigned int     handle;
--
--	u8               fe_dev_addr[6];
--
--	/* Physical parameters of the comms window. */
--	grant_handle_t   tx_shmem_handle;
--	grant_ref_t      tx_shmem_ref;
--	grant_handle_t   rx_shmem_handle;
--	grant_ref_t      rx_shmem_ref;
--	unsigned int     irq;
--
--	/* The shared rings and indexes. */
--	struct xen_netif_tx_back_ring tx;
--	struct xen_netif_rx_back_ring rx;
--	struct vm_struct *tx_comms_area;
--	struct vm_struct *rx_comms_area;
--
--	/* Flags that must not be set in dev->features */
--	int features_disabled;
--
--	/* Frontend feature information. */
--	u8 can_sg:1;
--	u8 gso:1;
--	u8 gso_prefix:1;
--	u8 csum:1;
--
--	/* Internal feature information. */
--	u8 can_queue:1;	    /* can queue packets for receiver? */
--
--	/* Allow netif_be_start_xmit() to peek ahead in the rx request
--	 * ring.  This is a prediction of what rx_req_cons will be once
--	 * all queued skbs are put on the ring. */
--	RING_IDX rx_req_cons_peek;
--
--	/* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
--	unsigned long   credit_bytes;
--	unsigned long   credit_usec;
--	unsigned long   remaining_credit;
--	struct timer_list credit_timeout;
--
--	/* Statistics */
--	int nr_copied_skbs;
--	int rx_gso_checksum_fixup;
--
--	/* Miscellaneous private stuff. */
--	struct list_head list;  /* scheduling list */
--	atomic_t         refcnt;
--	struct net_device *dev;
--	struct net_device_stats stats;
--
--	unsigned int carrier;
--
--	wait_queue_head_t waiting_to_free;
--};
--
--/*
-- * Implement our own carrier flag: the network stack's version causes delays
-- * when the carrier is re-enabled (in particular, dev_activate() may not
-- * immediately be called, which can cause packet loss; also the etherbridge
-- * can be rather lazy in activating its port).
-- */
--#define netback_carrier_on(netif)	((netif)->carrier = 1)
--#define netback_carrier_off(netif)	((netif)->carrier = 0)
--#define netback_carrier_ok(netif)	((netif)->carrier)
--
--enum {
--	NETBK_DONT_COPY_SKB,
--	NETBK_DELAYED_COPY_SKB,
--	NETBK_ALWAYS_COPY_SKB,
--};
--
--extern int netbk_copy_skb_mode;
--
--struct backend_info {
--	struct xenbus_device *dev;
--	struct xen_netif *netif;
--	enum xenbus_state frontend_state;
--	struct xenbus_watch hotplug_status_watch;
--	int have_hotplug_status_watch:1;
--};
--
--#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
--#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
--
--void netif_disconnect(struct xen_netif *netif);
--
--void netif_set_features(struct xen_netif *netif);
--struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
--			      unsigned int handle);
--int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
--	      unsigned long rx_ring_ref, unsigned int evtchn);
--
--static inline void netif_get(struct xen_netif *netif)
--{
--	atomic_inc(&netif->refcnt);
--}
--
--static inline void  netif_put(struct xen_netif *netif)
--{
--	if (atomic_dec_and_test(&netif->refcnt))
--		wake_up(&netif->waiting_to_free);
--}
--
--int netif_xenbus_init(void);
--
--#define netif_schedulable(netif)				\
--	(netif_running((netif)->dev) && netback_carrier_ok(netif))
--
--void netif_schedule_work(struct xen_netif *netif);
--void netif_deschedule_work(struct xen_netif *netif);
--
--int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
--struct net_device_stats *netif_be_get_stats(struct net_device *dev);
--irqreturn_t netif_be_int(int irq, void *dev_id);
--
--static inline int netbk_can_queue(struct net_device *dev)
--{
--	struct xen_netif *netif = netdev_priv(dev);
--	return netif->can_queue;
--}
--
--static inline int netbk_can_sg(struct net_device *dev)
--{
--	struct xen_netif *netif = netdev_priv(dev);
--	return netif->can_sg;
--}
--
--struct pending_tx_info {
--	struct xen_netif_tx_request req;
--	struct xen_netif *netif;
--};
--typedef unsigned int pending_ring_idx_t;
--
--struct netbk_rx_meta {
--	int id;
--	int size;
--	int gso_size;
--};
--
--struct netbk_tx_pending_inuse {
--	struct list_head list;
--	unsigned long alloc_time;
--};
--
--#define MAX_PENDING_REQS 256
--
--#define MAX_BUFFER_OFFSET PAGE_SIZE
--
--/* extra field used in struct page */
--union page_ext {
--	struct {
--#if BITS_PER_LONG < 64
--#define IDX_WIDTH   8
--#define GROUP_WIDTH (BITS_PER_LONG - IDX_WIDTH)
--		unsigned int group:GROUP_WIDTH;
--		unsigned int idx:IDX_WIDTH;
--#else
--		unsigned int group, idx;
--#endif
--	} e;
--	void *mapping;
--};
--
--struct xen_netbk {
--	union {
--		struct {
--			struct tasklet_struct net_tx_tasklet;
--			struct tasklet_struct net_rx_tasklet;
--		} tasklet;
--
--		struct {
--			wait_queue_head_t netbk_action_wq;
--			struct task_struct *task;
--		} kthread;
--	};
--
--	struct sk_buff_head rx_queue;
--	struct sk_buff_head tx_queue;
--
--	struct timer_list net_timer;
--	struct timer_list netbk_tx_pending_timer;
--
--	struct page **mmap_pages;
--
--	pending_ring_idx_t pending_prod;
--	pending_ring_idx_t pending_cons;
--	pending_ring_idx_t dealloc_prod;
--	pending_ring_idx_t dealloc_cons;
--
--	struct list_head pending_inuse_head;
--	struct list_head net_schedule_list;
--
--	/* Protect the net_schedule_list in netif. */
--	spinlock_t net_schedule_list_lock;
--
--	atomic_t netfront_count;
--
--	struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
--	struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
--	struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
--	struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
--
--	grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
--	u16 pending_ring[MAX_PENDING_REQS];
--	u16 dealloc_ring[MAX_PENDING_REQS];
--
--	/*
--	 * Each head or fragment can be up to 4096 bytes. Given
--	 * MAX_BUFFER_OFFSET of 4096 the worst case is that each
--	 * head/fragment uses 2 copy operation.
--	 */
--	struct gnttab_copy grant_copy_op[2*NET_RX_RING_SIZE];
--	unsigned char rx_notify[NR_IRQS];
--	u16 notify_list[NET_RX_RING_SIZE];
--	struct netbk_rx_meta meta[2*NET_RX_RING_SIZE];
--};
--
--extern struct xen_netbk *xen_netbk;
--extern int xen_netbk_group_nr;
--
--#endif /* __NETIF__BACKEND__COMMON_H__ */
-diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
-deleted file mode 100644
-index b429f8c..0000000
---- a/drivers/xen/netback/interface.c
-+++ /dev/null
-@@ -1,470 +0,0 @@
--/*
-- * Network-device interface management.
-- *
-- * Copyright (c) 2004-2005, Keir Fraser
-- *
-- * This program is free software; you can redistribute it and/or
-- * modify it under the terms of the GNU General Public License version 2
-- * as published by the Free Software Foundation; or, when distributed
-- * separately from the Linux kernel or incorporated into other
-- * software packages, subject to the following license:
-- *
-- * Permission is hereby granted, free of charge, to any person obtaining a copy
-- * of this source file (the "Software"), to deal in the Software without
-- * restriction, including without limitation the rights to use, copy, modify,
-- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
-- * and to permit persons to whom the Software is furnished to do so, subject to
-- * the following conditions:
-- *
-- * The above copyright notice and this permission notice shall be included in
-- * all copies or substantial portions of the Software.
-- *
-- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-- * IN THE SOFTWARE.
-- */
--
--#include "common.h"
--
--#include <linux/ethtool.h>
--#include <linux/rtnetlink.h>
--
--#include <xen/events.h>
--#include <asm/xen/hypercall.h>
--
--/*
-- * Module parameter 'queue_length':
-- *
-- * Enables queuing in the network stack when a client has run out of receive
-- * descriptors.
-- */
--static unsigned long netbk_queue_length = 32;
--module_param_named(queue_length, netbk_queue_length, ulong, 0644);
--
--static void netbk_add_netif(struct xen_netbk *netbk, int group_nr,
--			   struct xen_netif *netif)
--{
--	int i;
--	int min_netfront_count;
--	int min_group = 0;
--	min_netfront_count = atomic_read(&netbk[0].netfront_count);
--	for (i = 0; i < group_nr; i++) {
--		int netfront_count = atomic_read(&netbk[i].netfront_count);
--		if (netfront_count < min_netfront_count) {
--			min_group = i;
--			min_netfront_count = netfront_count;
--		}
--	}
--
--	netif->group = min_group;
--	atomic_inc(&netbk[netif->group].netfront_count);
--}
--
--static void netbk_remove_netif(struct xen_netbk *netbk, struct xen_netif *netif)
--{
--	atomic_dec(&netbk[netif->group].netfront_count);
--}
--
--static void __netif_up(struct xen_netif *netif)
--{
--	netbk_add_netif(xen_netbk, xen_netbk_group_nr, netif);
--	enable_irq(netif->irq);
--	netif_schedule_work(netif);
--}
--
--static void __netif_down(struct xen_netif *netif)
--{
--	disable_irq(netif->irq);
--	netif_deschedule_work(netif);
--	netbk_remove_netif(xen_netbk, netif);
--}
--
--static int net_open(struct net_device *dev)
--{
--	struct xen_netif *netif = netdev_priv(dev);
--	if (netback_carrier_ok(netif)) {
--		__netif_up(netif);
--		netif_start_queue(dev);
--	}
--	return 0;
--}
--
--static int net_close(struct net_device *dev)
--{
--	struct xen_netif *netif = netdev_priv(dev);
--	if (netback_carrier_ok(netif))
--		__netif_down(netif);
--	netif_stop_queue(dev);
--	return 0;
--}
--
--static int netbk_change_mtu(struct net_device *dev, int mtu)
--{
--	int max = netbk_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
--
--	if (mtu > max)
--		return -EINVAL;
--	dev->mtu = mtu;
--	return 0;
--}
--
--void netif_set_features(struct xen_netif *netif)
--{
--	struct net_device *dev = netif->dev;
--	int features = dev->features;
--
--	if (netif->can_sg)
--		features |= NETIF_F_SG;
--	if (netif->gso || netif->gso_prefix)
--		features |= NETIF_F_TSO;
--	if (netif->csum)
--		features |= NETIF_F_IP_CSUM;
--
--	features &= ~(netif->features_disabled);
--
--	if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN)
--		dev->mtu = ETH_DATA_LEN;
--
--	dev->features = features;
--}
--
--static int netbk_set_tx_csum(struct net_device *dev, u32 data)
--{
--	struct xen_netif *netif = netdev_priv(dev);
--	if (data) {
--		if (!netif->csum)
--			return -ENOSYS;
--		netif->features_disabled &= ~NETIF_F_IP_CSUM;
--	} else {
--		netif->features_disabled |= NETIF_F_IP_CSUM;
--	}
--
--	netif_set_features(netif);
--	return 0;
--}
--
--static int netbk_set_sg(struct net_device *dev, u32 data)
--{
--	struct xen_netif *netif = netdev_priv(dev);
--	if (data) {
--		if (!netif->can_sg)
--			return -ENOSYS;
--		netif->features_disabled &= ~NETIF_F_SG;
--	} else {
--		netif->features_disabled |= NETIF_F_SG;
--	}
--
--	netif_set_features(netif);
--	return 0;
--}
--
--static int netbk_set_tso(struct net_device *dev, u32 data)
--{
--	struct xen_netif *netif = netdev_priv(dev);
--	if (data) {
--		if (!netif->gso && !netif->gso_prefix)
--			return -ENOSYS;
--		netif->features_disabled &= ~NETIF_F_TSO;
--	} else {
--		netif->features_disabled |= NETIF_F_TSO;
--	}
--
--	netif_set_features(netif);
--	return 0;
--}
--
--static void netbk_get_drvinfo(struct net_device *dev,
--			      struct ethtool_drvinfo *info)
--{
--	strcpy(info->driver, "netbk");
--	strcpy(info->bus_info, dev_name(dev->dev.parent));
--}
--
--static const struct netif_stat {
--	char name[ETH_GSTRING_LEN];
--	u16 offset;
--} netbk_stats[] = {
--	{
--		"copied_skbs",
--		offsetof(struct xen_netif, nr_copied_skbs)
--	},
--	{
--		"rx_gso_checksum_fixup",
--		offsetof(struct xen_netif, rx_gso_checksum_fixup)
--	},
--};
--
--static int netbk_get_sset_count(struct net_device *dev, int string_set)
--{
--	switch (string_set) {
--	case ETH_SS_STATS:
--		return ARRAY_SIZE(netbk_stats);
--	default:
--		return -EINVAL;
--	}
--}
--
--static void netbk_get_ethtool_stats(struct net_device *dev,
--				   struct ethtool_stats *stats, u64 * data)
--{
--	void *netif = netdev_priv(dev);
--	int i;
--
--	for (i = 0; i < ARRAY_SIZE(netbk_stats); i++)
--		data[i] = *(int *)(netif + netbk_stats[i].offset);
--}
--
--static void netbk_get_strings(struct net_device *dev, u32 stringset, u8 * data)
--{
--	int i;
--
--	switch (stringset) {
--	case ETH_SS_STATS:
--		for (i = 0; i < ARRAY_SIZE(netbk_stats); i++)
--			memcpy(data + i * ETH_GSTRING_LEN,
--			       netbk_stats[i].name, ETH_GSTRING_LEN);
--		break;
--	}
--}
--
--static struct ethtool_ops network_ethtool_ops = {
--	.get_drvinfo = netbk_get_drvinfo,
--
--	.get_tx_csum = ethtool_op_get_tx_csum,
--	.set_tx_csum = netbk_set_tx_csum,
--	.get_sg = ethtool_op_get_sg,
--	.set_sg = netbk_set_sg,
--	.get_tso = ethtool_op_get_tso,
--	.set_tso = netbk_set_tso,
--	.get_link = ethtool_op_get_link,
--
--	.get_sset_count = netbk_get_sset_count,
--	.get_ethtool_stats = netbk_get_ethtool_stats,
--	.get_strings = netbk_get_strings,
--};
--
--static struct net_device_ops netback_ops = {
--	.ndo_start_xmit	= netif_be_start_xmit,
--	.ndo_get_stats	= netif_be_get_stats,
--	.ndo_open	= net_open,
--	.ndo_stop	= net_close,
--	.ndo_change_mtu	= netbk_change_mtu,
--};
--
--struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
--			      unsigned int handle)
--{
--	int err = 0;
--	struct net_device *dev;
--	struct xen_netif *netif;
--	char name[IFNAMSIZ] = {};
--
--	snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
--	dev = alloc_netdev(sizeof(struct xen_netif), name, ether_setup);
--	if (dev == NULL) {
--		pr_debug("Could not allocate netdev\n");
--		return ERR_PTR(-ENOMEM);
--	}
--
--	SET_NETDEV_DEV(dev, parent);
--
--	netif = netdev_priv(dev);
--	memset(netif, 0, sizeof(*netif));
--	netif->domid  = domid;
--	netif->group  = -1;
--	netif->handle = handle;
--	netif->can_sg = 1;
--	netif->csum = 1;
--	atomic_set(&netif->refcnt, 1);
--	init_waitqueue_head(&netif->waiting_to_free);
--	netif->dev = dev;
--	INIT_LIST_HEAD(&netif->list);
--
--	netback_carrier_off(netif);
--
--	netif->credit_bytes = netif->remaining_credit = ~0UL;
--	netif->credit_usec  = 0UL;
--	init_timer(&netif->credit_timeout);
--	/* Initialize 'expires' now: it's used to track the credit window. */
--	netif->credit_timeout.expires = jiffies;
--
--	dev->netdev_ops	= &netback_ops;
--	netif_set_features(netif);
--	SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
--
--	dev->tx_queue_len = netbk_queue_length;
--
--	/*
--	 * Initialise a dummy MAC address. We choose the numerically
--	 * largest non-broadcast address to prevent the address getting
--	 * stolen by an Ethernet bridge for STP purposes.
--	 * (FE:FF:FF:FF:FF:FF)
--	 */
--	memset(dev->dev_addr, 0xFF, ETH_ALEN);
--	dev->dev_addr[0] &= ~0x01;
--
--	rtnl_lock();
--	err = register_netdevice(dev);
--	rtnl_unlock();
--	if (err) {
--		pr_debug("Could not register new net device %s: err=%d\n",
--			 dev->name, err);
--		free_netdev(dev);
--		return ERR_PTR(err);
--	}
--
--	pr_debug("Successfully created netif\n");
--	return netif;
--}
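
The dummy address above is all-0xFF octets with the multicast bit cleared, i.e. FE:FF:FF:FF:FF:FF, the numerically largest unicast address. A quick stand-alone check of the construction:

#include <stdio.h>
#include <string.h>

int main(void)
{
	unsigned char addr[6];

	memset(addr, 0xFF, sizeof(addr));
	addr[0] &= ~0x01;	/* clear the multicast (group) bit */

	/* Prints FE:FF:FF:FF:FF:FF */
	printf("%02X:%02X:%02X:%02X:%02X:%02X\n",
	       addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
	return 0;
}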
--
--static int map_frontend_pages(struct xen_netif *netif,
--			      grant_ref_t tx_ring_ref,
--			      grant_ref_t rx_ring_ref)
--{
--	struct gnttab_map_grant_ref op;
--
--	gnttab_set_map_op(&op, (unsigned long)netif->tx_comms_area->addr,
--			  GNTMAP_host_map, tx_ring_ref, netif->domid);
--
--	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
--		BUG();
--
--	if (op.status) {
--		pr_debug("Gnttab failure mapping tx_ring_ref!\n");
--		return op.status;
--	}
--
--	netif->tx_shmem_ref    = tx_ring_ref;
--	netif->tx_shmem_handle = op.handle;
--
--	gnttab_set_map_op(&op, (unsigned long)netif->rx_comms_area->addr,
--			  GNTMAP_host_map, rx_ring_ref, netif->domid);
--
--	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
--		BUG();
--
--	if (op.status) {
--		struct gnttab_unmap_grant_ref unop;
--
--		gnttab_set_unmap_op(&unop,
--				    (unsigned long)netif->tx_comms_area->addr,
--				    GNTMAP_host_map, netif->tx_shmem_handle);
--		HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unop, 1);
--		pr_debug("Gnttab failure mapping rx_ring_ref!\n");
--		return op.status;
--	}
--
--	netif->rx_shmem_ref    = rx_ring_ref;
--	netif->rx_shmem_handle = op.handle;
--
--	return 0;
--}
--
--static void unmap_frontend_pages(struct xen_netif *netif)
--{
--	struct gnttab_unmap_grant_ref op;
--
--	gnttab_set_unmap_op(&op, (unsigned long)netif->tx_comms_area->addr,
--			    GNTMAP_host_map, netif->tx_shmem_handle);
--
--	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
--		BUG();
--
--	gnttab_set_unmap_op(&op, (unsigned long)netif->rx_comms_area->addr,
--			    GNTMAP_host_map, netif->rx_shmem_handle);
--
--	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
--		BUG();
--}
--
--int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
--	      unsigned long rx_ring_ref, unsigned int evtchn)
--{
--	int err = -ENOMEM;
--	struct xen_netif_tx_sring *txs;
--	struct xen_netif_rx_sring *rxs;
--
--	/* Already connected through? */
--	if (netif->irq)
--		return 0;
--
--	netif->tx_comms_area = alloc_vm_area(PAGE_SIZE);
--	if (netif->tx_comms_area == NULL)
--		return -ENOMEM;
--	netif->rx_comms_area = alloc_vm_area(PAGE_SIZE);
--	if (netif->rx_comms_area == NULL)
--		goto err_rx;
--
--	err = map_frontend_pages(netif, tx_ring_ref, rx_ring_ref);
--	if (err)
--		goto err_map;
--
--	err = bind_interdomain_evtchn_to_irqhandler(
--		netif->domid, evtchn, netif_be_int, 0,
--		netif->dev->name, netif);
--	if (err < 0)
--		goto err_hypervisor;
--	netif->irq = err;
--	disable_irq(netif->irq);
--
--	txs = (struct xen_netif_tx_sring *)netif->tx_comms_area->addr;
--	BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE);
--
--	rxs = (struct xen_netif_rx_sring *)
--		((char *)netif->rx_comms_area->addr);
--	BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE);
--
--	netif->rx_req_cons_peek = 0;
--
--	netif_get(netif);
--
--	rtnl_lock();
--	netback_carrier_on(netif);
--	if (netif_running(netif->dev))
--		__netif_up(netif);
--	rtnl_unlock();
--
--	return 0;
--err_hypervisor:
--	unmap_frontend_pages(netif);
--err_map:
--	free_vm_area(netif->rx_comms_area);
--err_rx:
--	free_vm_area(netif->tx_comms_area);
--	return err;
--}
--
--void netif_disconnect(struct xen_netif *netif)
--{
--	if (netback_carrier_ok(netif)) {
--		rtnl_lock();
--		netback_carrier_off(netif);
--		netif_carrier_off(netif->dev); /* discard queued packets */
--		if (netif_running(netif->dev))
--			__netif_down(netif);
--		rtnl_unlock();
--		netif_put(netif);
--	}
--
--	atomic_dec(&netif->refcnt);
--	wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0);
--
--	del_timer_sync(&netif->credit_timeout);
--
--	if (netif->irq)
--		unbind_from_irqhandler(netif->irq, netif);
--
--	unregister_netdev(netif->dev);
--
--	if (netif->tx.sring) {
--		unmap_frontend_pages(netif);
--		free_vm_area(netif->tx_comms_area);
--		free_vm_area(netif->rx_comms_area);
--	}
--
--	free_netdev(netif->dev);
--}
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-deleted file mode 100644
-index b290525..0000000
---- a/drivers/xen/netback/netback.c
-+++ /dev/null
-@@ -1,1934 +0,0 @@
--/*
-- * Back-end of the driver for virtual network devices. This portion of the
-- * driver exports a 'unified' network-device interface that can be accessed
-- * by any operating system that implements a compatible front end. A
-- * reference front-end implementation can be found in:
-- *  drivers/net/xen-netfront.c
-- *
-- * Copyright (c) 2002-2005, K A Fraser
-- *
-- * This program is free software; you can redistribute it and/or
-- * modify it under the terms of the GNU General Public License version 2
-- * as published by the Free Software Foundation; or, when distributed
-- * separately from the Linux kernel or incorporated into other
-- * software packages, subject to the following license:
-- *
-- * Permission is hereby granted, free of charge, to any person obtaining a copy
-- * of this source file (the "Software"), to deal in the Software without
-- * restriction, including without limitation the rights to use, copy, modify,
-- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
-- * and to permit persons to whom the Software is furnished to do so, subject to
-- * the following conditions:
-- *
-- * The above copyright notice and this permission notice shall be included in
-- * all copies or substantial portions of the Software.
-- *
-- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-- * IN THE SOFTWARE.
-- */
--
--#include "common.h"
--
--#include <linux/kthread.h>
--#include <linux/if_vlan.h>
--#include <linux/udp.h>
--
--#include <net/tcp.h>
--
--#include <xen/balloon.h>
--#include <xen/events.h>
--#include <xen/interface/memory.h>
--
--#include <asm/xen/hypercall.h>
--#include <asm/xen/page.h>
--
--/*define NETBE_DEBUG_INTERRUPT*/
--
--struct xen_netbk *xen_netbk;
--int xen_netbk_group_nr;
--
--static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx);
--static void make_tx_response(struct xen_netif *netif,
--			     struct xen_netif_tx_request *txp,
--			     s8       st);
--static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
--					     u16      id,
--					     s8       st,
--					     u16      offset,
--					     u16      size,
--					     u16      flags);
--
--static void net_tx_action(unsigned long data);
--
--static void net_rx_action(unsigned long data);
--
--static inline unsigned long idx_to_pfn(struct xen_netbk *netbk,
--				       unsigned int idx)
--{
--	return page_to_pfn(netbk->mmap_pages[idx]);
--}
--
--static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
--					 unsigned int idx)
--{
--	return (unsigned long)pfn_to_kaddr(idx_to_pfn(netbk, idx));
--}
--
--/* extra field used in struct page */
--static inline void netif_set_page_ext(struct page *pg,
--				      unsigned int group, unsigned int idx)
--{
--	union page_ext ext = { .e = { .group = group + 1, .idx = idx } };
--
--	BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping));
--	pg->mapping = ext.mapping;
--}
--
--static int netif_get_page_ext(struct page *pg,
--			      unsigned int *_group, unsigned int *_idx)
--{
--	union page_ext ext = { .mapping = pg->mapping };
--	struct xen_netbk *netbk;
--	unsigned int group, idx;
--
--	if (!PageForeign(pg))
--		return 0;
--
--	group = ext.e.group - 1;
--
--	if (group < 0 || group >= xen_netbk_group_nr)
--		return 0;
--
--	netbk = &xen_netbk[group];
--
--	if (netbk->mmap_pages == NULL)
--		return 0;
--
--	idx = ext.e.idx;
--
--	if ((idx < 0) || (idx >= MAX_PENDING_REQS))
--		return 0;
--
--	if (netbk->mmap_pages[idx] != pg)
--		return 0;
--
--	*_group = group;
--	*_idx = idx;
--
--	return 1;
--}
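
The encode/decode pair above stashes a (group, idx) tuple in page->mapping, biasing group by +1 so that a zeroed mapping can never decode as a valid group 0 / idx 0. A minimal round-trip sketch of the same union, assuming a 64-bit build (the BITS_PER_LONG == 64 branch):

#include <assert.h>

union page_ext {
	struct {
		unsigned int group, idx;	/* BITS_PER_LONG == 64 branch */
	} e;
	void *mapping;
};

int main(void)
{
	union page_ext ext = { .e = { .group = 3 + 1, .idx = 42 } };
	union page_ext back = { .mapping = ext.mapping };

	assert(back.e.group - 1 == 3);
	assert(back.e.idx == 42);
	return 0;
}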
--
--/*
-- * This is the amount of packet we copy rather than map, so that the
-- * guest can't fiddle with the contents of the headers while we do
-- * packet processing on them (netfilter, routing, etc).
-- */
--#define PKT_PROT_LEN    (ETH_HLEN + \
--			 VLAN_HLEN + \
--			 sizeof(struct iphdr) + MAX_IPOPTLEN + \
--			 sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE)
--
--static inline pending_ring_idx_t pending_index(unsigned i)
--{
--	return i & (MAX_PENDING_REQS-1);
--}
--
--static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk)
--{
--	return MAX_PENDING_REQS -
--		netbk->pending_prod + netbk->pending_cons;
--}
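
pending_index() masks with MAX_PENDING_REQS - 1, which is only equivalent to a modulo because MAX_PENDING_REQS (256) is a power of two; nr_pending_reqs() likewise relies on unsigned wrap-around of the free-running producer/consumer counters. A small sanity check of the mask idiom:

#include <assert.h>

#define MAX_PENDING_REQS 256	/* must be a power of two */

int main(void)
{
	unsigned int i;

	for (i = 0; i < 3 * MAX_PENDING_REQS; i++)
		assert((i & (MAX_PENDING_REQS - 1)) == i % MAX_PENDING_REQS);
	return 0;
}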
--
--/* Setting this allows the safe use of this driver without netloop. */
--static int MODPARM_copy_skb = 1;
--module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
--MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
--
--int netbk_copy_skb_mode;
--
--static int MODPARM_netback_kthread;
--module_param_named(netback_kthread, MODPARM_netback_kthread, bool, 0);
--MODULE_PARM_DESC(netback_kthread, "Use kernel thread to replace tasklet");
--
--/*
-- * Netback bottom half handler.
-- * dir indicates the data direction.
-- * rx: 1, tx: 0.
-- */
--static inline void xen_netbk_bh_handler(struct xen_netbk *netbk, int dir)
--{
--	if (MODPARM_netback_kthread)
--		wake_up(&netbk->kthread.netbk_action_wq);
--	else if (dir)
--		tasklet_schedule(&netbk->tasklet.net_rx_tasklet);
--	else
--		tasklet_schedule(&netbk->tasklet.net_tx_tasklet);
--}
--
--static inline void maybe_schedule_tx_action(struct xen_netbk *netbk)
--{
--	smp_mb();
--	if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
--	    !list_empty(&netbk->net_schedule_list))
--		xen_netbk_bh_handler(netbk, 0);
--}
--
--static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
--{
--	struct skb_shared_info *ninfo;
--	struct sk_buff *nskb;
--	unsigned long offset;
--	int ret;
--	int len;
--	int headlen;
--
--	BUG_ON(skb_shinfo(skb)->frag_list != NULL);
--
--	nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN);
--	if (unlikely(!nskb))
--		goto err;
--
--	skb_reserve(nskb, NET_SKB_PAD + NET_IP_ALIGN);
--	headlen = skb_end_pointer(nskb) - nskb->data;
--	if (headlen > skb_headlen(skb))
--		headlen = skb_headlen(skb);
--	ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
--	BUG_ON(ret);
--
--	ninfo = skb_shinfo(nskb);
--	ninfo->gso_size = skb_shinfo(skb)->gso_size;
--	ninfo->gso_type = skb_shinfo(skb)->gso_type;
--
--	offset = headlen;
--	len = skb->len - headlen;
--
--	nskb->len = skb->len;
--	nskb->data_len = len;
--	nskb->truesize += len;
--
--	while (len) {
--		struct page *page;
--		int copy;
--		int zero;
--
--		if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
--			dump_stack();
--			goto err_free;
--		}
--
--		copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
--		zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
--
--		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
--		if (unlikely(!page))
--			goto err_free;
--
--		ret = skb_copy_bits(skb, offset, page_address(page), copy);
--		BUG_ON(ret);
--
--		ninfo->frags[ninfo->nr_frags].page = page;
--		ninfo->frags[ninfo->nr_frags].page_offset = 0;
--		ninfo->frags[ninfo->nr_frags].size = copy;
--		ninfo->nr_frags++;
--
--		offset += copy;
--		len -= copy;
--	}
--
--#ifdef NET_SKBUFF_DATA_USES_OFFSET
--	offset = 0;
--#else
--	offset = nskb->data - skb->data;
--#endif
--
--	nskb->transport_header = skb->transport_header + offset;
--	nskb->network_header = skb->network_header + offset;
--	nskb->mac_header = skb->mac_header + offset;
--
--	return nskb;
--
-- err_free:
--	kfree_skb(nskb);
-- err:
--	return NULL;
--}
--
--static inline int netbk_max_required_rx_slots(struct xen_netif *netif)
--{
--	if (netif->can_sg || netif->gso || netif->gso_prefix)
--		return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
--	return 1; /* all in one */
--}
--
--static inline int netbk_queue_full(struct xen_netif *netif)
--{
--	RING_IDX peek   = netif->rx_req_cons_peek;
--	RING_IDX needed = netbk_max_required_rx_slots(netif);
--
--	return ((netif->rx.sring->req_prod - peek) < needed) ||
--	       ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
--}
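
The comparisons above operate on free-running RING_IDX counters, so the unsigned subtraction yields the number of outstanding slots even after the 32-bit counters wrap. For example:

#include <assert.h>

typedef unsigned int RING_IDX;

int main(void)
{
	RING_IDX req_prod = 5;		/* producer has wrapped past 2^32 */
	RING_IDX peek = 0xFFFFFFFBu;	/* 2^32 - 5: not yet wrapped */

	/* 10 requests are outstanding despite the wrap. */
	assert((RING_IDX)(req_prod - peek) == 10);
	return 0;
}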
--
--/*
-- * Returns true if we should start a new receive buffer instead of
-- * adding 'size' bytes to a buffer which currently contains 'offset'
-- * bytes.
-- */
--static bool start_new_rx_buffer(int offset, unsigned long size, int head)
--{
--	/* simple case: we have completely filled the current buffer. */
--	if (offset == MAX_BUFFER_OFFSET)
--		return true;
--
--	/*
--	 * complex case: start a fresh buffer if the current frag
--	 * would overflow the current buffer but only if:
--	 *     (i)   this frag would fit completely in the next buffer
--	 * and (ii)  there is already some data in the current buffer
--	 * and (iii) this is not the head buffer.
--	 *
--	 * Where:
--	 * - (i) stops us splitting a frag into two copies
--	 *   unless the frag is too large for a single buffer.
--	 * - (ii) stops us from leaving a buffer pointlessly empty.
--	 * - (iii) stops us leaving the first buffer
--	 *   empty. Strictly speaking this is already covered
--	 *   by (ii) but is explicitly checked because
--	 *   netfront relies on the first buffer being
--	 *   non-empty and can crash otherwise.
--	 *
--	 * This means we will effectively linearise small
--	 * frags but do not needlessly split large buffers
--	 * into multiple copies; this tends to give large
--	 * frags their own buffers, as before.
--	 */
--	if ((offset + size > MAX_BUFFER_OFFSET) &&
--	    (size <= MAX_BUFFER_OFFSET) && offset && !head)
--		return true;
--
--	return false;
--}
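
A few concrete cases, checked against a stand-alone restatement of the predicate with MAX_BUFFER_OFFSET = 4096:

#include <assert.h>
#include <stdbool.h>

#define MAX_BUFFER_OFFSET 4096

static bool start_new_rx_buffer(int offset, unsigned long size, int head)
{
	if (offset == MAX_BUFFER_OFFSET)
		return true;
	if ((offset + size > MAX_BUFFER_OFFSET) &&
	    (size <= MAX_BUFFER_OFFSET) && offset && !head)
		return true;
	return false;
}

int main(void)
{
	assert(start_new_rx_buffer(4096, 1, 0));	/* buffer exactly full */
	assert(start_new_rx_buffer(100, 4000, 0));	/* frag fits next buffer */
	assert(!start_new_rx_buffer(100, 4000, 1));	/* but never for the head */
	assert(!start_new_rx_buffer(0, 5000, 0));	/* oversize frag: split it */
	return 0;
}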
--
--/*
-- * Figure out how many ring slots we're going to need to send @skb to
-- * the guest. This function is essentially a dry run of
-- * netbk_gop_frag_copy.
-- */
--static unsigned int count_skb_slots(struct sk_buff *skb, struct xen_netif *netif)
--{
--	unsigned int count = 1;
--	int i, copy_off = 0;
--
--	BUG_ON(offset_in_page(skb->data)+skb_headlen(skb) > MAX_BUFFER_OFFSET);
--
--	copy_off = skb_headlen(skb);
--
--	if (skb_shinfo(skb)->gso_size)
--		count++;
--
--	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
--		unsigned long size = skb_shinfo(skb)->frags[i].size;
--		unsigned long bytes;
--		while (size > 0) {
--			BUG_ON(copy_off > MAX_BUFFER_OFFSET);
--
--			if (start_new_rx_buffer(copy_off, size, 0)) {
--				count++;
--				copy_off = 0;
--			}
--
--			bytes = size;
--			if (copy_off + bytes > MAX_BUFFER_OFFSET)
--				bytes = MAX_BUFFER_OFFSET - copy_off;
--
--			copy_off += bytes;
--			size -= bytes;
--		}
--	}
--	return count;
--}
--
--int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
--{
--	struct xen_netif *netif = netdev_priv(dev);
--	struct xen_netbk *netbk;
--
--	BUG_ON(skb->dev != dev);
--
--	if (netif->group == -1)
--		goto drop;
--
--	netbk = &xen_netbk[netif->group];
--
--	/* Drop the packet if the target domain has no receive buffers. */
--	if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
--		goto drop;
--
--	/*
--	 * XXX For now we also copy skbuffs whose head crosses a page
--	 * boundary, because netbk_gop_skb can't handle them.
--	 */
--	if ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE) {
--		struct sk_buff *nskb = netbk_copy_skb(skb);
--		if (unlikely(nskb == NULL))
--			goto drop;
--		/* Copy only the header fields we use in this driver. */
--		nskb->dev = skb->dev;
--		nskb->ip_summed = skb->ip_summed;
--		dev_kfree_skb(skb);
--		skb = nskb;
--	}
--
--	/* Reserve ring slots for the worst-case number of fragments. */
--	netif->rx_req_cons_peek += count_skb_slots(skb, netif);
--	netif_get(netif);
--
--	if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
--		netif->rx.sring->req_event = netif->rx_req_cons_peek +
--			netbk_max_required_rx_slots(netif);
--		mb(); /* request notification /then/ check & stop the queue */
--		if (netbk_queue_full(netif))
--			netif_stop_queue(dev);
--	}
--	skb_queue_tail(&netbk->rx_queue, skb);
--
--	xen_netbk_bh_handler(netbk, 1);
--
--	return 0;
--
-- drop:
--	netif->stats.tx_dropped++;
--	dev_kfree_skb(skb);
--	return 0;
--}
--
--struct netrx_pending_operations {
--	unsigned copy_prod, copy_cons;
--	unsigned meta_prod, meta_cons;
--	struct gnttab_copy *copy;
--	struct netbk_rx_meta *meta;
--	int copy_off;
--	grant_ref_t copy_gref;
--};
--
--static struct netbk_rx_meta *get_next_rx_buffer(struct xen_netif *netif,
--						struct netrx_pending_operations *npo)
--{
--	struct netbk_rx_meta *meta;
--	struct xen_netif_rx_request *req;
--
--	req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
--
--	meta = npo->meta + npo->meta_prod++;
--	meta->gso_size = 0;
--	meta->size = 0;
--	meta->id = req->id;
--
--	npo->copy_off = 0;
--	npo->copy_gref = req->gref;
--
--	return meta;
--}
--
--/*
-- * Set up the grant operations for this fragment. If it's a flipping
-- * interface, we also set up the unmap request from here.
-- */
--static void netbk_gop_frag_copy(struct xen_netif *netif,
--				struct netrx_pending_operations *npo,
--				struct page *page, unsigned long size,
--				unsigned long offset, int head)
--{
--	struct gnttab_copy *copy_gop;
--	struct netbk_rx_meta *meta;
--	/*
--	 * These variables are used iff netif_get_page_ext returns true,
--	 * in which case they are guaranteed to be initialized.
--	 */
--	unsigned int uninitialized_var(group), uninitialized_var(idx);
--	int foreign = netif_get_page_ext(page, &group, &idx);
--	unsigned long bytes;
--
--	/* Data must not cross a page boundary. */
--	BUG_ON(size + offset > PAGE_SIZE);
--
--	meta = npo->meta + npo->meta_prod - 1;
--
--	while (size > 0) {
--		BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
--
--		if (start_new_rx_buffer(npo->copy_off, size, head)) {
--			/*
--			 * Netfront requires there to be some data in the head
--			 * buffer.
--			 */
--			BUG_ON(head);
--
--			meta = get_next_rx_buffer(netif, npo);
--		}
--
--		bytes = size;
--		if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
--			bytes = MAX_BUFFER_OFFSET - npo->copy_off;
--
--		copy_gop = npo->copy + npo->copy_prod++;
--		copy_gop->flags = GNTCOPY_dest_gref;
--		if (foreign) {
--			struct xen_netbk *netbk = &xen_netbk[group];
--			struct pending_tx_info *src_pend;
--
--			src_pend = &netbk->pending_tx_info[idx];
--
--			copy_gop->source.domid = src_pend->netif->domid;
--			copy_gop->source.u.ref = src_pend->req.gref;
--			copy_gop->flags |= GNTCOPY_source_gref;
--		} else {
--			void *vaddr = page_address(page);
--			copy_gop->source.domid = DOMID_SELF;
--			copy_gop->source.u.gmfn = virt_to_mfn(vaddr);
--		}
--		copy_gop->source.offset = offset;
--		copy_gop->dest.domid = netif->domid;
--
--		copy_gop->dest.offset = npo->copy_off;
--		copy_gop->dest.u.ref = npo->copy_gref;
--		copy_gop->len = bytes;
--
--		npo->copy_off += bytes;
--		meta->size += bytes;
--
--		offset += bytes;
--		size -= bytes;
--		head = 0; /* There must be something in this buffer now. */
--	}
--}
--
--/*
-- * Prepare an SKB to be transmitted to the frontend.
-- *
-- * This function is responsible for allocating grant operations, meta
-- * structures, etc.
-- *
-- * It returns the number of meta structures consumed. The number of
-- * ring slots used is always equal to the number of meta slots used
-- * plus the number of GSO descriptors used. Currently, we use either
-- * zero GSO descriptors (for non-GSO packets) or one descriptor (for
-- * frontend-side LRO).
-- */
--static int netbk_gop_skb(struct sk_buff *skb,
--			 struct netrx_pending_operations *npo)
--{
--	struct xen_netif *netif = netdev_priv(skb->dev);
--	int nr_frags = skb_shinfo(skb)->nr_frags;
--	int i;
--	struct xen_netif_rx_request *req;
--	struct netbk_rx_meta *meta;
--	int old_meta_prod;
--
--	old_meta_prod = npo->meta_prod;
--
--	/* Set up a GSO prefix descriptor, if necessary */
--	if (skb_shinfo(skb)->gso_size && netif->gso_prefix) {
--		req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
--		meta = npo->meta + npo->meta_prod++;
--		meta->gso_size = skb_shinfo(skb)->gso_size;
--		meta->size = 0;
--		meta->id = req->id;
--	}
--
--	req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
--	meta = npo->meta + npo->meta_prod++;
--
--	if (!netif->gso_prefix)
--		meta->gso_size = skb_shinfo(skb)->gso_size;
--	else
--		meta->gso_size = 0;
--
--	meta->size = 0;
--	meta->id = req->id;
--	npo->copy_off = 0;
--	npo->copy_gref = req->gref;
--
--	netbk_gop_frag_copy(netif,
--			    npo, virt_to_page(skb->data),
--			    skb_headlen(skb),
--			    offset_in_page(skb->data), 1);
--
--	/* Leave a gap for the GSO descriptor. */
--	if (skb_shinfo(skb)->gso_size && !netif->gso_prefix)
--		netif->rx.req_cons++;
--
--	for (i = 0; i < nr_frags; i++) {
--		netbk_gop_frag_copy(netif, npo,
--				    skb_shinfo(skb)->frags[i].page,
--				    skb_shinfo(skb)->frags[i].size,
--				    skb_shinfo(skb)->frags[i].page_offset,
--				    0);
--	}
--
--	return npo->meta_prod - old_meta_prod;
--}
--
--/*
-- * This is a twin to netbk_gop_skb.  Assume that netbk_gop_skb was
-- * used to set up the operations on the top of
-- * netrx_pending_operations, which have since been done.  Check that
-- * they didn't give any errors and advance over them.
-- */
--static int netbk_check_gop(int nr_meta_slots, domid_t domid,
--			   struct netrx_pending_operations *npo)
--{
--	struct gnttab_copy     *copy_op;
--	int status = NETIF_RSP_OKAY;
--	int i;
--
--	for (i = 0; i < nr_meta_slots; i++) {
--		copy_op = npo->copy + npo->copy_cons++;
--		if (copy_op->status != GNTST_okay) {
--			pr_debug("Bad status %d from copy to DOM%d.\n",
--				 copy_op->status, domid);
--			status = NETIF_RSP_ERROR;
--		}
--	}
--
--	return status;
--}
--
--static void netbk_add_frag_responses(struct xen_netif *netif, int status,
--				     struct netbk_rx_meta *meta,
--				     int nr_meta_slots)
--{
--	int i;
--	unsigned long offset;
--
--	/* No fragments used */
--	if (nr_meta_slots <= 1)
--		return;
--
--	nr_meta_slots--;
--
--	for (i = 0; i < nr_meta_slots; i++) {
--		int flags;
--		if (i == nr_meta_slots - 1)
--			flags = 0;
--		else
--			flags = NETRXF_more_data;
--
--		offset = 0;
--		make_rx_response(netif, meta[i].id, status, offset,
--				 meta[i].size, flags);
--	}
--}
--
--struct skb_cb_overlay {
--	int meta_slots_used;
--};
--
--static void net_rx_action(unsigned long data)
--{
--	struct xen_netif *netif = NULL;
--	struct xen_netbk *netbk = (struct xen_netbk *)data;
--	s8 status;
--	u16 irq, flags;
--	struct xen_netif_rx_response *resp;
--	struct sk_buff_head rxq;
--	struct sk_buff *skb;
--	int notify_nr = 0;
--	int ret;
--	int nr_frags;
--	int count;
--	unsigned long offset;
--	struct skb_cb_overlay *sco;
--
--	struct netrx_pending_operations npo = {
--		.copy  = netbk->grant_copy_op,
--		.meta  = netbk->meta,
--	};
--
--	skb_queue_head_init(&rxq);
--
--	count = 0;
--
--	while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) {
--		netif = netdev_priv(skb->dev);
--		nr_frags = skb_shinfo(skb)->nr_frags;
--
--		sco = (struct skb_cb_overlay *)skb->cb;
--		sco->meta_slots_used = netbk_gop_skb(skb, &npo);
--
--		count += nr_frags + 1;
--
--		__skb_queue_tail(&rxq, skb);
--
--		/* Filled the batch queue? */
--		if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE)
--			break;
--	}
--
--	BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
--
--	if (!npo.copy_prod)
--		return;
--
--	BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
--	ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, &netbk->grant_copy_op,
--					npo.copy_prod);
--	BUG_ON(ret != 0);
--
--	while ((skb = __skb_dequeue(&rxq)) != NULL) {
--		sco = (struct skb_cb_overlay *)skb->cb;
--
--		netif = netdev_priv(skb->dev);
--
--		if (netbk->meta[npo.meta_cons].gso_size && netif->gso_prefix) {
--			resp = RING_GET_RESPONSE(&netif->rx,
--						netif->rx.rsp_prod_pvt++);
--
--			resp->flags = NETRXF_gso_prefix | NETRXF_more_data;
--
--			resp->offset = netbk->meta[npo.meta_cons].gso_size;
--			resp->id = netbk->meta[npo.meta_cons].id;
--			resp->status = sco->meta_slots_used;
--
--			npo.meta_cons++;
--			sco->meta_slots_used--;
--		}
--
--
--		netif->stats.tx_bytes += skb->len;
--		netif->stats.tx_packets++;
--
--		status = netbk_check_gop(sco->meta_slots_used,
--					 netif->domid, &npo);
--
--		if (sco->meta_slots_used == 1)
--			flags = 0;
--		else
--			flags = NETRXF_more_data;
--
--		if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
--			flags |= NETRXF_csum_blank | NETRXF_data_validated;
--		else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
--			/* remote but checksummed. */
--			flags |= NETRXF_data_validated;
--
--		offset = 0;
--		resp = make_rx_response(netif, netbk->meta[npo.meta_cons].id,
--					status, offset,
--					netbk->meta[npo.meta_cons].size,
--					flags);
--
--		if (netbk->meta[npo.meta_cons].gso_size && !netif->gso_prefix) {
--			struct xen_netif_extra_info *gso =
--				(struct xen_netif_extra_info *)
--				RING_GET_RESPONSE(&netif->rx,
--						  netif->rx.rsp_prod_pvt++);
--
--			resp->flags |= NETRXF_extra_info;
--
--			gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size;
--			gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
--			gso->u.gso.pad = 0;
--			gso->u.gso.features = 0;
--
--			gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
--			gso->flags = 0;
--		}
--
--		netbk_add_frag_responses(netif, status,
--					 netbk->meta + npo.meta_cons + 1,
--					 sco->meta_slots_used);
--
--		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
--		irq = netif->irq;
--		if (ret && !netbk->rx_notify[irq]) {
--			netbk->rx_notify[irq] = 1;
--			netbk->notify_list[notify_nr++] = irq;
--		}
--
--		if (netif_queue_stopped(netif->dev) &&
--		    netif_schedulable(netif) &&
--		    !netbk_queue_full(netif))
--			netif_wake_queue(netif->dev);
--
--		netif_put(netif);
--		npo.meta_cons += sco->meta_slots_used;
--		dev_kfree_skb(skb);
--	}
--
--	while (notify_nr != 0) {
--		irq = netbk->notify_list[--notify_nr];
--		netbk->rx_notify[irq] = 0;
--		notify_remote_via_irq(irq);
--	}
--
--	/* More work to do? */
--	if (!skb_queue_empty(&netbk->rx_queue) &&
--			!timer_pending(&netbk->net_timer))
--		xen_netbk_bh_handler(netbk, 1);
--}
--
--static void net_alarm(unsigned long data)
--{
--	struct xen_netbk *netbk = (struct xen_netbk *)data;
--	xen_netbk_bh_handler(netbk, 1);
--}
--
--static void netbk_tx_pending_timeout(unsigned long data)
--{
--	struct xen_netbk *netbk = (struct xen_netbk *)data;
--	xen_netbk_bh_handler(netbk, 0);
--}
--
--struct net_device_stats *netif_be_get_stats(struct net_device *dev)
--{
--	struct xen_netif *netif = netdev_priv(dev);
--	return &netif->stats;
--}
--
--static int __on_net_schedule_list(struct xen_netif *netif)
--{
--	return !list_empty(&netif->list);
--}
--
--/* Must be called with net_schedule_list_lock held */
--static void remove_from_net_schedule_list(struct xen_netif *netif)
--{
--	if (likely(__on_net_schedule_list(netif))) {
--		list_del_init(&netif->list);
--		netif_put(netif);
--	}
--}
--
--static struct xen_netif *poll_net_schedule_list(struct xen_netbk *netbk)
--{
--	struct xen_netif *netif = NULL;
--
--	spin_lock_irq(&netbk->net_schedule_list_lock);
--	if (list_empty(&netbk->net_schedule_list))
--		goto out;
--
--	netif = list_first_entry(&netbk->net_schedule_list,
--				 struct xen_netif, list);
--	if (!netif)
--		goto out;
--
--	netif_get(netif);
--
--	remove_from_net_schedule_list(netif);
--out:
--	spin_unlock_irq(&netbk->net_schedule_list_lock);
--	return netif;
--}
--
--static void add_to_net_schedule_list_tail(struct xen_netif *netif)
--{
--	unsigned long flags;
--
--	struct xen_netbk *netbk = &xen_netbk[netif->group];
--	if (__on_net_schedule_list(netif))
--		return;
--
--	spin_lock_irqsave(&netbk->net_schedule_list_lock, flags);
--	if (!__on_net_schedule_list(netif) &&
--	    likely(netif_schedulable(netif))) {
--		list_add_tail(&netif->list, &netbk->net_schedule_list);
--		netif_get(netif);
--	}
--	spin_unlock_irqrestore(&netbk->net_schedule_list_lock, flags);
--}
--
--void netif_schedule_work(struct xen_netif *netif)
--{
--	struct xen_netbk *netbk = &xen_netbk[netif->group];
--	int more_to_do;
--
--	RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
--
--	if (more_to_do) {
--		add_to_net_schedule_list_tail(netif);
--		maybe_schedule_tx_action(netbk);
--	}
--}
--
--void netif_deschedule_work(struct xen_netif *netif)
--{
--	struct xen_netbk *netbk = &xen_netbk[netif->group];
--	spin_lock_irq(&netbk->net_schedule_list_lock);
--	remove_from_net_schedule_list(netif);
--	spin_unlock_irq(&netbk->net_schedule_list_lock);
--}
--
--
--static void tx_add_credit(struct xen_netif *netif)
--{
--	unsigned long max_burst, max_credit;
--
--	/*
--	 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
--	 * Otherwise the interface can seize up due to insufficient credit.
--	 */
--	max_burst = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size;
--	max_burst = min(max_burst, 131072UL);
--	max_burst = max(max_burst, netif->credit_bytes);
--
--	/* Take care that adding a new chunk of credit doesn't wrap to zero. */
--	max_credit = netif->remaining_credit + netif->credit_bytes;
--	if (max_credit < netif->remaining_credit)
--		max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
--
--	netif->remaining_credit = min(max_credit, max_burst);
--}
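
Worked through with hypothetical numbers (credit_bytes = 100000, remaining_credit = 40000, a 60000-byte request at the ring head), the replenish clamps to max_burst = max(min(60000, 131072), 100000) = 100000:

#include <assert.h>

int main(void)
{
	unsigned long credit_bytes = 100000, remaining_credit = 40000;
	unsigned long req_size = 60000;	/* head tx request, hypothetical */
	unsigned long max_burst, max_credit;

	max_burst = req_size < 131072UL ? req_size : 131072UL;
	if (max_burst < credit_bytes)
		max_burst = credit_bytes;	/* at least one full quantum */

	max_credit = remaining_credit + credit_bytes;
	if (max_credit < remaining_credit)	/* addition wrapped */
		max_credit = ~0UL;

	remaining_credit = max_credit < max_burst ? max_credit : max_burst;
	assert(remaining_credit == 100000);	/* clamped to max_burst */
	return 0;
}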
--
--static void tx_credit_callback(unsigned long data)
--{
--	struct xen_netif *netif = (struct xen_netif *)data;
--	tx_add_credit(netif);
--	netif_schedule_work(netif);
--}
--
--static inline int copy_pending_req(struct xen_netbk *netbk,
--				   pending_ring_idx_t pending_idx)
--{
--	return gnttab_copy_grant_page(
--			netbk->grant_tx_handle[pending_idx],
--			&netbk->mmap_pages[pending_idx]);
--}
--
--static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
--{
--	struct netbk_tx_pending_inuse *inuse, *n;
--	struct gnttab_unmap_grant_ref *gop;
--	u16 pending_idx;
--	pending_ring_idx_t dc, dp;
--	struct xen_netif *netif;
--	int ret;
--	LIST_HEAD(list);
--
--	dc = netbk->dealloc_cons;
--	gop = netbk->tx_unmap_ops;
--
--	/* Free up any grants we have finished using. */
--	do {
--		dp = netbk->dealloc_prod;
--
--		/* Ensure we see all indices enqueued by netif_idx_release(). */
--		smp_rmb();
--
--		while (dc != dp) {
--			unsigned long pfn;
--			struct netbk_tx_pending_inuse *pending_inuse =
--					netbk->pending_inuse;
--
--			pending_idx = netbk->dealloc_ring[pending_index(dc++)];
--			list_move_tail(&pending_inuse[pending_idx].list, &list);
--
--			pfn = idx_to_pfn(netbk, pending_idx);
--			/* Already unmapped? */
--			if (!phys_to_machine_mapping_valid(pfn))
--				continue;
--
--			gnttab_set_unmap_op(gop,
--					idx_to_kaddr(netbk, pending_idx),
--					GNTMAP_host_map,
--					netbk->grant_tx_handle[pending_idx]);
--			gop++;
--		}
--
--	} while (dp != netbk->dealloc_prod);
--
--	netbk->dealloc_cons = dc;
--
--	ret = HYPERVISOR_grant_table_op(
--		GNTTABOP_unmap_grant_ref, netbk->tx_unmap_ops,
--		gop - netbk->tx_unmap_ops);
--	BUG_ON(ret);
--
--	/*
--	 * Copy any entries that have been pending for too long
--	 */
--	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
--	    !list_empty(&netbk->pending_inuse_head)) {
--		list_for_each_entry_safe(inuse, n,
--				&netbk->pending_inuse_head, list) {
--			struct pending_tx_info *pending_tx_info;
--			pending_tx_info = netbk->pending_tx_info;
--
--			if (time_after(inuse->alloc_time + HZ / 2, jiffies))
--				break;
--
--			pending_idx = inuse - netbk->pending_inuse;
--
--			pending_tx_info[pending_idx].netif->nr_copied_skbs++;
--
--			switch (copy_pending_req(netbk, pending_idx)) {
--			case 0:
--				list_move_tail(&inuse->list, &list);
--				continue;
--			case -EBUSY:
--				list_del_init(&inuse->list);
--				continue;
--			case -ENOENT:
--				continue;
--			}
--
--			break;
--		}
--	}
--
--	list_for_each_entry_safe(inuse, n, &list, list) {
--		struct pending_tx_info *pending_tx_info;
--		pending_ring_idx_t index;
--
--		pending_tx_info = netbk->pending_tx_info;
--		pending_idx = inuse - netbk->pending_inuse;
--
--		netif = pending_tx_info[pending_idx].netif;
--
--		make_tx_response(netif, &pending_tx_info[pending_idx].req,
--				 NETIF_RSP_OKAY);
--
--		/* Ready for next use. */
--		gnttab_reset_grant_page(netbk->mmap_pages[pending_idx]);
--
--		index = pending_index(netbk->pending_prod++);
--		netbk->pending_ring[index] = pending_idx;
--
--		netif_put(netif);
--
--		list_del_init(&inuse->list);
--	}
--}
--
--static void netbk_tx_err(struct xen_netif *netif,
--		struct xen_netif_tx_request *txp, RING_IDX end)
--{
--	RING_IDX cons = netif->tx.req_cons;
--
--	do {
--		make_tx_response(netif, txp, NETIF_RSP_ERROR);
--		if (cons >= end)
--			break;
--		txp = RING_GET_REQUEST(&netif->tx, cons++);
--	} while (1);
--	netif->tx.req_cons = cons;
--	netif_schedule_work(netif);
--	netif_put(netif);
--}
--
--static int netbk_count_requests(struct xen_netif *netif,
--				struct xen_netif_tx_request *first,
--				struct xen_netif_tx_request *txp,
--				int work_to_do)
--{
--	RING_IDX cons = netif->tx.req_cons;
--	int frags = 0;
--
--	if (!(first->flags & NETTXF_more_data))
--		return 0;
--
--	do {
--		if (frags >= work_to_do) {
--			pr_debug("Need more frags\n");
--			return -frags;
--		}
--
--		if (unlikely(frags >= MAX_SKB_FRAGS)) {
--			pr_debug("Too many frags\n");
--			return -frags;
--		}
--
--		memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags),
--		       sizeof(*txp));
--		if (txp->size > first->size) {
--			pr_debug("Frag is bigger than frame.\n");
--			return -frags;
--		}
--
--		first->size -= txp->size;
--		frags++;
--
--		if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
--			pr_debug("txp->offset: %x, size: %u\n",
--				 txp->offset, txp->size);
--			return -frags;
--		}
--	} while ((txp++)->flags & NETTXF_more_data);
--
--	return frags;
--}
--
--static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk,
--						       struct xen_netif *netif,
--						       struct sk_buff *skb,
--						       struct xen_netif_tx_request *txp,
--						       struct gnttab_map_grant_ref *mop)
--{
--	struct skb_shared_info *shinfo = skb_shinfo(skb);
--	skb_frag_t *frags = shinfo->frags;
--	unsigned long pending_idx = *((u16 *)skb->data);
--	int i, start;
--
--	/* Skip first skb fragment if it is on same page as header fragment. */
--	start = ((unsigned long)shinfo->frags[0].page == pending_idx);
--
--	for (i = start; i < shinfo->nr_frags; i++, txp++) {
--		pending_ring_idx_t index;
--		struct pending_tx_info *pending_tx_info =
--			netbk->pending_tx_info;
--
--		index = pending_index(netbk->pending_cons++);
--		pending_idx = netbk->pending_ring[index];
--
--		gnttab_set_map_op(mop++, idx_to_kaddr(netbk, pending_idx),
--				  GNTMAP_host_map | GNTMAP_readonly,
--				  txp->gref, netif->domid);
--
--		memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
--		netif_get(netif);
--		pending_tx_info[pending_idx].netif = netif;
--		frags[i].page = (void *)pending_idx;
--	}
--
--	return mop;
--}
--
--static int netbk_tx_check_mop(struct xen_netbk *netbk,
--			      struct sk_buff *skb,
--			      struct gnttab_map_grant_ref **mopp)
--{
--	struct gnttab_map_grant_ref *mop = *mopp;
--	int pending_idx = *((u16 *)skb->data);
--	struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
--	struct xen_netif *netif = pending_tx_info[pending_idx].netif;
--	struct xen_netif_tx_request *txp;
--	struct skb_shared_info *shinfo = skb_shinfo(skb);
--	int nr_frags = shinfo->nr_frags;
--	int i, err, start;
--
--	/* Check status of header. */
--	err = mop->status;
--	if (unlikely(err)) {
--		pending_ring_idx_t index;
--		index = pending_index(netbk->pending_prod++);
--		txp = &pending_tx_info[pending_idx].req;
--		make_tx_response(netif, txp, NETIF_RSP_ERROR);
--		netbk->pending_ring[index] = pending_idx;
--		netif_put(netif);
--	} else {
--		set_phys_to_machine(
--			__pa(idx_to_kaddr(netbk, pending_idx)) >> PAGE_SHIFT,
--			FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
--		netbk->grant_tx_handle[pending_idx] = mop->handle;
--	}
--
--	/* Skip first skb fragment if it is on same page as header fragment. */
--	start = ((unsigned long)shinfo->frags[0].page == pending_idx);
--
--	for (i = start; i < nr_frags; i++) {
--		int j, newerr;
--		pending_ring_idx_t index;
--
--		pending_idx = (unsigned long)shinfo->frags[i].page;
--
--		/* Check error status: if okay then remember grant handle. */
--		newerr = (++mop)->status;
--		if (likely(!newerr)) {
--			unsigned long addr;
--			addr = idx_to_kaddr(netbk, pending_idx);
--			set_phys_to_machine(
--				__pa(addr)>>PAGE_SHIFT,
--				FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
--			netbk->grant_tx_handle[pending_idx] = mop->handle;
--			/* Had a previous error? Invalidate this fragment. */
--			if (unlikely(err))
--				netif_idx_release(netbk, pending_idx);
--			continue;
--		}
--
--		/* Error on this fragment: respond to client with an error. */
--		txp = &netbk->pending_tx_info[pending_idx].req;
--		make_tx_response(netif, txp, NETIF_RSP_ERROR);
--		index = pending_index(netbk->pending_prod++);
--		netbk->pending_ring[index] = pending_idx;
--		netif_put(netif);
--
--		/* Not the first error? Preceding frags already invalidated. */
--		if (err)
--			continue;
--
--		/* First error: invalidate header and preceding fragments. */
--		pending_idx = *((u16 *)skb->data);
--		netif_idx_release(netbk, pending_idx);
--		for (j = start; j < i; j++) {
--			pending_idx = (unsigned long)shinfo->frags[j].page;
--			netif_idx_release(netbk, pending_idx);
--		}
--
--		/* Remember the error: invalidate all subsequent fragments. */
--		err = newerr;
--	}
--
--	*mopp = mop + 1;
--	return err;
--}
--
--static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
--{
--	struct skb_shared_info *shinfo = skb_shinfo(skb);
--	int nr_frags = shinfo->nr_frags;
--	int i;
--
--	for (i = 0; i < nr_frags; i++) {
--		skb_frag_t *frag = shinfo->frags + i;
--		struct xen_netif_tx_request *txp;
--		unsigned long pending_idx;
--
--		pending_idx = (unsigned long)frag->page;
--
--		netbk->pending_inuse[pending_idx].alloc_time = jiffies;
--		list_add_tail(&netbk->pending_inuse[pending_idx].list,
--			      &netbk->pending_inuse_head);
--
--		txp = &netbk->pending_tx_info[pending_idx].req;
--		frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx));
--		frag->size = txp->size;
--		frag->page_offset = txp->offset;
--
--		skb->len += txp->size;
--		skb->data_len += txp->size;
--		skb->truesize += txp->size;
--	}
--}
--
--int netbk_get_extras(struct xen_netif *netif,
--		     struct xen_netif_extra_info *extras,
--		     int work_to_do)
--{
--	struct xen_netif_extra_info extra;
--	RING_IDX cons = netif->tx.req_cons;
--
--	do {
--		if (unlikely(work_to_do-- <= 0)) {
--			pr_debug("Missing extra info\n");
--			return -EBADR;
--		}
--
--		memcpy(&extra, RING_GET_REQUEST(&netif->tx, cons),
--		       sizeof(extra));
--		if (unlikely(!extra.type ||
--			     extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
--			netif->tx.req_cons = ++cons;
--			pr_debug("Invalid extra type: %d\n", extra.type);
--			return -EINVAL;
--		}
--
--		memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
--		netif->tx.req_cons = ++cons;
--	} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
--
--	return work_to_do;
--}
--
--static int netbk_set_skb_gso(struct sk_buff *skb,
--			     struct xen_netif_extra_info *gso)
--{
--	if (!gso->u.gso.size) {
--		pr_debug("GSO size must not be zero.\n");
--		return -EINVAL;
--	}
--
--	/* Currently only TCPv4 S.O. is supported. */
--	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
--		pr_debug("Bad GSO type %d.\n", gso->u.gso.type);
--		return -EINVAL;
--	}
--
--	skb_shinfo(skb)->gso_size = gso->u.gso.size;
--	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
--
--	/* Header must be checked, and gso_segs computed. */
--	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
--	skb_shinfo(skb)->gso_segs = 0;
--
--	return 0;
--}
--
--static int checksum_setup(struct xen_netif *netif, struct sk_buff *skb)
--{
--	struct iphdr *iph;
--	unsigned char *th;
--	int err = -EPROTO;
--	int recalculate_partial_csum = 0;
--
--	/*
--	 * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
--	 * peers can fail to set NETRXF_csum_blank when sending a GSO
--	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
--	 * recalculate the partial checksum.
--	 */
--	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
--		netif->rx_gso_checksum_fixup++;
--		skb->ip_summed = CHECKSUM_PARTIAL;
--		recalculate_partial_csum = 1;
--	}
--
--	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
--	if (skb->ip_summed != CHECKSUM_PARTIAL)
--		return 0;
--
--	if (skb->protocol != htons(ETH_P_IP))
--		goto out;
--
--	iph = (void *)skb->data;
--	th = skb->data + 4 * iph->ihl;
--	if (th >= skb_tail_pointer(skb))
--		goto out;
--
--	skb->csum_start = th - skb->head;
--	switch (iph->protocol) {
--	case IPPROTO_TCP:
--		skb->csum_offset = offsetof(struct tcphdr, check);
--
--		if (recalculate_partial_csum) {
--			struct tcphdr *tcph = (struct tcphdr *)th;
--			tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
--							 skb->len - iph->ihl*4,
--							 IPPROTO_TCP, 0);
--		}
--		break;
--	case IPPROTO_UDP:
--		skb->csum_offset = offsetof(struct udphdr, check);
--
--		if (recalculate_partial_csum) {
--			struct udphdr *udph = (struct udphdr *)th;
--			udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
--							 skb->len - iph->ihl*4,
--							 IPPROTO_UDP, 0);
--		}
--		break;
--	default:
--		if (net_ratelimit())
--			printk(KERN_ERR "Attempting to checksum a non-"
--			       "TCP/UDP packet, dropping a protocol"
--			       " %d packet\n", iph->protocol);
--		goto out;
--	}
--
--	if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
--		goto out;
--
--	err = 0;
--
--out:
--	return err;
--}
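
What checksum_setup() establishes is the kernel's CHECKSUM_PARTIAL
contract: csum_start/csum_offset locate the transport checksum field,
which must already hold the pseudo-header sum so that the NIC (or
skb_checksum_help() as the software fallback) can fold in the rest of
the segment. A minimal sketch of that invariant for TCP, assuming a
linear skb with a valid IPv4 header (illustrative only):

	#include <linux/ip.h>
	#include <linux/tcp.h>
	#include <linux/skbuff.h>
	#include <net/checksum.h>

	static void prime_partial_csum(struct sk_buff *skb, struct iphdr *iph,
				       struct tcphdr *tcph)
	{
		skb->ip_summed = CHECKSUM_PARTIAL;
		/* Where the checksum field lives... */
		skb->csum_start = (unsigned char *)tcph - skb->head;
		skb->csum_offset = offsetof(struct tcphdr, check);
		/* ...seeded with the pseudo-header sum over addresses,
		 * protocol and length. */
		tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
						 skb->len - iph->ihl * 4,
						 IPPROTO_TCP, 0);
	}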
--
--static bool tx_credit_exceeded(struct xen_netif *netif, unsigned size)
--{
--	unsigned long now = jiffies;
--	unsigned long next_credit =
--		netif->credit_timeout.expires +
--		msecs_to_jiffies(netif->credit_usec / 1000);
--
--	/* Timer could already be pending in rare cases. */
--	if (timer_pending(&netif->credit_timeout))
--		return true;
--
--	/* Passed the point where we can replenish credit? */
--	if (time_after_eq(now, next_credit)) {
--		netif->credit_timeout.expires = now;
--		tx_add_credit(netif);
--	}
--
--	/* Still too big to send right now? Set a callback. */
--	if (size > netif->remaining_credit) {
--		netif->credit_timeout.data     =
--			(unsigned long)netif;
--		netif->credit_timeout.function =
--			tx_credit_callback;
--		mod_timer(&netif->credit_timeout,
--			  next_credit);
--
--		return true;
--	}
--
--	return false;
--}
--
--static unsigned net_tx_build_mops(struct xen_netbk *netbk)
--{
--	struct gnttab_map_grant_ref *mop;
--	struct sk_buff *skb;
--	int ret;
--
--	mop = netbk->tx_map_ops;
--	while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
--		!list_empty(&netbk->net_schedule_list)) {
--		struct xen_netif *netif;
--		struct xen_netif_tx_request txreq;
--		struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
--		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
--		u16 pending_idx;
--		RING_IDX idx;
--		int work_to_do;
--		unsigned int data_len;
--		pending_ring_idx_t index;
--
--		/* Get a netif from the list with work to do. */
--		netif = poll_net_schedule_list(netbk);
--		if (!netif)
--			continue;
--
--		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
--		if (!work_to_do) {
--			netif_put(netif);
--			continue;
--		}
--
--		idx = netif->tx.req_cons;
--		rmb(); /* Ensure that we see the request before we copy it. */
--		memcpy(&txreq, RING_GET_REQUEST(&netif->tx, idx), sizeof(txreq));
--
--		/* Credit-based scheduling. */
--		if (txreq.size > netif->remaining_credit &&
--		    tx_credit_exceeded(netif, txreq.size)) {
--			netif_put(netif);
--			continue;
--		}
--
--		netif->remaining_credit -= txreq.size;
--
--		work_to_do--;
--		netif->tx.req_cons = ++idx;
--
--		memset(extras, 0, sizeof(extras));
--		if (txreq.flags & NETTXF_extra_info) {
--			work_to_do = netbk_get_extras(netif, extras,
--						      work_to_do);
--			idx = netif->tx.req_cons;
--			if (unlikely(work_to_do < 0)) {
--				netbk_tx_err(netif, &txreq, idx);
--				continue;
--			}
--		}
--
--		ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do);
--		if (unlikely(ret < 0)) {
--			netbk_tx_err(netif, &txreq, idx - ret);
--			continue;
--		}
--		idx += ret;
--
--		if (unlikely(txreq.size < ETH_HLEN)) {
--			pr_debug("Bad packet size: %d\n", txreq.size);
--			netbk_tx_err(netif, &txreq, idx);
--			continue;
--		}
--
--		/* No crossing a page as the payload mustn't fragment. */
--		if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
--			pr_debug("txreq.offset: %x, size: %u, end: %lu\n",
--				 txreq.offset, txreq.size,
--				 (txreq.offset&~PAGE_MASK) + txreq.size);
--			netbk_tx_err(netif, &txreq, idx);
--			continue;
--		}
--
--		index = pending_index(netbk->pending_cons);
--		pending_idx = netbk->pending_ring[index];
--
--		data_len = (txreq.size > PKT_PROT_LEN &&
--			    ret < MAX_SKB_FRAGS) ?
--			PKT_PROT_LEN : txreq.size;
--
--		skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
--				GFP_ATOMIC | __GFP_NOWARN);
--		if (unlikely(skb == NULL)) {
--			pr_debug("Can't allocate a skb in net_tx_build_mops.\n");
--			netbk_tx_err(netif, &txreq, idx);
--			break;
--		}
--
--		/* Packets passed to netif_rx() must have some headroom. */
--		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
--
--		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
--			struct xen_netif_extra_info *gso;
--			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
--
--			if (netbk_set_skb_gso(skb, gso)) {
--				kfree_skb(skb);
--				netbk_tx_err(netif, &txreq, idx);
--				continue;
--			}
--		}
--
--		gnttab_set_map_op(mop, idx_to_kaddr(netbk, pending_idx),
--				  GNTMAP_host_map | GNTMAP_readonly,
--				  txreq.gref, netif->domid);
--		mop++;
--
--		memcpy(&netbk->pending_tx_info[pending_idx].req,
--		       &txreq, sizeof(txreq));
--		netbk->pending_tx_info[pending_idx].netif = netif;
--		*((u16 *)skb->data) = pending_idx;
--
--		__skb_put(skb, data_len);
--
--		skb_shinfo(skb)->nr_frags = ret;
--		if (data_len < txreq.size) {
--			skb_shinfo(skb)->nr_frags++;
--			skb_shinfo(skb)->frags[0].page =
--				(void *)(unsigned long)pending_idx;
--		} else {
--			/* Discriminate from any valid pending_idx value. */
--			skb_shinfo(skb)->frags[0].page = (void *)~0UL;
--		}
--
--		__skb_queue_tail(&netbk->tx_queue, skb);
--
--		netbk->pending_cons++;
--
--		mop = netbk_get_requests(netbk, netif, skb, txfrags, mop);
--
--		netif->tx.req_cons = idx;
--		netif_schedule_work(netif);
--
--		if ((mop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops))
--			break;
--	}
--
--	return mop - netbk->tx_map_ops;
--}
--
--static void net_tx_submit(struct xen_netbk *netbk)
--{
--	struct gnttab_map_grant_ref *mop;
--	struct sk_buff *skb;
--
--	mop = netbk->tx_map_ops;
--	while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
--		struct xen_netif_tx_request *txp;
--		struct xen_netif *netif;
--		u16 pending_idx;
--		unsigned data_len;
--
--		pending_idx = *((u16 *)skb->data);
--		netif = netbk->pending_tx_info[pending_idx].netif;
--		txp = &netbk->pending_tx_info[pending_idx].req;
--
--		/* Check the remap error code. */
--		if (unlikely(netbk_tx_check_mop(netbk, skb, &mop))) {
--			pr_debug("netback grant failed.\n");
--			skb_shinfo(skb)->nr_frags = 0;
--			kfree_skb(skb);
--			continue;
--		}
--
--		data_len = skb->len;
--		memcpy(skb->data,
--		       (void *)(idx_to_kaddr(netbk, pending_idx)|txp->offset),
--		       data_len);
--		if (data_len < txp->size) {
--			/* Append the packet payload as a fragment. */
--			txp->offset += data_len;
--			txp->size -= data_len;
--		} else {
--			/* Schedule a response immediately. */
--			netif_idx_release(netbk, pending_idx);
--		}
--
--		if (txp->flags & NETTXF_csum_blank)
--			skb->ip_summed = CHECKSUM_PARTIAL;
--		else if (txp->flags & NETTXF_data_validated)
--			skb->ip_summed = CHECKSUM_UNNECESSARY;
--
--		netbk_fill_frags(netbk, skb);
--
--		/*
--		 * If the initial fragment was < PKT_PROT_LEN then
--		 * pull through some bytes from the other fragments to
--		 * increase the linear region to PKT_PROT_LEN bytes.
--		 */
--		if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) {
--			int target = min_t(int, skb->len, PKT_PROT_LEN);
--			__pskb_pull_tail(skb, target - skb_headlen(skb));
--		}
--
--		skb->dev      = netif->dev;
--		skb->protocol = eth_type_trans(skb, skb->dev);
--
--		if (checksum_setup(netif, skb)) {
--			pr_debug("Can't setup checksum in net_tx_action\n");
--			kfree_skb(skb);
--			continue;
--		}
--
--		if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
--		    unlikely(skb_linearize(skb))) {
--			pr_debug("Can't linearize skb in net_tx_action.\n");
--			kfree_skb(skb);
--			continue;
--		}
--
--		netif->stats.rx_bytes += skb->len;
--		netif->stats.rx_packets++;
--
--		netif_rx_ni(skb);
--		netif->dev->last_rx = jiffies;
--	}
--}
--
--/* Called after netfront has transmitted */
--static void net_tx_action(unsigned long data)
--{
--	struct xen_netbk *netbk = (struct xen_netbk *)data;
--	unsigned nr_mops;
--	int ret;
--
--	net_tx_action_dealloc(netbk);
--
--	nr_mops = net_tx_build_mops(netbk);
--
--	if (nr_mops == 0)
--		goto out;
--
--	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
--					netbk->tx_map_ops, nr_mops);
--	BUG_ON(ret);
--
--	net_tx_submit(netbk);
--out:
--	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
--	    !list_empty(&netbk->pending_inuse_head)) {
--		struct netbk_tx_pending_inuse *oldest;
--
--		oldest = list_entry(netbk->pending_inuse_head.next,
--				    struct netbk_tx_pending_inuse, list);
--		mod_timer(&netbk->netbk_tx_pending_timer,
--				oldest->alloc_time + HZ);
--	}
--}
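
net_tx_action() is the driver's batching discipline in miniature: build
an array of grant operations, flush them all with one hypercall, then
post-process the results. Its skeleton, with build_ops()/complete_ops()
as hypothetical stand-ins for net_tx_build_mops() and net_tx_submit():

	static void batched_tx(struct xen_netbk *netbk)
	{
		unsigned nr_ops = build_ops(netbk);	/* fill netbk->tx_map_ops */
		int ret;

		if (nr_ops == 0)
			return;

		/* One hypercall amortised over up to MAX_PENDING_REQS maps. */
		ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
						netbk->tx_map_ops, nr_ops);
		BUG_ON(ret);

		complete_ops(netbk);		/* check statuses, inject skbs */
	}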
--
--static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
--{
--	static DEFINE_SPINLOCK(_lock);
--	unsigned long flags;
--	pending_ring_idx_t index;
--
--	spin_lock_irqsave(&_lock, flags);
--	index = pending_index(netbk->dealloc_prod);
--	netbk->dealloc_ring[index] = pending_idx;
--	/* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
--	smp_wmb();
--	netbk->dealloc_prod++;
--	spin_unlock_irqrestore(&_lock, flags);
--
--	xen_netbk_bh_handler(netbk, 0);
--}
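
The smp_wmb() above pairs with the smp_rmb() in net_tx_action_dealloc():
the producer makes the ring entry visible before advancing dealloc_prod,
and the consumer reads dealloc_prod before reading entries. The pairing
reduced to essentials (illustrative; the real producer additionally
serialises concurrent callers with a spinlock):

	#define RING_SIZE 256

	static unsigned int prod, cons;
	static u16 ring[RING_SIZE];

	static void publish(u16 idx)		/* cf. netif_idx_release() */
	{
		ring[prod % RING_SIZE] = idx;
		smp_wmb();	/* entry visible before the index */
		prod++;
	}

	static int consume(u16 *idx)		/* cf. net_tx_action_dealloc() */
	{
		if (cons == prod)
			return 0;	/* nothing published */
		smp_rmb();	/* read the index before the entry */
		*idx = ring[cons % RING_SIZE];
		cons++;
		return 1;
	}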
--
--static void netif_page_release(struct page *page, unsigned int order)
--{
--	unsigned int group, idx;
--	int foreign = netif_get_page_ext(page, &group, &idx);
--
--	BUG_ON(!foreign);
--	BUG_ON(order);
--
--	netif_idx_release(&xen_netbk[group], idx);
--}
--
--irqreturn_t netif_be_int(int irq, void *dev_id)
--{
--	struct xen_netif *netif = dev_id;
--	struct xen_netbk *netbk;
--
--	if (netif->group == -1)
--		return IRQ_NONE;
--
--	netbk = &xen_netbk[netif->group];
--
--	add_to_net_schedule_list_tail(netif);
--	maybe_schedule_tx_action(netbk);
--
--	if (netif_schedulable(netif) && !netbk_queue_full(netif))
--		netif_wake_queue(netif->dev);
--
--	return IRQ_HANDLED;
--}
--
--static void make_tx_response(struct xen_netif *netif,
--			     struct xen_netif_tx_request *txp,
--			     s8       st)
--{
--	RING_IDX i = netif->tx.rsp_prod_pvt;
--	struct xen_netif_tx_response *resp;
--	int notify;
--
--	resp = RING_GET_RESPONSE(&netif->tx, i);
--	resp->id     = txp->id;
--	resp->status = st;
--
--	if (txp->flags & NETTXF_extra_info)
--		RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL;
--
--	netif->tx.rsp_prod_pvt = ++i;
--	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
--	if (notify)
--		notify_remote_via_irq(netif->irq);
--}
--
--static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
--					     u16      id,
--					     s8       st,
--					     u16      offset,
--					     u16      size,
--					     u16      flags)
--{
--	RING_IDX i = netif->rx.rsp_prod_pvt;
--	struct xen_netif_rx_response *resp;
--
--	resp = RING_GET_RESPONSE(&netif->rx, i);
--	resp->offset     = offset;
--	resp->flags      = flags;
--	resp->id         = id;
--	resp->status     = (s16)size;
--	if (st < 0)
--		resp->status = (s16)st;
--
--	netif->rx.rsp_prod_pvt = ++i;
--
--	return resp;
--}
--
--#ifdef NETBE_DEBUG_INTERRUPT
--static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
--{
--	struct list_head *ent;
--	struct xen_netif *netif;
--	int i = 0;
--	int group = 0;
--
--	printk(KERN_ALERT "netif_schedule_list:\n");
--
--	for (group = 0; group < xen_netbk_group_nr; group++) {
--		struct xen_netbk *netbk = &xen_netbk[group];
--		spin_lock_irq(&netbk->net_schedule_list_lock);
--		printk(KERN_ALERT "xen_netback group number: %d\n", group);
--		list_for_each(ent, &netbk->net_schedule_list) {
--			netif = list_entry(ent, struct xen_netif, list);
--			printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
--				"rx_resp_prod=%08x\n",
--				i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
--			printk(KERN_ALERT
--				"   tx_req_cons=%08x, tx_resp_prod=%08x)\n",
--				netif->tx.req_cons, netif->tx.rsp_prod_pvt);
--			printk(KERN_ALERT
--				"   shared(rx_req_prod=%08x "
--				"rx_resp_prod=%08x\n",
--				netif->rx.sring->req_prod,
--				netif->rx.sring->rsp_prod);
--			printk(KERN_ALERT
--				"   rx_event=%08x, tx_req_prod=%08x\n",
--				netif->rx.sring->rsp_event,
--				netif->tx.sring->req_prod);
--			printk(KERN_ALERT
--				"   tx_resp_prod=%08x, tx_event=%08x)\n",
--				netif->tx.sring->rsp_prod,
--				netif->tx.sring->rsp_event);
--			i++;
--		}
--		spin_unlock_irq(&netbk->net_schedule_list_lock);
--	}
--
--	printk(KERN_ALERT " ** End of netif_schedule_list **\n");
--
--	return IRQ_HANDLED;
--}
--#endif
--
--static inline int rx_work_todo(struct xen_netbk *netbk)
--{
--	return !skb_queue_empty(&netbk->rx_queue);
--}
--
--static inline int tx_work_todo(struct xen_netbk *netbk)
--{
--	if (netbk->dealloc_cons != netbk->dealloc_prod)
--		return 1;
--
--	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
--	    !list_empty(&netbk->pending_inuse_head))
--		return 1;
--
--	if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
--			!list_empty(&netbk->net_schedule_list))
--		return 1;
--
--	return 0;
--}
--
--static int netbk_action_thread(void *data)
--{
--	struct xen_netbk *netbk = (struct xen_netbk *)data;
--	while (!kthread_should_stop()) {
--		wait_event_interruptible(netbk->kthread.netbk_action_wq,
--				rx_work_todo(netbk)
--				|| tx_work_todo(netbk)
--				|| kthread_should_stop());
--		cond_resched();
--
--		if (kthread_should_stop())
--			break;
--
--		if (rx_work_todo(netbk))
--			net_rx_action((unsigned long)netbk);
--
--		if (tx_work_todo(netbk))
--			net_tx_action((unsigned long)netbk);
--	}
--
--	return 0;
--}
--
--static int __init netback_init(void)
--{
--	int i;
--	struct page *page;
--	int rc = 0;
--	int group;
--
--	if (!xen_pv_domain())
--		return -ENODEV;
--
--	xen_netbk_group_nr = num_online_cpus();
--	xen_netbk = vmalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);
--	if (!xen_netbk) {
--		printk(KERN_ALERT "%s: out of memory\n", __func__);
--		return -ENOMEM;
--	}
--	memset(xen_netbk, 0, sizeof(struct xen_netbk) * xen_netbk_group_nr);
--
--	for (group = 0; group < xen_netbk_group_nr; group++) {
--		struct xen_netbk *netbk = &xen_netbk[group];
--		skb_queue_head_init(&netbk->rx_queue);
--		skb_queue_head_init(&netbk->tx_queue);
--
--		init_timer(&netbk->net_timer);
--		netbk->net_timer.data = (unsigned long)netbk;
--		netbk->net_timer.function = net_alarm;
--
--		init_timer(&netbk->netbk_tx_pending_timer);
--		netbk->netbk_tx_pending_timer.data = (unsigned long)netbk;
--		netbk->netbk_tx_pending_timer.function =
--			netbk_tx_pending_timeout;
--
--		netbk->mmap_pages =
--			alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
--		if (!netbk->mmap_pages) {
--			printk(KERN_ALERT "%s: out of memory\n", __func__);
--			del_timer(&netbk->netbk_tx_pending_timer);
--			del_timer(&netbk->net_timer);
--			rc = -ENOMEM;
--			goto failed_init;
--		}
--
--		for (i = 0; i < MAX_PENDING_REQS; i++) {
--			page = netbk->mmap_pages[i];
--			SetPageForeign(page, netif_page_release);
--			netif_set_page_ext(page, group, i);
--			INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
--		}
--
--		netbk->pending_cons = 0;
--		netbk->pending_prod = MAX_PENDING_REQS;
--		for (i = 0; i < MAX_PENDING_REQS; i++)
--			netbk->pending_ring[i] = i;
--
--		if (MODPARM_netback_kthread) {
--			init_waitqueue_head(&netbk->kthread.netbk_action_wq);
--			netbk->kthread.task =
--				kthread_create(netbk_action_thread,
--					       (void *)netbk,
--					       "netback/%u", group);
--
--			if (!IS_ERR(netbk->kthread.task)) {
--				kthread_bind(netbk->kthread.task, group);
--			} else {
--				printk(KERN_ALERT
--					"kthread_create() failed at netback\n");
--				free_empty_pages_and_pagevec(netbk->mmap_pages,
--						MAX_PENDING_REQS);
--				del_timer(&netbk->netbk_tx_pending_timer);
--				del_timer(&netbk->net_timer);
--				rc = PTR_ERR(netbk->kthread.task);
--				goto failed_init;
--			}
--		} else {
--			tasklet_init(&netbk->tasklet.net_tx_tasklet,
--				     net_tx_action,
--				     (unsigned long)netbk);
--			tasklet_init(&netbk->tasklet.net_rx_tasklet,
--				     net_rx_action,
--				     (unsigned long)netbk);
--		}
--
--		INIT_LIST_HEAD(&netbk->pending_inuse_head);
--		INIT_LIST_HEAD(&netbk->net_schedule_list);
--
--		spin_lock_init(&netbk->net_schedule_list_lock);
--
--		atomic_set(&netbk->netfront_count, 0);
--
--		if (MODPARM_netback_kthread)
--			wake_up_process(netbk->kthread.task);
--	}
--
--	netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
--	if (MODPARM_copy_skb) {
--		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
--					      NULL, 0))
--			netbk_copy_skb_mode = NETBK_ALWAYS_COPY_SKB;
--		else
--			netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
--	}
--
--	rc = netif_xenbus_init();
--	if (rc)
--		goto failed_init;
--
--#ifdef NETBE_DEBUG_INTERRUPT
--	(void)bind_virq_to_irqhandler(VIRQ_DEBUG,
--				      0,
--				      netif_be_dbg,
--				      IRQF_SHARED,
--				      "net-be-dbg",
--				      &netif_be_dbg);
--#endif
--
--	return 0;
--
--failed_init:
--	for (i = 0; i < group; i++) {
--		struct xen_netbk *netbk = &xen_netbk[i];
--		free_empty_pages_and_pagevec(netbk->mmap_pages,
--				MAX_PENDING_REQS);
--		del_timer(&netbk->netbk_tx_pending_timer);
--		del_timer(&netbk->net_timer);
--		if (MODPARM_netback_kthread)
--			kthread_stop(netbk->kthread.task);
--	}
--	vfree(xen_netbk);
--	return rc;
--
--}
--
--module_init(netback_init);
--
--MODULE_LICENSE("Dual BSD/GPL");
-diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
-deleted file mode 100644
-index 867dc25..0000000
---- a/drivers/xen/netback/xenbus.c
-+++ /dev/null
-@@ -1,489 +0,0 @@
--/*
-- * Xenbus code for netif backend
-- *
-- * Copyright (C) 2005 Rusty Russell <rusty at rustcorp.com.au>
-- * Copyright (C) 2005 XenSource Ltd
-- *
-- * This program is free software; you can redistribute it and/or modify
-- * it under the terms of the GNU General Public License as published by
-- * the Free Software Foundation; either version 2 of the License, or
-- * (at your option) any later version.
-- *
-- * This program is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with this program; if not, write to the Free Software
-- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
--*/
--
--#include "common.h"
--
--static int connect_rings(struct backend_info *);
--static void connect(struct backend_info *);
--static void backend_create_netif(struct backend_info *be);
--static void unregister_hotplug_status_watch(struct backend_info *be);
--
--static int netback_remove(struct xenbus_device *dev)
--{
--	struct backend_info *be = dev_get_drvdata(&dev->dev);
--
--	unregister_hotplug_status_watch(be);
--	if (be->netif) {
--		kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
--		xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
--		netif_disconnect(be->netif);
--		be->netif = NULL;
--	}
--	kfree(be);
--	dev_set_drvdata(&dev->dev, NULL);
--	return 0;
--}
--
--
--/**
-- * Entry point to this code when a new device is created.  Allocate the basic
-- * structures and switch to InitWait.
-- */
--static int netback_probe(struct xenbus_device *dev,
--			 const struct xenbus_device_id *id)
--{
--	const char *message;
--	struct xenbus_transaction xbt;
--	int err;
--	int sg;
--	struct backend_info *be = kzalloc(sizeof(struct backend_info),
--					  GFP_KERNEL);
--	if (!be) {
--		xenbus_dev_fatal(dev, -ENOMEM,
--				 "allocating backend structure");
--		return -ENOMEM;
--	}
--
--	be->dev = dev;
--	dev_set_drvdata(&dev->dev, be);
--
--	sg = 1;
--	if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB)
--		sg = 0;
--
--	do {
--		err = xenbus_transaction_start(&xbt);
--		if (err) {
--			xenbus_dev_fatal(dev, err, "starting transaction");
--			goto fail;
--		}
--
--		err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg);
--		if (err) {
--			message = "writing feature-sg";
--			goto abort_transaction;
--		}
--
--		err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4",
--				    "%d", sg);
--		if (err) {
--			message = "writing feature-gso-tcpv4";
--			goto abort_transaction;
--		}
--
--		/* We support rx-copy path. */
--		err = xenbus_printf(xbt, dev->nodename,
--				    "feature-rx-copy", "%d", 1);
--		if (err) {
--			message = "writing feature-rx-copy";
--			goto abort_transaction;
--		}
--
--		/*
--		 * We don't support rx-flip path (except old guests who don't
--		 * grok this feature flag).
--		 */
--		err = xenbus_printf(xbt, dev->nodename,
--				    "feature-rx-flip", "%d", 0);
--		if (err) {
--			message = "writing feature-rx-flip";
--			goto abort_transaction;
--		}
--
--		err = xenbus_transaction_end(xbt, 0);
--	} while (err == -EAGAIN);
--
--	if (err) {
--		xenbus_dev_fatal(dev, err, "completing transaction");
--		goto fail;
--	}
--
--	err = xenbus_switch_state(dev, XenbusStateInitWait);
--	if (err)
--		goto fail;
--
--	/* This kicks hotplug scripts, so do it immediately. */
--	backend_create_netif(be);
--
--	return 0;
--
--abort_transaction:
--	xenbus_transaction_end(xbt, 1);
--	xenbus_dev_fatal(dev, err, "%s", message);
--fail:
--	pr_debug("failed");
--	netback_remove(dev);
--	return err;
--}
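
netback_probe() uses the canonical xenbus transaction idiom: start,
write, commit, and retry the whole block while xenbus_transaction_end()
reports -EAGAIN (another writer raced the transaction). Reduced to a
single key (illustrative only):

	#include <xen/xenbus.h>

	static int write_features(struct xenbus_device *dev)
	{
		struct xenbus_transaction xbt;
		int err;

	again:
		err = xenbus_transaction_start(&xbt);
		if (err)
			return err;

		err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
		if (err) {
			xenbus_transaction_end(xbt, 1);	/* abort */
			return err;
		}

		err = xenbus_transaction_end(xbt, 0);	/* commit */
		if (err == -EAGAIN)
			goto again;
		return err;
	}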
--
--
--/*
-- * Handle the creation of the hotplug script environment.  We add the script
-- * and vif variables to the environment, for the benefit of the vif-* hotplug
-- * scripts.
-- */
--static int netback_uevent(struct xenbus_device *xdev,
--			  struct kobj_uevent_env *env)
--{
--	struct backend_info *be = dev_get_drvdata(&xdev->dev);
--	char *val;
--
--	val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
--	if (IS_ERR(val)) {
--		int err = PTR_ERR(val);
--		xenbus_dev_fatal(xdev, err, "reading script");
--		return err;
--	} else {
--		if (add_uevent_var(env, "script=%s", val)) {
--			kfree(val);
--			return -ENOMEM;
--		}
--		kfree(val);
--	}
--
--	if (!be || !be->netif)
--		return 0;
--
--	return add_uevent_var(env, "vif=%s", be->netif->dev->name);
--}
--
--
--static void backend_create_netif(struct backend_info *be)
--{
--	int err;
--	long handle;
--	struct xenbus_device *dev = be->dev;
--
--	if (be->netif != NULL)
--		return;
--
--	err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%li", &handle);
--	if (err != 1) {
--		xenbus_dev_fatal(dev, err, "reading handle");
--		return;
--	}
--
--	be->netif = netif_alloc(&dev->dev, dev->otherend_id, handle);
--	if (IS_ERR(be->netif)) {
--		err = PTR_ERR(be->netif);
--		be->netif = NULL;
--		xenbus_dev_fatal(dev, err, "creating interface");
--		return;
--	}
--
--	kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
--}
--
--
--static void disconnect_backend(struct xenbus_device *dev)
--{
--	struct backend_info *be = dev_get_drvdata(&dev->dev);
--
--	if (be->netif) {
--		xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
--		netif_disconnect(be->netif);
--		be->netif = NULL;
--	}
--}
--
--/**
-- * Callback received when the frontend's state changes.
-- */
--static void frontend_changed(struct xenbus_device *dev,
--			     enum xenbus_state frontend_state)
--{
--	struct backend_info *be = dev_get_drvdata(&dev->dev);
--
--	pr_debug("frontend state %s", xenbus_strstate(frontend_state));
--
--	be->frontend_state = frontend_state;
--
--	switch (frontend_state) {
--	case XenbusStateInitialising:
--		if (dev->state == XenbusStateClosed) {
--			printk(KERN_INFO "%s: %s: prepare for reconnect\n",
--			       __func__, dev->nodename);
--			xenbus_switch_state(dev, XenbusStateInitWait);
--		}
--		break;
--
--	case XenbusStateInitialised:
--		break;
--
--	case XenbusStateConnected:
--		if (dev->state == XenbusStateConnected)
--			break;
--		backend_create_netif(be);
--		if (be->netif)
--			connect(be);
--		break;
--
--	case XenbusStateClosing:
--		if (be->netif)
--			kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
--		disconnect_backend(dev);
--		xenbus_switch_state(dev, XenbusStateClosing);
--		break;
--
--	case XenbusStateClosed:
--		xenbus_switch_state(dev, XenbusStateClosed);
--		if (xenbus_dev_is_online(dev))
--			break;
--		/* fall through if not online */
--	case XenbusStateUnknown:
--		device_unregister(&dev->dev);
--		break;
--
--	default:
--		xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
--				 frontend_state);
--		break;
--	}
--}
--
--
--static void xen_net_read_rate(struct xenbus_device *dev,
--			      unsigned long *bytes, unsigned long *usec)
--{
--	char *s, *e;
--	unsigned long b, u;
--	char *ratestr;
--
--	/* Default to unlimited bandwidth. */
--	*bytes = ~0UL;
--	*usec = 0;
--
--	ratestr = xenbus_read(XBT_NIL, dev->nodename, "rate", NULL);
--	if (IS_ERR(ratestr))
--		return;
--
--	s = ratestr;
--	b = simple_strtoul(s, &e, 10);
--	if ((s == e) || (*e != ','))
--		goto fail;
--
--	s = e + 1;
--	u = simple_strtoul(s, &e, 10);
--	if ((s == e) || (*e != '\0'))
--		goto fail;
--
--	*bytes = b;
--	*usec = u;
--
--	kfree(ratestr);
--	return;
--
-- fail:
--	pr_warn("Failed to parse network rate limit. Traffic unlimited.\n");
--	kfree(ratestr);
--}
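
For example (hypothetical values), a xenstore node rate = "1000000,20000"
parses to credit_bytes = 1000000 and credit_usec = 20000: one megabyte of
transmit credit replenished every 20ms, roughly 50MB/s sustained.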
--
--static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
--{
--	char *s, *e, *macstr;
--	int i;
--
--	macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
--	if (IS_ERR(macstr))
--		return PTR_ERR(macstr);
--
--	for (i = 0; i < ETH_ALEN; i++) {
--		mac[i] = simple_strtoul(s, &e, 16);
--		if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
--			kfree(macstr);
--			return -ENOENT;
--		}
--		s = e+1;
--	}
--
--	kfree(macstr);
--	return 0;
--}
--
--static void unregister_hotplug_status_watch(struct backend_info *be)
--{
--	if (be->have_hotplug_status_watch) {
--		unregister_xenbus_watch(&be->hotplug_status_watch);
--		kfree(be->hotplug_status_watch.node);
--	}
--	be->have_hotplug_status_watch = 0;
--}
--
--static void hotplug_status_changed(struct xenbus_watch *watch,
--				   const char **vec,
--				   unsigned int vec_size)
--{
--	struct backend_info *be = container_of(watch,
--					       struct backend_info,
--					       hotplug_status_watch);
--	char *str;
--	unsigned int len;
--
--	str = xenbus_read(XBT_NIL, be->dev->nodename, "hotplug-status", &len);
--	if (IS_ERR(str))
--		return;
--	if (len == sizeof("connected")-1 && !memcmp(str, "connected", len)) {
--		xenbus_switch_state(be->dev, XenbusStateConnected);
--		/* Not interested in this watch anymore. */
--		unregister_hotplug_status_watch(be);
--	}
--	kfree(str);
--}
--
--static void connect(struct backend_info *be)
--{
--	int err;
--	struct xenbus_device *dev = be->dev;
--
--	err = connect_rings(be);
--	if (err)
--		return;
--
--	err = xen_net_read_mac(dev, be->netif->fe_dev_addr);
--	if (err) {
--		xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
--		return;
--	}
--
--	xen_net_read_rate(dev, &be->netif->credit_bytes,
--			  &be->netif->credit_usec);
--	be->netif->remaining_credit = be->netif->credit_bytes;
--
--	unregister_hotplug_status_watch(be);
--	err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch,
--				   hotplug_status_changed,
--				   "%s/%s", dev->nodename, "hotplug-status");
--	if (err) {
--		/* Switch now, since we can't do a watch. */
--		xenbus_switch_state(dev, XenbusStateConnected);
--	} else {
--		be->have_hotplug_status_watch = 1;
--	}
--
--	netif_wake_queue(be->netif->dev);
--}
--
--
--static int connect_rings(struct backend_info *be)
--{
--	struct xen_netif *netif = be->netif;
--	struct xenbus_device *dev = be->dev;
--	unsigned long tx_ring_ref, rx_ring_ref;
--	unsigned int evtchn, rx_copy;
--	int err;
--	int val;
--
--	err = xenbus_gather(XBT_NIL, dev->otherend,
--			    "tx-ring-ref", "%lu", &tx_ring_ref,
--			    "rx-ring-ref", "%lu", &rx_ring_ref,
--			    "event-channel", "%u", &evtchn, NULL);
--	if (err) {
--		xenbus_dev_fatal(dev, err,
--				 "reading %s/ring-ref and event-channel",
--				 dev->otherend);
--		return err;
--	}
--
--	err = xenbus_scanf(XBT_NIL, dev->otherend, "request-rx-copy", "%u",
--			   &rx_copy);
--	if (err == -ENOENT) {
--		err = 0;
--		rx_copy = 0;
--	}
--	if (err < 0) {
--		xenbus_dev_fatal(dev, err, "reading %s/request-rx-copy",
--				 dev->otherend);
--		return err;
--	}
--	if (!rx_copy)
--		return -EOPNOTSUPP;
--
--	if (netif->dev->tx_queue_len != 0) {
--		if (xenbus_scanf(XBT_NIL, dev->otherend,
--				 "feature-rx-notify", "%d", &val) < 0)
--			val = 0;
--		if (val)
--			netif->can_queue = 1;
--		else
--			/* Must be non-zero for pfifo_fast to work. */
--			netif->dev->tx_queue_len = 1;
--	}
--
--	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg",
--			 "%d", &val) < 0)
--		val = 0;
--	netif->can_sg = !!val;
--
--	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4",
--			 "%d", &val) < 0)
--		val = 0;
--	netif->gso = !!val;
--
--	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix",
--			 "%d", &val) < 0)
--		val = 0;
--	netif->gso_prefix = !!val;
--
--	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
--			 "%d", &val) < 0)
--		val = 0;
--	netif->csum = !val;
--
--	/* Set dev->features */
--	netif_set_features(netif);
--
--	/* Map the shared frame, irq etc. */
--	err = netif_map(netif, tx_ring_ref, rx_ring_ref, evtchn);
--	if (err) {
--		xenbus_dev_fatal(dev, err,
--				 "mapping shared-frames %lu/%lu port %u",
--				 tx_ring_ref, rx_ring_ref, evtchn);
--		return err;
--	}
--	return 0;
--}
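
connect_rings() also shows the defaulting idiom for optional xenstore
feature flags: a failed xenbus_scanf() simply leaves the feature at its
default. The same idiom as a helper (illustrative only):

	static int read_feature(struct xenbus_device *dev, const char *node,
				int deflt)
	{
		int val;

		if (xenbus_scanf(XBT_NIL, dev->otherend, node, "%d", &val) < 0)
			val = deflt;
		return !!val;
	}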
--
--
--/* ** Driver Registration ** */
--
--
--static const struct xenbus_device_id netback_ids[] = {
--	{ "vif" },
--	{ "" }
--};
--
--
--static struct xenbus_driver netback = {
--	.name = "vif",
--	.owner = THIS_MODULE,
--	.ids = netback_ids,
--	.probe = netback_probe,
--	.remove = netback_remove,
--	.uevent = netback_uevent,
--	.otherend_changed = frontend_changed,
--};
--
--
--int netif_xenbus_init(void)
--{
--	printk(KERN_INFO "registering netback\n");
--	return xenbus_register_backend(&netback);
--}
--- 
-1.7.4
-
-
-From 932240f6f2a1ee52e330e281069efd2f728ff7ed Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Thu, 20 Jan 2011 13:02:14 +0000
-Subject: [PATCH 085/203] xen: netback: remove queue_length module option
-
-This setting can be controlled via sysfs; see the note after this patch.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/net/xen-netback/interface.c |   11 ++---------
- 1 files changed, 2 insertions(+), 9 deletions(-)
-
-diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
-index b429f8c..249f010 100644
---- a/drivers/net/xen-netback/interface.c
-+++ b/drivers/net/xen-netback/interface.c
-@@ -36,14 +36,7 @@
- #include <xen/events.h>
- #include <asm/xen/hypercall.h>
- 
--/*
-- * Module parameter 'queue_length':
-- *
-- * Enables queuing in the network stack when a client has run out of receive
-- * descriptors.
-- */
--static unsigned long netbk_queue_length = 32;
--module_param_named(queue_length, netbk_queue_length, ulong, 0644);
-+#define NETBK_QUEUE_LENGTH 32
- 
- static void netbk_add_netif(struct xen_netbk *netbk, int group_nr,
- 			   struct xen_netif *netif)
-@@ -296,7 +289,7 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
- 	netif_set_features(netif);
- 	SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
- 
--	dev->tx_queue_len = netbk_queue_length;
-+	dev->tx_queue_len = NETBK_QUEUE_LENGTH;
- 
- 	/*
- 	 * Initialise a dummy MAC address. We choose the numerically
--- 
-1.7.4
-
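
Note that no functionality is lost by the change above: the queue length
of any vif remains writable at runtime through the generic sysfs
attribute, e.g. (hypothetical interface name)
echo 64 > /sys/class/net/vif1.0/tx_queue_len.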
-
-From 96568c3f81c7ba4d4fea43be791e4a773331ed7c Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Thu, 20 Jan 2011 14:16:52 +0000
-Subject: [PATCH 086/203] xen: netback: correct error return from ethtool hooks.
-
-Should be -EOPNOTSUPP, not -ENOSYS: -ENOSYS is reserved for missing
-system calls, while an unsupported device feature is conventionally
-reported as -EOPNOTSUPP.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/net/xen-netback/interface.c |    6 +++---
- 1 files changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
-index 249f010..c41d93e 100644
---- a/drivers/net/xen-netback/interface.c
-+++ b/drivers/net/xen-netback/interface.c
-@@ -130,7 +130,7 @@ static int netbk_set_tx_csum(struct net_device *dev, u32 data)
- 	struct xen_netif *netif = netdev_priv(dev);
- 	if (data) {
- 		if (!netif->csum)
--			return -ENOSYS;
-+			return -EOPNOTSUPP;
- 		netif->features_disabled &= ~NETIF_F_IP_CSUM;
- 	} else {
- 		netif->features_disabled |= NETIF_F_IP_CSUM;
-@@ -145,7 +145,7 @@ static int netbk_set_sg(struct net_device *dev, u32 data)
- 	struct xen_netif *netif = netdev_priv(dev);
- 	if (data) {
- 		if (!netif->can_sg)
--			return -ENOSYS;
-+			return -EOPNOTSUPP;
- 		netif->features_disabled &= ~NETIF_F_SG;
- 	} else {
- 		netif->features_disabled |= NETIF_F_SG;
-@@ -160,7 +160,7 @@ static int netbk_set_tso(struct net_device *dev, u32 data)
- 	struct xen_netif *netif = netdev_priv(dev);
- 	if (data) {
- 		if (!netif->gso && !netif->gso_prefix)
--			return -ENOSYS;
-+			return -EOPNOTSUPP;
- 		netif->features_disabled &= ~NETIF_F_TSO;
- 	} else {
- 		netif->features_disabled |= NETIF_F_TSO;
--- 
-1.7.4
-
-
-From 12a5b4f364888f291bcd8e38a146f30fcceb9588 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Thu, 20 Jan 2011 14:20:04 +0000
-Subject: [PATCH 087/203] xen: netback: avoid leading _ in function parameter names.
-
-It is usually used to distinguish lower-level functions or to avoid naming
-conflicts in macros.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/net/xen-netback/netback.c |    6 +++---
- 1 files changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
-index b290525..cd2af15 100644
---- a/drivers/net/xen-netback/netback.c
-+++ b/drivers/net/xen-netback/netback.c
-@@ -90,7 +90,7 @@ static inline void netif_set_page_ext(struct page *pg,
- }
- 
- static int netif_get_page_ext(struct page *pg,
--			      unsigned int *_group, unsigned int *_idx)
-+			      unsigned int *pgroup, unsigned int *pidx)
- {
- 	union page_ext ext = { .mapping = pg->mapping };
- 	struct xen_netbk *netbk;
-@@ -117,8 +117,8 @@ static int netif_get_page_ext(struct page *pg,
- 	if (netbk->mmap_pages[idx] != pg)
- 		return 0;
- 
--	*_group = group;
--	*_idx = idx;
-+	*pgroup = group;
-+	*pidx = idx;
- 
- 	return 1;
- }
--- 
-1.7.4
-
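
The hazard behind this rename is macro argument capture: kernel macros
conventionally name their internal temporaries with a leading underscore,
so a parameter spelled the same way can be silently shadowed. A contrived
illustration (hypothetical macro, not from this driver):

	#define double_it(x) ({ typeof(x) _val = (x); _val * 2; })

	static int compute(int _val)	/* parameter shares the temp's name */
	{
		/* Expands to: typeof(_val) _val = (_val);
		 * The initializer now names the freshly declared, still
		 * uninitialized inner _val -- undefined behaviour. */
		return double_it(_val);
	}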
-
-From a789731f0ed97fc762835a568f2fefbf0d490ccf Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Thu, 20 Jan 2011 14:21:24 +0000
-Subject: [PATCH 088/203] xen: netback: drop unused debug interrupt handler.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/net/xen-netback/netback.c |   57 -------------------------------------
- 1 files changed, 0 insertions(+), 57 deletions(-)
-
-diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
-index cd2af15..a7646f3 100644
---- a/drivers/net/xen-netback/netback.c
-+++ b/drivers/net/xen-netback/netback.c
-@@ -47,8 +47,6 @@
- #include <asm/xen/hypercall.h>
- #include <asm/xen/page.h>
- 
--/*define NETBE_DEBUG_INTERRUPT*/
--
- struct xen_netbk *xen_netbk;
- int xen_netbk_group_nr;
- 
-@@ -1707,52 +1705,6 @@ static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
- 	return resp;
- }
- 
--#ifdef NETBE_DEBUG_INTERRUPT
--static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
--{
--	struct list_head *ent;
--	struct xen_netif *netif;
--	int i = 0;
--	int group = 0;
--
--	printk(KERN_ALERT "netif_schedule_list:\n");
--
--	for (group = 0; group < xen_netbk_group_nr; group++) {
--		struct xen_netbk *netbk = &xen_netbk[group];
--		spin_lock_irq(&netbk->net_schedule_list_lock);
--		printk(KERN_ALERT "xen_netback group number: %d\n", group);
--		list_for_each(ent, &netbk->net_schedule_list) {
--			netif = list_entry(ent, struct xen_netif, list);
--			printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
--				"rx_resp_prod=%08x\n",
--				i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
--			printk(KERN_ALERT
--				"   tx_req_cons=%08x, tx_resp_prod=%08x)\n",
--				netif->tx.req_cons, netif->tx.rsp_prod_pvt);
--			printk(KERN_ALERT
--				"   shared(rx_req_prod=%08x "
--				"rx_resp_prod=%08x\n",
--				netif->rx.sring->req_prod,
--				netif->rx.sring->rsp_prod);
--			printk(KERN_ALERT
--				"   rx_event=%08x, tx_req_prod=%08x\n",
--				netif->rx.sring->rsp_event,
--				netif->tx.sring->req_prod);
--			printk(KERN_ALERT
--				"   tx_resp_prod=%08x, tx_event=%08x)\n",
--				netif->tx.sring->rsp_prod,
--				netif->tx.sring->rsp_event);
--			i++;
--		}
--		spin_unlock_irq(&netbk->net_schedule_list_lock);
--	}
--
--	printk(KERN_ALERT " ** End of netif_schedule_list **\n");
--
--	return IRQ_HANDLED;
--}
--#endif
--
- static inline int rx_work_todo(struct xen_netbk *netbk)
- {
- 	return !skb_queue_empty(&netbk->rx_queue);
-@@ -1903,15 +1855,6 @@ static int __init netback_init(void)
- 	if (rc)
- 		goto failed_init;
- 
--#ifdef NETBE_DEBUG_INTERRUPT
--	(void)bind_virq_to_irqhandler(VIRQ_DEBUG,
--				      0,
--				      netif_be_dbg,
--				      IRQF_SHARED,
--				      "net-be-dbg",
--				      &netif_be_dbg);
--#endif
--
- 	return 0;
- 
- failed_init:
--- 
-1.7.4
-
-
-From 056508bc941b3715c72a5fa00a6b38a006979dd9 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Thu, 20 Jan 2011 16:42:27 +0000
-Subject: [PATCH 089/203] xen: netif: properly namespace the Xen netif protocol header.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/net/xen-netback/netback.c |   38 +++++++++---------
- drivers/net/xen-netfront.c        |   20 +++++-----
- include/xen/interface/io/netif.h  |   80 ++++++++++++++++++------------------
- 3 files changed, 69 insertions(+), 69 deletions(-)
-
-diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
-index a7646f3..69b4535 100644
---- a/drivers/net/xen-netback/netback.c
-+++ b/drivers/net/xen-netback/netback.c
-@@ -582,7 +582,7 @@ static int netbk_check_gop(int nr_meta_slots, domid_t domid,
- 			   struct netrx_pending_operations *npo)
- {
- 	struct gnttab_copy     *copy_op;
--	int status = NETIF_RSP_OKAY;
-+	int status = XEN_NETIF_RSP_OKAY;
- 	int i;
- 
- 	for (i = 0; i < nr_meta_slots; i++) {
-@@ -590,7 +590,7 @@ static int netbk_check_gop(int nr_meta_slots, domid_t domid,
- 		if (copy_op->status != GNTST_okay) {
- 				pr_debug("Bad status %d from copy to DOM%d.\n",
- 					 copy_op->status, domid);
--				status = NETIF_RSP_ERROR;
-+				status = XEN_NETIF_RSP_ERROR;
- 			}
- 	}
- 
-@@ -615,7 +615,7 @@ static void netbk_add_frag_responses(struct xen_netif *netif, int status,
- 		if (i == nr_meta_slots - 1)
- 			flags = 0;
- 		else
--			flags = NETRXF_more_data;
-+			flags = XEN_NETRXF_more_data;
- 
- 		offset = 0;
- 		make_rx_response(netif, meta[i].id, status, offset,
-@@ -687,7 +687,7 @@ static void net_rx_action(unsigned long data)
- 			resp = RING_GET_RESPONSE(&netif->rx,
- 						netif->rx.rsp_prod_pvt++);
- 
--			resp->flags = NETRXF_gso_prefix | NETRXF_more_data;
-+			resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data;
- 
- 			resp->offset = netbk->meta[npo.meta_cons].gso_size;
- 			resp->id = netbk->meta[npo.meta_cons].id;
-@@ -707,13 +707,13 @@ static void net_rx_action(unsigned long data)
- 		if (sco->meta_slots_used == 1)
- 			flags = 0;
- 		else
--			flags = NETRXF_more_data;
-+			flags = XEN_NETRXF_more_data;
- 
- 		if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
--			flags |= NETRXF_csum_blank | NETRXF_data_validated;
-+			flags |= XEN_NETRXF_csum_blank | XEN_NETRXF_data_validated;
- 		else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
- 			/* remote but checksummed. */
--			flags |= NETRXF_data_validated;
-+			flags |= XEN_NETRXF_data_validated;
- 
- 		offset = 0;
- 		resp = make_rx_response(netif, netbk->meta[npo.meta_cons].id,
-@@ -727,7 +727,7 @@ static void net_rx_action(unsigned long data)
- 				RING_GET_RESPONSE(&netif->rx,
- 						  netif->rx.rsp_prod_pvt++);
- 
--			resp->flags |= NETRXF_extra_info;
-+			resp->flags |= XEN_NETRXF_extra_info;
- 
- 			gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size;
- 			gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
-@@ -989,7 +989,7 @@ static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
- 		netif = pending_tx_info[pending_idx].netif;
- 
- 		make_tx_response(netif, &pending_tx_info[pending_idx].req,
--				 NETIF_RSP_OKAY);
-+				 XEN_NETIF_RSP_OKAY);
- 
- 		/* Ready for next use. */
- 		gnttab_reset_grant_page(netbk->mmap_pages[pending_idx]);
-@@ -1009,7 +1009,7 @@ static void netbk_tx_err(struct xen_netif *netif,
- 	RING_IDX cons = netif->tx.req_cons;
- 
- 	do {
--		make_tx_response(netif, txp, NETIF_RSP_ERROR);
-+		make_tx_response(netif, txp, XEN_NETIF_RSP_ERROR);
- 		if (cons >= end)
- 			break;
- 		txp = RING_GET_REQUEST(&netif->tx, cons++);
-@@ -1027,7 +1027,7 @@ static int netbk_count_requests(struct xen_netif *netif,
- 	RING_IDX cons = netif->tx.req_cons;
- 	int frags = 0;
- 
--	if (!(first->flags & NETTXF_more_data))
-+	if (!(first->flags & XEN_NETTXF_more_data))
- 		return 0;
- 
- 	do {
-@@ -1056,7 +1056,7 @@ static int netbk_count_requests(struct xen_netif *netif,
- 				 txp->offset, txp->size);
- 			return -frags;
- 		}
--	} while ((txp++)->flags & NETTXF_more_data);
-+	} while ((txp++)->flags & XEN_NETTXF_more_data);
- 
- 	return frags;
- }
-@@ -1115,7 +1115,7 @@ static int netbk_tx_check_mop(struct xen_netbk *netbk,
- 		pending_ring_idx_t index;
- 		index = pending_index(netbk->pending_prod++);
- 		txp = &pending_tx_info[pending_idx].req;
--		make_tx_response(netif, txp, NETIF_RSP_ERROR);
-+		make_tx_response(netif, txp, XEN_NETIF_RSP_ERROR);
- 		netbk->pending_ring[index] = pending_idx;
- 		netif_put(netif);
- 	} else {
-@@ -1151,7 +1151,7 @@ static int netbk_tx_check_mop(struct xen_netbk *netbk,
- 
- 		/* Error on this fragment: respond to client with an error. */
- 		txp = &netbk->pending_tx_info[pending_idx].req;
--		make_tx_response(netif, txp, NETIF_RSP_ERROR);
-+		make_tx_response(netif, txp, XEN_NETIF_RSP_ERROR);
- 		index = pending_index(netbk->pending_prod++);
- 		netbk->pending_ring[index] = pending_idx;
- 		netif_put(netif);
-@@ -1406,7 +1406,7 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk)
- 		netif->tx.req_cons = ++idx;
- 
- 		memset(extras, 0, sizeof(extras));
--		if (txreq.flags & NETTXF_extra_info) {
-+		if (txreq.flags & XEN_NETTXF_extra_info) {
- 			work_to_do = netbk_get_extras(netif, extras,
- 						      work_to_do);
- 			idx = netif->tx.req_cons;
-@@ -1542,9 +1542,9 @@ static void net_tx_submit(struct xen_netbk *netbk)
- 			netif_idx_release(netbk, pending_idx);
- 		}
- 
--		if (txp->flags & NETTXF_csum_blank)
-+		if (txp->flags & XEN_NETTXF_csum_blank)
- 			skb->ip_summed = CHECKSUM_PARTIAL;
--		else if (txp->flags & NETTXF_data_validated)
-+		else if (txp->flags & XEN_NETTXF_data_validated)
- 			skb->ip_summed = CHECKSUM_UNNECESSARY;
- 
- 		netbk_fill_frags(netbk, skb);
-@@ -1673,8 +1673,8 @@ static void make_tx_response(struct xen_netif *netif,
- 	resp->id     = txp->id;
- 	resp->status = st;
- 
--	if (txp->flags & NETTXF_extra_info)
--		RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL;
-+	if (txp->flags & XEN_NETTXF_extra_info)
-+		RING_GET_RESPONSE(&netif->tx, ++i)->status = XEN_NETIF_RSP_NULL;
- 
- 	netif->tx.rsp_prod_pvt = ++i;
- 	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
-diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
-index 458bb57..cc23d42 100644
---- a/drivers/net/xen-netfront.c
-+++ b/drivers/net/xen-netfront.c
-@@ -356,7 +356,7 @@ static void xennet_tx_buf_gc(struct net_device *dev)
- 			struct xen_netif_tx_response *txrsp;
- 
- 			txrsp = RING_GET_RESPONSE(&np->tx, cons);
--			if (txrsp->status == NETIF_RSP_NULL)
-+			if (txrsp->status == XEN_NETIF_RSP_NULL)
- 				continue;
- 
- 			id  = txrsp->id;
-@@ -413,7 +413,7 @@ static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
-	   larger than a page), split it into page-sized chunks. */
- 	while (len > PAGE_SIZE - offset) {
- 		tx->size = PAGE_SIZE - offset;
--		tx->flags |= NETTXF_more_data;
-+		tx->flags |= XEN_NETTXF_more_data;
- 		len -= tx->size;
- 		data += tx->size;
- 		offset = 0;
-@@ -439,7 +439,7 @@ static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
- 	for (i = 0; i < frags; i++) {
- 		skb_frag_t *frag = skb_shinfo(skb)->frags + i;
- 
--		tx->flags |= NETTXF_more_data;
-+		tx->flags |= XEN_NETTXF_more_data;
- 
- 		id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
- 		np->tx_skbs[id].skb = skb_get(skb);
-@@ -514,10 +514,10 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
- 	tx->flags = 0;
- 	if (skb->ip_summed == CHECKSUM_PARTIAL)
- 		/* local packet? */
--		tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
-+		tx->flags |= XEN_NETTXF_csum_blank | XEN_NETTXF_data_validated;
- 	else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
- 		/* remote but checksummed. */
--		tx->flags |= NETTXF_data_validated;
-+		tx->flags |= XEN_NETTXF_data_validated;
- 
- 	if (skb_shinfo(skb)->gso_size) {
- 		struct xen_netif_extra_info *gso;
-@@ -528,7 +528,7 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
- 		if (extra)
- 			extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
- 		else
--			tx->flags |= NETTXF_extra_info;
-+			tx->flags |= XEN_NETTXF_extra_info;
- 
- 		gso->u.gso.size = skb_shinfo(skb)->gso_size;
- 		gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
-@@ -648,7 +648,7 @@ static int xennet_get_responses(struct netfront_info *np,
- 	int err = 0;
- 	unsigned long ret;
- 
--	if (rx->flags & NETRXF_extra_info) {
-+	if (rx->flags & XEN_NETRXF_extra_info) {
- 		err = xennet_get_extras(np, extras, rp);
- 		cons = np->rx.rsp_cons;
- 	}
-@@ -685,7 +685,7 @@ static int xennet_get_responses(struct netfront_info *np,
- 		__skb_queue_tail(list, skb);
- 
- next:
--		if (!(rx->flags & NETRXF_more_data))
-+		if (!(rx->flags & XEN_NETRXF_more_data))
- 			break;
- 
- 		if (cons + frags == rp) {
-@@ -950,9 +950,9 @@ err:
- 		skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len);
- 		skb->len += skb->data_len;
- 
--		if (rx->flags & NETRXF_csum_blank)
-+		if (rx->flags & XEN_NETRXF_csum_blank)
- 			skb->ip_summed = CHECKSUM_PARTIAL;
--		else if (rx->flags & NETRXF_data_validated)
-+		else if (rx->flags & XEN_NETRXF_data_validated)
- 			skb->ip_summed = CHECKSUM_UNNECESSARY;
- 
- 		__skb_queue_tail(&rxq, skb);
-diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h
-index 8309344..cb94668 100644
---- a/include/xen/interface/io/netif.h
-+++ b/include/xen/interface/io/netif.h
-@@ -22,50 +22,50 @@
- 
- /*
-  * This is the 'wire' format for packets:
-- *  Request 1: netif_tx_request -- NETTXF_* (any flags)
-- * [Request 2: netif_tx_extra]  (only if request 1 has NETTXF_extra_info)
-- * [Request 3: netif_tx_extra]  (only if request 2 has XEN_NETIF_EXTRA_MORE)
-- *  Request 4: netif_tx_request -- NETTXF_more_data
-- *  Request 5: netif_tx_request -- NETTXF_more_data
-+ *  Request 1: xen_netif_tx_request  -- XEN_NETTXF_* (any flags)
-+ * [Request 2: xen_netif_extra_info]    (only if request 1 has XEN_NETTXF_extra_info)
-+ * [Request 3: xen_netif_extra_info]    (only if request 2 has XEN_NETIF_EXTRA_MORE)
-+ *  Request 4: xen_netif_tx_request  -- XEN_NETTXF_more_data
-+ *  Request 5: xen_netif_tx_request  -- XEN_NETTXF_more_data
-  *  ...
-- *  Request N: netif_tx_request -- 0
-+ *  Request N: xen_netif_tx_request  -- 0
-  */
- 
- /* Protocol checksum field is blank in the packet (hardware offload)? */
--#define _NETTXF_csum_blank     (0)
--#define  NETTXF_csum_blank     (1U<<_NETTXF_csum_blank)
-+#define _XEN_NETTXF_csum_blank		(0)
-+#define  XEN_NETTXF_csum_blank		(1U<<_XEN_NETTXF_csum_blank)
- 
- /* Packet data has been validated against protocol checksum. */
--#define _NETTXF_data_validated (1)
--#define  NETTXF_data_validated (1U<<_NETTXF_data_validated)
-+#define _XEN_NETTXF_data_validated	(1)
-+#define  XEN_NETTXF_data_validated	(1U<<_XEN_NETTXF_data_validated)
- 
- /* Packet continues in the next request descriptor. */
--#define _NETTXF_more_data      (2)
--#define  NETTXF_more_data      (1U<<_NETTXF_more_data)
-+#define _XEN_NETTXF_more_data		(2)
-+#define  XEN_NETTXF_more_data		(1U<<_XEN_NETTXF_more_data)
- 
- /* Packet to be followed by extra descriptor(s). */
--#define _NETTXF_extra_info     (3)
--#define  NETTXF_extra_info     (1U<<_NETTXF_extra_info)
-+#define _XEN_NETTXF_extra_info		(3)
-+#define  XEN_NETTXF_extra_info		(1U<<_XEN_NETTXF_extra_info)
- 
- struct xen_netif_tx_request {
-     grant_ref_t gref;      /* Reference to buffer page */
-     uint16_t offset;       /* Offset within buffer page */
--    uint16_t flags;        /* NETTXF_* */
-+    uint16_t flags;        /* XEN_NETTXF_* */
-     uint16_t id;           /* Echoed in response message. */
-     uint16_t size;         /* Packet size in bytes.       */
- };
- 
--/* Types of netif_extra_info descriptors. */
--#define XEN_NETIF_EXTRA_TYPE_NONE  (0)  /* Never used - invalid */
--#define XEN_NETIF_EXTRA_TYPE_GSO   (1)  /* u.gso */
--#define XEN_NETIF_EXTRA_TYPE_MAX   (2)
-+/* Types of xen_netif_extra_info descriptors. */
-+#define XEN_NETIF_EXTRA_TYPE_NONE	(0)  /* Never used - invalid */
-+#define XEN_NETIF_EXTRA_TYPE_GSO	(1)  /* u.gso */
-+#define XEN_NETIF_EXTRA_TYPE_MAX	(2)
- 
--/* netif_extra_info flags. */
--#define _XEN_NETIF_EXTRA_FLAG_MORE (0)
--#define XEN_NETIF_EXTRA_FLAG_MORE  (1U<<_XEN_NETIF_EXTRA_FLAG_MORE)
-+/* xen_netif_extra_info flags. */
-+#define _XEN_NETIF_EXTRA_FLAG_MORE	(0)
-+#define  XEN_NETIF_EXTRA_FLAG_MORE	(1U<<_XEN_NETIF_EXTRA_FLAG_MORE)
- 
- /* GSO types - only TCPv4 currently supported. */
--#define XEN_NETIF_GSO_TYPE_TCPV4        (1)
-+#define XEN_NETIF_GSO_TYPE_TCPV4	(1)
- 
- /*
-  * This structure needs to fit within both netif_tx_request and
-@@ -107,7 +107,7 @@ struct xen_netif_extra_info {
- 
- struct xen_netif_tx_response {
- 	uint16_t id;
--	int16_t  status;       /* NETIF_RSP_* */
-+	int16_t  status;       /* XEN_NETIF_RSP_* */
- };
- 
- struct xen_netif_rx_request {
-@@ -116,29 +116,29 @@ struct xen_netif_rx_request {
- };
- 
- /* Packet data has been validated against protocol checksum. */
--#define _NETRXF_data_validated (0)
--#define  NETRXF_data_validated (1U<<_NETRXF_data_validated)
-+#define _XEN_NETRXF_data_validated	(0)
-+#define  XEN_NETRXF_data_validated	(1U<<_XEN_NETRXF_data_validated)
- 
- /* Protocol checksum field is blank in the packet (hardware offload)? */
--#define _NETRXF_csum_blank     (1)
--#define  NETRXF_csum_blank     (1U<<_NETRXF_csum_blank)
-+#define _XEN_NETRXF_csum_blank		(1)
-+#define  XEN_NETRXF_csum_blank		(1U<<_XEN_NETRXF_csum_blank)
- 
- /* Packet continues in the next request descriptor. */
--#define _NETRXF_more_data      (2)
--#define  NETRXF_more_data      (1U<<_NETRXF_more_data)
-+#define _XEN_NETRXF_more_data		(2)
-+#define  XEN_NETRXF_more_data		(1U<<_XEN_NETRXF_more_data)
- 
- /* Packet to be followed by extra descriptor(s). */
--#define _NETRXF_extra_info     (3)
--#define  NETRXF_extra_info     (1U<<_NETRXF_extra_info)
-+#define _XEN_NETRXF_extra_info		(3)
-+#define  XEN_NETRXF_extra_info		(1U<<_XEN_NETRXF_extra_info)
- 
- /* GSO Prefix descriptor. */
--#define _NETRXF_gso_prefix     (4)
--#define  NETRXF_gso_prefix     (1U<<_NETRXF_gso_prefix)
-+#define _XEN_NETRXF_gso_prefix		(4)
-+#define  XEN_NETRXF_gso_prefix		(1U<<_XEN_NETRXF_gso_prefix)
- 
- struct xen_netif_rx_response {
-     uint16_t id;
-     uint16_t offset;       /* Offset in page of start of received packet  */
--    uint16_t flags;        /* NETRXF_* */
-+    uint16_t flags;        /* XEN_NETRXF_* */
-     int16_t  status;       /* -ve: BLKIF_RSP_* ; +ve: Rx'ed pkt size. */
- };
- 
-@@ -153,10 +153,10 @@ DEFINE_RING_TYPES(xen_netif_rx,
- 		  struct xen_netif_rx_request,
- 		  struct xen_netif_rx_response);
- 
--#define NETIF_RSP_DROPPED         -2
--#define NETIF_RSP_ERROR           -1
--#define NETIF_RSP_OKAY             0
--/* No response: used for auxiliary requests (e.g., netif_tx_extra). */
--#define NETIF_RSP_NULL             1
-+#define XEN_NETIF_RSP_DROPPED	-2
-+#define XEN_NETIF_RSP_ERROR	-1
-+#define XEN_NETIF_RSP_OKAY	 0
-+/* No response: used for auxiliary requests (e.g., xen_netif_extra_info). */
-+#define XEN_NETIF_RSP_NULL	 1
- 
- #endif
--- 
-1.7.4
-
-
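The flag rename above preserves the header's two-step idiom: each flag is declared as a bit index (_XEN_NETTXF_*) plus a mask derived from it (1U << index), so callers can test or set whole masks while ring code can still refer to bit positions. A minimal standalone sketch of how a producer and consumer might use the renamed masks; the helper names are illustrative, not taken from the tree:

    #include <stdint.h>

    /* Values mirror the header above. */
    #define _XEN_NETTXF_csum_blank	(0)
    #define  XEN_NETTXF_csum_blank	(1U<<_XEN_NETTXF_csum_blank)
    #define _XEN_NETTXF_more_data	(2)
    #define  XEN_NETTXF_more_data	(1U<<_XEN_NETTXF_more_data)

    /* Producer side: flag a local packet that still needs its checksum
     * filled in and that continues in the next request slot. */
    static void mark_tx_request(uint16_t *flags)
    {
    	*flags |= XEN_NETTXF_csum_blank | XEN_NETTXF_more_data;
    }

    /* Consumer side: keep walking request slots while more_data is set. */
    static int tx_request_has_more(uint16_t flags)
    {
    	return (flags & XEN_NETTXF_more_data) != 0;
    }
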
-From 16d8ace0b7af72fb9e355eee165251e1f9b511e1 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Tue, 1 Feb 2011 16:50:24 +0000
-Subject: [PATCH 090/203] xen: netif: improve Kconfig help text for front- and backend drivers.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/net/Kconfig |   35 +++++++++++++++++++++++++++--------
- 1 files changed, 27 insertions(+), 8 deletions(-)
-
-diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
-index 5b088f5..1c77e183 100644
---- a/drivers/net/Kconfig
-+++ b/drivers/net/Kconfig
-@@ -2963,19 +2963,38 @@ config XEN_NETDEV_FRONTEND
- 	select XEN_XENBUS_FRONTEND
- 	default y
- 	help
--	  The network device frontend driver allows the kernel to
--	  access network devices exported exported by a virtual
--	  machine containing a physical network device driver. The
--	  frontend driver is intended for unprivileged guest domains;
--	  if you are compiling a kernel for a Xen guest, you almost
--	  certainly want to enable this.
-+	  This driver provides support for Xen paravirtual network
-+	  devices exported by a Xen network driver domain (often
-+	  domain 0).
-+
-+	  The corresponding Linux backend driver is enabled by the
-+	  CONFIG_XEN_NETDEV_BACKEND option.
-+
-+	  If you are compiling a kernel for use as a Xen guest, you
-+	  should say Y here. To compile this driver as a module, choose
-+	  M here: the module will be called xen-netfront.
- 
- config XEN_NETDEV_BACKEND
- 	tristate "Xen backend network device"
- 	depends on XEN_BACKEND
- 	help
--	  Implement the network backend driver, which passes packets
--	  from the guest domain's frontend drivers to the network.
-+	  This driver allows the kernel to act as a Xen network driver
-+	  domain which exports paravirtual network devices to other
-+	  Xen domains. These devices can be accessed by any operating
-+	  system that implements a compatible front end.
-+
-+	  The corresponding Linux frontend driver is enabled by the
-+	  CONFIG_XEN_NETDEV_FRONTEND configuration option.
-+
-+	  The backend driver presents a standard network device
-+	  endpoint for each paravirtual network device to the driver
-+	  domain network stack. These can then be bridged or routed
-+	  etc. in order to provide full network connectivity.
-+
-+	  If you are compiling a kernel to run in a Xen network driver
-+	  domain (often this is domain 0), you should say Y here. To
-+	  compile this driver as a module, choose M here: the module
-+	  will be called xen-netback.
- 
- config ISERIES_VETH
- 	tristate "iSeries Virtual Ethernet driver support"
--- 
-1.7.4
-
-
-From 54f3edcf01cab556198efc7d576d9feb4b836569 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Wed, 19 Jan 2011 14:41:55 +0000
-Subject: [PATCH 091/203] xen: netback: drop ethtool drvinfo callback
-
-The default provided by the network core is sufficient for our needs.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/net/xen-netback/interface.c |    9 ---------
- 1 files changed, 0 insertions(+), 9 deletions(-)
-
-diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
-index c41d93e..28b0832 100644
---- a/drivers/net/xen-netback/interface.c
-+++ b/drivers/net/xen-netback/interface.c
-@@ -170,13 +170,6 @@ static int netbk_set_tso(struct net_device *dev, u32 data)
- 	return 0;
- }
- 
--static void netbk_get_drvinfo(struct net_device *dev,
--			      struct ethtool_drvinfo *info)
--{
--	strcpy(info->driver, "netbk");
--	strcpy(info->bus_info, dev_name(dev->dev.parent));
--}
--
- static const struct netif_stat {
- 	char name[ETH_GSTRING_LEN];
- 	u16 offset;
-@@ -225,8 +218,6 @@ static void netbk_get_strings(struct net_device *dev, u32 stringset, u8 * data)
- }
- 
- static struct ethtool_ops network_ethtool_ops = {
--	.get_drvinfo = netbk_get_drvinfo,
--
- 	.get_tx_csum = ethtool_op_get_tx_csum,
- 	.set_tx_csum = netbk_set_tx_csum,
- 	.get_sg = ethtool_op_get_sg,
--- 
-1.7.4
-
-
-From 02f5acfcf4ad6c982c7eb0c6fd361749ad9140cd Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Thu, 20 Jan 2011 16:24:28 +0000
-Subject: [PATCH 092/203] xen: netback: use xen_netbk prefix where appropriate
-
-Do not use net_ or netif_ since these belong to the network core.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/net/xen-netback/netback.c |   84 ++++++++++++++++++------------------
- 1 files changed, 42 insertions(+), 42 deletions(-)
-
-diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
-index 69b4535..6ed4459 100644
---- a/drivers/net/xen-netback/netback.c
-+++ b/drivers/net/xen-netback/netback.c
-@@ -50,7 +50,7 @@
- struct xen_netbk *xen_netbk;
- int xen_netbk_group_nr;
- 
--static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx);
-+static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx);
- static void make_tx_response(struct xen_netif *netif,
- 			     struct xen_netif_tx_request *txp,
- 			     s8       st);
-@@ -61,9 +61,9 @@ static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
- 					     u16      size,
- 					     u16      flags);
- 
--static void net_tx_action(unsigned long data);
-+static void xen_netbk_tx_action(unsigned long data);
- 
--static void net_rx_action(unsigned long data);
-+static void xen_netbk_rx_action(unsigned long data);
- 
- static inline unsigned long idx_to_pfn(struct xen_netbk *netbk,
- 				       unsigned int idx)
-@@ -78,8 +78,8 @@ static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
- }
- 
- /* extra field used in struct page */
--static inline void netif_set_page_ext(struct page *pg,
--				      unsigned int group, unsigned int idx)
-+static inline void set_page_ext(struct page *pg,
-+				unsigned int group, unsigned int idx)
- {
- 	union page_ext ext = { .e = { .group = group + 1, .idx = idx } };
- 
-@@ -87,8 +87,8 @@ static inline void netif_set_page_ext(struct page *pg,
- 	pg->mapping = ext.mapping;
- }
- 
--static int netif_get_page_ext(struct page *pg,
--			      unsigned int *pgroup, unsigned int *pidx)
-+static int get_page_ext(struct page *pg,
-+			unsigned int *pgroup, unsigned int *pidx)
- {
- 	union page_ext ext = { .mapping = pg->mapping };
- 	struct xen_netbk *netbk;
-@@ -445,11 +445,11 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
- 	struct gnttab_copy *copy_gop;
- 	struct netbk_rx_meta *meta;
- 	/*
--	 * These variables are used iff netif_get_page_ext returns true,
-+	 * These variables are used iff get_page_ext returns true,
- 	 * in which case they are guaranteed to be initialized.
- 	 */
- 	unsigned int uninitialized_var(group), uninitialized_var(idx);
--	int foreign = netif_get_page_ext(page, &group, &idx);
-+	int foreign = get_page_ext(page, &group, &idx);
- 	unsigned long bytes;
- 
- 	/* Data must not cross a page boundary. */
-@@ -627,7 +627,7 @@ struct skb_cb_overlay {
- 	int meta_slots_used;
- };
- 
--static void net_rx_action(unsigned long data)
-+static void xen_netbk_rx_action(unsigned long data)
- {
- 	struct xen_netif *netif = NULL;
- 	struct xen_netbk *netbk = (struct xen_netbk *)data;
-@@ -915,7 +915,7 @@ static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
- 	do {
- 		dp = netbk->dealloc_prod;
- 
--		/* Ensure we see all indices enqueued by netif_idx_release(). */
-+		/* Ensure we see all indices enqueued by xen_netbk_idx_release(). */
- 		smp_rmb();
- 
- 		while (dc != dp) {
-@@ -1061,11 +1061,11 @@ static int netbk_count_requests(struct xen_netif *netif,
- 	return frags;
- }
- 
--static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk,
--						       struct xen_netif *netif,
--						       struct sk_buff *skb,
--						       struct xen_netif_tx_request *txp,
--						       struct gnttab_map_grant_ref *mop)
-+static struct gnttab_map_grant_ref *xen_netbk_get_requests(struct xen_netbk *netbk,
-+							   struct xen_netif *netif,
-+							   struct sk_buff *skb,
-+							   struct xen_netif_tx_request *txp,
-+							   struct gnttab_map_grant_ref *mop)
- {
- 	struct skb_shared_info *shinfo = skb_shinfo(skb);
- 	skb_frag_t *frags = shinfo->frags;
-@@ -1096,9 +1096,9 @@ static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk,
- 	return mop;
- }
- 
--static int netbk_tx_check_mop(struct xen_netbk *netbk,
--			      struct sk_buff *skb,
--			      struct gnttab_map_grant_ref **mopp)
-+static int xen_netbk_tx_check_mop(struct xen_netbk *netbk,
-+				  struct sk_buff *skb,
-+				  struct gnttab_map_grant_ref **mopp)
- {
- 	struct gnttab_map_grant_ref *mop = *mopp;
- 	int pending_idx = *((u16 *)skb->data);
-@@ -1145,7 +1145,7 @@ static int netbk_tx_check_mop(struct xen_netbk *netbk,
- 			netbk->grant_tx_handle[pending_idx] = mop->handle;
- 			/* Had a previous error? Invalidate this fragment. */
- 			if (unlikely(err))
--				netif_idx_release(netbk, pending_idx);
-+				xen_netbk_idx_release(netbk, pending_idx);
- 			continue;
- 		}
- 
-@@ -1162,10 +1162,10 @@ static int netbk_tx_check_mop(struct xen_netbk *netbk,
- 
- 		/* First error: invalidate header and preceding fragments. */
- 		pending_idx = *((u16 *)skb->data);
--		netif_idx_release(netbk, pending_idx);
-+		xen_netbk_idx_release(netbk, pending_idx);
- 		for (j = start; j < i; j++) {
- 			pending_idx = (unsigned long)shinfo->frags[i].page;
--			netif_idx_release(netbk, pending_idx);
-+			xen_netbk_idx_release(netbk, pending_idx);
- 		}
- 
- 		/* Remember the error: invalidate all subsequent fragments. */
-@@ -1176,7 +1176,7 @@ static int netbk_tx_check_mop(struct xen_netbk *netbk,
- 	return err;
- }
- 
--static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
-+static void xen_netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
- {
- 	struct skb_shared_info *shinfo = skb_shinfo(skb);
- 	int nr_frags = shinfo->nr_frags;
-@@ -1359,7 +1359,7 @@ static bool tx_credit_exceeded(struct xen_netif *netif, unsigned size)
- 	return false;
- }
- 
--static unsigned net_tx_build_mops(struct xen_netbk *netbk)
-+static unsigned xen_netbk_tx_build_mops(struct xen_netbk *netbk)
- {
- 	struct gnttab_map_grant_ref *mop;
- 	struct sk_buff *skb;
-@@ -1493,7 +1493,7 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk)
- 
- 		netbk->pending_cons++;
- 
--		mop = netbk_get_requests(netbk, netif, skb, txfrags, mop);
-+		mop = xen_netbk_get_requests(netbk, netif, skb, txfrags, mop);
- 
- 		netif->tx.req_cons = idx;
- 		netif_schedule_work(netif);
-@@ -1505,7 +1505,7 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk)
- 	return mop - netbk->tx_map_ops;
- }
- 
--static void net_tx_submit(struct xen_netbk *netbk)
-+static void xen_netbk_tx_submit(struct xen_netbk *netbk)
- {
- 	struct gnttab_map_grant_ref *mop;
- 	struct sk_buff *skb;
-@@ -1522,7 +1522,7 @@ static void net_tx_submit(struct xen_netbk *netbk)
- 		txp = &netbk->pending_tx_info[pending_idx].req;
- 
- 		/* Check the remap error code. */
--		if (unlikely(netbk_tx_check_mop(netbk, skb, &mop))) {
-+		if (unlikely(xen_netbk_tx_check_mop(netbk, skb, &mop))) {
- 			pr_debug("netback grant failed.\n");
- 			skb_shinfo(skb)->nr_frags = 0;
- 			kfree_skb(skb);
-@@ -1539,7 +1539,7 @@ static void net_tx_submit(struct xen_netbk *netbk)
- 			txp->size -= data_len;
- 		} else {
- 			/* Schedule a response immediately. */
--			netif_idx_release(netbk, pending_idx);
-+			xen_netbk_idx_release(netbk, pending_idx);
- 		}
- 
- 		if (txp->flags & XEN_NETTXF_csum_blank)
-@@ -1547,7 +1547,7 @@ static void net_tx_submit(struct xen_netbk *netbk)
- 		else if (txp->flags & XEN_NETTXF_data_validated)
- 			skb->ip_summed = CHECKSUM_UNNECESSARY;
- 
--		netbk_fill_frags(netbk, skb);
-+		xen_netbk_fill_frags(netbk, skb);
- 
- 		/*
- 		 * If the initial fragment was < PKT_PROT_LEN then
-@@ -1584,7 +1584,7 @@ static void net_tx_submit(struct xen_netbk *netbk)
- }
- 
- /* Called after netfront has transmitted */
--static void net_tx_action(unsigned long data)
-+static void xen_netbk_tx_action(unsigned long data)
- {
- 	struct xen_netbk *netbk = (struct xen_netbk *)data;
- 	unsigned nr_mops;
-@@ -1592,7 +1592,7 @@ static void net_tx_action(unsigned long data)
- 
- 	net_tx_action_dealloc(netbk);
- 
--	nr_mops = net_tx_build_mops(netbk);
-+	nr_mops = xen_netbk_tx_build_mops(netbk);
- 
- 	if (nr_mops == 0)
- 		goto out;
-@@ -1601,7 +1601,7 @@ static void net_tx_action(unsigned long data)
- 					netbk->tx_map_ops, nr_mops);
- 	BUG_ON(ret);
- 
--	net_tx_submit(netbk);
-+	xen_netbk_tx_submit(netbk);
- out:
- 	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
- 	    !list_empty(&netbk->pending_inuse_head)) {
-@@ -1614,7 +1614,7 @@ out:
- 	}
- }
- 
--static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
-+static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx)
- {
- 	static DEFINE_SPINLOCK(_lock);
- 	unsigned long flags;
-@@ -1634,12 +1634,12 @@ static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
- static void netif_page_release(struct page *page, unsigned int order)
- {
- 	unsigned int group, idx;
--	int foreign = netif_get_page_ext(page, &group, &idx);
-+	int foreign = get_page_ext(page, &group, &idx);
- 
- 	BUG_ON(!foreign);
- 	BUG_ON(order);
- 
--	netif_idx_release(&xen_netbk[group], idx);
-+	xen_netbk_idx_release(&xen_netbk[group], idx);
- }
- 
- irqreturn_t netif_be_int(int irq, void *dev_id)
-@@ -1726,7 +1726,7 @@ static inline int tx_work_todo(struct xen_netbk *netbk)
- 	return 0;
- }
- 
--static int netbk_action_thread(void *data)
-+static int xen_netbk_action_thread(void *data)
- {
- 	struct xen_netbk *netbk = (struct xen_netbk *)data;
- 	while (!kthread_should_stop()) {
-@@ -1740,10 +1740,10 @@ static int netbk_action_thread(void *data)
- 			break;
- 
- 		if (rx_work_todo(netbk))
--			net_rx_action((unsigned long)netbk);
-+			xen_netbk_rx_action((unsigned long)netbk);
- 
- 		if (tx_work_todo(netbk))
--			net_tx_action((unsigned long)netbk);
-+			xen_netbk_tx_action((unsigned long)netbk);
- 	}
- 
- 	return 0;
-@@ -1794,7 +1794,7 @@ static int __init netback_init(void)
- 		for (i = 0; i < MAX_PENDING_REQS; i++) {
- 			page = netbk->mmap_pages[i];
- 			SetPageForeign(page, netif_page_release);
--			netif_set_page_ext(page, group, i);
-+			set_page_ext(page, group, i);
- 			INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
- 		}
- 
-@@ -1806,7 +1806,7 @@ static int __init netback_init(void)
- 		if (MODPARM_netback_kthread) {
- 			init_waitqueue_head(&netbk->kthread.netbk_action_wq);
- 			netbk->kthread.task =
--				kthread_create(netbk_action_thread,
-+				kthread_create(xen_netbk_action_thread,
- 					       (void *)netbk,
- 					       "netback/%u", group);
- 
-@@ -1824,10 +1824,10 @@ static int __init netback_init(void)
- 			}
- 		} else {
- 			tasklet_init(&netbk->tasklet.net_tx_tasklet,
--				     net_tx_action,
-+				     xen_netbk_tx_action,
- 				     (unsigned long)netbk);
- 			tasklet_init(&netbk->tasklet.net_rx_tasklet,
--				     net_rx_action,
-+				     xen_netbk_rx_action,
- 				     (unsigned long)netbk);
- 		}
- 
--- 
-1.7.4
-
-
-From 0feb757d74b864a7d8d00fb57a8c523bd3081d42 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Thu, 20 Jan 2011 17:00:54 +0000
-Subject: [PATCH 093/203] xen: netback: refactor to make all xen_netbk knowledge internal to netback.c
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/net/xen-netback/common.h    |   95 +---------------------------
- drivers/net/xen-netback/interface.c |   28 +--------
- drivers/net/xen-netback/netback.c   |  117 ++++++++++++++++++++++++++++++++++-
- 3 files changed, 120 insertions(+), 120 deletions(-)
-
-diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
-index 2d727a0..32a9e4f 100644
---- a/drivers/net/xen-netback/common.h
-+++ b/drivers/net/xen-netback/common.h
-@@ -177,97 +177,8 @@ static inline int netbk_can_sg(struct net_device *dev)
- 	return netif->can_sg;
- }
- 
--struct pending_tx_info {
--	struct xen_netif_tx_request req;
--	struct xen_netif *netif;
--};
--typedef unsigned int pending_ring_idx_t;
--
--struct netbk_rx_meta {
--	int id;
--	int size;
--	int gso_size;
--};
--
--struct netbk_tx_pending_inuse {
--	struct list_head list;
--	unsigned long alloc_time;
--};
--
--#define MAX_PENDING_REQS 256
--
--#define MAX_BUFFER_OFFSET PAGE_SIZE
--
--/* extra field used in struct page */
--union page_ext {
--	struct {
--#if BITS_PER_LONG < 64
--#define IDX_WIDTH   8
--#define GROUP_WIDTH (BITS_PER_LONG - IDX_WIDTH)
--		unsigned int group:GROUP_WIDTH;
--		unsigned int idx:IDX_WIDTH;
--#else
--		unsigned int group, idx;
--#endif
--	} e;
--	void *mapping;
--};
--
--struct xen_netbk {
--	union {
--		struct {
--			struct tasklet_struct net_tx_tasklet;
--			struct tasklet_struct net_rx_tasklet;
--		} tasklet;
--
--		struct {
--			wait_queue_head_t netbk_action_wq;
--			struct task_struct *task;
--		} kthread;
--	};
--
--	struct sk_buff_head rx_queue;
--	struct sk_buff_head tx_queue;
--
--	struct timer_list net_timer;
--	struct timer_list netbk_tx_pending_timer;
--
--	struct page **mmap_pages;
--
--	pending_ring_idx_t pending_prod;
--	pending_ring_idx_t pending_cons;
--	pending_ring_idx_t dealloc_prod;
--	pending_ring_idx_t dealloc_cons;
--
--	struct list_head pending_inuse_head;
--	struct list_head net_schedule_list;
--
--	/* Protect the net_schedule_list in netif. */
--	spinlock_t net_schedule_list_lock;
--
--	atomic_t netfront_count;
--
--	struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
--	struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
--	struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
--	struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
--
--	grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
--	u16 pending_ring[MAX_PENDING_REQS];
--	u16 dealloc_ring[MAX_PENDING_REQS];
--
--	/*
--	 * Each head or fragment can be up to 4096 bytes. Given
--	 * MAX_BUFFER_OFFSET of 4096 the worst case is that each
--	 * head/fragment uses 2 copy operation.
--	 */
--	struct gnttab_copy grant_copy_op[2*NET_RX_RING_SIZE];
--	unsigned char rx_notify[NR_IRQS];
--	u16 notify_list[NET_RX_RING_SIZE];
--	struct netbk_rx_meta meta[2*NET_RX_RING_SIZE];
--};
--
--extern struct xen_netbk *xen_netbk;
--extern int xen_netbk_group_nr;
-+/* (De)Register a netif with the netback backend. */
-+void xen_netbk_add_netif(struct xen_netif *netif);
-+void xen_netbk_remove_netif(struct xen_netif *netif);
- 
- #endif /* __XEN_NETBACK__COMMON_H__ */
-diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
-index 28b0832..54ae275 100644
---- a/drivers/net/xen-netback/interface.c
-+++ b/drivers/net/xen-netback/interface.c
-@@ -38,33 +38,9 @@
- 
- #define NETBK_QUEUE_LENGTH 32
- 
--static void netbk_add_netif(struct xen_netbk *netbk, int group_nr,
--			   struct xen_netif *netif)
--{
--	int i;
--	int min_netfront_count;
--	int min_group = 0;
--	min_netfront_count = atomic_read(&netbk[0].netfront_count);
--	for (i = 0; i < group_nr; i++) {
--		int netfront_count = atomic_read(&netbk[i].netfront_count);
--		if (netfront_count < min_netfront_count) {
--			min_group = i;
--			min_netfront_count = netfront_count;
--		}
--	}
--
--	netif->group = min_group;
--	atomic_inc(&netbk[netif->group].netfront_count);
--}
--
--static void netbk_remove_netif(struct xen_netbk *netbk, struct xen_netif *netif)
--{
--	atomic_dec(&netbk[netif->group].netfront_count);
--}
--
- static void __netif_up(struct xen_netif *netif)
- {
--	netbk_add_netif(xen_netbk, xen_netbk_group_nr, netif);
-+	xen_netbk_add_netif(netif);
- 	enable_irq(netif->irq);
- 	netif_schedule_work(netif);
- }
-@@ -73,7 +49,7 @@ static void __netif_down(struct xen_netif *netif)
- {
- 	disable_irq(netif->irq);
- 	netif_deschedule_work(netif);
--	netbk_remove_netif(xen_netbk, netif);
-+	xen_netbk_remove_netif(netif);
- }
- 
- static int net_open(struct net_device *dev)
-diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
-index 6ed4459..d10ddbc 100644
---- a/drivers/net/xen-netback/netback.c
-+++ b/drivers/net/xen-netback/netback.c
-@@ -47,8 +47,121 @@
- #include <asm/xen/hypercall.h>
- #include <asm/xen/page.h>
- 
--struct xen_netbk *xen_netbk;
--int xen_netbk_group_nr;
-+struct pending_tx_info {
-+	struct xen_netif_tx_request req;
-+	struct xen_netif *netif;
-+};
-+typedef unsigned int pending_ring_idx_t;
-+
-+struct netbk_rx_meta {
-+	int id;
-+	int size;
-+	int gso_size;
-+};
-+
-+struct netbk_tx_pending_inuse {
-+	struct list_head list;
-+	unsigned long alloc_time;
-+};
-+
-+#define MAX_PENDING_REQS 256
-+
-+#define MAX_BUFFER_OFFSET PAGE_SIZE
-+
-+/* extra field used in struct page */
-+union page_ext {
-+	struct {
-+#if BITS_PER_LONG < 64
-+#define IDX_WIDTH   8
-+#define GROUP_WIDTH (BITS_PER_LONG - IDX_WIDTH)
-+		unsigned int group:GROUP_WIDTH;
-+		unsigned int idx:IDX_WIDTH;
-+#else
-+		unsigned int group, idx;
-+#endif
-+	} e;
-+	void *mapping;
-+};
-+
-+struct xen_netbk {
-+	union {
-+		struct {
-+			struct tasklet_struct net_tx_tasklet;
-+			struct tasklet_struct net_rx_tasklet;
-+		} tasklet;
-+
-+		struct {
-+			wait_queue_head_t netbk_action_wq;
-+			struct task_struct *task;
-+		} kthread;
-+	};
-+
-+	struct sk_buff_head rx_queue;
-+	struct sk_buff_head tx_queue;
-+
-+	struct timer_list net_timer;
-+	struct timer_list netbk_tx_pending_timer;
-+
-+	struct page **mmap_pages;
-+
-+	pending_ring_idx_t pending_prod;
-+	pending_ring_idx_t pending_cons;
-+	pending_ring_idx_t dealloc_prod;
-+	pending_ring_idx_t dealloc_cons;
-+
-+	struct list_head pending_inuse_head;
-+	struct list_head net_schedule_list;
-+
-+	/* Protect the net_schedule_list in netif. */
-+	spinlock_t net_schedule_list_lock;
-+
-+	atomic_t netfront_count;
-+
-+	struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
-+	struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
-+	struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
-+	struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
-+
-+	grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
-+	u16 pending_ring[MAX_PENDING_REQS];
-+	u16 dealloc_ring[MAX_PENDING_REQS];
-+
-+	/*
-+	 * Each head or fragment can be up to 4096 bytes. Given
-+	 * MAX_BUFFER_OFFSET of 4096, the worst case is that each
-+	 * head/fragment uses 2 copy operations.
-+	 */
-+	struct gnttab_copy grant_copy_op[2*NET_RX_RING_SIZE];
-+	unsigned char rx_notify[NR_IRQS];
-+	u16 notify_list[NET_RX_RING_SIZE];
-+	struct netbk_rx_meta meta[2*NET_RX_RING_SIZE];
-+};
-+
-+static struct xen_netbk *xen_netbk;
-+static int xen_netbk_group_nr;
-+
-+void xen_netbk_add_netif(struct xen_netif *netif)
-+{
-+	int i;
-+	int min_netfront_count;
-+	int min_group = 0;
-+	min_netfront_count = atomic_read(&xen_netbk[0].netfront_count);
-+	for (i = 0; i < xen_netbk_group_nr; i++) {
-+		int netfront_count = atomic_read(&xen_netbk[i].netfront_count);
-+		if (netfront_count < min_netfront_count) {
-+			min_group = i;
-+			min_netfront_count = netfront_count;
-+		}
-+	}
-+
-+	netif->group = min_group;
-+	atomic_inc(&xen_netbk[netif->group].netfront_count);
-+}
-+
-+void xen_netbk_remove_netif(struct xen_netif *netif)
-+{
-+	atomic_dec(&xen_netbk[netif->group].netfront_count);
-+}
- 
- static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx);
- static void make_tx_response(struct xen_netif *netif,
--- 
-1.7.4
-
-
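The union page_ext moved into netback.c above packs a (group, idx) pair into the pointer-sized page->mapping slot, so a backend page can be traced back to its xen_netbk group and pending-request index without any extra storage; the group is stored biased by +1 so that a NULL mapping never decodes as a valid entry. A standalone sketch of the encode/decode round trip, assuming the 32-bit (BITS_PER_LONG < 64) layout from the patch; bitfield ordering is compiler-dependent, as it is in the kernel code itself:

    #include <assert.h>
    #include <stdio.h>

    /* 32-bit layout from the patch: 24 bits of group, 8 bits of idx. */
    #define IDX_WIDTH   8
    #define GROUP_WIDTH (32 - IDX_WIDTH)

    union page_ext {
    	struct {
    		unsigned int group:GROUP_WIDTH;
    		unsigned int idx:IDX_WIDTH;
    	} e;
    	void *mapping;
    };

    int main(void)
    {
    	union page_ext ext = { .mapping = 0 };	/* zero any unused bytes */

    	ext.e.group = 3 + 1;	/* encode group 3 with the +1 bias */
    	ext.e.idx = 42;
    	void *stored = ext.mapping;	/* what set_page_ext() stashes away */

    	union page_ext back = { .mapping = stored };
    	assert(back.e.group - 1 == 3 && back.e.idx == 42);
    	printf("group=%d idx=%d\n", back.e.group - 1, back.e.idx);
    	return 0;
    }
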
-From 651ae17d2aaedb2adca37c7b7c9eda195c3cad6b Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Thu, 20 Jan 2011 17:08:23 +0000
-Subject: [PATCH 094/203] xen: netback: use xenvif_ prefix where appropriate
-
-Avoids use of netif_ prefix which belongs to the networking core.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/net/xen-netback/common.h    |   65 +++---
- drivers/net/xen-netback/interface.c |  316 ++++++++++++++--------------
- drivers/net/xen-netback/netback.c   |  394 +++++++++++++++++------------------
- drivers/net/xen-netback/xenbus.c    |   72 +++----
- 4 files changed, 422 insertions(+), 425 deletions(-)
-
-diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
-index 32a9e4f..f6da94b 100644
---- a/drivers/net/xen-netback/common.h
-+++ b/drivers/net/xen-netback/common.h
-@@ -46,7 +46,7 @@
- #include <xen/grant_table.h>
- #include <xen/xenbus.h>
- 
--struct xen_netif {
-+struct xenvif {
- 	/* Unique identifier for this interface. */
- 	domid_t          domid;
- 	int              group;
-@@ -79,7 +79,7 @@ struct xen_netif {
- 	/* Internal feature information. */
- 	u8 can_queue:1;	    /* can queue packets for receiver? */
- 
--	/* Allow netif_be_start_xmit() to peek ahead in the rx request
-+	/* Allow xenvif_start_xmit() to peek ahead in the rx request
- 	 * ring.  This is a prediction of what rx_req_cons will be once
- 	 * all queued skbs are put on the ring. */
- 	RING_IDX rx_req_cons_peek;
-@@ -111,9 +111,9 @@ struct xen_netif {
-  * immediately be called, which can cause packet loss; also the etherbridge
-  * can be rather lazy in activating its port).
-  */
--#define netback_carrier_on(netif)	((netif)->carrier = 1)
--#define netback_carrier_off(netif)	((netif)->carrier = 0)
--#define netback_carrier_ok(netif)	((netif)->carrier)
-+#define xenvif_carrier_on(xenvif)	((xenvif)->carrier = 1)
-+#define xenvif_carrier_off(xenvif)	((xenvif)->carrier = 0)
-+#define xenvif_carrier_ok(xenvif)	((xenvif)->carrier)
- 
- enum {
- 	NETBK_DONT_COPY_SKB,
-@@ -125,7 +125,7 @@ extern int netbk_copy_skb_mode;
- 
- struct backend_info {
- 	struct xenbus_device *dev;
--	struct xen_netif *netif;
-+	struct xenvif *vif;
- 	enum xenbus_state frontend_state;
- 	struct xenbus_watch hotplug_status_watch;
- 	int have_hotplug_status_watch:1;
-@@ -134,51 +134,52 @@ struct backend_info {
- #define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
- #define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
- 
--void netif_disconnect(struct xen_netif *netif);
-+void xenvif_disconnect(struct xenvif *vif);
- 
--void netif_set_features(struct xen_netif *netif);
--struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
--			      unsigned int handle);
--int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
--	      unsigned long rx_ring_ref, unsigned int evtchn);
-+void xenvif_set_features(struct xenvif *vif);
-+struct xenvif *xenvif_alloc(struct device *parent,
-+			    domid_t domid,
-+			    unsigned int handle);
-+int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
-+		   unsigned long rx_ring_ref, unsigned int evtchn);
- 
--static inline void netif_get(struct xen_netif *netif)
-+static inline void xenvif_get(struct xenvif *vif)
- {
--	atomic_inc(&netif->refcnt);
-+	atomic_inc(&vif->refcnt);
- }
- 
--static inline void  netif_put(struct xen_netif *netif)
-+static inline void xenvif_put(struct xenvif *vif)
- {
--	if (atomic_dec_and_test(&netif->refcnt))
--		wake_up(&netif->waiting_to_free);
-+	if (atomic_dec_and_test(&vif->refcnt))
-+		wake_up(&vif->waiting_to_free);
- }
- 
--int netif_xenbus_init(void);
-+int xenvif_xenbus_init(void);
- 
--#define netif_schedulable(netif)				\
--	(netif_running((netif)->dev) && netback_carrier_ok(netif))
-+#define xenvif_schedulable(vif)	\
-+	(netif_running((vif)->dev) && xenvif_carrier_ok(vif))
- 
--void netif_schedule_work(struct xen_netif *netif);
--void netif_deschedule_work(struct xen_netif *netif);
-+void xenvif_schedule_work(struct xenvif *vif);
-+void xenvif_deschedule_work(struct xenvif *vif);
- 
--int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
--struct net_device_stats *netif_be_get_stats(struct net_device *dev);
--irqreturn_t netif_be_int(int irq, void *dev_id);
-+int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev);
-+struct net_device_stats *xenvif_get_stats(struct net_device *dev);
-+irqreturn_t xenvif_interrupt(int irq, void *dev_id);
- 
- static inline int netbk_can_queue(struct net_device *dev)
- {
--	struct xen_netif *netif = netdev_priv(dev);
--	return netif->can_queue;
-+	struct xenvif *vif = netdev_priv(dev);
-+	return vif->can_queue;
- }
- 
- static inline int netbk_can_sg(struct net_device *dev)
- {
--	struct xen_netif *netif = netdev_priv(dev);
--	return netif->can_sg;
-+	struct xenvif *vif = netdev_priv(dev);
-+	return vif->can_sg;
- }
- 
--/* (De)Register a netif with the netback backend. */
--void xen_netbk_add_netif(struct xen_netif *netif);
--void xen_netbk_remove_netif(struct xen_netif *netif);
-+/* (De)Register a xenvif with the netback backend. */
-+void xen_netbk_add_xenvif(struct xenvif *vif);
-+void xen_netbk_remove_xenvif(struct xenvif *vif);
- 
- #endif /* __XEN_NETBACK__COMMON_H__ */
-diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
-index 54ae275..a065173 100644
---- a/drivers/net/xen-netback/interface.c
-+++ b/drivers/net/xen-netback/interface.c
-@@ -38,40 +38,40 @@
- 
- #define NETBK_QUEUE_LENGTH 32
- 
--static void __netif_up(struct xen_netif *netif)
-+static void xenvif_up(struct xenvif *vif)
- {
--	xen_netbk_add_netif(netif);
--	enable_irq(netif->irq);
--	netif_schedule_work(netif);
-+	xen_netbk_add_xenvif(vif);
-+	enable_irq(vif->irq);
-+	xenvif_schedule_work(vif);
- }
- 
--static void __netif_down(struct xen_netif *netif)
-+static void xenvif_down(struct xenvif *vif)
- {
--	disable_irq(netif->irq);
--	netif_deschedule_work(netif);
--	xen_netbk_remove_netif(netif);
-+	disable_irq(vif->irq);
-+	xenvif_deschedule_work(vif);
-+	xen_netbk_remove_xenvif(vif);
- }
- 
--static int net_open(struct net_device *dev)
-+static int xenvif_open(struct net_device *dev)
- {
--	struct xen_netif *netif = netdev_priv(dev);
--	if (netback_carrier_ok(netif)) {
--		__netif_up(netif);
-+	struct xenvif *vif = netdev_priv(dev);
-+	if (xenvif_carrier_ok(vif)) {
-+		xenvif_up(vif);
- 		netif_start_queue(dev);
- 	}
- 	return 0;
- }
- 
--static int net_close(struct net_device *dev)
-+static int xenvif_close(struct net_device *dev)
- {
--	struct xen_netif *netif = netdev_priv(dev);
--	if (netback_carrier_ok(netif))
--		__netif_down(netif);
-+	struct xenvif *vif = netdev_priv(dev);
-+	if (xenvif_carrier_ok(vif))
-+		xenvif_down(vif);
- 	netif_stop_queue(dev);
- 	return 0;
- }
- 
--static int netbk_change_mtu(struct net_device *dev, int mtu)
-+static int xenvif_change_mtu(struct net_device *dev, int mtu)
- {
- 	int max = netbk_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
- 
-@@ -81,19 +81,19 @@ static int netbk_change_mtu(struct net_device *dev, int mtu)
- 	return 0;
- }
- 
--void netif_set_features(struct xen_netif *netif)
-+void xenvif_set_features(struct xenvif *vif)
- {
--	struct net_device *dev = netif->dev;
-+	struct net_device *dev = vif->dev;
- 	int features = dev->features;
- 
--	if (netif->can_sg)
-+	if (vif->can_sg)
- 		features |= NETIF_F_SG;
--	if (netif->gso || netif->gso_prefix)
-+	if (vif->gso || vif->gso_prefix)
- 		features |= NETIF_F_TSO;
--	if (netif->csum)
-+	if (vif->csum)
- 		features |= NETIF_F_IP_CSUM;
- 
--	features &= ~(netif->features_disabled);
-+	features &= ~(vif->features_disabled);
- 
- 	if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN)
- 		dev->mtu = ETH_DATA_LEN;
-@@ -101,130 +101,130 @@ void netif_set_features(struct xen_netif *netif)
- 	dev->features = features;
- }
- 
--static int netbk_set_tx_csum(struct net_device *dev, u32 data)
-+static int xenvif_set_tx_csum(struct net_device *dev, u32 data)
- {
--	struct xen_netif *netif = netdev_priv(dev);
-+	struct xenvif *vif = netdev_priv(dev);
- 	if (data) {
--		if (!netif->csum)
-+		if (!vif->csum)
- 			return -EOPNOTSUPP;
--		netif->features_disabled &= ~NETIF_F_IP_CSUM;
-+		vif->features_disabled &= ~NETIF_F_IP_CSUM;
- 	} else {
--		netif->features_disabled |= NETIF_F_IP_CSUM;
-+		vif->features_disabled |= NETIF_F_IP_CSUM;
- 	}
- 
--	netif_set_features(netif);
-+	xenvif_set_features(vif);
- 	return 0;
- }
- 
--static int netbk_set_sg(struct net_device *dev, u32 data)
-+static int xenvif_set_sg(struct net_device *dev, u32 data)
- {
--	struct xen_netif *netif = netdev_priv(dev);
-+	struct xenvif *vif = netdev_priv(dev);
- 	if (data) {
--		if (!netif->can_sg)
-+		if (!vif->can_sg)
- 			return -EOPNOTSUPP;
--		netif->features_disabled &= ~NETIF_F_SG;
-+		vif->features_disabled &= ~NETIF_F_SG;
- 	} else {
--		netif->features_disabled |= NETIF_F_SG;
-+		vif->features_disabled |= NETIF_F_SG;
- 	}
- 
--	netif_set_features(netif);
-+	xenvif_set_features(vif);
- 	return 0;
- }
- 
--static int netbk_set_tso(struct net_device *dev, u32 data)
-+static int xenvif_set_tso(struct net_device *dev, u32 data)
- {
--	struct xen_netif *netif = netdev_priv(dev);
-+	struct xenvif *vif = netdev_priv(dev);
- 	if (data) {
--		if (!netif->gso && !netif->gso_prefix)
-+		if (!vif->gso && !vif->gso_prefix)
- 			return -EOPNOTSUPP;
--		netif->features_disabled &= ~NETIF_F_TSO;
-+		vif->features_disabled &= ~NETIF_F_TSO;
- 	} else {
--		netif->features_disabled |= NETIF_F_TSO;
-+		vif->features_disabled |= NETIF_F_TSO;
- 	}
- 
--	netif_set_features(netif);
-+	xenvif_set_features(vif);
- 	return 0;
- }
- 
--static const struct netif_stat {
-+static const struct xenvif_stat {
- 	char name[ETH_GSTRING_LEN];
- 	u16 offset;
--} netbk_stats[] = {
-+} xenvif_stats[] = {
- 	{
- 		"copied_skbs",
--		offsetof(struct xen_netif, nr_copied_skbs)
-+		offsetof(struct xenvif, nr_copied_skbs)
- 	},
- 	{
- 		"rx_gso_checksum_fixup",
--		offsetof(struct xen_netif, rx_gso_checksum_fixup)
-+		offsetof(struct xenvif, rx_gso_checksum_fixup)
- 	},
- };
- 
--static int netbk_get_sset_count(struct net_device *dev, int string_set)
-+static int xenvif_get_sset_count(struct net_device *dev, int string_set)
- {
- 	switch (string_set) {
- 	case ETH_SS_STATS:
--		return ARRAY_SIZE(netbk_stats);
-+		return ARRAY_SIZE(xenvif_stats);
- 	default:
- 		return -EINVAL;
- 	}
- }
- 
--static void netbk_get_ethtool_stats(struct net_device *dev,
--				   struct ethtool_stats *stats, u64 * data)
-+static void xenvif_get_ethtool_stats(struct net_device *dev,
-+				     struct ethtool_stats *stats, u64 * data)
- {
--	void *netif = netdev_priv(dev);
-+	void *vif = netdev_priv(dev);
- 	int i;
- 
--	for (i = 0; i < ARRAY_SIZE(netbk_stats); i++)
--		data[i] = *(int *)(netif + netbk_stats[i].offset);
-+	for (i = 0; i < ARRAY_SIZE(xenvif_stats); i++)
-+		data[i] = *(int *)(vif + xenvif_stats[i].offset);
- }
- 
--static void netbk_get_strings(struct net_device *dev, u32 stringset, u8 * data)
-+static void xenvif_get_strings(struct net_device *dev, u32 stringset, u8 * data)
- {
- 	int i;
- 
- 	switch (stringset) {
- 	case ETH_SS_STATS:
--		for (i = 0; i < ARRAY_SIZE(netbk_stats); i++)
-+		for (i = 0; i < ARRAY_SIZE(xenvif_stats); i++)
- 			memcpy(data + i * ETH_GSTRING_LEN,
--			       netbk_stats[i].name, ETH_GSTRING_LEN);
-+			       xenvif_stats[i].name, ETH_GSTRING_LEN);
- 		break;
- 	}
- }
- 
--static struct ethtool_ops network_ethtool_ops = {
--	.get_tx_csum = ethtool_op_get_tx_csum,
--	.set_tx_csum = netbk_set_tx_csum,
--	.get_sg = ethtool_op_get_sg,
--	.set_sg = netbk_set_sg,
--	.get_tso = ethtool_op_get_tso,
--	.set_tso = netbk_set_tso,
--	.get_link = ethtool_op_get_link,
--
--	.get_sset_count = netbk_get_sset_count,
--	.get_ethtool_stats = netbk_get_ethtool_stats,
--	.get_strings = netbk_get_strings,
-+static struct ethtool_ops xenvif_ethtool_ops = {
-+	.get_tx_csum	= ethtool_op_get_tx_csum,
-+	.set_tx_csum	= xenvif_set_tx_csum,
-+	.get_sg		= ethtool_op_get_sg,
-+	.set_sg		= xenvif_set_sg,
-+	.get_tso	= ethtool_op_get_tso,
-+	.set_tso	= xenvif_set_tso,
-+	.get_link	= ethtool_op_get_link,
-+
-+	.get_sset_count = xenvif_get_sset_count,
-+	.get_ethtool_stats = xenvif_get_ethtool_stats,
-+	.get_strings = xenvif_get_strings,
- };
- 
--static struct net_device_ops netback_ops = {
--	.ndo_start_xmit	= netif_be_start_xmit,
--	.ndo_get_stats	= netif_be_get_stats,
--	.ndo_open	= net_open,
--	.ndo_stop	= net_close,
--	.ndo_change_mtu	= netbk_change_mtu,
-+static struct net_device_ops xenvif_netdev_ops = {
-+	.ndo_start_xmit	= xenvif_start_xmit,
-+	.ndo_get_stats	= xenvif_get_stats,
-+	.ndo_open	= xenvif_open,
-+	.ndo_stop	= xenvif_close,
-+	.ndo_change_mtu	= xenvif_change_mtu,
- };
- 
--struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
--			      unsigned int handle)
-+struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
-+			    unsigned int handle)
- {
- 	int err = 0;
- 	struct net_device *dev;
--	struct xen_netif *netif;
-+	struct xenvif *vif;
- 	char name[IFNAMSIZ] = {};
- 
- 	snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
--	dev = alloc_netdev(sizeof(struct xen_netif), name, ether_setup);
-+	dev = alloc_netdev(sizeof(struct xenvif), name, ether_setup);
- 	if (dev == NULL) {
- 		pr_debug("Could not allocate netdev\n");
- 		return ERR_PTR(-ENOMEM);
-@@ -232,29 +232,29 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
- 
- 	SET_NETDEV_DEV(dev, parent);
- 
--	netif = netdev_priv(dev);
--	memset(netif, 0, sizeof(*netif));
--	netif->domid  = domid;
--	netif->group  = -1;
--	netif->handle = handle;
--	netif->can_sg = 1;
--	netif->csum = 1;
--	atomic_set(&netif->refcnt, 1);
--	init_waitqueue_head(&netif->waiting_to_free);
--	netif->dev = dev;
--	INIT_LIST_HEAD(&netif->list);
--
--	netback_carrier_off(netif);
--
--	netif->credit_bytes = netif->remaining_credit = ~0UL;
--	netif->credit_usec  = 0UL;
--	init_timer(&netif->credit_timeout);
-+	vif = netdev_priv(dev);
-+	memset(vif, 0, sizeof(*vif));
-+	vif->domid  = domid;
-+	vif->group  = -1;
-+	vif->handle = handle;
-+	vif->can_sg = 1;
-+	vif->csum = 1;
-+	atomic_set(&vif->refcnt, 1);
-+	init_waitqueue_head(&vif->waiting_to_free);
-+	vif->dev = dev;
-+	INIT_LIST_HEAD(&vif->list);
-+
-+	xenvif_carrier_off(vif);
-+
-+	vif->credit_bytes = vif->remaining_credit = ~0UL;
-+	vif->credit_usec  = 0UL;
-+	init_timer(&vif->credit_timeout);
- 	/* Initialize 'expires' now: it's used to track the credit window. */
--	netif->credit_timeout.expires = jiffies;
-+	vif->credit_timeout.expires = jiffies;
- 
--	dev->netdev_ops	= &netback_ops;
--	netif_set_features(netif);
--	SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
-+	dev->netdev_ops	= &xenvif_netdev_ops;
-+	xenvif_set_features(vif);
-+	SET_ETHTOOL_OPS(dev, &xenvif_ethtool_ops);
- 
- 	dev->tx_queue_len = NETBK_QUEUE_LENGTH;
- 
-@@ -277,18 +277,18 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
- 		return ERR_PTR(err);
- 	}
- 
--	pr_debug("Successfully created netif\n");
--	return netif;
-+	pr_debug("Successfully created xenvif\n");
-+	return vif;
- }
- 
--static int map_frontend_pages(struct xen_netif *netif,
-+static int map_frontend_pages(struct xenvif *vif,
- 			      grant_ref_t tx_ring_ref,
- 			      grant_ref_t rx_ring_ref)
- {
- 	struct gnttab_map_grant_ref op;
- 
--	gnttab_set_map_op(&op, (unsigned long)netif->tx_comms_area->addr,
--			  GNTMAP_host_map, tx_ring_ref, netif->domid);
-+	gnttab_set_map_op(&op, (unsigned long)vif->tx_comms_area->addr,
-+			  GNTMAP_host_map, tx_ring_ref, vif->domid);
- 
- 	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
- 		BUG();
-@@ -298,11 +298,11 @@ static int map_frontend_pages(struct xen_netif *netif,
- 		return op.status;
- 	}
- 
--	netif->tx_shmem_ref    = tx_ring_ref;
--	netif->tx_shmem_handle = op.handle;
-+	vif->tx_shmem_ref    = tx_ring_ref;
-+	vif->tx_shmem_handle = op.handle;
- 
--	gnttab_set_map_op(&op, (unsigned long)netif->rx_comms_area->addr,
--			  GNTMAP_host_map, rx_ring_ref, netif->domid);
-+	gnttab_set_map_op(&op, (unsigned long)vif->rx_comms_area->addr,
-+			  GNTMAP_host_map, rx_ring_ref, vif->domid);
- 
- 	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
- 		BUG();
-@@ -311,120 +311,120 @@ static int map_frontend_pages(struct xen_netif *netif,
- 		struct gnttab_unmap_grant_ref unop;
- 
- 		gnttab_set_unmap_op(&unop,
--				    (unsigned long)netif->tx_comms_area->addr,
--				    GNTMAP_host_map, netif->tx_shmem_handle);
-+				    (unsigned long)vif->tx_comms_area->addr,
-+				    GNTMAP_host_map, vif->tx_shmem_handle);
- 		HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unop, 1);
- 		pr_debug("Gnttab failure mapping rx_ring_ref!\n");
- 		return op.status;
- 	}
- 
--	netif->rx_shmem_ref    = rx_ring_ref;
--	netif->rx_shmem_handle = op.handle;
-+	vif->rx_shmem_ref    = rx_ring_ref;
-+	vif->rx_shmem_handle = op.handle;
- 
- 	return 0;
- }
- 
--static void unmap_frontend_pages(struct xen_netif *netif)
-+static void unmap_frontend_pages(struct xenvif *vif)
- {
- 	struct gnttab_unmap_grant_ref op;
- 
--	gnttab_set_unmap_op(&op, (unsigned long)netif->tx_comms_area->addr,
--			    GNTMAP_host_map, netif->tx_shmem_handle);
-+	gnttab_set_unmap_op(&op, (unsigned long)vif->tx_comms_area->addr,
-+			    GNTMAP_host_map, vif->tx_shmem_handle);
- 
- 	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
- 		BUG();
- 
--	gnttab_set_unmap_op(&op, (unsigned long)netif->rx_comms_area->addr,
--			    GNTMAP_host_map, netif->rx_shmem_handle);
-+	gnttab_set_unmap_op(&op, (unsigned long)vif->rx_comms_area->addr,
-+			    GNTMAP_host_map, vif->rx_shmem_handle);
- 
- 	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
- 		BUG();
- }
- 
--int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
--	      unsigned long rx_ring_ref, unsigned int evtchn)
-+int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
-+		   unsigned long rx_ring_ref, unsigned int evtchn)
- {
- 	int err = -ENOMEM;
- 	struct xen_netif_tx_sring *txs;
- 	struct xen_netif_rx_sring *rxs;
- 
- 	/* Already connected through? */
--	if (netif->irq)
-+	if (vif->irq)
- 		return 0;
- 
--	netif->tx_comms_area = alloc_vm_area(PAGE_SIZE);
--	if (netif->tx_comms_area == NULL)
-+	vif->tx_comms_area = alloc_vm_area(PAGE_SIZE);
-+	if (vif->tx_comms_area == NULL)
- 		return -ENOMEM;
--	netif->rx_comms_area = alloc_vm_area(PAGE_SIZE);
--	if (netif->rx_comms_area == NULL)
-+	vif->rx_comms_area = alloc_vm_area(PAGE_SIZE);
-+	if (vif->rx_comms_area == NULL)
- 		goto err_rx;
- 
--	err = map_frontend_pages(netif, tx_ring_ref, rx_ring_ref);
-+	err = map_frontend_pages(vif, tx_ring_ref, rx_ring_ref);
- 	if (err)
- 		goto err_map;
- 
- 	err = bind_interdomain_evtchn_to_irqhandler(
--		netif->domid, evtchn, netif_be_int, 0,
--		netif->dev->name, netif);
-+		vif->domid, evtchn, xenvif_interrupt, 0,
-+		vif->dev->name, vif);
- 	if (err < 0)
- 		goto err_hypervisor;
--	netif->irq = err;
--	disable_irq(netif->irq);
-+	vif->irq = err;
-+	disable_irq(vif->irq);
- 
--	txs = (struct xen_netif_tx_sring *)netif->tx_comms_area->addr;
--	BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE);
-+	txs = (struct xen_netif_tx_sring *)vif->tx_comms_area->addr;
-+	BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);
- 
- 	rxs = (struct xen_netif_rx_sring *)
--		((char *)netif->rx_comms_area->addr);
--	BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE);
-+		((char *)vif->rx_comms_area->addr);
-+	BACK_RING_INIT(&vif->rx, rxs, PAGE_SIZE);
- 
--	netif->rx_req_cons_peek = 0;
-+	vif->rx_req_cons_peek = 0;
- 
--	netif_get(netif);
-+	xenvif_get(vif);
- 
- 	rtnl_lock();
--	netback_carrier_on(netif);
--	if (netif_running(netif->dev))
--		__netif_up(netif);
-+	xenvif_carrier_on(vif);
-+	if (netif_running(vif->dev))
-+		xenvif_up(vif);
- 	rtnl_unlock();
- 
- 	return 0;
- err_hypervisor:
--	unmap_frontend_pages(netif);
-+	unmap_frontend_pages(vif);
- err_map:
--	free_vm_area(netif->rx_comms_area);
-+	free_vm_area(vif->rx_comms_area);
- err_rx:
--	free_vm_area(netif->tx_comms_area);
-+	free_vm_area(vif->tx_comms_area);
- 	return err;
- }
- 
--void netif_disconnect(struct xen_netif *netif)
-+void xenvif_disconnect(struct xenvif *vif)
- {
--	if (netback_carrier_ok(netif)) {
-+	if (xenvif_carrier_ok(vif)) {
- 		rtnl_lock();
--		netback_carrier_off(netif);
--		netif_carrier_off(netif->dev); /* discard queued packets */
--		if (netif_running(netif->dev))
--			__netif_down(netif);
-+		xenvif_carrier_off(vif);
-+		netif_carrier_off(vif->dev); /* discard queued packets */
-+		if (netif_running(vif->dev))
-+			xenvif_down(vif);
- 		rtnl_unlock();
--		netif_put(netif);
-+		xenvif_put(vif);
- 	}
- 
--	atomic_dec(&netif->refcnt);
--	wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0);
-+	atomic_dec(&vif->refcnt);
-+	wait_event(vif->waiting_to_free, atomic_read(&vif->refcnt) == 0);
- 
--	del_timer_sync(&netif->credit_timeout);
-+	del_timer_sync(&vif->credit_timeout);
- 
--	if (netif->irq)
--		unbind_from_irqhandler(netif->irq, netif);
-+	if (vif->irq)
-+		unbind_from_irqhandler(vif->irq, vif);
- 
--	unregister_netdev(netif->dev);
-+	unregister_netdev(vif->dev);
- 
--	if (netif->tx.sring) {
--		unmap_frontend_pages(netif);
--		free_vm_area(netif->tx_comms_area);
--		free_vm_area(netif->rx_comms_area);
-+	if (vif->tx.sring) {
-+		unmap_frontend_pages(vif);
-+		free_vm_area(vif->tx_comms_area);
-+		free_vm_area(vif->rx_comms_area);
- 	}
- 
--	free_netdev(netif->dev);
-+	free_netdev(vif->dev);
- }
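The teardown above leans on a small get/put idiom: xenvif_put() wakes waiting_to_free when the last reference drops, and xenvif_disconnect() gives up its own reference and then sleeps until every in-flight user has done the same. A compressed userspace sketch of that handshake, with pthreads standing in for the kernel's wait queue; the names and types here are illustrative, not from the patch:

    #include <pthread.h>
    #include <stdatomic.h>

    static atomic_int refcnt = 1;	/* initial reference, as in xenvif_alloc() */
    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t freed = PTHREAD_COND_INITIALIZER;

    static void vif_get(void)
    {
    	atomic_fetch_add(&refcnt, 1);
    }

    static void vif_put(void)
    {
    	/* Like xenvif_put(): whoever drops the last reference wakes
    	 * the thread blocked in vif_disconnect(). */
    	if (atomic_fetch_sub(&refcnt, 1) == 1) {
    		pthread_mutex_lock(&lock);
    		pthread_cond_signal(&freed);
    		pthread_mutex_unlock(&lock);
    	}
    }

    static void vif_disconnect(void)
    {
    	/* Like xenvif_disconnect(): drop the initial reference, then
    	 * block until every outstanding vif_put() has run. */
    	atomic_fetch_sub(&refcnt, 1);
    	pthread_mutex_lock(&lock);
    	while (atomic_load(&refcnt) != 0)
    		pthread_cond_wait(&freed, &lock);
    	pthread_mutex_unlock(&lock);
    }
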
-diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
-index d10ddbc..f2f9c6f 100644
---- a/drivers/net/xen-netback/netback.c
-+++ b/drivers/net/xen-netback/netback.c
-@@ -49,7 +49,7 @@
- 
- struct pending_tx_info {
- 	struct xen_netif_tx_request req;
--	struct xen_netif *netif;
-+	struct xenvif *vif;
- };
- typedef unsigned int pending_ring_idx_t;
- 
-@@ -140,7 +140,7 @@ struct xen_netbk {
- static struct xen_netbk *xen_netbk;
- static int xen_netbk_group_nr;
- 
--void xen_netbk_add_netif(struct xen_netif *netif)
-+void xen_netbk_add_xenvif(struct xenvif *vif)
- {
- 	int i;
- 	int min_netfront_count;
-@@ -154,20 +154,20 @@ void xen_netbk_add_netif(struct xen_netif *netif)
- 		}
- 	}
- 
--	netif->group = min_group;
--	atomic_inc(&xen_netbk[netif->group].netfront_count);
-+	vif->group = min_group;
-+	atomic_inc(&xen_netbk[vif->group].netfront_count);
- }
- 
--void xen_netbk_remove_netif(struct xen_netif *netif)
-+void xen_netbk_remove_xenvif(struct xenvif *vif)
- {
--	atomic_dec(&xen_netbk[netif->group].netfront_count);
-+	atomic_dec(&xen_netbk[vif->group].netfront_count);
- }
- 
- static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx);
--static void make_tx_response(struct xen_netif *netif,
-+static void make_tx_response(struct xenvif *vif,
- 			     struct xen_netif_tx_request *txp,
- 			     s8       st);
--static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
-+static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
- 					     u16      id,
- 					     s8       st,
- 					     u16      offset,
-@@ -369,20 +369,20 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
- 	return NULL;
- }
- 
--static inline int netbk_max_required_rx_slots(struct xen_netif *netif)
-+static inline int xenvif_max_required_rx_slots(struct xenvif *vif)
- {
--	if (netif->can_sg || netif->gso || netif->gso_prefix)
-+	if (vif->can_sg || vif->gso || vif->gso_prefix)
- 		return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
- 	return 1; /* all in one */
- }
- 
--static inline int netbk_queue_full(struct xen_netif *netif)
-+static inline int xenvif_queue_full(struct xenvif *vif)
- {
--	RING_IDX peek   = netif->rx_req_cons_peek;
--	RING_IDX needed = netbk_max_required_rx_slots(netif);
-+	RING_IDX peek   = vif->rx_req_cons_peek;
-+	RING_IDX needed = xenvif_max_required_rx_slots(vif);
- 
--	return ((netif->rx.sring->req_prod - peek) < needed) ||
--	       ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
-+	return ((vif->rx.sring->req_prod - peek) < needed) ||
-+	       ((vif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
- }
- 
- /*
-@@ -430,7 +430,7 @@ static bool start_new_rx_buffer(int offset, unsigned long size, int head)
-  * the guest. This function is essentially a dry run of
-  * netbk_gop_frag_copy.
-  */
--static unsigned int count_skb_slots(struct sk_buff *skb, struct xen_netif *netif)
-+static unsigned int count_skb_slots(struct sk_buff *skb, struct xenvif *vif)
- {
- 	unsigned int count = 1;
- 	int i, copy_off = 0;
-@@ -464,20 +464,20 @@ static unsigned int count_skb_slots(struct sk_buff *skb, struct xen_netif *netif
- 	return count;
- }
- 
--int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
-+int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
- {
--	struct xen_netif *netif = netdev_priv(dev);
-+	struct xenvif *vif = netdev_priv(dev);
- 	struct xen_netbk *netbk;
- 
- 	BUG_ON(skb->dev != dev);
- 
--	if (netif->group == -1)
-+	if (vif->group == -1)
- 		goto drop;
- 
--	netbk = &xen_netbk[netif->group];
-+	netbk = &xen_netbk[vif->group];
- 
- 	/* Drop the packet if the target domain has no receive buffers. */
--	if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
-+	if (unlikely(!xenvif_schedulable(vif) || xenvif_queue_full(vif)))
- 		goto drop;
- 
- 	/*
-@@ -496,14 +496,14 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
- 	}
- 
- 	/* Reserve ring slots for the worst-case number of fragments. */
--	netif->rx_req_cons_peek += count_skb_slots(skb, netif);
--	netif_get(netif);
-+	vif->rx_req_cons_peek += count_skb_slots(skb, vif);
-+	xenvif_get(vif);
- 
--	if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
--		netif->rx.sring->req_event = netif->rx_req_cons_peek +
--			netbk_max_required_rx_slots(netif);
-+	if (netbk_can_queue(dev) && xenvif_queue_full(vif)) {
-+		vif->rx.sring->req_event = vif->rx_req_cons_peek +
-+			xenvif_max_required_rx_slots(vif);
- 		mb(); /* request notification /then/ check & stop the queue */
--		if (netbk_queue_full(netif))
-+		if (xenvif_queue_full(vif))
- 			netif_stop_queue(dev);
- 	}
- 	skb_queue_tail(&netbk->rx_queue, skb);
-@@ -513,7 +513,7 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
- 	return 0;
- 
-  drop:
--	netif->stats.tx_dropped++;
-+	vif->stats.tx_dropped++;
- 	dev_kfree_skb(skb);
- 	return 0;
- }
-@@ -527,13 +527,13 @@ struct netrx_pending_operations {
- 	grant_ref_t copy_gref;
- };
- 
--static struct netbk_rx_meta *get_next_rx_buffer(struct xen_netif *netif,
-+static struct netbk_rx_meta *get_next_rx_buffer(struct xenvif *vif,
- 						struct netrx_pending_operations *npo)
- {
- 	struct netbk_rx_meta *meta;
- 	struct xen_netif_rx_request *req;
- 
--	req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
-+	req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
- 
- 	meta = npo->meta + npo->meta_prod++;
- 	meta->gso_size = 0;
-@@ -550,7 +550,7 @@ static struct netbk_rx_meta *get_next_rx_buffer(struct xen_netif *netif,
-  * Set up the grant operations for this fragment. If it's a flipping
-  * interface, we also set up the unmap request from here.
-  */
--static void netbk_gop_frag_copy(struct xen_netif *netif,
-+static void netbk_gop_frag_copy(struct xenvif *vif,
- 				struct netrx_pending_operations *npo,
- 				struct page *page, unsigned long size,
- 				unsigned long offset, int head)
-@@ -580,7 +580,7 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
- 			 */
- 			BUG_ON(head);
- 
--			meta = get_next_rx_buffer(netif, npo);
-+			meta = get_next_rx_buffer(vif, npo);
- 		}
- 
- 		bytes = size;
-@@ -595,7 +595,7 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
- 
- 			src_pend = &netbk->pending_tx_info[idx];
- 
--			copy_gop->source.domid = src_pend->netif->domid;
-+			copy_gop->source.domid = src_pend->vif->domid;
- 			copy_gop->source.u.ref = src_pend->req.gref;
- 			copy_gop->flags |= GNTCOPY_source_gref;
- 		} else {
-@@ -604,7 +604,7 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
- 			copy_gop->source.u.gmfn = virt_to_mfn(vaddr);
- 		}
- 		copy_gop->source.offset = offset;
--		copy_gop->dest.domid = netif->domid;
-+		copy_gop->dest.domid = vif->domid;
- 
- 		copy_gop->dest.offset = npo->copy_off;
- 		copy_gop->dest.u.ref = npo->copy_gref;
-@@ -634,7 +634,7 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
- static int netbk_gop_skb(struct sk_buff *skb,
- 			 struct netrx_pending_operations *npo)
- {
--	struct xen_netif *netif = netdev_priv(skb->dev);
-+	struct xenvif *vif = netdev_priv(skb->dev);
- 	int nr_frags = skb_shinfo(skb)->nr_frags;
- 	int i;
- 	struct xen_netif_rx_request *req;
-@@ -644,18 +644,18 @@ static int netbk_gop_skb(struct sk_buff *skb,
- 	old_meta_prod = npo->meta_prod;
- 
- 	/* Set up a GSO prefix descriptor, if necessary */
--	if (skb_shinfo(skb)->gso_size && netif->gso_prefix) {
--		req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
-+	if (skb_shinfo(skb)->gso_size && vif->gso_prefix) {
-+		req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
- 		meta = npo->meta + npo->meta_prod++;
- 		meta->gso_size = skb_shinfo(skb)->gso_size;
- 		meta->size = 0;
- 		meta->id = req->id;
- 	}
- 
--	req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
-+	req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
- 	meta = npo->meta + npo->meta_prod++;
- 
--	if (!netif->gso_prefix)
-+	if (!vif->gso_prefix)
- 		meta->gso_size = skb_shinfo(skb)->gso_size;
- 	else
- 		meta->gso_size = 0;
-@@ -665,17 +665,16 @@ static int netbk_gop_skb(struct sk_buff *skb,
- 	npo->copy_off = 0;
- 	npo->copy_gref = req->gref;
- 
--	netbk_gop_frag_copy(netif,
--			    npo, virt_to_page(skb->data),
-+	netbk_gop_frag_copy(vif, npo, virt_to_page(skb->data),
- 			    skb_headlen(skb),
- 			    offset_in_page(skb->data), 1);
- 
- 	/* Leave a gap for the GSO descriptor. */
--	if (skb_shinfo(skb)->gso_size && !netif->gso_prefix)
--		netif->rx.req_cons++;
-+	if (skb_shinfo(skb)->gso_size && !vif->gso_prefix)
-+		vif->rx.req_cons++;
- 
- 	for (i = 0; i < nr_frags; i++) {
--		netbk_gop_frag_copy(netif, npo,
-+		netbk_gop_frag_copy(vif, npo,
- 				    skb_shinfo(skb)->frags[i].page,
- 				    skb_shinfo(skb)->frags[i].size,
- 				    skb_shinfo(skb)->frags[i].page_offset,
-@@ -710,7 +709,7 @@ static int netbk_check_gop(int nr_meta_slots, domid_t domid,
- 	return status;
- }
- 
--static void netbk_add_frag_responses(struct xen_netif *netif, int status,
-+static void netbk_add_frag_responses(struct xenvif *vif, int status,
- 				     struct netbk_rx_meta *meta,
- 				     int nr_meta_slots)
- {
-@@ -731,7 +730,7 @@ static void netbk_add_frag_responses(struct xen_netif *netif, int status,
- 			flags = XEN_NETRXF_more_data;
- 
- 		offset = 0;
--		make_rx_response(netif, meta[i].id, status, offset,
-+		make_rx_response(vif, meta[i].id, status, offset,
- 				 meta[i].size, flags);
- 	}
- }
-@@ -742,7 +741,7 @@ struct skb_cb_overlay {
- 
- static void xen_netbk_rx_action(unsigned long data)
- {
--	struct xen_netif *netif = NULL;
-+	struct xenvif *vif = NULL;
- 	struct xen_netbk *netbk = (struct xen_netbk *)data;
- 	s8 status;
- 	u16 irq, flags;
-@@ -766,7 +765,7 @@ static void xen_netbk_rx_action(unsigned long data)
- 	count = 0;
- 
- 	while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) {
--		netif = netdev_priv(skb->dev);
-+		vif = netdev_priv(skb->dev);
- 		nr_frags = skb_shinfo(skb)->nr_frags;
- 
- 		sco = (struct skb_cb_overlay *)skb->cb;
-@@ -794,11 +793,11 @@ static void xen_netbk_rx_action(unsigned long data)
- 	while ((skb = __skb_dequeue(&rxq)) != NULL) {
- 		sco = (struct skb_cb_overlay *)skb->cb;
- 
--		netif = netdev_priv(skb->dev);
-+		vif = netdev_priv(skb->dev);
- 
--		if (netbk->meta[npo.meta_cons].gso_size && netif->gso_prefix) {
--			resp = RING_GET_RESPONSE(&netif->rx,
--						netif->rx.rsp_prod_pvt++);
-+		if (netbk->meta[npo.meta_cons].gso_size && vif->gso_prefix) {
-+			resp = RING_GET_RESPONSE(&vif->rx,
-+						vif->rx.rsp_prod_pvt++);
- 
- 			resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data;
- 
-@@ -811,11 +810,11 @@ static void xen_netbk_rx_action(unsigned long data)
- 		}
- 
- 
--		netif->stats.tx_bytes += skb->len;
--		netif->stats.tx_packets++;
-+		vif->stats.tx_bytes += skb->len;
-+		vif->stats.tx_packets++;
- 
- 		status = netbk_check_gop(sco->meta_slots_used,
--					 netif->domid, &npo);
-+					 vif->domid, &npo);
- 
- 		if (sco->meta_slots_used == 1)
- 			flags = 0;
-@@ -829,16 +828,16 @@ static void xen_netbk_rx_action(unsigned long data)
- 			flags |= XEN_NETRXF_data_validated;
- 
- 		offset = 0;
--		resp = make_rx_response(netif, netbk->meta[npo.meta_cons].id,
-+		resp = make_rx_response(vif, netbk->meta[npo.meta_cons].id,
- 					status, offset,
- 					netbk->meta[npo.meta_cons].size,
- 					flags);
- 
--		if (netbk->meta[npo.meta_cons].gso_size && !netif->gso_prefix) {
-+		if (netbk->meta[npo.meta_cons].gso_size && !vif->gso_prefix) {
- 			struct xen_netif_extra_info *gso =
- 				(struct xen_netif_extra_info *)
--				RING_GET_RESPONSE(&netif->rx,
--						  netif->rx.rsp_prod_pvt++);
-+				RING_GET_RESPONSE(&vif->rx,
-+						  vif->rx.rsp_prod_pvt++);
- 
- 			resp->flags |= XEN_NETRXF_extra_info;
- 
-@@ -851,23 +850,23 @@ static void xen_netbk_rx_action(unsigned long data)
- 			gso->flags = 0;
- 		}
- 
--		netbk_add_frag_responses(netif, status,
-+		netbk_add_frag_responses(vif, status,
- 					 netbk->meta + npo.meta_cons + 1,
- 					 sco->meta_slots_used);
- 
--		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
--		irq = netif->irq;
-+		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret);
-+		irq = vif->irq;
- 		if (ret && !netbk->rx_notify[irq]) {
- 			netbk->rx_notify[irq] = 1;
- 			netbk->notify_list[notify_nr++] = irq;
- 		}
- 
--		if (netif_queue_stopped(netif->dev) &&
--		    netif_schedulable(netif) &&
--		    !netbk_queue_full(netif))
--			netif_wake_queue(netif->dev);
-+		if (netif_queue_stopped(vif->dev) &&
-+		    xenvif_schedulable(vif) &&
-+		    !xenvif_queue_full(vif))
-+			netif_wake_queue(vif->dev);
- 
--		netif_put(netif);
-+		xenvif_put(vif);
- 		npo.meta_cons += sco->meta_slots_used;
- 		dev_kfree_skb(skb);
- 	}
-@@ -890,93 +889,92 @@ static void net_alarm(unsigned long data)
- 	xen_netbk_bh_handler(netbk, 1);
- }
- 
--static void netbk_tx_pending_timeout(unsigned long data)
-+static void xen_netbk_tx_pending_timeout(unsigned long data)
- {
- 	struct xen_netbk *netbk = (struct xen_netbk *)data;
- 	xen_netbk_bh_handler(netbk, 0);
- }
- 
--struct net_device_stats *netif_be_get_stats(struct net_device *dev)
-+struct net_device_stats *xenvif_get_stats(struct net_device *dev)
- {
--	struct xen_netif *netif = netdev_priv(dev);
--	return &netif->stats;
-+	struct xenvif *vif = netdev_priv(dev);
-+	return &vif->stats;
- }
- 
--static int __on_net_schedule_list(struct xen_netif *netif)
-+static int __on_net_schedule_list(struct xenvif *vif)
- {
--	return !list_empty(&netif->list);
-+	return !list_empty(&vif->list);
- }
- 
- /* Must be called with net_schedule_list_lock held */
--static void remove_from_net_schedule_list(struct xen_netif *netif)
-+static void remove_from_net_schedule_list(struct xenvif *vif)
- {
--	if (likely(__on_net_schedule_list(netif))) {
--		list_del_init(&netif->list);
--		netif_put(netif);
-+	if (likely(__on_net_schedule_list(vif))) {
-+		list_del_init(&vif->list);
-+		xenvif_put(vif);
- 	}
- }
- 
--static struct xen_netif *poll_net_schedule_list(struct xen_netbk *netbk)
-+static struct xenvif *poll_net_schedule_list(struct xen_netbk *netbk)
- {
--	struct xen_netif *netif = NULL;
-+	struct xenvif *vif = NULL;
- 
- 	spin_lock_irq(&netbk->net_schedule_list_lock);
- 	if (list_empty(&netbk->net_schedule_list))
- 		goto out;
- 
--	netif = list_first_entry(&netbk->net_schedule_list,
--				 struct xen_netif, list);
--	if (!netif)
-+	vif = list_first_entry(&netbk->net_schedule_list,
-+			       struct xenvif, list);
-+	if (!vif)
- 		goto out;
- 
--	netif_get(netif);
-+	xenvif_get(vif);
- 
--	remove_from_net_schedule_list(netif);
-+	remove_from_net_schedule_list(vif);
- out:
- 	spin_unlock_irq(&netbk->net_schedule_list_lock);
--	return netif;
-+	return vif;
- }
- 
--static void add_to_net_schedule_list_tail(struct xen_netif *netif)
-+static void add_to_net_schedule_list_tail(struct xenvif *vif)
- {
- 	unsigned long flags;
- 
--	struct xen_netbk *netbk = &xen_netbk[netif->group];
--	if (__on_net_schedule_list(netif))
-+	struct xen_netbk *netbk = &xen_netbk[vif->group];
-+	if (__on_net_schedule_list(vif))
- 		return;
- 
- 	spin_lock_irqsave(&netbk->net_schedule_list_lock, flags);
--	if (!__on_net_schedule_list(netif) &&
--	    likely(netif_schedulable(netif))) {
--		list_add_tail(&netif->list, &netbk->net_schedule_list);
--		netif_get(netif);
-+	if (!__on_net_schedule_list(vif) &&
-+	    likely(xenvif_schedulable(vif))) {
-+		list_add_tail(&vif->list, &netbk->net_schedule_list);
-+		xenvif_get(vif);
- 	}
- 	spin_unlock_irqrestore(&netbk->net_schedule_list_lock, flags);
- }
- 
--void netif_schedule_work(struct xen_netif *netif)
-+void xenvif_schedule_work(struct xenvif *vif)
- {
--	struct xen_netbk *netbk = &xen_netbk[netif->group];
-+	struct xen_netbk *netbk = &xen_netbk[vif->group];
- 	int more_to_do;
- 
--	RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
-+	RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);
- 
- 	if (more_to_do) {
--		add_to_net_schedule_list_tail(netif);
-+		add_to_net_schedule_list_tail(vif);
- 		maybe_schedule_tx_action(netbk);
- 	}
- }
- 
--void netif_deschedule_work(struct xen_netif *netif)
-+void xenvif_deschedule_work(struct xenvif *vif)
- {
--	struct xen_netbk *netbk = &xen_netbk[netif->group];
-+	struct xen_netbk *netbk = &xen_netbk[vif->group];
- 	spin_lock_irq(&netbk->net_schedule_list_lock);
--	remove_from_net_schedule_list(netif);
-+	remove_from_net_schedule_list(vif);
- 	spin_unlock_irq(&netbk->net_schedule_list_lock);
- }
- 
--
--static void tx_add_credit(struct xen_netif *netif)
-+static void tx_add_credit(struct xenvif *vif)
- {
- 	unsigned long max_burst, max_credit;
- 
-@@ -984,23 +982,23 @@ static void tx_add_credit(struct xen_netif *netif)
- 	 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
- 	 * Otherwise the interface can seize up due to insufficient credit.
- 	 */
--	max_burst = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size;
-+	max_burst = RING_GET_REQUEST(&vif->tx, vif->tx.req_cons)->size;
- 	max_burst = min(max_burst, 131072UL);
--	max_burst = max(max_burst, netif->credit_bytes);
-+	max_burst = max(max_burst, vif->credit_bytes);
- 
- 	/* Take care that adding a new chunk of credit doesn't wrap to zero. */
--	max_credit = netif->remaining_credit + netif->credit_bytes;
--	if (max_credit < netif->remaining_credit)
-+	max_credit = vif->remaining_credit + vif->credit_bytes;
-+	if (max_credit < vif->remaining_credit)
- 		max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
- 
--	netif->remaining_credit = min(max_credit, max_burst);
-+	vif->remaining_credit = min(max_credit, max_burst);
- }
- 
- static void tx_credit_callback(unsigned long data)
- {
--	struct xen_netif *netif = (struct xen_netif *)data;
--	tx_add_credit(netif);
--	netif_schedule_work(netif);
-+	struct xenvif *vif = (struct xenvif *)data;
-+	tx_add_credit(vif);
-+	xenvif_schedule_work(vif);
- }
- 
- static inline int copy_pending_req(struct xen_netbk *netbk,
-@@ -1011,7 +1009,7 @@ static inline int copy_pending_req(struct xen_netbk *netbk,
- 			&netbk->mmap_pages[pending_idx]);
- }
- 
--static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
-+static inline void xen_netbk_tx_action_dealloc(struct xen_netbk *netbk)
- {
- 	struct netbk_tx_pending_inuse *inuse, *n;
- 	struct gnttab_unmap_grant_ref *gop;
-@@ -1110,34 +1108,34 @@ static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
- 		index = pending_index(netbk->pending_prod++);
- 		netbk->pending_ring[index] = pending_idx;
- 
--		netif_put(netif);
-+		xenvif_put(vif);
- 
- 		list_del_init(&inuse->list);
- 	}
- }
- 
--static void netbk_tx_err(struct xen_netif *netif,
--		struct xen_netif_tx_request *txp, RING_IDX end)
-+static void netbk_tx_err(struct xenvif *vif,
-+			 struct xen_netif_tx_request *txp, RING_IDX end)
- {
--	RING_IDX cons = netif->tx.req_cons;
-+	RING_IDX cons = vif->tx.req_cons;
- 
- 	do {
--		make_tx_response(netif, txp, XEN_NETIF_RSP_ERROR);
-+		make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
- 		if (cons >= end)
- 			break;
--		txp = RING_GET_REQUEST(&netif->tx, cons++);
-+		txp = RING_GET_REQUEST(&vif->tx, cons++);
- 	} while (1);
--	netif->tx.req_cons = cons;
--	netif_schedule_work(netif);
--	netif_put(netif);
-+	vif->tx.req_cons = cons;
-+	xenvif_schedule_work(vif);
-+	xenvif_put(vif);
- }
- 
--static int netbk_count_requests(struct xen_netif *netif,
-+static int netbk_count_requests(struct xenvif *vif,
- 				struct xen_netif_tx_request *first,
- 				struct xen_netif_tx_request *txp,
- 				int work_to_do)
- {
--	RING_IDX cons = netif->tx.req_cons;
-+	RING_IDX cons = vif->tx.req_cons;
- 	int frags = 0;
- 
- 	if (!(first->flags & XEN_NETTXF_more_data))
-@@ -1154,7 +1152,7 @@ static int netbk_count_requests(struct xen_netif *netif,
- 			return -frags;
- 		}
- 
--		memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags),
-+		memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + frags),
- 		       sizeof(*txp));
- 		if (txp->size > first->size) {
- 			pr_debug("Frags galore\n");
-@@ -1175,7 +1173,7 @@ static int netbk_count_requests(struct xen_netif *netif,
- }
- 
- static struct gnttab_map_grant_ref *xen_netbk_get_requests(struct xen_netbk *netbk,
--							   struct xen_netif *netif,
-+							   struct xenvif *vif,
- 							   struct sk_buff *skb,
- 							   struct xen_netif_tx_request *txp,
- 							   struct gnttab_map_grant_ref *mop)
-@@ -1201,8 +1199,8 @@ static struct gnttab_map_grant_ref *xen_netbk_get_requests(struct xen_netbk *net
- 				  txp->gref, netif->domid);
- 
- 		memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
--		netif_get(netif);
--		pending_tx_info[pending_idx].netif = netif;
-+		xenvif_get(vif);
-+		pending_tx_info[pending_idx].vif = vif;
- 		frags[i].page = (void *)pending_idx;
- 	}
- 
-@@ -1216,7 +1214,7 @@ static int xen_netbk_tx_check_mop(struct xen_netbk *netbk,
- 	struct gnttab_map_grant_ref *mop = *mopp;
- 	int pending_idx = *((u16 *)skb->data);
- 	struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
--	struct xen_netif *netif = pending_tx_info[pending_idx].netif;
-+	struct xenvif *vif = pending_tx_info[pending_idx].vif;
- 	struct xen_netif_tx_request *txp;
- 	struct skb_shared_info *shinfo = skb_shinfo(skb);
- 	int nr_frags = shinfo->nr_frags;
-@@ -1228,9 +1226,9 @@ static int xen_netbk_tx_check_mop(struct xen_netbk *netbk,
- 		pending_ring_idx_t index;
- 		index = pending_index(netbk->pending_prod++);
- 		txp = &pending_tx_info[pending_idx].req;
--		make_tx_response(netif, txp, XEN_NETIF_RSP_ERROR);
-+		make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
- 		netbk->pending_ring[index] = pending_idx;
--		netif_put(netif);
-+		xenvif_put(vif);
- 	} else {
- 		set_phys_to_machine(
- 			__pa(idx_to_kaddr(netbk, pending_idx)) >> PAGE_SHIFT,
-@@ -1264,10 +1262,10 @@ static int xen_netbk_tx_check_mop(struct xen_netbk *netbk,
- 
- 		/* Error on this fragment: respond to client with an error. */
- 		txp = &netbk->pending_tx_info[pending_idx].req;
--		make_tx_response(netif, txp, XEN_NETIF_RSP_ERROR);
-+		make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
- 		index = pending_index(netbk->pending_prod++);
- 		netbk->pending_ring[index] = pending_idx;
--		netif_put(netif);
-+		xenvif_put(vif);
- 
- 		/* Not the first error? Preceding frags already invalidated. */
- 		if (err)
-@@ -1317,12 +1315,12 @@ static void xen_netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
- 	}
- }
- 
--int netbk_get_extras(struct xen_netif *netif,
-+int netbk_get_extras(struct xenvif *vif,
- 		     struct xen_netif_extra_info *extras,
- 		     int work_to_do)
- {
- 	struct xen_netif_extra_info extra;
--	RING_IDX cons = netif->tx.req_cons;
-+	RING_IDX cons = vif->tx.req_cons;
- 
- 	do {
- 		if (unlikely(work_to_do-- <= 0)) {
-@@ -1330,17 +1328,17 @@ int netbk_get_extras(struct xen_netif *netif,
- 			return -EBADR;
- 		}
- 
--		memcpy(&extra, RING_GET_REQUEST(&netif->tx, cons),
-+		memcpy(&extra, RING_GET_REQUEST(&vif->tx, cons),
- 		       sizeof(extra));
- 		if (unlikely(!extra.type ||
- 			     extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
--			netif->tx.req_cons = ++cons;
-+			vif->tx.req_cons = ++cons;
- 			pr_debug("Invalid extra type: %d\n", extra.type);
- 			return -EINVAL;
- 		}
- 
- 		memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
--		netif->tx.req_cons = ++cons;
-+		vif->tx.req_cons = ++cons;
- 	} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
- 
- 	return work_to_do;
-@@ -1370,7 +1368,7 @@ static int netbk_set_skb_gso(struct sk_buff *skb,
- 	return 0;
- }
- 
--static int checksum_setup(struct xen_netif *netif, struct sk_buff *skb)
-+static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
- {
- 	struct iphdr *iph;
- 	unsigned char *th;
-@@ -1384,7 +1382,7 @@ static int checksum_setup(struct xen_netif *netif, struct sk_buff *skb)
- 	 * recalculate the partial checksum.
- 	 */
- 	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
--		netif->rx_gso_checksum_fixup++;
-+		vif->rx_gso_checksum_fixup++;
- 		skb->ip_summed = CHECKSUM_PARTIAL;
- 		recalculate_partial_csum = 1;
- 	}
-@@ -1440,30 +1438,30 @@ out:
- 	return err;
- }
- 
--static bool tx_credit_exceeded(struct xen_netif *netif, unsigned size)
-+static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
- {
- 	unsigned long now = jiffies;
- 	unsigned long next_credit =
--		netif->credit_timeout.expires +
--		msecs_to_jiffies(netif->credit_usec / 1000);
-+		vif->credit_timeout.expires +
-+		msecs_to_jiffies(vif->credit_usec / 1000);
- 
- 	/* Timer could already be pending in rare cases. */
--	if (timer_pending(&netif->credit_timeout))
-+	if (timer_pending(&vif->credit_timeout))
- 		return true;
- 
- 	/* Passed the point where we can replenish credit? */
- 	if (time_after_eq(now, next_credit)) {
--		netif->credit_timeout.expires = now;
--		tx_add_credit(netif);
-+		vif->credit_timeout.expires = now;
-+		tx_add_credit(vif);
- 	}
- 
- 	/* Still too big to send right now? Set a callback. */
--	if (size > netif->remaining_credit) {
--		netif->credit_timeout.data     =
--			(unsigned long)netif;
--		netif->credit_timeout.function =
-+	if (size > vif->remaining_credit) {
-+		vif->credit_timeout.data     =
-+			(unsigned long)vif;
-+		vif->credit_timeout.function =
- 			tx_credit_callback;
--		mod_timer(&netif->credit_timeout,
-+		mod_timer(&vif->credit_timeout,
- 			  next_credit);
- 
- 		return true;
-@@ -1481,7 +1479,7 @@ static unsigned xen_netbk_tx_build_mops(struct xen_netbk *netbk)
- 	mop = netbk->tx_map_ops;
- 	while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
- 		!list_empty(&netbk->net_schedule_list)) {
--		struct xen_netif *netif;
-+		struct xenvif *vif;
- 		struct xen_netif_tx_request txreq;
- 		struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
- 		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
-@@ -1492,53 +1490,53 @@ static unsigned xen_netbk_tx_build_mops(struct xen_netbk *netbk)
- 		pending_ring_idx_t index;
- 
- 		/* Get a netif from the list with work to do. */
--		netif = poll_net_schedule_list(netbk);
--		if (!netif)
-+		vif = poll_net_schedule_list(netbk);
-+		if (!vif)
- 			continue;
- 
--		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
-+		RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, work_to_do);
- 		if (!work_to_do) {
--			netif_put(netif);
-+			xenvif_put(vif);
- 			continue;
- 		}
- 
--		idx = netif->tx.req_cons;
-+		idx = vif->tx.req_cons;
- 		rmb(); /* Ensure that we see the request before we copy it. */
--		memcpy(&txreq, RING_GET_REQUEST(&netif->tx, idx), sizeof(txreq));
-+		memcpy(&txreq, RING_GET_REQUEST(&vif->tx, idx), sizeof(txreq));
- 
- 		/* Credit-based scheduling. */
--		if (txreq.size > netif->remaining_credit &&
--		    tx_credit_exceeded(netif, txreq.size)) {
--			netif_put(netif);
-+		if (txreq.size > vif->remaining_credit &&
-+		    tx_credit_exceeded(vif, txreq.size)) {
-+			xenvif_put(vif);
- 			continue;
- 		}
- 
--		netif->remaining_credit -= txreq.size;
-+		vif->remaining_credit -= txreq.size;
- 
- 		work_to_do--;
--		netif->tx.req_cons = ++idx;
-+		vif->tx.req_cons = ++idx;
- 
- 		memset(extras, 0, sizeof(extras));
- 		if (txreq.flags & XEN_NETTXF_extra_info) {
--			work_to_do = netbk_get_extras(netif, extras,
-+			work_to_do = netbk_get_extras(vif, extras,
- 						      work_to_do);
--			idx = netif->tx.req_cons;
-+			idx = vif->tx.req_cons;
- 			if (unlikely(work_to_do < 0)) {
--				netbk_tx_err(netif, &txreq, idx);
-+				netbk_tx_err(vif, &txreq, idx);
- 				continue;
- 			}
- 		}
- 
--		ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do);
-+		ret = netbk_count_requests(vif, &txreq, txfrags, work_to_do);
- 		if (unlikely(ret < 0)) {
--			netbk_tx_err(netif, &txreq, idx - ret);
-+			netbk_tx_err(vif, &txreq, idx - ret);
- 			continue;
- 		}
- 		idx += ret;
- 
- 		if (unlikely(txreq.size < ETH_HLEN)) {
- 			pr_debug("Bad packet size: %d\n", txreq.size);
--			netbk_tx_err(netif, &txreq, idx);
-+			netbk_tx_err(vif, &txreq, idx);
- 			continue;
- 		}
- 
-@@ -1547,7 +1545,7 @@ static unsigned xen_netbk_tx_build_mops(struct xen_netbk *netbk)
- 			pr_debug("txreq.offset: %x, size: %u, end: %lu\n",
- 				 txreq.offset, txreq.size,
- 				 (txreq.offset&~PAGE_MASK) + txreq.size);
--			netbk_tx_err(netif, &txreq, idx);
-+			netbk_tx_err(vif, &txreq, idx);
- 			continue;
- 		}
- 
-@@ -1562,7 +1560,7 @@ static unsigned xen_netbk_tx_build_mops(struct xen_netbk *netbk)
- 				GFP_ATOMIC | __GFP_NOWARN);
- 		if (unlikely(skb == NULL)) {
- 			pr_debug("Can't allocate a skb in start_xmit.\n");
--			netbk_tx_err(netif, &txreq, idx);
-+			netbk_tx_err(vif, &txreq, idx);
- 			break;
- 		}
- 
-@@ -1575,7 +1573,7 @@ static unsigned xen_netbk_tx_build_mops(struct xen_netbk *netbk)
- 
- 			if (netbk_set_skb_gso(skb, gso)) {
- 				kfree_skb(skb);
--				netbk_tx_err(netif, &txreq, idx);
-+				netbk_tx_err(vif, &txreq, idx);
- 				continue;
- 			}
- 		}
-@@ -1587,7 +1585,7 @@ static unsigned xen_netbk_tx_build_mops(struct xen_netbk *netbk)
- 
- 		memcpy(&netbk->pending_tx_info[pending_idx].req,
- 		       &txreq, sizeof(txreq));
--		netbk->pending_tx_info[pending_idx].netif = netif;
-+		netbk->pending_tx_info[pending_idx].vif = vif;
- 		*((u16 *)skb->data) = pending_idx;
- 
- 		__skb_put(skb, data_len);
-@@ -1608,8 +1606,8 @@ static unsigned xen_netbk_tx_build_mops(struct xen_netbk *netbk)
- 
- 		mop = xen_netbk_get_requests(netbk, netif, skb, txfrags, mop);
- 
--		netif->tx.req_cons = idx;
--		netif_schedule_work(netif);
-+		vif->tx.req_cons = idx;
-+		xenvif_schedule_work(vif);
- 
- 		if ((mop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops))
- 			break;
-@@ -1626,12 +1624,12 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk)
- 	mop = netbk->tx_map_ops;
- 	while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
- 		struct xen_netif_tx_request *txp;
--		struct xen_netif *netif;
-+		struct xenvif *vif;
- 		u16 pending_idx;
- 		unsigned data_len;
- 
- 		pending_idx = *((u16 *)skb->data);
--		netif = netbk->pending_tx_info[pending_idx].netif;
-+		vif = netbk->pending_tx_info[pending_idx].vif;
- 		txp = &netbk->pending_tx_info[pending_idx].req;
- 
- 		/* Check the remap error code. */
-@@ -1672,10 +1670,10 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk)
- 			__pskb_pull_tail(skb, target - skb_headlen(skb));
- 		}
- 
--		skb->dev      = netif->dev;
-+		skb->dev      = vif->dev;
- 		skb->protocol = eth_type_trans(skb, skb->dev);
- 
--		if (checksum_setup(netif, skb)) {
-+		if (checksum_setup(vif, skb)) {
- 			pr_debug("Can't setup checksum in net_tx_action\n");
- 			kfree_skb(skb);
- 			continue;
-@@ -1688,11 +1686,11 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk)
- 			continue;
- 		}
- 
--		netif->stats.rx_bytes += skb->len;
--		netif->stats.rx_packets++;
-+		vif->stats.rx_bytes += skb->len;
-+		vif->stats.rx_packets++;
- 
- 		netif_rx_ni(skb);
--		netif->dev->last_rx = jiffies;
-+		vif->dev->last_rx = jiffies;
- 	}
- }
- 
-@@ -1755,57 +1753,57 @@ static void netif_page_release(struct page *page, unsigned int order)
- 	xen_netbk_idx_release(&xen_netbk[group], idx);
- }
- 
--irqreturn_t netif_be_int(int irq, void *dev_id)
-+irqreturn_t xenvif_interrupt(int irq, void *dev_id)
- {
--	struct xen_netif *netif = dev_id;
-+	struct xenvif *vif = dev_id;
- 	struct xen_netbk *netbk;
- 
--	if (netif->group == -1)
-+	if (vif->group == -1)
- 		return IRQ_NONE;
- 
--	netbk = &xen_netbk[netif->group];
-+	netbk = &xen_netbk[vif->group];
- 
--	add_to_net_schedule_list_tail(netif);
-+	add_to_net_schedule_list_tail(vif);
- 	maybe_schedule_tx_action(netbk);
- 
--	if (netif_schedulable(netif) && !netbk_queue_full(netif))
--		netif_wake_queue(netif->dev);
-+	if (xenvif_schedulable(vif) && !xenvif_queue_full(vif))
-+		netif_wake_queue(vif->dev);
- 
- 	return IRQ_HANDLED;
- }
- 
--static void make_tx_response(struct xen_netif *netif,
-+static void make_tx_response(struct xenvif *vif,
- 			     struct xen_netif_tx_request *txp,
- 			     s8       st)
- {
--	RING_IDX i = netif->tx.rsp_prod_pvt;
-+	RING_IDX i = vif->tx.rsp_prod_pvt;
- 	struct xen_netif_tx_response *resp;
- 	int notify;
- 
--	resp = RING_GET_RESPONSE(&netif->tx, i);
-+	resp = RING_GET_RESPONSE(&vif->tx, i);
- 	resp->id     = txp->id;
- 	resp->status = st;
- 
- 	if (txp->flags & XEN_NETTXF_extra_info)
--		RING_GET_RESPONSE(&netif->tx, ++i)->status = XEN_NETIF_RSP_NULL;
-+		RING_GET_RESPONSE(&vif->tx, ++i)->status = XEN_NETIF_RSP_NULL;
- 
--	netif->tx.rsp_prod_pvt = ++i;
--	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
-+	vif->tx.rsp_prod_pvt = ++i;
-+	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->tx, notify);
- 	if (notify)
--		notify_remote_via_irq(netif->irq);
-+		notify_remote_via_irq(vif->irq);
- }
- 
--static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
-+static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
- 					     u16      id,
- 					     s8       st,
- 					     u16      offset,
- 					     u16      size,
- 					     u16      flags)
- {
--	RING_IDX i = netif->rx.rsp_prod_pvt;
-+	RING_IDX i = vif->rx.rsp_prod_pvt;
- 	struct xen_netif_rx_response *resp;
- 
--	resp = RING_GET_RESPONSE(&netif->rx, i);
-+	resp = RING_GET_RESPONSE(&vif->rx, i);
- 	resp->offset     = offset;
- 	resp->flags      = flags;
- 	resp->id         = id;
-@@ -1813,7 +1811,7 @@ static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
- 	if (st < 0)
- 		resp->status = (s16)st;
- 
--	netif->rx.rsp_prod_pvt = ++i;
-+	vif->rx.rsp_prod_pvt = ++i;
- 
- 	return resp;
- }
-@@ -1964,7 +1962,7 @@ static int __init netback_init(void)
- 			netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
- 	}
- 
--	rc = netif_xenbus_init();
-+	rc = xenvif_xenbus_init();
- 	if (rc)
- 		goto failed_init;
- 
-diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
-index 867dc25..a6ad259 100644
---- a/drivers/net/xen-netback/xenbus.c
-+++ b/drivers/net/xen-netback/xenbus.c
-@@ -23,7 +23,7 @@
- 
- static int connect_rings(struct backend_info *);
- static void connect(struct backend_info *);
--static void backend_create_netif(struct backend_info *be);
-+static void backend_create_xenvif(struct backend_info *be);
- static void unregister_hotplug_status_watch(struct backend_info *be);
- 
- static int netback_remove(struct xenbus_device *dev)
-@@ -31,11 +31,11 @@ static int netback_remove(struct xenbus_device *dev)
- 	struct backend_info *be = dev_get_drvdata(&dev->dev);
- 
- 	unregister_hotplug_status_watch(be);
--	if (be->netif) {
-+	if (be->vif) {
- 		kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
- 		xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
--		netif_disconnect(be->netif);
--		be->netif = NULL;
-+		xenvif_disconnect(be->vif);
-+		be->vif = NULL;
- 	}
- 	kfree(be);
- 	dev_set_drvdata(&dev->dev, NULL);
-@@ -121,7 +121,7 @@ static int netback_probe(struct xenbus_device *dev,
- 		goto fail;
- 
- 	/* This kicks hotplug scripts, so do it immediately. */
--	backend_create_netif(be);
-+	backend_create_xenvif(be);
- 
- 	return 0;
- 
-@@ -159,20 +159,20 @@ static int netback_uevent(struct xenbus_device *xdev,
- 		kfree(val);
- 	}
- 
--	if (!be || !be->netif)
-+	if (!be || !be->vif)
- 		return 0;
- 
--	return add_uevent_var(env, "vif=%s", be->netif->dev->name);
-+	return add_uevent_var(env, "vif=%s", be->vif->dev->name);
- }
- 
- 
--static void backend_create_netif(struct backend_info *be)
-+static void backend_create_xenvif(struct backend_info *be)
- {
- 	int err;
- 	long handle;
- 	struct xenbus_device *dev = be->dev;
- 
--	if (be->netif != NULL)
-+	if (be->vif != NULL)
- 		return;
- 
- 	err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%li", &handle);
-@@ -181,10 +181,10 @@ static void backend_create_netif(struct backend_info *be)
- 		return;
- 	}
- 
--	be->netif = netif_alloc(&dev->dev, dev->otherend_id, handle);
--	if (IS_ERR(be->netif)) {
--		err = PTR_ERR(be->netif);
--		be->netif = NULL;
-+	be->vif = xenvif_alloc(&dev->dev, dev->otherend_id, handle);
-+	if (IS_ERR(be->vif)) {
-+		err = PTR_ERR(be->vif);
-+		be->vif = NULL;
- 		xenbus_dev_fatal(dev, err, "creating interface");
- 		return;
- 	}
-@@ -197,10 +197,10 @@ static void disconnect_backend(struct xenbus_device *dev)
- {
- 	struct backend_info *be = dev_get_drvdata(&dev->dev);
- 
--	if (be->netif) {
-+	if (be->vif) {
- 		xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
--		netif_disconnect(be->netif);
--		be->netif = NULL;
-+		xenvif_disconnect(be->vif);
-+		be->vif = NULL;
- 	}
- }
- 
-@@ -231,13 +231,13 @@ static void frontend_changed(struct xenbus_device *dev,
- 	case XenbusStateConnected:
- 		if (dev->state == XenbusStateConnected)
- 			break;
--		backend_create_netif(be);
--		if (be->netif)
-+		backend_create_xenvif(be);
-+		if (be->vif)
- 			connect(be);
- 		break;
- 
- 	case XenbusStateClosing:
--		if (be->netif)
-+		if (be->vif)
- 			kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
- 		disconnect_backend(dev);
- 		xenbus_switch_state(dev, XenbusStateClosing);
-@@ -357,15 +357,15 @@ static void connect(struct backend_info *be)
- 	if (err)
- 		return;
- 
--	err = xen_net_read_mac(dev, be->netif->fe_dev_addr);
-+	err = xen_net_read_mac(dev, be->vif->fe_dev_addr);
- 	if (err) {
- 		xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
- 		return;
- 	}
- 
--	xen_net_read_rate(dev, &be->netif->credit_bytes,
--			  &be->netif->credit_usec);
--	be->netif->remaining_credit = be->netif->credit_bytes;
-+	xen_net_read_rate(dev, &be->vif->credit_bytes,
-+			  &be->vif->credit_usec);
-+	be->vif->remaining_credit = be->vif->credit_bytes;
- 
- 	unregister_hotplug_status_watch(be);
- 	err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch,
-@@ -378,13 +378,13 @@ static void connect(struct backend_info *be)
- 		be->have_hotplug_status_watch = 1;
- 	}
- 
--	netif_wake_queue(be->netif->dev);
-+	netif_wake_queue(be->vif->dev);
- }
- 
- 
- static int connect_rings(struct backend_info *be)
- {
--	struct xen_netif *netif = be->netif;
-+	struct xenvif *vif = be->vif;
- 	struct xenbus_device *dev = be->dev;
- 	unsigned long tx_ring_ref, rx_ring_ref;
- 	unsigned int evtchn, rx_copy;
-@@ -416,42 +416,42 @@ static int connect_rings(struct backend_info *be)
- 	if (!rx_copy)
- 		return -EOPNOTSUPP;
- 
--	if (netif->dev->tx_queue_len != 0) {
-+	if (vif->dev->tx_queue_len != 0) {
- 		if (xenbus_scanf(XBT_NIL, dev->otherend,
- 				 "feature-rx-notify", "%d", &val) < 0)
- 			val = 0;
- 		if (val)
--			netif->can_queue = 1;
-+			vif->can_queue = 1;
- 		else
- 			/* Must be non-zero for pfifo_fast to work. */
--			netif->dev->tx_queue_len = 1;
-+			vif->dev->tx_queue_len = 1;
- 	}
- 
- 	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg",
- 			 "%d", &val) < 0)
- 		val = 0;
--	netif->can_sg = !!val;
-+	vif->can_sg = !!val;
- 
- 	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4",
- 			 "%d", &val) < 0)
- 		val = 0;
--	netif->gso = !!val;
-+	vif->gso = !!val;
- 
- 	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix",
- 			 "%d", &val) < 0)
- 		val = 0;
--	netif->gso_prefix = !!val;
-+	vif->gso_prefix = !!val;
- 
- 	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
- 			 "%d", &val) < 0)
- 		val = 0;
--	netif->csum = !val;
-+	vif->csum = !val;
- 
- 	/* Set dev->features */
--	netif_set_features(netif);
-+	xenvif_set_features(vif);
- 
- 	/* Map the shared frame, irq etc. */
--	err = netif_map(netif, tx_ring_ref, rx_ring_ref, evtchn);
-+	err = xenvif_connect(vif, tx_ring_ref, rx_ring_ref, evtchn);
- 	if (err) {
- 		xenbus_dev_fatal(dev, err,
- 				 "mapping shared-frames %lu/%lu port %u",
-@@ -481,9 +481,7 @@ static struct xenbus_driver netback = {
- 	.otherend_changed = frontend_changed,
- };
- 
--
--int netif_xenbus_init(void)
-+int xenvif_xenbus_init(void)
- {
--	printk(KERN_CRIT "registering netback\n");
- 	return xenbus_register_backend(&netback);
- }
--- 
-1.7.4
-
-
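The rename above leaves netback's vif lifetime idiom intact: xenvif_get()/xenvif_put() bracket every window in which the backend may still touch a vif, and teardown waits for the count to drain before the interface is freed. A minimal sketch of that idiom in kernel C (the field and helper names follow the patch; the wait in the release path is an assumption, since that hunk is not shown here):

	#include <linux/atomic.h>
	#include <linux/wait.h>

	struct xenvif {
		atomic_t refcnt;		/* set to 1 in xenvif_alloc() */
		wait_queue_head_t waiting_to_free;
		/* ... ring state, stats, struct net_device, ... */
	};

	static inline void xenvif_get(struct xenvif *vif)
	{
		atomic_inc(&vif->refcnt);
	}

	static inline void xenvif_put(struct xenvif *vif)
	{
		if (atomic_dec_and_test(&vif->refcnt))
			wake_up(&vif->waiting_to_free);
	}

	/* Hypothetical release helper: drop the initial reference and
	 * wait for any in-flight users to call xenvif_put(). */
	static void xenvif_wait_for_release(struct xenvif *vif)
	{
		xenvif_put(vif);
		wait_event(vif->waiting_to_free,
			   atomic_read(&vif->refcnt) == 0);
	}
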
-From ffb0eb7c4e5115ac17621cb83cf614ba026975df Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Fri, 21 Jan 2011 09:42:35 +0000
-Subject: [PATCH 095/203] xen: netback: add reference from xenvif to xen_netbk
-
-Rather than storing the group id, simply store a pointer (opaque to xenvif).
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/net/xen-netback/common.h    |    6 +++++-
- drivers/net/xen-netback/interface.c |    2 +-
- drivers/net/xen-netback/netback.c   |   26 ++++++++++++++------------
- 3 files changed, 20 insertions(+), 14 deletions(-)
-
-diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
-index f6da94b..ebe93fb 100644
---- a/drivers/net/xen-netback/common.h
-+++ b/drivers/net/xen-netback/common.h
-@@ -46,12 +46,16 @@
- #include <xen/grant_table.h>
- #include <xen/xenbus.h>
- 
-+struct xen_netbk;
-+
- struct xenvif {
- 	/* Unique identifier for this interface. */
- 	domid_t          domid;
--	int              group;
- 	unsigned int     handle;
- 
-+	/* Worker pool (xen_netbk) this vif is attached to. */
-+	struct xen_netbk *netbk;
-+
- 	u8               fe_dev_addr[6];
- 
- 	/* Physical parameters of the comms window. */
-diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
-index a065173..cfbb3cc 100644
---- a/drivers/net/xen-netback/interface.c
-+++ b/drivers/net/xen-netback/interface.c
-@@ -235,8 +235,8 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
- 	vif = netdev_priv(dev);
- 	memset(vif, 0, sizeof(*vif));
- 	vif->domid  = domid;
--	vif->group  = -1;
- 	vif->handle = handle;
-+	vif->netbk  = NULL;
- 	vif->can_sg = 1;
- 	vif->csum = 1;
- 	atomic_set(&vif->refcnt, 1);
-diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
-index f2f9c6f..4f04d2d 100644
---- a/drivers/net/xen-netback/netback.c
-+++ b/drivers/net/xen-netback/netback.c
-@@ -145,6 +145,8 @@ void xen_netbk_add_xenvif(struct xenvif *vif)
- 	int i;
- 	int min_netfront_count;
- 	int min_group = 0;
-+	struct xen_netbk *netbk;
-+
- 	min_netfront_count = atomic_read(&xen_netbk[0].netfront_count);
- 	for (i = 0; i < xen_netbk_group_nr; i++) {
- 		int netfront_count = atomic_read(&xen_netbk[i].netfront_count);
-@@ -154,13 +156,15 @@ void xen_netbk_add_xenvif(struct xenvif *vif)
- 		}
- 	}
- 
--	vif->group = min_group;
--	atomic_inc(&xen_netbk[vif->group].netfront_count);
-+	netbk = &xen_netbk[min_group];
-+
-+	vif->netbk = netbk;
-+	atomic_inc(&netbk->netfront_count);
- }
- 
- void xen_netbk_remove_xenvif(struct xenvif *vif)
- {
--	atomic_dec(&xen_netbk[vif->group].netfront_count);
-+	atomic_dec(&vif->netbk->netfront_count);
- }
- 
- static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx);
-@@ -471,10 +475,10 @@ int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
- 
- 	BUG_ON(skb->dev != dev);
- 
--	if (vif->group == -1)
-+	if (vif->netbk == NULL)
- 		goto drop;
- 
--	netbk = &xen_netbk[vif->group];
-+	netbk = vif->netbk;
- 
- 	/* Drop the packet if the target domain has no receive buffers. */
- 	if (unlikely(!xenvif_schedulable(vif) || xenvif_queue_full(vif)))
-@@ -940,7 +944,7 @@ static void add_to_net_schedule_list_tail(struct xenvif *vif)
- {
- 	unsigned long flags;
- 
--	struct xen_netbk *netbk = &xen_netbk[vif->group];
-+	struct xen_netbk *netbk = vif->netbk;
- 	if (__on_net_schedule_list(vif))
- 		return;
- 
-@@ -955,7 +959,7 @@ static void add_to_net_schedule_list_tail(struct xenvif *vif)
- 
- void xenvif_schedule_work(struct xenvif *vif)
- {
--	struct xen_netbk *netbk = &xen_netbk[vif->group];
-+	struct xen_netbk *netbk = vif->netbk;
- 	int more_to_do;
- 
- 	RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);
-@@ -968,7 +972,7 @@ void xenvif_schedule_work(struct xenvif *vif)
- 
- void xenvif_deschedule_work(struct xenvif *vif)
- {
--	struct xen_netbk *netbk = &xen_netbk[vif->group];
-+	struct xen_netbk *netbk = vif->netbk;
- 	spin_lock_irq(&netbk->net_schedule_list_lock);
- 	remove_from_net_schedule_list(vif);
- 	spin_unlock_irq(&netbk->net_schedule_list_lock);
-@@ -1756,13 +1760,11 @@ static void netif_page_release(struct page *page, unsigned int order)
- irqreturn_t xenvif_interrupt(int irq, void *dev_id)
- {
- 	struct xenvif *vif = dev_id;
--	struct xen_netbk *netbk;
-+	struct xen_netbk *netbk = vif->netbk;
- 
--	if (vif->group == -1)
-+	if (netbk == NULL)
- 		return IRQ_NONE;
- 
--	netbk = &xen_netbk[vif->group];
--
- 	add_to_net_schedule_list_tail(vif);
- 	maybe_schedule_tx_action(netbk);
- 
--- 
-1.7.4
-
-
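The patch above swaps the stored group id for a direct struct xen_netbk pointer; the placement policy is unchanged, attaching each new vif to the worker pool currently serving the fewest netfronts. A stand-alone sketch of that selection, reusing the xen_netbk array, xen_netbk_group_nr and the netfront_count field as declared in the patch (the helper name is illustrative):

	/* Pick the worker pool with the fewest attached netfronts. */
	static struct xen_netbk *least_loaded_netbk(void)
	{
		int i;
		int min_group = 0;
		int min_count = atomic_read(&xen_netbk[0].netfront_count);

		for (i = 0; i < xen_netbk_group_nr; i++) {
			int count = atomic_read(&xen_netbk[i].netfront_count);
			if (count < min_count) {
				min_count = count;
				min_group = i;
			}
		}
		return &xen_netbk[min_group];
	}
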
-From 0cf3cc23560821e8e0b95a1400374a7ff1248e52 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Fri, 21 Jan 2011 10:45:25 +0000
-Subject: [PATCH 096/203] xen: netback: refactor to separate network device from worker pools
-
-The netback worker pool code is in netback.c and uses "struct xen_netbk" and
-xen_netbk_*.
-
-The network device interface is in interface.c and uses "struct xenvif" and
-xenvif_*.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/net/xen-netback/common.h    |   69 +++-------
- drivers/net/xen-netback/interface.c |  227 ++++++++++++++++++++++++++++++-
- drivers/net/xen-netback/netback.c   |  259 ++++++-----------------------------
- drivers/net/xen-netback/xenbus.c    |   11 +-
- 4 files changed, 289 insertions(+), 277 deletions(-)
-
-diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
-index ebe93fb..b998a27 100644
---- a/drivers/net/xen-netback/common.h
-+++ b/drivers/net/xen-netback/common.h
-@@ -109,16 +109,6 @@ struct xenvif {
- 	wait_queue_head_t waiting_to_free;
- };
- 
--/*
-- * Implement our own carrier flag: the network stack's version causes delays
-- * when the carrier is re-enabled (in particular, dev_activate() may not
-- * immediately be called, which can cause packet loss; also the etherbridge
-- * can be rather lazy in activating its port).
-- */
--#define xenvif_carrier_on(xenvif)	((xenvif)->carrier = 1)
--#define xenvif_carrier_off(xenvif)	((xenvif)->carrier = 0)
--#define xenvif_carrier_ok(xenvif)	((xenvif)->carrier)
--
- enum {
- 	NETBK_DONT_COPY_SKB,
- 	NETBK_DELAYED_COPY_SKB,
-@@ -127,63 +117,40 @@ enum {
- 
- extern int netbk_copy_skb_mode;
- 
--struct backend_info {
--	struct xenbus_device *dev;
--	struct xenvif *vif;
--	enum xenbus_state frontend_state;
--	struct xenbus_watch hotplug_status_watch;
--	int have_hotplug_status_watch:1;
--};
--
--#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
--#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
--
--void xenvif_disconnect(struct xenvif *vif);
-+#define XEN_NETIF_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
-+#define XEN_NETIF_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
- 
--void xenvif_set_features(struct xenvif *vif);
- struct xenvif *xenvif_alloc(struct device *parent,
- 			    domid_t domid,
- 			    unsigned int handle);
-+
- int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
- 		   unsigned long rx_ring_ref, unsigned int evtchn);
-+void xenvif_disconnect(struct xenvif *vif);
- 
--static inline void xenvif_get(struct xenvif *vif)
--{
--	atomic_inc(&vif->refcnt);
--}
--
--static inline void xenvif_put(struct xenvif *vif)
--{
--	if (atomic_dec_and_test(&vif->refcnt))
--		wake_up(&vif->waiting_to_free);
--}
-+void xenvif_get(struct xenvif *vif);
-+void xenvif_put(struct xenvif *vif);
- 
- int xenvif_xenbus_init(void);
- 
--#define xenvif_schedulable(vif)	\
--	(netif_running((vif)->dev) && xenvif_carrier_ok(vif))
-+int xenvif_schedulable(struct xenvif *vif);
- 
- void xenvif_schedule_work(struct xenvif *vif);
--void xenvif_deschedule_work(struct xenvif *vif);
--
--int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev);
--struct net_device_stats *xenvif_get_stats(struct net_device *dev);
--irqreturn_t xenvif_interrupt(int irq, void *dev_id);
--
--static inline int netbk_can_queue(struct net_device *dev)
--{
--	struct xenvif *vif = netdev_priv(dev);
--	return vif->can_queue;
--}
- 
--static inline int netbk_can_sg(struct net_device *dev)
--{
--	struct xenvif *vif = netdev_priv(dev);
--	return vif->can_sg;
--}
-+int xenvif_queue_full(struct xenvif *vif);
- 
- /* (De)Register a xenvif with the netback backend. */
- void xen_netbk_add_xenvif(struct xenvif *vif);
- void xen_netbk_remove_xenvif(struct xenvif *vif);
- 
-+/* (De)Schedule backend processing for this vif. */
-+void xen_netbk_schedule_xenvif(struct xenvif *vif);
-+void xen_netbk_deschedule_xenvif(struct xenvif *vif);
-+
-+/* Count how many ring slots are needed to send an skb to the frontend. */
-+unsigned int xen_netbk_count_skb_slots(struct xenvif *vif, struct sk_buff *skb);
-+
-+/* Queue an skb for transmission to the frontend. */
-+void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb);
-+
- #endif /* __XEN_NETBACK__COMMON_H__ */
-diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
-index cfbb3cc..c906c79 100644
---- a/drivers/net/xen-netback/interface.c
-+++ b/drivers/net/xen-netback/interface.c
-@@ -36,7 +36,221 @@
- #include <xen/events.h>
- #include <asm/xen/hypercall.h>
- 
--#define NETBK_QUEUE_LENGTH 32
-+#define XENVIF_QUEUE_LENGTH 32
-+
-+void xenvif_get(struct xenvif *vif)
-+{
-+	atomic_inc(&vif->refcnt);
-+}
-+
-+void xenvif_put(struct xenvif *vif)
-+{
-+	if (atomic_dec_and_test(&vif->refcnt))
-+		wake_up(&vif->waiting_to_free);
-+}
-+
-+static int xenvif_max_required_rx_slots(struct xenvif *vif)
-+{
-+	if (vif->can_sg || vif->gso || vif->gso_prefix)
-+		return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
-+	return 1; /* all in one */
-+}
-+
-+int xenvif_queue_full(struct xenvif *vif)
-+{
-+	RING_IDX peek   = vif->rx_req_cons_peek;
-+	RING_IDX needed = xenvif_max_required_rx_slots(vif);
-+
-+	return ((vif->rx.sring->req_prod - peek) < needed) ||
-+	       ((vif->rx.rsp_prod_pvt + XEN_NETIF_RX_RING_SIZE - peek) < needed);
-+}
-+
-+/*
-+ * Implement our own carrier flag: the network stack's version causes delays
-+ * when the carrier is re-enabled (in particular, dev_activate() may not
-+ * immediately be called, which can cause packet loss; also the etherbridge
-+ * can be rather lazy in activating its port).
-+ */
-+static void xenvif_carrier_on(struct xenvif *vif)
-+{
-+	vif->carrier = 1;
-+}
-+static void xenvif_carrier_off(struct xenvif *vif)
-+{
-+	vif->carrier = 0;
-+}
-+static int xenvif_carrier_ok(struct xenvif *vif)
-+{
-+	return vif->carrier;
-+}
-+
-+int xenvif_schedulable(struct xenvif *vif)
-+{
-+	return netif_running(vif->dev) && xenvif_carrier_ok(vif);
-+}
-+
-+static irqreturn_t xenvif_interrupt(int irq, void *dev_id)
-+{
-+	struct xenvif *vif = dev_id;
-+
-+	if (vif->netbk == NULL)
-+		return IRQ_NONE;
-+
-+	xen_netbk_schedule_xenvif(vif);
-+
-+	if (xenvif_schedulable(vif) && !xenvif_queue_full(vif))
-+		netif_wake_queue(vif->dev);
-+
-+	return IRQ_HANDLED;
-+}
-+
-+/* TODO: move to networking core */
-+static struct sk_buff *xenvif_copy_skb(struct sk_buff *skb)
-+{
-+	struct skb_shared_info *ninfo;
-+	struct sk_buff *nskb;
-+	unsigned long offset;
-+	int ret;
-+	int len;
-+	int headlen;
-+
-+	BUG_ON(skb_shinfo(skb)->frag_list != NULL);
-+
-+	nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN);
-+	if (unlikely(!nskb))
-+		goto err;
-+
-+	skb_reserve(nskb, NET_SKB_PAD + NET_IP_ALIGN);
-+	headlen = skb_end_pointer(nskb) - nskb->data;
-+	if (headlen > skb_headlen(skb))
-+		headlen = skb_headlen(skb);
-+	ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
-+	BUG_ON(ret);
-+
-+	ninfo = skb_shinfo(nskb);
-+	ninfo->gso_size = skb_shinfo(skb)->gso_size;
-+	ninfo->gso_type = skb_shinfo(skb)->gso_type;
-+
-+	offset = headlen;
-+	len = skb->len - headlen;
-+
-+	nskb->len = skb->len;
-+	nskb->data_len = len;
-+	nskb->truesize += len;
-+
-+	while (len) {
-+		struct page *page;
-+		int copy;
-+		int zero;
-+
-+		if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
-+			dump_stack();
-+			goto err_free;
-+		}
-+
-+		copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
-+		zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
-+
-+		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
-+		if (unlikely(!page))
-+			goto err_free;
-+
-+		ret = skb_copy_bits(skb, offset, page_address(page), copy);
-+		BUG_ON(ret);
-+
-+		ninfo->frags[ninfo->nr_frags].page = page;
-+		ninfo->frags[ninfo->nr_frags].page_offset = 0;
-+		ninfo->frags[ninfo->nr_frags].size = copy;
-+		ninfo->nr_frags++;
-+
-+		offset += copy;
-+		len -= copy;
-+	}
-+
-+#ifdef NET_SKBUFF_DATA_USES_OFFSET
-+	offset = 0;
-+#else
-+	offset = nskb->data - skb->data;
-+#endif
-+
-+	nskb->transport_header = skb->transport_header + offset;
-+	nskb->network_header = skb->network_header + offset;
-+	nskb->mac_header = skb->mac_header + offset;
-+
-+	return nskb;
-+
-+ err_free:
-+	kfree_skb(nskb);
-+ err:
-+	return NULL;
-+}
-+
-+static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
-+{
-+	struct xenvif *vif = netdev_priv(dev);
-+
-+	BUG_ON(skb->dev != dev);
-+
-+	if (vif->netbk == NULL)
-+		goto drop;
-+
-+	/* Drop the packet if the target domain has no receive buffers. */
-+	if (unlikely(!xenvif_schedulable(vif) || xenvif_queue_full(vif)))
-+		goto drop;
-+
-+	/*
-+	 * XXX For now we also copy skbuffs whose head crosses a page
-+	 * boundary, because netbk_gop_skb can't handle them.
-+	 */
-+	if ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE) {
-+		struct sk_buff *nskb = xenvif_copy_skb(skb);
-+		if (unlikely(nskb == NULL))
-+			goto drop;
-+		/* Copy only the header fields we use in this driver. */
-+		nskb->dev = skb->dev;
-+		nskb->ip_summed = skb->ip_summed;
-+		dev_kfree_skb(skb);
-+		skb = nskb;
-+	}
-+
-+	/* Reserve ring slots for the worst-case number of fragments. */
-+	vif->rx_req_cons_peek += xen_netbk_count_skb_slots(vif, skb);
-+	xenvif_get(vif);
-+
-+	if (vif->can_queue && xenvif_queue_full(vif)) {
-+		vif->rx.sring->req_event = vif->rx_req_cons_peek +
-+			xenvif_max_required_rx_slots(vif);
-+		mb(); /* request notification /then/ check & stop the queue */
-+		if (xenvif_queue_full(vif))
-+			netif_stop_queue(dev);
-+	}
-+
-+	xen_netbk_queue_tx_skb(vif, skb);
-+
-+	return 0;
-+
-+ drop:
-+	vif->stats.tx_dropped++;
-+	dev_kfree_skb(skb);
-+	return 0;
-+}
-+
-+static struct net_device_stats *xenvif_get_stats(struct net_device *dev)
-+{
-+	struct xenvif *vif = netdev_priv(dev);
-+	return &vif->stats;
-+}
-+
-+void xenvif_schedule_work(struct xenvif *vif)
-+{
-+	int more_to_do;
-+
-+	RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);
-+
-+	if (more_to_do)
-+		xen_netbk_schedule_xenvif(vif);
-+}
-+
- 
- static void xenvif_up(struct xenvif *vif)
- {
-@@ -48,7 +262,7 @@ static void xenvif_up(struct xenvif *vif)
- static void xenvif_down(struct xenvif *vif)
- {
- 	disable_irq(vif->irq);
--	xenvif_deschedule_work(vif);
-+	xen_netbk_deschedule_xenvif(vif);
- 	xen_netbk_remove_xenvif(vif);
- }
- 
-@@ -73,7 +287,8 @@ static int xenvif_close(struct net_device *dev)
- 
- static int xenvif_change_mtu(struct net_device *dev, int mtu)
- {
--	int max = netbk_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
-+	struct xenvif *vif = netdev_priv(dev);
-+	int max = vif->can_sg ? 65535 - ETH_HLEN : ETH_DATA_LEN;
- 
- 	if (mtu > max)
- 		return -EINVAL;
-@@ -81,7 +296,7 @@ static int xenvif_change_mtu(struct net_device *dev, int mtu)
- 	return 0;
- }
- 
--void xenvif_set_features(struct xenvif *vif)
-+static void xenvif_set_features(struct xenvif *vif)
- {
- 	struct net_device *dev = vif->dev;
- 	int features = dev->features;
-@@ -256,7 +471,7 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
- 	xenvif_set_features(vif);
- 	SET_ETHTOOL_OPS(dev, &xenvif_ethtool_ops);
- 
--	dev->tx_queue_len = NETBK_QUEUE_LENGTH;
-+	dev->tx_queue_len = XENVIF_QUEUE_LENGTH;
- 
- 	/*
- 	 * Initialise a dummy MAC address. We choose the numerically
-@@ -352,6 +567,8 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
- 	if (vif->irq)
- 		return 0;
- 
-+	xenvif_set_features(vif);
-+
- 	vif->tx_comms_area = alloc_vm_area(PAGE_SIZE);
- 	if (vif->tx_comms_area == NULL)
- 		return -ENOMEM;
-diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
-index 4f04d2d..1b7005c 100644
---- a/drivers/net/xen-netback/netback.c
-+++ b/drivers/net/xen-netback/netback.c
-@@ -131,10 +131,10 @@ struct xen_netbk {
- 	 * MAX_BUFFER_OFFSET of 4096 the worst case is that each
- 	 * head/fragment uses 2 copy operation.
- 	 */
--	struct gnttab_copy grant_copy_op[2*NET_RX_RING_SIZE];
-+	struct gnttab_copy grant_copy_op[2*XEN_NETIF_RX_RING_SIZE];
- 	unsigned char rx_notify[NR_IRQS];
--	u16 notify_list[NET_RX_RING_SIZE];
--	struct netbk_rx_meta meta[2*NET_RX_RING_SIZE];
-+	u16 notify_list[XEN_NETIF_RX_RING_SIZE];
-+	struct netbk_rx_meta meta[2*XEN_NETIF_RX_RING_SIZE];
- };
- 
- static struct xen_netbk *xen_netbk;
-@@ -164,7 +164,9 @@ void xen_netbk_add_xenvif(struct xenvif *vif)
- 
- void xen_netbk_remove_xenvif(struct xenvif *vif)
- {
--	atomic_dec(&vif->netbk->netfront_count);
-+	struct xen_netbk *netbk = vif->netbk;
-+	vif->netbk = NULL;
-+	atomic_dec(&netbk->netfront_count);
- }
- 
- static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx);
-@@ -275,7 +277,7 @@ MODULE_PARM_DESC(netback_kthread, "Use kernel thread to replace tasklet");
-  * dir indicates the data direction.
-  * rx: 1, tx: 0.
-  */
--static inline void xen_netbk_bh_handler(struct xen_netbk *netbk, int dir)
-+void xen_netbk_bh_handler(struct xen_netbk *netbk, int dir)
- {
- 	if (MODPARM_netback_kthread)
- 		wake_up(&netbk->kthread.netbk_action_wq);
-@@ -285,110 +287,6 @@ static inline void xen_netbk_bh_handler(struct xen_netbk *netbk, int dir)
- 		tasklet_schedule(&netbk->tasklet.net_tx_tasklet);
- }
- 
--static inline void maybe_schedule_tx_action(struct xen_netbk *netbk)
--{
--	smp_mb();
--	if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
--	    !list_empty(&netbk->net_schedule_list))
--		xen_netbk_bh_handler(netbk, 0);
--}
--
--static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
--{
--	struct skb_shared_info *ninfo;
--	struct sk_buff *nskb;
--	unsigned long offset;
--	int ret;
--	int len;
--	int headlen;
--
--	BUG_ON(skb_shinfo(skb)->frag_list != NULL);
--
--	nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN);
--	if (unlikely(!nskb))
--		goto err;
--
--	skb_reserve(nskb, NET_SKB_PAD + NET_IP_ALIGN);
--	headlen = skb_end_pointer(nskb) - nskb->data;
--	if (headlen > skb_headlen(skb))
--		headlen = skb_headlen(skb);
--	ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
--	BUG_ON(ret);
--
--	ninfo = skb_shinfo(nskb);
--	ninfo->gso_size = skb_shinfo(skb)->gso_size;
--	ninfo->gso_type = skb_shinfo(skb)->gso_type;
--
--	offset = headlen;
--	len = skb->len - headlen;
--
--	nskb->len = skb->len;
--	nskb->data_len = len;
--	nskb->truesize += len;
--
--	while (len) {
--		struct page *page;
--		int copy;
--		int zero;
--
--		if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
--			dump_stack();
--			goto err_free;
--		}
--
--		copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
--		zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
--
--		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
--		if (unlikely(!page))
--			goto err_free;
--
--		ret = skb_copy_bits(skb, offset, page_address(page), copy);
--		BUG_ON(ret);
--
--		ninfo->frags[ninfo->nr_frags].page = page;
--		ninfo->frags[ninfo->nr_frags].page_offset = 0;
--		ninfo->frags[ninfo->nr_frags].size = copy;
--		ninfo->nr_frags++;
--
--		offset += copy;
--		len -= copy;
--	}
--
--#ifdef NET_SKBUFF_DATA_USES_OFFSET
--	offset = 0;
--#else
--	offset = nskb->data - skb->data;
--#endif
--
--	nskb->transport_header = skb->transport_header + offset;
--	nskb->network_header = skb->network_header + offset;
--	nskb->mac_header = skb->mac_header + offset;
--
--	return nskb;
--
-- err_free:
--	kfree_skb(nskb);
-- err:
--	return NULL;
--}
--
--static inline int xenvif_max_required_rx_slots(struct xenvif *vif)
--{
--	if (vif->can_sg || vif->gso || vif->gso_prefix)
--		return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
--	return 1; /* all in one */
--}
--
--static inline int xenvif_queue_full(struct xenvif *vif)
--{
--	RING_IDX peek   = vif->rx_req_cons_peek;
--	RING_IDX needed = xenvif_max_required_rx_slots(vif);
--
--	return ((vif->rx.sring->req_prod - peek) < needed) ||
--	       ((vif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
--}
--
- /*
-  * Returns true if we should start a new receive buffer instead of
-  * adding 'size' bytes to a buffer which currently contains 'offset'
-@@ -434,7 +332,7 @@ static bool start_new_rx_buffer(int offset, unsigned long size, int head)
-  * the guest. This function is essentially a dry run of
-  * netbk_gop_frag_copy.
-  */
--static unsigned int count_skb_slots(struct sk_buff *skb, struct xenvif *vif)
-+unsigned int xen_netbk_count_skb_slots(struct xenvif *vif, struct sk_buff *skb)
- {
- 	unsigned int count = 1;
- 	int i, copy_off = 0;
-@@ -468,60 +366,6 @@ static unsigned int count_skb_slots(struct sk_buff *skb, struct xenvif *vif)
- 	return count;
- }
- 
--int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
--{
--	struct xenvif *vif = netdev_priv(dev);
--	struct xen_netbk *netbk;
--
--	BUG_ON(skb->dev != dev);
--
--	if (vif->netbk == NULL)
--		goto drop;
--
--	netbk = vif->netbk;
--
--	/* Drop the packet if the target domain has no receive buffers. */
--	if (unlikely(!xenvif_schedulable(vif) || xenvif_queue_full(vif)))
--		goto drop;
--
--	/*
--	 * XXX For now we also copy skbuffs whose head crosses a page
--	 * boundary, because netbk_gop_skb can't handle them.
--	 */
--	if ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE) {
--		struct sk_buff *nskb = netbk_copy_skb(skb);
--		if (unlikely(nskb == NULL))
--			goto drop;
--		/* Copy only the header fields we use in this driver. */
--		nskb->dev = skb->dev;
--		nskb->ip_summed = skb->ip_summed;
--		dev_kfree_skb(skb);
--		skb = nskb;
--	}
--
--	/* Reserve ring slots for the worst-case number of fragments. */
--	vif->rx_req_cons_peek += count_skb_slots(skb, vif);
--	xenvif_get(vif);
--
--	if (netbk_can_queue(dev) && xenvif_queue_full(vif)) {
--		vif->rx.sring->req_event = vif->rx_req_cons_peek +
--			xenvif_max_required_rx_slots(vif);
--		mb(); /* request notification /then/ check & stop the queue */
--		if (xenvif_queue_full(vif))
--			netif_stop_queue(dev);
--	}
--	skb_queue_tail(&netbk->rx_queue, skb);
--
--	xen_netbk_bh_handler(netbk, 1);
--
--	return 0;
--
-- drop:
--	vif->stats.tx_dropped++;
--	dev_kfree_skb(skb);
--	return 0;
--}
--
- struct netrx_pending_operations {
- 	unsigned copy_prod, copy_cons;
- 	unsigned meta_prod, meta_cons;
-@@ -780,7 +624,7 @@ static void xen_netbk_rx_action(unsigned long data)
- 		__skb_queue_tail(&rxq, skb);
- 
- 		/* Filled the batch queue? */
--		if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE)
-+		if (count + MAX_SKB_FRAGS >= XEN_NETIF_RX_RING_SIZE)
- 			break;
- 	}
- 
-@@ -887,9 +731,12 @@ static void xen_netbk_rx_action(unsigned long data)
- 		xen_netbk_bh_handler(netbk, 1);
- }
- 
--static void net_alarm(unsigned long data)
-+void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb)
- {
--	struct xen_netbk *netbk = (struct xen_netbk *)data;
-+	struct xen_netbk *netbk = vif->netbk;
-+
-+	skb_queue_tail(&netbk->rx_queue, skb);
-+
- 	xen_netbk_bh_handler(netbk, 1);
- }
- 
-@@ -899,10 +746,10 @@ static void xen_netbk_tx_pending_timeout(unsigned long data)
- 	xen_netbk_bh_handler(netbk, 0);
- }
- 
--struct net_device_stats *xenvif_get_stats(struct net_device *dev)
-+static void xen_netbk_alarm(unsigned long data)
- {
--	struct xenvif *vif = netdev_priv(dev);
--	return &vif->stats;
-+	struct xen_netbk *netbk = (struct xen_netbk *)data;
-+	xen_netbk_bh_handler(netbk, 1);
- }
- 
- static int __on_net_schedule_list(struct xenvif *vif)
-@@ -940,13 +787,13 @@ out:
- 	return vif;
- }
- 
--static void add_to_net_schedule_list_tail(struct xenvif *vif)
-+void xen_netbk_schedule_xenvif(struct xenvif *vif)
- {
- 	unsigned long flags;
- 
- 	struct xen_netbk *netbk = vif->netbk;
- 	if (__on_net_schedule_list(vif))
--		return;
-+		goto kick;
- 
- 	spin_lock_irqsave(&netbk->net_schedule_list_lock, flags);
- 	if (!__on_net_schedule_list(vif) &&
-@@ -955,22 +802,15 @@ static void add_to_net_schedule_list_tail(struct xenvif *vif)
- 		xenvif_get(vif);
- 	}
- 	spin_unlock_irqrestore(&netbk->net_schedule_list_lock, flags);
--}
--
--void xenvif_schedule_work(struct xenvif *vif)
--{
--	struct xen_netbk *netbk = vif->netbk;
--	int more_to_do;
--
--	RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);
- 
--	if (more_to_do) {
--		add_to_net_schedule_list_tail(vif);
--		maybe_schedule_tx_action(netbk);
--	}
-+kick:
-+	smp_mb();
-+	if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
-+	    !list_empty(&netbk->net_schedule_list))
-+		xen_netbk_bh_handler(netbk, 0);
- }
- 
--void xenvif_deschedule_work(struct xenvif *vif)
-+void xen_netbk_deschedule_xenfif(struct xenvif *vif)
- {
- 	struct xen_netbk *netbk = vif->netbk;
- 	spin_lock_irq(&netbk->net_schedule_list_lock);
-@@ -1019,7 +859,7 @@ static inline void xen_netbk_tx_action_dealloc(struct xen_netbk *netbk)
- 	struct gnttab_unmap_grant_ref *gop;
- 	u16 pending_idx;
- 	pending_ring_idx_t dc, dp;
--	struct xen_netif *netif;
-+	struct xenvif *vif;
- 	int ret;
- 	LIST_HEAD(list);
- 
-@@ -1077,7 +917,7 @@ static inline void xen_netbk_tx_action_dealloc(struct xen_netbk *netbk)
- 
- 			pending_idx = inuse - netbk->pending_inuse;
- 
--			pending_tx_info[pending_idx].netif->nr_copied_skbs++;
-+			pending_tx_info[pending_idx].vif->nr_copied_skbs++;
- 
- 			switch (copy_pending_req(netbk, pending_idx)) {
- 			case 0:
-@@ -1101,9 +941,9 @@ static inline void xen_netbk_tx_action_dealloc(struct xen_netbk *netbk)
- 		pending_tx_info = netbk->pending_tx_info;
- 		pending_idx = inuse - netbk->pending_inuse;
- 
--		netif = pending_tx_info[pending_idx].netif;
-+		vif = pending_tx_info[pending_idx].vif;
- 
--		make_tx_response(netif, &pending_tx_info[pending_idx].req,
-+		make_tx_response(vif, &pending_tx_info[pending_idx].req,
- 				 XEN_NETIF_RSP_OKAY);
- 
- 		/* Ready for next use. */
-@@ -1200,7 +1040,7 @@ static struct gnttab_map_grant_ref *xen_netbk_get_requests(struct xen_netbk *net
- 
- 		gnttab_set_map_op(mop++, idx_to_kaddr(netbk, pending_idx),
- 				  GNTMAP_host_map | GNTMAP_readonly,
--				  txp->gref, netif->domid);
-+				  txp->gref, vif->domid);
- 
- 		memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
- 		xenvif_get(vif);
-@@ -1319,9 +1159,9 @@ static void xen_netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
- 	}
- }
- 
--int netbk_get_extras(struct xenvif *vif,
--		     struct xen_netif_extra_info *extras,
--		     int work_to_do)
-+static int xen_netbk_get_extras(struct xenvif *vif,
-+				struct xen_netif_extra_info *extras,
-+				int work_to_do)
- {
- 	struct xen_netif_extra_info extra;
- 	RING_IDX cons = vif->tx.req_cons;
-@@ -1522,8 +1362,8 @@ static unsigned xen_netbk_tx_build_mops(struct xen_netbk *netbk)
- 
- 		memset(extras, 0, sizeof(extras));
- 		if (txreq.flags & XEN_NETTXF_extra_info) {
--			work_to_do = netbk_get_extras(vif, extras,
--						      work_to_do);
-+			work_to_do = xen_netbk_get_extras(vif, extras,
-+							  work_to_do);
- 			idx = vif->tx.req_cons;
- 			if (unlikely(work_to_do < 0)) {
- 				netbk_tx_err(vif, &txreq, idx);
-@@ -1584,7 +1424,7 @@ static unsigned xen_netbk_tx_build_mops(struct xen_netbk *netbk)
- 
- 		gnttab_set_map_op(mop, idx_to_kaddr(netbk, pending_idx),
- 				  GNTMAP_host_map | GNTMAP_readonly,
--				  txreq.gref, netif->domid);
-+				  txreq.gref, vif->domid);
- 		mop++;
- 
- 		memcpy(&netbk->pending_tx_info[pending_idx].req,
-@@ -1608,7 +1448,7 @@ static unsigned xen_netbk_tx_build_mops(struct xen_netbk *netbk)
- 
- 		netbk->pending_cons++;
- 
--		mop = xen_netbk_get_requests(netbk, netif, skb, txfrags, mop);
-+		mop = xen_netbk_get_requests(netbk, vif, skb, txfrags, mop);
- 
- 		vif->tx.req_cons = idx;
- 		xenvif_schedule_work(vif);
-@@ -1705,7 +1545,7 @@ static void xen_netbk_tx_action(unsigned long data)
- 	unsigned nr_mops;
- 	int ret;
- 
--	net_tx_action_dealloc(netbk);
-+	xen_netbk_tx_action_dealloc(netbk);
- 
- 	nr_mops = xen_netbk_tx_build_mops(netbk);
- 
-@@ -1738,7 +1578,7 @@ static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx)
- 	spin_lock_irqsave(&_lock, flags);
- 	index = pending_index(netbk->dealloc_prod);
- 	netbk->dealloc_ring[index] = pending_idx;
--	/* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
-+	/* Sync with xen_netbk_tx_action_dealloc: insert idx /then/ incr producer. */
- 	smp_wmb();
- 	netbk->dealloc_prod++;
- 	spin_unlock_irqrestore(&_lock, flags);
-@@ -1757,23 +1597,6 @@ static void netif_page_release(struct page *page, unsigned int order)
- 	xen_netbk_idx_release(&xen_netbk[group], idx);
- }
- 
--irqreturn_t xenvif_interrupt(int irq, void *dev_id)
--{
--	struct xenvif *vif = dev_id;
--	struct xen_netbk *netbk = vif->netbk;
--
--	if (netbk == NULL)
--		return IRQ_NONE;
--
--	add_to_net_schedule_list_tail(vif);
--	maybe_schedule_tx_action(netbk);
--
--	if (xenvif_schedulable(vif) && !xenvif_queue_full(vif))
--		netif_wake_queue(vif->dev);
--
--	return IRQ_HANDLED;
--}
--
- static void make_tx_response(struct xenvif *vif,
- 			     struct xen_netif_tx_request *txp,
- 			     s8       st)
-@@ -1887,12 +1710,12 @@ static int __init netback_init(void)
- 
- 		init_timer(&netbk->net_timer);
- 		netbk->net_timer.data = (unsigned long)netbk;
--		netbk->net_timer.function = net_alarm;
-+		netbk->net_timer.function = xen_netbk_alarm;
- 
- 		init_timer(&netbk->netbk_tx_pending_timer);
- 		netbk->netbk_tx_pending_timer.data = (unsigned long)netbk;
- 		netbk->netbk_tx_pending_timer.function =
--			netbk_tx_pending_timeout;
-+			xen_netbk_tx_pending_timeout;
- 
- 		netbk->mmap_pages =
- 			alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
-diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
-index a6ad259..e854420 100644
---- a/drivers/net/xen-netback/xenbus.c
-+++ b/drivers/net/xen-netback/xenbus.c
-@@ -21,6 +21,14 @@
- 
- #include "common.h"
- 
-+struct backend_info {
-+	struct xenbus_device *dev;
-+	struct xenvif *vif;
-+	enum xenbus_state frontend_state;
-+	struct xenbus_watch hotplug_status_watch;
-+	int have_hotplug_status_watch:1;
-+};
-+
- static int connect_rings(struct backend_info *);
- static void connect(struct backend_info *);
- static void backend_create_xenvif(struct backend_info *be);
-@@ -447,9 +455,6 @@ static int connect_rings(struct backend_info *be)
- 		val = 0;
- 	vif->csum = !val;
- 
--	/* Set dev->features */
--	xenvif_set_features(vif);
--
- 	/* Map the shared frame, irq etc. */
- 	err = xenvif_connect(vif, tx_ring_ref, rx_ring_ref, evtchn);
- 	if (err) {
--- 
-1.7.4
-
-
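A note on the dealloc bookkeeping visible above: xen_netbk_idx_release() and
xen_netbk_tx_action_dealloc() form a single-producer/single-consumer ring,
where the producer must publish the slot before bumping dealloc_prod (the
smp_wmb()) and the consumer must read dealloc_prod before reading the slots
(the smp_rmb() in the dealloc loop). A minimal userspace C11 analogue of that
ordering, assuming a power-of-two ring and with all names invented for
illustration, not taken from the driver:

    /* Sketch of the dealloc ring's publish/consume ordering; the kernel
     * code uses smp_wmb()/smp_rmb() where this uses release/acquire. */
    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    #define RING_SIZE 256                 /* MAX_PENDING_REQS in the patch */

    static uint16_t ring[RING_SIZE];
    static atomic_uint prod;              /* written by producer only */
    static atomic_uint cons;              /* written by consumer only */

    /* Producer: store the slot /then/ publish the new producer index. */
    static void ring_push(uint16_t idx)
    {
            unsigned int p = atomic_load_explicit(&prod, memory_order_relaxed);
            ring[p % RING_SIZE] = idx;
            atomic_store_explicit(&prod, p + 1, memory_order_release);
    }

    /* Consumer: read the producer index /then/ the slots it covers. */
    static int ring_pop(uint16_t *idx)
    {
            unsigned int c = atomic_load_explicit(&cons, memory_order_relaxed);
            unsigned int p = atomic_load_explicit(&prod, memory_order_acquire);
            if (c == p)
                    return 0;             /* ring is empty */
            *idx = ring[c % RING_SIZE];
            atomic_store_explicit(&cons, c + 1, memory_order_release);
            return 1;
    }

    int main(void)
    {
            uint16_t v;
            ring_push(42);
            if (ring_pop(&v))
                    printf("popped %u\n", (unsigned int)v);
            return 0;
    }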
-From f1c011ec476d1ee2c36afb9df40024ffca087b08 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Fri, 21 Jan 2011 11:10:22 +0000
-Subject: [PATCH 097/203] xen: netback: switch to kthread mode and drop tasklet mode
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/net/xen-netback/netback.c |  108 +++++++++++-------------------------
- 1 files changed, 33 insertions(+), 75 deletions(-)
-
-diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
-index 1b7005c..73e35fd 100644
---- a/drivers/net/xen-netback/netback.c
-+++ b/drivers/net/xen-netback/netback.c
-@@ -84,17 +84,8 @@ union page_ext {
- };
- 
- struct xen_netbk {
--	union {
--		struct {
--			struct tasklet_struct net_tx_tasklet;
--			struct tasklet_struct net_rx_tasklet;
--		} tasklet;
--
--		struct {
--			wait_queue_head_t netbk_action_wq;
--			struct task_struct *task;
--		} kthread;
--	};
-+	wait_queue_head_t wq;
-+	struct task_struct *task;
- 
- 	struct sk_buff_head rx_queue;
- 	struct sk_buff_head tx_queue;
-@@ -180,10 +171,6 @@ static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
- 					     u16      size,
- 					     u16      flags);
- 
--static void xen_netbk_tx_action(unsigned long data);
--
--static void xen_netbk_rx_action(unsigned long data);
--
- static inline unsigned long idx_to_pfn(struct xen_netbk *netbk,
- 				       unsigned int idx)
- {
-@@ -268,23 +255,9 @@ MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
- 
- int netbk_copy_skb_mode;
- 
--static int MODPARM_netback_kthread;
--module_param_named(netback_kthread, MODPARM_netback_kthread, bool, 0);
--MODULE_PARM_DESC(netback_kthread, "Use kernel thread to replace tasklet");
--
--/*
-- * Netback bottom half handler.
-- * dir indicates the data direction.
-- * rx: 1, tx: 0.
-- */
--void xen_netbk_bh_handler(struct xen_netbk *netbk, int dir)
-+static void xen_netbk_kick_thread(struct xen_netbk *netbk)
- {
--	if (MODPARM_netback_kthread)
--		wake_up(&netbk->kthread.netbk_action_wq);
--	else if (dir)
--		tasklet_schedule(&netbk->tasklet.net_rx_tasklet);
--	else
--		tasklet_schedule(&netbk->tasklet.net_tx_tasklet);
-+	wake_up(&netbk->wq);
- }
- 
- /*
-@@ -587,10 +560,9 @@ struct skb_cb_overlay {
- 	int meta_slots_used;
- };
- 
--static void xen_netbk_rx_action(unsigned long data)
-+static void xen_netbk_rx_action(struct xen_netbk *netbk)
- {
- 	struct xenvif *vif = NULL;
--	struct xen_netbk *netbk = (struct xen_netbk *)data;
- 	s8 status;
- 	u16 irq, flags;
- 	struct xen_netif_rx_response *resp;
-@@ -728,7 +700,7 @@ static void xen_netbk_rx_action(unsigned long data)
- 	/* More work to do? */
- 	if (!skb_queue_empty(&netbk->rx_queue) &&
- 			!timer_pending(&netbk->net_timer))
--		xen_netbk_bh_handler(netbk, 1);
-+		xen_netbk_kick_thread(netbk);
- }
- 
- void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb)
-@@ -737,19 +709,19 @@ void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb)
- 
- 	skb_queue_tail(&netbk->rx_queue, skb);
- 
--	xen_netbk_bh_handler(netbk, 1);
-+	xen_netbk_kick_thread(netbk);
- }
- 
- static void xen_netbk_tx_pending_timeout(unsigned long data)
- {
- 	struct xen_netbk *netbk = (struct xen_netbk *)data;
--	xen_netbk_bh_handler(netbk, 0);
-+	xen_netbk_kick_thread(netbk);
- }
- 
- static void xen_netbk_alarm(unsigned long data)
- {
- 	struct xen_netbk *netbk = (struct xen_netbk *)data;
--	xen_netbk_bh_handler(netbk, 1);
-+	xen_netbk_kick_thread(netbk);
- }
- 
- static int __on_net_schedule_list(struct xenvif *vif)
-@@ -807,7 +779,7 @@ kick:
- 	smp_mb();
- 	if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
- 	    !list_empty(&netbk->net_schedule_list))
--		xen_netbk_bh_handler(netbk, 0);
-+		xen_netbk_kick_thread(netbk);
- }
- 
- void xen_netbk_deschedule_xenfif(struct xenvif *vif)
-@@ -1539,9 +1511,8 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk)
- }
- 
- /* Called after netfront has transmitted */
--static void xen_netbk_tx_action(unsigned long data)
-+static void xen_netbk_tx_action(struct xen_netbk *netbk)
- {
--	struct xen_netbk *netbk = (struct xen_netbk *)data;
- 	unsigned nr_mops;
- 	int ret;
- 
-@@ -1583,7 +1554,7 @@ static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx)
- 	netbk->dealloc_prod++;
- 	spin_unlock_irqrestore(&_lock, flags);
- 
--	xen_netbk_bh_handler(netbk, 0);
-+	xen_netbk_kick_thread(netbk);
- }
- 
- static void netif_page_release(struct page *page, unsigned int order)
-@@ -1662,11 +1633,11 @@ static inline int tx_work_todo(struct xen_netbk *netbk)
- 	return 0;
- }
- 
--static int xen_netbk_action_thread(void *data)
-+static int xen_netbk_kthread(void *data)
- {
- 	struct xen_netbk *netbk = (struct xen_netbk *)data;
- 	while (!kthread_should_stop()) {
--		wait_event_interruptible(netbk->kthread.netbk_action_wq,
-+		wait_event_interruptible(netbk->wq,
- 				rx_work_todo(netbk)
- 				|| tx_work_todo(netbk)
- 				|| kthread_should_stop());
-@@ -1676,10 +1647,10 @@ static int xen_netbk_action_thread(void *data)
- 			break;
- 
- 		if (rx_work_todo(netbk))
--			xen_netbk_rx_action((unsigned long)netbk);
-+			xen_netbk_rx_action(netbk);
- 
- 		if (tx_work_todo(netbk))
--			xen_netbk_tx_action((unsigned long)netbk);
-+			xen_netbk_tx_action(netbk);
- 	}
- 
- 	return 0;
-@@ -1739,34 +1710,23 @@ static int __init netback_init(void)
- 		for (i = 0; i < MAX_PENDING_REQS; i++)
- 			netbk->pending_ring[i] = i;
- 
--		if (MODPARM_netback_kthread) {
--			init_waitqueue_head(&netbk->kthread.netbk_action_wq);
--			netbk->kthread.task =
--				kthread_create(xen_netbk_action_thread,
--					       (void *)netbk,
--					       "netback/%u", group);
--
--			if (!IS_ERR(netbk->kthread.task)) {
--				kthread_bind(netbk->kthread.task, group);
--			} else {
--				printk(KERN_ALERT
--					"kthread_run() fails at netback\n");
--				free_empty_pages_and_pagevec(netbk->mmap_pages,
--						MAX_PENDING_REQS);
--				del_timer(&netbk->netbk_tx_pending_timer);
--				del_timer(&netbk->net_timer);
--				rc = PTR_ERR(netbk->kthread.task);
--				goto failed_init;
--			}
--		} else {
--			tasklet_init(&netbk->tasklet.net_tx_tasklet,
--				     xen_netbk_tx_action,
--				     (unsigned long)netbk);
--			tasklet_init(&netbk->tasklet.net_rx_tasklet,
--				     xen_netbk_rx_action,
--				     (unsigned long)netbk);
-+		init_waitqueue_head(&netbk->wq);
-+		netbk->task = kthread_create(xen_netbk_kthread,
-+					     (void *)netbk,
-+					     "netback/%u", group);
-+
-+		if (IS_ERR(netbk->task)) {
-+			printk(KERN_ALERT "kthread_run() fails at netback\n");
-+			free_empty_pages_and_pagevec(netbk->mmap_pages,
-+						     MAX_PENDING_REQS);
-+			del_timer(&netbk->netbk_tx_pending_timer);
-+			del_timer(&netbk->net_timer);
-+			rc = PTR_ERR(netbk->task);
-+			goto failed_init;
- 		}
- 
-+		kthread_bind(netbk->task, group);
-+
- 		INIT_LIST_HEAD(&netbk->pending_inuse_head);
- 		INIT_LIST_HEAD(&netbk->net_schedule_list);
- 
-@@ -1774,8 +1734,7 @@ static int __init netback_init(void)
- 
- 		atomic_set(&netbk->netfront_count, 0);
- 
--		if (MODPARM_netback_kthread)
--			wake_up_process(netbk->kthread.task);
-+		wake_up_process(netbk->task);
- 	}
- 
- 	netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
-@@ -1800,8 +1759,7 @@ failed_init:
- 				MAX_PENDING_REQS);
- 		del_timer(&netbk->netbk_tx_pending_timer);
- 		del_timer(&netbk->net_timer);
--		if (MODPARM_netback_kthread)
--			kthread_stop(netbk->kthread.task);
-+		kthread_stop(netbk->task);
- 	}
- 	vfree(xen_netbk);
- 	return rc;
--- 
-1.7.4
-
-
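The kthread mode this patch standardises on is the usual worker pattern:
sleep on a waitqueue until there is work or the thread is asked to stop, then
drain. A toy module sketching that pattern follows; it is not the driver, every
name is hypothetical, and the real todo predicates (rx_work_todo()/
tx_work_todo()) are replaced by a plain flag:

    #include <linux/module.h>
    #include <linux/kthread.h>
    #include <linux/wait.h>
    #include <linux/sched.h>
    #include <linux/err.h>

    static struct task_struct *worker;
    static wait_queue_head_t wq;
    static bool kicked;

    static int worker_fn(void *data)
    {
            while (!kthread_should_stop()) {
                    wait_event_interruptible(wq,
                                    kicked || kthread_should_stop());
                    if (kthread_should_stop())
                            break;
                    kicked = false;
                    /* ... drain rx/tx work here ... */
                    cond_resched();
            }
            return 0;
    }

    void kick_worker(void)      /* analogue of xen_netbk_kick_thread() */
    {
            kicked = true;
            wake_up(&wq);
    }

    static int __init demo_init(void)
    {
            init_waitqueue_head(&wq);
            worker = kthread_create(worker_fn, NULL, "demo-worker");
            if (IS_ERR(worker))
                    return PTR_ERR(worker);
            kthread_bind(worker, 0);  /* netback pins one thread per group */
            wake_up_process(worker);
            return 0;
    }

    static void __exit demo_exit(void)
    {
            kthread_stop(worker);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");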
-From e564d36cd3368e17218052148932322f7fe324bf Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Wed, 2 Feb 2011 17:25:26 +0000
-Subject: [PATCH 098/203] xen: netback: handle frames whose head crosses a page boundary
-
-Previously we copied any skb whose head crossed a page boundary before
-forwarding to the guest. However, dealing with these skbs is relatively
-straightforward.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/net/xen-netback/interface.c |  103 ++---------------------------------
- drivers/net/xen-netback/netback.c   |   48 +++++++++++------
- 2 files changed, 36 insertions(+), 115 deletions(-)
-
-diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
-index c906c79..1bd2eeb 100644
---- a/drivers/net/xen-netback/interface.c
-+++ b/drivers/net/xen-netback/interface.c
-@@ -51,9 +51,12 @@ void xenvif_put(struct xenvif *vif)
- 
- static int xenvif_max_required_rx_slots(struct xenvif *vif)
- {
-+	int max = DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE);
-+
- 	if (vif->can_sg || vif->gso || vif->gso_prefix)
--		return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
--	return 1; /* all in one */
-+		max += MAX_SKB_FRAGS + 1; /* extra_info + frags */
-+
-+	return max;
- }
- 
- int xenvif_queue_full(struct xenvif *vif)
-@@ -104,87 +107,6 @@ static irqreturn_t xenvif_interrupt(int irq, void *dev_id)
- 	return IRQ_HANDLED;
- }
- 
--/* TODO: move to networking core */
--static struct sk_buff *xenvif_copy_skb(struct sk_buff *skb)
--{
--	struct skb_shared_info *ninfo;
--	struct sk_buff *nskb;
--	unsigned long offset;
--	int ret;
--	int len;
--	int headlen;
--
--	BUG_ON(skb_shinfo(skb)->frag_list != NULL);
--
--	nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN);
--	if (unlikely(!nskb))
--		goto err;
--
--	skb_reserve(nskb, NET_SKB_PAD + NET_IP_ALIGN);
--	headlen = skb_end_pointer(nskb) - nskb->data;
--	if (headlen > skb_headlen(skb))
--		headlen = skb_headlen(skb);
--	ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
--	BUG_ON(ret);
--
--	ninfo = skb_shinfo(nskb);
--	ninfo->gso_size = skb_shinfo(skb)->gso_size;
--	ninfo->gso_type = skb_shinfo(skb)->gso_type;
--
--	offset = headlen;
--	len = skb->len - headlen;
--
--	nskb->len = skb->len;
--	nskb->data_len = len;
--	nskb->truesize += len;
--
--	while (len) {
--		struct page *page;
--		int copy;
--		int zero;
--
--		if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
--			dump_stack();
--			goto err_free;
--		}
--
--		copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
--		zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
--
--		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
--		if (unlikely(!page))
--			goto err_free;
--
--		ret = skb_copy_bits(skb, offset, page_address(page), copy);
--		BUG_ON(ret);
--
--		ninfo->frags[ninfo->nr_frags].page = page;
--		ninfo->frags[ninfo->nr_frags].page_offset = 0;
--		ninfo->frags[ninfo->nr_frags].size = copy;
--		ninfo->nr_frags++;
--
--		offset += copy;
--		len -= copy;
--	}
--
--#ifdef NET_SKBUFF_DATA_USES_OFFSET
--	offset = 0;
--#else
--	offset = nskb->data - skb->data;
--#endif
--
--	nskb->transport_header = skb->transport_header + offset;
--	nskb->network_header = skb->network_header + offset;
--	nskb->mac_header = skb->mac_header + offset;
--
--	return nskb;
--
-- err_free:
--	kfree_skb(nskb);
-- err:
--	return NULL;
--}
--
- static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
- {
- 	struct xenvif *vif = netdev_priv(dev);
-@@ -198,21 +120,6 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
- 	if (unlikely(!xenvif_schedulable(vif) || xenvif_queue_full(vif)))
- 		goto drop;
- 
--	/*
--	 * XXX For now we also copy skbuffs whose head crosses a page
--	 * boundary, because netbk_gop_skb can't handle them.
--	 */
--	if ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE) {
--		struct sk_buff *nskb = xenvif_copy_skb(skb);
--		if (unlikely(nskb == NULL))
--			goto drop;
--		/* Copy only the header fields we use in this driver. */
--		nskb->dev = skb->dev;
--		nskb->ip_summed = skb->ip_summed;
--		dev_kfree_skb(skb);
--		skb = nskb;
--	}
--
- 	/* Reserve ring slots for the worst-case number of fragments. */
- 	vif->rx_req_cons_peek += xen_netbk_count_skb_slots(vif, skb);
- 	xenvif_get(vif);
-diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
-index 73e35fd..79b8c30 100644
---- a/drivers/net/xen-netback/netback.c
-+++ b/drivers/net/xen-netback/netback.c
-@@ -307,12 +307,13 @@ static bool start_new_rx_buffer(int offset, unsigned long size, int head)
-  */
- unsigned int xen_netbk_count_skb_slots(struct xenvif *vif, struct sk_buff *skb)
- {
--	unsigned int count = 1;
--	int i, copy_off = 0;
-+	unsigned int count;
-+	int i, copy_off;
- 
--	BUG_ON(offset_in_page(skb->data)+skb_headlen(skb) > MAX_BUFFER_OFFSET);
-+	count = DIV_ROUND_UP(
-+			offset_in_page(skb->data)+skb_headlen(skb), PAGE_SIZE);
- 
--	copy_off = skb_headlen(skb);
-+	copy_off = skb_headlen(skb) % PAGE_SIZE;
- 
- 	if (skb_shinfo(skb)->gso_size)
- 		count++;
-@@ -371,10 +372,10 @@ static struct netbk_rx_meta *get_next_rx_buffer(struct xenvif *vif,
-  * Set up the grant operations for this fragment. If it's a flipping
-  * interface, we also set up the unmap request from here.
-  */
--static void netbk_gop_frag_copy(struct xenvif *vif,
-+static void netbk_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
- 				struct netrx_pending_operations *npo,
- 				struct page *page, unsigned long size,
--				unsigned long offset, int head)
-+				unsigned long offset, int *head)
- {
- 	struct gnttab_copy *copy_gop;
- 	struct netbk_rx_meta *meta;
-@@ -394,12 +395,12 @@ static void netbk_gop_frag_copy(struct xenvif *vif,
- 	while (size > 0) {
- 		BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
- 
--		if (start_new_rx_buffer(npo->copy_off, size, head)) {
-+		if (start_new_rx_buffer(npo->copy_off, size, *head)) {
- 			/*
- 			 * Netfront requires there to be some data in the head
- 			 * buffer.
- 			 */
--			BUG_ON(head);
-+			BUG_ON(*head);
- 
- 			meta = get_next_rx_buffer(vif, npo);
- 		}
-@@ -436,7 +437,13 @@ static void netbk_gop_frag_copy(struct xenvif *vif,
- 
- 		offset += bytes;
- 		size -= bytes;
--		head = 0; /* There must be something in this buffer now. */
-+
-+		/* Leave a gap for the GSO descriptor. */
-+		if (*head && skb_shinfo(skb)->gso_size && !vif->gso_prefix)
-+			vif->rx.req_cons++;
-+
-+		*head = 0; /* There must be something in this buffer now. */
-+
- 	}
- }
- 
-@@ -460,6 +467,8 @@ static int netbk_gop_skb(struct sk_buff *skb,
- 	int i;
- 	struct xen_netif_rx_request *req;
- 	struct netbk_rx_meta *meta;
-+	unsigned char *data;
-+	int head = 1;
- 	int old_meta_prod;
- 
- 	old_meta_prod = npo->meta_prod;
-@@ -486,20 +495,25 @@ static int netbk_gop_skb(struct sk_buff *skb,
- 	npo->copy_off = 0;
- 	npo->copy_gref = req->gref;
- 
--	netbk_gop_frag_copy(vif, npo, virt_to_page(skb->data),
--			    skb_headlen(skb),
--			    offset_in_page(skb->data), 1);
-+	data = skb->data;
-+	while (data < skb_tail_pointer(skb)) {
-+		unsigned int offset = offset_in_page(data);
-+		unsigned int len = PAGE_SIZE - offset;
- 
--	/* Leave a gap for the GSO descriptor. */
--	if (skb_shinfo(skb)->gso_size && !vif->gso_prefix)
--		vif->rx.req_cons++;
-+		if (data + len > skb_tail_pointer(skb))
-+			len = skb_tail_pointer(skb) - data;
-+
-+		netbk_gop_frag_copy(vif, skb, npo,
-+				    virt_to_page(data), len, offset, &head);
-+		data += len;
-+	}
- 
- 	for (i = 0; i < nr_frags; i++) {
--		netbk_gop_frag_copy(vif, npo,
-+		netbk_gop_frag_copy(vif, skb, npo,
- 				    skb_shinfo(skb)->frags[i].page,
- 				    skb_shinfo(skb)->frags[i].size,
- 				    skb_shinfo(skb)->frags[i].page_offset,
--				    0);
-+				    &head);
- 	}
- 
- 	return npo->meta_prod - old_meta_prod;
--- 
-1.7.4
-
-
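The key arithmetic in the patch above is splitting a linear skb head into
per-page chunks and counting the worst-case ring slots with
DIV_ROUND_UP(offset_in_page(data) + headlen, PAGE_SIZE). A standalone
userspace sketch of the same arithmetic, assuming 4 KiB pages and using plain
integers instead of real skb pointers:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SIZE           4096UL
    #define DIV_ROUND_UP(n, d)  (((n) + (d) - 1) / (d))

    static void walk_head(uintptr_t data, unsigned long headlen)
    {
            uintptr_t tail = data + headlen;
            unsigned long slots =
                    DIV_ROUND_UP((data & (PAGE_SIZE - 1)) + headlen,
                                 PAGE_SIZE);

            printf("worst-case slots for this head: %lu\n", slots);

            while (data < tail) {
                    unsigned long offset = data & (PAGE_SIZE - 1);
                    unsigned long len = PAGE_SIZE - offset;

                    /* Last chunk may be shorter than the rest of the page. */
                    if (data + len > tail)
                            len = tail - data;
                    printf("  chunk at page offset %4lu, len %4lu\n",
                           offset, len);
                    data += len;
            }
    }

    int main(void)
    {
            /* A 300-byte head starting 4000 bytes into a page crosses one
             * page boundary, so it takes two chunks and two slots. */
            walk_head(0x10FA0, 300);
            return 0;
    }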
-From 64702558e140c916ac2aca9bf0ec50521de105ca Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Sat, 11 Dec 2010 10:15:50 +0000
-Subject: [PATCH 099/203] xen: netback: Make dependency on PageForeign conditional
-
-When PageForeign is not available we fall back to a copying TX mode.
-
-All uses of PageForeign are now gated with HAVE_XEN_PAGE_FOREIGN; this should
-allow for easier removal of the dependency for upstream, e.g. using unifdef.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/net/xen-netback/common.h    |    4 +
- drivers/net/xen-netback/interface.c |    2 +
- drivers/net/xen-netback/netback.c   |  279 ++++++++++++++++++++++++++++++-----
- drivers/net/xen-netback/xenbus.c    |    2 +
- 4 files changed, 252 insertions(+), 35 deletions(-)
-
-diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
-index b998a27..8890825 100644
---- a/drivers/net/xen-netback/common.h
-+++ b/drivers/net/xen-netback/common.h
-@@ -95,7 +95,9 @@ struct xenvif {
- 	struct timer_list credit_timeout;
- 
- 	/* Statistics */
-+#ifdef HAVE_XEN_PAGE_FOREIGN
- 	int nr_copied_skbs;
-+#endif
- 	int rx_gso_checksum_fixup;
- 
- 	/* Miscellaneous private stuff. */
-@@ -109,6 +111,7 @@ struct xenvif {
- 	wait_queue_head_t waiting_to_free;
- };
- 
-+#ifdef HAVE_XEN_PAGE_FOREIGN
- enum {
- 	NETBK_DONT_COPY_SKB,
- 	NETBK_DELAYED_COPY_SKB,
-@@ -116,6 +119,7 @@ enum {
- };
- 
- extern int netbk_copy_skb_mode;
-+#endif
- 
- #define XEN_NETIF_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
- #define XEN_NETIF_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
-diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
-index 1bd2eeb..6e11878 100644
---- a/drivers/net/xen-netback/interface.c
-+++ b/drivers/net/xen-netback/interface.c
-@@ -272,10 +272,12 @@ static const struct xenvif_stat {
- 	char name[ETH_GSTRING_LEN];
- 	u16 offset;
- } xenvif_stats[] = {
-+#ifdef HAVE_XEN_PAGE_FOREIGN
- 	{
- 		"copied_skbs",
- 		offsetof(struct xenvif, nr_copied_skbs)
- 	},
-+#endif
- 	{
- 		"rx_gso_checksum_fixup",
- 		offsetof(struct xenvif, rx_gso_checksum_fixup)
-diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
-index 79b8c30..265fbdd 100644
---- a/drivers/net/xen-netback/netback.c
-+++ b/drivers/net/xen-netback/netback.c
-@@ -40,7 +40,9 @@
- 
- #include <net/tcp.h>
- 
-+#ifdef HAVE_XEN_PAGE_FOREIGN
- #include <xen/balloon.h>
-+#endif
- #include <xen/events.h>
- #include <xen/interface/memory.h>
- 
-@@ -59,10 +61,12 @@ struct netbk_rx_meta {
- 	int gso_size;
- };
- 
-+#ifdef HAVE_XEN_PAGE_FOREIGN
- struct netbk_tx_pending_inuse {
- 	struct list_head list;
- 	unsigned long alloc_time;
- };
-+#endif
- 
- #define MAX_PENDING_REQS 256
- 
-@@ -91,16 +95,24 @@ struct xen_netbk {
- 	struct sk_buff_head tx_queue;
- 
- 	struct timer_list net_timer;
-+#ifdef HAVE_XEN_PAGE_FOREIGN
- 	struct timer_list netbk_tx_pending_timer;
-+#endif
- 
-+#ifdef HAVE_XEN_PAGE_FOREIGN
- 	struct page **mmap_pages;
-+#else
-+	struct page *mmap_pages[MAX_PENDING_REQS];
-+#endif
- 
- 	pending_ring_idx_t pending_prod;
- 	pending_ring_idx_t pending_cons;
-+#ifdef HAVE_XEN_PAGE_FOREIGN
- 	pending_ring_idx_t dealloc_prod;
- 	pending_ring_idx_t dealloc_cons;
- 
- 	struct list_head pending_inuse_head;
-+#endif
- 	struct list_head net_schedule_list;
- 
- 	/* Protect the net_schedule_list in netif. */
-@@ -109,13 +121,20 @@ struct xen_netbk {
- 	atomic_t netfront_count;
- 
- 	struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
-+#ifdef HAVE_XEN_PAGE_FOREIGN
- 	struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
- 	struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
- 	struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
- 
- 	grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
-+#else
-+	struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS];
-+#endif
-+
- 	u16 pending_ring[MAX_PENDING_REQS];
-+#ifdef HAVE_XEN_PAGE_FOREIGN
- 	u16 dealloc_ring[MAX_PENDING_REQS];
-+#endif
- 
- 	/*
- 	 * Each head or fragment can be up to 4096 bytes. Given
-@@ -184,9 +203,10 @@ static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
- }
- 
- /* extra field used in struct page */
--static inline void set_page_ext(struct page *pg,
--				unsigned int group, unsigned int idx)
-+static inline void set_page_ext(struct page *pg, struct xen_netbk *netbk,
-+				unsigned int idx)
- {
-+	unsigned int group = netbk - xen_netbk;
- 	union page_ext ext = { .e = { .group = group + 1, .idx = idx } };
- 
- 	BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping));
-@@ -200,8 +220,10 @@ static int get_page_ext(struct page *pg,
- 	struct xen_netbk *netbk;
- 	unsigned int group, idx;
- 
-+#ifdef HAVE_XEN_PAGE_FOREIGN
- 	if (!PageForeign(pg))
- 		return 0;
-+#endif
- 
- 	group = ext.e.group - 1;
- 
-@@ -210,8 +232,10 @@ static int get_page_ext(struct page *pg,
- 
- 	netbk = &xen_netbk[group];
- 
-+#ifdef HAVE_XEN_PAGE_FOREIGN
- 	if (netbk->mmap_pages == NULL)
- 		return 0;
-+#endif
- 
- 	idx = ext.e.idx;
- 
-@@ -248,12 +272,14 @@ static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk)
- 		netbk->pending_prod + netbk->pending_cons;
- }
- 
-+#ifdef HAVE_XEN_PAGE_FOREIGN
- /* Setting this allows the safe use of this driver without netloop. */
- static int MODPARM_copy_skb = 1;
- module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
- MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
- 
- int netbk_copy_skb_mode;
-+#endif
- 
- static void xen_netbk_kick_thread(struct xen_netbk *netbk)
- {
-@@ -726,11 +752,13 @@ void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb)
- 	xen_netbk_kick_thread(netbk);
- }
- 
-+#ifdef HAVE_XEN_PAGE_FOREIGN
- static void xen_netbk_tx_pending_timeout(unsigned long data)
- {
- 	struct xen_netbk *netbk = (struct xen_netbk *)data;
- 	xen_netbk_kick_thread(netbk);
- }
-+#endif
- 
- static void xen_netbk_alarm(unsigned long data)
- {
-@@ -831,6 +859,7 @@ static void tx_credit_callback(unsigned long data)
- 	xenvif_schedule_work(vif);
- }
- 
-+#ifdef HAVE_XEN_PAGE_FOREIGN
- static inline int copy_pending_req(struct xen_netbk *netbk,
- 				   pending_ring_idx_t pending_idx)
- {
-@@ -838,7 +867,9 @@ static inline int copy_pending_req(struct xen_netbk *netbk,
- 			netbk->grant_tx_handle[pending_idx],
- 			&netbk->mmap_pages[pending_idx]);
- }
-+#endif
- 
-+#ifdef HAVE_XEN_PAGE_FOREIGN
- static inline void xen_netbk_tx_action_dealloc(struct xen_netbk *netbk)
- {
- 	struct netbk_tx_pending_inuse *inuse, *n;
-@@ -943,6 +974,7 @@ static inline void xen_netbk_tx_action_dealloc(struct xen_netbk *netbk)
- 		list_del_init(&inuse->list);
- 	}
- }
-+#endif
- 
- static void netbk_tx_err(struct xenvif *vif,
- 			 struct xen_netif_tx_request *txp, RING_IDX end)
-@@ -998,15 +1030,37 @@ static int netbk_count_requests(struct xenvif *vif,
- 			return -frags;
- 		}
- 	} while ((txp++)->flags & XEN_NETTXF_more_data);
--
- 	return frags;
- }
- 
-+#ifndef HAVE_XEN_PAGE_FOREIGN
-+static struct page *xen_netbk_alloc_page(struct xen_netbk *netbk,
-+					 struct sk_buff *skb,
-+					 unsigned long pending_idx)
-+{
-+	struct page *page;
-+	page = alloc_page(GFP_KERNEL|__GFP_COLD);
-+	if (!page)
-+		return NULL;
-+	set_page_ext(page, netbk, pending_idx);
-+	netbk->mmap_pages[pending_idx] = page;
-+	return page;
-+}
-+#endif
-+
-+#ifdef HAVE_XEN_PAGE_FOREIGN
- static struct gnttab_map_grant_ref *xen_netbk_get_requests(struct xen_netbk *netbk,
- 							   struct xenvif *vif,
- 							   struct sk_buff *skb,
- 							   struct xen_netif_tx_request *txp,
--							   struct gnttab_map_grant_ref *mop)
-+							   struct gnttab_map_grant_ref *gop)
-+#else
-+static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk,
-+						  struct xenvif *vif,
-+						  struct sk_buff *skb,
-+						  struct xen_netif_tx_request *txp,
-+						  struct gnttab_copy *gop)
-+#endif
- {
- 	struct skb_shared_info *shinfo = skb_shinfo(skb);
- 	skb_frag_t *frags = shinfo->frags;
-@@ -1017,16 +1071,39 @@ static struct gnttab_map_grant_ref *xen_netbk_get_requests(struct xen_netbk *net
- 	start = ((unsigned long)shinfo->frags[0].page == pending_idx);
- 
- 	for (i = start; i < shinfo->nr_frags; i++, txp++) {
-+#ifndef HAVE_XEN_PAGE_FOREIGN
-+		struct page *page;
-+#endif
- 		pending_ring_idx_t index;
- 		struct pending_tx_info *pending_tx_info =
- 			netbk->pending_tx_info;
- 
- 		index = pending_index(netbk->pending_cons++);
- 		pending_idx = netbk->pending_ring[index];
--
--		gnttab_set_map_op(mop++, idx_to_kaddr(netbk, pending_idx),
-+#ifdef HAVE_XEN_PAGE_FOREIGN
-+		gnttab_set_map_op(gop++, idx_to_kaddr(netbk, pending_idx),
- 				  GNTMAP_host_map | GNTMAP_readonly,
- 				  txp->gref, vif->domid);
-+#else
-+		page = xen_netbk_alloc_page(netbk, skb, pending_idx);
-+		if (!page)
-+			return NULL;
-+
-+		netbk->mmap_pages[pending_idx] = page;
-+
-+		gop->source.u.ref = txp->gref;
-+		gop->source.domid = vif->domid;
-+		gop->source.offset = txp->offset;
-+
-+		gop->dest.u.gmfn = virt_to_mfn(page_address(page));
-+		gop->dest.domid = DOMID_SELF;
-+		gop->dest.offset = txp->offset;
-+
-+		gop->len = txp->size;
-+		gop->flags = GNTCOPY_source_gref;
-+
-+		gop++;
-+#endif
- 
- 		memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
- 		xenvif_get(vif);
-@@ -1034,14 +1111,24 @@ static struct gnttab_map_grant_ref *xen_netbk_get_requests(struct xen_netbk *net
- 		frags[i].page = (void *)pending_idx;
- 	}
- 
--	return mop;
-+	return gop;
- }
- 
--static int xen_netbk_tx_check_mop(struct xen_netbk *netbk,
-+#ifdef HAVE_XEN_PAGE_FOREIGN
-+static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
- 				  struct sk_buff *skb,
--				  struct gnttab_map_grant_ref **mopp)
-+				  struct gnttab_map_grant_ref **gopp)
-+#else
-+static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
-+				  struct sk_buff *skb,
-+				  struct gnttab_copy **gopp)
-+#endif
- {
--	struct gnttab_map_grant_ref *mop = *mopp;
-+#ifdef HAVE_XEN_PAGE_FOREIGN
-+	struct gnttab_map_grant_ref *gop = *gopp;
-+#else
-+	struct gnttab_copy *gop = *gopp;
-+#endif
- 	int pending_idx = *((u16 *)skb->data);
- 	struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
- 	struct xenvif *vif = pending_tx_info[pending_idx].vif;
-@@ -1051,7 +1138,7 @@ static int xen_netbk_tx_check_mop(struct xen_netbk *netbk,
- 	int i, err, start;
- 
- 	/* Check status of header. */
--	err = mop->status;
-+	err = gop->status;
- 	if (unlikely(err)) {
- 		pending_ring_idx_t index;
- 		index = pending_index(netbk->pending_prod++);
-@@ -1059,11 +1146,13 @@ static int xen_netbk_tx_check_mop(struct xen_netbk *netbk,
- 		make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
- 		netbk->pending_ring[index] = pending_idx;
- 		xenvif_put(vif);
-+#ifdef HAVE_XEN_PAGE_FOREIGN
- 	} else {
- 		set_phys_to_machine(
- 			__pa(idx_to_kaddr(netbk, pending_idx)) >> PAGE_SHIFT,
--			FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
--		netbk->grant_tx_handle[pending_idx] = mop->handle;
-+			FOREIGN_FRAME(gop->dev_bus_addr >> PAGE_SHIFT));
-+		netbk->grant_tx_handle[pending_idx] = gop->handle;
-+#endif
- 	}
- 
- 	/* Skip first skb fragment if it is on same page as header fragment. */
-@@ -1076,14 +1165,16 @@ static int xen_netbk_tx_check_mop(struct xen_netbk *netbk,
- 		pending_idx = (unsigned long)shinfo->frags[i].page;
- 
- 		/* Check error status: if okay then remember grant handle. */
--		newerr = (++mop)->status;
-+		newerr = (++gop)->status;
- 		if (likely(!newerr)) {
-+#ifdef HAVE_XEN_PAGE_FOREIGN
- 			unsigned long addr;
- 			addr = idx_to_kaddr(netbk, pending_idx);
- 			set_phys_to_machine(
- 				__pa(addr)>>PAGE_SHIFT,
--				FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
--			netbk->grant_tx_handle[pending_idx] = mop->handle;
-+				FOREIGN_FRAME(gop->dev_bus_addr>>PAGE_SHIFT));
-+			netbk->grant_tx_handle[pending_idx] = gop->handle;
-+#endif
- 			/* Had a previous error? Invalidate this fragment. */
- 			if (unlikely(err))
- 				xen_netbk_idx_release(netbk, pending_idx);
-@@ -1113,7 +1204,7 @@ static int xen_netbk_tx_check_mop(struct xen_netbk *netbk,
- 		err = newerr;
- 	}
- 
--	*mopp = mop + 1;
-+	*gopp = gop + 1;
- 	return err;
- }
- 
-@@ -1129,10 +1220,11 @@ static void xen_netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
- 		unsigned long pending_idx;
- 
- 		pending_idx = (unsigned long)frag->page;
--
-+#ifdef HAVE_XEN_PAGE_FOREIGN
- 		netbk->pending_inuse[pending_idx].alloc_time = jiffies;
- 		list_add_tail(&netbk->pending_inuse[pending_idx].list,
- 			      &netbk->pending_inuse_head);
-+#endif
- 
- 		txp = &netbk->pending_tx_info[pending_idx].req;
- 		frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx));
-@@ -1142,6 +1234,10 @@ static void xen_netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
- 		skb->len += txp->size;
- 		skb->data_len += txp->size;
- 		skb->truesize += txp->size;
-+
-+		/* Take an extra reference to offset xen_netbk_idx_release */
-+		get_page(netbk->mmap_pages[pending_idx]);
-+		xen_netbk_idx_release(netbk, pending_idx);
- 	}
- }
- 
-@@ -1300,18 +1396,24 @@ static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
- 	return false;
- }
- 
--static unsigned xen_netbk_tx_build_mops(struct xen_netbk *netbk)
-+static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
- {
--	struct gnttab_map_grant_ref *mop;
-+#ifdef HAVE_XEN_PAGE_FOREIGN
-+	struct gnttab_map_grant_ref *gop = netbk->tx_map_ops, *request_gop;
-+#else
-+	struct gnttab_copy *gop = netbk->tx_copy_ops, *request_gop;
-+#endif
- 	struct sk_buff *skb;
- 	int ret;
- 
--	mop = netbk->tx_map_ops;
- 	while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
- 		!list_empty(&netbk->net_schedule_list)) {
- 		struct xenvif *vif;
- 		struct xen_netif_tx_request txreq;
- 		struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
-+#ifndef HAVE_XEN_PAGE_FOREIGN
-+		struct page *page;
-+#endif
- 		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
- 		u16 pending_idx;
- 		RING_IDX idx;
-@@ -1408,10 +1510,35 @@ static unsigned xen_netbk_tx_build_mops(struct xen_netbk *netbk)
- 			}
- 		}
- 
--		gnttab_set_map_op(mop, idx_to_kaddr(netbk, pending_idx),
-+#ifdef HAVE_XEN_PAGE_FOREIGN
-+		gnttab_set_map_op(gop, idx_to_kaddr(netbk, pending_idx),
- 				  GNTMAP_host_map | GNTMAP_readonly,
- 				  txreq.gref, vif->domid);
--		mop++;
-+		gop++;
-+#else
-+		/* XXX could copy straight to head */
-+		page = xen_netbk_alloc_page(netbk, skb, pending_idx);
-+		if (!page) {
-+			kfree_skb(skb);
-+			netbk_tx_err(vif, &txreq, idx);
-+			continue;
-+		}
-+
-+		netbk->mmap_pages[pending_idx] = page;
-+
-+		gop->source.u.ref = txreq.gref;
-+		gop->source.domid = vif->domid;
-+		gop->source.offset = txreq.offset;
-+
-+		gop->dest.u.gmfn = virt_to_mfn(page_address(page));
-+		gop->dest.domid = DOMID_SELF;
-+		gop->dest.offset = txreq.offset;
-+
-+		gop->len = txreq.size;
-+		gop->flags = GNTCOPY_source_gref;
-+
-+		gop++;
-+#endif
- 
- 		memcpy(&netbk->pending_tx_info[pending_idx].req,
- 		       &txreq, sizeof(txreq));
-@@ -1434,24 +1561,43 @@ static unsigned xen_netbk_tx_build_mops(struct xen_netbk *netbk)
- 
- 		netbk->pending_cons++;
- 
--		mop = xen_netbk_get_requests(netbk, vif, skb, txfrags, mop);
-+		request_gop = xen_netbk_get_requests(netbk, vif,
-+						     skb, txfrags, gop);
-+		if (request_gop == NULL) {
-+			kfree_skb(skb);
-+			netbk_tx_err(vif, &txreq, idx);
-+			continue;
-+		}
-+		gop = request_gop;
- 
- 		vif->tx.req_cons = idx;
- 		xenvif_schedule_work(vif);
- 
--		if ((mop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops))
-+#ifdef HAVE_XEN_PAGE_FOREIGN
-+		if ((gop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops))
- 			break;
-+#else
-+		if ((gop-netbk->tx_copy_ops) >= ARRAY_SIZE(netbk->tx_copy_ops))
-+			break;
-+#endif
- 	}
- 
--	return mop - netbk->tx_map_ops;
-+#ifdef HAVE_XEN_PAGE_FOREIGN
-+	return gop - netbk->tx_map_ops;
-+#else
-+	return gop - netbk->tx_copy_ops;
-+#endif
- }
- 
- static void xen_netbk_tx_submit(struct xen_netbk *netbk)
- {
--	struct gnttab_map_grant_ref *mop;
-+#ifdef HAVE_XEN_PAGE_FOREIGN
-+	struct gnttab_map_grant_ref *gop = netbk->tx_map_ops;
-+#else
-+	struct gnttab_copy *gop = netbk->tx_copy_ops;
-+#endif
- 	struct sk_buff *skb;
- 
--	mop = netbk->tx_map_ops;
- 	while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
- 		struct xen_netif_tx_request *txp;
- 		struct xenvif *vif;
-@@ -1463,7 +1609,7 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk)
- 		txp = &netbk->pending_tx_info[pending_idx].req;
- 
- 		/* Check the remap error code. */
--		if (unlikely(xen_netbk_tx_check_mop(netbk, skb, &mop))) {
-+		if (unlikely(xen_netbk_tx_check_gop(netbk, skb, &gop))) {
- 			pr_debug("netback grant failed.\n");
- 			skb_shinfo(skb)->nr_frags = 0;
- 			kfree_skb(skb);
-@@ -1509,12 +1655,14 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk)
- 			continue;
- 		}
- 
-+#ifdef HAVE_XEN_PAGE_FOREIGN
- 		if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
- 		    unlikely(skb_linearize(skb))) {
- 			pr_debug("Can't linearize skb in net_tx_action.\n");
- 			kfree_skb(skb);
- 			continue;
- 		}
-+#endif
- 
- 		vif->stats.rx_bytes += skb->len;
- 		vif->stats.rx_packets++;
-@@ -1527,21 +1675,31 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk)
- /* Called after netfront has transmitted */
- static void xen_netbk_tx_action(struct xen_netbk *netbk)
- {
--	unsigned nr_mops;
-+	unsigned nr_gops;
- 	int ret;
- 
-+#ifdef HAVE_XEN_PAGE_FOREIGN
- 	xen_netbk_tx_action_dealloc(netbk);
-+#endif
- 
--	nr_mops = xen_netbk_tx_build_mops(netbk);
-+	nr_gops = xen_netbk_tx_build_gops(netbk);
- 
--	if (nr_mops == 0)
-+#ifdef HAVE_XEN_PAGE_FOREIGN
-+	if (nr_gops == 0)
- 		goto out;
--
- 	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
--					netbk->tx_map_ops, nr_mops);
-+					netbk->tx_map_ops, nr_gops);
-+#else
-+	if (nr_gops == 0)
-+		return;
-+	ret = HYPERVISOR_grant_table_op(GNTTABOP_copy,
-+					netbk->tx_copy_ops, nr_gops);
-+#endif
- 	BUG_ON(ret);
- 
- 	xen_netbk_tx_submit(netbk);
-+
-+#ifdef HAVE_XEN_PAGE_FOREIGN
- out:
- 	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
- 	    !list_empty(&netbk->pending_inuse_head)) {
-@@ -1552,8 +1710,10 @@ out:
- 		mod_timer(&netbk->netbk_tx_pending_timer,
- 				oldest->alloc_time + HZ);
- 	}
-+#endif
- }
- 
-+#ifdef HAVE_XEN_PAGE_FOREIGN
- static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx)
- {
- 	static DEFINE_SPINLOCK(_lock);
-@@ -1581,6 +1741,33 @@ static void netif_page_release(struct page *page, unsigned int order)
- 
- 	xen_netbk_idx_release(&xen_netbk[group], idx);
- }
-+#else
-+static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx)
-+{
-+	struct xenvif *vif;
-+	struct pending_tx_info *pending_tx_info;
-+	pending_ring_idx_t index;
-+
-+	/* Already complete? */
-+	if (netbk->mmap_pages[pending_idx] == NULL)
-+		return;
-+
-+	pending_tx_info = &netbk->pending_tx_info[pending_idx];
-+
-+	vif = pending_tx_info->vif;
-+
-+	make_tx_response(vif, &pending_tx_info->req, XEN_NETIF_RSP_OKAY);
-+
-+	index = pending_index(netbk->pending_prod++);
-+	netbk->pending_ring[index] = pending_idx;
-+
-+	xenvif_put(vif);
-+
-+	netbk->mmap_pages[pending_idx]->mapping = 0;
-+	put_page(netbk->mmap_pages[pending_idx]);
-+	netbk->mmap_pages[pending_idx] = NULL;
-+}
-+#endif
- 
- static void make_tx_response(struct xenvif *vif,
- 			     struct xen_netif_tx_request *txp,
-@@ -1633,12 +1820,14 @@ static inline int rx_work_todo(struct xen_netbk *netbk)
- 
- static inline int tx_work_todo(struct xen_netbk *netbk)
- {
-+#ifdef HAVE_XEN_PAGE_FOREIGN
- 	if (netbk->dealloc_cons != netbk->dealloc_prod)
- 		return 1;
- 
- 	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
- 	    !list_empty(&netbk->pending_inuse_head))
- 		return 1;
-+#endif
- 
- 	if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
- 			!list_empty(&netbk->net_schedule_list))
-@@ -1673,7 +1862,9 @@ static int xen_netbk_kthread(void *data)
- static int __init netback_init(void)
- {
- 	int i;
-+#ifdef HAVE_XEN_PAGE_FOREIGN
- 	struct page *page;
-+#endif
- 	int rc = 0;
- 	int group;
- 
-@@ -1697,11 +1888,14 @@ static int __init netback_init(void)
- 		netbk->net_timer.data = (unsigned long)netbk;
- 		netbk->net_timer.function = xen_netbk_alarm;
- 
-+#ifdef HAVE_XEN_PAGE_FOREIGN
- 		init_timer(&netbk->netbk_tx_pending_timer);
- 		netbk->netbk_tx_pending_timer.data = (unsigned long)netbk;
- 		netbk->netbk_tx_pending_timer.function =
- 			xen_netbk_tx_pending_timeout;
-+#endif
- 
-+#ifdef HAVE_XEN_PAGE_FOREIGN
- 		netbk->mmap_pages =
- 			alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
- 		if (!netbk->mmap_pages) {
-@@ -1715,9 +1909,10 @@ static int __init netback_init(void)
- 		for (i = 0; i < MAX_PENDING_REQS; i++) {
- 			page = netbk->mmap_pages[i];
- 			SetPageForeign(page, netif_page_release);
--			set_page_ext(page, group, i);
-+			set_page_ext(page, netbk, i);
- 			INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
- 		}
-+#endif
- 
- 		netbk->pending_cons = 0;
- 		netbk->pending_prod = MAX_PENDING_REQS;
-@@ -1731,9 +1926,11 @@ static int __init netback_init(void)
- 
- 		if (IS_ERR(netbk->task)) {
- 			printk(KERN_ALERT "kthread_run() fails at netback\n");
-+#ifdef HAVE_XEN_PAGE_FOREIGN
- 			free_empty_pages_and_pagevec(netbk->mmap_pages,
- 						     MAX_PENDING_REQS);
- 			del_timer(&netbk->netbk_tx_pending_timer);
-+#endif
- 			del_timer(&netbk->net_timer);
- 			rc = PTR_ERR(netbk->task);
- 			goto failed_init;
-@@ -1741,7 +1938,9 @@ static int __init netback_init(void)
- 
- 		kthread_bind(netbk->task, group);
- 
-+#ifdef HAVE_XEN_PAGE_FOREIGN
- 		INIT_LIST_HEAD(&netbk->pending_inuse_head);
-+#endif
- 		INIT_LIST_HEAD(&netbk->net_schedule_list);
- 
- 		spin_lock_init(&netbk->net_schedule_list_lock);
-@@ -1751,6 +1950,7 @@ static int __init netback_init(void)
- 		wake_up_process(netbk->task);
- 	}
- 
-+#ifdef HAVE_XEN_PAGE_FOREIGN
- 	netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
- 	if (MODPARM_copy_skb) {
- 		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
-@@ -1759,6 +1959,7 @@ static int __init netback_init(void)
- 		else
- 			netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
- 	}
-+#endif
- 
- 	rc = xenvif_xenbus_init();
- 	if (rc)
-@@ -1769,9 +1970,17 @@ static int __init netback_init(void)
- failed_init:
- 	for (i = 0; i < group; i++) {
- 		struct xen_netbk *netbk = &xen_netbk[i];
-+#ifdef HAVE_XEN_PAGE_FOREIGN
- 		free_empty_pages_and_pagevec(netbk->mmap_pages,
- 				MAX_PENDING_REQS);
- 		del_timer(&netbk->netbk_tx_pending_timer);
-+#else
-+		int j;
-+		for (j = 0; j < MAX_PENDING_REQS; j++) {
-+			if (netbk->mmap_pages[i])
-+				__free_page(netbk->mmap_pages[i]);
-+		}
-+#endif
- 		del_timer(&netbk->net_timer);
- 		kthread_stop(netbk->task);
- 	}
-diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
-index e854420..d56b1ee 100644
---- a/drivers/net/xen-netback/xenbus.c
-+++ b/drivers/net/xen-netback/xenbus.c
-@@ -74,8 +74,10 @@ static int netback_probe(struct xenbus_device *dev,
- 	dev_set_drvdata(&dev->dev, be);
- 
- 	sg = 1;
-+#ifdef HAVE_XEN_PAGE_FOREIGN
- 	if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB)
- 		sg = 0;
-+#endif
- 
- 	do {
- 		err = xenbus_transaction_start(&xbt);
--- 
-1.7.4
-
-
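In the copying TX mode this patch introduces, each TX request becomes a
GNTTABOP_copy operation instead of a grant map. The sketch below shows how one
such op is filled in, mirroring the hunk above; the helper name and header
paths are assumptions, but the field assignments follow the patch:

    #include <xen/interface/grant_table.h>
    #include <xen/interface/io/netif.h>
    #include <asm/xen/page.h>

    static void fill_tx_copy(struct gnttab_copy *gop,
                             const struct xen_netif_tx_request *txreq,
                             domid_t otherend, struct page *page)
    {
            gop->source.u.ref = txreq->gref;    /* guest's grant reference */
            gop->source.domid = otherend;       /* vif->domid in the driver */
            gop->source.offset = txreq->offset;

            gop->dest.u.gmfn = virt_to_mfn(page_address(page));
            gop->dest.domid = DOMID_SELF;       /* copy into our own page */
            gop->dest.offset = txreq->offset;

            gop->len = txreq->size;
            gop->flags = GNTCOPY_source_gref;   /* source is a grant ref */
    }

    /* The driver batches these ops and issues them in one hypercall:
     *     ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, ops, nr_ops);
     */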
-From 33bf93e7541633b7667bf9390107cf8b2b67968a Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Wed, 19 Jan 2011 09:43:44 +0000
-Subject: [PATCH 100/203] xen: netback: completely drop foreign page support
-
-for i in drivers/net/xen-netback/*.[ch] ; do
-	echo $i
-	./scripts/unifdef -B -UHAVE_XEN_PAGE_FOREIGN $i > $i.unifdef
-	mv $i.unifdef $i
-done
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/net/xen-netback/common.h    |   13 --
- drivers/net/xen-netback/interface.c |    6 -
- drivers/net/xen-netback/netback.c   |  376 -----------------------------------
- drivers/net/xen-netback/xenbus.c    |    4 -
- 4 files changed, 0 insertions(+), 399 deletions(-)
-
-diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
-index 8890825..03196ab 100644
---- a/drivers/net/xen-netback/common.h
-+++ b/drivers/net/xen-netback/common.h
-@@ -95,9 +95,6 @@ struct xenvif {
- 	struct timer_list credit_timeout;
- 
- 	/* Statistics */
--#ifdef HAVE_XEN_PAGE_FOREIGN
--	int nr_copied_skbs;
--#endif
- 	int rx_gso_checksum_fixup;
- 
- 	/* Miscellaneous private stuff. */
-@@ -111,16 +108,6 @@ struct xenvif {
- 	wait_queue_head_t waiting_to_free;
- };
- 
--#ifdef HAVE_XEN_PAGE_FOREIGN
--enum {
--	NETBK_DONT_COPY_SKB,
--	NETBK_DELAYED_COPY_SKB,
--	NETBK_ALWAYS_COPY_SKB,
--};
--
--extern int netbk_copy_skb_mode;
--#endif
--
- #define XEN_NETIF_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
- #define XEN_NETIF_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
- 
-diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
-index 6e11878..98a992d 100644
---- a/drivers/net/xen-netback/interface.c
-+++ b/drivers/net/xen-netback/interface.c
-@@ -272,12 +272,6 @@ static const struct xenvif_stat {
- 	char name[ETH_GSTRING_LEN];
- 	u16 offset;
- } xenvif_stats[] = {
--#ifdef HAVE_XEN_PAGE_FOREIGN
--	{
--		"copied_skbs",
--		offsetof(struct xenvif, nr_copied_skbs)
--	},
--#endif
- 	{
- 		"rx_gso_checksum_fixup",
- 		offsetof(struct xenvif, rx_gso_checksum_fixup)
-diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
-index 265fbdd..fbddf3d 100644
---- a/drivers/net/xen-netback/netback.c
-+++ b/drivers/net/xen-netback/netback.c
-@@ -40,9 +40,6 @@
- 
- #include <net/tcp.h>
- 
--#ifdef HAVE_XEN_PAGE_FOREIGN
--#include <xen/balloon.h>
--#endif
- #include <xen/events.h>
- #include <xen/interface/memory.h>
- 
-@@ -61,13 +58,6 @@ struct netbk_rx_meta {
- 	int gso_size;
- };
- 
--#ifdef HAVE_XEN_PAGE_FOREIGN
--struct netbk_tx_pending_inuse {
--	struct list_head list;
--	unsigned long alloc_time;
--};
--#endif
--
- #define MAX_PENDING_REQS 256
- 
- #define MAX_BUFFER_OFFSET PAGE_SIZE
-@@ -95,24 +85,11 @@ struct xen_netbk {
- 	struct sk_buff_head tx_queue;
- 
- 	struct timer_list net_timer;
--#ifdef HAVE_XEN_PAGE_FOREIGN
--	struct timer_list netbk_tx_pending_timer;
--#endif
- 
--#ifdef HAVE_XEN_PAGE_FOREIGN
--	struct page **mmap_pages;
--#else
- 	struct page *mmap_pages[MAX_PENDING_REQS];
--#endif
- 
- 	pending_ring_idx_t pending_prod;
- 	pending_ring_idx_t pending_cons;
--#ifdef HAVE_XEN_PAGE_FOREIGN
--	pending_ring_idx_t dealloc_prod;
--	pending_ring_idx_t dealloc_cons;
--
--	struct list_head pending_inuse_head;
--#endif
- 	struct list_head net_schedule_list;
- 
- 	/* Protect the net_schedule_list in netif. */
-@@ -121,20 +98,9 @@ struct xen_netbk {
- 	atomic_t netfront_count;
- 
- 	struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
--#ifdef HAVE_XEN_PAGE_FOREIGN
--	struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
--	struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
--	struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
--
--	grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
--#else
- 	struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS];
--#endif
- 
- 	u16 pending_ring[MAX_PENDING_REQS];
--#ifdef HAVE_XEN_PAGE_FOREIGN
--	u16 dealloc_ring[MAX_PENDING_REQS];
--#endif
- 
- 	/*
- 	 * Each head or fragment can be up to 4096 bytes. Given
-@@ -220,11 +186,6 @@ static int get_page_ext(struct page *pg,
- 	struct xen_netbk *netbk;
- 	unsigned int group, idx;
- 
--#ifdef HAVE_XEN_PAGE_FOREIGN
--	if (!PageForeign(pg))
--		return 0;
--#endif
--
- 	group = ext.e.group - 1;
- 
- 	if (group < 0 || group >= xen_netbk_group_nr)
-@@ -232,11 +193,6 @@ static int get_page_ext(struct page *pg,
- 
- 	netbk = &xen_netbk[group];
- 
--#ifdef HAVE_XEN_PAGE_FOREIGN
--	if (netbk->mmap_pages == NULL)
--		return 0;
--#endif
--
- 	idx = ext.e.idx;
- 
- 	if ((idx < 0) || (idx >= MAX_PENDING_REQS))
-@@ -272,15 +228,6 @@ static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk)
- 		netbk->pending_prod + netbk->pending_cons;
- }
- 
--#ifdef HAVE_XEN_PAGE_FOREIGN
--/* Setting this allows the safe use of this driver without netloop. */
--static int MODPARM_copy_skb = 1;
--module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
--MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
--
--int netbk_copy_skb_mode;
--#endif
--
- static void xen_netbk_kick_thread(struct xen_netbk *netbk)
- {
- 	wake_up(&netbk->wq);
-@@ -752,14 +699,6 @@ void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb)
- 	xen_netbk_kick_thread(netbk);
- }
- 
--#ifdef HAVE_XEN_PAGE_FOREIGN
--static void xen_netbk_tx_pending_timeout(unsigned long data)
--{
--	struct xen_netbk *netbk = (struct xen_netbk *)data;
--	xen_netbk_kick_thread(netbk);
--}
--#endif
--
- static void xen_netbk_alarm(unsigned long data)
- {
- 	struct xen_netbk *netbk = (struct xen_netbk *)data;
-@@ -859,123 +798,6 @@ static void tx_credit_callback(unsigned long data)
- 	xenvif_schedule_work(vif);
- }
- 
--#ifdef HAVE_XEN_PAGE_FOREIGN
--static inline int copy_pending_req(struct xen_netbk *netbk,
--				   pending_ring_idx_t pending_idx)
--{
--	return gnttab_copy_grant_page(
--			netbk->grant_tx_handle[pending_idx],
--			&netbk->mmap_pages[pending_idx]);
--}
--#endif
--
--#ifdef HAVE_XEN_PAGE_FOREIGN
--static inline void xen_netbk_tx_action_dealloc(struct xen_netbk *netbk)
--{
--	struct netbk_tx_pending_inuse *inuse, *n;
--	struct gnttab_unmap_grant_ref *gop;
--	u16 pending_idx;
--	pending_ring_idx_t dc, dp;
--	struct xenvif *vif;
--	int ret;
--	LIST_HEAD(list);
--
--	dc = netbk->dealloc_cons;
--	gop = netbk->tx_unmap_ops;
--
--	/* Free up any grants we have finished using. */
--	do {
--		dp = netbk->dealloc_prod;
--
--		/* Ensure we see all indices enqueued by xen_netbk_idx_release(). */
--		smp_rmb();
--
--		while (dc != dp) {
--			unsigned long pfn;
--			struct netbk_tx_pending_inuse *pending_inuse =
--					netbk->pending_inuse;
--
--			pending_idx = netbk->dealloc_ring[pending_index(dc++)];
--			list_move_tail(&pending_inuse[pending_idx].list, &list);
--
--			pfn = idx_to_pfn(netbk, pending_idx);
--			/* Already unmapped? */
--			if (!phys_to_machine_mapping_valid(pfn))
--				continue;
--
--			gnttab_set_unmap_op(gop,
--					idx_to_kaddr(netbk, pending_idx),
--					GNTMAP_host_map,
--					netbk->grant_tx_handle[pending_idx]);
--			gop++;
--		}
--
--	} while (dp != netbk->dealloc_prod);
--
--	netbk->dealloc_cons = dc;
--
--	ret = HYPERVISOR_grant_table_op(
--		GNTTABOP_unmap_grant_ref, netbk->tx_unmap_ops,
--		gop - netbk->tx_unmap_ops);
--	BUG_ON(ret);
--
--	/*
--	 * Copy any entries that have been pending for too long
--	 */
--	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
--	    !list_empty(&netbk->pending_inuse_head)) {
--		list_for_each_entry_safe(inuse, n,
--				&netbk->pending_inuse_head, list) {
--			struct pending_tx_info *pending_tx_info;
--			pending_tx_info = netbk->pending_tx_info;
--
--			if (time_after(inuse->alloc_time + HZ / 2, jiffies))
--				break;
--
--			pending_idx = inuse - netbk->pending_inuse;
--
--			pending_tx_info[pending_idx].vif->nr_copied_skbs++;
--
--			switch (copy_pending_req(netbk, pending_idx)) {
--			case 0:
--				list_move_tail(&inuse->list, &list);
--				continue;
--			case -EBUSY:
--				list_del_init(&inuse->list);
--				continue;
--			case -ENOENT:
--				continue;
--			}
--
--			break;
--		}
--	}
--
--	list_for_each_entry_safe(inuse, n, &list, list) {
--		struct pending_tx_info *pending_tx_info;
--		pending_ring_idx_t index;
--
--		pending_tx_info = netbk->pending_tx_info;
--		pending_idx = inuse - netbk->pending_inuse;
--
--		vif = pending_tx_info[pending_idx].vif;
--
--		make_tx_response(vif, &pending_tx_info[pending_idx].req,
--				 XEN_NETIF_RSP_OKAY);
--
--		/* Ready for next use. */
--		gnttab_reset_grant_page(netbk->mmap_pages[pending_idx]);
--
--		index = pending_index(netbk->pending_prod++);
--		netbk->pending_ring[index] = pending_idx;
--
--		xenvif_put(vif);
--
--		list_del_init(&inuse->list);
--	}
--}
--#endif
--
- static void netbk_tx_err(struct xenvif *vif,
- 			 struct xen_netif_tx_request *txp, RING_IDX end)
- {
-@@ -1033,7 +855,6 @@ static int netbk_count_requests(struct xenvif *vif,
- 	return frags;
- }
- 
--#ifndef HAVE_XEN_PAGE_FOREIGN
- static struct page *xen_netbk_alloc_page(struct xen_netbk *netbk,
- 					 struct sk_buff *skb,
- 					 unsigned long pending_idx)
-@@ -1046,21 +867,12 @@ static struct page *xen_netbk_alloc_page(struct xen_netbk *netbk,
- 	netbk->mmap_pages[pending_idx] = page;
- 	return page;
- }
--#endif
- 
--#ifdef HAVE_XEN_PAGE_FOREIGN
--static struct gnttab_map_grant_ref *xen_netbk_get_requests(struct xen_netbk *netbk,
--							   struct xenvif *vif,
--							   struct sk_buff *skb,
--							   struct xen_netif_tx_request *txp,
--							   struct gnttab_map_grant_ref *gop)
--#else
- static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk,
- 						  struct xenvif *vif,
- 						  struct sk_buff *skb,
- 						  struct xen_netif_tx_request *txp,
- 						  struct gnttab_copy *gop)
--#endif
- {
- 	struct skb_shared_info *shinfo = skb_shinfo(skb);
- 	skb_frag_t *frags = shinfo->frags;
-@@ -1071,20 +883,13 @@ static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk,
- 	start = ((unsigned long)shinfo->frags[0].page == pending_idx);
- 
- 	for (i = start; i < shinfo->nr_frags; i++, txp++) {
--#ifndef HAVE_XEN_PAGE_FOREIGN
- 		struct page *page;
--#endif
- 		pending_ring_idx_t index;
- 		struct pending_tx_info *pending_tx_info =
- 			netbk->pending_tx_info;
- 
- 		index = pending_index(netbk->pending_cons++);
- 		pending_idx = netbk->pending_ring[index];
--#ifdef HAVE_XEN_PAGE_FOREIGN
--		gnttab_set_map_op(gop++, idx_to_kaddr(netbk, pending_idx),
--				  GNTMAP_host_map | GNTMAP_readonly,
--				  txp->gref, vif->domid);
--#else
- 		page = xen_netbk_alloc_page(netbk, skb, pending_idx);
- 		if (!page)
- 			return NULL;
-@@ -1103,7 +908,6 @@ static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk,
- 		gop->flags = GNTCOPY_source_gref;
- 
- 		gop++;
--#endif
- 
- 		memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
- 		xenvif_get(vif);
-@@ -1114,21 +918,11 @@ static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk,
- 	return gop;
- }
- 
--#ifdef HAVE_XEN_PAGE_FOREIGN
--static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
--				  struct sk_buff *skb,
--				  struct gnttab_map_grant_ref **gopp)
--#else
- static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
- 				  struct sk_buff *skb,
- 				  struct gnttab_copy **gopp)
--#endif
- {
--#ifdef HAVE_XEN_PAGE_FOREIGN
--	struct gnttab_map_grant_ref *gop = *gopp;
--#else
- 	struct gnttab_copy *gop = *gopp;
--#endif
- 	int pending_idx = *((u16 *)skb->data);
- 	struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
- 	struct xenvif *vif = pending_tx_info[pending_idx].vif;
-@@ -1146,13 +940,6 @@ static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
- 		make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
- 		netbk->pending_ring[index] = pending_idx;
- 		xenvif_put(vif);
--#ifdef HAVE_XEN_PAGE_FOREIGN
--	} else {
--		set_phys_to_machine(
--			__pa(idx_to_kaddr(netbk, pending_idx)) >> PAGE_SHIFT,
--			FOREIGN_FRAME(gop->dev_bus_addr >> PAGE_SHIFT));
--		netbk->grant_tx_handle[pending_idx] = gop->handle;
--#endif
- 	}
- 
- 	/* Skip first skb fragment if it is on same page as header fragment. */
-@@ -1167,14 +954,6 @@ static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
- 		/* Check error status: if okay then remember grant handle. */
- 		newerr = (++gop)->status;
- 		if (likely(!newerr)) {
--#ifdef HAVE_XEN_PAGE_FOREIGN
--			unsigned long addr;
--			addr = idx_to_kaddr(netbk, pending_idx);
--			set_phys_to_machine(
--				__pa(addr)>>PAGE_SHIFT,
--				FOREIGN_FRAME(gop->dev_bus_addr>>PAGE_SHIFT));
--			netbk->grant_tx_handle[pending_idx] = gop->handle;
--#endif
- 			/* Had a previous error? Invalidate this fragment. */
- 			if (unlikely(err))
- 				xen_netbk_idx_release(netbk, pending_idx);
-@@ -1220,11 +999,6 @@ static void xen_netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
- 		unsigned long pending_idx;
- 
- 		pending_idx = (unsigned long)frag->page;
--#ifdef HAVE_XEN_PAGE_FOREIGN
--		netbk->pending_inuse[pending_idx].alloc_time = jiffies;
--		list_add_tail(&netbk->pending_inuse[pending_idx].list,
--			      &netbk->pending_inuse_head);
--#endif
- 
- 		txp = &netbk->pending_tx_info[pending_idx].req;
- 		frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx));
-@@ -1398,11 +1172,7 @@ static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
- 
- static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
- {
--#ifdef HAVE_XEN_PAGE_FOREIGN
--	struct gnttab_map_grant_ref *gop = netbk->tx_map_ops, *request_gop;
--#else
- 	struct gnttab_copy *gop = netbk->tx_copy_ops, *request_gop;
--#endif
- 	struct sk_buff *skb;
- 	int ret;
- 
-@@ -1411,9 +1181,7 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
- 		struct xenvif *vif;
- 		struct xen_netif_tx_request txreq;
- 		struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
--#ifndef HAVE_XEN_PAGE_FOREIGN
- 		struct page *page;
--#endif
- 		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
- 		u16 pending_idx;
- 		RING_IDX idx;
-@@ -1510,12 +1278,6 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
- 			}
- 		}
- 
--#ifdef HAVE_XEN_PAGE_FOREIGN
--		gnttab_set_map_op(gop, idx_to_kaddr(netbk, pending_idx),
--				  GNTMAP_host_map | GNTMAP_readonly,
--				  txreq.gref, vif->domid);
--		gop++;
--#else
- 		/* XXX could copy straight to head */
- 		page = xen_netbk_alloc_page(netbk, skb, pending_idx);
- 		if (!page) {
-@@ -1538,7 +1300,6 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
- 		gop->flags = GNTCOPY_source_gref;
- 
- 		gop++;
--#endif
- 
- 		memcpy(&netbk->pending_tx_info[pending_idx].req,
- 		       &txreq, sizeof(txreq));
-@@ -1573,29 +1334,16 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
- 		vif->tx.req_cons = idx;
- 		xenvif_schedule_work(vif);
- 
--#ifdef HAVE_XEN_PAGE_FOREIGN
--		if ((gop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops))
--			break;
--#else
- 		if ((gop-netbk->tx_copy_ops) >= ARRAY_SIZE(netbk->tx_copy_ops))
- 			break;
--#endif
- 	}
- 
--#ifdef HAVE_XEN_PAGE_FOREIGN
--	return gop - netbk->tx_map_ops;
--#else
- 	return gop - netbk->tx_copy_ops;
--#endif
- }
- 
- static void xen_netbk_tx_submit(struct xen_netbk *netbk)
- {
--#ifdef HAVE_XEN_PAGE_FOREIGN
--	struct gnttab_map_grant_ref *gop = netbk->tx_map_ops;
--#else
- 	struct gnttab_copy *gop = netbk->tx_copy_ops;
--#endif
- 	struct sk_buff *skb;
- 
- 	while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
-@@ -1655,15 +1403,6 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk)
- 			continue;
- 		}
- 
--#ifdef HAVE_XEN_PAGE_FOREIGN
--		if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
--		    unlikely(skb_linearize(skb))) {
--			pr_debug("Can't linearize skb in net_tx_action.\n");
--			kfree_skb(skb);
--			continue;
--		}
--#endif
--
- 		vif->stats.rx_bytes += skb->len;
- 		vif->stats.rx_packets++;
- 
-@@ -1678,70 +1417,18 @@ static void xen_netbk_tx_action(struct xen_netbk *netbk)
- 	unsigned nr_gops;
- 	int ret;
- 
--#ifdef HAVE_XEN_PAGE_FOREIGN
--	xen_netbk_tx_action_dealloc(netbk);
--#endif
--
- 	nr_gops = xen_netbk_tx_build_gops(netbk);
- 
--#ifdef HAVE_XEN_PAGE_FOREIGN
--	if (nr_gops == 0)
--		goto out;
--	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
--					netbk->tx_map_ops, nr_gops);
--#else
- 	if (nr_gops == 0)
- 		return;
- 	ret = HYPERVISOR_grant_table_op(GNTTABOP_copy,
- 					netbk->tx_copy_ops, nr_gops);
--#endif
- 	BUG_ON(ret);
- 
- 	xen_netbk_tx_submit(netbk);
- 
--#ifdef HAVE_XEN_PAGE_FOREIGN
--out:
--	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
--	    !list_empty(&netbk->pending_inuse_head)) {
--		struct netbk_tx_pending_inuse *oldest;
--
--		oldest = list_entry(netbk->pending_inuse_head.next,
--				    struct netbk_tx_pending_inuse, list);
--		mod_timer(&netbk->netbk_tx_pending_timer,
--				oldest->alloc_time + HZ);
--	}
--#endif
- }
- 
--#ifdef HAVE_XEN_PAGE_FOREIGN
--static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx)
--{
--	static DEFINE_SPINLOCK(_lock);
--	unsigned long flags;
--	pending_ring_idx_t index;
--
--	spin_lock_irqsave(&_lock, flags);
--	index = pending_index(netbk->dealloc_prod);
--	netbk->dealloc_ring[index] = pending_idx;
--	/* Sync with xen_netbk_tx_action_dealloc: insert idx /then/ incr producer. */
--	smp_wmb();
--	netbk->dealloc_prod++;
--	spin_unlock_irqrestore(&_lock, flags);
--
--	xen_netbk_kick_thread(netbk);
--}
--
--static void netif_page_release(struct page *page, unsigned int order)
--{
--	unsigned int group, idx;
--	int foreign = get_page_ext(page, &group, &idx);
--
--	BUG_ON(!foreign);
--	BUG_ON(order);
--
--	xen_netbk_idx_release(&xen_netbk[group], idx);
--}
--#else
- static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx)
- {
- 	struct xenvif *vif;
-@@ -1767,7 +1454,6 @@ static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx)
- 	put_page(netbk->mmap_pages[pending_idx]);
- 	netbk->mmap_pages[pending_idx] = NULL;
- }
--#endif
- 
- static void make_tx_response(struct xenvif *vif,
- 			     struct xen_netif_tx_request *txp,
-@@ -1820,14 +1506,6 @@ static inline int rx_work_todo(struct xen_netbk *netbk)
- 
- static inline int tx_work_todo(struct xen_netbk *netbk)
- {
--#ifdef HAVE_XEN_PAGE_FOREIGN
--	if (netbk->dealloc_cons != netbk->dealloc_prod)
--		return 1;
--
--	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
--	    !list_empty(&netbk->pending_inuse_head))
--		return 1;
--#endif
- 
- 	if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
- 			!list_empty(&netbk->net_schedule_list))
-@@ -1862,9 +1540,6 @@ static int xen_netbk_kthread(void *data)
- static int __init netback_init(void)
- {
- 	int i;
--#ifdef HAVE_XEN_PAGE_FOREIGN
--	struct page *page;
--#endif
- 	int rc = 0;
- 	int group;
- 
-@@ -1888,32 +1563,6 @@ static int __init netback_init(void)
- 		netbk->net_timer.data = (unsigned long)netbk;
- 		netbk->net_timer.function = xen_netbk_alarm;
- 
--#ifdef HAVE_XEN_PAGE_FOREIGN
--		init_timer(&netbk->netbk_tx_pending_timer);
--		netbk->netbk_tx_pending_timer.data = (unsigned long)netbk;
--		netbk->netbk_tx_pending_timer.function =
--			xen_netbk_tx_pending_timeout;
--#endif
--
--#ifdef HAVE_XEN_PAGE_FOREIGN
--		netbk->mmap_pages =
--			alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
--		if (!netbk->mmap_pages) {
--			printk(KERN_ALERT "%s: out of memory\n", __func__);
--			del_timer(&netbk->netbk_tx_pending_timer);
--			del_timer(&netbk->net_timer);
--			rc = -ENOMEM;
--			goto failed_init;
--		}
--
--		for (i = 0; i < MAX_PENDING_REQS; i++) {
--			page = netbk->mmap_pages[i];
--			SetPageForeign(page, netif_page_release);
--			set_page_ext(page, netbk, i);
--			INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
--		}
--#endif
--
- 		netbk->pending_cons = 0;
- 		netbk->pending_prod = MAX_PENDING_REQS;
- 		for (i = 0; i < MAX_PENDING_REQS; i++)
-@@ -1926,11 +1575,6 @@ static int __init netback_init(void)
- 
- 		if (IS_ERR(netbk->task)) {
- 			printk(KERN_ALERT "kthread_run() fails at netback\n");
--#ifdef HAVE_XEN_PAGE_FOREIGN
--			free_empty_pages_and_pagevec(netbk->mmap_pages,
--						     MAX_PENDING_REQS);
--			del_timer(&netbk->netbk_tx_pending_timer);
--#endif
- 			del_timer(&netbk->net_timer);
- 			rc = PTR_ERR(netbk->task);
- 			goto failed_init;
-@@ -1938,9 +1582,6 @@ static int __init netback_init(void)
- 
- 		kthread_bind(netbk->task, group);
- 
--#ifdef HAVE_XEN_PAGE_FOREIGN
--		INIT_LIST_HEAD(&netbk->pending_inuse_head);
--#endif
- 		INIT_LIST_HEAD(&netbk->net_schedule_list);
- 
- 		spin_lock_init(&netbk->net_schedule_list_lock);
-@@ -1950,17 +1591,6 @@ static int __init netback_init(void)
- 		wake_up_process(netbk->task);
- 	}
- 
--#ifdef HAVE_XEN_PAGE_FOREIGN
--	netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
--	if (MODPARM_copy_skb) {
--		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
--					      NULL, 0))
--			netbk_copy_skb_mode = NETBK_ALWAYS_COPY_SKB;
--		else
--			netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
--	}
--#endif
--
- 	rc = xenvif_xenbus_init();
- 	if (rc)
- 		goto failed_init;
-@@ -1970,17 +1600,11 @@ static int __init netback_init(void)
- failed_init:
- 	for (i = 0; i < group; i++) {
- 		struct xen_netbk *netbk = &xen_netbk[i];
--#ifdef HAVE_XEN_PAGE_FOREIGN
--		free_empty_pages_and_pagevec(netbk->mmap_pages,
--				MAX_PENDING_REQS);
--		del_timer(&netbk->netbk_tx_pending_timer);
--#else
- 		int j;
- 		for (j = 0; j < MAX_PENDING_REQS; j++) {
- 			if (netbk->mmap_pages[j])
- 				__free_page(netbk->mmap_pages[j]);
- 		}
--#endif
- 		del_timer(&netbk->net_timer);
- 		kthread_stop(netbk->task);
- 	}
-diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
-index d56b1ee..22b8c35 100644
---- a/drivers/net/xen-netback/xenbus.c
-+++ b/drivers/net/xen-netback/xenbus.c
-@@ -74,10 +74,6 @@ static int netback_probe(struct xenbus_device *dev,
- 	dev_set_drvdata(&dev->dev, be);
- 
- 	sg = 1;
--#ifdef HAVE_XEN_PAGE_FOREIGN
--	if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB)
--		sg = 0;
--#endif
- 
- 	do {
- 		err = xenbus_transaction_start(&xbt);
--- 
-1.7.4
-
-
-From dc5f219e88294b93009eef946251251ffffb6d60 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx at linutronix.de>
-Date: Fri, 4 Feb 2011 13:19:20 +0100
-Subject: [PATCH 101/203] genirq: Add IRQF_FORCE_RESUME
-
-Xen needs to reenable interrupts which are marked IRQF_NO_SUSPEND in the
-resume path. Add a flag to force the reenabling in the resume code.
-
-Tested-and-acked-by: Ian Campbell <Ian.Campbell at eu.citrix.com>
-Signed-off-by: Thomas Gleixner <tglx at linutronix.de>
----
- include/linux/interrupt.h |    3 ++-
- kernel/irq/manage.c       |   11 ++++++++++-
- kernel/irq/pm.c           |    3 ---
- 3 files changed, 12 insertions(+), 5 deletions(-)
-
-diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
-index 55e0d42..d746da1 100644
---- a/include/linux/interrupt.h
-+++ b/include/linux/interrupt.h
-@@ -55,7 +55,7 @@
-  *                Used by threaded interrupts which need to keep the
-  *                irq line disabled until the threaded handler has been run.
-  * IRQF_NO_SUSPEND - Do not disable this IRQ during suspend
-- *
-+ * IRQF_FORCE_RESUME - Force enable it on resume even if IRQF_NO_SUSPEND is set
-  */
- #define IRQF_DISABLED		0x00000020
- #define IRQF_SAMPLE_RANDOM	0x00000040
-@@ -67,6 +67,7 @@
- #define IRQF_IRQPOLL		0x00001000
- #define IRQF_ONESHOT		0x00002000
- #define IRQF_NO_SUSPEND		0x00004000
-+#define IRQF_FORCE_RESUME	0x00008000
- 
- #define IRQF_TIMER		(__IRQF_TIMER | IRQF_NO_SUSPEND)
- 
-diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
-index 0caa59f..b4198ee 100644
---- a/kernel/irq/manage.c
-+++ b/kernel/irq/manage.c
-@@ -282,8 +282,17 @@ EXPORT_SYMBOL(disable_irq);
- 
- void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume)
- {
--	if (resume)
-+	if (resume) {
-+		if (!(desc->status & IRQ_SUSPENDED)) {
-+			if (!desc->action)
-+				return;
-+			if (!(desc->action->flags & IRQF_FORCE_RESUME))
-+				return;
-+			/* Pretend that it got disabled ! */
-+			desc->depth++;
-+		}
- 		desc->status &= ~IRQ_SUSPENDED;
-+	}
- 
- 	switch (desc->depth) {
- 	case 0:
-diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
-index 0d4005d8..d6bfb89 100644
---- a/kernel/irq/pm.c
-+++ b/kernel/irq/pm.c
-@@ -53,9 +53,6 @@ void resume_device_irqs(void)
- 	for_each_irq_desc(irq, desc) {
- 		unsigned long flags;
- 
--		if (!(desc->status & IRQ_SUSPENDED))
--			continue;
--
- 		raw_spin_lock_irqsave(&desc->lock, flags);
- 		__enable_irq(desc, irq, true);
- 		raw_spin_unlock_irqrestore(&desc->lock, flags);
--- 
-1.7.4
-
-
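To make the new flag concrete: a driver whose interrupt must stay armed across suspend, yet still be re-enabled by resume_device_irqs(), would combine the two flags. A minimal hypothetical sketch (the handler and device names are invented, not from this patch):

#include <linux/interrupt.h>

static irqreturn_t demo_handler(int irq, void *dev_id)
{
	/* Handles the interrupt; keeps running while the system suspends. */
	return IRQ_HANDLED;
}

static int demo_setup(unsigned int irq, void *dev)
{
	/* IRQF_NO_SUSPEND keeps the line enabled during suspend;
	 * IRQF_FORCE_RESUME makes __enable_irq() re-enable it anyway
	 * when resume_device_irqs() walks the descriptors. */
	return request_irq(irq, demo_handler,
			   IRQF_NO_SUSPEND | IRQF_FORCE_RESUME,
			   "demo-dev", dev);
}
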
-From bb70b10fcebc2a02591a1da53e8d6873582b421e Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Tue, 18 Jan 2011 20:17:10 -0500
-Subject: [PATCH 102/203] xen/mmu: WARN_ON when racing to swap middle leaf.
-
-The initial bootup code uses set_phys_to_machine quite a lot, and after
-bootup it would be used by the balloon driver. The balloon driver does have
-mutex lock so this should not be necessary - but just in case, add
-a WARN_ON if we do hit this scenario. If we do fail this, it is OK
-to continue as there is a backup mechanism (VM_IO) that can bypass
-the P2M and still set the _PAGE_IOMAP flags.
-
-[v2: Change from WARN to BUG_ON]
-[v3: Rebased on top of xen->p2m code split]
-[v4: Change from BUG_ON to WARN]
-Reviewed-by: Ian Campbell <Ian.Campbell at eu.citrix.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- arch/x86/xen/p2m.c |    3 ++-
- 1 files changed, 2 insertions(+), 1 deletions(-)
-
-diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
-index dd30ec8..851da14 100644
---- a/arch/x86/xen/p2m.c
-+++ b/arch/x86/xen/p2m.c
-@@ -482,7 +482,8 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
- 
- 		/* Swap over from MISSING to IDENTITY if needed. */
- 		if (p2m_top[topidx][mididx] == p2m_missing) {
--			p2m_top[topidx][mididx] = p2m_identity;
-+			WARN_ON(cmpxchg(&p2m_top[topidx][mididx], p2m_missing,
-+				p2m_identity) != p2m_missing);
- 			return true;
- 		}
- 	}
--- 
-1.7.4
-
-
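The WARN_ON above works because cmpxchg() returns the value that was previously stored, so a lost race shows up as a mismatch instead of a silent overwrite. A generic sketch of the idiom, with hypothetical names:

#include <linux/kernel.h>

/* Swap *slot from 'expected' to 'replacement' without a lock and
 * warn if another CPU changed it first; 'slot' is hypothetical. */
static void swap_leaf_or_warn(unsigned long **slot,
			      unsigned long *expected,
			      unsigned long *replacement)
{
	WARN_ON(cmpxchg(slot, expected, replacement) != expected);
}
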
-From da60f4ccf61268444fd83d5e266ffe26d5ce366d Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Tue, 1 Feb 2011 17:15:30 -0500
-Subject: [PATCH 103/203] xen/setup: Set identity mapping for non-RAM E820 and E820 gaps.
-
-We walk the E820 region and start at 0 (for PV guests we start
-at ISA_END_ADDRESS) and skip any E820 RAM regions. For all other
-regions and as well the gaps we set them to be identity mappings.
-
-The reason we do not want to set the identity mapping from 0->
-ISA_END_ADDRESS when running as PV is that the kernel would
-try to read DMI information and fail (no permissions to read that).
-There is a lot of gnarly code to deal with that weird region so
-we won't try to do a cleanup in this patch.
-
-This code ends up calling 'set_phys_range_identity' with the start
-and end PFN of the E820 regions that are non-RAM or have gaps.
-On 99% of machines that means one big region right underneath the
-4GB mark. Usually starts at 0xc0000 (or 0x80000) and goes to
-0x100000.
-
-[v2: Fix for E820 crossing 1MB region and clamp the start]
-[v3: Squashed in code that does this over ranges]
-[v4: Moved the comment to the correct spot]
-[v5: Use the "raw" E820 from the hypervisor]
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- arch/x86/xen/setup.c |   52 ++++++++++++++++++++++++++++++++++++++++++++++++++
- 1 files changed, 52 insertions(+), 0 deletions(-)
-
-diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
-index 7201800..54d9379 100644
---- a/arch/x86/xen/setup.c
-+++ b/arch/x86/xen/setup.c
-@@ -143,12 +143,55 @@ static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
- 	return released;
- }
- 
-+static unsigned long __init xen_set_identity(const struct e820entry *list,
-+					     ssize_t map_size)
-+{
-+	phys_addr_t last = xen_initial_domain() ? 0 : ISA_END_ADDRESS;
-+	phys_addr_t start_pci = last;
-+	const struct e820entry *entry;
-+	unsigned long identity = 0;
-+	int i;
-+
-+	for (i = 0, entry = list; i < map_size; i++, entry++) {
-+		phys_addr_t start = entry->addr;
-+		phys_addr_t end = start + entry->size;
-+
-+		if (start < last)
-+			start = last;
-+
-+		if (end <= start)
-+			continue;
-+
-+		/* Skip over the 1MB region. */
-+		if (last > end)
-+			continue;
-+
-+		if (entry->type == E820_RAM) {
-+			if (start > start_pci)
-+				identity += set_phys_range_identity(
-+						PFN_UP(start_pci), PFN_DOWN(start));
-+
-+		/* Without saving 'last' we would gobble RAM too
-+			 * at the end of the loop. */
-+			last = end;
-+			start_pci = end;
-+			continue;
-+		}
-+		start_pci = min(start, start_pci);
-+		last = end;
-+	}
-+	if (last > start_pci)
-+		identity += set_phys_range_identity(
-+					PFN_UP(start_pci), PFN_DOWN(last));
-+	return identity;
-+}
- /**
-  * machine_specific_memory_setup - Hook for machine specific memory setup.
-  **/
- char * __init xen_memory_setup(void)
- {
- 	static struct e820entry map[E820MAX] __initdata;
-+	static struct e820entry map_raw[E820MAX] __initdata;
- 
- 	unsigned long max_pfn = xen_start_info->nr_pages;
- 	unsigned long long mem_end;
-@@ -156,6 +199,7 @@ char * __init xen_memory_setup(void)
- 	struct xen_memory_map memmap;
- 	unsigned long extra_pages = 0;
- 	unsigned long extra_limit;
-+	unsigned long identity_pages = 0;
- 	int i;
- 	int op;
- 
-@@ -181,6 +225,7 @@ char * __init xen_memory_setup(void)
- 	}
- 	BUG_ON(rc);
- 
-+	memcpy(map_raw, map, sizeof(map));
- 	e820.nr_map = 0;
- 	xen_extra_mem_start = mem_end;
- 	for (i = 0; i < memmap.nr_entries; i++) {
-@@ -251,6 +296,13 @@ char * __init xen_memory_setup(void)
- 
- 	xen_add_extra_mem(extra_pages);
- 
-+	/*
-+	 * Set P2M for all non-RAM pages and E820 gaps to be identity
-+	 * type PFNs. We supply it with the non-sanitized version
-+	 * of the E820.
-+	 */
-+	identity_pages = xen_set_identity(map_raw, memmap.nr_entries);
-+	printk(KERN_INFO "Set %ld page(s) to 1-1 mapping.\n", identity_pages);
- 	return "Xen";
- }
- 
--- 
-1.7.4
-
-
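The walk above keeps two cursors: the start of the pending non-RAM run and the end of the last entry seen. Stripped of E820 details, the same invariant looks like this standalone sketch (it assumes the RAM list is sorted and non-overlapping, and 'mark_identity' is a hypothetical callback):

struct ram_range { unsigned long start, end; };

static unsigned long walk_non_ram(const struct ram_range *ram, int n,
				  unsigned long limit,
				  void (*mark_identity)(unsigned long s,
							unsigned long e))
{
	unsigned long cursor = 0, marked = 0;
	int i;

	for (i = 0; i < n; i++) {
		/* Everything between the previous RAM range and this
		 * one is a gap or non-RAM: mark it identity. */
		if (ram[i].start > cursor) {
			mark_identity(cursor, ram[i].start);
			marked += ram[i].start - cursor;
		}
		cursor = ram[i].end;	/* skip the RAM itself */
	}
	/* Tail: anything after the last RAM range up to the limit. */
	if (limit > cursor) {
		mark_identity(cursor, limit);
		marked += limit - cursor;
	}
	return marked;
}
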
-From 4f5d3a046949c958338ca780145dae21647c8d58 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Wed, 22 Dec 2010 08:57:30 -0500
-Subject: [PATCH 104/203] xen/debugfs: Add 'p2m' file for printing out the P2M layout.
-
-We walk over the whole P2M tree and construct a simplified view of
-which PFN regions belong to what level and what type they are.
-
-Only enabled if CONFIG_XEN_DEBUG_FS is set.
-
-[v2: UNKN->UNKNOWN, use uninitialized_var]
-[v3: Rebased on top of mmu->p2m code split]
-[v4: Fixed the else if]
-Reviewed-by: Ian Campbell <Ian.Campbell at eu.citrix.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- arch/x86/include/asm/xen/page.h |    3 +
- arch/x86/xen/mmu.c              |   14 +++++++
- arch/x86/xen/p2m.c              |   78 +++++++++++++++++++++++++++++++++++++++
- 3 files changed, 95 insertions(+), 0 deletions(-)
-
-diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
-index 65fa4f2..78ebbeb 100644
---- a/arch/x86/include/asm/xen/page.h
-+++ b/arch/x86/include/asm/xen/page.h
-@@ -52,6 +52,9 @@ extern int m2p_remove_override(struct page *page);
- extern struct page *m2p_find_override(unsigned long mfn);
- extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
- 
-+#ifdef CONFIG_XEN_DEBUG_FS
-+extern int p2m_dump_show(struct seq_file *m, void *v);
-+#endif
- static inline unsigned long pfn_to_mfn(unsigned long pfn)
- {
- 	unsigned long mfn;
-diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
-index 9c9e076..b13b6ca 100644
---- a/arch/x86/xen/mmu.c
-+++ b/arch/x86/xen/mmu.c
-@@ -46,6 +46,7 @@
- #include <linux/module.h>
- #include <linux/gfp.h>
- #include <linux/memblock.h>
-+#include <linux/seq_file.h>
- 
- #include <asm/pgtable.h>
- #include <asm/tlbflush.h>
-@@ -2367,6 +2368,18 @@ EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
- 
- #ifdef CONFIG_XEN_DEBUG_FS
- 
-+static int p2m_dump_open(struct inode *inode, struct file *filp)
-+{
-+	return single_open(filp, p2m_dump_show, NULL);
-+}
-+
-+static const struct file_operations p2m_dump_fops = {
-+	.open		= p2m_dump_open,
-+	.read		= seq_read,
-+	.llseek		= seq_lseek,
-+	.release	= single_release,
-+};
-+
- static struct dentry *d_mmu_debug;
- 
- static int __init xen_mmu_debugfs(void)
-@@ -2422,6 +2435,7 @@ static int __init xen_mmu_debugfs(void)
- 	debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug,
- 			   &mmu_stats.prot_commit_batched);
- 
-+	debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops);
- 	return 0;
- }
- fs_initcall(xen_mmu_debugfs);
-diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
-index 851da14..ea613d0 100644
---- a/arch/x86/xen/p2m.c
-+++ b/arch/x86/xen/p2m.c
-@@ -30,6 +30,7 @@
- #include <linux/list.h>
- #include <linux/hash.h>
- #include <linux/sched.h>
-+#include <linux/seq_file.h>
- 
- #include <asm/cache.h>
- #include <asm/setup.h>
-@@ -635,3 +636,80 @@ unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
- 	return ret;
- }
- EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
-+
-+#ifdef CONFIG_XEN_DEBUG_FS
-+
-+int p2m_dump_show(struct seq_file *m, void *v)
-+{
-+	static const char * const level_name[] = { "top", "middle",
-+						"entry", "abnormal" };
-+	static const char * const type_name[] = { "identity", "missing",
-+						"pfn", "abnormal"};
-+#define TYPE_IDENTITY 0
-+#define TYPE_MISSING 1
-+#define TYPE_PFN 2
-+#define TYPE_UNKNOWN 3
-+	unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0;
-+	unsigned int uninitialized_var(prev_level);
-+	unsigned int uninitialized_var(prev_type);
-+
-+	if (!p2m_top)
-+		return 0;
-+
-+	for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn++) {
-+		unsigned topidx = p2m_top_index(pfn);
-+		unsigned mididx = p2m_mid_index(pfn);
-+		unsigned idx = p2m_index(pfn);
-+		unsigned lvl, type;
-+
-+		lvl = 4;
-+		type = TYPE_UNKNOWN;
-+		if (p2m_top[topidx] == p2m_mid_missing) {
-+			lvl = 0; type = TYPE_MISSING;
-+		} else if (p2m_top[topidx] == NULL) {
-+			lvl = 0; type = TYPE_UNKNOWN;
-+		} else if (p2m_top[topidx][mididx] == NULL) {
-+			lvl = 1; type = TYPE_UNKNOWN;
-+		} else if (p2m_top[topidx][mididx] == p2m_identity) {
-+			lvl = 1; type = TYPE_IDENTITY;
-+		} else if (p2m_top[topidx][mididx] == p2m_missing) {
-+			lvl = 1; type = TYPE_MISSING;
-+		} else if (p2m_top[topidx][mididx][idx] == 0) {
-+			lvl = 2; type = TYPE_UNKNOWN;
-+		} else if (p2m_top[topidx][mididx][idx] == IDENTITY_FRAME(pfn)) {
-+			lvl = 2; type = TYPE_IDENTITY;
-+		} else if (p2m_top[topidx][mididx][idx] == INVALID_P2M_ENTRY) {
-+			lvl = 2; type = TYPE_MISSING;
-+		} else if (p2m_top[topidx][mididx][idx] == pfn) {
-+			lvl = 2; type = TYPE_PFN;
-+		} else if (p2m_top[topidx][mididx][idx] != pfn) {
-+			lvl = 2; type = TYPE_PFN;
-+		}
-+		if (pfn == 0) {
-+			prev_level = lvl;
-+			prev_type = type;
-+		}
-+		if (pfn == MAX_DOMAIN_PAGES-1) {
-+			lvl = 3;
-+			type = TYPE_UNKNOWN;
-+		}
-+		if (prev_type != type) {
-+			seq_printf(m, " [0x%lx->0x%lx] %s\n",
-+				prev_pfn_type, pfn, type_name[prev_type]);
-+			prev_pfn_type = pfn;
-+			prev_type = type;
-+		}
-+		if (prev_level != lvl) {
-+			seq_printf(m, " [0x%lx->0x%lx] level %s\n",
-+				prev_pfn_level, pfn, level_name[prev_level]);
-+			prev_pfn_level = pfn;
-+			prev_level = lvl;
-+		}
-+	}
-+	return 0;
-+#undef TYPE_IDENTITY
-+#undef TYPE_MISSING
-+#undef TYPE_PFN
-+#undef TYPE_UNKNOWN
-+}
-+#endif
--- 
-1.7.4
-
-
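The plumbing added above is the stock single_open() pattern for read-only debugfs files; a self-contained sketch with hypothetical names:

#include <linux/debugfs.h>
#include <linux/seq_file.h>

static int demo_show(struct seq_file *m, void *v)
{
	seq_printf(m, "state dump goes here\n");
	return 0;
}

static int demo_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, demo_show, inode->i_private);
}

static const struct file_operations demo_fops = {
	.open		= demo_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

/* Registration, e.g. from an initcall:
 *	debugfs_create_file("demo", 0400, parent, NULL, &demo_fops);
 */
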
-From 84f847d2e878b4d15e7552e86f20251a2d0d4522 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Thu, 23 Dec 2010 16:25:29 -0500
-Subject: [PATCH 105/203] xen/debug: WARN_ON when identity PFN has no _PAGE_IOMAP flag set.
-
-Only enabled if XEN_DEBUG is enabled. We print a warning
-when:
-
- pfn_to_mfn(pfn) == pfn, but no VM_IO (_PAGE_IOMAP) flag set
-	(and pfn is an identity mapped pfn)
- pfn_to_mfn(pfn) != pfn, and VM_IO flag is set.
-	(ditto, pfn is an identity mapped pfn)
-
-[v2: Make it dependent on CONFIG_XEN_DEBUG instead of ..DEBUG_FS]
-[v3: Fix compiler warning]
-
-Reviewed-by: Ian Campbell <ian.campbell at citrix.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- arch/x86/xen/Kconfig |    8 ++++++++
- arch/x86/xen/mmu.c   |   38 ++++++++++++++++++++++++++++++++++++++
- 2 files changed, 46 insertions(+), 0 deletions(-)
-
-diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
-index 5b54892..e4343fe 100644
---- a/arch/x86/xen/Kconfig
-+++ b/arch/x86/xen/Kconfig
-@@ -48,3 +48,11 @@ config XEN_DEBUG_FS
- 	help
- 	  Enable statistics output and various tuning options in debugfs.
- 	  Enabling this option may incur a significant performance overhead.
-+
-+config XEN_DEBUG
-+	bool "Enable Xen debug checks"
-+	depends on XEN
-+	default n
-+	help
-+	  Enable various WARN_ON checks in the Xen MMU code.
-+	  Enabling this option WILL incur a significant performance overhead.
-diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
-index b13b6ca..0c376a2 100644
---- a/arch/x86/xen/mmu.c
-+++ b/arch/x86/xen/mmu.c
-@@ -547,6 +547,41 @@ pte_t xen_make_pte(pteval_t pte)
- }
- PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
- 
-+#ifdef CONFIG_XEN_DEBUG
-+pte_t xen_make_pte_debug(pteval_t pte)
-+{
-+	phys_addr_t addr = (pte & PTE_PFN_MASK);
-+	phys_addr_t other_addr;
-+	bool io_page = false;
-+	pte_t _pte;
-+
-+	if (pte & _PAGE_IOMAP)
-+		io_page = true;
-+
-+	_pte = xen_make_pte(pte);
-+
-+	if (!addr)
-+		return _pte;
-+
-+	if (io_page &&
-+	    (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
-+		other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT;
-+		WARN(addr != other_addr,
-+			"0x%lx is using VM_IO, but it is 0x%lx!\n",
-+			(unsigned long)addr, (unsigned long)other_addr);
-+	} else {
-+		pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP;
-+		other_addr = (_pte.pte & PTE_PFN_MASK);
-+		WARN((addr == other_addr) && (!io_page) && (!iomap_set),
-+			"0x%lx is missing VM_IO (and wasn't fixed)!\n",
-+			(unsigned long)addr);
-+	}
-+
-+	return _pte;
-+}
-+PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_debug);
-+#endif
-+
- pgd_t xen_make_pgd(pgdval_t pgd)
- {
- 	pgd = pte_pfn_to_mfn(pgd);
-@@ -1957,6 +1992,9 @@ __init void xen_ident_map_ISA(void)
- 
- static __init void xen_post_allocator_init(void)
- {
-+#ifdef CONFIG_XEN_DEBUG
-+	pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
-+#endif
- 	pv_mmu_ops.set_pte = xen_set_pte;
- 	pv_mmu_ops.set_pmd = xen_set_pmd;
- 	pv_mmu_ops.set_pud = xen_set_pud;
--- 
-1.7.4
-
-
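The two warnings reduce to one consistency rule: a pfn should be identity mapped if and only if its pte carries _PAGE_IOMAP. An illustrative restatement (simplified; the real check above also tolerates ptes it has already fixed up):

#include <asm/xen/page.h>

/* Illustrative predicate only: identity-mapped pfns should carry
 * _PAGE_IOMAP, and _PAGE_IOMAP ptes should be identity mapped. */
static bool pte_iomap_consistent(unsigned long pfn, bool has_iomap)
{
	bool identity = (pfn_to_mfn(pfn) == pfn);

	return identity == has_iomap;
}
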
-From 5c560d90ed68b399d5020692d38e6cc77db4f6d0 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Fri, 14 Jan 2011 17:55:44 -0500
-Subject: [PATCH 106/203] xen/m2p: No need to catch exceptions when we know that there is no RAM
-
-.. beyond what we think is the end of memory. However there might
-be more System RAM - but assigned to a guest. Hence jump to the
-M2P override check and consult it.
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- arch/x86/include/asm/xen/page.h |    6 +++++-
- 1 files changed, 5 insertions(+), 1 deletions(-)
-
-diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
-index 78ebbeb..1957070 100644
---- a/arch/x86/include/asm/xen/page.h
-+++ b/arch/x86/include/asm/xen/page.h
-@@ -85,6 +85,10 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
- 	if (xen_feature(XENFEAT_auto_translated_physmap))
- 		return mfn;
- 
-+	if (unlikely((mfn >> machine_to_phys_order) != 0)) {
-+		pfn = ~0;
-+		goto try_override;
-+	}
- 	pfn = 0;
- 	/*
- 	 * The array access can fail (e.g., device space beyond end of RAM).
-@@ -92,7 +96,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
- 	 * but we must handle the fault without crashing!
- 	 */
- 	__get_user(pfn, &machine_to_phys_mapping[mfn]);
--
-+try_override:
- 	/*
- 	 * If this appears to be a foreign mfn (because the pfn
- 	 * doesn't map back to the mfn), then check the local override
--- 
-1.7.4
-
-
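The guard added above is just a range check performed before the (potentially faulting) array access; schematically:

/* 'm2p_order' stands in for machine_to_phys_order: an MFN whose bits
 * above the table size are set cannot have an M2P entry, so skip the
 * lookup and go straight to the override table. */
static bool mfn_in_m2p(unsigned long mfn, unsigned int m2p_order)
{
	return (mfn >> m2p_order) == 0;
}
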
-From ba114b8b8112f1fa4b14c929a85ebf51c9c1a749 Mon Sep 17 00:00:00 2001
-From: Stefano Stabellini <stefano.stabellini at eu.citrix.com>
-Date: Wed, 2 Feb 2011 18:32:59 +0000
-Subject: [PATCH 107/203] xen/m2p: Check whether the MFN has IDENTITY_FRAME bit set..
-
-If there is no proper PFN value in the M2P for the MFN
-(so we get 0xFFFFF.. or 0x55555, or 0x0), we should
-consult the M2P override to see if there is an entry for this.
-[Note: we also consult the M2P override if the MFN
-is past our machine_to_phys size].
-
-We consult the P2M with the PFN. In case the returned
-MFN is one of the special values: 0xFFF.., 0x5555
-(which signify that the MFN can be either "missing" or it
-belongs to DOMID_IO) or the p2m(m2p(mfn)) != mfn, we check
-the M2P override. If we fail the M2P override check, we reset
-the PFN value to INVALID_P2M_ENTRY.
-
-Next we try to find the MFN in the P2M using the MFN
-value (not the PFN value) and if found, we know
-that this MFN is an identity value and return it as so.
-
-Otherwise we have exhausted all the possibilities and we
-return the PFN, which at this stage can either be a real
-PFN value found in the machine_to_phys.. array, or
-INVALID_P2M_ENTRY value.
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- arch/x86/include/asm/xen/page.h |   29 ++++++++++++++++++++++-------
- 1 files changed, 22 insertions(+), 7 deletions(-)
-
-diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
-index 1957070..c61934f 100644
---- a/arch/x86/include/asm/xen/page.h
-+++ b/arch/x86/include/asm/xen/page.h
-@@ -81,6 +81,7 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn)
- static inline unsigned long mfn_to_pfn(unsigned long mfn)
- {
- 	unsigned long pfn;
-+	int ret = 0;
- 
- 	if (xen_feature(XENFEAT_auto_translated_physmap))
- 		return mfn;
-@@ -95,15 +96,29 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
- 	 * In such cases it doesn't matter what we return (we return garbage),
- 	 * but we must handle the fault without crashing!
- 	 */
--	__get_user(pfn, &machine_to_phys_mapping[mfn]);
-+	ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
- try_override:
--	/*
--	 * If this appears to be a foreign mfn (because the pfn
--	 * doesn't map back to the mfn), then check the local override
--	 * table to see if there's a better pfn to use.
-+	/* ret might be < 0 if there are no entries in the m2p for mfn */
-+	if (ret < 0)
-+		pfn = ~0;
-+	else if (get_phys_to_machine(pfn) != mfn)
-+		/*
-+		 * If this appears to be a foreign mfn (because the pfn
-+		 * doesn't map back to the mfn), then check the local override
-+		 * table to see if there's a better pfn to use.
-+		 *
-+		 * m2p_find_override_pfn returns ~0 if it doesn't find anything.
-+		 */
-+		pfn = m2p_find_override_pfn(mfn, ~0);
-+
-+	/* 
-+	 * pfn is ~0 if there are no entries in the m2p for mfn or if the
-+	 * entry doesn't map back to the mfn and m2p_override doesn't have a
-+	 * valid entry for it.
- 	 */
--	if (get_phys_to_machine(pfn) != mfn)
--		pfn = m2p_find_override_pfn(mfn, pfn);
-+	if (pfn == ~0 &&
-+			get_phys_to_machine(mfn) == IDENTITY_FRAME(mfn))
-+		pfn = mfn;
- 
- 	return pfn;
- }
--- 
-1.7.4
-
-
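Condensed, the message describes a three-stage lookup: the M2P array, then the M2P override, then an identity check in the P2M. A simplified sketch of that order (error handling trimmed; helper names follow the diff):

static unsigned long resolve_mfn(unsigned long mfn)
{
	unsigned long pfn = ~0UL;

	/* Stage 1: the M2P array, guarded against faults. */
	if (__get_user(pfn, &machine_to_phys_mapping[mfn]) == 0 &&
	    get_phys_to_machine(pfn) == mfn)
		return pfn;

	/* Stage 2: a foreign page may have an override entry. */
	pfn = m2p_find_override_pfn(mfn, ~0UL);
	if (pfn != ~0UL)
		return pfn;

	/* Stage 3: an identity-mapped frame maps to itself. */
	if (get_phys_to_machine(mfn) == IDENTITY_FRAME(mfn))
		return mfn;

	return ~0UL;	/* i.e. INVALID_P2M_ENTRY */
}
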
-From d0783a02ef1bf28650f789b5b37ec1df42b53821 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Thu, 9 Dec 2010 15:01:11 -0500
-Subject: [PATCH 108/203] xen/irq: Don't fall over when nr_irqs_gsi > nr_irqs.
-
-This scenario, where nr_irqs_gsi is greater than nr_irqs,
-is rather strange, but let's still try to survive. Make sure
-to print a warning so the user wouldn't be surprised in case
-things don't work.
-
-Solves a bootup-crash when booting Xen and Linux under QEMU.
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/events.c |    9 +++++++++
- 1 files changed, 9 insertions(+), 0 deletions(-)
-
-diff --git a/drivers/xen/events.c b/drivers/xen/events.c
-index 7468147..6de2d76 100644
---- a/drivers/xen/events.c
-+++ b/drivers/xen/events.c
-@@ -414,6 +414,7 @@ static int find_unbound_irq(void)
- 	if (bottom == nr_irqs)
- 		goto no_irqs;
- 
-+retry:
- 	/* This loop starts from the top of IRQ space and goes down.
- 	 * We need this b/c if we have a PCI device in a Xen PV guest
- 	 * we do not have an IO-APIC (though the backend might have them)
-@@ -437,6 +438,14 @@ static int find_unbound_irq(void)
- 		goto no_irqs;
- 
- 	res = irq_alloc_desc_at(irq, -1);
-+	if (res == -EEXIST) {
-+		top--;
-+		if (bottom > top)
-+			printk(KERN_ERR "Eating in GSI/MSI space (%d)!" \
-+				" Your PCI device might not work!\n", top);
-+		if (top > NR_IRQS_LEGACY)
-+			goto retry;
-+	}
- 
- 	if (WARN_ON(res != irq))
- 		return -1;
--- 
-1.7.4
-
-
-From 4ab08528537b78a1b9696e9b2757d0e818d157a0 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Tue, 11 Jan 2011 17:20:13 +0000
-Subject: [PATCH 109/203] xen: handled remapped IRQs when enabling a pcifront PCI device.
-
-This happens to not be an issue currently because we take pains to try
-to ensure that the GSI-IRQ mapping is 1-1 in a PV guest and that
-regular event channels do not clash. However a subsequent patch is
-going to break this 1-1 mapping.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Cc: Stefano Stabellini <stefano.stabellini at eu.citrix.com>
-Cc: Jeremy Fitzhardinge <jeremy at goop.org>
----
- arch/x86/pci/xen.c |   22 ++++++++++++++--------
- 1 files changed, 14 insertions(+), 8 deletions(-)
-
-diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
-index 25cd4a0..2a12f3d 100644
---- a/arch/x86/pci/xen.c
-+++ b/arch/x86/pci/xen.c
-@@ -226,21 +226,27 @@ static int xen_pcifront_enable_irq(struct pci_dev *dev)
- {
- 	int rc;
- 	int share = 1;
-+	u8 gsi;
- 
--	dev_info(&dev->dev, "Xen PCI enabling IRQ: %d\n", dev->irq);
--
--	if (dev->irq < 0)
--		return -EINVAL;
-+	rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi);
-+	if (rc < 0) {
-+		dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n",
-+			 rc);
-+		return rc;
-+	}
- 
--	if (dev->irq < NR_IRQS_LEGACY)
-+	if (gsi < NR_IRQS_LEGACY)
- 		share = 0;
- 
--	rc = xen_allocate_pirq(dev->irq, share, "pcifront");
-+	rc = xen_allocate_pirq(gsi, share, "pcifront");
- 	if (rc < 0) {
--		dev_warn(&dev->dev, "Xen PCI IRQ: %d, failed to register:%d\n",
--			 dev->irq, rc);
-+		dev_warn(&dev->dev, "Xen PCI: failed to register GSI%d: %d\n",
-+			 gsi, rc);
- 		return rc;
- 	}
-+
-+	dev->irq = rc;
-+	dev_info(&dev->dev, "Xen PCI mapped GSI%d to IRQ%d\n", gsi, dev->irq);
- 	return 0;
- }
- 
--- 
-1.7.4
-
-
-From 449913e7b40a40602cdc080ef3958736077d2914 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Tue, 11 Jan 2011 17:20:14 +0000
-Subject: [PATCH 110/203] xen:events: move find_unbound_irq inside CONFIG_PCI_MSI
-
-The only caller is xen_allocate_pirq_msi which is also under this
-ifdef so this fixes:
-    drivers/xen/events.c:377: warning: 'find_unbound_pirq' defined but not used
-when CONFIG_PCI_MSI=n
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Cc: Stefano Stabellini <stefano.stabellini at eu.citrix.com>
-Cc: Jeremy Fitzhardinge <jeremy at goop.org>
----
- drivers/xen/events.c |   34 +++++++++++++++++-----------------
- 1 files changed, 17 insertions(+), 17 deletions(-)
-
-diff --git a/drivers/xen/events.c b/drivers/xen/events.c
-index 6de2d76..8e28c01 100644
---- a/drivers/xen/events.c
-+++ b/drivers/xen/events.c
-@@ -387,23 +387,6 @@ static int get_nr_hw_irqs(void)
- 	return ret;
- }
- 
--static int find_unbound_pirq(int type)
--{
--	int rc, i;
--	struct physdev_get_free_pirq op_get_free_pirq;
--	op_get_free_pirq.type = type;
--
--	rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
--	if (!rc)
--		return op_get_free_pirq.pirq;
--
--	for (i = 0; i < nr_irqs; i++) {
--		if (pirq_to_irq[i] < 0)
--			return i;
--	}
--	return -1;
--}
--
- static int find_unbound_irq(void)
- {
- 	struct irq_data *data;
-@@ -686,6 +669,23 @@ out:
- #include <linux/msi.h>
- #include "../pci/msi.h"
- 
-+static int find_unbound_pirq(int type)
-+{
-+	int rc, i;
-+	struct physdev_get_free_pirq op_get_free_pirq;
-+	op_get_free_pirq.type = type;
-+
-+	rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
-+	if (!rc)
-+		return op_get_free_pirq.pirq;
-+
-+	for (i = 0; i < nr_irqs; i++) {
-+		if (pirq_to_irq[i] < 0)
-+			return i;
-+	}
-+	return -1;
-+}
-+
- void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc)
- {
- 	spin_lock(&irq_mapping_update_lock);
--- 
-1.7.4
-
-
-From 5a61eb864cdd63b8db29da707aa8e33b8dd54578 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Tue, 11 Jan 2011 17:20:15 +0000
-Subject: [PATCH 111/203] xen: events: add xen_allocate_irq_{dynamic, gsi} and xen_free_irq
-
-This is neater than open-coded calls to irq_alloc_desc_at and
-irq_free_desc.
-
-No intended behavioural change.
-
-Note that we previously were not checking the return value of
-irq_alloc_desc_at which would be failing for GSI<NR_IRQS_LEGACY
-because the core architecture code has already allocated those for
-us. Hence the additional check against NR_IRQS_LEGACY in
-xen_allocate_irq_gsi.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Cc: Stefano Stabellini <stefano.stabellini at eu.citrix.com>
-Cc: Jeremy Fitzhardinge <jeremy at goop.org>
----
- drivers/xen/events.c |   53 +++++++++++++++++++++++++++++++++-----------------
- 1 files changed, 35 insertions(+), 18 deletions(-)
-
-diff --git a/drivers/xen/events.c b/drivers/xen/events.c
-index 8e28c01..e74bdcf 100644
---- a/drivers/xen/events.c
-+++ b/drivers/xen/events.c
-@@ -387,7 +387,7 @@ static int get_nr_hw_irqs(void)
- 	return ret;
- }
- 
--static int find_unbound_irq(void)
-+static int xen_allocate_irq_dynamic(void)
- {
- 	struct irq_data *data;
- 	int irq, res;
-@@ -445,6 +445,30 @@ static bool identity_mapped_irq(unsigned irq)
- 	return irq < get_nr_hw_irqs();
- }
- 
-+static int xen_allocate_irq_gsi(unsigned gsi)
-+{
-+	int irq;
-+
-+	if (!identity_mapped_irq(gsi) &&
-+	    (xen_initial_domain() || !xen_pv_domain()))
-+		return xen_allocate_irq_dynamic();
-+
-+	/* Legacy IRQ descriptors are already allocated by the arch. */
-+	if (gsi < NR_IRQS_LEGACY)
-+		return gsi;
-+
-+	irq = irq_alloc_desc_at(gsi, -1);
-+	if (irq < 0)
-+		panic("Unable to allocate to IRQ%d (%d)\n", gsi, irq);
-+
-+	return irq;
-+}
-+
-+static void xen_free_irq(unsigned irq)
-+{
-+	irq_free_desc(irq);
-+}
-+
- static void pirq_unmask_notify(int irq)
- {
- 	struct physdev_eoi eoi = { .irq = pirq_from_irq(irq) };
-@@ -630,14 +654,7 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name)
- 		goto out;	/* XXX need refcount? */
- 	}
- 
--	/* If we are a PV guest, we don't have GSIs (no ACPI passed). Therefore
--	 * we are using the !xen_initial_domain() to drop in the function.*/
--	if (identity_mapped_irq(gsi) || (!xen_initial_domain() &&
--				xen_pv_domain())) {
--		irq = gsi;
--		irq_alloc_desc_at(irq, -1);
--	} else
--		irq = find_unbound_irq();
-+	irq = xen_allocate_irq_gsi(gsi);
- 
- 	set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
- 				      handle_level_irq, name);
-@@ -650,7 +667,7 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name)
- 	 * this in the priv domain. */
- 	if (xen_initial_domain() &&
- 	    HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
--		irq_free_desc(irq);
-+		xen_free_irq(irq);
- 		irq = -ENOSPC;
- 		goto out;
- 	}
-@@ -691,7 +708,7 @@ void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc)
- 	spin_lock(&irq_mapping_update_lock);
- 
- 	if (alloc & XEN_ALLOC_IRQ) {
--		*irq = find_unbound_irq();
-+		*irq = xen_allocate_irq_dynamic();
- 		if (*irq == -1)
- 			goto out;
- 	}
-@@ -741,7 +758,7 @@ int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
- 
- 	spin_lock(&irq_mapping_update_lock);
- 
--	irq = find_unbound_irq();
-+	irq = xen_allocate_irq_dynamic();
- 
- 	if (irq == -1)
- 		goto out;
-@@ -750,7 +767,7 @@ int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
- 	if (rc) {
- 		printk(KERN_WARNING "xen map irq failed %d\n", rc);
- 
--		irq_free_desc(irq);
-+		xen_free_irq(irq);
- 
- 		irq = -1;
- 		goto out;
-@@ -792,7 +809,7 @@ int xen_destroy_irq(int irq)
- 	}
- 	irq_info[irq] = mk_unbound_info();
- 
--	irq_free_desc(irq);
-+	xen_free_irq(irq);
- 
- out:
- 	spin_unlock(&irq_mapping_update_lock);
-@@ -823,7 +840,7 @@ int bind_evtchn_to_irq(unsigned int evtchn)
- 	irq = evtchn_to_irq[evtchn];
- 
- 	if (irq == -1) {
--		irq = find_unbound_irq();
-+		irq = xen_allocate_irq_dynamic();
- 
- 		set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
- 					      handle_fasteoi_irq, "event");
-@@ -848,7 +865,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
- 	irq = per_cpu(ipi_to_irq, cpu)[ipi];
- 
- 	if (irq == -1) {
--		irq = find_unbound_irq();
-+		irq = xen_allocate_irq_dynamic();
- 		if (irq < 0)
- 			goto out;
- 
-@@ -884,7 +901,7 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
- 	irq = per_cpu(virq_to_irq, cpu)[virq];
- 
- 	if (irq == -1) {
--		irq = find_unbound_irq();
-+		irq = xen_allocate_irq_dynamic();
- 
- 		set_irq_chip_and_handler_name(irq, &xen_percpu_chip,
- 					      handle_percpu_irq, "virq");
-@@ -943,7 +960,7 @@ static void unbind_from_irq(unsigned int irq)
- 	if (irq_info[irq].type != IRQT_UNBOUND) {
- 		irq_info[irq] = mk_unbound_info();
- 
--		irq_free_desc(irq);
-+		xen_free_irq(irq);
- 	}
- 
- 	spin_unlock(&irq_mapping_update_lock);
--- 
-1.7.4
-
-
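The subtlety called out in the message: descriptors for IRQs below NR_IRQS_LEGACY already exist because the arch core creates them, so the GSI allocator must hand them back untouched rather than call irq_alloc_desc_at(). Reduced to its essence:

#include <linux/irq.h>

/* Condensed sketch of the guard; the full version is in the hunk above. */
static int alloc_gsi_irq(unsigned int gsi)
{
	if (gsi < NR_IRQS_LEGACY)
		return gsi;	/* already allocated by the arch core */

	return irq_alloc_desc_at(gsi, -1);
}
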
-From 026c9d2d0d75ec51b251aac4ca8855f4fae6af23 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Tue, 11 Jan 2011 17:20:16 +0000
-Subject: [PATCH 112/203] xen: events: allocate GSIs and dynamic IRQs from separate IRQ ranges.
-
-There are three cases which we need to care about, PV guest, PV domain
-0 and HVM guest.
-
-The PV guest case is simple since it has no access to ACPI or real
-APICs and therefore has no GSIs therefore we simply dynamically
-allocate all IRQs. The potentially interesting case here is PIRQ type
-event channels associated with passed through PCI devices. However
-even in this case the guest has no direct interaction with the
-physical GSI since that happens in the PCI backend.
-
-The PV domain 0 and HVM guest cases are actually the same. In domain 0
-case the kernel sees the host ACPI and GSIs (although it only sees the
-APIC indirectly via the hypervisor) and in the HVM guest case it sees
-the virtualised ACPI and emulated APICs. In these cases we start
-allocating dynamic IRQs at nr_irqs_gsi so that they cannot clash with
-any GSI.
-
-Currently xen_allocate_irq_dynamic starts at nr_irqs and works
-backwards looking for a free IRQ in order to (try and) avoid clashing
-with GSIs used in domain 0 and in HVM guests. This change avoids that
-although we retain the behaviour of allowing dynamic IRQs to encroach
-on the GSI range if no suitable IRQs are available since a future IRQ
-clash is deemed preferable to failure right now.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Cc: Stefano Stabellini <stefano.stabellini at eu.citrix.com>
-Cc: Jeremy Fitzhardinge <jeremy at goop.org>
----
- drivers/xen/events.c |   84 +++++++++++++++----------------------------------
- 1 files changed, 26 insertions(+), 58 deletions(-)
-
-diff --git a/drivers/xen/events.c b/drivers/xen/events.c
-index e74bdcf..06f2e61 100644
---- a/drivers/xen/events.c
-+++ b/drivers/xen/events.c
-@@ -376,81 +376,49 @@ static void unmask_evtchn(int port)
- 	put_cpu();
- }
- 
--static int get_nr_hw_irqs(void)
-+static int xen_allocate_irq_dynamic(void)
- {
--	int ret = 1;
-+	int first = 0;
-+	int irq;
- 
- #ifdef CONFIG_X86_IO_APIC
--	ret = get_nr_irqs_gsi();
-+	/*
-+	 * For an HVM guest or domain 0 which see "real" (emulated or
-+	 * actual repectively) GSIs we allocate dynamic IRQs
-+	 * actual respectively) GSIs we allocate dynamic IRQs
-+	 * etc. from the range above those "real" GSIs to avoid
-+	 * collisions.
-+	 */
-+	if (xen_initial_domain() || xen_hvm_domain())
-+		first = get_nr_irqs_gsi();
- #endif
- 
--	return ret;
--}
--
--static int xen_allocate_irq_dynamic(void)
--{
--	struct irq_data *data;
--	int irq, res;
--	int bottom = get_nr_hw_irqs();
--	int top = nr_irqs-1;
--
--	if (bottom == nr_irqs)
--		goto no_irqs;
--
- retry:
--	/* This loop starts from the top of IRQ space and goes down.
--	 * We need this b/c if we have a PCI device in a Xen PV guest
--	 * we do not have an IO-APIC (though the backend might have them)
--	 * mapped in. To not have a collision of physical IRQs with the Xen
--	 * event channels start at the top of the IRQ space for virtual IRQs.
--	 */
--	for (irq = top; irq > bottom; irq--) {
--		data = irq_get_irq_data(irq);
--		/* only 15->0 have init'd desc; handle irq > 16 */
--		if (!data)
--			break;
--		if (data->chip == &no_irq_chip)
--			break;
--		if (data->chip != &xen_dynamic_chip)
--			continue;
--		if (irq_info[irq].type == IRQT_UNBOUND)
--			return irq;
--	}
-+	irq = irq_alloc_desc_from(first, -1);
- 
--	if (irq == bottom)
--		goto no_irqs;
--
--	res = irq_alloc_desc_at(irq, -1);
--	if (res == -EEXIST) {
--		top--;
--		if (bottom > top)
--			printk(KERN_ERR "Eating in GSI/MSI space (%d)!" \
--				" Your PCI device might not work!\n", top);
--		if (top > NR_IRQS_LEGACY)
--			goto retry;
-+	if (irq == -ENOMEM && first > NR_IRQS_LEGACY) {
-+		printk(KERN_ERR "Out of dynamic IRQ space and eating into GSI space. You should increase nr_irqs\n");
-+		first = max(NR_IRQS_LEGACY, first - NR_IRQS_LEGACY);
-+		goto retry;
- 	}
- 
--	if (WARN_ON(res != irq))
--		return -1;
-+	if (irq < 0)
-+		panic("No available IRQ to bind to: increase nr_irqs!\n");
- 
- 	return irq;
--
--no_irqs:
--	panic("No available IRQ to bind to: increase nr_irqs!\n");
--}
--
--static bool identity_mapped_irq(unsigned irq)
--{
--	/* identity map all the hardware irqs */
--	return irq < get_nr_hw_irqs();
- }
- 
- static int xen_allocate_irq_gsi(unsigned gsi)
- {
- 	int irq;
- 
--	if (!identity_mapped_irq(gsi) &&
--	    (xen_initial_domain() || !xen_pv_domain()))
-+	/*
-+	 * A PV guest has no concept of a GSI (since it has no ACPI
-+	 * nor access to/knowledge of the physical APICs). Therefore
-+	 * all IRQs are dynamically allocated from the entire IRQ
-+	 * space.
-+	 */
-+	if (xen_pv_domain() && !xen_initial_domain())
- 		return xen_allocate_irq_dynamic();
- 
- 	/* Legacy IRQ descriptors are already allocated by the arch. */
--- 
-1.7.4
-
-
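The allocation policy the message describes, in isolation: start above the GSI range on dom0 and HVM guests, and if that space is exhausted, back off into the GSI range in steps rather than fail outright. A sketch (constants as in the diff):

#include <linux/kernel.h>
#include <linux/irq.h>

static int alloc_dynamic_irq(int first)
{
	int irq;

	/* 'first' is nr_irqs_gsi for dom0/HVM guests, 0 for PV guests. */
retry:
	irq = irq_alloc_desc_from(first, -1);
	if (irq == -ENOMEM && first > NR_IRQS_LEGACY) {
		/* Encroach on the GSI range rather than fail now. */
		first = max(NR_IRQS_LEGACY, first - NR_IRQS_LEGACY);
		goto retry;
	}
	return irq;
}
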
-From d042c4b63ab93fb3089f2cc9f6628ab91b7b4073 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <Ian.Campbell at eu.citrix.com>
-Date: Thu, 3 Feb 2011 09:49:35 +0000
-Subject: [PATCH 113/203] xen: events: do not free legacy IRQs
-
-c514d00c8057 "xen: events: add xen_allocate_irq_{dynamic, gsi} and
-xen_free_irq" correctly avoids reallocating legacy IRQs (which are
-managed by the arch core) but erroneously did not prevent them being
-freed.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/events.c |    4 ++++
- 1 files changed, 4 insertions(+), 0 deletions(-)
-
-diff --git a/drivers/xen/events.c b/drivers/xen/events.c
-index 06f2e61..accb37a 100644
---- a/drivers/xen/events.c
-+++ b/drivers/xen/events.c
-@@ -434,6 +434,10 @@ static int xen_allocate_irq_gsi(unsigned gsi)
- 
- static void xen_free_irq(unsigned irq)
- {
-+	/* Legacy IRQ descriptors are managed by the arch. */
-+	if (irq < NR_IRQS_LEGACY)
-+		return;
-+
- 	irq_free_desc(irq);
- }
- 
--- 
-1.7.4
-
-
-From 6e435118ec43bbd781df48ceef101d8f1a48e699 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx at linutronix.de>
-Date: Sat, 5 Feb 2011 20:08:52 +0000
-Subject: [PATCH 114/203] xen: Remove stale irq_chip.end
-
-irq_chip.end got obsolete with the removal of __do_IRQ()
-
-Signed-off-by: Thomas Gleixner <tglx at linutronix.de>
-Acked-by: Ian Campbell <ian.campbell at citrix.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/events.c |   18 ------------------
- 1 files changed, 0 insertions(+), 18 deletions(-)
-
-diff --git a/drivers/xen/events.c b/drivers/xen/events.c
-index accb37a..c8826b5 100644
---- a/drivers/xen/events.c
-+++ b/drivers/xen/events.c
-@@ -555,23 +555,6 @@ static void ack_pirq(unsigned int irq)
- 	}
- }
- 
--static void end_pirq(unsigned int irq)
--{
--	int evtchn = evtchn_from_irq(irq);
--	struct irq_desc *desc = irq_to_desc(irq);
--
--	if (WARN_ON(!desc))
--		return;
--
--	if ((desc->status & (IRQ_DISABLED|IRQ_PENDING)) ==
--	    (IRQ_DISABLED|IRQ_PENDING)) {
--		shutdown_pirq(irq);
--	} else if (VALID_EVTCHN(evtchn)) {
--		unmask_evtchn(evtchn);
--		pirq_unmask_notify(irq);
--	}
--}
--
- static int find_irq_by_gsi(unsigned gsi)
- {
- 	int irq;
-@@ -1508,7 +1491,6 @@ static struct irq_chip xen_pirq_chip __read_mostly = {
- 	.mask		= disable_pirq,
- 
- 	.ack		= ack_pirq,
--	.end		= end_pirq,
- 
- 	.set_affinity	= set_affinity_irq,
- 
--- 
-1.7.4
-
-
-From ab1d37664458dd36cbf7e8a81cc4199b25937142 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx at linutronix.de>
-Date: Sat, 5 Feb 2011 20:08:54 +0000
-Subject: [PATCH 115/203] xen: Switch to new irq_chip functions
-
-Convert Xen to the new irq_chip functions. Brings us closer to enable
-CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED
-
-Signed-off-by: Thomas Gleixner <tglx at linutronix.de>
-Acked-by: Ian Campbell <ian.campbell at citrix.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/events.c |   95 +++++++++++++++++++++++++++-----------------------
- 1 files changed, 51 insertions(+), 44 deletions(-)
-
-diff --git a/drivers/xen/events.c b/drivers/xen/events.c
-index c8826b5..cf1712f 100644
---- a/drivers/xen/events.c
-+++ b/drivers/xen/events.c
-@@ -277,7 +277,7 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
- 
- 	BUG_ON(irq == -1);
- #ifdef CONFIG_SMP
--	cpumask_copy(irq_to_desc(irq)->affinity, cpumask_of(cpu));
-+	cpumask_copy(irq_to_desc(irq)->irq_data.affinity, cpumask_of(cpu));
- #endif
- 
- 	clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq)));
-@@ -294,7 +294,7 @@ static void init_evtchn_cpu_bindings(void)
- 
- 	/* By default all event channels notify CPU#0. */
- 	for_each_irq_desc(i, desc) {
--		cpumask_copy(desc->affinity, cpumask_of(0));
-+		cpumask_copy(desc->irq_data.affinity, cpumask_of(0));
- 	}
- #endif
- 
-@@ -474,7 +474,7 @@ static bool probing_irq(int irq)
- 	return desc && desc->action == NULL;
- }
- 
--static unsigned int startup_pirq(unsigned int irq)
-+static unsigned int __startup_pirq(unsigned int irq)
- {
- 	struct evtchn_bind_pirq bind_pirq;
- 	struct irq_info *info = info_for_irq(irq);
-@@ -512,9 +512,15 @@ out:
- 	return 0;
- }
- 
--static void shutdown_pirq(unsigned int irq)
-+static unsigned int startup_pirq(struct irq_data *data)
-+{
-+	return __startup_pirq(data->irq);
-+}
-+
-+static void shutdown_pirq(struct irq_data *data)
- {
- 	struct evtchn_close close;
-+	unsigned int irq = data->irq;
- 	struct irq_info *info = info_for_irq(irq);
- 	int evtchn = evtchn_from_irq(irq);
- 
-@@ -534,20 +540,20 @@ static void shutdown_pirq(unsigned int irq)
- 	info->evtchn = 0;
- }
- 
--static void enable_pirq(unsigned int irq)
-+static void enable_pirq(struct irq_data *data)
- {
--	startup_pirq(irq);
-+	startup_pirq(data);
- }
- 
--static void disable_pirq(unsigned int irq)
-+static void disable_pirq(struct irq_data *data)
- {
- }
- 
--static void ack_pirq(unsigned int irq)
-+static void ack_pirq(struct irq_data *data)
- {
--	int evtchn = evtchn_from_irq(irq);
-+	int evtchn = evtchn_from_irq(data->irq);
- 
--	move_native_irq(irq);
-+	irq_move_irq(data);
- 
- 	if (VALID_EVTCHN(evtchn)) {
- 		mask_evtchn(evtchn);
-@@ -1215,11 +1221,12 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
- 	return 0;
- }
- 
--static int set_affinity_irq(unsigned irq, const struct cpumask *dest)
-+static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
-+			    bool force)
- {
- 	unsigned tcpu = cpumask_first(dest);
- 
--	return rebind_irq_to_cpu(irq, tcpu);
-+	return rebind_irq_to_cpu(data->irq, tcpu);
- }
- 
- int resend_irq_on_evtchn(unsigned int irq)
-@@ -1238,35 +1245,35 @@ int resend_irq_on_evtchn(unsigned int irq)
- 	return 1;
- }
- 
--static void enable_dynirq(unsigned int irq)
-+static void enable_dynirq(struct irq_data *data)
- {
--	int evtchn = evtchn_from_irq(irq);
-+	int evtchn = evtchn_from_irq(data->irq);
- 
- 	if (VALID_EVTCHN(evtchn))
- 		unmask_evtchn(evtchn);
- }
- 
--static void disable_dynirq(unsigned int irq)
-+static void disable_dynirq(struct irq_data *data)
- {
--	int evtchn = evtchn_from_irq(irq);
-+	int evtchn = evtchn_from_irq(data->irq);
- 
- 	if (VALID_EVTCHN(evtchn))
- 		mask_evtchn(evtchn);
- }
- 
--static void ack_dynirq(unsigned int irq)
-+static void ack_dynirq(struct irq_data *data)
- {
--	int evtchn = evtchn_from_irq(irq);
-+	int evtchn = evtchn_from_irq(data->irq);
- 
--	move_masked_irq(irq);
-+	move_masked_irq(data->irq);
- 
- 	if (VALID_EVTCHN(evtchn))
- 		unmask_evtchn(evtchn);
- }
- 
--static int retrigger_dynirq(unsigned int irq)
-+static int retrigger_dynirq(struct irq_data *data)
- {
--	int evtchn = evtchn_from_irq(irq);
-+	int evtchn = evtchn_from_irq(data->irq);
- 	struct shared_info *sh = HYPERVISOR_shared_info;
- 	int ret = 0;
- 
-@@ -1315,7 +1322,7 @@ static void restore_cpu_pirqs(void)
- 
- 		printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq);
- 
--		startup_pirq(irq);
-+		__startup_pirq(irq);
- 	}
- }
- 
-@@ -1467,44 +1474,44 @@ void xen_irq_resume(void)
- }
- 
- static struct irq_chip xen_dynamic_chip __read_mostly = {
--	.name		= "xen-dyn",
-+	.name			= "xen-dyn",
- 
--	.disable	= disable_dynirq,
--	.mask		= disable_dynirq,
--	.unmask		= enable_dynirq,
-+	.irq_disable		= disable_dynirq,
-+	.irq_mask		= disable_dynirq,
-+	.irq_unmask		= enable_dynirq,
- 
--	.eoi		= ack_dynirq,
--	.set_affinity	= set_affinity_irq,
--	.retrigger	= retrigger_dynirq,
-+	.irq_eoi		= ack_dynirq,
-+	.irq_set_affinity	= set_affinity_irq,
-+	.irq_retrigger		= retrigger_dynirq,
- };
- 
- static struct irq_chip xen_pirq_chip __read_mostly = {
--	.name		= "xen-pirq",
-+	.name			= "xen-pirq",
- 
--	.startup	= startup_pirq,
--	.shutdown	= shutdown_pirq,
-+	.irq_startup		= startup_pirq,
-+	.irq_shutdown		= shutdown_pirq,
- 
--	.enable		= enable_pirq,
--	.unmask		= enable_pirq,
-+	.irq_enable		= enable_pirq,
-+	.irq_unmask		= enable_pirq,
- 
--	.disable	= disable_pirq,
--	.mask		= disable_pirq,
-+	.irq_disable		= disable_pirq,
-+	.irq_mask		= disable_pirq,
- 
--	.ack		= ack_pirq,
-+	.irq_ack		= ack_pirq,
- 
--	.set_affinity	= set_affinity_irq,
-+	.irq_set_affinity	= set_affinity_irq,
- 
--	.retrigger	= retrigger_dynirq,
-+	.irq_retrigger		= retrigger_dynirq,
- };
- 
- static struct irq_chip xen_percpu_chip __read_mostly = {
--	.name		= "xen-percpu",
-+	.name			= "xen-percpu",
- 
--	.disable	= disable_dynirq,
--	.mask		= disable_dynirq,
--	.unmask		= enable_dynirq,
-+	.irq_disable		= disable_dynirq,
-+	.irq_mask		= disable_dynirq,
-+	.irq_unmask		= enable_dynirq,
- 
--	.ack		= ack_dynirq,
-+	.irq_ack		= ack_dynirq,
- };
- 
- int xen_set_callback_via(uint64_t via)
--- 
-1.7.4
-
-
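The conversion is mechanical: every callback gains a struct irq_data parameter, recovers the irq number from data->irq, and is wired up through the renamed .irq_* chip members. A hypothetical before/after:

#include <linux/kernel.h>
#include <linux/irq.h>

/* Old style: handlers took a bare irq number. */
static void demo_ack_old(unsigned int irq)
{
	pr_debug("ack %u\n", irq);
}

/* New style: handlers take struct irq_data and read data->irq. */
static void demo_ack(struct irq_data *data)
{
	pr_debug("ack %u\n", data->irq);
}

/* The chip wires the new handlers up via the renamed .irq_* members. */
static struct irq_chip demo_chip = {
	.name		= "demo",
	.irq_ack	= demo_ack,
};
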
-From ce9a77f4c22a658f4348683c932bb804af263f67 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <Ian.Campbell at citrix.com>
-Date: Mon, 7 Feb 2011 11:08:39 +0000
-Subject: [PATCH 116/203] xen: Fix compile error introduced by "switch to new irq_chip functions"
-
-drivers/xen/events.c: In function 'ack_pirq':
-drivers/xen/events.c:568: error: implicit declaration of function 'irq_move_irq'
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/events.c |    2 +-
- 1 files changed, 1 insertions(+), 1 deletions(-)
-
-diff --git a/drivers/xen/events.c b/drivers/xen/events.c
-index cf1712f..5aa422a 100644
---- a/drivers/xen/events.c
-+++ b/drivers/xen/events.c
-@@ -553,7 +553,7 @@ static void ack_pirq(struct irq_data *data)
- {
- 	int evtchn = evtchn_from_irq(data->irq);
- 
--	irq_move_irq(data);
-+	move_native_irq(data->irq);
- 
- 	if (VALID_EVTCHN(evtchn)) {
- 		mask_evtchn(evtchn);
--- 
-1.7.4
-
-
-From 708b47a4eb3c6db91f9e04443cfd753e86e38b56 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <Ian.Campbell at citrix.com>
-Date: Tue, 8 Feb 2011 14:03:31 +0000
-Subject: [PATCH 117/203] xen/timer: Missing IRQF_NO_SUSPEND in timer code broke suspend.
-
-The patches missed an indirect use of IRQF_NO_SUSPEND pulled in via
-IRQF_TIMER. The following patch fixes the issue.
-
-With this fixlet PV guest migration works just fine. I also booted the
-entire series as a dom0 kernel and it appeared fine.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- arch/x86/xen/time.c |    4 +++-
- 1 files changed, 3 insertions(+), 1 deletions(-)
-
-diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
-index 067759e..2e2d370 100644
---- a/arch/x86/xen/time.c
-+++ b/arch/x86/xen/time.c
-@@ -397,7 +397,9 @@ void xen_setup_timer(int cpu)
- 		name = "<timer kasprintf failed>";
- 
- 	irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
--				      IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER,
-+				      IRQF_DISABLED|IRQF_PERCPU|
-+				      IRQF_NOBALANCING|IRQF_TIMER|
-+				      IRQF_FORCE_RESUME,
- 				      name, NULL);
- 
- 	evt = &per_cpu(xen_clock_events, cpu);
--- 
-1.7.4
-
-
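The indirection the message refers to lives in the generic flag definitions, not in Xen code. A sketch of the 2.6.38-era relationship (values quoted from memory, so treat the exact numbers as an assumption):

/* include/linux/interrupt.h, approximately: */
#define __IRQF_TIMER		0x00000200
#define IRQF_NO_SUSPEND		0x00004000
#define IRQF_FORCE_RESUME	0x00008000
#define IRQF_TIMER		(__IRQF_TIMER | IRQF_NO_SUSPEND)

/* Passing IRQF_TIMER therefore already keeps the timer IRQ enabled
 * across suspend; the IRQF_FORCE_RESUME added above additionally makes
 * the generic resume path re-enable it, which is what a migrating PV
 * guest needs. */
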
-From b052181a985592f81767f631f9f42accb4b436cd Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Fri, 11 Feb 2011 15:23:56 +0000
-Subject: [PATCH 118/203] xen: events: mark cpu_evtchn_mask_p as __refdata
-
-This variable starts out pointing at init_evtchn_mask, which is marked
-__initdata, but is set to point to a non-init data region in xen_init_IRQ,
-which is itself an __init function, so this is safe.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Tested-and-acked-by: Andrew Jones <drjones at redhat.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/events.c |    2 +-
- 1 files changed, 1 insertions(+), 1 deletions(-)
-
-diff --git a/drivers/xen/events.c b/drivers/xen/events.c
-index 7468147..a313890 100644
---- a/drivers/xen/events.c
-+++ b/drivers/xen/events.c
-@@ -114,7 +114,7 @@ struct cpu_evtchn_s {
- static __initdata struct cpu_evtchn_s init_evtchn_mask = {
- 	.bits[0 ... (NR_EVENT_CHANNELS/BITS_PER_LONG)-1] = ~0ul,
- };
--static struct cpu_evtchn_s *cpu_evtchn_mask_p = &init_evtchn_mask;
-+static struct cpu_evtchn_s __refdata *cpu_evtchn_mask_p = &init_evtchn_mask;
- 
- static inline unsigned long *cpu_evtchn_mask(int cpu)
- {
--- 
-1.7.4
-
-
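For reference, the pattern being annotated, with illustrative names that are not from the patch (kernel context assumed):

#include <linux/init.h>
#include <linux/string.h>

static __initdata int init_table[4] = { 1, 2, 3, 4 };
static int runtime_table[4];

/* __refdata: tell modpost the reference into .init.data is intentional */
static int __refdata *table_p = init_table;

static int __init table_setup(void)
{
	memcpy(runtime_table, init_table, sizeof(runtime_table));
	table_p = runtime_table;	/* init data never referenced again */
	return 0;
}
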
-From 6b08cfebd3bd346d8a2fd68a2265fc7736849802 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Fri, 11 Feb 2011 15:23:58 +0000
-Subject: [PATCH 119/203] xen p2m: annotate variable which appears unused
-
- CC      arch/x86/xen/p2m.o
-arch/x86/xen/p2m.c: In function 'm2p_remove_override':
-arch/x86/xen/p2m.c:460: warning: 'address' may be used uninitialized in this function
-arch/x86/xen/p2m.c: In function 'm2p_add_override':
-arch/x86/xen/p2m.c:426: warning: 'address' may be used uninitialized in this function
-
-In actual fact, address is initialised in one "if (!PageHighMem(page))"
-statement and used in a second, and so is always initialised before
-use.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- arch/x86/xen/p2m.c |    4 ++--
- 1 files changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
-index fd12d7c..89342e5 100644
---- a/arch/x86/xen/p2m.c
-+++ b/arch/x86/xen/p2m.c
-@@ -421,7 +421,7 @@ int m2p_add_override(unsigned long mfn, struct page *page)
- {
- 	unsigned long flags;
- 	unsigned long pfn;
--	unsigned long address;
-+	unsigned long uninitialized_var(address);
- 	unsigned level;
- 	pte_t *ptep = NULL;
- 
-@@ -455,7 +455,7 @@ int m2p_remove_override(struct page *page)
- 	unsigned long flags;
- 	unsigned long mfn;
- 	unsigned long pfn;
--	unsigned long address;
-+	unsigned long uninitialized_var(address);
- 	unsigned level;
- 	pte_t *ptep = NULL;
- 
--- 
-1.7.4
-
-
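uninitialized_var() is purely a warning suppressor: in this era it expands to a self-assignment and emits no code. A condensed sketch of the two-if pattern the message describes (kernel context assumed; the real functions do more between the tests):

/* include/linux/compiler-gcc.h: #define uninitialized_var(x) x = x */

static void sketch(struct page *page, unsigned long pfn)
{
	unsigned long uninitialized_var(address);
	unsigned int level;
	pte_t *ptep = NULL;

	if (!PageHighMem(page)) {		/* first test: initialise */
		address = (unsigned long)__va(pfn << PAGE_SHIFT);
		ptep = lookup_address(address, &level);
	}
	/* ... */
	if (!PageHighMem(page))			/* same test: safe to use */
		set_pte_at(&init_mm, address, ptep,
			   pfn_pte(pfn, PAGE_KERNEL));
}
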
-From 44b46c3ef805793ab3a7730dc71c72d0f258ea8e Mon Sep 17 00:00:00 2001
-From: Ian Campbell <Ian.Campbell at eu.citrix.com>
-Date: Fri, 11 Feb 2011 16:37:41 +0000
-Subject: [PATCH 120/203] xen: annotate functions which only call into __init at start of day
-
-Both xen_hvm_init_shared_info and xen_build_mfn_list_list can be
-called at resume time as well as at start of day but only reference
-__init functions (extend_brk) at start of day. Hence annotate with
-__ref.
-
-    WARNING: arch/x86/built-in.o(.text+0x4f1): Section mismatch in reference
-        from the function xen_hvm_init_shared_info() to the function
-        .init.text:extend_brk()
-    The function xen_hvm_init_shared_info() references
-    the function __init extend_brk().
-    This is often because xen_hvm_init_shared_info lacks a __init
-    annotation or the annotation of extend_brk is wrong.
-
-xen_hvm_init_shared_info calls extend_brk() iff !shared_info_page and
-initialises shared_info_page with the result. This happens at start of
-day only.
-
-    WARNING: arch/x86/built-in.o(.text+0x599b): Section mismatch in reference
-        from the function xen_build_mfn_list_list() to the function
-        .init.text:extend_brk()
-    The function xen_build_mfn_list_list() references
-    the function __init extend_brk().
-    This is often because xen_build_mfn_list_list lacks a __init
-    annotation or the annotation of extend_brk is wrong.
-
-(this warning occurs multiple times)
-
-xen_build_mfn_list_list only calls extend_brk() at boot time, while
-building the initial mfn list list.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- arch/x86/xen/enlighten.c |    2 +-
- arch/x86/xen/p2m.c       |    2 +-
- 2 files changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
-index 50542ef..28e6d42 100644
---- a/arch/x86/xen/enlighten.c
-+++ b/arch/x86/xen/enlighten.c
-@@ -1292,7 +1292,7 @@ static int init_hvm_pv_info(int *major, int *minor)
- 	return 0;
- }
- 
--void xen_hvm_init_shared_info(void)
-+void __ref xen_hvm_init_shared_info(void)
- {
- 	int cpu;
- 	struct xen_add_to_physmap xatp;
-diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
-index 89342e5..05cfc6a 100644
---- a/arch/x86/xen/p2m.c
-+++ b/arch/x86/xen/p2m.c
-@@ -136,7 +136,7 @@ static void p2m_init(unsigned long *p2m)
-  * - After resume we're called from within stop_machine, but the mfn
-  *   tree should already be completely allocated.
-  */
--void xen_build_mfn_list_list(void)
-+void __ref xen_build_mfn_list_list(void)
- {
- 	unsigned long pfn;
- 
--- 
-1.7.4
-
-
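__ref is the function-level counterpart of the __refdata annotation two patches back: it tells modpost that a reference into .init.text is deliberate. A condensed sketch of why it is safe here, following the shape of the real function:

void __ref xen_hvm_init_shared_info(void)
{
	static struct shared_info *shared_info_page;

	if (!shared_info_page)		/* true only at start of day */
		shared_info_page = (struct shared_info *)
			extend_brk(PAGE_SIZE, PAGE_SIZE);	/* __init */
	/* the resume path re-enters here but never reaches extend_brk();
	 * it just remaps the already-allocated page */
}
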
-From ef91082e90491ac99343a13f9aeff4669835c6cc Mon Sep 17 00:00:00 2001
-From: Daniel De Graaf <dgdegra at tycho.nsa.gov>
-Date: Thu, 3 Feb 2011 12:18:59 -0500
-Subject: [PATCH 121/203] xen-gntdev: Change page limit to be global instead of per-open
-
-Because there is no limitation on how many times a user can open a
-given device file, an per-file-description limit on the number of
-pages granted offers little to no benefit. Change to a global limit
-and remove the ioctl() as the parameter can now be changed via sysfs.
-
-Xen tools changeset 22768:f8d801e5573e is needed to eliminate the
-error this change produces in xc_gnttab_set_max_grants.
-
-Signed-off-by: Daniel De Graaf <dgdegra at tycho.nsa.gov>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/gntdev.c |   50 ++++++++++++++------------------------------------
- 1 files changed, 14 insertions(+), 36 deletions(-)
-
-diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
-index 1e31cdc..23d208a 100644
---- a/drivers/xen/gntdev.c
-+++ b/drivers/xen/gntdev.c
-@@ -45,15 +45,15 @@ MODULE_AUTHOR("Derek G. Murray <Derek.Murray at cl.cam.ac.uk>, "
- 	      "Gerd Hoffmann <kraxel at redhat.com>");
- MODULE_DESCRIPTION("User-space granted page access driver");
- 
--static int limit = 1024;
-+static int limit = 1024*1024;
- module_param(limit, int, 0644);
--MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped at "
--		"once by a gntdev instance");
-+MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by "
-+		"the gntdev device");
-+
-+static atomic_t pages_mapped = ATOMIC_INIT(0);
- 
- struct gntdev_priv {
- 	struct list_head maps;
--	uint32_t used;
--	uint32_t limit;
- 	/* lock protects maps from concurrent changes */
- 	spinlock_t lock;
- 	struct mm_struct *mm;
-@@ -82,9 +82,7 @@ static void gntdev_print_maps(struct gntdev_priv *priv,
- #ifdef DEBUG
- 	struct grant_map *map;
- 
--	pr_debug("maps list (priv %p, usage %d/%d)\n",
--	       priv, priv->used, priv->limit);
--
-+	pr_debug("%s: maps list (priv %p)\n", __func__, priv);
- 	list_for_each_entry(map, &priv->maps, next)
- 		pr_debug("  index %2d, count %2d %s\n",
- 		       map->index, map->count,
-@@ -121,9 +119,6 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
- 	add->count = count;
- 	add->priv  = priv;
- 
--	if (add->count + priv->used > priv->limit)
--		goto err;
--
- 	return add;
- 
- err:
-@@ -154,7 +149,6 @@ static void gntdev_add_map(struct gntdev_priv *priv, struct grant_map *add)
- 	list_add_tail(&add->next, &priv->maps);
- 
- done:
--	priv->used += add->count;
- 	gntdev_print_maps(priv, "[new]", add->index);
- }
- 
-@@ -200,7 +194,7 @@ static int gntdev_del_map(struct grant_map *map)
- 		if (map->unmap_ops[i].handle)
- 			return -EBUSY;
- 
--	map->priv->used -= map->count;
-+	atomic_sub(map->count, &pages_mapped);
- 	list_del(&map->next);
- 	return 0;
- }
-@@ -386,7 +380,6 @@ static int gntdev_open(struct inode *inode, struct file *flip)
- 
- 	INIT_LIST_HEAD(&priv->maps);
- 	spin_lock_init(&priv->lock);
--	priv->limit = limit;
- 
- 	priv->mm = get_task_mm(current);
- 	if (!priv->mm) {
-@@ -443,19 +436,24 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv,
- 	pr_debug("priv %p, add %d\n", priv, op.count);
- 	if (unlikely(op.count <= 0))
- 		return -EINVAL;
--	if (unlikely(op.count > priv->limit))
--		return -EINVAL;
- 
- 	err = -ENOMEM;
- 	map = gntdev_alloc_map(priv, op.count);
- 	if (!map)
- 		return err;
-+
- 	if (copy_from_user(map->grants, &u->refs,
- 			   sizeof(map->grants[0]) * op.count) != 0) {
- 		gntdev_free_map(map);
- 		return err;
- 	}
- 
-+	if (unlikely(atomic_add_return(op.count, &pages_mapped) > limit)) {
-+		pr_debug("can't map: over limit\n");
-+		gntdev_free_map(map);
-+		return err;
-+	}
-+
- 	spin_lock(&priv->lock);
- 	gntdev_add_map(priv, map);
- 	op.index = map->index << PAGE_SHIFT;
-@@ -518,23 +516,6 @@ static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv,
- 	return 0;
- }
- 
--static long gntdev_ioctl_set_max_grants(struct gntdev_priv *priv,
--					struct ioctl_gntdev_set_max_grants __user *u)
--{
--	struct ioctl_gntdev_set_max_grants op;
--
--	if (copy_from_user(&op, u, sizeof(op)) != 0)
--		return -EFAULT;
--	pr_debug("priv %p, limit %d\n", priv, op.count);
--	if (op.count > limit)
--		return -E2BIG;
--
--	spin_lock(&priv->lock);
--	priv->limit = op.count;
--	spin_unlock(&priv->lock);
--	return 0;
--}
--
- static long gntdev_ioctl(struct file *flip,
- 			 unsigned int cmd, unsigned long arg)
- {
-@@ -551,9 +532,6 @@ static long gntdev_ioctl(struct file *flip,
- 	case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR:
- 		return gntdev_ioctl_get_offset_for_vaddr(priv, ptr);
- 
--	case IOCTL_GNTDEV_SET_MAX_GRANTS:
--		return gntdev_ioctl_set_max_grants(priv, ptr);
--
- 	default:
- 		pr_debug("priv %p, unknown cmd %x\n", priv, cmd);
- 		return -ENOIOCTLCMD;
--- 
-1.7.4
-
-
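The new check is the usual optimistic-reserve idiom on a global atomic counter. Reduced sketch; in the driver itself the decrement sits in the map-teardown path rather than inline:

static atomic_t pages_mapped = ATOMIC_INIT(0);
static int limit = 1024*1024;

static int reserve_grant_pages(int count)
{
	if (atomic_add_return(count, &pages_mapped) > limit) {
		atomic_sub(count, &pages_mapped);	/* roll back */
		return -ENOMEM;
	}
	return 0;
}
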
-From a879211bf1d70339e429603805c014450c275f2a Mon Sep 17 00:00:00 2001
-From: Daniel De Graaf <dgdegra at tycho.nsa.gov>
-Date: Thu, 3 Feb 2011 12:19:00 -0500
-Subject: [PATCH 122/203] xen-gntdev: Use find_vma rather than iterating our vma list manually
-
-This should be faster if many mappings exist, and also removes
-the only user of map->vma not related to PTE modification.
-
-Signed-off-by: Daniel De Graaf <dgdegra at tycho.nsa.gov>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/gntdev.c |   32 ++++++++------------------------
- 1 files changed, 8 insertions(+), 24 deletions(-)
-
-diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
-index 23d208a..ce8c37c 100644
---- a/drivers/xen/gntdev.c
-+++ b/drivers/xen/gntdev.c
-@@ -167,23 +167,6 @@ static struct grant_map *gntdev_find_map_index(struct gntdev_priv *priv,
- 	return NULL;
- }
- 
--static struct grant_map *gntdev_find_map_vaddr(struct gntdev_priv *priv,
--					       unsigned long vaddr)
--{
--	struct grant_map *map;
--
--	list_for_each_entry(map, &priv->maps, next) {
--		if (!map->vma)
--			continue;
--		if (vaddr < map->vma->vm_start)
--			continue;
--		if (vaddr >= map->vma->vm_end)
--			continue;
--		return map;
--	}
--	return NULL;
--}
--
- static int gntdev_del_map(struct grant_map *map)
- {
- 	int i;
-@@ -494,22 +477,23 @@ static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv,
- 					      struct ioctl_gntdev_get_offset_for_vaddr __user *u)
- {
- 	struct ioctl_gntdev_get_offset_for_vaddr op;
-+	struct vm_area_struct *vma;
- 	struct grant_map *map;
- 
- 	if (copy_from_user(&op, u, sizeof(op)) != 0)
- 		return -EFAULT;
- 	pr_debug("priv %p, offset for vaddr %lx\n", priv, (unsigned long)op.vaddr);
- 
--	spin_lock(&priv->lock);
--	map = gntdev_find_map_vaddr(priv, op.vaddr);
--	if (map == NULL ||
--	    map->vma->vm_start != op.vaddr) {
--		spin_unlock(&priv->lock);
-+	vma = find_vma(current->mm, op.vaddr);
-+	if (!vma || vma->vm_ops != &gntdev_vmops)
- 		return -EINVAL;
--	}
-+
-+	map = vma->vm_private_data;
-+	if (!map)
-+		return -EINVAL;
-+
- 	op.offset = map->index << PAGE_SHIFT;
- 	op.count = map->count;
--	spin_unlock(&priv->lock);
- 
- 	if (copy_to_user(u, &op, sizeof(op)) != 0)
- 		return -EFAULT;
--- 
-1.7.4
-
-
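One caveat: find_vma() walks current->mm and is normally called with mmap_sem held for read, which the hunk above does not take itself. A defensive version of the lookup would be (sketch, names as in the hunk):

struct vm_area_struct *vma;
struct grant_map *map = NULL;

down_read(&current->mm->mmap_sem);
vma = find_vma(current->mm, op.vaddr);
if (vma && vma->vm_ops == &gntdev_vmops)	/* one of our mappings? */
	map = vma->vm_private_data;		/* stashed at mmap() time */
up_read(&current->mm->mmap_sem);
if (!map)
	return -EINVAL;
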
-From 68b025c813c2eb41ff25628e3d4952d5185eb1a4 Mon Sep 17 00:00:00 2001
-From: Daniel De Graaf <dgdegra at tycho.nsa.gov>
-Date: Thu, 3 Feb 2011 12:19:01 -0500
-Subject: [PATCH 123/203] xen-gntdev: Add reference counting to maps
-
-This allows userspace to perform mmap() on the gntdev device and then
-immediately close the filehandle or remove the mapping using the
-remove ioctl, with the mapped area remaining valid until unmapped.
-This also fixes an infinite loop when a gntdev device is closed
-without first unmapping all areas.
-
-Signed-off-by: Daniel De Graaf <dgdegra at tycho.nsa.gov>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/gntdev.c |   67 ++++++++++++++++++++-----------------------------
- 1 files changed, 27 insertions(+), 40 deletions(-)
-
-diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
-index ce8c37c..256162b 100644
---- a/drivers/xen/gntdev.c
-+++ b/drivers/xen/gntdev.c
-@@ -62,12 +62,12 @@ struct gntdev_priv {
- 
- struct grant_map {
- 	struct list_head next;
--	struct gntdev_priv *priv;
- 	struct vm_area_struct *vma;
- 	int index;
- 	int count;
- 	int flags;
- 	int is_mapped;
-+	atomic_t users;
- 	struct ioctl_gntdev_grant_ref *grants;
- 	struct gnttab_map_grant_ref   *map_ops;
- 	struct gnttab_unmap_grant_ref *unmap_ops;
-@@ -117,7 +117,7 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
- 
- 	add->index = 0;
- 	add->count = count;
--	add->priv  = priv;
-+	atomic_set(&add->users, 1);
- 
- 	return add;
- 
-@@ -167,28 +167,18 @@ static struct grant_map *gntdev_find_map_index(struct gntdev_priv *priv,
- 	return NULL;
- }
- 
--static int gntdev_del_map(struct grant_map *map)
--{
--	int i;
--
--	if (map->vma)
--		return -EBUSY;
--	for (i = 0; i < map->count; i++)
--		if (map->unmap_ops[i].handle)
--			return -EBUSY;
--
--	atomic_sub(map->count, &pages_mapped);
--	list_del(&map->next);
--	return 0;
--}
--
--static void gntdev_free_map(struct grant_map *map)
-+static void gntdev_put_map(struct grant_map *map)
- {
- 	int i;
- 
- 	if (!map)
- 		return;
- 
-+	if (!atomic_dec_and_test(&map->users))
-+		return;
-+
-+	atomic_sub(map->count, &pages_mapped);
-+
- 	if (map->pages)
- 		for (i = 0; i < map->count; i++) {
- 			if (map->pages[i])
-@@ -267,6 +257,7 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
- 	map->is_mapped = 0;
- 	map->vma = NULL;
- 	vma->vm_private_data = NULL;
-+	gntdev_put_map(map);
- }
- 
- static int gntdev_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-@@ -388,17 +379,14 @@ static int gntdev_release(struct inode *inode, struct file *flip)
- {
- 	struct gntdev_priv *priv = flip->private_data;
- 	struct grant_map *map;
--	int err;
- 
- 	pr_debug("priv %p\n", priv);
- 
- 	spin_lock(&priv->lock);
- 	while (!list_empty(&priv->maps)) {
- 		map = list_entry(priv->maps.next, struct grant_map, next);
--		err = gntdev_del_map(map);
--		if (WARN_ON(err))
--			gntdev_free_map(map);
--
-+		list_del(&map->next);
-+		gntdev_put_map(map);
- 	}
- 	spin_unlock(&priv->lock);
- 
-@@ -425,15 +413,15 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv,
- 	if (!map)
- 		return err;
- 
--	if (copy_from_user(map->grants, &u->refs,
--			   sizeof(map->grants[0]) * op.count) != 0) {
--		gntdev_free_map(map);
-+	if (unlikely(atomic_add_return(op.count, &pages_mapped) > limit)) {
-+		pr_debug("can't map: over limit\n");
-+		gntdev_put_map(map);
- 		return err;
- 	}
- 
--	if (unlikely(atomic_add_return(op.count, &pages_mapped) > limit)) {
--		pr_debug("can't map: over limit\n");
--		gntdev_free_map(map);
-+	if (copy_from_user(map->grants, &u->refs,
-+			   sizeof(map->grants[0]) * op.count) != 0) {
-+		gntdev_put_map(map);
- 		return err;
- 	}
- 
-@@ -442,13 +430,9 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv,
- 	op.index = map->index << PAGE_SHIFT;
- 	spin_unlock(&priv->lock);
- 
--	if (copy_to_user(u, &op, sizeof(op)) != 0) {
--		spin_lock(&priv->lock);
--		gntdev_del_map(map);
--		spin_unlock(&priv->lock);
--		gntdev_free_map(map);
--		return err;
--	}
-+	if (copy_to_user(u, &op, sizeof(op)) != 0)
-+		return -EFAULT;
-+
- 	return 0;
- }
- 
-@@ -465,11 +449,12 @@ static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv,
- 
- 	spin_lock(&priv->lock);
- 	map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count);
--	if (map)
--		err = gntdev_del_map(map);
-+	if (map) {
-+		list_del(&map->next);
-+		gntdev_put_map(map);
-+		err = 0;
-+	}
- 	spin_unlock(&priv->lock);
--	if (!err)
--		gntdev_free_map(map);
- 	return err;
- }
- 
-@@ -549,6 +534,8 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
- 		goto unlock_out;
- 	}
- 
-+	atomic_inc(&map->users);
-+
- 	vma->vm_ops = &gntdev_vmops;
- 
- 	vma->vm_flags |= VM_RESERVED|VM_DONTCOPY|VM_DONTEXPAND|VM_PFNMAP;
--- 
-1.7.4
-
-
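After this patch a map has two independent holders, and teardown is symmetric. Lifecycle summary, as implemented above:

/*
 * gntdev_alloc_map()      users = 1      (reference held via priv->maps)
 * gntdev_mmap()           atomic_inc     (reference held by the vma)
 * unmap ioctl / release   list_del() + gntdev_put_map()
 * gntdev_vma_close()      gntdev_put_map()
 *
 * gntdev_put_map() frees only when the count reaches zero, so an
 * mmap()ed region stays valid after the ioctl removal or close() of
 * the fd, and close-without-unmap no longer loops forever.
 */
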
-From aab8f11a6b4641fcb8c139420f2eae879b5d1698 Mon Sep 17 00:00:00 2001
-From: Daniel De Graaf <dgdegra at tycho.nsa.gov>
-Date: Thu, 3 Feb 2011 12:19:02 -0500
-Subject: [PATCH 124/203] xen-gntdev: Support mapping in HVM domains
-
-HVM does not allow direct PTE modification, so instead we request
-that Xen change its internal p2m mappings on the allocated pages and
-map the memory into userspace normally.
-
-Note:
-The HVM path for map and unmap is slightly different: HVM keeps the pages
-mapped until the area is deleted, while the PV case (use_ptemod being true)
-must unmap them when userspace unmaps the range. In the normal use case,
-this makes no difference to users since unmap time is deletion time.
-
-[v2: Expanded commit descr.]
-Signed-off-by: Daniel De Graaf <dgdegra at tycho.nsa.gov>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/gntdev.c      |  117 ++++++++++++++++++++++++++++++++-------------
- drivers/xen/grant-table.c |    6 ++
- 2 files changed, 89 insertions(+), 34 deletions(-)
-
-diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
-index 256162b..bcaf797 100644
---- a/drivers/xen/gntdev.c
-+++ b/drivers/xen/gntdev.c
-@@ -32,6 +32,7 @@
- #include <linux/sched.h>
- #include <linux/spinlock.h>
- #include <linux/slab.h>
-+#include <linux/highmem.h>
- 
- #include <xen/xen.h>
- #include <xen/grant_table.h>
-@@ -52,6 +53,8 @@ MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by "
- 
- static atomic_t pages_mapped = ATOMIC_INIT(0);
- 
-+static int use_ptemod;
-+
- struct gntdev_priv {
- 	struct list_head maps;
- 	/* lock protects maps from concurrent changes */
-@@ -74,6 +77,8 @@ struct grant_map {
- 	struct page **pages;
- };
- 
-+static int unmap_grant_pages(struct grant_map *map, int offset, int pages);
-+
- /* ------------------------------------------------------------------ */
- 
- static void gntdev_print_maps(struct gntdev_priv *priv,
-@@ -179,11 +184,34 @@ static void gntdev_put_map(struct grant_map *map)
- 
- 	atomic_sub(map->count, &pages_mapped);
- 
--	if (map->pages)
-+	if (map->pages) {
-+		if (!use_ptemod)
-+			unmap_grant_pages(map, 0, map->count);
-+
- 		for (i = 0; i < map->count; i++) {
--			if (map->pages[i])
-+			uint32_t check, *tmp;
-+			if (!map->pages[i])
-+				continue;
-+			/* XXX When unmapping in an HVM domain, Xen will
-+			 * sometimes end up mapping the GFN to an invalid MFN.
-+			 * In this case, writes will be discarded and reads will
-+			 * return all 0xFF bytes.  Leak these unusable GFNs
-+			 * until Xen supports fixing their p2m mapping.
-+			 *
-+			 * Confirmed present in Xen 4.1-RC3 with HVM source
-+			 */
-+			tmp = kmap(map->pages[i]);
-+			*tmp = 0xdeaddead;
-+			mb();
-+			check = *tmp;
-+			kunmap(map->pages[i]);
-+			if (check == 0xdeaddead)
- 				__free_page(map->pages[i]);
-+			else
-+				pr_debug("Discard page %d=%ld\n", i,
-+					page_to_pfn(map->pages[i]));
- 		}
-+	}
- 	kfree(map->pages);
- 	kfree(map->grants);
- 	kfree(map->map_ops);
-@@ -198,17 +226,16 @@ static int find_grant_ptes(pte_t *pte, pgtable_t token,
- {
- 	struct grant_map *map = data;
- 	unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
-+	int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte;
- 	u64 pte_maddr;
- 
- 	BUG_ON(pgnr >= map->count);
- 	pte_maddr = arbitrary_virt_to_machine(pte).maddr;
- 
--	gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr,
--			  GNTMAP_contains_pte | map->flags,
-+	gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags,
- 			  map->grants[pgnr].ref,
- 			  map->grants[pgnr].domid);
--	gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr,
--			    GNTMAP_contains_pte | map->flags,
-+	gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, flags,
- 			    0 /* handle */);
- 	return 0;
- }
-@@ -216,6 +243,19 @@ static int find_grant_ptes(pte_t *pte, pgtable_t token,
- static int map_grant_pages(struct grant_map *map)
- {
- 	int i, err = 0;
-+	phys_addr_t addr;
-+
-+	if (!use_ptemod) {
-+		for (i = 0; i < map->count; i++) {
-+			addr = (phys_addr_t)
-+				pfn_to_kaddr(page_to_pfn(map->pages[i]));
-+			gnttab_set_map_op(&map->map_ops[i], addr, map->flags,
-+				map->grants[i].ref,
-+				map->grants[i].domid);
-+			gnttab_set_unmap_op(&map->unmap_ops[i], addr,
-+				map->flags, 0 /* handle */);
-+		}
-+	}
- 
- 	pr_debug("map %d+%d\n", map->index, map->count);
- 	err = gnttab_map_refs(map->map_ops, map->pages, map->count);
-@@ -260,17 +300,8 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
- 	gntdev_put_map(map);
- }
- 
--static int gntdev_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
--{
--	pr_debug("vaddr %p, pgoff %ld (shouldn't happen)\n",
--			vmf->virtual_address, vmf->pgoff);
--	vmf->flags = VM_FAULT_ERROR;
--	return 0;
--}
--
- static struct vm_operations_struct gntdev_vmops = {
- 	.close = gntdev_vma_close,
--	.fault = gntdev_vma_fault,
- };
- 
- /* ------------------------------------------------------------------ */
-@@ -355,14 +386,16 @@ static int gntdev_open(struct inode *inode, struct file *flip)
- 	INIT_LIST_HEAD(&priv->maps);
- 	spin_lock_init(&priv->lock);
- 
--	priv->mm = get_task_mm(current);
--	if (!priv->mm) {
--		kfree(priv);
--		return -ENOMEM;
-+	if (use_ptemod) {
-+		priv->mm = get_task_mm(current);
-+		if (!priv->mm) {
-+			kfree(priv);
-+			return -ENOMEM;
-+		}
-+		priv->mn.ops = &gntdev_mmu_ops;
-+		ret = mmu_notifier_register(&priv->mn, priv->mm);
-+		mmput(priv->mm);
- 	}
--	priv->mn.ops = &gntdev_mmu_ops;
--	ret = mmu_notifier_register(&priv->mn, priv->mm);
--	mmput(priv->mm);
- 
- 	if (ret) {
- 		kfree(priv);
-@@ -390,7 +423,8 @@ static int gntdev_release(struct inode *inode, struct file *flip)
- 	}
- 	spin_unlock(&priv->lock);
- 
--	mmu_notifier_unregister(&priv->mn, priv->mm);
-+	if (use_ptemod)
-+		mmu_notifier_unregister(&priv->mn, priv->mm);
- 	kfree(priv);
- 	return 0;
- }
-@@ -515,7 +549,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
- 	int index = vma->vm_pgoff;
- 	int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
- 	struct grant_map *map;
--	int err = -EINVAL;
-+	int i, err = -EINVAL;
- 
- 	if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED))
- 		return -EINVAL;
-@@ -527,9 +561,9 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
- 	map = gntdev_find_map_index(priv, index, count);
- 	if (!map)
- 		goto unlock_out;
--	if (map->vma)
-+	if (use_ptemod && map->vma)
- 		goto unlock_out;
--	if (priv->mm != vma->vm_mm) {
-+	if (use_ptemod && priv->mm != vma->vm_mm) {
- 		printk(KERN_WARNING "Huh? Other mm?\n");
- 		goto unlock_out;
- 	}
-@@ -541,20 +575,24 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
- 	vma->vm_flags |= VM_RESERVED|VM_DONTCOPY|VM_DONTEXPAND|VM_PFNMAP;
- 
- 	vma->vm_private_data = map;
--	map->vma = vma;
- 
--	map->flags = GNTMAP_host_map | GNTMAP_application_map;
-+	if (use_ptemod)
-+		map->vma = vma;
-+
-+	map->flags = GNTMAP_host_map;
- 	if (!(vma->vm_flags & VM_WRITE))
- 		map->flags |= GNTMAP_readonly;
- 
- 	spin_unlock(&priv->lock);
- 
--	err = apply_to_page_range(vma->vm_mm, vma->vm_start,
--				  vma->vm_end - vma->vm_start,
--				  find_grant_ptes, map);
--	if (err) {
--		printk(KERN_WARNING "find_grant_ptes() failure.\n");
--		return err;
-+	if (use_ptemod) {
-+		err = apply_to_page_range(vma->vm_mm, vma->vm_start,
-+					  vma->vm_end - vma->vm_start,
-+					  find_grant_ptes, map);
-+		if (err) {
-+			printk(KERN_WARNING "find_grant_ptes() failure.\n");
-+			return err;
-+		}
- 	}
- 
- 	err = map_grant_pages(map);
-@@ -565,6 +603,15 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
- 
- 	map->is_mapped = 1;
- 
-+	if (!use_ptemod) {
-+		for (i = 0; i < count; i++) {
-+			err = vm_insert_page(vma, vma->vm_start + i*PAGE_SIZE,
-+				map->pages[i]);
-+			if (err)
-+				return err;
-+		}
-+	}
-+
- 	return 0;
- 
- unlock_out:
-@@ -595,6 +642,8 @@ static int __init gntdev_init(void)
- 	if (!xen_domain())
- 		return -ENODEV;
- 
-+	use_ptemod = xen_pv_domain();
-+
- 	err = misc_register(&gntdev_miscdev);
- 	if (err != 0) {
- 		printk(KERN_ERR "Could not register gntdev device\n");
-diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
-index 9ef54eb..9428ced 100644
---- a/drivers/xen/grant-table.c
-+++ b/drivers/xen/grant-table.c
-@@ -458,6 +458,9 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
- 	if (ret)
- 		return ret;
- 
-+	if (xen_feature(XENFEAT_auto_translated_physmap))
-+		return ret;
-+
- 	for (i = 0; i < count; i++) {
- 		/* m2p override only supported for GNTMAP_contains_pte mappings */
- 		if (!(map_ops[i].flags & GNTMAP_contains_pte))
-@@ -483,6 +486,9 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
- 	if (ret)
- 		return ret;
- 
-+	if (xen_feature(XENFEAT_auto_translated_physmap))
-+		return ret;
-+
- 	for (i = 0; i < count; i++) {
- 		ret = m2p_remove_override(pages[i]);
- 		if (ret)
--- 
-1.7.4
-
-
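The whole patch pivots on one boolean chosen at init time. Summary of the two resulting paths, as implemented in the hunks above:

/*
 * use_ptemod = xen_pv_domain();
 *
 * PV  (use_ptemod):   grant PTEs are written in place through
 *                     find_grant_ptes()/apply_to_page_range(), so the
 *                     mm tracking and mmu notifier stay in use and the
 *                     grants are unmapped when userspace unmaps.
 * HVM (!use_ptemod):  Xen redirects the p2m entries of ordinary
 *                     allocated pages, which are handed to userspace
 *                     with vm_insert_page(); they stay mapped until
 *                     the area is deleted.
 */
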
-From dd3140588d9551235ebc2a0dacdca098e7677573 Mon Sep 17 00:00:00 2001
-From: Daniel De Graaf <dgdegra at tycho.nsa.gov>
-Date: Mon, 7 Feb 2011 17:23:05 -0500
-Subject: [PATCH 125/203] xen-gntalloc: Userspace grant allocation driver
-
-This allows a userspace application to allocate a shared page for
-implementing inter-domain communication or device drivers. These
-shared pages can be mapped using the gntdev device or by the kernel
-in another domain.
-
-Signed-off-by: Daniel De Graaf <dgdegra at tycho.nsa.gov>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/Kconfig    |    8 +
- drivers/xen/Makefile   |    2 +
- drivers/xen/gntalloc.c |  486 ++++++++++++++++++++++++++++++++++++++++++++++++
- include/xen/gntalloc.h |   50 +++++
- 4 files changed, 546 insertions(+), 0 deletions(-)
- create mode 100644 drivers/xen/gntalloc.c
- create mode 100644 include/xen/gntalloc.h
-
-diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
-index 07bec09..a3d7afb 100644
---- a/drivers/xen/Kconfig
-+++ b/drivers/xen/Kconfig
-@@ -80,6 +80,14 @@ config XEN_GNTDEV
- 	help
- 	  Allows userspace processes to use grants.
- 
-+config XEN_GRANT_DEV_ALLOC
-+	tristate "User-space grant reference allocator driver"
-+	depends on XEN
-+	help
-+	  Allows userspace processes to create pages with access granted
-+	  to other domains. This can be used to implement frontend drivers
-+	  or as part of an inter-domain shared memory channel.
-+
- config XEN_PLATFORM_PCI
- 	tristate "xen platform pci device driver"
- 	depends on XEN_PVHVM && PCI
-diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
-index 5088cc2..9585a1d 100644
---- a/drivers/xen/Makefile
-+++ b/drivers/xen/Makefile
-@@ -10,6 +10,7 @@ obj-$(CONFIG_XEN_XENCOMM)	+= xencomm.o
- obj-$(CONFIG_XEN_BALLOON)	+= balloon.o
- obj-$(CONFIG_XEN_DEV_EVTCHN)	+= xen-evtchn.o
- obj-$(CONFIG_XEN_GNTDEV)	+= xen-gntdev.o
-+obj-$(CONFIG_XEN_GRANT_DEV_ALLOC)	+= xen-gntalloc.o
- obj-$(CONFIG_XENFS)		+= xenfs/
- obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
- obj-$(CONFIG_XEN_PLATFORM_PCI)	+= xen-platform-pci.o
-@@ -18,5 +19,6 @@ obj-$(CONFIG_XEN_DOM0)		+= pci.o
- 
- xen-evtchn-y			:= evtchn.o
- xen-gntdev-y				:= gntdev.o
-+xen-gntalloc-y				:= gntalloc.o
- 
- xen-platform-pci-y		:= platform-pci.o
-diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
-new file mode 100644
-index 0000000..d06bf2b
---- /dev/null
-+++ b/drivers/xen/gntalloc.c
-@@ -0,0 +1,486 @@
-+/******************************************************************************
-+ * gntalloc.c
-+ *
-+ * Device for creating grant references (in user-space) that may be shared
-+ * with other domains.
-+ *
-+ * This program is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with this program; if not, write to the Free Software
-+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-+ */
-+
-+/*
-+ * This driver exists to allow userspace programs in Linux to allocate kernel
-+ * memory that will later be shared with another domain.  Without this device,
-+ * Linux userspace programs cannot create grant references.
-+ *
-+ * How this stuff works:
-+ *   X -> granting a page to Y
-+ *   Y -> mapping the grant from X
-+ *
-+ *   1. X uses the gntalloc device to allocate a page of kernel memory, P.
-+ *   2. X creates an entry in the grant table that says domid(Y) can access P.
-+ *      This is done without a hypercall unless the grant table needs expansion.
-+ *   3. X gives the grant reference identifier, GREF, to Y.
-+ *   4. Y maps the page, either directly into kernel memory for use in a backend
-+ *      driver, or via the gntdev device to map into the address space of an
-+ *      application running in Y. This is the first point at which Xen does any
-+ *      tracking of the page.
-+ *   5. A program in X mmap()s a segment of the gntalloc device that corresponds
-+ *      to the shared page, and can now communicate with Y over the shared page.
-+ *
-+ *
-+ * NOTE TO USERSPACE LIBRARIES:
-+ *   The grant allocation and mmap()ing are, naturally, two separate operations.
-+ *   You set up the sharing by calling the create ioctl() and then the mmap().
-+ *   Teardown requires munmap() and either close() or ioctl().
-+ *
-+ * WARNING: Since Xen does not allow a guest to forcibly end the use of a grant
-+ * reference, this device can be used to consume kernel memory by leaving grant
-+ * references mapped by another domain when an application exits. Therefore,
-+ * there is a global limit on the number of pages that can be allocated. When
-+ * all references to the page are unmapped, it will be freed during the next
-+ * grant operation.
-+ */
-+
-+#include <linux/atomic.h>
-+#include <linux/module.h>
-+#include <linux/miscdevice.h>
-+#include <linux/kernel.h>
-+#include <linux/init.h>
-+#include <linux/slab.h>
-+#include <linux/fs.h>
-+#include <linux/device.h>
-+#include <linux/mm.h>
-+#include <linux/uaccess.h>
-+#include <linux/types.h>
-+#include <linux/list.h>
-+
-+#include <xen/xen.h>
-+#include <xen/page.h>
-+#include <xen/grant_table.h>
-+#include <xen/gntalloc.h>
-+
-+static int limit = 1024;
-+module_param(limit, int, 0644);
-+MODULE_PARM_DESC(limit, "Maximum number of grants that may be allocated by "
-+		"the gntalloc device");
-+
-+static LIST_HEAD(gref_list);
-+static DEFINE_SPINLOCK(gref_lock);
-+static int gref_size;
-+
-+/* Metadata on a grant reference. */
-+struct gntalloc_gref {
-+	struct list_head next_gref;  /* list entry gref_list */
-+	struct list_head next_file;  /* list entry file->list, if open */
-+	struct page *page;	     /* The shared page */
-+	uint64_t file_index;         /* File offset for mmap() */
-+	unsigned int users;          /* Use count - when zero, waiting on Xen */
-+	grant_ref_t gref_id;         /* The grant reference number */
-+};
-+
-+struct gntalloc_file_private_data {
-+	struct list_head list;
-+	uint64_t index;
-+};
-+
-+static void __del_gref(struct gntalloc_gref *gref);
-+
-+static void do_cleanup(void)
-+{
-+	struct gntalloc_gref *gref, *n;
-+	list_for_each_entry_safe(gref, n, &gref_list, next_gref) {
-+		if (!gref->users)
-+			__del_gref(gref);
-+	}
-+}
-+
-+static int add_grefs(struct ioctl_gntalloc_alloc_gref *op,
-+	uint32_t *gref_ids, struct gntalloc_file_private_data *priv)
-+{
-+	int i, rc, readonly;
-+	LIST_HEAD(queue_gref);
-+	LIST_HEAD(queue_file);
-+	struct gntalloc_gref *gref;
-+
-+	readonly = !(op->flags & GNTALLOC_FLAG_WRITABLE);
-+	rc = -ENOMEM;
-+	for (i = 0; i < op->count; i++) {
-+		gref = kzalloc(sizeof(*gref), GFP_KERNEL);
-+		if (!gref)
-+			goto undo;
-+		list_add_tail(&gref->next_gref, &queue_gref);
-+		list_add_tail(&gref->next_file, &queue_file);
-+		gref->users = 1;
-+		gref->file_index = op->index + i * PAGE_SIZE;
-+		gref->page = alloc_page(GFP_KERNEL|__GFP_ZERO);
-+		if (!gref->page)
-+			goto undo;
-+
-+		/* Grant foreign access to the page. */
-+		gref->gref_id = gnttab_grant_foreign_access(op->domid,
-+			pfn_to_mfn(page_to_pfn(gref->page)), readonly);
-+		if (gref->gref_id < 0) {
-+			rc = gref->gref_id;
-+			goto undo;
-+		}
-+		gref_ids[i] = gref->gref_id;
-+	}
-+
-+	/* Add to gref lists. */
-+	spin_lock(&gref_lock);
-+	list_splice_tail(&queue_gref, &gref_list);
-+	list_splice_tail(&queue_file, &priv->list);
-+	spin_unlock(&gref_lock);
-+
-+	return 0;
-+
-+undo:
-+	spin_lock(&gref_lock);
-+	gref_size -= (op->count - i);
-+
-+	list_for_each_entry(gref, &queue_file, next_file) {
-+		/* __del_gref does not remove from queue_file */
-+		__del_gref(gref);
-+	}
-+
-+	/* It's possible for the target domain to map the just-allocated grant
-+	 * references by blindly guessing their IDs; if this is done, then
-+	 * __del_gref will leave them in the queue_gref list. They need to be
-+	 * added to the global list so that we can free them when they are no
-+	 * longer referenced.
-+	 */
-+	if (unlikely(!list_empty(&queue_gref)))
-+		list_splice_tail(&queue_gref, &gref_list);
-+	spin_unlock(&gref_lock);
-+	return rc;
-+}
-+
-+static void __del_gref(struct gntalloc_gref *gref)
-+{
-+	if (gref->gref_id > 0) {
-+		if (gnttab_query_foreign_access(gref->gref_id))
-+			return;
-+
-+		if (!gnttab_end_foreign_access_ref(gref->gref_id, 0))
-+			return;
-+	}
-+
-+	gref_size--;
-+	list_del(&gref->next_gref);
-+
-+	if (gref->page)
-+		__free_page(gref->page);
-+
-+	kfree(gref);
-+}
-+
-+/* finds contiguous grant references in a file, returns the first */
-+static struct gntalloc_gref *find_grefs(struct gntalloc_file_private_data *priv,
-+		uint64_t index, uint32_t count)
-+{
-+	struct gntalloc_gref *rv = NULL, *gref;
-+	list_for_each_entry(gref, &priv->list, next_file) {
-+		if (gref->file_index == index && !rv)
-+			rv = gref;
-+		if (rv) {
-+			if (gref->file_index != index)
-+				return NULL;
-+			index += PAGE_SIZE;
-+			count--;
-+			if (count == 0)
-+				return rv;
-+		}
-+	}
-+	return NULL;
-+}
-+
-+/*
-+ * -------------------------------------
-+ *  File operations.
-+ * -------------------------------------
-+ */
-+static int gntalloc_open(struct inode *inode, struct file *filp)
-+{
-+	struct gntalloc_file_private_data *priv;
-+
-+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
-+	if (!priv)
-+		goto out_nomem;
-+	INIT_LIST_HEAD(&priv->list);
-+
-+	filp->private_data = priv;
-+
-+	pr_debug("%s: priv %p\n", __func__, priv);
-+
-+	return 0;
-+
-+out_nomem:
-+	return -ENOMEM;
-+}
-+
-+static int gntalloc_release(struct inode *inode, struct file *filp)
-+{
-+	struct gntalloc_file_private_data *priv = filp->private_data;
-+	struct gntalloc_gref *gref;
-+
-+	pr_debug("%s: priv %p\n", __func__, priv);
-+
-+	spin_lock(&gref_lock);
-+	while (!list_empty(&priv->list)) {
-+		gref = list_entry(priv->list.next,
-+			struct gntalloc_gref, next_file);
-+		list_del(&gref->next_file);
-+		gref->users--;
-+		if (gref->users == 0)
-+			__del_gref(gref);
-+	}
-+	kfree(priv);
-+	spin_unlock(&gref_lock);
-+
-+	return 0;
-+}
-+
-+static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv,
-+		struct ioctl_gntalloc_alloc_gref __user *arg)
-+{
-+	int rc = 0;
-+	struct ioctl_gntalloc_alloc_gref op;
-+	uint32_t *gref_ids;
-+
-+	pr_debug("%s: priv %p\n", __func__, priv);
-+
-+	if (copy_from_user(&op, arg, sizeof(op))) {
-+		rc = -EFAULT;
-+		goto out;
-+	}
-+
-+	gref_ids = kzalloc(sizeof(gref_ids[0]) * op.count, GFP_TEMPORARY);
-+	if (!gref_ids) {
-+		rc = -ENOMEM;
-+		goto out;
-+	}
-+
-+	spin_lock(&gref_lock);
-+	/* Clean up pages that were at zero (local) users but were still mapped
-+	 * by remote domains. Since those pages count towards the limit that we
-+	 * are about to enforce, removing them here is a good idea.
-+	 */
-+	do_cleanup();
-+	if (gref_size + op.count > limit) {
-+		spin_unlock(&gref_lock);
-+		rc = -ENOSPC;
-+		goto out_free;
-+	}
-+	gref_size += op.count;
-+	op.index = priv->index;
-+	priv->index += op.count * PAGE_SIZE;
-+	spin_unlock(&gref_lock);
-+
-+	rc = add_grefs(&op, gref_ids, priv);
-+	if (rc < 0)
-+		goto out_free;
-+
-+	/* Once we finish add_grefs, it is unsafe to touch the new reference,
-+	 * since it is possible for a concurrent ioctl to remove it (by guessing
-+	 * its index). If the userspace application doesn't provide valid memory
-+	 * to write the IDs to, then it will need to close the file in order to
-+	 * release - which it will do by segfaulting when it tries to access the
-+	 * IDs to close them.
-+	 */
-+	if (copy_to_user(arg, &op, sizeof(op))) {
-+		rc = -EFAULT;
-+		goto out_free;
-+	}
-+	if (copy_to_user(arg->gref_ids, gref_ids,
-+			sizeof(gref_ids[0]) * op.count)) {
-+		rc = -EFAULT;
-+		goto out_free;
-+	}
-+
-+out_free:
-+	kfree(gref_ids);
-+out:
-+	return rc;
-+}
-+
-+static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv,
-+		void __user *arg)
-+{
-+	int i, rc = 0;
-+	struct ioctl_gntalloc_dealloc_gref op;
-+	struct gntalloc_gref *gref, *n;
-+
-+	pr_debug("%s: priv %p\n", __func__, priv);
-+
-+	if (copy_from_user(&op, arg, sizeof(op))) {
-+		rc = -EFAULT;
-+		goto dealloc_grant_out;
-+	}
-+
-+	spin_lock(&gref_lock);
-+	gref = find_grefs(priv, op.index, op.count);
-+	if (gref) {
-+		/* Remove from the file list only, and decrease reference count.
-+		 * The later call to do_cleanup() will remove from gref_list and
-+		 * free the memory if the pages aren't mapped anywhere.
-+		 */
-+		for (i = 0; i < op.count; i++) {
-+			n = list_entry(gref->next_file.next,
-+				struct gntalloc_gref, next_file);
-+			list_del(&gref->next_file);
-+			gref->users--;
-+			gref = n;
-+		}
-+	} else {
-+		rc = -EINVAL;
-+	}
-+
-+	do_cleanup();
-+
-+	spin_unlock(&gref_lock);
-+dealloc_grant_out:
-+	return rc;
-+}
-+
-+static long gntalloc_ioctl(struct file *filp, unsigned int cmd,
-+		unsigned long arg)
-+{
-+	struct gntalloc_file_private_data *priv = filp->private_data;
-+
-+	switch (cmd) {
-+	case IOCTL_GNTALLOC_ALLOC_GREF:
-+		return gntalloc_ioctl_alloc(priv, (void __user *)arg);
-+
-+	case IOCTL_GNTALLOC_DEALLOC_GREF:
-+		return gntalloc_ioctl_dealloc(priv, (void __user *)arg);
-+
-+	default:
-+		return -ENOIOCTLCMD;
-+	}
-+
-+	return 0;
-+}
-+
-+static void gntalloc_vma_close(struct vm_area_struct *vma)
-+{
-+	struct gntalloc_gref *gref = vma->vm_private_data;
-+	if (!gref)
-+		return;
-+
-+	spin_lock(&gref_lock);
-+	gref->users--;
-+	if (gref->users == 0)
-+		__del_gref(gref);
-+	spin_unlock(&gref_lock);
-+}
-+
-+static struct vm_operations_struct gntalloc_vmops = {
-+	.close = gntalloc_vma_close,
-+};
-+
-+static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma)
-+{
-+	struct gntalloc_file_private_data *priv = filp->private_data;
-+	struct gntalloc_gref *gref;
-+	int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
-+	int rv, i;
-+
-+	pr_debug("%s: priv %p, page %lu+%d\n", __func__,
-+		       priv, vma->vm_pgoff, count);
-+
-+	if (!(vma->vm_flags & VM_SHARED)) {
-+		printk(KERN_ERR "%s: Mapping must be shared.\n", __func__);
-+		return -EINVAL;
-+	}
-+
-+	spin_lock(&gref_lock);
-+	gref = find_grefs(priv, vma->vm_pgoff << PAGE_SHIFT, count);
-+	if (gref == NULL) {
-+		rv = -ENOENT;
-+		pr_debug("%s: Could not find grant reference",
-+				__func__);
-+		goto out_unlock;
-+	}
-+
-+	vma->vm_private_data = gref;
-+
-+	vma->vm_flags |= VM_RESERVED;
-+	vma->vm_flags |= VM_DONTCOPY;
-+	vma->vm_flags |= VM_PFNMAP | VM_PFN_AT_MMAP;
-+
-+	vma->vm_ops = &gntalloc_vmops;
-+
-+	for (i = 0; i < count; i++) {
-+		gref->users++;
-+		rv = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE,
-+				gref->page);
-+		if (rv)
-+			goto out_unlock;
-+
-+		gref = list_entry(gref->next_file.next,
-+				struct gntalloc_gref, next_file);
-+	}
-+	rv = 0;
-+
-+out_unlock:
-+	spin_unlock(&gref_lock);
-+	return rv;
-+}
-+
-+static const struct file_operations gntalloc_fops = {
-+	.owner = THIS_MODULE,
-+	.open = gntalloc_open,
-+	.release = gntalloc_release,
-+	.unlocked_ioctl = gntalloc_ioctl,
-+	.mmap = gntalloc_mmap
-+};
-+
-+/*
-+ * -------------------------------------
-+ * Module creation/destruction.
-+ * -------------------------------------
-+ */
-+static struct miscdevice gntalloc_miscdev = {
-+	.minor	= MISC_DYNAMIC_MINOR,
-+	.name	= "xen/gntalloc",
-+	.fops	= &gntalloc_fops,
-+};
-+
-+static int __init gntalloc_init(void)
-+{
-+	int err;
-+
-+	if (!xen_domain())
-+		return -ENODEV;
-+
-+	err = misc_register(&gntalloc_miscdev);
-+	if (err != 0) {
-+		printk(KERN_ERR "Could not register misc gntalloc device\n");
-+		return err;
-+	}
-+
-+	pr_debug("Created grant allocation device at %d,%d\n",
-+			MISC_MAJOR, gntalloc_miscdev.minor);
-+
-+	return 0;
-+}
-+
-+static void __exit gntalloc_exit(void)
-+{
-+	misc_deregister(&gntalloc_miscdev);
-+}
-+
-+module_init(gntalloc_init);
-+module_exit(gntalloc_exit);
-+
-+MODULE_LICENSE("GPL");
-+MODULE_AUTHOR("Carter Weatherly <carter.weatherly at jhuapl.edu>, "
-+		"Daniel De Graaf <dgdegra at tycho.nsa.gov>");
-+MODULE_DESCRIPTION("User-space grant reference allocator driver");
-diff --git a/include/xen/gntalloc.h b/include/xen/gntalloc.h
-new file mode 100644
-index 0000000..bc3b85e
---- /dev/null
-+++ b/include/xen/gntalloc.h
-@@ -0,0 +1,50 @@
-+/******************************************************************************
-+ * gntalloc.h
-+ *
-+ * Interface to /dev/xen/gntalloc.
-+ *
-+ * Author: Daniel De Graaf <dgdegra at tycho.nsa.gov>
-+ *
-+ * This file is in the public domain.
-+ */
-+
-+#ifndef __LINUX_PUBLIC_GNTALLOC_H__
-+#define __LINUX_PUBLIC_GNTALLOC_H__
-+
-+/*
-+ * Allocates a new page and creates a new grant reference.
-+ */
-+#define IOCTL_GNTALLOC_ALLOC_GREF \
-+_IOC(_IOC_NONE, 'G', 5, sizeof(struct ioctl_gntalloc_alloc_gref))
-+struct ioctl_gntalloc_alloc_gref {
-+	/* IN parameters */
-+	/* The ID of the domain to be given access to the grants. */
-+	uint16_t domid;
-+	/* Flags for this mapping */
-+	uint16_t flags;
-+	/* Number of pages to map */
-+	uint32_t count;
-+	/* OUT parameters */
-+	/* The offset to be used on a subsequent call to mmap(). */
-+	uint64_t index;
-+	/* The grant references of the newly created grant, one per page */
-+	/* Variable size, depending on count */
-+	uint32_t gref_ids[1];
-+};
-+
-+#define GNTALLOC_FLAG_WRITABLE 1
-+
-+/*
-+ * Deallocates the grant reference, allowing the associated page to be freed if
-+ * no other domains are using it.
-+ */
-+#define IOCTL_GNTALLOC_DEALLOC_GREF \
-+_IOC(_IOC_NONE, 'G', 6, sizeof(struct ioctl_gntalloc_dealloc_gref))
-+struct ioctl_gntalloc_dealloc_gref {
-+	/* IN parameters */
-+	/* The offset returned in the map operation */
-+	uint64_t index;
-+	/* Number of references to unmap */
-+	uint32_t count;
-+};
-+#endif /* __LINUX_PUBLIC_GNTALLOC_H__ */
--- 
-1.7.4
-
-
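A hypothetical userspace sketch of the allocation flow described in the comment block above. The device path, peer domid and installed header location are assumptions, and error handling is reduced to bail-outs:

#include <fcntl.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <xen/gntalloc.h>	/* assumed install path of the new header */

int share_one_page(uint16_t peer_domid, uint32_t *gref_out, void **page_out)
{
	int fd = open("/dev/xen/gntalloc", O_RDWR);
	struct ioctl_gntalloc_alloc_gref op = {
		.domid = peer_domid,
		.flags = GNTALLOC_FLAG_WRITABLE,
		.count = 1,	/* gref_ids[1] holds exactly one id */
	};

	if (fd < 0 || ioctl(fd, IOCTL_GNTALLOC_ALLOC_GREF, &op))
		return -1;
	*gref_out = op.gref_ids[0];	/* sent to the peer out of band */
	*page_out = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
			 fd, op.index);	/* op.index is the mmap offset */
	return *page_out == MAP_FAILED ? -1 : 0;
}
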
-From bdc612dc6903c4ea06e40d02f84ad5e25d93459d Mon Sep 17 00:00:00 2001
-From: Daniel De Graaf <dgdegra at tycho.nsa.gov>
-Date: Thu, 3 Feb 2011 12:19:04 -0500
-Subject: [PATCH 126/203] xen/gntalloc,gntdev: Add unmap notify ioctl
-
-This ioctl allows the users of a shared page to be notified when
-the other end exits abnormally.
-
-[v2: updated description in structs]
-Signed-off-by: Daniel De Graaf <dgdegra at tycho.nsa.gov>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/gntalloc.c |   59 ++++++++++++++++++++++++++++++++++++++++++++++
- drivers/xen/gntdev.c   |   61 +++++++++++++++++++++++++++++++++++++++++++++++-
- include/xen/gntalloc.h |   32 +++++++++++++++++++++++++
- include/xen/gntdev.h   |   31 ++++++++++++++++++++++++
- 4 files changed, 182 insertions(+), 1 deletions(-)
-
-diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
-index d06bf2b..a7ffdfe 100644
---- a/drivers/xen/gntalloc.c
-+++ b/drivers/xen/gntalloc.c
-@@ -60,11 +60,13 @@
- #include <linux/uaccess.h>
- #include <linux/types.h>
- #include <linux/list.h>
-+#include <linux/highmem.h>
- 
- #include <xen/xen.h>
- #include <xen/page.h>
- #include <xen/grant_table.h>
- #include <xen/gntalloc.h>
-+#include <xen/events.h>
- 
- static int limit = 1024;
- module_param(limit, int, 0644);
-@@ -75,6 +77,12 @@ static LIST_HEAD(gref_list);
- static DEFINE_SPINLOCK(gref_lock);
- static int gref_size;
- 
-+struct notify_info {
-+	uint16_t pgoff:12;    /* Bits 0-11: Offset of the byte to clear */
-+	uint16_t flags:2;     /* Bits 12-13: Unmap notification flags */
-+	int event;            /* Port (event channel) to notify */
-+};
-+
- /* Metadata on a grant reference. */
- struct gntalloc_gref {
- 	struct list_head next_gref;  /* list entry gref_list */
-@@ -83,6 +91,7 @@ struct gntalloc_gref {
- 	uint64_t file_index;         /* File offset for mmap() */
- 	unsigned int users;          /* Use count - when zero, waiting on Xen */
- 	grant_ref_t gref_id;         /* The grant reference number */
-+	struct notify_info notify;   /* Unmap notification */
- };
- 
- struct gntalloc_file_private_data {
-@@ -164,6 +173,16 @@ undo:
- 
- static void __del_gref(struct gntalloc_gref *gref)
- {
-+	if (gref->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
-+		uint8_t *tmp = kmap(gref->page);
-+		tmp[gref->notify.pgoff] = 0;
-+		kunmap(gref->page);
-+	}
-+	if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT)
-+		notify_remote_via_evtchn(gref->notify.event);
-+
-+	gref->notify.flags = 0;
-+
- 	if (gref->gref_id > 0) {
- 		if (gnttab_query_foreign_access(gref->gref_id))
- 			return;
-@@ -349,6 +368,43 @@ dealloc_grant_out:
- 	return rc;
- }
- 
-+static long gntalloc_ioctl_unmap_notify(struct gntalloc_file_private_data *priv,
-+		void __user *arg)
-+{
-+	struct ioctl_gntalloc_unmap_notify op;
-+	struct gntalloc_gref *gref;
-+	uint64_t index;
-+	int pgoff;
-+	int rc;
-+
-+	if (copy_from_user(&op, arg, sizeof(op)))
-+		return -EFAULT;
-+
-+	index = op.index & ~(PAGE_SIZE - 1);
-+	pgoff = op.index & (PAGE_SIZE - 1);
-+
-+	spin_lock(&gref_lock);
-+
-+	gref = find_grefs(priv, index, 1);
-+	if (!gref) {
-+		rc = -ENOENT;
-+		goto unlock_out;
-+	}
-+
-+	if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT)) {
-+		rc = -EINVAL;
-+		goto unlock_out;
-+	}
-+
-+	gref->notify.flags = op.action;
-+	gref->notify.pgoff = pgoff;
-+	gref->notify.event = op.event_channel_port;
-+	rc = 0;
-+ unlock_out:
-+	spin_unlock(&gref_lock);
-+	return rc;
-+}
-+
- static long gntalloc_ioctl(struct file *filp, unsigned int cmd,
- 		unsigned long arg)
- {
-@@ -361,6 +417,9 @@ static long gntalloc_ioctl(struct file *filp, unsigned int cmd,
- 	case IOCTL_GNTALLOC_DEALLOC_GREF:
- 		return gntalloc_ioctl_dealloc(priv, (void __user *)arg);
- 
-+	case IOCTL_GNTALLOC_SET_UNMAP_NOTIFY:
-+		return gntalloc_ioctl_unmap_notify(priv, (void __user *)arg);
-+
- 	default:
- 		return -ENOIOCTLCMD;
- 	}
-diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
-index bcaf797..9694a1a 100644
---- a/drivers/xen/gntdev.c
-+++ b/drivers/xen/gntdev.c
-@@ -37,6 +37,7 @@
- #include <xen/xen.h>
- #include <xen/grant_table.h>
- #include <xen/gntdev.h>
-+#include <xen/events.h>
- #include <asm/xen/hypervisor.h>
- #include <asm/xen/hypercall.h>
- #include <asm/xen/page.h>
-@@ -63,6 +64,13 @@ struct gntdev_priv {
- 	struct mmu_notifier mn;
- };
- 
-+struct unmap_notify {
-+	int flags;
-+	/* Address relative to the start of the grant_map */
-+	int addr;
-+	int event;
-+};
-+
- struct grant_map {
- 	struct list_head next;
- 	struct vm_area_struct *vma;
-@@ -71,6 +79,7 @@ struct grant_map {
- 	int flags;
- 	int is_mapped;
- 	atomic_t users;
-+	struct unmap_notify notify;
- 	struct ioctl_gntdev_grant_ref *grants;
- 	struct gnttab_map_grant_ref   *map_ops;
- 	struct gnttab_unmap_grant_ref *unmap_ops;
-@@ -165,7 +174,7 @@ static struct grant_map *gntdev_find_map_index(struct gntdev_priv *priv,
- 	list_for_each_entry(map, &priv->maps, next) {
- 		if (map->index != index)
- 			continue;
--		if (map->count != count)
-+		if (count && map->count != count)
- 			continue;
- 		return map;
- 	}
-@@ -184,6 +193,10 @@ static void gntdev_put_map(struct grant_map *map)
- 
- 	atomic_sub(map->count, &pages_mapped);
- 
-+	if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
-+		notify_remote_via_evtchn(map->notify.event);
-+	}
-+
- 	if (map->pages) {
- 		if (!use_ptemod)
- 			unmap_grant_pages(map, 0, map->count);
-@@ -274,6 +287,16 @@ static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
- {
- 	int i, err = 0;
- 
-+	if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
-+		int pgno = (map->notify.addr >> PAGE_SHIFT);
-+		if (pgno >= offset && pgno < offset + pages) {
-+			uint8_t *tmp = kmap(map->pages[pgno]);
-+			tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
-+			kunmap(map->pages[pgno]);
-+			map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
-+		}
-+	}
-+
- 	pr_debug("map %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
- 	err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages, pages);
- 	if (err)
-@@ -519,6 +542,39 @@ static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv,
- 	return 0;
- }
- 
-+static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
-+{
-+	struct ioctl_gntdev_unmap_notify op;
-+	struct grant_map *map;
-+	int rc;
-+
-+	if (copy_from_user(&op, u, sizeof(op)))
-+		return -EFAULT;
-+
-+	if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT))
-+		return -EINVAL;
-+
-+	spin_lock(&priv->lock);
-+
-+	list_for_each_entry(map, &priv->maps, next) {
-+		uint64_t begin = map->index << PAGE_SHIFT;
-+		uint64_t end = (map->index + map->count) << PAGE_SHIFT;
-+		if (op.index >= begin && op.index < end)
-+			goto found;
-+	}
-+	rc = -ENOENT;
-+	goto unlock_out;
-+
-+ found:
-+	map->notify.flags = op.action;
-+	map->notify.addr = op.index - (map->index << PAGE_SHIFT);
-+	map->notify.event = op.event_channel_port;
-+	rc = 0;
-+ unlock_out:
-+	spin_unlock(&priv->lock);
-+	return rc;
-+}
-+
- static long gntdev_ioctl(struct file *flip,
- 			 unsigned int cmd, unsigned long arg)
- {
-@@ -535,6 +591,9 @@ static long gntdev_ioctl(struct file *flip,
- 	case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR:
- 		return gntdev_ioctl_get_offset_for_vaddr(priv, ptr);
- 
-+	case IOCTL_GNTDEV_SET_UNMAP_NOTIFY:
-+		return gntdev_ioctl_notify(priv, ptr);
-+
- 	default:
- 		pr_debug("priv %p, unknown cmd %x\n", priv, cmd);
- 		return -ENOIOCTLCMD;
-diff --git a/include/xen/gntalloc.h b/include/xen/gntalloc.h
-index bc3b85e..76bd580 100644
---- a/include/xen/gntalloc.h
-+++ b/include/xen/gntalloc.h
-@@ -47,4 +47,36 @@ struct ioctl_gntalloc_dealloc_gref {
- 	/* Number of references to unmap */
- 	uint32_t count;
- };
-+
-+/*
-+ * Sets up an unmap notification within the page, so that the other side can do
-+ * cleanup if this side crashes. Required to implement cross-domain robust
-+ * mutexes or close notification on communication channels.
-+ *
-+ * Each mapped page only supports one notification; multiple calls referring to
-+ * the same page overwrite the previous notification. You must clear the
-+ * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it
-+ * to occur.
-+ */
-+#define IOCTL_GNTALLOC_SET_UNMAP_NOTIFY \
-+_IOC(_IOC_NONE, 'G', 7, sizeof(struct ioctl_gntalloc_unmap_notify))
-+struct ioctl_gntalloc_unmap_notify {
-+	/* IN parameters */
-+	/* Offset in the file descriptor for a byte within the page (same as
-+	 * used in mmap). If using UNMAP_NOTIFY_CLEAR_BYTE, this is the byte to
-+	 * be cleared. Otherwise, it can be any byte in the page whose
-+	 * notification we are adjusting.
-+	 */
-+	uint64_t index;
-+	/* Action(s) to take on unmap */
-+	uint32_t action;
-+	/* Event channel to notify */
-+	uint32_t event_channel_port;
-+};
-+
-+/* Clear (set to zero) the byte specified by index */
-+#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
-+/* Send an interrupt on the indicated event channel */
-+#define UNMAP_NOTIFY_SEND_EVENT 0x2
-+
- #endif /* __LINUX_PUBLIC_GNTALLOC_H__ */
-diff --git a/include/xen/gntdev.h b/include/xen/gntdev.h
-index eb23f41..5304bd3 100644
---- a/include/xen/gntdev.h
-+++ b/include/xen/gntdev.h
-@@ -116,4 +116,35 @@ struct ioctl_gntdev_set_max_grants {
- 	uint32_t count;
- };
- 
-+/*
-+ * Sets up an unmap notification within the page, so that the other side can do
-+ * cleanup if this side crashes. Required to implement cross-domain robust
-+ * mutexes or close notification on communication channels.
-+ *
-+ * Each mapped page only supports one notification; multiple calls referring to
-+ * the same page overwrite the previous notification. You must clear the
-+ * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it
-+ * to occur.
-+ */
-+#define IOCTL_GNTDEV_SET_UNMAP_NOTIFY \
-+_IOC(_IOC_NONE, 'G', 7, sizeof(struct ioctl_gntdev_unmap_notify))
-+struct ioctl_gntdev_unmap_notify {
-+	/* IN parameters */
-+	/* Offset in the file descriptor for a byte within the page (same as
-+	 * used in mmap). If using UNMAP_NOTIFY_CLEAR_BYTE, this is the byte to
-+	 * be cleared. Otherwise, it can be any byte in the page whose
-+	 * notification we are adjusting.
-+	 */
-+	uint64_t index;
-+	/* Action(s) to take on unmap */
-+	uint32_t action;
-+	/* Event channel to notify */
-+	uint32_t event_channel_port;
-+};
-+
-+/* Clear (set to zero) the byte specified by index */
-+#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
-+/* Send an interrupt on the indicated event channel */
-+#define UNMAP_NOTIFY_SEND_EVENT 0x2
-+
- #endif /* __LINUX_PUBLIC_GNTDEV_H__ */
--- 
-1.7.4
-
-
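A hypothetical userspace sketch of arming the notification on the gntdev side; gntdev_fd, map_index and port are assumed to exist already, and the gntalloc variant differs only in the ioctl name and struct:

struct ioctl_gntdev_unmap_notify notify = {
	.index  = map_index,	/* byte 0 of the first page as liveness flag */
	.action = UNMAP_NOTIFY_CLEAR_BYTE | UNMAP_NOTIFY_SEND_EVENT,
	.event_channel_port = port,
};

if (ioctl(gntdev_fd, IOCTL_GNTDEV_SET_UNMAP_NOTIFY, &notify))
	return -1;
/* From here on, if this side dies without a clean teardown the kernel
 * zeroes the flagged byte and signals `port`, so the peer can tell a
 * crash from a clean close. */
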
-From 90b6f30548a52f3a59cda5c7db0b9c2a99ebb156 Mon Sep 17 00:00:00 2001
-From: Daniel De Graaf <dgdegra at tycho.nsa.gov>
-Date: Thu, 3 Feb 2011 14:16:54 -0500
-Subject: [PATCH 127/203] xen-gntdev: Fix memory leak when mmap fails
-
-The error path did not decrement the reference count of the grant structure.
-
-Signed-off-by: Daniel De Graaf <dgdegra at tycho.nsa.gov>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/gntdev.c |   14 ++++++++------
- 1 files changed, 8 insertions(+), 6 deletions(-)
-
-diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
-index 9694a1a..2aa8316 100644
---- a/drivers/xen/gntdev.c
-+++ b/drivers/xen/gntdev.c
-@@ -650,15 +650,13 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
- 					  find_grant_ptes, map);
- 		if (err) {
- 			printk(KERN_WARNING "find_grant_ptes() failure.\n");
--			return err;
-+			goto out_put_map;
- 		}
- 	}
- 
- 	err = map_grant_pages(map);
--	if (err) {
--		printk(KERN_WARNING "map_grant_pages() failure.\n");
--		return err;
--	}
-+	if (err)
-+		goto out_put_map;
- 
- 	map->is_mapped = 1;
- 
-@@ -667,7 +665,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
- 			err = vm_insert_page(vma, vma->vm_start + i*PAGE_SIZE,
- 				map->pages[i]);
- 			if (err)
--				return err;
-+				goto out_put_map;
- 		}
- 	}
- 
-@@ -676,6 +674,10 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
- unlock_out:
- 	spin_unlock(&priv->lock);
- 	return err;
-+
-+out_put_map:
-+	gntdev_put_map(map);
-+	return err;
- }
- 
- static const struct file_operations gntdev_fops = {
--- 
-1.7.4
-
-
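The shape of the fix above, every failure path after the reference is taken funnelling through one label that drops it, is the standard kernel cleanup idiom. A minimal stand-alone sketch of that pattern, with hypothetical names rather than the gntdev structures:

#include <stdio.h>

struct map { int refs; };

static void put_map(struct map *m) { m->refs--; }

static int do_mmap(struct map *m, int fail_step)
{
	int err = 0;

	m->refs++;			/* reference taken up front */

	if (fail_step == 1) { err = -1; goto out_put_map; }
	if (fail_step == 2) { err = -2; goto out_put_map; }

	return 0;			/* success: caller keeps the ref */

out_put_map:
	put_map(m);			/* single exit drops the ref */
	return err;
}

int main(void)
{
	struct map m = { 0 };

	do_mmap(&m, 1);
	printf("refs after failed mmap: %d\n", m.refs);	/* prints 0 */
	return 0;
}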
-From 0ea22f072fb1b3da4307573c280ce904f0bf1589 Mon Sep 17 00:00:00 2001
-From: Daniel De Graaf <dgdegra at tycho.nsa.gov>
-Date: Tue, 8 Feb 2011 09:14:06 -0500
-Subject: [PATCH 128/203] xen-gntdev: Fix unmap notify on PV domains
-
-In paravirtualized guests, the struct page* for mappings is only a
-placeholder, and cannot be used to access the granted memory. Use the
-userspace mapping that we have set up in order to implement
-UNMAP_NOTIFY_CLEAR_BYTE.
-
-Signed-off-by: Daniel De Graaf <dgdegra at tycho.nsa.gov>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/gntdev.c |    9 +++++++--
- 1 files changed, 7 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
-index 2aa8316..75f8037 100644
---- a/drivers/xen/gntdev.c
-+++ b/drivers/xen/gntdev.c
-@@ -289,7 +289,12 @@ static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
- 
- 	if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
- 		int pgno = (map->notify.addr >> PAGE_SHIFT);
--		if (pgno >= offset && pgno < offset + pages) {
-+		if (pgno >= offset && pgno < offset + pages && use_ptemod) {
-+			void __user *tmp;
-+			tmp = map->vma->vm_start + map->notify.addr;
-+			copy_to_user(tmp, &err, 1);
-+			map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
-+		} else if (pgno >= offset && pgno < offset + pages) {
- 			uint8_t *tmp = kmap(map->pages[pgno]);
- 			tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
- 			kunmap(map->pages[pgno]);
-@@ -298,7 +303,7 @@ static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
- 	}
- 
- 	pr_debug("map %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
--	err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages, pages);
-+	err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages + offset, pages);
- 	if (err)
- 		return err;
- 
--- 
-1.7.4
-
-
-From 84e4075d60fc8f1c0b937765620bc784dd0c3d39 Mon Sep 17 00:00:00 2001
-From: Daniel De Graaf <dgdegra at tycho.nsa.gov>
-Date: Wed, 9 Feb 2011 15:11:59 -0500
-Subject: [PATCH 129/203] xen-gntdev: Use map->vma for checking map validity
-
-The is_mapped flag used to be set at the completion of the map operation,
-but was not checked in all error paths. Use map->vma instead, which will
-now be cleared if the initial grant mapping fails.
-
-Signed-off-by: Daniel De Graaf <dgdegra at tycho.nsa.gov>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/gntdev.c |    8 ++------
- 1 files changed, 2 insertions(+), 6 deletions(-)
-
-diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
-index 75f8037..4ca4262 100644
---- a/drivers/xen/gntdev.c
-+++ b/drivers/xen/gntdev.c
-@@ -77,7 +77,6 @@ struct grant_map {
- 	int index;
- 	int count;
- 	int flags;
--	int is_mapped;
- 	atomic_t users;
- 	struct unmap_notify notify;
- 	struct ioctl_gntdev_grant_ref *grants;
-@@ -322,7 +321,6 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
- 	struct grant_map *map = vma->vm_private_data;
- 
- 	pr_debug("close %p\n", vma);
--	map->is_mapped = 0;
- 	map->vma = NULL;
- 	vma->vm_private_data = NULL;
- 	gntdev_put_map(map);
-@@ -347,8 +345,6 @@ static void mn_invl_range_start(struct mmu_notifier *mn,
- 	list_for_each_entry(map, &priv->maps, next) {
- 		if (!map->vma)
- 			continue;
--		if (!map->is_mapped)
--			continue;
- 		if (map->vma->vm_start >= end)
- 			continue;
- 		if (map->vma->vm_end <= start)
-@@ -663,8 +659,6 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
- 	if (err)
- 		goto out_put_map;
- 
--	map->is_mapped = 1;
--
- 	if (!use_ptemod) {
- 		for (i = 0; i < count; i++) {
- 			err = vm_insert_page(vma, vma->vm_start + i*PAGE_SIZE,
-@@ -681,6 +675,8 @@ unlock_out:
- 	return err;
- 
- out_put_map:
-+	if (use_ptemod)
-+		map->vma = NULL;
- 	gntdev_put_map(map);
- 	return err;
- }
--- 
-1.7.4
-
-
-From b57c18694ea1641b691fa05ed8af0ce339fa430b Mon Sep 17 00:00:00 2001
-From: Daniel De Graaf <dgdegra at tycho.nsa.gov>
-Date: Wed, 9 Feb 2011 15:12:00 -0500
-Subject: [PATCH 130/203] xen-gntdev: Avoid unmapping ranges twice
-
-In paravirtualized domains, mn_invl_page or mn_invl_range_start can
-unmap a segment of a mapped region without unmapping all pages. When
-the region is later released, the pages will be unmapped twice, leading
-to an incorrect -EINVAL return.
-
-Signed-off-by: Daniel De Graaf <dgdegra at tycho.nsa.gov>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/gntdev.c |   33 +++++++++++++++++++++++++++++++--
- 1 files changed, 31 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
-index 4ca4262..4687cd5 100644
---- a/drivers/xen/gntdev.c
-+++ b/drivers/xen/gntdev.c
-@@ -282,7 +282,7 @@ static int map_grant_pages(struct grant_map *map)
- 	return err;
- }
- 
--static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
-+static int __unmap_grant_pages(struct grant_map *map, int offset, int pages)
- {
- 	int i, err = 0;
- 
-@@ -301,7 +301,6 @@ static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
- 		}
- 	}
- 
--	pr_debug("map %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
- 	err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages + offset, pages);
- 	if (err)
- 		return err;
-@@ -314,6 +313,36 @@ static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
- 	return err;
- }
- 
-+static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
-+{
-+	int range, err = 0;
-+
-+	pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
-+
-+	/* It is possible the requested range will have a "hole" where we
-+	 * already unmapped some of the grants. Only unmap valid ranges.
-+	 */
-+	while (pages && !err) {
-+		while (pages && !map->unmap_ops[offset].handle) {
-+			offset++;
-+			pages--;
-+		}
-+		range = 0;
-+		while (range < pages) {
-+			if (!map->unmap_ops[offset+range].handle) {
-+				range--;
-+				break;
-+			}
-+			range++;
-+		}
-+		err = __unmap_grant_pages(map, offset, range);
-+		offset += range;
-+		pages -= range;
-+	}
-+
-+	return err;
-+}
-+
- /* ------------------------------------------------------------------ */
- 
- static void gntdev_vma_close(struct vm_area_struct *vma)
--- 
-1.7.4
-
-
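The hole-skipping walk introduced by this patch is easier to follow in isolation. Below is a simplified stand-alone sketch of the same strategy (hypothetical names, condensed from the kernel loop): skip entries whose grant handle was already cleared, unmap the next maximal run of still-valid entries, and repeat.

#include <stdio.h>

static int unmap_run(int offset, int count)
{
	printf("unmap [%d..%d)\n", offset, offset + count);
	return 0;
}

static int unmap_with_holes(const int *handle, int offset, int pages)
{
	int range, err = 0;

	while (pages && !err) {
		/* skip entries that were already unmapped */
		while (pages && !handle[offset]) {
			offset++;
			pages--;
		}
		/* measure the next contiguous run of mapped entries */
		range = 0;
		while (range < pages && handle[offset + range])
			range++;
		if (range)
			err = unmap_run(offset, range);
		offset += range;
		pages -= range;
	}
	return err;
}

int main(void)
{
	int handle[] = { 1, 1, 0, 0, 1, 0, 1, 1 };

	/* unmaps [0..2), [4..5) and [6..8), skipping the holes */
	return unmap_with_holes(handle, 0, 8);
}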
-From 12996fc38a2d760f3b30c9ceae26d0eeb92fe52d Mon Sep 17 00:00:00 2001
-From: Daniel De Graaf <dgdegra at tycho.nsa.gov>
-Date: Wed, 9 Feb 2011 16:11:32 -0500
-Subject: [PATCH 131/203] xen-gntdev: Avoid double-mapping memory
-
-If an already-mapped area of the device was mapped into userspace a
-second time, a hypercall was incorrectly made to remap the memory
-again. Avoid the hypercall on later mmap calls, and fail the mmap call
-if a writable mapping is attempted on a read-only range.
-
-Signed-off-by: Daniel De Graaf <dgdegra at tycho.nsa.gov>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/gntdev.c |   15 ++++++++++++---
- 1 files changed, 12 insertions(+), 3 deletions(-)
-
-diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
-index 4687cd5..2c4cc94 100644
---- a/drivers/xen/gntdev.c
-+++ b/drivers/xen/gntdev.c
-@@ -258,6 +258,9 @@ static int map_grant_pages(struct grant_map *map)
- 	phys_addr_t addr;
- 
- 	if (!use_ptemod) {
-+		/* Note: it could already be mapped */
-+		if (map->map_ops[0].handle)
-+			return 0;
- 		for (i = 0; i < map->count; i++) {
- 			addr = (phys_addr_t)
- 				pfn_to_kaddr(page_to_pfn(map->pages[i]));
-@@ -668,9 +671,15 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
- 	if (use_ptemod)
- 		map->vma = vma;
- 
--	map->flags = GNTMAP_host_map;
--	if (!(vma->vm_flags & VM_WRITE))
--		map->flags |= GNTMAP_readonly;
-+	if (map->flags) {
-+		if ((vma->vm_flags & VM_WRITE) &&
-+				(map->flags & GNTMAP_readonly))
-+			return -EINVAL;
-+	} else {
-+		map->flags = GNTMAP_host_map;
-+		if (!(vma->vm_flags & VM_WRITE))
-+			map->flags |= GNTMAP_readonly;
-+	}
- 
- 	spin_unlock(&priv->lock);
- 
--- 
-1.7.4
-
-
-From 9960be970cea52c1cb7d7c747ff6da367e1c01b5 Mon Sep 17 00:00:00 2001
-From: Daniel De Graaf <dgdegra at tycho.nsa.gov>
-Date: Wed, 9 Feb 2011 18:15:50 -0500
-Subject: [PATCH 132/203] xen-gntdev: prevent using UNMAP_NOTIFY_CLEAR_BYTE on read-only mappings
-
-Signed-off-by: Daniel De Graaf <dgdegra at tycho.nsa.gov>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/gntdev.c |   10 +++++++++-
- 1 files changed, 9 insertions(+), 1 deletions(-)
-
-diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
-index 2c4cc94..2a4733c 100644
---- a/drivers/xen/gntdev.c
-+++ b/drivers/xen/gntdev.c
-@@ -294,7 +294,9 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages)
- 		if (pgno >= offset && pgno < offset + pages && use_ptemod) {
- 			void __user *tmp;
- 			tmp = map->vma->vm_start + map->notify.addr;
--			copy_to_user(tmp, &err, 1);
-+			err = copy_to_user(tmp, &err, 1);
-+			if (err)
-+				return err;
- 			map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
- 		} else if (pgno >= offset && pgno < offset + pages) {
- 			uint8_t *tmp = kmap(map->pages[pgno]);
-@@ -599,6 +601,12 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
- 	goto unlock_out;
- 
-  found:
-+	if ((op.action & UNMAP_NOTIFY_CLEAR_BYTE) &&
-+			(map->flags & GNTMAP_readonly)) {
-+		rc = -EINVAL;
-+		goto unlock_out;
-+	}
-+
- 	map->notify.flags = op.action;
- 	map->notify.addr = op.index - (map->index << PAGE_SHIFT);
- 	map->notify.event = op.event_channel_port;
--- 
-1.7.4
-
-
-From aa95762050b578ea143d674c228f8d5afd33e9ab Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx at linutronix.de>
-Date: Sat, 5 Feb 2011 20:08:59 +0000
-Subject: [PATCH 133/203] xen: Use IRQF_FORCE_RESUME
-
-Mark the IRQF_NO_SUSPEND interrupts IRQF_FORCE_RESUME and remove the extra
-walk through the interrupt descriptors.
-
-Signed-off-by: Thomas Gleixner <tglx at linutronix.de>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/events.c |   20 +-------------------
- 1 files changed, 1 insertions(+), 19 deletions(-)
-
-diff --git a/drivers/xen/events.c b/drivers/xen/events.c
-index 5aa422a..975e90f 100644
---- a/drivers/xen/events.c
-+++ b/drivers/xen/events.c
-@@ -977,7 +977,7 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
- 	if (irq < 0)
- 		return irq;
- 
--	irqflags |= IRQF_NO_SUSPEND;
-+	irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME;
- 	retval = request_irq(irq, handler, irqflags, devname, dev_id);
- 	if (retval != 0) {
- 		unbind_from_irq(irq);
-@@ -1433,7 +1433,6 @@ void xen_poll_irq(int irq)
- void xen_irq_resume(void)
- {
- 	unsigned int cpu, irq, evtchn;
--	struct irq_desc *desc;
- 
- 	init_evtchn_cpu_bindings();
- 
-@@ -1453,23 +1452,6 @@ void xen_irq_resume(void)
- 		restore_cpu_ipis(cpu);
- 	}
- 
--	/*
--	 * Unmask any IRQF_NO_SUSPEND IRQs which are enabled. These
--	 * are not handled by the IRQ core.
--	 */
--	for_each_irq_desc(irq, desc) {
--		if (!desc->action || !(desc->action->flags & IRQF_NO_SUSPEND))
--			continue;
--		if (desc->status & IRQ_DISABLED)
--			continue;
--
--		evtchn = evtchn_from_irq(irq);
--		if (evtchn == -1)
--			continue;
--
--		unmask_evtchn(evtchn);
--	}
--
- 	restore_cpu_pirqs();
- }
- 
--- 
-1.7.4
-
-
-From 1f169f66ddcc3b4614f840bef367de1ca20e16fe Mon Sep 17 00:00:00 2001
-From: Stefano Stabellini <stefano.stabellini at eu.citrix.com>
-Date: Thu, 10 Feb 2011 12:08:21 +0000
-Subject: [PATCH 134/203] xen: change xen/[gntdev/gntalloc] to default m
-
-When CONFIG_XEN is enabled the gntdev and gntalloc drivers will be
-compiled as modules by default.
-
-[v2: Added the fix for the gntalloc driver as well]
-
-Signed-off-by: Stefano Stabellini <stefano.stabellini at eu.citrix.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/Kconfig |    2 ++
- 1 files changed, 2 insertions(+), 0 deletions(-)
-
-diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
-index a3d7afb..a59638b 100644
---- a/drivers/xen/Kconfig
-+++ b/drivers/xen/Kconfig
-@@ -76,6 +76,7 @@ config XEN_XENBUS_FRONTEND
- config XEN_GNTDEV
- 	tristate "userspace grant access device driver"
- 	depends on XEN
-+	default m
- 	select MMU_NOTIFIER
- 	help
- 	  Allows userspace processes to use grants.
-@@ -83,6 +84,7 @@ config XEN_GNTDEV
- config XEN_GRANT_DEV_ALLOC
- 	tristate "User-space grant reference allocator driver"
- 	depends on XEN
-+	default m
- 	help
- 	  Allows userspace processes to create pages with access granted
- 	  to other domains. This can be used to implement frontend drivers
--- 
-1.7.4
-
-
-From db2e2e6ee9ee9ce93b04c6975fdfef304771d6ad Mon Sep 17 00:00:00 2001
-From: Tejun Heo <tj at kernel.org>
-Date: Mon, 24 Jan 2011 15:43:03 +0100
-Subject: [PATCH 135/203] xen-pcifront: don't use flush_scheduled_work()
-
-flush_scheduled_work() is scheduled for deprecation.  Cancel ->op_work
-directly instead.
-
-Signed-off-by: Tejun Heo <tj at kernel.org>
-Cc: Ryan Wilson <hap9 at epoch.ncsc.mil>
-Cc: Jan Beulich <JBeulich at novell.com>
-Cc: Jesse Barnes <jbarnes at virtuousgeek.org>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/pci/xen-pcifront.c |    3 +--
- 1 files changed, 1 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
-index 3a5a6fc..030ce37 100644
---- a/drivers/pci/xen-pcifront.c
-+++ b/drivers/pci/xen-pcifront.c
-@@ -733,8 +733,7 @@ static void free_pdev(struct pcifront_device *pdev)
- 
- 	pcifront_free_roots(pdev);
- 
--	/*For PCIE_AER error handling job*/
--	flush_scheduled_work();
-+	cancel_work_sync(&pdev->op_work);
- 
- 	if (pdev->irq >= 0)
- 		unbind_from_irqhandler(pdev->irq, pdev);
--- 
-1.7.4
-
-
-From 1fb6c1f210d13d7099ece8aee5f351b7e5ecb3bd Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Thu, 17 Feb 2011 11:23:58 -0500
-Subject: [PATCH 136/203] xen/irq: Clean up the pirq_to_irq for DomU PV PCI passthrough guests as well.
-
-We only did this for PV guests that are xen_initial_domain() but
-there is not reason not to do this for other cases. The other
-case is only exercised when you pass in a PCI device to a PV guest
-_and_ the device in question.
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/events.c |    3 ++-
- 1 files changed, 2 insertions(+), 1 deletions(-)
-
-diff --git a/drivers/xen/events.c b/drivers/xen/events.c
-index 975e90f..89987a7 100644
---- a/drivers/xen/events.c
-+++ b/drivers/xen/events.c
-@@ -766,8 +766,9 @@ int xen_destroy_irq(int irq)
- 			printk(KERN_WARNING "unmap irq failed %d\n", rc);
- 			goto out;
- 		}
--		pirq_to_irq[info->u.pirq.pirq] = -1;
- 	}
-+	pirq_to_irq[info->u.pirq.pirq] = -1;
-+
- 	irq_info[irq] = mk_unbound_info();
- 
- 	xen_free_irq(irq);
--- 
-1.7.4
-
-
-From 1d4610527bc71d3f9eea520fc51a02d54f79dcd0 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Wed, 16 Feb 2011 13:43:22 -0500
-Subject: [PATCH 137/203] xen-pcifront: Sanity check the MSI/MSI-X values
-
-Check the returned vector values for any values that are
-odd or plainly incorrect (say, a vector value of zero), and if so
-print a warning. Also fix up the return values.
-
-This patch was precipitated by the Xen PCIBack returning
-incorrect values due to how it was retrieving PIRQ values.
-This has been fixed in the xen-pciback by
-"xen/pciback: Utilize 'xen_pirq_from_irq' to get PIRQ value"
-patch.
-
-Reviewed-by: Ian Campbell <Ian.Campbell at citrix.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/pci/xen-pcifront.c |   20 ++++++++++++++++----
- 1 files changed, 16 insertions(+), 4 deletions(-)
-
-diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
-index 030ce37..5c7b6ad 100644
---- a/drivers/pci/xen-pcifront.c
-+++ b/drivers/pci/xen-pcifront.c
-@@ -277,18 +277,24 @@ static int pci_frontend_enable_msix(struct pci_dev *dev,
- 	if (likely(!err)) {
- 		if (likely(!op.value)) {
- 			/* we get the result */
--			for (i = 0; i < nvec; i++)
-+			for (i = 0; i < nvec; i++) {
-+				if (op.msix_entries[i].vector <= 0) {
-+					dev_warn(&dev->dev, "MSI-X entry %d is invalid: %d!\n",
-+						i, op.msix_entries[i].vector);
-+					err = -EINVAL;
-+					*(*vector+i) = -1;
-+					continue;
-+				}
- 				*(*vector+i) = op.msix_entries[i].vector;
--			return 0;
-+			}
- 		} else {
- 			printk(KERN_DEBUG "enable msix get value %x\n",
- 				op.value);
--			return op.value;
- 		}
- 	} else {
- 		dev_err(&dev->dev, "enable msix get err %x\n", err);
--		return err;
- 	}
-+	return err;
- }
- 
- static void pci_frontend_disable_msix(struct pci_dev *dev)
-@@ -325,6 +331,12 @@ static int pci_frontend_enable_msi(struct pci_dev *dev, int **vector)
- 	err = do_pci_op(pdev, &op);
- 	if (likely(!err)) {
- 		*(*vector) = op.value;
-+		if (op.value <= 0) {
-+			dev_warn(&dev->dev, "MSI entry is invalid: %d!\n",
-+				op.value);
-+			err = -EINVAL;
-+			*(*vector) = -1;	
-+		}
- 	} else {
- 		dev_err(&dev->dev, "pci frontend enable msi failed for dev "
- 				    "%x:%x\n", op.bus, op.devfn);
--- 
-1.7.4
-
-
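Stripped of the PCI plumbing, the check added above amounts to validating each vector the backend hands back: anything zero or below can never be a usable interrupt, so it is reported and replaced with -1. A rough stand-alone sketch (hypothetical names):

#include <errno.h>
#include <stdio.h>

static int sanitize_vectors(int *vector, int nvec)
{
	int i, err = 0;

	for (i = 0; i < nvec; i++) {
		if (vector[i] <= 0) {
			fprintf(stderr, "entry %d is invalid: %d\n",
				i, vector[i]);
			vector[i] = -1;	/* mark the slot unusable */
			err = -EINVAL;
		}
	}
	return err;
}

int main(void)
{
	int v[] = { 32, 0, 33 };

	if (sanitize_vectors(v, 3))
		printf("backend returned at least one bad vector\n");
	return 0;
}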
-From 55cb8cd45e0600df1473489518d7f12ce1bbe973 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Wed, 16 Feb 2011 13:43:04 -0500
-Subject: [PATCH 138/203] pci/xen: Use xen_allocate_pirq_msi instead of xen_allocate_pirq
-
-xen_allocate_pirq -> xen_map_pirq_gsi -> PHYSDEVOP_alloc_irq_vector IFF
-xen_initial_domain(), in addition to the kernel-side book-keeping
-(set chip and handler, update irq_info, etc.), whereas
-xen_allocate_pirq_msi just does the kernel book-keeping.
-
-Also xen_allocate_pirq allocates an IRQ in the 1-1 GSI space whereas
-xen_allocate_pirq_msi allocates a dynamic one in the >GSI IRQ space.
-
-All of this is unnecessary as this code path is only executed
-when we run as a domU PV guest with an MSI/MSI-X PCI card passed in.
-Hence we can jump straight to allocating a dynamic IRQ (and
-binding it to the proper PIRQ) and skip the rest.
-
-In short: this change is a cosmetic one.
-
-Reviewed-by: Ian Campbell <Ian.Campbell at citrix.com>
-Reviewed-by: Stefano Stabellini <stefano.stabellini at eu.citrix.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- arch/x86/pci/xen.c |    6 +++---
- 1 files changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
-index 25cd4a0..6432f75 100644
---- a/arch/x86/pci/xen.c
-+++ b/arch/x86/pci/xen.c
-@@ -157,14 +157,14 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
- 		goto error;
- 	i = 0;
- 	list_for_each_entry(msidesc, &dev->msi_list, list) {
--		irq = xen_allocate_pirq(v[i], 0, /* not sharable */
-+		xen_allocate_pirq_msi(
- 			(type == PCI_CAP_ID_MSIX) ?
--			"pcifront-msi-x" : "pcifront-msi");
-+			"pcifront-msi-x" : "pcifront-msi",
-+			&irq, &v[i], XEN_ALLOC_IRQ);
- 		if (irq < 0) {
- 			ret = -1;
- 			goto free;
- 		}
--
- 		ret = set_irq_msi(irq, msidesc);
- 		if (ret)
- 			goto error_while;
--- 
-1.7.4
-
-
-From cc0f89c4a426fcd6400a89e9e34e4a8851abef76 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Thu, 17 Feb 2011 12:02:23 -0500
-Subject: [PATCH 139/203] pci/xen: Cleanup: convert int** to int[]
-
-Cleanup code. Cosmetic change to make the code easier
-to read.
-
-Reviewed-by: Ian Campbell <Ian.Campbell at citrix.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- arch/x86/include/asm/xen/pci.h |    8 ++++----
- arch/x86/pci/xen.c             |    4 ++--
- drivers/pci/xen-pcifront.c     |   12 ++++++------
- 3 files changed, 12 insertions(+), 12 deletions(-)
-
-diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h
-index 2329b3e..aa86209 100644
---- a/arch/x86/include/asm/xen/pci.h
-+++ b/arch/x86/include/asm/xen/pci.h
-@@ -27,16 +27,16 @@ static inline void __init xen_setup_pirqs(void)
-  * its own functions.
-  */
- struct xen_pci_frontend_ops {
--	int (*enable_msi)(struct pci_dev *dev, int **vectors);
-+	int (*enable_msi)(struct pci_dev *dev, int vectors[]);
- 	void (*disable_msi)(struct pci_dev *dev);
--	int (*enable_msix)(struct pci_dev *dev, int **vectors, int nvec);
-+	int (*enable_msix)(struct pci_dev *dev, int vectors[], int nvec);
- 	void (*disable_msix)(struct pci_dev *dev);
- };
- 
- extern struct xen_pci_frontend_ops *xen_pci_frontend;
- 
- static inline int xen_pci_frontend_enable_msi(struct pci_dev *dev,
--					      int **vectors)
-+					      int vectors[])
- {
- 	if (xen_pci_frontend && xen_pci_frontend->enable_msi)
- 		return xen_pci_frontend->enable_msi(dev, vectors);
-@@ -48,7 +48,7 @@ static inline void xen_pci_frontend_disable_msi(struct pci_dev *dev)
- 			xen_pci_frontend->disable_msi(dev);
- }
- static inline int xen_pci_frontend_enable_msix(struct pci_dev *dev,
--					       int **vectors, int nvec)
-+					       int vectors[], int nvec)
- {
- 	if (xen_pci_frontend && xen_pci_frontend->enable_msix)
- 		return xen_pci_frontend->enable_msix(dev, vectors, nvec);
-diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
-index 6432f75..30fdd09 100644
---- a/arch/x86/pci/xen.c
-+++ b/arch/x86/pci/xen.c
-@@ -150,9 +150,9 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
- 		return -ENOMEM;
- 
- 	if (type == PCI_CAP_ID_MSIX)
--		ret = xen_pci_frontend_enable_msix(dev, &v, nvec);
-+		ret = xen_pci_frontend_enable_msix(dev, v, nvec);
- 	else
--		ret = xen_pci_frontend_enable_msi(dev, &v);
-+		ret = xen_pci_frontend_enable_msi(dev, v);
- 	if (ret)
- 		goto error;
- 	i = 0;
-diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
-index 5c7b6ad..492b7d8 100644
---- a/drivers/pci/xen-pcifront.c
-+++ b/drivers/pci/xen-pcifront.c
-@@ -243,7 +243,7 @@ struct pci_ops pcifront_bus_ops = {
- 
- #ifdef CONFIG_PCI_MSI
- static int pci_frontend_enable_msix(struct pci_dev *dev,
--				    int **vector, int nvec)
-+				    int vector[], int nvec)
- {
- 	int err;
- 	int i;
-@@ -282,10 +282,10 @@ static int pci_frontend_enable_msix(struct pci_dev *dev,
- 					dev_warn(&dev->dev, "MSI-X entry %d is invalid: %d!\n",
- 						i, op.msix_entries[i].vector);
- 					err = -EINVAL;
--					*(*vector+i) = -1;
-+					vector[i] = -1;
- 					continue;
- 				}
--				*(*vector+i) = op.msix_entries[i].vector;
-+				vector[i] = op.msix_entries[i].vector;
- 			}
- 		} else {
- 			printk(KERN_DEBUG "enable msix get value %x\n",
-@@ -316,7 +316,7 @@ static void pci_frontend_disable_msix(struct pci_dev *dev)
- 		dev_err(&dev->dev, "pci_disable_msix get err %x\n", err);
- }
- 
--static int pci_frontend_enable_msi(struct pci_dev *dev, int **vector)
-+static int pci_frontend_enable_msi(struct pci_dev *dev, int vector[])
- {
- 	int err;
- 	struct xen_pci_op op = {
-@@ -330,12 +330,12 @@ static int pci_frontend_enable_msi(struct pci_dev *dev, int **vector)
- 
- 	err = do_pci_op(pdev, &op);
- 	if (likely(!err)) {
--		*(*vector) = op.value;
-+		vector[0] = op.value;
- 		if (op.value <= 0) {
- 			dev_warn(&dev->dev, "MSI entry is invalid: %d!\n",
- 				op.value);
- 			err = -EINVAL;
--			*(*vector) = -1;	
-+			vector[0] = -1;
- 		}
- 	} else {
- 		dev_err(&dev->dev, "pci frontend enable msi failed for dev "
--- 
-1.7.4
-
-
-From 3d74a539ae07a8f3c061332e426fc07b2310cf05 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Thu, 17 Feb 2011 16:12:51 -0500
-Subject: [PATCH 140/203] pci/xen: When free-ing MSI-X/MSI irq->desc also use generic code.
-
-This code path is only run when an MSI/MSI-X PCI device is passed
-in to PV DomU.
-
-In the 2.6.37 time-frame we overrode the default cleanup handler for
-MSI/MSI-X irq->desc to be "xen_teardown_msi_irqs". That function
-calls the xen-pcifront driver, which can tell the backend to
-clean up/take back the MSI/MSI-X device.
-
-However, we forgot to continue the process of freeing the MSI/MSI-X
-device resources (irq->desc) on the PV domU side, which is what
-the default cleanup handler, default_teardown_msi_irqs, did.
-
-Hence we would leak IRQ descriptors.
-
-Without this patch, doing "rmmod igbvf;modprobe igbvf" multiple
-times ends with abandoned IRQ descriptors:
-
- 28:          5  xen-pirq-pcifront-msi-x
- 29:          8  xen-pirq-pcifront-msi-x
-...
-130:         10  xen-pirq-pcifront-msi-x
-
-with the end result of running out of IRQ descriptors.
-
-Reviewed-by: Ian Campbell <Ian.Campbell at citrix.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- arch/x86/pci/xen.c |    3 +++
- 1 files changed, 3 insertions(+), 0 deletions(-)
-
-diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
-index 30fdd09..57afd1d 100644
---- a/arch/x86/pci/xen.c
-+++ b/arch/x86/pci/xen.c
-@@ -193,6 +193,9 @@ static void xen_teardown_msi_irqs(struct pci_dev *dev)
- 		xen_pci_frontend_disable_msix(dev);
- 	else
- 		xen_pci_frontend_disable_msi(dev);
-+
-+	/* Free the IRQ's and the msidesc using the generic code. */
-+	default_teardown_msi_irqs(dev);
- }
- 
- static void xen_teardown_msi_irq(unsigned int irq)
--- 
-1.7.4
-
-
-From 2f14ddc3a7146ea4cd5a3d1ecd993f85f2e4f948 Mon Sep 17 00:00:00 2001
-From: Zhang, Fengzhe <fengzhe.zhang at intel.com>
-Date: Wed, 16 Feb 2011 22:26:20 +0800
-Subject: [PATCH 141/203] xen/setup: Inhibit resource API from using System RAM E820 gaps as PCI mem gaps.
-
-With the hypervisor argument of dom0_mem=X we iterate over the physical
-E820 (only for the initial domain) and subtract a delta from each
-E820_RAM region so that the cumulative size of all E820_RAM regions
-is equal to 'X'. This sometimes ends up with E820_RAM regions of zero size
-(which are removed by e820_sanitize) and E820_RAM regions that are smaller
-than they physically are.
-
-Later on the PCI API looks at the E820 and attempts to set up a
-resource region for the "PCI mem". The E820 (assume dom0_mem=1GB is
-set) compared to the physical one looks as follows:
-
- [    0.000000] BIOS-provided physical RAM map:
- [    0.000000]  Xen: 0000000000000000 - 0000000000097c00 (usable)
- [    0.000000]  Xen: 0000000000097c00 - 0000000000100000 (reserved)
--[    0.000000]  Xen: 0000000000100000 - 00000000defafe00 (usable)
-+[    0.000000]  Xen: 0000000000100000 - 0000000040000000 (usable)
- [    0.000000]  Xen: 00000000defafe00 - 00000000defb1ea0 (ACPI NVS)
- [    0.000000]  Xen: 00000000defb1ea0 - 00000000e0000000 (reserved)
- [    0.000000]  Xen: 00000000f4000000 - 00000000f8000000 (reserved)
-..
-And we get
-[    0.000000] Allocating PCI resources starting at 40000000 (gap: 40000000:9efafe00)
-
-while it should have started at e0000000 (a nice big gap up to
-f4000000 exists). The "Allocating PCI" is part of the resource API.
-
-The users that end up using those PCI I/O regions usually supply their
-own BARs when calling the resource API (request_resource, or allocate_resource),
-but there are exceptions which provide an empty 'struct resource' and
-expect the API to populate the 'struct resource' with valid values.
-The one that triggered this bug was the Intel AGP driver, which requested
-a region for the flush page (intel_i9xx_setup_flush).
-
-Before this patch, when running under the Xen hypervisor, the 'struct resource'
-returned could contain (depending on the dom0_mem size) physical ranges of
-'System RAM' instead of 'I/O' regions. This ended up with the hypervisor failing a request
-to populate PTEs with those PFNs as the domain did not have access to those
-'System RAM' regions (rightly so).
-
-After this patch, the left-over E820_RAM region from the truncation will be
-labeled E820_UNUSABLE. The E820 will look as follows:
-
- [    0.000000] BIOS-provided physical RAM map:
- [    0.000000]  Xen: 0000000000000000 - 0000000000097c00 (usable)
- [    0.000000]  Xen: 0000000000097c00 - 0000000000100000 (reserved)
--[    0.000000]  Xen: 0000000000100000 - 00000000defafe00 (usable)
-+[    0.000000]  Xen: 0000000000100000 - 0000000040000000 (usable)
-+[    0.000000]  Xen: 0000000040000000 - 00000000defafe00 (unusable)
- [    0.000000]  Xen: 00000000defafe00 - 00000000defb1ea0 (ACPI NVS)
- [    0.000000]  Xen: 00000000defb1ea0 - 00000000e0000000 (reserved)
- [    0.000000]  Xen: 00000000f4000000 - 00000000f8000000 (reserved)
-
-For more information:
-http://mid.gmane.org/1A42CE6F5F474C41B63392A5F80372B2335E978C@shsmsx501.ccr.corp.intel.com
-
-BugLink: http://bugzilla.xensource.com/bugzilla/show_bug.cgi?id=1726
-
-Signed-off-by: Fengzhe Zhang <fengzhe.zhang at intel.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- arch/x86/xen/setup.c |    8 ++++++++
- 1 files changed, 8 insertions(+), 0 deletions(-)
-
-diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
-index a8a66a5..2a4add9 100644
---- a/arch/x86/xen/setup.c
-+++ b/arch/x86/xen/setup.c
-@@ -194,6 +194,14 @@ char * __init xen_memory_setup(void)
- 			end -= delta;
- 
- 			extra_pages += PFN_DOWN(delta);
-+			/*
-+			 * Set RAM below 4GB that is not for us to be unusable.
-+			 * This prevents "System RAM" address space from being
-+			 * used as potential resource for I/O address (happens
-+			 * when 'allocate_resource' is called).
-+			 */
-+			if (delta && end < 0x100000000UL)
-+				e820_add_region(end, delta, E820_UNUSABLE);
- 		}
- 
- 		if (map[i].size > 0 && end > xen_extra_mem_start)
--- 
-1.7.4
-
-
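The arithmetic behind this fix is small enough to check by hand. A stand-alone sketch using the numbers from the E820 dump above (dom0_mem=1GB truncating the large usable region), with the cut-off tail re-labelled rather than silently dropped:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t start = 0x100000, size = 0xdeeafe00;	/* original RAM region */
	uint64_t limit = 0x40000000;			/* cut-off from dom0_mem=1GB */
	uint64_t end = start + size;			/* 0xdefafe00 */
	uint64_t delta = end > limit ? end - limit : 0;	/* amount truncated */

	end -= delta;
	printf("usable:   %#llx - %#llx\n",
	       (unsigned long long)start, (unsigned long long)end);
	/* the truncated tail below 4GB becomes "unusable", not a gap */
	if (delta && end < 0x100000000ULL)
		printf("unusable: %#llx - %#llx\n",
		       (unsigned long long)end,
		       (unsigned long long)(end + delta));
	return 0;
}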
-From 458059da8969d8b70a1d0e6600b3f11f91887728 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Fri, 18 Feb 2011 16:43:26 +0000
-Subject: [PATCH 142/203] xen: pci: only define xen_initdom_setup_msi_irqs if CONFIG_XEN_DOM0
-
-Fixes:
- CC      arch/x86/pci/xen.o
-arch/x86/pci/xen.c:183: warning: 'xen_initdom_setup_msi_irqs' defined but not used
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- arch/x86/pci/xen.c |    2 ++
- 1 files changed, 2 insertions(+), 0 deletions(-)
-
-diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
-index 8634e1b..47c4688 100644
---- a/arch/x86/pci/xen.c
-+++ b/arch/x86/pci/xen.c
-@@ -203,6 +203,7 @@ static void xen_teardown_msi_irq(unsigned int irq)
- 	xen_destroy_irq(irq);
- }
- 
-+#ifdef CONFIG_XEN_DOM0
- static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
- {
- 	int irq, ret;
-@@ -224,6 +225,7 @@ error:
- 	return ret;
- }
- #endif
-+#endif
- 
- static int xen_pcifront_enable_irq(struct pci_dev *dev)
- {
--- 
-1.7.4
-
-
-From 290d9163997c4c460f1e6cb666f0e44712bb7087 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Fri, 18 Feb 2011 16:43:27 +0000
-Subject: [PATCH 143/203] xen: events: do not leak IRQ from xen_allocate_pirq_msi when no pirq available.
-
-Cc: Jeremy Fitzhardinge <jeremy at goop.org>
-Cc: xen-devel at lists.xensource.com
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/events.c |    5 ++++-
- 1 files changed, 4 insertions(+), 1 deletions(-)
-
-diff --git a/drivers/xen/events.c b/drivers/xen/events.c
-index 89987a7..bce3035 100644
---- a/drivers/xen/events.c
-+++ b/drivers/xen/events.c
-@@ -676,8 +676,11 @@ void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc)
- 
- 	if (alloc & XEN_ALLOC_PIRQ) {
- 		*pirq = find_unbound_pirq(MAP_PIRQ_TYPE_MSI);
--		if (*pirq == -1)
-+		if (*pirq == -1) {
-+			xen_free_irq(*irq);
-+			*irq = -1;
- 			goto out;
-+		}
- 	}
- 
- 	set_irq_chip_and_handler_name(*irq, &xen_pirq_chip,
--- 
-1.7.4
-
-
-From b0a311e11032c5ac23183906b0d2baa5d0252446 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Fri, 18 Feb 2011 16:43:28 +0000
-Subject: [PATCH 144/203] xen: events: drop XEN_ALLOC_IRQ flag to xen_allocate_pirq_msi
-
-All callers pass this flag so it is pointless.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- arch/x86/pci/xen.c   |    6 +++---
- drivers/xen/events.c |   12 +++++-------
- include/xen/events.h |    5 +----
- 3 files changed, 9 insertions(+), 14 deletions(-)
-
-diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
-index 47c4688..ca5fa09 100644
---- a/arch/x86/pci/xen.c
-+++ b/arch/x86/pci/xen.c
-@@ -101,7 +101,7 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
- 			((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff);
- 		if (xen_irq_from_pirq(pirq) >= 0 && msg.data == XEN_PIRQ_MSI_DATA) {
- 			xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ?
--					"msi-x" : "msi", &irq, &pirq, XEN_ALLOC_IRQ);
-+					"msi-x" : "msi", &irq, &pirq, 0);
- 			if (irq < 0)
- 				goto error;
- 			ret = set_irq_msi(irq, msidesc);
-@@ -112,7 +112,7 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
- 			return 0;
- 		}
- 		xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ?
--				"msi-x" : "msi", &irq, &pirq, (XEN_ALLOC_IRQ | XEN_ALLOC_PIRQ));
-+				"msi-x" : "msi", &irq, &pirq, 1);
- 		if (irq < 0 || pirq < 0)
- 			goto error;
- 		printk(KERN_DEBUG "xen: msi --> irq=%d, pirq=%d\n", irq, pirq);
-@@ -160,7 +160,7 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
- 		xen_allocate_pirq_msi(
- 			(type == PCI_CAP_ID_MSIX) ?
- 			"pcifront-msi-x" : "pcifront-msi",
--			&irq, &v[i], XEN_ALLOC_IRQ);
-+			&irq, &v[i], 0);
- 		if (irq < 0) {
- 			ret = -1;
- 			goto free;
-diff --git a/drivers/xen/events.c b/drivers/xen/events.c
-index bce3035..36e9adc 100644
---- a/drivers/xen/events.c
-+++ b/drivers/xen/events.c
-@@ -664,17 +664,15 @@ static int find_unbound_pirq(int type)
- 	return -1;
- }
- 
--void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc)
-+void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc_pirq)
- {
- 	spin_lock(&irq_mapping_update_lock);
- 
--	if (alloc & XEN_ALLOC_IRQ) {
--		*irq = xen_allocate_irq_dynamic();
--		if (*irq == -1)
--			goto out;
--	}
-+	*irq = xen_allocate_irq_dynamic();
-+	if (*irq == -1)
-+		goto out;
- 
--	if (alloc & XEN_ALLOC_PIRQ) {
-+	if (alloc_pirq) {
- 		*pirq = find_unbound_pirq(MAP_PIRQ_TYPE_MSI);
- 		if (*pirq == -1) {
- 			xen_free_irq(*irq);
-diff --git a/include/xen/events.h b/include/xen/events.h
-index 00f53dd..8d98861 100644
---- a/include/xen/events.h
-+++ b/include/xen/events.h
-@@ -75,10 +75,7 @@ int xen_allocate_pirq(unsigned gsi, int shareable, char *name);
- int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name);
- 
- #ifdef CONFIG_PCI_MSI
--/* Allocate an irq and a pirq to be used with MSIs. */
--#define XEN_ALLOC_PIRQ (1 << 0)
--#define XEN_ALLOC_IRQ  (1 << 1)
--void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc_mask);
-+void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc_pirq);
- int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type);
- #endif
- 
--- 
-1.7.4
-
-
-From d71552da237c3f5fd88422ea19b4e64706e4f813 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Fri, 18 Feb 2011 16:43:29 +0000
-Subject: [PATCH 145/203] xen: events: return irq from xen_allocate_pirq_msi
-
-consistent with other similar functions.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- arch/x86/pci/xen.c   |   12 ++++++------
- drivers/xen/events.c |   19 +++++++++++--------
- include/xen/events.h |    2 +-
- 3 files changed, 18 insertions(+), 15 deletions(-)
-
-diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
-index ca5fa09..6fd695b 100644
---- a/arch/x86/pci/xen.c
-+++ b/arch/x86/pci/xen.c
-@@ -100,8 +100,8 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
- 		pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) |
- 			((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff);
- 		if (xen_irq_from_pirq(pirq) >= 0 && msg.data == XEN_PIRQ_MSI_DATA) {
--			xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ?
--					"msi-x" : "msi", &irq, &pirq, 0);
-+			irq = xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ?
-+						    "msi-x" : "msi", &pirq, 0);
- 			if (irq < 0)
- 				goto error;
- 			ret = set_irq_msi(irq, msidesc);
-@@ -111,8 +111,8 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
- 					" pirq=%d\n", irq, pirq);
- 			return 0;
- 		}
--		xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ?
--				"msi-x" : "msi", &irq, &pirq, 1);
-+		irq = xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ?
-+					    "msi-x" : "msi", &pirq, 1);
- 		if (irq < 0 || pirq < 0)
- 			goto error;
- 		printk(KERN_DEBUG "xen: msi --> irq=%d, pirq=%d\n", irq, pirq);
-@@ -157,10 +157,10 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
- 		goto error;
- 	i = 0;
- 	list_for_each_entry(msidesc, &dev->msi_list, list) {
--		xen_allocate_pirq_msi(
-+		irq = xen_allocate_pirq_msi(
- 			(type == PCI_CAP_ID_MSIX) ?
- 			"pcifront-msi-x" : "pcifront-msi",
--			&irq, &v[i], 0);
-+			&v[i], 0);
- 		if (irq < 0) {
- 			ret = -1;
- 			goto free;
-diff --git a/drivers/xen/events.c b/drivers/xen/events.c
-index 36e9adc..ed3420d 100644
---- a/drivers/xen/events.c
-+++ b/drivers/xen/events.c
-@@ -664,31 +664,34 @@ static int find_unbound_pirq(int type)
- 	return -1;
- }
- 
--void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc_pirq)
-+int xen_allocate_pirq_msi(char *name, int *pirq, int alloc_pirq)
- {
-+	int irq;
-+
- 	spin_lock(&irq_mapping_update_lock);
- 
--	*irq = xen_allocate_irq_dynamic();
--	if (*irq == -1)
-+	irq = xen_allocate_irq_dynamic();
-+	if (irq == -1)
- 		goto out;
- 
- 	if (alloc_pirq) {
- 		*pirq = find_unbound_pirq(MAP_PIRQ_TYPE_MSI);
- 		if (*pirq == -1) {
--			xen_free_irq(*irq);
--			*irq = -1;
-+			xen_free_irq(irq);
-+			irq = -1;
- 			goto out;
- 		}
- 	}
- 
--	set_irq_chip_and_handler_name(*irq, &xen_pirq_chip,
-+	set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
- 				      handle_level_irq, name);
- 
--	irq_info[*irq] = mk_pirq_info(0, *pirq, 0, 0);
--	pirq_to_irq[*pirq] = *irq;
-+	irq_info[irq] = mk_pirq_info(0, *pirq, 0, 0);
-+	pirq_to_irq[*pirq] = irq;
- 
- out:
- 	spin_unlock(&irq_mapping_update_lock);
-+	return irq;
- }
- 
- int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
-diff --git a/include/xen/events.h b/include/xen/events.h
-index 8d98861..f70536a 100644
---- a/include/xen/events.h
-+++ b/include/xen/events.h
-@@ -75,7 +75,7 @@ int xen_allocate_pirq(unsigned gsi, int shareable, char *name);
- int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name);
- 
- #ifdef CONFIG_PCI_MSI
--void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc_pirq);
-+int xen_allocate_pirq_msi(char *name, int *pirq, int alloc_pirq);
- int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type);
- #endif
- 
--- 
-1.7.4
-
-
-From e70c689053946cf87902c06f98941db5ed5ef6b7 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Fri, 18 Feb 2011 16:43:30 +0000
-Subject: [PATCH 146/203] xen: pci: collapse acpi_register_gsi_xen_hvm and xen_hvm_register_pirq
-
-acpi_register_gsi_xen_hvm is a tiny wrapper around
-xen_hvm_register_pirq.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- arch/x86/pci/xen.c |   11 +++--------
- 1 files changed, 3 insertions(+), 8 deletions(-)
-
-diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
-index 6fd695b..0d5087e 100644
---- a/arch/x86/pci/xen.c
-+++ b/arch/x86/pci/xen.c
-@@ -20,7 +20,8 @@
- #include <asm/xen/pci.h>
- 
- #ifdef CONFIG_ACPI
--static int xen_hvm_register_pirq(u32 gsi, int triggering)
-+static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
-+				 int trigger, int polarity)
- {
- 	int rc, irq;
- 	struct physdev_map_pirq map_irq;
-@@ -41,7 +42,7 @@ static int xen_hvm_register_pirq(u32 gsi, int triggering)
- 		return -1;
- 	}
- 
--	if (triggering == ACPI_EDGE_SENSITIVE) {
-+	if (trigger == ACPI_EDGE_SENSITIVE) {
- 		shareable = 0;
- 		name = "ioapic-edge";
- 	} else {
-@@ -55,12 +56,6 @@ static int xen_hvm_register_pirq(u32 gsi, int triggering)
- 
- 	return irq;
- }
--
--static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
--				 int trigger, int polarity)
--{
--	return xen_hvm_register_pirq(gsi, trigger);
--}
- #endif
- 
- #if defined(CONFIG_PCI_MSI)
--- 
-1.7.4
-
-
-From a3b7bc8f4a9c12df1d71d5f9b7eb5c1a85119729 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Fri, 18 Feb 2011 16:43:31 +0000
-Subject: [PATCH 147/203] xen: events: assume PHYSDEVOP_get_free_pirq exists
-
-find_unbound_pirq is called only from xen_allocate_pirq_msi and
-only if alloc_pirq is true. The only caller which does this is
-xen_hvm_setup_msi_irqs. The use of this function is gated, in
-pci_xen_hvm_init, on XENFEAT_hvm_pirqs.
-
-The PHYSDEVOP_get_free_pirq interface was added to the hypervisor in
-22410:be96f6058c05 while XENFEAT_hvm_pirqs was added a couple of
-minutes prior in 22409:6663214f06ac. Therefore we do not need to
-concern ourselves with hypervisors which support XENFEAT_hvm_pirqs but
-not PHYSDEVOP_get_free_pirq.
-
-This eliminates the fallback path in find_unbound_pirq which walks the
-pirq_to_irq array looking for a free pirq. Unlike the
-PHYSDEVOP_get_free_pirq interface this fallback only looks up a free
-pirq but does not reserve it. Removing this fallback will simplify
-locking in the future.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/events.c |   15 ++++++---------
- 1 files changed, 6 insertions(+), 9 deletions(-)
-
-diff --git a/drivers/xen/events.c b/drivers/xen/events.c
-index ed3420d..c21066f 100644
---- a/drivers/xen/events.c
-+++ b/drivers/xen/events.c
-@@ -649,19 +649,16 @@ out:
- 
- static int find_unbound_pirq(int type)
- {
--	int rc, i;
-+	int rc;
- 	struct physdev_get_free_pirq op_get_free_pirq;
--	op_get_free_pirq.type = type;
- 
-+	op_get_free_pirq.type = type;
- 	rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
--	if (!rc)
--		return op_get_free_pirq.pirq;
- 
--	for (i = 0; i < nr_irqs; i++) {
--		if (pirq_to_irq[i] < 0)
--			return i;
--	}
--	return -1;
-+	WARN_ONCE(rc == -ENOSYS,
-+		  "hypervisor does not support the PHYSDEVOP_get_free_pirq interface\n");
-+
-+	return rc ? -1 : op_get_free_pirq.pirq;
- }
- 
- int xen_allocate_pirq_msi(char *name, int *pirq, int alloc_pirq)
--- 
-1.7.4
-
-
-From 3d621208a9221fda2148317b486195e3f7e9d068 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Fri, 18 Feb 2011 16:43:32 +0000
-Subject: [PATCH 148/203] xen: events: separate MSI PIRQ allocation from PIRQ binding to IRQ
-
-Split the binding aspect of xen_allocate_pirq_msi out into a new
-xen_bind_pirq_msi_to_irq function.
-
-In xen_hvm_setup_msi_irqs, when allocating a pirq, write the MSI message
-to signal the PIRQ as soon as the pirq is obtained. There is no way to
-hand the pirq back, so if the subsequent binding to an IRQ fails we
-want to ensure that we will reuse the PIRQ next time rather than leak
-it.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- arch/x86/pci/xen.c   |   68 +++++++++++++++++++------------------------------
- drivers/xen/events.c |   30 ++++++++++-----------
- include/xen/events.h |    4 ++-
- 3 files changed, 43 insertions(+), 59 deletions(-)
-
-diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
-index 0d5087e..93e4215 100644
---- a/arch/x86/pci/xen.c
-+++ b/arch/x86/pci/xen.c
-@@ -86,7 +86,7 @@ static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq,
- 
- static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
- {
--	int irq, pirq, ret = 0;
-+	int irq, pirq;
- 	struct msi_desc *msidesc;
- 	struct msi_msg msg;
- 
-@@ -94,39 +94,32 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
- 		__read_msi_msg(msidesc, &msg);
- 		pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) |
- 			((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff);
--		if (xen_irq_from_pirq(pirq) >= 0 && msg.data == XEN_PIRQ_MSI_DATA) {
--			irq = xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ?
--						    "msi-x" : "msi", &pirq, 0);
--			if (irq < 0)
-+		if (msg.data != XEN_PIRQ_MSI_DATA ||
-+		    xen_irq_from_pirq(pirq) < 0) {
-+			pirq = xen_allocate_pirq_msi(dev, msidesc);
-+			if (pirq < 0)
- 				goto error;
--			ret = set_irq_msi(irq, msidesc);
--			if (ret < 0)
--				goto error_while;
--			printk(KERN_DEBUG "xen: msi already setup: msi --> irq=%d"
--					" pirq=%d\n", irq, pirq);
--			return 0;
-+			xen_msi_compose_msg(dev, pirq, &msg);
-+			__write_msi_msg(msidesc, &msg);
-+			dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq);
-+		} else {
-+			dev_dbg(&dev->dev,
-+				"xen: msi already bound to pirq=%d\n", pirq);
- 		}
--		irq = xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ?
--					    "msi-x" : "msi", &pirq, 1);
--		if (irq < 0 || pirq < 0)
-+		irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq,
-+					       (type == PCI_CAP_ID_MSIX) ?
-+					       "msi-x" : "msi");
-+		if (irq < 0)
- 			goto error;
--		printk(KERN_DEBUG "xen: msi --> irq=%d, pirq=%d\n", irq, pirq);
--		xen_msi_compose_msg(dev, pirq, &msg);
--		ret = set_irq_msi(irq, msidesc);
--		if (ret < 0)
--			goto error_while;
--		write_msi_msg(irq, &msg);
-+		dev_dbg(&dev->dev,
-+			"xen: msi --> pirq=%d --> irq=%d\n", pirq, irq);
- 	}
- 	return 0;
- 
--error_while:
--	unbind_from_irqhandler(irq, NULL);
- error:
--	if (ret == -ENODEV)
--		dev_err(&dev->dev, "Xen PCI frontend has not registered" \
--				" MSI/MSI-X support!\n");
--
--	return ret;
-+	dev_err(&dev->dev,
-+		"Xen PCI frontend has not registered MSI/MSI-X support!\n");
-+	return -ENODEV;
- }
- 
- /*
-@@ -152,28 +145,19 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
- 		goto error;
- 	i = 0;
- 	list_for_each_entry(msidesc, &dev->msi_list, list) {
--		irq = xen_allocate_pirq_msi(
--			(type == PCI_CAP_ID_MSIX) ?
--			"pcifront-msi-x" : "pcifront-msi",
--			&v[i], 0);
--		if (irq < 0) {
--			ret = -1;
-+		irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i],
-+					       (type == PCI_CAP_ID_MSIX) ?
-+					       "pcifront-msi-x" :
-+					       "pcifront-msi");
-+		if (irq < 0)
- 			goto free;
--		}
--		ret = set_irq_msi(irq, msidesc);
--		if (ret)
--			goto error_while;
- 		i++;
- 	}
- 	kfree(v);
- 	return 0;
- 
--error_while:
--	unbind_from_irqhandler(irq, NULL);
- error:
--	if (ret == -ENODEV)
--		dev_err(&dev->dev, "Xen PCI frontend has not registered" \
--			" MSI/MSI-X support!\n");
-+	dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n");
- free:
- 	kfree(v);
- 	return ret;
-diff --git a/drivers/xen/events.c b/drivers/xen/events.c
-index c21066f..1033f62 100644
---- a/drivers/xen/events.c
-+++ b/drivers/xen/events.c
-@@ -647,12 +647,12 @@ out:
- #include <linux/msi.h>
- #include "../pci/msi.h"
- 
--static int find_unbound_pirq(int type)
-+int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc)
- {
- 	int rc;
- 	struct physdev_get_free_pirq op_get_free_pirq;
- 
--	op_get_free_pirq.type = type;
-+	op_get_free_pirq.type = MAP_PIRQ_TYPE_MSI;
- 	rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
- 
- 	WARN_ONCE(rc == -ENOSYS,
-@@ -661,9 +661,10 @@ static int find_unbound_pirq(int type)
- 	return rc ? -1 : op_get_free_pirq.pirq;
- }
- 
--int xen_allocate_pirq_msi(char *name, int *pirq, int alloc_pirq)
-+int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
-+			     int pirq, const char *name)
- {
--	int irq;
-+	int irq, ret;
- 
- 	spin_lock(&irq_mapping_update_lock);
- 
-@@ -671,24 +672,21 @@ int xen_allocate_pirq_msi(char *name, int *pirq, int alloc_pirq)
- 	if (irq == -1)
- 		goto out;
- 
--	if (alloc_pirq) {
--		*pirq = find_unbound_pirq(MAP_PIRQ_TYPE_MSI);
--		if (*pirq == -1) {
--			xen_free_irq(irq);
--			irq = -1;
--			goto out;
--		}
--	}
--
- 	set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
- 				      handle_level_irq, name);
- 
--	irq_info[irq] = mk_pirq_info(0, *pirq, 0, 0);
--	pirq_to_irq[*pirq] = irq;
--
-+	irq_info[irq] = mk_pirq_info(0, pirq, 0, 0);
-+	pirq_to_irq[pirq] = irq;
-+	ret = set_irq_msi(irq, msidesc);
-+	if (ret < 0)
-+		goto error_irq;
- out:
- 	spin_unlock(&irq_mapping_update_lock);
- 	return irq;
-+error_irq:
-+	spin_unlock(&irq_mapping_update_lock);
-+	xen_free_irq(irq);
-+	return -1;
- }
- 
- int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
-diff --git a/include/xen/events.h b/include/xen/events.h
-index f70536a..18bf825 100644
---- a/include/xen/events.h
-+++ b/include/xen/events.h
-@@ -75,7 +75,9 @@ int xen_allocate_pirq(unsigned gsi, int shareable, char *name);
- int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name);
- 
- #ifdef CONFIG_PCI_MSI
--int xen_allocate_pirq_msi(char *name, int *pirq, int alloc_pirq);
-+int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc);
-+int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
-+			     int pirq, const char *name);
- int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type);
- #endif
- 
--- 
-1.7.4
-
-
-From 83ca2344daef90649dac5ff1f22392c2e048d2fb Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Fri, 18 Feb 2011 16:43:33 +0000
-Subject: [PATCH 149/203] xen: events: refactor xen_create_msi_irq slightly
-
-Calling PHYSDEVOP_map_pirq earlier simplifies error handling and
-starts to make the tail end of this function look like
-xen_bind_pirq_msi_to_irq.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/events.c |   15 ++++++---------
- 1 files changed, 6 insertions(+), 9 deletions(-)
-
-diff --git a/drivers/xen/events.c b/drivers/xen/events.c
-index 1033f62..b54285e 100644
---- a/drivers/xen/events.c
-+++ b/drivers/xen/events.c
-@@ -716,6 +716,12 @@ int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
- 		map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
- 	}
- 
-+	rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
-+	if (rc) {
-+		dev_warn(&dev->dev, "xen map irq failed %d\n", rc);
-+		goto out;
-+	}
-+
- 	spin_lock(&irq_mapping_update_lock);
- 
- 	irq = xen_allocate_irq_dynamic();
-@@ -723,15 +729,6 @@ int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
- 	if (irq == -1)
- 		goto out;
- 
--	rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
--	if (rc) {
--		printk(KERN_WARNING "xen map irq failed %d\n", rc);
--
--		xen_free_irq(irq);
--
--		irq = -1;
--		goto out;
--	}
- 	irq_info[irq] = mk_pirq_info(0, map_irq.pirq, 0, map_irq.index);
- 
- 	set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
--- 
-1.7.4
-
-
-From 3c8728dbc7942997ea91bc9dbf6416a3070c6721 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Fri, 18 Feb 2011 16:43:34 +0000
-Subject: [PATCH 150/203] xen: events: update pirq_to_irq in xen_create_msi_irq
-
-I don't think this was a deliberate omission.
-
-Makes the tail end of this function look even more like
-xen_bind_pirq_msi_to_irq.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/events.c |    1 +
- 1 files changed, 1 insertions(+), 0 deletions(-)
-
-diff --git a/drivers/xen/events.c b/drivers/xen/events.c
-index b54285e..721b393 100644
---- a/drivers/xen/events.c
-+++ b/drivers/xen/events.c
-@@ -730,6 +730,7 @@ int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
- 		goto out;
- 
- 	irq_info[irq] = mk_pirq_info(0, map_irq.pirq, 0, map_irq.index);
-+	pirq_to_irq[map_irq.pirq] = irq;
- 
- 	set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
- 			handle_level_irq,
--- 
-1.7.4
-
-
-From a3805cf7bd00c259d46b43aa79b12002db3a2921 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Fri, 18 Feb 2011 16:43:35 +0000
-Subject: [PATCH 151/203] xen: events: push set_irq_msi down into xen_create_msi_irq
-
-Makes the tail end of this function look even more like
-xen_bind_pirq_msi_to_irq.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- arch/x86/pci/xen.c   |   10 +---------
- drivers/xen/events.c |   10 +++++++++-
- 2 files changed, 10 insertions(+), 10 deletions(-)
-
-diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
-index 93e4215..15fd981 100644
---- a/arch/x86/pci/xen.c
-+++ b/arch/x86/pci/xen.c
-@@ -185,23 +185,15 @@ static void xen_teardown_msi_irq(unsigned int irq)
- #ifdef CONFIG_XEN_DOM0
- static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
- {
--	int irq, ret;
-+	int irq;
- 	struct msi_desc *msidesc;
- 
- 	list_for_each_entry(msidesc, &dev->msi_list, list) {
- 		irq = xen_create_msi_irq(dev, msidesc, type);
- 		if (irq < 0)
- 			return -1;
--
--		ret = set_irq_msi(irq, msidesc);
--		if (ret)
--			goto error;
- 	}
- 	return 0;
--
--error:
--	xen_destroy_irq(irq);
--	return ret;
- }
- #endif
- #endif
-diff --git a/drivers/xen/events.c b/drivers/xen/events.c
-index 721b393..77ede77 100644
---- a/drivers/xen/events.c
-+++ b/drivers/xen/events.c
-@@ -691,7 +691,7 @@ error_irq:
- 
- int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
- {
--	int irq = -1;
-+	int ret, irq = -1;
- 	struct physdev_map_pirq map_irq;
- 	int rc;
- 	int pos;
-@@ -736,9 +736,17 @@ int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
- 			handle_level_irq,
- 			(type == PCI_CAP_ID_MSIX) ? "msi-x":"msi");
- 
-+	ret = set_irq_msi(irq, msidesc);
-+	if (ret)
-+		goto out_irq;
-+
- out:
- 	spin_unlock(&irq_mapping_update_lock);
- 	return irq;
-+out_irq:
-+	spin_unlock(&irq_mapping_update_lock);
-+	xen_free_irq(irq);
-+	return -1;
- }
- #endif
- 
--- 
-1.7.4
-
-
-From 84798bd20f98d01fbd4b479cc35dc878d6eb6165 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Fri, 18 Feb 2011 16:43:36 +0000
-Subject: [PATCH 152/203] xen: events: use xen_bind_pirq_msi_to_irq from xen_create_msi_irq
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- arch/x86/pci/xen.c   |    4 ++--
- drivers/xen/events.c |   36 +++++++-----------------------------
- include/xen/events.h |    2 +-
- 3 files changed, 10 insertions(+), 32 deletions(-)
-
-diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
-index 15fd981..ffd8c7a 100644
---- a/arch/x86/pci/xen.c
-+++ b/arch/x86/pci/xen.c
-@@ -106,7 +106,7 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
- 			dev_dbg(&dev->dev,
- 				"xen: msi already bound to pirq=%d\n", pirq);
- 		}
--		irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq,
-+		irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, 0,
- 					       (type == PCI_CAP_ID_MSIX) ?
- 					       "msi-x" : "msi");
- 		if (irq < 0)
-@@ -145,7 +145,7 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
- 		goto error;
- 	i = 0;
- 	list_for_each_entry(msidesc, &dev->msi_list, list) {
--		irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i],
-+		irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0,
- 					       (type == PCI_CAP_ID_MSIX) ?
- 					       "pcifront-msi-x" :
- 					       "pcifront-msi");
-diff --git a/drivers/xen/events.c b/drivers/xen/events.c
-index 77ede77..3446948 100644
---- a/drivers/xen/events.c
-+++ b/drivers/xen/events.c
-@@ -662,7 +662,7 @@ int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc)
- }
- 
- int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
--			     int pirq, const char *name)
-+			     int pirq, int vector, const char *name)
- {
- 	int irq, ret;
- 
-@@ -675,7 +675,7 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
- 	set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
- 				      handle_level_irq, name);
- 
--	irq_info[irq] = mk_pirq_info(0, pirq, 0, 0);
-+	irq_info[irq] = mk_pirq_info(0, pirq, 0, vector);
- 	pirq_to_irq[pirq] = irq;
- 	ret = set_irq_msi(irq, msidesc);
- 	if (ret < 0)
-@@ -691,7 +691,6 @@ error_irq:
- 
- int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
- {
--	int ret, irq = -1;
- 	struct physdev_map_pirq map_irq;
- 	int rc;
- 	int pos;
-@@ -719,34 +718,13 @@ int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
- 	rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
- 	if (rc) {
- 		dev_warn(&dev->dev, "xen map irq failed %d\n", rc);
--		goto out;
-+		return -1;
- 	}
- 
--	spin_lock(&irq_mapping_update_lock);
--
--	irq = xen_allocate_irq_dynamic();
--
--	if (irq == -1)
--		goto out;
--
--	irq_info[irq] = mk_pirq_info(0, map_irq.pirq, 0, map_irq.index);
--	pirq_to_irq[map_irq.pirq] = irq;
--
--	set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
--			handle_level_irq,
--			(type == PCI_CAP_ID_MSIX) ? "msi-x":"msi");
--
--	ret = set_irq_msi(irq, msidesc);
--	if (ret)
--		goto out_irq;
--
--out:
--	spin_unlock(&irq_mapping_update_lock);
--	return irq;
--out_irq:
--	spin_unlock(&irq_mapping_update_lock);
--	xen_free_irq(irq);
--	return -1;
-+	return xen_bind_pirq_msi_to_irq(dev, msidesc,
-+					map_irq.pirq, map_irq.index,
-+					(type == PCI_CAP_ID_MSIX) ?
-+					"msi-x" : "msi");
- }
- #endif
- 
-diff --git a/include/xen/events.h b/include/xen/events.h
-index 18bf825..45c08a0 100644
---- a/include/xen/events.h
-+++ b/include/xen/events.h
-@@ -77,7 +77,7 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name);
- #ifdef CONFIG_PCI_MSI
- int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc);
- int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
--			     int pirq, const char *name);
-+			     int pirq, int vector, const char *name);
- int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type);
- #endif
- 
--- 
-1.7.4
-
-
-From c5ae07bb307b658c8458f29ca77d237aec0f9327 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <Ian.Campbell at citrix.com>
-Date: Fri, 18 Feb 2011 17:06:55 +0000
-Subject: [PATCH 153/203] xen: events: remove dom0 specific xen_create_msi_irq
-
-The function name does not distinguish it from xen_allocate_pirq_msi
-(which operates on domU and pvhvm domains rather than dom0).
-
-Hoist the domain-0-specific functionality up into the only caller, leaving
-the functionality common to all guest types in xen_bind_pirq_msi_to_irq.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- arch/x86/pci/xen.c   |   45 ++++++++++++++++++++++++++++++++++++++++-----
- drivers/xen/events.c |   41 -----------------------------------------
- include/xen/events.h |    1 -
- 3 files changed, 40 insertions(+), 47 deletions(-)
-
-diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
-index ffd8c7a..8c4085a 100644
---- a/arch/x86/pci/xen.c
-+++ b/arch/x86/pci/xen.c
-@@ -185,15 +185,50 @@ static void xen_teardown_msi_irq(unsigned int irq)
- #ifdef CONFIG_XEN_DOM0
- static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
- {
--	int irq;
-+	int ret = 0;
- 	struct msi_desc *msidesc;
- 
- 	list_for_each_entry(msidesc, &dev->msi_list, list) {
--		irq = xen_create_msi_irq(dev, msidesc, type);
--		if (irq < 0)
--			return -1;
-+		struct physdev_map_pirq map_irq;
-+
-+		memset(&map_irq, 0, sizeof(map_irq));
-+		map_irq.domid = DOMID_SELF;
-+		map_irq.type = MAP_PIRQ_TYPE_MSI;
-+		map_irq.index = -1;
-+		map_irq.pirq = -1;
-+		map_irq.bus = dev->bus->number;
-+		map_irq.devfn = dev->devfn;
-+
-+		if (type == PCI_CAP_ID_MSIX) {
-+			int pos;
-+			u32 table_offset, bir;
-+
-+			pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
-+
-+			pci_read_config_dword(dev, pos + PCI_MSIX_TABLE,
-+					      &table_offset);
-+			bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
-+
-+			map_irq.table_base = pci_resource_start(dev, bir);
-+			map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
-+		}
-+
-+		ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
-+		if (ret) {
-+			dev_warn(&dev->dev, "xen map irq failed %d\n", ret);
-+			goto out;
-+		}
-+
-+		ret = xen_bind_pirq_msi_to_irq(dev, msidesc,
-+					       map_irq.pirq, map_irq.index,
-+					       (type == PCI_CAP_ID_MSIX) ?
-+					       "msi-x" : "msi");
-+		if (ret < 0)
-+			goto out;
- 	}
--	return 0;
-+	ret = 0;
-+out:
-+	return ret;
- }
- #endif
- #endif
-diff --git a/drivers/xen/events.c b/drivers/xen/events.c
-index 3446948..6befe62 100644
---- a/drivers/xen/events.c
-+++ b/drivers/xen/events.c
-@@ -644,9 +644,6 @@ out:
- }
- 
- #ifdef CONFIG_PCI_MSI
--#include <linux/msi.h>
--#include "../pci/msi.h"
--
- int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc)
- {
- 	int rc;
-@@ -688,44 +685,6 @@ error_irq:
- 	xen_free_irq(irq);
- 	return -1;
- }
--
--int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
--{
--	struct physdev_map_pirq map_irq;
--	int rc;
--	int pos;
--	u32 table_offset, bir;
--
--	memset(&map_irq, 0, sizeof(map_irq));
--	map_irq.domid = DOMID_SELF;
--	map_irq.type = MAP_PIRQ_TYPE_MSI;
--	map_irq.index = -1;
--	map_irq.pirq = -1;
--	map_irq.bus = dev->bus->number;
--	map_irq.devfn = dev->devfn;
--
--	if (type == PCI_CAP_ID_MSIX) {
--		pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
--
--		pci_read_config_dword(dev, msix_table_offset_reg(pos),
--					&table_offset);
--		bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
--
--		map_irq.table_base = pci_resource_start(dev, bir);
--		map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
--	}
--
--	rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
--	if (rc) {
--		dev_warn(&dev->dev, "xen map irq failed %d\n", rc);
--		return -1;
--	}
--
--	return xen_bind_pirq_msi_to_irq(dev, msidesc,
--					map_irq.pirq, map_irq.index,
--					(type == PCI_CAP_ID_MSIX) ?
--					"msi-x" : "msi");
--}
- #endif
- 
- int xen_destroy_irq(int irq)
-diff --git a/include/xen/events.h b/include/xen/events.h
-index 45c08a0..962da2c 100644
---- a/include/xen/events.h
-+++ b/include/xen/events.h
-@@ -78,7 +78,6 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name);
- int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc);
- int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
- 			     int pirq, int vector, const char *name);
--int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type);
- #endif
- 
- /* De-allocates the above mentioned physical interrupt. */
--- 
-1.7.4
-
-
-From 77c35acb7c81cd94c6b30a3bef488dd2d8145131 Mon Sep 17 00:00:00 2001
-From: Daniel De Graaf <dgdegra at tycho.nsa.gov>
-Date: Wed, 23 Feb 2011 08:11:35 -0500
-Subject: [PATCH 154/203] xen-gntdev: Fix incorrect use of zero handle
-
-The handle with numeric value 0 is a valid map handle, so it cannot
-be used to indicate that a page has not been mapped. Use -1 instead.
-
-Signed-off-by: Daniel De Graaf <dgdegra at tycho.nsa.gov>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/gntdev.c |   23 ++++++++++++++++-------
- 1 files changed, 16 insertions(+), 7 deletions(-)
-
-diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
-index 2a4733c..cdc28dc 100644
---- a/drivers/xen/gntdev.c
-+++ b/drivers/xen/gntdev.c
-@@ -126,6 +126,8 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
- 		add->pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
- 		if (add->pages[i] == NULL)
- 			goto err;
-+		add->map_ops[i].handle = -1;
-+		add->unmap_ops[i].handle = -1;
- 	}
- 
- 	add->index = 0;
-@@ -248,7 +250,7 @@ static int find_grant_ptes(pte_t *pte, pgtable_t token,
- 			  map->grants[pgnr].ref,
- 			  map->grants[pgnr].domid);
- 	gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, flags,
--			    0 /* handle */);
-+			    -1 /* handle */);
- 	return 0;
- }
- 
-@@ -259,7 +261,7 @@ static int map_grant_pages(struct grant_map *map)
- 
- 	if (!use_ptemod) {
- 		/* Note: it could already be mapped */
--		if (map->map_ops[0].handle)
-+		if (map->map_ops[0].handle != -1)
- 			return 0;
- 		for (i = 0; i < map->count; i++) {
- 			addr = (phys_addr_t)
-@@ -268,7 +270,7 @@ static int map_grant_pages(struct grant_map *map)
- 				map->grants[i].ref,
- 				map->grants[i].domid);
- 			gnttab_set_unmap_op(&map->unmap_ops[i], addr,
--				map->flags, 0 /* handle */);
-+				map->flags, -1 /* handle */);
- 		}
- 	}
- 
-@@ -280,7 +282,11 @@ static int map_grant_pages(struct grant_map *map)
- 	for (i = 0; i < map->count; i++) {
- 		if (map->map_ops[i].status)
- 			err = -EINVAL;
--		map->unmap_ops[i].handle = map->map_ops[i].handle;
-+		else {
-+			BUG_ON(map->map_ops[i].handle == -1);
-+			map->unmap_ops[i].handle = map->map_ops[i].handle;
-+			pr_debug("map handle=%d\n", map->map_ops[i].handle);
-+		}
- 	}
- 	return err;
- }
-@@ -313,7 +319,10 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages)
- 	for (i = 0; i < pages; i++) {
- 		if (map->unmap_ops[offset+i].status)
- 			err = -EINVAL;
--		map->unmap_ops[offset+i].handle = 0;
-+		pr_debug("unmap handle=%d st=%d\n",
-+			map->unmap_ops[offset+i].handle,
-+			map->unmap_ops[offset+i].status);
-+		map->unmap_ops[offset+i].handle = -1;
- 	}
- 	return err;
- }
-@@ -328,13 +337,13 @@ static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
- 	 * already unmapped some of the grants. Only unmap valid ranges.
- 	 */
- 	while (pages && !err) {
--		while (pages && !map->unmap_ops[offset].handle) {
-+		while (pages && map->unmap_ops[offset].handle == -1) {
- 			offset++;
- 			pages--;
- 		}
- 		range = 0;
- 		while (range < pages) {
--			if (!map->unmap_ops[offset+range].handle) {
-+			if (map->unmap_ops[offset+range].handle == -1) {
- 				range--;
- 				break;
- 			}
--- 
-1.7.4
-
-
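The bug class fixed above is general: once 0 is a legal handle value, it cannot double as the "not mapped" marker. A self-contained C sketch of the corrected idiom (illustrative names, not the gntdev API):

#include <stdint.h>
#include <stdbool.h>

#define HANDLE_INVALID ((int32_t)-1)	/* all-ones sentinel; 0 stays a valid handle */

struct mapping {
	int32_t handle;
};

static void mapping_init(struct mapping *m)
{
	m->handle = HANDLE_INVALID;	/* mark unmapped up front, as gntdev_alloc_map() now does */
}

static bool mapping_is_live(const struct mapping *m)
{
	return m->handle != HANDLE_INVALID;	/* handle 0 now correctly counts as mapped */
}
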
-From f4ee4af447b67135de7eb8a6615811c13ce938e2 Mon Sep 17 00:00:00 2001
-From: Daniel De Graaf <dgdegra at tycho.nsa.gov>
-Date: Wed, 23 Feb 2011 08:11:36 -0500
-Subject: [PATCH 155/203] xen-gntdev: Add cast to pointer
-
-Signed-off-by: Daniel De Graaf <dgdegra at tycho.nsa.gov>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/gntdev.c |    4 ++--
- 1 files changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
-index cdc28dc..d43ff30 100644
---- a/drivers/xen/gntdev.c
-+++ b/drivers/xen/gntdev.c
-@@ -298,8 +298,8 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages)
- 	if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
- 		int pgno = (map->notify.addr >> PAGE_SHIFT);
- 		if (pgno >= offset && pgno < offset + pages && use_ptemod) {
--			void __user *tmp;
--			tmp = map->vma->vm_start + map->notify.addr;
-+			void __user *tmp = (void __user *)
-+				map->vma->vm_start + map->notify.addr;
- 			err = copy_to_user(tmp, &err, 1);
- 			if (err)
- 				return err;
--- 
-1.7.4
-
-
-From 2bd3f2608788e776ff0524ce77cca995094191a1 Mon Sep 17 00:00:00 2001
-From: Alex Deucher <alexdeucher at gmail.com>
-Date: Thu, 24 Feb 2011 03:16:21 -0500
-Subject: [PATCH 156/203] drm/radeon/kms: clean up gart dummy page handling
-
-As per Konrad's original patch, the dummy page used
-by the gart code and allocated in radeon_gart_init()
-was not freed properly in radeon_gart_fini().
-
-At the same time r6xx and newer allocated and freed the
-dummy page on their own.  So to do Konrad's patch one
-better, just remove the allocation and freeing of the
-dummy page in the r6xx, 7xx, evergreen, and ni code and
-allocate and free it in the gart_init/fini() functions for
-all asics.
-
-Cc: Jerome Glisse <jglisse at redhat.com>
-Signed-off-by: Alex Deucher <alexdeucher at gmail.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/gpu/drm/radeon/evergreen.c   |    4 ----
- drivers/gpu/drm/radeon/r600.c        |    4 ----
- drivers/gpu/drm/radeon/radeon_gart.c |    2 ++
- drivers/gpu/drm/radeon/rv770.c       |    4 ----
- 4 files changed, 2 insertions(+), 12 deletions(-)
-
-diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
-index d270b3f..f643133 100644
---- a/drivers/gpu/drm/radeon/evergreen.c
-+++ b/drivers/gpu/drm/radeon/evergreen.c
-@@ -3048,9 +3048,6 @@ int evergreen_init(struct radeon_device *rdev)
- {
- 	int r;
- 
--	r = radeon_dummy_page_init(rdev);
--	if (r)
--		return r;
- 	/* This don't do much */
- 	r = radeon_gem_init(rdev);
- 	if (r)
-@@ -3162,7 +3159,6 @@ void evergreen_fini(struct radeon_device *rdev)
- 	radeon_atombios_fini(rdev);
- 	kfree(rdev->bios);
- 	rdev->bios = NULL;
--	radeon_dummy_page_fini(rdev);
- }
- 
- static void evergreen_pcie_gen2_enable(struct radeon_device *rdev)
-diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
-index de88624..36efc45 100644
---- a/drivers/gpu/drm/radeon/r600.c
-+++ b/drivers/gpu/drm/radeon/r600.c
-@@ -2509,9 +2509,6 @@ int r600_init(struct radeon_device *rdev)
- {
- 	int r;
- 
--	r = radeon_dummy_page_init(rdev);
--	if (r)
--		return r;
- 	if (r600_debugfs_mc_info_init(rdev)) {
- 		DRM_ERROR("Failed to register debugfs file for mc !\n");
- 	}
-@@ -2625,7 +2622,6 @@ void r600_fini(struct radeon_device *rdev)
- 	radeon_atombios_fini(rdev);
- 	kfree(rdev->bios);
- 	rdev->bios = NULL;
--	radeon_dummy_page_fini(rdev);
- }
- 
- 
-diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
-index 6501611..dc04c7b 100644
---- a/drivers/gpu/drm/radeon/radeon_gart.c
-+++ b/drivers/gpu/drm/radeon/radeon_gart.c
-@@ -269,4 +269,6 @@ void radeon_gart_fini(struct radeon_device *rdev)
- 	kfree(rdev->gart.pages_addr);
- 	rdev->gart.pages = NULL;
- 	rdev->gart.pages_addr = NULL;
-+
-+	radeon_dummy_page_fini(rdev);
- }
-diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
-index d8ba676..6a312e6 100644
---- a/drivers/gpu/drm/radeon/rv770.c
-+++ b/drivers/gpu/drm/radeon/rv770.c
-@@ -1256,9 +1256,6 @@ int rv770_init(struct radeon_device *rdev)
- {
- 	int r;
- 
--	r = radeon_dummy_page_init(rdev);
--	if (r)
--		return r;
- 	/* This don't do much */
- 	r = radeon_gem_init(rdev);
- 	if (r)
-@@ -1373,7 +1370,6 @@ void rv770_fini(struct radeon_device *rdev)
- 	radeon_atombios_fini(rdev);
- 	kfree(rdev->bios);
- 	rdev->bios = NULL;
--	radeon_dummy_page_fini(rdev);
- }
- 
- static void rv770_pcie_gen2_enable(struct radeon_device *rdev)
--- 
-1.7.4
-
-
-From 36ff9d246ac231fe6bce55121ddd6f799c9b0c3b Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Mon, 29 Nov 2010 13:52:18 -0500
-Subject: [PATCH 157/203] ttm: Introduce a placeholder for DMA (bus) addresses.
-
-This is right now limited to only non-pool constructs.
-
-[v2: Fixed indentation issues, add review-by tag]
-
-Reviewed-by: Thomas Hellstrom <thomas at shipmail.org>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Tested-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/gpu/drm/ttm/ttm_page_alloc.c |    8 +++++---
- drivers/gpu/drm/ttm/ttm_tt.c         |   10 ++++++++--
- include/drm/ttm/ttm_bo_driver.h      |    2 ++
- include/drm/ttm/ttm_page_alloc.h     |    8 ++++++--
- 4 files changed, 21 insertions(+), 7 deletions(-)
-
-diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
-index b1e02ff..9d9d929 100644
---- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
-+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
-@@ -38,6 +38,7 @@
- #include <linux/mm.h>
- #include <linux/seq_file.h> /* for seq_printf */
- #include <linux/slab.h>
-+#include <linux/dma-mapping.h>
- 
- #include <asm/atomic.h>
- 
-@@ -662,7 +663,8 @@ out:
-  * cached pages.
-  */
- int ttm_get_pages(struct list_head *pages, int flags,
--		enum ttm_caching_state cstate, unsigned count)
-+		  enum ttm_caching_state cstate, unsigned count,
-+		  dma_addr_t *dma_address)
- {
- 	struct ttm_page_pool *pool = ttm_get_pool(flags, cstate);
- 	struct page *p = NULL;
-@@ -720,7 +722,7 @@ int ttm_get_pages(struct list_head *pages, int flags,
- 			printk(KERN_ERR TTM_PFX
- 			       "Failed to allocate extra pages "
- 			       "for large request.");
--			ttm_put_pages(pages, 0, flags, cstate);
-+			ttm_put_pages(pages, 0, flags, cstate, NULL);
- 			return r;
- 		}
- 	}
-@@ -731,7 +733,7 @@ int ttm_get_pages(struct list_head *pages, int flags,
- 
- /* Put all pages in pages list to correct pool to wait for reuse */
- void ttm_put_pages(struct list_head *pages, unsigned page_count, int flags,
--		enum ttm_caching_state cstate)
-+		   enum ttm_caching_state cstate, dma_addr_t *dma_address)
- {
- 	unsigned long irq_flags;
- 	struct ttm_page_pool *pool = ttm_get_pool(flags, cstate);
-diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
-index af789dc..0d39001 100644
---- a/drivers/gpu/drm/ttm/ttm_tt.c
-+++ b/drivers/gpu/drm/ttm/ttm_tt.c
-@@ -49,12 +49,16 @@ static int ttm_tt_swapin(struct ttm_tt *ttm);
- static void ttm_tt_alloc_page_directory(struct ttm_tt *ttm)
- {
- 	ttm->pages = drm_calloc_large(ttm->num_pages, sizeof(*ttm->pages));
-+	ttm->dma_address = drm_calloc_large(ttm->num_pages,
-+					    sizeof(*ttm->dma_address));
- }
- 
- static void ttm_tt_free_page_directory(struct ttm_tt *ttm)
- {
- 	drm_free_large(ttm->pages);
- 	ttm->pages = NULL;
-+	drm_free_large(ttm->dma_address);
-+	ttm->dma_address = NULL;
- }
- 
- static void ttm_tt_free_user_pages(struct ttm_tt *ttm)
-@@ -105,7 +109,8 @@ static struct page *__ttm_tt_get_page(struct ttm_tt *ttm, int index)
- 
- 		INIT_LIST_HEAD(&h);
- 
--		ret = ttm_get_pages(&h, ttm->page_flags, ttm->caching_state, 1);
-+		ret = ttm_get_pages(&h, ttm->page_flags, ttm->caching_state, 1,
-+				    &ttm->dma_address[index]);
- 
- 		if (ret != 0)
- 			return NULL;
-@@ -298,7 +303,8 @@ static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm)
- 			count++;
- 		}
- 	}
--	ttm_put_pages(&h, count, ttm->page_flags, ttm->caching_state);
-+	ttm_put_pages(&h, count, ttm->page_flags, ttm->caching_state,
-+		      ttm->dma_address);
- 	ttm->state = tt_unpopulated;
- 	ttm->first_himem_page = ttm->num_pages;
- 	ttm->last_lomem_page = -1;
-diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
-index 1da8af6..1cff8b8 100644
---- a/include/drm/ttm/ttm_bo_driver.h
-+++ b/include/drm/ttm/ttm_bo_driver.h
-@@ -149,6 +149,7 @@ enum ttm_caching_state {
-  * @swap_storage: Pointer to shmem struct file for swap storage.
-  * @caching_state: The current caching state of the pages.
-  * @state: The current binding state of the pages.
-+ * @dma_address: The DMA (bus) addresses of the pages (if TTM_PAGE_FLAG_DMA32)
-  *
-  * This is a structure holding the pages, caching- and aperture binding
-  * status for a buffer object that isn't backed by fixed (VRAM / AGP)
-@@ -173,6 +174,7 @@ struct ttm_tt {
- 		tt_unbound,
- 		tt_unpopulated,
- 	} state;
-+	dma_addr_t *dma_address;
- };
- 
- #define TTM_MEMTYPE_FLAG_FIXED         (1 << 0)	/* Fixed (on-card) PCI memory */
-diff --git a/include/drm/ttm/ttm_page_alloc.h b/include/drm/ttm/ttm_page_alloc.h
-index 1168214..8062890 100644
---- a/include/drm/ttm/ttm_page_alloc.h
-+++ b/include/drm/ttm/ttm_page_alloc.h
-@@ -36,11 +36,13 @@
-  * @flags: ttm flags for page allocation.
-  * @cstate: ttm caching state for the page.
-  * @count: number of pages to allocate.
-+ * @dma_address: The DMA (bus) address of pages (if TTM_PAGE_FLAG_DMA32 set).
-  */
- int ttm_get_pages(struct list_head *pages,
- 		  int flags,
- 		  enum ttm_caching_state cstate,
--		  unsigned count);
-+		  unsigned count,
-+		  dma_addr_t *dma_address);
- /**
-  * Put linked list of pages to pool.
-  *
-@@ -49,11 +51,13 @@ int ttm_get_pages(struct list_head *pages,
-  * count.
-  * @flags: ttm flags for page allocation.
-  * @cstate: ttm caching state.
-+ * @dma_address: The DMA (bus) address of pages (if TTM_PAGE_FLAG_DMA32 set).
-  */
- void ttm_put_pages(struct list_head *pages,
- 		   unsigned page_count,
- 		   int flags,
--		   enum ttm_caching_state cstate);
-+		   enum ttm_caching_state cstate,
-+		   dma_addr_t *dma_address);
- /**
-  * Initialize pool allocator.
-  */
--- 
-1.7.4
-
-
-From 0438748313f46e887aca12ef0cfc4178f2dfe920 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Mon, 29 Nov 2010 14:03:30 -0500
-Subject: [PATCH 158/203] ttm: Utilize the DMA API for pages that have TTM_PAGE_FLAG_DMA32 set.
-
-For pages that have the TTM_PAGE_FLAG_DMA32 flag set, we
-use the DMA API. We save the bus address in our array, which we
-use to program the GART (see "radeon/ttm/PCIe: Use dma_addr if TTM
-has set it." and "nouveau/ttm/PCIe: Use dma_addr if TTM has set it.").
-
-The reason behind using the DMA API is that under Xen we would
-end up programming the GART with the bounce buffer (SWIOTLB)
-DMA address instead of the physical DMA address of the TTM page.
-The reason is that alloc_page with GFP_DMA32 does not allocate
-pages under the 4GB mark when running under the Xen hypervisor.
-
-On bare metal this means we do the DMA API call earlier, instead
-of when we program the GART.
-
-For details please refer to:
-https://lkml.org/lkml/2011/1/7/251
-
-[v2: Fixed indentation, revised desc, added Reviewed-by]
-Reviewed-by: Thomas Hellstrom <thomas at shipmail.org>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Tested-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/gpu/drm/ttm/ttm_page_alloc.c |   26 +++++++++++++++++++++++---
- 1 files changed, 23 insertions(+), 3 deletions(-)
-
-diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
-index 9d9d929..737a2a2 100644
---- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
-+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
-@@ -683,14 +683,22 @@ int ttm_get_pages(struct list_head *pages, int flags,
- 			gfp_flags |= GFP_HIGHUSER;
- 
- 		for (r = 0; r < count; ++r) {
--			p = alloc_page(gfp_flags);
-+			if ((flags & TTM_PAGE_FLAG_DMA32) && dma_address) {
-+				void *addr;
-+				addr = dma_alloc_coherent(NULL, PAGE_SIZE,
-+							  &dma_address[r],
-+							  gfp_flags);
-+				if (addr == NULL)
-+					return -ENOMEM;
-+				p = virt_to_page(addr);
-+			} else
-+				p = alloc_page(gfp_flags);
- 			if (!p) {
- 
- 				printk(KERN_ERR TTM_PFX
- 				       "Unable to allocate page.");
- 				return -ENOMEM;
- 			}
--
- 			list_add(&p->lru, pages);
- 		}
- 		return 0;
-@@ -738,12 +746,24 @@ void ttm_put_pages(struct list_head *pages, unsigned page_count, int flags,
- 	unsigned long irq_flags;
- 	struct ttm_page_pool *pool = ttm_get_pool(flags, cstate);
- 	struct page *p, *tmp;
-+	unsigned r;
- 
- 	if (pool == NULL) {
- 		/* No pool for this memory type so free the pages */
- 
-+		r = page_count-1;
- 		list_for_each_entry_safe(p, tmp, pages, lru) {
--			__free_page(p);
-+			if ((flags & TTM_PAGE_FLAG_DMA32) && dma_address) {
-+				void *addr = page_address(p);
-+				WARN_ON(!addr || !dma_address[r]);
-+				if (addr)
-+					dma_free_coherent(NULL, PAGE_SIZE,
-+							  addr,
-+							  dma_address[r]);
-+				dma_address[r] = 0;
-+			} else
-+				__free_page(p);
-+			r--;
- 		}
- 		/* Make the pages list empty */
- 		INIT_LIST_HEAD(pages);
--- 
-1.7.4
-
-
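The heart of this patch is the allocation path: instead of alloc_page() (which under Xen can return a machine page above 4GB even with GFP_DMA32), the page comes from a coherent DMA allocation, so its bus address is known to be usable by the device. A hedged sketch of that pattern, simplified from the ttm_get_pages() hunk rather than a drop-in replacement:

#include <linux/dma-mapping.h>
#include <linux/mm.h>

/* Allocate one page whose bus address the device can use, saving that
 * address for later GART programming. */
static struct page *alloc_dma32_page(struct device *dev, dma_addr_t *dma)
{
	void *addr = dma_alloc_coherent(dev, PAGE_SIZE, dma, GFP_KERNEL);

	if (!addr)
		return NULL;		/* caller treats this as -ENOMEM */
	return virt_to_page(addr);	/* the same virt_to_page() step the hunk uses */
}
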
-From 6b1f175951de66ac07e765a07d4f22004c0d7ce2 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Thu, 2 Dec 2010 10:24:13 -0500
-Subject: [PATCH 159/203] ttm: Expand (*populate) to support an array of DMA addresses.
-
-We pass in the array of ttm pages to be populated in the GART/MM
-of the card (or AGP). Patch titled: "ttm: Utilize the DMA API for
-pages that have TTM_PAGE_FLAG_DMA32 set." uses the DMA API to make
-those pages have a proper DMA addresses (in the situation where
-page_to_phys or virt_to_phys do not give use the DMA (bus) address).
-
-Since we are using the DMA API on those pages, we should pass in the
-DMA address to this function so it can save it in its proper fields
-(later patches use it).
-
-[v2: Added reviewed-by tag]
-
-Reviewed-by: Thomas Hellstrom <thellstrom at shipmail.org>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Tested-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/gpu/drm/nouveau/nouveau_sgdma.c |    3 ++-
- drivers/gpu/drm/radeon/radeon_ttm.c     |    3 ++-
- drivers/gpu/drm/ttm/ttm_agp_backend.c   |    3 ++-
- drivers/gpu/drm/ttm/ttm_tt.c            |    2 +-
- drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c  |    3 ++-
- include/drm/ttm/ttm_bo_driver.h         |    4 +++-
- 6 files changed, 12 insertions(+), 6 deletions(-)
-
-diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
-index 9a250eb..c3c0674 100644
---- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
-+++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
-@@ -20,7 +20,8 @@ struct nouveau_sgdma_be {
- 
- static int
- nouveau_sgdma_populate(struct ttm_backend *be, unsigned long num_pages,
--		       struct page **pages, struct page *dummy_read_page)
-+		       struct page **pages, struct page *dummy_read_page,
-+		       dma_addr_t *dma_addrs)
- {
- 	struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
- 	struct drm_device *dev = nvbe->dev;
-diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
-index e5b2cf1..81f422d 100644
---- a/drivers/gpu/drm/radeon/radeon_ttm.c
-+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
-@@ -655,7 +655,8 @@ struct radeon_ttm_backend {
- static int radeon_ttm_backend_populate(struct ttm_backend *backend,
- 				       unsigned long num_pages,
- 				       struct page **pages,
--				       struct page *dummy_read_page)
-+				       struct page *dummy_read_page,
-+				       dma_addr_t *dma_addrs)
- {
- 	struct radeon_ttm_backend *gtt;
- 
-diff --git a/drivers/gpu/drm/ttm/ttm_agp_backend.c b/drivers/gpu/drm/ttm/ttm_agp_backend.c
-index f999e36..1c4a72f 100644
---- a/drivers/gpu/drm/ttm/ttm_agp_backend.c
-+++ b/drivers/gpu/drm/ttm/ttm_agp_backend.c
-@@ -47,7 +47,8 @@ struct ttm_agp_backend {
- 
- static int ttm_agp_populate(struct ttm_backend *backend,
- 			    unsigned long num_pages, struct page **pages,
--			    struct page *dummy_read_page)
-+			    struct page *dummy_read_page,
-+			    dma_addr_t *dma_addrs)
- {
- 	struct ttm_agp_backend *agp_be =
- 	    container_of(backend, struct ttm_agp_backend, backend);
-diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
-index 0d39001..86d5b17 100644
---- a/drivers/gpu/drm/ttm/ttm_tt.c
-+++ b/drivers/gpu/drm/ttm/ttm_tt.c
-@@ -169,7 +169,7 @@ int ttm_tt_populate(struct ttm_tt *ttm)
- 	}
- 
- 	be->func->populate(be, ttm->num_pages, ttm->pages,
--			   ttm->dummy_read_page);
-+			   ttm->dummy_read_page, ttm->dma_address);
- 	ttm->state = tt_unbound;
- 	return 0;
- }
-diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
-index 80bc37b..87e43e0 100644
---- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
-+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
-@@ -102,7 +102,8 @@ struct vmw_ttm_backend {
- 
- static int vmw_ttm_populate(struct ttm_backend *backend,
- 			    unsigned long num_pages, struct page **pages,
--			    struct page *dummy_read_page)
-+			    struct page *dummy_read_page,
-+			    dma_addr_t *dma_addrs)
- {
- 	struct vmw_ttm_backend *vmw_be =
- 	    container_of(backend, struct vmw_ttm_backend, backend);
-diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
-index 1cff8b8..efed082 100644
---- a/include/drm/ttm/ttm_bo_driver.h
-+++ b/include/drm/ttm/ttm_bo_driver.h
-@@ -50,13 +50,15 @@ struct ttm_backend_func {
- 	 * @pages: Array of pointers to ttm pages.
- 	 * @dummy_read_page: Page to be used instead of NULL pages in the
- 	 * array @pages.
-+	 * @dma_addrs: Array of DMA (bus) address of the ttm pages.
- 	 *
- 	 * Populate the backend with ttm pages. Depending on the backend,
- 	 * it may or may not copy the @pages array.
- 	 */
- 	int (*populate) (struct ttm_backend *backend,
- 			 unsigned long num_pages, struct page **pages,
--			 struct page *dummy_read_page);
-+			 struct page *dummy_read_page,
-+			 dma_addr_t *dma_addrs);
- 	/**
- 	 * struct ttm_backend_func member clear
- 	 *
--- 
-1.7.4
-
-
-From eadf243146fbcd88ead082be5341996e52c13b73 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Thu, 24 Feb 2011 11:23:49 -0500
-Subject: [PATCH 160/203] radeon/ttm/PCIe: Use dma_addr if TTM has set it.
-
-If the TTM layer has used the DMA API to setup pages that are
-TTM_PAGE_FLAG_DMA32 (look at patch titled: "ttm: Utilize the dma_addr_t
-array for pages that are to in DMA32 pool."), lets use it
-when programming the GART in the PCIe type cards.
-
-This patch skips doing the pci_map_page (and pci_unmap_page) if
-there is a DMA addresses passed in for that page. If the dma_address
-is zero (or DMA_ERROR_CODE), then we continue on with our old
-behaviour.
-
-[v2: Fixed an indentation problem, added reviewed-by tag]
-[v3: Added Acked-by Jerome]
-
-Acked-by: Jerome Glisse <j.glisse at gmail.com>
-Reviewed-by: Thomas Hellstrom <thomas at shipmail.org>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Tested-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/gpu/drm/radeon/radeon.h      |    4 ++-
- drivers/gpu/drm/radeon/radeon_gart.c |   37 +++++++++++++++++++++++----------
- drivers/gpu/drm/radeon/radeon_ttm.c  |    5 +++-
- 3 files changed, 33 insertions(+), 13 deletions(-)
-
-diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
-index 56c48b6..c5955d3 100644
---- a/drivers/gpu/drm/radeon/radeon.h
-+++ b/drivers/gpu/drm/radeon/radeon.h
-@@ -319,6 +319,7 @@ struct radeon_gart {
- 	union radeon_gart_table		table;
- 	struct page			**pages;
- 	dma_addr_t			*pages_addr;
-+	bool				*ttm_alloced;
- 	bool				ready;
- };
- 
-@@ -331,7 +332,8 @@ void radeon_gart_fini(struct radeon_device *rdev);
- void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
- 			int pages);
- int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
--		     int pages, struct page **pagelist);
-+		     int pages, struct page **pagelist,
-+		     dma_addr_t *dma_addr);
- 
- 
- /*
-diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
-index dc04c7b..de4a86f 100644
---- a/drivers/gpu/drm/radeon/radeon_gart.c
-+++ b/drivers/gpu/drm/radeon/radeon_gart.c
-@@ -149,8 +149,9 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
- 	p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);
- 	for (i = 0; i < pages; i++, p++) {
- 		if (rdev->gart.pages[p]) {
--			pci_unmap_page(rdev->pdev, rdev->gart.pages_addr[p],
--				       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
-+			if (!rdev->gart.ttm_alloced[p])
-+				pci_unmap_page(rdev->pdev, rdev->gart.pages_addr[p],
-+				       		PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
- 			rdev->gart.pages[p] = NULL;
- 			rdev->gart.pages_addr[p] = rdev->dummy_page.addr;
- 			page_base = rdev->gart.pages_addr[p];
-@@ -165,7 +166,7 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
- }
- 
- int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
--		     int pages, struct page **pagelist)
-+		     int pages, struct page **pagelist, dma_addr_t *dma_addr)
- {
- 	unsigned t;
- 	unsigned p;
-@@ -180,15 +181,22 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
- 	p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);
- 
- 	for (i = 0; i < pages; i++, p++) {
--		/* we need to support large memory configurations */
--		/* assume that unbind have already been call on the range */
--		rdev->gart.pages_addr[p] = pci_map_page(rdev->pdev, pagelist[i],
-+		/* On TTM path, we only use the DMA API if TTM_PAGE_FLAG_DMA32
-+		 * is requested. */
-+		if (dma_addr[i] != DMA_ERROR_CODE) {
-+			rdev->gart.ttm_alloced[p] = true;
-+			rdev->gart.pages_addr[p] = dma_addr[i];
-+		} else {
-+			/* we need to support large memory configurations */
-+			/* assume that unbind have already been call on the range */
-+			rdev->gart.pages_addr[p] = pci_map_page(rdev->pdev, pagelist[i],
- 							0, PAGE_SIZE,
- 							PCI_DMA_BIDIRECTIONAL);
--		if (pci_dma_mapping_error(rdev->pdev, rdev->gart.pages_addr[p])) {
--			/* FIXME: failed to map page (return -ENOMEM?) */
--			radeon_gart_unbind(rdev, offset, pages);
--			return -ENOMEM;
-+			if (pci_dma_mapping_error(rdev->pdev, rdev->gart.pages_addr[p])) {
-+				/* FIXME: failed to map page (return -ENOMEM?) */
-+				radeon_gart_unbind(rdev, offset, pages);
-+				return -ENOMEM;
-+			}
- 		}
- 		rdev->gart.pages[p] = pagelist[i];
- 		page_base = rdev->gart.pages_addr[p];
-@@ -251,6 +259,12 @@ int radeon_gart_init(struct radeon_device *rdev)
- 		radeon_gart_fini(rdev);
- 		return -ENOMEM;
- 	}
-+	rdev->gart.ttm_alloced = kzalloc(sizeof(bool) *
-+					 rdev->gart.num_cpu_pages, GFP_KERNEL);
-+	if (rdev->gart.ttm_alloced == NULL) {
-+		radeon_gart_fini(rdev);
-+		return -ENOMEM;
-+	}
- 	/* set GART entry to point to the dummy page by default */
- 	for (i = 0; i < rdev->gart.num_cpu_pages; i++) {
- 		rdev->gart.pages_addr[i] = rdev->dummy_page.addr;
-@@ -267,8 +281,9 @@ void radeon_gart_fini(struct radeon_device *rdev)
- 	rdev->gart.ready = false;
- 	kfree(rdev->gart.pages);
- 	kfree(rdev->gart.pages_addr);
-+	kfree(rdev->gart.ttm_alloced);
- 	rdev->gart.pages = NULL;
- 	rdev->gart.pages_addr = NULL;
--
-+	rdev->gart.ttm_alloced = NULL;
- 	radeon_dummy_page_fini(rdev);
- }
-diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
-index 81f422d..9ead11f 100644
---- a/drivers/gpu/drm/radeon/radeon_ttm.c
-+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
-@@ -647,6 +647,7 @@ struct radeon_ttm_backend {
- 	unsigned long			num_pages;
- 	struct page			**pages;
- 	struct page			*dummy_read_page;
-+	dma_addr_t			*dma_addrs;
- 	bool				populated;
- 	bool				bound;
- 	unsigned			offset;
-@@ -662,6 +663,7 @@ static int radeon_ttm_backend_populate(struct ttm_backend *backend,
- 
- 	gtt = container_of(backend, struct radeon_ttm_backend, backend);
- 	gtt->pages = pages;
-+	gtt->dma_addrs = dma_addrs;
- 	gtt->num_pages = num_pages;
- 	gtt->dummy_read_page = dummy_read_page;
- 	gtt->populated = true;
-@@ -674,6 +676,7 @@ static void radeon_ttm_backend_clear(struct ttm_backend *backend)
- 
- 	gtt = container_of(backend, struct radeon_ttm_backend, backend);
- 	gtt->pages = NULL;
-+	gtt->dma_addrs = NULL;
- 	gtt->num_pages = 0;
- 	gtt->dummy_read_page = NULL;
- 	gtt->populated = false;
-@@ -694,7 +697,7 @@ static int radeon_ttm_backend_bind(struct ttm_backend *backend,
- 		     gtt->num_pages, bo_mem, backend);
- 	}
- 	r = radeon_gart_bind(gtt->rdev, gtt->offset,
--			     gtt->num_pages, gtt->pages);
-+			     gtt->num_pages, gtt->pages, gtt->dma_addrs);
- 	if (r) {
- 		DRM_ERROR("failed to bind %lu pages at 0x%08X\n",
- 			  gtt->num_pages, gtt->offset);
--- 
-1.7.4
-
-
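The rule the bind path now follows: trust a caller-supplied bus address when one is present, and fall back to pci_map_page() only when it is not. A condensed sketch of that per-page decision, assuming (as the patch does) that DMA_ERROR_CODE means "no address supplied", and omitting the unwind of already-mapped pages:

#include <linux/pci.h>

/* Returns 1 if TTM already owned the mapping (never unmap it here),
 * 0 if we mapped the page ourselves, or -ENOMEM on failure. */
static int gart_map_one(struct pci_dev *pdev, struct page *page,
			dma_addr_t hint, dma_addr_t *out)
{
	if (hint != DMA_ERROR_CODE) {
		*out = hint;		/* TTM did the DMA work already */
		return 1;
	}
	*out = pci_map_page(pdev, page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
	if (pci_dma_mapping_error(pdev, *out))
		return -ENOMEM;
	return 0;
}
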
-From 7c20cf7644dc632b61a6f7ee52fa4ae2f8abc1b2 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Thu, 2 Dec 2010 11:36:24 -0500
-Subject: [PATCH 161/203] nouveau/ttm/PCIe: Use dma_addr if TTM has set it.
-
-If the TTM layer has used the DMA API to set up pages that are
-TTM_PAGE_FLAG_DMA32 (look at the patch titled "ttm: Utilize the
-DMA API for pages that have TTM_PAGE_FLAG_DMA32 set"), let's
-use it when programming the GART in PCIe-type cards.
-
-This patch skips doing the pci_map_page (and pci_unmap_page) if
-there is a DMA addresses passed in for that page. If the dma_address
-is zero (or DMA_ERROR_CODE), then we continue on with our old
-behaviour.
-
-[v2: Added a review-by tag]
-
-Reviewed-by: Thomas Hellstrom <thomas at shipmail.org>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Tested-by: Ian Campbell <ian.campbell at citrix.com>
----
- drivers/gpu/drm/nouveau/nouveau_sgdma.c |   28 +++++++++++++++++++++-------
- 1 files changed, 21 insertions(+), 7 deletions(-)
-
-diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
-index c3c0674..07b1151 100644
---- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
-+++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
-@@ -12,6 +12,7 @@ struct nouveau_sgdma_be {
- 	struct drm_device *dev;
- 
- 	dma_addr_t *pages;
-+	bool *ttm_alloced;
- 	unsigned nr_pages;
- 
- 	u64 offset;
-@@ -35,15 +36,25 @@ nouveau_sgdma_populate(struct ttm_backend *be, unsigned long num_pages,
- 	if (!nvbe->pages)
- 		return -ENOMEM;
- 
-+	nvbe->ttm_alloced = kmalloc(sizeof(bool) * num_pages, GFP_KERNEL);
-+	if (!nvbe->ttm_alloced)
-+		return -ENOMEM;
-+
- 	nvbe->nr_pages = 0;
- 	while (num_pages--) {
--		nvbe->pages[nvbe->nr_pages] =
--			pci_map_page(dev->pdev, pages[nvbe->nr_pages], 0,
-+		if (dma_addrs[nvbe->nr_pages] != DMA_ERROR_CODE) {
-+			nvbe->pages[nvbe->nr_pages] =
-+					dma_addrs[nvbe->nr_pages];
-+		 	nvbe->ttm_alloced[nvbe->nr_pages] = true;
-+		} else {
-+			nvbe->pages[nvbe->nr_pages] =
-+				pci_map_page(dev->pdev, pages[nvbe->nr_pages], 0,
- 				     PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
--		if (pci_dma_mapping_error(dev->pdev,
--					  nvbe->pages[nvbe->nr_pages])) {
--			be->func->clear(be);
--			return -EFAULT;
-+			if (pci_dma_mapping_error(dev->pdev,
-+						  nvbe->pages[nvbe->nr_pages])) {
-+				be->func->clear(be);
-+				return -EFAULT;
-+			}
- 		}
- 
- 		nvbe->nr_pages++;
-@@ -66,11 +77,14 @@ nouveau_sgdma_clear(struct ttm_backend *be)
- 			be->func->unbind(be);
- 
- 		while (nvbe->nr_pages--) {
--			pci_unmap_page(dev->pdev, nvbe->pages[nvbe->nr_pages],
-+			if (!nvbe->ttm_alloced[nvbe->nr_pages])
-+				pci_unmap_page(dev->pdev, nvbe->pages[nvbe->nr_pages],
- 				       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
- 		}
- 		kfree(nvbe->pages);
-+		kfree(nvbe->ttm_alloced);
- 		nvbe->pages = NULL;
-+		nvbe->ttm_alloced = NULL;
- 		nvbe->nr_pages = 0;
- 	}
- }
--- 
-1.7.4
-
-
-From 02bbfbab7dd6a107ea2f5d6e882631cd31c72eda Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Tue, 22 Feb 2011 13:24:32 -0500
-Subject: [PATCH 162/203] ttm: Include the 'struct dev' when using the DMA API.
-
-This makes the accounting when using 'debug_dma_dump_mappings()'
-and CONFIG_DMA_API_DEBUG=y be assigned to the correct device
-instead of 'fallback'.
-
-No functional change - just cosmetic.
-
-At first it seems that we just need to keep a copy of
-'struct device' in the struct ttm_bo_device and use that. However,
-when 'ttm_tt_destroy' is called it sets ttm->be (which contains
-the 'struct ttm_bo_device') to NULL so we cannot use it. Hence
-we copy the 'struct device' pointer to the 'struct ttm_tt' and keep
-it there.
-
-[v2: Added 'struct device' in 'struct ttm_tt']
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/gpu/drm/nouveau/nouveau_mem.c |    1 +
- drivers/gpu/drm/radeon/radeon_ttm.c   |    1 +
- drivers/gpu/drm/ttm/ttm_page_alloc.c  |   11 ++++++-----
- drivers/gpu/drm/ttm/ttm_tt.c          |    5 +++--
- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c   |    2 +-
- include/drm/ttm/ttm_bo_driver.h       |    2 ++
- include/drm/ttm/ttm_page_alloc.h      |    8 ++++++--
- 7 files changed, 20 insertions(+), 10 deletions(-)
-
-diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
-index 26347b7..7b57067 100644
---- a/drivers/gpu/drm/nouveau/nouveau_mem.c
-+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
-@@ -409,6 +409,7 @@ nouveau_mem_vram_init(struct drm_device *dev)
- 	if (ret)
- 		return ret;
- 
-+	dev_priv->ttm.bdev.dev = dev->dev;
- 	ret = ttm_bo_device_init(&dev_priv->ttm.bdev,
- 				 dev_priv->ttm.bo_global_ref.ref.object,
- 				 &nouveau_bo_driver, DRM_FILE_PAGE_OFFSET,
-diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
-index 9ead11f..d19bfcf 100644
---- a/drivers/gpu/drm/radeon/radeon_ttm.c
-+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
-@@ -513,6 +513,7 @@ int radeon_ttm_init(struct radeon_device *rdev)
- 	if (r) {
- 		return r;
- 	}
-+	rdev->mman.bdev.dev = rdev->dev;
- 	/* No others user of address space so set it to 0 */
- 	r = ttm_bo_device_init(&rdev->mman.bdev,
- 			       rdev->mman.bo_global_ref.ref.object,
-diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
-index 737a2a2..35849db 100644
---- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
-+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
-@@ -664,7 +664,7 @@ out:
-  */
- int ttm_get_pages(struct list_head *pages, int flags,
- 		  enum ttm_caching_state cstate, unsigned count,
--		  dma_addr_t *dma_address)
-+		  dma_addr_t *dma_address, struct device *dev)
- {
- 	struct ttm_page_pool *pool = ttm_get_pool(flags, cstate);
- 	struct page *p = NULL;
-@@ -685,7 +685,7 @@ int ttm_get_pages(struct list_head *pages, int flags,
- 		for (r = 0; r < count; ++r) {
- 			if ((flags & TTM_PAGE_FLAG_DMA32) && dma_address) {
- 				void *addr;
--				addr = dma_alloc_coherent(NULL, PAGE_SIZE,
-+				addr = dma_alloc_coherent(dev, PAGE_SIZE,
- 							  &dma_address[r],
- 							  gfp_flags);
- 				if (addr == NULL)
-@@ -730,7 +730,7 @@ int ttm_get_pages(struct list_head *pages, int flags,
- 			printk(KERN_ERR TTM_PFX
- 			       "Failed to allocate extra pages "
- 			       "for large request.");
--			ttm_put_pages(pages, 0, flags, cstate, NULL);
-+			ttm_put_pages(pages, 0, flags, cstate, NULL, NULL);
- 			return r;
- 		}
- 	}
-@@ -741,7 +741,8 @@ int ttm_get_pages(struct list_head *pages, int flags,
- 
- /* Put all pages in pages list to correct pool to wait for reuse */
- void ttm_put_pages(struct list_head *pages, unsigned page_count, int flags,
--		   enum ttm_caching_state cstate, dma_addr_t *dma_address)
-+		   enum ttm_caching_state cstate, dma_addr_t *dma_address,
-+		   struct device *dev)
- {
- 	unsigned long irq_flags;
- 	struct ttm_page_pool *pool = ttm_get_pool(flags, cstate);
-@@ -757,7 +758,7 @@ void ttm_put_pages(struct list_head *pages, unsigned page_count, int flags,
- 				void *addr = page_address(p);
- 				WARN_ON(!addr || !dma_address[r]);
- 				if (addr)
--					dma_free_coherent(NULL, PAGE_SIZE,
-+					dma_free_coherent(dev, PAGE_SIZE,
- 							  addr,
- 							  dma_address[r]);
- 				dma_address[r] = 0;
-diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
-index 86d5b17..354f9d9 100644
---- a/drivers/gpu/drm/ttm/ttm_tt.c
-+++ b/drivers/gpu/drm/ttm/ttm_tt.c
-@@ -110,7 +110,7 @@ static struct page *__ttm_tt_get_page(struct ttm_tt *ttm, int index)
- 		INIT_LIST_HEAD(&h);
- 
- 		ret = ttm_get_pages(&h, ttm->page_flags, ttm->caching_state, 1,
--				    &ttm->dma_address[index]);
-+				    &ttm->dma_address[index], ttm->dev);
- 
- 		if (ret != 0)
- 			return NULL;
-@@ -304,7 +304,7 @@ static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm)
- 		}
- 	}
- 	ttm_put_pages(&h, count, ttm->page_flags, ttm->caching_state,
--		      ttm->dma_address);
-+		      ttm->dma_address, ttm->dev);
- 	ttm->state = tt_unpopulated;
- 	ttm->first_himem_page = ttm->num_pages;
- 	ttm->last_lomem_page = -1;
-@@ -397,6 +397,7 @@ struct ttm_tt *ttm_tt_create(struct ttm_bo_device *bdev, unsigned long size,
- 	ttm->last_lomem_page = -1;
- 	ttm->caching_state = tt_cached;
- 	ttm->page_flags = page_flags;
-+	ttm->dev = bdev->dev;
- 
- 	ttm->dummy_read_page = dummy_read_page;
- 
-diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
-index 10ca97e..4a8c789 100644
---- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
-+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
-@@ -322,7 +322,7 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
- 	ttm_lock_set_kill(&dev_priv->fbdev_master.lock, false, SIGTERM);
- 	dev_priv->active_master = &dev_priv->fbdev_master;
- 
--
-+	dev_priv->bdev.dev = dev->dev;
- 	ret = ttm_bo_device_init(&dev_priv->bdev,
- 				 dev_priv->bo_global_ref.ref.object,
- 				 &vmw_bo_driver, VMWGFX_FILE_PAGE_OFFSET,
-diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
-index efed082..7589c0a 100644
---- a/include/drm/ttm/ttm_bo_driver.h
-+++ b/include/drm/ttm/ttm_bo_driver.h
-@@ -177,6 +177,7 @@ struct ttm_tt {
- 		tt_unpopulated,
- 	} state;
- 	dma_addr_t *dma_address;
-+	struct device *dev;
- };
- 
- #define TTM_MEMTYPE_FLAG_FIXED         (1 << 0)	/* Fixed (on-card) PCI memory */
-@@ -551,6 +552,7 @@ struct ttm_bo_device {
- 	struct list_head device_list;
- 	struct ttm_bo_global *glob;
- 	struct ttm_bo_driver *driver;
-+	struct device *dev;
- 	rwlock_t vm_lock;
- 	struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES];
- 	spinlock_t fence_lock;
-diff --git a/include/drm/ttm/ttm_page_alloc.h b/include/drm/ttm/ttm_page_alloc.h
-index 8062890..ccb6b7a 100644
---- a/include/drm/ttm/ttm_page_alloc.h
-+++ b/include/drm/ttm/ttm_page_alloc.h
-@@ -37,12 +37,14 @@
-  * @cstate: ttm caching state for the page.
-  * @count: number of pages to allocate.
-  * @dma_address: The DMA (bus) address of pages (if TTM_PAGE_FLAG_DMA32 set).
-+ * @dev: struct device for appropriate DMA accounting.
-  */
- int ttm_get_pages(struct list_head *pages,
- 		  int flags,
- 		  enum ttm_caching_state cstate,
- 		  unsigned count,
--		  dma_addr_t *dma_address);
-+		  dma_addr_t *dma_address,
-+		  struct device *dev);
- /**
-  * Put linked list of pages to pool.
-  *
-@@ -52,12 +54,14 @@ int ttm_get_pages(struct list_head *pages,
-  * @flags: ttm flags for page allocation.
-  * @cstate: ttm caching state.
-  * @dma_address: The DMA (bus) address of pages (if TTM_PAGE_FLAG_DMA32 set).
-+ * @dev: struct device for appropriate DMA accounting.
-  */
- void ttm_put_pages(struct list_head *pages,
- 		   unsigned page_count,
- 		   int flags,
- 		   enum ttm_caching_state cstate,
--		   dma_addr_t *dma_address);
-+		   dma_addr_t *dma_address,
-+		   struct device *dev);
- /**
-  * Initialize pool allocator.
-  */
--- 
-1.7.4
-
-
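The payoff of carrying the 'struct device' this far shows up with CONFIG_DMA_API_DEBUG=y: coherent allocations are now accounted to the real PCI device instead of the 'fallback' bucket. A small sketch of the threading pattern these two TTM patches establish, with ttm_ctx as an illustrative stand-in for struct ttm_bo_device / struct ttm_tt:

#include <linux/dma-mapping.h>

struct ttm_ctx {
	struct device *dev;	/* captured once at init time */
};

static void *ctx_alloc_page(struct ttm_ctx *ctx, dma_addr_t *dma)
{
	/* dma-debug accounting now lands on ctx->dev, not "fallback" */
	return dma_alloc_coherent(ctx->dev, PAGE_SIZE, dma, GFP_KERNEL);
}
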
-From 733301920082553b52ce4453493fe6abf6aa7d1a Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Wed, 23 Feb 2011 22:42:38 -0500
-Subject: [PATCH 163/203] ttm: Pass in 'struct device' to TTM so it can do DMA API on behalf of device.
-
-We want to pass in the 'struct device' to the TTM layer in a nice way.
-This is not strictly required, but it does make accounting of
-pages and their DMA addresses correct when using CONFIG_DMA_API_DEBUG=y.
-
-This patch builds on top of "ttm: Include the 'struct dev' when using the DMA API."
-and moves the mechanism of passing in 'struct device' to the TTM API.
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/gpu/drm/nouveau/nouveau_mem.c |    4 ++--
- drivers/gpu/drm/radeon/radeon_ttm.c   |    4 ++--
- drivers/gpu/drm/ttm/ttm_bo.c          |    4 +++-
- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c   |    4 ++--
- include/drm/ttm/ttm_bo_driver.h       |    5 ++++-
- 5 files changed, 13 insertions(+), 8 deletions(-)
-
-diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
-index 7b57067..3706156 100644
---- a/drivers/gpu/drm/nouveau/nouveau_mem.c
-+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
-@@ -409,11 +409,11 @@ nouveau_mem_vram_init(struct drm_device *dev)
- 	if (ret)
- 		return ret;
- 
--	dev_priv->ttm.bdev.dev = dev->dev;
- 	ret = ttm_bo_device_init(&dev_priv->ttm.bdev,
- 				 dev_priv->ttm.bo_global_ref.ref.object,
- 				 &nouveau_bo_driver, DRM_FILE_PAGE_OFFSET,
--				 dma_bits <= 32 ? true : false);
-+				 dma_bits <= 32 ? true : false,
-+				 dev->dev);
- 	if (ret) {
- 		NV_ERROR(dev, "Error initialising bo driver: %d\n", ret);
- 		return ret;
-diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
-index d19bfcf..371890c 100644
---- a/drivers/gpu/drm/radeon/radeon_ttm.c
-+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
-@@ -513,12 +513,12 @@ int radeon_ttm_init(struct radeon_device *rdev)
- 	if (r) {
- 		return r;
- 	}
--	rdev->mman.bdev.dev = rdev->dev;
- 	/* No others user of address space so set it to 0 */
- 	r = ttm_bo_device_init(&rdev->mman.bdev,
- 			       rdev->mman.bo_global_ref.ref.object,
- 			       &radeon_bo_driver, DRM_FILE_PAGE_OFFSET,
--			       rdev->need_dma32);
-+			       rdev->need_dma32,
-+			       rdev->dev);
- 	if (r) {
- 		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
- 		return r;
-diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
-index af61fc2..278a2d3 100644
---- a/drivers/gpu/drm/ttm/ttm_bo.c
-+++ b/drivers/gpu/drm/ttm/ttm_bo.c
-@@ -1526,12 +1526,14 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev,
- 		       struct ttm_bo_global *glob,
- 		       struct ttm_bo_driver *driver,
- 		       uint64_t file_page_offset,
--		       bool need_dma32)
-+		       bool need_dma32,
-+		       struct device *dev)
- {
- 	int ret = -EINVAL;
- 
- 	rwlock_init(&bdev->vm_lock);
- 	bdev->driver = driver;
-+	bdev->dev = dev;
- 
- 	memset(bdev->man, 0, sizeof(bdev->man));
- 
-diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
-index 4a8c789..803d979 100644
---- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
-+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
-@@ -322,11 +322,11 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
- 	ttm_lock_set_kill(&dev_priv->fbdev_master.lock, false, SIGTERM);
- 	dev_priv->active_master = &dev_priv->fbdev_master;
- 
--	dev_priv->bdev.dev = dev->dev;
- 	ret = ttm_bo_device_init(&dev_priv->bdev,
- 				 dev_priv->bo_global_ref.ref.object,
- 				 &vmw_bo_driver, VMWGFX_FILE_PAGE_OFFSET,
--				 false);
-+				 false,
-+				 dev->dev);
- 	if (unlikely(ret != 0)) {
- 		DRM_ERROR("Failed initializing TTM buffer object driver.\n");
- 		goto out_err1;
-diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
-index 7589c0a..2024a74 100644
---- a/include/drm/ttm/ttm_bo_driver.h
-+++ b/include/drm/ttm/ttm_bo_driver.h
-@@ -793,6 +793,8 @@ extern int ttm_bo_device_release(struct ttm_bo_device *bdev);
-  * @file_page_offset: Offset into the device address space that is available
-  * for buffer data. This ensures compatibility with other users of the
-  * address space.
-+ * @need_dma32: Allocate pages under 4GB
-+ * @dev: 'struct device' of the PCI device.
-  *
-  * Initializes a struct ttm_bo_device:
-  * Returns:
-@@ -801,7 +803,8 @@ extern int ttm_bo_device_release(struct ttm_bo_device *bdev);
- extern int ttm_bo_device_init(struct ttm_bo_device *bdev,
- 			      struct ttm_bo_global *glob,
- 			      struct ttm_bo_driver *driver,
--			      uint64_t file_page_offset, bool need_dma32);
-+			      uint64_t file_page_offset, bool need_dma32,
-+			      struct device *dev);
- 
- /**
-  * ttm_bo_unmap_virtual
--- 
-1.7.4
-
-
-From 92ecb86a77e143a9b9b4491158d063a0b319709f Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Mon, 8 Nov 2010 14:13:35 -0500
-Subject: [PATCH 164/203] xen/pci: Add xen_[find|register|unregister]_device_domain_owner functions.
-
-When the Xen PCI backend is told to enable or disable MSI/MSI-X functions,
-the initial domain performs these operations. The initial domain needs
-to know which domain (guest) is going to use the PCI device, so that when it
-makes the appropriate hypercall to retrieve the MSI/MSI-X vector it will
-also assign the PCI device to the appropriate domain (guest).
-
-This boils down to us needing a mechanism to find, set and unset the domain
-id that will be using the device.
-
-[v2: EXPORT_SYMBOL -> EXPORT_SYMBOL_GPL.]
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- arch/x86/include/asm/xen/pci.h |   16 +++++++++
- arch/x86/pci/xen.c             |   73 ++++++++++++++++++++++++++++++++++++++++
- 2 files changed, 89 insertions(+), 0 deletions(-)
-
-diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h
-index aa86209..4fbda9a 100644
---- a/arch/x86/include/asm/xen/pci.h
-+++ b/arch/x86/include/asm/xen/pci.h
-@@ -15,10 +15,26 @@ static inline int pci_xen_hvm_init(void)
- #endif
- #if defined(CONFIG_XEN_DOM0)
- void __init xen_setup_pirqs(void);
-+int xen_find_device_domain_owner(struct pci_dev *dev);
-+int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain);
-+int xen_unregister_device_domain_owner(struct pci_dev *dev);
- #else
- static inline void __init xen_setup_pirqs(void)
- {
- }
-+static inline int xen_find_device_domain_owner(struct pci_dev *dev)
++static int connect_rings(struct backend_info *);
++static void connect(struct backend_info *);
++static void backend_create_xenvif(struct backend_info *be);
++static void unregister_hotplug_status_watch(struct backend_info *be);
++
++static int netback_remove(struct xenbus_device *dev)
 +{
-+	return -1;
++	struct backend_info *be = dev_get_drvdata(&dev->dev);
++
++	unregister_hotplug_status_watch(be);
++	if (be->vif) {
++		kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
++		xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
++		xenvif_disconnect(be->vif);
++		be->vif = NULL;
++	}
++	kfree(be);
++	dev_set_drvdata(&dev->dev, NULL);
++	return 0;
 +}
-+static inline int xen_register_device_domain_owner(struct pci_dev *dev,
-+						   uint16_t domain)
++
++
++/**
++ * Entry point to this code when a new device is created.  Allocate the basic
++ * structures and switch to InitWait.
++ */
++static int netback_probe(struct xenbus_device *dev,
++			 const struct xenbus_device_id *id)
 +{
-+	return -1;
++	const char *message;
++	struct xenbus_transaction xbt;
++	int err;
++	int sg;
++	struct backend_info *be = kzalloc(sizeof(struct backend_info),
++					  GFP_KERNEL);
++	if (!be) {
++		xenbus_dev_fatal(dev, -ENOMEM,
++				 "allocating backend structure");
++		return -ENOMEM;
++	}
++
++	be->dev = dev;
++	dev_set_drvdata(&dev->dev, be);
++
++	sg = 1;
++
++	do {
++		err = xenbus_transaction_start(&xbt);
++		if (err) {
++			xenbus_dev_fatal(dev, err, "starting transaction");
++			goto fail;
++		}
++
++		err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg);
++		if (err) {
++			message = "writing feature-sg";
++			goto abort_transaction;
++		}
++
++		err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4",
++				    "%d", sg);
++		if (err) {
++			message = "writing feature-gso-tcpv4";
++			goto abort_transaction;
++		}
++
++		/* We support rx-copy path. */
++		err = xenbus_printf(xbt, dev->nodename,
++				    "feature-rx-copy", "%d", 1);
++		if (err) {
++			message = "writing feature-rx-copy";
++			goto abort_transaction;
++		}
++
++		/*
++		 * We don't support rx-flip path (except old guests who don't
++		 * grok this feature flag).
++		 */
++		err = xenbus_printf(xbt, dev->nodename,
++				    "feature-rx-flip", "%d", 0);
++		if (err) {
++			message = "writing feature-rx-flip";
++			goto abort_transaction;
++		}
++
++		err = xenbus_transaction_end(xbt, 0);
++	} while (err == -EAGAIN);
++
++	if (err) {
++		xenbus_dev_fatal(dev, err, "completing transaction");
++		goto fail;
++	}
++
++	err = xenbus_switch_state(dev, XenbusStateInitWait);
++	if (err)
++		goto fail;
++
++	/* This kicks hotplug scripts, so do it immediately. */
++	backend_create_xenvif(be);
++
++	return 0;
++
++abort_transaction:
++	xenbus_transaction_end(xbt, 1);
++	xenbus_dev_fatal(dev, err, "%s", message);
++fail:
++	pr_debug("failed");
++	netback_remove(dev);
++	return err;
++}
++
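The do/while loop above is the standard xenbus transaction idiom: if another writer races us, xenbus_transaction_end() returns -EAGAIN and the whole transaction is replayed from the top. A stripped-down sketch of just that skeleton:

	struct xenbus_transaction xbt;
	int err;

	do {
		err = xenbus_transaction_start(&xbt);
		if (err)
			break;				/* could not even open one */
		err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
		if (err) {
			xenbus_transaction_end(xbt, 1);	/* second arg 1 = abort */
			break;
		}
		err = xenbus_transaction_end(xbt, 0);	/* 0 = commit; may be -EAGAIN */
	} while (err == -EAGAIN);
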
++
++/*
++ * Handle the creation of the hotplug script environment.  We add the script
++ * and vif variables to the environment, for the benefit of the vif-* hotplug
++ * scripts.
++ */
++static int netback_uevent(struct xenbus_device *xdev,
++			  struct kobj_uevent_env *env)
++{
++	struct backend_info *be = dev_get_drvdata(&xdev->dev);
++	char *val;
++
++	val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
++	if (IS_ERR(val)) {
++		int err = PTR_ERR(val);
++		xenbus_dev_fatal(xdev, err, "reading script");
++		return err;
++	} else {
++		if (add_uevent_var(env, "script=%s", val)) {
++			kfree(val);
++			return -ENOMEM;
++		}
++		kfree(val);
++	}
++
++	if (!be || !be->vif)
++		return 0;
++
++	return add_uevent_var(env, "vif=%s", be->vif->dev->name);
++}
++
++
++static void backend_create_xenvif(struct backend_info *be)
++{
++	int err;
++	long handle;
++	struct xenbus_device *dev = be->dev;
++
++	if (be->vif != NULL)
++		return;
++
++	err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%li", &handle);
++	if (err != 1) {
++		xenbus_dev_fatal(dev, err, "reading handle");
++		return;
++	}
++
++	be->vif = xenvif_alloc(&dev->dev, dev->otherend_id, handle);
++	if (IS_ERR(be->vif)) {
++		err = PTR_ERR(be->vif);
++		be->vif = NULL;
++		xenbus_dev_fatal(dev, err, "creating interface");
++		return;
++	}
++
++	kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
++}
++
++
++static void disconnect_backend(struct xenbus_device *dev)
++{
++	struct backend_info *be = dev_get_drvdata(&dev->dev);
++
++	if (be->vif) {
++		xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
++		xenvif_disconnect(be->vif);
++		be->vif = NULL;
++	}
++}
++
++/**
++ * Callback received when the frontend's state changes.
++ */
++static void frontend_changed(struct xenbus_device *dev,
++			     enum xenbus_state frontend_state)
++{
++	struct backend_info *be = dev_get_drvdata(&dev->dev);
++
++	pr_debug("frontend state %s", xenbus_strstate(frontend_state));
++
++	be->frontend_state = frontend_state;
++
++	switch (frontend_state) {
++	case XenbusStateInitialising:
++		if (dev->state == XenbusStateClosed) {
++			printk(KERN_INFO "%s: %s: prepare for reconnect\n",
++			       __func__, dev->nodename);
++			xenbus_switch_state(dev, XenbusStateInitWait);
++		}
++		break;
++
++	case XenbusStateInitialised:
++		break;
++
++	case XenbusStateConnected:
++		if (dev->state == XenbusStateConnected)
++			break;
++		backend_create_xenvif(be);
++		if (be->vif)
++			connect(be);
++		break;
++
++	case XenbusStateClosing:
++		if (be->vif)
++			kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
++		disconnect_backend(dev);
++		xenbus_switch_state(dev, XenbusStateClosing);
++		break;
++
++	case XenbusStateClosed:
++		xenbus_switch_state(dev, XenbusStateClosed);
++		if (xenbus_dev_is_online(dev))
++			break;
++		/* fall through if not online */
++	case XenbusStateUnknown:
++		device_unregister(&dev->dev);
++		break;
++
++	default:
++		xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
++				 frontend_state);
++		break;
++	}
++}
++
++
++static void xen_net_read_rate(struct xenbus_device *dev,
++			      unsigned long *bytes, unsigned long *usec)
++{
++	char *s, *e;
++	unsigned long b, u;
++	char *ratestr;
++
++	/* Default to unlimited bandwidth. */
++	*bytes = ~0UL;
++	*usec = 0;
++
++	ratestr = xenbus_read(XBT_NIL, dev->nodename, "rate", NULL);
++	if (IS_ERR(ratestr))
++		return;
++
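++	/* The rate node has the form "<bytes>,<usec>": a credit of <bytes>
++	 * replenished every <usec> microseconds. */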
++	s = ratestr;
++	b = simple_strtoul(s, &e, 10);
++	if ((s == e) || (*e != ','))
++		goto fail;
++
++	s = e + 1;
++	u = simple_strtoul(s, &e, 10);
++	if ((s == e) || (*e != '\0'))
++		goto fail;
++
++	*bytes = b;
++	*usec = u;
++
++	kfree(ratestr);
++	return;
++
++ fail:
++	pr_warn("Failed to parse network rate limit. Traffic unlimited.\n");
++	kfree(ratestr);
 +}
-+static inline int xen_unregister_device_domain_owner(struct pci_dev *dev)
++
++static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
 +{
-+	return -1;
-+}
- #endif
- 
- #if defined(CONFIG_PCI_MSI)
-diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
-index 8c4085a..2d04454 100644
---- a/arch/x86/pci/xen.c
-+++ b/arch/x86/pci/xen.c
-@@ -444,3 +444,76 @@ void __init xen_setup_pirqs(void)
- 	}
- }
- #endif
++	char *s, *e, *macstr;
++	int i;
 +
-+struct xen_device_domain_owner {
-+	domid_t domain;
-+	struct pci_dev *dev;
-+	struct list_head list;
-+};
++	macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
++	if (IS_ERR(macstr))
++		return PTR_ERR(macstr);
 +
-+static DEFINE_SPINLOCK(dev_domain_list_spinlock);
-+static struct list_head dev_domain_list = LIST_HEAD_INIT(dev_domain_list);
++	for (i = 0; i < ETH_ALEN; i++) {
++		mac[i] = simple_strtoul(s, &e, 16);
++		if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
++			kfree(macstr);
++			return -ENOENT;
++		}
++		s = e+1;
++	}
 +
-+static struct xen_device_domain_owner *find_device(struct pci_dev *dev)
-+{
-+	struct xen_device_domain_owner *owner;
++	kfree(macstr);
++	return 0;
++}
 +
-+	list_for_each_entry(owner, &dev_domain_list, list) {
-+		if (owner->dev == dev)
-+			return owner;
++static void unregister_hotplug_status_watch(struct backend_info *be)
++{
++	if (be->have_hotplug_status_watch) {
++		unregister_xenbus_watch(&be->hotplug_status_watch);
++		kfree(be->hotplug_status_watch.node);
 +	}
-+	return NULL;
++	be->have_hotplug_status_watch = 0;
 +}
 +
-+int xen_find_device_domain_owner(struct pci_dev *dev)
++static void hotplug_status_changed(struct xenbus_watch *watch,
++				   const char **vec,
++				   unsigned int vec_size)
 +{
-+	struct xen_device_domain_owner *owner;
-+	int domain = -ENODEV;
++	struct backend_info *be = container_of(watch,
++					       struct backend_info,
++					       hotplug_status_watch);
++	char *str;
++	unsigned int len;
 +
-+	spin_lock(&dev_domain_list_spinlock);
-+	owner = find_device(dev);
-+	if (owner)
-+		domain = owner->domain;
-+	spin_unlock(&dev_domain_list_spinlock);
-+	return domain;
++	str = xenbus_read(XBT_NIL, be->dev->nodename, "hotplug-status", &len);
++	if (IS_ERR(str))
++		return;
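++	/* The vif hotplug script writes "connected" once it has finished
++	 * setting up the interface. */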
++	if (len == sizeof("connected")-1 && !memcmp(str, "connected", len)) {
++		xenbus_switch_state(be->dev, XenbusStateConnected);
++		/* Not interested in this watch anymore. */
++		unregister_hotplug_status_watch(be);
++	}
++	kfree(str);
 +}
-+EXPORT_SYMBOL_GPL(xen_find_device_domain_owner);
 +
-+int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain)
++static void connect(struct backend_info *be)
 +{
-+	struct xen_device_domain_owner *owner;
++	int err;
++	struct xenbus_device *dev = be->dev;
 +
-+	owner = kzalloc(sizeof(struct xen_device_domain_owner), GFP_KERNEL);
-+	if (!owner)
-+		return -ENODEV;
++	err = connect_rings(be);
++	if (err)
++		return;
 +
-+	spin_lock(&dev_domain_list_spinlock);
-+	if (find_device(dev)) {
-+		spin_unlock(&dev_domain_list_spinlock);
-+		kfree(owner);
-+		return -EEXIST;
++	err = xen_net_read_mac(dev, be->vif->fe_dev_addr);
++	if (err) {
++		xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
++		return;
 +	}
-+	owner->domain = domain;
-+	owner->dev = dev;
-+	list_add_tail(&owner->list, &dev_domain_list);
-+	spin_unlock(&dev_domain_list_spinlock);
-+	return 0;
++
++	xen_net_read_rate(dev, &be->vif->credit_bytes,
++			  &be->vif->credit_usec);
++	be->vif->remaining_credit = be->vif->credit_bytes;
++
++	unregister_hotplug_status_watch(be);
++	err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch,
++				   hotplug_status_changed,
++				   "%s/%s", dev->nodename, "hotplug-status");
++	if (err) {
++		/* Switch now, since we can't do a watch. */
++		xenbus_switch_state(dev, XenbusStateConnected);
++	} else {
++		be->have_hotplug_status_watch = 1;
++	}
++
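++	/* Everything is set up; let the frontend-facing queue run. */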
++	netif_wake_queue(be->vif->dev);
 +}
-+EXPORT_SYMBOL_GPL(xen_register_device_domain_owner);
 +
-+int xen_unregister_device_domain_owner(struct pci_dev *dev)
++
++static int connect_rings(struct backend_info *be)
 +{
-+	struct xen_device_domain_owner *owner;
++	struct xenvif *vif = be->vif;
++	struct xenbus_device *dev = be->dev;
++	unsigned long tx_ring_ref, rx_ring_ref;
++	unsigned int evtchn, rx_copy;
++	int err;
++	int val;
 +
-+	spin_lock(&dev_domain_list_spinlock);
-+	owner = find_device(dev);
-+	if (!owner) {
-+		spin_unlock(&dev_domain_list_spinlock);
-+		return -ENODEV;
++	err = xenbus_gather(XBT_NIL, dev->otherend,
++			    "tx-ring-ref", "%lu", &tx_ring_ref,
++			    "rx-ring-ref", "%lu", &rx_ring_ref,
++			    "event-channel", "%u", &evtchn, NULL);
++	if (err) {
++		xenbus_dev_fatal(dev, err,
++				 "reading %s/ring-ref and event-channel",
++				 dev->otherend);
++		return err;
++	}
++
++	err = xenbus_scanf(XBT_NIL, dev->otherend, "request-rx-copy", "%u",
++			   &rx_copy);
++	if (err == -ENOENT) {
++		err = 0;
++		rx_copy = 0;
++	}
++	if (err < 0) {
++		xenbus_dev_fatal(dev, err, "reading %s/request-rx-copy",
++				 dev->otherend);
++		return err;
++	}
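++	/* Only the rx-copy receive path is supported (feature-rx-flip is
++	 * advertised as 0 above), so refuse frontends that don't request it. */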
++	if (!rx_copy)
++		return -EOPNOTSUPP;
++
++	if (vif->dev->tx_queue_len != 0) {
++		if (xenbus_scanf(XBT_NIL, dev->otherend,
++				 "feature-rx-notify", "%d", &val) < 0)
++			val = 0;
++		if (val)
++			vif->can_queue = 1;
++		else
++			/* Must be non-zero for pfifo_fast to work. */
++			vif->dev->tx_queue_len = 1;
++	}
++
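++	/* The remaining frontend features are optional; a missing xenstore
++	 * node is treated the same as the feature being absent. */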
++	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg",
++			 "%d", &val) < 0)
++		val = 0;
++	vif->can_sg = !!val;
++
++	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4",
++			 "%d", &val) < 0)
++		val = 0;
++	vif->gso = !!val;
++
++	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix",
++			 "%d", &val) < 0)
++		val = 0;
++	vif->gso_prefix = !!val;
++
++	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
++			 "%d", &val) < 0)
++		val = 0;
++	vif->csum = !val;
++
++	/* Map the shared frame, irq etc. */
++	err = xenvif_connect(vif, tx_ring_ref, rx_ring_ref, evtchn);
++	if (err) {
++		xenbus_dev_fatal(dev, err,
++				 "mapping shared-frames %lu/%lu port %u",
++				 tx_ring_ref, rx_ring_ref, evtchn);
++		return err;
 +	}
-+	list_del(&owner->list);
-+	spin_unlock(&dev_domain_list_spinlock);
-+	kfree(owner);
 +	return 0;
 +}
-+EXPORT_SYMBOL_GPL(xen_unregister_device_domain_owner);
--- 
-1.7.4
-
-
-From 338415cd3c45f2519ea87858ee9e7abec0fa9ba1 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Thu, 24 Feb 2011 13:35:52 -0500
-Subject: [PATCH 165/203] xen/irq: Check if the PCI device is owned by a domain different than DOMID_SELF.
-
-We check if there is a domain owner for the PCI device. In case of failure
-(meaning no domain has registered for this device) we make DOMID_SELF the owner.
-
-[v2: deal with rebasing on v2.6.37-1]
-[v3: deal with rebasing on stable/irq.cleanup]
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
-Acked-by: Xiantao Zhang <xiantao.zhang at intel.com>
----
- arch/x86/pci/xen.c   |   21 ++++++++++++++++-----
- drivers/xen/events.c |   16 ++++++++++------
- include/xen/events.h |    3 ++-
- 3 files changed, 28 insertions(+), 12 deletions(-)
-
-diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
-index 2d04454..309c0a0 100644
---- a/arch/x86/pci/xen.c
-+++ b/arch/x86/pci/xen.c
-@@ -108,7 +108,8 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
- 		}
- 		irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, 0,
- 					       (type == PCI_CAP_ID_MSIX) ?
--					       "msi-x" : "msi");
-+					       "msi-x" : "msi",
-+					       DOMID_SELF);
- 		if (irq < 0)
- 			goto error;
- 		dev_dbg(&dev->dev,
-@@ -148,7 +149,8 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
- 		irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0,
- 					       (type == PCI_CAP_ID_MSIX) ?
- 					       "pcifront-msi-x" :
--					       "pcifront-msi");
-+					       "pcifront-msi",
-+						DOMID_SELF);
- 		if (irq < 0)
- 			goto free;
- 		i++;
-@@ -190,9 +192,16 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
++
++
++/* ** Driver Registration ** */
++
++
++static const struct xenbus_device_id netback_ids[] = {
++	{ "vif" },
++	{ "" }
++};
++
++
++static struct xenbus_driver netback = {
++	.name = "vif",
++	.owner = THIS_MODULE,
++	.ids = netback_ids,
++	.probe = netback_probe,
++	.remove = netback_remove,
++	.uevent = netback_uevent,
++	.otherend_changed = frontend_changed,
++};
++
++int xenvif_xenbus_init(void)
++{
++	return xenbus_register_backend(&netback);
++}
+diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
+index da1f121..a6ab973 100644
+--- a/drivers/net/xen-netfront.c
++++ b/drivers/net/xen-netfront.c
+@@ -359,7 +359,7 @@ static void xennet_tx_buf_gc(struct net_device *dev)
+ 			struct xen_netif_tx_response *txrsp;
  
- 	list_for_each_entry(msidesc, &dev->msi_list, list) {
- 		struct physdev_map_pirq map_irq;
-+		domid_t domid;
-+
-+		domid = ret = xen_find_device_domain_owner(dev);
-+		/* N.B. Casting int's -ENODEV to uint16_t results in 0xFFED,
-+		 * hence check ret value for < 0. */
-+		if (ret < 0)
-+			domid = DOMID_SELF;
+ 			txrsp = RING_GET_RESPONSE(&np->tx, cons);
+-			if (txrsp->status == NETIF_RSP_NULL)
++			if (txrsp->status == XEN_NETIF_RSP_NULL)
+ 				continue;
  
- 		memset(&map_irq, 0, sizeof(map_irq));
--		map_irq.domid = DOMID_SELF;
-+		map_irq.domid = domid;
- 		map_irq.type = MAP_PIRQ_TYPE_MSI;
- 		map_irq.index = -1;
- 		map_irq.pirq = -1;
-@@ -215,14 +224,16 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
- 
- 		ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
- 		if (ret) {
--			dev_warn(&dev->dev, "xen map irq failed %d\n", ret);
-+			dev_warn(&dev->dev, "xen map irq failed %d for %d domain\n",
-+				 ret, domid);
- 			goto out;
- 		}
+ 			id  = txrsp->id;
+@@ -416,7 +416,7 @@ static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
+ 	   larger than a page), split it into page-sized chunks. */
+ 	while (len > PAGE_SIZE - offset) {
+ 		tx->size = PAGE_SIZE - offset;
+-		tx->flags |= NETTXF_more_data;
++		tx->flags |= XEN_NETTXF_more_data;
+ 		len -= tx->size;
+ 		data += tx->size;
+ 		offset = 0;
+@@ -442,7 +442,7 @@ static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
+ 	for (i = 0; i < frags; i++) {
+ 		skb_frag_t *frag = skb_shinfo(skb)->frags + i;
  
- 		ret = xen_bind_pirq_msi_to_irq(dev, msidesc,
- 					       map_irq.pirq, map_irq.index,
- 					       (type == PCI_CAP_ID_MSIX) ?
--					       "msi-x" : "msi");
-+					       "msi-x" : "msi",
-+						domid);
- 		if (ret < 0)
- 			goto out;
- 	}
-diff --git a/drivers/xen/events.c b/drivers/xen/events.c
-index 6befe62..3ccba7d 100644
---- a/drivers/xen/events.c
-+++ b/drivers/xen/events.c
-@@ -97,6 +97,7 @@ struct irq_info
- 			unsigned short gsi;
- 			unsigned char vector;
- 			unsigned char flags;
-+			uint16_t domid;
- 		} pirq;
- 	} u;
- };
-@@ -153,11 +154,13 @@ static struct irq_info mk_virq_info(unsigned short evtchn, unsigned short virq)
- }
+-		tx->flags |= NETTXF_more_data;
++		tx->flags |= XEN_NETTXF_more_data;
  
- static struct irq_info mk_pirq_info(unsigned short evtchn, unsigned short pirq,
--				    unsigned short gsi, unsigned short vector)
-+				    unsigned short gsi, unsigned short vector,
-+				    domid_t domid)
- {
- 	return (struct irq_info) { .type = IRQT_PIRQ, .evtchn = evtchn,
- 			.cpu = 0,
--			.u.pirq = { .pirq = pirq, .gsi = gsi, .vector = vector } };
-+			.u.pirq = { .pirq = pirq, .gsi = gsi,
-+				    .vector = vector, .domid = domid } };
- }
+ 		id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
+ 		np->tx_skbs[id].skb = skb_get(skb);
+@@ -517,10 +517,10 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ 	tx->flags = 0;
+ 	if (skb->ip_summed == CHECKSUM_PARTIAL)
+ 		/* local packet? */
+-		tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
++		tx->flags |= XEN_NETTXF_csum_blank | XEN_NETTXF_data_validated;
+ 	else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
+ 		/* remote but checksummed. */
+-		tx->flags |= NETTXF_data_validated;
++		tx->flags |= XEN_NETTXF_data_validated;
  
- /*
-@@ -633,7 +636,7 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name)
- 		goto out;
- 	}
+ 	if (skb_shinfo(skb)->gso_size) {
+ 		struct xen_netif_extra_info *gso;
+@@ -531,7 +531,7 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ 		if (extra)
+ 			extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
+ 		else
+-			tx->flags |= NETTXF_extra_info;
++			tx->flags |= XEN_NETTXF_extra_info;
  
--	irq_info[irq] = mk_pirq_info(0, pirq, gsi, irq_op.vector);
-+	irq_info[irq] = mk_pirq_info(0, pirq, gsi, irq_op.vector, DOMID_SELF);
- 	irq_info[irq].u.pirq.flags |= shareable ? PIRQ_SHAREABLE : 0;
- 	pirq_to_irq[pirq] = irq;
+ 		gso->u.gso.size = skb_shinfo(skb)->gso_size;
+ 		gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
+@@ -651,7 +651,7 @@ static int xennet_get_responses(struct netfront_info *np,
+ 	int err = 0;
+ 	unsigned long ret;
  
-@@ -659,7 +662,8 @@ int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc)
- }
+-	if (rx->flags & NETRXF_extra_info) {
++	if (rx->flags & XEN_NETRXF_extra_info) {
+ 		err = xennet_get_extras(np, extras, rp);
+ 		cons = np->rx.rsp_cons;
+ 	}
+@@ -688,7 +688,7 @@ static int xennet_get_responses(struct netfront_info *np,
+ 		__skb_queue_tail(list, skb);
  
- int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
--			     int pirq, int vector, const char *name)
-+			     int pirq, int vector, const char *name,
-+			     domid_t domid)
- {
- 	int irq, ret;
+ next:
+-		if (!(rx->flags & NETRXF_more_data))
++		if (!(rx->flags & XEN_NETRXF_more_data))
+ 			break;
  
-@@ -672,7 +676,7 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
- 	set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
- 				      handle_level_irq, name);
+ 		if (cons + frags == rp) {
+@@ -983,9 +983,9 @@ err:
+ 		skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len);
+ 		skb->len += skb->data_len;
  
--	irq_info[irq] = mk_pirq_info(0, pirq, 0, vector);
-+	irq_info[irq] = mk_pirq_info(0, pirq, 0, vector, domid);
- 	pirq_to_irq[pirq] = irq;
- 	ret = set_irq_msi(irq, msidesc);
- 	if (ret < 0)
-@@ -702,7 +706,7 @@ int xen_destroy_irq(int irq)
+-		if (rx->flags & NETRXF_csum_blank)
++		if (rx->flags & XEN_NETRXF_csum_blank)
+ 			skb->ip_summed = CHECKSUM_PARTIAL;
+-		else if (rx->flags & NETRXF_data_validated)
++		else if (rx->flags & XEN_NETRXF_data_validated)
+ 			skb->ip_summed = CHECKSUM_UNNECESSARY;
+ 
+ 		__skb_queue_tail(&rxq, skb);
+diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
+index 3a5a6fc..492b7d8 100644
+--- a/drivers/pci/xen-pcifront.c
++++ b/drivers/pci/xen-pcifront.c
+@@ -243,7 +243,7 @@ struct pci_ops pcifront_bus_ops = {
  
- 	if (xen_initial_domain()) {
- 		unmap_irq.pirq = info->u.pirq.pirq;
--		unmap_irq.domid = DOMID_SELF;
-+		unmap_irq.domid = info->u.pirq.domid;
- 		rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq);
- 		if (rc) {
- 			printk(KERN_WARNING "unmap irq failed %d\n", rc);
-diff --git a/include/xen/events.h b/include/xen/events.h
-index 962da2c..d146200 100644
---- a/include/xen/events.h
-+++ b/include/xen/events.h
-@@ -77,7 +77,8 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name);
  #ifdef CONFIG_PCI_MSI
- int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc);
- int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
--			     int pirq, int vector, const char *name);
-+			     int pirq, int vector, const char *name,
-+			     domid_t domid);
- #endif
+ static int pci_frontend_enable_msix(struct pci_dev *dev,
+-				    int **vector, int nvec)
++				    int vector[], int nvec)
+ {
+ 	int err;
+ 	int i;
+@@ -277,18 +277,24 @@ static int pci_frontend_enable_msix(struct pci_dev *dev,
+ 	if (likely(!err)) {
+ 		if (likely(!op.value)) {
+ 			/* we get the result */
+-			for (i = 0; i < nvec; i++)
+-				*(*vector+i) = op.msix_entries[i].vector;
+-			return 0;
++			for (i = 0; i < nvec; i++) {
++				if (op.msix_entries[i].vector <= 0) {
++					dev_warn(&dev->dev, "MSI-X entry %d is invalid: %d!\n",
++						i, op.msix_entries[i].vector);
++					err = -EINVAL;
++					vector[i] = -1;
++					continue;
++				}
++				vector[i] = op.msix_entries[i].vector;
++			}
+ 		} else {
+ 			printk(KERN_DEBUG "enable msix get value %x\n",
+ 				op.value);
+-			return op.value;
+ 		}
+ 	} else {
+ 		dev_err(&dev->dev, "enable msix get err %x\n", err);
+-		return err;
+ 	}
++	return err;
+ }
  
- /* De-allocates the above mentioned physical interrupt. */
--- 
-1.7.4
-
-
-From 256b7b7ebebfe53468564466d9302dbaf26ebcbe Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Mon, 8 Nov 2010 14:26:36 -0500
-Subject: [PATCH 166/203] xen/irq: Add support to check if IRQ line is shared with other domains.
-
-We do this via the PHYSDEVOP_irq_status_query support hypervisor call.
-We will get a positive value if another domain has binded its
-PIRQ to the specified GSI (IRQ line).
-
-[v2: Deal with v2.6.37-rc1 rebase fallout]
-[v3: Deal with stable/irq.cleanup fallout]
-[v4: xen_ignore_irq->xen_test_irq_shared]
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/events.c |   12 ++++++++++++
- include/xen/events.h |    3 +++
- 2 files changed, 15 insertions(+), 0 deletions(-)
-
-diff --git a/drivers/xen/events.c b/drivers/xen/events.c
-index 3ccba7d..3afd1f4 100644
---- a/drivers/xen/events.c
-+++ b/drivers/xen/events.c
-@@ -1377,6 +1377,18 @@ void xen_poll_irq(int irq)
- 	xen_poll_irq_timeout(irq, 0 /* no timeout */);
+ static void pci_frontend_disable_msix(struct pci_dev *dev)
+@@ -310,7 +316,7 @@ static void pci_frontend_disable_msix(struct pci_dev *dev)
+ 		dev_err(&dev->dev, "pci_disable_msix get err %x\n", err);
  }
  
-+/* Check whether the IRQ line is shared with other guests. */
-+int xen_test_irq_shared(int irq)
-+{
-+	struct irq_info *info = info_for_irq(irq);
-+	struct physdev_irq_status_query irq_status = { .irq = info->u.pirq.pirq };
-+
-+	if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
-+		return 0;
-+	return !(irq_status.flags & XENIRQSTAT_shared);
-+}
-+EXPORT_SYMBOL_GPL(xen_test_irq_shared);
-+
- void xen_irq_resume(void)
+-static int pci_frontend_enable_msi(struct pci_dev *dev, int **vector)
++static int pci_frontend_enable_msi(struct pci_dev *dev, int vector[])
  {
- 	unsigned int cpu, irq, evtchn;
-diff --git a/include/xen/events.h b/include/xen/events.h
-index d146200..71fad28 100644
---- a/include/xen/events.h
-+++ b/include/xen/events.h
-@@ -93,4 +93,7 @@ int xen_gsi_from_irq(unsigned pirq);
- /* Return irq from pirq */
- int xen_irq_from_pirq(unsigned pirq);
+ 	int err;
+ 	struct xen_pci_op op = {
+@@ -324,7 +330,13 @@ static int pci_frontend_enable_msi(struct pci_dev *dev, int **vector)
  
-+/* Determine whether to ignore this IRQ if it is passed to a guest. */
-+int xen_test_irq_shared(int irq);
-+
- #endif	/* _XEN_EVENTS_H */
--- 
-1.7.4
-
-
-From 507762e78160d66c3381aae1dc8cbd595f73f73c Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ian.campbell at citrix.com>
-Date: Mon, 9 Feb 2009 12:05:50 -0800
-Subject: [PATCH 167/203] xen/irq: implement bind_interdomain_evtchn_to_irqhandler for backend drivers
-
-Impact: new Xen-internal API
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/events.c |   38 ++++++++++++++++++++++++++++++++++++++
- include/xen/events.h |    6 ++++++
- 2 files changed, 44 insertions(+), 0 deletions(-)
-
-diff --git a/drivers/xen/events.c b/drivers/xen/events.c
-index 3afd1f4..f300265 100644
---- a/drivers/xen/events.c
-+++ b/drivers/xen/events.c
-@@ -798,6 +798,21 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
- 	return irq;
- }
+ 	err = do_pci_op(pdev, &op);
+ 	if (likely(!err)) {
+-		*(*vector) = op.value;
++		vector[0] = op.value;
++		if (op.value <= 0) {
++			dev_warn(&dev->dev, "MSI entry is invalid: %d!\n",
++				op.value);
++			err = -EINVAL;
++			vector[0] = -1;
++		}
+ 	} else {
+ 		dev_err(&dev->dev, "pci frontend enable msi failed for dev "
+ 				    "%x:%x\n", op.bus, op.devfn);
+@@ -733,8 +745,7 @@ static void free_pdev(struct pcifront_device *pdev)
  
-+static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
-+					  unsigned int remote_port)
-+{
-+	struct evtchn_bind_interdomain bind_interdomain;
-+	int err;
+ 	pcifront_free_roots(pdev);
+ 
+-	/*For PCIE_AER error handling job*/
+-	flush_scheduled_work();
++	cancel_work_sync(&pdev->op_work);
+ 
+ 	if (pdev->irq >= 0)
+ 		unbind_from_irqhandler(pdev->irq, pdev);
+diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
+index 07bec09..e5ecae6 100644
+--- a/drivers/xen/Kconfig
++++ b/drivers/xen/Kconfig
+@@ -37,6 +37,79 @@ config XEN_BACKEND
+ 	  Support for backend device drivers that provide I/O services
+ 	  to other virtual machines.
+ 
++config XEN_BLKDEV_BACKEND
++	tristate "Block-device backend driver"
++	depends on XEN_BACKEND && BLOCK
++	help
++	  The block-device backend driver allows the kernel to export its
++	  block devices to other guests via a high-performance shared-memory
++	  interface.
++
++config XEN_PCIDEV_BACKEND
++	tristate "PCI-device backend driver"
++	depends on PCI
++	depends on XEN_BACKEND
++	help
++	  The PCI device backend driver allows the kernel to export arbitrary
++	  PCI devices to other guests. If you select this to be a module, you
++	  will need to make sure no other driver has bound to the device(s)
++	  you want to make visible to other guests.
++
++choice
++	prompt "PCI Backend Mode"
++	depends on XEN_PCIDEV_BACKEND
++	default XEN_PCIDEV_BACKEND_VPCI if !IA64
++	default XEN_PCIDEV_BACKEND_CONTROLLER if IA64
 +
-+	bind_interdomain.remote_dom  = remote_domain;
-+	bind_interdomain.remote_port = remote_port;
++config XEN_PCIDEV_BACKEND_VPCI
++	bool "Virtual PCI"
++	---help---
++	  This PCI Backend hides the true PCI topology and makes the frontend
++	  think there is a single PCI bus with only the exported devices on it.
++	  For example, a device at 03:05.0 will be re-assigned to 00:00.0. A
++	  second device at 02:1a.1 will be re-assigned to 00:01.1.
 +
-+	err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
-+					  &bind_interdomain);
++config XEN_PCIDEV_BACKEND_PASS
++	bool "Passthrough"
++	---help---
++	  This PCI Backend provides a real view of the PCI topology to the
++	  frontend (for example, a device at 06:01.b will still appear at
++	  06:01.b to the frontend). This is similar to how Xen 2.0.x exposed
++	  PCI devices to its driver domains. This may be required for drivers
++	  which depend on finding their hardware in certain bus/slot
++	  locations.
 +
-+	return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
-+}
++config XEN_PCIDEV_BACKEND_SLOT
++	bool "Slot"
++	---help---
++	  This PCI Backend hides the true PCI topology and makes the frontend
++	  think there is a single PCI bus with only the exported devices on it.
++	  Contrary to the virtual PCI backend, a function becomes a new slot.
++	  For example, a device at 03:05.2 will be re-assigned to 00:00.0. A
++	  second device at 02:1a.1 will be re-assigned to 00:01.0.
 +
- 
- int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
- {
-@@ -893,6 +908,29 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn,
- }
- EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
- 
-+int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
-+					  unsigned int remote_port,
-+					  irq_handler_t handler,
-+					  unsigned long irqflags,
-+					  const char *devname,
-+					  void *dev_id)
-+{
-+	int irq, retval;
++config XEN_PCIDEV_BACKEND_CONTROLLER
++	bool "Controller"
++	depends on IA64
++	---help---
++	  This PCI backend virtualizes the PCI bus topology by providing a
++	  virtual bus per PCI root device.  Devices which are physically under
++	  the same root bus will appear on the same virtual bus.  For systems
++	  with complex I/O addressing, this is the only backend which supports
++	  extended I/O port spaces and MMIO translation offsets.  This backend
++	  also supports slot virtualization.  For example, a device at
++	  0000:01:02.1 will be re-assigned to 0000:00:00.0.  A second device
++	  at 0000:02:05.0 (behind a P2P bridge on bus 0000:01) will be
++	  re-assigned to 0000:00:01.0.  A third device at 0000:16:05.0 (under
++	  a different PCI root bus) will be re-assigned to 0000:01:00.0.
 +
-+	irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port);
-+	if (irq < 0)
-+		return irq;
++endchoice
 +
-+	retval = request_irq(irq, handler, irqflags, devname, dev_id);
-+	if (retval != 0) {
-+		unbind_from_irq(irq);
-+		return retval;
-+	}
++config XEN_PCIDEV_BE_DEBUG
++	bool "PCI Backend Debugging"
++	depends on XEN_PCIDEV_BACKEND
 +
-+	return irq;
-+}
-+EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler);
 +
- int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
- 			    irq_handler_t handler,
- 			    unsigned long irqflags, const char *devname, void *dev_id)
-diff --git a/include/xen/events.h b/include/xen/events.h
-index 71fad28..23de713 100644
---- a/include/xen/events.h
-+++ b/include/xen/events.h
-@@ -23,6 +23,12 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
- 			   unsigned long irqflags,
- 			   const char *devname,
- 			   void *dev_id);
-+int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
-+					  unsigned int remote_port,
-+					  irq_handler_t handler,
-+					  unsigned long irqflags,
-+					  const char *devname,
-+					  void *dev_id);
- 
- /*
-  * Common unbind function for all event sources. Takes IRQ to unbind from.
--- 
-1.7.4
-
-
-From faf6ced2cde784ff515274f89278f222fefdb24a Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Thu, 24 Feb 2011 14:20:12 -0500
-Subject: [PATCH 168/203] xen/irq: Export 'xen_pirq_from_irq' function.
-
-We need this to find the real Xen PIRQ value for a device
-that requests an MSI or MSI-X. In the past we used
-'xen_gsi_from_irq' since that function would return
-an Xen PIRQ or GSI depending on the provided IRQ. Now that
-we have seperated that we need to use the correct
-function.
-
-[v2: Deal with rebase on stable/irq.cleanup]
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/events.c |    6 ++++++
- include/xen/events.h |    3 +++
- 2 files changed, 9 insertions(+), 0 deletions(-)
-
-diff --git a/drivers/xen/events.c b/drivers/xen/events.c
-index f300265..e285c11 100644
---- a/drivers/xen/events.c
-+++ b/drivers/xen/events.c
-@@ -739,6 +739,12 @@ int xen_irq_from_pirq(unsigned pirq)
- 	return pirq_to_irq[pirq];
- }
+ config XENFS
+ 	tristate "Xen filesystem"
+ 	default y
+@@ -76,10 +149,20 @@ config XEN_XENBUS_FRONTEND
+ config XEN_GNTDEV
+ 	tristate "userspace grant access device driver"
+ 	depends on XEN
++	default m
+ 	select MMU_NOTIFIER
+ 	help
+ 	  Allows userspace processes to use grants.
  
++config XEN_GRANT_DEV_ALLOC
++	tristate "User-space grant reference allocator driver"
++	depends on XEN
++	default m
++	help
++	  Allows userspace processes to create pages with access granted
++	  to other domains. This can be used to implement frontend drivers
++	  or as part of an inter-domain shared memory channel.
 +
-+int xen_pirq_from_irq(unsigned irq)
-+{
-+	return pirq_from_irq(irq);
-+}
-+EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
- int bind_evtchn_to_irq(unsigned int evtchn)
+ config XEN_PLATFORM_PCI
+ 	tristate "xen platform pci device driver"
+ 	depends on XEN_PVHVM && PCI
+diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
+index 5088cc2..c1cb873 100644
+--- a/drivers/xen/Makefile
++++ b/drivers/xen/Makefile
+@@ -9,7 +9,10 @@ obj-$(CONFIG_HOTPLUG_CPU)	+= cpu_hotplug.o
+ obj-$(CONFIG_XEN_XENCOMM)	+= xencomm.o
+ obj-$(CONFIG_XEN_BALLOON)	+= balloon.o
+ obj-$(CONFIG_XEN_DEV_EVTCHN)	+= xen-evtchn.o
++obj-$(CONFIG_XEN_PCIDEV_BACKEND)	+= pciback/
++obj-$(CONFIG_XEN_BLKDEV_BACKEND)	+= blkback/
+ obj-$(CONFIG_XEN_GNTDEV)	+= xen-gntdev.o
++obj-$(CONFIG_XEN_GRANT_DEV_ALLOC)	+= xen-gntalloc.o
+ obj-$(CONFIG_XENFS)		+= xenfs/
+ obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
+ obj-$(CONFIG_XEN_PLATFORM_PCI)	+= xen-platform-pci.o
+@@ -18,5 +21,6 @@ obj-$(CONFIG_XEN_DOM0)		+= pci.o
+ 
+ xen-evtchn-y			:= evtchn.o
+ xen-gntdev-y				:= gntdev.o
++xen-gntalloc-y				:= gntalloc.o
+ 
+ xen-platform-pci-y		:= platform-pci.o
+diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
+index 43f9f02..718050a 100644
+--- a/drivers/xen/balloon.c
++++ b/drivers/xen/balloon.c
+@@ -232,7 +232,7 @@ static int increase_reservation(unsigned long nr_pages)
+ 		set_phys_to_machine(pfn, frame_list[i]);
+ 
+ 		/* Link back into the page tables if not highmem. */
+-		if (pfn < max_low_pfn) {
++		if (!xen_hvm_domain() && pfn < max_low_pfn) {
+ 			int ret;
+ 			ret = HYPERVISOR_update_va_mapping(
+ 				(unsigned long)__va(pfn << PAGE_SHIFT),
+@@ -280,7 +280,7 @@ static int decrease_reservation(unsigned long nr_pages)
+ 
+ 		scrub_page(page);
+ 
+-		if (!PageHighMem(page)) {
++		if (!xen_hvm_domain() && !PageHighMem(page)) {
+ 			ret = HYPERVISOR_update_va_mapping(
+ 				(unsigned long)__va(pfn << PAGE_SHIFT),
+ 				__pte_ma(0), 0);
+@@ -296,7 +296,7 @@ static int decrease_reservation(unsigned long nr_pages)
+ 	/* No more mappings: invalidate P2M and add to balloon. */
+ 	for (i = 0; i < nr_pages; i++) {
+ 		pfn = mfn_to_pfn(frame_list[i]);
+-		set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
++		__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+ 		balloon_append(pfn_to_page(pfn));
+ 	}
+ 
+@@ -392,15 +392,19 @@ static struct notifier_block xenstore_notifier;
+ 
+ static int __init balloon_init(void)
  {
- 	int irq;
-diff --git a/include/xen/events.h b/include/xen/events.h
-index 23de713..a0c8185 100644
---- a/include/xen/events.h
-+++ b/include/xen/events.h
-@@ -99,6 +99,9 @@ int xen_gsi_from_irq(unsigned pirq);
- /* Return irq from pirq */
- int xen_irq_from_pirq(unsigned pirq);
+-	unsigned long pfn, extra_pfn_end;
++ 	unsigned long pfn, nr_pages, extra_pfn_end;
+ 	struct page *page;
  
-+/* Return the pirq allocated to the irq. */
-+int xen_pirq_from_irq(unsigned irq);
-+
- /* Determine whether to ignore this IRQ if it is passed to a guest. */
- int xen_test_irq_shared(int irq);
- 
--- 
-1.7.4
-
-
-From 57d64e41477079960a5e2b5f5923121bd9118d47 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Wed, 16 Feb 2011 16:26:44 -0500
-Subject: [PATCH 169/203] xen/irq: The Xen hypervisor cleans up the PIRQs if the other domain forgot.
-
-And if the other domain forgot to clean up its PIRQs we don't need
-to fail the operation. Just take a note of it and continue on.
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/events.c |    9 ++++++++-
- 1 files changed, 8 insertions(+), 1 deletions(-)
-
-diff --git a/drivers/xen/events.c b/drivers/xen/events.c
-index e285c11..803c71a 100644
---- a/drivers/xen/events.c
-+++ b/drivers/xen/events.c
-@@ -708,7 +708,14 @@ int xen_destroy_irq(int irq)
- 		unmap_irq.pirq = info->u.pirq.pirq;
- 		unmap_irq.domid = info->u.pirq.domid;
- 		rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq);
--		if (rc) {
-+		/* If another domain quits without making the pci_disable_msix
-+		 * call, the Xen hypervisor takes care of freeing the PIRQs
-+		 * (free_domain_pirqs).
-+		 */
-+		if ((rc == -ESRCH && info->u.pirq.domid != DOMID_SELF))
-+			printk(KERN_INFO "domain %d does not have %d anymore\n",
-+				info->u.pirq.domid, info->u.pirq.pirq);
-+		else if (rc) {
- 			printk(KERN_WARNING "unmap irq failed %d\n", rc);
- 			goto out;
- 		}
--- 
-1.7.4
-
-
-From 6d3057d191cd8cedae334c5182c06d2ceec7966e Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Tue, 13 Oct 2009 17:22:20 -0400
-Subject: [PATCH 170/203] xen-pciback: Initial copy from linux-2.6.18.hg off pciback driver.
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/pciback/Makefile                    |   17 +
- drivers/xen/pciback/conf_space.c                |  435 ++++++++
- drivers/xen/pciback/conf_space.h                |  126 +++
- drivers/xen/pciback/conf_space_capability.c     |   69 ++
- drivers/xen/pciback/conf_space_capability.h     |   23 +
- drivers/xen/pciback/conf_space_capability_msi.c |   79 ++
- drivers/xen/pciback/conf_space_capability_pm.c  |  126 +++
- drivers/xen/pciback/conf_space_capability_vpd.c |   40 +
- drivers/xen/pciback/conf_space_header.c         |  317 ++++++
- drivers/xen/pciback/conf_space_quirks.c         |  138 +++
- drivers/xen/pciback/conf_space_quirks.h         |   35 +
- drivers/xen/pciback/controller.c                |  443 ++++++++
- drivers/xen/pciback/passthrough.c               |  176 +++
- drivers/xen/pciback/pci_stub.c                  | 1316 +++++++++++++++++++++++
- drivers/xen/pciback/pciback.h                   |  126 +++
- drivers/xen/pciback/pciback_ops.c               |  134 +++
- drivers/xen/pciback/slot.c                      |  187 ++++
- drivers/xen/pciback/vpci.c                      |  242 +++++
- drivers/xen/pciback/xenbus.c                    |  710 ++++++++++++
- 19 files changed, 4739 insertions(+), 0 deletions(-)
- create mode 100644 drivers/xen/pciback/Makefile
- create mode 100644 drivers/xen/pciback/conf_space.c
- create mode 100644 drivers/xen/pciback/conf_space.h
- create mode 100644 drivers/xen/pciback/conf_space_capability.c
- create mode 100644 drivers/xen/pciback/conf_space_capability.h
- create mode 100644 drivers/xen/pciback/conf_space_capability_msi.c
- create mode 100644 drivers/xen/pciback/conf_space_capability_pm.c
- create mode 100644 drivers/xen/pciback/conf_space_capability_vpd.c
- create mode 100644 drivers/xen/pciback/conf_space_header.c
- create mode 100644 drivers/xen/pciback/conf_space_quirks.c
- create mode 100644 drivers/xen/pciback/conf_space_quirks.h
- create mode 100644 drivers/xen/pciback/controller.c
- create mode 100644 drivers/xen/pciback/passthrough.c
- create mode 100644 drivers/xen/pciback/pci_stub.c
- create mode 100644 drivers/xen/pciback/pciback.h
- create mode 100644 drivers/xen/pciback/pciback_ops.c
- create mode 100644 drivers/xen/pciback/slot.c
- create mode 100644 drivers/xen/pciback/vpci.c
- create mode 100644 drivers/xen/pciback/xenbus.c
-
-diff --git a/drivers/xen/pciback/Makefile b/drivers/xen/pciback/Makefile
+-	if (!xen_pv_domain())
++	if (!xen_domain())
+ 		return -ENODEV;
+ 
+ 	pr_info("xen_balloon: Initialising balloon driver.\n");
+ 
+-	balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn);
++ 	if (xen_pv_domain())
++ 		nr_pages = xen_start_info->nr_pages;
++ 	else
++ 		nr_pages = max_pfn;
++ 	balloon_stats.current_pages = min(nr_pages, max_pfn);
+ 	balloon_stats.target_pages  = balloon_stats.current_pages;
+ 	balloon_stats.balloon_low   = 0;
+ 	balloon_stats.balloon_high  = 0;
+diff --git a/drivers/xen/blkback/Makefile b/drivers/xen/blkback/Makefile
 new file mode 100644
-index 0000000..106dae7
+index 0000000..f1ae1ff
 --- /dev/null
-+++ b/drivers/xen/pciback/Makefile
-@@ -0,0 +1,17 @@
-+obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback.o
-+
-+pciback-y := pci_stub.o pciback_ops.o xenbus.o
-+pciback-y += conf_space.o conf_space_header.o \
-+	     conf_space_capability.o \
-+	     conf_space_capability_vpd.o \
-+	     conf_space_capability_pm.o \
-+             conf_space_quirks.o
-+pciback-$(CONFIG_PCI_MSI) += conf_space_capability_msi.o
-+pciback-$(CONFIG_XEN_PCIDEV_BACKEND_VPCI) += vpci.o
-+pciback-$(CONFIG_XEN_PCIDEV_BACKEND_SLOT) += slot.o
-+pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o
-+pciback-$(CONFIG_XEN_PCIDEV_BACKEND_CONTROLLER) += controller.o
++++ b/drivers/xen/blkback/Makefile
+@@ -0,0 +1,3 @@
++obj-$(CONFIG_XEN_BLKDEV_BACKEND) := xen-blkback.o
 +
-+ifeq ($(CONFIG_XEN_PCIDEV_BE_DEBUG),y)
-+EXTRA_CFLAGS += -DDEBUG
-+endif
-diff --git a/drivers/xen/pciback/conf_space.c b/drivers/xen/pciback/conf_space.c
++xen-blkback-y	:= blkback.o xenbus.o interface.o vbd.o
+diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c
 new file mode 100644
-index 0000000..0c76db1
+index 0000000..15790ae
 --- /dev/null
-+++ b/drivers/xen/pciback/conf_space.c
-@@ -0,0 +1,435 @@
++++ b/drivers/xen/blkback/blkback.c
+@@ -0,0 +1,708 @@
++/******************************************************************************
++ * arch/xen/drivers/blkif/backend/main.c
++ *
++ * Back-end of the driver for virtual block devices. This portion of the
++ * driver exports a 'unified' block-device interface that can be accessed
++ * by any operating system that implements a compatible front end. A
++ * reference front-end implementation can be found in:
++ *  arch/xen/drivers/blkif/frontend
++ *
++ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
++ * Copyright (c) 2005, Christopher Clark
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include <linux/spinlock.h>
++#include <linux/kthread.h>
++#include <linux/list.h>
++#include <linux/delay.h>
++#include <linux/freezer.h>
++
++#include <xen/events.h>
++#include <xen/page.h>
++#include <asm/xen/hypervisor.h>
++#include <asm/xen/hypercall.h>
++#include "common.h"
++
 +/*
-+ * PCI Backend - Functions for creating a virtual configuration space for
-+ *               exported PCI Devices.
-+ *               It's dangerous to allow PCI Driver Domains to change their
-+ *               device's resources (memory, i/o ports, interrupts). We need to
-+ *               restrict changes to certain PCI Configuration registers:
-+ *               BARs, INTERRUPT_PIN, most registers in the header...
++ * These are rather arbitrary. They are fairly large because adjacent requests
++ * pulled from a communication ring are quite likely to end up being part of
++ * the same scatter/gather request at the disc.
 + *
-+ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
++ *
++ * This will increase the chances of being able to write whole tracks.
++ * 64 should be enough to keep us competitive with Linux.
 + */
++static int blkif_reqs = 64;
++module_param_named(reqs, blkif_reqs, int, 0);
++MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");
 +
-+#include <linux/kernel.h>
-+#include <linux/pci.h>
-+#include "pciback.h"
-+#include "conf_space.h"
-+#include "conf_space_quirks.h"
++/* Run-time switchable: /sys/module/blkback/parameters/ */
++static unsigned int log_stats = 0;
++static unsigned int debug_lvl = 0;
++module_param(log_stats, int, 0644);
++module_param(debug_lvl, int, 0644);
++
++/*
++ * Each outstanding request that we've passed to the lower device layers has a
++ * 'pending_req' allocated to it. Each buffer_head that completes decrements
++ * the pendcnt towards zero. When it hits zero, the specified domain has a
++ * response queued for it, with the saved 'id' passed back.
++ */
++typedef struct {
++	blkif_t       *blkif;
++	u64            id;
++	int            nr_pages;
++	atomic_t       pendcnt;
++	unsigned short operation;
++	int            status;
++	struct list_head free_list;
++} pending_req_t;
++
++#define BLKBACK_INVALID_HANDLE (~0)
++
++struct xen_blkbk {
++	pending_req_t	*pending_reqs;
++	struct list_head	pending_free;
++	spinlock_t		pending_free_lock;
++	wait_queue_head_t	pending_free_wq;
++	struct page		**pending_pages;
++	grant_handle_t		*pending_grant_handles;
++};
++
++static struct xen_blkbk *blkbk;
 +
-+static int permissive;
-+module_param(permissive, bool, 0644);
++static inline int vaddr_pagenr(pending_req_t *req, int seg)
++{
++	return (req - blkbk->pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
++}
 +
-+#define DEFINE_PCI_CONFIG(op,size,type) 			\
-+int pciback_##op##_config_##size 				\
-+(struct pci_dev *dev, int offset, type value, void *data)	\
-+{								\
-+	return pci_##op##_config_##size (dev, offset, value);	\
++#define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)]
++
++static inline unsigned long vaddr(pending_req_t *req, int seg)
++{
++	unsigned long pfn = page_to_pfn(blkbk->pending_page(req, seg));
++	return (unsigned long)pfn_to_kaddr(pfn);
 +}
 +
-+DEFINE_PCI_CONFIG(read, byte, u8 *)
-+DEFINE_PCI_CONFIG(read, word, u16 *)
-+DEFINE_PCI_CONFIG(read, dword, u32 *)
++#define pending_handle(_req, _seg) \
++	(blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)])
 +
-+DEFINE_PCI_CONFIG(write, byte, u8)
-+DEFINE_PCI_CONFIG(write, word, u16)
-+DEFINE_PCI_CONFIG(write, dword, u32)
 +
-+static int conf_space_read(struct pci_dev *dev,
-+			   const struct config_field_entry *entry,
-+			   int offset, u32 *value)
-+{
-+	int ret = 0;
-+	const struct config_field *field = entry->field;
++static int do_block_io_op(blkif_t *blkif);
++static void dispatch_rw_block_io(blkif_t *blkif,
++				 struct blkif_request *req,
++				 pending_req_t *pending_req);
++static void make_response(blkif_t *blkif, u64 id,
++			  unsigned short op, int st);
 +
-+	*value = 0;
++/******************************************************************
++ * misc small helpers
++ */
++static pending_req_t* alloc_req(void)
++{
++	pending_req_t *req = NULL;
++	unsigned long flags;
 +
-+	switch (field->size) {
-+	case 1:
-+		if (field->u.b.read)
-+			ret = field->u.b.read(dev, offset, (u8 *) value,
-+					      entry->data);
-+		break;
-+	case 2:
-+		if (field->u.w.read)
-+			ret = field->u.w.read(dev, offset, (u16 *) value,
-+					      entry->data);
-+		break;
-+	case 4:
-+		if (field->u.dw.read)
-+			ret = field->u.dw.read(dev, offset, value, entry->data);
-+		break;
++	spin_lock_irqsave(&blkbk->pending_free_lock, flags);
++	if (!list_empty(&blkbk->pending_free)) {
++		req = list_entry(blkbk->pending_free.next, pending_req_t, free_list);
++		list_del(&req->free_list);
 +	}
-+	return ret;
++	spin_unlock_irqrestore(&blkbk->pending_free_lock, flags);
++	return req;
 +}
 +
-+static int conf_space_write(struct pci_dev *dev,
-+			    const struct config_field_entry *entry,
-+			    int offset, u32 value)
++static void free_req(pending_req_t *req)
 +{
-+	int ret = 0;
-+	const struct config_field *field = entry->field;
++	unsigned long flags;
++	int was_empty;
 +
-+	switch (field->size) {
-+	case 1:
-+		if (field->u.b.write)
-+			ret = field->u.b.write(dev, offset, (u8) value,
-+					       entry->data);
-+		break;
-+	case 2:
-+		if (field->u.w.write)
-+			ret = field->u.w.write(dev, offset, (u16) value,
-+					       entry->data);
-+		break;
-+	case 4:
-+		if (field->u.dw.write)
-+			ret = field->u.dw.write(dev, offset, value,
-+						entry->data);
-+		break;
-+	}
-+	return ret;
++	spin_lock_irqsave(&blkbk->pending_free_lock, flags);
++	was_empty = list_empty(&blkbk->pending_free);
++	list_add(&req->free_list, &blkbk->pending_free);
++	spin_unlock_irqrestore(&blkbk->pending_free_lock, flags);
++	if (was_empty)
++		wake_up(&blkbk->pending_free_wq);
 +}
 +
-+static inline u32 get_mask(int size)
++static void unplug_queue(blkif_t *blkif)
 +{
-+	if (size == 1)
-+		return 0xff;
-+	else if (size == 2)
-+		return 0xffff;
-+	else
-+		return 0xffffffff;
++	if (blkif->plug == NULL)
++		return;
++	if (blkif->plug->unplug_fn)
++		blkif->plug->unplug_fn(blkif->plug);
++	blk_put_queue(blkif->plug);
++	blkif->plug = NULL;
 +}
 +
-+static inline int valid_request(int offset, int size)
++static void plug_queue(blkif_t *blkif, struct block_device *bdev)
 +{
-+	/* Validate request (no un-aligned requests) */
-+	if ((size == 1 || size == 2 || size == 4) && (offset % size) == 0)
-+		return 1;
-+	return 0;
++	struct request_queue *q = bdev_get_queue(bdev);
++
++	if (q == blkif->plug)
++		return;
++	unplug_queue(blkif);
++	blk_get_queue(q);
++	blkif->plug = q;
 +}
 +
-+static inline u32 merge_value(u32 val, u32 new_val, u32 new_val_mask,
-+			      int offset)
++static void fast_flush_area(pending_req_t *req)
 +{
-+	if (offset >= 0) {
-+		new_val_mask <<= (offset * 8);
-+		new_val <<= (offset * 8);
-+	} else {
-+		new_val_mask >>= (offset * -8);
-+		new_val >>= (offset * -8);
++	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
++	unsigned int i, invcount = 0;
++	grant_handle_t handle;
++	int ret;
++
++	for (i = 0; i < req->nr_pages; i++) {
++		handle = pending_handle(req, i);
++		if (handle == BLKBACK_INVALID_HANDLE)
++			continue;
++		gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i),
++				    GNTMAP_host_map, handle);
++		pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
++		invcount++;
 +	}
-+	val = (val & ~new_val_mask) | (new_val & new_val_mask);
 +
-+	return val;
++	ret = HYPERVISOR_grant_table_op(
++		GNTTABOP_unmap_grant_ref, unmap, invcount);
++	BUG_ON(ret);
++	/* Note, we use invcount, not req->nr_pages, so we can't index
++	 * using vaddr(req, i). */
++	for (i = 0; i < invcount; i++) {
++		ret = m2p_remove_override(
++			virt_to_page(unmap[i].host_addr), false);
++		if (ret) {
++			printk(KERN_ALERT "Failed to remove M2P override for " \
++				"%lx\n", (unsigned long)unmap[i].host_addr);
++			continue;
++		}
++	}
 +}
 +
-+static int pcibios_err_to_errno(int err)
++/******************************************************************
++ * SCHEDULER FUNCTIONS
++ */
++
++static void print_stats(blkif_t *blkif)
 +{
-+	switch (err) {
-+	case PCIBIOS_SUCCESSFUL:
-+		return XEN_PCI_ERR_success;
-+	case PCIBIOS_DEVICE_NOT_FOUND:
-+		return XEN_PCI_ERR_dev_not_found;
-+	case PCIBIOS_BAD_REGISTER_NUMBER:
-+		return XEN_PCI_ERR_invalid_offset;
-+	case PCIBIOS_FUNC_NOT_SUPPORTED:
-+		return XEN_PCI_ERR_not_implemented;
-+	case PCIBIOS_SET_FAILED:
-+		return XEN_PCI_ERR_access_denied;
-+	}
-+	return err;
++	printk(KERN_DEBUG "%s: oo %3d  |  rd %4d  |  wr %4d  |  br %4d\n",
++	       current->comm, blkif->st_oo_req,
++	       blkif->st_rd_req, blkif->st_wr_req, blkif->st_br_req);
++	blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
++	blkif->st_rd_req = 0;
++	blkif->st_wr_req = 0;
++	blkif->st_oo_req = 0;
 +}
 +
-+int pciback_config_read(struct pci_dev *dev, int offset, int size,
-+			u32 * ret_val)
++int blkif_schedule(void *arg)
 +{
-+	int err = 0;
-+	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
-+	const struct config_field_entry *cfg_entry;
-+	const struct config_field *field;
-+	int req_start, req_end, field_start, field_end;
-+	/* if read fails for any reason, return 0 (as if device didn't respond) */
-+	u32 value = 0, tmp_val;
++	blkif_t *blkif = arg;
++	struct vbd *vbd = &blkif->vbd;
 +
-+	if (unlikely(verbose_request))
-+		printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x\n",
-+		       pci_name(dev), size, offset);
++	blkif_get(blkif);
 +
-+	if (!valid_request(offset, size)) {
-+		err = XEN_PCI_ERR_invalid_offset;
-+		goto out;
-+	}
++	if (debug_lvl)
++		printk(KERN_DEBUG "%s: started\n", current->comm);
 +
-+	/* Get the real value first, then modify as appropriate */
-+	switch (size) {
-+	case 1:
-+		err = pci_read_config_byte(dev, offset, (u8 *) & value);
-+		break;
-+	case 2:
-+		err = pci_read_config_word(dev, offset, (u16 *) & value);
-+		break;
-+	case 4:
-+		err = pci_read_config_dword(dev, offset, &value);
-+		break;
++	while (!kthread_should_stop()) {
++		if (try_to_freeze())
++			continue;
++		if (unlikely(vbd->size != vbd_size(vbd)))
++			vbd_resize(blkif);
++
++		wait_event_interruptible(
++			blkif->wq,
++			blkif->waiting_reqs || kthread_should_stop());
++		wait_event_interruptible(
++			blkbk->pending_free_wq,
++			!list_empty(&blkbk->pending_free) || kthread_should_stop());
++
++		blkif->waiting_reqs = 0;
++		smp_mb(); /* clear flag *before* checking for work */
++
++		if (do_block_io_op(blkif))
++			blkif->waiting_reqs = 1;
++		unplug_queue(blkif);
++
++		if (log_stats && time_after(jiffies, blkif->st_print))
++			print_stats(blkif);
 +	}
 +
-+	list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
-+		field = cfg_entry->field;
++	if (log_stats)
++		print_stats(blkif);
++	if (debug_lvl)
++		printk(KERN_DEBUG "%s: exiting\n", current->comm);
 +
-+		req_start = offset;
-+		req_end = offset + size;
-+		field_start = OFFSET(cfg_entry);
-+		field_end = OFFSET(cfg_entry) + field->size;
++	blkif->xenblkd = NULL;
++	blkif_put(blkif);
 +
-+		if ((req_start >= field_start && req_start < field_end)
-+		    || (req_end > field_start && req_end <= field_end)) {
-+			err = conf_space_read(dev, cfg_entry, field_start,
-+					      &tmp_val);
-+			if (err)
-+				goto out;
++	return 0;
++}
 +
-+			value = merge_value(value, tmp_val,
-+					    get_mask(field->size),
-+					    field_start - req_start);
-+		}
++/******************************************************************
++ * COMPLETION CALLBACK -- Called as bh->b_end_io()
++ */
++
++static void __end_block_io_op(pending_req_t *pending_req, int error)
++{
++	/* An error fails the entire request. */
++	if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
++	    (error == -EOPNOTSUPP)) {
++		DPRINTK("blkback: write barrier op failed, not supported\n");
++		blkback_barrier(XBT_NIL, pending_req->blkif->be, 0);
++		pending_req->status = BLKIF_RSP_EOPNOTSUPP;
++	} else if (error) {
++		DPRINTK("Buffer not up-to-date at end of operation, "
++			"error=%d\n", error);
++		pending_req->status = BLKIF_RSP_ERROR;
 +	}
 +
-+      out:
-+	if (unlikely(verbose_request))
-+		printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x = %x\n",
-+		       pci_name(dev), size, offset, value);
++	if (atomic_dec_and_test(&pending_req->pendcnt)) {
++		fast_flush_area(pending_req);
++		make_response(pending_req->blkif, pending_req->id,
++			      pending_req->operation, pending_req->status);
++		blkif_put(pending_req->blkif);
++		free_req(pending_req);
++	}
++}
 +
-+	*ret_val = value;
-+	return pcibios_err_to_errno(err);
++static void end_block_io_op(struct bio *bio, int error)
++{
++	__end_block_io_op(bio->bi_private, error);
++	bio_put(bio);
 +}
 +
-+int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value)
++
++/******************************************************************************
++ * NOTIFICATION FROM GUEST OS.
++ */
++
++static void blkif_notify_work(blkif_t *blkif)
 +{
-+	int err = 0, handled = 0;
-+	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
-+	const struct config_field_entry *cfg_entry;
-+	const struct config_field *field;
-+	u32 tmp_val;
-+	int req_start, req_end, field_start, field_end;
++	blkif->waiting_reqs = 1;
++	wake_up(&blkif->wq);
++}
 +
-+	if (unlikely(verbose_request))
-+		printk(KERN_DEBUG
-+		       "pciback: %s: write request %d bytes at 0x%x = %x\n",
-+		       pci_name(dev), size, offset, value);
++irqreturn_t blkif_be_int(int irq, void *dev_id)
++{
++	blkif_notify_work(dev_id);
++	return IRQ_HANDLED;
++}
 +
-+	if (!valid_request(offset, size))
-+		return XEN_PCI_ERR_invalid_offset;
 +
-+	list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
-+		field = cfg_entry->field;
 +
-+		req_start = offset;
-+		req_end = offset + size;
-+		field_start = OFFSET(cfg_entry);
-+		field_end = OFFSET(cfg_entry) + field->size;
++/******************************************************************
++ * DOWNWARD CALLS -- These interface with the block-device layer proper.
++ */
 +
-+		if ((req_start >= field_start && req_start < field_end)
-+		    || (req_end > field_start && req_end <= field_end)) {
-+			tmp_val = 0;
++static int do_block_io_op(blkif_t *blkif)
++{
++	union blkif_back_rings *blk_rings = &blkif->blk_rings;
++	struct blkif_request req;
++	pending_req_t *pending_req;
++	RING_IDX rc, rp;
++	int more_to_do = 0;
 +
-+			err = pciback_config_read(dev, field_start,
-+						  field->size, &tmp_val);
-+			if (err)
-+				break;
++	rc = blk_rings->common.req_cons;
++	rp = blk_rings->common.sring->req_prod;
++	rmb(); /* Ensure we see queued requests up to 'rp'. */
 +
-+			tmp_val = merge_value(tmp_val, value, get_mask(size),
-+					      req_start - field_start);
++	while (rc != rp) {
 +
-+			err = conf_space_write(dev, cfg_entry, field_start,
-+					       tmp_val);
++		if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
++			break;
 +
-+			/* handled is set true here, but not every byte
-+			 * may have been written! Properly detecting if
-+			 * every byte is handled is unnecessary as the
-+			 * flag is used to detect devices that need
-+			 * special helpers to work correctly.
-+			 */
-+			handled = 1;
++		if (kthread_should_stop()) {
++			more_to_do = 1;
++			break;
 +		}
-+	}
 +
-+	if (!handled && !err) {
-+		/* By default, anything not specificially handled above is
-+		 * read-only. The permissive flag changes this behavior so
-+		 * that anything not specifically handled above is writable.
-+		 * This means that some fields may still be read-only because
-+		 * they have entries in the config_field list that intercept
-+		 * the write and do nothing. */
-+		if (dev_data->permissive || permissive) {
-+			switch (size) {
-+			case 1:
-+				err = pci_write_config_byte(dev, offset,
-+							    (u8) value);
-+				break;
-+			case 2:
-+				err = pci_write_config_word(dev, offset,
-+							    (u16) value);
-+				break;
-+			case 4:
-+				err = pci_write_config_dword(dev, offset,
-+							     (u32) value);
-+				break;
-+			}
-+		} else if (!dev_data->warned_on_write) {
-+			dev_data->warned_on_write = 1;
-+			dev_warn(&dev->dev, "Driver tried to write to a "
-+				 "read-only configuration space field at offset "
-+				 "0x%x, size %d. This may be harmless, but if "
-+				 "you have problems with your device:\n"
-+				 "1) see permissive attribute in sysfs\n"
-+				 "2) report problems to the xen-devel "
-+				 "mailing list along with details of your "
-+				 "device obtained from lspci.\n", offset, size);
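++		/* No free pending_req slot: note the out-of-resource event
++		 * and let the next pass of the scheduler thread retry. */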
++		pending_req = alloc_req();
++		if (NULL == pending_req) {
++			blkif->st_oo_req++;
++			more_to_do = 1;
++			break;
++		}
++
++		switch (blkif->blk_protocol) {
++		case BLKIF_PROTOCOL_NATIVE:
++			memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req));
++			break;
++		case BLKIF_PROTOCOL_X86_32:
++			blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc));
++			break;
++		case BLKIF_PROTOCOL_X86_64:
++			blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc));
++			break;
++		default:
++			BUG();
++		}
++		blk_rings->common.req_cons = ++rc; /* before make_response() */
++
++		/* Apply all sanity checks to /private copy/ of request. */
++		barrier();
++
++		switch (req.operation) {
++		case BLKIF_OP_READ:
++			blkif->st_rd_req++;
++			dispatch_rw_block_io(blkif, &req, pending_req);
++			break;
++		case BLKIF_OP_WRITE_BARRIER:
++			blkif->st_br_req++;
++			/* fall through */
++		case BLKIF_OP_WRITE:
++			blkif->st_wr_req++;
++			dispatch_rw_block_io(blkif, &req, pending_req);
++			break;
++		default:
++			/* A good sign something is wrong: sleep for a while to
++			 * avoid excessive CPU consumption by a bad guest. */
++			msleep(1);
++			DPRINTK("error: unknown block io operation [%d]\n",
++				req.operation);
++			make_response(blkif, req.id, req.operation,
++				      BLKIF_RSP_ERROR);
++			free_req(pending_req);
++			break;
 +		}
++
++		/* Yield point for this unbounded loop. */
++		cond_resched();
 +	}
 +
-+	return pcibios_err_to_errno(err);
++	return more_to_do;
 +}
 +
-+void pciback_config_free_dyn_fields(struct pci_dev *dev)
++static void dispatch_rw_block_io(blkif_t *blkif,
++				 struct blkif_request *req,
++				 pending_req_t *pending_req)
 +{
-+	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
-+	struct config_field_entry *cfg_entry, *t;
-+	const struct config_field *field;
++	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
++	struct phys_req preq;
++	struct {
++		unsigned long buf; unsigned int nsec;
++	} seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
++	unsigned int nseg;
++	struct bio *bio = NULL;
++	int ret, i;
++	int operation;
++
++	switch (req->operation) {
++	case BLKIF_OP_READ:
++		operation = READ;
++		break;
++	case BLKIF_OP_WRITE:
++		operation = WRITE;
++		break;
++	case BLKIF_OP_WRITE_BARRIER:
++		operation = REQ_FLUSH | REQ_FUA;
++		break;
++	default:
++		operation = 0; /* make gcc happy */
++		BUG();
++	}
 +
-+	dev_dbg(&dev->dev,
-+		"free-ing dynamically allocated virtual configuration space fields\n");
-+	if (!dev_data)
-+		return;
++	/* Check that number of segments is sane. */
++	nseg = req->nr_segments;
++	if (unlikely(nseg == 0 && operation != (REQ_FLUSH | REQ_FUA)) ||
++	    unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
++		DPRINTK("Bad number of segments in request (%d)\n", nseg);
++		goto fail_response;
++	}
 +
-+	list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) {
-+		field = cfg_entry->field;
++	preq.dev           = req->handle;
++	preq.sector_number = req->u.rw.sector_number;
++	preq.nr_sects      = 0;
 +
-+		if (field->clean) {
-+			field->clean((struct config_field *)field);
++	pending_req->blkif     = blkif;
++	pending_req->id        = req->id;
++	pending_req->operation = req->operation;
++	pending_req->status    = BLKIF_RSP_OKAY;
++	pending_req->nr_pages  = nseg;
 +
-+			if (cfg_entry->data)
-+				kfree(cfg_entry->data);
++	for (i = 0; i < nseg; i++) {
++		uint32_t flags;
 +
-+			list_del(&cfg_entry->list);
-+			kfree(cfg_entry);
++		seg[i].nsec = req->u.rw.seg[i].last_sect -
++			req->u.rw.seg[i].first_sect + 1;
++
++		if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
++		    (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect))
++			goto fail_response;
++		preq.nr_sects += seg[i].nsec;
++
++		flags = GNTMAP_host_map;
++		if (operation != READ)
++			flags |= GNTMAP_readonly;
++		gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
++				  req->u.rw.seg[i].gref, blkif->domid);
++	}
++
++	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg);
++	BUG_ON(ret);
++
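++	/* Check each segment's mapping; a failed grant is recorded as
++	 * BLKBACK_INVALID_HANDLE so the cleanup path can skip it. */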
++	for (i = 0; i < nseg; i++) {
++		if (unlikely(map[i].status != 0)) {
++			DPRINTK("invalid buffer -- could not remap it\n");
++			map[i].handle = BLKBACK_INVALID_HANDLE;
++			ret |= 1;
++		}
++
++		pending_handle(pending_req, i) = map[i].handle;
++
++		if (ret)
++			continue;
++
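++		/* Install an M2P override so the granted foreign page can be
++		 * treated like a normal local page by the block layer. */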
++		ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr),
++			blkbk->pending_page(pending_req, i), false);
++		if (ret) {
++			printk(KERN_ALERT "Failed to install M2P override for"
++				" %lx (ret: %d)\n", (unsigned long)map[i].dev_bus_addr, ret);
++			continue;
 +		}
 +
++		seg[i].buf  = map[i].dev_bus_addr |
++			(req->u.rw.seg[i].first_sect << 9);
 +	}
-+}
 +
-+void pciback_config_reset_dev(struct pci_dev *dev)
-+{
-+	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
-+	const struct config_field_entry *cfg_entry;
-+	const struct config_field *field;
++	if (ret)
++		goto fail_flush;
++
++	if (vbd_translate(&preq, blkif, operation) != 0) {
++		DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n",
++			operation == READ ? "read" : "write",
++			preq.sector_number,
++			preq.sector_number + preq.nr_sects, preq.dev);
++		goto fail_flush;
++	}
++
++	plug_queue(blkif, preq.bdev);
++	atomic_set(&pending_req->pendcnt, 1);
++	blkif_get(blkif);
++
++	for (i = 0; i < nseg; i++) {
++		if (((int)preq.sector_number|(int)seg[i].nsec) &
++		    ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) {
++			DPRINTK("Misaligned I/O request from domain %d",
++				blkif->domid);
++			goto fail_put_bio;
++		}
++
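++		/* Pack segments into as few bios as possible; once a bio is
++		 * full, submit it and start filling a new one. */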
++		while ((bio == NULL) ||
++		       (bio_add_page(bio,
++				     blkbk->pending_page(pending_req, i),
++				     seg[i].nsec << 9,
++				     seg[i].buf & ~PAGE_MASK) == 0)) {
++			if (bio) {
++				atomic_inc(&pending_req->pendcnt);
++				submit_bio(operation, bio);
++			}
 +
-+	dev_dbg(&dev->dev, "resetting virtual configuration space\n");
-+	if (!dev_data)
-+		return;
++			bio = bio_alloc(GFP_KERNEL, nseg-i);
++			if (unlikely(bio == NULL))
++				goto fail_put_bio;
 +
-+	list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
-+		field = cfg_entry->field;
++			bio->bi_bdev    = preq.bdev;
++			bio->bi_private = pending_req;
++			bio->bi_end_io  = end_block_io_op;
++			bio->bi_sector  = preq.sector_number;
++		}
 +
-+		if (field->reset)
-+			field->reset(dev, OFFSET(cfg_entry), cfg_entry->data);
++		preq.sector_number += seg[i].nsec;
++	}
++
++	if (!bio) {
++		BUG_ON(operation != (REQ_FLUSH | REQ_FUA));
++		bio = bio_alloc(GFP_KERNEL, 0);
++		if (unlikely(bio == NULL))
++			goto fail_put_bio;
++
++		bio->bi_bdev    = preq.bdev;
++		bio->bi_private = pending_req;
++		bio->bi_end_io  = end_block_io_op;
++		bio->bi_sector  = -1;
 +	}
++
++	submit_bio(operation, bio);
++
++	if (operation == READ)
++		blkif->st_rd_sect += preq.nr_sects;
++	else if (operation == WRITE || operation == (REQ_FLUSH | REQ_FUA))
++		blkif->st_wr_sect += preq.nr_sects;
++
++	return;
++
++ fail_flush:
++	fast_flush_area(pending_req);
++ fail_response:
++	make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
++	free_req(pending_req);
++	msleep(1); /* back off a bit */
++	return;
++
++ fail_put_bio:
++	__end_block_io_op(pending_req, -EINVAL);
++	if (bio)
++		bio_put(bio);
++	unplug_queue(blkif);
++	msleep(1); /* back off a bit */
++	return;
 +}
 +
-+void pciback_config_free_dev(struct pci_dev *dev)
-+{
-+	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
-+	struct config_field_entry *cfg_entry, *t;
-+	const struct config_field *field;
 +
-+	dev_dbg(&dev->dev, "free-ing virtual configuration space fields\n");
-+	if (!dev_data)
-+		return;
 +
-+	list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) {
-+		list_del(&cfg_entry->list);
++/******************************************************************
++ * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
++ */
 +
-+		field = cfg_entry->field;
 +
-+		if (field->release)
-+			field->release(dev, OFFSET(cfg_entry), cfg_entry->data);
++static void make_response(blkif_t *blkif, u64 id,
++			  unsigned short op, int st)
++{
++	struct blkif_response  resp;
++	unsigned long     flags;
++	union blkif_back_rings *blk_rings = &blkif->blk_rings;
++	int more_to_do = 0;
++	int notify;
 +
-+		kfree(cfg_entry);
++	resp.id        = id;
++	resp.operation = op;
++	resp.status    = st;
++
++	spin_lock_irqsave(&blkif->blk_ring_lock, flags);
++	/* Place on the response ring for the relevant domain. */
++	switch (blkif->blk_protocol) {
++	case BLKIF_PROTOCOL_NATIVE:
++		memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
++		       &resp, sizeof(resp));
++		break;
++	case BLKIF_PROTOCOL_X86_32:
++		memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt),
++		       &resp, sizeof(resp));
++		break;
++	case BLKIF_PROTOCOL_X86_64:
++		memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt),
++		       &resp, sizeof(resp));
++		break;
++	default:
++		BUG();
 +	}
++	blk_rings->common.rsp_prod_pvt++;
++	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
++	if (blk_rings->common.rsp_prod_pvt == blk_rings->common.req_cons) {
++		/*
++		 * Tail check for pending requests. Allows frontend to avoid
++		 * notifications if requests are already in flight (lower
++		 * overheads and promotes batching).
++		 */
++		RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do);
++
++	} else if (RING_HAS_UNCONSUMED_REQUESTS(&blk_rings->common)) {
++		more_to_do = 1;
++	}
++
++	spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
++
++	if (more_to_do)
++		blkif_notify_work(blkif);
++	if (notify)
++		notify_remote_via_irq(blkif->irq);
 +}
 +
-+int pciback_config_add_field_offset(struct pci_dev *dev,
-+				    const struct config_field *field,
-+				    unsigned int base_offset)
++static int __init blkif_init(void)
 +{
-+	int err = 0;
-+	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
-+	struct config_field_entry *cfg_entry;
-+	void *tmp;
++	int i, mmap_pages;
++	int rc = 0;
 +
-+	cfg_entry = kmalloc(sizeof(*cfg_entry), GFP_KERNEL);
-+	if (!cfg_entry) {
-+		err = -ENOMEM;
-+		goto out;
++	if (!xen_pv_domain())
++		return -ENODEV;
++
++	blkbk = vmalloc(sizeof(struct xen_blkbk));
++	if (!blkbk) {
++		printk(KERN_ALERT "%s: out of memory!\n", __func__);
++		return -ENOMEM;
 +	}
 +
-+	cfg_entry->data = NULL;
-+	cfg_entry->field = field;
-+	cfg_entry->base_offset = base_offset;
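++	/* Reserve one grant handle and one page for every segment that can
++	 * be in flight across all pending requests. */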
++	mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
 +
-+	/* silently ignore duplicate fields */
-+	err = pciback_field_is_dup(dev,OFFSET(cfg_entry));
-+	if (err)
-+		goto out;
++	blkbk->pending_reqs          = kmalloc(sizeof(blkbk->pending_reqs[0]) *
++					blkif_reqs, GFP_KERNEL);
++	blkbk->pending_grant_handles = vzalloc(sizeof(blkbk->pending_grant_handles[0]) *
++					mmap_pages);
++	blkbk->pending_pages         = vzalloc(sizeof(blkbk->pending_pages[0]) * mmap_pages);
 +
-+	if (field->init) {
-+		tmp = field->init(dev, OFFSET(cfg_entry));
++	if (!blkbk->pending_reqs || !blkbk->pending_grant_handles || !blkbk->pending_pages) {
++		rc = -ENOMEM;
++		goto out_of_memory;
++	}
 +
-+		if (IS_ERR(tmp)) {
-+			err = PTR_ERR(tmp);
-+			goto out;
++	for (i = 0; i < mmap_pages; i++) {
++		blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
++		blkbk->pending_pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
++		if (blkbk->pending_pages[i] == NULL) {
++			rc = -ENOMEM;
++			goto out_of_memory;
 +		}
-+
-+		cfg_entry->data = tmp;
 +	}
++	rc = blkif_interface_init();
++	if (rc)
++		goto failed_init;
 +
-+	dev_dbg(&dev->dev, "added config field at offset 0x%02x\n",
-+		OFFSET(cfg_entry));
-+	list_add_tail(&cfg_entry->list, &dev_data->config_fields);
++	memset(blkbk->pending_reqs, 0, blkif_reqs * sizeof(blkbk->pending_reqs[0]));
 +
-+      out:
-+	if (err)
-+		kfree(cfg_entry);
++	INIT_LIST_HEAD(&blkbk->pending_free);
++	spin_lock_init(&blkbk->pending_free_lock);
++	init_waitqueue_head(&blkbk->pending_free_wq);
 +
-+	return err;
++	for (i = 0; i < blkif_reqs; i++)
++		list_add_tail(&blkbk->pending_reqs[i].free_list, &blkbk->pending_free);
++
++	rc = blkif_xenbus_init();
++	if (rc)
++		goto failed_init;
++
++	return 0;
++
++ out_of_memory:
++	printk(KERN_ERR "%s: out of memory\n", __func__);
++ failed_init:
++	kfree(blkbk->pending_reqs);
++	vfree(blkbk->pending_grant_handles);
++	if (blkbk->pending_pages) {
++		for (i = 0; i < mmap_pages; i++) {
++			if (blkbk->pending_pages[i])
++				__free_page(blkbk->pending_pages[i]);
++		}
++	}
++	vfree(blkbk->pending_pages);
++	vfree(blkbk);
++	blkbk = NULL;
++	return rc;
 +}
 +
-+/* This sets up the device's virtual configuration space to keep track of 
-+ * certain registers (like the base address registers (BARs) so that we can
-+ * keep the client from manipulating them directly.
++module_init(blkif_init);
++
++MODULE_LICENSE("Dual BSD/GPL");
+diff --git a/drivers/xen/blkback/common.h b/drivers/xen/blkback/common.h
+new file mode 100644
+index 0000000..0f91830
+--- /dev/null
++++ b/drivers/xen/blkback/common.h
+@@ -0,0 +1,141 @@
++/*
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#ifndef __BLKIF__BACKEND__COMMON_H__
++#define __BLKIF__BACKEND__COMMON_H__
++
++#include <linux/version.h>
++#include <linux/module.h>
++#include <linux/interrupt.h>
++#include <linux/slab.h>
++#include <linux/blkdev.h>
++#include <linux/vmalloc.h>
++#include <linux/wait.h>
++#include <asm/io.h>
++#include <asm/setup.h>
++#include <asm/pgalloc.h>
++#include <asm/hypervisor.h>
++#include <xen/blkif.h>
++#include <xen/grant_table.h>
++#include <xen/xenbus.h>
++
++#define DPRINTK(_f, _a...)			\
++	pr_debug("(file=%s, line=%d) " _f,	\
++		 __FILE__, __LINE__, ## _a)
++
++struct vbd {
++	blkif_vdev_t   handle;      /* what the domain refers to this vbd as */
++	unsigned char  readonly;    /* Non-zero -> read-only */
++	unsigned char  type;        /* VDISK_xxx */
++	u32            pdevice;     /* phys device that this vbd maps to */
++	struct block_device *bdev;
++	sector_t       size;        /* Cached size parameter */
++};
++
++struct backend_info;
++
++typedef struct blkif_st {
++	/* Unique identifier for this interface. */
++	domid_t           domid;
++	unsigned int      handle;
++	/* Physical parameters of the comms window. */
++	unsigned int      irq;
++	/* Comms information. */
++	enum blkif_protocol blk_protocol;
++	union blkif_back_rings blk_rings;
++	struct vm_struct *blk_ring_area;
++	/* The VBD attached to this interface. */
++	struct vbd        vbd;
++	/* Back pointer to the backend_info. */
++	struct backend_info *be;
++	/* Private fields. */
++	spinlock_t       blk_ring_lock;
++	atomic_t         refcnt;
++
++	wait_queue_head_t   wq;
++	struct task_struct  *xenblkd;
++	unsigned int        waiting_reqs;
++	struct request_queue     *plug;
++
++	/* statistics */
++	unsigned long       st_print;
++	int                 st_rd_req;
++	int                 st_wr_req;
++	int                 st_oo_req;
++	int                 st_br_req;
++	int                 st_rd_sect;
++	int                 st_wr_sect;
++
++	wait_queue_head_t waiting_to_free;
++
++	grant_handle_t shmem_handle;
++	grant_ref_t    shmem_ref;
++} blkif_t;
++
++blkif_t *blkif_alloc(domid_t domid);
++void blkif_disconnect(blkif_t *blkif);
++void blkif_free(blkif_t *blkif);
++int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn);
++void vbd_resize(blkif_t *blkif);
++
++#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
++#define blkif_put(_b)					\
++	do {						\
++		if (atomic_dec_and_test(&(_b)->refcnt))	\
++			wake_up(&(_b)->waiting_to_free);\
++	} while (0)
++
++/* Create a vbd. */
++int vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, unsigned major,
++	       unsigned minor, int readonly, int cdrom);
++void vbd_free(struct vbd *vbd);
++
++unsigned long long vbd_size(struct vbd *vbd);
++unsigned int vbd_info(struct vbd *vbd);
++unsigned long vbd_secsize(struct vbd *vbd);
++
++struct phys_req {
++	unsigned short       dev;
++	unsigned short       nr_sects;
++	struct block_device *bdev;
++	blkif_sector_t       sector_number;
++};
++
++int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation);
++
++int blkif_interface_init(void);
++
++int blkif_xenbus_init(void);
++
++irqreturn_t blkif_be_int(int irq, void *dev_id);
++int blkif_schedule(void *arg);
++
++int blkback_barrier(struct xenbus_transaction xbt,
++		    struct backend_info *be, int state);
++
++struct xenbus_device *blkback_xenbus(struct backend_info *be);
++
++#endif /* __BLKIF__BACKEND__COMMON_H__ */
+diff --git a/drivers/xen/blkback/interface.c b/drivers/xen/blkback/interface.c
+new file mode 100644
+index 0000000..e397a41
+--- /dev/null
++++ b/drivers/xen/blkback/interface.c
+@@ -0,0 +1,186 @@
++/******************************************************************************
++ * arch/xen/drivers/blkif/backend/interface.c
++ *
++ * Block-device interface management.
++ *
++ * Copyright (c) 2004, Keir Fraser
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
 + */
-+int pciback_config_init_dev(struct pci_dev *dev)
-+{
-+	int err = 0;
-+	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
 +
-+	dev_dbg(&dev->dev, "initializing virtual configuration space\n");
++#include "common.h"
++#include <xen/events.h>
++#include <xen/grant_table.h>
++#include <linux/kthread.h>
 +
-+	INIT_LIST_HEAD(&dev_data->config_fields);
++static struct kmem_cache *blkif_cachep;
 +
-+	err = pciback_config_header_add_fields(dev);
-+	if (err)
-+		goto out;
++blkif_t *blkif_alloc(domid_t domid)
++{
++	blkif_t *blkif;
 +
-+	err = pciback_config_capability_add_fields(dev);
-+	if (err)
-+		goto out;
++	blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
++	if (!blkif)
++		return ERR_PTR(-ENOMEM);
 +
-+	err = pciback_config_quirks_init(dev);
++	memset(blkif, 0, sizeof(*blkif));
++	blkif->domid = domid;
++	spin_lock_init(&blkif->blk_ring_lock);
++	atomic_set(&blkif->refcnt, 1);
++	init_waitqueue_head(&blkif->wq);
++	blkif->st_print = jiffies;
++	init_waitqueue_head(&blkif->waiting_to_free);
 +
-+      out:
-+	return err;
++	return blkif;
 +}
 +
-+int pciback_config_init(void)
++static int map_frontend_page(blkif_t *blkif, unsigned long shared_page)
 +{
-+	return pciback_config_capability_init();
-+}
-diff --git a/drivers/xen/pciback/conf_space.h b/drivers/xen/pciback/conf_space.h
-new file mode 100644
-index 0000000..fe746ef
---- /dev/null
-+++ b/drivers/xen/pciback/conf_space.h
-@@ -0,0 +1,126 @@
-+/*
-+ * PCI Backend - Common data structures for overriding the configuration space
-+ *
-+ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
-+ */
-+
-+#ifndef __XEN_PCIBACK_CONF_SPACE_H__
-+#define __XEN_PCIBACK_CONF_SPACE_H__
-+
-+#include <linux/list.h>
-+#include <linux/err.h>
-+
-+/* conf_field_init can return an errno in a ptr with ERR_PTR() */
-+typedef void *(*conf_field_init) (struct pci_dev * dev, int offset);
-+typedef void (*conf_field_reset) (struct pci_dev * dev, int offset, void *data);
-+typedef void (*conf_field_free) (struct pci_dev * dev, int offset, void *data);
-+
-+typedef int (*conf_dword_write) (struct pci_dev * dev, int offset, u32 value,
-+				 void *data);
-+typedef int (*conf_word_write) (struct pci_dev * dev, int offset, u16 value,
-+				void *data);
-+typedef int (*conf_byte_write) (struct pci_dev * dev, int offset, u8 value,
-+				void *data);
-+typedef int (*conf_dword_read) (struct pci_dev * dev, int offset, u32 * value,
-+				void *data);
-+typedef int (*conf_word_read) (struct pci_dev * dev, int offset, u16 * value,
-+			       void *data);
-+typedef int (*conf_byte_read) (struct pci_dev * dev, int offset, u8 * value,
-+			       void *data);
++	struct gnttab_map_grant_ref op;
 +
-+/* These are the fields within the configuration space which we
-+ * are interested in intercepting reads/writes to and changing their
-+ * values.
-+ */
-+struct config_field {
-+	unsigned int offset;
-+	unsigned int size;
-+	unsigned int mask;
-+	conf_field_init init;
-+	conf_field_reset reset;
-+	conf_field_free release;
-+	void (*clean) (struct config_field * field);
-+	union {
-+		struct {
-+			conf_dword_write write;
-+			conf_dword_read read;
-+		} dw;
-+		struct {
-+			conf_word_write write;
-+			conf_word_read read;
-+		} w;
-+		struct {
-+			conf_byte_write write;
-+			conf_byte_read read;
-+		} b;
-+	} u;
-+	struct list_head list;
-+};
++	gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr,
++			  GNTMAP_host_map, shared_page, blkif->domid);
 +
-+struct config_field_entry {
-+	struct list_head list;
-+	const struct config_field *field;
-+	unsigned int base_offset;
-+	void *data;
-+};
++	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
++		BUG();
 +
-+#define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset)
++	if (op.status) {
++		DPRINTK("Grant table operation failure!\n");
++		return op.status;
++	}
 +
-+/* Add fields to a device - the add_fields macro expects to get a pointer to
-+ * the first entry in an array (of which the ending is marked by size==0)
-+ */
-+int pciback_config_add_field_offset(struct pci_dev *dev,
-+				    const struct config_field *field,
-+				    unsigned int offset);
++	blkif->shmem_ref = shared_page;
++	blkif->shmem_handle = op.handle;
 +
-+static inline int pciback_config_add_field(struct pci_dev *dev,
-+					   const struct config_field *field)
-+{
-+	return pciback_config_add_field_offset(dev, field, 0);
++	return 0;
 +}
 +
-+static inline int pciback_config_add_fields(struct pci_dev *dev,
-+					    const struct config_field *field)
++static void unmap_frontend_page(blkif_t *blkif)
 +{
-+	int i, err = 0;
-+	for (i = 0; field[i].size != 0; i++) {
-+		err = pciback_config_add_field(dev, &field[i]);
-+		if (err)
-+			break;
-+	}
-+	return err;
-+}
++	struct gnttab_unmap_grant_ref op;
 +
-+static inline int pciback_config_add_fields_offset(struct pci_dev *dev,
-+						   const struct config_field *field,
-+						   unsigned int offset)
-+{
-+	int i, err = 0;
-+	for (i = 0; field[i].size != 0; i++) {
-+		err = pciback_config_add_field_offset(dev, &field[i], offset);
-+		if (err)
-+			break;
-+	}
-+	return err;
-+}
++	gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr,
++			    GNTMAP_host_map, blkif->shmem_handle);
 +
-+/* Read/Write the real configuration space */
-+int pciback_read_config_byte(struct pci_dev *dev, int offset, u8 * value,
-+			     void *data);
-+int pciback_read_config_word(struct pci_dev *dev, int offset, u16 * value,
-+			     void *data);
-+int pciback_read_config_dword(struct pci_dev *dev, int offset, u32 * value,
-+			      void *data);
-+int pciback_write_config_byte(struct pci_dev *dev, int offset, u8 value,
-+			      void *data);
-+int pciback_write_config_word(struct pci_dev *dev, int offset, u16 value,
-+			      void *data);
-+int pciback_write_config_dword(struct pci_dev *dev, int offset, u32 value,
-+			       void *data);
++	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
++		BUG();
++}
 +
-+int pciback_config_capability_init(void);
++int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn)
++{
++	int err;
 +
-+int pciback_config_header_add_fields(struct pci_dev *dev);
-+int pciback_config_capability_add_fields(struct pci_dev *dev);
++	/* Already connected through? */
++	if (blkif->irq)
++		return 0;
 +
-+#endif				/* __XEN_PCIBACK_CONF_SPACE_H__ */
-diff --git a/drivers/xen/pciback/conf_space_capability.c b/drivers/xen/pciback/conf_space_capability.c
-new file mode 100644
-index 0000000..50efca4
---- /dev/null
-+++ b/drivers/xen/pciback/conf_space_capability.c
-@@ -0,0 +1,69 @@
-+/*
-+ * PCI Backend - Handles the virtual fields found on the capability lists
-+ *               in the configuration space.
-+ *
-+ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
-+ */
++	if ((blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE)) == NULL)
++		return -ENOMEM;
 +
-+#include <linux/kernel.h>
-+#include <linux/pci.h>
-+#include "pciback.h"
-+#include "conf_space.h"
-+#include "conf_space_capability.h"
++	err = map_frontend_page(blkif, shared_page);
++	if (err) {
++		free_vm_area(blkif->blk_ring_area);
++		return err;
++	}
 +
-+static LIST_HEAD(capabilities);
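++	/* Initialise the shared ring using whichever ABI layout the
++	 * frontend negotiated. */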
++	switch (blkif->blk_protocol) {
++	case BLKIF_PROTOCOL_NATIVE:
++	{
++		struct blkif_sring *sring;
++		sring = (struct blkif_sring *)blkif->blk_ring_area->addr;
++		BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
++		break;
++	}
++	case BLKIF_PROTOCOL_X86_32:
++	{
++		struct blkif_x86_32_sring *sring_x86_32;
++		sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring_area->addr;
++		BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
++		break;
++	}
++	case BLKIF_PROTOCOL_X86_64:
++	{
++		struct blkif_x86_64_sring *sring_x86_64;
++		sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring_area->addr;
++		BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
++		break;
++	}
++	default:
++		BUG();
++	}
 +
-+static const struct config_field caplist_header[] = {
++	err = bind_interdomain_evtchn_to_irqhandler(
++		blkif->domid, evtchn, blkif_be_int, 0, "blkif-backend", blkif);
++	if (err < 0)
 +	{
-+	 .offset    = PCI_CAP_LIST_ID,
-+	 .size      = 2, /* encompass PCI_CAP_LIST_ID & PCI_CAP_LIST_NEXT */
-+	 .u.w.read  = pciback_read_config_word,
-+	 .u.w.write = NULL,
-+	},
-+	{}
-+};
++		unmap_frontend_page(blkif);
++		free_vm_area(blkif->blk_ring_area);
++		blkif->blk_rings.common.sring = NULL;
++		return err;
++	}
++	blkif->irq = err;
 +
-+static inline void register_capability(struct pciback_config_capability *cap)
-+{
-+	list_add_tail(&cap->cap_list, &capabilities);
++	return 0;
 +}
 +
-+int pciback_config_capability_add_fields(struct pci_dev *dev)
++void blkif_disconnect(blkif_t *blkif)
 +{
-+	int err = 0;
-+	struct pciback_config_capability *cap;
-+	int cap_offset;
++	if (blkif->xenblkd) {
++		kthread_stop(blkif->xenblkd);
++		blkif->xenblkd = NULL;
++	}
 +
-+	list_for_each_entry(cap, &capabilities, cap_list) {
-+		cap_offset = pci_find_capability(dev, cap->capability);
-+		if (cap_offset) {
-+			dev_dbg(&dev->dev, "Found capability 0x%x at 0x%x\n",
-+				cap->capability, cap_offset);
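++	/* Drop the initial reference and wait until all in-flight requests
++	 * release theirs, then take it back for the final blkif_free(). */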
++	atomic_dec(&blkif->refcnt);
++	wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
++	atomic_inc(&blkif->refcnt);
 +
-+			err = pciback_config_add_fields_offset(dev,
-+							       caplist_header,
-+							       cap_offset);
-+			if (err)
-+				goto out;
-+			err = pciback_config_add_fields_offset(dev,
-+							       cap->fields,
-+							       cap_offset);
-+			if (err)
-+				goto out;
-+		}
++	if (blkif->irq) {
++		unbind_from_irqhandler(blkif->irq, blkif);
++		blkif->irq = 0;
 +	}
 +
-+      out:
-+	return err;
++	if (blkif->blk_rings.common.sring) {
++		unmap_frontend_page(blkif);
++		free_vm_area(blkif->blk_ring_area);
++		blkif->blk_rings.common.sring = NULL;
++	}
 +}
 +
-+extern struct pciback_config_capability pciback_config_capability_vpd;
-+extern struct pciback_config_capability pciback_config_capability_pm;
++void blkif_free(blkif_t *blkif)
++{
++	if (!atomic_dec_and_test(&blkif->refcnt))
++		BUG();
++	kmem_cache_free(blkif_cachep, blkif);
++}
 +
-+int pciback_config_capability_init(void)
++int __init blkif_interface_init(void)
 +{
-+	register_capability(&pciback_config_capability_vpd);
-+	register_capability(&pciback_config_capability_pm);
++	blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t),
++					 0, 0, NULL);
++	if (!blkif_cachep)
++		return -ENOMEM;
 +
 +	return 0;
 +}
-diff --git a/drivers/xen/pciback/conf_space_capability.h b/drivers/xen/pciback/conf_space_capability.h
+diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c
 new file mode 100644
-index 0000000..823392e
+index 0000000..8c91a2f
 --- /dev/null
-+++ b/drivers/xen/pciback/conf_space_capability.h
-@@ -0,0 +1,23 @@
-+/*
-+ * PCI Backend - Data structures for special overlays for structures on
-+ *               the capability list.
++++ b/drivers/xen/blkback/vbd.c
+@@ -0,0 +1,163 @@
++/******************************************************************************
++ * blkback/vbd.c
 + *
-+ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
-+ */
-+
-+#ifndef __PCIBACK_CONFIG_CAPABILITY_H__
-+#define __PCIBACK_CONFIG_CAPABILITY_H__
-+
-+#include <linux/pci.h>
-+#include <linux/list.h>
-+
-+struct pciback_config_capability {
-+	struct list_head cap_list;
-+
-+	int capability;
-+
-+	/* If the device has the capability found above, add these fields */
-+	const struct config_field *fields;
-+};
-+
-+#endif
-diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c
-new file mode 100644
-index 0000000..762e396
---- /dev/null
-+++ b/drivers/xen/pciback/conf_space_capability_msi.c
-@@ -0,0 +1,79 @@
-+/*
-+ * PCI Backend -- Configuration overlay for MSI capability
++ * Routines for managing virtual block devices (VBDs).
++ *
++ * Copyright (c) 2003-2005, Keir Fraser & Steve Hand
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
 + */
-+#include <linux/pci.h>
-+#include <linux/slab.h>
-+#include "conf_space.h"
-+#include "conf_space_capability.h"
-+#include <xen/interface/io/pciif.h>
-+#include "pciback.h"
-+
-+int pciback_enable_msi(struct pciback_device *pdev,
-+		struct pci_dev *dev, struct xen_pci_op *op)
-+{
-+	int otherend = pdev->xdev->otherend_id;
-+	int status;
 +
-+	status = pci_enable_msi(dev);
++#include "common.h"
 +
-+	if (status) {
-+		printk("error enable msi for guest %x status %x\n", otherend, status);
-+		op->value = 0;
-+		return XEN_PCI_ERR_op_failed;
-+	}
++#define vbd_sz(_v)   ((_v)->bdev->bd_part ?				\
++		      (_v)->bdev->bd_part->nr_sects : get_capacity((_v)->bdev->bd_disk))
 +
-+	op->value = dev->irq;
-+	return 0;
++unsigned long long vbd_size(struct vbd *vbd)
++{
++	return vbd_sz(vbd);
 +}
 +
-+int pciback_disable_msi(struct pciback_device *pdev,
-+		struct pci_dev *dev, struct xen_pci_op *op)
++unsigned int vbd_info(struct vbd *vbd)
 +{
-+	pci_disable_msi(dev);
++	return vbd->type | (vbd->readonly ? VDISK_READONLY : 0);
++}
 +
-+	op->value = dev->irq;
-+	return 0;
++unsigned long vbd_secsize(struct vbd *vbd)
++{
++	return bdev_logical_block_size(vbd->bdev);
 +}
 +
-+int pciback_enable_msix(struct pciback_device *pdev,
-+		struct pci_dev *dev, struct xen_pci_op *op)
++int vbd_create(blkif_t *blkif, blkif_vdev_t handle, unsigned major,
++	       unsigned minor, int readonly, int cdrom)
 +{
-+	int i, result;
-+	struct msix_entry *entries;
++	struct vbd *vbd;
++	struct block_device *bdev;
 +
-+	if (op->value > SH_INFO_MAX_VEC)
-+		return -EINVAL;
++	vbd = &blkif->vbd;
++	vbd->handle   = handle;
++	vbd->readonly = readonly;
++	vbd->type     = 0;
 +
-+	entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL);
-+	if (entries == NULL)
-+		return -ENOMEM;
++	vbd->pdevice  = MKDEV(major, minor);
 +
-+	for (i = 0; i < op->value; i++) {
-+		entries[i].entry = op->msix_entries[i].entry;
-+		entries[i].vector = op->msix_entries[i].vector;
++	bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ?
++				 FMODE_READ : FMODE_WRITE, NULL);
++
++	if (IS_ERR(bdev)) {
++		DPRINTK("vbd_create: device %08x could not be opened.\n",
++			vbd->pdevice);
++		return -ENOENT;
 +	}
 +
-+	result = pci_enable_msix(dev, entries, op->value);
++	vbd->bdev = bdev;
++	vbd->size = vbd_size(vbd);
 +
-+	for (i = 0; i < op->value; i++) {
-+		op->msix_entries[i].entry = entries[i].entry;
-+		op->msix_entries[i].vector = entries[i].vector;
++	if (vbd->bdev->bd_disk == NULL) {
++		DPRINTK("vbd_create: device %08x doesn't exist.\n",
++			vbd->pdevice);
++		vbd_free(vbd);
++		return -ENOENT;
 +	}
 +
-+	kfree(entries);
-+
-+	op->value = result;
++	if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom)
++		vbd->type |= VDISK_CDROM;
++	if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
++		vbd->type |= VDISK_REMOVABLE;
 +
-+	return result;
++	DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
++		handle, blkif->domid);
++	return 0;
 +}
 +
-+int pciback_disable_msix(struct pciback_device *pdev,
-+		struct pci_dev *dev, struct xen_pci_op *op)
++void vbd_free(struct vbd *vbd)
 +{
-+
-+	pci_disable_msix(dev);
-+
-+	op->value = dev->irq;
-+	return 0;
++	if (vbd->bdev)
++		blkdev_put(vbd->bdev, vbd->readonly ? FMODE_READ : FMODE_WRITE);
++	vbd->bdev = NULL;
 +}
 +
-diff --git a/drivers/xen/pciback/conf_space_capability_pm.c b/drivers/xen/pciback/conf_space_capability_pm.c
-new file mode 100644
-index 0000000..e2f99c7
---- /dev/null
-+++ b/drivers/xen/pciback/conf_space_capability_pm.c
-@@ -0,0 +1,126 @@
-+/*
-+ * PCI Backend - Configuration space overlay for power management
-+ *
-+ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
-+ */
-+
-+#include <linux/pci.h>
-+#include "conf_space.h"
-+#include "conf_space_capability.h"
-+
-+static int pm_caps_read(struct pci_dev *dev, int offset, u16 *value,
-+			void *data)
++int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation)
 +{
-+	int err;
-+	u16 real_value;
++	struct vbd *vbd = &blkif->vbd;
++	int rc = -EACCES;
 +
-+	err = pci_read_config_word(dev, offset, &real_value);
-+	if (err)
++	if ((operation != READ) && vbd->readonly)
 +		goto out;
 +
-+	*value = real_value & ~PCI_PM_CAP_PME_MASK;
++	if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd)))
++		goto out;
 +
-+      out:
-+	return err;
-+}
++	req->dev  = vbd->pdevice;
++	req->bdev = vbd->bdev;
++	rc = 0;
 +
-+/* PM_OK_BITS specifies the bits that the driver domain is allowed to change.
-+ * Can't allow driver domain to enable PMEs - they're shared */
-+#define PM_OK_BITS (PCI_PM_CTRL_PME_STATUS|PCI_PM_CTRL_DATA_SEL_MASK)
++ out:
++	return rc;
++}
 +
-+static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value,
-+			 void *data)
++void vbd_resize(blkif_t *blkif)
 +{
++	struct vbd *vbd = &blkif->vbd;
++	struct xenbus_transaction xbt;
 +	int err;
-+	u16 old_value;
-+	pci_power_t new_state, old_state;
-+
-+	err = pci_read_config_word(dev, offset, &old_value);
-+	if (err)
-+		goto out;
-+
-+	old_state = (pci_power_t)(old_value & PCI_PM_CTRL_STATE_MASK);
-+	new_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK);
++	struct xenbus_device *dev = blkback_xenbus(blkif->be);
++	unsigned long long new_size = vbd_size(vbd);
 +
-+	new_value &= PM_OK_BITS;
-+	if ((old_value & PM_OK_BITS) != new_value) {
-+		new_value = (old_value & ~PM_OK_BITS) | new_value;
-+		err = pci_write_config_word(dev, offset, new_value);
-+		if (err)
-+			goto out;
++	printk(KERN_INFO "VBD Resize: Domid: %d, Device: (%d, %d)\n",
++		blkif->domid, MAJOR(vbd->pdevice), MINOR(vbd->pdevice));
++	printk(KERN_INFO "VBD Resize: new size %llu\n", new_size);
++	vbd->size = new_size;
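++	/* Publish the new size in xenstore; retry the transaction on -EAGAIN. */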
++again:
++	err = xenbus_transaction_start(&xbt);
++	if (err) {
++		printk(KERN_WARNING "Error starting transaction\n");
++		return;
 +	}
-+
-+	/* Let pci core handle the power management change */
-+	dev_dbg(&dev->dev, "set power state to %x\n", new_state);
-+	err = pci_set_power_state(dev, new_state);
++	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
++			    vbd_size(vbd));
 +	if (err) {
-+		err = PCIBIOS_SET_FAILED;
-+		goto out;
++		printk(KERN_WARNING "Error writing new size\n");
++		goto abort;
 +	}
-+
 +	/*
-+	 * Device may lose PCI config info on D3->D0 transition. This
-+	 * is a problem for some guests which will not reset BARs. Even
-+	 * those that have a go will be foiled by our BAR-write handler
-+	 * which will discard the write! Since Linux won't re-init
-+	 * the config space automatically in all cases, we do it here.
-+	 * Future: Should we re-initialise all first 64 bytes of config space?
++	 * Write the current state; we will use this to synchronize
++	 * the front-end. If the current state is "connected" the
++	 * front-end will get the new size information online.
 +	 */
-+	if (new_state == PCI_D0 &&
-+	    (old_state == PCI_D3hot || old_state == PCI_D3cold) &&
-+	    !(old_value & PCI_PM_CTRL_NO_SOFT_RESET))
-+		pci_restore_bars(dev);
++	err = xenbus_printf(xbt, dev->nodename, "state", "%d", dev->state);
++	if (err) {
++		printk(KERN_WARNING "Error writing the state\n");
++		goto abort;
++	}
 +
-+ out:
-+	return err;
++	err = xenbus_transaction_end(xbt, 0);
++	if (err == -EAGAIN)
++		goto again;
++	if (err)
++		printk(KERN_WARNING "Error ending transaction\n");
++	return;
++abort:
++	xenbus_transaction_end(xbt, 1);
 +}
+diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c
+new file mode 100644
+index 0000000..031bc3d
+--- /dev/null
++++ b/drivers/xen/blkback/xenbus.c
+@@ -0,0 +1,559 @@
++/*  Xenbus code for blkif backend
++    Copyright (C) 2005 Rusty Russell <rusty at rustcorp.com.au>
++    Copyright (C) 2005 XenSource Ltd
 +
-+/* Ensure PMEs are disabled */
-+static void *pm_ctrl_init(struct pci_dev *dev, int offset)
-+{
-+	int err;
-+	u16 value;
++    This program is free software; you can redistribute it and/or modify
++    it under the terms of the GNU General Public License as published by
++    the Free Software Foundation; either version 2 of the License, or
++    (at your option) any later version.
 +
-+	err = pci_read_config_word(dev, offset, &value);
-+	if (err)
-+		goto out;
++    This program is distributed in the hope that it will be useful,
++    but WITHOUT ANY WARRANTY; without even the implied warranty of
++    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++    GNU General Public License for more details.
 +
-+	if (value & PCI_PM_CTRL_PME_ENABLE) {
-+		value &= ~PCI_PM_CTRL_PME_ENABLE;
-+		err = pci_write_config_word(dev, offset, value);
-+	}
++    You should have received a copy of the GNU General Public License
++    along with this program; if not, write to the Free Software
++    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
++*/
 +
-+      out:
-+	return ERR_PTR(err);
-+}
++#include <stdarg.h>
++#include <linux/module.h>
++#include <linux/kthread.h>
++#include "common.h"
 +
-+static const struct config_field caplist_pm[] = {
-+	{
-+		.offset     = PCI_PM_PMC,
-+		.size       = 2,
-+		.u.w.read   = pm_caps_read,
-+	},
-+	{
-+		.offset     = PCI_PM_CTRL,
-+		.size       = 2,
-+		.init       = pm_ctrl_init,
-+		.u.w.read   = pciback_read_config_word,
-+		.u.w.write  = pm_ctrl_write,
-+	},
-+	{
-+		.offset     = PCI_PM_PPB_EXTENSIONS,
-+		.size       = 1,
-+		.u.b.read   = pciback_read_config_byte,
-+	},
-+	{
-+		.offset     = PCI_PM_DATA_REGISTER,
-+		.size       = 1,
-+		.u.b.read   = pciback_read_config_byte,
-+	},
-+	{}
-+};
++#undef DPRINTK
++#define DPRINTK(fmt, args...)				\
++	pr_debug("blkback/xenbus (%s:%d) " fmt ".\n",	\
++		 __func__, __LINE__, ##args)
 +
-+struct pciback_config_capability pciback_config_capability_pm = {
-+	.capability = PCI_CAP_ID_PM,
-+	.fields = caplist_pm,
++struct backend_info {
++	struct xenbus_device *dev;
++	blkif_t *blkif;
++	struct xenbus_watch backend_watch;
++	unsigned major;
++	unsigned minor;
++	char *mode;
 +};
-diff --git a/drivers/xen/pciback/conf_space_capability_vpd.c b/drivers/xen/pciback/conf_space_capability_vpd.c
-new file mode 100644
-index 0000000..920cb4a
---- /dev/null
-+++ b/drivers/xen/pciback/conf_space_capability_vpd.c
-@@ -0,0 +1,40 @@
-+/*
-+ * PCI Backend - Configuration space overlay for Vital Product Data
-+ *
-+ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
-+ */
 +
-+#include <linux/pci.h>
-+#include "conf_space.h"
-+#include "conf_space_capability.h"
++static void connect(struct backend_info *);
++static int connect_ring(struct backend_info *);
++static void backend_changed(struct xenbus_watch *, const char **,
++			    unsigned int);
 +
-+static int vpd_address_write(struct pci_dev *dev, int offset, u16 value,
-+			     void *data)
++struct xenbus_device *blkback_xenbus(struct backend_info *be)
 +{
-+	/* Disallow writes to the vital product data */
-+	if (value & PCI_VPD_ADDR_F)
-+		return PCIBIOS_SET_FAILED;
-+	else
-+		return pci_write_config_word(dev, offset, value);
++	return be->dev;
 +}
 +
-+static const struct config_field caplist_vpd[] = {
-+	{
-+	 .offset    = PCI_VPD_ADDR,
-+	 .size      = 2,
-+	 .u.w.read  = pciback_read_config_word,
-+	 .u.w.write = vpd_address_write,
-+	 },
-+	{
-+	 .offset     = PCI_VPD_DATA,
-+	 .size       = 4,
-+	 .u.dw.read  = pciback_read_config_dword,
-+	 .u.dw.write = NULL,
-+	 },
-+	{}
-+};
-+ 
-+struct pciback_config_capability pciback_config_capability_vpd = {
-+	.capability = PCI_CAP_ID_VPD,
-+	.fields = caplist_vpd,
-+};
-diff --git a/drivers/xen/pciback/conf_space_header.c b/drivers/xen/pciback/conf_space_header.c
-new file mode 100644
-index 0000000..f794e12
---- /dev/null
-+++ b/drivers/xen/pciback/conf_space_header.c
-@@ -0,0 +1,317 @@
-+/*
-+ * PCI Backend - Handles the virtual fields in the configuration space headers.
-+ *
-+ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
-+ */
++static int blkback_name(blkif_t *blkif, char *buf)
++{
++	char *devpath, *devname;
++	struct xenbus_device *dev = blkif->be->dev;
 +
-+#include <linux/kernel.h>
-+#include <linux/pci.h>
-+#include "pciback.h"
-+#include "conf_space.h"
++	devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL);
++	if (IS_ERR(devpath))
++		return PTR_ERR(devpath);
 +
-+struct pci_bar_info {
-+	u32 val;
-+	u32 len_val;
-+	int which;
-+};
++	if ((devname = strstr(devpath, "/dev/")) != NULL)
++		devname += strlen("/dev/");
++	else
++		devname  = devpath;
 +
-+#define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO))
-+#define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER)
++	snprintf(buf, TASK_COMM_LEN, "blkback.%d.%s", blkif->domid, devname);
++	kfree(devpath);
 +
-+static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
++	return 0;
++}
++
++static void update_blkif_status(blkif_t *blkif)
 +{
 +	int err;
++	char name[TASK_COMM_LEN];
 +
-+	if (!dev->is_enabled && is_enable_cmd(value)) {
-+		if (unlikely(verbose_request))
-+			printk(KERN_DEBUG "pciback: %s: enable\n",
-+			       pci_name(dev));
-+		err = pci_enable_device(dev);
-+		if (err)
-+			return err;
-+	} else if (dev->is_enabled && !is_enable_cmd(value)) {
-+		if (unlikely(verbose_request))
-+			printk(KERN_DEBUG "pciback: %s: disable\n",
-+			       pci_name(dev));
-+		pci_disable_device(dev);
++	/* Not ready to connect? */
++	if (!blkif->irq || !blkif->vbd.bdev)
++		return;
++
++	/* Already connected? */
++	if (blkif->be->dev->state == XenbusStateConnected)
++		return;
++
++	/* Attempt to connect: exit if we fail to. */
++	connect(blkif->be);
++	if (blkif->be->dev->state != XenbusStateConnected)
++		return;
++
++	err = blkback_name(blkif, name);
++	if (err) {
++		xenbus_dev_error(blkif->be->dev, err, "get blkback dev name");
++		return;
 +	}
 +
-+	if (!dev->is_busmaster && is_master_cmd(value)) {
-+		if (unlikely(verbose_request))
-+			printk(KERN_DEBUG "pciback: %s: set bus master\n",
-+			       pci_name(dev));
-+		pci_set_master(dev);
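++	/* Flush out and invalidate any page cache the backing device has
++	 * accumulated before the guest starts issuing its own I/O. */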
++	err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping);
++	if (err) {
++		xenbus_dev_error(blkif->be->dev, err, "block flush");
++		return;
 +	}
++	invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);
 +
-+	if (value & PCI_COMMAND_INVALIDATE) {
-+		if (unlikely(verbose_request))
-+			printk(KERN_DEBUG
-+			       "pciback: %s: enable memory-write-invalidate\n",
-+			       pci_name(dev));
-+		err = pci_set_mwi(dev);
-+		if (err) {
-+			printk(KERN_WARNING
-+			       "pciback: %s: cannot enable memory-write-invalidate (%d)\n",
-+			       pci_name(dev), err);
-+			value &= ~PCI_COMMAND_INVALIDATE;
-+		}
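++	/* Service the ring from a dedicated per-VBD kernel thread, named so
++	 * it is easy to spot in the process list. */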
++	blkif->xenblkd = kthread_run(blkif_schedule, blkif, name);
++	if (IS_ERR(blkif->xenblkd)) {
++		err = PTR_ERR(blkif->xenblkd);
++		blkif->xenblkd = NULL;
++		xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
 +	}
++}
 +
-+	return pci_write_config_word(dev, offset, value);
++
++/****************************************************************
++ *  sysfs interface for VBD I/O requests
++ */
++
++#define VBD_SHOW(name, format, args...)					\
++	static ssize_t show_##name(struct device *_dev,			\
++				   struct device_attribute *attr,	\
++				   char *buf)				\
++	{								\
++		struct xenbus_device *dev = to_xenbus_device(_dev);	\
++		struct backend_info *be = dev_get_drvdata(&dev->dev);	\
++									\
++		return sprintf(buf, format, ##args);			\
++	}								\
++	static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
++
++VBD_SHOW(oo_req,  "%d\n", be->blkif->st_oo_req);
++VBD_SHOW(rd_req,  "%d\n", be->blkif->st_rd_req);
++VBD_SHOW(wr_req,  "%d\n", be->blkif->st_wr_req);
++VBD_SHOW(br_req,  "%d\n", be->blkif->st_br_req);
++VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
++VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
++
++static struct attribute *vbdstat_attrs[] = {
++	&dev_attr_oo_req.attr,
++	&dev_attr_rd_req.attr,
++	&dev_attr_wr_req.attr,
++	&dev_attr_br_req.attr,
++	&dev_attr_rd_sect.attr,
++	&dev_attr_wr_sect.attr,
++	NULL
++};
++
++static struct attribute_group vbdstat_group = {
++	.name = "statistics",
++	.attrs = vbdstat_attrs,
++};
++
++VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
++VBD_SHOW(mode, "%s\n", be->mode);
++
++int xenvbd_sysfs_addif(struct xenbus_device *dev)
++{
++	int error;
++
++	error = device_create_file(&dev->dev, &dev_attr_physical_device);
++	if (error)
++		goto fail1;
++
++	error = device_create_file(&dev->dev, &dev_attr_mode);
++	if (error)
++		goto fail2;
++
++	error = sysfs_create_group(&dev->dev.kobj, &vbdstat_group);
++	if (error)
++		goto fail3;
++
++	return 0;
++
++fail3:	sysfs_remove_group(&dev->dev.kobj, &vbdstat_group);
++fail2:	device_remove_file(&dev->dev, &dev_attr_mode);
++fail1:	device_remove_file(&dev->dev, &dev_attr_physical_device);
++	return error;
 +}
 +
-+static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data)
++void xenvbd_sysfs_delif(struct xenbus_device *dev)
 +{
-+	struct pci_bar_info *bar = data;
++	sysfs_remove_group(&dev->dev.kobj, &vbdstat_group);
++	device_remove_file(&dev->dev, &dev_attr_mode);
++	device_remove_file(&dev->dev, &dev_attr_physical_device);
++}
 +
-+	if (unlikely(!bar)) {
-+		printk(KERN_WARNING "pciback: driver data not found for %s\n",
-+		       pci_name(dev));
-+		return XEN_PCI_ERR_op_failed;
-+	}
++static int blkback_remove(struct xenbus_device *dev)
++{
++	struct backend_info *be = dev_get_drvdata(&dev->dev);
 +
-+	/* A write to obtain the length must happen as a 32-bit write.
-+	 * This does not (yet) support writing individual bytes
-+	 */
-+	if (value == ~PCI_ROM_ADDRESS_ENABLE)
-+		bar->which = 1;
-+	else {
-+		u32 tmpval;
-+		pci_read_config_dword(dev, offset, &tmpval);
-+		if (tmpval != bar->val && value == bar->val) {
-+			/* Allow restoration of bar value. */
-+			pci_write_config_dword(dev, offset, bar->val);
-+		}
-+		bar->which = 0;
++	DPRINTK("");
++
++	if (be->major || be->minor)
++		xenvbd_sysfs_delif(dev);
++
++	if (be->backend_watch.node) {
++		unregister_xenbus_watch(&be->backend_watch);
++		kfree(be->backend_watch.node);
++		be->backend_watch.node = NULL;
 +	}
 +
-+	/* Do we need to support enabling/disabling the rom address here? */
++	if (be->blkif) {
++		blkif_disconnect(be->blkif);
++		vbd_free(&be->blkif->vbd);
++		blkif_free(be->blkif);
++		be->blkif = NULL;
++	}
 +
++	kfree(be);
++	dev_set_drvdata(&dev->dev, NULL);
 +	return 0;
 +}
 +
-+/* For the BARs, only allow writes which write ~0 or
-+ * the correct resource information
-+ * (Needed for when the driver probes the resource usage)
-+ */
-+static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data)
++int blkback_barrier(struct xenbus_transaction xbt,
++		    struct backend_info *be, int state)
 +{
-+	struct pci_bar_info *bar = data;
++	struct xenbus_device *dev = be->dev;
++	int err;
 +
-+	if (unlikely(!bar)) {
-+		printk(KERN_WARNING "pciback: driver data not found for %s\n",
-+		       pci_name(dev));
-+		return XEN_PCI_ERR_op_failed;
++	err = xenbus_printf(xbt, dev->nodename, "feature-barrier",
++			    "%d", state);
++	if (err)
++		xenbus_dev_fatal(dev, err, "writing feature-barrier");
++
++	return err;
++}
++
++/**
++ * Entry point to this code when a new device is created.  Allocate the basic
++ * structures, and watch the store waiting for the hotplug scripts to tell us
++ * the device's physical major and minor numbers.  Switch to InitWait.
++ */
++static int blkback_probe(struct xenbus_device *dev,
++			 const struct xenbus_device_id *id)
++{
++	int err;
++	struct backend_info *be = kzalloc(sizeof(struct backend_info),
++					  GFP_KERNEL);
++	if (!be) {
++		xenbus_dev_fatal(dev, -ENOMEM,
++				 "allocating backend structure");
++		return -ENOMEM;
 +	}
++	be->dev = dev;
++	dev_set_drvdata(&dev->dev, be);
 +
-+	/* A write to obtain the length must happen as a 32-bit write.
-+	 * This does not (yet) support writing individual bytes
-+	 */
-+	if (value == ~0)
-+		bar->which = 1;
-+	else {
-+		u32 tmpval;
-+		pci_read_config_dword(dev, offset, &tmpval);
-+		if (tmpval != bar->val && value == bar->val) {
-+			/* Allow restoration of bar value. */
-+			pci_write_config_dword(dev, offset, bar->val);
-+		}
-+		bar->which = 0;
++	be->blkif = blkif_alloc(dev->otherend_id);
++	if (IS_ERR(be->blkif)) {
++		err = PTR_ERR(be->blkif);
++		be->blkif = NULL;
++		xenbus_dev_fatal(dev, err, "creating block interface");
++		goto fail;
 +	}
 +
++	/* setup back pointer */
++	be->blkif->be = be;
++
++	err = xenbus_watch_pathfmt(dev, &be->backend_watch, backend_changed,
++				   "%s/%s", dev->nodename, "physical-device");
++	if (err)
++		goto fail;
++
++	err = xenbus_switch_state(dev, XenbusStateInitWait);
++	if (err)
++		goto fail;
++
 +	return 0;
++
++fail:
++	DPRINTK("failed");
++	blkback_remove(dev);
++	return err;
 +}
 +
-+static int bar_read(struct pci_dev *dev, int offset, u32 * value, void *data)
++
++/**
++ * Callback received when the hotplug scripts have placed the physical-device
++ * node.  Read it and the mode node, and create a vbd.  If the frontend is
++ * ready, connect.
++ */
++static void backend_changed(struct xenbus_watch *watch,
++			    const char **vec, unsigned int len)
 +{
-+	struct pci_bar_info *bar = data;
++	int err;
++	unsigned major;
++	unsigned minor;
++	struct backend_info *be
++		= container_of(watch, struct backend_info, backend_watch);
++	struct xenbus_device *dev = be->dev;
++	int cdrom = 0;
++	char *device_type;
 +
-+	if (unlikely(!bar)) {
-+		printk(KERN_WARNING "pciback: driver data not found for %s\n",
-+		       pci_name(dev));
-+		return XEN_PCI_ERR_op_failed;
++	DPRINTK("");
++
++	err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
++			   &major, &minor);
++	if (XENBUS_EXIST_ERR(err)) {
++		/* Since this watch will fire once immediately after it is
++		   registered, we expect this.  Ignore it, and wait for the
++		   hotplug scripts. */
++		return;
++	}
++	if (err != 2) {
++		xenbus_dev_fatal(dev, err, "reading physical-device");
++		return;
 +	}
 +
-+	*value = bar->which ? bar->len_val : bar->val;
++	if ((be->major || be->minor) &&
++	    ((be->major != major) || (be->minor != minor))) {
++		printk(KERN_WARNING
++		       "blkback: changing physical device (from %x:%x to "
++		       "%x:%x) not supported.\n", be->major, be->minor,
++		       major, minor);
++		return;
++	}
 +
-+	return 0;
++	be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL);
++	if (IS_ERR(be->mode)) {
++		err = PTR_ERR(be->mode);
++		be->mode = NULL;
++		xenbus_dev_fatal(dev, err, "reading mode");
++		return;
++	}
++
++	device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL);
++	if (!IS_ERR(device_type)) {
++		cdrom = strcmp(device_type, "cdrom") == 0;
++		kfree(device_type);
++	}
++
++	if (be->major == 0 && be->minor == 0) {
++		/* Front end dir is a number, which is used as the handle. */
++
++		char *p = strrchr(dev->otherend, '/') + 1;
++		long handle = simple_strtoul(p, NULL, 0);
++
++		be->major = major;
++		be->minor = minor;
++
++		err = vbd_create(be->blkif, handle, major, minor,
++				 (NULL == strchr(be->mode, 'w')), cdrom);
++		if (err) {
++			be->major = be->minor = 0;
++			xenbus_dev_fatal(dev, err, "creating vbd structure");
++			return;
++		}
++
++		err = xenvbd_sysfs_addif(dev);
++		if (err) {
++			vbd_free(&be->blkif->vbd);
++			be->major = be->minor = 0;
++			xenbus_dev_fatal(dev, err, "creating sysfs entries");
++			return;
++		}
++
++		/* We're potentially connected now */
++		update_blkif_status(be->blkif);
++	}
 +}
 +
-+static inline void read_dev_bar(struct pci_dev *dev,
-+				struct pci_bar_info *bar_info, int offset,
-+				u32 len_mask)
++
++/**
++ * Callback received when the frontend's state changes.
++ */
++static void frontend_changed(struct xenbus_device *dev,
++			     enum xenbus_state frontend_state)
 +{
-+	pci_read_config_dword(dev, offset, &bar_info->val);
-+	pci_write_config_dword(dev, offset, len_mask);
-+	pci_read_config_dword(dev, offset, &bar_info->len_val);
-+	pci_write_config_dword(dev, offset, bar_info->val);
++	struct backend_info *be = dev_get_drvdata(&dev->dev);
++	int err;
++
++	DPRINTK("%s", xenbus_strstate(frontend_state));
++
++	switch (frontend_state) {
++	case XenbusStateInitialising:
++		if (dev->state == XenbusStateClosed) {
++			printk(KERN_INFO "%s: %s: prepare for reconnect\n",
++			       __func__, dev->nodename);
++			xenbus_switch_state(dev, XenbusStateInitWait);
++		}
++		break;
++
++	case XenbusStateInitialised:
++	case XenbusStateConnected:
++		/* Ensure we connect even when two watches fire in
++		   close succession and we miss the intermediate value
++		   of frontend_state. */
++		if (dev->state == XenbusStateConnected)
++			break;
++
++		/* Enforce precondition before potential leak point.
++		 * blkif_disconnect() is idempotent.
++		 */
++		blkif_disconnect(be->blkif);
++
++		err = connect_ring(be);
++		if (err)
++			break;
++		update_blkif_status(be->blkif);
++		break;
++
++	case XenbusStateClosing:
++		blkif_disconnect(be->blkif);
++		xenbus_switch_state(dev, XenbusStateClosing);
++		break;
++
++	case XenbusStateClosed:
++		xenbus_switch_state(dev, XenbusStateClosed);
++		if (xenbus_dev_is_online(dev))
++			break;
++		/* fall through if not online */
++	case XenbusStateUnknown:
++		/* implies blkif_disconnect() via blkback_remove() */
++		device_unregister(&dev->dev);
++		break;
++
++	default:
++		xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
++				 frontend_state);
++		break;
++	}
 +}
 +
-+static void *bar_init(struct pci_dev *dev, int offset)
++
++/* ** Connection ** */
++
++
++/**
++ * Write the physical details regarding the block device to the store, and
++ * switch to Connected state.
++ */
++static void connect(struct backend_info *be)
 +{
-+	struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL);
++	struct xenbus_transaction xbt;
++	int err;
++	struct xenbus_device *dev = be->dev;
 +
-+	if (!bar)
-+		return ERR_PTR(-ENOMEM);
++	DPRINTK("%s", dev->otherend);
++
++	/* Supply the information about the device the frontend needs */
++again:
++	err = xenbus_transaction_start(&xbt);
++	if (err) {
++		xenbus_dev_fatal(dev, err, "starting transaction");
++		return;
++	}
 +
-+	read_dev_bar(dev, bar, offset, ~0);
-+	bar->which = 0;
++	err = blkback_barrier(xbt, be, 1);
++	if (err)
++		goto abort;
 +
-+	return bar;
-+}
++	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
++			    vbd_size(&be->blkif->vbd));
++	if (err) {
++		xenbus_dev_fatal(dev, err, "writing %s/sectors",
++				 dev->nodename);
++		goto abort;
++	}
 +
-+static void *rom_init(struct pci_dev *dev, int offset)
-+{
-+	struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL);
++	/* FIXME: use a typename instead */
++	err = xenbus_printf(xbt, dev->nodename, "info", "%u",
++			    vbd_info(&be->blkif->vbd));
++	if (err) {
++		xenbus_dev_fatal(dev, err, "writing %s/info",
++				 dev->nodename);
++		goto abort;
++	}
++	err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu",
++			    vbd_secsize(&be->blkif->vbd));
++	if (err) {
++		xenbus_dev_fatal(dev, err, "writing %s/sector-size",
++				 dev->nodename);
++		goto abort;
++	}
 +
-+	if (!bar)
-+		return ERR_PTR(-ENOMEM);
++	err = xenbus_transaction_end(xbt, 0);
++	if (err == -EAGAIN)
++		goto again;
++	if (err)
++		xenbus_dev_fatal(dev, err, "ending transaction");
 +
-+	read_dev_bar(dev, bar, offset, ~PCI_ROM_ADDRESS_ENABLE);
-+	bar->which = 0;
++	err = xenbus_switch_state(dev, XenbusStateConnected);
++	if (err)
++		xenbus_dev_fatal(dev, err, "%s: switching to Connected state",
++				 dev->nodename);
 +
-+	return bar;
++	return;
++ abort:
++	xenbus_transaction_end(xbt, 1);
 +}
 +
-+static void bar_reset(struct pci_dev *dev, int offset, void *data)
++
++static int connect_ring(struct backend_info *be)
 +{
-+	struct pci_bar_info *bar = data;
++	struct xenbus_device *dev = be->dev;
++	unsigned long ring_ref;
++	unsigned int evtchn;
++	char protocol[64] = "";
++	int err;
 +
-+	bar->which = 0;
-+}
++	DPRINTK("%s", dev->otherend);
 +
-+static void bar_release(struct pci_dev *dev, int offset, void *data)
-+{
-+	kfree(data);
-+}
++	err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", &ring_ref,
++			    "event-channel", "%u", &evtchn, NULL);
++	if (err) {
++		xenbus_dev_fatal(dev, err,
++				 "reading %s/ring-ref and event-channel",
++				 dev->otherend);
++		return err;
++	}
 +
-+static int interrupt_read(struct pci_dev *dev, int offset, u8 * value,
-+			  void *data)
-+{
-+	*value = (u8) dev->irq;
++	be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
++	err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
++			    "%63s", protocol, NULL);
++	if (err)
++		strcpy(protocol, "unspecified, assuming native");
++	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
++		be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
++	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
++		be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
++	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
++		be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
++	else {
++		xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
++		return -1;
++	}
++	printk(KERN_INFO
++	       "blkback: ring-ref %ld, event-channel %d, protocol %d (%s)\n",
++	       ring_ref, evtchn, be->blkif->blk_protocol, protocol);
++
++	/* Map the shared frame, irq etc. */
++	err = blkif_map(be->blkif, ring_ref, evtchn);
++	if (err) {
++		xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
++				 ring_ref, evtchn);
++		return err;
++	}
 +
 +	return 0;
 +}
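
The "ring-ref", "event-channel" and "protocol" nodes gathered above are written by
the frontend during its own setup. A sketch of that side, assuming the standard
XEN_IO_PROTO_ABI_NATIVE macro from xen/interface/io/protocols.h (the function name
is invented):

#include <xen/xenbus.h>
#include <xen/interface/io/protocols.h>

/* Sketch of the frontend side: publish the nodes that connect_ring()
 * gathers. Error handling is kept minimal. */
static int publish_ring(struct xenbus_device *dev,
			struct xenbus_transaction xbt,
			unsigned long ring_ref, unsigned int evtchn)
{
	int err;

	err = xenbus_printf(xbt, dev->nodename, "ring-ref", "%lu", ring_ref);
	if (err)
		return err;
	err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", evtchn);
	if (err)
		return err;
	return xenbus_printf(xbt, dev->nodename, "protocol", "%s",
			     XEN_IO_PROTO_ABI_NATIVE);
}
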
 +
-+static int bist_write(struct pci_dev *dev, int offset, u8 value, void *data)
-+{
-+	u8 cur_value;
-+	int err;
 +
-+	err = pci_read_config_byte(dev, offset, &cur_value);
-+	if (err)
-+		goto out;
++/* ** Driver Registration ** */
 +
-+	if ((cur_value & ~PCI_BIST_START) == (value & ~PCI_BIST_START)
-+	    || value == PCI_BIST_START)
-+		err = pci_write_config_byte(dev, offset, value);
 +
-+      out:
-+	return err;
-+}
++static const struct xenbus_device_id blkback_ids[] = {
++	{ "vbd" },
++	{ "" }
++};
 +
-+static const struct config_field header_common[] = {
-+	{
-+	 .offset    = PCI_COMMAND,
-+	 .size      = 2,
-+	 .u.w.read  = pciback_read_config_word,
-+	 .u.w.write = command_write,
-+	},
-+	{
-+	 .offset    = PCI_INTERRUPT_LINE,
-+	 .size      = 1,
-+	 .u.b.read  = interrupt_read,
-+	},
-+	{
-+	 .offset    = PCI_INTERRUPT_PIN,
-+	 .size      = 1,
-+	 .u.b.read  = pciback_read_config_byte,
-+	},
-+	{
-+	 /* Any side effects of letting driver domain control cache line? */
-+	 .offset    = PCI_CACHE_LINE_SIZE,
-+	 .size      = 1,
-+	 .u.b.read  = pciback_read_config_byte,
-+	 .u.b.write = pciback_write_config_byte,
-+	},
-+	{
-+	 .offset    = PCI_LATENCY_TIMER,
-+	 .size      = 1,
-+	 .u.b.read  = pciback_read_config_byte,
-+	},
-+	{
-+	 .offset    = PCI_BIST,
-+	 .size      = 1,
-+	 .u.b.read  = pciback_read_config_byte,
-+	 .u.b.write = bist_write,
-+	},
-+	{}
++
++static struct xenbus_driver blkback = {
++	.name = "vbd",
++	.owner = THIS_MODULE,
++	.ids = blkback_ids,
++	.probe = blkback_probe,
++	.remove = blkback_remove,
++	.otherend_changed = frontend_changed
 +};
 +
-+#define CFG_FIELD_BAR(reg_offset) 			\
-+	{ 						\
-+	 .offset     = reg_offset, 			\
-+	 .size       = 4, 				\
-+	 .init       = bar_init, 			\
-+	 .reset      = bar_reset, 			\
-+	 .release    = bar_release, 			\
-+	 .u.dw.read  = bar_read, 			\
-+	 .u.dw.write = bar_write, 			\
-+	 }
-+
-+#define CFG_FIELD_ROM(reg_offset) 			\
-+	{ 						\
-+	 .offset     = reg_offset, 			\
-+	 .size       = 4, 				\
-+	 .init       = rom_init, 			\
-+	 .reset      = bar_reset, 			\
-+	 .release    = bar_release, 			\
-+	 .u.dw.read  = bar_read, 			\
-+	 .u.dw.write = rom_write, 			\
-+	 }
 +
-+static const struct config_field header_0[] = {
-+	CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
-+	CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
-+	CFG_FIELD_BAR(PCI_BASE_ADDRESS_2),
-+	CFG_FIELD_BAR(PCI_BASE_ADDRESS_3),
-+	CFG_FIELD_BAR(PCI_BASE_ADDRESS_4),
-+	CFG_FIELD_BAR(PCI_BASE_ADDRESS_5),
-+	CFG_FIELD_ROM(PCI_ROM_ADDRESS),
-+	{}
-+};
++int blkif_xenbus_init(void)
++{
++	return xenbus_register_backend(&blkback);
++}
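
connect() above, like most xenbus code, relies on the transaction retry idiom:
commit with xenbus_transaction_end(xbt, 0) and restart from the top whenever it
returns -EAGAIN, since that means the store changed under the transaction.
Reduced to a skeleton (the "feature-foo" node is invented for illustration):

#include <xen/xenbus.h>

/* Skeleton of the xenbus transaction retry idiom used by connect(). */
static int publish_with_retry(struct xenbus_device *dev)
{
	struct xenbus_transaction xbt;
	int err;

again:
	err = xenbus_transaction_start(&xbt);
	if (err)
		return err;

	err = xenbus_printf(xbt, dev->nodename, "feature-foo", "%d", 1);
	if (err) {
		xenbus_transaction_end(xbt, 1);	/* abort */
		return err;
	}

	err = xenbus_transaction_end(xbt, 0);	/* commit */
	if (err == -EAGAIN)
		goto again;	/* store changed under us; redo it all */
	return err;
}
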
+diff --git a/drivers/xen/events.c b/drivers/xen/events.c
+index 7468147..8b7fc9a6 100644
+--- a/drivers/xen/events.c
++++ b/drivers/xen/events.c
+@@ -97,6 +97,7 @@ struct irq_info
+ 			unsigned short gsi;
+ 			unsigned char vector;
+ 			unsigned char flags;
++			uint16_t domid;
+ 		} pirq;
+ 	} u;
+ };
+@@ -114,7 +115,7 @@ struct cpu_evtchn_s {
+ static __initdata struct cpu_evtchn_s init_evtchn_mask = {
+ 	.bits[0 ... (NR_EVENT_CHANNELS/BITS_PER_LONG)-1] = ~0ul,
+ };
+-static struct cpu_evtchn_s *cpu_evtchn_mask_p = &init_evtchn_mask;
++static struct cpu_evtchn_s __refdata *cpu_evtchn_mask_p = &init_evtchn_mask;
+ 
+ static inline unsigned long *cpu_evtchn_mask(int cpu)
+ {
+@@ -153,11 +154,13 @@ static struct irq_info mk_virq_info(unsigned short evtchn, unsigned short virq)
+ }
+ 
+ static struct irq_info mk_pirq_info(unsigned short evtchn, unsigned short pirq,
+-				    unsigned short gsi, unsigned short vector)
++				    unsigned short gsi, unsigned short vector,
++				    domid_t domid)
+ {
+ 	return (struct irq_info) { .type = IRQT_PIRQ, .evtchn = evtchn,
+ 			.cpu = 0,
+-			.u.pirq = { .pirq = pirq, .gsi = gsi, .vector = vector } };
++			.u.pirq = { .pirq = pirq, .gsi = gsi,
++				    .vector = vector, .domid = domid } };
+ }
+ 
+ /*
+@@ -277,7 +280,7 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
+ 
+ 	BUG_ON(irq == -1);
+ #ifdef CONFIG_SMP
+-	cpumask_copy(irq_to_desc(irq)->affinity, cpumask_of(cpu));
++	cpumask_copy(irq_to_desc(irq)->irq_data.affinity, cpumask_of(cpu));
+ #endif
+ 
+ 	clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq)));
+@@ -294,7 +297,7 @@ static void init_evtchn_cpu_bindings(void)
+ 
+ 	/* By default all event channels notify CPU#0. */
+ 	for_each_irq_desc(i, desc) {
+-		cpumask_copy(desc->affinity, cpumask_of(0));
++		cpumask_copy(desc->irq_data.affinity, cpumask_of(0));
+ 	}
+ #endif
+ 
+@@ -376,81 +379,69 @@ static void unmask_evtchn(int port)
+ 	put_cpu();
+ }
+ 
+-static int get_nr_hw_irqs(void)
++static int xen_allocate_irq_dynamic(void)
+ {
+-	int ret = 1;
++	int first = 0;
++	int irq;
+ 
+ #ifdef CONFIG_X86_IO_APIC
+-	ret = get_nr_irqs_gsi();
++	/*
++	 * For an HVM guest or domain 0, which sees "real" (emulated or
++	 * actual, respectively) GSIs, we allocate dynamic IRQs
++	 * e.g. those corresponding to event channels or MSIs
++	 * etc. from the range above those "real" GSIs to avoid
++	 * collisions.
++	 */
++	if (xen_initial_domain() || xen_hvm_domain())
++		first = get_nr_irqs_gsi();
+ #endif
+ 
+-	return ret;
+-}
++retry:
++	irq = irq_alloc_desc_from(first, -1);
+ 
+-static int find_unbound_pirq(int type)
+-{
+-	int rc, i;
+-	struct physdev_get_free_pirq op_get_free_pirq;
+-	op_get_free_pirq.type = type;
++	if (irq == -ENOMEM && first > NR_IRQS_LEGACY) {
++		printk(KERN_ERR "Out of dynamic IRQ space and eating into GSI space. You should increase nr_irqs\n");
++		first = max(NR_IRQS_LEGACY, first - NR_IRQS_LEGACY);
++		goto retry;
++	}
+ 
+-	rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
+-	if (!rc)
+-		return op_get_free_pirq.pirq;
++	if (irq < 0)
++		panic("No available IRQ to bind to: increase nr_irqs!\n");
+ 
+-	for (i = 0; i < nr_irqs; i++) {
+-		if (pirq_to_irq[i] < 0)
+-			return i;
+-	}
+-	return -1;
++	return irq;
+ }
+ 
+-static int find_unbound_irq(void)
++static int xen_allocate_irq_gsi(unsigned gsi)
+ {
+-	struct irq_data *data;
+-	int irq, res;
+-	int bottom = get_nr_hw_irqs();
+-	int top = nr_irqs-1;
+-
+-	if (bottom == nr_irqs)
+-		goto no_irqs;
++	int irq;
+ 
+-	/* This loop starts from the top of IRQ space and goes down.
+-	 * We need this b/c if we have a PCI device in a Xen PV guest
+-	 * we do not have an IO-APIC (though the backend might have them)
+-	 * mapped in. To not have a collision of physical IRQs with the Xen
+-	 * event channels start at the top of the IRQ space for virtual IRQs.
++	/*
++	 * A PV guest has no concept of a GSI (since it has no ACPI
++	 * nor access to/knowledge of the physical APICs). Therefore
++	 * all IRQs are dynamically allocated from the entire IRQ
++	 * space.
+ 	 */
+-	for (irq = top; irq > bottom; irq--) {
+-		data = irq_get_irq_data(irq);
+-		/* only 15->0 have init'd desc; handle irq > 16 */
+-		if (!data)
+-			break;
+-		if (data->chip == &no_irq_chip)
+-			break;
+-		if (data->chip != &xen_dynamic_chip)
+-			continue;
+-		if (irq_info[irq].type == IRQT_UNBOUND)
+-			return irq;
+-	}
++	if (xen_pv_domain() && !xen_initial_domain())
++		return xen_allocate_irq_dynamic();
+ 
+-	if (irq == bottom)
+-		goto no_irqs;
++	/* Legacy IRQ descriptors are already allocated by the arch. */
++	if (gsi < NR_IRQS_LEGACY)
++		return gsi;
+ 
+-	res = irq_alloc_desc_at(irq, -1);
+-
+-	if (WARN_ON(res != irq))
+-		return -1;
++	irq = irq_alloc_desc_at(gsi, -1);
++	if (irq < 0)
++		panic("Unable to allocate IRQ%d (%d)\n", gsi, irq);
+ 
+ 	return irq;
+-
+-no_irqs:
+-	panic("No available IRQ to bind to: increase nr_irqs!\n");
+ }
+ 
+-static bool identity_mapped_irq(unsigned irq)
++static void xen_free_irq(unsigned irq)
+ {
+-	/* identity map all the hardware irqs */
+-	return irq < get_nr_hw_irqs();
++	/* Legacy IRQ descriptors are managed by the arch. */
++	if (irq < NR_IRQS_LEGACY)
++		return;
++
++	irq_free_desc(irq);
+ }
+ 
+ static void pirq_unmask_notify(int irq)
+@@ -486,7 +477,7 @@ static bool probing_irq(int irq)
+ 	return desc && desc->action == NULL;
+ }
+ 
+-static unsigned int startup_pirq(unsigned int irq)
++static unsigned int __startup_pirq(unsigned int irq)
+ {
+ 	struct evtchn_bind_pirq bind_pirq;
+ 	struct irq_info *info = info_for_irq(irq);
+@@ -524,9 +515,15 @@ out:
+ 	return 0;
+ }
+ 
+-static void shutdown_pirq(unsigned int irq)
++static unsigned int startup_pirq(struct irq_data *data)
++{
++	return __startup_pirq(data->irq);
++}
++
++static void shutdown_pirq(struct irq_data *data)
+ {
+ 	struct evtchn_close close;
++	unsigned int irq = data->irq;
+ 	struct irq_info *info = info_for_irq(irq);
+ 	int evtchn = evtchn_from_irq(irq);
+ 
+@@ -546,20 +543,20 @@ static void shutdown_pirq(unsigned int irq)
+ 	info->evtchn = 0;
+ }
+ 
+-static void enable_pirq(unsigned int irq)
++static void enable_pirq(struct irq_data *data)
+ {
+-	startup_pirq(irq);
++	startup_pirq(data);
+ }
+ 
+-static void disable_pirq(unsigned int irq)
++static void disable_pirq(struct irq_data *data)
+ {
+ }
+ 
+-static void ack_pirq(unsigned int irq)
++static void ack_pirq(struct irq_data *data)
+ {
+-	int evtchn = evtchn_from_irq(irq);
++	int evtchn = evtchn_from_irq(data->irq);
+ 
+-	move_native_irq(irq);
++	move_native_irq(data->irq);
+ 
+ 	if (VALID_EVTCHN(evtchn)) {
+ 		mask_evtchn(evtchn);
+@@ -567,23 +564,6 @@ static void ack_pirq(unsigned int irq)
+ 	}
+ }
+ 
+-static void end_pirq(unsigned int irq)
+-{
+-	int evtchn = evtchn_from_irq(irq);
+-	struct irq_desc *desc = irq_to_desc(irq);
+-
+-	if (WARN_ON(!desc))
+-		return;
+-
+-	if ((desc->status & (IRQ_DISABLED|IRQ_PENDING)) ==
+-	    (IRQ_DISABLED|IRQ_PENDING)) {
+-		shutdown_pirq(irq);
+-	} else if (VALID_EVTCHN(evtchn)) {
+-		unmask_evtchn(evtchn);
+-		pirq_unmask_notify(irq);
+-	}
+-}
+-
+ static int find_irq_by_gsi(unsigned gsi)
+ {
+ 	int irq;
+@@ -638,14 +618,7 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name)
+ 		goto out;	/* XXX need refcount? */
+ 	}
+ 
+-	/* If we are a PV guest, we don't have GSIs (no ACPI passed). Therefore
+-	 * we are using the !xen_initial_domain() to drop in the function.*/
+-	if (identity_mapped_irq(gsi) || (!xen_initial_domain() &&
+-				xen_pv_domain())) {
+-		irq = gsi;
+-		irq_alloc_desc_at(irq, -1);
+-	} else
+-		irq = find_unbound_irq();
++	irq = xen_allocate_irq_gsi(gsi);
+ 
+ 	set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
+ 				      handle_level_irq, name);
+@@ -658,12 +631,12 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name)
+ 	 * this in the priv domain. */
+ 	if (xen_initial_domain() &&
+ 	    HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
+-		irq_free_desc(irq);
++		xen_free_irq(irq);
+ 		irq = -ENOSPC;
+ 		goto out;
+ 	}
+ 
+-	irq_info[irq] = mk_pirq_info(0, pirq, gsi, irq_op.vector);
++	irq_info[irq] = mk_pirq_info(0, pirq, gsi, irq_op.vector, DOMID_SELF);
+ 	irq_info[irq].u.pirq.flags |= shareable ? PIRQ_SHAREABLE : 0;
+ 	pirq_to_irq[pirq] = irq;
+ 
+@@ -674,87 +647,47 @@ out:
+ }
+ 
+ #ifdef CONFIG_PCI_MSI
+-#include <linux/msi.h>
+-#include "../pci/msi.h"
+-
+-void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc)
++int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc)
+ {
+-	spin_lock(&irq_mapping_update_lock);
+-
+-	if (alloc & XEN_ALLOC_IRQ) {
+-		*irq = find_unbound_irq();
+-		if (*irq == -1)
+-			goto out;
+-	}
+-
+-	if (alloc & XEN_ALLOC_PIRQ) {
+-		*pirq = find_unbound_pirq(MAP_PIRQ_TYPE_MSI);
+-		if (*pirq == -1)
+-			goto out;
+-	}
++	int rc;
++	struct physdev_get_free_pirq op_get_free_pirq;
+ 
+-	set_irq_chip_and_handler_name(*irq, &xen_pirq_chip,
+-				      handle_level_irq, name);
++	op_get_free_pirq.type = MAP_PIRQ_TYPE_MSI;
++	rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
+ 
+-	irq_info[*irq] = mk_pirq_info(0, *pirq, 0, 0);
+-	pirq_to_irq[*pirq] = *irq;
++	WARN_ONCE(rc == -ENOSYS,
++		  "hypervisor does not support the PHYSDEVOP_get_free_pirq interface\n");
+ 
+-out:
+-	spin_unlock(&irq_mapping_update_lock);
++	return rc ? -1 : op_get_free_pirq.pirq;
+ }
+ 
+-int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
++int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
++			     int pirq, int vector, const char *name,
++			     domid_t domid)
+ {
+-	int irq = -1;
+-	struct physdev_map_pirq map_irq;
+-	int rc;
+-	int pos;
+-	u32 table_offset, bir;
+-
+-	memset(&map_irq, 0, sizeof(map_irq));
+-	map_irq.domid = DOMID_SELF;
+-	map_irq.type = MAP_PIRQ_TYPE_MSI;
+-	map_irq.index = -1;
+-	map_irq.pirq = -1;
+-	map_irq.bus = dev->bus->number;
+-	map_irq.devfn = dev->devfn;
+-
+-	if (type == PCI_CAP_ID_MSIX) {
+-		pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
+-
+-		pci_read_config_dword(dev, msix_table_offset_reg(pos),
+-					&table_offset);
+-		bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
+-
+-		map_irq.table_base = pci_resource_start(dev, bir);
+-		map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
+-	}
++	int irq, ret;
+ 
+ 	spin_lock(&irq_mapping_update_lock);
+ 
+-	irq = find_unbound_irq();
+-
++	irq = xen_allocate_irq_dynamic();
+ 	if (irq == -1)
+ 		goto out;
+ 
+-	rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
+-	if (rc) {
+-		printk(KERN_WARNING "xen map irq failed %d\n", rc);
+-
+-		irq_free_desc(irq);
+-
+-		irq = -1;
+-		goto out;
+-	}
+-	irq_info[irq] = mk_pirq_info(0, map_irq.pirq, 0, map_irq.index);
+-
+ 	set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
+-			handle_level_irq,
+-			(type == PCI_CAP_ID_MSIX) ? "msi-x":"msi");
++				      handle_level_irq, name);
+ 
++	irq_info[irq] = mk_pirq_info(0, pirq, 0, vector, domid);
++	pirq_to_irq[pirq] = irq;
++	ret = set_irq_msi(irq, msidesc);
++	if (ret < 0)
++		goto error_irq;
+ out:
+ 	spin_unlock(&irq_mapping_update_lock);
+ 	return irq;
++error_irq:
++	spin_unlock(&irq_mapping_update_lock);
++	xen_free_irq(irq);
++	return -1;
+ }
+ #endif
+ 
+@@ -773,17 +706,25 @@ int xen_destroy_irq(int irq)
+ 
+ 	if (xen_initial_domain()) {
+ 		unmap_irq.pirq = info->u.pirq.pirq;
+-		unmap_irq.domid = DOMID_SELF;
++		unmap_irq.domid = info->u.pirq.domid;
+ 		rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq);
+-		if (rc) {
++		/* If another domain quits without making the pci_disable_msix
++		 * call, the Xen hypervisor takes care of freeing the PIRQs
++		 * (free_domain_pirqs).
++		 */
++		if ((rc == -ESRCH && info->u.pirq.domid != DOMID_SELF))
++			printk(KERN_INFO "domain %d does not have pirq %d anymore\n",
++				info->u.pirq.domid, info->u.pirq.pirq);
++		else if (rc) {
+ 			printk(KERN_WARNING "unmap irq failed %d\n", rc);
+ 			goto out;
+ 		}
+-		pirq_to_irq[info->u.pirq.pirq] = -1;
+ 	}
++	pirq_to_irq[info->u.pirq.pirq] = -1;
 +
-+static const struct config_field header_1[] = {
-+	CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
-+	CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
-+	CFG_FIELD_ROM(PCI_ROM_ADDRESS1),
-+	{}
-+};
+ 	irq_info[irq] = mk_unbound_info();
+ 
+-	irq_free_desc(irq);
++	xen_free_irq(irq);
+ 
+ out:
+ 	spin_unlock(&irq_mapping_update_lock);
+@@ -805,6 +746,12 @@ int xen_irq_from_pirq(unsigned pirq)
+ 	return pirq_to_irq[pirq];
+ }
+ 
 +
-+int pciback_config_header_add_fields(struct pci_dev *dev)
++int xen_pirq_from_irq(unsigned irq)
 +{
-+	int err;
-+
-+	err = pciback_config_add_fields(dev, header_common);
-+	if (err)
-+		goto out;
-+
-+	switch (dev->hdr_type) {
-+	case PCI_HEADER_TYPE_NORMAL:
-+		err = pciback_config_add_fields(dev, header_0);
-+		break;
-+
-+	case PCI_HEADER_TYPE_BRIDGE:
-+		err = pciback_config_add_fields(dev, header_1);
-+		break;
-+
-+	default:
-+		err = -EINVAL;
-+		printk(KERN_ERR "pciback: %s: Unsupported header type %d!\n",
-+		       pci_name(dev), dev->hdr_type);
-+		break;
-+	}
-+
-+      out:
-+	return err;
++	return pirq_from_irq(irq);
 +}
-diff --git a/drivers/xen/pciback/conf_space_quirks.c b/drivers/xen/pciback/conf_space_quirks.c
-new file mode 100644
-index 0000000..244a438
---- /dev/null
-+++ b/drivers/xen/pciback/conf_space_quirks.c
-@@ -0,0 +1,138 @@
-+/*
-+ * PCI Backend - Handle special overlays for broken devices.
-+ *
-+ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
-+ * Author: Chris Bookholt <hap10 at epoch.ncsc.mil>
-+ */
-+
-+#include <linux/kernel.h>
-+#include <linux/pci.h>
-+#include "pciback.h"
-+#include "conf_space.h"
-+#include "conf_space_quirks.h"
-+
-+LIST_HEAD(pciback_quirks);
-+
-+static inline const struct pci_device_id *
-+match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
++EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
+ int bind_evtchn_to_irq(unsigned int evtchn)
+ {
+ 	int irq;
+@@ -814,7 +761,7 @@ int bind_evtchn_to_irq(unsigned int evtchn)
+ 	irq = evtchn_to_irq[evtchn];
+ 
+ 	if (irq == -1) {
+-		irq = find_unbound_irq();
++		irq = xen_allocate_irq_dynamic();
+ 
+ 		set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
+ 					      handle_fasteoi_irq, "event");
+@@ -839,7 +786,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
+ 	irq = per_cpu(ipi_to_irq, cpu)[ipi];
+ 
+ 	if (irq == -1) {
+-		irq = find_unbound_irq();
++		irq = xen_allocate_irq_dynamic();
+ 		if (irq < 0)
+ 			goto out;
+ 
+@@ -864,6 +811,21 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
+ 	return irq;
+ }
+ 
++static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
++					  unsigned int remote_port)
 +{
-+	if ((id->vendor == PCI_ANY_ID || id->vendor == dev->vendor) &&
-+	    (id->device == PCI_ANY_ID || id->device == dev->device) &&
-+	    (id->subvendor == PCI_ANY_ID || id->subvendor == dev->subsystem_vendor) &&
-+	    (id->subdevice == PCI_ANY_ID || id->subdevice == dev->subsystem_device) &&
-+	    !((id->class ^ dev->class) & id->class_mask))
-+		return id;
-+	return NULL;
-+}
++	struct evtchn_bind_interdomain bind_interdomain;
++	int err;
 +
-+struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev)
-+{
-+	struct pciback_config_quirk *tmp_quirk;
++	bind_interdomain.remote_dom  = remote_domain;
++	bind_interdomain.remote_port = remote_port;
 +
-+	list_for_each_entry(tmp_quirk, &pciback_quirks, quirks_list)
-+		if (match_one_device(&tmp_quirk->devid, dev) != NULL)
-+			goto out;
-+	tmp_quirk = NULL;
-+	printk(KERN_DEBUG
-+	       "quirk didn't match any device pciback knows about\n");
-+      out:
-+	return tmp_quirk;
-+}
++	err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
++					  &bind_interdomain);
 +
-+static inline void register_quirk(struct pciback_config_quirk *quirk)
-+{
-+	list_add_tail(&quirk->quirks_list, &pciback_quirks);
++	return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
 +}
 +
-+int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg)
+ 
+ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
+ {
+@@ -875,7 +837,7 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
+ 	irq = per_cpu(virq_to_irq, cpu)[virq];
+ 
+ 	if (irq == -1) {
+-		irq = find_unbound_irq();
++		irq = xen_allocate_irq_dynamic();
+ 
+ 		set_irq_chip_and_handler_name(irq, &xen_percpu_chip,
+ 					      handle_percpu_irq, "virq");
+@@ -934,7 +896,7 @@ static void unbind_from_irq(unsigned int irq)
+ 	if (irq_info[irq].type != IRQT_UNBOUND) {
+ 		irq_info[irq] = mk_unbound_info();
+ 
+-		irq_free_desc(irq);
++		xen_free_irq(irq);
+ 	}
+ 
+ 	spin_unlock(&irq_mapping_update_lock);
+@@ -959,6 +921,29 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn,
+ }
+ EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
+ 
++int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
++					  unsigned int remote_port,
++					  irq_handler_t handler,
++					  unsigned long irqflags,
++					  const char *devname,
++					  void *dev_id)
 +{
-+	int ret = 0;
-+	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
-+	struct config_field_entry *cfg_entry;
-+
-+	list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
-+		if ( OFFSET(cfg_entry) == reg) {
-+			ret = 1;
-+			break;
-+		}
-+	}
-+	return ret;
-+}
++	int irq, retval;
 +
-+int pciback_config_quirks_add_field(struct pci_dev *dev, struct config_field
-+				    *field)
-+{
-+	int err = 0;
++	irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port);
++	if (irq < 0)
++		return irq;
 +
-+	switch (field->size) {
-+	case 1:
-+		field->u.b.read = pciback_read_config_byte;
-+		field->u.b.write = pciback_write_config_byte;
-+		break;
-+	case 2:
-+		field->u.w.read = pciback_read_config_word;
-+		field->u.w.write = pciback_write_config_word;
-+		break;
-+	case 4:
-+		field->u.dw.read = pciback_read_config_dword;
-+		field->u.dw.write = pciback_write_config_dword;
-+		break;
-+	default:
-+		err = -EINVAL;
-+		goto out;
++	retval = request_irq(irq, handler, irqflags, devname, dev_id);
++	if (retval != 0) {
++		unbind_from_irq(irq);
++		return retval;
 +	}
 +
-+	pciback_config_add_field(dev, field);
-+
-+      out:
-+	return err;
++	return irq;
 +}
++EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler);
 +
-+int pciback_config_quirks_init(struct pci_dev *dev)
+ int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
+ 			    irq_handler_t handler,
+ 			    unsigned long irqflags, const char *devname, void *dev_id)
+@@ -990,7 +975,7 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
+ 	if (irq < 0)
+ 		return irq;
+ 
+-	irqflags |= IRQF_NO_SUSPEND;
++	irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME;
+ 	retval = request_irq(irq, handler, irqflags, devname, dev_id);
+ 	if (retval != 0) {
+ 		unbind_from_irq(irq);
+@@ -1234,11 +1219,12 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
+ 	return 0;
+ }
+ 
+-static int set_affinity_irq(unsigned irq, const struct cpumask *dest)
++static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
++			    bool force)
+ {
+ 	unsigned tcpu = cpumask_first(dest);
+ 
+-	return rebind_irq_to_cpu(irq, tcpu);
++	return rebind_irq_to_cpu(data->irq, tcpu);
+ }
+ 
+ int resend_irq_on_evtchn(unsigned int irq)
+@@ -1257,35 +1243,35 @@ int resend_irq_on_evtchn(unsigned int irq)
+ 	return 1;
+ }
+ 
+-static void enable_dynirq(unsigned int irq)
++static void enable_dynirq(struct irq_data *data)
+ {
+-	int evtchn = evtchn_from_irq(irq);
++	int evtchn = evtchn_from_irq(data->irq);
+ 
+ 	if (VALID_EVTCHN(evtchn))
+ 		unmask_evtchn(evtchn);
+ }
+ 
+-static void disable_dynirq(unsigned int irq)
++static void disable_dynirq(struct irq_data *data)
+ {
+-	int evtchn = evtchn_from_irq(irq);
++	int evtchn = evtchn_from_irq(data->irq);
+ 
+ 	if (VALID_EVTCHN(evtchn))
+ 		mask_evtchn(evtchn);
+ }
+ 
+-static void ack_dynirq(unsigned int irq)
++static void ack_dynirq(struct irq_data *data)
+ {
+-	int evtchn = evtchn_from_irq(irq);
++	int evtchn = evtchn_from_irq(data->irq);
+ 
+-	move_masked_irq(irq);
++	move_masked_irq(data->irq);
+ 
+ 	if (VALID_EVTCHN(evtchn))
+ 		unmask_evtchn(evtchn);
+ }
+ 
+-static int retrigger_dynirq(unsigned int irq)
++static int retrigger_dynirq(struct irq_data *data)
+ {
+-	int evtchn = evtchn_from_irq(irq);
++	int evtchn = evtchn_from_irq(data->irq);
+ 	struct shared_info *sh = HYPERVISOR_shared_info;
+ 	int ret = 0;
+ 
+@@ -1334,7 +1320,7 @@ static void restore_cpu_pirqs(void)
+ 
+ 		printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq);
+ 
+-		startup_pirq(irq);
++		__startup_pirq(irq);
+ 	}
+ }
+ 
+@@ -1442,10 +1428,21 @@ void xen_poll_irq(int irq)
+ 	xen_poll_irq_timeout(irq, 0 /* no timeout */);
+ }
+ 
++/* Check whether the IRQ line is shared with other guests. */
++int xen_test_irq_shared(int irq)
 +{
-+	struct pciback_config_quirk *quirk;
-+	int ret = 0;
-+
-+	quirk = kzalloc(sizeof(*quirk), GFP_ATOMIC);
-+	if (!quirk) {
-+		ret = -ENOMEM;
-+		goto out;
-+	}
-+
-+	quirk->devid.vendor = dev->vendor;
-+	quirk->devid.device = dev->device;
-+	quirk->devid.subvendor = dev->subsystem_vendor;
-+	quirk->devid.subdevice = dev->subsystem_device;
-+	quirk->devid.class = 0;
-+	quirk->devid.class_mask = 0;
-+	quirk->devid.driver_data = 0UL;
-+
-+	quirk->pdev = dev;
-+
-+	register_quirk(quirk);
-+      out:
-+	return ret;
-+}
++	struct irq_info *info = info_for_irq(irq);
++	struct physdev_irq_status_query irq_status = { .irq = info->u.pirq.pirq };
 +
-+void pciback_config_field_free(struct config_field *field)
-+{
-+	kfree(field);
++	if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
++		return 0;
++	return !(irq_status.flags & XENIRQSTAT_shared);
 +}
++EXPORT_SYMBOL_GPL(xen_test_irq_shared);
 +
-+int pciback_config_quirk_release(struct pci_dev *dev)
-+{
-+	struct pciback_config_quirk *quirk;
-+	int ret = 0;
-+
-+	quirk = pciback_find_quirk(dev);
-+	if (!quirk) {
-+		ret = -ENXIO;
-+		goto out;
-+	}
-+
-+	list_del(&quirk->quirks_list);
-+	kfree(quirk);
-+
-+      out:
-+	return ret;
-+}
-diff --git a/drivers/xen/pciback/conf_space_quirks.h b/drivers/xen/pciback/conf_space_quirks.h
+ void xen_irq_resume(void)
+ {
+ 	unsigned int cpu, irq, evtchn;
+-	struct irq_desc *desc;
+ 
+ 	init_evtchn_cpu_bindings();
+ 
+@@ -1465,66 +1462,48 @@ void xen_irq_resume(void)
+ 		restore_cpu_ipis(cpu);
+ 	}
+ 
+-	/*
+-	 * Unmask any IRQF_NO_SUSPEND IRQs which are enabled. These
+-	 * are not handled by the IRQ core.
+-	 */
+-	for_each_irq_desc(irq, desc) {
+-		if (!desc->action || !(desc->action->flags & IRQF_NO_SUSPEND))
+-			continue;
+-		if (desc->status & IRQ_DISABLED)
+-			continue;
+-
+-		evtchn = evtchn_from_irq(irq);
+-		if (evtchn == -1)
+-			continue;
+-
+-		unmask_evtchn(evtchn);
+-	}
+-
+ 	restore_cpu_pirqs();
+ }
+ 
+ static struct irq_chip xen_dynamic_chip __read_mostly = {
+-	.name		= "xen-dyn",
++	.name			= "xen-dyn",
+ 
+-	.disable	= disable_dynirq,
+-	.mask		= disable_dynirq,
+-	.unmask		= enable_dynirq,
++	.irq_disable		= disable_dynirq,
++	.irq_mask		= disable_dynirq,
++	.irq_unmask		= enable_dynirq,
+ 
+-	.eoi		= ack_dynirq,
+-	.set_affinity	= set_affinity_irq,
+-	.retrigger	= retrigger_dynirq,
++	.irq_eoi		= ack_dynirq,
++	.irq_set_affinity	= set_affinity_irq,
++	.irq_retrigger		= retrigger_dynirq,
+ };
+ 
+ static struct irq_chip xen_pirq_chip __read_mostly = {
+-	.name		= "xen-pirq",
++	.name			= "xen-pirq",
+ 
+-	.startup	= startup_pirq,
+-	.shutdown	= shutdown_pirq,
++	.irq_startup		= startup_pirq,
++	.irq_shutdown		= shutdown_pirq,
+ 
+-	.enable		= enable_pirq,
+-	.unmask		= enable_pirq,
++	.irq_enable		= enable_pirq,
++	.irq_unmask		= enable_pirq,
+ 
+-	.disable	= disable_pirq,
+-	.mask		= disable_pirq,
++	.irq_disable		= disable_pirq,
++	.irq_mask		= disable_pirq,
+ 
+-	.ack		= ack_pirq,
+-	.end		= end_pirq,
++	.irq_ack		= ack_pirq,
+ 
+-	.set_affinity	= set_affinity_irq,
++	.irq_set_affinity	= set_affinity_irq,
+ 
+-	.retrigger	= retrigger_dynirq,
++	.irq_retrigger		= retrigger_dynirq,
+ };
+ 
+ static struct irq_chip xen_percpu_chip __read_mostly = {
+-	.name		= "xen-percpu",
++	.name			= "xen-percpu",
+ 
+-	.disable	= disable_dynirq,
+-	.mask		= disable_dynirq,
+-	.unmask		= enable_dynirq,
++	.irq_disable		= disable_dynirq,
++	.irq_mask		= disable_dynirq,
++	.irq_unmask		= enable_dynirq,
+ 
+-	.ack		= ack_dynirq,
++	.irq_ack		= ack_dynirq,
+ };
+ 
+ int xen_set_callback_via(uint64_t via)
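
The new bind_interdomain_evtchn_to_irqhandler() added above lets a backend bind a
frontend's event channel in one call instead of open-coding
EVTCHNOP_bind_interdomain. A hedged usage sketch; the structure and names stand in
for a backend's real per-frontend state:

#include <linux/interrupt.h>
#include <xen/events.h>

/* Sketch: "my_backend" stands in for a backend's per-frontend state
 * (blkback keeps domid and irq in its blkif structure, for example). */
struct my_backend {
	domid_t domid;
	unsigned int irq;
};

static irqreturn_t be_interrupt(int irq, void *dev_id)
{
	/* typically: kick the thread that drains the shared ring */
	return IRQ_HANDLED;
}

static int bind_frontend_evtchn(struct my_backend *be, unsigned int evtchn)
{
	int err = bind_interdomain_evtchn_to_irqhandler(be->domid, evtchn,
							be_interrupt, 0,
							"my-backend", be);
	if (err < 0)
		return err;
	be->irq = err;	/* the bound IRQ number on success */
	return 0;
}
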
+diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
 new file mode 100644
-index 0000000..acd0e1a
+index 0000000..a7ffdfe
 --- /dev/null
-+++ b/drivers/xen/pciback/conf_space_quirks.h
-@@ -0,0 +1,35 @@
-+/*
-+ * PCI Backend - Data structures for special overlays for broken devices.
++++ b/drivers/xen/gntalloc.c
+@@ -0,0 +1,545 @@
++/******************************************************************************
++ * gntalloc.c
 + *
-+ * Ryan Wilson <hap9 at epoch.ncsc.mil>
-+ * Chris Bookholt <hap10 at epoch.ncsc.mil>
++ * Device for creating grant references (in user-space) that may be shared
++ * with other domains.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 + */
 +
-+#ifndef __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__
-+#define __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__
-+
-+#include <linux/pci.h>
-+#include <linux/list.h>
-+
-+struct pciback_config_quirk {
-+	struct list_head quirks_list;
-+	struct pci_device_id devid;
-+	struct pci_dev *pdev;
-+};
-+
-+struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev);
-+
-+int pciback_config_quirks_add_field(struct pci_dev *dev, struct config_field
-+				    *field);
-+
-+int pciback_config_quirks_remove_field(struct pci_dev *dev, int reg);
-+
-+int pciback_config_quirks_init(struct pci_dev *dev);
-+
-+void pciback_config_field_free(struct config_field *field);
-+
-+int pciback_config_quirk_release(struct pci_dev *dev);
-+
-+int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg);
-+
-+#endif
-diff --git a/drivers/xen/pciback/controller.c b/drivers/xen/pciback/controller.c
-new file mode 100644
-index 0000000..294e48f
---- /dev/null
-+++ b/drivers/xen/pciback/controller.c
-@@ -0,0 +1,443 @@
 +/*
-+ * Copyright (C) 2007 Hewlett-Packard Development Company, L.P.
-+ *      Alex Williamson <alex.williamson at hp.com>
++ * This driver exists to allow userspace programs in Linux to allocate kernel
++ * memory that will later be shared with another domain.  Without this device,
++ * Linux userspace programs cannot create grant references.
 + *
-+ * PCI "Controller" Backend - virtualize PCI bus topology based on PCI
-+ * controllers.  Devices under the same PCI controller are exposed on the
-+ * same virtual domain:bus.  Within a bus, device slots are virtualized
-+ * to compact the bus.
++ * How this stuff works:
++ *   X -> granting a page to Y
++ *   Y -> mapping the grant from X
 + *
-+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or
-+ * (at your option) any later version.
++ *   1. X uses the gntalloc device to allocate a page of kernel memory, P.
++ *   2. X creates an entry in the grant table that says domid(Y) can access P.
++ *      This is done without a hypercall unless the grant table needs expansion.
++ *   3. X gives the grant reference identifier, GREF, to Y.
++ *   4. Y maps the page, either directly into kernel memory for use in a backend
++ *      driver, or via the gntdev device to map into the address space of an
++ *      application running in Y. This is the first point at which Xen does any
++ *      tracking of the page.
++ *   5. A program in X mmap()s a segment of the gntalloc device that corresponds
++ *      to the shared page, and can now communicate with Y over the shared page.
 + *
-+ * This program is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-+ * GNU General Public License for more details.
 + *
-+ * You should have received a copy of the GNU General Public License
-+ * along with this program; if not, write to the Free Software
-+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++ * NOTE TO USERSPACE LIBRARIES:
++ *   The grant allocation and mmap()ing are, naturally, two separate operations.
++ *   You set up the sharing by calling the create ioctl() and then the mmap().
++ *   Teardown requires munmap() and either close() or ioctl().
++ *
++ * WARNING: Since Xen does not allow a guest to forcibly end the use of a grant
++ * reference, this device can be used to consume kernel memory by leaving grant
++ * references mapped by another domain when an application exits. Therefore,
++ * there is a global limit on the number of pages that can be allocated. When
++ * all references to the page are unmapped, it will be freed during the next
++ * grant operation.
 + */
 +
-+#include <linux/acpi.h>
++#include <linux/atomic.h>
++#include <linux/module.h>
++#include <linux/miscdevice.h>
++#include <linux/kernel.h>
++#include <linux/init.h>
++#include <linux/slab.h>
++#include <linux/fs.h>
++#include <linux/device.h>
++#include <linux/mm.h>
++#include <linux/uaccess.h>
++#include <linux/types.h>
 +#include <linux/list.h>
-+#include <linux/pci.h>
-+#include <linux/spinlock.h>
-+#include "pciback.h"
++#include <linux/highmem.h>
 +
-+#define PCI_MAX_BUSSES	255
-+#define PCI_MAX_SLOTS	32
++#include <xen/xen.h>
++#include <xen/page.h>
++#include <xen/grant_table.h>
++#include <xen/gntalloc.h>
++#include <xen/events.h>
 +
-+struct controller_dev_entry {
-+	struct list_head list;
-+	struct pci_dev *dev;
-+	unsigned int devfn;
++static int limit = 1024;
++module_param(limit, int, 0644);
++MODULE_PARM_DESC(limit, "Maximum number of grants that may be allocated by "
++		"the gntalloc device");
++
++static LIST_HEAD(gref_list);
++static DEFINE_SPINLOCK(gref_lock);
++static int gref_size;
++
++struct notify_info {
++	uint16_t pgoff:12;    /* Bits 0-11: Offset of the byte to clear */
++	uint16_t flags:2;     /* Bits 12-13: Unmap notification flags */
++	int event;            /* Port (event channel) to notify */
 +};
 +
-+struct controller_list_entry {
-+	struct list_head list;
-+	struct pci_controller *controller;
-+	unsigned int domain;
-+	unsigned int bus;
-+	unsigned int next_devfn;
-+	struct list_head dev_list;
++/* Metadata on a grant reference. */
++struct gntalloc_gref {
++	struct list_head next_gref;  /* list entry gref_list */
++	struct list_head next_file;  /* list entry file->list, if open */
++	struct page *page;	     /* The shared page */
++	uint64_t file_index;         /* File offset for mmap() */
++	unsigned int users;          /* Use count - when zero, waiting on Xen */
++	grant_ref_t gref_id;         /* The grant reference number */
++	struct notify_info notify;   /* Unmap notification */
 +};
 +
-+struct controller_dev_data {
++struct gntalloc_file_private_data {
 +	struct list_head list;
-+	unsigned int next_domain;
-+	unsigned int next_bus;
-+	spinlock_t lock;
++	uint64_t index;
 +};
 +
-+struct walk_info {
-+	struct pciback_device *pdev;
-+	int resource_count;
-+	int root_num;
-+};
++static void __del_gref(struct gntalloc_gref *gref);
 +
-+struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
-+				    unsigned int domain, unsigned int bus,
-+				    unsigned int devfn)
++static void do_cleanup(void)
 +{
-+	struct controller_dev_data *dev_data = pdev->pci_dev_data;
-+	struct controller_dev_entry *dev_entry;
-+	struct controller_list_entry *cntrl_entry;
-+	struct pci_dev *dev = NULL;
-+	unsigned long flags;
-+
-+	spin_lock_irqsave(&dev_data->lock, flags);
-+
-+	list_for_each_entry(cntrl_entry, &dev_data->list, list) {
-+		if (cntrl_entry->domain != domain ||
-+		    cntrl_entry->bus != bus)
-+			continue;
-+
-+		list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) {
-+			if (devfn == dev_entry->devfn) {
-+				dev = dev_entry->dev;
-+				goto found;
-+			}
-+		}
++	struct gntalloc_gref *gref, *n;
++	list_for_each_entry_safe(gref, n, &gref_list, next_gref) {
++		if (!gref->users)
++			__del_gref(gref);
 +	}
-+found:
-+	spin_unlock_irqrestore(&dev_data->lock, flags);
-+
-+	return dev;
 +}
 +
-+int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
-+			int devid, publish_pci_dev_cb publish_cb)
++static int add_grefs(struct ioctl_gntalloc_alloc_gref *op,
++	uint32_t *gref_ids, struct gntalloc_file_private_data *priv)
 +{
-+	struct controller_dev_data *dev_data = pdev->pci_dev_data;
-+	struct controller_dev_entry *dev_entry;
-+	struct controller_list_entry *cntrl_entry;
-+	struct pci_controller *dev_controller = PCI_CONTROLLER(dev);
-+	unsigned long flags;
-+	int ret = 0, found = 0;
++	int i, rc, readonly;
++	LIST_HEAD(queue_gref);
++	LIST_HEAD(queue_file);
++	struct gntalloc_gref *gref;
 +
-+	spin_lock_irqsave(&dev_data->lock, flags);
++	readonly = !(op->flags & GNTALLOC_FLAG_WRITABLE);
++	rc = -ENOMEM;
++	for (i = 0; i < op->count; i++) {
++		gref = kzalloc(sizeof(*gref), GFP_KERNEL);
++		if (!gref)
++			goto undo;
++		list_add_tail(&gref->next_gref, &queue_gref);
++		list_add_tail(&gref->next_file, &queue_file);
++		gref->users = 1;
++		gref->file_index = op->index + i * PAGE_SIZE;
++		gref->page = alloc_page(GFP_KERNEL|__GFP_ZERO);
++		if (!gref->page)
++			goto undo;
 +
-+	/* Look to see if we already have a domain:bus for this controller */
-+	list_for_each_entry(cntrl_entry, &dev_data->list, list) {
-+		if (cntrl_entry->controller == dev_controller) {
-+			found = 1;
-+			break;
++		/* Grant foreign access to the page. */
++		gref->gref_id = gnttab_grant_foreign_access(op->domid,
++			pfn_to_mfn(page_to_pfn(gref->page)), readonly);
++		if (gref->gref_id < 0) {
++			rc = gref->gref_id;
++			goto undo;
 +		}
++		gref_ids[i] = gref->gref_id;
 +	}
 +
-+	if (!found) {
-+		cntrl_entry = kmalloc(sizeof(*cntrl_entry), GFP_ATOMIC);
-+		if (!cntrl_entry) {
-+			ret =  -ENOMEM;
-+			goto out;
-+		}
-+
-+		cntrl_entry->controller = dev_controller;
-+		cntrl_entry->next_devfn = PCI_DEVFN(0, 0);
-+
-+		cntrl_entry->domain = dev_data->next_domain;
-+		cntrl_entry->bus = dev_data->next_bus++;
-+		if (dev_data->next_bus > PCI_MAX_BUSSES) {
-+			dev_data->next_domain++;
-+			dev_data->next_bus = 0;
-+		}
-+
-+		INIT_LIST_HEAD(&cntrl_entry->dev_list);
++	/* Add to gref lists. */
++	spin_lock(&gref_lock);
++	list_splice_tail(&queue_gref, &gref_list);
++	list_splice_tail(&queue_file, &priv->list);
++	spin_unlock(&gref_lock);
 +
-+		list_add_tail(&cntrl_entry->list, &dev_data->list);
-+	}
++	return 0;
 +
-+	if (PCI_SLOT(cntrl_entry->next_devfn) > PCI_MAX_SLOTS) {
-+		/*
-+		 * While it seems unlikely, this can actually happen if
-+		 * a controller has P2P bridges under it.
-+		 */
-+		xenbus_dev_fatal(pdev->xdev, -ENOSPC, "Virtual bus %04x:%02x "
-+				 "is full, no room to export %04x:%02x:%02x.%x",
-+				 cntrl_entry->domain, cntrl_entry->bus,
-+				 pci_domain_nr(dev->bus), dev->bus->number,
-+				 PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
-+		ret = -ENOSPC;
-+		goto out;
-+	}
++undo:
++	spin_lock(&gref_lock);
++	gref_size -= (op->count - i);
 +
-+	dev_entry = kmalloc(sizeof(*dev_entry), GFP_ATOMIC);
-+	if (!dev_entry) {
-+		if (list_empty(&cntrl_entry->dev_list)) {
-+			list_del(&cntrl_entry->list);
-+			kfree(cntrl_entry);
-+		}
-+		ret = -ENOMEM;
-+		goto out;
++	list_for_each_entry(gref, &queue_file, next_file) {
++		/* __del_gref does not remove from queue_file */
++		__del_gref(gref);
 +	}
 +
-+	dev_entry->dev = dev;
-+	dev_entry->devfn = cntrl_entry->next_devfn;
-+
-+	list_add_tail(&dev_entry->list, &cntrl_entry->dev_list);
-+
-+	cntrl_entry->next_devfn += PCI_DEVFN(1, 0);
-+
-+out:
-+	spin_unlock_irqrestore(&dev_data->lock, flags);
-+
-+	/* TODO: Publish virtual domain:bus:slot.func here. */
-+
-+	return ret;
++	/* It's possible for the target domain to map the just-allocated grant
++	 * references by blindly guessing their IDs; if this is done, then
++	 * __del_gref will leave them in the queue_gref list. They need to be
++	 * added to the global list so that we can free them when they are no
++	 * longer referenced.
++	 */
++	if (unlikely(!list_empty(&queue_gref)))
++		list_splice_tail(&queue_gref, &gref_list);
++	spin_unlock(&gref_lock);
++	return rc;
 +}
 +
-+void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
-+{
-+	struct controller_dev_data *dev_data = pdev->pci_dev_data;
-+	struct controller_list_entry *cntrl_entry;
-+	struct controller_dev_entry *dev_entry = NULL;
-+	struct pci_dev *found_dev = NULL;
-+	unsigned long flags;
-+
-+	spin_lock_irqsave(&dev_data->lock, flags);
-+
-+	list_for_each_entry(cntrl_entry, &dev_data->list, list) {
-+		if (cntrl_entry->controller != PCI_CONTROLLER(dev))
-+			continue;
-+
-+		list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) {
-+			if (dev_entry->dev == dev) {
-+				found_dev = dev_entry->dev;
-+				break;
-+			}
-+		}
++static void __del_gref(struct gntalloc_gref *gref)
++{
++	if (gref->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
++		uint8_t *tmp = kmap(gref->page);
++		tmp[gref->notify.pgoff] = 0;
++		kunmap(gref->page);
 +	}
++	if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT)
++		notify_remote_via_evtchn(gref->notify.event);
 +
-+	if (!found_dev) {
-+		spin_unlock_irqrestore(&dev_data->lock, flags);
-+		return;
-+	}
++	gref->notify.flags = 0;
 +
-+	list_del(&dev_entry->list);
-+	kfree(dev_entry);
++	if (gref->gref_id > 0) {
++		if (gnttab_query_foreign_access(gref->gref_id))
++			return;
 +
-+	if (list_empty(&cntrl_entry->dev_list)) {
-+		list_del(&cntrl_entry->list);
-+		kfree(cntrl_entry);
++		if (!gnttab_end_foreign_access_ref(gref->gref_id, 0))
++			return;
 +	}
 +
-+	spin_unlock_irqrestore(&dev_data->lock, flags);
-+	pcistub_put_pci_dev(found_dev);
++	gref_size--;
++	list_del(&gref->next_gref);
++
++	if (gref->page)
++		__free_page(gref->page);
++
++	kfree(gref);
 +}
 +
-+int pciback_init_devices(struct pciback_device *pdev)
++/* finds contiguous grant references in a file, returns the first */
++static struct gntalloc_gref *find_grefs(struct gntalloc_file_private_data *priv,
++		uint64_t index, uint32_t count)
 +{
-+	struct controller_dev_data *dev_data;
-+
-+	dev_data = kmalloc(sizeof(*dev_data), GFP_KERNEL);
-+	if (!dev_data)
-+		return -ENOMEM;
++	struct gntalloc_gref *rv = NULL, *gref;
++	list_for_each_entry(gref, &priv->list, next_file) {
++		if (gref->file_index == index && !rv)
++			rv = gref;
++		if (rv) {
++			if (gref->file_index != index)
++				return NULL;
++			index += PAGE_SIZE;
++			count--;
++			if (count == 0)
++				return rv;
++		}
++	}
++	return NULL;
++}
 +
-+	spin_lock_init(&dev_data->lock);
++/*
++ * -------------------------------------
++ *  File operations.
++ * -------------------------------------
++ */
++static int gntalloc_open(struct inode *inode, struct file *filp)
++{
++	struct gntalloc_file_private_data *priv;
 +
-+	INIT_LIST_HEAD(&dev_data->list);
++	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
++	if (!priv)
++		goto out_nomem;
++	INIT_LIST_HEAD(&priv->list);
 +
-+	/* Starting domain:bus numbers */
-+	dev_data->next_domain = 0;
-+	dev_data->next_bus = 0;
++	filp->private_data = priv;
 +
-+	pdev->pci_dev_data = dev_data;
++	pr_debug("%s: priv %p\n", __func__, priv);
 +
 +	return 0;
++
++out_nomem:
++	return -ENOMEM;
 +}
 +
-+static acpi_status write_xenbus_resource(struct acpi_resource *res, void *data)
++static int gntalloc_release(struct inode *inode, struct file *filp)
 +{
-+	struct walk_info *info = data;
-+	struct acpi_resource_address64 addr;
-+	acpi_status status;
-+	int i, len, err;
-+	char str[32], tmp[3];
-+	unsigned char *ptr, *buf;
++	struct gntalloc_file_private_data *priv = filp->private_data;
++	struct gntalloc_gref *gref;
 +
-+	status = acpi_resource_to_address64(res, &addr);
++	pr_debug("%s: priv %p\n", __func__, priv);
 +
-+	/* Do we care about this range?  Let's check. */
-+	if (!ACPI_SUCCESS(status) ||
-+	    !(addr.resource_type == ACPI_MEMORY_RANGE ||
-+	      addr.resource_type == ACPI_IO_RANGE) ||
-+	    !addr.address_length || addr.producer_consumer != ACPI_PRODUCER)
-+		return AE_OK;
++	spin_lock(&gref_lock);
++	while (!list_empty(&priv->list)) {
++		gref = list_entry(priv->list.next,
++			struct gntalloc_gref, next_file);
++		list_del(&gref->next_file);
++		gref->users--;
++		if (gref->users == 0)
++			__del_gref(gref);
++	}
++	kfree(priv);
++	spin_unlock(&gref_lock);
 +
-+	/*
-+	 * Furthermore, we really only care to tell the guest about
-+	 * address ranges that require address translation of some sort.
-+	 */
-+	if (!(addr.resource_type == ACPI_MEMORY_RANGE &&
-+	      addr.info.mem.translation) &&
-+	    !(addr.resource_type == ACPI_IO_RANGE &&
-+	      addr.info.io.translation))
-+		return AE_OK;
-+	   
-+	/* Store the resource in xenbus for the guest */
-+	len = snprintf(str, sizeof(str), "root-%d-resource-%d",
-+		       info->root_num, info->resource_count);
-+	if (unlikely(len >= (sizeof(str) - 1)))
-+		return AE_OK;
++	return 0;
++}
 +
-+	buf = kzalloc((sizeof(*res) * 2) + 1, GFP_KERNEL);
-+	if (!buf)
-+		return AE_OK;
++static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv,
++		struct ioctl_gntalloc_alloc_gref __user *arg)
++{
++	int rc = 0;
++	struct ioctl_gntalloc_alloc_gref op;
++	uint32_t *gref_ids;
 +
-+	/* Clean out resource_source */
-+	res->data.address64.resource_source.index = 0xFF;
-+	res->data.address64.resource_source.string_length = 0;
-+	res->data.address64.resource_source.string_ptr = NULL;
++	pr_debug("%s: priv %p\n", __func__, priv);
 +
-+	ptr = (unsigned char *)res;
++	if (copy_from_user(&op, arg, sizeof(op))) {
++		rc = -EFAULT;
++		goto out;
++	}
 +
-+	/* Turn the acpi_resource into an ASCII byte stream */
-+	for (i = 0; i < sizeof(*res); i++) {
-+		snprintf(tmp, sizeof(tmp), "%02x", ptr[i]);
-+		strncat(buf, tmp, 2);
++	gref_ids = kzalloc(sizeof(gref_ids[0]) * op.count, GFP_TEMPORARY);
++	if (!gref_ids) {
++		rc = -ENOMEM;
++		goto out;
 +	}
 +
-+	err = xenbus_printf(XBT_NIL, info->pdev->xdev->nodename,
-+			    str, "%s", buf);
++	spin_lock(&gref_lock);
++	/* Clean up pages that have zero (local) users but are still mapped
++	 * by remote domains. Since those pages count towards the limit that we
++	 * are about to enforce, removing them here is a good idea.
++	 */
++	do_cleanup();
++	if (gref_size + op.count > limit) {
++		spin_unlock(&gref_lock);
++		rc = -ENOSPC;
++		goto out_free;
++	}
++	gref_size += op.count;
++	op.index = priv->index;
++	priv->index += op.count * PAGE_SIZE;
++	spin_unlock(&gref_lock);
 +
-+	if (!err)
-+		info->resource_count++;
++	rc = add_grefs(&op, gref_ids, priv);
++	if (rc < 0)
++		goto out_free;
 +
-+	kfree(buf);
++	/* Once we finish add_grefs, it is unsafe to touch the new reference,
++	 * since it is possible for a concurrent ioctl to remove it (by guessing
++	 * its index). If the userspace application doesn't provide valid memory
++	 * to write the IDs to, then it will need to close the file in order to
++	 * release - which it will do by segfaulting when it tries to access the
++	 * IDs to close them.
++	 */
++	if (copy_to_user(arg, &op, sizeof(op))) {
++		rc = -EFAULT;
++		goto out_free;
++	}
++	if (copy_to_user(arg->gref_ids, gref_ids,
++			sizeof(gref_ids[0]) * op.count)) {
++		rc = -EFAULT;
++		goto out_free;
++	}
 +
-+	return AE_OK;
++out_free:
++	kfree(gref_ids);
++out:
++	return rc;
 +}
 +
-+int pciback_publish_pci_roots(struct pciback_device *pdev,
-+			      publish_pci_root_cb publish_root_cb)
++static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv,
++		void __user *arg)
 +{
-+	struct controller_dev_data *dev_data = pdev->pci_dev_data;
-+	struct controller_list_entry *cntrl_entry;
-+	int i, root_num, len, err = 0;
-+	unsigned int domain, bus;
-+	char str[64];
-+	struct walk_info info;
++	int i, rc = 0;
++	struct ioctl_gntalloc_dealloc_gref op;
++	struct gntalloc_gref *gref, *n;
 +
-+	spin_lock(&dev_data->lock);
++	pr_debug("%s: priv %p\n", __func__, priv);
 +
-+	list_for_each_entry(cntrl_entry, &dev_data->list, list) {
-+		/* First publish all the domain:bus info */
-+		err = publish_root_cb(pdev, cntrl_entry->domain,
-+				      cntrl_entry->bus);
-+		if (err)
-+			goto out;
++	if (copy_from_user(&op, arg, sizeof(op))) {
++		rc = -EFAULT;
++		goto dealloc_grant_out;
++	}
 +
-+		/*
-+ 		 * Now figure out which root-%d this belongs to
-+		 * so we can associate resources with it.
++	spin_lock(&gref_lock);
++	gref = find_grefs(priv, op.index, op.count);
++	if (gref) {
++		/* Remove from the file list only, and decrease reference count.
++		 * The later call to do_cleanup() will remove from gref_list and
++		 * free the memory if the pages aren't mapped anywhere.
 +		 */
-+		err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
-+				   "root_num", "%d", &root_num);
-+
-+		if (err != 1)
-+			goto out;
-+
-+		for (i = 0; i < root_num; i++) {
-+			len = snprintf(str, sizeof(str), "root-%d", i);
-+			if (unlikely(len >= (sizeof(str) - 1))) {
-+				err = -ENOMEM;
-+				goto out;
-+			}
-+
-+			err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
-+					   str, "%x:%x", &domain, &bus);
-+			if (err != 2)
-+				goto out;
-+
-+			/* Is this the one we just published? */
-+			if (domain == cntrl_entry->domain &&
-+			    bus == cntrl_entry->bus)
-+				break;
++		for (i = 0; i < op.count; i++) {
++			n = list_entry(gref->next_file.next,
++				struct gntalloc_gref, next_file);
++			list_del(&gref->next_file);
++			gref->users--;
++			gref = n;
 +		}
++	} else {
++		rc = -EINVAL;
++	}
 +
-+		if (i == root_num)
-+			goto out;
++	do_cleanup();
 +
-+		info.pdev = pdev;
-+		info.resource_count = 0;
-+		info.root_num = i;
++	spin_unlock(&gref_lock);
++dealloc_grant_out:
++	return rc;
++}
 +
-+		/* Let ACPI do the heavy lifting on decoding resources */
-+		acpi_walk_resources(cntrl_entry->controller->acpi_handle,
-+				    METHOD_NAME__CRS, write_xenbus_resource,
-+				    &info);
++static long gntalloc_ioctl_unmap_notify(struct gntalloc_file_private_data *priv,
++		void __user *arg)
++{
++	struct ioctl_gntalloc_unmap_notify op;
++	struct gntalloc_gref *gref;
++	uint64_t index;
++	int pgoff;
++	int rc;
 +
-+		/* No resouces.  OK.  On to the next one */
-+		if (!info.resource_count)
-+			continue;
++	if (copy_from_user(&op, arg, sizeof(op)))
++		return -EFAULT;
 +
-+		/* Store the number of resources we wrote for this root-%d */
-+		len = snprintf(str, sizeof(str), "root-%d-resources", i);
-+		if (unlikely(len >= (sizeof(str) - 1))) {
-+			err = -ENOMEM;
-+			goto out;
-+		}
++	index = op.index & ~(PAGE_SIZE - 1);
++	pgoff = op.index & (PAGE_SIZE - 1);
 +
-+		err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
-+				    "%d", info.resource_count);
-+		if (err)
-+			goto out;
-+	}
++	spin_lock(&gref_lock);
 +
-+	/* Finally, write some magic to synchronize with the guest. */
-+	len = snprintf(str, sizeof(str), "root-resource-magic");
-+	if (unlikely(len >= (sizeof(str) - 1))) {
-+		err = -ENOMEM;
-+		goto out;
++	gref = find_grefs(priv, index, 1);
++	if (!gref) {
++		rc = -ENOENT;
++		goto unlock_out;
 +	}
 +
-+	err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
-+			    "%lx", (sizeof(struct acpi_resource) * 2) + 1);
-+
-+out:
-+	spin_unlock(&dev_data->lock);
++	if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT)) {
++		rc = -EINVAL;
++		goto unlock_out;
++	}
 +
-+	return err;
++	gref->notify.flags = op.action;
++	gref->notify.pgoff = pgoff;
++	gref->notify.event = op.event_channel_port;
++	rc = 0;
++ unlock_out:
++	spin_unlock(&gref_lock);
++	return rc;
 +}
 +
-+void pciback_release_devices(struct pciback_device *pdev)
++static long gntalloc_ioctl(struct file *filp, unsigned int cmd,
++		unsigned long arg)
 +{
-+	struct controller_dev_data *dev_data = pdev->pci_dev_data;
-+	struct controller_list_entry *cntrl_entry, *c;
-+	struct controller_dev_entry *dev_entry, *d;
++	struct gntalloc_file_private_data *priv = filp->private_data;
 +
-+	list_for_each_entry_safe(cntrl_entry, c, &dev_data->list, list) {
-+		list_for_each_entry_safe(dev_entry, d,
-+					 &cntrl_entry->dev_list, list) {
-+			list_del(&dev_entry->list);
-+			pcistub_put_pci_dev(dev_entry->dev);
-+			kfree(dev_entry);
-+		}
-+		list_del(&cntrl_entry->list);
-+		kfree(cntrl_entry);
-+	}
++	switch (cmd) {
++	case IOCTL_GNTALLOC_ALLOC_GREF:
++		return gntalloc_ioctl_alloc(priv, (void __user *)arg);
 +
-+	kfree(dev_data);
-+	pdev->pci_dev_data = NULL;
-+}
++	case IOCTL_GNTALLOC_DEALLOC_GREF:
++		return gntalloc_ioctl_dealloc(priv, (void __user *)arg);
 +
-+int pciback_get_pcifront_dev(struct pci_dev *pcidev, 
-+		struct pciback_device *pdev, 
-+		unsigned int *domain, unsigned int *bus, unsigned int *devfn)
-+{
-+	struct controller_dev_data *dev_data = pdev->pci_dev_data;
-+	struct controller_dev_entry *dev_entry;
-+	struct controller_list_entry *cntrl_entry;
-+	unsigned long flags;
-+	int found = 0;
-+	spin_lock_irqsave(&dev_data->lock, flags);
++	case IOCTL_GNTALLOC_SET_UNMAP_NOTIFY:
++		return gntalloc_ioctl_unmap_notify(priv, (void __user *)arg);
 +
-+	list_for_each_entry(cntrl_entry, &dev_data->list, list) {
-+		list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) {
-+			if ( (dev_entry->dev->bus->number == 
-+					pcidev->bus->number) &&
-+			  	(dev_entry->dev->devfn ==
-+					pcidev->devfn) &&
-+				(pci_domain_nr(dev_entry->dev->bus) ==
-+					pci_domain_nr(pcidev->bus)))
-+			{
-+				found = 1;
-+				*domain = cntrl_entry->domain;
-+				*bus = cntrl_entry->bus;
-+				*devfn = dev_entry->devfn;
-+				goto out;
-+			}
-+		}
++	default:
++		return -ENOIOCTLCMD;
 +	}
-+out:
-+	spin_unlock_irqrestore(&dev_data->lock, flags);
-+	return found;
 +
++	return 0;
 +}
 +
-diff --git a/drivers/xen/pciback/passthrough.c b/drivers/xen/pciback/passthrough.c
-new file mode 100644
-index 0000000..9e7a0c4
---- /dev/null
-+++ b/drivers/xen/pciback/passthrough.c
-@@ -0,0 +1,176 @@
-+/*
-+ * PCI Backend - Provides restricted access to the real PCI bus topology
-+ *               to the frontend
-+ *
-+ *   Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
-+ */
++static void gntalloc_vma_close(struct vm_area_struct *vma)
++{
++	struct gntalloc_gref *gref = vma->vm_private_data;
++	if (!gref)
++		return;
 +
-+#include <linux/list.h>
-+#include <linux/pci.h>
-+#include <linux/spinlock.h>
-+#include "pciback.h"
++	spin_lock(&gref_lock);
++	gref->users--;
++	if (gref->users == 0)
++		__del_gref(gref);
++	spin_unlock(&gref_lock);
++}
 +
-+struct passthrough_dev_data {
-+	/* Access to dev_list must be protected by lock */
-+	struct list_head dev_list;
-+	spinlock_t lock;
++static struct vm_operations_struct gntalloc_vmops = {
++	.close = gntalloc_vma_close,
 +};
 +
-+struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
-+				    unsigned int domain, unsigned int bus,
-+				    unsigned int devfn)
++static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma)
 +{
-+	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
-+	struct pci_dev_entry *dev_entry;
-+	struct pci_dev *dev = NULL;
-+	unsigned long flags;
++	struct gntalloc_file_private_data *priv = filp->private_data;
++	struct gntalloc_gref *gref;
++	int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
++	int rv, i;
 +
-+	spin_lock_irqsave(&dev_data->lock, flags);
++	pr_debug("%s: priv %p, page %lu+%d\n", __func__,
++		       priv, vma->vm_pgoff, count);
 +
-+	list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
-+		if (domain == (unsigned int)pci_domain_nr(dev_entry->dev->bus)
-+		    && bus == (unsigned int)dev_entry->dev->bus->number
-+		    && devfn == dev_entry->dev->devfn) {
-+			dev = dev_entry->dev;
-+			break;
-+		}
++	if (!(vma->vm_flags & VM_SHARED)) {
++		printk(KERN_ERR "%s: Mapping must be shared.\n", __func__);
++		return -EINVAL;
 +	}
 +
-+	spin_unlock_irqrestore(&dev_data->lock, flags);
++	spin_lock(&gref_lock);
++	gref = find_grefs(priv, vma->vm_pgoff << PAGE_SHIFT, count);
++	if (gref == NULL) {
++		rv = -ENOENT;
++		pr_debug("%s: Could not find grant reference",
++				__func__);
++		goto out_unlock;
++	}
 +
-+	return dev;
-+}
++	vma->vm_private_data = gref;
 +
-+int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
-+			int devid, publish_pci_dev_cb publish_cb)
-+{
-+	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
-+	struct pci_dev_entry *dev_entry;
-+	unsigned long flags;
-+	unsigned int domain, bus, devfn;
-+	int err;
++	vma->vm_flags |= VM_RESERVED;
++	vma->vm_flags |= VM_DONTCOPY;
++	vma->vm_flags |= VM_PFNMAP | VM_PFN_AT_MMAP;
 +
-+	dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL);
-+	if (!dev_entry)
-+		return -ENOMEM;
-+	dev_entry->dev = dev;
++	vma->vm_ops = &gntalloc_vmops;
 +
-+	spin_lock_irqsave(&dev_data->lock, flags);
-+	list_add_tail(&dev_entry->list, &dev_data->dev_list);
-+	spin_unlock_irqrestore(&dev_data->lock, flags);
++	for (i = 0; i < count; i++) {
++		gref->users++;
++		rv = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE,
++				gref->page);
++		if (rv)
++			goto out_unlock;
 +
-+	/* Publish this device. */
-+	domain = (unsigned int)pci_domain_nr(dev->bus);
-+	bus = (unsigned int)dev->bus->number;
-+	devfn = dev->devfn;
-+	err = publish_cb(pdev, domain, bus, devfn, devid);
++		gref = list_entry(gref->next_file.next,
++				struct gntalloc_gref, next_file);
++	}
++	rv = 0;
 +
-+	return err;
++out_unlock:
++	spin_unlock(&gref_lock);
++	return rv;
 +}
 +
-+void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
++static const struct file_operations gntalloc_fops = {
++	.owner = THIS_MODULE,
++	.open = gntalloc_open,
++	.release = gntalloc_release,
++	.unlocked_ioctl = gntalloc_ioctl,
++	.mmap = gntalloc_mmap
++};
++
++/*
++ * -------------------------------------
++ * Module creation/destruction.
++ * -------------------------------------
++ */
++static struct miscdevice gntalloc_miscdev = {
++	.minor	= MISC_DYNAMIC_MINOR,
++	.name	= "xen/gntalloc",
++	.fops	= &gntalloc_fops,
++};
++
++static int __init gntalloc_init(void)
 +{
-+	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
-+	struct pci_dev_entry *dev_entry, *t;
-+	struct pci_dev *found_dev = NULL;
-+	unsigned long flags;
++	int err;
 +
-+	spin_lock_irqsave(&dev_data->lock, flags);
++	if (!xen_domain())
++		return -ENODEV;
 +
-+	list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
-+		if (dev_entry->dev == dev) {
-+			list_del(&dev_entry->list);
-+			found_dev = dev_entry->dev;
-+			kfree(dev_entry);
-+		}
++	err = misc_register(&gntalloc_miscdev);
++	if (err != 0) {
++		printk(KERN_ERR "Could not register misc gntalloc device\n");
++		return err;
 +	}
 +
-+	spin_unlock_irqrestore(&dev_data->lock, flags);
++	pr_debug("Created grant allocation device at %d,%d\n",
++			MISC_MAJOR, gntalloc_miscdev.minor);
 +
-+	if (found_dev)
-+		pcistub_put_pci_dev(found_dev);
++	return 0;
 +}
 +
-+int pciback_init_devices(struct pciback_device *pdev)
++static void __exit gntalloc_exit(void)
 +{
-+	struct passthrough_dev_data *dev_data;
++	misc_deregister(&gntalloc_miscdev);
++}
 +
-+	dev_data = kmalloc(sizeof(*dev_data), GFP_KERNEL);
-+	if (!dev_data)
-+		return -ENOMEM;
++module_init(gntalloc_init);
++module_exit(gntalloc_exit);
 +
-+	spin_lock_init(&dev_data->lock);
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("Carter Weatherly <carter.weatherly at jhuapl.edu>, "
++		"Daniel De Graaf <dgdegra at tycho.nsa.gov>");
++MODULE_DESCRIPTION("User-space grant reference allocator driver");
+diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
+index 1e31cdc..d43ff30 100644
+--- a/drivers/xen/gntdev.c
++++ b/drivers/xen/gntdev.c
+@@ -32,10 +32,12 @@
+ #include <linux/sched.h>
+ #include <linux/spinlock.h>
+ #include <linux/slab.h>
++#include <linux/highmem.h>
+ 
+ #include <xen/xen.h>
+ #include <xen/grant_table.h>
+ #include <xen/gntdev.h>
++#include <xen/events.h>
+ #include <asm/xen/hypervisor.h>
+ #include <asm/xen/hypercall.h>
+ #include <asm/xen/page.h>
+@@ -45,35 +47,46 @@ MODULE_AUTHOR("Derek G. Murray <Derek.Murray at cl.cam.ac.uk>, "
+ 	      "Gerd Hoffmann <kraxel at redhat.com>");
+ MODULE_DESCRIPTION("User-space granted page access driver");
+ 
+-static int limit = 1024;
++static int limit = 1024*1024;
+ module_param(limit, int, 0644);
+-MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped at "
+-		"once by a gntdev instance");
++MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by "
++		"the gntdev device");
 +
-+	INIT_LIST_HEAD(&dev_data->dev_list);
++static atomic_t pages_mapped = ATOMIC_INIT(0);
 +
-+	pdev->pci_dev_data = dev_data;
++static int use_ptemod;
+ 
+ struct gntdev_priv {
+ 	struct list_head maps;
+-	uint32_t used;
+-	uint32_t limit;
+ 	/* lock protects maps from concurrent changes */
+ 	spinlock_t lock;
+ 	struct mm_struct *mm;
+ 	struct mmu_notifier mn;
+ };
+ 
++struct unmap_notify {
++	int flags;
++	/* Address relative to the start of the grant_map */
++	int addr;
++	int event;
++};
 +
-+	return 0;
-+}
+ struct grant_map {
+ 	struct list_head next;
+-	struct gntdev_priv *priv;
+ 	struct vm_area_struct *vma;
+ 	int index;
+ 	int count;
+ 	int flags;
+-	int is_mapped;
++	atomic_t users;
++	struct unmap_notify notify;
+ 	struct ioctl_gntdev_grant_ref *grants;
+ 	struct gnttab_map_grant_ref   *map_ops;
+ 	struct gnttab_unmap_grant_ref *unmap_ops;
+ 	struct page **pages;
+ };
+ 
++static int unmap_grant_pages(struct grant_map *map, int offset, int pages);
 +
-+int pciback_publish_pci_roots(struct pciback_device *pdev,
-+			      publish_pci_root_cb publish_root_cb)
-+{
-+	int err = 0;
-+	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
-+	struct pci_dev_entry *dev_entry, *e;
-+	struct pci_dev *dev;
-+	int found;
-+	unsigned int domain, bus;
+ /* ------------------------------------------------------------------ */
+ 
+ static void gntdev_print_maps(struct gntdev_priv *priv,
+@@ -82,9 +95,7 @@ static void gntdev_print_maps(struct gntdev_priv *priv,
+ #ifdef DEBUG
+ 	struct grant_map *map;
+ 
+-	pr_debug("maps list (priv %p, usage %d/%d)\n",
+-	       priv, priv->used, priv->limit);
+-
++	pr_debug("%s: maps list (priv %p)\n", __func__, priv);
+ 	list_for_each_entry(map, &priv->maps, next)
+ 		pr_debug("  index %2d, count %2d %s\n",
+ 		       map->index, map->count,
+@@ -115,14 +126,13 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
+ 		add->pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
+ 		if (add->pages[i] == NULL)
+ 			goto err;
++		add->map_ops[i].handle = -1;
++		add->unmap_ops[i].handle = -1;
+ 	}
+ 
+ 	add->index = 0;
+ 	add->count = count;
+-	add->priv  = priv;
+-
+-	if (add->count + priv->used > priv->limit)
+-		goto err;
++	atomic_set(&add->users, 1);
+ 
+ 	return add;
+ 
+@@ -154,7 +164,6 @@ static void gntdev_add_map(struct gntdev_priv *priv, struct grant_map *add)
+ 	list_add_tail(&add->next, &priv->maps);
+ 
+ done:
+-	priv->used += add->count;
+ 	gntdev_print_maps(priv, "[new]", add->index);
+ }
+ 
+@@ -166,57 +175,57 @@ static struct grant_map *gntdev_find_map_index(struct gntdev_priv *priv,
+ 	list_for_each_entry(map, &priv->maps, next) {
+ 		if (map->index != index)
+ 			continue;
+-		if (map->count != count)
+-			continue;
+-		return map;
+-	}
+-	return NULL;
+-}
+-
+-static struct grant_map *gntdev_find_map_vaddr(struct gntdev_priv *priv,
+-					       unsigned long vaddr)
+-{
+-	struct grant_map *map;
+-
+-	list_for_each_entry(map, &priv->maps, next) {
+-		if (!map->vma)
+-			continue;
+-		if (vaddr < map->vma->vm_start)
+-			continue;
+-		if (vaddr >= map->vma->vm_end)
++		if (count && map->count != count)
+ 			continue;
+ 		return map;
+ 	}
+ 	return NULL;
+ }
+ 
+-static int gntdev_del_map(struct grant_map *map)
++static void gntdev_put_map(struct grant_map *map)
+ {
+ 	int i;
+ 
+-	if (map->vma)
+-		return -EBUSY;
+-	for (i = 0; i < map->count; i++)
+-		if (map->unmap_ops[i].handle)
+-			return -EBUSY;
++	if (!map)
++		return;
+ 
+-	map->priv->used -= map->count;
+-	list_del(&map->next);
+-	return 0;
+-}
++	if (!atomic_dec_and_test(&map->users))
++		return;
+ 
+-static void gntdev_free_map(struct grant_map *map)
+-{
+-	int i;
++	atomic_sub(map->count, &pages_mapped);
+ 
+-	if (!map)
+-		return;
++	if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
++		notify_remote_via_evtchn(map->notify.event);
++	}
 +
-+	spin_lock(&dev_data->lock);
++	if (map->pages) {
++		if (!use_ptemod)
++			unmap_grant_pages(map, 0, map->count);
+ 
+-	if (map->pages)
+ 		for (i = 0; i < map->count; i++) {
+-			if (map->pages[i])
++			uint32_t check, *tmp;
++			if (!map->pages[i])
++				continue;
++			/* XXX When unmapping in an HVM domain, Xen will
++			 * sometimes end up mapping the GFN to an invalid MFN.
++			 * In this case, writes will be discarded and reads will
++			 * return all 0xFF bytes.  Leak these unusable GFNs
++			 * until Xen supports fixing their p2m mapping.
++			 *
++			 * Confirmed present in Xen 4.1-RC3 with HVM source
++			 */
++			tmp = kmap(map->pages[i]);
++			*tmp = 0xdeaddead;
++			mb();
++			check = *tmp;
++			kunmap(map->pages[i]);
++			if (check == 0xdeaddead)
+ 				__free_page(map->pages[i]);
++			else
++				pr_debug("Discard page %d=%ld\n", i,
++					page_to_pfn(map->pages[i]));
+ 		}
++	}
+ 	kfree(map->pages);
+ 	kfree(map->grants);
+ 	kfree(map->map_ops);
+@@ -231,24 +240,39 @@ static int find_grant_ptes(pte_t *pte, pgtable_t token,
+ {
+ 	struct grant_map *map = data;
+ 	unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
++	int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte;
+ 	u64 pte_maddr;
+ 
+ 	BUG_ON(pgnr >= map->count);
+ 	pte_maddr = arbitrary_virt_to_machine(pte).maddr;
+ 
+-	gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr,
+-			  GNTMAP_contains_pte | map->flags,
++	gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags,
+ 			  map->grants[pgnr].ref,
+ 			  map->grants[pgnr].domid);
+-	gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr,
+-			    GNTMAP_contains_pte | map->flags,
+-			    0 /* handle */);
++	gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, flags,
++			    -1 /* handle */);
+ 	return 0;
+ }
+ 
+ static int map_grant_pages(struct grant_map *map)
+ {
+ 	int i, err = 0;
++	phys_addr_t addr;
 +
-+	list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
-+		/* Only publish this device as a root if none of its
-+		 * parent bridges are exported
-+		 */
-+		found = 0;
-+		dev = dev_entry->dev->bus->self;
-+		for (; !found && dev != NULL; dev = dev->bus->self) {
-+			list_for_each_entry(e, &dev_data->dev_list, list) {
-+				if (dev == e->dev) {
-+					found = 1;
-+					break;
-+				}
-+			}
++	if (!use_ptemod) {
++		/* Note: it could already be mapped */
++		if (map->map_ops[0].handle != -1)
++			return 0;
++		for (i = 0; i < map->count; i++) {
++			addr = (phys_addr_t)
++				pfn_to_kaddr(page_to_pfn(map->pages[i]));
++			gnttab_set_map_op(&map->map_ops[i], addr, map->flags,
++				map->grants[i].ref,
++				map->grants[i].domid);
++			gnttab_set_unmap_op(&map->unmap_ops[i], addr,
++				map->flags, -1 /* handle */);
 +		}
-+
-+		domain = (unsigned int)pci_domain_nr(dev_entry->dev->bus);
-+		bus = (unsigned int)dev_entry->dev->bus->number;
-+
-+		if (!found) {
-+			err = publish_root_cb(pdev, domain, bus);
++	}
+ 
+ 	pr_debug("map %d+%d\n", map->index, map->count);
+ 	err = gnttab_map_refs(map->map_ops, map->pages, map->count);
+@@ -258,28 +282,81 @@ static int map_grant_pages(struct grant_map *map)
+ 	for (i = 0; i < map->count; i++) {
+ 		if (map->map_ops[i].status)
+ 			err = -EINVAL;
+-		map->unmap_ops[i].handle = map->map_ops[i].handle;
++		else {
++			BUG_ON(map->map_ops[i].handle == -1);
++			map->unmap_ops[i].handle = map->map_ops[i].handle;
++			pr_debug("map handle=%d\n", map->map_ops[i].handle);
++		}
+ 	}
+ 	return err;
+ }
+ 
+-static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
++static int __unmap_grant_pages(struct grant_map *map, int offset, int pages)
+ {
+ 	int i, err = 0;
+ 
+-	pr_debug("map %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
+-	err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages, pages);
++	if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
++		int pgno = (map->notify.addr >> PAGE_SHIFT);
++		if (pgno >= offset && pgno < offset + pages && use_ptemod) {
++			void __user *tmp = (void __user *)
++				map->vma->vm_start + map->notify.addr;
++			err = copy_to_user(tmp, &err, 1);
 +			if (err)
-+				break;
++				return err;
++			map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
++		} else if (pgno >= offset && pgno < offset + pages) {
++			uint8_t *tmp = kmap(map->pages[pgno]);
++			tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
++			kunmap(map->pages[pgno]);
++			map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
 +		}
 +	}
 +
-+	spin_unlock(&dev_data->lock);
-+
-+	return err;
-+}
-+
-+void pciback_release_devices(struct pciback_device *pdev)
-+{
-+	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
-+	struct pci_dev_entry *dev_entry, *t;
-+
-+	list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
-+		list_del(&dev_entry->list);
-+		pcistub_put_pci_dev(dev_entry->dev);
-+		kfree(dev_entry);
-+	}
-+
-+	kfree(dev_data);
-+	pdev->pci_dev_data = NULL;
-+}
-+
-+int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev, 
-+		unsigned int *domain, unsigned int *bus, unsigned int *devfn)
-+
-+{
-+	*domain = pci_domain_nr(pcidev->bus);
-+	*bus = pcidev->bus->number;
-+	*devfn = pcidev->devfn;
-+	return 1;
-+}
-diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
-new file mode 100644
-index 0000000..c481a73
---- /dev/null
-+++ b/drivers/xen/pciback/pci_stub.c
-@@ -0,0 +1,1316 @@
-+/*
-+ * PCI Stub Driver - Grabs devices in backend to be exported later
-+ *
-+ * Ryan Wilson <hap9 at epoch.ncsc.mil>
-+ * Chris Bookholt <hap10 at epoch.ncsc.mil>
-+ */
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/rwsem.h>
-+#include <linux/list.h>
-+#include <linux/spinlock.h>
-+#include <linux/kref.h>
-+#include <linux/pci.h>
-+#include <linux/wait.h>
-+#include <asm/atomic.h>
-+#include <xen/evtchn.h>
-+#include "pciback.h"
-+#include "conf_space.h"
-+#include "conf_space_quirks.h"
-+
-+static char *pci_devs_to_hide = NULL;
-+wait_queue_head_t aer_wait_queue;
-+/* Add a semaphore to synchronize AER handling with pciback remove/reconfigure
-+ * ops; we want to avoid pciback devices being removed in the middle of AER ops.
-+ */
-+static DECLARE_RWSEM(pcistub_sem);
-+module_param_named(hide, pci_devs_to_hide, charp, 0444);
-+
-+struct pcistub_device_id {
-+	struct list_head slot_list;
-+	int domain;
-+	unsigned char bus;
-+	unsigned int devfn;
-+};
-+static LIST_HEAD(pcistub_device_ids);
-+static DEFINE_SPINLOCK(device_ids_lock);
-+
-+struct pcistub_device {
-+	struct kref kref;
-+	struct list_head dev_list;
-+	spinlock_t lock;
-+
-+	struct pci_dev *dev;
-+	struct pciback_device *pdev;	/* non-NULL if struct pci_dev is in use */
-+};
-+
-+/* Access to pcistub_devices & seized_devices lists and the initialize_devices
-+ * flag must be locked with pcistub_devices_lock
-+ */
-+static DEFINE_SPINLOCK(pcistub_devices_lock);
-+static LIST_HEAD(pcistub_devices);
-+
-+/* wait for device_initcall before initializing our devices
-+ * (see pcistub_init_devices_late)
-+ */
-+static int initialize_devices = 0;
-+static LIST_HEAD(seized_devices);
-+
-+static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev)
-+{
-+	struct pcistub_device *psdev;
-+
-+	dev_dbg(&dev->dev, "pcistub_device_alloc\n");
-+
-+	psdev = kzalloc(sizeof(*psdev), GFP_ATOMIC);
-+	if (!psdev)
-+		return NULL;
-+
-+	psdev->dev = pci_dev_get(dev);
-+	if (!psdev->dev) {
-+		kfree(psdev);
-+		return NULL;
-+	}
-+
-+	kref_init(&psdev->kref);
-+	spin_lock_init(&psdev->lock);
-+
-+	return psdev;
-+}
-+
-+/* Don't call this directly as it's called by pcistub_device_put */
-+static void pcistub_device_release(struct kref *kref)
-+{
-+	struct pcistub_device *psdev;
-+
-+	psdev = container_of(kref, struct pcistub_device, kref);
-+
-+	dev_dbg(&psdev->dev->dev, "pcistub_device_release\n");
-+
-+	/* Clean-up the device */
-+	pciback_reset_device(psdev->dev);
-+	pciback_config_free_dyn_fields(psdev->dev);
-+	pciback_config_free_dev(psdev->dev);
-+	kfree(pci_get_drvdata(psdev->dev));
-+	pci_set_drvdata(psdev->dev, NULL);
-+
-+	pci_dev_put(psdev->dev);
-+
-+	kfree(psdev);
-+}
-+
-+static inline void pcistub_device_get(struct pcistub_device *psdev)
-+{
-+	kref_get(&psdev->kref);
-+}
-+
-+static inline void pcistub_device_put(struct pcistub_device *psdev)
-+{
-+	kref_put(&psdev->kref, pcistub_device_release);
-+}
-+
-+static struct pcistub_device *pcistub_device_find(int domain, int bus,
-+						  int slot, int func)
++	err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages + offset, pages);
+ 	if (err)
+ 		return err;
+ 
+ 	for (i = 0; i < pages; i++) {
+ 		if (map->unmap_ops[offset+i].status)
+ 			err = -EINVAL;
+-		map->unmap_ops[offset+i].handle = 0;
++		pr_debug("unmap handle=%d st=%d\n",
++			map->unmap_ops[offset+i].handle,
++			map->unmap_ops[offset+i].status);
++		map->unmap_ops[offset+i].handle = -1;
+ 	}
+ 	return err;
+ }
+ 
++static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
 +{
-+	struct pcistub_device *psdev = NULL;
-+	unsigned long flags;
++	int range, err = 0;
 +
-+	spin_lock_irqsave(&pcistub_devices_lock, flags);
++	pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
 +
-+	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
-+		if (psdev->dev != NULL
-+		    && domain == pci_domain_nr(psdev->dev->bus)
-+		    && bus == psdev->dev->bus->number
-+		    && PCI_DEVFN(slot, func) == psdev->dev->devfn) {
-+			pcistub_device_get(psdev);
-+			goto out;
++	/* It is possible the requested range will have a "hole" where we
++	 * already unmapped some of the grants. Only unmap valid ranges.
++	 */
++	while (pages && !err) {
++		while (pages && map->unmap_ops[offset].handle == -1) {
++			offset++;
++			pages--;
++		}
++		range = 0;
++		while (range < pages) {
++			if (map->unmap_ops[offset+range].handle == -1) {
++				range--;
++				break;
++			}
++			range++;
 +		}
++		err = __unmap_grant_pages(map, offset, range);
++		offset += range;
++		pages -= range;
 +	}
 +
-+	/* didn't find it */
-+	psdev = NULL;
-+
-+      out:
-+	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
-+	return psdev;
++	return err;
 +}
 +
-+static struct pci_dev *pcistub_device_get_pci_dev(struct pciback_device *pdev,
-+						  struct pcistub_device *psdev)
-+{
-+	struct pci_dev *pci_dev = NULL;
-+	unsigned long flags;
-+
-+	pcistub_device_get(psdev);
+ /* ------------------------------------------------------------------ */
+ 
+ static void gntdev_vma_close(struct vm_area_struct *vma)
+@@ -287,22 +364,13 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
+ 	struct grant_map *map = vma->vm_private_data;
+ 
+ 	pr_debug("close %p\n", vma);
+-	map->is_mapped = 0;
+ 	map->vma = NULL;
+ 	vma->vm_private_data = NULL;
+-}
+-
+-static int gntdev_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+-{
+-	pr_debug("vaddr %p, pgoff %ld (shouldn't happen)\n",
+-			vmf->virtual_address, vmf->pgoff);
+-	vmf->flags = VM_FAULT_ERROR;
+-	return 0;
++	gntdev_put_map(map);
+ }
+ 
+ static struct vm_operations_struct gntdev_vmops = {
+ 	.close = gntdev_vma_close,
+-	.fault = gntdev_vma_fault,
+ };
+ 
+ /* ------------------------------------------------------------------ */
+@@ -320,8 +388,6 @@ static void mn_invl_range_start(struct mmu_notifier *mn,
+ 	list_for_each_entry(map, &priv->maps, next) {
+ 		if (!map->vma)
+ 			continue;
+-		if (!map->is_mapped)
+-			continue;
+ 		if (map->vma->vm_start >= end)
+ 			continue;
+ 		if (map->vma->vm_end <= start)
+@@ -386,16 +452,17 @@ static int gntdev_open(struct inode *inode, struct file *flip)
+ 
+ 	INIT_LIST_HEAD(&priv->maps);
+ 	spin_lock_init(&priv->lock);
+-	priv->limit = limit;
+ 
+-	priv->mm = get_task_mm(current);
+-	if (!priv->mm) {
+-		kfree(priv);
+-		return -ENOMEM;
++	if (use_ptemod) {
++		priv->mm = get_task_mm(current);
++		if (!priv->mm) {
++			kfree(priv);
++			return -ENOMEM;
++		}
++		priv->mn.ops = &gntdev_mmu_ops;
++		ret = mmu_notifier_register(&priv->mn, priv->mm);
++		mmput(priv->mm);
+ 	}
+-	priv->mn.ops = &gntdev_mmu_ops;
+-	ret = mmu_notifier_register(&priv->mn, priv->mm);
+-	mmput(priv->mm);
+ 
+ 	if (ret) {
+ 		kfree(priv);
+@@ -412,21 +479,19 @@ static int gntdev_release(struct inode *inode, struct file *flip)
+ {
+ 	struct gntdev_priv *priv = flip->private_data;
+ 	struct grant_map *map;
+-	int err;
+ 
+ 	pr_debug("priv %p\n", priv);
+ 
+ 	spin_lock(&priv->lock);
+ 	while (!list_empty(&priv->maps)) {
+ 		map = list_entry(priv->maps.next, struct grant_map, next);
+-		err = gntdev_del_map(map);
+-		if (WARN_ON(err))
+-			gntdev_free_map(map);
+-
++		list_del(&map->next);
++		gntdev_put_map(map);
+ 	}
+ 	spin_unlock(&priv->lock);
+ 
+-	mmu_notifier_unregister(&priv->mn, priv->mm);
++	if (use_ptemod)
++		mmu_notifier_unregister(&priv->mn, priv->mm);
+ 	kfree(priv);
+ 	return 0;
+ }
+@@ -443,16 +508,21 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv,
+ 	pr_debug("priv %p, add %d\n", priv, op.count);
+ 	if (unlikely(op.count <= 0))
+ 		return -EINVAL;
+-	if (unlikely(op.count > priv->limit))
+-		return -EINVAL;
+ 
+ 	err = -ENOMEM;
+ 	map = gntdev_alloc_map(priv, op.count);
+ 	if (!map)
+ 		return err;
 +
-+	spin_lock_irqsave(&psdev->lock, flags);
-+	if (!psdev->pdev) {
-+		psdev->pdev = pdev;
-+		pci_dev = psdev->dev;
++	if (unlikely(atomic_add_return(op.count, &pages_mapped) > limit)) {
++		pr_debug("can't map: over limit\n");
++		gntdev_put_map(map);
++		return err;
 +	}
-+	spin_unlock_irqrestore(&psdev->lock, flags);
-+
-+	if (!pci_dev)
-+		pcistub_device_put(psdev);
-+
-+	return pci_dev;
-+}
-+
-+struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev,
-+					    int domain, int bus,
-+					    int slot, int func)
-+{
-+	struct pcistub_device *psdev;
-+	struct pci_dev *found_dev = NULL;
-+	unsigned long flags;
 +
-+	spin_lock_irqsave(&pcistub_devices_lock, flags);
+ 	if (copy_from_user(map->grants, &u->refs,
+ 			   sizeof(map->grants[0]) * op.count) != 0) {
+-		gntdev_free_map(map);
++		gntdev_put_map(map);
+ 		return err;
+ 	}
+ 
+@@ -461,13 +531,9 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv,
+ 	op.index = map->index << PAGE_SHIFT;
+ 	spin_unlock(&priv->lock);
+ 
+-	if (copy_to_user(u, &op, sizeof(op)) != 0) {
+-		spin_lock(&priv->lock);
+-		gntdev_del_map(map);
+-		spin_unlock(&priv->lock);
+-		gntdev_free_map(map);
+-		return err;
+-	}
++	if (copy_to_user(u, &op, sizeof(op)) != 0)
++		return -EFAULT;
 +
-+	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
-+		if (psdev->dev != NULL
-+		    && domain == pci_domain_nr(psdev->dev->bus)
-+		    && bus == psdev->dev->bus->number
-+		    && PCI_DEVFN(slot, func) == psdev->dev->devfn) {
-+			found_dev = pcistub_device_get_pci_dev(pdev, psdev);
-+			break;
-+		}
+ 	return 0;
+ }
+ 
+@@ -484,11 +550,12 @@ static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv,
+ 
+ 	spin_lock(&priv->lock);
+ 	map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count);
+-	if (map)
+-		err = gntdev_del_map(map);
++	if (map) {
++		list_del(&map->next);
++		gntdev_put_map(map);
++		err = 0;
 +	}
+ 	spin_unlock(&priv->lock);
+-	if (!err)
+-		gntdev_free_map(map);
+ 	return err;
+ }
+ 
+@@ -496,43 +563,66 @@ static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv,
+ 					      struct ioctl_gntdev_get_offset_for_vaddr __user *u)
+ {
+ 	struct ioctl_gntdev_get_offset_for_vaddr op;
++	struct vm_area_struct *vma;
+ 	struct grant_map *map;
+ 
+ 	if (copy_from_user(&op, u, sizeof(op)) != 0)
+ 		return -EFAULT;
+ 	pr_debug("priv %p, offset for vaddr %lx\n", priv, (unsigned long)op.vaddr);
+ 
+-	spin_lock(&priv->lock);
+-	map = gntdev_find_map_vaddr(priv, op.vaddr);
+-	if (map == NULL ||
+-	    map->vma->vm_start != op.vaddr) {
+-		spin_unlock(&priv->lock);
++	vma = find_vma(current->mm, op.vaddr);
++	if (!vma || vma->vm_ops != &gntdev_vmops)
+ 		return -EINVAL;
+-	}
 +
-+	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
-+	return found_dev;
-+}
++	map = vma->vm_private_data;
++	if (!map)
++		return -EINVAL;
 +
-+struct pci_dev *pcistub_get_pci_dev(struct pciback_device *pdev,
-+				    struct pci_dev *dev)
-+{
-+	struct pcistub_device *psdev;
-+	struct pci_dev *found_dev = NULL;
-+	unsigned long flags;
+ 	op.offset = map->index << PAGE_SHIFT;
+ 	op.count = map->count;
+-	spin_unlock(&priv->lock);
+ 
+ 	if (copy_to_user(u, &op, sizeof(op)) != 0)
+ 		return -EFAULT;
+ 	return 0;
+ }
+ 
+-static long gntdev_ioctl_set_max_grants(struct gntdev_priv *priv,
+-					struct ioctl_gntdev_set_max_grants __user *u)
++static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
+ {
+-	struct ioctl_gntdev_set_max_grants op;
++	struct ioctl_gntdev_unmap_notify op;
++	struct grant_map *map;
++	int rc;
+ 
+-	if (copy_from_user(&op, u, sizeof(op)) != 0)
++	if (copy_from_user(&op, u, sizeof(op)))
+ 		return -EFAULT;
+-	pr_debug("priv %p, limit %d\n", priv, op.count);
+-	if (op.count > limit)
+-		return -E2BIG;
 +
-+	spin_lock_irqsave(&pcistub_devices_lock, flags);
++	if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT))
++		return -EINVAL;
+ 
+ 	spin_lock(&priv->lock);
+-	priv->limit = op.count;
 +
-+	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
-+		if (psdev->dev == dev) {
-+			found_dev = pcistub_device_get_pci_dev(pdev, psdev);
-+			break;
-+		}
++	list_for_each_entry(map, &priv->maps, next) {
++		uint64_t begin = map->index << PAGE_SHIFT;
++		uint64_t end = (map->index + map->count) << PAGE_SHIFT;
++		if (op.index >= begin && op.index < end)
++			goto found;
++	}
++	rc = -ENOENT;
++	goto unlock_out;
++
++ found:
++	if ((op.action & UNMAP_NOTIFY_CLEAR_BYTE) &&
++			(map->flags & GNTMAP_readonly)) {
++		rc = -EINVAL;
++		goto unlock_out;
 +	}
 +
-+	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
-+	return found_dev;
-+}
-+
-+void pcistub_put_pci_dev(struct pci_dev *dev)
-+{
-+	struct pcistub_device *psdev, *found_psdev = NULL;
-+	unsigned long flags;
++	map->notify.flags = op.action;
++	map->notify.addr = op.index - (map->index << PAGE_SHIFT);
++	map->notify.event = op.event_channel_port;
++	rc = 0;
++ unlock_out:
+ 	spin_unlock(&priv->lock);
+-	return 0;
++	return rc;
+ }
+ 
+ static long gntdev_ioctl(struct file *flip,
+@@ -551,8 +641,8 @@ static long gntdev_ioctl(struct file *flip,
+ 	case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR:
+ 		return gntdev_ioctl_get_offset_for_vaddr(priv, ptr);
+ 
+-	case IOCTL_GNTDEV_SET_MAX_GRANTS:
+-		return gntdev_ioctl_set_max_grants(priv, ptr);
++	case IOCTL_GNTDEV_SET_UNMAP_NOTIFY:
++		return gntdev_ioctl_notify(priv, ptr);
+ 
+ 	default:
+ 		pr_debug("priv %p, unknown cmd %x\n", priv, cmd);
+@@ -568,7 +658,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
+ 	int index = vma->vm_pgoff;
+ 	int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+ 	struct grant_map *map;
+-	int err = -EINVAL;
++	int i, err = -EINVAL;
+ 
+ 	if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED))
+ 		return -EINVAL;
+@@ -580,47 +670,70 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
+ 	map = gntdev_find_map_index(priv, index, count);
+ 	if (!map)
+ 		goto unlock_out;
+-	if (map->vma)
++	if (use_ptemod && map->vma)
+ 		goto unlock_out;
+-	if (priv->mm != vma->vm_mm) {
++	if (use_ptemod && priv->mm != vma->vm_mm) {
+ 		printk(KERN_WARNING "Huh? Other mm?\n");
+ 		goto unlock_out;
+ 	}
+ 
++	atomic_inc(&map->users);
 +
-+	spin_lock_irqsave(&pcistub_devices_lock, flags);
+ 	vma->vm_ops = &gntdev_vmops;
+ 
+ 	vma->vm_flags |= VM_RESERVED|VM_DONTCOPY|VM_DONTEXPAND|VM_PFNMAP;
+ 
+ 	vma->vm_private_data = map;
+-	map->vma = vma;
+ 
+-	map->flags = GNTMAP_host_map | GNTMAP_application_map;
+-	if (!(vma->vm_flags & VM_WRITE))
+-		map->flags |= GNTMAP_readonly;
++	if (use_ptemod)
++		map->vma = vma;
 +
-+	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
-+		if (psdev->dev == dev) {
-+			found_psdev = psdev;
-+			break;
++	if (map->flags) {
++		if ((vma->vm_flags & VM_WRITE) &&
++				(map->flags & GNTMAP_readonly))
++			return -EINVAL;
++	} else {
++		map->flags = GNTMAP_host_map;
++		if (!(vma->vm_flags & VM_WRITE))
++			map->flags |= GNTMAP_readonly;
++	}
+ 
+ 	spin_unlock(&priv->lock);
+ 
+-	err = apply_to_page_range(vma->vm_mm, vma->vm_start,
+-				  vma->vm_end - vma->vm_start,
+-				  find_grant_ptes, map);
+-	if (err) {
+-		printk(KERN_WARNING "find_grant_ptes() failure.\n");
+-		return err;
++	if (use_ptemod) {
++		err = apply_to_page_range(vma->vm_mm, vma->vm_start,
++					  vma->vm_end - vma->vm_start,
++					  find_grant_ptes, map);
++		if (err) {
++			printk(KERN_WARNING "find_grant_ptes() failure.\n");
++			goto out_put_map;
++		}
+ 	}
+ 
+ 	err = map_grant_pages(map);
+-	if (err) {
+-		printk(KERN_WARNING "map_grant_pages() failure.\n");
+-		return err;
+-	}
++	if (err)
++		goto out_put_map;
+ 
+-	map->is_mapped = 1;
++	if (!use_ptemod) {
++		for (i = 0; i < count; i++) {
++			err = vm_insert_page(vma, vma->vm_start + i*PAGE_SIZE,
++				map->pages[i]);
++			if (err)
++				goto out_put_map;
 +		}
 +	}
+ 
+ 	return 0;
+ 
+ unlock_out:
+ 	spin_unlock(&priv->lock);
+ 	return err;
 +
-+	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
-+
-+	/* Hold this lock to avoid breaking the link between pcistub
-+	 * and pciback while AER is in progress.
-+	 */
-+	down_write(&pcistub_sem);
-+	/* Cleanup our device
-+	 * (so it's ready for the next domain)
-+	 */
-+	pciback_reset_device(found_psdev->dev);
-+	pciback_config_free_dyn_fields(found_psdev->dev);
-+	pciback_config_reset_dev(found_psdev->dev);
++out_put_map:
++	if (use_ptemod)
++		map->vma = NULL;
++	gntdev_put_map(map);
++	return err;
+ }
+ 
+ static const struct file_operations gntdev_fops = {
+@@ -646,6 +759,8 @@ static int __init gntdev_init(void)
+ 	if (!xen_domain())
+ 		return -ENODEV;
+ 
++	use_ptemod = xen_pv_domain();
 +
-+	spin_lock_irqsave(&found_psdev->lock, flags);
-+	found_psdev->pdev = NULL;
-+	spin_unlock_irqrestore(&found_psdev->lock, flags);
+ 	err = misc_register(&gntdev_miscdev);
+ 	if (err != 0) {
+ 		printk(KERN_ERR "Could not register gntdev device\n");
+diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
+index 9ef54eb..1a9bc2b 100644
+--- a/drivers/xen/grant-table.c
++++ b/drivers/xen/grant-table.c
+@@ -458,14 +458,19 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
+ 	if (ret)
+ 		return ret;
+ 
++	if (xen_feature(XENFEAT_auto_translated_physmap))
++		return ret;
 +
-+	pcistub_device_put(found_psdev);
-+	up_write(&pcistub_sem);
-+}
+ 	for (i = 0; i < count; i++) {
+-		/* m2p override only supported for GNTMAP_contains_pte mappings */
+-		if (!(map_ops[i].flags & GNTMAP_contains_pte))
+-			continue;
+-		pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) +
++		if (map_ops[i].flags & GNTMAP_contains_pte) {
++			pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) +
+ 				(map_ops[i].host_addr & ~PAGE_MASK));
+-		mfn = pte_mfn(*pte);
+-		ret = m2p_add_override(mfn, pages[i]);
++			mfn = pte_mfn(*pte);
++		} else {
++			mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
++		}
++		ret = m2p_add_override(mfn, pages[i],
++				       map_ops[i].flags & GNTMAP_contains_pte);
+ 		if (ret)
+ 			return ret;
+ 	}
+@@ -483,8 +488,13 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
+ 	if (ret)
+ 		return ret;
+ 
++	if (xen_feature(XENFEAT_auto_translated_physmap))
++		return ret;
 +
-+static int __devinit pcistub_match_one(struct pci_dev *dev,
-+				       struct pcistub_device_id *pdev_id)
+ 	for (i = 0; i < count; i++) {
+-		ret = m2p_remove_override(pages[i]);
++		/* We do not have the means of checking if GNTMAP_contains_pte
++		 * is set. */
++		ret = m2p_remove_override(pages[i], true /* clear the PTE */);
+ 		if (ret)
+ 			return ret;
+ 	}
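With the m2p-override changes above, gnttab_map_refs() now handles both
pte-based mappings (gntdev's use_ptemod path) and plain kernel-address
mappings. A minimal in-kernel sketch of the latter, using only calls that
appear in this patch:

#include <linux/mm.h>
#include <xen/grant_table.h>

/* Map one foreign grant at the kernel address of a caller-allocated page. */
static int map_one_grant(struct page *page, grant_ref_t gref, domid_t domid)
{
	struct gnttab_map_grant_ref op;
	phys_addr_t addr = (phys_addr_t)pfn_to_kaddr(page_to_pfn(page));
	int err;

	gnttab_set_map_op(&op, addr, GNTMAP_host_map, gref, domid);
	/* Issues the hypercall and, on non-auto-translated guests, records
	 * the m2p override using mfn = PFN_DOWN(op.dev_bus_addr), as in the
	 * new non-GNTMAP_contains_pte branch above. */
	err = gnttab_map_refs(&op, &page, 1);
	if (!err && op.status)
		err = -EINVAL;
	return err;
}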
+diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
+index 2417727..ebb2928 100644
+--- a/drivers/xen/manage.c
++++ b/drivers/xen/manage.c
+@@ -34,42 +34,38 @@ enum shutdown_state {
+ /* Ignore multiple shutdown requests. */
+ static enum shutdown_state shutting_down = SHUTDOWN_INVALID;
+ 
+-#ifdef CONFIG_PM_SLEEP
+-static int xen_hvm_suspend(void *data)
+-{
+-	int err;
+-	struct sched_shutdown r = { .reason = SHUTDOWN_suspend };
+-	int *cancelled = data;
+-
+-	BUG_ON(!irqs_disabled());
+-
+-	err = sysdev_suspend(PMSG_SUSPEND);
+-	if (err) {
+-		printk(KERN_ERR "xen_hvm_suspend: sysdev_suspend failed: %d\n",
+-		       err);
+-		return err;
+-	}
+-
+-	*cancelled = HYPERVISOR_sched_op(SCHEDOP_shutdown, &r);
++struct suspend_info {
++	int cancelled;
++	unsigned long arg; /* extra hypercall argument */
++	void (*pre)(void);
++	void (*post)(int cancelled);
++};
+ 
+-	xen_hvm_post_suspend(*cancelled);
++static void xen_hvm_post_suspend(int cancelled)
 +{
-+	/* Match the specified device by domain, bus, slot, func and also if
-+	 * any of the device's parent bridges match.
-+	 */
-+	for (; dev != NULL; dev = dev->bus->self) {
-+		if (pci_domain_nr(dev->bus) == pdev_id->domain
-+		    && dev->bus->number == pdev_id->bus
-+		    && dev->devfn == pdev_id->devfn)
-+			return 1;
-+
-+		/* Sometimes topmost bridge links to itself. */
-+		if (dev == dev->bus->self)
-+			break;
-+	}
-+
-+	return 0;
++	xen_arch_hvm_post_suspend(cancelled);
+ 	gnttab_resume();
 +}
-+
-+static int __devinit pcistub_match(struct pci_dev *dev)
+ 
+-	if (!*cancelled) {
+-		xen_irq_resume();
+-		xen_console_resume();
+-		xen_timer_resume();
+-	}
+-
+-	sysdev_resume();
++static void xen_pre_suspend(void)
 +{
-+	struct pcistub_device_id *pdev_id;
-+	unsigned long flags;
-+	int found = 0;
-+
-+	spin_lock_irqsave(&device_ids_lock, flags);
-+	list_for_each_entry(pdev_id, &pcistub_device_ids, slot_list) {
-+		if (pcistub_match_one(dev, pdev_id)) {
-+			found = 1;
-+			break;
-+		}
-+	}
-+	spin_unlock_irqrestore(&device_ids_lock, flags);
-+
-+	return found;
++	xen_mm_pin_all();
++	gnttab_suspend();
++	xen_arch_pre_suspend();
 +}
-+
-+static int __devinit pcistub_init_device(struct pci_dev *dev)
+ 
+-	return 0;
++static void xen_post_suspend(int cancelled)
 +{
-+	struct pciback_dev_data *dev_data;
-+	int err = 0;
-+
-+	dev_dbg(&dev->dev, "initializing...\n");
++	xen_arch_post_suspend(cancelled);
++	gnttab_resume();
++	xen_mm_unpin_all();
+ }
+ 
++#ifdef CONFIG_PM_SLEEP
+ static int xen_suspend(void *data)
+ {
++	struct suspend_info *si = data;
+ 	int err;
+-	int *cancelled = data;
+ 
+ 	BUG_ON(!irqs_disabled());
+ 
+@@ -80,22 +76,20 @@ static int xen_suspend(void *data)
+ 		return err;
+ 	}
+ 
+-	xen_mm_pin_all();
+-	gnttab_suspend();
+-	xen_pre_suspend();
++	if (si->pre)
++		si->pre();
+ 
+ 	/*
+ 	 * This hypercall returns 1 if suspend was cancelled
+ 	 * or the domain was merely checkpointed, and 0 if it
+ 	 * is resuming in a new domain.
+ 	 */
+-	*cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info));
++	si->cancelled = HYPERVISOR_suspend(si->arg);
+ 
+-	xen_post_suspend(*cancelled);
+-	gnttab_resume();
+-	xen_mm_unpin_all();
++	if (si->post)
++		si->post(si->cancelled);
+ 
+-	if (!*cancelled) {
++	if (!si->cancelled) {
+ 		xen_irq_resume();
+ 		xen_console_resume();
+ 		xen_timer_resume();
+@@ -109,7 +103,7 @@ static int xen_suspend(void *data)
+ static void do_suspend(void)
+ {
+ 	int err;
+-	int cancelled = 1;
++	struct suspend_info si;
+ 
+ 	shutting_down = SHUTDOWN_SUSPEND;
+ 
+@@ -139,20 +133,29 @@ static void do_suspend(void)
+ 		goto out_resume;
+ 	}
+ 
+-	if (xen_hvm_domain())
+-		err = stop_machine(xen_hvm_suspend, &cancelled, cpumask_of(0));
+-	else
+-		err = stop_machine(xen_suspend, &cancelled, cpumask_of(0));
++	si.cancelled = 1;
 +
-+	/* The PCI backend is not intended to be a module (or to work with
-+	 * removable PCI devices (yet). If it were, pciback_config_free()
-+	 * would need to be called somewhere to free the memory allocated
-+	 * here and then to call kfree(pci_get_drvdata(psdev->dev)).
-+	 */
-+	dev_data = kzalloc(sizeof(*dev_data), GFP_ATOMIC);
-+	if (!dev_data) {
-+		err = -ENOMEM;
-+		goto out;
++	if (xen_hvm_domain()) {
++		si.arg = 0UL;
++		si.pre = NULL;
++		si.post = &xen_hvm_post_suspend;
++	} else {
++		si.arg = virt_to_mfn(xen_start_info);
++		si.pre = &xen_pre_suspend;
++		si.post = &xen_post_suspend;
 +	}
-+	pci_set_drvdata(dev, dev_data);
 +
-+	dev_dbg(&dev->dev, "initializing config\n");
++	err = stop_machine(xen_suspend, &si, cpumask_of(0));
+ 
+ 	dpm_resume_noirq(PMSG_RESUME);
+ 
+ 	if (err) {
+ 		printk(KERN_ERR "failed to start xen_suspend: %d\n", err);
+-		cancelled = 1;
++		si.cancelled = 1;
+ 	}
+ 
+ out_resume:
+-	if (!cancelled) {
++	if (!si.cancelled) {
+ 		xen_arch_resume();
+ 		xs_resume();
+ 	} else
+@@ -172,12 +175,39 @@ out:
+ }
+ #endif	/* CONFIG_PM_SLEEP */
+ 
++struct shutdown_handler {
++	const char *command;
++	void (*cb)(void);
++};
 +
-+	init_waitqueue_head(&aer_wait_queue);
-+	err = pciback_config_init_dev(dev);
-+	if (err)
-+		goto out;
++static void do_poweroff(void)
++{
++	shutting_down = SHUTDOWN_POWEROFF;
++	orderly_poweroff(false);
++}
 +
-+	/* HACK: Force device (& ACPI) to determine what IRQ it's on - we
-+	 * must do this here because pcibios_enable_device may specify
-+	 * the pci device's true irq (and possibly its other resources)
-+	 * if they differ from what's in the configuration space.
-+	 * This makes the assumption that the device's resources won't
-+	 * change after this point (otherwise this code may break!)
-+	 */
-+	dev_dbg(&dev->dev, "enabling device\n");
-+	err = pci_enable_device(dev);
-+	if (err)
-+		goto config_release;
++static void do_reboot(void)
++{
++	shutting_down = SHUTDOWN_POWEROFF; /* ? */
++	ctrl_alt_del();
++}
 +
-+	/* Now disable the device (this also ensures some private device
-+	 * data is setup before we export)
-+	 */
-+	dev_dbg(&dev->dev, "reset device\n");
-+	pciback_reset_device(dev);
+ static void shutdown_handler(struct xenbus_watch *watch,
+ 			     const char **vec, unsigned int len)
+ {
+ 	char *str;
+ 	struct xenbus_transaction xbt;
+ 	int err;
++	static struct shutdown_handler handlers[] = {
++		{ "poweroff",	do_poweroff },
++		{ "halt",	do_poweroff },
++		{ "reboot",	do_reboot   },
++#ifdef CONFIG_PM_SLEEP
++		{ "suspend",	do_suspend  },
++#endif
++		{NULL, NULL},
++	};
++	static struct shutdown_handler *handler;
+ 
+ 	if (shutting_down != SHUTDOWN_INVALID)
+ 		return;
+@@ -194,7 +224,14 @@ static void shutdown_handler(struct xenbus_watch *watch,
+ 		return;
+ 	}
+ 
+-	xenbus_write(xbt, "control", "shutdown", "");
++	for (handler = &handlers[0]; handler->command; handler++) {
++		if (strcmp(str, handler->command) == 0)
++			break;
++	}
 +
-+	return 0;
++	/* Only acknowledge commands which we are prepared to handle. */
++	if (handler->cb)
++		xenbus_write(xbt, "control", "shutdown", "");
+ 
+ 	err = xenbus_transaction_end(xbt, 0);
+ 	if (err == -EAGAIN) {
+@@ -202,17 +239,8 @@ static void shutdown_handler(struct xenbus_watch *watch,
+ 		goto again;
+ 	}
+ 
+-	if (strcmp(str, "poweroff") == 0 ||
+-	    strcmp(str, "halt") == 0) {
+-		shutting_down = SHUTDOWN_POWEROFF;
+-		orderly_poweroff(false);
+-	} else if (strcmp(str, "reboot") == 0) {
+-		shutting_down = SHUTDOWN_POWEROFF; /* ? */
+-		ctrl_alt_del();
+-#ifdef CONFIG_PM_SLEEP
+-	} else if (strcmp(str, "suspend") == 0) {
+-		do_suspend();
+-#endif
++	if (handler->cb) {
++		handler->cb();
+ 	} else {
+ 		printk(KERN_INFO "Ignoring shutdown request: %s\n", str);
+ 		shutting_down = SHUTDOWN_INVALID;
+@@ -291,27 +319,18 @@ static int shutdown_event(struct notifier_block *notifier,
+ 	return NOTIFY_DONE;
+ }
+ 
+-static int __init __setup_shutdown_event(void)
+-{
+-	/* Delay initialization in the PV on HVM case */
+-	if (xen_hvm_domain())
+-		return 0;
+-
+-	if (!xen_pv_domain())
+-		return -ENODEV;
+-
+-	return xen_setup_shutdown_event();
+-}
+-
+ int xen_setup_shutdown_event(void)
+ {
+ 	static struct notifier_block xenstore_notifier = {
+ 		.notifier_call = shutdown_event
+ 	};
 +
-+      config_release:
-+	pciback_config_free_dev(dev);
++	if (!xen_domain())
++		return -ENODEV;
+ 	register_xenstore_notifier(&xenstore_notifier);
+ 
+ 	return 0;
+ }
+ EXPORT_SYMBOL_GPL(xen_setup_shutdown_event);
+ 
+-subsys_initcall(__setup_shutdown_event);
++subsys_initcall(xen_setup_shutdown_event);
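These handlers are dispatched on whatever string the toolstack writes to the
domain's control/shutdown xenstore node. A hedged dom0-side sketch using
libxenstore (xs_daemon_open()/xs_write() from <xenstore.h> are assumed to be
available):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <xenstore.h>

/* Ask domain `domid` to run one of the handlers[] commands added above:
 * "poweroff", "halt", "reboot" or "suspend". */
static int request_shutdown(int domid, const char *cmd)
{
	char path[64];
	struct xs_handle *xsh = xs_daemon_open();
	bool ok;

	if (!xsh)
		return -1;
	snprintf(path, sizeof(path), "/local/domain/%d/control/shutdown",
		 domid);
	ok = xs_write(xsh, XBT_NULL, path, cmd, strlen(cmd));
	xs_daemon_close(xsh);
	return ok ? 0 : -1;
}

Per the shutdown_handler() change above, the guest now acknowledges (clears)
the node only for commands it actually has a handler for.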
+diff --git a/drivers/xen/pciback/Makefile b/drivers/xen/pciback/Makefile
+new file mode 100644
+index 0000000..38bc123
+--- /dev/null
++++ b/drivers/xen/pciback/Makefile
+@@ -0,0 +1,17 @@
++obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback.o
 +
-+      out:
-+	pci_set_drvdata(dev, NULL);
-+	kfree(dev_data);
-+	return err;
-+}
++xen-pciback-y := pci_stub.o pciback_ops.o xenbus.o
++xen-pciback-y += conf_space.o conf_space_header.o \
++		 conf_space_capability.o \
++		 conf_space_capability_vpd.o \
++		 conf_space_capability_pm.o \
++		 conf_space_quirks.o
++xen-pciback-$(CONFIG_PCI_MSI) += conf_space_capability_msi.o
++xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_VPCI) += vpci.o
++xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_SLOT) += slot.o
++xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o
++xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_CONTROLLER) += controller.o
 +
++ifeq ($(CONFIG_XEN_PCIDEV_BE_DEBUG),y)
++EXTRA_CFLAGS += -DDEBUG
++endif
+diff --git a/drivers/xen/pciback/conf_space.c b/drivers/xen/pciback/conf_space.c
+new file mode 100644
+index 0000000..eb6bba0
+--- /dev/null
++++ b/drivers/xen/pciback/conf_space.c
+@@ -0,0 +1,435 @@
 +/*
-+ * Because some initialization still happens on
-+ * devices during fs_initcall, we need to defer
-+ * full initialization of our devices until
-+ * device_initcall.
++ * PCI Backend - Functions for creating a virtual configuration space for
++ *               exported PCI Devices.
++ *               It's dangerous to allow PCI Driver Domains to change their
++ *               device's resources (memory, i/o ports, interrupts). We need to
++ *               restrict changes to certain PCI Configuration registers:
++ *               BARs, INTERRUPT_PIN, most registers in the header...
++ *
++ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
 + */
-+static int __init pcistub_init_devices_late(void)
-+{
-+	struct pcistub_device *psdev;
-+	unsigned long flags;
-+	int err = 0;
-+
-+	pr_debug("pciback: pcistub_init_devices_late\n");
 +
-+	spin_lock_irqsave(&pcistub_devices_lock, flags);
-+
-+	while (!list_empty(&seized_devices)) {
-+		psdev = container_of(seized_devices.next,
-+				     struct pcistub_device, dev_list);
-+		list_del(&psdev->dev_list);
++#include <linux/kernel.h>
++#include <linux/pci.h>
++#include "pciback.h"
++#include "conf_space.h"
++#include "conf_space_quirks.h"
 +
-+		spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++static int permissive;
++module_param(permissive, bool, 0644);
 +
-+		err = pcistub_init_device(psdev->dev);
-+		if (err) {
-+			dev_err(&psdev->dev->dev,
-+				"error %d initializing device\n", err);
-+			kfree(psdev);
-+			psdev = NULL;
-+		}
++#define DEFINE_PCI_CONFIG(op, size, type)			\
++int pciback_##op##_config_##size				\
++(struct pci_dev *dev, int offset, type value, void *data)	\
++{								\
++	return pci_##op##_config_##size(dev, offset, value);	\
++}
 +
-+		spin_lock_irqsave(&pcistub_devices_lock, flags);
++DEFINE_PCI_CONFIG(read, byte, u8 *)
++DEFINE_PCI_CONFIG(read, word, u16 *)
++DEFINE_PCI_CONFIG(read, dword, u32 *)
 +
-+		if (psdev)
-+			list_add_tail(&psdev->dev_list, &pcistub_devices);
-+	}
++DEFINE_PCI_CONFIG(write, byte, u8)
++DEFINE_PCI_CONFIG(write, word, u16)
++DEFINE_PCI_CONFIG(write, dword, u32)
 +
-+	initialize_devices = 1;
++static int conf_space_read(struct pci_dev *dev,
++			   const struct config_field_entry *entry,
++			   int offset, u32 *value)
++{
++	int ret = 0;
++	const struct config_field *field = entry->field;
 +
-+	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++	*value = 0;
 +
-+	return 0;
++	switch (field->size) {
++	case 1:
++		if (field->u.b.read)
++			ret = field->u.b.read(dev, offset, (u8 *) value,
++					      entry->data);
++		break;
++	case 2:
++		if (field->u.w.read)
++			ret = field->u.w.read(dev, offset, (u16 *) value,
++					      entry->data);
++		break;
++	case 4:
++		if (field->u.dw.read)
++			ret = field->u.dw.read(dev, offset, value, entry->data);
++		break;
++	}
++	return ret;
 +}
 +
-+static int __devinit pcistub_seize(struct pci_dev *dev)
++static int conf_space_write(struct pci_dev *dev,
++			    const struct config_field_entry *entry,
++			    int offset, u32 value)
 +{
-+	struct pcistub_device *psdev;
-+	unsigned long flags;
-+	int err = 0;
-+
-+	psdev = pcistub_device_alloc(dev);
-+	if (!psdev)
-+		return -ENOMEM;
-+
-+	spin_lock_irqsave(&pcistub_devices_lock, flags);
++	int ret = 0;
++	const struct config_field *field = entry->field;
 +
-+	if (initialize_devices) {
-+		spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++	switch (field->size) {
++	case 1:
++		if (field->u.b.write)
++			ret = field->u.b.write(dev, offset, (u8) value,
++					       entry->data);
++		break;
++	case 2:
++		if (field->u.w.write)
++			ret = field->u.w.write(dev, offset, (u16) value,
++					       entry->data);
++		break;
++	case 4:
++		if (field->u.dw.write)
++			ret = field->u.dw.write(dev, offset, value,
++						entry->data);
++		break;
++	}
++	return ret;
++}
 +
-+		/* don't want irqs disabled when calling pcistub_init_device */
-+		err = pcistub_init_device(psdev->dev);
++static inline u32 get_mask(int size)
++{
++	if (size == 1)
++		return 0xff;
++	else if (size == 2)
++		return 0xffff;
++	else
++		return 0xffffffff;
++}
 +
-+		spin_lock_irqsave(&pcistub_devices_lock, flags);
++static inline int valid_request(int offset, int size)
++{
++	/* Validate request (no un-aligned requests) */
++	if ((size == 1 || size == 2 || size == 4) && (offset % size) == 0)
++		return 1;
++	return 0;
++}
 +
-+		if (!err)
-+			list_add(&psdev->dev_list, &pcistub_devices);
++static inline u32 merge_value(u32 val, u32 new_val, u32 new_val_mask,
++			      int offset)
++{
++	if (offset >= 0) {
++		new_val_mask <<= (offset * 8);
++		new_val <<= (offset * 8);
 +	} else {
-+		dev_dbg(&dev->dev, "deferring initialization\n");
-+		list_add(&psdev->dev_list, &seized_devices);
++		new_val_mask >>= (offset * -8);
++		new_val >>= (offset * -8);
 +	}
++	val = (val & ~new_val_mask) | (new_val & new_val_mask);
 +
-+	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
-+
-+	if (err)
-+		pcistub_device_put(psdev);
-+
-+	return err;
++	return val;
 +}
 +
-+static int __devinit pcistub_probe(struct pci_dev *dev,
-+				   const struct pci_device_id *id)
++static int pcibios_err_to_errno(int err)
 +{
-+	int err = 0;
-+
-+	dev_dbg(&dev->dev, "probing...\n");
-+
-+	if (pcistub_match(dev)) {
-+
-+		if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL
-+		    && dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
-+			dev_err(&dev->dev, "can't export pci devices that "
-+				"don't have a normal (0) or bridge (1) "
-+				"header type!\n");
-+			err = -ENODEV;
-+			goto out;
-+		}
-+
-+		dev_info(&dev->dev, "seizing device\n");
-+		err = pcistub_seize(dev);
-+#ifdef CONFIG_PCI_GUESTDEV
-+	} else if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
-+		if (!pci_is_guestdev(dev)) {
-+			err = -ENODEV;
-+			goto out;
-+		}
-+
-+		dev_info(&dev->dev, "seizing device\n");
-+		err = pcistub_seize(dev);
-+#endif /* CONFIG_PCI_GUESTDEV */
-+	} else
-+		/* Didn't find the device */
-+		err = -ENODEV;
-+
-+      out:
++	switch (err) {
++	case PCIBIOS_SUCCESSFUL:
++		return XEN_PCI_ERR_success;
++	case PCIBIOS_DEVICE_NOT_FOUND:
++		return XEN_PCI_ERR_dev_not_found;
++	case PCIBIOS_BAD_REGISTER_NUMBER:
++		return XEN_PCI_ERR_invalid_offset;
++	case PCIBIOS_FUNC_NOT_SUPPORTED:
++		return XEN_PCI_ERR_not_implemented;
++	case PCIBIOS_SET_FAILED:
++		return XEN_PCI_ERR_access_denied;
++	}
 +	return err;
 +}
 +
-+static void pcistub_remove(struct pci_dev *dev)
++int pciback_config_read(struct pci_dev *dev, int offset, int size,
++			u32 *ret_val)
 +{
-+	struct pcistub_device *psdev, *found_psdev = NULL;
-+	unsigned long flags;
-+
-+	dev_dbg(&dev->dev, "removing\n");
++	int err = 0;
++	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
++	const struct config_field_entry *cfg_entry;
++	const struct config_field *field;
++	int req_start, req_end, field_start, field_end;
++	/* if read fails for any reason, return 0
++	 * (as if device didn't respond) */
++	u32 value = 0, tmp_val;
 +
-+	spin_lock_irqsave(&pcistub_devices_lock, flags);
++	if (unlikely(verbose_request))
++		printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x\n",
++		       pci_name(dev), size, offset);
 +
-+	pciback_config_quirk_release(dev);
++	if (!valid_request(offset, size)) {
++		err = XEN_PCI_ERR_invalid_offset;
++		goto out;
++	}
 +
-+	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
-+		if (psdev->dev == dev) {
-+			found_psdev = psdev;
-+			break;
-+		}
++	/* Get the real value first, then modify as appropriate */
++	switch (size) {
++	case 1:
++		err = pci_read_config_byte(dev, offset, (u8 *) &value);
++		break;
++	case 2:
++		err = pci_read_config_word(dev, offset, (u16 *) &value);
++		break;
++	case 4:
++		err = pci_read_config_dword(dev, offset, &value);
++		break;
 +	}
 +
-+	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++	list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
++		field = cfg_entry->field;
 +
-+	if (found_psdev) {
-+		dev_dbg(&dev->dev, "found device to remove - in use? %p\n",
-+			found_psdev->pdev);
++		req_start = offset;
++		req_end = offset + size;
++		field_start = OFFSET(cfg_entry);
++		field_end = OFFSET(cfg_entry) + field->size;
 +
-+		if (found_psdev->pdev) {
-+			printk(KERN_WARNING "pciback: ****** removing device "
-+			       "%s while still in-use! ******\n",
-+			       pci_name(found_psdev->dev));
-+			printk(KERN_WARNING "pciback: ****** driver domain may "
-+			       "still access this device's i/o resources!\n");
-+			printk(KERN_WARNING "pciback: ****** shutdown driver "
-+			       "domain before binding device\n");
-+			printk(KERN_WARNING "pciback: ****** to other drivers "
-+			       "or domains\n");
++		if ((req_start >= field_start && req_start < field_end)
++		    || (req_end > field_start && req_end <= field_end)) {
++			err = conf_space_read(dev, cfg_entry, field_start,
++					      &tmp_val);
++			if (err)
++				goto out;
 +
-+			pciback_release_pci_dev(found_psdev->pdev,
-+						found_psdev->dev);
++			value = merge_value(value, tmp_val,
++					    get_mask(field->size),
++					    field_start - req_start);
 +		}
++	}
 +
-+		spin_lock_irqsave(&pcistub_devices_lock, flags);
-+		list_del(&found_psdev->dev_list);
-+		spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++out:
++	if (unlikely(verbose_request))
++		printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x = %x\n",
++		       pci_name(dev), size, offset, value);
 +
-+		/* the final put for releasing from the list */
-+		pcistub_device_put(found_psdev);
-+	}
++	*ret_val = value;
++	return pcibios_err_to_errno(err);
 +}
 +
-+static const struct pci_device_id pcistub_ids[] = {
-+	{
-+	 .vendor = PCI_ANY_ID,
-+	 .device = PCI_ANY_ID,
-+	 .subvendor = PCI_ANY_ID,
-+	 .subdevice = PCI_ANY_ID,
-+	 },
-+	{0,},
-+};
-+
-+static void kill_domain_by_device(struct pcistub_device *psdev)
++int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value)
 +{
-+	struct xenbus_transaction xbt;
-+	int err;
-+	char nodename[1024];
++	int err = 0, handled = 0;
++	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
++	const struct config_field_entry *cfg_entry;
++	const struct config_field *field;
++	u32 tmp_val;
++	int req_start, req_end, field_start, field_end;
 +
-+	if (!psdev) 
-+		dev_err(&psdev->dev->dev,
-+			"device is NULL when do AER recovery/kill_domain\n");
-+	sprintf(nodename, "/local/domain/0/backend/pci/%d/0", 
-+		psdev->pdev->xdev->otherend_id);
-+	nodename[strlen(nodename)] = '\0';
++	if (unlikely(verbose_request))
++		printk(KERN_DEBUG
++		       "pciback: %s: write request %d bytes at 0x%x = %x\n",
++		       pci_name(dev), size, offset, value);
 +
-+again:
-+	err = xenbus_transaction_start(&xbt);
-+	if (err)
-+	{
-+		dev_err(&psdev->dev->dev,
-+			"error %d when start xenbus transaction\n", err);
-+		return;
-+	}
-+	/*PV AER handlers will set this flag*/
-+	xenbus_printf(xbt, nodename, "aerState" , "aerfail" );
-+	err = xenbus_transaction_end(xbt, 0);
-+	if (err)
-+	{
-+		if (err == -EAGAIN)
-+			goto again;
-+		dev_err(&psdev->dev->dev,
-+			"error %d when end xenbus transaction\n", err);
-+		return;
-+	}
-+}
++	if (!valid_request(offset, size))
++		return XEN_PCI_ERR_invalid_offset;
 +
-+/* For each AER recovery step (error_detected, mmio_enabled, etc.), the
-+ * frontend and backend need to cooperate. In pciback, those steps all do
-+ * similar jobs: send a service request and wait for the frontend's response.
-+ */
-+static pci_ers_result_t common_process(struct pcistub_device *psdev, 
-+		pci_channel_state_t state, int aer_cmd, pci_ers_result_t result)
-+{
-+	pci_ers_result_t res = result;
-+	struct xen_pcie_aer_op *aer_op;
-+	int ret;
++	list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
++		field = cfg_entry->field;
 +
-+	/*with PV AER drivers*/
-+	aer_op = &(psdev->pdev->sh_info->aer_op);
-+	aer_op->cmd = aer_cmd ;
-+	/*useful for error_detected callback*/
-+	aer_op->err = state;
-+	/*pcifront_end BDF*/
-+	ret = pciback_get_pcifront_dev(psdev->dev, psdev->pdev,
-+		&aer_op->domain, &aer_op->bus, &aer_op->devfn);
-+	if (!ret) {
-+		dev_err(&psdev->dev->dev,
-+			"pciback: failed to get pcifront device\n");
-+		return PCI_ERS_RESULT_NONE; 
-+	}
-+	wmb();
++		req_start = offset;
++		req_end = offset + size;
++		field_start = OFFSET(cfg_entry);
++		field_end = OFFSET(cfg_entry) + field->size;
 +
-+	dev_dbg(&psdev->dev->dev, 
-+			"pciback: aer_op %x dom %x bus %x devfn %x\n",  
-+			aer_cmd, aer_op->domain, aer_op->bus, aer_op->devfn);
-+	/*local flag to mark there's aer request, pciback callback will use this
-+	* flag to judge whether we need to check pci-front give aer service
-+	* ack signal
-+	*/
-+	set_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);
++		if ((req_start >= field_start && req_start < field_end)
++		    || (req_end > field_start && req_end <= field_end)) {
++			tmp_val = 0;
 +
-+	/*It is possible that a pcifront conf_read_write ops request invokes
-+	* the callback which cause the spurious execution of wake_up. 
-+	* Yet it is harmless and better than a spinlock here
-+	*/
-+	set_bit(_XEN_PCIB_active, 
-+		(unsigned long *)&psdev->pdev->sh_info->flags);
-+	wmb();
-+	notify_remote_via_irq(psdev->pdev->evtchn_irq);
++			err = pciback_config_read(dev, field_start,
++						  field->size, &tmp_val);
++			if (err)
++				break;
 +
-+	ret = wait_event_timeout(aer_wait_queue, !(test_bit(_XEN_PCIB_active,
-+                (unsigned long *)&psdev->pdev->sh_info->flags)), 300*HZ);
++			tmp_val = merge_value(tmp_val, value, get_mask(size),
++					      req_start - field_start);
 +
-+	if (!ret) {
-+		if (test_bit(_XEN_PCIB_active, 
-+			(unsigned long *)&psdev->pdev->sh_info->flags)) {
-+			dev_err(&psdev->dev->dev, 
-+				"pcifront aer process not responding!\n");
-+			clear_bit(_XEN_PCIB_active,
-+			  (unsigned long *)&psdev->pdev->sh_info->flags);
-+			aer_op->err = PCI_ERS_RESULT_NONE;
-+			return res;
++			err = conf_space_write(dev, cfg_entry, field_start,
++					       tmp_val);
++
++			/* handled is set true here, but not every byte
++			 * may have been written! Properly detecting if
++			 * every byte is handled is unnecessary as the
++			 * flag is used to detect devices that need
++			 * special helpers to work correctly.
++			 */
++			handled = 1;
++		}
++	}
++
++	if (!handled && !err) {
++		/* By default, anything not specifically handled above is
++		 * read-only. The permissive flag changes this behavior so
++		 * that anything not specifically handled above is writable.
++		 * This means that some fields may still be read-only because
++		 * they have entries in the config_field list that intercept
++		 * the write and do nothing. */
++		if (dev_data->permissive || permissive) {
++			switch (size) {
++			case 1:
++				err = pci_write_config_byte(dev, offset,
++							    (u8) value);
++				break;
++			case 2:
++				err = pci_write_config_word(dev, offset,
++							    (u16) value);
++				break;
++			case 4:
++				err = pci_write_config_dword(dev, offset,
++							     (u32) value);
++				break;
++			}
++		} else if (!dev_data->warned_on_write) {
++			dev_data->warned_on_write = 1;
++			dev_warn(&dev->dev, "Driver tried to write to a "
++				 "read-only configuration space field at offset"
++				 " 0x%x, size %d. This may be harmless, but if "
++				 "you have problems with your device:\n"
++				 "1) see permissive attribute in sysfs\n"
++				 "2) report problems to the xen-devel "
++				 "mailing list along with details of your "
++				 "device obtained from lspci.\n", offset, size);
 +		}
 +	}
-+	clear_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);
-+
-+	if ( test_bit( _XEN_PCIF_active,
-+		(unsigned long*)&psdev->pdev->sh_info->flags)) {
-+		dev_dbg(&psdev->dev->dev, 
-+			"schedule pci_conf service in pciback \n");
-+		test_and_schedule_op(psdev->pdev);
-+	}
 +
-+	res = (pci_ers_result_t)aer_op->err;
-+	return res;
-+} 
++	return pcibios_err_to_errno(err);
++}
 +
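++/* Free only the dynamically added fields - those that carry a clean()
++ * hook, such as user-supplied quirks - and leave the static overlays
++ * alone. */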
-+/*
-+* pciback_slot_reset: it will send the slot_reset request to  pcifront in case
-+* of the device driver could provide this service, and then wait for pcifront
-+* ack.
-+* @dev: pointer to PCI devices
-+* return value is used by aer_core do_recovery policy
-+*/
-+static pci_ers_result_t pciback_slot_reset(struct pci_dev *dev)
++void pciback_config_free_dyn_fields(struct pci_dev *dev)
 +{
-+	struct pcistub_device *psdev;
-+	pci_ers_result_t result;
++	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
++	struct config_field_entry *cfg_entry, *t;
++	const struct config_field *field;
 +
-+	result = PCI_ERS_RESULT_RECOVERED;
-+	dev_dbg(&dev->dev, "pciback_slot_reset(bus:%x,devfn:%x)\n",
-+		dev->bus->number, dev->devfn);
++	dev_dbg(&dev->dev, "free-ing dynamically allocated virtual "
++			   "configuration space fields\n");
++	if (!dev_data)
++		return;
 +
-+	down_write(&pcistub_sem);
-+	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
-+				dev->bus->number,
-+				PCI_SLOT(dev->devfn),
-+				PCI_FUNC(dev->devfn));
++	list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) {
++		field = cfg_entry->field;
 +
-+	if ( !psdev || !psdev->pdev )
-+	{
-+		dev_err(&dev->dev, 
-+			"pciback device is not found/assigned\n");
-+		goto end;
-+	}
++		if (field->clean) {
++			field->clean((struct config_field *)field);
 +
-+	if ( !psdev->pdev->sh_info )
-+	{
-+		dev_err(&dev->dev, "pciback device is not connected or owned"
-+			" by HVM, kill it\n");
-+		kill_domain_by_device(psdev);
-+		goto release;
-+	}
++			kfree(cfg_entry->data);
 +
-+	if ( !test_bit(_XEN_PCIB_AERHANDLER, 
-+		(unsigned long *)&psdev->pdev->sh_info->flags) ) {
-+		dev_err(&dev->dev, 
-+			"guest with no AER driver should have been killed\n");
-+		goto release;
-+	}
-+	result = common_process(psdev, 1, XEN_PCI_OP_aer_slotreset, result);
++			list_del(&cfg_entry->list);
++			kfree(cfg_entry);
++		}
 +
-+	if (result == PCI_ERS_RESULT_NONE ||
-+		result == PCI_ERS_RESULT_DISCONNECT) {
-+		dev_dbg(&dev->dev, 
-+			"No AER slot_reset service or disconnected!\n");
-+		kill_domain_by_device(psdev);
 +	}
-+release:
-+	pcistub_device_put(psdev);
-+end:
-+	up_write(&pcistub_sem);
-+	return result;
-+
 +}
 +
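++/* Roll every virtual field back to its initial state by invoking its
++ * reset() hook, so a re-exported device starts out clean. */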
++void pciback_config_reset_dev(struct pci_dev *dev)
++{
++	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
++	const struct config_field_entry *cfg_entry;
++	const struct config_field *field;
 +
-+/*pciback_mmio_enabled: it will send the mmio_enabled request to  pcifront 
-+* in case of the device driver could provide this service, and then wait 
-+* for pcifront ack.
-+* @dev: pointer to PCI devices
-+* return value is used by aer_core do_recovery policy
-+*/
++	dev_dbg(&dev->dev, "resetting virtual configuration space\n");
++	if (!dev_data)
++		return;
 +
-+static pci_ers_result_t pciback_mmio_enabled(struct pci_dev *dev)
++	list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
++		field = cfg_entry->field;
++
++		if (field->reset)
++			field->reset(dev, OFFSET(cfg_entry), cfg_entry->data);
++	}
++}
++
++void pciback_config_free_dev(struct pci_dev *dev)
 +{
-+	struct pcistub_device *psdev;
-+	pci_ers_result_t result;
++	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
++	struct config_field_entry *cfg_entry, *t;
++	const struct config_field *field;
 +
-+	result = PCI_ERS_RESULT_RECOVERED;
-+	dev_dbg(&dev->dev, "pciback_mmio_enabled(bus:%x,devfn:%x)\n",
-+		dev->bus->number, dev->devfn);
++	dev_dbg(&dev->dev, "free-ing virtual configuration space fields\n");
++	if (!dev_data)
++		return;
 +
-+	down_write(&pcistub_sem);
-+	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
-+				dev->bus->number,
-+				PCI_SLOT(dev->devfn),
-+				PCI_FUNC(dev->devfn));
++	list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) {
++		list_del(&cfg_entry->list);
 +
-+	if ( !psdev || !psdev->pdev )
-+	{
-+		dev_err(&dev->dev, 
-+			"pciback device is not found/assigned\n");
-+		goto end;
-+	}
++		field = cfg_entry->field;
 +
-+	if ( !psdev->pdev->sh_info )
-+	{
-+		dev_err(&dev->dev, "pciback device is not connected or owned"
-+			" by HVM, kill it\n");
-+		kill_domain_by_device(psdev);
-+		goto release;
-+	}
++		if (field->release)
++			field->release(dev, OFFSET(cfg_entry), cfg_entry->data);
 +
-+	if ( !test_bit(_XEN_PCIB_AERHANDLER, 
-+		(unsigned long *)&psdev->pdev->sh_info->flags) ) {
-+		dev_err(&dev->dev, 
-+			"guest with no AER driver should have been killed\n");
-+		goto release;
++		kfree(cfg_entry);
 +	}
-+	result = common_process(psdev, 1, XEN_PCI_OP_aer_mmio, result);
++}
 +
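++/* Register a single virtual field on a device. base_offset allows one
++ * field table to be overlaid at different locations, e.g. wherever a
++ * capability happens to live in the device's capability list. */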
-+	if (result == PCI_ERS_RESULT_NONE ||
-+		result == PCI_ERS_RESULT_DISCONNECT) {
-+		dev_dbg(&dev->dev, 
-+			"No AER mmio_enabled service or disconnected!\n");
-+		kill_domain_by_device(psdev);
++int pciback_config_add_field_offset(struct pci_dev *dev,
++				    const struct config_field *field,
++				    unsigned int base_offset)
++{
++	int err = 0;
++	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
++	struct config_field_entry *cfg_entry;
++	void *tmp;
++
++	cfg_entry = kmalloc(sizeof(*cfg_entry), GFP_KERNEL);
++	if (!cfg_entry) {
++		err = -ENOMEM;
++		goto out;
 +	}
-+release:
-+	pcistub_device_put(psdev);
-+end:
-+	up_write(&pcistub_sem);
-+	return result;
-+}
 +
-+/*pciback_error_detected: it will send the error_detected request to  pcifront 
-+* in case of the device driver could provide this service, and then wait 
-+* for pcifront ack.
-+* @dev: pointer to PCI devices
-+* @error: the current PCI connection state
-+* return value is used by aer_core do_recovery policy
-+*/
++	cfg_entry->data = NULL;
++	cfg_entry->field = field;
++	cfg_entry->base_offset = base_offset;
 +
-+static pci_ers_result_t pciback_error_detected(struct pci_dev *dev,
-+	pci_channel_state_t error)
-+{
-+	struct pcistub_device *psdev;
-+	pci_ers_result_t result;
++	/* silently ignore duplicate fields */
++	err = pciback_field_is_dup(dev, OFFSET(cfg_entry));
++	if (err)
++		goto out;
 +
-+	result = PCI_ERS_RESULT_CAN_RECOVER;
-+	dev_dbg(&dev->dev, "pciback_error_detected(bus:%x,devfn:%x)\n",
-+		dev->bus->number, dev->devfn);
++	if (field->init) {
++		tmp = field->init(dev, OFFSET(cfg_entry));
 +
-+	down_write(&pcistub_sem);
-+	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
-+				dev->bus->number,
-+				PCI_SLOT(dev->devfn),
-+				PCI_FUNC(dev->devfn));
++		if (IS_ERR(tmp)) {
++			err = PTR_ERR(tmp);
++			goto out;
++		}
 +
-+	if ( !psdev || !psdev->pdev )
-+	{
-+		dev_err(&dev->dev, 
-+			"pciback device is not found/assigned\n");
-+		goto end;
++		cfg_entry->data = tmp;
 +	}
 +
-+	if ( !psdev->pdev->sh_info )
-+	{
-+		dev_err(&dev->dev, "pciback device is not connected or owned"
-+			" by HVM, kill it\n");
-+		kill_domain_by_device(psdev);
-+		goto release;
-+	}
++	dev_dbg(&dev->dev, "added config field at offset 0x%02x\n",
++		OFFSET(cfg_entry));
++	list_add_tail(&cfg_entry->list, &dev_data->config_fields);
 +
-+	/*Guest owns the device yet no aer handler regiested, kill guest*/
-+	if ( !test_bit(_XEN_PCIB_AERHANDLER, 
-+		(unsigned long *)&psdev->pdev->sh_info->flags) ) {
-+		dev_dbg(&dev->dev, "guest may have no aer driver, kill it\n");
-+		kill_domain_by_device(psdev);
-+		goto release;
-+	}
-+	result = common_process(psdev, error, XEN_PCI_OP_aer_detected, result);
++out:
++	if (err)
++		kfree(cfg_entry);
 +
-+	if (result == PCI_ERS_RESULT_NONE ||
-+		result == PCI_ERS_RESULT_DISCONNECT) {
-+		dev_dbg(&dev->dev, 
-+			"No AER error_detected service or disconnected!\n");
-+		kill_domain_by_device(psdev);
-+	}
-+release:
-+	pcistub_device_put(psdev);
-+end:
-+	up_write(&pcistub_sem);
-+	return result;
++	return err;
 +}
 +
-+/*pciback_error_resume: it will send the error_resume request to  pcifront 
-+* in case of the device driver could provide this service, and then wait 
-+* for pcifront ack.
-+* @dev: pointer to PCI devices
-+*/
++/* This sets up the device's virtual configuration space to keep track of
++ * certain registers (like the base address registers (BARs)) so that we
++ * can keep the client from manipulating them directly.
++ */
++int pciback_config_init_dev(struct pci_dev *dev)
++{
++	int err = 0;
++	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
++
++	dev_dbg(&dev->dev, "initializing virtual configuration space\n");
++
++	INIT_LIST_HEAD(&dev_data->config_fields);
++
++	err = pciback_config_header_add_fields(dev);
++	if (err)
++		goto out;
++
++	err = pciback_config_capability_add_fields(dev);
++	if (err)
++		goto out;
 +
-+static void pciback_error_resume(struct pci_dev *dev)
-+{
-+	struct pcistub_device *psdev;
++	err = pciback_config_quirks_init(dev);
 +
-+	dev_dbg(&dev->dev, "pciback_error_resume(bus:%x,devfn:%x)\n",
-+		dev->bus->number, dev->devfn);
++out:
++	return err;
++}
 +
-+	down_write(&pcistub_sem);
-+	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
-+				dev->bus->number,
-+				PCI_SLOT(dev->devfn),
-+				PCI_FUNC(dev->devfn));
++int pciback_config_init(void)
++{
++	return pciback_config_capability_init();
++}
+diff --git a/drivers/xen/pciback/conf_space.h b/drivers/xen/pciback/conf_space.h
+new file mode 100644
+index 0000000..50ebef2
+--- /dev/null
++++ b/drivers/xen/pciback/conf_space.h
+@@ -0,0 +1,126 @@
++/*
++ * PCI Backend - Common data structures for overriding the configuration space
++ *
++ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ */
 +
-+	if ( !psdev || !psdev->pdev )
-+	{
-+		dev_err(&dev->dev, 
-+			"pciback device is not found/assigned\n");
-+		goto end;
-+	}
++#ifndef __XEN_PCIBACK_CONF_SPACE_H__
++#define __XEN_PCIBACK_CONF_SPACE_H__
 +
-+	if ( !psdev->pdev->sh_info )
-+	{
-+		dev_err(&dev->dev, "pciback device is not connected or owned"
-+			" by HVM, kill it\n");
-+		kill_domain_by_device(psdev);
-+		goto release;
-+	}
++#include <linux/list.h>
++#include <linux/err.h>
 +
-+	if ( !test_bit(_XEN_PCIB_AERHANDLER, 
-+		(unsigned long *)&psdev->pdev->sh_info->flags) ) {
-+		dev_err(&dev->dev, 
-+			"guest with no AER driver should have been killed\n");
-+		kill_domain_by_device(psdev);
-+		goto release;
-+	}
-+	common_process(psdev, 1, XEN_PCI_OP_aer_resume, PCI_ERS_RESULT_RECOVERED);
-+release:
-+	pcistub_device_put(psdev);
-+end:
-+	up_write(&pcistub_sem);
-+	return;
-+}
++/* conf_field_init can return an errno in a ptr with ERR_PTR() */
++typedef void *(*conf_field_init) (struct pci_dev *dev, int offset);
++typedef void (*conf_field_reset) (struct pci_dev *dev, int offset, void *data);
++typedef void (*conf_field_free) (struct pci_dev *dev, int offset, void *data);
 +
-+/*add pciback AER handling*/
-+static struct pci_error_handlers pciback_error_handler = {
-+	.error_detected = pciback_error_detected,
-+	.mmio_enabled = pciback_mmio_enabled,
-+	.slot_reset = pciback_slot_reset,
-+	.resume = pciback_error_resume,
-+};
++typedef int (*conf_dword_write) (struct pci_dev *dev, int offset, u32 value,
++				 void *data);
++typedef int (*conf_word_write) (struct pci_dev *dev, int offset, u16 value,
++				void *data);
++typedef int (*conf_byte_write) (struct pci_dev *dev, int offset, u8 value,
++				void *data);
++typedef int (*conf_dword_read) (struct pci_dev *dev, int offset, u32 *value,
++				void *data);
++typedef int (*conf_word_read) (struct pci_dev *dev, int offset, u16 *value,
++			       void *data);
++typedef int (*conf_byte_read) (struct pci_dev *dev, int offset, u8 *value,
++			       void *data);
 +
-+/*
-+ * Note: There is no MODULE_DEVICE_TABLE entry here because this isn't
-+ * for a normal device. I don't want it to be loaded automatically.
++/* These are the fields within the configuration space which we
++ * are interested in intercepting reads/writes to and changing their
++ * values.
 + */
-+
-+static struct pci_driver pciback_pci_driver = {
-+	.name = "pciback",
-+	.id_table = pcistub_ids,
-+	.probe = pcistub_probe,
-+	.remove = pcistub_remove,
-+	.err_handler = &pciback_error_handler,
++struct config_field {
++	unsigned int offset;
++	unsigned int size;
++	unsigned int mask;
++	conf_field_init init;
++	conf_field_reset reset;
++	conf_field_free release;
++	void (*clean) (struct config_field *field);
++	union {
++		struct {
++			conf_dword_write write;
++			conf_dword_read read;
++		} dw;
++		struct {
++			conf_word_write write;
++			conf_word_read read;
++		} w;
++		struct {
++			conf_byte_write write;
++			conf_byte_read read;
++		} b;
++	} u;
++	struct list_head list;
 +};
 +
-+static inline int str_to_slot(const char *buf, int *domain, int *bus,
-+			      int *slot, int *func)
-+{
-+	int err;
++struct config_field_entry {
++	struct list_head list;
++	const struct config_field *field;
++	unsigned int base_offset;
++	void *data;
++};
 +
-+	err = sscanf(buf, " %x:%x:%x.%x", domain, bus, slot, func);
-+	if (err == 4)
-+		return 0;
-+	else if (err < 0)
-+		return -EINVAL;
++#define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset)
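++/* Example: a field with field->offset 2 overlaid at base_offset 0x50
++ * (a capability found there) is serviced at config offset 0x52. */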
 +
-+	/* try again without domain */
-+	*domain = 0;
-+	err = sscanf(buf, " %x:%x.%x", bus, slot, func);
-+	if (err == 3)
-+		return 0;
++/* Add fields to a device - the add_fields helpers expect a pointer to the
++ * first entry in an array whose end is marked by an entry with size == 0.
++ */
++int pciback_config_add_field_offset(struct pci_dev *dev,
++				    const struct config_field *field,
++				    unsigned int offset);
 +
-+	return -EINVAL;
++static inline int pciback_config_add_field(struct pci_dev *dev,
++					   const struct config_field *field)
++{
++	return pciback_config_add_field_offset(dev, field, 0);
 +}
 +
-+static inline int str_to_quirk(const char *buf, int *domain, int *bus, int
-+			       *slot, int *func, int *reg, int *size, int *mask)
++static inline int pciback_config_add_fields(struct pci_dev *dev,
++					    const struct config_field *field)
 +{
-+	int err;
-+
-+	err =
-+	    sscanf(buf, " %04x:%02x:%02x.%1x-%08x:%1x:%08x", domain, bus, slot,
-+		   func, reg, size, mask);
-+	if (err == 7)
-+		return 0;
-+	return -EINVAL;
++	int i, err = 0;
++	for (i = 0; field[i].size != 0; i++) {
++		err = pciback_config_add_field(dev, &field[i]);
++		if (err)
++			break;
++	}
++	return err;
 +}
 +
-+static int pcistub_device_id_add(int domain, int bus, int slot, int func)
++static inline int pciback_config_add_fields_offset(struct pci_dev *dev,
++					const struct config_field *field,
++					unsigned int offset)
 +{
-+	struct pcistub_device_id *pci_dev_id;
-+	unsigned long flags;
-+
-+	pci_dev_id = kmalloc(sizeof(*pci_dev_id), GFP_KERNEL);
-+	if (!pci_dev_id)
-+		return -ENOMEM;
-+
-+	pci_dev_id->domain = domain;
-+	pci_dev_id->bus = bus;
-+	pci_dev_id->devfn = PCI_DEVFN(slot, func);
-+
-+	pr_debug("pciback: wants to seize %04x:%02x:%02x.%01x\n",
-+		 domain, bus, slot, func);
++	int i, err = 0;
++	for (i = 0; field[i].size != 0; i++) {
++		err = pciback_config_add_field_offset(dev, &field[i], offset);
++		if (err)
++			break;
++	}
++	return err;
++}
 +
-+	spin_lock_irqsave(&device_ids_lock, flags);
-+	list_add_tail(&pci_dev_id->slot_list, &pcistub_device_ids);
-+	spin_unlock_irqrestore(&device_ids_lock, flags);
++/* Read/Write the real configuration space */
++int pciback_read_config_byte(struct pci_dev *dev, int offset, u8 *value,
++			     void *data);
++int pciback_read_config_word(struct pci_dev *dev, int offset, u16 *value,
++			     void *data);
++int pciback_read_config_dword(struct pci_dev *dev, int offset, u32 *value,
++			      void *data);
++int pciback_write_config_byte(struct pci_dev *dev, int offset, u8 value,
++			      void *data);
++int pciback_write_config_word(struct pci_dev *dev, int offset, u16 value,
++			      void *data);
++int pciback_write_config_dword(struct pci_dev *dev, int offset, u32 value,
++			       void *data);
 +
-+	return 0;
-+}
++int pciback_config_capability_init(void);
 +
-+static int pcistub_device_id_remove(int domain, int bus, int slot, int func)
-+{
-+	struct pcistub_device_id *pci_dev_id, *t;
-+	int devfn = PCI_DEVFN(slot, func);
-+	int err = -ENOENT;
-+	unsigned long flags;
++int pciback_config_header_add_fields(struct pci_dev *dev);
++int pciback_config_capability_add_fields(struct pci_dev *dev);
 +
-+	spin_lock_irqsave(&device_ids_lock, flags);
-+	list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids, slot_list) {
++#endif				/* __XEN_PCIBACK_CONF_SPACE_H__ */
+diff --git a/drivers/xen/pciback/conf_space_capability.c b/drivers/xen/pciback/conf_space_capability.c
+new file mode 100644
+index 0000000..0ea84d6
+--- /dev/null
++++ b/drivers/xen/pciback/conf_space_capability.c
+@@ -0,0 +1,66 @@
++/*
++ * PCI Backend - Handles the virtual fields found on the capability lists
++ *               in the configuration space.
++ *
++ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ */
 +
-+		if (pci_dev_id->domain == domain
-+		    && pci_dev_id->bus == bus && pci_dev_id->devfn == devfn) {
-+			/* Don't break; here because it's possible the same
-+			 * slot could be in the list more than once
-+			 */
-+			list_del(&pci_dev_id->slot_list);
-+			kfree(pci_dev_id);
++#include <linux/kernel.h>
++#include <linux/pci.h>
++#include "pciback.h"
++#include "conf_space.h"
++#include "conf_space_capability.h"
 +
-+			err = 0;
++static LIST_HEAD(capabilities);
 +
-+			pr_debug("pciback: removed %04x:%02x:%02x.%01x from "
-+				 "seize list\n", domain, bus, slot, func);
-+		}
-+	}
-+	spin_unlock_irqrestore(&device_ids_lock, flags);
++static const struct config_field caplist_header[] = {
++	{
++	 .offset    = PCI_CAP_LIST_ID,
++	 .size      = 2, /* encompass PCI_CAP_LIST_ID & PCI_CAP_LIST_NEXT */
++	 .u.w.read  = pciback_read_config_word,
++	 .u.w.write = NULL,
++	},
++	{}
++};
 +
-+	return err;
++static inline void register_capability(struct pciback_config_capability *cap)
++{
++	list_add_tail(&cap->cap_list, &capabilities);
 +}
 +
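++/* For each capability the device actually advertises, overlay the common
++ * two-byte capability header plus the capability-specific fields at the
++ * offset pci_find_capability() reports. */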
-+static int pcistub_reg_add(int domain, int bus, int slot, int func, int reg,
-+			   int size, int mask)
++int pciback_config_capability_add_fields(struct pci_dev *dev)
 +{
 +	int err = 0;
-+	struct pcistub_device *psdev;
-+	struct pci_dev *dev;
-+	struct config_field *field;
++	struct pciback_config_capability *cap;
++	int cap_offset;
 +
-+	psdev = pcistub_device_find(domain, bus, slot, func);
-+	if (!psdev || !psdev->dev) {
-+		err = -ENODEV;
-+		goto out;
-+	}
-+	dev = psdev->dev;
++	list_for_each_entry(cap, &capabilities, cap_list) {
++		cap_offset = pci_find_capability(dev, cap->capability);
++		if (cap_offset) {
++			dev_dbg(&dev->dev, "Found capability 0x%x at 0x%x\n",
++				cap->capability, cap_offset);
 +
-+	field = kzalloc(sizeof(*field), GFP_ATOMIC);
-+	if (!field) {
-+		err = -ENOMEM;
-+		goto out;
++			err = pciback_config_add_fields_offset(dev,
++							       caplist_header,
++							       cap_offset);
++			if (err)
++				goto out;
++			err = pciback_config_add_fields_offset(dev,
++							       cap->fields,
++							       cap_offset);
++			if (err)
++				goto out;
++		}
 +	}
 +
-+	field->offset = reg;
-+	field->size = size;
-+	field->mask = mask;
-+	field->init = NULL;
-+	field->reset = NULL;
-+	field->release = NULL;
-+	field->clean = pciback_config_field_free;
-+
-+	err = pciback_config_quirks_add_field(dev, field);
-+	if (err)
-+		kfree(field);
-+      out:
++out:
 +	return err;
 +}
 +
-+static ssize_t pcistub_slot_add(struct device_driver *drv, const char *buf,
-+				size_t count)
++int pciback_config_capability_init(void)
 +{
-+	int domain, bus, slot, func;
-+	int err;
-+
-+	err = str_to_slot(buf, &domain, &bus, &slot, &func);
-+	if (err)
-+		goto out;
-+
-+	err = pcistub_device_id_add(domain, bus, slot, func);
++	register_capability(&pciback_config_capability_vpd);
++	register_capability(&pciback_config_capability_pm);
 +
-+      out:
-+	if (!err)
-+		err = count;
-+	return err;
++	return 0;
 +}
+diff --git a/drivers/xen/pciback/conf_space_capability.h b/drivers/xen/pciback/conf_space_capability.h
+new file mode 100644
+index 0000000..8da3ac4
+--- /dev/null
++++ b/drivers/xen/pciback/conf_space_capability.h
+@@ -0,0 +1,26 @@
++/*
++ * PCI Backend - Data structures for special overlays for structures on
++ *               the capability list.
++ *
++ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ */
 +
-+DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add);
++#ifndef __PCIBACK_CONFIG_CAPABILITY_H__
++#define __PCIBACK_CONFIG_CAPABILITY_H__
 +
-+static ssize_t pcistub_slot_remove(struct device_driver *drv, const char *buf,
-+				   size_t count)
-+{
-+	int domain, bus, slot, func;
-+	int err;
++#include <linux/pci.h>
++#include <linux/list.h>
 +
-+	err = str_to_slot(buf, &domain, &bus, &slot, &func);
-+	if (err)
-+		goto out;
++struct pciback_config_capability {
++	struct list_head cap_list;
 +
-+	err = pcistub_device_id_remove(domain, bus, slot, func);
++	int capability;
 +
-+      out:
-+	if (!err)
-+		err = count;
-+	return err;
-+}
++	/* If the device has the capability found above, add these fields */
++	const struct config_field *fields;
++};
 +
-+DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove);
++extern struct pciback_config_capability pciback_config_capability_vpd;
++extern struct pciback_config_capability pciback_config_capability_pm;
 +
-+static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf)
-+{
-+	struct pcistub_device_id *pci_dev_id;
-+	size_t count = 0;
-+	unsigned long flags;
++#endif
+diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c
+new file mode 100644
+index 0000000..041e4aa
+--- /dev/null
++++ b/drivers/xen/pciback/conf_space_capability_msi.c
+@@ -0,0 +1,136 @@
++/*
++ * PCI Backend -- Configuration overlay for MSI capability
++ */
++#include <linux/pci.h>
++#include <linux/slab.h>
++#include "conf_space.h"
++#include "conf_space_capability.h"
++#include <xen/interface/io/pciif.h>
++#include <xen/events.h>
++#include "pciback.h"
 +
-+	spin_lock_irqsave(&device_ids_lock, flags);
-+	list_for_each_entry(pci_dev_id, &pcistub_device_ids, slot_list) {
-+		if (count >= PAGE_SIZE)
-+			break;
++int pciback_enable_msi(struct pciback_device *pdev,
++		struct pci_dev *dev, struct xen_pci_op *op)
++{
++	struct pciback_dev_data *dev_data;
++	int otherend = pdev->xdev->otherend_id;
++	int status;
 +
-+		count += scnprintf(buf + count, PAGE_SIZE - count,
-+				   "%04x:%02x:%02x.%01x\n",
-+				   pci_dev_id->domain, pci_dev_id->bus,
-+				   PCI_SLOT(pci_dev_id->devfn),
-+				   PCI_FUNC(pci_dev_id->devfn));
-+	}
-+	spin_unlock_irqrestore(&device_ids_lock, flags);
++	if (unlikely(verbose_request))
++		printk(KERN_DEBUG "pciback: %s: enable MSI\n", pci_name(dev));
 +
-+	return count;
-+}
++	status = pci_enable_msi(dev);
 +
-+DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL);
++	if (status) {
++		printk(KERN_ERR "error enable msi for guest %x status %x\n",
++			otherend, status);
++		op->value = 0;
++		return XEN_PCI_ERR_op_failed;
++	}
 +
-+static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf,
-+				 size_t count)
-+{
-+	int domain, bus, slot, func, reg, size, mask;
-+	int err;
++	/* The value the guest needs is actually the IDT vector, not
++	 * the local domain's IRQ number. */
 +
-+	err = str_to_quirk(buf, &domain, &bus, &slot, &func, &reg, &size,
-+			   &mask);
-+	if (err)
-+		goto out;
++	op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
++	if (unlikely(verbose_request))
++		printk(KERN_DEBUG "pciback: %s: MSI: %d\n", pci_name(dev),
++			op->value);
 +
-+	err = pcistub_reg_add(domain, bus, slot, func, reg, size, mask);
++	dev_data = pci_get_drvdata(dev);
++	if (dev_data)
++		dev_data->ack_intr = 0;
 +
-+      out:
-+	if (!err)
-+		err = count;
-+	return err;
++	return 0;
 +}
 +
-+static ssize_t pcistub_quirk_show(struct device_driver *drv, char *buf)
++int pciback_disable_msi(struct pciback_device *pdev,
++		struct pci_dev *dev, struct xen_pci_op *op)
 +{
-+	int count = 0;
-+	unsigned long flags;
-+	extern struct list_head pciback_quirks;
-+	struct pciback_config_quirk *quirk;
 +	struct pciback_dev_data *dev_data;
-+	const struct config_field *field;
-+	const struct config_field_entry *cfg_entry;
-+
-+	spin_lock_irqsave(&device_ids_lock, flags);
-+	list_for_each_entry(quirk, &pciback_quirks, quirks_list) {
-+		if (count >= PAGE_SIZE)
-+			goto out;
-+
-+		count += scnprintf(buf + count, PAGE_SIZE - count,
-+				   "%02x:%02x.%01x\n\t%04x:%04x:%04x:%04x\n",
-+				   quirk->pdev->bus->number,
-+				   PCI_SLOT(quirk->pdev->devfn),
-+				   PCI_FUNC(quirk->pdev->devfn),
-+				   quirk->devid.vendor, quirk->devid.device,
-+				   quirk->devid.subvendor,
-+				   quirk->devid.subdevice);
-+
-+		dev_data = pci_get_drvdata(quirk->pdev);
-+
-+		list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
-+			field = cfg_entry->field;
-+			if (count >= PAGE_SIZE)
-+				goto out;
-+
-+			count += scnprintf(buf + count, PAGE_SIZE - count,
-+					   "\t\t%08x:%01x:%08x\n",
-+					   cfg_entry->base_offset + field->offset, 
-+					   field->size, field->mask);
-+		}
-+	}
 +
-+      out:
-+	spin_unlock_irqrestore(&device_ids_lock, flags);
++	if (unlikely(verbose_request))
++		printk(KERN_DEBUG "pciback: %s: disable MSI\n", pci_name(dev));
++	pci_disable_msi(dev);
 +
-+	return count;
++	op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
++	if (unlikely(verbose_request))
++		printk(KERN_DEBUG "pciback: %s: MSI: %d\n", pci_name(dev),
++			op->value);
++	dev_data = pci_get_drvdata(dev);
++	if (dev_data)
++		dev_data->ack_intr = 1;
++	return 0;
 +}
 +
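++/* Bound the request by SH_INFO_MAX_VEC, copy the frontend's MSI-X entry
++ * table, enable MSI-X, and hand back pirqs the frontend can bind to. */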
-+DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show, pcistub_quirk_add);
-+
-+static ssize_t permissive_add(struct device_driver *drv, const char *buf,
-+			      size_t count)
++int pciback_enable_msix(struct pciback_device *pdev,
++		struct pci_dev *dev, struct xen_pci_op *op)
 +{
-+	int domain, bus, slot, func;
-+	int err;
-+	struct pcistub_device *psdev;
 +	struct pciback_dev_data *dev_data;
-+	err = str_to_slot(buf, &domain, &bus, &slot, &func);
-+	if (err)
-+		goto out;
-+	psdev = pcistub_device_find(domain, bus, slot, func);
-+	if (!psdev) {
-+		err = -ENODEV;
-+		goto out;
-+	}
-+	if (!psdev->dev) {
-+		err = -ENODEV;
-+		goto release;
-+	}
-+	dev_data = pci_get_drvdata(psdev->dev);
-+	/* the driver data for a device should never be null at this point */
-+	if (!dev_data) {
-+		err = -ENXIO;
-+		goto release;
++	int i, result;
++	struct msix_entry *entries;
++
++	if (unlikely(verbose_request))
++		printk(KERN_DEBUG "pciback: %s: enable MSI-X\n", pci_name(dev));
++	if (op->value > SH_INFO_MAX_VEC)
++		return -EINVAL;
++
++	entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL);
++	if (entries == NULL)
++		return -ENOMEM;
++
++	for (i = 0; i < op->value; i++) {
++		entries[i].entry = op->msix_entries[i].entry;
++		entries[i].vector = op->msix_entries[i].vector;
 +	}
-+	if (!dev_data->permissive) {
-+		dev_data->permissive = 1;
-+		/* Let user know that what they're doing could be unsafe */
-+		dev_warn(&psdev->dev->dev,
-+			 "enabling permissive mode configuration space accesses!\n");
-+		dev_warn(&psdev->dev->dev,
-+			 "permissive mode is potentially unsafe!\n");
++
++	result = pci_enable_msix(dev, entries, op->value);
++
++	if (result == 0) {
++		for (i = 0; i < op->value; i++) {
++			op->msix_entries[i].entry = entries[i].entry;
++			if (entries[i].vector) {
++				op->msix_entries[i].vector =
++					xen_pirq_from_irq(entries[i].vector);
++				if (unlikely(verbose_request))
++					printk(KERN_DEBUG "pciback: %s: "
++						"MSI-X[%d]: %d\n",
++						pci_name(dev), i,
++						op->msix_entries[i].vector);
++			}
++		}
++	} else {
++		printk(KERN_WARNING "pciback: %s: failed to enable MSI-X: err %d!\n",
++			pci_name(dev), result);
 +	}
-+      release:
-+	pcistub_device_put(psdev);
-+      out:
-+	if (!err)
-+		err = count;
-+	return err;
++	kfree(entries);
++
++	op->value = result;
++	dev_data = pci_get_drvdata(dev);
++	if (dev_data)
++		dev_data->ack_intr = 0;
++
++	return result;
 +}
 +
-+static ssize_t permissive_show(struct device_driver *drv, char *buf)
++int pciback_disable_msix(struct pciback_device *pdev,
++		struct pci_dev *dev, struct xen_pci_op *op)
 +{
-+	struct pcistub_device *psdev;
 +	struct pciback_dev_data *dev_data;
-+	size_t count = 0;
-+	unsigned long flags;
-+	spin_lock_irqsave(&pcistub_devices_lock, flags);
-+	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
-+		if (count >= PAGE_SIZE)
-+			break;
-+		if (!psdev->dev)
-+			continue;
-+		dev_data = pci_get_drvdata(psdev->dev);
-+		if (!dev_data || !dev_data->permissive)
-+			continue;
-+		count +=
-+		    scnprintf(buf + count, PAGE_SIZE - count, "%s\n",
-+			      pci_name(psdev->dev));
-+	}
-+	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
-+	return count;
++	if (unlikely(verbose_request))
++		printk(KERN_DEBUG "pciback: %s: disable MSI-X\n",
++			pci_name(dev));
++	pci_disable_msix(dev);
++
++	/*
++	 * SR-IOV devices (which don't have any legacy IRQ) have
++	 * an undefined IRQ value of zero.
++	 */
++	op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
++	if (unlikely(verbose_request))
++		printk(KERN_DEBUG "pciback: %s: MSI-X: %d\n", pci_name(dev),
++			op->value);
++	dev_data = pci_get_drvdata(dev);
++	if (dev_data)
++		dev_data->ack_intr = 1;
++	return 0;
 +}
 +
-+DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, permissive_add);
+diff --git a/drivers/xen/pciback/conf_space_capability_pm.c b/drivers/xen/pciback/conf_space_capability_pm.c
+new file mode 100644
+index 0000000..0442616
+--- /dev/null
++++ b/drivers/xen/pciback/conf_space_capability_pm.c
+@@ -0,0 +1,113 @@
++/*
++ * PCI Backend - Configuration space overlay for power management
++ *
++ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ */
 +
-+#ifdef CONFIG_PCI_MSI
++#include <linux/pci.h>
++#include "conf_space.h"
++#include "conf_space_capability.h"
 +
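++/* Hide the PME support bits in PMC from the guest; PMEs are shared and
++ * must stay under the backend's control (see PM_OK_BITS below). */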
-+int pciback_get_owner(struct pci_dev *dev)
++static int pm_caps_read(struct pci_dev *dev, int offset, u16 *value,
++			void *data)
 +{
-+	struct pcistub_device *psdev;
++	int err;
++	u16 real_value;
 +
-+	psdev = pcistub_device_find(pci_domain_nr(dev->bus), dev->bus->number,
-+			PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
++	err = pci_read_config_word(dev, offset, &real_value);
++	if (err)
++		goto out;
 +
-+	if (!psdev || !psdev->pdev)
-+		return -1;
++	*value = real_value & ~PCI_PM_CAP_PME_MASK;
 +
-+	return psdev->pdev->xdev->otherend_id;
++out:
++	return err;
 +}
-+#endif
-+
-+static void pcistub_exit(void)
-+{
-+	driver_remove_file(&pciback_pci_driver.driver, &driver_attr_new_slot);
-+	driver_remove_file(&pciback_pci_driver.driver,
-+			   &driver_attr_remove_slot);
-+	driver_remove_file(&pciback_pci_driver.driver, &driver_attr_slots);
-+	driver_remove_file(&pciback_pci_driver.driver, &driver_attr_quirks);
-+	driver_remove_file(&pciback_pci_driver.driver, &driver_attr_permissive);
 +
-+	pci_unregister_driver(&pciback_pci_driver);
-+	WARN_ON(unregister_msi_get_owner(pciback_get_owner));
-+}
++/* PM_OK_BITS specifies the bits that the driver domain is allowed to change.
++ * Can't allow driver domain to enable PMEs - they're shared */
++#define PM_OK_BITS (PCI_PM_CTRL_PME_STATUS|PCI_PM_CTRL_DATA_SEL_MASK)
 +
-+static int __init pcistub_init(void)
++static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value,
++			 void *data)
 +{
-+	int pos = 0;
-+	int err = 0;
-+	int domain, bus, slot, func;
-+	int parsed;
-+
-+	if (pci_devs_to_hide && *pci_devs_to_hide) {
-+		do {
-+			parsed = 0;
++	int err;
++	u16 old_value;
++	pci_power_t new_state, old_state;
 +
-+			err = sscanf(pci_devs_to_hide + pos,
-+				     " (%x:%x:%x.%x) %n",
-+				     &domain, &bus, &slot, &func, &parsed);
-+			if (err != 4) {
-+				domain = 0;
-+				err = sscanf(pci_devs_to_hide + pos,
-+					     " (%x:%x.%x) %n",
-+					     &bus, &slot, &func, &parsed);
-+				if (err != 3)
-+					goto parse_error;
-+			}
++	err = pci_read_config_word(dev, offset, &old_value);
++	if (err)
++		goto out;
 +
-+			err = pcistub_device_id_add(domain, bus, slot, func);
-+			if (err)
-+				goto out;
++	old_state = (pci_power_t)(old_value & PCI_PM_CTRL_STATE_MASK);
++	new_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK);
 +
-+			/* if parsed<=0, we've reached the end of the string */
-+			pos += parsed;
-+		} while (parsed > 0 && pci_devs_to_hide[pos]);
++	new_value &= PM_OK_BITS;
++	if ((old_value & PM_OK_BITS) != new_value) {
++		new_value = (old_value & ~PM_OK_BITS) | new_value;
++		err = pci_write_config_word(dev, offset, new_value);
++		if (err)
++			goto out;
 +	}
 +
-+	/* If we're the first PCI Device Driver to register, we're the
-+	 * first one to get offered PCI devices as they become
-+	 * available (and thus we can be the first to grab them)
-+	 */
-+	err = pci_register_driver(&pciback_pci_driver);
-+	if (err < 0)
++	/* Let pci core handle the power management change */
++	dev_dbg(&dev->dev, "set power state to %x\n", new_state);
++	err = pci_set_power_state(dev, new_state);
++	if (err) {
++		err = PCIBIOS_SET_FAILED;
 +		goto out;
++	}
 +
-+	err = driver_create_file(&pciback_pci_driver.driver,
-+				 &driver_attr_new_slot);
-+	if (!err)
-+		err = driver_create_file(&pciback_pci_driver.driver,
-+					 &driver_attr_remove_slot);
-+	if (!err)
-+		err = driver_create_file(&pciback_pci_driver.driver,
-+					 &driver_attr_slots);
-+	if (!err)
-+		err = driver_create_file(&pciback_pci_driver.driver,
-+					 &driver_attr_quirks);
-+	if (!err)
-+		err = driver_create_file(&pciback_pci_driver.driver,
-+					 &driver_attr_permissive);
-+
-+	if (!err)
-+		err = register_msi_get_owner(pciback_get_owner);
-+	if (err)
-+		pcistub_exit();
-+
-+      out:
++ out:
 +	return err;
-+
-+      parse_error:
-+	printk(KERN_ERR "pciback: Error parsing pci_devs_to_hide at \"%s\"\n",
-+	       pci_devs_to_hide + pos);
-+	return -EINVAL;
 +}
 +
-+#ifndef MODULE
-+/*
-+ * fs_initcall happens before device_initcall
-+ * so pciback *should* get called first (b/c we 
-+ * want to suck up any device before other drivers
-+ * get a chance by being the first pci device
-+ * driver to register)
-+ */
-+fs_initcall(pcistub_init);
-+#endif
-+
-+static int __init pciback_init(void)
++/* Ensure PMEs are disabled */
++static void *pm_ctrl_init(struct pci_dev *dev, int offset)
 +{
 +	int err;
++	u16 value;
 +
-+	err = pciback_config_init();
++	err = pci_read_config_word(dev, offset, &value);
 +	if (err)
-+		return err;
++		goto out;
 +
-+#ifdef MODULE
-+	err = pcistub_init();
-+	if (err < 0)
-+		return err;
-+#endif
++	if (value & PCI_PM_CTRL_PME_ENABLE) {
++		value &= ~PCI_PM_CTRL_PME_ENABLE;
++		err = pci_write_config_word(dev, offset, value);
++	}
 +
-+	pcistub_init_devices_late();
-+	err = pciback_xenbus_register();
-+	if (err)
-+		pcistub_exit();
++out:
++	return ERR_PTR(err);
++}
++
++static const struct config_field caplist_pm[] = {
++	{
++		.offset     = PCI_PM_PMC,
++		.size       = 2,
++		.u.w.read   = pm_caps_read,
++	},
++	{
++		.offset     = PCI_PM_CTRL,
++		.size       = 2,
++		.init       = pm_ctrl_init,
++		.u.w.read   = pciback_read_config_word,
++		.u.w.write  = pm_ctrl_write,
++	},
++	{
++		.offset     = PCI_PM_PPB_EXTENSIONS,
++		.size       = 1,
++		.u.b.read   = pciback_read_config_byte,
++	},
++	{
++		.offset     = PCI_PM_DATA_REGISTER,
++		.size       = 1,
++		.u.b.read   = pciback_read_config_byte,
++	},
++	{}
++};
 +
-+	return err;
-+}
++struct pciback_config_capability pciback_config_capability_pm = {
++	.capability = PCI_CAP_ID_PM,
++	.fields = caplist_pm,
++};
+diff --git a/drivers/xen/pciback/conf_space_capability_vpd.c b/drivers/xen/pciback/conf_space_capability_vpd.c
+new file mode 100644
+index 0000000..e7b4d66
+--- /dev/null
++++ b/drivers/xen/pciback/conf_space_capability_vpd.c
+@@ -0,0 +1,40 @@
++/*
++ * PCI Backend - Configuration space overlay for Vital Product Data
++ *
++ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ */
 +
-+static void __exit pciback_cleanup(void)
++#include <linux/pci.h>
++#include "conf_space.h"
++#include "conf_space_capability.h"
++
++static int vpd_address_write(struct pci_dev *dev, int offset, u16 value,
++			     void *data)
 +{
-+	pciback_xenbus_unregister();
-+	pcistub_exit();
++	/* Disallow writes to the vital product data */
++	if (value & PCI_VPD_ADDR_F)
++		return PCIBIOS_SET_FAILED;
++	else
++		return pci_write_config_word(dev, offset, value);
 +}
 +
-+module_init(pciback_init);
-+module_exit(pciback_cleanup);
++static const struct config_field caplist_vpd[] = {
++	{
++	 .offset    = PCI_VPD_ADDR,
++	 .size      = 2,
++	 .u.w.read  = pciback_read_config_word,
++	 .u.w.write = vpd_address_write,
++	 },
++	{
++	 .offset     = PCI_VPD_DATA,
++	 .size       = 4,
++	 .u.dw.read  = pciback_read_config_dword,
++	 .u.dw.write = NULL,
++	 },
++	{}
++};
 +
-+MODULE_LICENSE("Dual BSD/GPL");
-diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h
++struct pciback_config_capability pciback_config_capability_vpd = {
++	.capability = PCI_CAP_ID_VPD,
++	.fields = caplist_vpd,
++};
+diff --git a/drivers/xen/pciback/conf_space_header.c b/drivers/xen/pciback/conf_space_header.c
 new file mode 100644
-index 0000000..6744f45
+index 0000000..22ad0f5
 --- /dev/null
-+++ b/drivers/xen/pciback/pciback.h
-@@ -0,0 +1,126 @@
++++ b/drivers/xen/pciback/conf_space_header.c
+@@ -0,0 +1,385 @@
 +/*
-+ * PCI Backend Common Data Structures & Function Declarations
++ * PCI Backend - Handles the virtual fields in the configuration space headers.
 + *
-+ *   Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
 + */
-+#ifndef __XEN_PCIBACK_H__
-+#define __XEN_PCIBACK_H__
 +
++#include <linux/kernel.h>
 +#include <linux/pci.h>
-+#include <linux/interrupt.h>
-+#include <xen/xenbus.h>
-+#include <linux/list.h>
-+#include <linux/spinlock.h>
-+#include <linux/workqueue.h>
-+#include <asm/atomic.h>
-+#include <xen/interface/io/pciif.h>
++#include "pciback.h"
++#include "conf_space.h"
 +
-+struct pci_dev_entry {
-+	struct list_head list;
-+	struct pci_dev *dev;
++struct pci_bar_info {
++	u32 val;
++	u32 len_val;
++	int which;
 +};
 +
-+#define _PDEVF_op_active 	(0)
-+#define PDEVF_op_active 	(1<<(_PDEVF_op_active))
-+#define _PCIB_op_pending	(1)
-+#define PCIB_op_pending		(1<<(_PCIB_op_pending))
++#define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO))
++#define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER)
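++/* Report the real command register, but fake the IO/MEM decode bits as
++ * set while the backend has the device enabled, so the guest sees its
++ * own enable request as having taken effect. */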
 +
-+struct pciback_device {
-+	void *pci_dev_data;
-+	spinlock_t dev_lock;
++static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data)
++{
++	int i;
++	int ret;
 +
-+	struct xenbus_device *xdev;
++	ret = pciback_read_config_word(dev, offset, value, data);
++	if (!atomic_read(&dev->enable_cnt))
++		return ret;
 +
-+	struct xenbus_watch be_watch;
-+	u8 be_watching;
++	for (i = 0; i < PCI_ROM_RESOURCE; i++) {
++		if (dev->resource[i].flags & IORESOURCE_IO)
++			*value |= PCI_COMMAND_IO;
++		if (dev->resource[i].flags & IORESOURCE_MEM)
++			*value |= PCI_COMMAND_MEMORY;
++	}
 +
-+	int evtchn_irq;
++	return ret;
++}
 +
-+	struct vm_struct *sh_area;
-+	struct xen_pci_sharedinfo *sh_info;
++static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
++{
++	struct pciback_dev_data *dev_data;
++	int err;
 +
-+	unsigned long flags;
++	dev_data = pci_get_drvdata(dev);
++	if (!pci_is_enabled(dev) && is_enable_cmd(value)) {
++		if (unlikely(verbose_request))
++			printk(KERN_DEBUG "pciback: %s: enable\n",
++			       pci_name(dev));
++		err = pci_enable_device(dev);
++		if (err)
++			return err;
++		if (dev_data)
++			dev_data->enable_intx = 1;
++	} else if (pci_is_enabled(dev) && !is_enable_cmd(value)) {
++		if (unlikely(verbose_request))
++			printk(KERN_DEBUG "pciback: %s: disable\n",
++			       pci_name(dev));
++		pci_disable_device(dev);
++		if (dev_data)
++			dev_data->enable_intx = 0;
++	}
 +
-+	struct work_struct op_work;
-+};
++	if (!dev->is_busmaster && is_master_cmd(value)) {
++		if (unlikely(verbose_request))
++			printk(KERN_DEBUG "pciback: %s: set bus master\n",
++			       pci_name(dev));
++		pci_set_master(dev);
++	}
 +
-+struct pciback_dev_data {
-+	struct list_head config_fields;
-+	int permissive;
-+	int warned_on_write;
-+};
++	if (value & PCI_COMMAND_INVALIDATE) {
++		if (unlikely(verbose_request))
++			printk(KERN_DEBUG
++			       "pciback: %s: enable memory-write-invalidate\n",
++			       pci_name(dev));
++		err = pci_set_mwi(dev);
++		if (err) {
++			printk(KERN_WARNING
++			       "pciback: %s: cannot enable "
++			       "memory-write-invalidate (%d)\n",
++			       pci_name(dev), err);
++			value &= ~PCI_COMMAND_INVALIDATE;
++		}
++	}
 +
-+/* Get/Put PCI Devices that are hidden from the PCI Backend Domain */
-+struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev,
-+					    int domain, int bus,
-+					    int slot, int func);
-+struct pci_dev *pcistub_get_pci_dev(struct pciback_device *pdev,
-+				    struct pci_dev *dev);
-+void pcistub_put_pci_dev(struct pci_dev *dev);
++	return pci_write_config_word(dev, offset, value);
++}
 +
-+/* Ensure a device is turned off or reset */
-+void pciback_reset_device(struct pci_dev *pdev);
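++/* The expansion ROM BAR follows the same sizing handshake as the other
++ * BARs, except that ~PCI_ROM_ADDRESS_ENABLE is the "size me" pattern. */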
++static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data)
++{
++	struct pci_bar_info *bar = data;
 +
-+/* Access a virtual configuration space for a PCI device */
-+int pciback_config_init(void);
-+int pciback_config_init_dev(struct pci_dev *dev);
-+void pciback_config_free_dyn_fields(struct pci_dev *dev);
-+void pciback_config_reset_dev(struct pci_dev *dev);
-+void pciback_config_free_dev(struct pci_dev *dev);
-+int pciback_config_read(struct pci_dev *dev, int offset, int size,
-+			u32 * ret_val);
-+int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value);
++	if (unlikely(!bar)) {
++		printk(KERN_WARNING "pciback: driver data not found for %s\n",
++		       pci_name(dev));
++		return XEN_PCI_ERR_op_failed;
++	}
 +
-+/* Handle requests for specific devices from the frontend */
-+typedef int (*publish_pci_dev_cb) (struct pciback_device *pdev,
-+				   unsigned int domain, unsigned int bus,
-+				   unsigned int devfn, unsigned int devid);
-+typedef int (*publish_pci_root_cb) (struct pciback_device * pdev,
-+				    unsigned int domain, unsigned int bus);
-+int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
-+			int devid, publish_pci_dev_cb publish_cb);
-+void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev);
-+struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
-+				    unsigned int domain, unsigned int bus,
-+				    unsigned int devfn);
++	/* A write to obtain the length must happen as a 32-bit write.
++	 * This does not (yet) support writing individual bytes.
++	 */
++	if (value == ~PCI_ROM_ADDRESS_ENABLE)
++		bar->which = 1;
++	else {
++		u32 tmpval;
++		pci_read_config_dword(dev, offset, &tmpval);
++		if (tmpval != bar->val && value == bar->val) {
++			/* Allow restoration of bar value. */
++			pci_write_config_dword(dev, offset, bar->val);
++		}
++		bar->which = 0;
++	}
++
++	/* Do we need to support enabling/disabling the rom address here? */
++
++	return 0;
++}
++
++/* For the BARs, only allow writes which write ~0 or
++ * the correct resource information
++ * (Needed for when the driver probes the resource usage)
++ */
++static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data)
++{
++	struct pci_bar_info *bar = data;
++
++	if (unlikely(!bar)) {
++		printk(KERN_WARNING "pciback: driver data not found for %s\n",
++		       pci_name(dev));
++		return XEN_PCI_ERR_op_failed;
++	}
++
++	/* A write to obtain the length must happen as a 32-bit write.
++	 * This does not (yet) support writing individual bytes.
++	 */
++	if (value == ~0)
++		bar->which = 1;
++	else {
++		u32 tmpval;
++		pci_read_config_dword(dev, offset, &tmpval);
++		if (tmpval != bar->val && value == bar->val) {
++			/* Allow restoration of bar value. */
++			pci_write_config_dword(dev, offset, bar->val);
++		}
++		bar->which = 0;
++	}
++
++	return 0;
++}
++
++static int bar_read(struct pci_dev *dev, int offset, u32 *value, void *data)
++{
++	struct pci_bar_info *bar = data;
++
++	if (unlikely(!bar)) {
++		printk(KERN_WARNING "pciback: driver data not found for %s\n",
++		       pci_name(dev));
++		return XEN_PCI_ERR_op_failed;
++	}
++
++	*value = bar->which ? bar->len_val : bar->val;
 +
-+/** 
-+* Add for domain0 PCIE-AER handling. Get guest domain/bus/devfn in pciback
-+* before sending aer request to pcifront, so that guest could identify 
-+* device, coopearte with pciback to finish aer recovery job if device driver
-+* has the capability
-+*/
++	return 0;
++}
 +
-+int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev, 
-+				unsigned int *domain, unsigned int *bus, unsigned int *devfn);
-+int pciback_init_devices(struct pciback_device *pdev);
-+int pciback_publish_pci_roots(struct pciback_device *pdev,
-+			      publish_pci_root_cb cb);
-+void pciback_release_devices(struct pciback_device *pdev);
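++/* Snapshot a BAR's address and sizing pattern from the kernel's resource
++ * tree rather than the hardware; for the upper half of a 64-bit memory
++ * BAR the previous resource entry supplies the high 32 bits. */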
++static inline void read_dev_bar(struct pci_dev *dev,
++				struct pci_bar_info *bar_info, int offset,
++				u32 len_mask)
++{
++	int	pos;
++	struct resource	*res = dev->resource;
 +
-+/* Handles events from front-end */
-+irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs);
-+void pciback_do_op(void *data);
++	if (offset == PCI_ROM_ADDRESS || offset == PCI_ROM_ADDRESS1)
++		pos = PCI_ROM_RESOURCE;
++	else {
++		pos = (offset - PCI_BASE_ADDRESS_0) / 4;
++		if (pos && ((res[pos - 1].flags & (PCI_BASE_ADDRESS_SPACE |
++				PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
++			   (PCI_BASE_ADDRESS_SPACE_MEMORY |
++				PCI_BASE_ADDRESS_MEM_TYPE_64))) {
++			bar_info->val = res[pos - 1].start >> 32;
++			bar_info->len_val = res[pos - 1].end >> 32;
++			return;
++		}
++	}
 +
-+int pciback_xenbus_register(void);
-+void pciback_xenbus_unregister(void);
++	bar_info->val = res[pos].start |
++			(res[pos].flags & PCI_REGION_FLAG_MASK);
++	bar_info->len_val = res[pos].end - res[pos].start + 1;
++}
 +
-+#ifdef CONFIG_PCI_MSI
-+int pciback_enable_msi(struct pciback_device *pdev,
-+                       struct pci_dev *dev, struct xen_pci_op *op);
++static void *bar_init(struct pci_dev *dev, int offset)
++{
++	struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL);
 +
-+int pciback_disable_msi(struct pciback_device *pdev,
-+                         struct pci_dev *dev, struct xen_pci_op *op);
++	if (!bar)
++		return ERR_PTR(-ENOMEM);
 +
++	read_dev_bar(dev, bar, offset, ~0);
++	bar->which = 0;
 +
-+int pciback_enable_msix(struct pciback_device *pdev,
-+                        struct pci_dev *dev, struct xen_pci_op *op);
++	return bar;
++}
 +
-+int pciback_disable_msix(struct pciback_device *pdev,
-+                        struct pci_dev *dev, struct xen_pci_op *op);
-+#endif
-+extern int verbose_request;
++static void *rom_init(struct pci_dev *dev, int offset)
++{
++	struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL);
 +
-+void test_and_schedule_op(struct pciback_device *pdev);
-+#endif
++	if (!bar)
++		return ERR_PTR(-ENOMEM);
 +
-diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
-new file mode 100644
-index 0000000..b85b2db
---- /dev/null
-+++ b/drivers/xen/pciback/pciback_ops.c
-@@ -0,0 +1,134 @@
-+/*
-+ * PCI Backend Operations - respond to PCI requests from Frontend
-+ *
-+ *   Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
-+ */
-+#include <linux/module.h>
-+#include <linux/wait.h>
-+#include <asm/bitops.h>
-+#include <xen/evtchn.h>
-+#include "pciback.h"
++	read_dev_bar(dev, bar, offset, ~PCI_ROM_ADDRESS_ENABLE);
++	bar->which = 0;
 +
-+int verbose_request = 0;
-+module_param(verbose_request, int, 0644);
++	return bar;
++}
 +
-+/* Ensure a device is "turned off" and ready to be exported.
-+ * (Also see pciback_config_reset to ensure virtual configuration space is
-+ * ready to be re-exported)
-+ */
-+void pciback_reset_device(struct pci_dev *dev)
++static void bar_reset(struct pci_dev *dev, int offset, void *data)
 +{
-+	u16 cmd;
++	struct pci_bar_info *bar = data;
 +
-+	/* Disable devices (but not bridges) */
-+	if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
-+		pci_disable_device(dev);
++	bar->which = 0;
++}
 +
-+		pci_write_config_word(dev, PCI_COMMAND, 0);
++static void bar_release(struct pci_dev *dev, int offset, void *data)
++{
++	kfree(data);
++}
 +
-+		dev->is_enabled = 0;
-+		dev->is_busmaster = 0;
-+	} else {
-+		pci_read_config_word(dev, PCI_COMMAND, &cmd);
-+		if (cmd & (PCI_COMMAND_INVALIDATE)) {
-+			cmd &= ~(PCI_COMMAND_INVALIDATE);
-+			pci_write_config_word(dev, PCI_COMMAND, cmd);
++static int pciback_read_vendor(struct pci_dev *dev, int offset,
++			       u16 *value, void *data)
++{
++	*value = dev->vendor;
 +
-+			dev->is_busmaster = 0;
-+		}
-+	}
++	return 0;
 +}
-+extern wait_queue_head_t aer_wait_queue;
-+extern struct workqueue_struct *pciback_wq;
-+/*
-+* Now the same evtchn is used for both pcifront conf_read_write request
-+* as well as pcie aer front end ack. We use a new work_queue to schedule
-+* pciback conf_read_write service for avoiding confict with aer_core 
-+* do_recovery job which also use the system default work_queue
-+*/
-+void test_and_schedule_op(struct pciback_device *pdev)
++
++static int pciback_read_device(struct pci_dev *dev, int offset,
++			       u16 *value, void *data)
 +{
-+	/* Check that frontend is requesting an operation and that we are not
-+	 * already processing a request */
-+	if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags)
-+	    && !test_and_set_bit(_PDEVF_op_active, &pdev->flags))
-+	{
-+		queue_work(pciback_wq, &pdev->op_work);
-+	}
-+	/*_XEN_PCIB_active should have been cleared by pcifront. And also make
-+	sure pciback is waiting for ack by checking _PCIB_op_pending*/
-+	if (!test_bit(_XEN_PCIB_active,(unsigned long *)&pdev->sh_info->flags)
-+	    &&test_bit(_PCIB_op_pending, &pdev->flags)) {
-+		wake_up(&aer_wait_queue);
-+	}
++	*value = dev->device;
++
++	return 0;
 +}
 +
-+/* Performing the configuration space reads/writes must not be done in atomic
-+ * context because some of the pci_* functions can sleep (mostly due to ACPI
-+ * use of semaphores). This function is intended to be called from a work
-+ * queue in process context taking a struct pciback_device as a parameter */
-+void pciback_do_op(void *data)
++static int interrupt_read(struct pci_dev *dev, int offset, u8 *value,
++			  void *data)
 +{
-+	struct pciback_device *pdev = data;
-+	struct pci_dev *dev;
-+	struct xen_pci_op *op = &pdev->sh_info->op;
++	*value = (u8) dev->irq;
 +
-+	dev = pciback_get_pci_dev(pdev, op->domain, op->bus, op->devfn);
++	return 0;
++}
 +
-+	if (dev == NULL)
-+		op->err = XEN_PCI_ERR_dev_not_found;
-+	else
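++/* Only let the guest start a self-test or leave the non-start BIST bits
++ * unchanged; any other BIST write is dropped. */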
++static int bist_write(struct pci_dev *dev, int offset, u8 value, void *data)
++{
++	u8 cur_value;
++	int err;
++
++	err = pci_read_config_byte(dev, offset, &cur_value);
++	if (err)
++		goto out;
++
++	if ((cur_value & ~PCI_BIST_START) == (value & ~PCI_BIST_START)
++	    || value == PCI_BIST_START)
++		err = pci_write_config_byte(dev, offset, value);
++
++out:
++	return err;
++}
++
++static const struct config_field header_common[] = {
 +	{
-+		switch (op->cmd)
-+		{
-+			case XEN_PCI_OP_conf_read:
-+				op->err = pciback_config_read(dev,
-+					  op->offset, op->size, &op->value);
-+				break;
-+			case XEN_PCI_OP_conf_write:
-+				op->err = pciback_config_write(dev,
-+					  op->offset, op->size,	op->value);
-+				break;
-+#ifdef CONFIG_PCI_MSI
-+			case XEN_PCI_OP_enable_msi:
-+				op->err = pciback_enable_msi(pdev, dev, op);
-+				break;
-+			case XEN_PCI_OP_disable_msi:
-+				op->err = pciback_disable_msi(pdev, dev, op);
-+				break;
-+			case XEN_PCI_OP_enable_msix:
-+				op->err = pciback_enable_msix(pdev, dev, op);
-+				break;
-+			case XEN_PCI_OP_disable_msix:
-+				op->err = pciback_disable_msix(pdev, dev, op);
-+				break;
-+#endif
-+			default:
-+				op->err = XEN_PCI_ERR_not_implemented;
-+				break;
-+		}
++	 .offset    = PCI_VENDOR_ID,
++	 .size      = 2,
++	 .u.w.read  = pciback_read_vendor,
++	},
++	{
++	 .offset    = PCI_DEVICE_ID,
++	 .size      = 2,
++	 .u.w.read  = pciback_read_device,
++	},
++	{
++	 .offset    = PCI_COMMAND,
++	 .size      = 2,
++	 .u.w.read  = command_read,
++	 .u.w.write = command_write,
++	},
++	{
++	 .offset    = PCI_INTERRUPT_LINE,
++	 .size      = 1,
++	 .u.b.read  = interrupt_read,
++	},
++	{
++	 .offset    = PCI_INTERRUPT_PIN,
++	 .size      = 1,
++	 .u.b.read  = pciback_read_config_byte,
++	},
++	{
++	 /* Any side effects of letting driver domain control cache line? */
++	 .offset    = PCI_CACHE_LINE_SIZE,
++	 .size      = 1,
++	 .u.b.read  = pciback_read_config_byte,
++	 .u.b.write = pciback_write_config_byte,
++	},
++	{
++	 .offset    = PCI_LATENCY_TIMER,
++	 .size      = 1,
++	 .u.b.read  = pciback_read_config_byte,
++	},
++	{
++	 .offset    = PCI_BIST,
++	 .size      = 1,
++	 .u.b.read  = pciback_read_config_byte,
++	 .u.b.write = bist_write,
++	},
++	{}
++};
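++
++/* The fields listed above overlay the standard header: for example,
++ * PCI_VENDOR_ID and PCI_DEVICE_ID are answered from the cached
++ * struct pci_dev values rather than by touching the hardware, and
++ * PCI_COMMAND goes through command_read/command_write so writes can
++ * be filtered. */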
++
++#define CFG_FIELD_BAR(reg_offset)			\
++	{						\
++	.offset     = reg_offset,			\
++	.size       = 4,				\
++	.init       = bar_init,				\
++	.reset      = bar_reset,			\
++	.release    = bar_release,			\
++	.u.dw.read  = bar_read,				\
++	.u.dw.write = bar_write,			\
 +	}
-+	/* Tell the driver domain that we're done. */ 
-+	wmb();
-+	clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
-+	notify_remote_via_irq(pdev->evtchn_irq);
 +
-+	/* Mark that we're done. */
-+	smp_mb__before_clear_bit(); /* /after/ clearing PCIF_active */
-+	clear_bit(_PDEVF_op_active, &pdev->flags);
-+	smp_mb__after_clear_bit(); /* /before/ final check for work */
++#define CFG_FIELD_ROM(reg_offset)			\
++	{						\
++	.offset     = reg_offset,			\
++	.size       = 4,				\
++	.init       = rom_init,				\
++	.reset      = bar_reset,			\
++	.release    = bar_release,			\
++	.u.dw.read  = bar_read,				\
++	.u.dw.write = rom_write,			\
++	}
++
++static const struct config_field header_0[] = {
++	CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
++	CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
++	CFG_FIELD_BAR(PCI_BASE_ADDRESS_2),
++	CFG_FIELD_BAR(PCI_BASE_ADDRESS_3),
++	CFG_FIELD_BAR(PCI_BASE_ADDRESS_4),
++	CFG_FIELD_BAR(PCI_BASE_ADDRESS_5),
++	CFG_FIELD_ROM(PCI_ROM_ADDRESS),
++	{}
++};
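++
++/* Type 0 (normal) headers carry six BARs plus an expansion ROM at
++ * PCI_ROM_ADDRESS; type 1 (bridge) headers have only two BARs and put
++ * the ROM at PCI_ROM_ADDRESS1, hence the separate header_1 table. */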
 +
-+	/* Check to see if the driver domain tried to start another request in
-+	 * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active. 
-+	*/
-+	test_and_schedule_op(pdev);
-+}
++static const struct config_field header_1[] = {
++	CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
++	CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
++	CFG_FIELD_ROM(PCI_ROM_ADDRESS1),
++	{}
++};
 +
-+irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs)
++int pciback_config_header_add_fields(struct pci_dev *dev)
 +{
-+	struct pciback_device *pdev = dev_id;
++	int err;
 +
-+	test_and_schedule_op(pdev);
++	err = pciback_config_add_fields(dev, header_common);
++	if (err)
++		goto out;
 +
-+	return IRQ_HANDLED;
++	switch (dev->hdr_type) {
++	case PCI_HEADER_TYPE_NORMAL:
++		err = pciback_config_add_fields(dev, header_0);
++		break;
++
++	case PCI_HEADER_TYPE_BRIDGE:
++		err = pciback_config_add_fields(dev, header_1);
++		break;
++
++	default:
++		err = -EINVAL;
++		printk(KERN_ERR "pciback: %s: Unsupported header type %d!\n",
++		       pci_name(dev), dev->hdr_type);
++		break;
++	}
++
++out:
++	return err;
 +}
-diff --git a/drivers/xen/pciback/slot.c b/drivers/xen/pciback/slot.c
+diff --git a/drivers/xen/pciback/conf_space_quirks.c b/drivers/xen/pciback/conf_space_quirks.c
 new file mode 100644
-index 0000000..105a8b6
+index 0000000..45c31fb
 --- /dev/null
-+++ b/drivers/xen/pciback/slot.c
-@@ -0,0 +1,187 @@
++++ b/drivers/xen/pciback/conf_space_quirks.c
+@@ -0,0 +1,140 @@
 +/*
-+ * PCI Backend - Provides a Virtual PCI bus (with real devices)
-+ *               to the frontend
++ * PCI Backend - Handle special overlays for broken devices.
 + *
-+ *   Author: Ryan Wilson <hap9 at epoch.ncsc.mil> (vpci.c)
-+ *   Author: Tristan Gingold <tristan.gingold at bull.net>, from vpci.c
++ * Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ * Author: Chris Bookholt <hap10 at epoch.ncsc.mil>
 + */
 +
-+#include <linux/list.h>
-+#include <linux/slab.h>
++#include <linux/kernel.h>
 +#include <linux/pci.h>
-+#include <linux/spinlock.h>
 +#include "pciback.h"
++#include "conf_space.h"
++#include "conf_space_quirks.h"
 +
-+/* There are at most 32 slots in a pci bus.  */
-+#define PCI_SLOT_MAX 32
-+
-+#define PCI_BUS_NBR 2
++LIST_HEAD(pciback_quirks);
 +
-+struct slot_dev_data {
-+	/* Access to dev_list must be protected by lock */
-+	struct pci_dev *slots[PCI_BUS_NBR][PCI_SLOT_MAX];
-+	spinlock_t lock;
-+};
++static inline const struct pci_device_id *
++match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
++{
++	if ((id->vendor == PCI_ANY_ID || id->vendor == dev->vendor) &&
++	    (id->device == PCI_ANY_ID || id->device == dev->device) &&
++	    (id->subvendor == PCI_ANY_ID ||
++				id->subvendor == dev->subsystem_vendor) &&
++	    (id->subdevice == PCI_ANY_ID ||
++				id->subdevice == dev->subsystem_device) &&
++	    !((id->class ^ dev->class) & id->class_mask))
++		return id;
++	return NULL;
++}
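++
++/* This mirrors the PCI core's pci_match_one_device(): PCI_ANY_ID acts
++ * as a wildcard for each ID field, and the class test passes when every
++ * bit selected by class_mask agrees. */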
 +
-+struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
-+				    unsigned int domain, unsigned int bus,
-+				    unsigned int devfn)
++struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev)
 +{
-+	struct pci_dev *dev = NULL;
-+	struct slot_dev_data *slot_dev = pdev->pci_dev_data;
-+	unsigned long flags;
++	struct pciback_config_quirk *tmp_quirk;
 +
-+	if (domain != 0 || PCI_FUNC(devfn) != 0)
-+		return NULL;
++	list_for_each_entry(tmp_quirk, &pciback_quirks, quirks_list)
++		if (match_one_device(&tmp_quirk->devid, dev) != NULL)
++			goto out;
++	tmp_quirk = NULL;
++	printk(KERN_DEBUG
++	       "quirk didn't match any device pciback knows about\n");
++out:
++	return tmp_quirk;
++}
 +
-+	if (PCI_SLOT(devfn) >= PCI_SLOT_MAX || bus >= PCI_BUS_NBR)
-+		return NULL;
++static inline void register_quirk(struct pciback_config_quirk *quirk)
++{
++	list_add_tail(&quirk->quirks_list, &pciback_quirks);
++}
 +
-+	spin_lock_irqsave(&slot_dev->lock, flags);
-+	dev = slot_dev->slots[bus][PCI_SLOT(devfn)];
-+	spin_unlock_irqrestore(&slot_dev->lock, flags);
++int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg)
++{
++	int ret = 0;
++	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
++	struct config_field_entry *cfg_entry;
 +
-+	return dev;
++	list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
++		if (OFFSET(cfg_entry) == reg) {
++			ret = 1;
++			break;
++		}
++	}
++	return ret;
 +}
 +
-+int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
-+			int devid, publish_pci_dev_cb publish_cb)
++int pciback_config_quirks_add_field(struct pci_dev *dev,
++				    struct config_field *field)
 +{
-+	int err = 0, slot, bus;
-+	struct slot_dev_data *slot_dev = pdev->pci_dev_data;
-+	unsigned long flags;
++	int err = 0;
 +
-+	if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) {
-+		err = -EFAULT;
-+		xenbus_dev_fatal(pdev->xdev, err,
-+				 "Can't export bridges on the virtual PCI bus");
++	switch (field->size) {
++	case 1:
++		field->u.b.read = pciback_read_config_byte;
++		field->u.b.write = pciback_write_config_byte;
++		break;
++	case 2:
++		field->u.w.read = pciback_read_config_word;
++		field->u.w.write = pciback_write_config_word;
++		break;
++	case 4:
++		field->u.dw.read = pciback_read_config_dword;
++		field->u.dw.write = pciback_write_config_dword;
++		break;
++	default:
++		err = -EINVAL;
 +		goto out;
 +	}
 +
-+	spin_lock_irqsave(&slot_dev->lock, flags);
-+
-+	/* Assign to a new slot on the virtual PCI bus */
-+	for (bus = 0; bus < PCI_BUS_NBR; bus++)
-+		for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
-+			if (slot_dev->slots[bus][slot] == NULL) {
-+				printk(KERN_INFO
-+				       "pciback: slot: %s: assign to virtual slot %d, bus %d\n",
-+				       pci_name(dev), slot, bus);
-+				slot_dev->slots[bus][slot] = dev;
-+				goto unlock;
-+			}
-+		}
-+
-+	err = -ENOMEM;
-+	xenbus_dev_fatal(pdev->xdev, err,
-+			 "No more space on root virtual PCI bus");
-+
-+      unlock:
-+	spin_unlock_irqrestore(&slot_dev->lock, flags);
-+
-+	/* Publish this device. */
-+	if(!err)
-+		err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, 0), devid);
++	pciback_config_add_field(dev, field);
 +
-+      out:
++out:
 +	return err;
 +}
 +
-+void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
++int pciback_config_quirks_init(struct pci_dev *dev)
 +{
-+	int slot, bus;
-+	struct slot_dev_data *slot_dev = pdev->pci_dev_data;
-+	struct pci_dev *found_dev = NULL;
-+	unsigned long flags;
++	struct pciback_config_quirk *quirk;
++	int ret = 0;
 +
-+	spin_lock_irqsave(&slot_dev->lock, flags);
++	quirk = kzalloc(sizeof(*quirk), GFP_ATOMIC);
++	if (!quirk) {
++		ret = -ENOMEM;
++		goto out;
++	}
 +
-+	for (bus = 0; bus < PCI_BUS_NBR; bus++)
-+		for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
-+			if (slot_dev->slots[bus][slot] == dev) {
-+				slot_dev->slots[bus][slot] = NULL;
-+				found_dev = dev;
-+				goto out;
-+			}
-+		}
++	quirk->devid.vendor = dev->vendor;
++	quirk->devid.device = dev->device;
++	quirk->devid.subvendor = dev->subsystem_vendor;
++	quirk->devid.subdevice = dev->subsystem_device;
++	quirk->devid.class = 0;
++	quirk->devid.class_mask = 0;
++	quirk->devid.driver_data = 0UL;
 +
-+      out:
-+	spin_unlock_irqrestore(&slot_dev->lock, flags);
++	quirk->pdev = dev;
 +
-+	if (found_dev)
-+		pcistub_put_pci_dev(found_dev);
++	register_quirk(quirk);
++out:
++	return ret;
 +}
 +
-+int pciback_init_devices(struct pciback_device *pdev)
++void pciback_config_field_free(struct config_field *field)
 +{
-+	int slot, bus;
-+	struct slot_dev_data *slot_dev;
-+
-+	slot_dev = kmalloc(sizeof(*slot_dev), GFP_KERNEL);
-+	if (!slot_dev)
-+		return -ENOMEM;
++	kfree(field);
++}
 +
-+	spin_lock_init(&slot_dev->lock);
++int pciback_config_quirk_release(struct pci_dev *dev)
++{
++	struct pciback_config_quirk *quirk;
++	int ret = 0;
 +
-+	for (bus = 0; bus < PCI_BUS_NBR; bus++)
-+		for (slot = 0; slot < PCI_SLOT_MAX; slot++)
-+			slot_dev->slots[bus][slot] = NULL;
++	quirk = pciback_find_quirk(dev);
++	if (!quirk) {
++		ret = -ENXIO;
++		goto out;
++	}
 +
-+	pdev->pci_dev_data = slot_dev;
++	list_del(&quirk->quirks_list);
++	kfree(quirk);
 +
-+	return 0;
++out:
++	return ret;
 +}
+diff --git a/drivers/xen/pciback/conf_space_quirks.h b/drivers/xen/pciback/conf_space_quirks.h
+new file mode 100644
+index 0000000..acd0e1a
+--- /dev/null
++++ b/drivers/xen/pciback/conf_space_quirks.h
+@@ -0,0 +1,35 @@
++/*
++ * PCI Backend - Data structures for special overlays for broken devices.
++ *
++ * Ryan Wilson <hap9 at epoch.ncsc.mil>
++ * Chris Bookholt <hap10 at epoch.ncsc.mil>
++ */
++
++#ifndef __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__
++#define __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__
++
++#include <linux/pci.h>
++#include <linux/list.h>
++
++struct pciback_config_quirk {
++	struct list_head quirks_list;
++	struct pci_device_id devid;
++	struct pci_dev *pdev;
++};
 +
-+int pciback_publish_pci_roots(struct pciback_device *pdev,
-+			      publish_pci_root_cb publish_cb)
-+{
-+	/* The Virtual PCI bus has only one root */
-+	return publish_cb(pdev, 0, 0);
-+}
++struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev);
 +
-+void pciback_release_devices(struct pciback_device *pdev)
-+{
-+	int slot, bus;
-+	struct slot_dev_data *slot_dev = pdev->pci_dev_data;
-+	struct pci_dev *dev;
++int pciback_config_quirks_add_field(struct pci_dev *dev,
++				    struct config_field *field);
 +
-+	for (bus = 0; bus < PCI_BUS_NBR; bus++)
-+		for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
-+			dev = slot_dev->slots[bus][slot];
-+			if (dev != NULL)
-+				pcistub_put_pci_dev(dev);
-+		}
++int pciback_config_quirks_remove_field(struct pci_dev *dev, int reg);
 +
-+	kfree(slot_dev);
-+	pdev->pci_dev_data = NULL;
-+}
++int pciback_config_quirks_init(struct pci_dev *dev);
 +
-+int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev, 
-+		unsigned int *domain, unsigned int *bus, unsigned int *devfn)
-+{
-+	int slot, busnr;
-+	struct slot_dev_data *slot_dev = pdev->pci_dev_data;
-+	struct pci_dev *dev;
-+	int found = 0;
-+	unsigned long flags;
++void pciback_config_field_free(struct config_field *field);
 +
-+	spin_lock_irqsave(&slot_dev->lock, flags);
++int pciback_config_quirk_release(struct pci_dev *dev);
 +
-+	for (busnr = 0; busnr < PCI_BUS_NBR; bus++)
-+		for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
-+			dev = slot_dev->slots[busnr][slot];
-+			if (dev && dev->bus->number == pcidev->bus->number
-+				&& dev->devfn == pcidev->devfn
-+				&& pci_domain_nr(dev->bus) == pci_domain_nr(pcidev->bus)) {
-+				found = 1;
-+				*domain = 0;
-+				*bus = busnr;
-+				*devfn = PCI_DEVFN(slot,0);
-+				goto out;
-+			}
-+		}
-+out:
-+	spin_unlock_irqrestore(&slot_dev->lock, flags);
-+	return found;
++int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg);
 +
-+}
-diff --git a/drivers/xen/pciback/vpci.c b/drivers/xen/pciback/vpci.c
++#endif
+diff --git a/drivers/xen/pciback/controller.c b/drivers/xen/pciback/controller.c
 new file mode 100644
-index 0000000..a5b7ece
+index 0000000..5a7e4cc
 --- /dev/null
-+++ b/drivers/xen/pciback/vpci.c
-@@ -0,0 +1,242 @@
++++ b/drivers/xen/pciback/controller.c
+@@ -0,0 +1,442 @@
 +/*
-+ * PCI Backend - Provides a Virtual PCI bus (with real devices)
-+ *               to the frontend
++ * Copyright (C) 2007 Hewlett-Packard Development Company, L.P.
++ *      Alex Williamson <alex.williamson at hp.com>
 + *
-+ *   Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ * PCI "Controller" Backend - virtualize PCI bus topology based on PCI
++ * controllers.  Devices under the same PCI controller are exposed on the
++ * same virtual domain:bus.  Within a bus, device slots are virtualized
++ * to compact the bus.
++ *
++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 + */
 +
++#include <linux/acpi.h>
 +#include <linux/list.h>
-+#include <linux/slab.h>
 +#include <linux/pci.h>
 +#include <linux/spinlock.h>
 +#include "pciback.h"
 +
-+#define PCI_SLOT_MAX 32
++#define PCI_MAX_BUSSES	255
++#define PCI_MAX_SLOTS	32
 +
-+struct vpci_dev_data {
-+	/* Access to dev_list must be protected by lock */
-+	struct list_head dev_list[PCI_SLOT_MAX];
++struct controller_dev_entry {
++	struct list_head list;
++	struct pci_dev *dev;
++	unsigned int devfn;
++};
++
++struct controller_list_entry {
++	struct list_head list;
++	struct pci_controller *controller;
++	unsigned int domain;
++	unsigned int bus;
++	unsigned int next_devfn;
++	struct list_head dev_list;
++};
++
++struct controller_dev_data {
++	struct list_head list;
++	unsigned int next_domain;
++	unsigned int next_bus;
 +	spinlock_t lock;
 +};
 +
-+static inline struct list_head *list_first(struct list_head *head)
-+{
-+	return head->next;
-+}
++struct walk_info {
++	struct pciback_device *pdev;
++	int resource_count;
++	int root_num;
++};
 +
 +struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
 +				    unsigned int domain, unsigned int bus,
 +				    unsigned int devfn)
 +{
-+	struct pci_dev_entry *entry;
++	struct controller_dev_data *dev_data = pdev->pci_dev_data;
++	struct controller_dev_entry *dev_entry;
++	struct controller_list_entry *cntrl_entry;
 +	struct pci_dev *dev = NULL;
-+	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
 +	unsigned long flags;
 +
-+	if (domain != 0 || bus != 0)
-+		return NULL;
++	spin_lock_irqsave(&dev_data->lock, flags);
 +
-+	if (PCI_SLOT(devfn) < PCI_SLOT_MAX) {
-+		spin_lock_irqsave(&vpci_dev->lock, flags);
++	list_for_each_entry(cntrl_entry, &dev_data->list, list) {
++		if (cntrl_entry->domain != domain ||
++		    cntrl_entry->bus != bus)
++			continue;
 +
-+		list_for_each_entry(entry,
-+				    &vpci_dev->dev_list[PCI_SLOT(devfn)],
-+				    list) {
-+			if (PCI_FUNC(entry->dev->devfn) == PCI_FUNC(devfn)) {
-+				dev = entry->dev;
-+				break;
++		list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) {
++			if (devfn == dev_entry->devfn) {
++				dev = dev_entry->dev;
++				goto found;
 +			}
 +		}
-+
-+		spin_unlock_irqrestore(&vpci_dev->lock, flags);
 +	}
-+	return dev;
-+}
-+
-+static inline int match_slot(struct pci_dev *l, struct pci_dev *r)
-+{
-+	if (pci_domain_nr(l->bus) == pci_domain_nr(r->bus)
-+	    && l->bus == r->bus && PCI_SLOT(l->devfn) == PCI_SLOT(r->devfn))
-+		return 1;
++found:
++	spin_unlock_irqrestore(&dev_data->lock, flags);
 +
-+	return 0;
++	return dev;
 +}
 +
 +int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
 +			int devid, publish_pci_dev_cb publish_cb)
 +{
-+	int err = 0, slot, func;
-+	struct pci_dev_entry *t, *dev_entry;
-+	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
++	struct controller_dev_data *dev_data = pdev->pci_dev_data;
++	struct controller_dev_entry *dev_entry;
++	struct controller_list_entry *cntrl_entry;
++	struct pci_controller *dev_controller = PCI_CONTROLLER(dev);
 +	unsigned long flags;
++	int ret = 0, found = 0;
 +
-+	if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) {
-+		err = -EFAULT;
-+		xenbus_dev_fatal(pdev->xdev, err,
-+				 "Can't export bridges on the virtual PCI bus");
-+		goto out;
-+	}
++	spin_lock_irqsave(&dev_data->lock, flags);
 +
-+	dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL);
-+	if (!dev_entry) {
-+		err = -ENOMEM;
-+		xenbus_dev_fatal(pdev->xdev, err,
-+				 "Error adding entry to virtual PCI bus");
-+		goto out;
++	/* Look to see if we already have a domain:bus for this controller */
++	list_for_each_entry(cntrl_entry, &dev_data->list, list) {
++		if (cntrl_entry->controller == dev_controller) {
++			found = 1;
++			break;
++		}
 +	}
 +
-+	dev_entry->dev = dev;
-+
-+	spin_lock_irqsave(&vpci_dev->lock, flags);
++	if (!found) {
++		cntrl_entry = kmalloc(sizeof(*cntrl_entry), GFP_ATOMIC);
++		if (!cntrl_entry) {
++			ret = -ENOMEM;
++			goto out;
++		}
 +
-+	/* Keep multi-function devices together on the virtual PCI bus */
-+	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
-+		if (!list_empty(&vpci_dev->dev_list[slot])) {
-+			t = list_entry(list_first(&vpci_dev->dev_list[slot]),
-+				       struct pci_dev_entry, list);
++		cntrl_entry->controller = dev_controller;
++		cntrl_entry->next_devfn = PCI_DEVFN(0, 0);
 +
-+			if (match_slot(dev, t->dev)) {
-+				pr_info("pciback: vpci: %s: "
-+					"assign to virtual slot %d func %d\n",
-+					pci_name(dev), slot,
-+					PCI_FUNC(dev->devfn));
-+				list_add_tail(&dev_entry->list,
-+					      &vpci_dev->dev_list[slot]);
-+				func = PCI_FUNC(dev->devfn);
-+				goto unlock;
-+			}
++		cntrl_entry->domain = dev_data->next_domain;
++		cntrl_entry->bus = dev_data->next_bus++;
++		if (dev_data->next_bus > PCI_MAX_BUSSES) {
++			dev_data->next_domain++;
++			dev_data->next_bus = 0;
 +		}
++
++		INIT_LIST_HEAD(&cntrl_entry->dev_list);
++
++		list_add_tail(&cntrl_entry->list, &dev_data->list);
 +	}
 +
-+	/* Assign to a new slot on the virtual PCI bus */
-+	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
-+		if (list_empty(&vpci_dev->dev_list[slot])) {
-+			printk(KERN_INFO
-+			       "pciback: vpci: %s: assign to virtual slot %d\n",
-+			       pci_name(dev), slot);
-+			list_add_tail(&dev_entry->list,
-+				      &vpci_dev->dev_list[slot]);
-+			func = PCI_FUNC(dev->devfn);
-+			goto unlock;
++	if (PCI_SLOT(cntrl_entry->next_devfn) > PCI_MAX_SLOTS) {
++		/*
++		 * While it seems unlikely, this can actually happen if
++		 * a controller has P2P bridges under it.
++		 */
++		xenbus_dev_fatal(pdev->xdev, -ENOSPC, "Virtual bus %04x:%02x "
++				 "is full, no room to export %04x:%02x:%02x.%x",
++				 cntrl_entry->domain, cntrl_entry->bus,
++				 pci_domain_nr(dev->bus), dev->bus->number,
++				 PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
++		ret = -ENOSPC;
++		goto out;
++	}
++
++	dev_entry = kmalloc(sizeof(*dev_entry), GFP_ATOMIC);
++	if (!dev_entry) {
++		if (list_empty(&cntrl_entry->dev_list)) {
++			list_del(&cntrl_entry->list);
++			kfree(cntrl_entry);
 +		}
++		ret = -ENOMEM;
++		goto out;
 +	}
 +
-+	err = -ENOMEM;
-+	xenbus_dev_fatal(pdev->xdev, err,
-+			 "No more space on root virtual PCI bus");
++	dev_entry->dev = dev;
++	dev_entry->devfn = cntrl_entry->next_devfn;
++
++	list_add_tail(&dev_entry->list, &cntrl_entry->dev_list);
 +
-+      unlock:
-+	spin_unlock_irqrestore(&vpci_dev->lock, flags);
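++	/* PCI_DEVFN(1, 0) == 8: advance a full slot, staying at func 0 */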
++	cntrl_entry->next_devfn += PCI_DEVFN(1, 0);
 +
-+	/* Publish this device. */
-+	if(!err)
-+		err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, func), devid);
++out:
++	spin_unlock_irqrestore(&dev_data->lock, flags);
 +
-+      out:
-+	return err;
++	/* TODO: Publish virtual domain:bus:slot.func here. */
++
++	return ret;
 +}
 +
 +void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
 +{
-+	int slot;
-+	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
++	struct controller_dev_data *dev_data = pdev->pci_dev_data;
++	struct controller_list_entry *cntrl_entry;
++	struct controller_dev_entry *dev_entry = NULL;
 +	struct pci_dev *found_dev = NULL;
 +	unsigned long flags;
 +
-+	spin_lock_irqsave(&vpci_dev->lock, flags);
++	spin_lock_irqsave(&dev_data->lock, flags);
 +
-+	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
-+		struct pci_dev_entry *e, *tmp;
-+		list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
-+					 list) {
-+			if (e->dev == dev) {
-+				list_del(&e->list);
-+				found_dev = e->dev;
-+				kfree(e);
-+				goto out;
++	list_for_each_entry(cntrl_entry, &dev_data->list, list) {
++		if (cntrl_entry->controller != PCI_CONTROLLER(dev))
++			continue;
++
++		list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) {
++			if (dev_entry->dev == dev) {
++				found_dev = dev_entry->dev;
++				break;
 +			}
 +		}
 +	}
 +
-+      out:
-+	spin_unlock_irqrestore(&vpci_dev->lock, flags);
++	if (!found_dev) {
++		spin_unlock_irqrestore(&dev_data->lock, flags);
++		return;
++	}
 +
-+	if (found_dev)
-+		pcistub_put_pci_dev(found_dev);
++	list_del(&dev_entry->list);
++	kfree(dev_entry);
++
++	if (list_empty(&cntrl_entry->dev_list)) {
++		list_del(&cntrl_entry->list);
++		kfree(cntrl_entry);
++	}
++
++	spin_unlock_irqrestore(&dev_data->lock, flags);
++	pcistub_put_pci_dev(found_dev);
 +}
 +
 +int pciback_init_devices(struct pciback_device *pdev)
 +{
-+	int slot;
-+	struct vpci_dev_data *vpci_dev;
++	struct controller_dev_data *dev_data;
 +
-+	vpci_dev = kmalloc(sizeof(*vpci_dev), GFP_KERNEL);
-+	if (!vpci_dev)
++	dev_data = kmalloc(sizeof(*dev_data), GFP_KERNEL);
++	if (!dev_data)
 +		return -ENOMEM;
 +
-+	spin_lock_init(&vpci_dev->lock);
-+
-+	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
-+		INIT_LIST_HEAD(&vpci_dev->dev_list[slot]);
-+	}
-+
-+	pdev->pci_dev_data = vpci_dev;
-+
-+	return 0;
-+}
++	spin_lock_init(&dev_data->lock);
 +
-+int pciback_publish_pci_roots(struct pciback_device *pdev,
-+			      publish_pci_root_cb publish_cb)
-+{
-+	/* The Virtual PCI bus has only one root */
-+	return publish_cb(pdev, 0, 0);
-+}
++	INIT_LIST_HEAD(&dev_data->list);
 +
-+void pciback_release_devices(struct pciback_device *pdev)
-+{
-+	int slot;
-+	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
++	/* Starting domain:bus numbers */
++	dev_data->next_domain = 0;
++	dev_data->next_bus = 0;
 +
-+	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
-+		struct pci_dev_entry *e, *tmp;
-+		list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
-+					 list) {
-+			list_del(&e->list);
-+			pcistub_put_pci_dev(e->dev);
-+			kfree(e);
-+		}
-+	}
++	pdev->pci_dev_data = dev_data;
 +
-+	kfree(vpci_dev);
-+	pdev->pci_dev_data = NULL;
++	return 0;
 +}
 +
-+int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev, 
-+		unsigned int *domain, unsigned int *bus, unsigned int *devfn)
++static acpi_status write_xenbus_resource(struct acpi_resource *res, void *data)
 +{
-+	struct pci_dev_entry *entry;
-+	struct pci_dev *dev = NULL;
-+	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
-+	unsigned long flags;
-+	int found = 0, slot;
-+
-+	spin_lock_irqsave(&vpci_dev->lock, flags);
-+	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
-+		list_for_each_entry(entry,
-+			    &vpci_dev->dev_list[slot],
-+			    list) {
-+			dev = entry->dev;
-+			if (dev && dev->bus->number == pcidev->bus->number
-+				&& pci_domain_nr(dev->bus) == pci_domain_nr(pcidev->bus)
-+				&& dev->devfn == pcidev->devfn)
-+			{
-+				found = 1;
-+				*domain = 0;
-+				*bus = 0;
-+				*devfn = PCI_DEVFN(slot, PCI_FUNC(pcidev->devfn));
-+			}
-+		}		
-+	}
-+	spin_unlock_irqrestore(&vpci_dev->lock, flags);
-+	return found;
-+}
-diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
-new file mode 100644
-index 0000000..4d56c45
---- /dev/null
-+++ b/drivers/xen/pciback/xenbus.c
-@@ -0,0 +1,710 @@
-+/*
-+ * PCI Backend Xenbus Setup - handles setup with frontend and xend
-+ *
-+ *   Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
-+ */
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/list.h>
-+#include <linux/vmalloc.h>
-+#include <xen/xenbus.h>
-+#include <xen/evtchn.h>
-+#include "pciback.h"
++	struct walk_info *info = data;
++	struct acpi_resource_address64 addr;
++	acpi_status status;
++	int i, len, err;
++	char str[32], tmp[3];
++	unsigned char *ptr, *buf;
 +
-+#define INVALID_EVTCHN_IRQ  (-1)
-+struct workqueue_struct *pciback_wq;
++	status = acpi_resource_to_address64(res, &addr);
 +
-+static struct pciback_device *alloc_pdev(struct xenbus_device *xdev)
-+{
-+	struct pciback_device *pdev;
++	/* Do we care about this range?  Let's check. */
++	if (!ACPI_SUCCESS(status) ||
++	    !(addr.resource_type == ACPI_MEMORY_RANGE ||
++	      addr.resource_type == ACPI_IO_RANGE) ||
++	    !addr.address_length || addr.producer_consumer != ACPI_PRODUCER)
++		return AE_OK;
 +
-+	pdev = kzalloc(sizeof(struct pciback_device), GFP_KERNEL);
-+	if (pdev == NULL)
-+		goto out;
-+	dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev);
++	/*
++	 * Furthermore, we really only care to tell the guest about
++	 * address ranges that require address translation of some sort.
++	 */
++	if (!(addr.resource_type == ACPI_MEMORY_RANGE &&
++	      addr.info.mem.translation) &&
++	    !(addr.resource_type == ACPI_IO_RANGE &&
++	      addr.info.io.translation))
++		return AE_OK;
 +
-+	pdev->xdev = xdev;
-+	xdev->dev.driver_data = pdev;
++	/* Store the resource in xenbus for the guest */
++	len = snprintf(str, sizeof(str), "root-%d-resource-%d",
++		       info->root_num, info->resource_count);
++	if (unlikely(len >= (sizeof(str) - 1)))
++		return AE_OK;
 +
-+	spin_lock_init(&pdev->dev_lock);
++	buf = kzalloc((sizeof(*res) * 2) + 1, GFP_KERNEL);
++	if (!buf)
++		return AE_OK;
 +
-+	pdev->sh_area = NULL;
-+	pdev->sh_info = NULL;
-+	pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
-+	pdev->be_watching = 0;
++	/* Clean out resource_source */
++	res->data.address64.resource_source.index = 0xFF;
++	res->data.address64.resource_source.string_length = 0;
++	res->data.address64.resource_source.string_ptr = NULL;
 +
-+	INIT_WORK(&pdev->op_work, pciback_do_op, pdev);
++	ptr = (unsigned char *)res;
 +
-+	if (pciback_init_devices(pdev)) {
-+		kfree(pdev);
-+		pdev = NULL;
++	/* Turn the acpi_resource into an ASCII byte stream */
++	for (i = 0; i < sizeof(*res); i++) {
++		snprintf(tmp, sizeof(tmp), "%02x", ptr[i]);
++		strncat(buf, tmp, 2);
 +	}
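++	/* Two hex digits per byte plus the NUL fit exactly in the
++	 * (sizeof(*res) * 2) + 1 buffer allocated above. */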
-+      out:
-+	return pdev;
-+}
-+
-+static void pciback_disconnect(struct pciback_device *pdev)
-+{
-+	spin_lock(&pdev->dev_lock);
 +
-+	/* Ensure the guest can't trigger our handler before removing devices */
-+	if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) {
-+		unbind_from_irqhandler(pdev->evtchn_irq, pdev);
-+		pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
-+	}
++	err = xenbus_printf(XBT_NIL, info->pdev->xdev->nodename,
++			    str, "%s", buf);
 +
-+	/* If the driver domain started an op, make sure we complete it
-+	 * before releasing the shared memory */
-+	flush_workqueue(pciback_wq);
++	if (!err)
++		info->resource_count++;
 +
-+	if (pdev->sh_info != NULL) {
-+		xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_area);
-+		pdev->sh_info = NULL;
-+	}
++	kfree(buf);
 +
-+	spin_unlock(&pdev->dev_lock);
++	return AE_OK;
 +}
 +
-+static void free_pdev(struct pciback_device *pdev)
++int pciback_publish_pci_roots(struct pciback_device *pdev,
++			      publish_pci_root_cb publish_root_cb)
 +{
-+	if (pdev->be_watching)
-+		unregister_xenbus_watch(&pdev->be_watch);
-+
-+	pciback_disconnect(pdev);
-+
-+	pciback_release_devices(pdev);
++	struct controller_dev_data *dev_data = pdev->pci_dev_data;
++	struct controller_list_entry *cntrl_entry;
++	int i, root_num, len, err = 0;
++	unsigned int domain, bus;
++	char str[64];
++	struct walk_info info;
 +
-+	pdev->xdev->dev.driver_data = NULL;
-+	pdev->xdev = NULL;
++	spin_lock(&dev_data->lock);
 +
-+	kfree(pdev);
-+}
++	list_for_each_entry(cntrl_entry, &dev_data->list, list) {
++		/* First publish all the domain:bus info */
++		err = publish_root_cb(pdev, cntrl_entry->domain,
++				      cntrl_entry->bus);
++		if (err)
++			goto out;
 +
-+static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref,
-+			     int remote_evtchn)
-+{
-+	int err = 0;
-+	struct vm_struct *area;
++		/*
++		 * Now figure out which root-%d this belongs to
++		 * so we can associate resources with it.
++		 */
++		err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
++				   "root_num", "%d", &root_num);
 +
-+	dev_dbg(&pdev->xdev->dev,
-+		"Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
-+		gnt_ref, remote_evtchn);
++		if (err != 1)
++			goto out;
 +
-+	area = xenbus_map_ring_valloc(pdev->xdev, gnt_ref);
-+	if (IS_ERR(area)) {
-+		err = PTR_ERR(area);
-+		goto out;
-+	}
-+	pdev->sh_area = area;
-+	pdev->sh_info = area->addr;
++		for (i = 0; i < root_num; i++) {
++			len = snprintf(str, sizeof(str), "root-%d", i);
++			if (unlikely(len >= (sizeof(str) - 1))) {
++				err = -ENOMEM;
++				goto out;
++			}
 +
-+	err = bind_interdomain_evtchn_to_irqhandler(
-+		pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event,
-+		SA_SAMPLE_RANDOM, "pciback", pdev);
-+	if (err < 0) {
-+		xenbus_dev_fatal(pdev->xdev, err,
-+				 "Error binding event channel to IRQ");
-+		goto out;
-+	}
-+	pdev->evtchn_irq = err;
-+	err = 0;
++			err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
++					   str, "%x:%x", &domain, &bus);
++			if (err != 2)
++				goto out;
 +
-+	dev_dbg(&pdev->xdev->dev, "Attached!\n");
-+      out:
-+	return err;
-+}
++			/* Is this the one we just published? */
++			if (domain == cntrl_entry->domain &&
++			    bus == cntrl_entry->bus)
++				break;
++		}
 +
-+static int pciback_attach(struct pciback_device *pdev)
-+{
-+	int err = 0;
-+	int gnt_ref, remote_evtchn;
-+	char *magic = NULL;
++		if (i == root_num)
++			goto out;
 +
-+	spin_lock(&pdev->dev_lock);
++		info.pdev = pdev;
++		info.resource_count = 0;
++		info.root_num = i;
 +
-+	/* Make sure we only do this setup once */
-+	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
-+	    XenbusStateInitialised)
-+		goto out;
++		/* Let ACPI do the heavy lifting on decoding resources */
++		acpi_walk_resources(cntrl_entry->controller->acpi_handle,
++				    METHOD_NAME__CRS, write_xenbus_resource,
++				    &info);
 +
-+	/* Wait for frontend to state that it has published the configuration */
-+	if (xenbus_read_driver_state(pdev->xdev->otherend) !=
-+	    XenbusStateInitialised)
-+		goto out;
++		/* No resources.  OK.  On to the next one. */
++		if (!info.resource_count)
++			continue;
 +
-+	dev_dbg(&pdev->xdev->dev, "Reading frontend config\n");
++		/* Store the number of resources we wrote for this root-%d */
++		len = snprintf(str, sizeof(str), "root-%d-resources", i);
++		if (unlikely(len >= (sizeof(str) - 1))) {
++			err = -ENOMEM;
++			goto out;
++		}
 +
-+	err = xenbus_gather(XBT_NIL, pdev->xdev->otherend,
-+			    "pci-op-ref", "%u", &gnt_ref,
-+			    "event-channel", "%u", &remote_evtchn,
-+			    "magic", NULL, &magic, NULL);
-+	if (err) {
-+		/* If configuration didn't get read correctly, wait longer */
-+		xenbus_dev_fatal(pdev->xdev, err,
-+				 "Error reading configuration from frontend");
-+		goto out;
++		err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
++				    "%d", info.resource_count);
++		if (err)
++			goto out;
 +	}
 +
-+	if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) {
-+		xenbus_dev_fatal(pdev->xdev, -EFAULT,
-+				 "version mismatch (%s/%s) with pcifront - "
-+				 "halting pciback",
-+				 magic, XEN_PCI_MAGIC);
++	/* Finally, write some magic to synchronize with the guest. */
++	len = snprintf(str, sizeof(str), "root-resource-magic");
++	if (unlikely(len >= (sizeof(str) - 1))) {
++		err = -ENOMEM;
 +		goto out;
 +	}
 +
-+	err = pciback_do_attach(pdev, gnt_ref, remote_evtchn);
-+	if (err)
-+		goto out;
++	err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
++			    "%lx", (sizeof(struct acpi_resource) * 2) + 1);
 +
-+	dev_dbg(&pdev->xdev->dev, "Connecting...\n");
++out:
++	spin_unlock(&dev_data->lock);
 +
-+	err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);
-+	if (err)
-+		xenbus_dev_fatal(pdev->xdev, err,
-+				 "Error switching to connected state!");
++	return err;
++}
 +
-+	dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
-+      out:
-+	spin_unlock(&pdev->dev_lock);
++void pciback_release_devices(struct pciback_device *pdev)
++{
++	struct controller_dev_data *dev_data = pdev->pci_dev_data;
++	struct controller_list_entry *cntrl_entry, *c;
++	struct controller_dev_entry *dev_entry, *d;
 +
-+	if (magic)
-+		kfree(magic);
++	list_for_each_entry_safe(cntrl_entry, c, &dev_data->list, list) {
++		list_for_each_entry_safe(dev_entry, d,
++					 &cntrl_entry->dev_list, list) {
++			list_del(&dev_entry->list);
++			pcistub_put_pci_dev(dev_entry->dev);
++			kfree(dev_entry);
++		}
++		list_del(&cntrl_entry->list);
++		kfree(cntrl_entry);
++	}
 +
-+	return err;
++	kfree(dev_data);
++	pdev->pci_dev_data = NULL;
 +}
 +
-+static int pciback_publish_pci_dev(struct pciback_device *pdev,
-+				   unsigned int domain, unsigned int bus,
-+				   unsigned int devfn, unsigned int devid)
++int pciback_get_pcifront_dev(struct pci_dev *pcidev,
++		struct pciback_device *pdev,
++		unsigned int *domain, unsigned int *bus, unsigned int *devfn)
 +{
-+	int err;
-+	int len;
-+	char str[64];
++	struct controller_dev_data *dev_data = pdev->pci_dev_data;
++	struct controller_dev_entry *dev_entry;
++	struct controller_list_entry *cntrl_entry;
++	unsigned long flags;
++	int found = 0;
++
++	spin_lock_irqsave(&dev_data->lock, flags);
 +
-+	len = snprintf(str, sizeof(str), "vdev-%d", devid);
-+	if (unlikely(len >= (sizeof(str) - 1))) {
-+		err = -ENOMEM;
-+		goto out;
++	list_for_each_entry(cntrl_entry, &dev_data->list, list) {
++		list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) {
++			if ((dev_entry->dev->bus->number ==
++					pcidev->bus->number) &&
++				(dev_entry->dev->devfn ==
++					pcidev->devfn) &&
++				(pci_domain_nr(dev_entry->dev->bus) ==
++					pci_domain_nr(pcidev->bus))) {
++				found = 1;
++				*domain = cntrl_entry->domain;
++				*bus = cntrl_entry->bus;
++				*devfn = dev_entry->devfn;
++				goto out;
++			}
++		}
 +	}
++out:
++	spin_unlock_irqrestore(&dev_data->lock, flags);
++	return found;
-+	err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
-+			    "%04x:%02x:%02x.%02x", domain, bus,
-+			    PCI_SLOT(devfn), PCI_FUNC(devfn));
-+
-+      out:
-+	return err;
 +}
 +
-+static int pciback_export_device(struct pciback_device *pdev,
-+				 int domain, int bus, int slot, int func,
-+				 int devid)
+diff --git a/drivers/xen/pciback/passthrough.c b/drivers/xen/pciback/passthrough.c
+new file mode 100644
+index 0000000..5386bebf
+--- /dev/null
++++ b/drivers/xen/pciback/passthrough.c
+@@ -0,0 +1,178 @@
++/*
++ * PCI Backend - Provides restricted access to the real PCI bus topology
++ *               to the frontend
++ *
++ *   Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ */
++
++#include <linux/list.h>
++#include <linux/pci.h>
++#include <linux/spinlock.h>
++#include "pciback.h"
++
++struct passthrough_dev_data {
++	/* Access to dev_list must be protected by lock */
++	struct list_head dev_list;
++	spinlock_t lock;
++};
++
++struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
++				    unsigned int domain, unsigned int bus,
++				    unsigned int devfn)
 +{
-+	struct pci_dev *dev;
-+	int err = 0;
++	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
++	struct pci_dev_entry *dev_entry;
++	struct pci_dev *dev = NULL;
++	unsigned long flags;
 +
-+	dev_dbg(&pdev->xdev->dev, "exporting dom %x bus %x slot %x func %x\n",
-+		domain, bus, slot, func);
++	spin_lock_irqsave(&dev_data->lock, flags);
 +
-+	dev = pcistub_get_pci_dev_by_slot(pdev, domain, bus, slot, func);
-+	if (!dev) {
-+		err = -EINVAL;
-+		xenbus_dev_fatal(pdev->xdev, err,
-+				 "Couldn't locate PCI device "
-+				 "(%04x:%02x:%02x.%01x)! "
-+				 "perhaps already in-use?",
-+				 domain, bus, slot, func);
-+		goto out;
++	list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
++		if (domain == (unsigned int)pci_domain_nr(dev_entry->dev->bus)
++		    && bus == (unsigned int)dev_entry->dev->bus->number
++		    && devfn == dev_entry->dev->devfn) {
++			dev = dev_entry->dev;
++			break;
++		}
 +	}
 +
-+	err = pciback_add_pci_dev(pdev, dev, devid, pciback_publish_pci_dev);
-+	if (err)
-+		goto out;
++	spin_unlock_irqrestore(&dev_data->lock, flags);
 +
-+	/* TODO: It'd be nice to export a bridge and have all of its children
-+	 * get exported with it. This may be best done in xend (which will
-+	 * have to calculate resource usage anyway) but we probably want to
-+	 * put something in here to ensure that if a bridge gets given to a
-+	 * driver domain, that all devices under that bridge are not given
-+	 * to other driver domains (as he who controls the bridge can disable
-+	 * it and stop the other devices from working).
-+	 */
-+      out:
-+	return err;
++	return dev;
 +}
 +
-+static int pciback_remove_device(struct pciback_device *pdev,
-+				 int domain, int bus, int slot, int func)
++int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
++			int devid, publish_pci_dev_cb publish_cb)
 +{
-+	int err = 0;
-+	struct pci_dev *dev;
++	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
++	struct pci_dev_entry *dev_entry;
++	unsigned long flags;
++	unsigned int domain, bus, devfn;
++	int err;
 +
-+	dev_dbg(&pdev->xdev->dev, "removing dom %x bus %x slot %x func %x\n",
-+		domain, bus, slot, func);
++	dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL);
++	if (!dev_entry)
++		return -ENOMEM;
++	dev_entry->dev = dev;
 +
-+	dev = pciback_get_pci_dev(pdev, domain, bus, PCI_DEVFN(slot, func));
-+	if (!dev) {
-+		err = -EINVAL;
-+		dev_dbg(&pdev->xdev->dev, "Couldn't locate PCI device "
-+			"(%04x:%02x:%02x.%01x)! not owned by this domain\n",
-+			domain, bus, slot, func);
-+		goto out;
-+	}
++	spin_lock_irqsave(&dev_data->lock, flags);
++	list_add_tail(&dev_entry->list, &dev_data->dev_list);
++	spin_unlock_irqrestore(&dev_data->lock, flags);
++
++	/* Publish this device. */
++	domain = (unsigned int)pci_domain_nr(dev->bus);
++	bus = (unsigned int)dev->bus->number;
++	devfn = dev->devfn;
++	err = publish_cb(pdev, domain, bus, devfn, devid);
 +
-+	pciback_release_pci_dev(pdev, dev);
-+	
-+      out:
 +	return err;
 +}
 +
-+static int pciback_publish_pci_root(struct pciback_device *pdev,
-+				    unsigned int domain, unsigned int bus)
++void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
 +{
-+	unsigned int d, b;
-+	int i, root_num, len, err;
-+	char str[64];
-+
-+	dev_dbg(&pdev->xdev->dev, "Publishing pci roots\n");
++	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
++	struct pci_dev_entry *dev_entry, *t;
++	struct pci_dev *found_dev = NULL;
++	unsigned long flags;
 +
-+	err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
-+			   "root_num", "%d", &root_num);
-+	if (err == 0 || err == -ENOENT)
-+		root_num = 0;
-+	else if (err < 0)
-+		goto out;
++	spin_lock_irqsave(&dev_data->lock, flags);
 +
-+	/* Verify that we haven't already published this pci root */
-+	for (i = 0; i < root_num; i++) {
-+		len = snprintf(str, sizeof(str), "root-%d", i);
-+		if (unlikely(len >= (sizeof(str) - 1))) {
-+			err = -ENOMEM;
-+			goto out;
++	list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
++		if (dev_entry->dev == dev) {
++			list_del(&dev_entry->list);
++			found_dev = dev_entry->dev;
++			kfree(dev_entry);
 +		}
++	}
 +
-+		err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
-+				   str, "%x:%x", &d, &b);
-+		if (err < 0)
-+			goto out;
-+		if (err != 2) {
-+			err = -EINVAL;
-+			goto out;
-+		}
++	spin_unlock_irqrestore(&dev_data->lock, flags);
 +
-+		if (d == domain && b == bus) {
-+			err = 0;
-+			goto out;
-+		}
-+	}
++	if (found_dev)
++		pcistub_put_pci_dev(found_dev);
++}
 +
-+	len = snprintf(str, sizeof(str), "root-%d", root_num);
-+	if (unlikely(len >= (sizeof(str) - 1))) {
-+		err = -ENOMEM;
-+		goto out;
-+	}
++int pciback_init_devices(struct pciback_device *pdev)
++{
++	struct passthrough_dev_data *dev_data;
 +
-+	dev_dbg(&pdev->xdev->dev, "writing root %d at %04x:%02x\n",
-+		root_num, domain, bus);
++	dev_data = kmalloc(sizeof(*dev_data), GFP_KERNEL);
++	if (!dev_data)
++		return -ENOMEM;
 +
-+	err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
-+			    "%04x:%02x", domain, bus);
-+	if (err)
-+		goto out;
++	spin_lock_init(&dev_data->lock);
 +
-+	err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
-+			    "root_num", "%d", (root_num + 1));
++	INIT_LIST_HEAD(&dev_data->dev_list);
 +
-+      out:
-+	return err;
++	pdev->pci_dev_data = dev_data;
++
++	return 0;
 +}
 +
-+static int pciback_reconfigure(struct pciback_device *pdev)
++int pciback_publish_pci_roots(struct pciback_device *pdev,
++			      publish_pci_root_cb publish_root_cb)
 +{
 +	int err = 0;
-+	int num_devs;
-+	int domain, bus, slot, func;
-+	int substate;
-+	int i, len;
-+	char state_str[64];
-+	char dev_str[64];
++	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
++	struct pci_dev_entry *dev_entry, *e;
++	struct pci_dev *dev;
++	int found;
++	unsigned int domain, bus;
 +
-+	spin_lock(&pdev->dev_lock);
++	spin_lock(&dev_data->lock);
 +
-+	dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n");
++	list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
++		/* Only publish this device as a root if none of its
++		 * parent bridges are exported
++		 */
++		found = 0;
++		dev = dev_entry->dev->bus->self;
++		for (; !found && dev != NULL; dev = dev->bus->self) {
++			list_for_each_entry(e, &dev_data->dev_list, list) {
++				if (dev == e->dev) {
++					found = 1;
++					break;
++				}
++			}
++		}
 +
-+	/* Make sure we only reconfigure once */
-+	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
-+	    XenbusStateReconfiguring)
-+		goto out;
++		domain = (unsigned int)pci_domain_nr(dev_entry->dev->bus);
++		bus = (unsigned int)dev_entry->dev->bus->number;
 +
-+	err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
-+			   &num_devs);
-+	if (err != 1) {
-+		if (err >= 0)
-+			err = -EINVAL;
-+		xenbus_dev_fatal(pdev->xdev, err,
-+				 "Error reading number of devices");
-+		goto out;
++		if (!found) {
++			err = publish_root_cb(pdev, domain, bus);
++			if (err)
++				break;
++		}
 +	}
 +
-+	for (i = 0; i < num_devs; i++) {
-+		len = snprintf(state_str, sizeof(state_str), "state-%d", i);
-+		if (unlikely(len >= (sizeof(state_str) - 1))) {
-+			err = -ENOMEM;
-+			xenbus_dev_fatal(pdev->xdev, err,
-+					 "String overflow while reading "
-+					 "configuration");
-+			goto out;
-+		}
-+		err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, state_str,
-+				   "%d", &substate);
-+		if (err != 1) 
-+			substate = XenbusStateUnknown;
++	spin_unlock(&dev_data->lock);
 +
-+		switch (substate) {
-+		case XenbusStateInitialising:
-+			dev_dbg(&pdev->xdev->dev, "Attaching dev-%d ...\n", i);
++	return err;
++}
 +
-+			len = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
-+			if (unlikely(len >= (sizeof(dev_str) - 1))) {
-+				err = -ENOMEM;
-+				xenbus_dev_fatal(pdev->xdev, err,
-+						 "String overflow while "
-+						 "reading configuration");
-+				goto out;
-+			}
-+			err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
-+					   dev_str, "%x:%x:%x.%x",
-+					   &domain, &bus, &slot, &func);
-+			if (err < 0) {
-+				xenbus_dev_fatal(pdev->xdev, err,
-+						 "Error reading device "
-+						 "configuration");
-+				goto out;
-+			}
-+			if (err != 4) {
-+				err = -EINVAL;
-+				xenbus_dev_fatal(pdev->xdev, err,
-+						 "Error parsing pci device "
-+						 "configuration");
-+				goto out;
-+			}
-+	
-+			err = pciback_export_device(pdev, domain, bus, slot,
-+						    func, i);
-+			if (err)
-+				goto out;
++void pciback_release_devices(struct pciback_device *pdev)
++{
++	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
++	struct pci_dev_entry *dev_entry, *t;
 +
-+			/* Publish pci roots. */
-+			err = pciback_publish_pci_roots(pdev, pciback_publish_pci_root);
-+			if (err) {
-+				xenbus_dev_fatal(pdev->xdev, err,
-+						 "Error while publish PCI root"
-+						 "buses for frontend");
-+				goto out;
-+			}
++	list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
++		list_del(&dev_entry->list);
++		pcistub_put_pci_dev(dev_entry->dev);
++		kfree(dev_entry);
++	}
 +
-+			err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
-+					    state_str, "%d",
-+					    XenbusStateInitialised);
-+			if (err) {
-+				xenbus_dev_fatal(pdev->xdev, err,
-+						 "Error switching substate of "
-+						 "dev-%d\n", i);
-+				goto out;
-+			}	
-+			break;
++	kfree(dev_data);
++	pdev->pci_dev_data = NULL;
++}
++
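++/* Passthrough mode exposes the host's real topology one-to-one, so the
++ * virtual domain:bus:devfn reported to pcifront is simply the physical
++ * one. */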
++int pciback_get_pcifront_dev(struct pci_dev *pcidev,
++			     struct pciback_device *pdev,
++			     unsigned int *domain, unsigned int *bus,
++			     unsigned int *devfn)
++			     unsigned int *devfn)
++{
++	*bus = pcidev->bus->number;
++	*devfn = pcidev->devfn;
++	return 1;
++}
+diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
+new file mode 100644
+index 0000000..c4d1071
+--- /dev/null
++++ b/drivers/xen/pciback/pci_stub.c
+@@ -0,0 +1,1371 @@
++/*
++ * PCI Stub Driver - Grabs devices in backend to be exported later
++ *
++ * Ryan Wilson <hap9 at epoch.ncsc.mil>
++ * Chris Bookholt <hap10 at epoch.ncsc.mil>
++ */
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/rwsem.h>
++#include <linux/list.h>
++#include <linux/spinlock.h>
++#include <linux/kref.h>
++#include <linux/pci.h>
++#include <linux/wait.h>
++#include <linux/sched.h>
++#include <linux/atomic.h>
++#include <xen/events.h>
++#include <asm/xen/pci.h>
++#include <asm/xen/hypervisor.h>
++#include "pciback.h"
++#include "conf_space.h"
++#include "conf_space_quirks.h"
++
++#define DRV_NAME	"pciback"
++
++static char *pci_devs_to_hide;
++wait_queue_head_t aer_wait_queue;
++/* Semaphore to synchronize AER handling with pciback remove/reconfigure
++ * ops: we want to avoid pciback devices being removed in the middle of
++ * AER handling.
++ */
++static DECLARE_RWSEM(pcistub_sem);
++module_param_named(hide, pci_devs_to_hide, charp, 0444);
++
++struct pcistub_device_id {
++	struct list_head slot_list;
++	int domain;
++	unsigned char bus;
++	unsigned int devfn;
++};
++static LIST_HEAD(pcistub_device_ids);
++static DEFINE_SPINLOCK(device_ids_lock);
 +
-+		case XenbusStateClosing:
-+			dev_dbg(&pdev->xdev->dev, "Detaching dev-%d ...\n", i);
++struct pcistub_device {
++	struct kref kref;
++	struct list_head dev_list;
++	spinlock_t lock;
 +
-+			len = snprintf(dev_str, sizeof(dev_str), "vdev-%d", i);
-+			if (unlikely(len >= (sizeof(dev_str) - 1))) {
-+				err = -ENOMEM;
-+				xenbus_dev_fatal(pdev->xdev, err,
-+						 "String overflow while "
-+						 "reading configuration");
-+				goto out;
-+			}
-+			err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
-+					   dev_str, "%x:%x:%x.%x",
-+					   &domain, &bus, &slot, &func);
-+			if (err < 0) {
-+				xenbus_dev_fatal(pdev->xdev, err,
-+						 "Error reading device "
-+						 "configuration");
-+				goto out;
-+			}
-+			if (err != 4) {
-+				err = -EINVAL;
-+				xenbus_dev_fatal(pdev->xdev, err,
-+						 "Error parsing pci device "
-+						 "configuration");
-+				goto out;
-+			}
++	struct pci_dev *dev;
++	struct pciback_device *pdev; /* non-NULL if struct pci_dev is in use */
++};
 +
-+			err = pciback_remove_device(pdev, domain, bus, slot,
-+						    func);
-+			if(err)
-+				goto out;
++/* Access to the pcistub_devices & seized_devices lists and the
++ * initialize_devices flag must be protected by pcistub_devices_lock.
++ */
++static DEFINE_SPINLOCK(pcistub_devices_lock);
++static LIST_HEAD(pcistub_devices);
 +
-+			/* TODO: If at some point we implement support for pci
-+			 * root hot-remove on pcifront side, we'll need to
-+			 * remove unnecessary xenstore nodes of pci roots here.
-+			 */
++/* wait for device_initcall before initializing our devices
++ * (see pcistub_init_devices_late)
++ */
++static int initialize_devices;
++static LIST_HEAD(seized_devices);
 +
-+			break;
++static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev)
++{
++	struct pcistub_device *psdev;
 +
-+		default:
-+			break;
-+		}
-+	}
++	dev_dbg(&dev->dev, "pcistub_device_alloc\n");
 +
-+	err = xenbus_switch_state(pdev->xdev, XenbusStateReconfigured);
-+	if (err) {
-+		xenbus_dev_fatal(pdev->xdev, err,
-+				 "Error switching to reconfigured state!");
-+		goto out;
++	psdev = kzalloc(sizeof(*psdev), GFP_ATOMIC);
++	if (!psdev)
++		return NULL;
++
++	psdev->dev = pci_dev_get(dev);
++	if (!psdev->dev) {
++		kfree(psdev);
++		return NULL;
 +	}
-+	
-+      out:
-+	spin_unlock(&pdev->dev_lock);
 +
-+	return 0;
++	kref_init(&psdev->kref);
++	spin_lock_init(&psdev->lock);
++
++	return psdev;
 +}
 +
-+static void pciback_frontend_changed(struct xenbus_device *xdev,
-+				     enum xenbus_state fe_state)
++/* Don't call this directly as it's called by pcistub_device_put */
++static void pcistub_device_release(struct kref *kref)
 +{
-+	struct pciback_device *pdev = xdev->dev.driver_data;
++	struct pcistub_device *psdev;
 +
-+	dev_dbg(&xdev->dev, "fe state changed %d\n", fe_state);
++	psdev = container_of(kref, struct pcistub_device, kref);
 +
-+	switch (fe_state) {
-+	case XenbusStateInitialised:
-+		pciback_attach(pdev);
-+		break;
++	dev_dbg(&psdev->dev->dev, "pcistub_device_release\n");
 +
-+	case XenbusStateReconfiguring:
-+		pciback_reconfigure(pdev);
-+		break;
++	xen_unregister_device_domain_owner(psdev->dev);
 +
-+	case XenbusStateConnected:
-+		/* pcifront switched its state from reconfiguring to connected.
-+		 * Then switch to connected state.
-+		 */
-+		xenbus_switch_state(xdev, XenbusStateConnected);
-+		break;
++	/* Clean-up the device */
++	pciback_reset_device(psdev->dev);
++	pciback_config_free_dyn_fields(psdev->dev);
++	pciback_config_free_dev(psdev->dev);
++	kfree(pci_get_drvdata(psdev->dev));
++	pci_set_drvdata(psdev->dev, NULL);
 +
-+	case XenbusStateClosing:
-+		pciback_disconnect(pdev);
-+		xenbus_switch_state(xdev, XenbusStateClosing);
-+		break;
++	pci_dev_put(psdev->dev);
 +
-+	case XenbusStateClosed:
-+		pciback_disconnect(pdev);
-+		xenbus_switch_state(xdev, XenbusStateClosed);
-+		if (xenbus_dev_is_online(xdev))
-+			break;
-+		/* fall through if not online */
-+	case XenbusStateUnknown:
-+		dev_dbg(&xdev->dev, "frontend is gone! unregister device\n");
-+		device_unregister(&xdev->dev);
-+		break;
++	kfree(psdev);
++}
 +
-+	default:
-+		break;
-+	}
++static inline void pcistub_device_get(struct pcistub_device *psdev)
++{
++	kref_get(&psdev->kref);
 +}
 +
-+static int pciback_setup_backend(struct pciback_device *pdev)
++static inline void pcistub_device_put(struct pcistub_device *psdev)
 +{
-+	/* Get configuration from xend (if available now) */
-+	int domain, bus, slot, func;
-+	int err = 0;
-+	int i, num_devs;
-+	char dev_str[64];
-+	char state_str[64];
++	kref_put(&psdev->kref, pcistub_device_release);
++}
 +
-+	spin_lock(&pdev->dev_lock);
++static struct pcistub_device *pcistub_device_find(int domain, int bus,
++						  int slot, int func)
++{
++	struct pcistub_device *psdev = NULL;
++	unsigned long flags;
 +
-+	/* It's possible we could get the call to setup twice, so make sure
-+	 * we're not already connected.
-+	 */
-+	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
-+	    XenbusStateInitWait)
-+		goto out;
++	spin_lock_irqsave(&pcistub_devices_lock, flags);
 +
-+	dev_dbg(&pdev->xdev->dev, "getting be setup\n");
++	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
++		if (psdev->dev != NULL
++		    && domain == pci_domain_nr(psdev->dev->bus)
++		    && bus == psdev->dev->bus->number
++		    && PCI_DEVFN(slot, func) == psdev->dev->devfn) {
++			pcistub_device_get(psdev);
++			goto out;
++		}
++	}
 +
-+	err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
-+			   &num_devs);
-+	if (err != 1) {
-+		if (err >= 0)
-+			err = -EINVAL;
-+		xenbus_dev_fatal(pdev->xdev, err,
-+				 "Error reading number of devices");
-+		goto out;
++	/* didn't find it */
++	psdev = NULL;
++
++out:
++	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++	return psdev;
++}
++
++static struct pci_dev *pcistub_device_get_pci_dev(struct pciback_device *pdev,
++						  struct pcistub_device *psdev)
++{
++	struct pci_dev *pci_dev = NULL;
++	unsigned long flags;
++
++	pcistub_device_get(psdev);
++
++	spin_lock_irqsave(&psdev->lock, flags);
++	if (!psdev->pdev) {
++		psdev->pdev = pdev;
++		pci_dev = psdev->dev;
 +	}
++	spin_unlock_irqrestore(&psdev->lock, flags);
 +
-+	for (i = 0; i < num_devs; i++) {
-+		int l = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
-+		if (unlikely(l >= (sizeof(dev_str) - 1))) {
-+			err = -ENOMEM;
-+			xenbus_dev_fatal(pdev->xdev, err,
-+					 "String overflow while reading "
-+					 "configuration");
-+			goto out;
-+		}
++	if (!pci_dev)
++		pcistub_device_put(psdev);
 +
-+		err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, dev_str,
-+				   "%x:%x:%x.%x", &domain, &bus, &slot, &func);
-+		if (err < 0) {
-+			xenbus_dev_fatal(pdev->xdev, err,
-+					 "Error reading device configuration");
-+			goto out;
-+		}
-+		if (err != 4) {
-+			err = -EINVAL;
-+			xenbus_dev_fatal(pdev->xdev, err,
-+					 "Error parsing pci device "
-+					 "configuration");
-+			goto out;
++	return pci_dev;
++}
++
++struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev,
++					    int domain, int bus,
++					    int slot, int func)
++{
++	struct pcistub_device *psdev;
++	struct pci_dev *found_dev = NULL;
++	unsigned long flags;
++
++	spin_lock_irqsave(&pcistub_devices_lock, flags);
++
++	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
++		if (psdev->dev != NULL
++		    && domain == pci_domain_nr(psdev->dev->bus)
++		    && bus == psdev->dev->bus->number
++		    && PCI_DEVFN(slot, func) == psdev->dev->devfn) {
++			found_dev = pcistub_device_get_pci_dev(pdev, psdev);
++			break;
 +		}
++	}
 +
-+		err = pciback_export_device(pdev, domain, bus, slot, func, i);
-+		if (err)
-+			goto out;
++	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++	return found_dev;
++}
 +
-+		/* Switch substate of this device. */
-+		l = snprintf(state_str, sizeof(state_str), "state-%d", i);
-+		if (unlikely(l >= (sizeof(state_str) - 1))) {
-+			err = -ENOMEM;
-+			xenbus_dev_fatal(pdev->xdev, err,
-+					 "String overflow while reading "
-+					 "configuration");
-+			goto out;
++struct pci_dev *pcistub_get_pci_dev(struct pciback_device *pdev,
++				    struct pci_dev *dev)
++{
++	struct pcistub_device *psdev;
++	struct pci_dev *found_dev = NULL;
++	unsigned long flags;
++
++	spin_lock_irqsave(&pcistub_devices_lock, flags);
++
++	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
++		if (psdev->dev == dev) {
++			found_dev = pcistub_device_get_pci_dev(pdev, psdev);
++			break;
 +		}
-+		err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, state_str,
-+				    "%d", XenbusStateInitialised);
-+		if (err) {
-+			xenbus_dev_fatal(pdev->xdev, err, "Error switching "
-+					 "substate of dev-%d\n", i);
-+			goto out;
-+		}	
 +	}
 +
-+	err = pciback_publish_pci_roots(pdev, pciback_publish_pci_root);
-+	if (err) {
-+		xenbus_dev_fatal(pdev->xdev, err,
-+				 "Error while publish PCI root buses "
-+				 "for frontend");
-+		goto out;
++	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++	return found_dev;
++}
++
++void pcistub_put_pci_dev(struct pci_dev *dev)
++{
++	struct pcistub_device *psdev, *found_psdev = NULL;
++	unsigned long flags;
++
++	spin_lock_irqsave(&pcistub_devices_lock, flags);
++
++	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
++		if (psdev->dev == dev) {
++			found_psdev = psdev;
++			break;
++		}
 +	}
 +
-+	err = xenbus_switch_state(pdev->xdev, XenbusStateInitialised);
-+	if (err)
-+		xenbus_dev_fatal(pdev->xdev, err,
-+				 "Error switching to initialised state!");
++	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
 +
-+      out:
-+	spin_unlock(&pdev->dev_lock);
++	/* Hold this lock to avoid breaking the link between
++	* pcistub and pciback while AER is being processed.
++	*/
++	down_write(&pcistub_sem);
++	/* Clean up our device
++	 * (so it's ready for the next domain)
++	 */
++	pciback_reset_device(found_psdev->dev);
++	pciback_config_free_dyn_fields(found_psdev->dev);
++	pciback_config_reset_dev(found_psdev->dev);
 +
-+	if (!err)
-+		/* see if pcifront is already configured (if not, we'll wait) */
-+		pciback_attach(pdev);
++	spin_lock_irqsave(&found_psdev->lock, flags);
++	found_psdev->pdev = NULL;
++	spin_unlock_irqrestore(&found_psdev->lock, flags);
 +
-+	return err;
++	pcistub_device_put(found_psdev);
++	up_write(&pcistub_sem);
 +}
 +
-+static void pciback_be_watch(struct xenbus_watch *watch,
-+			     const char **vec, unsigned int len)
++static int __devinit pcistub_match_one(struct pci_dev *dev,
++				       struct pcistub_device_id *pdev_id)
 +{
-+	struct pciback_device *pdev =
-+	    container_of(watch, struct pciback_device, be_watch);
++	/* Match the specified device by domain, bus, slot, func and also if
++	 * any of the device's parent bridges match.
++	 */
++	for (; dev != NULL; dev = dev->bus->self) {
++		if (pci_domain_nr(dev->bus) == pdev_id->domain
++		    && dev->bus->number == pdev_id->bus
++		    && dev->devfn == pdev_id->devfn)
++			return 1;
 +
-+	switch (xenbus_read_driver_state(pdev->xdev->nodename)) {
-+	case XenbusStateInitWait:
-+		pciback_setup_backend(pdev);
-+		break;
++		/* Sometimes the topmost bridge links to itself. */
++		if (dev == dev->bus->self)
++			break;
++	}
 +
-+	default:
-+		break;
++	return 0;
++}
++
++static int __devinit pcistub_match(struct pci_dev *dev)
++{
++	struct pcistub_device_id *pdev_id;
++	unsigned long flags;
++	int found = 0;
++
++	spin_lock_irqsave(&device_ids_lock, flags);
++	list_for_each_entry(pdev_id, &pcistub_device_ids, slot_list) {
++		if (pcistub_match_one(dev, pdev_id)) {
++			found = 1;
++			break;
++		}
 +	}
++	spin_unlock_irqrestore(&device_ids_lock, flags);
++
++	return found;
 +}
 +
-+static int pciback_xenbus_probe(struct xenbus_device *dev,
-+				const struct xenbus_device_id *id)
++static int __devinit pcistub_init_device(struct pci_dev *dev)
 +{
++	struct pciback_dev_data *dev_data;
 +	int err = 0;
-+	struct pciback_device *pdev = alloc_pdev(dev);
 +
-+	if (pdev == NULL) {
++	dev_dbg(&dev->dev, "initializing...\n");
++
++	/* The PCI backend is not intended to be a module (or to work with
++	 * removable PCI devices) yet. If it were, pciback_config_free()
++	 * would need to be called somewhere to free the memory allocated
++	 * here, followed by kfree(pci_get_drvdata(psdev->dev)).
++	 */
++	dev_data = kzalloc(sizeof(*dev_data) +  strlen(DRV_NAME "[]")
++				+ strlen(pci_name(dev)) + 1, GFP_ATOMIC);
++	if (!dev_data) {
 +		err = -ENOMEM;
-+		xenbus_dev_fatal(dev, err,
-+				 "Error allocating pciback_device struct");
 +		goto out;
 +	}
++	pci_set_drvdata(dev, dev_data);
 +
-+	/* wait for xend to configure us */
-+	err = xenbus_switch_state(dev, XenbusStateInitWait);
++	/*
++	 * Setup name for fake IRQ handler. It will only be enabled
++	 * once the device is turned on by the guest.
++	 */
++	sprintf(dev_data->irq_name, DRV_NAME "[%s]", pci_name(dev));
++
++	dev_dbg(&dev->dev, "initializing config\n");
++
++	init_waitqueue_head(&aer_wait_queue);
++	err = pciback_config_init_dev(dev);
 +	if (err)
 +		goto out;
 +
-+	/* watch the backend node for backend configuration information */
-+	err = xenbus_watch_path(dev, dev->nodename, &pdev->be_watch,
-+				pciback_be_watch);
++	/* HACK: Force device (& ACPI) to determine what IRQ it's on - we
++	 * must do this here because pcibios_enable_device may specify
++	 * the pci device's true irq (and possibly its other resources)
++	 * if they differ from what's in the configuration space.
++	 * This makes the assumption that the device's resources won't
++	 * change after this point (otherwise this code may break!)
++	 */
++	dev_dbg(&dev->dev, "enabling device\n");
++	err = pci_enable_device(dev);
 +	if (err)
-+		goto out;
-+	pdev->be_watching = 1;
++		goto config_release;
 +
-+	/* We need to force a call to our callback here in case
-+	 * xend already configured us!
++	/* Now disable the device (this also ensures some private device
++	 * data is setup before we export)
 +	 */
-+	pciback_be_watch(&pdev->be_watch, NULL, 0);
++	dev_dbg(&dev->dev, "reset device\n");
++	pciback_reset_device(dev);
++
++	return 0;
++
++config_release:
++	pciback_config_free_dev(dev);
 +
-+      out:
++out:
++	pci_set_drvdata(dev, NULL);
++	kfree(dev_data);
 +	return err;
 +}
 +
-+static int pciback_xenbus_remove(struct xenbus_device *dev)
++/*
++ * Because some initialization still happens on
++ * devices during fs_initcall, we need to defer
++ * full initialization of our devices until
++ * device_initcall.
++ */
++static int __init pcistub_init_devices_late(void)
 +{
-+	struct pciback_device *pdev = dev->dev.driver_data;
++	struct pcistub_device *psdev;
++	unsigned long flags;
++	int err = 0;
 +
-+	if (pdev != NULL)
-+		free_pdev(pdev);
++	pr_debug("pciback: pcistub_init_devices_late\n");
 +
-+	return 0;
-+}
++	spin_lock_irqsave(&pcistub_devices_lock, flags);
 +
-+static const struct xenbus_device_id xenpci_ids[] = {
-+	{"pci"},
-+	{{0}},
-+};
++	while (!list_empty(&seized_devices)) {
++		psdev = container_of(seized_devices.next,
++				     struct pcistub_device, dev_list);
++		list_del(&psdev->dev_list);
 +
-+static struct xenbus_driver xenbus_pciback_driver = {
-+	.name 			= "pciback",
-+	.owner 			= THIS_MODULE,
-+	.ids 			= xenpci_ids,
-+	.probe 			= pciback_xenbus_probe,
-+	.remove 		= pciback_xenbus_remove,
-+	.otherend_changed 	= pciback_frontend_changed,
-+};
++		spin_unlock_irqrestore(&pcistub_devices_lock, flags);
 +
-+int __init pciback_xenbus_register(void)
-+{
-+	if (!is_running_on_xen())
-+		return -ENODEV;
-+	pciback_wq = create_workqueue("pciback_workqueue");
-+	if (!pciback_wq) {
-+		printk(KERN_ERR "pciback_xenbus_register: create"
-+			"pciback_workqueue failed\n");
-+		return -EFAULT;
++		err = pcistub_init_device(psdev->dev);
++		if (err) {
++			dev_err(&psdev->dev->dev,
++				"error %d initializing device\n", err);
++			kfree(psdev);
++			psdev = NULL;
++		}
++
++		spin_lock_irqsave(&pcistub_devices_lock, flags);
++
++		if (psdev)
++			list_add_tail(&psdev->dev_list, &pcistub_devices);
 +	}
-+	return xenbus_register_backend(&xenbus_pciback_driver);
++
++	initialize_devices = 1;
++
++	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++
++	return 0;
 +}
 +
-+void __exit pciback_xenbus_unregister(void)
++static int __devinit pcistub_seize(struct pci_dev *dev)
 +{
-+	destroy_workqueue(pciback_wq);
-+	xenbus_unregister_driver(&xenbus_pciback_driver);
-+}
--- 
-1.7.4
-
-
-From a4c91b92b09f5b0b7fcb2c6ccd2bc3347eaa2db9 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Tue, 13 Oct 2009 17:22:22 -0400
-Subject: [PATCH 171/203] xen-pciback: Fix include header name change (evtchn.h is now events.h)
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/pciback/pci_stub.c    |    2 +-
- drivers/xen/pciback/pciback_ops.c |    2 +-
- drivers/xen/pciback/xenbus.c      |    2 +-
- 3 files changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
-index c481a73..c02f21f 100644
---- a/drivers/xen/pciback/pci_stub.c
-+++ b/drivers/xen/pciback/pci_stub.c
-@@ -13,7 +13,7 @@
- #include <linux/pci.h>
- #include <linux/wait.h>
- #include <asm/atomic.h>
--#include <xen/evtchn.h>
-+#include <xen/events.h>
- #include "pciback.h"
- #include "conf_space.h"
- #include "conf_space_quirks.h"
-diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
-index b85b2db..58d09eb 100644
---- a/drivers/xen/pciback/pciback_ops.c
-+++ b/drivers/xen/pciback/pciback_ops.c
-@@ -6,7 +6,7 @@
- #include <linux/module.h>
- #include <linux/wait.h>
- #include <asm/bitops.h>
--#include <xen/evtchn.h>
-+#include <xen/events.h>
- #include "pciback.h"
- 
- int verbose_request = 0;
-diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
-index 4d56c45..bbca3fe 100644
---- a/drivers/xen/pciback/xenbus.c
-+++ b/drivers/xen/pciback/xenbus.c
-@@ -8,7 +8,7 @@
- #include <linux/list.h>
- #include <linux/vmalloc.h>
- #include <xen/xenbus.h>
--#include <xen/evtchn.h>
-+#include <xen/events.h>
- #include "pciback.h"
- 
- #define INVALID_EVTCHN_IRQ  (-1)
--- 
-1.7.4
-
-
-From f9645fb1e1dc8fc0596469aa47b661a4f5070a1c Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Tue, 13 Oct 2009 17:22:24 -0400
-Subject: [PATCH 172/203] xen-pciback: Use pci_is_enabled() instead of is_enabled.
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/pciback/conf_space_header.c |    4 ++--
- drivers/xen/pciback/pciback_ops.c       |    1 -
- 2 files changed, 2 insertions(+), 3 deletions(-)
-
-diff --git a/drivers/xen/pciback/conf_space_header.c b/drivers/xen/pciback/conf_space_header.c
-index f794e12..5a9e028 100644
---- a/drivers/xen/pciback/conf_space_header.c
-+++ b/drivers/xen/pciback/conf_space_header.c
-@@ -22,14 +22,14 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
- {
- 	int err;
- 
--	if (!dev->is_enabled && is_enable_cmd(value)) {
-+	if (!pci_is_enabled(dev) && is_enable_cmd(value)) {
- 		if (unlikely(verbose_request))
- 			printk(KERN_DEBUG "pciback: %s: enable\n",
- 			       pci_name(dev));
- 		err = pci_enable_device(dev);
- 		if (err)
- 			return err;
--	} else if (dev->is_enabled && !is_enable_cmd(value)) {
-+	} else if (pci_is_enabled(dev) && !is_enable_cmd(value)) {
- 		if (unlikely(verbose_request))
- 			printk(KERN_DEBUG "pciback: %s: disable\n",
- 			       pci_name(dev));
-diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
-index 58d09eb..2d570e7 100644
---- a/drivers/xen/pciback/pciback_ops.c
-+++ b/drivers/xen/pciback/pciback_ops.c
-@@ -26,7 +26,6 @@ void pciback_reset_device(struct pci_dev *dev)
- 
- 		pci_write_config_word(dev, PCI_COMMAND, 0);
- 
--		dev->is_enabled = 0;
- 		dev->is_busmaster = 0;
- 	} else {
- 		pci_read_config_word(dev, PCI_COMMAND, &cmd);
--- 
-1.7.4
-
-
-From a16f8410e3eb0e9db51aa1c90c95715893395a8a Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Tue, 13 Oct 2009 17:22:25 -0400
-Subject: [PATCH 173/203] xen-pciback: Fix usage of INIT_WORK.
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/pciback/pciback.h     |    4 ++--
- drivers/xen/pciback/pciback_ops.c |    7 ++++---
- drivers/xen/pciback/xenbus.c      |    3 ++-
- 3 files changed, 8 insertions(+), 6 deletions(-)
-
-diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h
-index 6744f45..4fb8c05 100644
---- a/drivers/xen/pciback/pciback.h
-+++ b/drivers/xen/pciback/pciback.h
-@@ -99,8 +99,8 @@ int pciback_publish_pci_roots(struct pciback_device *pdev,
- void pciback_release_devices(struct pciback_device *pdev);
- 
- /* Handles events from front-end */
--irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs);
--void pciback_do_op(void *data);
-+irqreturn_t pciback_handle_event(int irq, void *dev_id);
-+void pciback_do_op(struct work_struct *data);
- 
- int pciback_xenbus_register(void);
- void pciback_xenbus_unregister(void);
-diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
-index 2d570e7..6624faf 100644
---- a/drivers/xen/pciback/pciback_ops.c
-+++ b/drivers/xen/pciback/pciback_ops.c
-@@ -66,9 +66,10 @@ void test_and_schedule_op(struct pciback_device *pdev)
-  * context because some of the pci_* functions can sleep (mostly due to ACPI
-  * use of semaphores). This function is intended to be called from a work
-  * queue in process context taking a struct pciback_device as a parameter */
--void pciback_do_op(void *data)
++	struct pcistub_device *psdev;
++	unsigned long flags;
++	int err = 0;
 +
-+void pciback_do_op(struct work_struct *data)
- {
--	struct pciback_device *pdev = data;
-+	struct pciback_device *pdev = container_of(data, struct pciback_device, op_work);
- 	struct pci_dev *dev;
- 	struct xen_pci_op *op = &pdev->sh_info->op;
- 
-@@ -123,7 +124,7 @@ void pciback_do_op(void *data)
- 	test_and_schedule_op(pdev);
- }
- 
--irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs)
-+irqreturn_t pciback_handle_event(int irq, void *dev_id)
- {
- 	struct pciback_device *pdev = dev_id;
- 
-diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
-index bbca3fe..bd52289 100644
---- a/drivers/xen/pciback/xenbus.c
-+++ b/drivers/xen/pciback/xenbus.c
-@@ -9,6 +9,7 @@
- #include <linux/vmalloc.h>
- #include <xen/xenbus.h>
- #include <xen/events.h>
-+#include <linux/workqueue.h>
- #include "pciback.h"
- 
- #define INVALID_EVTCHN_IRQ  (-1)
-@@ -33,7 +34,7 @@ static struct pciback_device *alloc_pdev(struct xenbus_device *xdev)
- 	pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
- 	pdev->be_watching = 0;
- 
--	INIT_WORK(&pdev->op_work, pciback_do_op, pdev);
-+	INIT_WORK(&pdev->op_work, pciback_do_op);
- 
- 	if (pciback_init_devices(pdev)) {
- 		kfree(pdev);
--- 
-1.7.4
-
-
-From e0e72f5f5b3eff68c649c193d937fad54c7e956e Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Tue, 13 Oct 2009 17:22:26 -0400
-Subject: [PATCH 174/203] xen-pciback: Update the calling mechanism for xenbus_[map|unmap]_ring_valloc functions.
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/pciback/pciback.h |    1 -
- drivers/xen/pciback/xenbus.c  |   18 +++++++++---------
- 2 files changed, 9 insertions(+), 10 deletions(-)
-
-diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h
-index 4fb8c05..5e8e14e 100644
---- a/drivers/xen/pciback/pciback.h
-+++ b/drivers/xen/pciback/pciback.h
-@@ -36,7 +36,6 @@ struct pciback_device {
- 
- 	int evtchn_irq;
- 
--	struct vm_struct *sh_area;
- 	struct xen_pci_sharedinfo *sh_info;
- 
- 	unsigned long flags;
-diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
-index bd52289..5be1350 100644
---- a/drivers/xen/pciback/xenbus.c
-+++ b/drivers/xen/pciback/xenbus.c
-@@ -7,6 +7,7 @@
- #include <linux/init.h>
- #include <linux/list.h>
- #include <linux/vmalloc.h>
-+#include <linux/workqueue.h>
- #include <xen/xenbus.h>
- #include <xen/events.h>
- #include <linux/workqueue.h>
-@@ -29,7 +30,6 @@ static struct pciback_device *alloc_pdev(struct xenbus_device *xdev)
- 
- 	spin_lock_init(&pdev->dev_lock);
- 
--	pdev->sh_area = NULL;
- 	pdev->sh_info = NULL;
- 	pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
- 	pdev->be_watching = 0;
-@@ -59,7 +59,7 @@ static void pciback_disconnect(struct pciback_device *pdev)
- 	flush_workqueue(pciback_wq);
- 
- 	if (pdev->sh_info != NULL) {
--		xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_area);
-+		xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info);
- 		pdev->sh_info = NULL;
- 	}
- 
-@@ -85,23 +85,23 @@ static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref,
- 			     int remote_evtchn)
- {
- 	int err = 0;
--	struct vm_struct *area;
-+	void *vaddr;
- 
- 	dev_dbg(&pdev->xdev->dev,
- 		"Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
- 		gnt_ref, remote_evtchn);
- 
--	area = xenbus_map_ring_valloc(pdev->xdev, gnt_ref);
--	if (IS_ERR(area)) {
--		err = PTR_ERR(area);
-+	err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr);
-+	if (err < 0) {
-+		xenbus_dev_fatal(pdev->xdev, err,
-+				"Error mapping other domain page in ours.");
- 		goto out;
- 	}
--	pdev->sh_area = area;
--	pdev->sh_info = area->addr;
-+	pdev->sh_info = vaddr;
- 
- 	err = bind_interdomain_evtchn_to_irqhandler(
- 		pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event,
--		SA_SAMPLE_RANDOM, "pciback", pdev);
-+		0, "pciback", pdev);
- 	if (err < 0) {
- 		xenbus_dev_fatal(pdev->xdev, err,
- 				 "Error binding event channel to IRQ");
--- 
-1.7.4
-
-
-From 2e44d1116767a69ce24b4909a074e38c97da7aeb Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Tue, 13 Oct 2009 17:22:30 -0400
-Subject: [PATCH 175/203] xen-pciback: Add check to load only under priviliged domain.
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/pciback/pci_stub.c |    5 +++++
- drivers/xen/pciback/xenbus.c   |    2 --
- 2 files changed, 5 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
-index c02f21f..d97dac5 100644
---- a/drivers/xen/pciback/pci_stub.c
-+++ b/drivers/xen/pciback/pci_stub.c
-@@ -14,6 +14,8 @@
- #include <linux/wait.h>
- #include <asm/atomic.h>
- #include <xen/events.h>
-+#include <asm/xen/pci.h>
-+#include <asm/xen/hypervisor.h>
- #include "pciback.h"
- #include "conf_space.h"
- #include "conf_space_quirks.h"
-@@ -1286,6 +1288,9 @@ static int __init pciback_init(void)
- {
- 	int err;
- 
-+	if (!xen_initial_domain())
-+		return -ENODEV;
++	psdev = pcistub_device_alloc(dev);
++	if (!psdev)
++		return -ENOMEM;
 +
- 	err = pciback_config_init();
- 	if (err)
- 		return err;
-diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
-index 5be1350..a85c413 100644
---- a/drivers/xen/pciback/xenbus.c
-+++ b/drivers/xen/pciback/xenbus.c
-@@ -693,8 +693,6 @@ static struct xenbus_driver xenbus_pciback_driver = {
- 
- int __init pciback_xenbus_register(void)
- {
--	if (!is_running_on_xen())
--		return -ENODEV;
- 	pciback_wq = create_workqueue("pciback_workqueue");
- 	if (!pciback_wq) {
- 		printk(KERN_ERR "pciback_xenbus_register: create"
--- 
-1.7.4
-
-
-From e52f6bc512613c2a447b4e26ff66cf3810e4da33 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Tue, 13 Oct 2009 17:22:31 -0400
-Subject: [PATCH 176/203] xen-pciback: Remove usage of pci_restore_bars() as Linux handles the power-up states correctly now.
-
-Originally this code was pulled from the upstream kernel, and stuck
-in the linux-2.6-sparse tree. At that point of time, the Linux tree (2.6.16?)
-did not know how to handle this. Nowadays the pci_set_power_state routine
-handles this case so we do not need this anymore.
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/pciback/conf_space_capability_pm.c |   13 -------------
- 1 files changed, 0 insertions(+), 13 deletions(-)
-
-diff --git a/drivers/xen/pciback/conf_space_capability_pm.c b/drivers/xen/pciback/conf_space_capability_pm.c
-index e2f99c7..e1d3af4 100644
---- a/drivers/xen/pciback/conf_space_capability_pm.c
-+++ b/drivers/xen/pciback/conf_space_capability_pm.c
-@@ -58,19 +58,6 @@ static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value,
- 		goto out;
- 	}
- 
--	/*
--	 * Device may lose PCI config info on D3->D0 transition. This
--	 * is a problem for some guests which will not reset BARs. Even
--	 * those that have a go will be foiled by our BAR-write handler
--	 * which will discard the write! Since Linux won't re-init
--	 * the config space automatically in all cases, we do it here.
--	 * Future: Should we re-initialise all first 64 bytes of config space?
--	 */
--	if (new_state == PCI_D0 &&
--	    (old_state == PCI_D3hot || old_state == PCI_D3cold) &&
--	    !(old_value & PCI_PM_CTRL_NO_SOFT_RESET))
--		pci_restore_bars(dev);
--
-  out:
- 	return err;
- }
--- 
-1.7.4
-
-
-From 9c663bc577e3052cb0fdd2dfc7a7550be3897ce5 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Thu, 5 Nov 2009 15:25:43 -0500
-Subject: [PATCH 177/203] xen-pciback: Return the physical IRQ number instead of the allocated IRQ number to pcifront.
-
-The allocation of IRQ numbers in Linux privileged domains is based
-on finding the first unbound IRQ number. After the allocation is done
-a HYPERCALL to Xen is done, which allocates a PIRQ globally.
-That PIRQ->IRQ binding is saved in data structures that are used
-during ISR executions.
-
-Before this patch, for non-privileged domains we would return the local
-IRQ number instead of the PIRQ. The non-privileged domains require the
-PIRQ so that they can attach the their own interrupt handler to it.
-Fortunatly there is a function, 'xen_gsi_from_irq' that returns
-that global IRQ number.
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/pciback/conf_space_capability_msi.c |   12 ++++++++----
- 1 files changed, 8 insertions(+), 4 deletions(-)
-
-diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c
-index 762e396..7fb5371 100644
---- a/drivers/xen/pciback/conf_space_capability_msi.c
-+++ b/drivers/xen/pciback/conf_space_capability_msi.c
-@@ -6,6 +6,7 @@
- #include "conf_space.h"
- #include "conf_space_capability.h"
- #include <xen/interface/io/pciif.h>
-+#include <xen/events.h>
- #include "pciback.h"
- 
- int pciback_enable_msi(struct pciback_device *pdev,
-@@ -22,7 +23,9 @@ int pciback_enable_msi(struct pciback_device *pdev,
- 		return XEN_PCI_ERR_op_failed;
- 	}
- 
--	op->value = dev->irq;
-+	/* The value the guest needs is actually the IDT vector, not the
-+	 * the local domain's IRQ number. */
-+	op->value = xen_gsi_from_irq(dev->irq);
- 	return 0;
- }
- 
-@@ -31,7 +34,7 @@ int pciback_disable_msi(struct pciback_device *pdev,
- {
- 	pci_disable_msi(dev);
- 
--	op->value = dev->irq;
-+	op->value = xen_gsi_from_irq(dev->irq);
- 	return 0;
- }
- 
-@@ -57,7 +60,8 @@ int pciback_enable_msix(struct pciback_device *pdev,
- 
- 	for (i = 0; i < op->value; i++) {
- 		op->msix_entries[i].entry = entries[i].entry;
--		op->msix_entries[i].vector = entries[i].vector;
-+		op->msix_entries[i].vector =
-+					xen_gsi_from_irq(entries[i].vector);
- 	}
- 
- 	kfree(entries);
-@@ -73,7 +77,7 @@ int pciback_disable_msix(struct pciback_device *pdev,
- 
- 	pci_disable_msix(dev);
- 
--	op->value = dev->irq;
-+	op->value = xen_gsi_from_irq(dev->irq);
- 	return 0;
- }
- 
--- 
-1.7.4
-
-
-From f88da5c0e001b475b139501b10b9c6ef4b0d8f59 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Thu, 5 Nov 2009 15:25:44 -0500
-Subject: [PATCH 178/203] xen-pciback: Fix checkpatch warnings and errors for pciback/ directory.
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/pciback/conf_space.c                |   34 +++---
- drivers/xen/pciback/conf_space.h                |   30 ++--
- drivers/xen/pciback/conf_space_capability.c     |    5 +-
- drivers/xen/pciback/conf_space_capability.h     |    3 +
- drivers/xen/pciback/conf_space_capability_msi.c |    3 +-
- drivers/xen/pciback/conf_space_capability_pm.c  |    4 +-
- drivers/xen/pciback/conf_space_capability_vpd.c |    2 +-
- drivers/xen/pciback/conf_space_header.c         |    7 +-
- drivers/xen/pciback/conf_space_quirks.c         |   16 ++-
- drivers/xen/pciback/controller.c                |   15 +-
- drivers/xen/pciback/passthrough.c               |    6 +-
- drivers/xen/pciback/pci_stub.c                  |  165 +++++++++++------------
- drivers/xen/pciback/pciback.h                   |   28 +++--
- drivers/xen/pciback/pciback_ops.c               |   74 +++++------
- drivers/xen/pciback/slot.c                      |   22 ++--
- drivers/xen/pciback/vpci.c                      |   28 ++--
- drivers/xen/pciback/xenbus.c                    |   42 +++---
- 17 files changed, 245 insertions(+), 239 deletions(-)
-
-diff --git a/drivers/xen/pciback/conf_space.c b/drivers/xen/pciback/conf_space.c
-index 0c76db1..370c18e 100644
---- a/drivers/xen/pciback/conf_space.c
-+++ b/drivers/xen/pciback/conf_space.c
-@@ -18,11 +18,11 @@
- static int permissive;
- module_param(permissive, bool, 0644);
- 
--#define DEFINE_PCI_CONFIG(op,size,type) 			\
-+#define DEFINE_PCI_CONFIG(op, size, type) 			\
- int pciback_##op##_config_##size 				\
- (struct pci_dev *dev, int offset, type value, void *data)	\
- {								\
--	return pci_##op##_config_##size (dev, offset, value);	\
-+	return pci_##op##_config_##size(dev, offset, value);	\
- }
- 
- DEFINE_PCI_CONFIG(read, byte, u8 *)
-@@ -139,14 +139,15 @@ static int pcibios_err_to_errno(int err)
- }
- 
- int pciback_config_read(struct pci_dev *dev, int offset, int size,
--			u32 * ret_val)
-+			u32 *ret_val)
- {
- 	int err = 0;
- 	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
- 	const struct config_field_entry *cfg_entry;
- 	const struct config_field *field;
- 	int req_start, req_end, field_start, field_end;
--	/* if read fails for any reason, return 0 (as if device didn't respond) */
-+	/* if read fails for any reason, return 0
-+	 * (as if device didn't respond) */
- 	u32 value = 0, tmp_val;
- 
- 	if (unlikely(verbose_request))
-@@ -161,10 +162,10 @@ int pciback_config_read(struct pci_dev *dev, int offset, int size,
- 	/* Get the real value first, then modify as appropriate */
- 	switch (size) {
- 	case 1:
--		err = pci_read_config_byte(dev, offset, (u8 *) & value);
-+		err = pci_read_config_byte(dev, offset, (u8 *) &value);
- 		break;
- 	case 2:
--		err = pci_read_config_word(dev, offset, (u16 *) & value);
-+		err = pci_read_config_word(dev, offset, (u16 *) &value);
- 		break;
- 	case 4:
- 		err = pci_read_config_dword(dev, offset, &value);
-@@ -192,7 +193,7 @@ int pciback_config_read(struct pci_dev *dev, int offset, int size,
- 		}
- 	}
- 
--      out:
-+out:
- 	if (unlikely(verbose_request))
- 		printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x = %x\n",
- 		       pci_name(dev), size, offset, value);
-@@ -276,8 +277,8 @@ int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value)
- 		} else if (!dev_data->warned_on_write) {
- 			dev_data->warned_on_write = 1;
- 			dev_warn(&dev->dev, "Driver tried to write to a "
--				 "read-only configuration space field at offset "
--				 "0x%x, size %d. This may be harmless, but if "
-+				 "read-only configuration space field at offset"
-+				 " 0x%x, size %d. This may be harmless, but if "
- 				 "you have problems with your device:\n"
- 				 "1) see permissive attribute in sysfs\n"
- 				 "2) report problems to the xen-devel "
-@@ -295,8 +296,8 @@ void pciback_config_free_dyn_fields(struct pci_dev *dev)
- 	struct config_field_entry *cfg_entry, *t;
- 	const struct config_field *field;
- 
--	dev_dbg(&dev->dev,
--		"free-ing dynamically allocated virtual configuration space fields\n");
-+	dev_dbg(&dev->dev, "free-ing dynamically allocated virtual "
-+			   "configuration space fields\n");
- 	if (!dev_data)
- 		return;
- 
-@@ -306,8 +307,7 @@ void pciback_config_free_dyn_fields(struct pci_dev *dev)
- 		if (field->clean) {
- 			field->clean((struct config_field *)field);
- 
--			if (cfg_entry->data)
--				kfree(cfg_entry->data);
-+			kfree(cfg_entry->data);
- 
- 			list_del(&cfg_entry->list);
- 			kfree(cfg_entry);
-@@ -376,7 +376,7 @@ int pciback_config_add_field_offset(struct pci_dev *dev,
- 	cfg_entry->base_offset = base_offset;
- 
- 	/* silently ignore duplicate fields */
--	err = pciback_field_is_dup(dev,OFFSET(cfg_entry));
-+	err = pciback_field_is_dup(dev, OFFSET(cfg_entry));
- 	if (err)
- 		goto out;
- 
-@@ -395,14 +395,14 @@ int pciback_config_add_field_offset(struct pci_dev *dev,
- 		OFFSET(cfg_entry));
- 	list_add_tail(&cfg_entry->list, &dev_data->config_fields);
- 
--      out:
-+out:
- 	if (err)
- 		kfree(cfg_entry);
- 
- 	return err;
- }
- 
--/* This sets up the device's virtual configuration space to keep track of 
-+/* This sets up the device's virtual configuration space to keep track of
-  * certain registers (like the base address registers (BARs) so that we can
-  * keep the client from manipulating them directly.
-  */
-@@ -425,7 +425,7 @@ int pciback_config_init_dev(struct pci_dev *dev)
- 
- 	err = pciback_config_quirks_init(dev);
- 
--      out:
-+out:
- 	return err;
- }
- 
-diff --git a/drivers/xen/pciback/conf_space.h b/drivers/xen/pciback/conf_space.h
-index fe746ef..50ebef2 100644
---- a/drivers/xen/pciback/conf_space.h
-+++ b/drivers/xen/pciback/conf_space.h
-@@ -11,21 +11,21 @@
- #include <linux/err.h>
- 
- /* conf_field_init can return an errno in a ptr with ERR_PTR() */
--typedef void *(*conf_field_init) (struct pci_dev * dev, int offset);
--typedef void (*conf_field_reset) (struct pci_dev * dev, int offset, void *data);
--typedef void (*conf_field_free) (struct pci_dev * dev, int offset, void *data);
-+typedef void *(*conf_field_init) (struct pci_dev *dev, int offset);
-+typedef void (*conf_field_reset) (struct pci_dev *dev, int offset, void *data);
-+typedef void (*conf_field_free) (struct pci_dev *dev, int offset, void *data);
- 
--typedef int (*conf_dword_write) (struct pci_dev * dev, int offset, u32 value,
-+typedef int (*conf_dword_write) (struct pci_dev *dev, int offset, u32 value,
- 				 void *data);
--typedef int (*conf_word_write) (struct pci_dev * dev, int offset, u16 value,
-+typedef int (*conf_word_write) (struct pci_dev *dev, int offset, u16 value,
- 				void *data);
--typedef int (*conf_byte_write) (struct pci_dev * dev, int offset, u8 value,
-+typedef int (*conf_byte_write) (struct pci_dev *dev, int offset, u8 value,
- 				void *data);
--typedef int (*conf_dword_read) (struct pci_dev * dev, int offset, u32 * value,
-+typedef int (*conf_dword_read) (struct pci_dev *dev, int offset, u32 *value,
- 				void *data);
--typedef int (*conf_word_read) (struct pci_dev * dev, int offset, u16 * value,
-+typedef int (*conf_word_read) (struct pci_dev *dev, int offset, u16 *value,
- 			       void *data);
--typedef int (*conf_byte_read) (struct pci_dev * dev, int offset, u8 * value,
-+typedef int (*conf_byte_read) (struct pci_dev *dev, int offset, u8 *value,
- 			       void *data);
- 
- /* These are the fields within the configuration space which we
-@@ -39,7 +39,7 @@ struct config_field {
- 	conf_field_init init;
- 	conf_field_reset reset;
- 	conf_field_free release;
--	void (*clean) (struct config_field * field);
-+	void (*clean) (struct config_field *field);
- 	union {
- 		struct {
- 			conf_dword_write write;
-@@ -92,8 +92,8 @@ static inline int pciback_config_add_fields(struct pci_dev *dev,
- }
- 
- static inline int pciback_config_add_fields_offset(struct pci_dev *dev,
--						   const struct config_field *field,
--						   unsigned int offset)
-+					const struct config_field *field,
-+					unsigned int offset)
- {
- 	int i, err = 0;
- 	for (i = 0; field[i].size != 0; i++) {
-@@ -105,11 +105,11 @@ static inline int pciback_config_add_fields_offset(struct pci_dev *dev,
- }
- 
- /* Read/Write the real configuration space */
--int pciback_read_config_byte(struct pci_dev *dev, int offset, u8 * value,
-+int pciback_read_config_byte(struct pci_dev *dev, int offset, u8 *value,
- 			     void *data);
--int pciback_read_config_word(struct pci_dev *dev, int offset, u16 * value,
-+int pciback_read_config_word(struct pci_dev *dev, int offset, u16 *value,
- 			     void *data);
--int pciback_read_config_dword(struct pci_dev *dev, int offset, u32 * value,
-+int pciback_read_config_dword(struct pci_dev *dev, int offset, u32 *value,
- 			      void *data);
- int pciback_write_config_byte(struct pci_dev *dev, int offset, u8 value,
- 			      void *data);
-diff --git a/drivers/xen/pciback/conf_space_capability.c b/drivers/xen/pciback/conf_space_capability.c
-index 50efca4..0ea84d6 100644
---- a/drivers/xen/pciback/conf_space_capability.c
-+++ b/drivers/xen/pciback/conf_space_capability.c
-@@ -53,13 +53,10 @@ int pciback_config_capability_add_fields(struct pci_dev *dev)
- 		}
- 	}
- 
--      out:
-+out:
- 	return err;
- }
- 
--extern struct pciback_config_capability pciback_config_capability_vpd;
--extern struct pciback_config_capability pciback_config_capability_pm;
--
- int pciback_config_capability_init(void)
- {
- 	register_capability(&pciback_config_capability_vpd);
-diff --git a/drivers/xen/pciback/conf_space_capability.h b/drivers/xen/pciback/conf_space_capability.h
-index 823392e..8da3ac4 100644
---- a/drivers/xen/pciback/conf_space_capability.h
-+++ b/drivers/xen/pciback/conf_space_capability.h
-@@ -20,4 +20,7 @@ struct pciback_config_capability {
- 	const struct config_field *fields;
- };
- 
-+extern struct pciback_config_capability pciback_config_capability_vpd;
-+extern struct pciback_config_capability pciback_config_capability_pm;
++	spin_lock_irqsave(&pcistub_devices_lock, flags);
 +
- #endif
-diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c
-index 7fb5371..b70ea8b 100644
---- a/drivers/xen/pciback/conf_space_capability_msi.c
-+++ b/drivers/xen/pciback/conf_space_capability_msi.c
-@@ -18,7 +18,8 @@ int pciback_enable_msi(struct pciback_device *pdev,
- 	status = pci_enable_msi(dev);
- 
- 	if (status) {
--		printk("error enable msi for guest %x status %x\n", otherend, status);
-+		printk(KERN_ERR "error enable msi for guest %x status %x\n",
-+			otherend, status);
- 		op->value = 0;
- 		return XEN_PCI_ERR_op_failed;
- 	}
-diff --git a/drivers/xen/pciback/conf_space_capability_pm.c b/drivers/xen/pciback/conf_space_capability_pm.c
-index e1d3af4..0442616 100644
---- a/drivers/xen/pciback/conf_space_capability_pm.c
-+++ b/drivers/xen/pciback/conf_space_capability_pm.c
-@@ -20,7 +20,7 @@ static int pm_caps_read(struct pci_dev *dev, int offset, u16 *value,
- 
- 	*value = real_value & ~PCI_PM_CAP_PME_MASK;
- 
--      out:
-+out:
- 	return err;
- }
- 
-@@ -77,7 +77,7 @@ static void *pm_ctrl_init(struct pci_dev *dev, int offset)
- 		err = pci_write_config_word(dev, offset, value);
- 	}
- 
--      out:
-+out:
- 	return ERR_PTR(err);
- }
- 
-diff --git a/drivers/xen/pciback/conf_space_capability_vpd.c b/drivers/xen/pciback/conf_space_capability_vpd.c
-index 920cb4a..e7b4d66 100644
---- a/drivers/xen/pciback/conf_space_capability_vpd.c
-+++ b/drivers/xen/pciback/conf_space_capability_vpd.c
-@@ -33,7 +33,7 @@ static const struct config_field caplist_vpd[] = {
- 	 },
- 	{}
- };
-- 
++	if (initialize_devices) {
++		spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++
++		/* don't want irqs disabled when calling pcistub_init_device */
++		err = pcistub_init_device(psdev->dev);
++
++		spin_lock_irqsave(&pcistub_devices_lock, flags);
++
++		if (!err)
++			list_add(&psdev->dev_list, &pcistub_devices);
++	} else {
++		dev_dbg(&dev->dev, "deferring initialization\n");
++		list_add(&psdev->dev_list, &seized_devices);
++	}
++
++	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++
++	if (err)
++		pcistub_device_put(psdev);
++
++	return err;
++}
++
++static int __devinit pcistub_probe(struct pci_dev *dev,
++				   const struct pci_device_id *id)
++{
++	int err = 0;
++
++	dev_dbg(&dev->dev, "probing...\n");
++
++	if (pcistub_match(dev)) {
++
++		if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL
++		    && dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
++			dev_err(&dev->dev, "can't export pci devices that "
++				"don't have a normal (0) or bridge (1) "
++				"header type!\n");
++			err = -ENODEV;
++			goto out;
++		}
++
++		dev_info(&dev->dev, "seizing device\n");
++		err = pcistub_seize(dev);
++	} else
++		/* Didn't find the device */
++		err = -ENODEV;
 +
- struct pciback_config_capability pciback_config_capability_vpd = {
- 	.capability = PCI_CAP_ID_VPD,
- 	.fields = caplist_vpd,
-diff --git a/drivers/xen/pciback/conf_space_header.c b/drivers/xen/pciback/conf_space_header.c
-index 5a9e028..3ae7da1 100644
---- a/drivers/xen/pciback/conf_space_header.c
-+++ b/drivers/xen/pciback/conf_space_header.c
-@@ -51,7 +51,8 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
- 		err = pci_set_mwi(dev);
- 		if (err) {
- 			printk(KERN_WARNING
--			       "pciback: %s: cannot enable memory-write-invalidate (%d)\n",
-+			       "pciback: %s: cannot enable "
-+			       "memory-write-invalidate (%d)\n",
- 			       pci_name(dev), err);
- 			value &= ~PCI_COMMAND_INVALIDATE;
- 		}
-@@ -206,7 +207,7 @@ static int bist_write(struct pci_dev *dev, int offset, u8 value, void *data)
- 	    || value == PCI_BIST_START)
- 		err = pci_write_config_byte(dev, offset, value);
- 
--      out:
-+out:
- 	return err;
- }
- 
-@@ -312,6 +313,6 @@ int pciback_config_header_add_fields(struct pci_dev *dev)
- 		break;
- 	}
- 
--      out:
-+out:
- 	return err;
- }
-diff --git a/drivers/xen/pciback/conf_space_quirks.c b/drivers/xen/pciback/conf_space_quirks.c
-index 244a438..45c31fb 100644
---- a/drivers/xen/pciback/conf_space_quirks.c
-+++ b/drivers/xen/pciback/conf_space_quirks.c
-@@ -18,8 +18,10 @@ match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
- {
- 	if ((id->vendor == PCI_ANY_ID || id->vendor == dev->vendor) &&
- 	    (id->device == PCI_ANY_ID || id->device == dev->device) &&
--	    (id->subvendor == PCI_ANY_ID || id->subvendor == dev->subsystem_vendor) &&
--	    (id->subdevice == PCI_ANY_ID || id->subdevice == dev->subsystem_device) &&
-+	    (id->subvendor == PCI_ANY_ID ||
-+				id->subvendor == dev->subsystem_vendor) &&
-+	    (id->subdevice == PCI_ANY_ID ||
-+				id->subdevice == dev->subsystem_device) &&
- 	    !((id->class ^ dev->class) & id->class_mask))
- 		return id;
- 	return NULL;
-@@ -35,7 +37,7 @@ struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev)
- 	tmp_quirk = NULL;
- 	printk(KERN_DEBUG
- 	       "quirk didn't match any device pciback knows about\n");
--      out:
-+out:
- 	return tmp_quirk;
- }
- 
-@@ -51,7 +53,7 @@ int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg)
- 	struct config_field_entry *cfg_entry;
- 
- 	list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
--		if ( OFFSET(cfg_entry) == reg) {
-+		if (OFFSET(cfg_entry) == reg) {
- 			ret = 1;
- 			break;
- 		}
-@@ -84,7 +86,7 @@ int pciback_config_quirks_add_field(struct pci_dev *dev, struct config_field
- 
- 	pciback_config_add_field(dev, field);
- 
--      out:
-+out:
- 	return err;
- }
- 
-@@ -110,7 +112,7 @@ int pciback_config_quirks_init(struct pci_dev *dev)
- 	quirk->pdev = dev;
- 
- 	register_quirk(quirk);
--      out:
-+out:
- 	return ret;
- }
- 
-@@ -133,6 +135,6 @@ int pciback_config_quirk_release(struct pci_dev *dev)
- 	list_del(&quirk->quirks_list);
- 	kfree(quirk);
- 
--      out:
-+out:
- 	return ret;
- }
-diff --git a/drivers/xen/pciback/controller.c b/drivers/xen/pciback/controller.c
-index 294e48f..7f04f11 100644
---- a/drivers/xen/pciback/controller.c
-+++ b/drivers/xen/pciback/controller.c
-@@ -259,7 +259,7 @@ static acpi_status write_xenbus_resource(struct acpi_resource *res, void *data)
- 	    !(addr.resource_type == ACPI_IO_RANGE &&
- 	      addr.info.io.translation))
- 		return AE_OK;
--	   
-+
- 	/* Store the resource in xenbus for the guest */
- 	len = snprintf(str, sizeof(str), "root-%d-resource-%d",
- 		       info->root_num, info->resource_count);
-@@ -314,7 +314,7 @@ int pciback_publish_pci_roots(struct pciback_device *pdev,
- 			goto out;
- 
- 		/*
-- 		 * Now figure out which root-%d this belongs to
-+		 * Now figure out which root-%d this belongs to
- 		 * so we can associate resources with it.
- 		 */
- 		err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
-@@ -407,8 +407,8 @@ void pciback_release_devices(struct pciback_device *pdev)
- 	pdev->pci_dev_data = NULL;
- }
- 
--int pciback_get_pcifront_dev(struct pci_dev *pcidev, 
--		struct pciback_device *pdev, 
-+int pciback_get_pcifront_dev(struct pci_dev *pcidev,
-+		struct pciback_device *pdev,
- 		unsigned int *domain, unsigned int *bus, unsigned int *devfn)
- {
- 	struct controller_dev_data *dev_data = pdev->pci_dev_data;
-@@ -420,13 +420,12 @@ int pciback_get_pcifront_dev(struct pci_dev *pcidev,
- 
- 	list_for_each_entry(cntrl_entry, &dev_data->list, list) {
- 		list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) {
--			if ( (dev_entry->dev->bus->number == 
-+			if ((dev_entry->dev->bus->number ==
- 					pcidev->bus->number) &&
--			  	(dev_entry->dev->devfn ==
-+				(dev_entry->dev->devfn ==
- 					pcidev->devfn) &&
- 				(pci_domain_nr(dev_entry->dev->bus) ==
--					pci_domain_nr(pcidev->bus)))
--			{
-+					pci_domain_nr(pcidev->bus))) {
- 				found = 1;
- 				*domain = cntrl_entry->domain;
- 				*bus = cntrl_entry->bus;
-diff --git a/drivers/xen/pciback/passthrough.c b/drivers/xen/pciback/passthrough.c
-index 9e7a0c4..5386bebf 100644
---- a/drivers/xen/pciback/passthrough.c
-+++ b/drivers/xen/pciback/passthrough.c
-@@ -165,8 +165,10 @@ void pciback_release_devices(struct pciback_device *pdev)
- 	pdev->pci_dev_data = NULL;
- }
- 
--int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev, 
--		unsigned int *domain, unsigned int *bus, unsigned int *devfn)
-+int pciback_get_pcifront_dev(struct pci_dev *pcidev,
-+			     struct pciback_device *pdev,
-+			     unsigned int *domain, unsigned int *bus,
-+			     unsigned int *devfn)
- 
- {
- 	*domain = pci_domain_nr(pcidev->bus);
-diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
-index d97dac5..28222ee 100644
---- a/drivers/xen/pciback/pci_stub.c
-+++ b/drivers/xen/pciback/pci_stub.c
-@@ -20,7 +20,7 @@
- #include "conf_space.h"
- #include "conf_space_quirks.h"
- 
--static char *pci_devs_to_hide = NULL;
-+static char *pci_devs_to_hide;
- wait_queue_head_t aer_wait_queue;
- /*Add sem for sync AER handling and pciback remove/reconfigue ops,
- * We want to avoid in middle of AER ops, pciback devices is being removed
-@@ -43,7 +43,7 @@ struct pcistub_device {
- 	spinlock_t lock;
- 
- 	struct pci_dev *dev;
--	struct pciback_device *pdev;	/* non-NULL if struct pci_dev is in use */
-+	struct pciback_device *pdev;/* non-NULL if struct pci_dev is in use */
- };
- 
- /* Access to pcistub_devices & seized_devices lists and the initialize_devices
-@@ -55,7 +55,7 @@ static LIST_HEAD(pcistub_devices);
- /* wait for device_initcall before initializing our devices
-  * (see pcistub_init_devices_late)
-  */
--static int initialize_devices = 0;
-+static int initialize_devices;
- static LIST_HEAD(seized_devices);
- 
- static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev)
-@@ -132,7 +132,7 @@ static struct pcistub_device *pcistub_device_find(int domain, int bus,
- 	/* didn't find it */
- 	psdev = NULL;
- 
--      out:
-+out:
- 	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
- 	return psdev;
- }
-@@ -321,10 +321,10 @@ static int __devinit pcistub_init_device(struct pci_dev *dev)
- 
- 	return 0;
- 
--      config_release:
-+config_release:
- 	pciback_config_free_dev(dev);
- 
--      out:
-+out:
- 	pci_set_drvdata(dev, NULL);
- 	kfree(dev_data);
- 	return err;
-@@ -443,7 +443,7 @@ static int __devinit pcistub_probe(struct pci_dev *dev,
- 		/* Didn't find the device */
- 		err = -ENODEV;
- 
--      out:
 +out:
- 	return err;
- }
- 
-@@ -511,26 +511,24 @@ static void kill_domain_by_device(struct pcistub_device *psdev)
- 	int err;
- 	char nodename[1024];
- 
--	if (!psdev) 
++	return err;
++}
++
++static void pcistub_remove(struct pci_dev *dev)
++{
++	struct pcistub_device *psdev, *found_psdev = NULL;
++	unsigned long flags;
++
++	dev_dbg(&dev->dev, "removing\n");
++
++	spin_lock_irqsave(&pcistub_devices_lock, flags);
++
++	pciback_config_quirk_release(dev);
++
++	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
++		if (psdev->dev == dev) {
++			found_psdev = psdev;
++			break;
++		}
++	}
++
++	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++
++	if (found_psdev) {
++		dev_dbg(&dev->dev, "found device to remove - in use? %p\n",
++			found_psdev->pdev);
++
++		if (found_psdev->pdev) {
++			printk(KERN_WARNING "pciback: ****** removing device "
++			       "%s while still in-use! ******\n",
++			       pci_name(found_psdev->dev));
++			printk(KERN_WARNING "pciback: ****** driver domain may "
++			       "still access this device's i/o resources!\n");
++			printk(KERN_WARNING "pciback: ****** shutdown driver "
++			       "domain before binding device\n");
++			printk(KERN_WARNING "pciback: ****** to other drivers "
++			       "or domains\n");
++
++			pciback_release_pci_dev(found_psdev->pdev,
++						found_psdev->dev);
++		}
++
++		spin_lock_irqsave(&pcistub_devices_lock, flags);
++		list_del(&found_psdev->dev_list);
++		spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++
++		/* the final put for releasing from the list */
++		pcistub_device_put(found_psdev);
++	}
++}
++
++static DEFINE_PCI_DEVICE_TABLE(pcistub_ids) = {
++	{
++	 .vendor = PCI_ANY_ID,
++	 .device = PCI_ANY_ID,
++	 .subvendor = PCI_ANY_ID,
++	 .subdevice = PCI_ANY_ID,
++	 },
++	{0,},
++};
++
++#define PCI_NODENAME_MAX 40
++static void kill_domain_by_device(struct pcistub_device *psdev)
++{
++	struct xenbus_transaction xbt;
++	int err;
++	char nodename[PCI_NODENAME_MAX];
++
 +	if (!psdev)
- 		dev_err(&psdev->dev->dev,
- 			"device is NULL when do AER recovery/kill_domain\n");
--	sprintf(nodename, "/local/domain/0/backend/pci/%d/0", 
-+	sprintf(nodename, "/local/domain/0/backend/pci/%d/0",
- 		psdev->pdev->xdev->otherend_id);
- 	nodename[strlen(nodename)] = '\0';
- 
- again:
- 	err = xenbus_transaction_start(&xbt);
--	if (err)
--	{
++		dev_err(&psdev->dev->dev,
++			"device is NULL when do AER recovery/kill_domain\n");
++	snprintf(nodename, PCI_NODENAME_MAX, "/local/domain/0/backend/pci/%d/0",
++		psdev->pdev->xdev->otherend_id);
++	nodename[strlen(nodename)] = '\0';
++
++again:
++	err = xenbus_transaction_start(&xbt);
 +	if (err) {
- 		dev_err(&psdev->dev->dev,
- 			"error %d when start xenbus transaction\n", err);
- 		return;
- 	}
- 	/*PV AER handlers will set this flag*/
--	xenbus_printf(xbt, nodename, "aerState" , "aerfail" );
++		dev_err(&psdev->dev->dev,
++			"error %d when start xenbus transaction\n", err);
++		return;
++	}
++	/*PV AER handlers will set this flag*/
 +	xenbus_printf(xbt, nodename, "aerState" , "aerfail");
- 	err = xenbus_transaction_end(xbt, 0);
--	if (err)
--	{
++	err = xenbus_transaction_end(xbt, 0);
 +	if (err) {
- 		if (err == -EAGAIN)
- 			goto again;
- 		dev_err(&psdev->dev->dev,
-@@ -541,9 +539,9 @@ again:
- 
- /* For each aer recovery step error_detected, mmio_enabled, etc, front_end and
-  * backend need to have cooperation. In pciback, those steps will do similar
-- * jobs: send service request and waiting for front_end response. 
++		if (err == -EAGAIN)
++			goto again;
++		dev_err(&psdev->dev->dev,
++			"error %d when end xenbus transaction\n", err);
++		return;
++	}
++}
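
kill_domain_by_device() above also shows the standard xenbus transaction retry idiom: xenbus_transaction_end() returns -EAGAIN when another writer raced with the transaction, and the whole read-modify-write is simply restarted. The idiom in isolation (editor's sketch; the node path and value are placeholders, not part of the patch):

    #include <xen/xenbus.h>

    static void write_aer_state(const char *nodename, const char *state)
    {
    	struct xenbus_transaction xbt;
    	int err;

    again:
    	err = xenbus_transaction_start(&xbt);
    	if (err)
    		return;		/* could not even start; give up */
    	xenbus_printf(xbt, nodename, "aerState", "%s", state);
    	err = xenbus_transaction_end(xbt, 0);
    	if (err == -EAGAIN)
    		goto again;	/* lost a race; redo the transaction */
    }
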
++
++/* For each AER recovery step (error_detected, mmio_enabled, etc.), front_end and
++ * backend need to cooperate. In pciback, those steps will do similar
 + * jobs: send service request and waiting for front_end response.
- */
--static pci_ers_result_t common_process(struct pcistub_device *psdev, 
++*/
 +static pci_ers_result_t common_process(struct pcistub_device *psdev,
- 		pci_channel_state_t state, int aer_cmd, pci_ers_result_t result)
- {
- 	pci_ers_result_t res = result;
-@@ -561,12 +559,12 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
- 	if (!ret) {
- 		dev_err(&psdev->dev->dev,
- 			"pciback: failed to get pcifront device\n");
--		return PCI_ERS_RESULT_NONE; 
++		pci_channel_state_t state, int aer_cmd, pci_ers_result_t result)
++{
++	pci_ers_result_t res = result;
++	struct xen_pcie_aer_op *aer_op;
++	int ret;
++
++	/*with PV AER drivers*/
++	aer_op = &(psdev->pdev->sh_info->aer_op);
++	aer_op->cmd = aer_cmd;
++	/*useful for error_detected callback*/
++	aer_op->err = state;
++	/*pcifront_end BDF*/
++	ret = pciback_get_pcifront_dev(psdev->dev, psdev->pdev,
++		&aer_op->domain, &aer_op->bus, &aer_op->devfn);
++	if (!ret) {
++		dev_err(&psdev->dev->dev,
++			"pciback: failed to get pcifront device\n");
 +		return PCI_ERS_RESULT_NONE;
- 	}
- 	wmb();
- 
--	dev_dbg(&psdev->dev->dev, 
--			"pciback: aer_op %x dom %x bus %x devfn %x\n",  
++	}
++	wmb();
++
 +	dev_dbg(&psdev->dev->dev,
 +			"pciback: aer_op %x dom %x bus %x devfn %x\n",
- 			aer_cmd, aer_op->domain, aer_op->bus, aer_op->devfn);
- 	/*local flag to mark there's aer request, pciback callback will use this
- 	* flag to judge whether we need to check pci-front give aer service
-@@ -575,21 +573,21 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
- 	set_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);
- 
- 	/*It is possible that a pcifront conf_read_write ops request invokes
--	* the callback which cause the spurious execution of wake_up. 
++			aer_cmd, aer_op->domain, aer_op->bus, aer_op->devfn);
++	/* Local flag to mark that there is an AER request; the pciback
++	* callback will use this flag to judge whether we need to check
++	* for pcifront's AER service ack signal
++	*/
++	set_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);
++
++	/*It is possible that a pcifront conf_read_write ops request invokes
 +	* the callback which cause the spurious execution of wake_up.
- 	* Yet it is harmless and better than a spinlock here
- 	*/
--	set_bit(_XEN_PCIB_active, 
++	* Yet it is harmless and better than a spinlock here
++	*/
 +	set_bit(_XEN_PCIB_active,
- 		(unsigned long *)&psdev->pdev->sh_info->flags);
- 	wmb();
- 	notify_remote_via_irq(psdev->pdev->evtchn_irq);
- 
- 	ret = wait_event_timeout(aer_wait_queue, !(test_bit(_XEN_PCIB_active,
--                (unsigned long *)&psdev->pdev->sh_info->flags)), 300*HZ);
++		(unsigned long *)&psdev->pdev->sh_info->flags);
++	wmb();
++	notify_remote_via_irq(psdev->pdev->evtchn_irq);
++
++	ret = wait_event_timeout(aer_wait_queue, !(test_bit(_XEN_PCIB_active,
 +		(unsigned long *)&psdev->pdev->sh_info->flags)), 300*HZ);
- 
- 	if (!ret) {
--		if (test_bit(_XEN_PCIB_active, 
++
++	if (!ret) {
 +		if (test_bit(_XEN_PCIB_active,
- 			(unsigned long *)&psdev->pdev->sh_info->flags)) {
--			dev_err(&psdev->dev->dev, 
++			(unsigned long *)&psdev->pdev->sh_info->flags)) {
 +			dev_err(&psdev->dev->dev,
- 				"pcifront aer process not responding!\n");
- 			clear_bit(_XEN_PCIB_active,
- 			  (unsigned long *)&psdev->pdev->sh_info->flags);
-@@ -599,16 +597,16 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
- 	}
- 	clear_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);
- 
--	if ( test_bit( _XEN_PCIF_active,
--		(unsigned long*)&psdev->pdev->sh_info->flags)) {
--		dev_dbg(&psdev->dev->dev, 
++				"pcifront aer process not responding!\n");
++			clear_bit(_XEN_PCIB_active,
++			  (unsigned long *)&psdev->pdev->sh_info->flags);
++			aer_op->err = PCI_ERS_RESULT_NONE;
++			return res;
++		}
++	}
++	clear_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);
++
 +	if (test_bit(_XEN_PCIF_active,
 +		(unsigned long *)&psdev->pdev->sh_info->flags)) {
-+		dev_dbg(&psdev->dev->dev,
- 			"schedule pci_conf service in pciback \n");
- 		test_and_schedule_op(psdev->pdev);
- 	}
- 
- 	res = (pci_ers_result_t)aer_op->err;
- 	return res;
--} 
++		dev_dbg(&psdev->dev->dev,
++			"schedule pci_conf service in pciback\n");
++		test_and_schedule_op(psdev->pdev);
++	}
++
++	res = (pci_ers_result_t)aer_op->err;
++	return res;
 +}
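
common_process() above is built around a shared-page handshake: the backend raises _XEN_PCIB_active in the shared info, kicks the frontend's event channel, then sleeps until the frontend clears the flag or a timeout fires. Stripped of the AER specifics, the shape is roughly (editor's sketch; sh_flags, irq and wq stand in for the shared state; assumes <linux/wait.h>, <linux/bitops.h>, <linux/errno.h> and <xen/events.h>):

    static int signal_and_wait(unsigned long *sh_flags, int flag_bit,
    			   int irq, wait_queue_head_t *wq)
    {
    	set_bit(flag_bit, sh_flags);
    	wmb();				/* flag visible before the kick */
    	notify_remote_via_irq(irq);

    	/* The frontend clears the bit once it has serviced the request. */
    	if (!wait_event_timeout(*wq, !test_bit(flag_bit, sh_flags), 300 * HZ))
    		return -ETIMEDOUT;
    	return 0;
    }
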
- 
- /*
- * pciback_slot_reset: it will send the slot_reset request to  pcifront in case
-@@ -632,24 +630,22 @@ static pci_ers_result_t pciback_slot_reset(struct pci_dev *dev)
- 				PCI_SLOT(dev->devfn),
- 				PCI_FUNC(dev->devfn));
- 
--	if ( !psdev || !psdev->pdev )
--	{
--		dev_err(&dev->dev, 
++
++/*
++* pciback_slot_reset: it will send the slot_reset request to pcifront in case
++* the device driver can provide this service, and then wait for pcifront's
++* ack.
++* @dev: pointer to PCI devices
++* return value is used by aer_core do_recovery policy
++*/
++static pci_ers_result_t pciback_slot_reset(struct pci_dev *dev)
++{
++	struct pcistub_device *psdev;
++	pci_ers_result_t result;
++
++	result = PCI_ERS_RESULT_RECOVERED;
++	dev_dbg(&dev->dev, "pciback_slot_reset(bus:%x,devfn:%x)\n",
++		dev->bus->number, dev->devfn);
++
++	down_write(&pcistub_sem);
++	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
++				dev->bus->number,
++				PCI_SLOT(dev->devfn),
++				PCI_FUNC(dev->devfn));
++
 +	if (!psdev || !psdev->pdev) {
 +		dev_err(&dev->dev,
- 			"pciback device is not found/assigned\n");
- 		goto end;
- 	}
- 
--	if ( !psdev->pdev->sh_info )
--	{
++			"pciback device is not found/assigned\n");
++		goto end;
++	}
++
 +	if (!psdev->pdev->sh_info) {
- 		dev_err(&dev->dev, "pciback device is not connected or owned"
- 			" by HVM, kill it\n");
- 		kill_domain_by_device(psdev);
- 		goto release;
- 	}
- 
--	if ( !test_bit(_XEN_PCIB_AERHANDLER, 
--		(unsigned long *)&psdev->pdev->sh_info->flags) ) {
--		dev_err(&dev->dev, 
++		dev_err(&dev->dev, "pciback device is not connected or owned"
++			" by HVM, kill it\n");
++		kill_domain_by_device(psdev);
++		goto release;
++	}
++
 +	if (!test_bit(_XEN_PCIB_AERHANDLER,
 +		(unsigned long *)&psdev->pdev->sh_info->flags)) {
 +		dev_err(&dev->dev,
- 			"guest with no AER driver should have been killed\n");
- 		goto release;
- 	}
-@@ -657,7 +653,7 @@ static pci_ers_result_t pciback_slot_reset(struct pci_dev *dev)
- 
- 	if (result == PCI_ERS_RESULT_NONE ||
- 		result == PCI_ERS_RESULT_DISCONNECT) {
--		dev_dbg(&dev->dev, 
++			"guest with no AER driver should have been killed\n");
++		goto release;
++	}
++	result = common_process(psdev, 1, XEN_PCI_OP_aer_slotreset, result);
++
++	if (result == PCI_ERS_RESULT_NONE ||
++		result == PCI_ERS_RESULT_DISCONNECT) {
 +		dev_dbg(&dev->dev,
- 			"No AER slot_reset service or disconnected!\n");
- 		kill_domain_by_device(psdev);
- 	}
-@@ -670,9 +666,9 @@ end:
- }
- 
- 
--/*pciback_mmio_enabled: it will send the mmio_enabled request to  pcifront 
--* in case of the device driver could provide this service, and then wait 
--* for pcifront ack.
++			"No AER slot_reset service or disconnected!\n");
++		kill_domain_by_device(psdev);
++	}
++release:
++	pcistub_device_put(psdev);
++end:
++	up_write(&pcistub_sem);
++	return result;
++
++}
++
++
++/*pciback_mmio_enabled: it will send the mmio_enabled request to pcifront
++* in case the device driver can provide this service, and then wait
 +* for pcifront ack
- * @dev: pointer to PCI devices
- * return value is used by aer_core do_recovery policy
- */
-@@ -692,24 +688,22 @@ static pci_ers_result_t pciback_mmio_enabled(struct pci_dev *dev)
- 				PCI_SLOT(dev->devfn),
- 				PCI_FUNC(dev->devfn));
- 
--	if ( !psdev || !psdev->pdev )
--	{
--		dev_err(&dev->dev, 
++* @dev: pointer to PCI devices
++* return value is used by aer_core do_recovery policy
++*/
++
++static pci_ers_result_t pciback_mmio_enabled(struct pci_dev *dev)
++{
++	struct pcistub_device *psdev;
++	pci_ers_result_t result;
++
++	result = PCI_ERS_RESULT_RECOVERED;
++	dev_dbg(&dev->dev, "pciback_mmio_enabled(bus:%x,devfn:%x)\n",
++		dev->bus->number, dev->devfn);
++
++	down_write(&pcistub_sem);
++	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
++				dev->bus->number,
++				PCI_SLOT(dev->devfn),
++				PCI_FUNC(dev->devfn));
++
 +	if (!psdev || !psdev->pdev) {
 +		dev_err(&dev->dev,
- 			"pciback device is not found/assigned\n");
- 		goto end;
- 	}
- 
--	if ( !psdev->pdev->sh_info )
--	{
++			"pciback device is not found/assigned\n");
++		goto end;
++	}
++
 +	if (!psdev->pdev->sh_info) {
- 		dev_err(&dev->dev, "pciback device is not connected or owned"
- 			" by HVM, kill it\n");
- 		kill_domain_by_device(psdev);
- 		goto release;
- 	}
- 
--	if ( !test_bit(_XEN_PCIB_AERHANDLER, 
--		(unsigned long *)&psdev->pdev->sh_info->flags) ) {
--		dev_err(&dev->dev, 
++		dev_err(&dev->dev, "pciback device is not connected or owned"
++			" by HVM, kill it\n");
++		kill_domain_by_device(psdev);
++		goto release;
++	}
++
 +	if (!test_bit(_XEN_PCIB_AERHANDLER,
 +		(unsigned long *)&psdev->pdev->sh_info->flags)) {
 +		dev_err(&dev->dev,
- 			"guest with no AER driver should have been killed\n");
- 		goto release;
- 	}
-@@ -717,7 +711,7 @@ static pci_ers_result_t pciback_mmio_enabled(struct pci_dev *dev)
- 
- 	if (result == PCI_ERS_RESULT_NONE ||
- 		result == PCI_ERS_RESULT_DISCONNECT) {
--		dev_dbg(&dev->dev, 
++			"guest with no AER driver should have been killed\n");
++		goto release;
++	}
++	result = common_process(psdev, 1, XEN_PCI_OP_aer_mmio, result);
++
++	if (result == PCI_ERS_RESULT_NONE ||
++		result == PCI_ERS_RESULT_DISCONNECT) {
 +		dev_dbg(&dev->dev,
- 			"No AER mmio_enabled service or disconnected!\n");
- 		kill_domain_by_device(psdev);
- 	}
-@@ -728,8 +722,8 @@ end:
- 	return result;
- }
- 
--/*pciback_error_detected: it will send the error_detected request to  pcifront 
--* in case of the device driver could provide this service, and then wait 
++			"No AER mmio_enabled service or disconnected!\n");
++		kill_domain_by_device(psdev);
++	}
++release:
++	pcistub_device_put(psdev);
++end:
++	up_write(&pcistub_sem);
++	return result;
++}
++
 +/*pciback_error_detected: it will send the error_detected request to pcifront
 +* in case the device driver can provide this service, and then wait
- * for pcifront ack.
- * @dev: pointer to PCI devices
- * @error: the current PCI connection state
-@@ -752,15 +746,13 @@ static pci_ers_result_t pciback_error_detected(struct pci_dev *dev,
- 				PCI_SLOT(dev->devfn),
- 				PCI_FUNC(dev->devfn));
- 
--	if ( !psdev || !psdev->pdev )
--	{
--		dev_err(&dev->dev, 
++* for pcifront ack.
++* @dev: pointer to the PCI device
++* @error: the current PCI connection state
++* return value is used by aer_core do_recovery policy
++*/
++
++static pci_ers_result_t pciback_error_detected(struct pci_dev *dev,
++	pci_channel_state_t error)
++{
++	struct pcistub_device *psdev;
++	pci_ers_result_t result;
++
++	result = PCI_ERS_RESULT_CAN_RECOVER;
++	dev_dbg(&dev->dev, "pciback_error_detected(bus:%x,devfn:%x)\n",
++		dev->bus->number, dev->devfn);
++
++	down_write(&pcistub_sem);
++	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
++				dev->bus->number,
++				PCI_SLOT(dev->devfn),
++				PCI_FUNC(dev->devfn));
++
 +	if (!psdev || !psdev->pdev) {
 +		dev_err(&dev->dev,
- 			"pciback device is not found/assigned\n");
- 		goto end;
- 	}
- 
--	if ( !psdev->pdev->sh_info )
--	{
++			"pciback device is not found/assigned\n");
++		goto end;
++	}
++
 +	if (!psdev->pdev->sh_info) {
- 		dev_err(&dev->dev, "pciback device is not connected or owned"
- 			" by HVM, kill it\n");
- 		kill_domain_by_device(psdev);
-@@ -768,8 +760,8 @@ static pci_ers_result_t pciback_error_detected(struct pci_dev *dev,
- 	}
- 
- 	/*Guest owns the device yet no aer handler regiested, kill guest*/
--	if ( !test_bit(_XEN_PCIB_AERHANDLER, 
--		(unsigned long *)&psdev->pdev->sh_info->flags) ) {
++		dev_err(&dev->dev, "pciback device is not connected or owned"
++			" by HVM, kill it\n");
++		kill_domain_by_device(psdev);
++		goto release;
++	}
++
++	/*Guest owns the device yet no AER handler registered, kill guest*/
 +	if (!test_bit(_XEN_PCIB_AERHANDLER,
 +		(unsigned long *)&psdev->pdev->sh_info->flags)) {
- 		dev_dbg(&dev->dev, "guest may have no aer driver, kill it\n");
- 		kill_domain_by_device(psdev);
- 		goto release;
-@@ -778,7 +770,7 @@ static pci_ers_result_t pciback_error_detected(struct pci_dev *dev,
- 
- 	if (result == PCI_ERS_RESULT_NONE ||
- 		result == PCI_ERS_RESULT_DISCONNECT) {
--		dev_dbg(&dev->dev, 
++		dev_dbg(&dev->dev, "guest may have no aer driver, kill it\n");
++		kill_domain_by_device(psdev);
++		goto release;
++	}
++	result = common_process(psdev, error, XEN_PCI_OP_aer_detected, result);
++
++	if (result == PCI_ERS_RESULT_NONE ||
++		result == PCI_ERS_RESULT_DISCONNECT) {
 +		dev_dbg(&dev->dev,
- 			"No AER error_detected service or disconnected!\n");
- 		kill_domain_by_device(psdev);
- 	}
-@@ -789,8 +781,8 @@ end:
- 	return result;
- }
- 
--/*pciback_error_resume: it will send the error_resume request to  pcifront 
--* in case of the device driver could provide this service, and then wait 
++			"No AER error_detected service or disconnected!\n");
++		kill_domain_by_device(psdev);
++	}
++release:
++	pcistub_device_put(psdev);
++end:
++	up_write(&pcistub_sem);
++	return result;
++}
++
 +/*pciback_error_resume: it will send the error_resume request to pcifront
 +* in case the device driver can provide this service, and then wait
- * for pcifront ack.
- * @dev: pointer to PCI devices
- */
-@@ -808,29 +800,28 @@ static void pciback_error_resume(struct pci_dev *dev)
- 				PCI_SLOT(dev->devfn),
- 				PCI_FUNC(dev->devfn));
- 
--	if ( !psdev || !psdev->pdev )
--	{
--		dev_err(&dev->dev, 
++* for pcifront ack.
++* @dev: pointer to the PCI device
++*/
++
++static void pciback_error_resume(struct pci_dev *dev)
++{
++	struct pcistub_device *psdev;
++
++	dev_dbg(&dev->dev, "pciback_error_resume(bus:%x,devfn:%x)\n",
++		dev->bus->number, dev->devfn);
++
++	down_write(&pcistub_sem);
++	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
++				dev->bus->number,
++				PCI_SLOT(dev->devfn),
++				PCI_FUNC(dev->devfn));
++
 +	if (!psdev || !psdev->pdev) {
 +		dev_err(&dev->dev,
- 			"pciback device is not found/assigned\n");
- 		goto end;
- 	}
- 
--	if ( !psdev->pdev->sh_info )
--	{
++			"pciback device is not found/assigned\n");
++		goto end;
++	}
++
 +	if (!psdev->pdev->sh_info) {
- 		dev_err(&dev->dev, "pciback device is not connected or owned"
- 			" by HVM, kill it\n");
- 		kill_domain_by_device(psdev);
- 		goto release;
- 	}
- 
--	if ( !test_bit(_XEN_PCIB_AERHANDLER, 
--		(unsigned long *)&psdev->pdev->sh_info->flags) ) {
--		dev_err(&dev->dev, 
++		dev_err(&dev->dev, "pciback device is not connected or owned"
++			" by HVM, kill it\n");
++		kill_domain_by_device(psdev);
++		goto release;
++	}
++
 +	if (!test_bit(_XEN_PCIB_AERHANDLER,
 +		(unsigned long *)&psdev->pdev->sh_info->flags)) {
 +		dev_err(&dev->dev,
- 			"guest with no AER driver should have been killed\n");
- 		kill_domain_by_device(psdev);
- 		goto release;
- 	}
--	common_process(psdev, 1, XEN_PCI_OP_aer_resume, PCI_ERS_RESULT_RECOVERED);
++			"guest with no AER driver should have been killed\n");
++		kill_domain_by_device(psdev);
++		goto release;
++	}
 +	common_process(psdev, 1, XEN_PCI_OP_aer_resume,
 +		       PCI_ERS_RESULT_RECOVERED);
- release:
- 	pcistub_device_put(psdev);
- end:
-@@ -923,8 +914,8 @@ static int pcistub_device_id_remove(int domain, int bus, int slot, int func)
- 	unsigned long flags;
- 
- 	spin_lock_irqsave(&device_ids_lock, flags);
--	list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids, slot_list) {
--
++release:
++	pcistub_device_put(psdev);
++end:
++	up_write(&pcistub_sem);
++	return;
++}
++
++/*add pciback AER handling*/
++static struct pci_error_handlers pciback_error_handler = {
++	.error_detected = pciback_error_detected,
++	.mmio_enabled = pciback_mmio_enabled,
++	.slot_reset = pciback_slot_reset,
++	.resume = pciback_error_resume,
++};
++
++/*
++ * Note: There is no MODULE_DEVICE_TABLE entry here because this isn't
++ * for a normal device. I don't want it to be loaded automatically.
++ */
++
++static struct pci_driver pciback_pci_driver = {
++	.name = DRV_NAME,
++	.id_table = pcistub_ids,
++	.probe = pcistub_probe,
++	.remove = pcistub_remove,
++	.err_handler = &pciback_error_handler,
++};
++
++static inline int str_to_slot(const char *buf, int *domain, int *bus,
++			      int *slot, int *func)
++{
++	int err;
++
++	err = sscanf(buf, " %x:%x:%x.%x", domain, bus, slot, func);
++	if (err == 4)
++		return 0;
++	else if (err < 0)
++		return -EINVAL;
++
++	/* try again without domain */
++	*domain = 0;
++	err = sscanf(buf, " %x:%x.%x", bus, slot, func);
++	if (err == 3)
++		return 0;
++
++	return -EINVAL;
++}
++
++static inline int str_to_quirk(const char *buf, int *domain, int *bus, int
++			       *slot, int *func, int *reg, int *size, int *mask)
++{
++	int err;
++
++	err =
++	    sscanf(buf, " %04x:%02x:%02x.%1x-%08x:%1x:%08x", domain, bus, slot,
++		   func, reg, size, mask);
++	if (err == 7)
++		return 0;
++	return -EINVAL;
++}
++
++static int pcistub_device_id_add(int domain, int bus, int slot, int func)
++{
++	struct pcistub_device_id *pci_dev_id;
++	unsigned long flags;
++
++	pci_dev_id = kmalloc(sizeof(*pci_dev_id), GFP_KERNEL);
++	if (!pci_dev_id)
++		return -ENOMEM;
++
++	pci_dev_id->domain = domain;
++	pci_dev_id->bus = bus;
++	pci_dev_id->devfn = PCI_DEVFN(slot, func);
++
++	pr_debug("pciback: wants to seize %04x:%02x:%02x.%01x\n",
++		 domain, bus, slot, func);
++
++	spin_lock_irqsave(&device_ids_lock, flags);
++	list_add_tail(&pci_dev_id->slot_list, &pcistub_device_ids);
++	spin_unlock_irqrestore(&device_ids_lock, flags);
++
++	return 0;
++}
++
++static int pcistub_device_id_remove(int domain, int bus, int slot, int func)
++{
++	struct pcistub_device_id *pci_dev_id, *t;
++	int devfn = PCI_DEVFN(slot, func);
++	int err = -ENOENT;
++	unsigned long flags;
++
++	spin_lock_irqsave(&device_ids_lock, flags);
 +	list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids,
 +				 slot_list) {
- 		if (pci_dev_id->domain == domain
- 		    && pci_dev_id->bus == bus && pci_dev_id->devfn == devfn) {
- 			/* Don't break; here because it's possible the same
-@@ -976,7 +967,7 @@ static int pcistub_reg_add(int domain, int bus, int slot, int func, int reg,
- 	err = pciback_config_quirks_add_field(dev, field);
- 	if (err)
- 		kfree(field);
--      out:
++		if (pci_dev_id->domain == domain
++		    && pci_dev_id->bus == bus && pci_dev_id->devfn == devfn) {
++			/* Don't break; here because it's possible the same
++			 * slot could be in the list more than once
++			 */
++			list_del(&pci_dev_id->slot_list);
++			kfree(pci_dev_id);
++
++			err = 0;
++
++			pr_debug("pciback: removed %04x:%02x:%02x.%01x from "
++				 "seize list\n", domain, bus, slot, func);
++		}
++	}
++	spin_unlock_irqrestore(&device_ids_lock, flags);
++
++	return err;
++}
++
++static int pcistub_reg_add(int domain, int bus, int slot, int func, int reg,
++			   int size, int mask)
++{
++	int err = 0;
++	struct pcistub_device *psdev;
++	struct pci_dev *dev;
++	struct config_field *field;
++
++	psdev = pcistub_device_find(domain, bus, slot, func);
++	if (!psdev || !psdev->dev) {
++		err = -ENODEV;
++		goto out;
++	}
++	dev = psdev->dev;
++
++	field = kzalloc(sizeof(*field), GFP_ATOMIC);
++	if (!field) {
++		err = -ENOMEM;
++		goto out;
++	}
++
++	field->offset = reg;
++	field->size = size;
++	field->mask = mask;
++	field->init = NULL;
++	field->reset = NULL;
++	field->release = NULL;
++	field->clean = pciback_config_field_free;
++
++	err = pciback_config_quirks_add_field(dev, field);
++	if (err)
++		kfree(field);
 +out:
- 	return err;
- }
- 
-@@ -992,7 +983,7 @@ static ssize_t pcistub_slot_add(struct device_driver *drv, const char *buf,
- 
- 	err = pcistub_device_id_add(domain, bus, slot, func);
- 
--      out:
++	return err;
++}
++
++static ssize_t pcistub_slot_add(struct device_driver *drv, const char *buf,
++				size_t count)
++{
++	int domain, bus, slot, func;
++	int err;
++
++	err = str_to_slot(buf, &domain, &bus, &slot, &func);
++	if (err)
++		goto out;
++
++	err = pcistub_device_id_add(domain, bus, slot, func);
++
 +out:
- 	if (!err)
- 		err = count;
- 	return err;
-@@ -1012,7 +1003,7 @@ static ssize_t pcistub_slot_remove(struct device_driver *drv, const char *buf,
- 
- 	err = pcistub_device_id_remove(domain, bus, slot, func);
- 
--      out:
++	if (!err)
++		err = count;
++	return err;
++}
++
++DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add);
++
++static ssize_t pcistub_slot_remove(struct device_driver *drv, const char *buf,
++				   size_t count)
++{
++	int domain, bus, slot, func;
++	int err;
++
++	err = str_to_slot(buf, &domain, &bus, &slot, &func);
++	if (err)
++		goto out;
++
++	err = pcistub_device_id_remove(domain, bus, slot, func);
++
 +out:
- 	if (!err)
- 		err = count;
- 	return err;
-@@ -1057,7 +1048,7 @@ static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf,
- 
- 	err = pcistub_reg_add(domain, bus, slot, func, reg, size, mask);
- 
--      out:
++	if (!err)
++		err = count;
++	return err;
++}
++
++DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove);
++
++static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf)
++{
++	struct pcistub_device_id *pci_dev_id;
++	size_t count = 0;
++	unsigned long flags;
++
++	spin_lock_irqsave(&device_ids_lock, flags);
++	list_for_each_entry(pci_dev_id, &pcistub_device_ids, slot_list) {
++		if (count >= PAGE_SIZE)
++			break;
++
++		count += scnprintf(buf + count, PAGE_SIZE - count,
++				   "%04x:%02x:%02x.%01x\n",
++				   pci_dev_id->domain, pci_dev_id->bus,
++				   PCI_SLOT(pci_dev_id->devfn),
++				   PCI_FUNC(pci_dev_id->devfn));
++	}
++	spin_unlock_irqrestore(&device_ids_lock, flags);
++
++	return count;
++}
++
++DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL);
++
++static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf)
++{
++	struct pcistub_device *psdev;
++	struct pciback_dev_data *dev_data;
++	size_t count = 0;
++	unsigned long flags;
++
++	spin_lock_irqsave(&pcistub_devices_lock, flags);
++	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
++		if (count >= PAGE_SIZE)
++			break;
++		if (!psdev->dev)
++			continue;
++		dev_data = pci_get_drvdata(psdev->dev);
++		if (!dev_data)
++			continue;
++		count +=
++		    scnprintf(buf + count, PAGE_SIZE - count,
++			      "%s:%s:%sing:%ld\n",
++			      pci_name(psdev->dev),
++			      dev_data->isr_on ? "on" : "off",
++			      dev_data->ack_intr ? "ack" : "not ack",
++			      dev_data->handled);
++	}
++	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++	return count;
++}
++
++DRIVER_ATTR(irq_handlers, S_IRUSR, pcistub_irq_handler_show, NULL);
++
++static ssize_t pcistub_irq_handler_switch(struct device_driver *drv,
++					  const char *buf,
++					  size_t count)
++{
++	struct pcistub_device *psdev;
++	struct pciback_dev_data *dev_data;
++	int domain, bus, slot, func;
++	int err = -ENOENT;
++
++	err = str_to_slot(buf, &domain, &bus, &slot, &func);
++	if (err)
++		goto out;
++
++	psdev = pcistub_device_find(domain, bus, slot, func);
++
++	if (!psdev)
++		goto out;
++
++	dev_data = pci_get_drvdata(psdev->dev);
++	if (!dev_data)
++		goto out;
++
++	dev_dbg(&psdev->dev->dev, "%s fake irq handler: %d->%d\n",
++		dev_data->irq_name, dev_data->isr_on,
++		!dev_data->isr_on);
++
++	dev_data->isr_on = !(dev_data->isr_on);
++	if (dev_data->isr_on)
++		dev_data->ack_intr = 1;
++out:
++	if (!err)
++		err = count;
++	return err;
++}
++DRIVER_ATTR(irq_handler_state, S_IWUSR, NULL, pcistub_irq_handler_switch);
++
++static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf,
++				 size_t count)
++{
++	int domain, bus, slot, func, reg, size, mask;
++	int err;
++
++	err = str_to_quirk(buf, &domain, &bus, &slot, &func, &reg, &size,
++			   &mask);
++	if (err)
++		goto out;
++
++	err = pcistub_reg_add(domain, bus, slot, func, reg, size, mask);
++
 +out:
- 	if (!err)
- 		err = count;
- 	return err;
-@@ -1067,7 +1058,6 @@ static ssize_t pcistub_quirk_show(struct device_driver *drv, char *buf)
- {
- 	int count = 0;
- 	unsigned long flags;
--	extern struct list_head pciback_quirks;
- 	struct pciback_config_quirk *quirk;
- 	struct pciback_dev_data *dev_data;
- 	const struct config_field *field;
-@@ -1096,12 +1086,13 @@ static ssize_t pcistub_quirk_show(struct device_driver *drv, char *buf)
- 
- 			count += scnprintf(buf + count, PAGE_SIZE - count,
- 					   "\t\t%08x:%01x:%08x\n",
--					   cfg_entry->base_offset + field->offset, 
--					   field->size, field->mask);
++	if (!err)
++		err = count;
++	return err;
++}
++
++static ssize_t pcistub_quirk_show(struct device_driver *drv, char *buf)
++{
++	int count = 0;
++	unsigned long flags;
++	struct pciback_config_quirk *quirk;
++	struct pciback_dev_data *dev_data;
++	const struct config_field *field;
++	const struct config_field_entry *cfg_entry;
++
++	spin_lock_irqsave(&device_ids_lock, flags);
++	list_for_each_entry(quirk, &pciback_quirks, quirks_list) {
++		if (count >= PAGE_SIZE)
++			goto out;
++
++		count += scnprintf(buf + count, PAGE_SIZE - count,
++				   "%02x:%02x.%01x\n\t%04x:%04x:%04x:%04x\n",
++				   quirk->pdev->bus->number,
++				   PCI_SLOT(quirk->pdev->devfn),
++				   PCI_FUNC(quirk->pdev->devfn),
++				   quirk->devid.vendor, quirk->devid.device,
++				   quirk->devid.subvendor,
++				   quirk->devid.subdevice);
++
++		dev_data = pci_get_drvdata(quirk->pdev);
++
++		list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
++			field = cfg_entry->field;
++			if (count >= PAGE_SIZE)
++				goto out;
++
++			count += scnprintf(buf + count, PAGE_SIZE - count,
++					   "\t\t%08x:%01x:%08x\n",
 +					   cfg_entry->base_offset +
 +					   field->offset, field->size,
 +					   field->mask);
- 		}
- 	}
- 
--      out:
++		}
++	}
++
 +out:
- 	spin_unlock_irqrestore(&device_ids_lock, flags);
- 
- 	return count;
-@@ -1137,14 +1128,14 @@ static ssize_t permissive_add(struct device_driver *drv, const char *buf,
- 	if (!dev_data->permissive) {
- 		dev_data->permissive = 1;
- 		/* Let user know that what they're doing could be unsafe */
--		dev_warn(&psdev->dev->dev,
--			 "enabling permissive mode configuration space accesses!\n");
++	spin_unlock_irqrestore(&device_ids_lock, flags);
++
++	return count;
++}
++
++DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show, pcistub_quirk_add);
++
++static ssize_t permissive_add(struct device_driver *drv, const char *buf,
++			      size_t count)
++{
++	int domain, bus, slot, func;
++	int err;
++	struct pcistub_device *psdev;
++	struct pciback_dev_data *dev_data;
++	err = str_to_slot(buf, &domain, &bus, &slot, &func);
++	if (err)
++		goto out;
++	psdev = pcistub_device_find(domain, bus, slot, func);
++	if (!psdev) {
++		err = -ENODEV;
++		goto out;
++	}
++	if (!psdev->dev) {
++		err = -ENODEV;
++		goto release;
++	}
++	dev_data = pci_get_drvdata(psdev->dev);
++	/* the driver data for a device should never be null at this point */
++	if (!dev_data) {
++		err = -ENXIO;
++		goto release;
++	}
++	if (!dev_data->permissive) {
++		dev_data->permissive = 1;
++		/* Let user know that what they're doing could be unsafe */
 +		dev_warn(&psdev->dev->dev, "enabling permissive mode "
 +			 "configuration space accesses!\n");
- 		dev_warn(&psdev->dev->dev,
- 			 "permissive mode is potentially unsafe!\n");
- 	}
--      release:
++		dev_warn(&psdev->dev->dev,
++			 "permissive mode is potentially unsafe!\n");
++	}
 +release:
- 	pcistub_device_put(psdev);
--      out:
++	pcistub_device_put(psdev);
 +out:
- 	if (!err)
- 		err = count;
- 	return err;
-@@ -1264,10 +1255,10 @@ static int __init pcistub_init(void)
- 	if (err)
- 		pcistub_exit();
- 
--      out:
++	if (!err)
++		err = count;
++	return err;
++}
++
++static ssize_t permissive_show(struct device_driver *drv, char *buf)
++{
++	struct pcistub_device *psdev;
++	struct pciback_dev_data *dev_data;
++	size_t count = 0;
++	unsigned long flags;
++	spin_lock_irqsave(&pcistub_devices_lock, flags);
++	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
++		if (count >= PAGE_SIZE)
++			break;
++		if (!psdev->dev)
++			continue;
++		dev_data = pci_get_drvdata(psdev->dev);
++		if (!dev_data || !dev_data->permissive)
++			continue;
++		count +=
++		    scnprintf(buf + count, PAGE_SIZE - count, "%s\n",
++			      pci_name(psdev->dev));
++	}
++	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
++	return count;
++}
++
++DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, permissive_add);
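Taken together, the DRIVER_ATTR() entries above expose pciback's control
files under /sys/bus/pci/drivers/pciback/. As a sketch (hypothetical
userspace helper and device address, not part of this patch), putting a
device on the seize list amounts to writing its slot to new_slot:

	#include <stdio.h>

	static int pciback_seize(const char *bdf)	/* e.g. "0000:00:1d.0" */
	{
		FILE *f = fopen("/sys/bus/pci/drivers/pciback/new_slot", "w");

		if (!f)
			return -1;
		fprintf(f, "%s\n", bdf);	/* parsed by str_to_slot() */
		return fclose(f);
	}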
++
++static void pcistub_exit(void)
++{
++	driver_remove_file(&pciback_pci_driver.driver, &driver_attr_new_slot);
++	driver_remove_file(&pciback_pci_driver.driver,
++			   &driver_attr_remove_slot);
++	driver_remove_file(&pciback_pci_driver.driver, &driver_attr_slots);
++	driver_remove_file(&pciback_pci_driver.driver, &driver_attr_quirks);
++	driver_remove_file(&pciback_pci_driver.driver, &driver_attr_permissive);
++	driver_remove_file(&pciback_pci_driver.driver,
++			   &driver_attr_irq_handlers);
++	driver_remove_file(&pciback_pci_driver.driver,
++			   &driver_attr_irq_handler_state);
++	pci_unregister_driver(&pciback_pci_driver);
++}
++
++static int __init pcistub_init(void)
++{
++	int pos = 0;
++	int err = 0;
++	int domain, bus, slot, func;
++	int parsed;
++
++	if (pci_devs_to_hide && *pci_devs_to_hide) {
++		do {
++			parsed = 0;
++
++			err = sscanf(pci_devs_to_hide + pos,
++				     " (%x:%x:%x.%x) %n",
++				     &domain, &bus, &slot, &func, &parsed);
++			if (err != 4) {
++				domain = 0;
++				err = sscanf(pci_devs_to_hide + pos,
++					     " (%x:%x.%x) %n",
++					     &bus, &slot, &func, &parsed);
++				if (err != 3)
++					goto parse_error;
++			}
++
++			err = pcistub_device_id_add(domain, bus, slot, func);
++			if (err)
++				goto out;
++
++			/* if parsed<=0, we've reached the end of the string */
++			pos += parsed;
++		} while (parsed > 0 && pci_devs_to_hide[pos]);
++	}
++
++	/* If we're the first PCI Device Driver to register, we're the
++	 * first one to get offered PCI devices as they become
++	 * available (and thus we can be the first to grab them)
++	 */
++	err = pci_register_driver(&pciback_pci_driver);
++	if (err < 0)
++		goto out;
++
++	err = driver_create_file(&pciback_pci_driver.driver,
++				 &driver_attr_new_slot);
++	if (!err)
++		err = driver_create_file(&pciback_pci_driver.driver,
++					 &driver_attr_remove_slot);
++	if (!err)
++		err = driver_create_file(&pciback_pci_driver.driver,
++					 &driver_attr_slots);
++	if (!err)
++		err = driver_create_file(&pciback_pci_driver.driver,
++					 &driver_attr_quirks);
++	if (!err)
++		err = driver_create_file(&pciback_pci_driver.driver,
++					 &driver_attr_permissive);
++
++	if (!err)
++		err = driver_create_file(&pciback_pci_driver.driver,
++					 &driver_attr_irq_handlers);
++	if (!err)
++		err = driver_create_file(&pciback_pci_driver.driver,
++					&driver_attr_irq_handler_state);
++	if (err)
++		pcistub_exit();
++
 +out:
- 	return err;
- 
--      parse_error:
++	return err;
++
 +parse_error:
- 	printk(KERN_ERR "pciback: Error parsing pci_devs_to_hide at \"%s\"\n",
- 	       pci_devs_to_hide + pos);
- 	return -EINVAL;
-@@ -1276,7 +1267,7 @@ static int __init pcistub_init(void)
- #ifndef MODULE
- /*
-  * fs_initcall happens before device_initcall
-- * so pciback *should* get called first (b/c we 
++	printk(KERN_ERR "pciback: Error parsing pci_devs_to_hide at \"%s\"\n",
++	       pci_devs_to_hide + pos);
++	return -EINVAL;
++}
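The parse loop in pcistub_init() accepts a sequence of parenthesised slot
addresses, each with or without a leading PCI domain. Assuming pciback's
usual "hide" module parameter (its module_param_named() lives outside this
hunk) and made-up addresses, a boot-time configuration would look like:

	pciback.hide=(0000:00:1d.0)(01:00.0)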
++
++#ifndef MODULE
++/*
++ * fs_initcall happens before device_initcall
 + * so pciback *should* get called first (b/c we
-  * want to suck up any device before other drivers
-  * get a chance by being the first pci device
-  * driver to register)
++ * want to suck up any device before other drivers
++ * get a chance by being the first pci device
++ * driver to register)
++ */
++fs_initcall(pcistub_init);
++#endif
++
++static int __init pciback_init(void)
++{
++	int err;
++
++	if (!xen_initial_domain())
++		return -ENODEV;
++
++	err = pciback_config_init();
++	if (err)
++		return err;
++
++#ifdef MODULE
++	err = pcistub_init();
++	if (err < 0)
++		return err;
++#endif
++
++	pcistub_init_devices_late();
++	err = pciback_xenbus_register();
++	if (err)
++		pcistub_exit();
++
++	return err;
++}
++
++static void __exit pciback_cleanup(void)
++{
++	pciback_xenbus_unregister();
++	pcistub_exit();
++}
++
++module_init(pciback_init);
++module_exit(pciback_cleanup);
++
++MODULE_LICENSE("Dual BSD/GPL");
 diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h
-index 5e8e14e..98e2912 100644
---- a/drivers/xen/pciback/pciback.h
+new file mode 100644
+index 0000000..5c14020
+--- /dev/null
 +++ b/drivers/xen/pciback/pciback.h
-@@ -49,6 +49,12 @@ struct pciback_dev_data {
- 	int warned_on_write;
- };
- 
+@@ -0,0 +1,142 @@
++/*
++ * PCI Backend Common Data Structures & Function Declarations
++ *
++ *   Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ */
++#ifndef __XEN_PCIBACK_H__
++#define __XEN_PCIBACK_H__
++
++#include <linux/pci.h>
++#include <linux/interrupt.h>
++#include <xen/xenbus.h>
++#include <linux/list.h>
++#include <linux/spinlock.h>
++#include <linux/workqueue.h>
++#include <linux/atomic.h>
++#include <xen/interface/io/pciif.h>
++
++struct pci_dev_entry {
++	struct list_head list;
++	struct pci_dev *dev;
++};
++
++#define _PDEVF_op_active	(0)
++#define PDEVF_op_active		(1<<(_PDEVF_op_active))
++#define _PCIB_op_pending	(1)
++#define PCIB_op_pending		(1<<(_PCIB_op_pending))
++
++struct pciback_device {
++	void *pci_dev_data;
++	spinlock_t dev_lock;
++
++	struct xenbus_device *xdev;
++
++	struct xenbus_watch be_watch;
++	u8 be_watching;
++
++	int evtchn_irq;
++
++	struct xen_pci_sharedinfo *sh_info;
++
++	unsigned long flags;
++
++	struct work_struct op_work;
++};
++
++struct pciback_dev_data {
++	struct list_head config_fields;
++	unsigned int permissive:1;
++	unsigned int warned_on_write:1;
++	unsigned int enable_intx:1;
++	unsigned int isr_on:1; /* Whether the IRQ handler is installed. */
++	unsigned int ack_intr:1; /* .. and ACK-ing */
++	unsigned long handled;
++	unsigned int irq; /* Saved in case device transitions to MSI/MSI-X */
++	char irq_name[0]; /* pciback[000:04:00.0] */
++};
++
 +/* Used by XenBus and pciback_ops.c */
 +extern wait_queue_head_t aer_wait_queue;
 +extern struct workqueue_struct *pciback_wq;
 +/* Used by pcistub.c and conf_space_quirks.c */
 +extern struct list_head pciback_quirks;
 +
- /* Get/Put PCI Devices that are hidden from the PCI Backend Domain */
- struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev,
- 					    int domain, int bus,
-@@ -67,14 +73,14 @@ void pciback_config_free_dyn_fields(struct pci_dev *dev);
- void pciback_config_reset_dev(struct pci_dev *dev);
- void pciback_config_free_dev(struct pci_dev *dev);
- int pciback_config_read(struct pci_dev *dev, int offset, int size,
--			u32 * ret_val);
-+			u32 *ret_val);
- int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value);
- 
- /* Handle requests for specific devices from the frontend */
- typedef int (*publish_pci_dev_cb) (struct pciback_device *pdev,
- 				   unsigned int domain, unsigned int bus,
- 				   unsigned int devfn, unsigned int devid);
--typedef int (*publish_pci_root_cb) (struct pciback_device * pdev,
-+typedef int (*publish_pci_root_cb) (struct pciback_device *pdev,
- 				    unsigned int domain, unsigned int bus);
- int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
- 			int devid, publish_pci_dev_cb publish_cb);
-@@ -83,15 +89,17 @@ struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
- 				    unsigned int domain, unsigned int bus,
- 				    unsigned int devfn);
- 
--/** 
-+/**
- * Add for domain0 PCIE-AER handling. Get guest domain/bus/devfn in pciback
--* before sending aer request to pcifront, so that guest could identify 
-+* before sending aer request to pcifront, so that guest could identify
- * device, coopearte with pciback to finish aer recovery job if device driver
- * has the capability
- */
- 
--int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev, 
--				unsigned int *domain, unsigned int *bus, unsigned int *devfn);
-+int pciback_get_pcifront_dev(struct pci_dev *pcidev,
-+			     struct pciback_device *pdev,
-+			     unsigned int *domain, unsigned int *bus,
-+			     unsigned int *devfn);
- int pciback_init_devices(struct pciback_device *pdev);
- int pciback_publish_pci_roots(struct pciback_device *pdev,
- 			      publish_pci_root_cb cb);
-@@ -106,17 +114,17 @@ void pciback_xenbus_unregister(void);
- 
- #ifdef CONFIG_PCI_MSI
- int pciback_enable_msi(struct pciback_device *pdev,
--                       struct pci_dev *dev, struct xen_pci_op *op);
-+			struct pci_dev *dev, struct xen_pci_op *op);
- 
- int pciback_disable_msi(struct pciback_device *pdev,
--                         struct pci_dev *dev, struct xen_pci_op *op);
-+			struct pci_dev *dev, struct xen_pci_op *op);
- 
- 
- int pciback_enable_msix(struct pciback_device *pdev,
--                        struct pci_dev *dev, struct xen_pci_op *op);
-+			struct pci_dev *dev, struct xen_pci_op *op);
- 
- int pciback_disable_msix(struct pciback_device *pdev,
--                        struct pci_dev *dev, struct xen_pci_op *op);
-+			struct pci_dev *dev, struct xen_pci_op *op);
- #endif
- extern int verbose_request;
- 
-diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
-index 6624faf..bf83dca 100644
---- a/drivers/xen/pciback/pciback_ops.c
-+++ b/drivers/xen/pciback/pciback_ops.c
-@@ -5,11 +5,11 @@
-  */
- #include <linux/module.h>
- #include <linux/wait.h>
--#include <asm/bitops.h>
-+#include <linux/bitops.h>
- #include <xen/events.h>
- #include "pciback.h"
- 
--int verbose_request = 0;
-+int verbose_request;
- module_param(verbose_request, int, 0644);
- 
- /* Ensure a device is "turned off" and ready to be exported.
-@@ -37,12 +37,10 @@ void pciback_reset_device(struct pci_dev *dev)
- 		}
- 	}
- }
--extern wait_queue_head_t aer_wait_queue;
--extern struct workqueue_struct *pciback_wq;
- /*
- * Now the same evtchn is used for both pcifront conf_read_write request
- * as well as pcie aer front end ack. We use a new work_queue to schedule
--* pciback conf_read_write service for avoiding confict with aer_core 
++/* Get/Put PCI Devices that are hidden from the PCI Backend Domain */
++struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev,
++					    int domain, int bus,
++					    int slot, int func);
++struct pci_dev *pcistub_get_pci_dev(struct pciback_device *pdev,
++				    struct pci_dev *dev);
++void pcistub_put_pci_dev(struct pci_dev *dev);
++
++/* Ensure a device is turned off or reset */
++void pciback_reset_device(struct pci_dev *pdev);
++
++/* Access a virtual configuration space for a PCI device */
++int pciback_config_init(void);
++int pciback_config_init_dev(struct pci_dev *dev);
++void pciback_config_free_dyn_fields(struct pci_dev *dev);
++void pciback_config_reset_dev(struct pci_dev *dev);
++void pciback_config_free_dev(struct pci_dev *dev);
++int pciback_config_read(struct pci_dev *dev, int offset, int size,
++			u32 *ret_val);
++int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value);
++
++/* Handle requests for specific devices from the frontend */
++typedef int (*publish_pci_dev_cb) (struct pciback_device *pdev,
++				   unsigned int domain, unsigned int bus,
++				   unsigned int devfn, unsigned int devid);
++typedef int (*publish_pci_root_cb) (struct pciback_device *pdev,
++				    unsigned int domain, unsigned int bus);
++int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
++			int devid, publish_pci_dev_cb publish_cb);
++void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev);
++struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
++				    unsigned int domain, unsigned int bus,
++				    unsigned int devfn);
++
++/**
++* Add for domain0 PCIE-AER handling. Get guest domain/bus/devfn in pciback
++* before sending aer request to pcifront, so that guest could identify
++* device, and cooperate with pciback to finish the AER recovery job if the
++* device driver has the capability.
++*/
++
++int pciback_get_pcifront_dev(struct pci_dev *pcidev,
++			     struct pciback_device *pdev,
++			     unsigned int *domain, unsigned int *bus,
++			     unsigned int *devfn);
++int pciback_init_devices(struct pciback_device *pdev);
++int pciback_publish_pci_roots(struct pciback_device *pdev,
++			      publish_pci_root_cb cb);
++void pciback_release_devices(struct pciback_device *pdev);
++
++/* Handles events from front-end */
++irqreturn_t pciback_handle_event(int irq, void *dev_id);
++void pciback_do_op(struct work_struct *data);
++
++int pciback_xenbus_register(void);
++void pciback_xenbus_unregister(void);
++
++#ifdef CONFIG_PCI_MSI
++int pciback_enable_msi(struct pciback_device *pdev,
++			struct pci_dev *dev, struct xen_pci_op *op);
++
++int pciback_disable_msi(struct pciback_device *pdev,
++			struct pci_dev *dev, struct xen_pci_op *op);
++
++
++int pciback_enable_msix(struct pciback_device *pdev,
++			struct pci_dev *dev, struct xen_pci_op *op);
++
++int pciback_disable_msix(struct pciback_device *pdev,
++			struct pci_dev *dev, struct xen_pci_op *op);
++#endif
++extern int verbose_request;
++
++void test_and_schedule_op(struct pciback_device *pdev);
++#endif
++
++/* Handles shared IRQs that can go to both the device domain and the control domain. */
++void pciback_irq_handler(struct pci_dev *dev, int reset);
++irqreturn_t pciback_guest_interrupt(int irq, void *dev_id);
+diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
+new file mode 100644
+index 0000000..28a2a55
+--- /dev/null
++++ b/drivers/xen/pciback/pciback_ops.c
+@@ -0,0 +1,248 @@
++/*
++ * PCI Backend Operations - respond to PCI requests from Frontend
++ *
++ *   Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ */
++#include <linux/module.h>
++#include <linux/wait.h>
++#include <linux/bitops.h>
++#include <xen/events.h>
++#include <linux/sched.h>
++#include "pciback.h"
++
++int verbose_request;
++module_param(verbose_request, int, 0644);
++
++/* Ensure a device has the fake IRQ handler "turned on/off" and is
++ * ready to be exported. This MUST be run after pciback_reset_device
++ * which does the actual PCI device enable/disable.
++ */
++void pciback_control_isr(struct pci_dev *dev, int reset)
++{
++	struct pciback_dev_data *dev_data;
++	int rc;
++	int enable = 0;
++
++	dev_data = pci_get_drvdata(dev);
++	if (!dev_data)
++		return;
++
++	/* We don't deal with bridges */
++	if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
++		return;
++
++	if (reset) {
++		dev_data->enable_intx = 0;
++		dev_data->ack_intr = 0;
++	}
++	enable = dev_data->enable_intx;
++
++	/* Asked to disable, but ISR isn't running */
++	if (!enable && !dev_data->isr_on)
++		return;
++
++	/* Squirrel away the IRQs in the dev_data. We need this
++	 * b/c when device transitions to MSI, the dev->irq is
++	 * overwritten with the MSI vector.
++	 */
++	if (enable)
++		dev_data->irq = dev->irq;
++
++	/*
++	 * SR-IOV devices all use MSI-X and have no legacy
++	 * interrupts, so inhibit creating a fake IRQ handler for them.
++	 */
++	if (dev_data->irq == 0)
++		goto out;
++
++	dev_dbg(&dev->dev, "%s: #%d %s %s%s %s-> %s\n",
++		dev_data->irq_name,
++		dev_data->irq,
++		pci_is_enabled(dev) ? "on" : "off",
++		dev->msi_enabled ? "MSI" : "",
++		dev->msix_enabled ? "MSI/X" : "",
++		dev_data->isr_on ? "enable" : "disable",
++		enable ? "enable" : "disable");
++
++	if (enable) {
++		rc = request_irq(dev_data->irq,
++				pciback_guest_interrupt, IRQF_SHARED,
++				dev_data->irq_name, dev);
++		if (rc) {
++			dev_err(&dev->dev, "%s: failed to install fake IRQ " \
++				"handler for IRQ %d! (rc:%d)\n",
++				dev_data->irq_name, dev_data->irq, rc);
++			goto out;
++		}
++	} else {
++		free_irq(dev_data->irq, dev);
++		dev_data->irq = 0;
++	}
++	dev_data->isr_on = enable;
++	dev_data->ack_intr = enable;
++out:
++	dev_dbg(&dev->dev, "%s: #%d %s %s%s %s\n",
++		dev_data->irq_name,
++		dev_data->irq,
++		pci_is_enabled(dev) ? "on" : "off",
++		dev->msi_enabled ? "MSI" : "",
++		dev->msix_enabled ? "MSI/X" : "",
++		enable ? (dev_data->isr_on ? "enabled" : "failed to enable") :
++			(dev_data->isr_on ? "failed to disable" : "disabled"));
++}
++
++/* Ensure a device is "turned off" and ready to be exported.
++ * (Also see pciback_config_reset to ensure virtual configuration space is
++ * ready to be re-exported)
++ */
++void pciback_reset_device(struct pci_dev *dev)
++{
++	u16 cmd;
++
++	pciback_control_isr(dev, 1 /* reset device */);
++
++	/* Disable devices (but not bridges) */
++	if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
++#ifdef CONFIG_PCI_MSI
++		/* The guest could have been abruptly killed without
++		 * disabling MSI/MSI-X interrupts.*/
++		if (dev->msix_enabled)
++			pci_disable_msix(dev);
++		if (dev->msi_enabled)
++			pci_disable_msi(dev);
++#endif
++		pci_disable_device(dev);
++
++		pci_write_config_word(dev, PCI_COMMAND, 0);
++
++		dev->is_busmaster = 0;
++	} else {
++		pci_read_config_word(dev, PCI_COMMAND, &cmd);
++		if (cmd & (PCI_COMMAND_INVALIDATE)) {
++			cmd &= ~(PCI_COMMAND_INVALIDATE);
++			pci_write_config_word(dev, PCI_COMMAND, cmd);
++
++			dev->is_busmaster = 0;
++		}
++	}
++}
++/*
++* Now the same evtchn is used for both pcifront conf_read_write request
++* as well as pcie aer front end ack. We use a new work_queue to schedule
 +* pciback conf_read_write service for avoiding conflict with aer_core
- * do_recovery job which also use the system default work_queue
- */
- void test_and_schedule_op(struct pciback_device *pdev)
-@@ -50,14 +48,13 @@ void test_and_schedule_op(struct pciback_device *pdev)
- 	/* Check that frontend is requesting an operation and that we are not
- 	 * already processing a request */
- 	if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags)
--	    && !test_and_set_bit(_PDEVF_op_active, &pdev->flags))
--	{
++* do_recovery job which also use the system default work_queue
++*/
++void test_and_schedule_op(struct pciback_device *pdev)
++{
++	/* Check that frontend is requesting an operation and that we are not
++	 * already processing a request */
++	if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags)
 +	    && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) {
- 		queue_work(pciback_wq, &pdev->op_work);
- 	}
- 	/*_XEN_PCIB_active should have been cleared by pcifront. And also make
- 	sure pciback is waiting for ack by checking _PCIB_op_pending*/
--	if (!test_bit(_XEN_PCIB_active,(unsigned long *)&pdev->sh_info->flags)
--	    &&test_bit(_PCIB_op_pending, &pdev->flags)) {
++		queue_work(pciback_wq, &pdev->op_work);
++	}
++	/*_XEN_PCIB_active should have been cleared by pcifront. And also make
++	sure pciback is waiting for ack by checking _PCIB_op_pending*/
 +	if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags)
 +	    && test_bit(_PCIB_op_pending, &pdev->flags)) {
- 		wake_up(&aer_wait_queue);
- 	}
- }
-@@ -69,7 +66,8 @@ void test_and_schedule_op(struct pciback_device *pdev)
- 
- void pciback_do_op(struct work_struct *data)
- {
--	struct pciback_device *pdev = container_of(data, struct pciback_device, op_work);
++		wake_up(&aer_wait_queue);
++	}
++}
++
++/* Performing the configuration space reads/writes must not be done in atomic
++ * context because some of the pci_* functions can sleep (mostly due to ACPI
++ * use of semaphores). This function is intended to be called from a work
++ * queue in process context taking a struct pciback_device as a parameter */
++
++void pciback_do_op(struct work_struct *data)
++{
 +	struct pciback_device *pdev =
 +		container_of(data, struct pciback_device, op_work);
- 	struct pci_dev *dev;
- 	struct xen_pci_op *op = &pdev->sh_info->op;
- 
-@@ -77,38 +75,36 @@ void pciback_do_op(struct work_struct *data)
- 
- 	if (dev == NULL)
- 		op->err = XEN_PCI_ERR_dev_not_found;
--	else
--	{
--		switch (op->cmd)
--		{
--			case XEN_PCI_OP_conf_read:
--				op->err = pciback_config_read(dev,
--					  op->offset, op->size, &op->value);
--				break;
--			case XEN_PCI_OP_conf_write:
--				op->err = pciback_config_write(dev,
--					  op->offset, op->size,	op->value);
--				break;
++	struct pci_dev *dev;
++	struct pciback_dev_data *dev_data = NULL;
++	struct xen_pci_op *op = &pdev->sh_info->op;
++	int test_intx = 0;
++
++	dev = pciback_get_pci_dev(pdev, op->domain, op->bus, op->devfn);
++
++	if (dev == NULL)
++		op->err = XEN_PCI_ERR_dev_not_found;
 +	else {
++		dev_data = pci_get_drvdata(dev);
++		if (dev_data)
++			test_intx = dev_data->enable_intx;
 +		switch (op->cmd) {
 +		case XEN_PCI_OP_conf_read:
 +			op->err = pciback_config_read(dev,
@@ -38656,19 +13320,7 @@ index 6624faf..bf83dca 100644
 +			op->err = pciback_config_write(dev,
 +				  op->offset, op->size,	op->value);
 +			break;
- #ifdef CONFIG_PCI_MSI
--			case XEN_PCI_OP_enable_msi:
--				op->err = pciback_enable_msi(pdev, dev, op);
--				break;
--			case XEN_PCI_OP_disable_msi:
--				op->err = pciback_disable_msi(pdev, dev, op);
--				break;
--			case XEN_PCI_OP_enable_msix:
--				op->err = pciback_enable_msix(pdev, dev, op);
--				break;
--			case XEN_PCI_OP_disable_msix:
--				op->err = pciback_disable_msix(pdev, dev, op);
--				break;
++#ifdef CONFIG_PCI_MSI
 +		case XEN_PCI_OP_enable_msi:
 +			op->err = pciback_enable_msi(pdev, dev, op);
 +			break;
@@ -38681,719 +13333,747 @@ index 6624faf..bf83dca 100644
 +		case XEN_PCI_OP_disable_msix:
 +			op->err = pciback_disable_msix(pdev, dev, op);
 +			break;
- #endif
--			default:
--				op->err = XEN_PCI_ERR_not_implemented;
--				break;
++#endif
 +		default:
 +			op->err = XEN_PCI_ERR_not_implemented;
 +			break;
- 		}
- 	}
--	/* Tell the driver domain that we're done. */ 
++		}
++	}
++	if (!op->err && dev && dev_data) {
++		/* Transition detected */
++		if ((dev_data->enable_intx != test_intx))
++			pciback_control_isr(dev, 0 /* no reset */);
++	}
 +	/* Tell the driver domain that we're done. */
- 	wmb();
- 	clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
- 	notify_remote_via_irq(pdev->evtchn_irq);
-@@ -119,7 +115,7 @@ void pciback_do_op(struct work_struct *data)
- 	smp_mb__after_clear_bit(); /* /before/ final check for work */
- 
- 	/* Check to see if the driver domain tried to start another request in
--	 * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active. 
++	wmb();
++	clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
++	notify_remote_via_irq(pdev->evtchn_irq);
++
++	/* Mark that we're done. */
++	smp_mb__before_clear_bit(); /* /after/ clearing PCIF_active */
++	clear_bit(_PDEVF_op_active, &pdev->flags);
++	smp_mb__after_clear_bit(); /* /before/ final check for work */
++
++	/* Check to see if the driver domain tried to start another request in
 +	 * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active.
- 	*/
- 	test_and_schedule_op(pdev);
- }
++	*/
++	test_and_schedule_op(pdev);
++}
++
++irqreturn_t pciback_handle_event(int irq, void *dev_id)
++{
++	struct pciback_device *pdev = dev_id;
++
++	test_and_schedule_op(pdev);
++
++	return IRQ_HANDLED;
++}
++irqreturn_t pciback_guest_interrupt(int irq, void *dev_id)
++{
++	struct pci_dev *dev = (struct pci_dev *)dev_id;
++	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
++
++	if (dev_data->isr_on && dev_data->ack_intr) {
++		dev_data->handled++;
++		if ((dev_data->handled % 1000) == 0) {
++			if (xen_test_irq_shared(irq)) {
++				printk(KERN_INFO "%s IRQ line is not shared "
++					"with other domains. Turning ISR off\n",
++					 dev_data->irq_name);
++				dev_data->ack_intr = 0;
++			}
++		}
++		return IRQ_HANDLED;
++	}
++	return IRQ_NONE;
++}
 diff --git a/drivers/xen/pciback/slot.c b/drivers/xen/pciback/slot.c
-index 105a8b6..efb922d 100644
---- a/drivers/xen/pciback/slot.c
+new file mode 100644
+index 0000000..efb922d
+--- /dev/null
 +++ b/drivers/xen/pciback/slot.c
-@@ -65,7 +65,8 @@ int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
- 		for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
- 			if (slot_dev->slots[bus][slot] == NULL) {
- 				printk(KERN_INFO
--				       "pciback: slot: %s: assign to virtual slot %d, bus %d\n",
+@@ -0,0 +1,191 @@
++/*
++ * PCI Backend - Provides a Virtual PCI bus (with real devices)
++ *               to the frontend
++ *
++ *   Author: Ryan Wilson <hap9 at epoch.ncsc.mil> (vpci.c)
++ *   Author: Tristan Gingold <tristan.gingold at bull.net>, from vpci.c
++ */
++
++#include <linux/list.h>
++#include <linux/slab.h>
++#include <linux/pci.h>
++#include <linux/spinlock.h>
++#include "pciback.h"
++
++/* There are at most 32 slots in a pci bus.  */
++#define PCI_SLOT_MAX 32
++
++#define PCI_BUS_NBR 2
++
++struct slot_dev_data {
++	/* Access to dev_list must be protected by lock */
++	struct pci_dev *slots[PCI_BUS_NBR][PCI_SLOT_MAX];
++	spinlock_t lock;
++};
++
++struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
++				    unsigned int domain, unsigned int bus,
++				    unsigned int devfn)
++{
++	struct pci_dev *dev = NULL;
++	struct slot_dev_data *slot_dev = pdev->pci_dev_data;
++	unsigned long flags;
++
++	if (domain != 0 || PCI_FUNC(devfn) != 0)
++		return NULL;
++
++	if (PCI_SLOT(devfn) >= PCI_SLOT_MAX || bus >= PCI_BUS_NBR)
++		return NULL;
++
++	spin_lock_irqsave(&slot_dev->lock, flags);
++	dev = slot_dev->slots[bus][PCI_SLOT(devfn)];
++	spin_unlock_irqrestore(&slot_dev->lock, flags);
++
++	return dev;
++}
++
++int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
++			int devid, publish_pci_dev_cb publish_cb)
++{
++	int err = 0, slot, bus;
++	struct slot_dev_data *slot_dev = pdev->pci_dev_data;
++	unsigned long flags;
++
++	if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) {
++		err = -EFAULT;
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Can't export bridges on the virtual PCI bus");
++		goto out;
++	}
++
++	spin_lock_irqsave(&slot_dev->lock, flags);
++
++	/* Assign to a new slot on the virtual PCI bus */
++	for (bus = 0; bus < PCI_BUS_NBR; bus++)
++		for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++			if (slot_dev->slots[bus][slot] == NULL) {
++				printk(KERN_INFO
 +				       "pciback: slot: %s: assign to virtual "
 +				       "slot %d, bus %d\n",
- 				       pci_name(dev), slot, bus);
- 				slot_dev->slots[bus][slot] = dev;
- 				goto unlock;
-@@ -76,14 +77,14 @@ int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
- 	xenbus_dev_fatal(pdev->xdev, err,
- 			 "No more space on root virtual PCI bus");
- 
--      unlock:
++				       pci_name(dev), slot, bus);
++				slot_dev->slots[bus][slot] = dev;
++				goto unlock;
++			}
++		}
++
++	err = -ENOMEM;
++	xenbus_dev_fatal(pdev->xdev, err,
++			 "No more space on root virtual PCI bus");
++
 +unlock:
- 	spin_unlock_irqrestore(&slot_dev->lock, flags);
- 
- 	/* Publish this device. */
--	if(!err)
++	spin_unlock_irqrestore(&slot_dev->lock, flags);
++
++	/* Publish this device. */
 +	if (!err)
- 		err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, 0), devid);
- 
--      out:
-+out:
- 	return err;
- }
- 
-@@ -105,7 +106,7 @@ void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
- 			}
- 		}
- 
--      out:
++		err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, 0), devid);
++
 +out:
- 	spin_unlock_irqrestore(&slot_dev->lock, flags);
- 
- 	if (found_dev)
-@@ -156,8 +157,10 @@ void pciback_release_devices(struct pciback_device *pdev)
- 	pdev->pci_dev_data = NULL;
- }
- 
--int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev, 
--		unsigned int *domain, unsigned int *bus, unsigned int *devfn)
++	return err;
++}
++
++void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
++{
++	int slot, bus;
++	struct slot_dev_data *slot_dev = pdev->pci_dev_data;
++	struct pci_dev *found_dev = NULL;
++	unsigned long flags;
++
++	spin_lock_irqsave(&slot_dev->lock, flags);
++
++	for (bus = 0; bus < PCI_BUS_NBR; bus++)
++		for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++			if (slot_dev->slots[bus][slot] == dev) {
++				slot_dev->slots[bus][slot] = NULL;
++				found_dev = dev;
++				goto out;
++			}
++		}
++
++out:
++	spin_unlock_irqrestore(&slot_dev->lock, flags);
++
++	if (found_dev)
++		pcistub_put_pci_dev(found_dev);
++}
++
++int pciback_init_devices(struct pciback_device *pdev)
++{
++	int slot, bus;
++	struct slot_dev_data *slot_dev;
++
++	slot_dev = kmalloc(sizeof(*slot_dev), GFP_KERNEL);
++	if (!slot_dev)
++		return -ENOMEM;
++
++	spin_lock_init(&slot_dev->lock);
++
++	for (bus = 0; bus < PCI_BUS_NBR; bus++)
++		for (slot = 0; slot < PCI_SLOT_MAX; slot++)
++			slot_dev->slots[bus][slot] = NULL;
++
++	pdev->pci_dev_data = slot_dev;
++
++	return 0;
++}
++
++int pciback_publish_pci_roots(struct pciback_device *pdev,
++			      publish_pci_root_cb publish_cb)
++{
++	/* The Virtual PCI bus has only one root */
++	return publish_cb(pdev, 0, 0);
++}
++
++void pciback_release_devices(struct pciback_device *pdev)
++{
++	int slot, bus;
++	struct slot_dev_data *slot_dev = pdev->pci_dev_data;
++	struct pci_dev *dev;
++
++	for (bus = 0; bus < PCI_BUS_NBR; bus++)
++		for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++			dev = slot_dev->slots[bus][slot];
++			if (dev != NULL)
++				pcistub_put_pci_dev(dev);
++		}
++
++	kfree(slot_dev);
++	pdev->pci_dev_data = NULL;
++}
++
 +int pciback_get_pcifront_dev(struct pci_dev *pcidev,
 +			     struct pciback_device *pdev,
 +			     unsigned int *domain, unsigned int *bus,
 +			     unsigned int *devfn)
- {
- 	int slot, busnr;
- 	struct slot_dev_data *slot_dev = pdev->pci_dev_data;
-@@ -172,11 +175,12 @@ int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev
- 			dev = slot_dev->slots[busnr][slot];
- 			if (dev && dev->bus->number == pcidev->bus->number
- 				&& dev->devfn == pcidev->devfn
--				&& pci_domain_nr(dev->bus) == pci_domain_nr(pcidev->bus)) {
++{
++	int slot, busnr;
++	struct slot_dev_data *slot_dev = pdev->pci_dev_data;
++	struct pci_dev *dev;
++	int found = 0;
++	unsigned long flags;
++
++	spin_lock_irqsave(&slot_dev->lock, flags);
++
++	for (busnr = 0; busnr < PCI_BUS_NBR; busnr++)
++		for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++			dev = slot_dev->slots[busnr][slot];
++			if (dev && dev->bus->number == pcidev->bus->number
++				&& dev->devfn == pcidev->devfn
 +				&& pci_domain_nr(dev->bus) ==
 +					pci_domain_nr(pcidev->bus)) {
- 				found = 1;
- 				*domain = 0;
- 				*bus = busnr;
--				*devfn = PCI_DEVFN(slot,0);
++				found = 1;
++				*domain = 0;
++				*bus = busnr;
 +				*devfn = PCI_DEVFN(slot, 0);
- 				goto out;
- 			}
- 		}
++				goto out;
++			}
++		}
++out:
++	spin_unlock_irqrestore(&slot_dev->lock, flags);
++	return found;
++
++}
 diff --git a/drivers/xen/pciback/vpci.c b/drivers/xen/pciback/vpci.c
-index a5b7ece..721b81b 100644
---- a/drivers/xen/pciback/vpci.c
+new file mode 100644
+index 0000000..2857ab8
+--- /dev/null
 +++ b/drivers/xen/pciback/vpci.c
-@@ -125,14 +125,14 @@ int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
- 	xenbus_dev_fatal(pdev->xdev, err,
- 			 "No more space on root virtual PCI bus");
- 
--      unlock:
+@@ -0,0 +1,244 @@
++/*
++ * PCI Backend - Provides a Virtual PCI bus (with real devices)
++ *               to the frontend
++ *
++ *   Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ */
++
++#include <linux/list.h>
++#include <linux/slab.h>
++#include <linux/pci.h>
++#include <linux/spinlock.h>
++#include "pciback.h"
++
++#define PCI_SLOT_MAX 32
++
++struct vpci_dev_data {
++	/* Access to dev_list must be protected by lock */
++	struct list_head dev_list[PCI_SLOT_MAX];
++	spinlock_t lock;
++};
++
++static inline struct list_head *list_first(struct list_head *head)
++{
++	return head->next;
++}
++
++struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
++				    unsigned int domain, unsigned int bus,
++				    unsigned int devfn)
++{
++	struct pci_dev_entry *entry;
++	struct pci_dev *dev = NULL;
++	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
++	unsigned long flags;
++
++	if (domain != 0 || bus != 0)
++		return NULL;
++
++	if (PCI_SLOT(devfn) < PCI_SLOT_MAX) {
++		spin_lock_irqsave(&vpci_dev->lock, flags);
++
++		list_for_each_entry(entry,
++				    &vpci_dev->dev_list[PCI_SLOT(devfn)],
++				    list) {
++			if (PCI_FUNC(entry->dev->devfn) == PCI_FUNC(devfn)) {
++				dev = entry->dev;
++				break;
++			}
++		}
++
++		spin_unlock_irqrestore(&vpci_dev->lock, flags);
++	}
++	return dev;
++}
++
++static inline int match_slot(struct pci_dev *l, struct pci_dev *r)
++{
++	if (pci_domain_nr(l->bus) == pci_domain_nr(r->bus)
++	    && l->bus == r->bus && PCI_SLOT(l->devfn) == PCI_SLOT(r->devfn))
++		return 1;
++
++	return 0;
++}
++
++int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
++			int devid, publish_pci_dev_cb publish_cb)
++{
++	int err = 0, slot, func = -1;
++	struct pci_dev_entry *t, *dev_entry;
++	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
++	unsigned long flags;
++
++	if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) {
++		err = -EFAULT;
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Can't export bridges on the virtual PCI bus");
++		goto out;
++	}
++
++	dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL);
++	if (!dev_entry) {
++		err = -ENOMEM;
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Error adding entry to virtual PCI bus");
++		goto out;
++	}
++
++	dev_entry->dev = dev;
++
++	spin_lock_irqsave(&vpci_dev->lock, flags);
++
++	/* Keep multi-function devices together on the virtual PCI bus */
++	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++		if (!list_empty(&vpci_dev->dev_list[slot])) {
++			t = list_entry(list_first(&vpci_dev->dev_list[slot]),
++				       struct pci_dev_entry, list);
++
++			if (match_slot(dev, t->dev)) {
++				pr_info("pciback: vpci: %s: "
++					"assign to virtual slot %d func %d\n",
++					pci_name(dev), slot,
++					PCI_FUNC(dev->devfn));
++				list_add_tail(&dev_entry->list,
++					      &vpci_dev->dev_list[slot]);
++				func = PCI_FUNC(dev->devfn);
++				goto unlock;
++			}
++		}
++	}
++
++	/* Assign to a new slot on the virtual PCI bus */
++	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++		if (list_empty(&vpci_dev->dev_list[slot])) {
++			printk(KERN_INFO
++			       "pciback: vpci: %s: assign to virtual slot %d\n",
++			       pci_name(dev), slot);
++			list_add_tail(&dev_entry->list,
++				      &vpci_dev->dev_list[slot]);
++			func = PCI_FUNC(dev->devfn);
++			goto unlock;
++		}
++	}
++
++	err = -ENOMEM;
++	xenbus_dev_fatal(pdev->xdev, err,
++			 "No more space on root virtual PCI bus");
++
 +unlock:
- 	spin_unlock_irqrestore(&vpci_dev->lock, flags);
- 
- 	/* Publish this device. */
--	if(!err)
++	spin_unlock_irqrestore(&vpci_dev->lock, flags);
++
++	/* Publish this device. */
 +	if (!err)
- 		err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, func), devid);
- 
--      out:
++		err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, func), devid);
++
 +out:
- 	return err;
- }
- 
-@@ -158,7 +158,7 @@ void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
- 		}
- 	}
- 
--      out:
++	return err;
++}
++
++void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
++{
++	int slot;
++	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
++	struct pci_dev *found_dev = NULL;
++	unsigned long flags;
++
++	spin_lock_irqsave(&vpci_dev->lock, flags);
++
++	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++		struct pci_dev_entry *e, *tmp;
++		list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
++					 list) {
++			if (e->dev == dev) {
++				list_del(&e->list);
++				found_dev = e->dev;
++				kfree(e);
++				goto out;
++			}
++		}
++	}
++
 +out:
- 	spin_unlock_irqrestore(&vpci_dev->lock, flags);
- 
- 	if (found_dev)
-@@ -176,9 +176,8 @@ int pciback_init_devices(struct pciback_device *pdev)
- 
- 	spin_lock_init(&vpci_dev->lock);
- 
--	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++	spin_unlock_irqrestore(&vpci_dev->lock, flags);
++
++	if (found_dev)
++		pcistub_put_pci_dev(found_dev);
++}
++
++int pciback_init_devices(struct pciback_device *pdev)
++{
++	int slot;
++	struct vpci_dev_data *vpci_dev;
++
++	vpci_dev = kmalloc(sizeof(*vpci_dev), GFP_KERNEL);
++	if (!vpci_dev)
++		return -ENOMEM;
++
++	spin_lock_init(&vpci_dev->lock);
++
 +	for (slot = 0; slot < PCI_SLOT_MAX; slot++)
- 		INIT_LIST_HEAD(&vpci_dev->dev_list[slot]);
--	}
- 
- 	pdev->pci_dev_data = vpci_dev;
- 
-@@ -211,8 +210,10 @@ void pciback_release_devices(struct pciback_device *pdev)
- 	pdev->pci_dev_data = NULL;
- }
- 
--int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev, 
--		unsigned int *domain, unsigned int *bus, unsigned int *devfn)
++		INIT_LIST_HEAD(&vpci_dev->dev_list[slot]);
++
++	pdev->pci_dev_data = vpci_dev;
++
++	return 0;
++}
++
++int pciback_publish_pci_roots(struct pciback_device *pdev,
++			      publish_pci_root_cb publish_cb)
++{
++	/* The Virtual PCI bus has only one root */
++	return publish_cb(pdev, 0, 0);
++}
++
++void pciback_release_devices(struct pciback_device *pdev)
++{
++	int slot;
++	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
++
++	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++		struct pci_dev_entry *e, *tmp;
++		list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
++					 list) {
++			list_del(&e->list);
++			pcistub_put_pci_dev(e->dev);
++			kfree(e);
++		}
++	}
++
++	kfree(vpci_dev);
++	pdev->pci_dev_data = NULL;
++}
++
 +int pciback_get_pcifront_dev(struct pci_dev *pcidev,
 +			     struct pciback_device *pdev,
 +			     unsigned int *domain, unsigned int *bus,
 +			     unsigned int *devfn)
- {
- 	struct pci_dev_entry *entry;
- 	struct pci_dev *dev = NULL;
-@@ -227,15 +228,16 @@ int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev
- 			    list) {
- 			dev = entry->dev;
- 			if (dev && dev->bus->number == pcidev->bus->number
--				&& pci_domain_nr(dev->bus) == pci_domain_nr(pcidev->bus)
--				&& dev->devfn == pcidev->devfn)
--			{
++{
++	struct pci_dev_entry *entry;
++	struct pci_dev *dev = NULL;
++	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
++	unsigned long flags;
++	int found = 0, slot;
++
++	spin_lock_irqsave(&vpci_dev->lock, flags);
++	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
++		list_for_each_entry(entry,
++			    &vpci_dev->dev_list[slot],
++			    list) {
++			dev = entry->dev;
++			if (dev && dev->bus->number == pcidev->bus->number
 +				&& pci_domain_nr(dev->bus) ==
 +					pci_domain_nr(pcidev->bus)
 +				&& dev->devfn == pcidev->devfn) {
- 				found = 1;
- 				*domain = 0;
- 				*bus = 0;
--				*devfn = PCI_DEVFN(slot, PCI_FUNC(pcidev->devfn));
++				found = 1;
++				*domain = 0;
++				*bus = 0;
 +				*devfn = PCI_DEVFN(slot,
 +					 PCI_FUNC(pcidev->devfn));
- 			}
--		}		
++			}
 +		}
- 	}
- 	spin_unlock_irqrestore(&vpci_dev->lock, flags);
- 	return found;
++	}
++	spin_unlock_irqrestore(&vpci_dev->lock, flags);
++	return found;
++}
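
(Aside: the vpci fragment above flattens every exported device onto bus 0 of a
single virtual root, so only the function number survives from the real devfn
while the slot is whatever vpci assigned. A minimal standalone sketch of that
encoding: the PCI_DEVFN/PCI_SLOT/PCI_FUNC macros mirror the kernel's
<linux/pci.h> definitions, and the slot value 2 below is just an illustrative
assignment, not anything the driver mandates.

    #include <stdio.h>

    /* Mirror of the kernel's devfn encoding from <linux/pci.h>:
     * bits 7:3 carry the slot number, bits 2:0 the function. */
    #define PCI_DEVFN(slot, func)  ((((slot) & 0x1f) << 3) | ((func) & 0x07))
    #define PCI_SLOT(devfn)        (((devfn) >> 3) & 0x1f)
    #define PCI_FUNC(devfn)        ((devfn) & 0x07)

    int main(void)
    {
            /* A real device at e.g. 0000:07:00.1 exported as virtual
             * slot 2: the function is preserved, the slot is remapped. */
            unsigned int real_devfn = PCI_DEVFN(0, 1);
            unsigned int virt_devfn = PCI_DEVFN(2, PCI_FUNC(real_devfn));

            printf("guest sees 0000:00:%02x.%x\n",
                   PCI_SLOT(virt_devfn), PCI_FUNC(virt_devfn));
            return 0;
    }

This matches what pciback_get_pcifront_dev publishes: domain and bus are
always reported as 0, so the guest's view of the topology is independent of
where the device really sits on the host.)
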
 diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
-index a85c413..efec585 100644
---- a/drivers/xen/pciback/xenbus.c
+new file mode 100644
+index 0000000..70030c4
+--- /dev/null
 +++ b/drivers/xen/pciback/xenbus.c
-@@ -40,7 +40,7 @@ static struct pciback_device *alloc_pdev(struct xenbus_device *xdev)
- 		kfree(pdev);
- 		pdev = NULL;
- 	}
--      out:
+@@ -0,0 +1,726 @@
++/*
++ * PCI Backend Xenbus Setup - handles setup with frontend and xend
++ *
++ *   Author: Ryan Wilson <hap9 at epoch.ncsc.mil>
++ */
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/list.h>
++#include <linux/vmalloc.h>
++#include <linux/workqueue.h>
++#include <xen/xenbus.h>
++#include <xen/events.h>
++#include <asm/xen/pci.h>
++#include "pciback.h"
++
++#define INVALID_EVTCHN_IRQ  (-1)
++struct workqueue_struct *pciback_wq;
++
++static struct pciback_device *alloc_pdev(struct xenbus_device *xdev)
++{
++	struct pciback_device *pdev;
++
++	pdev = kzalloc(sizeof(struct pciback_device), GFP_KERNEL);
++	if (pdev == NULL)
++		goto out;
++	dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev);
++
++	pdev->xdev = xdev;
++	dev_set_drvdata(&xdev->dev, pdev);
++
++	spin_lock_init(&pdev->dev_lock);
++
++	pdev->sh_info = NULL;
++	pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
++	pdev->be_watching = 0;
++
++	INIT_WORK(&pdev->op_work, pciback_do_op);
++
++	if (pciback_init_devices(pdev)) {
++		kfree(pdev);
++		pdev = NULL;
++	}
 +out:
- 	return pdev;
- }
- 
-@@ -111,7 +111,7 @@ static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref,
- 	err = 0;
- 
- 	dev_dbg(&pdev->xdev->dev, "Attached!\n");
--      out:
++	return pdev;
++}
++
++static void pciback_disconnect(struct pciback_device *pdev)
++{
++	spin_lock(&pdev->dev_lock);
++
++	/* Ensure the guest can't trigger our handler before removing devices */
++	if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) {
++		unbind_from_irqhandler(pdev->evtchn_irq, pdev);
++		pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
++	}
++	spin_unlock(&pdev->dev_lock);
++
++	/* If the driver domain started an op, make sure we complete it
++	 * before releasing the shared memory */
++
++	/* Note: the workqueue does not use spinlocks at all. */
++	flush_workqueue(pciback_wq);
++
++	spin_lock(&pdev->dev_lock);
++	if (pdev->sh_info != NULL) {
++		xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info);
++		pdev->sh_info = NULL;
++	}
++	spin_unlock(&pdev->dev_lock);
++
++}
++
++static void free_pdev(struct pciback_device *pdev)
++{
++	if (pdev->be_watching) {
++		unregister_xenbus_watch(&pdev->be_watch);
++		pdev->be_watching = 0;
++	}
++
++	pciback_disconnect(pdev);
++
++	pciback_release_devices(pdev);
++
++	dev_set_drvdata(&pdev->xdev->dev, NULL);
++	pdev->xdev = NULL;
++
++	kfree(pdev);
++}
++
++static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref,
++			     int remote_evtchn)
++{
++	int err = 0;
++	void *vaddr;
++
++	dev_dbg(&pdev->xdev->dev,
++		"Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
++		gnt_ref, remote_evtchn);
++
++	err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr);
++	if (err < 0) {
++		xenbus_dev_fatal(pdev->xdev, err,
++				"Error mapping other domain page in ours.");
++		goto out;
++	}
++
++	spin_lock(&pdev->dev_lock);
++	pdev->sh_info = vaddr;
++	spin_unlock(&pdev->dev_lock);
++
++	err = bind_interdomain_evtchn_to_irqhandler(
++		pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event,
++		0, "pciback", pdev);
++	if (err < 0) {
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Error binding event channel to IRQ");
++		goto out;
++	}
++
++	spin_lock(&pdev->dev_lock);
++	pdev->evtchn_irq = err;
++	spin_unlock(&pdev->dev_lock);
++	err = 0;
++
++	dev_dbg(&pdev->xdev->dev, "Attached!\n");
 +out:
- 	return err;
- }
- 
-@@ -166,11 +166,10 @@ static int pciback_attach(struct pciback_device *pdev)
- 				 "Error switching to connected state!");
- 
- 	dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
--      out:
++	return err;
++}
++
++static int pciback_attach(struct pciback_device *pdev)
++{
++	int err = 0;
++	int gnt_ref, remote_evtchn;
++	char *magic = NULL;
++
++
++	/* Make sure we only do this setup once */
++	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
++	    XenbusStateInitialised)
++		goto out;
++
++	/* Wait for frontend to state that it has published the configuration */
++	if (xenbus_read_driver_state(pdev->xdev->otherend) !=
++	    XenbusStateInitialised)
++		goto out;
++
++	dev_dbg(&pdev->xdev->dev, "Reading frontend config\n");
++
++	err = xenbus_gather(XBT_NIL, pdev->xdev->otherend,
++			    "pci-op-ref", "%u", &gnt_ref,
++			    "event-channel", "%u", &remote_evtchn,
++			    "magic", NULL, &magic, NULL);
++	if (err) {
++		/* If configuration didn't get read correctly, wait longer */
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Error reading configuration from frontend");
++		goto out;
++	}
++
++	if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) {
++		xenbus_dev_fatal(pdev->xdev, -EFAULT,
++				 "version mismatch (%s/%s) with pcifront - "
++				 "halting pciback",
++				 magic, XEN_PCI_MAGIC);
++		goto out;
++	}
++
++	err = pciback_do_attach(pdev, gnt_ref, remote_evtchn);
++	if (err)
++		goto out;
++
++	dev_dbg(&pdev->xdev->dev, "Connecting...\n");
++
++	err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);
++	if (err)
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Error switching to connected state!");
++
++	dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
 +out:
- 	spin_unlock(&pdev->dev_lock);
- 
--	if (magic)
--		kfree(magic);
++
 +	kfree(magic);
- 
- 	return err;
- }
-@@ -193,7 +192,7 @@ static int pciback_publish_pci_dev(struct pciback_device *pdev,
- 			    "%04x:%02x:%02x.%02x", domain, bus,
- 			    PCI_SLOT(devfn), PCI_FUNC(devfn));
- 
--      out:
-+out:
- 	return err;
- }
- 
-@@ -230,7 +229,7 @@ static int pciback_export_device(struct pciback_device *pdev,
- 	 * to other driver domains (as he who controls the bridge can disable
- 	 * it and stop the other devices from working).
- 	 */
--      out:
-+out:
- 	return err;
- }
- 
-@@ -253,8 +252,8 @@ static int pciback_remove_device(struct pciback_device *pdev,
- 	}
- 
- 	pciback_release_pci_dev(pdev, dev);
--	
--      out:
 +
-+out:
- 	return err;
- }
- 
-@@ -314,7 +313,7 @@ static int pciback_publish_pci_root(struct pciback_device *pdev,
- 	err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
- 			    "root_num", "%d", (root_num + 1));
- 
--      out:
-+out:
- 	return err;
- }
- 
-@@ -358,7 +357,7 @@ static int pciback_reconfigure(struct pciback_device *pdev)
- 		}
- 		err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, state_str,
- 				   "%d", &substate);
--		if (err != 1) 
-+		if (err != 1)
- 			substate = XenbusStateUnknown;
- 
- 		switch (substate) {
-@@ -389,14 +388,15 @@ static int pciback_reconfigure(struct pciback_device *pdev)
- 						 "configuration");
- 				goto out;
- 			}
--	
++	return err;
++}
 +
- 			err = pciback_export_device(pdev, domain, bus, slot,
- 						    func, i);
- 			if (err)
- 				goto out;
- 
- 			/* Publish pci roots. */
--			err = pciback_publish_pci_roots(pdev, pciback_publish_pci_root);
-+			err = pciback_publish_pci_roots(pdev,
-+						pciback_publish_pci_root);
- 			if (err) {
- 				xenbus_dev_fatal(pdev->xdev, err,
- 						 "Error while publish PCI root"
-@@ -412,7 +412,7 @@ static int pciback_reconfigure(struct pciback_device *pdev)
- 						 "Error switching substate of "
- 						 "dev-%d\n", i);
- 				goto out;
--			}	
-+			}
- 			break;
- 
- 		case XenbusStateClosing:
-@@ -445,7 +445,7 @@ static int pciback_reconfigure(struct pciback_device *pdev)
- 
- 			err = pciback_remove_device(pdev, domain, bus, slot,
- 						    func);
--			if(err)
-+			if (err)
- 				goto out;
- 
- 			/* TODO: If at some point we implement support for pci
-@@ -466,8 +466,8 @@ static int pciback_reconfigure(struct pciback_device *pdev)
- 				 "Error switching to reconfigured state!");
- 		goto out;
- 	}
--	
--      out:
++static int pciback_publish_pci_dev(struct pciback_device *pdev,
++				   unsigned int domain, unsigned int bus,
++				   unsigned int devfn, unsigned int devid)
++{
++	int err;
++	int len;
++	char str[64];
++
++	len = snprintf(str, sizeof(str), "vdev-%d", devid);
++	if (unlikely(len >= (sizeof(str) - 1))) {
++		err = -ENOMEM;
++		goto out;
++	}
++
++	err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
++			    "%04x:%02x:%02x.%02x", domain, bus,
++			    PCI_SLOT(devfn), PCI_FUNC(devfn));
 +
 +out:
- 	spin_unlock(&pdev->dev_lock);
- 
- 	return 0;
-@@ -591,7 +591,7 @@ static int pciback_setup_backend(struct pciback_device *pdev)
- 			xenbus_dev_fatal(pdev->xdev, err, "Error switching "
- 					 "substate of dev-%d\n", i);
- 			goto out;
--		}	
-+		}
- 	}
- 
- 	err = pciback_publish_pci_roots(pdev, pciback_publish_pci_root);
-@@ -607,7 +607,7 @@ static int pciback_setup_backend(struct pciback_device *pdev)
- 		xenbus_dev_fatal(pdev->xdev, err,
- 				 "Error switching to initialised state!");
- 
--      out:
-+out:
- 	spin_unlock(&pdev->dev_lock);
- 
- 	if (!err)
-@@ -663,7 +663,7 @@ static int pciback_xenbus_probe(struct xenbus_device *dev,
- 	 */
- 	pciback_be_watch(&pdev->be_watch, NULL, 0);
- 
--      out:
-+out:
- 	return err;
- }
- 
-@@ -679,7 +679,7 @@ static int pciback_xenbus_remove(struct xenbus_device *dev)
- 
- static const struct xenbus_device_id xenpci_ids[] = {
- 	{"pci"},
--	{{0}},
-+	{""},
- };
- 
- static struct xenbus_driver xenbus_pciback_driver = {
--- 
-1.7.4
-
-
-From dbcfc1c122172cae29736c95cb87979b654965cb Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Mon, 8 Mar 2010 18:39:15 -0500
-Subject: [PATCH 179/203] xen-pciback: remove driver_data direct access to struct device
-
-The driver core is going to not allow direct access to the
-driver_data pointer in struct device.  Instead, the functions
-dev_get_drvdata() and dev_set_drvdata() should be used.
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/pciback/xenbus.c |    8 ++++----
- 1 files changed, 4 insertions(+), 4 deletions(-)
-
-diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
-index efec585..af6c25a 100644
---- a/drivers/xen/pciback/xenbus.c
-+++ b/drivers/xen/pciback/xenbus.c
-@@ -26,7 +26,7 @@ static struct pciback_device *alloc_pdev(struct xenbus_device *xdev)
- 	dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev);
- 
- 	pdev->xdev = xdev;
--	xdev->dev.driver_data = pdev;
-+	dev_set_drvdata(&xdev->dev, pdev);
- 
- 	spin_lock_init(&pdev->dev_lock);
- 
-@@ -75,7 +75,7 @@ static void free_pdev(struct pciback_device *pdev)
- 
- 	pciback_release_devices(pdev);
- 
--	pdev->xdev->dev.driver_data = NULL;
-+	dev_set_drvdata(&pdev->xdev->dev, NULL);
- 	pdev->xdev = NULL;
- 
- 	kfree(pdev);
-@@ -476,7 +476,7 @@ out:
- static void pciback_frontend_changed(struct xenbus_device *xdev,
- 				     enum xenbus_state fe_state)
- {
--	struct pciback_device *pdev = xdev->dev.driver_data;
-+	struct pciback_device *pdev = dev_get_drvdata(&xdev->dev);
- 
- 	dev_dbg(&xdev->dev, "fe state changed %d\n", fe_state);
- 
-@@ -669,7 +669,7 @@ out:
- 
- static int pciback_xenbus_remove(struct xenbus_device *dev)
- {
--	struct pciback_device *pdev = dev->dev.driver_data;
-+	struct pciback_device *pdev = dev_get_drvdata(&dev->dev);
- 
- 	if (pdev != NULL)
- 		free_pdev(pdev);
--- 
-1.7.4
-
-
-From 47ec0e3343f9a7cc8944effc293960add935e196 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Mon, 8 Mar 2010 18:47:55 -0500
-Subject: [PATCH 180/203] xen-pciback: Fix compile error: 'TASK_NORMAL' undeclared.
-
-Both files were missing the #include <linux/sched.h>
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/pciback/pci_stub.c    |    1 +
- drivers/xen/pciback/pciback_ops.c |    1 +
- 2 files changed, 2 insertions(+), 0 deletions(-)
-
-diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
-index 28222ee..6fc0b6e 100644
---- a/drivers/xen/pciback/pci_stub.c
-+++ b/drivers/xen/pciback/pci_stub.c
-@@ -12,6 +12,7 @@
- #include <linux/kref.h>
- #include <linux/pci.h>
- #include <linux/wait.h>
-+#include <linux/sched.h>
- #include <asm/atomic.h>
- #include <xen/events.h>
- #include <asm/xen/pci.h>
-diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
-index bf83dca..2b9a93e 100644
---- a/drivers/xen/pciback/pciback_ops.c
-+++ b/drivers/xen/pciback/pciback_ops.c
-@@ -7,6 +7,7 @@
- #include <linux/wait.h>
- #include <linux/bitops.h>
- #include <xen/events.h>
-+#include <linux/sched.h>
- #include "pciback.h"
- 
- int verbose_request;
--- 
-1.7.4
-
-
-From 139a71e1ede3d7ac69f05bec42965d5b00aed03b Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Wed, 9 Dec 2009 17:43:16 -0500
-Subject: [PATCH 181/203] xen-pciback: Remove the vestiges of CONFIG_PCI_GUESTDEV.
-
-The same functionality for this (that used to be called
-pci_is_guestdev) is now via: "pci=resource_alignment="
-command line argument.
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/pciback/pci_stub.c |   10 ----------
- 1 files changed, 0 insertions(+), 10 deletions(-)
-
-diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
-index 6fc0b6e..d30aa7c 100644
---- a/drivers/xen/pciback/pci_stub.c
-+++ b/drivers/xen/pciback/pci_stub.c
-@@ -430,16 +430,6 @@ static int __devinit pcistub_probe(struct pci_dev *dev,
- 
- 		dev_info(&dev->dev, "seizing device\n");
- 		err = pcistub_seize(dev);
--#ifdef CONFIG_PCI_GUESTDEV
--	} else if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
--		if (!pci_is_guestdev(dev)) {
--			err = -ENODEV;
--			goto out;
--		}
--
--		dev_info(&dev->dev, "seizing device\n");
--		err = pcistub_seize(dev);
--#endif /* CONFIG_PCI_GUESTDEV */
- 	} else
- 		/* Didn't find the device */
- 		err = -ENODEV;
--- 
-1.7.4
-
-
-From 4cc738bdf28898d008f6a34dcbcdbb8b05ee4247 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Wed, 9 Dec 2009 17:43:17 -0500
-Subject: [PATCH 182/203] xen-pciback: Remove deprecated routine to find domain owner of PCI device.
-
-In linux-2.6.18.hg tree the mechanism to find the domain owner was
-for the MSI driver (msi-xen.c) to call in this function to retrieve
-the domain number. This is not the way anymore.
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/pciback/pci_stub.c |   19 -------------------
- 1 files changed, 0 insertions(+), 19 deletions(-)
-
-diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
-index d30aa7c..30e7b59 100644
---- a/drivers/xen/pciback/pci_stub.c
-+++ b/drivers/xen/pciback/pci_stub.c
-@@ -1157,22 +1157,6 @@ static ssize_t permissive_show(struct device_driver *drv, char *buf)
- 
- DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, permissive_add);
- 
--#ifdef CONFIG_PCI_MSI
--
--int pciback_get_owner(struct pci_dev *dev)
--{
--	struct pcistub_device *psdev;
--
--	psdev = pcistub_device_find(pci_domain_nr(dev->bus), dev->bus->number,
--			PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
--
--	if (!psdev || !psdev->pdev)
--		return -1;
--
--	return psdev->pdev->xdev->otherend_id;
--}
--#endif
--
- static void pcistub_exit(void)
- {
- 	driver_remove_file(&pciback_pci_driver.driver, &driver_attr_new_slot);
-@@ -1183,7 +1167,6 @@ static void pcistub_exit(void)
- 	driver_remove_file(&pciback_pci_driver.driver, &driver_attr_permissive);
- 
- 	pci_unregister_driver(&pciback_pci_driver);
--	WARN_ON(unregister_msi_get_owner(pciback_get_owner));
- }
- 
- static int __init pcistub_init(void)
-@@ -1241,8 +1224,6 @@ static int __init pcistub_init(void)
- 		err = driver_create_file(&pciback_pci_driver.driver,
- 					 &driver_attr_permissive);
- 
--	if (!err)
--		err = register_msi_get_owner(pciback_get_owner);
- 	if (err)
- 		pcistub_exit();
- 
--- 
-1.7.4
-
-
-From 0617b5c7a8378042b65fa112819e9e12b44c5223 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Tue, 22 Dec 2009 13:53:41 -0500
-Subject: [PATCH 183/203] xen-pciback: Fix compiler warning in pci_stub.c.
-
-warning: the frame size of 1036 bytes is larger than 1024 bytes
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/pciback/pci_stub.c |    5 +++--
- 1 files changed, 3 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
-index 30e7b59..0b5a16b 100644
---- a/drivers/xen/pciback/pci_stub.c
-+++ b/drivers/xen/pciback/pci_stub.c
-@@ -496,16 +496,17 @@ static const struct pci_device_id pcistub_ids[] = {
- 	{0,},
- };
- 
-+#define PCI_NODENAME_MAX 40
- static void kill_domain_by_device(struct pcistub_device *psdev)
- {
- 	struct xenbus_transaction xbt;
- 	int err;
--	char nodename[1024];
-+	char nodename[PCI_NODENAME_MAX];
- 
- 	if (!psdev)
- 		dev_err(&psdev->dev->dev,
- 			"device is NULL when do AER recovery/kill_domain\n");
--	sprintf(nodename, "/local/domain/0/backend/pci/%d/0",
-+	snprintf(nodename, PCI_NODENAME_MAX, "/local/domain/0/backend/pci/%d/0",
- 		psdev->pdev->xdev->otherend_id);
- 	nodename[strlen(nodename)] = '\0';
- 
--- 
-1.7.4
-
-
-From 33b0a6e48e5f8fb218908648991fcc7377db7519 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Tue, 22 Dec 2009 13:53:42 -0500
-Subject: [PATCH 184/203] xen-pciback: Fix compile warning in vpci.c
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-warning: ‘func’ may be used uninitialized in this function
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/pciback/vpci.c |    2 +-
- 1 files changed, 1 insertions(+), 1 deletions(-)
-
-diff --git a/drivers/xen/pciback/vpci.c b/drivers/xen/pciback/vpci.c
-index 721b81b..2857ab8 100644
---- a/drivers/xen/pciback/vpci.c
-+++ b/drivers/xen/pciback/vpci.c
-@@ -65,7 +65,7 @@ static inline int match_slot(struct pci_dev *l, struct pci_dev *r)
- int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
- 			int devid, publish_pci_dev_cb publish_cb)
- {
--	int err = 0, slot, func;
-+	int err = 0, slot, func = -1;
- 	struct pci_dev_entry *t, *dev_entry;
- 	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
- 	unsigned long flags;
--- 
-1.7.4
-
-
-From ea765da03054c02c4847489086aaf3d2dd914cf4 Mon Sep 17 00:00:00 2001
-From: Ian Campbell <ijc at hellion.org.uk>
-Date: Thu, 3 Dec 2009 21:56:20 +0000
-Subject: [PATCH 185/203] xen: rename pciback module to xen-pciback.
-
-pciback is rather generic for a modular distro style kernel.
-
-Signed-off-by: Ian Campbell <ian.campbell at citrix.com>
-Cc: Jeremy Fitzhardinge <jeremy at goop.org>
-Cc: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/pciback/Makefile |   24 ++++++++++++------------
- 1 files changed, 12 insertions(+), 12 deletions(-)
-
-diff --git a/drivers/xen/pciback/Makefile b/drivers/xen/pciback/Makefile
-index 106dae7..38bc123 100644
---- a/drivers/xen/pciback/Makefile
-+++ b/drivers/xen/pciback/Makefile
-@@ -1,16 +1,16 @@
--obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback.o
-+obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback.o
- 
--pciback-y := pci_stub.o pciback_ops.o xenbus.o
--pciback-y += conf_space.o conf_space_header.o \
--	     conf_space_capability.o \
--	     conf_space_capability_vpd.o \
--	     conf_space_capability_pm.o \
--             conf_space_quirks.o
--pciback-$(CONFIG_PCI_MSI) += conf_space_capability_msi.o
--pciback-$(CONFIG_XEN_PCIDEV_BACKEND_VPCI) += vpci.o
--pciback-$(CONFIG_XEN_PCIDEV_BACKEND_SLOT) += slot.o
--pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o
--pciback-$(CONFIG_XEN_PCIDEV_BACKEND_CONTROLLER) += controller.o
-+xen-pciback-y := pci_stub.o pciback_ops.o xenbus.o
-+xen-pciback-y += conf_space.o conf_space_header.o \
-+		 conf_space_capability.o \
-+		 conf_space_capability_vpd.o \
-+		 conf_space_capability_pm.o \
-+		 conf_space_quirks.o
-+xen-pciback-$(CONFIG_PCI_MSI) += conf_space_capability_msi.o
-+xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_VPCI) += vpci.o
-+xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_SLOT) += slot.o
-+xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o
-+xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_CONTROLLER) += controller.o
- 
- ifeq ($(CONFIG_XEN_PCIDEV_BE_DEBUG),y)
- EXTRA_CFLAGS += -DDEBUG
--- 
-1.7.4
-
-
-From cb524fe17d83d64d91f8ed52532c9dca49ba7778 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Wed, 9 Dec 2009 17:43:15 -0500
-Subject: [PATCH 186/203] xen-pciback: Register the owner (domain) of the PCI device.
-
-When the front-end and back-end start negotiating we register
-the domain that will use the PCI device. Furthermore during shutdown
-of guest or unbinding of the PCI device (and unloading of module)
-from pciback we unregister the domain owner.
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
----
- drivers/xen/pciback/pci_stub.c |    2 ++
- drivers/xen/pciback/xenbus.c   |   13 +++++++++++++
- 2 files changed, 15 insertions(+), 0 deletions(-)
-
-diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
-index 0b5a16b..02178e2 100644
---- a/drivers/xen/pciback/pci_stub.c
-+++ b/drivers/xen/pciback/pci_stub.c
-@@ -90,6 +90,8 @@ static void pcistub_device_release(struct kref *kref)
- 
- 	dev_dbg(&psdev->dev->dev, "pcistub_device_release\n");
- 
-+	xen_unregister_device_domain_owner(psdev->dev);
++	return err;
++}
++
++static int pciback_export_device(struct pciback_device *pdev,
++				 int domain, int bus, int slot, int func,
++				 int devid)
++{
++	struct pci_dev *dev;
++	int err = 0;
++
++	dev_dbg(&pdev->xdev->dev, "exporting dom %x bus %x slot %x func %x\n",
++		domain, bus, slot, func);
++
++	dev = pcistub_get_pci_dev_by_slot(pdev, domain, bus, slot, func);
++	if (!dev) {
++		err = -EINVAL;
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Couldn't locate PCI device "
++				 "(%04x:%02x:%02x.%01x)! "
++				 "perhaps already in-use?",
++				 domain, bus, slot, func);
++		goto out;
++	}
++
++	err = pciback_add_pci_dev(pdev, dev, devid, pciback_publish_pci_dev);
++	if (err)
++		goto out;
 +
- 	/* Clean-up the device */
- 	pciback_reset_device(psdev->dev);
- 	pciback_config_free_dyn_fields(psdev->dev);
-diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
-index af6c25a..d448bf5 100644
---- a/drivers/xen/pciback/xenbus.c
-+++ b/drivers/xen/pciback/xenbus.c
-@@ -10,6 +10,7 @@
- #include <linux/workqueue.h>
- #include <xen/xenbus.h>
- #include <xen/events.h>
-+#include <asm/xen/pci.h>
- #include <linux/workqueue.h>
- #include "pciback.h"
- 
-@@ -221,6 +222,15 @@ static int pciback_export_device(struct pciback_device *pdev,
- 	if (err)
- 		goto out;
- 
 +	dev_dbg(&dev->dev, "registering for %d\n", pdev->xdev->otherend_id);
 +	if (xen_register_device_domain_owner(dev,
 +					     pdev->xdev->otherend_id) != 0) {
@@ -39403,1621 +14083,1202 @@ index af6c25a..d448bf5 100644
 +		xen_register_device_domain_owner(dev, pdev->xdev->otherend_id);
 +	}
 +
- 	/* TODO: It'd be nice to export a bridge and have all of its children
- 	 * get exported with it. This may be best done in xend (which will
- 	 * have to calculate resource usage anyway) but we probably want to
-@@ -251,6 +261,9 @@ static int pciback_remove_device(struct pciback_device *pdev,
- 		goto out;
- 	}
- 
++	/* TODO: It'd be nice to export a bridge and have all of its children
++	 * get exported with it. This may be best done in xend (which will
++	 * have to calculate resource usage anyway) but we probably want to
++	 * put something in here to ensure that if a bridge gets given to a
++	 * driver domain, that all devices under that bridge are not given
++	 * to other driver domains (as he who controls the bridge can disable
++	 * it and stop the other devices from working).
++	 */
++out:
++	return err;
++}
++
++static int pciback_remove_device(struct pciback_device *pdev,
++				 int domain, int bus, int slot, int func)
++{
++	int err = 0;
++	struct pci_dev *dev;
++
++	dev_dbg(&pdev->xdev->dev, "removing dom %x bus %x slot %x func %x\n",
++		domain, bus, slot, func);
++
++	dev = pciback_get_pci_dev(pdev, domain, bus, PCI_DEVFN(slot, func));
++	if (!dev) {
++		err = -EINVAL;
++		dev_dbg(&pdev->xdev->dev, "Couldn't locate PCI device "
++			"(%04x:%02x:%02x.%01x)! not owned by this domain\n",
++			domain, bus, slot, func);
++		goto out;
++	}
++
 +	dev_dbg(&dev->dev, "unregistering for %d\n", pdev->xdev->otherend_id);
 +	xen_unregister_device_domain_owner(dev);
 +
- 	pciback_release_pci_dev(pdev, dev);
- 
- out:
--- 
-1.7.4
-
-
-From b9d20e9c2627385fc0b39fa8bc45655a656bb71e Mon Sep 17 00:00:00 2001
-From: Zhao, Yu <yu.zhao at intel.com>
-Date: Wed, 3 Mar 2010 13:27:55 -0500
-Subject: [PATCH 187/203] xen-pciback: guest SR-IOV support for PV guest
-
-These changes are for PV guest to use Virtual Function. Because the VF's
-vendor, device registers in cfg space are 0xffff, which are invalid and
-ignored by PCI device scan. Values in 'struct pci_dev' are fixed up by
-SR-IOV code, and using these values will present correct VID and DID to
-PV guest kernel.
-
-And command registers in the cfg space are read only 0, which means we
-have to emulate MMIO enable bit (VF only uses MMIO  resource) so PV
-kernel can work properly.
-
-Acked-by: jbeulich at novell.com
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/pciback/conf_space_header.c |   71 ++++++++++++++++++++++++++++--
- 1 files changed, 66 insertions(+), 5 deletions(-)
-
-diff --git a/drivers/xen/pciback/conf_space_header.c b/drivers/xen/pciback/conf_space_header.c
-index 3ae7da1..1f4f86e 100644
---- a/drivers/xen/pciback/conf_space_header.c
-+++ b/drivers/xen/pciback/conf_space_header.c
-@@ -18,6 +18,25 @@ struct pci_bar_info {
- #define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO))
- #define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER)
- 
-+static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data)
++	pciback_release_pci_dev(pdev, dev);
++
++out:
++	return err;
++}
++
++static int pciback_publish_pci_root(struct pciback_device *pdev,
++				    unsigned int domain, unsigned int bus)
 +{
-+	int i;
-+	int ret;
++	unsigned int d, b;
++	int i, root_num, len, err;
++	char str[64];
 +
-+	ret = pciback_read_config_word(dev, offset, value, data);
-+	if (!atomic_read(&dev->enable_cnt))
-+		return ret;
++	dev_dbg(&pdev->xdev->dev, "Publishing pci roots\n");
 +
-+	for (i = 0; i < PCI_ROM_RESOURCE; i++) {
-+		if (dev->resource[i].flags & IORESOURCE_IO)
-+			*value |= PCI_COMMAND_IO;
-+		if (dev->resource[i].flags & IORESOURCE_MEM)
-+			*value |= PCI_COMMAND_MEMORY;
++	err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
++			   "root_num", "%d", &root_num);
++	if (err == 0 || err == -ENOENT)
++		root_num = 0;
++	else if (err < 0)
++		goto out;
++
++	/* Verify that we haven't already published this pci root */
++	for (i = 0; i < root_num; i++) {
++		len = snprintf(str, sizeof(str), "root-%d", i);
++		if (unlikely(len >= (sizeof(str) - 1))) {
++			err = -ENOMEM;
++			goto out;
++		}
++
++		err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
++				   str, "%x:%x", &d, &b);
++		if (err < 0)
++			goto out;
++		if (err != 2) {
++			err = -EINVAL;
++			goto out;
++		}
++
++		if (d == domain && b == bus) {
++			err = 0;
++			goto out;
++		}
 +	}
 +
-+	return ret;
++	len = snprintf(str, sizeof(str), "root-%d", root_num);
++	if (unlikely(len >= (sizeof(str) - 1))) {
++		err = -ENOMEM;
++		goto out;
++	}
++
++	dev_dbg(&pdev->xdev->dev, "writing root %d at %04x:%02x\n",
++		root_num, domain, bus);
++
++	err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
++			    "%04x:%02x", domain, bus);
++	if (err)
++		goto out;
++
++	err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
++			    "root_num", "%d", (root_num + 1));
++
++out:
++	return err;
 +}
 +
- static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
- {
- 	int err;
-@@ -142,10 +161,26 @@ static inline void read_dev_bar(struct pci_dev *dev,
- 				struct pci_bar_info *bar_info, int offset,
- 				u32 len_mask)
- {
--	pci_read_config_dword(dev, offset, &bar_info->val);
--	pci_write_config_dword(dev, offset, len_mask);
--	pci_read_config_dword(dev, offset, &bar_info->len_val);
--	pci_write_config_dword(dev, offset, bar_info->val);
-+	int	pos;
-+	struct resource	*res = dev->resource;
++static int pciback_reconfigure(struct pciback_device *pdev)
++{
++	int err = 0;
++	int num_devs;
++	int domain, bus, slot, func;
++	int substate;
++	int i, len;
++	char state_str[64];
++	char dev_str[64];
 +
-+	if (offset == PCI_ROM_ADDRESS || offset == PCI_ROM_ADDRESS1)
-+		pos = PCI_ROM_RESOURCE;
-+	else {
-+		pos = (offset - PCI_BASE_ADDRESS_0) / 4;
-+		if (pos && ((res[pos - 1].flags & (PCI_BASE_ADDRESS_SPACE |
-+				PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
-+			   (PCI_BASE_ADDRESS_SPACE_MEMORY |
-+				PCI_BASE_ADDRESS_MEM_TYPE_64))) {
-+			bar_info->val = res[pos - 1].start >> 32;
-+			bar_info->len_val = res[pos - 1].end >> 32;
-+			return;
++
++	dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n");
++
++	/* Make sure we only reconfigure once */
++	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
++	    XenbusStateReconfiguring)
++		goto out;
++
++	err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
++			   &num_devs);
++	if (err != 1) {
++		if (err >= 0)
++			err = -EINVAL;
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Error reading number of devices");
++		goto out;
++	}
++
++	for (i = 0; i < num_devs; i++) {
++		len = snprintf(state_str, sizeof(state_str), "state-%d", i);
++		if (unlikely(len >= (sizeof(state_str) - 1))) {
++			err = -ENOMEM;
++			xenbus_dev_fatal(pdev->xdev, err,
++					 "String overflow while reading "
++					 "configuration");
++			goto out;
++		}
++		err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, state_str,
++				   "%d", &substate);
++		if (err != 1)
++			substate = XenbusStateUnknown;
++
++		switch (substate) {
++		case XenbusStateInitialising:
++			dev_dbg(&pdev->xdev->dev, "Attaching dev-%d ...\n", i);
++
++			len = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
++			if (unlikely(len >= (sizeof(dev_str) - 1))) {
++				err = -ENOMEM;
++				xenbus_dev_fatal(pdev->xdev, err,
++						 "String overflow while "
++						 "reading configuration");
++				goto out;
++			}
++			err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
++					   dev_str, "%x:%x:%x.%x",
++					   &domain, &bus, &slot, &func);
++			if (err < 0) {
++				xenbus_dev_fatal(pdev->xdev, err,
++						 "Error reading device "
++						 "configuration");
++				goto out;
++			}
++			if (err != 4) {
++				err = -EINVAL;
++				xenbus_dev_fatal(pdev->xdev, err,
++						 "Error parsing pci device "
++						 "configuration");
++				goto out;
++			}
++
++			err = pciback_export_device(pdev, domain, bus, slot,
++						    func, i);
++			if (err)
++				goto out;
++
++			/* Publish pci roots. */
++			err = pciback_publish_pci_roots(pdev,
++						pciback_publish_pci_root);
++			if (err) {
++				xenbus_dev_fatal(pdev->xdev, err,
++						 "Error while publish PCI root"
++						 "buses for frontend");
++				goto out;
++			}
++
++			err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
++					    state_str, "%d",
++					    XenbusStateInitialised);
++			if (err) {
++				xenbus_dev_fatal(pdev->xdev, err,
++						 "Error switching substate of "
++						 "dev-%d\n", i);
++				goto out;
++			}
++			break;
++
++		case XenbusStateClosing:
++			dev_dbg(&pdev->xdev->dev, "Detaching dev-%d ...\n", i);
++
++			len = snprintf(dev_str, sizeof(dev_str), "vdev-%d", i);
++			if (unlikely(len >= (sizeof(dev_str) - 1))) {
++				err = -ENOMEM;
++				xenbus_dev_fatal(pdev->xdev, err,
++						 "String overflow while "
++						 "reading configuration");
++				goto out;
++			}
++			err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
++					   dev_str, "%x:%x:%x.%x",
++					   &domain, &bus, &slot, &func);
++			if (err < 0) {
++				xenbus_dev_fatal(pdev->xdev, err,
++						 "Error reading device "
++						 "configuration");
++				goto out;
++			}
++			if (err != 4) {
++				err = -EINVAL;
++				xenbus_dev_fatal(pdev->xdev, err,
++						 "Error parsing pci device "
++						 "configuration");
++				goto out;
++			}
++
++			err = pciback_remove_device(pdev, domain, bus, slot,
++						    func);
++			if (err)
++				goto out;
++
++			/* TODO: If at some point we implement support for pci
++			 * root hot-remove on pcifront side, we'll need to
++			 * remove unnecessary xenstore nodes of pci roots here.
++			 */
++
++			break;
++
++		default:
++			break;
 +		}
 +	}
 +
-+	bar_info->val = res[pos].start |
-+			(res[pos].flags & PCI_REGION_FLAG_MASK);
-+	bar_info->len_val = res[pos].end - res[pos].start + 1;
- }
- 
- static void *bar_init(struct pci_dev *dev, int offset)
-@@ -186,6 +221,22 @@ static void bar_release(struct pci_dev *dev, int offset, void *data)
- 	kfree(data);
- }
- 
-+static int pciback_read_vendor(struct pci_dev *dev, int offset,
-+			       u16 *value, void *data)
-+{
-+	*value = dev->vendor;
++	err = xenbus_switch_state(pdev->xdev, XenbusStateReconfigured);
++	if (err) {
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Error switching to reconfigured state!");
++		goto out;
++	}
 +
++out:
 +	return 0;
 +}
 +
-+static int pciback_read_device(struct pci_dev *dev, int offset,
-+			       u16 *value, void *data)
++static void pciback_frontend_changed(struct xenbus_device *xdev,
++				     enum xenbus_state fe_state)
 +{
-+	*value = dev->device;
++	struct pciback_device *pdev = dev_get_drvdata(&xdev->dev);
 +
-+	return 0;
-+}
++	dev_dbg(&xdev->dev, "fe state changed %d\n", fe_state);
 +
- static int interrupt_read(struct pci_dev *dev, int offset, u8 * value,
- 			  void *data)
- {
-@@ -213,9 +264,19 @@ out:
- 
- static const struct config_field header_common[] = {
- 	{
-+	 .offset    = PCI_VENDOR_ID,
-+	 .size      = 2,
-+	 .u.w.read  = pciback_read_vendor,
-+	},
-+	{
-+	 .offset    = PCI_DEVICE_ID,
-+	 .size      = 2,
-+	 .u.w.read  = pciback_read_device,
-+	},
-+	{
- 	 .offset    = PCI_COMMAND,
- 	 .size      = 2,
--	 .u.w.read  = pciback_read_config_word,
-+	 .u.w.read  = command_read,
- 	 .u.w.write = command_write,
- 	},
- 	{
--- 
-1.7.4
-
-
-From 93c8ff641cc167d2f7a73645b79a8e193e4d1f8c Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Wed, 3 Mar 2010 13:38:43 -0500
-Subject: [PATCH 188/203] xen-pciback: Disable MSI/MSI-X when reseting device
-
-In cases where the guest is abruptly killed and has not disabled
-MSI/MSI-X interrupts we want to do that.
-
-Otherwise when the guest is started up and enables MSI, we would
-get a WARN() that the device already had been enabled.
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/pciback/pciback_ops.c |    8 ++++++++
- 1 files changed, 8 insertions(+), 0 deletions(-)
-
-diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
-index 2b9a93e..011db67 100644
---- a/drivers/xen/pciback/pciback_ops.c
-+++ b/drivers/xen/pciback/pciback_ops.c
-@@ -23,6 +23,14 @@ void pciback_reset_device(struct pci_dev *dev)
- 
- 	/* Disable devices (but not bridges) */
- 	if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
-+#ifdef CONFIG_PCI_MSI
-+		/* The guest could have been abruptly killed without
-+		 * disabling MSI/MSI-X interrupts.*/
-+		if (dev->msix_enabled)
-+			pci_disable_msix(dev);
-+		if (dev->msi_enabled)
-+			pci_disable_msi(dev);
-+#endif
- 		pci_disable_device(dev);
- 
- 		pci_write_config_word(dev, PCI_COMMAND, 0);
--- 
-1.7.4
-
-
-From 2a42c52bfca255f6754ad5bd53bb3f893fd6992f Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Mon, 12 Apr 2010 11:46:00 -0400
-Subject: [PATCH 189/203] xen-pciback: Allocate IRQ handler for device that is shared with guest.
-
-If the pciback module is loaded with fake_irq_handler=1 we install
-for all devices that are to be passed to the guest domain a IRQ handler.
-The IRQ handler will return IRQ_HANDLED or IRQ_NONE depending on
-on the ack_intr flag.
-
-The trigger to install this IRQ handler is when the enable_isr flag
-is set.
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/pciback/pci_stub.c    |   13 ++++-
- drivers/xen/pciback/pciback.h     |   12 ++++-
- drivers/xen/pciback/pciback_ops.c |   95 ++++++++++++++++++++++++++++++++++++-
- 3 files changed, 115 insertions(+), 5 deletions(-)
-
-diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
-index 02178e2..45bbe99 100644
---- a/drivers/xen/pciback/pci_stub.c
-+++ b/drivers/xen/pciback/pci_stub.c
-@@ -21,6 +21,8 @@
- #include "conf_space.h"
- #include "conf_space_quirks.h"
- 
-+#define DRV_NAME	"pciback"
++	switch (fe_state) {
++	case XenbusStateInitialised:
++		pciback_attach(pdev);
++		break;
 +
- static char *pci_devs_to_hide;
- wait_queue_head_t aer_wait_queue;
- /*Add sem for sync AER handling and pciback remove/reconfigue ops,
-@@ -290,13 +292,20 @@ static int __devinit pcistub_init_device(struct pci_dev *dev)
- 	 * would need to be called somewhere to free the memory allocated
- 	 * here and then to call kfree(pci_get_drvdata(psdev->dev)).
- 	 */
--	dev_data = kzalloc(sizeof(*dev_data), GFP_ATOMIC);
-+	dev_data = kzalloc(sizeof(*dev_data) +  strlen(DRV_NAME "[]")
-+				+ strlen(pci_name(dev)) + 1, GFP_ATOMIC);
- 	if (!dev_data) {
- 		err = -ENOMEM;
- 		goto out;
- 	}
- 	pci_set_drvdata(dev, dev_data);
- 
-+	/*
-+	 * Setup name for fake IRQ handler. It will only be enabled
-+	 * once the device is turned on by the guest.
-+	 */
-+	sprintf(dev_data->irq_name, DRV_NAME "[%s]", pci_name(dev));
++	case XenbusStateReconfiguring:
++		pciback_reconfigure(pdev);
++		break;
 +
- 	dev_dbg(&dev->dev, "initializing config\n");
- 
- 	init_waitqueue_head(&aer_wait_queue);
-@@ -837,7 +846,7 @@ static struct pci_error_handlers pciback_error_handler = {
-  */
- 
- static struct pci_driver pciback_pci_driver = {
--	.name = "pciback",
-+	.name = DRV_NAME,
- 	.id_table = pcistub_ids,
- 	.probe = pcistub_probe,
- 	.remove = pcistub_remove,
-diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h
-index 98e2912..9d1b0a6 100644
---- a/drivers/xen/pciback/pciback.h
-+++ b/drivers/xen/pciback/pciback.h
-@@ -45,8 +45,13 @@ struct pciback_device {
- 
- struct pciback_dev_data {
- 	struct list_head config_fields;
--	int permissive;
--	int warned_on_write;
-+	unsigned int permissive : 1;
-+	unsigned int warned_on_write : 1;
-+	unsigned int enable_intx : 1;
-+	unsigned int isr_on : 1; /* Whether the IRQ handler is installed. */ 
-+	unsigned int ack_intr : 1; /* .. and ACK-ing */
-+	unsigned int irq; /* Saved in case device transitions to MSI/MSI-X */
-+	char irq_name[0]; /* pciback[000:04:00.0] */
- };
- 
- /* Used by XenBus and pciback_ops.c */
-@@ -131,3 +136,6 @@ extern int verbose_request;
- void test_and_schedule_op(struct pciback_device *pdev);
- #endif
- 
-+/* Handles shared IRQs that can to device domain and control domain. */
-+void pciback_irq_handler(struct pci_dev *dev, int reset);
-+irqreturn_t pciback_guest_interrupt(int irq, void *dev_id);
-diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
-index 011db67..cb54893 100644
---- a/drivers/xen/pciback/pciback_ops.c
-+++ b/drivers/xen/pciback/pciback_ops.c
-@@ -13,6 +13,78 @@
- int verbose_request;
- module_param(verbose_request, int, 0644);
- 
-+/* Ensure a device is has the fake IRQ handler "turned on/off" and is
-+ * ready to be exported. This MUST be run after pciback_reset_device
-+ * which does the actual PCI device enable/disable.
-+ */
-+void pciback_control_isr(struct pci_dev *dev, int reset)
-+{
-+	struct pciback_dev_data *dev_data;
-+	int rc;
-+	int enable = 0;
++	case XenbusStateConnected:
++		/* pcifront switched its state from reconfiguring to connected.
++		 * Mirror that here by switching to the connected state.
++		 */
++		xenbus_switch_state(xdev, XenbusStateConnected);
++		break;
 +
-+	dev_data = pci_get_drvdata(dev);
-+	if (!dev_data)
-+		return;
++	case XenbusStateClosing:
++		pciback_disconnect(pdev);
++		xenbus_switch_state(xdev, XenbusStateClosing);
++		break;
 +
-+	/* We don't deal with bridges */
-+	if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
-+		return;
++	case XenbusStateClosed:
++		pciback_disconnect(pdev);
++		xenbus_switch_state(xdev, XenbusStateClosed);
++		if (xenbus_dev_is_online(xdev))
++			break;
++		/* fall through if not online */
++	case XenbusStateUnknown:
++		dev_dbg(&xdev->dev, "frontend is gone! unregister device\n");
++		device_unregister(&xdev->dev);
++		break;
 +
-+	if (reset) {
-+		dev_data->enable_intx = 0;
-+		dev_data->ack_intr = 0;
++	default:
++		break;
 +	}
-+	enable =  dev_data->enable_intx;
++}
 +
-+	/* Asked to disable, but ISR isn't runnig */
-+	if (!enable && !dev_data->isr_on)
-+		return;
++static int pciback_setup_backend(struct pciback_device *pdev)
++{
++	/* Get configuration from xend (if available now) */
++	int domain, bus, slot, func;
++	int err = 0;
++	int i, num_devs;
++	char dev_str[64];
++	char state_str[64];
 +
-+	/* Squirrel away the IRQs in the dev_data. We need this
-+	 * b/c when device transitions to MSI, the dev->irq is
-+	 * overwritten with the MSI vector.
++	/* It's possible we could get the call to setup twice, so make sure
++	 * we're not already connected.
 +	 */
-+	if (enable)
-+		dev_data->irq = dev->irq;
++	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
++	    XenbusStateInitWait)
++		goto out;
 +
-+	dev_dbg(&dev->dev, "%s: #%d %s %s%s %s-> %s\n",
-+		dev_data->irq_name,
-+		dev_data->irq,
-+		pci_is_enabled(dev) ? "on" : "off",
-+		dev->msi_enabled ? "MSI" : "",
-+		dev->msix_enabled ? "MSI/X" : "",
-+		dev_data->isr_on ? "enable" : "disable",
-+		enable ? "enable" : "disable");
++	dev_dbg(&pdev->xdev->dev, "getting be setup\n");
 +
-+	if (enable) {
-+		rc = request_irq(dev_data->irq,
-+				pciback_guest_interrupt, IRQF_SHARED,
-+				dev_data->irq_name, dev);
-+		if (rc) {
-+			dev_err(&dev->dev, "%s: failed to install fake IRQ " \
-+				"handler for IRQ %d! (rc:%d)\n", dev_data->irq_name,
-+				dev_data->irq, rc);
++	err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
++			   &num_devs);
++	if (err != 1) {
++		if (err >= 0)
++			err = -EINVAL;
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Error reading number of devices");
++		goto out;
++	}
++
++	for (i = 0; i < num_devs; i++) {
++		int l = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
++		if (unlikely(l >= (sizeof(dev_str) - 1))) {
++			err = -ENOMEM;
++			xenbus_dev_fatal(pdev->xdev, err,
++					 "String overflow while reading "
++					 "configuration");
 +			goto out;
 +		}
-+	}
-+	else {
-+		free_irq(dev_data->irq, dev);
-+		dev_data->irq = 0;
-+	}
-+	dev_data->isr_on = enable;
-+	dev_data->ack_intr = enable;
-+out:
-+	dev_dbg(&dev->dev, "%s: #%d %s %s%s %s\n",
-+		dev_data->irq_name,
-+		dev_data->irq,
-+		pci_is_enabled(dev) ? "on" : "off",
-+		dev->msi_enabled ? "MSI" : "",
-+		dev->msix_enabled ? "MSI/X" : "",
-+		enable ? (dev_data->isr_on ? "enabled" : "failed to enable") :
-+			(dev_data->isr_on ? "failed to disable" : "disabled"));
-+}
 +
- /* Ensure a device is "turned off" and ready to be exported.
-  * (Also see pciback_config_reset to ensure virtual configuration space is
-  * ready to be re-exported)
-@@ -21,6 +93,8 @@ void pciback_reset_device(struct pci_dev *dev)
- {
- 	u16 cmd;
- 
-+	pciback_control_isr(dev, 1 /* reset device */);
++		err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, dev_str,
++				   "%x:%x:%x.%x", &domain, &bus, &slot, &func);
++		if (err < 0) {
++			xenbus_dev_fatal(pdev->xdev, err,
++					 "Error reading device configuration");
++			goto out;
++		}
++		if (err != 4) {
++			err = -EINVAL;
++			xenbus_dev_fatal(pdev->xdev, err,
++					 "Error parsing pci device "
++					 "configuration");
++			goto out;
++		}
 +
- 	/* Disable devices (but not bridges) */
- 	if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
- #ifdef CONFIG_PCI_MSI
-@@ -78,13 +152,18 @@ void pciback_do_op(struct work_struct *data)
- 	struct pciback_device *pdev =
- 		container_of(data, struct pciback_device, op_work);
- 	struct pci_dev *dev;
-+	struct pciback_dev_data *dev_data = NULL;
- 	struct xen_pci_op *op = &pdev->sh_info->op;
-+	int test_intx = 0;
- 
- 	dev = pciback_get_pci_dev(pdev, op->domain, op->bus, op->devfn);
- 
- 	if (dev == NULL)
- 		op->err = XEN_PCI_ERR_dev_not_found;
- 	else {
-+		dev_data = pci_get_drvdata(dev);
-+		if (dev_data)
-+			test_intx = dev_data->enable_intx;
- 		switch (op->cmd) {
- 		case XEN_PCI_OP_conf_read:
- 			op->err = pciback_config_read(dev,
-@@ -109,10 +188,15 @@ void pciback_do_op(struct work_struct *data)
- 			break;
- #endif
- 		default:
--			op->err = XEN_PCI_ERR_not_implemented;
-+			op->err = XEN_PCI_ERR_not_implemented;	
- 			break;
- 		}
- 	}
-+	if (!op->err && dev && dev_data) {
-+		/* Transition detected */
-+		if ((dev_data->enable_intx != test_intx))
-+			pciback_control_isr(dev, 0 /* no reset */);
++		err = pciback_export_device(pdev, domain, bus, slot, func, i);
++		if (err)
++			goto out;
++
++		/* Switch substate of this device. */
++		l = snprintf(state_str, sizeof(state_str), "state-%d", i);
++		if (unlikely(l >= (sizeof(state_str) - 1))) {
++			err = -ENOMEM;
++			xenbus_dev_fatal(pdev->xdev, err,
++					 "String overflow while reading "
++					 "configuration");
++			goto out;
++		}
++		err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, state_str,
++				    "%d", XenbusStateInitialised);
++		if (err) {
++			xenbus_dev_fatal(pdev->xdev, err, "Error switching "
++					 "substate of dev-%d\n", i);
++			goto out;
++		}
++	}
++
++	err = pciback_publish_pci_roots(pdev, pciback_publish_pci_root);
++	if (err) {
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Error while publish PCI root buses "
++				 "for frontend");
++		goto out;
 +	}
- 	/* Tell the driver domain that we're done. */
- 	wmb();
- 	clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
-@@ -137,3 +221,12 @@ irqreturn_t pciback_handle_event(int irq, void *dev_id)
- 
- 	return IRQ_HANDLED;
- }
-+irqreturn_t pciback_guest_interrupt(int irq, void *dev_id)
-+{
-+	struct pci_dev *dev = (struct pci_dev *)dev_id;
-+	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
 +
-+	if (dev_data->isr_on && dev_data->ack_intr)
-+		return IRQ_HANDLED;
-+	return IRQ_NONE;
++	err = xenbus_switch_state(pdev->xdev, XenbusStateInitialised);
++	if (err)
++		xenbus_dev_fatal(pdev->xdev, err,
++				 "Error switching to initialised state!");
++
++out:
++	if (!err)
++		/* see if pcifront is already configured (if not, we'll wait) */
++		pciback_attach(pdev);
++
++	return err;
 +}
--- 
-1.7.4
-
-
-From ff798d2af7f9f02521295a073d38d86f00771739 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Mon, 12 Apr 2010 11:47:15 -0400
-Subject: [PATCH 190/203] xen-pciback: Add SysFS instrumentation for the fake IRQ handler.
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/pciback/pci_stub.c |   75 +++++++++++++++++++++++++++++++++++++++-
- 1 files changed, 74 insertions(+), 1 deletions(-)
-
-diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
-index 45bbe99..ee2cd68 100644
---- a/drivers/xen/pciback/pci_stub.c
-+++ b/drivers/xen/pciback/pci_stub.c
-@@ -1038,6 +1038,70 @@ static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf)
- 
- DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL);
- 
-+static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf)
++
++static void pciback_be_watch(struct xenbus_watch *watch,
++			     const char **vec, unsigned int len)
 +{
-+	struct pcistub_device *psdev;
-+	struct pciback_dev_data *dev_data;
-+	size_t count = 0;
-+	unsigned long flags;
++	struct pciback_device *pdev =
++	    container_of(watch, struct pciback_device, be_watch);
 +
-+	spin_lock_irqsave(&pcistub_devices_lock, flags);
-+	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
-+		if (count >= PAGE_SIZE)
-+			break;
-+		if (!psdev->dev)
-+			continue;
-+		dev_data = pci_get_drvdata(psdev->dev);
-+		if (!dev_data)
-+			continue;
-+		count +=
-+		    scnprintf(buf + count, PAGE_SIZE - count, "%s:%s:%sing\n",
-+			      pci_name(psdev->dev),
-+			      dev_data->isr_on ? "on" : "off",
-+			      dev_data->ack_intr ? "ack" : "not ack");
++	switch (xenbus_read_driver_state(pdev->xdev->nodename)) {
++	case XenbusStateInitWait:
++		pciback_setup_backend(pdev);
++		break;
++
++	default:
++		break;
 +	}
-+	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
-+	return count;
 +}
 +
-+DRIVER_ATTR(irq_handlers, S_IRUSR, pcistub_irq_handler_show, NULL);
-+
-+static ssize_t pcistub_irq_handler_switch(struct device_driver *drv,
-+					  const char *buf,
-+					  size_t count)
++static int pciback_xenbus_probe(struct xenbus_device *dev,
++				const struct xenbus_device_id *id)
 +{
-+	struct pcistub_device *psdev;
-+	struct pciback_dev_data *dev_data;
-+	int domain, bus, slot, func;
-+	int err = -ENOENT;
++	int err = 0;
++	struct pciback_device *pdev = alloc_pdev(dev);
 +
-+	err = str_to_slot(buf, &domain, &bus, &slot, &func);
-+	if (err)
++	if (pdev == NULL) {
++		err = -ENOMEM;
++		xenbus_dev_fatal(dev, err,
++				 "Error allocating pciback_device struct");
 +		goto out;
++	}
 +
-+	psdev = pcistub_device_find(domain, bus, slot, func);
-+
-+	if (!psdev)
++	/* wait for xend to configure us */
++	err = xenbus_switch_state(dev, XenbusStateInitWait);
++	if (err)
 +		goto out;
 +
-+	dev_data = pci_get_drvdata(psdev->dev);
-+	if (!dev_data)
++	/* watch the backend node for backend configuration information */
++	err = xenbus_watch_path(dev, dev->nodename, &pdev->be_watch,
++				pciback_be_watch);
++	if (err)
 +		goto out;
 +
-+	dev_dbg(&psdev->dev->dev, "%s fake irq handler: %d->%d\n",
-+		dev_data->irq_name, dev_data->isr_on,
-+		!dev_data->isr_on);
++	pdev->be_watching = 1;
++
++	/* We need to force a call to our callback here in case
++	 * xend already configured us!
++	 */
++	pciback_be_watch(&pdev->be_watch, NULL, 0);
 +
-+	dev_data->isr_on = !(dev_data->isr_on);
-+	if (dev_data->isr_on)
-+		dev_data->ack_intr = 1;
 +out:
-+	if (!err)
-+		err = count;
 +	return err;
 +}
-+DRIVER_ATTR(irq_handler_state, S_IWUSR, NULL, pcistub_irq_handler_switch);
 +
- static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf,
- 				 size_t count)
- {
-@@ -1177,7 +1241,10 @@ static void pcistub_exit(void)
- 	driver_remove_file(&pciback_pci_driver.driver, &driver_attr_slots);
- 	driver_remove_file(&pciback_pci_driver.driver, &driver_attr_quirks);
- 	driver_remove_file(&pciback_pci_driver.driver, &driver_attr_permissive);
--
-+	driver_remove_file(&pciback_pci_driver.driver,
-+			   &driver_attr_irq_handlers);
-+	driver_remove_file(&pciback_pci_driver.driver,
-+			   &driver_attr_irq_handler_state);
- 	pci_unregister_driver(&pciback_pci_driver);
- }
- 
-@@ -1236,6 +1303,12 @@ static int __init pcistub_init(void)
- 		err = driver_create_file(&pciback_pci_driver.driver,
- 					 &driver_attr_permissive);
- 
-+	if (!err)
-+		err = driver_create_file(&pciback_pci_driver.driver,
-+					 &driver_attr_irq_handlers);
-+	if (!err)
-+		err = driver_create_file(&pciback_pci_driver.driver,
-+					&driver_attr_irq_handler_state);
- 	if (err)
- 		pcistub_exit();
- 
--- 
-1.7.4
-
-
-From 446e7d199298a891003b36a2cd29d8db46640443 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Mon, 19 Apr 2010 14:39:10 -0400
-Subject: [PATCH 191/203] xen-pciback: When device transitions to MSI/MSI-X stop ACK-ing on the
- legacy interrupt.
-
-But don't remove the irq handler from the legacy interrupt. The device
-might still transition back to the legacy interrupts.
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/pciback/conf_space_capability_msi.c |   17 ++++++++++++++++-
- 1 files changed, 16 insertions(+), 1 deletions(-)
-
-diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c
-index b70ea8b..a236e2d 100644
---- a/drivers/xen/pciback/conf_space_capability_msi.c
-+++ b/drivers/xen/pciback/conf_space_capability_msi.c
-@@ -12,6 +12,7 @@
- int pciback_enable_msi(struct pciback_device *pdev,
- 		struct pci_dev *dev, struct xen_pci_op *op)
- {
-+	struct pciback_dev_data *dev_data;
- 	int otherend = pdev->xdev->otherend_id;
- 	int status;
- 
-@@ -27,21 +28,29 @@ int pciback_enable_msi(struct pciback_device *pdev,
- 	/* The value the guest needs is actually the IDT vector, not the
- 	 * the local domain's IRQ number. */
- 	op->value = xen_gsi_from_irq(dev->irq);
-+	dev_data = pci_get_drvdata(dev);
-+	if (dev_data)
-+		dev_data->ack_intr = 0;
- 	return 0;
- }
- 
- int pciback_disable_msi(struct pciback_device *pdev,
- 		struct pci_dev *dev, struct xen_pci_op *op)
- {
-+	struct pciback_dev_data *dev_data;
- 	pci_disable_msi(dev);
- 
- 	op->value = xen_gsi_from_irq(dev->irq);
-+	dev_data = pci_get_drvdata(dev);
-+	if (dev_data)
-+		dev_data->ack_intr = 1;
- 	return 0;
- }
- 
- int pciback_enable_msix(struct pciback_device *pdev,
- 		struct pci_dev *dev, struct xen_pci_op *op)
- {
-+	struct pciback_dev_data *dev_data;
- 	int i, result;
- 	struct msix_entry *entries;
- 
-@@ -68,6 +77,9 @@ int pciback_enable_msix(struct pciback_device *pdev,
- 	kfree(entries);
- 
- 	op->value = result;
-+	dev_data = pci_get_drvdata(dev);
-+	if (dev_data)
-+		dev_data->ack_intr = 0;
- 
- 	return result;
- }
-@@ -75,10 +87,13 @@ int pciback_enable_msix(struct pciback_device *pdev,
- int pciback_disable_msix(struct pciback_device *pdev,
- 		struct pci_dev *dev, struct xen_pci_op *op)
- {
--
-+	struct pciback_dev_data *dev_data;
- 	pci_disable_msix(dev);
- 
- 	op->value = xen_gsi_from_irq(dev->irq);
-+	dev_data = pci_get_drvdata(dev);
-+	if (dev_data)
-+		dev_data->ack_intr = 1;
++static int pciback_xenbus_remove(struct xenbus_device *dev)
++{
++	struct pciback_device *pdev = dev_get_drvdata(&dev->dev);
++
++	if (pdev != NULL)
++		free_pdev(pdev);
++
++	return 0;
++}
++
++static const struct xenbus_device_id xenpci_ids[] = {
++	{"pci"},
++	{""},
++};
++
++static struct xenbus_driver xenbus_pciback_driver = {
++	.name			= "pciback",
++	.owner			= THIS_MODULE,
++	.ids			= xenpci_ids,
++	.probe			= pciback_xenbus_probe,
++	.remove			= pciback_xenbus_remove,
++	.otherend_changed	= pciback_frontend_changed,
++};
++
++int __init pciback_xenbus_register(void)
++{
++	pciback_wq = create_workqueue("pciback_workqueue");
++	if (!pciback_wq) {
++		printk(KERN_ERR "%s: create "
++			"pciback_workqueue failed\n", __func__);
++		return -EFAULT;
++	}
++	return xenbus_register_backend(&xenbus_pciback_driver);
++}
++
++void __exit pciback_xenbus_unregister(void)
++{
++	destroy_workqueue(pciback_wq);
++	xenbus_unregister_driver(&xenbus_pciback_driver);
++}
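
(Aside: one recurring idiom in the xenbus code above is worth spelling out.
Every xenstore key ("dev-%d", "state-%d", "root-%d", "vdev-%d") is formatted
into a fixed buffer with snprintf() and the result is checked before the key
is used. C99 snprintf() returns the length the untruncated string would have
had, so truncation has occurred exactly when the return value reaches the
buffer size; the ">= sizeof(str) - 1" check used above is one character
stricter than necessary, which is harmless here. A standalone sketch of the
guard, with a hypothetical format_key() helper:

    #include <stdio.h>

    /* Returns 0 on success, -1 if the formatted key would not fit. */
    static int format_key(char *buf, size_t size, int devid)
    {
            int len = snprintf(buf, size, "dev-%d", devid);

            if (len < 0 || (size_t)len >= size)
                    return -1;      /* output error or truncation */
            return 0;
    }

    int main(void)
    {
            char key[8];

            if (format_key(key, sizeof(key), 7) == 0)
                    printf("ok: %s\n", key);        /* "dev-7" fits */
            if (format_key(key, sizeof(key), 123456) != 0)
                    printf("rejected: key too long\n");
            return 0;
    }

The same pattern guards every xenbus_scanf/xenbus_printf call above, so a
malformed or oversized device index can never write past the on-stack key
buffers.)
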
+diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c
+index afbe041..319dd0a 100644
+--- a/drivers/xen/platform-pci.c
++++ b/drivers/xen/platform-pci.c
+@@ -156,9 +156,6 @@ static int __devinit platform_pci_init(struct pci_dev *pdev,
+ 	if (ret)
+ 		goto out;
+ 	xenbus_probe(NULL);
+-	ret = xen_setup_shutdown_event();
+-	if (ret)
+-		goto out;
  	return 0;
- }
  
--- 
-1.7.4
-
-
-From 21c175503ade61702706a1db311a00d054bbdb2f Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Mon, 19 Apr 2010 14:40:38 -0400
-Subject: [PATCH 192/203] xen-pciback: Enable interrupt handler when device is enabled.
-
-And also request it to be disabled when the device has been
-disabled.
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/pciback/conf_space_header.c |    6 ++++++
- 1 files changed, 6 insertions(+), 0 deletions(-)
-
-diff --git a/drivers/xen/pciback/conf_space_header.c b/drivers/xen/pciback/conf_space_header.c
-index 1f4f86e..cb450f4 100644
---- a/drivers/xen/pciback/conf_space_header.c
-+++ b/drivers/xen/pciback/conf_space_header.c
-@@ -39,8 +39,10 @@ static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data)
- 
- static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
- {
-+	struct pciback_dev_data *dev_data;
- 	int err;
+ out:
+diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
+index 1da8af6..2024a74 100644
+--- a/include/drm/ttm/ttm_bo_driver.h
++++ b/include/drm/ttm/ttm_bo_driver.h
+@@ -50,13 +50,15 @@ struct ttm_backend_func {
+ 	 * @pages: Array of pointers to ttm pages.
+ 	 * @dummy_read_page: Page to be used instead of NULL pages in the
+ 	 * array @pages.
++	 * @dma_addrs: Array of DMA (bus) addresses of the ttm pages.
+ 	 *
+ 	 * Populate the backend with ttm pages. Depending on the backend,
+ 	 * it may or may not copy the @pages array.
+ 	 */
+ 	int (*populate) (struct ttm_backend *backend,
+ 			 unsigned long num_pages, struct page **pages,
+-			 struct page *dummy_read_page);
++			 struct page *dummy_read_page,
++			 dma_addr_t *dma_addrs);
+ 	/**
+ 	 * struct ttm_backend_func member clear
+ 	 *
+@@ -149,6 +151,7 @@ enum ttm_caching_state {
+  * @swap_storage: Pointer to shmem struct file for swap storage.
+  * @caching_state: The current caching state of the pages.
+  * @state: The current binding state of the pages.
++ * @dma_address: The DMA (bus) addresses of the pages (if TTM_PAGE_FLAG_DMA32)
+  *
+  * This is a structure holding the pages, caching- and aperture binding
+  * status for a buffer object that isn't backed by fixed (VRAM / AGP)
+@@ -173,6 +176,8 @@ struct ttm_tt {
+ 		tt_unbound,
+ 		tt_unpopulated,
+ 	} state;
++	dma_addr_t *dma_address;
++	struct device *dev;
+ };
  
-+	dev_data = pci_get_drvdata(dev);
- 	if (!pci_is_enabled(dev) && is_enable_cmd(value)) {
- 		if (unlikely(verbose_request))
- 			printk(KERN_DEBUG "pciback: %s: enable\n",
-@@ -48,11 +50,15 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
- 		err = pci_enable_device(dev);
- 		if (err)
- 			return err;
-+		if (dev_data)
-+			dev_data->enable_intx = 1;
- 	} else if (pci_is_enabled(dev) && !is_enable_cmd(value)) {
- 		if (unlikely(verbose_request))
- 			printk(KERN_DEBUG "pciback: %s: disable\n",
- 			       pci_name(dev));
- 		pci_disable_device(dev);
-+		if (dev_data)
-+			dev_data->enable_intx = 0;
- 	}
+ #define TTM_MEMTYPE_FLAG_FIXED         (1 << 0)	/* Fixed (on-card) PCI memory */
+@@ -547,6 +552,7 @@ struct ttm_bo_device {
+ 	struct list_head device_list;
+ 	struct ttm_bo_global *glob;
+ 	struct ttm_bo_driver *driver;
++	struct device *dev;
+ 	rwlock_t vm_lock;
+ 	struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES];
+ 	spinlock_t fence_lock;
+@@ -787,6 +793,8 @@ extern int ttm_bo_device_release(struct ttm_bo_device *bdev);
+  * @file_page_offset: Offset into the device address space that is available
+  * for buffer data. This ensures compatibility with other users of the
+  * address space.
++ * @need_dma32: Allocate pages under 4GB
++ * @dev: 'struct device' of the PCI device.
+  *
+  * Initializes a struct ttm_bo_device:
+  * Returns:
+@@ -795,7 +803,8 @@ extern int ttm_bo_device_release(struct ttm_bo_device *bdev);
+ extern int ttm_bo_device_init(struct ttm_bo_device *bdev,
+ 			      struct ttm_bo_global *glob,
+ 			      struct ttm_bo_driver *driver,
+-			      uint64_t file_page_offset, bool need_dma32);
++			      uint64_t file_page_offset, bool need_dma32,
++			      struct device *dev);
  
- 	if (!dev->is_busmaster && is_master_cmd(value)) {
--- 
-1.7.4
-
-
-From 25a782a051af48a60bd92fe93375eb38a7aa5206 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Mon, 19 Apr 2010 16:23:06 -0400
-Subject: [PATCH 193/203] xen-pciback: Probe the IRQ line to check if it is not shared.
-
-If it is not shared, we stop ACK-ing the IRQ line as there is
-no need for this irq handler to return IRQ_HANDLED.
-
-We have to do this check once, much later than when the pciback
-and pcifront have started talking, as the hypercall a guest makes
-to notify the other guest that the IRQ line is shared is done
-asynchronously.
-
-[v2: xen_ignore_irq->xen_test_irq_shared]
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/pciback/pci_stub.c    |    5 +++--
- drivers/xen/pciback/pciback.h     |    1 +
- drivers/xen/pciback/pciback_ops.c |   12 +++++++++++-
- 3 files changed, 15 insertions(+), 3 deletions(-)
-
-diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
-index ee2cd68..88c7ca1 100644
---- a/drivers/xen/pciback/pci_stub.c
-+++ b/drivers/xen/pciback/pci_stub.c
-@@ -1055,10 +1055,11 @@ static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf)
- 		if (!dev_data)
- 			continue;
- 		count +=
--		    scnprintf(buf + count, PAGE_SIZE - count, "%s:%s:%sing\n",
-+		    scnprintf(buf + count, PAGE_SIZE - count, "%s:%s:%sing:%ld\n",
- 			      pci_name(psdev->dev),
- 			      dev_data->isr_on ? "on" : "off",
--			      dev_data->ack_intr ? "ack" : "not ack");
-+			      dev_data->ack_intr ? "ack" : "not ack",
-+			      dev_data->handled);
- 	}
- 	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
- 	return count;
-diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h
-index 9d1b0a6..fc31052 100644
---- a/drivers/xen/pciback/pciback.h
-+++ b/drivers/xen/pciback/pciback.h
-@@ -50,6 +50,7 @@ struct pciback_dev_data {
- 	unsigned int enable_intx : 1;
- 	unsigned int isr_on : 1; /* Whether the IRQ handler is installed. */ 
- 	unsigned int ack_intr : 1; /* .. and ACK-ing */
-+	unsigned long handled;
- 	unsigned int irq; /* Saved in case device transitions to MSI/MSI-X */
- 	char irq_name[0]; /* pciback[000:04:00.0] */
- };
-diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
-index cb54893..2c5a90d 100644
---- a/drivers/xen/pciback/pciback_ops.c
-+++ b/drivers/xen/pciback/pciback_ops.c
-@@ -226,7 +226,17 @@ irqreturn_t pciback_guest_interrupt(int irq, void *dev_id)
- 	struct pci_dev *dev = (struct pci_dev *)dev_id;
- 	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
+ /**
+  * ttm_bo_unmap_virtual
+diff --git a/include/drm/ttm/ttm_page_alloc.h b/include/drm/ttm/ttm_page_alloc.h
+index 1168214..ccb6b7a 100644
+--- a/include/drm/ttm/ttm_page_alloc.h
++++ b/include/drm/ttm/ttm_page_alloc.h
+@@ -36,11 +36,15 @@
+  * @flags: ttm flags for page allocation.
+  * @cstate: ttm caching state for the page.
+  * @count: number of pages to allocate.
++ * @dma_address: The DMA (bus) address of pages (if TTM_PAGE_FLAG_DMA32 set).
++ * @dev: struct device for appropriate DMA accounting.
+  */
+ int ttm_get_pages(struct list_head *pages,
+ 		  int flags,
+ 		  enum ttm_caching_state cstate,
+-		  unsigned count);
++		  unsigned count,
++		  dma_addr_t *dma_address,
++		  struct device *dev);
+ /**
+  * Put linked list of pages to pool.
+  *
+@@ -49,11 +53,15 @@ int ttm_get_pages(struct list_head *pages,
+  * count.
+  * @flags: ttm flags for page allocation.
+  * @cstate: ttm caching state.
++ * @dma_address: The DMA (bus) address of pages (if TTM_PAGE_FLAG_DMA32 set).
++ * @dev: struct device for appropriate DMA accounting.
+  */
+ void ttm_put_pages(struct list_head *pages,
+ 		   unsigned page_count,
+ 		   int flags,
+-		   enum ttm_caching_state cstate);
++		   enum ttm_caching_state cstate,
++		   dma_addr_t *dma_address,
++		   struct device *dev);
+ /**
+  * Initialize pool allocator.
+  */
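
[Sketch — not part of the patch: a caller's view of the widened allocator API.
example_populate is hypothetical, and page_flags/caching_state are assumed
fields of the full struct ttm_tt, only part of which appears in the hunks
above:]

    static int example_populate(struct ttm_tt *ttm, unsigned num_pages,
                                struct list_head *plist)
    {
            /* the pool fills ttm->dma_address when TTM_PAGE_FLAG_DMA32
             * is among the flags */
            return ttm_get_pages(plist, ttm->page_flags, ttm->caching_state,
                                 num_pages, ttm->dma_address, ttm->dev);
    }
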
+diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
+index 55e0d42..d746da1 100644
+--- a/include/linux/interrupt.h
++++ b/include/linux/interrupt.h
+@@ -55,7 +55,7 @@
+  *                Used by threaded interrupts which need to keep the
+  *                irq line disabled until the threaded handler has been run.
+  * IRQF_NO_SUSPEND - Do not disable this IRQ during suspend
+- *
++ * IRQF_FORCE_RESUME - Force enable it on resume even if IRQF_NO_SUSPEND is set
+  */
+ #define IRQF_DISABLED		0x00000020
+ #define IRQF_SAMPLE_RANDOM	0x00000040
+@@ -67,6 +67,7 @@
+ #define IRQF_IRQPOLL		0x00001000
+ #define IRQF_ONESHOT		0x00002000
+ #define IRQF_NO_SUSPEND		0x00004000
++#define IRQF_FORCE_RESUME	0x00008000
  
--	if (dev_data->isr_on && dev_data->ack_intr)
-+	if (dev_data->isr_on && dev_data->ack_intr) {
-+		dev_data->handled++;
-+		if ((dev_data->handled % 1000) == 0) {
-+			if (xen_test_irq_shared(irq)) {
-+				printk(KERN_INFO "%s IRQ line is not shared "
-+					"with other domains. Turning ISR off\n",
-+					 dev_data->irq_name);
-+				dev_data->ack_intr = 0;
-+			}
-+		}
- 		return IRQ_HANDLED;
-+	}
- 	return IRQ_NONE;
- }
--- 
-1.7.4
-
-
-From 6191b41a7f3b63c4d762a2d0534f64d388ab2cf2 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Tue, 20 Apr 2010 20:22:40 -0400
-Subject: [PATCH 194/203] xen-pciback: Add debug statements for the MSI/MSI-X configuration module.
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/pciback/conf_space_capability_msi.c |   11 +++++++++++
- 1 files changed, 11 insertions(+), 0 deletions(-)
-
-diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c
-index a236e2d..b15131e 100644
---- a/drivers/xen/pciback/conf_space_capability_msi.c
-+++ b/drivers/xen/pciback/conf_space_capability_msi.c
-@@ -16,6 +16,9 @@ int pciback_enable_msi(struct pciback_device *pdev,
- 	int otherend = pdev->xdev->otherend_id;
- 	int status;
+ #define IRQF_TIMER		(__IRQF_TIMER | IRQF_NO_SUSPEND)
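
[Sketch — not part of the patch: IRQF_FORCE_RESUME is meant to pair with
IRQF_NO_SUSPEND, so a line that stays enabled across suspend is still
re-enabled on resume by the __enable_irq() change further down. The handler
and names here are hypothetical.]

    #include <linux/interrupt.h>

    static irqreturn_t example_handler(int irq, void *dev_id)
    {
            return IRQ_HANDLED;
    }

    int example_request(unsigned int irq, void *dev)
    {
            return request_irq(irq, example_handler,
                               IRQF_NO_SUSPEND | IRQF_FORCE_RESUME,
                               "example-evtchn", dev);
    }
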
  
-+ 	if (unlikely(verbose_request))
-+		printk(KERN_DEBUG "pciback: %s: enable MSI\n", pci_name(dev));
+diff --git a/include/xen/blkif.h b/include/xen/blkif.h
+new file mode 100644
+index 0000000..ab79426
+--- /dev/null
++++ b/include/xen/blkif.h
+@@ -0,0 +1,122 @@
++/*
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this software and associated documentation files (the "Software"), to
++ * deal in the Software without restriction, including without limitation the
++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
++ * sell copies of the Software, and to permit persons to whom the Software is
++ * furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++ * DEALINGS IN THE SOFTWARE.
++ */
 +
- 	status = pci_enable_msi(dev);
- 
- 	if (status) {
-@@ -31,6 +34,7 @@ int pciback_enable_msi(struct pciback_device *pdev,
- 	dev_data = pci_get_drvdata(dev);
- 	if (dev_data)
- 		dev_data->ack_intr = 0;
++#ifndef __XEN_BLKIF_H__
++#define __XEN_BLKIF_H__
 +
- 	return 0;
- }
- 
-@@ -38,6 +42,9 @@ int pciback_disable_msi(struct pciback_device *pdev,
- 		struct pci_dev *dev, struct xen_pci_op *op)
- {
- 	struct pciback_dev_data *dev_data;
++#include <xen/interface/io/ring.h>
++#include <xen/interface/io/blkif.h>
++#include <xen/interface/io/protocols.h>
 +
-+ 	if (unlikely(verbose_request))
-+		printk(KERN_DEBUG "pciback: %s: disable MSI\n", pci_name(dev));
- 	pci_disable_msi(dev);
++/* Not a real protocol.  Used to generate ring structs which contain
++ * the elements common to all protocols only.  This way we get a
++ * compiler-checkable way to use common struct elements, so we can
++ * avoid using switch(protocol) in a number of places.  */
++struct blkif_common_request {
++	char dummy;
++};
++struct blkif_common_response {
++	char dummy;
++};
++
++/* i386 protocol version */
++#pragma pack(push, 4)
++struct blkif_x86_32_request {
++	uint8_t        operation;    /* BLKIF_OP_???                         */
++	uint8_t        nr_segments;  /* number of segments                   */
++	blkif_vdev_t   handle;       /* only for read/write requests         */
++	uint64_t       id;           /* private guest value, echoed in resp  */
++	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
++	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
++};
++struct blkif_x86_32_response {
++	uint64_t        id;              /* copied from request */
++	uint8_t         operation;       /* copied from request */
++	int16_t         status;          /* BLKIF_RSP_???       */
++};
++typedef struct blkif_x86_32_request blkif_x86_32_request_t;
++typedef struct blkif_x86_32_response blkif_x86_32_response_t;
++#pragma pack(pop)
++
++/* x86_64 protocol version */
++struct blkif_x86_64_request {
++	uint8_t        operation;    /* BLKIF_OP_???                         */
++	uint8_t        nr_segments;  /* number of segments                   */
++	blkif_vdev_t   handle;       /* only for read/write requests         */
++	uint64_t       __attribute__((__aligned__(8))) id;
++	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
++	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
++};
++struct blkif_x86_64_response {
++	uint64_t       __attribute__((__aligned__(8))) id;
++	uint8_t         operation;       /* copied from request */
++	int16_t         status;          /* BLKIF_RSP_???       */
++};
++typedef struct blkif_x86_64_request blkif_x86_64_request_t;
++typedef struct blkif_x86_64_response blkif_x86_64_response_t;
++
++DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, struct blkif_common_response);
++DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, struct blkif_x86_32_response);
++DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request, struct blkif_x86_64_response);
++
++union blkif_back_rings {
++	struct blkif_back_ring        native;
++	struct blkif_common_back_ring common;
++	struct blkif_x86_32_back_ring x86_32;
++	struct blkif_x86_64_back_ring x86_64;
++};
++
++enum blkif_protocol {
++	BLKIF_PROTOCOL_NATIVE = 1,
++	BLKIF_PROTOCOL_X86_32 = 2,
++	BLKIF_PROTOCOL_X86_64 = 3,
++};
++
++static inline void blkif_get_x86_32_req(struct blkif_request *dst, struct blkif_x86_32_request *src)
++{
++	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
++	dst->operation = src->operation;
++	dst->nr_segments = src->nr_segments;
++	dst->handle = src->handle;
++	dst->id = src->id;
++	dst->u.rw.sector_number = src->sector_number;
++	barrier();
++	if (n > dst->nr_segments)
++		n = dst->nr_segments;
++	for (i = 0; i < n; i++)
++		dst->u.rw.seg[i] = src->seg[i];
++}
++
++static inline void blkif_get_x86_64_req(struct blkif_request *dst, struct blkif_x86_64_request *src)
++{
++	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
++	dst->operation = src->operation;
++	dst->nr_segments = src->nr_segments;
++	dst->handle = src->handle;
++	dst->id = src->id;
++	dst->u.rw.sector_number = src->sector_number;
++	barrier();
++	if (n > dst->nr_segments)
++		n = dst->nr_segments;
++	for (i = 0; i < n; i++)
++		dst->u.rw.seg[i] = src->seg[i];
++}
++
++#endif /* __XEN_BLKIF_H__ */
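
[Sketch — not part of the patch: the three ring types above let one backend
serve 32-bit and 64-bit frontends from the same ring page. A hypothetical
per-request dispatch, using the RING_GET_REQUEST macro that comes from
xen/interface/io/ring.h:]

    #include <linux/string.h>
    #include <xen/blkif.h>

    static void example_get_request(enum blkif_protocol protocol,
                                    union blkif_back_rings *rings,
                                    struct blkif_request *req, RING_IDX rc)
    {
            switch (protocol) {
            case BLKIF_PROTOCOL_NATIVE:
                    memcpy(req, RING_GET_REQUEST(&rings->native, rc),
                           sizeof(*req));
                    break;
            case BLKIF_PROTOCOL_X86_32:
                    blkif_get_x86_32_req(req,
                            RING_GET_REQUEST(&rings->x86_32, rc));
                    break;
            case BLKIF_PROTOCOL_X86_64:
                    blkif_get_x86_64_req(req,
                            RING_GET_REQUEST(&rings->x86_64, rc));
                    break;
            }
    }
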
+diff --git a/include/xen/events.h b/include/xen/events.h
+index 00f53dd..a0c8185 100644
+--- a/include/xen/events.h
++++ b/include/xen/events.h
+@@ -23,6 +23,12 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
+ 			   unsigned long irqflags,
+ 			   const char *devname,
+ 			   void *dev_id);
++int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
++					  unsigned int remote_port,
++					  irq_handler_t handler,
++					  unsigned long irqflags,
++					  const char *devname,
++					  void *dev_id);
  
- 	op->value = xen_gsi_from_irq(dev->irq);
-@@ -54,6 +61,8 @@ int pciback_enable_msix(struct pciback_device *pdev,
- 	int i, result;
- 	struct msix_entry *entries;
+ /*
+  * Common unbind function for all event sources. Takes IRQ to unbind from.
+@@ -75,11 +81,10 @@ int xen_allocate_pirq(unsigned gsi, int shareable, char *name);
+ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name);
  
-+ 	if (unlikely(verbose_request))
-+		printk(KERN_DEBUG "pciback: %s: enable MSI-X\n", pci_name(dev));
- 	if (op->value > SH_INFO_MAX_VEC)
- 		return -EINVAL;
+ #ifdef CONFIG_PCI_MSI
+-/* Allocate an irq and a pirq to be used with MSIs. */
+-#define XEN_ALLOC_PIRQ (1 << 0)
+-#define XEN_ALLOC_IRQ  (1 << 1)
+-void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc_mask);
+-int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type);
++int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc);
++int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
++			     int pirq, int vector, const char *name,
++			     domid_t domid);
+ #endif
  
-@@ -88,6 +97,8 @@ int pciback_disable_msix(struct pciback_device *pdev,
- 		struct pci_dev *dev, struct xen_pci_op *op)
- {
- 	struct pciback_dev_data *dev_data;
-+ 	if (unlikely(verbose_request))
-+		printk(KERN_DEBUG "pciback: %s: disable MSI-X\n", pci_name(dev));
- 	pci_disable_msix(dev);
- 
- 	op->value = xen_gsi_from_irq(dev->irq);
--- 
-1.7.4
-
-
-From 72b558f033e163ac371afa880ff7036859f6f0b0 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Fri, 23 Jul 2010 14:35:47 -0400
-Subject: [PATCH 195/203] xen-pciback: Redo spinlock usage.
-
-We were using coarse spinlocks that could end up with a deadlock.
-This patch fixes that and makes the spinlocks much more fine-grained.
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/pciback/xenbus.c |   34 +++++++++++++++++++++-------------
- 1 files changed, 21 insertions(+), 13 deletions(-)
-
-diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
-index d448bf5..f0d5426 100644
---- a/drivers/xen/pciback/xenbus.c
-+++ b/drivers/xen/pciback/xenbus.c
-@@ -54,23 +54,31 @@ static void pciback_disconnect(struct pciback_device *pdev)
- 		unbind_from_irqhandler(pdev->evtchn_irq, pdev);
- 		pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
- 	}
-+	spin_unlock(&pdev->dev_lock);
+ /* De-allocates the above mentioned physical interrupt. */
+@@ -94,4 +99,10 @@ int xen_gsi_from_irq(unsigned pirq);
+ /* Return irq from pirq */
+ int xen_irq_from_pirq(unsigned pirq);
  
- 	/* If the driver domain started an op, make sure we complete it
- 	 * before releasing the shared memory */
++/* Return the pirq allocated to the irq. */
++int xen_pirq_from_irq(unsigned irq);
 +
-+	/* Note, the workqueue does not use spinlocks at all.*/
- 	flush_workqueue(pciback_wq);
- 
-+	spin_lock(&pdev->dev_lock);
- 	if (pdev->sh_info != NULL) {
- 		xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info);
- 		pdev->sh_info = NULL;
- 	}
--
- 	spin_unlock(&pdev->dev_lock);
++/* Determine whether to ignore this IRQ if it is passed to a guest. */
++int xen_test_irq_shared(int irq);
++
+ #endif	/* _XEN_EVENTS_H */
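
[Sketch — not part of the patch: bind_interdomain_evtchn_to_irqhandler() is
how a backend takes interrupts from a frontend's event channel; pciback calls
it this way further down. otherend_id and remote_evtchn would come from
xenstore, and the example_* names are hypothetical.]

    #include <xen/events.h>

    static irqreturn_t example_event(int irq, void *dev_id)
    {
            /* schedule work, then ack the event */
            return IRQ_HANDLED;
    }

    int example_bind(unsigned int otherend_id, unsigned int remote_evtchn,
                     void *priv)
    {
            int irq = bind_interdomain_evtchn_to_irqhandler(otherend_id,
                            remote_evtchn, example_event, 0,
                            "example-backend", priv);
            return irq < 0 ? irq : 0;
    }
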
+diff --git a/include/xen/gntalloc.h b/include/xen/gntalloc.h
+new file mode 100644
+index 0000000..76bd580
+--- /dev/null
++++ b/include/xen/gntalloc.h
+@@ -0,0 +1,82 @@
++/******************************************************************************
++ * gntalloc.h
++ *
++ * Interface to /dev/xen/gntalloc.
++ *
++ * Author: Daniel De Graaf <dgdegra at tycho.nsa.gov>
++ *
++ * This file is in the public domain.
++ */
++
++#ifndef __LINUX_PUBLIC_GNTALLOC_H__
++#define __LINUX_PUBLIC_GNTALLOC_H__
++
++/*
++ * Allocates a new page and creates a new grant reference.
++ */
++#define IOCTL_GNTALLOC_ALLOC_GREF \
++_IOC(_IOC_NONE, 'G', 5, sizeof(struct ioctl_gntalloc_alloc_gref))
++struct ioctl_gntalloc_alloc_gref {
++	/* IN parameters */
++	/* The ID of the domain to be given access to the grants. */
++	uint16_t domid;
++	/* Flags for this mapping */
++	uint16_t flags;
++	/* Number of pages to map */
++	uint32_t count;
++	/* OUT parameters */
++	/* The offset to be used on a subsequent call to mmap(). */
++	uint64_t index;
++	/* The grant references of the newly created grant, one per page */
++	/* Variable size, depending on count */
++	uint32_t gref_ids[1];
++};
++
++#define GNTALLOC_FLAG_WRITABLE 1
++
++/*
++ * Deallocates the grant reference, allowing the associated page to be freed if
++ * no other domains are using it.
++ */
++#define IOCTL_GNTALLOC_DEALLOC_GREF \
++_IOC(_IOC_NONE, 'G', 6, sizeof(struct ioctl_gntalloc_dealloc_gref))
++struct ioctl_gntalloc_dealloc_gref {
++	/* IN parameters */
++	/* The offset returned in the map operation */
++	uint64_t index;
++	/* Number of references to unmap */
++	uint32_t count;
++};
++
++/*
++ * Sets up an unmap notification within the page, so that the other side can do
++ * cleanup if this side crashes. Required to implement cross-domain robust
++ * mutexes or close notification on communication channels.
++ *
++ * Each mapped page only supports one notification; multiple calls referring to
++ * the same page overwrite the previous notification. You must clear the
++ * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it
++ * to occur.
++ */
++#define IOCTL_GNTALLOC_SET_UNMAP_NOTIFY \
++_IOC(_IOC_NONE, 'G', 7, sizeof(struct ioctl_gntalloc_unmap_notify))
++struct ioctl_gntalloc_unmap_notify {
++	/* IN parameters */
++	/* Offset in the file descriptor for a byte within the page (same as
++	 * used in mmap). If using UNMAP_NOTIFY_CLEAR_BYTE, this is the byte to
++	 * be cleared. Otherwise, it can be any byte in the page whose
++	 * notification we are adjusting.
++	 */
++	uint64_t index;
++	/* Action(s) to take on unmap */
++	uint32_t action;
++	/* Event channel to notify */
++	uint32_t event_channel_port;
++};
++
++/* Clear (set to zero) the byte specified by index */
++#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
++/* Send an interrupt on the indicated event channel */
++#define UNMAP_NOTIFY_SEND_EVENT 0x2
 +
- }
- 
- static void free_pdev(struct pciback_device *pdev)
- {
--	if (pdev->be_watching)
-+	spin_lock(&pdev->dev_lock);
-+	if (pdev->be_watching) {
- 		unregister_xenbus_watch(&pdev->be_watch);
-+		pdev->be_watching = 0;
-+	}
-+	spin_unlock(&pdev->dev_lock);
- 
- 	pciback_disconnect(pdev);
++#endif /* __LINUX_PUBLIC_GNTALLOC_H__ */
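
[Sketch — not part of the patch: the userspace flow the header describes —
allocate one writable grant for a peer domain, mmap it at the returned index,
and hand the gref to the peer. Error handling is elided; the caller checks
for MAP_FAILED.]

    #include <fcntl.h>
    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <xen/gntalloc.h>

    void *example_share_page(uint16_t domid, uint32_t *gref_out)
    {
            int fd = open("/dev/xen/gntalloc", O_RDWR);
            struct ioctl_gntalloc_alloc_gref op = {
                    .domid = domid,
                    .flags = GNTALLOC_FLAG_WRITABLE,
                    .count = 1,
            };
            if (fd < 0 || ioctl(fd, IOCTL_GNTALLOC_ALLOC_GREF, &op) < 0)
                    return MAP_FAILED;
            *gref_out = op.gref_ids[0];   /* hand this to the peer domain */
            return mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
                        fd, op.index);
    }
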
+diff --git a/include/xen/gntdev.h b/include/xen/gntdev.h
+index eb23f41..5304bd3 100644
+--- a/include/xen/gntdev.h
++++ b/include/xen/gntdev.h
+@@ -116,4 +116,35 @@ struct ioctl_gntdev_set_max_grants {
+ 	uint32_t count;
+ };
  
-@@ -98,7 +106,10 @@ static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref,
- 				"Error mapping other domain page in ours.");
- 		goto out;
- 	}
++/*
++ * Sets up an unmap notification within the page, so that the other side can do
++ * cleanup if this side crashes. Required to implement cross-domain robust
++ * mutexes or close notification on communication channels.
++ *
++ * Each mapped page only supports one notification; multiple calls referring to
++ * the same page overwrite the previous notification. You must clear the
++ * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it
++ * to occur.
++ */
++#define IOCTL_GNTDEV_SET_UNMAP_NOTIFY \
++_IOC(_IOC_NONE, 'G', 7, sizeof(struct ioctl_gntdev_unmap_notify))
++struct ioctl_gntdev_unmap_notify {
++	/* IN parameters */
++	/* Offset in the file descriptor for a byte within the page (same as
++	 * used in mmap). If using UNMAP_NOTIFY_CLEAR_BYTE, this is the byte to
++	 * be cleared. Otherwise, it can be any byte in the page whose
++	 * notification we are adjusting.
++	 */
++	uint64_t index;
++	/* Action(s) to take on unmap */
++	uint32_t action;
++	/* Event channel to notify */
++	uint32_t event_channel_port;
++};
 +
-+	spin_lock(&pdev->dev_lock);
- 	pdev->sh_info = vaddr;
-+	spin_unlock(&pdev->dev_lock);
- 
- 	err = bind_interdomain_evtchn_to_irqhandler(
- 		pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event,
-@@ -108,7 +119,10 @@ static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref,
- 				 "Error binding event channel to IRQ");
- 		goto out;
- 	}
++/* Clear (set to zero) the byte specified by index */
++#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
++/* Send an interrupt on the indicated event channel */
++#define UNMAP_NOTIFY_SEND_EVENT 0x2
 +
-+	spin_lock(&pdev->dev_lock);
- 	pdev->evtchn_irq = err;
-+	spin_unlock(&pdev->dev_lock);
- 	err = 0;
- 
- 	dev_dbg(&pdev->xdev->dev, "Attached!\n");
-@@ -122,7 +136,6 @@ static int pciback_attach(struct pciback_device *pdev)
- 	int gnt_ref, remote_evtchn;
- 	char *magic = NULL;
- 
--	spin_lock(&pdev->dev_lock);
- 
- 	/* Make sure we only do this setup once */
- 	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
-@@ -168,7 +181,6 @@ static int pciback_attach(struct pciback_device *pdev)
- 
- 	dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
- out:
--	spin_unlock(&pdev->dev_lock);
- 
- 	kfree(magic);
- 
-@@ -340,7 +352,6 @@ static int pciback_reconfigure(struct pciback_device *pdev)
- 	char state_str[64];
- 	char dev_str[64];
- 
--	spin_lock(&pdev->dev_lock);
- 
- 	dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n");
- 
-@@ -481,8 +492,6 @@ static int pciback_reconfigure(struct pciback_device *pdev)
- 	}
- 
- out:
--	spin_unlock(&pdev->dev_lock);
--
- 	return 0;
- }
- 
-@@ -539,8 +548,6 @@ static int pciback_setup_backend(struct pciback_device *pdev)
- 	char dev_str[64];
- 	char state_str[64];
+ #endif /* __LINUX_PUBLIC_GNTDEV_H__ */
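
[Sketch — not part of the patch: arming the corresponding gntdev notification
on an already-mapped page. fd and map_index come from a prior
IOCTL_GNTDEV_MAP_GRANT_REF and mmap; the wrapper name is hypothetical.]

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <xen/gntdev.h>

    int example_arm_notify(int fd, uint64_t map_index)
    {
            struct ioctl_gntdev_unmap_notify notify = {
                    .index = map_index,     /* any byte within the page */
                    .action = UNMAP_NOTIFY_CLEAR_BYTE,
            };
            return ioctl(fd, IOCTL_GNTDEV_SET_UNMAP_NOTIFY, &notify);
    }
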
+diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h
+index c2d1fa4..61e523a 100644
+--- a/include/xen/interface/io/blkif.h
++++ b/include/xen/interface/io/blkif.h
+@@ -51,11 +51,7 @@ typedef uint64_t blkif_sector_t;
+  */
+ #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
  
--	spin_lock(&pdev->dev_lock);
--
- 	/* It's possible we could get the call to setup twice, so make sure
- 	 * we're not already connected.
- 	 */
-@@ -621,8 +628,6 @@ static int pciback_setup_backend(struct pciback_device *pdev)
- 				 "Error switching to initialised state!");
+-struct blkif_request {
+-	uint8_t        operation;    /* BLKIF_OP_???                         */
+-	uint8_t        nr_segments;  /* number of segments                   */
+-	blkif_vdev_t   handle;       /* only for read/write requests         */
+-	uint64_t       id;           /* private guest value, echoed in resp  */
++struct blkif_request_rw {
+ 	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
+ 	struct blkif_request_segment {
+ 		grant_ref_t gref;        /* reference to I/O buffer frame        */
+@@ -65,6 +61,16 @@ struct blkif_request {
+ 	} seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ };
  
- out:
--	spin_unlock(&pdev->dev_lock);
--
- 	if (!err)
- 		/* see if pcifront is already configured (if not, we'll wait) */
- 		pciback_attach(pdev);
-@@ -669,7 +674,10 @@ static int pciback_xenbus_probe(struct xenbus_device *dev,
- 				pciback_be_watch);
- 	if (err)
- 		goto out;
++struct blkif_request {
++	uint8_t        operation;    /* BLKIF_OP_???                         */
++	uint8_t        nr_segments;  /* number of segments                   */
++	blkif_vdev_t   handle;       /* only for read/write requests         */
++	uint64_t       id;           /* private guest value, echoed in resp  */
++	union {
++		struct blkif_request_rw rw;
++	} u;
++};
 +
-+	spin_lock(&pdev->dev_lock);
- 	pdev->be_watching = 1;
-+	spin_unlock(&pdev->dev_lock);
- 
- 	/* We need to force a call to our callback here in case
- 	 * xend already configured us!
-@@ -708,8 +716,8 @@ int __init pciback_xenbus_register(void)
- {
- 	pciback_wq = create_workqueue("pciback_workqueue");
- 	if (!pciback_wq) {
--		printk(KERN_ERR "pciback_xenbus_register: create"
--			"pciback_workqueue failed\n");
-+		printk(KERN_ERR "%s: create"
-+			"pciback_workqueue failed\n",__FUNCTION__);
- 		return -EFAULT;
- 	}
- 	return xenbus_register_backend(&xenbus_pciback_driver);
--- 
-1.7.4
-
-
-From 0db69b61a6240cb1a5e96c9352cdc4ff435a6449 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Wed, 28 Jul 2010 13:28:34 -0400
-Subject: [PATCH 196/203] xen-pciback: Remove spinlock for be->watching state.
-
-There is no need to guard this with a spinlock. It
-is already guarded by the xenwatch_thread against multiple
-customers.
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/pciback/xenbus.c |    4 ----
- 1 files changed, 0 insertions(+), 4 deletions(-)
-
-diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
-index f0d5426..993b659 100644
---- a/drivers/xen/pciback/xenbus.c
-+++ b/drivers/xen/pciback/xenbus.c
-@@ -73,12 +73,10 @@ static void pciback_disconnect(struct pciback_device *pdev)
- 
- static void free_pdev(struct pciback_device *pdev)
- {
--	spin_lock(&pdev->dev_lock);
- 	if (pdev->be_watching) {
- 		unregister_xenbus_watch(&pdev->be_watch);
- 		pdev->be_watching = 0;
- 	}
--	spin_unlock(&pdev->dev_lock);
- 
- 	pciback_disconnect(pdev);
- 
-@@ -675,9 +673,7 @@ static int pciback_xenbus_probe(struct xenbus_device *dev,
- 	if (err)
- 		goto out;
- 
--	spin_lock(&pdev->dev_lock);
- 	pdev->be_watching = 1;
--	spin_unlock(&pdev->dev_lock);
- 
- 	/* We need to force a call to our callback here in case
- 	 * xend already configured us!
--- 
-1.7.4
-
-
-From 4c824ee73ed5a83bf289b565b5dc4a99c7999bc5 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Thu, 24 Feb 2011 14:40:39 -0500
-Subject: [PATCH 197/203] xen/pciback: Fix checkpatch warnings and errors.
-
-Checkpatch found some extra warnings and errors. This mega
-patch fixes all of them in one swoop.
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-
-Conflicts:
-
-	drivers/xen/events.c
----
- drivers/xen/pciback/conf_space.c                |    4 +-
- drivers/xen/pciback/conf_space_capability_msi.c |   11 +++---
- drivers/xen/pciback/conf_space_header.c         |   42 +++++++++++-----------
- drivers/xen/pciback/controller.c                |    2 +-
- drivers/xen/pciback/pci_stub.c                  |    7 ++--
- drivers/xen/pciback/pciback.h                   |   16 ++++----
- drivers/xen/pciback/pciback_ops.c               |    9 ++---
- drivers/xen/pciback/xenbus.c                    |   14 ++++----
- 8 files changed, 53 insertions(+), 52 deletions(-)
-
-diff --git a/drivers/xen/pciback/conf_space.c b/drivers/xen/pciback/conf_space.c
-index 370c18e..eb6bba0 100644
---- a/drivers/xen/pciback/conf_space.c
-+++ b/drivers/xen/pciback/conf_space.c
-@@ -18,8 +18,8 @@
- static int permissive;
- module_param(permissive, bool, 0644);
- 
--#define DEFINE_PCI_CONFIG(op, size, type) 			\
--int pciback_##op##_config_##size 				\
-+#define DEFINE_PCI_CONFIG(op, size, type)			\
-+int pciback_##op##_config_##size				\
- (struct pci_dev *dev, int offset, type value, void *data)	\
- {								\
- 	return pci_##op##_config_##size(dev, offset, value);	\
-diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c
-index b15131e..3acda69 100644
---- a/drivers/xen/pciback/conf_space_capability_msi.c
-+++ b/drivers/xen/pciback/conf_space_capability_msi.c
-@@ -16,7 +16,7 @@ int pciback_enable_msi(struct pciback_device *pdev,
- 	int otherend = pdev->xdev->otherend_id;
- 	int status;
+ struct blkif_response {
+ 	uint64_t        id;              /* copied from request */
+ 	uint8_t         operation;       /* copied from request */
+@@ -91,4 +97,25 @@ DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);
+ #define VDISK_REMOVABLE    0x2
+ #define VDISK_READONLY     0x4
+ 
++/* Xen-defined major numbers for virtual disks; they look strangely
++ * familiar */
++#define XEN_IDE0_MAJOR	3
++#define XEN_IDE1_MAJOR	22
++#define XEN_SCSI_DISK0_MAJOR	8
++#define XEN_SCSI_DISK1_MAJOR	65
++#define XEN_SCSI_DISK2_MAJOR	66
++#define XEN_SCSI_DISK3_MAJOR	67
++#define XEN_SCSI_DISK4_MAJOR	68
++#define XEN_SCSI_DISK5_MAJOR	69
++#define XEN_SCSI_DISK6_MAJOR	70
++#define XEN_SCSI_DISK7_MAJOR	71
++#define XEN_SCSI_DISK8_MAJOR	128
++#define XEN_SCSI_DISK9_MAJOR	129
++#define XEN_SCSI_DISK10_MAJOR	130
++#define XEN_SCSI_DISK11_MAJOR	131
++#define XEN_SCSI_DISK12_MAJOR	132
++#define XEN_SCSI_DISK13_MAJOR	133
++#define XEN_SCSI_DISK14_MAJOR	134
++#define XEN_SCSI_DISK15_MAJOR	135
++
+ #endif /* __XEN_PUBLIC_IO_BLKIF_H__ */
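
[Sketch — not part of the patch: with the read/write payload moved under
u.rw, a frontend fills a single-segment read like this. The helper name is
hypothetical; BLKIF_OP_READ and the segment fields are from the unchanged
parts of this header.]

    static void example_fill_read(struct blkif_request *req,
                                  grant_ref_t gref, blkif_sector_t sector,
                                  uint64_t id)
    {
            req->operation = BLKIF_OP_READ;
            req->nr_segments = 1;
            req->handle = 0;
            req->id = id;
            req->u.rw.sector_number = sector;
            req->u.rw.seg[0].gref = gref;
            req->u.rw.seg[0].first_sect = 0;
            req->u.rw.seg[0].last_sect = 7; /* a full 4K page of 512B sectors */
    }
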
+diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h
+index 518481c..cb94668 100644
+--- a/include/xen/interface/io/netif.h
++++ b/include/xen/interface/io/netif.h
+@@ -22,50 +22,50 @@
  
-- 	if (unlikely(verbose_request))
-+	if (unlikely(verbose_request))
- 		printk(KERN_DEBUG "pciback: %s: enable MSI\n", pci_name(dev));
+ /*
+  * This is the 'wire' format for packets:
+- *  Request 1: netif_tx_request -- NETTXF_* (any flags)
+- * [Request 2: netif_tx_extra]  (only if request 1 has NETTXF_extra_info)
+- * [Request 3: netif_tx_extra]  (only if request 2 has XEN_NETIF_EXTRA_MORE)
+- *  Request 4: netif_tx_request -- NETTXF_more_data
+- *  Request 5: netif_tx_request -- NETTXF_more_data
++ *  Request 1: xen_netif_tx_request  -- XEN_NETTXF_* (any flags)
++ * [Request 2: xen_netif_extra_info]    (only if request 1 has XEN_NETTXF_extra_info)
++ * [Request 3: xen_netif_extra_info]    (only if request 2 has XEN_NETIF_EXTRA_MORE)
++ *  Request 4: xen_netif_tx_request  -- XEN_NETTXF_more_data
++ *  Request 5: xen_netif_tx_request  -- XEN_NETTXF_more_data
+  *  ...
+- *  Request N: netif_tx_request -- 0
++ *  Request N: xen_netif_tx_request  -- 0
+  */
  
- 	status = pci_enable_msi(dev);
-@@ -43,7 +43,7 @@ int pciback_disable_msi(struct pciback_device *pdev,
- {
- 	struct pciback_dev_data *dev_data;
+ /* Protocol checksum field is blank in the packet (hardware offload)? */
+-#define _NETTXF_csum_blank     (0)
+-#define  NETTXF_csum_blank     (1U<<_NETTXF_csum_blank)
++#define _XEN_NETTXF_csum_blank		(0)
++#define  XEN_NETTXF_csum_blank		(1U<<_XEN_NETTXF_csum_blank)
  
-- 	if (unlikely(verbose_request))
-+	if (unlikely(verbose_request))
- 		printk(KERN_DEBUG "pciback: %s: disable MSI\n", pci_name(dev));
- 	pci_disable_msi(dev);
+ /* Packet data has been validated against protocol checksum. */
+-#define _NETTXF_data_validated (1)
+-#define  NETTXF_data_validated (1U<<_NETTXF_data_validated)
++#define _XEN_NETTXF_data_validated	(1)
++#define  XEN_NETTXF_data_validated	(1U<<_XEN_NETTXF_data_validated)
  
-@@ -61,7 +61,7 @@ int pciback_enable_msix(struct pciback_device *pdev,
- 	int i, result;
- 	struct msix_entry *entries;
+ /* Packet continues in the next request descriptor. */
+-#define _NETTXF_more_data      (2)
+-#define  NETTXF_more_data      (1U<<_NETTXF_more_data)
++#define _XEN_NETTXF_more_data		(2)
++#define  XEN_NETTXF_more_data		(1U<<_XEN_NETTXF_more_data)
  
-- 	if (unlikely(verbose_request))
-+	if (unlikely(verbose_request))
- 		printk(KERN_DEBUG "pciback: %s: enable MSI-X\n", pci_name(dev));
- 	if (op->value > SH_INFO_MAX_VEC)
- 		return -EINVAL;
-@@ -97,8 +97,9 @@ int pciback_disable_msix(struct pciback_device *pdev,
- 		struct pci_dev *dev, struct xen_pci_op *op)
- {
- 	struct pciback_dev_data *dev_data;
-- 	if (unlikely(verbose_request))
--		printk(KERN_DEBUG "pciback: %s: disable MSI-X\n", pci_name(dev));
-+	if (unlikely(verbose_request))
-+		printk(KERN_DEBUG "pciback: %s: disable MSI-X\n",
-+			pci_name(dev));
- 	pci_disable_msix(dev);
+ /* Packet to be followed by extra descriptor(s). */
+-#define _NETTXF_extra_info     (3)
+-#define  NETTXF_extra_info     (1U<<_NETTXF_extra_info)
++#define _XEN_NETTXF_extra_info		(3)
++#define  XEN_NETTXF_extra_info		(1U<<_XEN_NETTXF_extra_info)
  
- 	op->value = xen_gsi_from_irq(dev->irq);
-diff --git a/drivers/xen/pciback/conf_space_header.c b/drivers/xen/pciback/conf_space_header.c
-index cb450f4..22ad0f5 100644
---- a/drivers/xen/pciback/conf_space_header.c
-+++ b/drivers/xen/pciback/conf_space_header.c
-@@ -316,27 +316,27 @@ static const struct config_field header_common[] = {
- 	{}
+ struct xen_netif_tx_request {
+     grant_ref_t gref;      /* Reference to buffer page */
+     uint16_t offset;       /* Offset within buffer page */
+-    uint16_t flags;        /* NETTXF_* */
++    uint16_t flags;        /* XEN_NETTXF_* */
+     uint16_t id;           /* Echoed in response message. */
+     uint16_t size;         /* Packet size in bytes.       */
  };
  
--#define CFG_FIELD_BAR(reg_offset) 			\
--	{ 						\
--	 .offset     = reg_offset, 			\
--	 .size       = 4, 				\
--	 .init       = bar_init, 			\
--	 .reset      = bar_reset, 			\
--	 .release    = bar_release, 			\
--	 .u.dw.read  = bar_read, 			\
--	 .u.dw.write = bar_write, 			\
--	 }
--
--#define CFG_FIELD_ROM(reg_offset) 			\
--	{ 						\
--	 .offset     = reg_offset, 			\
--	 .size       = 4, 				\
--	 .init       = rom_init, 			\
--	 .reset      = bar_reset, 			\
--	 .release    = bar_release, 			\
--	 .u.dw.read  = bar_read, 			\
--	 .u.dw.write = rom_write, 			\
--	 }
-+#define CFG_FIELD_BAR(reg_offset)			\
-+	{						\
-+	.offset     = reg_offset,			\
-+	.size       = 4,				\
-+	.init       = bar_init,				\
-+	.reset      = bar_reset,			\
-+	.release    = bar_release,			\
-+	.u.dw.read  = bar_read,				\
-+	.u.dw.write = bar_write,			\
-+	}
-+
-+#define CFG_FIELD_ROM(reg_offset)			\
-+	{						\
-+	.offset     = reg_offset,			\
-+	.size       = 4,				\
-+	.init       = rom_init,				\
-+	.reset      = bar_reset,			\
-+	.release    = bar_release,			\
-+	.u.dw.read  = bar_read,				\
-+	.u.dw.write = rom_write,			\
-+	}
- 
- static const struct config_field header_0[] = {
- 	CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
-diff --git a/drivers/xen/pciback/controller.c b/drivers/xen/pciback/controller.c
-index 7f04f11..5a7e4cc 100644
---- a/drivers/xen/pciback/controller.c
-+++ b/drivers/xen/pciback/controller.c
-@@ -378,7 +378,7 @@ int pciback_publish_pci_roots(struct pciback_device *pdev,
- 	}
+-/* Types of netif_extra_info descriptors. */
+-#define XEN_NETIF_EXTRA_TYPE_NONE  (0)  /* Never used - invalid */
+-#define XEN_NETIF_EXTRA_TYPE_GSO   (1)  /* u.gso */
+-#define XEN_NETIF_EXTRA_TYPE_MAX   (2)
++/* Types of xen_netif_extra_info descriptors. */
++#define XEN_NETIF_EXTRA_TYPE_NONE	(0)  /* Never used - invalid */
++#define XEN_NETIF_EXTRA_TYPE_GSO	(1)  /* u.gso */
++#define XEN_NETIF_EXTRA_TYPE_MAX	(2)
  
- 	err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
--			    "%lx", (sizeof(struct acpi_resource) * 2) + 1);
-+			    "%lx", (sizeof(struct acpi_resource) *2) + 1);
+-/* netif_extra_info flags. */
+-#define _XEN_NETIF_EXTRA_FLAG_MORE (0)
+-#define XEN_NETIF_EXTRA_FLAG_MORE  (1U<<_XEN_NETIF_EXTRA_FLAG_MORE)
++/* xen_netif_extra_info flags. */
++#define _XEN_NETIF_EXTRA_FLAG_MORE	(0)
++#define  XEN_NETIF_EXTRA_FLAG_MORE	(1U<<_XEN_NETIF_EXTRA_FLAG_MORE)
  
- out:
- 	spin_unlock(&dev_data->lock);
-diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
-index 88c7ca1..c8f6f29 100644
---- a/drivers/xen/pciback/pci_stub.c
-+++ b/drivers/xen/pciback/pci_stub.c
-@@ -13,7 +13,7 @@
- #include <linux/pci.h>
- #include <linux/wait.h>
- #include <linux/sched.h>
--#include <asm/atomic.h>
-+#include <linux/atomic.h>
- #include <xen/events.h>
- #include <asm/xen/pci.h>
- #include <asm/xen/hypervisor.h>
-@@ -603,7 +603,7 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
- 	if (test_bit(_XEN_PCIF_active,
- 		(unsigned long *)&psdev->pdev->sh_info->flags)) {
- 		dev_dbg(&psdev->dev->dev,
--			"schedule pci_conf service in pciback \n");
-+			"schedule pci_conf service in pciback\n");
- 		test_and_schedule_op(psdev->pdev);
- 	}
+ /* GSO types - only TCPv4 currently supported. */
+-#define XEN_NETIF_GSO_TYPE_TCPV4        (1)
++#define XEN_NETIF_GSO_TYPE_TCPV4	(1)
  
-@@ -1055,7 +1055,8 @@ static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf)
- 		if (!dev_data)
- 			continue;
- 		count +=
--		    scnprintf(buf + count, PAGE_SIZE - count, "%s:%s:%sing:%ld\n",
-+		    scnprintf(buf + count, PAGE_SIZE - count,
-+			      "%s:%s:%sing:%ld\n",
- 			      pci_name(psdev->dev),
- 			      dev_data->isr_on ? "on" : "off",
- 			      dev_data->ack_intr ? "ack" : "not ack",
-diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h
-index fc31052..5c14020 100644
---- a/drivers/xen/pciback/pciback.h
-+++ b/drivers/xen/pciback/pciback.h
-@@ -12,7 +12,7 @@
- #include <linux/list.h>
- #include <linux/spinlock.h>
- #include <linux/workqueue.h>
--#include <asm/atomic.h>
-+#include <linux/atomic.h>
- #include <xen/interface/io/pciif.h>
+ /*
+  * This structure needs to fit within both netif_tx_request and
+@@ -107,7 +107,7 @@ struct xen_netif_extra_info {
  
- struct pci_dev_entry {
-@@ -20,8 +20,8 @@ struct pci_dev_entry {
- 	struct pci_dev *dev;
+ struct xen_netif_tx_response {
+ 	uint16_t id;
+-	int16_t  status;       /* NETIF_RSP_* */
++	int16_t  status;       /* XEN_NETIF_RSP_* */
  };
  
--#define _PDEVF_op_active 	(0)
--#define PDEVF_op_active 	(1<<(_PDEVF_op_active))
-+#define _PDEVF_op_active	(0)
-+#define PDEVF_op_active		(1<<(_PDEVF_op_active))
- #define _PCIB_op_pending	(1)
- #define PCIB_op_pending		(1<<(_PCIB_op_pending))
- 
-@@ -45,11 +45,11 @@ struct pciback_device {
- 
- struct pciback_dev_data {
- 	struct list_head config_fields;
--	unsigned int permissive : 1;
--	unsigned int warned_on_write : 1;
--	unsigned int enable_intx : 1;
--	unsigned int isr_on : 1; /* Whether the IRQ handler is installed. */ 
--	unsigned int ack_intr : 1; /* .. and ACK-ing */
-+	unsigned int permissive:1;
-+	unsigned int warned_on_write:1;
-+	unsigned int enable_intx:1;
-+	unsigned int isr_on:1; /* Whether the IRQ handler is installed. */
-+	unsigned int ack_intr:1; /* .. and ACK-ing */
- 	unsigned long handled;
- 	unsigned int irq; /* Saved in case device transitions to MSI/MSI-X */
- 	char irq_name[0]; /* pciback[000:04:00.0] */
-diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
-index 2c5a90d..6c398fd 100644
---- a/drivers/xen/pciback/pciback_ops.c
-+++ b/drivers/xen/pciback/pciback_ops.c
-@@ -63,12 +63,11 @@ void pciback_control_isr(struct pci_dev *dev, int reset)
- 				dev_data->irq_name, dev);
- 		if (rc) {
- 			dev_err(&dev->dev, "%s: failed to install fake IRQ " \
--				"handler for IRQ %d! (rc:%d)\n", dev_data->irq_name,
--				dev_data->irq, rc);
-+				"handler for IRQ %d! (rc:%d)\n",
-+				dev_data->irq_name, dev_data->irq, rc);
- 			goto out;
- 		}
--	}
--	else {
-+	} else {
- 		free_irq(dev_data->irq, dev);
- 		dev_data->irq = 0;
- 	}
-@@ -188,7 +187,7 @@ void pciback_do_op(struct work_struct *data)
- 			break;
- #endif
- 		default:
--			op->err = XEN_PCI_ERR_not_implemented;	
-+			op->err = XEN_PCI_ERR_not_implemented;
- 			break;
- 		}
- 	}
-diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
-index 993b659..70030c4 100644
---- a/drivers/xen/pciback/xenbus.c
-+++ b/drivers/xen/pciback/xenbus.c
-@@ -700,12 +700,12 @@ static const struct xenbus_device_id xenpci_ids[] = {
+ struct xen_netif_rx_request {
+@@ -116,25 +116,29 @@ struct xen_netif_rx_request {
  };
  
- static struct xenbus_driver xenbus_pciback_driver = {
--	.name 			= "pciback",
--	.owner 			= THIS_MODULE,
--	.ids 			= xenpci_ids,
--	.probe 			= pciback_xenbus_probe,
--	.remove 		= pciback_xenbus_remove,
--	.otherend_changed 	= pciback_frontend_changed,
-+	.name			= "pciback",
-+	.owner			= THIS_MODULE,
-+	.ids			= xenpci_ids,
-+	.probe			= pciback_xenbus_probe,
-+	.remove			= pciback_xenbus_remove,
-+	.otherend_changed	= pciback_frontend_changed,
- };
+ /* Packet data has been validated against protocol checksum. */
+-#define _NETRXF_data_validated (0)
+-#define  NETRXF_data_validated (1U<<_NETRXF_data_validated)
++#define _XEN_NETRXF_data_validated	(0)
++#define  XEN_NETRXF_data_validated	(1U<<_XEN_NETRXF_data_validated)
  
- int __init pciback_xenbus_register(void)
-@@ -713,7 +713,7 @@ int __init pciback_xenbus_register(void)
- 	pciback_wq = create_workqueue("pciback_workqueue");
- 	if (!pciback_wq) {
- 		printk(KERN_ERR "%s: create"
--			"pciback_workqueue failed\n",__FUNCTION__);
-+			"pciback_workqueue failed\n", __func__);
- 		return -EFAULT;
- 	}
- 	return xenbus_register_backend(&xenbus_pciback_driver);
--- 
-1.7.4
-
-
-From 32bc28c28fa60442d2d1fff6be11344a22db95a3 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Mon, 13 Dec 2010 11:30:29 -0500
-Subject: [PATCH 198/203] xen/xen-pciback: Swap over to DEFINE_PCI_DEVICE_TABLE
-
-[v2: Jan Beulich recommendation to not drop 'static']
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/pciback/pci_stub.c |    2 +-
- 1 files changed, 1 insertions(+), 1 deletions(-)
-
-diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
-index c8f6f29..c4d1071 100644
---- a/drivers/xen/pciback/pci_stub.c
-+++ b/drivers/xen/pciback/pci_stub.c
-@@ -497,7 +497,7 @@ static void pcistub_remove(struct pci_dev *dev)
- 	}
- }
+ /* Protocol checksum field is blank in the packet (hardware offload)? */
+-#define _NETRXF_csum_blank     (1)
+-#define  NETRXF_csum_blank     (1U<<_NETRXF_csum_blank)
++#define _XEN_NETRXF_csum_blank		(1)
++#define  XEN_NETRXF_csum_blank		(1U<<_XEN_NETRXF_csum_blank)
  
--static const struct pci_device_id pcistub_ids[] = {
-+static DEFINE_PCI_DEVICE_TABLE(pcistub_ids) = {
- 	{
- 	 .vendor = PCI_ANY_ID,
- 	 .device = PCI_ANY_ID,
--- 
-1.7.4
-
-
-From 81436578eb9dad88e83f88412f3aed916e8bb89e Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Wed, 16 Feb 2011 13:44:38 -0500
-Subject: [PATCH 199/203] xen/pciback: Utilize 'xen_pirq_from_irq' to get PIRQ value.
-
-In the past the Xen IRQ code had overloaded irq->gsi with a
-ton of uses. The 'gsi' would be a real GSI, a vector value, or
-a Xen PIRQ value. We were quite diligent in having the correct
-value there for the guest, but with the IRQ rework code this
-overloading is gone. And so, use the correct function to retrieve
-the PIRQ value.
-
-Handling of invalid values is fixed in the front-end with patch:
-"xen-pcifront: Sanity check the MSI/MSI-X values"
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/pciback/conf_space_capability_msi.c |   26 +++++++++++++++-------
- 1 files changed, 18 insertions(+), 8 deletions(-)
-
-diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c
-index 3acda69..c9b0aba 100644
---- a/drivers/xen/pciback/conf_space_capability_msi.c
-+++ b/drivers/xen/pciback/conf_space_capability_msi.c
-@@ -30,7 +30,8 @@ int pciback_enable_msi(struct pciback_device *pdev,
+ /* Packet continues in the next request descriptor. */
+-#define _NETRXF_more_data      (2)
+-#define  NETRXF_more_data      (1U<<_NETRXF_more_data)
++#define _XEN_NETRXF_more_data		(2)
++#define  XEN_NETRXF_more_data		(1U<<_XEN_NETRXF_more_data)
  
- 	/* The value the guest needs is actually the IDT vector, not
- 	 * the local domain's IRQ number. */
--	op->value = xen_gsi_from_irq(dev->irq);
+ /* Packet to be followed by extra descriptor(s). */
+-#define _NETRXF_extra_info     (3)
+-#define  NETRXF_extra_info     (1U<<_NETRXF_extra_info)
++#define _XEN_NETRXF_extra_info		(3)
++#define  XEN_NETRXF_extra_info		(1U<<_XEN_NETRXF_extra_info)
 +
-+	op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
- 	dev_data = pci_get_drvdata(dev);
- 	if (dev_data)
- 		dev_data->ack_intr = 0;
-@@ -47,7 +48,7 @@ int pciback_disable_msi(struct pciback_device *pdev,
- 		printk(KERN_DEBUG "pciback: %s: disable MSI\n", pci_name(dev));
- 	pci_disable_msi(dev);
- 
--	op->value = xen_gsi_from_irq(dev->irq);
-+	op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
- 	dev_data = pci_get_drvdata(dev);
- 	if (dev_data)
- 		dev_data->ack_intr = 1;
-@@ -77,12 +78,17 @@ int pciback_enable_msix(struct pciback_device *pdev,
- 
- 	result = pci_enable_msix(dev, entries, op->value);
++/* GSO Prefix descriptor. */
++#define _XEN_NETRXF_gso_prefix		(4)
++#define  XEN_NETRXF_gso_prefix		(1U<<_XEN_NETRXF_gso_prefix)
  
--	for (i = 0; i < op->value; i++) {
--		op->msix_entries[i].entry = entries[i].entry;
--		op->msix_entries[i].vector =
--					xen_gsi_from_irq(entries[i].vector);
-+	if (result == 0) {
-+		for (i = 0; i < op->value; i++) {
-+			op->msix_entries[i].entry = entries[i].entry;
-+			if (entries[i].vector)
-+				op->msix_entries[i].vector =
-+					xen_pirq_from_irq(entries[i].vector);
-+		}
-+	} else {
-+		printk(KERN_WARNING "pciback: %s: failed to enable MSI-X: err %d!\n",
-+			pci_name(dev), result);
- 	}
--
- 	kfree(entries);
+ struct xen_netif_rx_response {
+     uint16_t id;
+     uint16_t offset;       /* Offset in page of start of received packet  */
+-    uint16_t flags;        /* NETRXF_* */
++    uint16_t flags;        /* XEN_NETRXF_* */
+     int16_t  status;       /* -ve: BLKIF_RSP_* ; +ve: Rx'ed pkt size. */
+ };
  
- 	op->value = result;
-@@ -102,7 +108,11 @@ int pciback_disable_msix(struct pciback_device *pdev,
- 			pci_name(dev));
- 	pci_disable_msix(dev);
+@@ -149,10 +153,10 @@ DEFINE_RING_TYPES(xen_netif_rx,
+ 		  struct xen_netif_rx_request,
+ 		  struct xen_netif_rx_response);
  
--	op->value = xen_gsi_from_irq(dev->irq);
-+	/*
-+	 * SR-IOV devices (which don't have any legacy IRQ) have
-+	 * an undefined IRQ value of zero.
-+	 */
-+	op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
- 	dev_data = pci_get_drvdata(dev);
- 	if (dev_data)
- 		dev_data->ack_intr = 1;
--- 
-1.7.4
-
-
-From 2e7c6959832ac7aa7551a18ee919ce522444bac1 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Wed, 16 Feb 2011 15:43:25 -0500
-Subject: [PATCH 200/203] xen/pciback: Don't setup an fake IRQ handler for SR-IOV devices.
-
-If we try to set up a fake IRQ handler for legacy interrupts
-for devices that only have MSI-X (most if not all SR-IOV cards),
-we will fail with this:
-
-pciback[0000:01:10.0]: failed to install fake IRQ handler for IRQ 0! (rc:-38)
-
-Since those cards don't have anything in dev->irq.
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/pciback/pciback_ops.c |    7 +++++++
- 1 files changed, 7 insertions(+), 0 deletions(-)
-
-diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
-index 6c398fd..28a2a55 100644
---- a/drivers/xen/pciback/pciback_ops.c
-+++ b/drivers/xen/pciback/pciback_ops.c
-@@ -48,6 +48,13 @@ void pciback_control_isr(struct pci_dev *dev, int reset)
- 	if (enable)
- 		dev_data->irq = dev->irq;
+-#define NETIF_RSP_DROPPED         -2
+-#define NETIF_RSP_ERROR           -1
+-#define NETIF_RSP_OKAY             0
+-/* No response: used for auxiliary requests (e.g., netif_tx_extra). */
+-#define NETIF_RSP_NULL             1
++#define XEN_NETIF_RSP_DROPPED	-2
++#define XEN_NETIF_RSP_ERROR	-1
++#define XEN_NETIF_RSP_OKAY	 0
++/* No response: used for auxiliary requests (e.g., xen_netif_extra_info). */
++#define XEN_NETIF_RSP_NULL	 1
  
-+	/*
-+	 * SR-IOV devices in general use MSI-X and have no legacy
-+	 * interrupts, so inhibit creating a fake IRQ handler for them.
-+	 */
-+	if (dev_data->irq == 0)
-+		goto out;
-+
- 	dev_dbg(&dev->dev, "%s: #%d %s %s%s %s-> %s\n",
- 		dev_data->irq_name,
- 		dev_data->irq,
--- 
-1.7.4
-
-
-From e0c91c10d4a258c83e1cc593d53338099a49a5c6 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Thu, 17 Feb 2011 11:18:00 -0500
-Subject: [PATCH 201/203] xen/pciback: Print out the MSI/MSI-X (PIRQ) values when allocating
-
-If verbose_request is set (and the loglevel is high enough), print out
-the MSI/MSI-X values sent to the guest. This should aid in debugging
-issues.
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
----
- drivers/xen/pciback/conf_space_capability_msi.c |   15 +++++++++++++++
- 1 files changed, 15 insertions(+), 0 deletions(-)
-
-diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c
-index c9b0aba..041e4aa 100644
---- a/drivers/xen/pciback/conf_space_capability_msi.c
-+++ b/drivers/xen/pciback/conf_space_capability_msi.c
-@@ -32,6 +32,10 @@ int pciback_enable_msi(struct pciback_device *pdev,
- 	 * the local domain's IRQ number. */
+ #endif
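
[Sketch — not part of the patch: the XEN_NETTXF_/XEN_NETRXF_ renames are
mechanical; the flags themselves drive the usual receive-side checksum
handling, roughly as netfront does. Hypothetical helper:]

    #include <linux/skbuff.h>

    static void example_rx_csum(struct sk_buff *skb,
                                struct xen_netif_rx_response *rsp)
    {
            if (rsp->flags & XEN_NETRXF_csum_blank)
                    skb->ip_summed = CHECKSUM_PARTIAL;      /* still to do */
            else if (rsp->flags & XEN_NETRXF_data_validated)
                    skb->ip_summed = CHECKSUM_UNNECESSARY;
            else
                    skb->ip_summed = CHECKSUM_NONE;
    }
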
+diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
+index 2befa3e..b33257b 100644
+--- a/include/xen/interface/xen.h
++++ b/include/xen/interface/xen.h
+@@ -30,7 +30,7 @@
+ #define __HYPERVISOR_stack_switch          3
+ #define __HYPERVISOR_set_callbacks         4
+ #define __HYPERVISOR_fpu_taskswitch        5
+-#define __HYPERVISOR_sched_op              6
++#define __HYPERVISOR_sched_op_compat       6
+ #define __HYPERVISOR_dom0_op               7
+ #define __HYPERVISOR_set_debugreg          8
+ #define __HYPERVISOR_get_debugreg          9
+@@ -52,7 +52,7 @@
+ #define __HYPERVISOR_mmuext_op            26
+ #define __HYPERVISOR_acm_op               27
+ #define __HYPERVISOR_nmi_op               28
+-#define __HYPERVISOR_sched_op_new         29
++#define __HYPERVISOR_sched_op             29
+ #define __HYPERVISOR_callback_op          30
+ #define __HYPERVISOR_xenoprof_op          31
+ #define __HYPERVISOR_event_channel_op     32
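
[Sketch — not part of the patch: after the renumbering, __HYPERVISOR_sched_op
names the modern call (29) and the old slot 6 becomes _compat, so generic
wrappers keep working unchanged. An illustration, assuming the x86
HYPERVISOR_sched_op wrapper from asm/xen/hypercall.h:]

    #include <xen/interface/sched.h>

    static inline void example_yield(void)
    {
            /* issues hypercall 29 (sched_op), not the compat slot 6 */
            HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
    }
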
+diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
+index 98b9215..03c85d7 100644
+--- a/include/xen/xen-ops.h
++++ b/include/xen/xen-ops.h
+@@ -5,9 +5,9 @@
+ 
+ DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
+ 
+-void xen_pre_suspend(void);
+-void xen_post_suspend(int suspend_cancelled);
+-void xen_hvm_post_suspend(int suspend_cancelled);
++void xen_arch_pre_suspend(void);
++void xen_arch_post_suspend(int suspend_cancelled);
++void xen_arch_hvm_post_suspend(int suspend_cancelled);
+ 
+ void xen_mm_pin_all(void);
+ void xen_mm_unpin_all(void);
+diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
+index 9033c1c..2782bac 100644
+--- a/kernel/irq/manage.c
++++ b/kernel/irq/manage.c
+@@ -282,8 +282,17 @@ EXPORT_SYMBOL(disable_irq);
  
- 	op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
-+	if (unlikely(verbose_request))
-+		printk(KERN_DEBUG "pciback: %s: MSI: %d\n", pci_name(dev),
-+			op->value);
-+
- 	dev_data = pci_get_drvdata(dev);
- 	if (dev_data)
- 		dev_data->ack_intr = 0;
-@@ -49,6 +53,9 @@ int pciback_disable_msi(struct pciback_device *pdev,
- 	pci_disable_msi(dev);
+ void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume)
+ {
+-	if (resume)
++	if (resume) {
++		if (!(desc->status & IRQ_SUSPENDED)) {
++			if (!desc->action)
++				return;
++			if (!(desc->action->flags & IRQF_FORCE_RESUME))
++				return;
++			/* Pretend that it got disabled ! */
++			desc->depth++;
++		}
+ 		desc->status &= ~IRQ_SUSPENDED;
++	}
  
- 	op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
-+	if (unlikely(verbose_request))
-+		printk(KERN_DEBUG "pciback: %s: MSI: %d\n", pci_name(dev),
-+			op->value);
- 	dev_data = pci_get_drvdata(dev);
- 	if (dev_data)
- 		dev_data->ack_intr = 1;
-@@ -84,6 +91,11 @@ int pciback_enable_msix(struct pciback_device *pdev,
- 			if (entries[i].vector)
- 				op->msix_entries[i].vector =
- 					xen_pirq_from_irq(entries[i].vector);
-+				if (unlikely(verbose_request))
-+					printk(KERN_DEBUG "pciback: %s: " \
-+						"MSI-X[%d]: %d\n",
-+						pci_name(dev), i,
-+						op->msix_entries[i].vector);
- 		}
- 	} else {
- 		printk(KERN_WARNING "pciback: %s: failed to enable MSI-X: err %d!\n",
-@@ -113,6 +125,9 @@ int pciback_disable_msix(struct pciback_device *pdev,
- 	 * an undefined IRQ value of zero.
- 	 */
- 	op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
-+	if (unlikely(verbose_request))
-+		printk(KERN_DEBUG "pciback: %s: MSI-X: %d\n", pci_name(dev),
-+			op->value);
- 	dev_data = pci_get_drvdata(dev);
- 	if (dev_data)
- 		dev_data->ack_intr = 1;
--- 
-1.7.4
-
-
-From eca446355aca91790d71a53a23ff10533b895f02 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Date: Thu, 24 Feb 2011 14:34:31 -0500
-Subject: [PATCH 202/203] xen-pciback: Enable Xen-PCI-back to be compiled.
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
-Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge at citrix.com>
-
-Conflicts:
-
-	drivers/xen/Kconfig
-	drivers/xen/Makefile
----
- drivers/xen/Kconfig  |   65 ++++++++++++++++++++++++++++++++++++++++++++++++++
- drivers/xen/Makefile |    1 +
- 2 files changed, 66 insertions(+), 0 deletions(-)
-
-diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
-index 07bec09..ff57e97 100644
---- a/drivers/xen/Kconfig
-+++ b/drivers/xen/Kconfig
-@@ -37,6 +37,71 @@ config XEN_BACKEND
- 	  Support for backend device drivers that provide I/O services
- 	  to other virtual machines.
+ 	switch (desc->depth) {
+ 	case 0:
+diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
+index 0d4005d8..d6bfb89 100644
+--- a/kernel/irq/pm.c
++++ b/kernel/irq/pm.c
+@@ -53,9 +53,6 @@ void resume_device_irqs(void)
+ 	for_each_irq_desc(irq, desc) {
+ 		unsigned long flags;
  
-+config XEN_PCIDEV_BACKEND
-+	tristate "PCI-device backend driver"
-+	depends on PCI
-+	depends on XEN_BACKEND
-+	help
-+	  The PCI device backend driver allows the kernel to export arbitrary
-+	  PCI devices to other guests. If you select this to be a module, you
-+	  will need to make sure no other driver has bound to the device(s)
-+	  you want to make visible to other guests.
-+
-+choice
-+	prompt "PCI Backend Mode"
-+	depends on XEN_PCIDEV_BACKEND
-+	default XEN_PCIDEV_BACKEND_VPCI if !IA64
-+	default XEN_PCIDEV_BACKEND_CONTROLLER if IA64
-+
-+config XEN_PCIDEV_BACKEND_VPCI
-+	bool "Virtual PCI"
-+	---help---
-+	  This PCI Backend hides the true PCI topology and makes the frontend
-+	  think there is a single PCI bus with only the exported devices on it.
-+	  For example, a device at 03:05.0 will be re-assigned to 00:00.0. A
-+	  second device at 02:1a.1 will be re-assigned to 00:01.1.
-+
-+config XEN_PCIDEV_BACKEND_PASS
-+	bool "Passthrough"
-+	---help---
-+	  This PCI Backend provides a real view of the PCI topology to the
-+	  frontend (for example, a device at 06:01.b will still appear at
-+	  06:01.b to the frontend). This is similar to how Xen 2.0.x exposed
-+	  PCI devices to its driver domains. This may be required for drivers
-+	  which depend on finding their hardware in certain bus/slot
-+	  locations.
-+
-+config XEN_PCIDEV_BACKEND_SLOT
-+	bool "Slot"
-+	---help---
-+	  This PCI Backend hides the true PCI topology and makes the frontend
-+	  think there is a single PCI bus with only the exported devices on it.
-+	  Contrary to the virtual PCI backend, a function becomes a new slot.
-+	  For example, a device at 03:05.2 will be re-assigned to 00:00.0. A
-+	  second device at 02:1a.1 will be re-assigned to 00:01.0.
-+
-+config XEN_PCIDEV_BACKEND_CONTROLLER
-+	bool "Controller"
-+	depends on IA64
-+	---help---
-+	  This PCI backend virtualizes the PCI bus topology by providing a
-+	  virtual bus per PCI root device.  Devices which are physically under
-+	  the same root bus will appear on the same virtual bus.  For systems
-+	  with complex I/O addressing, this is the only backend which supports
-+	  extended I/O port spaces and MMIO translation offsets.  This backend
-+	  also supports slot virtualization.  For example, a device at
-+	  0000:01:02.1 will be re-assigned to 0000:00:00.0.  A second device
-+	  at 0000:02:05.0 (behind a P2P bridge on bus 0000:01) will be
-+	  re-assigned to 0000:00:01.0.  A third device at 0000:16:05.0 (under
-+	  a different PCI root bus) will be re-assigned to 0000:01:00.0.
-+
-+endchoice
-+
-+config XEN_PCIDEV_BE_DEBUG
-+	bool "PCI Backend Debugging"
-+	depends on XEN_PCIDEV_BACKEND
-+
-+
- config XENFS
- 	tristate "Xen filesystem"
- 	default y
-diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
-index 5088cc2..b8b93c6 100644
---- a/drivers/xen/Makefile
-+++ b/drivers/xen/Makefile
-@@ -9,6 +9,7 @@ obj-$(CONFIG_HOTPLUG_CPU)	+= cpu_hotplug.o
- obj-$(CONFIG_XEN_XENCOMM)	+= xencomm.o
- obj-$(CONFIG_XEN_BALLOON)	+= balloon.o
- obj-$(CONFIG_XEN_DEV_EVTCHN)	+= xen-evtchn.o
-+obj-$(CONFIG_XEN_PCIDEV_BACKEND)	+= pciback/
- obj-$(CONFIG_XEN_GNTDEV)	+= xen-gntdev.o
- obj-$(CONFIG_XEN_GRANT_DEV_ALLOC)	+= xen-gntalloc.o
- obj-$(CONFIG_XENFS)		+= xenfs/
--- 
-1.7.4
-
-
+-		if (!(desc->status & IRQ_SUSPENDED))
+-			continue;
+-
+ 		raw_spin_lock_irqsave(&desc->lock, flags);
+ 		__enable_irq(desc, irq, true);
+ 		raw_spin_unlock_irqrestore(&desc->lock, flags);
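
A few closing notes on the regenerated patch, since the interesting
hunks sit at the tail of this diff (inserting them mid-diff would break
the patch, so they are collected here).

The hypercall hunk renames __HYPERVISOR_sched_op_new back to
__HYPERVISOR_sched_op (hypercall number 29), matching the upstream name
that the generic wrapper macros token-paste into the hypercall lookup.
As a minimal consumer-side sketch, assuming the usual x86 headers; the
function name demo_yield is invented for illustration:

    #include <xen/interface/sched.h>   /* SCHEDOP_yield */
    #include <asm/xen/hypercall.h>     /* HYPERVISOR_sched_op() */

    /* Yield the CPU back to Xen.  After this hunk the wrapper
     * expands to hypercall number 29 under its upstream name. */
    static void demo_yield(void)
    {
            HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
    }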

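The xen-ops.h hunk gives the suspend hooks an xen_arch_ prefix so the
generic suspend path stays arch-neutral while x86 and ia64 supply their
own fixups. A rough sketch of the call order, modelled loosely on the
PV path in drivers/xen/manage.c of this era; demo_suspend is an
invented name, error handling and the HVM branch are omitted:

    #include <xen/xen-ops.h>           /* the renamed arch hooks */
    #include <asm/xen/hypercall.h>

    /* Sketch only: bracket the suspend hypercall with the renamed
     * hooks.  virt_to_mfn() and xen_start_info come from the arch
     * Xen headers; the real code also special-cases HVM guests. */
    static int demo_suspend(void)
    {
            int cancelled;

            xen_arch_pre_suspend();
            cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info));
            xen_arch_post_suspend(cancelled);
            return cancelled;
    }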

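The kernel/irq hunks are the substantive new material: __enable_irq()
now re-enables an interrupt that was never marked IRQ_SUSPENDED,
provided its action carries IRQF_FORCE_RESUME (bumping desc->depth to
pretend the line had been disabled), and resume_device_irqs() drops its
early skip so every descriptor is offered that chance. This is what
lets interrupts that bypass the ordinary device suspend path, such as
Xen event channels, come back after save/restore. A hedged consumer
sketch; the handler name and IRQ number are invented for illustration:

    #include <linux/interrupt.h>

    static irqreturn_t demo_evtchn_handler(int irq, void *dev_id)
    {
            /* A real handler would ack the Xen event channel here. */
            return IRQ_HANDLED;
    }

    static int demo_bind(unsigned int irq)
    {
            /* IRQF_FORCE_RESUME asks resume_device_irqs() to unmask
             * this line even though suspend never touched it. */
            return request_irq(irq, demo_evtchn_handler,
                               IRQF_FORCE_RESUME, "demo-evtchn", NULL);
    }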