[oprofile] - Corrections for i386/arch_perfmon filters. - Make nehalem events available. - Add AMD family 12/14/15h support.

William Eden Cohen wcohen at fedoraproject.org
Thu Jan 6 16:23:48 UTC 2011


commit 845eee59efb803006817435011ac150e0a5df8aa
Author: William Cohen <wcohen at redhat.com>
Date:   Thu Jan 6 11:23:13 2011 -0500

    - Corrections for i386/arch_perfmon filters.
    - Make nehalem events available.
    - Add AMD family 12/14/15h support.
    - Add Intel Westmere support.
    - opcontrol numeric argument checking.

 oprofile-amd.patch      | 2147 +++++++++++++++++++++++++++++++++++++++++++++++
 oprofile-check.patch    |   82 ++
 oprofile-iaperf.patch   |   16 +
 oprofile-nehalem.patch  |   46 +
 oprofile-westmere.patch |  676 +++++++++++++++
 oprofile.spec           |   19 +-
 6 files changed, 2985 insertions(+), 1 deletions(-)
---
diff --git a/oprofile-amd.patch b/oprofile-amd.patch
new file mode 100644
index 0000000..80d629b
--- /dev/null
+++ b/oprofile-amd.patch
@@ -0,0 +1,2147 @@
+Index: oprofile/events/Makefile.am
+diff -u oprofile/events/Makefile.am:1.34 oprofile/events/Makefile.am:1.35
+--- oprofile/events/Makefile.am:1.34	Wed Oct 13 18:07:09 2010
++++ oprofile/events/Makefile.am	Mon Nov 29 14:52:36 2010
+@@ -35,6 +35,9 @@
+ 	x86-64/hammer/events x86-64/hammer/unit_masks \
+ 	x86-64/family10/events x86-64/family10/unit_masks \
+ 	x86-64/family11h/events x86-64/family11h/unit_masks \
++	x86-64/family12h/events x86-64/family12h/unit_masks \
++	x86-64/family14h/events x86-64/family14h/unit_masks \
++	x86-64/family15h/events x86-64/family15h/unit_masks \
+ 	arm/xscale1/events arm/xscale1/unit_masks \
+ 	arm/xscale2/events arm/xscale2/unit_masks \
+ 	arm/armv6/events arm/armv6/unit_masks \
+--- /dev/null	2010-12-20 18:01:43.861988919 -0500
++++ oprofile/events/x86-64/family12h/events	2011-01-03 14:37:37.384309624 -0500
+@@ -0,0 +1,23 @@
++# AMD Generic performance events
++#
++# Copyright OProfile authors
++# Copyright (c) 2006-2010 Advanced Micro Devices
++# Contributed by Ray Bryant <raybry at amd.com>,
++#		Jason Yeh <jason.yeh at amd.com>
++#		Suravee Suthikulpanit <suravee.suthikulpanit at amd.com>
++#
++# Revision: 1.0
++#
++# ChangeLog: 
++#	1.0: 30 August 2010.
++#	- Initial revision
++#
++event:0x40 counters:0,1,2,3 um:zero minimum:500 name:DATA_CACHE_ACCESSES : Data cache accesses
++event:0x41 counters:0,1,2,3 um:zero minimum:500 name:DATA_CACHE_MISSES : Data cache misses
++event:0x42 counters:0,1,2,3 um:moess minimum:500 name:DATA_CACHE_REFILLS_FROM_L2_OR_NORTHBRIDGE : Data cache refills from L2 or Northbridge
++event:0x43 counters:0,1,2,3 um:moesi minimum:500 name:DATA_CACHE_REFILLS_FROM_NORTHBRIDGE : Data cache refills from Northbridge
++event:0x76 counters:0,1,2,3 um:zero minimum:50000 name:CPU_CLK_UNHALTED : Cycles outside of halt state
++event:0xc0 counters:0,1,2,3 um:zero minimum:50000 name:RETIRED_INSTRUCTIONS : Retired instructions (includes exceptions, interrupts, re-syncs)
++event:0xc1 counters:0,1,2,3 um:zero minimum:500 name:RETIRED_UOPS : Retired micro-ops
++event:0xc2 counters:0,1,2,3 um:zero minimum:500 name:RETIRED_BRANCH_INSTRUCTIONS : Retired branches (conditional, unconditional, exceptions, interrupts)
++event:0xc3 counters:0,1,2,3 um:zero minimum:500 name:RETIRED_MISPREDICTED_BRANCH_INSTRUCTIONS : Retired mispredicted branch instructions
+--- /dev/null	2010-12-20 18:01:43.861988919 -0500
++++ oprofile/events/x86-64/family12h/unit_masks	2011-01-03 14:37:38.374285883 -0500
+@@ -0,0 +1,30 @@
++# AMD Generic unit masks
++#
++# Copyright OProfile authors
++# Copyright (c) 2006-2010 Advanced Micro Devices
++# Contributed by Ray Bryant <raybry at amd.com>,
++#		Jason Yeh <jason.yeh at amd.com>
++#		Suravee Suthikulpanit <suravee.suthikulpanit at amd.com>
++#
++# Revision: 1.0
++#
++# ChangeLog: 
++#	1.0: 30 August 2010.
++#	- Initial revision
++#
++name:zero type:mandatory default:0x0
++	0x0 No unit mask
++name:moesi type:bitmask default:0x1f
++	0x01 (I)nvalid cache state
++	0x02 (S)hared cache state
++	0x04 (E)xclusive cache state
++	0x08 (O)wner cache state
++	0x10 (M)odified cache state
++	0x1f All cache states
++name:moess type:bitmask default:0x1e
++	0x01 Refill from northbridge
++	0x02 Shared-state line from L2
++	0x04 Exclusive-state line from L2
++	0x08 Owner-state line from L2
++	0x10 Modified-state line from L2
++	0x1e All cache states except refill from northbridge
+--- /dev/null	2010-12-20 18:01:43.861988919 -0500
++++ oprofile/events/x86-64/family14h/events	2011-01-03 14:37:39.383445877 -0500
+@@ -0,0 +1,23 @@
++# AMD Generic performance events
++#
++# Copyright OProfile authors
++# Copyright (c) 2006-2010 Advanced Micro Devices
++# Contributed by Ray Bryant <raybry at amd.com>,
++#		Jason Yeh <jason.yeh at amd.com>
++#		Suravee Suthikulpanit <suravee.suthikulpanit at amd.com>
++#
++# Revision: 1.0
++#
++# ChangeLog: 
++#	1.0: 30 August 2010.
++#	- Initial revision
++#
++event:0x40 counters:0,1,2,3 um:zero minimum:500 name:DATA_CACHE_ACCESSES : Data cache accesses
++event:0x41 counters:0,1,2,3 um:zero minimum:500 name:DATA_CACHE_MISSES : Data cache misses
++event:0x42 counters:0,1,2,3 um:moess minimum:500 name:DATA_CACHE_REFILLS_FROM_L2_OR_NORTHBRIDGE : Data cache refills from L2 or Northbridge
++event:0x43 counters:0,1,2,3 um:moesi minimum:500 name:DATA_CACHE_REFILLS_FROM_NORTHBRIDGE : Data cache refills from Northbridge
++event:0x76 counters:0,1,2,3 um:zero minimum:50000 name:CPU_CLK_UNHALTED : Cycles outside of halt state
++event:0xc0 counters:0,1,2,3 um:zero minimum:50000 name:RETIRED_INSTRUCTIONS : Retired instructions (includes exceptions, interrupts, re-syncs)
++event:0xc1 counters:0,1,2,3 um:zero minimum:500 name:RETIRED_UOPS : Retired micro-ops
++event:0xc2 counters:0,1,2,3 um:zero minimum:500 name:RETIRED_BRANCH_INSTRUCTIONS : Retired branches (conditional, unconditional, exceptions, interrupts)
++event:0xc3 counters:0,1,2,3 um:zero minimum:500 name:RETIRED_MISPREDICTED_BRANCH_INSTRUCTIONS : Retired mispredicted branch instructions
+--- /dev/null	2010-12-20 18:01:43.861988919 -0500
++++ oprofile/events/x86-64/family14h/unit_masks	2011-01-03 14:37:40.384085285 -0500
+@@ -0,0 +1,30 @@
++# AMD Generic unit masks
++#
++# Copyright OProfile authors
++# Copyright (c) 2006-2010 Advanced Micro Devices
++# Contributed by Ray Bryant <raybry at amd.com>,
++#		Jason Yeh <jason.yeh at amd.com>
++#		Suravee Suthikulpanit <suravee.suthikulpanit at amd.com>
++#
++# Revision: 1.0
++#
++# ChangeLog: 
++#	1.0: 30 August 2010.
++#	- Initial revision
++#
++name:zero type:mandatory default:0x0
++	0x0 No unit mask
++name:moesi type:bitmask default:0x1f
++	0x01 (I)nvalid cache state
++	0x02 (S)hared cache state
++	0x04 (E)xclusive cache state
++	0x08 (O)wner cache state
++	0x10 (M)odified cache state
++	0x1f All cache states
++name:moess type:bitmask default:0x1e
++	0x01 Refill from northbridge
++	0x02 Shared-state line from L2
++	0x04 Exclusive-state line from L2
++	0x08 Owner-state line from L2
++	0x10 Modified-state line from L2
++	0x1e All cache states except refill from northbridge
+--- /dev/null	2010-12-20 18:01:43.861988919 -0500
++++ oprofile/events/x86-64/family15h/events	2011-01-03 14:37:41.391223732 -0500
+@@ -0,0 +1,16 @@
++# AMD Generic performance events
++#
++# Copyright OProfile authors
++# Copyright (c) 2006-2010 Advanced Micro Devices
++# Contributed by Ray Bryant <raybry at amd.com>,
++#		Jason Yeh <jason.yeh at amd.com>
++#		Suravee Suthikulpanit <suravee.suthikulpanit at amd.com>
++#
++# Revision: 1.0
++#
++# ChangeLog: 
++#	1.0: 30 August 2010.
++#	- Initial revision
++#
++event:0x76 counters:0,1,2 um:zero minimum:50000 name:CPU_CLK_UNHALTED : Cycles outside of halt state
++event:0xc0 counters:0,1,2,3,4,5 um:zero minimum:50000 name:RETIRED_INSTRUCTIONS : Retired instructions (includes exceptions, interrupts, re-syncs)
+--- /dev/null	2010-12-20 18:01:43.861988919 -0500
++++ oprofile/events/x86-64/family15h/unit_masks	2011-01-03 14:37:42.390283478 -0500
+@@ -0,0 +1,16 @@
++# AMD Generic unit masks
++#
++# Copyright OProfile authors
++# Copyright (c) 2006-2010 Advanced Micro Devices
++# Contributed by Ray Bryant <raybry at amd.com>,
++#		Jason Yeh <jason.yeh at amd.com>
++#		Suravee Suthikulpanit <suravee.suthikulpanit at amd.com>
++#
++# Revision: 1.0
++#
++# ChangeLog: 
++#	1.0: 30 August 2010.
++#	- Initial revision
++#
++name:zero type:mandatory default:0x0
++	0x0 No unit mask
+Index: oprofile/libop/op_cpu_type.c
+diff -u oprofile/libop/op_cpu_type.c:1.57 oprofile/libop/op_cpu_type.c:1.58
+--- oprofile/libop/op_cpu_type.c:1.57	Fri Aug 27 20:15:06 2010
++++ oprofile/libop/op_cpu_type.c	Mon Nov 29 14:52:36 2010
+@@ -87,6 +87,9 @@
+    	{ "Intel Core/i7", "i386/core_i7", CPU_CORE_I7, 4 },
+    	{ "Intel Atom", "i386/atom", CPU_ATOM, 2 },
+ 	{ "Intel Nehalem microarchitecture", "i386/nehalem", CPU_NEHALEM, 4 },
++	{ "AMD64 family12h", "x86-64/family12h", CPU_FAMILY12H, 4 },
++	{ "AMD64 family14h", "x86-64/family14h", CPU_FAMILY14H, 4 },
++	{ "AMD64 family15h", "x86-64/family15h", CPU_FAMILY15H, 6 },
+ };
+  
+ static size_t const nr_cpu_descrs = sizeof(cpu_descrs) / sizeof(struct cpu_descr);
+Index: oprofile/libop/op_cpu_type.h
+diff -u oprofile/libop/op_cpu_type.h:1.50 oprofile/libop/op_cpu_type.h:1.51
+--- oprofile/libop/op_cpu_type.h:1.50	Fri Aug 27 20:15:06 2010
++++ oprofile/libop/op_cpu_type.h	Mon Nov 29 14:52:36 2010
+@@ -84,6 +84,9 @@
+    	CPU_CORE_I7, /* Intel Core i7, Nehalem */
+    	CPU_ATOM, /* First generation Intel Atom */
+ 	CPU_NEHALEM, /* Intel Nehalem microarchitecture */
++	CPU_FAMILY12H, /**< AMD family 12h */
++	CPU_FAMILY14H, /**< AMD family 14h */
++	CPU_FAMILY15H, /**< AMD family 15h */
+ 	MAX_CPU_TYPE
+ } op_cpu;
+ 
+Index: oprofile/libop/op_events.c
+diff -u oprofile/libop/op_events.c:1.101 oprofile/libop/op_events.c:1.102
+--- oprofile/libop/op_events.c:1.101	Fri Aug 27 20:15:06 2010
++++ oprofile/libop/op_events.c	Mon Nov 29 14:52:36 2010
+@@ -972,6 +972,9 @@
+  		case CPU_ATOM:
+  		case CPU_CORE_I7:
+ 		case CPU_NEHALEM:
++		case CPU_FAMILY12H:
++		case CPU_FAMILY14H:
++		case CPU_FAMILY15H:
+ 			descr->name = "CPU_CLK_UNHALTED";
+ 			break;
+ 
+Index: oprofile/utils/ophelp.c
+diff -u oprofile/utils/ophelp.c:1.38 oprofile/utils/ophelp.c:1.39
+--- oprofile/utils/ophelp.c:1.38	Fri Aug 27 20:15:07 2010
++++ oprofile/utils/ophelp.c	Mon Nov 29 14:52:36 2010
+@@ -469,6 +469,18 @@
+ 			"See BIOS and Kernel Developer's Guide for AMD Family 11h Processors\n"
+ 			"(41256.pdf), Section 3.14\n\n";
+ 		break;
++	case CPU_FAMILY12H:
++		event_doc =
++			"See BIOS and Kernel Developer's Guide for AMD Family 12h Processors\n";
++		break;
++	case CPU_FAMILY14H:
++		event_doc =
++			"See BIOS and Kernel Developer's Guide for AMD Family 14h Processors\n";
++		break;
++	case CPU_FAMILY15H:
++		event_doc =
++			"See BIOS and Kernel Developer's Guide for AMD Family 15h Processors\n";
++		break;
+ 	case CPU_ATHLON:
+ 		event_doc =
+ 			"See AMD Athlon Processor x86 Code Optimization Guide\n"
+Index: oprofile/daemon/init.c
+diff -u oprofile/daemon/init.c:1.14 oprofile/daemon/init.c:1.15
+--- oprofile/daemon/init.c:1.14	Fri Aug  8 15:08:19 2008
++++ oprofile/daemon/init.c	Mon Nov 29 15:05:44 2010
+@@ -24,6 +24,7 @@
+ #include "opd_anon.h"
+ #include "opd_perfmon.h"
+ #include "opd_printf.h"
++#include "opd_extended.h"
+ 
+ #include "op_version.h"
+ #include "op_config.h"
+@@ -282,6 +283,8 @@
+ 	opd_do_jitdumps();
+ 	opd_print_stats();
+ 	printf("oprofiled stopped %s", op_get_time());
++	opd_ext_deinitialize();
++
+ 	exit(EXIT_FAILURE);
+ }
+ 
+Index: oprofile/daemon/opd_extended.c
+diff -u oprofile/daemon/opd_extended.c:1.3 oprofile/daemon/opd_extended.c:1.4
+--- oprofile/daemon/opd_extended.c:1.3	Wed May 27 19:12:48 2009
++++ oprofile/daemon/opd_extended.c	Mon Nov 29 15:05:44 2010
+@@ -109,6 +109,20 @@
+ }
+ 
+ 
++int opd_ext_deinitialize()
++{
++	int ret = EXIT_FAILURE;
++
++	if(opd_ext_feat_index == -1) {
++		return 0;
++	}
++
++	ret = ext_feature_table[opd_ext_feat_index].handlers->ext_deinit();
++
++	return ret;
++}
++
++
+ void opd_ext_print_stats()
+ {
+ 	if (is_ext_enabled()
+Index: oprofile/daemon/opd_extended.h
+diff -u oprofile/daemon/opd_extended.h:1.1 oprofile/daemon/opd_extended.h:1.2
+--- oprofile/daemon/opd_extended.h:1.1	Wed Apr  1 20:57:36 2009
++++ oprofile/daemon/opd_extended.h	Mon Nov 29 15:05:44 2010
+@@ -35,6 +35,8 @@
+ struct opd_ext_handlers {
+ 	// Extended init
+ 	int (*ext_init)(char const *);
++	// Extended deinit 
++	int (*ext_deinit)();
+ 	// Extended statistics
+ 	int (*ext_print_stats)();
+ 	// Extended sfile handlers
+@@ -61,6 +63,13 @@
+ extern int opd_ext_initialize(char const * value);
+ 
+ /**
++ * @param value: commandline input option string
++ *
++ * Deinitialize
++ */
++extern int opd_ext_deinitialize();
++
++/**
+  * Print out extended feature statistics in oprofiled.log file
+  */
+ extern void opd_ext_print_stats();
+Index: oprofile/daemon/opd_ibs.c
+diff -u oprofile/daemon/opd_ibs.c:1.2 oprofile/daemon/opd_ibs.c:1.3
+--- oprofile/daemon/opd_ibs.c:1.2	Fri Jun  5 15:26:37 2009
++++ oprofile/daemon/opd_ibs.c	Mon Nov 29 15:05:44 2010
+@@ -2,7 +2,7 @@
+  * @file daemon/opd_ibs.c
+  * AMD Family10h Instruction Based Sampling (IBS) handling.
+  *
+- * @remark Copyright 2007 OProfile authors
++ * @remark Copyright 2007-2010 OProfile authors
+  * @remark Read the file COPYING
+  *
+  * @author Jason Yeh <jason.yeh at amd.com>
+@@ -32,22 +32,37 @@
+ #include <stdio.h>
+ #include <errno.h>
+ #include <string.h>
++#include <limits.h>
++
++#if defined(__i386__) && defined(__PIC__)
++/* %ebx may be the PIC register.  */
++        #define __cpuid(level, a, b, c, d)                      \
++          __asm__ ("xchgl\t%%ebx, %1\n\t"                       \
++                   "cpuid\n\t"                                  \
++                   "xchgl\t%%ebx, %1\n\t"                       \
++                   : "=a" (a), "=r" (b), "=c" (c), "=d" (d)     \
++                   : "0" (level))
++#else
++        #define __cpuid(level, a, b, c, d)                      \
++          __asm__ ("cpuid\n\t"                                  \
++                   : "=a" (a), "=b" (b), "=c" (c), "=d" (d)     \
++                   : "0" (level))
++#endif
+ 
+ extern op_cpu cpu_type;
+ extern int no_event_ok;
+ extern int sfile_equal(struct sfile const * sf, struct sfile const * sf2);
+ extern void sfile_dup(struct sfile * to, struct sfile * from);
++extern char * session_dir;
+ 
+-/* IBS Select Arrays/Counters */
++/* IBS Select Counters */
+ static unsigned int ibs_selected_size;
++
++/* These flags store the IBS-derived events selection. */
+ static unsigned int ibs_fetch_selected_flag;
+-static unsigned int ibs_fetch_selected_size;
+ static unsigned int ibs_op_selected_flag;
+-static unsigned int ibs_op_selected_size;
+ static unsigned int ibs_op_ls_selected_flag;
+-static unsigned int ibs_op_ls_selected_size;
+ static unsigned int ibs_op_nb_selected_flag;
+-static unsigned int ibs_op_nb_selected_size;
+ 
+ /* IBS Statistics */
+ static unsigned long ibs_fetch_sample_stats;
+@@ -64,6 +79,18 @@
+ /* IBS Virtual Counter Index(VCI) Map*/
+ unsigned int ibs_vci_map[OP_MAX_IBS_COUNTERS];
+ 
++/* CPUID information */
++unsigned int ibs_family;
++unsigned int ibs_model;
++unsigned int ibs_stepping;
++
++/* IBS Extended MSRs */
++static unsigned long ibs_bta_enabled;
++
++/* IBS log files */
++FILE * memaccess_log;
++FILE * bta_log;
++
+ /**
+  * This function converts IBS fetch event flags and values into
+  * derived events. If the tagged (sampled) fetched caused a derived
+@@ -75,7 +102,7 @@
+ 	if (!trans_fetch)
+ 		return;
+ 
+-	trans_ibs_fetch(trans, ibs_fetch_selected_flag, ibs_fetch_selected_size);
++	trans_ibs_fetch(trans, ibs_fetch_selected_flag);
+ }
+ 
+ 
+@@ -89,9 +116,16 @@
+ 	if (!trans_op)
+ 		return;
+ 
+-	trans_ibs_op(trans, ibs_op_selected_flag, ibs_op_selected_size);
+-	trans_ibs_op_ls(trans, ibs_op_ls_selected_flag, ibs_op_ls_selected_size);
+-	trans_ibs_op_nb(trans, ibs_op_nb_selected_flag, ibs_op_nb_selected_size);
++	trans_ibs_op_mask_reserved(ibs_family, trans);
++
++	if (trans_ibs_op_rip_invalid(trans) != 0)
++		return;
++
++	trans_ibs_op(trans, ibs_op_selected_flag);
++	trans_ibs_op_ls(trans, ibs_op_ls_selected_flag);
++	trans_ibs_op_nb(trans, ibs_op_nb_selected_flag);
++	trans_ibs_op_ls_memaccess(trans);
++	trans_ibs_op_bta(trans);
+ }
+ 
+ 
+@@ -150,6 +184,26 @@
+ }
+ 
+ 
++static void get_ibs_bta_status()
++{
++	FILE * fp = NULL;
++	char buf[PATH_MAX];
++
++	/* Default to disable */
++	ibs_bta_enabled = 0;
++
++	snprintf(buf, PATH_MAX, "/dev/oprofile/ibs_op/branch_target");
++	fp = fopen(buf, "r");
++	if (!fp)
++		return;
++
++	while (fgets(buf, PATH_MAX, fp) != NULL)
++		ibs_bta_enabled = strtoul(buf, NULL, 10);	
++
++	fclose(fp);
++}
++
++
+ void code_ibs_fetch_sample(struct transient * trans)
+ {
+ 	struct ibs_fetch_sample * trans_fetch = NULL;
+@@ -169,12 +223,12 @@
+ 
+ 	trans_fetch->rip = pop_buffer_value(trans);
+ 
+-	trans_fetch->ibs_fetch_lin_addr_low = pop_buffer_value(trans);
+-	trans_fetch->ibs_fetch_lin_addr_high = pop_buffer_value(trans);
++	trans_fetch->ibs_fetch_lin_addr_low   = pop_buffer_value(trans);
++	trans_fetch->ibs_fetch_lin_addr_high  = pop_buffer_value(trans);
+ 
+-	trans_fetch->ibs_fetch_ctl_low = pop_buffer_value(trans);
+-	trans_fetch->ibs_fetch_ctl_high = pop_buffer_value(trans);
+-	trans_fetch->ibs_fetch_phys_addr_low = pop_buffer_value(trans);
++	trans_fetch->ibs_fetch_ctl_low        = pop_buffer_value(trans);
++	trans_fetch->ibs_fetch_ctl_high       = pop_buffer_value(trans);
++	trans_fetch->ibs_fetch_phys_addr_low  = pop_buffer_value(trans);
+ 	trans_fetch->ibs_fetch_phys_addr_high = pop_buffer_value(trans);
+ 
+ 	verbprintf(vsamples,
+@@ -200,6 +254,30 @@
+ }
+ 
+ 
++static void get_ibs_op_bta_sample(struct transient * trans,
++				    struct ibs_op_sample * trans_op)
++{
++	// Check remaining
++	if (!enough_remaining(trans, 2)) {
++		verbprintf(vext, "not enough remaining\n");
++		trans->remaining = 0;
++		ibs_op_incomplete_stats++;
++		return;
++	}
++
++	if (ibs_bta_enabled == 1) {
++		trans_op->ibs_op_brtgt_addr = pop_buffer_value(trans);
++	
++		// Check if branch target address is valid (MSRC001_1035[37] == 1]
++		if ((trans_op->ibs_op_data1_high & (0x00000001 << 5)) == 0) {
++			trans_op->ibs_op_brtgt_addr = 0;
++		}
++	} else {
++		trans_op->ibs_op_brtgt_addr = 0;
++	}
++}
++
++
+ void code_ibs_op_sample(struct transient * trans)
+ {
+ 	struct ibs_op_sample * trans_op= NULL;
+@@ -233,8 +311,10 @@
+ 	trans_op->ibs_op_phys_addr_low     = pop_buffer_value(trans);
+ 	trans_op->ibs_op_phys_addr_high    = pop_buffer_value(trans);
+ 
++	get_ibs_op_bta_sample(trans, trans_op);
++
+ 	verbprintf(vsamples,
+-		   "IBS_OP_X CPU:%ld PID:%d RIP:%lx D1HI:%x D1LO:%x D2LO:%x D3HI:%x D3LO:%x L_LO:%x P_LO:%x\n",
++	   "IBS_OP_X CPU:%ld PID:%d RIP:%lx D1HI:%x D1LO:%x D2LO:%x D3HI:%x D3LO:%x L_LO:%x P_LO:%x\n",
+ 		   trans->cpu,
+ 		   trans->tgid,
+ 		   trans_op->rip,
+@@ -339,16 +419,12 @@
+ 		// Grouping
+ 		if (IS_IBS_FETCH(event->val)) {
+ 			ibs_fetch_selected_flag |= 1 << IBS_FETCH_OFFSET(event->val);
+-			ibs_fetch_selected_size++;
+ 		} else if (IS_IBS_OP(event->val)) {
+ 			ibs_op_selected_flag |= 1 << IBS_OP_OFFSET(event->val);
+-			ibs_op_selected_size++;
+ 		} else if (IS_IBS_OP_LS(event->val)) {
+ 			ibs_op_ls_selected_flag |= 1 << IBS_OP_LS_OFFSET(event->val);
+-			ibs_op_ls_selected_size++;
+ 		} else if (IS_IBS_OP_NB(event->val)) {
+ 			ibs_op_nb_selected_flag |= 1 << IBS_OP_NB_OFFSET(event->val);
+-			ibs_op_nb_selected_size++;
+ 		} else {
+ 			return -1;
+ 		}
+@@ -402,7 +478,6 @@
+ }
+ 
+ 
+-
+ static int ibs_parse_and_set_um_op(char const * str, unsigned long int * ibs_op_um)
+ {
+ 	char * end = NULL;
+@@ -418,6 +493,31 @@
+ }
+ 
+ 
++static void check_cpuid_family_model_stepping()
++{
++       union {
++                unsigned eax;
++                struct {
++                        unsigned stepping : 4;
++                        unsigned model : 4;
++                        unsigned family : 4;
++                        unsigned res : 4;
++                        unsigned ext_model : 4;
++                        unsigned ext_family : 8;
++                        unsigned res2 : 4;
++                };
++        } v;
++	unsigned ebx, ecx, edx;
++
++	/* CPUID Fn0000_0001_EAX Family, Model, Stepping */
++	__cpuid(1, v.eax, ebx, ecx, edx);
++
++	ibs_family   = v.family + v.ext_family;
++	ibs_model    = v.model + v.ext_model;
++	ibs_stepping = v.stepping;
++}
++
++
+ static int ibs_init(char const * argv)
+ {
+ 	char * tmp, * ptr, * tok1, * tok2 = NULL;
+@@ -532,15 +632,67 @@
+ 
+ 	// Allow no event
+ 	no_event_ok = 1;
++
++	check_cpuid_family_model_stepping();
++
++	get_ibs_bta_status();
++
++	/* Create IBS memory access log */
++	memaccess_log = NULL;
++	if (ibs_op_um & 0x2) {
++		char filename[1024];
++		strncpy(filename, session_dir, 1023);
++		strncat(filename, "/samples/ibs_memaccess.log", 1024);
++		if ((memaccess_log = fopen(filename, "w")) == NULL) {
++			verbprintf(vext, "Warning: Cannot create file %s\n", filename);
++			
++		} else {
++			fprintf (memaccess_log, "# IBS Memory Access Log\n\n");
++			fprintf (memaccess_log, "# Format: app_cookie,cookie,cpu,tgid,tid,pc,branch-target-address,\n");
++			fprintf (memaccess_log, "#         phy-hi:phy-low,lin-hi:lin-low,accese-type,latency\n\n");
++		}
++	}
++
++	// Create IBS Branch Target Address (BTA) log	
++	bta_log = NULL;
++	if (ibs_bta_enabled) {
++		char filename[1024];
++		strncpy(filename, session_dir, 1023);
++		strncat(filename, "/samples/ibs_bta.log", 1024);
++		if ((bta_log = fopen(filename, "w")) == NULL) {
++			verbprintf(vext, "Warning: Cannot create file %s\n", filename);
++		} else {
++			fprintf (bta_log, "# IBS Memory Access Log\n\n");
++			fprintf (bta_log, "# Format: app_cookie,cookie,cpu,tgid,tid,pc,branch-target-address\n\n");
++		}
++	}
++
++	return 0;
++}
++
++
++static int ibs_deinit()
++{
++	if (memaccess_log) {
++		fclose (memaccess_log);
++		memaccess_log = NULL;
++	}
++	
++	if (bta_log) {
++		fclose (bta_log);
++		bta_log = NULL;
++	}
+ 	return 0;
+ }
+ 
+ 
+ static int ibs_print_stats()
+ {
+-	printf("Nr. IBS Fetch samples     : %lu (%lu entries)\n", ibs_fetch_sample_stats, (ibs_fetch_sample_stats * 7));
++	printf("Nr. IBS Fetch samples     : %lu (%lu entries)\n", 
++		ibs_fetch_sample_stats, (ibs_fetch_sample_stats * 7));
+ 	printf("Nr. IBS Fetch incompletes : %lu\n", ibs_fetch_incomplete_stats);
+-	printf("Nr. IBS Op samples        : %lu (%lu entries)\n", ibs_op_sample_stats, (ibs_op_sample_stats * 13));
++	printf("Nr. IBS Op samples        : %lu (%lu entries)\n", 
++		ibs_op_sample_stats, (ibs_op_sample_stats * 13));
+ 	printf("Nr. IBS Op incompletes    : %lu\n", ibs_op_incomplete_stats);
+ 	printf("Nr. IBS derived events    : %lu\n", ibs_derived_event_stats);
+ 	return 0;
+@@ -686,7 +838,8 @@
+ 
+ struct opd_ext_handlers ibs_handlers =
+ {
+-	.ext_init  = &ibs_init,
++	.ext_init        = &ibs_init,
++	.ext_deinit      = &ibs_deinit,
+ 	.ext_print_stats = &ibs_print_stats,
+-	.ext_sfile = &ibs_sfile_handlers
++	.ext_sfile       = &ibs_sfile_handlers
+ };
+Index: oprofile/daemon/opd_ibs.h
+diff -u oprofile/daemon/opd_ibs.h:1.1 oprofile/daemon/opd_ibs.h:1.2
+--- oprofile/daemon/opd_ibs.h:1.1	Fri Apr 17 18:40:43 2009
++++ oprofile/daemon/opd_ibs.h	Mon Nov 29 15:05:44 2010
+@@ -2,7 +2,7 @@
+  * @file daemon/opd_ibs.h
+  * AMD Family10h Instruction Based Sampling (IBS) handling.
+  *
+- * @remark Copyright 2008 OProfile authors
++ * @remark Copyright 2008-2010 OProfile authors
+  * @remark Read the file COPYING
+  *
+  * @author Jason Yeh <jason.yeh at amd.com>
+@@ -70,18 +70,14 @@
+ 	/* MSRC001_1037 IBS Op Data 3 Register */
+ 	unsigned int ibs_op_data3_low;
+ 	unsigned int ibs_op_data3_high;
++	/* MSRC001_1038 IBS DC Linear Address */
+ 	unsigned int ibs_op_ldst_linaddr_low;
+ 	unsigned int ibs_op_ldst_linaddr_high;
++	/* MSRC001_1039 IBS DC Physical Address */
+ 	unsigned int ibs_op_phys_addr_low;
+ 	unsigned int ibs_op_phys_addr_high;
+-};
+-
+-
+-enum IBSL1PAGESIZE {
+-	L1TLB4K = 0,
+-	L1TLB2M,
+-	L1TLB1G,
+-	L1TLB_INVALID
++	/* MSRC001_103B IBS Branch Target Address */
++	unsigned long ibs_op_brtgt_addr;
+ };
+ 
+ 
+Index: oprofile/daemon/opd_ibs_macro.h
+diff -u oprofile/daemon/opd_ibs_macro.h:1.1 oprofile/daemon/opd_ibs_macro.h:1.2
+--- oprofile/daemon/opd_ibs_macro.h:1.1	Fri Apr 17 18:40:43 2009
++++ oprofile/daemon/opd_ibs_macro.h	Mon Nov 29 15:05:44 2010
+@@ -1,8 +1,8 @@
+ /**
+  * @file daemon/opd_ibs_macro.h
+- * AMD Family10h Instruction Based Sampling (IBS) related macro.
++ * AMD Instruction Based Sampling (IBS) related macro.
+  *
+- * @remark Copyright 2008 OProfile authors
++ * @remark Copyright 2008-2010 OProfile authors
+  * @remark Read the file COPYING
+  *
+  * @author Jason Yeh <jason.yeh at amd.com>
+@@ -16,7 +16,8 @@
+ 
+ /**
+  * The following defines are bit masks that are used to select
+- * IBS fetch event flags and values at the MSR level.
++ * IBS fetch event flags and values at the
++ * MSRC001_1030 IBS Fetch Control Register (IbsFetchCtl)
+  */
+ #define FETCH_MASK_LATENCY  0x0000ffff
+ #define FETCH_MASK_COMPLETE 0x00040000
+@@ -34,7 +35,10 @@
+  * The following defines are bit masks that are used to select
+  * IBS op event flags and values at the MSR level.
+  */
++
++/* MSRC001_1035 IBS Op Data Register (IbsOpData) */
+ #define BR_MASK_RETIRE           0x0000ffff
++#define MASK_RIP_INVALID         0x00000040
+ #define BR_MASK_BRN_RET          0x00000020
+ #define BR_MASK_BRN_MISP         0x00000010
+ #define BR_MASK_BRN_TAKEN        0x00000008
+@@ -42,17 +46,19 @@
+ #define BR_MASK_MISP_RETURN      0x00000002
+ #define BR_MASK_BRN_RESYNC       0x00000001
+ 
++/* MSRC001_1036 IBS Op Data Register (IbsOpData2) */
+ #define NB_MASK_L3_STATE         0x00000020
+ #define NB_MASK_REQ_DST_PROC     0x00000010
+ #define NB_MASK_REQ_DATA_SRC     0x00000007
+ 
++/* MSRC001_1037 IBS Op Data Register (IbsOpData3) */
+ #define DC_MASK_L2_HIT_1G        0x00080000
+ #define DC_MASK_PHY_ADDR_VALID   0x00040000
+ #define DC_MASK_LIN_ADDR_VALID   0x00020000
+ #define DC_MASK_MAB_HIT          0x00010000
+ #define DC_MASK_LOCKED_OP        0x00008000
+-#define DC_MASK_WC_MEM_ACCESS    0x00004000
+-#define DC_MASK_UC_MEM_ACCESS    0x00002000
++#define DC_MASK_UC_MEM_ACCESS    0x00004000
++#define DC_MASK_WC_MEM_ACCESS    0x00002000
+ #define DC_MASK_ST_TO_LD_CANCEL  0x00001000
+ #define DC_MASK_ST_TO_LD_FOR     0x00000800
+ #define DC_MASK_ST_BANK_CONFLICT 0x00000400
+@@ -77,10 +83,9 @@
+  * at 0xf000.
+  *
+  * The definitions in this file *must* match definitions
+- * of IBS derived events in gh-events.xml and in the
+- * oprofile AMD Family 10h events file. More information
++ * of IBS derived events. More information
+  * about IBS derived events is given in the Software Oprimization
+- * Guide for AMD Family 10h Processors.
++ * Guide.
+  */
+ 
+ /**
+@@ -108,6 +113,8 @@
+ #define IBS_FETCH_MAX            (IBS_FETCH_END - IBS_FETCH_BASE + 1)
+ #define IS_IBS_FETCH(x)          (IBS_FETCH_BASE <= x && x <= IBS_FETCH_END)
+ #define IBS_FETCH_OFFSET(x)      (x - IBS_FETCH_BASE)
++#define CHECK_FETCH_SELECTED_FLAG(x)	if ( selected_flag & (1 << IBS_FETCH_OFFSET(x)))
++
+ 
+ /**
+  * The following defines associate a 16-bit select value with an IBS
+@@ -129,6 +136,8 @@
+ #define IBS_OP_MAX                (IBS_OP_END - IBS_OP_BASE + 1)
+ #define IS_IBS_OP(x)              (IBS_OP_BASE <= x && x <= IBS_OP_END)
+ #define IBS_OP_OFFSET(x)          (x - IBS_OP_BASE)
++#define CHECK_OP_SELECTED_FLAG(x)	if ( selected_flag & (1 << IBS_OP_OFFSET(x)))
++
+ 
+ /**
+  * The following defines associate a 16-bit select value with an IBS
+@@ -166,6 +175,7 @@
+ #define IBS_OP_LS_MAX            (IBS_OP_LS_END - IBS_OP_LS_BASE + 1)
+ #define IS_IBS_OP_LS(x)          (IBS_OP_LS_BASE <= x && x <= IBS_OP_LS_END)
+ #define IBS_OP_LS_OFFSET(x)      (x - IBS_OP_LS_BASE)
++#define CHECK_OP_LS_SELECTED_FLAG(x)	if ( selected_flag & (1 << IBS_OP_LS_OFFSET(x)))
+ 
+ 
+ /**
+@@ -191,6 +201,7 @@
+ #define IBS_OP_NB_MAX            (IBS_OP_NB_END - IBS_OP_NB_BASE + 1)
+ #define IS_IBS_OP_NB(x)          (IBS_OP_NB_BASE <= x && x <= IBS_OP_NB_END)
+ #define IBS_OP_NB_OFFSET(x)      (x - IBS_OP_NB_BASE)
++#define CHECK_OP_NB_SELECTED_FLAG(x)	if ( selected_flag & (1 << IBS_OP_NB_OFFSET(x)))
+ 
+ 
+ #define OP_MAX_IBS_COUNTERS      (IBS_FETCH_MAX + IBS_OP_MAX + IBS_OP_LS_MAX + IBS_OP_NB_MAX)
+@@ -215,8 +226,18 @@
+ /** Bit 52 IbsPhyAddrValid: instruction fetch physical address valid. */
+ #define IBS_FETCH_PHYS_ADDR_VALID(x)            ((x->ibs_fetch_ctl_high & FETCH_MASK_PHY_ADDR) != 0)
+ 
++enum IBSL1PAGESIZE {
++	L1TLB4K = 0,
++	L1TLB2M,
++	L1TLB1G,
++	L1TLB_INVALID
++};
++
+ /** Bits 54:53 IbsL1TlbPgSz: instruction cache L1TLB page size. */
+ #define IBS_FETCH_TLB_PAGE_SIZE(x)              ((unsigned short)((x->ibs_fetch_ctl_high >> 21) & 0x3))
++#define IBS_FETCH_TLB_PAGE_SIZE_4K(x)           (IBS_FETCH_TLB_PAGE_SIZE(x) == L1TLB4K)
++#define IBS_FETCH_TLB_PAGE_SIZE_2M(x)           (IBS_FETCH_TLB_PAGE_SIZE(x) == L1TLB2M)
++#define IBS_FETCH_TLB_PAGE_SIZE_1G(x)           (IBS_FETCH_TLB_PAGE_SIZE(x) == L1TLB1G)
+ 
+ /** Bit 55 IbsL1TlbMiss: instruction cache L1TLB miss. */
+ #define IBS_FETCH_M_L1_TLB_MISS(x)              ((x->ibs_fetch_ctl_high & FETCH_MASK_L1_MISS) != 0)
+@@ -252,22 +273,25 @@
+ #define IBS_OP_TAG_TO_RETIRE_CYCLES(x)          ((unsigned short)((x->ibs_op_data1_low >> 16) & BR_MASK_RETIRE))
+ 
+ /** 32 op_branch_resync : resync macro-op. */
+-#define IBS_OP_OP_BRANCH_RESYNC(x)              ((x->ibs_op_data1_high & BR_MASK_BRN_RESYNC) != 0)
++#define IBS_OP_BRANCH_RESYNC(x)                 ((x->ibs_op_data1_high & BR_MASK_BRN_RESYNC) != 0)
+ 
+ /** 33 op_mispredict_return : mispredicted return macro-op. */
+-#define IBS_OP_OP_MISPREDICT_RETURN(x)          ((x->ibs_op_data1_high & BR_MASK_MISP_RETURN) != 0)
++#define IBS_OP_MISPREDICT_RETURN(x)             ((x->ibs_op_data1_high & BR_MASK_MISP_RETURN) != 0)
+ 
+ /** 34 IbsOpReturn: return macro-op. */
+-#define IBS_OP_OP_RETURN(x)                     ((x->ibs_op_data1_high & BR_MASK_RETURN) != 0)
++#define IBS_OP_RETURN(x)                        ((x->ibs_op_data1_high & BR_MASK_RETURN) != 0)
+ 
+ /** 35 IbsOpBrnTaken: taken branch macro-op. */
+-#define IBS_OP_OP_BRANCH_TAKEN(x)               ((x->ibs_op_data1_high & BR_MASK_BRN_TAKEN) != 0)
++#define IBS_OP_BRANCH_TAKEN(x)                  ((x->ibs_op_data1_high & BR_MASK_BRN_TAKEN) != 0)
+ 
+ /** 36 IbsOpBrnMisp: mispredicted branch macro-op.  */
+-#define IBS_OP_OP_BRANCH_MISPREDICT(x)          ((x->ibs_op_data1_high & BR_MASK_BRN_MISP) != 0)
++#define IBS_OP_BRANCH_MISPREDICT(x)             ((x->ibs_op_data1_high & BR_MASK_BRN_MISP) != 0)
+ 
+ /** 37 IbsOpBrnRet: branch macro-op retired. */
+-#define IBS_OP_OP_BRANCH_RETIRED(x)             ((x->ibs_op_data1_high & BR_MASK_BRN_RET) != 0)
++#define IBS_OP_BRANCH_RETIRED(x)                ((x->ibs_op_data1_high & BR_MASK_BRN_RET) != 0)
++
++/** 38 IbsRipInvalid: RIP invalid. */
++#define IBS_OP_RIP_INVALID(x)                   ((x->ibs_op_data1_high & MASK_RIP_INVALID) != 0)
+ 
+ /**
+  * MSRC001_1036 IBS Op Data 2 Register (IbsOpData2)
+@@ -282,10 +306,18 @@
+ /** 2:0 NbIbsReqSrc: Northbridge IBS request data source */
+ #define IBS_OP_NB_IBS_REQ_SRC(x)                ((unsigned char)(x->ibs_op_data2_low & NB_MASK_REQ_DATA_SRC))
+ 
++#define IBS_OP_NB_IBS_REQ_SRC_01(x)             (IBS_OP_NB_IBS_REQ_SRC(x) == 0x01)
++
++#define IBS_OP_NB_IBS_REQ_SRC_02(x)             (IBS_OP_NB_IBS_REQ_SRC(x) == 0x02)
++
++#define IBS_OP_NB_IBS_REQ_SRC_03(x)             (IBS_OP_NB_IBS_REQ_SRC(x) == 0x03)
++
++#define IBS_OP_NB_IBS_REQ_SRC_07(x)             (IBS_OP_NB_IBS_REQ_SRC(x) == 0x07)
++
+ /**
+  * MSRC001_1037 IBS Op Data3 Register
+  *
+- * Bits 48:32   IbsDcMissLat
++ * Bits 47:32   IbsDcMissLat
+  */
+ #define IBS_OP_DC_MISS_LATENCY(x)               ((unsigned short)(x->ibs_op_data3_high & 0xffff))
+ 
+@@ -328,12 +360,12 @@
+ /** 12 ibs_dc_st_to_ld_can: Data forwarding from store to load operation cancelled */
+ #define IBS_OP_IBS_DC_ST_TO_LD_CAN(x)           ((x->ibs_op_data3_low & DC_MASK_ST_TO_LD_CANCEL) != 0)
+ 
+-/** 13 ibs_dc_uc_mem_acc: UC memory access */
+-#define IBS_OP_IBS_DC_UC_MEM_ACC(x)             ((x->ibs_op_data3_low & DC_MASK_UC_MEM_ACCESS) != 0)
+-
+-/** 14 ibs_dc_wc_mem_acc : WC memory access */
++/** 13 ibs_dc_wc_mem_acc : WC memory access */
+ #define IBS_OP_IBS_DC_WC_MEM_ACC(x)             ((x->ibs_op_data3_low & DC_MASK_WC_MEM_ACCESS) != 0)
+ 
++/** 14 ibs_dc_uc_mem_acc : UC memory access */
++#define IBS_OP_IBS_DC_UC_MEM_ACC(x)             ((x->ibs_op_data3_low & DC_MASK_UC_MEM_ACCESS) != 0)
++
+ /** 15 ibs_locked_op: Locked operation */
+ #define IBS_OP_IBS_LOCKED_OP(x)                 ((x->ibs_op_data3_low & DC_MASK_LOCKED_OP) != 0)
+ 
+@@ -362,5 +394,4 @@
+  */
+ #define AGG_IBS_COUNT(EV, COUNT)        opd_log_ibs_count(EV, trans, COUNT)
+ 
+-
+ #endif /*OPD_IBS_MACRO_H*/
+Index: oprofile/daemon/opd_ibs_trans.c
+diff -u oprofile/daemon/opd_ibs_trans.c:1.1 oprofile/daemon/opd_ibs_trans.c:1.2
+--- oprofile/daemon/opd_ibs_trans.c:1.1	Fri Apr 17 18:40:43 2009
++++ oprofile/daemon/opd_ibs_trans.c	Mon Nov 29 15:05:44 2010
+@@ -1,8 +1,8 @@
+ /**
+  * @file daemon/opd_ibs_trans.c
+- * AMD Family10h Instruction Based Sampling (IBS) translation.
++ * AMD Instruction Based Sampling (IBS) translation.
+  *
+- * @remark Copyright 2008 OProfile authors
++ * @remark Copyright 2008 - 2010 OProfile authors
+  * @remark Read the file COPYING
+  *
+  * @author Jason Yeh <jason.yeh at amd.com>
+@@ -20,205 +20,185 @@
+ #include <stdlib.h>
+ #include <stdio.h>
+ 
+-#define MAX_EVENTS_PER_GROUP	32
++extern FILE * bta_log;
++extern FILE * memaccess_log;
+ 
+ /*
+- * --------------------- OP DERIVED FUNCTION
++ * --------------------- FETCH DERIVED FUNCTION
+  */
+-void trans_ibs_fetch (struct transient * trans, unsigned int selected_flag, unsigned int size)
++void trans_ibs_fetch (struct transient * trans, unsigned int selected_flag)
+ {
+ 	struct ibs_fetch_sample * trans_fetch = ((struct ibs_sample*)(trans->ext))->fetch;
+-	unsigned int i, j, mask = 1;
+ 
+-	for (i = IBS_FETCH_BASE, j =0 ; i <= IBS_FETCH_END && j < size ; i++, mask = mask << 1) {
++	if ((selected_flag) == 0)
++		return;
+ 
+-		if ((selected_flag & mask) == 0)
+-			continue;
++	CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_ALL) {
++		/* IBS all fetch samples (kills + attempts) */
++		AGG_IBS_EVENT(DE_IBS_FETCH_ALL);
++	}		
++
++	CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_KILLED) {
++		/* IBS killed fetches ("case 0") -- All interesting event
++		 * flags are clear */
++		if (IBS_FETCH_KILLED(trans_fetch))
++			AGG_IBS_EVENT(DE_IBS_FETCH_KILLED);
++	}
+ 
+-		j++;
++	CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_ATTEMPTED) {
++		/* Any non-killed fetch is an attempted fetch */
++		AGG_IBS_EVENT(DE_IBS_FETCH_ATTEMPTED);
++	}
+ 
+-		switch (i) {
+-
+-		case DE_IBS_FETCH_ALL:
+-			/* IBS all fetch samples (kills + attempts) */
+-			AGG_IBS_EVENT(DE_IBS_FETCH_ALL);
+-			break;
+-
+-		case DE_IBS_FETCH_KILLED:
+-			/* IBS killed fetches ("case 0") -- All interesting event
+-			 * flags are clear */
+-			if (IBS_FETCH_KILLED(trans_fetch))
+-				AGG_IBS_EVENT(DE_IBS_FETCH_KILLED);
+-			break;
+-
+-		case DE_IBS_FETCH_ATTEMPTED:
+-			/* Any non-killed fetch is an attempted fetch */
+-			AGG_IBS_EVENT(DE_IBS_FETCH_ATTEMPTED);
+-			break;
+-
+-		case DE_IBS_FETCH_COMPLETED:
+-			if (IBS_FETCH_FETCH_COMPLETION(trans_fetch))
+-				/* IBS Fetch Completed */
+-				AGG_IBS_EVENT(DE_IBS_FETCH_COMPLETED);
+-			break;
+-
+-		case DE_IBS_FETCH_ABORTED:
+-			if (!IBS_FETCH_FETCH_COMPLETION(trans_fetch))
+-				/* IBS Fetch Aborted */
+-				AGG_IBS_EVENT(DE_IBS_FETCH_ABORTED);
+-			break;
+-
+-		case DE_IBS_L1_ITLB_HIT:
+-			/* IBS L1 ITLB hit */
+-			if (IBS_FETCH_L1_TLB_HIT(trans_fetch))
+-				AGG_IBS_EVENT(DE_IBS_L1_ITLB_HIT);
+-			break;
+-
+-		case DE_IBS_ITLB_L1M_L2H:
+-			/* IBS L1 ITLB miss and L2 ITLB hit */
+-			if (IBS_FETCH_ITLB_L1M_L2H(trans_fetch))
+-				AGG_IBS_EVENT(DE_IBS_ITLB_L1M_L2H);
+-			break;
+-
+-		case DE_IBS_ITLB_L1M_L2M:
+-			/* IBS L1 & L2 ITLB miss; complete ITLB miss */
+-			if (IBS_FETCH_ITLB_L1M_L2M(trans_fetch))
+-				AGG_IBS_EVENT(DE_IBS_ITLB_L1M_L2M);
+-			break;
+-
+-		case DE_IBS_IC_MISS:
+-			/* IBS instruction cache miss */
+-			if (IBS_FETCH_INST_CACHE_MISS(trans_fetch))
+-				AGG_IBS_EVENT(DE_IBS_IC_MISS);
+-			break;
+-
+-		case DE_IBS_IC_HIT:
+-			/* IBS instruction cache hit */
+-			if (IBS_FETCH_INST_CACHE_HIT(trans_fetch))
+-				AGG_IBS_EVENT(DE_IBS_IC_HIT);
+-			break;
+-
+-		case DE_IBS_FETCH_4K_PAGE:
+-			if (IBS_FETCH_PHYS_ADDR_VALID(trans_fetch)
+-			    && IBS_FETCH_TLB_PAGE_SIZE(trans_fetch) ==  L1TLB4K)
+-				AGG_IBS_EVENT(DE_IBS_FETCH_4K_PAGE);
+-			break;
+-
+-		case DE_IBS_FETCH_2M_PAGE:
+-			if (IBS_FETCH_PHYS_ADDR_VALID(trans_fetch)
+-			    && IBS_FETCH_TLB_PAGE_SIZE(trans_fetch) ==  L1TLB2M)
+-				AGG_IBS_EVENT(DE_IBS_FETCH_2M_PAGE);
+-			break;
+-
+-		case DE_IBS_FETCH_1G_PAGE:
+-			if (IBS_FETCH_PHYS_ADDR_VALID(trans_fetch)
+-			    && IBS_FETCH_TLB_PAGE_SIZE(trans_fetch) ==  L1TLB1G)
+-				AGG_IBS_EVENT(DE_IBS_FETCH_1G_PAGE);
+-			break;
+-
+-		case DE_IBS_FETCH_XX_PAGE:
+-			break;
+-
+-		case DE_IBS_FETCH_LATENCY:
+-			if (IBS_FETCH_FETCH_LATENCY(trans_fetch))
+-				AGG_IBS_COUNT(DE_IBS_FETCH_LATENCY,
+-					      IBS_FETCH_FETCH_LATENCY(trans_fetch));
+-			break;
+-		default:
+-			break;
+-		}
++	CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_COMPLETED) {
++		if (IBS_FETCH_FETCH_COMPLETION(trans_fetch))
++			/* IBS Fetch Completed */
++			AGG_IBS_EVENT(DE_IBS_FETCH_COMPLETED);
++	}
++
++	CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_ABORTED) {
++		if (!IBS_FETCH_FETCH_COMPLETION(trans_fetch))
++			/* IBS Fetch Aborted */
++			AGG_IBS_EVENT(DE_IBS_FETCH_ABORTED);
++	}
++
++	CHECK_FETCH_SELECTED_FLAG(DE_IBS_L1_ITLB_HIT) {
++		/* IBS L1 ITLB hit */
++		if (IBS_FETCH_L1_TLB_HIT(trans_fetch))
++			AGG_IBS_EVENT(DE_IBS_L1_ITLB_HIT);
++	}
++
++	CHECK_FETCH_SELECTED_FLAG(DE_IBS_ITLB_L1M_L2H) {
++		/* IBS L1 ITLB miss and L2 ITLB hit */
++		if (IBS_FETCH_ITLB_L1M_L2H(trans_fetch))
++			AGG_IBS_EVENT(DE_IBS_ITLB_L1M_L2H);
++	}
++
++	CHECK_FETCH_SELECTED_FLAG(DE_IBS_ITLB_L1M_L2M) {
++		/* IBS L1 & L2 ITLB miss; complete ITLB miss */
++		if (IBS_FETCH_ITLB_L1M_L2M(trans_fetch))
++			AGG_IBS_EVENT(DE_IBS_ITLB_L1M_L2M);
++	}
++
++	CHECK_FETCH_SELECTED_FLAG(DE_IBS_IC_MISS) {
++		/* IBS instruction cache miss */
++		if (IBS_FETCH_INST_CACHE_MISS(trans_fetch))
++			AGG_IBS_EVENT(DE_IBS_IC_MISS);
++	}
++
++	CHECK_FETCH_SELECTED_FLAG(DE_IBS_IC_HIT) {
++		/* IBS instruction cache hit */
++		if (IBS_FETCH_INST_CACHE_HIT(trans_fetch))
++			AGG_IBS_EVENT(DE_IBS_IC_HIT);
++	}
++
++	CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_4K_PAGE) {
++		if (IBS_FETCH_PHYS_ADDR_VALID(trans_fetch)
++		    && IBS_FETCH_TLB_PAGE_SIZE_4K(trans_fetch))
++			AGG_IBS_EVENT(DE_IBS_FETCH_4K_PAGE);
++	}
++
++	CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_2M_PAGE) {
++		if (IBS_FETCH_PHYS_ADDR_VALID(trans_fetch)
++		    && IBS_FETCH_TLB_PAGE_SIZE_2M(trans_fetch))
++			AGG_IBS_EVENT(DE_IBS_FETCH_2M_PAGE);
++	}
++
++	CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_1G_PAGE) {
++		if (IBS_FETCH_PHYS_ADDR_VALID(trans_fetch)
++		    && IBS_FETCH_TLB_PAGE_SIZE_1G(trans_fetch))
++			AGG_IBS_EVENT(DE_IBS_FETCH_1G_PAGE);
++	}
++
++	CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_XX_PAGE) {
++	}
++
++	CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_LATENCY) {
++		if (IBS_FETCH_FETCH_LATENCY(trans_fetch))
++			AGG_IBS_COUNT(DE_IBS_FETCH_LATENCY,
++				      IBS_FETCH_FETCH_LATENCY(trans_fetch));
+ 	}
+ }
+ 
++
+ /*
+  * --------------------- OP DERIVED FUNCTION
+  */
+-void trans_ibs_op (struct transient * trans, unsigned int selected_flag, unsigned int size)
++void trans_ibs_op (struct transient * trans, unsigned int selected_flag)
+ {
+ 	struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op;
+-	unsigned int i, j, mask = 1;
+ 
+-	for (i = IBS_OP_BASE, j =0 ; i <= IBS_OP_END && j < size ; i++, mask = mask << 1) {
++	if ((selected_flag) == 0)
++		return;
+ 
+-		if ((selected_flag & mask) == 0)
+-			continue;
++	CHECK_OP_SELECTED_FLAG(DE_IBS_OP_ALL) {
++		/* All IBS op samples */
++		AGG_IBS_EVENT(DE_IBS_OP_ALL);
++	}
+ 
+-		j++;
++	CHECK_OP_SELECTED_FLAG(DE_IBS_OP_TAG_TO_RETIRE) {
++		/* Tally retire cycle counts for all sampled macro-ops
++		 * IBS tag to retire cycles */
++		if (IBS_OP_TAG_TO_RETIRE_CYCLES(trans_op))
++			AGG_IBS_COUNT(DE_IBS_OP_TAG_TO_RETIRE,
++				IBS_OP_TAG_TO_RETIRE_CYCLES(trans_op));
++	}
+ 
+-		switch (i) {
+-
+-		case DE_IBS_OP_ALL:
+-			/* All IBS op samples */
+-			AGG_IBS_EVENT(DE_IBS_OP_ALL);
+-			break;
+-
+-		case DE_IBS_OP_TAG_TO_RETIRE:
+-			/* Tally retire cycle counts for all sampled macro-ops
+-			 * IBS tag to retire cycles */
+-			if (IBS_OP_TAG_TO_RETIRE_CYCLES(trans_op))
+-				AGG_IBS_COUNT(DE_IBS_OP_TAG_TO_RETIRE,
+-					IBS_OP_TAG_TO_RETIRE_CYCLES(trans_op));
+-			break;
+-
+-		case DE_IBS_OP_COMP_TO_RETIRE:
+-			/* IBS completion to retire cycles */
+-			if (IBS_OP_COM_TO_RETIRE_CYCLES(trans_op))
+-				AGG_IBS_COUNT(DE_IBS_OP_COMP_TO_RETIRE,
+-					IBS_OP_COM_TO_RETIRE_CYCLES(trans_op));
+-			break;
+-
+-		case DE_IBS_BRANCH_RETIRED:
+-			if (IBS_OP_OP_BRANCH_RETIRED(trans_op))
+-				/* IBS Branch retired op */
+-				AGG_IBS_EVENT(DE_IBS_BRANCH_RETIRED) ;
+-			break;
+-
+-		case DE_IBS_BRANCH_MISP:
+-			if (IBS_OP_OP_BRANCH_RETIRED(trans_op)
+-			    /* Test branch-specific event flags */
+-			    /* IBS mispredicted Branch op */
+-			    && IBS_OP_OP_BRANCH_MISPREDICT(trans_op))
+-				AGG_IBS_EVENT(DE_IBS_BRANCH_MISP) ;
+-			break;
+-
+-		case DE_IBS_BRANCH_TAKEN:
+-			if (IBS_OP_OP_BRANCH_RETIRED(trans_op)
+-			    /* IBS taken Branch op */
+-			    && IBS_OP_OP_BRANCH_TAKEN(trans_op))
+-				AGG_IBS_EVENT(DE_IBS_BRANCH_TAKEN);
+-			break;
+-
+-		case DE_IBS_BRANCH_MISP_TAKEN:
+-			if (IBS_OP_OP_BRANCH_RETIRED(trans_op)
+-			    /* IBS mispredicted taken branch op */
+-			    && IBS_OP_OP_BRANCH_TAKEN(trans_op)
+-			    && IBS_OP_OP_BRANCH_MISPREDICT(trans_op))
+-				AGG_IBS_EVENT(DE_IBS_BRANCH_MISP_TAKEN);
+-			break;
+-
+-		case DE_IBS_RETURN:
+-			if (IBS_OP_OP_BRANCH_RETIRED(trans_op)
+-			    /* IBS return op */
+-			    && IBS_OP_OP_RETURN(trans_op))
+-				AGG_IBS_EVENT(DE_IBS_RETURN);
+-			break;
+-
+-		case DE_IBS_RETURN_MISP:
+-			if (IBS_OP_OP_BRANCH_RETIRED(trans_op)
+-			    /* IBS mispredicted return op */
+-			    && IBS_OP_OP_RETURN(trans_op)
+-			    && IBS_OP_OP_BRANCH_MISPREDICT(trans_op))
+-				AGG_IBS_EVENT(DE_IBS_RETURN_MISP);
+-			break;
+-
+-		case DE_IBS_RESYNC:
+-			/* Test for a resync macro-op */
+-			if (IBS_OP_OP_BRANCH_RESYNC(trans_op))
+-				AGG_IBS_EVENT(DE_IBS_RESYNC);
+-			break;
+-		default:
+-			break;
+-		}
++	CHECK_OP_SELECTED_FLAG(DE_IBS_OP_COMP_TO_RETIRE) {
++		/* IBS completion to retire cycles */
++		if (IBS_OP_COM_TO_RETIRE_CYCLES(trans_op))
++			AGG_IBS_COUNT(DE_IBS_OP_COMP_TO_RETIRE,
++				IBS_OP_COM_TO_RETIRE_CYCLES(trans_op));
++	}
++
++	CHECK_OP_SELECTED_FLAG(DE_IBS_BRANCH_RETIRED) {
++		if (IBS_OP_BRANCH_RETIRED(trans_op))
++			/* IBS Branch retired op */
++			AGG_IBS_EVENT(DE_IBS_BRANCH_RETIRED) ;
++	}
++
++	CHECK_OP_SELECTED_FLAG(DE_IBS_BRANCH_MISP) {
++		if (IBS_OP_BRANCH_RETIRED(trans_op)
++		    /* Test branch-specific event flags */
++		    /* IBS mispredicted Branch op */
++		    && IBS_OP_BRANCH_MISPREDICT(trans_op))
++			AGG_IBS_EVENT(DE_IBS_BRANCH_MISP) ;
++	}
++
++	CHECK_OP_SELECTED_FLAG(DE_IBS_BRANCH_TAKEN) {
++		if (IBS_OP_BRANCH_RETIRED(trans_op)
++		    /* IBS taken Branch op */
++		    && IBS_OP_BRANCH_TAKEN(trans_op))
++			AGG_IBS_EVENT(DE_IBS_BRANCH_TAKEN);
++	}
++
++	CHECK_OP_SELECTED_FLAG(DE_IBS_BRANCH_MISP_TAKEN) {
++		if (IBS_OP_BRANCH_RETIRED(trans_op)
++		    /* IBS mispredicted taken branch op */
++		    && IBS_OP_BRANCH_TAKEN(trans_op)
++		    && IBS_OP_BRANCH_MISPREDICT(trans_op))
++			AGG_IBS_EVENT(DE_IBS_BRANCH_MISP_TAKEN);
++	}
++
++	CHECK_OP_SELECTED_FLAG(DE_IBS_RETURN) {
++		if (IBS_OP_BRANCH_RETIRED(trans_op)
++		    /* IBS return op */
++		    && IBS_OP_RETURN(trans_op))
++			AGG_IBS_EVENT(DE_IBS_RETURN);
++	}
++
++	CHECK_OP_SELECTED_FLAG(DE_IBS_RETURN_MISP) {
++		if (IBS_OP_BRANCH_RETIRED(trans_op)
++		    /* IBS mispredicted return op */
++		    && IBS_OP_RETURN(trans_op)
++		    && IBS_OP_BRANCH_MISPREDICT(trans_op))
++			AGG_IBS_EVENT(DE_IBS_RETURN_MISP);
++	}
++
++	CHECK_OP_SELECTED_FLAG(DE_IBS_RESYNC) {
++		/* Test for a resync macro-op */
++		if (IBS_OP_BRANCH_RESYNC(trans_op))
++			AGG_IBS_EVENT(DE_IBS_RESYNC);
+ 	}
+ }
+ 
+@@ -226,213 +206,201 @@
+ /*
+  * --------------------- OP LS DERIVED FUNCTION
+  */
+-void trans_ibs_op_ls (struct transient * trans, unsigned int selected_flag, unsigned int size)
++void trans_ibs_op_ls (struct transient * trans, unsigned int selected_flag)
+ {
+ 	struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op;
+-	unsigned int i, j, mask = 1;
+ 
+ 	/* Preliminary check */
+ 	if (!IBS_OP_IBS_LD_OP(trans_op) && !IBS_OP_IBS_ST_OP(trans_op))
+ 		return;
+ 
+ 
+-	for (i = IBS_OP_LS_BASE, j =0 ; i <= IBS_OP_LS_END && j < size ; i++, mask = mask << 1) {
++	if ((selected_flag) == 0)
++		return;
++
++	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_ALL_OP) {
++		/* Count the number of LS op samples */
++		AGG_IBS_EVENT(DE_IBS_LS_ALL_OP) ;
++	}
++
++	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_LOAD_OP) {
++		if (IBS_OP_IBS_LD_OP(trans_op))
++			/* TALLy an IBS load derived event */
++			AGG_IBS_EVENT(DE_IBS_LS_LOAD_OP) ;
++	}
+ 
+-		if ((selected_flag & mask) == 0)
+-			continue;
++	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_STORE_OP) {
++		if (IBS_OP_IBS_ST_OP(trans_op))
++			/* Count and handle store operations */
++			AGG_IBS_EVENT(DE_IBS_LS_STORE_OP);
++	}
++
++	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_DTLB_L1H) {
++		if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
++		    && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op))
++			/* L1 DTLB hit -- This is the most frequent case */
++			AGG_IBS_EVENT(DE_IBS_LS_DTLB_L1H);
++	}
+ 
+-		j++;
++	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_DTLB_L1M_L2H) {
++		/* l2_translation_size = 1 */
++		if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
++		    && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
++		    && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op))
++			/* L1 DTLB miss, L2 DTLB hit */
++			AGG_IBS_EVENT(DE_IBS_LS_DTLB_L1M_L2H);
++	}
+ 
+-		switch (i) {
++	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_DTLB_L1M_L2M) {
++		if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
++		    && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
++		    && IBS_OP_IBS_DC_L2_TLB_MISS(trans_op))
++			/* L1 DTLB miss, L2 DTLB miss */
++			AGG_IBS_EVENT(DE_IBS_LS_DTLB_L1M_L2M);
++	}
+ 
+-		case DE_IBS_LS_ALL_OP:
+-			/* Count the number of LS op samples */
+-			AGG_IBS_EVENT(DE_IBS_LS_ALL_OP) ;
+-			break;
+-
+-		case DE_IBS_LS_LOAD_OP:
+-			if (IBS_OP_IBS_LD_OP(trans_op))
+-				/* TALLy an IBS load derived event */
+-				AGG_IBS_EVENT(DE_IBS_LS_LOAD_OP) ;
+-			break;
+-
+-		case DE_IBS_LS_STORE_OP:
+-			if (IBS_OP_IBS_ST_OP(trans_op))
+-				/* Count and handle store operations */
+-				AGG_IBS_EVENT(DE_IBS_LS_STORE_OP);
+-			break;
+-
+-		case DE_IBS_LS_DTLB_L1H:
+-			if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
+-			    && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op))
+-				/* L1 DTLB hit -- This is the most frequent case */
+-				AGG_IBS_EVENT(DE_IBS_LS_DTLB_L1H);
+-			break;
+-
+-		case DE_IBS_LS_DTLB_L1M_L2H:
+-			/* l2_translation_size = 1 */
+-			if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
+-			    && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
+-			    && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op))
+-				/* L1 DTLB miss, L2 DTLB hit */
+-				AGG_IBS_EVENT(DE_IBS_LS_DTLB_L1M_L2H);
+-			break;
+-
+-		case DE_IBS_LS_DTLB_L1M_L2M:
+-			if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
+-			    && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
+-			    && IBS_OP_IBS_DC_L2_TLB_MISS(trans_op))
+-				/* L1 DTLB miss, L2 DTLB miss */
+-				AGG_IBS_EVENT(DE_IBS_LS_DTLB_L1M_L2M);
+-			break;
+-
+-		case DE_IBS_LS_DC_MISS:
+-			if (IBS_OP_IBS_DC_MISS(trans_op))
+-				AGG_IBS_EVENT(DE_IBS_LS_DC_MISS);
+-			break;
+-
+-		case DE_IBS_LS_DC_HIT:
+-			if (!IBS_OP_IBS_DC_MISS(trans_op))
+-				AGG_IBS_EVENT(DE_IBS_LS_DC_HIT);
+-			break;
+-
+-		case DE_IBS_LS_MISALIGNED:
+-			if (IBS_OP_IBS_DC_MISS_ACC(trans_op))
+-				AGG_IBS_EVENT(DE_IBS_LS_MISALIGNED);
+-			break;
+-
+-		case DE_IBS_LS_BNK_CONF_LOAD:
+-			if (IBS_OP_IBS_DC_LD_BNK_CON(trans_op))
+-				AGG_IBS_EVENT(DE_IBS_LS_BNK_CONF_LOAD);
+-			break;
+-
+-		case DE_IBS_LS_BNK_CONF_STORE:
+-			if (IBS_OP_IBS_DC_ST_BNK_CON(trans_op))
+-				AGG_IBS_EVENT(DE_IBS_LS_BNK_CONF_STORE);
+-			break;
+-
+-		case DE_IBS_LS_STL_FORWARDED:
+-			if (IBS_OP_IBS_LD_OP(trans_op)
+-			    /* Data forwarding info are valid only for load ops */
+-			    && IBS_OP_IBS_DC_ST_TO_LD_FWD(trans_op))
+-				AGG_IBS_EVENT(DE_IBS_LS_STL_FORWARDED) ;
+-			break;
+-
+-		case DE_IBS_LS_STL_CANCELLED:
+-			if (IBS_OP_IBS_LD_OP(trans_op))
+-			if (IBS_OP_IBS_DC_ST_TO_LD_CAN(trans_op))
+-				AGG_IBS_EVENT(DE_IBS_LS_STL_CANCELLED) ;
+-			break;
+-
+-		case DE_IBS_LS_UC_MEM_ACCESS:
+-			if (IBS_OP_IBS_DC_UC_MEM_ACC(trans_op))
+-				AGG_IBS_EVENT(DE_IBS_LS_UC_MEM_ACCESS);
+-			break;
+-
+-		case DE_IBS_LS_WC_MEM_ACCESS:
+-			if (IBS_OP_IBS_DC_WC_MEM_ACC(trans_op))
+-				AGG_IBS_EVENT(DE_IBS_LS_WC_MEM_ACCESS);
+-			break;
+-
+-		case DE_IBS_LS_LOCKED_OP:
+-			if (IBS_OP_IBS_LOCKED_OP(trans_op))
+-				AGG_IBS_EVENT(DE_IBS_LS_LOCKED_OP);
+-			break;
+-
+-		case DE_IBS_LS_MAB_HIT:
+-			if (IBS_OP_IBS_DC_MAB_HIT(trans_op))
+-				AGG_IBS_EVENT(DE_IBS_LS_MAB_HIT);
+-			break;
+-
+-		case DE_IBS_LS_L1_DTLB_4K:
+-			/* l1_translation */
+-			if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
+-			    && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
+-
+-			    && !IBS_OP_IBS_DC_L1_TLB_HIT_2MB(trans_op)
+-			    && !IBS_OP_IBS_DC_L1_TLB_HIT_1GB(trans_op))
+-				/* This is the most common case, unfortunately */
+-				AGG_IBS_EVENT(DE_IBS_LS_L1_DTLB_4K) ;
+-			break;
+-
+-		case DE_IBS_LS_L1_DTLB_2M:
+-			/* l1_translation */
+-			if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
+-			    && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
+-
+-			    && IBS_OP_IBS_DC_L1_TLB_HIT_2MB(trans_op))
+-				/* 2M L1 DTLB page translation */
+-				AGG_IBS_EVENT(DE_IBS_LS_L1_DTLB_2M);
+-			break;
+-
+-		case DE_IBS_LS_L1_DTLB_1G:
+-			/* l1_translation */
+-			if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
+-			    && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
+-
+-			    && !IBS_OP_IBS_DC_L1_TLB_HIT_2MB(trans_op)
+-			    && IBS_OP_IBS_DC_L1_TLB_HIT_1GB(trans_op))
+-				/* 1G L1 DTLB page translation */
+-				AGG_IBS_EVENT(DE_IBS_LS_L1_DTLB_1G);
+-			break;
+-
+-		case DE_IBS_LS_L1_DTLB_RES:
+-			break;
+-
+-		case DE_IBS_LS_L2_DTLB_4K:
+-			/* l2_translation_size = 1 */
+-			if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
+-			    && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
+-			    && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op)
+-
+-			    /* L2 DTLB page translation */
+-			    && !IBS_OP_IBS_DC_L2_TLB_HIT_2MB(trans_op)
+-			    && !IBS_OP_IBS_DC_L2_TLB_HIT_1GB(trans_op))
+-				/* 4K L2 DTLB page translation */
+-				AGG_IBS_EVENT(DE_IBS_LS_L2_DTLB_4K);
+-			break;
+-
+-		case DE_IBS_LS_L2_DTLB_2M:
+-			/* l2_translation_size = 1 */
+-			if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
+-			    && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
+-			    && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op)
+-
+-			    /* L2 DTLB page translation */
+-			    && IBS_OP_IBS_DC_L2_TLB_HIT_2MB(trans_op)
+-			    && !IBS_OP_IBS_DC_L2_TLB_HIT_1GB(trans_op))
+-				/* 2M L2 DTLB page translation */
+-				AGG_IBS_EVENT(DE_IBS_LS_L2_DTLB_2M);
+-			break;
+-
+-		case DE_IBS_LS_L2_DTLB_1G:
+-			/* l2_translation_size = 1 */
+-			if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
+-			    && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
+-			    && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op)
+-
+-			    /* L2 DTLB page translation */
+-			    && !IBS_OP_IBS_DC_L2_TLB_HIT_2MB(trans_op)
+-			    && IBS_OP_IBS_DC_L2_TLB_HIT_1GB(trans_op))
+-				/* 2M L2 DTLB page translation */
+-				AGG_IBS_EVENT(DE_IBS_LS_L2_DTLB_1G);
+-			break;
+-
+-		case DE_IBS_LS_L2_DTLB_RES2:
+-			break;
+-
+-		case DE_IBS_LS_DC_LOAD_LAT:
+-			if (IBS_OP_IBS_LD_OP(trans_op)
+-			    /* If the load missed in DC, tally the DC load miss latency */
+-			    && IBS_OP_IBS_DC_MISS(trans_op))
+-				/* DC load miss latency is only reliable for load ops */
+-				AGG_IBS_COUNT(DE_IBS_LS_DC_LOAD_LAT,
+-					      IBS_OP_DC_MISS_LATENCY(trans_op)) ;
+-			break;
++	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_DC_MISS) {
++		if (IBS_OP_IBS_DC_MISS(trans_op))
++			AGG_IBS_EVENT(DE_IBS_LS_DC_MISS);
++	}
+ 
+-		default:
+-			break;
+-		}
++	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_DC_HIT) {
++		if (!IBS_OP_IBS_DC_MISS(trans_op))
++			AGG_IBS_EVENT(DE_IBS_LS_DC_HIT);
++	}
++
++	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_MISALIGNED) {
++		if (IBS_OP_IBS_DC_MISS_ACC(trans_op))
++			AGG_IBS_EVENT(DE_IBS_LS_MISALIGNED);
++	}
++
++	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_BNK_CONF_LOAD) {
++		if (IBS_OP_IBS_DC_LD_BNK_CON(trans_op))
++			AGG_IBS_EVENT(DE_IBS_LS_BNK_CONF_LOAD);
++	}
++
++	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_BNK_CONF_STORE) {
++		if (IBS_OP_IBS_DC_ST_BNK_CON(trans_op))
++			AGG_IBS_EVENT(DE_IBS_LS_BNK_CONF_STORE);
++	}
++
++	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_STL_FORWARDED) {
++		if (IBS_OP_IBS_LD_OP(trans_op)
++		    /* Data forwarding info are valid only for load ops */
++		    && IBS_OP_IBS_DC_ST_TO_LD_FWD(trans_op))
++			AGG_IBS_EVENT(DE_IBS_LS_STL_FORWARDED) ;
++	}
++
++	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_STL_CANCELLED) {
++		if (IBS_OP_IBS_LD_OP(trans_op))
++		if (IBS_OP_IBS_DC_ST_TO_LD_CAN(trans_op))
++			AGG_IBS_EVENT(DE_IBS_LS_STL_CANCELLED) ;
++	}
++
++	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_UC_MEM_ACCESS) {
++		if (IBS_OP_IBS_DC_UC_MEM_ACC(trans_op))
++			AGG_IBS_EVENT(DE_IBS_LS_UC_MEM_ACCESS);
++	}
++
++	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_WC_MEM_ACCESS) {
++		if (IBS_OP_IBS_DC_WC_MEM_ACC(trans_op))
++			AGG_IBS_EVENT(DE_IBS_LS_WC_MEM_ACCESS);
++	}
++
++	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_LOCKED_OP) {
++		if (IBS_OP_IBS_LOCKED_OP(trans_op))
++			AGG_IBS_EVENT(DE_IBS_LS_LOCKED_OP);
++	}
++
++	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_MAB_HIT) {
++		if (IBS_OP_IBS_DC_MAB_HIT(trans_op))
++			AGG_IBS_EVENT(DE_IBS_LS_MAB_HIT);
++	}
++
++	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L1_DTLB_4K) {
++		/* l1_translation */
++		if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
++		    && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
++
++		    && !IBS_OP_IBS_DC_L1_TLB_HIT_2MB(trans_op)
++		    && !IBS_OP_IBS_DC_L1_TLB_HIT_1GB(trans_op))
++			/* This is the most common case, unfortunately */
++			AGG_IBS_EVENT(DE_IBS_LS_L1_DTLB_4K) ;
++	}
++
++	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L1_DTLB_2M) {
++		/* l1_translation */
++		if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
++		    && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
++
++		    && IBS_OP_IBS_DC_L1_TLB_HIT_2MB(trans_op))
++			/* 2M L1 DTLB page translation */
++			AGG_IBS_EVENT(DE_IBS_LS_L1_DTLB_2M);
++	}
++
++	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L1_DTLB_1G) {
++		/* l1_translation */
++		if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
++		    && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
++
++		    && !IBS_OP_IBS_DC_L1_TLB_HIT_2MB(trans_op)
++		    && IBS_OP_IBS_DC_L1_TLB_HIT_1GB(trans_op))
++			/* 1G L1 DTLB page translation */
++			AGG_IBS_EVENT(DE_IBS_LS_L1_DTLB_1G);
++	}
++
++	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L1_DTLB_RES) {
++	}
++
++	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L2_DTLB_4K) {
++		/* l2_translation_size = 1 */
++		if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
++		    && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
++		    && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op)
++
++		    /* L2 DTLB page translation */
++		    && !IBS_OP_IBS_DC_L2_TLB_HIT_2MB(trans_op)
++		    && !IBS_OP_IBS_DC_L2_TLB_HIT_1GB(trans_op))
++			/* 4K L2 DTLB page translation */
++			AGG_IBS_EVENT(DE_IBS_LS_L2_DTLB_4K);
++	}
++
++	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L2_DTLB_2M) {
++		/* l2_translation_size = 1 */
++		if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
++		    && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
++		    && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op)
++
++		    /* L2 DTLB page translation */
++		    && IBS_OP_IBS_DC_L2_TLB_HIT_2MB(trans_op)
++		    && !IBS_OP_IBS_DC_L2_TLB_HIT_1GB(trans_op))
++			/* 2M L2 DTLB page translation */
++			AGG_IBS_EVENT(DE_IBS_LS_L2_DTLB_2M);
++	}
++
++	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L2_DTLB_1G) {
++		/* l2_translation_size = 1 */
++		if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
++		    && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
++		    && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op)
++
++		    /* L2 DTLB page translation */
++		    && !IBS_OP_IBS_DC_L2_TLB_HIT_2MB(trans_op)
++		    && IBS_OP_IBS_DC_L2_TLB_HIT_1GB(trans_op))
++			/* 2M L2 DTLB page translation */
++			AGG_IBS_EVENT(DE_IBS_LS_L2_DTLB_1G);
++	}
++
++	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L2_DTLB_RES2) {
++	}
++
++	CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_DC_LOAD_LAT) {
++		if (IBS_OP_IBS_LD_OP(trans_op)
++		    /* If the load missed in DC, tally the DC load miss latency */
++		    && IBS_OP_IBS_DC_MISS(trans_op))
++			/* DC load miss latency is only reliable for load ops */
++			AGG_IBS_COUNT(DE_IBS_LS_DC_LOAD_LAT,
++				      IBS_OP_DC_MISS_LATENCY(trans_op)) ;
+ 	}
+ }
+ 
+@@ -443,12 +411,14 @@
+  * that miss in L1 and L2 cache. NB data arrives too late
+  * to be reliable for store operations
+  */
+-void trans_ibs_op_nb (struct transient * trans, unsigned int selected_flag, unsigned int size)
++void trans_ibs_op_nb (struct transient * trans, unsigned int selected_flag)
+ {
+ 	struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op;
+-	unsigned int i, j, mask = 1;
+ 
+ 	/* Preliminary check */
++	if ((selected_flag) == 0)
++		return;
++
+ 	if (!IBS_OP_IBS_LD_OP(trans_op))
+ 		return;
+ 
+@@ -458,97 +428,219 @@
+ 	if (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0)
+ 		return;
+ 
+-	for (i = IBS_OP_NB_BASE, j =0 ; i <= IBS_OP_NB_END && j < size ; i++, mask = mask << 1) {
++	CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_LOCAL) {
++		if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op))
++			/* Request was serviced by local processor */
++			AGG_IBS_EVENT(DE_IBS_NB_LOCAL) ;
++	}
+ 
+-		if ((selected_flag & mask) == 0)
+-			continue;
++	CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_REMOTE) {
++		if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op))
++			/* Request was serviced by remote processor */
++			AGG_IBS_EVENT(DE_IBS_NB_REMOTE) ;
++	}
+ 
+-		j++;
++	CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_LOCAL_L3) {
++		if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
++		    &&  IBS_OP_NB_IBS_REQ_SRC_01(trans_op))
++			AGG_IBS_EVENT(DE_IBS_NB_LOCAL_L3);
++	}
++
++	CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_LOCAL_CACHE) {
++		if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
++		    &&  IBS_OP_NB_IBS_REQ_SRC_02(trans_op))
++			AGG_IBS_EVENT(DE_IBS_NB_LOCAL_CACHE);
++	}
++
++	CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_REMOTE_CACHE) {
++		if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
++		    &&  IBS_OP_NB_IBS_REQ_SRC_02(trans_op))
++			AGG_IBS_EVENT(DE_IBS_NB_REMOTE_CACHE) ;
++	}
+ 
+-		switch (i) {
++	CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_LOCAL_DRAM) {
++		if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
++		    &&  IBS_OP_NB_IBS_REQ_SRC_03(trans_op))
++			AGG_IBS_EVENT(DE_IBS_NB_LOCAL_DRAM);
++	}
+ 
+-		case DE_IBS_NB_LOCAL:
+-			if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op))
+-				/* Request was serviced by local processor */
+-				AGG_IBS_EVENT(DE_IBS_NB_LOCAL) ;
+-			break;
+-
+-		case DE_IBS_NB_REMOTE:
+-			if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op))
+-				/* Request was serviced by remote processor */
+-				AGG_IBS_EVENT(DE_IBS_NB_REMOTE) ;
+-			break;
+-
+-		case DE_IBS_NB_LOCAL_L3:
+-			if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
+-			    && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x1))
+-				AGG_IBS_EVENT(DE_IBS_NB_LOCAL_L3);
+-			break;
+-
+-		case DE_IBS_NB_LOCAL_CACHE:
+-			if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
+-			    && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x2))
+-				AGG_IBS_EVENT(DE_IBS_NB_LOCAL_CACHE);
+-			break;
+-
+-		case DE_IBS_NB_REMOTE_CACHE:
+-			if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
+-			    && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x2))
+-				AGG_IBS_EVENT(DE_IBS_NB_REMOTE_CACHE) ;
+-			break;
+-
+-		case DE_IBS_NB_LOCAL_DRAM:
+-			if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
+-			    && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x3))
+-				AGG_IBS_EVENT(DE_IBS_NB_LOCAL_DRAM);
+-			break;
+-
+-		case DE_IBS_NB_REMOTE_DRAM:
+-			if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
+-			    && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x3))
+-				AGG_IBS_EVENT(DE_IBS_NB_REMOTE_DRAM) ;
+-			break;
+-
+-		case DE_IBS_NB_LOCAL_OTHER:
+-			if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
+-			    && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x7))
+-				AGG_IBS_EVENT(DE_IBS_NB_LOCAL_OTHER);
+-			break;
+-
+-		case DE_IBS_NB_REMOTE_OTHER:
+-			if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
+-			    && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x7))
+-				AGG_IBS_EVENT(DE_IBS_NB_REMOTE_OTHER) ;
+-			break;
+-
+-		case DE_IBS_NB_CACHE_STATE_M:
+-			if ((IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x2)
+-			    && !IBS_OP_NB_IBS_CACHE_HIT_ST(trans_op))
+-				AGG_IBS_EVENT(DE_IBS_NB_CACHE_STATE_M) ;
+-			break;
+-
+-		case DE_IBS_NB_CACHE_STATE_O:
+-			if ((IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x2)
+-			    && IBS_OP_NB_IBS_CACHE_HIT_ST(trans_op))
+-				AGG_IBS_EVENT(DE_IBS_NB_CACHE_STATE_O) ;
+-			break;
+-
+-		case DE_IBS_NB_LOCAL_LATENCY:
+-			if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op))
+-				/* Request was serviced by local processor */
+-				AGG_IBS_COUNT(DE_IBS_NB_LOCAL_LATENCY,
+-					      IBS_OP_DC_MISS_LATENCY(trans_op));
+-			break;
+-
+-		case DE_IBS_NB_REMOTE_LATENCY:
+-			if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op))
+-				/* Request was serviced by remote processor */
+-				AGG_IBS_COUNT(DE_IBS_NB_REMOTE_LATENCY,
+-					      IBS_OP_DC_MISS_LATENCY(trans_op));
+-			break;
++	CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_REMOTE_DRAM) {
++		if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
++		    &&  IBS_OP_NB_IBS_REQ_SRC_03(trans_op))
++			AGG_IBS_EVENT(DE_IBS_NB_REMOTE_DRAM) ;
++	}
+ 
+-		default:
+-			break;
++	CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_LOCAL_OTHER) {
++		if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
++		    &&  IBS_OP_NB_IBS_REQ_SRC_07(trans_op))
++			AGG_IBS_EVENT(DE_IBS_NB_LOCAL_OTHER);
++	}
++
++	CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_REMOTE_OTHER) {
++		if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
++		    &&  IBS_OP_NB_IBS_REQ_SRC_07(trans_op))
++			AGG_IBS_EVENT(DE_IBS_NB_REMOTE_OTHER) ;
++	}
++
++	CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_CACHE_STATE_M) {
++		if (IBS_OP_NB_IBS_REQ_SRC_02(trans_op)
++		    && !IBS_OP_NB_IBS_CACHE_HIT_ST(trans_op))
++			AGG_IBS_EVENT(DE_IBS_NB_CACHE_STATE_M) ;
++	}
++
++	CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_CACHE_STATE_O) {
++		if (IBS_OP_NB_IBS_REQ_SRC_02(trans_op)
++		    && IBS_OP_NB_IBS_CACHE_HIT_ST(trans_op))
++			AGG_IBS_EVENT(DE_IBS_NB_CACHE_STATE_O) ;
++	}
++
++	CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_LOCAL_LATENCY) {
++		if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op))
++			/* Request was serviced by local processor */
++			AGG_IBS_COUNT(DE_IBS_NB_LOCAL_LATENCY,
++				      IBS_OP_DC_MISS_LATENCY(trans_op));
++	}
++
++	CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_REMOTE_LATENCY) {
++		if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op))
++			/* Request was serviced by remote processor */
++			AGG_IBS_COUNT(DE_IBS_NB_REMOTE_LATENCY,
++				      IBS_OP_DC_MISS_LATENCY(trans_op));
++	}
++}
++
++
++int trans_ibs_op_rip_invalid (struct transient * trans)
++{
++	struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op;
++
++	if (IBS_OP_RIP_INVALID(trans_op))
++		return 1;	
++
++	return 0;
++}
++
++
++void trans_ibs_op_mask_reserved (unsigned int family, struct transient * trans)
++{
++	struct ibs_op_sample * trans_op    = ((struct ibs_sample*)(trans->ext))->op;
++
++	switch (family) {
++	case 0x10:
++		/* Reserved IbsRipInvalid (MSRC001_1035[38])*/
++		trans_op->ibs_op_data1_high &= ~MASK_RIP_INVALID;
++		break;
++	case 0x12:
++		/* Reserved NbIbsReqDstProc (MSRCC001_1036[4]) */
++		trans_op->ibs_op_data2_low &= ~NB_MASK_REQ_DST_PROC;
++		/* Reserved NbIbsReqCacheHitSt (MSRCC001_1036[5]) */
++		trans_op->ibs_op_data2_low &= ~NB_MASK_L3_STATE;
++		break;
++	case 0x14:
++		/* Reserved NbIbsReqDstProc (MSRCC001_1036[4]) */
++		trans_op->ibs_op_data2_low &= ~NB_MASK_REQ_DST_PROC;
++		/* Reserved NbIbsReqCacheHitSt (MSRCC001_1036[5]) */
++		trans_op->ibs_op_data2_low &= ~NB_MASK_L3_STATE;
++		/* Reserved IbsDcL1tlbHit1G (MSRC001_1037[5]) */
++		trans_op->ibs_op_data3_low &= ~DC_MASK_L1_HIT_1G;
++		/* Reserved IbsDcLdBnkCon (MSRC001_1037[9]) */
++		trans_op->ibs_op_data3_low &= ~DC_MASK_LD_BANK_CONFLICT;
++		/* Reserved IbsDcStBnkCon (MSRC001_1037[10]) */
++		trans_op->ibs_op_data3_low &= ~DC_MASK_ST_BANK_CONFLICT;
++		/* Reserved IbsDcStToLdCan (MSRC001_1037[12]) */
++		trans_op->ibs_op_data3_low &= ~DC_MASK_ST_TO_LD_CANCEL;
++		/* Reserved IbsDcL2tlbHit1G (MSRC001_1037[19]) */
++		trans_op->ibs_op_data3_low &= ~DC_MASK_L2_HIT_1G;
++		
++		break;
++	case 0x15:
++	default:
++		break;
++	
++	}
++}
++
++
++void trans_ibs_op_bta(struct transient * trans)
++{
++	static cookie_t old_cookie     = NO_COOKIE;
++	static cookie_t old_app_cookie = NO_COOKIE;
++	static char const * mod        = NULL;
++	static char const * app        = NULL;
++	const char vmlinux[10]         = "vmlinux";
++	struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op;
++
++	if (!bta_log)
++		return;
++
++	if (!trans_op->ibs_op_brtgt_addr)
++		return;
++
++	if( old_app_cookie == INVALID_COOKIE 
++	||  old_app_cookie == NO_COOKIE 
++	||  old_app_cookie != trans->app_cookie) {
++		app = find_cookie(trans->app_cookie);
++		old_app_cookie = trans->app_cookie;
++	}
++
++	if (trans->in_kernel == 1) {
++		mod = vmlinux;
++		old_cookie = NO_COOKIE;
++	} else {
++		if( old_cookie == INVALID_COOKIE 
++		||  old_cookie == NO_COOKIE 
++		||  old_cookie != trans->cookie) {
++			mod = find_cookie(trans->cookie);
++			old_cookie = trans->cookie;
+ 		}
+ 	}
++
++	fprintf(bta_log, "0x%016llx,0x%016llx,%02lu %08u,%08u,0x%08x,0x%08lx\n",
++                        trans->app_cookie, trans->cookie, trans->cpu, trans->tgid, trans->tid, (unsigned int)trans->pc,
++			trans_op->ibs_op_brtgt_addr);
++}
++
++
++void trans_ibs_op_ls_memaccess(struct transient * trans)
++{
++	static cookie_t old_cookie     = NO_COOKIE;
++	static cookie_t old_app_cookie = NO_COOKIE;
++	static char const * mod        = NULL;
++	static char const * app        = NULL;
++	const char vmlinux[10]         = "vmlinux";
++	struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op;
++
++	if (!memaccess_log)
++		return;
++
++	if( old_app_cookie == INVALID_COOKIE 
++	||  old_app_cookie == NO_COOKIE 
++	||  old_app_cookie != trans->app_cookie) {
++		app = find_cookie(trans->app_cookie);
++		old_app_cookie = trans->app_cookie;
++	}
++
++	if (trans->in_kernel == 1) {
++		mod = vmlinux;
++		old_cookie = NO_COOKIE;
++	} else {
++		if( old_cookie == INVALID_COOKIE 
++		||  old_cookie == NO_COOKIE 
++		||  old_cookie != trans->cookie) {
++			mod = find_cookie(trans->cookie);
++			old_cookie = trans->cookie;
++		}
++	}
++
++	fprintf(memaccess_log, "0x%016llx,0x%016llx,%02lu,%08u,%08u,0x%08x,0x%08u:%08x,0x%08x:%08x,%s,%08u\n",
++                        trans->app_cookie, 
++trans->cookie, 
++trans->cpu, 
++trans->tgid, 
++trans->tid, 
++(unsigned int)trans->pc, 
++			trans_op->ibs_op_phys_addr_high, trans_op->ibs_op_phys_addr_low, 
++			trans_op->ibs_op_ldst_linaddr_high, trans_op->ibs_op_ldst_linaddr_low, 
++			(IBS_OP_IBS_LD_OP(trans_op))? "LD": "ST", 
++			(unsigned int) IBS_OP_DC_MISS_LATENCY(trans_op));
+ }
+Index: oprofile/daemon/opd_ibs_trans.h
+diff -u oprofile/daemon/opd_ibs_trans.h:1.1 oprofile/daemon/opd_ibs_trans.h:1.2
+--- oprofile/daemon/opd_ibs_trans.h:1.1	Fri Apr 17 18:40:43 2009
++++ oprofile/daemon/opd_ibs_trans.h	Mon Nov 29 15:05:44 2010
+@@ -24,8 +24,12 @@
+ };
+ 
+ 
+-extern void trans_ibs_fetch (struct transient * trans, unsigned int selected_flag, unsigned int size);
+-extern void trans_ibs_op (struct transient * trans, unsigned int selected_flag, unsigned int size);
+-extern void trans_ibs_op_ls (struct transient * trans, unsigned int selected_flag, unsigned int size);
+-extern void trans_ibs_op_nb (struct transient * trans, unsigned int selected_flag, unsigned int size);
++extern void trans_ibs_fetch (struct transient * trans, unsigned int selected_flag);
++extern void trans_ibs_op (struct transient * trans, unsigned int selected_flag);
++extern void trans_ibs_op_ls (struct transient * trans, unsigned int selected_flag);
++extern void trans_ibs_op_nb (struct transient * trans, unsigned int selected_flag);
++extern int  trans_ibs_op_rip_invalid (struct transient * trans);
++extern void trans_ibs_op_mask_reserved (unsigned int family, struct transient * trans);
++extern void trans_ibs_op_ls_memaccess(struct transient * trans);
++extern void trans_ibs_op_bta (struct transient * trans);
+ #endif // OPD_IBS_TRANS_H
+Index: oprofile/events/x86-64/family10/unit_masks
+diff -u oprofile/events/x86-64/family10/unit_masks:1.6 oprofile/events/x86-64/family10/unit_masks:1.7
+--- oprofile/events/x86-64/family10/unit_masks:1.6	Fri Oct 23 14:21:09 2009
++++ oprofile/events/x86-64/family10/unit_masks	Mon Nov 29 15:05:44 2010
+@@ -363,6 +363,7 @@
+ name:ibs_op type:bitmask default:0x01
+ 	0x00 Using IBS OP cycle count mode
+ 	0x01 Using IBS OP dispatch count mode
++	0x02 Enable IBS OP Memory Access Log 
+ name:non_cancelled_l3_read_requests type:bitmask default:0xf7
+ 	0x01 RbBlk
+ 	0x02 RbBlkS
+Index: oprofile/utils/opcontrol
+diff -u oprofile/utils/opcontrol:1.169 oprofile/utils/opcontrol:1.170
+--- oprofile/utils/opcontrol:1.169	Fri Aug 13 15:42:16 2010
++++ oprofile/utils/opcontrol	Mon Nov 29 15:05:44 2010
+@@ -1867,13 +1867,20 @@
+ 			IBS_FETCH_COUNT=$IBS_COUNT
+ 			IBS_FETCH_MASK=$IBS_MASK
+ 		elif test "$IBS_FETCH_COUNT" != "$IBS_COUNT" ; then
+-			echo "All IBS Fetch must have the same count."
++			echo "ERROR: All IBS Fetch must have the same count."
+ 			exit 1
+ 		fi
+ 
+ 		# Check IBS_MASK consistency
+ 		if test "$IBS_FETCH_MASK" != "$IBS_MASK" ; then
+-			echo "All IBS Fetch must have the same unitmask."
++			echo "ERROR: All IBS Fetch must have the same unitmask."
++			exit 1
++		fi
++
++		# Check IBS_FETCH_COUNT within range
++		if test "$IBS_FETCH_COUNT" -gt 1048575 ; then 
++			echo "ERROR: IBS Fetch count is too large."
++			echo "       The maximum IBS-fetch count is 1048575."
+ 			exit 1
+ 		fi
+ 
+@@ -1892,6 +1899,28 @@
+ 			echo "All IBS Op must have the same unitmask."
+ 			exit 1
+ 		fi
++		
++		# Check IBS_OP_COUNT within range
++		case "$CPUTYPE" in
++			x86-64/family10)
++				if test "$IBS_OP_COUNT" -gt 1048575 ; then 
++					echo "ERROR: IBS Op count is too large."
++					echo "       The maximum IBS-Op count is 1048575."
++					exit 1
++				fi
++				;;
++
++			x86-64/family12h|\
++			x86-64/family14h|\
++			x86-64/family15h)
++				if test "$IBS_OP_COUNT" -gt 134217727 ; then 
++					echo "ERROR: IBS Op count is too large."
++					echo "       The maximum IBS-Op count is 134217727."
++					exit 1
++				fi
++				;;
++			*)
++		esac
+ 	fi
+ 
+ 	return
+@@ -1944,10 +1973,32 @@
+ 			# NOTE: We default to use dispatched_op if available. 
+ 			#       Some of the older family10 system does not have
+ 			#       dispatched_ops feature.
+-			#	dispatched op is enabled by bit 1 of the unitmask
++			#       Dispatched op is enabled by bit 0 of the unitmask
++			IBS_OP_DISPATCHED_OP=$(( IBS_OP_UNITMASK & 0x1 ))
+ 			if test -f $MOUNT/ibs_op/dispatched_ops ; then
+-				IBS_OP_DISPATCHED_OP=$(( IBS_OP_UNITMASK & 0x1 ))
+ 				set_param ibs_op/dispatched_ops $IBS_OP_DISPATCHED_OP
++			else
++				if test $IBS_OP_DISPATCHED_OP -eq 1 ; then
++					echo "ERROR: IBS Op dispatched ops is not supported."
++					exit 1
++				fi
++			fi
++		
++			# NOTE: BTA is enabled by bit 2 of the unitmask
++			IBS_OP_BTA=$(( IBS_OP_UNITMASK & 0x4 ))
++			if test -f $MOUNT/ibs_op/branch_target; then
++				if [ "$IBS_OP_BTA" = "4" ] ; then
++					set_param ibs_op/branch_target 1
++				else
++					set_param ibs_op/branch_target 0
++				fi
++
++				# TODO: Check if write successful
++			else
++				if test $IBS_OP_BTA -eq 1 ; then
++					echo "ERROR: IBS Op Branch Target Address is not supported."
++					exit 1
++				fi
+ 			fi
+ 		else
+ 			set_param ibs_op/enable 0
+Index: oprofile/daemon/opd_ibs.c
+diff -u oprofile/daemon/opd_ibs.c:1.3 oprofile/daemon/opd_ibs.c:1.4
+--- oprofile/daemon/opd_ibs.c:1.3	Mon Nov 29 15:05:44 2010
++++ oprofile/daemon/opd_ibs.c	Tue Dec 14 17:40:20 2010
+@@ -34,21 +34,6 @@
+ #include <string.h>
+ #include <limits.h>
+ 
+-#if defined(__i386__) && defined(__PIC__)
+-/* %ebx may be the PIC register.  */
+-        #define __cpuid(level, a, b, c, d)                      \
+-          __asm__ ("xchgl\t%%ebx, %1\n\t"                       \
+-                   "cpuid\n\t"                                  \
+-                   "xchgl\t%%ebx, %1\n\t"                       \
+-                   : "=a" (a), "=r" (b), "=c" (c), "=d" (d)     \
+-                   : "0" (level))
+-#else
+-        #define __cpuid(level, a, b, c, d)                      \
+-          __asm__ ("cpuid\n\t"                                  \
+-                   : "=a" (a), "=b" (b), "=c" (c), "=d" (d)     \
+-                   : "0" (level))
+-#endif
+-
+ extern op_cpu cpu_type;
+ extern int no_event_ok;
+ extern int sfile_equal(struct sfile const * sf, struct sfile const * sf2);
+@@ -495,6 +480,7 @@
+ 
+ static void check_cpuid_family_model_stepping()
+ {
++#if defined(__i386__) || defined(__x86_64__) 
+        union {
+                 unsigned eax;
+                 struct {
+@@ -510,11 +496,16 @@
+ 	unsigned ebx, ecx, edx;
+ 
+ 	/* CPUID Fn0000_0001_EAX Family, Model, Stepping */
+-	__cpuid(1, v.eax, ebx, ecx, edx);
++	asm ("cpuid" : "=a" (v.eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "0" (1));
+ 
+ 	ibs_family   = v.family + v.ext_family;
+ 	ibs_model    = v.model + v.ext_model;
+ 	ibs_stepping = v.stepping;
++#else
++	ibs_family   = 0;
++	ibs_model    = 0;
++	ibs_stepping = 0;
++#endif
+ }
+ 
+ 
diff --git a/oprofile-check.patch b/oprofile-check.patch
new file mode 100644
index 0000000..4a65e8e
--- /dev/null
+++ b/oprofile-check.patch
@@ -0,0 +1,82 @@
+---------------------
+PatchSet 2938 
+Date: 2011/01/05 16:16:06
+Author: maynardj
+Branch: HEAD
+Tag: (none) 
+Branches: 
+Log:
+Add argument checking for numerical arguments
+
+Members: 
+	ChangeLog:1.1940->1.1941 
+	utils/opcontrol:1.170->1.171 
+
+Index: oprofile/utils/opcontrol
+diff -u oprofile/utils/opcontrol:1.170 oprofile/utils/opcontrol:1.171
+--- oprofile/utils/opcontrol:1.170	Mon Nov 29 15:05:44 2010
++++ oprofile/utils/opcontrol	Wed Jan  5 21:16:08 2011
+@@ -49,6 +49,31 @@
+ 	fi
+ }
+ 
++# guess_number_base() checks if string is a valid octal(8), hexadecimal(16),
++# or decimal number(10). The value is returned in $?. Returns 0, if string
++# isn't an octal, hexadecimal, or decimal number.
++guess_number_base()
++{
++	if [[ "$1" =~ ^0[0-7]*$ ]] ; then 
++		return 8;
++	elif [[ "$1" =~ ^0x[0-9a-fA-F]+$ ]] ; then
++		return 16;
++	elif [[ "$1" =~ ^[1-9][0-9]*$ ]] ; then
++		return 10;
++	else
++		return 0;
++	fi
++}
++
++# check value is a valid number
++error_if_not_number()
++{
++	guess_number_base $2
++	if test "$?" -eq 0 ; then
++		echo "Argument for $1, $2, is not a valid number." >&2
++		exit 1
++	fi
++}
+ 
+ # rm_device arguments $1=file_name
+ rm_device()
+@@ -754,6 +779,7 @@
+ 				;;
+ 			--buffer-size)
+ 				error_if_empty $arg $val
++				error_if_not_number $arg $val
+ 				BUF_SIZE=$val
+ 				DO_SETUP=yes
+ 				;;
+@@ -763,6 +789,7 @@
+ 					exit 1
+ 				fi
+ 				error_if_empty $arg $val
++				error_if_not_number $arg $val
+ 				BUF_WATERSHED=$val
+ 				DO_SETUP=yes
+ 				;;
+@@ -772,6 +799,7 @@
+ 					exit 1
+ 				fi
+ 				error_if_empty $arg $val
++				error_if_not_number $arg $val
+ 				CPU_BUF_SIZE=$val
+ 				DO_SETUP=yes
+ 				;;
+@@ -802,6 +830,7 @@
+ 					echo "Call-graph profiling unsupported on this kernel/hardware" >&2
+ 					exit 1
+ 				fi
++				error_if_not_number $arg $val
+ 				CALLGRAPH=$val
+ 				DO_SETUP=yes
+ 				;;
diff --git a/oprofile-iaperf.patch b/oprofile-iaperf.patch
new file mode 100644
index 0000000..4090a0c
--- /dev/null
+++ b/oprofile-iaperf.patch
@@ -0,0 +1,16 @@
+Index: oprofile/events/i386/arch_perfmon/events
+diff -u oprofile/events/i386/arch_perfmon/events:1.3 oprofile/events/i386/arch_perfmon/events:1.4
+--- oprofile/events/i386/arch_perfmon/events:1.3	Fri May  1 20:34:24 2009
++++ oprofile/events/i386/arch_perfmon/events	Wed Oct 13 14:58:42 2010
+@@ -4,7 +4,7 @@
+ event:0x3c counters:cpuid um:zero minimum:6000 filter:0 name:CPU_CLK_UNHALTED : Clock cycles when not halted
+ event:0x3c counters:cpuid um:one minimum:6000 filter:2 name:UNHALTED_REFERENCE_CYCLES : Unhalted reference cycles
+ event:0xc0 counters:cpuid um:zero minimum:6000 filter:1 name:INST_RETIRED : number of instructions retired
+-event:0x2e counters:cpuid um:x41 minimum:6000 filter:5 name:LLC_MISSES : Last level cache demand requests from this core that missed the LLC
+-event:0x2e counters:cpuid um:x4f minimum:6000 filter:4 name:LLC_REFS : Last level cache demand requests from this core
+-event:0xc4 counters:cpuid um:zero minimum:500 filter:6 name:BR_INST_RETIRED : number of branch instructions retired
+-event:0xc5 counters:cpuid um:zero minimum:500 filter:7 name:BR_MISS_PRED_RETIRED : number of mispredicted branches retired (precise)
++event:0x2e counters:cpuid um:x41 minimum:6000 filter:4 name:LLC_MISSES : Last level cache demand requests from this core that missed the LLC
++event:0x2e counters:cpuid um:x4f minimum:6000 filter:3 name:LLC_REFS : Last level cache demand requests from this core
++event:0xc4 counters:cpuid um:zero minimum:500 filter:5 name:BR_INST_RETIRED : number of branch instructions retired
++event:0xc5 counters:cpuid um:zero minimum:500 filter:6 name:BR_MISS_PRED_RETIRED : number of mispredicted branches retired (precise)
diff --git a/oprofile-nehalem.patch b/oprofile-nehalem.patch
new file mode 100644
index 0000000..fa5d191
--- /dev/null
+++ b/oprofile-nehalem.patch
@@ -0,0 +1,46 @@
+Index: oprofile/libop/op_cpu_type.c
+diff -u oprofile/libop/op_cpu_type.c:1.54 oprofile/libop/op_cpu_type.c:1.55
+--- oprofile/libop/op_cpu_type.c:1.54	Wed Nov 25 20:35:19 2009
++++ oprofile/libop/op_cpu_type.c	Mon Jun 21 23:12:11 2010
+@@ -83,5 +83,6 @@
+    	{ "Intel Core/i7", "i386/core_i7", CPU_CORE_I7, 4 },
+    	{ "Intel Atom", "i386/atom", CPU_ATOM, 2 },
++	{ "Intel Nehalem microarchitecture", "i386/nehalem", CPU_NEHALEM, 4 },
+ };
+  
+ static size_t const nr_cpu_descrs = sizeof(cpu_descrs) / sizeof(struct cpu_descr);
+Index: oprofile/libop/op_cpu_type.h
+diff -u oprofile/libop/op_cpu_type.h:1.47 oprofile/libop/op_cpu_type.h:1.48
+--- oprofile/libop/op_cpu_type.h:1.47	Wed Nov 25 20:35:19 2009
++++ oprofile/libop/op_cpu_type.h	Mon Jun 21 23:12:11 2010
+@@ -80,5 +80,6 @@
+    	CPU_CORE_I7, /* Intel Core i7, Nehalem */
+    	CPU_ATOM, /* First generation Intel Atom */
++	CPU_NEHALEM, /* Intel Nehalem microarchitecture */
+ 	MAX_CPU_TYPE
+ } op_cpu;
+ 
+Index: oprofile/libop/op_events.c
+diff -u oprofile/libop/op_events.c:1.98 oprofile/libop/op_events.c:1.99
+--- oprofile/libop/op_events.c:1.98	Wed Nov 25 20:35:19 2009
++++ oprofile/libop/op_events.c	Mon Jun 21 23:12:11 2010
+@@ -970,6 +970,7 @@
+ 		case CPU_FAMILY11H:
+  		case CPU_ATOM:
+  		case CPU_CORE_I7:
++		case CPU_NEHALEM:
+ 			descr->name = "CPU_CLK_UNHALTED";
+ 			break;
+ 
+Index: oprofile/utils/ophelp.c
+diff -u oprofile/utils/ophelp.c:1.35 oprofile/utils/ophelp.c:1.36
+--- oprofile/utils/ophelp.c:1.35	Wed Nov 25 20:35:20 2009
++++ oprofile/utils/ophelp.c	Mon Jun 21 23:12:11 2010
+@@ -483,6 +483,7 @@
+ 	case CPU_CORE:
+ 	case CPU_CORE_2:
+ 	case CPU_CORE_I7:
++	case CPU_NEHALEM:
+ 	case CPU_ATOM:
+ 		event_doc =
+ 			"See Intel Architecture Developer's Manual Volume 3B, Appendix A and\n"
diff --git a/oprofile-westmere.patch b/oprofile-westmere.patch
new file mode 100644
index 0000000..bdb448f
--- /dev/null
+++ b/oprofile-westmere.patch
@@ -0,0 +1,676 @@
+Index: oprofile/libop/op_cpu_type.c
+diff -u oprofile/libop/op_cpu_type.c:1.58 oprofile/libop/op_cpu_type.c:1.59
+--- oprofile/libop/op_cpu_type.c:1.58	Mon Nov 29 14:52:36 2010
++++ oprofile/libop/op_cpu_type.c	Wed Dec 15 20:31:09 2010
+@@ -94,6 +94,32 @@
+  
+ static size_t const nr_cpu_descrs = sizeof(cpu_descrs) / sizeof(struct cpu_descr);
+ 
++int op_cpu_variations(op_cpu cpu_type)
++{
++	switch (cpu_type) {
++	case  CPU_ARCH_PERFMON:
++		return 1;
++	default:
++		return 0;
++	}
++}
++
++
++op_cpu op_cpu_base_type(op_cpu cpu_type)
++{
++	/* All the processors that support CPU_ARCH_PERFMON */
++	switch (cpu_type) {
++	case CPU_CORE_2:
++	case CPU_CORE_I7:
++	case CPU_ATOM:
++	case CPU_NEHALEM:
++		return CPU_ARCH_PERFMON;
++	default:
++		/* assume processor in a class by itself */
++		return cpu_type;
++	}
++}
++
+ op_cpu op_get_cpu_type(void)
+ {
+ 	int cpu_type = CPU_NO_GOOD;
+@@ -118,6 +144,9 @@
+ 
+ 	cpu_type = op_get_cpu_number(str);
+ 
++	if (op_cpu_variations(cpu_type))
++		cpu_type = op_cpu_specific_type(cpu_type);
++
+ 	fclose(fp);
+ 
+ 	return cpu_type;
+Index: oprofile/libop/op_cpu_type.h
+diff -u oprofile/libop/op_cpu_type.h:1.51 oprofile/libop/op_cpu_type.h:1.52
+--- oprofile/libop/op_cpu_type.h:1.51	Mon Nov 29 14:52:36 2010
++++ oprofile/libop/op_cpu_type.h	Wed Dec 15 20:31:09 2010
+@@ -91,6 +91,20 @@
+ } op_cpu;
+ 
+ /**
++ * the CPU lowest common denominator
++ *
++ * returns 1 if there are variations for the base cpu type;
++ */
++int op_cpu_variations(op_cpu cpu_type);
++
++/**
++ * get the CPU lowest common denominator
++ *
++ * returns cpu_type if cpu_type does not have a lowest common denominator.
++ */
++op_cpu op_cpu_base_type(op_cpu cpu_type);
++
++/**
+  * get the CPU type from the kernel
+  *
+  * returns CPU_NO_GOOD if the CPU could not be identified.
+Index: oprofile/libop/op_hw_specific.h
+diff -u oprofile/libop/op_hw_specific.h:1.3 oprofile/libop/op_hw_specific.h:1.4
+--- oprofile/libop/op_hw_specific.h:1.3	Tue May 19 18:45:19 2009
++++ oprofile/libop/op_hw_specific.h	Wed Dec 15 20:31:09 2010
+@@ -24,38 +24,52 @@
+ 	return !strncmp(v.v, vnd, 12);
+ }
+ 
++static inline unsigned int cpuid_signature()
++{
++	unsigned eax;
++	asm("cpuid" : "=a" (eax) : "0" (1) : "ecx","ebx","edx");
++	return eax;
++}
++
++static inline unsigned int cpu_model(unsigned int eax)
++{
++	unsigned model = (eax & 0xf0) >> 4;
++	unsigned ext_model = (eax & 0xf0000) >> 12;
++	return  ext_model + model;
++}
++
++static inline unsigned int cpu_family(unsigned int eax)
++{
++	unsigned family =  (eax & 0xf00) >> 8;
++	unsigned ext_family = (eax & 0xff00000) >> 20;
++	return ext_family + family;
++}
++
++static inline unsigned int cpu_stepping(unsigned int eax)
++{
++	return (eax & 0xf);
++}
++
++
+ /* Work around Nehalem spec update AAJ79: CPUID incorrectly indicates
+    unhalted reference cycle architectural event is supported. We assume
+    steppings after C0 report correct data in CPUID. */
+ static inline void workaround_nehalem_aaj79(unsigned *ebx)
+ {
+-	union {
+-		unsigned eax;
+-		struct {
+-			unsigned stepping : 4;
+-			unsigned model : 4;
+-			unsigned family : 4;
+-			unsigned type : 2;
+-			unsigned res : 2;
+-			unsigned ext_model : 4;
+-			unsigned ext_family : 8;
+-			unsigned res2 : 4;
+-		};
+-	} v;
+-	unsigned model;
++	unsigned eax;
+ 
+ 	if (!cpuid_vendor("GenuineIntel"))
+ 		return;
+-	asm("cpuid" : "=a" (v.eax) : "0" (1) : "ecx","ebx","edx");
+-	model = (v.ext_model << 4) + v.model;
+-	if (v.family != 6 || model != 26 || v.stepping > 4)
++	eax = cpuid_signature();
++	if (cpu_family(eax) != 6 || cpu_model(eax) != 26
++		|| cpu_stepping(eax) > 4)
+ 		return;
+ 	*ebx |= (1 << 2);	/* disable unsupported event */
+ }
+ 
+ static inline unsigned arch_get_filter(op_cpu cpu_type)
+ {
+-	if (cpu_type == CPU_ARCH_PERFMON) { 
++	if (op_cpu_base_type(cpu_type) == CPU_ARCH_PERFMON) { 
+ 		unsigned ebx, eax;
+ 		asm("cpuid" : "=a" (eax), "=b" (ebx) : "0" (0xa) : "ecx","edx");
+ 		workaround_nehalem_aaj79(&ebx);
+@@ -66,7 +80,7 @@
+ 
+ static inline int arch_num_counters(op_cpu cpu_type) 
+ {
+-	if (cpu_type == CPU_ARCH_PERFMON) {
++	if (op_cpu_base_type(cpu_type) == CPU_ARCH_PERFMON) {
+ 		unsigned v;
+ 		asm("cpuid" : "=a" (v) : "0" (0xa) : "ebx","ecx","edx");
+ 		return (v >> 8) & 0xff;
+@@ -81,6 +95,28 @@
+ 	return num_to_mask((v >> 8) & 0xff);	
+ }
+ 
++static inline op_cpu op_cpu_specific_type(op_cpu cpu_type)
++{
++	if (cpu_type == CPU_ARCH_PERFMON) {
++		/* Already know is Intel family 6, so just check the model. */
++		int model = cpu_model(cpuid_signature());
++		switch(model) {
++		case 0x0f:
++		case 0x16:
++		case 0x17:
++		case 0x1d:
++			return CPU_CORE_2;
++		case 0x1a:
++		case 0x1e:
++		case 0x2e:
++			return CPU_CORE_I7;
++		case 0x1c:
++			return CPU_ATOM;
++		}
++	}
++	return cpu_type;
++}
++
+ #else
+ 
+ static inline unsigned arch_get_filter(op_cpu cpu_type)
+@@ -104,4 +140,8 @@
+ 	return 0;
+ }
+ 
++static inline op_cpu op_cpu_specific_type(op_cpu cpu_type)
++{
++	return cpu_type;
++}
+ #endif
+Index: oprofile/events/Makefile.am
+diff -u oprofile/events/Makefile.am:1.35 oprofile/events/Makefile.am:1.36
+--- oprofile/events/Makefile.am:1.35	Mon Nov 29 14:52:36 2010
++++ oprofile/events/Makefile.am	Thu Dec 16 17:28:54 2010
+@@ -17,6 +17,7 @@
+ 	i386/atom/events i386/atom/unit_masks \
+ 	i386/core_i7/events i386/core_i7/unit_masks \
+ 	i386/nehalem/events i386/nehalem/unit_masks \
++	i386/westmere/events i386/westmere/unit_masks \
+ 	ia64/ia64/events ia64/ia64/unit_masks \
+ 	ia64/itanium2/events ia64/itanium2/unit_masks \
+ 	ia64/itanium/events ia64/itanium/unit_masks \
+--- /dev/null	2010-12-20 18:01:43.861988919 -0500
++++ oprofile/events/i386/westmere/events	2011-01-03 14:38:43.889646330 -0500
+@@ -0,0 +1,88 @@
++#
++# Intel "Westmere" microarchitecture core events.
++#
++# See http://ark.intel.com/ for help in identifying Westmere based CPUs
++#
++# Note the minimum counts are not discovered experimentally and could be likely
++# lowered in many cases without ill effect.
++#
++include:i386/arch_perfmon
++event:0x03 counters:0,1,2,3 um:x02 minimum:200000 name:LOAD_BLOCK : Loads that partially overlap an earlier store
++event:0x04 counters:0,1,2,3 um:x07 minimum:200000 name:SB_DRAIN : All Store buffer stall cycles
++event:0x05 counters:0,1,2,3 um:x02 minimum:200000 name:MISALIGN_MEM_REF : Misaligned store references
++event:0x06 counters:0,1,2,3 um:store_blocks minimum:200000 name:STORE_BLOCKS : Loads delayed with at-Retirement block code
++event:0x07 counters:0,1,2,3 um:x01 minimum:200000 name:PARTIAL_ADDRESS_ALIAS : False dependencies due to partial address aliasing
++event:0x08 counters:0,1,2,3 um:dtlb_load_misses minimum:200000 name:DTLB_LOAD_MISSES : DTLB load misses
++event:0x0b counters:0,1,2,3 um:mem_inst_retired minimum:2000000 name:MEM_INST_RETIRED : Memory instructions retired above 0 clocks (Precise Event)
++event:0x0c counters:0,1,2,3 um:x01 minimum:200000 name:MEM_STORE_RETIRED : Retired stores that miss the DTLB (Precise Event)
++event:0x0e counters:0,1,2,3 um:uops_issued minimum:2000000 name:UOPS_ISSUED : Uops issued
++event:0x0f counters:0,1,2,3 um:mem_uncore_retired minimum:40000 name:MEM_UNCORE_RETIRED : Load instructions retired that HIT modified data in sibling core (Precise Event)
++event:0x10 counters:0,1,2,3 um:fp_comp_ops_exe minimum:2000000 name:FP_COMP_OPS_EXE : MMX Uops
++event:0x12 counters:0,1,2,3 um:simd_int_128 minimum:200000 name:SIMD_INT_128 : 128 bit SIMD integer pack operations
++event:0x13 counters:0,1,2,3 um:load_dispatch minimum:2000000 name:LOAD_DISPATCH : All loads dispatched
++event:0x14 counters:0,1,2,3 um:arith minimum:2000000 name:ARITH : Cycles the divider is busy
++event:0x17 counters:0,1,2,3 um:x01 minimum:2000000 name:INST_QUEUE_WRITES : Instructions written to instruction queue.
++event:0x18 counters:0,1,2,3 um:x01 minimum:2000000 name:INST_DECODED : Instructions that must be decoded by decoder 0
++event:0x19 counters:0,1,2,3 um:x01 minimum:2000000 name:TWO_UOP_INSTS_DECODED : Two Uop instructions decoded
++event:0x1e counters:0,1,2,3 um:x01 minimum:2000000 name:INST_QUEUE_WRITE_CYCLES : Cycles instructions are written to the instruction queue
++event:0x20 counters:0,1,2,3 um:x01 minimum:2000000 name:LSD_OVERFLOW : Loops that can't stream from the instruction queue
++event:0x24 counters:0,1,2,3 um:l2_rqsts minimum:200000 name:L2_RQSTS : L2 instruction fetch hits
++event:0x26 counters:0,1,2,3 um:l2_data_rqsts minimum:200000 name:L2_DATA_RQSTS : All L2 data requests
++event:0x27 counters:0,1,2,3 um:l2_write minimum:100000 name:L2_WRITE : L2 demand lock RFOs in E state
++event:0x28 counters:0,1,2,3 um:l1d_wb_l2 minimum:100000 name:L1D_WB_L2 : L1 writebacks to L2 in E state
++event:0x2e counters:0,1,2,3 um:longest_lat_cache minimum:100000 name:LONGEST_LAT_CACHE : Longest latency cache miss
++event:0x3c counters:0,1,2,3 um:cpu_clk_unhalted minimum:100000 name:CPU_CLK_UNHALTED : Reference base clock (133 Mhz) cycles when thread is not halted (programmable counter)
++event:0x49 counters:0,1,2,3 um:dtlb_misses minimum:200000 name:DTLB_MISSES : DTLB misses
++event:0x4c counters:0,1 um:x01 minimum:200000 name:LOAD_HIT_PRE : Load operations conflicting with software prefetches
++event:0x4e counters:0,1 um:l1d_prefetch minimum:200000 name:L1D_PREFETCH : L1D hardware prefetch misses
++event:0x4f counters:0,1,2,3 um:x10 minimum:2000000 name:EPT : Extended Page Table walk cycles
++event:0x51 counters:0,1 um:l1d minimum:2000000 name:L1D : L1D cache lines replaced in M state 
++event:0x52 counters:0,1 um:x01 minimum:2000000 name:L1D_CACHE_PREFETCH_LOCK_FB_HIT : L1D prefetch load lock accepted in fill buffer
++event:0x60 counters:0 um:offcore_requests_outstanding minimum:2000000 name:OFFCORE_REQUESTS_OUTSTANDING : Outstanding offcore reads
++event:0x63 counters:0,1 um:cache_lock_cycles minimum:2000000 name:CACHE_LOCK_CYCLES : Cycles L1D locked
++event:0x6c counters:0,1,2,3 um:x01 minimum:2000000 name:IO_TRANSACTIONS : I/O transactions
++event:0x80 counters:0,1,2,3 um:l1i minimum:2000000 name:L1I : L1I instruction fetch stall cycles
++event:0x82 counters:0,1,2,3 um:x01 minimum:200000 name:LARGE_ITLB : Large ITLB hit
++event:0x85 counters:0,1,2,3 um:itlb_misses minimum:200000 name:ITLB_MISSES : ITLB miss
++event:0x87 counters:0,1,2,3 um:ild_stall minimum:2000000 name:ILD_STALL : Any Instruction Length Decoder stall cycles
++event:0x88 counters:0,1,2,3 um:br_inst_exec minimum:200000 name:BR_INST_EXEC : Branch instructions executed
++event:0x89 counters:0,1,2,3 um:br_misp_exec minimum:20000 name:BR_MISP_EXEC : Mispredicted branches executed
++event:0xa2 counters:0,1,2,3 um:resource_stalls minimum:2000000 name:RESOURCE_STALLS : Resource related stall cycles
++event:0xa6 counters:0,1,2,3 um:x01 minimum:2000000 name:MACRO_INSTS : Macro-fused instructions decoded
++event:0xa7 counters:0,1,2,3 um:x01 minimum:2000000 name:BACLEAR_FORCE_IQ : Instruction queue forced BACLEAR
++event:0xa8 counters:0,1,2,3 um:x01 minimum:2000000 name:LSD : Cycles when uops were delivered by the LSD
++event:0xae counters:0,1,2,3 um:x01 minimum:2000000 name:ITLB_FLUSH : ITLB flushes
++event:0xb0 counters:0,1,2,3 um:offcore_requests minimum:100000 name:OFFCORE_REQUESTS : All offcore requests
++event:0xb1 counters:0,1,2,3 um:uops_executed minimum:2000000 name:UOPS_EXECUTED : Cycles Uops executed on any port (core count)
++event:0xb2 counters:0,1,2,3 um:x01 minimum:100000 name:OFFCORE_REQUESTS_SQ_FULL : Offcore requests blocked due to Super Queue full
++event:0xb3 counters:0 um:snoopq_requests_outstanding minimum:2000000 name:SNOOPQ_REQUESTS_OUTSTANDING : Outstanding snoop code requests
++event:0xb4 counters:0,1,2,3 um:snoopq_requests minimum:100000 name:SNOOPQ_REQUESTS : Snoop code requests
++event:0xb7 counters:2 um:x01 minimum:100000 name:OFFCORE_RESPONSE_ANY_DATA : REQUEST = ANY_DATA read and RESPONSE = ANY_CACHE_DRAM
++event:0xb8 counters:0,1,2,3 um:snoop_response minimum:100000 name:SNOOP_RESPONSE : Thread responded HIT to snoop
++event:0xbb counters:1 um:x01 minimum:100000 name:OFFCORE_RESPONSE_ANY_DATA : REQUEST = ANY_DATA read and RESPONSE = ANY_CACHE_DRAM
++event:0xc0 counters:0,1,2,3 um:inst_retired minimum:2000000 name:INST_RETIRED : Instructions retired (Programmable counter and Precise Event)
++event:0xc2 counters:0,1,2,3 um:uops_retired minimum:2000000 name:UOPS_RETIRED : Cycles Uops are being retired
++event:0xc3 counters:0,1,2,3 um:machine_clears minimum:20000 name:MACHINE_CLEARS : Cycles machine clear asserted
++event:0xc4 counters:0,1,2,3 um:br_inst_retired minimum:200000 name:BR_INST_RETIRED : Retired branch instructions (Precise Event)
++event:0xc5 counters:0,1,2,3 um:br_misp_retired minimum:20000 name:BR_MISP_RETIRED : Mispredicted retired branch instructions (Precise Event)
++event:0xc7 counters:0,1,2,3 um:ssex_uops_retired minimum:200000 name:SSEX_UOPS_RETIRED : SIMD Packed-Double Uops retired (Precise Event)
++event:0xc8 counters:0,1,2,3 um:x20 minimum:200000 name:ITLB_MISS_RETIRED : Retired instructions that missed the ITLB (Precise Event)
++event:0xcb counters:0,1,2,3 um:mem_load_retired minimum:200000 name:MEM_LOAD_RETIRED : Retired loads that miss the DTLB (Precise Event)
++event:0xcc counters:0,1,2,3 um:fp_mmx_trans minimum:2000000 name:FP_MMX_TRANS : All Floating Point to and from MMX transitions
++event:0xd0 counters:0,1,2,3 um:x01 minimum:2000000 name:MACRO_INSTS : Instructions decoded
++event:0xd1 counters:0,1,2,3 um:uops_decoded minimum:2000000 name:UOPS_DECODED : Stack pointer instructions decoded
++event:0xd2 counters:0,1,2,3 um:rat_stalls minimum:2000000 name:RAT_STALLS : All RAT stall cycles
++event:0xd4 counters:0,1,2,3 um:x01 minimum:2000000 name:SEG_RENAME_STALLS : Segment rename stall cycles
++event:0xd5 counters:0,1,2,3 um:x01 minimum:2000000 name:ES_REG_RENAMES : ES segment renames
++event:0xdb counters:0,1,2,3 um:x01 minimum:2000000 name:UOP_UNFUSION : Uop unfusions due to FP exceptions
++event:0xe0 counters:0,1,2,3 um:x01 minimum:2000000 name:BR_INST_DECODED : Branch instructions decoded
++event:0xe5 counters:0,1,2,3 um:x01 minimum:2000000 name:BPU_MISSED_CALL_RET : Branch prediction unit missed call or return
++event:0xe6 counters:0,1,2,3 um:baclear minimum:2000000 name:BACLEAR : BACLEAR asserted with bad target address
++event:0xe8 counters:0,1,2,3 um:bpu_clears minimum:2000000 name:BPU_CLEARS : Early Branch Prediction Unit clears
++event:0xf0 counters:0,1,2,3 um:l2_transactions minimum:200000 name:L2_TRANSACTIONS : All L2 transactions
++event:0xf1 counters:0,1,2,3 um:l2_lines_in minimum:100000 name:L2_LINES_IN : L2 lines allocated
++event:0xf2 counters:0,1,2,3 um:l2_lines_out minimum:100000 name:L2_LINES_OUT : L2 lines evicted
++event:0xf4 counters:0,1,2,3 um:sq_misc minimum:2000000 name:SQ_MISC : Super Queue LRU hints sent to LLC
++event:0xf6 counters:0,1,2,3 um:x01 minimum:2000000 name:SQ_FULL_STALL_CYCLES : Super Queue full stall cycles
++event:0xf7 counters:0,1,2,3 um:fp_assist minimum:20000 name:FP_ASSIST : X87 Floating point assists (Precise Event)
++event:0xfd counters:0,1,2,3 um:simd_int_64 minimum:200000 name:SIMD_INT_64 : SIMD integer 64 bit pack operations
+--- /dev/null	2010-12-20 18:01:43.861988919 -0500
++++ oprofile/events/i386/westmere/unit_masks	2011-01-03 14:38:44.973013699 -0500
+@@ -0,0 +1,307 @@
++#
++# Unit masks for the Intel "Westmere" micro architecture
++#
++# See http://ark.intel.com/ for help in identifying Westmere based CPUs
++#
++include:i386/arch_perfmon
++
++name:x01 type:mandatory default:0x01
++	0x01 No unit mask
++name:x02 type:mandatory default:0x02
++	0x02 No unit mask
++name:x07 type:mandatory default:0x07
++	0x07 No unit mask
++name:x10 type:mandatory default:0x10
++	0x10 No unit mask
++name:x20 type:mandatory default:0x20
++	0x20 No unit mask
++name:arith type:bitmask default:0x01
++	0x01 cycles_div_busy Cycles the divider is busy
++	0x02 mul Multiply operations executed
++name:baclear type:bitmask default:0x01
++	0x01 clear BACLEAR asserted, regardless of cause 
++	0x02 bad_target BACLEAR asserted with bad target address
++name:bpu_clears type:bitmask default:0x01
++	0x01 early Early Branch Prediction Unit clears
++	0x02 late Late Branch Prediction Unit clears
++name:br_inst_exec type:bitmask default:0x7f
++	0x01 cond Conditional branch instructions executed
++	0x02 direct Unconditional branches executed
++	0x04 indirect_non_call Indirect non call branches executed
++	0x07 non_calls All non call branches executed
++	0x08 return_near Indirect return branches executed
++	0x10 direct_near_call Unconditional call branches executed
++	0x20 indirect_near_call Indirect call branches executed
++	0x30 near_calls Call branches executed
++	0x40 taken Taken branches executed
++	0x7f any Branch instructions executed
++name:br_inst_retired type:bitmask default:0x04
++	0x01 conditional Retired conditional branch instructions (Precise Event)
++	0x02 near_call Retired near call instructions (Precise Event)
++	0x04 all_branches Retired branch instructions (Precise Event)
++name:br_misp_exec type:bitmask default:0x7f
++	0x01 cond Mispredicted conditional branches executed
++	0x02 direct Mispredicted unconditional branches executed
++	0x04 indirect_non_call Mispredicted indirect non call branches executed
++	0x07 non_calls Mispredicted non call branches executed
++	0x08 return_near Mispredicted return branches executed
++	0x10 direct_near_call Mispredicted unconditional call branches executed
++	0x20 indirect_near_call Mispredicted indirect call branches executed
++	0x30 near_calls Mispredicted call branches executed
++	0x40 taken Mispredicted taken branches executed
++	0x7f any Mispredicted branches executed
++name:br_misp_retired type:bitmask default:0x04
++	0x01 conditional Mispredicted conditional retired branches (Precise Event)
++	0x02 near_call Mispredicted near retired calls (Precise Event)
++	0x04 all_branches Mispredicted retired branch instructions (Precise Event)
++name:cache_lock_cycles type:bitmask default:0x01
++	0x01 l1d_l2 Cycles L1D and L2 locked
++	0x02 l1d Cycles L1D locked
++name:cpu_clk_unhalted type:bitmask default:0x00
++	0x00 thread_p Cycles when thread is not halted (programmable counter)
++	0x01 ref_p Reference base clock (133 MHz) cycles when thread is not halted (programmable counter)
++name:dtlb_load_misses type:bitmask default:0x01
++	0x01 any DTLB load misses
++	0x02 walk_completed DTLB load miss page walks complete
++	0x04 walk_cycles DTLB load miss page walk cycles
++	0x10 stlb_hit DTLB second level hit
++	0x20 pde_miss DTLB load miss caused by low part of address
++	0x80 large_walk_completed DTLB load miss large page walks
++name:dtlb_misses type:bitmask default:0x01
++	0x01 any DTLB misses
++	0x02 walk_completed DTLB miss page walks
++	0x04 walk_cycles DTLB miss page walk cycles
++	0x10 stlb_hit DTLB first level misses but second level hit
++	0x20 pde_miss DTLB misses caused by low part of address
++	0x80 large_walk_completed DTLB miss large page walks
++name:fp_assist type:bitmask default:0x01
++	0x01 all X87 Floating point assists (Precise Event)
++	0x02 output X87 Floating point assists for invalid output value (Precise Event)
++	0x04 input X87 Floating point assists for invalid input value (Precise Event)
++name:fp_comp_ops_exe type:bitmask default:0x01
++	0x01 x87 Computational floating-point operations executed
++	0x02 mmx MMX Uops
++	0x04 sse_fp SSE and SSE2 FP Uops
++	0x08 sse2_integer SSE2 integer Uops
++	0x10 sse_fp_packed SSE FP packed Uops
++	0x20 sse_fp_scalar SSE FP scalar Uops
++	0x40 sse_single_precision SSE* FP single precision Uops
++	0x80 sse_double_precision SSE* FP double precision Uops
++name:fp_mmx_trans type:bitmask default:0x03
++	0x01 to_fp Transitions from MMX to Floating Point instructions
++	0x02 to_mmx Transitions from Floating Point to MMX instructions
++	0x03 any All Floating Point to and from MMX transitions
++name:ild_stall type:bitmask default:0x0f
++	0x01 lcp Length Change Prefix stall cycles
++	0x02 mru Stall cycles due to BPU MRU bypass
++	0x04 iq_full Instruction Queue full stall cycles
++	0x08 regen Regen stall cycles
++	0x0f any Any Instruction Length Decoder stall cycles
++name:inst_retired type:bitmask default:0x01
++	0x01 any_p Instructions retired (Programmable counter and Precise Event)
++	0x02 x87 Retired floating-point operations (Precise Event)
++	0x04 mmx Retired MMX instructions (Precise Event)
++name:itlb_misses type:bitmask default:0x01
++	0x01 any ITLB miss
++	0x02 walk_completed ITLB miss page walks
++	0x04 walk_cycles ITLB miss page walk cycles
++	0x80 large_walk_completed ITLB miss large page walks
++name:l1d type:bitmask default:0x01
++	0x01 repl L1 data cache lines allocated
++	0x02 m_repl L1D cache lines allocated in the M state
++	0x04 m_evict L1D cache lines replaced in M state 
++	0x08 m_snoop_evict L1D snoop eviction of cache lines in M state
++name:l1d_prefetch type:bitmask default:0x01
++	0x01 requests L1D hardware prefetch requests
++	0x02 miss L1D hardware prefetch misses
++	0x04 triggers L1D hardware prefetch requests triggered
++name:l1d_wb_l2 type:bitmask default:0x0f
++	0x01 i_state L1 writebacks to L2 in I state (misses)
++	0x02 s_state L1 writebacks to L2 in S state
++	0x04 e_state L1 writebacks to L2 in E state
++	0x08 m_state L1 writebacks to L2 in M state
++	0x0f mesi All L1 writebacks to L2
++name:l1i type:bitmask default:0x01
++	0x01 hits L1I instruction fetch hits
++	0x02 misses L1I instruction fetch misses
++	0x03 reads L1I Instruction fetches
++	0x04 cycles_stalled L1I instruction fetch stall cycles
++name:l2_data_rqsts type:bitmask default:0xff
++	0x01 demand_i_state L2 data demand loads in I state (misses)
++	0x02 demand_s_state L2 data demand loads in S state
++	0x04 demand_e_state L2 data demand loads in E state
++	0x08 demand_m_state L2 data demand loads in M state
++	0x0f demand_mesi L2 data demand requests
++	0x10 prefetch_i_state L2 data prefetches in the I state (misses)
++	0x20 prefetch_s_state L2 data prefetches in the S state
++	0x40 prefetch_e_state L2 data prefetches in E state
++	0x80 prefetch_m_state L2 data prefetches in M state
++	0xf0 prefetch_mesi All L2 data prefetches
++	0xff any All L2 data requests
++name:l2_lines_in type:bitmask default:0x07
++	0x02 s_state L2 lines allocated in the S state
++	0x04 e_state L2 lines allocated in the E state
++	0x07 any L2 lines allocated
++name:l2_lines_out type:bitmask default:0x0f
++	0x01 demand_clean L2 lines evicted by a demand request
++	0x02 demand_dirty L2 modified lines evicted by a demand request
++	0x04 prefetch_clean L2 lines evicted by a prefetch request
++	0x08 prefetch_dirty L2 modified lines evicted by a prefetch request
++	0x0f any L2 lines evicted
++name:l2_rqsts type:bitmask default:0x01
++	0x01 ld_hit L2 load hits
++	0x02 ld_miss L2 load misses
++	0x03 loads L2 requests
++	0x04 rfo_hit L2 RFO hits
++	0x08 rfo_miss L2 RFO misses
++	0x0c rfos L2 RFO requests
++	0x10 ifetch_hit L2 instruction fetch hits
++	0x20 ifetch_miss L2 instruction fetch misses
++	0x30 ifetches L2 instruction fetches
++	0x40 prefetch_hit L2 prefetch hits
++	0x80 prefetch_miss L2 prefetch misses
++	0xaa miss All L2 misses
++	0xc0 prefetches All L2 prefetches
++	0xff references All L2 requests
++name:l2_transactions type:bitmask default:0x80
++	0x01 load L2 Load transactions
++	0x02 rfo L2 RFO transactions
++	0x04 ifetch L2 instruction fetch transactions
++	0x08 prefetch L2 prefetch transactions
++	0x10 l1d_wb L1D writeback to L2 transactions
++	0x20 fill L2 fill transactions
++	0x40 wb L2 writeback to LLC transactions
++	0x80 any All L2 transactions
++name:l2_write type:bitmask default:0x01
++	0x01 rfo_i_state L2 demand store RFOs in I state (misses)
++	0x02 rfo_s_state L2 demand store RFOs in S state
++	0x08 rfo_m_state L2 demand store RFOs in M state
++	0x0e rfo_hit All L2 demand store RFOs that hit the cache
++	0x0f rfo_mesi All L2 demand store RFOs
++	0x10 lock_i_state L2 demand lock RFOs in I state (misses)
++	0x20 lock_s_state L2 demand lock RFOs in S state
++	0x40 lock_e_state L2 demand lock RFOs in E state
++	0x80 lock_m_state L2 demand lock RFOs in M state
++	0xe0 lock_hit All demand L2 lock RFOs that hit the cache
++	0xf0 lock_mesi All demand L2 lock RFOs
++name:load_dispatch type:bitmask default:0x07
++	0x01 rs Loads dispatched that bypass the MOB
++	0x02 rs_delayed Loads dispatched from stage 305
++	0x04 mob Loads dispatched from the MOB
++	0x07 any All loads dispatched
++name:longest_lat_cache type:bitmask default:0x01
++	0x01 miss Longest latency cache miss
++	0x02 reference Longest latency cache reference
++name:machine_clears type:bitmask default:0x01
++	0x01 cycles Cycles machine clear asserted
++	0x02 mem_order Execution pipeline restart due to Memory ordering conflicts 
++	0x04 smc Self-Modifying Code detected
++name:mem_inst_retired type:bitmask default:0x01
++	0x01 loads Instructions retired which contains a load (Precise Event)
++	0x02 stores Instructions retired which contains a store (Precise Event)
++	0x10 latency_above_threshold_0 Memory instructions retired above 0 clocks (Precise Event) (MSR_INDEX: 0x03F6 MSR_VALUE: 0x0000)
++name:mem_load_retired type:bitmask default:0x01
++	0x01 l1d_hit Retired loads that hit the L1 data cache (Precise Event)
++	0x02 l2_hit Retired loads that hit the L2 cache (Precise Event)
++	0x04 llc_unshared_hit Retired loads that hit valid versions in the LLC cache (Precise Event)
++	0x08 other_core_l2_hit_hitm Retired loads that hit sibling core's L2 in modified or unmodified states (Precise Event)
++	0x10 llc_miss Retired loads that miss the LLC cache (Precise Event)
++	0x40 hit_lfb Retired loads that miss L1D and hit a previously allocated LFB (Precise Event)
++	0x80 dtlb_miss Retired loads that miss the DTLB (Precise Event)
++name:mem_uncore_retired type:bitmask default:0x02
++	0x02 local_hitm Load instructions retired that HIT modified data in sibling core (Precise Event)
++	0x04 remote_hitm Retired loads that hit remote socket in modified state (Precise Event)
++	0x08 local_dram_and_remote_cache_hit Load instructions retired local dram and remote cache HIT data sources (Precise Event)
++	0x10 remote_dram Load instructions retired remote DRAM and remote home-remote cache HITM (Precise Event)
++	0x80 uncacheable Load instructions retired IO (Precise Event)
++name:offcore_requests type:bitmask default:0x80
++	0x01 demand_read_data Offcore demand data read requests
++	0x02 demand_read_code Offcore demand code read requests
++	0x04 demand_rfo Offcore demand RFO requests
++	0x08 any_read Offcore read requests
++	0x10 any_rfo Offcore RFO requests
++	0x40 l1d_writeback Offcore L1 data cache writebacks
++	0x80 any All offcore requests
++name:offcore_requests_outstanding type:bitmask default:0x08
++	0x01 demand_read_data Outstanding offcore demand data reads
++	0x02 demand_read_code Outstanding offcore demand code reads
++	0x04 demand_rfo Outstanding offcore demand RFOs
++	0x08 any_read Outstanding offcore reads
++name:rat_stalls type:bitmask default:0x0f
++	0x01 flags Flag stall cycles
++	0x02 registers Partial register stall cycles
++	0x04 rob_read_port ROB read port stalls cycles
++	0x08 scoreboard Scoreboard stall cycles
++	0x0f any All RAT stall cycles
++name:resource_stalls type:bitmask default:0x01
++	0x01 any Resource related stall cycles
++	0x02 load Load buffer stall cycles
++	0x04 rs_full Reservation Station full stall cycles
++	0x08 store Store buffer stall cycles
++	0x10 rob_full ROB full stall cycles
++	0x20 fpcw FPU control word write stall cycles
++	0x40 mxcsr MXCSR rename stall cycles
++	0x80 other Other Resource related stall cycles
++name:simd_int_128 type:bitmask default:0x01
++	0x01 packed_mpy 128 bit SIMD integer multiply operations
++	0x02 packed_shift 128 bit SIMD integer shift operations
++	0x04 pack 128 bit SIMD integer pack operations
++	0x08 unpack 128 bit SIMD integer unpack operations
++	0x10 packed_logical 128 bit SIMD integer logical operations
++	0x20 packed_arith 128 bit SIMD integer arithmetic operations
++	0x40 shuffle_move 128 bit SIMD integer shuffle/move operations
++name:simd_int_64 type:bitmask default:0x01
++	0x01 packed_mpy SIMD integer 64 bit packed multiply operations
++	0x02 packed_shift SIMD integer 64 bit shift operations
++	0x04 pack SIMD integer 64 bit pack operations
++	0x08 unpack SIMD integer 64 bit unpack operations
++	0x10 packed_logical SIMD integer 64 bit logical operations
++	0x20 packed_arith SIMD integer 64 bit arithmetic operations
++	0x40 shuffle_move SIMD integer 64 bit shuffle/move operations
++name:snoopq_requests type:bitmask default:0x01
++	0x01 data Snoop data requests
++	0x02 invalidate Snoop invalidate requests
++	0x04 code Snoop code requests
++name:snoopq_requests_outstanding type:bitmask default:0x01
++	0x01 data Outstanding snoop data requests
++	0x02 invalidate Outstanding snoop invalidate requests
++	0x04 code Outstanding snoop code requests
++name:snoop_response type:bitmask default:0x01
++	0x01 hit Thread responded HIT to snoop
++	0x02 hite Thread responded HITE to snoop
++	0x04 hitm Thread responded HITM to snoop
++name:sq_misc type:bitmask default:0x04
++	0x04 lru_hints Super Queue LRU hints sent to LLC
++	0x10 split_lock Super Queue lock splits across a cache line
++name:ssex_uops_retired type:bitmask default:0x01
++	0x01 packed_single SIMD Packed-Single Uops retired (Precise Event)
++	0x02 scalar_single SIMD Scalar-Single Uops retired (Precise Event)
++	0x04 packed_double SIMD Packed-Double Uops retired (Precise Event)
++	0x08 scalar_double SIMD Scalar-Double Uops retired (Precise Event)
++	0x10 vector_integer SIMD Vector Integer Uops retired (Precise Event)
++name:store_blocks type:bitmask default:0x04
++	0x04 at_ret Loads delayed with at-Retirement block code
++	0x08 l1d_block Cacheable loads delayed with L1D block code
++name:uops_decoded type:bitmask default:0x01
++	0x01 stall_cycles Cycles no Uops are decoded
++	0x02 ms_cycles_active Uops decoded by Microcode Sequencer
++	0x04 esp_folding Stack pointer instructions decoded
++	0x08 esp_sync Stack pointer sync operations
++name:uops_executed type:bitmask default:0x3f
++	0x01 port0 Uops executed on port 0
++	0x02 port1 Uops executed on port 1
++	0x04 port2_core Uops executed on port 2 (core count)
++	0x08 port3_core Uops executed on port 3 (core count)
++	0x10 port4_core Uops executed on port 4 (core count)
++	0x1f core_active_cycles_no_port5 Cycles Uops executed on ports 0-4 (core count)
++	0x20 port5 Uops executed on port 5
++	0x3f core_active_cycles Cycles Uops executed on any port (core count)
++	0x40 port015 Uops issued on ports 0, 1 or 5
++	0x80 port234_core Uops issued on ports 2, 3 or 4
++name:uops_issued type:bitmask default:0x01
++	0x01 any Uops issued
++	0x02 fused Fused Uops issued
++name:uops_retired type:bitmask default:0x01
++	0x01 active_cycles Cycles Uops are being retired
++	0x02 retire_slots Retirement slots used (Precise Event)
++	0x04 macro_fused Macro-fused Uops retired (Precise Event)
+Index: oprofile/libop/op_cpu_type.c
+diff -u oprofile/libop/op_cpu_type.c:1.59 oprofile/libop/op_cpu_type.c:1.60
+--- oprofile/libop/op_cpu_type.c:1.59	Wed Dec 15 20:31:09 2010
++++ oprofile/libop/op_cpu_type.c	Thu Dec 16 17:28:54 2010
+@@ -90,6 +90,7 @@
+ 	{ "AMD64 family12h", "x86-64/family12h", CPU_FAMILY12H, 4 },
+ 	{ "AMD64 family14h", "x86-64/family14h", CPU_FAMILY14H, 4 },
+ 	{ "AMD64 family15h", "x86-64/family15h", CPU_FAMILY15H, 6 },
++	{ "Intel Westmere microarchitecture", "i386/westmere", CPU_WESTMERE, 4 },
+ };
+  
+ static size_t const nr_cpu_descrs = sizeof(cpu_descrs) / sizeof(struct cpu_descr);
+@@ -113,6 +114,7 @@
+ 	case CPU_CORE_I7:
+ 	case CPU_ATOM:
+ 	case CPU_NEHALEM:
++	case CPU_WESTMERE:
+ 		return CPU_ARCH_PERFMON;
+ 	default:
+ 		/* assume processor in a class by itself */
+Index: oprofile/libop/op_cpu_type.h
+diff -u oprofile/libop/op_cpu_type.h:1.52 oprofile/libop/op_cpu_type.h:1.53
+--- oprofile/libop/op_cpu_type.h:1.52	Wed Dec 15 20:31:09 2010
++++ oprofile/libop/op_cpu_type.h	Thu Dec 16 17:28:54 2010
+@@ -87,6 +87,7 @@
+ 	CPU_FAMILY12H, /**< AMD family 12h */
+ 	CPU_FAMILY14H, /**< AMD family 14h */
+ 	CPU_FAMILY15H, /**< AMD family 15h */
++	CPU_WESTMERE, /* Intel Westmere microarchitecture */
+ 	MAX_CPU_TYPE
+ } op_cpu;
+ 
+Index: oprofile/libop/op_events.c
+diff -u oprofile/libop/op_events.c:1.102 oprofile/libop/op_events.c:1.103
+--- oprofile/libop/op_events.c:1.102	Mon Nov 29 14:52:36 2010
++++ oprofile/libop/op_events.c	Thu Dec 16 17:28:54 2010
+@@ -971,6 +971,7 @@
+  		case CPU_ATOM:
+  		case CPU_CORE_I7:
+ 		case CPU_NEHALEM:
++		case CPU_WESTMERE:
+ 		case CPU_FAMILY12H:
+ 		case CPU_FAMILY14H:
+ 		case CPU_FAMILY15H:
+Index: oprofile/libop/op_hw_specific.h
+diff -u oprofile/libop/op_hw_specific.h:1.4 oprofile/libop/op_hw_specific.h:1.5
+--- oprofile/libop/op_hw_specific.h:1.4	Wed Dec 15 20:31:09 2010
++++ oprofile/libop/op_hw_specific.h	Thu Dec 16 17:28:54 2010
+@@ -112,6 +112,8 @@
+ 			return CPU_CORE_I7;
+ 		case 0x1c:
+ 			return CPU_ATOM;
++		case 0x25:
++			return CPU_WESTMERE;
+ 		}
+ 	}
+ 	return cpu_type;
+Index: oprofile/utils/ophelp.c
+diff -u oprofile/utils/ophelp.c:1.39 oprofile/utils/ophelp.c:1.40
+--- oprofile/utils/ophelp.c:1.39	Mon Nov 29 14:52:36 2010
++++ oprofile/utils/ophelp.c	Thu Dec 16 17:28:54 2010
+@@ -496,6 +496,7 @@
+ 	case CPU_CORE_2:
+ 	case CPU_CORE_I7:
+ 	case CPU_NEHALEM:
++	case CPU_WESTMERE:
+ 	case CPU_ATOM:
+ 		event_doc =
+ 			"See Intel Architecture Developer's Manual Volume 3B, Appendix A and\n"
diff --git a/oprofile.spec b/oprofile.spec
index 44a4fac..f891c72 100644
--- a/oprofile.spec
+++ b/oprofile.spec
@@ -1,7 +1,7 @@
 Summary: System wide profiler
 Name: oprofile
 Version: 0.9.6
-Release: 6%{?dist}
+Release: 10%{?dist}
 License: GPLv2
 Group: Development/System
 #
@@ -15,6 +15,11 @@ Patch83: oprofile-0.9.3-xen.patch
 #Patch104: oprofile-jvmpi-lgpl.patch
 #Patch105: oprofile-0.9.5-timer.patch
 Patch106: oprofile-sect.patch
+Patch120: oprofile-iaperf.patch
+Patch121: oprofile-nehalem.patch
+Patch122: oprofile-amd.patch
+Patch123: oprofile-westmere.patch
+Patch124: oprofile-check.patch
 
 URL: http://oprofile.sf.net
 
@@ -83,6 +88,11 @@ agent library.
 %patch10 -p1 -b .guess2
 %patch63 -p1 -b .libs
 %patch106 -p1 -b .sect
+%patch120 -p1
+%patch121 -p1
+%patch122 -p1
+%patch123 -p1
+%patch124 -p1
 
 ./autogen.sh
 
@@ -230,6 +240,13 @@ exit 0
 /etc/ld.so.conf.d/*
 
 %changelog
+* Thu Jan 6 2011 Will Cohen <wcohen at redhat.com> - 0.9.6-10
+- Corrections for i386/arch_perfmon filters.
+- Make nehalem events available.
+- Add AMD family 12/14/15h support.
+- Add Intel Westmere support.
+- opcontrol numeric argument checking.
+
 * Wed Apr 21 2010 Will Cohen <wcohen at redhat.com> - 0.9.6-6
 - Bump version and rebuild.
 


More information about the scm-commits mailing list