[oprofile] - Corrections for i386/arch_perfmon filters. - Make nehalem events available. - Add AMD family 12/14
William Eden Cohen
wcohen at fedoraproject.org
Thu Jan 6 16:23:48 UTC 2011
commit 845eee59efb803006817435011ac150e0a5df8aa
Author: William Cohen <wcohen at redhat.com>
Date: Thu Jan 6 11:23:13 2011 -0500
- Corrections for i386/arch_perfmon filters.
- Make nehalem events available.
- Add AMD family 12/14/15h support.
- Add Intel westmere support.
- opcontrol numeric argument checking.
oprofile-amd.patch | 2147 +++++++++++++++++++++++++++++++++++++++++++++++
oprofile-check.patch | 82 ++
oprofile-iaperf.patch | 16 +
oprofile-nehalem.patch | 46 +
oprofile-westmere.patch | 676 +++++++++++++++
oprofile.spec | 19 +-
6 files changed, 2985 insertions(+), 1 deletions(-)
---
diff --git a/oprofile-amd.patch b/oprofile-amd.patch
new file mode 100644
index 0000000..80d629b
--- /dev/null
+++ b/oprofile-amd.patch
@@ -0,0 +1,2147 @@
+Index: oprofile/events/Makefile.am
+diff -u oprofile/events/Makefile.am:1.34 oprofile/events/Makefile.am:1.35
+--- oprofile/events/Makefile.am:1.34 Wed Oct 13 18:07:09 2010
++++ oprofile/events/Makefile.am Mon Nov 29 14:52:36 2010
+@@ -35,6 +35,9 @@
+ x86-64/hammer/events x86-64/hammer/unit_masks \
+ x86-64/family10/events x86-64/family10/unit_masks \
+ x86-64/family11h/events x86-64/family11h/unit_masks \
++ x86-64/family12h/events x86-64/family12h/unit_masks \
++ x86-64/family14h/events x86-64/family14h/unit_masks \
++ x86-64/family15h/events x86-64/family15h/unit_masks \
+ arm/xscale1/events arm/xscale1/unit_masks \
+ arm/xscale2/events arm/xscale2/unit_masks \
+ arm/armv6/events arm/armv6/unit_masks \
+--- /dev/null 2010-12-20 18:01:43.861988919 -0500
++++ oprofile/events/x86-64/family12h/events 2011-01-03 14:37:37.384309624 -0500
+@@ -0,0 +1,23 @@
++# AMD Generic performance events
++#
++# Copyright OProfile authors
++# Copyright (c) 2006-2010 Advanced Micro Devices
++# Contributed by Ray Bryant <raybry at amd.com>,
++# Jason Yeh <jason.yeh at amd.com>
++# Suravee Suthikulpanit <suravee.suthikulpanit at amd.com>
++#
++# Revision: 1.0
++#
++# ChangeLog:
++# 1.0: 30 August 2010.
++# - Initial revision
++#
++event:0x40 counters:0,1,2,3 um:zero minimum:500 name:DATA_CACHE_ACCESSES : Data cache accesses
++event:0x41 counters:0,1,2,3 um:zero minimum:500 name:DATA_CACHE_MISSES : Data cache misses
++event:0x42 counters:0,1,2,3 um:moess minimum:500 name:DATA_CACHE_REFILLS_FROM_L2_OR_NORTHBRIDGE : Data cache refills from L2 or Northbridge
++event:0x43 counters:0,1,2,3 um:moesi minimum:500 name:DATA_CACHE_REFILLS_FROM_NORTHBRIDGE : Data cache refills from Northbridge
++event:0x76 counters:0,1,2,3 um:zero minimum:50000 name:CPU_CLK_UNHALTED : Cycles outside of halt state
++event:0xc0 counters:0,1,2,3 um:zero minimum:50000 name:RETIRED_INSTRUCTIONS : Retired instructions (includes exceptions, interrupts, re-syncs)
++event:0xc1 counters:0,1,2,3 um:zero minimum:500 name:RETIRED_UOPS : Retired micro-ops
++event:0xc2 counters:0,1,2,3 um:zero minimum:500 name:RETIRED_BRANCH_INSTRUCTIONS : Retired branches (conditional, unconditional, exceptions, interrupts)
++event:0xc3 counters:0,1,2,3 um:zero minimum:500 name:RETIRED_MISPREDICTED_BRANCH_INSTRUCTIONS : Retired mispredicted branch instructions
+--- /dev/null 2010-12-20 18:01:43.861988919 -0500
++++ oprofile/events/x86-64/family12h/unit_masks 2011-01-03 14:37:38.374285883 -0500
+@@ -0,0 +1,30 @@
++# AMD Generic unit masks
++#
++# Copyright OProfile authors
++# Copyright (c) 2006-2010 Advanced Micro Devices
++# Contributed by Ray Bryant <raybry at amd.com>,
++# Jason Yeh <jason.yeh at amd.com>
++# Suravee Suthikulpanit <suravee.suthikulpanit at amd.com>
++#
++# Revision: 1.0
++#
++# ChangeLog:
++# 1.0: 30 August 2010.
++# - Initial revision
++#
++name:zero type:mandatory default:0x0
++ 0x0 No unit mask
++name:moesi type:bitmask default:0x1f
++ 0x01 (I)nvalid cache state
++ 0x02 (S)hared cache state
++ 0x04 (E)xclusive cache state
++ 0x08 (O)wner cache state
++ 0x10 (M)odified cache state
++ 0x1f All cache states
++name:moess type:bitmask default:0x1e
++ 0x01 Refill from northbridge
++ 0x02 Shared-state line from L2
++ 0x04 Exclusive-state line from L2
++ 0x08 Owner-state line from L2
++ 0x10 Modified-state line from L2
++ 0x1e All cache states except refill from northbridge
+--- /dev/null 2010-12-20 18:01:43.861988919 -0500
++++ oprofile/events/x86-64/family14h/events 2011-01-03 14:37:39.383445877 -0500
+@@ -0,0 +1,23 @@
++# AMD Generic performance events
++#
++# Copyright OProfile authors
++# Copyright (c) 2006-2010 Advanced Micro Devices
++# Contributed by Ray Bryant <raybry at amd.com>,
++# Jason Yeh <jason.yeh at amd.com>
++# Suravee Suthikulpanit <suravee.suthikulpanit at amd.com>
++#
++# Revision: 1.0
++#
++# ChangeLog:
++# 1.0: 30 August 2010.
++# - Initial revision
++#
++event:0x40 counters:0,1,2,3 um:zero minimum:500 name:DATA_CACHE_ACCESSES : Data cache accesses
++event:0x41 counters:0,1,2,3 um:zero minimum:500 name:DATA_CACHE_MISSES : Data cache misses
++event:0x42 counters:0,1,2,3 um:moess minimum:500 name:DATA_CACHE_REFILLS_FROM_L2_OR_NORTHBRIDGE : Data cache refills from L2 or Northbridge
++event:0x43 counters:0,1,2,3 um:moesi minimum:500 name:DATA_CACHE_REFILLS_FROM_NORTHBRIDGE : Data cache refills from Northbridge
++event:0x76 counters:0,1,2,3 um:zero minimum:50000 name:CPU_CLK_UNHALTED : Cycles outside of halt state
++event:0xc0 counters:0,1,2,3 um:zero minimum:50000 name:RETIRED_INSTRUCTIONS : Retired instructions (includes exceptions, interrupts, re-syncs)
++event:0xc1 counters:0,1,2,3 um:zero minimum:500 name:RETIRED_UOPS : Retired micro-ops
++event:0xc2 counters:0,1,2,3 um:zero minimum:500 name:RETIRED_BRANCH_INSTRUCTIONS : Retired branches (conditional, unconditional, exceptions, interrupts)
++event:0xc3 counters:0,1,2,3 um:zero minimum:500 name:RETIRED_MISPREDICTED_BRANCH_INSTRUCTIONS : Retired mispredicted branch instructions
+--- /dev/null 2010-12-20 18:01:43.861988919 -0500
++++ oprofile/events/x86-64/family14h/unit_masks 2011-01-03 14:37:40.384085285 -0500
+@@ -0,0 +1,30 @@
++# AMD Generic unit masks
++#
++# Copyright OProfile authors
++# Copyright (c) 2006-2010 Advanced Micro Devices
++# Contributed by Ray Bryant <raybry at amd.com>,
++# Jason Yeh <jason.yeh at amd.com>
++# Suravee Suthikulpanit <suravee.suthikulpanit at amd.com>
++#
++# Revision: 1.0
++#
++# ChangeLog:
++# 1.0: 30 August 2010.
++# - Initial revision
++#
++name:zero type:mandatory default:0x0
++ 0x0 No unit mask
++name:moesi type:bitmask default:0x1f
++ 0x01 (I)nvalid cache state
++ 0x02 (S)hared cache state
++ 0x04 (E)xclusive cache state
++ 0x08 (O)wner cache state
++ 0x10 (M)odified cache state
++ 0x1f All cache states
++name:moess type:bitmask default:0x1e
++ 0x01 Refill from northbridge
++ 0x02 Shared-state line from L2
++ 0x04 Exclusive-state line from L2
++ 0x08 Owner-state line from L2
++ 0x10 Modified-state line from L2
++ 0x1e All cache states except refill from northbridge
+--- /dev/null 2010-12-20 18:01:43.861988919 -0500
++++ oprofile/events/x86-64/family15h/events 2011-01-03 14:37:41.391223732 -0500
+@@ -0,0 +1,16 @@
++# AMD Generic performance events
++#
++# Copyright OProfile authors
++# Copyright (c) 2006-2010 Advanced Micro Devices
++# Contributed by Ray Bryant <raybry at amd.com>,
++# Jason Yeh <jason.yeh at amd.com>
++# Suravee Suthikulpanit <suravee.suthikulpanit at amd.com>
++#
++# Revision: 1.0
++#
++# ChangeLog:
++# 1.0: 30 August 2010.
++# - Initial revision
++#
++event:0x76 counters:0,1,2 um:zero minimum:50000 name:CPU_CLK_UNHALTED : Cycles outside of halt state
++event:0xc0 counters:0,1,2,3,4,5 um:zero minimum:50000 name:RETIRED_INSTRUCTIONS : Retired instructions (includes exceptions, interrupts, re-syncs)
+--- /dev/null 2010-12-20 18:01:43.861988919 -0500
++++ oprofile/events/x86-64/family15h/unit_masks 2011-01-03 14:37:42.390283478 -0500
+@@ -0,0 +1,16 @@
++# AMD Generic unit masks
++#
++# Copyright OProfile authors
++# Copyright (c) 2006-2010 Advanced Micro Devices
++# Contributed by Ray Bryant <raybry at amd.com>,
++# Jason Yeh <jason.yeh at amd.com>
++# Suravee Suthikulpanit <suravee.suthikulpanit at amd.com>
++#
++# Revision: 1.0
++#
++# ChangeLog:
++# 1.0: 30 August 2010.
++# - Initial revision
++#
++name:zero type:mandatory default:0x0
++ 0x0 No unit mask
+Index: oprofile/libop/op_cpu_type.c
+diff -u oprofile/libop/op_cpu_type.c:1.57 oprofile/libop/op_cpu_type.c:1.58
+--- oprofile/libop/op_cpu_type.c:1.57 Fri Aug 27 20:15:06 2010
++++ oprofile/libop/op_cpu_type.c Mon Nov 29 14:52:36 2010
+@@ -87,6 +87,9 @@
+ { "Intel Core/i7", "i386/core_i7", CPU_CORE_I7, 4 },
+ { "Intel Atom", "i386/atom", CPU_ATOM, 2 },
+ { "Intel Nehalem microarchitecture", "i386/nehalem", CPU_NEHALEM, 4 },
++ { "AMD64 family12h", "x86-64/family12h", CPU_FAMILY12H, 4 },
++ { "AMD64 family14h", "x86-64/family14h", CPU_FAMILY14H, 4 },
++ { "AMD64 family15h", "x86-64/family15h", CPU_FAMILY15H, 6 },
+ };
+
+ static size_t const nr_cpu_descrs = sizeof(cpu_descrs) / sizeof(struct cpu_descr);
+Index: oprofile/libop/op_cpu_type.h
+diff -u oprofile/libop/op_cpu_type.h:1.50 oprofile/libop/op_cpu_type.h:1.51
+--- oprofile/libop/op_cpu_type.h:1.50 Fri Aug 27 20:15:06 2010
++++ oprofile/libop/op_cpu_type.h Mon Nov 29 14:52:36 2010
+@@ -84,6 +84,9 @@
+ CPU_CORE_I7, /* Intel Core i7, Nehalem */
+ CPU_ATOM, /* First generation Intel Atom */
+ CPU_NEHALEM, /* Intel Nehalem microarchitecture */
++ CPU_FAMILY12H, /**< AMD family 12h */
++ CPU_FAMILY14H, /**< AMD family 14h */
++ CPU_FAMILY15H, /**< AMD family 15h */
+ MAX_CPU_TYPE
+ } op_cpu;
+
+Index: oprofile/libop/op_events.c
+diff -u oprofile/libop/op_events.c:1.101 oprofile/libop/op_events.c:1.102
+--- oprofile/libop/op_events.c:1.101 Fri Aug 27 20:15:06 2010
++++ oprofile/libop/op_events.c Mon Nov 29 14:52:36 2010
+@@ -972,6 +972,9 @@
+ case CPU_ATOM:
+ case CPU_CORE_I7:
+ case CPU_NEHALEM:
++ case CPU_FAMILY12H:
++ case CPU_FAMILY14H:
++ case CPU_FAMILY15H:
+ descr->name = "CPU_CLK_UNHALTED";
+ break;
+
+Index: oprofile/utils/ophelp.c
+diff -u oprofile/utils/ophelp.c:1.38 oprofile/utils/ophelp.c:1.39
+--- oprofile/utils/ophelp.c:1.38 Fri Aug 27 20:15:07 2010
++++ oprofile/utils/ophelp.c Mon Nov 29 14:52:36 2010
+@@ -469,6 +469,18 @@
+ "See BIOS and Kernel Developer's Guide for AMD Family 11h Processors\n"
+ "(41256.pdf), Section 3.14\n\n";
+ break;
++ case CPU_FAMILY12H:
++ event_doc =
++ "See BIOS and Kernel Developer's Guide for AMD Family 12h Processors\n";
++ break;
++ case CPU_FAMILY14H:
++ event_doc =
++ "See BIOS and Kernel Developer's Guide for AMD Family 14h Processors\n";
++ break;
++ case CPU_FAMILY15H:
++ event_doc =
++ "See BIOS and Kernel Developer's Guide for AMD Family 15h Processors\n";
++ break;
+ case CPU_ATHLON:
+ event_doc =
+ "See AMD Athlon Processor x86 Code Optimization Guide\n"
+Index: oprofile/daemon/init.c
+diff -u oprofile/daemon/init.c:1.14 oprofile/daemon/init.c:1.15
+--- oprofile/daemon/init.c:1.14 Fri Aug 8 15:08:19 2008
++++ oprofile/daemon/init.c Mon Nov 29 15:05:44 2010
+@@ -24,6 +24,7 @@
+ #include "opd_anon.h"
+ #include "opd_perfmon.h"
+ #include "opd_printf.h"
++#include "opd_extended.h"
+
+ #include "op_version.h"
+ #include "op_config.h"
+@@ -282,6 +283,8 @@
+ opd_do_jitdumps();
+ opd_print_stats();
+ printf("oprofiled stopped %s", op_get_time());
++ opd_ext_deinitialize();
++
+ exit(EXIT_FAILURE);
+ }
+
+Index: oprofile/daemon/opd_extended.c
+diff -u oprofile/daemon/opd_extended.c:1.3 oprofile/daemon/opd_extended.c:1.4
+--- oprofile/daemon/opd_extended.c:1.3 Wed May 27 19:12:48 2009
++++ oprofile/daemon/opd_extended.c Mon Nov 29 15:05:44 2010
+@@ -109,6 +109,20 @@
+ }
+
+
++int opd_ext_deinitialize()
++{
++ int ret = EXIT_FAILURE;
++
++ if(opd_ext_feat_index == -1) {
++ return 0;
++ }
++
++ ret = ext_feature_table[opd_ext_feat_index].handlers->ext_deinit();
++
++ return ret;
++}
++
++
+ void opd_ext_print_stats()
+ {
+ if (is_ext_enabled()
+Index: oprofile/daemon/opd_extended.h
+diff -u oprofile/daemon/opd_extended.h:1.1 oprofile/daemon/opd_extended.h:1.2
+--- oprofile/daemon/opd_extended.h:1.1 Wed Apr 1 20:57:36 2009
++++ oprofile/daemon/opd_extended.h Mon Nov 29 15:05:44 2010
+@@ -35,6 +35,8 @@
+ struct opd_ext_handlers {
+ // Extended init
+ int (*ext_init)(char const *);
++ // Extended deinit
++ int (*ext_deinit)();
+ // Extended statistics
+ int (*ext_print_stats)();
+ // Extended sfile handlers
+@@ -61,6 +63,13 @@
+ extern int opd_ext_initialize(char const * value);
+
+ /**
++ * @param value: commandline input option string
++ *
++ * Deinitialize
++ */
++extern int opd_ext_deinitialize();
++
++/**
+ * Print out extended feature statistics in oprofiled.log file
+ */
+ extern void opd_ext_print_stats();
+Index: oprofile/daemon/opd_ibs.c
+diff -u oprofile/daemon/opd_ibs.c:1.2 oprofile/daemon/opd_ibs.c:1.3
+--- oprofile/daemon/opd_ibs.c:1.2 Fri Jun 5 15:26:37 2009
++++ oprofile/daemon/opd_ibs.c Mon Nov 29 15:05:44 2010
+@@ -2,7 +2,7 @@
+ * @file daemon/opd_ibs.c
+ * AMD Family10h Instruction Based Sampling (IBS) handling.
+ *
+- * @remark Copyright 2007 OProfile authors
++ * @remark Copyright 2007-2010 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Jason Yeh <jason.yeh at amd.com>
+@@ -32,22 +32,37 @@
+ #include <stdio.h>
+ #include <errno.h>
+ #include <string.h>
++#include <limits.h>
++
++#if defined(__i386__) && defined(__PIC__)
++/* %ebx may be the PIC register. */
++ #define __cpuid(level, a, b, c, d) \
++ __asm__ ("xchgl\t%%ebx, %1\n\t" \
++ "cpuid\n\t" \
++ "xchgl\t%%ebx, %1\n\t" \
++ : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
++ : "0" (level))
++#else
++ #define __cpuid(level, a, b, c, d) \
++ __asm__ ("cpuid\n\t" \
++ : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
++ : "0" (level))
++#endif
+
+ extern op_cpu cpu_type;
+ extern int no_event_ok;
+ extern int sfile_equal(struct sfile const * sf, struct sfile const * sf2);
+ extern void sfile_dup(struct sfile * to, struct sfile * from);
++extern char * session_dir;
+
+-/* IBS Select Arrays/Counters */
++/* IBS Select Counters */
+ static unsigned int ibs_selected_size;
++
++/* These flags store the IBS-derived events selection. */
+ static unsigned int ibs_fetch_selected_flag;
+-static unsigned int ibs_fetch_selected_size;
+ static unsigned int ibs_op_selected_flag;
+-static unsigned int ibs_op_selected_size;
+ static unsigned int ibs_op_ls_selected_flag;
+-static unsigned int ibs_op_ls_selected_size;
+ static unsigned int ibs_op_nb_selected_flag;
+-static unsigned int ibs_op_nb_selected_size;
+
+ /* IBS Statistics */
+ static unsigned long ibs_fetch_sample_stats;
+@@ -64,6 +79,18 @@
+ /* IBS Virtual Counter Index(VCI) Map*/
+ unsigned int ibs_vci_map[OP_MAX_IBS_COUNTERS];
+
++/* CPUID information */
++unsigned int ibs_family;
++unsigned int ibs_model;
++unsigned int ibs_stepping;
++
++/* IBS Extended MSRs */
++static unsigned long ibs_bta_enabled;
++
++/* IBS log files */
++FILE * memaccess_log;
++FILE * bta_log;
++
+ /**
+ * This function converts IBS fetch event flags and values into
+ * derived events. If the tagged (sampled) fetched caused a derived
+@@ -75,7 +102,7 @@
+ if (!trans_fetch)
+ return;
+
+- trans_ibs_fetch(trans, ibs_fetch_selected_flag, ibs_fetch_selected_size);
++ trans_ibs_fetch(trans, ibs_fetch_selected_flag);
+ }
+
+
+@@ -89,9 +116,16 @@
+ if (!trans_op)
+ return;
+
+- trans_ibs_op(trans, ibs_op_selected_flag, ibs_op_selected_size);
+- trans_ibs_op_ls(trans, ibs_op_ls_selected_flag, ibs_op_ls_selected_size);
+- trans_ibs_op_nb(trans, ibs_op_nb_selected_flag, ibs_op_nb_selected_size);
++ trans_ibs_op_mask_reserved(ibs_family, trans);
++
++ if (trans_ibs_op_rip_invalid(trans) != 0)
++ return;
++
++ trans_ibs_op(trans, ibs_op_selected_flag);
++ trans_ibs_op_ls(trans, ibs_op_ls_selected_flag);
++ trans_ibs_op_nb(trans, ibs_op_nb_selected_flag);
++ trans_ibs_op_ls_memaccess(trans);
++ trans_ibs_op_bta(trans);
+ }
+
+
+@@ -150,6 +184,26 @@
+ }
+
+
++static void get_ibs_bta_status()
++{
++ FILE * fp = NULL;
++ char buf[PATH_MAX];
++
++ /* Default to disable */
++ ibs_bta_enabled = 0;
++
++ snprintf(buf, PATH_MAX, "/dev/oprofile/ibs_op/branch_target");
++ fp = fopen(buf, "r");
++ if (!fp)
++ return;
++
++ while (fgets(buf, PATH_MAX, fp) != NULL)
++ ibs_bta_enabled = strtoul(buf, NULL, 10);
++
++ fclose(fp);
++}
++
++
+ void code_ibs_fetch_sample(struct transient * trans)
+ {
+ struct ibs_fetch_sample * trans_fetch = NULL;
+@@ -169,12 +223,12 @@
+
+ trans_fetch->rip = pop_buffer_value(trans);
+
+- trans_fetch->ibs_fetch_lin_addr_low = pop_buffer_value(trans);
+- trans_fetch->ibs_fetch_lin_addr_high = pop_buffer_value(trans);
++ trans_fetch->ibs_fetch_lin_addr_low = pop_buffer_value(trans);
++ trans_fetch->ibs_fetch_lin_addr_high = pop_buffer_value(trans);
+
+- trans_fetch->ibs_fetch_ctl_low = pop_buffer_value(trans);
+- trans_fetch->ibs_fetch_ctl_high = pop_buffer_value(trans);
+- trans_fetch->ibs_fetch_phys_addr_low = pop_buffer_value(trans);
++ trans_fetch->ibs_fetch_ctl_low = pop_buffer_value(trans);
++ trans_fetch->ibs_fetch_ctl_high = pop_buffer_value(trans);
++ trans_fetch->ibs_fetch_phys_addr_low = pop_buffer_value(trans);
+ trans_fetch->ibs_fetch_phys_addr_high = pop_buffer_value(trans);
+
+ verbprintf(vsamples,
+@@ -200,6 +254,30 @@
+ }
+
+
++static void get_ibs_op_bta_sample(struct transient * trans,
++ struct ibs_op_sample * trans_op)
++{
++ // Check remaining
++ if (!enough_remaining(trans, 2)) {
++ verbprintf(vext, "not enough remaining\n");
++ trans->remaining = 0;
++ ibs_op_incomplete_stats++;
++ return;
++ }
++
++ if (ibs_bta_enabled == 1) {
++ trans_op->ibs_op_brtgt_addr = pop_buffer_value(trans);
++
++ // Check if branch target address is valid (MSRC001_1035[37] == 1]
++ if ((trans_op->ibs_op_data1_high & (0x00000001 << 5)) == 0) {
++ trans_op->ibs_op_brtgt_addr = 0;
++ }
++ } else {
++ trans_op->ibs_op_brtgt_addr = 0;
++ }
++}
++
++
+ void code_ibs_op_sample(struct transient * trans)
+ {
+ struct ibs_op_sample * trans_op= NULL;
+@@ -233,8 +311,10 @@
+ trans_op->ibs_op_phys_addr_low = pop_buffer_value(trans);
+ trans_op->ibs_op_phys_addr_high = pop_buffer_value(trans);
+
++ get_ibs_op_bta_sample(trans, trans_op);
++
+ verbprintf(vsamples,
+- "IBS_OP_X CPU:%ld PID:%d RIP:%lx D1HI:%x D1LO:%x D2LO:%x D3HI:%x D3LO:%x L_LO:%x P_LO:%x\n",
++ "IBS_OP_X CPU:%ld PID:%d RIP:%lx D1HI:%x D1LO:%x D2LO:%x D3HI:%x D3LO:%x L_LO:%x P_LO:%x\n",
+ trans->cpu,
+ trans->tgid,
+ trans_op->rip,
+@@ -339,16 +419,12 @@
+ // Grouping
+ if (IS_IBS_FETCH(event->val)) {
+ ibs_fetch_selected_flag |= 1 << IBS_FETCH_OFFSET(event->val);
+- ibs_fetch_selected_size++;
+ } else if (IS_IBS_OP(event->val)) {
+ ibs_op_selected_flag |= 1 << IBS_OP_OFFSET(event->val);
+- ibs_op_selected_size++;
+ } else if (IS_IBS_OP_LS(event->val)) {
+ ibs_op_ls_selected_flag |= 1 << IBS_OP_LS_OFFSET(event->val);
+- ibs_op_ls_selected_size++;
+ } else if (IS_IBS_OP_NB(event->val)) {
+ ibs_op_nb_selected_flag |= 1 << IBS_OP_NB_OFFSET(event->val);
+- ibs_op_nb_selected_size++;
+ } else {
+ return -1;
+ }
+@@ -402,7 +478,6 @@
+ }
+
+
+-
+ static int ibs_parse_and_set_um_op(char const * str, unsigned long int * ibs_op_um)
+ {
+ char * end = NULL;
+@@ -418,6 +493,31 @@
+ }
+
+
++static void check_cpuid_family_model_stepping()
++{
++ union {
++ unsigned eax;
++ struct {
++ unsigned stepping : 4;
++ unsigned model : 4;
++ unsigned family : 4;
++ unsigned res : 4;
++ unsigned ext_model : 4;
++ unsigned ext_family : 8;
++ unsigned res2 : 4;
++ };
++ } v;
++ unsigned ebx, ecx, edx;
++
++ /* CPUID Fn0000_0001_EAX Family, Model, Stepping */
++ __cpuid(1, v.eax, ebx, ecx, edx);
++
++ ibs_family = v.family + v.ext_family;
++ ibs_model = v.model + v.ext_model;
++ ibs_stepping = v.stepping;
++}
++
++
+ static int ibs_init(char const * argv)
+ {
+ char * tmp, * ptr, * tok1, * tok2 = NULL;
+@@ -532,15 +632,67 @@
+
+ // Allow no event
+ no_event_ok = 1;
++
++ check_cpuid_family_model_stepping();
++
++ get_ibs_bta_status();
++
++ /* Create IBS memory access log */
++ memaccess_log = NULL;
++ if (ibs_op_um & 0x2) {
++ char filename[1024];
++ strncpy(filename, session_dir, 1023);
++ strncat(filename, "/samples/ibs_memaccess.log", 1024);
++ if ((memaccess_log = fopen(filename, "w")) == NULL) {
++ verbprintf(vext, "Warning: Cannot create file %s\n", filename);
++
++ } else {
++ fprintf (memaccess_log, "# IBS Memory Access Log\n\n");
++ fprintf (memaccess_log, "# Format: app_cookie,cookie,cpu,tgid,tid,pc,branch-target-address,\n");
++ fprintf (memaccess_log, "# phy-hi:phy-low,lin-hi:lin-low,accese-type,latency\n\n");
++ }
++ }
++
++ // Create IBS Branch Target Address (BTA) log
++ bta_log = NULL;
++ if (ibs_bta_enabled) {
++ char filename[1024];
++ strncpy(filename, session_dir, 1023);
++ strncat(filename, "/samples/ibs_bta.log", 1024);
++ if ((bta_log = fopen(filename, "w")) == NULL) {
++ verbprintf(vext, "Warning: Cannot create file %s\n", filename);
++ } else {
++ fprintf (bta_log, "# IBS Memory Access Log\n\n");
++ fprintf (bta_log, "# Format: app_cookie,cookie,cpu,tgid,tid,pc,branch-target-address\n\n");
++ }
++ }
++
++ return 0;
++}
++
++
++static int ibs_deinit()
++{
++ if (memaccess_log) {
++ fclose (memaccess_log);
++ memaccess_log = NULL;
++ }
++
++ if (bta_log) {
++ fclose (bta_log);
++ bta_log = NULL;
++ }
+ return 0;
+ }
+
+
+ static int ibs_print_stats()
+ {
+- printf("Nr. IBS Fetch samples : %lu (%lu entries)\n", ibs_fetch_sample_stats, (ibs_fetch_sample_stats * 7));
++ printf("Nr. IBS Fetch samples : %lu (%lu entries)\n",
++ ibs_fetch_sample_stats, (ibs_fetch_sample_stats * 7));
+ printf("Nr. IBS Fetch incompletes : %lu\n", ibs_fetch_incomplete_stats);
+- printf("Nr. IBS Op samples : %lu (%lu entries)\n", ibs_op_sample_stats, (ibs_op_sample_stats * 13));
++ printf("Nr. IBS Op samples : %lu (%lu entries)\n",
++ ibs_op_sample_stats, (ibs_op_sample_stats * 13));
+ printf("Nr. IBS Op incompletes : %lu\n", ibs_op_incomplete_stats);
+ printf("Nr. IBS derived events : %lu\n", ibs_derived_event_stats);
+ return 0;
+@@ -686,7 +838,8 @@
+
+ struct opd_ext_handlers ibs_handlers =
+ {
+- .ext_init = &ibs_init,
++ .ext_init = &ibs_init,
++ .ext_deinit = &ibs_deinit,
+ .ext_print_stats = &ibs_print_stats,
+- .ext_sfile = &ibs_sfile_handlers
++ .ext_sfile = &ibs_sfile_handlers
+ };
+Index: oprofile/daemon/opd_ibs.h
+diff -u oprofile/daemon/opd_ibs.h:1.1 oprofile/daemon/opd_ibs.h:1.2
+--- oprofile/daemon/opd_ibs.h:1.1 Fri Apr 17 18:40:43 2009
++++ oprofile/daemon/opd_ibs.h Mon Nov 29 15:05:44 2010
+@@ -2,7 +2,7 @@
+ * @file daemon/opd_ibs.h
+ * AMD Family10h Instruction Based Sampling (IBS) handling.
+ *
+- * @remark Copyright 2008 OProfile authors
++ * @remark Copyright 2008-2010 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Jason Yeh <jason.yeh at amd.com>
+@@ -70,18 +70,14 @@
+ /* MSRC001_1037 IBS Op Data 3 Register */
+ unsigned int ibs_op_data3_low;
+ unsigned int ibs_op_data3_high;
++ /* MSRC001_1038 IBS DC Linear Address */
+ unsigned int ibs_op_ldst_linaddr_low;
+ unsigned int ibs_op_ldst_linaddr_high;
++ /* MSRC001_1039 IBS DC Physical Address */
+ unsigned int ibs_op_phys_addr_low;
+ unsigned int ibs_op_phys_addr_high;
+-};
+-
+-
+-enum IBSL1PAGESIZE {
+- L1TLB4K = 0,
+- L1TLB2M,
+- L1TLB1G,
+- L1TLB_INVALID
++ /* MSRC001_103B IBS Branch Target Address */
++ unsigned long ibs_op_brtgt_addr;
+ };
+
+
+Index: oprofile/daemon/opd_ibs_macro.h
+diff -u oprofile/daemon/opd_ibs_macro.h:1.1 oprofile/daemon/opd_ibs_macro.h:1.2
+--- oprofile/daemon/opd_ibs_macro.h:1.1 Fri Apr 17 18:40:43 2009
++++ oprofile/daemon/opd_ibs_macro.h Mon Nov 29 15:05:44 2010
+@@ -1,8 +1,8 @@
+ /**
+ * @file daemon/opd_ibs_macro.h
+- * AMD Family10h Instruction Based Sampling (IBS) related macro.
++ * AMD Instruction Based Sampling (IBS) related macro.
+ *
+- * @remark Copyright 2008 OProfile authors
++ * @remark Copyright 2008-2010 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Jason Yeh <jason.yeh at amd.com>
+@@ -16,7 +16,8 @@
+
+ /**
+ * The following defines are bit masks that are used to select
+- * IBS fetch event flags and values at the MSR level.
++ * IBS fetch event flags and values at the
++ * MSRC001_1030 IBS Fetch Control Register (IbsFetchCtl)
+ */
+ #define FETCH_MASK_LATENCY 0x0000ffff
+ #define FETCH_MASK_COMPLETE 0x00040000
+@@ -34,7 +35,10 @@
+ * The following defines are bit masks that are used to select
+ * IBS op event flags and values at the MSR level.
+ */
++
++/* MSRC001_1035 IBS Op Data Register (IbsOpData) */
+ #define BR_MASK_RETIRE 0x0000ffff
++#define MASK_RIP_INVALID 0x00000040
+ #define BR_MASK_BRN_RET 0x00000020
+ #define BR_MASK_BRN_MISP 0x00000010
+ #define BR_MASK_BRN_TAKEN 0x00000008
+@@ -42,17 +46,19 @@
+ #define BR_MASK_MISP_RETURN 0x00000002
+ #define BR_MASK_BRN_RESYNC 0x00000001
+
++/* MSRC001_1036 IBS Op Data Register (IbsOpData2) */
+ #define NB_MASK_L3_STATE 0x00000020
+ #define NB_MASK_REQ_DST_PROC 0x00000010
+ #define NB_MASK_REQ_DATA_SRC 0x00000007
+
++/* MSRC001_1037 IBS Op Data Register (IbsOpData3) */
+ #define DC_MASK_L2_HIT_1G 0x00080000
+ #define DC_MASK_PHY_ADDR_VALID 0x00040000
+ #define DC_MASK_LIN_ADDR_VALID 0x00020000
+ #define DC_MASK_MAB_HIT 0x00010000
+ #define DC_MASK_LOCKED_OP 0x00008000
+-#define DC_MASK_WC_MEM_ACCESS 0x00004000
+-#define DC_MASK_UC_MEM_ACCESS 0x00002000
++#define DC_MASK_UC_MEM_ACCESS 0x00004000
++#define DC_MASK_WC_MEM_ACCESS 0x00002000
+ #define DC_MASK_ST_TO_LD_CANCEL 0x00001000
+ #define DC_MASK_ST_TO_LD_FOR 0x00000800
+ #define DC_MASK_ST_BANK_CONFLICT 0x00000400
+@@ -77,10 +83,9 @@
+ * at 0xf000.
+ *
+ * The definitions in this file *must* match definitions
+- * of IBS derived events in gh-events.xml and in the
+- * oprofile AMD Family 10h events file. More information
++ * of IBS derived events. More information
+ * about IBS derived events is given in the Software Oprimization
+- * Guide for AMD Family 10h Processors.
++ * Guide.
+ */
+
+ /**
+@@ -108,6 +113,8 @@
+ #define IBS_FETCH_MAX (IBS_FETCH_END - IBS_FETCH_BASE + 1)
+ #define IS_IBS_FETCH(x) (IBS_FETCH_BASE <= x && x <= IBS_FETCH_END)
+ #define IBS_FETCH_OFFSET(x) (x - IBS_FETCH_BASE)
++#define CHECK_FETCH_SELECTED_FLAG(x) if ( selected_flag & (1 << IBS_FETCH_OFFSET(x)))
++
+
+ /**
+ * The following defines associate a 16-bit select value with an IBS
+@@ -129,6 +136,8 @@
+ #define IBS_OP_MAX (IBS_OP_END - IBS_OP_BASE + 1)
+ #define IS_IBS_OP(x) (IBS_OP_BASE <= x && x <= IBS_OP_END)
+ #define IBS_OP_OFFSET(x) (x - IBS_OP_BASE)
++#define CHECK_OP_SELECTED_FLAG(x) if ( selected_flag & (1 << IBS_OP_OFFSET(x)))
++
+
+ /**
+ * The following defines associate a 16-bit select value with an IBS
+@@ -166,6 +175,7 @@
+ #define IBS_OP_LS_MAX (IBS_OP_LS_END - IBS_OP_LS_BASE + 1)
+ #define IS_IBS_OP_LS(x) (IBS_OP_LS_BASE <= x && x <= IBS_OP_LS_END)
+ #define IBS_OP_LS_OFFSET(x) (x - IBS_OP_LS_BASE)
++#define CHECK_OP_LS_SELECTED_FLAG(x) if ( selected_flag & (1 << IBS_OP_LS_OFFSET(x)))
+
+
+ /**
+@@ -191,6 +201,7 @@
+ #define IBS_OP_NB_MAX (IBS_OP_NB_END - IBS_OP_NB_BASE + 1)
+ #define IS_IBS_OP_NB(x) (IBS_OP_NB_BASE <= x && x <= IBS_OP_NB_END)
+ #define IBS_OP_NB_OFFSET(x) (x - IBS_OP_NB_BASE)
++#define CHECK_OP_NB_SELECTED_FLAG(x) if ( selected_flag & (1 << IBS_OP_NB_OFFSET(x)))
+
+
+ #define OP_MAX_IBS_COUNTERS (IBS_FETCH_MAX + IBS_OP_MAX + IBS_OP_LS_MAX + IBS_OP_NB_MAX)
+@@ -215,8 +226,18 @@
+ /** Bit 52 IbsPhyAddrValid: instruction fetch physical address valid. */
+ #define IBS_FETCH_PHYS_ADDR_VALID(x) ((x->ibs_fetch_ctl_high & FETCH_MASK_PHY_ADDR) != 0)
+
++enum IBSL1PAGESIZE {
++ L1TLB4K = 0,
++ L1TLB2M,
++ L1TLB1G,
++ L1TLB_INVALID
++};
++
+ /** Bits 54:53 IbsL1TlbPgSz: instruction cache L1TLB page size. */
+ #define IBS_FETCH_TLB_PAGE_SIZE(x) ((unsigned short)((x->ibs_fetch_ctl_high >> 21) & 0x3))
++#define IBS_FETCH_TLB_PAGE_SIZE_4K(x) (IBS_FETCH_TLB_PAGE_SIZE(x) == L1TLB4K)
++#define IBS_FETCH_TLB_PAGE_SIZE_2M(x) (IBS_FETCH_TLB_PAGE_SIZE(x) == L1TLB2M)
++#define IBS_FETCH_TLB_PAGE_SIZE_1G(x) (IBS_FETCH_TLB_PAGE_SIZE(x) == L1TLB1G)
+
+ /** Bit 55 IbsL1TlbMiss: instruction cache L1TLB miss. */
+ #define IBS_FETCH_M_L1_TLB_MISS(x) ((x->ibs_fetch_ctl_high & FETCH_MASK_L1_MISS) != 0)
+@@ -252,22 +273,25 @@
+ #define IBS_OP_TAG_TO_RETIRE_CYCLES(x) ((unsigned short)((x->ibs_op_data1_low >> 16) & BR_MASK_RETIRE))
+
+ /** 32 op_branch_resync : resync macro-op. */
+-#define IBS_OP_OP_BRANCH_RESYNC(x) ((x->ibs_op_data1_high & BR_MASK_BRN_RESYNC) != 0)
++#define IBS_OP_BRANCH_RESYNC(x) ((x->ibs_op_data1_high & BR_MASK_BRN_RESYNC) != 0)
+
+ /** 33 op_mispredict_return : mispredicted return macro-op. */
+-#define IBS_OP_OP_MISPREDICT_RETURN(x) ((x->ibs_op_data1_high & BR_MASK_MISP_RETURN) != 0)
++#define IBS_OP_MISPREDICT_RETURN(x) ((x->ibs_op_data1_high & BR_MASK_MISP_RETURN) != 0)
+
+ /** 34 IbsOpReturn: return macro-op. */
+-#define IBS_OP_OP_RETURN(x) ((x->ibs_op_data1_high & BR_MASK_RETURN) != 0)
++#define IBS_OP_RETURN(x) ((x->ibs_op_data1_high & BR_MASK_RETURN) != 0)
+
+ /** 35 IbsOpBrnTaken: taken branch macro-op. */
+-#define IBS_OP_OP_BRANCH_TAKEN(x) ((x->ibs_op_data1_high & BR_MASK_BRN_TAKEN) != 0)
++#define IBS_OP_BRANCH_TAKEN(x) ((x->ibs_op_data1_high & BR_MASK_BRN_TAKEN) != 0)
+
+ /** 36 IbsOpBrnMisp: mispredicted branch macro-op. */
+-#define IBS_OP_OP_BRANCH_MISPREDICT(x) ((x->ibs_op_data1_high & BR_MASK_BRN_MISP) != 0)
++#define IBS_OP_BRANCH_MISPREDICT(x) ((x->ibs_op_data1_high & BR_MASK_BRN_MISP) != 0)
+
+ /** 37 IbsOpBrnRet: branch macro-op retired. */
+-#define IBS_OP_OP_BRANCH_RETIRED(x) ((x->ibs_op_data1_high & BR_MASK_BRN_RET) != 0)
++#define IBS_OP_BRANCH_RETIRED(x) ((x->ibs_op_data1_high & BR_MASK_BRN_RET) != 0)
++
++/** 38 IbsRipInvalid: RIP invalid. */
++#define IBS_OP_RIP_INVALID(x) ((x->ibs_op_data1_high & MASK_RIP_INVALID) != 0)
+
+ /**
+ * MSRC001_1036 IBS Op Data 2 Register (IbsOpData2)
+@@ -282,10 +306,18 @@
+ /** 2:0 NbIbsReqSrc: Northbridge IBS request data source */
+ #define IBS_OP_NB_IBS_REQ_SRC(x) ((unsigned char)(x->ibs_op_data2_low & NB_MASK_REQ_DATA_SRC))
+
++#define IBS_OP_NB_IBS_REQ_SRC_01(x) (IBS_OP_NB_IBS_REQ_SRC(x) == 0x01)
++
++#define IBS_OP_NB_IBS_REQ_SRC_02(x) (IBS_OP_NB_IBS_REQ_SRC(x) == 0x02)
++
++#define IBS_OP_NB_IBS_REQ_SRC_03(x) (IBS_OP_NB_IBS_REQ_SRC(x) == 0x03)
++
++#define IBS_OP_NB_IBS_REQ_SRC_07(x) (IBS_OP_NB_IBS_REQ_SRC(x) == 0x07)
++
+ /**
+ * MSRC001_1037 IBS Op Data3 Register
+ *
+- * Bits 48:32 IbsDcMissLat
++ * Bits 47:32 IbsDcMissLat
+ */
+ #define IBS_OP_DC_MISS_LATENCY(x) ((unsigned short)(x->ibs_op_data3_high & 0xffff))
+
+@@ -328,12 +360,12 @@
+ /** 12 ibs_dc_st_to_ld_can: Data forwarding from store to load operation cancelled */
+ #define IBS_OP_IBS_DC_ST_TO_LD_CAN(x) ((x->ibs_op_data3_low & DC_MASK_ST_TO_LD_CANCEL) != 0)
+
+-/** 13 ibs_dc_uc_mem_acc: UC memory access */
+-#define IBS_OP_IBS_DC_UC_MEM_ACC(x) ((x->ibs_op_data3_low & DC_MASK_UC_MEM_ACCESS) != 0)
+-
+-/** 14 ibs_dc_wc_mem_acc : WC memory access */
++/** 13 ibs_dc_wc_mem_acc : WC memory access */
+ #define IBS_OP_IBS_DC_WC_MEM_ACC(x) ((x->ibs_op_data3_low & DC_MASK_WC_MEM_ACCESS) != 0)
+
++/** 14 ibs_dc_uc_mem_acc : UC memory access */
++#define IBS_OP_IBS_DC_UC_MEM_ACC(x) ((x->ibs_op_data3_low & DC_MASK_UC_MEM_ACCESS) != 0)
++
+ /** 15 ibs_locked_op: Locked operation */
+ #define IBS_OP_IBS_LOCKED_OP(x) ((x->ibs_op_data3_low & DC_MASK_LOCKED_OP) != 0)
+
+@@ -362,5 +394,4 @@
+ */
+ #define AGG_IBS_COUNT(EV, COUNT) opd_log_ibs_count(EV, trans, COUNT)
+
+-
+ #endif /*OPD_IBS_MACRO_H*/
+Index: oprofile/daemon/opd_ibs_trans.c
+diff -u oprofile/daemon/opd_ibs_trans.c:1.1 oprofile/daemon/opd_ibs_trans.c:1.2
+--- oprofile/daemon/opd_ibs_trans.c:1.1 Fri Apr 17 18:40:43 2009
++++ oprofile/daemon/opd_ibs_trans.c Mon Nov 29 15:05:44 2010
+@@ -1,8 +1,8 @@
+ /**
+ * @file daemon/opd_ibs_trans.c
+- * AMD Family10h Instruction Based Sampling (IBS) translation.
++ * AMD Instruction Based Sampling (IBS) translation.
+ *
+- * @remark Copyright 2008 OProfile authors
++ * @remark Copyright 2008 - 2010 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Jason Yeh <jason.yeh at amd.com>
+@@ -20,205 +20,185 @@
+ #include <stdlib.h>
+ #include <stdio.h>
+
+-#define MAX_EVENTS_PER_GROUP 32
++extern FILE * bta_log;
++extern FILE * memaccess_log;
+
+ /*
+- * --------------------- OP DERIVED FUNCTION
++ * --------------------- FETCH DERIVED FUNCTION
+ */
+-void trans_ibs_fetch (struct transient * trans, unsigned int selected_flag, unsigned int size)
++void trans_ibs_fetch (struct transient * trans, unsigned int selected_flag)
+ {
+ struct ibs_fetch_sample * trans_fetch = ((struct ibs_sample*)(trans->ext))->fetch;
+- unsigned int i, j, mask = 1;
+
+- for (i = IBS_FETCH_BASE, j =0 ; i <= IBS_FETCH_END && j < size ; i++, mask = mask << 1) {
++ if ((selected_flag) == 0)
++ return;
+
+- if ((selected_flag & mask) == 0)
+- continue;
++ CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_ALL) {
++ /* IBS all fetch samples (kills + attempts) */
++ AGG_IBS_EVENT(DE_IBS_FETCH_ALL);
++ }
++
++ CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_KILLED) {
++ /* IBS killed fetches ("case 0") -- All interesting event
++ * flags are clear */
++ if (IBS_FETCH_KILLED(trans_fetch))
++ AGG_IBS_EVENT(DE_IBS_FETCH_KILLED);
++ }
+
+- j++;
++ CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_ATTEMPTED) {
++ /* Any non-killed fetch is an attempted fetch */
++ AGG_IBS_EVENT(DE_IBS_FETCH_ATTEMPTED);
++ }
+
+- switch (i) {
+-
+- case DE_IBS_FETCH_ALL:
+- /* IBS all fetch samples (kills + attempts) */
+- AGG_IBS_EVENT(DE_IBS_FETCH_ALL);
+- break;
+-
+- case DE_IBS_FETCH_KILLED:
+- /* IBS killed fetches ("case 0") -- All interesting event
+- * flags are clear */
+- if (IBS_FETCH_KILLED(trans_fetch))
+- AGG_IBS_EVENT(DE_IBS_FETCH_KILLED);
+- break;
+-
+- case DE_IBS_FETCH_ATTEMPTED:
+- /* Any non-killed fetch is an attempted fetch */
+- AGG_IBS_EVENT(DE_IBS_FETCH_ATTEMPTED);
+- break;
+-
+- case DE_IBS_FETCH_COMPLETED:
+- if (IBS_FETCH_FETCH_COMPLETION(trans_fetch))
+- /* IBS Fetch Completed */
+- AGG_IBS_EVENT(DE_IBS_FETCH_COMPLETED);
+- break;
+-
+- case DE_IBS_FETCH_ABORTED:
+- if (!IBS_FETCH_FETCH_COMPLETION(trans_fetch))
+- /* IBS Fetch Aborted */
+- AGG_IBS_EVENT(DE_IBS_FETCH_ABORTED);
+- break;
+-
+- case DE_IBS_L1_ITLB_HIT:
+- /* IBS L1 ITLB hit */
+- if (IBS_FETCH_L1_TLB_HIT(trans_fetch))
+- AGG_IBS_EVENT(DE_IBS_L1_ITLB_HIT);
+- break;
+-
+- case DE_IBS_ITLB_L1M_L2H:
+- /* IBS L1 ITLB miss and L2 ITLB hit */
+- if (IBS_FETCH_ITLB_L1M_L2H(trans_fetch))
+- AGG_IBS_EVENT(DE_IBS_ITLB_L1M_L2H);
+- break;
+-
+- case DE_IBS_ITLB_L1M_L2M:
+- /* IBS L1 & L2 ITLB miss; complete ITLB miss */
+- if (IBS_FETCH_ITLB_L1M_L2M(trans_fetch))
+- AGG_IBS_EVENT(DE_IBS_ITLB_L1M_L2M);
+- break;
+-
+- case DE_IBS_IC_MISS:
+- /* IBS instruction cache miss */
+- if (IBS_FETCH_INST_CACHE_MISS(trans_fetch))
+- AGG_IBS_EVENT(DE_IBS_IC_MISS);
+- break;
+-
+- case DE_IBS_IC_HIT:
+- /* IBS instruction cache hit */
+- if (IBS_FETCH_INST_CACHE_HIT(trans_fetch))
+- AGG_IBS_EVENT(DE_IBS_IC_HIT);
+- break;
+-
+- case DE_IBS_FETCH_4K_PAGE:
+- if (IBS_FETCH_PHYS_ADDR_VALID(trans_fetch)
+- && IBS_FETCH_TLB_PAGE_SIZE(trans_fetch) == L1TLB4K)
+- AGG_IBS_EVENT(DE_IBS_FETCH_4K_PAGE);
+- break;
+-
+- case DE_IBS_FETCH_2M_PAGE:
+- if (IBS_FETCH_PHYS_ADDR_VALID(trans_fetch)
+- && IBS_FETCH_TLB_PAGE_SIZE(trans_fetch) == L1TLB2M)
+- AGG_IBS_EVENT(DE_IBS_FETCH_2M_PAGE);
+- break;
+-
+- case DE_IBS_FETCH_1G_PAGE:
+- if (IBS_FETCH_PHYS_ADDR_VALID(trans_fetch)
+- && IBS_FETCH_TLB_PAGE_SIZE(trans_fetch) == L1TLB1G)
+- AGG_IBS_EVENT(DE_IBS_FETCH_1G_PAGE);
+- break;
+-
+- case DE_IBS_FETCH_XX_PAGE:
+- break;
+-
+- case DE_IBS_FETCH_LATENCY:
+- if (IBS_FETCH_FETCH_LATENCY(trans_fetch))
+- AGG_IBS_COUNT(DE_IBS_FETCH_LATENCY,
+- IBS_FETCH_FETCH_LATENCY(trans_fetch));
+- break;
+- default:
+- break;
+- }
++ CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_COMPLETED) {
++ if (IBS_FETCH_FETCH_COMPLETION(trans_fetch))
++ /* IBS Fetch Completed */
++ AGG_IBS_EVENT(DE_IBS_FETCH_COMPLETED);
++ }
++
++ CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_ABORTED) {
++ if (!IBS_FETCH_FETCH_COMPLETION(trans_fetch))
++ /* IBS Fetch Aborted */
++ AGG_IBS_EVENT(DE_IBS_FETCH_ABORTED);
++ }
++
++ CHECK_FETCH_SELECTED_FLAG(DE_IBS_L1_ITLB_HIT) {
++ /* IBS L1 ITLB hit */
++ if (IBS_FETCH_L1_TLB_HIT(trans_fetch))
++ AGG_IBS_EVENT(DE_IBS_L1_ITLB_HIT);
++ }
++
++ CHECK_FETCH_SELECTED_FLAG(DE_IBS_ITLB_L1M_L2H) {
++ /* IBS L1 ITLB miss and L2 ITLB hit */
++ if (IBS_FETCH_ITLB_L1M_L2H(trans_fetch))
++ AGG_IBS_EVENT(DE_IBS_ITLB_L1M_L2H);
++ }
++
++ CHECK_FETCH_SELECTED_FLAG(DE_IBS_ITLB_L1M_L2M) {
++ /* IBS L1 & L2 ITLB miss; complete ITLB miss */
++ if (IBS_FETCH_ITLB_L1M_L2M(trans_fetch))
++ AGG_IBS_EVENT(DE_IBS_ITLB_L1M_L2M);
++ }
++
++ CHECK_FETCH_SELECTED_FLAG(DE_IBS_IC_MISS) {
++ /* IBS instruction cache miss */
++ if (IBS_FETCH_INST_CACHE_MISS(trans_fetch))
++ AGG_IBS_EVENT(DE_IBS_IC_MISS);
++ }
++
++ CHECK_FETCH_SELECTED_FLAG(DE_IBS_IC_HIT) {
++ /* IBS instruction cache hit */
++ if (IBS_FETCH_INST_CACHE_HIT(trans_fetch))
++ AGG_IBS_EVENT(DE_IBS_IC_HIT);
++ }
++
++ CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_4K_PAGE) {
++ if (IBS_FETCH_PHYS_ADDR_VALID(trans_fetch)
++ && IBS_FETCH_TLB_PAGE_SIZE_4K(trans_fetch))
++ AGG_IBS_EVENT(DE_IBS_FETCH_4K_PAGE);
++ }
++
++ CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_2M_PAGE) {
++ if (IBS_FETCH_PHYS_ADDR_VALID(trans_fetch)
++ && IBS_FETCH_TLB_PAGE_SIZE_2M(trans_fetch))
++ AGG_IBS_EVENT(DE_IBS_FETCH_2M_PAGE);
++ }
++
++ CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_1G_PAGE) {
++ if (IBS_FETCH_PHYS_ADDR_VALID(trans_fetch)
++ && IBS_FETCH_TLB_PAGE_SIZE_1G(trans_fetch))
++ AGG_IBS_EVENT(DE_IBS_FETCH_1G_PAGE);
++ }
++
++ CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_XX_PAGE) {
++ }
++
++ CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_LATENCY) {
++ if (IBS_FETCH_FETCH_LATENCY(trans_fetch))
++ AGG_IBS_COUNT(DE_IBS_FETCH_LATENCY,
++ IBS_FETCH_FETCH_LATENCY(trans_fetch));
+ }
+ }
+
++
+ /*
+ * --------------------- OP DERIVED FUNCTION
+ */
+-void trans_ibs_op (struct transient * trans, unsigned int selected_flag, unsigned int size)
++void trans_ibs_op (struct transient * trans, unsigned int selected_flag)
+ {
+ struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op;
+- unsigned int i, j, mask = 1;
+
+- for (i = IBS_OP_BASE, j =0 ; i <= IBS_OP_END && j < size ; i++, mask = mask << 1) {
++ if ((selected_flag) == 0)
++ return;
+
+- if ((selected_flag & mask) == 0)
+- continue;
++ CHECK_OP_SELECTED_FLAG(DE_IBS_OP_ALL) {
++ /* All IBS op samples */
++ AGG_IBS_EVENT(DE_IBS_OP_ALL);
++ }
+
+- j++;
++ CHECK_OP_SELECTED_FLAG(DE_IBS_OP_TAG_TO_RETIRE) {
++ /* Tally retire cycle counts for all sampled macro-ops
++ * IBS tag to retire cycles */
++ if (IBS_OP_TAG_TO_RETIRE_CYCLES(trans_op))
++ AGG_IBS_COUNT(DE_IBS_OP_TAG_TO_RETIRE,
++ IBS_OP_TAG_TO_RETIRE_CYCLES(trans_op));
++ }
+
+- switch (i) {
+-
+- case DE_IBS_OP_ALL:
+- /* All IBS op samples */
+- AGG_IBS_EVENT(DE_IBS_OP_ALL);
+- break;
+-
+- case DE_IBS_OP_TAG_TO_RETIRE:
+- /* Tally retire cycle counts for all sampled macro-ops
+- * IBS tag to retire cycles */
+- if (IBS_OP_TAG_TO_RETIRE_CYCLES(trans_op))
+- AGG_IBS_COUNT(DE_IBS_OP_TAG_TO_RETIRE,
+- IBS_OP_TAG_TO_RETIRE_CYCLES(trans_op));
+- break;
+-
+- case DE_IBS_OP_COMP_TO_RETIRE:
+- /* IBS completion to retire cycles */
+- if (IBS_OP_COM_TO_RETIRE_CYCLES(trans_op))
+- AGG_IBS_COUNT(DE_IBS_OP_COMP_TO_RETIRE,
+- IBS_OP_COM_TO_RETIRE_CYCLES(trans_op));
+- break;
+-
+- case DE_IBS_BRANCH_RETIRED:
+- if (IBS_OP_OP_BRANCH_RETIRED(trans_op))
+- /* IBS Branch retired op */
+- AGG_IBS_EVENT(DE_IBS_BRANCH_RETIRED) ;
+- break;
+-
+- case DE_IBS_BRANCH_MISP:
+- if (IBS_OP_OP_BRANCH_RETIRED(trans_op)
+- /* Test branch-specific event flags */
+- /* IBS mispredicted Branch op */
+- && IBS_OP_OP_BRANCH_MISPREDICT(trans_op))
+- AGG_IBS_EVENT(DE_IBS_BRANCH_MISP) ;
+- break;
+-
+- case DE_IBS_BRANCH_TAKEN:
+- if (IBS_OP_OP_BRANCH_RETIRED(trans_op)
+- /* IBS taken Branch op */
+- && IBS_OP_OP_BRANCH_TAKEN(trans_op))
+- AGG_IBS_EVENT(DE_IBS_BRANCH_TAKEN);
+- break;
+-
+- case DE_IBS_BRANCH_MISP_TAKEN:
+- if (IBS_OP_OP_BRANCH_RETIRED(trans_op)
+- /* IBS mispredicted taken branch op */
+- && IBS_OP_OP_BRANCH_TAKEN(trans_op)
+- && IBS_OP_OP_BRANCH_MISPREDICT(trans_op))
+- AGG_IBS_EVENT(DE_IBS_BRANCH_MISP_TAKEN);
+- break;
+-
+- case DE_IBS_RETURN:
+- if (IBS_OP_OP_BRANCH_RETIRED(trans_op)
+- /* IBS return op */
+- && IBS_OP_OP_RETURN(trans_op))
+- AGG_IBS_EVENT(DE_IBS_RETURN);
+- break;
+-
+- case DE_IBS_RETURN_MISP:
+- if (IBS_OP_OP_BRANCH_RETIRED(trans_op)
+- /* IBS mispredicted return op */
+- && IBS_OP_OP_RETURN(trans_op)
+- && IBS_OP_OP_BRANCH_MISPREDICT(trans_op))
+- AGG_IBS_EVENT(DE_IBS_RETURN_MISP);
+- break;
+-
+- case DE_IBS_RESYNC:
+- /* Test for a resync macro-op */
+- if (IBS_OP_OP_BRANCH_RESYNC(trans_op))
+- AGG_IBS_EVENT(DE_IBS_RESYNC);
+- break;
+- default:
+- break;
+- }
++ CHECK_OP_SELECTED_FLAG(DE_IBS_OP_COMP_TO_RETIRE) {
++ /* IBS completion to retire cycles */
++ if (IBS_OP_COM_TO_RETIRE_CYCLES(trans_op))
++ AGG_IBS_COUNT(DE_IBS_OP_COMP_TO_RETIRE,
++ IBS_OP_COM_TO_RETIRE_CYCLES(trans_op));
++ }
++
++ CHECK_OP_SELECTED_FLAG(DE_IBS_BRANCH_RETIRED) {
++ if (IBS_OP_BRANCH_RETIRED(trans_op))
++ /* IBS Branch retired op */
++ AGG_IBS_EVENT(DE_IBS_BRANCH_RETIRED) ;
++ }
++
++ CHECK_OP_SELECTED_FLAG(DE_IBS_BRANCH_MISP) {
++ if (IBS_OP_BRANCH_RETIRED(trans_op)
++ /* Test branch-specific event flags */
++ /* IBS mispredicted Branch op */
++ && IBS_OP_BRANCH_MISPREDICT(trans_op))
++ AGG_IBS_EVENT(DE_IBS_BRANCH_MISP) ;
++ }
++
++ CHECK_OP_SELECTED_FLAG(DE_IBS_BRANCH_TAKEN) {
++ if (IBS_OP_BRANCH_RETIRED(trans_op)
++ /* IBS taken Branch op */
++ && IBS_OP_BRANCH_TAKEN(trans_op))
++ AGG_IBS_EVENT(DE_IBS_BRANCH_TAKEN);
++ }
++
++ CHECK_OP_SELECTED_FLAG(DE_IBS_BRANCH_MISP_TAKEN) {
++ if (IBS_OP_BRANCH_RETIRED(trans_op)
++ /* IBS mispredicted taken branch op */
++ && IBS_OP_BRANCH_TAKEN(trans_op)
++ && IBS_OP_BRANCH_MISPREDICT(trans_op))
++ AGG_IBS_EVENT(DE_IBS_BRANCH_MISP_TAKEN);
++ }
++
++ CHECK_OP_SELECTED_FLAG(DE_IBS_RETURN) {
++ if (IBS_OP_BRANCH_RETIRED(trans_op)
++ /* IBS return op */
++ && IBS_OP_RETURN(trans_op))
++ AGG_IBS_EVENT(DE_IBS_RETURN);
++ }
++
++ CHECK_OP_SELECTED_FLAG(DE_IBS_RETURN_MISP) {
++ if (IBS_OP_BRANCH_RETIRED(trans_op)
++ /* IBS mispredicted return op */
++ && IBS_OP_RETURN(trans_op)
++ && IBS_OP_BRANCH_MISPREDICT(trans_op))
++ AGG_IBS_EVENT(DE_IBS_RETURN_MISP);
++ }
++
++ CHECK_OP_SELECTED_FLAG(DE_IBS_RESYNC) {
++ /* Test for a resync macro-op */
++ if (IBS_OP_BRANCH_RESYNC(trans_op))
++ AGG_IBS_EVENT(DE_IBS_RESYNC);
+ }
+ }
+
+@@ -226,213 +206,201 @@
+ /*
+ * --------------------- OP LS DERIVED FUNCTION
+ */
+-void trans_ibs_op_ls (struct transient * trans, unsigned int selected_flag, unsigned int size)
++void trans_ibs_op_ls (struct transient * trans, unsigned int selected_flag)
+ {
+ struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op;
+- unsigned int i, j, mask = 1;
+
+ /* Preliminary check */
+ if (!IBS_OP_IBS_LD_OP(trans_op) && !IBS_OP_IBS_ST_OP(trans_op))
+ return;
+
+
+- for (i = IBS_OP_LS_BASE, j =0 ; i <= IBS_OP_LS_END && j < size ; i++, mask = mask << 1) {
++ if ((selected_flag) == 0)
++ return;
++
++ CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_ALL_OP) {
++ /* Count the number of LS op samples */
++ AGG_IBS_EVENT(DE_IBS_LS_ALL_OP) ;
++ }
++
++ CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_LOAD_OP) {
++ if (IBS_OP_IBS_LD_OP(trans_op))
++ /* TALLy an IBS load derived event */
++ AGG_IBS_EVENT(DE_IBS_LS_LOAD_OP) ;
++ }
+
+- if ((selected_flag & mask) == 0)
+- continue;
++ CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_STORE_OP) {
++ if (IBS_OP_IBS_ST_OP(trans_op))
++ /* Count and handle store operations */
++ AGG_IBS_EVENT(DE_IBS_LS_STORE_OP);
++ }
++
++ CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_DTLB_L1H) {
++ if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
++ && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op))
++ /* L1 DTLB hit -- This is the most frequent case */
++ AGG_IBS_EVENT(DE_IBS_LS_DTLB_L1H);
++ }
+
+- j++;
++ CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_DTLB_L1M_L2H) {
++ /* l2_translation_size = 1 */
++ if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
++ && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
++ && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op))
++ /* L1 DTLB miss, L2 DTLB hit */
++ AGG_IBS_EVENT(DE_IBS_LS_DTLB_L1M_L2H);
++ }
+
+- switch (i) {
++ CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_DTLB_L1M_L2M) {
++ if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
++ && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
++ && IBS_OP_IBS_DC_L2_TLB_MISS(trans_op))
++ /* L1 DTLB miss, L2 DTLB miss */
++ AGG_IBS_EVENT(DE_IBS_LS_DTLB_L1M_L2M);
++ }
+
+- case DE_IBS_LS_ALL_OP:
+- /* Count the number of LS op samples */
+- AGG_IBS_EVENT(DE_IBS_LS_ALL_OP) ;
+- break;
+-
+- case DE_IBS_LS_LOAD_OP:
+- if (IBS_OP_IBS_LD_OP(trans_op))
+- /* TALLy an IBS load derived event */
+- AGG_IBS_EVENT(DE_IBS_LS_LOAD_OP) ;
+- break;
+-
+- case DE_IBS_LS_STORE_OP:
+- if (IBS_OP_IBS_ST_OP(trans_op))
+- /* Count and handle store operations */
+- AGG_IBS_EVENT(DE_IBS_LS_STORE_OP);
+- break;
+-
+- case DE_IBS_LS_DTLB_L1H:
+- if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
+- && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op))
+- /* L1 DTLB hit -- This is the most frequent case */
+- AGG_IBS_EVENT(DE_IBS_LS_DTLB_L1H);
+- break;
+-
+- case DE_IBS_LS_DTLB_L1M_L2H:
+- /* l2_translation_size = 1 */
+- if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
+- && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
+- && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op))
+- /* L1 DTLB miss, L2 DTLB hit */
+- AGG_IBS_EVENT(DE_IBS_LS_DTLB_L1M_L2H);
+- break;
+-
+- case DE_IBS_LS_DTLB_L1M_L2M:
+- if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
+- && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
+- && IBS_OP_IBS_DC_L2_TLB_MISS(trans_op))
+- /* L1 DTLB miss, L2 DTLB miss */
+- AGG_IBS_EVENT(DE_IBS_LS_DTLB_L1M_L2M);
+- break;
+-
+- case DE_IBS_LS_DC_MISS:
+- if (IBS_OP_IBS_DC_MISS(trans_op))
+- AGG_IBS_EVENT(DE_IBS_LS_DC_MISS);
+- break;
+-
+- case DE_IBS_LS_DC_HIT:
+- if (!IBS_OP_IBS_DC_MISS(trans_op))
+- AGG_IBS_EVENT(DE_IBS_LS_DC_HIT);
+- break;
+-
+- case DE_IBS_LS_MISALIGNED:
+- if (IBS_OP_IBS_DC_MISS_ACC(trans_op))
+- AGG_IBS_EVENT(DE_IBS_LS_MISALIGNED);
+- break;
+-
+- case DE_IBS_LS_BNK_CONF_LOAD:
+- if (IBS_OP_IBS_DC_LD_BNK_CON(trans_op))
+- AGG_IBS_EVENT(DE_IBS_LS_BNK_CONF_LOAD);
+- break;
+-
+- case DE_IBS_LS_BNK_CONF_STORE:
+- if (IBS_OP_IBS_DC_ST_BNK_CON(trans_op))
+- AGG_IBS_EVENT(DE_IBS_LS_BNK_CONF_STORE);
+- break;
+-
+- case DE_IBS_LS_STL_FORWARDED:
+- if (IBS_OP_IBS_LD_OP(trans_op)
+- /* Data forwarding info are valid only for load ops */
+- && IBS_OP_IBS_DC_ST_TO_LD_FWD(trans_op))
+- AGG_IBS_EVENT(DE_IBS_LS_STL_FORWARDED) ;
+- break;
+-
+- case DE_IBS_LS_STL_CANCELLED:
+- if (IBS_OP_IBS_LD_OP(trans_op))
+- if (IBS_OP_IBS_DC_ST_TO_LD_CAN(trans_op))
+- AGG_IBS_EVENT(DE_IBS_LS_STL_CANCELLED) ;
+- break;
+-
+- case DE_IBS_LS_UC_MEM_ACCESS:
+- if (IBS_OP_IBS_DC_UC_MEM_ACC(trans_op))
+- AGG_IBS_EVENT(DE_IBS_LS_UC_MEM_ACCESS);
+- break;
+-
+- case DE_IBS_LS_WC_MEM_ACCESS:
+- if (IBS_OP_IBS_DC_WC_MEM_ACC(trans_op))
+- AGG_IBS_EVENT(DE_IBS_LS_WC_MEM_ACCESS);
+- break;
+-
+- case DE_IBS_LS_LOCKED_OP:
+- if (IBS_OP_IBS_LOCKED_OP(trans_op))
+- AGG_IBS_EVENT(DE_IBS_LS_LOCKED_OP);
+- break;
+-
+- case DE_IBS_LS_MAB_HIT:
+- if (IBS_OP_IBS_DC_MAB_HIT(trans_op))
+- AGG_IBS_EVENT(DE_IBS_LS_MAB_HIT);
+- break;
+-
+- case DE_IBS_LS_L1_DTLB_4K:
+- /* l1_translation */
+- if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
+- && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
+-
+- && !IBS_OP_IBS_DC_L1_TLB_HIT_2MB(trans_op)
+- && !IBS_OP_IBS_DC_L1_TLB_HIT_1GB(trans_op))
+- /* This is the most common case, unfortunately */
+- AGG_IBS_EVENT(DE_IBS_LS_L1_DTLB_4K) ;
+- break;
+-
+- case DE_IBS_LS_L1_DTLB_2M:
+- /* l1_translation */
+- if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
+- && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
+-
+- && IBS_OP_IBS_DC_L1_TLB_HIT_2MB(trans_op))
+- /* 2M L1 DTLB page translation */
+- AGG_IBS_EVENT(DE_IBS_LS_L1_DTLB_2M);
+- break;
+-
+- case DE_IBS_LS_L1_DTLB_1G:
+- /* l1_translation */
+- if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
+- && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
+-
+- && !IBS_OP_IBS_DC_L1_TLB_HIT_2MB(trans_op)
+- && IBS_OP_IBS_DC_L1_TLB_HIT_1GB(trans_op))
+- /* 1G L1 DTLB page translation */
+- AGG_IBS_EVENT(DE_IBS_LS_L1_DTLB_1G);
+- break;
+-
+- case DE_IBS_LS_L1_DTLB_RES:
+- break;
+-
+- case DE_IBS_LS_L2_DTLB_4K:
+- /* l2_translation_size = 1 */
+- if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
+- && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
+- && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op)
+-
+- /* L2 DTLB page translation */
+- && !IBS_OP_IBS_DC_L2_TLB_HIT_2MB(trans_op)
+- && !IBS_OP_IBS_DC_L2_TLB_HIT_1GB(trans_op))
+- /* 4K L2 DTLB page translation */
+- AGG_IBS_EVENT(DE_IBS_LS_L2_DTLB_4K);
+- break;
+-
+- case DE_IBS_LS_L2_DTLB_2M:
+- /* l2_translation_size = 1 */
+- if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
+- && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
+- && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op)
+-
+- /* L2 DTLB page translation */
+- && IBS_OP_IBS_DC_L2_TLB_HIT_2MB(trans_op)
+- && !IBS_OP_IBS_DC_L2_TLB_HIT_1GB(trans_op))
+- /* 2M L2 DTLB page translation */
+- AGG_IBS_EVENT(DE_IBS_LS_L2_DTLB_2M);
+- break;
+-
+- case DE_IBS_LS_L2_DTLB_1G:
+- /* l2_translation_size = 1 */
+- if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
+- && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
+- && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op)
+-
+- /* L2 DTLB page translation */
+- && !IBS_OP_IBS_DC_L2_TLB_HIT_2MB(trans_op)
+- && IBS_OP_IBS_DC_L2_TLB_HIT_1GB(trans_op))
+- /* 2M L2 DTLB page translation */
+- AGG_IBS_EVENT(DE_IBS_LS_L2_DTLB_1G);
+- break;
+-
+- case DE_IBS_LS_L2_DTLB_RES2:
+- break;
+-
+- case DE_IBS_LS_DC_LOAD_LAT:
+- if (IBS_OP_IBS_LD_OP(trans_op)
+- /* If the load missed in DC, tally the DC load miss latency */
+- && IBS_OP_IBS_DC_MISS(trans_op))
+- /* DC load miss latency is only reliable for load ops */
+- AGG_IBS_COUNT(DE_IBS_LS_DC_LOAD_LAT,
+- IBS_OP_DC_MISS_LATENCY(trans_op)) ;
+- break;
++ CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_DC_MISS) {
++ if (IBS_OP_IBS_DC_MISS(trans_op))
++ AGG_IBS_EVENT(DE_IBS_LS_DC_MISS);
++ }
+
+- default:
+- break;
+- }
++ CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_DC_HIT) {
++ if (!IBS_OP_IBS_DC_MISS(trans_op))
++ AGG_IBS_EVENT(DE_IBS_LS_DC_HIT);
++ }
++
++ CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_MISALIGNED) {
++ if (IBS_OP_IBS_DC_MISS_ACC(trans_op))
++ AGG_IBS_EVENT(DE_IBS_LS_MISALIGNED);
++ }
++
++ CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_BNK_CONF_LOAD) {
++ if (IBS_OP_IBS_DC_LD_BNK_CON(trans_op))
++ AGG_IBS_EVENT(DE_IBS_LS_BNK_CONF_LOAD);
++ }
++
++ CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_BNK_CONF_STORE) {
++ if (IBS_OP_IBS_DC_ST_BNK_CON(trans_op))
++ AGG_IBS_EVENT(DE_IBS_LS_BNK_CONF_STORE);
++ }
++
++ CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_STL_FORWARDED) {
++ if (IBS_OP_IBS_LD_OP(trans_op)
++ /* Data forwarding info are valid only for load ops */
++ && IBS_OP_IBS_DC_ST_TO_LD_FWD(trans_op))
++ AGG_IBS_EVENT(DE_IBS_LS_STL_FORWARDED) ;
++ }
++
++ CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_STL_CANCELLED) {
++ if (IBS_OP_IBS_LD_OP(trans_op))
++ if (IBS_OP_IBS_DC_ST_TO_LD_CAN(trans_op))
++ AGG_IBS_EVENT(DE_IBS_LS_STL_CANCELLED) ;
++ }
++
++ CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_UC_MEM_ACCESS) {
++ if (IBS_OP_IBS_DC_UC_MEM_ACC(trans_op))
++ AGG_IBS_EVENT(DE_IBS_LS_UC_MEM_ACCESS);
++ }
++
++ CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_WC_MEM_ACCESS) {
++ if (IBS_OP_IBS_DC_WC_MEM_ACC(trans_op))
++ AGG_IBS_EVENT(DE_IBS_LS_WC_MEM_ACCESS);
++ }
++
++ CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_LOCKED_OP) {
++ if (IBS_OP_IBS_LOCKED_OP(trans_op))
++ AGG_IBS_EVENT(DE_IBS_LS_LOCKED_OP);
++ }
++
++ CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_MAB_HIT) {
++ if (IBS_OP_IBS_DC_MAB_HIT(trans_op))
++ AGG_IBS_EVENT(DE_IBS_LS_MAB_HIT);
++ }
++
++ CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L1_DTLB_4K) {
++ /* l1_translation */
++ if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
++ && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
++
++ && !IBS_OP_IBS_DC_L1_TLB_HIT_2MB(trans_op)
++ && !IBS_OP_IBS_DC_L1_TLB_HIT_1GB(trans_op))
++ /* This is the most common case, unfortunately */
++ AGG_IBS_EVENT(DE_IBS_LS_L1_DTLB_4K) ;
++ }
++
++ CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L1_DTLB_2M) {
++ /* l1_translation */
++ if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
++ && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
++
++ && IBS_OP_IBS_DC_L1_TLB_HIT_2MB(trans_op))
++ /* 2M L1 DTLB page translation */
++ AGG_IBS_EVENT(DE_IBS_LS_L1_DTLB_2M);
++ }
++
++ CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L1_DTLB_1G) {
++ /* l1_translation */
++ if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
++ && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
++
++ && !IBS_OP_IBS_DC_L1_TLB_HIT_2MB(trans_op)
++ && IBS_OP_IBS_DC_L1_TLB_HIT_1GB(trans_op))
++ /* 1G L1 DTLB page translation */
++ AGG_IBS_EVENT(DE_IBS_LS_L1_DTLB_1G);
++ }
++
++ CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L1_DTLB_RES) {
++ }
++
++ CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L2_DTLB_4K) {
++ /* l2_translation_size = 1 */
++ if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
++ && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
++ && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op)
++
++ /* L2 DTLB page translation */
++ && !IBS_OP_IBS_DC_L2_TLB_HIT_2MB(trans_op)
++ && !IBS_OP_IBS_DC_L2_TLB_HIT_1GB(trans_op))
++ /* 4K L2 DTLB page translation */
++ AGG_IBS_EVENT(DE_IBS_LS_L2_DTLB_4K);
++ }
++
++ CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L2_DTLB_2M) {
++ /* l2_translation_size = 1 */
++ if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
++ && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
++ && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op)
++
++ /* L2 DTLB page translation */
++ && IBS_OP_IBS_DC_L2_TLB_HIT_2MB(trans_op)
++ && !IBS_OP_IBS_DC_L2_TLB_HIT_1GB(trans_op))
++ /* 2M L2 DTLB page translation */
++ AGG_IBS_EVENT(DE_IBS_LS_L2_DTLB_2M);
++ }
++
++ CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L2_DTLB_1G) {
++ /* l2_translation_size = 1 */
++ if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op)
++ && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)
++ && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op)
++
++ /* L2 DTLB page translation */
++ && !IBS_OP_IBS_DC_L2_TLB_HIT_2MB(trans_op)
++ && IBS_OP_IBS_DC_L2_TLB_HIT_1GB(trans_op))
++ /* 2M L2 DTLB page translation */
++ AGG_IBS_EVENT(DE_IBS_LS_L2_DTLB_1G);
++ }
++
++ CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L2_DTLB_RES2) {
++ }
++
++ CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_DC_LOAD_LAT) {
++ if (IBS_OP_IBS_LD_OP(trans_op)
++ /* If the load missed in DC, tally the DC load miss latency */
++ && IBS_OP_IBS_DC_MISS(trans_op))
++ /* DC load miss latency is only reliable for load ops */
++ AGG_IBS_COUNT(DE_IBS_LS_DC_LOAD_LAT,
++ IBS_OP_DC_MISS_LATENCY(trans_op)) ;
+ }
+ }
+
+@@ -443,12 +411,14 @@
+ * that miss in L1 and L2 cache. NB data arrives too late
+ * to be reliable for store operations
+ */
+-void trans_ibs_op_nb (struct transient * trans, unsigned int selected_flag, unsigned int size)
++void trans_ibs_op_nb (struct transient * trans, unsigned int selected_flag)
+ {
+ struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op;
+- unsigned int i, j, mask = 1;
+
+ /* Preliminary check */
++ if ((selected_flag) == 0)
++ return;
++
+ if (!IBS_OP_IBS_LD_OP(trans_op))
+ return;
+
+@@ -458,97 +428,219 @@
+ if (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0)
+ return;
+
+- for (i = IBS_OP_NB_BASE, j =0 ; i <= IBS_OP_NB_END && j < size ; i++, mask = mask << 1) {
++ CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_LOCAL) {
++ if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op))
++ /* Request was serviced by local processor */
++ AGG_IBS_EVENT(DE_IBS_NB_LOCAL) ;
++ }
+
+- if ((selected_flag & mask) == 0)
+- continue;
++ CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_REMOTE) {
++ if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op))
++ /* Request was serviced by remote processor */
++ AGG_IBS_EVENT(DE_IBS_NB_REMOTE) ;
++ }
+
+- j++;
++ CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_LOCAL_L3) {
++ if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
++ && IBS_OP_NB_IBS_REQ_SRC_01(trans_op))
++ AGG_IBS_EVENT(DE_IBS_NB_LOCAL_L3);
++ }
++
++ CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_LOCAL_CACHE) {
++ if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
++ && IBS_OP_NB_IBS_REQ_SRC_02(trans_op))
++ AGG_IBS_EVENT(DE_IBS_NB_LOCAL_CACHE);
++ }
++
++ CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_REMOTE_CACHE) {
++ if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
++ && IBS_OP_NB_IBS_REQ_SRC_02(trans_op))
++ AGG_IBS_EVENT(DE_IBS_NB_REMOTE_CACHE) ;
++ }
+
+- switch (i) {
++ CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_LOCAL_DRAM) {
++ if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
++ && IBS_OP_NB_IBS_REQ_SRC_03(trans_op))
++ AGG_IBS_EVENT(DE_IBS_NB_LOCAL_DRAM);
++ }
+
+- case DE_IBS_NB_LOCAL:
+- if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op))
+- /* Request was serviced by local processor */
+- AGG_IBS_EVENT(DE_IBS_NB_LOCAL) ;
+- break;
+-
+- case DE_IBS_NB_REMOTE:
+- if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op))
+- /* Request was serviced by remote processor */
+- AGG_IBS_EVENT(DE_IBS_NB_REMOTE) ;
+- break;
+-
+- case DE_IBS_NB_LOCAL_L3:
+- if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
+- && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x1))
+- AGG_IBS_EVENT(DE_IBS_NB_LOCAL_L3);
+- break;
+-
+- case DE_IBS_NB_LOCAL_CACHE:
+- if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
+- && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x2))
+- AGG_IBS_EVENT(DE_IBS_NB_LOCAL_CACHE);
+- break;
+-
+- case DE_IBS_NB_REMOTE_CACHE:
+- if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
+- && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x2))
+- AGG_IBS_EVENT(DE_IBS_NB_REMOTE_CACHE) ;
+- break;
+-
+- case DE_IBS_NB_LOCAL_DRAM:
+- if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
+- && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x3))
+- AGG_IBS_EVENT(DE_IBS_NB_LOCAL_DRAM);
+- break;
+-
+- case DE_IBS_NB_REMOTE_DRAM:
+- if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
+- && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x3))
+- AGG_IBS_EVENT(DE_IBS_NB_REMOTE_DRAM) ;
+- break;
+-
+- case DE_IBS_NB_LOCAL_OTHER:
+- if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
+- && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x7))
+- AGG_IBS_EVENT(DE_IBS_NB_LOCAL_OTHER);
+- break;
+-
+- case DE_IBS_NB_REMOTE_OTHER:
+- if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
+- && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x7))
+- AGG_IBS_EVENT(DE_IBS_NB_REMOTE_OTHER) ;
+- break;
+-
+- case DE_IBS_NB_CACHE_STATE_M:
+- if ((IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x2)
+- && !IBS_OP_NB_IBS_CACHE_HIT_ST(trans_op))
+- AGG_IBS_EVENT(DE_IBS_NB_CACHE_STATE_M) ;
+- break;
+-
+- case DE_IBS_NB_CACHE_STATE_O:
+- if ((IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x2)
+- && IBS_OP_NB_IBS_CACHE_HIT_ST(trans_op))
+- AGG_IBS_EVENT(DE_IBS_NB_CACHE_STATE_O) ;
+- break;
+-
+- case DE_IBS_NB_LOCAL_LATENCY:
+- if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op))
+- /* Request was serviced by local processor */
+- AGG_IBS_COUNT(DE_IBS_NB_LOCAL_LATENCY,
+- IBS_OP_DC_MISS_LATENCY(trans_op));
+- break;
+-
+- case DE_IBS_NB_REMOTE_LATENCY:
+- if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op))
+- /* Request was serviced by remote processor */
+- AGG_IBS_COUNT(DE_IBS_NB_REMOTE_LATENCY,
+- IBS_OP_DC_MISS_LATENCY(trans_op));
+- break;
++ CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_REMOTE_DRAM) {
++ if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
++ && IBS_OP_NB_IBS_REQ_SRC_03(trans_op))
++ AGG_IBS_EVENT(DE_IBS_NB_REMOTE_DRAM) ;
++ }
+
+- default:
+- break;
++ CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_LOCAL_OTHER) {
++ if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
++ && IBS_OP_NB_IBS_REQ_SRC_07(trans_op))
++ AGG_IBS_EVENT(DE_IBS_NB_LOCAL_OTHER);
++ }
++
++ CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_REMOTE_OTHER) {
++ if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)
++ && IBS_OP_NB_IBS_REQ_SRC_07(trans_op))
++ AGG_IBS_EVENT(DE_IBS_NB_REMOTE_OTHER) ;
++ }
++
++ CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_CACHE_STATE_M) {
++ if (IBS_OP_NB_IBS_REQ_SRC_02(trans_op)
++ && !IBS_OP_NB_IBS_CACHE_HIT_ST(trans_op))
++ AGG_IBS_EVENT(DE_IBS_NB_CACHE_STATE_M) ;
++ }
++
++ CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_CACHE_STATE_O) {
++ if (IBS_OP_NB_IBS_REQ_SRC_02(trans_op)
++ && IBS_OP_NB_IBS_CACHE_HIT_ST(trans_op))
++ AGG_IBS_EVENT(DE_IBS_NB_CACHE_STATE_O) ;
++ }
++
++ CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_LOCAL_LATENCY) {
++ if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op))
++ /* Request was serviced by local processor */
++ AGG_IBS_COUNT(DE_IBS_NB_LOCAL_LATENCY,
++ IBS_OP_DC_MISS_LATENCY(trans_op));
++ }
++
++ CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_REMOTE_LATENCY) {
++ if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op))
++ /* Request was serviced by remote processor */
++ AGG_IBS_COUNT(DE_IBS_NB_REMOTE_LATENCY,
++ IBS_OP_DC_MISS_LATENCY(trans_op));
++ }
++}
++
++
++int trans_ibs_op_rip_invalid (struct transient * trans)
++{
++ struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op;
++
++ if (IBS_OP_RIP_INVALID(trans_op))
++ return 1;
++
++ return 0;
++}
++
++
++void trans_ibs_op_mask_reserved (unsigned int family, struct transient * trans)
++{
++ struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op;
++
++ switch (family) {
++ case 0x10:
++ /* Reserved IbsRipInvalid (MSRC001_1035[38])*/
++ trans_op->ibs_op_data1_high &= ~MASK_RIP_INVALID;
++ break;
++ case 0x12:
++		/* Reserved NbIbsReqDstProc (MSRC001_1036[4]) */
++		trans_op->ibs_op_data2_low &= ~NB_MASK_REQ_DST_PROC;
++		/* Reserved NbIbsReqCacheHitSt (MSRC001_1036[5]) */
++		trans_op->ibs_op_data2_low &= ~NB_MASK_L3_STATE;
++ break;
++ case 0x14:
++		/* Reserved NbIbsReqDstProc (MSR C001_1036[4]) */
++		trans_op->ibs_op_data2_low &= ~NB_MASK_REQ_DST_PROC;
++		/* Reserved NbIbsReqCacheHitSt (MSRC001_1036[5]) */
++		trans_op->ibs_op_data2_low &= ~NB_MASK_L3_STATE;
++ /* Reserved IbsDcL1tlbHit1G (MSRC001_1037[5]) */
++ trans_op->ibs_op_data3_low &= ~DC_MASK_L1_HIT_1G;
++ /* Reserved IbsDcLdBnkCon (MSRC001_1037[9]) */
++ trans_op->ibs_op_data3_low &= ~DC_MASK_LD_BANK_CONFLICT;
++ /* Reserved IbsDcStBnkCon (MSRC001_1037[10]) */
++ trans_op->ibs_op_data3_low &= ~DC_MASK_ST_BANK_CONFLICT;
++ /* Reserved IbsDcStToLdCan (MSRC001_1037[12]) */
++ trans_op->ibs_op_data3_low &= ~DC_MASK_ST_TO_LD_CANCEL;
++ /* Reserved IbsDcL2tlbHit1G (MSRC001_1037[19]) */
++ trans_op->ibs_op_data3_low &= ~DC_MASK_L2_HIT_1G;
++
++ break;
++ case 0x15:
++ default:
++ break;
++
++ }
++}
++
++
++void trans_ibs_op_bta(struct transient * trans)
++{
++ static cookie_t old_cookie = NO_COOKIE;
++ static cookie_t old_app_cookie = NO_COOKIE;
++ static char const * mod = NULL;
++ static char const * app = NULL;
++ const char vmlinux[10] = "vmlinux";
++ struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op;
++
++ if (!bta_log)
++ return;
++
++ if (!trans_op->ibs_op_brtgt_addr)
++ return;
++
++ if( old_app_cookie == INVALID_COOKIE
++ || old_app_cookie == NO_COOKIE
++ || old_app_cookie != trans->app_cookie) {
++ app = find_cookie(trans->app_cookie);
++		old_app_cookie = trans->app_cookie; /* was trans->cookie: broke the app-name cache */
++ }
++
++ if (trans->in_kernel == 1) {
++ mod = vmlinux;
++ old_cookie = NO_COOKIE;
++ } else {
++ if( old_cookie == INVALID_COOKIE
++ || old_cookie == NO_COOKIE
++ || old_cookie != trans->cookie) {
++ mod = find_cookie(trans->cookie);
++ old_cookie = trans->cookie;
+ }
+ }
++
++	fprintf(bta_log, "0x%016llx,0x%016llx,%02lu,%08u,%08u,0x%08x,0x%08lx\n",
++ trans->app_cookie, trans->cookie, trans->cpu, trans->tgid, trans->tid, (unsigned int)trans->pc,
++ trans_op->ibs_op_brtgt_addr);
++}
++
++
++void trans_ibs_op_ls_memaccess(struct transient * trans)
++{
++ static cookie_t old_cookie = NO_COOKIE;
++ static cookie_t old_app_cookie = NO_COOKIE;
++ static char const * mod = NULL;
++ static char const * app = NULL;
++ const char vmlinux[10] = "vmlinux";
++ struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op;
++
++ if (!memaccess_log)
++ return;
++
++ if( old_app_cookie == INVALID_COOKIE
++ || old_app_cookie == NO_COOKIE
++ || old_app_cookie != trans->app_cookie) {
++ app = find_cookie(trans->app_cookie);
++		old_app_cookie = trans->app_cookie; /* was trans->cookie: broke the app-name cache */
++ }
++
++ if (trans->in_kernel == 1) {
++ mod = vmlinux;
++ old_cookie = NO_COOKIE;
++ } else {
++ if( old_cookie == INVALID_COOKIE
++ || old_cookie == NO_COOKIE
++ || old_cookie != trans->cookie) {
++ mod = find_cookie(trans->cookie);
++ old_cookie = trans->cookie;
++ }
++ }
++
++	fprintf(memaccess_log, "0x%016llx,0x%016llx,%02lu,%08u,%08u,0x%08x,0x%08x:%08x,0x%08x:%08x,%s,%08u\n",
++		trans->app_cookie,
++		trans->cookie,
++		trans->cpu,
++		trans->tgid,
++		trans->tid,
++		(unsigned int)trans->pc,
++ trans_op->ibs_op_phys_addr_high, trans_op->ibs_op_phys_addr_low,
++ trans_op->ibs_op_ldst_linaddr_high, trans_op->ibs_op_ldst_linaddr_low,
++ (IBS_OP_IBS_LD_OP(trans_op))? "LD": "ST",
++ (unsigned int) IBS_OP_DC_MISS_LATENCY(trans_op));
+ }
+Index: oprofile/daemon/opd_ibs_trans.h
+diff -u oprofile/daemon/opd_ibs_trans.h:1.1 oprofile/daemon/opd_ibs_trans.h:1.2
+--- oprofile/daemon/opd_ibs_trans.h:1.1 Fri Apr 17 18:40:43 2009
++++ oprofile/daemon/opd_ibs_trans.h Mon Nov 29 15:05:44 2010
+@@ -24,8 +24,12 @@
+ };
+
+
+-extern void trans_ibs_fetch (struct transient * trans, unsigned int selected_flag, unsigned int size);
+-extern void trans_ibs_op (struct transient * trans, unsigned int selected_flag, unsigned int size);
+-extern void trans_ibs_op_ls (struct transient * trans, unsigned int selected_flag, unsigned int size);
+-extern void trans_ibs_op_nb (struct transient * trans, unsigned int selected_flag, unsigned int size);
++extern void trans_ibs_fetch (struct transient * trans, unsigned int selected_flag);
++extern void trans_ibs_op (struct transient * trans, unsigned int selected_flag);
++extern void trans_ibs_op_ls (struct transient * trans, unsigned int selected_flag);
++extern void trans_ibs_op_nb (struct transient * trans, unsigned int selected_flag);
++extern int trans_ibs_op_rip_invalid (struct transient * trans);
++extern void trans_ibs_op_mask_reserved (unsigned int family, struct transient * trans);
++extern void trans_ibs_op_ls_memaccess(struct transient * trans);
++extern void trans_ibs_op_bta (struct transient * trans);
+ #endif // OPD_IBS_TRANS_H
+Index: oprofile/events/x86-64/family10/unit_masks
+diff -u oprofile/events/x86-64/family10/unit_masks:1.6 oprofile/events/x86-64/family10/unit_masks:1.7
+--- oprofile/events/x86-64/family10/unit_masks:1.6 Fri Oct 23 14:21:09 2009
++++ oprofile/events/x86-64/family10/unit_masks Mon Nov 29 15:05:44 2010
+@@ -363,6 +363,7 @@
+ name:ibs_op type:bitmask default:0x01
+ 0x00 Using IBS OP cycle count mode
+ 0x01 Using IBS OP dispatch count mode
++ 0x02 Enable IBS OP Memory Access Log
+ name:non_cancelled_l3_read_requests type:bitmask default:0xf7
+ 0x01 RbBlk
+ 0x02 RbBlkS
+Index: oprofile/utils/opcontrol
+diff -u oprofile/utils/opcontrol:1.169 oprofile/utils/opcontrol:1.170
+--- oprofile/utils/opcontrol:1.169 Fri Aug 13 15:42:16 2010
++++ oprofile/utils/opcontrol Mon Nov 29 15:05:44 2010
+@@ -1867,13 +1867,20 @@
+ IBS_FETCH_COUNT=$IBS_COUNT
+ IBS_FETCH_MASK=$IBS_MASK
+ elif test "$IBS_FETCH_COUNT" != "$IBS_COUNT" ; then
+- echo "All IBS Fetch must have the same count."
++ echo "ERROR: All IBS Fetch must have the same count."
+ exit 1
+ fi
+
+ # Check IBS_MASK consistency
+ if test "$IBS_FETCH_MASK" != "$IBS_MASK" ; then
+- echo "All IBS Fetch must have the same unitmask."
++ echo "ERROR: All IBS Fetch must have the same unitmask."
++ exit 1
++ fi
++
++ # Check IBS_FETCH_COUNT within range
++ if test "$IBS_FETCH_COUNT" -gt 1048575 ; then
++ echo "ERROR: IBS Fetch count is too large."
++ echo " The maximum IBS-fetch count is 1048575."
+ exit 1
+ fi
+
+@@ -1892,6 +1899,28 @@
+ echo "All IBS Op must have the same unitmask."
+ exit 1
+ fi
++
++ # Check IBS_OP_COUNT within range
++ case "$CPUTYPE" in
++ x86-64/family10)
++ if test "$IBS_OP_COUNT" -gt 1048575 ; then
++ echo "ERROR: IBS Op count is too large."
++ echo " The maximum IBS-Op count is 1048575."
++ exit 1
++ fi
++ ;;
++
++ x86-64/family12h|\
++ x86-64/family14h|\
++ x86-64/family15h)
++ if test "$IBS_OP_COUNT" -gt 134217727 ; then
++ echo "ERROR: IBS Op count is too large."
++ echo " The maximum IBS-Op count is 134217727."
++ exit 1
++ fi
++ ;;
++ *)
++ esac
+ fi
+
+ return
+@@ -1944,10 +1973,32 @@
+ # NOTE: We default to use dispatched_op if available.
+ # Some of the older family10 system does not have
+ # dispatched_ops feature.
+- # dispatched op is enabled by bit 1 of the unitmask
++ # Dispatched op is enabled by bit 0 of the unitmask
++ IBS_OP_DISPATCHED_OP=$(( IBS_OP_UNITMASK & 0x1 ))
+ if test -f $MOUNT/ibs_op/dispatched_ops ; then
+- IBS_OP_DISPATCHED_OP=$(( IBS_OP_UNITMASK & 0x1 ))
+ set_param ibs_op/dispatched_ops $IBS_OP_DISPATCHED_OP
++ else
++ if test $IBS_OP_DISPATCHED_OP -eq 1 ; then
++ echo "ERROR: IBS Op dispatched ops is not supported."
++ exit 1
++ fi
++ fi
++
++ # NOTE: BTA is enabled by bit 2 of the unitmask
++ IBS_OP_BTA=$(( IBS_OP_UNITMASK & 0x4 ))
++ if test -f $MOUNT/ibs_op/branch_target; then
++ if [ "$IBS_OP_BTA" = "4" ] ; then
++ set_param ibs_op/branch_target 1
++ else
++ set_param ibs_op/branch_target 0
++ fi
++
++ # TODO: Check if write successful
++ else
++ if test $IBS_OP_BTA -eq 1 ; then
++ echo "ERROR: IBS Op Branch Target Address is not supported."
++ exit 1
++ fi
+ fi
+ else
+ set_param ibs_op/enable 0
+Index: oprofile/daemon/opd_ibs.c
+diff -u oprofile/daemon/opd_ibs.c:1.3 oprofile/daemon/opd_ibs.c:1.4
+--- oprofile/daemon/opd_ibs.c:1.3 Mon Nov 29 15:05:44 2010
++++ oprofile/daemon/opd_ibs.c Tue Dec 14 17:40:20 2010
+@@ -34,21 +34,6 @@
+ #include <string.h>
+ #include <limits.h>
+
+-#if defined(__i386__) && defined(__PIC__)
+-/* %ebx may be the PIC register. */
+- #define __cpuid(level, a, b, c, d) \
+- __asm__ ("xchgl\t%%ebx, %1\n\t" \
+- "cpuid\n\t" \
+- "xchgl\t%%ebx, %1\n\t" \
+- : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
+- : "0" (level))
+-#else
+- #define __cpuid(level, a, b, c, d) \
+- __asm__ ("cpuid\n\t" \
+- : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
+- : "0" (level))
+-#endif
+-
+ extern op_cpu cpu_type;
+ extern int no_event_ok;
+ extern int sfile_equal(struct sfile const * sf, struct sfile const * sf2);
+@@ -495,6 +480,7 @@
+
+ static void check_cpuid_family_model_stepping()
+ {
++#if defined(__i386__) || defined(__x86_64__)
+ union {
+ unsigned eax;
+ struct {
+@@ -510,11 +496,16 @@
+ unsigned ebx, ecx, edx;
+
+ /* CPUID Fn0000_0001_EAX Family, Model, Stepping */
+- __cpuid(1, v.eax, ebx, ecx, edx);
++ asm ("cpuid" : "=a" (v.eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "0" (1));
+
+ ibs_family = v.family + v.ext_family;
+ ibs_model = v.model + v.ext_model;
+ ibs_stepping = v.stepping;
++#else
++ ibs_family = 0;
++ ibs_model = 0;
++ ibs_stepping = 0;
++#endif
+ }
+
+
diff --git a/oprofile-check.patch b/oprofile-check.patch
new file mode 100644
index 0000000..4a65e8e
--- /dev/null
+++ b/oprofile-check.patch
@@ -0,0 +1,82 @@
+---------------------
+PatchSet 2938
+Date: 2011/01/05 16:16:06
+Author: maynardj
+Branch: HEAD
+Tag: (none)
+Branches:
+Log:
+Add argument checking for numerical arguments
+
+Members:
+ ChangeLog:1.1940->1.1941
+ utils/opcontrol:1.170->1.171
+
+Index: oprofile/utils/opcontrol
+diff -u oprofile/utils/opcontrol:1.170 oprofile/utils/opcontrol:1.171
+--- oprofile/utils/opcontrol:1.170 Mon Nov 29 15:05:44 2010
++++ oprofile/utils/opcontrol Wed Jan 5 21:16:08 2011
+@@ -49,6 +49,31 @@
+ fi
+ }
+
+# guess_number_base() checks if string is a valid octal(8), hexadecimal(16),
+# or decimal number(10). The value is returned in $?. Returns 0 if the string
+# isn't an octal, hexadecimal, or decimal number.
++guess_number_base()
++{
++ if [[ "$1" =~ ^0[0-7]*$ ]] ; then
++ return 8;
++ elif [[ "$1" =~ ^0x[0-9a-fA-F]+$ ]] ; then
++ return 16;
++ elif [[ "$1" =~ ^[1-9][0-9]*$ ]] ; then
++ return 10;
++ else
++ return 0;
++ fi
++}
++
++# check value is a valid number
++error_if_not_number()
++{
++ guess_number_base $2
++ if test "$?" -eq 0 ; then
++ echo "Argument for $1, $2, is not a valid number." >&2
++ exit 1
++ fi
++}
+
+ # rm_device arguments $1=file_name
+ rm_device()
+@@ -754,6 +779,7 @@
+ ;;
+ --buffer-size)
+ error_if_empty $arg $val
++ error_if_not_number $arg $val
+ BUF_SIZE=$val
+ DO_SETUP=yes
+ ;;
+@@ -763,6 +789,7 @@
+ exit 1
+ fi
+ error_if_empty $arg $val
++ error_if_not_number $arg $val
+ BUF_WATERSHED=$val
+ DO_SETUP=yes
+ ;;
+@@ -772,6 +799,7 @@
+ exit 1
+ fi
+ error_if_empty $arg $val
++ error_if_not_number $arg $val
+ CPU_BUF_SIZE=$val
+ DO_SETUP=yes
+ ;;
+@@ -802,6 +830,7 @@
+ echo "Call-graph profiling unsupported on this kernel/hardware" >&2
+ exit 1
+ fi
++ error_if_not_number $arg $val
+ CALLGRAPH=$val
+ DO_SETUP=yes
+ ;;
diff --git a/oprofile-iaperf.patch b/oprofile-iaperf.patch
new file mode 100644
index 0000000..4090a0c
--- /dev/null
+++ b/oprofile-iaperf.patch
@@ -0,0 +1,16 @@
+Index: oprofile/events/i386/arch_perfmon/events
+diff -u oprofile/events/i386/arch_perfmon/events:1.3 oprofile/events/i386/arch_perfmon/events:1.4
+--- oprofile/events/i386/arch_perfmon/events:1.3 Fri May 1 20:34:24 2009
++++ oprofile/events/i386/arch_perfmon/events Wed Oct 13 14:58:42 2010
+@@ -4,7 +4,7 @@
+ event:0x3c counters:cpuid um:zero minimum:6000 filter:0 name:CPU_CLK_UNHALTED : Clock cycles when not halted
+ event:0x3c counters:cpuid um:one minimum:6000 filter:2 name:UNHALTED_REFERENCE_CYCLES : Unhalted reference cycles
+ event:0xc0 counters:cpuid um:zero minimum:6000 filter:1 name:INST_RETIRED : number of instructions retired
+-event:0x2e counters:cpuid um:x41 minimum:6000 filter:5 name:LLC_MISSES : Last level cache demand requests from this core that missed the LLC
+-event:0x2e counters:cpuid um:x4f minimum:6000 filter:4 name:LLC_REFS : Last level cache demand requests from this core
+-event:0xc4 counters:cpuid um:zero minimum:500 filter:6 name:BR_INST_RETIRED : number of branch instructions retired
+-event:0xc5 counters:cpuid um:zero minimum:500 filter:7 name:BR_MISS_PRED_RETIRED : number of mispredicted branches retired (precise)
++event:0x2e counters:cpuid um:x41 minimum:6000 filter:4 name:LLC_MISSES : Last level cache demand requests from this core that missed the LLC
++event:0x2e counters:cpuid um:x4f minimum:6000 filter:3 name:LLC_REFS : Last level cache demand requests from this core
++event:0xc4 counters:cpuid um:zero minimum:500 filter:5 name:BR_INST_RETIRED : number of branch instructions retired
++event:0xc5 counters:cpuid um:zero minimum:500 filter:6 name:BR_MISS_PRED_RETIRED : number of mispredicted branches retired (precise)
diff --git a/oprofile-nehalem.patch b/oprofile-nehalem.patch
new file mode 100644
index 0000000..fa5d191
--- /dev/null
+++ b/oprofile-nehalem.patch
@@ -0,0 +1,46 @@
+Index: oprofile/libop/op_cpu_type.c
+diff -u oprofile/libop/op_cpu_type.c:1.54 oprofile/libop/op_cpu_type.c:1.55
+--- oprofile/libop/op_cpu_type.c:1.54 Wed Nov 25 20:35:19 2009
++++ oprofile/libop/op_cpu_type.c Mon Jun 21 23:12:11 2010
+@@ -83,5 +83,6 @@
+ { "Intel Core/i7", "i386/core_i7", CPU_CORE_I7, 4 },
+ { "Intel Atom", "i386/atom", CPU_ATOM, 2 },
++ { "Intel Nehalem microarchitecture", "i386/nehalem", CPU_NEHALEM, 4 },
+ };
+
+ static size_t const nr_cpu_descrs = sizeof(cpu_descrs) / sizeof(struct cpu_descr);
+Index: oprofile/libop/op_cpu_type.h
+diff -u oprofile/libop/op_cpu_type.h:1.47 oprofile/libop/op_cpu_type.h:1.48
+--- oprofile/libop/op_cpu_type.h:1.47 Wed Nov 25 20:35:19 2009
++++ oprofile/libop/op_cpu_type.h Mon Jun 21 23:12:11 2010
+@@ -80,5 +80,6 @@
+ CPU_CORE_I7, /* Intel Core i7, Nehalem */
+ CPU_ATOM, /* First generation Intel Atom */
++ CPU_NEHALEM, /* Intel Nehalem microarchitecture */
+ MAX_CPU_TYPE
+ } op_cpu;
+
+Index: oprofile/libop/op_events.c
+diff -u oprofile/libop/op_events.c:1.98 oprofile/libop/op_events.c:1.99
+--- oprofile/libop/op_events.c:1.98 Wed Nov 25 20:35:19 2009
++++ oprofile/libop/op_events.c Mon Jun 21 23:12:11 2010
+@@ -970,6 +970,7 @@
+ case CPU_FAMILY11H:
+ case CPU_ATOM:
+ case CPU_CORE_I7:
++ case CPU_NEHALEM:
+ descr->name = "CPU_CLK_UNHALTED";
+ break;
+
+Index: oprofile/utils/ophelp.c
+diff -u oprofile/utils/ophelp.c:1.35 oprofile/utils/ophelp.c:1.36
+--- oprofile/utils/ophelp.c:1.35 Wed Nov 25 20:35:20 2009
++++ oprofile/utils/ophelp.c Mon Jun 21 23:12:11 2010
+@@ -483,6 +483,7 @@
+ case CPU_CORE:
+ case CPU_CORE_2:
+ case CPU_CORE_I7:
++ case CPU_NEHALEM:
+ case CPU_ATOM:
+ event_doc =
+ "See Intel Architecture Developer's Manual Volume 3B, Appendix A and\n"
diff --git a/oprofile-westmere.patch b/oprofile-westmere.patch
new file mode 100644
index 0000000..bdb448f
--- /dev/null
+++ b/oprofile-westmere.patch
@@ -0,0 +1,676 @@
+Index: oprofile/libop/op_cpu_type.c
+diff -u oprofile/libop/op_cpu_type.c:1.58 oprofile/libop/op_cpu_type.c:1.59
+--- oprofile/libop/op_cpu_type.c:1.58 Mon Nov 29 14:52:36 2010
++++ oprofile/libop/op_cpu_type.c Wed Dec 15 20:31:09 2010
+@@ -94,6 +94,32 @@
+
+ static size_t const nr_cpu_descrs = sizeof(cpu_descrs) / sizeof(struct cpu_descr);
+
++int op_cpu_variations(op_cpu cpu_type)
++{
++ switch (cpu_type) {
++ case CPU_ARCH_PERFMON:
++ return 1;
++ default:
++ return 0;
++ }
++}
++
++
++op_cpu op_cpu_base_type(op_cpu cpu_type)
++{
++ /* All the processors that support CPU_ARCH_PERFMON */
++ switch (cpu_type) {
++ case CPU_CORE_2:
++ case CPU_CORE_I7:
++ case CPU_ATOM:
++ case CPU_NEHALEM:
++ return CPU_ARCH_PERFMON;
++ default:
++ /* assume processor in a class by itself */
++ return cpu_type;
++ }
++}
++
+ op_cpu op_get_cpu_type(void)
+ {
+ int cpu_type = CPU_NO_GOOD;
+@@ -118,6 +144,9 @@
+
+ cpu_type = op_get_cpu_number(str);
+
++ if (op_cpu_variations(cpu_type))
++ cpu_type = op_cpu_specific_type(cpu_type);
++
+ fclose(fp);
+
+ return cpu_type;
+Index: oprofile/libop/op_cpu_type.h
+diff -u oprofile/libop/op_cpu_type.h:1.51 oprofile/libop/op_cpu_type.h:1.52
+--- oprofile/libop/op_cpu_type.h:1.51 Mon Nov 29 14:52:36 2010
++++ oprofile/libop/op_cpu_type.h Wed Dec 15 20:31:09 2010
+@@ -91,6 +91,20 @@
+ } op_cpu;
+
+ /**
++ * the CPU lowest common denominator
++ *
++ * returns 1 if there are variations for the base cpu type;
++ */
++int op_cpu_variations(op_cpu cpu_type);
++
++/**
++ * get the CPU lowest common denominator
++ *
++ * returns cpu_type if cpu_type does not have a lowest common denominator.
++ */
++op_cpu op_cpu_base_type(op_cpu cpu_type);
++
++/**
+ * get the CPU type from the kernel
+ *
+ * returns CPU_NO_GOOD if the CPU could not be identified.
+Index: oprofile/libop/op_hw_specific.h
+diff -u oprofile/libop/op_hw_specific.h:1.3 oprofile/libop/op_hw_specific.h:1.4
+--- oprofile/libop/op_hw_specific.h:1.3 Tue May 19 18:45:19 2009
++++ oprofile/libop/op_hw_specific.h Wed Dec 15 20:31:09 2010
+@@ -24,38 +24,52 @@
+ return !strncmp(v.v, vnd, 12);
+ }
+
++static inline unsigned int cpuid_signature()
++{
++ unsigned eax;
++ asm("cpuid" : "=a" (eax) : "0" (1) : "ecx","ebx","edx");
++ return eax;
++}
++
++static inline unsigned int cpu_model(unsigned int eax)
++{
++ unsigned model = (eax & 0xf0) >> 4;
++ unsigned ext_model = (eax & 0xf0000) >> 12;
++ return ext_model + model;
++}
++
++static inline unsigned int cpu_family(unsigned int eax)
++{
++ unsigned family = (eax & 0xf00) >> 8;
++ unsigned ext_family = (eax & 0xff00000) >> 20;
++ return ext_family + family;
++}
++
++static inline unsigned int cpu_stepping(unsigned int eax)
++{
++ return (eax & 0xf);
++}
++
++
+ /* Work around Nehalem spec update AAJ79: CPUID incorrectly indicates
+ unhalted reference cycle architectural event is supported. We assume
+ steppings after C0 report correct data in CPUID. */
+ static inline void workaround_nehalem_aaj79(unsigned *ebx)
+ {
+- union {
+- unsigned eax;
+- struct {
+- unsigned stepping : 4;
+- unsigned model : 4;
+- unsigned family : 4;
+- unsigned type : 2;
+- unsigned res : 2;
+- unsigned ext_model : 4;
+- unsigned ext_family : 8;
+- unsigned res2 : 4;
+- };
+- } v;
+- unsigned model;
++ unsigned eax;
+
+ if (!cpuid_vendor("GenuineIntel"))
+ return;
+- asm("cpuid" : "=a" (v.eax) : "0" (1) : "ecx","ebx","edx");
+- model = (v.ext_model << 4) + v.model;
+- if (v.family != 6 || model != 26 || v.stepping > 4)
++ eax = cpuid_signature();
++ if (cpu_family(eax) != 6 || cpu_model(eax) != 26
++ || cpu_stepping(eax) > 4)
+ return;
+ *ebx |= (1 << 2); /* disable unsupported event */
+ }
+
+ static inline unsigned arch_get_filter(op_cpu cpu_type)
+ {
+- if (cpu_type == CPU_ARCH_PERFMON) {
++ if (op_cpu_base_type(cpu_type) == CPU_ARCH_PERFMON) {
+ unsigned ebx, eax;
+ asm("cpuid" : "=a" (eax), "=b" (ebx) : "0" (0xa) : "ecx","edx");
+ workaround_nehalem_aaj79(&ebx);
+@@ -66,7 +80,7 @@
+
+ static inline int arch_num_counters(op_cpu cpu_type)
+ {
+- if (cpu_type == CPU_ARCH_PERFMON) {
++ if (op_cpu_base_type(cpu_type) == CPU_ARCH_PERFMON) {
+ unsigned v;
+ asm("cpuid" : "=a" (v) : "0" (0xa) : "ebx","ecx","edx");
+ return (v >> 8) & 0xff;
+@@ -81,6 +95,28 @@
+ return num_to_mask((v >> 8) & 0xff);
+ }
+
++static inline op_cpu op_cpu_specific_type(op_cpu cpu_type)
++{
++ if (cpu_type == CPU_ARCH_PERFMON) {
++ /* Already know is Intel family 6, so just check the model. */
++ int model = cpu_model(cpuid_signature());
++ switch(model) {
++ case 0x0f:
++ case 0x16:
++ case 0x17:
++ case 0x1d:
++ return CPU_CORE_2;
++ case 0x1a:
++ case 0x1e:
++ case 0x2e:
++ return CPU_CORE_I7;
++ case 0x1c:
++ return CPU_ATOM;
++ }
++ }
++ return cpu_type;
++}
++
+ #else
+
+ static inline unsigned arch_get_filter(op_cpu cpu_type)
+@@ -104,4 +140,8 @@
+ return 0;
+ }
+
++static inline op_cpu op_cpu_specific_type(op_cpu cpu_type)
++{
++ return cpu_type;
++}
+ #endif
+Index: oprofile/events/Makefile.am
+diff -u oprofile/events/Makefile.am:1.35 oprofile/events/Makefile.am:1.36
+--- oprofile/events/Makefile.am:1.35 Mon Nov 29 14:52:36 2010
++++ oprofile/events/Makefile.am Thu Dec 16 17:28:54 2010
+@@ -17,6 +17,7 @@
+ i386/atom/events i386/atom/unit_masks \
+ i386/core_i7/events i386/core_i7/unit_masks \
+ i386/nehalem/events i386/nehalem/unit_masks \
++ i386/westmere/events i386/westmere/unit_masks \
+ ia64/ia64/events ia64/ia64/unit_masks \
+ ia64/itanium2/events ia64/itanium2/unit_masks \
+ ia64/itanium/events ia64/itanium/unit_masks \
+--- /dev/null 2010-12-20 18:01:43.861988919 -0500
++++ oprofile/events/i386/westmere/events 2011-01-03 14:38:43.889646330 -0500
+@@ -0,0 +1,88 @@
++#
++# Intel "Westmere" microarchitecture core events.
++#
++# See http://ark.intel.com/ for help in identifying Westmere based CPUs
++#
++# Note the minimum counts are not discovered experimentally and could be likely
++# lowered in many cases without ill effect.
++#
++include:i386/arch_perfmon
++event:0x03 counters:0,1,2,3 um:x02 minimum:200000 name:LOAD_BLOCK : Loads that partially overlap an earlier store
++event:0x04 counters:0,1,2,3 um:x07 minimum:200000 name:SB_DRAIN : All Store buffer stall cycles
++event:0x05 counters:0,1,2,3 um:x02 minimum:200000 name:MISALIGN_MEM_REF : Misaligned store references
++event:0x06 counters:0,1,2,3 um:store_blocks minimum:200000 name:STORE_BLOCKS : Loads delayed with at-Retirement block code
++event:0x07 counters:0,1,2,3 um:x01 minimum:200000 name:PARTIAL_ADDRESS_ALIAS : False dependencies due to partial address aliasing
++event:0x08 counters:0,1,2,3 um:dtlb_load_misses minimum:200000 name:DTLB_LOAD_MISSES : DTLB load misses
++event:0x0b counters:0,1,2,3 um:mem_inst_retired minimum:2000000 name:MEM_INST_RETIRED : Memory instructions retired above 0 clocks (Precise Event)
++event:0x0c counters:0,1,2,3 um:x01 minimum:200000 name:MEM_STORE_RETIRED : Retired stores that miss the DTLB (Precise Event)
++event:0x0e counters:0,1,2,3 um:uops_issued minimum:2000000 name:UOPS_ISSUED : Uops issued
++event:0x0f counters:0,1,2,3 um:mem_uncore_retired minimum:40000 name:MEM_UNCORE_RETIRED : Load instructions retired that HIT modified data in sibling core (Precise Event)
++event:0x10 counters:0,1,2,3 um:fp_comp_ops_exe minimum:2000000 name:FP_COMP_OPS_EXE : MMX Uops
++event:0x12 counters:0,1,2,3 um:simd_int_128 minimum:200000 name:SIMD_INT_128 : 128 bit SIMD integer pack operations
++event:0x13 counters:0,1,2,3 um:load_dispatch minimum:2000000 name:LOAD_DISPATCH : All loads dispatched
++event:0x14 counters:0,1,2,3 um:arith minimum:2000000 name:ARITH : Cycles the divider is busy
++event:0x17 counters:0,1,2,3 um:x01 minimum:2000000 name:INST_QUEUE_WRITES : Instructions written to instruction queue.
++event:0x18 counters:0,1,2,3 um:x01 minimum:2000000 name:INST_DECODED : Instructions that must be decoded by decoder 0
++event:0x19 counters:0,1,2,3 um:x01 minimum:2000000 name:TWO_UOP_INSTS_DECODED : Two Uop instructions decoded
++event:0x1e counters:0,1,2,3 um:x01 minimum:2000000 name:INST_QUEUE_WRITE_CYCLES : Cycles instructions are written to the instruction queue
++event:0x20 counters:0,1,2,3 um:x01 minimum:2000000 name:LSD_OVERFLOW : Loops that can't stream from the instruction queue
++event:0x24 counters:0,1,2,3 um:l2_rqsts minimum:200000 name:L2_RQSTS : L2 instruction fetch hits
++event:0x26 counters:0,1,2,3 um:l2_data_rqsts minimum:200000 name:L2_DATA_RQSTS : All L2 data requests
++event:0x27 counters:0,1,2,3 um:l2_write minimum:100000 name:L2_WRITE : L2 demand lock RFOs in E state
++event:0x28 counters:0,1,2,3 um:l1d_wb_l2 minimum:100000 name:L1D_WB_L2 : L1 writebacks to L2 in E state
++event:0x2e counters:0,1,2,3 um:longest_lat_cache minimum:100000 name:LONGEST_LAT_CACHE : Longest latency cache miss
++event:0x3c counters:0,1,2,3 um:cpu_clk_unhalted minimum:100000 name:CPU_CLK_UNHALTED : Reference base clock (133 Mhz) cycles when thread is not halted (programmable counter)
++event:0x49 counters:0,1,2,3 um:dtlb_misses minimum:200000 name:DTLB_MISSES : DTLB misses
++event:0x4c counters:0,1 um:x01 minimum:200000 name:LOAD_HIT_PRE : Load operations conflicting with software prefetches
++event:0x4e counters:0,1 um:l1d_prefetch minimum:200000 name:L1D_PREFETCH : L1D hardware prefetch misses
++event:0x4f counters:0,1,2,3 um:x10 minimum:2000000 name:EPT : Extended Page Table walk cycles
++event:0x51 counters:0,1 um:l1d minimum:2000000 name:L1D : L1D cache lines replaced in M state
++event:0x52 counters:0,1 um:x01 minimum:2000000 name:L1D_CACHE_PREFETCH_LOCK_FB_HIT : L1D prefetch load lock accepted in fill buffer
++event:0x60 counters:0 um:offcore_requests_outstanding minimum:2000000 name:OFFCORE_REQUESTS_OUTSTANDING : Outstanding offcore reads
++event:0x63 counters:0,1 um:cache_lock_cycles minimum:2000000 name:CACHE_LOCK_CYCLES : Cycles L1D locked
++event:0x6c counters:0,1,2,3 um:x01 minimum:2000000 name:IO_TRANSACTIONS : I/O transactions
++event:0x80 counters:0,1,2,3 um:l1i minimum:2000000 name:L1I : L1I instruction fetch stall cycles
++event:0x82 counters:0,1,2,3 um:x01 minimum:200000 name:LARGE_ITLB : Large ITLB hit
++event:0x85 counters:0,1,2,3 um:itlb_misses minimum:200000 name:ITLB_MISSES : ITLB miss
++event:0x87 counters:0,1,2,3 um:ild_stall minimum:2000000 name:ILD_STALL : Any Instruction Length Decoder stall cycles
++event:0x88 counters:0,1,2,3 um:br_inst_exec minimum:200000 name:BR_INST_EXEC : Branch instructions executed
++event:0x89 counters:0,1,2,3 um:br_misp_exec minimum:20000 name:BR_MISP_EXEC : Mispredicted branches executed
++event:0xa2 counters:0,1,2,3 um:resource_stalls minimum:2000000 name:RESOURCE_STALLS : Resource related stall cycles
++event:0xa6 counters:0,1,2,3 um:x01 minimum:2000000 name:MACRO_INSTS : Macro-fused instructions decoded
++event:0xa7 counters:0,1,2,3 um:x01 minimum:2000000 name:BACLEAR_FORCE_IQ : Instruction queue forced BACLEAR
++event:0xa8 counters:0,1,2,3 um:x01 minimum:2000000 name:LSD : Cycles when uops were delivered by the LSD
++event:0xae counters:0,1,2,3 um:x01 minimum:2000000 name:ITLB_FLUSH : ITLB flushes
++event:0xb0 counters:0,1,2,3 um:offcore_requests minimum:100000 name:OFFCORE_REQUESTS : All offcore requests
++event:0xb1 counters:0,1,2,3 um:uops_executed minimum:2000000 name:UOPS_EXECUTED : Cycles Uops executed on any port (core count)
++event:0xb2 counters:0,1,2,3 um:x01 minimum:100000 name:OFFCORE_REQUESTS_SQ_FULL : Offcore requests blocked due to Super Queue full
++event:0xb3 counters:0 um:snoopq_requests_outstanding minimum:2000000 name:SNOOPQ_REQUESTS_OUTSTANDING : Outstanding snoop code requests
++event:0xb4 counters:0,1,2,3 um:snoopq_requests minimum:100000 name:SNOOPQ_REQUESTS : Snoop code requests
++event:0xb7 counters:2 um:x01 minimum:100000 name:OFFCORE_RESPONSE_ANY_DATA : REQUEST = ANY_DATA read and RESPONSE = ANY_CACHE_DRAM
++event:0xb8 counters:0,1,2,3 um:snoop_response minimum:100000 name:SNOOP_RESPONSE : Thread responded HIT to snoop
++event:0xbb counters:1 um:x01 minimum:100000 name:OFFCORE_RESPONSE_ANY_DATA : REQUEST = ANY_DATA read and RESPONSE = ANY_CACHE_DRAM
++event:0xc0 counters:0,1,2,3 um:inst_retired minimum:2000000 name:INST_RETIRED : Instructions retired (Programmable counter and Precise Event)
++event:0xc2 counters:0,1,2,3 um:uops_retired minimum:2000000 name:UOPS_RETIRED : Cycles Uops are being retired
++event:0xc3 counters:0,1,2,3 um:machine_clears minimum:20000 name:MACHINE_CLEARS : Cycles machine clear asserted
++event:0xc4 counters:0,1,2,3 um:br_inst_retired minimum:200000 name:BR_INST_RETIRED : Retired branch instructions (Precise Event)
++event:0xc5 counters:0,1,2,3 um:br_misp_retired minimum:20000 name:BR_MISP_RETIRED : Mispredicted retired branch instructions (Precise Event)
++event:0xc7 counters:0,1,2,3 um:ssex_uops_retired minimum:200000 name:SSEX_UOPS_RETIRED : SIMD Packed-Double Uops retired (Precise Event)
++event:0xc8 counters:0,1,2,3 um:x20 minimum:200000 name:ITLB_MISS_RETIRED : Retired instructions that missed the ITLB (Precise Event)
++event:0xcb counters:0,1,2,3 um:mem_load_retired minimum:200000 name:MEM_LOAD_RETIRED : Retired loads that miss the DTLB (Precise Event)
++event:0xcc counters:0,1,2,3 um:fp_mmx_trans minimum:2000000 name:FP_MMX_TRANS : All Floating Point to and from MMX transitions
++event:0xd0 counters:0,1,2,3 um:x01 minimum:2000000 name:MACRO_INSTS : Instructions decoded
++event:0xd1 counters:0,1,2,3 um:uops_decoded minimum:2000000 name:UOPS_DECODED : Stack pointer instructions decoded
++event:0xd2 counters:0,1,2,3 um:rat_stalls minimum:2000000 name:RAT_STALLS : All RAT stall cycles
++event:0xd4 counters:0,1,2,3 um:x01 minimum:2000000 name:SEG_RENAME_STALLS : Segment rename stall cycles
++event:0xd5 counters:0,1,2,3 um:x01 minimum:2000000 name:ES_REG_RENAMES : ES segment renames
++event:0xdb counters:0,1,2,3 um:x01 minimum:2000000 name:UOP_UNFUSION : Uop unfusions due to FP exceptions
++event:0xe0 counters:0,1,2,3 um:x01 minimum:2000000 name:BR_INST_DECODED : Branch instructions decoded
++event:0xe5 counters:0,1,2,3 um:x01 minimum:2000000 name:BPU_MISSED_CALL_RET : Branch prediction unit missed call or return
++event:0xe6 counters:0,1,2,3 um:baclear minimum:2000000 name:BACLEAR : BACLEAR asserted with bad target address
++event:0xe8 counters:0,1,2,3 um:bpu_clears minimum:2000000 name:BPU_CLEARS : Early Branch Prediction Unit clears
++event:0xf0 counters:0,1,2,3 um:l2_transactions minimum:200000 name:L2_TRANSACTIONS : All L2 transactions
++event:0xf1 counters:0,1,2,3 um:l2_lines_in minimum:100000 name:L2_LINES_IN : L2 lines allocated
++event:0xf2 counters:0,1,2,3 um:l2_lines_out minimum:100000 name:L2_LINES_OUT : L2 lines evicted
++event:0xf4 counters:0,1,2,3 um:sq_misc minimum:2000000 name:SQ_MISC : Super Queue LRU hints sent to LLC
++event:0xf6 counters:0,1,2,3 um:x01 minimum:2000000 name:SQ_FULL_STALL_CYCLES : Super Queue full stall cycles
++event:0xf7 counters:0,1,2,3 um:fp_assist minimum:20000 name:FP_ASSIST : X87 Floating point assists (Precise Event)
++event:0xfd counters:0,1,2,3 um:simd_int_64 minimum:200000 name:SIMD_INT_64 : SIMD integer 64 bit pack operations
+--- /dev/null 2010-12-20 18:01:43.861988919 -0500
++++ oprofile/events/i386/westmere/unit_masks 2011-01-03 14:38:44.973013699 -0500
+@@ -0,0 +1,307 @@
++#
++# Unit masks for the Intel "Westmere" micro architecture
++#
++# See http://ark.intel.com/ for help in identifying Westmere based CPUs
++#
++include:i386/arch_perfmon
++
++name:x01 type:mandatory default:0x01
++ 0x01 No unit mask
++name:x02 type:mandatory default:0x02
++ 0x02 No unit mask
++name:x07 type:mandatory default:0x07
++ 0x07 No unit mask
++name:x10 type:mandatory default:0x10
++ 0x10 No unit mask
++name:x20 type:mandatory default:0x20
++ 0x20 No unit mask
++name:arith type:bitmask default:0x01
++ 0x01 cycles_div_busy Cycles the divider is busy
++ 0x02 mul Multiply operations executed
++name:baclear type:bitmask default:0x01
++ 0x01 clear BACLEAR asserted, regardless of cause
++ 0x02 bad_target BACLEAR asserted with bad target address
++name:bpu_clears type:bitmask default:0x01
++ 0x01 early Early Branch Prediction Unit clears
++ 0x02 late Late Branch Prediction Unit clears
++name:br_inst_exec type:bitmask default:0x7f
++ 0x01 cond Conditional branch instructions executed
++ 0x02 direct Unconditional branches executed
++ 0x04 indirect_non_call Indirect non call branches executed
++ 0x07 non_calls All non call branches executed
++ 0x08 return_near Indirect return branches executed
++ 0x10 direct_near_call Unconditional call branches executed
++ 0x20 indirect_near_call Indirect call branches executed
++ 0x30 near_calls Call branches executed
++ 0x40 taken Taken branches executed
++ 0x7f any Branch instructions executed
++name:br_inst_retired type:bitmask default:0x04
++ 0x01 conditional Retired conditional branch instructions (Precise Event)
++ 0x02 near_call Retired near call instructions (Precise Event)
++ 0x04 all_branches Retired branch instructions (Precise Event)
++name:br_misp_exec type:bitmask default:0x7f
++ 0x01 cond Mispredicted conditional branches executed
++ 0x02 direct Mispredicted unconditional branches executed
++ 0x04 indirect_non_call Mispredicted indirect non call branches executed
++ 0x07 non_calls Mispredicted non call branches executed
++ 0x08 return_near Mispredicted return branches executed
++ 0x10 direct_near_call Mispredicted non call branches executed
++ 0x20 indirect_near_call Mispredicted indirect call branches executed
++ 0x30 near_calls Mispredicted call branches executed
++ 0x40 taken Mispredicted taken branches executed
++ 0x7f any Mispredicted branches executed
++name:br_misp_retired type:bitmask default:0x04
++ 0x01 conditional Mispredicted conditional retired branches (Precise Event)
++ 0x02 near_call Mispredicted near retired calls (Precise Event)
++ 0x04 all_branches Mispredicted retired branch instructions (Precise Event)
++name:cache_lock_cycles type:bitmask default:0x01
++ 0x01 l1d_l2 Cycles L1D and L2 locked
++ 0x02 l1d Cycles L1D locked
++name:cpu_clk_unhalted type:bitmask default:0x00
++ 0x00 thread_p Cycles when thread is not halted (programmable counter)
++ 0x01 ref_p Reference base clock (133 Mhz) cycles when thread is not halted (programmable counter)
++name:dtlb_load_misses type:bitmask default:0x01
++ 0x01 any DTLB load misses
++ 0x02 walk_completed DTLB load miss page walks complete
++ 0x04 walk_cycles DTLB load miss page walk cycles
++ 0x10 stlb_hit DTLB second level hit
++ 0x20 pde_miss DTLB load miss caused by low part of address
++ 0x80 large_walk_completed DTLB load miss large page walks
++name:dtlb_misses type:bitmask default:0x01
++ 0x01 any DTLB misses
++ 0x02 walk_completed DTLB miss page walks
++ 0x04 walk_cycles DTLB miss page walk cycles
++ 0x10 stlb_hit DTLB first level misses but second level hit
++ 0x20 pde_miss DTLB misses caused by low part of address
++ 0x80 large_walk_completed DTLB miss large page walks
++name:fp_assist type:bitmask default:0x01
++ 0x01 all X87 Floating point assists (Precise Event)
++ 0x02 output X87 Floating point assists for invalid output value (Precise Event)
++ 0x04 input X87 Floating point assists for invalid input value (Precise Event)
++name:fp_comp_ops_exe type:bitmask default:0x01
++ 0x01 x87 Computational floating-point operations executed
++ 0x02 mmx MMX Uops
++ 0x04 sse_fp SSE and SSE2 FP Uops
++ 0x08 sse2_integer SSE2 integer Uops
++ 0x10 sse_fp_packed SSE FP packed Uops
++ 0x20 sse_fp_scalar SSE FP scalar Uops
++ 0x40 sse_single_precision SSE* FP single precision Uops
++ 0x80 sse_double_precision SSE* FP double precision Uops
++name:fp_mmx_trans type:bitmask default:0x03
++ 0x01 to_fp Transitions from MMX to Floating Point instructions
++ 0x02 to_mmx Transitions from Floating Point to MMX instructions
++ 0x03 any All Floating Point to and from MMX transitions
++name:ild_stall type:bitmask default:0x0f
++ 0x01 lcp Length Change Prefix stall cycles
++ 0x02 mru Stall cycles due to BPU MRU bypass
++ 0x04 iq_full Instruction Queue full stall cycles
++ 0x08 regen Regen stall cycles
++ 0x0f any Any Instruction Length Decoder stall cycles
++name:inst_retired type:bitmask default:0x01
++ 0x01 any_p Instructions retired (Programmable counter and Precise Event)
++ 0x02 x87 Retired floating-point operations (Precise Event)
++ 0x04 mmx Retired MMX instructions (Precise Event)
++name:itlb_misses type:bitmask default:0x01
++ 0x01 any ITLB miss
++ 0x02 walk_completed ITLB miss page walks
++ 0x04 walk_cycles ITLB miss page walk cycles
++ 0x80 large_walk_completed ITLB miss large page walks
++name:l1d type:bitmask default:0x01
++ 0x01 repl L1 data cache lines allocated
++ 0x02 m_repl L1D cache lines allocated in the M state
++ 0x04 m_evict L1D cache lines replaced in M state
++ 0x08 m_snoop_evict L1D snoop eviction of cache lines in M state
++name:l1d_prefetch type:bitmask default:0x01
++ 0x01 requests L1D hardware prefetch requests
++ 0x02 miss L1D hardware prefetch misses
++ 0x04 triggers L1D hardware prefetch requests triggered
++name:l1d_wb_l2 type:bitmask default:0x0f
++ 0x01 i_state L1 writebacks to L2 in I state (misses)
++ 0x02 s_state L1 writebacks to L2 in S state
++ 0x04 e_state L1 writebacks to L2 in E state
++ 0x08 m_state L1 writebacks to L2 in M state
++ 0x0f mesi All L1 writebacks to L2
++name:l1i type:bitmask default:0x01
++ 0x01 hits L1I instruction fetch hits
++ 0x02 misses L1I instruction fetch misses
++ 0x03 reads L1I Instruction fetches
++ 0x04 cycles_stalled L1I instruction fetch stall cycles
++name:l2_data_rqsts type:bitmask default:0xff
++ 0x01 demand_i_state L2 data demand loads in I state (misses)
++ 0x02 demand_s_state L2 data demand loads in S state
++ 0x04 demand_e_state L2 data demand loads in E state
++ 0x08 demand_m_state L2 data demand loads in M state
++ 0x0f demand_mesi L2 data demand requests
++ 0x10 prefetch_i_state L2 data prefetches in the I state (misses)
++ 0x20 prefetch_s_state L2 data prefetches in the S state
++ 0x40 prefetch_e_state L2 data prefetches in E state
++ 0x80 prefetch_m_state L2 data prefetches in M state
++ 0xf0 prefetch_mesi All L2 data prefetches
++ 0xff any All L2 data requests
++name:l2_lines_in type:bitmask default:0x07
++ 0x02 s_state L2 lines allocated in the S state
++ 0x04 e_state L2 lines allocated in the E state
++	0x07 any L2 lines allocated
++name:l2_lines_out type:bitmask default:0x0f
++ 0x01 demand_clean L2 lines evicted by a demand request
++ 0x02 demand_dirty L2 modified lines evicted by a demand request
++ 0x04 prefetch_clean L2 lines evicted by a prefetch request
++ 0x08 prefetch_dirty L2 modified lines evicted by a prefetch request
++ 0x0f any L2 lines evicted
++name:l2_rqsts type:bitmask default:0x01
++ 0x01 ld_hit L2 load hits
++ 0x02 ld_miss L2 load misses
++ 0x03 loads L2 requests
++ 0x04 rfo_hit L2 RFO hits
++ 0x08 rfo_miss L2 RFO misses
++ 0x0c rfos L2 RFO requests
++ 0x10 ifetch_hit L2 instruction fetch hits
++ 0x20 ifetch_miss L2 instruction fetch misses
++ 0x30 ifetches L2 instruction fetches
++ 0x40 prefetch_hit L2 prefetch hits
++ 0x80 prefetch_miss L2 prefetch misses
++ 0xaa miss All L2 misses
++ 0xc0 prefetches All L2 prefetches
++ 0xff references All L2 requests
++name:l2_transactions type:bitmask default:0x80
++ 0x01 load L2 Load transactions
++ 0x02 rfo L2 RFO transactions
++ 0x04 ifetch L2 instruction fetch transactions
++ 0x08 prefetch L2 prefetch transactions
++ 0x10 l1d_wb L1D writeback to L2 transactions
++ 0x20 fill L2 fill transactions
++ 0x40 wb L2 writeback to LLC transactions
++ 0x80 any All L2 transactions
++name:l2_write type:bitmask default:0x01
++ 0x01 rfo_i_state L2 demand store RFOs in I state (misses)
++ 0x02 rfo_s_state L2 demand store RFOs in S state
++ 0x08 rfo_m_state L2 demand store RFOs in M state
++ 0x0e rfo_hit All L2 demand store RFOs that hit the cache
++ 0x0f rfo_mesi All L2 demand store RFOs
++ 0x10 lock_i_state L2 demand lock RFOs in I state (misses)
++ 0x20 lock_s_state L2 demand lock RFOs in S state
++ 0x40 lock_e_state L2 demand lock RFOs in E state
++ 0x80 lock_m_state L2 demand lock RFOs in M state
++ 0xe0 lock_hit All demand L2 lock RFOs that hit the cache
++ 0xf0 lock_mesi All demand L2 lock RFOs
++name:load_dispatch type:bitmask default:0x07
++ 0x01 rs Loads dispatched that bypass the MOB
++ 0x02 rs_delayed Loads dispatched from stage 305
++ 0x04 mob Loads dispatched from the MOB
++ 0x07 any All loads dispatched
++name:longest_lat_cache type:bitmask default:0x01
++ 0x01 miss Longest latency cache miss
++ 0x02 reference Longest latency cache reference
++name:machine_clears type:bitmask default:0x01
++ 0x01 cycles Cycles machine clear asserted
++ 0x02 mem_order Execution pipeline restart due to Memory ordering conflicts
++ 0x04 smc Self-Modifying Code detected
++name:mem_inst_retired type:bitmask default:0x01
++ 0x01 loads Instructions retired which contains a load (Precise Event)
++ 0x02 stores Instructions retired which contains a store (Precise Event)
++ 0x10 latency_above_threshold_0 Memory instructions retired above 0 clocks (Precise Event) (MSR_INDEX: 0x03F6 MSR_VALUE: 0x0000)
++name:mem_load_retired type:bitmask default:0x01
++ 0x01 l1d_hit Retired loads that hit the L1 data cache (Precise Event)
++ 0x02 l2_hit Retired loads that hit the L2 cache (Precise Event)
++ 0x04 llc_unshared_hit Retired loads that hit valid versions in the LLC cache (Precise Event)
++ 0x08 other_core_l2_hit_hitm Retired loads that hit sibling core's L2 in modified or unmodified states (Precise Event)
++ 0x10 llc_miss Retired loads that miss the LLC cache (Precise Event)
++	0x40 hit_lfb Retired loads that miss L1D and hit a previously allocated LFB (Precise Event)
++ 0x80 dtlb_miss Retired loads that miss the DTLB (Precise Event)
++name:mem_uncore_retired type:bitmask default:0x02
++ 0x02 local_hitm Load instructions retired that HIT modified data in sibling core (Precise Event)
++ 0x04 remote_hitm Retired loads that hit remote socket in modified state (Precise Event)
++ 0x08 local_dram_and_remote_cache_hit Load instructions retired local dram and remote cache HIT data sources (Precise Event)
++ 0x10 remote_dram Load instructions retired remote DRAM and remote home-remote cache HITM (Precise Event)
++ 0x80 uncacheable Load instructions retired IO (Precise Event)
++name:offcore_requests type:bitmask default:0x80
++ 0x01 demand_read_data Offcore demand data read requests
++ 0x02 demand_read_code Offcore demand code read requests
++ 0x04 demand_rfo Offcore demand RFO requests
++ 0x08 any_read Offcore read requests
++ 0x10 any_rfo Offcore RFO requests
++ 0x40 l1d_writeback Offcore L1 data cache writebacks
++ 0x80 any All offcore requests
++name:offcore_requests_outstanding type:bitmask default:0x08
++ 0x01 demand_read_data Outstanding offcore demand data reads
++ 0x02 demand_read_code Outstanding offcore demand code reads
++ 0x04 demand_rfo Outstanding offcore demand RFOs
++ 0x08 any_read Outstanding offcore reads
++name:rat_stalls type:bitmask default:0x0f
++ 0x01 flags Flag stall cycles
++ 0x02 registers Partial register stall cycles
++ 0x04 rob_read_port ROB read port stalls cycles
++ 0x08 scoreboard Scoreboard stall cycles
++ 0x0f any All RAT stall cycles
++name:resource_stalls type:bitmask default:0x01
++ 0x01 any Resource related stall cycles
++ 0x02 load Load buffer stall cycles
++ 0x04 rs_full Reservation Station full stall cycles
++ 0x08 store Store buffer stall cycles
++ 0x10 rob_full ROB full stall cycles
++ 0x20 fpcw FPU control word write stall cycles
++ 0x40 mxcsr MXCSR rename stall cycles
++ 0x80 other Other Resource related stall cycles
++name:simd_int_128 type:bitmask default:0x01
++ 0x01 packed_mpy 128 bit SIMD integer multiply operations
++ 0x02 packed_shift 128 bit SIMD integer shift operations
++ 0x04 pack 128 bit SIMD integer pack operations
++ 0x08 unpack 128 bit SIMD integer unpack operations
++ 0x10 packed_logical 128 bit SIMD integer logical operations
++ 0x20 packed_arith 128 bit SIMD integer arithmetic operations
++ 0x40 shuffle_move 128 bit SIMD integer shuffle/move operations
++name:simd_int_64 type:bitmask default:0x01
++ 0x01 packed_mpy SIMD integer 64 bit packed multiply operations
++ 0x02 packed_shift SIMD integer 64 bit shift operations
++ 0x04 pack SIMD integer 64 bit pack operations
++ 0x08 unpack SIMD integer 64 bit unpack operations
++ 0x10 packed_logical SIMD integer 64 bit logical operations
++ 0x20 packed_arith SIMD integer 64 bit arithmetic operations
++ 0x40 shuffle_move SIMD integer 64 bit shuffle/move operations
++name:snoopq_requests type:bitmask default:0x01
++ 0x01 data Snoop data requests
++ 0x02 invalidate Snoop invalidate requests
++ 0x04 code Snoop code requests
++name:snoopq_requests_outstanding type:bitmask default:0x01
++ 0x01 data Outstanding snoop data requests
++ 0x02 invalidate Outstanding snoop invalidate requests
++ 0x04 code Outstanding snoop code requests
++name:snoop_response type:bitmask default:0x01
++ 0x01 hit Thread responded HIT to snoop
++ 0x02 hite Thread responded HITE to snoop
++ 0x04 hitm Thread responded HITM to snoop
++name:sq_misc type:bitmask default:0x04
++ 0x04 lru_hints Super Queue LRU hints sent to LLC
++ 0x10 split_lock Super Queue lock splits across a cache line
++name:ssex_uops_retired type:bitmask default:0x01
++ 0x01 packed_single SIMD Packed-Single Uops retired (Precise Event)
++ 0x02 scalar_single SIMD Scalar-Single Uops retired (Precise Event)
++ 0x04 packed_double SIMD Packed-Double Uops retired (Precise Event)
++ 0x08 scalar_double SIMD Scalar-Double Uops retired (Precise Event)
++ 0x10 vector_integer SIMD Vector Integer Uops retired (Precise Event)
++name:store_blocks type:bitmask default:0x04
++ 0x04 at_ret Loads delayed with at-Retirement block code
++ 0x08 l1d_block Cacheable loads delayed with L1D block code
++name:uops_decoded type:bitmask default:0x01
++ 0x01 stall_cycles Cycles no Uops are decoded
++ 0x02 ms_cycles_active Uops decoded by Microcode Sequencer
++ 0x04 esp_folding Stack pointer instructions decoded
++ 0x08 esp_sync Stack pointer sync operations
++name:uops_executed type:bitmask default:0x3f
++ 0x01 port0 Uops executed on port 0
++ 0x02 port1 Uops executed on port 1
++ 0x04 port2_core Uops executed on port 2 (core count)
++ 0x08 port3_core Uops executed on port 3 (core count)
++ 0x10 port4_core Uops executed on port 4 (core count)
++ 0x1f core_active_cycles_no_port5 Cycles Uops executed on ports 0-4 (core count)
++ 0x20 port5 Uops executed on port 5
++ 0x3f core_active_cycles Cycles Uops executed on any port (core count)
++ 0x40 port015 Uops issued on ports 0, 1 or 5
++ 0x80 port234_core Uops issued on ports 2, 3 or 4
++name:uops_issued type:bitmask default:0x01
++ 0x01 any Uops issued
++ 0x02 fused Fused Uops issued
++name:uops_retired type:bitmask default:0x01
++ 0x01 active_cycles Cycles Uops are being retired
++ 0x02 retire_slots Retirement slots used (Precise Event)
++ 0x04 macro_fused Macro-fused Uops retired (Precise Event)
+Index: oprofile/libop/op_cpu_type.c
+diff -u oprofile/libop/op_cpu_type.c:1.59 oprofile/libop/op_cpu_type.c:1.60
+--- oprofile/libop/op_cpu_type.c:1.59 Wed Dec 15 20:31:09 2010
++++ oprofile/libop/op_cpu_type.c Thu Dec 16 17:28:54 2010
+@@ -90,6 +90,7 @@
+ { "AMD64 family12h", "x86-64/family12h", CPU_FAMILY12H, 4 },
+ { "AMD64 family14h", "x86-64/family14h", CPU_FAMILY14H, 4 },
+ { "AMD64 family15h", "x86-64/family15h", CPU_FAMILY15H, 6 },
++ { "Intel Westmere microarchitecture", "i386/westmere", CPU_WESTMERE, 4 },
+ };
+
+ static size_t const nr_cpu_descrs = sizeof(cpu_descrs) / sizeof(struct cpu_descr);
+@@ -113,6 +114,7 @@
+ case CPU_CORE_I7:
+ case CPU_ATOM:
+ case CPU_NEHALEM:
++ case CPU_WESTMERE:
+ return CPU_ARCH_PERFMON;
+ default:
+ /* assume processor in a class by itself */
+Index: oprofile/libop/op_cpu_type.h
+diff -u oprofile/libop/op_cpu_type.h:1.52 oprofile/libop/op_cpu_type.h:1.53
+--- oprofile/libop/op_cpu_type.h:1.52 Wed Dec 15 20:31:09 2010
++++ oprofile/libop/op_cpu_type.h Thu Dec 16 17:28:54 2010
+@@ -87,6 +87,7 @@
+ CPU_FAMILY12H, /**< AMD family 12h */
+ CPU_FAMILY14H, /**< AMD family 14h */
+ CPU_FAMILY15H, /**< AMD family 15h */
++ CPU_WESTMERE, /* Intel Westmere microarchitecture */
+ MAX_CPU_TYPE
+ } op_cpu;
+
+Index: oprofile/libop/op_events.c
+diff -u oprofile/libop/op_events.c:1.102 oprofile/libop/op_events.c:1.103
+--- oprofile/libop/op_events.c:1.102 Mon Nov 29 14:52:36 2010
++++ oprofile/libop/op_events.c Thu Dec 16 17:28:54 2010
+@@ -971,6 +971,7 @@
+ case CPU_ATOM:
+ case CPU_CORE_I7:
+ case CPU_NEHALEM:
++ case CPU_WESTMERE:
+ case CPU_FAMILY12H:
+ case CPU_FAMILY14H:
+ case CPU_FAMILY15H:
+Index: oprofile/libop/op_hw_specific.h
+diff -u oprofile/libop/op_hw_specific.h:1.4 oprofile/libop/op_hw_specific.h:1.5
+--- oprofile/libop/op_hw_specific.h:1.4 Wed Dec 15 20:31:09 2010
++++ oprofile/libop/op_hw_specific.h Thu Dec 16 17:28:54 2010
+@@ -112,6 +112,8 @@
+ return CPU_CORE_I7;
+ case 0x1c:
+ return CPU_ATOM;
++ case 0x25:
++ return CPU_WESTMERE;
+ }
+ }
+ return cpu_type;
+Index: oprofile/utils/ophelp.c
+diff -u oprofile/utils/ophelp.c:1.39 oprofile/utils/ophelp.c:1.40
+--- oprofile/utils/ophelp.c:1.39 Mon Nov 29 14:52:36 2010
++++ oprofile/utils/ophelp.c Thu Dec 16 17:28:54 2010
+@@ -496,6 +496,7 @@
+ case CPU_CORE_2:
+ case CPU_CORE_I7:
+ case CPU_NEHALEM:
++ case CPU_WESTMERE:
+ case CPU_ATOM:
+ event_doc =
+ "See Intel Architecture Developer's Manual Volume 3B, Appendix A and\n"
diff --git a/oprofile.spec b/oprofile.spec
index 44a4fac..f891c72 100644
--- a/oprofile.spec
+++ b/oprofile.spec
@@ -1,7 +1,7 @@
Summary: System wide profiler
Name: oprofile
Version: 0.9.6
-Release: 6%{?dist}
+Release: 10%{?dist}
License: GPLv2
Group: Development/System
#
@@ -15,6 +15,11 @@ Patch83: oprofile-0.9.3-xen.patch
#Patch104: oprofile-jvmpi-lgpl.patch
#Patch105: oprofile-0.9.5-timer.patch
Patch106: oprofile-sect.patch
+Patch120: oprofile-iaperf.patch
+Patch121: oprofile-nehalem.patch
+Patch122: oprofile-amd.patch
+Patch123: oprofile-westmere.patch
+Patch124: oprofile-check.patch
URL: http://oprofile.sf.net
@@ -83,6 +88,11 @@ agent library.
%patch10 -p1 -b .guess2
%patch63 -p1 -b .libs
%patch106 -p1 -b .sect
+%patch120 -p1
+%patch121 -p1
+%patch122 -p1
+%patch123 -p1
+%patch124 -p1
./autogen.sh
@@ -230,6 +240,13 @@ exit 0
/etc/ld.so.conf.d/*
%changelog
+* Thu Jan 6 2011 Will Cohen <wcohen at redhat.com> - 0.9.6-10
+- Corrections for i386/arch_perfmon filters.
+- Make nehalem events available.
+- Add AMD family 12/14/15h support.
+- Add Intel westmere support.
+- opcontrol numeric argument checking.
+
* Wed Apr 21 2010 Will Cohen <wcohen at redhat.com> - 0.9.6-6
- Bump version and rebuild.
More information about the scm-commits
mailing list