bbs.cooldavid.org Git - net-next-2.6.git/commitdiff
Merge branch 'perf/urgent' into perf/core
author    Ingo Molnar <mingo@elte.hu>
          Tue, 5 Oct 2010 07:47:14 +0000 (09:47 +0200)
committer Ingo Molnar <mingo@elte.hu>
          Tue, 5 Oct 2010 07:47:14 +0000 (09:47 +0200)
Conflicts:
tools/perf/util/ui/browsers/hists.c

Merge reason: fix the conflict and merge in changes for dependent patch.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
arch/arm/oprofile/common.c
arch/x86/kernel/Makefile
arch/x86/kernel/cpu/perf_event.c
arch/x86/kernel/cpu/perf_event_p4.c
tools/perf/Makefile

index aad63e611b36a90bd156c8e1a0caed99cd2182be,72e09eb642dd7c7a00d47fce210bb1290e926fd9..d1fb5b2f5218c10cce1ca88450ef6dc09ad66de2
@@@ -96,12 -96,13 +96,13 @@@ static int op_create_counter(int cpu, i
                return ret;
  
        pevent = perf_event_create_kernel_counter(&counter_config[event].attr,
 -                                                cpu, -1,
 +                                                cpu, NULL,
                                                  op_overflow_handler);
  
        if (IS_ERR(pevent)) {
                ret = PTR_ERR(pevent);
        } else if (pevent->state != PERF_EVENT_STATE_ACTIVE) {
+               perf_event_release_kernel(pevent);
                pr_warning("oprofile: failed to enable event %d "
                                "on CPU %d\n", event, cpu);
                ret = -EBUSY;
@@@ -365,6 -366,7 +366,7 @@@ int __init oprofile_arch_init(struct op
        ret = init_driverfs();
        if (ret) {
                kfree(counter_config);
+               counter_config = NULL;
                return ret;
        }
  
@@@ -402,7 -404,6 +404,6 @@@ void oprofile_arch_exit(void
        struct perf_event *event;
  
        if (*perf_events) {
-               exit_driverfs();
                for_each_possible_cpu(cpu) {
                        for (id = 0; id < perf_num_counters; ++id) {
                                event = perf_events[cpu][id];
                }
        }
  
-       if (counter_config)
+       if (counter_config) {
                kfree(counter_config);
+               exit_driverfs();
+       }
  }
  #else
  int __init oprofile_arch_init(struct oprofile_operations *ops)
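The common.c hunks above carry two independent fixes: perf_event_create_kernel_counter() now takes a struct task_struct pointer where it used to take a pid (so a per-CPU counter passes NULL rather than -1), and a counter that comes back in a non-active state is now released with perf_event_release_kernel() instead of being leaked. A minimal sketch of that create/check/release pattern, assuming a caller-supplied attr and overflow handler (create_cpu_counter is a hypothetical helper, not part of the patch):

  #include <linux/perf_event.h>
  #include <linux/err.h>

  static struct perf_event *create_cpu_counter(struct perf_event_attr *attr,
					       int cpu,
					       perf_overflow_handler_t handler)
  {
	struct perf_event *event;

	/* task == NULL requests a per-CPU, not per-task, counter */
	event = perf_event_create_kernel_counter(attr, cpu, NULL, handler);
	if (IS_ERR(event))
		return event;

	if (event->state != PERF_EVENT_STATE_ACTIVE) {
		/* release instead of leaking, as the fix above does */
		perf_event_release_kernel(event);
		return ERR_PTR(-EBUSY);
	}

	return event;
  }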
diff --combined arch/x86/kernel/Makefile
index 24fa1718ddb964e03f13f0c5319d1ddfcd0ec29b,fedf32a8c3ecdd7a1aaed41b2884d2c9b10bf033..9d3f485e5dd0114d0e868511e89e8e423d545ff4
@@@ -11,6 -11,8 +11,8 @@@ ifdef CONFIG_FUNCTION_TRACE
  CFLAGS_REMOVE_tsc.o = -pg
  CFLAGS_REMOVE_rtc.o = -pg
  CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
+ CFLAGS_REMOVE_pvclock.o = -pg
+ CFLAGS_REMOVE_kvmclock.o = -pg
  CFLAGS_REMOVE_ftrace.o = -pg
  CFLAGS_REMOVE_early_printk.o = -pg
  endif
@@@ -32,7 -34,7 +34,7 @@@ GCOV_PROFILE_paravirt.o               := 
  obj-y                 := process_$(BITS).o signal.o entry_$(BITS).o
  obj-y                 += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
  obj-y                 += time.o ioport.o ldt.o dumpstack.o
 -obj-y                 += setup.o x86_init.o i8259.o irqinit.o
 +obj-y                 += setup.o x86_init.o i8259.o irqinit.o jump_label.o
  obj-$(CONFIG_X86_VISWS)       += visws_quirks.o
  obj-$(CONFIG_X86_32)  += probe_roms_32.o
  obj-$(CONFIG_X86_32)  += sys_i386_32.o i386_ksyms_32.o
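The two new CFLAGS_REMOVE lines strip -pg (ftrace instrumentation) from pvclock.o and kvmclock.o, presumably because the paravirt clock code sits under sched_clock(), which the tracer's own timestamping calls, so instrumenting it would recurse. CFLAGS_REMOVE_<obj> is the whole-file knob; the per-function equivalent is the notrace attribute, sketched below with a hypothetical function name:

  #include <linux/compiler.h>
  #include <linux/types.h>

  /* notrace excludes just this function from -pg instrumentation,
   * where CFLAGS_REMOVE_foo.o = -pg excludes the whole object file */
  static notrace u64 demo_read_clock(void)
  {
	return 0;	/* stand-in for a real cycle-counter read */
  }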
index 0fb17050360fb846806aa7f1bc1c9b564219246e,03a5b0385ad6c4402d2192b42aa14e9bfaccc80e..6526a86616a77fefb433c61c3c219633444b9089
@@@ -102,6 -102,7 +102,7 @@@ struct cpu_hw_events 
         */
        struct perf_event       *events[X86_PMC_IDX_MAX]; /* in counter order */
        unsigned long           active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+       unsigned long           running[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
        int                     enabled;
  
        int                     n_events;
@@@ -530,7 -531,7 +531,7 @@@ static int x86_pmu_hw_config(struct per
  /*
   * Setup the hardware configuration for a given attr_type
   */
 -static int __hw_perf_event_init(struct perf_event *event)
 +static int __x86_pmu_event_init(struct perf_event *event)
  {
        int err;
  
@@@ -583,7 -584,7 +584,7 @@@ static void x86_pmu_disable_all(void
        }
  }
  
 -void hw_perf_disable(void)
 +static void x86_pmu_disable(struct pmu *pmu)
  {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
  
@@@ -618,7 -619,7 +619,7 @@@ static void x86_pmu_enable_all(int adde
        }
  }
  
 -static const struct pmu pmu;
 +static struct pmu pmu;
  
  static inline int is_x86_event(struct perf_event *event)
  {
@@@ -800,10 -801,10 +801,10 @@@ static inline int match_prev_assignment
                hwc->last_tag == cpuc->tags[i];
  }
  
 -static int x86_pmu_start(struct perf_event *event);
 -static void x86_pmu_stop(struct perf_event *event);
 +static void x86_pmu_start(struct perf_event *event, int flags);
 +static void x86_pmu_stop(struct perf_event *event, int flags);
  
 -void hw_perf_enable(void)
 +static void x86_pmu_enable(struct pmu *pmu)
  {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct perf_event *event;
                            match_prev_assignment(hwc, cpuc, i))
                                continue;
  
 -                      x86_pmu_stop(event);
 +                      /*
 +                       * Ensure we don't accidentally enable a stopped
 +                       * counter simply because we rescheduled.
 +                       */
 +                      if (hwc->state & PERF_HES_STOPPED)
 +                              hwc->state |= PERF_HES_ARCH;
 +
 +                      x86_pmu_stop(event, PERF_EF_UPDATE);
                }
  
                for (i = 0; i < cpuc->n_events; i++) {
                        else if (i < n_running)
                                continue;
  
 -                      x86_pmu_start(event);
 +                      if (hwc->state & PERF_HES_ARCH)
 +                              continue;
 +
 +                      x86_pmu_start(event, PERF_EF_RELOAD);
                }
                cpuc->n_added = 0;
                perf_events_lapic_init();
@@@ -962,12 -953,15 +963,12 @@@ static void x86_pmu_enable_event(struc
  }
  
  /*
 - * activate a single event
 + * Add a single event to the PMU.
   *
   * The event is added to the group of enabled events
   * but only if it can be scehduled with existing events.
 - *
 - * Called with PMU disabled. If successful and return value 1,
 - * then guaranteed to call perf_enable() and hw_perf_enable()
   */
 -static int x86_pmu_enable(struct perf_event *event)
 +static int x86_pmu_add(struct perf_event *event, int flags)
  {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct hw_perf_event *hwc;
  
        hwc = &event->hw;
  
 +      perf_pmu_disable(event->pmu);
        n0 = cpuc->n_events;
 -      n = collect_events(cpuc, event, false);
 -      if (n < 0)
 -              return n;
 +      ret = n = collect_events(cpuc, event, false);
 +      if (ret < 0)
 +              goto out;
 +
 +      hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
 +      if (!(flags & PERF_EF_START))
 +              hwc->state |= PERF_HES_ARCH;
  
        /*
         * If group events scheduling transaction was started,
         * skip the schedulability test here, it will be peformed
 -       * at commit time(->commit_txn) as a whole
 +       * at commit time (->commit_txn) as a whole
         */
        if (cpuc->group_flag & PERF_EVENT_TXN)
 -              goto out;
 +              goto done_collect;
  
        ret = x86_pmu.schedule_events(cpuc, n, assign);
        if (ret)
 -              return ret;
 +              goto out;
        /*
         * copy new assignment, now we know it is possible
         * will be used by hw_perf_enable()
         */
        memcpy(cpuc->assign, assign, n*sizeof(int));
  
 -out:
 +done_collect:
        cpuc->n_events = n;
        cpuc->n_added += n - n0;
        cpuc->n_txn += n - n0;
  
 -      return 0;
 +      ret = 0;
 +out:
 +      perf_pmu_enable(event->pmu);
 +      return ret;
  }
  
 -static int x86_pmu_start(struct perf_event *event)
 +static void x86_pmu_start(struct perf_event *event, int flags)
  {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        int idx = event->hw.idx;
  
 -      if (idx == -1)
 -              return -EAGAIN;
 +      if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
 +              return;
 +
 +      if (WARN_ON_ONCE(idx == -1))
 +              return;
 +
 +      if (flags & PERF_EF_RELOAD) {
 +              WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
 +              x86_perf_event_set_period(event);
 +      }
 +
 +      event->hw.state = 0;
  
 -      x86_perf_event_set_period(event);
        cpuc->events[idx] = event;
        __set_bit(idx, cpuc->active_mask);
+       __set_bit(idx, cpuc->running);
        x86_pmu.enable(event);
        perf_event_update_userpage(event);
 -
 -      return 0;
 -}
 -
 -static void x86_pmu_unthrottle(struct perf_event *event)
 -{
 -      int ret = x86_pmu_start(event);
 -      WARN_ON_ONCE(ret);
  }
  
  void perf_event_print_debug(void)
        local_irq_restore(flags);
  }
  
 -static void x86_pmu_stop(struct perf_event *event)
 +static void x86_pmu_stop(struct perf_event *event, int flags)
  {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
 -      int idx = hwc->idx;
  
 -      if (!__test_and_clear_bit(idx, cpuc->active_mask))
 -              return;
 -
 -      x86_pmu.disable(event);
 -
 -      /*
 -       * Drain the remaining delta count out of a event
 -       * that we are disabling:
 -       */
 -      x86_perf_event_update(event);
 +      if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) {
 +              x86_pmu.disable(event);
 +              cpuc->events[hwc->idx] = NULL;
 +              WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
 +              hwc->state |= PERF_HES_STOPPED;
 +      }
  
 -      cpuc->events[idx] = NULL;
 +      if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
 +              /*
 +               * Drain the remaining delta count out of a event
 +               * that we are disabling:
 +               */
 +              x86_perf_event_update(event);
 +              hwc->state |= PERF_HES_UPTODATE;
 +      }
  }
  
 -static void x86_pmu_disable(struct perf_event *event)
 +static void x86_pmu_del(struct perf_event *event, int flags)
  {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        int i;
        if (cpuc->group_flag & PERF_EVENT_TXN)
                return;
  
 -      x86_pmu_stop(event);
 +      x86_pmu_stop(event, PERF_EF_UPDATE);
  
        for (i = 0; i < cpuc->n_events; i++) {
                if (event == cpuc->event_list[i]) {
@@@ -1159,8 -1143,16 +1161,16 @@@ static int x86_pmu_handle_irq(struct pt
        cpuc = &__get_cpu_var(cpu_hw_events);
  
        for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-               if (!test_bit(idx, cpuc->active_mask))
+               if (!test_bit(idx, cpuc->active_mask)) {
+                       /*
+                        * Though we deactivated the counter some cpus
+                        * might still deliver spurious interrupts still
+                        * in flight. Catch them:
+                        */
+                       if (__test_and_clear_bit(idx, cpuc->running))
+                               handled++;
                        continue;
+               }
  
                event = cpuc->events[idx];
                hwc = &event->hw;
                        continue;
  
                if (perf_event_overflow(event, 1, &data, regs))
 -                      x86_pmu_stop(event);
 +                      x86_pmu_stop(event, 0);
        }
  
        if (handled)
@@@ -1396,6 -1388,7 +1406,6 @@@ void __init init_hw_perf_events(void
                x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
        }
        x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
 -      perf_max_events = x86_pmu.num_counters;
  
        if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
                WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
        pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_counters_fixed);
        pr_info("... event mask:             %016Lx\n", x86_pmu.intel_ctrl);
  
 +      perf_pmu_register(&pmu);
        perf_cpu_notifier(x86_pmu_notifier);
  }
  
@@@ -1445,11 -1437,10 +1455,11 @@@ static inline void x86_pmu_read(struct 
   * Set the flag to make pmu::enable() not perform the
   * schedulability test, it will be performed at commit time
   */
 -static void x86_pmu_start_txn(const struct pmu *pmu)
 +static void x86_pmu_start_txn(struct pmu *pmu)
  {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
  
 +      perf_pmu_disable(pmu);
        cpuc->group_flag |= PERF_EVENT_TXN;
        cpuc->n_txn = 0;
  }
   * Clear the flag and pmu::enable() will perform the
   * schedulability test.
   */
 -static void x86_pmu_cancel_txn(const struct pmu *pmu)
 +static void x86_pmu_cancel_txn(struct pmu *pmu)
  {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
  
         */
        cpuc->n_added -= cpuc->n_txn;
        cpuc->n_events -= cpuc->n_txn;
 +      perf_pmu_enable(pmu);
  }
  
  /*
   * Perform the group schedulability test as a whole
   * Return 0 if success
   */
 -static int x86_pmu_commit_txn(const struct pmu *pmu)
 +static int x86_pmu_commit_txn(struct pmu *pmu)
  {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        int assign[X86_PMC_IDX_MAX];
        memcpy(cpuc->assign, assign, n*sizeof(int));
  
        cpuc->group_flag &= ~PERF_EVENT_TXN;
 -
 +      perf_pmu_enable(pmu);
        return 0;
  }
  
 -static const struct pmu pmu = {
 -      .enable         = x86_pmu_enable,
 -      .disable        = x86_pmu_disable,
 -      .start          = x86_pmu_start,
 -      .stop           = x86_pmu_stop,
 -      .read           = x86_pmu_read,
 -      .unthrottle     = x86_pmu_unthrottle,
 -      .start_txn      = x86_pmu_start_txn,
 -      .cancel_txn     = x86_pmu_cancel_txn,
 -      .commit_txn     = x86_pmu_commit_txn,
 -};
 -
  /*
   * validate that we can schedule this event
   */
        return ret;
  }
  
 -const struct pmu *hw_perf_event_init(struct perf_event *event)
 +int x86_pmu_event_init(struct perf_event *event)
  {
 -      const struct pmu *tmp;
 +      struct pmu *tmp;
        int err;
  
 -      err = __hw_perf_event_init(event);
 +      switch (event->attr.type) {
 +      case PERF_TYPE_RAW:
 +      case PERF_TYPE_HARDWARE:
 +      case PERF_TYPE_HW_CACHE:
 +              break;
 +
 +      default:
 +              return -ENOENT;
 +      }
 +
 +      err = __x86_pmu_event_init(event);
        if (!err) {
                /*
                 * we temporarily connect event to its pmu
        if (err) {
                if (event->destroy)
                        event->destroy(event);
 -              return ERR_PTR(err);
        }
  
 -      return &pmu;
 +      return err;
  }
  
 -/*
 - * callchain support
 - */
 +static struct pmu pmu = {
 +      .pmu_enable     = x86_pmu_enable,
 +      .pmu_disable    = x86_pmu_disable,
  
 -static inline
 -void callchain_store(struct perf_callchain_entry *entry, u64 ip)
 -{
 -      if (entry->nr < PERF_MAX_STACK_DEPTH)
 -              entry->ip[entry->nr++] = ip;
 -}
 +      .event_init     = x86_pmu_event_init,
  
 -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
 -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
 +      .add            = x86_pmu_add,
 +      .del            = x86_pmu_del,
 +      .start          = x86_pmu_start,
 +      .stop           = x86_pmu_stop,
 +      .read           = x86_pmu_read,
 +
 +      .start_txn      = x86_pmu_start_txn,
 +      .cancel_txn     = x86_pmu_cancel_txn,
 +      .commit_txn     = x86_pmu_commit_txn,
 +};
  
 +/*
 + * callchain support
 + */
  
  static void
  backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
@@@ -1658,7 -1645,7 +1668,7 @@@ static void backtrace_address(void *dat
  {
        struct perf_callchain_entry *entry = data;
  
 -      callchain_store(entry, addr);
 +      perf_callchain_store(entry, addr);
  }
  
  static const struct stacktrace_ops backtrace_ops = {
        .walk_stack             = print_context_stack_bp,
  };
  
 -static void
 -perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
 +void
 +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
  {
 -      callchain_store(entry, PERF_CONTEXT_KERNEL);
 -      callchain_store(entry, regs->ip);
 +      if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
 +              /* TODO: We don't support guest os callchain now */
 +              return;
 +      }
 +
 +      perf_callchain_store(entry, regs->ip);
  
        dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
  }
@@@ -1706,7 -1689,7 +1716,7 @@@ perf_callchain_user32(struct pt_regs *r
                if (fp < compat_ptr(regs->sp))
                        break;
  
 -              callchain_store(entry, frame.return_address);
 +              perf_callchain_store(entry, frame.return_address);
                fp = compat_ptr(frame.next_frame);
        }
        return 1;
@@@ -1719,20 -1702,19 +1729,20 @@@ perf_callchain_user32(struct pt_regs *r
  }
  #endif
  
 -static void
 -perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
 +void
 +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
  {
        struct stack_frame frame;
        const void __user *fp;
  
 -      if (!user_mode(regs))
 -              regs = task_pt_regs(current);
 +      if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
 +              /* TODO: We don't support guest os callchain now */
 +              return;
 +      }
  
        fp = (void __user *)regs->bp;
  
 -      callchain_store(entry, PERF_CONTEXT_USER);
 -      callchain_store(entry, regs->ip);
 +      perf_callchain_store(entry, regs->ip);
  
        if (perf_callchain_user32(regs, entry))
                return;
                if ((unsigned long)fp < regs->sp)
                        break;
  
 -              callchain_store(entry, frame.return_address);
 +              perf_callchain_store(entry, frame.return_address);
                fp = frame.next_frame;
        }
  }
  
 -static void
 -perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
 -{
 -      int is_user;
 -
 -      if (!regs)
 -              return;
 -
 -      is_user = user_mode(regs);
 -
 -      if (is_user && current->state != TASK_RUNNING)
 -              return;
 -
 -      if (!is_user)
 -              perf_callchain_kernel(regs, entry);
 -
 -      if (current->mm)
 -              perf_callchain_user(regs, entry);
 -}
 -
 -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 -{
 -      struct perf_callchain_entry *entry;
 -
 -      if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
 -              /* TODO: We don't support guest os callchain now */
 -              return NULL;
 -      }
 -
 -      if (in_nmi())
 -              entry = &__get_cpu_var(pmc_nmi_entry);
 -      else
 -              entry = &__get_cpu_var(pmc_irq_entry);
 -
 -      entry->nr = 0;
 -
 -      perf_do_callchain(regs, entry);
 -
 -      return entry;
 -}
 -
  unsigned long perf_instruction_pointer(struct pt_regs *regs)
  {
        unsigned long ip;
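Taken together, the perf_event.c hunks convert the x86 driver from the old global hooks (hw_perf_enable(), hw_perf_event_init(), pmu->enable/disable) to the new struct pmu interface: event_init() returns -ENOENT for event types it does not own so the core can try the next PMU, add()/del() attach and detach an event, start()/stop() flip counting under the PERF_EF_RELOAD/PERF_EF_UPDATE flags, and the PERF_HES_STOPPED/PERF_HES_UPTODATE bits in hw.state record whether a counter is stopped and whether its count has been drained. A skeleton of that contract for an imaginary PMU (all demo_* names are illustrative, not from the patch):

  #include <linux/perf_event.h>

  static int demo_event_init(struct perf_event *event)
  {
	/* claim only the types this PMU implements; -ENOENT lets
	 * the core offer the event to the next registered pmu */
	if (event->attr.type != PERF_TYPE_RAW)
		return -ENOENT;
	return 0;
  }

  static void demo_start(struct perf_event *event, int flags)
  {
	/* PERF_EF_RELOAD: reprogram the sample period before counting */
	event->hw.state = 0;
  }

  static void demo_stop(struct perf_event *event, int flags)
  {
	event->hw.state |= PERF_HES_STOPPED;
	if (flags & PERF_EF_UPDATE)
		event->hw.state |= PERF_HES_UPTODATE;	/* count drained */
  }

  static int demo_add(struct perf_event *event, int flags)
  {
	/* events arrive stopped; start only if PERF_EF_START is set */
	event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
	if (flags & PERF_EF_START)
		demo_start(event, PERF_EF_RELOAD);
	return 0;
  }

  static void demo_del(struct perf_event *event, int flags)
  {
	demo_stop(event, PERF_EF_UPDATE);
  }

  static void demo_read(struct perf_event *event)
  {
  }

  static struct pmu demo_pmu = {
	.event_init	= demo_event_init,
	.add		= demo_add,
	.del		= demo_del,
	.start		= demo_start,
	.stop		= demo_stop,
	.read		= demo_read,
  };

  /* registered once at init time, as init_hw_perf_events() does above:
   *	perf_pmu_register(&demo_pmu);
   */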
index c70c878ee02a022b0a92a070139b57e78d3ef943,2490151739921e074085a5d4a39a6cfb783f7187..81400b93e69483e18bedf1c541753e2f915910cd
@@@ -18,8 -18,6 +18,8 @@@
  struct p4_event_bind {
        unsigned int opcode;                    /* Event code and ESCR selector */
        unsigned int escr_msr[2];               /* ESCR MSR for this event */
 +      unsigned int escr_emask;                /* valid ESCR EventMask bits */
 +      unsigned int shared;                    /* event is shared across threads */
        char cntr[2][P4_CNTR_LIMIT];            /* counter index (offset), -1 on abscence */
  };
  
@@@ -68,435 -66,231 +68,435 @@@ static struct p4_event_bind p4_event_bi
        [P4_EVENT_TC_DELIVER_MODE] = {
                .opcode         = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE),
                .escr_msr       = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD)                 |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DB)                 |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DI)                 |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BD)                 |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BB)                 |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BI)                 |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, ID),
 +              .shared         = 1,
                .cntr           = { {4, 5, -1}, {6, 7, -1} },
        },
        [P4_EVENT_BPU_FETCH_REQUEST] = {
                .opcode         = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST),
                .escr_msr       = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BPU_FETCH_REQUEST, TCMISS),
                .cntr           = { {0, -1, -1}, {2, -1, -1} },
        },
        [P4_EVENT_ITLB_REFERENCE] = {
                .opcode         = P4_OPCODE(P4_EVENT_ITLB_REFERENCE),
                .escr_msr       = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT)                 |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, MISS)                |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT_UK),
                .cntr           = { {0, -1, -1}, {2, -1, -1} },
        },
        [P4_EVENT_MEMORY_CANCEL] = {
                .opcode         = P4_OPCODE(P4_EVENT_MEMORY_CANCEL),
                .escr_msr       = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL)           |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, 64K_CONF),
                .cntr           = { {8, 9, -1}, {10, 11, -1} },
        },
        [P4_EVENT_MEMORY_COMPLETE] = {
                .opcode         = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE),
                .escr_msr       = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, LSC)                |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, SSC),
                .cntr           = { {8, 9, -1}, {10, 11, -1} },
        },
        [P4_EVENT_LOAD_PORT_REPLAY] = {
                .opcode         = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY),
                .escr_msr       = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD),
                .cntr           = { {8, 9, -1}, {10, 11, -1} },
        },
        [P4_EVENT_STORE_PORT_REPLAY] = {
                .opcode         = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY),
                .escr_msr       = { MSR_P4_SAAT_ESCR0 ,  MSR_P4_SAAT_ESCR1 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST),
                .cntr           = { {8, 9, -1}, {10, 11, -1} },
        },
        [P4_EVENT_MOB_LOAD_REPLAY] = {
                .opcode         = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY),
                .escr_msr       = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STA)             |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STD)             |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA)       |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR),
                .cntr           = { {0, -1, -1}, {2, -1, -1} },
        },
        [P4_EVENT_PAGE_WALK_TYPE] = {
                .opcode         = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE),
                .escr_msr       = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, DTMISS)              |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, ITMISS),
 +              .shared         = 1,
                .cntr           = { {0, -1, -1}, {2, -1, -1} },
        },
        [P4_EVENT_BSQ_CACHE_REFERENCE] = {
                .opcode         = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE),
                .escr_msr       = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS)   |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE)   |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM)   |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS)   |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE)   |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)   |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS)   |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS)   |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS),
                .cntr           = { {0, -1, -1}, {2, -1, -1} },
        },
        [P4_EVENT_IOQ_ALLOCATION] = {
                .opcode         = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION),
                .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, DEFAULT)             |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_READ)            |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE)           |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_UC)              |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WC)              |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WT)              |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WP)              |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WB)              |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OWN)                 |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OTHER)               |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, PREFETCH),
                .cntr           = { {0, -1, -1}, {2, -1, -1} },
        },
        [P4_EVENT_IOQ_ACTIVE_ENTRIES] = {       /* shared ESCR */
                .opcode         = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES),
                .escr_msr       = { MSR_P4_FSB_ESCR1,  MSR_P4_FSB_ESCR1 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT)         |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ)        |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE)       |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC)          |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC)          |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT)          |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP)          |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB)          |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN)             |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER)           |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH),
                .cntr           = { {2, -1, -1}, {3, -1, -1} },
        },
        [P4_EVENT_FSB_DATA_ACTIVITY] = {
                .opcode         = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY),
                .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV)         |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN)         |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER)       |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV)         |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN)         |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER),
 +              .shared         = 1,
                .cntr           = { {0, -1, -1}, {2, -1, -1} },
        },
        [P4_EVENT_BSQ_ALLOCATION] = {           /* shared ESCR, broken CCCR1 */
                .opcode         = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION),
                .escr_msr       = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0)           |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1)           |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0)            |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1)            |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE)         |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE)       |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE)      |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE)      |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE)        |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE)        |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0)           |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1)           |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2),
                .cntr           = { {0, -1, -1}, {1, -1, -1} },
        },
        [P4_EVENT_BSQ_ACTIVE_ENTRIES] = {       /* shared ESCR */
                .opcode         = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES),
                .escr_msr       = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0)       |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1)       |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0)        |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1)        |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE)     |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE)   |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE)  |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE)  |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE)    |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE)    |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0)       |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1)       |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2),
                .cntr           = { {2, -1, -1}, {3, -1, -1} },
        },
        [P4_EVENT_SSE_INPUT_ASSIST] = {
                .opcode         = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST),
                .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_SSE_INPUT_ASSIST, ALL),
 +              .shared         = 1,
                .cntr           = { {8, 9, -1}, {10, 11, -1} },
        },
        [P4_EVENT_PACKED_SP_UOP] = {
                .opcode         = P4_OPCODE(P4_EVENT_PACKED_SP_UOP),
                .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_SP_UOP, ALL),
 +              .shared         = 1,
                .cntr           = { {8, 9, -1}, {10, 11, -1} },
        },
        [P4_EVENT_PACKED_DP_UOP] = {
                .opcode         = P4_OPCODE(P4_EVENT_PACKED_DP_UOP),
                .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_DP_UOP, ALL),
 +              .shared         = 1,
                .cntr           = { {8, 9, -1}, {10, 11, -1} },
        },
        [P4_EVENT_SCALAR_SP_UOP] = {
                .opcode         = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP),
                .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_SP_UOP, ALL),
 +              .shared         = 1,
                .cntr           = { {8, 9, -1}, {10, 11, -1} },
        },
        [P4_EVENT_SCALAR_DP_UOP] = {
                .opcode         = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP),
                .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_DP_UOP, ALL),
 +              .shared         = 1,
                .cntr           = { {8, 9, -1}, {10, 11, -1} },
        },
        [P4_EVENT_64BIT_MMX_UOP] = {
                .opcode         = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP),
                .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_64BIT_MMX_UOP, ALL),
 +              .shared         = 1,
                .cntr           = { {8, 9, -1}, {10, 11, -1} },
        },
        [P4_EVENT_128BIT_MMX_UOP] = {
                .opcode         = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP),
                .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_128BIT_MMX_UOP, ALL),
 +              .shared         = 1,
                .cntr           = { {8, 9, -1}, {10, 11, -1} },
        },
        [P4_EVENT_X87_FP_UOP] = {
                .opcode         = P4_OPCODE(P4_EVENT_X87_FP_UOP),
                .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_X87_FP_UOP, ALL),
 +              .shared         = 1,
                .cntr           = { {8, 9, -1}, {10, 11, -1} },
        },
        [P4_EVENT_TC_MISC] = {
                .opcode         = P4_OPCODE(P4_EVENT_TC_MISC),
                .escr_msr       = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_TC_MISC, FLUSH),
                .cntr           = { {4, 5, -1}, {6, 7, -1} },
        },
        [P4_EVENT_GLOBAL_POWER_EVENTS] = {
                .opcode         = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS),
                .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING),
                .cntr           = { {0, -1, -1}, {2, -1, -1} },
        },
        [P4_EVENT_TC_MS_XFER] = {
                .opcode         = P4_OPCODE(P4_EVENT_TC_MS_XFER),
                .escr_msr       = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_TC_MS_XFER, CISC),
                .cntr           = { {4, 5, -1}, {6, 7, -1} },
        },
        [P4_EVENT_UOP_QUEUE_WRITES] = {
                .opcode         = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES),
                .escr_msr       = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD)     |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER)   |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM),
                .cntr           = { {4, 5, -1}, {6, 7, -1} },
        },
        [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = {
                .opcode         = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE),
                .escr_msr       = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL)    |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL)           |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN)         |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT),
                .cntr           = { {4, 5, -1}, {6, 7, -1} },
        },
        [P4_EVENT_RETIRED_BRANCH_TYPE] = {
                .opcode         = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE),
                .escr_msr       = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL)    |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL)           |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN)         |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT),
                .cntr           = { {4, 5, -1}, {6, 7, -1} },
        },
        [P4_EVENT_RESOURCE_STALL] = {
                .opcode         = P4_OPCODE(P4_EVENT_RESOURCE_STALL),
                .escr_msr       = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_RESOURCE_STALL, SBFULL),
                .cntr           = { {12, 13, 16}, {14, 15, 17} },
        },
        [P4_EVENT_WC_BUFFER] = {
                .opcode         = P4_OPCODE(P4_EVENT_WC_BUFFER),
                .escr_msr       = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_EVICTS)               |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS),
 +              .shared         = 1,
                .cntr           = { {8, 9, -1}, {10, 11, -1} },
        },
        [P4_EVENT_B2B_CYCLES] = {
                .opcode         = P4_OPCODE(P4_EVENT_B2B_CYCLES),
                .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
 +              .escr_emask     = 0,
                .cntr           = { {0, -1, -1}, {2, -1, -1} },
        },
        [P4_EVENT_BNR] = {
                .opcode         = P4_OPCODE(P4_EVENT_BNR),
                .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
 +              .escr_emask     = 0,
                .cntr           = { {0, -1, -1}, {2, -1, -1} },
        },
        [P4_EVENT_SNOOP] = {
                .opcode         = P4_OPCODE(P4_EVENT_SNOOP),
                .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
 +              .escr_emask     = 0,
                .cntr           = { {0, -1, -1}, {2, -1, -1} },
        },
        [P4_EVENT_RESPONSE] = {
                .opcode         = P4_OPCODE(P4_EVENT_RESPONSE),
                .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
 +              .escr_emask     = 0,
                .cntr           = { {0, -1, -1}, {2, -1, -1} },
        },
        [P4_EVENT_FRONT_END_EVENT] = {
                .opcode         = P4_OPCODE(P4_EVENT_FRONT_END_EVENT),
                .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, NBOGUS)             |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, BOGUS),
                .cntr           = { {12, 13, 16}, {14, 15, 17} },
        },
        [P4_EVENT_EXECUTION_EVENT] = {
                .opcode         = P4_OPCODE(P4_EVENT_EXECUTION_EVENT),
                .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0)            |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1)            |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2)            |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3)            |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0)             |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1)             |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2)             |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3),
                .cntr           = { {12, 13, 16}, {14, 15, 17} },
        },
        [P4_EVENT_REPLAY_EVENT] = {
                .opcode         = P4_OPCODE(P4_EVENT_REPLAY_EVENT),
                .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, NBOGUS)                |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, BOGUS),
                .cntr           = { {12, 13, 16}, {14, 15, 17} },
        },
        [P4_EVENT_INSTR_RETIRED] = {
                .opcode         = P4_OPCODE(P4_EVENT_INSTR_RETIRED),
                .escr_msr       = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG)           |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSTAG)            |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)            |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSTAG),
                .cntr           = { {12, 13, 16}, {14, 15, 17} },
        },
        [P4_EVENT_UOPS_RETIRED] = {
                .opcode         = P4_OPCODE(P4_EVENT_UOPS_RETIRED),
                .escr_msr       = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, NBOGUS)                |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, BOGUS),
                .cntr           = { {12, 13, 16}, {14, 15, 17} },
        },
        [P4_EVENT_UOP_TYPE] = {
                .opcode         = P4_OPCODE(P4_EVENT_UOP_TYPE),
                .escr_msr       = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGLOADS)                  |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGSTORES),
                .cntr           = { {12, 13, 16}, {14, 15, 17} },
        },
        [P4_EVENT_BRANCH_RETIRED] = {
                .opcode         = P4_OPCODE(P4_EVENT_BRANCH_RETIRED),
                .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNP)                |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNM)                |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTP)                |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTM),
                .cntr           = { {12, 13, 16}, {14, 15, 17} },
        },
        [P4_EVENT_MISPRED_BRANCH_RETIRED] = {
                .opcode         = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED),
                .escr_msr       = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
 +              .escr_emask     =
 +              P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS),
                .cntr           = { {12, 13, 16}, {14, 15, 17} },
        },
        [P4_EVENT_X87_ASSIST] = {
                .opcode         = P4_OPCODE(P4_EVENT_X87_ASSIST),
                .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSU)                    |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSO)                    |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAO)                    |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAU)                    |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, PREA),
                .cntr           = { {12, 13, 16}, {14, 15, 17} },
        },
        [P4_EVENT_MACHINE_CLEAR] = {
                .opcode         = P4_OPCODE(P4_EVENT_MACHINE_CLEAR),
                .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, CLEAR)                |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, MOCLEAR)              |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, SMCLEAR),
                .cntr           = { {12, 13, 16}, {14, 15, 17} },
        },
        [P4_EVENT_INSTR_COMPLETED] = {
                .opcode         = P4_OPCODE(P4_EVENT_INSTR_COMPLETED),
                .escr_msr       = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
 +              .escr_emask     =
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, NBOGUS)             |
 +                      P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, BOGUS),
                .cntr           = { {12, 13, 16}, {14, 15, 17} },
        },
  };
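Each entry above is a designated initializer indexed by the event enum, which keeps an event's opcode, ESCR MSR pair, valid-EventMask whitelist, and counter candidates together in one place and leaves any unmentioned field zeroed. A stripped-down version of the idiom, with hypothetical names:

  enum demo_event { DEMO_EVENT_A, DEMO_EVENT_B, DEMO_EVENT_MAX };

  struct demo_bind {
	unsigned int opcode;
	unsigned int valid_emask;	/* whitelist, like escr_emask */
  };

  static const struct demo_bind demo_bind_map[DEMO_EVENT_MAX] = {
	[DEMO_EVENT_A] = { .opcode = 0x01, .valid_emask = 0x0007 },
	[DEMO_EVENT_B] = { .opcode = 0x02, .valid_emask = 0x0003 },
  };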
@@@ -634,73 -428,29 +634,73 @@@ static u64 p4_pmu_event_map(int hw_even
        return config;
  }
  
 +/* check cpu model specifics */
 +static bool p4_event_match_cpu_model(unsigned int event_idx)
 +{
 +      /* INSTR_COMPLETED event only exist for model 3, 4, 6 (Prescott) */
 +      if (event_idx == P4_EVENT_INSTR_COMPLETED) {
 +              if (boot_cpu_data.x86_model != 3 &&
 +                      boot_cpu_data.x86_model != 4 &&
 +                      boot_cpu_data.x86_model != 6)
 +                      return false;
 +      }
 +
 +      /*
 +       * For info
 +       * - IQ_ESCR0, IQ_ESCR1 only for models 1 and 2
 +       */
 +
 +      return true;
 +}
 +
  static int p4_validate_raw_event(struct perf_event *event)
  {
 -      unsigned int v;
 +      unsigned int v, emask;
  
 -      /* user data may have out-of-bound event index */
 +      /* User data may have out-of-bound event index */
        v = p4_config_unpack_event(event->attr.config);
 -      if (v >= ARRAY_SIZE(p4_event_bind_map)) {
 -              pr_warning("P4 PMU: Unknown event code: %d\n", v);
 +      if (v >= ARRAY_SIZE(p4_event_bind_map))
 +              return -EINVAL;
 +
 +      /* It may be unsupported: */
 +      if (!p4_event_match_cpu_model(v))
                return -EINVAL;
 +
 +      /*
 +       * NOTE: P4_CCCR_THREAD_ANY has not the same meaning as
 +       * in Architectural Performance Monitoring, it means not
 +       * on _which_ logical cpu to count but rather _when_, ie it
 +       * depends on logical cpu state -- count event if one cpu active,
 +       * none, both or any, so we just allow user to pass any value
 +       * desired.
 +       *
 +       * In turn we always set Tx_OS/Tx_USR bits bound to logical
 +       * cpu without their propagation to another cpu
 +       */
 +
 +      /*
 +       * if an event is shared accross the logical threads
 +       * the user needs special permissions to be able to use it
 +       */
 +      if (p4_event_bind_map[v].shared) {
 +              if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
 +                      return -EACCES;
        }
  
 +      /* ESCR EventMask bits may be invalid */
 +      emask = p4_config_unpack_escr(event->attr.config) & P4_ESCR_EVENTMASK_MASK;
 +      if (emask & ~p4_event_bind_map[v].escr_emask)
 +              return -EINVAL;
 +
        /*
 -       * it may have some screwed PEBS bits
 +       * it may have some invalid PEBS bits
         */
 -      if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) {
 -              pr_warning("P4 PMU: PEBS are not supported yet\n");
 +      if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE))
                return -EINVAL;
 -      }
 +
        v = p4_config_unpack_metric(event->attr.config);
 -      if (v >= ARRAY_SIZE(p4_pebs_bind_map)) {
 -              pr_warning("P4 PMU: Unknown metric code: %d\n", v);
 +      if (v >= ARRAY_SIZE(p4_pebs_bind_map))
                return -EINVAL;
 -      }
  
        return 0;
  }
@@@ -728,21 -478,27 +728,21 @@@ static int p4_hw_config(struct perf_eve
  
        if (event->attr.type == PERF_TYPE_RAW) {
  
 +              /*
 +               * Clear bits we reserve to be managed by kernel itself
 +               * and never allowed from a user space
 +               */
 +               event->attr.config &= P4_CONFIG_MASK;
 +
                rc = p4_validate_raw_event(event);
                if (rc)
                        goto out;
  
                /*
 -               * We don't control raw events so it's up to the caller
 -               * to pass sane values (and we don't count the thread number
 -               * on HT machine but allow HT-compatible specifics to be
 -               * passed on)
 -               *
                 * Note that for RAW events we allow user to use P4_CCCR_RESERVED
                 * bits since we keep additional info here (for cache events and etc)
 -               *
 -               * XXX: HT wide things should check perf_paranoid_cpu() &&
 -               *      CAP_SYS_ADMIN
                 */
 -              event->hw.config |= event->attr.config &
 -                      (p4_config_pack_escr(P4_ESCR_MASK_HT) |
 -                       p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED));
 -
 -              event->hw.config &= ~P4_CCCR_FORCE_OVF;
 +              event->hw.config |= event->attr.config;
        }
  
        rc = x86_setup_perfctr(event);
@@@ -904,8 -660,12 +904,12 @@@ static int p4_pmu_handle_irq(struct pt_
        for (idx = 0; idx < x86_pmu.num_counters; idx++) {
                int overflow;
  
-               if (!test_bit(idx, cpuc->active_mask))
+               if (!test_bit(idx, cpuc->active_mask)) {
+                       /* catch in-flight IRQs */
+                       if (__test_and_clear_bit(idx, cpuc->running))
+                               handled++;
                        continue;
+               }
  
                event = cpuc->events[idx];
                hwc = &event->hw;
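The validation added above follows one pattern throughout: unpack a field from the user-supplied raw config, then reject it if any bit falls outside a per-event whitelist (the emask & ~escr_emask test). A self-contained illustration of that whitelist check, with hypothetical names:

  #include <linux/errno.h>
  #include <linux/types.h>

  /* reject any user-supplied mask bit not in the allowed set,
   * mirroring the emask & ~escr_emask test in p4_validate_raw_event() */
  static int validate_event_mask(u32 user_mask, u32 allowed_mask)
  {
	if (user_mask & ~allowed_mask)
		return -EINVAL;
	return 0;
  }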
diff --combined tools/perf/Makefile
index fe1e30722f3b2a9c8d0d2bc1501b0be693379aba,1950e19af1cf6b23be56f0250bcf8cbb52b3d9f3..d1db0f676a4bf14850fa0264e78fe3d482d376dc
@@@ -313,9 -313,6 +313,9 @@@ TEST_PROGRAMS 
  
  SCRIPT_SH += perf-archive.sh
  
 +grep-libs = $(filter -l%,$(1))
 +strip-libs = $(filter-out -l%,$(1))
 +
  #
  # No Perl scripts right now:
  #
@@@ -591,17 -588,14 +591,17 @@@ endi
  ifdef NO_LIBPERL
        BASIC_CFLAGS += -DNO_LIBPERL
  else
 -      PERL_EMBED_LDOPTS = `perl -MExtUtils::Embed -e ldopts 2>/dev/null`
 +       PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null)
 +       PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS))
 +       PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS))
        PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
        FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
  
        ifneq ($(call try-cc,$(SOURCE_PERL_EMBED),$(FLAGS_PERL_EMBED)),y)
                BASIC_CFLAGS += -DNO_LIBPERL
        else
 -              ALL_LDFLAGS += $(PERL_EMBED_LDOPTS)
 +               ALL_LDFLAGS += $(PERL_EMBED_LDFLAGS)
 +               EXTLIBS += $(PERL_EMBED_LIBADD)
                LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-perl.o
                LIB_OBJS += $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o
        endif
@@@ -610,16 -604,13 +610,16 @@@ endi
  ifdef NO_LIBPYTHON
        BASIC_CFLAGS += -DNO_LIBPYTHON
  else
 -      PYTHON_EMBED_LDOPTS = `python-config --ldflags 2>/dev/null`
 +       PYTHON_EMBED_LDOPTS = $(shell python-config --ldflags 2>/dev/null)
 +       PYTHON_EMBED_LDFLAGS = $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
 +       PYTHON_EMBED_LIBADD = $(call grep-libs,$(PYTHON_EMBED_LDOPTS))
        PYTHON_EMBED_CCOPTS = `python-config --cflags 2>/dev/null`
        FLAGS_PYTHON_EMBED=$(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
        ifneq ($(call try-cc,$(SOURCE_PYTHON_EMBED),$(FLAGS_PYTHON_EMBED)),y)
                BASIC_CFLAGS += -DNO_LIBPYTHON
        else
 -              ALL_LDFLAGS += $(PYTHON_EMBED_LDOPTS)
 +               ALL_LDFLAGS += $(PYTHON_EMBED_LDFLAGS)
 +               EXTLIBS += $(PYTHON_EMBED_LIBADD)
                LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o
                LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o
        endif
@@@ -662,15 -653,6 +662,15 @@@ els
        endif
  endif
  
 +
 +ifdef NO_STRLCPY
 +      BASIC_CFLAGS += -DNO_STRLCPY
 +else
 +      ifneq ($(call try-cc,$(SOURCE_STRLCPY),),y)
 +              BASIC_CFLAGS += -DNO_STRLCPY
 +      endif
 +endif
 +
  ifndef CC_LD_DYNPATH
        ifdef NO_R_TO_GCC_LINKER
                # Some gcc does not accept and pass -R to the linker to specify
@@@ -928,8 -910,8 +928,8 @@@ $(OUTPUT)perf.o: perf.c $(OUTPUT)common
                $(ALL_CFLAGS) -c $(filter %.c,$^) -o $@
  
  $(OUTPUT)perf$X: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS)
 -      $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(OUTPUT)perf.o \
 -              $(BUILTIN_OBJS) $(ALL_LDFLAGS) $(LIBS)
 +      $(QUIET_LINK)$(CC) $(ALL_CFLAGS) $(ALL_LDFLAGS) $(OUTPUT)perf.o \
 +               $(BUILTIN_OBJS) $(LIBS) -o $@
  
  $(OUTPUT)builtin-help.o: builtin-help.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
        $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \
@@@ -1035,7 -1017,7 +1035,7 @@@ builtin-revert.o wt-status.o: wt-status
  # we compile into subdirectories. if the target directory is not the source directory, they might not exists. So
  # we depend the various files onto their directories.
  DIRECTORY_DEPS = $(LIB_OBJS) $(BUILTIN_OBJS) $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h
- $(DIRECTORY_DEPS): $(sort $(dir $(DIRECTORY_DEPS)))
+ $(DIRECTORY_DEPS): $(sort $(dir $(DIRECTORY_DEPS)))
  # In the second step, we make a rule to actually create these directories
  $(sort $(dir $(DIRECTORY_DEPS))):
        $(QUIET_MKDIR)$(MKDIR) -p $@ 2>/dev/null