ARM Cycle Counter

From Tuxamito
Jump to: navigation, search

To enable the ARM Cycle Counters the desired core has to run these instructions in kernel mode:

/* Write 1 to CP15 c9, c14, 0 (PMUSERENR per the ARMv7-A reference manual):
 * allows user-mode code to access the performance monitor registers. */
asm ("MCR p15, 0, %0, C9, C14, 0\n\t" :: "r"(1));
/* Write 0x8000000f to CP15 c9, c14, 2 (PMINTENCLR per the ARMv7-A reference
 * manual): disables overflow interrupts for the cycle counter (bit 31) and
 * event counters 0-3 (bits 0-3). */
asm ("MCR p15, 0, %0, C9, C14, 2\n\t" :: "r"(0x8000000f));

So the best option is to create a module that we can load, eacc.c:

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>

/*
 * Module metadata. MODULE_LICENSE is required by the kernel build (modpost
 * complains when it is missing) and a missing/non-GPL tag taints the kernel.
 * NOTE(review): "GPL" is assumed here -- confirm with the author.
 */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Daniel Rubio Bonilla");
MODULE_DESCRIPTION("Enable ARM Cycle Counter");

/*
 * Module entry point: runs on whichever core executes insmod and opens the
 * performance counters of that core to user space.
 *
 * Returns 0 (the register writes cannot report failure).
 */
static int __init eacc_init(void) {
  /* c9, c14, 0 (PMUSERENR): bit 0 set -> user-mode access enabled. */
  asm ("MCR p15, 0, %0, C9, C14, 0\n\t" :: "r"(1));
  /* c9, c14, 2 (PMINTENCLR): disable overflow interrupts for the cycle
   * counter (bit 31) and event counters 0-3 (bits 0-3). */
  asm ("MCR p15, 0, %0, C9, C14, 2\n\t" :: "r"(0x8000000f));

  printk(KERN_INFO "EACC loaded\n");
  return 0;
}

/* Register the entry point; without this the code above is never run. */
module_init(eacc_init);

/*
 * Module exit point: only logs the unload. The register settings written at
 * load time are left untouched.
 */
static void __exit eacc_exit(void) {
  printk(KERN_INFO "EACC unloaded\n");
}

/* Register the exit handler so the module can be removed with rmmod. */
module_exit(eacc_exit);


To compile use this Makefile:

obj-m += eacc.o

.PHONY: all clean

# Build eacc.ko against the build tree of the currently running kernel.
all:
	$(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules

# Remove the objects and intermediate files produced by the build.
clean:
	$(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean

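Load it (as root) using taskset, to make sure the initialisation code is run by core number X; the counters are enabled per core, so repeat this for every core on which the application may read the counter: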

taskset -c X insmod eacc.ko

Then you will need these functions in the application's code (they use int32_t, so include <stdint.h>):

// Return the current value of the cycle counter (CCNT) of the core this
// code is running on. User-mode access to the counter must already have
// been enabled, otherwise the MRC instruction is not permitted.
static inline unsigned int get_cyclecount (void) {
  unsigned int value;
  // Read CCNT Register
  asm volatile ("MRC p15, 0, %0, c9, c13, 0\t\n": "=r"(value));
  return value;
}

// Configure and start the performance counters of the current core.
//
//   do_reset        non-zero: reset the event counters and the cycle
//                   counter to zero before starting.
//   enable_divider  non-zero: make the cycle counter tick once every
//                   64 cycles instead of every cycle.
static inline void init_perfcounters (int32_t do_reset, int32_t enable_divider) {
  // in general enable all counters (including cycle counter)
  int32_t value = 1;

  // perform reset:
  if (do_reset) {
      value |= 2;     // reset all counters to zero.
      value |= 4;     // reset cycle counter to zero.
  }

  if (enable_divider)
    value |= 8;     // enable "by 64" divider for CCNT.

  // bit 4 of the control register (export enable, per the ARMv7-A
  // reference manual).
  value |= 16;

  // program the performance-counter control-register:
  asm volatile ("MCR p15, 0, %0, c9, c12, 0\t\n" :: "r"(value));

  // enable all counters:
  asm volatile ("MCR p15, 0, %0, c9, c12, 1\t\n" :: "r"(0x8000000f));

  // clear overflows:
  asm volatile ("MCR p15, 0, %0, c9, c12, 3\t\n" :: "r"(0x8000000f));
}

The overhead of reading the counter can be calculated in this way:

  // Reset and start the counters, with the "by 64" divider disabled
  // (do_reset = 1, enable_divider = 0).
  init_perfcounters (1, 0);

  // measure the counting overhead:
  // two back-to-back reads; their difference is the cost of one read.
  unsigned int overhead = get_cyclecount();
  overhead = get_cyclecount() - overhead;

This is an example of how to use it:

  // Take a timestamp before the code under measurement.
  unsigned int t = get_cyclecount();

  // do some stuff
  printf("Hello World!!\n");

  // Elapsed cycles; unsigned subtraction stays correct across a single
  // wrap-around of the 32-bit counter.
  t = get_cyclecount() - t;

  // %u, not %d: the cycle difference is an unsigned int.
  printf ("function took exactly %u cycles (including function call)\n", t - overhead);