2 * builtin-timechart.c - make an svg timechart of system activity
4 * (C) Copyright 2009 Intel Corporation
7 * Arjan van de Ven <arjan@linux.intel.com>
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; version 2
17 #include "util/util.h"
19 #include "util/color.h"
20 #include <linux/list.h>
21 #include "util/cache.h"
22 #include <linux/rbtree.h>
23 #include "util/symbol.h"
24 #include "util/string.h"
25 #include "util/callchain.h"
26 #include "util/strlist.h"
29 #include "util/header.h"
30 #include "util/parse-options.h"
31 #include "util/parse-events.h"
32 #include "util/svghelper.h"
/* input trace file and SVG output file; both overridable via -i / -o */
34 static char const *input_name = "perf.data";
35 static char const *output_name = "output.svg";
/* mmap window used while streaming the data file, in pages */
38 static unsigned long page_size;
39 static unsigned long mmap_window = 32;
/* PERF_SAMPLE_* bitmask recorded in the perf.data header */
40 static u64 sample_type;
42 static unsigned int numcpus;
43 static u64 min_freq; /* Lowest CPU frequency seen */
44 static u64 max_freq; /* Highest CPU frequency seen */
45 static u64 turbo_frequency;
/* timestamps of the earliest and latest sample processed */
47 static u64 first_time, last_time;
/* -P flag: draw only C/P-state information, no per-task bars */
49 static int power_only;
52 static struct perf_header *header;
61 struct sample_wrapper;
64 * Datastructure layout:
65 * We keep an list of "pid"s, matching the kernels notion of a task struct.
66 * Each "pid" entry, has a list of "comm"s.
67 * this is because we want to track different programs different, while
68 * exec will reuse the original pid (by design).
69 * Each comm has a list of samples that will be used to draw
/* full list of comms seen for this pid, and the one currently executing */
84 struct per_pidcomm *all;
85 struct per_pidcomm *current;
92 struct per_pidcomm *next;
/* singly linked list of scheduler samples belonging to this comm */
106 struct cpu_sample *samples;
/* queue node wrapping one raw sample event; payload follows the header */
109 struct sample_wrapper {
110 struct sample_wrapper *next;
113 unsigned char data[0];
/* cpu_sample::type values describing what the task was doing */
117 #define TYPE_RUNNING 1
118 #define TYPE_WAITING 2
119 #define TYPE_BLOCKED 3
122 struct cpu_sample *next;
/* head of the global per-pid list */
130 static struct per_pid *all_data;
136 struct power_event *next;
145 struct wake_event *next;
/* global lists of C/P-state intervals and wakeup arrows for the chart */
151 static struct power_event *power_events;
152 static struct wake_event *wake_events;
/* queue of all raw sample events, kept sorted by timestamp */
154 struct sample_wrapper *all_samples;
/* -p process selectors: match either by numeric pid or by comm name */
157 struct process_filter;
158 struct process_filter {
161 struct process_filter *next;
164 static struct process_filter *process_filter;
/*
 * Look up the per_pid entry for @pid in the global all_data list;
 * if none exists, allocate a zeroed entry and push it onto the head.
 */
167 static struct per_pid *find_create_pid(int pid)
169 struct per_pid *cursor = all_data;
172 if (cursor->pid == pid)
174 cursor = cursor->next;
/* not found: create and link a fresh entry */
176 cursor = malloc(sizeof(struct per_pid));
177 assert(cursor != NULL);
178 memset(cursor, 0, sizeof(struct per_pid));
180 cursor->next = all_data;
/*
 * Record that @pid is now executing @comm.  Reuses an existing
 * per_pidcomm entry when the name matches (comparing with strcmp);
 * otherwise allocates a new zeroed entry with a duplicated name.
 */
185 static void pid_set_comm(int pid, char *comm)
188 struct per_pidcomm *c;
189 p = find_create_pid(pid);
192 if (c->comm && strcmp(c->comm, comm) == 0) {
197 c->comm = strdup(comm);
/* no matching comm found: start a new one for this pid */
203 c = malloc(sizeof(struct per_pidcomm));
/* NOTE(review): malloc result appears unchecked here, unlike the
 * assert() style used in find_create_pid — confirm in full source. */
205 memset(c, 0, sizeof(struct per_pidcomm));
206 c->comm = strdup(comm);
/*
 * Handle a fork event: create entries for child and parent, and let
 * the child inherit the parent's current comm name when it has none.
 */
212 static void pid_fork(int pid, int ppid, u64 timestamp)
214 struct per_pid *p, *pp;
215 p = find_create_pid(pid);
216 pp = find_create_pid(ppid);
218 if (pp->current && pp->current->comm && !p->current)
219 pid_set_comm(pid, pp->current->comm);
/* the fork timestamp starts the child's (and its comm's) lifetime */
221 p->start_time = timestamp;
223 p->current->start_time = timestamp;
224 p->current->state_since = timestamp;
/* Handle an exit event: close out the pid's (and current comm's) lifetime. */
228 static void pid_exit(int pid, u64 timestamp)
231 p = find_create_pid(pid);
232 p->end_time = timestamp;
234 p->current->end_time = timestamp;
/*
 * Attach a scheduler sample of @type on @cpu covering [start, end] to
 * @pid's current comm.  RUNNING time is accumulated into the comm and
 * pid totals, and start_time lower bounds are widened as needed.
 */
238 pid_put_sample(int pid, int type, unsigned int cpu, u64 start, u64 end)
241 struct per_pidcomm *c;
242 struct cpu_sample *sample;
244 p = find_create_pid(pid);
/* pid has no current comm yet: create an anonymous one to hold samples */
247 c = malloc(sizeof(struct per_pidcomm));
249 memset(c, 0, sizeof(struct per_pidcomm));
255 sample = malloc(sizeof(struct cpu_sample));
256 assert(sample != NULL);
257 memset(sample, 0, sizeof(struct cpu_sample));
258 sample->start_time = start;
259 sample->end_time = end;
/* push onto the comm's sample list (most recent first) */
261 sample->next = c->samples;
/* only well-formed RUNNING intervals count toward total CPU time */
265 if (sample->type == TYPE_RUNNING && end > start && start > 0) {
266 c->total_time += (end-start);
267 p->total_time += (end-start);
/* widen the recorded start times to cover this sample */
270 if (c->start_time == 0 || c->start_time > start)
271 c->start_time = start;
272 if (p->start_time == 0 || p->start_time > start)
273 p->start_time = start;
/* per-CPU bookkeeping of the currently open C-state / P-state interval */
279 #define MAX_CPUS 4096
281 static u64 cpus_cstate_start_times[MAX_CPUS];
282 static int cpus_cstate_state[MAX_CPUS];
283 static u64 cpus_pstate_start_times[MAX_CPUS];
284 static u64 cpus_pstate_state[MAX_CPUS];
/* thin wrappers mapping perf task-lifecycle events onto the pid database */
287 process_comm_event(event_t *event)
289 pid_set_comm(event->comm.pid, event->comm.comm);
293 process_fork_event(event_t *event)
295 pid_fork(event->fork.pid, event->fork.ppid, event->fork.time);
300 process_exit_event(event_t *event)
/* exit events reuse the fork record layout */
302 pid_exit(event->fork.pid, event->fork.time);
/*
 * On-disk layouts of the raw tracepoint records consumed below.
 * These must match the kernel's trace event formats exactly.
 */
310 unsigned char preempt_count;
316 struct trace_entry te;
321 #define TASK_COMM_LEN 16
/* payload of sched:sched_wakeup */
322 struct wakeup_entry {
323 struct trace_entry te;
324 char comm[TASK_COMM_LEN];
331 * trace_flag_type is an enumeration that holds different
332 * states when a trace occurs. These are:
333 * IRQS_OFF - interrupts were disabled
334 * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags
335 * NEED_RESCED - reschedule is requested
336 * HARDIRQ - inside an interrupt handler
337 * SOFTIRQ - inside a softirq handler
339 enum trace_flag_type {
340 TRACE_FLAG_IRQS_OFF = 0x01,
341 TRACE_FLAG_IRQS_NOSUPPORT = 0x02,
342 TRACE_FLAG_NEED_RESCHED = 0x04,
343 TRACE_FLAG_HARDIRQ = 0x08,
344 TRACE_FLAG_SOFTIRQ = 0x10,
/* payload of sched:sched_switch */
349 struct sched_switch {
350 struct trace_entry te;
351 char prev_comm[TASK_COMM_LEN];
354 long prev_state; /* Arjan weeps. */
355 char next_comm[TASK_COMM_LEN];
/* power:power_start — open a C-state interval on @cpu at @timestamp */
360 static void c_state_start(int cpu, u64 timestamp, int state)
362 cpus_cstate_start_times[cpu] = timestamp;
363 cpus_cstate_state[cpu] = state;
/*
 * power:power_end — close the open C-state interval on @cpu and emit
 * it as a power_event on the global power_events list.
 */
366 static void c_state_end(int cpu, u64 timestamp)
368 struct power_event *pwr;
369 pwr = malloc(sizeof(struct power_event));
372 memset(pwr, 0, sizeof(struct power_event));
/* the interval spans from the recorded start up to this event */
374 pwr->state = cpus_cstate_state[cpu];
375 pwr->start_time = cpus_cstate_start_times[cpu];
376 pwr->end_time = timestamp;
379 pwr->next = power_events;
/*
 * power:power_frequency — close the previous P-state interval on @cpu,
 * emit it as a power_event, and open a new interval at @new_freq.
 * Also tracks the global min/max frequency range for the chart axis.
 */
384 static void p_state_change(int cpu, u64 timestamp, u64 new_freq)
386 struct power_event *pwr;
387 pwr = malloc(sizeof(struct power_event));
389 if (new_freq > 8000000) /* detect invalid data */
394 memset(pwr, 0, sizeof(struct power_event));
396 pwr->state = cpus_pstate_state[cpu];
397 pwr->start_time = cpus_pstate_start_times[cpu];
398 pwr->end_time = timestamp;
401 pwr->next = power_events;
/* no start recorded yet: the state was in effect since the trace began */
403 if (!pwr->start_time)
404 pwr->start_time = first_time;
408 cpus_pstate_state[cpu] = new_freq;
409 cpus_pstate_start_times[cpu] = timestamp;
411 if ((u64)new_freq > max_freq)
414 if (new_freq < min_freq || min_freq == 0)
/* NOTE(review): a frequency exactly 1000 below max is treated as the
 * turbo marker — presumably the kernel's turbo reporting convention. */
417 if (new_freq == max_freq - 1000)
418 turbo_frequency = max_freq;
/*
 * sched:sched_wakeup — record a wakeup arrow and transition the wakee
 * into TYPE_WAITING (closing a BLOCKED sample if one was open).
 */
422 sched_wakeup(int cpu, u64 timestamp, int pid, struct trace_entry *te)
424 struct wake_event *we;
426 struct wakeup_entry *wake = (void *)te;
428 we = malloc(sizeof(struct wake_event));
432 memset(we, 0, sizeof(struct wake_event));
433 we->time = timestamp;
/* wakeups issued from irq context have no meaningful waker task */
436 if ((te->flags & TRACE_FLAG_HARDIRQ) || (te->flags & TRACE_FLAG_SOFTIRQ))
439 we->wakee = wake->pid;
440 we->next = wake_events;
442 p = find_create_pid(we->wakee);
/* freshly created wakee: start its runnable-wait period now */
444 if (p && p->current && p->current->state == TYPE_NONE) {
445 p->current->state_since = timestamp;
446 p->current->state = TYPE_WAITING;
/* wakee was blocked: close the blocked sample, switch to waiting */
448 if (p && p->current && p->current->state == TYPE_BLOCKED) {
449 pid_put_sample(p->pid, p->current->state, cpu, p->current->state_since, timestamp);
450 p->current->state_since = timestamp;
451 p->current->state = TYPE_WAITING;
/*
 * sched:sched_switch — close the outgoing task's open sample, start a
 * RUNNING sample for the incoming task, and classify the outgoing
 * task's new state from prev_state.
 */
455 static void sched_switch(int cpu, u64 timestamp, struct trace_entry *te)
457 struct per_pid *p = NULL, *prev_p;
458 struct sched_switch *sw = (void *)te;
461 prev_p = find_create_pid(sw->prev_pid);
463 p = find_create_pid(sw->next_pid);
/* the task being switched out was running until this timestamp */
465 if (prev_p->current && prev_p->current->state != TYPE_NONE)
466 pid_put_sample(sw->prev_pid, TYPE_RUNNING, cpu, prev_p->current->state_since, timestamp);
467 if (p && p->current) {
468 if (p->current->state != TYPE_NONE)
469 pid_put_sample(sw->next_pid, p->current->state, cpu, p->current->state_since, timestamp);
/* the incoming task starts running now */
471 p->current->state_since = timestamp;
472 p->current->state = TYPE_RUNNING;
475 if (prev_p->current) {
476 prev_p->current->state = TYPE_NONE;
477 prev_p->current->state_since = timestamp;
/* NOTE(review): bit value 2 in prev_state presumably maps to
 * TASK_UNINTERRUPTIBLE (blocked), 0 to TASK_RUNNING (waiting) —
 * verify against the kernel's task state flags. */
478 if (sw->prev_state & 2)
479 prev_p->current->state = TYPE_BLOCKED;
480 if (sw->prev_state == 0)
481 prev_p->current->state = TYPE_WAITING;
/*
 * Decode one PERF_RECORD_SAMPLE: walk the sample array according to the
 * PERF_SAMPLE_* bits in sample_type, then dispatch the embedded raw
 * tracepoint record by its event name string.
 */
487 process_sample_event(event_t *event)
494 struct trace_entry *te;
/* the sample array fields appear in a fixed order gated by sample_type */
496 if (sample_type & PERF_SAMPLE_IP)
499 if (sample_type & PERF_SAMPLE_TID) {
500 pid = event->sample.array[cursor]>>32;
503 if (sample_type & PERF_SAMPLE_TIME) {
504 stamp = event->sample.array[cursor++];
/* track the overall time window covered by the trace */
506 if (!first_time || first_time > stamp)
508 if (last_time < stamp)
512 if (sample_type & PERF_SAMPLE_ADDR)
513 addr = event->sample.array[cursor++];
514 if (sample_type & PERF_SAMPLE_ID)
516 if (sample_type & PERF_SAMPLE_STREAM_ID)
518 if (sample_type & PERF_SAMPLE_CPU)
519 cpu = event->sample.array[cursor++] & 0xFFFFFFFF;
520 if (sample_type & PERF_SAMPLE_PERIOD)
/* the raw tracepoint payload starts right after the decoded fields */
523 te = (void *)&event->sample.array[cursor];
525 if (sample_type & PERF_SAMPLE_RAW && te->size > 0) {
527 struct power_entry *pe;
/* resolve the tracepoint id to its "subsystem:name" string */
531 event_str = perf_header__find_event(te->type);
536 if (strcmp(event_str, "power:power_start") == 0)
537 c_state_start(cpu, stamp, pe->value);
539 if (strcmp(event_str, "power:power_end") == 0)
540 c_state_end(cpu, stamp);
542 if (strcmp(event_str, "power:power_frequency") == 0)
543 p_state_change(cpu, stamp, pe->value);
545 if (strcmp(event_str, "sched:sched_wakeup") == 0)
546 sched_wakeup(cpu, stamp, pid, te);
548 if (strcmp(event_str, "sched:sched_switch") == 0)
549 sched_switch(cpu, stamp, te);
555 * After the last sample we need to wrap up the current C/P state
556 * and close out each CPU for these.
558 static void end_sample_processing(void)
561 struct power_event *pwr;
/* NOTE(review): loop bound is <= numcpus — confirm numcpus holds the
 * highest CPU index rather than a CPU count. */
563 for (cpu = 0; cpu <= numcpus; cpu++) {
/* close the still-open C-state interval at the end of the trace */
564 pwr = malloc(sizeof(struct power_event));
567 memset(pwr, 0, sizeof(struct power_event));
571 pwr->state = cpus_cstate_state[cpu];
572 pwr->start_time = cpus_cstate_start_times[cpu];
573 pwr->end_time = last_time;
576 pwr->next = power_events;
/* likewise close the still-open P-state interval */
582 pwr = malloc(sizeof(struct power_event));
585 memset(pwr, 0, sizeof(struct power_event));
587 pwr->state = cpus_pstate_state[cpu];
588 pwr->start_time = cpus_pstate_start_times[cpu];
589 pwr->end_time = last_time;
592 pwr->next = power_events;
/* no P-state change ever seen: assume it held since the trace start */
594 if (!pwr->start_time)
595 pwr->start_time = first_time;
/* no frequency known at all: fall back to the lowest observed one */
597 pwr->state = min_freq;
/*
 * Extract just the timestamp from a sample event by skipping over the
 * fields (IP, TID) that precede PERF_SAMPLE_TIME in the sample array.
 */
602 static u64 sample_time(event_t *event)
607 if (sample_type & PERF_SAMPLE_IP)
609 if (sample_type & PERF_SAMPLE_TID)
611 if (sample_type & PERF_SAMPLE_TIME)
612 return event->sample.array[cursor];
618 * We first queue all events, sorted backwards by insertion.
619 * The order will get flipped later.
622 queue_sample_event(event_t *event)
624 struct sample_wrapper *copy, *prev;
/* allocate room for the wrapper header plus the raw event payload */
627 size = event->sample.header.size + sizeof(struct sample_wrapper) + 8;
633 memset(copy, 0, size);
636 copy->timestamp = sample_time(event);
638 memcpy(&copy->data, event, event->sample.header.size);
640 /* insert in the right place in the list */
643 /* first sample ever */
/* list is kept in descending timestamp order (flipped later) */
648 if (all_samples->timestamp < copy->timestamp) {
649 /* insert at the head of the list */
650 copy->next = all_samples;
657 if (prev->next->timestamp < copy->timestamp) {
658 copy->next = prev->next;
664 /* insert at the end of the list */
/*
 * Reverse the descending-ordered sample queue in place so that
 * all_samples ends up sorted by ascending timestamp.
 */
670 static void sort_queued_samples(void)
672 struct sample_wrapper *cursor, *next;
674 cursor = all_samples;
/* pop each node and push it onto the (rebuilt) head */
679 cursor->next = all_samples;
680 all_samples = cursor;
686 * Sort the pid datastructure
688 static void sort_pids(void)
690 struct per_pid *new_list, *p, *cursor, *prev;
691 /* sort by ppid first, then by pid, lowest to highest */
/* insertion sort: move each entry into its place in new_list */
700 if (new_list == NULL) {
708 if (cursor->ppid > p->ppid ||
709 (cursor->ppid == p->ppid && cursor->pid > p->pid)) {
710 /* must insert before */
712 p->next = prev->next;
725 cursor = cursor->next;
/*
 * Render all queued power_events into the SVG: C-state blocks first,
 * then P-state bars layered on top.
 */
734 static void draw_c_p_states(void)
736 struct power_event *pwr;
740 * two pass drawing so that the P state bars are on top of the C state blocks
743 if (pwr->type == CSTATE)
744 svg_cstate(pwr->cpu, pwr->start_time, pwr->end_time, pwr->state);
750 if (pwr->type == PSTATE) {
/* unknown frequency: draw at the lowest observed frequency */
752 pwr->state = min_freq;
753 svg_pstate(pwr->cpu, pwr->start_time, pwr->end_time, pwr->state);
/*
 * Draw an arrow (or interrupt marker) for every recorded wakeup,
 * connecting the waker's chart row to the wakee's.
 */
759 static void draw_wakeups(void)
761 struct wake_event *we;
763 struct per_pidcomm *c;
767 int from = 0, to = 0;
768 char *task_from = NULL, *task_to = NULL;
770 /* locate the column of the waker and wakee */
773 if (p->pid == we->waker || p->pid == we->wakee) {
/* prefer a comm whose lifetime actually covers the wakeup time */
776 if (c->Y && c->start_time <= we->time && c->end_time >= we->time) {
777 if (p->pid == we->waker && !from) {
779 task_from = strdup(c->comm);
781 if (p->pid == we->wakee && !to) {
783 task_to = strdup(c->comm);
/* fallback: any comm row of the matching pid */
790 if (p->pid == we->waker && !from) {
792 task_from = strdup(c->comm);
794 if (p->pid == we->wakee && !to) {
796 task_to = strdup(c->comm);
/* no comm name resolved: label with the bare pid instead */
805 task_from = malloc(40);
806 sprintf(task_from, "[%i]", we->waker);
809 task_to = malloc(40);
810 sprintf(task_to, "[%i]", we->wakee);
/* irq-context wakeups get a marker; adjacent rows a plain line */
814 svg_interrupt(we->time, to);
815 else if (from && to && abs(from - to) == 1)
816 svg_wakeline(we->time, from, to);
818 svg_partial_wakeline(we->time, from, task_from, to, task_to);
/* Draw every RUNNING sample onto its CPU's row of the chart. */
826 static void draw_cpu_usage(void)
829 struct per_pidcomm *c;
830 struct cpu_sample *sample;
837 if (sample->type == TYPE_RUNNING)
838 svg_process(sample->cpu, sample->start_time, sample->end_time, "sample", c->comm);
840 sample = sample->next;
/*
 * Draw one horizontal bar per displayed comm: its lifetime box, each
 * running/blocked/waiting sample, and a label with total CPU time.
 */
848 static void draw_process_bars(void)
851 struct per_pidcomm *c;
852 struct cpu_sample *sample;
867 svg_box(Y, c->start_time, c->end_time, "process");
870 if (sample->type == TYPE_RUNNING)
871 svg_sample(Y, sample->cpu, sample->start_time, sample->end_time);
872 if (sample->type == TYPE_BLOCKED)
873 svg_box(Y, sample->start_time, sample->end_time, "blocked");
874 if (sample->type == TYPE_WAITING)
875 svg_waiting(Y, sample->start_time, sample->end_time);
876 sample = sample->next;
/* pick seconds or milliseconds for the label depending on magnitude */
881 if (c->total_time > 5000000000) /* 5 seconds */
882 sprintf(comm, "%s:%i (%2.2fs)", c->comm, p->pid, c->total_time / 1000000000.0);
884 sprintf(comm, "%s:%i (%3.1fms)", c->comm, p->pid, c->total_time / 1000000.0);
886 svg_text(Y, c->start_time, comm);
/*
 * Add a -p selector to the filter list.  The string is parsed as a
 * number (pid match) and also kept verbatim for comm-name matching.
 */
896 static void add_process_filter(const char *string)
898 struct process_filter *filt;
901 pid = strtoull(string, NULL, 10);
902 filt = malloc(sizeof(struct process_filter));
906 filt->name = strdup(string);
908 filt->next = process_filter;
910 process_filter = filt;
/*
 * Return whether (@p, @c) matches any registered -p filter, either by
 * numeric pid or by exact comm-name comparison.
 */
913 static int passes_filter(struct per_pid *p, struct per_pidcomm *c)
915 struct process_filter *filt;
919 filt = process_filter;
921 if (filt->pid && p->pid == filt->pid)
923 if (strcmp(filt->name, c->comm) == 0)
/*
 * Select tasks to display when -p filters are active: fix up open-ended
 * start/end times, then mark only the comms that pass the filter.
 */
930 static int determine_display_tasks_filtered(void)
933 struct per_pidcomm *c;
/* start_time of 1 is the "unknown" sentinel; clamp to trace start */
939 if (p->start_time == 1)
940 p->start_time = first_time;
942 /* no exit marker, task kept running to the end */
943 if (p->end_time == 0)
944 p->end_time = last_time;
951 if (c->start_time == 1)
952 c->start_time = first_time;
954 if (passes_filter(p, c)) {
960 if (c->end_time == 0)
961 c->end_time = last_time;
/*
 * Select tasks to display: delegate to the filtered variant when -p
 * selectors exist; otherwise show every pid/comm whose accumulated CPU
 * time reaches @threshold (unless power-only mode suppresses tasks).
 */
970 static int determine_display_tasks(u64 threshold)
973 struct per_pidcomm *c;
977 return determine_display_tasks_filtered();
/* start_time of 1 is the "unknown" sentinel; clamp to trace start */
982 if (p->start_time == 1)
983 p->start_time = first_time;
985 /* no exit marker, task kept running to the end */
986 if (p->end_time == 0)
987 p->end_time = last_time;
988 if (p->total_time >= threshold && !power_only)
996 if (c->start_time == 1)
997 c->start_time = first_time;
999 if (c->total_time >= threshold && !power_only) {
1004 if (c->end_time == 0)
1005 c->end_time = last_time;
/* default display threshold: 10 ms of accumulated CPU time */
1016 #define TIME_THRESH 10000000
/*
 * Produce the final SVG: pick the tasks to show, open the document,
 * draw the per-CPU boxes and then the per-process bars.
 */
1018 static void write_svg_file(const char *filename)
1026 count = determine_display_tasks(TIME_THRESH);
1028 /* We'd like to show at least 15 tasks; be less picky if we have fewer */
1030 count = determine_display_tasks(TIME_THRESH / 10);
1032 open_svg(filename, numcpus, count, first_time, last_time);
1037 for (i = 0; i < numcpus; i++)
1038 svg_cpu_box(i, max_freq, turbo_frequency);
1041 draw_process_bars();
/*
 * Dispatch one perf event record by header type.  Samples are only
 * queued here; they are decoded later in timestamp order.
 */
1049 process_event(event_t *event)
1052 switch (event->header.type) {
1054 case PERF_RECORD_COMM:
1055 return process_comm_event(event);
1056 case PERF_RECORD_FORK:
1057 return process_fork_event(event);
1058 case PERF_RECORD_EXIT:
1059 return process_exit_event(event);
1060 case PERF_RECORD_SAMPLE:
1061 return queue_sample_event(event);
1064 * We dont process them right now but they are fine:
1066 case PERF_RECORD_MMAP:
1067 case PERF_RECORD_THROTTLE:
1068 case PERF_RECORD_UNTHROTTLE:
/*
 * Sort the queued sample events into ascending timestamp order, then
 * decode each one in sequence.
 */
1078 static void process_samples(void)
1080 struct sample_wrapper *cursor;
1083 sort_queued_samples();
1085 cursor = all_samples;
1087 event = (void *)&cursor->data;
1088 cursor = cursor->next;
1089 process_sample_event(event);
/*
 * Main analysis pass: mmap the perf.data file in a sliding window,
 * dispatch every event record, then post-process and write the SVG.
 * Returns EXIT_FAILURE on any setup error.
 */
1094 static int __cmd_timechart(void)
1096 int err, rc = EXIT_FAILURE;
1097 unsigned long offset = 0;
1098 unsigned long head, shift;
1099 struct stat statbuf;
1105 input = open(input_name, O_RDONLY);
1107 fprintf(stderr, " failed to open file: %s", input_name);
1108 if (!strcmp(input_name, "perf.data"))
1109 fprintf(stderr, " (try 'perf record' first)");
1110 fprintf(stderr, "\n");
1114 err = fstat(input, &statbuf);
1116 perror("failed to stat file");
1120 if (!statbuf.st_size) {
1121 fprintf(stderr, "zero-sized file, nothing to do!\n");
/* parse the perf.data header to locate data and learn the sample layout */
1125 header = perf_header__new();
1129 err = perf_header__read(header, input);
1131 perf_header__delete(header);
1135 head = header->data_offset;
1137 sample_type = perf_header__sample_type(header);
/* align the initial mmap offset down to a page boundary */
1139 shift = page_size * (head / page_size);
1144 buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
1145 MAP_SHARED, input, offset);
1146 if (buf == MAP_FAILED) {
1147 perror("failed to mmap file");
1152 event = (event_t *)(buf + head);
1154 size = event->header.size;
/* event would cross the window edge: slide the mmap window forward */
1158 if (head + event->header.size >= page_size * mmap_window) {
1161 shift = page_size * (head / page_size);
1163 ret2 = munmap(buf, page_size * mmap_window);
1171 size = event->header.size;
1173 if (!size || process_event(event) < 0) {
1174 pr_warning("%p [%p]: skipping unknown header type: %d\n",
1175 (void *)(offset + head),
1176 (void *)(long)(event->header.size),
1177 event->header.type);
1179 * assume we lost track of the stream, check alignment, and
1180 * increment a single u64 in the hope to catch on again 'soon'.
1183 if (unlikely(head & 7))
/* stop at the end of the data section (or the file, if shorter) */
1191 if (offset + head >= header->data_offset + header->data_size)
1194 if (offset + head < (unsigned long)statbuf.st_size)
1204 end_sample_processing();
1208 write_svg_file(output_name);
1210 pr_info("Written %2.1f seconds of trace to %s.\n",
1211 (last_time - first_time) / 1000000000.0, output_name);
1216 static const char * const timechart_usage[] = {
1217 "perf timechart [<options>] {record}",
/* fixed arguments prepended to every 'perf timechart record' invocation:
 * the power and scheduler tracepoints the analysis pass depends on */
1221 static const char *record_args[] = {
1228 "-e", "power:power_start",
1229 "-e", "power:power_end",
1230 "-e", "power:power_frequency",
1231 "-e", "sched:sched_wakeup",
1232 "-e", "sched:sched_switch",
/*
 * 'perf timechart record': build an argv of the fixed record_args plus
 * the user's extra arguments (skipping argv[0]) and hand off to
 * cmd_record.
 */
1235 static int __cmd_record(int argc, const char **argv)
1237 unsigned int rec_argc, i, j;
1238 const char **rec_argv;
1240 rec_argc = ARRAY_SIZE(record_args) + argc - 1;
1241 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1243 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1244 rec_argv[i] = strdup(record_args[i]);
1246 for (j = 1; j < (unsigned int)argc; j++, i++)
1247 rec_argv[i] = argv[j];
/* rec_argv is intentionally not freed: cmd_record execs/keeps it */
1249 return cmd_record(i, rec_argv, NULL);
/* option callback for -p: register one process selector */
1253 parse_process(const struct option *opt __used, const char *arg, int __used unset)
1256 add_process_filter(arg);
/* command-line options for 'perf timechart' */
1260 static const struct option options[] = {
1261 OPT_STRING('i', "input", &input_name, "file",
1263 OPT_STRING('o', "output", &output_name, "file",
1264 "output file name"),
1265 OPT_INTEGER('w', "width", &svg_page_width,
1267 OPT_BOOLEAN('P', "power-only", &power_only,
1268 "output power data only"),
1269 OPT_CALLBACK('p', "process", NULL, "process",
1270 "process selector. Pass a pid or process name.",
/*
 * Entry point for 'perf timechart': parse options, route "record"
 * invocations to __cmd_record, otherwise run the analysis pass.
 */
1276 int cmd_timechart(int argc, const char **argv, const char *prefix __used)
1280 page_size = getpagesize();
1282 argc = parse_options(argc, argv, options, timechart_usage,
1283 PARSE_OPT_STOP_AT_NON_OPTION);
/* "rec", "record", etc. all select record mode */
1285 if (argc && !strncmp(argv[0], "rec", 3))
1286 return __cmd_record(argc, argv);
1288 usage_with_options(timechart_usage, options);
1292 return __cmd_timechart();