kernel/trace/trace_sysprof.c
/*
 * trace stack traces
 *
 * Copyright (C) 2004-2008, Soeren Sandmann
 * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
 */
#include <linux/kallsyms.h>
#include <linux/debugfs.h>
#include <linux/hrtimer.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/irq.h>
#include <linux/fs.h>

#include <asm/stacktrace.h>

#include "trace.h"

static struct trace_array       *sysprof_trace;
static int __read_mostly        tracer_enabled;

/*
 * 1 msec sample interval by default:
 */
static unsigned long sample_period = 1000000;
static const unsigned int sample_max_depth = 512;

static DEFINE_MUTEX(sample_timer_lock);
/*
 * Per CPU hrtimers that do the profiling:
 */
static DEFINE_PER_CPU(struct hrtimer, stack_trace_hrtimer);

struct stack_frame_user {
        const void __user       *next_fp;
        unsigned long           return_address;
};

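/*
 * Copy one user-space stack frame (saved frame pointer and return
 * address).  We run in hrtimer (hard IRQ) context, so page faults are
 * disabled and a faulting access just ends the user backtrace.
 */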
static int
copy_stack_frame(const void __user *fp, struct stack_frame_user *frame)
{
        int ret;

        if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
                return 0;

        ret = 1;
        pagefault_disable();
        if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
                ret = 0;
        pagefault_enable();

        return ret;
}

struct backtrace_info {
        struct trace_array_cpu  *data;
        struct trace_array      *tr;
        int                     pos;
};

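/*
 * Callbacks for the architecture stack walker: warnings are ignored,
 * other stacks (e.g. IRQ stacks) are not followed, and every reliable
 * kernel text address is recorded as a type-1 special trace entry.
 */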
static void
backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
{
        /* Ignore warnings */
}

static void backtrace_warning(void *data, char *msg)
{
        /* Ignore warnings */
}

static int backtrace_stack(void *data, char *name)
{
        /* Don't bother with IRQ stacks for now */
        return -1;
}

static void backtrace_address(void *data, unsigned long addr, int reliable)
{
        struct backtrace_info *info = data;

        if (info->pos < sample_max_depth && reliable) {
                __trace_special(info->tr, info->data, 1, addr, 0);

                info->pos++;
        }
}

static const struct stacktrace_ops backtrace_ops = {
        .warning                = backtrace_warning,
        .warning_symbol         = backtrace_warning_symbol,
        .stack                  = backtrace_stack,
        .address                = backtrace_address,
        .walk_stack             = print_context_stack,
};

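/*
 * Record a kernel backtrace: log regs->ip, then let dump_trace() walk
 * the stack and feed each address to backtrace_address().  Returns the
 * number of entries logged so the caller can keep counting into the
 * user backtrace.
 */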
static int
trace_kernel(struct pt_regs *regs, struct trace_array *tr,
             struct trace_array_cpu *data)
{
        struct backtrace_info info;
        unsigned long bp;
        char *stack;

        info.tr = tr;
        info.data = data;
        info.pos = 1;

        __trace_special(info.tr, info.data, 1, regs->ip, 0);

        stack = ((char *)regs + sizeof(struct pt_regs));
#ifdef CONFIG_FRAME_POINTER
        bp = regs->bp;
#else
        bp = 0;
#endif

        dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, &info);

        return info.pos;
}

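/*
 * Take one sample for this CPU from hrtimer context.  Emits a type-0
 * marker carrying the current pid, a kernel backtrace if we interrupted
 * kernel mode, a frame-pointer walk of the user stack for tasks with an
 * mm, and finally a type-3 entry carrying the sample depth.
 */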
static void timer_notify(struct pt_regs *regs, int cpu)
{
        struct trace_array_cpu *data;
        struct stack_frame_user frame;
        struct trace_array *tr;
        const void __user *fp;
        int is_user;
        int i;

        if (!regs)
                return;

        tr = sysprof_trace;
        data = tr->data[cpu];
        is_user = user_mode(regs);

        if (!current || current->pid == 0)
                return;

        if (is_user && current->state != TASK_RUNNING)
                return;

        __trace_special(tr, data, 0, 0, current->pid);

        if (!is_user)
                i = trace_kernel(regs, tr, data);
        else
                i = 0;

        /*
         * Trace user stack if we are not a kernel thread
         */
        if (current->mm && i < sample_max_depth) {
                regs = (struct pt_regs *)current->thread.sp0 - 1;

                fp = (void __user *)regs->bp;

                __trace_special(tr, data, 2, regs->ip, 0);

                while (i < sample_max_depth) {
                        frame.next_fp = NULL;
                        frame.return_address = 0;
                        if (!copy_stack_frame(fp, &frame))
                                break;
                        if ((unsigned long)fp < regs->sp)
                                break;

                        __trace_special(tr, data, 2, frame.return_address,
                                        (unsigned long)fp);
                        fp = frame.next_fp;

                        i++;
                }
        }

        /*
         * Special trace entry if we overflow the max depth:
         */
        if (i == sample_max_depth)
                __trace_special(tr, data, -1, -1, -1);

        __trace_special(tr, data, 3, current->pid, i);
}

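/*
 * hrtimer callback: record one sample on this CPU, then re-arm the
 * timer for the next sample_period.
 */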
static enum hrtimer_restart stack_trace_timer_fn(struct hrtimer *hrtimer)
{
        /* trace here */
        timer_notify(get_irq_regs(), smp_processor_id());

        hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));

        return HRTIMER_RESTART;
}

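/*
 * Set up and start the sampling hrtimer on the CPU this runs on;
 * invoked on every online CPU via on_each_cpu().
 */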
static void start_stack_timer(void *unused)
{
        struct hrtimer *hrtimer = &__get_cpu_var(stack_trace_hrtimer);

        hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        hrtimer->function = stack_trace_timer_fn;

        hrtimer_start(hrtimer, ns_to_ktime(sample_period),
                      HRTIMER_MODE_REL_PINNED);
}

static void start_stack_timers(void)
{
        on_each_cpu(start_stack_timer, NULL, 1);
}

static void stop_stack_timer(int cpu)
{
        struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu);

        hrtimer_cancel(hrtimer);
}

static void stop_stack_timers(void)
{
        int cpu;

        for_each_online_cpu(cpu)
                stop_stack_timer(cpu);
}

static void stop_stack_trace(struct trace_array *tr)
{
        mutex_lock(&sample_timer_lock);
        stop_stack_timers();
        tracer_enabled = 0;
        mutex_unlock(&sample_timer_lock);
}

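/*
 * Tracer callbacks: selecting the "sysprof" tracer starts cmdline
 * recording and arms the per-CPU sampling timers; resetting the tracer
 * stops both again.
 */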
static int stack_trace_init(struct trace_array *tr)
{
        sysprof_trace = tr;

        tracing_start_cmdline_record();

        mutex_lock(&sample_timer_lock);
        start_stack_timers();
        tracer_enabled = 1;
        mutex_unlock(&sample_timer_lock);
        return 0;
}

static void stack_trace_reset(struct trace_array *tr)
{
        tracing_stop_cmdline_record();
        stop_stack_trace(tr);
}

static struct tracer stack_trace __read_mostly =
{
        .name           = "sysprof",
        .init           = stack_trace_init,
        .reset          = stack_trace_reset,
#ifdef CONFIG_FTRACE_SELFTEST
        .selftest    = trace_selftest_startup_sysprof,
#endif
};

__init static int init_stack_trace(void)
{
        return register_tracer(&stack_trace);
}
device_initcall(init_stack_trace);

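/*
 * debugfs control file: "sysprof_sample_period" reports and accepts the
 * sample period in microseconds, while sample_period itself is kept in
 * nanoseconds.
 */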
#define MAX_LONG_DIGITS 22

static ssize_t
sysprof_sample_read(struct file *filp, char __user *ubuf,
                    size_t cnt, loff_t *ppos)
{
        char buf[MAX_LONG_DIGITS];
        int r;

        r = sprintf(buf, "%ld\n", nsecs_to_usecs(sample_period));

        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}

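/*
 * Writing a new period (in usecs, clamped to a 100 usec minimum)
 * restarts the sampling timers under sample_timer_lock so the change
 * takes effect on all CPUs.
 */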
static ssize_t
sysprof_sample_write(struct file *filp, const char __user *ubuf,
                     size_t cnt, loff_t *ppos)
{
        char buf[MAX_LONG_DIGITS];
        unsigned long val;

        if (cnt > MAX_LONG_DIGITS-1)
                cnt = MAX_LONG_DIGITS-1;

        if (copy_from_user(&buf, ubuf, cnt))
                return -EFAULT;

        buf[cnt] = 0;

        val = simple_strtoul(buf, NULL, 10);
        /*
         * Enforce a minimum sample period of 100 usecs:
         */
        if (val < 100)
                val = 100;

        mutex_lock(&sample_timer_lock);
        stop_stack_timers();
        sample_period = val * 1000;
        start_stack_timers();
        mutex_unlock(&sample_timer_lock);

        return cnt;
}

static const struct file_operations sysprof_sample_fops = {
        .read           = sysprof_sample_read,
        .write          = sysprof_sample_write,
};

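/* Create the sample period control file in the tracing debugfs directory. */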
void init_tracer_sysprof_debugfs(struct dentry *d_tracer)
{
        trace_create_file("sysprof_sample_period", 0644,
                        d_tracer, NULL, &sysprof_sample_fops);
}