]> bbs.cooldavid.org Git - net-next-2.6.git/blame - kernel/trace/trace_syscalls.c
tracing: Create class struct for events
[net-next-2.6.git] / kernel / trace / trace_syscalls.c
CommitLineData
47788c58 1#include <trace/syscall.h>
1c569f02 2#include <trace/events/syscalls.h>
5a0e3ad6 3#include <linux/slab.h>
ee08c6ec 4#include <linux/kernel.h>
fb34a08c 5#include <linux/ftrace.h>
cdd6c482 6#include <linux/perf_event.h>
ee08c6ec
FW
7#include <asm/syscall.h>
8
9#include "trace_output.h"
10#include "trace.h"
11
5be71b61 12static DEFINE_MUTEX(syscall_trace_lock);
fb34a08c
JB
13static int sys_refcount_enter;
14static int sys_refcount_exit;
57421dbb
JB
15static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
16static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
ee08c6ec 17
8f082018
SR
18struct ftrace_event_class event_class_syscalls = {
19 .system = "syscalls"
20};
21
c44fc770
FW
22extern unsigned long __start_syscalls_metadata[];
23extern unsigned long __stop_syscalls_metadata[];
24
25static struct syscall_metadata **syscalls_metadata;
26
27static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
28{
29 struct syscall_metadata *start;
30 struct syscall_metadata *stop;
31 char str[KSYM_SYMBOL_LEN];
32
33
34 start = (struct syscall_metadata *)__start_syscalls_metadata;
35 stop = (struct syscall_metadata *)__stop_syscalls_metadata;
36 kallsyms_lookup(syscall, NULL, NULL, NULL, str);
37
38 for ( ; start < stop; start++) {
39 /*
40 * Only compare after the "sys" prefix. Archs that use
41 * syscall wrappers may have syscalls symbols aliases prefixed
42 * with "SyS" instead of "sys", leading to an unwanted
43 * mismatch.
44 */
45 if (start->name && !strcmp(start->name + 3, str + 3))
46 return start;
47 }
48 return NULL;
49}
50
51static struct syscall_metadata *syscall_nr_to_meta(int nr)
52{
53 if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
54 return NULL;
55
56 return syscalls_metadata[nr];
57}
58
bed1ffca
FW
59enum print_line_t
60print_syscall_enter(struct trace_iterator *iter, int flags)
61{
62 struct trace_seq *s = &iter->seq;
63 struct trace_entry *ent = iter->ent;
64 struct syscall_trace_enter *trace;
65 struct syscall_metadata *entry;
66 int i, ret, syscall;
67
64c12e04 68 trace = (typeof(trace))ent;
bed1ffca 69 syscall = trace->nr;
bed1ffca 70 entry = syscall_nr_to_meta(syscall);
64c12e04 71
bed1ffca
FW
72 if (!entry)
73 goto end;
74
fcc19438 75 if (entry->enter_event->id != ent->type) {
64c12e04
JB
76 WARN_ON_ONCE(1);
77 goto end;
78 }
79
bed1ffca
FW
80 ret = trace_seq_printf(s, "%s(", entry->name);
81 if (!ret)
82 return TRACE_TYPE_PARTIAL_LINE;
83
84 for (i = 0; i < entry->nb_args; i++) {
85 /* parameter types */
ba8b3a40 86 if (trace_flags & TRACE_ITER_VERBOSE) {
bed1ffca
FW
87 ret = trace_seq_printf(s, "%s ", entry->types[i]);
88 if (!ret)
89 return TRACE_TYPE_PARTIAL_LINE;
90 }
91 /* parameter values */
4539f077 92 ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i],
bed1ffca 93 trace->args[i],
4539f077 94 i == entry->nb_args - 1 ? "" : ", ");
bed1ffca
FW
95 if (!ret)
96 return TRACE_TYPE_PARTIAL_LINE;
97 }
98
4539f077
LZ
99 ret = trace_seq_putc(s, ')');
100 if (!ret)
101 return TRACE_TYPE_PARTIAL_LINE;
102
bed1ffca 103end:
4539f077
LZ
104 ret = trace_seq_putc(s, '\n');
105 if (!ret)
106 return TRACE_TYPE_PARTIAL_LINE;
107
bed1ffca
FW
108 return TRACE_TYPE_HANDLED;
109}
110
111enum print_line_t
112print_syscall_exit(struct trace_iterator *iter, int flags)
113{
114 struct trace_seq *s = &iter->seq;
115 struct trace_entry *ent = iter->ent;
116 struct syscall_trace_exit *trace;
117 int syscall;
118 struct syscall_metadata *entry;
119 int ret;
120
64c12e04 121 trace = (typeof(trace))ent;
bed1ffca 122 syscall = trace->nr;
bed1ffca 123 entry = syscall_nr_to_meta(syscall);
64c12e04 124
bed1ffca
FW
125 if (!entry) {
126 trace_seq_printf(s, "\n");
127 return TRACE_TYPE_HANDLED;
128 }
129
fcc19438 130 if (entry->exit_event->id != ent->type) {
64c12e04
JB
131 WARN_ON_ONCE(1);
132 return TRACE_TYPE_UNHANDLED;
133 }
134
bed1ffca
FW
135 ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
136 trace->ret);
137 if (!ret)
138 return TRACE_TYPE_PARTIAL_LINE;
139
140 return TRACE_TYPE_HANDLED;
141}
142
/* Declared only; no definition is visible in this file. */
extern char *__bad_type_size(void);

/*
 * Expands to the "type, name, offset, size, is_signed" argument run passed to
 * trace_define_field() for member @name of a local variable that must be
 * called "trace". The leading ternary replaces the type string with a call to
 * __bad_type_size() when sizeof(@type) differs from the member's size.
 * NOTE(review): presumably meant to surface a size mismatch at build/link
 * time -- confirm against the rest of the tracing code.
 */
#define SYSCALL_FIELD(type, name)					\
	sizeof(type) != sizeof(trace.name) ?				\
		__bad_type_size() :					\
		#type, #name, offsetof(typeof(trace), name),		\
		sizeof(trace.name), is_signed_type(type)
e6971969 150
50307a45
LJ
151static
152int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
153{
154 int i;
155 int pos = 0;
156
157 /* When len=0, we just calculate the needed length */
158#define LEN_OR_ZERO (len ? len - pos : 0)
159
160 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
161 for (i = 0; i < entry->nb_args; i++) {
162 pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
163 entry->args[i], sizeof(unsigned long),
164 i == entry->nb_args - 1 ? "" : ", ");
165 }
166 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
167
168 for (i = 0; i < entry->nb_args; i++) {
169 pos += snprintf(buf + pos, LEN_OR_ZERO,
170 ", ((unsigned long)(REC->%s))", entry->args[i]);
171 }
172
173#undef LEN_OR_ZERO
174
175 /* return the length of print_fmt */
176 return pos;
177}
178
179static int set_syscall_print_fmt(struct ftrace_event_call *call)
180{
181 char *print_fmt;
182 int len;
183 struct syscall_metadata *entry = call->data;
184
185 if (entry->enter_event != call) {
186 call->print_fmt = "\"0x%lx\", REC->ret";
187 return 0;
188 }
189
190 /* First: called with 0 length to calculate the needed length */
191 len = __set_enter_print_fmt(entry, NULL, 0);
192
193 print_fmt = kmalloc(len + 1, GFP_KERNEL);
194 if (!print_fmt)
195 return -ENOMEM;
196
197 /* Second: actually write the @print_fmt */
198 __set_enter_print_fmt(entry, print_fmt, len + 1);
199 call->print_fmt = print_fmt;
200
201 return 0;
202}
203
204static void free_syscall_print_fmt(struct ftrace_event_call *call)
205{
206 struct syscall_metadata *entry = call->data;
207
208 if (entry->enter_event == call)
209 kfree(call->print_fmt);
210}
211
540b7b8d
LZ
212int syscall_enter_define_fields(struct ftrace_event_call *call)
213{
214 struct syscall_trace_enter trace;
31c16b13 215 struct syscall_metadata *meta = call->data;
540b7b8d 216 int ret;
540b7b8d
LZ
217 int i;
218 int offset = offsetof(typeof(trace), args);
219
0f1ef51d
LJ
220 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
221 if (ret)
222 return ret;
223
540b7b8d 224 for (i = 0; i < meta->nb_args; i++) {
aeaeae11
FW
225 ret = trace_define_field(call, meta->types[i],
226 meta->args[i], offset,
43b51ead
LZ
227 sizeof(unsigned long), 0,
228 FILTER_OTHER);
540b7b8d
LZ
229 offset += sizeof(unsigned long);
230 }
231
232 return ret;
233}
234
235int syscall_exit_define_fields(struct ftrace_event_call *call)
236{
237 struct syscall_trace_exit trace;
238 int ret;
239
0f1ef51d
LJ
240 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
241 if (ret)
242 return ret;
243
26a50744 244 ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
43b51ead 245 FILTER_OTHER);
540b7b8d
LZ
246
247 return ret;
248}
249
fb34a08c 250void ftrace_syscall_enter(struct pt_regs *regs, long id)
ee08c6ec 251{
bed1ffca
FW
252 struct syscall_trace_enter *entry;
253 struct syscall_metadata *sys_data;
254 struct ring_buffer_event *event;
e77405ad 255 struct ring_buffer *buffer;
bed1ffca 256 int size;
ee08c6ec
FW
257 int syscall_nr;
258
259 syscall_nr = syscall_get_nr(current, regs);
cd0980fc
HB
260 if (syscall_nr < 0)
261 return;
fb34a08c
JB
262 if (!test_bit(syscall_nr, enabled_enter_syscalls))
263 return;
ee08c6ec 264
bed1ffca
FW
265 sys_data = syscall_nr_to_meta(syscall_nr);
266 if (!sys_data)
267 return;
268
269 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
270
fcc19438
LJ
271 event = trace_current_buffer_lock_reserve(&buffer,
272 sys_data->enter_event->id, size, 0, 0);
bed1ffca
FW
273 if (!event)
274 return;
275
276 entry = ring_buffer_event_data(event);
277 entry->nr = syscall_nr;
278 syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
279
e77405ad
SR
280 if (!filter_current_check_discard(buffer, sys_data->enter_event,
281 entry, event))
282 trace_current_buffer_unlock_commit(buffer, event, 0, 0);
ee08c6ec
FW
283}
284
fb34a08c 285void ftrace_syscall_exit(struct pt_regs *regs, long ret)
ee08c6ec 286{
bed1ffca
FW
287 struct syscall_trace_exit *entry;
288 struct syscall_metadata *sys_data;
289 struct ring_buffer_event *event;
e77405ad 290 struct ring_buffer *buffer;
ee08c6ec
FW
291 int syscall_nr;
292
293 syscall_nr = syscall_get_nr(current, regs);
cd0980fc
HB
294 if (syscall_nr < 0)
295 return;
fb34a08c
JB
296 if (!test_bit(syscall_nr, enabled_exit_syscalls))
297 return;
ee08c6ec 298
bed1ffca
FW
299 sys_data = syscall_nr_to_meta(syscall_nr);
300 if (!sys_data)
301 return;
302
fcc19438
LJ
303 event = trace_current_buffer_lock_reserve(&buffer,
304 sys_data->exit_event->id, sizeof(*entry), 0, 0);
bed1ffca
FW
305 if (!event)
306 return;
307
308 entry = ring_buffer_event_data(event);
309 entry->nr = syscall_nr;
310 entry->ret = syscall_get_return_value(current, regs);
311
e77405ad
SR
312 if (!filter_current_check_discard(buffer, sys_data->exit_event,
313 entry, event))
314 trace_current_buffer_unlock_commit(buffer, event, 0, 0);
ee08c6ec
FW
315}
316
bd1a5c84 317int reg_event_syscall_enter(struct ftrace_event_call *call)
ee08c6ec 318{
fb34a08c
JB
319 int ret = 0;
320 int num;
fb34a08c 321
c252f657 322 num = ((struct syscall_metadata *)call->data)->syscall_nr;
57421dbb 323 if (num < 0 || num >= NR_syscalls)
fb34a08c
JB
324 return -ENOSYS;
325 mutex_lock(&syscall_trace_lock);
326 if (!sys_refcount_enter)
1c569f02 327 ret = register_trace_sys_enter(ftrace_syscall_enter);
3b8e4273 328 if (!ret) {
fb34a08c
JB
329 set_bit(num, enabled_enter_syscalls);
330 sys_refcount_enter++;
331 }
332 mutex_unlock(&syscall_trace_lock);
333 return ret;
ee08c6ec
FW
334}
335
bd1a5c84 336void unreg_event_syscall_enter(struct ftrace_event_call *call)
ee08c6ec 337{
fb34a08c 338 int num;
ee08c6ec 339
c252f657 340 num = ((struct syscall_metadata *)call->data)->syscall_nr;
57421dbb 341 if (num < 0 || num >= NR_syscalls)
fb34a08c
JB
342 return;
343 mutex_lock(&syscall_trace_lock);
344 sys_refcount_enter--;
345 clear_bit(num, enabled_enter_syscalls);
346 if (!sys_refcount_enter)
1c569f02 347 unregister_trace_sys_enter(ftrace_syscall_enter);
fb34a08c
JB
348 mutex_unlock(&syscall_trace_lock);
349}
ee08c6ec 350
bd1a5c84 351int reg_event_syscall_exit(struct ftrace_event_call *call)
ee08c6ec 352{
fb34a08c
JB
353 int ret = 0;
354 int num;
fb34a08c 355
c252f657 356 num = ((struct syscall_metadata *)call->data)->syscall_nr;
57421dbb 357 if (num < 0 || num >= NR_syscalls)
fb34a08c
JB
358 return -ENOSYS;
359 mutex_lock(&syscall_trace_lock);
360 if (!sys_refcount_exit)
1c569f02 361 ret = register_trace_sys_exit(ftrace_syscall_exit);
3b8e4273 362 if (!ret) {
fb34a08c
JB
363 set_bit(num, enabled_exit_syscalls);
364 sys_refcount_exit++;
ee08c6ec 365 }
fb34a08c
JB
366 mutex_unlock(&syscall_trace_lock);
367 return ret;
368}
ee08c6ec 369
bd1a5c84 370void unreg_event_syscall_exit(struct ftrace_event_call *call)
fb34a08c
JB
371{
372 int num;
ee08c6ec 373
c252f657 374 num = ((struct syscall_metadata *)call->data)->syscall_nr;
57421dbb 375 if (num < 0 || num >= NR_syscalls)
fb34a08c
JB
376 return;
377 mutex_lock(&syscall_trace_lock);
378 sys_refcount_exit--;
379 clear_bit(num, enabled_exit_syscalls);
380 if (!sys_refcount_exit)
1c569f02 381 unregister_trace_sys_exit(ftrace_syscall_exit);
fb34a08c 382 mutex_unlock(&syscall_trace_lock);
ee08c6ec 383}
fb34a08c 384
a1301da0
LJ
385int init_syscall_trace(struct ftrace_event_call *call)
386{
387 int id;
388
50307a45
LJ
389 if (set_syscall_print_fmt(call) < 0)
390 return -ENOMEM;
391
c7ef3a90
SR
392 id = trace_event_raw_init(call);
393
394 if (id < 0) {
50307a45 395 free_syscall_print_fmt(call);
c7ef3a90 396 return id;
50307a45 397 }
c7ef3a90
SR
398
399 return id;
a1301da0
LJ
400}
401
e7b8e675
MF
402unsigned long __init arch_syscall_addr(int nr)
403{
404 return (unsigned long)sys_call_table[nr];
405}
406
c44fc770
FW
407int __init init_ftrace_syscalls(void)
408{
409 struct syscall_metadata *meta;
410 unsigned long addr;
411 int i;
412
413 syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
414 NR_syscalls, GFP_KERNEL);
415 if (!syscalls_metadata) {
416 WARN_ON(1);
417 return -ENOMEM;
418 }
419
420 for (i = 0; i < NR_syscalls; i++) {
421 addr = arch_syscall_addr(i);
422 meta = find_syscall_meta(addr);
c252f657
LJ
423 if (!meta)
424 continue;
425
426 meta->syscall_nr = i;
c44fc770
FW
427 syscalls_metadata[i] = meta;
428 }
429
430 return 0;
431}
432core_initcall(init_ftrace_syscalls);
433
07b139c8 434#ifdef CONFIG_PERF_EVENTS
19007a67 435
/* Count of syscall events currently enabled for perf, per direction. */
static int sys_perf_refcount_enter, sys_perf_refcount_exit;

/* One bit per syscall number, set while perf has that event enabled. */
static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
f4b5ffcc 440
97d5a220 441static void perf_syscall_enter(struct pt_regs *regs, long id)
f4b5ffcc
JB
442{
443 struct syscall_metadata *sys_data;
20ab4425
FW
444 struct syscall_trace_enter *rec;
445 unsigned long flags;
f4b5ffcc 446 int syscall_nr;
4ed7c92d 447 int rctx;
19007a67 448 int size;
f4b5ffcc
JB
449
450 syscall_nr = syscall_get_nr(current, regs);
97d5a220 451 if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
f4b5ffcc
JB
452 return;
453
454 sys_data = syscall_nr_to_meta(syscall_nr);
455 if (!sys_data)
456 return;
457
19007a67
FW
458 /* get the size after alignment with the u32 buffer size field */
459 size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
460 size = ALIGN(size + sizeof(u32), sizeof(u64));
461 size -= sizeof(u32);
462
97d5a220
FW
463 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
464 "perf buffer not large enough"))
20ab4425
FW
465 return;
466
97d5a220 467 rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
430ad5a6
XG
468 sys_data->enter_event->id, &rctx, &flags);
469 if (!rec)
470 return;
20ab4425 471
20ab4425
FW
472 rec->nr = syscall_nr;
473 syscall_get_arguments(current, regs, 0, sys_data->nb_args,
474 (unsigned long *)&rec->args);
97d5a220 475 perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs);
f4b5ffcc
JB
476}
477
97d5a220 478int perf_sysenter_enable(struct ftrace_event_call *call)
f4b5ffcc
JB
479{
480 int ret = 0;
481 int num;
482
3bbe84e9 483 num = ((struct syscall_metadata *)call->data)->syscall_nr;
f4b5ffcc
JB
484
485 mutex_lock(&syscall_trace_lock);
97d5a220
FW
486 if (!sys_perf_refcount_enter)
487 ret = register_trace_sys_enter(perf_syscall_enter);
f4b5ffcc
JB
488 if (ret) {
489 pr_info("event trace: Could not activate"
490 "syscall entry trace point");
491 } else {
97d5a220
FW
492 set_bit(num, enabled_perf_enter_syscalls);
493 sys_perf_refcount_enter++;
f4b5ffcc
JB
494 }
495 mutex_unlock(&syscall_trace_lock);
496 return ret;
497}
498
97d5a220 499void perf_sysenter_disable(struct ftrace_event_call *call)
f4b5ffcc
JB
500{
501 int num;
502
3bbe84e9 503 num = ((struct syscall_metadata *)call->data)->syscall_nr;
f4b5ffcc
JB
504
505 mutex_lock(&syscall_trace_lock);
97d5a220
FW
506 sys_perf_refcount_enter--;
507 clear_bit(num, enabled_perf_enter_syscalls);
508 if (!sys_perf_refcount_enter)
509 unregister_trace_sys_enter(perf_syscall_enter);
f4b5ffcc
JB
510 mutex_unlock(&syscall_trace_lock);
511}
512
97d5a220 513static void perf_syscall_exit(struct pt_regs *regs, long ret)
f4b5ffcc
JB
514{
515 struct syscall_metadata *sys_data;
20ab4425
FW
516 struct syscall_trace_exit *rec;
517 unsigned long flags;
f4b5ffcc 518 int syscall_nr;
4ed7c92d 519 int rctx;
20ab4425 520 int size;
f4b5ffcc
JB
521
522 syscall_nr = syscall_get_nr(current, regs);
97d5a220 523 if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
f4b5ffcc
JB
524 return;
525
526 sys_data = syscall_nr_to_meta(syscall_nr);
527 if (!sys_data)
528 return;
529
20ab4425
FW
530 /* We can probably do that at build time */
531 size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
532 size -= sizeof(u32);
19007a67 533
20ab4425
FW
534 /*
535 * Impossible, but be paranoid with the future
536 * How to put this check outside runtime?
537 */
97d5a220
FW
538 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
539 "exit event has grown above perf buffer size"))
20ab4425
FW
540 return;
541
97d5a220 542 rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
430ad5a6
XG
543 sys_data->exit_event->id, &rctx, &flags);
544 if (!rec)
545 return;
20ab4425 546
20ab4425
FW
547 rec->nr = syscall_nr;
548 rec->ret = syscall_get_return_value(current, regs);
549
97d5a220 550 perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs);
f4b5ffcc
JB
551}
552
97d5a220 553int perf_sysexit_enable(struct ftrace_event_call *call)
f4b5ffcc
JB
554{
555 int ret = 0;
556 int num;
557
3bbe84e9 558 num = ((struct syscall_metadata *)call->data)->syscall_nr;
f4b5ffcc
JB
559
560 mutex_lock(&syscall_trace_lock);
97d5a220
FW
561 if (!sys_perf_refcount_exit)
562 ret = register_trace_sys_exit(perf_syscall_exit);
f4b5ffcc
JB
563 if (ret) {
564 pr_info("event trace: Could not activate"
6574658b 565 "syscall exit trace point");
f4b5ffcc 566 } else {
97d5a220
FW
567 set_bit(num, enabled_perf_exit_syscalls);
568 sys_perf_refcount_exit++;
f4b5ffcc
JB
569 }
570 mutex_unlock(&syscall_trace_lock);
571 return ret;
572}
573
97d5a220 574void perf_sysexit_disable(struct ftrace_event_call *call)
f4b5ffcc
JB
575{
576 int num;
577
3bbe84e9 578 num = ((struct syscall_metadata *)call->data)->syscall_nr;
f4b5ffcc
JB
579
580 mutex_lock(&syscall_trace_lock);
97d5a220
FW
581 sys_perf_refcount_exit--;
582 clear_bit(num, enabled_perf_exit_syscalls);
583 if (!sys_perf_refcount_exit)
584 unregister_trace_sys_exit(perf_syscall_exit);
f4b5ffcc
JB
585 mutex_unlock(&syscall_trace_lock);
586}
587
07b139c8 588#endif /* CONFIG_PERF_EVENTS */
f4b5ffcc 589