在调试进程时,dmesg中出现了如下信息:
|
====================== == Upstream sync On == ====================== eth3: link up, full duplex, speed 1 Gbps INFO: task PonMgr:510 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. PonMgr D c03e08cc 0 510 509 0x00000000 Backtrace: [<c03e0514>] (__schedule+0x0/0x494) from [<c03e0ad0>] (schedule+0x84/0x8c) [<c03e0a4c>] (schedule+0x0/0x8c) from [<c03dee44>] (schedule_timeout+0x20/0x1e4) [<c03dee24>] (schedule_timeout+0x0/0x1e4) from [<c03dfe98>] (__down+0x80/0xb4) r7:00000000 r6:c7b67bc0 r5:7fffffff r4:bf038044 [<c03dfe18>] (__down+0x0/0xb4) from [<c012e190>] (down+0x34/0x48) r8:00000000 r7:00000034 r6:be1ffa40 r5:a0000013 r4:bf038044 [<c012e15c>] (down+0x0/0x48) from [<bf0229a8>] (gpon_evt_read+0x1c/0xc8 [gpon_drv]) r5:be1ffa40 r4:00000034 [<bf02298c>] (gpon_evt_read+0x0/0xc8 [gpon_drv]) from [<c0195e80>] (vfs_read+0xb8/0x134) r7:c5c05f70 r6:be1ffa40 r5:c6b84bc0 r4:00000034 [<c0195dc8>] (vfs_read+0x0/0x134) from [<c0195f40>] (sys_read+0x44/0x70) r8:00000000 r7:00000003 r6:00000034 r5:be1ffa40 r4:c6b84bc0 [<c0195efc>] (sys_read+0x0/0x70) from [<c000d7a0>] (ret_fast_syscall+0x0/0x30) r9:c5c04000 r8:c000d948 r6:00000034 r5:0000000c r4:be1ffa40 |
起初以为是驱动代码存在缺陷才导致打印Backtrace;查阅内核源码后发现,这其实是内核的hung task检测(保护)机制。从调用栈可以看出,驱动的read函数gpon_evt_read在down()中等待信号量,进程处于不可中断睡眠(D状态);由于一直没有接收到有效事件,read阻塞超过了120秒,于是触发了该告警。
内核线程启动如下:
|
static int __init hung_task_init(void) { atomic_notifier_chain_register(&panic_notifier_list, &panic_block); watchdog_task = kthread_run(watchdog, NULL, "khungtaskd"); return 0; } module_init(hung_task_init); |
|
/* * kthread which checks for tasks stuck in D state */ static int watchdog(void *dummy) { set_user_nice(current, 0); for ( ; ; ) { unsigned long timeout = sysctl_hung_task_timeout_secs; while (schedule_timeout_interruptible(timeout_jiffies(timeout))) timeout = sysctl_hung_task_timeout_secs; check_hung_uninterruptible_tasks(timeout); } return 0; } |
内核配置(.config)中通过CONFIG_DEFAULT_HUNG_TASK_TIMEOUT将sysctl_hung_task_timeout_secs的默认值设为120,因此khungtaskd线程每隔2分钟检测一次:依次遍历每一个进程控制块(task_struct),当进程处于TASK_UNINTERRUPTIBLE状态时,调用check_hung_task;
|
do_each_thread(g, t) { if (!max_count--) goto unlock; if (!--batch_count) { batch_count = HUNG_TASK_BATCHING; if (!rcu_lock_break(g, t)) goto unlock; } /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */ if (t->state == TASK_UNINTERRUPTIBLE) check_hung_task(t, timeout); } while_each_thread(g, t); |
接下来判断该进程自上一次检测以来是否发生过上下文切换(即是否被调度过):
|
static void check_hung_task(struct task_struct *t, unsigned long timeout) |
|
unsigned long switch_count = t->nvcsw + t->nivcsw; /* * Ensure the task is not frozen. * Also, skip vfork and any other user process that freezer should skip. */ if (unlikely(t->flags & (PF_FROZEN | PF_FREEZER_SKIP))) return; /* * When a freshly created task is scheduled once, changes its state to * TASK_UNINTERRUPTIBLE without having ever been switched out once, it * musn't be checked. */ if (unlikely(!switch_count)) return; if (switch_count != t->last_switch_count) { t->last_switch_count = switch_count; return; } if (!sysctl_hung_task_warnings) return; sysctl_hung_task_warnings--; |
首先搞清nvcsw,nivcsw两个变量的含义:
nvcsw: Context voluntary switch counter
nivcsw: Context involuntary switch counter
很明显,这两个变量分别统计进程自愿(voluntary)和非自愿(involuntary)上下文切换的次数;
last_switch_count专用于hung task检测:它保存上一次检测时的上下文切换总次数(nvcsw + nivcsw),只供khungtaskd检测TASK_UNINTERRUPTIBLE进程时使用;
t->nvcsw + t->nivcsw == t->last_switch_count 就表示在上一个检测周期(2分钟)内该进程没有发生过任何上下文切换,也就是没有被调度过,正是要找的挂起进程;
这里sysctl_hung_task_warnings的初始值为10,每打印一次告警就减1,也就是说该机制最多只打印10次告警信息;10次之后即使再检测到挂起的进程,也看不到打印结果了;
接下来打印堆栈信息
|
printk(KERN_ERR "INFO: task %s:%d blocked for more than " "%ld seconds.\n", t->comm, t->pid, timeout); printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\"" " disables this message.\n"); sched_show_task(t); debug_show_held_locks(t); touch_nmi_watchdog(); if (sysctl_hung_task_panic) panic("hung_task: blocked tasks"); |
sysctl_hung_task_panic=CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE;
.config中没有开启CONFIG_BOOTPARAM_HUNG_TASK_PANIC,此时CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE为0,所以检测到挂起进程时内核只打印告警信息,不会panic;