提到signal与thread的关系,就得先提POSIX标准。POSIX标准决定了Linux为何将signal如此实现:
1 信号处理函数必须在多线程应用的所有线程之间共享,但是,每个线程要有自己的挂起信号掩码和阻塞信号掩码。
2 POSIX 函数kill/sigqueue必须面向整个多线程应用,而不是其中某个特定的线程。
3 每个发给多线程应用的信号仅传送给1个线程,这个线程是由内核从不会阻塞该信号的线程中随意选出。
4 如果发送一个致命信号到多线程应用,那么内核将杀死该应用的所有线程,而不仅仅是接收信号的那个线程。
上面是POSIX标准,也就是提出来的要求,Linux要遵循POSIX标准,那Linux是怎么做到的呢?
到了此处,我们需要理清一些基本的概念:
-
struct task_struct {
-
-
pid_t pid;
-
pid_t tgid;
-
.....
-
struct task_struct *group_leader; /* threadgroup leader */
-
......
-
struct list_head thread_group;
-
....
- }
有点绕是不是?对于一个多线程的程序,无论是哪个线程执行getpid,结果都是一样的,最终返回的都是同一个值tgid。如果我们实现了gettid(很不幸的是glibc没有这个函数,所以我们要用syscall),我们就会发现,各个线程返回的值不同,此时,返回的值是内核task_struct中的pid。对于多线程应用,在/proc/pid/task下可以看到的,就是各个线程的thread id,也就是task_struct中的pid。
我在我的博文《Linux线程之线程 线程组 进程 轻量级进程(LWP)》中提到了这个问题,此处不再浪费笔墨赘述。
group_leader字段,指向线程组的第一个线程。对于我们自己的程序而言,main函数所在的线程,也就是线程组的第一个线程,所以它的group_leader就是它自己。一旦用pthread_create创建了线程,那么main所在的线程,还有创建出来的线程,隶属于同一个线程组,新线程的group_leader仍然指向main函数所在的线程。
thread_group,同一线程组的所有线程的队列。对于group_leader,这是一个队列头,对于同一线程组的其他线程,通过这个字段挂入队列。可以根据这个队列,遍历线程组的所有线程。
是时候看看内核代码了,下面的代码属于do_fork函数及copy_process函数的一些代码。
-
p->pid = pid_nr(pid);
-
p->tgid = p->pid;
-
if (clone_flags & CLONE_THREAD)//创建线程,tgid等于当前线程的tgid
-
p->tgid = current->tgid;
-
-
-
p->group_leader = p;
-
INIT_LIST_HEAD(&p->thread_group);
-
-
if (clone_flags & CLONE_THREAD) { //线程处理部分,group_leader都是第一个线程。同时挂入队列
-
current->signal->nr_threads++;
-
atomic_inc(&current->signal->live);
-
atomic_inc(&current->signal->sigcnt);
-
p->group_leader = current->group_leader;
-
list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
- }
有了线程组的概念,我们就可以进一步解释signal相关的内容了。
-
/* signal handlers */
-
struct signal_struct *signal;
-
struct sighand_struct *sighand;
-
-
sigset_t blocked, real_blocked;
-
sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */
- struct sigpending pending;
线程组里面的所有成员共享一个signal_struct类型结构,同一线程组的多个线程的task_struct中的signal指针都指向同一个signal_struct。sighand成员变量也是如此,同一个线程组的多个线程指向同一个sighand_struct结构。
-
static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
-
{
-
struct signal_struct *sig;
-
-
if (clone_flags & CLONE_THREAD) //线程,直接返回,表明同一线程组共享
-
return 0;
-
-
sig = kmem_cache_zalloc(signal_cachep, GFP_KERNEL);
-
tsk->signal = sig;
-
if (!sig)
-
return -ENOMEM;
-
-
sig->nr_threads = 1;
-
atomic_set(&sig->live, 1);
-
atomic_set(&sig->sigcnt, 1);
-
init_waitqueue_head(&sig->wait_chldexit);
-
sig->curr_target = tsk;
-
。。。。
-
}
-
-
-
static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
-
{
-
struct sighand_struct *sig;
-
-
if (clone_flags & CLONE_SIGHAND) {
- atomic_inc(&current->sighand->count); //如果发现是线程,直接将引用计数++,无需分配sighand_struct结构
- return 0;
-
}
-
sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
-
rcu_assign_pointer(tsk->sighand, sig);
-
if (!sig)
-
return -ENOMEM;
-
atomic_set(&sig->count, 1);
-
memcpy(sig->action, current->sighand->action, sizeof(sig->action));
-
return 0;
- }
上一篇博文提到了signal->shared_pending和pending这两个与挂起信号相关的数据结构,此处我们可以具体讲解了。signal是线程组共享的结构,自然下属的shared_pending也是线程组共享的。就像POSIX提到的,kill/sigqueue发送信号,发送的对象并不是线程组中某个特定的线程,而是整个线程组。自然,kernel会将信号记录在整个线程组共享的signal->shared_pending中,表示线程组收到信号X一枚。
有筒子说了,我就要给某个特定的线程发信号,有没有办法,内核怎么办?这是个好问题。
-
int tkill(int tid, int sig);
-
- int tgkill(int tgid, int tid, int sig);
- pending = group ? &t->signal->shared_pending : &t->pending;
-
static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
-
int group, int from_ancestor_ns)
-
{
-
struct sigpending *pending;
-
struct sigqueue *q;
-
int override_rlimit;
-
int ret = 0, result;
-
-
assert_spin_locked(&t->sighand->siglock);
-
-
result = TRACE_SIGNAL_IGNORED;
-
if (!prepare_signal(sig, t,
-
from_ancestor_ns || (info == SEND_SIG_FORCED)))
-
goto ret;
-
- pending = group ? &t->signal->shared_pending : &t->pending; // tkill用的自己的pending,
- // kill/sigqueue用的线程组共享的signal->shared_pending
-
/*
-
* Short-circuit ignored signals and support queuing
-
* exactly one non-rt signal, so that we can get more
-
* detailed information about the cause of the signal.
-
*/
-
result = TRACE_SIGNAL_ALREADY_PENDING;
-
if (legacy_queue(pending, sig))
-
goto ret;
-
-
result = TRACE_SIGNAL_DELIVERED;
-
/*
-
* fast-pathed signals for kernel-internal things like SIGSTOP
-
* or SIGKILL.
-
*/
-
if (info == SEND_SIG_FORCED)
-
goto out_set;
-
-
/*
-
* Real-time signals must be queued if sent by sigqueue, or
-
* some other real-time mechanism. It is implementation
-
* defined whether kill() does so. We attempt to do so, on
-
* the principle of least surprise, but since kill is not
-
* allowed to fail with EAGAIN when low on memory we just
-
* make sure at least one signal gets delivered and don't
-
* pass on the info struct.
-
*/
-
if (sig < SIGRTMIN)
-
override_rlimit = (is_si_special(info) || info->si_code >= 0);
-
else
-
override_rlimit = 0;
-
-
q = __sigqueue_alloc(sig, t, GFP_ATOMIC | __GFP_NOTRACK_FALSE_POSITIVE,
-
override_rlimit);
-
if (q) {
-
list_add_tail(&q->list, &pending->list);
-
switch ((unsigned long) info) {
-
case (unsigned long) SEND_SIG_NOINFO:
-
q->info.si_signo = sig;
-
q->info.si_errno = 0;
-
q->info.si_code = SI_USER;
-
q->info.si_pid = task_tgid_nr_ns(current,
-
task_active_pid_ns(t));
-
q->info.si_uid = from_kuid_munged(current_user_ns(), current_uid());
-
break;
-
case (unsigned long) SEND_SIG_PRIV:
-
q->info.si_signo = sig;
-
q->info.si_errno = 0;
-
q->info.si_code = SI_KERNEL;
-
q->info.si_pid = 0;
-
q->info.si_uid = 0;
-
break;
-
default:
-
copy_siginfo(&q->info, info);
-
if (from_ancestor_ns)
-
q->info.si_pid = 0;
-
break;
-
}
-
-
userns_fixup_signal_uid(&q->info, t);
-
-
} else if (!is_si_special(info)) {
-
if (sig >= SIGRTMIN && info->si_code != SI_USER) {
-
/*
-
* Queue overflow, abort. We may abort if the
-
* signal was rt and sent by user using something
-
* other than kill().
-
*/
-
result = TRACE_SIGNAL_OVERFLOW_FAIL;
-
ret = -EAGAIN;
-
goto ret;
-
} else {
-
/*
-
* This is a silent loss of information. We still
-
* send the signal, but the *info bits are lost.
-
*/
-
result = TRACE_SIGNAL_LOSE_INFO;
-
}
-
}
-
-
out_set:
-
signalfd_notify(t, sig);
-
sigaddset(&pending->signal, sig);//修改位图,表明该信号存在挂起信号。
-
complete_signal(sig, t, group);
-
ret:
-
trace_signal_generate(sig, info, t, group, result);
-
return ret;
- }
在do_signal--->get_signal_to_deliver中,内核会选出要处理的信号;如果发现需要退出,就会执行do_group_exit。顾名思义,这个函数负责整个线程组的退出:它通过zap_other_threads给同组的其他线程挂上SIGKILL并唤醒它们,这正对应POSIX的第4条要求。
-
void
-
do_group_exit(int exit_code)
-
{
-
struct signal_struct *sig = current->signal;
-
-
BUG_ON(exit_code & 0x80); /* core dumps don't get here */
-
-
if (signal_group_exit(sig))
-
exit_code = sig->group_exit_code;
-
else if (!thread_group_empty(current)) {
-
struct sighand_struct *const sighand = current->sighand;
-
spin_lock_irq(&sighand->siglock);
-
if (signal_group_exit(sig))
-
/* Another thread got here before we took the lock. */
-
exit_code = sig->group_exit_code;
-
else {
-
sig->group_exit_code = exit_code;
-
sig->flags = SIGNAL_GROUP_EXIT;
-
zap_other_threads(current);
-
}
-
spin_unlock_irq(&sighand->siglock);
-
}
-
-
do_exit(exit_code);
-
/* NOTREACHED */
- }
-
/*
-
* Nuke all other threads in the group.
-
*/
-
int zap_other_threads(struct task_struct *p)
-
{
-
struct task_struct *t = p;
-
int count = 0;
-
-
p->signal->group_stop_count = 0;
-
-
while_each_thread(p, t) {
-
task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
-
count++;
-
-
/* Don't bother with already dead threads */
-
if (t->exit_state)
-
continue;
-
sigaddset(&t->pending.signal, SIGKILL);
-
signal_wake_up(t, 1);
-
}
-
-
return count;
- }
讲完这些,需要讲block了。我第一篇就讲到,我们有时候需要阻塞某些信号。POSIX说了,多线程中每个线程要有自己的阻塞信号掩码。不必说,task_struct中的blocked就是阻塞信号位图。glibc的sigprocmask函数,设置的就是调用线程的blocked(多线程程序中应使用pthread_sigmask)。
那些被block的信号为何不能传递,内核是怎么做到的?答案在next_signal里:挂起信号位图先与阻塞掩码的反码相与(x = *s &~ *m),被阻塞的信号位被清零,自然不会被选中。
-
int next_signal(struct sigpending *pending, sigset_t *mask)
-
{
-
unsigned long i, *s, *m, x;
-
int sig = 0;
-
-
s = pending->signal.sig;
-
m = mask->sig;
-
-
/*
-
* Handle the first word specially: it contains the
-
* synchronous signals that need to be dequeued first.
-
*/
-
x = *s &~ *m;
-
if (x) {
-
if (x & SYNCHRONOUS_MASK)
-
x &= SYNCHRONOUS_MASK;
-
sig = ffz(~x) + 1;
-
return sig;
-
}
-
-
switch (_NSIG_WORDS) {
-
default:
-
for (i = 1; i < _NSIG_WORDS; ++i) {
-
x = *++s &~ *++m;
-
if (!x)
-
continue;
-
sig = ffz(~x) + i*_NSIG_BPW + 1;
-
break;
-
}
-
break;
-
-
case 2:
-
x = s[1] &~ m[1];
-
if (!x)
-
break;
-
sig = ffz(~x) + _NSIG_BPW + 1;
-
break;
-
-
case 1:
-
/* Nothing to do */
-
break;
-
}
-
-
return sig;
- }
-
#define SYNCHRONOUS_MASK \
-
(sigmask(SIGSEGV) | sigmask(SIGBUS) | sigmask(SIGILL) | \
- sigmask(SIGTRAP) | sigmask(SIGFPE) | sigmask(SIGSYS))
另外,dequeue_signal很有意思,先去task_struct自己的pending中取,取不到再去整个线程组共享的shared_pending中取。
-
int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
-
{
-
int signr;
-
-
/* We only dequeue private signals from ourselves, we don't let
-
* signalfd steal them
-
*/
-
signr = __dequeue_signal(&tsk->pending, mask, info);
-
if (!signr) {
-
signr = __dequeue_signal(&tsk->signal->shared_pending,
- mask, info);
。。。。
}
}
参考文献:
1 2 Linux kernel 3.8.0