-
一、概述
-
在Linux系统里,假设有两处代码(比如不同线程的两个函数F1和F2)都要获取两个锁(分别为L1和L2),如果F1持有L1后再去获取L2,而此时恰好由F2持有L2且它也正在尝试获取L1,那么此时就是处于死锁的状态,这是一个最简单的死锁例子,也即所谓的AB-BA死锁。
-
-
死锁导致的最终结果无需多说,关于如何避免死锁在教科书上也有提到,最简单直观的做法就是按顺序上锁,以破坏死锁的环形等待条件。但对于拥有成千上万个锁的整个系统来说,完全定义它们之间的顺序是非常困难的,所以一种更可行的办法就是尽量提前发现这其中潜在的死锁风险,而不是等到最后真正出现死锁时给用户带来切实的困惑。
-
已有很多工具用于发现可能的死锁风险,本文介绍的调试/检测模块lockdep即属于这一类工具。lockdep自2006年引入内核,经过实践验证,它对提前发现死锁起到了巨大的作用。
-
-
官方文档有介绍调试模块lockdep的设计原理,这里按照我自己的理解描述一下。
-
1,lockdep操作的基本单元并非单个的锁实例,而是锁类(lock-class)。比如,struct inode结构体中的自旋锁i_lock字段就代表了这一类锁,而具体每个inode节点的锁只是该类锁中的一个实例。对所有这些实例,lockdep会把它们当作一个整体做处理,即把判断粒度放大,否则对可能有成千上万个的实例进行逐一判断,那处理难度可想而知,而且也没有必要。当然,在具体的处理中,可能会记录某些特定情况下的实例的部分相关信息,以便事后排查问题。
-
2,lockdep跟踪每个锁类的自身状态,也跟踪各个锁类之间的依赖关系,通过一系列的验证规则,以确保锁类状态和锁类之间的依赖总是正确的。另外,锁类一旦在初次使用时被注册,那么后续就会一直存在,所有它的具体实例都会关联到它。
-
-
lockdep是linux内核的一个调试模块,用来检查内核互斥机制尤其是自旋锁潜在的死锁问题。自旋锁由于是查询方式等待,不释放处理器,比一般的互斥机制更容易死锁,故引入lockdep检查以下几种情况可能的死锁。
-
1.同一个进程递归地加锁同一把锁.
-
2.一把锁既在中断(或中断下半部)使能的情况下执行过加锁操作,又在中断(或中断下半部)里执行过加锁操作。这样该锁有可能在锁定时由于中断发生又试图在同一处理器上加锁,从而使依赖图形成闭环,这是典型的死锁现象。
-
-
二、 lockdep验证规则
-
(1)单锁状态规则(Single-lock state rules)
-
1,一个软中断不安全(softirq-unsafe)的锁类同样也是硬中断不安全(hardirq-unsafe)的。
-
2,对于任何一个锁类,它不可能同时是hardirq-safe和hardirq-unsafe,也不可能同时是softirq-safe和softirq-unsafe,即这两对对应状态是互斥的。
-
上面这两条就是lockdep判断单锁是否会发生死锁的检测规则。
-
(2)多锁依赖规则(Multi-lock dependency rules)
-
1,同一个锁类不能被获取两次,因为这会导致递归死锁。
-
2,不能以不同的顺序获取两个锁类,即如此这样:
-
<L1> -> <L2>
-
<L2> -> <L1>
-
是不行的。因为这会非常容易的导致本文最先提到的AB-BA死锁。当然,下面这样的情况也不行:
-
<L1> -> <L3> -> <L4> -> <L2>
-
<L2> -> <L3> -> <L4> -> <L1>
-
即在中间插入了其它正常顺序的锁也能被lockdep检测出来。
-
3,任何两个锁类之间不能出现如下基于使用状态的依赖关系:
-
<hardirq-safe> -> <hardirq-unsafe>
-
<softirq-safe> -> <softirq-unsafe>
-
其中,hardirq-safe表示该锁类曾在硬中断上下文中被获取过,hardirq-unsafe表示该锁类曾在硬中断开启的情况下被获取过(例如使用spin_lock(…)而不是spin_lock_irqsave(…));softirq的两种状态同理。这条规则意味着,在持有一个hardirq-safe的锁时,不能再去获取一个hardirq-unsafe的锁,否则就存在死锁的风险。其原因与单锁的情形类似:比如在进程上下文中持有锁A,并且锁A是hardirq-unsafe的(持有期间没有关中断),如果此时触发硬中断,而硬中断处理函数又要去获取锁A,那么就导致了死锁。
-
在锁类状态发生变化时,进行如下几个规则检测,判断是否存在潜在死锁。比较简单,就是判断hardirq-safe和hardirq-unsafe以及softirq-safe和softirq-unsafe是否发生了碰撞.
-
-
三、相关结构体
-
1.struct held_lock
-
在每个进程的task_struct结构体中定义了struct held_lock held_locks[MAX_LOCK_DEPTH]成员,用来记录锁。
-
// One entry of a task's held_locks[] array: describes a single lock the
// task currently holds. (Stray source line numbers from the original paste
// have been removed so the definition reads as valid C.)
struct held_lock {
-
/*
 * One-way hash of the dependency chain up to this point. We
 * hash the hashes step by step as the dependency chain grows.
 *
 * We use it for dependency-caching and we skip detection
 * passes and dependency-updates if there is a cache-hit, so
 * it is absolutely critical for 100% coverage of the validator
 * to have a unique key value for every unique dependency path
 * that can occur in the system, to make a unique hash value
 * as likely as possible - hence the 64-bit width.
 *
 * The task struct holds the current hash value (initialized
 * with zero), here we store the previous hash value:
 */
-
u64 prev_chain_key;
-
// Code address recorded for the acquisition; print_lock() prints it via print_ip_sym().
unsigned long acquire_ip;
-
// The lockdep_map embedded in the held lock; debug_check_no_locks_freed()
// tests whether it lies inside a memory range being freed.
struct lockdep_map *instance;
-
// NOTE(review): presumably the nest_lock argument given to lock_acquire() - confirm.
struct lockdep_map *nest_lock;
-
#ifdef CONFIG_LOCK_STAT
-
u64 waittime_stamp;
-
u64 holdtime_stamp;
-
#endif
-
// 1-based index into lock_classes[]; hlock_class() treats 0 as invalid.
unsigned int class_idx:MAX_LOCKDEP_KEYS_BITS;
-
/*
 * The lock-stack is unified in that the lock chains of interrupt
 * contexts nest ontop of process context chains, but we 'separate'
 * the hashes by starting with 0 if we cross into an interrupt
 * context, and we also do not add cross-context lock
 * dependencies - the lock usage graph walking covers that area
 * anyway, and we'd just unnecessarily increase the number of
 * dependencies otherwise. [Note: hardirq and softirq contexts
 * are separated from each other too.]
 *
 * The following field is used to detect when we cross into an
 * interrupt context:
 */
-
unsigned int irq_context:2; /* bit 0 - soft, bit 1 - hard */
-
unsigned int trylock:1; /* 16 bits */
-
-
unsigned int read:2; /* see lock_acquire() comment */
-
unsigned int check:2; /* see lock_acquire() comment */
-
unsigned int hardirqs_off:1;
-
unsigned int references:11; /* 32 bits */
-
};
-
-
2.lockdep_map
-
各种锁结构体(如mutex、raw_spinlock、semaphore)中都内嵌了该结构体,用于对锁进行检测。
-
// Per-lock-instance lockdep state, embedded in the lock structures and
// filled in by lockdep_init_map().
struct lockdep_map {
-
// Key supplied by the caller of lockdep_init_map(); it must pass static_obj().
struct lock_class_key *key;
-
// Cached lock_class pointers; lockdep_init_map() resets every slot to NULL.
struct lock_class *class_cache[NR_LOCKDEP_CACHING_CLASSES];
-
// Name given at init time; lockdep_init_map() stores "NULL" when no name is passed.
const char *name;
-
#ifdef CONFIG_LOCK_STAT
-
int cpu; // CPU number on which this structure was initialized
-
unsigned long ip;
-
#endif
-
};
-
-
3.lock_class
-
// Descriptor of one lock class. hlock_class() returns pointers of this type
// into the lock_classes[] array.
struct lock_class {
-
struct list_head hash_entry;
-
struct list_head lock_entry;
-
-
// Used by __print_lock_name() to derive a name when 'name' is NULL.
struct lockdep_subclass_key *key;
-
// Printed as "/<subclass>" by __print_lock_name() when non-zero.
unsigned int subclass;
-
unsigned int dep_gen_id;
-
-
unsigned long usage_mask;
-
struct stack_trace usage_traces[XXX_LOCK_USAGE_STATES];
-
-
// NOTE(review): presumably the dependency lists (classes taken after /
// before this one) - confirm against lockdep.c.
struct list_head locks_after, locks_before;
-
unsigned int version;
-
unsigned long ops;
-
-
// Class name; may be NULL, in which case __print_lock_name() falls back to the key.
const char *name;
-
// Printed as "#<name_version>" by __print_lock_name() when greater than 1.
int name_version;
-
-
#ifdef CONFIG_LOCK_STAT
-
unsigned long contention_point[LOCKSTAT_POINTS];
-
unsigned long contending_point[LOCKSTAT_POINTS];
-
#endif
-
};
-
-
4.lock_class_key
-
// Key object passed to lockdep_init_map() (see the static __key in
// sema_init()); it holds one sub-key per possible subclass.
struct lock_class_key {
-
struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES];
-
};
-
-
5.lockdep_subclass_key
-
// A packed structure holding a single char member.
// NOTE(review): presumably only its address matters (as a unique key) - confirm.
struct lockdep_subclass_key {
-
char __one_byte;
-
} __attribute__ ((__packed__));
-
-
-
三、lockdep初始化
-
建立两个散列表classhash_table和chainhash_table,并设置全局变量lockdep_initialized,标志已初始化完成。
-
// Hash tables used by lockdep; each bucket is a list head that
// lockdep_init() initializes.
static struct list_head classhash_table[CLASSHASH_SIZE];
-
static struct list_head chainhash_table[CHAINHASH_SIZE];
-
// Initialize every bucket of both hash tables and then set
// lockdep_initialized. Calls made after the flag is set return immediately.
void lockdep_init(void)
-
{
-
int i;
-
-
// Already initialized: nothing to do.
if (lockdep_initialized)
-
return;
-
-
for (i = 0; i < CLASSHASH_SIZE; i++)
-
INIT_LIST_HEAD(classhash_table + i);
-
-
for (i = 0; i < CHAINHASH_SIZE; i++)
-
INIT_LIST_HEAD(chainhash_table + i);
-
-
lockdep_initialized = 1;
-
}
-
-
四、提供接口
-
1. lockdep_init_map
-
用于初始化锁内嵌的lockdep_map结构体
-
// Example caller of lockdep_init_map(): initialize semaphore *sem with the
// count val and set up the lockdep_map embedded in its internal lock.
static inline void sema_init(struct semaphore *sem, int val)
-
{
-
// The key is a static object; lockdep_init_map() rejects keys that fail static_obj().
static struct lock_class_key __key;
-
*sem = (struct semaphore) __SEMAPHORE_INITIALIZER(*sem, val);
-
lockdep_init_map(&sem->lock.dep_map, "semaphore->lock", &__key, 0);
-
}
-
-
// Initialize the lockdep_map embedded in a lock.
//   lock     - the map to initialize
//   name     - lock name; if NULL a warning is raised, "NULL" is stored and
//              the function returns without setting the key
//   key      - class key; must be non-NULL and pass static_obj()
//   subclass - if non-zero (and debug_locks is set) the class is registered
//              immediately via register_lock_class()
void lockdep_init_map(struct lockdep_map *lock, const char *name,struct lock_class_key *key, int subclass)
-
{
-
int i;
-
-
// Per the original annotation, this is an empty function on ARM.
-
kmemcheck_mark_initialized(lock, sizeof(*lock));
-
-
// Clear the class_cache[] member of the lockdep_map.
-
for (i = 0; i < NR_LOCKDEP_CACHING_CLASSES; i++)
-
lock->class_cache[i] = NULL;
-
-
#ifdef CONFIG_LOCK_STAT
-
lock->cpu = raw_smp_processor_id();
-
#endif
-
// name must not be NULL
-
if (DEBUG_LOCKS_WARN_ON(!name)) {
-
lock->name = "NULL";
-
return;
-
}
-
// record the name
-
lock->name = name;
-
-
// key must not be NULL
-
if (DEBUG_LOCKS_WARN_ON(!key))
-
return;
-
-
// Sanity-check the key address. Per the original annotation, static_obj()
// accepts addresses in kernel .data, per-cpu space or module space.
-
if (!static_obj(key)) {
-
printk("BUG: key %p not in .data!\n", key);
-
DEBUG_LOCKS_WARN_ON(1);
-
return;
-
}
-
// record the key
-
lock->key = key;
-
-
// Lock debugging is switched off: skip class registration.
if (unlikely(!debug_locks))
-
return;
-
-
// A non-zero subclass is registered with its lock class right away.
-
if (subclass)
-
register_lock_class(lock, subclass, 1);
-
}
-
-
2. lock_acquire
-
// Wrapper around __lock_acquire(): returns immediately if the current task
// is already inside lockdep, otherwise runs __lock_acquire() with local
// interrupts saved/disabled and the per-task recursion flag set.
void lock_acquire(struct lockdep_map *lock, unsigned int subclass,int trylock, int read, int check,struct lockdep_map *nest_lock, unsigned long ip)
-
{
-
unsigned long flags;
-
-
// Re-entered from within lockdep itself: do nothing.
if (unlikely(current->lockdep_recursion))
-
return;
-
-
raw_local_irq_save(flags);
-
check_flags(flags);
-
-
current->lockdep_recursion = 1;
-
// Per the original annotation, an empty function (in the author's configuration).
-
trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip);
-
// The interrupt state from before raw_local_irq_save() is passed on via irqs_disabled_flags(flags).
__lock_acquire(lock, subclass, trylock, read, check,irqs_disabled_flags(flags), nest_lock, ip, 0);
-
current->lockdep_recursion = 0;
-
raw_local_irq_restore(flags);
-
}
-
-
3. debug_check_no_locks_freed
-
用于检测一个正被持有的锁是否被重新初始化,或者一块内存在释放时其中是否还包含被持有的锁。
-
// Report a bug if any lock held by the current task lies inside the memory
// range [mem_from, mem_from + mem_len). Only the first overlapping lock is
// reported. Does nothing when debug_locks is zero.
void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len)
-
{
-
struct task_struct *curr = current;
-
struct held_lock *hlock;
-
unsigned long flags;
-
int i;
-
-
if (unlikely(!debug_locks))
-
return;
-
-
local_irq_save(flags);
-
// Walk the held_lock entries of the current task.
-
for (i = 0; i < curr->lockdep_depth; i++) {
-
hlock = curr->held_locks + i;
-
-
// Skip entries whose lockdep_map instance does not overlap the range.
-
if (not_in_range(mem_from, mem_len, hlock->instance,sizeof(*hlock->instance)))
-
continue;
-
-
print_freed_lock_bug(curr, mem_from, mem_from + mem_len, hlock);
-
break;
-
}
-
local_irq_restore(flags);
-
}
-
-
// Return non-zero when the byte ranges [lock_from, lock_from + lock_len) and
// [mem_from, mem_from + mem_len) do not overlap, i.e. the lock range ends at
// or before the memory range starts, or starts at or after it ends.
// The arithmetic on void pointers relies on the GNU C extension.
static inline int not_in_range(const void* mem_from, unsigned long mem_len,
-
const void* lock_from, unsigned long lock_len)
-
{
-
return lock_from + lock_len <= mem_from || mem_from + mem_len <= lock_from;
-
}
-
-
// Print the "held lock freed" report: kernel identification, the task and
// memory range involved, the offending lock, all locks held by the task,
// and a stack backtrace.
static void print_freed_lock_bug(struct task_struct *curr, const void *mem_from,
-
const void *mem_to, struct held_lock *hlock)
-
{
-
// Turn lock debugging off; bail out when debug_locks_off() returns 0
// (debugging was already off, or silent mode is set).
-
if (!debug_locks_off())
-
return;
-
// Silent mode: print nothing. (debug_locks_off() above already returns 0
// when debug_locks_silent is set.)
-
if (debug_locks_silent)
-
return;
-
-
printk("\n");
-
printk("=========================\n");
-
printk("[ BUG: held lock freed! ]\n");
-
print_kernel_ident();
-
printk("-------------------------\n");
-
printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n",
-
curr->comm, task_pid_nr(curr), mem_from, mem_to-1);
-
print_lock(hlock);// print the offending lock
-
lockdep_print_held_locks(curr);
-
-
printk("\nstack backtrace:\n");
-
dump_stack();// print the stack backtrace
-
}
-
-
//Generic 'turn off all lock debugging' function:
// Returns 1 only when __debug_locks_off() returned non-zero and
// debug_locks_silent is zero (console_verbose() is called first in that
// case); returns 0 otherwise.
-
int debug_locks_off(void)
-
{
-
if (__debug_locks_off()) {
-
if (!debug_locks_silent) {
-
console_verbose();
-
return 1;
-
}
-
}
-
return 0;
-
}
-
-
// debug_locks == 1 means lock debugging is enabled; 0 means all lock
// debugging is disabled. This helper stores 0 into debug_locks and returns
// the result of xchg() (presumably the previous value - confirm).
-
static inline int __debug_locks_off(void)
-
{
-
return xchg(&debug_locks, 0);
-
}
-
-
// Print one line identifying the kernel: the utsname release string, the
// utsname version string truncated at its first space (via "%.*s" and
// strcspn()), and the string returned by print_tainted().
static void print_kernel_ident(void)
-
{
-
printk("%s %.*s %s\n", init_utsname()->release,
-
(int)strcspn(init_utsname()->version, " "),
-
init_utsname()->version,
-
print_tainted());
-
}
-
-
// Print one held lock: its class name (with usage characters) followed by
// ", at: " and the address where it was acquired.
// NOTE(review): hlock_class() returns NULL when class_idx is 0, and that
// NULL is passed straight to print_lock_name() here.
static void print_lock(struct held_lock *hlock)
-
{
-
print_lock_name(hlock_class(hlock));
-
printk(", at: ");
-
print_ip_sym(hlock->acquire_ip);
-
}
-
-
// Map a held_lock to its lock_class. class_idx is a 1-based index into
// lock_classes[]; a value of 0 triggers a warning and yields NULL.
static inline struct lock_class *hlock_class(struct held_lock *hlock)
-
{
-
if (!hlock->class_idx) {
-
DEBUG_LOCKS_WARN_ON(1);
-
return NULL;
-
}
-
return lock_classes + hlock->class_idx - 1;
-
}
-
-
// Print a lock class as " (<name>){<usage>}", where <name> comes from
// __print_lock_name() and <usage> is the string filled in by
// get_usage_chars(). (Stray source line numbers from the original paste
// have been removed so the snippet reads as valid C.)
static void print_lock_name(struct lock_class *class)
-
{
-
char usage[LOCK_USAGE_CHARS];
-
-
get_usage_chars(class, usage);
-
-
printk(" (");
-
__print_lock_name(class);
-
printk("){%s}", usage);
-
}
-
-
// Print the bare name of a lock class. When the class has no name, a name
// is derived from its key via __get_key_name(); otherwise the name is
// printed, followed by "#<name_version>" if name_version > 1 and
// "/<subclass>" if subclass is non-zero. (Stray source line numbers from
// the original paste have been removed so the snippet reads as valid C.)
static void __print_lock_name(struct lock_class *class)
-
{
-
char str[KSYM_NAME_LEN];
-
const char *name;
-
-
name = class->name;
-
if (!name) {
-
// No name recorded: derive one from the key (str is the scratch buffer).
name = __get_key_name(class->key, str);
-
printk("%s", name);
-
} else {
-
printk("%s", name);
-
if (class->name_version > 1)
-
printk("#%d", class->name_version);
-
if (class->subclass)
-
printk("/%d", class->subclass);
-
}
-
}
-
-
// Print a code address twice on one line: as a raw pointer inside "[<...>]"
// and through the %pS format (symbolic form), followed by a newline.
static inline void print_ip_sym(unsigned long ip)
-
{
-
printk("[<%p>] %pS\n", (void *) ip, (void *) ip);
-
}
-
-
// Print every lock currently held by task curr: a header line with the
// count, then one " #<index>: " line per held_locks[] entry via print_lock().
// When the task holds no locks, a single "no locks held" line is printed.
static void lockdep_print_held_locks(struct task_struct *curr)
-
{
-
int i, depth = curr->lockdep_depth;
-
-
if (!depth) {
-
printk("no locks held by %s/%d.\n", curr->comm, task_pid_nr(curr));
-
return;
-
}
-
// The "s" suffix pluralizes "lock" when more than one lock is held.
printk("%d lock%s held by %s/%d:\n",
-
depth, depth > 1 ? "s" : "", curr->comm, task_pid_nr(curr));
-
-
for (i = 0; i < depth; i++) {
-
printk(" #%d: ", i);
-
print_lock(curr->held_locks + i);
-
}
-
}
-
-
2.
-
-
- 参考http://www.lenky.info/archives/2013/04/2253