我们用过pthread_create接口,也用过pthread_self接口,请看manual中的声明:
- 
			#include <pthread.h>
 
- 
			
 
- 
			       int pthread_create(pthread_t *thread, const pthread_attr_t *attr,
 
- 
			                          void *(*start_routine) (void *), void *arg);
 
- pthread_t pthread_self(void)
- 
			#include <stdio.h>
			#include <stdlib.h>      /* malloc, free */
			#include <string.h>      /* strcpy */
			#include <pthread.h>
			#include <unistd.h>      /* sleep, syscall */
			#include <sys/syscall.h>
			#include <assert.h>
 
- 
			
 
- 
			#define gettid() syscall(__NR_gettid)
 
- 
			
 
- 
			pthread_key_t   key;
 
- 
			__thread int count = 2222;
 
- 
			__thread unsigned long long  count2 ;
 
- 
			static  __thread int count3;
 
- 
			/*
			 * Destructor for the thread-specific data stored under `key`
			 * (main registers it via pthread_key_create).
			 *
			 * string: the heap-allocated, NUL-terminated value a thread stored with
			 *         pthread_setspecific; ownership passes to this function, which
			 *         frees it.
			 *
			 * The parameter is `void *` so the function has the exact
			 * `void (*)(void *)` type that pthread_key_create expects.
			 */
			void echomsg(void *string)
			{
			    /* pthread_t is an unsigned long on glibc; convert explicitly for %lx. */
			    printf("destructor excuted in thread %lx,address (%p) param=%s\n",
			           (unsigned long)pthread_self(), string, (char *)string);
			    free(string);
			}
 
- 
			
 
- 
			void * child1(void *arg)
 
- 
			{
 
- 
			    int b;
 
- 
			    int tid=pthread_self();
 
- 
			
 
- 
			    printf("I am the child1  pthread_self return %p gettid return %d\n",tid,gettid());
 
- 
			
 
- 
			    char* key_content = malloc(8);
 
- 
			    if(key_content != NULL)
 
- 
			    {
 
- 
			        strcpy(key_content,"ACACACA");
 
- 
			    }
 
- 
			    pthread_setspecific(key,(void *)key_content);
 
- 
			    
 
- 
			    count=666666;
 
- 
			    count2=1023;
 
- 
			    count3=2048;
 
- 
			    printf("I am child1 , tid=%x ,count (%p) = %8d,count2(%p) = %6llu,count3(%p) = %6d\n",tid,&count,count,&count2,count2,&count3,count3);
 
- 
			    asm volatile("movl %%gs:0, %0;"
 
- 
			            :"=r"(b) /* output */ 
 
- 
			            );
 
- 
			
 
- 
			    printf("I am child1 , GS address %p\n",b);
 
- 
			    
 
- 
			    sleep(2);
 
- 
			    printf("thread %x returns %x\n",tid,pthread_getspecific(key));
 
- 
			    sleep(50);
 
- 
			}
 
- 
			
 
- 
			void * child2(void *arg)
 
- 
			{
 
- 
			    int b;
 
- 
			    int tid=pthread_self();
 
- 
			
 
- 
			    printf("I am the child2  pthread_self return %p gettid return %d\n",tid,gettid());
 
- 
			
 
- 
			    char* key_content = malloc(8);
 
- 
			    if(key_content != NULL)
 
- 
			    {
 
- 
			        strcpy(key_content,"ABCDEFG");
 
- 
			    }
 
- 
			    pthread_setspecific(key,(void *)key_content);
 
- 
			    count=88888888;
 
- 
			    count2=1024;
 
- 
			    count3=2047;
 
- 
			    printf("I am child2 , tid=%x ,count (%p) = %8d,count2(%p) = %6llu,count3(%p) = %6d\n",tid,&count,count,&count2,count2,&count3,count3);
 
- 
			    
 
- 
			    
 
- 
			    asm volatile("movl %%gs:0, %0;"
 
- 
			            :"=r"(b) /* output */ 
 
- 
			            );
 
- 
			
 
- 
			    printf("I am child2 , GS address %p\n",b);
 
- 
			    
 
- 
			    sleep(1);
 
- 
			    printf("thread %x returns %x\n",tid,pthread_getspecific(key));
 
- 
			    sleep(50);
 
- 
			}
 
- 
			
 
- 
			
 
- 
			int main(void)
 
- 
			{
 
- 
			    int b;
 
- 
			    pthread_t  tid1,tid2;
 
- 
			    printf("hello\n");
 
- 
			
 
- 
			    
 
- 
			    pthread_key_create(&key,echomsg);
 
- 
			
 
- 
			    asm volatile("movl %%gs:0, %0;"
 
- 
			            :"=r"(b) /* output */ 
 
- 
			            );
 
- 
			
 
- 
			    printf("I am the main , GS address %x\n",b);
 
- 
			    
 
- 
			    pthread_create(&tid1,NULL,child1,NULL);
 
- 
			    pthread_create(&tid2,NULL,child2,NULL);
 
- 
			
 
- 
			    printf("pthread_create tid1 = %p\n",tid1);
 
- 
			    printf("pthread_create tid2 = %p\n",tid2);
 
- 
			
 
- 
			    sleep(60);
 
- 
			    pthread_key_delete(key);
 
- 
			    printf("main thread exit\n");
 
- 
			    return 0;
 
- }

我们惊奇的发现对于child1
1 pthread_create第一参数返回pthread_t类型的值为0xb7530b40
2 pthread_self返回的pthread_t类型的值为0xb7530b40
3 GS指示的段(GDT的第六个段)存储的内容还是 0xb7530b40
对于child2也有类似的情况,三者返回同一个值(每次执行,值都不一样,这是栈的随机化造成的,不必困扰,这三个值相同是我表达的重点),what does the magic number mean?只能求助glibc。幸好我们有了源码。首先从pthread_create搞起。
代码在nptl目录下的pthread_create.c下面,比较有意思的是居然没有一个函数叫pthread_create。
- 
			__pthread_create_2_0
 
- 
			__pthread_create_2_1
 
- compat_symbol (libpthread, __pthread_create_2_0, pthread_create, GLIBC_2_0)
- 
			int
 
- 
			__pthread_create_2_1 (newthread, attr, start_routine, arg)
 
- 
			    pthread_t *newthread;
 
- 
			    const pthread_attr_t *attr;
 
- 
			    void *(*start_routine) (void *);
 
- 
			    void *arg
 
- 
			{
 
- 
			     ...
 
- 
			    struct pthread *pd = NULL;
 
- 
			    int err = ALLOCATE_STACK (iattr, &pd); 
 
- 
			     ....
 
- 
			    /* Pass the descriptor to the caller. */
 
- 
			    *newthread = (pthread_t) pd;
 
- 
			
 
- 
			    /* Start the thread. */
 
- 
			    return create_thread (pd, iattr, STACK_VARIABLES_ARGS);
 
- 
			
 
- 
			
 
- }
ALLOCATE_STACK,我的智商不高,我也看出来它老人家用处是给线程分配栈的。比较下图,ALLOCATE_STACK之前和之后,虚拟地址空间变化。最主要的变化是多了8200KB的一块内存空间。这块区域是在allocate_stack(ALLOCATE_STACK是个宏,本质是allocate_stack函数)函数里面分配的。


在分析这个allocate_stack之前,需要指出的一点是还没有调用clone系统调用,也就是还没到kernel呢,更没有分配task_struct等等。好,开始分析:
- 
			    struct pthread *pd;
 
- 
			    size_t size;
 
- 
			    size_t pagesize_m1 = __getpagesize () - 1;
 
- 
			    void *stacktop;
 
- 
			
 
- 
			    assert (attr != NULL);
 
- 
			    assert (powerof2 (pagesize_m1 + 1));
 
- 
			    assert (TCB_ALIGNMENT >= STACK_ALIGN);
 
- 
			
 
- 
			    /* Get the stack size from the attribute if it is set. Otherwise we
 
- 
			       use the default we determined at start time. */
 
- 
			    size = attr->stacksize ?: __default_stacksize;//此处决定了size是8M,如果user指定了stack_size此处会是用户指定的值。
 
- 
			
 
- 
			    /* Get memory for the stack. */
 
- if (__builtin_expect (attr->flags & ATTR_FLAG_STACKADDR, 0))
- {
- ...
- }
- else
- {
- ...
- }
至于代码中的if/else,如果用户指定了stack的基址(pthread_attr_setstack)走入if分支,否则走入else分支,我们是普通青年,轻易不会干pthread_attr_setstack这么妖娆的事情,所以我们走入else分支。
- 
			        pd = get_cached_stack (&size, &mem);
 
- 
			        if (pd == NULL)
 
- 
			        {
 
- 
			            /* To avoid aliasing effects on a larger scale than pages we
 
- 
			               adjust the allocated stack size if necessary. This way
 
- 
			               allocations directly following each other will not have
 
- 
			               aliasing problems. */
 
- 
			#if MULTI_PAGE_ALIASING != 0
 
- 
			            if ((size % MULTI_PAGE_ALIASING) == 0)
 
- 
			                size += pagesize_m1 + 1;
 
- 
			#endif
 
- 
			
 
- 
			            mem = mmap (NULL, size, prot,
 
- 
			                    MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
 
- 
			
 
- 
			            if (__builtin_expect (mem == MAP_FAILED, 0))
 
- return errno
这个例子告诉我们,线程退出之后,它占据的堆栈空间还在,如果这种属性不是我们期望的,NPTL提供了两个方法:首当其冲的是pthread_join。简单说叫起线程的这个主LWP可以调用pthread_join为线程收尸,销毁线程的资源。主LWP用pthread_create创建了线程,然后pthread_join为退出的线程销毁资源,有种白发人送黑发人的感觉。这种方法不好的地方在于阻塞,主LWP会堵在此处,直到线程退出。那第二个方法就是pthread_detach(pthread_self()),意思是线程自己会把后事交代清楚,线程退出前,自会自我了断,该释放的资源都会释放。
我们是初次创建线程,get_cached_stack自然是无功而返。但是MULTI_PAGE_ALIASING=64KB,我们的8M是64KB的整数倍,所以size=8M+4KB=8196KB。然后我们可以调用mmap了。
- 
			#if TLS_TCB_AT_TP
 
- pd = (struct pthread *) ((char *) mem + size - coloring) - 1; //我们走这个分支,而pd将填入
- //pthread_create第一个参数指针对应的地址。
- #elif TLS_DTV_AT_TP
- 
			            pd = (struct pthread *) ((((uintptr_t) mem + size - coloring
 
- 
			                            - __static_tls_size)
 
- 
			                        & ~__static_tls_align_m1)
 
- 
			                    - TLS_PRE_TCB_SIZE);
 
- 
			#endif
 
- 
			
 
- 
			            /* Remember the stack-related values. */
 
- 
			            pd->stackblock = mem;
 
- 
			            pd->stackblock_size = size;
 
- 
			
 
- 
			            /* We allocated the first block thread-specific data array.
 
- 
			               This address will not change for the lifetime of this
 
- 
			               descriptor. */
 
- 
			            pd->specific[0] = pd->specific_1stblock;
 
- 
			
 
- 
			            /* This is at least the second thread. */
 
- pd->header.multiple_threads = 1

接下来的内容就是这几天折磨得哥死去活来的内容了,TLS,传说中的thread local storage。坦率讲,现在也不懂:
- 
			/* Allocate the DTV for this thread. */
 
- 
			            if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
 
- 
			            {
 
- 
			                /* Something went wrong. */
 
- 
			                assert (errno == ENOMEM);
 
- 
			
 
- 
			                /* Free the stack memory we just allocated. */
 
- 
			                (void) munmap (mem, size);
 
- 
			
 
- 
			                return errno;
 
- }
- 
			__thread int count = 2222;
 
- 
			__thread unsigned long long count2 ;
 
- static __thread int count3
- 
			/*
			 * Thread-specific data (TSD) interfaces as specified by POSIX in <pthread.h>.
			 * pthread_getspecific returns the stored pointer (void *), and
			 * pthread_key_delete takes the key by value.
			 */
			int pthread_key_create(pthread_key_t *key, void (*destructor) (void *));
			int pthread_setspecific(pthread_key_t key, const void *value);
			void *pthread_getspecific(pthread_key_t key);
			int pthread_key_delete(pthread_key_t key);
- #define PTHREAD_KEY_MAX 1024
首先pthread_key_create表示我要占个坑,最多是0~1023。到了真正调用pthread_setspecific的时候,是怎么实现的呢?这时候需要看下struct pthread。我们知道,pthread_self返回的就是struct pthread的地址。OK我们看下pthread的定义:
- 
			struct pthread
 
- {
- 
			  union
 {
 #if !TLS_DTV_AT_TP
 /* This overlaps the TCB as used for TLS without threads (see tls.h). */
 tcbhead_t header; // tcb mean thread control blcok
 #else
 struct
 {
 int multiple_threads;
 int gscope_flag;
 # ifndef __ASSUME_PRIVATE_FUTEX
 int private_futex;
 # endif
 } header;
 #endif
 void *__padding[24];
 };
 
 
- 
			   ....
 
- 
			  struct pthread_key_data
 
- 
			  {
 
- ...
- uintptr_t seq;
- 
			    void *data;
 
- 
			  } specific_1stblock[PTHREAD_KEY_2NDLEVEL_SIZE]; //PTHREAD_KEY_2NDLEVEL_SIZE=32
 
- 
			
 
- 
			  struct pthread_key_data *specific[PTHREAD_KEY_1STLEVEL_SIZE];//PTHREAD_KEY_1STLEVEL_SIZE=32
 
- 
			  ...
 
- 
			  void *(*start_routine) (void *);
 void *arg;
 ...
 
- 
			  void *stackblock; //mmap分配的8192+4=8196KB的起始地址
 
- size_t stackblock_size; //8196KB
- size_t guardsize;
- 
			  size_t reported_guardsize;
 
- 
			
 
- ...
- 
			  struct priority_protection_data *tpp;
 
- }
allocate_stack 函数:
/* The first TSD block is included in the TCB. */
pd->specific[0] = pd->specific_1stblock;
好,软柿子终于捏完了,该捏核桃了。核桃就是前面提到的TLS,接口是__thread关键字。这种方法就自然多了,只要声明是__thread,后面引用变量就像引用普通变量。线程是如何做到的呢?我们下一篇再讨论。
还没讨论的问题有GS寄存器是干啥的? 进程切换(或者LWP切换更准确),发生了些什么?TLS到底是如何实现的? 话说TLS的确是块硬核桃,我多次试图搞懂,多次都失败,今天是不行了,要陪老婆散步去了。
最后给出一个线程栈的图

两篇参考文献都非常的好,其中第二篇博客给我的启发最大,正是这篇博文让我鼓起勇气再次探索TLS,让我这几天痛得死去活来。
 linux线程之线程栈.pdf
linux线程之线程栈.pdf参考文献
1 2 关于Linux线程的线程栈以及TLS
