在start_kernel的最后会调用rest_init-->kernel_thread 创建出一个进程init
init -->execve("/bin/sh", argv,env) 的最后在返回时,会将内核态的各个段寄存器设置为用户态的各个段寄存器
最后调用iret就从内核态切换到了用户态
二.代码分析
2.1 内核空间:在init/main.c中init进程的创建与执行
-
static void rest_init(void)
-
{
- //kernel_thread最终会do_fork创建了一个进程,其pid=1
- //创建完进程后系统调用返回,然后就会执行init函数
-
kernel_thread(init, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGNAL);
-
unlock_kernel();
-
current->need_resched = 1;
-
cpu_idle();
- }
-
int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) -
{
-
long retval, d0;
- //fn=init=0xc0105044,arg=NULL, flags=0x10e00
-
__asm__ __volatile__(
-
"movl __NR_clone, %eax" //将系统调用号eax设为__NR_clone
-
"movl flags|CLONE_VM, %ebx" //将flags放在ebx中
-
"movl %%esp,%%esi"
-
"int $0x80" //第1个系统调用
-
"cmpl %%esp,%%esi" //fork之后子进程会获得新esp,但父进程还是用它原先的esp,根据这一点可以区分父子进程
-
"je 1f " //相等则为父进程,直接跳出 parent-->jump
- "movl %4,%%eax" //子进程:将args压栈
-
"pushl %%eax" //子进程:
-
"call *%5" //子进程:调用fn=init函数
-
"movl %3,%0" //子进程执行完fn=init函数后调用exit
-
"int $0x80" //子进程:第2个系统调用exit
-
"1:\t"
-
:"=&a" (retval), "=&S" (d0)
-
:"0" (__NR_clone), "i" (__NR_exit),
-
"r" (arg), "r" (fn),
-
"b" (flags | CLONE_VM)
-
: "memory");
-
return retval;
- }
-
static int init(void * unused)
-
{
- ...... //不关心
-
if (execute_command) //这个execute_command=/bin/sh
-
execve(execute_command,argv_init,envp_init);
-
execve("/sbin/init",argv_init,envp_init); //后面这个不会执行
-
execve("/etc/init",argv_init,envp_init);
-
execve("/bin/init",argv_init,envp_init);
-
execve("/bin/sh",argv_init,envp_init);
-
panic("No init found. Try passing init= option to kernel.");
- }
在arch/i386/kernel/entry.S中L194
-
ENTRY(system_call)
-
pushl %eax
-
SAVE_ALL //2.2.1.1保存所有的寄存器
-
GET_CURRENT(%ebx) //获取current指针
-
testb $0x02,tsk_ptrace(%ebx) //检查当前进程是否正被跟踪(PT_TRACESYS)
-
jne tracesys
-
cmpl $(NR_syscalls),%eax //检查系统调用号是不是越界
-
jae badsys
-
call *SYMBOL_NAME(sys_call_table)(,%eax,4) //这儿是调用sys_execve
-
movl %eax,EAX(%esp) //将返回值保存在esp+6th
-
ENTRY(ret_from_sys_call)
-
cli # need_resched and signals atomic test
-
cmpl $0,need_resched(%ebx)
-
jne reschedule
-
cmpl $0,sigpending(%ebx)
-
jne signal_return
-
restore_all:
- RESTORE_ALL
-
#define SAVE_ALL \
-
cld; \
-
pushl %es; \
-
pushl %ds; \
-
pushl %eax; \
-
pushl %ebp; \
-
pushl %edi; \
-
pushl %esi; \
-
pushl %edx; \
-
pushl %ecx; \
-
pushl %ebx; \
-
movl $(__KERNEL_DS),%edx; \
-
movl %edx,%ds; \
- movl %edx,%es
-
(gdb) info r
-
eax 0xb 11
-
ecx 0xc03ac1c0
-
edx 0xc03ac200
-
ebx 0xc0447fd3
-
esp 0xf7deffa4 //这个esp就是下面的struct pt_regs regs
-
ebp 0xe000
-
esi 0xc03c7fc4
-
edi 0xc01051c8
-
eip 0xc010928b 0xc010928b <system_call+11>
-
eflags 0x286 [ PF SF IF ]
-
cs 0x10 16
-
ss 0x18 24
-
ds 0x18 24
-
es 0x18 24
-
fs 0x18 24
-
gs 0x18 24
-
-
(gdb) x /32wx 0xf7deffa4
-
0xf7deffa4: 0xc0447fd3 0xc03ac1c0 0xc03ac200 0xc03c7fc4
-
0xf7deffb4: 0xc01051c8 0x0000e000 0x0000000b 0x00000018
-
0xf7deffc4: 0x00000018 0x0000000b 0xc010558a 0x00000010
-
0xf7deffd4: 0x00000286 0x00000010 0x00010f00 0xc0105235
-
0xf7deffe4: 0xc0447fd3 0xc03ac1c0 0xc03ac200 0xc0105c4b
-
0xf7defff4: 0x00000000 0x00000078 0xc0124774 0x00000000
-
0xf7df0004: 0x00000000 0x00000000 0x00000000 0x00000000
- 0xf7df0014: 0x00000000 0x00000000 0x00000000 0x00000000
- 下面这个是进入do_execve之后读出来的寄存器
-
(gdb) p /x regs
$6 = {ebx=0xc0447fd3, ecx=0xc03ac1c0, edx=0xc03ac200, esi=0xc03c7fc4, - edi=0xc01051c8, ebp=0xe000, eax=0xb, xds=0x18,
- xes=0x18, orig_eax=0xb, eip=0xc010558a, xcs = 0x10,
- eflags=0x286, esp=0x10, xss=0x10f00}
-
asmlinkage int sys_execve(struct pt_regs regs)
-
{
-
int error;
- char * filename;
- (gdb) p &regs
- $5 = (struct pt_regs *) 0xf7deffa4 //有没有发现这个数值似曾相识?对,这个就是esp
- (gdb) p /x regs
- $6 = {ebx=0xc0447fd3, ecx=0xc03ac1c0, edx=0xc03ac200, esi=0xc03c7fc4, edi=0xc01051c8, ebp=0xe000, eax=0xb,
- xds=0x18, xes=0x18, orig_eax=0xb, eip=0xc010558a, xcs = 0x10, eflags=0x286, esp=0x10, xss=0x10f00}
-
filename = getname((char *) regs.ebx);
-
error = PTR_ERR(filename);
-
if (IS_ERR(filename))
-
goto out;
-
error = do_execve(filename, (char **) regs.ecx, (char **) regs.edx, &regs);
-
if (error == 0)
-
current->ptrace &= ~PT_DTRACE;
-
(gdb) p /x regs -->执行完do_execve之后再看堆栈的情况,特别注意段寄存器的变化
$1 = {ebx = 0x0, ecx = 0x0, edx = 0x0, esi = 0x0, edi = 0x0, ebp = 0x0, eax = 0x0, xds = 0x2b, xes = 0x2b,
orig_eax = 0xb, eip = 0x40000be0, xcs = 0x23, eflags = 0x286, esp = 0xbfffff40, xss = 0x2b }
-
putname(filename);
-
out:
-
return error;
- }
regs指向已压入栈的所有寄存器,改变了regs的值就是改变了已压栈的寄存器的值,等pop时会真正改变寄存器的值
2.4 再次回到system_call
-
ENTRY(system_call)
-
pushl %eax # save orig_eax
-
SAVE_ALL
-
GET_CURRENT(%ebx)
-
testb $0x02,tsk_ptrace(%ebx) # PT_TRACESYS
-
jne tracesys
-
cmpl $(NR_syscalls),%eax
-
jae badsys
-
call *SYMBOL_NAME(sys_call_table)(,%eax,4) -->调用sys_execve(其内部调用do_execve)
-
movl %eax,EAX(%esp) -->do_execve执行完成后返回
-
ENTRY(ret_from_sys_call)
-
cli # need_resched and signals atomic test
-
cmpl $0,need_resched(%ebx)
-
jne reschedule
-
cmpl $0,sigpending(%ebx)
-
jne signal_return
-
restore_all:
- RESTORE_ALL -->到这儿开始恢复寄存器的值
但是就是在do_execve中改变了压栈中的寄存器,所以这个RESTORE_ALL就不是一般的返回了。
2.5关于RESTORE_ALL
-
#define RESTORE_ALL \
-
popl %ebx; \ //将SAVE_ALL中压栈的寄存器恢复
-
popl %ecx; \
-
popl %edx; \
-
popl %esi; \
-
popl %edi; \
-
popl %ebp; \
-
popl %eax; \
-
1: popl %ds; \
-
2: popl %es; \
-
addl $4,%esp; \ //system_call一开始压栈的eax直接加4丢掉了
-
3: iret; \ //从内核空间返回用户空间,iret的出栈是有顺序的
-
.section .fixup,"ax"; \
-
4: movl $0,(%esp); \
-
jmp 1b; \
-
5: movl $0,(%esp); \
-
jmp 2b; \
-
6: pushl %ss; \
-
popl %ds; \
-
pushl %ss; \
-
popl %es; \
-
pushl $11; \
-
call do_exit; \
-
.previous; \
-
.section __ex_table,"a";\
-
.align 4; \
-
.long 1b,4b; \
-
.long 2b,5b; \
-
.long 3b,6b; \
- .previous
Intel手册中IRET在没有特权级切换时只会将EIP,CS,EFLAGS弹出,但是当有特权级的切换时,SS:ESP也被弹出
iret执行时的出栈顺序是固定的,如下:
EIP --> CS --> EFLAGS --> ESP --> SS
b.执行iret之前,寄存器与栈中的值如下所示
-
(gdb) info r
-
eax 0x0 0
-
ecx 0x0 0
-
edx 0x0 0
-
ebx 0x0 0
-
esp 0xf7deffcc 0xf7deffcc
-
ebp 0x0 0x0
-
esi 0x0 0
-
edi 0x0 0
-
eip 0xc01092dd 0xc01092dd <restore_all+12>
-
eflags 0x86 [ PF SF ]
-
cs 0x10 16
-
ss 0x18 24
-
ds 0x2b 43
-
es 0x2b 43
-
fs 0x0 0
-
gs 0x0 0
-
(gdb) x /32wx 0xf7deffcc
-
0xf7deffcc: 0x40000be0 0x00000023 0x00000286 0xbfffff40
-
EIP CS EFLAGS ESP
-
0xf7deffdc: 0x0000002b 0xc0105235 0xc0447fd3 0xc03ac1c0
- SS
-
1: x/i $pc
-
=> 0x40000be0: <error: Cannot access memory at address 0x40000be0> //pc切换到了0x40000be0
-
(gdb) info r
-
eax 0x0 0
-
ecx 0x0 0
-
edx 0x0 0
-
ebx 0x0 0
-
esp 0xbfffff40 //有背景颜色的说明寄存器有改变
-
ebp 0x0 0x0
-
esi 0x0 0
-
edi 0x0 0
-
eip 0x40000be0
-
eflags 0x286 [ PF SF IF ]
-
cs 0x23 35
-
ss 0x2b 43
-
ds 0x2b 43
-
es 0x2b 43
-
fs 0x0 0
- gs 0x0 0
SS=0x18=11000b: index=3,TI=0,RPL=0 -->内核态
SS=0x2b=101011b: index=5,TI=0,RPL=3 -->用户态
所以这儿是由内核态切换到了用户态
2.6 还有一个问题struct pt_regs regs中的值是在什么地方改变的?
sys_execve
-->do_execve
-->search_binary_handler
--> load_elf_binary
--> start_thread
-
#define start_thread(regs, new_eip, new_esp) do { \
-
__asm__("movl %0,%%fs ; movl %0,%%gs": :"r" (0)); \
-
set_fs(USER_DS); \
-
regs->xds = __USER_DS; =0x2B \
-
regs->xes = __USER_DS; =0x2B \
-
regs->xss = __USER_DS; =0x2B \
-
regs->xcs = __USER_CS; =0x23 \
-
regs->eip = new_eip; //这个eip=0x40000be0 是不是也很熟悉 \
-
regs->esp = new_esp; //这个esp=0xbfffff40 \
- } while (0)