六: wait4 ()系統調用
在父進程中,用wait4()可以獲得子進程的退出狀態,並且防止在父進程退出前,子進程退出造成僵死狀態。這是我們這節分析的最後一個小節了。
關於wait4()在用戶空間的調用方式可以自行參考相關資料,在這裡只是討論內核對這個系統調用的實現過程。
wait4()的系統調用入口為sys_wait4(),代碼如下所示:
/*
 * sys_wait4 - entry point of the wait4() system call.
 *
 * Rejects any option bit outside the accepted set with -EINVAL, then
 * hands the request to do_wait() with WEXITED always added and no
 * siginfo destination (infop == NULL).
 *
 * Returns whatever do_wait() returns, or -EINVAL for bad options.
 */
asmlinkage long sys_wait4(pid_t pid, int __user *stat_addr,
			  int options, struct rusage __user *ru)
{
	/* The only option bits wait4() accepts. */
	const unsigned int accepted = WNOHANG | WUNTRACED | WCONTINUED |
				      __WNOTHREAD | __WCLONE | __WALL;
	long ret;

	if ((options & ~accepted) != 0)
		return -EINVAL;

	ret = do_wait(pid, options | WEXITED, NULL, stat_addr, ru);

	/* avoid REGPARM breakage on x86: */
	prevent_tail_call(ret);
	return ret;
}
do_wait()是其中的核心處理函數。代碼如下:
/*
 * do_wait - core worker shared by the wait-family system calls.
 *
 * @pid:       child selector, interpreted by eligible_child()
 * @options:   W* option bits supplied by the caller
 * @infop:     optional user-space siginfo to fill in (NULL for wait4())
 * @stat_addr: optional user-space int receiving the wait status
 * @ru:        optional user-space rusage buffer
 *
 * The caller is queued on current->signal->wait_chldexit, then the
 * children (and ptrace_children) of every thread sharing current's
 * signal struct are scanned under tasklist_lock. Depending on a child's
 * state the work is delegated to wait_task_stopped(), wait_task_zombie()
 * or wait_task_continued(); a non-zero result from any of them ends the
 * scan (those helpers drop tasklist_lock themselves in that case).
 *
 * Return: the helper's non-zero result; 0 when WNOHANG is set and an
 * eligible child exists but has nothing to report; -ERESTARTSYS when a
 * signal is pending; -ECHILD when there is no eligible child, or the
 * security denial code when every candidate was denied. When @infop is
 * non-NULL a positive result is collapsed to 0.
 */
static long do_wait(pid_t pid, int options, struct siginfo __user *infop,
		    int __user *stat_addr, struct rusage __user *ru)
{
	/* Wait-queue entry representing the calling task. */
	DECLARE_WAITQUEUE(wait, current);
	struct task_struct *tsk;
	int flag, retval;
	int allowed, denied;

	/*
	 * Queue ourselves on our own wait_chldexit queue; this is the queue
	 * __wake_up_parent() wakes for a child's parent.
	 */
	add_wait_queue(&current->signal->wait_chldexit, &wait);
repeat:
	/*
	 * flag    - set once some eligible child exists that could still
	 *           produce something to wait for
	 * allowed - at least one candidate passed eligible_child()
	 * denied  - last negative result returned by eligible_child()
	 */
	flag = 0;
	allowed = denied = 0;
	/* Mark ourselves interruptible-sleeping before scanning. */
	current->state = TASK_INTERRUPTIBLE;
	read_lock(&tasklist_lock);
	tsk = current;
	do {
		struct task_struct *p;
		struct list_head *_p;
		int ret;

		/* Walk the children of this thread. */
		list_for_each(_p, &tsk->children) {
			p = list_entry(_p, struct task_struct, sibling);
			/* Is this a child the caller asked to wait for? */
			ret = eligible_child(pid, options, p);
			if (!ret)
				continue;
			if (unlikely(ret < 0)) {
				denied = ret;
				continue;
			}
			allowed = 1;
			switch (p->state) {
			case TASK_TRACED:
				flag = 1;
				/*
				 * Only continue with a traced child when
				 * my_ptrace_child() accepts it (presumably:
				 * the caller is its tracer).
				 */
				if (!my_ptrace_child(p))
					continue;
				/*FALLTHROUGH*/
			case TASK_STOPPED:
				flag = 1;
				/*
				 * A stopped child is reported only if the
				 * caller passed WUNTRACED or
				 * my_ptrace_child() accepts it.
				 */
				if (!(options & WUNTRACED) &&
				    !my_ptrace_child(p))
					continue;
				/*
				 * ret == 2 marks a delayed group leader;
				 * WNOWAIT asks the helper not to consume
				 * the status.
				 */
				retval = wait_task_stopped(p, ret == 2,
							   (options & WNOWAIT),
							   infop,
							   stat_addr, ru);
				if (retval == -EAGAIN)
					goto repeat;
				if (retval != 0) /* He released the lock. */
					goto end;
				break;
			default:
			// case EXIT_DEAD:
				/* Nothing to do for a child already EXIT_DEAD. */
				if (p->exit_state == EXIT_DEAD)
					continue;
			// case EXIT_ZOMBIE:
				if (p->exit_state == EXIT_ZOMBIE) {
					/* Delayed group leader: do not reap yet. */
					if (ret == 2)
						goto check_continued;
					if (!likely(options & WEXITED))
						continue;
					retval = wait_task_zombie(
						p, (options & WNOWAIT),
						infop, stat_addr, ru);
					/* He released the lock. */
					if (retval != 0)
						goto end;
					break;
				}
check_continued:
				/*
				 * It's running now, so it might later
				 * exit, stop, or stop and then continue.
				 */
				flag = 1;
				/* Continued children are reported only with WCONTINUED. */
				if (!unlikely(options & WCONTINUED))
					continue;
				retval = wait_task_continued(
					p, (options & WNOWAIT),
					infop, stat_addr, ru);
				if (retval != 0) /* He released the lock. */
					goto end;
				break;
			}
		}
		/*
		 * Nothing found among the children: an eligible entry on
		 * ptrace_children still counts as "something may show up
		 * later", so the caller sleeps instead of getting -ECHILD.
		 */
		if (!flag) {
			list_for_each(_p, &tsk->ptrace_children) {
				p = list_entry(_p, struct task_struct,
					       ptrace_list);
				if (!eligible_child(pid, options, p))
					continue;
				flag = 1;
				break;
			}
		}
		/* __WNOTHREAD: do not look at the other threads' children. */
		if (options & __WNOTHREAD)
			break;
		/* Move on to the next thread sharing our signal struct. */
		tsk = next_thread(tsk);
		BUG_ON(tsk->signal != current->signal);
	} while (tsk != current);

	read_unlock(&tasklist_lock);
	if (flag) {
		retval = 0;
		/* WNOHANG: return immediately instead of sleeping. */
		if (options & WNOHANG)
			goto end;
		retval = -ERESTARTSYS;
		if (signal_pending(current))
			goto end;
		schedule();
		goto repeat;
	}
	retval = -ECHILD;
	if (unlikely(denied) && !allowed)
		retval = denied;
end:
	/* Back to running state and off the wait queue. */
	current->state = TASK_RUNNING;
	remove_wait_queue(&current->signal->wait_chldexit, &wait);
	if (infop) {
		if (retval > 0)
			retval = 0;
		else {
			/*
			 * For a WNOHANG return, clear out all the fields
			 * we would set so the user can easily tell the
			 * difference.
			 */
			if (!retval)
				retval = put_user(0, &infop->si_signo);
			if (!retval)
				retval = put_user(0, &infop->si_errno);
			if (!retval)
				retval = put_user(0, &infop->si_code);
			if (!retval)
				retval = put_user(0, &infop->si_pid);
			if (!retval)
				retval = put_user(0, &infop->si_uid);
			if (!retval)
				retval = put_user(0, &infop->si_status);
		}
	}
	return retval;
}
這段代碼還是比較簡單。先遍歷進程的子進程,再遍歷被跟蹤出去的進程,再遍歷線程組中的其它線程。我們分析一下裡面用到的幾個重要的子函數。
eligible_child()用來判斷子進程是否是我們想要wait的子進程.代碼如下:
/*
 * eligible_child - decide whether child @p matches a wait request.
 *
 * @pid selects the candidates:
 *   pid  >  0 : only the child whose pid equals @pid
 *   pid ==  0 : children in the caller's process group
 *   pid  < -1 : children in process group -@pid
 *   pid == -1 : any child
 *
 * Returns 0 when @p must be ignored, 2 when delay_group_leader(p) is
 * true, the non-zero result of security_task_wait() when that check
 * fails, and 1 when @p is eligible.
 */
static int eligible_child(pid_t pid, int options, struct task_struct *p)
{
	int err;

	if (pid > 0) {
		if (p->pid != pid)
			return 0;
	} else if (pid == 0) {
		if (process_group(p) != process_group(current))
			return 0;
	} else if (pid < -1) {
		if (process_group(p) != -pid)
			return 0;
	}
	/* pid == -1 places no restriction on the child. */

	/* A child with exit_signal == -1 is only considered while ptraced. */
	if (p->exit_signal == -1 && !p->ptrace)
		return 0;

	/*
	 * Without __WALL, the child's "exit signal is not SIGCHLD" property
	 * must agree with whether __WCLONE was requested.
	 */
	if (!(options & __WALL)) {
		int non_sigchld = (p->exit_signal != SIGCHLD);
		int wants_clone = ((options & __WCLONE) != 0);

		if (non_sigchld != wants_clone)
			return 0;
	}

	/*
	 * Do not consider thread group leaders that are
	 * in a non-empty thread group:
	 */
	if (delay_group_leader(p))
		return 2;

	err = security_task_wait(p);
	return err ? err : 1;
}
對TASK_TRACED和TASK_STOPPED狀態的子進程操作是在wait_task_stopped()中完成的。它的代碼如下:
/*
 * wait_task_stopped - report a child found in TASK_STOPPED/TASK_TRACED.
 *
 * Entered with tasklist_lock read-held (taken by do_wait()).
 *
 * @p:                    the child being examined
 * @delayed_group_leader: non-zero when the caller saw eligible_child() == 2
 * @noreap:               non-zero to report without clearing p->exit_code
 *                        (do_wait() passes options & WNOWAIT)
 * @infop, @stat_addr, @ru: optional user-space destinations
 *
 * Returns 0 with the lock still held when there is nothing to report,
 * -EAGAIN (lock released) when the stop status disappeared while the
 * lock was being switched, otherwise a non-zero value (lock released):
 * p->pid on success or the error from a failed user-space copy.
 */
static int wait_task_stopped(struct task_struct *p, int delayed_group_leader,
int noreap, struct siginfo __user *infop,
int __user *stat_addr, struct rusage __user *ru)
{
int retval, exit_code;
// exit_code == 0: this child has no stop status to report
if (!p->exit_code)
return 0;
// An un-ptraced delayed group leader is skipped while its signal struct
// still has group_stop_count > 0
if (delayed_group_leader && !(p->ptrace & PT_PTRACED) &&
p->signal && p->signal->group_stop_count > 0)
return 0;
// Take a reference on the task before dropping tasklist_lock so it
// cannot be freed while we read from it
get_task_struct(p);
read_unlock(&tasklist_lock);
// noreap: report the status but leave p->exit_code in place
if (unlikely(noreap)) {
pid_t pid = p->pid;
uid_t uid = p->uid;
int why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED;
exit_code = p->exit_code;
// Status already gone, or the child has entered an exit state:
// drop the reference and return -EAGAIN
if (unlikely(!exit_code) || unlikely(p->exit_state))
goto bail_ref;
// Hand the snapshot to wait_noreap_copyout(); presumably it copies the
// data to user space, drops the reference and returns the child's pid
// (helper not shown here -- confirm)
return wait_noreap_copyout(p, pid, uid,
why, exit_code,
infop, ru);
}
write_lock_irq(&tasklist_lock);
// Atomically fetch and clear exit_code so the stop status is consumed.
// If the child has entered an exit state in the meantime, put the
// value back and treat this as "nothing to report"
exit_code = xchg(&p->exit_code, 0);
if (unlikely(p->exit_state)) {
p->exit_code = exit_code;
exit_code = 0;
}
if (unlikely(exit_code == 0)) {
write_unlock_irq(&tasklist_lock);
bail_ref:
put_task_struct(p);
return -EAGAIN;
}
// Unlink and re-link p under its parent (per the original note this
// moves the child to the tail of the parent's child list)
remove_parent(p);
add_parent(p);
write_unlock_irq(&tasklist_lock);
// Copy the results out; each step runs only if all previous ones
// succeeded. The wait status is (exit_code << 8) | 0x7f
retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
if (!retval && stat_addr)
retval = put_user((exit_code << 8) | 0x7f, stat_addr);
if (!retval && infop)
retval = put_user(SIGCHLD, &infop->si_signo);
if (!retval && infop)
retval = put_user(0, &infop->si_errno);
if (!retval && infop)
retval = put_user((short)((p->ptrace & PT_PTRACED)
? CLD_TRAPPED : CLD_STOPPED),
&infop->si_code);
if (!retval && infop)
retval = put_user(exit_code, &infop->si_status);
if (!retval && infop)
retval = put_user(p->pid, &infop->si_pid);
if (!retval && infop)
retval = put_user(p->uid, &infop->si_uid);
if (!retval)
retval = p->pid;
// Drop the reference taken above
put_task_struct(p);
BUG_ON(!retval);
return retval;
}
對僵屍進程的操作是由wait_task_zombie()完成的。代碼如下:
/*
 * wait_task_zombie - report (and normally reap) a child in EXIT_ZOMBIE.
 *
 * Entered with tasklist_lock read-held (taken by do_wait()).
 *
 * @p:      the zombie child
 * @noreap: non-zero to report the exit status without reaping
 *          (do_wait() passes options & WNOWAIT)
 * @infop, @stat_addr, @ru: optional user-space destinations
 *
 * Returns 0 with the lock still held when the child turns out not to be
 * reportable; otherwise the lock has been released and the result is
 * p->pid on success or the error from a failed user-space copy.
 */
static int wait_task_zombie(struct task_struct *p, int noreap,
struct siginfo __user *infop,
int __user *stat_addr, struct rusage __user *ru)
{
unsigned long state;
int retval;
int status;
// noreap: only collect the information, do not release the child
if (unlikely(noreap)) {
pid_t pid = p->pid;
uid_t uid = p->uid;
int exit_code = p->exit_code;
int why, status;
// Child is no longer EXIT_ZOMBIE: nothing to report
if (unlikely(p->exit_state != EXIT_ZOMBIE))
return 0;
// exit_signal == -1 and not ptraced: nothing to report
if (unlikely(p->exit_signal == -1 && p->ptrace == 0))
return 0;
// Hold a reference across the unlock
get_task_struct(p);
read_unlock(&tasklist_lock);
// Low 7 bits zero: normal exit, status is in bits 8 and up.
// Otherwise the low 7 bits are the status and bit 0x80 selects
// CLD_DUMPED over CLD_KILLED
if ((exit_code & 0x7f) == 0) {
why = CLD_EXITED;
status = exit_code >> 8;
} else {
why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED;
status = exit_code & 0x7f;
}
// Hand the snapshot to wait_noreap_copyout() (helper not shown here)
return wait_noreap_copyout(p, pid, uid, why,
status, infop, ru);
}
/*
* Try to move the task's state to DEAD
* only one thread is allowed to do this:
*/
// Atomically switch exit_state to EXIT_DEAD and fetch the old value
state = xchg(&p->exit_state, EXIT_DEAD);
// Old state was not EXIT_ZOMBIE (it must then have been EXIT_DEAD):
// nothing to do here
if (state != EXIT_ZOMBIE) {
BUG_ON(state != EXIT_DEAD);
return 0;
}
// exit_signal == -1 and not ptraced: nothing to report
if (unlikely(p->exit_signal == -1 && p->ptrace == 0)) {
return 0;
}
// real_parent == parent: per the original note, the child has not been
// handed over to a tracer
if (likely(p->real_parent == p->parent) && likely(p->signal)) {
struct signal_struct *psig;
struct signal_struct *sig;
// Fold the child's own counters, its signal struct's counters and its
// signal struct's c* (children) counters into the parent's c* totals,
// under the parent's siglock
spin_lock_irq(&p->parent->sighand->siglock);
psig = p->parent->signal;
sig = p->signal;
psig->cutime =
cputime_add(psig->cutime,
cputime_add(p->utime,
cputime_add(sig->utime,
sig->cutime)));
psig->cstime =
cputime_add(psig->cstime,
cputime_add(p->stime,
cputime_add(sig->stime,
sig->cstime)));
psig->cmin_flt +=
p->min_flt + sig->min_flt + sig->cmin_flt;
psig->cmaj_flt +=
p->maj_flt + sig->maj_flt + sig->cmaj_flt;
psig->cnvcsw +=
p->nvcsw + sig->nvcsw + sig->cnvcsw;
psig->cnivcsw +=
p->nivcsw + sig->nivcsw + sig->cnivcsw;
psig->cinblock +=
task_io_get_inblock(p) +
sig->inblock + sig->cinblock;
psig->coublock +=
task_io_get_oublock(p) +
sig->oublock + sig->coublock;
spin_unlock_irq(&p->parent->sighand->siglock);
}
/*
* Now we are sure this task is interesting, and no other
* thread can reap it because we set its state to EXIT_DEAD.
*/
// Drop the lock, then copy the exit information out to user space;
// each step runs only if all previous ones succeeded
read_unlock(&tasklist_lock);
retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
// A group exit reports the group's exit code rather than the task's
status = (p->signal->flags & SIGNAL_GROUP_EXIT)
? p->signal->group_exit_code : p->exit_code;
if (!retval && stat_addr)
retval = put_user(status, stat_addr);
if (!retval && infop)
retval = put_user(SIGCHLD, &infop->si_signo);
if (!retval && infop)
retval = put_user(0, &infop->si_errno);
if (!retval && infop) {
int why;
if ((status & 0x7f) == 0) {
why = CLD_EXITED;
status >>= 8;
} else {
why = (status & 0x80) ? CLD_DUMPED : CLD_KILLED;
status &= 0x7f;
}
retval = put_user((short)why, &infop->si_code);
if (!retval)
retval = put_user(status, &infop->si_status);
}
if (!retval && infop)
retval = put_user(p->pid, &infop->si_pid);
if (!retval && infop)
retval = put_user(p->uid, &infop->si_uid);
// A copy failed: put the child back into EXIT_ZOMBIE and return the
// error without releasing the task
if (retval) {
// TODO: is this safe?
p->exit_state = EXIT_ZOMBIE;
return retval;
}
retval = p->pid;
// parent differs from real_parent: per the original note, the child was
// handed over to a tracer
// NOTE (original author's remark): an exiting child only signals its
// current parent
if (p->real_parent != p->parent) {
write_lock_irq(&tasklist_lock);
/* Double-check with lock held. */
if (p->real_parent != p->parent) {
// Detach the child from the tracer (per the original note this also
// restores the real parent as its parent)
__ptrace_unlink(p);
// TODO: is this safe?
// Put the child back into EXIT_ZOMBIE
p->exit_state = EXIT_ZOMBIE;
/*
* If this is not a detached task, notify the parent.
* If it's still not detached after that, don't release
* it now.
*/
// exit_signal != -1: notify the parent; if exit_signal is still not -1
// afterwards, clear p so release_task() below is skipped
if (p->exit_signal != -1) {
do_notify_parent(p, p->exit_signal);
if (p->exit_signal != -1)
p = NULL;
}
}
write_unlock_irq(&tasklist_lock);
}
// Release the child's remaining resources unless p was cleared above
if (p != NULL)
release_task(p);
BUG_ON(!retval);
return retval;
}
至此,我們看到了繼子進程退出之後的完整處理。在此,值得注意的是:子進程在退出的時候會給父進程發送相應的信號(例如SIGCHLD),默認的信號處理函數也會進行相應的處理。
七:等待隊列的操作
在這裡,我們第一次接觸到了等待隊列,我們就以上面的代碼做為例子來分析一下。
1:申請一個等待隊列:
DECLARE_WAITQUEUE():
// DECLARE_WAITQUEUE declares a wait_queue_t variable called `name` and
// initialises it with __WAITQUEUE_INITIALIZER.
// name: identifier of the wait-queue entry; tsk: the task stored in it
#define DECLARE_WAITQUEUE(name, tsk) \
wait_queue_t name = __WAITQUEUE_INITIALIZER(name, tsk)
// Initialiser for a wait_queue_t: .private holds tsk, .func is
// default_wake_function, and the task_list link starts as { NULL, NULL }
#define __WAITQUEUE_INITIALIZER(name, tsk) { \
.private = tsk, \
.func = default_wake_function, \
.task_list = { NULL, NULL } }
default_wake_function()為默認的喚醒處理函數。
2:添加等待隊列。
在上面的代碼中,有:
add_wait_queue(&current->signal->wait_chldexit,&wait);
它的意思是將wait添加至&current->signal->wait_chldexit中。代碼如下:
/*
 * add_wait_queue - put @wait on wait-queue head @q as a non-exclusive
 * waiter.
 *
 * WQ_FLAG_EXCLUSIVE is cleared on the entry first; the insertion itself
 * is done by __add_wait_queue() while holding q->lock with interrupt
 * state saved and restored around it.
 */
void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
{
	unsigned long irq_state;

	/* This entry must not be treated as an exclusive waiter. */
	wait->flags = wait->flags & ~WQ_FLAG_EXCLUSIVE;

	/* The queue is shared: modify it only under its lock. */
	spin_lock_irqsave(&q->lock, irq_state);
	__add_wait_queue(q, wait);
	spin_unlock_irqrestore(&q->lock, irq_state);
}
3:喚醒操作:
在do_notify_parent()中有這樣的代碼片段:
……
__wake_up_parent(tsk, tsk->parent);
……
__wake_up_parent()的代碼如下:
static inline void __wake_up_parent(struct task_struct *p,
struct task_struct *parent)
{
wake_up_interruptible_sync(&parent->signal->wait_chldexit);
}
parent->signal->wait_chldexit這個隊列很熟吧?我們在父進程中添加的等待隊列就是添加在這裡哦。^_^
喚醒隊列的操作是由wake_up_interruptible_sync()完成的,代碼如下:
wake_up_interruptible_sync() → __wake_up_sync() → __wake_up_common():
/*
 * __wake_up_common - run the wake callback of the entries queued on @q.
 *
 * Each entry's ->func is called with (@mode, @sync, @key). The walk
 * stops early once @nr_exclusive entries that carry WQ_FLAG_EXCLUSIVE
 * have had their callback return non-zero; entries without that flag
 * never stop the walk.
 */
static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
			     int nr_exclusive, int sync, void *key)
{
	struct list_head *pos, *following;

	list_for_each_safe(pos, following, &q->task_list) {
		wait_queue_t *waiter = list_entry(pos, wait_queue_t, task_list);
		/*
		 * Sample the flags before invoking the callback (presumably
		 * because the callback may unlink the entry -- confirm).
		 */
		unsigned waiter_flags = waiter->flags;

		if (!waiter->func(waiter, mode, sync, key))
			continue;
		if (!(waiter_flags & WQ_FLAG_EXCLUSIVE))
			continue;
		if (--nr_exclusive == 0)
			break;
	}
}
上述操作會遍歷整個等待隊列,然後運行對應的函數。我們在前面申請等待隊列的時候,默認的函數為:default_wake_function()。它會將操作的task放入運行隊列,並將狀態設為RUNNING。這個函數等之後我們分析進程切換與調度的時候再來分析。
八:小結
通過分析進程的創建、執行與消亡等過程,可以對進程管理子系統有一個大概的了解。該子系統與其它子系統關系十分密切。對進程資源的管理和釋放是理解這個子系統的難點。在下一個小節,我們接著分析進程的切換與調度。