在讀寫文件之前,我們必須先打開文件。從應用程序的角度來看,這是通過標準庫的open函數完成的,該函數返回一個文件描述符。在內核中,這一操作則由系統調用sys_open()完成。
[cpp]
- /*
-  * sys_open - kernel entry point for the open system call, declared through
-  * SYSCALL_DEFINE3 with three arguments: the user-space path pointer
-  * (filename), the open flags and the creation mode.
-  * Returns the long value produced by do_sys_open().
-  */
- SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode)
- {
- long ret;
- /*
-  * Check whether O_LARGEFILE must be forced on regardless of the flags
-  * passed in from user space; force_o_largefile() makes that decision.
-  * (Per the original note this is the case when the processor word size
-  * is not 32 bits - confirm against the force_o_largefile() definition.)
-  */
- if (force_o_largefile())
- flags |= O_LARGEFILE;
- /* The real work: do_sys_open() is called with AT_FDCWD as its dfd. */
- ret = do_sys_open(AT_FDCWD, filename, flags, mode);
- /* avoid REGPARM breakage on x86: */
- asmlinkage_protect(3, ret, filename, flags, mode);
- return ret;
- }
實際的打開工作由do_sys_open()完成:
[cpp]
- /*
-  * do_sys_open - common implementation behind the open system call.
-  * @dfd:      directory file descriptor, forwarded to do_filp_open()
-  * @filename: user-space pointer to the path name
-  * @flags:    open flags; also used when reserving the descriptor
-  * @mode:     creation mode, forwarded to do_filp_open()
-  *
-  * Returns the newly installed file descriptor (>= 0) on success. On
-  * failure it returns the error value obtained from getname(),
-  * get_unused_fd_flags() or do_filp_open().
-  */
- long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
- {
- /* Read the path name in from the process address space. */
- char *tmp = getname(filename);
- /* Pre-load the result with the error code; used only if tmp is an error pointer. */
- int fd = PTR_ERR(tmp);
-
- if (!IS_ERR(tmp)) {
- /*
-  * Reserve an unused file descriptor number.
-  * Background from the original note (not visible in this listing,
-  * verify against the fd-table code): the descriptor is an index into a
-  * per-process array reached through task_struct->files, whose entries
-  * refer to the struct file holding the state of each open file.
-  */
- fd = get_unused_fd_flags(flags);
- if (fd >= 0) {
- /* Descriptor reserved: do_filp_open() performs the actual open. */
- struct file *f = do_filp_open(dfd, tmp, flags, mode, 0);
- if (IS_ERR(f)) {
- /* Open failed: give the reserved descriptor back and report the error. */
- put_unused_fd(fd);
- fd = PTR_ERR(f);
- } else {
- /* Open succeeded: send the open notification, then bind f to fd. */
- fsnotify_open(f->f_path.dentry);
- fd_install(fd, f);
- }
- }
- /* Release the kernel copy of the path name on every path that obtained it. */
- putname(tmp);
- }
- return fd;
- }
打開文件的主體實現是do_filp_open():
[cpp]
- /*
-  * do_filp_open - resolve pathname and return the opened struct file, or an
-  * ERR_PTR() value on failure.
-  *
-  * dfd and pathname are handed to the path lookup helpers; open_flag is the
-  * flag word as passed in by the caller; mode is stored as the create mode
-  * of the open intent; acc_mode, when zero, is derived from the flags.
-  *
-  * Note that the low bits of the passed in "open_flag"
-  * are not the same as in the local variable "flag". See
-  * open_to_namei_flags() for more details.
-  */
- struct file *do_filp_open(int dfd, const char *pathname,
- int open_flag, int mode, int acc_mode)
- {
- struct file *filp;
- struct nameidata nd;
- int error;
- struct path path;
- struct dentry *dir;
- int count = 0;
- int will_write;
- /* Convert open_flag to the namei encoding (the original note describes
-  * this as "flag + 1" - verify against open_to_namei_flags()). */
- int flag = open_to_namei_flags(open_flag);
- /* No access mode supplied by the caller: derive it from the flags. */
- if (!acc_mode)
- acc_mode = MAY_OPEN | ACC_MODE(flag);
-
- /* O_TRUNC implies we need access checks for write permissions */
-
- /* Add MAY_WRITE to the access mode when O_TRUNC is set. */
- if (flag & O_TRUNC)
- acc_mode |= MAY_WRITE;
-
- /* Allow the LSM permission hook to distinguish append
- access from general write access. */
- /* Add MAY_APPEND to the access mode when O_APPEND is set. */
- if (flag & O_APPEND)
- acc_mode |= MAY_APPEND;
-
- /*
- * The simplest case - just a plain lookup.
- */
- /* O_CREAT is not set: nothing has to be created. */
- if (!(flag & O_CREAT)) {
- /* Look the path up with the open intent; the result is left in nd.
-  * Original author's note (not verifiable from this listing): in the
-  * VFS, path_lookup()/path_lookup_open() turn the path string supplied
-  * by the user into a dentry and set up the matching inode and file
-  * structures, which user space later reaches through the descriptor. */
- error = path_lookup_open(dfd, pathname, lookup_flags(flag),
- &nd, flag);
- if (error)
- return ERR_PTR(error);
- goto ok;/* skip the creation path below */
- }
-
- /*
- * Create - we need to know the parent.
- */
- /* From here on O_CREAT is set. */
- /* path_init() prepares the lookup and path_walk() carries it out; with
-  * LOOKUP_PARENT the pair resolves the path down to the parent. */
- error = path_init(dfd, pathname, LOOKUP_PARENT, &nd);
- if (error)
- return ERR_PTR(error);
- error = path_walk(pathname, &nd);
- if (error) {
- if (nd.root.mnt)
- path_put(&nd.root);
- return ERR_PTR(error);
- }
- if (unlikely(!audit_dummy_context()))
- /* Hand the parent dentry to audit_inode(). */
- audit_inode(pathname, nd.path.dentry);
-
- /*
- * We have the parent and last component. First of all, check
- * that we are not asked to creat(2) an obvious directory - that
- * will not do.
- */
- error = -EISDIR;
- /* Fail unless the last component is LAST_NORM and its name ends at last.len. */
- if (nd.last_type != LAST_NORM || nd.last.name[nd.last.len])
- goto exit_parent;
-
- error = -ENFILE;
- /* Obtain an empty struct file; NULL is reported as -ENFILE. */
- filp = get_empty_filp();
- if (filp == NULL)
- goto exit_parent;
- /* Fill in the open intent and the lookup flags in nd. */
- nd.intent.open.file = filp;
- nd.intent.open.flags = flag;
- nd.intent.open.create_mode = mode;
- dir = nd.path.dentry;
- nd.flags &= ~LOOKUP_PARENT;
- nd.flags |= LOOKUP_CREATE | LOOKUP_OPEN;
- if (flag & O_EXCL)
- nd.flags |= LOOKUP_EXCL;
- mutex_lock(&dir->d_inode->i_mutex);
- /* Look up the dentry of the target itself, with the parent's i_mutex
-  * held. The walk above resolved only the parent directory; lookup_hash()
-  * works from nd, which holds the last component and the parent dentry.
-  * (The original note says the lookup goes through the dentry hash
-  * table - confirm in lookup_hash().) */
- path.dentry = lookup_hash(&nd);
- path.mnt = nd.path.mnt;
- /* path now describes the target, or holds an error pointer checked below. */
- do_last:
- error = PTR_ERR(path.dentry);
- if (IS_ERR(path.dentry)) {
- mutex_unlock(&dir->d_inode->i_mutex);
- goto exit;
- }
-
- if (IS_ERR(nd.intent.open.file)) {
- error = PTR_ERR(nd.intent.open.file);
- goto exit_mutex_unlock;
- }
-
- /* Negative dentry, just create the file */
- /* The dentry has no inode attached, so the file node has to be created. */
- if (!path.dentry->d_inode) {
- /*
- * This write is needed to ensure that a
- * ro->rw transition does not occur between
- * the time when the file is created and when
- * a permanent write count is taken through
- * the 'struct file' in nameidata_to_filp().
- */
- /* Write access to the mount is required before creating. */
- error = mnt_want_write(nd.path.mnt);
- if (error)
- goto exit_mutex_unlock;
- /* Create the file using the namei-format flag (per the original note
-  * this mainly creates the inode - confirm in __open_namei_create()). */
- error = __open_namei_create(&nd, &path, flag, mode);
- if (error) {
- mnt_drop_write(nd.path.mnt);
- goto exit;
- }
- /* Obtain the struct file that corresponds to nd. */
- filp = nameidata_to_filp(&nd, open_flag);
- if (IS_ERR(filp))
- ima_counts_put(&nd.path,
- acc_mode & (MAY_READ | MAY_WRITE |
- MAY_EXEC));
- /* Drop the mount write access taken above. */
- mnt_drop_write(nd.path.mnt);
- if (nd.root.mnt)
- path_put(&nd.root);
- return filp;
- }
-
- /*
- * It already exists.
- */
- /* The file to open already exists; the parent's i_mutex is released. */
- mutex_unlock(&dir->d_inode->i_mutex);
- /* Hand the target dentry to audit_inode(). */
- audit_inode(pathname, path.dentry);
-
- error = -EEXIST;
- if (flag & O_EXCL)
- goto exit_dput;
- /* Per the original note: if a file system is mounted on path,
-  * __follow_mount() keeps descending until it reaches one with nothing
-  * mounted on it and updates path to that file system's root (confirm in
-  * __follow_mount()). A non-zero return with O_NOFOLLOW gives -ELOOP. */
- if (__follow_mount(&path)) {
- error = -ELOOP;
- if (flag & O_NOFOLLOW)
- goto exit_dput;
- }
-
- error = -ENOENT;
- if (!path.dentry->d_inode)
- goto exit_dput;
- if (path.dentry->d_inode->i_op->follow_link)
- goto do_link;/* the inode has a follow_link operation: resolve the symlink */
- /* Transfer path into nd. */
- path_to_nameidata(&path, &nd);
- error = -EISDIR;
- /* The target is a directory. */
- if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
- goto exit;
- /* From here on nd describes the final target file. */
- ok:
- /*
- * Consider:
- * 1. may_open() truncates a file
- * 2. a rw->ro mount transition occurs
- * 3. nameidata_to_filp() fails due to
- * the ro mount.
- * That would be inconsistent, and should
- * be avoided. Taking this mnt write here
- * ensures that (2) can not occur.
- */
- will_write = open_will_write_to_fs(flag, nd.path.dentry->d_inode);
- if (will_write) {
- error = mnt_want_write(nd.path.mnt);
- if (error)
- goto exit;
- }
- /* may_open() performs the permission checks and, as noted above, may
-  * truncate the file. */
- error = may_open(&nd.path, acc_mode, flag);
- if (error) {
- if (will_write)
- mnt_drop_write(nd.path.mnt);
- goto exit;
- }
- /* Convert nd into the struct file to return. */
- filp = nameidata_to_filp(&nd, open_flag);
- if (IS_ERR(filp))
- ima_counts_put(&nd.path,
- acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC));
- /*
- * It is now safe to drop the mnt write
- * because the filp has had a write taken
- * on its behalf.
- */
- if (will_write)
- /* release the mount write access */
- mnt_drop_write(nd.path.mnt);
- if (nd.root.mnt)
- /* drop the reference held on nd.root */
- path_put(&nd.root);
- return filp;
-
- exit_mutex_unlock:
- mutex_unlock(&dir->d_inode->i_mutex);
- exit_dput:
- path_put_conditional(&path, &nd);
- exit:
- if (!IS_ERR(nd.intent.open.file))
- release_open_intent(&nd);
- exit_parent:
- if (nd.root.mnt)
- path_put(&nd.root);
- path_put(&nd.path);
- return ERR_PTR(error);
- /* Symlink case: when following is allowed, the file the link refers to
-  * is located by hand. */
- do_link:
- error = -ELOOP;
- if (flag & O_NOFOLLOW)
- goto exit_dput;/* following links is not allowed: return the error */
- /*
- * This is subtle. Instead of calling do_follow_link() we do the
- * thing by hands. The reason is that this way we have zero link_count
- * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
- * After that we have the parent and last component, i.e.
- * we are in the same situation as after the first path_walk().
- * Well, almost - if the last component is normal we get its copy
- * stored in nd->last.name and we will have to putname() it when we
- * are done. Procfs-like symlinks just set LAST_BIND.
- */
- /* The code below finds, by hand, the dentry of the file the link
-  * refers to. */
- /* Set LOOKUP_PARENT for the lookup. */
- nd.flags |= LOOKUP_PARENT;
- /* Security hook: check whether following this link is permitted. */
- error = security_inode_follow_link(path.dentry, &nd);
- if (error)
- goto exit_dput;
- /* Follow the symlink (a path walk); the result is left in nd. */
- error = __do_follow_link(&path, &nd);
- if (error) {
- /* Does someone understand code flow here? Or it is only
- * me so stupid? Anathema to whoever designed this non-sense
- * with "intent.open".
- */
- release_open_intent(&nd);
- if (nd.root.mnt)
- path_put(&nd.root);
- return ERR_PTR(error);
- }
- nd.flags &= ~LOOKUP_PARENT;
- /* Examine the type of the last path component. */
- if (nd.last_type == LAST_BIND)
- goto ok;
- error = -EISDIR;
- if (nd.last_type != LAST_NORM)
- goto exit;
- if (nd.last.name[nd.last.len]) {
- __putname(nd.last.name);
- goto exit;
- }
- error = -ELOOP;
- /* Loop guard: fail with -ELOOP once count has reached 32. */
- if (count++==32) {
- __putname(nd.last.name);
- goto exit;
- }
- dir = nd.path.dentry;
- mutex_lock(&dir->d_inode->i_mutex);
- /* Refresh path's dentry and mount for the new last component. */
- path.dentry = lookup_hash(&nd);
- path.mnt = nd.path.mnt;
- __putname(nd.last.name);
- goto do_last;
- }
在內核中要打開一個文件,首先應該找到這個文件,而查找文件的過程在vfs裡面是由do_path_lookup或者path_lookup_open函數來完成的,關於文件路徑查找在前面已經分析過相關的代碼了。這兩個函數將用戶傳進來的字符串表示的文件路徑轉換成一個dentry結構,並建立好相應的inode和file結構,將指向file的描述符返回用戶。用戶隨後通過文件描述符,來訪問這些數據結構。