Linux pipe 源代码分析

管道（pipe）是Unix中历史最悠久的IPC机制，存在于各个版本的Unix中，主要用于父子进程之间的通信（使用fork，子进程会获得父进程的打开文件表）。pipe()系统调用的底层实现相当于一个特殊的文件系统：每次调用时创建一个inode，并关联两个file，一个用于读，一个用于写，从而实现数据的单向流动。

用户层API：

 #include <unistd.h>

       int pipe(int pipefd[2]);

       #define _GNU_SOURCE             /* See feature_test_macros(7) */

       #include <unistd.h>

       int pipe2(int pipefd[2], int flags);

内核源代码路径如下：

// pipe(2) system call entry point: forwards to sys_pipe2() with flags set to 0.

SYSCALL_DEFINE1(pipe, int __user *, fildes)

{

     return sys_pipe2(fildes, 0);

}

/*
 * pipe2(2) system call entry point.
 *
 * Builds the pipe through __do_pipe_flags(), then copies the two descriptor
 * numbers to the user buffer `fildes`. If that copy fails, both files are
 * dropped with fput(), both descriptor numbers are given back with
 * put_unused_fd(), and -EFAULT is returned. Otherwise each file is bound to
 * its descriptor with fd_install().
 *
 * Returns 0 on success, the error from __do_pipe_flags(), or -EFAULT.
 */
SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)

{

     struct file *files[2];

     int fd[2];

     int error;

     // The core of the work is done by __do_pipe_flags()

     error = __do_pipe_flags(fd, files, flags);

     if (!error) {

          // Everything is ready: copy the two fds associated with the pipe to user space

          if (unlikely(copy_to_user(fildes, fd, sizeof(fd)))) {

               // Copy failed: undo what __do_pipe_flags() set up (files and fd numbers)

               fput(files[0]);

               fput(files[1]);

               put_unused_fd(fd[0]);

               put_unused_fd(fd[1]);

               error = -EFAULT;

          } else {

               // Record the fd -> file mapping in this process's file descriptor table (fdtable)

               fd_install(fd[0], files[0]);

               fd_install(fd[1], files[1]);

          }

     }

     return error;

}

/*
 * Create the two struct file objects of a pipe and reserve two descriptor
 * numbers for them, without installing the files into the descriptors.
 *
 * fd:    output; fd[0] receives the read descriptor (fdr), fd[1] the write
 *        descriptor (fdw).
 * files: output; filled by create_pipe_files().
 * flags: only O_CLOEXEC, O_NONBLOCK and O_DIRECT are accepted.
 *
 * Returns 0 on success. Returns -EINVAL for any other flag bit, the error
 * from create_pipe_files(), or the negative value returned by
 * get_unused_fd_flags(). On a descriptor failure the files (and the first
 * descriptor, if already reserved) are released before returning.
 */
static int __do_pipe_flags(int *fd, struct file **files, int flags)

{

     int error;

     int fdw, fdr;

     // Reject any flag outside the supported set

     if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT))

          return -EINVAL;

     // Create the two struct file objects for this pipe

     error = create_pipe_files(files, flags);

     if (error)

          return error;

     // Reserve two unused file descriptors (a negative return is an error code)

     error = get_unused_fd_flags(flags);

     if (error < 0)

          goto err_read_pipe;

     fdr = error;

     error = get_unused_fd_flags(flags);

     if (error < 0)

          goto err_fdr;

     fdw = error;

     audit_fd_pair(fdr, fdw);

     fd[0] = fdr;

     fd[1] = fdw;

     return 0;

// Unwind in reverse order: first the reserved read descriptor, then both files

err_fdr:

     put_unused_fd(fdr);

err_read_pipe:

     fput(files[0]);

     fput(files[1]);

     return error;

}

/*

* Create the two struct file instances of a pipe.

*

* res:   output; on success res[0] is the file opened with FMODE_READ and

*        res[1] the file opened with FMODE_WRITE. Both have private_data

*        set to inode->i_pipe.

* flags: O_NONBLOCK is applied to both files; O_DIRECT only to the write file.

*

* Returns 0 on success, -ENFILE if no pipe inode could be obtained or if

* either alloc_file() call fails (the ERR_PTR value itself is not

* propagated), and -ENOMEM if the dentry allocation fails.

*/

int create_pipe_files(struct file **res, int flags)

{

     int err;

     // Create an inode for the pipe and do some initialization

     struct inode *inode = get_pipe_inode();

     struct file *f;

     struct path path;

     static struct qstr name = { .name = "" }; // empty name used for the pseudo dentry

     if (!inode)

          return -ENFILE;

     err = -ENOMEM;

     // Allocate a directory entry

     path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name);

     if (!path.dentry)

          goto err_inode;

     path.mnt = mntget(pipe_mnt);  // take one more reference on the mount

     d_instantiate(path.dentry, inode);

     err = -ENFILE;

     // Write side first; it is handed out as res[1] further down

     f = alloc_file(&path, FMODE_WRITE, &pipefifo_fops);

     if (IS_ERR(f))

          goto err_dentry;

     f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT));

     f->private_data = inode->i_pipe;

     // This is why fd[0] is the read end and fd[1] is the write end

     res[0] = alloc_file(&path, FMODE_READ, &pipefifo_fops);

     if (IS_ERR(res[0]))

          goto err_file;

     // NOTE(review): presumably the extra path reference held by the second file - confirm

     path_get(&path);

     res[0]->private_data = inode->i_pipe;

     res[0]->f_flags = O_RDONLY | (flags & O_NONBLOCK);

     res[1] = f;

     return 0;

// Error unwinding: err_file falls through into err_dentry

err_file:

     put_filp(f);

err_dentry:

     free_pipe_info(inode->i_pipe);

     path_put(&path);

     return err;

// No dentry was allocated, so the inode reference is dropped directly

err_inode:

     free_pipe_info(inode->i_pipe);

     iput(inode);

     return err;

}

/*
 * Allocate and initialize the inode that backs a new pipe.
 *
 * The inode is created on pipe_mnt's superblock, given an inode number and a
 * freshly allocated pipe_inode_info (stored in inode->i_pipe), and set up as
 * a FIFO (S_IFIFO, owner read/write) owned by the current fsuid/fsgid.
 *
 * Returns the inode, or NULL if either the inode or the pipe info could not
 * be allocated.
 */
static struct inode * get_pipe_inode(void)

{

     struct inode *inode = new_inode_pseudo(pipe_mnt->mnt_sb);

     struct pipe_inode_info *pipe;

     if (!inode)

          goto fail_inode;

     // Assign an inode number

     inode->i_ino = get_next_ino();

     // Allocate the kernel-side pipe object

     pipe = alloc_pipe_info();

     if (!pipe)

          goto fail_iput;

     inode->i_pipe = pipe;

     // Initial counts: files = 2, one reader and one writer

     pipe->files = 2;

     pipe->readers = pipe->writers = 1;

     inode->i_fop = &pipefifo_fops;

     /*

     * Mark the inode dirty from the very beginning,

     * that way it will never be moved to the dirty

     * list because "mark_inode_dirty()" will think

     * that it already _is_ on the dirty list.

     */

     inode->i_state = I_DIRTY;

     inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;

     inode->i_uid = current_fsuid();

     inode->i_gid = current_fsgid();

     inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;

     return inode;

// Pipe info allocation failed: drop the inode obtained above

fail_iput:

     iput(inode);

fail_inode:

     return NULL;

}

// file_operations instance used for pipes. It is installed as inode->i_fop in
// get_pipe_inode() and passed to alloc_file() for both ends in create_pipe_files().

const struct file_operations pipefifo_fops = {

     .open          = fifo_open,

     .llseek          = no_llseek,

     .read          = new_sync_read,

     .read_iter     = pipe_read,

     .write          = new_sync_write,

     .write_iter     = pipe_write,

     .poll          = pipe_poll,

     .unlocked_ioctl     = pipe_ioctl,

     .release     = pipe_release,

     .fasync          = pipe_fasync,

};

总体的逻辑图可以这样表示：

TODO：具体读写的实现细节（new_sync_read()/new_sync_write()）有待分析。

参考：

（1）Linux kernel 3.18 source code

（2）Linux man page

（3）Linux内核源代码情景分析

秒客网

Linux pipe 源代码分析

相关文章