Linux pipe 源代码分析

时间:2023-03-09 00:59:32
Linux pipe 源代码分析

Linux pipe 源代码分析

     管道(pipe)作为Unix中历史最悠久的IPC机制,存在于各个版本的Unix中,主要用于父子进程之间的通信(使用fork,子进程会获得父进程的打开文件表)。pipe()系统调用的底层实现相当于一个特殊的文件系统:每次调用时创建一个inode,并关联两个file,一个用于读,一个用于写,从而实现数据的单向流动。
用户层API:
       #include <unistd.h>

       int pipe(int pipefd[2]);

       #define _GNU_SOURCE             /* See feature_test_macros(7) */
       #include <unistd.h>

       int pipe2(int pipefd[2], int flags);

内核源代码路径如下:
/* sys_pipe(): the plain pipe() system call is pipe2() with flags == 0. */
SYSCALL_DEFINE1(pipe, int __user *, fildes)
{
	return sys_pipe2(fildes, 0);
}

/*
 * sys_pipe2(): create a pipe and hand its two descriptors back to user space.
 *
 * fildes - user-space array that receives the two descriptor numbers
 * flags  - passed through unchanged to __do_pipe_flags()
 *
 * Returns 0 on success, the error reported by __do_pipe_flags(), or
 * -EFAULT when the descriptor numbers cannot be copied to user space.
 */
SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
{
	struct file *files[2];
	int fd[2];
	int error;

	/* The core of the work is done by __do_pipe_flags(). */
	error = __do_pipe_flags(fd, files, flags);
	if (!error) {
		/* Everything is ready: copy the two pipe fds to user space. */
		if (unlikely(copy_to_user(fildes, fd, sizeof(fd)))) {
			/* Copy failed: drop both files and give back both fds. */
			fput(files[0]);
			fput(files[1]);
			put_unused_fd(fd[0]);
			put_unused_fd(fd[1]);
			error = -EFAULT;
		} else {
			/*
			 * Record the fd -> file mappings in this process's
			 * file descriptor table (fdtable).
			 */
			fd_install(fd[0], files[0]);
			fd_install(fd[1], files[1]);
		}
	}
	return error;
}

/*
 * Create the two struct files of a pipe and reserve a descriptor for each.
 *
 * fd    - out: fd[0] receives the read descriptor, fd[1] the write one
 * files - out: filled in by create_pipe_files()
 * flags - only O_CLOEXEC, O_NONBLOCK and O_DIRECT are accepted
 *
 * Returns 0 on success, -EINVAL for any other flag bit, or the error from
 * create_pipe_files() / get_unused_fd_flags().  On failure everything
 * acquired so far is released again.
 */
static int __do_pipe_flags(int *fd, struct file **files, int flags)
{
	int error;
	int fdw, fdr;

	if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT))
		return -EINVAL;

	/* Create the two struct files for this pipe. */
	error = create_pipe_files(files, flags);
	if (error)
		return error;

	/* Reserve two unused file descriptors. */
	error = get_unused_fd_flags(flags);
	if (error < 0)
		goto err_read_pipe;
	fdr = error;

	error = get_unused_fd_flags(flags);
	if (error < 0)
		goto err_fdr;
	fdw = error;

	audit_fd_pair(fdr, fdw);
	fd[0] = fdr;
	fd[1] = fdw;
	return 0;

err_fdr:
	put_unused_fd(fdr);
err_read_pipe:
	fput(files[0]);
	fput(files[1]);
	return error;
}

/*
 * Create the two file instances of a pipe.
 *
 * res   - out: res[0] is the read end, res[1] is the write end
 * flags - O_NONBLOCK is applied to both ends, O_DIRECT to the write end only
 *
 * Returns 0 on success, -ENFILE when no inode or struct file could be
 * obtained, or -ENOMEM when the dentry allocation fails.
 */
int create_pipe_files(struct file **res, int flags)
{
	int err;
	/* Create an inode for the pipe and do some initialisation on it. */
	struct inode *inode = get_pipe_inode();
	struct file *f;
	struct path path;
	static struct qstr name = { .name = "" };	/* "quick string"?? */

	if (!inode)
		return -ENFILE;

	err = -ENOMEM;
	/* Allocate a directory entry. */
	path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name);
	if (!path.dentry)
		goto err_inode;
	path.mnt = mntget(pipe_mnt);	/* reference count + 1 */

	d_instantiate(path.dentry, inode);

	err = -ENFILE;
	f = alloc_file(&path, FMODE_WRITE, &pipefifo_fops);
	if (IS_ERR(f))
		goto err_dentry;

	f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT));
	f->private_data = inode->i_pipe;

	/* This is why fd[0] is the read end and fd[1] is the write end. */
	res[0] = alloc_file(&path, FMODE_READ, &pipefifo_fops);
	if (IS_ERR(res[0]))
		goto err_file;

	/* NOTE(review): presumably the extra reference for the second file. */
	path_get(&path);
	res[0]->private_data = inode->i_pipe;
	res[0]->f_flags = O_RDONLY | (flags & O_NONBLOCK);
	res[1] = f;
	return 0;

err_file:
	put_filp(f);
err_dentry:
	free_pipe_info(inode->i_pipe);
	path_put(&path);
	return err;

err_inode:
	free_pipe_info(inode->i_pipe);
	iput(inode);
	return err;
}

/*
 * Allocate and initialise the inode that backs a new pipe.
 *
 * Returns the inode, or NULL when either the inode or its pipe_inode_info
 * cannot be allocated.
 */
static struct inode * get_pipe_inode(void)
{
	struct inode *inode = new_inode_pseudo(pipe_mnt->mnt_sb);
	struct pipe_inode_info *pipe;

	if (!inode)
		goto fail_inode;

	/* Assign an inode number. */
	inode->i_ino = get_next_ino();

	/* Allocate the kernel-side pipe object. */
	pipe = alloc_pipe_info();
	if (!pipe)
		goto fail_iput;

	inode->i_pipe = pipe;
	pipe->files = 2;
	pipe->readers = pipe->writers = 1;
	inode->i_fop = &pipefifo_fops;

	/*
	 * Mark the inode dirty from the very beginning,
	 * that way it will never be moved to the dirty
	 * list because "mark_inode_dirty()" will think
	 * that it already _is_ on the dirty list.
	 */
	inode->i_state = I_DIRTY;
	inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
	inode->i_uid = current_fsuid();
	inode->i_gid = current_fsgid();
	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;

	return inode;

fail_iput:
	iput(inode);

fail_inode:
	return NULL;
}

/* The file_operations instance used for pipes: */
const struct file_operations pipefifo_fops = {
	/* opening and seeking */
	.open		= fifo_open,
	.llseek		= no_llseek,

	/* read side */
	.read		= new_sync_read,
	.read_iter	= pipe_read,

	/* write side */
	.write		= new_sync_write,
	.write_iter	= pipe_write,

	/* polling, ioctl, async notification and release */
	.poll		= pipe_poll,
	.unlocked_ioctl	= pipe_ioctl,
	.fasync		= pipe_fasync,
	.release	= pipe_release,
};
整体的逻辑图可以这样表示:
Linux pipe 源代码分析
TODO:具体读写的实现细节(new_sync_read()/new_sync_write())有待分析。
参考:
(1)Linux kernel 3.18 source code 
(2)Linux man page
(3)Linux内核源代码情景分析