----sys_open:linux\fs\open.c----
/*
 * sys_open() - entry point of the open system call.
 *
 * filename: user-space pointer to the path to open.
 * flags:    open flags as passed by the caller.
 * mode:     creation mode, forwarded to do_open().
 *
 * Returns the new file descriptor on success, or the negative error
 * code produced by get_unused_fd(), getname() or do_open().
 */
asmlinkage int sys_open(const char * filename,int flags,int mode)
{
	char *kname;
	int fd;
	int err;

	/*
	 * Reserve a file descriptor (the index into the process's array
	 * of open file structure pointers).
	 */
	fd = get_unused_fd();
	if (fd < 0)
		return fd;

	/*
	 * Copy the file name from user space into kernel space (per the
	 * original annotation this allocates one physical page); kname
	 * then points at the kernel-space copy of the name.
	 */
	err = getname(filename, &kname);
	if (err) {
		put_unused_fd(fd);
		return err;
	}

	/*
	 * Open the file; on success current->files->fd[fd] points at
	 * the opened file structure.  The kernel copy of the name is
	 * released whether or not the open succeeded.
	 */
	err = do_open(kname, flags, mode, fd);
	putname(kname);
	if (err) {
		/* Give the reserved descriptor back on failure. */
		put_unused_fd(fd);
		return err;
	}

	return fd;
}
----do_open:linux\fs\open.c----
/*
 * do_open() - allocate a file structure, resolve the path and install
 * the result in the current process's descriptor table at index fd.
 *
 * Note that while the flag value (low two bits) for sys_open means:
 *	00 - read-only
 *	01 - write-only
 *	10 - read-write
 *	11 - special
 * it is changed into
 *	00 - no permissions needed
 *	01 - read-permission
 *	10 - write-permission
 *	11 - read-write
 * for the internal routines (ie open_namei()/follow_link() etc). 00 is
 * used by symlinks.
 *
 * Returns 0 on success.  On failure returns -ENFILE when no file
 * structure is available, or the error reported by open_namei(),
 * get_write_access() or the file's open operation; everything acquired
 * up to that point is released again.
 */
static int do_open(const char * filename,int flags,int mode, int fd)
{
	struct file *filp;
	struct inode *ino;
	int namei_flags;
	int err;

	/*
	 * Take a free file structure (per the original annotation: from
	 * the first_file list, refilled via grow_files() when needed).
	 */
	filp = get_empty_filp();
	if (filp == NULL)
		return -ENFILE;

	/* The file keeps the caller's flags; f_mode uses the internal encoding. */
	filp->f_flags = flags;
	filp->f_mode = (flags + 1) & O_ACCMODE;

	/* Build the flag word for open_namei() in the internal encoding. */
	namei_flags = flags;
	if (filp->f_mode != 0)
		namei_flags++;
	/* Truncation needs write permission on the target. */
	if (namei_flags & O_TRUNC)
		namei_flags |= 2;

	/* Walk the path name to obtain the file's inode. */
	err = open_namei(filename, namei_flags, mode, &ino, NULL);
	if (err)
		goto release_file;

	/*
	 * When opening for write, take write access on the inode.  Per
	 * the original annotation this fails if a file mapping denies
	 * writes (mpnt->vm_flags & VM_DENYWRITE) and otherwise
	 * increments inode->i_writecount.
	 */
	if (filp->f_mode & FMODE_WRITE) {
		err = get_write_access(ino);
		if (err)
			goto release_inode;
	}

	/* Initialise the file structure. */
	filp->f_inode = ino;
	filp->f_pos = 0;
	filp->f_reada = 0;
	filp->f_op = ino->i_op ? ino->i_op->default_file_ops : NULL;

	/* Give the file's own open operation, if any, a chance to run. */
	if (filp->f_op && filp->f_op->open) {
		err = filp->f_op->open(ino, filp);
		if (err)
			goto release_write;
	}

	/* These flags only matter while opening; clear them afterwards. */
	filp->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);

	/* Publish the file in the process's open-file array, indexed by fd. */
	current->files->fd[fd] = filp;
	return 0;

release_write:
	if (filp->f_mode & FMODE_WRITE)
		put_write_access(ino);
release_inode:
	iput(ino);
release_file:
	filp->f_count--;
	return err;
}
----open_namei:linux\fs\namei.c----
/*
 * open_namei()
 *
 * namei for open - this is in fact almost the whole open-routine.
 *
 * Note that the low bits of "flag" aren't the same as in the open
 * system call - they are	00 - no permissions needed
 *				01 - read permission needed
 *				10 - write permission needed
 *				11 - read/write permissions needed
 * which is a lot more logical, and also allows the "no perm" needed
 * for symlinks (where the permissions are checked later).
 *
 * pathname:  path to resolve.
 * flag:      open flags with the low two bits in the encoding above.
 * mode:      creation mode; masked with the process umask and forced
 *            to a regular file before use.
 * res_inode: receives the resulting inode on success.
 * base:      passed through to dir_namei() as the starting directory.
 *
 * Returns 0 on success with *res_inode set, or a negative error code.
 * On every error path taken after the final inode has been obtained,
 * the inode reference is dropped before returning.
 */
int open_namei(const char * pathname, int flag, int mode,
	       struct inode ** res_inode, struct inode * base)
{
	const char * basename;
	int namelen,error;
	struct inode * dir, *inode;

	mode &= S_IALLUGO & ~current->fs->umask;
	mode |= S_IFREG;

	error = dir_namei(pathname, &namelen, &basename, base, &dir);
	if (error)
		return error;

	if (!namelen) {			/* special case: '/usr/' etc */
		/* The path names a directory: it cannot be opened for write. */
		if (flag & 2) {
			iput(dir);
			return -EISDIR;
		}
		/* thanks to Paul Pluzhnikov for noticing this was missing.. */
		if ((error = permission(dir,ACC_MODE(flag))) != 0) {
			iput(dir);
			return error;
		}
		*res_inode=dir;
		return 0;
	}

	dir->i_count++;		/* lookup eats the dir */
	if (flag & O_CREAT) {
		/* Hold the directory semaphore across lookup + create. */
		down(&dir->i_sem);
		error = lookup(dir, basename, namelen, &inode);
		if (!error) {
			/* The name already exists: O_EXCL turns that into an error. */
			if (flag & O_EXCL) {
				iput(inode);
				error = -EEXIST;
			}
		} else if (IS_RDONLY(dir))
			error = -EROFS;
		else if (!dir->i_op || !dir->i_op->create)
			error = -EACCES;
		else if ((error = permission(dir,MAY_WRITE | MAY_EXEC)) != 0)
			;	/* error is already set! */
		else {
			dir->i_count++;		/* create eats the dir */
			if (dir->i_sb && dir->i_sb->dq_op)
				dir->i_sb->dq_op->initialize(dir, -1);
			error = dir->i_op->create(dir, basename, namelen, mode, res_inode);
			up(&dir->i_sem);
			iput(dir);
			return error;
		}
		up(&dir->i_sem);
	} else
		error = lookup(dir, basename, namelen, &inode);
	if (error) {
		iput(dir);
		return error;
	}

	error = follow_link(dir,inode,flag,mode,&inode);
	if (error)
		return error;

	if (S_ISDIR(inode->i_mode) && (flag & 2)) {
		iput(inode);
		return -EISDIR;
	}
	if ((error = permission(inode,ACC_MODE(flag))) != 0) {
		iput(inode);
		return error;
	}

	if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
		/*
		 * 2-Feb-1995 Bruce Perens <[email protected]>
		 * Allow opens of Unix domain sockets and FIFOs for write on
		 * read-only filesystems. Their data does not live on the disk.
		 *
		 * If there was something like IS_NODEV(inode) for
		 * pipes and/or sockets I'd check it here.
		 */
		flag &= ~O_TRUNC;
	} else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
		if (IS_NODEV(inode)) {
			iput(inode);
			return -EACCES;
		}
		flag &= ~O_TRUNC;
	} else {
		if (IS_RDONLY(inode) && (flag & 2)) {
			iput(inode);
			return -EROFS;
		}
	}

	/*
	 * An append-only file must be opened in append mode for writing
	 * Additionally, we must disallow O_TRUNC -- cevans
	 */
	if (IS_APPEND(inode) && (((flag & FMODE_WRITE) && !(flag & O_APPEND)) || (flag & O_TRUNC))) {
		iput(inode);
		return -EPERM;
	}

	if (flag & O_TRUNC) {
		if ((error = get_write_access(inode))) {
			iput(inode);
			return error;
		}
		/*
		 * Refuse to truncate files with mandatory locks held on them.
		 * Write access was taken above, so it must be dropped again
		 * whether the lock check or the truncate itself fails;
		 * otherwise the write-access count on the inode would leak.
		 */
		error = locks_verify_locked(inode);
		if (!error) {
			if (inode->i_sb && inode->i_sb->dq_op)
				inode->i_sb->dq_op->initialize(inode, -1);
			error = do_truncate(inode, 0);
		}
		put_write_access(inode);
		if (error) {
			iput(inode);
			return error;
		}
	} else if (flag & FMODE_WRITE) {
		if (inode->i_sb && inode->i_sb->dq_op)
			inode->i_sb->dq_op->initialize(inode, -1);
	}

	*res_inode = inode;
	return 0;
}