目錄
檔案鎖
非阻塞io
select
poll
epoll
readv和writev
mmap
參考
檔案鎖
#include <fcntl.h>
//第二個參數cmd可為F_GETLK、F_SETLK、F_SETLKW三種:F_GETLK測試記錄鎖,F_SETLK設定記錄鎖(無法取得時立即傳回錯誤),F_SETLKW設定記錄鎖(無法取得時阻塞等待)
int fcntl(int fd, int cmd, .../* struct flock *flockptr */);
struct flock {
short l_type; /* F_RDLCK, F_WRLCK, F_UNLCK */
short l_whence; /* SEEK_SET, SEEK_CUR, SEEK_END */
off_t l_start; /* offset in bytes, relative to l_whence */
off_t l_len; /* length, in bytes, 0 means lock to EOF */
pid_t l_pid; /* returned with F_GETLK */
}
//l_type的類型
F_RDLCK:讀鎖
F_WRLCK:寫鎖
F_UNLCK:解鎖
//l_whence的類型
SEEK_SET:目前位置為檔案的開頭,新位置為偏移量的大小
SEEK_CUR:目前位置為檔案指針的位置,新位置為目前位置加上偏移量
SEEK_END:目前位置為檔案的結尾,新位置為檔案的大小加上偏移量的大小
一個例子
#include<stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/file.h>
#include <errno.h>
#include <string.h>
#include <time.h>
#include <fcntl.h>
//#include "rwlock.h"
/*
 * Describe a byte range in a struct flock and pass it to fcntl().
 *
 *   fd      descriptor the request applies to
 *   cmd     fcntl command (callers in this file pass F_SETLK or F_SETLKW)
 *   type    lock type stored in l_type (F_RDLCK, F_WRLCK or F_UNLCK)
 *   offset  start of the range, interpreted relative to whence
 *   whence  SEEK_SET, SEEK_CUR or SEEK_END
 *   len     length of the range in bytes
 *
 * Returns the value returned by fcntl().
 */
static int lock_reg(int fd,int cmd,int type,off_t offset,int whence,off_t len) {
    struct flock region;

    region.l_whence = whence;
    region.l_start = offset;
    region.l_len = len;
    region.l_type = type;

    int rc = fcntl(fd, cmd, &region);
    return rc;
}
/*
 * Ask the kernel (F_GETLK) whether a lock of the given type could be placed
 * on the byte range.
 *
 *   fd      descriptor to query
 *   type    lock type to test for (F_RDLCK or F_WRLCK)
 *   offset  start of the range, interpreted relative to whence
 *   whence  SEEK_SET, SEEK_CUR or SEEK_END
 *   len     length of the range in bytes
 *
 * Returns -1 when fcntl() fails, 0 when fcntl() reports F_UNLCK (nothing
 * blocks the request), otherwise the l_pid value filled in by fcntl().
 */
static pid_t lock_test(int fd,int type,off_t offset,int whence,off_t len) {
    struct flock lock = {0};

    lock.l_type = type;
    lock.l_start = offset;
    lock.l_whence = whence;
    lock.l_len = len;

    if (fcntl(fd, F_GETLK, &lock) == -1) {
        return -1;
    }
    /* Must be a comparison: an assignment here made every query report "unlocked". */
    if (lock.l_type == F_UNLCK) {
        return 0;
    }
    return lock.l_pid;
}
/* Request a read lock (F_RDLCK) on the range using F_SETLKW; returns lock_reg()'s result. */
int read_lock(int fd,off_t offset,int whence,off_t len) {
    const int rc = lock_reg(fd, F_SETLKW, F_RDLCK, offset, whence, len);
    return rc;
}
/* Request a read lock (F_RDLCK) on the range using F_SETLK; returns lock_reg()'s result. */
int read_lock_try(int fd,off_t offset,int whence,off_t len) {
    const int rc = lock_reg(fd, F_SETLK, F_RDLCK, offset, whence, len);
    return rc;
}
/* Request a write lock (F_WRLCK) on the range using F_SETLKW; returns lock_reg()'s result. */
int write_lock(int fd,off_t offset,int whence,off_t len) {
    const int rc = lock_reg(fd, F_SETLKW, F_WRLCK, offset, whence, len);
    return rc;
}
/* Request a write lock (F_WRLCK) on the range using F_SETLK; returns lock_reg()'s result. */
int write_lock_try(int fd,off_t offset,int whence,off_t len) {
    const int rc = lock_reg(fd, F_SETLK, F_WRLCK, offset, whence, len);
    return rc;
}
/* Release the range by issuing F_SETLK with type F_UNLCK; returns lock_reg()'s result. */
int unlock(int fd,off_t offset, int whence,off_t len) {
    const int rc = lock_reg(fd, F_SETLK, F_UNLCK, offset, whence, len);
    return rc;
}
/*
 * Returns 1 when lock_test() reports 0 for an F_RDLCK request on the range,
 * and 0 for any other result (including lock_test()'s -1 error value).
 */
int is_read_lockable(int fd, off_t offset,int whence,off_t len) {
    const pid_t holder = lock_test(fd, F_RDLCK, offset, whence, len);
    return holder == 0;
}
/*
 * Returns 1 when lock_test() reports 0 for an F_WRLCK request on the range,
 * and 0 for any other result (including lock_test()'s -1 error value).
 */
int is_write_lockable(int fd, off_t offset,int whence,off_t len) {
    const pid_t holder = lock_test(fd, F_WRLCK, offset, whence, len);
    return holder == 0;
}
/*
 * Record-lock demo: the parent write-locks bytes 0..9 of aa.log, forks, and
 * sleeps for 10 seconds; the child write-locks bytes 20..29, releases them and
 * exits; the parent then releases its own range.
 *
 * Returns 0 on success, 1 when the file cannot be opened or the parent's lock
 * cannot be taken.
 */
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;

    int fd = open("aa.log",O_RDWR|O_APPEND);
    if (fd < 0) {
        perror("open aa.log");
        return 1;
    }
    if (write_lock(fd, 0, SEEK_SET, 10) < 0) {
        perror("write_lock");
        close(fd);
        return 1;
    }

    pid_t pid = fork();
    if (pid > 0) {
        printf("sleep -> parent 10 second\n");
        sleep(10);
    }
    else if (pid == 0) {
        if (write_lock(fd, 20, SEEK_SET, 10) < 0) {
            perror("write_lock (child)");
            exit(1);
        }
        printf("chiild get write_lock ok\n");
        unlock(fd, 20, SEEK_SET, 10);
        printf("unlock child lock\n");
        exit(0);
    }
    else {
        /* Capture errno before printf() has a chance to overwrite it. */
        int fork_errno = errno;
        printf("fork error ->%d\n", fork_errno);
        exit(fork_errno);
    }

    unlock(fd, 0, SEEK_SET, 10);
    printf("parent unlock ok\n");
    close(fd);
    return 0;
}
//執行結果
sleep -> parent 10 second
chiild get write_lock ok
unlock child lock
parent unlock ok
//如果将子程序中加鎖的起始偏移改為8(與父程序鎖住的第0~9位元組區間重疊)
else if(pid == 0) {
write_lock(fd,8 , SEEK_SET, 10);
。。。
//執行結果為
sleep -> parent 10 second
parent unlock ok
chiild get write_lock ok
unlock child lock
//用strace分析程式
open("aa.log", O_RDWR|O_APPEND) = 3
fcntl(3, F_SETLKW, {l_type=F_WRLCK, l_whence=SEEK_SET, l_start=0, l_len=10}) = 0
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f818487aa10) = 32736
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 1), ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f8184884000
write(1, "sleep -> parent 10 second\n", 26sleep -> parent 10 second
) = 26
rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0
rt_sigaction(SIGCHLD, NULL, {SIG_DFL, [], 0}, 8) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
nanosleep({10, 0}, strace: Process 32736 attached
<unfinished ...>
[pid 32736] fcntl(3, F_SETLKW, {l_type=F_WRLCK, l_whence=SEEK_SET, l_start=20, l_len=10}) = 0
[pid 32736] fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 1), ...}) = 0
[pid 32736] mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f8184884000
[pid 32736] write(1, "chiild get write_lock ok\n", 25chiild get write_lock ok
) = 25
[pid 32736] fcntl(3, F_SETLK, {l_type=F_UNLCK, l_whence=SEEK_SET, l_start=20, l_len=10}) = 0
[pid 32736] write(1, "unlock child lock\n", 18unlock child lock
) = 18
[pid 32736] exit_group(0) = ?
[pid 32736] +++ exited with 0 +++
<... nanosleep resumed> {9, 999332497}) = ? ERESTART_RESTARTBLOCK (Interrupted by signal)
--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=32736, si_uid=0, si_status=0, si_utime=0, si_stime=0} ---
restart_syscall(<... resuming interrupted nanosleep ...>
) = 0
fcntl(3, F_SETLK, {l_type=F_UNLCK, l_whence=SEEK_SET, l_start=0, l_len=10}) = 0
write(1, "parent unlock ok\n", 17parent unlock ok
) = 17
exit_group(0) = ?
關于記錄鎖的FreeBSD實作
![](https://img.laitimes.com/img/_0nNw4CM6IyYiwiM6ICdiwiIwczX0xiRGZkRGZ0Xy9GbvNGL2EzXlpXazxidGdVY1lzVZBHazIGasdUZwhmMMBjVtJWd0ckW65UbM5WOHJWa5kHT20ESjBjUIF2X0hXZ0xCMx81dvRWYoNHLrdEZwZ1Rh5WNXp1bwNjW1ZUba9VZwlHdssmch1mclRXY39CXldWYtlWPzNXZj9mcw1ycz9WL49zZuBnLzITMwEzNxgTM1AjMxgTMwIzLc52YucWbp5GZzNmLn9Gbi1yZtl2Lc9CX6MHc0RHaiojIsJye.png)
非阻塞io
#include <fcntl.h>
int fcntl(int fd, int cmd, ... /* int arg */);
flags = fcntl(socket_fd, F_GETFL); //get 描述符狀态
fcntl(socket_fd, F_SETFL, flags | O_NONBLOCK); //設定描述符為非阻塞
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <fcntl.h>
#include <errno.h>
/* Size in bytes of the buffer filled from standard input. */
int max_len = 500000;

/*
 * Non-blocking write demo: read up to max_len bytes from stdin, switch stdout
 * to non-blocking mode, then keep calling write() until everything has been
 * written, logging each attempt's result and errno to stderr.
 *
 * Returns 0 on success, 1 on a read, fcntl or unrecoverable write failure.
 */
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;

    char buf[max_len];
    int read_count = read(STDIN_FILENO, buf, sizeof(buf));
    fprintf(stderr, "read %d bytes\n",read_count);
    if (read_count < 0) {
        perror("read");
        return 1;
    }

    /*
     * O_NONBLOCK is a status flag, not an fcntl command: it has to be OR-ed
     * into the flags fetched with F_GETFL and stored back with F_SETFL.
     */
    int saved_flags = fcntl(STDOUT_FILENO, F_GETFL);
    if (saved_flags == -1 ||
        fcntl(STDOUT_FILENO, F_SETFL, saved_flags | O_NONBLOCK) == -1) {
        perror("fcntl");
        return 1;
    }

    char *ptr = buf;
    int status = 0;
    while (read_count > 0) {
        errno = 0;
        int nwrite = write(STDOUT_FILENO,ptr,read_count);
        int write_errno = errno;   /* keep it safe from sleep()/fprintf() */
        sleep(1);
        fprintf(stderr, "nwrite = %d, errno=%d\n", nwrite, write_errno);
        if (nwrite > 0) {
            ptr += nwrite;
            read_count -= nwrite;
        }
        else if (nwrite < 0 && write_errno != EAGAIN && write_errno != EINTR) {
            /* Not a "try again" condition: retrying would loop forever. */
            status = 1;
            break;
        }
    }

    /* Put the descriptor back the way it was found. */
    fcntl(STDOUT_FILENO, F_SETFL, saved_flags);
    return status;
}
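//strace執行結果(注意:這份輸出來自把O_NONBLOCK直接當成cmd傳給fcntl的寫法,是以兩次fcntl都傳回EINVAL,描述符實際上仍是阻塞的;正确做法是先用F_GETFL取得旗标,再用F_SETFL設定)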
execve("./noblock", ["./noblock", "2"], [/* 23 vars */]) = 0
brk(NULL) = 0x670000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4ca564a000
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=30479, ...}) = 0
mmap(NULL, 30479, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f4ca5642000
close(3) = 0
open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\20\35\2\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=2127336, ...}) = 0
mmap(NULL, 3940800, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f4ca5067000
mprotect(0x7f4ca521f000, 2097152, PROT_NONE) = 0
mmap(0x7f4ca541f000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1b8000) = 0x7f4ca541f000
mmap(0x7f4ca5425000, 16832, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f4ca5425000
close(3) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4ca5641000
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4ca563f000
arch_prctl(ARCH_SET_FS, 0x7f4ca563f740) = 0
mprotect(0x7f4ca541f000, 16384, PROT_READ) = 0
mprotect(0x600000, 4096, PROT_READ) = 0
mprotect(0x7f4ca564b000, 4096, PROT_READ) = 0
munmap(0x7f4ca5642000, 30479) = 0
read(0, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 500000) = 500000
write(2, "read 500000 bytes\n", 18read 500000 bytes
) = 18
fcntl(1, 0x800 /* F_??? */, 0x7f4ca54259f0) = -1 EINVAL (Invalid argument)
write(1, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 500000) = 500000
rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0
rt_sigaction(SIGCHLD, NULL, {SIG_DFL, [], 0}, 8) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
nanosleep({1, 0}, 0x7fff9868c350) = 0
write(2, "nwrite = 500000, errno=0\n", 25nwrite = 500000, errno=0
) = 25
fcntl(1, 0xfffff7ff /* F_??? */, 0x7f4ca54259f0) = -1 EINVAL (Invalid argument)
exit_group(0) = ?
+++ exited with 0 +++
另一個例子
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include<fcntl.h>
#include <errno.h>
/*
 * Open /etc/profile with O_NONBLOCK and copy it to stdout in 100-byte reads,
 * printing the size of every read and "EAGAIN..." whenever read() reports
 * EAGAIN.
 *
 * Returns 0 when the file has been copied; exits with status 1 on an open,
 * fstat or read failure.
 */
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;

    int fd = open("/etc/profile", O_RDONLY | O_NONBLOCK);
    if (fd < 0) {
        perror("open");
        exit(1);
    }

    struct stat f_stat;
    if (fstat(fd,&f_stat) < 0) {
        perror("fstat");
        close(fd);
        exit(1);
    }

    char buf[100];
    int read_size = 0;
    int total_size = f_stat.st_size;
    while(1) {
        errno = 0;
        read_size = read(fd, buf, sizeof(buf));
        if(read_size < 0) {
            if(EAGAIN == errno) {
                printf("EAGAIN...\n");
                continue;
            }
            printf("error\n");
            exit(1);
        }

        printf("read_size -> %d\n",read_size);
        if(read_size == 0) {
            /* End of file before the expected byte count: stop instead of spinning. */
            break;
        }
        fwrite(buf,sizeof(char),read_size,stdout);
        total_size -= read_size;
        if(total_size <= 0 ) {
            break;
        }
    }

    close(fd);
    return 0;
}
//執行後列印出 /etc/profile的内容
//執行多次,并沒有出現 EAGAIN 這樣的異常(O_NONBLOCK 對一般磁碟檔案沒有作用,read 不會因為資料尚未就緒而傳回 EAGAIN)
一個非阻塞狀态機的例子
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
/* Size in bytes of each state machine's transfer buffer. */
#define BUFSIZE 1024
/* Device path opened by main(). */
#define TTY "/dev/tty"
/* States of the relay state machine, dispatched on in fsm_driver(). */
enum {
STATE_R, /* read state */
STATE_W, /* write state */
STATE_Ex, /* exception state: report the error, then move to STATE_T */
STATE_T /* terminated state */
};
/* Per-direction state for one "read from sfd, write to dfd" relay. */
struct fsm_st {
int state; /* current state of the state machine */
int sfd; /* source file descriptor to read from */
int dfd; /* destination file descriptor to write to */
char buf[BUFSIZE]; /* transfer buffer */
int len; /* amount of data actually obtained by one read */
int pos; /* offset into buf: records how far each loop iteration has written while persisting until all n bytes are out */
char *errstr; /* error message */
};
/* 狀态機驅動 */
/*
 * Advance one state machine by a single step.
 *
 *   STATE_R   read up to BUFSIZE bytes from sfd; data -> STATE_W, end of file
 *             -> STATE_T, EAGAIN -> stay in STATE_R, other errors -> STATE_Ex
 *   STATE_W   write the unwritten part of buf to dfd; everything written ->
 *             STATE_R, partial write or EAGAIN -> stay in STATE_W, other
 *             errors -> STATE_Ex
 *   STATE_Ex  report errstr with perror() and move to STATE_T
 *   STATE_T   nothing to do
 *
 * Any other state value aborts the process.
 */
static void fsm_driver(struct fsm_st *fsm) {
    int written;

    switch (fsm->state) {
    case STATE_R:
        fsm->len = read(fsm->sfd, fsm->buf, BUFSIZE);
        if (fsm->len > 0) {
            /* Got data: start writing from the beginning of the buffer. */
            fsm->pos = 0;
            fsm->state = STATE_W;
        } else if (fsm->len == 0) {
            /* End of file. */
            fsm->state = STATE_T;
        } else if (errno == EAGAIN) {
            /* Nothing available yet: read again on the next step. */
            fsm->state = STATE_R;
        } else {
            fsm->errstr = "read()";
            fsm->state = STATE_Ex;
        }
        break;

    case STATE_W:
        written = write(fsm->dfd, fsm->buf + fsm->pos, fsm->len);
        if (written >= 0) {
            fsm->pos += written;
            fsm->len -= written;
            /* Keep writing until the whole chunk is out, then read the next one. */
            fsm->state = (fsm->len == 0) ? STATE_R : STATE_W;
        } else if (errno == EAGAIN) {
            /* Destination not ready: write again on the next step. */
            fsm->state = STATE_W;
        } else {
            fsm->errstr = "write()";
            fsm->state = STATE_Ex;
        }
        break;

    case STATE_Ex:
        perror(fsm->errstr);
        fsm->state = STATE_T;
        break;

    case STATE_T:
        /* Terminal state: nothing to do in this example. */
        break;

    default:
        /* Unknown state value: stop immediately rather than continue. */
        abort();
    }
}
/* 推動狀态機 */
/*
 * Relay data in both directions between fd1 and fd2 with two state machines
 * (fd1 -> fd2 and fd2 -> fd1). Both descriptors are switched to non-blocking
 * mode for the duration and their original status flags are restored before
 * returning. Runs until both machines have reached STATE_T.
 */
static void relay(int fd1,int fd2) {
    struct fsm_st fsm12, fsm21;

    /* Save the current flags, then add O_NONBLOCK so a step never blocks. */
    const int fd1_save = fcntl(fd1, F_GETFL);
    fcntl(fd1, F_SETFL, fd1_save | O_NONBLOCK);
    const int fd2_save = fcntl(fd2, F_GETFL);
    fcntl(fd2, F_SETFL, fd2_save | O_NONBLOCK);

    /* Each machine only knows "read source, write destination". */
    fsm12.sfd = fd1;
    fsm12.dfd = fd2;
    fsm12.state = STATE_R;

    fsm21.sfd = fd2;
    fsm21.dfd = fd1;
    fsm21.state = STATE_R;

    for (;;) {
        if (fsm12.state == STATE_T && fsm21.state == STATE_T) {
            break;
        }
        fsm_driver(&fsm12);
        fsm_driver(&fsm21);
    }

    fcntl(fd1, F_SETFL, fd1_save);
    fcntl(fd2, F_SETFL, fd2_save);
}
/*
 * Open the TTY device twice (once blocking, once non-blocking), print a
 * "TTY" banner on each descriptor and relay data between the two.
 * relay() switches both descriptors to non-blocking mode itself.
 *
 * Exits with status 0 on success and 1 when an open or a banner write fails.
 */
int main() {
    static const char banner[] = "TTY\n";
    /* Exclude the terminating NUL so it is not sent to the terminal. */
    const size_t banner_len = sizeof(banner) - 1;
    int fd_r,fd_w;

    fd_r = open(TTY,O_RDWR);
    if(fd_r < 0) {
        perror("open()");
        exit(1);
    }
    if(write(fd_r,banner,banner_len) < 0) {
        perror("write()");
        close(fd_r);
        exit(1);
    }

    fd_w = open(TTY,O_RDWR|O_NONBLOCK);
    if(fd_w < 0) {
        perror("open()");
        close(fd_r);
        exit(1);
    }
    if(write(fd_w,banner,banner_len) < 0) {
        perror("write()");
        close(fd_r);
        close(fd_w);
        exit(1);
    }

    relay(fd_r,fd_w);

    close(fd_r);
    close(fd_w);
    exit(0);
}
//strace結果,設定成非阻塞之後,fd=3,fd=4的兩個描述符就不斷出現EAGAIN 錯誤
。。。
open("/dev/tty", O_RDWR) = 3
write(3, "TTY\n\0", 5TTY
) = 5
open("/dev/tty", O_RDWR|O_NONBLOCK) = 4
write(4, "TTY\n\0", 5TTY
) = 5
fcntl(3, F_GETFL) = 0x8002 (flags O_RDWR|O_LARGEFILE)
fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK|O_LARGEFILE) = 0
fcntl(4, F_GETFL) = 0x8802 (flags O_RDWR|O_NONBLOCK|O_LARGEFILE)
fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK|O_LARGEFILE) = 0
read(3, 0x7ffffbdb12bc, 1024) = -1 EAGAIN (Resource temporarily unavailable)
read(4, 0x7ffffbdb0e9c, 1024) = -1 EAGAIN (Resource temporarily unavailable)
read(3, 0x7ffffbdb12bc, 1024) = -1 EAGAIN (Resource temporarily unavailable)
read(4, 0x7ffffbdb0e9c, 1024) = -1 EAGAIN (Resource temporarily unavailable)
。。。
select
從select傳回時,核心告訴我們:
已準備好的描述符的數量。
對于讀、寫或異常這三個狀态中的每一個,哪些描述符已準備好。
#include <sys/select.h>
int select(int maxfdp1,fd_set *readfds,fd_set *writefds,fd_set *exceptfds,struct timeval *tvptr);
//傳回值:準備就緒的描述符數,若逾時則傳回0,若出錯則傳回-1
struct timeval{
long tv_sec; //seconds
long tv_usec;//and microseconds
}
#include <sys/select.h>
int FD_ISSET(int fd,fd_set *fdset);//傳回值:若fd在描述符集中則傳回非0值,否則傳回0
void FD_CLR(int fd,fd_set *fdset);
void FD_SET(int fd,fd_set *fdset);
void FD_ZERO(fd_set *fdset);
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/select.h>
#include <sys/time.h>
#include <sys/types.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
/*
 * Return the larger of a and b.
 * (The previous body returned the smaller one, which made select()'s first
 * argument too small to cover the higher-numbered descriptor.)
 */
int max(int a, int b) {
    if (a < b) {
        return b;
    }
    return a;
}
/*
 * Open the file "f_1" non-blocking and return the descriptor from open().
 * NOTE(review): despite the function's name the file is opened O_RDONLY; an
 * O_WRONLY variant was left commented out in the original - confirm intent.
 */
int get_write_fileno() {
    return open("f_1",O_RDONLY|O_NONBLOCK);
}
/* Return non-zero when the len bytes just read into buf begin with "end". */
static int select_saw_end(const char *buf, ssize_t len) {
    return len >= 3 && buf[0] == 'e' && buf[1] == 'n' && buf[2] == 'd';
}

/*
 * Multiplex two descriptors with select(): data read from fd_1 is echoed to
 * stdout, data read from fd_2 is read and discarded. The loop ends when either
 * descriptor delivers input starting with "end" or reaches end of file.
 * Both descriptors are put into non-blocking mode for the duration and their
 * original status flags are restored before returning.
 *
 * Exits the process with status 1 when select() fails.
 */
void io_driver(int fd_1, int fd_2) {
    printf("fd_1 -> %d\n", fd_1);
    printf("fd_2 -> %d\n", fd_2);

    int fd_1_save = fcntl(fd_1, F_GETFL);
    int fd_2_save = fcntl(fd_2, F_GETFL);
    fcntl(fd_1, F_SETFL, fd_1_save|O_NONBLOCK);
    fcntl(fd_2, F_SETFL, fd_2_save|O_NONBLOCK);

    char buf_1[100];
    char buf_2[100];
    printf("sizeof --> %zu\n", sizeof(buf_1));

    int write_fd = get_write_fileno();
    printf("fifo fd -> %d\n",write_fd);

    /* select() examines descriptors 0..nfds-1, so nfds is highest fd + 1. */
    const int nfds = (fd_1 > fd_2 ? fd_1 : fd_2) + 1;

    int done = 0;
    while(!done) {
        /* select() overwrites the set, so it is rebuilt on every pass. */
        fd_set r_set;
        FD_ZERO(&r_set);
        FD_SET(fd_1, &r_set);
        FD_SET(fd_2, &r_set);
        if( select(nfds, &r_set, NULL, NULL, NULL) < 0) {
            printf("error\n");
            exit(1);
        }

        if( FD_ISSET(fd_1,&r_set) ) {
            printf("read fd_1...\n");
            ssize_t read_count_1 = read(fd_1,buf_1,sizeof(buf_1));
            if(read_count_1 > 0) {
                write(STDOUT_FILENO ,buf_1, (size_t)read_count_1);
                if(select_saw_end(buf_1, read_count_1)) {
                    done = 1;
                }
            }
            else if(read_count_1 == 0) {
                done = 1;   /* end of file */
            }
            /* read_count_1 < 0 (e.g. EAGAIN): nothing to echo, wait again. */
        }

        if( FD_ISSET(fd_2,&r_set) ) {
            printf("read fd_2...\n");
            /* Read from fd_2 into its own buffer (was a copy-paste of the fd_1 read). */
            ssize_t read_count_2 = read(fd_2,buf_2,sizeof(buf_2));
            if(read_count_2 == 0 || select_saw_end(buf_2, read_count_2)) {
                done = 1;
            }
        }
    }

    if(write_fd >= 0) {
        close(write_fd);
    }
    fcntl(fd_1,F_SETFL,fd_1_save);
    fcntl(fd_2,F_SETFL,fd_2_save);
}
/*
 * Open /dev/tty twice in non-blocking read-only mode and hand both
 * descriptors to io_driver().
 *
 * Returns 0 on success, 1 when either open fails.
 */
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;

    int fd_1 = open("/dev/tty", O_RDONLY|O_NONBLOCK);
    if (fd_1 < 0) {
        perror("open /dev/tty");
        return 1;
    }
    int fd_2 = open("/dev/tty", O_RDONLY|O_NONBLOCK);
    if (fd_2 < 0) {
        perror("open /dev/tty");
        close(fd_1);
        return 1;
    }

    io_driver(fd_1, fd_2);

    close(fd_1);
    close(fd_2);
    return 0;
}
//列印結果
write(1, "fd_1 -> 3\n", 10fd_1 -> 3
) = 10
write(1, "fd_2 -> 4\n", 10fd_2 -> 4
) = 10
fcntl(3, F_GETFL) = 0x8800 (flags O_RDONLY|O_NONBLOCK|O_LARGEFILE)
fcntl(4, F_GETFL) = 0x8800 (flags O_RDONLY|O_NONBLOCK|O_LARGEFILE)
fcntl(3, F_SETFL, O_RDONLY|O_NONBLOCK|O_LARGEFILE) = 0
fcntl(4, F_SETFL, O_RDONLY|O_NONBLOCK|O_LARGEFILE) = 0
write(1, "sizeof --> 100\n", 15sizeof --> 100
) = 15
open("f_1", O_RDONLY|O_NONBLOCK) = 5
write(1, "fifo fd -> 5\n", 13fifo fd -> 5
) = 13
select(4, [3], NULL, NULL, NULL111111111111
) = 1 (in [3])
write(1, "read fd_1...\n", 13read fd_1...
) = 13
read(3, "111111111111\n", 100) = 13
write(1, "111111111111\n", 13111111111111
) = 13
select(4, [3], NULL, NULL, NULL1111111111111122222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222223333333333333333333333333333333333333333333333333334444444444444444444444444444444444444444444444444444455555555555555555555555555555555555555555566666666666666666666666666666666666666667777777777777777777777777777777777777777
) = 1 (in [3])
write(1, "read fd_1...\n", 13read fd_1...
) = 13
read(3, "11111111111111222222222222222222"..., 100) = 100
write(1, "11111111111111222222222222222222"..., 1001111111111111122222222222222222222222222222222222222222222222222222222222222222222222222222222222222) = 100
select(4, [3], NULL, NULL, NULL) = 1 (in [3])
write(1, "read fd_1...\n", 13read fd_1...
) = 13
read(3, "22222222222222222222222222222222"..., 100) = 100
write(1, "22222222222222222222222222222222"..., 1002222222222222222222222222222222222222233333333333333333333333333333333333333333333333333344444444444) = 100
select(4, [3], NULL, NULL, NULL) = 1 (in [3])
write(1, "read fd_1...\n", 13read fd_1...
) = 13
read(3, "44444444444444444444444444444444"..., 100) = 100
write(1, "44444444444444444444444444444444"..., 1004444444444444444444444444444444444444444445555555555555555555555555555555555555555556666666666666666) = 100
select(4, [3], NULL, NULL, NULL) = 1 (in [3])
write(1, "read fd_1...\n", 13read fd_1...
) = 13
read(3, "66666666666666666666666677777777"..., 100) = 65
write(1, "66666666666666666666666677777777"..., 656666666666666666666666667777777777777777777777777777777777777777
) = 65
select(4, [3], NULL, NULL, NULL
改用pthread方式去實作
int fd_1_no;
int fd_2_no;
/*
 * Thread entry point passed to pthread_create().
 * The body is elided in this listing; presumably it is the select() loop of
 * the earlier io_driver, operating on fd_1_no / fd_2_no - confirm.
 */
void *io_driver(void *arg) {
//body unchanged
}
/*
 * pthread variant: open /dev/tty twice into the globals fd_1_no / fd_2_no,
 * run io_driver() in a separate thread and wait for it to finish.
 *
 * Returns 0 on success, 1 when the thread could not be created.
 */
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;

    fd_1_no = open("/dev/tty", O_RDONLY|O_NONBLOCK);
    fd_2_no = open("/dev/tty", O_RDONLY|O_NONBLOCK);

    pthread_t p_1;
    void *ret_1 = NULL;
    int status = 0;

    /* Only join a thread that was really created; p_1 is indeterminate otherwise. */
    if (pthread_create(&p_1, NULL, io_driver, NULL) == 0) {
        pthread_join(p_1, &ret_1);
    } else {
        status = 1;
    }

    close(fd_1_no);
    close(fd_2_no);
    return status;
}
//strace -ff 結果
[pid 10141] select(4, [3], NULL, NULL, NULL111111111111111
) = 1 (in [3])
[pid 10141] write(1, "read fd_1...\n", 13read fd_1...
) = 13
[pid 10141] read(3, "111111111111111\n", 100) = 16
[pid 10141] write(1, "111111111111111\n", 16111111111111111
) = 16
[pid 10141] select(4, [3], NULL, NULL, NULL222222222222222222222222222222222222222222222222222222222333333333333333333333333333333333333333333333333333333333334444444444444444444444444444444444444444444444444445555555555555555555555555555555555555566666666666666666666666666666666666666666677777777777777777777777777777777778888888888888888888888888888888888888888888888888899999999999999999
) = 1 (in [3])
[pid 10141] write(1, "read fd_1...\n", 13read fd_1...
) = 13
[pid 10141] read(3, "22222222222222222222222222222222"..., 100) = 100
[pid 10141] write(1, "22222222222222222222222222222222"..., 1002222222222222222222222222222222222222222222222222222222223333333333333333333333333333333333333333333) = 100
[pid 10141] select(4, [3], NULL, NULL, NULL) = 1 (in [3])
[pid 10141] write(1, "read fd_1...\n", 13read fd_1...
) = 13
[pid 10141] read(3, "33333333333333334444444444444444"..., 100) = 100
[pid 10141] write(1, "33333333333333334444444444444444"..., 1003333333333333333444444444444444444444444444444444444444444444444444555555555555555555555555555555555) = 100
[pid 10141] select(4, [3], NULL, NULL, NULL) = 1 (in [3])
[pid 10141] write(1, "read fd_1...\n", 13read fd_1...
) = 13
[pid 10141] read(3, "55555666666666666666666666666666"..., 100) = 100
[pid 10141] write(1, "55555666666666666666666666666666"..., 1005555566666666666666666666666666666666666666666677777777777777777777777777777777778888888888888888888) = 100
[pid 10141] select(4, [3], NULL, NULL, NULL) = 1 (in [3])
[pid 10141] write(1, "read fd_1...\n", 13read fd_1...
) = 13
[pid 10141] read(3, "88888888888888888888888888888889"..., 100) = 49
[pid 10141] write(1, "88888888888888888888888888888889"..., 49888888888888888888888888888888899999999999999999
) = 49
[pid 10141] select(4, [3], NULL, NULL, NULL
poll
//poll - wait for some event on a file descriptor
#include <poll.h>
//fds:實際上是一個數組的首位址,因為 poll可以幫助我們監視多個檔案描述符,而一個檔案描述放到一個 //struct pollfd 結構體中,多個檔案描述符就需要一個數組來存儲了。
//nfds:fds 這個數組的長度。在參數清單中使用數組首位址 + 長度的做法還是比較常見的。
//timeout:阻塞等待的逾時時間。傳入 -1 則始終阻塞,不逾時。
int poll(struct pollfd *fds, nfds_t nfds, int timeout);
struct pollfd {
int fd; /* 需要監視的檔案描述符 */
short events; /* 要監視的事件 */
short revents; /* 該檔案描述符發生了的事件 */
};
//結構體中的事件可以指定下面七種事件,同時監視多個事件可以使用按位或(|)添加:
POLLIN 檔案描述符可讀
POLLPRI 可以非阻塞的讀高優先級的資料
POLLOUT 檔案描述符可寫
POLLRDHUP 流式套接字連接配接點關閉,或者關閉寫半連接配接。
POLLERR 已出錯
POLLHUP 已挂斷(一般指裝置)
POLLNVAL 參數非法
一個例子
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/select.h>
#include <sys/time.h>
#include <sys/types.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <poll.h>
#include <errno.h>
/* Return the larger of the two integers (a when they are equal). */
int max(int a, int b) {
    return (a < b) ? b : a;
}
/* Return non-zero when the len bytes just read into buf begin with "end". */
static int poll_saw_end(const char *buf, ssize_t len) {
    return len >= 3 && buf[0] == 'e' && buf[1] == 'n' && buf[2] == 'd';
}

/*
 * Read once from a descriptor that poll() reported readable and echo the data
 * to stdout. Returns 1 when the loop should stop (end of file, or the input
 * starts with "end"), 0 otherwise. A failed read (e.g. EAGAIN) echoes nothing.
 */
static int poll_echo_once(int fd, const char *banner) {
    char buf[100];

    printf("%s", banner);
    ssize_t count = read(fd, buf, sizeof(buf));
    if (count == 0) {
        return 1;
    }
    if (count < 0) {
        return 0;
    }
    write(STDOUT_FILENO, buf, (size_t)count);
    return poll_saw_end(buf, count);
}

/*
 * Multiplex two descriptors with poll(), echoing whatever becomes readable on
 * either of them to stdout. The loop ends when one of them delivers input
 * starting with "end", reaches end of file, or reports an error/hang-up
 * without readable data. Both descriptors are put into non-blocking mode for
 * the duration and their original status flags are restored afterwards.
 *
 * Returns 0; exits the process with status 1 when poll() fails for a reason
 * other than EINTR.
 */
int driver(int fd_1, int fd_2) {
    int fd_1_save = fcntl(fd_1,F_GETFL);
    int fd_2_save = fcntl(fd_2,F_GETFL);
    fcntl(fd_1, F_SETFL, fd_1_save|O_NONBLOCK);
    fcntl(fd_2, F_SETFL, fd_2_save|O_NONBLOCK);

    /* Initialise every field: OR-ing into an uninitialised events is undefined. */
    struct pollfd pfd[2] = {
        { .fd = fd_1, .events = POLLIN, .revents = 0 },
        { .fd = fd_2, .events = POLLIN, .revents = 0 },
    };
    static const char *const banners[2] = { "read fd_1\n", "read fd_2\n" };

    int done = 0;
    while(!done) {
        if( poll(pfd,2,-1)<0 ) {
            if(errno == EINTR) {
                continue;
            }
            printf("error\n");
            exit(1);
        }
        for (int i = 0; i < 2; i++) {
            if(pfd[i].revents & POLLIN) {
                if(poll_echo_once(pfd[i].fd, banners[i])) {
                    done = 1;
                }
            }
            else if(pfd[i].revents & (POLLERR|POLLHUP|POLLNVAL)) {
                /* Nothing more will arrive here; avoid spinning on poll(). */
                done = 1;
            }
        }
    }

    fcntl(fd_1,F_SETFL,fd_1_save);
    fcntl(fd_2,F_SETFL,fd_2_save);
    return 0;
}
/*
 * Open /dev/tty twice (once blocking, once non-blocking) and hand both
 * descriptors to driver(), which switches them to non-blocking mode itself.
 *
 * Returns 0 on success, 1 when either open fails.
 */
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;

    int fd_1 = open("/dev/tty",O_RDONLY);
    if (fd_1 < 0) {
        perror("open /dev/tty");
        return 1;
    }
    int fd_2 = open("/dev/tty",O_RDONLY|O_NONBLOCK);
    if (fd_2 < 0) {
        perror("open /dev/tty");
        close(fd_1);
        return 1;
    }

    driver(fd_1,fd_2);

    close(fd_1);
    close(fd_2);
    return 0;
}
//用strace分析程式
open("/dev/tty", O_RDONLY) = 3
open("/dev/tty", O_RDONLY|O_NONBLOCK) = 4
fcntl(3, F_GETFL) = 0x8000 (flags O_RDONLY|O_LARGEFILE)
fcntl(4, F_GETFL) = 0x8800 (flags O_RDONLY|O_NONBLOCK|O_LARGEFILE)
fcntl(3, F_SETFL, O_RDONLY|O_NONBLOCK|O_LARGEFILE) = 0
fcntl(4, F_SETFL, O_RDONLY|O_NONBLOCK|O_LARGEFILE) = 0
poll([{fd=3, events=POLLIN}, {fd=4, events=POLLIN}], 2, -1
aaaaaaaaaaaaaaaaaaaaaaaaa
) = 2 ([{fd=3, revents=POLLIN}, {fd=4, revents=POLLIN}])
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 7), ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f515010f000
write(1, "read fd_1\n", 10read fd_1
) = 10
read(3, "aaaaaaaaaaaaaaaaaaaaaaaaa\n", 100) = 26
write(1, "aaaaaaaaaaaaaaaaaaaaaaaaa\n", 26aaaaaaaaaaaaaaaaaaaaaaaaa
) = 26
write(1, "read fd_2\n", 10read fd_2
) = 10
read(4, 0x7ffdada914f0, 100) = -1 EAGAIN (Resource temporarily unavailable)
write(1, "", 18446744073709551615) = -1 EFAULT (Bad address)
poll([{fd=3, events=POLLIN}, {fd=4, events=POLLIN}], 2, -1
end
) = 2 ([{fd=3, revents=POLLIN}, {fd=4, revents=POLLIN}])
write(1, "read fd_1\n", 10read fd_1
) = 10
read(3, "end\n", 100) = 4
write(1, "end\n", 4end
) = 4
write(1, "read fd_2\n", 10read fd_2
) = 10
read(4, 0x7ffdada914f0, 100) = -1 EAGAIN (Resource temporarily unavailable)
write(1, "", 18446744073709551615) = -1 EFAULT (Bad address)
fcntl(3, F_SETFL, O_RDONLY|O_LARGEFILE) = 0
fcntl(4, F_SETFL, O_RDONLY|O_NONBLOCK|O_LARGEFILE) = 0
close(3) = 0
close(4) = 0
exit_group(0) = ?
epoll
#include <sys/epoll.h>
//建立一個epoll執行個體,并傳回代表它的fd(size隻是給核心的提示值,Linux 2.6.8之後被忽略,但必須大于0)
int epoll_create(int size);
//操作需要監聽的事件
int epoll_ctl(int epfd, int op, int fd, struct epoll_event *event);
struct epoll_event {
uint32_t events; /* epoll 監視的事件,與 poll(2) 能監視的事件差不多 */
epoll_data_t data; /* 使用者資料,除了能儲存檔案描述符以外,還能儲存其它有關資料 */
}
//epoll_ctl第二個參數op的操作定義
EPOLL_CTL_ADD 增加要監視的檔案描述符
EPOLL_CTL_MOD 更改目标檔案描述符的事件
EPOLL_CTL_DEL 删除要監視的檔案描述符,event 參數會被忽略,可以傳入 NULL
//阻塞監視并傳回監視結果
//epfd,要操作的 epoll 執行個體
//events + maxevents:共同指定了一個結構體數組,數組的起始位置和長度
//timeout:逾時等待的時間,設定為 -1 則始終阻塞監視,不逾時
int epoll_wait(int epfd, struct epoll_event *events, int maxevents, int timeout);
一個例子
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/select.h>
#include <sys/time.h>
#include <sys/types.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <errno.h>
#include <sys/epoll.h>
/*
 * Read once from a descriptor that epoll reported readable, echo the raw bytes
 * with write() and print them again as a string.
 * Returns 0 at end of file and 1 otherwise; a failed read (e.g. EAGAIN)
 * prints only the banner.
 */
static int epoll_echo_once(int fd, const char *banner) {
    char buf[100];

    printf("%s", banner);
    /* Leave room for the terminator that "%s" needs. */
    ssize_t count = read(fd, buf, sizeof(buf) - 1);
    if (count == 0) {
        return 0;
    }
    if (count > 0) {
        buf[count] = '\0';
        write(STDOUT_FILENO, buf, (size_t)count);
        printf("str->%s\n", buf);
    }
    return 1;
}

/*
 * Watch two descriptors with epoll and echo whatever becomes readable on
 * either of them. The loop ends when a descriptor reaches end of file or
 * reports an error/hang-up without readable data. Both descriptors are put
 * into non-blocking mode for the duration and their original status flags are
 * restored afterwards.
 *
 * Returns 0; exits the process with status 1 when epoll_create() fails or
 * epoll_wait() fails for a reason other than EINTR.
 */
int driver(int fd_1, int fd_2) {
    int fd_1_save = fcntl(fd_1, F_GETFL);
    int fd_2_save = fcntl(fd_2, F_GETFL);
    fcntl(fd_1, F_SETFL, fd_1_save|O_NONBLOCK);
    fcntl(fd_2, F_SETFL, fd_2_save|O_NONBLOCK);

    int epoll_fd = epoll_create(10);
    if (epoll_fd < 0) {
        printf("epoll_create error\n");
        exit(1);
    }

    struct epoll_event event;
    event.events = EPOLLIN;
    event.data.fd = fd_1;
    epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fd_1, &event);
    event.events = EPOLLIN;
    event.data.fd = fd_2;
    epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fd_2, &event);

    int running = 1;
    while(running) {
        int ready = epoll_wait(epoll_fd, &event, 1, -1);
        if (ready < 0) {
            if (errno == EINTR) {
                continue;
            }
            printf("epoll_wait error\n");
            exit(1);
        }
        if (ready == 0) {
            continue;
        }

        if (!(event.events & EPOLLIN)) {
            if (event.events & (EPOLLERR|EPOLLHUP)) {
                /* Nothing more will arrive; avoid spinning on epoll_wait(). */
                running = 0;
            }
            continue;
        }

        if (event.data.fd == fd_1) {
            running = epoll_echo_once(fd_1, "fd_1 read...\n");
        }
        else if (event.data.fd == fd_2) {
            running = epoll_echo_once(fd_2, "fd_2 reaad.....\n");
        }
    } /* end while */

    close(epoll_fd);
    fcntl(fd_1, F_SETFL, fd_1_save);
    fcntl(fd_2, F_SETFL, fd_2_save);
    return 0;
}
/*
 * Open /dev/tty twice read-only and hand both descriptors to driver(), which
 * switches them to non-blocking mode itself.
 *
 * Returns 0 on success, 1 when either open fails.
 */
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;

    int fd_1 = open("/dev/tty",O_RDONLY);
    if (fd_1 < 0) {
        perror("open /dev/tty");
        return 1;
    }
    int fd_2 = open("/dev/tty",O_RDONLY);
    if (fd_2 < 0) {
        perror("open /dev/tty");
        close(fd_1);
        return 1;
    }

    driver(fd_1,fd_2);

    close(fd_1);
    close(fd_2);
    return 0;
}
//用strace分析程式
open("/dev/tty", O_RDONLY) = 3
open("/dev/tty", O_RDONLY) = 4
fcntl(3, F_GETFL) = 0x8000 (flags O_RDONLY|O_LARGEFILE)
fcntl(4, F_GETFL) = 0x8000 (flags O_RDONLY|O_LARGEFILE)
fcntl(3, F_SETFL, O_RDONLY|O_NONBLOCK|O_LARGEFILE) = 0
fcntl(4, F_SETFL, O_RDONLY|O_NONBLOCK|O_LARGEFILE) = 0
epoll_create(10) = 5
epoll_ctl(5, EPOLL_CTL_ADD, 3, {EPOLLIN, {u32=3, u64=3}}) = 0
epoll_ctl(5, EPOLL_CTL_ADD, 4, {EPOLLIN, {u32=4, u64=4}}) = 0
epoll_wait(5,
aaaaaa
[{EPOLLIN, {u32=4, u64=4}}], 1, -1) = 1
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 1), ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f11f3bd6000
write(1, "fd_2 reaad.....\n", 16fd_2 reaad.....) = 16
read(4, "aaaaaa\n", 100) = 7
write(1, "aaaaaa\n", 7aaaaaa) = 7
write(1, "str->aaaaaa\n", 12str->aaaaaa) = 12
write(1, "\n", 1) = 1
epoll_wait(5
java的nio例子
import java.io.IOException;
import java.net.InetSocketAddress;
import java.nio.ByteBuffer;
import java.nio.channels.SelectionKey;
import java.nio.channels.Selector;
import java.nio.channels.SocketChannel;
import java.util.Iterator;
import java.util.Set;
/**
* * Created by yang.wang09 on 2018-12-04 14:00.
* */
public class X {
    public static void main(String[] args) throws IOException {
        go();
    }

    /**
     * Connects to a fixed HTTP server, sends one GET request over a
     * non-blocking {@link SocketChannel} and waits on a {@link Selector}
     * until the channel is reported ready.
     *
     * @throws IOException if connecting, writing or selecting fails
     */
    public static void go() throws IOException {
        String host = "www.baidu.com";
        host = "220.181.111.37";
        int port = 80;
        InetSocketAddress ias = new InetSocketAddress(host, port);

        // try-with-resources closes the selector and the channel on every exit path.
        try (SocketChannel channel = SocketChannel.open(ias);
             Selector selector = Selector.open()) {
            channel.configureBlocking(false);
            channel.register(selector, SelectionKey.OP_READ);

            ByteBuffer buf = ByteBuffer.allocate(100);
            String message = "GET / HTTP/1.1\r\nHost:220.181.111.37\r\n\r\n\r\n";
            buf.put(message.getBytes());
            // Switch the buffer from filling to draining; without flip() the
            // write would send the unused tail of the buffer, not the request.
            buf.flip();
            while (buf.hasRemaining()) {
                channel.write(buf);
            }

            boolean isLoop = true;
            while (isLoop) {
                int readyChannels = selector.select();
                if (readyChannels == 0) continue;

                Iterator<SelectionKey> keyIterator = selector.selectedKeys().iterator();
                while (isLoop && keyIterator.hasNext()) {
                    // Inspect the key that is actually ready, and remove it so
                    // it is not reported again by the next select().
                    SelectionKey key = keyIterator.next();
                    keyIterator.remove();

                    if (key.isAcceptable()) {
                        /* a connection was accepted by a ServerSocketChannel. */
                    } else if (key.isConnectable()) {
                        /* a connection was established with a remote server. */
                    } else if (key.isReadable()) {
                        /* a channel is ready for reading */
                        System.out.println("read ok");
                        isLoop = false;
                    } else if (key.isWritable()) {
                        /* a channel is ready for writing */
                        channel.write(buf);
                        System.out.println("write ok");
                        isLoop = false;
                    }
                }
            }
        }
    }
}
strace -o xx.log -ff java X
會生成很多log檔案,将這些log檔案都放到 log目錄下,然後grep "select" *,grep "poll" *,發現隻有epoll,其他的都沒有
java version "1.8.0_131"
Java(TM) SE Runtime Environment (build 1.8.0_131-b11)
Java HotSpot(TM) 64-Bit Server VM (build 25.131-b11, mixed mode)
可見java 8 的多路複用底層是 epoll實作的
xx.log.16795:epoll_create(256) = 7
xx.log.16795:epoll_ctl(7, EPOLL_CTL_ADD, 5, {EPOLLIN, {u32=5, u64=15903069758740758533}}) = 0
xx.log.16795:epoll_ctl(7, EPOLL_CTL_ADD, 4, {EPOLLIN, {u32=4, u64=16044752212915650564}}) = 0
xx.log.16795:epoll_wait(7, [{EPOLLIN, {u32=4, u64=16044752212915650564}}], 8192, -1) = 1
readv和writev
#include<sys/uio.h>
ssize_t readv(int filedes, const struct iovec *iov, int iovcnt);
ssize_t writev(int filedes, const struct iovec *iov, int iovcnt);
/*若成功則傳回已讀,寫的位元組數,若出錯則傳回-1。 */
//這兩個函數的第二個參數是指向iovec結構數組的一個指針:
struct iovec{
void *iov_base; //starting address of buffer
size_t iov_len; //size of buffer
}
//iov數組中的元素數由iovcnt說明。下圖說明了readv和writev的參數和iovec結構。
writev以順序iov[0],iov[1]至iov[iovcnt-1]從緩沖區中聚集輸出資料。writev傳回輸出的位元組總數。
readv則将讀入的資料按照上述同樣順序散布到緩沖區中,readv總是先填滿一個緩沖區,然後再填寫下一個。readv傳回讀到
的總位元組數。如果遇到檔案結尾,已無資料可讀,則傳回0。
例子
#include <stdio.h>
#include <sys/uio.h>
#include <fcntl.h>
/*
 * Scatter/gather demo: readv() up to 15 bytes from a.txt into a 5-byte and a
 * 10-byte buffer, then writev() exactly the bytes that were read to b.txt.
 *
 * Returns 0 on success and -1 when an open, the read or the write fails.
 */
int main(){
    char buf1[5],buf2[10];
    struct iovec iov[2];
    iov[0].iov_base = buf1;
    iov[0].iov_len = sizeof(buf1);
    iov[1].iov_base = buf2;
    iov[1].iov_len = sizeof(buf2);

    int fd = open("a.txt",O_RDWR);
    if(fd < 0){
        perror("open");
        return -1;
    }
    int rsize = readv(fd, iov, 2);
    if(rsize < 0){
        perror("readv");
        close(fd);
        return -1;
    }
    printf("rsize = %d\n",rsize);
    close(fd);

    /*
     * readv() fills the buffers in order, so shrink the vector to what was
     * actually read; otherwise uninitialised bytes would be written out.
     */
    size_t got = (size_t)rsize;
    iov[0].iov_len = got < sizeof(buf1) ? got : sizeof(buf1);
    iov[1].iov_len = got - iov[0].iov_len;

    fd = open("b.txt", O_RDWR|O_CREAT, S_IRUSR|S_IWUSR);
    if(fd < 0){
        perror("open");
        return -1;
    }
    int wsize = writev(fd,iov,2);
    if(wsize < 0){
        perror("writev");
        close(fd);
        return -1;
    }
    printf("wsize = %d\n",wsize);
    close(fd);
    return 0;
}
mmap
mmap将一個檔案或者其它對象映射進記憶體。檔案被映射到多個頁上,如果檔案的大小不是所有頁的大小之和,最後一個頁不被使用的空間将會清零。mmap在使用者空間映射調用系統中作用很大。
具體函數
#include<sys/mman.h>
//addr 起始位址
//len 需要映射的長度
//prot PROT_READ,映射區可讀;
// PROT_WRITE,映射區可寫;PROT_EXEC,映射區可執行;PROT_NONE,映射區不可通路
//flag
// fd,off 需要映射的fd和起始位置
void* mmap(void* addr, size_t len, int prot, int flag, int fd, off_t off);
//addr 起始位址
//len 長度
//prot 跟mmap的一樣
int mprotect(void* addr, size_t len, int prot);
//addr和len 同mmap函數
//flag
//MS_ASYNC,這實際上不要求核心做什麼,讓核心自主去執行同步
//MS_SYNC,要求核心在傳回之前把寫操作完成
//MS_INVALIDATE,是一個可選的标志,它告訴核心丢棄沒有同步的部分
int msync(void* addr, size_t len, int flags)
int munmap(void* addr, size_t len)
mmap的flag
MAP_FIXED //使用指定的映射起始位址,如果由start和len參數指定的記憶體區重疊于現存的映射空間,重疊部分将會被丢棄。如果指定的起始位址不可用,操作将會失敗。并且起始位址必須落在頁的邊界上。
MAP_SHARED //與其它所有映射這個對象的程序共享映射空間。對共享區的寫入,相當于輸出到檔案。直到msync()或者munmap()被調用,檔案實際上不會被更新。
MAP_PRIVATE //建立一個寫入時拷貝的私有映射。記憶體區域的寫入不會影響到原檔案。這個标志和以上标志是互斥的,隻能使用其中一個。
MAP_DENYWRITE //這個标志被忽略。
MAP_EXECUTABLE //同上
MAP_NORESERVE //不要為這個映射保留交換空間。當交換空間被保留,對映射區修改的可能會得到保證。當交換空間不被保留,同時記憶體不足,對映射區的修改會引起段違例信号。
MAP_LOCKED //鎖定映射區的頁面,進而防止頁面被交換出記憶體。
MAP_GROWSDOWN //用于堆棧,告訴核心VM系統,映射區可以向下擴充。
MAP_ANONYMOUS //匿名映射,映射區不與任何檔案關聯。
MAP_ANON //MAP_ANONYMOUS的别稱,不再被使用。
MAP_FILE //相容标志,被忽略。
MAP_32BIT //将映射區放在程序位址空間的低2GB,MAP_FIXED指定時會被忽略。目前這個标志隻在x86-64平台上得到支援。
MAP_POPULATE //為檔案映射通過預讀的方式準備好頁表。随後對映射區的通路不會被頁違例阻塞。
MAP_NONBLOCK //僅和MAP_POPULATE一起使用時才有意義。不執行預讀,隻為已存在于記憶體中的頁面建立頁表入口。
例子
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <string.h>
#include <unistd.h>
#define COPYING (1024*1024*1024) /* 1 GB */
/*
 * Copy argv[1] to argv[2] by mapping both files and memcpy()ing between the
 * mappings, at most COPYING bytes per pass.
 *
 * The destination is first extended to the source size with ftruncate();
 * without that, storing into the mapping raises SIGBUS.
 *
 * Returns 0 on success, 1 on a usage error or any system call failure.
 */
int main(int argc, char *argv[]) {
    if(argc != 3) {
        printf("usage %s <form_file> <to_file>\n",argv[0]);
        return 1;
    }

    int fd_in = open(argv[1], O_RDONLY);
    if(fd_in < 0) {
        perror("open source");
        return 1;
    }
    int fd_out = open(argv[2], O_RDWR|O_CREAT|O_TRUNC,0777);
    if(fd_out < 0) {
        perror("open destination");
        close(fd_in);
        return 1;
    }

    int status = 1;
    off_t off_size = 0;
    struct stat stat_buf;

    if(fstat(fd_in,&stat_buf) < 0) {
        perror("fstat");
        goto out;
    }
    if(ftruncate(fd_out,stat_buf.st_size) < 0) {
        perror("ftruncate");
        goto out;
    }

    while(off_size < stat_buf.st_size) {
        off_t remaining = stat_buf.st_size - off_size;
        size_t copy_size = (remaining > COPYING) ? (size_t)COPYING : (size_t)remaining;

        void *src = mmap(0,copy_size,PROT_READ, MAP_SHARED,fd_in,off_size);
        if(src == MAP_FAILED) {
            perror("mmap source");
            goto out;
        }
        void *dest = mmap(0,copy_size,PROT_READ|PROT_WRITE, MAP_SHARED, fd_out, off_size);
        if(dest == MAP_FAILED) {
            perror("mmap destination");
            munmap(src,copy_size);
            goto out;
        }

        memcpy(dest, src, copy_size);
        munmap(src,copy_size);
        munmap(dest,copy_size);
        off_size += copy_size;
    }
    status = 0;

out:
    close(fd_out);
    close(fd_in);
    return status;
}
參考
檔案映射IO函數
mmap函數和mprotect
進階IO--存儲映射
linux驅動mmap記憶體映射
一起學 Unix 環境進階程式設計 (APUE) 之 進階 IO
select、poll、epoll之間的差別總結[整理]
使用SocketChannel的NIO客戶機伺服器通信示例
Linux 系統 檔案鎖 fcntl函數詳解
檔案鎖 flock及fcntl flock