reactor
把epoll返回的fd读写事件进行封装,并为每种事件设置回调函数,把所有关注的fd以及对应的事件存储在一个数据结构里,与epoll内部的红黑树的节点形成一一对应的关系。epoll返回时使用data.ptr得到我们数据结构中对应的entry,再进行处理
对fd的封装如下
/* Event callback type. The reactor loop invokes it with the entry's fd, the
 * event mask reported by epoll_wait, and the entry's stored user argument. */
typedef int (*NCALLBACK)(int fd, int events, void *arg);
/* Per-fd bookkeeping entry. A pointer to this entry is stored in epoll's
 * data.ptr so the loop can get back to it when the fd becomes ready. */
struct ntyevent {
int fd;
int events; // events currently being watched: EPOLLIN or EPOLLOUT
void *arg; // extra argument handed to the callback (the reactor pointer)
NCALLBACK callback; // callback to invoke when the watched event fires
int used; // whether this entry is currently valid (set by nty_event_add, cleared by nty_event_del)
char buffer[BUFFER_LENGTH]; // one buffer shared by reads and writes, since only one direction is watched at a time
int length; // number of valid bytes in buffer
long last_active; // time at which this fd was last active
int sticky; // when 1, the fd is never closed for inactivity (used for listen fds)
};
reactor数据结构的定义如下
/* Reactor state: one epoll instance plus a growable, fd-indexed table of
 * ntyevent blocks (see ntyreactor_get_event for the indexing scheme). */
struct ntyreactor {
int epfd; // epoll file descriptor created in ntyreactor_init
int block_num; // number of slots in the events pointer array
struct ntyevent **events; //array of ntyevent *, length = block_num, each block has 1024 ntyevent
};
events是一个指针数组,可扩容。每个元素指向一个1024长度的ntyevent数组
整个服务器大致有如下函数
/* Event callbacks; all share the NCALLBACK signature. */
int accept_cb(int fd, int events, void *arg);
int recv_cb(int fd, int events, void *arg);
int send_cb(int fd, int events, void *arg);

/* Helpers that fill in an ntyevent and (un)register it with epoll. */
void nty_event_update(struct ntyevent *ev, int fd, NCALLBACK callback, void *arg);
int nty_event_add(int epfd, int events, struct ntyevent *ev);
int nty_event_del(int epfd, struct ntyevent *ev);

/* Creates a non-blocking listening socket on the given port. */
int start_listen(short port);

/* Reactor lifecycle and main loop. */
int ntyreactor_init(struct ntyreactor *reactor);
struct ntyevent *ntyreactor_get_event(struct ntyreactor *reactor, int fd);
int ntyreactor_destory(struct ntyreactor *reactor);
int ntyreactor_addlistener(struct ntyreactor *reactor, int listenfd, NCALLBACK acceptor);
int ntyreactor_run(struct ntyreactor *reactor);
/* Called from ntyreactor_run (when CHECK_TIMEOUT is enabled) ahead of its
 * definition, so it needs a prototype like every other function here. */
int ntyreactor_check_timeout(struct ntyreactor *reactor, int block_idx);
先看main函数
#define BUFFER_LENGTH 4096 // size in bytes of the buffer inside each ntyevent
#define EVENTS_BLOCK_SIZE 1024 // number of ntyevent entries in one block of the reactor table
#define EVENT_BATCH_SIZE 1024 // maximum number of events fetched by one epoll_wait call
#define SERVER_PORT 8888 // default first listening port when none is given on the command line
#define PORT_COUNT 100 // number of consecutive ports main() listens on
#define CLIENT_TIMEOUT 15 // NOTE(review): not referenced by the code shown; ntyreactor_check_timeout hard-codes 60 — confirm which value is intended
/*
 * Program entry point.
 *
 * Optionally takes the first listening port as argv[1] (default SERVER_PORT),
 * opens PORT_COUNT consecutive listening sockets, registers them with a
 * freshly created reactor and runs the event loop.
 *
 * Returns 0 on a clean shutdown and 1 when start-up fails (bad port argument,
 * out of memory, reactor initialisation failure, or no port could be opened).
 */
int main(int argc, char *argv[]) {
    unsigned short port = SERVER_PORT;
    if (argc == 2) {
        /* strtol lets us reject garbage and out-of-range values, which atoi
         * would silently turn into 0 or a truncated port number. */
        char *end = NULL;
        long requested = strtol(argv[1], &end, 10);
        if (end == argv[1] || *end != '\0' ||
            requested < 1 || requested + PORT_COUNT - 1 > 65535) {
            printf("invalid port: %s\n", argv[1]);
            return 1;
        }
        port = (unsigned short)requested;
    }

    struct ntyreactor *reactor = calloc(1, sizeof(*reactor));
    if (reactor == NULL) {
        printf("cannot allocate reactor\n");
        return 1;
    }
    if (ntyreactor_init(reactor) != 0) {
        /* destory copes with a partially initialised reactor. */
        ntyreactor_destory(reactor);
        free(reactor);
        return 1;
    }

    int listeners = 0;
    for (int i = 0; i < PORT_COUNT; i++) {
        int listenfd = start_listen(port + i);
        if (listenfd < 0) {
            continue; /* never hand an invalid fd to the reactor */
        }
        if (ntyreactor_addlistener(reactor, listenfd, accept_cb) == 0) {
            listeners++;
        } else {
            close(listenfd);
        }
    }

    int rc = 0;
    if (listeners == 0) {
        printf("no listening socket could be opened\n");
        rc = 1;
    } else {
        ntyreactor_run(reactor);
    }

    ntyreactor_destory(reactor);
    free(reactor);
    return rc;
}
PORT_COUNT设为100时,程序监听从8888开始的100个端口,每个端口至少能接受1万个连接,所以连接数能达到一百万。(因为测试的客户端数量比较少,如果不多监听一些端口,会导致(src-ip, src-port, dst-ip, dst-port) 四元组的数量不够用)
其中
ntyreactor_run
是程序主循环
#if CHECK_TIMEOUT
/* Defined further down in the file; declared here so the call inside the loop
 * is not an implicit declaration. */
int ntyreactor_check_timeout(struct ntyreactor *reactor, int block_idx);
#endif

/*
 * Reactor main loop.
 *
 * Waits on the epoll instance (1 s timeout per wait) and, for every ready
 * entry, invokes the entry's callback once if the reported readiness matches
 * the event the entry is watching. EPOLLERR/EPOLLHUP are treated as readiness
 * for the watched event so the callback sees the failure from recv/send and
 * can close the connection.
 *
 * When CHECK_TIMEOUT is enabled, one block of the event table is swept for
 * idle connections every `check_interval` iterations, round-robin.
 *
 * Returns -1 if the reactor is not usable or epoll_wait fails with anything
 * other than EINTR; otherwise it does not return.
 */
int ntyreactor_run(struct ntyreactor *reactor) {
    if (reactor == NULL) return -1;
    if (reactor->epfd < 0) return -1;
    if (reactor->events == NULL) return -1;

    struct epoll_event events[EVENT_BATCH_SIZE];
#if CHECK_TIMEOUT
    const unsigned check_interval = 10; /* check every 10 loops */
    unsigned cur = 0;                   /* unsigned: wraps instead of overflowing */
    int block_idx = 0;
#endif

    while (1) {
#if CHECK_TIMEOUT
        cur++;
        if (cur % check_interval == check_interval - 1) {
            if (block_idx >= reactor->block_num) {
                block_idx = 0;
            }
            ntyreactor_check_timeout(reactor, block_idx++);
        }
#endif
        int nready = epoll_wait(reactor->epfd, events, EVENT_BATCH_SIZE, 1000);
        if (nready < 0) {
            if (errno == EINTR) {
                continue; /* interrupted by a signal: just retry */
            }
            /* Any other error is permanent (bad epfd etc.); retrying would
             * only spin and flood the log. */
            printf("epoll_wait error\n");
            return -1;
        }

        for (int i = 0; i < nready; i++) {
            struct ntyevent *ev = events[i].data.ptr;
            if (ev == NULL || ev->callback == NULL) {
                continue;
            }

            /* Snapshot what the entry is watching before the callback has a
             * chance to re-register it for something else. */
            unsigned int watched = (unsigned int)ev->events & (EPOLLIN | EPOLLOUT);
            unsigned int fired = events[i].events;
            if (fired & (EPOLLERR | EPOLLHUP)) {
                fired |= watched;
            }

            if (fired & watched) {
                ev->callback(ev->fd, (int)events[i].events, ev->arg);
            }
        }
    }
}
主循环中对于每个监听到的事件直接调用回调函数
accept_cb把每个新连接添加到epoll和reactor中监控
/*
 * Callback for listening sockets: accepts one pending connection, makes it
 * non-blocking and registers it with the reactor for EPOLLIN with recv_cb.
 *
 * fd     - listening socket that became readable
 * events - event mask reported by epoll (unused)
 * arg    - the struct ntyreactor that owns the listening socket
 *
 * Returns 0 on success, -1 on failure. On every failure path after accept()
 * succeeded, the new socket is closed so it does not leak.
 */
int accept_cb(int fd, int events, void *arg) {
    (void)events;

    struct ntyreactor *reactor = arg;
    if (reactor == NULL) return -1;

    struct sockaddr_in client_addr;
    socklen_t len = sizeof(client_addr);
    int clientfd = accept(fd, (struct sockaddr *)&client_addr, &len);
    if (clientfd == -1) {
        /* EAGAIN/EINTR just mean "nothing to accept right now" on a
         * non-blocking listener; only real errors are worth logging. */
        if (errno != EAGAIN && errno != EWOULDBLOCK && errno != EINTR) {
            printf("accept: %s\n", strerror(errno));
        }
        return -1;
    }

    struct ntyevent *event = ntyreactor_get_event(reactor, clientfd);
    if (event == NULL) {
        close(clientfd);
        return -1;
    }

    /* Make clientfd non-blocking while preserving its other status flags. */
    int flags = fcntl(clientfd, F_GETFL, 0);
    if (flags < 0 || fcntl(clientfd, F_SETFL, flags | O_NONBLOCK) < 0) {
        printf("%s: fcntl nonblocking failed, %s\n", __func__, strerror(errno));
        close(clientfd);
        return -1;
    }

    /* A freshly accepted fd cannot already be in the epoll set, so make sure
     * nty_event_add issues EPOLL_CTL_ADD rather than EPOLL_CTL_MOD. */
    event->used = 0;
    event->length = 0;
    nty_event_update(event, clientfd, recv_cb, reactor);
    if (nty_event_add(reactor->epfd, EPOLLIN, event) < 0) {
        event->used = 0;
        close(clientfd);
        return -1;
    }

    /* The event table is indexed by fd, so the fd is the entry's position. */
    printf("new connect [%s:%d][time:%ld], pos[%d]\n",
           inet_ntoa(client_addr.sin_addr), ntohs(client_addr.sin_port),
           event->last_active, clientfd);
    return 0;
}
recv_cb 和 send_cb分别负责读和写
这里要提到水平触发和边沿触发的问题
水平触发是只要条件为真就一直触发(例如只要buffer里数据还没读完,epoll就一直触发EPOLLIN),而边沿触发只在条件从假变为真时触发一次
通常,数据量较大时使用水平触发(一次读不完buffer当中所有数据时)。数据较小时可以使用边沿触发。边沿触发每次必须用while循环读取所有数据。
这段代码使用的是水平触发。
/*
 * Read callback: receives up to BUFFER_LENGTH - 1 bytes into the entry's
 * buffer and, on success, switches the fd to EPOLLOUT/send_cb so the data is
 * echoed back.
 *
 * fd     - connection that became readable
 * events - event mask reported by epoll (unused)
 * arg    - the owning struct ntyreactor
 *
 * Returns the number of bytes received; 0 when the peer closed the
 * connection; -1 on error (the connection is closed unless the error was
 * EAGAIN/EWOULDBLOCK/EINTR, in which case it stays registered for EPOLLIN).
 */
int recv_cb(int fd, int events, void *arg) {
    (void)events;

    struct ntyreactor *reactor = arg;
    if (reactor == NULL) return -1;
    struct ntyevent *ev = ntyreactor_get_event(reactor, fd);
    if (ev == NULL) return -1;

    /* Leave one byte free: the data is NUL-terminated below and the buffer
     * is exactly BUFFER_LENGTH bytes long. */
    int len = (int)recv(fd, ev->buffer, BUFFER_LENGTH - 1, 0);
    int saved_errno = errno; /* later calls may overwrite errno */

    if (len > 0) {
        ev->length = len;
        ev->buffer[len] = '\0';
        printf("C[%d]:%s\n", fd, ev->buffer);

        nty_event_del(reactor->epfd, ev);
        nty_event_update(ev, fd, send_cb, reactor);
        if (nty_event_add(reactor->epfd, EPOLLOUT, ev) < 0) {
            /* Cannot watch the fd any more; drop the connection. */
            ev->used = 0;
            close(fd);
            return -1;
        }
        return len;
    }

    if (len < 0 && (saved_errno == EAGAIN || saved_errno == EWOULDBLOCK ||
                    saved_errno == EINTR)) {
        /* Nothing to read right now; keep the existing EPOLLIN registration. */
        return len;
    }

    /* Remove from epoll while the fd is still valid, then close it. */
    nty_event_del(reactor->epfd, ev);
    close(fd);
    if (len == 0) {
        printf("[fd=%d] closed\n", fd);
    } else {
        printf("recv[fd=%d] error[%d]:%s\n", fd, saved_errno, strerror(saved_errno));
    }
    return len;
}
/*
 * Write callback: sends the bytes pending in the entry's buffer. Once
 * everything has been written the fd is switched back to EPOLLIN/recv_cb;
 * after a partial write the unsent tail is kept and the fd stays registered
 * for EPOLLOUT.
 *
 * fd     - connection that became writable
 * events - event mask reported by epoll (unused)
 * arg    - the owning struct ntyreactor
 *
 * Returns the number of bytes sent, or send()'s non-positive result on
 * failure (the connection is closed unless the error was
 * EAGAIN/EWOULDBLOCK/EINTR).
 */
int send_cb(int fd, int events, void *arg) {
    (void)events;

    struct ntyreactor *reactor = arg;
    if (reactor == NULL) return -1;
    struct ntyevent *ev = ntyreactor_get_event(reactor, fd);
    if (ev == NULL) return -1;

    /* MSG_NOSIGNAL: a peer that already went away must produce EPIPE here,
     * not a SIGPIPE that would terminate the whole server. */
    int len = (int)send(fd, ev->buffer, ev->length, MSG_NOSIGNAL);
    int saved_errno = errno; /* later calls may overwrite errno */

    if (len > 0) {
        printf("send[fd=%d], [%d]%s\n", fd, len, ev->buffer);

        if (len < ev->length) {
            /* Partial write: keep the unsent tail for the next EPOLLOUT. */
            int remaining = ev->length - len;
            memmove(ev->buffer, ev->buffer + len, (size_t)remaining);
            ev->buffer[remaining] = '\0';
            ev->length = remaining;
            ev->last_active = time(NULL);
            return len;
        }

        ev->length = 0;
        nty_event_del(reactor->epfd, ev);
        nty_event_update(ev, fd, recv_cb, reactor);
        if (nty_event_add(reactor->epfd, EPOLLIN, ev) < 0) {
            /* Cannot watch the fd any more; drop the connection. */
            ev->used = 0;
            close(fd);
            return -1;
        }
        return len;
    }

    if (len < 0 && (saved_errno == EAGAIN || saved_errno == EWOULDBLOCK ||
                    saved_errno == EINTR)) {
        /* Socket not writable after all; keep waiting for EPOLLOUT. */
        return len;
    }

    /* Remove from epoll while the fd is still valid, then close it. */
    nty_event_del(reactor->epfd, ev);
    close(fd);
    printf("send[fd=%d] error %s\n", fd, strerror(saved_errno));
    return len;
}
reactor自动扩容通过ntyreactor_get_event实现
/*
 * Maps an fd to its ntyevent entry, growing the reactor's table on demand.
 *
 * The table is an array of block pointers; block fd / EVENTS_BLOCK_SIZE holds
 * the entry at index fd % EVENTS_BLOCK_SIZE. When the block index is beyond
 * the current array, the pointer array is at least doubled; blocks themselves
 * are allocated lazily and zero-initialised. Only the pointer array is ever
 * reallocated, so entry addresses already handed to epoll stay valid.
 *
 * Returns the entry for fd, or NULL if reactor is NULL, fd is negative, or an
 * allocation fails (the existing table is left untouched in that case).
 */
struct ntyevent *ntyreactor_get_event(struct ntyreactor *reactor, int fd) {
    /* A negative fd would produce a negative index into the table. */
    if (reactor == NULL || fd < 0) {
        return NULL;
    }

    int b = fd / EVENTS_BLOCK_SIZE;
    int i = fd % EVENTS_BLOCK_SIZE;

    if (b >= reactor->block_num) {
        int new_block_num = reactor->block_num == 0 ? 1 : 2 * reactor->block_num;
        while (new_block_num <= b) {
            new_block_num <<= 1;
        }

        struct ntyevent **new_event_blocks =
            realloc(reactor->events, (size_t)new_block_num * sizeof(*new_event_blocks));
        if (new_event_blocks == NULL) {
            printf("cannot allocate block in %s for fd = %d\n", __func__, fd);
            return NULL;
        }

        /* Newly added slots start out as "no block allocated yet". */
        memset(new_event_blocks + reactor->block_num, 0,
               (size_t)(new_block_num - reactor->block_num) * sizeof(*new_event_blocks));
        reactor->block_num = new_block_num;
        reactor->events = new_event_blocks;
    }

    if (reactor->events[b] == NULL) {
        reactor->events[b] = calloc(EVENTS_BLOCK_SIZE, sizeof(struct ntyevent));
        if (reactor->events[b] == NULL) {
            printf("cannot allocate block in %s for fd = %d\n", __func__, fd);
            return NULL;
        }
    }

    return &reactor->events[b][i];
}
使用 ntyreactor_check_timeout自动检查block_idx中长时间不活跃的fd并关闭连接
/*
 * Sweeps one block of the event table and closes every registered,
 * non-sticky connection that has been idle for at least 60 seconds.
 *
 * reactor   - reactor whose table is swept
 * block_idx - index of the block to sweep
 *
 * Returns the number of connections closed; 0 when there is nothing to do
 * (NULL reactor, out-of-range index, or block not allocated).
 *
 * NOTE(review): the file defines CLIENT_TIMEOUT (15) but this threshold is
 * hard-coded to 60 — confirm which value is intended.
 */
int ntyreactor_check_timeout(struct ntyreactor *reactor, int block_idx) {
    if (!reactor || !reactor->events) return 0;
    if (block_idx < 0 || block_idx >= reactor->block_num) return 0;

    struct ntyevent *block = reactor->events[block_idx];
    if (!block) return 0;

    const long idle_limit = 60; /* seconds */
    long now = (long)time(NULL);
    int closed = 0;

    for (int i = 0; i < EVENTS_BLOCK_SIZE; i++) {
        if (!block[i].used || block[i].sticky) continue;

        long duration = now - block[i].last_active;
        if (duration >= idle_limit) {
            int fd = block[i].fd;
            /* Deregister while the fd is still open; EPOLL_CTL_DEL on an
             * already closed fd fails with EBADF. */
            nty_event_del(reactor->epfd, &block[i]);
            close(fd);
            printf("[fd=%d] timeout\n", fd);
            closed++;
        }
    }
    return closed;
}
nty_event_* 这几个函数负责把fd和监听的event加入到epoll和reactor中,并设置回调函数
/*
 * Re-arms an entry for a new owner: stores the fd, the callback and its
 * argument, clears the watched-event mask and the sticky flag, and stamps the
 * entry with the current time. The `used` flag, buffer and length are left
 * as they are.
 */
void nty_event_update(struct ntyevent *ev, int fd, NCALLBACK callback, void *arg) {
    const long stamp = time(NULL);

    /* Who to call, and with what. */
    ev->callback = callback;
    ev->arg = arg;
    ev->fd = fd;

    /* Fresh state: nothing watched yet, subject to idle timeout again. */
    ev->sticky = 0;
    ev->events = 0;
    ev->last_active = stamp;
}
/*
 * Adds ev to the epoll set, or modifies its registration if it is already in
 * the set (ev->used == 1), so that it watches `events`. The entry pointer is
 * stored in epoll's data.ptr.
 *
 * Returns 0 on success, -1 on failure. ev->used is set only after a
 * successful EPOLL_CTL_ADD, so a failed add does not leave the entry marked
 * as registered. On failure the caller still owns ev->fd and is responsible
 * for closing it.
 */
int nty_event_add(int epfd, int events, struct ntyevent *ev) {
    if (ev == NULL) {
        return -1;
    }

    struct epoll_event ep_ev = {0, {0}};
    ep_ev.data.ptr = ev;
    ep_ev.events = ev->events = events;

    int op = (ev->used == 1) ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
    if (epoll_ctl(epfd, op, ev->fd, &ep_ev) < 0) {
        printf("event add failed [fd=%d], events[%d]\n", ev->fd, events);
        return -1;
    }

    ev->used = 1;
    return 0;
}
/*
 * Removes ev from the epoll set and marks the entry as unused.
 *
 * Returns -1 if ev is NULL or not currently marked as used, 0 otherwise.
 * The epoll_ctl result is deliberately not propagated: removal is
 * best-effort, and it fails harmlessly when the fd has already been closed.
 */
int nty_event_del(int epfd, struct ntyevent *ev) {
    if (ev == NULL || ev->used != 1) {
        return -1;
    }

    ev->used = 0;
    /* EPOLL_CTL_DEL ignores the event argument, so NULL is passed. */
    epoll_ctl(epfd, EPOLL_CTL_DEL, ev->fd, NULL);
    return 0;
}
还有其他一些函数一并写在这里
/*
 * Creates a non-blocking TCP socket bound to INADDR_ANY:port and puts it in
 * the listening state.
 *
 * Returns the listening fd, or -1 if any step fails (the socket is closed
 * again in that case, so nothing leaks and callers never receive a socket
 * that is not actually listening on the requested port).
 */
int start_listen(short port) {
    int fd = socket(AF_INET, SOCK_STREAM, 0);
    if (fd < 0) {
        printf("socket failed : %s\n", strerror(errno));
        return -1;
    }

    /* Allow quick restarts while old connections linger in TIME_WAIT.
     * Failure here is not fatal, so only warn. */
    int reuse = 1;
    if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse)) < 0) {
        printf("setsockopt failed : %s\n", strerror(errno));
    }

    /* Add O_NONBLOCK without discarding the other status flags. */
    int flags = fcntl(fd, F_GETFL, 0);
    if (flags < 0 || fcntl(fd, F_SETFL, flags | O_NONBLOCK) < 0) {
        printf("fcntl failed : %s\n", strerror(errno));
        close(fd);
        return -1;
    }

    struct sockaddr_in server_addr;
    memset(&server_addr, 0, sizeof(server_addr));
    server_addr.sin_family = AF_INET;
    server_addr.sin_addr.s_addr = htonl(INADDR_ANY);
    server_addr.sin_port = htons((unsigned short)port);

    if (bind(fd, (struct sockaddr *)&server_addr, sizeof(server_addr)) < 0) {
        printf("bind failed : %s\n", strerror(errno));
        close(fd);
        return -1;
    }
    if (listen(fd, 20) < 0) {
        printf("listen failed : %s\n", strerror(errno));
        close(fd);
        return -1;
    }

    /* Print as unsigned: ports above 32767 do not fit a signed short. */
    printf("listening to port %hu with fd = %d\n", (unsigned short)port, fd);
    return fd;
}
/*
 * Initialises a reactor: zeroes the structure, creates the epoll instance and
 * allocates an event table with a single (still empty) block slot.
 *
 * Returns 0 on success, -1 if reactor is NULL, -2 if the epoll instance
 * cannot be created, -3 if the table cannot be allocated. On failure no
 * resources are left open and reactor->epfd is -1.
 */
int ntyreactor_init(struct ntyreactor *reactor) {
    if (reactor == NULL) return -1;
    memset(reactor, 0, sizeof(struct ntyreactor));

    reactor->epfd = epoll_create(1);
    /* 0 is a valid descriptor; only a negative value signals failure. */
    if (reactor->epfd < 0) {
        printf("create epfd in %s err %s\n", __func__, strerror(errno));
        return -2;
    }

    reactor->events = calloc(1, sizeof(*reactor->events));
    if (reactor->events == NULL) {
        printf("cannot allocate events block in %s\n", __func__);
        /* Do not leak the epoll descriptor created above. */
        close(reactor->epfd);
        reactor->epfd = -1;
        return -3;
    }

    reactor->block_num = 1;
    return 0;
}
int ntyreactor_destory(struct ntyreactor *reactor) {
close(reactor->epfd);
int i;
for(i = 0; i < reactor->block_num; i++) {
free(reactor->events[i]);
}
free(reactor->events);
}
/*
 * Registers a listening socket with the reactor: `acceptor` is invoked with
 * the reactor as its argument whenever listenfd becomes readable. The entry
 * is marked sticky so the idle-timeout sweep never closes it.
 *
 * Returns 0 on success, -1 on invalid arguments, when no table entry can be
 * obtained, or when the fd cannot be added to the epoll set. listenfd is not
 * closed on failure; it stays owned by the caller.
 */
int ntyreactor_addlistener(struct ntyreactor *reactor, int listenfd, NCALLBACK acceptor) {
    if (reactor == NULL) return -1;
    if (reactor->events == NULL) return -1;
    if (listenfd < 0) return -1; /* e.g. a failed start_listen() */

    struct ntyevent *event = ntyreactor_get_event(reactor, listenfd);
    if (event == NULL) return -1;

    nty_event_update(event, listenfd, acceptor, reactor);
    event->sticky = 1; /* don't close listen fd on timeout */

    if (nty_event_add(reactor->epfd, EPOLLIN, event) < 0) {
        event->used = 0; /* the entry is not registered with epoll */
        return -1;
    }
    return 0;
}
其他注意事项
为了达到百万连接数,我们还要增加系统的最大open files
使用
ulimit -a
查看open files
用-n设置,如果不行,就更改
sudo vim /etc/security/limits.conf
这个文件,在其末尾加上这两行
* soft nofile 1048576
* hard nofile 1048576
并且关掉shell并重新登录
此时应该就行了
并且可能还需要更改内核tcp收发缓存的大小
可以参考这里
参考资料
[1] 零声教育 Linux C/C++后端服务器架构开发 2.1网络开发