reactor
把epoll傳回的fd讀寫事件進行封裝,并為每種事件設定回調函數,把所有關注的fd以及對應的事件存儲在一個資料結構裡,與epoll内部的紅黑樹的節點形成一一對應的關系。epoll傳回時使用data.ptr得到我們資料結構中對應的entry,再進行處理
對fd的封裝如下
// Callback invoked by the reactor loop when a watched fd becomes ready.
// fd is the ready descriptor, events is the event mask reported by epoll,
// and arg is the opaque pointer stored in ntyevent.arg.
typedef int (*NCALLBACK)(int fd, int events, void *arg);
// Per-fd bookkeeping entry. Its address is stored in epoll's data.ptr so the
// loop can get from an epoll result straight back to this entry.
struct ntyevent {
int fd;
int events; // watched event: EPOLLIN or EPOLLOUT
void *arg; // extra argument passed to the callback (the reactor pointer)
NCALLBACK callback; // callback function
int used; // 1 while this entry is registered with epoll, 0 otherwise
char buffer[BUFFER_LENGTH]; // shared by reads and writes, since an fd is only watched for one direction at a time
int length; // number of valid bytes in buffer
long last_active; // time (seconds, from time(NULL)) at which this fd was last active
int sticky; // when 1, the fd is never closed for inactivity (used for listen fds)
};
reactor資料結構的定義如下
// Reactor state: one epoll instance plus a growable table of ntyevent blocks
// that is indexed by fd (see ntyreactor_get_event).
struct ntyreactor {
int epfd; // epoll instance descriptor
int block_num; // number of slots in the events pointer array
struct ntyevent **events; //array of ntyevent *, length = block_num, each block has 1024 ntyevent
};
events是一個指針數組,可擴容。每個元素指向一個1024長度的ntyevent數組
整個伺服器大緻有如下函數
/* Readiness callbacks; arg is the owning struct ntyreactor *. */
int accept_cb(int fd, int events, void *arg);
int recv_cb(int fd, int events, void *arg);
int send_cb(int fd, int events, void *arg);

/* Per-fd entry management and epoll registration. */
void nty_event_update(struct ntyevent *ev, int fd, NCALLBACK callback, void *arg);
int nty_event_add(int epfd, int events, struct ntyevent *ev);
int nty_event_del(int epfd, struct ntyevent *ev);

/* Creates a non-blocking listening socket on the given port. */
int start_listen(short port);

/* Reactor lifecycle and main loop. */
int ntyreactor_init(struct ntyreactor *reactor);
struct ntyevent *ntyreactor_get_event(struct ntyreactor *reactor, int fd);
int ntyreactor_destory(struct ntyreactor *reactor);
int ntyreactor_addlistener(struct ntyreactor *reactor, int listenfd, NCALLBACK acceptor);
/* Called from ntyreactor_run, so it must be declared before that function. */
int ntyreactor_check_timeout(struct ntyreactor *reactor, int block_idx);
int ntyreactor_run(struct ntyreactor *reactor);
先看main函數
// Size in bytes of the per-connection buffer (ntyevent.buffer).
#define BUFFER_LENGTH 4096
// Number of ntyevent entries in one block of the reactor's table.
#define EVENTS_BLOCK_SIZE 1024
// Maximum number of events fetched by a single epoll_wait call.
#define EVENT_BATCH_SIZE 1024
// First port to listen on when none is given on the command line.
#define SERVER_PORT 8888
// Number of consecutive ports to listen on, starting at the first port.
#define PORT_COUNT 100
// NOTE(review): not referenced by the code shown in this section;
// ntyreactor_check_timeout uses a literal 60 instead. Also, CHECK_TIMEOUT,
// which gates the timeout scan in ntyreactor_run, is not defined here.
#define CLIENT_TIMEOUT 15
/*
 * Entry point.
 *
 * Listens on PORT_COUNT consecutive ports starting at SERVER_PORT (or at the
 * port given as the only command-line argument) and runs the reactor loop.
 *
 * Returns 0 when the loop ends normally, 1 on invalid arguments or when the
 * reactor could not be set up.
 */
int main(int argc, char *argv[]) {
    unsigned short port = SERVER_PORT;
    if (argc == 2) {
        /* strtol instead of atoi so that garbage and out-of-range values are
         * rejected instead of silently becoming port 0. */
        char *end = NULL;
        errno = 0;
        long parsed = strtol(argv[1], &end, 10);
        if (errno != 0 || end == argv[1] || *end != '\0' ||
            parsed < 1 || parsed > 65535 - (PORT_COUNT - 1)) {
            printf("invalid port: %s\n", argv[1]);
            return 1;
        }
        port = (unsigned short)parsed;
    }

    struct ntyreactor *reactor = calloc(1, sizeof(*reactor));
    if (reactor == NULL) {
        printf("cannot allocate reactor\n");
        return 1;
    }
    if (ntyreactor_init(reactor) != 0) {
        /* Release whatever init managed to acquire before it failed. */
        ntyreactor_destory(reactor);
        free(reactor);
        return 1;
    }

    int listeners = 0;
    int i;
    for (i = 0; i < PORT_COUNT; i++) {
        int listenfd = start_listen(port + i);
        if (listenfd < 0) {
            continue; /* this port is unavailable; keep the others */
        }
        if (ntyreactor_addlistener(reactor, listenfd, accept_cb) != 0) {
            close(listenfd);
            continue;
        }
        listeners++;
    }

    int rc = 1;
    if (listeners > 0) {
        rc = (ntyreactor_run(reactor) == 0) ? 0 : 1;
    } else {
        printf("no listening socket could be set up\n");
    }

    ntyreactor_destory(reactor);
    free(reactor);
    return rc;
}
PORT_COUNT設為100時,程式監聽從8888開始的100個端口,每個端口至少能接受1萬個連接,所以總連接數能達到一百萬。(因為測試的用戶端數量比較少,如果不多監聽一些端口,會導緻(src-ip, src-port, dst-ip, dst-port) 四元組的數量不夠用)
其中
ntyreactor_run
是程式主循環
/*
 * Main event loop: waits on the reactor's epoll instance and dispatches each
 * ready fd to the callback stored in its ntyevent entry.
 *
 * When CHECK_TIMEOUT is defined non-zero, one block of the event table is
 * scanned for idle connections every 10th iteration, cycling through blocks.
 *
 * Returns -1 if the reactor is not usable or epoll_wait fails with anything
 * other than EINTR; otherwise it does not return.
 */
int ntyreactor_run(struct ntyreactor *reactor) {
    if (reactor == NULL) return -1;
    if (reactor->epfd < 0) return -1;
    if (reactor->events == NULL) return -1;

    struct epoll_event events[EVENT_BATCH_SIZE];
#if CHECK_TIMEOUT
    const int check_interval = 10; /* check every 10 loops */
    int block_idx = 0;
    int cur = 0; /* must start defined; kept in [0, check_interval) */
#endif

    while (1) {
#if CHECK_TIMEOUT
        cur = (cur + 1) % check_interval;
        if (cur == check_interval - 1) {
            if (block_idx >= reactor->block_num) {
                block_idx = 0;
            }
            ntyreactor_check_timeout(reactor, block_idx++);
        }
#endif
        int nready = epoll_wait(reactor->epfd, events, EVENT_BATCH_SIZE, 1000);
        if (nready < 0) {
            if (errno == EINTR) {
                continue; /* interrupted by a signal: just retry */
            }
            /* Every other epoll_wait error is permanent (bad fd, bad
             * arguments); retrying would spin forever. */
            printf("epoll_wait error\n");
            return -1;
        }

        int i;
        for (i = 0; i < nready; i++) {
            struct ntyevent *ev = (struct ntyevent *)events[i].data.ptr;
            if (ev == NULL || ev->callback == NULL) {
                continue;
            }
            unsigned int revents = events[i].events;
            /* EPOLLERR/EPOLLHUP are reported even when not requested. Route
             * them to the current callback so its recv/send/accept fails and
             * cleans up; otherwise a level-triggered fd keeps waking us. */
            int fault = (revents & (EPOLLERR | EPOLLHUP)) != 0;

            if ((ev->events & EPOLLIN) && ((revents & EPOLLIN) || fault)) {
                ev->callback(ev->fd, (int)revents, ev->arg);
            } else if ((ev->events & EPOLLOUT) && ((revents & EPOLLOUT) || fault)) {
                ev->callback(ev->fd, (int)revents, ev->arg);
            }
        }
    }
}
主循環中對于每個監聽到的事件直接調用回調函數
accept_cb把每個新連接添加到epoll和reactor中監控
/*
 * Accept callback for a listening socket.
 *
 * Accepts one pending connection on fd, makes it non-blocking and registers
 * it with the reactor for EPOLLIN with recv_cb as its callback.
 *
 * fd     - listening socket that became readable
 * events - epoll event mask (unused)
 * arg    - the owning struct ntyreactor *
 *
 * Returns 0 when a connection was accepted and registered, -1 otherwise.
 * On every failure path the accepted socket is closed so it cannot leak.
 */
int accept_cb(int fd, int events, void *arg) {
    (void)events;
    struct ntyreactor *reactor = (struct ntyreactor *)arg;
    if (reactor == NULL) return -1;

    struct sockaddr_in client_addr;
    socklen_t len = sizeof(client_addr);
    int clientfd = accept(fd, (struct sockaddr *)&client_addr, &len);
    if (clientfd == -1) {
        /* EAGAIN/EINTR are normal on a non-blocking listener; only report
         * real errors. */
        if (errno != EAGAIN && errno != EWOULDBLOCK && errno != EINTR) {
            printf("accept: %s\n", strerror(errno));
        }
        return -1;
    }

    struct ntyevent *event = ntyreactor_get_event(reactor, clientfd);
    if (event == NULL) {
        close(clientfd);
        return -1;
    }

    /* Make clientfd non-blocking while keeping its other status flags. */
    int flags = fcntl(clientfd, F_GETFL, 0);
    if (flags < 0 || fcntl(clientfd, F_SETFL, flags | O_NONBLOCK) < 0) {
        printf("%s: fcntl nonblocking failed, %d\n", __func__, clientfd);
        close(clientfd);
        return -1;
    }

    nty_event_update(event, clientfd, recv_cb, reactor);
    if (nty_event_add(reactor->epfd, EPOLLIN, event) < 0) {
        /* Make sure the slot is marked free again before dropping the fd. */
        nty_event_del(reactor->epfd, event);
        close(clientfd);
        return -1;
    }

    /* The event table is indexed by fd, so the fd is the slot position. */
    printf("new connect [%s:%d][time:%ld], pos[%d]\n",
           inet_ntoa(client_addr.sin_addr), ntohs(client_addr.sin_port),
           event->last_active, clientfd);
    return 0;
}
recv_cb 和 send_cb分别負責讀和寫
這裡要提到水準觸發和邊沿觸發的問題
水準觸發是隻要條件為真就一直觸發(例如隻要buffer裡資料還沒讀完,epoll就一直觸發EPOLLIN),而邊沿觸發隻在條件從假變為真時觸發一次
通常,資料量較大時(一次讀不完buffer當中所有資料時)使用水準觸發。資料量較小時可以使用邊沿觸發。邊沿觸發每次必須用while循環讀取所有資料(直到recv傳回EAGAIN為止)。
這段代碼使用的是水準觸發。
/*
 * Read callback for a client connection (level-triggered).
 *
 * Reads up to BUFFER_LENGTH - 1 bytes into the entry's buffer, NUL-terminates
 * them and switches the fd to EPOLLOUT with send_cb so the data is echoed
 * back. On EOF or a hard error the fd is unregistered and closed.
 *
 * fd     - readable client socket
 * events - epoll event mask (unused)
 * arg    - the owning struct ntyreactor *
 *
 * Returns the recv() result (>0 bytes read, 0 peer closed, <0 error), or -1
 * when the entry cannot be found or re-registered.
 */
int recv_cb(int fd, int events, void *arg) {
    (void)events;
    struct ntyreactor *reactor = (struct ntyreactor *)arg;
    if (reactor == NULL) return -1;
    struct ntyevent *ev = ntyreactor_get_event(reactor, fd);
    if (ev == NULL) return -1;

    /* Leave one byte free for the terminating NUL written below; reading
     * BUFFER_LENGTH bytes would make buffer[len] an out-of-bounds write. */
    int len = (int)recv(fd, ev->buffer, BUFFER_LENGTH - 1, 0);
    if (len < 0 && (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)) {
        /* Nothing to read right now: keep the connection and its EPOLLIN
         * registration untouched. */
        return len;
    }
    int saved_errno = errno; /* the calls below may overwrite errno */

    nty_event_del(reactor->epfd, ev);
    if (len > 0) {
        ev->length = len;
        ev->buffer[len] = '\0';
        printf("C[%d]:%s\n", fd, ev->buffer);
        nty_event_update(ev, fd, send_cb, reactor);
        if (nty_event_add(reactor->epfd, EPOLLOUT, ev) < 0) {
            /* Cannot watch the fd any more: drop the connection. */
            nty_event_del(reactor->epfd, ev);
            close(fd);
            return -1;
        }
    } else if (len == 0) {
        close(fd);
        printf("[fd=%d] closed\n", fd);
    } else {
        close(fd);
        printf("recv[fd=%d] error[%d]:%s\n", fd, saved_errno, strerror(saved_errno));
    }
    return len;
}
/*
 * Write callback for a client connection (level-triggered).
 *
 * Sends the pending bytes in the entry's buffer. After a complete send the
 * fd goes back to EPOLLIN with recv_cb. After a short write the unsent tail
 * is moved to the front of the buffer and the fd stays on EPOLLOUT. On a hard
 * error the fd is unregistered and closed.
 *
 * fd     - writable client socket
 * events - epoll event mask (unused)
 * arg    - the owning struct ntyreactor *
 *
 * Returns the send() result, or -1 when the entry cannot be found or
 * re-registered.
 */
int send_cb(int fd, int events, void *arg) {
    (void)events;
    struct ntyreactor *reactor = (struct ntyreactor *)arg;
    if (reactor == NULL) return -1;
    struct ntyevent *ev = ntyreactor_get_event(reactor, fd);
    if (ev == NULL) return -1;

    /* MSG_NOSIGNAL: a peer that already closed must produce EPIPE here, not
     * a SIGPIPE that would terminate the whole server. */
    int len = (int)send(fd, ev->buffer, ev->length, MSG_NOSIGNAL);
    if (len < 0 && (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)) {
        /* Socket buffer is full right now: stay registered for EPOLLOUT. */
        return len;
    }

    if (len > 0) {
        printf("send[fd=%d], [%d]%s\n", fd, len, ev->buffer);
        if (len < ev->length) {
            /* Short write: keep the remainder for the next EPOLLOUT. */
            int remaining = ev->length - len;
            memmove(ev->buffer, ev->buffer + len, (size_t)remaining);
            ev->buffer[remaining] = '\0';
            ev->length = remaining;
            ev->last_active = time(NULL);
            return len;
        }
        nty_event_del(reactor->epfd, ev);
        nty_event_update(ev, fd, recv_cb, reactor);
        if (nty_event_add(reactor->epfd, EPOLLIN, ev) < 0) {
            /* Cannot watch the fd any more: drop the connection. */
            nty_event_del(reactor->epfd, ev);
            close(fd);
            return -1;
        }
    } else {
        int saved_errno = errno; /* the calls below may overwrite errno */
        /* Unregister while the fd is still open, then close it. */
        nty_event_del(reactor->epfd, ev);
        close(fd);
        printf("send[fd=%d] error %s\n", fd, strerror(saved_errno));
    }
    return len;
}
reactor自動擴容通過ntyreactor_get_event實作
/*
 * Returns the ntyevent entry that belongs to fd, growing the table on demand.
 *
 * The table is a pointer array of blocks; fd / EVENTS_BLOCK_SIZE selects the
 * block and fd % EVENTS_BLOCK_SIZE the entry inside it. The pointer array is
 * at least doubled when fd falls outside it, and a block is allocated
 * (zero-filled) the first time one of its fds is requested.
 *
 * Returns NULL when reactor is NULL, fd is negative, or memory cannot be
 * allocated. On allocation failure the existing table is left intact.
 */
struct ntyevent *ntyreactor_get_event(struct ntyreactor *reactor, int fd) {
    /* A negative fd (e.g. -1 from a failed socket call) would otherwise be
     * turned into a negative array index. */
    if (reactor == NULL || fd < 0) {
        return NULL;
    }

    int b = fd / EVENTS_BLOCK_SIZE;
    int i = fd % EVENTS_BLOCK_SIZE;

    if (b >= reactor->block_num) {
        int new_block_num = reactor->block_num == 0 ? 1 : 2 * reactor->block_num;
        while (new_block_num <= b) new_block_num <<= 1;

        /* Keep the old pointer until realloc is known to have succeeded. */
        struct ntyevent **new_event_blocks = (struct ntyevent **)realloc(
            reactor->events, (size_t)new_block_num * sizeof(struct ntyevent *));
        if (new_event_blocks == NULL) {
            printf("cannot allocate block in %s for fd = %d\n", __func__, fd);
            return NULL;
        }
        /* realloc does not clear the added slots; mark them unallocated. */
        memset(new_event_blocks + reactor->block_num, 0,
               (size_t)(new_block_num - reactor->block_num) * sizeof(struct ntyevent *));
        reactor->block_num = new_block_num;
        reactor->events = new_event_blocks;
    }

    if (reactor->events[b] == NULL) {
        reactor->events[b] = (struct ntyevent *)calloc(EVENTS_BLOCK_SIZE, sizeof(struct ntyevent));
        if (reactor->events[b] == NULL) {
            printf("cannot allocate block in %s for fd = %d\n", __func__, fd);
            return NULL;
        }
    }
    return &reactor->events[b][i];
}
使用 ntyreactor_check_timeout自動檢查第block_idx個block中長時間不活躍的fd并關閉連接
/*
 * Closes connections in one block of the event table that have been idle for
 * 60 seconds or more. Entries that are unused or marked sticky are skipped.
 *
 * reactor   - reactor whose table is scanned
 * block_idx - index of the block to scan
 *
 * Returns the number of connections closed; 0 when the arguments are invalid
 * or the block has not been allocated.
 *
 * NOTE(review): the limit is a literal 60 while CLIENT_TIMEOUT is defined as
 * 15 and otherwise unused - confirm which value is intended.
 */
int ntyreactor_check_timeout(struct ntyreactor *reactor, int block_idx) {
    if (!reactor || !reactor->events) return 0;
    if (block_idx < 0 || block_idx >= reactor->block_num) return 0;

    struct ntyevent *block = reactor->events[block_idx];
    if (!block) return 0;

    const long idle_limit = 60;
    long now = time(NULL);
    int closed = 0;
    int i;
    for (i = 0; i < EVENTS_BLOCK_SIZE; i++) {
        struct ntyevent *ev = &block[i];
        if (!ev->used || ev->sticky) continue;
        if (now - ev->last_active < idle_limit) continue;

        int fd = ev->fd;
        /* Unregister while the fd is still open, then close it. */
        nty_event_del(reactor->epfd, ev);
        close(fd);
        printf("[fd=%d] timeout\n", fd);
        closed++;
    }
    return closed;
}
nty_event_* 這幾個函數負責把fd和監聽的event加入到epoll和reactor中,并設定回調函數
/*
 * (Re)initialises an entry for fd: stores the callback and its argument,
 * clears the watched-event mask and the sticky flag, and stamps the entry
 * with the current time. The used flag, buffer and length are left as they
 * are.
 */
void nty_event_update(struct ntyevent *ev, int fd, NCALLBACK callback, void *arg) {
    /* What to call, on which fd, and with what context. */
    ev->fd = fd;
    ev->arg = arg;
    ev->callback = callback;

    /* Reset per-registration state; nty_event_add sets the real mask. */
    ev->sticky = 0;
    ev->events = 0;

    ev->last_active = time(NULL);
}
/*
 * Registers ev->fd on epfd for the given events, or changes the watched
 * events if the entry is already registered (ev->used == 1).
 *
 * If the used flag disagrees with the kernel's view (MOD fails with ENOENT,
 * or ADD fails with EEXIST) the opposite operation is tried once.
 *
 * ev->used is set to 1 only after the kernel accepted the registration, so a
 * failed ADD does not leave the entry marked as registered.
 *
 * Returns 0 on success, -1 on failure.
 */
int nty_event_add(int epfd, int events, struct ntyevent *ev) {
    if (ev == NULL) {
        return -1;
    }

    struct epoll_event ep_ev = {0, {0}};
    ep_ev.data.ptr = ev;
    ep_ev.events = ev->events = events;

    int op = (ev->used == 1) ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
    int rc = epoll_ctl(epfd, op, ev->fd, &ep_ev);
    if (rc < 0 && op == EPOLL_CTL_MOD && errno == ENOENT) {
        /* Marked as used but unknown to epoll (e.g. the fd was closed and
         * the number reused): register it from scratch. */
        rc = epoll_ctl(epfd, EPOLL_CTL_ADD, ev->fd, &ep_ev);
    } else if (rc < 0 && op == EPOLL_CTL_ADD && errno == EEXIST) {
        /* Already known to epoll although the entry said otherwise. */
        rc = epoll_ctl(epfd, EPOLL_CTL_MOD, ev->fd, &ep_ev);
    }

    if (rc < 0) {
        printf("event add failed [fd=%d], events[%d]\n", ev->fd, events);
        return -1;
    }
    ev->used = 1;
    return 0;
}
/*
 * Removes ev->fd from epfd and marks the entry as unused.
 *
 * Returns -1 when ev is NULL or not currently marked as used, 0 otherwise.
 * The epoll_ctl result is deliberately not reported: the entry is released
 * either way, and the call fails harmlessly if the fd was already closed.
 */
int nty_event_del(int epfd, struct ntyevent *ev) {
    if (ev == NULL || ev->used != 1) {
        return -1;
    }

    /* Kernels before 2.6.9 reject a NULL event pointer for EPOLL_CTL_DEL,
     * so always pass a real (ignored) structure. */
    struct epoll_event ep_ev = {0, {0}};
    ep_ev.data.ptr = ev;

    ev->used = 0;
    (void)epoll_ctl(epfd, EPOLL_CTL_DEL, ev->fd, &ep_ev);
    return 0;
}
還有其他一些函數一并寫在這裡
/*
 * Creates a non-blocking TCP socket bound to INADDR_ANY:port and puts it in
 * listening state.
 *
 * port - port number in host byte order; it is passed as short, so values
 *        above 32767 arrive negative and are reinterpreted as unsigned.
 *
 * Returns the listening fd, or -1 if any step fails (the socket is closed in
 * that case, so no half-configured fd is handed to the caller).
 */
int start_listen(short port) {
    unsigned short uport = (unsigned short)port;

    int fd = socket(AF_INET, SOCK_STREAM, 0);
    if (fd < 0) {
        printf("socket failed : %s\n", strerror(errno));
        return -1;
    }

    /* Allow rebinding right after a restart while old connections are still
     * in TIME_WAIT. Failure here is not fatal. */
    int reuse = 1;
    if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse)) < 0) {
        printf("setsockopt SO_REUSEADDR failed : %s\n", strerror(errno));
    }

    int flags = fcntl(fd, F_GETFL, 0);
    if (flags < 0 || fcntl(fd, F_SETFL, flags | O_NONBLOCK) < 0) {
        printf("fcntl nonblocking failed : %s\n", strerror(errno));
        close(fd);
        return -1;
    }

    struct sockaddr_in server_addr;
    memset(&server_addr, 0, sizeof(server_addr));
    server_addr.sin_family = AF_INET;
    server_addr.sin_addr.s_addr = htonl(INADDR_ANY);
    server_addr.sin_port = htons(uport);

    if (bind(fd, (struct sockaddr *)&server_addr, sizeof(server_addr)) < 0) {
        printf("bind failed : %s\n", strerror(errno));
        close(fd);
        return -1;
    }
    if (listen(fd, 20) < 0) {
        printf("listen failed : %s\n", strerror(errno));
        close(fd);
        return -1;
    }

    /* %hu: ports above 32767 must not be printed as negative numbers. */
    printf("listening to port %hu with fd = %d\n", uport, fd);
    return fd;
}
/*
 * Initialises a reactor: zeroes the structure, creates the epoll instance
 * and allocates an event table with room for one block pointer.
 *
 * Returns 0 on success, -1 if reactor is NULL, -2 if the epoll instance
 * cannot be created, -3 if the table cannot be allocated. On failure no
 * descriptor or memory is left allocated and epfd is -1.
 */
int ntyreactor_init(struct ntyreactor *reactor) {
    if (reactor == NULL) return -1;
    memset(reactor, 0, sizeof(struct ntyreactor));

    reactor->epfd = epoll_create(1);
    /* 0 is a valid descriptor; only negative values signal failure. */
    if (reactor->epfd < 0) {
        printf("create epfd in %s err %s\n", __func__, strerror(errno));
        return -2;
    }

    reactor->events = (struct ntyevent **)calloc(1, sizeof(struct ntyevent *));
    if (reactor->events == NULL) {
        printf("cannot allocate events block in %s\n", __func__);
        /* Do not leak the epoll descriptor created above. */
        close(reactor->epfd);
        reactor->epfd = -1;
        return -3;
    }
    reactor->block_num = 1;
    return 0;
}
/*
 * Releases the reactor's resources: closes the epoll descriptor and frees
 * every event block and the block pointer array. The reactor structure
 * itself is not freed, and sockets still recorded in the table are not
 * closed here.
 *
 * The fields are reset afterwards, so calling this twice is harmless.
 *
 * Returns 0 on success, -1 if reactor is NULL.
 */
int ntyreactor_destory(struct ntyreactor *reactor) {
    if (reactor == NULL) {
        return -1;
    }

    if (reactor->epfd >= 0) {
        close(reactor->epfd);
    }
    reactor->epfd = -1;

    if (reactor->events != NULL) {
        int i;
        for (i = 0; i < reactor->block_num; i++) {
            free(reactor->events[i]); /* free(NULL) is a no-op */
        }
        free(reactor->events);
    }
    reactor->events = NULL;
    reactor->block_num = 0;
    return 0;
}
/*
 * Registers a listening socket with the reactor.
 *
 * The entry for listenfd is set up with acceptor as its callback and the
 * reactor as callback argument, marked sticky so it is never closed for
 * inactivity, and watched for EPOLLIN.
 *
 * Returns 0 on success; -1 if an argument is invalid, the entry cannot be
 * obtained, or the epoll registration fails. listenfd is never closed here;
 * it stays owned by the caller on failure.
 */
int ntyreactor_addlistener(struct ntyreactor *reactor, int listenfd, NCALLBACK acceptor) {
    if (reactor == NULL) return -1;
    if (reactor->events == NULL) return -1;
    /* A failed socket setup yields a negative fd; do not index with it. */
    if (listenfd < 0 || acceptor == NULL) return -1;

    struct ntyevent *event = ntyreactor_get_event(reactor, listenfd);
    if (event == NULL) return -1;

    nty_event_update(event, listenfd, acceptor, reactor);
    event->sticky = 1; /* don't close listen fd on timeout */
    if (nty_event_add(reactor->epfd, EPOLLIN, event) < 0) {
        /* Make sure the slot is not left marked as registered. */
        nty_event_del(reactor->epfd, event);
        return -1;
    }
    return 0;
}
其他注意事項
為了達到百萬連接數,我們還要增加系統的最大open files
使用
ulimit -a
檢視open files
用-n設定,如果不行,就更改
sudo vim /etc/security/limits.conf
這個
末尾加上這兩行
* soft nofile 1048576
* hard nofile 1048576
并且關掉shell并重新登入
此時應該就行了
并且可能還需要更改核心tcp收發緩存的大小
可以參考這裡
參考資料
[1] 零聲教育 Linux C/C++後端伺服器架構開發 2.1網絡開發