解析请求头 · Nginx开发从入门到精通

解析请求头在ngx_http_process_request_line函数中，解析完请求行之后，如果请求行的uri里面包含了域名部分，则将其保存在请求结构的headers_in成员的server字段，headers_in用来保存所有请求头，它的类型为ngx_http_headers_in_t： typedef struct { ngx_list_t headers; ngx_table_elt_t *host; ngx_table_elt_t *connection; ngx_table_elt_t *if_modified_since; ngx_table_elt_t *if_unmodified_since; ngx_table_elt_t *user_agent; ngx_table_elt_t *referer; ngx_table_elt_t *content_length; ngx_table_elt_t *content_type; ngx_table_elt_t *range; ngx_table_elt_t *if_range; ngx_table_elt_t *transfer_encoding; ngx_table_elt_t *expect; #if (NGX_HTTP_GZIP) ngx_table_elt_t *accept_encoding; ngx_table_elt_t *via; #endif ngx_table_elt_t *authorization; ngx_table_elt_t *keep_alive; #if (NGX_HTTP_PROXY || NGX_HTTP_REALIP || NGX_HTTP_GEO) ngx_table_elt_t *x_forwarded_for; #endif #if (NGX_HTTP_REALIP) ngx_table_elt_t *x_real_ip; #endif #if (NGX_HTTP_HEADERS) ngx_table_elt_t *accept; ngx_table_elt_t *accept_language; #endif #if (NGX_HTTP_DAV) ngx_table_elt_t *depth; ngx_table_elt_t *destination; ngx_table_elt_t *overwrite; ngx_table_elt_t *date; #endif ngx_str_t user; ngx_str_t passwd; ngx_array_t cookies; ngx_str_t server; off_t content_length_n; time_t keep_alive_n; unsigned connection_type:2; unsigned msie:1; unsigned msie6:1; unsigned opera:1; unsigned gecko:1; unsigned chrome:1; unsigned safari:1; unsigned konqueror:1; } ngx_http_headers_in_t; 接着，该函数会检查进来的请求是否使用的是http0.9，如果是的话则使用从请求行里得到的域名，调用ngx_http_find_virtual_server（）函数来查找用来处理该请求的虚拟服务器配置，之前通过端口和地址找到的默认配置不再使用，找到相应的配置之后，则直接调用ngx_http_process_request（）函数处理该请求，因为http0.9是最原始的http协议，它里面没有定义任何请求头，显然就不需要读取请求头的操作。 if (r->host_start && r->host_end) { host = r->host_start; n = ngx_http_validate_host(r, &host, r->host_end - r->host_start, 0); if (n == 0) { ngx_log_error(NGX_LOG_INFO, c->log, 0, "client sent invalid host in request line"); ngx_http_finalize_request(r, NGX_HTTP_BAD_REQUEST); return; } if (n < 0) { ngx_http_close_request(r, NGX_HTTP_INTERNAL_SERVER_ERROR); return; } r->headers_in.server.len = n; 
r->headers_in.server.data = host; } if (r->http_version < NGX_HTTP_VERSION_10) { if (ngx_http_find_virtual_server(r, r->headers_in.server.data, r->headers_in.server.len) == NGX_ERROR) { ngx_http_close_request(r, NGX_HTTP_INTERNAL_SERVER_ERROR); return; } ngx_http_process_request(r); return; } 当然，如果是1.0或者更新的http协议，接下来要做的就是读取请求头了，首先nginx会为请求头分配空间，ngx_http_headers_in_t结构的headers字段为一个链表结构，它被用来保存所有请求头，初始为它分配了20个节点，每个节点的类型为ngx_table_elt_t，保存请求头的name/value值对，还可以看到ngx_http_headers_in_t结构有很多类型为ngx_table_elt_t*的指针成员，而且从它们的命名可以看出是一些常见的请求头名字，nginx对这些常用的请求头在ngx_http_headers_in_t结构里面保存了一份引用，后续需要使用的话，可以直接通过这些成员得到，另外也事先为cookie头分配了2个元素的数组空间，做完这些内存准备工作之后，该请求对应的读事件结构的处理函数被设置为ngx_http_process_request_headers，并随后马上调用了该函数。 if (ngx_list_init(&r->headers_in.headers, r->pool, 20, sizeof(ngx_table_elt_t)) != NGX_OK) { ngx_http_close_request(r, NGX_HTTP_INTERNAL_SERVER_ERROR); return; } if (ngx_array_init(&r->headers_in.cookies, r->pool, 2, sizeof(ngx_table_elt_t *)) != NGX_OK) { ngx_http_close_request(r, NGX_HTTP_INTERNAL_SERVER_ERROR); return; } c->log->action = "reading client request headers"; rev->handler = ngx_http_process_request_headers; ngx_http_process_request_headers(rev); ngx_http_process_request_headers函数循环的读取所有的请求头，并保存和初始化和请求头相关的结构，下面详细分析一下该函数：因为nginx对读取请求头有超时限制，ngx_http_process_request_headers函数作为读事件处理函数，一并处理了超时事件，如果读超时了，nginx直接给该请求返回408错误： if (rev->timedout) { ngx_log_error(NGX_LOG_INFO, c->log, NGX_ETIMEDOUT, "client timed out"); c->timedout = 1; ngx_http_close_request(r, NGX_HTTP_REQUEST_TIME_OUT); return; } 读取和解析请求头的逻辑和处理请求行差不多，总的流程也是循环的调用ngx_http_read_request_header（）函数读取数据，然后再调用一个解析函数来从读取的数据中解析请求头，直到解析完所有请求头，或者发生解析错误为止。当然由于涉及到网络io，这个流程可能发生在多个io事件的上下文中。接着来细看该函数，先调用了ngx_http_read_request_header（）函数读取数据，如果当前连接并没有数据过来，则直接返回，等待下一次读事件到来，如果读到了一些数据则调用ngx_http_parse_header_line（）函数来解析，同样的该解析函数实现为一个有限状态机，逻辑很简单，只是根据http协议来解析请求头，每次调用该函数最多解析出一个请求头，该函数返回4种不同返回值，表示不同解析结果： 
1，返回NGX_OK，表示解析出了一行请求头，这时还要判断解析出的请求头名字里面是否有非法字符，名字里面合法的字符包括字母，数字和连字符（-），另外如果设置了underscores_in_headers指令为on，则下划线也是合法字符，但是nginx默认下划线不合法，当请求头里面包含了非法的字符，nginx默认只是忽略这一行请求头；如果一切都正常，nginx会将该请求头及请求头名字的hash值保存在请求结构体的headers_in成员的headers链表,而且对于一些常见的请求头，如Host，Connection，nginx采用了类似于配置指令的方式，事先给这些请求头分配了一个处理函数，当解析出一个请求头时，会检查该请求头是否有设置处理函数，有的话则调用之，nginx所有有处理函数的请求头都记录在ngx_http_headers_in全局数组中： typedef struct { ngx_str_t name; ngx_uint_t offset; ngx_http_header_handler_pt handler; } ngx_http_header_t; ngx_http_header_t ngx_http_headers_in[] = { { ngx_string("Host"), offsetof(ngx_http_headers_in_t, host), ngx_http_process_host }, { ngx_string("Connection"), offsetof(ngx_http_headers_in_t, connection), ngx_http_process_connection }, { ngx_string("If-Modified-Since"), offsetof(ngx_http_headers_in_t, if_modified_since), ngx_http_process_unique_header_line }, { ngx_string("If-Unmodified-Since"), offsetof(ngx_http_headers_in_t, if_unmodified_since), ngx_http_process_unique_header_line }, { ngx_string("User-Agent"), offsetof(ngx_http_headers_in_t, user_agent), ngx_http_process_user_agent }, { ngx_string("Referer"), offsetof(ngx_http_headers_in_t, referer), ngx_http_process_header_line }, { ngx_string("Content-Length"), offsetof(ngx_http_headers_in_t, content_length), ngx_http_process_unique_header_line }, { ngx_string("Content-Type"), offsetof(ngx_http_headers_in_t, content_type), ngx_http_process_header_line }, { ngx_string("Range"), offsetof(ngx_http_headers_in_t, range), ngx_http_process_header_line }, { ngx_string("If-Range"), offsetof(ngx_http_headers_in_t, if_range), ngx_http_process_unique_header_line }, { ngx_string("Transfer-Encoding"), offsetof(ngx_http_headers_in_t, transfer_encoding), ngx_http_process_header_line }, { ngx_string("Expect"), offsetof(ngx_http_headers_in_t, expect), ngx_http_process_unique_header_line }, #if (NGX_HTTP_GZIP) { ngx_string("Accept-Encoding"), offsetof(ngx_http_headers_in_t, accept_encoding), ngx_http_process_header_line }, { ngx_string("Via"), 
offsetof(ngx_http_headers_in_t, via), ngx_http_process_header_line }, #endif { ngx_string("Authorization"), offsetof(ngx_http_headers_in_t, authorization), ngx_http_process_unique_header_line }, { ngx_string("Keep-Alive"), offsetof(ngx_http_headers_in_t, keep_alive), ngx_http_process_header_line }, #if (NGX_HTTP_PROXY || NGX_HTTP_REALIP || NGX_HTTP_GEO) { ngx_string("X-Forwarded-For"), offsetof(ngx_http_headers_in_t, x_forwarded_for), ngx_http_process_header_line }, #endif #if (NGX_HTTP_REALIP) { ngx_string("X-Real-IP"), offsetof(ngx_http_headers_in_t, x_real_ip), ngx_http_process_header_line }, #endif #if (NGX_HTTP_HEADERS) { ngx_string("Accept"), offsetof(ngx_http_headers_in_t, accept), ngx_http_process_header_line }, { ngx_string("Accept-Language"), offsetof(ngx_http_headers_in_t, accept_language), ngx_http_process_header_line }, #endif #if (NGX_HTTP_DAV) { ngx_string("Depth"), offsetof(ngx_http_headers_in_t, depth), ngx_http_process_header_line }, { ngx_string("Destination"), offsetof(ngx_http_headers_in_t, destination), ngx_http_process_header_line }, { ngx_string("Overwrite"), offsetof(ngx_http_headers_in_t, overwrite), ngx_http_process_header_line }, { ngx_string("Date"), offsetof(ngx_http_headers_in_t, date), ngx_http_process_header_line }, #endif { ngx_string("Cookie"), 0, ngx_http_process_cookie }, { ngx_null_string, 0, NULL } }; ngx_http_headers_in数组当前包含了25个常用的请求头，每个请求头都设置了一个处理函数，其中一部分请求头设置的是公共处理函数，这里有2个公共处理函数，ngx_http_process_header_line和ngx_http_process_unique_header_line。先来看一下处理函数的函数指针定义： typedef ngx_int_t (*ngx_http_header_handler_pt)(ngx_http_request_t *r, ngx_table_elt_t *h, ngx_uint_t offset); 它有3个参数，r为对应的请求结构，h为指向该请求头在headers_in.headers链表中对应节点的指针，offset为该请求头对应字段在ngx_http_headers_in_t结构中的偏移。再来看ngx_http_process_header_line函数： static ngx_int_t ngx_http_process_header_line(ngx_http_request_t *r, ngx_table_elt_t *h, ngx_uint_t offset) { ngx_table_elt_t **ph; ph = (ngx_table_elt_t **) ((char *) &r->headers_in + offset); if (*ph == NULL) { *ph = h; } 
return NGX_OK; } 这个函数只是简单将该请求头在ngx_http_headers_in_t结构中保存一份引用。ngx_http_process_unique_header_line功能类似，不同点在于该函数会检查这个请求头是否是重复的，如果是的话，则给该请求返回400错误。 ngx_http_headers_in数组中剩下的请求头都有自己特殊的处理函数，这些特殊的函数根据对应的请求头有一些特殊的处理，下面拿Host头的处理函数ngx_http_process_host做一下介绍： static ngx_int_t ngx_http_process_host(ngx_http_request_t *r, ngx_table_elt_t *h, ngx_uint_t offset) { u_char *host; ssize_t len; if (r->headers_in.host == NULL) { r->headers_in.host = h; } host = h->value.data; len = ngx_http_validate_host(r, &host, h->value.len, 0); if (len == 0) { ngx_log_error(NGX_LOG_INFO, r->connection->log, 0, "client sent invalid host header"); ngx_http_finalize_request(r, NGX_HTTP_BAD_REQUEST); return NGX_ERROR; } if (len < 0) { ngx_http_close_request(r, NGX_HTTP_INTERNAL_SERVER_ERROR); return NGX_ERROR; } if (r->headers_in.server.len) { return NGX_OK; } r->headers_in.server.len = len; r->headers_in.server.data = host; return NGX_OK; } 此函数的目的也是保存Host头的快速引用，它会对Host头的值做一些合法性检查，并从中解析出域名，保存在headers_in.server字段，实际上前面在解析请求行时，headers_in.server可能已经被赋值为从请求行中解析出来的域名，根据http协议的规范，如果请求行中的uri带有域名的话，则域名以它为准，所以这里需检查一下headers_in.server是否为空，如果不为空则不需要再赋值。其他请求头的特殊处理函数，不再做介绍，大致都是根据该请求头在http协议中规定的意义及其值设置请求的一些属性，以备后续使用。对一个合法的请求头的处理大致为如上所述； 2，返回NGX_AGAIN，表示当前接收到的数据不够，一行请求头还未结束，需要继续下一轮循环。在下一轮循环中，nginx首先检查请求头缓冲区header_in是否已满，如果满了，则调用ngx_http_alloc_large_header_buffer（）函数分配更多缓冲区，下面分析一下ngx_http_alloc_large_header_buffer函数： static ngx_int_t ngx_http_alloc_large_header_buffer(ngx_http_request_t *r, ngx_uint_t request_line) { u_char *old, *new; ngx_buf_t *b; ngx_http_connection_t *hc; ngx_http_core_srv_conf_t *cscf; ngx_log_debug0(NGX_LOG_DEBUG_HTTP, r->connection->log, 0, "http alloc large header buffer"); /* * 在解析请求行阶段，如果客户端在发送请求行之前发送了大量回车换行符将 * 缓冲区塞满了，针对这种情况，nginx只是简单的重置缓冲区，丢弃这些垃圾 * 数据，不需要分配更大的内存。 */ if (request_line && r->state == 0) { /* the client fills up the buffer with "\r\n" */ r->request_length += r->header_in->end - r->header_in->start; r->header_in->pos = r->header_in->start; r->header_in->last = 
r->header_in->start; return NGX_OK; } /* 保存请求行或者请求头在旧缓冲区中的起始地址 */ old = request_line ? r->request_start : r->header_name_start; cscf = ngx_http_get_module_srv_conf(r, ngx_http_core_module); /* 如果一个大缓冲区还装不下请求行或者一个请求头，则返回错误 */ if (r->state != 0 && (size_t) (r->header_in->pos - old) >= cscf->large_client_header_buffers.size) { return NGX_DECLINED; } hc = r->http_connection; /* 首先在ngx_http_connection_t结构中查找是否有空闲缓冲区，有的话，直接取之 */ if (hc->nfree) { b = hc->free[--hc->nfree]; ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0, "http large header free: %p %uz", b->pos, b->end - b->last); /* 检查给该请求分配的请求头缓冲区个数是否已经超过限制，默认最大个数为4个 */ } else if (hc->nbusy < cscf->large_client_header_buffers.num) { if (hc->busy == NULL) { hc->busy = ngx_palloc(r->connection->pool, cscf->large_client_header_buffers.num * sizeof(ngx_buf_t *)); if (hc->busy == NULL) { return NGX_ERROR; } } /* 如果还没有达到最大分配数量，则分配一个新的大缓冲区 */ b = ngx_create_temp_buf(r->connection->pool, cscf->large_client_header_buffers.size); if (b == NULL) { return NGX_ERROR; } ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0, "http large header alloc: %p %uz", b->pos, b->end - b->last); } else { /* 如果已经达到最大的分配限制，则返回错误 */ return NGX_DECLINED; } /* 将从空闲队列取得的或者新分配的缓冲区加入已使用队列 */ hc->busy[hc->nbusy++] = b; /* * 因为nginx中，所有的请求头的保存形式都是指针（起始和结束地址）， * 所以一行完整的请求头必须放在连续的内存块中。如果旧的缓冲区不能 * 再放下整行请求头，则分配新缓冲区，并从旧缓冲区拷贝已经读取的部分请求头， * 拷贝完之后，需要修改所有相关指针指向到新缓冲区。 * status为0表示解析完一行请求头之后，缓冲区正好被用完，这种情况不需要拷贝 */ if (r->state == 0) { /* * r->state == 0 means that a header line was parsed successfully * and we do not need to copy incomplete header line and * to relocate the parser header pointers */ r->request_length += r->header_in->end - r->header_in->start; r->header_in = b; return NGX_OK; } ngx_log_debug1(NGX_LOG_DEBUG_HTTP, r->connection->log, 0, "http large header copy: %d", r->header_in->pos - old); r->request_length += old - r->header_in->start; new = b->start; /* 拷贝旧缓冲区中不完整的请求头 */ ngx_memcpy(new, old, r->header_in->pos - old); b->pos = new + 
(r->header_in->pos - old); b->last = new + (r->header_in->pos - old); /* 修改相应的指针指向新缓冲区 */ if (request_line) { r->request_start = new; if (r->request_end) { r->request_end = new + (r->request_end - old); } r->method_end = new + (r->method_end - old); r->uri_start = new + (r->uri_start - old); r->uri_end = new + (r->uri_end - old); if (r->schema_start) { r->schema_start = new + (r->schema_start - old); r->schema_end = new + (r->schema_end - old); } if (r->host_start) { r->host_start = new + (r->host_start - old); if (r->host_end) { r->host_end = new + (r->host_end - old); } } if (r->port_start) { r->port_start = new + (r->port_start - old); r->port_end = new + (r->port_end - old); } if (r->uri_ext) { r->uri_ext = new + (r->uri_ext - old); } if (r->args_start) { r->args_start = new + (r->args_start - old); } if (r->http_protocol.data) { r->http_protocol.data = new + (r->http_protocol.data - old); } } else { r->header_name_start = new; r->header_name_end = new + (r->header_name_end - old); r->header_start = new + (r->header_start - old); r->header_end = new + (r->header_end - old); } r->header_in = b; return NGX_OK; } 当ngx_http_alloc_large_header_buffer函数返回NGX_DECLINED时，表示客户端发送了一行过大的请求头，或者是整个请求头部超过了限制，nginx会返回494错误，注意到nginx在返回494错误之前将请求的lingering_close标识置为了1，这样做的目的是在返回响应之前丢弃掉客户端发过来的其他数据； 3，返回NGX_HTTP_PARSE_INVALID_HEADER，表示请求头解析过程中遇到错误，一般为客户端发送了不符合协议规范的头部，此时nginx返回400错误； 
4，返回NGX_HTTP_PARSE_HEADER_DONE，表示所有请求头已经成功的解析，这时请求的状态被设置为NGX_HTTP_PROCESS_REQUEST_STATE，意味着结束了请求读取阶段，正式进入了请求处理阶段，但是实际上请求可能含有请求体，nginx在请求读取阶段并不会去读取请求体，这个工作交给了后续的请求处理阶段的模块，这样做的目的是nginx本身并不知道这些请求体是否有用，如果后续模块并不需要的话，一方面请求体一般较大，如果全部读取进内存，则白白耗费大量的内存空间，另一方面即使nginx将请求体写进磁盘，但是涉及到磁盘io，会耗费比较多时间。所以交由后续模块来决定读取还是丢弃请求体是最明智的办法。读取完请求头之后，nginx调用了ngx_http_process_request_header（）函数，这个函数主要做了两个方面的事情，一是调用ngx_http_find_virtual_server（）函数查找虚拟服务器配置；二是对一些请求头做一些协议的检查。比如对那些使用http1.1协议但是却没有发送Host头的请求，nginx给这些请求返回400错误。还有nginx现在的版本并不支持chunked格式的输入，如果某些请求申明自己使用了chunked格式的输入（请求带有值为chunked的transfer_encoding头部)，nginx给这些请求返回411错误。等等。最后调用ngx_http_process_request（）函数处理请求,至此，nginx请求头接收流程就介绍完毕。 ### 请求体读取(100%)[](http://tengine.taobao.org/book/chapter_12.html#id5 "永久链接至标题") 上节说到nginx核心本身不会主动读取请求体，这个工作是交给请求处理阶段的模块来做，但是nginx核心提供了ngx_http_read_client_request_body()接口来读取请求体，另外还提供了一个丢弃请求体的接口-ngx_http_discard_request_body()，在请求执行的各个阶段中，任何一个阶段的模块如果对请求体感兴趣或者希望丢掉客户端发过来的请求体，可以分别调用这两个接口来完成。这两个接口是nginx核心提供的处理请求体的标准接口，如果希望配置文件中一些请求体相关的指令（比如client_body_in_file_only，client_body_buffer_size等）能够预期工作，以及能够正常使用nginx内置的一些和请求体相关的变量（比如$request_body和$request_body_file），一般来说所有模块都必须调用这些接口来完成相应操作，如果需要自定义接口来处理请求体，也应尽量兼容nginx默认的行为。 #### 读取请求体[](http://tengine.taobao.org/book/chapter_12.html#id6 "永久链接至标题") 请求体的读取一般发生在nginx的content handler中，一些nginx内置的模块，比如proxy模块，fastcgi模块，uwsgi模块等，这些模块的行为必须将客户端过来的请求体（如果有的话）以相应协议完整的转发到后端服务进程，所有的这些模块都是调用了ngx_http_read_client_request_body()接口来完成请求体读取。值得注意的是这些模块会把客户端的请求体完整的读取后才开始往后端转发数据。由于内存的限制，ngx_http_read_client_request_body()接口读取的请求体会部分或者全部写入一个临时文件中，根据请求体的大小以及相关的指令配置，请求体可能完整放置在一块连续内存中，也可能分别放置在两块不同内存中，还可能全部存在一个临时文件中，最后还可能一部分在内存，剩余部分在临时文件中。下面先介绍一下和这些不同存储行为相关的指令： <table class="docutils field-list" frame="void" rules="none" style="margin: 0px -0.5em; border: 0px;"><colgroup><col class="field-name"/><col class="field-body"/></colgroup><tbody valign="top"><tr class="field-odd field"><th class="field-name" colspan="2" style="padding: 1px 8px 1px 5px; border: 0px 
!important;">client_body_buffer_size:</th></tr><tr class="field-odd field"><td style="padding: 1px 8px 1px 5px; border: 0px !important;"> </td><td class="field-body" style="padding: 1px 8px 1px 5px; border: 0px !important;">设置缓存请求体的buffer大小，默认为系统页大小的2倍，当请求体的大小超过此大小时，nginx会把请求体写入到临时文件中。可以根据业务需求设置合适的大小，尽量避免磁盘io操作;</td></tr><tr class="field-even field"><th class="field-name" colspan="2" style="padding: 1px 8px 1px 5px; border: 0px !important;">client_body_in_single_buffer:</th></tr><tr class="field-even field"><td style="padding: 1px 8px 1px 5px; border: 0px !important;"> </td><td class="field-body" style="padding: 1px 8px 1px 5px; border: 0px !important;">指示是否将请求体完整的存储在一块连续的内存中，默认为off，如果此指令被设置为on，则nginx会保证请求体在不大于client_body_buffer_size设置的值时，被存放在一块连续的内存中，但超过大小时会被整个写入一个临时文件;</td></tr><tr class="field-odd field"><th class="field-name" colspan="2" style="padding: 1px 8px 1px 5px; border: 0px !important;">client_body_in_file_only:</th></tr><tr class="field-odd field"><td style="padding: 1px 8px 1px 5px; border: 0px !important;"> </td><td class="field-body" style="padding: 1px 8px 1px 5px; border: 0px !important;">设置是否总是将请求体保存在临时文件中，默认为off，当此指令被设置为on时，即使客户端显式指示了请求体长度为0时，nginx还是会为请求创建一个临时文件。</td></tr></tbody></table> 接着介绍ngx_http_read_client_request_body()接口的实现，它的定义如下： ngx_int_t ngx_http_read_client_request_body(ngx_http_request_t *r, ngx_http_client_body_handler_pt post_handler) 该接口有2个参数，第1个为指向请求结构的指针，第2个为一个函数指针，当请求体读完时，它会被调用。之前也说到根据nginx现有行为，模块逻辑会在请求体读完后执行，这个回调函数一般就是模块的逻辑处理函数。ngx_http_read_client_request_body()函数首先将参数r对应的主请求的引用加1，这样做的目的和该接口被调用的上下文有关，一般而言，模块是在content handler中调用此接口，一个典型的调用如下： static ngx_int_t ngx_http_proxy_handler(ngx_http_request_t *r) { ... 
rc = ngx_http_read_client_request_body(r, ngx_http_upstream_init); if (rc >= NGX_HTTP_SPECIAL_RESPONSE) { return rc; } return NGX_DONE; } 上面的代码是在proxy模块的content handler，ngx_http_proxy_handler()中调用了ngx_http_read_client_request_body()函数，其中ngx_http_upstream_init()被作为回调函数传入进接口中，另外nginx中模块的content handler调用的上下文如下： ngx_int_t ngx_http_core_content_phase(ngx_http_request_t *r, ngx_http_phase_handler_t *ph) { ... if (r->content_handler) { r->write_event_handler = ngx_http_request_empty_handler; ngx_http_finalize_request(r, r->content_handler(r)); return NGX_OK; } ... } 上面的代码中，content handler调用之后，它的返回值作为参数调用了ngx_http_finalize_request()函数，在请求体没有被接收完全时，ngx_http_read_client_request_body()函数返回值为NGX_AGAIN，此时content handler，比如ngx_http_proxy_handler()会返回NGX_DONE，而NGX_DONE作为参数传给ngx_http_finalize_request()函数会导致主请求的引用计数减1，所以正好抵消了ngx_http_read_client_request_body()函数开头对主请求计数的加1。接下来回到ngx_http_read_client_request_body()函数，它会检查该请求的请求体是否已经被读取或者被丢弃了，如果是的话，则直接调用回调函数并返回NGX_OK，这里实际上是为子请求检查，子请求是nginx中的一个概念，nginx中可以在当前请求中发起另外一个或多个全新的子请求来访问其他的location，关于子请求的具体介绍会在后面的章节作详细分析，一般而言子请求不需要自己去读取请求体。函数接着调用ngx_http_test_expect()检查客户端是否发送了Expect: 100-continue头，是的话则给客户端回复“HTTP/1.1 100 Continue”，根据http 1.1协议，客户端可以发送一个Expect头来向服务器表明期望发送请求体，服务器如果允许客户端发送请求体，则会回复“HTTP/1.1 100 Continue”，客户端收到时，才会开始发送请求体。接着继续为接收请求体做准备工作，分配一个ngx_http_request_body_t结构，并保存在r->request_body，这个结构用来保存请求体读取过程用到的缓存引用，临时文件引用，剩余请求体大小等信息，它的定义如下: typedef struct { ngx_temp_file_t *temp_file; ngx_chain_t *bufs; ngx_buf_t *buf; off_t rest; ngx_chain_t *to_write; ngx_http_client_body_handler_pt post_handler; } ngx_http_request_body_t; | temp_file: | 指向储存请求体的临时文件的指针； | |-----|-----| | bufs: | 指向保存请求体的链表头； | | buf: | 指向当前用于保存请求体的内存缓存； | | rest: | 当前剩余的请求体大小； | | post_handler: | 保存传给ngx_http_read_client_request_body()函数的回调函数。 | 
做好准备工作之后，函数开始检查请求是否带有content_length头，如果没有该头或者客户端发送了一个值为0的content_length头，表明没有请求体，这时直接调用回调函数并返回NGX_OK即可。当然如果client_body_in_file_only指令被设置为on，且content_length为0时，该函数在调用回调函数之前，会创建一个空的临时文件。进入到函数下半部分，表明客户端请求确实表明了要发送请求体，该函数会先检查是否在读取请求头时预读了请求体，这里的检查是通过判断保存请求头的缓存(r->header_in)中是否还有未处理的数据。如果有预读数据，则分配一个ngx_buf_t结构，并将r->header_in中的预读数据保存在其中，并且如果r->header_in中还有剩余空间，并且能够容下剩余未读取的请求体，这些空间将被继续使用，而不用分配新的缓存，当然甚至如果请求体已经被整个预读了，则不需要继续处理了，此时调用回调函数后返回。如果没有预读数据或者预读不完整，该函数会分配一块新的内存（除非r->header_in还有足够的剩余空间），另外如果client_body_in_single_buffer指令被设置为on（即请求的request_body_in_single_buf标志为1），则预读的数据会被拷贝进新开辟的内存块中，真正读取请求体的操作是在ngx_http_do_read_client_request_body()函数，该函数循环的读取请求体并保存在缓存中，如果缓存被写满了，其中的数据会被清空并写回到临时文件中。当然这里有可能不能一次将数据读到，该函数会挂载读事件并设置读事件handler为ngx_http_read_client_request_body_handler，另外nginx核心对两次请求体的读事件之间也做了超时设置，client_body_timeout指令可以设置这个超时时间，默认为60秒，如果下次读事件超时了，nginx会返回408给客户端。最终读完请求体后，ngx_http_do_read_client_request_body()会根据配置，将请求体调整到预期的位置(内存或者文件)，所有情况下请求体都可以从r->request_body的bufs链表得到，该链表最多可能有2个节点，每个节点为一个buffer，但是这个buffer的内容可能是保存在内存中，也可能是保存在磁盘文件中。另外$request_body变量只在当请求体已经被读取并且是全部保存在内存中，才能取得相应的数据。 #### 丢弃请求体[](http://tengine.taobao.org/book/chapter_12.html#id7 "永久链接至标题") 一个模块想要主动的丢弃客户端发过来的请求体，可以调用nginx核心提供的ngx_http_discard_request_body()接口，主动丢弃的原因可能有很多种，如模块的业务逻辑压根不需要请求体，客户端发送了过大的请求体，另外为了兼容http1.1协议的pipeline请求，模块有义务主动丢弃不需要的请求体。总之为了保持良好的客户端兼容性，nginx必须主动丢弃无用的请求体。下面开始分析ngx_http_discard_request_body()函数： ngx_int_t ngx_http_discard_request_body(ngx_http_request_t *r) { ssize_t size; ngx_event_t *rev; if (r != r->main || r->discard_body) { return NGX_OK; } if (ngx_http_test_expect(r) != NGX_OK) { return NGX_HTTP_INTERNAL_SERVER_ERROR; } rev = r->connection->read; ngx_log_debug0(NGX_LOG_DEBUG_HTTP, rev->log, 0, "http set discard body"); if (rev->timer_set) { ngx_del_timer(rev); } if (r->headers_in.content_length_n <= 0 || r->request_body) { return NGX_OK; } size = r->header_in->last - r->header_in->pos; if (size) { if (r->headers_in.content_length_n > size) { r->header_in->pos += size; r->headers_in.content_length_n -= size; } else { 
r->header_in->pos += (size_t) r->headers_in.content_length_n; r->headers_in.content_length_n = 0; return NGX_OK; } } r->read_event_handler = ngx_http_discarded_request_body_handler; if (ngx_handle_read_event(rev, 0) != NGX_OK) { return NGX_HTTP_INTERNAL_SERVER_ERROR; } if (ngx_http_read_discarded_request_body(r) == NGX_OK) { r->lingering_close = 0; } else { r->count++; r->discard_body = 1; } return NGX_OK; } 由于函数不长，这里把它完整的列出来了，函数的开始同样先判断了不需要再做处理的情况：子请求不需要处理，已经调用过此函数的也不需要再处理。接着调用ngx_http_test_expect() 处理http1.1 expect的情况，根据http1.1的expect机制，如果客户端发送了expect头，而服务端不希望接收请求体时，必须返回417(Expectation Failed)错误。nginx并没有这样做，它只是简单的让客户端把请求体发送过来，然后丢弃掉。接下来，函数删掉了读事件上的定时器，因为这时本身就不需要请求体，所以也无所谓客户端发送的快还是慢了，当然后面还会讲到，当nginx已经处理完该请求但客户端还没有发送完无用的请求体时，nginx会在读事件上再挂上定时器。客户端如果打算发送请求体，就必须发送content-length头，所以函数会检查请求头中的content-length头，同时还会查看其他地方是不是已经读取了请求体。如果确实有待处理的请求体，函数接着检查请求头buffer中预读的数据，预读的数据会直接被丢掉，当然如果请求体已经被全部预读，函数就直接返回了。接下来，如果还有剩余的请求体未处理，该函数调用ngx_handle_read_event()在事件处理机制中挂载好读事件，并把读事件的处理函数设置为ngx_http_discarded_request_body_handler。做好这些准备之后，该函数最后调用ngx_http_read_discarded_request_body()接口读取客户端过来的请求体并丢弃。如果客户端并没有一次将请求体发过来，函数会返回，剩余的数据等到下一次读事件过来时，交给ngx_http_discarded_request_body_handler()来处理，这时，请求的discard_body将被设置为1用来标识这种情况。另外请求的引用数(count)也被加1，这样做的目的是客户端可能在nginx处理完请求之后仍未完整发送待发送的请求体，增加引用是防止nginx核心在处理完请求后直接释放了请求的相关资源。 ngx_http_read_discarded_request_body()函数非常简单，它循环的从连接中读取数据并丢弃，直到读完接收缓冲区的所有数据，如果请求体已经被读完了，该函数会设置读事件的处理函数为ngx_http_block_reading，这个函数仅仅删除水平触发的读事件，防止同一事件不断被触发。最后看一下读事件的处理函数ngx_http_discarded_request_body_handler，这个函数每次读事件来时会被调用，先看一下它的源码： void ngx_http_discarded_request_body_handler(ngx_http_request_t *r) { ... 
c = r->connection; rev = c->read; if (rev->timedout) { c->timedout = 1; c->error = 1; ngx_http_finalize_request(r, NGX_ERROR); return; } if (r->lingering_time) { timer = (ngx_msec_t) (r->lingering_time - ngx_time()); if (timer <= 0) { r->discard_body = 0; r->lingering_close = 0; ngx_http_finalize_request(r, NGX_ERROR); return; } } else { timer = 0; } rc = ngx_http_read_discarded_request_body(r); if (rc == NGX_OK) { r->discard_body = 0; r->lingering_close = 0; ngx_http_finalize_request(r, NGX_DONE); return; } /* rc == NGX_AGAIN */ if (ngx_handle_read_event(rev, 0) != NGX_OK) { c->error = 1; ngx_http_finalize_request(r, NGX_ERROR); return; } if (timer) { clcf = ngx_http_get_module_loc_conf(r, ngx_http_core_module); timer *= 1000; if (timer > clcf->lingering_timeout) { timer = clcf->lingering_timeout; } ngx_add_timer(rev, timer); } } 函数一开始就处理了读事件超时的情况，之前说到在ngx_http_discard_request_body()函数中已经删除了读事件的定时器，那么什么时候会设置定时器呢？答案就是在nginx已经处理完该请求，但是又没有完全将该请求的请求体丢弃的时候（客户端可能还没有发送过来），在ngx_http_finalize_connection()函数中，如果检查到还有未丢弃的请求体时，nginx会添加一个读事件定时器，它的时长为lingering_timeout指令所指定，默认为5秒，不过这个时间仅仅是两次读事件之间的超时时间，等待请求体的总时长为lingering_time指令所指定，默认为30秒。这种情况中，该函数如果检测到超时事件则直接返回并断开连接。同样，还需要控制整个丢弃请求体的时长不能超过lingering_time设置的时间，如果超过了最大时长，也会直接返回并断开连接。如果读事件发生在请求处理完之前，则不用处理超时事件，也不用设置定时器，函数只是简单的调用ngx_http_read_discarded_request_body()来读取并丢弃数据。