前言
要想构建一个简单的web server,最简单的办法当然是使用python啦。但是为了能够更清楚地了解web server的运行原理,在linux上用assembly来实现是最合适的~
首先需要了解的一点是:web server是构建在linux操作系统上的应用程序,其与外界进行互动时,需要让linux操作系统来充当中介
1. 构建web server所需的system call与结构体
int socket(
int domain, //socket() creates an endpoint for communication
int type, //and returns a file descriptor
int protocol //that refers to that endpoint.
)
int bind(
int sockfd, //When a socket is created with socket(2),
struct sockaddr *addr, //it exists in a name space (address family) but has no address assigned to it.
socklen_t addrlen//bind() assigns the address specified by addr to the socket referred to by the file descriptor sockfd.
)
这里出现了关键的结构体:
/* Generic socket address as taken by bind()/accept(): a family tag followed
   by 14 bytes whose meaning depends on that family (2 + 14 = 16 bytes). */
struct sockaddr {
uint16_t sa_family; /* address family, e.g. AF_INET */
uint8_t sa_data[14]; /* family-specific address bytes */
};
// sockaddr结构体用来描述一个网络连接还是太粗糙了,于是有了下面的改进版本
/* IPv4 view of struct sockaddr: the same 16 bytes, with the 14 data bytes
   split into port, address and padding. */
struct sockaddr_in {
    uint16_t sin_family;   /* address family; AF_INET for IPv4 */
    uint16_t sin_port;     /* TCP/UDP port in network byte order, i.e. htons(port) */
    uint32_t sin_addr;     /* IPv4 address as returned by inet_addr() */
    uint8_t  __pad[8];     /* unused; pads the struct to 16 bytes */
};
// 可以看到sockaddr_in和sockaddr本质上是一个东西,只不过sockaddr_in划分结构体成员更加精细
int listen(
int sockfd, //listen() marks the socket referred to by sockfd as a passive socket,
int backlog//that is, as a socket that will be used to accept incoming connection requests using accept(2).
)
int accept(
int sockfd, //The accept() system call is used with connection-based socket types (SOCK_STREAM, SOCK_SEQPACKET).
struct sockaddr *addr,//It extracts the first connection request on the queue of pending connections for the listening socket, sockfd,
socklen_t *addrlen//creates a new connected socket, and returns a new file descriptor referring to that socket.
)
一般服务器接收请求的步骤如下:
socket(AF_INET, SOCK_STREAM, IPPROTO_IP) = 3
bind(3,
{sa_family=AF_INET,
sin_port=htons(80),
sin_addr=inet_addr("0.0.0.0")},
16) = 0
listen(3, 0) = 0
accept(3, NULL, NULL) = 4
read(4,
"GET /flag HTTP/1.0\r\n\r\n",
256) = 19
open("/flag", O_RDONLY) = 5
read(5, "FLAG", 256) = 4
write(4,
"HTTP/1.0 200 OK\r\n\r\nFLAG",
27) = 27
close(4)
// 上述流程只能处理一条连接,而实际中服务器会收到多条连接。解决办法:每accept一条连接就fork一个子进程去处理它,父进程继续accept:
socket(AF_INET, SOCK_STREAM, IPPROTO_IP) = 3
bind(3,
{sa_family=AF_INET,
sin_port=htons(80),
sin_addr=inet_addr("0.0.0.0")},
16) = 0
listen(3, 0) = 0
accept(3, NULL, NULL) = 4
// 父进程                          // 子进程
fork() = 43                        fork() = 0
close(4) = 0                       close(3) = 0
accept(3, NULL, NULL) = 4          // 之后处理该连接(read/open/write,同上)
1.1 用汇编来实现上述步骤
使用 as -o server.o server.s && ld -o server server.o 命令来完成汇编与链接。
#assembler grammar, GNU Assembler(GAS)
.intel_syntax noprefix
.globl _start
.data

# ---- request-method literals and their byte lengths --------------------
get:
        .string "GET"
get_len:
        .int    3
post:
        .string "POST"
post_len:
        .int    4

# ---- per-request scratch state ------------------------------------------
# Everything below starts out zeroed. The order of these objects is kept
# exactly as it was, so their relative addresses do not change.
file_fd:
        .int    0                       # fd of the file being served / written
file_path:
        .zero   40                      # NUL-terminated path taken from the request line
file_content:
        .zero   0x200                   # file bytes (GET) or request body (POST)
file_content_len:
        .int    0                       # number of valid bytes in file_content
accept_content:
        .zero   0x200                   # raw request as read from the client
accept_content_len:
        .int    0                       # number of bytes read into accept_content
acceptfd:
        .int    0                       # connected client socket
sockfd:
        .int    0                       # saved listening socket

# ---- struct sockaddr_in handed to bind(): 16 bytes ----------------------
sockaddr_in:
        .short  2                       # sin_family = AF_INET
        .byte   0x00, 0x50              # sin_port   = htons(80), big-endian bytes
        .int    0                       # sin_addr   = inet_addr("0.0.0.0")
        .quad   0                       # 8 bytes of zero padding

# ---- canned success header ----------------------------------------------
http_response:
        .string "HTTP/1.0 200 OK\r\n\r\n"
http_response_len:
        .int    19                      # header length without the trailing NUL
.section .text
# ----------------------------------------------------------------------
# _start: process entry point (Linux x86-64, raw syscalls, no libc).
#
# Creates a TCP socket, binds it using the sockaddr_in record, listens,
# and then accepts connections forever. Every accepted connection is
# handed to a forked child; the parent only closes its copy of the
# connected socket and goes back to accept().
#
# Child: closes the listening socket, reads up to 0x200 bytes of the
# request into accept_content (byte count in accept_content_len) and
# calls parse_request. If parse_request returns (request not handled),
# the child exits cleanly. _start has no return address on the stack,
# so it must leave through SYS_exit and never through `ret`.
#
# Failure handling: a failed socket/bind/listen terminates the server
# with exit status 1; a failed accept is retried; a failed fork (rax
# negative, hence non-zero) takes the parent path and drops the
# connection.
#
# NOTE(review): children are never waited for, so finished children stay
# around as zombies until the server exits.
#
# Syscall ABI: rax = number, args in rdi/rsi/rdx; rcx and r11 are
# clobbered by `syscall`; a negative result is -errno.
# ----------------------------------------------------------------------
_start:
        # sockfd = socket(AF_INET, SOCK_STREAM, 0)
        mov     edi, 2                          # domain   = AF_INET
        mov     esi, 1                          # type     = SOCK_STREAM
        xor     edx, edx                        # protocol = 0 (default)
        mov     eax, 41                         # SYS_socket
        syscall
        test    eax, eax
        js      .Lstart_fatal
        mov     [rip + sockfd], eax

        # bind(sockfd, &sockaddr_in, 16)
        mov     edi, eax
        lea     rsi, [rip + sockaddr_in]
        mov     edx, 16                         # sizeof(struct sockaddr_in)
        mov     eax, 49                         # SYS_bind
        syscall
        test    eax, eax
        js      .Lstart_fatal

        # listen(sockfd, 0)
        mov     edi, [rip + sockfd]
        xor     esi, esi                        # backlog = 0
        mov     eax, 50                         # SYS_listen
        syscall
        test    eax, eax
        js      .Lstart_fatal

accept_loop:
        # acceptfd = accept(sockfd, NULL, NULL)
        mov     edi, [rip + sockfd]
        xor     esi, esi
        xor     edx, edx
        mov     eax, 43                         # SYS_accept
        syscall
        test    eax, eax
        js      accept_loop                     # e.g. EINTR / ECONNABORTED: try again
        mov     [rip + acceptfd], eax

        mov     eax, 57                         # SYS_fork
        syscall
        test    rax, rax
        jnz     close_accept_fd                 # parent, or fork failed

        # ---- child: owns the connection ------------------------------
        mov     edi, [rip + sockfd]             # the listener is the parent's business
        mov     eax, 3                          # SYS_close
        syscall

        # accept_content_len = read(acceptfd, accept_content, 0x200)
        mov     edi, [rip + acceptfd]
        lea     rsi, [rip + accept_content]
        mov     edx, 0x200
        xor     eax, eax                        # SYS_read
        syscall
        test    eax, eax
        jle     .Lstart_child_exit              # error or peer closed: nothing to parse
        mov     [rip + accept_content_len], eax

        call    parse_request                   # returns only for unhandled requests

.Lstart_child_exit:
        xor     edi, edi                        # status 0
        mov     eax, 60                         # SYS_exit (also closes acceptfd)
        syscall

close_accept_fd:
        # ---- parent: the child holds its own copy of the socket ------
        mov     edi, [rip + acceptfd]
        mov     eax, 3                          # SYS_close
        syscall
        jmp     accept_loop

.Lstart_fatal:
        mov     edi, 1                          # status 1: could not set up the listener
        mov     eax, 60                         # SYS_exit
        syscall
# ----------------------------------------------------------------------
# parse_request: dispatch on the request method and serve GET.
#
# In:    accept_content = raw request bytes; acceptfd = client socket.
# Out:   A "GET <path> ..." request never returns: the file is sent and
#        the process exits (status 0 on success, 1 on failure).
#        Anything else tail-jumps to process_post.
# Clobb: rax, rcx, rdx, rsi, rdi, r11, flags; file_path, file_fd,
#        file_content, file_content_len.
#
# The path is the text between "GET " and the next space. It is limited
# to 39 bytes so it always fits file_path (40 bytes) together with its
# terminating NUL. A NUL byte before the delimiter means the request
# ended early; this relies on accept_content being zero-filled past the
# received bytes, which holds because each child fills it with exactly
# one read().
#
# On any failure (path too long, open or read error) no "200 OK" is sent;
# the connection is simply closed, so a bad length is never passed to
# write().
# ----------------------------------------------------------------------
parse_request:
        cld                                     # string ops must walk forward
        lea     rdi, [rip + accept_content]
        lea     rsi, [rip + get]
        mov     ecx, [rip + get_len]
        repe    cmpsb                           # ZF=1 iff the first get_len bytes match
        jne     process_post
        cmp     byte ptr [rdi], 32              # "GET" must be followed by ' '
        jne     process_post

process_get:
        lea     rsi, [rdi + 1]                  # rsi = first byte of the path
        lea     rdi, [rip + file_path]
        mov     ecx, 39                         # room left in file_path, NUL excluded

path_loop_get:
        mov     al, [rsi]
        cmp     al, 32                          # ' ' ends the path
        je      parse_get
        test    al, al
        jz      .Lget_fail                      # request ended inside the path
        test    ecx, ecx
        jz      .Lget_fail                      # path does not fit file_path
        mov     [rdi], al
        inc     rsi
        inc     rdi
        dec     ecx
        jmp     path_loop_get

parse_get:
        mov     byte ptr [rdi], 0               # terminate the path explicitly

        # file_fd = open(file_path, O_RDONLY)
        lea     rdi, [rip + file_path]
        xor     esi, esi                        # O_RDONLY
        mov     eax, 2                          # SYS_open
        syscall
        test    eax, eax
        js      .Lget_fail
        mov     [rip + file_fd], eax

        # file_content_len = read(file_fd, file_content, 0x200)
        mov     edi, eax
        lea     rsi, [rip + file_content]
        mov     edx, 0x200
        xor     eax, eax                        # SYS_read
        syscall
        mov     [rip + file_content_len], eax

        # close(file_fd) whether or not the read worked
        mov     edi, [rip + file_fd]
        mov     eax, 3                          # SYS_close
        syscall

        cmp     dword ptr [rip + file_content_len], 0
        jl      .Lget_fail                      # read error: no valid length to send

        # write(acceptfd, http_response, http_response_len)
        mov     edi, [rip + acceptfd]
        lea     rsi, [rip + http_response]
        mov     edx, [rip + http_response_len]
        mov     eax, 1                          # SYS_write
        syscall

        # write(acceptfd, file_content, file_content_len)
        mov     edi, [rip + acceptfd]
        lea     rsi, [rip + file_content]
        mov     edx, [rip + file_content_len]
        mov     eax, 1                          # SYS_write
        syscall

        # close(acceptfd); exit(0)
        mov     edi, [rip + acceptfd]
        mov     eax, 3                          # SYS_close
        syscall
        xor     edi, edi
        mov     eax, 60                         # SYS_exit
        syscall

.Lget_fail:
        # close(acceptfd); exit(1)
        mov     edi, [rip + acceptfd]
        mov     eax, 3                          # SYS_close
        syscall
        mov     edi, 1
        mov     eax, 60                         # SYS_exit
        syscall
# ----------------------------------------------------------------------
# process_post: handle "POST <path> ..." by storing the request body in
# the file named by <path>.
#
# In:    accept_content / accept_content_len = request bytes and their
#        count; acceptfd = client socket.
# Out:   Returns to the caller (via parse_end) when the request does not
#        start with "POST ". Otherwise never returns: the process exits
#        with status 0 after answering, or 1 on failure.
# Clobb: rax, rcx, rdx, rsi, rdi, r8, r9, r11, flags; file_path,
#        file_fd, file_content, file_content_len.
#
# The path is limited to 39 bytes so it fits file_path (40 bytes) with
# its NUL. The body is everything after the first "\r\n\r\n" up to the
# end of the received data; the search and the copy are both bounded by
# accept_content_len, so body bytes equal to 0 are preserved and nothing
# outside the request is touched.
#
# Only the bytes already in accept_content are considered; Content-Length
# is not parsed and no further read() is issued here.
# The file is opened with O_WRONLY|O_CREAT (no O_TRUNC), as before.
#
# On any failure (path too long, no header terminator, open or write
# error) no "200 OK" is sent; the connection is closed.
# ----------------------------------------------------------------------
process_post:
        cld                                     # string ops must walk forward
        lea     rdi, [rip + accept_content]
        lea     rsi, [rip + post]
        mov     ecx, [rip + post_len]
        repe    cmpsb                           # ZF=1 iff the first post_len bytes match
        jne     parse_end
        cmp     byte ptr [rdi], 32              # "POST" must be followed by ' '
        jne     parse_end

        lea     rsi, [rdi + 1]                  # rsi = first byte of the path
        lea     rdi, [rip + file_path]
        mov     ecx, 39                         # room left in file_path, NUL excluded

path_loop_post:
        mov     al, [rsi]
        cmp     al, 32                          # ' ' ends the path
        je      .Lpost_path_done
        test    al, al
        jz      .Lpost_fail                     # request ended inside the path
        test    ecx, ecx
        jz      .Lpost_fail                     # path does not fit file_path
        mov     [rdi], al
        inc     rsi
        inc     rdi
        dec     ecx
        jmp     path_loop_post

.Lpost_path_done:
        mov     byte ptr [rdi], 0               # terminate the path explicitly

        # r8 = one past the last received byte
        lea     r8, [rip + accept_content]
        mov     eax, [rip + accept_content_len]
        add     r8, rax

find_crlfcrlf:
        # Invariant: rsi points inside the received data.
        lea     rax, [rsi + 4]
        cmp     rax, r8
        ja      .Lpost_fail                     # fewer than 4 bytes left: no terminator
        cmp     dword ptr [rsi], 0x0a0d0a0d     # bytes 0d 0a 0d 0a = "\r\n\r\n"
        je      .Lpost_body
next_byte:
        inc     rsi
        jmp     find_crlfcrlf

.Lpost_body:
        add     rsi, 4                          # rsi = first body byte
        mov     rcx, r8
        sub     rcx, rsi                        # rcx = body length (0..0x1fc)
        mov     [rip + file_content_len], ecx
        lea     rdi, [rip + file_content]
        rep     movsb                           # no-op when the body is empty

parse_post:
        # file_fd = open(file_path, O_WRONLY | O_CREAT, 0777)
        lea     rdi, [rip + file_path]
        mov     esi, 65                         # O_WRONLY | O_CREAT
        mov     edx, 511                        # mode 0777
        mov     eax, 2                          # SYS_open
        syscall
        test    eax, eax
        js      .Lpost_fail
        mov     [rip + file_fd], eax

        # write(file_fd, file_content, file_content_len)
        mov     edi, eax
        lea     rsi, [rip + file_content]
        mov     edx, [rip + file_content_len]
        mov     eax, 1                          # SYS_write
        syscall
        mov     r9d, eax                        # keep the result across close()

        # close(file_fd) whether or not the write worked
        mov     edi, [rip + file_fd]
        mov     eax, 3                          # SYS_close
        syscall

        test    r9d, r9d
        js      .Lpost_fail                     # write error: do not claim success

        # write(acceptfd, http_response, http_response_len)
        mov     edi, [rip + acceptfd]
        lea     rsi, [rip + http_response]
        mov     edx, [rip + http_response_len]
        mov     eax, 1                          # SYS_write
        syscall

        # close(acceptfd); exit(0)
        mov     edi, [rip + acceptfd]
        mov     eax, 3                          # SYS_close
        syscall
        xor     edi, edi
        mov     eax, 60                         # SYS_exit
        syscall

.Lpost_fail:
        # close(acceptfd); exit(1)
        mov     edi, [rip + acceptfd]
        mov     eax, 3                          # SYS_close
        syscall
        mov     edi, 1
        mov     eax, 60                         # SYS_exit
        syscall

parse_end:
        # Neither GET nor POST: hand control back to the caller.
        ret