forked from iovisor/bcc
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathhttp-parse-simple.py
More file actions
153 lines (127 loc) · 5.14 KB
/
http-parse-simple.py
File metadata and controls
153 lines (127 loc) · 5.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#!/usr/bin/python
#
#Bertrone Matteo - Polytechnic of Turin
#November 2015
#
#eBPF application that parses HTTP packets
#and extracts (and prints on screen) the URL contained in the GET/POST request.
#
#eBPF program http_filter is used as SOCKET_FILTER attached to eth0 interface.
#only packet of type ip and tcp containing HTTP GET/POST are returned to userspace, others dropped
#
#python script uses bcc BPF Compiler Collection by iovisor (https://github.com/iovisor/bcc)
#and prints on stdout the first line of the HTTP GET/POST request containing the url
from __future__ import print_function
from bcc import BPF
from sys import argv
import sys
import socket
import os
#args
def usage():
print("USAGE: %s [-i <if_name>]" % argv[0])
print("")
print("Try '%s -h' for more options." % argv[0])
exit()
#help
def help():
print("USAGE: %s [-i <if_name>]" % argv[0])
print("")
print("optional arguments:")
print(" -h print this help")
print(" -i if_name select interface if_name. Default is eth0")
print("")
print("examples:")
print(" http-parse # bind socket to eth0")
print(" http-parse -i wlan0 # bind socket to wlan0")
exit()
#arguments
interface="eth0"
if len(argv) == 2:
if str(argv[1]) == '-h':
help()
else:
usage()
if len(argv) == 3:
if str(argv[1]) == '-i':
interface = argv[2]
else:
usage()
if len(argv) > 3:
usage()
print ("binding socket to '%s'" % interface)
# initialize BPF - load source code from http-parse-simple.c
bpf = BPF(src_file = "http-parse-simple.c",debug = 0)
#load eBPF program http_filter of type SOCKET_FILTER into the kernel eBPF vm
#more info about eBPF program types
#http://man7.org/linux/man-pages/man2/bpf.2.html
function_http_filter = bpf.load_func("http_filter", BPF.SOCKET_FILTER)
#create raw socket, bind it to interface
#attach bpf program to socket created
BPF.attach_raw_socket(function_http_filter, interface)
#get file descriptor of the socket previously created inside BPF.attach_raw_socket
socket_fd = function_http_filter.sock
#create python socket object, from the file descriptor
sock = socket.fromfd(socket_fd,socket.PF_PACKET,socket.SOCK_RAW,socket.IPPROTO_IP)
#set it as blocking socket
sock.setblocking(True)
while 1:
#retrieve raw packet from socket
packet_str = os.read(socket_fd,2048)
#DEBUG - print raw packet in hex format
#packet_hex = toHex(packet_str)
#print ("%s" % packet_hex)
#convert packet into bytearray
packet_bytearray = bytearray(packet_str)
#ethernet header length
ETH_HLEN = 14
#IP HEADER
#https://tools.ietf.org/html/rfc791
# 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
# +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
# |Version| IHL |Type of Service| Total Length |
# +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
#
#IHL : Internet Header Length is the length of the internet header
#value to multiply * 4 byte
#e.g. IHL = 5 ; IP Header Length = 5 * 4 byte = 20 byte
#
#Total length: This 16-bit field defines the entire packet size,
#including header and data, in bytes.
#calculate packet total length
total_length = packet_bytearray[ETH_HLEN + 2] #load MSB
total_length = total_length << 8 #shift MSB
total_length = total_length + packet_bytearray[ETH_HLEN+3] #add LSB
#calculate ip header length
ip_header_length = packet_bytearray[ETH_HLEN] #load Byte
ip_header_length = ip_header_length & 0x0F #mask bits 0..3
ip_header_length = ip_header_length << 2 #shift to obtain length
#TCP HEADER
#https://www.rfc-editor.org/rfc/rfc793.txt
# 12 13 14 15
# 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
# +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
# | Data | |U|A|P|R|S|F| |
# | Offset| Reserved |R|C|S|S|Y|I| Window |
# | | |G|K|H|T|N|N| |
# +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
#
#Data Offset: This indicates where the data begins.
#The TCP header is an integral number of 32 bits long.
#value to multiply * 4 byte
#e.g. DataOffset = 5 ; TCP Header Length = 5 * 4 byte = 20 byte
#calculate tcp header length
tcp_header_length = packet_bytearray[ETH_HLEN + ip_header_length + 12] #load Byte
tcp_header_length = tcp_header_length & 0xF0 #mask bit 4..7
tcp_header_length = tcp_header_length >> 2 #SHR 4 ; SHL 2 -> SHR 2
#calculate payload offset
payload_offset = ETH_HLEN + ip_header_length + tcp_header_length
#print first line of the HTTP GET/POST request
#line ends with 0xOD 0xOA (\r\n)
#(if we want to print all the header print until \r\n\r\n)
for i in range (payload_offset,len(packet_bytearray)-1):
if (packet_bytearray[i]== 0x0A):
if (packet_bytearray[i-1] == 0x0D):
break
print ("%c" % chr(packet_bytearray[i]), end = "")
print("")