1 源码结构
.
|-- AUTHORS
|-- COPYING
|-- ChangeLog
|-- INSTALL
|-- Makefile.am
|-- Makefile.in
|-- NEWS
|-- README
|-- acconfig.h
|-- acinclude.m4
|-- aclocal.m4
|-- config.guess
|-- config.sub
|-- configure
|-- configure.in
|-- doc
| |-- Makefile.am
| |-- Makefile.in
| `-- tcpflow.1.in
|-- install-sh
|-- missing
|-- mkinstalldirs
|-- src
| |-- Makefile.am
| |-- Makefile.in
| |-- conf.h.in
| |-- datalink.c
| |-- flow.c
| |-- main.c
| |-- stamp-h.in
| |-- sysdep.h
| |-- tcpflow.h
| |-- tcpip.c
| `-- util.c
`-- tcpflow.spec.in
源代码只有八个文件:
./src/sysdep.h
./src/tcpflow.h
./acconfig.h
./src/flow.c
./src/datalink.c
./src/util.c
./src/main.c
./src/tcpip.c
2 程序流程
从程序的入口main()开始追踪下去
main:
(1)获取用户输入的一些参数,对于必要的而用户又没有指定的参数,程序会给出一个默认参数值
(2.1)当输入是之前抓包的存储文件时,调用pd = pcap_open_offline(infile, error)
(2.2)当输入是网卡的实时数据时,调用pd = pcap_open_live(device, SNAPLEN, !no_promisc, 1000, error)
(3)第2步中通过pd这个返回值获取了读取设备的句柄,再通过以下两步确定数据包的处理函数:
dlt = pcap_datalink(pd); //获取数据链路层类型
handler = find_handler(dlt, device);//根据数据链路层类型查找对应的处理函数
(4)获取用户输入参数 expression = copy_argv(&argv[optind]);
(5)利用libpcap库设置用户过滤规则
if (pcap_compile(pd, &fcode, expression, 1, 0) < 0)
    die("%s", pcap_geterr(pd));
if (pcap_setfilter(pd, &fcode) < 0)
    die("%s", pcap_geterr(pd));
(6)初始化流存储结构
init_flow_state();
(7)安装信号处理函数,当获取对应信号时,触发相应的动作
portable_signal(SIGTERM, terminate);
portable_signal(SIGINT, terminate);
portable_signal(SIGHUP, terminate);
(8)循环监听设备,并处理监听到的数据流
if (pcap_loop(pd, -1, handler, NULL) < 0)
    die("%s", pcap_geterr(pd));
find_handler函数实现
//实现非常简单,将接口的类型和所有的注册类型进行循环匹配,匹配成功时,返回该类型对应的函数
/*
 * Return the packet handler registered for a libpcap datalink type.
 *
 * datalink_type - value to match against the `type` field of the table
 * device        - interface name; used only in log and error messages
 *
 * The table is scanned linearly and the first entry whose type equals
 * datalink_type wins.  If no entry matches, die() is called with an
 * error message; the trailing `return NULL` is marked NOTREACHED.
 */
pcap_handler find_handler(int datalink_type, char *device)
{
    int i;
    struct {
        pcap_handler handler;
        int type;
    } handlers[] = {
        { dl_null, DLT_NULL },
#ifdef DLT_RAW /* older versions of libpcap do not have DLT_RAW */
        { dl_raw, DLT_RAW },
#endif
        { dl_ethernet, DLT_EN10MB },
        { dl_ethernet, DLT_IEEE802 },
        { dl_ppp, DLT_PPP },
#ifdef DLT_LINUX_SLL
        { dl_linux_sll, DLT_LINUX_SLL },
#endif
        { NULL, 0 },    /* sentinel: a NULL handler ends the scan */
    };

    DEBUG(2) ("looking for handler for datalink type %d for interface %s",
              datalink_type, device);

    /* The loop stops on the NULL handler, not on the type value, so the
     * sentinel's type of 0 is never compared against datalink_type. */
    for (i = 0; handlers[i].handler != NULL; i++) {
        if (handlers[i].type == datalink_type) {
            return handlers[i].handler;
        }
    }

    die("sorry - unknown datalink type %d on interface %s", datalink_type,
        device);
    /* NOTREACHED */
    return NULL;
}
3 处理函数分析
从上面的find_handler中可以看出,tcpflow支持的接口类型有:
DLT_NULL: localhost loop-back接口,处理函数dl_null
DLT_RAW: 原始IP接口,处理函数dl_raw
DLT_EN10MB,DLT_IEEE802: 以太网接口,处理函数dl_ethernet
DLT_PPP:点对点协议接口,处理函数 dl_ppp
DLT_LINUX_SLL:Linux“cooked”抓包封装(例如在“any”伪设备上抓包时使用的链路层头),处理函数是 dl_linux_sll
现在以dl_ethernet为例来说明该函数的实现
/*
 * Handler for Ethernet frames: check the Ethernet header and pass the
 * payload of IP frames to process_ip().
 *
 * user - not used by this handler
 * h    - capture header; caplen and len are read from it
 * p    - first byte of the captured frame
 *
 * Frames shorter than an Ethernet header and frames whose ether_type is
 * not ETHERTYPE_IP are dropped after a debug message.
 */
void dl_ethernet(u_char *user, const struct pcap_pkthdr *h, const u_char *p)
{
    u_int caplen = h->caplen;   /* number of bytes actually captured */
    u_int length = h->len;      /* frame length reported in the capture header */
    struct ether_header *eth_header = (struct ether_header *) p;

    /* A partial capture is only reported here; processing continues. */
    if (length != caplen) {
        DEBUG(6) ("warning: only captured %d bytes of %d byte ether frame",
                  caplen, length);
    }

    /* Without a complete Ethernet header nothing further can be parsed. */
    if (caplen < sizeof(struct ether_header)) {
        DEBUG(6) ("warning: received incomplete ethernet frame");
        return;
    }

    /* we're only expecting IP datagrams, nothing else */
    if (ntohs(eth_header->ether_type) != ETHERTYPE_IP) {
        DEBUG(6) ("warning: received ethernet frame with unknown type %x",
                  ntohs(eth_header->ether_type));
        return;
    }

    /* Skip the Ethernet header and analyse the IP datagram behind it. */
    process_ip(p + sizeof(struct ether_header),
               caplen - sizeof(struct ether_header));
}
下面看看process_ip函数的实现:
/*
 * Check an IP datagram and pass its TCP segment to process_tcp().
 *
 * data   - first byte of the IP header
 * caplen - number of bytes available starting at data
 *
 * The datagram is dropped when fewer than sizeof(struct ip) bytes were
 * captured, when the protocol is not TCP, when the fragment offset is
 * non-zero, or when the header length exceeds the datagram's total length.
 */
void process_ip(const u_char *data, u_int32_t caplen)
{
    const struct ip *ip_header = (struct ip *) data;
    u_int ip_header_len;
    u_int ip_total_len;

    /* make sure that the packet is at least as long as the min IP header */
    if (caplen < sizeof(struct ip)) {
        DEBUG(6) ("received truncated IP datagram!");
        return;
    }

    /* for now we're only looking for TCP; throw away everything else */
    if (ip_header->ip_p != IPPROTO_TCP) {
        DEBUG(50) ("got non-TCP frame -- IP proto %d", ip_header->ip_p);
        return;
    }

    /* check and see if we got everything.  NOTE: we must use
     * ip_total_len after this, because we may have captured bytes
     * beyond the end of the packet (e.g. ethernet padding). */
    ip_total_len = ntohs(ip_header->ip_len);
    if (caplen < ip_total_len) {
        /* NOTE(review): an incomplete capture is only logged.  The length
         * passed to process_tcp() below is still derived from ip_total_len,
         * so it can exceed the bytes available at data -- confirm that the
         * snapshot length used by the caller makes this safe. */
        DEBUG(6) ("warning: captured only %ld bytes of %ld-byte IP datagram",
                  (long) caplen, (long) ip_total_len);
    }

    /* XXX - throw away everything but fragment 0; this version doesn't
     * know how to do fragment reassembly. */
    if (ntohs(ip_header->ip_off) & 0x1fff) {
        DEBUG(2) ("warning: throwing away IP fragment from X to X");
        return;
    }

    /* figure out where the IP header ends (ip_hl is scaled by 4 here) */
    /* NOTE(review): ip_header_len is not checked against sizeof(struct ip)
     * or against caplen, only against ip_total_len below. */
    ip_header_len = ip_header->ip_hl * 4;

    /* make sure there's some data: the total length must cover the header */
    if (ip_header_len > ip_total_len) {
        DEBUG(6) ("received truncated IP datagram!");
        return;
    }

    /* do TCP processing; addresses are converted to host byte order */
    process_tcp(data + ip_header_len, ip_total_len - ip_header_len,
                ntohl(ip_header->ip_src.s_addr),
                ntohl(ip_header->ip_dst.s_addr));
}
看process_tcp的实现
/*
 * Extract the payload of a TCP segment and either print or store it.
 *
 * data   - first byte of the TCP header
 * length - number of bytes in the segment (header plus payload)
 * src    - source address, as passed in by the caller
 * dst    - destination address, as passed in by the caller
 *
 * Segments shorter than struct tcphdr and segments carrying no payload
 * are dropped.  Otherwise the flow (src, dst, sport, dport) and sequence
 * number are filled in and the payload goes to print_packet() when
 * console_only is set, or to store_packet() when it is not.
 */
void process_tcp(const u_char *data, u_int32_t length, u_int32_t src,
                 u_int32_t dst)
{
    struct tcphdr *tcp_header = (struct tcphdr *) data;
    flow_t this_flow;
    u_int tcp_header_len;
    tcp_seq seq;

    /* Not even a minimal TCP header: nothing further can be parsed. */
    if (length < sizeof(struct tcphdr)) {
        DEBUG(6) ("received truncated TCP segment!");
        return;
    }

    /* calculate the total length of the TCP header including options */
    /* NOTE(review): th_off is not checked for a minimum value here. */
    tcp_header_len = tcp_header->th_off * 4;

    /* return if this packet doesn't have any data (e.g., just an ACK) */
    if (length <= tcp_header_len) {
        DEBUG(50) ("got TCP segment with no data");
        return;
    }

    /* fill in the flow_t structure with info that identifies this flow:
     * the address/port 4-tuple, plus the segment's sequence number */
    this_flow.src = src;
    this_flow.dst = dst;
    this_flow.sport = ntohs(tcp_header->th_sport);
    this_flow.dport = ntohs(tcp_header->th_dport);
    seq = ntohl(tcp_header->th_seq);

    /* recalculate the beginning of data and its length, moving past the
     * TCP header */
    data += tcp_header_len;
    length -= tcp_header_len;

    /* strip nonprintable characters if necessary */
    if (strip_nonprint) {
        data = do_strip_nonprint(data, length);
    }

    /* store or print the output */
    if (console_only) {
        print_packet(this_flow, data, length);
    } else {
        store_packet(this_flow, data, length, seq);
    }
}
终于到了主流程中的最后一个功能函数store_packet
/*
 * Write one segment's payload into the file of the flow it belongs to.
 *
 * flow   - identifies the flow
 * data   - payload bytes
 * length - number of payload bytes
 * seq    - sequence number of the first payload byte
 *
 * The payload is written at file offset (seq - state->isn), seeking first
 * when that differs from the remembered position.  When bytes_per_flow is
 * non-zero, data beyond that limit is cut off and the flow is marked
 * FLOW_FINISHED and its file closed.
 */
void store_packet(flow_t flow, const u_char *data, u_int32_t length,
                  u_int32_t seq)
{
    flow_state_t *state;
    tcp_seq offset;
    long fpos;

    /* see if we have state about this flow; if not, create it */
    if ((state = find_flow_state(flow)) == NULL) {
        state = create_flow_state(flow, seq);
    }

    /* if we're done collecting for this flow, return now */
    if (IS_SET(state->flags, FLOW_FINISHED)) {
        return;
    }

    /* calculate the offset into this flow -- should handle seq num
     * wrapping correctly because tcp_seq is the right size */
    offset = seq - state->isn;

    /* I want to guard against receiving a packet with a sequence number
     * slightly less than what we consider the ISN to be; the max
     * (though admittedly non-scaled) window of 64K should be enough */
    if (offset >= 0xffff0000) {
        DEBUG(2) ("dropped packet with seq < isn on %s", flow_filename(flow));
        return;
    }

    /* reject this packet if it falls entirely outside of the range of
     * bytes we want to receive for the flow */
    if (bytes_per_flow && (offset > bytes_per_flow)) {
        return;
    }

    /* if we don't have a file open for this flow, try to open it.
     * return if the open fails.  Note that we don't have to explicitly
     * save the return value because open_file() puts the file pointer
     * into the structure for us. */
    if (state->fp == NULL) {
        if (open_file(state) == NULL) {
            return;
        }
    }

    /* We are go for launch!  Everything's ready for us to do a write. */

    /* reduce length if it goes beyond the number of bytes per flow */
    if (bytes_per_flow && (offset + length > bytes_per_flow)) {
        SET_BIT(state->flags, FLOW_FINISHED);
        length = bytes_per_flow - offset;
    }

    /* if we're not at the correct point in the file, seek there.  Writing
     * each segment at the offset derived from its sequence number is how
     * segments that do not arrive back to back end up at the right place
     * in the file; a range that arrives again is simply written again. */
    if (offset != state->pos) {
        fpos = offset;
        FSETPOS(state->fp, &fpos);
    }

    /* write the data into the file */
    DEBUG(25) ("%s: writing %ld bytes @%ld", flow_filename(state->flow),
               (long) length, (long) offset);

    /* NOTE(review): when offset == bytes_per_flow the truncation above
     * leaves length == 0; fwrite() then returns 0 and this is reported
     * as a write failure -- confirm whether that is intended. */
    if (fwrite(data, length, 1, state->fp) != 1) {
        /* sigh... this should be a nice, plain DEBUG statement that
         * passes strerrror() as an argument, but SunOS 4.1.3 doesn't seem
         * to have strerror. */
        if (debug_level >= 1) {
            DEBUG(1) ("write to %s failed: ", flow_filename(state->flow));
            perror("");
        }
    }
    fflush(state->fp);

    /* remember the position for next time */
    state->pos = offset + length;

    if (IS_SET(state->flags, FLOW_FINISHED)) {
        DEBUG(5) ("%s: stopping capture", flow_filename(state->flow));
        close_file(state);
    }
}
4 总结
总体来说,tcpflow的主流程是相当清晰的:
获取用户输入->设置过滤规则->根据网络接口类型选择接口数据处理回调函数->循环监听网络接口,当有数据包到达时,调用回调函数处理->回调函数不断拆包(以太网帧->IP包->TCP包)->存储包的内容到文件