tcpflow源码分析（1）-xbjpkpk-ChinaUnix博客

1 源码结构

源代码只有八个文件

./src/sysdep.h
./src/tcpflow.h
./acconfig.h

./src/flow.c
./src/datalink.c
./src/util.c
./src/main.c
./src/tcpip.c

2 程序流程

从程序的入口main（）开始追踪下去

main:

(1)获取用户输入的一些参数，对于必要的而用户又没有指定的参数，程序会给出一个默认参数值

(2.1)当输入是之前抓包的存储文件时，调用pd = pcap_open_offline(infile, error)

(2.2)当输入是网卡的实时数据时，调用pd = pcap_open_live(device, SNAPLEN, !no_promisc, 1000, error)

(3)第2步通过返回值pd获得了抓包设备（或抓包文件）的句柄，接下来通过下面两行代码获取数据链路类型，并找到对应的处理函数：

dlt = pcap_datalink(pd); //获取数据链路层类型（例如DLT_EN10MB）
handler = find_handler(dlt, device);//根据数据链路层类型查找对应的处理函数

(4)获取用户在命令行中输入的过滤表达式 expression = copy_argv(&argv[optind]);

(5)利用libpcap库设置用户过滤规则

if (pcap_compile(pd, &fcode, expression, 1, 0) < 0)
die("%s", pcap_geterr(pd));

if (pcap_setfilter(pd, &fcode) < 0)
die("%s", pcap_geterr(pd));

(6)初始化流存储结构

init_flow_state();

(7)安装信号处理函数，当获取对应信号时，触发相应的动作

     portable_signal(SIGTERM, terminate);
     portable_signal(SIGINT, terminate);
     portable_signal(SIGHUP, terminate);

(8)循环监听设备，并处理监听到的数据流

if (pcap_loop(pd, -1, handler, NULL) < 0)
die("%s", pcap_geterr(pd));

find_handler函数实现

/*
 * find_handler - pick the packet callback for a libpcap datalink type.
 *
 * The implementation is simple: the interface's datalink type is compared
 * against every registered type in a local table, and the handler of the
 * first matching entry is returned.
 *
 * datalink_type: value to look up (the caller obtains it from
 *                pcap_datalink()).
 * device:        interface name, used only in diagnostic messages.
 *
 * Returns the matching pcap_handler.  If nothing matches, die() is called;
 * the trailing return is marked NOTREACHED, so die() presumably does not
 * return (confirm against its definition).
 */
pcap_handler find_handler(int datalink_type, char *device)
{
    /* Supported link types; the entry with a NULL handler ends the table. */
    struct {
        pcap_handler handler;
        int type;
    } table[] = {
        { dl_null, DLT_NULL },
#ifdef DLT_RAW /* older versions of libpcap do not have DLT_RAW */
        { dl_raw, DLT_RAW },
#endif
        { dl_ethernet, DLT_EN10MB },
        { dl_ethernet, DLT_IEEE802 },
        { dl_ppp, DLT_PPP },
#ifdef DLT_LINUX_SLL
        { dl_linux_sll, DLT_LINUX_SLL },
#endif
        { NULL, 0 },
    };
    int idx = 0;

    DEBUG(2) ("looking for handler for datalink type %d for interface %s",
              datalink_type, device);

    /* Linear scan up to the NULL sentinel. */
    while (table[idx].handler != NULL) {
        if (table[idx].type == datalink_type) {
            return table[idx].handler;
        }
        idx++;
    }

    die("sorry - unknown datalink type %d on interface %s", datalink_type,
        device);
    /* NOTREACHED */
    return NULL;
}

3 处理函数分析

从上面的find_handler中可以看出，tcpflow支持的接口类型有：

DLT_NULL：BSD回环（loopback）封装，即本机回环接口，处理函数dl_null

DLT_RAW: 原始IP接口，处理函数dl_raw

DLT_EN10MB（以太网）、DLT_IEEE802（IEEE 802.5令牌环）：二者的处理函数都是dl_ethernet

DLT_PPP:点对点协议接口，处理函数 dl_ppp

DLT_LINUX_SLL：Linux“cooked”模式抓包使用的伪链路层头（例如在Linux的any设备上抓包时），处理函数是 dl_linux_sll

现在以dl_ethernet为例来说明该函数的实现

/*
 * dl_ethernet - pcap callback for Ethernet-framed captures.
 *
 * user: opaque pointer supplied by pcap; not used here.
 * h:    pcap header carrying the captured length (caplen) and the
 *       original length of the frame (len).
 * p:    start of the captured frame.
 *
 * Frames too short to hold an Ethernet header, and frames whose EtherType
 * is not IP, are dropped.  Everything after the Ethernet header is handed
 * to process_ip().
 */
void dl_ethernet(u_char *user, const struct pcap_pkthdr *h, const u_char *p)
{
    const u_int captured = h->caplen;  /* bytes actually captured */
    const u_int wire_len = h->len;     /* length of the original frame */
    struct ether_header *frame = (struct ether_header *) p;
    int ether_type;

    if (captured != wire_len) {
        DEBUG(6) ("warning: only captured %d bytes of %d byte ether frame",
                  captured, wire_len);
    }

    /* Without a complete Ethernet header nothing further can be parsed. */
    if (captured < sizeof(struct ether_header)) {
        DEBUG(6) ("warning: received incomplete ethernet frame");
        return;
    }

    /* we're only expecting IP datagrams, nothing else */
    ether_type = ntohs(frame->ether_type);
    if (ether_type != ETHERTYPE_IP) {
        DEBUG(6) ("warning: received ethernet frame with unknown type %x",
                  ether_type);
        return;
    }

    /* Skip the Ethernet header and analyse the IP datagram. */
    process_ip(p + sizeof(struct ether_header),
               captured - sizeof(struct ether_header));
}

下面看看process_ip函数的实现：

/*
 * process_ip - validate an IPv4 datagram and pass its TCP payload on.
 *
 * data:   start of the IP header.
 * caplen: number of bytes actually available at data.
 *
 * Non-TCP datagrams and datagrams with a non-zero fragment offset are
 * dropped.  The length handed to process_tcp() never exceeds what was
 * captured, and a header length smaller than the fixed IP header or
 * larger than the available datagram is rejected.
 */
void process_ip(const u_char *data, u_int32_t caplen)
{
    const struct ip *ip_header = (const struct ip *) data;
    u_int ip_header_len;
    u_int ip_total_len;

    /* make sure that the packet is at least as long as the min IP header */
    if (caplen < sizeof(struct ip)) {
        DEBUG(6) ("received truncated IP datagram!");
        return;
    }

    /* for now we're only looking for TCP; throw away everything else */
    if (ip_header->ip_p != IPPROTO_TCP) {
        DEBUG(50) ("got non-TCP frame -- IP proto %d", ip_header->ip_p);
        return;
    }

    /* check and see if we got everything.  NOTE: we must use
     * ip_total_len after this, because we may have captured bytes
     * beyond the end of the packet (e.g. ethernet padding). */
    ip_total_len = ntohs(ip_header->ip_len);
    if (caplen < ip_total_len) {
        DEBUG(6) ("warning: captured only %ld bytes of %ld-byte IP datagram",
                  (long) caplen, (long) ip_total_len);
        /* Only caplen bytes exist in the buffer; clamp so that later
         * stages never read past the end of the captured data. */
        ip_total_len = caplen;
    }

    /* XXX - throw away everything but fragment 0; this version doesn't
     * know how to do fragment reassembly. */
    if (ntohs(ip_header->ip_off) & 0x1fff) {
        DEBUG(2) ("warning: throwing away IP fragment from X to X");
        return;
    }

    /* figure out where the IP header ends */
    ip_header_len = ip_header->ip_hl * 4;

    /* The header must cover at least the fixed part (ip_hl >= 5) and must
     * fit inside the bytes we have; otherwise the payload pointer would be
     * wrong or out of bounds. */
    if (ip_header_len < sizeof(struct ip) || ip_header_len > ip_total_len) {
        DEBUG(6) ("received truncated IP datagram!");
        return;
    }

    /* do TCP processing */
    process_tcp(data + ip_header_len, ip_total_len - ip_header_len,
                ntohl(ip_header->ip_src.s_addr),
                ntohl(ip_header->ip_dst.s_addr));
}

看process_tcp的实现

/*
 * process_tcp - extract the payload of a TCP segment and print or store it.
 *
 * data:   start of the TCP header.
 * length: number of bytes of TCP header plus payload.
 * src:    source IPv4 address, host byte order (the caller applies ntohl).
 * dst:    destination IPv4 address, host byte order.
 *
 * Segments that are truncated, carry an invalid data offset, or have no
 * payload are ignored.  Depending on the global console_only flag the
 * payload goes to print_packet() or store_packet().
 */
void process_tcp(const u_char *data, u_int32_t length, u_int32_t src,
                 u_int32_t dst)
{
    struct tcphdr *tcp_header = (struct tcphdr *) data;
    flow_t this_flow;
    u_int tcp_header_len;
    tcp_seq seq;

    /* Too short to contain even the fixed TCP header. */
    if (length < sizeof(struct tcphdr)) {
        DEBUG(6) ("received truncated TCP segment!");
        return;
    }

    /* calculate the total length of the TCP header including options */
    tcp_header_len = tcp_header->th_off * 4;

    /* A data offset below the fixed header size is malformed; accepting it
     * would treat part of the header as payload. */
    if (tcp_header_len < sizeof(struct tcphdr)) {
        DEBUG(6) ("received TCP segment with invalid header length!");
        return;
    }

    /* return if this packet doesn't have any data (e.g., just an ACK) */
    if (length <= tcp_header_len) {
        DEBUG(50) ("got TCP segment with no data");
        return;
    }

    /* fill in the flow_t structure with info that identifies this flow:
     * the address/port 4-tuple, plus the sequence number of this segment */
    this_flow.src = src;
    this_flow.dst = dst;
    this_flow.sport = ntohs(tcp_header->th_sport);
    this_flow.dport = ntohs(tcp_header->th_dport);
    seq = ntohl(tcp_header->th_seq);

    /* recalculate the beginning of data and its length, moving past the
     * TCP header */
    data += tcp_header_len;
    length -= tcp_header_len;

    /* strip nonprintable characters if necessary */
    if (strip_nonprint)
        data = do_strip_nonprint(data, length);

    /* store or print the output */
    if (console_only) {
        print_packet(this_flow, data, length);
    } else {
        store_packet(this_flow, data, length, seq);
    }
}

终于到了主流程中的最后一个功能函数store_packet

/*
 * store_packet - write one segment's payload into the file for its flow.
 *
 * flow:   identifies the flow the data belongs to.
 * data:   payload bytes.
 * length: number of payload bytes.
 * seq:    TCP sequence number of the first payload byte.
 *
 * The file position is derived from seq relative to the flow's recorded
 * initial sequence number, so out-of-order and retransmitted segments are
 * written at their proper offset (the same bytes may be written more than
 * once).  When the global bytes_per_flow limit is non-zero, data beyond it
 * is discarded and the flow is marked finished and its file closed.
 */
void store_packet(flow_t flow, const u_char *data, u_int32_t length,
                  u_int32_t seq)
{
    flow_state_t *state;
    tcp_seq offset;
    long fpos;

    /* see if we have state about this flow; if not, create it */
    if ((state = find_flow_state(flow)) == NULL) {
        state = create_flow_state(flow, seq);
    }

    /* if we're done collecting for this flow, return now */
    if (IS_SET(state->flags, FLOW_FINISHED))
        return;

    /* calculate the offset into this flow -- should handle seq num
     * wrapping correctly because tcp_seq is the right size */
    offset = seq - state->isn;

    /* I want to guard against receiving a packet with a sequence number
     * slightly less than what we consider the ISN to be; the max
     * (though admittedly non-scaled) window of 64K should be enough */
    if (offset >= 0xffff0000) {
        DEBUG(2) ("dropped packet with seq < isn on %s", flow_filename(flow));
        return;
    }

    /* reject this packet if it falls entirely outside of the range of
     * bytes we want to receive for the flow */
    if (bytes_per_flow && (offset > bytes_per_flow))
        return;

    /* if we don't have a file open for this flow, try to open it.
     * return if the open fails.  Note that we don't have to explicitly
     * save the return value because open_file() puts the file pointer
     * into the structure for us. */
    if (state->fp == NULL) {
        if (open_file(state) == NULL) {
            return;
        }
    }

    /* We are go for launch!  Everything's ready for us to do a write. */

    /* reduce length if it goes beyond the number of bytes per flow */
    if (bytes_per_flow && (offset + length > bytes_per_flow)) {
        SET_BIT(state->flags, FLOW_FINISHED);
        length = bytes_per_flow - offset;
    }

    /* if we're not at the correct point in the file, seek there */
    if (offset != state->pos) {
        fpos = offset;
        FSETPOS(state->fp, &fpos);
    }

    /* write the data into the file */
    DEBUG(25) ("%s: writing %ld bytes @%ld", flow_filename(state->flow),
               (long) length, (long) offset);

    /* length is 0 when the packet starts exactly at the byte limit.
     * fwrite() with a zero size returns 0, which would be misreported as a
     * failed write, so only call it when there is something to write. */
    if (length > 0 && fwrite(data, length, 1, state->fp) != 1) {
        /* sigh... this should be a nice, plain DEBUG statement that
         * passes strerrror() as an argument, but SunOS 4.1.3 doesn't seem
         * to have strerror. */
        if (debug_level >= 1) {
            DEBUG(1) ("write to %s failed: ", flow_filename(state->flow));
            perror("");
        }
    }
    fflush(state->fp);

    /* remember the position for next time */
    state->pos = offset + length;

    if (IS_SET(state->flags, FLOW_FINISHED)) {
        DEBUG(5) ("%s: stopping capture", flow_filename(state->flow));
        close_file(state);
    }
}

4 总结

总体来说，tcpflow的主流程是相当清晰的：

获取用户输入->设置过滤规则->根据网络接口类型选择接口数据处理回调函数->循环监听网络接口，当有数据包到达时，调用回调函数处理->回调函数不断拆包（以太网帧->IP包->TCP包）->存储包的内容到文件