tcpflow源码分析(1)

761阅读 0评论2012-03-13 xbjpkpk
分类:

1 源码结构
.
|-- AUTHORS
|-- COPYING
|-- ChangeLog
|-- INSTALL
|-- Makefile.am
|-- Makefile.in
|-- NEWS
|-- README
|-- acconfig.h
|-- acinclude.m4
|-- aclocal.m4
|-- config.guess
|-- config.sub
|-- configure
|-- configure.in
|-- doc
|   |-- Makefile.am
|   |-- Makefile.in
|   `-- tcpflow.1.in
|-- install-sh
|-- missing
|-- mkinstalldirs
|-- src
|   |-- Makefile.am
|   |-- Makefile.in
|   |-- conf.h.in
|   |-- datalink.c
|   |-- flow.c
|   |-- main.c
|   |-- stamp-h.in
|   |-- sysdep.h
|   |-- tcpflow.h
|   |-- tcpip.c
|   `-- util.c
`-- tcpflow.spec.in
源代码只有八个文件
./src/sysdep.h
./src/tcpflow.h
./acconfig.h
./src/flow.c
./src/datalink.c
./src/util.c
./src/main.c
./src/tcpip.c
 
2 程序流程
 
从程序的入口main()开始追踪下去
 
main:
  (1)获取用户输入的一些参数,对于必要的而用户又没有指定的参数,程序会给出一个默认参数值
  (2.1)当输入是之前抓包的存储文件时,调用pd = pcap_open_offline(infile, error)
  (2.2)当输入是网卡的实时数据时,调用pd = pcap_open_live(device, SNAPLEN, !no_promisc, 1000, error)
  (3)第2步中通过pd这个返回值获取了读取设备的句柄,再通过该句柄确定数据链路层类型,并据此选择数据包处理函数
     dlt = pcap_datalink(pd); //获取数据链路层类型(如以太网、PPP等)
     handler = find_handler(dlt, device);//根据数据链路层类型查找对应的处理函数
  (4)将选项之后剩余的命令行参数作为过滤表达式 expression = copy_argv(&argv[optind]);
  (5)利用libpcap库设置用户过滤规则
     if (pcap_compile(pd, &fcode, expression, 1, 0) < 0)
         die("%s", pcap_geterr(pd));
     if (pcap_setfilter(pd, &fcode) < 0)
         die("%s", pcap_geterr(pd));
  (6)初始化流存储结构
     init_flow_state();
  (7)安装信号处理函数,当获取对应信号时,触发相应的动作
     portable_signal(SIGTERM, terminate);
     portable_signal(SIGINT, terminate);
     portable_signal(SIGHUP, terminate);
  (8)循环监听设备,并处理监听到的数据流
    if (pcap_loop(pd, -1, handler, NULL) < 0)
        die("%s", pcap_geterr(pd));
 
find_handler函数实现
 
/*
 * Look up the packet handler registered for a libpcap datalink type.
 *
 * datalink_type - DLT_* value to look up
 * device        - interface name; used only in the log and error messages
 *
 * Returns the handler of the first table entry whose type equals
 * datalink_type.  If no entry matches, die() is called with an
 * "unknown datalink type" message.
 */
pcap_handler find_handler(int datalink_type, char *device)
{
  struct dl_entry {
    pcap_handler handler;
    int type;
  };
  struct dl_entry table[] = {
    { dl_null, DLT_NULL },
#ifdef DLT_RAW /* older versions of libpcap do not have DLT_RAW */
    { dl_raw, DLT_RAW },
#endif
    { dl_ethernet, DLT_EN10MB },
    { dl_ethernet, DLT_IEEE802 },
    { dl_ppp, DLT_PPP },
#ifdef DLT_LINUX_SLL
    { dl_linux_sll, DLT_LINUX_SLL },
#endif
    { NULL, 0 },  /* sentinel: a NULL handler marks the end of the table */
  };
  struct dl_entry *entry = table;

  DEBUG(2) ("looking for handler for datalink type %d for interface %s",
            datalink_type, device);

  /* walk the table until the sentinel; the first matching type wins */
  while (entry->handler != NULL) {
    if (entry->type == datalink_type) {
      return entry->handler;
    }
    entry++;
  }

  die("sorry - unknown datalink type %d on interface %s", datalink_type,
      device);
  /* NOTREACHED */
  return NULL;
}
 
3 处理函数分析
 
从上面的find_handler中可以看出,tcpflow支持的接口类型有:
DLT_NULL: 本地回环(loopback)接口,处理函数dl_null
DLT_RAW: 原始IP接口(无链路层头部),处理函数dl_raw
DLT_EN10MB,DLT_IEEE802: 分别对应以太网和IEEE 802网络(libpcap中DLT_IEEE802指令牌环),二者共用处理函数dl_ethernet
DLT_PPP:点对点协议接口,处理函数 dl_ppp
DLT_LINUX_SLL:Linux“cooked”抓包封装(例如在any伪设备上抓包时使用),处理函数是 dl_linux_sll
 
现在以dl_ethernet为例来说明该函数的实现
 
/*
 * Packet handler for Ethernet frames.
 *
 * user - opaque pointer supplied by the capture loop; not used here
 * h    - pcap header for this packet (caplen and len are read)
 * p    - start of the captured frame
 *
 * Frames shorter than an Ethernet header and frames whose EtherType is
 * not ETHERTYPE_IP are dropped.  Everything after the Ethernet header is
 * passed to process_ip().
 */
void dl_ethernet(u_char *user, const struct pcap_pkthdr *h, const u_char *p)
{
  struct ether_header *frame = (struct ether_header *) p;
  u_int captured = h->caplen;  /* bytes actually captured */
  u_int on_wire = h->len;      /* frame length as reported in the pcap header */
  u_int ether_type;

  /* a short capture is only worth a warning at this point */
  if (captured != on_wire) {
    DEBUG(6) ("warning: only captured %d bytes of %d byte ether frame",
              captured, on_wire);
  }

  /* without a complete Ethernet header nothing below can be parsed */
  if (captured < sizeof(struct ether_header)) {
    DEBUG(6) ("warning: received incomplete ethernet frame");
    return;
  }

  /* we're only expecting IP datagrams, nothing else */
  ether_type = ntohs(frame->ether_type);
  if (ether_type != ETHERTYPE_IP) {
    DEBUG(6) ("warning: received ethernet frame with unknown type %x",
              ether_type);
    return;
  }

  /* skip the Ethernet header and analyse the IP datagram */
  process_ip(p + sizeof(struct ether_header),
             captured - sizeof(struct ether_header));
}
 
下面看看process_ip函数的实现:
 
/*
 * Validate a captured IPv4 datagram and pass its payload to process_tcp().
 *
 * data   - start of the IP header
 * caplen - number of captured bytes available at data
 *
 * The datagram is dropped when it is shorter than a minimal IP header, is
 * not TCP, is a non-first fragment, or carries a header length that is
 * below the minimum or larger than the usable bytes.  The length given to
 * process_tcp() never exceeds what was actually captured.
 */
void process_ip(const u_char *data, u_int32_t caplen)
{
  const struct ip *ip_header = (const struct ip *) data;
  u_int ip_header_len;
  u_int ip_total_len;
  u_int usable_len;

  /* make sure that the packet is at least as long as the min IP header */
  if (caplen < sizeof(struct ip)) {
    DEBUG(6) ("received truncated IP datagram!");
    return;
  }

  /* for now we're only looking for TCP; throw away everything else */
  if (ip_header->ip_p != IPPROTO_TCP) {
    DEBUG(50) ("got non-TCP frame -- IP proto %d", ip_header->ip_p);
    return;
  }

  /* check and see if we got everything.  NOTE: we must not use caplen
   * alone after this, because we may have captured bytes beyond the end
   * of the packet (e.g. ethernet padding); and we must not use
   * ip_total_len alone either, because the capture may be shorter than
   * the datagram.  usable_len is the smaller of the two. */
  ip_total_len = ntohs(ip_header->ip_len);
  usable_len = ip_total_len;
  if (caplen < ip_total_len) {
    DEBUG(6) ("warning: captured only %ld bytes of %ld-byte IP datagram",
              (long) caplen, (long) ip_total_len);
    /* only the captured bytes can be read; never go past them */
    usable_len = caplen;
  }

  /* XXX - throw away everything but fragment 0; this version doesn't
   * know how to do fragment reassembly. */
  if (ntohs(ip_header->ip_off) & 0x1fff) {
    DEBUG(2) ("warning: throwing away IP fragment from X to X");
    return;
  }

  /* figure out where the IP header ends */
  ip_header_len = ip_header->ip_hl * 4;

  /* a header length below the fixed header size is malformed; using it
   * would make the "payload" start inside the IP header itself */
  if (ip_header_len < sizeof(struct ip)) {
    DEBUG(6) ("received IP datagram with invalid header length!");
    return;
  }

  /* make sure the header fits in the bytes we can actually use */
  if (ip_header_len > usable_len) {
    DEBUG(6) ("received truncated IP datagram!");
    return;
  }

  /* do TCP processing */
  process_tcp(data + ip_header_len, usable_len - ip_header_len,
              ntohl(ip_header->ip_src.s_addr),
              ntohl(ip_header->ip_dst.s_addr));
}
 
看process_tcp的实现
 
void process_tcp(const u_char *data, u_int32_t length, u_int32_t src,
   u_int32_t dst)
{
  struct tcphdr *tcp_header = (struct tcphdr *) data;
  flow_t this_flow;
  u_int tcp_header_len;
  tcp_seq seq;
  if (length < sizeof(struct tcphdr)) {//异常情况处理,当数据长度小于tcp头长度时,后续处理无法进行
    DEBUG(6) ("received truncated TCP segment!");
    return;
  }
  /* calculate the total length of the TCP header including options */
  tcp_header_len = tcp_header->th_off * 4;
  /* return if this packet doesn't have any data (e.g., just an ACK) */
  if (length <= tcp_header_len) {//异常情况处理,当数据无法容纳TCP头时,无法进行后续分析
    DEBUG(50) ("got TCP segment with no data");
    return;
  }
  /* fill in the flow_t structure with info that identifies this flow */
  //获取四元组信息和TCP序号
  this_flow.src = src;
  this_flow.dst = dst;
  this_flow.sport = ntohs(tcp_header->th_sport);
  this_flow.dport = ntohs(tcp_header->th_dport);
  seq = ntohl(tcp_header->th_seq);
  /* recalculate the beginning of data and its length, moving past the
   * TCP header */
  data += tcp_header_len;
  length -= tcp_header_len;
  /* strip nonprintable characters if necessary */
  if (strip_nonprint)
    data = do_strip_nonprint(data, length);
  /* store or print the output */
  if (console_only) {
    print_packet(this_flow, data, length);
  } else {
    store_packet(this_flow, data, length, seq);//存储tcp流
  }
}
 
终于到了主流程中的最后一个功能函数store_packet
 
/*
 * Write one TCP payload into the file that belongs to its flow.
 *
 * flow   - identifies the flow
 * data   - payload bytes
 * length - number of payload bytes
 * seq    - TCP sequence number of the first payload byte
 *
 * The payload is written at file offset (seq - state->isn), so segments
 * that arrive out of order or more than once land at the position their
 * sequence number dictates; retransmitted data simply overwrites the same
 * region.  When bytes_per_flow is non-zero, data beyond that limit is cut
 * off and the flow is marked FLOW_FINISHED and its file closed.
 */
void store_packet(flow_t flow, const u_char *data, u_int32_t length,
                  u_int32_t seq)
{
  flow_state_t *state;
  tcp_seq offset;
  long fpos;

  /* see if we have state about this flow; if not, create it */
  if ((state = find_flow_state(flow)) == NULL) {
    state = create_flow_state(flow, seq);
  }

  /* if we're done collecting for this flow, return now */
  if (IS_SET(state->flags, FLOW_FINISHED))
    return;

  /* calculate the offset into this flow -- should handle seq num
   * wrapping correctly because tcp_seq is the right size */
  offset = seq - state->isn;

  /* I want to guard against receiving a packet with a sequence number
   * slightly less than what we consider the ISN to be; the max
   * (though admittedly non-scaled) window of 64K should be enough */
  if (offset >= 0xffff0000) {
    DEBUG(2) ("dropped packet with seq < isn on %s", flow_filename(flow));
    return;
  }

  /* reject this packet if it falls entirely outside of the range of
   * bytes we want to receive for the flow */
  if (bytes_per_flow && (offset > bytes_per_flow))
    return;

  /* if we don't have a file open for this flow, try to open it.
   * return if the open fails.  Note that we don't have to explicitly
   * save the return value because open_file() puts the file pointer
   * into the structure for us. */
  if (state->fp == NULL) {
    if (open_file(state) == NULL) {
      return;
    }
  }

  /* We are go for launch!  Everything's ready for us to do a write. */

  /* reduce length if it goes beyond the number of bytes per flow; this
   * can leave length == 0 when offset is exactly bytes_per_flow */
  if (bytes_per_flow && (offset + length > bytes_per_flow)) {
    SET_BIT(state->flags, FLOW_FINISHED);
    length = bytes_per_flow - offset;
  }

  /* if we're not at the correct point in the file, seek there */
  if (offset != state->pos) {
    fpos = offset;
    FSETPOS(state->fp, &fpos);
  }

  /* write the data into the file */
  DEBUG(25) ("%s: writing %ld bytes @%ld", flow_filename(state->flow),
             (long) length, (long) offset);

  /* fwrite() with a zero-sized item returns 0, which the "!= 1" test
   * would misreport as a failed write, so skip the call when truncation
   * left nothing to write */
  if (length > 0 && fwrite(data, length, 1, state->fp) != 1) {
    /* sigh... this should be a nice, plain DEBUG statement that
     * passes strerrror() as an argument, but SunOS 4.1.3 doesn't seem
     * to have strerror. */
    /* NOTE(review): DEBUG() runs before perror() and may change errno --
     * confirm whether errno should be saved first */
    if (debug_level >= 1) {
      DEBUG(1) ("write to %s failed: ", flow_filename(state->flow));
      perror("");
    }
  }
  fflush(state->fp);

  /* remember the position for next time */
  state->pos = offset + length;

  if (IS_SET(state->flags, FLOW_FINISHED)) {
    DEBUG(5) ("%s: stopping capture", flow_filename(state->flow));
    close_file(state);
  }
}
 
4 总结
 
总体来说,tcpflow的主流程是相当清晰的:
 
获取用户输入->设置过滤规则->根据网络接口类型选择接口数据处理回调函数->循环监听网络接口,当有数据包到达时,调用回调函数处理->回调函数不断拆包(以太网帧->IP包->TCP包)->存储包的内容到文件
上一篇:tcpflow源码分析(2)
下一篇:HTTP头部详解