qemu commit实现简记

2380阅读 0评论2014-09-03 sak0
分类:虚拟化

上一篇( http://blog.chinaunix.net/uid-29718549-id-4445694.htm )说了在线快照的一点问题,这次记录一下在线删除快照的核心流程。它对应 qemu 的 block-commit 命令,也就是在 qcow2 磁盘链(backing chain)中把上层磁盘的数据合并到下层、再把被合并的磁盘从链中删除的过程。

变量名对应的链表视图:
    -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]

点击(此处)折叠或打开

  1. end = s->common.len >> BDRV_SECTOR_BITS;
  2.     buf = qemu_blockalign(top, COMMIT_BUFFER_SIZE);
  3.     for (sector_num = 0; sector_num < end; sector_num += n) {\\按扇区(sector)遍历top的整个范围,每轮前进n个扇区
  4.         uint64_t delay_ns = 0;
  5.         bool copy;

  6. wait:
  7.         /* Note that even when no rate limit is applied we need to yield
  8.          * with no pending I/O here so that bdrv_drain_all() returns.
  9.          */
  10.         block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
  11.         if (block_job_is_cancelled(&s->common)) {
  12.             break;
  13.         }
  14.         /* Copy if allocated above the base */
  15.         /*以下这一坨代码负责检查指定范围内(一次检查1024个sector)需不需要执行commit*/
  16.         ret = bdrv_is_allocated_above(top, base, sector_num,
  17.                                       COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE, \\一次检查1024个扇区(sector)
  18.                                       &n);\\检查这个范围的扇区是否在top到base之间(包含top,不包含base)的任意磁盘中被分配(写入)过,如果有(返回值1)->执行后面的copy动作
  19.             intermediate = top;
  20.             while (intermediate && intermediate != base)\\从top开始沿链表向base方向遍历所有磁盘(包含top,不包含base)
  21.                 ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
  22.                                 &pnum_inter);
  23.                     ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
  24.                         bdrv_get_block_status_co_entry
  25.                             bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
  26.                                          data->pnum);\\<qcow2_co_get_block_status>
  27.                                 *pnum = nb_sectors;
  28.                                 qemu_co_mutex_lock(&s->lock);\\获取状态码之前加锁
  29.                                 ret = qcow2_get_cluster_offset(bs, sector_num << 9, pnum, &cluster_offset);\\下面这坨会详解
  30.                                     l1_bits = s->l2_bits + s->cluster_bits;
  31.                                     l1_index = offset >> l1_bits;\\这里的offset就是sector_num << 9,也就是seek点
  32.                                     if (l1_index >= s->l1_size) ret = QCOW2_CLUSTER_UNALLOCATED;\\offset超出了BDRVQcowState记录的L1表范围,返回未分配
  33.                                     l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
  34.                                     if (l2_offset == NULL) ret=QCOW2_CLUSTER_UNALLOCATED\\在L1 table中无记录,返回未分配
  35.                                     ret = l2_load(bs, l2_offset, &l2_table);\\读出L2 table
  36.                                     l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
  37.                                     nb_clusters = size_to_clusters(s, nb_needed << 9);
  38.                                     ret = qcow2_get_cluster_type(*cluster_offset);
  39.                                 qemu_co_mutex_unlock(&s->lock);\\获取状态码之后解锁
  40.         /*以上这一坨代码负责检查指定范围内(一次检查1024个sector)需不需要执行commit*/
  41.         copy = (ret == 1);
  42.         trace_commit_one_iteration(s, sector_num, n, ret);
  43.         if (copy) {
  44.             if (s->common.speed) {\\这部分限速控制
  45.                 delay_ns = ratelimit_calculate_delay(&s->limit, n);
  46.                 if (delay_ns > 0) {
  47.                     goto wait;
  48.                 }
  49.             }
  50.             ret = commit_populate(top, base, sector_num, n, buf);\\读出->写入
  51.                 ret = bdrv_read(bs, sector_num, buf, nb_sectors);
  52.                 ret = bdrv_write(base, sector_num, buf, nb_sectors);
  53.             bytes_written += n * BDRV_SECTOR_SIZE;
  54.         }
  55.         if (ret < 0) {
  56.             if (s->on_error == BLOCKDEV_ON_ERROR_STOP ||
  57.                 s->on_error == BLOCKDEV_ON_ERROR_REPORT||
  58.                 (s->on_error == BLOCKDEV_ON_ERROR_ENOSPC && ret == -ENOSPC)) {
  59.                 goto exit_free_buf;
  60.             } else {
  61.                 n = 0;
  62.                 continue;
  63.             }
  64.         }
  65.         /* Publish progress */
  66.         s->common.offset += n * BDRV_SECTOR_SIZE;
  67.     }

  68.     ret = 0;

  69.     if (!block_job_is_cancelled(&s->common) && sector_num == end) {
  70.         /* success */
  71.         ret = bdrv_drop_intermediate(active, top, base);\\丢弃top到base之间(不含base)的磁盘;这一步不再搬运数据,主要是把top上一层磁盘的backing file改为指向base
  72.     }

上一篇:对cinder volume live snapshot(在线快照)实现的一点疑问
下一篇:openstack juno新增功能简介_nova