Openstack cold snapshot 细节分析

3370阅读 0评论2015-01-21 sak0
分类:云计算

1.背景
  读了这位作者写的《OpenStack 创建快照(Create Snapshot)过程源码分析》:
   http://blog.chinaunix.net/uid-20940095-id-4752643.html
   文中介绍到,目前OpenStack在做snapshot时使用的是cold snapshot的方式。至于为什么不用live snapshot,可能与我之前想到的原因有关:
  http://blog.chinaunix.net/uid-29718549-id-4346700.html   《openstack.live_snapshot的实现方法存在竞态》

  今天就OpenStack调用libvirt接口实现cold snapshot的内部流程,展开介绍一下。

2.流程分析

Nova调用libvirt库接口managedSave()对虚机实施cold snapshot,注意:这里传入的flags=0

nova/virt/libvirt/driver.py


点击(此处)折叠或打开

  1. # NOTE(dkang): managedSave does not work for LXC
  2.         if CONF.libvirt.virt_type != 'lxc' and not live_snapshot:
  3.             if state == power_state.RUNNING or state == power_state.PAUSED:
  4.                 self._detach_pci_devices(virt_dom,
  5.                     pci_manager.get_instance_pci_devs(instance))
  6.                 self._detach_sriov_ports(instance, virt_dom)
  7.                 virt_dom.managedSave(0)

libvirt库接口部分,注释中写得很详细:

•  这个接口会挂起虚机,接口完成后虚机默认不会自动恢复运行

•  与virDomainSave()接口不同的是,managedSave()接口会保存libvirt所维护的虚机数据(后边我们会看到它具体是怎样保存的),而且必须调用virDomainCreate()接口才能恢复虚机运行态

•  三个flags位的具体效果(因为上一步骤中传入的flags=0,所以我们这里并未使用任何一个flags功能):

VIR_DOMAIN_SAVE_BYPASS_CACHE——后续存储文件时忽略文件系统cache

VIR_DOMAIN_SAVE_RUNNING——恢复后自动启动虚机

VIR_DOMAIN_SAVE_PAUSED——恢复后自动暂停虚机(与上一个flag互斥)

/usr/lib64/python2.6/site-packages/libvirt.py


点击(此处)折叠或打开

  1. def managedSave(self, flags):
  2.         """This method will suspend a domain and save its memory contents to
  3.         a file on disk. After the call, if successful, the domain is not
  4.         listed as running anymore.
  5.         The difference from virDomainSave() is that libvirt is keeping track of
  6.         the saved state itself, and will reuse it once the domain is being
  7.         restarted (automatically or via an explicit libvirt call).
  8.         As a result any running domain is sure to not have a managed saved image.
  9.         This also implies that managed save only works on persistent domains,
  10.         since the domain must still exist in order to use virDomainCreate() to
  11.         restart it.
  12.         
  13.         If @flags includes VIR_DOMAIN_SAVE_BYPASS_CACHE, then libvirt will
  14.         attempt to bypass the file system cache while creating the file, or
  15.         fail if it cannot do so for the given system; this can allow less
  16.         pressure on file system cache, but also risks slowing saves to NFS.
  17.         
  18.         Normally, the managed saved state will remember whether the domain
  19.         was running or paused, and start will resume to the same state.
  20.         Specifying VIR_DOMAIN_SAVE_RUNNING or VIR_DOMAIN_SAVE_PAUSED in
  21.         @flags will override the default saved into the file. These two
  22.         flags are mutually exclusive. """
  23.         ret = libvirtmod.virDomainManagedSave(self._o, flags)
  24.         if ret == -1: raise libvirtError ('virDomainManagedSave() failed', dom=self)
  25.         return ret


  26. libvirt/include/libvirt/libvirt.h

  27. typedef enum {
  28.     VIR_DOMAIN_SAVE_BYPASS_CACHE = 1 << 0, /* Avoid file system cache pollution */
  29.     VIR_DOMAIN_SAVE_RUNNING = 1 << 1, /* Favor running over paused */
  30.     VIR_DOMAIN_SAVE_PAUSED = 1 << 2, /* Favor paused over running */
  31. } virDomainSaveRestoreFlags;

省去一些抽象接口流程,来到qemu-driver的实现部分

libvirt/src/qemu/qemu_driver.c


点击(此处)折叠或打开

  1. /* The vm must be active + locked. Vm will be unlocked and
  2.  * potentially free'd after this returns (eg transient VMs are freed
  3.  * shutdown). So 'vm' must not be referenced by the caller after
  4.  * this returns (whether returning success or failure).
  5.  */
  6. static int
  7. qemuDomainSaveInternal(virQEMUDriverPtr driver, virDomainPtr dom,
  8.                        virDomainObjPtr vm, const char *path,
  9.                        int compressed, const char *xmlin, unsigned int flags)
  10. {
  11.     char *xml = NULL;
  12.     bool was_running = false;
  13.     int ret = -1;
  14.     int rc;
  15.     virObjectEventPtr event = NULL;
  16.     qemuDomainObjPrivatePtr priv = vm->privateData;
  17.     virCapsPtr caps;

  18.     if (!(caps = virQEMUDriverGetCapabilities(driver, false)))
  19.         goto cleanup;

  20.     if (!qemuMigrationIsAllowed(driver, vm, vm->def, false, false))
  21.         goto cleanup;

  22. /*处理vm遗留的Job,比如migrate, backup等*/
  23.     if (qemuDomainObjBeginAsyncJob(driver, vm, QEMU_ASYNC_JOB_SAVE) < 0)
  24.         goto cleanup;

  25.     memset(&priv->job.info, 0, sizeof(priv->job.info));
  26.     priv->job.info.type = VIR_DOMAIN_JOB_UNBOUNDED;

  27. /*使用qemu hmp stop接口暂停虚机,并且记录虚机之前的运行状态,供恢复时使用*/
  28.     if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) {
  29.         was_running = true;
  30.         if (qemuProcessStopCPUs(driver, vm, VIR_DOMAIN_PAUSED_SAVE,
  31.                                 QEMU_ASYNC_JOB_SAVE) < 0)
  32.             goto endjob;

  33.         if (!virDomainObjIsActive(vm)) {
  34.             virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
  35.                            _("guest unexpectedly quit"));
  36.             goto endjob;
  37.         }
  38.     }

  39.    /* libvirt.c already guaranteed these two flags are exclusive. */
  40.     if (flags & VIR_DOMAIN_SAVE_RUNNING)
  41.         was_running = true;
  42.     else if (flags & VIR_DOMAIN_SAVE_PAUSED)
  43.         was_running = false;

  44.     /* Get XML for the domain. Restore needs only the inactive xml,
  45.      * including secure. We should get the same result whether xmlin
  46.      * is NULL or whether it was the live xml of the domain moments
  47.      * before. */
  48.     /*获得xml,也就是之前提到的libvirt维护的虚机数据*/
  49.     if (xmlin) {
  50.         virDomainDefPtr def = NULL;

  51.         if (!(def = virDomainDefParseString(xmlin, caps, driver->xmlopt,
  52.                                             QEMU_EXPECTED_VIRT_TYPES,
  53.                                             VIR_DOMAIN_XML_INACTIVE))) {
  54.             goto endjob;
  55.         }
  56.         if (!qemuDomainDefCheckABIStability(driver, vm->def, def)) {
  57.             virDomainDefFree(def);
  58.             goto endjob;
  59.         }
  60.         xml = qemuDomainDefFormatLive(driver, def, true, true);
  61.     } else {
  62.         xml = qemuDomainDefFormatLive(driver, vm->def, true, true);
  63.     }
  64.     if (!xml) {
  65.         virReportError(VIR_ERR_OPERATION_FAILED,
  66.                        "%s", _("failed to get domain xml"));
  67.         goto endjob;
  68.     }
  69. /* managedSave()的核心流程,首先会打开xxx.save的保存文件,然后使用qemuDomainSaveHeader ()将之前获得的xml配置信息写入保存文件头部,最后使用qemuMigrationToFile ()将虚机内存与设备状态信息写入保存文件;
  70. 为什么叫migrate呢,因为qemu中迁移的方法就是将虚机的内存和状态信息migrate到另一节点去,而这里的用法是migrate到本地的文件*/
  71.     ret = qemuDomainSaveMemory(driver, vm, path, xml, compressed,
  72.                                was_running, flags, QEMU_ASYNC_JOB_SAVE);
  73.     if (ret < 0)
  74.         goto endjob;

  75. /*保存文件写完后,调用关闭虚机流程,释放所有的资源(注意:qemuDomainSaveMemory()只是暂停虚机,将虚机的内存和状态信息冻结,待全部导出为文件后,这里再关闭虚机),并发送对应的事件信息,顺便说下,Openstack从havana版本后会监听此类关于虚机生命周期事件,也就是说虚机在openstack管控范围外发生了状态变化,比如用户自己关闭了虚机,openstack仍能通过监听的事件获得虚机状态更新*/
  76.     /* Shut it down */
  77.     qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_SAVED, 0);
  78.     virDomainAuditStop(vm, "saved");
  79.     event = virDomainEventLifecycleNewFromObj(vm,
  80.                                      VIR_DOMAIN_EVENT_STOPPED,
  81.                                      VIR_DOMAIN_EVENT_STOPPED_SAVED);
  82.     if (!vm->persistent) {
  83.         if (qemuDomainObjEndAsyncJob(driver, vm) > 0)
  84.             qemuDomainRemoveInactive(driver, vm);
  85.         vm = NULL;
  86.     }

  87. /*结束工作,可以看到刚才提到的两个flags是如何生效*/
  88.  endjob:
  89.     if (vm) {
  90.         if (ret != 0) {
  91.             if (was_running && virDomainObjIsActive(vm)) {
  92.                 rc = qemuProcessStartCPUs(driver, vm, dom->conn,
  93.                                           VIR_DOMAIN_RUNNING_SAVE_CANCELED,
  94.                                           QEMU_ASYNC_JOB_SAVE);
  95.                 if (rc < 0) {
  96.                     VIR_WARN("Unable to resume guest CPUs after save failure");
  97.                     event = virDomainEventLifecycleNewFromObj(vm,
  98.                                                      VIR_DOMAIN_EVENT_SUSPENDED,
  99.                                                      VIR_DOMAIN_EVENT_SUSPENDED_API_ERROR);
  100.                 }
  101.             }
  102.         }
  103.         if (qemuDomainObjEndAsyncJob(driver, vm) == 0)
  104.             vm = NULL;
  105.     }

  106.  cleanup:
  107.     VIR_FREE(xml);
  108.     if (event)
  109.         qemuDomainEventQueue(driver, event);
  110.     if (vm)
  111.         virObjectUnlock(vm);
  112.     virObjectUnref(caps);
  113.     return ret;
  114. }
至此cold snapshot部分已完成,可回到《OpenStack 创建快照(Create Snapshot)过程源码分析》博文,继续了解OpenStack后续的动作。


上一篇:使用virtio-balloon驱动监控windows虚机内存
下一篇:【群分享】OpenStack管理VMware的几点实践