I recently read this author's post, "OpenStack 创建快照(Create Snapshot)过程源码分析" (a source-code analysis of OpenStack's create-snapshot flow):
http://blog.chinaunix.net/uid-20940095-id-4752643.html
It explains that OpenStack currently implements snapshots as cold snapshots. Why it avoids live snapshots may be related to an issue I described earlier in "openstack.live_snapshot的实现方法存在竞态" (the live_snapshot implementation has a race condition):
http://blog.chinaunix.net/uid-29718549-id-4346700.html
In this post I will walk through the internals of the libvirt interface that OpenStack calls to implement the cold snapshot.
2. Flow analysis
Nova calls the libvirt API managedSave() to take a cold snapshot of the instance. Note that flags=0 is passed here:
nova/virt/libvirt/driver.py
# NOTE(dkang): managedSave does not work for LXC
if CONF.libvirt.virt_type != 'lxc' and not live_snapshot:
    if state == power_state.RUNNING or state == power_state.PAUSED:
        self._detach_pci_devices(virt_dom,
            pci_manager.get_instance_pci_devs(instance))
        self._detach_sriov_ports(instance, virt_dom)
        virt_dom.managedSave(0)
On the libvirt side, the docstring of the interface describes its behavior in detail:
- This call suspends the domain; by default the domain does not automatically resume once the call completes.
- Unlike virDomainSave(), managedSave() has libvirt keep track of the saved domain data itself (we will see below exactly how it is stored), and the domain must be restarted with virDomainCreate() to get it running again (see the sketch after this list).
- The three flag bits and their effects (since flags=0 was passed in the previous step, none of them is in use here):
  - VIR_DOMAIN_SAVE_BYPASS_CACHE: bypass the file system cache when writing the save file
  - VIR_DOMAIN_SAVE_RUNNING: resume the domain as running after restore
  - VIR_DOMAIN_SAVE_PAUSED: leave the domain paused after restore (mutually exclusive with the previous flag)
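To make the save/restore cycle concrete, here is a minimal sketch against the libvirt Python bindings (this is not Nova code; the qemu:///system URI and the domain name 'demo' are placeholder assumptions):

import libvirt

conn = libvirt.open('qemu:///system')    # placeholder local connection
dom = conn.lookupByName('demo')          # placeholder domain name

dom.managedSave(0)                       # flags=0, exactly like Nova's call above
assert dom.hasManagedSaveImage(0) == 1   # libvirt now tracks a managed save image
assert dom.isActive() == 0               # the domain is no longer running

dom.create()                             # boots from the managed image and discards it
assert dom.hasManagedSaveImage(0) == 0

Here dom.create() is the Python binding of the virDomainCreate() mentioned above: if a managed save image exists, starting the domain restores from it instead of performing a fresh boot.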
/usr/lib64/python2.6/site-packages/libvirt.py
def managedSave(self, flags):
    """This method will suspend a domain and save its memory contents to
    a file on disk. After the call, if successful, the domain is not
    listed as running anymore.
    The difference from virDomainSave() is that libvirt is keeping track of
    the saved state itself, and will reuse it once the domain is being
    restarted (automatically or via an explicit libvirt call).
    As a result any running domain is sure to not have a managed saved image.
    This also implies that managed save only works on persistent domains,
    since the domain must still exist in order to use virDomainCreate() to
    restart it.

    If @flags includes VIR_DOMAIN_SAVE_BYPASS_CACHE, then libvirt will
    attempt to bypass the file system cache while creating the file, or
    fail if it cannot do so for the given system; this can allow less
    pressure on file system cache, but also risks slowing saves to NFS.

    Normally, the managed saved state will remember whether the domain
    was running or paused, and start will resume to the same state.
    Specifying VIR_DOMAIN_SAVE_RUNNING or VIR_DOMAIN_SAVE_PAUSED in
    @flags will override the default saved into the file. These two
    flags are mutually exclusive. """
    ret = libvirtmod.virDomainManagedSave(self._o, flags)
    if ret == -1: raise libvirtError('virDomainManagedSave() failed', dom=self)
    return ret
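Because libvirt tracks the saved state itself, the bindings can also tell you whether a managed save image exists and let you read back the domain XML that was written into the save file's header (we will see that header being written in qemuDomainSaveInternal() below). A sketch, assuming the common distribution-default managed-save path /var/lib/libvirt/qemu/save/<name>.save; that path is an assumption, not something queried from libvirt:

import libvirt

conn = libvirt.open('qemu:///system')    # placeholder local connection
dom = conn.lookupByName('demo')          # placeholder domain name

if dom.hasManagedSaveImage(0):
    # saveImageGetXMLDesc() parses the header of a save file and returns
    # the domain XML embedded there.
    path = '/var/lib/libvirt/qemu/save/%s.save' % dom.name()
    print(conn.saveImageGetXMLDesc(path, 0))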
libvirt/include/libvirt/libvirt.h
typedef enum {
    VIR_DOMAIN_SAVE_BYPASS_CACHE = 1 << 0, /* Avoid file system cache pollution */
    VIR_DOMAIN_SAVE_RUNNING      = 1 << 1, /* Favor running over paused */
    VIR_DOMAIN_SAVE_PAUSED       = 1 << 2, /* Favor paused over running */
} virDomainSaveRestoreFlags;
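Since these are bit flags, they can be OR-ed together, with the caveat from the docstring that RUNNING and PAUSED are mutually exclusive. A sketch (same placeholder connection and domain name as above) that bypasses the host page cache during the save and records that the domain should come back in the running state:

import libvirt

conn = libvirt.open('qemu:///system')    # placeholder local connection
dom = conn.lookupByName('demo')          # placeholder domain name

# Write the save file bypassing the file system cache, and record that the
# domain should be resumed as running when it is restored, regardless of
# the state it was saved in.
flags = (libvirt.VIR_DOMAIN_SAVE_BYPASS_CACHE |
         libvirt.VIR_DOMAIN_SAVE_RUNNING)
dom.managedSave(flags)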
Skipping over some layers of abstract interface dispatch, we arrive at the QEMU driver's implementation:
libvirt/src/qemu/qemu_driver.c
/* The vm must be active + locked. Vm will be unlocked and
 * potentially free'd after this returns (eg transient VMs are freed
 * shutdown). So 'vm' must not be referenced by the caller after
 * this returns (whether returning success or failure).
 */
static int
qemuDomainSaveInternal(virQEMUDriverPtr driver, virDomainPtr dom,
                       virDomainObjPtr vm, const char *path,
                       int compressed, const char *xmlin, unsigned int flags)
{
    char *xml = NULL;
    bool was_running = false;
    int ret = -1;
    int rc;
    virObjectEventPtr event = NULL;
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virCapsPtr caps;

    if (!(caps = virQEMUDriverGetCapabilities(driver, false)))
        goto cleanup;

    if (!qemuMigrationIsAllowed(driver, vm, vm->def, false, false))
        goto cleanup;

    /* Wait for any job left over on the vm (e.g. migrate, backup)
     * before starting the save job. */
    if (qemuDomainObjBeginAsyncJob(driver, vm, QEMU_ASYNC_JOB_SAVE) < 0)
        goto cleanup;

    memset(&priv->job.info, 0, sizeof(priv->job.info));
    priv->job.info.type = VIR_DOMAIN_JOB_UNBOUNDED;

    /* Pause the domain via the QEMU (HMP) stop command, and record whether
     * it was running before, for use when it is restored. */
    if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) {
        was_running = true;
        if (qemuProcessStopCPUs(driver, vm, VIR_DOMAIN_PAUSED_SAVE,
                                QEMU_ASYNC_JOB_SAVE) < 0)
            goto endjob;

        if (!virDomainObjIsActive(vm)) {
            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                           _("guest unexpectedly quit"));
            goto endjob;
        }
    }

    /* libvirt.c already guaranteed these two flags are exclusive. */
    if (flags & VIR_DOMAIN_SAVE_RUNNING)
        was_running = true;
    else if (flags & VIR_DOMAIN_SAVE_PAUSED)
        was_running = false;

    /* Get XML for the domain. Restore needs only the inactive xml,
     * including secure. We should get the same result whether xmlin
     * is NULL or whether it was the live xml of the domain moments
     * before. */
    /* This produces the domain XML, i.e. the "domain data maintained by
     * libvirt" mentioned earlier. */
    if (xmlin) {
        virDomainDefPtr def = NULL;

        if (!(def = virDomainDefParseString(xmlin, caps, driver->xmlopt,
                                            QEMU_EXPECTED_VIRT_TYPES,
                                            VIR_DOMAIN_XML_INACTIVE))) {
            goto endjob;
        }
        if (!qemuDomainDefCheckABIStability(driver, vm->def, def)) {
            virDomainDefFree(def);
            goto endjob;
        }
        xml = qemuDomainDefFormatLive(driver, def, true, true);
    } else {
        xml = qemuDomainDefFormatLive(driver, vm->def, true, true);
    }
    if (!xml) {
        virReportError(VIR_ERR_OPERATION_FAILED,
                       "%s", _("failed to get domain xml"));
        goto endjob;
    }

    /* The core of managedSave(): qemuDomainSaveMemory() first opens the
     * <name>.save file, then writes the XML obtained above into the file
     * header with qemuDomainSaveHeader(), and finally writes the guest
     * memory and device state into the file with qemuMigrationToFile().
     * Why "migration"? Because QEMU's way of migrating is to stream the
     * guest's memory and device state to another node; here the stream
     * simply targets a local file instead. */
    ret = qemuDomainSaveMemory(driver, vm, path, xml, compressed,
                               was_running, flags, QEMU_ASYNC_JOB_SAVE);
    if (ret < 0)
        goto endjob;

    /* Once the save file has been written, shut the domain down and release
     * all of its resources (note: qemuDomainSaveMemory() only pauses the
     * domain to freeze its memory and device state; the domain is torn down
     * here, after everything has been dumped to the file), and emit the
     * corresponding lifecycle event. Incidentally, since the Havana release
     * OpenStack listens for this kind of domain lifecycle event, so even
     * when a domain changes state outside of OpenStack's control (for
     * example, a user shuts it down by hand), OpenStack still learns of the
     * state change from the event; see the listener sketch after this
     * listing. */
    /* Shut it down */
    qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_SAVED, 0);
    virDomainAuditStop(vm, "saved");
    event = virDomainEventLifecycleNewFromObj(vm,
                                              VIR_DOMAIN_EVENT_STOPPED,
                                              VIR_DOMAIN_EVENT_STOPPED_SAVED);
    if (!vm->persistent) {
        if (qemuDomainObjEndAsyncJob(driver, vm) > 0)
            qemuDomainRemoveInactive(driver, vm);
        vm = NULL;
    }

    /* Wrap-up: here you can see how the two flags mentioned earlier take
     * effect (was_running, possibly overridden by the flags, decides whether
     * the CPUs are resumed after a failed save). */
 endjob:
    if (vm) {
        if (ret != 0) {
            if (was_running && virDomainObjIsActive(vm)) {
                rc = qemuProcessStartCPUs(driver, vm, dom->conn,
                                          VIR_DOMAIN_RUNNING_SAVE_CANCELED,
                                          QEMU_ASYNC_JOB_SAVE);
                if (rc < 0) {
                    VIR_WARN("Unable to resume guest CPUs after save failure");
                    event = virDomainEventLifecycleNewFromObj(vm,
                                                              VIR_DOMAIN_EVENT_SUSPENDED,
                                                              VIR_DOMAIN_EVENT_SUSPENDED_API_ERROR);
                }
            }
        }
        if (qemuDomainObjEndAsyncJob(driver, vm) == 0)
            vm = NULL;
    }

 cleanup:
    VIR_FREE(xml);
    if (event)
        qemuDomainEventQueue(driver, event);
    if (vm)
        virObjectUnlock(vm);
    virObjectUnref(caps);
    return ret;
}
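As noted in the comment above, the STOPPED/SAVED lifecycle event emitted here is exactly the kind of event OpenStack has listened for since Havana. For completeness, a minimal listener sketch using the libvirt Python bindings (the read-only qemu:///system connection is an assumption, and Nova's real implementation runs inside its own event loop rather than this bare while loop):

import libvirt

def lifecycle_cb(conn, dom, event, detail, opaque):
    # event/detail are integer codes such as VIR_DOMAIN_EVENT_STOPPED /
    # VIR_DOMAIN_EVENT_STOPPED_SAVED emitted by the code above.
    print(dom.name(), event, detail)

libvirt.virEventRegisterDefaultImpl()      # install libvirt's default event loop
conn = libvirt.openReadOnly('qemu:///system')
conn.domainEventRegisterAny(None, libvirt.VIR_DOMAIN_EVENT_ID_LIFECYCLE,
                            lifecycle_cb, None)

while True:                                # drive the default event loop
    libvirt.virEventRunDefaultImpl()

Running this sketch while a managedSave() completes should print a STOPPED event with the SAVED detail for the affected domain.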