基本步骤就是三个:
1. 停止实例
2. 修改instance type
3. 启动实例
代码不贴了,看我的github : 这里
import boto.ec2

# Placeholders: fill in the target region and credentials before running.
region = ""
aws_access_key_id = ""
aws_secret_access_key = ""

conn = boto.ec2.connect_to_region(
    region,
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key)
# connect_to_region() returns None (it does not raise) when the region name
# is not recognised, so fail here with a clear message instead of hitting an
# AttributeError on the next call.
if conn is None:
    raise ValueError("unknown EC2 region: %r" % region)

# IDs of the instances to stop.
instance_list = []
conn.stop_instances(instance_ids=instance_list)
import boto.ec2

# Placeholders: fill in the target region and credentials before running.
region = ""
aws_access_key_id = ""
aws_secret_access_key = ""

conn = boto.ec2.connect_to_region(
    region,
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key)
# connect_to_region() returns None (it does not raise) when the region name
# is not recognised, so fail here with a clear message instead of hitting an
# AttributeError on the next call.
if conn is None:
    raise ValueError("unknown EC2 region: %r" % region)

# IDs of the instances to terminate.
instance_list = []

# Switch off the disableApiTermination attribute on every instance first,
# then terminate them all in a single call.
for instance in instance_list:
    conn.modify_instance_attribute(instance, "disableApiTermination", False)
conn.terminate_instances(instance_ids=instance_list)
这个需求一开始是开发跟我提的,比如说他们部署好一台机器(也许很难部署)之后,不想重复部署其他机器,只想根据部署好的机器去生成一个或多个一模一样的机器。服务扩容的时候可以使用这个功能,比如每次线上服务发布之后对其中一台机器做镜像,等服务需要扩容的时候根据此镜像生成数个相同数据的实例;还有一种扩容方式是先生成数台“裸”实例,然后部署这数个实例的应用,部署之后和线上的环境一模一样。说实话,我更喜欢用第二种方式,因为它更轻,只需要把部署做好。
所以,我更倾向于只在一个实例非常难部署的时候才用这个功能(线上服务是不应该非常难部署的)。
这里有几个坑,我们的每台实例主机名不一样;主机名要注册到DNS;我们的实例第二块盘是用UUID挂载的,而新生成的盘UUID可能不同,所以要修改/etc/fstab;同时主机名变了,Puppet证书会有问题;应用程序的日志也会在新实例中,最好置空。先列下坑,最后解决:
坑一:主机名设置问题
坑二:DNS设置问题
坑三:硬盘挂载问题
坑四:Puppet证书问题(我们的所有实例上都跑着Puppet)
坑五:应用程序日志问题(暂时不考虑这个问题)
实现这个功能的大概步骤是这样:
根据instance_id 生成ami,拿到ami_id
根据instance_id 拿到它的 subnet_id、key_name、instance_type、sg_id ,这四项会被用在新实例的创建上,以保证新实例和原实例保持一样。
根据ami_id 创建新实例, 并用 user_data 来解决上面提到的前四个坑(并保证新实例的Puppet正常运行)
导入需要用到的模块:
import os
import time
import requests
from multiprocessing.dummy import Pool as ThreadPool
import boto.ec2
from boto.ec2.blockdevicemapping import BlockDeviceMapping, BlockDeviceType
from boto.ec2.networkinterface import NetworkInterfaceSpecification, NetworkInterfaceCollection
看看根据instance_id 生成ami的代码。
def create_ami(region, instance_id, name, timeout=300, poll_interval=3):
    """Create an AMI from an instance and wait until it is available.

    The image is requested with ``no_reboot=True`` and no explicit block
    device mapping. Credentials are read from the module-level
    ``aws_access_key_id`` / ``aws_secret_access_key`` names.

    Args:
        region: EC2 region name to connect to.
        instance_id: ID of the instance to image.
        name: Name given to the new AMI.
        timeout: Maximum number of seconds to spend polling (default 300).
        poll_interval: Seconds to sleep between polls (default 3).

    Returns:
        The AMI ID once its state is ``'available'``; ``False`` if the image
        reports ``'failed'`` or does not become available within ``timeout``.
    """
    conn = boto.ec2.connect_to_region(
        region,
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key)
    ami_id = conn.create_image(instance_id, name, no_reboot=True,
                               block_device_mapping=None)

    # Short pause before the first lookup of the freshly requested image.
    time.sleep(3)
    ami_object = conn.get_all_images(image_ids=[ami_id])[0]

    waited = 0
    while waited < timeout:
        ami_object.update()
        # Single-argument print() behaves the same on Python 2 and 3.
        print(ami_object.state)
        if ami_object.state == 'available':
            return ami_id
        if ami_object.state == 'failed':
            # No point polling until the timeout once the image has failed.
            return False
        time.sleep(poll_interval)
        waited += poll_interval
    return False
再看下拿到原instance的 subnet_id、key_name、instance_type、sg_id 的代码。
def instance_info(region, instance_id):
    """Collect the launch settings of an existing instance.

    Credentials are read from the module-level ``aws_access_key_id`` /
    ``aws_secret_access_key`` names.

    Args:
        region: EC2 region name to connect to.
        instance_id: ID of the instance to inspect.

    Returns:
        A dict with the keys ``subnet_id``, ``key_name``, ``instance_type``
        and ``sg_id``. ``sg_id`` is the ID of the first security group on the
        instance's first network interface only.

    Raises:
        LookupError: If the query returns no instance for ``instance_id``.
    """
    conn = boto.ec2.connect_to_region(
        region,
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key)
    reservations = conn.get_all_instances(instance_ids=[instance_id])
    for res in reservations:
        for instance in res.instances:
            # Only one instance ID is queried, so the first hit is the answer.
            return {
                'subnet_id': instance.subnet_id,
                'key_name': instance.key_name,
                'instance_type': instance.instance_type,
                'sg_id': instance.interfaces[0].groups[0].id,
            }
    # Previously this fell through to an UnboundLocalError.
    raise LookupError(
        "no instance %r found in region %r" % (instance_id, region))
创建一台新实例的函数(hostname根据usage向资产系统获取,user_data是从http获取的,然后修改里面的hostname、dns_vip、ns_servers等,用于初始化)。
def create_instance(create_list):
    """Launch one instance from an AMI and wait for it to start running.

    The hostname is obtained from ``libs.hostnames.get(region, usage)`` and,
    together with the module-level ``dns_info`` values, substituted into the
    ``hostname=``, ``dns_vip=`` and ``ns_servers=`` placeholders of the
    user_data script. Credentials are read from the module-level
    ``aws_access_key_id`` / ``aws_secret_access_key`` names.

    Args:
        create_list: Dict with the keys ``region``, ``subnet_id``,
            ``ami_id``, ``key_name``, ``instance_type``, ``sg_id``,
            ``user_data`` and ``usage``. It is not modified.

    Returns:
        A copy of ``create_list`` extended with ``instance_id``,
        ``placement``, ``status`` and ``hostname``. A copy is returned
        because the caller may hand the same dict to several worker threads;
        mutating it in place would make every result alias one object.
    """
    region = create_list["region"]
    subnet_id = create_list["subnet_id"]
    ami_id = create_list["ami_id"]
    key_name = create_list["key_name"]
    instance_type = create_list["instance_type"]
    sg_id = create_list["sg_id"]
    user_data = create_list["user_data"]
    usage = create_list["usage"]

    ret = libs.hostnames.get(region, usage)
    # Keep only the short host name (the part before the first dot).
    hostname = ret["hostname"].split(".")[0]

    # Fill in the empty shell assignments of the user_data template.
    user_data = user_data.replace("hostname=", "hostname=%s" % hostname)
    user_data = user_data.replace("dns_vip=",
                                  "dns_vip=%s" % dns_info["dns_vip"])
    user_data = user_data.replace(
        "ns_servers=",
        "ns_servers='%s'" % " ".join(dns_info["ns_servers"]))

    network_interface = NetworkInterfaceSpecification(subnet_id=subnet_id,
                                                      groups=[sg_id])
    network_interfaces = NetworkInterfaceCollection(network_interface)

    conn = boto.ec2.connect_to_region(
        region,
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key)
    reservation = conn.run_instances(ami_id,
                                     key_name=key_name,
                                     network_interfaces=network_interfaces,
                                     instance_type=instance_type,
                                     min_count=1,
                                     max_count=1,
                                     user_data=user_data)
    instance = reservation.instances[0]

    # Poll for up to 300 seconds; the Name tag is only added once the
    # instance reports 'running'.
    time_init = 0
    time_total = 300
    time_interval = 5
    while time_init < time_total:
        status = instance.update()
        if status == 'running':
            instance.add_tag("Name", hostname)
            break
        time.sleep(time_interval)
        time_init += time_interval

    result = dict(create_list)
    result["instance_id"] = instance.id
    result["placement"] = instance.placement
    # Refresh once more so the reported status is current even after a timeout.
    result["status"] = instance.update()
    result["hostname"] = hostname
    return result
现在看下总的入口了(用到了multiprocessing.dummy的线程池来并发创建实例)。
def create_instances(region, instance_id, num, usage):
    """Clone an instance ``num`` times via a freshly created AMI.

    Downloads the user_data template from the module-level
    ``clone_install_script`` URL, images the source instance with
    ``create_ami``, reads its launch settings with ``instance_info`` and
    launches the clones concurrently with ``create_instance`` on a thread
    pool.

    Args:
        region: EC2 region name.
        instance_id: ID of the instance to clone.
        num: Number of new instances to create.
        usage: Usage label passed through to ``create_instance``.

    Returns:
        A list with one ``create_instance`` result dict per new instance,
        ``[]`` when ``num`` is not positive, or ``False`` if the AMI could
        not be created.

    Raises:
        requests.HTTPError: If downloading the user_data template fails.
    """
    if num <= 0:
        # Nothing to launch, so do not create an AMI for no reason.
        return []

    ret = requests.get(clone_install_script)
    # Do not boot instances with an HTTP error page as their user_data.
    ret.raise_for_status()
    user_data = ret.text

    _time = time.strftime("%Y%m%d%H%M%S", time.localtime())
    name = "{0}-{1}".format(instance_id, _time)
    ami_id = create_ami(region, instance_id, name)
    if not ami_id:
        return False

    _instance_info = instance_info(region, instance_id)
    create_list = {
        "region": region,
        "subnet_id": _instance_info["subnet_id"],
        "instance_type": _instance_info["instance_type"],
        "key_name": _instance_info["key_name"],
        "sg_id": _instance_info["sg_id"],
        "ami_id": ami_id,
        "user_data": user_data,
        "usage": usage,
    }
    # Give every worker its own dict so concurrent launches never share state.
    create_lists = [dict(create_list) for _ in range(num)]

    # One thread per instance, capped at 100.
    pool = ThreadPool(min(num, 100))
    try:
        # Map the per-instance worker, not this entry point itself.
        create_results = pool.map(create_instance, create_lists)
    finally:
        pool.close()
        pool.join()
    return create_results
最后再看下user_data的脚本。
#!/bin/bash
# First-boot script for a cloned instance. The three empty assignments below
# are placeholders that the launcher fills in by text substitution before the
# script is passed as user_data, so their exact spelling must not change.

# --- Host name ---
hostname=
hostname "$hostname"
sed -i "s/^HOSTNAME=.*/HOSTNAME=$hostname/g" /etc/sysconfig/network

# --- Data disk mount ---
# Drop the old /home/ entry, then re-add it using the identifier blkid reports
# on this instance. The address uses '|' as its delimiter: a sed script that
# begins with '#' is a comment and would delete nothing.
sed -i '\|/home/|d' /etc/fstab
/sbin/blkid | grep -v "vda" | sort -u -k1 | awk '{print $2" /home/ ext4 nosuid,noatime 1 2"}' >>/etc/fstab

# --- Puppet ---
# Remove the Puppet state directory copied over from the source image.
/bin/rm -rf /var/lib/puppet/

# --- DNS ---
dns_vip=
sed -i "/nameserver/s/.*/nameserver ${dns_vip}/g" /etc/resolv.conf
ns_servers=
##################################
## add DNS resolution code here ##
##################################
#######################################
## add Puppet configuration code here ##
#######################################
reboot
代码这东西,参考下啦,Goodbye …