import sagemaker
import boto3

sess = sagemaker.Session()
# sagemaker session bucket -> used for uploading data, models and logs
# sagemaker will automatically create this bucket if it does not exist
sagemaker_session_bucket = None
if sagemaker_session_bucket is None and sess is not None:
    # set to default bucket if a bucket name is not given
    sagemaker_session_bucket = sess.default_bucket()

try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client('iam')
    role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

print(f"sagemaker role arn: {role}")
print(f"sagemaker bucket: {sess.default_bucket()}")
print(f"sagemaker session region: {sess.boto_region_name}")
%%writefile code/inference.py
import base64
import torch
from io import BytesIO
from diffusers import StableDiffusionPipeline


def model_fn(model_dir):
    # Load stable diffusion and move it to the GPU
    pipe = StableDiffusionPipeline.from_pretrained(model_dir, torch_dtype=torch.float16)
    pipe = pipe.to("cuda")
    return pipe


def predict_fn(data, pipe):
    # get prompt & parameters
    prompt = data.pop("inputs", data)
    # set valid HP for stable diffusion
    num_inference_steps = data.pop("num_inference_steps", 50)
    guidance_scale = data.pop("guidance_scale", 7.5)
    num_images_per_prompt = data.pop("num_images_per_prompt", 4)
    # run generation with parameters
    generated_images = pipe(
        prompt,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        num_images_per_prompt=num_images_per_prompt,
    )["images"]
    # encode generated images as base64 strings
    encoded_images = []
    for image in generated_images:
        buffered = BytesIO()
        image.save(buffered, format="JPEG")
        encoded_images.append(base64.b64encode(buffered.getvalue()).decode())
    # create response
    return {"generated_images": encoded_images}
Writing code/inference.py
Create SageMaker model.tar.gz artifact
To use inference.py we need to package it into a model.tar.gz archive that the container can use. We will also use the huggingface_hub SDK to download the CompVis/stable-diffusion-v1-4 model.
If you plan to reproduce this work and use the Stable Diffusion model, please make sure you read the license information here.
from distutils.dir_util import copy_tree
from pathlib import Path
from huggingface_hub import snapshot_download
import random

HF_MODEL_ID = "CompVis/stable-diffusion-v1-4"
HF_TOKEN = ""  # your hf token: https://huggingface.co/settings/tokens
assert len(HF_TOKEN) > 0, "Please set HF_TOKEN to your huggingface token. You can find it here: https://huggingface.co/settings/tokens"

# download snapshot
snapshot_dir = snapshot_download(repo_id=HF_MODEL_ID, revision="fp16", use_auth_token=HF_TOKEN)

# create model dir
model_tar = Path(f"model-{random.getrandbits(16)}")
model_tar.mkdir(exist_ok=True)

# copy snapshot to model dir
copy_tree(snapshot_dir, str(model_tar))
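Before uploading, the model folder, together with the code/ directory holding inference.py, has to be packaged into a model.tar.gz archive. The packaging cell is not shown above, so the following is a minimal sketch, assuming the model_tar directory created in the previous cell and the code/ folder written earlier, that copies the inference script into the archive and compresses everything with Python's tarfile module.

# copy the custom inference script into the model directory under code/
copy_tree("code/", str(model_tar.joinpath("code")))

import os
import tarfile

def compress(tar_dir, output_file="model.tar.gz"):
    # place the directory contents at the root of the tarball
    with tarfile.open(output_file, "w:gz") as tar:
        for item in os.listdir(tar_dir):
            tar.add(os.path.join(tar_dir, item), arcname=item)

compress(str(model_tar))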
from sagemaker.s3 import S3Uploader

# upload model.tar.gz to s3
s3_model_uri = S3Uploader.upload(local_path="model.tar.gz", desired_s3_uri=f"s3://{sess.default_bucket()}/stable-diffusion-v1-4")

print(f"model uploaded to: {s3_model_uri}")
model uploaded to: s3://sagemaker-us-east-1-332886139243/stable-diffusion-v1-4/model.tar.gz
Deploy the model to Amazon SageMaker
In this step we are going to deploy the model to Amazon SageMaker using the HuggingFaceModel class to create a real-time inference endpoint.
The endpoint will use an ml.g4dn.xlarge instance.
from sagemaker.huggingface.model import HuggingFaceModel

# create Hugging Face Model Class
huggingface_model = HuggingFaceModel(
    model_data=s3_model_uri,      # path to your model and script
    role=role,                    # iam role with permissions to create an Endpoint
    transformers_version="4.17",  # transformers version used
    pytorch_version="1.10",       # pytorch version used
    py_version='py38',            # python version used
)

# deploy the endpoint
predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.g4dn.xlarge"
)
----------!
Generate images using the deployed model
After running .deploy() we have a predictor that we can use to request inferences. The endpoint expects a JSON payload with at least the inputs key.
The predictor.predict() function returns a JSON with the generated_images key, which contains the generated images as base64-encoded strings. To decode the response we added a small helper function decode_base64_image, which takes a base64-encoded string and returns a PIL.Image object, and display_images, which takes a list of PIL.Image objects and displays them.
from PIL import Image
from io import BytesIO
from IPython.display import display
import base64
import matplotlib.pyplot as plt

# helper decoder
def decode_base64_image(image_string):
    base64_image = base64.b64decode(image_string)
    buffer = BytesIO(base64_image)
    return Image.open(buffer)

# display PIL images as grid
def display_images(images=None, columns=3, width=100, height=100):
    plt.figure(figsize=(width, height))
    for i, image in enumerate(images):
        plt.subplot(int(len(images) / columns + 1), columns, i + 1)
        plt.axis('off')
        plt.imshow(image)
    plt.show()
Now let's generate some images. I will generate 3 images for the prompt “Paradise, green, beauty, birds, rivers, trees, nature, serenity, eternal happiness”.
num_images_per_prompt = 3
prompt = "Paradise, green, beauty, birds, rivers, trees, nature, serenity, eternal happiness"

# run prediction
response = predictor.predict(data={
    "inputs": prompt,
    "num_images_per_prompt": num_images_per_prompt
})

# decode images
decoded_images = [decode_base64_image(image) for image in response["generated_images"]]

# visualize generation
display_images(decoded_images)
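When you are done experimenting, you may want to clean up to avoid paying for an idle endpoint. A minimal sketch using the SageMaker Python SDK, assuming the predictor object from above:

# delete the model and the real-time endpoint to stop incurring costs
predictor.delete_model()
predictor.delete_endpoint()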