add new code

main
reng 5 months ago
parent f1d8b043ea
commit 028f10f0ec
1. app.py (16)
2. img2img.py (112)
3. input.png (BIN)
4. main.py (120)
5. requirements.txt (4)
6. spout_util.py (72)

app.py

@@ -60,16 +60,17 @@ def image_generation_process(
         warmup=10,
         acceleration="tensorrt",
         use_lcm_lora=False,
-        mode="txt2img",
+        mode="img2img",
         cfg_type="none",
         use_denoising_batch=True,
+        output_type="pil",
     )
     start_prompt = "A glowing, vintage phone booth standing in surreal landscapes across different scene"
     # Prepare the stream
     stream.prepare(
         prompt=start_prompt,
-        num_inference_steps=4,
+        num_inference_steps=50,
     )
     # Prepare image
@@ -92,6 +93,7 @@ def image_generation_process(
         input_image = input_queue.get(block=True)
+        # input_image = stream.preprocess_image('input.png')
         # Check if a new prompt is available in the prompt_queue
@@ -106,9 +108,9 @@ def image_generation_process(
-        preprocessed_image = stream.preprocess_image(x_output)
-        queue.put(preprocessed_image, block=False)
+        # preprocessed_image = stream.postprocess_image(x_output)
+        queue.put(x_output, block=False)
         # queue.put(preprocessed_image, block=False)
@@ -171,13 +173,13 @@ def main() -> None:
     # process_show = ctx.Process(target=receive_images, args=(queue, fps_queue))
     # process_show.start()
-    print("Starting spout output process")
+    # print("Starting spout output process")
     process_spout_out = ctx.Process(target=send_spout_image, args=(queue, 512, 512))
     process_spout_out.start()
     process_gen.join()
-    process_spout_in.join()
+    # process_spout_in.join()
     process_spout_out.join()
     process_osc.join()
@@ -186,7 +188,7 @@ def main() -> None:
         print("Process interrupted")
         process_gen.terminate()
-        process_spout_in.terminate()
+        # process_spout_in.terminate()
         process_spout_out.terminate()
         process_osc.terminate()
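
Note: with output_type="pil" the generator now puts PIL frames on the inter-process queue directly (queue.put(x_output, ...)), and the Spout sender does the BGRA conversion on its side. A minimal sketch of that handoff, with hypothetical placeholders standing in for the StreamDiffusion and Spout calls:

import multiprocessing as mp
import numpy as np
from PIL import Image

def producer(queue: mp.Queue) -> None:
    # Stand-in for the diffusion loop: with output_type="pil", each result is a PIL image.
    for _ in range(3):
        frame = Image.new("RGB", (512, 512), color=(30, 60, 90))  # placeholder for stream(...)
        queue.put(frame, block=False)
    queue.put(None)  # sentinel so the consumer can stop

def consumer(queue: mp.Queue) -> None:
    while True:
        frame = queue.get(block=True)
        if frame is None:
            break
        # Same conversion send_spout_image performs: RGB -> BGR, plus an opaque alpha plane.
        bgra = np.zeros((512, 512, 4), dtype=np.uint8)
        bgra[:, :, :3] = np.array(frame, dtype=np.uint8)[:, :, ::-1]
        bgra[:, :, 3] = 255
        print("would send buffer of shape", bgra.shape)  # placeholder for sender.sendImage(...)

if __name__ == "__main__":
    q = mp.Queue()
    p = mp.Process(target=producer, args=(q,))
    p.start()
    consumer(q)
    p.join()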

@ -0,0 +1,112 @@
import sys
import os
sys.path.append(
os.path.join(
os.path.dirname(__file__),
"..",
"..",
)
)
from utils.wrapper import StreamDiffusionWrapper
import torch
# from config import Args
from pydantic import BaseModel, Field
from PIL import Image
import math
# base_model = "stabilityai/sd-turbo"
# taesd_model = "madebyollin/taesd"
base_model = "./models/sd-turbo"
taesd_model = "./models/taesd"
default_prompt = "Portrait of The Joker halloween costume, face painting, with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
default_negative_prompt = "black and white, blurry, low resolution, pixelated, pixel art, low quality, low fidelity"
page_content = """<h1 class="text-3xl font-bold">StreamDiffusion</h1>
<h3 class="text-xl font-bold">Image-to-Image SD-Turbo</h3>
<p class="text-sm">
This demo showcases
<a
href="https://github.com/cumulo-autumn/StreamDiffusion"
target="_blank"
class="text-blue-500 underline hover:no-underline">StreamDiffusion
</a>
Image to Image pipeline using
<a
href="https://huggingface.co/stabilityai/sd-turbo"
target="_blank"
class="text-blue-500 underline hover:no-underline">SD-Turbo</a
> with a MJPEG stream server.
</p>
"""
class Pipeline:
class Info(BaseModel):
name: str = "StreamDiffusion img2img"
input_mode: str = "image"
page_content: str = page_content
class InputParams(BaseModel):
prompt: str = Field(
default_prompt,
title="Prompt",
field="textarea",
id="prompt",
)
# negative_prompt: str = Field(
# default_negative_prompt,
# title="Negative Prompt",
# field="textarea",
# id="negative_prompt",
# )
width: int = Field(
512, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
)
height: int = Field(
512, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
)
def __init__(self, device: torch.device, torch_dtype: torch.dtype):
params = self.InputParams()
self.stream = StreamDiffusionWrapper(
model_id_or_path=base_model,
use_tiny_vae=True,
device=device,
dtype=torch_dtype,
t_index_list=[35, 45],
frame_buffer_size=1,
width=params.width,
height=params.height,
use_lcm_lora=False,
output_type="pil",
warmup=10,
vae_id=taesd_model,
acceleration="xformers",
mode="img2img",
use_denoising_batch=True,
cfg_type="none",
# use_safety_checker=args.safety_checker,
enable_similar_image_filter=True,
similar_image_filter_threshold=0.98,
# engine_dir=args.engine_dir,
)
self.last_prompt = default_prompt
self.stream.prepare(
prompt=default_prompt,
negative_prompt=default_negative_prompt,
num_inference_steps=50,
guidance_scale=1.2,
)
def predict(self, image: Image.Image, params: "Pipeline.InputParams") -> Image.Image:
image_tensor = self.stream.preprocess_image(image)
# output_image = self.stream(image=image_tensor, prompt=params.prompt)
output_image = self.stream(image=image_tensor, prompt=params.prompt)
return output_image
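
Note: a minimal sketch of driving this Pipeline on its own, assuming the local ./models/sd-turbo and ./models/taesd checkouts exist and the input.png added in this commit is in the working directory:

import torch
from PIL import Image

import img2img

# Build the pipeline once; fp16 matches how main.py constructs it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pipeline = img2img.Pipeline(device, torch.float16)

# One img2img step: resize the source frame to the 512x512 the wrapper expects.
params = img2img.Pipeline.InputParams(prompt="a watercolor city at dusk")
frame = Image.open("input.png").convert("RGB").resize((512, 512))
result = pipeline.predict(image=frame, params=params)
result.save("output.png")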

input.png

Binary file not shown (542 KiB).

main.py

@@ -0,0 +1,120 @@
from fastapi import FastAPI
from pydantic import BaseModel
import datetime
import torch
from PIL import Image
import numpy as np
import SpoutGL
from OpenGL.GL import GL_RGBA
import time

import img2img


def main():
    TARGET_FPS = 60
    SPOUT_RECEIVER_NAME = "Spout DX11 Sender"
    SPOUT_SENDER_NAME = "Output - StreamDiffusion"
    WIDTH = 512
    HEIGHT = 512
    PROMPT = "a beautiful landscape painting, trending on artstation, 8k, hyperrealistic"

    timestamp = datetime.datetime.now()
    fps = 30.0

    print("Initializing StreamDiffusion pipeline...")
    global pipeline
    try:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        torch_dtype = torch.float16
        pipeline = img2img.Pipeline(device, torch_dtype)

        app = FastAPI()

        @app.get("/health")
        def read_root():
            return {"status": "ok"}

        class PromptUpdate(BaseModel):
            prompt: str

        @app.post("/api/update/prompt")
        async def update_prompt(update: PromptUpdate):
            # nonlocal (not global) so the assignment reaches the PROMPT read in the loop below
            nonlocal PROMPT
            PROMPT = update.prompt
            print(f"Prompt updated to: {PROMPT}")
            return {"message": "Prompt updated successfully", "new_prompt": PROMPT}

        print("Pipeline initialized.")
    except Exception as e:
        print(f"Error initializing StreamDiffusion pipeline: {e}")
        return

    print(f"Initializing Spout receiver for '{SPOUT_RECEIVER_NAME}'...")
    spout_receiver = SpoutGL.SpoutReceiver()
    spout_receiver.setReceiverName(SPOUT_RECEIVER_NAME)

    print(f"Initializing Spout sender as '{SPOUT_SENDER_NAME}'...")
    spout_sender = SpoutGL.SpoutSender()
    spout_sender.setSenderName(SPOUT_SENDER_NAME)

    image_bgra = np.zeros((HEIGHT, WIDTH, 4), dtype=np.uint8)

    import uvicorn
    import threading

    config = uvicorn.Config(app, host="0.0.0.0", port=34800, log_level="info")
    server = uvicorn.Server(config)
    threading.Thread(target=server.run, daemon=True).start()
    print("FastAPI server started at http://0.0.0.0:34800")

    try:
        print("Starting main loop. Press Ctrl+C to exit.")
        while True:
            received = spout_receiver.receiveImage(image_bgra, GL_RGBA, False, 0)
            # print(f"Received: {received}, Connected: {spout_receiver.isConnected()}, Updated: {spout_receiver.isUpdated()}, Empty: {SpoutGL.helpers.isBufferEmpty(image_bgra)}")
            if received:
                if spout_receiver.isUpdated():
                    continue
                if spout_receiver.isConnected() and SpoutGL.helpers.isBufferEmpty(image_bgra):
                    continue
                # Spout delivers BGRA; reorder to RGB for the pipeline.
                image_rgb_array = image_bgra[:, :, [2, 1, 0]]
                input_image = Image.fromarray(image_rgb_array, 'RGB')
                # input_image.save("debug_input.png")
                params = img2img.Pipeline.InputParams(prompt=PROMPT)
                output_image = pipeline.predict(image=input_image, params=params)
                # output_image.save("debug_output.png")
                # output_rgba_array = np.array(output_image.convert("RGBA"))
                # output_bgra_array = output_rgba_array[:, :, [2, 1, 0, 3]]
                # buffer = np.ascontiguousarray(output_bgra_array)
                output_bgr_array = np.array(output_image, dtype=np.uint8)[:, :, ::-1]
                output_bgra_array = np.zeros((HEIGHT, WIDTH, 4), dtype=np.uint8)
                output_bgra_array[:, :, :3] = output_bgr_array
                output_bgra_array[:, :, 3] = 255
                buffer = output_bgra_array
                spout_sender.sendImage(buffer, WIDTH, HEIGHT, GL_RGBA, False, 0)
                # timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
                dt = (datetime.datetime.now() - timestamp).total_seconds()
                t = 0.05
                fps = fps * t + 1 / dt * (1 - t)  # exponentially weighted FPS estimate
                timestamp = datetime.datetime.now()
                print("\033[92m[ STREAM DIFFUSION ]\033[0m " + f"Frame processed and sent to Spout: {fps:.2f}", end="\r", flush=True)
            else:
                time.sleep(1. / TARGET_FPS)
    except KeyboardInterrupt:
        print("\nExiting...")
    finally:
        print("Releasing Spout resources.")
        spout_receiver.releaseReceiver()
        spout_sender.releaseSender()


if __name__ == "__main__":
    main()
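
Note: while main.py is running, the prompt can be swapped at runtime through the FastAPI route defined above. A client sketch, assuming the requests package is installed and the server is reachable on localhost:

import requests

BASE = "http://127.0.0.1:34800"  # host/port from the uvicorn.Config above

# Liveness check against the /health route.
print(requests.get(f"{BASE}/health").json())  # {'status': 'ok'}

# POST a new prompt; the body shape matches the PromptUpdate model.
resp = requests.post(
    f"{BASE}/api/update/prompt",
    json={"prompt": "a neon-lit alley in the rain, cinematic"},
)
print(resp.json())  # {'message': 'Prompt updated successfully', 'new_prompt': ...}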

requirements.txt

@@ -45,8 +45,8 @@ streamdiffusion @ git+https://github.com/cumulo-autumn/StreamDiffusion.git@b6232
 sympy==1.13.3
 termcolor==3.1.0
 tokenizers==0.15.2
-torch==2.1.0+cu121
-torchvision==0.16.0+cu121
+torch==2.1.0
+torchvision==0.16.0
 tqdm==4.67.1
 transformers==4.35.2
 twython==3.9.1

spout_util.py

@@ -9,19 +9,20 @@ from OpenGL import GL
 from multiprocessing import Queue
 import numpy as np
+from PIL import Image

 TARGET_FPS = 30
 SEND_WIDTH = 512
 SEND_HEIGHT = 512
+alpha_cache = np.full((512, 512, 1), 255, dtype=np.uint8)

 def spout_buffer_to_tensor(buffer, width, height):
-    np_buffer = np.asarray(buffer, dtype=np.uint8)
+    # np_buffer = np.asarray(buffer, dtype=np.uint8)
+    np_buffer = np.frombuffer(buffer, dtype=np.uint8)
     image_bgra = np_buffer.reshape((height, width, 4))
     image_rgb = image_bgra[..., [2, 1, 0]]
@@ -29,30 +30,42 @@ def spout_buffer_to_tensor(buffer, width, height):
     # image_normalized = (image_float * 2.0) - 1.0
     tensor = torch.from_numpy(image_float).permute(2, 0, 1)
+    del np_buffer  # Free memory
+    del image_bgra  # Free memory
+    del image_rgb  # Free memory
+    del image_float  # Free memory
     return tensor.unsqueeze(0)

 def get_spout_image(queue, wwidth: int, wheight: int) -> None:
     with SpoutGL.SpoutReceiver() as receiver:
         receiver.setReceiverName("Spout DX11 Sender")
-        buffer = None
+        image_bgra = np.zeros((SEND_HEIGHT, SEND_WIDTH, 4), dtype=np.uint8)
         while True:
-            result = receiver.receiveImage(buffer, GL.GL_RGBA, False, 0)
+            result = receiver.receiveImage(image_bgra, GL.GL_RGBA, False, 0)
             # print("Receive result", result)
             if receiver.isUpdated():
-                width = receiver.getSenderWidth()
-                height = receiver.getSenderHeight()
-                buffer = array.array('B', [0] * (width * height * 4))  # Correctly reallocate buffer with updated size
-                print("Spout Receiver updated, Buffer size", width, height)
+                continue
+                # width = receiver.getSenderWidth()
+                # height = receiver.getSenderHeight()
+                # image_bgra = array.array('B', [0] * (width * height * 4))  # Correctly reallocate buffer with updated size
+                # print("Spout Receiver updated, Buffer size", width, height)
-            if buffer and result and not SpoutGL.helpers.isBufferEmpty(buffer):
-                pixels = spout_buffer_to_tensor(buffer, width, height)
+            # if buffer and result and not SpoutGL.helpers.isBufferEmpty(buffer):
+            if SpoutGL.helpers.isBufferEmpty(image_bgra):
+                continue
+            # pixels = spout_buffer_to_tensor(buffer, width, height)
             # print("get_spout_image", pixels.shape)
+            image_rgb_array = image_bgra[:, :, [2, 1, 0]]
+            pixels = Image.fromarray(image_rgb_array, 'RGB')
             queue.put(pixels, block=False)
             # Wait until the next frame is ready
             # Wait time is in milliseconds; note that 0 will return immediately
             # receiver.waitFrameSync("SpoutSender", 10000)
@@ -66,19 +79,21 @@ def randcolor():
 def tensor_to_spout_image(tensor):
     image = tensor.squeeze(0)
-    image = image.permute(1, 2, 0)
-    image_np = image.cpu().numpy()
+    if image.device.type != "cpu":
+        image = image.cpu()
+    image = image.permute(1, 2, 0).numpy()

-    if image_np.min() < 0:
-        image_np = (image_np + 1) / 2  # Scale from [-1, 1] to [0, 1]
-    image_np = np.clip(image_np * 255, 0, 255).astype(np.uint8)
+    if image.min() < 0:
+        image = (image + 1) / 2  # Scale from [-1, 1] to [0, 1]
+    image = np.clip(image * 255, 0, 255).astype(np.uint8)

-    h, w, _ = image_np.shape
-    alpha = np.full((h, w, 1), 255, dtype=np.uint8)
-    image_rgba = np.concatenate((image_np, alpha), axis=-1)
+    # h, w, _ = image_np.shape
+    # alpha = np.full((h, w, 1), 255, dtype=np.uint8)
+    image_rgba = np.concatenate((image, alpha_cache), axis=-1)
     image_bgra = image_rgba[..., [2, 1, 0, 3]]
+    del image  # Free memory
     return np.ascontiguousarray(image_bgra)  # Ensure the array is contiguous in memory

@@ -90,10 +105,17 @@ def send_spout_image(queue: Queue, width: int, height: int) -> None:
         # Check if there are images in the queue
         if not queue.empty():
-            image = queue.get(block=False)
-            pixels = tensor_to_spout_image(image)
+            output_image = queue.get(block=False)
+            # pixels = tensor_to_spout_image(image)
+            output_bgr_array = np.array(output_image, dtype=np.uint8)[:, :, ::-1]
+            output_bgra_array = np.zeros((SEND_HEIGHT, SEND_WIDTH, 4), dtype=np.uint8)
+            output_bgra_array[:, :, :3] = output_bgr_array
+            output_bgra_array[:, :, 3] = 255
+            buffer = output_bgra_array
-            result = sender.sendImage(pixels, width, height, GL.GL_RGBA, False, 0)
+            result = sender.sendImage(buffer, width, height, GL.GL_RGBA, False, 0)
             # print("Send result", result)
             # Indicate that a frame is ready to read
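
Note: both directions hinge on the same channel shuffle: Spout buffers are BGRA, PIL and the pipeline want RGB, and the module-level alpha_cache is reused instead of allocating an opaque alpha plane per frame. A self-contained numpy sketch of the round trip at the fixed 512x512 size:

import numpy as np

H, W = 512, 512
alpha_cache = np.full((H, W, 1), 255, dtype=np.uint8)  # reused every frame, as above

# Incoming Spout frame: BGRA bytes -> RGB for the pipeline (swap B and R, drop alpha).
bgra_in = np.random.randint(0, 256, (H, W, 4), dtype=np.uint8)
rgb = bgra_in[:, :, [2, 1, 0]]

# Outgoing frame: RGB result -> BGRA for sendImage, alpha forced opaque.
bgra_out = np.concatenate((rgb[:, :, ::-1], alpha_cache), axis=-1)
bgra_out = np.ascontiguousarray(bgra_out)  # Spout expects a contiguous buffer

# The colour channels survive the round trip unchanged.
assert bgra_out[..., :3].tobytes() == bgra_in[..., :3].tobytes()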
