v1

7 months ago · e73d66bb7f
commit e73d66bb7f
6 changed files with 437 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,2 @@
 .venv
 engines
--- a/app.py
+++ b/app.py
@ -0,0 +1,198 @@
 import os
 import sys
 import time
 import torch
 from diffusers import AutoencoderTiny, StableDiffusionPipeline
 from diffusers.utils import load_image
 sys.path.insert(0, os.path.abspath('../StreamDiffusion'))
 from streamdiffusion import StreamDiffusion
 from streamdiffusion.image_utils import postprocess_image
 from utils.viewer import receive_images
 from utils.wrapper import StreamDiffusionWrapper
 from threading import Thread
 from multiprocessing import Process, Queue, get_context
 from perlin import perlin_2d, rand_perlin_2d, rand_perlin_2d_octaves, perlin_2d_octaves
 from scene_prompt import surreal_prompt_parts
 from scene_prompt import surreal_prompts
 from scene_prompt import regret_prompts
 from spout_util import send_spout_image, get_spout_image
 from osc import start_osc_server
 import fire
 def image_generation_process(
    queue: Queue,
    fps_queue: Queue,
    prompt_queue: Queue,
    input_queue: Queue,
    # prompt: str,
    model_id_or_path: str,
 )-> None:
    # stream = StreamDiffusionWrapper(       
    #         model_id_or_path=model_id_or_path,
    #         lora_dict=None,
    #         t_index_list=[0, 16, 32, 45],
    #         frame_buffer_size=1,
    #         width=512,
    #         height=512,
    #         warmup=10,
    #         acceleration="xformers",
    #         mode="txt2img",
    #         use_denoising_batch=False,
    #         cfg_type="none",
    #         seed=2,
    #     )
    stream = StreamDiffusionWrapper(
        model_id_or_path=model_id_or_path,
        t_index_list=[0],
        frame_buffer_size=1,
        warmup=10,
        acceleration="tensorrt",
        use_lcm_lora=False,
        mode="txt2img",
        cfg_type="none",
        use_denoising_batch=True,
    )
    start_prompt = "A glowing, vintage phone booth standing in surreal landscapes across different scene"
    # Prepare the stream
    stream.prepare(
        prompt=start_prompt,
        num_inference_steps=4,
    )
    # Prepare image
    # init_image = load_image("example.png").resize((512, 512))
    # Warmup >= len(t_index_list) x frame_buffer_size
    # for _ in range(stream.batch_size - 1):
    #     stream()
    previous_output = None
    idx=0
    last_time = time.time()
    while True:
        # try:
        start_time = time.time()
        # x_output = stream(image=previous_output)
        # x_output=stream.stream.txt2img_sd_turbo(1).cpu()
        input_image= input_queue.get(block=True)
        # Check if a new prompt is available in the prompt_queue
        if not prompt_queue.empty():
            new_prompt = prompt_queue.get(block=False)
            if new_prompt:
                x_output = stream.img2img(image=input_image, prompt=new_prompt)
                print(f"Received new prompt from queue: {new_prompt}")
        else:
            # Use the current prompt if no new prompt is available
            x_output = stream.img2img(image=input_image)
        preprocessed_image =stream.preprocess_image(x_output)
        queue.put(preprocessed_image, block=False)
        # queue.put(preprocessed_image, block=False)
        # Calculate FPS
        elapsed_time = time.time() - start_time
        fps = 1 / elapsed_time if elapsed_time > 0 else float('inf')
        fps_queue.put(fps)
        # x_output = (x_output + 1) / 2  # Scale from [-1, 1] to [0, 1]
        # x_output = torch.clamp(x_output, 0, 1)
        # previous_output = x_output
        # except KeyboardInterrupt:
        #     print(f"fps: {fps}")
        #     return
 def main()-> None:
    try:
        ctx = get_context('spawn')
        queue = Queue()
        fps_queue = Queue()
        # noise_queue = Queue()
        spout_in_queue = Queue()
        # prompt = "A surreal landscapes"
        # prompt=regret_prompts[0]
        prompt_queue = Queue()
        # model_id_or_path = "KBlueLeaf/kohaku-v2.1"
        model_id_or_path = "stabilityai/sd-turbo"
        # start_osc_server(prompt_queue)
        process_osc = ctx.Process(
            target=start_osc_server,
            args=(prompt_queue,)
        )
        process_osc.start()
        print("Starting spout input process")
        process_spout_in = ctx.Process(
            target=get_spout_image,
            args=(spout_in_queue, 512, 512),
        )
        process_spout_in.start()
        print("Starting image generation process")
        process_gen= ctx.Process(
            target=image_generation_process,
            args=(queue, fps_queue, prompt_queue, spout_in_queue, model_id_or_path),
        )
        process_gen.start()
        # process_show=ctx.Process(target=receive_images, args=(queue, fps_queue))
        # process_show.start()
        print("Starting spout output process")
        process_spout_out=ctx.Process(target=send_spout_image, args=(queue, 512, 512))
        process_spout_out.start()
        process_gen.join()
        process_spout_in.join()
        process_spout_out.join()
        process_osc.join()
    except KeyboardInterrupt:
        print("Process interrupted")
        process_gen.terminate()
        process_spout_in.terminate()
        process_spout_out.terminate()
        process_osc.terminate()
        return
 if __name__ == "__main__":
    fire.Fire(main)
--- a/osc.py
+++ b/osc.py
@ -0,0 +1,23 @@
 import argparse
 import math
 from pythonosc.dispatcher import Dispatcher
 from pythonosc import osc_server
 OSC_PORT = 8787
 def start_osc_server(queue):
    def onReceivePrompt(address, *args):
        prompt = " ".join(args)
        print(f"Received prompt: {prompt}")
        queue.put(prompt)
    dispatcher = Dispatcher()
    dispatcher.map("/prompt", onReceivePrompt)
    server = osc_server.ThreadingOSCUDPServer(("localhost", OSC_PORT), dispatcher)
    print(f"OSC server is running on port {OSC_PORT}")
    server.serve_forever()
--- a/perlin.py
+++ b/perlin.py
@ -0,0 +1,69 @@
 import torch
 import math
 def rand_perlin_2d(shape, res, fade = lambda t: 6*t**5 - 15*t**4 + 10*t**3):
    delta = (res[0] / shape[0], res[1] / shape[1])
    d = (shape[0] // res[0], shape[1] // res[1])
    grid = torch.stack(torch.meshgrid(torch.arange(0, res[0], delta[0]), torch.arange(0, res[1], delta[1])), dim = -1) % 1
    angles = 2*math.pi*torch.rand(res[0]+1, res[1]+1)
    gradients = torch.stack((torch.cos(angles), torch.sin(angles)), dim = -1)
    tile_grads = lambda slice1, slice2: gradients[slice1[0]:slice1[1], slice2[0]:slice2[1]].repeat_interleave(d[0], 0).repeat_interleave(d[1], 1)
    dot = lambda grad, shift: (torch.stack((grid[:shape[0],:shape[1],0] + shift[0], grid[:shape[0],:shape[1], 1] + shift[1]  ), dim = -1) * grad[:shape[0], :shape[1]]).sum(dim = -1)
    n00 = dot(tile_grads([0, -1], [0, -1]), [0,  0])
    n10 = dot(tile_grads([1, None], [0, -1]), [-1, 0])
    n01 = dot(tile_grads([0, -1],[1, None]), [0, -1])
    n11 = dot(tile_grads([1, None], [1, None]), [-1,-1])
    t = fade(grid[:shape[0], :shape[1]])
    return math.sqrt(2) * torch.lerp(torch.lerp(n00, n10, t[..., 0]), torch.lerp(n01, n11, t[..., 0]), t[..., 1])
 def rand_perlin_2d_octaves(shape, res, octaves=1, persistence=0.5):
    noise = torch.zeros(shape)
    frequency = 1
    amplitude = 1
    for _ in range(octaves):
        noise += amplitude * rand_perlin_2d(shape, (frequency*res[0], frequency*res[1]))
        frequency *= 2
        amplitude *= persistence
    return noise
 def perlin_2d(shape, res, seed, fade=lambda t: 6*t**5 - 15*t**4 + 10*t**3):
    delta = (res[0] / shape[0], res[1] / shape[1])
    d = (shape[0] // res[0], shape[1] // res[1])
    grid = torch.stack(torch.meshgrid(torch.arange(0, res[0], delta[0]), torch.arange(0, res[1], delta[1])), dim=-1) % 1
    base_seed = int(seed)
    frac_seed = seed - base_seed
    torch.manual_seed(base_seed)
    angles_base = 2 * math.pi * torch.rand(res[0] + 1, res[1] + 1)
    gradients_base = torch.stack((torch.cos(angles_base), torch.sin(angles_base)), dim=-1)
    torch.manual_seed(base_seed + 1)
    angles_next = 2 * math.pi * torch.rand(res[0] + 1, res[1] + 1)
    gradients_next = torch.stack((torch.cos(angles_next), torch.sin(angles_next)), dim=-1)
    gradients = (1 - frac_seed) * gradients_base + frac_seed * gradients_next
    tile_grads = lambda slice1, slice2: gradients[slice1[0]:slice1[1], slice2[0]:slice2[1]].repeat_interleave(d[0], 0).repeat_interleave(d[1], 1)
    dot = lambda grad, shift: (torch.stack((grid[:shape[0], :shape[1], 0] + shift[0], grid[:shape[0], :shape[1], 1] + shift[1]), dim=-1) * grad[:shape[0], :shape[1]]).sum(dim=-1)
    n00 = dot(tile_grads([0, -1], [0, -1]), [0,  0])
    n10 = dot(tile_grads([1, None], [0, -1]), [-1, 0])
    n01 = dot(tile_grads([0, -1], [1, None]), [0, -1])
    n11 = dot(tile_grads([1, None], [1, None]), [-1, -1])
    t = fade(grid[:shape[0], :shape[1]])
    return math.sqrt(2) * torch.lerp(torch.lerp(n00, n10, t[..., 0]), torch.lerp(n01, n11, t[..., 0]), t[..., 1])
 def perlin_2d_octaves(shape, res, seed, octaves=1, persistence=0.5, fade=lambda t: 6*t**5 - 15*t**4 + 10*t**3):
    noise = torch.zeros(shape)
    frequency = 1
    amplitude = 1
    for i in range(octaves):
        noise += amplitude * perlin_2d(shape, (frequency * res[0], frequency * res[1]), seed + i, fade)
        frequency *= 2
        amplitude *= persistence
    return noise
--- a/scene_prompt.py
+++ b/scene_prompt.py
@ -0,0 +1,42 @@
 surreal_prompts = [
    "a surreal landscape of floating islands under a glowing sky",
    "an ethereal valley where waterfalls rise into the clouds",
    "a dreamlike desert with mirrored sand and hovering stones",
    "an endless ocean reflecting fractured moons and stars",
    "a neon-lit canyon with levitating ruins and glowing mist",
    "a twilight forest where the trees grow upside-down",
    "a luminous terrain with bioluminescent plants and crystal arches",
    "a gravity-defying mountain range spiraling into the void",
    "a shattered realm of glass bridges and hovering towers",
    "an alien world lit by pulsating constellations and fluid geometry"
 ]
 surreal_prompt_parts = [
    "a surreal landscape",
    "with floating islands",
    "glowing waterfalls",
    "neon-colored skies",
    "mirror-like desert ground",
    "levitating rocks",
    "upside-down trees",
    "ancient ruins suspended in air",
    "bioluminescent flora",
    "shattered moons overhead",
    "a path of glass tiles",
    "crystal towers emitting soft hums",
    "gravity-defying rivers",
    "alien constellations glowing brightly"
 ]
 regret_prompts = [
    "a lone figure standing in a vast, empty desert at dusk",
    "fractured mirrors scattered across the sand, reflecting different memories",
    "a withered tree growing upside down from the sky, its roots dripping ink",
    "floating clocks melting into the horizon, ticking backwards",
    "ghostly silhouettes walking in reverse, retracing forgotten steps",
    "a house half-submerged in water, its windows glowing faintly with past laughter",
    "the sky opens into a tunnel of old photographs slowly burning at the edges",
    "giant stone hands reaching out from the earth, trying to grasp something lost",
    "an ocean made of letters never sent, waves crashing with whispered apologies",
    "a child version of the figure stands alone, staring at the adult with distant eyes"
 ]
--- a/spout_util.py
+++ b/spout_util.py
@ -0,0 +1,103 @@
 import torch
 import SpoutGL
 from itertools import islice, cycle, repeat
 import array
 from random import randint
 import time
 from OpenGL import GL
 from multiprocessing import Queue
 import numpy as np
 TARGET_FPS = 30
 SEND_WIDTH = 512
 SEND_HEIGHT = 512
 def spout_buffer_to_tensor(buffer, width, height):
    np_buffer = np.asarray(buffer, dtype=np.uint8)
    image_bgra = np_buffer.reshape((height, width, 4))
    image_rgb = image_bgra[..., [2, 1, 0]]
    image_float = image_rgb.astype(np.float32) / 255.0
    # image_normalized = (image_float * 2.0) - 1.0
    tensor = torch.from_numpy(image_float).permute(2, 0, 1)
    return tensor.unsqueeze(0)
 def get_spout_image(queue, wwidth: int, wheight: int) -> None:
    with SpoutGL.SpoutReceiver() as receiver:
        receiver.setReceiverName("Spout DX11 Sender")
        buffer = None
        while True:
            result = receiver.receiveImage(buffer, GL.GL_RGBA, False, 0)
            # print("Receive result", result)
            if receiver.isUpdated():
                width = receiver.getSenderWidth()
                height = receiver.getSenderHeight()
                buffer = array.array('B', [0] * (width * height * 4))  # Correctly reallocate buffer with updated size
                print("Spout Receiver updated, Buffer size", width, height)
            if buffer and result and not SpoutGL.helpers.isBufferEmpty(buffer):
                pixels=spout_buffer_to_tensor(buffer, width, height)
                # print("get_spout_image", pixels.shape)
                queue.put(pixels, block=False)
            # Wait until the next frame is ready
            # Wait time is in milliseconds; note that 0 will return immediately
            # receiver.waitFrameSync("SpoutSender", 10000)
 def randcolor():
    return randint(0, 255)
 def tensor_to_spout_image(tensor):
    image = tensor.squeeze(0)
    image = image.permute(1, 2, 0)
    image_np = image.cpu().numpy()
    if image_np.min() < 0:
        image_np = (image_np + 1) / 2  # Scale from [-1, 1] to [0, 1]
    image_np = np.clip(image_np * 255, 0, 255).astype(np.uint8)
    h, w, _ = image_np.shape
    alpha = np.full((h, w, 1), 255, dtype=np.uint8)
    image_rgba = np.concatenate((image_np, alpha), axis=-1)
    image_bgra = image_rgba[..., [2, 1, 0, 3]]
    return np.ascontiguousarray(image_bgra)  # Ensure the array is contiguous in memory
 def send_spout_image(queue: Queue, width: int, height: int)->None:
    with SpoutGL.SpoutSender() as sender:
        sender.setSenderName("StreamDiffusion")
        while True:
            # Check if there are images in the queue
            if not queue.empty():
                image = queue.get(block=False)
                pixels = tensor_to_spout_image(image)
                result = sender.sendImage(pixels, width, height, GL.GL_RGBA, False, 0)
                # print("Send result", result)
                # Indicate that a frame is ready to read
                sender.setFrameSync("StreamDiffusion")
                # Wait for next send attempt
                # time.sleep(1./TARGET_FPS)