Image animation

kitzoro · July 14, 2024, 11:46am

import cv2
import numpy as np
import pyaudio
import wave
import os
import time

# Constants
AUDIO_FILE = "/home/kit/sfml_project/sound_test/Start-1.wav"
IMAGE_PATH_CLOSED = "images/Start4.png"
IMAGE_PATH_OPEN = "images/Start5.png"
FRAME_DIR = "frames/"
SILENCE_THRESHOLD = 0.01
FRAME_RATE = 60  # 60 frames per second
FRAME_DELAY = 1.0 / FRAME_RATE

# Ensure frame directory exists
os.makedirs(FRAME_DIR, exist_ok=True)

# Load images
imageClosed = cv2.imread(IMAGE_PATH_CLOSED)
imageOpen = cv2.imread(IMAGE_PATH_OPEN)

if imageClosed is None or imageOpen is None:
    print("Error loading images.")
    exit(-1)

# Initialize PyAudio
p = pyaudio.PyAudio()

# Open the audio file
wf = wave.open(AUDIO_FILE, 'rb')

# Calculate the total duration of the audio file in seconds
audio_duration = wf.getnframes() / wf.getframerate()
total_frames_needed = int(audio_duration * FRAME_RATE)

# Global variable for current frame
currentFrame = imageClosed

# Define callback for audio stream
def audio_callback(in_data, frame_count, time_info, status):
    data = wf.readframes(frame_count)
    rms = np.sqrt(np.mean(np.square(np.frombuffer(data, dtype=np.int16))))

    global currentFrame
    if rms > SILENCE_THRESHOLD:
        currentFrame = imageOpen
    else:
        currentFrame = imageClosed

    return (data, pyaudio.paContinue)

# Open audio stream
stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                channels=wf.getnchannels(),
                rate=wf.getframerate(),
                output=True,
                stream_callback=audio_callback)

# Main loop
frameCounter = 0
keepRunning = True

cv2.namedWindow("Talking Animation")

start_time = time.time()

while frameCounter < total_frames_needed and keepRunning:
    elapsed_time = time.time() - start_time
    expected_frame_count = int(elapsed_time * FRAME_RATE)

    if frameCounter < expected_frame_count:
        # Display the current frame
        if currentFrame is not None:
            cv2.imshow("Talking Animation", currentFrame)
            frame_path = os.path.join(FRAME_DIR, f"frame_{frameCounter:06d}.png")
            cv2.imwrite(frame_path, currentFrame)
            print(f"Saved frame {frameCounter}")
            frameCounter += 1

    # Check for 'q' key press
    if cv2.waitKey(1) == ord('q'):
        keepRunning = False

# Ensure the audio stream has fully processed the audio file
stream.stop_stream()
stream.close()
p.terminate()
wf.close()

print(f"Saved final frame {frameCounter}")

# Cleanup
cv2.destroyAllWindows()

# Combine frames and audio into a video
os.system(f"ffmpeg -framerate {FRAME_RATE} -i {FRAME_DIR}frame_%06d.png -i {AUDIO_FILE} -c:v libx264 -pix_fmt yuv420p -c:a aac -strict experimental talking_animation_with_audio.mp4")

I am wanting to make a code that will make a talking animation, where there is sound, the mouth is open, which is start5.png, sound = 1. When there is no sound the mouth is close, no sound = 0 which is start4.png. any reason why the code is partially working.

kknechtel · July 14, 2024, 1:14pm

We can only possibly comment on a problem if we understand what you think is wrong with it.

Exactly what happens when you try using the code, and how is that different from what you want to happen?

kitzoro · July 14, 2024, 2:16pm

When I use the code, the character talks and it syncs with the audio file but towards the end of the file it breaks being in sync. I want it to sync through out the whole process not just half or most, from beginning to end.