I am using Whisper to convert audio/video to a transcript, but I am getting the following error:
python main.py
[nltk_data] Downloading package punkt to
[nltk_data] C:\Users\Akash\AppData\Roaming\nltk_data…
[nltk_data] Package punkt is already up-to-date!
Converting video to audio…
MoviePy - Writing audio in C:/Users/Akash/Downloads/a.wav
MoviePy - Done.
Conversion completed successfully.
Converting audio to WAV format…
Audio conversion to WAV completed successfully. Output file: C:/Users/Akash/Downloads/a.wav
Transcribing audio with Whisper from file: C:/Users/Akash/Downloads/a.wav
FileNotFoundError: [WinError 2] The system cannot find the file specified
Transcript saved to C:/Users/Akash/Desktop/sa.docx
Keeping temporary audio file: C:/Users/Akash/Downloads/a.wav
This is the code for the app:
import os
import threading
from tkinter import filedialog, Tk, Button, Label, ttk, messagebox, Frame
from moviepy.editor import AudioFileClip
import whisper
from pydub import AudioSegment
import time
import nltk
from docx import Document # Import for creating .docx files
# Ensure necessary NLTK data is downloaded
# NOTE(review): this runs every time the module is loaded, and nothing else in
# the visible code uses nltk -- confirm whether the download is still needed.
nltk.download('punkt')
# Global variable to store the after ID
# update_timer() stores the id returned by root.after() here so that
# run_conversion() can cancel the pending timer callback; it stays None until
# the timer has been started.
timer_update_id = None
def convert_video_to_audio(video_file, audio_file):
    """Extract the audio track of a video into a 16-bit PCM audio file.

    Args:
        video_file: Path of the source video.
        audio_file: Path the extracted audio is written to.

    Any failure is printed to stdout instead of being raised.
    """
    try:
        print("Converting video to audio...")
        audio_clip = AudioFileClip(video_file)
        try:
            audio_clip.write_audiofile(audio_file, codec='pcm_s16le')
        finally:
            # Close the clip even when writing fails, so its resources are
            # released on the error path as well.
            audio_clip.close()
        print("Conversion completed successfully.")
    except Exception as e:
        print(f"Error during conversion: {e}")
def convert_audio_to_wav(audio_file):
    """Rewrite ``audio_file`` in place as a mono, 16 kHz WAV file.

    Any failure is printed to stdout instead of being raised.
    """
    try:
        print("Converting audio to WAV format...")
        normalized = (
            AudioSegment.from_file(audio_file)
            .set_channels(1)        # mono
            .set_frame_rate(16000)  # 16 kHz
        )
        # The result is exported over the very file it was read from.
        normalized.export(audio_file, format="wav")
        print(f"Audio conversion to WAV completed successfully. Output file: {audio_file}")
    except Exception as err:
        print(f"Error during audio conversion: {err}")
def transcribe_audio_with_whisper(audio_file):
    """Transcribe an audio file with the Whisper "base" model.

    Args:
        audio_file: Path of the audio file to transcribe.

    Returns:
        The recognised text, or an empty string when transcription failed.
        Failures are reported on stdout; the error text is deliberately not
        returned, so it can never be saved as if it were a transcript.
    """
    # Imported locally so this function does not depend on a file-level import.
    import shutil

    if not os.path.isfile(audio_file):
        print(f"FileNotFoundError: File not found: {audio_file}")
        return ""

    print(f"Transcribing audio with Whisper from file: {audio_file}")
    try:
        model = whisper.load_model("base")
        result = model.transcribe(audio_file)
        transcript = result["text"]
    except FileNotFoundError as e:
        if shutil.which("ffmpeg") is None:
            # The audio file was verified above, so the file that cannot be
            # found is most likely the ffmpeg command-line tool that Whisper
            # runs to decode audio (on Windows this shows up as WinError 2).
            print(
                "FileNotFoundError: the 'ffmpeg' executable was not found on PATH. "
                "Whisper needs ffmpeg to decode audio; install it and add it to PATH. "
                f"Original error: {e}"
            )
        else:
            print(f"FileNotFoundError: {e}")
        return ""
    except Exception as e:
        print(f"Error: {e}")
        return ""

    print("Transcription completed successfully.")
    return transcript
def save_transcript_to_docx(transcript, docx_file):
    """Write ``transcript`` into a new Word document at ``docx_file``.

    The document consists of a level-1 "Transcript" heading followed by the
    text as a single paragraph. An empty transcript is reported and nothing is
    written. Errors are printed to stdout instead of being raised.
    """
    try:
        if not transcript:
            print("No transcript available to save.")
            return
        document = Document()
        document.add_heading('Transcript', level=1)
        document.add_paragraph(transcript)
        document.save(docx_file)
        print(f"Transcript saved to {docx_file}")
    except Exception as err:
        print(f"Error saving transcript: {err}")
def update_status(stage):
    """Show ``stage`` in the status label as the current processing stage."""
    message = f"Current Stage: {stage}"
    status_label.configure(text=message)
def update_timer(start_time):
    """Refresh the elapsed-time label and re-arm this callback in 100 ms."""
    global timer_update_id
    seconds_elapsed = time.time() - start_time
    timer_label.config(text=f"Elapsed Time: {seconds_elapsed:.2f} seconds")
    # Keep the callback id so the pending update can be cancelled later.
    timer_update_id = root.after(100, update_timer, start_time)
def select_file():
    """Ask the user for an MP4 or WAV file and enable conversion once one is chosen."""
    global selected_file
    accepted_types = [("MP4 files", "*.mp4"), ("WAV files", "*.wav")]
    selected_file = filedialog.askopenfilename(title="Select File", filetypes=accepted_types)
    if not selected_file:
        # Dialog was cancelled: leave the label and button untouched.
        return
    chosen_name = os.path.basename(selected_file)
    file_label.config(text=f"Selected File: {chosen_name}")
    convert_button.config(state="normal")
def convert_and_transcribe():
    """Ask where to save the transcript and start processing in a worker thread.

    Does nothing when no input file is selected or the save dialog is
    cancelled. Otherwise the convert button is disabled, the progress bar and
    elapsed-time display are started, and run_conversion() is launched on a
    separate thread so the GUI stays responsive.
    """
    if not selected_file:
        return

    # Derive the audio path from the real extension. A plain
    # str.replace('.mp4', '.wav') is case-sensitive, so "CLIP.MP4" would keep
    # its name and the extracted audio would overwrite the source video; it
    # would also rewrite ".mp4" occurring inside directory names.
    base_path, extension = os.path.splitext(selected_file)
    if extension.lower() == '.mp4':
        audio_file = base_path + '.wav'
    else:
        audio_file = selected_file

    docx_file = filedialog.asksaveasfilename(defaultextension=".docx", title="Save Transcript As")
    if not docx_file:
        return

    # Disable the button and show the progress bar
    convert_button.config(state="disabled")
    progress_bar.start()
    update_status("Processing...")

    # Track the start time and start updating the timer
    start_time = time.time()
    update_timer(start_time)

    # Run the processing in a separate thread to avoid blocking the GUI
    threading.Thread(
        target=run_conversion,
        args=(selected_file, audio_file, docx_file, start_time),
    ).start()
def run_conversion(file_path, audio_file, docx_file, start_time):
    """Run the whole pipeline: extract audio, normalise it, transcribe, save.

    Args:
        file_path: The file chosen by the user (video or audio).
        audio_file: Path of the audio file handed to the transcriber.
        docx_file: Path the transcript document is written to.
        start_time: time.time() value taken when processing started; used for
            the elapsed-time message.

    The GUI is always reset at the end. The closing dialog reports success
    only when no step raised and a non-empty transcript was produced.
    """
    error_message = None
    try:
        # Convert video to audio if necessary
        if file_path.lower().endswith(".mp4"):
            update_status("Converting video to audio")
            convert_video_to_audio(file_path, audio_file)

        # Convert audio to WAV format, mono, and 16kHz
        update_status("Converting audio to WAV format")
        convert_audio_to_wav(audio_file)

        # Ensure the audio file exists before attempting transcription
        if not os.path.isfile(audio_file):
            raise FileNotFoundError(f"Converted audio file not found: {audio_file}")

        # Transcribe audio with Whisper
        update_status("Transcribing audio")
        transcript = transcribe_audio_with_whisper(audio_file)
        if not transcript:
            # Nothing would be written for an empty transcript, so do not
            # let the run be reported as a success.
            raise RuntimeError("Transcription produced no text.")

        # Save transcript
        update_status("Saving transcript")
        save_transcript_to_docx(transcript, docx_file)
    except Exception as e:
        error_message = str(e)
        print(f"Error: {e}")
    finally:
        # Avoid deleting the file
        print(f"Keeping temporary audio file: {audio_file}")

        # Stop updating the timer
        if timer_update_id is not None:
            root.after_cancel(timer_update_id)

        elapsed_time_str = f"{time.time() - start_time:.2f} seconds"

        # Update the GUI after processing
        progress_bar.stop()
        convert_button.config(state="normal")
        if error_message is None:
            status_label.config(text=f"Completed in {elapsed_time_str}")
            messagebox.showinfo("Done", "The transcript has been saved successfully!")
        else:
            # Previously the success dialog was shown even after a failure.
            status_label.config(text=f"Failed after {elapsed_time_str}")
            messagebox.showerror("Error", f"The transcript could not be created:\n{error_message}")
# GUI Setup: build the main window, a blue information sidebar on the left and
# the white control panel on the right, then hand control to the Tk event loop.
root = Tk()
root.title("Video/Audio to Transcript Converter")
root.geometry("800x500")
root.resizable(False, False)

# Shared widget options, so each colour and font is written only once.
sidebar_colors = {"bg": "#1976D2", "fg": "white"}
panel_colors = {"bg": "white", "fg": "#333333"}
panel_text = {**panel_colors, "font": ("Helvetica", 12)}

# Left frame (blue background)
left_frame = Frame(root, bg=sidebar_colors["bg"], width=300)
left_frame.pack(side="left", fill="y")

welcome_label = Label(left_frame, text="Welcome", font=("Helvetica", 16, "bold"), **sidebar_colors)
welcome_label.pack(padx=20, pady=20)

info_text = (
    "- Use this tool to convert video or audio files into text transcripts."
    "\n\n"
    "- It supports both video and audio files as input and generates a transcript with timestamps."
    " The user-friendly GUI allows for easy file selection and conversion."
)
info_label = Label(
    left_frame,
    text=info_text,
    font=("Helvetica", 12),
    wraplength=200,
    justify="left",
    **sidebar_colors,
)
info_label.pack(padx=20, pady=10)

# Right frame (white background)
right_frame = Frame(root, bg=panel_colors["bg"], width=500)
right_frame.pack(side="right", fill="both", expand=True)

input_label = Label(right_frame, text="Transcripts Generator", font=("Helvetica", 14, "bold"), **panel_colors)
input_label.pack(pady=20)

# File selection button
select_button = ttk.Button(right_frame, text="Select File", command=select_file)
select_button.pack(pady=10)

# Label to display selected file
file_label = Label(right_frame, text="No file selected", **panel_text)
file_label.pack(pady=10)

# Convert and Transcribe button (disabled until a file has been selected)
convert_button = ttk.Button(
    right_frame,
    text="Convert and Transcribe",
    command=convert_and_transcribe,
    state="disabled",
)
convert_button.pack(pady=10)

# Progress Bar
progress_bar = ttk.Progressbar(right_frame, mode='indeterminate', length=280)
progress_bar.pack(pady=20)

# Status Label
status_label = Label(right_frame, text="", **panel_text)
status_label.pack(pady=5)

# Timer Label
timer_label = Label(right_frame, text="Elapsed Time: 0.00 seconds", **panel_text)
timer_label.pack(pady=5)

root.mainloop()
type or paste code here