import gradio as gr
import torch
import librosa
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq, pipeline
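# Dependencies implied by the imports: gradio, torch, transformers, and librosa
# (librosa additionally needs an audio backend such as soundfile to decode
# uploads). Exact versions are an assumption; nothing is pinned in this file.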
# -------------------------------
# 1) Load the STT model (Whisper-based)
# -------------------------------
processor = AutoProcessor.from_pretrained("openai/whisper-small")
model = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-small")
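# Inference runs on CPU by default. A minimal sketch for GPU use, assuming a
# CUDA-capable torch build (model and features must live on the same device):
#   device = "cuda" if torch.cuda.is_available() else "cpu"
#   model.to(device)
#   # ...and later, before generate(): input_features = input_features.to(device)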
# -------------------------------
# 2) Load the sentiment analysis model
# -------------------------------
# NOTE: this checkpoint is the pretrained KoELECTRA discriminator, which ships
# without a fine-tuned sentiment head, so the pipeline attaches a freshly
# initialized classifier and emits generic LABEL_0/LABEL_1 labels. The color
# mapping below also accepts star-style labels ("1 star".."5 stars") as
# produced by review-rating sentiment models.
sentiment_pipe = pipeline(
    "sentiment-analysis",
    model="monologg/koelectra-base-v3-discriminator",
    tokenizer="monologg/koelectra-base-v3-discriminator",
)
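# The pipeline returns one {'label': ..., 'score': ...} dict per input string,
# e.g. sentiment_pipe("...")[0]['label'] -> "LABEL_0" (illustrative values,
# not actual outputs).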
# -------------------------------
# 3) Audio -> text
# -------------------------------
def transcribe_audio(audio_path):
    # librosa resamples the upload to the 16 kHz mono input Whisper expects.
    speech, _ = librosa.load(audio_path, sr=16000)
    input_features = processor(speech, sampling_rate=16000, return_tensors="pt").input_features
    predicted_ids = model.generate(input_features)
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
    return transcription
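# Whisper auto-detects the spoken language by default. To pin decoding to
# Korean (optional; uses the standard transformers Whisper prompt API):
#   forced = processor.get_decoder_prompt_ids(language="korean", task="transcribe")
#   predicted_ids = model.generate(input_features, forced_decoder_ids=forced)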
# -------------------------------
# 4) Map sentiment label -> color
# -------------------------------
def label_to_color(label):
    if label in ["4 stars", "5 stars", "LABEL_4", "LABEL_5"]:
        return "green"  # positive
    elif label in ["1 star", "2 stars", "LABEL_1", "LABEL_2"]:
        return "red"  # negative
    else:  # neutral
        return "orange"
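# Examples (read off the branches above): label_to_color("5 stars") -> "green",
# label_to_color("LABEL_2") -> "red", and any unrecognized label such as
# "3 stars" or "LABEL_0" falls through to "orange".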
# -------------------------------
# 5) Text sentiment analysis (whole sentence)
# -------------------------------
def sentiment_whole_text(text):
    res = sentiment_pipe(text)[0]
    label = res['label']
    score = res['score']
    color = label_to_color(label)
    styled_text = f"<span style='color:{color}'>{text}</span>"
    legend = (
        "<div style='margin-top:10px;'>"
        "<b>Color legend:</b> "
        "<span style='color:green'>green=positive</span>, "
        "<span style='color:red'>red=negative</span>, "
        "<span style='color:orange'>orange=neutral/average</span>"
        "</div>"
    )
    return styled_text + legend, f"Sentiment: {label}, confidence: {score:.2f}"
# -------------------------------
# 6) Text sentiment analysis (per word)
# -------------------------------
def sentiment_word_level(text):
    words = text.split()
    styled_words = []
    # One pipeline call per word; see the batching note after this function.
    for w in words:
        res = sentiment_pipe(w)[0]
        label = res['label']
        color = label_to_color(label)
        styled_words.append(f"<span style='color:{color}'>{w}</span>")
    styled_text = " ".join(styled_words)
    legend = (
        "<div style='margin-top:10px;'>"
        "<b>Color legend:</b> "
        "<span style='color:green'>green=positive</span>, "
        "<span style='color:red'>red=negative</span>, "
        "<span style='color:orange'>orange=neutral/average</span>"
        "</div>"
    )
    return styled_text + legend, "Word-level sentiment display complete"
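# Batching note (a sketch, not a behavior change): text-classification
# pipelines also accept a list of strings, so the per-word loop above can be
# collapsed into one call that returns one result dict per word, in order:
#   results = sentiment_pipe(words)
#   styled_words = [
#       f"<span style='color:{label_to_color(r['label'])}'>{w}</span>"
#       for w, r in zip(words, results)
#   ]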
# -------------------------------
# 7) Audio -> text + sentiment analysis (sentence + word)
# -------------------------------
def process_audio_full(audio_file):
text = transcribe_audio(audio_file)
whole_text_result, whole_text_score = sentiment_whole_text(text)
word_level_result, word_level_status = sentiment_word_level(text)
return text, whole_text_result, whole_text_score, word_level_result, word_level_status
# -------------------------------
# 8) Gradio UI layout
# -------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# 🎤 Audio/Text → Sentiment Analysis")
with gr.Tabs():
        # ------------------- Audio -> text -------------------
        with gr.Tab("Audio → Text"):
            audio_input_1 = gr.Audio(label="Upload audio", type="filepath")
            audio_text_output = gr.Textbox(label="Transcribed text")
            audio_transcribe_btn = gr.Button("Extract text")
            audio_transcribe_btn.click(fn=transcribe_audio, inputs=[audio_input_1], outputs=[audio_text_output])
        # ------------------- Text -> sentiment analysis -------------------
        with gr.Tab("Text → Sentiment Analysis"):
            text_input = gr.Textbox(label="Text input")
            sentiment_whole_output = gr.HTML(label="Sentence-level sentiment")
            sentiment_whole_score = gr.Markdown(label="Sentiment result")
            sentiment_word_output = gr.HTML(label="Word-level sentiment")
            sentiment_btn = gr.Button("Analyze sentiment")
def analyze_text(text):
whole_res, whole_score = sentiment_whole_text(text)
word_res, word_status = sentiment_word_level(text)
return whole_res, whole_score, word_res
sentiment_btn.click(
fn=analyze_text,
inputs=[text_input],
outputs=[sentiment_whole_output, sentiment_whole_score, sentiment_word_output]
)
        # ------------------- Audio -> text + sentiment analysis -------------------
        with gr.Tab("Audio → Text + Sentiment Analysis"):
            audio_input_2 = gr.Audio(label="Upload audio", type="filepath")
            audio_text_output_2 = gr.Textbox(label="Transcribed text")
            sentiment_whole_output_2 = gr.HTML(label="Sentence-level sentiment")
            sentiment_whole_score_2 = gr.Markdown(label="Sentiment result")
            sentiment_word_output_2 = gr.HTML(label="Word-level sentiment")
            audio_process_btn = gr.Button("Start analysis")
def process_audio_tab(audio_file):
text, whole_res, whole_score, word_res, word_status = process_audio_full(audio_file)
return text, whole_res, whole_score, word_res
audio_process_btn.click(
fn=process_audio_tab,
inputs=[audio_input_2],
outputs=[audio_text_output_2, sentiment_whole_output_2, sentiment_whole_score_2, sentiment_word_output_2]
)
demo.launch()
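# When running locally instead of on Spaces, a temporary public URL is
# available via the standard Gradio option: demo.launch(share=True)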