# Gradio demo: speech-to-text (Whisper) + Korean sentiment analysis UI.
import gradio as gr
import torch
import librosa
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq, pipeline
# -------------------------------
# 1) Load the STT model (Whisper-based)
# -------------------------------
WHISPER_MODEL_ID = "openai/whisper-small"
processor = AutoProcessor.from_pretrained(WHISPER_MODEL_ID)
model = AutoModelForSpeechSeq2Seq.from_pretrained(WHISPER_MODEL_ID)

# -------------------------------
# 2) Load the sentiment-analysis model
# -------------------------------
# NOTE(review): this checkpoint is a plain ELECTRA *discriminator*, not a
# sentiment-finetuned classification head — its output labels may not match
# the "N stars" / "LABEL_4" scheme that label_to_color expects. Confirm the
# intended checkpoint.
SENTIMENT_MODEL_ID = "monologg/koelectra-base-v3-discriminator"
sentiment_pipe = pipeline(
    "sentiment-analysis",
    model=SENTIMENT_MODEL_ID,
    tokenizer=SENTIMENT_MODEL_ID,
)
# -------------------------------
# 3) Audio -> text
# -------------------------------
def transcribe_audio(audio_path):
    """Transcribe an audio file to text with the Whisper model.

    audio_path: path to an audio file readable by librosa.
    Returns the decoded transcription string.
    """
    # Whisper expects 16 kHz input; librosa resamples on load.
    waveform, _ = librosa.load(audio_path, sr=16000)
    features = processor(waveform, sampling_rate=16000, return_tensors="pt").input_features
    token_ids = model.generate(features)
    return processor.batch_decode(token_ids, skip_special_tokens=True)[0]
# -------------------------------
# 4) Sentiment label -> color mapping
# -------------------------------
def label_to_color(label):
    """Map a sentiment label to a display color.

    Green for positive labels, red for negative labels,
    orange for anything else (treated as neutral).
    """
    positive = {"4 stars", "5 stars", "LABEL_4", "LABEL_5"}
    negative = {"1 star", "2 stars", "LABEL_1", "LABEL_2"}
    if label in positive:
        return "green"
    if label in negative:
        return "red"
    return "orange"
# -------------------------------
# 5) Text sentiment analysis (whole sentence)
# -------------------------------
def sentiment_whole_text(text):
    """Classify the sentiment of the whole text and color it accordingly.

    Returns a 2-tuple:
      - HTML: the full text wrapped in a colored <span>, plus a color legend
      - a summary string with the predicted label and its confidence score
    """
    # Fix: the Korean string literals were garbled/split mid-literal in the
    # original (a syntax error); reconstructed here.
    res = sentiment_pipe(text)[0]
    label = res["label"]
    score = res["score"]
    color = label_to_color(label)
    styled_text = f"<span style='color:{color}'>{text}</span>"
    # Legend: green=positive, red=negative, orange=neutral.
    legend = (
        "<div style='margin-top:10px;'>"
        "<b>색상 설명:</b> "
        "<span style='color:green'>녹색=긍정</span>, "
        "<span style='color:red'>빨강=부정</span>, "
        "<span style='color:orange'>주황=중립/보통</span>"
        "</div>"
    )
    return styled_text + legend, f"감정: {label}, 신뢰도: {score:.2f}"
# -------------------------------
# 6) Text sentiment analysis (per word)
# -------------------------------
def sentiment_word_level(text):
    """Classify each whitespace-separated word and color it by sentiment.

    Returns a 2-tuple:
      - HTML: every word wrapped in a colored <span>, plus a color legend
      - a fixed status string
    """
    words = text.split()
    # Fix: the original called the pipeline once per word inside the loop;
    # HF pipelines accept a list, so batch everything into a single call.
    # Also handles empty input (no words -> no pipeline call).
    results = sentiment_pipe(words) if words else []
    styled_words = [
        f"<span style='color:{label_to_color(res['label'])}'>{word}</span>"
        for word, res in zip(words, results)
    ]
    styled_text = " ".join(styled_words)
    # Legend: green=positive, red=negative, orange=neutral.
    legend = (
        "<div style='margin-top:10px;'>"
        "<b>색상 설명:</b> "
        "<span style='color:green'>녹색=긍정</span>, "
        "<span style='color:red'>빨강=부정</span>, "
        "<span style='color:orange'>주황=중립/보통</span>"
        "</div>"
    )
    return styled_text + legend, "단어별 감정 표시 완료"
# -------------------------------
# 7) Audio -> text + sentiment analysis (sentence + word level)
# -------------------------------
def process_audio_full(audio_file):
    """Run the full pipeline: STT, then sentence- and word-level sentiment.

    Returns (transcription, sentence_html, sentence_summary,
             word_html, word_status).
    """
    transcription = transcribe_audio(audio_file)
    whole_html, whole_summary = sentiment_whole_text(transcription)
    word_html, word_status = sentiment_word_level(transcription)
    return transcription, whole_html, whole_summary, word_html, word_status
# -------------------------------
# 8) Gradio UI
# -------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# 🎤 오디오/텍스트 → 감정 분석")
    with gr.Tabs():
        # ------------------- Audio -> text -------------------
        with gr.Tab("오디오 → 텍스트"):
            audio_input_1 = gr.Audio(label="음성 업로드", type="filepath")
            audio_text_output = gr.Textbox(label="변환된 텍스트")
            audio_transcribe_btn = gr.Button("텍스트 추출")
            audio_transcribe_btn.click(
                fn=transcribe_audio,
                inputs=[audio_input_1],
                outputs=[audio_text_output],
            )

        # ------------------- Text -> sentiment analysis -------------------
        with gr.Tab("텍스트 → 감정 분석"):
            text_input = gr.Textbox(label="텍스트 입력")
            sentiment_whole_output = gr.HTML(label="문장 단위 감정 분석")
            sentiment_whole_score = gr.Markdown(label="감정 결과")
            sentiment_word_output = gr.HTML(label="단어 단위 감정 분석")
            sentiment_btn = gr.Button("감정 분석")

            def analyze_text(text):
                # The word-level status string is not shown in this tab.
                whole_html, whole_summary = sentiment_whole_text(text)
                word_html, _ = sentiment_word_level(text)
                return whole_html, whole_summary, word_html

            sentiment_btn.click(
                fn=analyze_text,
                inputs=[text_input],
                outputs=[sentiment_whole_output, sentiment_whole_score, sentiment_word_output],
            )

        # ------------- Audio -> text + sentiment analysis -------------
        with gr.Tab("오디오 → 텍스트 + 감정 분석"):
            audio_input_2 = gr.Audio(label="음성 업로드", type="filepath")
            audio_text_output_2 = gr.Textbox(label="변환된 텍스트")
            sentiment_whole_output_2 = gr.HTML(label="문장 단위 감정 분석")
            sentiment_whole_score_2 = gr.Markdown(label="감정 결과")
            sentiment_word_output_2 = gr.HTML(label="단어 단위 감정 분석")
            audio_process_btn = gr.Button("분석 시작")

            def process_audio_tab(audio_file):
                # Discard the word-level status; only HTML/summary is shown.
                text, whole_html, whole_summary, word_html, _ = process_audio_full(audio_file)
                return text, whole_html, whole_summary, word_html

            audio_process_btn.click(
                fn=process_audio_tab,
                inputs=[audio_input_2],
                outputs=[
                    audio_text_output_2,
                    sentiment_whole_output_2,
                    sentiment_whole_score_2,
                    sentiment_word_output_2,
                ],
            )

# Fix: launch only when run as a script, not when imported as a module.
if __name__ == "__main__":
    demo.launch()