|
|
import gradio as gr
import torch
import librosa
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq, pipeline
|
|
# Speech-to-text: Whisper (small) processor and model
processor = AutoProcessor.from_pretrained("openai/whisper-small")
model = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-small")

# Korean sentiment classifier
sentiment_pipe = pipeline(
    "sentiment-analysis",
    model="monologg/koelectra-base-v3-discriminator",
    tokenizer="monologg/koelectra-base-v3-discriminator"
)
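# Note: the koelectra-base-v3-discriminator checkpoint ships without a
# sentiment-tuned classification head, so the pipeline initializes one from
# scratch and typically emits generic LABEL_0/LABEL_1 predictions. A checkpoint
# fine-tuned for Korean sentiment could be swapped in here without changing the
# rest of the code.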
|
|
def transcribe_audio(audio_path):
    # Load audio at 16 kHz, the sampling rate Whisper expects
    speech, _ = librosa.load(audio_path, sr=16000)
    input_features = processor(speech, sampling_rate=16000, return_tensors="pt").input_features
    predicted_ids = model.generate(input_features)
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
    return transcription
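# Optional note (assumes a CUDA-capable GPU): the torch import above is currently
# unused; it could be used for device placement, e.g. moving the model once with
# model.to("cuda") and sending input_features to the same device before generate().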
|
|
def label_to_color(label):
    # Map classifier labels to display colors; both star-style labels
    # (e.g. "5 stars") and generic LABEL_n outputs are handled.
    if label in ["4 stars", "5 stars", "LABEL_4", "LABEL_5"]:
        return "green"
    elif label in ["1 star", "2 stars", "LABEL_1", "LABEL_2"]:
        return "red"
    else:
        return "orange"
|
|
def sentiment_whole_text(text):
    # Classify the whole text at once and color it by predicted sentiment.
    res = sentiment_pipe(text)[0]
    label = res['label']
    score = res['score']
    color = label_to_color(label)
    styled_text = f"<span style='color:{color}'>{text}</span>"
    legend = (
        "<div style='margin-top:10px;'>"
        "<b>Color legend:</b> "
        "<span style='color:green'>green = positive</span>, "
        "<span style='color:red'>red = negative</span>, "
        "<span style='color:orange'>orange = neutral/average</span>"
        "</div>"
    )
    return styled_text + legend, f"Sentiment: {label}, confidence: {score:.2f}"
|
|
def sentiment_word_level(text):
    # Classify each whitespace-separated word independently and color it.
    words = text.split()
    styled_words = []
    for w in words:
        res = sentiment_pipe(w)[0]
        label = res['label']
        color = label_to_color(label)
        styled_words.append(f"<span style='color:{color}'>{w}</span>")
    styled_text = " ".join(styled_words)
    legend = (
        "<div style='margin-top:10px;'>"
        "<b>Color legend:</b> "
        "<span style='color:green'>green = positive</span>, "
        "<span style='color:red'>red = negative</span>, "
        "<span style='color:orange'>orange = neutral/average</span>"
        "</div>"
    )
    return styled_text + legend, "Word-level sentiment highlighting complete"
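# Note: Hugging Face pipelines also accept a list of strings, so
# sentiment_pipe(words) would score every word in one batched call;
# the per-word loop above is kept for readability.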
|
|
def process_audio_full(audio_file):
    # Full pipeline: speech-to-text, then sentence- and word-level sentiment.
    text = transcribe_audio(audio_file)
    whole_text_result, whole_text_score = sentiment_whole_text(text)
    word_level_result, word_level_status = sentiment_word_level(text)
    return text, whole_text_result, whole_text_score, word_level_result, word_level_status
|
|
with gr.Blocks() as demo:
    gr.Markdown("# 🎤 Audio/Text → Sentiment Analysis")

    with gr.Tabs():

        with gr.Tab("Audio → Text"):
            audio_input_1 = gr.Audio(label="Upload audio", type="filepath")
            audio_text_output = gr.Textbox(label="Transcribed text")
            audio_transcribe_btn = gr.Button("Extract text")
            audio_transcribe_btn.click(fn=transcribe_audio, inputs=[audio_input_1], outputs=[audio_text_output])

        with gr.Tab("Text → Sentiment Analysis"):
            text_input = gr.Textbox(label="Text input")
            sentiment_whole_output = gr.HTML(label="Sentence-level sentiment analysis")
            sentiment_whole_score = gr.Markdown(label="Sentiment result")
            sentiment_word_output = gr.HTML(label="Word-level sentiment analysis")
            sentiment_btn = gr.Button("Analyze sentiment")

            def analyze_text(text):
                whole_res, whole_score = sentiment_whole_text(text)
                word_res, word_status = sentiment_word_level(text)
                return whole_res, whole_score, word_res

            sentiment_btn.click(
                fn=analyze_text,
                inputs=[text_input],
                outputs=[sentiment_whole_output, sentiment_whole_score, sentiment_word_output]
            )

        with gr.Tab("Audio → Text + Sentiment Analysis"):
            audio_input_2 = gr.Audio(label="Upload audio", type="filepath")
            audio_text_output_2 = gr.Textbox(label="Transcribed text")
            sentiment_whole_output_2 = gr.HTML(label="Sentence-level sentiment analysis")
            sentiment_whole_score_2 = gr.Markdown(label="Sentiment result")
            sentiment_word_output_2 = gr.HTML(label="Word-level sentiment analysis")
            audio_process_btn = gr.Button("Start analysis")

            def process_audio_tab(audio_file):
                # Drop the word-level status string; the UI shows four outputs.
                text, whole_res, whole_score, word_res, word_status = process_audio_full(audio_file)
                return text, whole_res, whole_score, word_res

            audio_process_btn.click(
                fn=process_audio_tab,
                inputs=[audio_input_2],
                outputs=[audio_text_output_2, sentiment_whole_output_2, sentiment_whole_score_2, sentiment_word_output_2]
            )
|
|
demo.launch() |
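# Note: demo.launch(share=True) would additionally expose a temporary public URL
# (assumes outbound access to Gradio's tunneling service); the default serves locally.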
|
|
|