File size: 6,042 Bytes
52ea100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import gradio as gr
import torch
import librosa
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq, pipeline

# -------------------------------
# 1) Load the STT model (Whisper-based)
# -------------------------------
# Downloads/loads the checkpoint on import — module import has network/disk
# side effects the first time it runs.
processor = AutoProcessor.from_pretrained("openai/whisper-small")
model = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-small")

# -------------------------------
# 2) Load the sentiment-analysis model
# -------------------------------
# NOTE(review): this checkpoint is a *pretrained discriminator*, not a
# sentiment-classification fine-tune — its LABEL_0/LABEL_1 outputs may not be
# meaningful sentiment labels. Confirm a fine-tuned model isn't intended here.
sentiment_pipe = pipeline(
    "sentiment-analysis",
    model="monologg/koelectra-base-v3-discriminator",
    tokenizer="monologg/koelectra-base-v3-discriminator"
)

# -------------------------------
# 3) Audio -> text
# -------------------------------
def transcribe_audio(audio_path):
    """Transcribe an audio file to text with the Whisper model.

    Args:
        audio_path: Path to an audio file readable by librosa.

    Returns:
        The decoded transcription string (first batch item).
    """
    # Whisper expects 16 kHz mono input; librosa resamples on load.
    waveform, _ = librosa.load(audio_path, sr=16000)
    features = processor(
        waveform, sampling_rate=16000, return_tensors="pt"
    ).input_features
    token_ids = model.generate(features)
    return processor.batch_decode(token_ids, skip_special_tokens=True)[0]

# -------------------------------
# 4) Sentiment label -> color mapping
# -------------------------------
def label_to_color(label):
    """Map a sentiment label to a CSS color name.

    Positive labels map to ``"green"``, negative to ``"red"``, and any
    other label (neutral/unknown) to ``"orange"``.
    """
    positive = {"4 stars", "5 stars", "LABEL_4", "LABEL_5"}
    negative = {"1 star", "2 stars", "LABEL_1", "LABEL_2"}
    if label in positive:
        return "green"
    if label in negative:
        return "red"
    return "orange"  # neutral / anything unrecognized

# -------------------------------
# 5) Text sentiment analysis (whole sentence)
# -------------------------------
def sentiment_whole_text(text):
    """Classify the whole text and return colored HTML plus a summary.

    Args:
        text: User-supplied text to classify.

    Returns:
        A 2-tuple of (HTML string with the text colored by sentiment plus
        a color legend, "감정: <label>, 신뢰도: <score>" summary string).
    """
    import html  # local import: only needed for escaping below

    res = sentiment_pipe(text)[0]
    label = res['label']
    score = res['score']
    color = label_to_color(label)
    # Escape the user text: it is rendered by a gr.HTML component, so raw
    # '<' / '>' characters would otherwise be interpreted as markup
    # (broken display at best, HTML injection at worst).
    styled_text = f"<span style='color:{color}'>{html.escape(text)}</span>"
    legend = (
        "<div style='margin-top:10px;'>"
        "<b>색상 설명:</b> "
        "<span style='color:green'>녹색=긍정</span>, "
        "<span style='color:red'>빨강=부정</span>, "
        "<span style='color:orange'>주황=중립/보통</span>"
        "</div>"
    )
    return styled_text + legend, f"감정: {label}, 신뢰도: {score:.2f}"

# -------------------------------
# 6) Text sentiment analysis (per word)
# -------------------------------
def sentiment_word_level(text):
    """Color each whitespace-separated word by its own sentiment.

    Args:
        text: User-supplied text; split on whitespace into words.

    Returns:
        A 2-tuple of (HTML string with one colored <span> per word plus a
        color legend, fixed status string).
    """
    import html  # local import: only needed for escaping below

    words = text.split()
    # Run the pipeline once on the whole list instead of once per word:
    # a single batched forward pass is far cheaper than N separate calls.
    # Guard the empty case so the pipeline never sees an empty batch.
    results = sentiment_pipe(words) if words else []
    styled_words = [
        # Escape each word: the output is rendered by gr.HTML, so raw
        # '<' / '>' would be interpreted as markup.
        f"<span style='color:{label_to_color(res['label'])}'>{html.escape(word)}</span>"
        for word, res in zip(words, results)
    ]
    styled_text = " ".join(styled_words)
    legend = (
        "<div style='margin-top:10px;'>"
        "<b>색상 설명:</b> "
        "<span style='color:green'>녹색=긍정</span>, "
        "<span style='color:red'>빨강=부정</span>, "
        "<span style='color:orange'>주황=중립/보통</span>"
        "</div>"
    )
    return styled_text + legend, "단어별 감정 표시 완료"

# -------------------------------
# 7) Audio -> text + sentiment analysis (sentence + word)
# -------------------------------
def process_audio_full(audio_file):
    """Transcribe audio, then run both sentiment analyses on the transcript.

    Args:
        audio_file: Path to the uploaded audio file.

    Returns:
        A 5-tuple: (transcript, whole-text HTML, whole-text summary,
        word-level HTML, word-level status).
    """
    transcript = transcribe_audio(audio_file)
    return (
        transcript,
        *sentiment_whole_text(transcript),
        *sentiment_word_level(transcript),
    )

# -------------------------------
# 8) Gradio UI ๊ตฌ์„ฑ
# -------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# ๐ŸŽค ์˜ค๋””์˜ค/ํ…์ŠคํŠธ โ†’ ๊ฐ์ • ๋ถ„์„")
    
    with gr.Tabs():
        # ------------------- ์˜ค๋””์˜ค -> ํ…์ŠคํŠธ -------------------
        with gr.Tab("์˜ค๋””์˜ค โ†’ ํ…์ŠคํŠธ"):
            audio_input_1 = gr.Audio(label="์Œ์„ฑ ์—…๋กœ๋“œ", type="filepath")
            audio_text_output = gr.Textbox(label="๋ณ€ํ™˜๋œ ํ…์ŠคํŠธ")
            audio_transcribe_btn = gr.Button("ํ…์ŠคํŠธ ์ถ”์ถœ")
            audio_transcribe_btn.click(fn=transcribe_audio, inputs=[audio_input_1], outputs=[audio_text_output])
        
        # ------------------- ํ…์ŠคํŠธ -> ๊ฐ์ • ๋ถ„์„ -------------------
        with gr.Tab("ํ…์ŠคํŠธ โ†’ ๊ฐ์ • ๋ถ„์„"):
            text_input = gr.Textbox(label="ํ…์ŠคํŠธ ์ž…๋ ฅ")
            sentiment_whole_output = gr.HTML(label="๋ฌธ์žฅ ๋‹จ์œ„ ๊ฐ์ • ๋ถ„์„")
            sentiment_whole_score = gr.Markdown(label="๊ฐ์ • ๊ฒฐ๊ณผ")
            sentiment_word_output = gr.HTML(label="๋‹จ์–ด ๋‹จ์œ„ ๊ฐ์ • ๋ถ„์„")
            sentiment_btn = gr.Button("๊ฐ์ • ๋ถ„์„")
            def analyze_text(text):
                whole_res, whole_score = sentiment_whole_text(text)
                word_res, word_status = sentiment_word_level(text)
                return whole_res, whole_score, word_res
            sentiment_btn.click(
                fn=analyze_text,
                inputs=[text_input],
                outputs=[sentiment_whole_output, sentiment_whole_score, sentiment_word_output]
            )
        
        # ------------------- ์˜ค๋””์˜ค โ†’ ํ…์ŠคํŠธ + ๊ฐ์ • ๋ถ„์„ -------------------
        with gr.Tab("์˜ค๋””์˜ค โ†’ ํ…์ŠคํŠธ + ๊ฐ์ • ๋ถ„์„"):
            audio_input_2 = gr.Audio(label="์Œ์„ฑ ์—…๋กœ๋“œ", type="filepath")
            audio_text_output_2 = gr.Textbox(label="๋ณ€ํ™˜๋œ ํ…์ŠคํŠธ")
            sentiment_whole_output_2 = gr.HTML(label="๋ฌธ์žฅ ๋‹จ์œ„ ๊ฐ์ • ๋ถ„์„")
            sentiment_whole_score_2 = gr.Markdown(label="๊ฐ์ • ๊ฒฐ๊ณผ")
            sentiment_word_output_2 = gr.HTML(label="๋‹จ์–ด ๋‹จ์œ„ ๊ฐ์ • ๋ถ„์„")
            audio_process_btn = gr.Button("๋ถ„์„ ์‹œ์ž‘")
            def process_audio_tab(audio_file):
                text, whole_res, whole_score, word_res, word_status = process_audio_full(audio_file)
                return text, whole_res, whole_score, word_res
            audio_process_btn.click(
                fn=process_audio_tab,
                inputs=[audio_input_2],
                outputs=[audio_text_output_2, sentiment_whole_output_2, sentiment_whole_score_2, sentiment_word_output_2]
            )

demo.launch()