Adasadqw committed on
Commit
52ea100
·
verified ·
1 Parent(s): 7f305fd

Create app.py

Files changed (1)
app.py +140 -0
app.py ADDED
@@ -0,0 +1,140 @@
+ import gradio as gr
+ import torch
+ import librosa
+ from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq, pipeline
+
+ # -------------------------------
+ # 1) Load the STT model (Whisper-based)
+ # -------------------------------
+ processor = AutoProcessor.from_pretrained("openai/whisper-small")
+ model = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-small")
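+ # Note (assumption, not in the original): Whisper auto-detects the spoken
+ # language. If the input is known to be Korean, decoding can be pinned with
+ #   forced_decoder_ids = processor.get_decoder_prompt_ids(language="korean", task="transcribe")
+ # passed to model.generate(input_features, forced_decoder_ids=forced_decoder_ids).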
+
+ # -------------------------------
+ # 2) Load the sentiment analysis model
+ # -------------------------------
+ sentiment_pipe = pipeline(
+     "sentiment-analysis",
+     model="monologg/koelectra-base-v3-discriminator",
+     tokenizer="monologg/koelectra-base-v3-discriminator"
+ )
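+ # Caveat: koelectra-base-v3-discriminator is a pretrained discriminator, not a
+ # checkpoint fine-tuned for classification, so the pipeline attaches a randomly
+ # initialized head and emits generic LABEL_0/LABEL_1 labels. A KoELECTRA model
+ # fine-tuned on a Korean sentiment dataset (e.g. NSMC) would be needed for
+ # meaningful scores.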
+
+ # -------------------------------
+ # 3) Audio -> text
+ # -------------------------------
+ def transcribe_audio(audio_path):
+     # Resample to 16 kHz, the rate Whisper's feature extractor expects
+     speech, _ = librosa.load(audio_path, sr=16000)
+     input_features = processor(speech, sampling_rate=16000, return_tensors="pt").input_features
+     predicted_ids = model.generate(input_features)
+     transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
+     return transcription
+
+ # -------------------------------
+ # 4) Map sentiment labels to colors
+ # -------------------------------
+ def label_to_color(label):
+     if label in ["4 stars", "5 stars", "LABEL_4", "LABEL_5"]:
+         return "green"   # positive
+     elif label in ["1 star", "2 stars", "LABEL_1", "LABEL_2"]:
+         return "red"     # negative
+     else:                # neutral
+         return "orange"
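+ # The "N stars" names match five-star review models such as
+ # nlptown/bert-base-multilingual-uncased-sentiment; a two-label head instead
+ # emits LABEL_0 (falls through to orange) or LABEL_1 (mapped to red here).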
+
+ # -------------------------------
+ # 5) Text sentiment analysis (whole sentence)
+ # -------------------------------
+ def sentiment_whole_text(text):
+     res = sentiment_pipe(text)[0]
+     label = res['label']
+     score = res['score']
+     color = label_to_color(label)
+     # Note: the text is interpolated into HTML unescaped; html.escape(text)
+     # would be safer if the input can contain markup.
+     styled_text = f"<span style='color:{color}'>{text}</span>"
+     legend = (
+         "<div style='margin-top:10px;'>"
+         "<b>Color legend:</b> "
+         "<span style='color:green'>green = positive</span>, "
+         "<span style='color:red'>red = negative</span>, "
+         "<span style='color:orange'>orange = neutral</span>"
+         "</div>"
+     )
+     return styled_text + legend, f"Sentiment: {label}, confidence: {score:.2f}"
+
+ # -------------------------------
+ # 6) Text sentiment analysis (word by word)
+ # -------------------------------
+ def sentiment_word_level(text):
+     words = text.split()
+     styled_words = []
+     for w in words:
+         res = sentiment_pipe(w)[0]
+         label = res['label']
+         color = label_to_color(label)
+         styled_words.append(f"<span style='color:{color}'>{w}</span>")
+     styled_text = " ".join(styled_words)
+     legend = (
+         "<div style='margin-top:10px;'>"
+         "<b>Color legend:</b> "
+         "<span style='color:green'>green = positive</span>, "
+         "<span style='color:red'>red = negative</span>, "
+         "<span style='color:orange'>orange = neutral</span>"
+         "</div>"
+     )
+     return styled_text + legend, "Word-level sentiment highlighting complete"
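+ # Two caveats: single words carry almost none of the context the classifier
+ # was trained on, so word-level colors are noisy at best, and the loop runs
+ # one forward pass per word; sentiment_pipe(words) accepts the whole list in
+ # one call (with an optional batch_size argument).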
+
+ # -------------------------------
+ # 7) Audio -> text + sentiment analysis (sentence + word level)
+ # -------------------------------
+ def process_audio_full(audio_file):
+     text = transcribe_audio(audio_file)
+     whole_text_result, whole_text_score = sentiment_whole_text(text)
+     word_level_result, word_level_status = sentiment_word_level(text)
+     return text, whole_text_result, whole_text_score, word_level_result, word_level_status
+
+ # -------------------------------
+ # 8) Gradio UI
+ # -------------------------------
+ with gr.Blocks() as demo:
+     gr.Markdown("# 🎤 Audio/Text → Sentiment Analysis")
+
+     with gr.Tabs():
+         # ------------------- Audio → Text -------------------
+         with gr.Tab("Audio → Text"):
+             audio_input_1 = gr.Audio(label="Upload audio", type="filepath")
+             audio_text_output = gr.Textbox(label="Transcribed text")
+             audio_transcribe_btn = gr.Button("Extract text")
+             audio_transcribe_btn.click(fn=transcribe_audio, inputs=[audio_input_1], outputs=[audio_text_output])
+
+         # ------------------- Text → Sentiment -------------------
+         with gr.Tab("Text → Sentiment Analysis"):
+             text_input = gr.Textbox(label="Enter text")
+             sentiment_whole_output = gr.HTML(label="Sentence-level sentiment")
+             sentiment_whole_score = gr.Markdown(label="Sentiment result")
+             sentiment_word_output = gr.HTML(label="Word-level sentiment")
+             sentiment_btn = gr.Button("Analyze sentiment")
+             def analyze_text(text):
+                 whole_res, whole_score = sentiment_whole_text(text)
+                 word_res, word_status = sentiment_word_level(text)
+                 return whole_res, whole_score, word_res
+             sentiment_btn.click(
+                 fn=analyze_text,
+                 inputs=[text_input],
+                 outputs=[sentiment_whole_output, sentiment_whole_score, sentiment_word_output]
+             )
+
+         # ------------------- Audio → Text + Sentiment -------------------
+         with gr.Tab("Audio → Text + Sentiment Analysis"):
+             audio_input_2 = gr.Audio(label="Upload audio", type="filepath")
+             audio_text_output_2 = gr.Textbox(label="Transcribed text")
+             sentiment_whole_output_2 = gr.HTML(label="Sentence-level sentiment")
+             sentiment_whole_score_2 = gr.Markdown(label="Sentiment result")
+             sentiment_word_output_2 = gr.HTML(label="Word-level sentiment")
+             audio_process_btn = gr.Button("Start analysis")
+             def process_audio_tab(audio_file):
+                 text, whole_res, whole_score, word_res, word_status = process_audio_full(audio_file)
+                 return text, whole_res, whole_score, word_res
+             audio_process_btn.click(
+                 fn=process_audio_tab,
+                 inputs=[audio_input_2],
+                 outputs=[audio_text_output_2, sentiment_whole_output_2, sentiment_whole_score_2, sentiment_word_output_2]
+             )
+
+ demo.launch()
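
Running locally: python app.py serves the demo at Gradio's default http://127.0.0.1:7860. The Space also needs a dependency file alongside app.py; a minimal sketch, assuming unpinned versions (this file is not part of the commit):

    # requirements.txt (assumed; not included in this commit)
    gradio
    torch
    librosa
    transformers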