Jeong-hun Kim committed on
Commit
8428376
ยท
1 Parent(s): 57cd8e6
Files changed (4) hide show
  1. README.md +1 -0
  2. app/main.py +101 -35
  3. requirements.txt +0 -0
  4. todo.txt +4 -0
README.md ADDED
@@ -0,0 +1 @@
 
 
1
+ ์ฑ—๋ด‡์„ ๋งŒ๋“ค์–ด ๋ด…์‹œ๋‹ค
app/main.py CHANGED
@@ -5,28 +5,51 @@ from transformers import pipeline
5
  from PIL import Image
6
  import re, os
7
  import gradio as gr
 
8
 
9
  app = FastAPI()
10
 
11
  # 1. LLM ํŒŒ์ดํ”„๋ผ์ธ ์ดˆ๊ธฐํ™” (SmolLM3 ๋ชจ๋ธ)
12
- llm = pipeline("text-generation", model="HuggingFaceTB/SmolLM3-3B")
 
 
13
 
14
  # 2. ๊ฐ์ • ๋ฐ ์ƒํ™ฉ๋ณ„ ์ด๋ฏธ์ง€ ๋งคํ•‘
15
- emotion_to_face = {
16
- "happy": "aria_happy.png",
17
- "sad": "aria_sad.png",
18
- "angry": "aria_angry.png",
19
- "excited": "aria_excited.png",
20
- "nervous": "aria_nervous.png",
21
- "neutral": "aria_neutral.png"
22
- }
23
- situation_to_bg = {
24
- "greeting": "bg_town.jpg",
25
- "mission_start": "bg_forest_day.jpg",
26
- "enemy_detected": "bg_dungeon_dark.jpg",
27
- "spooky_location": "bg_cave.png",
28
- "farewell": "bg_sunset.jpg"
29
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  # 3. ์ถœ๋ ฅ ๋ผ์ธ ํŒŒ์‹ฑ ํ•จ์ˆ˜
32
  def parse_output(text: str):
@@ -43,33 +66,58 @@ def parse_output(text: str):
43
  return results
44
 
45
  # 4. ์ด๋ฏธ์ง€ ํ•ฉ์„ฑ ํ•จ์ˆ˜
46
- def combine_images(bg_path, char_path):
47
- bg = Image.open(bg_path).convert("RGBA")
48
- char = Image.open(char_path).convert("RGBA")
49
- char = char.resize((300, 300))
50
- pos = ((bg.width - char.width) // 2, bg.height - char.height - 20)
51
- bg.paste(char, pos, char)
 
 
 
 
 
 
 
52
  return bg
53
 
54
  # 5. ์ฑ—๋ด‡ ์ฒ˜๋ฆฌ ํ•จ์ˆ˜ (Gradio์šฉ)
55
- def character_chat(prompt):
 
 
 
56
  system_prompt = (
57
  "You are Aria, a cheerful and expressive fantasy mage."
58
  " Respond in multiple steps if needed."
59
  " Format: \"text\" (emotion: tag, situation: tag)"
60
  )
61
- full_prompt = system_prompt + "\nUser: " + prompt + "\nAria:"
62
 
63
- raw_output = llm(full_prompt, max_new_tokens=300)[0]['generated_text']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  parsed = parse_output(raw_output)
65
 
66
  result_outputs = []
67
  for i, item in enumerate(parsed):
68
- face = emotion_to_face.get(item['emotion'], "aria_neutral.png")
69
- bg = situation_to_bg.get(item['situation'], "bg_default.jpg")
70
  composite = combine_images(os.path.join("assets/bg", bg), os.path.join("assets/face", face))
71
- img_path = f"static/output_{i}.png"
72
- composite.save(img_path)
 
 
73
  result_outputs.append((item['text'], img_path))
74
 
75
  return result_outputs
@@ -83,27 +131,45 @@ with gr.Blocks(css="""
83
  .bubble-right { background-color: #d1e7ff; border-radius: 10px; padding: 10px; margin: 5px; max-width: 70%; float: right; clear: both; text-align: right; }
84
  .image-preview { margin: 5px 0; max-width: 100%; border-radius: 10px; }
85
  """) as demo:
86
- gr.Markdown("# Aria ์บ๋ฆญํ„ฐ ์ฑ—๋ด‡")
87
  with gr.Column():
88
  chat_output = gr.HTML(value="<div class='chat-box' id='chat-box'></div>")
89
  user_input = gr.Textbox(label="Say something to Aria", placeholder="Type here and press Enter")
90
 
91
  def render_chat():
92
- html = "<div class='chat-box'>"
93
  for item in chat_history:
94
  if item['role'] == 'user':
95
  html += f"<div class='bubble-right'>{item['text']}</div>"
96
  elif item['role'] == 'bot':
97
- html += f"<div class='bubble-left'>{item['text']}<br><img class='image-preview' src='{item['image']}'></div>"
98
- html += "</div>"
 
 
 
99
  return html
100
 
101
  def on_submit(user_msg):
102
  chat_history.append({"role": "user", "text": user_msg})
 
103
  bot_results = character_chat(user_msg)
104
- for text, image_path in bot_results:
105
- chat_history.append({"role": "bot", "text": text, "image": image_path})
106
- return render_chat(), ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
  user_input.submit(on_submit, inputs=user_input, outputs=[chat_output, user_input])
109
 
 
5
  from PIL import Image
6
  import re, os
7
  import gradio as gr
8
+ import torch
9
 
10
  app = FastAPI()
11
 
12
  # 1. LLM ํŒŒ์ดํ”„๋ผ์ธ ์ดˆ๊ธฐํ™” (SmolLM3 ๋ชจ๋ธ)
13
+ print("[torch] is available:", torch.cuda.is_available())
14
+ print("[device] default:", torch.device("cuda" if torch.cuda.is_available() else "cpu"))
15
+ llm = pipeline("text-generation", model="HuggingFaceTB/SmolLM3-3B", device=0 if torch.cuda.is_available() else -1)
16
 
17
  # 2. ๊ฐ์ • ๋ฐ ์ƒํ™ฉ๋ณ„ ์ด๋ฏธ์ง€ ๋งคํ•‘
18
# Image mapping example
# -----------------------------
# Dropping files into ./assets/face/ such as
#   happy.png, sad.png, angry.png
# yields a dictionary of the form
#   {"happy": "happy.png", "sad": "sad.png", "angry": "angry.png"}
# which maps the emotion tag in the model output to a face image.
def load_faces(face_dir="assets/face"):
    """Return a dict mapping emotion name -> face image filename.

    Scans *face_dir* for PNG files; each filename's stem becomes the
    emotion key ('happy.png' -> 'happy').  Matching is case-insensitive
    so 'HAPPY.PNG' is picked up too.  Returns an empty dict when the
    directory does not exist, so a missing asset folder no longer
    crashes the app at import time.
    """
    emotion_to_face = {}
    if not os.path.isdir(face_dir):  # missing asset dir must not abort startup
        print(f"[warning] face directory not found: {face_dir}")
        return emotion_to_face
    for filename in os.listdir(face_dir):
        if filename.lower().endswith(".png"):
            emotion = os.path.splitext(filename)[0]  # 'happy.png' -> 'happy'
            emotion_to_face[emotion] = filename
    return emotion_to_face
42
+
43
def load_bgs(bg_dir="assets/bg"):
    """Return a dict mapping situation name -> background image filename.

    Scans *bg_dir* for image files; each filename's stem becomes the
    situation key ('greeting.jpg' -> 'greeting').  Accepts .jpg/.jpeg as
    well as .png — the background assets are JPEGs (the caller's fallback
    is 'default.jpg'), so the previous PNG-only filter silently dropped
    every real background.  Returns an empty dict when the directory does
    not exist, so a missing asset folder no longer crashes the app.
    """
    situation_to_bg = {}
    if not os.path.isdir(bg_dir):  # missing asset dir must not abort startup
        print(f"[warning] background directory not found: {bg_dir}")
        return situation_to_bg
    for filename in os.listdir(bg_dir):
        if filename.lower().endswith((".png", ".jpg", ".jpeg")):
            situation = os.path.splitext(filename)[0]  # 'greeting.jpg' -> 'greeting'
            situation_to_bg[situation] = filename
    return situation_to_bg
50
+
51
# Asset lookup tables, built once at module import; consulted by
# character_chat() to map model emotion/situation tags to image filenames.
emotion_to_face = load_faces()
situation_to_bg = load_bgs()
53
 
54
  # 3. ์ถœ๋ ฅ ๋ผ์ธ ํŒŒ์‹ฑ ํ•จ์ˆ˜
55
  def parse_output(text: str):
 
66
  return results
67
 
68
  # 4. ์ด๋ฏธ์ง€ ํ•ฉ์„ฑ ํ•จ์ˆ˜
69
def combine_images(bg_path, face_path):
    """Composite the character face onto the background image.

    Returns the merged RGBA image, or None when either file is missing
    (the caller then shows the chat bubble without an image).
    """
    def _open_rgba(path, warning):
        # Open an image as RGBA; warn and return None when the file is absent.
        try:
            return Image.open(path).convert("RGBA")
        except FileNotFoundError:
            print(warning)
            return None

    bg = _open_rgba(bg_path, f"[warning] ๋ฐฐ๊ฒฝ ์ด๋ฏธ์ง€ ์—†์Œ: {bg_path}")
    if bg is None:
        return None
    face = _open_rgba(face_path, f"[warning] ์บ๋ฆญํ„ฐ ์ด๋ฏธ์ง€ ์—†์Œ: {face_path}")
    if face is None:
        return None
    # Paste at the top-left corner, using the face's own alpha channel as mask.
    bg.paste(face, (0, 0), face)
    return bg
83
 
84
  # 5. ์ฑ—๋ด‡ ์ฒ˜๋ฆฌ ํ•จ์ˆ˜ (Gradio์šฉ)
85
# The whole conversation so far is replayed into the prompt so the model
# "remembers" earlier turns.
def build_prompt(chat_history, user_msg):
    """Assemble the full LLM prompt: persona + replayed history + new user turn."""
    system_prompt = (
        "You are Aria, a cheerful and expressive fantasy mage."
        " Respond in multiple steps if needed."
        " Format: \"text\" (emotion: tag, situation: tag)"
    )

    # Map history roles to speaker labels; unknown roles are skipped.
    speaker_of = {"user": "User", "bot": "Aria"}
    turns = [
        f"{speaker_of[entry['role']]}: {entry['text']}"
        for entry in chat_history
        if entry["role"] in speaker_of
    ]
    turns.append(f"User: {user_msg}")

    # Trailing "Aria:" cues the model to answer in character.
    return system_prompt + "\n" + "\n".join(turns) + "\nAria:"
104
+
105
def character_chat(prompt):
    """Produce the bot's reply for *prompt* as a list of (text, image_path) pairs.

    image_path is None for a step whose composite image could not be built.
    NOTE(review): the real model call is commented out below and replaced
    with a canned reply — presumably a debugging leftover; re-enable the
    llm(...) line before shipping.
    """
    full_prompt = build_prompt(chat_history, prompt)

    #raw_output = llm(full_prompt, max_new_tokens=300)[0]['generated_text']
    raw_output = '"์šฐ์˜ค์•„" (emotion: tag, situation: tag)'

    result_outputs = []
    for i, item in enumerate(parse_output(raw_output)):
        # Unknown tags fall back to the neutral face / default background.
        face_file = emotion_to_face.get(item['emotion'], "neutral.png")
        bg_file = situation_to_bg.get(item['situation'], "default.jpg")
        composite = combine_images(
            os.path.join("assets/bg", bg_file),
            os.path.join("assets/face", face_file),
        )
        if composite:
            img_path = f"static/output_{i}.png"
            composite.save(img_path)
        else:
            img_path = None  # no image is shown when assets are missing
        result_outputs.append((item['text'], img_path))

    return result_outputs
 
131
  .bubble-right { background-color: #d1e7ff; border-radius: 10px; padding: 10px; margin: 5px; max-width: 70%; float: right; clear: both; text-align: right; }
132
  .image-preview { margin: 5px 0; max-width: 100%; border-radius: 10px; }
133
  """) as demo:
134
+ gr.Markdown("์ฑ—๋ด‡")
135
  with gr.Column():
136
  chat_output = gr.HTML(value="<div class='chat-box' id='chat-box'></div>")
137
  user_input = gr.Textbox(label="Say something to Aria", placeholder="Type here and press Enter")
138
 
139
  def render_chat():
140
+ html = ""
141
  for item in chat_history:
142
  if item['role'] == 'user':
143
  html += f"<div class='bubble-right'>{item['text']}</div>"
144
  elif item['role'] == 'bot':
145
+ bubble = f"<div class='bubble-left'>{item['text']}"
146
+ if 'image' in item and item['image']:
147
+ bubble += f"<br><img class='image-preview' src='{item['image']}'>"
148
+ bubble += "</div>"
149
+ html += bubble
150
  return html
151
 
152
  def on_submit(user_msg):
153
  chat_history.append({"role": "user", "text": user_msg})
154
+
155
  bot_results = character_chat(user_msg)
156
+
157
+ for item in bot_results:
158
+ try:
159
+ text, image_path = item # unpack ์‹œ๋„
160
+ except (ValueError, TypeError):
161
+ # unpack ์•ˆ๋˜๋ฉด ๊ธฐ๋ณธ๊ฐ’ ์ฒ˜๋ฆฌ (์ด๋ฏธ์ง€ ์—†์ด)
162
+ text = str(item)
163
+ image_path = None
164
+
165
+ chat_entry = {"role": "bot", "text": text}
166
+ if image_path:
167
+ chat_entry["image"] = image_path
168
+
169
+ chat_history.append(chat_entry)
170
+
171
+ new_chat_html = render_chat()
172
+ return f"<div class='chat-box' id='chat-box'>{new_chat_html}</div>", ""
173
 
174
  user_input.submit(on_submit, inputs=user_input, outputs=[chat_output, user_input])
175
 
requirements.txt ADDED
Binary file (2.26 kB). View file
 
todo.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ ๐Ÿ’ก ์ถ”๊ฐ€ ํŒ
2
+ ํ…์ŠคํŠธ๋ฅผ ์ „๋ถ€ ์ €์žฅํ•ด์„œ prompt์— ๋ˆ„์ ํ•  ์ˆ˜๋„ ์žˆ์ง€๋งŒ,
3
+ ๋„ˆ๋ฌด ๊ธธ์–ด์ง€๋ฉด ์ด์ „ ๋‚ด์šฉ์„ ์š”์•ฝํ•˜๊ฑฐ๋‚˜, ์ค‘์š”ํ•œ ๋ฐœ์–ธ๋งŒ ๋‚จ๊ธฐ๋Š” ์š”์•ฝ ๊ธฐ์–ต ๋ฐฉ์‹(memory compression)๋„ ๊ณ ๋ คํ•ด๋ณผ ์ˆ˜ ์žˆ์–ด์š”.
4
+ ํ•„์š”ํ•˜๋ฉด ์š”์•ฝ ๊ธฐ์–ต ๋ฐฉ์‹๋„ ๋„์™€๋“œ๋ฆด๊ฒŒ์š”!