milwright committed on
Commit
56287e6
·
1 Parent(s): 07a676b

Add local LLM integration support

Browse files

- Add support for local LLM servers (e.g., LM Studio) on port 1234
- Enable local mode via URL parameter (?local=true)
- Implement automatic response cleaning for local LLM output artifacts
- Add test scripts for validating local LLM integration
- Update documentation with local LLM setup instructions
- No API key required for local mode operation

.claude/settings.local.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(git checkout:*)",
5
+ "Bash(cp:*)",
6
+ "Bash(rm:*)",
7
+ "Bash(git commit:*)",
8
+ "Bash(git push:*)",
9
+ "Bash(git add:*)",
10
+ "Bash(grep:*)",
11
+ "Bash(node:*)"
12
+ ],
13
+ "deny": []
14
+ }
15
+ }
CLAUDE.local.md ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ - DO NOT SIGN OFF COMMIT MESSAGES WITH CLAUDE AS AN AUTHOR
2
+ - Remember to review this file at the end of prompt engineering related changes or when the user tells you to or at the end of a long session. If changes have been made to OpenRouter prompt language or programming logic, low or high level, then update this file accordingly.
README.md CHANGED
@@ -37,7 +37,7 @@ An interactive cloze reading practice application with AI-powered assistance. Pr
37
 
38
  ## Technology
39
 
40
- Built with vanilla JavaScript, powered by AI for intelligent word selection and contextual assistance.
41
 
42
  ## Running Locally with Docker
43
 
@@ -60,6 +60,31 @@ To run the Cloze Reader application locally using Docker:
60
  - Docker installed on your system
61
  - Port 7860 available on your machine
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  ## Architecture
64
  This is a **vanilla JavaScript modular application** with no build step. Key architectural patterns:
65
 
 
37
 
38
  ## Technology
39
 
40
+ Built with vanilla JavaScript, powered by AI for intelligent word selection and contextual assistance. Supports both OpenRouter API and local LLM integration.
41
 
42
  ## Running Locally with Docker
43
 
 
60
  - Docker installed on your system
61
  - Port 7860 available on your machine
62
 
63
+ ## Local LLM Integration
64
+
65
+ The `local-llm-integration` branch adds support for running with a local LLM server instead of OpenRouter API:
66
+
67
+ ### Setup
68
+ 1. **Start your local LLM server** on port 1234 (e.g., using LM Studio with Gemma-3-12b)
69
+ 2. **Run the development server**:
70
+ ```bash
71
+ make dev # or python3 local-server.py 8000
72
+ ```
73
+ 3. **Access with local LLM**:
74
+ - Navigate to `http://localhost:8000/index.html?local=true`
75
+ - The `?local=true` parameter switches from OpenRouter to your local LLM
76
+
77
+ ### Local LLM Features
78
+ - **No API key required** - works entirely offline with your local model
79
+ - **Automatic response cleaning** - handles local LLM output artifacts
80
+ - **Compatible with LM Studio** and other OpenAI-compatible local servers
81
+ - **Same game experience** - all features work identically to cloud version
82
+
83
+ ### Testing Local Integration
84
+ - Test page: `http://localhost:8000/test-local-llm.html?local=true`
85
+ - Stress test script: `node test-local-llm.js`
86
+ - Direct integration test available in test files
87
+
88
  ## Architecture
89
  This is a **vanilla JavaScript modular application** with no build step. Key architectural patterns:
90
 
src/aiService.js CHANGED
@@ -1,11 +1,31 @@
1
  class OpenRouterService {
2
  constructor() {
3
- this.apiUrl = 'https://openrouter.ai/api/v1/chat/completions';
 
 
4
  this.apiKey = this.getApiKey();
5
- this.model = 'google/gemma-3-27b-it:free';
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  }
7
 
8
  getApiKey() {
 
 
 
 
9
  if (typeof process !== 'undefined' && process.env && process.env.OPENROUTER_API_KEY) {
10
  return process.env.OPENROUTER_API_KEY;
11
  }
@@ -51,14 +71,20 @@ class OpenRouterService {
51
  }
52
 
53
  try {
 
 
 
 
 
 
 
 
 
 
 
54
  const response = await fetch(this.apiUrl, {
55
  method: 'POST',
56
- headers: {
57
- 'Content-Type': 'application/json',
58
- 'Authorization': `Bearer ${this.apiKey}`,
59
- 'HTTP-Referer': window.location.origin,
60
- 'X-Title': 'Cloze Reader'
61
- },
62
  body: JSON.stringify({
63
  model: this.model,
64
  messages: [{
@@ -200,11 +226,35 @@ Passage: "${passage}"`
200
  throw new Error('API response missing expected content');
201
  }
202
 
203
- const content = data.choices[0].message.content.trim();
 
 
 
 
 
204
 
205
  // Try to parse as JSON array
206
  try {
207
- const words = JSON.parse(content);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
  if (Array.isArray(words)) {
209
  // Filter problematic words and validate word lengths based on level
210
  const problematicWords = ['negro', 'retard', 'retarded', 'nigger', 'chinaman', 'jap', 'gypsy', 'savage', 'primitive', 'heathen'];
@@ -302,14 +352,20 @@ Passage: "${passage}"`
302
  const controller = new AbortController();
303
  const timeoutId = setTimeout(() => controller.abort(), 15000); // 15 second timeout
304
 
 
 
 
 
 
 
 
 
 
 
 
305
  const response = await fetch(this.apiUrl, {
306
  method: 'POST',
307
- headers: {
308
- 'Content-Type': 'application/json',
309
- 'Authorization': `Bearer ${this.apiKey}`,
310
- 'HTTP-Referer': window.location.origin,
311
- 'X-Title': 'Cloze Reader'
312
- },
313
  signal: controller.signal,
314
  body: JSON.stringify({
315
  model: this.model,
@@ -604,6 +660,16 @@ Return as JSON: {"passage1": {...}, "passage2": {...}}`
604
  }
605
  }
606
 
 
 
 
 
 
 
 
 
 
 
607
  }
608
 
609
  export { OpenRouterService as AIService };
 
1
  class OpenRouterService {
2
  constructor() {
3
+ // Check for local LLM mode
4
+ this.isLocalMode = this.checkLocalMode();
5
+ this.apiUrl = this.isLocalMode ? 'http://localhost:1234/v1/chat/completions' : 'https://openrouter.ai/api/v1/chat/completions';
6
  this.apiKey = this.getApiKey();
7
+ this.model = this.isLocalMode ? 'gemma-3-12b' : 'google/gemma-3-27b-it:free';
8
+
9
+ console.log('AI Service initialized:', {
10
+ mode: this.isLocalMode ? 'Local LLM' : 'OpenRouter',
11
+ url: this.apiUrl,
12
+ model: this.model
13
+ });
14
+ }
15
+
16
+ checkLocalMode() {
17
+ if (typeof window !== 'undefined' && window.location) {
18
+ const urlParams = new URLSearchParams(window.location.search);
19
+ return urlParams.get('local') === 'true';
20
+ }
21
+ return false;
22
  }
23
 
24
  getApiKey() {
25
+ // Local mode doesn't need API key
26
+ if (this.isLocalMode) {
27
+ return 'local-mode-no-key';
28
+ }
29
  if (typeof process !== 'undefined' && process.env && process.env.OPENROUTER_API_KEY) {
30
  return process.env.OPENROUTER_API_KEY;
31
  }
 
71
  }
72
 
73
  try {
74
+ const headers = {
75
+ 'Content-Type': 'application/json'
76
+ };
77
+
78
+ // Only add auth headers for OpenRouter
79
+ if (!this.isLocalMode) {
80
+ headers['Authorization'] = `Bearer ${this.apiKey}`;
81
+ headers['HTTP-Referer'] = window.location.origin;
82
+ headers['X-Title'] = 'Cloze Reader';
83
+ }
84
+
85
  const response = await fetch(this.apiUrl, {
86
  method: 'POST',
87
+ headers,
 
 
 
 
 
88
  body: JSON.stringify({
89
  model: this.model,
90
  messages: [{
 
226
  throw new Error('API response missing expected content');
227
  }
228
 
229
+ let content = data.choices[0].message.content.trim();
230
+
231
+ // Clean up local LLM artifacts
232
+ if (this.isLocalMode) {
233
+ content = this.cleanLocalLLMResponse(content);
234
+ }
235
 
236
  // Try to parse as JSON array
237
  try {
238
+ let words;
239
+
240
+ // For local LLM, try different parsing strategies
241
+ if (this.isLocalMode) {
242
+ // Try JSON parse first
243
+ try {
244
+ words = JSON.parse(content);
245
+ } catch {
246
+ // If not JSON, try comma-separated
247
+ if (content.includes(',')) {
248
+ words = content.split(',').map(w => w.trim());
249
+ } else {
250
+ // Single word
251
+ words = [content.trim()];
252
+ }
253
+ }
254
+ } else {
255
+ words = JSON.parse(content);
256
+ }
257
+
258
  if (Array.isArray(words)) {
259
  // Filter problematic words and validate word lengths based on level
260
  const problematicWords = ['negro', 'retard', 'retarded', 'nigger', 'chinaman', 'jap', 'gypsy', 'savage', 'primitive', 'heathen'];
 
352
  const controller = new AbortController();
353
  const timeoutId = setTimeout(() => controller.abort(), 15000); // 15 second timeout
354
 
355
+ const headers = {
356
+ 'Content-Type': 'application/json'
357
+ };
358
+
359
+ // Only add auth headers for OpenRouter
360
+ if (!this.isLocalMode) {
361
+ headers['Authorization'] = `Bearer ${this.apiKey}`;
362
+ headers['HTTP-Referer'] = window.location.origin;
363
+ headers['X-Title'] = 'Cloze Reader';
364
+ }
365
+
366
  const response = await fetch(this.apiUrl, {
367
  method: 'POST',
368
+ headers,
 
 
 
 
 
369
  signal: controller.signal,
370
  body: JSON.stringify({
371
  model: this.model,
 
660
  }
661
  }
662
 
663
+ cleanLocalLLMResponse(content) {
664
+ // Remove common artifacts from local LLM responses
665
+ return content
666
+ .replace(/\["?/g, '') // Remove opening bracket and quote
667
+ .replace(/"?\]/g, '') // Remove closing quote and bracket
668
+ .replace(/^[>"|']+/g, '') // Remove leading > or quotes
669
+ .replace(/[>"|']+$/g, '') // Remove trailing > or quotes
670
+ .replace(/\\n/g, ' ') // Replace escaped newlines
671
+ .trim();
672
+ }
673
  }
674
 
675
  export { OpenRouterService as AIService };
test-direct.js ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { AIService } from './src/aiService.js';
2
+
3
+ // Force local mode
4
+ const originalSearch = window.location.search;
5
+ window.location.search = '?local=true';
6
+
7
+ const ai = new AIService();
8
+
9
+ console.log('Testing direct AI connection...');
10
+ console.log('Config:', {
11
+ url: ai.apiUrl,
12
+ model: ai.model,
13
+ isLocal: ai.isLocalMode
14
+ });
15
+
16
+ const testPassage = "The ancient library contained thousands of manuscripts, each one carefully preserved by generations of scholars who dedicated their lives to knowledge.";
17
+
18
+ try {
19
+ console.log('\nTesting word selection...');
20
+ const words = await ai.selectSignificantWords(testPassage, 2, 3);
21
+ console.log('Selected words:', words);
22
+ console.log('✅ Success!');
23
+ } catch (error) {
24
+ console.error('❌ Error:', error.message);
25
+ }
26
+
27
+ // Restore original search
28
+ window.location.search = originalSearch;
test-local-llm.js ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env node
2
+
3
+ // Stress test for local LLM on port 1234
4
+ // Tests word selection functionality with Gutenberg passages
5
+
6
+ import http from 'http';
7
+
8
+ // Sample Gutenberg passages for testing
9
+ const testPassages = [
10
+ "The sun was shining brightly on the sea, shining with all his might. He did his very best to make the billows smooth and bright. And this was odd, because it was the middle of the night.",
11
+ "It was the best of times, it was the worst of times, it was the age of wisdom, it was the age of foolishness, it was the epoch of belief, it was the epoch of incredulity.",
12
+ "In a hole in the ground there lived a hobbit. Not a nasty, dirty, wet hole, filled with the ends of worms and an oozy smell, nor yet a dry, bare, sandy hole with nothing in it to sit down on.",
13
+ "Call me Ishmael. Some years ago—never mind how long precisely—having little or no money in my purse, and nothing particular to interest me on shore, I thought I would sail about a little.",
14
+ "It is a truth universally acknowledged, that a single man in possession of a good fortune, must be in want of a wife. However little known the feelings or views of such a man may be."
15
+ ];
16
+
17
+ // Word selection prompt template (based on cloze reader's format)
18
+ function createWordSelectionPrompt(passage, level = 1) {
19
+ const wordCount = level < 6 ? 1 : level < 11 ? 2 : 3;
20
+ const minLength = level < 3 ? 4 : 5;
21
+ const maxLength = level < 3 ? 7 : level < 5 ? 10 : 14;
22
+
23
+ return {
24
+ model: "gemma-3-12b",
25
+ messages: [
26
+ {
27
+ role: "system",
28
+ content: "You are a vocabulary expert who selects appropriate words for cloze exercises."
29
+ },
30
+ {
31
+ role: "user",
32
+ content: `Select ${wordCount} word${wordCount > 1 ? 's' : ''} from this passage for a cloze exercise.
33
+
34
+ Passage: "${passage}"
35
+
36
+ Requirements:
37
+ - Select exactly ${wordCount} different word${wordCount > 1 ? 's' : ''}
38
+ - Each word must be ${minLength}-${maxLength} letters long
39
+ - Words must be meaningful nouns, verbs, adjectives, or adverbs
40
+ - Avoid pronouns, articles, and common words
41
+ - Return ONLY the selected word${wordCount > 1 ? 's' : ''}, ${wordCount > 1 ? 'comma-separated' : 'nothing else'}
42
+
43
+ Selected word${wordCount > 1 ? 's' : ''}:`
44
+ }
45
+ ],
46
+ temperature: 0.7,
47
+ max_tokens: 50
48
+ };
49
+ }
50
+
51
+ // Function to make HTTP request to local LLM
52
+ function testLLMConnection(passage, testNumber) {
53
+ return new Promise((resolve, reject) => {
54
+ const prompt = createWordSelectionPrompt(passage, Math.floor(Math.random() * 10) + 1);
55
+ const data = JSON.stringify(prompt);
56
+
57
+ const options = {
58
+ hostname: 'localhost',
59
+ port: 1234,
60
+ path: '/v1/chat/completions',
61
+ method: 'POST',
62
+ headers: {
63
+ 'Content-Type': 'application/json',
64
+ 'Content-Length': data.length
65
+ }
66
+ };
67
+
68
+ console.log(`\n=== Test ${testNumber} ===`);
69
+ console.log(`Passage: "${passage.substring(0, 80)}..."`);
70
+ console.log(`Sending request to http://localhost:1234/v1/chat/completions`);
71
+
72
+ const startTime = Date.now();
73
+
74
+ const req = http.request(options, (res) => {
75
+ let responseData = '';
76
+
77
+ res.on('data', (chunk) => {
78
+ responseData += chunk;
79
+ });
80
+
81
+ res.on('end', () => {
82
+ const elapsed = Date.now() - startTime;
83
+ console.log(`Response received in ${elapsed}ms`);
84
+ console.log(`Status: ${res.statusCode}`);
85
+
86
+ try {
87
+ const parsed = JSON.parse(responseData);
88
+ if (parsed.choices && parsed.choices[0] && parsed.choices[0].message) {
89
+ const selectedWords = parsed.choices[0].message.content.trim();
90
+ console.log(`Selected words: ${selectedWords}`);
91
+ console.log(`✓ Test ${testNumber} PASSED`);
92
+ resolve({ success: true, words: selectedWords, time: elapsed });
93
+ } else {
94
+ console.log(`Response structure unexpected:`, parsed);
95
+ resolve({ success: false, error: 'Invalid response structure', time: elapsed });
96
+ }
97
+ } catch (error) {
98
+ console.log(`Failed to parse response:`, error.message);
99
+ console.log(`Raw response:`, responseData.substring(0, 200));
100
+ resolve({ success: false, error: error.message, time: elapsed });
101
+ }
102
+ });
103
+ });
104
+
105
+ req.on('error', (error) => {
106
+ const elapsed = Date.now() - startTime;
107
+ console.log(`✗ Test ${testNumber} FAILED - Connection error after ${elapsed}ms`);
108
+ console.log(`Error: ${error.message}`);
109
+ resolve({ success: false, error: error.message, time: elapsed });
110
+ });
111
+
112
+ req.write(data);
113
+ req.end();
114
+ });
115
+ }
116
+
117
+ // Run stress test
118
+ async function runStressTest() {
119
+ console.log('Starting stress test for Gemma-3-12b on localhost:1234');
120
+ console.log('Testing word selection for cloze reader game...\n');
121
+
122
+ const results = [];
123
+
124
+ // Test each passage
125
+ for (let i = 0; i < testPassages.length; i++) {
126
+ const result = await testLLMConnection(testPassages[i], i + 1);
127
+ results.push(result);
128
+
129
+ // Small delay between tests
130
+ await new Promise(resolve => setTimeout(resolve, 500));
131
+ }
132
+
133
+ // Summary
134
+ console.log('\n=== STRESS TEST SUMMARY ===');
135
+ const successful = results.filter(r => r.success).length;
136
+ const failed = results.length - successful;
137
+ const avgTime = results.reduce((sum, r) => sum + r.time, 0) / results.length;
138
+
139
+ console.log(`Total tests: ${results.length}`);
140
+ console.log(`Successful: ${successful}`);
141
+ console.log(`Failed: ${failed}`);
142
+ console.log(`Average response time: ${avgTime.toFixed(0)}ms`);
143
+ console.log(`Success rate: ${(successful / results.length * 100).toFixed(1)}%`);
144
+
145
+ if (successful === results.length) {
146
+ console.log('\n✓ All tests passed! The Gemma-3-12b server is functioning correctly for cloze reader.');
147
+ } else if (successful > 0) {
148
+ console.log('\n⚠ Some tests passed. The server is partially functional.');
149
+ } else {
150
+ console.log('\n✗ All tests failed. Please check if the server is running on port 1234.');
151
+ }
152
+ }
153
+
154
+ // Run the test
155
+ runStressTest().catch(console.error);
test-prompts-lm-studio.md ADDED
@@ -0,0 +1,262 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Gemma-3-27b Model Integration Guide for Cloze Reader
2
+
3
+ ## Part 1: Step-by-Step API Request Processing
4
+
5
+ ### 1. Initial Request Reception
6
+ When the Cloze Reader application makes an API request through OpenRouter:
7
+
8
+ 1. **Authentication**: Verify Bearer token from `Authorization` header
9
+ 2. **Request Type Detection**: Identify the operation type based on prompt content
10
+ 3. **Parameter Extraction**: Parse temperature, max_tokens, and message content
11
+ 4. **Rate Limiting Check**: Ensure request complies with free tier limits
12
+
13
+ ### 2. Word Selection Request Processing
14
+
15
+ **When Temperature = 0.3 and prompt contains "CLOZE DELETION PRINCIPLES":**
16
+
17
+ 1. **Parse Passage**: Extract the text passage from the system message
18
+ 2. **Identify Difficulty Level**:
19
+ - Level 1-2: Target 4-7 letter words (easy vocabulary)
20
+ - Level 3-4: Target 4-10 letter words (medium difficulty)
21
+ - Level 5+: Target 5-14 letter words (challenging vocabulary)
22
+ 3. **Select Words**:
23
+ - Identify significant vocabulary words (nouns, verbs, adjectives, adverbs)
24
+ - Avoid proper nouns, numbers, articles, and function words
25
+ - Ensure words are contextually important for comprehension
26
+ 4. **Format Response**: Return JSON array of selected words
27
+ 5. **Validate**: Ensure all words exist in the original passage
28
+
29
+ ### 3. Batch Processing Request
30
+
31
+ **When Temperature = 0.5 and prompt contains two passages:**
32
+
33
+ 1. **Parse Both Passages**: Extract passage1 and passage2 from the prompt
34
+ 2. **Process Each Passage**:
35
+ - Apply word selection logic for each based on difficulty level
36
+ - Generate one-sentence contextualization for each book
37
+ 3. **Format Response**: Return structured JSON with both passages' data
38
+ 4. **Ensure Consistency**: Words must match exactly as they appear in passages
39
+
40
+ ### 4. Contextualization Request
41
+
42
+ **When Temperature = 0.2 and prompt asks for book context:**
43
+
44
+ 1. **Extract Book Information**: Parse title and author from prompt
45
+ 2. **Generate Context**: Create one factual sentence about:
46
+ - Type of work (novel, short story, essay)
47
+ - Historical period when written
48
+ - Literary significance or genre
49
+ 3. **Keep Concise**: Limit to 80 tokens maximum
50
+ 4. **Avoid Speculation**: Only include verifiable information
51
+
52
+ ### 5. Chat Hint Request
53
+
54
+ **When Temperature = 0.6 and prompt includes "word puzzles":**
55
+
56
+ 1. **Identify Question Type**:
57
+ - `part_of_speech`: Grammar category identification
58
+ - `sentence_role`: Function in the sentence
59
+ - `word_category`: Abstract/concrete classification
60
+ - `synonym`: Alternative word suggestion
61
+ 2. **Parse Target Word**: Extract the hidden word (NEVER reveal it)
62
+ 3. **Generate Appropriate Hint**:
63
+ - Follow exact format requested
64
+ - Stay within 50 token limit
65
+ - Use plain text only, no formatting
66
+ 4. **Validate**: Ensure hint doesn't contain or spell out the target word
67
+
68
+ ### 6. Response Formatting Rules
69
+
70
+ 1. **JSON Responses**:
71
+ - Word selection: Clean array format `["word1", "word2"]`
72
+ - Batch processing: Nested object structure
73
+ - No markdown code blocks unless specifically requested
74
+
75
+ 2. **Text Responses**:
76
+ - Contextualization: Single sentence, no formatting
77
+ - Chat hints: Plain text, follows exact format requested
78
+
79
+ 3. **Error Handling**:
80
+ - Invalid requests: Return graceful error messages
81
+ - Missing parameters: Use sensible defaults
82
+ - Malformed input: Attempt to parse intent
83
+
84
+ ## Part 2: LM Studio Testing Configuration
85
+
86
+ ### System Prompt
87
+ ```
88
+ You are a specialized AI assistant for the Cloze Reader educational application. You help create vocabulary exercises by selecting appropriate words from text passages and providing contextual hints without revealing answers. Always respond in the exact format requested, using plain JSON or text as specified. Never use markdown formatting unless explicitly requested.
89
+ ```
90
+
91
+ ### Temperature Settings
92
+ - **Word Selection**: 0.3
93
+ - **Batch Processing**: 0.5
94
+ - **Contextualization**: 0.2
95
+ - **Chat Hints**: 0.6
96
+
97
+ ### Response Length Limits
98
+ - **Word Selection**: 100 tokens
99
+ - **Batch Processing**: 800 tokens
100
+ - **Contextualization**: 80 tokens
101
+ - **Chat Hints**: 50 tokens
102
+
103
+ ### Test Prompts
104
+
105
+ #### 1. Word Selection Test (Level 1-2 Easy)
106
+ ```json
107
+ {
108
+ "messages": [
109
+ {
110
+ "role": "system",
111
+ "content": "CLOZE DELETION PRINCIPLES:\n- Select words that require understanding context and vocabulary to identify\n- Choose words essential for comprehension that test language ability\n- Target words where deletion creates meaningful cognitive gaps\n\nFrom the following passage, select exactly 1 word that is important for reading comprehension.\n\nDifficulty level 1-2: Focus on easier vocabulary (4-7 letters) like common nouns, simple verbs, and basic adjectives.\n\nRETURN ONLY A JSON ARRAY OF YOUR SELECTED WORDS. Select words that appear EXACTLY as written in the passage.\n\nPassage:\nThe old woman lived in a small cottage by the forest. Every morning, she would walk to the village market to buy fresh bread."
112
+ }
113
+ ],
114
+ "temperature": 0.3,
115
+ "max_tokens": 100
116
+ }
117
+ ```
118
+
119
+ **Expected Output Schema:**
120
+ ```json
121
+ {
122
+ "type": "array",
123
+ "items": {
124
+ "type": "string",
125
+ "minLength": 4,
126
+ "maxLength": 7
127
+ },
128
+ "minItems": 1,
129
+ "maxItems": 1
130
+ }
131
+ ```
132
+
133
+ #### 2. Batch Processing Test (Level 3-4 Medium)
134
+ ```json
135
+ {
136
+ "messages": [
137
+ {
138
+ "role": "system",
139
+ "content": "Process these two passages for a cloze reading exercise:\n\nPASSAGE 1 (Pride and Prejudice by Jane Austen):\nIt is a truth universally acknowledged, that a single man in possession of a good fortune, must be in want of a wife.\n\nPASSAGE 2 (A Tale of Two Cities by Charles Dickens):\nIt was the best of times, it was the worst of times, it was the age of wisdom, it was the age of foolishness.\n\nFor each passage:\n1. Select 1 word for difficulty level 3-4 (medium vocabulary, 4-10 letters)\n2. Write ONE sentence about the book/author\n\nReturn a JSON object with this structure:\n{\n \"passage1\": {\n \"words\": [selected words],\n \"context\": \"One sentence about the book\"\n },\n \"passage2\": {\n \"words\": [selected words],\n \"context\": \"One sentence about the book\"\n }\n}"
140
+ }
141
+ ],
142
+ "temperature": 0.5,
143
+ "max_tokens": 800
144
+ }
145
+ ```
146
+
147
+ **Expected Output Schema:**
148
+ ```json
149
+ {
150
+ "type": "object",
151
+ "properties": {
152
+ "passage1": {
153
+ "type": "object",
154
+ "properties": {
155
+ "words": {
156
+ "type": "array",
157
+ "items": { "type": "string" },
158
+ "minItems": 1,
159
+ "maxItems": 1
160
+ },
161
+ "context": {
162
+ "type": "string",
163
+ "maxLength": 150
164
+ }
165
+ },
166
+ "required": ["words", "context"]
167
+ },
168
+ "passage2": {
169
+ "type": "object",
170
+ "properties": {
171
+ "words": {
172
+ "type": "array",
173
+ "items": { "type": "string" },
174
+ "minItems": 1,
175
+ "maxItems": 1
176
+ },
177
+ "context": {
178
+ "type": "string",
179
+ "maxLength": 150
180
+ }
181
+ },
182
+ "required": ["words", "context"]
183
+ }
184
+ },
185
+ "required": ["passage1", "passage2"]
186
+ }
187
+ ```
188
+
189
+ #### 3. Contextualization Test
190
+ ```json
191
+ {
192
+ "messages": [
193
+ {
194
+ "role": "user",
195
+ "content": "Write one factual sentence about 'The Adventures of Sherlock Holmes' by Arthur Conan Doyle. Focus on what type of work it is, when it was written, or its historical significance. Keep it under 20 words and conversational."
196
+ }
197
+ ],
198
+ "temperature": 0.2,
199
+ "max_tokens": 80
200
+ }
201
+ ```
202
+
203
+ **Expected Output:** Plain text string, no JSON structure required.
204
+
205
+ #### 4. Chat Hint Test (Part of Speech)
206
+ ```json
207
+ {
208
+ "messages": [
209
+ {
210
+ "role": "system",
211
+ "content": "You provide clues for word puzzles. You will be told the target word that players need to guess, but you must NEVER mention, spell, or reveal that word in your response. Follow the EXACT format requested. Be concise and direct about the target word without revealing it. Use plain text only - no bold, italics, asterisks, or markdown formatting. Stick to word limits."
212
+ },
213
+ {
214
+ "role": "user",
215
+ "content": "The target word is 'walked'. The sentence is: 'Every morning, she would _____ to the village market to buy fresh bread.'\n\nQuestion type: part_of_speech\n\nIdentify what part of speech fits in this blank. Answer in 2-5 words. Format: 'It's a/an [part of speech]'"
216
+ }
217
+ ],
218
+ "temperature": 0.6,
219
+ "max_tokens": 50
220
+ }
221
+ ```
222
+
223
+ **Expected Output:** Plain text following format "It's a/an [part of speech]"
224
+
225
+ #### 5. Chat Hint Test (Synonym)
226
+ ```json
227
+ {
228
+ "messages": [
229
+ {
230
+ "role": "system",
231
+ "content": "You provide clues for word puzzles. You will be told the target word that players need to guess, but you must NEVER mention, spell, or reveal that word in your response. Follow the EXACT format requested. Be concise and direct about the target word without revealing it. Use plain text only - no bold, italics, asterisks, or markdown formatting. Stick to word limits."
232
+ },
233
+ {
234
+ "role": "user",
235
+ "content": "The target word is 'cottage'. The sentence is: 'The old woman lived in a small _____ by the forest.'\n\nQuestion type: synonym\n\nSuggest a different word that could replace the blank. Answer in 1-3 words only."
236
+ }
237
+ ],
238
+ "temperature": 0.6,
239
+ "max_tokens": 50
240
+ }
241
+ ```
242
+
243
+ **Expected Output:** Plain text with 1-3 word synonym
244
+
245
+ ### LM Studio Configuration
246
+
247
+ 1. **Model Selection**: Load gemma-3-27b or equivalent model
248
+ 2. **Context Length**: Set to at least 4096 tokens
249
+ 3. **GPU Layers**: Maximize based on available VRAM
250
+ 4. **Batch Size**: 512 for optimal performance
251
+ 5. **Prompt Format**: Use ChatML or model's native format
252
+
253
+ ### Testing Checklist
254
+
255
+ - [ ] Verify JSON responses are clean (no markdown blocks)
256
+ - [ ] Check word selections match passage exactly
257
+ - [ ] Ensure hints never reveal target words
258
+ - [ ] Validate response stays within token limits
259
+ - [ ] Test difficulty level word length constraints
260
+ - [ ] Confirm batch processing handles both passages
261
+ - [ ] Verify contextualization produces factual content
262
+ - [ ] Test all four hint question types