en-ml-transliteration

Running

App Files Files Community

kavyamanohar committed on Dec 16, 2024

Commit

998926e

verified ·

1 Parent(s): 969634e

Create app.py

Browse files

Files changed (1) hide show

app.py +116 -0

app.py ADDED Viewed

	@@ -0,0 +1,116 @@

+import gradio as gr
+import numpy as np
+from tensorflow.keras.preprocessing.text import Tokenizer
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+import re
+from huggingface_hub import from_pretrained_keras
# Pretrained Keras transliteration model, fetched from the Hugging Face Hub.
model = from_pretrained_keras("vrclc/transliteration")

# Sequence length used for padding: the second dimension of the first input
# shape reported by the model's "encoder_input" layer.
max_seq_length = model.get_layer("encoder_input").input_shape[0][1]

# Source side: lowercase Latin letters plus the space character, fed to a
# character-level tokenizer with filtering disabled.
# NOTE(review): the order of the characters presumably determines the index
# each one receives; confirm against the training vocabulary before reordering.
source_tokens = [*'abcdefghijklmnopqrstuvwxyz ']
source_tokenizer = Tokenizer(filters='', char_level=True)
source_tokenizer.fit_on_texts(source_tokens)

# Target side: Malayalam characters, grouped by category. The groups are
# concatenated in the order listed, so the flat list keeps its original order.
malayalam_tokens = (
    # Independent vowels
    ['അ', 'ആ', 'ഇ', 'ഈ', 'ഉ', 'ഊ', 'ഋ', 'ൠ', 'ഌ', 'ൡ', 'എ', 'ഏ', 'ഐ', 'ഒ', 'ഓ', 'ഔ']
    # Consonants
    + ['ക', 'ഖ', 'ഗ', 'ഘ', 'ങ', 'ച', 'ഛ', 'ജ', 'ഝ', 'ഞ']
    + ['ട', 'ഠ', 'ഡ', 'ഢ', 'ണ', 'ത', 'ഥ', 'ദ', 'ധ', 'ന']
    + ['പ', 'ഫ', 'ബ', 'ഭ', 'മ', 'യ', 'ര', 'ല', 'വ', 'ശ']
    + ['ഷ', 'സ', 'ഹ', 'ള', 'ഴ', 'റ']
    # Chillu letters
    + ['ൺ', 'ൻ', 'ർ', 'ൽ', 'ൾ']
    # Additional characters
    + ['ം', 'ഃ', '്']
    # Vowel modifiers / signs, followed by the space character
    + ['ാ', 'ി', 'ീ', 'ു', 'ൂ', 'ൃ', 'ൄ', 'െ', 'േ', 'ൈ', 'ൊ', 'ോ', 'ൌ', 'ൗ', ' ']
)

# Character-level tokenizer for the Malayalam side, again with no filtering.
target_tokenizer = Tokenizer(filters='', char_level=True)
target_tokenizer.fit_on_texts(malayalam_tokens)
def transliterate_with_split_tokens(input_text, model, source_tokenizer, target_tokenizer, max_seq_length):
    """Transliterate the letter runs of ``input_text`` and keep everything else.

    The text is split into alternating runs of ASCII letters and runs of any
    other characters. Letter runs are lower-cased, encoded with
    ``source_tokenizer``, padded to ``max_seq_length`` and decoded from the
    model's prediction with ``target_tokenizer``. Non-letter runs (spaces,
    digits, punctuation, ...) are copied to the output unchanged.

    All letter runs are sent to the model in a single ``predict`` call rather
    than one call per word.

    Args:
        input_text: Text to transliterate. Any falsy value yields ``""``.
        model: Object exposing ``predict(batch)``; its output is reduced with
            ``argmax`` over the last axis.
        source_tokenizer: Tokenizer exposing ``texts_to_sequences``.
        target_tokenizer: Tokenizer exposing an ``index_word`` mapping.
        max_seq_length: Length every encoded word is padded to.

    Returns:
        The transliterated string, with non-letter runs preserved in place.
    """
    # Handle empty input
    if not input_text:
        return ""

    # Each match is a (letters, other) pair in which exactly one side is
    # non-empty, in the order the runs appear in the text.
    segments = re.findall(r"([a-zA-Z]+)|([^a-zA-Z]+)", input_text)

    # Lower-case the letter runs to handle mixed-case input.
    words = [letters.lower() for letters, _ in segments if letters]

    transliterated_words = []
    if words:
        sequences = source_tokenizer.texts_to_sequences(words)
        # NOTE(review): words longer than max_seq_length are cut by
        # pad_sequences using its default truncation side -- confirm that this
        # matches how the model was trained.
        padded = pad_sequences(sequences, maxlen=max_seq_length, padding='post')
        # One batched prediction for every word in the input.
        # Assumes the output is (words, positions, vocabulary) -- TODO confirm.
        predicted_indices = np.argmax(model.predict(padded), axis=-1)
        # Index 0 is skipped when decoding (presumably the padding slot).
        transliterated_words = [
            ''.join(target_tokenizer.index_word[idx] for idx in row if idx != 0)
            for row in predicted_indices
        ]

    # Stitch the pieces back together in their original order.
    remaining_words = iter(transliterated_words)
    return ''.join(
        next(remaining_words) if letters else other
        for letters, other in segments
    )
# Create Gradio interface with enhanced features
def create_transliteration_interface():
    """Build the Gradio interface for the English-to-Malayalam transliterator.

    The interface declares a single text input, so the callback handed to
    Gradio must accept exactly one argument. ``transliterate_with_split_tokens``
    needs the model, both tokenizers and the sequence length as well, so it is
    wrapped in a one-argument function that supplies the module-level
    ``model``, ``source_tokenizer``, ``target_tokenizer`` and
    ``max_seq_length``.

    Returns:
        The configured ``gr.Interface``; the caller is responsible for
        launching it.
    """
    def transliterate(input_text):
        """Transliterate ``input_text`` using the module-level model and tokenizers."""
        return transliterate_with_split_tokens(
            input_text,
            model,
            source_tokenizer,
            target_tokenizer,
            max_seq_length,
        )

    # Define input and output components with more details
    input_textbox = gr.Textbox(
        lines=3,
        placeholder="Enter English text to transliterate to Malayalam...",
        label="Input Text"
    )
    output_textbox = gr.Textbox(
        lines=3,
        label="Transliterated Malayalam Text"
    )
    # Create the Gradio interface with more comprehensive configuration
    interface = gr.Interface(
        fn=transliterate,
        inputs=[input_textbox],
        outputs=[output_textbox],
        title="🌟 English to Malayalam Transliterator",
        description="Transliterate English text to Malayalam characters. Simply type or paste your English text, and see the Malayalam transliteration instantly!",
        article="## How to Use\n1. Enter English text in the input box\n2. The transliteration will appear automatically\n3. Works with words, phrases, and sentences",
        examples=[
            ["ente veed"],
            ["malayalam"],
            ["hello world"],
            ["njan pranayam"]
        ],
        theme="huggingface"
    )
    return interface
# Entry point: build the interface and start serving it when this file is
# executed as a script.
if __name__ == "__main__":
    create_transliteration_interface().launch()