import gradio as gr
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import random
import re
import warnings

warnings.filterwarnings("ignore")
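
# BatchHumanizer wraps the Vamsi/T5_Paraphrase_Paws seq2seq model for paraphrasing
# and falls back to simple regex-based word swaps if the model cannot be loaded.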
class BatchHumanizer:
    def __init__(self):
        try:
            self.model_name = "Vamsi/T5_Paraphrase_Paws"
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name, use_fast=False)
            self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name)
            print("✅ Batch Humanizer model loaded successfully")
        except Exception as e:
            print(f"❌ Error loading model: {e}")
            self.tokenizer = None
            self.model = None

    def humanize_single_text(self, text, strength="medium"):
        """Humanize a single piece of text"""
        if not self.model or not self.tokenizer:
            return self.fallback_humanize(text)
        try:
            # Paraphrase using T5
            input_text = f"paraphrase: {text}"
            input_ids = self.tokenizer.encode(
                input_text,
                return_tensors="pt",
                max_length=512,
                truncation=True
            )
            # Adjust parameters based on strength
            if strength == "light":
                temp, top_p = 1.1, 0.9
            elif strength == "heavy":
                temp, top_p = 1.5, 0.95
            else:  # medium
                temp, top_p = 1.3, 0.92
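            # Higher temperature and top_p loosen the sampling distribution, so
            # "heavy" rewrites drift further from the source wording than "light".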
            with torch.no_grad():
                outputs = self.model.generate(
                    input_ids=input_ids,
                    max_length=min(len(text.split()) + 50, 512),
                    num_beams=5,
                    temperature=temp,
                    top_p=top_p,
                    do_sample=True,
                    early_stopping=True,
                    repetition_penalty=1.2
                )
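            # With num_beams > 1 and do_sample=True, generate() samples within beam
            # search; outputs[0] is the highest-scoring returned sequence.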
            result = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            # Additional humanization
            if strength in ["medium", "heavy"]:
                result = self.add_natural_variations(result)
            return self.clean_text(result) if result and len(result) > 10 else text
        except Exception as e:
            print(f"Error humanizing text: {e}")
            return self.fallback_humanize(text)

    def fallback_humanize(self, text):
        """Simple fallback humanization without model"""
        # Basic word replacements
        replacements = {
            "utilize": "use", "demonstrate": "show", "facilitate": "help",
            "optimize": "improve", "implement": "apply", "generate": "create",
            "therefore": "thus", "however": "yet", "furthermore": "also"
        }
        result = text
        for old, new in replacements.items():
            result = re.sub(r'\b' + old + r'\b', new, result, flags=re.IGNORECASE)
        return result

    def add_natural_variations(self, text):
        """Add natural language variations"""
        # Academic connectors
        connectors = [
            "Moreover", "Furthermore", "Additionally", "In contrast",
            "Similarly", "Consequently", "Nevertheless", "Notably"
        ]
        sentences = text.split('.')
        varied = []
        for i, sentence in enumerate(sentences):
            sentence = sentence.strip()
            if not sentence:
                continue
            # Sometimes add connectors
            if i > 0 and random.random() < 0.2:
                connector = random.choice(connectors)
                # Lowercase only the first character so acronyms and proper nouns keep their casing
                sentence = f"{connector}, {sentence[0].lower()}{sentence[1:]}"
            varied.append(sentence)
        return '. '.join(varied) + '.' if varied else text

    def clean_text(self, text):
        """Clean and format text"""
        # Remove extra spaces
        text = re.sub(r'\s+', ' ', text)
        text = re.sub(r'\s+([.!?,:;])', r'\1', text)
        # Capitalize sentences
        sentences = text.split('. ')
        formatted = []
        for sentence in sentences:
            sentence = sentence.strip()
            if sentence:
                sentence = sentence[0].upper() + sentence[1:] if len(sentence) > 1 else sentence.upper()
                formatted.append(sentence)
        result = '. '.join(formatted)
        if not result.endswith(('.', '!', '?')):
            result += '.'
        return result

# Initialize humanizer
batch_humanizer = BatchHumanizer()
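
# Minimal direct-use sketch (no Gradio), assuming the model loaded successfully:
#   sample = "The methodology utilized in this study demonstrates significant improvements."
#   print(batch_humanizer.humanize_single_text(sample, strength="heavy"))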

def process_text_input(text_input, strength):
    """Process single text input"""
    if not text_input or not text_input.strip():
        return "Please enter some text to humanize."
    return batch_humanizer.humanize_single_text(text_input, strength.lower())

def process_file_upload(file, strength):
    """Process uploaded file"""
    if file is None:
        return "Please upload a file.", None
    try:
        # Read the file
        if file.name.endswith('.txt'):
            with open(file.name, 'r', encoding='utf-8') as f:
                content = f.read()
            # Split into paragraphs for processing
            paragraphs = [p.strip() for p in content.split('\n\n') if p.strip()]
            humanized_paragraphs = []
            for para in paragraphs:
                if len(para) > 50:  # Only process substantial paragraphs
                    humanized = batch_humanizer.humanize_single_text(para, strength.lower())
                    humanized_paragraphs.append(humanized)
                else:
                    humanized_paragraphs.append(para)
            result = '\n\n'.join(humanized_paragraphs)
            # Save to new file
            output_filename = file.name.replace('.txt', '_humanized.txt')
            with open(output_filename, 'w', encoding='utf-8') as f:
                f.write(result)
            return result, output_filename
        elif file.name.endswith('.csv'):
            df = pd.read_csv(file.name)
            # Assume the text column is named 'text'; otherwise use the first column
            text_column = 'text' if 'text' in df.columns else df.columns[0]
            # Humanize each text entry
            df['humanized'] = df[text_column].apply(
                lambda x: batch_humanizer.humanize_single_text(str(x), strength.lower()) if pd.notna(x) else x
            )
            # Save to new CSV
            output_filename = file.name.replace('.csv', '_humanized.csv')
            df.to_csv(output_filename, index=False)
            return f"Processed {len(df)} entries. Check the 'humanized' column.", output_filename
        else:
            return "Unsupported file format. Please upload .txt or .csv files.", None
    except Exception as e:
        return f"Error processing file: {str(e)}", None
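
# Illustrative CSV layout that process_file_upload expects (a 'text' column is
# preferred; otherwise the first column is humanized):
#   text
#   "The research methodology utilized in this study involves comprehensive data collection."
#   "Machine learning models exhibit superior performance characteristics."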

# Create Gradio interface with tabs
with gr.Blocks(theme="soft", title="AI Text Humanizer Pro") as demo:
    gr.Markdown("""
# AI Text Humanizer Pro

**Advanced tool to transform robotic AI-generated text into natural, human-like writing**

Perfect for:
- Academic papers and essays
- Research reports
- Business documents
- Professional content
- Bypassing AI detection tools
""")
    with gr.Tabs():
        # Single Text Tab
        with gr.TabItem("Single Text"):
            gr.Markdown("### Humanize Individual Text")
            with gr.Row():
                with gr.Column(scale=2):
                    text_input = gr.Textbox(
                        lines=12,
                        placeholder="Paste your AI-generated text here...",
                        label="Input Text",
                        info="Enter the text you want to humanize"
                    )
                    strength_single = gr.Radio(
                        choices=["Light", "Medium", "Heavy"],
                        value="Medium",
                        label="Humanization Strength"
                    )
                    process_btn = gr.Button("Humanize Text", variant="primary")
                with gr.Column(scale=2):
                    text_output = gr.Textbox(
                        lines=12,
                        label="Humanized Output",
                        show_copy_button=True
                    )
            # Examples
            gr.Examples(
                examples=[
                    ["The implementation of artificial intelligence algorithms demonstrates significant improvements in computational efficiency and accuracy metrics across various benchmark datasets.", "Medium"],
                    ["Machine learning models exhibit superior performance characteristics when evaluated against traditional statistical approaches in predictive analytics applications.", "Heavy"],
                    ["The research methodology utilized in this study involves comprehensive data collection and analysis procedures to ensure robust and reliable results.", "Light"]
                ],
                inputs=[text_input, strength_single],
                outputs=text_output,
                fn=process_text_input
            )
        # Batch Processing Tab
        with gr.TabItem("Batch Processing"):
            gr.Markdown("### Process Files in Batch")
            gr.Markdown("Upload .txt or .csv files to humanize multiple texts at once")
            with gr.Row():
                with gr.Column():
                    file_input = gr.File(
                        label="Upload File (.txt or .csv)",
                        file_types=[".txt", ".csv"]
                    )
                    strength_batch = gr.Radio(
                        choices=["Light", "Medium", "Heavy"],
                        value="Medium",
                        label="Humanization Strength"
                    )
                    process_file_btn = gr.Button("Process File", variant="primary")
                with gr.Column():
                    file_output = gr.Textbox(
                        lines=10,
                        label="Processing Results",
                        show_copy_button=True
                    )
                    # Kept visible so the processed file can actually be downloaded
                    # once process_file_upload returns a file path.
                    download_file = gr.File(
                        label="Download Processed File"
                    )
        # Settings Tab
        with gr.TabItem("Settings & Info"):
            gr.Markdown("""
### How it works:
1. **Light Humanization**: Basic paraphrasing with minimal changes
2. **Medium Humanization**: Paraphrasing + vocabulary variations
3. **Heavy Humanization**: All techniques + sentence structure changes

### Features:
- ✅ Advanced T5-based paraphrasing
- ✅ Natural vocabulary diversification
- ✅ Sentence structure optimization
- ✅ Academic tone preservation
- ✅ Batch file processing
- ✅ Multiple output formats

### Supported Formats:
- **Text files (.txt)**: Processed paragraph by paragraph
- **CSV files (.csv)**: Adds a 'humanized' column with the processed text

### Tips for best results:
- Use complete sentences and paragraphs
- Avoid very short fragments
- Choose an appropriate humanization strength
- Review the output for context accuracy
""")
    # Event handlers
    process_btn.click(
        fn=process_text_input,
        inputs=[text_input, strength_single],
        outputs=text_output
    )
    process_file_btn.click(
        fn=process_file_upload,
        inputs=[file_input, strength_batch],
        outputs=[file_output, download_file]
    )
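
# launch() below binds to all network interfaces on port 7862; share=False means
# no temporary public gradio.live link is created.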
if __name__ == "__main__":
    demo.launch(
        share=False,
        server_name="0.0.0.0",
        server_port=7862,
        debug=True
    )