visolex/ViHOS
Viewer • Updated • 11.1k • 39
This model is a fine-tuned version of phobert-v2 for Vietnamese Hate Speech Span Detection.
phobert-v2 645e-63210050.63260.64940.63050.0000
from transformers import AutoTokenizer, AutoModelForTokenClassification
import torch
# Minimal inference example: load the fine-tuned checkpoint, tokenize one
# sentence, and pick the highest-scoring label id at each sequence position.
model_name = "phobert-v2-hsd-span"
tok = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)

text = "Ví dụ câu tiếng Việt có nội dung thù ghét ..."
# The input is a raw string (not pre-split into words); truncation is enabled
# with max_length=256.
enc = tok(text, return_tensors="pt", truncation=True, max_length=256, is_split_into_words=False)

# Gradients are not needed for inference, so run the forward pass without them.
with torch.no_grad():
    logits = model(**enc).logits

# argmax over the last axis picks the best label id; [0] selects the first
# (and here only) sequence in the batch.
pred_ids = logits.argmax(-1)[0].tolist()
# TODO: convert pred_ids -> spans according to your label scheme (BIO/BILOU/char-offset)
Apache-2.0