Spaces:

SuperBigtoo
/

ThaiNewsClassify

Sleeping

SuperBigtoo committed on Oct 24, 2023

Commit

42d0b45

1 Parent(s): b649cf6

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -4,16 +4,23 @@ import re
 from simpletransformers.classification import ClassificationModel
 from pythainlp import sent_tokenize
 from thai_tokenization import ThaiTokenizer
-tokenizer = ThaiTokenizer(vocab_file='th.wiki.bpe.op25000.vocab', spm_file='th.wiki.bpe.op25000.model')
 typeId = {'การเมือง': 0, 'กีฬา': 1, 'คุณภาพชีวิต': 2, 'ทั่วไทย': 3, 'ไลฟ์สไตล์': 4,
             'อื่นๆ': 5, 'อาชญากรรม': 6, 'สิ่งแวดล้อม': 7, 'บันเทิง & วัฒนธรรม': 8, 'เศรษฐกิจ': 9,
             'วิทยาศาสตร์ & การศึกษา': 10, 'สังคม': 11, 'unspecified': 12, 'ต่างประเทศ': 13}
 loaded_model = ClassificationModel(
-     "bert",
-     "SuperBigtoo/thainews-classification-thaikeras-bert-th",
      use_cuda=torch.cuda.is_available(),
      num_labels=14,
 )

 from simpletransformers.classification import ClassificationModel
 from pythainlp import sent_tokenize
 from thai_tokenization import ThaiTokenizer
+from transformers import AutoTokenizer
+#tokenizer = ThaiTokenizer(vocab_file='th.wiki.bpe.op25000.vocab', spm_file='th.wiki.bpe.op25000.model')
+#create tokenizer
+tokenizer = AutoTokenizer.from_pretrained(
+                'airesearch/wangchanberta-base-att-spm-uncased',
+                revision='main',
+                model_max_length=416,)
 typeId = {'การเมือง': 0, 'กีฬา': 1, 'คุณภาพชีวิต': 2, 'ทั่วไทย': 3, 'ไลฟ์สไตล์': 4,
             'อื่นๆ': 5, 'อาชญากรรม': 6, 'สิ่งแวดล้อม': 7, 'บันเทิง & วัฒนธรรม': 8, 'เศรษฐกิจ': 9,
             'วิทยาศาสตร์ & การศึกษา': 10, 'สังคม': 11, 'unspecified': 12, 'ต่างประเทศ': 13}
 loaded_model = ClassificationModel(
+     "camembert",
+     "SuperBigtoo/thainews-classification-wangchanberta",
      use_cuda=torch.cuda.is_available(),
      num_labels=14,
 )