diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..52373fe24473b1aa44333d318f578ae6bf04b49b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/Batch_comet_reward_100_t5.png b/Batch_comet_reward_100_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..e9c2ac0c0054a3748b8963dcfac5318c1bc65315 Binary files /dev/null and b/Batch_comet_reward_100_t5.png differ diff --git a/Batch_comet_reward_125_t5.png b/Batch_comet_reward_125_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..710b851e55eba9a471478fc6ee1e64ebf1cf301a Binary files /dev/null and b/Batch_comet_reward_125_t5.png differ diff --git a/Batch_comet_reward_150_t5.png b/Batch_comet_reward_150_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..8a21d44462b0e1998b054bf3f2ab21e05fbf1715 Binary files /dev/null and b/Batch_comet_reward_150_t5.png differ diff --git a/Batch_comet_reward_175_t5.png b/Batch_comet_reward_175_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..11e52e2611a175a082ce18e4fd91f2036e71a5b0 Binary files /dev/null and b/Batch_comet_reward_175_t5.png differ diff --git a/Batch_comet_reward_200_t5.png b/Batch_comet_reward_200_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..fb166d22ecf499649e64e2ab837fcfd99f854e37 Binary files /dev/null and b/Batch_comet_reward_200_t5.png differ diff --git a/Batch_comet_reward_225_t5.png b/Batch_comet_reward_225_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..7e5e3cb6208fe7c51860235109a9db97ca52b3df Binary files /dev/null and b/Batch_comet_reward_225_t5.png differ diff --git a/Batch_comet_reward_250_t5.png b/Batch_comet_reward_250_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..96885eff31c4699762ae4c44242ef9b9cc626a69 Binary files /dev/null and b/Batch_comet_reward_250_t5.png differ diff --git a/Batch_comet_reward_25_t5.png b/Batch_comet_reward_25_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..8a046ce7d736338e099dc111b378269687c14ac1 Binary files /dev/null and b/Batch_comet_reward_25_t5.png differ diff --git a/Batch_comet_reward_275_t5.png b/Batch_comet_reward_275_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..ce040ea78d31f4199b9d013d95e177d2e1285e34 Binary files /dev/null and b/Batch_comet_reward_275_t5.png differ diff --git a/Batch_comet_reward_300_t5.png b/Batch_comet_reward_300_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..be2cc10ec8ecef87fc4c5529b5718851dda0f77e Binary files /dev/null and b/Batch_comet_reward_300_t5.png differ diff --git a/Batch_comet_reward_325_t5.png b/Batch_comet_reward_325_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..ab2ff8128c7d0fb8d55842eb33c720a3f62530bd Binary files /dev/null and b/Batch_comet_reward_325_t5.png differ diff --git a/Batch_comet_reward_350_t5.png b/Batch_comet_reward_350_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..3060b722e37ca064fe20905d47a19ec59c738aff Binary files /dev/null and b/Batch_comet_reward_350_t5.png differ diff --git a/Batch_comet_reward_375_t5.png b/Batch_comet_reward_375_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..51c91def5a259c60a19605a6eceb063df9dd5b11 Binary files /dev/null and b/Batch_comet_reward_375_t5.png differ diff --git a/Batch_comet_reward_400_t5.png b/Batch_comet_reward_400_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..a0e62631cccf23959304e6c9cc27f938a88b233a Binary files /dev/null and b/Batch_comet_reward_400_t5.png differ diff --git a/Batch_comet_reward_425_t5.png b/Batch_comet_reward_425_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..d3e8db323cb30e63a1d276df330224b417930501 Binary files /dev/null and b/Batch_comet_reward_425_t5.png differ diff --git a/Batch_comet_reward_450_t5.png b/Batch_comet_reward_450_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..1c174f818eb9490e758b3330e32067a91a79736b Binary files /dev/null and b/Batch_comet_reward_450_t5.png differ diff --git a/Batch_comet_reward_475_t5.png b/Batch_comet_reward_475_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..544954994c7d692d20ed3e9bb6176cca1e1fa573 Binary files /dev/null and b/Batch_comet_reward_475_t5.png differ diff --git a/Batch_comet_reward_500_t5.png b/Batch_comet_reward_500_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..b59ac808a4caa30d6ba4a856086022673d8ed84d Binary files /dev/null and b/Batch_comet_reward_500_t5.png differ diff --git a/Batch_comet_reward_50_t5.png b/Batch_comet_reward_50_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..011edce11d6e45baaa9a044f1e1be127678b02ab Binary files /dev/null and b/Batch_comet_reward_50_t5.png differ diff --git a/Batch_comet_reward_525_t5.png b/Batch_comet_reward_525_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..c6c527c05d91168c7a041a5a4b282bab3055b748 Binary files /dev/null and b/Batch_comet_reward_525_t5.png differ diff --git a/Batch_comet_reward_550_t5.png b/Batch_comet_reward_550_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..0cd2da202dc9268702b7749abebc93fe40a5f8db Binary files /dev/null and b/Batch_comet_reward_550_t5.png differ diff --git a/Batch_comet_reward_575_t5.png b/Batch_comet_reward_575_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..b724b16fb696f70a32240c808ac6226a73315682 Binary files /dev/null and b/Batch_comet_reward_575_t5.png differ diff --git a/Batch_comet_reward_600_t5.png b/Batch_comet_reward_600_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..997e52cd90057da2522e99418117c6414075c493 Binary files /dev/null and b/Batch_comet_reward_600_t5.png differ diff --git a/Batch_comet_reward_75_t5.png b/Batch_comet_reward_75_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..0d21bc423ed0d6f2deed3867539613c505c77758 Binary files /dev/null and b/Batch_comet_reward_75_t5.png differ diff --git a/Batch_pronoun_reward_100_t5.png b/Batch_pronoun_reward_100_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..3642eba2d95f711342d5d5d761c16a6bbdb9be96 Binary files /dev/null and b/Batch_pronoun_reward_100_t5.png differ diff --git a/Batch_pronoun_reward_125_t5.png b/Batch_pronoun_reward_125_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..0d2cdf3899d7c0fa16288d78e2afca07592b5399 Binary files /dev/null and b/Batch_pronoun_reward_125_t5.png differ diff --git a/Batch_pronoun_reward_150_t5.png b/Batch_pronoun_reward_150_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..e3e68ee1d46c5e0753de5230700bab5907f7b293 Binary files /dev/null and b/Batch_pronoun_reward_150_t5.png differ diff --git a/Batch_pronoun_reward_175_t5.png b/Batch_pronoun_reward_175_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..85d0d288f1194e2388d94561f82f8fc0de60821e Binary files /dev/null and b/Batch_pronoun_reward_175_t5.png differ diff --git a/Batch_pronoun_reward_200_t5.png b/Batch_pronoun_reward_200_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..2f8688ad8f6bcd33cadc2602ff3fa7dd5da6ab11 Binary files /dev/null and b/Batch_pronoun_reward_200_t5.png differ diff --git a/Batch_pronoun_reward_225_t5.png b/Batch_pronoun_reward_225_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..b790587131c8cf9ce04a6d03dd9761abec45574a Binary files /dev/null and b/Batch_pronoun_reward_225_t5.png differ diff --git a/Batch_pronoun_reward_250_t5.png b/Batch_pronoun_reward_250_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..74e91509d8d856f62ec72379e87a4c9f2f2efa9b Binary files /dev/null and b/Batch_pronoun_reward_250_t5.png differ diff --git a/Batch_pronoun_reward_25_t5.png b/Batch_pronoun_reward_25_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..d4d86337778db360f743ac077dfe6c94a0eb7346 Binary files /dev/null and b/Batch_pronoun_reward_25_t5.png differ diff --git a/Batch_pronoun_reward_275_t5.png b/Batch_pronoun_reward_275_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..180469a15fcee207a258249c47efacff712d4750 Binary files /dev/null and b/Batch_pronoun_reward_275_t5.png differ diff --git a/Batch_pronoun_reward_300_t5.png b/Batch_pronoun_reward_300_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..b25aca04ea62e0016be4f77fcf960461a7c406e3 Binary files /dev/null and b/Batch_pronoun_reward_300_t5.png differ diff --git a/Batch_pronoun_reward_325_t5.png b/Batch_pronoun_reward_325_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..57d8e1cad49bf0fa5a9b39837255619110593243 Binary files /dev/null and b/Batch_pronoun_reward_325_t5.png differ diff --git a/Batch_pronoun_reward_350_t5.png b/Batch_pronoun_reward_350_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..61fbe183a3fd0f4c049946aa1e3ce80d1b570a27 Binary files /dev/null and b/Batch_pronoun_reward_350_t5.png differ diff --git a/Batch_pronoun_reward_375_t5.png b/Batch_pronoun_reward_375_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..b21f404d2f1185dd991d7a927d7688b875f3a458 Binary files /dev/null and b/Batch_pronoun_reward_375_t5.png differ diff --git a/Batch_pronoun_reward_400_t5.png b/Batch_pronoun_reward_400_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..287f46f29c6df95376d5ca0c71c0ef1f9d840dd2 Binary files /dev/null and b/Batch_pronoun_reward_400_t5.png differ diff --git a/Batch_pronoun_reward_425_t5.png b/Batch_pronoun_reward_425_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..6966fadb7ed6561d94fb90f3a84d201863a1acaa Binary files /dev/null and b/Batch_pronoun_reward_425_t5.png differ diff --git a/Batch_pronoun_reward_450_t5.png b/Batch_pronoun_reward_450_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..f5393df5270639421c3369bcbedadbb8d452ffd2 Binary files /dev/null and b/Batch_pronoun_reward_450_t5.png differ diff --git a/Batch_pronoun_reward_475_t5.png b/Batch_pronoun_reward_475_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..ee3a059a57e17182903134e1def955a95c83177d Binary files /dev/null and b/Batch_pronoun_reward_475_t5.png differ diff --git a/Batch_pronoun_reward_500_t5.png b/Batch_pronoun_reward_500_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..9275e1774bed7a9f51e4c9b53ae35d6f6bec12a1 Binary files /dev/null and b/Batch_pronoun_reward_500_t5.png differ diff --git a/Batch_pronoun_reward_50_t5.png b/Batch_pronoun_reward_50_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..a4e36b336fc47e8d277e915de871a1052db3f585 Binary files /dev/null and b/Batch_pronoun_reward_50_t5.png differ diff --git a/Batch_pronoun_reward_525_t5.png b/Batch_pronoun_reward_525_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..3b4040926ce0a5b7d7e90d57bd4302685e1c3190 Binary files /dev/null and b/Batch_pronoun_reward_525_t5.png differ diff --git a/Batch_pronoun_reward_550_t5.png b/Batch_pronoun_reward_550_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..31c3fe3dc4e6099165c666c50c9627b3b39a9243 Binary files /dev/null and b/Batch_pronoun_reward_550_t5.png differ diff --git a/Batch_pronoun_reward_575_t5.png b/Batch_pronoun_reward_575_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..829db950cf0c47d4f9951db2427567681b8dcae2 Binary files /dev/null and b/Batch_pronoun_reward_575_t5.png differ diff --git a/Batch_pronoun_reward_600_t5.png b/Batch_pronoun_reward_600_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..8a5087437bdd458c7a31b3e3a8c3f5236ab5be22 Binary files /dev/null and b/Batch_pronoun_reward_600_t5.png differ diff --git a/Batch_pronoun_reward_75_t5.png b/Batch_pronoun_reward_75_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..4e21892acc38cc0b5a3c0a3617ce4ba03d720d41 Binary files /dev/null and b/Batch_pronoun_reward_75_t5.png differ diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..08eb4a478065a828b238b0e696d3f41c4061a339 --- /dev/null +++ b/README.md @@ -0,0 +1,52 @@ +--- +license: cc-by-nc-4.0 +base_model: facebook/nllb-200-distilled-600M +tags: +- trl +- iterative-sft +- generated_from_trainer +model-index: +- name: working + results: [] +--- + + + +# working + +This model is a fine-tuned version of [facebook/nllb-200-distilled-600M](https://huggingface.co/facebook/nllb-200-distilled-600M) on an unknown dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 6e-06 +- train_batch_size: 4 +- eval_batch_size: 8 +- seed: 42 +- gradient_accumulation_steps: 16 +- total_train_batch_size: 64 +- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08 +- lr_scheduler_type: linear +- training_steps: 1000 + +### Framework versions + +- Transformers 4.44.0 +- Pytorch 2.4.0 +- Datasets 3.0.0 +- Tokenizers 0.19.1 diff --git a/Sample_comet_reward_100_t5.png b/Sample_comet_reward_100_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..a6721e217312b19342fac189b6c415f0ae3bbb2e Binary files /dev/null and b/Sample_comet_reward_100_t5.png differ diff --git a/Sample_comet_reward_125_t5.png b/Sample_comet_reward_125_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..70d8dd504dc808f4623d46bff7ea96d1d37b6de9 Binary files /dev/null and b/Sample_comet_reward_125_t5.png differ diff --git a/Sample_comet_reward_150_t5.png b/Sample_comet_reward_150_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..cb1b5799d6f34af635784ef31e41f48d1c9db4b1 Binary files /dev/null and b/Sample_comet_reward_150_t5.png differ diff --git a/Sample_comet_reward_175_t5.png b/Sample_comet_reward_175_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..ee884f560eaa496667f70446623a51b2b100c7e3 Binary files /dev/null and b/Sample_comet_reward_175_t5.png differ diff --git a/Sample_comet_reward_200_t5.png b/Sample_comet_reward_200_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..deb0fdaef2eb14a63a1a6c76564a77b18edadc8d Binary files /dev/null and b/Sample_comet_reward_200_t5.png differ diff --git a/Sample_comet_reward_225_t5.png b/Sample_comet_reward_225_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..850c060c95633c971f1c6c76477da81b6f8a446b Binary files /dev/null and b/Sample_comet_reward_225_t5.png differ diff --git a/Sample_comet_reward_250_t5.png b/Sample_comet_reward_250_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..930af47def64a35c66a4ec1430e529be6ea2c22c Binary files /dev/null and b/Sample_comet_reward_250_t5.png differ diff --git a/Sample_comet_reward_25_t5.png b/Sample_comet_reward_25_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..3a649383bf71c72bfb99d6f710a913b422b9cb6c Binary files /dev/null and b/Sample_comet_reward_25_t5.png differ diff --git a/Sample_comet_reward_275_t5.png b/Sample_comet_reward_275_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..c67562b7304cb906da3e08fa10a923554edd2e37 Binary files /dev/null and b/Sample_comet_reward_275_t5.png differ diff --git a/Sample_comet_reward_300_t5.png b/Sample_comet_reward_300_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..d70e141de5f153676311c865f9bf03e2839fe9a4 Binary files /dev/null and b/Sample_comet_reward_300_t5.png differ diff --git a/Sample_comet_reward_325_t5.png b/Sample_comet_reward_325_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..8b575f8a86e04c7206d53a4f8fd6ed62ef4606ef Binary files /dev/null and b/Sample_comet_reward_325_t5.png differ diff --git a/Sample_comet_reward_350_t5.png b/Sample_comet_reward_350_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..1e32c187c68f7f4181271a1c9cd31d70c75c9e67 Binary files /dev/null and b/Sample_comet_reward_350_t5.png differ diff --git a/Sample_comet_reward_375_t5.png b/Sample_comet_reward_375_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..d45684d2b9536e434f988e8c593be1a0247787bf Binary files /dev/null and b/Sample_comet_reward_375_t5.png differ diff --git a/Sample_comet_reward_400_t5.png b/Sample_comet_reward_400_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..cf168211d2d82e9b055d6b86af9d2ae31df9d555 Binary files /dev/null and b/Sample_comet_reward_400_t5.png differ diff --git a/Sample_comet_reward_425_t5.png b/Sample_comet_reward_425_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..26c62ff5e183329368163aeae2eec42b47bc0f5c Binary files /dev/null and b/Sample_comet_reward_425_t5.png differ diff --git a/Sample_comet_reward_450_t5.png b/Sample_comet_reward_450_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..b604d4c65eec5effd2e6623e106b845441919738 Binary files /dev/null and b/Sample_comet_reward_450_t5.png differ diff --git a/Sample_comet_reward_475_t5.png b/Sample_comet_reward_475_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..e4d9a54e15f2ef7e777c09a975644669fc144578 Binary files /dev/null and b/Sample_comet_reward_475_t5.png differ diff --git a/Sample_comet_reward_500_t5.png b/Sample_comet_reward_500_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..b95a0a67b4f98e76a1d9e794eeab89f27b77b6d9 Binary files /dev/null and b/Sample_comet_reward_500_t5.png differ diff --git a/Sample_comet_reward_50_t5.png b/Sample_comet_reward_50_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..db42f88aad21a0908efd530a49acace258af42df Binary files /dev/null and b/Sample_comet_reward_50_t5.png differ diff --git a/Sample_comet_reward_525_t5.png b/Sample_comet_reward_525_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..6deecb8a3459d8c18270f3b58ea59653f843ffe8 Binary files /dev/null and b/Sample_comet_reward_525_t5.png differ diff --git a/Sample_comet_reward_550_t5.png b/Sample_comet_reward_550_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..26b736a7c0a04b7a07e49632d7118cb6866b20b6 Binary files /dev/null and b/Sample_comet_reward_550_t5.png differ diff --git a/Sample_comet_reward_575_t5.png b/Sample_comet_reward_575_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..90b406956e778ca48ba5141763eff886aaaf8640 Binary files /dev/null and b/Sample_comet_reward_575_t5.png differ diff --git a/Sample_comet_reward_600_t5.png b/Sample_comet_reward_600_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..19cb6916a8a5f9e4dfceb4c1d34760909e1e2e9f Binary files /dev/null and b/Sample_comet_reward_600_t5.png differ diff --git a/Sample_comet_reward_75_t5.png b/Sample_comet_reward_75_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..7a71c32281d3a931c43e7c72e4775b562c0ab7e8 Binary files /dev/null and b/Sample_comet_reward_75_t5.png differ diff --git a/Sample_pronoun_reward_100_t5.png b/Sample_pronoun_reward_100_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..8e1528d96bcf6005ac7cee4a5e835970fabf46b6 Binary files /dev/null and b/Sample_pronoun_reward_100_t5.png differ diff --git a/Sample_pronoun_reward_125_t5.png b/Sample_pronoun_reward_125_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..14fe5345b464693883a9ab0d2294de6bfed3a49c Binary files /dev/null and b/Sample_pronoun_reward_125_t5.png differ diff --git a/Sample_pronoun_reward_150_t5.png b/Sample_pronoun_reward_150_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..5b079ee137d480ec648fdab8098dd05185181182 Binary files /dev/null and b/Sample_pronoun_reward_150_t5.png differ diff --git a/Sample_pronoun_reward_175_t5.png b/Sample_pronoun_reward_175_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..ea3e65516c692a690bc0ad02845caa24cbc34909 Binary files /dev/null and b/Sample_pronoun_reward_175_t5.png differ diff --git a/Sample_pronoun_reward_200_t5.png b/Sample_pronoun_reward_200_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..1dc52cee5ceb5757ac39d7e72ba5aed08b9555d5 Binary files /dev/null and b/Sample_pronoun_reward_200_t5.png differ diff --git a/Sample_pronoun_reward_225_t5.png b/Sample_pronoun_reward_225_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..2f4cf80d36579eb02e18e81e36635bfd075debd4 Binary files /dev/null and b/Sample_pronoun_reward_225_t5.png differ diff --git a/Sample_pronoun_reward_250_t5.png b/Sample_pronoun_reward_250_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..1da70a1c968092818390e5fccaf41c8661302dc3 Binary files /dev/null and b/Sample_pronoun_reward_250_t5.png differ diff --git a/Sample_pronoun_reward_25_t5.png b/Sample_pronoun_reward_25_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..1df87ab40775a8816421341a20488fcb8849fca1 Binary files /dev/null and b/Sample_pronoun_reward_25_t5.png differ diff --git a/Sample_pronoun_reward_275_t5.png b/Sample_pronoun_reward_275_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..38be8a1bab13b5fb675bd6fbaa54f445f42f6f7b Binary files /dev/null and b/Sample_pronoun_reward_275_t5.png differ diff --git a/Sample_pronoun_reward_300_t5.png b/Sample_pronoun_reward_300_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..6176f3aeb176eb3dd7063b94c7022be1dcbc4386 Binary files /dev/null and b/Sample_pronoun_reward_300_t5.png differ diff --git a/Sample_pronoun_reward_325_t5.png b/Sample_pronoun_reward_325_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..2b66fb7e8179208072c7a0e34f97004dcbf850d3 Binary files /dev/null and b/Sample_pronoun_reward_325_t5.png differ diff --git a/Sample_pronoun_reward_350_t5.png b/Sample_pronoun_reward_350_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..32a3feff8e9f0c7761d0c48a844aabda3c3ec235 Binary files /dev/null and b/Sample_pronoun_reward_350_t5.png differ diff --git a/Sample_pronoun_reward_375_t5.png b/Sample_pronoun_reward_375_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..a1616f8dbb7585bd523be7c81a38ebb9b086e96a Binary files /dev/null and b/Sample_pronoun_reward_375_t5.png differ diff --git a/Sample_pronoun_reward_400_t5.png b/Sample_pronoun_reward_400_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..813684a2c7d00c11b3eece0d50f401ef079cdd1c Binary files /dev/null and b/Sample_pronoun_reward_400_t5.png differ diff --git a/Sample_pronoun_reward_425_t5.png b/Sample_pronoun_reward_425_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..e2c8432bf7af3f9c05bcdc2bf1e0a64ccabb2baf Binary files /dev/null and b/Sample_pronoun_reward_425_t5.png differ diff --git a/Sample_pronoun_reward_450_t5.png b/Sample_pronoun_reward_450_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..703e1a6d7dfa3993ec7c5811d597e3a03838ec50 Binary files /dev/null and b/Sample_pronoun_reward_450_t5.png differ diff --git a/Sample_pronoun_reward_475_t5.png b/Sample_pronoun_reward_475_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..b349070839fc91e0926438b8583a40026a4a8b77 Binary files /dev/null and b/Sample_pronoun_reward_475_t5.png differ diff --git a/Sample_pronoun_reward_500_t5.png b/Sample_pronoun_reward_500_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..02dc01866ec8196696162f7d4fd2e752e253aed0 Binary files /dev/null and b/Sample_pronoun_reward_500_t5.png differ diff --git a/Sample_pronoun_reward_50_t5.png b/Sample_pronoun_reward_50_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..a0923dde4a3ef19b42604c57360cb013aa3de145 Binary files /dev/null and b/Sample_pronoun_reward_50_t5.png differ diff --git a/Sample_pronoun_reward_525_t5.png b/Sample_pronoun_reward_525_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..fc0b5b3a539a3ae6487b20348ee3cd35b999c561 Binary files /dev/null and b/Sample_pronoun_reward_525_t5.png differ diff --git a/Sample_pronoun_reward_550_t5.png b/Sample_pronoun_reward_550_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..271f10bc3f122a193abeb0b79334d1bfcd12db48 Binary files /dev/null and b/Sample_pronoun_reward_550_t5.png differ diff --git a/Sample_pronoun_reward_575_t5.png b/Sample_pronoun_reward_575_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..27d43834c4407fd9f71791fc0eb0cf5ab17f6022 Binary files /dev/null and b/Sample_pronoun_reward_575_t5.png differ diff --git a/Sample_pronoun_reward_600_t5.png b/Sample_pronoun_reward_600_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..5451be8a289e4216cc34f09eeb43c034f7d52f61 Binary files /dev/null and b/Sample_pronoun_reward_600_t5.png differ diff --git a/Sample_pronoun_reward_75_t5.png b/Sample_pronoun_reward_75_t5.png new file mode 100644 index 0000000000000000000000000000000000000000..2c22fb87ab582e5c0cec4d0e051dd0ed01267e1c Binary files /dev/null and b/Sample_pronoun_reward_75_t5.png differ diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..34816bd65bcc26595f41b983a2d0874c6036f0a6 --- /dev/null +++ b/config.json @@ -0,0 +1,35 @@ +{ + "_name_or_path": "facebook/nllb-200-distilled-600M", + "activation_dropout": 0.0, + "activation_function": "relu", + "architectures": [ + "M2M100ForConditionalGeneration" + ], + "attention_dropout": 0.1, + "bos_token_id": 0, + "d_model": 1024, + "decoder_attention_heads": 16, + "decoder_ffn_dim": 4096, + "decoder_layerdrop": 0, + "decoder_layers": 12, + "decoder_start_token_id": 2, + "dropout": 0.1, + "encoder_attention_heads": 16, + "encoder_ffn_dim": 4096, + "encoder_layerdrop": 0, + "encoder_layers": 12, + "eos_token_id": 2, + "init_std": 0.02, + "is_encoder_decoder": true, + "max_length": 200, + "max_position_embeddings": 1024, + "model_type": "m2m_100", + "num_hidden_layers": 12, + "pad_token_id": 1, + "scale_embedding": true, + "tokenizer_class": "NllbTokenizer", + "torch_dtype": "float32", + "transformers_version": "4.44.0", + "use_cache": true, + "vocab_size": 256206 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d8ec0334b74309b6d5ebb81423eb90ffa68a9c68 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,9 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "decoder_start_token_id": 2, + "eos_token_id": 2, + "max_length": 200, + "pad_token_id": 1, + "transformers_version": "4.44.0" +} diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..29cbd3f338d2e81905cca57f5c113bf0f94fa1a4 --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f63a80a4758476315ab85015c6cc336c4ec0155f7415047d9422251048f61bf3 +size 2460354912 diff --git a/sentencepiece.bpe.model b/sentencepiece.bpe.model new file mode 100644 index 0000000000000000000000000000000000000000..dc2262d3e1d375b235eb71c24119c8e73f85d4ad --- /dev/null +++ b/sentencepiece.bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14bb8dfb35c0ffdea7bc01e56cea38b9e3d5efcdcb9c251d6b40538e1aab555a +size 4852054 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..770c6f4e25faf27bbc3878b806f2ecfb88c5169e --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,255 @@ +{ + "additional_special_tokens": [ + "ace_Arab", + "ace_Latn", + "acm_Arab", + "acq_Arab", + "aeb_Arab", + "afr_Latn", + "ajp_Arab", + "aka_Latn", + "amh_Ethi", + "apc_Arab", + "arb_Arab", + "ars_Arab", + "ary_Arab", + "arz_Arab", + "asm_Beng", + "ast_Latn", + "awa_Deva", + "ayr_Latn", + "azb_Arab", + "azj_Latn", + "bak_Cyrl", + "bam_Latn", + "ban_Latn", + "bel_Cyrl", + "bem_Latn", + "ben_Beng", + "bho_Deva", + "bjn_Arab", + "bjn_Latn", + "bod_Tibt", + "bos_Latn", + "bug_Latn", + "bul_Cyrl", + "cat_Latn", + "ceb_Latn", + "ces_Latn", + "cjk_Latn", + "ckb_Arab", + "crh_Latn", + "cym_Latn", + "dan_Latn", + "deu_Latn", + "dik_Latn", + "dyu_Latn", + "dzo_Tibt", + "ell_Grek", + "eng_Latn", + "epo_Latn", + "est_Latn", + "eus_Latn", + "ewe_Latn", + "fao_Latn", + "pes_Arab", + "fij_Latn", + "fin_Latn", + "fon_Latn", + "fra_Latn", + "fur_Latn", + "fuv_Latn", + "gla_Latn", + "gle_Latn", + "glg_Latn", + "grn_Latn", + "guj_Gujr", + "hat_Latn", + "hau_Latn", + "heb_Hebr", + "hin_Deva", + "hne_Deva", + "hrv_Latn", + "hun_Latn", + "hye_Armn", + "ibo_Latn", + "ilo_Latn", + "ind_Latn", + "isl_Latn", + "ita_Latn", + "jav_Latn", + "jpn_Jpan", + "kab_Latn", + "kac_Latn", + "kam_Latn", + "kan_Knda", + "kas_Arab", + "kas_Deva", + "kat_Geor", + "knc_Arab", + "knc_Latn", + "kaz_Cyrl", + "kbp_Latn", + "kea_Latn", + "khm_Khmr", + "kik_Latn", + "kin_Latn", + "kir_Cyrl", + "kmb_Latn", + "kon_Latn", + "kor_Hang", + "kmr_Latn", + "lao_Laoo", + "lvs_Latn", + "lij_Latn", + "lim_Latn", + "lin_Latn", + "lit_Latn", + "lmo_Latn", + "ltg_Latn", + "ltz_Latn", + "lua_Latn", + "lug_Latn", + "luo_Latn", + "lus_Latn", + "mag_Deva", + "mai_Deva", + "mal_Mlym", + "mar_Deva", + "min_Latn", + "mkd_Cyrl", + "plt_Latn", + "mlt_Latn", + "mni_Beng", + "khk_Cyrl", + "mos_Latn", + "mri_Latn", + "zsm_Latn", + "mya_Mymr", + "nld_Latn", + "nno_Latn", + "nob_Latn", + "npi_Deva", + "nso_Latn", + "nus_Latn", + "nya_Latn", + "oci_Latn", + "gaz_Latn", + "ory_Orya", + "pag_Latn", + "pan_Guru", + "pap_Latn", + "pol_Latn", + "por_Latn", + "prs_Arab", + "pbt_Arab", + "quy_Latn", + "ron_Latn", + "run_Latn", + "rus_Cyrl", + "sag_Latn", + "san_Deva", + "sat_Beng", + "scn_Latn", + "shn_Mymr", + "sin_Sinh", + "slk_Latn", + "slv_Latn", + "smo_Latn", + "sna_Latn", + "snd_Arab", + "som_Latn", + "sot_Latn", + "spa_Latn", + "als_Latn", + "srd_Latn", + "srp_Cyrl", + "ssw_Latn", + "sun_Latn", + "swe_Latn", + "swh_Latn", + "szl_Latn", + "tam_Taml", + "tat_Cyrl", + "tel_Telu", + "tgk_Cyrl", + "tgl_Latn", + "tha_Thai", + "tir_Ethi", + "taq_Latn", + "taq_Tfng", + "tpi_Latn", + "tsn_Latn", + "tso_Latn", + "tuk_Latn", + "tum_Latn", + "tur_Latn", + "twi_Latn", + "tzm_Tfng", + "uig_Arab", + "ukr_Cyrl", + "umb_Latn", + "urd_Arab", + "uzn_Latn", + "vec_Latn", + "vie_Latn", + "war_Latn", + "wol_Latn", + "xho_Latn", + "ydd_Hebr", + "yor_Latn", + "yue_Hant", + "zho_Hans", + "zho_Hant", + "zul_Latn" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "cls_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "mask_token": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "sep_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..abc1aa4a73a3d9e5651cdc48e79e94c1d3bc69d4 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7508db47acfece4b7fac0942c68e6456690bbb19c311914e0e026323e7f5d536 +size 17331547 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6aaf11b45de7c803a07acc29f6df39e1b5c8bcd3 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,1879 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256001": { + "content": "ace_Arab", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256002": { + "content": "ace_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256003": { + "content": "acm_Arab", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256004": { + "content": "acq_Arab", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256005": { + "content": "aeb_Arab", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256006": { + "content": "afr_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256007": { + "content": "ajp_Arab", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256008": { + "content": "aka_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256009": { + "content": "amh_Ethi", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256010": { + "content": "apc_Arab", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256011": { + "content": "arb_Arab", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256012": { + "content": "ars_Arab", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256013": { + "content": "ary_Arab", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256014": { + "content": "arz_Arab", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256015": { + "content": "asm_Beng", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256016": { + "content": "ast_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256017": { + "content": "awa_Deva", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256018": { + "content": "ayr_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256019": { + "content": "azb_Arab", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256020": { + "content": "azj_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256021": { + "content": "bak_Cyrl", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256022": { + "content": "bam_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256023": { + "content": "ban_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256024": { + "content": "bel_Cyrl", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256025": { + "content": "bem_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256026": { + "content": "ben_Beng", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256027": { + "content": "bho_Deva", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256028": { + "content": "bjn_Arab", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256029": { + "content": "bjn_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256030": { + "content": "bod_Tibt", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256031": { + "content": "bos_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256032": { + "content": "bug_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256033": { + "content": "bul_Cyrl", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256034": { + "content": "cat_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256035": { + "content": "ceb_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256036": { + "content": "ces_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256037": { + "content": "cjk_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256038": { + "content": "ckb_Arab", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256039": { + "content": "crh_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256040": { + "content": "cym_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256041": { + "content": "dan_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256042": { + "content": "deu_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256043": { + "content": "dik_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256044": { + "content": "dyu_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256045": { + "content": "dzo_Tibt", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256046": { + "content": "ell_Grek", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256047": { + "content": "eng_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256048": { + "content": "epo_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256049": { + "content": "est_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256050": { + "content": "eus_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256051": { + "content": "ewe_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256052": { + "content": "fao_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256053": { + "content": "pes_Arab", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256054": { + "content": "fij_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256055": { + "content": "fin_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256056": { + "content": "fon_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256057": { + "content": "fra_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256058": { + "content": "fur_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256059": { + "content": "fuv_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256060": { + "content": "gla_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256061": { + "content": "gle_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256062": { + "content": "glg_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256063": { + "content": "grn_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256064": { + "content": "guj_Gujr", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256065": { + "content": "hat_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256066": { + "content": "hau_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256067": { + "content": "heb_Hebr", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256068": { + "content": "hin_Deva", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256069": { + "content": "hne_Deva", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256070": { + "content": "hrv_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256071": { + "content": "hun_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256072": { + "content": "hye_Armn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256073": { + "content": "ibo_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256074": { + "content": "ilo_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256075": { + "content": "ind_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256076": { + "content": "isl_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256077": { + "content": "ita_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256078": { + "content": "jav_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256079": { + "content": "jpn_Jpan", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256080": { + "content": "kab_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256081": { + "content": "kac_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256082": { + "content": "kam_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256083": { + "content": "kan_Knda", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256084": { + "content": "kas_Arab", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256085": { + "content": "kas_Deva", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256086": { + "content": "kat_Geor", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256087": { + "content": "knc_Arab", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256088": { + "content": "knc_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256089": { + "content": "kaz_Cyrl", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256090": { + "content": "kbp_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256091": { + "content": "kea_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256092": { + "content": "khm_Khmr", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256093": { + "content": "kik_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256094": { + "content": "kin_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256095": { + "content": "kir_Cyrl", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256096": { + "content": "kmb_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256097": { + "content": "kon_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256098": { + "content": "kor_Hang", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256099": { + "content": "kmr_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256100": { + "content": "lao_Laoo", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256101": { + "content": "lvs_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256102": { + "content": "lij_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256103": { + "content": "lim_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256104": { + "content": "lin_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256105": { + "content": "lit_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256106": { + "content": "lmo_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256107": { + "content": "ltg_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256108": { + "content": "ltz_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256109": { + "content": "lua_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256110": { + "content": "lug_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256111": { + "content": "luo_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256112": { + "content": "lus_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256113": { + "content": "mag_Deva", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256114": { + "content": "mai_Deva", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256115": { + "content": "mal_Mlym", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256116": { + "content": "mar_Deva", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256117": { + "content": "min_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256118": { + "content": "mkd_Cyrl", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256119": { + "content": "plt_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256120": { + "content": "mlt_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256121": { + "content": "mni_Beng", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256122": { + "content": "khk_Cyrl", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256123": { + "content": "mos_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256124": { + "content": "mri_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256125": { + "content": "zsm_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256126": { + "content": "mya_Mymr", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256127": { + "content": "nld_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256128": { + "content": "nno_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256129": { + "content": "nob_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256130": { + "content": "npi_Deva", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256131": { + "content": "nso_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256132": { + "content": "nus_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256133": { + "content": "nya_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256134": { + "content": "oci_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256135": { + "content": "gaz_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256136": { + "content": "ory_Orya", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256137": { + "content": "pag_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256138": { + "content": "pan_Guru", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256139": { + "content": "pap_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256140": { + "content": "pol_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256141": { + "content": "por_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256142": { + "content": "prs_Arab", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256143": { + "content": "pbt_Arab", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256144": { + "content": "quy_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256145": { + "content": "ron_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256146": { + "content": "run_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256147": { + "content": "rus_Cyrl", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256148": { + "content": "sag_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256149": { + "content": "san_Deva", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256150": { + "content": "sat_Beng", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256151": { + "content": "scn_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256152": { + "content": "shn_Mymr", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256153": { + "content": "sin_Sinh", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256154": { + "content": "slk_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256155": { + "content": "slv_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256156": { + "content": "smo_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256157": { + "content": "sna_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256158": { + "content": "snd_Arab", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256159": { + "content": "som_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256160": { + "content": "sot_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256161": { + "content": "spa_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256162": { + "content": "als_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256163": { + "content": "srd_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256164": { + "content": "srp_Cyrl", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256165": { + "content": "ssw_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256166": { + "content": "sun_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256167": { + "content": "swe_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256168": { + "content": "swh_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256169": { + "content": "szl_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256170": { + "content": "tam_Taml", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256171": { + "content": "tat_Cyrl", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256172": { + "content": "tel_Telu", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256173": { + "content": "tgk_Cyrl", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256174": { + "content": "tgl_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256175": { + "content": "tha_Thai", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256176": { + "content": "tir_Ethi", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256177": { + "content": "taq_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256178": { + "content": "taq_Tfng", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256179": { + "content": "tpi_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256180": { + "content": "tsn_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256181": { + "content": "tso_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256182": { + "content": "tuk_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256183": { + "content": "tum_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256184": { + "content": "tur_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256185": { + "content": "twi_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256186": { + "content": "tzm_Tfng", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256187": { + "content": "uig_Arab", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256188": { + "content": "ukr_Cyrl", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256189": { + "content": "umb_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256190": { + "content": "urd_Arab", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256191": { + "content": "uzn_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256192": { + "content": "vec_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256193": { + "content": "vie_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256194": { + "content": "war_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256195": { + "content": "wol_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256196": { + "content": "xho_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256197": { + "content": "ydd_Hebr", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256198": { + "content": "yor_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256199": { + "content": "yue_Hant", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256200": { + "content": "zho_Hans", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256201": { + "content": "zho_Hant", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256202": { + "content": "zul_Latn", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256203": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "ace_Arab", + "ace_Latn", + "acm_Arab", + "acq_Arab", + "aeb_Arab", + "afr_Latn", + "ajp_Arab", + "aka_Latn", + "amh_Ethi", + "apc_Arab", + "arb_Arab", + "ars_Arab", + "ary_Arab", + "arz_Arab", + "asm_Beng", + "ast_Latn", + "awa_Deva", + "ayr_Latn", + "azb_Arab", + "azj_Latn", + "bak_Cyrl", + "bam_Latn", + "ban_Latn", + "bel_Cyrl", + "bem_Latn", + "ben_Beng", + "bho_Deva", + "bjn_Arab", + "bjn_Latn", + "bod_Tibt", + "bos_Latn", + "bug_Latn", + "bul_Cyrl", + "cat_Latn", + "ceb_Latn", + "ces_Latn", + "cjk_Latn", + "ckb_Arab", + "crh_Latn", + "cym_Latn", + "dan_Latn", + "deu_Latn", + "dik_Latn", + "dyu_Latn", + "dzo_Tibt", + "ell_Grek", + "eng_Latn", + "epo_Latn", + "est_Latn", + "eus_Latn", + "ewe_Latn", + "fao_Latn", + "pes_Arab", + "fij_Latn", + "fin_Latn", + "fon_Latn", + "fra_Latn", + "fur_Latn", + "fuv_Latn", + "gla_Latn", + "gle_Latn", + "glg_Latn", + "grn_Latn", + "guj_Gujr", + "hat_Latn", + "hau_Latn", + "heb_Hebr", + "hin_Deva", + "hne_Deva", + "hrv_Latn", + "hun_Latn", + "hye_Armn", + "ibo_Latn", + "ilo_Latn", + "ind_Latn", + "isl_Latn", + "ita_Latn", + "jav_Latn", + "jpn_Jpan", + "kab_Latn", + "kac_Latn", + "kam_Latn", + "kan_Knda", + "kas_Arab", + "kas_Deva", + "kat_Geor", + "knc_Arab", + "knc_Latn", + "kaz_Cyrl", + "kbp_Latn", + "kea_Latn", + "khm_Khmr", + "kik_Latn", + "kin_Latn", + "kir_Cyrl", + "kmb_Latn", + "kon_Latn", + "kor_Hang", + "kmr_Latn", + "lao_Laoo", + "lvs_Latn", + "lij_Latn", + "lim_Latn", + "lin_Latn", + "lit_Latn", + "lmo_Latn", + "ltg_Latn", + "ltz_Latn", + "lua_Latn", + "lug_Latn", + "luo_Latn", + "lus_Latn", + "mag_Deva", + "mai_Deva", + "mal_Mlym", + "mar_Deva", + "min_Latn", + "mkd_Cyrl", + "plt_Latn", + "mlt_Latn", + "mni_Beng", + "khk_Cyrl", + "mos_Latn", + "mri_Latn", + "zsm_Latn", + "mya_Mymr", + "nld_Latn", + "nno_Latn", + "nob_Latn", + "npi_Deva", + "nso_Latn", + "nus_Latn", + "nya_Latn", + "oci_Latn", + "gaz_Latn", + "ory_Orya", + "pag_Latn", + "pan_Guru", + "pap_Latn", + "pol_Latn", + "por_Latn", + "prs_Arab", + "pbt_Arab", + "quy_Latn", + "ron_Latn", + "run_Latn", + "rus_Cyrl", + "sag_Latn", + "san_Deva", + "sat_Beng", + "scn_Latn", + "shn_Mymr", + "sin_Sinh", + "slk_Latn", + "slv_Latn", + "smo_Latn", + "sna_Latn", + "snd_Arab", + "som_Latn", + "sot_Latn", + "spa_Latn", + "als_Latn", + "srd_Latn", + "srp_Cyrl", + "ssw_Latn", + "sun_Latn", + "swe_Latn", + "swh_Latn", + "szl_Latn", + "tam_Taml", + "tat_Cyrl", + "tel_Telu", + "tgk_Cyrl", + "tgl_Latn", + "tha_Thai", + "tir_Ethi", + "taq_Latn", + "taq_Tfng", + "tpi_Latn", + "tsn_Latn", + "tso_Latn", + "tuk_Latn", + "tum_Latn", + "tur_Latn", + "twi_Latn", + "tzm_Tfng", + "uig_Arab", + "ukr_Cyrl", + "umb_Latn", + "urd_Arab", + "uzn_Latn", + "vec_Latn", + "vie_Latn", + "war_Latn", + "wol_Latn", + "xho_Latn", + "ydd_Hebr", + "yor_Latn", + "yue_Hant", + "zho_Hans", + "zho_Hant", + "zul_Latn" + ], + "bos_token": "", + "clean_up_tokenization_spaces": true, + "cls_token": "", + "eos_token": "", + "legacy_behaviour": false, + "mask_token": "", + "model_max_length": 1024, + "pad_token": "", + "sep_token": "", + "sp_model_kwargs": {}, + "src_lang": "eng_Latn", + "tgt_lang": "deu_Latn", + "tokenizer_class": "NllbTokenizer", + "unk_token": "", + "use_fast": "False" +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..20017a3b55d1f290b0cfc72c8f8f561243b061f1 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40ea4d03bd78cd4244fb7198e3e6383bf0ca77a9890cff810cea3fd4e14b04b5 +size 5112