{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": null, "post_processor": { "type": "TemplateProcessing", "single": [ { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "", "type_id": 0 } } ], "pair": [ { "Sequence": { "id": "A", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } } ], "special_tokens": { "": { "id": "", "ids": [ 1 ], "tokens": [ "" ] } } }, "decoder": null, "model": { "type": "BPE", "dropout": null, "unk_token": null, "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "ignore_merges": false, "vocab": { "": 0, "": 1, "": 2, "": 3, "'": 4, "-": 5, ".": 6, "/": 7, "1": 8, "a": 9, "b": 10, "d": 11, "e": 12, "f": 13, "h": 14, "i": 15, "j": 16, "k": 17, "l": 18, "m": 19, "n": 20, "o": 21, "p": 22, "r": 23, "s": 24, "t": 25, "u": 26, "w": 27, "ŋ": 28, "ɔ": 29, "ɕ": 30, "ɛ": 31, "ɤ": 32, "ɯ": 33, "ʔ": 34, "ʰ": 35, "ː": 36, "˥": 37, "˦": 38, "˧": 39, "˨": 40, "˩": 41, "̚": 42, "̯": 43, "͡": 44, "ก": 45, "ข": 46, "ฃ": 47, "ค": 48, "ฅ": 49, "ฆ": 50, "ง": 51, "จ": 52, "ฉ": 53, "ช": 54, "ซ": 55, "ฌ": 56, "ญ": 57, "ฎ": 58, "ฏ": 59, "ฐ": 60, "ฑ": 61, "ฒ": 62, "ณ": 63, "ด": 64, "ต": 65, "ถ": 66, "ท": 67, "ธ": 68, "น": 69, "บ": 70, "ป": 71, "ผ": 72, "ฝ": 73, "พ": 74, "ฟ": 75, "ภ": 76, "ม": 77, "ย": 78, "ร": 79, "ฤ": 80, "ล": 81, "ฦ": 82, "ว": 83, "ศ": 84, "ษ": 85, "ส": 86, "ห": 87, "ฬ": 88, "อ": 89, "ฮ": 90, "ฯ": 91, "ะ": 92, "ั": 93, "า": 94, "ำ": 95, "ิ": 96, "ี": 97, "ึ": 98, "ื": 99, "ุ": 100, "ู": 101, "ฺ": 102, "฿": 103, "เ": 104, "แ": 105, "โ": 106, "ใ": 107, "ไ": 108, "ๅ": 109, "็": 110, "่": 111, "้": 112, "๊": 113, "๋": 114, "์": 115, "ํ": 116, "‍": 117, "˩.": 118, "˧.": 119, "aː": 120, "˦˥": 121, "˦˥.": 122, "˨˩.": 123, "˥˩.": 124, "aːn": 125, "˩˦": 126, "˩˩˦": 127, "ʰa": 128, "ɔː": 129, "้า": 130, "n˧.": 131, "tɕ": 132, "้าน": 133, "baːn": 134, "บ้าน": 135, "baːn˥˩.": 136, "˩˩˦.": 137, "pʰ": 138, "a˨˩.": 139, "kʰ": 140, "a˦˥.": 141, "˨˩": 142, "tʰ": 143, "aː˧.": 144, "n˧": 145, "m˧.": 146, "iː": 147, "˥˩": 148, "ŋ˧.": 149, "aj": 150, "ʰa˦˥.": 151, "at": 152, "ɛː": 153, "าร": 154, "eː": 155, "อง": 156, "aː˧": 157, "oː": 158, "i˦˥.": 159, "tɕʰ": 160, "uː": 161, "it": 162, "aːn˧.": 163, "sa˨˩.": 164, "pʰa": 165, "am˧.": 166, "ŋ˧": 167, "on˧.": 168, "ระ": 169, "ัน": 170, "การ": 171, "ua": 172, "tʰa": 173, "่า": 174, "ra˦˥.": 175, "kaːn˧.": 176, "an˧.": 177, "ิน": 178, "i˨˩.": 179, "ŋ˩˩˦.": 180, "j˧.": 181, "aːt": 182, "หน": 183, "าม": 184, "อย": 185, "ีย": 186, "iː˧.": 187, "ia": 188, "ɯa": 189, "รา": 190, "รั": 191, "m˧": 192, "k˨˩.": 193, "ตำ": 194, "tam˧.": 195, "บล": 196, "าน": 197, "tʰa˦˥.": 198, "iː˧": 199, "ra˨˩.": 200, "kʰa": 201, "bon˧.": 202, "ตำบล": 203, "tam˧.bon˧.": 204, "t˦˥.": 205, "na˦˥.": 206, "ɔːn˧": 207, "at˦˥.": 208, "n˩˩˦.": 209, "sa": 210, "ŋ˥˩.": 211, "อน": 212, "sɔː": 213, "saː": 214, "นา": 215, "waː": 216, "baːn˥˩.n": 217, "ร์": 218, "รร": 219, "ɤː": 220, "วั": 221, "่ง": 222, "t˨˩.": 223, "าง": 224, "tɕʰa˦˥.": 225, "su": 226, "ัก": 227, "t˦˥": 228, "k˦˥.": 229, "ริ": 230, "eʔ": 231, "าย": 232, "ติ": 233, "คว": 234, "aːj": 235, "aj˧.": 236, "an˧": 237, "k˨˩": 238, "สุ": 239, "ɔːŋ˩˩˦.": 240, "วิ": 241, "at˦˥": 242, "naː": 243, "m˩˩˦.": 244, "p˨˩.": 245, "oː˧.": 246, "ประ": 247, "pra˨˩.": 248, "aːn˧": 249, "tɕʰa": 250, "าก": 251, "n˩˩˦": 252, "หม": 253, "k˥˩.": 254, "eː˧.": 255, "kʰwaː": 256, "ภา": 257, "͡ɕ": 258, "ือ": 259, "t͡ɕ": 260, "ma˦˥.": 261, "ี่": 262, "aj˧": 263, "เก": 264, "tʰaː": 265, "น์": 266, "หล": 267, "lɔː": 268, "ɯː": 269, "้น": 270, "aj˥˩.": 271, "ัง": 272, "no": 273, "ความ": 274, "พิ": 275, "ta˨˩.": 276, "ซอย": 277, "la": 278, "ณ์": 279, "̚˨˩.": 280, "sɔːj˧.": 281, "ภั": 282, "หนอง": 283, "t˨˩": 284, "kʰwaːm˧.": 285, "ิต": 286, "at˨˩.": 287, "pʰo": 288, "ŋ˩˩˦": 289, "ɔː˧.": 290, "it˦˥.": 291, "wa˦˥.": 292, "พร": 293, "น้": 294, "พั": 295, "ย์": 296, "คล": 297, "เส": 298, "ื่": 299, "ra": 300, "jaː˧.": 301, "ดา": 302, "อก": 303, "in˧.": 304, "pʰuː": 305, "ธิ": 306, "pʰat˦˥.": 307, "คร": 308, "ɔːn˧.": 309, "เข": 310, "baːn˥˩.kʰ": 311, "้ว": 312, "ชา": 313, "na": 314, "pʰa˦˥.": 315, "ัว": 316, "su˨˩.": 317, "so": 318, "เล": 319, "kʰaː": 320, "kʰr": 321, "ยา": 322, "นิ": 323, "tʰa˨˩.": 324, "ม่": 325, "ัญ": 326, "กร": 327, "ja˦˥.": 328, "เท": 329, "j˧": 330, "ni˦˥.": 331, "k˦˥": 332, "วง": 333, "บ้านหนอง": 334, "naː˧.": 335, "ัย": 336, "baːn˥˩.nɔːŋ˩˩˦.": 337, "pa˨˩.": 338, "เป": 339, "it˨˩.": 340, "ŋ˨˩.": 341, "รี": 342, "siː": 343, "ŋ˥˩": 344, "jaː˧": 345, "ka˨˩.": 346, "n˥˩.": 347, "pʰat˦˥": 348, "ลา": 349, "it˦˥": 350, "p˦˥.": 351, "si˨˩.": 352, "สิ": 353, "้อ": 354, "ญา": 355, "in˧": 356, "kr": 357, "wi˦˥.": 358, "ณั": 359, "oŋ˧.": 360, "saː˩˩˦.": 361, "m˦˥.": 362, "tʰu": 363, "aːŋ˧.": 364, "เม": 365, "บ้านโ": 366, "pʰɔːn˧": 367, "เร": 368, "ับ": 369, "้ง": 370, "tʰi˦˥.": 371, "aw": 372, "เจ": 373, "nat˦˥.": 374, "la˦˥.": 375, "on˧": 376, "daː˧": 377, "k̚": 378, "่น": 379, "pʰaː˧.": 380, "ʔa˨˩.": 381, "ti˨˩.": 382, "ตา": 383, "iː˥˩.": 384, "กล": 385, "สม": 386, "raː˧.": 387, "ตร": 388, "ันท": 389, "เด": 390, "tʰɔː": 391, "ru": 392, "t˥˩": 393, "น้ำ": 394, "ทร": 395, "ู้": 396, "pʰi˦˥.": 397, "วน": 398, "ต์": 399, "u˨˩.": 400, "aŋ˧.": 401, "ัด": 402, "aːt˥˩": 403, "ชน": 404, "si": 405, "it˨˩": 406, "uː˧.": 407, "บ้านท": 408, "พล": 409, "kra˨˩.": 410, "ro": 411, "กา": 412, "t˥˩.": 413, "laː": 414, "นน": 415, "maj": 416, "rɔː": 417, "naː˧": 418, "paː": 419, "roː": 420, "มา": 421, "aːt˥˩.": 422, "pʰaː": 423, "่อ": 424, "โร": 425, "ุม": 426, "กระ": 427, "p˥˩.": 428, "ʔaː˧.": 429, "baːn˥˩.h": 430, "n˥˩": 431, "ɔʔ": 432, "t̚": 433, "tɕa˨˩.": 434, "kan˧.": 435, "บ้านห": 436, "็น": 437, "nu": 438, "บ้านส": 439, "mɛː": 440, "ที่": 441, "ri˦˥.": 442, "nɔː": 443, "k˥˩": 444, "seː": 445, "un˧.": 446, "รรณ": 447, "p˦˥": 448, "rat˦˥": 449, "w˧.": 450, "เน": 451, "บุ": 452, "kʰa˦˥.": 453, "p˨˩": 454, "lɔːŋ˧.": 455, "kʰɔː": 456, "ศิ": 457, "ต้": 458, "คลอง": 459, "kaːn˧": 460, "baːn˥˩.b": 461, "rat˦˥.": 462, "ดี": 463, "pr": 464, "siː˩˩˦.": 465, "บ้านแ": 466, "ินท": 467, "พัฒ": 468, "เช": 469, "kʰaw": 470, "นก": 471, "kɛː": 472, "tʰiː˥˩.": 473, "เพ": 474, "้วย": 475, "m˥˩.": 476, "ี้": 477, "รรม": 478, "baːn˥˩.d": 479, "tʰi": 480, "taː˧.": 481, "aːt˨˩.": 482, "n˨˩.": 483, "kɔːn˧": 484, "pʰuː˥˩.": 485, "ิด": 486, "บ้านป": 487, "่าง": 488, "aj˥˩": 489, "รณ์": 490, "aːt˨˩": 491, "คำ": 492, "ตะ": 493, "อม": 494, "ดง": 495, "ุ่ง": 496, "้าง": 497, "เบ": 498, "jaː": 499 }, "merges": [ [ "˩", "." ], [ "˧", "." ], [ "a", "ː" ], [ "˦", "˥" ], [ "˦˥", "." ], [ "˨", "˩." ], [ "˥", "˩." ], [ "aː", "n" ], [ "˩", "˦" ], [ "˩", "˩˦" ], [ "ʰ", "a" ], [ "ɔ", "ː" ], [ "้", "า" ], [ "n", "˧." ], [ "t", "ɕ" ], [ "้า", "น" ], [ "b", "aːn" ], [ "บ", "้าน" ], [ "baːn", "˥˩." ], [ "˩˩˦", "." ], [ "p", "ʰ" ], [ "a", "˨˩." ], [ "k", "ʰ" ], [ "a", "˦˥." ], [ "˨", "˩" ], [ "t", "ʰ" ], [ "aː", "˧." ], [ "n", "˧" ], [ "m", "˧." ], [ "i", "ː" ], [ "˥", "˩" ], [ "ŋ", "˧." ], [ "a", "j" ], [ "ʰa", "˦˥." ], [ "a", "t" ], [ "ɛ", "ː" ], [ "า", "ร" ], [ "e", "ː" ], [ "อ", "ง" ], [ "aː", "˧" ], [ "o", "ː" ], [ "i", "˦˥." ], [ "tɕ", "ʰ" ], [ "u", "ː" ], [ "i", "t" ], [ "aːn", "˧." ], [ "s", "a˨˩." ], [ "p", "ʰa" ], [ "a", "m˧." ], [ "ŋ", "˧" ], [ "o", "n˧." ], [ "ร", "ะ" ], [ "ั", "น" ], [ "ก", "าร" ], [ "u", "a" ], [ "t", "ʰa" ], [ "่", "า" ], [ "r", "a˦˥." ], [ "k", "aːn˧." ], [ "a", "n˧." ], [ "ิ", "น" ], [ "i", "˨˩." ], [ "ŋ", "˩˩˦." ], [ "j", "˧." ], [ "aː", "t" ], [ "ห", "น" ], [ "า", "ม" ], [ "อ", "ย" ], [ "ี", "ย" ], [ "iː", "˧." ], [ "i", "a" ], [ "ɯ", "a" ], [ "ร", "า" ], [ "ร", "ั" ], [ "m", "˧" ], [ "k", "˨˩." ], [ "ต", "ำ" ], [ "t", "am˧." ], [ "บ", "ล" ], [ "า", "น" ], [ "t", "ʰa˦˥." ], [ "iː", "˧" ], [ "r", "a˨˩." ], [ "k", "ʰa" ], [ "b", "on˧." ], [ "ตำ", "บล" ], [ "tam˧.", "bon˧." ], [ "t", "˦˥." ], [ "n", "a˦˥." ], [ "ɔː", "n˧" ], [ "at", "˦˥." ], [ "n", "˩˩˦." ], [ "s", "a" ], [ "ŋ", "˥˩." ], [ "อ", "น" ], [ "s", "ɔː" ], [ "s", "aː" ], [ "น", "า" ], [ "w", "aː" ], [ "baːn˥˩.", "n" ], [ "ร", "์" ], [ "ร", "ร" ], [ "ɤ", "ː" ], [ "ว", "ั" ], [ "่", "ง" ], [ "t", "˨˩." ], [ "า", "ง" ], [ "tɕ", "ʰa˦˥." ], [ "s", "u" ], [ "ั", "ก" ], [ "t", "˦˥" ], [ "k", "˦˥." ], [ "ร", "ิ" ], [ "e", "ʔ" ], [ "า", "ย" ], [ "ต", "ิ" ], [ "ค", "ว" ], [ "aː", "j" ], [ "aj", "˧." ], [ "a", "n˧" ], [ "k", "˨˩" ], [ "ส", "ุ" ], [ "ɔː", "ŋ˩˩˦." ], [ "ว", "ิ" ], [ "at", "˦˥" ], [ "n", "aː" ], [ "m", "˩˩˦." ], [ "p", "˨˩." ], [ "oː", "˧." ], [ "ป", "ระ" ], [ "p", "ra˨˩." ], [ "aːn", "˧" ], [ "tɕ", "ʰa" ], [ "า", "ก" ], [ "n", "˩˩˦" ], [ "ห", "ม" ], [ "k", "˥˩." ], [ "eː", "˧." ], [ "kʰ", "waː" ], [ "ภ", "า" ], [ "͡", "ɕ" ], [ "ื", "อ" ], [ "t", "͡ɕ" ], [ "m", "a˦˥." ], [ "ี", "่" ], [ "aj", "˧" ], [ "เ", "ก" ], [ "tʰ", "aː" ], [ "น", "์" ], [ "ห", "ล" ], [ "l", "ɔː" ], [ "ɯ", "ː" ], [ "้", "น" ], [ "aj", "˥˩." ], [ "ั", "ง" ], [ "n", "o" ], [ "คว", "าม" ], [ "พ", "ิ" ], [ "t", "a˨˩." ], [ "ซ", "อย" ], [ "l", "a" ], [ "ณ", "์" ], [ "̚", "˨˩." ], [ "sɔː", "j˧." ], [ "ภ", "ั" ], [ "หน", "อง" ], [ "t", "˨˩" ], [ "kʰwaː", "m˧." ], [ "ิ", "ต" ], [ "at", "˨˩." ], [ "pʰ", "o" ], [ "ŋ", "˩˩˦" ], [ "ɔː", "˧." ], [ "it", "˦˥." ], [ "w", "a˦˥." ], [ "พ", "ร" ], [ "น", "้" ], [ "พ", "ั" ], [ "ย", "์" ], [ "ค", "ล" ], [ "เ", "ส" ], [ "ื", "่" ], [ "r", "a" ], [ "j", "aː˧." ], [ "ด", "า" ], [ "อ", "ก" ], [ "i", "n˧." ], [ "pʰ", "uː" ], [ "ธ", "ิ" ], [ "pʰa", "t˦˥." ], [ "ค", "ร" ], [ "ɔː", "n˧." ], [ "เ", "ข" ], [ "baːn˥˩.", "kʰ" ], [ "้", "ว" ], [ "ช", "า" ], [ "n", "a" ], [ "p", "ʰa˦˥." ], [ "ั", "ว" ], [ "su", "˨˩." ], [ "s", "o" ], [ "เ", "ล" ], [ "kʰ", "aː" ], [ "kʰ", "r" ], [ "ย", "า" ], [ "น", "ิ" ], [ "tʰa", "˨˩." ], [ "ม", "่" ], [ "ั", "ญ" ], [ "ก", "ร" ], [ "j", "a˦˥." ], [ "เ", "ท" ], [ "j", "˧" ], [ "n", "i˦˥." ], [ "k", "˦˥" ], [ "ว", "ง" ], [ "บ้าน", "หนอง" ], [ "n", "aː˧." ], [ "ั", "ย" ], [ "baːn˥˩.n", "ɔːŋ˩˩˦." ], [ "p", "a˨˩." ], [ "เ", "ป" ], [ "it", "˨˩." ], [ "ŋ", "˨˩." ], [ "ร", "ี" ], [ "s", "iː" ], [ "ŋ", "˥˩" ], [ "j", "aː˧" ], [ "k", "a˨˩." ], [ "n", "˥˩." ], [ "pʰa", "t˦˥" ], [ "ล", "า" ], [ "it", "˦˥" ], [ "p", "˦˥." ], [ "s", "i˨˩." ], [ "ส", "ิ" ], [ "้", "อ" ], [ "ญ", "า" ], [ "i", "n˧" ], [ "k", "r" ], [ "w", "i˦˥." ], [ "ณ", "ั" ], [ "o", "ŋ˧." ], [ "saː", "˩˩˦." ], [ "m", "˦˥." ], [ "tʰ", "u" ], [ "aː", "ŋ˧." ], [ "เ", "ม" ], [ "บ้าน", "โ" ], [ "pʰ", "ɔːn˧" ], [ "เ", "ร" ], [ "ั", "บ" ], [ "้", "ง" ], [ "tʰ", "i˦˥." ], [ "a", "w" ], [ "เ", "จ" ], [ "n", "at˦˥." ], [ "l", "a˦˥." ], [ "o", "n˧" ], [ "d", "aː˧" ], [ "k", "̚" ], [ "่", "น" ], [ "pʰ", "aː˧." ], [ "ʔ", "a˨˩." ], [ "t", "i˨˩." ], [ "ต", "า" ], [ "iː", "˥˩." ], [ "ก", "ล" ], [ "ส", "ม" ], [ "r", "aː˧." ], [ "ต", "ร" ], [ "ัน", "ท" ], [ "เ", "ด" ], [ "tʰ", "ɔː" ], [ "r", "u" ], [ "t", "˥˩" ], [ "น้", "ำ" ], [ "ท", "ร" ], [ "ู", "้" ], [ "pʰ", "i˦˥." ], [ "ว", "น" ], [ "ต", "์" ], [ "u", "˨˩." ], [ "a", "ŋ˧." ], [ "ั", "ด" ], [ "aːt", "˥˩" ], [ "ช", "น" ], [ "s", "i" ], [ "it", "˨˩" ], [ "uː", "˧." ], [ "บ้าน", "ท" ], [ "พ", "ล" ], [ "k", "ra˨˩." ], [ "r", "o" ], [ "ก", "า" ], [ "t", "˥˩." ], [ "l", "aː" ], [ "น", "น" ], [ "m", "aj" ], [ "r", "ɔː" ], [ "n", "aː˧" ], [ "p", "aː" ], [ "r", "oː" ], [ "ม", "า" ], [ "aːt", "˥˩." ], [ "pʰ", "aː" ], [ "่", "อ" ], [ "โ", "ร" ], [ "ุ", "ม" ], [ "ก", "ระ" ], [ "p", "˥˩." ], [ "ʔ", "aː˧." ], [ "baːn˥˩.", "h" ], [ "n", "˥˩" ], [ "ɔ", "ʔ" ], [ "t", "̚" ], [ "tɕ", "a˨˩." ], [ "k", "an˧." ], [ "บ้าน", "ห" ], [ "็", "น" ], [ "n", "u" ], [ "บ้าน", "ส" ], [ "m", "ɛː" ], [ "ท", "ี่" ], [ "r", "i˦˥." ], [ "n", "ɔː" ], [ "k", "˥˩" ], [ "s", "eː" ], [ "u", "n˧." ], [ "รร", "ณ" ], [ "p", "˦˥" ], [ "r", "at˦˥" ], [ "w", "˧." ], [ "เ", "น" ], [ "บ", "ุ" ], [ "k", "ʰa˦˥." ], [ "p", "˨˩" ], [ "lɔː", "ŋ˧." ], [ "kʰ", "ɔː" ], [ "ศ", "ิ" ], [ "ต", "้" ], [ "คล", "อง" ], [ "k", "aːn˧" ], [ "baːn˥˩.", "b" ], [ "r", "at˦˥." ], [ "ด", "ี" ], [ "p", "r" ], [ "siː", "˩˩˦." ], [ "บ้าน", "แ" ], [ "ิน", "ท" ], [ "พั", "ฒ" ], [ "เ", "ช" ], [ "kʰa", "w" ], [ "น", "ก" ], [ "k", "ɛː" ], [ "tʰ", "iː˥˩." ], [ "เ", "พ" ], [ "้ว", "ย" ], [ "m", "˥˩." ], [ "ี", "้" ], [ "รร", "ม" ], [ "baːn˥˩.", "d" ], [ "tʰ", "i" ], [ "t", "aː˧." ], [ "aːt", "˨˩." ], [ "n", "˨˩." ], [ "k", "ɔːn˧" ], [ "pʰuː", "˥˩." ], [ "ิ", "ด" ], [ "บ้าน", "ป" ], [ "่า", "ง" ], [ "aj", "˥˩" ], [ "ร", "ณ์" ], [ "aːt", "˨˩" ], [ "ค", "ำ" ], [ "ต", "ะ" ], [ "อ", "ม" ], [ "ด", "ง" ], [ "ุ", "่ง" ], [ "้า", "ง" ], [ "เ", "บ" ], [ "j", "aː" ] ] } }