{ "vocab_size": 500, "hf_tokenizer": { "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "ByteLevel", "add_prefix_space": false, "trim_offsets": true, "use_regex": true }, "post_processor": null, "decoder": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "model": { "type": "BPE", "dropout": null, "unk_token": "", "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "ignore_merges": false, "vocab": { "": 0, "": 1, "": 2, "": 3, "!": 4, "\"": 5, "#": 6, "$": 7, "%": 8, "&": 9, "'": 10, "(": 11, ")": 12, "*": 13, "+": 14, ",": 15, "-": 16, ".": 17, "/": 18, "0": 19, "1": 20, "2": 21, "3": 22, "4": 23, "5": 24, "6": 25, "7": 26, "8": 27, "9": 28, ":": 29, ";": 30, "<": 31, "=": 32, ">": 33, "?": 34, "@": 35, "A": 36, "B": 37, "C": 38, "D": 39, "E": 40, "F": 41, "G": 42, "H": 43, "I": 44, "J": 45, "K": 46, "L": 47, "M": 48, "N": 49, "O": 50, "P": 51, "Q": 52, "R": 53, "S": 54, "T": 55, "U": 56, "V": 57, "W": 58, "X": 59, "Y": 60, "Z": 61, "[": 62, "\\": 63, "]": 64, "^": 65, "_": 66, "`": 67, "a": 68, "b": 69, "c": 70, "d": 71, "e": 72, "f": 73, "g": 74, "h": 75, "i": 76, "j": 77, "k": 78, "l": 79, "m": 80, "n": 81, "o": 82, "p": 83, "q": 84, "r": 85, "s": 86, "t": 87, "u": 88, "v": 89, "w": 90, "x": 91, "y": 92, "z": 93, "{": 94, "|": 95, "}": 96, "~": 97, "¡": 98, "¢": 99, "£": 100, "¤": 101, "¥": 102, "¦": 103, "§": 104, "¨": 105, "©": 106, "ª": 107, "«": 108, "¬": 109, "®": 110, "¯": 111, "°": 112, "±": 113, "²": 114, "³": 115, "´": 116, "µ": 117, "¶": 118, "·": 119, "¸": 120, "¹": 121, "º": 122, "»": 123, "¼": 124, "½": 125, "¾": 126, "¿": 127, "À": 128, "Á": 129, "Â": 130, "Ã": 131, "Ä": 132, "Å": 133, "Æ": 134, "Ç": 135, "È": 136, "É": 137, "Ê": 138, "Ë": 139, "Ì": 140, "Í": 141, "Î": 142, "Ï": 143, "Ð": 144, "Ñ": 145, "Ò": 146, "Ó": 147, "Ô": 148, "Õ": 149, "Ö": 150, "×": 151, "Ø": 152, "Ù": 153, "Ú": 154, "Û": 155, "Ü": 156, "Ý": 157, "Þ": 158, "ß": 159, "à": 160, "á": 161, "â": 162, "ã": 163, "ä": 164, "å": 165, "æ": 166, "ç": 167, "è": 168, "é": 169, "ê": 170, "ë": 171, "ì": 172, "í": 173, "î": 174, "ï": 175, "ð": 176, "ñ": 177, "ò": 178, "ó": 179, "ô": 180, "õ": 181, "ö": 182, "÷": 183, "ø": 184, "ù": 185, "ú": 186, "û": 187, "ü": 188, "ý": 189, "þ": 190, "ÿ": 191, "Ā": 192, "ā": 193, "Ă": 194, "ă": 195, "Ą": 196, "ą": 197, "Ć": 198, "ć": 199, "Ĉ": 200, "ĉ": 201, "Ċ": 202, "ċ": 203, "Č": 204, "č": 205, "Ď": 206, "ď": 207, "Đ": 208, "đ": 209, "Ē": 210, "ē": 211, "Ĕ": 212, "ĕ": 213, "Ė": 214, "ė": 215, "Ę": 216, "ę": 217, "Ě": 218, "ě": 219, "Ĝ": 220, "ĝ": 221, "Ğ": 222, "ğ": 223, "Ġ": 224, "ġ": 225, "Ģ": 226, "ģ": 227, "Ĥ": 228, "ĥ": 229, "Ħ": 230, "ħ": 231, "Ĩ": 232, "ĩ": 233, "Ī": 234, "ī": 235, "Ĭ": 236, "ĭ": 237, "Į": 238, "į": 239, "İ": 240, "ı": 241, "IJ": 242, "ij": 243, "Ĵ": 244, "ĵ": 245, "Ķ": 246, "ķ": 247, "ĸ": 248, "Ĺ": 249, "ĺ": 250, "Ļ": 251, "ļ": 252, "Ľ": 253, "ľ": 254, "Ŀ": 255, "ŀ": 256, "Ł": 257, "ł": 258, "Ń": 259, "Ġt": 260, "Ġa": 261, "he": 262, "in": 263, "re": 264, "Ġthe": 265, "on": 266, "er": 267, "Ġo": 268, "at": 269, "Ġs": 270, "en": 271, "Ġc": 272, "es": 273, "Ġw": 274, "is": 275, "nd": 276, "or": 277, "it": 278, "Ġp": 279, "al": 280, "ed": 281, "Ġof": 282, "Ġb": 283, "an": 284, "Ġf": 285, "Ġin": 286, "ar": 287, "ing": 288, "ou": 289, "Ġm": 290, "ic": 291, "Ġand": 292, "Ġto": 293, "ion": 294, "Ġd": 295, "ro": 296, "le": 297, "as": 298, "Ġh": 299, "ent": 300, "Ġth": 301, "il": 302, "ct": 303, "Ġe": 304, "Ġre": 305, "om": 306, "ve": 307, "Ġn": 308, "st": 309, "Ġl": 310, "ly": 311, "Ġbe": 312, "Ġis": 313, "ĠT": 314, "se": 315, "ol": 316, "ation": 317, "Ġg": 318, "id": 319, "im": 320, "et": 321, "ĠA": 322, "ut": 323, "ce": 324, "ot": 325, "ur": 326, "ra": 327, "ch": 328, "ĠS": 329, "Ġfor": 330, "ow": 331, "ig": 332, "Ġon": 333, "ĠC": 334, "Ġthat": 335, "Ġu": 336, "ver": 337, "âĢ": 338, "Ġst": 339, "ĠI": 340, "ir": 341, "am": 342, "ay": 343, "ul": 344, "ad": 345, "el": 346, "her": 347, "Ġas": 348, "ith": 349, "Ġy": 350, "Ġpro": 351, "Ġcon": 352, "ĠM": 353, "Ġan": 354, "Ġare": 355, "Ġ(": 356, "Ġwith": 357, "Ġor": 358, "Ġ1": 359, "Ġwh": 360, "ter": 361, "if": 362, "ĠP": 363, "od": 364, "Ġit": 365, "ĠThe": 366, "Ġal": 367, "ment": 368, "th": 369, "ist": 370, "ge": 371, "op": 372, "ate": 373, "ers": 374, "ĠB": 375, "Ġde": 376, "ies": 377, "ab": 378, "Ġhe": 379, "ill": 380, "Ġsu": 381, "Ġyou": 382, "Ġex": 383, "res": 384, "ĠH": 385, "us": 386, "ere": 387, "est": 388, "ect": 389, "ess": 390, "Ġby": 391, "ore": 392, "rom": 393, "ac": 394, "Ġcom": 395, "os": 396, "ity": 397, "ld": 398, "um": 399, "and": 400, "ri": 401, "Ġat": 402, "ĠW": 403, "ĠD": 404, "em": 405, "Ġv": 406, "ive": 407, "ain": 408, "Ġwas": 409, "ĠR": 410, "qu": 411, "nt": 412, "ant": 413, "ĠE": 414, "igh": 415, "ke": 416, "pp": 417, "Ġfrom": 418, "Ġha": 419, "ud": 420, "ĠF": 421, "Ġ2": 422, "ĠN": 423, "oc": 424, "Ġch": 425, "iv": 426, "ort": 427, "Ġse": 428, "Ġne": 429, "Ġr": 430, "ĠG": 431, "Ġnot": 432, "Ġcan": 433, "00": 434, "art": 435, "ical": 436, "ure": 437, "un": 438, "ĠL": 439, "Ġhave": 440, "ial": 441, "Ġle": 442, "og": 443, "Ġsp": 444, "Ġsh": 445, "all": 446, "ight": 447, "'s": 448, "ich": 449, "ther": 450, "Ġen": 451, "pt": 452, "Ġthis": 453, "rou": 454, "Ġab": 455, "The": 456, "ould": 457, "gh": 458, "âĢĻ": 459, "ost": 460, "our": 461, "ions": 462, "ated": 463, "ome": 464, "ear": 465, "ĠJ": 466, "ine": 467, "Ġwor": 468, "ag": 469, "ĠO": 470, "du": 471, "ĠU": 472, "ard": 473, "out": 474, "Ġwe": 475, "ell": 476, "ff": 477, "ast": 478, "ap": 479, "Ġim": 480, "ec": 481, "Ġpl": 482, "Ġus": 483, "ans": 484, "Ġint": 485, "ew": 486, "Ġtheir": 487, "Ġwhich": 488, "pl": 489, "act": 490, "ust": 491, "age": 492, "ĠIn": 493, "Ġ\"": 494, "ous": 495, "ĠâĢ": 496, "are": 497, "ak": 498, "Ġwhe": 499 }, "merges": [ [ "Ġ", "t" ], [ "Ġ", "a" ], [ "h", "e" ], [ "i", "n" ], [ "r", "e" ], [ "Ġt", "he" ], [ "o", "n" ], [ "e", "r" ], [ "Ġ", "o" ], [ "a", "t" ], [ "Ġ", "s" ], [ "e", "n" ], [ "Ġ", "c" ], [ "e", "s" ], [ "Ġ", "w" ], [ "i", "s" ], [ "n", "d" ], [ "o", "r" ], [ "i", "t" ], [ "Ġ", "p" ], [ "a", "l" ], [ "e", "d" ], [ "Ġo", "f" ], [ "Ġ", "b" ], [ "a", "n" ], [ "Ġ", "f" ], [ "Ġ", "in" ], [ "a", "r" ], [ "in", "g" ], [ "o", "u" ], [ "Ġ", "m" ], [ "i", "c" ], [ "Ġa", "nd" ], [ "Ġt", "o" ], [ "i", "on" ], [ "Ġ", "d" ], [ "r", "o" ], [ "l", "e" ], [ "a", "s" ], [ "Ġ", "h" ], [ "en", "t" ], [ "Ġt", "h" ], [ "i", "l" ], [ "c", "t" ], [ "Ġ", "e" ], [ "Ġ", "re" ], [ "o", "m" ], [ "v", "e" ], [ "Ġ", "n" ], [ "s", "t" ], [ "Ġ", "l" ], [ "l", "y" ], [ "Ġb", "e" ], [ "Ġ", "is" ], [ "Ġ", "T" ], [ "s", "e" ], [ "o", "l" ], [ "at", "ion" ], [ "Ġ", "g" ], [ "i", "d" ], [ "i", "m" ], [ "e", "t" ], [ "Ġ", "A" ], [ "u", "t" ], [ "c", "e" ], [ "o", "t" ], [ "u", "r" ], [ "r", "a" ], [ "c", "h" ], [ "Ġ", "S" ], [ "Ġf", "or" ], [ "o", "w" ], [ "i", "g" ], [ "Ġ", "on" ], [ "Ġ", "C" ], [ "Ġth", "at" ], [ "Ġ", "u" ], [ "v", "er" ], [ "â", "Ģ" ], [ "Ġs", "t" ], [ "Ġ", "I" ], [ "i", "r" ], [ "a", "m" ], [ "a", "y" ], [ "u", "l" ], [ "a", "d" ], [ "e", "l" ], [ "he", "r" ], [ "Ġa", "s" ], [ "it", "h" ], [ "Ġ", "y" ], [ "Ġp", "ro" ], [ "Ġc", "on" ], [ "Ġ", "M" ], [ "Ġa", "n" ], [ "Ġa", "re" ], [ "Ġ", "(" ], [ "Ġw", "ith" ], [ "Ġo", "r" ], [ "Ġ", "1" ], [ "Ġw", "h" ], [ "t", "er" ], [ "i", "f" ], [ "Ġ", "P" ], [ "o", "d" ], [ "Ġ", "it" ], [ "ĠT", "he" ], [ "Ġa", "l" ], [ "m", "ent" ], [ "t", "h" ], [ "is", "t" ], [ "g", "e" ], [ "o", "p" ], [ "at", "e" ], [ "er", "s" ], [ "Ġ", "B" ], [ "Ġd", "e" ], [ "i", "es" ], [ "a", "b" ], [ "Ġ", "he" ], [ "il", "l" ], [ "Ġs", "u" ], [ "Ġy", "ou" ], [ "Ġe", "x" ], [ "re", "s" ], [ "Ġ", "H" ], [ "u", "s" ], [ "e", "re" ], [ "es", "t" ], [ "e", "ct" ], [ "es", "s" ], [ "Ġb", "y" ], [ "o", "re" ], [ "ro", "m" ], [ "a", "c" ], [ "Ġc", "om" ], [ "o", "s" ], [ "it", "y" ], [ "l", "d" ], [ "u", "m" ], [ "a", "nd" ], [ "r", "i" ], [ "Ġa", "t" ], [ "Ġ", "W" ], [ "Ġ", "D" ], [ "e", "m" ], [ "Ġ", "v" ], [ "i", "ve" ], [ "a", "in" ], [ "Ġw", "as" ], [ "Ġ", "R" ], [ "q", "u" ], [ "n", "t" ], [ "an", "t" ], [ "Ġ", "E" ], [ "ig", "h" ], [ "k", "e" ], [ "p", "p" ], [ "Ġf", "rom" ], [ "Ġh", "a" ], [ "u", "d" ], [ "Ġ", "F" ], [ "Ġ", "2" ], [ "Ġ", "N" ], [ "o", "c" ], [ "Ġc", "h" ], [ "i", "v" ], [ "or", "t" ], [ "Ġs", "e" ], [ "Ġn", "e" ], [ "Ġ", "r" ], [ "Ġ", "G" ], [ "Ġn", "ot" ], [ "Ġc", "an" ], [ "0", "0" ], [ "ar", "t" ], [ "ic", "al" ], [ "u", "re" ], [ "u", "n" ], [ "Ġ", "L" ], [ "Ġha", "ve" ], [ "i", "al" ], [ "Ġ", "le" ], [ "o", "g" ], [ "Ġs", "p" ], [ "Ġs", "h" ], [ "al", "l" ], [ "igh", "t" ], [ "'", "s" ], [ "ic", "h" ], [ "t", "her" ], [ "Ġ", "en" ], [ "p", "t" ], [ "Ġth", "is" ], [ "r", "ou" ], [ "Ġa", "b" ], [ "T", "he" ], [ "ou", "ld" ], [ "g", "h" ], [ "âĢ", "Ļ" ], [ "o", "st" ], [ "ou", "r" ], [ "ion", "s" ], [ "at", "ed" ], [ "om", "e" ], [ "e", "ar" ], [ "Ġ", "J" ], [ "in", "e" ], [ "Ġw", "or" ], [ "a", "g" ], [ "Ġ", "O" ], [ "d", "u" ], [ "Ġ", "U" ], [ "ar", "d" ], [ "ou", "t" ], [ "Ġw", "e" ], [ "el", "l" ], [ "f", "f" ], [ "as", "t" ], [ "a", "p" ], [ "Ġ", "im" ], [ "e", "c" ], [ "Ġp", "l" ], [ "Ġu", "s" ], [ "an", "s" ], [ "Ġin", "t" ], [ "e", "w" ], [ "Ġthe", "ir" ], [ "Ġwh", "ich" ], [ "p", "l" ], [ "a", "ct" ], [ "u", "st" ], [ "a", "ge" ], [ "ĠI", "n" ], [ "Ġ", "\"" ], [ "ou", "s" ], [ "Ġ", "âĢ" ], [ "a", "re" ], [ "a", "k" ], [ "Ġw", "he" ] ] } } }