PyTorch
gpt2
achille-fusco committed on
Commit
7512fea
·
verified ·
1 Parent(s): 2c42f3d

Update tokenizer.py

Browse files
Files changed (1) hide show
  1. tokenizer.py +28 -28
tokenizer.py CHANGED
@@ -16,35 +16,35 @@ def _get_repo_file(repo_id_or_path: str, filename: str, revision: Optional[str]
16
  return local
17
  return hf_hub_download(repo_id=repo_id_or_path, filename=filename, revision=revision)
18
 
19
- def _coerce_to_str(x):
20
- # common cases first
21
- if isinstance(x, str):
22
- return x
23
- if isinstance(x, dict):
24
- for key in ("text", "sentence", "input", "prompt"):
25
- if key in x and isinstance(x[key], str):
26
- return x[key]
27
- # fallback: join any stringy values
28
- vals = [v for v in x.values() if isinstance(v, str)]
29
- if vals:
30
- return " ".join(vals)
31
- return str(x)
32
- if isinstance(x, (list, tuple)):
33
- # prefer first/last string element if present
34
- for pick in (0, -1):
35
- try:
36
- v = x[pick]
37
- if isinstance(v, str):
38
- return v
39
- except Exception:
40
- pass
41
- # else join all string elements
42
- parts = [v for v in x if isinstance(v, str)]
43
- if parts:
44
- return " ".join(parts)
45
- return str(x)
46
- # final fallback
47
  return str(x)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
  class SyllabicTokenizerWrapper(PreTrainedTokenizerFast):
50
  slow_tokenizer_class = None
 
16
  return local
17
  return hf_hub_download(repo_id=repo_id_or_path, filename=filename, revision=revision)
18
 
19
+ def _coerce_to_str(x):
20
+ # common cases first
21
+ if isinstance(x, str):
22
+ return x
23
+ if isinstance(x, dict):
24
+ for key in ("text", "sentence", "input", "prompt"):
25
+ if key in x and isinstance(x[key], str):
26
+ return x[key]
27
+ # fallback: join any stringy values
28
+ vals = [v for v in x.values() if isinstance(v, str)]
29
+ if vals:
30
+ return " ".join(vals)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  return str(x)
32
+ if isinstance(x, (list, tuple)):
33
+ # prefer first/last string element if present
34
+ for pick in (0, -1):
35
+ try:
36
+ v = x[pick]
37
+ if isinstance(v, str):
38
+ return v
39
+ except Exception:
40
+ pass
41
+ # else join all string elements
42
+ parts = [v for v in x if isinstance(v, str)]
43
+ if parts:
44
+ return " ".join(parts)
45
+ return str(x)
46
+ # final fallback
47
+ return str(x)
48
 
49
  class SyllabicTokenizerWrapper(PreTrainedTokenizerFast):
50
  slow_tokenizer_class = None