@@ -22,10 +22,6 @@ def _create_combined_repo_layout(root: Path) -> None:
2222 for name in ms .BUNDLES ["xgb" ].required_files :
2323 _touch (root / "xgb" / name )
2424
25- # Tokenizer bundle
26- for name in ms .BUNDLES ["tokenizer" ].required_files :
27- _touch (root / "tokenizer" / name )
28-
2925
3026class TestModelStoreSharedRepo (unittest .TestCase ):
3127 def setUp (self ) -> None :
@@ -68,12 +64,10 @@ def snapshot_download(
6864 with patch .object (ms , "_import_modelscope_snapshot_download" , return_value = snapshot_download ):
6965 vision_dir = Path (ms .resolve_bundle_dir ("vision" , model_home = tmp , provider = "modelscope" , offline = False ))
7066 xgb_dir = Path (ms .resolve_bundle_dir ("xgb" , model_home = tmp , provider = "modelscope" , offline = False ))
71- tok_dir = Path (ms .resolve_bundle_dir ("tokenizer" , model_home = tmp , provider = "modelscope" , offline = False ))
7267
7368 expected_root = (Path (tmp ) / "modelscope" / "Xorbits__deepdoc" / "v1" ).resolve ()
7469 self .assertEqual (vision_dir .resolve (), (expected_root / "vision" ).resolve ())
7570 self .assertEqual (xgb_dir .resolve (), (expected_root / "xgb" ).resolve ())
76- self .assertEqual (tok_dir .resolve (), (expected_root / "tokenizer" ).resolve ())
7771
7872 self .assertGreaterEqual (len (calls ), 1 )
7973 for call in calls :
@@ -134,8 +128,22 @@ def snapshot_download(*args, **kwargs) -> str: # pragma: no cover
134128 with patch .object (ms , "_import_modelscope_snapshot_download" , return_value = snapshot_download ):
135129 vision_dir = Path (ms .resolve_bundle_dir ("vision" , model_home = tmp , provider = "auto" , offline = False ))
136130 xgb_dir = Path (ms .resolve_bundle_dir ("xgb" , model_home = tmp , provider = "auto" , offline = False ))
137- tok_dir = Path (ms .resolve_bundle_dir ("tokenizer" , model_home = tmp , provider = "auto" , offline = False ))
138131
139132 self .assertEqual (vision_dir .resolve (), (expected_root / "vision" ).resolve ())
140133 self .assertEqual (xgb_dir .resolve (), (expected_root / "xgb" ).resolve ())
141- self .assertEqual (tok_dir .resolve (), (expected_root / "tokenizer" ).resolve ())
134+
def test_resolve_tokenizer_dict_prefix_uses_packaged_dict_by_default(self) -> None:
    """With no override in the environment, the resolved prefix is ``huqie``.

    Also checks that ``<prefix>.txt`` exists on disk.
    """
    # Work on a scoped copy of the environment: patch.dict restores the
    # original mapping on exit, so removing the override variable here
    # cannot affect any other test.
    with patch.dict(os.environ):
        # "By default" presumably means the override variable is unset;
        # drop any value inherited from the shell or left behind by
        # earlier code so the outcome does not depend on ambient state.
        os.environ.pop(ms.TOKENIZER_MODEL_DIR_ENV, None)

        prefix = Path(ms.resolve_tokenizer_dict_prefix())

    self.assertEqual(prefix.name, "huqie")
    self.assertTrue(prefix.with_suffix(".txt").exists())
140+
def test_resolve_tokenizer_dict_prefix_uses_env_dir_when_set(self) -> None:
    """When the override variable names a directory, the prefix is ``<dir>/huqie``."""
    with tempfile.TemporaryDirectory() as tmp:
        tokenizer_dir = Path(tmp) / "tokenizer"
        _touch(tokenizer_dir / "huqie.txt")

        # Scope the override with patch.dict instead of assigning to
        # os.environ directly: a plain assignment would outlive this test
        # and leave the variable pointing at a directory that is deleted
        # as soon as the TemporaryDirectory context exits.
        override = {ms.TOKENIZER_MODEL_DIR_ENV: str(tokenizer_dir)}
        with patch.dict(os.environ, override):
            prefix = Path(ms.resolve_tokenizer_dict_prefix())

        # Compare while the temporary directory still exists so both
        # sides resolve against real paths.
        self.assertEqual(prefix.resolve(), (tokenizer_dir / "huqie").resolve())
0 commit comments