2 files changed: +20 -6 lines changed

@@ -113,9 +113,16 @@ def init_device(self):
         # can have slightly different XLA graphs.
         world_size = self.parallel_config.world_size
         rank = xr.global_ordinal()
-        per_rank_path = os.path.join(envs.VLLM_XLA_CACHE_PATH,
-                                     f"tp{world_size}_rank{rank}")
-        xr.initialize_cache(per_rank_path, readonly=False)
+        # The PyTorch/XLA compilation cache uses the Torch IR to generate keys.
+        # Consequently, changes in optimization flags, which affect compilation
+        # results, don't change the cache key. This can result in the wrong
+        # compilation being used. To prevent this, disabling the XLA compilation
+        # cache during development is recommended. We can disable it by
+        # `export VLLM_XLA_CACHE_PATH=`.
+        if envs.VLLM_XLA_CACHE_PATH:
+            per_rank_path = os.path.join(envs.VLLM_XLA_CACHE_PATH,
+                                         f"tp{world_size}_rank{rank}")
+            xr.initialize_cache(per_rank_path, readonly=False)

         # Init ModelRunner here, so that we have access to self.device.
         self.model_runner = TPUModelRunner(self.vllm_config, self.device)

@@ -93,9 +93,16 @@ def init_device(self) -> None:
         # can have slightly different XLA graphs.
         world_size = self.parallel_config.world_size
         rank = xr.global_ordinal()
-        per_rank_path = os.path.join(envs.VLLM_XLA_CACHE_PATH,
-                                     f"tp{world_size}_rank{rank}")
-        xr.initialize_cache(per_rank_path, readonly=False)
+        # The PyTorch/XLA compilation cache uses the Torch IR to generate keys.
+        # Consequently, changes in optimization flags, which affect compilation
+        # results, don't change the cache key. This can result in the wrong
+        # compilation being used. To prevent this, disabling the XLA compilation
+        # cache during development is recommended. We can disable it by
+        # `export VLLM_XLA_CACHE_PATH=`.
+        if envs.VLLM_XLA_CACHE_PATH:
+            per_rank_path = os.path.join(envs.VLLM_XLA_CACHE_PATH,
+                                         f"tp{world_size}_rank{rank}")
+            xr.initialize_cache(per_rank_path, readonly=False)

         self.profiler = None
         if envs.VLLM_TORCH_PROFILER_DIR and self.rank < 1:
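For context, here is a minimal standalone sketch of the gating logic both hunks introduce. It is not the vLLM source: reading VLLM_XLA_CACHE_PATH straight from the environment stands in for vLLM's envs.VLLM_XLA_CACHE_PATH, and world_size is a placeholder for parallel_config.world_size. The xr.global_ordinal() and xr.initialize_cache() calls are the same PyTorch/XLA runtime APIs used in the diff.

import os

import torch_xla.runtime as xr

# Placeholder lookups; vLLM resolves these from its envs module and parallel_config.
cache_path = os.environ.get("VLLM_XLA_CACHE_PATH", "")
world_size = 1
rank = xr.global_ordinal()

if cache_path:
    # Separate cache directories per (tp size, rank), since ranks can have
    # slightly different XLA graphs.
    per_rank_path = os.path.join(cache_path, f"tp{world_size}_rank{rank}")
    xr.initialize_cache(per_rank_path, readonly=False)
# An empty value (e.g. `export VLLM_XLA_CACHE_PATH=`) falls through the guard,
# leaving the XLA compilation cache disabled, which is what the change enables
# for development workflows.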