ModelCloud · Qubitium · Jan 20, 2025 · Jan 20, 2025 · Jan 20, 2025 · Jan 20, 2025
diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
@@ -63,7 +63,7 @@ env:
   RUNNER: 10.0.13.31
   TRANSFORMERS_DIFF_TESTS: "models/test_internlm.py,models/test_internlm2_5.py,models/test_xverse.py"
   TORCH_2_5_TESTS: "test_evalplus.py,test_perplexity.py,test_q4_ipex.py,test_ipex_xpu.py,test_save_loaded_quantized_model.py,test_quant_formats.py,models/test_hymba.py"
-  IGNORED_TEST_FILES: "test_tgi.py,test_gptneox.py,models/test_mixtral.py"
+  IGNORED_TEST_FILES: "test_tgi.py,test_gptneox.py,models/test_mixtral.py,models/test_phi_3_moe.py"
   GPTQMODEL_FORCE_BUILD: 1
   repo: ${{ github.event.inputs.repo || github.repository }}
   ref: ${{ github.event.inputs.ref || github.ref }}

diff --git a/gptqmodel/models/_const.py b/gptqmodel/models/_const.py
@@ -148,6 +148,7 @@ def get_best_device(backend: BACKEND = BACKEND.AUTO) -> torch.device:
     "qwen2",
     "phi",
     "phi3",
+    "phimoe",
     "gemma",
     "gemma2",
     "starcoder2",

diff --git a/gptqmodel/models/auto.py b/gptqmodel/models/auto.py
@@ -76,7 +76,7 @@
 from .definitions.opt import OPTGPTQ  # noqa: E402
 from .definitions.ovis import OvisGPTQ  # noqa: E402
 from .definitions.phi import PhiGPTQ  # noqa: E402
-from .definitions.phi3 import Phi3GPTQ  # noqa: E402
+from .definitions.phi3 import Phi3GPTQ, PhiMoEGPTQForCausalLM  # noqa: E402
 from .definitions.qwen import QwenGPTQ  # noqa: E402
 from .definitions.qwen2 import Qwen2GPTQ  # noqa: E402
 from .definitions.qwen2_moe import Qwen2MoeGPTQ  # noqa: E402
@@ -124,6 +124,7 @@
     "gemma2": Gemma2GPTQ,
     "phi": PhiGPTQ,
     "phi3": Phi3GPTQ,
+    "phimoe": PhiMoEGPTQForCausalLM,
     "mpt": MPTGPTQ,
     "minicpm": MiniCPMGPTQ,
     "minicpm3":MiniCPM3GPTQ,

diff --git a/gptqmodel/models/definitions/phi3.py b/gptqmodel/models/definitions/phi3.py
@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+from .._const import EXPERT_INDEX_PLACEHOLDER
 from ..base import BaseGPTQModel
 
 
@@ -27,3 +27,19 @@ class Phi3GPTQ(BaseGPTQModel):
         ["mlp.gate_up_proj"],
         ["mlp.down_proj"],
     ]
+
+class PhiMoEGPTQForCausalLM(BaseGPTQModel):  # declarative model definition; all behavior comes from BaseGPTQModel
+    require_pkgs_version = ["transformers<=4.44.2"]  # version spec: transformers 4.44.2 or older
+
+    layer_type = "PhiMoEDecoderLayer"  # presumably the class name of one repeated decoder block; confirm against BaseGPTQModel
+    layers_node = "model.layers"  # presumably the dotted path to the container holding the decoder layers
+    base_modules = ["model.embed_tokens", "model.norm"]  # presumably modules that live outside the decoder layer stack
+
+    layer_modules = [  # groups of module names, written relative to a single decoder layer
+        ["self_attn.k_proj", "self_attn.v_proj", "self_attn.q_proj"],  # attention k/v/q projections, grouped together
+        ["self_attn.o_proj"],  # attention output projection
+        [f"block_sparse_moe.experts.{EXPERT_INDEX_PLACEHOLDER}.w1"],  # placeholder stands in for the expert index; presumably expanded once per expert
+        [f"block_sparse_moe.experts.{EXPERT_INDEX_PLACEHOLDER}.w2"],  # NOTE(review): only w1 and w2 are listed per expert; confirm the expert MLP has no further projection (e.g. w3) that should be included
+    ]
+
+__all__ = ["Phi3GPTQ", "PhiMoEGPTQForCausalLM"]
diff --git a/tests/test_phi_3_moe.py b/tests/test_phi_3_moe.py
@@ -0,0 +1,27 @@
+# Copyright 2025 ModelCloud
+# Contact: qubitium@modelcloud.ai, x.com/qubitium
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from model_test import ModelTest
+
+
+class TestPhi_3(ModelTest):  # test case for the Phi-3.5-MoE-instruct checkpoint; configuration is read by ModelTest
+    NATIVE_MODEL_ID = "/monster/data/model/Phi-3.5-MoE-instruct"  # local checkpoint path; hub id: microsoft/Phi-3.5-MoE-instruct
+    NATIVE_ARC_CHALLENGE_ACC = 0.5401  # presumably the reference ARC-Challenge accuracy the result is compared to; confirm in ModelTest
+    NATIVE_ARC_CHALLENGE_ACC_NORM = 0.5674  # presumably the matching normalized-accuracy reference; confirm in ModelTest
+    APPLY_CHAT_TEMPLATE = True
+    TRUST_REMOTE_CODE = True
+
+    def test_phi_3(self):  # NOTE(review): CI's IGNORED_TEST_FILES names models/test_phi_3_moe.py; confirm this file's location matches that entry
+        self.quant_lm_eval()  # not defined in this class, so inherited (presumably from ModelTest); no arguments are passed