Skip to content

Commit 61cc76c

Browse files
authored
chore(autogptq): drop archived backend (#5214)
Signed-off-by: Ettore Di Giacinto <[email protected]>
1 parent 8abecb4 commit 61cc76c

23 files changed

+5
-322
lines changed

.github/dependabot.yml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,6 @@ updates:
2929
schedule:
3030
# Check for updates to GitHub Actions every week
3131
interval: "weekly"
32-
- package-ecosystem: "pip"
33-
directory: "/backend/python/autogptq"
34-
schedule:
35-
interval: "weekly"
3632
- package-ecosystem: "pip"
3733
directory: "/backend/python/bark"
3834
schedule:

Dockerfile

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ ARG TARGETARCH
1515
ARG TARGETVARIANT
1616

1717
ENV DEBIAN_FRONTEND=noninteractive
18-
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,faster-whisper:/build/backend/python/faster-whisper/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,exllama2:/build/backend/python/exllama2/run.sh"
18+
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,faster-whisper:/build/backend/python/faster-whisper/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,exllama2:/build/backend/python/exllama2/run.sh"
1919

2020
RUN apt-get update && \
2121
apt-get install -y --no-install-recommends \
@@ -431,9 +431,6 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "kokoro" || -z "${EXTRA_BACKENDS}" ) && "$IMA
431431
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vllm" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
432432
make -C backend/python/vllm \
433433
; fi && \
434-
if [[ ( "${EXTRA_BACKENDS}" =~ "autogptq" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
435-
make -C backend/python/autogptq \
436-
; fi && \
437434
if [[ ( "${EXTRA_BACKENDS}" =~ "bark" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
438435
make -C backend/python/bark \
439436
; fi && \

Makefile

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -505,18 +505,10 @@ protogen-go-clean:
505505
$(RM) bin/*
506506

507507
.PHONY: protogen-python
508-
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen rerankers-protogen transformers-protogen kokoro-protogen vllm-protogen faster-whisper-protogen
508+
protogen-python: bark-protogen coqui-protogen diffusers-protogen exllama2-protogen rerankers-protogen transformers-protogen kokoro-protogen vllm-protogen faster-whisper-protogen
509509

510510
.PHONY: protogen-python-clean
511-
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean rerankers-protogen-clean transformers-protogen-clean kokoro-protogen-clean vllm-protogen-clean faster-whisper-protogen-clean
512-
513-
.PHONY: autogptq-protogen
514-
autogptq-protogen:
515-
$(MAKE) -C backend/python/autogptq protogen
516-
517-
.PHONY: autogptq-protogen-clean
518-
autogptq-protogen-clean:
519-
$(MAKE) -C backend/python/autogptq protogen-clean
511+
protogen-python-clean: bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean rerankers-protogen-clean transformers-protogen-clean kokoro-protogen-clean vllm-protogen-clean faster-whisper-protogen-clean
520512

521513
.PHONY: bark-protogen
522514
bark-protogen:
@@ -593,7 +585,6 @@ vllm-protogen-clean:
593585
## GRPC
594586
# Note: it is duplicated in the Dockerfile
595587
prepare-extra-conda-environments: protogen-python
596-
$(MAKE) -C backend/python/autogptq
597588
$(MAKE) -C backend/python/bark
598589
$(MAKE) -C backend/python/coqui
599590
$(MAKE) -C backend/python/diffusers

backend/backend.proto

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -190,11 +190,7 @@ message ModelOptions {
190190
int32 NGQA = 20;
191191
string ModelFile = 21;
192192

193-
// AutoGPTQ
194-
string Device = 22;
195-
bool UseTriton = 23;
196-
string ModelBaseName = 24;
197-
bool UseFastTokenizer = 25;
193+
198194

199195
// Diffusers
200196
string PipelineType = 26;

backend/python/autogptq/Makefile

Lines changed: 0 additions & 17 deletions
This file was deleted.

backend/python/autogptq/README.md

Lines changed: 0 additions & 5 deletions
This file was deleted.

backend/python/autogptq/backend.py

Lines changed: 0 additions & 158 deletions
This file was deleted.

backend/python/autogptq/install.sh

Lines changed: 0 additions & 14 deletions
This file was deleted.

backend/python/autogptq/requirements-cublas11.txt

Lines changed: 0 additions & 2 deletions
This file was deleted.

backend/python/autogptq/requirements-cublas12.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

backend/python/autogptq/requirements-hipblas.txt

Lines changed: 0 additions & 2 deletions
This file was deleted.

backend/python/autogptq/requirements-intel.txt

Lines changed: 0 additions & 6 deletions
This file was deleted.

backend/python/autogptq/requirements.txt

Lines changed: 0 additions & 6 deletions
This file was deleted.

backend/python/autogptq/run.sh

Lines changed: 0 additions & 4 deletions
This file was deleted.

backend/python/autogptq/test.sh

Lines changed: 0 additions & 6 deletions
This file was deleted.

core/backend/options.go

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -184,11 +184,6 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
184184
MainGPU: c.MainGPU,
185185
Threads: int32(*c.Threads),
186186
TensorSplit: c.TensorSplit,
187-
// AutoGPTQ
188-
ModelBaseName: c.AutoGPTQ.ModelBaseName,
189-
Device: c.AutoGPTQ.Device,
190-
UseTriton: c.AutoGPTQ.Triton,
191-
UseFastTokenizer: c.AutoGPTQ.UseFastTokenizer,
192187
// RWKV
193188
Tokenizer: c.Tokenizer,
194189
}

core/config/backend_config.go

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,6 @@ type BackendConfig struct {
5050
// LLM configs (GPT4ALL, Llama.cpp, ...)
5151
LLMConfig `yaml:",inline"`
5252

53-
// AutoGPTQ specifics
54-
AutoGPTQ AutoGPTQ `yaml:"autogptq"`
55-
5653
// Diffusers
5754
Diffusers Diffusers `yaml:"diffusers"`
5855
Step int `yaml:"step"`
@@ -176,14 +173,6 @@ type LimitMMPerPrompt struct {
176173
LimitAudioPerPrompt int `yaml:"audio"`
177174
}
178175

179-
// AutoGPTQ is a struct that holds the configuration specific to the AutoGPTQ backend
180-
type AutoGPTQ struct {
181-
ModelBaseName string `yaml:"model_base_name"`
182-
Device string `yaml:"device"`
183-
Triton bool `yaml:"triton"`
184-
UseFastTokenizer bool `yaml:"use_fast_tokenizer"`
185-
}
186-
187176
// TemplateConfig is a struct that holds the configuration of the templating system
188177
type TemplateConfig struct {
189178
// Chat is the template used in the chat completion endpoint

core/http/middleware/request.go

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -203,18 +203,10 @@ func mergeOpenAIRequestAndBackendConfig(config *config.BackendConfig, input *sch
203203
config.Diffusers.ClipSkip = input.ClipSkip
204204
}
205205

206-
if input.ModelBaseName != "" {
207-
config.AutoGPTQ.ModelBaseName = input.ModelBaseName
208-
}
209-
210206
if input.NegativePromptScale != 0 {
211207
config.NegativePromptScale = input.NegativePromptScale
212208
}
213209

214-
if input.UseFastTokenizer {
215-
config.UseFastTokenizer = input.UseFastTokenizer
216-
}
217-
218210
if input.NegativePrompt != "" {
219211
config.NegativePrompt = input.NegativePrompt
220212
}

core/schema/openai.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,6 @@ type OpenAIRequest struct {
202202

203203
Backend string `json:"backend" yaml:"backend"`
204204

205-
// AutoGPTQ
206205
ModelBaseName string `json:"model_base_name" yaml:"model_base_name"`
207206
}
208207

core/schema/prediction.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,6 @@ type PredictionOptions struct {
4141
RopeFreqBase float32 `json:"rope_freq_base" yaml:"rope_freq_base"`
4242
RopeFreqScale float32 `json:"rope_freq_scale" yaml:"rope_freq_scale"`
4343
NegativePromptScale float32 `json:"negative_prompt_scale" yaml:"negative_prompt_scale"`
44-
// AutoGPTQ
45-
UseFastTokenizer bool `json:"use_fast_tokenizer" yaml:"use_fast_tokenizer"`
4644

4745
// Diffusers
4846
ClipSkip int `json:"clip_skip" yaml:"clip_skip"`

docs/content/docs/advanced/advanced-usage.md

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -268,14 +268,6 @@ yarn_ext_factor: 0
268268
yarn_attn_factor: 0
269269
yarn_beta_fast: 0
270270
yarn_beta_slow: 0
271-
272-
# AutoGPT-Q settings, for configurations specific to GPT models.
273-
autogptq:
274-
model_base_name: "" # Base name of the model.
275-
device: "" # Device to run the model on.
276-
triton: false # Whether to use Triton Inference Server.
277-
use_fast_tokenizer: false # Whether to use a fast tokenizer for quicker processing.
278-
279271
# configuration for diffusers model
280272
diffusers:
281273
cuda: false # Whether to use CUDA

0 commit comments

Comments
 (0)