---
title: API
description: Cortex Server Overview.
slug: server
---
import Tabs from "@theme/Tabs";
import TabItem from "@theme/TabItem";
:::warning
🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
:::
Cortex has an API server that runs at `localhost:3928`.
```sh
# By default, the server starts on port 3928
cortex

# Start the server on a custom address and port
cortex -a <address> -p <port_number>

# Set the data folder directory
cortex --dataFolder <dataFolderPath>
```
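Once the server is running, you can verify it is reachable from code. Below is a minimal sketch in Python, assuming the server exposes an OpenAI-compatible `GET /v1/models` listing endpoint and that the `requests` package is installed:

```python
import requests

# Hypothetical reachability check: list the models the local Cortex server knows about.
# Assumes an OpenAI-compatible GET /v1/models endpoint is available.
resp = requests.get("http://localhost:3928/v1/models", timeout=5)
resp.raise_for_status()
print(resp.json())
```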
```sh
# Pull a model
curl --request POST \
  --url http://localhost:3928/v1/models/mistral/pull
```
```sh
# Start the model
curl --request POST \
  --url http://localhost:3928/v1/models/mistral/start \
  --header 'Content-Type: application/json' \
  --data '{
    "prompt_template": "system\n{system_message}\nuser\n{prompt}\nassistant",
    "stop": [],
    "ngl": 4096,
    "ctx_len": 4096,
    "cpu_threads": 10,
    "n_batch": 2048,
    "caching_enabled": true,
    "grp_attn_n": 1,
    "grp_attn_w": 512,
    "mlock": false,
    "flash_attn": true,
    "cache_type": "f16",
    "use_mmap": true,
    "engine": "llamacpp"
  }'
```
```sh
# Check the model status
curl --request GET \
  --url http://localhost:3928/v1/system/events/model
```
```sh
# Invoke the chat completions endpoint
curl http://localhost:3928/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "mistral",
    "messages": [
      {
        "role": "user",
        "content": "Hello"
      }
    ],
    "stream": true,
    "max_tokens": 1,
    "stop": [],
    "frequency_penalty": 1,
    "presence_penalty": 1,
    "temperature": 1,
    "top_p": 1
  }'
```
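Because the chat endpoint follows the OpenAI API shape, you can also call it from an OpenAI-compatible client instead of curl. A sketch using the official `openai` Python package, assuming the local server accepts a placeholder API key:

```python
from openai import OpenAI

# Point the OpenAI client at the local Cortex server.
# The API key is a placeholder; a local server typically ignores it (assumption).
client = OpenAI(base_url="http://localhost:3928/v1", api_key="not-needed")

stream = client.chat.completions.create(
    model="mistral",
    messages=[{"role": "user", "content": "Hello"}],
    stream=True,
)

# Print tokens as they arrive.
for chunk in stream:
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)
```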
```sh
# Stop a model
curl --request POST \
  --url http://localhost:3928/v1/models/mistral/stop
```
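The model lifecycle calls above are straightforward to script. Below is a hypothetical helper that wraps the pull/start/stop endpoints shown on this page, using `requests`; the function names and timeout values are illustrative, not part of the Cortex API:

```python
import requests

BASE_URL = "http://localhost:3928/v1"

def pull_model(model: str) -> None:
    # POST /v1/models/{model}/pull — download the model (endpoint from this page).
    requests.post(f"{BASE_URL}/models/{model}/pull", timeout=None).raise_for_status()

def start_model(model: str, **settings) -> None:
    # POST /v1/models/{model}/start — load the model with optional engine settings.
    requests.post(f"{BASE_URL}/models/{model}/start", json=settings, timeout=300).raise_for_status()

def stop_model(model: str) -> None:
    # POST /v1/models/{model}/stop — unload the model.
    requests.post(f"{BASE_URL}/models/{model}/stop", timeout=30).raise_for_status()

if __name__ == "__main__":
    pull_model("mistral")
    start_model("mistral", ctx_len=4096, engine="llamacpp")
    stop_model("mistral")
```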