Mixtral 8x7B v0.1 is a sparse mixture-of-experts (SMoE) large language model developed by Mistral AI. With 46.7 billion total parameters and 12.9 billion active parameters per token, it outperforms Llama 2 70B and matches GPT-3.5 on many benchmarks while offering efficient inference. The model handles context lengths up to 32k tokens, supports multiple languages including English, French, Italian, German, and Spanish, and excels in code generation tasks. Licensed under Apache 2.0, Mixtral provides a powerful and efficient solution for diverse NLP applications.
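To see why only 12.9 billion of the 46.7 billion parameters are active per token: in a sparse MoE layer, a router selects 2 of 8 expert feed-forward networks for each token, so most weights stay idle on any given forward pass. Below is a minimal, illustrative sketch of top-2 gating; the function and variable names are ours, not Mistral's actual implementation.

import numpy as np

def top2_moe_layer(x, gate_w, experts):
    """Route one token vector x through 2 of len(experts) expert networks.

    Illustrative only: real MoE layers batch tokens and run on accelerators.
    """
    logits = x @ gate_w                      # router scores, one per expert
    top2 = np.argsort(logits)[-2:]           # indices of the 2 highest-scoring experts
    weights = np.exp(logits[top2])
    weights /= weights.sum()                 # softmax over the selected pair
    # Only the two chosen experts execute, so the remaining experts'
    # parameters contribute nothing to this token's compute.
    return sum(w * experts[i](x) for w, i in zip(weights, top2))

The examples below show how to query the model through the Cyfuture AI inference API.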
Python:

import requests

url = "https://api.cyfuture.ai/aiapi/inferencing/response"

payload = {
    "model": "Model Name",
    "max_tokens": 16384,
    "top_p": 1,
    "top_k": 40,
    "presence_penalty": 0,
    "frequency_penalty": 0,
    "temperature": 0.6,
    "messages": [
        {
            "role": "user",
            "content": "Hello, how are you?"
        }
    ]
}

headers = {
    "Accept": "application/json",
    "Content-Type": "application/json",
    "Authorization": "Bearer <API_KEY>"
}

# Passing json=payload serializes the body and sets it for the request.
response = requests.post(url, headers=headers, json=payload)
print(response.status_code)
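Once the call returns, you will usually want the model's reply rather than the raw JSON. The sketch below assumes the endpoint returns an OpenAI-style chat-completions body (a choices array whose entries contain a message object); check the actual Cyfuture AI response schema before relying on these field names.

# Hedged sketch: the choices[0].message.content path is an assumption
# based on common chat-completion APIs, not confirmed by this document.
data = response.json()
reply = data["choices"][0]["message"]["content"]
print(reply)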
JavaScript:

const response = await fetch("https://api.cyfuture.ai/aiapi/inferencing/response", {
  method: "POST",
  headers: {
    "Accept": "application/json",
    "Content-Type": "application/json",
    "Authorization": "Bearer <API_KEY>"
  },
  body: JSON.stringify({
    model: "Model Name",
    max_tokens: 16384,
    top_p: 1,
    top_k: 40,
    presence_penalty: 0,
    frequency_penalty: 0,
    temperature: 0.6,
    messages: [
      {
        role: "user",
        content: "Hello, how are you?"
      }
    ]
  })
});
const data = await response.json();
console.log(data);
Java:

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

URI uri = URI.create("https://api.cyfuture.ai/aiapi/inferencing/response");
HttpClient client = HttpClient.newHttpClient();
HttpRequest request = HttpRequest.newBuilder()
    .uri(uri)
    .header("Accept", "application/json")
    .header("Content-Type", "application/json")
    .header("Authorization", "Bearer <API_KEY>")
    .POST(HttpRequest.BodyPublishers.ofString("""
        {
          "model": "Model Name",
          "max_tokens": 16384,
          "top_p": 1,
          "top_k": 40,
          "presence_penalty": 0,
          "frequency_penalty": 0,
          "temperature": 0.6,
          "messages": [
            {
              "role": "user",
              "content": "Hello, how are you?"
            }
          ]
        }"""))
    .build();
HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
System.out.println(response.body());
Go:

package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	apiUrl := "https://api.cyfuture.ai/aiapi/inferencing/response"

	var jsonData = []byte(`{
		"model": "Model Name",
		"max_tokens": 16384,
		"top_p": 1,
		"top_k": 40,
		"presence_penalty": 0,
		"frequency_penalty": 0,
		"temperature": 0.6,
		"messages": [
			{
				"role": "user",
				"content": "Hello, how are you?"
			}
		]
	}`)

	req, err := http.NewRequest(http.MethodPost, apiUrl, bytes.NewBuffer(jsonData))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Accept", "application/json")
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer <API_KEY>")

	client := &http.Client{}
	resp, err := client.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, _ := io.ReadAll(resp.Body)
	fmt.Println("Response status:", resp.Status)
	fmt.Println(string(body))
}
cURL:

curl --request POST \
  --url https://api.cyfuture.ai/aiapi/inferencing/response \
  --header 'Accept: application/json' \
  --header 'Content-Type: application/json' \
  --header 'Authorization: Bearer <API_KEY>' \
  --data '{
    "model": "Model Name",
    "max_tokens": 16384,
    "top_p": 1,
    "top_k": 40,
    "presence_penalty": 0,
    "frequency_penalty": 0,
    "temperature": 0.6,
    "messages": [
      {
        "role": "user",
        "content": "Hello, how are you?"
      }
    ]
  }'
On-demand deployments let you run Mixtral 8x7B v0.1 on dedicated GPUs using Cyfuture AI's high-performance serving stack, with high reliability and no rate limits.
See the On-demand deployments guide for details.