Llama Guard is a 7B-parameter, Llama 2-based input-output safeguard model. It classifies content in both LLM inputs (prompt classification) and LLM responses (response classification). It acts as an LLM itself: it generates text indicating whether a given prompt or response is safe or unsafe, and if it is unsafe under the applied policy, it also lists the violated subcategories.
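Because the verdict arrives as generated text rather than as structured fields, client code typically parses the first line of the completion for the safe/unsafe label and any following line for category codes. A minimal sketch of that parsing (the request examples follow below), assuming the default Llama Guard output format in which an unsafe verdict is followed by comma-separated category codes such as O3:

def parse_llama_guard(output: str):
    # Assumes the default Llama Guard output format: the first line is
    # "safe" or "unsafe"; for unsafe content, a second line lists the
    # violated category codes (e.g. "O3"), separated by commas.
    lines = output.strip().splitlines()
    is_safe = lines[0].strip().lower() == "safe"
    categories = []
    if not is_safe and len(lines) > 1:
        categories = [c.strip() for c in lines[1].split(",")]
    return is_safe, categories

# parse_llama_guard("unsafe\nO3") -> (False, ["O3"])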
import requests
import json

url = "https://api.cyfuture.ai/aiapi/inferencing/response"

payload = {
    "model": "Model Name",
    "max_tokens": 16384,
    "top_p": 1,
    "top_k": 40,
    "presence_penalty": 0,
    "frequency_penalty": 0,
    "temperature": 0.6,
    "messages": [
        {
            "role": "user",
            "content": "Hello, how are you?"
        }
    ]
}

headers = {
    "Accept": "application/json",
    "Content-Type": "application/json",
    "Authorization": "Bearer <API_KEY>"
}

response = requests.request("POST", url, headers=headers, data=json.dumps(payload))
print(response.json())
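Assuming the endpoint returns an OpenAI-compatible completion body (verify against the response your deployment actually produces), the verdict text can be pulled out of the response above and fed to a parser like parse_llama_guard:

# A sketch, assuming an OpenAI-style schema with the generated text at
# choices[0].message.content; check the schema the endpoint returns.
data = response.json()
verdict_text = data["choices"][0]["message"]["content"]
print(parse_llama_guard(verdict_text))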
const response = await fetch("https://api.cyfuture.ai/aiapi/inferencing/response", {
  method: "POST",
  headers: {
    "Accept": "application/json",
    "Content-Type": "application/json",
    "Authorization": "Bearer <API_KEY>"
  },
  body: JSON.stringify({
    model: "Model Name",
    max_tokens: 16384,
    top_p: 1,
    top_k: 40,
    presence_penalty: 0,
    frequency_penalty: 0,
    temperature: 0.6,
    messages: [
      {
        role: "user",
        content: "Hello, how are you?"
      }
    ]
  })
});
console.log(await response.json());
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

URI uri = URI.create("https://api.cyfuture.ai/aiapi/inferencing/response");
HttpClient client = HttpClient.newHttpClient();

HttpRequest request = HttpRequest.newBuilder()
    .uri(uri)
    .header("Accept", "application/json")
    .header("Content-Type", "application/json")
    .header("Authorization", "Bearer <API_KEY>")
    .POST(HttpRequest.BodyPublishers.ofString("""
        {
          "model": "Model Name",
          "max_tokens": 16384,
          "top_p": 1,
          "top_k": 40,
          "presence_penalty": 0,
          "frequency_penalty": 0,
          "temperature": 0.6,
          "messages": [
            {
              "role": "user",
              "content": "Hello, how are you?"
            }
          ]
        }
        """))
    .build();

HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
System.out.println(response.body());
package main

import (
	"bytes"
	"fmt"
	"net/http"
)

func main() {
	apiUrl := "https://api.cyfuture.ai/aiapi/inferencing/response"

	var jsonData = []byte(`{
		"model": "Model Name",
		"max_tokens": 16384,
		"top_p": 1,
		"top_k": 40,
		"presence_penalty": 0,
		"frequency_penalty": 0,
		"temperature": 0.6,
		"messages": [
			{
				"role": "user",
				"content": "Hello, how are you?"
			}
		]
	}`)

	req, err := http.NewRequest(http.MethodPost, apiUrl, bytes.NewBuffer(jsonData))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Accept", "application/json")
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer <API_KEY>")

	client := &http.Client{}
	resp, err := client.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	fmt.Println("response Status:", resp.Status)
}
curl --request POST \
  --url https://api.cyfuture.ai/aiapi/inferencing/response \
  --header 'Accept: application/json' \
  --header 'Content-Type: application/json' \
  --header 'Authorization: Bearer <API_KEY>' \
  --data '{
    "model": "Model Name",
    "max_tokens": 16384,
    "top_p": 1,
    "top_k": 40,
    "presence_penalty": 0,
    "frequency_penalty": 0,
    "temperature": 0.6,
    "messages": [
      {
        "role": "user",
        "content": "Hello, how are you?"
      }
    ]
  }'
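The examples above classify a single user prompt. For response classification, the conversation sent in messages would also need to include the assistant turn to be judged. A sketch in Python, reusing url, headers, and payload from the first example and assuming the endpoint accepts an assistant-role message in the same OpenAI-style format (verify against the API reference):

# Response classification sketch: include the assistant reply so Llama
# Guard evaluates it in the context of the prompt. That this endpoint
# accepts "assistant" messages this way is an assumption to verify.
payload["messages"] = [
    {"role": "user", "content": "Hello, how are you?"},
    {"role": "assistant", "content": "I'm doing well, thank you!"}
]
response = requests.request("POST", url, headers=headers, data=json.dumps(payload))
print(response.json())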
On-demand deployments let you run Llama Guard 7B on dedicated GPUs with Cyfuture AI's high-performance serving stack, offering high reliability and no rate limits.
See the On-demand deployments guide for details.