Makes a request to the LLaMA Vision API with an image and an optional prompt, returning the response message from the API. The function uses async/await syntax and assumes the LLaMA Vision API is running on http://localhost:11434/api/chat.
npm run import -- "ollama vision request"
const { request } = require('gaxios')
const fs = require('fs')
/**
 * Converts any supported image input into a bare base64 string.
 *
 * Strings are interpreted, in order, as: a `data:image/...;base64,` URI
 * (the header is stripped), a URL containing `://` (downloaded), an existing
 * local file path (read from disk), or otherwise an already-encoded base64
 * payload (re-encoded to normalise it).
 *
 * @param {Buffer|ArrayBuffer|ArrayBufferView|string} image - The image source.
 * @returns {Promise<string>} The image bytes encoded as base64.
 */
async function toBase64Image(image) {
  if (typeof image !== 'string') {
    if (Buffer.isBuffer(image)) {
      return image.toString('base64')
    }
    // Typed arrays ignore the encoding argument of toString(), so wrap their
    // underlying bytes in a Buffer first.
    if (ArrayBuffer.isView(image)) {
      return Buffer.from(image.buffer, image.byteOffset, image.byteLength).toString('base64')
    }
    if (image instanceof ArrayBuffer) {
      return Buffer.from(image).toString('base64')
    }
    // Anything else keeps the original contract: it must provide toString('base64').
    return image.toString('base64')
  }

  // Drop a data-URI header so only the base64 payload remains. The regex is
  // anchored, so strings that are not data URIs pass through untouched.
  const source = image.replace(/^data:image\/.*?;base64,/i, '')

  if (source.includes('://')) {
    const download = await request({
      url: source,
      method: 'GET',
      // Ask for raw bytes instead of relying on content-type sniffing.
      responseType: 'arraybuffer',
    })
    const payload = download.data
    // Tolerate a Blob-like body as well as a plain ArrayBuffer.
    const bytes = payload && typeof payload.arrayBuffer === 'function'
      ? await payload.arrayBuffer()
      : payload
    return Buffer.from(bytes).toString('base64')
  }

  if (fs.existsSync(source)) {
    return fs.readFileSync(source).toString('base64')
  }

  // Not a URL and not a file on disk: treat it as base64 and normalise it.
  return Buffer.from(source, 'base64').toString('base64')
}

/**
 * Sends an image and a prompt to the llama3.2-vision model through the chat
 * endpoint at http://localhost:11434/api/chat and returns the model's reply.
 *
 * Download, file-system and HTTP errors are not caught here; they propagate
 * to the caller as rejected promises.
 *
 * @param {Buffer|ArrayBuffer|ArrayBufferView|string} image - Image bytes, a
 *   base64 string, a data URI, a URL, or a local file path.
 * @param {string} [prompt] - Instruction for the model. Falls back to
 *   "Describe the image in great detail." when empty or omitted.
 * @returns {Promise<string|undefined>} The reply text, or undefined when no
 *   image was supplied or the response carries no message.
 */
async function requestOllamaVision(image, prompt) {
  if (!image) {
    console.error('image not set!')
    return
  }

  const base64Image = await toBase64Image(image)

  const result = await request({
    url: 'http://localhost:11434/api/chat',
    method: 'POST',
    headers: {
      'Content-Type': 'application/json'
    },
    data: JSON.stringify({
      model: 'llama3.2-vision',
      stream: false,
      messages: [
        {
          role: 'user',
          content: prompt ? prompt : 'Describe the image in great detail.',
          images: [base64Image]
        }
      ]
    })
  })

  if (result.data && result.data.message) {
    return result.data.message.content
  }
  return undefined
}
module.exports = requestOllamaVision
const { google } = require('googleapis');
const fs = require('fs');
const fetch = require('isomorphic-fetch');
/**
 * Makes a request to the LLaMA Vision API (llama3.2-vision served through the
 * chat endpoint at http://localhost:11434/api/chat).
 *
 * String inputs are interpreted, in order, as: a `data:image/...;base64,` URI,
 * a URL containing `://` (downloaded), an existing local file path (read from
 * disk), or otherwise an already-encoded base64 payload.
 *
 * Failures are logged with console.error and reported as an undefined result
 * rather than thrown.
 *
 * @param {Buffer|string} image - The image to be processed.
 * @param {string} prompt - The prompt for the image description.
 * @returns {Promise<string|undefined>} The reply text from the API, or
 *   undefined when the image is missing, cannot be loaded, or the request fails.
 */
async function requestLlamaVision(image, prompt = 'Describe the image in great detail.') {
  // Check if the image is valid
  if (!image) {
    console.error('Image not set!');
    return;
  }

  let base64Image;
  if (typeof image === 'string') {
    try {
      if (image.startsWith('data:image/')) {
        // Data URI: keep only the base64 payload after the header
        base64Image = image.replace(/^data:image\/.*?;base64,/i, '');
      } else if (image.includes('://')) {
        // If it's a URL, fetch the image
        const response = await fetch(image);
        if (!response.ok) {
          throw new Error(`Image download failed with HTTP status ${response.status}`);
        }
        base64Image = Buffer.from(await response.arrayBuffer()).toString('base64');
      } else if (fs.existsSync(image)) {
        // If it's a local file, read it
        base64Image = fs.readFileSync(image).toString('base64');
      } else {
        // Neither a URL nor a file on disk: treat it as base64 and normalise it
        base64Image = Buffer.from(image, 'base64').toString('base64');
      }
    } catch (error) {
      // If any of the above steps fail, report the error and give up
      console.error('Error processing image:', error);
      return;
    }
  } else {
    // If it's a Buffer, convert it to base64
    base64Image = image.toString('base64');
  }

  try {
    // Make the request to the LLaMA Vision API
    const apiResponse = await fetch('http://localhost:11434/api/chat', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json'
      },
      body: JSON.stringify({
        model: 'llama3.2-vision',
        stream: false,
        messages: [
          {
            role: 'user',
            content: prompt,
            images: [base64Image]
          }
        ]
      })
    });
    if (!apiResponse.ok) {
      throw new Error(`LLaMA Vision API responded with HTTP status ${apiResponse.status}`);
    }
    const result = await apiResponse.json();
    // With "stream": false the reply arrives as one object holding the message
    return result && result.message ? result.message.content : undefined;
  } catch (error) {
    // If the API request fails, report the error and return nothing
    console.error('Error requesting LLaMA Vision API:', error);
    return;
  }
}
module.exports = requestLlamaVision;
requestOllamaVision

Makes a request to the LLaMA Vision API with an optional image and prompt.

Parameters
- image: The image to be processed. Can be a:
  - Buffer object
  - string representing a base64 encoded image
  - string representing a URL to an image (will be downloaded and processed)
  - string representing a local file path to an image (will be read and processed)
- prompt: The prompt to be sent to the LLaMA Vision API. Defaults to "Describe the image in great detail."

Returns
- string: The response message from the LLaMA Vision API.

Throws
- Error: If the image is not set or cannot be processed.

Dependencies
- gaxios for making HTTP requests
- fs for reading local files

Notes
- The function uses async/await syntax to handle promises.
- The request function from gaxios is used to make HTTP requests.
- The function assumes the LLaMA Vision API is running on http://localhost:11434/api/chat.