The code defines an asynchronous `doVision` function that retrieves and describes images from URLs, including images attached to Discord channel messages, using a machine vision proxy. The function updates the interaction's content with the image description.
npm run import -- "discord ollama vision"
const { request } = require('gaxios')
//const fs = require('fs')
const requestOllamaVision = importer.import("request ollama vision")
const {requestAuthQ} = importer.import("discord request")
const {URL} = require('url')
//const TEMP_DIR = os.tmpdir();
/**
 * Extracts the channel and message IDs from a Discord message link.
 *
 * @param {string} link - Expected to look like
 *   `https://discord.com/channels/<guild>/<channel>/<message>`.
 * @returns {{channelId: string, messageId: string}|null} The IDs, or null
 *   when the link is not an absolute URL or is missing path segments.
 */
function parseDiscordMessageLink(link) {
  let pathname
  try {
    pathname = new URL(link).pathname
  } catch (err) {
    // Not an absolute URL, so there is no path to pull the IDs from.
    return null
  }
  const segments = pathname.split('/').filter(Boolean)
  const start = segments.indexOf('channels')
  // Need at least <guild>/<channel>/<message> after "channels".
  if (start === -1 || segments.length < start + 4) {
    return null
  }
  return {
    channelId: segments[start + 2],
    // The message ID is the last path segment.
    messageId: segments[segments.length - 1],
  }
}

/**
 * Finds the URL of the first attachment on a message, falling back to the
 * first attachment of the message's first snapshot.
 *
 * @param {object} message - Message object as returned by `requestAuthQ`.
 * @returns {string|null} The attachment URL, or null when there is none.
 */
function firstAttachmentUrl(message) {
  if (!message) {
    return null
  }
  if (message.attachments && message.attachments[0]) {
    return message.attachments[0].url
  }
  const snapshot = message.message_snapshots && message.message_snapshots[0]
  const snapshotMessage = snapshot && snapshot.message
  if (snapshotMessage && snapshotMessage.attachments
    && snapshotMessage.attachments[0]) {
    return snapshotMessage.attachments[0].url
  }
  return null
}

/**
 * Fetches the image referenced by the first option of an interaction,
 * passes its bytes to `requestOllamaVision`, and writes the returned text
 * back to the interaction through `updateInteraction`.
 *
 * When the option value contains both "discord" and "/channels/" it is
 * treated as a link to a message: the message is fetched with
 * `requestAuthQ` and its first attachment (or the first attachment of its
 * first snapshot) is downloaded. Any other value is requested directly.
 *
 * NOTE(review): `updateInteraction` is not imported in the visible part of
 * this file; it has to be in scope from somewhere else - TODO confirm.
 *
 * @param {object} interaction - Reads `data.options[0].value`, `id` and `token`.
 * @returns {Promise<*>} Whatever `updateInteraction` resolves to.
 * @throws Rejections from `requestAuthQ`, `request`, `requestOllamaVision`
 *   and `updateInteraction` are not caught here and propagate to the caller.
 */
async function doVision(interaction) {
  // TODO: download the image file to temp
  const source = interaction.data.options[0].value

  let imageUrl = source
  if (source.includes('discord') && source.includes('/channels/')) {
    // TODO: get the message reference instead
    imageUrl = null
    const link = parseDiscordMessageLink(source)
    if (link) {
      const message = await requestAuthQ({
        method: 'GET',
        url: `channels/${link.channelId}/messages/${link.messageId}`,
      })
      imageUrl = firstAttachmentUrl(message)
    }
  }

  // A link that could not be parsed, or a message without attachments,
  // leaves nothing to download and falls through to the error reply.
  const result = imageUrl
    ? await request({ url: imageUrl, method: 'GET' })
    : null

  if (!result) {
    return await updateInteraction({
      content: `Could not load: ${source}`
    }, interaction.id, interaction.token)
  }

  // Describe the image using ollama vision proxy
  // NOTE(review): assumes result.data exposes arrayBuffer() (e.g. a Blob) - TODO confirm.
  const content = await requestOllamaVision(Buffer.from(await result.data.arrayBuffer()))
  return await updateInteraction({
    content
  }, interaction.id, interaction.token)
}
module.exports = doVision
// utils.js
function importAll(moduleName) {
const module = require(moduleName);
return Object.keys(module).reduce((acc, key) => {
acc[key] = module[key];
return acc;
}, {});
}
module.exports = { importAll };
Code Breakdown

The code uses the following dependencies:
- `gaxios`: a promise-based HTTP client
- `importer`: an importer module for loading dependencies
- `url`: a module for working with URLs
- `os`: a module for working with the operating system (not used in this code snippet)
- `path`: a module for working with file paths (not imported in this code snippet)

The code defines the following functions:
- `doVision`: an asynchronous function that takes an `interaction` object as input
- `updateInteraction`: a function that updates an interaction with new content (not shown in this code snippet)

`doVision` Function

The `doVision` function does the following:
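1. If the URL is a Discord message link, it uses the `requestAuthQ` function to retrieve the message with the given ID, and then checks if it has an attachment.
2. If the message has an attachment, it uses the `request` function to retrieve the attachment URL.
3. If the message instead has a message snapshot with an attachment, it uses the `request` function to retrieve the attachment URL.
4. If the URL is not a Discord message link, it uses the `request` function to retrieve the URL directly.
5. It uses the `requestOllamaVision` function to describe the image using a machine vision proxy.

The code exports the `doVision` function as a module.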