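The code imports various modules and functions, then defines an asynchronous function whiskImages
that takes four arguments (subject, scene, style, and short) and handles several kinds of input
(data URL, remote URL, file path, plain-text description, or non-string value) for its first three
arguments: subject, scene, and style.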
npm run import -- "whisk images"
const fs = require('fs')
const { request } = require('gaxios')
const requestOllamaVision = importer.import("request ollama vision")
const selectModel = importer.import("select llm")
const {doStableRequest} = importer.import("stable diffusion request")
const {doImage2Image} = importer.import("image 2 image")
const {doBackgroundMask} = importer.import("mask image")
const {doInpaintMask} = importer.import("inpaint mask")
// TODO: use the above functions in combination to whisk together a set of images
/**
 * Turn a raw base64 payload into the PNG data URL that the image helpers are called with.
 *
 * @param {string} base64 - base64-encoded image bytes.
 * @returns {string} the payload prefixed with 'data:image/png;base64,'.
 */
function toPngDataUrl(base64) {
  return 'data:image/png;base64,' + base64;
}

/**
 * Join prompt fragments with newlines, dropping the ones that are missing so the
 * literal text "undefined" never ends up in a prompt.
 *
 * @param {Array<string|undefined>} parts - prompt fragments, possibly undefined or empty.
 * @returns {string} the remaining fragments joined by '\n' ('' when none remain).
 */
function joinPromptParts(parts) {
  return parts.filter(Boolean).join('\n');
}

/**
 * Resolve one whisk input (subject, scene or style) into image data or text.
 *
 * @param {*} input - a data URL, a remote URL, a file path, a free-text description,
 *   or a non-string value (assumed to be a Buffer - TODO confirm with callers).
 * @returns {Promise<{base64?: string, text?: string}>} `base64` when the input was an
 *   image, `text` when it was a description, neither when nothing was passed.
 */
async function loadWhiskInput(input) {
  if (typeof input !== 'string') {
    // Falsy non-strings (undefined, null, ...) mean "not provided".
    return input ? { base64: input.toString('base64') } : {};
  }
  if (input.startsWith('data:image/')) {
    const payload = input.replace(/^data:image\/.*?;base64,/gi, '');
    // Decode and re-encode the payload, as the original code did.
    return { base64: Buffer.from(payload, 'base64').toString('base64') };
  }
  if (input.includes('://')) {
    const result = await request({
      url: input,
      method: 'GET',
    });
    return { base64: Buffer.from(await result.data.arrayBuffer()).toString('base64') };
  }
  if (!fs.existsSync(input)) {
    // Not a file on disk, so the string itself is used as the description.
    return { text: input };
  }
  return { base64: fs.readFileSync(input).toString('base64') };
}

/**
 * Fill in the description (and, when requested, the short summary) of a resolved input.
 *
 * @param {{base64?: string, text?: string, short?: string}} entry - result of loadWhiskInput; mutated in place.
 * @param {string} visionPrompt - instruction sent to requestOllamaVision when only an image is available.
 * @param {*} short - truthy to also request a four-or-five word summary.
 * @param {Function} promptModel - function returned by selectModel; called with a prompt string.
 * @returns {Promise<object>} the same entry.
 */
async function describeWhiskInput(entry, visionPrompt, short, promptModel) {
  if (!entry.text && entry.base64) {
    entry.text = await requestOllamaVision(toPngDataUrl(entry.base64), visionPrompt);
  }
  if (short && entry.text) {
    entry.short = await promptModel('Summarize this sentence into four or five words:\n' + entry.text + '\nOnly return the summary, no title or explanation.');
  }
  return entry;
}

/**
 * Whisk a subject, a scene and a style together into one generated image.
 *
 * Each of subject, scene and style may be a data URL, a remote URL, a file path,
 * a plain-text description, or a non-string value that is base64-encoded with
 * toString('base64'). Images are described through requestOllamaVision; the route
 * then depends on which inputs were images:
 *   - no images at all: doStableRequest with the text prompts;
 *   - subject image and no scene: doImage2Image on the subject;
 *   - scene image and no subject: doImage2Image on the scene;
 *   - subject image plus a scene: doBackgroundMask, doInpaintMask, then an optional
 *     doImage2Image pass when a style is present.
 *
 * @param {*} subject - foreground subject (image or description).
 * @param {*} scene - scenery (image or description).
 * @param {*} style - art style (image or description).
 * @param {*} short - truthy to use four-or-five word summaries as prompts.
 * @returns {Promise<*>} the result of the selected image helper, or {} when the
 *   inputs do not match any route.
 */
async function whiskImages(subject, scene, style, short) {
  const promptModel = await selectModel(process.env.DEFAULT_MODEL || 'Default');

  const subjectInput = await loadWhiskInput(subject);
  const sceneInput = await loadWhiskInput(scene);
  const styleInput = await loadWhiskInput(style);

  await describeWhiskInput(subjectInput, 'Describe the foreground subject of the image in one short sentence.', short, promptModel);
  await describeWhiskInput(sceneInput, 'Describe the scenery in the image in one short sentence.', short, promptModel);
  await describeWhiskInput(styleInput, 'Describe the art style of image in one short sentence.', short, promptModel);

  // Prompt fragment for an input: the summary in short mode, the full description otherwise.
  const pick = (entry) => (short ? entry.short : entry.text);

  if (!sceneInput.base64 && !styleInput.base64 && !subjectInput.base64) {
    // No images passed in, send the text straight to the image generator.
    const prompt = joinPromptParts([pick(subjectInput), pick(sceneInput), pick(styleInput)]);
    if (!prompt) {
      // Nothing usable was passed at all; do not generate from an empty prompt.
      console.error('Missing components: ');
      return {};
    }
    return await doStableRequest(prompt);
  }

  if (subjectInput.base64 && !sceneInput.text) {
    // Subject (and optional style) only.
    return await doImage2Image(
      toPngDataUrl(subjectInput.base64),
      joinPromptParts([pick(subjectInput), pick(styleInput)]));
  }

  if (sceneInput.base64 && !subjectInput.text) {
    // Scene (and optional style) only.
    return await doImage2Image(
      toPngDataUrl(sceneInput.base64),
      joinPromptParts([pick(sceneInput), pick(styleInput)]));
  }

  if (subjectInput.base64 && sceneInput.text) {
    // Mask the subject, then inpaint using the scene prompt.
    const maskObject = await doBackgroundMask(toPngDataUrl(subjectInput.base64));
    const base64Mask = maskObject.image.toString('base64');
    const inpaintObject = await doInpaintMask(
      toPngDataUrl(subjectInput.base64),
      toPngDataUrl(base64Mask),
      pick(sceneInput));

    // Drop out early if there is no style specified, just do the inpainting.
    if (!styleInput.text) {
      return inpaintObject;
    }

    const base64Inpaint = inpaintObject.image.toString('base64');
    return await doImage2Image(
      toPngDataUrl(base64Inpaint),
      joinPromptParts([pick(styleInput), pick(subjectInput), pick(sceneInput)]));
  }

  console.error('Missing components: ');
  return {};
}
module.exports = whiskImages
const { request, getAuth } = require('gaxios');
const importer = require('importer');
const { doStableRequest, doImage2Image, doBackgroundMask, doInpaintMask } = importer.import([
'stable diffusion request',
'image 2 image',
'mask image',
'inpaint mask'
]);
// Function to get a base64 image payload from a data URL, remote URL, file path or Buffer
/**
 * Resolve an image reference to its base64-encoded contents.
 *
 * @param {string|Buffer|null|undefined} imagePathOrUrl - a 'data:image/...' URL, a remote
 *   URL (anything containing '://'), a path to a file on disk, or a Buffer of image bytes.
 * @returns {Promise<string|undefined>} the base64 payload, or undefined when no input was given.
 * @throws {TypeError} when the input is neither a string, a Buffer, null nor undefined.
 * @throws {Error} with a message containing 'does not exist' when a path is not on disk;
 *   whiskImages inspects that message text, so it must stay stable.
 */
async function getImageAsBase64(imagePathOrUrl) {
  if (imagePathOrUrl == null) {
    // A missing input is not an error: whiskImages may be called without scene or style.
    return undefined;
  }
  if (Buffer.isBuffer(imagePathOrUrl)) {
    return imagePathOrUrl.toString('base64');
  }
  if (typeof imagePathOrUrl !== 'string') {
    throw new TypeError('Image input must be a string or a Buffer.');
  }
  if (imagePathOrUrl.startsWith('data:image/')) {
    // Strip the data-URL header and keep only the payload.
    return imagePathOrUrl.replace(/^data:image\/.*?;base64,/gi, '');
  }
  if (imagePathOrUrl.includes('://')) {
    const response = await request({
      url: imagePathOrUrl,
      method: 'GET',
    });
    return Buffer.from(await response.data.arrayBuffer()).toString('base64');
  }
  // Loaded lazily so the branches above do not need the module.
  const fs = require('fs');
  if (!fs.existsSync(imagePathOrUrl)) {
    throw new Error(`File ${imagePathOrUrl} does not exist.`);
  }
  return fs.readFileSync(imagePathOrUrl).toString('base64');
}
// Ask Ollama Vision to describe an image
/**
 * Forward a base64 image and an instruction to requestOllamaVision.
 *
 * @param {string} imageBase64 - base64 payload; it is prefixed with 'data:image/png;base64,'.
 * @param {string} descriptionPrompt - instruction passed along with the image.
 * @returns {Promise<*>} whatever requestOllamaVision resolves to.
 */
async function getImageDescription(imageBase64, descriptionPrompt) {
  const imageDataUrl = 'data:image/png;base64,' + imageBase64;
  return requestOllamaVision(imageDataUrl, descriptionPrompt);
}
// Condense a sentence to a few words with the given model
/**
 * Build the summarisation prompt for a sentence and hand it to the model.
 *
 * @param {string} sentence - text to condense.
 * @param {Function} model - callable that receives the prompt string.
 * @returns {Promise<*>} whatever the model call resolves to.
 */
async function summarizeSentence(sentence, model) {
  const instruction = `Summarize this sentence into four or five words:\n${sentence}\nOnly return the summary, no title or explanation.`;
  return model(instruction);
}
// Main function to whisk images together
/**
 * Whisk a subject, a scene and a style together into one generated image.
 *
 * Each of subject, scene and style may be an image reference understood by
 * getImageAsBase64 (data URL, remote URL, file path), a plain-text description,
 * or a non-string value that is base64-encoded with toString('base64')
 * (assumed to be a Buffer - TODO confirm with callers). Images are described with
 * getImageDescription; in short mode every description is condensed with
 * summarizeSentence. The route then depends on which inputs were images:
 *   - no images at all: doStableRequest with the text prompts;
 *   - subject image and no scene: doImage2Image on the subject;
 *   - scene image and no subject: doImage2Image on the scene;
 *   - subject image plus a scene: doBackgroundMask, doInpaintMask, then an optional
 *     doImage2Image pass when a style is present.
 *
 * @param {*} subject - foreground subject (image or description).
 * @param {*} scene - scenery (image or description).
 * @param {*} style - art style (image or description).
 * @param {*} short - truthy to use four-or-five word summaries as prompts.
 * @returns {Promise<*>} the result of the selected image helper, or {} when the
 *   inputs do not match any route.
 */
async function whiskImages(subject, scene, style, short) {
  // Get the model to use for summarization
  const model = await selectModel(process.env.DEFAULT_MODEL || 'Default');

  const toDataUrl = (base64) => 'data:image/png;base64,' + base64;
  // Missing fragments are dropped so "undefined" never reaches a prompt.
  const joinPrompt = (parts) => parts.filter(Boolean).join('\n');

  // Resolve one input into { base64?, text?, summary? }.
  const resolveInput = async (input, descriptionPrompt) => {
    const resolved = {};
    if (!input) {
      return resolved;
    }
    if (typeof input !== 'string') {
      resolved.base64 = input.toString('base64');
    } else {
      try {
        resolved.base64 = await getImageAsBase64(input);
      } catch (error) {
        // getImageAsBase64 reports a non-existent path with this message; such a
        // string is a text description, not an image reference.
        const isMissingFile = error instanceof Error && error.message.includes('does not exist');
        if (!isMissingFile) {
          throw error;
        }
        resolved.text = input;
      }
    }
    if (resolved.base64) {
      resolved.text = await getImageDescription(resolved.base64, descriptionPrompt);
    }
    if (short && resolved.text) {
      resolved.summary = await summarizeSentence(resolved.text, model);
    }
    return resolved;
  };

  const subjectInput = await resolveInput(subject, 'Describe the foreground subject of the image in one short sentence.');
  const sceneInput = await resolveInput(scene, 'Describe the scenery in the image in one short sentence.');
  const styleInput = await resolveInput(style, 'Describe the art style of image in one short sentence.');

  // Prompt fragment for an input: the summary in short mode, the description otherwise.
  const promptFor = (resolved) => (short ? resolved.summary : resolved.text);

  // Determine which image generation route to take
  if (!sceneInput.base64 && !styleInput.base64 && !subjectInput.base64) {
    // No images provided, generate an image directly
    const prompt = joinPrompt([promptFor(subjectInput), promptFor(sceneInput), promptFor(styleInput)]);
    if (!prompt) {
      console.error('Missing components:');
      return {};
    }
    return await doStableRequest(prompt);
  }

  if (subjectInput.base64 && !sceneInput.text) {
    // Only subject and style, generate an image 2 image
    return await doImage2Image(
      toDataUrl(subjectInput.base64),
      joinPrompt([promptFor(subjectInput), promptFor(styleInput)]));
  }

  if (sceneInput.base64 && !subjectInput.text) {
    // Only scene and style, generate an image 2 image
    return await doImage2Image(
      toDataUrl(sceneInput.base64),
      joinPrompt([promptFor(sceneInput), promptFor(styleInput)]));
  }

  if (subjectInput.base64 && sceneInput.text) {
    // Combine subject and scene to generate a new image
    const mask = await doBackgroundMask(toDataUrl(subjectInput.base64));
    const maskBase64 = mask.image.toString('base64');
    const inpaintImage = await doInpaintMask(
      toDataUrl(subjectInput.base64),
      toDataUrl(maskBase64),
      promptFor(sceneInput));
    if (!styleInput.text) {
      return inpaintImage;
    }
    const inpaintBase64 = inpaintImage.image.toString('base64');
    return await doImage2Image(
      toDataUrl(inpaintBase64),
      joinPrompt([promptFor(styleInput), promptFor(subjectInput), promptFor(sceneInput)]));
  }

  console.error('Missing components:');
  return {};
}
module.exports = whiskImages;
Code Breakdown
The code starts by importing various modules and functions:
const fs = require('fs')
const { request } = require('gaxios')
const requestOllamaVision = importer.import('request ollama vision')
const selectModel = importer.import('select llm')
const {doStableRequest} = importer.import('stable diffusion request')
const {doImage2Image} = importer.import('image 2 image')
const {doBackgroundMask} = importer.import('mask image')
const {doInpaintMask} = importer.import('inpaint mask')
whiskImages
The whiskImages function takes four arguments (subject, scene, style, and short) and is defined as an asynchronous function:
async function whiskImages(subject, scene, style, short) {
//...
}
subject Input
The function handles the subject input in several cases:
- If subject is a string that starts with 'data:image/', it extracts the base64-encoded image data.
- If subject is a string that includes '://', it makes a GET request to the URL and base64-encodes the response body.
- If subject is a string that does not exist as a file, it keeps the string value as a text description.
- If subject is a string that names an existing file, it reads the file and base64-encodes its contents.
- If subject is not a string (and is truthy), it converts it to a base64-encoded string with toString('base64').
scene Input
The function handles the scene input, and likewise the style input, in the same way as the subject input.
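The function's only error handling is a console.error('Missing components: ') call followed by an empty-object return when no processing route matches; failed downloads and file reads are not caught.
The variables sceneShort, sceneString, style, and short do not appear in the abbreviated excerpt above, but the full function uses them to build the prompts and to choose the processing route.
The code contains only a few TODO comments and no function-level documentation, making it difficult to understand its purpose or functionality.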