whisk images

The code imports various modules and functions, then defines an asynchronous function whiskImages that takes four arguments and handles different types of input for its first two arguments, subject and scene.

Run example

whisk images

What the code could have been:

const fs = require('fs');
const { request, getAuth } = require('gaxios');
const importer = require('importer');
const { doStableRequest, doImage2Image, doBackgroundMask, doInpaintMask } = importer.import([
  'stable diffusion request',
  'image 2 image',
  'mask image',
  'inpaint mask'
]);
const requestOllamaVision = importer.import('request ollama vision');
const selectModel = importer.import('select llm');

/**
 * Resolve an image reference to a bare base64 string (no `data:` prefix).
 *
 * Three kinds of reference are recognised, checked in this order:
 *  1. a `data:image/...;base64,` URI - the prefix is stripped;
 *  2. anything containing `://` - fetched with a GET request;
 *  3. anything else - treated as a path on the local file system.
 *
 * @param {string} imagePathOrUrl - Data URI, URL, or local file path.
 * @returns {Promise<string>} The image bytes encoded as base64.
 * @throws {TypeError} If `imagePathOrUrl` is not a string.
 * @throws {Error} With a message containing "does not exist" when the value
 *   is neither a data URI nor a URL and no such file is found.
 */
async function getImageAsBase64(imagePathOrUrl) {
  // Fail with a readable message instead of "cannot read properties of undefined".
  if (typeof imagePathOrUrl !== 'string') {
    throw new TypeError(
      `Expected an image path, URL or data URI string but received ${typeof imagePathOrUrl}.`
    );
  }

  if (imagePathOrUrl.startsWith('data:image/')) {
    // The pattern is anchored, so a single replacement is all that can happen.
    return imagePathOrUrl.replace(/^data:image\/.*?;base64,/i, '');
  }

  if (imagePathOrUrl.includes('://')) {
    // Ask for raw bytes explicitly so the result does not depend on how the
    // HTTP client guesses the body type from the response headers.
    const response = await request({
      url: imagePathOrUrl,
      method: 'GET',
      responseType: 'arraybuffer',
    });
    return Buffer.from(response.data).toString('base64');
  }

  if (!fs.existsSync(imagePathOrUrl)) {
    throw new Error(`File ${imagePathOrUrl} does not exist.`);
  }
  return fs.readFileSync(imagePathOrUrl).toString('base64');
}

/**
 * Ask the vision helper to describe an image.
 *
 * The base64 payload is wrapped in a PNG data URI before being handed to
 * `requestOllamaVision` together with the prompt.
 *
 * @param {string} imageBase64 - Base64 image data without a `data:` prefix.
 * @param {string} descriptionPrompt - Instruction telling the model what to describe.
 * @returns {Promise<*>} Whatever `requestOllamaVision` resolves to.
 */
async function getImageDescription(imageBase64, descriptionPrompt) {
  const dataUriPrefix = 'data:image/png;base64,';
  const imageDataUri = dataUriPrefix + imageBase64;
  return await requestOllamaVision(imageDataUri, descriptionPrompt);
}

/**
 * Condense a sentence to four or five words using the supplied model.
 *
 * @param {string} sentence - Text to summarize.
 * @param {function(string): Promise<*>} model - Callable that receives the
 *   full prompt and resolves to the model's answer.
 * @returns {Promise<*>} The value the model resolves to.
 */
async function summarizeSentence(sentence, model) {
  const instructionHeader = 'Summarize this sentence into four or five words:\n';
  const instructionFooter = '\nOnly return the summary, no title or explanation.';
  const prompt = instructionHeader.concat(sentence, instructionFooter);
  return await model(prompt);
}

/**
 * Classify one whisk input as an image or as a plain-text description.
 *
 * A value that `getImageAsBase64` can load is an image and is described by the
 * vision model; a string it rejects with a "does not exist" error is used
 * verbatim as the description. Missing or blank input yields an empty result.
 *
 * @param {string|undefined|null} input - Data URI, URL, file path, or free text.
 * @param {string} descriptionPrompt - Prompt used when the input is an image.
 * @returns {Promise<{imageBase64: (string|null), text: string}>}
 */
async function resolveWhiskInput(input, descriptionPrompt) {
  if (typeof input !== 'string' || input.trim() === '') {
    return { imageBase64: null, text: '' };
  }

  let imageBase64;
  try {
    imageBase64 = await getImageAsBase64(input);
  } catch (error) {
    const isMissingFile = Boolean(error)
      && typeof error.message === 'string'
      && error.message.includes('does not exist');
    if (!isMissingFile) {
      throw error;
    }
    // Not a data URI, URL, or existing file: the caller passed a description.
    return { imageBase64: null, text: input };
  }

  // NOTE(review): assumes the vision helper resolves to a string - confirm.
  const text = await getImageDescription(imageBase64, descriptionPrompt);
  return { imageBase64, text };
}

/**
 * Combine a subject, a scene and a style into one generated image.
 *
 * Each of `subject`, `scene` and `style` may be an image reference (data URI,
 * URL, local file path) or a plain-text description, and each may be omitted.
 * Images are turned into descriptions by the vision model; text is used as is.
 *
 * Routing:
 *  - no subject image and no scene image -> text-to-image from the descriptions;
 *  - subject image, no scene             -> image-to-image on the subject;
 *  - scene image, no subject             -> image-to-image on the scene;
 *  - subject image plus a scene          -> mask the subject's background,
 *    inpaint the scene, then apply the style with a final image-to-image pass
 *    when a style was given;
 *  - anything else                       -> logs the gap and returns `{}`.
 *
 * @param {string} [subject] - Foreground subject (image reference or text).
 * @param {string} [scene] - Background scene (image reference or text).
 * @param {string} [style] - Art style (image reference or text).
 * @param {boolean} [short] - When truthy, each description is condensed by the
 *   default language model before it is used as a prompt.
 * @returns {Promise<Object>} The result of the generation helper that handled
 *   the request, or `{}` when the inputs cannot be combined.
 */
async function whiskImages(subject, scene, style, short) {
  const dataUriPrefix = 'data:image/png;base64,';

  // Resolved one at a time, as before, so the vision backend sees one request at once.
  const subjectInput = await resolveWhiskInput(
    subject, 'Describe the foreground subject of the image in one short sentence.');
  const sceneInput = await resolveWhiskInput(
    scene, 'Describe the scenery in the image in one short sentence.');
  const styleInput = await resolveWhiskInput(
    style, 'Describe the art style of image in one short sentence.');

  // The language model is only needed when summaries were requested.
  const model = short
    ? await selectModel(process.env.DEFAULT_MODEL || 'Default')
    : null;
  const toPrompt = async (text) => (short && text ? summarizeSentence(text, model) : text);
  const subjectPrompt = await toPrompt(subjectInput.text);
  const scenePrompt = await toPrompt(sceneInput.text);
  const stylePrompt = await toPrompt(styleInput.text);

  const joinPrompts = (...parts) => parts.filter(Boolean).join('\n');
  const subjectImage = subjectInput.imageBase64 ? dataUriPrefix + subjectInput.imageBase64 : null;
  const sceneImage = sceneInput.imageBase64 ? dataUriPrefix + sceneInput.imageBase64 : null;

  if (!subjectImage && !sceneImage) {
    const prompt = joinPrompts(subjectPrompt, scenePrompt, stylePrompt);
    if (!prompt) {
      console.error('Missing components:', 'subject, scene, style');
      return {};
    }
    return await doStableRequest(prompt);
  }

  if (subjectImage && !scenePrompt) {
    return await doImage2Image(subjectImage, joinPrompts(subjectPrompt, stylePrompt));
  }

  if (sceneImage && !subjectPrompt) {
    return await doImage2Image(sceneImage, joinPrompts(scenePrompt, stylePrompt));
  }

  if (subjectImage && scenePrompt) {
    const mask = await doBackgroundMask(subjectImage);
    const inpaintImage = await doInpaintMask(
      subjectImage, mask.image.toString('base64'), scenePrompt);
    if (!stylePrompt) {
      return inpaintImage;
    }
    // NOTE(review): `image` looks like a Buffer (the mask is base64-encoded the
    // same way); a value that is already a string is passed through unchanged.
    const inpaintBase64 = Buffer.isBuffer(inpaintImage.image)
      ? inpaintImage.image.toString('base64')
      : String(inpaintImage.image);
    return await doImage2Image(
      dataUriPrefix + inpaintBase64,
      joinPrompts(stylePrompt, subjectPrompt, scenePrompt));
  }

  // Scene image with a text-only subject: there is no subject image to composite.
  console.error('Missing components:', 'subject image');
  return {};
}

// The module's sole export is the whiskImages function itself.
module.exports = whiskImages;

Importing Modules and Functions

```js
const fs = require('fs')
const { request } = require('gaxios')
const requestOllamaVision = importer.import('request ollama vision')
const selectModel = importer.import('select llm')
const {doStableRequest} = importer.import('stable diffusion request')
const {doImage2Image} = importer.import('image 2 image')
const {doBackgroundMask} = importer.import('mask image')
const {doInpaintMask} = importer.import('inpaint mask')
```

Function Definition: whiskImages

The whiskImages function takes four arguments (subject, scene, style, and short) and is defined as an asynchronous function: `async function whiskImages(subject, scene, style, short)`.

Handling subject Input

Handling scene Input

Error Handling

Unused Variables

The variables sceneShort, sceneString, style, and short are declared but not used anywhere in the function.

Documentation

The code does not include any comments or documentation, making it difficult to understand its purpose or functionality.

Run example

whisk images

What the code could have been:

Importing Modules and Functions

Function Definition: `whiskImages`

Handling `subject` Input

Handling `scene` Input

Error Handling

Unused Variables

Documentation

Run example

whisk images

What the code could have been:

Importing Modules and Functions

Function Definition: whiskImages

Handling subject Input

Handling scene Input

Error Handling

Unused Variables

Documentation

Function Definition: `whiskImages`

Handling `subject` Input

Handling `scene` Input