The code imports necessary modules, sets the HOMEPATH
variable, configures a TTS model, and defines a generateSpeech
function to synthesize speech from a prompt and save it to a file. The generateSpeech
function is then exported as the default export of the module.
import path from 'path'
import process from 'process'
import { HFModelConfig_v1, InterfaceHF } from "outetts";
// Home directory of the current user: the first non-empty value among the
// HOME, HOMEPATH and USERPROFILE environment variables.
const HOMEPATH =
  process.env.HOME ||
  process.env.HOMEPATH ||
  process.env.USERPROFILE;

// Settings handed to the TTS interface when it is first created.
const model_config = new HFModelConfig_v1({
  model_path: "OuteAI/OuteTTS-0.3-1B",
  // Supported languages in v0.2: en, zh, ja, ko
  language: "en",
  // Supported dtypes: fp32, q8, q4
  dtype: "fp32",
});

// Module-level cache, filled in lazily by the first generateSpeech() call:
// the initialized TTS interface ...
let tts_interface;
// ... and the speaker profile loaded from it.
let speaker;
// Promise for the one-time interface initialization that is currently in
// flight (or already finished). Sharing it lets overlapping generateSpeech()
// calls wait for the same model load instead of each starting their own.
let tts_interface_ready;

/**
 * Synthesize speech for `prompt` and save it to "output.wav" (relative to the
 * current working directory).
 *
 * The TTS interface and the default "male_1" speaker are created on the first
 * call and reused afterwards through the module-level `tts_interface` and
 * `speaker` variables.
 *
 * @param {string} prompt - Text to synthesize; must be a non-empty string.
 * @returns {Promise<void>} Resolves once the audio has been handed to `save`.
 * @throws {TypeError} If `prompt` is not a non-empty string.
 */
async function generateSpeech(prompt) {
  if (typeof prompt !== "string" || prompt.trim() === "") {
    throw new TypeError("generateSpeech: prompt must be a non-empty string");
  }

  // Initialize the interface (once, even when calls overlap)
  if (!tts_interface) {
    if (!tts_interface_ready) {
      tts_interface_ready = (async () => {
        const created = await InterfaceHF({ model_version: "0.2", cfg: model_config });
        // Print available default speakers
        created.print_default_speakers();
        // Load a default speaker
        speaker = created.load_default_speaker("male_1");
        // Publish the interface last, so a failure above never leaves a
        // half-initialized interface (without a speaker) cached.
        tts_interface = created;
      })();
    }
    try {
      await tts_interface_ready;
    } catch (err) {
      // Forget the failed attempt so that a later call can retry.
      tts_interface_ready = undefined;
      throw err;
    }
  }

  // Generate speech
  const output = await tts_interface.generate({
    text: prompt,
    temperature: 0.1, // Lower temperature values may result in a more stable tone
    repetition_penalty: 1.1,
    // Optional: Use a speaker profile for consistent voice characteristics
    // Without a speaker profile, the model will generate a voice with random characteristics
    speaker,
  });

  // Save the synthesized speech to a file.
  // NOTE(review): awaited in case save() is asynchronous, so that write errors
  // reach the caller; awaiting a synchronous result is harmless.
  await output.save("output.wav");
}
export default generateSpeech
// Import required modules
import { createInterface, createSpeaker } from './interface.js';
import { HFModelConfig_v1, InterfaceHF } from './outetts.js';
// Defaults used when a caller does not specify a model configuration.
const DEFAULT_DTYPE = 'fp32';
const DEFAULT_LANGUAGE = 'en';
const DEFAULT_MODEL_PATH = 'OuteAI/OuteTTS-0.3-1B';

// Home directory of the current user: the first non-empty value among the
// HOME, HOMEPATH and USERPROFILE environment variables.
const HOMEPATH =
  process.env.HOME ||
  process.env.HOMEPATH ||
  process.env.USERPROFILE;
// Create a configuration class for the model
/**
 * Plain holder for the settings that describe which TTS model to load.
 */
class ModelConfig {
  /**
   * @param {object} [options] - Optional overrides; omitted fields fall back
   *   to the module-level defaults.
   * @param {string} [options.modelPath=DEFAULT_MODEL_PATH] - Model identifier or path.
   * @param {string} [options.language=DEFAULT_LANGUAGE] - Language code.
   * @param {string} [options.dtype=DEFAULT_DTYPE] - Model data type.
   */
  constructor({
    modelPath = DEFAULT_MODEL_PATH,
    language = DEFAULT_LANGUAGE,
    dtype = DEFAULT_DTYPE,
  } = {}) {
    // The `= {}` above lets `new ModelConfig()` work; without it,
    // destructuring an absent argument throws a TypeError.
    this.modelPath = modelPath;
    this.language = language;
    this.dtype = dtype;
  }
}
// Create an interface to the model
/**
 * Thin wrapper around the interface object returned by `InterfaceHF`.
 *
 * The underlying interface is created lazily on first use and stored on
 * `this.interface`; the loaded speaker profile is stored on `this.speaker`.
 */
class TTSInterface {
  /**
   * @param {object} [options]
   * @param {string} [options.modelVersion='0.2'] - Passed to `InterfaceHF` as `model_version`.
   * @param {object} [options.cfg] - Model settings: either an `HFModelConfig_v1`
   *   instance or an object with `modelPath` / `language` / `dtype` fields.
   * @param {string} [options.modelPath=DEFAULT_MODEL_PATH] - Fallback model path
   *   used when `cfg` does not carry one.
   */
  constructor({ modelVersion = '0.2', cfg, modelPath = DEFAULT_MODEL_PATH } = {}) {
    this.modelVersion = modelVersion;
    this.cfg = cfg;
    this.modelPath = modelPath;
    this.speaker = null;
    // Underlying library interface; stays null until ensureInterface() has run.
    this.interface = null;
    // Promise of the initialization in flight, shared by overlapping callers.
    this.interfaceReady = null;
  }

  /**
   * Translate `this.cfg` into the config object handed to `InterfaceHF`.
   * NOTE(review): assumes `InterfaceHF` expects an `HFModelConfig_v1` built from
   * snake_case options, as the direct library usage elsewhere in this file
   * does - confirm against the library.
   *
   * @returns {object} An `HFModelConfig_v1` instance.
   */
  buildLibraryConfig() {
    const cfg = this.cfg || {};
    if (cfg instanceof HFModelConfig_v1) {
      return cfg;
    }
    const libraryOptions = {
      model_path: cfg.model_path || cfg.modelPath || this.modelPath,
    };
    // Only forward values that were actually provided.
    if (cfg.language) {
      libraryOptions.language = cfg.language;
    }
    if (cfg.dtype) {
      libraryOptions.dtype = cfg.dtype;
    }
    return new HFModelConfig_v1(libraryOptions);
  }

  /** Create the underlying interface and remember it on `this.interface`. */
  async createInterface() {
    const created = await InterfaceHF({
      model_version: this.modelVersion,
      cfg: this.buildLibraryConfig(),
    });
    this.interface = created;
    return created;
  }

  /**
   * Return the underlying interface, creating it on first use.
   * A failed attempt is forgotten so that a later call can retry.
   *
   * @returns {Promise<object>} The object returned by `InterfaceHF`.
   */
  async ensureInterface() {
    if (this.interface) {
      return this.interface;
    }
    if (!this.interfaceReady) {
      this.interfaceReady = this.createInterface();
    }
    try {
      return await this.interfaceReady;
    } catch (err) {
      this.interfaceReady = null;
      throw err;
    }
  }

  /**
   * Synthesize speech for `options.text`, loading the 'male_1' speaker first
   * if no speaker has been loaded yet.
   *
   * @param {object} options
   * @param {string} options.text - Text to synthesize.
   * @param {number} [options.temperature=0.1]
   * @param {number} [options.repetitionPenalty=1.1] - Forwarded to the library
   *   as `repetition_penalty`.
   * @returns {Promise<object>} Whatever the underlying `generate` returns.
   */
  async generate(options) {
    const { text, temperature = 0.1, repetitionPenalty = 1.1 } = options;
    const ttsInterface = await this.ensureInterface();
    if (!this.speaker) {
      await this.loadDefaultSpeaker('male_1');
    }
    return ttsInterface.generate({
      text,
      temperature,
      repetition_penalty: repetitionPenalty,
      speaker: this.speaker,
    });
  }

  /**
   * Load one of the library's default speakers and store it on `this.speaker`.
   *
   * @param {string} speakerName - Name of the default speaker, e.g. 'male_1'.
   */
  async loadDefaultSpeaker(speakerName) {
    const ttsInterface = await this.ensureInterface();
    this.speaker = await ttsInterface.load_default_speaker(speakerName);
  }

  /** Ask the library to print its available default speakers. */
  async printDefaultSpeakers() {
    const ttsInterface = await this.ensureInterface();
    await ttsInterface.print_default_speakers();
  }
}
// Refactor the generateSpeech function
// One initialized TTS interface (as a promise) per distinct configuration, so
// repeated calls reuse it instead of building a new interface every time.
const ttsInterfaceCache = new Map();

/**
 * Synthesize speech for `prompt` and save it to 'output.wav' (relative to the
 * current working directory).
 *
 * @param {string} prompt - Text to synthesize.
 * @param {object} [options]
 * @param {string} [options.modelVersion='0.2']
 * @param {string} [options.modelPath=DEFAULT_MODEL_PATH]
 * @param {string} [options.language=DEFAULT_LANGUAGE]
 * @param {string} [options.dtype=DEFAULT_DTYPE]
 * @returns {Promise<void>} Resolves once the audio has been handed to `save`.
 */
async function generateSpeech(prompt, {
  modelVersion = '0.2',
  modelPath = DEFAULT_MODEL_PATH,
  language = DEFAULT_LANGUAGE,
  dtype = DEFAULT_DTYPE,
} = {}) {
  const cacheKey = JSON.stringify([modelVersion, modelPath, language, dtype]);

  let ready = ttsInterfaceCache.get(cacheKey);
  if (!ready) {
    ready = (async () => {
      // Create a model configuration
      const modelConfig = new ModelConfig({ modelPath, language, dtype });
      // Create an interface to the model
      const created = new TTSInterface({ modelVersion, cfg: modelConfig, modelPath });
      await created.printDefaultSpeakers();
      await created.loadDefaultSpeaker('male_1');
      return created;
    })();
    ttsInterfaceCache.set(cacheKey, ready);
  }

  // Named `ttsInterface` because `interface` is a reserved word in strict
  // mode, and ES modules are always strict.
  let ttsInterface;
  try {
    ttsInterface = await ready;
  } catch (err) {
    // Drop the failed attempt so that a later call can retry.
    ttsInterfaceCache.delete(cacheKey);
    throw err;
  }

  // Generate speech
  const output = await ttsInterface.generate({ text: prompt });

  // Save the synthesized speech to a file.
  // NOTE(review): awaited in case save() is asynchronous; awaiting a
  // synchronous result is harmless.
  await output.save('output.wav');
}
// Export the generateSpeech function
export default generateSpeech;
// TODO: Refactor the interface.js file to use ES6 classes
// TODO: Add error handling for the outetts module
The code imports the following modules:
- path, from the path module, which provides utilities for working with file paths.
- process, from the process module, which provides information about the current Node.js process.
- HFModelConfig_v1 and InterfaceHF, from the outetts module, which are used to configure and interact with a TTS (Text-to-Speech) model.

The code sets the HOMEPATH variable to the first non-empty value among the following environment variables, checked in order:
- HOME
- HOMEPATH
- USERPROFILE
This is used to determine the home directory on the user's system.
The code creates a new instance of HFModelConfig_v1 with the following settings:
- model_path: the path to the TTS model, set to 'OuteAI/OuteTTS-0.3-1B'.
- language: the language of the model, set to "en" (English).
- dtype: the data type of the model, set to "fp32" (32-bit floating point).

generateSpeech
This is the main function that generates speech from a given prompt. It:
- Initializes the TTS interface on the first call, prints the available default speakers, and loads the default speaker "male_1".
- Calls the generate method of the TTS interface, passing in the prompt and other settings.
- Saves the synthesized speech to the file "output.wav".

The generateSpeech function is exported as the default export of the module.