The code defines an asynchronous function `accessInternet` that uses Selenium to extract articles from websites or to search the internet for knowledge, depending on the user's prompt, and summarizes the results using the imported article summarizer (bound in the code to the local name `summerizeArticle`).
The `accessInternet` function takes a prompt model, a session object, and a prompt string as input. It asks the prompt model which task fits the prompt, then extracts an article from a linked website, searches the internet for related knowledge, or both, and summarizes each result.
npm run import -- "access web information"
const extractArticle = importer.import("extract llm article")
const getClient = importer.import("selenium client")
const summerizeArticle = importer.import("summarize llm article")
// Matches an https:// URL anywhere inside a string. The pattern is not
// anchored, so exec() on a whole prompt returns the first embedded link.
const HTTPS_LINK = /https:\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%_\+.~#?&\/=]*)/;
// Query-URL prefixes for several search engines; each ends at the query
// parameter so search text can be appended directly.
// NOTE(review): within the code shown here this list is never read;
// accessInternet hard-codes the Google prefix instead.
const SEARCH_ENGINES = [
'https://www.google.com/search?q=',
'https://www.bing.com/search?q=',
'https://search.yahoo.com/search?p=',
'https://www.ask.com/web?q=',
'https://search.aol.com/aol/search?q=',
'http://www.baidu.com/s?wd=',
'https://www.wolframalpha.com/input/?i=',
'https://duckduckgo.com/?q=',
'https://www.yandex.com/search/?text=',
'https://archive.org/search.php?query=',
];
/**
 * Answers a prompt with summarized web content.
 *
 * Asks the prompt model which helper (extractArticle / knowledgeSearch) fits
 * the prompt. A link found in the prompt is fetched and summarized; when the
 * model picks knowledgeSearch, the model is asked for a search topic and the
 * Google results page for that topic is fetched and summarized as well.
 *
 * @param {function(string): Promise<string>} promptModel - Called with a
 *   question string; resolves to the model's answer.
 * @param {*} session - Not used by this function; kept so existing callers
 *   keep working.
 * @param {string} prompt - The user prompt, optionally containing an https link.
 * @returns {Promise<string>} The article summary followed by the search
 *   summary (each prefixed with "TLDR: "), or '' when nothing was summarized.
 */
async function accessInternet(promptModel, session, prompt) {
  const q1 = 'Given the following functions:\n'
    + 'extractArticle - extract article text from specific websites\n'
    + 'knowledgeSearch - search the web for related knowledge\n'
    + '\nWhich function would be related to solving this prompt:\n'
    + prompt
    + '\nAnswer with only the functions names, nothing else.'
  console.log('User: ' + q1)
  const a1 = await promptModel(q1)
  console.log('AI: ' + a1)

  // Plain substring test: the names are literals, not regex patterns.
  const answer = String(a1)
  const matching = [
    'extractArticle',
    'knowledgeSearch',
  ].filter(func => answer.includes(func))

  const link = HTTPS_LINK.exec(prompt)
  const driver = await getClient()
  let summary1 = ''
  let summary2 = ''

  try {
    // Extraction needs a URL. Previously the model choosing extractArticle
    // for a prompt without a link crashed on link[0]; now that case is skipped.
    if (link) {
      const article = await extractArticle(driver, link[0])
      // presumably summary[0] is the full summary and summary[1] the short
      // one — TODO confirm against the summarizer.
      const summary = await summerizeArticle(article)
      summary1 = 'TLDR: ' + summary[1] + '\n\n' + summary[0]
    }

    if (matching.includes('knowledgeSearch')) {
      const q2 = 'What is the search topic for this prompt:\n'
        + prompt
        + '\nOnly return the search text and nothing else.'
      console.log('User: ' + q2)
      const a2 = await promptModel(q2)
      console.log('AI: ' + a2)
      // Encode the model's text so spaces, '&', '#', etc. cannot break or
      // truncate the query string.
      const query = encodeURIComponent(String(a2).trim())
      const article = await extractArticle(driver, 'https://www.google.com/search?q=' + query)
      const summary = await summerizeArticle(article)
      summary2 = 'TLDR: ' + summary[1] + '\n\n' + summary[0]
    }
  } finally {
    // Always release the browser, even when extraction or summarizing throws.
    await driver.quit()
  }

  return summary1 + summary2
}
module.exports = accessInternet
const importer = require('./importer'); // Import the importer module
const { extractArticle, summerizeArticle } = importer.import(['extract llm article','summarize llm article']); // Import functions
const Client = importer.import('selenium client'); // Import the selenium client class
// Matches an http(s) URL anywhere inside a string. The pattern is deliberately
// not anchored with ^...$: accessInternet runs exec() on the whole prompt to
// pull a link out of surrounding text, which an anchored pattern can only do
// when the prompt consists of nothing but the URL.
const HTTPS_LINK_REGEX = /https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%_\+.~#?&\/=]*)/i;
// Query-URL prefixes for several search engines; each ends at the query
// parameter so (URL-encoded) search text can be appended directly.
const SEARCH_ENGINES = [
  'https://www.google.com/search?q=',
  'https://www.bing.com/search?q=',
  'https://search.yahoo.com/search?p=',
  'https://www.ask.com/web?q=',
  'https://search.aol.com/aol/search?q=',
  'http://www.baidu.com/s?wd=',
  'https://www.wolframalpha.com/input/?i=',
  'https://duckduckgo.com/?q=',
  'https://www.yandex.com/search/?text=',
  'https://archive.org/search.php?query=',
];
/**
 * Answers a prompt with summarized web content.
 *
 * Asks the prompt model which of the names in `importer.importedFunctions`
 * relate to the prompt. If HTTPS_LINK_REGEX finds a link in the prompt, that
 * page is extracted and summarized. If the model's answer names
 * `knowledgeSearch`, the model is asked for a search topic and the Google
 * results page for that topic is extracted and summarized as well.
 *
 * @param {function(string): Promise<string>} promptModel - Called with a
 *   question string; resolves to the model's answer.
 * @param {*} session - Not used by this function; kept so existing callers
 *   keep working.
 * @param {string} prompt - The user prompt, optionally containing a link.
 * @returns {Promise<string>} The "TLDR: ..." summaries joined by a blank
 *   line, or '' when nothing was summarized.
 */
async function accessInternet(promptModel, session, prompt) {
  // NOTE(review): assumes importer.importedFunctions is an object keyed by
  // function name (e.g. 'extractArticle', 'knowledgeSearch') — confirm.
  const functionNames = Object.keys(importer.importedFunctions);

  const q1 = [
    'Given the following functions:',
    functionNames.join('\n'),
    '',
    'Which function would be related to solving this prompt:',
    prompt,
    '',
    'Answer with only the functions names, nothing else.',
  ].join('\n');
  console.log('User:', q1);
  const a1 = await promptModel(q1);
  console.log('AI:', a1);

  // Plain substring test: key names must not be interpreted as regex patterns.
  const answer = String(a1);
  const matchingFunctions = functionNames.filter((name) => answer.includes(name));

  const link = HTTPS_LINK_REGEX.exec(prompt);
  const summaries = [];
  const driver = new Client();

  try {
    // Extraction needs a URL. Previously the model choosing extractArticle
    // for a prompt without a link called extractArticle(driver, null); that
    // case is now skipped.
    if (link) {
      const article = await extractArticle(driver, link[0]);
      // presumably summary[0] is the full summary and summary[1] the short
      // one — TODO confirm against the summarizer.
      const summary = await summerizeArticle(article);
      summaries.push({ key: 'extractArticle', value: `TLDR: ${summary[1]}\n\n${summary[0]}` });
    }

    if (matchingFunctions.includes('knowledgeSearch')) {
      const q2 = [
        'What is the search topic for this prompt:',
        prompt,
        '',
        'Only return the search text and nothing else.',
      ].join('\n');
      console.log('User:', q2);
      const a2 = await promptModel(q2);
      console.log('AI:', a2);
      // Encode the model's text so spaces, '&', '#', etc. cannot break or
      // truncate the query string.
      const query = encodeURIComponent(String(a2).trim());
      const article = await extractArticle(driver, `https://www.google.com/search?q=${query}`);
      const summary = await summerizeArticle(article);
      summaries.push({ key: 'knowledgeSearch', value: `TLDR: ${summary[1]}\n\n${summary[0]}` });
    }
  } finally {
    // Always release the browser session, even when a step above throws.
    await driver.quit();
  }

  return summaries.map((entry) => entry.value).join('\n\n');
}
module.exports = accessInternet;
Code Breakdown
The code imports three modules:
- `extractArticle`: extracts article text from specific websites
- `getClient`: returns a Selenium client
- `summarizeArticle` (bound in the code to the local name `summerizeArticle`): summarizes an article

The code defines a regular expression `HTTPS_LINK` to match HTTPS links.
The code defines an array SEARCH_ENGINES
containing URLs of various search engines.
`accessInternet` Function

The `accessInternet` function is an asynchronous function that takes three parameters:
- `promptModel`: a prompt model
- `session`: a session object (not used in the function)
- `prompt`: a prompt string

The function does the following:
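1. It asks the prompt model which function relates to the prompt: `extractArticle` or `knowledgeSearch`.
2. If the model chose `extractArticle` or a link was found, it extracts the article text from the link using the `extractArticle` function.
3. It summarizes the extracted article using the `summarizeArticle` function.
4. If the model chose `knowledgeSearch`, it asks the prompt model for a search topic, extracts the article text from the search engine results page, and summarizes the article.

The code defines several variables and strings: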
- `q1`: a question string asking the prompt model to choose a function
- `a1`: the prompt model's response to `q1`
- `matching`: an array of matched functions
- `link`: an HTTPS link extracted from the prompt
- `summary1`: a summarized version of the article (if extracted)
- `summary2`: a summarized version of the article (if searched)

The code uses:
- `await` to wait for the prompt model's response and for the functions to complete.
- `filter` to match the model's response with the allowed functions.
- `exec` to extract the HTTPS link from the prompt.