2025-04-19 19:45:42 +01:00
/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
import { SchemaValidator } from '../utils/schemaValidator.js' ;
2025-06-13 17:44:14 -07:00
import {
BaseTool ,
ToolResult ,
ToolCallConfirmationDetails ,
ToolConfirmationOutcome ,
} from './tools.js' ;
2025-04-19 19:45:42 +01:00
import { getErrorMessage } from '../utils/errors.js' ;
2025-06-13 17:44:14 -07:00
import { Config , ApprovalMode } from '../config/config.js' ;
2025-05-29 15:02:31 -07:00
import { getResponseText } from '../utils/generateContentResponseUtilities.js' ;
2025-06-13 17:44:14 -07:00
import { fetchWithTimeout , isPrivateIp } from '../utils/fetch.js' ;
import { convert } from 'html-to-text' ;
/** Timeout, in milliseconds, passed to `fetchWithTimeout` when a URL is fetched directly. */
const URL_FETCH_TIMEOUT_MS = 10 * 1000;
/** Maximum number of characters of converted page text kept for the fallback prompt. */
const MAX_CONTENT_LENGTH = 100 * 1000;
/**
 * Extracts every `http://` / `https://` URL embedded in free-form text.
 *
 * A candidate is the scheme followed by the longest run of non-whitespace
 * characters. Because URLs in prose are usually followed by punctuation
 * ("... see https://example.com/page."), trailing sentence punctuation, quotes
 * and *unbalanced* closing brackets are trimmed from each candidate. Closing
 * brackets that have a matching opener inside the URL are kept, so
 * `https://en.wikipedia.org/wiki/Foo_(bar)` survives intact.
 *
 * Every regex match produces exactly one entry: a candidate is never trimmed
 * down to a bare scheme, so the result has the same length as the raw match
 * list.
 *
 * @param text Arbitrary text, typically a user prompt.
 * @returns The URLs in order of appearance; an empty array when none are found.
 */
function extractUrls(text: string): string[] {
  const urlRegex = /https?:\/\/[^\s]+/g;
  const sentencePunctuation = new Set(['.', ',', ';', ':', '!', '?', "'", '"']);
  const openerFor = new Map([
    [')', '('],
    [']', '['],
    ['}', '{'],
    ['>', '<'],
  ]);
  const occurrences = (haystack: string, ch: string): number =>
    haystack.split(ch).length - 1;
  const isBareScheme = (value: string): boolean =>
    /^https?:\/\/$/.test(value);

  const trimTrailing = (candidate: string): string => {
    let url = candidate;
    for (;;) {
      const last = url.charAt(url.length - 1);
      const opener = openerFor.get(last);
      // A closing bracket is only noise when it has no opener inside the URL.
      const strippable =
        sentencePunctuation.has(last) ||
        (opener !== undefined &&
          occurrences(url, last) > occurrences(url, opener));
      const shortened = url.slice(0, -1);
      // Stop before the candidate degenerates into "http://" or "https://".
      if (!strippable || isBareScheme(shortened)) {
        return url;
      }
      url = shortened;
    }
  };

  return (text.match(urlRegex) ?? []).map(trimTrailing);
}
2025-05-29 15:02:31 -07:00
// Local shapes for the grounding metadata read from a model response
// (similar to the ones declared in web-search.ts).

/** Web source backing a grounding chunk. */
interface GroundingChunkWeb {
  /** Address of the source; rendered in the "Sources" list when present. */
  uri?: string;
  /** Human-readable title of the source; rendered in the "Sources" list when present. */
  title?: string;
}

/** A single grounding chunk; only the `web` variant is read in this file. */
interface GroundingChunkItem {
  web?: GroundingChunkWeb;
}

/**
 * Span of the response text that a grounding support refers to.
 * NOTE(review): the Gemini API documents these offsets as byte offsets —
 * confirm against the API reference before relying on the unit.
 */
interface GroundingSupportSegment {
  startIndex: number;
  /** Offset at which citation markers for this segment are inserted. */
  endIndex: number;
  text?: string;
}

/** Associates a segment of the response with the chunks that support it. */
interface GroundingSupportItem {
  segment?: GroundingSupportSegment;
  /** Zero-based chunk indices; displayed as one-based `[n]` citation markers. */
  groundingChunkIndices?: number[];
}
2025-04-19 19:45:42 +01:00
/**
 * Parameters for the WebFetch tool
 */
export interface WebFetchToolParams {
  /**
   * The prompt containing URL(s) (up to 20) and instructions for processing their content.
   */
  prompt: string;
}
/**
 * Implementation of the WebFetch tool logic.
 *
 * The tool hands the user's prompt (which embeds one or more URLs) to the
 * Gemini client with the `urlContext` tool enabled. When the first URL is
 * reported as private by `isPrivateIp`, or when the model-side retrieval
 * produces nothing usable, it falls back to fetching the first URL directly,
 * converting the HTML to text and asking the model to answer from that text.
 */
export class WebFetchTool extends BaseTool<WebFetchToolParams, ToolResult> {
  /** Tool name passed to the `BaseTool` constructor. */
  static readonly Name: string = 'web_fetch';

  constructor(private readonly config: Config) {
    super(
      WebFetchTool.Name,
      'WebFetch',
      "Processes content from URL(s), including local and private network addresses (e.g., localhost), embedded in a prompt. Include up to 20 URLs and instructions (e.g., summarize, extract specific data) directly in the 'prompt' parameter.",
      {
        properties: {
          prompt: {
            description:
              'A comprehensive prompt that includes the URL(s) (up to 20) to fetch and specific instructions on how to process their content (e.g., "Summarize https://example.com/article and extract key points from https://another.com/data"). Must contain at least one URL starting with http:// or https://.',
            type: 'string',
          },
        },
        required: ['prompt'],
        type: 'object',
      },
    );
  }

  /**
   * Rewrites a GitHub "blob" page URL to its raw.githubusercontent.com
   * equivalent so that file content is fetched instead of the HTML page.
   * Any other URL is returned unchanged.
   */
  private static toRawGitHubUrl(url: string): string {
    if (url.includes('github.com') && url.includes('/blob/')) {
      return url
        .replace('github.com', 'raw.githubusercontent.com')
        .replace('/blob/', '/');
    }
    return url;
  }

  /**
   * Inserts `[n]` citation markers into `text` at the end of every supported
   * segment.
   *
   * The Gemini API documents segment indices as byte offsets, so the
   * insertion is performed on the UTF-8 encoding of the text rather than on
   * UTF-16 code units; for pure-ASCII text both give the same result.
   * Offsets outside the text are clamped to its bounds.
   *
   * @param text The response text to annotate.
   * @param supports Grounding supports; entries lacking a segment or chunk
   *   indices are ignored.
   * @returns The annotated text, or `text` unchanged when nothing applies.
   */
  private static insertCitationMarkers(
    text: string,
    supports: GroundingSupportItem[],
  ): string {
    const insertions: Array<{ index: number; marker: string }> = [];
    for (const support of supports) {
      if (support.segment && support.groundingChunkIndices) {
        insertions.push({
          index: support.segment.endIndex,
          marker: support.groundingChunkIndices
            .map((chunkIndex: number) => `[${chunkIndex + 1}]`)
            .join(''),
        });
      }
    }
    if (insertions.length === 0) {
      return text;
    }

    // Work from the end of the text backwards so earlier offsets stay valid.
    insertions.sort((a, b) => b.index - a.index);

    const encoder = new TextEncoder();
    const textBytes = encoder.encode(text);
    // Pieces are collected back-to-front and reversed once at the end.
    const pieces: Uint8Array[] = [];
    let tailStart = textBytes.length;
    for (const { index, marker } of insertions) {
      const position = Math.max(0, Math.min(index, tailStart));
      pieces.push(textBytes.subarray(position, tailStart));
      pieces.push(encoder.encode(marker));
      tailStart = position;
    }
    pieces.push(textBytes.subarray(0, tailStart));
    pieces.reverse();

    const totalLength = pieces.reduce((sum, piece) => sum + piece.length, 0);
    const merged = new Uint8Array(totalLength);
    let offset = 0;
    for (const piece of pieces) {
      merged.set(piece, offset);
      offset += piece.length;
    }
    // ignoreBOM keeps a leading U+FEFF in the text instead of dropping it.
    return new TextDecoder('utf-8', { ignoreBOM: true }).decode(merged);
  }

  /**
   * Fallback path: fetches the first URL of the prompt directly, converts the
   * HTML to plain text (truncated to MAX_CONTENT_LENGTH characters) and asks
   * the Gemini client to answer the original request from that text.
   *
   * @param params Tool parameters; only the first URL in `prompt` is used.
   * @param signal Abort signal forwarded to the Gemini client call.
   * @returns The model's answer, or an error result when no URL is found or
   *   the fetch / model call fails. This method does not throw.
   */
  private async executeFallback(
    params: WebFetchToolParams,
    signal: AbortSignal,
  ): Promise<ToolResult> {
    // For now, we only support one URL for fallback
    const [firstUrl] = extractUrls(params.prompt);
    if (firstUrl === undefined) {
      return {
        llmContent: 'Error: No URL found in the prompt for fallback.',
        returnDisplay: 'Error: No URL found in the prompt for fallback.',
      };
    }

    // Convert GitHub blob URL to raw URL
    const url = WebFetchTool.toRawGitHubUrl(firstUrl);

    try {
      const response = await fetchWithTimeout(url, URL_FETCH_TIMEOUT_MS);
      if (!response.ok) {
        throw new Error(
          `Request failed with status code ${response.status} ${response.statusText}`,
        );
      }
      const html = await response.text();
      const textContent = convert(html, {
        wordwrap: false,
        selectors: [
          { selector: 'a', options: { ignoreHref: true } },
          { selector: 'img', format: 'skip' },
        ],
      }).substring(0, MAX_CONTENT_LENGTH);

      const geminiClient = this.config.getGeminiClient();
      const fallbackPrompt = `The user requested the following: "${params.prompt}".
I was unable to access the URL directly. Instead, I have fetched the raw content of the page. Please use the following content to answer the user's request. Do not attempt to access the URL again.
---
${textContent}
---`;
      const result = await geminiClient.generateContent(
        [{ role: 'user', parts: [{ text: fallbackPrompt }] }],
        {},
        signal,
      );
      const resultText = getResponseText(result) ?? '';
      return {
        llmContent: resultText,
        returnDisplay: `Content for ${url} processed using fallback fetch.`,
      };
    } catch (e: unknown) {
      // getErrorMessage also copes with thrown values that are not Error instances.
      const errorMessage = `Error during fallback fetch for ${url}: ${getErrorMessage(e)}`;
      return {
        llmContent: `Error: ${errorMessage}`,
        returnDisplay: `Error: ${errorMessage}`,
      };
    }
  }

  /**
   * Validates the tool parameters.
   *
   * @param params Parameters to check.
   * @returns `null` when valid; otherwise a human-readable reason. The prompt
   *   must pass schema validation, be non-empty, and contain at least one URL
   *   that `extractUrls` can actually extract (a bare `https://` is rejected).
   */
  validateParams(params: WebFetchToolParams): string | null {
    if (
      this.schema.parameters &&
      !SchemaValidator.validate(
        this.schema.parameters as Record<string, unknown>,
        params,
      )
    ) {
      return 'Parameters failed schema validation.';
    }
    if (!params.prompt || params.prompt.trim() === '') {
      return "The 'prompt' parameter cannot be empty and must contain URL(s) and instructions.";
    }
    // Validate with the same extractor that execute() uses, so a prompt that
    // passes validation is guaranteed to yield a URL to work with.
    if (extractUrls(params.prompt).length === 0) {
      return "The 'prompt' must contain at least one valid URL (starting with http:// or https://).";
    }
    return null;
  }

  /**
   * Builds a short description of the call, truncating prompts longer than
   * 100 characters to 97 characters plus an ellipsis.
   */
  getDescription(params: WebFetchToolParams): string {
    const displayPrompt =
      params.prompt.length > 100
        ? params.prompt.substring(0, 97) + '...'
        : params.prompt;
    return `Processing URLs and instructions from prompt: "${displayPrompt}"`;
  }

  /**
   * Decides whether the user must confirm the call.
   *
   * @returns `false` when the approval mode is AUTO_EDIT or the parameters
   *   are invalid; otherwise confirmation details listing the URLs that will
   *   be fetched. Choosing "proceed always" switches the approval mode to
   *   AUTO_EDIT.
   */
  async shouldConfirmExecute(
    params: WebFetchToolParams,
  ): Promise<ToolCallConfirmationDetails | false> {
    if (this.config.getApprovalMode() === ApprovalMode.AUTO_EDIT) {
      return false;
    }

    const validationError = this.validateParams(params);
    if (validationError) {
      return false;
    }

    // Perform GitHub URL conversion here to differentiate between user-provided
    // URL and the actual URL to be fetched.
    const urls = extractUrls(params.prompt).map((url) =>
      WebFetchTool.toRawGitHubUrl(url),
    );

    const confirmationDetails: ToolCallConfirmationDetails = {
      type: 'info',
      title: `Confirm Web Fetch`,
      prompt: params.prompt,
      urls,
      onConfirm: async (outcome: ToolConfirmationOutcome) => {
        if (outcome === ToolConfirmationOutcome.ProceedAlways) {
          this.config.setApprovalMode(ApprovalMode.AUTO_EDIT);
        }
      },
    };
    return confirmationDetails;
  }

  /**
   * Runs the tool.
   *
   * @param params Tool parameters (validated first).
   * @param signal Abort signal forwarded to the Gemini client.
   * @returns The model's response, annotated with citation markers and a
   *   source list when grounding metadata is present. Validation failures and
   *   exceptions from the model call are returned as error results, not thrown.
   */
  async execute(
    params: WebFetchToolParams,
    signal: AbortSignal,
  ): Promise<ToolResult> {
    const validationError = this.validateParams(params);
    if (validationError) {
      return {
        llmContent: `Error: Invalid parameters provided. Reason: ${validationError}`,
        returnDisplay: validationError,
      };
    }

    const userPrompt = params.prompt;

    // URLs that isPrivateIp reports as private go straight to the direct fetch.
    const [firstUrl] = extractUrls(userPrompt);
    if (firstUrl !== undefined && isPrivateIp(firstUrl)) {
      return this.executeFallback(params, signal);
    }

    const geminiClient = this.config.getGeminiClient();

    try {
      const response = await geminiClient.generateContent(
        [{ role: 'user', parts: [{ text: userPrompt }] }],
        { tools: [{ urlContext: {} }] },
        signal, // Pass signal
      );

      console.debug(
        `[WebFetchTool] Full response for prompt "${userPrompt.substring(0, 50)}...":`,
        JSON.stringify(response, null, 2),
      );

      let responseText = getResponseText(response) ?? '';
      const candidate = response.candidates?.[0];
      const urlContextMeta = candidate?.urlContextMetadata;
      const groundingMetadata = candidate?.groundingMetadata;
      const sources = groundingMetadata?.groundingChunks as
        | GroundingChunkItem[]
        | undefined;
      const groundingSupports = groundingMetadata?.groundingSupports as
        | GroundingSupportItem[]
        | undefined;

      // Error handling: fall back when every reported URL retrieval failed,
      // or when the response carries neither text nor grounding sources.
      const urlMetadata = urlContextMeta?.urlMetadata;
      const allRetrievalsFailed =
        !!urlMetadata &&
        urlMetadata.length > 0 &&
        urlMetadata.every(
          (m) => m.urlRetrievalStatus !== 'URL_RETRIEVAL_STATUS_SUCCESS',
        );
      const hasSources = !!sources && sources.length > 0;
      const nothingUsable = !responseText.trim() && !hasSources;
      if (allRetrievalsFailed || nothingUsable) {
        return this.executeFallback(params, signal);
      }

      if (sources && sources.length > 0) {
        const sourceListFormatted = sources.map(
          (source: GroundingChunkItem, index: number) => {
            const title = source.web?.title || 'Untitled';
            const uri = source.web?.uri || 'Unknown URI'; // Fallback if URI is missing
            return `[${index + 1}] ${title} (${uri})`;
          },
        );

        if (groundingSupports && groundingSupports.length > 0) {
          responseText = WebFetchTool.insertCitationMarkers(
            responseText,
            groundingSupports,
          );
        }

        responseText += `
Sources:
${sourceListFormatted.join('\n')}`;
      }

      const llmContent = responseText;

      console.debug(
        `[WebFetchTool] Formatted tool response for prompt "${userPrompt}":\n\n`,
        llmContent,
      );

      return {
        llmContent,
        returnDisplay: `Content processed from prompt.`,
      };
    } catch (error: unknown) {
      const errorMessage = `Error processing web content for prompt "${userPrompt.substring(0, 50)}...": ${getErrorMessage(error)}`;
      console.error(errorMessage, error);
      return {
        llmContent: `Error: ${errorMessage}`,
        returnDisplay: `Error: ${errorMessage}`,
      };
    }
  }
}