10 changes: 7 additions & 3 deletions packages/components/nodes/agentflow/Agent/Agent.ts
@@ -1569,16 +1569,20 @@ class Agent_Agentflow implements INode {
for await (const chunk of await llmNodeInstance.stream(messages, { signal: abortController?.signal })) {
if (sseStreamer) {
let content = ''
if (Array.isArray(chunk.content) && chunk.content.length > 0) {

if (typeof chunk === 'string') {
content = chunk
} else if (Array.isArray(chunk.content) && chunk.content.length > 0) {
const contents = chunk.content as MessageContentText[]
content = contents.map((item) => item.text).join('')
} else {
} else if (chunk.content) {
content = chunk.content.toString()
}
sseStreamer.streamTokenEvent(chatId, content)
}

response = response.concat(chunk)
const messageChunk = typeof chunk === 'string' ? new AIMessageChunk(chunk) : chunk
response = response.concat(messageChunk)
}
} catch (error) {
console.error('Error during streaming:', error)
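The three agentflow hunks in this PR (Agent above, HumanInput and LLM below) apply the same fix: a stream chunk may now be a plain string rather than an AIMessageChunk, so both the SSE token and the value passed to response.concat() are normalized first. A minimal sketch of that normalization, written outside the diff with a hypothetical helper name:

// Sketch only - normalizeChunk is a hypothetical helper, not part of this PR.
// It mirrors the branches added above: plain string chunks, array content, string content.
import { AIMessageChunk, type MessageContentText } from '@langchain/core/messages'

function normalizeChunk(chunk: string | AIMessageChunk): { text: string; messageChunk: AIMessageChunk } {
    if (typeof chunk === 'string') {
        // e.g. when the runnable ends in a string output parser
        return { text: chunk, messageChunk: new AIMessageChunk(chunk) }
    }
    if (Array.isArray(chunk.content) && chunk.content.length > 0) {
        // multi-part content: concatenate the text parts
        const parts = chunk.content as MessageContentText[]
        return { text: parts.map((p) => p.text).join(''), messageChunk: chunk }
    }
    // simple string content; may be empty for tool-call-only chunks
    return { text: chunk.content ? chunk.content.toString() : '', messageChunk: chunk }
}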
7 changes: 5 additions & 2 deletions packages/components/nodes/agentflow/HumanInput/HumanInput.ts
@@ -241,8 +241,11 @@ class HumanInput_Agentflow implements INode {
if (isStreamable) {
const sseStreamer: IServerSideEventStreamer = options.sseStreamer as IServerSideEventStreamer
for await (const chunk of await llmNodeInstance.stream(messages)) {
sseStreamer.streamTokenEvent(chatId, chunk.content.toString())
response = response.concat(chunk)
const content = typeof chunk === 'string' ? chunk : chunk.content.toString()
sseStreamer.streamTokenEvent(chatId, content)

const messageChunk = typeof chunk === 'string' ? new AIMessageChunk(chunk) : chunk
response = response.concat(messageChunk)
}
humanInputDescription = response.content as string
} else {
10 changes: 7 additions & 3 deletions packages/components/nodes/agentflow/LLM/LLM.ts
@@ -824,16 +824,20 @@ class LLM_Agentflow implements INode {
for await (const chunk of await llmNodeInstance.stream(messages, { signal: abortController?.signal })) {
if (sseStreamer) {
let content = ''
if (Array.isArray(chunk.content) && chunk.content.length > 0) {

if (typeof chunk === 'string') {
content = chunk
} else if (Array.isArray(chunk.content) && chunk.content.length > 0) {
const contents = chunk.content as MessageContentText[]
content = contents.map((item) => item.text).join('')
} else {
} else if (chunk.content) {
content = chunk.content.toString()
}
sseStreamer.streamTokenEvent(chatId, content)
}

response = response.concat(chunk)
const messageChunk = typeof chunk === 'string' ? new AIMessageChunk(chunk) : chunk
response = response.concat(messageChunk)
}
} catch (error) {
console.error('Error during streaming:', error)
packages/components/nodes/chatmodels/ChatHuggingFace/ChatHuggingFace.ts
@@ -41,15 +41,17 @@ class ChatHuggingFace_ChatModels implements INode {
label: 'Model',
name: 'model',
type: 'string',
description: 'If using own inference endpoint, leave this blank',
placeholder: 'gpt2'
description:
'Model name (e.g., deepseek-ai/DeepSeek-V3.2-Exp:novita). If model includes provider (:) or using router endpoint, leave Endpoint blank.',
placeholder: 'deepseek-ai/DeepSeek-V3.2-Exp:novita'
},
{
label: 'Endpoint',
name: 'endpoint',
type: 'string',
placeholder: 'https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2',
description: 'Using your own inference endpoint',
description:
'Custom inference endpoint (optional). Not needed for models with providers (:) or router endpoints. Leave blank to use Inference Providers.',
optional: true
},
{
@@ -124,6 +126,15 @@ class ChatHuggingFace_ChatModels implements INode {
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
const huggingFaceApiKey = getCredentialParam('huggingFaceApiKey', credentialData, nodeData)

if (!huggingFaceApiKey) {
console.error('[ChatHuggingFace] API key validation failed: No API key found')
throw new Error('HuggingFace API key is required. Please configure it in the credential settings.')
}

if (!huggingFaceApiKey.startsWith('hf_')) {
console.warn('[ChatHuggingFace] API key format warning: Key does not start with "hf_"')
}

const obj: Partial<HFInput> = {
model,
apiKey: huggingFaceApiKey
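The reworded Model and Endpoint descriptions encode one rule: a ':provider' suffix in the model id (or a router endpoint) means the Endpoint field should stay empty. An illustrative pair of configurations, not taken from the PR:

// Illustrative values only, not part of this PR.
const providerRouted = {
    model: 'deepseek-ai/DeepSeek-V3.2-Exp:novita', // ':novita' selects an Inference Provider
    endpoint: undefined // leave Endpoint blank; the client handles routing
}
const dedicatedEndpoint = {
    model: 'gpt2', // plain model id, no ':provider' suffix
    endpoint: 'https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2' // own inference endpoint
}

Note that the added key check treats a missing "hf_" prefix as a warning rather than an error, so non-standard tokens still pass through.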
146 changes: 103 additions & 43 deletions packages/components/nodes/chatmodels/ChatHuggingFace/core.ts
@@ -56,9 +56,9 @@ export class HuggingFaceInference extends LLM implements HFInput {
this.apiKey = fields?.apiKey ?? getEnvironmentVariable('HUGGINGFACEHUB_API_KEY')
this.endpointUrl = fields?.endpointUrl
this.includeCredentials = fields?.includeCredentials
if (!this.apiKey) {
if (!this.apiKey || this.apiKey.trim() === '') {
throw new Error(
'Please set an API key for HuggingFace Hub in the environment variable HUGGINGFACEHUB_API_KEY or in the apiKey field of the HuggingFaceInference constructor.'
'Please set an API key for HuggingFace Hub. Either configure it in the credential settings in the UI, or set the environment variable HUGGINGFACEHUB_API_KEY.'
)
}
}
@@ -68,71 +68,131 @@
}

invocationParams(options?: this['ParsedCallOptions']) {
return {
model: this.model,
parameters: {
// make it behave similar to openai, returning only the generated text
return_full_text: false,
temperature: this.temperature,
max_new_tokens: this.maxTokens,
stop: options?.stop ?? this.stopSequences,
top_p: this.topP,
top_k: this.topK,
repetition_penalty: this.frequencyPenalty
}
// Return parameters compatible with chatCompletion API (OpenAI-compatible format)
const params: any = {
temperature: this.temperature,
max_tokens: this.maxTokens,
stop: options?.stop ?? this.stopSequences,
top_p: this.topP
}
// Include optional parameters if they are defined
if (this.topK !== undefined) {
params.top_k = this.topK
}
if (this.frequencyPenalty !== undefined) {
params.frequency_penalty = this.frequencyPenalty
}
return params
}

async *_streamResponseChunks(
prompt: string,
options: this['ParsedCallOptions'],
runManager?: CallbackManagerForLLMRun
): AsyncGenerator<GenerationChunk> {
const hfi = await this._prepareHFInference()
const stream = await this.caller.call(async () =>
hfi.textGenerationStream({
...this.invocationParams(options),
inputs: prompt
})
)
for await (const chunk of stream) {
const token = chunk.token.text
yield new GenerationChunk({ text: token, generationInfo: chunk })
await runManager?.handleLLMNewToken(token ?? '')

// stream is done
if (chunk.generated_text)
yield new GenerationChunk({
text: '',
generationInfo: { finished: true }
try {
const client = await this._prepareHFInference()
const stream = await this.caller.call(async () =>
client.chatCompletionStream({
model: this.model,
messages: [{ role: 'user', content: prompt }],
...this.invocationParams(options)
})
)
for await (const chunk of stream) {
const token = chunk.choices[0]?.delta?.content || ''
if (token) {
yield new GenerationChunk({ text: token, generationInfo: chunk })
await runManager?.handleLLMNewToken(token)
}
// stream is done when finish_reason is set
if (chunk.choices[0]?.finish_reason) {
yield new GenerationChunk({
text: '',
generationInfo: { finished: true }
})
break
}
}
} catch (error: any) {
console.error('[ChatHuggingFace] Error in _streamResponseChunks:', error)
// Provide more helpful error messages
if (error?.message?.includes('endpointUrl') || error?.message?.includes('third-party provider')) {
throw new Error(
`Cannot use custom endpoint with model "${this.model}" that includes a provider. Please leave the Endpoint field blank in the UI. Original error: ${error.message}`
)
}
throw error
}
}

/** @ignore */
async _call(prompt: string, options: this['ParsedCallOptions']): Promise<string> {
const hfi = await this._prepareHFInference()
const args = { ...this.invocationParams(options), inputs: prompt }
const res = await this.caller.callWithOptions({ signal: options.signal }, hfi.textGeneration.bind(hfi), args)
return res.generated_text
try {
const client = await this._prepareHFInference()
// Use chatCompletion for chat models (v4 supports conversational models via Inference Providers)
const args = {
model: this.model,
messages: [{ role: 'user', content: prompt }],
...this.invocationParams(options)
}
const res = await this.caller.callWithOptions({ signal: options.signal }, client.chatCompletion.bind(client), args)
const content = res.choices[0]?.message?.content || ''
if (!content) {
console.error('[ChatHuggingFace] No content in response:', JSON.stringify(res))
throw new Error(`No content received from HuggingFace API. Response: ${JSON.stringify(res)}`)
}
return content
} catch (error: any) {
console.error('[ChatHuggingFace] Error in _call:', error.message)
// Provide more helpful error messages
if (error?.message?.includes('endpointUrl') || error?.message?.includes('third-party provider')) {
throw new Error(
`Cannot use custom endpoint with model "${this.model}" that includes a provider. Please leave the Endpoint field blank in the UI. Original error: ${error.message}`
)
}
if (error?.message?.includes('Invalid username or password') || error?.message?.includes('authentication')) {
throw new Error(
`HuggingFace API authentication failed. Please verify your API key is correct and starts with "hf_". Original error: ${error.message}`
)
}
throw error
}
}

/** @ignore */
private async _prepareHFInference() {
const { HfInference } = await HuggingFaceInference.imports()
const hfi = new HfInference(this.apiKey, {
includeCredentials: this.includeCredentials
})
return this.endpointUrl ? hfi.endpoint(this.endpointUrl) : hfi
if (!this.apiKey || this.apiKey.trim() === '') {
console.error('[ChatHuggingFace] API key validation failed: Empty or undefined')
throw new Error('HuggingFace API key is required. Please configure it in the credential settings.')
}

const { InferenceClient } = await HuggingFaceInference.imports()
// Use InferenceClient for chat models (works better with Inference Providers)
const client = new InferenceClient(this.apiKey)

// Don't override endpoint if model uses a provider (contains ':') or if endpoint is router-based
// When using Inference Providers, endpoint should be left blank - InferenceClient handles routing automatically
if (
this.endpointUrl &&
!this.model.includes(':') &&
!this.endpointUrl.includes('/v1/chat/completions') &&
!this.endpointUrl.includes('router.huggingface.co')
) {
return client.endpoint(this.endpointUrl)
}

// Return client without endpoint override - InferenceClient will use Inference Providers automatically
return client
}

/** @ignore */
static async imports(): Promise<{
HfInference: typeof import('@huggingface/inference').HfInference
InferenceClient: typeof import('@huggingface/inference').InferenceClient
}> {
try {
const { HfInference } = await import('@huggingface/inference')
return { HfInference }
const { InferenceClient } = await import('@huggingface/inference')
return { InferenceClient }
} catch (e) {
throw new Error('Please install huggingface as a dependency with, e.g. `pnpm install @huggingface/inference`')
}
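For readers unfamiliar with @huggingface/inference v4, the rewritten _call and _streamResponseChunks map onto the client roughly as below. This is a sketch under the assumption that chatCompletion and chatCompletionStream keep the OpenAI-style response shapes used in the hunk above; the model id and key handling are placeholders.

// Sketch, not part of the PR.
import { InferenceClient } from '@huggingface/inference'

async function demo(): Promise<void> {
    const client = new InferenceClient(process.env.HUGGINGFACEHUB_API_KEY)
    const model = 'deepseek-ai/DeepSeek-V3.2-Exp:novita' // placeholder model id

    // Non-streaming: content arrives in choices[0].message.content
    const res = await client.chatCompletion({
        model,
        messages: [{ role: 'user', content: 'Hello' }],
        max_tokens: 256,
        temperature: 0.7
    })
    console.log(res.choices[0]?.message?.content ?? '')

    // Streaming: delta tokens until finish_reason is set
    for await (const chunk of client.chatCompletionStream({
        model,
        messages: [{ role: 'user', content: 'Hello' }],
        max_tokens: 256
    })) {
        const token = chunk.choices[0]?.delta?.content || ''
        if (token) process.stdout.write(token)
        if (chunk.choices[0]?.finish_reason) break
    }
}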
@@ -23,24 +23,22 @@ export class HuggingFaceInferenceEmbeddings extends Embeddings implements Huggin
this.model = fields?.model ?? 'sentence-transformers/distilbert-base-nli-mean-tokens'
this.apiKey = fields?.apiKey ?? getEnvironmentVariable('HUGGINGFACEHUB_API_KEY')
this.endpoint = fields?.endpoint ?? ''
this.client = new HfInference(this.apiKey)
if (this.endpoint) this.client.endpoint(this.endpoint)
const hf = new HfInference(this.apiKey)
// v4 uses Inference Providers by default; only override if custom endpoint provided
this.client = this.endpoint ? hf.endpoint(this.endpoint) : hf
}

async _embed(texts: string[]): Promise<number[][]> {
// replace newlines, which can negatively affect performance.
const clean = texts.map((text) => text.replace(/\n/g, ' '))
const hf = new HfInference(this.apiKey)
const obj: any = {
inputs: clean
}
if (this.endpoint) {
hf.endpoint(this.endpoint)
} else {
if (!this.endpoint) {
obj.model = this.model
}

const res = await this.caller.callWithOptions({}, hf.featureExtraction.bind(hf), obj)
const res = await this.caller.callWithOptions({}, this.client.featureExtraction.bind(this.client), obj)
return res as number[][]
}

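The underlying bug in this embeddings hunk is that hf.endpoint(url) returns a new scoped client instead of mutating hf; the old code discarded that return value, so the custom endpoint was never used. The fix builds the scoped client once in the constructor and reuses it. A standalone sketch of the same pattern, with a placeholder model id and endpoint:

// Sketch, not part of the PR. A model id is only needed when no dedicated
// endpoint is used, since the endpoint already implies the model.
import { HfInference } from '@huggingface/inference'

async function embed(texts: string[], endpoint?: string): Promise<number[][]> {
    const hf = new HfInference(process.env.HUGGINGFACEHUB_API_KEY)
    const client = endpoint ? hf.endpoint(endpoint) : hf // endpoint() returns a new client
    const args: any = {
        inputs: texts.map((t) => t.replace(/\n/g, ' ')) // strip newlines, as the node does
    }
    if (!endpoint) args.model = 'sentence-transformers/distilbert-base-nli-mean-tokens'
    const res = await client.featureExtraction(args)
    return res as number[][]
}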
8 changes: 4 additions & 4 deletions packages/components/nodes/llms/HuggingFaceInference/core.ts
@@ -78,6 +78,8 @@ export class HuggingFaceInference extends LLM implements HFInput {
async _call(prompt: string, options: this['ParsedCallOptions']): Promise<string> {
const { HfInference } = await HuggingFaceInference.imports()
const hf = new HfInference(this.apiKey)
// v4 uses Inference Providers by default; only override if custom endpoint provided
const hfClient = this.endpoint ? hf.endpoint(this.endpoint) : hf
const obj: any = {
parameters: {
// make it behave similar to openai, returning only the generated text
@@ -90,12 +92,10 @@
},
inputs: prompt
}
if (this.endpoint) {
hf.endpoint(this.endpoint)
} else {
if (!this.endpoint) {
obj.model = this.model
}
const res = await this.caller.callWithOptions({ signal: options.signal }, hf.textGeneration.bind(hf), obj)
const res = await this.caller.callWithOptions({ signal: options.signal }, hfClient.textGeneration.bind(hfClient), obj)
return res.generated_text
}

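Same scoped-client fix as in the embeddings core: bind the return value of hf.endpoint() instead of discarding it. A short sketch of the resulting call path, with placeholder model and parameters:

// Sketch, not part of the PR.
import { HfInference } from '@huggingface/inference'

async function generate(prompt: string, endpoint?: string): Promise<string> {
    const hf = new HfInference(process.env.HUGGINGFACEHUB_API_KEY)
    const hfClient = endpoint ? hf.endpoint(endpoint) : hf // endpoint() returns a new client
    const args: any = {
        inputs: prompt,
        parameters: { return_full_text: false, max_new_tokens: 128 }
    }
    if (!endpoint) args.model = 'gpt2' // model is implied by a dedicated endpoint
    const res = await hfClient.textGeneration(args)
    return res.generated_text
}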
2 changes: 1 addition & 1 deletion packages/components/package.json
@@ -43,7 +43,7 @@
"@google-cloud/storage": "^7.15.2",
"@google/generative-ai": "^0.24.0",
"@grpc/grpc-js": "^1.10.10",
"@huggingface/inference": "^2.6.1",
"@huggingface/inference": "^4.13.2",
"@langchain/anthropic": "0.3.33",
"@langchain/aws": "^0.1.11",
"@langchain/baidu-qianfan": "^0.1.0",