uploaded vector store workflow

This commit is contained in:
2025-08-08 22:39:42 +02:00
commit 201742b8b6

View File

@@ -0,0 +1,834 @@
{
"name": "tepto-vector-storing-shop",
"nodes": [
{
"parameters": {
"options": {}
},
"type": "@n8n/n8n-nodes-langchain.chatTrigger",
"typeVersion": 1.3,
"position": [
-1072,
256
],
"id": "3f2411cd-7cdf-4b37-91cb-43afa0fa611c",
"name": "When chat message received",
"webhookId": "cc00d5d8-2f31-4af5-a50a-0bfbcee955f3"
},
{
"parameters": {
"url": "={{ $json.chatInput }}/robots.txt",
"sendQuery": true,
"queryParameters": {
"parameters": [
{}
]
},
"options": {}
},
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.2,
"position": [
-784,
256
],
"id": "fcc3a522-e4da-4974-93bb-65f7c81941c4",
"name": "Check for robots.txt",
"onError": "continueErrorOutput"
},
{
"parameters": {
"method": "POST",
"url": "http://crawl4ai:11235/crawl",
"sendBody": true,
"bodyParameters": {
"parameters": [
{
"name": "urls",
"value": "={{ [$json.chatInput] }}"
},
{
"name": "priority",
"value": "10"
}
]
},
"options": {}
},
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.2,
"position": [
-288,
448
],
"id": "17b9227e-8f21-48e6-aaa7-0e2c46aea61f",
"name": "Crawl with Crawl4ai"
},
{
"parameters": {
"promptType": "define",
"text": "=Look at the following data below and return only a json in the format of the example below. \ndata: {{ $json.data }} \n\nexample:\n{\"sitemap_found\":\"true\", \"sitemap_url_count\": \"1\", \"sitemap_url\":[\"https://example.com/sitemap.xml\"]}",
"hasOutputParser": true,
"messages": {
"messageValues": [
{
"message": "You are an assistant that reads robots.txt files and identifies the sitemap XML URLs they declare."
},
{
"type": "HumanMessagePromptTemplate",
"message": "=Look at the following data below and return only a json in the format of the example below. \ndata: {{ $json.data }} \n\nexample:\n{\"sitemap_found\":\"true\", \"sitemap_url_count\": \"1\", \"sitemap_url\":[\"https://example.com/sitemap.xml\"]}"
}
]
},
"batching": {}
},
"type": "@n8n/n8n-nodes-langchain.chainLlm",
"typeVersion": 1.7,
"position": [
-384,
-160
],
"id": "c32c974d-8da0-4110-adb2-1b4890834ef3",
"name": "Basic LLM Chain"
},
{
"parameters": {
"model": {
"__rl": true,
"value": "gpt-4o-mini",
"mode": "list",
"cachedResultName": "gpt-4o-mini"
},
"options": {}
},
"type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
"typeVersion": 1.2,
"position": [
-416,
64
],
"id": "68e21a9e-ca9c-4757-99dd-46d8067bf8a4",
"name": "OpenAI Chat Model2",
"credentials": {
"openAiApi": {
"id": "n8YEc2Yc3MsrQ9g2",
"name": "OpenAi account"
}
}
},
{
"parameters": {
"jsonSchemaExample": "{\"sitemap_found\":\"true\", \"sitemap_url_count\": \"1\", \"sitemap_url\":[\"https://example.com/sitemap.xml\"]}",
"autoFix": true
},
"type": "@n8n/n8n-nodes-langchain.outputParserStructured",
"typeVersion": 1.3,
"position": [
-288,
64
],
"id": "fd52fc51-53a7-4951-b51e-6a437fa4da7e",
"name": "Structured Output Parser"
},
{
"parameters": {
"model": {
"__rl": true,
"value": "gpt-4o-mini",
"mode": "list",
"cachedResultName": "gpt-4o-mini"
},
"options": {}
},
"type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
"typeVersion": 1.2,
"position": [
-208,
272
],
"id": "f817f562-e5b0-476c-a9ca-a91f27196209",
"name": "OpenAI Chat Model3",
"credentials": {
"openAiApi": {
"id": "n8YEc2Yc3MsrQ9g2",
"name": "OpenAi account"
}
}
},
{
"parameters": {
"url": "={{ $json['output.sitemap_url'] }}",
"options": {}
},
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.2,
"position": [
752,
-32
],
"id": "3145274b-1581-4a92-88a9-1f2e0b23f41d",
"name": "HTTP Request"
},
{
"parameters": {
"dataPropertyName": "=data",
"options": {}
},
"type": "n8n-nodes-base.xml",
"typeVersion": 1,
"position": [
976,
-32
],
"id": "8b251af5-30c1-429b-b934-f679936e8e4c",
"name": "XML"
},
{
"parameters": {
"conditions": {
"options": {
"caseSensitive": true,
"leftValue": "",
"typeValidation": "strict",
"version": 2
},
"conditions": [
{
"id": "15b424f0-b279-4b28-8df1-a602304bcef8",
"leftValue": "={{ $json.sitemapindex.sitemap.loc }}",
"rightValue": ".gz",
"operator": {
"type": "string",
"operation": "endsWith"
}
}
],
"combinator": "and"
},
"options": {}
},
"type": "n8n-nodes-base.if",
"typeVersion": 2.2,
"position": [
1200,
-32
],
"id": "4b5de457-31af-461c-9d44-40a183aa1e73",
"name": "If"
},
{
"parameters": {
"url": "={{ $json.sitemapindex.sitemap.loc }}",
"options": {
"response": {
"response": {
"responseFormat": "file"
}
}
}
},
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.2,
"position": [
1424,
-32
],
"id": "0f8e4e87-30e1-4ed6-9802-b7a03ac7e9b4",
"name": "HTTP Request1"
},
{
"parameters": {
"mode": "insert",
"pineconeIndex": {
"__rl": true,
"value": "tepto-n8n-workflow",
"mode": "list",
"cachedResultName": "tepto-n8n-workflow"
},
"options": {}
},
"type": "@n8n/n8n-nodes-langchain.vectorStorePinecone",
"typeVersion": 1.3,
"position": [
3248,
48
],
"id": "20699b80-88eb-4056-a0c4-1b39d14c388a",
"name": "Pinecone Vector Store",
"credentials": {
"pineconeApi": {
"id": "MsOEOyi31OS8GY2t",
"name": "PineconeApi account"
}
}
},
{
"parameters": {
"model": "text-embedding-3-large",
"options": {}
},
"type": "@n8n/n8n-nodes-langchain.embeddingsOpenAi",
"typeVersion": 1.2,
"position": [
3216,
272
],
"id": "842e5826-8c87-4367-b5ba-a6c9df97e792",
"name": "Embeddings OpenAI",
"credentials": {
"openAiApi": {
"id": "n8YEc2Yc3MsrQ9g2",
"name": "OpenAi account"
}
}
},
{
"parameters": {
"textSplittingMode": "custom",
"options": {}
},
"type": "@n8n/n8n-nodes-langchain.documentDefaultDataLoader",
"typeVersion": 1.1,
"position": [
3344,
272
],
"id": "e34b06b3-8fb2-4dab-9e5f-dbd0df6597b0",
"name": "Default Data Loader"
},
{
"parameters": {
"chunkSize": 350,
"chunkOverlap": 50
},
"type": "@n8n/n8n-nodes-langchain.textSplitterCharacterTextSplitter",
"typeVersion": 1,
"position": [
3424,
480
],
"id": "18d17989-7b27-4dce-9be2-b31cc6847d65",
"name": "Character Text Splitter"
},
{
"parameters": {
"content": "# Check for robots.txt",
"height": 480,
"width": 272
},
"type": "n8n-nodes-base.stickyNote",
"position": [
-880,
32
],
"typeVersion": 1,
"id": "3d5e8145-ac02-4b80-ad4d-6b181cabe307",
"name": "Sticky Note"
},
{
"parameters": {
"content": "# Get sitemap.xml",
"height": 640,
"width": 464
},
"type": "n8n-nodes-base.stickyNote",
"position": [
-448,
-240
],
"typeVersion": 1,
"id": "f14023dd-00f2-466d-b0ea-d450554fb548",
"name": "Sticky Note1"
},
{
"parameters": {
"content": "# Vector storing process",
"height": 736,
"width": 464
},
"type": "n8n-nodes-base.stickyNote",
"position": [
3136,
-64
],
"typeVersion": 1,
"id": "b151cb65-e90b-4749-8652-a14bda25bf3c",
"name": "Sticky Note2"
},
{
"parameters": {
"content": "# Unpacking .xml.gz and looping through all xml documents to get the html content\n",
"height": 304,
"width": 2608
},
"type": "n8n-nodes-base.stickyNote",
"position": [
496,
-160
],
"typeVersion": 1,
"id": "239e2c30-b757-49e6-8a6a-047a284c1229",
"name": "Sticky Note3"
},
{
"parameters": {
"content": "# Check for robots.txt",
"height": 480,
"width": 272
},
"type": "n8n-nodes-base.stickyNote",
"position": [
-1488,
16
],
"typeVersion": 1,
"id": "57a41e8b-398f-4523-82a4-997eaf790a37",
"name": "Sticky Note4"
},
{
"parameters": {
"fieldToSplitOut": "output.sitemap_url",
"options": {}
},
"type": "n8n-nodes-base.splitOut",
"typeVersion": 1,
"position": [
80,
48
],
"id": "46141b15-6ec4-4e52-8e98-3d75e5cb25b4",
"name": "Split out sitemap.xml url"
},
{
"parameters": {
"options": {}
},
"type": "n8n-nodes-base.splitInBatches",
"typeVersion": 3,
"position": [
304,
48
],
"id": "aa7fdca2-3288-4a20-a2ab-c2bc31d57c28",
"name": "Loop Over XML URLs"
},
{
"parameters": {
"amount": 1.5
},
"type": "n8n-nodes-base.wait",
"typeVersion": 1.1,
"position": [
528,
-32
],
"id": "2696dbe2-f99c-4368-bf87-6e38eee15432",
"name": "Wait",
"webhookId": "8d09ff79-c9e5-4cc7-89ce-d4c08d8b7b8f"
},
{
"parameters": {
"amount": 1.5
},
"type": "n8n-nodes-base.wait",
"typeVersion": 1.1,
"position": [
2768,
-32
],
"id": "165447bc-5901-4ee6-a7d8-7b36d53edbc2",
"name": "Wait2",
"webhookId": "ec67f4bf-d70d-47ac-89b9-23939918311c"
},
{
"parameters": {},
"type": "n8n-nodes-base.compression",
"typeVersion": 1.1,
"position": [
1648,
-32
],
"id": "0fafd398-3d87-4ee9-a48f-89120c7ec186",
"name": "Decompress gzip file"
},
{
"parameters": {
"operation": "xml",
"binaryPropertyName": "file_0",
"options": {}
},
"type": "n8n-nodes-base.extractFromFile",
"typeVersion": 1,
"position": [
1872,
-32
],
"id": "d94db85f-8c1d-4adb-882c-fb59679f1b64",
"name": "Extract xml content"
},
{
"parameters": {
"options": {}
},
"type": "n8n-nodes-base.xml",
"typeVersion": 1,
"position": [
2096,
-32
],
"id": "8fcfbf31-ac20-4541-8a53-7cf8f61ab36b",
"name": "Convert XML to JSON"
},
{
"parameters": {
"fieldToSplitOut": "urlset.url",
"options": {}
},
"type": "n8n-nodes-base.splitOut",
"typeVersion": 1,
"position": [
2320,
-32
],
"id": "73d28f87-a3fd-4cfa-ae9b-86cc183e148c",
"name": "Split Out XML URLs from decompressed gzip"
},
{
"parameters": {
"options": {
"reset": "="
}
},
"type": "n8n-nodes-base.splitInBatches",
"typeVersion": 3,
"position": [
2544,
48
],
"id": "ed5bb42d-4fd2-48c0-83f4-fb9c7e7c5816",
"name": "Loop Over all XML URLs",
"executeOnce": false
},
{
"parameters": {
"method": "POST",
"url": "http://crawl4ai:11235/crawl",
"sendBody": true,
"bodyParameters": {
"parameters": [
{
"name": "urls",
"value": "={{ [$json.loc] }}"
},
{
"name": "priority",
"value": "10"
}
]
},
"options": {}
},
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.2,
"position": [
2992,
-32
],
"id": "6044642f-9c8f-40b3-8d3e-daefa79f820a",
"name": "Crawl with Crawl4ai-2"
}
],
"pinData": {},
"connections": {
"When chat message received": {
"main": [
[
{
"node": "Check for robots.txt",
"type": "main",
"index": 0
}
]
]
},
"Check for robots.txt": {
"main": [
[
{
"node": "Basic LLM Chain",
"type": "main",
"index": 0
}
],
[
{
"node": "Crawl with Crawl4ai",
"type": "main",
"index": 0
}
]
]
},
"Basic LLM Chain": {
"main": [
[
{
"node": "Split out sitemap.xml url",
"type": "main",
"index": 0
}
]
]
},
"OpenAI Chat Model2": {
"ai_languageModel": [
[
{
"node": "Basic LLM Chain",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"Structured Output Parser": {
"ai_outputParser": [
[
{
"node": "Basic LLM Chain",
"type": "ai_outputParser",
"index": 0
}
]
]
},
"OpenAI Chat Model3": {
"ai_languageModel": [
[
{
"node": "Structured Output Parser",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"HTTP Request": {
"main": [
[
{
"node": "XML",
"type": "main",
"index": 0
}
]
]
},
"XML": {
"main": [
[
{
"node": "If",
"type": "main",
"index": 0
}
]
]
},
"If": {
"main": [
[
{
"node": "HTTP Request1",
"type": "main",
"index": 0
}
]
]
},
"HTTP Request1": {
"main": [
[
{
"node": "Decompress gzip file",
"type": "main",
"index": 0
}
]
]
},
"Pinecone Vector Store": {
"main": [
[
{
"node": "Loop Over all XML URLs",
"type": "main",
"index": 0
}
]
]
},
"Embeddings OpenAI": {
"ai_embedding": [
[
{
"node": "Pinecone Vector Store",
"type": "ai_embedding",
"index": 0
}
]
]
},
"Default Data Loader": {
"ai_document": [
[
{
"node": "Pinecone Vector Store",
"type": "ai_document",
"index": 0
}
]
]
},
"Character Text Splitter": {
"ai_textSplitter": [
[
{
"node": "Default Data Loader",
"type": "ai_textSplitter",
"index": 0
}
]
]
},
"Split out sitemap.xml url": {
"main": [
[
{
"node": "Loop Over XML URLs",
"type": "main",
"index": 0
}
]
]
},
"Loop Over XML URLs": {
"main": [
[],
[
{
"node": "Wait",
"type": "main",
"index": 0
}
]
]
},
"Wait": {
"main": [
[
{
"node": "HTTP Request",
"type": "main",
"index": 0
}
]
]
},
"Wait2": {
"main": [
[
{
"node": "Crawl with Crawl4ai-2",
"type": "main",
"index": 0
}
]
]
},
"Decompress gzip file": {
"main": [
[
{
"node": "Extract xml content",
"type": "main",
"index": 0
}
]
]
},
"Extract xml content": {
"main": [
[
{
"node": "Convert XML to JSON",
"type": "main",
"index": 0
}
]
]
},
"Convert XML to JSON": {
"main": [
[
{
"node": "Split Out XML URLs from decompressed gzip",
"type": "main",
"index": 0
}
]
]
},
"Split Out XML URLs from decompressed gzip": {
"main": [
[
{
"node": "Loop Over all XML URLs",
"type": "main",
"index": 0
}
]
]
},
"Loop Over all XML URLs": {
"main": [
[
{
"node": "Loop Over XML URLs",
"type": "main",
"index": 0
}
],
[
{
"node": "Wait2",
"type": "main",
"index": 0
}
]
]
},
"Crawl with Crawl4ai-2": {
"main": [
[
{
"node": "Pinecone Vector Store",
"type": "main",
"index": 0
}
]
]
}
},
"active": false,
"settings": {
"executionOrder": "v1"
},
"versionId": "b337dd9a-99f0-4d0f-b782-39afa8b62e0c",
"meta": {
"templateCredsSetupCompleted": true,
"instanceId": "09eb18af5dccac351cf0268b293a762f1ecd59086e605300f34309f080a1382a"
},
"id": "kCNlJS2UNnNebhIz",
"tags": []
}