commit 201742b8b625f570425262e4569ccf2275112daa Author: Nenad Date: Fri Aug 8 22:39:42 2025 +0200 uploaded vector store workflow diff --git a/tepto-vector-storing-shop.json b/tepto-vector-storing-shop.json new file mode 100644 index 0000000..abeb375 --- /dev/null +++ b/tepto-vector-storing-shop.json @@ -0,0 +1,834 @@ +{ + "name": "tepto-vector-storing-shop", + "nodes": [ + { + "parameters": { + "options": {} + }, + "type": "@n8n/n8n-nodes-langchain.chatTrigger", + "typeVersion": 1.3, + "position": [ + -1072, + 256 + ], + "id": "3f2411cd-7cdf-4b37-91cb-43afa0fa611c", + "name": "When chat message received", + "webhookId": "cc00d5d8-2f31-4af5-a50a-0bfbcee955f3" + }, + { + "parameters": { + "url": "={{ $json.chatInput }}/robots.txt", + "sendQuery": true, + "queryParameters": { + "parameters": [ + {} + ] + }, + "options": {} + }, + "type": "n8n-nodes-base.httpRequest", + "typeVersion": 4.2, + "position": [ + -784, + 256 + ], + "id": "fcc3a522-e4da-4974-93bb-65f7c81941c4", + "name": "Check for robots.txt", + "onError": "continueErrorOutput" + }, + { + "parameters": { + "method": "POST", + "url": "http://crawl4ai:11235/crawl", + "sendBody": true, + "bodyParameters": { + "parameters": [ + { + "name": "urls", + "value": "={{ [$json.chatInput] }}" + }, + { + "name": "priority", + "value": "10" + } + ] + }, + "options": {} + }, + "type": "n8n-nodes-base.httpRequest", + "typeVersion": 4.2, + "position": [ + -288, + 448 + ], + "id": "17b9227e-8f21-48e6-aaa7-0e2c46aea61f", + "name": "Crawl with Crawl4ai" + }, + { + "parameters": { + "promptType": "define", + "text": "=Look at the following data below and return only a json in the format of the example below. \ndata: {{ $json.data }} \n\nexample:\n{\"sitemap_found\":\"true\", \"sitemap_url_count\": \"1\", \"sitemap_url\":[\"https://example.com/sitemap.xml\"]}", + "hasOutputParser": true, + "messages": { + "messageValues": [ + { + "message": "You are an assistant to read robots.txt and find out about sitemap xml." 
+ }, + { + "type": "HumanMessagePromptTemplate", + "message": "=Look at the following data below and return only a json in the format of the example below. \ndata: {{ $json.data }} \n\nexample:\n{\"sitemap_found\":\"true\", \"sitemap_url_count\": \"1\", \"sitemap_url\":[\"https://example.com/sitemap.xml\"]}" + } + ] + }, + "batching": {} + }, + "type": "@n8n/n8n-nodes-langchain.chainLlm", + "typeVersion": 1.7, + "position": [ + -384, + -160 + ], + "id": "c32c974d-8da0-4110-adb2-1b4890834ef3", + "name": "Basic LLM Chain" + }, + { + "parameters": { + "model": { + "__rl": true, + "value": "gpt-4o-mini", + "mode": "list", + "cachedResultName": "gpt-4o-mini" + }, + "options": {} + }, + "type": "@n8n/n8n-nodes-langchain.lmChatOpenAi", + "typeVersion": 1.2, + "position": [ + -416, + 64 + ], + "id": "68e21a9e-ca9c-4757-99dd-46d8067bf8a4", + "name": "OpenAI Chat Model2", + "credentials": { + "openAiApi": { + "id": "n8YEc2Yc3MsrQ9g2", + "name": "OpenAi account" + } + } + }, + { + "parameters": { + "jsonSchemaExample": "{\"sitemap_found\":\"true\", \"sitemap_url_count\": \"1\", \"sitemap_url\":[\"https://example.com/sitemap.xml\"]}", + "autoFix": true + }, + "type": "@n8n/n8n-nodes-langchain.outputParserStructured", + "typeVersion": 1.3, + "position": [ + -288, + 64 + ], + "id": "fd52fc51-53a7-4951-b51e-6a437fa4da7e", + "name": "Structured Output Parser" + }, + { + "parameters": { + "model": { + "__rl": true, + "value": "gpt-4o-mini", + "mode": "list", + "cachedResultName": "gpt-4o-mini" + }, + "options": {} + }, + "type": "@n8n/n8n-nodes-langchain.lmChatOpenAi", + "typeVersion": 1.2, + "position": [ + -208, + 272 + ], + "id": "f817f562-e5b0-476c-a9ca-a91f27196209", + "name": "OpenAI Chat Model3", + "credentials": { + "openAiApi": { + "id": "n8YEc2Yc3MsrQ9g2", + "name": "OpenAi account" + } + } + }, + { + "parameters": { + "url": "={{ $json['output.sitemap_url'] }}", + "options": {} + }, + "type": "n8n-nodes-base.httpRequest", + "typeVersion": 4.2, + "position": [ + 752, + -32 + ], 
+ "id": "3145274b-1581-4a92-88a9-1f2e0b23f41d", + "name": "HTTP Request" + }, + { + "parameters": { + "dataPropertyName": "=data", + "options": {} + }, + "type": "n8n-nodes-base.xml", + "typeVersion": 1, + "position": [ + 976, + -32 + ], + "id": "8b251af5-30c1-429b-b934-f679936e8e4c", + "name": "XML" + }, + { + "parameters": { + "conditions": { + "options": { + "caseSensitive": true, + "leftValue": "", + "typeValidation": "strict", + "version": 2 + }, + "conditions": [ + { + "id": "15b424f0-b279-4b28-8df1-a602304bcef8", + "leftValue": "={{ $json.sitemapindex.sitemap.loc }}", + "rightValue": ".gz", + "operator": { + "type": "string", + "operation": "endsWith" + } + } + ], + "combinator": "and" + }, + "options": {} + }, + "type": "n8n-nodes-base.if", + "typeVersion": 2.2, + "position": [ + 1200, + -32 + ], + "id": "4b5de457-31af-461c-9d44-40a183aa1e73", + "name": "If" + }, + { + "parameters": { + "url": "={{ $json.sitemapindex.sitemap.loc }}", + "options": { + "response": { + "response": { + "responseFormat": "file" + } + } + } + }, + "type": "n8n-nodes-base.httpRequest", + "typeVersion": 4.2, + "position": [ + 1424, + -32 + ], + "id": "0f8e4e87-30e1-4ed6-9802-b7a03ac7e9b4", + "name": "HTTP Request1" + }, + { + "parameters": { + "mode": "insert", + "pineconeIndex": { + "__rl": true, + "value": "tepto-n8n-workflow", + "mode": "list", + "cachedResultName": "tepto-n8n-workflow" + }, + "options": {} + }, + "type": "@n8n/n8n-nodes-langchain.vectorStorePinecone", + "typeVersion": 1.3, + "position": [ + 3248, + 48 + ], + "id": "20699b80-88eb-4056-a0c4-1b39d14c388a", + "name": "Pinecone Vector Store", + "credentials": { + "pineconeApi": { + "id": "MsOEOyi31OS8GY2t", + "name": "PineconeApi account" + } + } + }, + { + "parameters": { + "model": "text-embedding-3-large", + "options": {} + }, + "type": "@n8n/n8n-nodes-langchain.embeddingsOpenAi", + "typeVersion": 1.2, + "position": [ + 3216, + 272 + ], + "id": "842e5826-8c87-4367-b5ba-a6c9df97e792", + "name": "Embeddings 
OpenAI", + "credentials": { + "openAiApi": { + "id": "n8YEc2Yc3MsrQ9g2", + "name": "OpenAi account" + } + } + }, + { + "parameters": { + "textSplittingMode": "custom", + "options": {} + }, + "type": "@n8n/n8n-nodes-langchain.documentDefaultDataLoader", + "typeVersion": 1.1, + "position": [ + 3344, + 272 + ], + "id": "e34b06b3-8fb2-4dab-9e5f-dbd0df6597b0", + "name": "Default Data Loader" + }, + { + "parameters": { + "chunkSize": 350, + "chunkOverlap": 50 + }, + "type": "@n8n/n8n-nodes-langchain.textSplitterCharacterTextSplitter", + "typeVersion": 1, + "position": [ + 3424, + 480 + ], + "id": "18d17989-7b27-4dce-9be2-b31cc6847d65", + "name": "Character Text Splitter" + }, + { + "parameters": { + "content": "# Check for robots.txt", + "height": 480, + "width": 272 + }, + "type": "n8n-nodes-base.stickyNote", + "position": [ + -880, + 32 + ], + "typeVersion": 1, + "id": "3d5e8145-ac02-4b80-ad4d-6b181cabe307", + "name": "Sticky Note" + }, + { + "parameters": { + "content": "# Get sitemap.xml", + "height": 640, + "width": 464 + }, + "type": "n8n-nodes-base.stickyNote", + "position": [ + -448, + -240 + ], + "typeVersion": 1, + "id": "f14023dd-00f2-466d-b0ea-d450554fb548", + "name": "Sticky Note1" + }, + { + "parameters": { + "content": "# Vector storing process", + "height": 736, + "width": 464 + }, + "type": "n8n-nodes-base.stickyNote", + "position": [ + 3136, + -64 + ], + "typeVersion": 1, + "id": "b151cb65-e90b-4749-8652-a14bda25bf3c", + "name": "Sticky Note2" + }, + { + "parameters": { + "content": "# Unpacking .xml.gz and looping through all xml documents to get the html content\n", + "height": 304, + "width": 2608 + }, + "type": "n8n-nodes-base.stickyNote", + "position": [ + 496, + -160 + ], + "typeVersion": 1, + "id": "239e2c30-b757-49e6-8a6a-047a284c1229", + "name": "Sticky Note3" + }, + { + "parameters": { + "content": "# Check for robots.txt", + "height": 480, + "width": 272 + }, + "type": "n8n-nodes-base.stickyNote", + "position": [ + -1488, + 16 + ], + 
"typeVersion": 1, + "id": "57a41e8b-398f-4523-82a4-997eaf790a37", + "name": "Sticky Note4" + }, + { + "parameters": { + "fieldToSplitOut": "output.sitemap_url", + "options": {} + }, + "type": "n8n-nodes-base.splitOut", + "typeVersion": 1, + "position": [ + 80, + 48 + ], + "id": "46141b15-6ec4-4e52-8e98-3d75e5cb25b4", + "name": "Split out sitemap.xml url" + }, + { + "parameters": { + "options": {} + }, + "type": "n8n-nodes-base.splitInBatches", + "typeVersion": 3, + "position": [ + 304, + 48 + ], + "id": "aa7fdca2-3288-4a20-a2ab-c2bc31d57c28", + "name": "Loop Over XML URLs" + }, + { + "parameters": { + "amount": 1.5 + }, + "type": "n8n-nodes-base.wait", + "typeVersion": 1.1, + "position": [ + 528, + -32 + ], + "id": "2696dbe2-f99c-4368-bf87-6e38eee15432", + "name": "Wait", + "webhookId": "8d09ff79-c9e5-4cc7-89ce-d4c08d8b7b8f" + }, + { + "parameters": { + "amount": 1.5 + }, + "type": "n8n-nodes-base.wait", + "typeVersion": 1.1, + "position": [ + 2768, + -32 + ], + "id": "165447bc-5901-4ee6-a7d8-7b36d53edbc2", + "name": "Wait2", + "webhookId": "ec67f4bf-d70d-47ac-89b9-23939918311c" + }, + { + "parameters": {}, + "type": "n8n-nodes-base.compression", + "typeVersion": 1.1, + "position": [ + 1648, + -32 + ], + "id": "0fafd398-3d87-4ee9-a48f-89120c7ec186", + "name": "Decompress gzip file" + }, + { + "parameters": { + "operation": "xml", + "binaryPropertyName": "file_0", + "options": {} + }, + "type": "n8n-nodes-base.extractFromFile", + "typeVersion": 1, + "position": [ + 1872, + -32 + ], + "id": "d94db85f-8c1d-4adb-882c-fb59679f1b64", + "name": "Extract xml content" + }, + { + "parameters": { + "options": {} + }, + "type": "n8n-nodes-base.xml", + "typeVersion": 1, + "position": [ + 2096, + -32 + ], + "id": "8fcfbf31-ac20-4541-8a53-7cf8f61ab36b", + "name": "Convert XML to JSON" + }, + { + "parameters": { + "fieldToSplitOut": "urlset.url", + "options": {} + }, + "type": "n8n-nodes-base.splitOut", + "typeVersion": 1, + "position": [ + 2320, + -32 + ], + "id": 
"73d28f87-a3fd-4cfa-ae9b-86cc183e148c", + "name": "Split Out XML URLs from decompressed gzip" + }, + { + "parameters": { + "options": { + "reset": "=" + } + }, + "type": "n8n-nodes-base.splitInBatches", + "typeVersion": 3, + "position": [ + 2544, + 48 + ], + "id": "ed5bb42d-4fd2-48c0-83f4-fb9c7e7c5816", + "name": "Loop Over all XML URLs", + "executeOnce": false + }, + { + "parameters": { + "method": "POST", + "url": "http://crawl4ai:11235/crawl", + "sendBody": true, + "bodyParameters": { + "parameters": [ + { + "name": "urls", + "value": "={{ [$json.loc] }}" + }, + { + "name": "priority", + "value": "10" + } + ] + }, + "options": {} + }, + "type": "n8n-nodes-base.httpRequest", + "typeVersion": 4.2, + "position": [ + 2992, + -32 + ], + "id": "6044642f-9c8f-40b3-8d3e-daefa79f820a", + "name": "Crawl with Crawl4ai-2" + } + ], + "pinData": {}, + "connections": { + "When chat message received": { + "main": [ + [ + { + "node": "Check for robots.txt", + "type": "main", + "index": 0 + } + ] + ] + }, + "Check for robots.txt": { + "main": [ + [ + { + "node": "Basic LLM Chain", + "type": "main", + "index": 0 + } + ], + [ + { + "node": "Crawl with Crawl4ai", + "type": "main", + "index": 0 + } + ] + ] + }, + "Basic LLM Chain": { + "main": [ + [ + { + "node": "Split out sitemap.xml url", + "type": "main", + "index": 0 + } + ] + ] + }, + "OpenAI Chat Model2": { + "ai_languageModel": [ + [ + { + "node": "Basic LLM Chain", + "type": "ai_languageModel", + "index": 0 + } + ] + ] + }, + "Structured Output Parser": { + "ai_outputParser": [ + [ + { + "node": "Basic LLM Chain", + "type": "ai_outputParser", + "index": 0 + } + ] + ] + }, + "OpenAI Chat Model3": { + "ai_languageModel": [ + [ + { + "node": "Structured Output Parser", + "type": "ai_languageModel", + "index": 0 + } + ] + ] + }, + "HTTP Request": { + "main": [ + [ + { + "node": "XML", + "type": "main", + "index": 0 + } + ] + ] + }, + "XML": { + "main": [ + [ + { + "node": "If", + "type": "main", + "index": 0 + } + ] + ] + }, + 
"If": { + "main": [ + [ + { + "node": "HTTP Request1", + "type": "main", + "index": 0 + } + ] + ] + }, + "HTTP Request1": { + "main": [ + [ + { + "node": "Decompress gzip file", + "type": "main", + "index": 0 + } + ] + ] + }, + "Pinecone Vector Store": { + "main": [ + [ + { + "node": "Loop Over all XML URLs", + "type": "main", + "index": 0 + } + ] + ] + }, + "Embeddings OpenAI": { + "ai_embedding": [ + [ + { + "node": "Pinecone Vector Store", + "type": "ai_embedding", + "index": 0 + } + ] + ] + }, + "Default Data Loader": { + "ai_document": [ + [ + { + "node": "Pinecone Vector Store", + "type": "ai_document", + "index": 0 + } + ] + ] + }, + "Character Text Splitter": { + "ai_textSplitter": [ + [ + { + "node": "Default Data Loader", + "type": "ai_textSplitter", + "index": 0 + } + ] + ] + }, + "Split out sitemap.xml url": { + "main": [ + [ + { + "node": "Loop Over XML URLs", + "type": "main", + "index": 0 + } + ] + ] + }, + "Loop Over XML URLs": { + "main": [ + [], + [ + { + "node": "Wait", + "type": "main", + "index": 0 + } + ] + ] + }, + "Wait": { + "main": [ + [ + { + "node": "HTTP Request", + "type": "main", + "index": 0 + } + ] + ] + }, + "Wait2": { + "main": [ + [ + { + "node": "Crawl with Crawl4ai-2", + "type": "main", + "index": 0 + } + ] + ] + }, + "Decompress gzip file": { + "main": [ + [ + { + "node": "Extract xml content", + "type": "main", + "index": 0 + } + ] + ] + }, + "Extract xml content": { + "main": [ + [ + { + "node": "Convert XML to JSON", + "type": "main", + "index": 0 + } + ] + ] + }, + "Convert XML to JSON": { + "main": [ + [ + { + "node": "Split Out XML URLs from decompressed gzip", + "type": "main", + "index": 0 + } + ] + ] + }, + "Split Out XML URLs from decompressed gzip": { + "main": [ + [ + { + "node": "Loop Over all XML URLs", + "type": "main", + "index": 0 + } + ] + ] + }, + "Loop Over all XML URLs": { + "main": [ + [ + { + "node": "Loop Over XML URLs", + "type": "main", + "index": 0 + } + ], + [ + { + "node": "Wait2", + "type": 
"main", + "index": 0 + } + ] + ] + }, + "Crawl with Crawl4ai-2": { + "main": [ + [ + { + "node": "Pinecone Vector Store", + "type": "main", + "index": 0 + } + ] + ] + } + }, + "active": false, + "settings": { + "executionOrder": "v1" + }, + "versionId": "b337dd9a-99f0-4d0f-b782-39afa8b62e0c", + "meta": { + "templateCredsSetupCompleted": true, + "instanceId": "09eb18af5dccac351cf0268b293a762f1ecd59086e605300f34309f080a1382a" + }, + "id": "kCNlJS2UNnNebhIz", + "tags": [] +} \ No newline at end of file