{ "name": "tepto-vector-storing-shop", "nodes": [ { "parameters": { "options": {} }, "type": "@n8n/n8n-nodes-langchain.chatTrigger", "typeVersion": 1.3, "position": [ -1072, 256 ], "id": "3f2411cd-7cdf-4b37-91cb-43afa0fa611c", "name": "When chat message received", "webhookId": "cc00d5d8-2f31-4af5-a50a-0bfbcee955f3" }, { "parameters": { "url": "={{ $json.chatInput }}/robots.txt", "sendQuery": true, "queryParameters": { "parameters": [ {} ] }, "options": {} }, "type": "n8n-nodes-base.httpRequest", "typeVersion": 4.2, "position": [ -784, 256 ], "id": "fcc3a522-e4da-4974-93bb-65f7c81941c4", "name": "Check for robots.txt", "onError": "continueErrorOutput" }, { "parameters": { "method": "POST", "url": "http://crawl4ai:11235/crawl", "sendBody": true, "bodyParameters": { "parameters": [ { "name": "urls", "value": "={{ [$json.chatInput] }}" }, { "name": "priority", "value": "10" } ] }, "options": {} }, "type": "n8n-nodes-base.httpRequest", "typeVersion": 4.2, "position": [ -288, 448 ], "id": "17b9227e-8f21-48e6-aaa7-0e2c46aea61f", "name": "Crawl with Crawl4ai" }, { "parameters": { "promptType": "define", "text": "=Look at the following data below and return only a json in the format of the example below. \ndata: {{ $json.data }} \n\nexample:\n{\"sitemap_found\":\"true\", \"sitemap_url_count\": \"1\", \"sitemap_url\":[\"https://example.com/sitemap.xml\"]}", "hasOutputParser": true, "messages": { "messageValues": [ { "message": "You are an assistant to read robots.txt and find out about sitemap xml." }, { "type": "HumanMessagePromptTemplate", "message": "=Look at the following data below and return only a json in the format of the example below. 
\ndata: {{ $json.data }} \n\nexample:\n{\"sitemap_found\":\"true\", \"sitemap_url_count\": \"1\", \"sitemap_url\":[\"https://example.com/sitemap.xml\"]}" } ] }, "batching": {} }, "type": "@n8n/n8n-nodes-langchain.chainLlm", "typeVersion": 1.7, "position": [ -384, -160 ], "id": "c32c974d-8da0-4110-adb2-1b4890834ef3", "name": "Basic LLM Chain" }, { "parameters": { "model": { "__rl": true, "value": "gpt-4o-mini", "mode": "list", "cachedResultName": "gpt-4o-mini" }, "options": {} }, "type": "@n8n/n8n-nodes-langchain.lmChatOpenAi", "typeVersion": 1.2, "position": [ -416, 64 ], "id": "68e21a9e-ca9c-4757-99dd-46d8067bf8a4", "name": "OpenAI Chat Model2", "credentials": { "openAiApi": { "id": "n8YEc2Yc3MsrQ9g2", "name": "OpenAi account" } } }, { "parameters": { "jsonSchemaExample": "{\"sitemap_found\":\"true\", \"sitemap_url_count\": \"1\", \"sitemap_url\":[\"https://example.com/sitemap.xml\"]}", "autoFix": true }, "type": "@n8n/n8n-nodes-langchain.outputParserStructured", "typeVersion": 1.3, "position": [ -288, 64 ], "id": "fd52fc51-53a7-4951-b51e-6a437fa4da7e", "name": "Structured Output Parser" }, { "parameters": { "model": { "__rl": true, "value": "gpt-4o-mini", "mode": "list", "cachedResultName": "gpt-4o-mini" }, "options": {} }, "type": "@n8n/n8n-nodes-langchain.lmChatOpenAi", "typeVersion": 1.2, "position": [ -208, 272 ], "id": "f817f562-e5b0-476c-a9ca-a91f27196209", "name": "OpenAI Chat Model3", "credentials": { "openAiApi": { "id": "n8YEc2Yc3MsrQ9g2", "name": "OpenAi account" } } }, { "parameters": { "url": "={{ $json['output.sitemap_url'] }}", "options": {} }, "type": "n8n-nodes-base.httpRequest", "typeVersion": 4.2, "position": [ 752, -32 ], "id": "3145274b-1581-4a92-88a9-1f2e0b23f41d", "name": "HTTP Request" }, { "parameters": { "dataPropertyName": "=data", "options": {} }, "type": "n8n-nodes-base.xml", "typeVersion": 1, "position": [ 976, -32 ], "id": "8b251af5-30c1-429b-b934-f679936e8e4c", "name": "XML" }, { "parameters": { "conditions": { "options": { 
"caseSensitive": true, "leftValue": "", "typeValidation": "strict", "version": 2 }, "conditions": [ { "id": "15b424f0-b279-4b28-8df1-a602304bcef8", "leftValue": "={{ $json.sitemapindex.sitemap.loc }}", "rightValue": ".gz", "operator": { "type": "string", "operation": "endsWith" } } ], "combinator": "and" }, "options": {} }, "type": "n8n-nodes-base.if", "typeVersion": 2.2, "position": [ 1200, -32 ], "id": "4b5de457-31af-461c-9d44-40a183aa1e73", "name": "If" }, { "parameters": { "url": "={{ $json.sitemapindex.sitemap.loc }}", "options": { "response": { "response": { "responseFormat": "file" } } } }, "type": "n8n-nodes-base.httpRequest", "typeVersion": 4.2, "position": [ 1424, -32 ], "id": "0f8e4e87-30e1-4ed6-9802-b7a03ac7e9b4", "name": "HTTP Request1" }, { "parameters": { "mode": "insert", "pineconeIndex": { "__rl": true, "value": "tepto-n8n-workflow", "mode": "list", "cachedResultName": "tepto-n8n-workflow" }, "options": {} }, "type": "@n8n/n8n-nodes-langchain.vectorStorePinecone", "typeVersion": 1.3, "position": [ 3248, 48 ], "id": "20699b80-88eb-4056-a0c4-1b39d14c388a", "name": "Pinecone Vector Store", "credentials": { "pineconeApi": { "id": "MsOEOyi31OS8GY2t", "name": "PineconeApi account" } } }, { "parameters": { "model": "text-embedding-3-large", "options": {} }, "type": "@n8n/n8n-nodes-langchain.embeddingsOpenAi", "typeVersion": 1.2, "position": [ 3216, 272 ], "id": "842e5826-8c87-4367-b5ba-a6c9df97e792", "name": "Embeddings OpenAI", "credentials": { "openAiApi": { "id": "n8YEc2Yc3MsrQ9g2", "name": "OpenAi account" } } }, { "parameters": { "textSplittingMode": "custom", "options": {} }, "type": "@n8n/n8n-nodes-langchain.documentDefaultDataLoader", "typeVersion": 1.1, "position": [ 3344, 272 ], "id": "e34b06b3-8fb2-4dab-9e5f-dbd0df6597b0", "name": "Default Data Loader" }, { "parameters": { "chunkSize": 350, "chunkOverlap": 50 }, "type": "@n8n/n8n-nodes-langchain.textSplitterCharacterTextSplitter", "typeVersion": 1, "position": [ 3424, 480 ], "id": 
"18d17989-7b27-4dce-9be2-b31cc6847d65", "name": "Character Text Splitter" }, { "parameters": { "content": "# Check for robots.txt", "height": 480, "width": 272 }, "type": "n8n-nodes-base.stickyNote", "position": [ -880, 32 ], "typeVersion": 1, "id": "3d5e8145-ac02-4b80-ad4d-6b181cabe307", "name": "Sticky Note" }, { "parameters": { "content": "# Get sitemap.xml", "height": 640, "width": 464 }, "type": "n8n-nodes-base.stickyNote", "position": [ -448, -240 ], "typeVersion": 1, "id": "f14023dd-00f2-466d-b0ea-d450554fb548", "name": "Sticky Note1" }, { "parameters": { "content": "# Vector storing process", "height": 736, "width": 464 }, "type": "n8n-nodes-base.stickyNote", "position": [ 3136, -64 ], "typeVersion": 1, "id": "b151cb65-e90b-4749-8652-a14bda25bf3c", "name": "Sticky Note2" }, { "parameters": { "content": "# Unpacking .xml.gz and looping through all xml documents to get the html content\n", "height": 304, "width": 2608 }, "type": "n8n-nodes-base.stickyNote", "position": [ 496, -160 ], "typeVersion": 1, "id": "239e2c30-b757-49e6-8a6a-047a284c1229", "name": "Sticky Note3" }, { "parameters": { "content": "# Check for robots.txt", "height": 480, "width": 272 }, "type": "n8n-nodes-base.stickyNote", "position": [ -1488, 16 ], "typeVersion": 1, "id": "57a41e8b-398f-4523-82a4-997eaf790a37", "name": "Sticky Note4" }, { "parameters": { "fieldToSplitOut": "output.sitemap_url", "options": {} }, "type": "n8n-nodes-base.splitOut", "typeVersion": 1, "position": [ 80, 48 ], "id": "46141b15-6ec4-4e52-8e98-3d75e5cb25b4", "name": "Split out sitemap.xml url" }, { "parameters": { "options": {} }, "type": "n8n-nodes-base.splitInBatches", "typeVersion": 3, "position": [ 304, 48 ], "id": "aa7fdca2-3288-4a20-a2ab-c2bc31d57c28", "name": "Loop Over XML URLs" }, { "parameters": { "amount": 1.5 }, "type": "n8n-nodes-base.wait", "typeVersion": 1.1, "position": [ 528, -32 ], "id": "2696dbe2-f99c-4368-bf87-6e38eee15432", "name": "Wait", "webhookId": "8d09ff79-c9e5-4cc7-89ce-d4c08d8b7b8f" }, 
{ "parameters": { "amount": 1.5 }, "type": "n8n-nodes-base.wait", "typeVersion": 1.1, "position": [ 2768, -32 ], "id": "165447bc-5901-4ee6-a7d8-7b36d53edbc2", "name": "Wait2", "webhookId": "ec67f4bf-d70d-47ac-89b9-23939918311c" }, { "parameters": {}, "type": "n8n-nodes-base.compression", "typeVersion": 1.1, "position": [ 1648, -32 ], "id": "0fafd398-3d87-4ee9-a48f-89120c7ec186", "name": "Decompress gzip file" }, { "parameters": { "operation": "xml", "binaryPropertyName": "file_0", "options": {} }, "type": "n8n-nodes-base.extractFromFile", "typeVersion": 1, "position": [ 1872, -32 ], "id": "d94db85f-8c1d-4adb-882c-fb59679f1b64", "name": "Extract xml content" }, { "parameters": { "options": {} }, "type": "n8n-nodes-base.xml", "typeVersion": 1, "position": [ 2096, -32 ], "id": "8fcfbf31-ac20-4541-8a53-7cf8f61ab36b", "name": "Convert XML to JSON" }, { "parameters": { "fieldToSplitOut": "urlset.url", "options": {} }, "type": "n8n-nodes-base.splitOut", "typeVersion": 1, "position": [ 2320, -32 ], "id": "73d28f87-a3fd-4cfa-ae9b-86cc183e148c", "name": "Split Out XML URLs from decompressed gzip" }, { "parameters": { "options": { "reset": "=" } }, "type": "n8n-nodes-base.splitInBatches", "typeVersion": 3, "position": [ 2544, 48 ], "id": "ed5bb42d-4fd2-48c0-83f4-fb9c7e7c5816", "name": "Loop Over all XML URLs", "executeOnce": false }, { "parameters": { "method": "POST", "url": "http://crawl4ai:11235/crawl", "sendBody": true, "bodyParameters": { "parameters": [ { "name": "urls", "value": "={{ [$json.loc] }}" }, { "name": "priority", "value": "10" } ] }, "options": {} }, "type": "n8n-nodes-base.httpRequest", "typeVersion": 4.2, "position": [ 2992, -32 ], "id": "6044642f-9c8f-40b3-8d3e-daefa79f820a", "name": "Crawl with Crawl4ai-2" } ], "pinData": {}, "connections": { "When chat message received": { "main": [ [ { "node": "Check for robots.txt", "type": "main", "index": 0 } ] ] }, "Check for robots.txt": { "main": [ [ { "node": "Basic LLM Chain", "type": "main", "index": 0 } ], [ 
{ "node": "Crawl with Crawl4ai", "type": "main", "index": 0 } ] ] }, "Basic LLM Chain": { "main": [ [ { "node": "Split out sitemap.xml url", "type": "main", "index": 0 } ] ] }, "OpenAI Chat Model2": { "ai_languageModel": [ [ { "node": "Basic LLM Chain", "type": "ai_languageModel", "index": 0 } ] ] }, "Structured Output Parser": { "ai_outputParser": [ [ { "node": "Basic LLM Chain", "type": "ai_outputParser", "index": 0 } ] ] }, "OpenAI Chat Model3": { "ai_languageModel": [ [ { "node": "Structured Output Parser", "type": "ai_languageModel", "index": 0 } ] ] }, "HTTP Request": { "main": [ [ { "node": "XML", "type": "main", "index": 0 } ] ] }, "XML": { "main": [ [ { "node": "If", "type": "main", "index": 0 } ] ] }, "If": { "main": [ [ { "node": "HTTP Request1", "type": "main", "index": 0 } ] ] }, "HTTP Request1": { "main": [ [ { "node": "Decompress gzip file", "type": "main", "index": 0 } ] ] }, "Pinecone Vector Store": { "main": [ [ { "node": "Loop Over all XML URLs", "type": "main", "index": 0 } ] ] }, "Embeddings OpenAI": { "ai_embedding": [ [ { "node": "Pinecone Vector Store", "type": "ai_embedding", "index": 0 } ] ] }, "Default Data Loader": { "ai_document": [ [ { "node": "Pinecone Vector Store", "type": "ai_document", "index": 0 } ] ] }, "Character Text Splitter": { "ai_textSplitter": [ [ { "node": "Default Data Loader", "type": "ai_textSplitter", "index": 0 } ] ] }, "Split out sitemap.xml url": { "main": [ [ { "node": "Loop Over XML URLs", "type": "main", "index": 0 } ] ] }, "Loop Over XML URLs": { "main": [ [], [ { "node": "Wait", "type": "main", "index": 0 } ] ] }, "Wait": { "main": [ [ { "node": "HTTP Request", "type": "main", "index": 0 } ] ] }, "Wait2": { "main": [ [ { "node": "Crawl with Crawl4ai-2", "type": "main", "index": 0 } ] ] }, "Decompress gzip file": { "main": [ [ { "node": "Extract xml content", "type": "main", "index": 0 } ] ] }, "Extract xml content": { "main": [ [ { "node": "Convert XML to JSON", "type": "main", "index": 0 } ] ] }, "Convert 
XML to JSON": { "main": [ [ { "node": "Split Out XML URLs from decompressed gzip", "type": "main", "index": 0 } ] ] }, "Split Out XML URLs from decompressed gzip": { "main": [ [ { "node": "Loop Over all XML URLs", "type": "main", "index": 0 } ] ] }, "Loop Over all XML URLs": { "main": [ [ { "node": "Loop Over XML URLs", "type": "main", "index": 0 } ], [ { "node": "Wait2", "type": "main", "index": 0 } ] ] }, "Crawl with Crawl4ai-2": { "main": [ [ { "node": "Pinecone Vector Store", "type": "main", "index": 0 } ] ] } }, "active": false, "settings": { "executionOrder": "v1" }, "versionId": "b337dd9a-99f0-4d0f-b782-39afa8b62e0c", "meta": { "templateCredsSetupCompleted": true, "instanceId": "09eb18af5dccac351cf0268b293a762f1ecd59086e605300f34309f080a1382a" }, "id": "kCNlJS2UNnNebhIz", "tags": [] }