From 84c6533b11643256887562ae5e9c546e122685a7 Mon Sep 17 00:00:00 2001 From: Vikrant-Khedkar Date: Fri, 14 Nov 2025 18:45:49 +0530 Subject: [PATCH 1/3] Revert "feat: update health endpoint" This reverts commit 3e993b3de38bd6358bc4c01252df444895ec5bf6. --- .gitignore | 2 - __pycache__/test_sdk.cpython-312.pyc | Bin 0 -> 838 bytes scrapegraph-js/.gitignore | 130 ++ scrapegraph-js/.prettierignore | 1 + scrapegraph-js/.prettierrc.json | 11 + scrapegraph-js/AGENTIC_SCRAPER.md | 438 +++++ scrapegraph-js/CODE_OF_CONDUCT.md | 128 ++ scrapegraph-js/CONTRIBUTING.MD | 83 + scrapegraph-js/PAGINATION.md | 244 +++ scrapegraph-js/README.md | 808 +++++++++ scrapegraph-js/eslint.config.js | 11 + scrapegraph-js/examples/.env.example | 2 + .../cookies/cookies_integration_example.js | 261 +++ .../mock/mock_mode_example.js | 297 ++++ .../agenticScraper_advanced_example.js | 322 ++++ .../agenticScraper_complete_example.js | 146 ++ .../agenticScraper_comprehensive_example.js | 448 +++++ .../agenticscraper/agenticScraper_example.js | 77 + .../examples/crawl/crawl_example.js | 106 ++ .../crawl_markdown_direct_api_example.js | 269 +++ .../examples/crawl/crawl_markdown_example.js | 217 +++ .../examples/crawl/crawl_sitemap_example.js | 232 +++ .../markdownify/markdownify_example.js | 35 + .../scheduled_jobs/scheduledJobs_example.js | 267 +++ .../scheduledJobs_simple_example.js | 289 ++++ .../examples/schema_generation_example.js | 293 ++++ .../schema_searchScraper_example.js | 44 + .../searchScraper_enhanced_example.js | 333 ++++ .../searchscraper/searchScraper_example.js | 38 + .../searchScraper_markdown_example.js | 93 + .../searchScraper_markdown_polling_example.js | 145 ++ scrapegraph-js/examples/sitemap/README.md | 128 ++ .../examples/sitemap/sitemap_example.js | 72 + .../sitemap/sitemap_with_smartscraper.js | 106 ++ .../schema_smartScraper_example.js | 20 + .../smartScraper_cookies_example.js | 125 ++ .../smartScraper_cookies_simple_example.js | 40 + .../smartscraper/smartScraper_example.js | 13 + .../smartscraper/smartScraper_html_example.js | 136 ++ .../smartScraper_infinite_scroll_example.js | 15 + .../smartScraper_markdown_example.js | 113 ++ ...martScraper_pagination_enhanced_example.js | 287 ++++ .../smartScraper_pagination_example.js | 41 + ...tScraper_pagination_with_scroll_example.js | 121 ++ .../smartScraper_render_heavy_example.js | 24 + .../examples/stealth_mode_example.js | 613 +++++++ .../step_by_step_schema_generation.js | 184 ++ .../getAgenticScraperRequest_example.js | 31 + .../examples/utilities/getCredits_example.js | 11 + .../getSearchScraperRequest_example.js | 12 + .../getSmartScraperRequest_example.js | 12 + .../examples/utilities/healthz_example.js | 59 + .../utilities/healthz_monitoring_example.js | 199 +++ .../utilities/scrape_advanced_example.js | 524 ++++++ .../examples/utilities/scrape_example.js | 205 +++ .../utilities/scrape_polling_example.js | 288 ++++ .../utilities/sendFeedback_example.js | 20 + scrapegraph-js/index.js | 34 + scrapegraph-js/package-lock.json | 1516 +++++++++++++++++ scrapegraph-js/package.json | 49 + scrapegraph-js/src/agenticScraper.js | 226 +++ scrapegraph-js/src/crawl.js | 132 ++ scrapegraph-js/src/credits.js | 37 + scrapegraph-js/src/feedback.js | 49 + scrapegraph-js/src/healthz.js | 56 + scrapegraph-js/src/markdownify.js | 115 ++ scrapegraph-js/src/scheduledJobs.js | 398 +++++ scrapegraph-js/src/schema.js | 185 ++ scrapegraph-js/src/scrape.js | 161 ++ scrapegraph-js/src/searchScraper.js | 145 ++ scrapegraph-js/src/sitemap.js | 68 + scrapegraph-js/src/smartScraper.js | 190 +++ scrapegraph-js/src/utils/handleError.js | 44 + scrapegraph-js/src/utils/mockConfig.js | 100 ++ scrapegraph-js/src/utils/mockResponse.js | 271 +++ scrapegraph-js/test/agenticScraper_test.js | 506 ++++++ scrapegraph-js/test/crawl_markdown_test.js | 609 +++++++ scrapegraph-js/test/healthz_test.js | 314 ++++ scrapegraph-js/test/scheduledJobs_test.js | 413 +++++ scrapegraph-js/test/scrape_test.js | 451 +++++ .../test/searchScraper_markdown_test.js | 524 ++++++ scrapegraph-js/test/sitemap_test.js | 371 ++++ .../test/smartScraper_markdown_html_test.js | 377 ++++ .../test/smartScraper_pagination_test.js | 252 +++ .../test/smartScraper_render_heavy_test.js | 312 ++++ scrapegraph-js/test/stealth_mode_test.js | 626 +++++++ scrapegraph-js/test_cookies_integration.js | 92 + scrapegraph-js/test_schema_generation.js | 187 ++ 88 files changed, 17977 insertions(+), 2 deletions(-) create mode 100644 __pycache__/test_sdk.cpython-312.pyc create mode 100644 scrapegraph-js/.gitignore create mode 100644 scrapegraph-js/.prettierignore create mode 100644 scrapegraph-js/.prettierrc.json create mode 100644 scrapegraph-js/AGENTIC_SCRAPER.md create mode 100644 scrapegraph-js/CODE_OF_CONDUCT.md create mode 100644 scrapegraph-js/CONTRIBUTING.MD create mode 100644 scrapegraph-js/PAGINATION.md create mode 100644 scrapegraph-js/README.md create mode 100644 scrapegraph-js/eslint.config.js create mode 100644 scrapegraph-js/examples/.env.example create mode 100644 scrapegraph-js/examples/advanced_features/cookies/cookies_integration_example.js create mode 100644 scrapegraph-js/examples/advanced_features/mock/mock_mode_example.js create mode 100644 scrapegraph-js/examples/agenticscraper/agenticScraper_advanced_example.js create mode 100644 scrapegraph-js/examples/agenticscraper/agenticScraper_complete_example.js create mode 100644 scrapegraph-js/examples/agenticscraper/agenticScraper_comprehensive_example.js create mode 100644 scrapegraph-js/examples/agenticscraper/agenticScraper_example.js create mode 100644 scrapegraph-js/examples/crawl/crawl_example.js create mode 100644 scrapegraph-js/examples/crawl/crawl_markdown_direct_api_example.js create mode 100644 scrapegraph-js/examples/crawl/crawl_markdown_example.js create mode 100644 scrapegraph-js/examples/crawl/crawl_sitemap_example.js create mode 100644 scrapegraph-js/examples/markdownify/markdownify_example.js create mode 100644 scrapegraph-js/examples/scheduled_jobs/scheduledJobs_example.js create mode 100644 scrapegraph-js/examples/scheduled_jobs/scheduledJobs_simple_example.js create mode 100644 scrapegraph-js/examples/schema_generation_example.js create mode 100644 scrapegraph-js/examples/searchscraper/schema_searchScraper_example.js create mode 100644 scrapegraph-js/examples/searchscraper/searchScraper_enhanced_example.js create mode 100644 scrapegraph-js/examples/searchscraper/searchScraper_example.js create mode 100644 scrapegraph-js/examples/searchscraper/searchScraper_markdown_example.js create mode 100644 scrapegraph-js/examples/searchscraper/searchScraper_markdown_polling_example.js create mode 100644 scrapegraph-js/examples/sitemap/README.md create mode 100644 scrapegraph-js/examples/sitemap/sitemap_example.js create mode 100644 scrapegraph-js/examples/sitemap/sitemap_with_smartscraper.js create mode 100644 scrapegraph-js/examples/smartscraper/schema_smartScraper_example.js create mode 100644 scrapegraph-js/examples/smartscraper/smartScraper_cookies_example.js create mode 100644 scrapegraph-js/examples/smartscraper/smartScraper_cookies_simple_example.js create mode 100644 scrapegraph-js/examples/smartscraper/smartScraper_example.js create mode 100644 scrapegraph-js/examples/smartscraper/smartScraper_html_example.js create mode 100644 scrapegraph-js/examples/smartscraper/smartScraper_infinite_scroll_example.js create mode 100644 scrapegraph-js/examples/smartscraper/smartScraper_markdown_example.js create mode 100644 scrapegraph-js/examples/smartscraper/smartScraper_pagination_enhanced_example.js create mode 100644 scrapegraph-js/examples/smartscraper/smartScraper_pagination_example.js create mode 100644 scrapegraph-js/examples/smartscraper/smartScraper_pagination_with_scroll_example.js create mode 100644 scrapegraph-js/examples/smartscraper/smartScraper_render_heavy_example.js create mode 100644 scrapegraph-js/examples/stealth_mode_example.js create mode 100644 scrapegraph-js/examples/step_by_step_schema_generation.js create mode 100644 scrapegraph-js/examples/utilities/getAgenticScraperRequest_example.js create mode 100644 scrapegraph-js/examples/utilities/getCredits_example.js create mode 100644 scrapegraph-js/examples/utilities/getSearchScraperRequest_example.js create mode 100644 scrapegraph-js/examples/utilities/getSmartScraperRequest_example.js create mode 100644 scrapegraph-js/examples/utilities/healthz_example.js create mode 100644 scrapegraph-js/examples/utilities/healthz_monitoring_example.js create mode 100644 scrapegraph-js/examples/utilities/scrape_advanced_example.js create mode 100644 scrapegraph-js/examples/utilities/scrape_example.js create mode 100644 scrapegraph-js/examples/utilities/scrape_polling_example.js create mode 100644 scrapegraph-js/examples/utilities/sendFeedback_example.js create mode 100644 scrapegraph-js/index.js create mode 100644 scrapegraph-js/package-lock.json create mode 100644 scrapegraph-js/package.json create mode 100644 scrapegraph-js/src/agenticScraper.js create mode 100644 scrapegraph-js/src/crawl.js create mode 100644 scrapegraph-js/src/credits.js create mode 100644 scrapegraph-js/src/feedback.js create mode 100644 scrapegraph-js/src/healthz.js create mode 100644 scrapegraph-js/src/markdownify.js create mode 100644 scrapegraph-js/src/scheduledJobs.js create mode 100644 scrapegraph-js/src/schema.js create mode 100644 scrapegraph-js/src/scrape.js create mode 100644 scrapegraph-js/src/searchScraper.js create mode 100644 scrapegraph-js/src/sitemap.js create mode 100644 scrapegraph-js/src/smartScraper.js create mode 100644 scrapegraph-js/src/utils/handleError.js create mode 100644 scrapegraph-js/src/utils/mockConfig.js create mode 100644 scrapegraph-js/src/utils/mockResponse.js create mode 100644 scrapegraph-js/test/agenticScraper_test.js create mode 100644 scrapegraph-js/test/crawl_markdown_test.js create mode 100644 scrapegraph-js/test/healthz_test.js create mode 100644 scrapegraph-js/test/scheduledJobs_test.js create mode 100644 scrapegraph-js/test/scrape_test.js create mode 100644 scrapegraph-js/test/searchScraper_markdown_test.js create mode 100644 scrapegraph-js/test/sitemap_test.js create mode 100644 scrapegraph-js/test/smartScraper_markdown_html_test.js create mode 100644 scrapegraph-js/test/smartScraper_pagination_test.js create mode 100644 scrapegraph-js/test/smartScraper_render_heavy_test.js create mode 100644 scrapegraph-js/test/stealth_mode_test.js create mode 100644 scrapegraph-js/test_cookies_integration.js create mode 100644 scrapegraph-js/test_schema_generation.js diff --git a/.gitignore b/.gitignore index 08b21b2..c0b5069 100644 --- a/.gitignore +++ b/.gitignore @@ -4,5 +4,3 @@ **/.DS_Store *.csv venv/ - -__pycache__/ \ No newline at end of file diff --git a/__pycache__/test_sdk.cpython-312.pyc b/__pycache__/test_sdk.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8cccba0cbe89b54332d0afba7f9ad557eb723c8b GIT binary patch literal 838 zcmZWn&ubG=5PomByIHfFwxNd9gRs(844SN-gd&1im4LKR5hR!8nK#XDyT5$zMVp-J zt;gOhh<|{ze}*?NiL|sX^yJA~OK&~-cGFPBIm~|F%)FUzhW%bD6_K>~+lsfWAoMFE zW796d`40&`A&MvtP>5R?XIKf8P;IFgDG1bcWBlmqOV#~}Jv`!h6{gz0K900B(AG;= z)nh~p)Y!kaC$(hOD#-2zEkc2sHzfazz9{1kc~65{GFSRPC%ez&E&g$PmkgucWNqqq z#%eg#p9VfeLL!rQi0=k*rvogVXf8irkwGnh_Ym~Uy|~NjB=PGT(5rjEK;{aPyOQ^3 zJRuUk=s2)R!X$u3I}ZB~pKS_8+QJqd*dg&F+XF)VsAH3e+7x)3`HAr3$d1?Ynh)?c zAswhGsVO1%1-M-nq-K``a}ySaiRfFa@U9D7*v+LyJ1w#ibA8%3R)Kc|A)`9W(Q>Vr zPRb!9fsS;ZoAj6}cTOw+1{8T-u9F+LNa9m!>+W z8;+1mx)L;!-i7ffjA=K3$E+%wXMYv{DBY=wF+M@JPEh?+M|gJ2JVMnWs_w2GqJ=XZ zO;rXr?;lJ)*fM3>svenhLv!wUdf|Awd8#36{v4^6vSplE2j;&HsC with `request_id` and initial `status` + +**Example Steps:** +```javascript +const steps = [ + 'click on search bar', + 'type "laptop" in search input', + 'press Enter key', + 'wait for 2 seconds', + 'click on first result', + 'scroll down to reviews' +]; +``` + +### `getAgenticScraperRequest(apiKey, requestId)` + +Retrieves the status or result of an agentic scraper request. + +**Parameters:** +- `apiKey` (string): Your ScrapeGraph AI API key +- `requestId` (string): The request ID from a previous agentic scraper call + +**Returns:** Promise with: +- `status`: 'pending', 'completed', or 'failed' +- `result`: Automation results (when completed) +- `error`: Error message (when failed) +- `created_at`: Request creation timestamp +- `completed_at`: Completion timestamp (when completed) + +## ๐ŸŽฏ Use Cases + +### 1. **Basic Automation (No AI)** +Perfect for simple automation tasks where you just need the raw HTML/markdown content: +- **Login automation**: Automate login flows and capture the resulting page +- **Form submission**: Fill out forms and get confirmation pages +- **Navigation**: Navigate through multi-step workflows +- **Content scraping**: Get page content after performing actions + +### 2. **AI-Powered Data Extraction** +Ideal when you need structured data from the automated interactions: +- **Dashboard data extraction**: Login and extract user information, metrics, settings +- **E-commerce scraping**: Search products and extract structured product data +- **Form result parsing**: Submit forms and extract confirmation details, reference numbers +- **Content analysis**: Navigate to content and extract key information in structured format + +### 3. **Hybrid Approach** +Use both modes depending on your needs: +- **Development/Testing**: Start with basic mode to test automation steps +- **Production**: Add AI extraction for structured data processing +- **Fallback**: Use basic mode when AI extraction isn't needed + +## ๐Ÿ’ก AI Extraction Examples + +### E-commerce Product Search +```javascript +const steps = [ + 'click on search box', + 'type "wireless headphones" in search', + 'press enter', + 'wait for results to load', + 'scroll down 2 times' +]; + +const schema = { + products: { + type: "array", + items: { + type: "object", + properties: { + name: { type: "string" }, + price: { type: "string" }, + rating: { type: "number" }, + availability: { type: "string" } + } + } + } +}; + +const response = await agenticScraper( + apiKey, + 'https://example-store.com', + steps, + true, + 'Extract product names, prices, ratings, and availability from search results', + schema, + true +); +``` + +### Contact Form with Confirmation +```javascript +const steps = [ + 'type "John Doe" in name field', + 'type "john@example.com" in email field', + 'type "Product inquiry" in subject field', + 'type "I need more information about pricing" in message field', + 'click submit button', + 'wait for confirmation' +]; + +const schema = { + submission: { + type: "object", + properties: { + status: { type: "string" }, + message: { type: "string" }, + reference_number: { type: "string" }, + response_time: { type: "string" } + } + } +}; + +const response = await agenticScraper( + apiKey, + 'https://company.com/contact', + steps, + true, + 'Extract form submission status, confirmation message, and any reference numbers', + schema, + true +); +``` + +### Social Media Data Extraction +```javascript +const steps = [ + 'type "username" in username field', + 'type "password" in password field', + 'click login button', + 'wait for dashboard', + 'click on profile section' +]; + +const schema = { + profile: { + type: "object", + properties: { + username: { type: "string" }, + followers: { type: "number" }, + following: { type: "number" }, + posts: { type: "number" }, + recent_activity: { type: "array", items: { type: "string" } } + } + } +}; + +const response = await agenticScraper( + apiKey, + 'https://social-platform.com/login', + steps, + true, + 'Extract profile information including username, follower counts, and recent activity', + schema, + true +); +``` + +## ๐Ÿ”ง Best Practices + +### When to Use AI Extraction +- โœ… **Use AI extraction when**: You need structured data, specific information extraction, or data validation +- โŒ **Skip AI extraction when**: You just need raw content, testing automation steps, or processing content externally + +### Schema Design Tips +- **Be specific**: Define exact data types and required fields +- **Use descriptions**: Add description fields to guide AI extraction +- **Nested objects**: Use nested schemas for complex data structures +- **Arrays**: Use arrays for lists of similar items (products, comments, etc.) + +### Step Optimization +- **Wait steps**: Add wait steps after actions that trigger loading +- **Specific selectors**: Use specific element descriptions ("click on blue submit button") +- **Sequential actions**: Break complex actions into smaller, specific steps +- **Error handling**: Include steps to handle common UI variations + +### ๐Ÿ” Login Automation +```javascript +const loginSteps = [ + 'click on email input', + 'type "user@example.com" in email field', + 'click on password input', + 'type "password123" in password field', + 'click login button', + 'wait for dashboard to load' +]; + +const response = await agenticScraper(apiKey, 'https://app.example.com/login', loginSteps, true); +``` + +### ๐Ÿ›’ E-commerce Interaction +```javascript +const shoppingSteps = [ + 'click on search bar', + 'type "wireless headphones" in search', + 'press Enter', + 'wait for results to load', + 'click on first product', + 'click add to cart button', + 'click view cart' +]; + +const response = await agenticScraper(apiKey, 'https://shop.example.com', shoppingSteps, true); +``` + +### ๐Ÿ“ Form Submission +```javascript +const formSteps = [ + 'click on name input', + 'type "John Doe" in name field', + 'click on email input', + 'type "john@example.com" in email field', + 'click on message textarea', + 'type "Hello, this is a test message" in message area', + 'click submit button' +]; + +const response = await agenticScraper(apiKey, 'https://example.com/contact', formSteps, false); +``` + +## โšก Advanced Usage + +### Polling for Results +```javascript +async function waitForCompletion(requestId, timeoutSeconds = 120) { + const startTime = Date.now(); + const timeout = timeoutSeconds * 1000; + + while (Date.now() - startTime < timeout) { + const status = await getAgenticScraperRequest(apiKey, requestId); + + if (status.status === 'completed') { + return status.result; + } else if (status.status === 'failed') { + throw new Error(status.error); + } + + await new Promise(resolve => setTimeout(resolve, 5000)); // Wait 5 seconds + } + + throw new Error('Timeout waiting for completion'); +} +``` + +### Error Handling +```javascript +try { + const response = await agenticScraper(apiKey, url, steps, true); + const result = await waitForCompletion(response.request_id); + console.log('Automation successful:', result); +} catch (error) { + if (error.message.includes('validation')) { + console.log('Input validation failed:', error.message); + } else if (error.message.includes('timeout')) { + console.log('Automation timed out'); + } else { + console.log('Automation failed:', error.message); + } +} +``` + +## ๐Ÿ“ Step Syntax + +Steps should be written in natural language describing the action to perform: + +### Clicking Elements +- `"click on login button"` +- `"click on search icon"` +- `"click on first result"` + +### Typing Text +- `"type 'username' in email field"` +- `"type 'password123' in password input"` +- `"type 'search query' in search box"` + +### Keyboard Actions +- `"press Enter key"` +- `"press Tab key"` +- `"press Escape key"` + +### Waiting +- `"wait for 2 seconds"` +- `"wait for page to load"` +- `"wait for results to appear"` + +### Scrolling +- `"scroll down"` +- `"scroll to bottom"` +- `"scroll to top"` + +## ๐Ÿ”ง Best Practices + +1. **Use Session Management**: Set `useSession: true` for multi-step workflows +2. **Add Wait Steps**: Include wait times between actions for reliability +3. **Be Specific**: Use descriptive selectors like "login button" vs "button" +4. **Handle Timeouts**: Implement proper timeout handling for long operations +5. **Validate Inputs**: Check URLs and steps before making requests + +## ๐Ÿšจ Common Errors + +### Input Validation Errors +```javascript +// โŒ Invalid URL +await agenticScraper(apiKey, 'not-a-url', steps); + +// โŒ Empty steps +await agenticScraper(apiKey, url, []); + +// โŒ Invalid step +await agenticScraper(apiKey, url, ['click button', '']); // Empty step +``` + +### Runtime Errors +- **Element not found**: Make steps more specific or add wait times +- **Timeout**: Increase polling timeout or break down complex steps +- **Session expired**: Use session management for multi-step flows + +## ๐ŸŒ cURL Equivalent + +```bash +curl --location 'https://api.scrapegraphai.com/v1/agentic-scrapper' \ +--header 'SGAI-APIKEY: your-api-key' \ +--header 'Content-Type: application/json' \ +--data-raw '{ + "url": "https://dashboard.scrapegraphai.com/", + "use_session": true, + "steps": [ + "Type email@gmail.com in email input box", + "Type test-password@123 in password inputbox", + "click on login" + ] +}' +``` + +## ๐Ÿ“– Examples + +Check out the example files in the `/examples` directory: + +- `agenticScraper_example.js` - Basic usage +- `getAgenticScraperRequest_example.js` - Status checking +- `agenticScraper_complete_example.js` - Complete workflow +- `agenticScraper_advanced_example.js` - Advanced patterns with error handling + +## ๐Ÿ’ก Tips + +- Start with simple steps and gradually add complexity +- Test individual steps before combining them +- Use browser developer tools to identify element selectors +- Consider mobile vs desktop layouts when writing steps +- Monitor request status regularly for long-running automations diff --git a/scrapegraph-js/CODE_OF_CONDUCT.md b/scrapegraph-js/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..10e3c8a --- /dev/null +++ b/scrapegraph-js/CODE_OF_CONDUCT.md @@ -0,0 +1,128 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, religion, or sexual identity +and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. + +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +- Demonstrating empathy and kindness toward other people +- Being respectful of differing opinions, viewpoints, and experiences +- Giving and gracefully accepting constructive feedback +- Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +- Focusing on what is best not just for us as individuals, but for the + overall community + +Examples of unacceptable behavior include: + +- The use of sexualized language or imagery, and sexual attention or + advances of any kind +- Trolling, insulting or derogatory comments, and personal or political attacks +- Public or private harassment +- Publishing others' private information, such as a physical or email + address, without their explicit permission +- Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +mvincig11@gmail.com. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series +of actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or +permanent ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within +the community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.0, available at +https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. + +Community Impact Guidelines were inspired by [Mozilla's code of conduct +enforcement ladder](https://github.com/mozilla/diversity). + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see the FAQ at +https://www.contributor-covenant.org/faq. Translations are available at +https://www.contributor-covenant.org/translations. diff --git a/scrapegraph-js/CONTRIBUTING.MD b/scrapegraph-js/CONTRIBUTING.MD new file mode 100644 index 0000000..aab0da0 --- /dev/null +++ b/scrapegraph-js/CONTRIBUTING.MD @@ -0,0 +1,83 @@ +# Contributing to ScrapeGraphAI + +Thank you for your interest in contributing to **ScrapeGraphAI**! We welcome contributions from the community to help improve and grow the project. This document outlines the guidelines and steps for contributing. + +## Table of Contents + +- [Getting Started](#getting-started) +- [Contributing Guidelines](#contributing-guidelines) +- [Code Style](#code-style) +- [Submitting a Pull Request](#submitting-a-pull-request) +- [Reporting Issues](#reporting-issues) +- [License](#license) + +## Getting Started + +To get started with contributing, follow these steps: + +1. Fork the repository on GitHub **(FROM pre/beta branch)**. +2. Clone your forked repository to your local machine. +3. Install the necessary dependencies from requirements.txt or via pyproject.toml as you prefere :). +4. Make your changes or additions. +5. Test your changes thoroughly. +6. Commit your changes with descriptive commit messages. +7. Push your changes to your forked repository. +8. Submit a pull request to the pre/beta branch. + +N.B All the pull request to the main branch will be rejected! + +## Contributing Guidelines + +Please adhere to the following guidelines when contributing to ScrapeGraphAI: + +- Follow the code style and formatting guidelines specified in the [Code Style](#code-style) section. +- Make sure your changes are well-documented and include any necessary updates to the project's documentation and requirements if needed. +- Write clear and concise commit messages that describe the purpose of your changes and the last commit before the pull request has to follow the following format: + - `feat: Add new feature` + - `fix: Correct issue with existing feature` + - `docs: Update documentation` + - `style: Improve formatting and style` + - `refactor: Restructure code` + - `test: Add or update tests` + - `perf: Improve performance` +- Be respectful and considerate towards other contributors and maintainers. + +## Code Style + +Please make sure to format your code accordingly before submitting a pull request. + +### Python + +- [Style Guide for Python Code](https://www.python.org/dev/peps/pep-0008/) +- [Google Python Style Guide](https://google.github.io/styleguide/pyguide.html) +- [The Hitchhiker's Guide to Python](https://docs.python-guide.org/writing/style/) +- [Pylint style of code for the documentation](https://pylint.pycqa.org/en/1.6.0/tutorial.html) + +## Submitting a Pull Request + +To submit your changes for review, please follow these steps: + +1. Ensure that your changes are pushed to your forked repository. +2. Go to the main repository on GitHub and navigate to the "Pull Requests" tab. +3. Click on the "New Pull Request" button. +4. Select your forked repository and the branch containing your changes. +5. Provide a descriptive title and detailed description for your pull request. +6. Reviewers will provide feedback and discuss any necessary changes. +7. Once your pull request is approved, it will be merged into the pre/beta branch. + +## Reporting Issues + +If you encounter any issues or have suggestions for improvements, please open an issue on the GitHub repository. Provide a clear and detailed description of the problem or suggestion, along with any relevant information or steps to reproduce the issue. + +## License + +ScrapeGraphAI is licensed under the **MIT License**. See the [LICENSE](LICENSE) file for more information. +By contributing to this project, you agree to license your contributions under the same license. + +ScrapeGraphAI uses code from the Langchain +frameworks. You find their original licenses below. + +LANGCHAIN LICENSE +https://github.com/langchain-ai/langchain/blob/master/LICENSE + +Can't wait to see your contributions! :smile: diff --git a/scrapegraph-js/PAGINATION.md b/scrapegraph-js/PAGINATION.md new file mode 100644 index 0000000..cf6ba49 --- /dev/null +++ b/scrapegraph-js/PAGINATION.md @@ -0,0 +1,244 @@ +# SmartScraper Pagination + +This document describes the pagination functionality added to the ScrapeGraph JavaScript SDK. + +## Overview + +The `smartScraper` function now supports pagination, allowing you to scrape multiple pages of content in a single request. This is particularly useful for e-commerce sites, search results, news feeds, and other paginated content. + +## Usage + +### Basic Pagination + +```javascript +import { smartScraper } from 'scrapegraph-js'; + +const apiKey = process.env.SGAI_APIKEY; +const url = 'https://example.com/products'; +const prompt = 'Extract all product information'; +const totalPages = 5; // Scrape 5 pages + +const result = await smartScraper(apiKey, url, prompt, null, null, totalPages); +``` + +### Pagination with Schema + +```javascript +import { smartScraper } from 'scrapegraph-js'; +import { z } from 'zod'; + +const ProductSchema = z.object({ + products: z.array(z.object({ + name: z.string(), + price: z.string(), + rating: z.string().optional(), + })), +}); + +const result = await smartScraper( + apiKey, + url, + prompt, + ProductSchema, + null, + 3 // 3 pages +); +``` + +### Pagination with Scrolling + +```javascript +const result = await smartScraper( + apiKey, + url, + prompt, + null, + 10, // 10 scrolls per page + 2 // 2 pages +); +``` + +### All Features Combined + +```javascript +const result = await smartScraper( + apiKey, + url, + prompt, + ProductSchema, + 5, // numberOfScrolls + 3 // totalPages +); +``` + +## Function Signature + +```javascript +smartScraper(apiKey, url, prompt, schema, numberOfScrolls, totalPages) +``` + +### Parameters + +- `apiKey` (string): Your ScrapeGraph AI API key +- `url` (string): The URL of the webpage to scrape +- `prompt` (string): Natural language prompt describing what data to extract +- `schema` (Object, optional): Zod schema object defining the output structure +- `numberOfScrolls` (number, optional): Number of times to scroll the page (0-100) +- `totalPages` (number, optional): Number of pages to scrape (1-10) + +### Parameter Validation + +- `totalPages` must be an integer between 1 and 10 +- `numberOfScrolls` must be an integer between 0 and 100 +- Both parameters are optional and default to `null` + +## Examples + +### E-commerce Product Scraping + +```javascript +import { smartScraper } from 'scrapegraph-js'; +import { z } from 'zod'; + +const ProductSchema = z.object({ + products: z.array(z.object({ + name: z.string(), + price: z.string(), + rating: z.string().optional(), + image_url: z.string().optional(), + })), +}); + +const result = await smartScraper( + process.env.SGAI_APIKEY, + 'https://www.amazon.com/s?k=laptops', + 'Extract all laptop products with name, price, rating, and image', + ProductSchema, + null, + 5 // Scrape 5 pages of results +); +``` + +### News Articles Scraping + +```javascript +const NewsSchema = z.object({ + articles: z.array(z.object({ + title: z.string(), + summary: z.string(), + author: z.string().optional(), + date: z.string().optional(), + })), +}); + +const result = await smartScraper( + process.env.SGAI_APIKEY, + 'https://news.example.com', + 'Extract all news articles with title, summary, author, and date', + NewsSchema, + 3, // Scroll 3 times per page + 4 // Scrape 4 pages +); +``` + +## Error Handling + +The function will throw an error if: +- `totalPages` is not an integer between 1 and 10 +- `numberOfScrolls` is not an integer between 0 and 100 +- API key is invalid +- Network request fails + +```javascript +try { + const result = await smartScraper(apiKey, url, prompt, null, null, totalPages); + console.log('Success:', result); +} catch (error) { + if (error.message.includes('totalPages')) { + console.error('Pagination error:', error.message); + } else { + console.error('Other error:', error.message); + } +} +``` + +## Backward Compatibility + +The pagination feature is fully backward compatible. All existing function calls will continue to work: + +```javascript +// These all work as before +await smartScraper(apiKey, url, prompt); +await smartScraper(apiKey, url, prompt, schema); +await smartScraper(apiKey, url, prompt, schema, numberOfScrolls); +``` + +## Performance Considerations + +- Pagination requests may take significantly longer than single-page requests +- Consider using smaller `totalPages` values for testing +- Some websites may not support pagination +- Rate limiting may apply for large pagination requests + +## Testing + +Run the pagination tests: + +```bash +npm test +``` + +Or run specific examples: + +```bash +node examples/smartScraper_pagination_example.js +node examples/smartScraper_pagination_enhanced_example.js +node examples/smartScraper_pagination_with_scroll_example.js +``` + +## Best Practices + +1. **Start Small**: Begin with 1-2 pages for testing +2. **Use Schemas**: Define clear schemas for structured data extraction +3. **Error Handling**: Always wrap calls in try-catch blocks +4. **Rate Limiting**: Be mindful of API rate limits with large pagination requests +5. **Website Compatibility**: Not all websites support pagination - test thoroughly +6. **Performance**: Monitor request times and adjust parameters accordingly + +## Troubleshooting + +### Common Issues + +1. **Validation Error**: Ensure `totalPages` is between 1-10 +2. **Timeout**: Try reducing `totalPages` or `numberOfScrolls` +3. **No Results**: Some websites may not support pagination +4. **Rate Limiting**: Reduce request frequency or pagination size + +### Debug Tips + +```javascript +console.log('Starting pagination request...'); +console.log('URL:', url); +console.log('Total Pages:', totalPages); +console.log('Number of Scrolls:', numberOfScrolls); + +const startTime = Date.now(); +const result = await smartScraper(apiKey, url, prompt, schema, numberOfScrolls, totalPages); +const duration = Date.now() - startTime; + +console.log('Request completed in:', duration, 'ms'); +console.log('Result type:', typeof result); +``` + +## Support + +For issues or questions about pagination functionality: + +1. Check the examples in the `examples/` directory +2. Run the test suite with `npm test` +3. Review the error messages for specific guidance +4. Check the main SDK documentation + +--- + +*This pagination feature is designed to work with the existing ScrapeGraph AI API and maintains full backward compatibility with existing code.* diff --git a/scrapegraph-js/README.md b/scrapegraph-js/README.md new file mode 100644 index 0000000..2d2e19e --- /dev/null +++ b/scrapegraph-js/README.md @@ -0,0 +1,808 @@ +# ๐ŸŒ ScrapeGraph JavaScript SDK + +[![npm version](https://badge.fury.io/js/scrapegraph-js.svg)](https://badge.fury.io/js/scrapegraph-js) [![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT) [![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg)](https://docs.scrapegraphai.com) + +

+ ScrapeGraph API Banner +

+ +Official JavaScript/TypeScript SDK for the ScrapeGraph AI API - Smart web scraping powered by AI. + +## ๐Ÿš€ Features + +- โœจ Smart web scraping with AI +- ๐Ÿ”„ Fully asynchronous design +- ๐Ÿ” Detailed error handling +- โšก Automatic retries and logging +- ๐Ÿ” Secure API authentication +- ๐Ÿ”ง AI-powered schema generation + +## ๐Ÿ“ฆ Installation + +Install the package using npm or yarn: + +```bash +# Using npm +npm i scrapegraph-js + +# Using yarn +yarn add scrapegraph-js +``` + +## ๐Ÿ”ง Quick Start + +> **Note**: Store your API keys securely in environment variables. Use `.env` files and libraries like `dotenv` to load them into your app. + +### Basic Example + +```javascript +import { smartScraper } from 'scrapegraph-js'; +import 'dotenv/config'; + +// Initialize variables +const apiKey = process.env.SGAI_APIKEY; // Set your API key as an environment variable +const websiteUrl = 'https://example.com'; +const prompt = 'What does the company do?'; + +(async () => { + try { + const response = await smartScraper(apiKey, websiteUrl, prompt); + console.log(response.result); + } catch (error) { + console.error('Error:', error); + } +})(); +``` + +## ๐ŸŽฏ Examples + +### Scrape - Get HTML Content + +#### Basic Scrape + +```javascript +import { scrape } from 'scrapegraph-js'; + +const apiKey = 'your-api-key'; +const url = 'https://example.com'; + +(async () => { + try { + const response = await scrape(apiKey, url); + console.log('HTML content:', response.html); + console.log('Status:', response.status); + } catch (error) { + console.error('Error:', error); + } +})(); +``` + +#### Scrape with Heavy JavaScript Rendering + +```javascript +import { scrape } from 'scrapegraph-js'; + +const apiKey = 'your-api-key'; +const url = 'https://example.com'; + +(async () => { + try { + const response = await scrape(apiKey, url, { + renderHeavyJs: true + }); + console.log('HTML content with JS rendering:', response.html); + } catch (error) { + console.error('Error:', error); + } +})(); +``` + +#### Scrape with Custom Headers + +```javascript +import { scrape } from 'scrapegraph-js'; + +const apiKey = 'your-api-key'; +const url = 'https://example.com'; + +(async () => { + try { + const response = await scrape(apiKey, url, { + headers: { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36', + 'Cookie': 'session=123' + } + }); + console.log('HTML content with custom headers:', response.html); + } catch (error) { + console.error('Error:', error); + } +})(); +``` + +#### Get Scrape Request Status + +```javascript +import { getScrapeRequest } from 'scrapegraph-js'; + +const apiKey = 'your-api-key'; +const requestId = 'your-request-id'; + +(async () => { + try { + const response = await getScrapeRequest(apiKey, requestId); + console.log('Request status:', response.status); + if (response.status === 'completed') { + console.log('HTML content:', response.html); + } + } catch (error) { + console.error('Error:', error); + } +})(); +``` + +### Scraping Websites + +#### Basic Scraping + +```javascript +import { smartScraper } from 'scrapegraph-js'; + +const apiKey = 'your-api-key'; +const url = 'https://example.com'; +const prompt = 'Extract the main heading and description.'; + +(async () => { + try { + const response = await smartScraper(apiKey, url, prompt); + console.log(response.result); + } catch (error) { + console.error('Error:', error); + } +})(); +``` + +#### Scraping with Custom Output Schema + +> [!NOTE] +> To use this feature, it is necessary to employ the [Zod](https://www.npmjs.com/package/zod) package for schema creation. + +Here is a real-world example: + +```javascript +import { smartScraper } from 'scrapegraph-js'; +import { z } from 'zod'; + +const apiKey = 'your-api-key'; +const url = 'https://scrapegraphai.com/'; +const prompt = 'What does the company do? and '; + +const schema = z.object({ + title: z.string().describe('The title of the webpage'), + description: z.string().describe('The description of the webpage'), + summary: z.string().describe('A brief summary of the webpage'), +}); + +(async () => { + try { + const response = await smartScraper(apiKey, url, prompt, schema); + console.log(response.result); + } catch (error) { + console.error('Error:', error); + } +})(); +``` + +#### Scraping with Infinite Scrolling + +For websites that load content dynamically through infinite scrolling (like social media feeds), you can use the `numberOfScrolls` parameter: + +```javascript +import { smartScraper } from 'scrapegraph-js'; + +const apiKey = 'your-api-key'; +const url = 'https://example.com/infinite-scroll-page'; +const prompt = 'Extract all the posts from the feed'; +const numberOfScrolls = 10; // Will scroll 10 times to load more content + +(async () => { + try { + const response = await smartScraper(apiKey, url, prompt, null, numberOfScrolls); + console.log('Extracted data from scrolled page:', response); + } catch (error) { + console.error('Error:', error); + } +})(); +``` + +The `numberOfScrolls` parameter accepts values between 0 and 100, allowing you to control how many times the page should be scrolled before extraction. + +#### Scraping with Cookies + +Use cookies for authentication and session management when scraping websites that require login or have user-specific content: + +```javascript +import { smartScraper } from 'scrapegraph-js'; + +const apiKey = 'your-api-key'; +const url = 'https://example.com/dashboard'; +const prompt = 'Extract user profile information'; + +// Define cookies for authentication +const cookies = { + session_id: 'abc123def456', + auth_token: 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...', + user_preferences: 'dark_mode,usd' +}; + +(async () => { + try { + const response = await smartScraper(apiKey, url, prompt, null, null, null, cookies); + console.log(response.result); + } catch (error) { + console.error('Error:', error); + } +})(); +``` + +**Common Use Cases:** +- **E-commerce sites**: User authentication, shopping cart persistence +- **Social media**: Session management, user preferences +- **Banking/Financial**: Secure authentication, transaction history +- **News sites**: User preferences, subscription content +- **API endpoints**: Authentication tokens, API keys + +#### Advanced Scraping with Cookies, Scrolling, and Pagination + +Combine cookies with infinite scrolling and pagination for comprehensive data extraction: + +```javascript +import { smartScraper } from 'scrapegraph-js'; + +const apiKey = 'your-api-key'; +const url = 'https://example.com/feed'; +const prompt = 'Extract all posts from the feed'; +const cookies = { session_token: 'xyz789abc123' }; +const numberOfScrolls = 10; // Scroll 10 times +const totalPages = 5; // Scrape 5 pages + +(async () => { + try { + const response = await smartScraper(apiKey, url, prompt, null, numberOfScrolls, totalPages, cookies); + console.log('Extracted data:', response); + } catch (error) { + console.error('Error:', error); + } +})(); +``` + +### Search Scraping + +Search and extract information from multiple web sources using AI. + +```javascript +import { searchScraper } from 'scrapegraph-js'; + +const apiKey = 'your-api-key'; +const prompt = 'What is the latest version of Python and what are its main features?'; + +(async () => { + try { + const response = await searchScraper(apiKey, prompt); + console.log(response.result); + } catch (error) { + console.error('Error:', error); + } +})(); +``` + +### Crawl API + +Start a crawl job to extract structured data from a website and its linked pages, using a custom schema. + +```javascript +import { crawl, getCrawlRequest } from 'scrapegraph-js'; +import 'dotenv/config'; + +const apiKey = process.env.SGAI_APIKEY; +const url = 'https://scrapegraphai.com/'; +const prompt = 'What does the company do? and I need text content from there privacy and terms'; + +const schema = { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "ScrapeGraphAI Website Content", + "type": "object", + "properties": { + "company": { + "type": "object", + "properties": { + "name": { "type": "string" }, + "description": { "type": "string" }, + "features": { "type": "array", "items": { "type": "string" } }, + "contact_email": { "type": "string", "format": "email" }, + "social_links": { + "type": "object", + "properties": { + "github": { "type": "string", "format": "uri" }, + "linkedin": { "type": "string", "format": "uri" }, + "twitter": { "type": "string", "format": "uri" } + }, + "additionalProperties": false + } + }, + "required": ["name", "description"] + }, + "services": { + "type": "array", + "items": { + "type": "object", + "properties": { + "service_name": { "type": "string" }, + "description": { "type": "string" }, + "features": { "type": "array", "items": { "type": "string" } } + }, + "required": ["service_name", "description"] + } + }, + "legal": { + "type": "object", + "properties": { + "privacy_policy": { "type": "string" }, + "terms_of_service": { "type": "string" } + }, + "required": ["privacy_policy", "terms_of_service"] + } + }, + "required": ["company", "services", "legal"] +}; + +(async () => { + try { + // Start the crawl job + const crawlResponse = await crawl(apiKey, url, prompt, schema, { + cacheWebsite: true, + depth: 2, + maxPages: 2, + sameDomainOnly: true, + sitemap: true, // Use sitemap for better page discovery + batchSize: 1, + }); + console.log('Crawl job started. Response:', crawlResponse); + + // If the crawl is asynchronous and returns an ID, fetch the result + const crawlId = crawlResponse.id || crawlResponse.task_id; + if (crawlId) { + for (let i = 0; i < 10; i++) { + await new Promise((resolve) => setTimeout(resolve, 5000)); + const result = await getCrawlRequest(apiKey, crawlId); + if (result.status === 'success' && result.result) { + console.log('Crawl completed. Result:', result.result.llm_result); + break; + } else if (result.status === 'failed') { + console.log('Crawl failed. Result:', result); + break; + } else { + console.log(`Status: ${result.status}, waiting...`); + } + } + } else { + console.log('No crawl ID found in response. Synchronous result:', crawlResponse); + } + } catch (error) { + console.error('Error occurred:', error); + } +})(); +``` + +You can use a plain JSON schema or a [Zod](https://www.npmjs.com/package/zod) schema for the `schema` parameter. The crawl API supports options for crawl depth, max pages, domain restriction, sitemap discovery, and batch size. + +**Sitemap Benefits:** +- Better page discovery using sitemap.xml +- More comprehensive website coverage +- Efficient crawling of structured websites +- Perfect for e-commerce, news sites, and content-heavy websites + +### Scraping local HTML + +Extract structured data from local HTML content + +```javascript +import { localScraper } from 'scrapegraph-js'; + +const apiKey = 'your_api_key'; +const prompt = 'What does the company do?'; + +const websiteHtml = ` + +

Company Name

+

We are a technology company focused on AI solutions.

+
+

Email: contact@example.com

+
+ + `; +(async () => { + try { + const response = await localScraper(apiKey, websiteHtml, prompt); + console.log(response); + } catch (error) { + console.error(error); + } +})(); +``` + +### Markdownify + +Converts a webpage into clean, well-structured markdown format. + +```javascript +import { smartScraper } from 'scrapegraph-js'; + +const apiKey = 'your_api_key'; +const url = 'https://scrapegraphai.com/'; + +(async () => { + try { + const response = await markdownify(apiKey, url); + console.log(response); + } catch (error) { + console.error(error); + } +})(); +``` + +### Sitemap + +Extract all URLs from a website's sitemap. Automatically discovers sitemap from robots.txt or common sitemap locations. + +```javascript +import { sitemap } from 'scrapegraph-js'; + +const apiKey = 'your-api-key'; +const websiteUrl = 'https://example.com'; + +(async () => { + try { + const response = await sitemap(apiKey, websiteUrl); + console.log('Total URLs found:', response.urls.length); + console.log('URLs:', response.urls); + } catch (error) { + console.error('Error:', error); + } +})(); +``` + +### Checking API Credits + +```javascript +import { getCredits } from 'scrapegraph-js'; + +const apiKey = 'your-api-key'; + +(async () => { + try { + const credits = await getCredits(apiKey); + console.log('Available credits:', credits); + } catch (error) { + console.error('Error fetching credits:', error); + } +})(); +``` + +### Submitting Feedback + +```javascript +import { sendFeedback } from 'scrapegraph-js'; + +const apiKey = 'your-api-key'; +const requestId = '16a63a80-c87f-4cde-b005-e6c3ecda278b'; +const rating = 5; +const feedbackText = 'This is a test feedback message.'; + +(async () => { + try { + const response = await sendFeedback(apiKey, requestId, rating, feedbackText); + console.log('Feedback response:', response); + } catch (error) { + console.error('Error sending feedback:', error); + } +})(); +``` + +### AI-Powered Schema Generation + +Generate JSON schemas from natural language prompts using AI. This feature helps you create structured data schemas for web scraping and data extraction. + +#### Basic Schema Generation + +```javascript +import { generateSchema } from 'scrapegraph-js'; + +const apiKey = 'your-api-key'; +const prompt = 'Find laptops with specifications like brand, processor, RAM, storage, and price'; + +(async () => { + try { + const response = await generateSchema(prompt, null, { apiKey }); + console.log('Generated schema:', response.generated_schema); + console.log('Request ID:', response.request_id); + } catch (error) { + console.error('Error generating schema:', error); + } +})(); +``` + +#### Modifying Existing Schemas + +```javascript +import { generateSchema } from 'scrapegraph-js'; + +const apiKey = 'your-api-key'; +const existingSchema = { + type: 'object', + properties: { + name: { type: 'string' }, + price: { type: 'number' } + }, + required: ['name', 'price'] +}; + +const modificationPrompt = 'Add brand and rating fields to the existing schema'; + +(async () => { + try { + const response = await generateSchema(modificationPrompt, existingSchema, { apiKey }); + console.log('Modified schema:', response.generated_schema); + } catch (error) { + console.error('Error modifying schema:', error); + } +})(); +``` + +#### Checking Schema Generation Status + +```javascript +import { getSchemaStatus } from 'scrapegraph-js'; + +const apiKey = 'your-api-key'; +const requestId = '123e4567-e89b-12d3-a456-426614174000'; + +(async () => { + try { + const response = await getSchemaStatus(requestId, { apiKey }); + console.log('Status:', response.status); + if (response.status === 'completed') { + console.log('Generated schema:', response.generated_schema); + } + } catch (error) { + console.error('Error checking status:', error); + } +})(); +``` + +#### Polling for Completion with Progress Tracking + +```javascript +import { pollSchemaGeneration } from 'scrapegraph-js'; + +const apiKey = 'your-api-key'; +const requestId = '123e4567-e89b-12d3-a456-426614174000'; + +(async () => { + try { + const finalResult = await pollSchemaGeneration(requestId, { + apiKey, + maxAttempts: 15, + delay: 3000, + onProgress: ({ attempt, maxAttempts, status, response }) => { + if (status === 'checking') { + console.log(`Checking status... (${attempt}/${maxAttempts})`); + } else { + console.log(`Status: ${status} (${attempt}/${maxAttempts})`); + } + } + }); + + console.log('Schema generation completed!'); + console.log('Final schema:', finalResult.generated_schema); + } catch (error) { + console.error('Error during polling:', error); + } +})(); +``` + +## ๐Ÿ”ง Available Functions + +### Scrape + +#### `scrape(apiKey, url, options)` + +Converts a webpage into HTML format with optional JavaScript rendering. + +**Parameters:** +- `apiKey` (string): Your ScrapeGraph AI API key +- `url` (string): The URL of the webpage to convert +- `options` (object, optional): Configuration options + - `renderHeavyJs` (boolean, optional): Whether to render heavy JavaScript (default: false) + - `headers` (object, optional): Custom headers to send with the request + +**Returns:** Promise that resolves to an object containing: +- `html`: The HTML content of the webpage +- `status`: Request status ('completed', 'processing', 'failed') +- `scrape_request_id`: Unique identifier for the request +- `error`: Error message if the request failed + +**Example:** +```javascript +const response = await scrape(apiKey, 'https://example.com', { + renderHeavyJs: true, + headers: { 'User-Agent': 'Custom Agent' } +}); +``` + +#### `getScrapeRequest(apiKey, requestId)` + +Retrieves the status or result of a previous scrape request. + +**Parameters:** +- `apiKey` (string): Your ScrapeGraph AI API key +- `requestId` (string): The unique identifier for the scrape request + +**Returns:** Promise that resolves to the request result object. + +**Example:** +```javascript +const result = await getScrapeRequest(apiKey, 'request-id-here'); +``` + +### Smart Scraper + +#### `smartScraper(apiKey, url, prompt, schema, numberOfScrolls, totalPages, cookies)` + +Extracts structured data from websites using AI-powered scraping. + +**Parameters:** +- `apiKey` (string): Your ScrapeGraph AI API key +- `url` (string): The URL of the website to scrape +- `prompt` (string): Natural language prompt describing what to extract +- `schema` (object, optional): Zod schema for structured output +- `numberOfScrolls` (number, optional): Number of scrolls for infinite scroll pages +- `totalPages` (number, optional): Number of pages to scrape +- `cookies` (object, optional): Cookies for authentication + +### Search Scraper + +#### `searchScraper(apiKey, prompt, url, numResults, headers, outputSchema)` + +Searches and extracts information from multiple web sources using AI. + +### Crawl API + +#### `crawl(apiKey, url, prompt, dataSchema, options)` + +Starts a crawl job to extract structured data from a website and its linked pages. + +**Parameters:** +- `apiKey` (string): Your ScrapeGraph AI API key +- `url` (string): The starting URL for the crawl +- `prompt` (string): AI prompt to guide data extraction (required for AI mode) +- `dataSchema` (object): JSON schema defining extracted data structure (required for AI mode) +- `options` (object): Optional crawl parameters + - `extractionMode` (boolean, default: true): true for AI extraction, false for markdown conversion + - `cacheWebsite` (boolean, default: true): Whether to cache website content + - `depth` (number, default: 2): Maximum crawl depth (1-10) + - `maxPages` (number, default: 2): Maximum pages to crawl (1-100) + - `sameDomainOnly` (boolean, default: true): Only crawl pages from the same domain + - `sitemap` (boolean, default: false): Use sitemap.xml for better page discovery + - `batchSize` (number, default: 1): Batch size for processing pages (1-10) + - `renderHeavyJs` (boolean, default: false): Whether to render heavy JavaScript + +**Sitemap Benefits:** +- Better page discovery using sitemap.xml +- More comprehensive website coverage +- Efficient crawling of structured websites +- Perfect for e-commerce, news sites, and content-heavy websites + +### Markdownify + +#### `markdownify(apiKey, url, headers)` + +Converts a webpage into clean, well-structured markdown format. + +### Sitemap + +#### `sitemap(apiKey, websiteUrl, options)` + +Extracts all URLs from a website's sitemap. Automatically discovers sitemap from robots.txt or common sitemap locations. + +**Parameters:** +- `apiKey` (string): Your ScrapeGraph AI API key +- `websiteUrl` (string): The URL of the website to extract sitemap from +- `options` (object, optional): Additional options + - `mock` (boolean): Override mock mode for this request + +**Returns:** Promise resolving to an object containing: +- `urls` (array): List of URLs extracted from the sitemap + +### Agentic Scraper + +#### `agenticScraper(apiKey, url, steps, useSession, userPrompt, outputSchema, aiExtraction)` + +Performs automated actions on webpages using step-by-step instructions. + +### Utility Functions + +#### `getCredits(apiKey)` + +Retrieves your current credit balance and usage statistics. + +#### `sendFeedback(apiKey, requestId, rating, feedbackText)` + +Submits feedback for a specific request. + +## ๐Ÿ“š Documentation + +For detailed documentation, visit [docs.scrapegraphai.com](https://docs.scrapegraphai.com) + +## ๐Ÿ› ๏ธ Development + +### Setup + +1. Clone the repository: + + ```bash + git clone https://github.com/ScrapeGraphAI/scrapegraph-sdk.git + cd scrapegraph-sdk/scrapegraph-js + ``` + +2. Install dependencies: + + ```bash + npm install + ``` + +3. Run linting and testing: + ```bash + npm run lint + npm test + ``` + +### Running Tests + +```bash +# Run all tests +npm test + +# Run tests with coverage +npm run test:coverage +``` + +## ๐Ÿ“ License + +This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. + +## ๐Ÿค Contributing + +Contributions are welcome! Please feel free to submit a Pull Request. For major changes, please open an issue first to discuss what you would like to change. + +1. Fork the repository +2. Create your feature branch (`git checkout -b feature/AmazingFeature`) +3. Commit your changes (`git commit -m 'Add some AmazingFeature'`) +4. Push to the branch (`git push origin feature/AmazingFeature`) +5. Open a Pull Request + +## ๐Ÿ”— Links + +- [Website](https://scrapegraphai.com) +- [Documentation](https://docs.scrapegraphai.com) +- [GitHub](https://github.com/ScrapeGraphAI/scrapegraph-sdk) + +## ๐Ÿ’ฌ Support + +- ๐Ÿ“ง Email: support@scrapegraphai.com +- ๐Ÿ’ป GitHub Issues: [Create an issue](https://github.com/ScrapeGraphAI/scrapegraph-sdk/issues) +- ๐ŸŒŸ Feature Requests: [Request a feature](https://github.com/ScrapeGraphAI/scrapegraph-sdk/issues/new) + +--- + +Made with โค๏ธ by [ScrapeGraph AI](https://scrapegraphai.com) diff --git a/scrapegraph-js/eslint.config.js b/scrapegraph-js/eslint.config.js new file mode 100644 index 0000000..a3fe107 --- /dev/null +++ b/scrapegraph-js/eslint.config.js @@ -0,0 +1,11 @@ +import globals from 'globals'; +import pluginJs from '@eslint/js'; +import eslintPluginPrettierRecommended from 'eslint-plugin-prettier/recommended'; + +/** @type {import('eslint').Linter.Config[]} */ +export default [ + { languageOptions: { globals: { ...globals.browser, ...globals.node } } }, + pluginJs.configs.recommended, + eslintPluginPrettierRecommended, + { ignores: ['node_modules/'] }, +]; diff --git a/scrapegraph-js/examples/.env.example b/scrapegraph-js/examples/.env.example new file mode 100644 index 0000000..381b291 --- /dev/null +++ b/scrapegraph-js/examples/.env.example @@ -0,0 +1,2 @@ +# ScrapegraphAI API Key +SGAI_APIKEY="your ScrapegraphAI API Key" diff --git a/scrapegraph-js/examples/advanced_features/cookies/cookies_integration_example.js b/scrapegraph-js/examples/advanced_features/cookies/cookies_integration_example.js new file mode 100644 index 0000000..7d54f49 --- /dev/null +++ b/scrapegraph-js/examples/advanced_features/cookies/cookies_integration_example.js @@ -0,0 +1,261 @@ +/** + * Comprehensive example demonstrating cookies integration for web scraping. + * + * This example shows various real-world scenarios where cookies are essential: + * 1. E-commerce site scraping with authentication + * 2. Social media scraping with session cookies + * 3. Banking/financial site scraping with secure cookies + * 4. News site scraping with user preferences + * 5. API endpoint scraping with authentication tokens + * + * Requirements: + * - Node.js 16+ + * - scrapegraph-js + * - A .env file with your SGAI_APIKEY + * + * Example .env file: + * SGAI_APIKEY=your_api_key_here + */ + +import { smartScraper } from 'scrapegraph-js'; +import { z } from 'zod'; +import 'dotenv/config'; + +// Define data schemas for different scenarios +const ProductInfoSchema = z.object({ + name: z.string().describe('Product name'), + price: z.string().describe('Product price'), + availability: z.string().describe('Product availability status'), + rating: z.string().optional().describe('Product rating') +}); + +const SocialMediaPostSchema = z.object({ + author: z.string().describe('Post author'), + content: z.string().describe('Post content'), + likes: z.string().optional().describe('Number of likes'), + comments: z.string().optional().describe('Number of comments'), + timestamp: z.string().optional().describe('Post timestamp') +}); + +const NewsArticleSchema = z.object({ + title: z.string().describe('Article title'), + summary: z.string().describe('Article summary'), + author: z.string().optional().describe('Article author'), + publish_date: z.string().optional().describe('Publish date') +}); + +const BankTransactionSchema = z.object({ + date: z.string().describe('Transaction date'), + description: z.string().describe('Transaction description'), + amount: z.string().describe('Transaction amount'), + type: z.string().describe('Transaction type (credit/debit)') +}); + +async function scrapeEcommerceWithAuth() { + console.log('='.repeat(60)); + console.log('E-COMMERCE SITE SCRAPING WITH AUTHENTICATION'); + console.log('='.repeat(60)); + + // Example cookies for an e-commerce site + const cookies = { + session_id: 'abc123def456', + user_id: 'user789', + cart_id: 'cart101112', + preferences: 'dark_mode,usd', + auth_token: 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...' + }; + + const websiteUrl = 'https://example-ecommerce.com/products'; + const userPrompt = 'Extract product information including name, price, availability, and rating'; + + try { + const response = await smartScraper( + process.env.SGAI_APIKEY, + websiteUrl, + userPrompt, + ProductInfoSchema, + 5, // numberOfScrolls - Scroll to load more products + null, // totalPages + cookies + ); + + console.log('โœ… E-commerce scraping completed successfully'); + console.log(JSON.stringify(response, null, 2)); + + } catch (error) { + console.error(`โŒ Error in e-commerce scraping: ${error.message}`); + } +} + +async function scrapeSocialMediaWithSession() { + console.log('\n' + '='.repeat(60)); + console.log('SOCIAL MEDIA SCRAPING WITH SESSION COOKIES'); + console.log('='.repeat(60)); + + // Example cookies for a social media site + const cookies = { + session_token: 'xyz789abc123', + user_session: 'def456ghi789', + csrf_token: 'jkl012mno345', + remember_me: 'true', + language: 'en_US' + }; + + const websiteUrl = 'https://example-social.com/feed'; + const userPrompt = 'Extract posts from the feed including author, content, likes, and comments'; + + try { + const response = await smartScraper( + process.env.SGAI_APIKEY, + websiteUrl, + userPrompt, + SocialMediaPostSchema, + 10, // numberOfScrolls - Scroll to load more posts + null, // totalPages + cookies + ); + + console.log('โœ… Social media scraping completed successfully'); + console.log(JSON.stringify(response, null, 2)); + + } catch (error) { + console.error(`โŒ Error in social media scraping: ${error.message}`); + } +} + +async function scrapeNewsWithPreferences() { + console.log('\n' + '='.repeat(60)); + console.log('NEWS SITE SCRAPING WITH USER PREFERENCES'); + console.log('='.repeat(60)); + + // Example cookies for a news site + const cookies = { + user_preferences: 'technology,science,ai', + reading_level: 'advanced', + region: 'US', + subscription_tier: 'premium', + theme: 'dark' + }; + + const websiteUrl = 'https://example-news.com/technology'; + const userPrompt = 'Extract news articles including title, summary, author, and publish date'; + + try { + const response = await smartScraper( + process.env.SGAI_APIKEY, + websiteUrl, + userPrompt, + NewsArticleSchema, + null, // numberOfScrolls + 3, // totalPages - Scrape multiple pages + cookies + ); + + console.log('โœ… News scraping completed successfully'); + console.log(JSON.stringify(response, null, 2)); + + } catch (error) { + console.error(`โŒ Error in news scraping: ${error.message}`); + } +} + +async function scrapeBankingWithSecureCookies() { + console.log('\n' + '='.repeat(60)); + console.log('BANKING SITE SCRAPING WITH SECURE COOKIES'); + console.log('='.repeat(60)); + + // Example secure cookies for a banking site + const cookies = { + secure_session: 'pqr678stu901', + auth_token: 'vwx234yz567', + mfa_verified: 'true', + device_id: 'device_abc123', + last_activity: '2024-01-15T10:30:00Z' + }; + + const websiteUrl = 'https://example-bank.com/transactions'; + const userPrompt = 'Extract recent transactions including date, description, amount, and type'; + + try { + const response = await smartScraper( + process.env.SGAI_APIKEY, + websiteUrl, + userPrompt, + BankTransactionSchema, + null, // numberOfScrolls + 5, // totalPages - Scrape multiple pages of transactions + cookies + ); + + console.log('โœ… Banking scraping completed successfully'); + console.log(JSON.stringify(response, null, 2)); + + } catch (error) { + console.error(`โŒ Error in banking scraping: ${error.message}`); + } +} + +async function scrapeApiWithAuthTokens() { + console.log('\n' + '='.repeat(60)); + console.log('API ENDPOINT SCRAPING WITH AUTH TOKENS'); + console.log('='.repeat(60)); + + // Example API authentication cookies + const cookies = { + api_token: 'api_abc123def456', + client_id: 'client_789', + access_token: 'access_xyz789', + refresh_token: 'refresh_abc123', + scope: 'read:all' + }; + + const websiteUrl = 'https://api.example.com/data'; + const userPrompt = 'Extract data from the API response'; + + try { + const response = await smartScraper( + process.env.SGAI_APIKEY, + websiteUrl, + userPrompt, + null, // No schema for generic API response + null, // numberOfScrolls + null, // totalPages + cookies + ); + + console.log('โœ… API scraping completed successfully'); + console.log(JSON.stringify(response, null, 2)); + + } catch (error) { + console.error(`โŒ Error in API scraping: ${error.message}`); + } +} + +async function main() { + const apiKey = process.env.SGAI_APIKEY; + + // Check if API key is available + if (!apiKey) { + console.error('Error: SGAI_APIKEY not found in .env file'); + console.log('Please create a .env file with your API key:'); + console.log('SGAI_APIKEY=your_api_key_here'); + return; + } + + console.log('๐Ÿช COOKIES INTEGRATION EXAMPLES'); + console.log('This demonstrates various real-world scenarios where cookies are essential for web scraping.'); + + // Run all examples + await scrapeEcommerceWithAuth(); + await scrapeSocialMediaWithSession(); + await scrapeNewsWithPreferences(); + await scrapeBankingWithSecureCookies(); + await scrapeApiWithAuthTokens(); + + console.log('\n' + '='.repeat(60)); + console.log('โœ… All examples completed!'); + console.log('='.repeat(60)); +} + +// Run the example +main().catch(console.error); diff --git a/scrapegraph-js/examples/advanced_features/mock/mock_mode_example.js b/scrapegraph-js/examples/advanced_features/mock/mock_mode_example.js new file mode 100644 index 0000000..2eda6be --- /dev/null +++ b/scrapegraph-js/examples/advanced_features/mock/mock_mode_example.js @@ -0,0 +1,297 @@ +/** + * Example demonstrating how to use the ScrapeGraph AI SDK in mock mode. + * + * This example shows how to: + * 1. Enable mock mode globally or per-request + * 2. Use custom mock responses + * 3. Use custom mock handlers + * 4. Test different endpoints in mock mode + * 5. Demonstrate environment variable activation + * + * Requirements: + * - Node.js 16+ + * - scrapegraph-js + * + * Usage: + * node mock_mode_example.js + * + * Or with environment variable: + * SGAI_MOCK=1 node mock_mode_example.js + */ + +import { + scrape, + getScrapeRequest, + smartScraper, + getSmartScraperRequest, + searchScraper, + getSearchScraperRequest, + markdownify, + getMarkdownifyRequest, + crawl, + getCrawlRequest, + agenticScraper, + getAgenticScraperRequest, + getCredits, + sendFeedback +} from '../index.js'; + +import { + initMockConfig, + enableMock, + disableMock, + setMockResponses, + setMockHandler +} from '../src/utils/mockConfig.js'; + +// Configuration +const API_KEY = process.env.SGAI_API_KEY || 'sgai-00000000-0000-0000-0000-000000000000'; + +/** + * Basic mock mode usage demonstration + */ +async function basicMockUsage() { + console.log('\n=== Basic Mock Usage ==='); + + // Enable mock mode globally + enableMock(); + + try { + // Test scrape endpoint + console.log('\n-- Testing scrape endpoint --'); + const scrapeResult = await scrape(API_KEY, 'https://example.com', { renderHeavyJs: true }); + console.log('Scrape result:', scrapeResult); + + // Test getScrapeRequest endpoint + console.log('\n-- Testing getScrapeRequest endpoint --'); + const scrapeStatus = await getScrapeRequest(API_KEY, 'mock-request-id'); + console.log('Scrape status:', scrapeStatus); + + // Test smartScraper endpoint + console.log('\n-- Testing smartScraper endpoint --'); + const smartResult = await smartScraper(API_KEY, 'https://example.com', 'Extract the title'); + console.log('SmartScraper result:', smartResult); + + // Test getCredits endpoint + console.log('\n-- Testing getCredits endpoint --'); + const credits = await getCredits(API_KEY); + console.log('Credits:', credits); + + // Test sendFeedback endpoint + console.log('\n-- Testing sendFeedback endpoint --'); + const feedback = await sendFeedback(API_KEY, 'mock-request-id', 5, 'Great service!'); + console.log('Feedback result:', feedback); + + } catch (error) { + console.error('Error in basic mock usage:', error.message); + } +} + +/** + * Mock mode with custom responses + */ +async function mockWithCustomResponses() { + console.log('\n=== Mock Mode with Custom Responses ==='); + + // Set custom responses for specific endpoints + setMockResponses({ + '/v1/credits': { + remaining_credits: 42, + total_credits_used: 58, + custom_field: 'This is a custom response' + }, + '/v1/smartscraper': () => ({ + request_id: 'custom-mock-request-id', + custom_data: 'Generated by custom function' + }) + }); + + try { + // Test credits with custom response + console.log('\n-- Testing credits with custom response --'); + const credits = await getCredits(API_KEY); + console.log('Custom credits:', credits); + + // Test smartScraper with custom response + console.log('\n-- Testing smartScraper with custom response --'); + const smartResult = await smartScraper(API_KEY, 'https://example.com', 'Extract data'); + console.log('Custom smartScraper result:', smartResult); + + } catch (error) { + console.error('Error in custom responses:', error.message); + } +} + +/** + * Mock mode with custom handler + */ +async function mockWithCustomHandler() { + console.log('\n=== Mock Mode with Custom Handler ==='); + + // Set a custom handler that overrides all responses + setMockHandler((method, url) => { + return { + custom_handler: true, + method: method, + url: url, + timestamp: new Date().toISOString(), + message: 'This response was generated by a custom handler' + }; + }); + + try { + // Test various endpoints with custom handler + console.log('\n-- Testing with custom handler --'); + + const scrapeResult = await scrape(API_KEY, 'https://example.com'); + console.log('Scrape with custom handler:', scrapeResult); + + const smartResult = await smartScraper(API_KEY, 'https://example.com', 'Test prompt'); + console.log('SmartScraper with custom handler:', smartResult); + + const credits = await getCredits(API_KEY); + console.log('Credits with custom handler:', credits); + + } catch (error) { + console.error('Error in custom handler:', error.message); + } +} + +/** + * Per-request mock mode (without global enable) + */ +async function perRequestMockMode() { + console.log('\n=== Per-Request Mock Mode ==='); + + // Disable global mock mode + disableMock(); + + try { + // Test individual requests with mock enabled + console.log('\n-- Testing per-request mock mode --'); + + const scrapeResult = await scrape(API_KEY, 'https://example.com', { mock: true }); + console.log('Per-request mock scrape:', scrapeResult); + + const smartResult = await smartScraper(API_KEY, 'https://example.com', 'Test', null, null, null, null, { mock: true }); + console.log('Per-request mock smartScraper:', smartResult); + + const scrapeStatus = await getScrapeRequest(API_KEY, 'test-id', { mock: true }); + console.log('Per-request mock getScrapeRequest:', scrapeStatus); + + } catch (error) { + console.error('Error in per-request mock mode:', error.message); + } +} + +/** + * Test all available endpoints in mock mode + */ +async function testAllEndpoints() { + console.log('\n=== Testing All Endpoints in Mock Mode ==='); + + enableMock(); + + try { + // Test all available endpoints + console.log('\n-- Testing all endpoints --'); + + // Scrape endpoints + const scrapeResult = await scrape(API_KEY, 'https://example.com'); + console.log('Scrape:', scrapeResult.request_id ? 'โœ…' : 'โŒ'); + + const scrapeStatus = await getScrapeRequest(API_KEY, 'mock-id'); + console.log('GetScrapeRequest:', scrapeStatus.status ? 'โœ…' : 'โŒ'); + + // SmartScraper endpoints + const smartResult = await smartScraper(API_KEY, 'https://example.com', 'Extract title'); + console.log('SmartScraper:', smartResult.request_id ? 'โœ…' : 'โŒ'); + + const smartStatus = await getSmartScraperRequest(API_KEY, 'mock-id'); + console.log('GetSmartScraperRequest:', smartStatus.status ? 'โœ…' : 'โŒ'); + + // SearchScraper endpoints + const searchResult = await searchScraper(API_KEY, 'Search for information'); + console.log('SearchScraper:', searchResult.request_id ? 'โœ…' : 'โŒ'); + + const searchStatus = await getSearchScraperRequest(API_KEY, 'mock-id'); + console.log('GetSearchScraperRequest:', searchStatus.status ? 'โœ…' : 'โŒ'); + + // Markdownify endpoints + const markdownResult = await markdownify(API_KEY, 'https://example.com'); + console.log('Markdownify:', markdownResult.request_id ? 'โœ…' : 'โŒ'); + + const markdownStatus = await getMarkdownifyRequest(API_KEY, 'mock-id'); + console.log('GetMarkdownifyRequest:', markdownStatus.status ? 'โœ…' : 'โŒ'); + + // Crawl endpoints + const crawlResult = await crawl(API_KEY, 'https://example.com'); + console.log('Crawl:', crawlResult.crawl_id ? 'โœ…' : 'โŒ'); + + const crawlStatus = await getCrawlRequest(API_KEY, 'mock-id'); + console.log('GetCrawlRequest:', crawlStatus.status ? 'โœ…' : 'โŒ'); + + // AgenticScraper endpoints + const agenticResult = await agenticScraper(API_KEY, 'https://example.com', ['click button']); + console.log('AgenticScraper:', agenticResult.request_id ? 'โœ…' : 'โŒ'); + + const agenticStatus = await getAgenticScraperRequest(API_KEY, 'mock-id'); + console.log('GetAgenticScraperRequest:', agenticStatus.status ? 'โœ…' : 'โŒ'); + + // Utility endpoints + const credits = await getCredits(API_KEY); + console.log('GetCredits:', credits.remaining_credits ? 'โœ…' : 'โŒ'); + + const feedback = await sendFeedback(API_KEY, 'mock-id', 5, 'Great!'); + console.log('SendFeedback:', feedback.status ? 'โœ…' : 'โŒ'); + + } catch (error) { + console.error('Error testing endpoints:', error.message); + } +} + +/** + * Environment variable activation test + */ +async function testEnvironmentActivation() { + console.log('\n=== Environment Variable Activation Test ==='); + + console.log('Current SGAI_MOCK value:', process.env.SGAI_MOCK || 'not set'); + + // Reinitialize mock config to check environment + initMockConfig(); + + try { + const credits = await getCredits(API_KEY); + console.log('Credits with env check:', credits); + } catch (error) { + console.error('Error in environment test:', error.message); + } +} + +/** + * Main function to run all examples + */ +async function main() { + console.log('๐Ÿงช ScrapeGraph AI SDK - Mock Mode Examples'); + console.log('=========================================='); + + try { + await basicMockUsage(); + await mockWithCustomResponses(); + await mockWithCustomHandler(); + await perRequestMockMode(); + await testAllEndpoints(); + await testEnvironmentActivation(); + + console.log('\nโœ… All mock mode examples completed successfully!'); + + } catch (error) { + console.error('\nโŒ Error running examples:', error.message); + } +} + +// Run the examples +if (import.meta.url === `file://${process.argv[1]}`) { + main(); +} diff --git a/scrapegraph-js/examples/agenticscraper/agenticScraper_advanced_example.js b/scrapegraph-js/examples/agenticscraper/agenticScraper_advanced_example.js new file mode 100644 index 0000000..bc5eab1 --- /dev/null +++ b/scrapegraph-js/examples/agenticscraper/agenticScraper_advanced_example.js @@ -0,0 +1,322 @@ +import { agenticScraper, getAgenticScraperRequest } from 'scrapegraph-js'; +import 'dotenv/config'; + +const apiKey = process.env.SGAI_APIKEY; + +/** + * Advanced example with input validation and error handling + */ +async function advancedAgenticScrapingExample() { + console.log('๐Ÿš€ Advanced Agentic Scraping Example'); + console.log('=' * 45); + + // Example configurations for different scenarios + const scenarios = [ + { + name: 'Social Media Login (No AI)', + url: 'https://twitter.com/login', + steps: [ + 'click on email input field', + 'type "user@example.com" in email field', + 'click on password input field', + 'type "password123" in password field', + 'click login button', + 'wait for 3 seconds' + ], + useSession: true, + aiExtraction: false + }, + { + name: 'Form Submission with AI Extraction', + url: 'https://example.com/contact', + steps: [ + 'click on name input', + 'type "John Doe" in name field', + 'click on email input', + 'type "john@example.com" in email field', + 'click on message textarea', + 'type "Hello, this is a test message" in message field', + 'click submit button', + 'wait for confirmation message' + ], + useSession: false, + aiExtraction: true, + userPrompt: 'Extract the form submission result, confirmation message, and any reference numbers provided', + outputSchema: { + submission: { + type: "object", + properties: { + status: { type: "string" }, + message: { type: "string" }, + reference_id: { type: "string" } + }, + required: ["status", "message"] + } + } + }, + { + name: 'E-commerce Search with Product Extraction', + url: 'https://example-store.com', + steps: [ + 'wait for page to load', + 'click on search bar', + 'type "wireless headphones" in search', + 'press Enter key', + 'wait for 2 seconds', + 'click on filter button', + 'select price range $50-$100', + 'click apply filters', + 'scroll down to see more products' + ], + useSession: true, + aiExtraction: true, + userPrompt: 'Extract product information including names, prices, ratings, and availability from the search results', + outputSchema: { + search_results: { + type: "object", + properties: { + products: { + type: "array", + items: { + type: "object", + properties: { + name: { type: "string" }, + price: { type: "string" }, + rating: { type: "number" }, + availability: { type: "string" } + } + } + }, + total_results: { type: "number" }, + current_page: { type: "number" } + } + } + } + } + ]; + + // Run a specific scenario (change index to test different ones) + const scenario = scenarios[0]; // Social Media Login + + try { + console.log(`\n๐Ÿ“‹ Running Scenario: ${scenario.name}`); + console.log(`URL: ${scenario.url}`); + console.log(`Steps: ${scenario.steps.length} automation actions`); + console.log(`Use Session: ${scenario.useSession}`); + console.log(`AI Extraction: ${scenario.aiExtraction}`); + if (scenario.aiExtraction) { + console.log(`User Prompt: ${scenario.userPrompt}`); + console.log(`Output Schema: ${scenario.outputSchema ? 'Provided' : 'None'}`); + } + + // Validate inputs before making the request + validateInputs(scenario.url, scenario.steps); + + console.log('\nโœ… Input validation passed'); + console.log('๐Ÿš€ Submitting agentic scraper request...'); + + const response = await agenticScraper( + apiKey, + scenario.url, + scenario.steps, + scenario.useSession, + scenario.userPrompt || null, + scenario.outputSchema || null, + scenario.aiExtraction || false + ); + + console.log('โœ… Request submitted successfully!'); + console.log(`Request ID: ${response.request_id}`); + console.log(`Status: ${response.status}`); + + // Monitor the request with timeout + const result = await monitorRequest(response.request_id, 120); // 2 minute timeout + + console.log('\n๐ŸŽ‰ Automation completed!'); + + if (scenario.aiExtraction && result.result) { + console.log('๐ŸŽฏ Extracted Structured Data:'); + console.log(JSON.stringify(result.result, null, 2)); + } else if (result.markdown) { + console.log('๐Ÿ“„ Raw Content (markdown):'); + const preview = result.markdown.length > 500 + ? result.markdown.substring(0, 500) + '...' + : result.markdown; + console.log(preview); + } else { + console.log('Final Result:', JSON.stringify(result.result, null, 2)); + } + + return result; + + } catch (error) { + console.error(`\nโŒ Error in ${scenario.name}:`, error.message); + + // Provide helpful error context + if (error.message.includes('validation')) { + console.log('\n๐Ÿ’ก Validation Tips:'); + console.log('- Ensure URL starts with http:// or https://'); + console.log('- Make sure all steps are non-empty strings'); + console.log('- Check that the steps array is not empty'); + } else if (error.message.includes('timeout')) { + console.log('\n๐Ÿ’ก Timeout Tips:'); + console.log('- Complex automations may take longer'); + console.log('- Consider breaking down into smaller steps'); + console.log('- Check if the target website is responsive'); + } + + throw error; + } +} + +/** + * Input validation function + */ +function validateInputs(url, steps) { + // Validate URL + if (!url || typeof url !== 'string') { + throw new Error('validation: URL must be a non-empty string'); + } + + if (!url.startsWith('http://') && !url.startsWith('https://')) { + throw new Error('validation: URL must start with http:// or https://'); + } + + // Validate steps + if (!Array.isArray(steps) || steps.length === 0) { + throw new Error('validation: Steps must be a non-empty array'); + } + + steps.forEach((step, index) => { + if (!step || typeof step !== 'string' || !step.trim()) { + throw new Error(`validation: Step ${index + 1} must be a non-empty string`); + } + }); + + console.log(`โœ… Validated URL and ${steps.length} steps`); +} + +/** + * Monitor request with timeout and progress updates + */ +async function monitorRequest(requestId, timeoutSeconds = 120) { + const startTime = Date.now(); + const timeoutMs = timeoutSeconds * 1000; + let attempts = 0; + + console.log(`\n๐Ÿ”„ Monitoring request ${requestId}`); + console.log(`Timeout: ${timeoutSeconds} seconds`); + + while (Date.now() - startTime < timeoutMs) { + attempts++; + const elapsed = Math.round((Date.now() - startTime) / 1000); + + try { + console.log(`\nโณ Check ${attempts} (${elapsed}s elapsed)`); + + const status = await getAgenticScraperRequest(apiKey, requestId); + console.log(`Status: ${status.status}`); + + if (status.status === 'completed') { + const totalTime = Math.round((Date.now() - startTime) / 1000); + console.log(`โœ… Completed in ${totalTime} seconds`); + return status; + } else if (status.status === 'failed') { + throw new Error(`Automation failed: ${status.error}`); + } + + // Wait before next check (progressive backoff) + const waitTime = Math.min(5000 + (attempts * 1000), 15000); // 5-15 seconds + console.log(`โธ๏ธ Waiting ${waitTime/1000}s before next check...`); + await sleep(waitTime); + + } catch (error) { + if (error.message.includes('Automation failed')) { + throw error; + } + console.log(`โš ๏ธ Check failed: ${error.message}`); + await sleep(5000); + } + } + + throw new Error(`timeout: Request did not complete within ${timeoutSeconds} seconds`); +} + +/** + * Demonstrate error handling scenarios + */ +async function errorHandlingExamples() { + console.log('\n๐Ÿ›ก๏ธ Error Handling Examples'); + console.log('=' * 30); + + const errorScenarios = [ + { + name: 'Invalid URL', + url: 'not-a-valid-url', + steps: ['click button'], + expectedError: 'URL must start with' + }, + { + name: 'Empty Steps', + url: 'https://example.com', + steps: [], + expectedError: 'non-empty array' + }, + { + name: 'Invalid Step', + url: 'https://example.com', + steps: ['valid step', '', 'another valid step'], + expectedError: 'non-empty string' + } + ]; + + for (const scenario of errorScenarios) { + try { + console.log(`\n๐Ÿงช Testing: ${scenario.name}`); + await agenticScraper(apiKey, scenario.url, scenario.steps); + console.log('โŒ Expected error but request succeeded'); + } catch (error) { + if (error.message.includes(scenario.expectedError)) { + console.log(`โœ… Correctly caught error: ${error.message}`); + } else { + console.log(`โš ๏ธ Unexpected error: ${error.message}`); + } + } + } +} + +/** + * Utility function + */ +function sleep(ms) { + return new Promise(resolve => setTimeout(resolve, ms)); +} + +/** + * Main execution + */ +async function main() { + if (!apiKey) { + console.error('โŒ Error: SGAI_APIKEY environment variable not set'); + console.log('\nPlease create a .env file with:'); + console.log('SGAI_APIKEY=your-api-key-here'); + process.exit(1); + } + + try { + // Run the advanced example + await advancedAgenticScrapingExample(); + + // Uncomment to test error handling + // await errorHandlingExamples(); + + console.log('\nโœจ Advanced example completed successfully!'); + + } catch (error) { + console.error('\n๐Ÿ’ฅ Advanced example failed:', error.message); + process.exit(1); + } +} + +// Run the advanced example +main(); diff --git a/scrapegraph-js/examples/agenticscraper/agenticScraper_complete_example.js b/scrapegraph-js/examples/agenticscraper/agenticScraper_complete_example.js new file mode 100644 index 0000000..e848207 --- /dev/null +++ b/scrapegraph-js/examples/agenticscraper/agenticScraper_complete_example.js @@ -0,0 +1,146 @@ +import { agenticScraper, getAgenticScraperRequest } from 'scrapegraph-js'; +import 'dotenv/config'; + +const apiKey = process.env.SGAI_APIKEY; + +/** + * Complete example showing how to use agentic scraper for automated login + * and then retrieve the results + */ +async function completeAgenticScrapingExample() { + console.log('๐Ÿค– Starting Complete Agentic Scraping Example'); + console.log('=' * 50); + + // Configuration + const url = 'https://dashboard.scrapegraphai.com/'; + const steps = [ + 'Type email@gmail.com in email input box', + 'Type test-password@123 in password inputbox', + 'click on login' + ]; + const useSession = true; + + try { + // Step 1: Submit the agentic scraper request + console.log('\n๐Ÿ“ค Step 1: Submitting agentic scraper request...'); + console.log('URL:', url); + console.log('Use Session:', useSession); + console.log('Steps:', steps.length, 'automation steps'); + + const submitResponse = await agenticScraper(apiKey, url, steps, useSession); + + console.log('โœ… Request submitted successfully!'); + console.log('Request ID:', submitResponse.request_id); + console.log('Initial Status:', submitResponse.status); + + const requestId = submitResponse.request_id; + + // Step 2: Poll for results + console.log('\n๐Ÿ”„ Step 2: Polling for results...'); + let attempts = 0; + const maxAttempts = 12; // 2 minutes max (10 seconds * 12) + + while (attempts < maxAttempts) { + attempts++; + console.log(`\nโณ Attempt ${attempts}/${maxAttempts}: Checking status...`); + + const statusResponse = await getAgenticScraperRequest(apiKey, requestId); + console.log('Status:', statusResponse.status); + + if (statusResponse.status === 'completed') { + console.log('\n๐ŸŽ‰ Automation completed successfully!'); + console.log('Completed At:', statusResponse.completed_at); + console.log('Processing Time:', calculateProcessingTime(submitResponse.created_at, statusResponse.completed_at)); + console.log('\n๐Ÿ“‹ Results:'); + console.log(JSON.stringify(statusResponse.result, null, 2)); + + return statusResponse; + } else if (statusResponse.status === 'failed') { + console.log('\nโŒ Automation failed'); + console.log('Error:', statusResponse.error); + throw new Error(`Automation failed: ${statusResponse.error}`); + } else { + console.log('Still processing... waiting 10 seconds'); + await sleep(10000); // Wait 10 seconds + } + } + + throw new Error('Timeout: Automation took too long to complete'); + + } catch (error) { + console.error('\nโŒ Error in complete example:', error.message); + throw error; + } +} + +/** + * Example with different automation steps + */ +async function ecommerceAutomationExample() { + console.log('\n๐Ÿ›’ E-commerce Automation Example'); + console.log('=' * 40); + + const url = 'https://example-shop.com'; + const steps = [ + 'click on search input', + 'type "laptop" in search box', + 'click search button', + 'wait for 2 seconds', + 'click on first product', + 'scroll down to reviews section' + ]; + + try { + const response = await agenticScraper(apiKey, url, steps, true); + console.log('E-commerce automation started:', response.request_id); + return response; + } catch (error) { + console.error('E-commerce automation error:', error.message); + } +} + +/** + * Utility functions + */ +function calculateProcessingTime(startTime, endTime) { + const start = new Date(startTime); + const end = new Date(endTime); + const diffMs = end - start; + const diffSeconds = Math.round(diffMs / 1000); + return `${diffSeconds} seconds`; +} + +function sleep(ms) { + return new Promise(resolve => setTimeout(resolve, ms)); +} + +/** + * Main execution + */ +async function main() { + if (!apiKey) { + console.error('โŒ Error: SGAI_APIKEY environment variable not set'); + console.log('Please set your API key in the .env file:'); + console.log('SGAI_APIKEY=your-api-key-here'); + process.exit(1); + } + + try { + console.log('๐Ÿš€ Running Agentic Scraper Examples'); + + // Run the complete login automation example + await completeAgenticScrapingExample(); + + // Uncomment to run the e-commerce example + // await ecommerceAutomationExample(); + + console.log('\nโœ… All examples completed successfully!'); + + } catch (error) { + console.error('\n๐Ÿ’ฅ Example failed:', error.message); + process.exit(1); + } +} + +// Run the examples +main(); diff --git a/scrapegraph-js/examples/agenticscraper/agenticScraper_comprehensive_example.js b/scrapegraph-js/examples/agenticscraper/agenticScraper_comprehensive_example.js new file mode 100644 index 0000000..022eb36 --- /dev/null +++ b/scrapegraph-js/examples/agenticscraper/agenticScraper_comprehensive_example.js @@ -0,0 +1,448 @@ +#!/usr/bin/env node + +/** + * Comprehensive Agentic Scraper Example + * + * This example demonstrates how to use the agentic scraper API endpoint + * to perform automated browser actions and scrape content with both + * AI extraction and non-AI extraction modes. + * + * The agentic scraper can: + * 1. Navigate to a website + * 2. Perform a series of automated actions (like filling forms, clicking buttons) + * 3. Extract the resulting HTML content as markdown + * 4. Optionally use AI to extract structured data + * + * Usage: + * node examples/agenticScraper_comprehensive_example.js + */ + +import { agenticScraper, getAgenticScraperRequest } from 'scrapegraph-js'; +import 'dotenv/config'; +import fs from 'fs/promises'; + +const apiKey = process.env.SGAI_APIKEY; + +/** + * Helper function to poll for request completion + * @param {string} requestId - The request ID to poll + * @param {number} maxAttempts - Maximum number of polling attempts + * @param {number} delayMs - Delay between polling attempts in milliseconds + * @returns {Promise} The final result + */ +async function pollForCompletion(requestId, maxAttempts = 12, delayMs = 10000) { + let attempts = 0; + + while (attempts < maxAttempts) { + attempts++; + console.log(`๐Ÿ”„ Polling attempt ${attempts}/${maxAttempts}...`); + + try { + const result = await getAgenticScraperRequest(apiKey, requestId); + + if (result.status === 'completed') { + console.log('โœ… Request completed!'); + return result; + } else if (result.status === 'failed') { + console.log('โŒ Request failed:', result.error || 'Unknown error'); + return result; + } else { + console.log(`โณ Status: ${result.status}, waiting ${delayMs/1000} seconds...`); + await new Promise(resolve => setTimeout(resolve, delayMs)); + } + } catch (error) { + console.log(`โš ๏ธ Polling error: ${error.message}`); + if (attempts === maxAttempts) throw error; + await new Promise(resolve => setTimeout(resolve, delayMs)); + } + } + + throw new Error('Request timed out - maximum polling attempts reached'); +} + +/** + * Example: Basic agentic scraping without AI extraction + */ +async function exampleBasicScrapingNoAI() { + console.log('๐Ÿš€ Starting basic agentic scraping (no AI extraction)...'); + + const url = 'https://dashboard.scrapegraphai.com/'; + const steps = [ + 'Type email@gmail.com in email input box', + 'Type test-password@123 in password inputbox', + 'click on login', + ]; + + try { + console.log(`URL: ${url}`); + console.log(`Steps: ${JSON.stringify(steps, null, 2)}`); + + // Perform the scraping without AI extraction + const submitResponse = await agenticScraper( + apiKey, + url, + steps, + true, // useSession + null, // userPrompt (not needed) + null, // outputSchema (not needed) + false // aiExtraction = false + ); + + console.log('โœ… Basic scraping request submitted!'); + console.log(`Request ID: ${submitResponse.request_id}`); + + // Poll for completion + const result = await pollForCompletion(submitResponse.request_id); + + if (result.status === 'completed') { + // Save the markdown content to a file + if (result.markdown) { + await fs.writeFile('basic_scraped_content.md', result.markdown, 'utf-8'); + console.log('๐Ÿ“„ Markdown content saved to "basic_scraped_content.md"'); + } + + // Print a preview of the content + if (result.markdown) { + const preview = result.markdown.length > 500 + ? result.markdown.substring(0, 500) + '...' + : result.markdown; + console.log(`\n๐Ÿ“ Content Preview:\n${preview}`); + } + + if (result.error) { + console.log(`โš ๏ธ Warning: ${result.error}`); + } + } + + return result; + + } catch (error) { + console.error(`โŒ Error: ${error.message}`); + return null; + } +} + +/** + * Example: Use AI extraction to get structured data from dashboard + */ +async function exampleAIExtraction() { + console.log('๐Ÿค– Starting agentic scraping with AI extraction...'); + + const url = 'https://dashboard.scrapegraphai.com/'; + const steps = [ + 'Type email@gmail.com in email input box', + 'Type test-password@123 in password inputbox', + 'click on login', + 'wait for dashboard to load completely', + ]; + + // Define extraction schema for user dashboard information + const outputSchema = { + user_info: { + type: "object", + properties: { + username: { type: "string" }, + email: { type: "string" }, + dashboard_sections: { + type: "array", + items: { type: "string" } + }, + account_status: { type: "string" }, + credits_remaining: { type: "number" } + }, + required: ["username", "dashboard_sections"] + } + }; + + const userPrompt = "Extract user information, available dashboard sections, account status, and remaining credits from the dashboard"; + + try { + console.log(`URL: ${url}`); + console.log(`Steps: ${JSON.stringify(steps, null, 2)}`); + console.log(`User Prompt: ${userPrompt}`); + + const submitResponse = await agenticScraper( + apiKey, + url, + steps, + true, // useSession + userPrompt, // userPrompt for AI extraction + outputSchema, // outputSchema for structured data + true // aiExtraction = true + ); + + console.log('โœ… AI extraction request submitted!'); + console.log(`Request ID: ${submitResponse.request_id}`); + + // Poll for completion + const result = await pollForCompletion(submitResponse.request_id); + + if (result.status === 'completed') { + if (result.result) { + console.log('๐ŸŽฏ Extracted Structured Data:'); + console.log(JSON.stringify(result.result, null, 2)); + + // Save extracted data to JSON file + await fs.writeFile('extracted_dashboard_data.json', JSON.stringify(result.result, null, 2), 'utf-8'); + console.log('๐Ÿ’พ Structured data saved to "extracted_dashboard_data.json"'); + } + + // Also save the raw markdown if available + if (result.markdown) { + await fs.writeFile('ai_scraped_content.md', result.markdown, 'utf-8'); + console.log('๐Ÿ“„ Raw markdown also saved to "ai_scraped_content.md"'); + } + } + + return result; + + } catch (error) { + console.error(`โŒ Error: ${error.message}`); + return null; + } +} + +/** + * Example: Scraping an e-commerce site for product information + */ +async function exampleEcommerceProductScraping() { + console.log('๐Ÿ›’ Scraping e-commerce products with AI extraction...'); + + const url = 'https://example-ecommerce.com'; + const steps = [ + 'click on search box', + 'type "laptop" in search box', + 'press enter', + 'wait for search results to load', + 'scroll down 3 times to load more products', + ]; + + const outputSchema = { + products: { + type: "array", + items: { + type: "object", + properties: { + name: { type: "string" }, + price: { type: "string" }, + rating: { type: "number" }, + availability: { type: "string" }, + description: { type: "string" }, + image_url: { type: "string" } + }, + required: ["name", "price"] + } + }, + search_info: { + type: "object", + properties: { + total_results: { type: "number" }, + search_term: { type: "string" }, + page: { type: "number" } + } + } + }; + + const userPrompt = "Extract all visible product information including names, prices, ratings, availability status, descriptions, and image URLs. Also extract search metadata like total results and current page."; + + try { + console.log(`URL: ${url}`); + console.log(`Steps: ${JSON.stringify(steps, null, 2)}`); + + const submitResponse = await agenticScraper( + apiKey, + url, + steps, + true, + userPrompt, + outputSchema, + true + ); + + console.log('โœ… E-commerce scraping request submitted!'); + console.log(`Request ID: ${submitResponse.request_id}`); + + // Poll for completion + const result = await pollForCompletion(submitResponse.request_id); + + if (result.status === 'completed' && result.result) { + const products = result.result.products || []; + const searchInfo = result.result.search_info || {}; + + console.log(`๐Ÿ” Search Results for "${searchInfo.search_term || 'laptop'}":`); + console.log(`๐Ÿ“Š Total Results: ${searchInfo.total_results || 'Unknown'}`); + console.log(`๐Ÿ“„ Current Page: ${searchInfo.page || 'Unknown'}`); + console.log(`๐Ÿ›๏ธ Products Found: ${products.length}`); + + console.log('\n๐Ÿ“ฆ Product Details:'); + products.slice(0, 5).forEach((product, index) => { + console.log(`\n${index + 1}. ${product.name || 'N/A'}`); + console.log(` ๐Ÿ’ฐ Price: ${product.price || 'N/A'}`); + console.log(` โญ Rating: ${product.rating || 'N/A'}`); + console.log(` ๐Ÿ“ฆ Availability: ${product.availability || 'N/A'}`); + if (product.description) { + const desc = product.description.length > 100 + ? product.description.substring(0, 100) + '...' + : product.description; + console.log(` ๐Ÿ“ Description: ${desc}`); + } + }); + + // Save extracted data + await fs.writeFile('ecommerce_products.json', JSON.stringify(result.result, null, 2), 'utf-8'); + console.log('\n๐Ÿ’พ Product data saved to "ecommerce_products.json"'); + } + + return result; + + } catch (error) { + console.error(`โŒ Error: ${error.message}`); + return null; + } +} + +/** + * Example: Fill out a contact form and extract confirmation details + */ +async function exampleFormFillingAndDataExtraction() { + console.log('๐Ÿ“ Filling contact form and extracting confirmation...'); + + const url = 'https://example-company.com/contact'; + const steps = [ + 'find and click on contact form', + 'type "John Doe" in name field', + 'type "john.doe@example.com" in email field', + 'type "Product Inquiry" in subject field', + 'type "I am interested in your premium plan. Could you provide more details about pricing and features?" in message field', + 'click submit button', + 'wait for confirmation message to appear', + ]; + + const outputSchema = { + form_submission: { + type: "object", + properties: { + status: { type: "string" }, + confirmation_message: { type: "string" }, + reference_number: { type: "string" }, + estimated_response_time: { type: "string" }, + submitted_data: { + type: "object", + properties: { + name: { type: "string" }, + email: { type: "string" }, + subject: { type: "string" } + } + } + }, + required: ["status", "confirmation_message"] + } + }; + + const userPrompt = "Extract the form submission status, confirmation message, any reference numbers, estimated response time, and echo back the submitted form data"; + + try { + console.log(`URL: ${url}`); + console.log(`Steps: ${JSON.stringify(steps, null, 2)}`); + + const submitResponse = await agenticScraper( + apiKey, + url, + steps, + true, + userPrompt, + outputSchema, + true + ); + + console.log('โœ… Form submission request submitted!'); + console.log(`Request ID: ${submitResponse.request_id}`); + + // Poll for completion + const result = await pollForCompletion(submitResponse.request_id); + + if (result.status === 'completed' && result.result) { + const formData = result.result.form_submission || {}; + + console.log('๐Ÿ“‹ Form Submission Results:'); + console.log(` โœ… Status: ${formData.status || 'Unknown'}`); + console.log(` ๐Ÿ’ฌ Message: ${formData.confirmation_message || 'No message'}`); + + if (formData.reference_number) { + console.log(` ๐Ÿ”ข Reference: ${formData.reference_number}`); + } + + if (formData.estimated_response_time) { + console.log(` โฐ Response Time: ${formData.estimated_response_time}`); + } + + const submittedData = formData.submitted_data || {}; + if (Object.keys(submittedData).length > 0) { + console.log('\n๐Ÿ“ค Submitted Data:'); + Object.entries(submittedData).forEach(([key, value]) => { + console.log(` ${key.charAt(0).toUpperCase() + key.slice(1)}: ${value}`); + }); + } + + // Save form results + await fs.writeFile('form_submission_results.json', JSON.stringify(result.result, null, 2), 'utf-8'); + console.log('\n๐Ÿ’พ Form results saved to "form_submission_results.json"'); + } + + return result; + + } catch (error) { + console.error(`โŒ Error: ${error.message}`); + return null; + } +} + +/** + * Main function to run all examples + */ +async function main() { + console.log('๐Ÿ”ง Comprehensive Agentic Scraper Examples'); + console.log('='.repeat(60)); + + // Check if API key is set + if (!apiKey) { + console.error('โŒ Error: SGAI_APIKEY environment variable not set'); + console.log('Please either:'); + console.log(' 1. Set environment variable: export SGAI_APIKEY=your-api-key-here'); + console.log(' 2. Create a .env file with: SGAI_APIKEY=your-api-key-here'); + process.exit(1); + } + + try { + console.log('\n1. Basic Scraping (No AI Extraction)'); + console.log('-'.repeat(40)); + await exampleBasicScrapingNoAI(); + + console.log('\n\n2. AI Extraction Example - Dashboard Data'); + console.log('-'.repeat(40)); + await exampleAIExtraction(); + + console.log('\n\n3. E-commerce Product Scraping with AI'); + console.log('-'.repeat(40)); + // Uncomment to run e-commerce example + // await exampleEcommerceProductScraping(); + + console.log('\n\n4. Form Filling and Confirmation Extraction'); + console.log('-'.repeat(40)); + // Uncomment to run form filling example + // await exampleFormFillingAndDataExtraction(); + + console.log('\nโœจ Examples completed!'); + console.log('\nโ„น๏ธ Note: Some examples are commented out by default.'); + console.log(' Uncomment them in the main function to run additional examples.'); + + } catch (error) { + console.error(`โŒ Fatal error: ${error.message}`); + process.exit(1); + } +} + +// Run the main function if this script is executed directly +if (import.meta.url === `file://${process.argv[1]}`) { + main(); +} diff --git a/scrapegraph-js/examples/agenticscraper/agenticScraper_example.js b/scrapegraph-js/examples/agenticscraper/agenticScraper_example.js new file mode 100644 index 0000000..6fc19df --- /dev/null +++ b/scrapegraph-js/examples/agenticscraper/agenticScraper_example.js @@ -0,0 +1,77 @@ +import { agenticScraper } from 'scrapegraph-js'; +import 'dotenv/config'; + +const apiKey = process.env.SGAI_APIKEY; + +// Example 1: Basic scraping without AI extraction +console.log('๐Ÿค– Example 1: Basic Agentic Scraping (No AI)'); +console.log('='.repeat(50)); + +const url = 'https://dashboard.scrapegraphai.com/'; +const steps = [ + 'Type email@gmail.com in email input box', + 'Type test-password@123 in password inputbox', + 'click on login' +]; + +try { + const response = await agenticScraper( + apiKey, + url, + steps, + true, // useSession + null, // userPrompt (not needed for basic scraping) + null, // outputSchema (not needed for basic scraping) + false // aiExtraction = false + ); + + console.log('โœ… Basic Agentic Scraper Request Submitted'); + console.log('Request ID:', response.request_id); + console.log('Status:', response.status); + console.log('Full Response:', JSON.stringify(response, null, 2)); +} catch (error) { + console.error('โŒ Error:', error.message); +} + +// Example 2: AI extraction for structured data +console.log('\n\n๐Ÿง  Example 2: Agentic Scraping with AI Extraction'); +console.log('='.repeat(50)); + +const aiExtractionSchema = { + dashboard_info: { + type: "object", + properties: { + username: { type: "string" }, + email: { type: "string" }, + available_sections: { + type: "array", + items: { type: "string" } + }, + credits_remaining: { type: "number" } + }, + required: ["username", "available_sections"] + } +}; + +const userPrompt = "Extract the user's dashboard information including username, email, available dashboard sections, and remaining credits"; + +try { + const aiResponse = await agenticScraper( + apiKey, + url, + [...steps, 'wait for dashboard to load completely'], // Add wait step for AI extraction + true, // useSession + userPrompt, // userPrompt for AI extraction + aiExtractionSchema, // outputSchema for structured data + true // aiExtraction = true + ); + + console.log('โœ… AI Extraction Request Submitted'); + console.log('Request ID:', aiResponse.request_id); + console.log('Status:', aiResponse.status); + console.log('User Prompt:', userPrompt); + console.log('Schema Provided:', aiExtractionSchema ? 'Yes' : 'No'); + console.log('Full Response:', JSON.stringify(aiResponse, null, 2)); +} catch (error) { + console.error('โŒ AI Extraction Error:', error.message); +} diff --git a/scrapegraph-js/examples/crawl/crawl_example.js b/scrapegraph-js/examples/crawl/crawl_example.js new file mode 100644 index 0000000..1fdec38 --- /dev/null +++ b/scrapegraph-js/examples/crawl/crawl_example.js @@ -0,0 +1,106 @@ +import { crawl, getCrawlRequest } from '../index.js'; +import 'dotenv/config'; + +// Example .env file: +// SGAI_APIKEY=your_sgai_api_key + +const apiKey = process.env.SGAI_APIKEY; + +const schema = { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "ScrapeGraphAI Website Content", + "type": "object", + "properties": { + "company": { + "type": "object", + "properties": { + "name": { "type": "string" }, + "description": { "type": "string" }, + "features": { "type": "array", "items": { "type": "string" } }, + "contact_email": { "type": "string", "format": "email" }, + "social_links": { + "type": "object", + "properties": { + "github": { "type": "string", "format": "uri" }, + "linkedin": { "type": "string", "format": "uri" }, + "twitter": { "type": "string", "format": "uri" } + }, + "additionalProperties": false + } + }, + "required": ["name", "description"] + }, + "services": { + "type": "array", + "items": { + "type": "object", + "properties": { + "service_name": { "type": "string" }, + "description": { "type": "string" }, + "features": { "type": "array", "items": { "type": "string" } } + }, + "required": ["service_name", "description"] + } + }, + "legal": { + "type": "object", + "properties": { + "privacy_policy": { "type": "string" }, + "terms_of_service": { "type": "string" } + }, + "required": ["privacy_policy", "terms_of_service"] + } + }, + "required": ["company", "services", "legal"] +}; + +const url = 'https://scrapegraphai.com/'; +const prompt = 'What does the company do? and I need text content from there privacy and terms'; + +(async () => { + if (!apiKey) { + console.error('SGAI_APIKEY not found in environment. Please set it in your .env file.'); + process.exit(1); + } + + try { + // Start the crawl job + console.log(`\nStarting crawl for: ${url}`); + const crawlResponse = await crawl(apiKey, url, prompt, schema, { + cacheWebsite: true, + depth: 2, + maxPages: 2, + sameDomainOnly: true, + sitemap: true, // Use sitemap for better page discovery + batchSize: 1, + }); + console.log('\nCrawl job started. Response:'); + console.log(JSON.stringify(crawlResponse, null, 2)); + + // If the crawl is asynchronous and returns an ID, fetch the result + const crawlId = crawlResponse.id || crawlResponse.task_id; + if (crawlId) { + console.log('\nPolling for crawl result...'); + for (let i = 0; i < 10; i++) { + await new Promise((resolve) => setTimeout(resolve, 5000)); + const result = await getCrawlRequest(apiKey, crawlId); + if (result.status === 'success' && result.result) { + console.log(`\nCrawl completed. Result:`); + console.log(JSON.stringify(result.result.llm_result, null, 2)); + break; + } else if (result.status === 'failed') { + console.log('\nCrawl failed. Result:'); + console.log(JSON.stringify(result, null, 2)); + break; + } else { + console.log(`Status: ${result.status}, waiting...`); + } + } + } else { + console.log('No crawl ID found in response. Synchronous result:'); + console.log(JSON.stringify(crawlResponse, null, 2)); + } + } catch (error) { + console.error('Error occurred:', error); + } +})(); diff --git a/scrapegraph-js/examples/crawl/crawl_markdown_direct_api_example.js b/scrapegraph-js/examples/crawl/crawl_markdown_direct_api_example.js new file mode 100644 index 0000000..8c0cca9 --- /dev/null +++ b/scrapegraph-js/examples/crawl/crawl_markdown_direct_api_example.js @@ -0,0 +1,269 @@ +#!/usr/bin/env node + +/** + * Example script demonstrating the ScrapeGraphAI Crawler markdown conversion mode. + * + * This example shows how to use the crawler in markdown conversion mode: + * - Cost-effective markdown conversion (NO AI/LLM processing) + * - 2 credits per page (80% savings compared to AI mode) + * - Clean HTML to markdown conversion with metadata extraction + * + * Requirements: + * - Node.js 14+ + * - dotenv + * - A .env file with your API_KEY + * + * Example .env file: + * API_KEY=your_api_key_here + */ + +import 'dotenv/config'; + +// Configuration - API key from environment or fallback +const API_KEY = process.env.TEST_API_KEY || "sgai-xxx"; // Load from .env file +const BASE_URL = process.env.BASE_URL || "http://localhost:8001"; // Can be overridden via env + +/** + * Make an HTTP request to the API. + * @param {string} url - The URL to make the request to + * @param {Object} data - The data to send in the request body + * @returns {Promise} The response JSON + */ +async function makeRequest(url, data) { + const headers = { + "Content-Type": "application/json", + "SGAI-APIKEY": API_KEY + }; + + const response = await fetch(url, { + method: 'POST', + headers: headers, + body: JSON.stringify(data) + }); + + return await response.json(); +} + +/** + * Poll for the result of a crawl job with rate limit handling. + * @param {string} taskId - The task ID to poll for + * @returns {Promise} The response JSON + */ +async function pollResult(taskId) { + const headers = { "SGAI-APIKEY": API_KEY }; + const url = `${BASE_URL}/v1/crawl/${taskId}`; + + const response = await fetch(url, { + method: 'GET', + headers: headers + }); + + if (response.status === 429) { + // Rate limited - return special status to handle in polling loop + return { status: "rate_limited", retry_after: 60 }; + } + + return await response.json(); +} + +/** + * Poll for crawl results with intelligent backoff to avoid rate limits. + * @param {string} taskId - The task ID to poll for + * @param {number} maxAttempts - Maximum number of polling attempts + * @returns {Promise} The final result or throws an exception on timeout/failure + */ +async function pollWithBackoff(taskId, maxAttempts = 20) { + console.log("โณ Starting to poll for results with rate-limit protection..."); + + // Initial wait to give the job time to start processing + await new Promise(resolve => setTimeout(resolve, 15000)); + + for (let attempt = 0; attempt < maxAttempts; attempt++) { + try { + const result = await pollResult(taskId); + const status = result.status; + + if (status === "rate_limited") { + const waitTime = Math.min(90, 30 + (attempt * 10)); // Exponential backoff for rate limits + console.log(`โš ๏ธ Rate limited! Waiting ${waitTime}s before retry...`); + await new Promise(resolve => setTimeout(resolve, waitTime * 1000)); + continue; + } else if (status === "success") { + return result; + } else if (status === "failed") { + throw new Error(`Crawl failed: ${result.error || 'Unknown error'}`); + } else { + // Calculate progressive wait time: start at 15s, increase gradually + const baseWait = 15; + const progressiveWait = Math.min(60, baseWait + (attempt * 3)); // Cap at 60s + + console.log(`โณ Status: ${status} (attempt ${attempt + 1}/${maxAttempts}) - waiting ${progressiveWait}s...`); + await new Promise(resolve => setTimeout(resolve, progressiveWait * 1000)); + } + } catch (error) { + if (error.message.toLowerCase().includes('rate') || error.message.includes('429')) { + const waitTime = Math.min(90, 45 + (attempt * 10)); + console.log(`โš ๏ธ Rate limit detected in error, waiting ${waitTime}s...`); + await new Promise(resolve => setTimeout(resolve, waitTime * 1000)); + continue; + } else { + console.log(`โŒ Error polling for results: ${error.message}`); + if (attempt < maxAttempts - 1) { + await new Promise(resolve => setTimeout(resolve, 20000)); // Wait before retry + continue; + } + throw error; + } + } + } + + throw new Error(`โฐ Timeout: Job did not complete after ${maxAttempts} attempts`); +} + +/** + * Markdown Conversion Mode (NO AI/LLM Used) + * + * This example demonstrates cost-effective crawling that converts pages to clean markdown + * WITHOUT any AI processing. Perfect for content archival and when you only need clean markdown. + */ +async function markdownCrawlingExample() { + console.log("=".repeat(60)); + console.log("MARKDOWN CONVERSION MODE (NO AI/LLM)"); + console.log("=".repeat(60)); + console.log("Use case: Get clean markdown content without AI processing"); + console.log("Cost: 2 credits per page (80% savings!)"); + console.log("Features: Clean markdown conversion, metadata extraction"); + console.log("โš ๏ธ NO AI/LLM PROCESSING - Pure HTML to markdown conversion only!"); + console.log(); + + // Markdown conversion request - NO AI/LLM processing + const requestData = { + url: "https://scrapegraphai.com/", + extraction_mode: false, // FALSE = Markdown conversion mode (NO AI/LLM used) + depth: 2, + max_pages: 2, + same_domain_only: true, + sitemap: false, // Use sitemap for better coverage + // Note: No prompt needed when extraction_mode = false + }; + + console.log(`๐ŸŒ Target URL: ${requestData.url}`); + console.log("๐Ÿค– AI Prompt: None (no AI processing)"); + console.log(`๐Ÿ“Š Crawl Depth: ${requestData.depth}`); + console.log(`๐Ÿ“„ Max Pages: ${requestData.max_pages}`); + console.log(`๐Ÿ—บ๏ธ Use Sitemap: ${requestData.sitemap}`); + console.log("๐Ÿ’ก Mode: Pure HTML to markdown conversion"); + console.log(); + + // Start the markdown conversion job + console.log("๐Ÿš€ Starting markdown conversion job..."); + const response = await makeRequest(`${BASE_URL}/v1/crawl`, requestData); + const taskId = response.task_id; + + if (!taskId) { + console.log("โŒ Failed to start markdown conversion job"); + return; + } + + console.log(`๐Ÿ“‹ Task ID: ${taskId}`); + console.log("โณ Polling for results..."); + console.log(); + + // Poll for results with rate-limit protection + try { + const result = await pollWithBackoff(taskId, 20); + + console.log("โœ… Markdown conversion completed successfully!"); + console.log(); + + const resultData = result.result || {}; + const pages = resultData.pages || []; + const crawledUrls = resultData.crawled_urls || []; + const creditsUsed = resultData.credits_used || 0; + const pagesProcessed = resultData.pages_processed || 0; + + console.log("๐Ÿ“Š CONVERSION RESULTS:"); + console.log("-".repeat(40)); + console.log(`๐Ÿ“„ Pages processed: ${pagesProcessed}`); + console.log(`๐Ÿ’ฐ Credits used: ${creditsUsed}`); + console.log(`๐Ÿ’ต Cost per page: ${pagesProcessed > 0 ? (creditsUsed / pagesProcessed).toFixed(1) : 0} credits`); + if (crawledUrls.length > 0) { + console.log(`๐Ÿ”— URLs processed: ${JSON.stringify(crawledUrls)}`); + } + console.log(); + + console.log("๐Ÿ“ MARKDOWN CONTENT:"); + console.log("-".repeat(40)); + if (pages.length > 0) { + console.log(`๐Ÿ“„ Total pages with markdown: ${pages.length}`); + pages.slice(0, 3).forEach((page, i) => { // Show first 3 pages + console.log(`\n๐Ÿ“„ Page ${i + 1}:`); + console.log(` URL: ${page.url || 'N/A'}`); + console.log(` Title: ${page.title || 'None'}`); + + const metadata = page.metadata || {}; + console.log(` ๐Ÿ“Š Word count: ${metadata.word_count || 0}`); + console.log(` ๐Ÿ“‹ Headers: ${JSON.stringify((metadata.headers || []).slice(0, 3))}`); // First 3 headers + console.log(` ๐Ÿ”— Links: ${metadata.links_count || 0}`); + + // Show markdown preview + const markdownContent = page.markdown || ""; + let markdownPreview = markdownContent.substring(0, 200); + if (markdownContent.length > 200) { + markdownPreview += "..."; + } + console.log(` ๐Ÿ“ Content preview: ${markdownPreview}`); + }); + + if (pages.length > 3) { + console.log(`\n ... and ${pages.length - 3} more pages with markdown content`); + } + } else { + console.log("No markdown content available"); + } + + } catch (error) { + console.log(`โŒ Markdown conversion failed: ${error.message}`); + } +} + +/** + * Main function to run the markdown crawling example. + */ +async function main() { + console.log("๐ŸŒ ScrapeGraphAI Crawler - Markdown Conversion Example"); + console.log("Cost-effective HTML to Markdown conversion (NO AI/LLM)"); + console.log("=".repeat(60)); + + // Check if API key is set + if (API_KEY === "sgai-xxx") { + console.log("โš ๏ธ Please set your API key in the .env file"); + console.log(" Create a .env file with your API key:"); + console.log(" API_KEY=your_api_key_here"); + console.log(); + console.log(" You can get your API key from: https://dashboard.scrapegraphai.com"); + console.log(); + console.log(" Example .env file:"); + console.log(" API_KEY=sgai-your-actual-api-key-here"); + console.log(" BASE_URL=https://api.scrapegraphai.com # Optional"); + return; + } + + console.log(`๐Ÿ”‘ Using API key: ${API_KEY.substring(0, 10)}...`); + console.log(`๐ŸŒ Base URL: ${BASE_URL}`); + console.log(); + + // Run the single example + await markdownCrawlingExample(); // Markdown conversion mode (NO AI) + + console.log("\n" + "=".repeat(60)); + console.log("๐ŸŽ‰ Example completed!"); + console.log("๐Ÿ’ก This demonstrates markdown conversion mode:"); + console.log(" โ€ข Cost-effective: Only 2 credits per page"); + console.log(" โ€ข No AI/LLM processing - pure HTML to markdown conversion"); + console.log(" โ€ข Perfect for content archival and documentation"); + console.log(" โ€ข 80% cheaper than AI extraction modes!"); +} + +// Run the example +main().catch(console.error); diff --git a/scrapegraph-js/examples/crawl/crawl_markdown_example.js b/scrapegraph-js/examples/crawl/crawl_markdown_example.js new file mode 100644 index 0000000..7c69a84 --- /dev/null +++ b/scrapegraph-js/examples/crawl/crawl_markdown_example.js @@ -0,0 +1,217 @@ +#!/usr/bin/env node + +/** + * Example demonstrating the ScrapeGraphAI Crawler markdown conversion mode. + * + * This example shows how to use the crawler in markdown conversion mode: + * - Cost-effective markdown conversion (NO AI/LLM processing) + * - 2 credits per page (80% savings compared to AI mode) + * - Clean HTML to markdown conversion with metadata extraction + * + * Requirements: + * - Node.js 14+ + * - scrapegraph-js + * - dotenv + * - A valid API key (set in .env file as SGAI_APIKEY=your_key or environment variable) + * + * Usage: + * node crawl_markdown_example.js + */ + +import { crawl, getCrawlRequest } from '../index.js'; +import 'dotenv/config'; + +// Example .env file: +// SGAI_APIKEY=your_sgai_api_key + +const apiKey = process.env.SGAI_APIKEY; + +/** + * Poll for crawl results with intelligent backoff to avoid rate limits. + * @param {string} crawlId - The crawl ID to poll for + * @param {number} maxAttempts - Maximum number of polling attempts + * @returns {Promise} The final result or throws an exception on timeout/failure + */ +async function pollForResult(crawlId, maxAttempts = 20) { + console.log("โณ Starting to poll for results with rate-limit protection..."); + + // Initial wait to give the job time to start processing + await new Promise(resolve => setTimeout(resolve, 15000)); + + for (let attempt = 0; attempt < maxAttempts; attempt++) { + try { + const result = await getCrawlRequest(apiKey, crawlId); + const status = result.status; + + if (status === "success") { + return result; + } else if (status === "failed") { + throw new Error(`Crawl failed: ${result.error || 'Unknown error'}`); + } else { + // Calculate progressive wait time: start at 15s, increase gradually + const baseWait = 15000; + const progressiveWait = Math.min(60000, baseWait + (attempt * 3000)); // Cap at 60s + + console.log(`โณ Status: ${status} (attempt ${attempt + 1}/${maxAttempts}) - waiting ${progressiveWait/1000}s...`); + await new Promise(resolve => setTimeout(resolve, progressiveWait)); + } + } catch (error) { + if (error.message.toLowerCase().includes('rate') || error.message.includes('429')) { + const waitTime = Math.min(90000, 45000 + (attempt * 10000)); + console.log(`โš ๏ธ Rate limit detected in error, waiting ${waitTime/1000}s...`); + await new Promise(resolve => setTimeout(resolve, waitTime)); + continue; + } else { + console.log(`โŒ Error polling for results: ${error.message}`); + if (attempt < maxAttempts - 1) { + await new Promise(resolve => setTimeout(resolve, 20000)); // Wait before retry + continue; + } + throw error; + } + } + } + + throw new Error(`โฐ Timeout: Job did not complete after ${maxAttempts} attempts`); +} + +/** + * Markdown Conversion Mode (NO AI/LLM Used) + * + * This example demonstrates cost-effective crawling that converts pages to clean markdown + * WITHOUT any AI processing. Perfect for content archival and when you only need clean markdown. + */ +async function markdownCrawlingExample() { + console.log("=".repeat(60)); + console.log("MARKDOWN CONVERSION MODE (NO AI/LLM)"); + console.log("=".repeat(60)); + console.log("Use case: Get clean markdown content without AI processing"); + console.log("Cost: 2 credits per page (80% savings!)"); + console.log("Features: Clean markdown conversion, metadata extraction"); + console.log("โš ๏ธ NO AI/LLM PROCESSING - Pure HTML to markdown conversion only!"); + console.log(); + + // Target URL for markdown conversion + const url = "https://scrapegraphai.com/"; + + console.log(`๐ŸŒ Target URL: ${url}`); + console.log("๐Ÿค– AI Prompt: None (no AI processing)"); + console.log("๐Ÿ“Š Crawl Depth: 2"); + console.log("๐Ÿ“„ Max Pages: 2"); + console.log("๐Ÿ—บ๏ธ Use Sitemap: true"); + console.log("๐Ÿ’ก Mode: Pure HTML to markdown conversion"); + console.log(); + + // Start the markdown conversion job + console.log("๐Ÿš€ Starting markdown conversion job..."); + + try { + // Call crawl with extractionMode=false for markdown conversion + const response = await crawl(apiKey, url, null, null, { + extractionMode: false, // FALSE = Markdown conversion mode (NO AI/LLM used) + depth: 2, + maxPages: 2, + sameDomainOnly: true, + sitemap: true, // Use sitemap for better page discovery + // Note: No prompt or dataSchema needed when extractionMode=false + }); + + const crawlId = response.id || response.task_id || response.crawl_id; + + if (!crawlId) { + console.log("โŒ Failed to start markdown conversion job"); + return; + } + + console.log(`๐Ÿ“‹ Crawl ID: ${crawlId}`); + console.log("โณ Polling for results..."); + console.log(); + + // Poll for results with rate-limit protection + const result = await pollForResult(crawlId, 20); + + console.log("โœ… Markdown conversion completed successfully!"); + console.log(); + + const resultData = result.result || {}; + const pages = resultData.pages || []; + const crawledUrls = resultData.crawled_urls || []; + const creditsUsed = resultData.credits_used || 0; + const pagesProcessed = resultData.pages_processed || 0; + + // Prepare JSON output + const jsonOutput = { + conversion_results: { + pages_processed: pagesProcessed, + credits_used: creditsUsed, + cost_per_page: pagesProcessed > 0 ? creditsUsed / pagesProcessed : 0, + crawled_urls: crawledUrls + }, + markdown_content: { + total_pages: pages.length, + pages: [] + } + }; + + // Add page details to JSON + pages.forEach((page, i) => { + const metadata = page.metadata || {}; + const pageData = { + page_number: i + 1, + url: page.url, + title: page.title, + metadata: { + word_count: metadata.word_count || 0, + headers: metadata.headers || [], + links_count: metadata.links_count || 0 + }, + markdown_content: page.markdown || "" + }; + jsonOutput.markdown_content.pages.push(pageData); + }); + + // Print JSON output + console.log("๐Ÿ“Š RESULTS IN JSON FORMAT:"); + console.log("-".repeat(40)); + console.log(JSON.stringify(jsonOutput, null, 2)); + + } catch (error) { + console.log(`โŒ Markdown conversion failed: ${error.message}`); + } +} + +/** + * Main function to run the markdown crawling example. + */ +async function main() { + console.log("๐ŸŒ ScrapeGraphAI Crawler - Markdown Conversion Example"); + console.log("Cost-effective HTML to Markdown conversion (NO AI/LLM)"); + console.log("=".repeat(60)); + + // Check if API key is set + if (!apiKey) { + console.log("โš ๏ธ Please set your API key in the environment variable SGAI_APIKEY"); + console.log(" Option 1: Create a .env file with: SGAI_APIKEY=your_api_key_here"); + console.log(" Option 2: Set environment variable: export SGAI_APIKEY=your_api_key_here"); + console.log(); + console.log(" You can get your API key from: https://dashboard.scrapegraphai.com"); + return; + } + + console.log(`๐Ÿ”‘ Using API key: ${apiKey.substring(0, 10)}...`); + console.log(); + + // Run the markdown conversion example + await markdownCrawlingExample(); + + console.log("\n" + "=".repeat(60)); + console.log("๐ŸŽ‰ Example completed!"); + console.log("๐Ÿ’ก This demonstrates markdown conversion mode:"); + console.log(" โ€ข Cost-effective: Only 2 credits per page"); + console.log(" โ€ข No AI/LLM processing - pure HTML to markdown conversion"); + console.log(" โ€ข Perfect for content archival and documentation"); + console.log(" โ€ข 80% cheaper than AI extraction modes!"); +} + +// Run the example +main().catch(console.error); diff --git a/scrapegraph-js/examples/crawl/crawl_sitemap_example.js b/scrapegraph-js/examples/crawl/crawl_sitemap_example.js new file mode 100644 index 0000000..53b9e81 --- /dev/null +++ b/scrapegraph-js/examples/crawl/crawl_sitemap_example.js @@ -0,0 +1,232 @@ +#!/usr/bin/env node + +/** + * Example demonstrating the ScrapeGraphAI Crawler with sitemap functionality. + * + * This example shows how to use the crawler with sitemap enabled for better page discovery: + * - Sitemap helps discover more pages efficiently + * - Better coverage of website content + * - More comprehensive crawling results + * + * Requirements: + * - Node.js 14+ + * - scrapegraph-js + * - dotenv + * - A valid API key (set in .env file as SGAI_APIKEY=your_key or environment variable) + * + * Usage: + * node crawl_sitemap_example.js + */ + +import { crawl, getCrawlRequest } from '../index.js'; +import 'dotenv/config'; + +// Example .env file: +// SGAI_APIKEY=your_sgai_api_key + +const apiKey = process.env.SGAI_APIKEY; + +/** + * Poll for crawl results with intelligent backoff to avoid rate limits. + * @param {string} crawlId - The crawl ID to poll for + * @param {number} maxAttempts - Maximum number of polling attempts + * @returns {Promise} The final result or throws an exception on timeout/failure + */ +async function pollForResult(crawlId, maxAttempts = 20) { + console.log("โณ Starting to poll for results with rate-limit protection..."); + + // Initial wait to give the job time to start processing + await new Promise(resolve => setTimeout(resolve, 15000)); + + for (let attempt = 0; attempt < maxAttempts; attempt++) { + try { + const result = await getCrawlRequest(apiKey, crawlId); + const status = result.status; + + if (status === "success") { + return result; + } else if (status === "failed") { + throw new Error(`Crawl failed: ${result.error || 'Unknown error'}`); + } else { + // Calculate progressive wait time: start at 15s, increase gradually + const baseWait = 15000; + const progressiveWait = Math.min(60000, baseWait + (attempt * 3000)); // Cap at 60s + + console.log(`โณ Status: ${status} (attempt ${attempt + 1}/${maxAttempts}) - waiting ${progressiveWait/1000}s...`); + await new Promise(resolve => setTimeout(resolve, progressiveWait)); + } + } catch (error) { + if (error.message.toLowerCase().includes('rate') || error.message.includes('429')) { + const waitTime = Math.min(90000, 45000 + (attempt * 10000)); + console.log(`โš ๏ธ Rate limit detected in error, waiting ${waitTime/1000}s...`); + await new Promise(resolve => setTimeout(resolve, waitTime)); + continue; + } else { + console.log(`โŒ Error polling for results: ${error.message}`); + if (attempt < maxAttempts - 1) { + await new Promise(resolve => setTimeout(resolve, 20000)); // Wait before retry + continue; + } + throw error; + } + } + } + + throw new Error(`โฐ Timeout: Job did not complete after ${maxAttempts} attempts`); +} + +/** + * Sitemap-enabled Crawling Example + * + * This example demonstrates how to use sitemap for better page discovery. + * Sitemap helps the crawler find more pages efficiently by using the website's sitemap.xml. + */ +async function sitemapCrawlingExample() { + console.log("=".repeat(60)); + console.log("SITEMAP-ENABLED CRAWLING EXAMPLE"); + console.log("=".repeat(60)); + console.log("Use case: Comprehensive website crawling with sitemap discovery"); + console.log("Benefits: Better page coverage, more efficient crawling"); + console.log("Features: Sitemap-based page discovery, structured data extraction"); + console.log(); + + // Target URL - using a website that likely has a sitemap + const url = "https://www.giemmeagordo.com/risultati-ricerca-annunci/?sort=newest&search_city=&search_lat=null&search_lng=null&search_category=0&search_type=0&search_min_price=&search_max_price=&bagni=&bagni_comparison=equal&camere=&camere_comparison=equal"; + + // Schema for real estate listings + const schema = { + "type": "object", + "properties": { + "listings": { + "type": "array", + "items": { + "type": "object", + "properties": { + "title": { "type": "string" }, + "price": { "type": "string" }, + "location": { "type": "string" }, + "description": { "type": "string" }, + "features": { "type": "array", "items": { "type": "string" } }, + "url": { "type": "string" } + } + } + } + } + }; + + const prompt = "Extract all real estate listings with their details including title, price, location, description, and features"; + + console.log(`๐ŸŒ Target URL: ${url}`); + console.log("๐Ÿค– AI Prompt: Extract real estate listings"); + console.log("๐Ÿ“Š Crawl Depth: 1"); + console.log("๐Ÿ“„ Max Pages: 10"); + console.log("๐Ÿ—บ๏ธ Use Sitemap: true (enabled for better page discovery)"); + console.log("๐Ÿ  Same Domain Only: true"); + console.log("๐Ÿ’พ Cache Website: true"); + console.log("๐Ÿ’ก Mode: AI extraction with sitemap discovery"); + console.log(); + + // Start the sitemap-enabled crawl job + console.log("๐Ÿš€ Starting sitemap-enabled crawl job..."); + + try { + // Call crawl with sitemap=true for better page discovery + const response = await crawl(apiKey, url, prompt, schema, { + extractionMode: true, // AI extraction mode + depth: 1, + maxPages: 10, + sameDomainOnly: true, + cacheWebsite: true, + sitemap: true, // Enable sitemap for better page discovery + }); + + const crawlId = response.id || response.task_id || response.crawl_id; + + if (!crawlId) { + console.log("โŒ Failed to start sitemap-enabled crawl job"); + return; + } + + console.log(`๐Ÿ“‹ Crawl ID: ${crawlId}`); + console.log("โณ Polling for results..."); + console.log(); + + // Poll for results with rate-limit protection + const result = await pollForResult(crawlId, 20); + + console.log("โœ… Sitemap-enabled crawl completed successfully!"); + console.log(); + + const resultData = result.result || {}; + const llmResult = resultData.llm_result || {}; + const crawledUrls = resultData.crawled_urls || []; + const creditsUsed = resultData.credits_used || 0; + const pagesProcessed = resultData.pages_processed || 0; + + // Prepare JSON output + const jsonOutput = { + crawl_results: { + pages_processed: pagesProcessed, + credits_used: creditsUsed, + cost_per_page: pagesProcessed > 0 ? creditsUsed / pagesProcessed : 0, + crawled_urls: crawledUrls, + sitemap_enabled: true + }, + extracted_data: llmResult + }; + + // Print JSON output + console.log("๐Ÿ“Š RESULTS IN JSON FORMAT:"); + console.log("-".repeat(40)); + console.log(JSON.stringify(jsonOutput, null, 2)); + + // Print summary + console.log("\n" + "=".repeat(60)); + console.log("๐Ÿ“ˆ CRAWL SUMMARY:"); + console.log("=".repeat(60)); + console.log(`โœ… Pages processed: ${pagesProcessed}`); + console.log(`๐Ÿ’ฐ Credits used: ${creditsUsed}`); + console.log(`๐Ÿ”— URLs crawled: ${crawledUrls.length}`); + console.log(`๐Ÿ—บ๏ธ Sitemap enabled: Yes`); + console.log(`๐Ÿ“Š Data extracted: ${llmResult.listings ? llmResult.listings.length : 0} listings found`); + + } catch (error) { + console.log(`โŒ Sitemap-enabled crawl failed: ${error.message}`); + } +} + +/** + * Main function to run the sitemap crawling example. + */ +async function main() { + console.log("๐ŸŒ ScrapeGraphAI Crawler - Sitemap Example"); + console.log("Comprehensive website crawling with sitemap discovery"); + console.log("=".repeat(60)); + + // Check if API key is set + if (!apiKey) { + console.log("โš ๏ธ Please set your API key in the environment variable SGAI_APIKEY"); + console.log(" Option 1: Create a .env file with: SGAI_APIKEY=your_api_key_here"); + console.log(" Option 2: Set environment variable: export SGAI_APIKEY=your_api_key_here"); + console.log(); + console.log(" You can get your API key from: https://dashboard.scrapegraphai.com"); + return; + } + + console.log(`๐Ÿ”‘ Using API key: ${apiKey.substring(0, 10)}...`); + console.log(); + + // Run the sitemap crawling example + await sitemapCrawlingExample(); + + console.log("\n" + "=".repeat(60)); + console.log("๐ŸŽ‰ Example completed!"); + console.log("๐Ÿ’ก This demonstrates sitemap-enabled crawling:"); + console.log(" โ€ข Better page discovery using sitemap.xml"); + console.log(" โ€ข More comprehensive website coverage"); + console.log(" โ€ข Efficient crawling of structured websites"); + console.log(" โ€ข Perfect for e-commerce, news sites, and content-heavy websites"); +} + +// Run the example +main().catch(console.error); diff --git a/scrapegraph-js/examples/markdownify/markdownify_example.js b/scrapegraph-js/examples/markdownify/markdownify_example.js new file mode 100644 index 0000000..5136b8f --- /dev/null +++ b/scrapegraph-js/examples/markdownify/markdownify_example.js @@ -0,0 +1,35 @@ +import { getMarkdownifyRequest, markdownify } from 'scrapegraph-js'; +import fs from 'fs'; +import 'dotenv/config'; + +// markdownify function example +const apiKey = process.env.SGAI_APIKEY; +const url = 'https://scrapegraphai.com/'; + +try { + const response = await markdownify(apiKey, url); + console.log(response); + saveFile(response.result); +} catch (error) { + console.error(error); +} + +// helper function for save the file locally +function saveFile(output) { + try { + fs.writeFileSync('result.md', output); + console.log('Success!'); + } catch (err) { + console.error('Error during the file writing: ', err); + } +} + +// getMarkdownifyRequest function example +const requestId = '2563b972-cb6f-400b-be76-edb235458560'; + +try { + const response = await getMarkdownifyRequest(apiKey, requestId); + console.log(response); +} catch (error) { + console.log(error); +} diff --git a/scrapegraph-js/examples/scheduled_jobs/scheduledJobs_example.js b/scrapegraph-js/examples/scheduled_jobs/scheduledJobs_example.js new file mode 100644 index 0000000..21bf003 --- /dev/null +++ b/scrapegraph-js/examples/scheduled_jobs/scheduledJobs_example.js @@ -0,0 +1,267 @@ +import { + createScheduledJob, + getScheduledJobs, + getScheduledJob, + updateScheduledJob, + deleteScheduledJob, + pauseScheduledJob, + resumeScheduledJob, + triggerScheduledJob, + getJobExecutions, + enableMock, + disableMock +} from '../index.js'; + +// Enable mock mode for testing +enableMock(); + +/** + * Create a SmartScraper scheduled job + */ +async function createSmartScraperJob(apiKey) { + console.log('๐Ÿ“… Creating SmartScraper scheduled job...'); + + const jobConfig = { + website_url: 'https://news.ycombinator.com', + user_prompt: 'Extract the top 5 news titles and their URLs', + render_heavy_js: false, + headers: { + 'User-Agent': 'Mozilla/5.0 (compatible; ScheduledJob/1.0)' + } + }; + + try { + const result = await createScheduledJob( + apiKey, + 'HN Top News Scraper', + 'smartscraper', + '0 */6 * * *', // Every 6 hours + jobConfig, + true + ); + + console.log(`โœ… Created SmartScraper job with ID: ${result.id}`); + return result.id; + } catch (error) { + console.error('โŒ Error creating SmartScraper job:', error.message); + throw error; + } +} + +/** + * Create a SearchScraper scheduled job + */ +async function createSearchScraperJob(apiKey) { + console.log('๐Ÿ“… Creating SearchScraper scheduled job...'); + + const jobConfig = { + user_prompt: 'Find the latest AI and machine learning news', + num_results: 5, + headers: { + 'User-Agent': 'Mozilla/5.0 (compatible; ScheduledJob/1.0)' + } + }; + + try { + const result = await createScheduledJob( + apiKey, + 'AI News Search', + 'searchscraper', + '0 9 * * 1', // Every Monday at 9 AM + jobConfig, + true + ); + + console.log(`โœ… Created SearchScraper job with ID: ${result.id}`); + return result.id; + } catch (error) { + console.error('โŒ Error creating SearchScraper job:', error.message); + throw error; + } +} + +/** + * Create a Crawl scheduled job + */ +async function createCrawlJob(apiKey) { + console.log('๐Ÿ“… Creating Crawl scheduled job...'); + + const jobConfig = { + url: 'https://example.com', + prompt: 'Extract all product information', + extraction_mode: true, + depth: 2, + max_pages: 10, + same_domain_only: true, + cache_website: true + }; + + try { + const result = await createScheduledJob( + apiKey, + 'Product Catalog Crawler', + 'crawl', + '0 2 * * *', // Daily at 2 AM + jobConfig, + true + ); + + console.log(`โœ… Created Crawl job with ID: ${result.id}`); + return result.id; + } catch (error) { + console.error('โŒ Error creating Crawl job:', error.message); + throw error; + } +} + +/** + * Manage scheduled jobs + */ +async function manageJobs(apiKey, jobIds) { + console.log('\n๐Ÿ”ง Managing scheduled jobs...'); + + try { + // List all jobs + console.log('\n๐Ÿ“‹ Listing all scheduled jobs:'); + const jobsResult = await getScheduledJobs(apiKey, { page: 1, pageSize: 10 }); + console.log(`Total jobs: ${jobsResult.total}`); + + jobsResult.jobs.forEach(job => { + console.log(` - ${job.job_name} (${job.service_type}) - Active: ${job.is_active}`); + }); + + // Get details of first job + if (jobIds.length > 0) { + console.log(`\n๐Ÿ” Getting details for job ${jobIds[0]}:`); + const jobDetails = await getScheduledJob(apiKey, jobIds[0]); + console.log(` Name: ${jobDetails.job_name}`); + console.log(` Cron: ${jobDetails.cron_expression}`); + console.log(` Next run: ${jobDetails.next_run_at || 'N/A'}`); + + // Pause the first job + console.log(`\nโธ๏ธ Pausing job ${jobIds[0]}:`); + const pauseResult = await pauseScheduledJob(apiKey, jobIds[0]); + console.log(` Status: ${pauseResult.message}`); + + // Resume the job + console.log(`\nโ–ถ๏ธ Resuming job ${jobIds[0]}:`); + const resumeResult = await resumeScheduledJob(apiKey, jobIds[0]); + console.log(` Status: ${resumeResult.message}`); + + // Update job configuration + console.log(`\n๐Ÿ“ Updating job ${jobIds[0]}:`); + const updateResult = await updateScheduledJob(apiKey, jobIds[0], { + jobName: 'Updated HN News Scraper', + cronExpression: '0 */4 * * *' // Every 4 hours instead of 6 + }); + console.log(` Updated job name: ${updateResult.job_name}`); + console.log(` Updated cron: ${updateResult.cron_expression}`); + } + } catch (error) { + console.error('โŒ Error managing jobs:', error.message); + } +} + +/** + * Trigger and monitor jobs + */ +async function triggerAndMonitorJobs(apiKey, jobIds) { + console.log('\n๐Ÿš€ Triggering and monitoring jobs...'); + + for (const jobId of jobIds) { + try { + console.log(`\n๐ŸŽฏ Manually triggering job ${jobId}:`); + const triggerResult = await triggerScheduledJob(apiKey, jobId); + const executionId = triggerResult.execution_id; + console.log(` Execution ID: ${executionId}`); + console.log(` Message: ${triggerResult.message}`); + + // Wait a bit for execution to start + await new Promise(resolve => setTimeout(resolve, 2000)); + + // Get execution history + console.log(`\n๐Ÿ“Š Getting execution history for job ${jobId}:`); + const executions = await getJobExecutions(apiKey, jobId, { page: 1, pageSize: 5 }); + console.log(` Total executions: ${executions.total}`); + + executions.executions.slice(0, 3).forEach(execution => { + console.log(` - Execution ${execution.id}: ${execution.status}`); + console.log(` Started: ${execution.started_at}`); + if (execution.completed_at) { + console.log(` Completed: ${execution.completed_at}`); + } + if (execution.credits_used) { + console.log(` Credits used: ${execution.credits_used}`); + } + }); + } catch (error) { + console.error(`โŒ Error triggering job ${jobId}:`, error.message); + } + } +} + +/** + * Clean up created jobs + */ +async function cleanupJobs(apiKey, jobIds) { + console.log('\n๐Ÿงน Cleaning up created jobs...'); + + for (const jobId of jobIds) { + try { + console.log(`๐Ÿ—‘๏ธ Deleting job ${jobId}:`); + const deleteResult = await deleteScheduledJob(apiKey, jobId); + console.log(` Status: ${deleteResult.message}`); + } catch (error) { + console.error(`โŒ Error deleting job ${jobId}:`, error.message); + } + } +} + +/** + * Main function demonstrating scheduled jobs + */ +async function main() { + const apiKey = process.env.SGAI_API_KEY || 'your-api-key-here'; + + if (apiKey === 'your-api-key-here') { + console.log('โŒ Error: SGAI_API_KEY environment variable not set'); + console.log('Please either:'); + console.log(' 1. Set environment variable: export SGAI_API_KEY="your-api-key-here"'); + console.log(' 2. Or update the apiKey variable in this script'); + return; + } + + console.log('๐Ÿš€ Starting Scheduled Jobs Demo'); + console.log('='.repeat(50)); + + const jobIds = []; + + try { + // Create different types of scheduled jobs + const smartscraperJobId = await createSmartScraperJob(apiKey); + jobIds.push(smartscraperJobId); + + const searchscraperJobId = await createSearchScraperJob(apiKey); + jobIds.push(searchscraperJobId); + + const crawlJobId = await createCrawlJob(apiKey); + jobIds.push(crawlJobId); + + // Manage jobs + await manageJobs(apiKey, jobIds); + + // Trigger and monitor jobs + await triggerAndMonitorJobs(apiKey, jobIds); + + } catch (error) { + console.error('โŒ Error during execution:', error.message); + } finally { + // Clean up + await cleanupJobs(apiKey, jobIds); + } + + console.log('\nโœ… Scheduled Jobs Demo completed!'); +} + +// Run the demo +main().catch(console.error); diff --git a/scrapegraph-js/examples/scheduled_jobs/scheduledJobs_simple_example.js b/scrapegraph-js/examples/scheduled_jobs/scheduledJobs_simple_example.js new file mode 100644 index 0000000..6b55bc7 --- /dev/null +++ b/scrapegraph-js/examples/scheduled_jobs/scheduledJobs_simple_example.js @@ -0,0 +1,289 @@ +import { + createScheduledJob, + getScheduledJobs, + getScheduledJob, + updateScheduledJob, + deleteScheduledJob, + pauseScheduledJob, + resumeScheduledJob, + triggerScheduledJob, + getJobExecutions, + enableMock, + disableMock +} from '../index.js'; + +/** + * Simple scheduled jobs example for JavaScript SDK + */ +async function simpleScheduledJobsExample() { + const apiKey = process.env.SGAI_API_KEY || 'your-api-key-here'; + + if (apiKey === 'your-api-key-here') { + console.log('โŒ Error: SGAI_API_KEY environment variable not set'); + console.log('Please either:'); + console.log(' 1. Set environment variable: export SGAI_API_KEY="your-api-key-here"'); + console.log(' 2. Or update the apiKey variable in this script'); + return; + } + + console.log('๐Ÿš€ Starting Simple Scheduled Jobs Example'); + console.log('='.repeat(50)); + + const jobIds = []; + + try { + // Create a SmartScraper job + console.log('\n๐Ÿ“… Creating SmartScraper job...'); + const smartScraperJob = await createScheduledJob( + apiKey, + 'Daily News Scraper', + 'smartscraper', + '0 9 * * *', // Daily at 9 AM + { + website_url: 'https://news.ycombinator.com', + user_prompt: 'Extract the top 5 news titles and their URLs', + render_heavy_js: false + }, + true + ); + + console.log(`โœ… Created SmartScraper job: ${smartScraperJob.id}`); + jobIds.push(smartScraperJob.id); + + // Create a SearchScraper job + console.log('\n๐Ÿ“… Creating SearchScraper job...'); + const searchScraperJob = await createScheduledJob( + apiKey, + 'Weekly AI Research', + 'searchscraper', + '0 10 * * 1', // Every Monday at 10 AM + { + user_prompt: 'Find the latest AI and machine learning research papers', + num_results: 5 + }, + true + ); + + console.log(`โœ… Created SearchScraper job: ${searchScraperJob.id}`); + jobIds.push(searchScraperJob.id); + + // List all jobs + console.log('\n๐Ÿ“‹ Listing all scheduled jobs:'); + const allJobs = await getScheduledJobs(apiKey, { page: 1, pageSize: 10 }); + console.log(`Total jobs: ${allJobs.total}`); + + allJobs.jobs.forEach(job => { + console.log(` - ${job.job_name} (${job.service_type}) - Active: ${job.is_active}`); + }); + + // Get details of first job + if (jobIds.length > 0) { + console.log(`\n๐Ÿ” Getting details for job ${jobIds[0]}:`); + const jobDetails = await getScheduledJob(apiKey, jobIds[0]); + console.log(` Name: ${jobDetails.job_name}`); + console.log(` Cron: ${jobDetails.cron_expression}`); + console.log(` Next run: ${jobDetails.next_run_at || 'N/A'}`); + + // Update the job + console.log(`\n๐Ÿ“ Updating job ${jobIds[0]}:`); + const updatedJob = await updateScheduledJob(apiKey, jobIds[0], { + jobName: 'Updated Daily News Scraper', + cronExpression: '0 8 * * *' // Change to 8 AM + }); + console.log(` Updated name: ${updatedJob.job_name}`); + console.log(` Updated cron: ${updatedJob.cron_expression}`); + + // Pause the job + console.log(`\nโธ๏ธ Pausing job ${jobIds[0]}:`); + const pauseResult = await pauseScheduledJob(apiKey, jobIds[0]); + console.log(` Status: ${pauseResult.message}`); + + // Resume the job + console.log(`\nโ–ถ๏ธ Resuming job ${jobIds[0]}:`); + const resumeResult = await resumeScheduledJob(apiKey, jobIds[0]); + console.log(` Status: ${resumeResult.message}`); + + // Trigger the job manually + console.log(`\n๐Ÿš€ Manually triggering job ${jobIds[0]}:`); + const triggerResult = await triggerScheduledJob(apiKey, jobIds[0]); + console.log(` Execution ID: ${triggerResult.execution_id}`); + console.log(` Message: ${triggerResult.message}`); + + // Get execution history + console.log(`\n๐Ÿ“Š Getting execution history for job ${jobIds[0]}:`); + const executions = await getJobExecutions(apiKey, jobIds[0], { page: 1, pageSize: 5 }); + console.log(` Total executions: ${executions.total}`); + + executions.executions.slice(0, 3).forEach(execution => { + console.log(` - Execution ${execution.id}: ${execution.status}`); + console.log(` Started: ${execution.started_at}`); + if (execution.completed_at) { + console.log(` Completed: ${execution.completed_at}`); + } + if (execution.credits_used) { + console.log(` Credits used: ${execution.credits_used}`); + } + }); + } + + } catch (error) { + console.error('โŒ Error during execution:', error.message); + } finally { + // Clean up created jobs + console.log('\n๐Ÿงน Cleaning up created jobs:'); + for (const jobId of jobIds) { + try { + const deleteResult = await deleteScheduledJob(apiKey, jobId); + console.log(` โœ… Deleted job ${jobId}: ${deleteResult.message}`); + } catch (error) { + console.error(` โŒ Failed to delete job ${jobId}:`, error.message); + } + } + } + + console.log('\nโœ… Simple Scheduled Jobs Example completed!'); +} + +/** + * Mock mode example + */ +async function mockModeExample() { + console.log('\n๐Ÿงช Mock Mode Example'); + console.log('='.repeat(30)); + + // Enable mock mode + enableMock(); + + const apiKey = 'mock-api-key'; + + try { + // Create a job in mock mode + const mockJob = await createScheduledJob( + apiKey, + 'Mock Job', + 'smartscraper', + '0 9 * * *', + { test: 'config' }, + true + ); + + console.log(`โœ… Created mock job: ${mockJob.id}`); + console.log(` Job name: ${mockJob.job_name}`); + console.log(` Service type: ${mockJob.service_type}`); + + // List jobs in mock mode + const mockJobs = await getScheduledJobs(apiKey); + console.log(`๐Ÿ“‹ Mock jobs count: ${mockJobs.total}`); + + // Trigger job in mock mode + const triggerResult = await triggerScheduledJob(apiKey, mockJob.id); + console.log(`๐Ÿš€ Mock trigger result: ${triggerResult.message}`); + + } catch (error) { + console.error('โŒ Mock mode error:', error.message); + } finally { + disableMock(); + } + + console.log('โœ… Mock Mode Example completed!'); +} + +/** + * Concurrent operations example + */ +async function concurrentOperationsExample() { + console.log('\nโšก Concurrent Operations Example'); + console.log('='.repeat(40)); + + const apiKey = process.env.SGAI_API_KEY || 'your-api-key-here'; + + if (apiKey === 'your-api-key-here') { + console.log('โŒ Error: SGAI_API_KEY environment variable not set'); + return; + } + + const jobIds = []; + + try { + // Create multiple jobs concurrently + console.log('๐Ÿ“… Creating multiple jobs concurrently...'); + + const jobPromises = [ + createScheduledJob( + apiKey, + 'Concurrent Job 1', + 'smartscraper', + '0 9 * * *', + { website_url: 'https://example1.com', user_prompt: 'Extract data' } + ), + createScheduledJob( + apiKey, + 'Concurrent Job 2', + 'searchscraper', + '0 10 * * *', + { user_prompt: 'Find information', num_results: 3 } + ), + createScheduledJob( + apiKey, + 'Concurrent Job 3', + 'smartscraper', + '0 11 * * *', + { website_url: 'https://example2.com', user_prompt: 'Monitor changes' } + ) + ]; + + const results = await Promise.all(jobPromises); + + results.forEach((result, index) => { + console.log(` โœ… Created job ${index + 1}: ${result.id}`); + jobIds.push(result.id); + }); + + // Trigger all jobs concurrently + console.log('\n๐Ÿš€ Triggering all jobs concurrently...'); + + const triggerPromises = jobIds.map(jobId => triggerScheduledJob(apiKey, jobId)); + const triggerResults = await Promise.all(triggerPromises); + + triggerResults.forEach((result, index) => { + console.log(` โœ… Triggered job ${index + 1}: ${result.execution_id}`); + }); + + // Get execution history for all jobs concurrently + console.log('\n๐Ÿ“Š Getting execution history for all jobs...'); + + const executionPromises = jobIds.map(jobId => getJobExecutions(apiKey, jobId)); + const executionResults = await Promise.all(executionPromises); + + executionResults.forEach((result, index) => { + console.log(` ๐Ÿ“ˆ Job ${index + 1} executions: ${result.total}`); + }); + + } catch (error) { + console.error('โŒ Concurrent operations error:', error.message); + } finally { + // Clean up all jobs + console.log('\n๐Ÿงน Cleaning up all jobs...'); + const deletePromises = jobIds.map(jobId => deleteScheduledJob(apiKey, jobId)); + await Promise.allSettled(deletePromises); + console.log('โœ… Cleanup completed'); + } + + console.log('โœ… Concurrent Operations Example completed!'); +} + +/** + * Main function to run all examples + */ +async function main() { + try { + await simpleScheduledJobsExample(); + await mockModeExample(); + await concurrentOperationsExample(); + } catch (error) { + console.error('โŒ Main execution error:', error.message); + } +} + +// Run the examples +main().catch(console.error); diff --git a/scrapegraph-js/examples/schema_generation_example.js b/scrapegraph-js/examples/schema_generation_example.js new file mode 100644 index 0000000..ddf8dd5 --- /dev/null +++ b/scrapegraph-js/examples/schema_generation_example.js @@ -0,0 +1,293 @@ +#!/usr/bin/env node +/** + * Example script demonstrating the Generate Schema API endpoint using ScrapeGraph JavaScript SDK. + * + * This script shows how to: + * 1. Generate a new JSON schema from a search query + * 2. Modify an existing schema + * 3. Handle different types of search queries + * 4. Check the status of schema generation requests + * 5. Poll for completion with progress tracking + * + * Requirements: + * - Node.js 16+ + * - scrapegraph-js package + * - SGAI_API_KEY environment variable + * + * Usage: + * SGAI_API_KEY=your_api_key node schema_generation_example.js + */ + +import { generateSchema, getSchemaStatus, pollSchemaGeneration } from '../index.js'; + +class GenerateSchemaExample { + constructor(apiKey, baseUrl = null) { + this.apiKey = apiKey; + this.baseUrl = baseUrl; + + if (!this.apiKey) { + throw new Error( + 'API key must be provided. Set SGAI_API_KEY environment variable or pass it to the constructor.' + ); + } + } + + printSchemaResponse(response, title = 'Schema Generation Response') { + console.log(`\n${'='.repeat(60)}`); + console.log(` ${title}`); + console.log(`${'='.repeat(60)}`); + + if (response.error) { + console.log(`โŒ Error: ${response.error}`); + return; + } + + console.log(`โœ… Request ID: ${response.request_id || 'N/A'}`); + console.log(`๐Ÿ“Š Status: ${response.status || 'N/A'}`); + console.log(`๐Ÿ” User Prompt: ${response.user_prompt || 'N/A'}`); + console.log(`โœจ Refined Prompt: ${response.refined_prompt || 'N/A'}`); + + if (response.generated_schema) { + console.log(`\n๐Ÿ“‹ Generated Schema:`); + console.log(JSON.stringify(response.generated_schema, null, 2)); + } + } + + async runExamples() { + console.log('๐Ÿš€ Generate Schema API Examples using ScrapeGraph JavaScript SDK'); + console.log('='.repeat(60)); + + // Example 1: Generate schema for e-commerce products + console.log('\n1๏ธโƒฃ Example: E-commerce Product Search'); + const ecommercePrompt = 'Find laptops with specifications like brand, processor, RAM, storage, and price'; + try { + const response = await generateSchema(ecommercePrompt, null, { + apiKey: this.apiKey, + baseUrl: this.baseUrl + }); + this.printSchemaResponse(response, 'E-commerce Products Schema'); + } catch (error) { + console.log(`โŒ Error in e-commerce example: ${error.message}`); + } + + // Example 2: Generate schema for job listings + console.log('\n2๏ธโƒฃ Example: Job Listings Search'); + const jobPrompt = 'Search for software engineering jobs with company name, position, location, salary range, and requirements'; + try { + const response = await generateSchema(jobPrompt, null, { + apiKey: this.apiKey, + baseUrl: this.baseUrl + }); + this.printSchemaResponse(response, 'Job Listings Schema'); + } catch (error) { + console.log(`โŒ Error in job listings example: ${error.message}`); + } + + // Example 3: Generate schema for news articles + console.log('\n3๏ธโƒฃ Example: News Articles Search'); + const newsPrompt = 'Find technology news articles with headline, author, publication date, category, and summary'; + try { + const response = await generateSchema(newsPrompt, null, { + apiKey: this.apiKey, + baseUrl: this.baseUrl + }); + this.printSchemaResponse(response, 'News Articles Schema'); + } catch (error) { + console.log(`โŒ Error in news articles example: ${error.message}`); + } + + // Example 4: Modify existing schema + console.log('\n4๏ธโƒฃ Example: Modify Existing Schema'); + const existingSchema = { + $defs: { + ProductSchema: { + title: 'ProductSchema', + type: 'object', + properties: { + name: { title: 'Name', type: 'string' }, + price: { title: 'Price', type: 'number' } + }, + required: ['name', 'price'] + } + }, + title: 'ProductList', + type: 'object', + properties: { + products: { + title: 'Products', + type: 'array', + items: { $ref: '#/$defs/ProductSchema' } + } + }, + required: ['products'] + }; + + const modificationPrompt = 'Add brand, category, and rating fields to the existing product schema'; + try { + const response = await generateSchema(modificationPrompt, existingSchema, { + apiKey: this.apiKey, + baseUrl: this.baseUrl + }); + this.printSchemaResponse(response, 'Modified Product Schema'); + } catch (error) { + console.log(`โŒ Error in schema modification example: ${error.message}`); + } + + // Example 5: Complex nested schema + console.log('\n5๏ธโƒฃ Example: Complex Nested Schema'); + const complexPrompt = 'Create a schema for a company directory with departments, each containing employees with contact info and projects'; + try { + const response = await generateSchema(complexPrompt, null, { + apiKey: this.apiKey, + baseUrl: this.baseUrl + }); + this.printSchemaResponse(response, 'Company Directory Schema'); + } catch (error) { + console.log(`โŒ Error in complex schema example: ${error.message}`); + } + } + + async demonstrateStatusChecking() { + console.log('\n๐Ÿ”„ Demonstrating Status Checking...'); + + // Generate a simple schema first + const prompt = 'Find restaurants with name, cuisine, rating, and address'; + try { + const response = await generateSchema(prompt, null, { + apiKey: this.apiKey, + baseUrl: this.baseUrl + }); + + const requestId = response.request_id; + + if (requestId) { + console.log(`๐Ÿ“ Generated schema request with ID: ${requestId}`); + + // Check the status + console.log('๐Ÿ” Checking status...'); + const statusResponse = await getSchemaStatus(requestId, { + apiKey: this.apiKey, + baseUrl: this.baseUrl + }); + this.printSchemaResponse(statusResponse, `Status Check for ${requestId}`); + } else { + console.log('โš ๏ธ No request ID returned from schema generation'); + } + + } catch (error) { + console.log(`โŒ Error in status checking demonstration: ${error.message}`); + } + } + + async demonstratePolling() { + console.log('\n๐Ÿ”„ Demonstrating Polling with Progress Tracking...'); + + const prompt = 'Find movies with title, director, cast, rating, and release date'; + try { + const response = await generateSchema(prompt, null, { + apiKey: this.apiKey, + baseUrl: this.baseUrl + }); + + const requestId = response.request_id; + + if (requestId) { + console.log(`๐Ÿ“ Generated schema request with ID: ${requestId}`); + console.log('๐Ÿ”„ Polling for completion with progress tracking...'); + + const finalResult = await pollSchemaGeneration(requestId, { + apiKey: this.apiKey, + baseUrl: this.baseUrl, + maxAttempts: 15, + delay: 3000, + onProgress: ({ attempt, maxAttempts, status, response }) => { + if (status === 'checking') { + console.log(`๐Ÿ” Attempt ${attempt}/${maxAttempts}: Checking status...`); + } else { + console.log(`๐Ÿ“Š Attempt ${attempt}/${maxAttempts}: Status = ${status}`); + if (response && response.refined_prompt) { + console.log(` Refined prompt: ${response.refined_prompt}`); + } + } + } + }); + + console.log('โœ… Polling completed successfully!'); + this.printSchemaResponse(finalResult, 'Final Result from Polling'); + + } else { + console.log('โš ๏ธ No request ID returned from schema generation'); + } + + } catch (error) { + console.log(`โŒ Error in polling demonstration: ${error.message}`); + } + } + + async runConcurrentExamples() { + console.log('\n๐Ÿ”„ Running Concurrent Examples...'); + + const prompts = [ + 'Find restaurants with name, cuisine, rating, and address', + 'Search for books with title, author, genre, and publication year', + 'Find movies with title, director, cast, rating, and release date' + ]; + + try { + const tasks = prompts.map(prompt => + generateSchema(prompt, null, { + apiKey: this.apiKey, + baseUrl: this.baseUrl + }) + ); + + const results = await Promise.all(tasks); + + for (let i = 0; i < prompts.length; i++) { + const prompt = prompts[i]; + const result = results[i]; + this.printSchemaResponse(result, `Concurrent Example ${i + 1}: ${prompt.substring(0, 30)}...`); + } + + } catch (error) { + console.log(`โŒ Error in concurrent examples: ${error.message}`); + } + } +} + +async function main() { + // Check if API key is available + const apiKey = process.env.SGAI_API_KEY; + if (!apiKey) { + console.log('Error: SGAI_API_KEY not found in environment variables'); + console.log('Please set your API key:'); + console.log('export SGAI_API_KEY=your_api_key_here'); + console.log('Or run: SGAI_API_KEY=your_api_key node schema_generation_example.js'); + return; + } + + // Initialize the example class + const example = new GenerateSchemaExample(apiKey); + + try { + // Run synchronous examples + await example.runExamples(); + + // Demonstrate status checking + await example.demonstrateStatusChecking(); + + // Demonstrate polling with progress tracking + await example.demonstratePolling(); + + // Run concurrent examples + await example.runConcurrentExamples(); + + } catch (error) { + console.log(`โŒ Unexpected Error: ${error.message}`); + } +} + +// Run the examples if this file is executed directly +if (import.meta.url === `file://${process.argv[1]}`) { + main().catch(console.error); +} diff --git a/scrapegraph-js/examples/searchscraper/schema_searchScraper_example.js b/scrapegraph-js/examples/searchscraper/schema_searchScraper_example.js new file mode 100644 index 0000000..8aa7856 --- /dev/null +++ b/scrapegraph-js/examples/searchscraper/schema_searchScraper_example.js @@ -0,0 +1,44 @@ +/** + * Schema-based SearchScraper Example + * + * This example demonstrates both schema-based output and configurable website limits: + * - Default: 3 websites (30 credits) + * - Enhanced: 5 websites (50 credits) - provides more comprehensive data for schema + * - Maximum: 20 websites (200 credits) - for highly detailed schema population + */ + +import { searchScraper } from 'scrapegraph-js'; +import { z } from 'zod'; +import 'dotenv/config'; + +const apiKey = process.env.SGAI_APIKEY; +const prompt = 'What is the latest version of Python and what are its main features?'; + +const schema = z.object({ + version: z.string().describe('The latest version'), + release_date: z.string().describe('The release date of latest version'), + major_features: z.array(z.string()), +}); + +// Configure number of websites for better schema population +const numResults = 5; // Enhanced search for better schema data (50 credits) + +try { + console.log(`๐Ÿ” Searching ${numResults} websites with custom schema`); + console.log(`๐Ÿ’ณ Credits required: ${numResults <= 3 ? 30 : 30 + (numResults - 3) * 10}`); + console.log('-'.repeat(60)); + + const response = await searchScraper(apiKey, prompt, numResults, schema); + + console.log('โœ… Schema-based search completed successfully!'); + console.log('\n๐Ÿ“‹ STRUCTURED RESULT:'); + console.log(JSON.stringify(response.result, null, 2)); + + console.log('\n๐Ÿ”— Reference URLs:'); + response.reference_urls?.forEach((url, index) => { + console.log(`${index + 1}. ${url}`); + }); + +} catch (error) { + console.error('โŒ Error:', error.message); +} diff --git a/scrapegraph-js/examples/searchscraper/searchScraper_enhanced_example.js b/scrapegraph-js/examples/searchscraper/searchScraper_enhanced_example.js new file mode 100644 index 0000000..67016e1 --- /dev/null +++ b/scrapegraph-js/examples/searchscraper/searchScraper_enhanced_example.js @@ -0,0 +1,333 @@ +/** + * Enhanced SearchScraper Example + * + * This example demonstrates the SearchScraper API with configurable website limits. + * Issue #144 enhancement allows users to search up to 20 websites (increased from the previous limit of 3) + * with a dynamic credit pricing system. + * + * Key Features: + * - Configurable website limits (3-20 websites) + * - Dynamic credit pricing: 30 credits base + 10 credits per additional website + * - Enhanced research depth and accuracy + * - Backward compatibility with existing applications + * + * Cost Structure: + * - Base cost: 30 credits for 3 websites (default) + * - Additional websites: 10 credits each (e.g., 5 websites = 30 + 2*10 = 50 credits) + * - Maximum websites: 20 (total cost: 30 + 17*10 = 200 credits) + * + * Requirements: + * - Node.js + * - scrapegraph-js package + * - dotenv package + * - A .env file with your SGAI_APIKEY + * + * Example .env file: + * SGAI_APIKEY=your_api_key_here + */ + +import { searchScraper } from 'scrapegraph-js'; +import 'dotenv/config'; + +/** + * Calculate the required credits for a SearchScraper request. + * @param {number} numWebsites - Number of websites to scrape (3-20) + * @returns {number} Total credits required + */ +function calculateCredits(numWebsites) { + // Validate website count + const validatedCount = Math.max(3, Math.min(20, numWebsites)); + + // Calculate credits: 30 base + 10 per extra website + if (validatedCount <= 3) { + return 30; + } else { + const extraWebsites = validatedCount - 3; + return 30 + (extraWebsites * 10); + } +} + +/** + * Query the Enhanced SearchScraper API for search results. + * @param {string} userPrompt - The search prompt string + * @param {number} numResults - Number of websites to scrape (3-20). Default is 3. + * @returns {Promise} The search results with metadata + */ +async function searchScraperQuery(userPrompt, numResults = 3) { + const apiKey = process.env.SGAI_APIKEY; + + if (!apiKey) { + throw new Error('SGAI_APIKEY not found in environment variables. Please create a .env file with: SGAI_APIKEY=your_api_key_here'); + } + + // Validate and calculate credits + const validatedWebsites = Math.max(3, Math.min(20, numResults)); + const requiredCredits = calculateCredits(validatedWebsites); + + console.log(`๐Ÿ” Search Prompt: ${userPrompt}`); + console.log(`๐ŸŒ Requested websites: ${numResults} โ†’ Validated: ${validatedWebsites}`); + console.log(`๐Ÿ’ณ Required credits: ${requiredCredits}`); + console.log('-'.repeat(60)); + + const startTime = Date.now(); + + try { + const response = await searchScraper(apiKey, userPrompt, numResults); + const executionTime = (Date.now() - startTime) / 1000; + + console.log(`โฑ๏ธ Execution time: ${executionTime.toFixed(2)} seconds`); + + // Extract result data + const resultData = { + result: response.result || '', + references: response.reference_urls || [], + metadata: { + request_id: response.request_id, + num_results: validatedWebsites, + execution_time: executionTime, + required_credits: requiredCredits, + }, + }; + + console.log(`โœ… Found ${resultData.references.length} reference sources`); + console.log(`๐Ÿ“Š Credits used: ${requiredCredits}`); + + return resultData; + + } catch (error) { + const executionTime = (Date.now() - startTime) / 1000; + console.log(`โฑ๏ธ Execution time: ${executionTime.toFixed(2)} seconds`); + console.log(`โŒ Error: ${error.message}`); + throw error; + } +} + +/** + * Demonstrate the benefits of different website scaling options. + */ +function demonstrateScalingBenefits() { + console.log('๐Ÿ’ฐ SEARCHSCRAPER CREDIT SCALING'); + console.log('='.repeat(50)); + + const scalingExamples = [ + [3, 'Standard Search (Default)'], + [5, 'Enhanced Search (More Sources)'], + [10, 'Comprehensive Search (Deep Research)'], + [15, 'Extensive Search (Maximum Coverage)'], + [20, 'Ultimate Search (Complete Coverage)'], + ]; + + scalingExamples.forEach(([websites, description]) => { + const credits = calculateCredits(websites); + const extraWebsites = Math.max(0, websites - 3); + const efficiency = websites / credits; + + console.log(`๐ŸŒ ${websites.toString().padStart(2)} websites (${description})`); + console.log(` ๐Ÿ’ณ ${credits.toString().padStart(3)} credits (base: 30 + ${extraWebsites} ร— 10)`); + console.log(` ๐Ÿ“Š Efficiency: ${efficiency.toFixed(3)} websites/credit`); + console.log(); + }); +} + +/** + * Run the same query with different website limits to show the benefit. + */ +async function runComparisonExample() { + const query = 'Latest advancements in artificial intelligence 2024'; + + console.log('๐Ÿ”ฌ COMPARISON: STANDARD vs ENHANCED SEARCH'); + console.log('='.repeat(60)); + console.log(`Query: ${query}`); + console.log(); + + // Test different configurations + const configurations = [ + { websites: 3, description: 'Standard Search' }, + { websites: 7, description: 'Enhanced Search' }, + ]; + + const results = {}; + + for (const config of configurations) { + const { websites, description } = config; + + console.log(`๐Ÿš€ Running ${description} (${websites} websites)...`); + try { + const result = await searchScraperQuery(query, websites); + results[websites] = result; + console.log(`โœ… ${description} completed successfully`); + console.log(` ๐Ÿ“„ Result length: ${result.result.length} characters`); + console.log(` ๐Ÿ”— References: ${result.references.length} sources`); + console.log(); + } catch (error) { + console.log(`โŒ ${description} failed: ${error.message}`); + console.log(); + } + } + + // Show comparison summary + const resultKeys = Object.keys(results); + if (resultKeys.length > 1) { + console.log('๐Ÿ“Š COMPARISON SUMMARY'); + console.log('-'.repeat(40)); + resultKeys.forEach(websites => { + const result = results[websites]; + const metadata = result.metadata; + console.log( + `๐ŸŒ ${websites} websites: ${result.references.length} sources, ` + + `${metadata.required_credits} credits, ` + + `${metadata.execution_time.toFixed(1)}s` + ); + }); + } +} + +/** + * Run concurrent searches to demonstrate parallel processing + */ +async function runConcurrentExample() { + console.log('๐Ÿš€ CONCURRENT REQUESTS EXAMPLE'); + console.log('='.repeat(50)); + + // Define multiple queries with different website limits + const queries = [ + ['JavaScript best practices 2024', 3], + ['React vs Vue comparison', 5], + ['Node.js performance optimization', 4], + ]; + + console.log('๐Ÿ”„ Running concurrent searches...'); + const startTime = Date.now(); + + try { + // Create promises for concurrent execution + const promises = queries.map(([query, numResults]) => + searchScraperQuery(query, numResults) + ); + + // Wait for all requests to complete + const results = await Promise.allSettled(promises); + const totalTime = (Date.now() - startTime) / 1000; + + console.log(`โฑ๏ธ Total concurrent execution time: ${totalTime.toFixed(2)} seconds`); + console.log(); + + const successfulResults = results.filter(r => r.status === 'fulfilled').map(r => r.value); + const failedResults = results.filter(r => r.status === 'rejected'); + + console.log(`โœ… Successful requests: ${successfulResults.length}`); + console.log(`โŒ Failed requests: ${failedResults.length}`); + + if (successfulResults.length > 0) { + const totalCredits = successfulResults.reduce((sum, r) => sum + r.metadata.required_credits, 0); + const totalSources = successfulResults.reduce((sum, r) => sum + r.references.length, 0); + console.log(`๐Ÿ’ณ Total credits used: ${totalCredits}`); + console.log(`๐Ÿ”— Total sources gathered: ${totalSources}`); + } + + if (failedResults.length > 0) { + console.log('\nโŒ Failed requests:'); + failedResults.forEach((result, index) => { + console.log(` ${index + 1}. ${result.reason.message}`); + }); + } + + } catch (error) { + console.log(`โŒ Concurrent execution failed: ${error.message}`); + } + + console.log(); +} + +/** + * Main function demonstrating enhanced SearchScraper features. + */ +async function main() { + console.log('๐Ÿš€ ENHANCED SEARCHSCRAPER DEMONSTRATION'); + console.log('๐Ÿ”— Issue #144: SearchScraper Website Limit Enhancement'); + console.log('='.repeat(70)); + console.log(); + + // Check API key + const apiKey = process.env.SGAI_APIKEY; + if (!apiKey) { + console.log('โŒ Error: SGAI_APIKEY not found in .env file'); + console.log('Please create a .env file with your API key:'); + console.log('SGAI_APIKEY=your_api_key_here'); + console.log(); + console.log('๐Ÿ“– Showing credit scaling demonstration without API calls...'); + console.log(); + demonstrateScalingBenefits(); + return; + } + + try { + // 1. Show credit scaling + demonstrateScalingBenefits(); + + // 2. Run basic example + console.log('๐ŸŽฏ BASIC EXAMPLE'); + console.log('='.repeat(30)); + + const userPrompt = 'What are the latest trends in machine learning?'; + const numResults = 5; // Enhanced search with 5 websites + + try { + const results = await searchScraperQuery(userPrompt, numResults); + + console.log(); + console.log('๐Ÿ“‹ RESULTS SUMMARY:'); + console.log(` ๐Ÿ” Query: ${userPrompt}`); + console.log(` ๐ŸŒ Websites scraped: ${results.metadata.num_results}`); + console.log(` ๐Ÿ’ณ Credits used: ${results.metadata.required_credits}`); + console.log(` โฑ๏ธ Execution time: ${results.metadata.execution_time.toFixed(1)}s`); + console.log(` ๐Ÿ”— Reference sources: ${results.references.length}`); + console.log(); + + // Show a portion of the result + const resultText = results.result; + if (resultText.length > 300) { + console.log(`๐Ÿ“„ Result preview: ${resultText.substring(0, 300)}...`); + } else { + console.log(`๐Ÿ“„ Result: ${resultText}`); + } + console.log(); + + // Show references + console.log('๐Ÿ”— REFERENCE SOURCES:'); + results.references.slice(0, 5).forEach((ref, i) => { + console.log(` ${i + 1}. ${ref}`); + }); + if (results.references.length > 5) { + console.log(` ... and ${results.references.length - 5} more sources`); + } + console.log(); + + } catch (error) { + console.log(`โŒ Error: ${error.message}`); + console.log(); + } + + // 3. Run comparison example + await runComparisonExample(); + + // 4. Run concurrent example + await runConcurrentExample(); + + console.log('โœจ Enhanced SearchScraper demonstration completed!'); + console.log(); + console.log('๐ŸŽฏ Key Enhancement Benefits:'); + console.log(' โ€ข Configurable website limits (3-20)'); + console.log(' โ€ข Transparent credit pricing'); + console.log(' โ€ข Better research depth and accuracy'); + console.log(' โ€ข Maintained backward compatibility'); + console.log(' โ€ข Enhanced data validation through multiple sources'); + console.log(' โ€ข Concurrent request support for better performance'); + + } catch (error) { + console.log(`โŒ Unexpected error: ${error.message}`); + } +} + +// Run the demonstration +main().catch(console.error); diff --git a/scrapegraph-js/examples/searchscraper/searchScraper_example.js b/scrapegraph-js/examples/searchscraper/searchScraper_example.js new file mode 100644 index 0000000..34f528a --- /dev/null +++ b/scrapegraph-js/examples/searchscraper/searchScraper_example.js @@ -0,0 +1,38 @@ +/** + * Basic SearchScraper Example + * + * This example demonstrates the configurable website limits feature: + * - Default: 3 websites (30 credits) + * - Enhanced: 5 websites (50 credits) - uncomment to try + * - Maximum: 20 websites (200 credits) - for comprehensive research + */ + +import { searchScraper } from 'scrapegraph-js'; +import 'dotenv/config'; + +const apiKey = process.env.SGAI_APIKEY; +const prompt = 'What is the latest version of Python and what are its main features?'; + +// Configure the number of websites to search +const numResults = 3; // Default: 3 websites (30 credits) +// const numResults = 5; // Enhanced: 5 websites (50 credits) - uncomment for more comprehensive results +// const numResults = 10; // Deep research: 10 websites (100 credits) - uncomment for extensive research + +try { + console.log(`๐Ÿ” Searching ${numResults} websites for: ${prompt}`); + console.log(`๐Ÿ’ณ Credits required: ${numResults <= 3 ? 30 : 30 + (numResults - 3) * 10}`); + console.log('-'.repeat(60)); + + const response = await searchScraper(apiKey, prompt, numResults); + + console.log('โœ… Search completed successfully!'); + console.log('\n๐Ÿ“‹ RESULTS:'); + console.log(`Result: ${response.result}`); + console.log('\n๐Ÿ”— Reference URLs:'); + response.reference_urls?.forEach((url, index) => { + console.log(`${index + 1}. ${url}`); + }); + +} catch (error) { + console.error('โŒ Error:', error.message); +} diff --git a/scrapegraph-js/examples/searchscraper/searchScraper_markdown_example.js b/scrapegraph-js/examples/searchscraper/searchScraper_markdown_example.js new file mode 100644 index 0000000..4acc65b --- /dev/null +++ b/scrapegraph-js/examples/searchscraper/searchScraper_markdown_example.js @@ -0,0 +1,93 @@ +/** + * Basic SearchScraper Markdown Example + * + * This example demonstrates the simplest way to use the SearchScraper API + * in markdown mode to search and scrape web pages, returning raw markdown content + * instead of AI-extracted data. + * + * Features demonstrated: + * - Basic search and scrape with markdown output + * - Simple error handling + * - Minimal code approach + * - Cost-effective: Only 2 credits per page (vs 10 credits for AI extraction) + */ + +import { searchScraper } from 'scrapegraph-js'; +import 'dotenv/config'; + +const apiKey = process.env.SGAI_APIKEY; + +async function basicSearchScraperMarkdownExample() { + console.log('๐Ÿ” Basic SearchScraper Markdown Example'); + console.log('='.repeat(50)); + + // Configuration + const userPrompt = 'Latest developments in artificial intelligence'; + const numResults = 3; + + console.log(`๐Ÿ“ Query: ${userPrompt}`); + console.log(`๐Ÿ“Š Results: ${numResults} websites`); + console.log('๐Ÿ”ง Mode: Markdown conversion'); + console.log('๐Ÿ’ฐ Cost: 2 credits per page (vs 10 for AI extraction)'); + + try { + // Send a searchscraper request in markdown mode + const response = await searchScraper( + apiKey, + userPrompt, + numResults, + null, // schema + null, // userAgent + { + extractionMode: false, // false = markdown mode, true = AI extraction mode + } + ); + + console.log('\nโœ… SearchScraper markdown completed successfully!'); + console.log(`๐Ÿ“„ Request ID: ${response.request_id || 'N/A'}`); + + // For async requests, you would need to poll for results + if (response.request_id && !response.status) { + console.log('๐Ÿ“ This is an async request. Use getSearchScraperRequest() to retrieve results.'); + console.log(`๐Ÿ” Use: getSearchScraperRequest('${response.request_id}')`); + } else { + // If it's a sync response, display the results + if (response.markdown_content) { + const markdownContent = response.markdown_content; + console.log('\n๐Ÿ“ Markdown Content Preview:'); + console.log(markdownContent.length > 500 + ? markdownContent.substring(0, 500) + '...' + : markdownContent + ); + } else { + console.log('โš ๏ธ No markdown content returned'); + } + + if (response.reference_urls) { + console.log(`\n๐Ÿ”— References: ${response.reference_urls.length}`); + console.log('\n๐Ÿ”— Reference URLs:'); + response.reference_urls.forEach((url, index) => { + console.log(` ${index + 1}. ${url}`); + }); + } else { + console.log('โš ๏ธ No reference URLs returned'); + } + } + + return true; + + } catch (error) { + console.error(`โŒ Error: ${error.message}`); + return false; + } +} + +// Run the example +try { + const success = await basicSearchScraperMarkdownExample(); + process.exit(success ? 0 : 1); +} catch (error) { + console.error('โŒ Unexpected error:', error.message); + process.exit(1); +} + diff --git a/scrapegraph-js/examples/searchscraper/searchScraper_markdown_polling_example.js b/scrapegraph-js/examples/searchscraper/searchScraper_markdown_polling_example.js new file mode 100644 index 0000000..4cce896 --- /dev/null +++ b/scrapegraph-js/examples/searchscraper/searchScraper_markdown_polling_example.js @@ -0,0 +1,145 @@ +/** + * Advanced SearchScraper Markdown Example with Async Polling + * + * This example demonstrates using the SearchScraper API in markdown mode + * with async request handling and result polling. + * + * Features demonstrated: + * - Async search and scrape with markdown output + * - Polling for async results with timeout handling + * - Error handling with async operations + * - Cost-effective: Only 2 credits per page (vs 10 credits for AI extraction) + */ + +import { searchScraper, getSearchScraperRequest } from 'scrapegraph-js'; +import 'dotenv/config'; + +const apiKey = process.env.SGAI_APIKEY; + +/** + * Poll for completion of an async SearchScraper request. + * + * @param {string} requestId - The request ID to poll for + * @param {number} maxWaitTime - Maximum time to wait in seconds + * @returns {Promise} The completed response or null if timeout + */ +async function waitForCompletion(requestId, maxWaitTime = 60) { + const startTime = Date.now(); + + while (Date.now() - startTime < maxWaitTime * 1000) { + try { + const result = await getSearchScraperRequest(apiKey, requestId); + + if (result.status === 'completed') { + return result; + } else if (result.status === 'failed') { + console.error(`โŒ Request failed: ${result.error || 'Unknown error'}`); + return null; + } else { + console.log(`โณ Status: ${result.status || 'processing'}... waiting 5 seconds`); + await new Promise(resolve => setTimeout(resolve, 5000)); + } + + } catch (error) { + console.warn(`โš ๏ธ Error polling for results: ${error.message}`); + await new Promise(resolve => setTimeout(resolve, 5000)); + } + } + + console.log('โฐ Timeout waiting for completion'); + return null; +} + +async function advancedSearchScraperMarkdownExample() { + console.log('๐Ÿ” Advanced SearchScraper Markdown Example with Async Polling'); + console.log('='.repeat(60)); + + // Configuration + const userPrompt = 'Latest developments in artificial intelligence'; + const numResults = 3; + + console.log(`๐Ÿ“ Query: ${userPrompt}`); + console.log(`๐Ÿ“Š Results: ${numResults} websites`); + console.log('๐Ÿ”ง Mode: Markdown conversion'); + console.log('๐Ÿ’ฐ Cost: 2 credits per page (vs 10 for AI extraction)'); + + try { + // Send a searchscraper request in markdown mode + const response = await searchScraper( + apiKey, + userPrompt, + numResults, + null, // schema + null, // userAgent + { + extractionMode: false, // false = markdown mode, true = AI extraction mode + } + ); + + console.log('\nโœ… SearchScraper request submitted successfully!'); + console.log(`๐Ÿ“„ Request ID: ${response.request_id || 'N/A'}`); + + // Check if this is an async request that needs polling + if (response.request_id && !response.status) { + console.log('โณ Waiting for async processing to complete...'); + + // Poll for completion + const finalResult = await waitForCompletion(response.request_id); + + if (finalResult) { + // Update response with final results + Object.assign(response, finalResult); + } else { + console.error('โŒ Failed to get completed results'); + return false; + } + } + + // Display results + if (response.status === 'completed') { + console.log('\n๐ŸŽ‰ SearchScraper markdown completed successfully!'); + + // Display markdown content (first 500 chars) + if (response.markdown_content) { + const markdownContent = response.markdown_content; + console.log('\n๐Ÿ“ Markdown Content Preview:'); + console.log(markdownContent.length > 500 + ? markdownContent.substring(0, 500) + '...' + : markdownContent + ); + } else { + console.log('โš ๏ธ No markdown content returned'); + } + + // Display reference URLs + if (response.reference_urls && response.reference_urls.length > 0) { + console.log(`\n๐Ÿ”— References: ${response.reference_urls.length}`); + console.log('\n๐Ÿ”— Reference URLs:'); + response.reference_urls.forEach((url, index) => { + console.log(` ${index + 1}. ${url}`); + }); + } else { + console.log('โš ๏ธ No reference URLs returned'); + } + + return true; + } else { + console.error(`โŒ Request not completed. Status: ${response.status || 'unknown'}`); + return false; + } + + } catch (error) { + console.error(`โŒ Error: ${error.message}`); + return false; + } +} + +// Run the example +try { + const success = await advancedSearchScraperMarkdownExample(); + process.exit(success ? 0 : 1); +} catch (error) { + console.error('โŒ Unexpected error:', error.message); + process.exit(1); +} + diff --git a/scrapegraph-js/examples/sitemap/README.md b/scrapegraph-js/examples/sitemap/README.md new file mode 100644 index 0000000..472f53e --- /dev/null +++ b/scrapegraph-js/examples/sitemap/README.md @@ -0,0 +1,128 @@ +# Sitemap Examples + +This directory contains examples demonstrating how to use the `sitemap` endpoint to extract URLs from website sitemaps. + +## ๐Ÿ“ Examples + +### 1. Basic Sitemap Extraction (`sitemap_example.js`) + +Demonstrates the basic usage of the sitemap endpoint: +- Extract all URLs from a website's sitemap +- Display the URLs +- Save URLs to a text file +- Save complete response as JSON + +**Usage:** +```bash +node sitemap_example.js +``` + +**What it does:** +1. Calls the sitemap API with a target website URL +2. Retrieves all URLs from the sitemap +3. Displays the first 10 URLs in the console +4. Saves all URLs to `sitemap_urls.txt` +5. Saves the full response to `sitemap_urls.json` + +### 2. Advanced: Sitemap + SmartScraper (`sitemap_with_smartscraper.js`) + +Shows how to combine sitemap extraction with smartScraper for batch processing: +- Extract sitemap URLs +- Filter URLs based on patterns (e.g., blog posts) +- Scrape selected URLs with smartScraper +- Display results and summary + +**Usage:** +```bash +node sitemap_with_smartscraper.js +``` + +**What it does:** +1. Extracts all URLs from a website's sitemap +2. Filters URLs (example: only blog posts or specific sections) +3. Scrapes each filtered URL using smartScraper +4. Extracts structured data from each page +5. Displays a summary of successful and failed scrapes + +**Use Cases:** +- Bulk content extraction from blogs +- E-commerce product catalog scraping +- News article aggregation +- Content migration and archival + +## ๐Ÿ”‘ Setup + +Before running the examples, make sure you have: + +1. **API Key**: Set your ScrapeGraph AI API key as an environment variable: + ```bash + export SGAI_APIKEY="your-api-key-here" + ``` + + Or create a `.env` file in the project root: + ``` + SGAI_APIKEY=your-api-key-here + ``` + +2. **Dependencies**: Install required packages: + ```bash + npm install + ``` + +## ๐Ÿ“Š Expected Output + +### Basic Sitemap Example Output: +``` +๐Ÿ—บ๏ธ Extracting sitemap from: https://example.com/ +โณ Please wait... + +โœ… Sitemap extracted successfully! +๐Ÿ“Š Total URLs found: 150 + +๐Ÿ“„ First 10 URLs: + 1. https://example.com/ + 2. https://example.com/about + 3. https://example.com/products + ... + +๐Ÿ’พ URLs saved to: sitemap_urls.txt +๐Ÿ’พ JSON saved to: sitemap_urls.json +``` + +### Advanced Example Output: +``` +๐Ÿ—บ๏ธ Step 1: Extracting sitemap from: https://example.com/ +โณ Please wait... + +โœ… Sitemap extracted successfully! +๐Ÿ“Š Total URLs found: 150 + +๐ŸŽฏ Selected 3 URLs to scrape: + 1. https://example.com/blog/post-1 + 2. https://example.com/blog/post-2 + 3. https://example.com/blog/post-3 + +๐Ÿค– Step 2: Scraping selected URLs... + +๐Ÿ“„ Scraping (1/3): https://example.com/blog/post-1 + โœ… Success +... + +๐Ÿ“ˆ Summary: + โœ… Successful: 3 + โŒ Failed: 0 + ๐Ÿ“Š Total: 3 +``` + +## ๐Ÿ’ก Tips + +1. **Rate Limiting**: When scraping multiple URLs, add delays between requests to avoid rate limiting +2. **Error Handling**: Always use try/catch blocks to handle API errors gracefully +3. **Filtering**: Use URL patterns to filter specific sections (e.g., `/blog/`, `/products/`) +4. **Batch Size**: Start with a small batch to test before processing hundreds of URLs + +## ๐Ÿ”— Related Documentation + +- [Sitemap API Documentation](../../README.md#sitemap) +- [SmartScraper Documentation](../../README.md#smart-scraper) +- [ScrapeGraph AI API Docs](https://docs.scrapegraphai.com) diff --git a/scrapegraph-js/examples/sitemap/sitemap_example.js b/scrapegraph-js/examples/sitemap/sitemap_example.js new file mode 100644 index 0000000..99b84b1 --- /dev/null +++ b/scrapegraph-js/examples/sitemap/sitemap_example.js @@ -0,0 +1,72 @@ +import { sitemap } from 'scrapegraph-js'; +import fs from 'fs'; +import 'dotenv/config'; + +/** + * Example: Extract sitemap URLs from a website + * + * This example demonstrates how to use the sitemap endpoint to extract + * all URLs from a website's sitemap.xml file. + */ + +// Get API key from environment variable +const apiKey = process.env.SGAI_APIKEY; + +// Target website URL +const url = 'https://scrapegraphai.com/'; + +console.log('๐Ÿ—บ๏ธ Extracting sitemap from:', url); +console.log('โณ Please wait...\n'); + +try { + // Call the sitemap endpoint + const response = await sitemap(apiKey, url); + + console.log('โœ… Sitemap extracted successfully!'); + console.log(`๐Ÿ“Š Total URLs found: ${response.urls.length}\n`); + + // Display first 10 URLs + console.log('๐Ÿ“„ First 10 URLs:'); + response.urls.slice(0, 10).forEach((url, index) => { + console.log(` ${index + 1}. ${url}`); + }); + + if (response.urls.length > 10) { + console.log(` ... and ${response.urls.length - 10} more URLs`); + } + + // Save the complete list to a file + saveUrlsToFile(response.urls, 'sitemap_urls.txt'); + + // Save as JSON for programmatic use + saveUrlsToJson(response, 'sitemap_urls.json'); + +} catch (error) { + console.error('โŒ Error:', error.message); + process.exit(1); +} + +/** + * Helper function to save URLs to a text file + */ +function saveUrlsToFile(urls, filename) { + try { + const content = urls.join('\n'); + fs.writeFileSync(filename, content); + console.log(`\n๐Ÿ’พ URLs saved to: ${filename}`); + } catch (err) { + console.error('โŒ Error saving file:', err.message); + } +} + +/** + * Helper function to save complete response as JSON + */ +function saveUrlsToJson(response, filename) { + try { + fs.writeFileSync(filename, JSON.stringify(response, null, 2)); + console.log(`๐Ÿ’พ JSON saved to: ${filename}`); + } catch (err) { + console.error('โŒ Error saving JSON:', err.message); + } +} diff --git a/scrapegraph-js/examples/sitemap/sitemap_with_smartscraper.js b/scrapegraph-js/examples/sitemap/sitemap_with_smartscraper.js new file mode 100644 index 0000000..962128e --- /dev/null +++ b/scrapegraph-js/examples/sitemap/sitemap_with_smartscraper.js @@ -0,0 +1,106 @@ +import { sitemap, smartScraper } from 'scrapegraph-js'; +import 'dotenv/config'; + +/** + * Advanced Example: Extract sitemap and scrape selected URLs + * + * This example demonstrates how to combine the sitemap endpoint + * with smartScraper to extract structured data from multiple pages. + */ + +const apiKey = process.env.SGAI_APIKEY; + +// Configuration +const websiteUrl = 'https://scrapegraphai.com/'; +const maxPagesToScrape = 3; // Limit number of pages to scrape +const userPrompt = 'Extract the page title and main heading'; + +console.log('๐Ÿ—บ๏ธ Step 1: Extracting sitemap from:', websiteUrl); +console.log('โณ Please wait...\n'); + +try { + // Step 1: Get all URLs from sitemap + const sitemapResponse = await sitemap(apiKey, websiteUrl); + + console.log('โœ… Sitemap extracted successfully!'); + console.log(`๐Ÿ“Š Total URLs found: ${sitemapResponse.urls.length}\n`); + + // Step 2: Filter URLs (example: only blog posts) + const filteredUrls = sitemapResponse.urls + .filter(url => url.includes('/blog/') || url.includes('/post/')) + .slice(0, maxPagesToScrape); + + if (filteredUrls.length === 0) { + console.log('โ„น๏ธ No blog URLs found, using first 3 URLs instead'); + filteredUrls.push(...sitemapResponse.urls.slice(0, maxPagesToScrape)); + } + + console.log(`๐ŸŽฏ Selected ${filteredUrls.length} URLs to scrape:`); + filteredUrls.forEach((url, index) => { + console.log(` ${index + 1}. ${url}`); + }); + + // Step 3: Scrape each selected URL + console.log('\n๐Ÿค– Step 2: Scraping selected URLs...\n'); + + const results = []; + + for (let i = 0; i < filteredUrls.length; i++) { + const url = filteredUrls[i]; + console.log(`๐Ÿ“„ Scraping (${i + 1}/${filteredUrls.length}): ${url}`); + + try { + const scrapeResponse = await smartScraper( + apiKey, + url, + userPrompt + ); + + results.push({ + url: url, + data: scrapeResponse.result, + status: 'success' + }); + + console.log(' โœ… Success'); + + // Add a small delay between requests to avoid rate limiting + if (i < filteredUrls.length - 1) { + await new Promise(resolve => setTimeout(resolve, 1000)); + } + + } catch (error) { + console.log(` โŒ Failed: ${error.message}`); + results.push({ + url: url, + error: error.message, + status: 'failed' + }); + } + } + + // Step 4: Display results + console.log('\n๐Ÿ“Š Scraping Results:\n'); + results.forEach((result, index) => { + console.log(`${index + 1}. ${result.url}`); + if (result.status === 'success') { + console.log(' Status: โœ… Success'); + console.log(' Data:', JSON.stringify(result.data, null, 2)); + } else { + console.log(' Status: โŒ Failed'); + console.log(' Error:', result.error); + } + console.log(''); + }); + + // Summary + const successCount = results.filter(r => r.status === 'success').length; + console.log('๐Ÿ“ˆ Summary:'); + console.log(` โœ… Successful: ${successCount}`); + console.log(` โŒ Failed: ${results.length - successCount}`); + console.log(` ๐Ÿ“Š Total: ${results.length}`); + +} catch (error) { + console.error('โŒ Error:', error.message); + process.exit(1); +} diff --git a/scrapegraph-js/examples/smartscraper/schema_smartScraper_example.js b/scrapegraph-js/examples/smartscraper/schema_smartScraper_example.js new file mode 100644 index 0000000..bdf51df --- /dev/null +++ b/scrapegraph-js/examples/smartscraper/schema_smartScraper_example.js @@ -0,0 +1,20 @@ +import { smartScraper } from 'scrapegraph-js'; +import { z } from 'zod'; +import 'dotenv/config'; + +const apiKey = process.env.SGAI_APIKEY; +const url = 'https://scrapegraphai.com/'; +const prompt = 'What does the company do? and '; + +const schema = z.object({ + title: z.string().describe('The title of the webpage'), + description: z.string().describe('The description of the webpage'), + summary: z.string().describe('A brief summary of the webpage'), +}); + +try { + const response = await smartScraper(apiKey, url, prompt, schema); + console.log(response.result); +} catch (error) { + console.error(error); +} diff --git a/scrapegraph-js/examples/smartscraper/smartScraper_cookies_example.js b/scrapegraph-js/examples/smartscraper/smartScraper_cookies_example.js new file mode 100644 index 0000000..088dee5 --- /dev/null +++ b/scrapegraph-js/examples/smartscraper/smartScraper_cookies_example.js @@ -0,0 +1,125 @@ +/** + * Example demonstrating how to use the SmartScraper API with cookies. + * + * This example shows how to: + * 1. Set up the API request with cookies for authentication + * 2. Use cookies with infinite scrolling + * 3. Define a Zod schema for structured output + * 4. Make the API call and handle the response + * 5. Process the extracted data + * + * Requirements: + * - Node.js 16+ + * - scrapegraph-js + * - A .env file with your SGAI_APIKEY + * + * Example .env file: + * SGAI_APIKEY=your_api_key_here + */ + +import { smartScraper } from 'scrapegraph-js'; +import { z } from 'zod'; +import 'dotenv/config'; + +// Define the data schema for structured output +const CookieInfoSchema = z.object({ + cookies: z.record(z.string()).describe('Dictionary of cookie key-value pairs') +}); + +async function main() { + const apiKey = process.env.SGAI_APIKEY; + + // Check if API key is available + if (!apiKey) { + console.error('Error: SGAI_APIKEY not found in .env file'); + console.log('Please create a .env file with your API key:'); + console.log('SGAI_APIKEY=your_api_key_here'); + return; + } + + // Example 1: Basic cookies example (httpbin.org/cookies) + console.log('='.repeat(60)); + console.log('EXAMPLE 1: Basic Cookies Example'); + console.log('='.repeat(60)); + + const websiteUrl = 'https://httpbin.org/cookies'; + const userPrompt = 'Extract all cookies info'; + const cookies = { cookies_key: 'cookies_value' }; + + try { + // Perform the scraping with cookies + const response = await smartScraper( + apiKey, + websiteUrl, + userPrompt, + CookieInfoSchema, + null, // numberOfScrolls + null, // totalPages + cookies + ); + + // Print the results + console.log('\nExtracted Cookie Information:'); + console.log(JSON.stringify(response, null, 2)); + + } catch (error) { + console.error(`Error occurred: ${error.message}`); + } + + // Example 2: Cookies with infinite scrolling + console.log('\n' + '='.repeat(60)); + console.log('EXAMPLE 2: Cookies with Infinite Scrolling'); + console.log('='.repeat(60)); + + const cookiesWithScroll = { session_id: 'abc123', user_token: 'xyz789' }; + + try { + // Perform the scraping with cookies and infinite scrolling + const response = await smartScraper( + apiKey, + websiteUrl, + 'Extract all cookies and scroll information', + CookieInfoSchema, + 3, // numberOfScrolls + null, // totalPages + cookiesWithScroll + ); + + // Print the results + console.log('\nExtracted Cookie Information with Scrolling:'); + console.log(JSON.stringify(response, null, 2)); + + } catch (error) { + console.error(`Error occurred: ${error.message}`); + } + + // Example 3: Cookies with pagination + console.log('\n' + '='.repeat(60)); + console.log('EXAMPLE 3: Cookies with Pagination'); + console.log('='.repeat(60)); + + const cookiesWithPagination = { auth_token: 'secret123', preferences: 'dark_mode' }; + + try { + // Perform the scraping with cookies and pagination + const response = await smartScraper( + apiKey, + websiteUrl, + 'Extract all cookies from multiple pages', + CookieInfoSchema, + null, // numberOfScrolls + 3, // totalPages + cookiesWithPagination + ); + + // Print the results + console.log('\nExtracted Cookie Information with Pagination:'); + console.log(JSON.stringify(response, null, 2)); + + } catch (error) { + console.error(`Error occurred: ${error.message}`); + } +} + +// Run the example +main().catch(console.error); diff --git a/scrapegraph-js/examples/smartscraper/smartScraper_cookies_simple_example.js b/scrapegraph-js/examples/smartscraper/smartScraper_cookies_simple_example.js new file mode 100644 index 0000000..369987f --- /dev/null +++ b/scrapegraph-js/examples/smartscraper/smartScraper_cookies_simple_example.js @@ -0,0 +1,40 @@ +/** + * Simple example demonstrating cookies usage with SmartScraper. + * + * This example shows the basic pattern for using cookies with the API. + */ + +import { smartScraper } from 'scrapegraph-js'; +import 'dotenv/config'; + +const apiKey = process.env.SGAI_APIKEY; + +// Example cookies for authentication +const cookies = { + session_id: 'abc123def456', + auth_token: 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...', + user_preferences: 'dark_mode,usd' +}; + +async function scrapeWithCookies() { + try { + const response = await smartScraper( + apiKey, + 'https://example.com/dashboard', + 'Extract user profile information', + null, // schema + null, // numberOfScrolls + null, // totalPages + cookies // cookies parameter + ); + + console.log('โœ… Scraping with cookies completed successfully'); + console.log(JSON.stringify(response, null, 2)); + + } catch (error) { + console.error('โŒ Error:', error.message); + } +} + +// Run the example +scrapeWithCookies(); diff --git a/scrapegraph-js/examples/smartscraper/smartScraper_example.js b/scrapegraph-js/examples/smartscraper/smartScraper_example.js new file mode 100644 index 0000000..38e5613 --- /dev/null +++ b/scrapegraph-js/examples/smartscraper/smartScraper_example.js @@ -0,0 +1,13 @@ +import { smartScraper } from 'scrapegraph-js'; +import 'dotenv/config'; + +const apiKey = process.env.SGAI_APIKEY; +const url = 'https://scrapegraphai.com'; +const prompt = 'What does the company do?'; + +try { + const response = await smartScraper(apiKey, url, prompt); + console.log(response); +} catch (error) { + console.error(error); +} diff --git a/scrapegraph-js/examples/smartscraper/smartScraper_html_example.js b/scrapegraph-js/examples/smartscraper/smartScraper_html_example.js new file mode 100644 index 0000000..fe98504 --- /dev/null +++ b/scrapegraph-js/examples/smartscraper/smartScraper_html_example.js @@ -0,0 +1,136 @@ +import { smartScraper } from '../../index.js'; +import 'dotenv/config'; + +/** + * Example: Using SmartScraper with HTML content + * + * This example demonstrates how to use the SmartScraper with local HTML content + * instead of fetching from a URL. This is useful when you already have HTML + * content (e.g., from another source) and want to extract structured data from it. + */ + +const apiKey = process.env.SGAI_APIKEY; + +if (!apiKey) { + console.error('โŒ Error: SGAI_APIKEY environment variable is not set'); + console.log('๐Ÿ’ก Please set your API key: export SGAI_APIKEY="your-api-key"'); + process.exit(1); +} + +// Sample HTML content (e.g., from a file or API response) +const htmlContent = ` + + + + + + Product Catalog + + +
+

Product Catalog

+ +
+

Laptop Pro 15

+
TechCorp
+
$1,299.99
+
4.5/5
+
In Stock
+

High-performance laptop with 15-inch display, 16GB RAM, and 512GB SSD

+
+ +
+

Wireless Mouse Elite

+
PeripheralCo
+
$29.99
+
4.8/5
+
In Stock
+

Ergonomic wireless mouse with precision tracking

+
+ +
+

USB-C Hub Pro

+
ConnectTech
+
$49.99
+
4.3/5
+
Out of Stock
+

7-in-1 USB-C hub with HDMI, USB 3.0, and SD card reader

+
+ +
+

Customer Reviews

+
+

"The Laptop Pro 15 is amazing! Fast and reliable."

+

- John D.

+
+
+

"Great mouse, very comfortable for long work sessions."

+

- Sarah M.

+
+
+ +
+

Shipping Information

+

Free shipping on orders over $50. Standard delivery takes 3-5 business days.

+
+
+ + +`; + +async function runExample() { + console.log('๐Ÿš€ SmartScraper HTML Example'); + console.log('='.repeat(60)); + console.log(''); + + try { + console.log('๐Ÿ“„ Processing HTML content...'); + console.log(`๐Ÿ“ Content size: ${(Buffer.byteLength(htmlContent, 'utf8') / 1024).toFixed(2)} KB`); + console.log(''); + + const prompt = 'Extract all products with their names, brands, prices, ratings, and stock status'; + + console.log('๐Ÿ” Prompt:', prompt); + console.log('โณ Sending request to ScrapeGraph AI...'); + console.log(''); + + const result = await smartScraper( + apiKey, + null, // url is null when using HTML + prompt, + null, // schema (optional) + null, // numberOfScrolls (not applicable for local HTML) + null, // totalPages (not applicable for local HTML) + null, // cookies (not applicable for local HTML) + {}, // options + false, // plain_text + false, // renderHeavyJs (not applicable for local HTML) + false, // stealth (not applicable for local HTML) + htmlContent, // websiteHtml + null // websiteMarkdown + ); + + console.log('โœ… Success! Extraction completed.'); + console.log(''); + console.log('๐Ÿ“Š Extracted Data:'); + console.log('='.repeat(60)); + console.log(JSON.stringify(result, null, 2)); + console.log('='.repeat(60)); + + } catch (error) { + console.error('โŒ Error:', error.message); + if (error.response) { + console.error('API Response:', error.response.data); + } + process.exit(1); + } +} + +console.log('๐Ÿ’ก This example demonstrates:'); +console.log(' - Processing local HTML content'); +console.log(' - Extracting structured data from HTML'); +console.log(' - Using null for URL parameter when using HTML'); +console.log(' - Content size validation (max 2MB)'); +console.log(''); + +runExample(); diff --git a/scrapegraph-js/examples/smartscraper/smartScraper_infinite_scroll_example.js b/scrapegraph-js/examples/smartscraper/smartScraper_infinite_scroll_example.js new file mode 100644 index 0000000..f2d34c6 --- /dev/null +++ b/scrapegraph-js/examples/smartscraper/smartScraper_infinite_scroll_example.js @@ -0,0 +1,15 @@ +import { smartScraper } from 'scrapegraph-js'; +import 'dotenv/config'; + +const apiKey = process.env.SGAI_APIKEY; +// Example URL that requires scrolling (e.g., a social media feed or infinite scroll page) +const url = 'https://example.com/infinite-scroll-page'; +const prompt = 'Extract all the posts from the feed'; +const numberOfScrolls = 10; // Will scroll 10 times to load more content + +try { + const response = await smartScraper(apiKey, url, prompt, null, numberOfScrolls); + console.log('Extracted data from scrolled page:', response); +} catch (error) { + console.error('Error:', error); +} diff --git a/scrapegraph-js/examples/smartscraper/smartScraper_markdown_example.js b/scrapegraph-js/examples/smartscraper/smartScraper_markdown_example.js new file mode 100644 index 0000000..a3cd3b8 --- /dev/null +++ b/scrapegraph-js/examples/smartscraper/smartScraper_markdown_example.js @@ -0,0 +1,113 @@ +import { smartScraper } from '../../index.js'; +import 'dotenv/config'; + +/** + * Example: Using SmartScraper with Markdown content + * + * This example demonstrates how to use the SmartScraper with local Markdown content + * instead of fetching from a URL. This is useful when you already have markdown + * content and want to extract structured data from it. + */ + +const apiKey = process.env.SGAI_APIKEY; + +if (!apiKey) { + console.error('โŒ Error: SGAI_APIKEY environment variable is not set'); + console.log('๐Ÿ’ก Please set your API key: export SGAI_APIKEY="your-api-key"'); + process.exit(1); +} + +// Sample markdown content (e.g., from a file or API response) +const markdownContent = ` +# Product Catalog + +## Featured Products + +### Laptop Pro 15 +- **Brand**: TechCorp +- **Price**: $1,299.99 +- **Rating**: 4.5/5 +- **In Stock**: Yes +- **Description**: High-performance laptop with 15-inch display, 16GB RAM, and 512GB SSD + +### Wireless Mouse Elite +- **Brand**: PeripheralCo +- **Price**: $29.99 +- **Rating**: 4.8/5 +- **In Stock**: Yes +- **Description**: Ergonomic wireless mouse with precision tracking + +### USB-C Hub Pro +- **Brand**: ConnectTech +- **Price**: $49.99 +- **Rating**: 4.3/5 +- **In Stock**: No +- **Description**: 7-in-1 USB-C hub with HDMI, USB 3.0, and SD card reader + +## Customer Reviews + +> "The Laptop Pro 15 is amazing! Fast and reliable." - John D. + +> "Great mouse, very comfortable for long work sessions." - Sarah M. + +## Shipping Information + +Free shipping on orders over $50. Standard delivery takes 3-5 business days. +`; + +async function runExample() { + console.log('๐Ÿš€ SmartScraper Markdown Example'); + console.log('='.repeat(60)); + console.log(''); + + try { + console.log('๐Ÿ“ Processing Markdown content...'); + console.log(`๐Ÿ“ Content size: ${(Buffer.byteLength(markdownContent, 'utf8') / 1024).toFixed(2)} KB`); + console.log(''); + + const prompt = 'Extract all products with their names, brands, prices, ratings, and stock status'; + + console.log('๐Ÿ” Prompt:', prompt); + console.log('โณ Sending request to ScrapeGraph AI...'); + console.log(''); + + const result = await smartScraper( + apiKey, + null, // url is null when using markdown + prompt, + null, // schema (optional) + null, // numberOfScrolls (not applicable for markdown) + null, // totalPages (not applicable for markdown) + null, // cookies (not applicable for markdown) + {}, // options + false, // plain_text + false, // renderHeavyJs (not applicable for markdown) + false, // stealth (not applicable for markdown) + null, // websiteHtml + markdownContent // websiteMarkdown + ); + + console.log('โœ… Success! Extraction completed.'); + console.log(''); + console.log('๐Ÿ“Š Extracted Data:'); + console.log('='.repeat(60)); + console.log(JSON.stringify(result, null, 2)); + console.log('='.repeat(60)); + + } catch (error) { + console.error('โŒ Error:', error.message); + if (error.response) { + console.error('API Response:', error.response.data); + } + process.exit(1); + } +} + +console.log('๐Ÿ’ก This example demonstrates:'); +console.log(' - Processing local Markdown content'); +console.log(' - Extracting structured data from markdown'); +console.log(' - Using null for URL parameter when using markdown'); +console.log(' - Content size validation (max 2MB)'); +console.log(''); + +runExample(); diff --git a/scrapegraph-js/examples/smartscraper/smartScraper_pagination_enhanced_example.js b/scrapegraph-js/examples/smartscraper/smartScraper_pagination_enhanced_example.js new file mode 100644 index 0000000..f345ad6 --- /dev/null +++ b/scrapegraph-js/examples/smartscraper/smartScraper_pagination_enhanced_example.js @@ -0,0 +1,287 @@ +import { smartScraper } from 'scrapegraph-js'; +import { z } from 'zod'; +import 'dotenv/config'; + +// Define a schema for structured product data +const ProductSchema = z.object({ + name: z.string(), + price: z.string().optional(), + rating: z.string().optional(), + image_url: z.string().optional(), + description: z.string().optional(), +}); + +const ProductListSchema = z.object({ + products: z.array(ProductSchema), +}); + +/** + * Basic pagination example + */ +async function basicPaginationExample() { + console.log('๐Ÿ” Basic Pagination Example'); + console.log('='.repeat(50)); + + const apiKey = process.env.SGAI_APIKEY; + const url = 'https://www.amazon.in/s?k=tv&crid=1TEF1ZFVLU8R8&sprefix=t%2Caps%2C390&ref=nb_sb_noss_2'; + const prompt = 'Extract all product info including name, price, rating, and image_url'; + const totalPages = 3; + + try { + console.log(`๐ŸŒ URL: ${url}`); + console.log(`๐Ÿ“ Prompt: ${prompt}`); + console.log(`๐Ÿ“„ Total Pages: ${totalPages}`); + console.log('-'.repeat(50)); + + const startTime = Date.now(); + + const response = await smartScraper(apiKey, url, prompt, null, null, totalPages); + + const duration = Date.now() - startTime; + + console.log(`โœ… Request completed in ${duration}ms`); + console.log('๐Ÿ“Š Response type:', typeof response); + console.log('๐Ÿ“‹ Response preview:', JSON.stringify(response, null, 2).substring(0, 500) + '...'); + + return response; + } catch (error) { + console.error('โŒ Basic pagination error:', error.message); + throw error; + } +} + +/** + * Pagination with schema validation + */ +async function paginationWithSchemaExample() { + console.log('\n๐Ÿ” Pagination with Schema Validation'); + console.log('='.repeat(50)); + + const apiKey = process.env.SGAI_APIKEY; + const url = 'https://www.amazon.in/s?k=laptops&ref=nb_sb_noss'; + const prompt = 'Extract product information including name, price, rating, image_url, and description'; + const totalPages = 2; + + try { + console.log(`๐ŸŒ URL: ${url}`); + console.log(`๐Ÿ“ Prompt: ${prompt}`); + console.log(`๐Ÿ“„ Total Pages: ${totalPages}`); + console.log('๐Ÿ—๏ธ Using ProductListSchema for structured output'); + console.log('-'.repeat(50)); + + const startTime = Date.now(); + + const response = await smartScraper(apiKey, url, prompt, ProductListSchema, null, totalPages); + + const duration = Date.now() - startTime; + + console.log(`โœ… Request completed in ${duration}ms`); + console.log('๐Ÿ“Š Response type:', typeof response); + + // Try to validate the response against our schema + try { + const validatedData = ProductListSchema.parse(response); + console.log(`โœจ Schema validation successful! Found ${validatedData.products.length} products`); + + // Show first few products + validatedData.products.slice(0, 3).forEach((product, index) => { + console.log(` ${index + 1}. ${product.name} - ${product.price || 'N/A'}`); + }); + } catch (schemaError) { + console.log('โš ๏ธ Schema validation failed, but request succeeded'); + console.log('๐Ÿ“‹ Raw response:', JSON.stringify(response, null, 2).substring(0, 300) + '...'); + } + + return response; + } catch (error) { + console.error('โŒ Schema pagination error:', error.message); + throw error; + } +} + +/** + * Pagination with scrolling and all features + */ +async function paginationWithAllFeaturesExample() { + console.log('\n๐Ÿ” Pagination with All Features'); + console.log('='.repeat(50)); + + const apiKey = process.env.SGAI_APIKEY; + const url = 'https://news.ycombinator.com/'; + const prompt = 'Extract all news articles with title, points, and comments count'; + const totalPages = 2; + const numberOfScrolls = 5; + + try { + console.log(`๐ŸŒ URL: ${url}`); + console.log(`๐Ÿ“ Prompt: ${prompt}`); + console.log(`๐Ÿ“„ Total Pages: ${totalPages}`); + console.log(`๐Ÿ”„ Number of Scrolls: ${numberOfScrolls}`); + console.log('-'.repeat(50)); + + const startTime = Date.now(); + + const response = await smartScraper(apiKey, url, prompt, null, numberOfScrolls, totalPages); + + const duration = Date.now() - startTime; + + console.log(`โœ… Request completed in ${duration}ms`); + console.log('๐Ÿ“Š Response type:', typeof response); + console.log('๐Ÿ“‹ Response preview:', JSON.stringify(response, null, 2).substring(0, 400) + '...'); + + return response; + } catch (error) { + console.error('โŒ Full features pagination error:', error.message); + throw error; + } +} + +/** + * Test different pagination parameters + */ +async function testPaginationParameters() { + console.log('\n๐Ÿงช Testing Pagination Parameters'); + console.log('='.repeat(50)); + + const apiKey = process.env.SGAI_APIKEY; + const testCases = [ + { + name: 'Single page (no pagination)', + url: 'https://example.com', + prompt: 'Extract basic page info', + totalPages: null, + }, + { + name: 'Two pages', + url: 'https://example.com/products', + prompt: 'Extract product listings', + totalPages: 2, + }, + { + name: 'Maximum pages', + url: 'https://example.com/search', + prompt: 'Extract search results', + totalPages: 10, + }, + ]; + + for (const testCase of testCases) { + console.log(`\n๐Ÿงช Test: ${testCase.name}`); + console.log(` URL: ${testCase.url}`); + console.log(` Pages: ${testCase.totalPages || 'default (1)'}`); + + try { + // This is just to test the parameter validation + // In a real scenario, you'd use actual URLs + console.log(' โœ… Configuration valid'); + } catch (error) { + console.log(` โŒ Configuration error: ${error.message}`); + } + } +} + +/** + * Test pagination validation + */ +async function testPaginationValidation() { + console.log('\n๐Ÿงช Testing Pagination Validation'); + console.log('='.repeat(50)); + + const apiKey = process.env.SGAI_APIKEY; + const url = 'https://example.com'; + const prompt = 'Extract data'; + + const testCases = [ + { pages: 0, shouldFail: true, description: 'Zero pages' }, + { pages: 1, shouldFail: false, description: 'Minimum valid pages' }, + { pages: 5, shouldFail: false, description: 'Mid-range pages' }, + { pages: 10, shouldFail: false, description: 'Maximum valid pages' }, + { pages: 11, shouldFail: true, description: 'Exceed maximum pages' }, + { pages: -1, shouldFail: true, description: 'Negative pages' }, + { pages: 1.5, shouldFail: true, description: 'Float pages' }, + { pages: 'invalid', shouldFail: true, description: 'String pages' }, + ]; + + for (const testCase of testCases) { + console.log(`\n๐Ÿงช Test: ${testCase.description} (${testCase.pages})`); + + try { + // This will validate the parameters but not make the actual request + if (testCase.pages !== null) { + if (!Number.isInteger(testCase.pages) || testCase.pages < 1 || testCase.pages > 10) { + throw new Error('totalPages must be an integer between 1 and 10'); + } + } + + if (testCase.shouldFail) { + console.log(' โŒ Expected validation to fail, but it passed'); + } else { + console.log(' โœ… Validation passed as expected'); + } + } catch (error) { + if (testCase.shouldFail) { + console.log(` โœ… Validation failed as expected: ${error.message}`); + } else { + console.log(` โŒ Unexpected validation failure: ${error.message}`); + } + } + } +} + +/** + * Main function to run all examples + */ +async function main() { + console.log('ScrapeGraph JS SDK - SmartScraper Pagination Examples'); + console.log('='.repeat(60)); + + if (!process.env.SGAI_APIKEY) { + console.error('โŒ Error: SGAI_APIKEY environment variable not set'); + console.error('Please set your API key:'); + console.error(' export SGAI_APIKEY="your-api-key-here"'); + console.error(' or create a .env file with: SGAI_APIKEY=your-api-key-here'); + process.exit(1); + } + + try { + // Run basic pagination example + await basicPaginationExample(); + + // Run pagination with schema validation + await paginationWithSchemaExample(); + + // Run pagination with all features + await paginationWithAllFeaturesExample(); + + // Test different parameters + await testPaginationParameters(); + + // Test validation + await testPaginationValidation(); + + console.log('\n' + '='.repeat(60)); + console.log('โœ… All examples completed successfully!'); + console.log('\nNext steps:'); + console.log('1. Set SGAI_APIKEY environment variable'); + console.log('2. Replace example URLs with real websites'); + console.log('3. Adjust totalPages parameter (1-10)'); + console.log('4. Customize prompts for your use case'); + console.log('5. Define schemas for structured data'); + console.log('\nTips:'); + console.log('- Use smaller totalPages for testing'); + console.log('- Pagination requests may take longer'); + console.log('- Some websites may not support pagination'); + console.log('- Consider rate limiting for large requests'); + + } catch (error) { + console.error('\nโŒ Example execution failed:', error.message); + console.error('\nTroubleshooting:'); + console.error('- Check your API key'); + console.error('- Verify network connectivity'); + console.error('- Try with smaller totalPages values'); + console.error('- Check if the website supports pagination'); + } +} + +// Run the examples +main(); diff --git a/scrapegraph-js/examples/smartscraper/smartScraper_pagination_example.js b/scrapegraph-js/examples/smartscraper/smartScraper_pagination_example.js new file mode 100644 index 0000000..8e726bc --- /dev/null +++ b/scrapegraph-js/examples/smartscraper/smartScraper_pagination_example.js @@ -0,0 +1,41 @@ +import { smartScraper } from 'scrapegraph-js'; +import 'dotenv/config'; + +const apiKey = process.env.SGAI_APIKEY; +const url = 'https://www.amazon.in/s?k=tv&crid=1TEF1ZFVLU8R8&sprefix=t%2Caps%2C390&ref=nb_sb_noss_2'; +const prompt = 'Extract all product info including name, price, rating, and image_url'; +const totalPages = 3; // Number of pages to scrape + +try { + console.log('๐Ÿ” Starting SmartScraper pagination request...'); + console.log(`๐ŸŒ URL: ${url}`); + console.log(`๐Ÿ“ Prompt: ${prompt}`); + console.log(`๐Ÿ“„ Total Pages: ${totalPages}`); + console.log('-'.repeat(50)); + + const startTime = Date.now(); + + const response = await smartScraper(apiKey, url, prompt, null, null, totalPages); + + const duration = Date.now() - startTime; + + console.log(`โœ… Request completed in ${duration}ms`); + console.log('๐Ÿ“Š Response:', JSON.stringify(response, null, 2)); + + // Check if pagination worked + if (response && typeof response === 'object' && response.data) { + console.log(`\nโœจ Pagination successful! Data extracted from ${totalPages} pages`); + } else if (Array.isArray(response)) { + console.log(`\nโœ… Pagination successful! Extracted ${response.length} items`); + } else { + console.log(`\n๐Ÿ“‹ Request successful! Response type: ${typeof response}`); + } + +} catch (error) { + console.error('โŒ Error:', error.message); + console.error('This could be due to:'); + console.error(' - Invalid API key'); + console.error(' - Rate limiting'); + console.error(' - Server issues'); + console.error(' - Network connectivity issues'); +} diff --git a/scrapegraph-js/examples/smartscraper/smartScraper_pagination_with_scroll_example.js b/scrapegraph-js/examples/smartscraper/smartScraper_pagination_with_scroll_example.js new file mode 100644 index 0000000..96b1105 --- /dev/null +++ b/scrapegraph-js/examples/smartscraper/smartScraper_pagination_with_scroll_example.js @@ -0,0 +1,121 @@ +import { smartScraper } from 'scrapegraph-js'; +import { z } from 'zod'; +import 'dotenv/config'; + +// Define schema for product data +const ProductSchema = z.object({ + name: z.string().describe('The product name'), + price: z.string().optional().describe('The product price'), + rating: z.string().optional().describe('The product rating'), + image_url: z.string().optional().describe('The product image URL'), + availability: z.string().optional().describe('Product availability status'), +}); + +const ProductListSchema = z.object({ + products: z.array(ProductSchema).describe('List of products found'), + total_count: z.number().optional().describe('Total number of products'), + page_info: z.object({ + current_page: z.number().optional(), + total_pages: z.number().optional(), + }).optional().describe('Pagination information'), +}); + +const apiKey = process.env.SGAI_APIKEY; +const url = 'https://www.amazon.in/s?k=tv&crid=1TEF1ZFVLU8R8&sprefix=t%2Caps%2C390&ref=nb_sb_noss_2'; +const prompt = 'Extract all product information including name, price, rating, image_url, and availability. Also extract pagination info if available.'; +const numberOfScrolls = 5; // Scroll to load more products on each page +const totalPages = 3; // Scrape 3 pages total + +console.log('๐Ÿš€ SmartScraper with Pagination and Scrolling'); +console.log('='.repeat(60)); +console.log(`๐ŸŒ URL: ${url}`); +console.log(`๐Ÿ“ Prompt: ${prompt}`); +console.log(`๐Ÿ”„ Number of Scrolls per page: ${numberOfScrolls}`); +console.log(`๐Ÿ“„ Total Pages: ${totalPages}`); +console.log(`๐Ÿ—๏ธ Using structured schema: ProductListSchema`); +console.log('-'.repeat(60)); + +try { + const startTime = Date.now(); + + const response = await smartScraper( + apiKey, + url, + prompt, + ProductListSchema, + numberOfScrolls, + totalPages + ); + + const duration = Date.now() - startTime; + + console.log(`โœ… Request completed in ${duration}ms`); + console.log('๐Ÿ“Š Response type:', typeof response); + + // Validate and display the response + try { + const validatedData = ProductListSchema.parse(response); + console.log(`\nโœจ Schema validation successful!`); + console.log(`๐Ÿ“ฆ Found ${validatedData.products.length} products`); + + if (validatedData.page_info) { + console.log(`๐Ÿ“„ Page info: ${validatedData.page_info.current_page}/${validatedData.page_info.total_pages}`); + } + + if (validatedData.total_count) { + console.log(`๐Ÿ”ข Total products: ${validatedData.total_count}`); + } + + console.log('\n๐Ÿ“‹ Product Examples:'); + validatedData.products.slice(0, 5).forEach((product, index) => { + console.log(` ${index + 1}. ${product.name}`); + console.log(` ๐Ÿ’ฐ Price: ${product.price || 'N/A'}`); + console.log(` โญ Rating: ${product.rating || 'N/A'}`); + console.log(` ๐Ÿ“ฆ Availability: ${product.availability || 'N/A'}`); + console.log(` ๐Ÿ–ผ๏ธ Image: ${product.image_url ? 'Available' : 'N/A'}`); + console.log(''); + }); + + if (validatedData.products.length > 5) { + console.log(` ... and ${validatedData.products.length - 5} more products`); + } + + } catch (validationError) { + console.log('โš ๏ธ Schema validation failed, showing raw response:'); + console.log(JSON.stringify(response, null, 2)); + console.log('\nValidation error:', validationError.message); + } + + console.log('\n' + '='.repeat(60)); + console.log('โœ… Pagination with scrolling completed successfully!'); + console.log('\nFeatures demonstrated:'); + console.log('โœ“ Multi-page scraping (pagination)'); + console.log('โœ“ Infinite scrolling on each page'); + console.log('โœ“ Structured data extraction with Zod schema'); + console.log('โœ“ Comprehensive error handling'); + console.log('โœ“ Performance timing'); + +} catch (error) { + console.error('\nโŒ Error occurred:', error.message); + + // Provide specific error guidance + if (error.message.includes('totalPages')) { + console.error('\n๐Ÿ”ง Pagination Error:'); + console.error('- totalPages must be an integer between 1 and 10'); + console.error('- Current value:', totalPages); + } else if (error.message.includes('numberOfScrolls')) { + console.error('\n๐Ÿ”ง Scrolling Error:'); + console.error('- numberOfScrolls must be an integer between 0 and 100'); + console.error('- Current value:', numberOfScrolls); + } else if (error.message.includes('SGAI_APIKEY')) { + console.error('\n๐Ÿ”ง API Key Error:'); + console.error('- Please set SGAI_APIKEY environment variable'); + console.error('- export SGAI_APIKEY="your-api-key-here"'); + } else { + console.error('\n๐Ÿ”ง General troubleshooting:'); + console.error('- Check your internet connection'); + console.error('- Verify the website URL is accessible'); + console.error('- Try with fewer pages or scrolls'); + console.error('- Check API key validity'); + } +} diff --git a/scrapegraph-js/examples/smartscraper/smartScraper_render_heavy_example.js b/scrapegraph-js/examples/smartscraper/smartScraper_render_heavy_example.js new file mode 100644 index 0000000..035f29d --- /dev/null +++ b/scrapegraph-js/examples/smartscraper/smartScraper_render_heavy_example.js @@ -0,0 +1,24 @@ +import { smartScraper } from 'scrapegraph-js'; +import 'dotenv/config'; + +const apiKey = process.env.SGAI_APIKEY; +const url = 'https://example.com'; +const prompt = 'Find the CEO of company X and their contact details'; + +try { + const response = await smartScraper( + apiKey, + url, + prompt, + null, // schema + null, // numberOfScrolls + null, // totalPages + null, // cookies + {}, // options + false, // plain_text + true // renderHeavyJs - Enable heavy JavaScript rendering + ); + console.log(response); +} catch (error) { + console.error(error); +} \ No newline at end of file diff --git a/scrapegraph-js/examples/stealth_mode_example.js b/scrapegraph-js/examples/stealth_mode_example.js new file mode 100644 index 0000000..3913589 --- /dev/null +++ b/scrapegraph-js/examples/stealth_mode_example.js @@ -0,0 +1,613 @@ +/** + * Stealth Mode Examples for ScrapeGraph AI JavaScript SDK + * + * This file demonstrates how to use stealth mode with various endpoints + * to avoid bot detection when scraping websites. + * + * Stealth mode enables advanced techniques to make requests appear more + * like those from a real browser, helping to bypass basic bot detection. + */ + +import { + smartScraper, + searchScraper, + markdownify, + scrape, + agenticScraper, + crawl, + getScrapeRequest, + getAgenticScraperRequest, + getCrawlRequest +} from '../index.js'; +import 'dotenv/config'; + +// Get API key from environment variable +const API_KEY = process.env.SGAI_APIKEY || 'your-api-key-here'; + +// ============================================================================ +// EXAMPLE 1: SmartScraper with Stealth Mode +// ============================================================================ + +async function exampleSmartScraperWithStealth() { + console.log('\n' + '='.repeat(60)); + console.log('EXAMPLE 1: SmartScraper with Stealth Mode'); + console.log('='.repeat(60)); + + try { + const response = await smartScraper( + API_KEY, + 'https://www.scrapethissite.com/pages/simple/', + 'Extract country names and capitals', + null, // schema + null, // numberOfScrolls + null, // totalPages + null, // cookies + {}, // options + false, // plain_text + false, // renderHeavyJs + true // stealth - Enable stealth mode + ); + + console.log('Status:', response.status); + console.log('Request ID:', response.request_id); + console.log('Result:', JSON.stringify(response.result, null, 2)); + } catch (error) { + console.error('Error:', error.message); + } +} + +// ============================================================================ +// EXAMPLE 2: SmartScraper with Stealth Mode and Pagination +// ============================================================================ + +async function exampleSmartScraperWithStealthAndPagination() { + console.log('\n' + '='.repeat(60)); + console.log('EXAMPLE 2: SmartScraper with Stealth Mode and Pagination'); + console.log('='.repeat(60)); + + try { + const response = await smartScraper( + API_KEY, + 'https://example.com/products', + 'Extract all product information from multiple pages', + null, // schema + 10, // numberOfScrolls + 5, // totalPages + null, // cookies + {}, // options + false, // plain_text + true, // renderHeavyJs - Enable JS rendering + true // stealth - Enable stealth mode + ); + + console.log('Status:', response.status); + console.log('Request ID:', response.request_id); + console.log('Products extracted:', response.result); + } catch (error) { + console.error('Error:', error.message); + } +} + +// ============================================================================ +// EXAMPLE 3: SearchScraper with Stealth Mode +// ============================================================================ + +async function exampleSearchScraperWithStealth() { + console.log('\n' + '='.repeat(60)); + console.log('EXAMPLE 3: SearchScraper with Stealth Mode'); + console.log('='.repeat(60)); + + try { + const response = await searchScraper( + API_KEY, + 'What are the latest developments in AI technology?', + 5, // numResults + null, // schema + null, // userAgent + { + stealth: true, // Enable stealth mode + extractionMode: true, + renderHeavyJs: false + } + ); + + console.log('Status:', response.status); + console.log('Request ID:', response.request_id); + console.log('Result:', JSON.stringify(response.result, null, 2)); + + if (response.reference_urls) { + console.log('Reference URLs:', response.reference_urls); + } + } catch (error) { + console.error('Error:', error.message); + } +} + +// ============================================================================ +// EXAMPLE 4: Markdownify with Stealth Mode +// ============================================================================ + +async function exampleMarkdownifyWithStealth() { + console.log('\n' + '='.repeat(60)); + console.log('EXAMPLE 4: Markdownify with Stealth Mode'); + console.log('='.repeat(60)); + + try { + const response = await markdownify( + API_KEY, + 'https://www.example.com', + { + stealth: true // Enable stealth mode + } + ); + + console.log('Status:', response.status); + console.log('Request ID:', response.request_id); + console.log('Markdown Preview (first 500 chars):'); + console.log(response.result.substring(0, 500)); + } catch (error) { + console.error('Error:', error.message); + } +} + +// ============================================================================ +// EXAMPLE 5: Scrape with Stealth Mode +// ============================================================================ + +async function exampleScrapeWithStealth() { + console.log('\n' + '='.repeat(60)); + console.log('EXAMPLE 5: Scrape with Stealth Mode'); + console.log('='.repeat(60)); + + try { + const response = await scrape( + API_KEY, + 'https://www.example.com', + { + stealth: true // Enable stealth mode + } + ); + + console.log('Status:', response.status); + console.log('Scrape Request ID:', response.scrape_request_id); + console.log('HTML Preview (first 500 chars):'); + console.log(response.html.substring(0, 500)); + } catch (error) { + console.error('Error:', error.message); + } +} + +// ============================================================================ +// EXAMPLE 6: Scrape with Stealth Mode and Heavy JS Rendering +// ============================================================================ + +async function exampleScrapeWithStealthAndJS() { + console.log('\n' + '='.repeat(60)); + console.log('EXAMPLE 6: Scrape with Stealth Mode and Heavy JS'); + console.log('='.repeat(60)); + + try { + const response = await scrape( + API_KEY, + 'https://www.example.com', + { + renderHeavyJs: true, // Enable JavaScript rendering + stealth: true // Enable stealth mode + } + ); + + console.log('Status:', response.status); + console.log('Scrape Request ID:', response.scrape_request_id); + console.log('HTML Preview (first 500 chars):'); + console.log(response.html.substring(0, 500)); + } catch (error) { + console.error('Error:', error.message); + } +} + +// ============================================================================ +// EXAMPLE 7: Scrape with Stealth Mode and Custom Headers +// ============================================================================ + +async function exampleScrapeWithStealthAndHeaders() { + console.log('\n' + '='.repeat(60)); + console.log('EXAMPLE 7: Scrape with Stealth Mode and Custom Headers'); + console.log('='.repeat(60)); + + try { + const customHeaders = { + 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36', + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', + 'Accept-Language': 'en-US,en;q=0.5', + 'DNT': '1' + }; + + const response = await scrape( + API_KEY, + 'https://www.protected-site.com', + { + headers: customHeaders, // Custom headers + stealth: true // Enable stealth mode + } + ); + + console.log('Status:', response.status); + console.log('Scrape Request ID:', response.scrape_request_id); + console.log('Success! Stealth mode + custom headers bypassed detection.'); + } catch (error) { + console.error('Error:', error.message); + } +} + +// ============================================================================ +// EXAMPLE 8: Agentic Scraper with Stealth Mode +// ============================================================================ + +async function exampleAgenticScraperWithStealth() { + console.log('\n' + '='.repeat(60)); + console.log('EXAMPLE 8: Agentic Scraper with Stealth Mode'); + console.log('='.repeat(60)); + + try { + const steps = [ + 'Type user@example.com in email input box', + 'Type password123 in password input box', + 'Click on login button' + ]; + + const response = await agenticScraper( + API_KEY, + 'https://dashboard.example.com/login', + steps, + true, // useSession + null, // userPrompt + null, // outputSchema + false, // aiExtraction + { + stealth: true // Enable stealth mode + } + ); + + console.log('Status:', response.status); + console.log('Request ID:', response.request_id); + console.log('Message:', response.message); + } catch (error) { + console.error('Error:', error.message); + } +} + +// ============================================================================ +// EXAMPLE 9: Agentic Scraper with Stealth Mode and AI Extraction +// ============================================================================ + +async function exampleAgenticScraperWithStealthAndAI() { + console.log('\n' + '='.repeat(60)); + console.log('EXAMPLE 9: Agentic Scraper with Stealth and AI Extraction'); + console.log('='.repeat(60)); + + try { + const steps = [ + 'Navigate to user profile section', + 'Click on settings tab' + ]; + + const outputSchema = { + user_info: { + type: 'object', + properties: { + username: { type: 'string' }, + email: { type: 'string' }, + settings: { type: 'object' } + } + } + }; + + const response = await agenticScraper( + API_KEY, + 'https://dashboard.example.com', + steps, + true, // useSession + 'Extract user profile information and settings', // userPrompt + outputSchema, // outputSchema + true, // aiExtraction + { + stealth: true // Enable stealth mode + } + ); + + console.log('Status:', response.status); + console.log('Request ID:', response.request_id); + } catch (error) { + console.error('Error:', error.message); + } +} + +// ============================================================================ +// EXAMPLE 10: Crawl with Stealth Mode +// ============================================================================ + +async function exampleCrawlWithStealth() { + console.log('\n' + '='.repeat(60)); + console.log('EXAMPLE 10: Crawl with Stealth Mode'); + console.log('='.repeat(60)); + + try { + const schema = { + type: 'object', + properties: { + title: { type: 'string', description: 'Page title' }, + content: { type: 'string', description: 'Main content' } + }, + required: ['title'] + }; + + const response = await crawl( + API_KEY, + 'https://www.example.com', + 'Extract page titles and main content', + schema, + { + depth: 2, + maxPages: 5, + stealth: true // Enable stealth mode + } + ); + + console.log('Status:', response.status); + console.log('Crawl ID:', response.id); + console.log('Message:', response.message); + } catch (error) { + console.error('Error:', error.message); + } +} + +// ============================================================================ +// EXAMPLE 11: Crawl with Stealth Mode and Sitemap +// ============================================================================ + +async function exampleCrawlWithStealthAndSitemap() { + console.log('\n' + '='.repeat(60)); + console.log('EXAMPLE 11: Crawl with Stealth Mode and Sitemap'); + console.log('='.repeat(60)); + + try { + const schema = { + type: 'object', + properties: { + product_name: { type: 'string' }, + price: { type: 'string' }, + description: { type: 'string' } + }, + required: ['product_name'] + }; + + const response = await crawl( + API_KEY, + 'https://www.example-shop.com', + 'Extract product information from all pages', + schema, + { + sitemap: true, // Use sitemap for better page discovery + depth: 3, + maxPages: 10, + stealth: true // Enable stealth mode + } + ); + + console.log('Status:', response.status); + console.log('Crawl ID:', response.id); + console.log('Message:', response.message); + } catch (error) { + console.error('Error:', error.message); + } +} + +// ============================================================================ +// EXAMPLE 12: Complete Workflow with Stealth Mode +// ============================================================================ + +async function exampleCompleteWorkflowWithStealth() { + console.log('\n' + '='.repeat(60)); + console.log('EXAMPLE 12: Complete Workflow with Stealth Mode'); + console.log('='.repeat(60)); + + try { + // Step 1: Start a scrape request with stealth mode + console.log('\n1. Starting scrape request with stealth mode...'); + const scrapeResponse = await scrape( + API_KEY, + 'https://www.example.com', + { + renderHeavyJs: true, + stealth: true + } + ); + + console.log(' Scrape initiated. Request ID:', scrapeResponse.scrape_request_id); + console.log(' Status:', scrapeResponse.status); + + // Step 2: Wait a bit and check the result (if processing) + if (scrapeResponse.status === 'processing') { + console.log('\n2. Waiting for scrape to complete...'); + await new Promise(resolve => setTimeout(resolve, 3000)); // Wait 3 seconds + + const result = await getScrapeRequest(API_KEY, scrapeResponse.scrape_request_id); + console.log(' Updated Status:', result.status); + + if (result.status === 'completed') { + console.log(' HTML received (length):', result.html.length); + } + } + + console.log('\nโœ… Workflow completed successfully with stealth mode!'); + } catch (error) { + console.error('Error:', error.message); + } +} + +// ============================================================================ +// EXAMPLE 13: SearchScraper with Stealth and Custom User Agent +// ============================================================================ + +async function exampleSearchScraperWithStealthAndUserAgent() { + console.log('\n' + '='.repeat(60)); + console.log('EXAMPLE 13: SearchScraper with Stealth and User Agent'); + console.log('='.repeat(60)); + + try { + const customUserAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'; + + const response = await searchScraper( + API_KEY, + 'Find the best practices for web scraping', + 5, // numResults + null, // schema + customUserAgent, // Custom user agent + { + stealth: true, + extractionMode: true + } + ); + + console.log('Status:', response.status); + console.log('Request ID:', response.request_id); + console.log('Result:', JSON.stringify(response.result, null, 2)); + } catch (error) { + console.error('Error:', error.message); + } +} + +// ============================================================================ +// EXAMPLE 14: Comparing With and Without Stealth Mode +// ============================================================================ + +async function exampleCompareStealthMode() { + console.log('\n' + '='.repeat(60)); + console.log('EXAMPLE 14: Comparing With and Without Stealth Mode'); + console.log('='.repeat(60)); + + try { + const testUrl = 'https://www.example.com'; + + // Without stealth mode + console.log('\n1. Scraping WITHOUT stealth mode...'); + const responseWithoutStealth = await scrape( + API_KEY, + testUrl, + { + stealth: false + } + ); + console.log(' Status:', responseWithoutStealth.status); + console.log(' Request ID:', responseWithoutStealth.scrape_request_id); + + // With stealth mode + console.log('\n2. Scraping WITH stealth mode...'); + const responseWithStealth = await scrape( + API_KEY, + testUrl, + { + stealth: true + } + ); + console.log(' Status:', responseWithStealth.status); + console.log(' Request ID:', responseWithStealth.scrape_request_id); + + console.log('\n๐Ÿ“Š Comparison complete!'); + console.log(' Both requests succeeded, but stealth mode provides better bot detection avoidance.'); + } catch (error) { + console.error('Error:', error.message); + } +} + +// ============================================================================ +// RUN ALL EXAMPLES +// ============================================================================ + +async function runAllExamples() { + console.log('\n' + '='.repeat(60)); + console.log('STEALTH MODE EXAMPLES FOR SCRAPEGRAPH AI JAVASCRIPT SDK'); + console.log('='.repeat(60)); + console.log('\nThese examples demonstrate how to use stealth mode'); + console.log('to avoid bot detection when scraping websites.'); + console.log('\nStealth mode is available for all major endpoints:'); + console.log('- SmartScraper'); + console.log('- SearchScraper'); + console.log('- Markdownify'); + console.log('- Scrape'); + console.log('- Agentic Scraper'); + console.log('- Crawl'); + + const examples = [ + { name: 'SmartScraper with Stealth', fn: exampleSmartScraperWithStealth }, + { name: 'SmartScraper with Stealth and Pagination', fn: exampleSmartScraperWithStealthAndPagination }, + { name: 'SearchScraper with Stealth', fn: exampleSearchScraperWithStealth }, + { name: 'Markdownify with Stealth', fn: exampleMarkdownifyWithStealth }, + { name: 'Scrape with Stealth', fn: exampleScrapeWithStealth }, + { name: 'Scrape with Stealth and Heavy JS', fn: exampleScrapeWithStealthAndJS }, + { name: 'Scrape with Stealth and Custom Headers', fn: exampleScrapeWithStealthAndHeaders }, + { name: 'Agentic Scraper with Stealth', fn: exampleAgenticScraperWithStealth }, + { name: 'Agentic Scraper with Stealth and AI', fn: exampleAgenticScraperWithStealthAndAI }, + { name: 'Crawl with Stealth', fn: exampleCrawlWithStealth }, + { name: 'Crawl with Stealth and Sitemap', fn: exampleCrawlWithStealthAndSitemap }, + { name: 'Complete Workflow with Stealth', fn: exampleCompleteWorkflowWithStealth }, + { name: 'SearchScraper with Stealth and User Agent', fn: exampleSearchScraperWithStealthAndUserAgent }, + { name: 'Compare Stealth Mode', fn: exampleCompareStealthMode } + ]; + + for (let i = 0; i < examples.length; i++) { + const example = examples[i]; + try { + console.log(`\n\n๐Ÿ“Œ Running Example ${i + 1}/${examples.length}: ${example.name}`); + await example.fn(); + console.log(`\nโœ… Example ${i + 1} completed`); + } catch (error) { + console.error(`\nโŒ Example ${i + 1} failed: ${error.message}`); + } + + // Add a small delay between examples + if (i < examples.length - 1) { + await new Promise(resolve => setTimeout(resolve, 1000)); + } + } + + console.log('\n' + '='.repeat(60)); + console.log('ALL EXAMPLES COMPLETED'); + console.log('='.repeat(60)); +} + +// ============================================================================ +// MAIN EXECUTION +// ============================================================================ + +// Run all examples if this file is executed directly +if (import.meta.url === `file://${process.argv[1]}`) { + runAllExamples() + .then(() => { + console.log('\nโœจ All stealth mode examples executed successfully!'); + process.exit(0); + }) + .catch(error => { + console.error('\n๐Ÿ’ฅ Fatal error:', error.message); + process.exit(1); + }); +} + +// Export individual examples for selective usage +export { + exampleSmartScraperWithStealth, + exampleSmartScraperWithStealthAndPagination, + exampleSearchScraperWithStealth, + exampleMarkdownifyWithStealth, + exampleScrapeWithStealth, + exampleScrapeWithStealthAndJS, + exampleScrapeWithStealthAndHeaders, + exampleAgenticScraperWithStealth, + exampleAgenticScraperWithStealthAndAI, + exampleCrawlWithStealth, + exampleCrawlWithStealthAndSitemap, + exampleCompleteWorkflowWithStealth, + exampleSearchScraperWithStealthAndUserAgent, + exampleCompareStealthMode, + runAllExamples +}; diff --git a/scrapegraph-js/examples/step_by_step_schema_generation.js b/scrapegraph-js/examples/step_by_step_schema_generation.js new file mode 100644 index 0000000..6d87346 --- /dev/null +++ b/scrapegraph-js/examples/step_by_step_schema_generation.js @@ -0,0 +1,184 @@ +#!/usr/bin/env node +/** + * Step-by-step example for schema generation using ScrapeGraph JavaScript SDK. + * + * This script demonstrates the basic workflow for schema generation: + * 1. Initialize the client + * 2. Generate a schema from a prompt + * 3. Check the status of the request + * 4. Retrieve the final result + * + * Requirements: + * - Node.js 16+ + * - scrapegraph-js package + * - SGAI_API_KEY environment variable + * + * Usage: + * SGAI_API_KEY=your_api_key node step_by_step_schema_generation.js + */ + +import { generateSchema, getSchemaStatus } from '../index.js'; + +function printStep(stepNumber, title, description = '') { + console.log(`\n${'='.repeat(60)}`); + console.log(`STEP ${stepNumber}: ${title}`); + console.log(`${'='.repeat(60)}`); + if (description) { + console.log(description); + } + console.log(); +} + +function printResponse(response, title = 'API Response') { + console.log(`\n๐Ÿ“‹ ${title}`); + console.log('-'.repeat(40)); + + if (response.error) { + console.log(`โŒ Error: ${response.error}`); + return; + } + + for (const [key, value] of Object.entries(response)) { + if (key === 'generated_schema' && value) { + console.log(`๐Ÿ”ง ${key}:`); + console.log(JSON.stringify(value, null, 2)); + } else { + console.log(`๐Ÿ”ง ${key}: ${value}`); + } + } +} + +async function main() { + // Step 1: Check API key and initialize + printStep(1, 'Initialize Client', 'Setting up the ScrapeGraph client with your API key'); + + const apiKey = process.env.SGAI_API_KEY; + if (!apiKey) { + console.log('โŒ Error: SGAI_API_KEY not found in environment variables'); + console.log('Please set your API key:'); + console.log('export SGAI_API_KEY=your_api_key_here'); + console.log('Or run: SGAI_API_KEY=your_api_key node step_by_step_schema_generation.js'); + return; + } + + console.log('โœ… API key found in environment variables'); + console.log('โœ… Client ready to use'); + + // Step 2: Define the schema generation request + printStep(2, 'Define Request', 'Creating a prompt for schema generation'); + + const userPrompt = 'Find laptops with specifications like brand, processor, RAM, storage, and price'; + console.log(`๐Ÿ’ญ User Prompt: ${userPrompt}`); + + // Step 3: Generate the schema + printStep(3, 'Generate Schema', 'Sending the schema generation request to the API'); + + try { + const response = await generateSchema(userPrompt, null, { apiKey }); + console.log('โœ… Schema generation request sent successfully'); + printResponse(response, 'Initial Response'); + + // Extract the request ID for status checking + const requestId = response.request_id; + if (!requestId) { + console.log('โŒ No request ID returned from the API'); + return; + } + + } catch (error) { + console.log(`โŒ Failed to generate schema: ${error.message}`); + return; + } + + // Step 4: Check the status (polling) + printStep(4, 'Check Status', 'Polling the API to check the status of the request'); + + const maxAttempts = 10; + let attempt = 0; + let requestId = null; + + // Get the request ID from the previous step + try { + const initialResponse = await generateSchema(userPrompt, null, { apiKey }); + requestId = initialResponse.request_id; + } catch (error) { + console.log(`โŒ Error getting request ID: ${error.message}`); + return; + } + + while (attempt < maxAttempts) { + attempt++; + console.log(`๐Ÿ” Attempt ${attempt}/${maxAttempts}: Checking status...`); + + try { + const statusResponse = await getSchemaStatus(requestId, { apiKey }); + const currentStatus = statusResponse.status || 'unknown'; + + console.log(`๐Ÿ“Š Current Status: ${currentStatus}`); + + if (currentStatus === 'completed') { + console.log('โœ… Schema generation completed successfully!'); + printResponse(statusResponse, 'Final Result'); + break; + } else if (currentStatus === 'failed') { + console.log('โŒ Schema generation failed'); + printResponse(statusResponse, 'Error Response'); + break; + } else if (currentStatus === 'pending' || currentStatus === 'processing') { + console.log('โณ Request is still being processed, waiting...'); + if (attempt < maxAttempts) { + await new Promise(resolve => setTimeout(resolve, 2000)); // Wait 2 seconds + } + } else { + console.log(`โš ๏ธ Unknown status: ${currentStatus}`); + break; + } + + } catch (error) { + console.log(`โŒ Error checking status: ${error.message}`); + break; + } + } + + if (attempt >= maxAttempts) { + console.log('โš ๏ธ Maximum attempts reached. The request might still be processing.'); + console.log('You can check the status later using the request ID.'); + } + + // Step 5: Demonstrate schema modification + printStep(5, 'Schema Modification', 'Demonstrating how to modify an existing schema'); + + const existingSchema = { + type: 'object', + properties: { + name: { type: 'string' }, + price: { type: 'number' } + }, + required: ['name', 'price'] + }; + + const modificationPrompt = 'Add brand and rating fields to the existing schema'; + console.log(`๐Ÿ’ญ Modification Prompt: ${modificationPrompt}`); + console.log(`๐Ÿ“‹ Existing Schema: ${JSON.stringify(existingSchema, null, 2)}`); + + try { + const modificationResponse = await generateSchema(modificationPrompt, existingSchema, { apiKey }); + console.log('โœ… Schema modification request sent successfully'); + printResponse(modificationResponse, 'Modification Response'); + + } catch (error) { + console.log(`โŒ Failed to modify schema: ${error.message}`); + } + + // Step 6: Cleanup + printStep(6, 'Cleanup', 'All operations completed successfully'); + + console.log('โœ… All operations completed successfully'); + console.log('โœ… No cleanup needed for JavaScript SDK'); + + console.log('\n๐ŸŽ‰ Schema generation demonstration completed!'); + console.log(`๐Ÿ“ Request ID for reference: ${requestId}`); +} + +// Run the main function +main().catch(console.error); diff --git a/scrapegraph-js/examples/utilities/getAgenticScraperRequest_example.js b/scrapegraph-js/examples/utilities/getAgenticScraperRequest_example.js new file mode 100644 index 0000000..1d54af4 --- /dev/null +++ b/scrapegraph-js/examples/utilities/getAgenticScraperRequest_example.js @@ -0,0 +1,31 @@ +import { getAgenticScraperRequest } from 'scrapegraph-js'; +import 'dotenv/config'; + +const apiKey = process.env.SGAI_APIKEY; +// Replace this with an actual request ID from a previous agenticScraper call +const requestId = 'your-request-id-here'; + +try { + const response = await getAgenticScraperRequest(apiKey, requestId); + + console.log('๐Ÿ” Agentic Scraper Request Status'); + console.log('Request ID:', requestId); + console.log('Status:', response.status); + console.log('Created At:', response.created_at); + + if (response.status === 'completed') { + console.log('โœ… Automation Completed!'); + console.log('Completed At:', response.completed_at); + console.log('Result:', JSON.stringify(response.result, null, 2)); + } else if (response.status === 'pending') { + console.log('โณ Automation is still in progress...'); + console.log('Please check again in a few moments.'); + } else if (response.status === 'failed') { + console.log('โŒ Automation Failed'); + console.log('Error:', response.error); + } + + console.log('\nFull Response:', JSON.stringify(response, null, 2)); +} catch (error) { + console.error('โŒ Error:', error.message); +} diff --git a/scrapegraph-js/examples/utilities/getCredits_example.js b/scrapegraph-js/examples/utilities/getCredits_example.js new file mode 100644 index 0000000..e233b08 --- /dev/null +++ b/scrapegraph-js/examples/utilities/getCredits_example.js @@ -0,0 +1,11 @@ +import { getCredits } from 'scrapegraph-js'; +import 'dotenv/config'; + +const apiKey = process.env.SGAI_APIKEY; + +try { + const myCredit = await getCredits(apiKey); + console.log(myCredit); +} catch (error) { + console.error(error); +} diff --git a/scrapegraph-js/examples/utilities/getSearchScraperRequest_example.js b/scrapegraph-js/examples/utilities/getSearchScraperRequest_example.js new file mode 100644 index 0000000..49b1797 --- /dev/null +++ b/scrapegraph-js/examples/utilities/getSearchScraperRequest_example.js @@ -0,0 +1,12 @@ +import { getSearchScraperRequest } from 'scrapegraph-js'; +import 'dotenv/config'; + +const apiKey = process.env.SGAI_APIKEY; +const requestId = '64801288-6e3b-41f3-9d94-07cff3829e15'; + +try { + const requestInfo = await getSearchScraperRequest(apiKey, requestId); + console.log(requestInfo); +} catch (error) { + console.error(error); +} diff --git a/scrapegraph-js/examples/utilities/getSmartScraperRequest_example.js b/scrapegraph-js/examples/utilities/getSmartScraperRequest_example.js new file mode 100644 index 0000000..ebafaa6 --- /dev/null +++ b/scrapegraph-js/examples/utilities/getSmartScraperRequest_example.js @@ -0,0 +1,12 @@ +import { getSmartScraperRequest } from 'scrapegraph-js'; +import 'dotenv/config'; + +const apiKey = process.env.SGAI_APIKEY; +const requestId = '3fa85f64-5717-4562-b3fc-2c963f66afa6'; + +try { + const requestInfo = await getSmartScraperRequest(apiKey, requestId); + console.log(requestInfo); +} catch (error) { + console.error(error); +} diff --git a/scrapegraph-js/examples/utilities/healthz_example.js b/scrapegraph-js/examples/utilities/healthz_example.js new file mode 100644 index 0000000..f25a507 --- /dev/null +++ b/scrapegraph-js/examples/utilities/healthz_example.js @@ -0,0 +1,59 @@ +/** + * Health Check Example - Basic + * + * This example demonstrates how to use the health check endpoint to monitor + * the ScrapeGraphAI API service status. This is particularly useful for: + * - Production monitoring and alerting + * - Health checks in containerized environments (Kubernetes, Docker) + * - Ensuring service availability before making API calls + * - Integration with monitoring tools (Prometheus, Datadog, etc.) + * + * The health check endpoint (/healthz) provides a quick way to verify that + * the API service is operational and ready to handle requests. + */ + +import { healthz } from 'scrapegraph-js'; +import 'dotenv/config'; + +const apiKey = process.env.SGAI_APIKEY; + +console.log('๐Ÿฅ Checking ScrapeGraphAI API health status...'); +console.log('-'.repeat(50)); + +try { + // Perform health check + const healthStatus = await healthz(apiKey); + + // Display results + console.log('\nโœ… Health Check Response:'); + console.log(`Status: ${healthStatus.status || 'unknown'}`); + + if (healthStatus.message) { + console.log(`Message: ${healthStatus.message}`); + } + + // Display any additional fields + Object.keys(healthStatus).forEach(key => { + if (key !== 'status' && key !== 'message') { + console.log(`${key.charAt(0).toUpperCase() + key.slice(1)}: ${healthStatus[key]}`); + } + }); + + console.log('\n' + '-'.repeat(50)); + console.log('โœจ Health check completed successfully!'); + + // Example: Use in a monitoring context + if (healthStatus.status === 'healthy') { + console.log('\nโœ“ Service is healthy and ready to accept requests'); + process.exit(0); + } else { + console.log('\nโš ๏ธ Service may be experiencing issues'); + process.exit(1); + } + +} catch (error) { + console.error('\nโŒ Health check failed:', error.message); + console.error('The service may be unavailable or experiencing issues'); + process.exit(2); +} + diff --git a/scrapegraph-js/examples/utilities/healthz_monitoring_example.js b/scrapegraph-js/examples/utilities/healthz_monitoring_example.js new file mode 100644 index 0000000..8be4361 --- /dev/null +++ b/scrapegraph-js/examples/utilities/healthz_monitoring_example.js @@ -0,0 +1,199 @@ +/** + * Health Check Example - Advanced Monitoring + * + * This example demonstrates advanced patterns for using the health check endpoint + * in production monitoring scenarios, including: + * - Periodic health checks + * - Integration with Express.js + * - Retry logic with exponential backoff + * - Health check aggregation + */ + +import { healthz } from 'scrapegraph-js'; +import 'dotenv/config'; + +const apiKey = process.env.SGAI_APIKEY; + +/** + * Simple monitoring function that checks health status + */ +async function simpleHealthCheck() { + console.log('๐Ÿฅ Performing health check...'); + + try { + const health = await healthz(apiKey); + + if (health.status === 'healthy') { + console.log('โœ“ Health check passed'); + return { success: true, data: health }; + } else { + console.log('โœ— Health check failed - service unhealthy'); + return { success: false, data: health }; + } + } catch (error) { + console.log('โœ— Health check error:', error.message); + return { success: false, error: error.message }; + } +} + +/** + * Health check with retry logic and exponential backoff + */ +async function healthCheckWithRetry(maxRetries = 3, initialDelay = 1000) { + console.log('\n๐Ÿ”„ Health check with retry logic...'); + + for (let attempt = 1; attempt <= maxRetries; attempt++) { + try { + console.log(`Attempt ${attempt}/${maxRetries}...`); + const health = await healthz(apiKey); + + if (health.status === 'healthy') { + console.log('โœ“ Service is healthy'); + return { success: true, attempts: attempt, data: health }; + } + + console.log(`โš ๏ธ Service returned: ${health.status}`); + + if (attempt < maxRetries) { + const delay = initialDelay * Math.pow(2, attempt - 1); + console.log(`Waiting ${delay}ms before retry...`); + await new Promise(resolve => setTimeout(resolve, delay)); + } + } catch (error) { + console.log(`โœ— Attempt ${attempt} failed:`, error.message); + + if (attempt < maxRetries) { + const delay = initialDelay * Math.pow(2, attempt - 1); + console.log(`Waiting ${delay}ms before retry...`); + await new Promise(resolve => setTimeout(resolve, delay)); + } + } + } + + return { success: false, attempts: maxRetries }; +} + +/** + * Periodic health check that runs at intervals + */ +async function periodicHealthCheck(intervalMs = 30000) { + console.log(`\nโฐ Starting periodic health checks every ${intervalMs}ms...`); + console.log('Press Ctrl+C to stop\n'); + + const checkHealth = async () => { + const timestamp = new Date().toISOString(); + console.log(`[${timestamp}] Running health check...`); + + try { + const health = await healthz(apiKey); + + if (health.status === 'healthy') { + console.log(`[${timestamp}] โœ“ Service is healthy\n`); + } else { + console.log(`[${timestamp}] โš ๏ธ Service status: ${health.status}\n`); + } + } catch (error) { + console.log(`[${timestamp}] โœ— Health check failed: ${error.message}\n`); + } + }; + + // Run initial check + await checkHealth(); + + // Schedule periodic checks + setInterval(checkHealth, intervalMs); +} + +/** + * Example Express.js health endpoint + * + * This demonstrates how to integrate the health check into an Express.js + * application for Kubernetes liveness/readiness probes or load balancer checks. + */ +function expressHealthEndpointExample() { + console.log('\n๐Ÿ“ Express.js Integration Pattern:'); + console.log('-'.repeat(50)); + console.log(` +import express from 'express'; +import { healthz } from 'scrapegraph-js'; + +const app = express(); +const apiKey = process.env.SGAI_APIKEY; + +// Health check endpoint for load balancers/Kubernetes +app.get('/health', async (req, res) => { + try { + const health = await healthz(apiKey); + + if (health.status === 'healthy') { + res.status(200).json({ + status: 'healthy', + timestamp: new Date().toISOString(), + scrapeGraphApi: 'operational' + }); + } else { + res.status(503).json({ + status: 'unhealthy', + timestamp: new Date().toISOString(), + scrapeGraphApi: health.status + }); + } + } catch (error) { + res.status(503).json({ + status: 'error', + timestamp: new Date().toISOString(), + error: error.message + }); + } +}); + +// Liveness probe - checks if the app is running +app.get('/healthz/live', (req, res) => { + res.status(200).json({ status: 'alive' }); +}); + +// Readiness probe - checks if the app can handle requests +app.get('/healthz/ready', async (req, res) => { + try { + const health = await healthz(apiKey); + + if (health.status === 'healthy') { + res.status(200).json({ status: 'ready' }); + } else { + res.status(503).json({ status: 'not ready' }); + } + } catch (error) { + res.status(503).json({ status: 'not ready', error: error.message }); + } +}); + +app.listen(3000, () => { + console.log('Server running on port 3000'); +}); + `); + console.log('-'.repeat(50)); +} + +/** + * Main function - demonstrates different monitoring patterns + */ +async function main() { + console.log('๐Ÿฅ ScrapeGraphAI Health Check - Advanced Monitoring Examples'); + console.log('='.repeat(60)); + + // 1. Simple health check + await simpleHealthCheck(); + + // 2. Health check with retry logic + await healthCheckWithRetry(3, 1000); + + // 3. Show Express.js integration example + expressHealthEndpointExample(); + + // Uncomment to run periodic health checks + // await periodicHealthCheck(30000); // Check every 30 seconds +} + +// Run the examples +main().catch(console.error); + diff --git a/scrapegraph-js/examples/utilities/scrape_advanced_example.js b/scrapegraph-js/examples/utilities/scrape_advanced_example.js new file mode 100644 index 0000000..50b1093 --- /dev/null +++ b/scrapegraph-js/examples/utilities/scrape_advanced_example.js @@ -0,0 +1,524 @@ +/** + * Advanced example demonstrating comprehensive usage of the Scrape API with the scrapegraph-js SDK. + * + * This example shows how to: + * 1. Set up the client for Scrape with various configurations + * 2. Handle different types of websites and rendering modes + * 3. Implement error handling and retry logic + * 4. Process multiple websites concurrently + * 5. Save and analyze HTML content with detailed metadata + * 6. Use custom headers and cookies for authentication + * 7. Compare different rendering modes + * + * Requirements: + * - Node.js 16+ + * - scrapegraph-js + * - A valid API key + * + * Usage: + * node scrape_advanced_example.js + */ + +import { scrape, getScrapeRequest } from '../index.js'; +import fs from 'fs/promises'; +import path from 'path'; + +// Configuration +const API_KEY = process.env.SGAI_API_KEY || 'your-api-key-here'; +const OUTPUT_DIR = 'scrape_advanced_output'; + +/** + * Scrape processor with advanced features + */ +class ScrapeProcessor { + constructor(apiKey) { + this.apiKey = apiKey; + this.retryDelays = [1000, 2000, 4000]; // Exponential backoff delays + } + + /** + * Get HTML content from a website using the Scrape API with retry logic. + * + * @param {string} websiteUrl - The URL of the website to get HTML from + * @param {Object} options - Options for the scrape request + * @returns {Object} The API response with additional metadata + */ + async scrapeWebsite(websiteUrl, options = {}) { + const { renderHeavyJs = false, headers = {}, maxRetries = 3 } = options; + + const jsMode = renderHeavyJs ? 'with heavy JS rendering' : 'without JS rendering'; + console.log(`๐ŸŒ Getting HTML content from: ${websiteUrl}`); + console.log(`๐Ÿ”ง Mode: ${jsMode}`); + + for (let attempt = 0; attempt < maxRetries; attempt++) { + try { + const startTime = Date.now(); + const result = await scrape(this.apiKey, websiteUrl, { + renderHeavyJs, + headers + }); + const executionTime = (Date.now() - startTime) / 1000; + + console.log(`โœ… Success! Execution time: ${executionTime.toFixed(2)} seconds`); + return { + ...result, + executionTime, + attempts: attempt + 1 + }; + + } catch (error) { + console.error(`โŒ Attempt ${attempt + 1} failed: ${error.message}`); + if (attempt < maxRetries - 1) { + const waitTime = this.retryDelays[attempt] || 2000; + console.log(`โณ Waiting ${waitTime}ms before retry...`); + await new Promise(resolve => setTimeout(resolve, waitTime)); + } else { + console.error(`๐Ÿ’ฅ All ${maxRetries} attempts failed for ${websiteUrl}`); + throw error; + } + } + } + } + + /** + * Process multiple websites concurrently. + * + * @param {Array} websites - Array of website configurations + * @param {number} maxConcurrency - Maximum number of concurrent requests + * @returns {Array} Results for each website + */ + async processWebsiteBatch(websites, maxConcurrency = 3) { + const results = []; + + // Process websites in batches to control concurrency + for (let i = 0; i < websites.length; i += maxConcurrency) { + const batch = websites.slice(i, i + maxConcurrency); + const batchPromises = batch.map(website => + this.processSingleWebsite(website) + ); + + const batchResults = await Promise.allSettled(batchPromises); + + // Process batch results + batchResults.forEach((result, index) => { + const website = batch[index]; + if (result.status === 'fulfilled') { + results.push({ + website: website.url, + success: true, + data: result.value + }); + } else { + results.push({ + website: website.url, + success: false, + error: result.reason.message + }); + } + }); + + // Add a small delay between batches to be respectful to the API + if (i + maxConcurrency < websites.length) { + await new Promise(resolve => setTimeout(resolve, 1000)); + } + } + + return results; + } + + /** + * Process a single website with comprehensive analysis. + * + * @param {Object} website - Website configuration object + * @returns {Object} Processing results + */ + async processSingleWebsite(website) { + const { url, name, renderHeavyJs = false, description, headers = {} } = website; + + console.log(`\n๐Ÿ” Processing: ${description}`); + console.log(`๐Ÿ“ URL: ${url}`); + console.log(`โš™๏ธ Render Heavy JS: ${renderHeavyJs}`); + + try { + // Get HTML content + const result = await this.scrapeWebsite(url, { + renderHeavyJs, + headers + }); + + // Analyze the HTML content + const analysis = this.analyzeHtmlContent(result.html); + + // Save the HTML content + const filename = `${name}_${renderHeavyJs ? 'js' : 'nojs'}`; + const savedFile = await this.saveHtmlContent(result.html, filename); + + // Create comprehensive result object + const processedResult = { + website: url, + name, + description, + renderHeavyJs, + success: true, + requestId: result.scrape_request_id, + status: result.status, + executionTime: result.executionTime, + attempts: result.attempts, + analysis, + savedFile, + metadata: { + timestamp: new Date().toISOString(), + userAgent: headers['User-Agent'] || 'Default', + hasCustomHeaders: Object.keys(headers).length > 0 + } + }; + + console.log(`โœ… Successfully processed ${url}`); + console.log(`๐Ÿ“Š Analysis: ${analysis.totalLength.toLocaleString()} chars, ${analysis.lines.toLocaleString()} lines`); + console.log(`๐Ÿ’พ Saved to: ${savedFile}`); + + return processedResult; + + } catch (error) { + console.error(`โŒ Failed to process ${url}: ${error.message}`); + return { + website: url, + name, + description, + renderHeavyJs, + success: false, + error: error.message, + timestamp: new Date().toISOString() + }; + } + } + + /** + * Analyze HTML content and provide detailed statistics. + * + * @param {string} htmlContent - The HTML content to analyze + * @returns {Object} Detailed analysis of the HTML content + */ + analyzeHtmlContent(htmlContent) { + if (!htmlContent) { + return { + totalLength: 0, + lines: 0, + hasDoctype: false, + hasHtmlTag: false, + hasHeadTag: false, + hasBodyTag: false, + scriptTags: 0, + styleTags: 0, + divTags: 0, + pTags: 0, + imgTags: 0, + linkTags: 0, + aTags: 0, + spanTags: 0, + tableTags: 0, + formTags: 0, + inputTags: 0, + buttonTags: 0, + metaTags: 0, + titleTags: 0, + h1Tags: 0, + h2Tags: 0, + h3Tags: 0, + h4Tags: 0, + h5Tags: 0, + h6Tags: 0, + listTags: 0, + codeTags: 0, + preTags: 0, + blockquoteTags: 0, + iframeTags: 0, + canvasTags: 0, + svgTags: 0, + videoTags: 0, + audioTags: 0, + embedTags: 0, + objectTags: 0, + paramTags: 0, + sourceTags: 0, + trackTags: 0, + mapTags: 0, + areaTags: 0, + baseTags: 0, + bdoTags: 0, + brTags: 0, + hrTags: 0, + imgTags: 0, + inputTags: 0, + linkTags: 0, + metaTags: 0, + paramTags: 0, + sourceTags: 0, + trackTags: 0, + wbrTags: 0 + }; + } + + const stats = { + totalLength: htmlContent.length, + lines: htmlContent.split('\n').length, + hasDoctype: htmlContent.trim().startsWith(' r.success).length, + failed: results.filter(r => !r.success).length, + timestamp: new Date().toISOString(), + apiKey: this.apiKey.substring(0, 8) + '...' + }, + results: results, + statistics: { + averageExecutionTime: 0, + totalExecutionTime: 0, + averageAttempts: 0, + totalAttempts: 0 + } + }; + + // Calculate statistics + const successfulResults = results.filter(r => r.success); + if (successfulResults.length > 0) { + report.statistics.averageExecutionTime = + successfulResults.reduce((sum, r) => sum + (r.executionTime || 0), 0) / successfulResults.length; + report.statistics.totalExecutionTime = + successfulResults.reduce((sum, r) => sum + (r.executionTime || 0), 0); + report.statistics.averageAttempts = + successfulResults.reduce((sum, r) => sum + (r.attempts || 1), 0) / successfulResults.length; + report.statistics.totalAttempts = + successfulResults.reduce((sum, r) => sum + (r.attempts || 1), 0); + } + + // Save report as JSON + const reportFile = path.join(outputDir, 'processing_report.json'); + await fs.writeFile(reportFile, JSON.stringify(report, null, 2), 'utf8'); + + // Save summary as text + const summaryFile = path.join(outputDir, 'summary.txt'); + const summaryText = this.formatSummary(report); + await fs.writeFile(summaryFile, summaryText, 'utf8'); + + console.log(`\n๐Ÿ“Š Report generated:`); + console.log(` ๐Ÿ“„ JSON: ${reportFile}`); + console.log(` ๐Ÿ“ Summary: ${summaryFile}`); + + return { reportFile, summaryFile }; + } + + /** + * Format the summary report as readable text. + * + * @param {Object} report - The processing report + * @returns {string} Formatted summary text + */ + formatSummary(report) { + const { summary, statistics } = report; + + let text = 'SCRAPE API PROCESSING REPORT\n'; + text += '='.repeat(50) + '\n\n'; + text += `Generated: ${summary.timestamp}\n`; + text += `Total Websites: ${summary.totalWebsites}\n`; + text += `Successful: ${summary.successful}\n`; + text += `Failed: ${summary.failed}\n`; + text += `Success Rate: ${((summary.successful / summary.totalWebsites) * 100).toFixed(1)}%\n\n`; + + if (summary.successful > 0) { + text += `PERFORMANCE STATISTICS\n`; + text += '-'.repeat(30) + '\n'; + text += `Average Execution Time: ${statistics.averageExecutionTime.toFixed(2)}s\n`; + text += `Total Execution Time: ${statistics.totalExecutionTime.toFixed(2)}s\n`; + text += `Average Attempts: ${statistics.averageAttempts.toFixed(1)}\n`; + text += `Total Attempts: ${statistics.totalAttempts}\n\n`; + } + + text += `DETAILED RESULTS\n`; + text += '-'.repeat(30) + '\n'; + + report.results.forEach((result, index) => { + text += `${index + 1}. ${result.website}\n`; + text += ` Status: ${result.success ? 'โœ… Success' : 'โŒ Failed'}\n`; + if (result.success) { + text += ` Execution Time: ${result.executionTime?.toFixed(2)}s\n`; + text += ` Attempts: ${result.attempts}\n`; + text += ` Saved: ${result.savedFile}\n`; + } else { + text += ` Error: ${result.error}\n`; + } + text += '\n'; + }); + + return text; + } +} + +/** + * Main function demonstrating advanced Scrape API usage. + */ +async function main() { + // Example websites to test with different configurations + const testWebsites = [ + { + url: 'https://example.com', + name: 'example', + renderHeavyJs: false, + description: 'Simple static website', + headers: {} + }, + { + url: 'https://httpbin.org/html', + name: 'httpbin_html', + renderHeavyJs: false, + description: 'HTTP testing service', + headers: {} + }, + { + url: 'https://httpbin.org/user-agent', + name: 'httpbin_user_agent', + renderHeavyJs: false, + description: 'User agent testing with custom headers', + headers: { + 'User-Agent': 'Custom Scraper Bot/1.0', + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' + } + } + ]; + + console.log('๐Ÿš€ Advanced Scrape API Example with scrapegraph-js SDK'); + console.log('='.repeat(70)); + + // Check API key + if (!API_KEY || API_KEY === 'your-api-key-here') { + console.error('โŒ Please set your SGAI_API_KEY environment variable'); + console.error('Example: export SGAI_API_KEY=your_api_key_here'); + process.exit(1); + } + + console.log('โœ… API key configured'); + console.log(`๐Ÿ“Š Processing ${testWebsites.length} websites with advanced features\n`); + + try { + // Initialize the processor + const processor = new ScrapeProcessor(API_KEY); + + // Process websites with controlled concurrency + const results = await processor.processWebsiteBatch(testWebsites, 2); + + // Generate comprehensive report + await processor.generateReport(results); + + // Display final summary + const successful = results.filter(r => r.success).length; + const failed = results.filter(r => !r.success).length; + + console.log('\n๐ŸŽฏ FINAL SUMMARY'); + console.log('='.repeat(30)); + console.log(`โœ… Successful: ${successful}`); + console.log(`โŒ Failed: ${failed}`); + console.log(`๐Ÿ“Š Success Rate: ${((successful / results.length) * 100).toFixed(1)}%`); + console.log(`๐Ÿ“ Output saved to: ${OUTPUT_DIR}/`); + + if (failed > 0) { + console.log('\nโŒ Failed websites:'); + results.filter(r => !r.success).forEach(result => { + console.log(` - ${result.website}: ${result.error}`); + }); + } + + console.log('\nโœ… Advanced scrape example completed successfully'); + + } catch (error) { + console.error('๐Ÿ’ฅ Fatal error:', error.message); + process.exit(1); + } +} + +// Run the example +if (import.meta.url === `file://${process.argv[1]}`) { + main().catch(error => { + console.error('โŒ Fatal error:', error.message); + process.exit(1); + }); +} diff --git a/scrapegraph-js/examples/utilities/scrape_example.js b/scrapegraph-js/examples/utilities/scrape_example.js new file mode 100644 index 0000000..15bf3b1 --- /dev/null +++ b/scrapegraph-js/examples/utilities/scrape_example.js @@ -0,0 +1,205 @@ +/** + * Example demonstrating how to use the Scrape API with the scrapegraph-js SDK. + * + * This example shows how to: + * 1. Set up the API request for Scrape + * 2. Make the API call to get HTML content from a website + * 3. Handle the response and save the HTML content + * 4. Demonstrate both regular and heavy JS rendering modes + * 5. Display the results and metadata + * + * Requirements: + * - Node.js 16+ + * - scrapegraph-js + * - A valid API key + * + * Usage: + * node scrape_example.js + */ + +import { scrape, getScrapeRequest } from '../index.js'; +import fs from 'fs/promises'; +import path from 'path'; + +// Configuration +const API_KEY = process.env.SGAI_API_KEY || 'your-api-key-here'; +const OUTPUT_DIR = 'scrape_output'; + +/** + * Get HTML content from a website using the Scrape API. + * + * @param {string} websiteUrl - The URL of the website to get HTML from + * @param {Object} options - Options for the scrape request + * @returns {Object} The API response containing HTML content and metadata + */ +async function scrapeWebsite(websiteUrl, options = {}) { + const { renderHeavyJs = false, headers = {} } = options; + + const jsMode = renderHeavyJs ? 'with heavy JS rendering' : 'without JS rendering'; + console.log(`Getting HTML content from: ${websiteUrl}`); + console.log(`Mode: ${jsMode}`); + + const startTime = Date.now(); + + try { + const result = await scrape(API_KEY, websiteUrl, { + renderHeavyJs, + headers + }); + + const executionTime = (Date.now() - startTime) / 1000; + console.log(`Execution time: ${executionTime.toFixed(2)} seconds`); + + return result; + } catch (error) { + console.error(`Error: ${error.message}`); + throw error; + } +} + +/** + * Save HTML content to a file. + * + * @param {string} htmlContent - The HTML content to save + * @param {string} filename - The name of the file (without extension) + * @param {string} outputDir - The directory to save the file in + * @returns {string} Path to the saved file + */ +async function saveHtmlContent(htmlContent, filename, outputDir = OUTPUT_DIR) { + // Create output directory if it doesn't exist + try { + await fs.mkdir(outputDir, { recursive: true }); + } catch (error) { + if (error.code !== 'EEXIST') { + throw error; + } + } + + // Save HTML file + const htmlFile = path.join(outputDir, `${filename}.html`); + await fs.writeFile(htmlFile, htmlContent, 'utf8'); + + console.log(`HTML content saved to: ${htmlFile}`); + return htmlFile; +} + +/** + * Analyze HTML content and provide basic statistics. + * + * @param {string} htmlContent - The HTML content to analyze + * @returns {Object} Basic statistics about the HTML content + */ +function analyzeHtmlContent(htmlContent) { + const stats = { + totalLength: htmlContent.length, + lines: htmlContent.split('\n').length, + hasDoctype: htmlContent.trim().startsWith(' { + console.error('โŒ Fatal error:', error.message); + process.exit(1); + }); +} diff --git a/scrapegraph-js/examples/utilities/scrape_polling_example.js b/scrapegraph-js/examples/utilities/scrape_polling_example.js new file mode 100644 index 0000000..87820e9 --- /dev/null +++ b/scrapegraph-js/examples/utilities/scrape_polling_example.js @@ -0,0 +1,288 @@ +/** + * Example demonstrating how to use Scrape with polling for results. + * + * This example shows how to: + * 1. Make a scrape request + * 2. Poll for results until completion + * 3. Handle different status responses + * 4. Implement timeout and retry logic + * + * Requirements: + * - Node.js 16+ + * - scrapegraph-js + * - A valid API key + * + * Usage: + * node scrape_polling_example.js + */ + +import { scrape, getScrapeRequest } from '../index.js'; +import fs from 'fs/promises'; +import path from 'path'; + +// Configuration +const API_KEY = process.env.SGAI_API_KEY || 'your-api-key-here'; +const OUTPUT_DIR = 'scrape_polling_output'; +const POLLING_INTERVAL = 2000; // 2 seconds +const MAX_POLLING_TIME = 300000; // 5 minutes +const MAX_RETRIES = 3; + +/** + * Wait for a specified amount of time. + * + * @param {number} ms - Milliseconds to wait + * @returns {Promise} Promise that resolves after the specified time + */ +function wait(ms) { + return new Promise(resolve => setTimeout(resolve, ms)); +} + +/** + * Poll for scrape results until completion or timeout. + * + * @param {string} apiKey - Your API key + * @param {string} requestId - The request ID to poll for + * @param {Object} options - Polling options + * @returns {Object} The final result + */ +async function pollScrapeResult(apiKey, requestId, options = {}) { + const { + interval = POLLING_INTERVAL, + maxTime = MAX_POLLING_TIME, + maxRetries = MAX_RETRIES + } = options; + + console.log(`๐Ÿ” Polling for scrape result: ${requestId}`); + console.log(`โฑ๏ธ Polling interval: ${interval}ms`); + console.log(`โฐ Max polling time: ${maxTime / 1000}s`); + + const startTime = Date.now(); + let attempt = 0; + + while (true) { + try { + attempt++; + console.log(`\n๐Ÿ“ก Polling attempt ${attempt}...`); + + const result = await getScrapeRequest(apiKey, requestId); + + console.log(`๐Ÿ“Š Status: ${result.status}`); + + if (result.status === 'completed') { + console.log('โœ… Scrape request completed successfully!'); + return result; + } else if (result.status === 'failed') { + console.error(`โŒ Scrape request failed: ${result.error || 'Unknown error'}`); + throw new Error(`Scrape request failed: ${result.error || 'Unknown error'}`); + } else if (result.status === 'processing') { + console.log('โณ Request is still processing...'); + + // Check if we've exceeded the maximum polling time + if (Date.now() - startTime > maxTime) { + throw new Error(`Polling timeout after ${maxTime / 1000}s`); + } + + // Wait before the next poll + console.log(`โณ Waiting ${interval / 1000}s before next poll...`); + await wait(interval); + + } else { + console.log(`โ„น๏ธ Unknown status: ${result.status}`); + + // Check if we've exceeded the maximum polling time + if (Date.now() - startTime > maxTime) { + throw new Error(`Polling timeout after ${maxTime / 1000}s`); + } + + // Wait before the next poll + await wait(interval); + } + + } catch (error) { + console.error(`โŒ Polling error: ${error.message}`); + + // Check if we've exceeded the maximum polling time + if (Date.now() - startTime > maxTime) { + throw new Error(`Polling timeout after ${maxTime / 1000}s`); + } + + // Check if we've exceeded the maximum retries + if (attempt >= maxRetries) { + throw new Error(`Max retries (${maxRetries}) exceeded`); + } + + // Wait before retry + console.log(`โณ Waiting ${interval / 1000}s before retry...`); + await wait(interval); + } + } +} + +/** + * Save HTML content to a file. + * + * @param {string} htmlContent - The HTML content to save + * @param {string} filename - The name of the file (without extension) + * @param {string} outputDir - The directory to save the file in + * @returns {string} Path to the saved file + */ +async function saveHtmlContent(htmlContent, filename, outputDir = OUTPUT_DIR) { + try { + await fs.mkdir(outputDir, { recursive: true }); + } catch (error) { + if (error.code !== 'EEXIST') { + throw error; + } + } + + const htmlFile = path.join(outputDir, `${filename}.html`); + await fs.writeFile(htmlFile, htmlContent, 'utf8'); + + console.log(`๐Ÿ’พ HTML content saved to: ${htmlFile}`); + return htmlFile; +} + +/** + * Analyze HTML content and provide basic statistics. + * + * @param {string} htmlContent - The HTML content to analyze + * @returns {Object} Basic statistics about the HTML content + */ +function analyzeHtmlContent(htmlContent) { + if (!htmlContent) { + return { + totalLength: 0, + lines: 0, + hasDoctype: false, + hasHtmlTag: false, + hasHeadTag: false, + hasBodyTag: false, + scriptTags: 0, + styleTags: 0, + divTags: 0, + pTags: 0, + imgTags: 0, + linkTags: 0 + }; + } + + const stats = { + totalLength: htmlContent.length, + lines: htmlContent.split('\n').length, + hasDoctype: htmlContent.trim().startsWith(' { + console.error('โŒ Fatal error:', error.message); + process.exit(1); + }); +} diff --git a/scrapegraph-js/examples/utilities/sendFeedback_example.js b/scrapegraph-js/examples/utilities/sendFeedback_example.js new file mode 100644 index 0000000..017343b --- /dev/null +++ b/scrapegraph-js/examples/utilities/sendFeedback_example.js @@ -0,0 +1,20 @@ +import { sendFeedback } from 'scrapegraph-js'; +import 'dotenv/config'; + +const apiKey = process.env.SGAI_APIKEY; +const requestId = '16a63a80-c87f-4cde-b005-e6c3ecda278b'; +const rating = 5; +const feedbackMessage = 'This is a test feedback message.'; + +try { + const feedback_response = await sendFeedback(apiKey, requestId, rating, feedbackMessage); + console.log(feedback_response); +} catch (error) { + console.error(error); +} + +const apiKey = 'your-api-key'; +const requestId = 'your-request-id'; +getSearchScraperRequest(apiKey, requestId) + .then(data => console.log(data)) + .catch(error => console.error(error)); diff --git a/scrapegraph-js/index.js b/scrapegraph-js/index.js new file mode 100644 index 0000000..7356f43 --- /dev/null +++ b/scrapegraph-js/index.js @@ -0,0 +1,34 @@ +export { agenticScraper, getAgenticScraperRequest } from './src/agenticScraper.js'; +export { smartScraper, getSmartScraperRequest } from './src/smartScraper.js'; +export { markdownify, getMarkdownifyRequest } from './src/markdownify.js'; +export { scrape, getScrapeRequest } from './src/scrape.js'; +export { searchScraper, getSearchScraperRequest } from './src/searchScraper.js'; +export { getCredits } from './src/credits.js'; +export { healthz } from './src/healthz.js'; +export { sendFeedback } from './src/feedback.js'; +export { crawl, getCrawlRequest } from './src/crawl.js'; +export { generateSchema, getSchemaStatus, pollSchemaGeneration } from './src/schema.js'; +export { sitemap } from './src/sitemap.js'; +export { + createScheduledJob, + getScheduledJobs, + getScheduledJob, + updateScheduledJob, + replaceScheduledJob, + deleteScheduledJob, + pauseScheduledJob, + resumeScheduledJob, + triggerScheduledJob, + getJobExecutions +} from './src/scheduledJobs.js'; + +// Mock utilities +export { + initMockConfig, + enableMock, + disableMock, + setMockResponses, + setMockHandler, + getMockConfig, + isMockEnabled +} from './src/utils/mockConfig.js'; diff --git a/scrapegraph-js/package-lock.json b/scrapegraph-js/package-lock.json new file mode 100644 index 0000000..5019617 --- /dev/null +++ b/scrapegraph-js/package-lock.json @@ -0,0 +1,1516 @@ +{ + "name": "scrapegraph-js", + "version": "0.2.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "scrapegraph-js", + "version": "0.1.5", + "license": "MIT", + "dependencies": { + "axios": "^1.6.0", + "zod": "^3.23.8", + "zod-to-json-schema": "^3.23.5" + }, + "devDependencies": { + "@eslint/js": "^9.16.0", + "dotenv": "^16.4.5", + "eslint": "^9.16.0", + "eslint-config-prettier": "^9.1.0", + "eslint-plugin-prettier": "^5.2.1", + "globals": "^15.12.0", + "prettier": "3.4.1" + } + }, + "node_modules/@eslint-community/eslint-utils": { + "version": "4.8.0", + "resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.8.0.tgz", + "integrity": "sha512-MJQFqrZgcW0UNYLGOuQpey/oTN59vyWwplvCGZztn1cKz9agZPPYpJB7h2OMmuu7VLqkvEjN8feFZJmxNF9D+Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "eslint-visitor-keys": "^3.4.3" + }, + "engines": { + "node": "^12.22.0 || ^14.17.0 || >=16.0.0" + }, + "funding": { + "url": "https://opencollective.com/eslint" + }, + "peerDependencies": { + "eslint": "^6.0.0 || ^7.0.0 || >=8.0.0" + } + }, + "node_modules/@eslint-community/eslint-utils/node_modules/eslint-visitor-keys": { + "version": "3.4.3", + "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-3.4.3.tgz", + "integrity": "sha512-wpc+LXeiyiisxPlEkUzU6svyS1frIO3Mgxj1fdy7Pm8Ygzguax2N3Fa/D/ag1WqbOprdI+uY6wMUl8/a2G+iag==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": "^12.22.0 || ^14.17.0 || >=16.0.0" + }, + "funding": { + "url": "https://opencollective.com/eslint" + } + }, + "node_modules/@eslint-community/regexpp": { + "version": "4.12.1", + "resolved": "https://registry.npmjs.org/@eslint-community/regexpp/-/regexpp-4.12.1.tgz", + "integrity": "sha512-CCZCDJuduB9OUkFkY2IgppNZMi2lBQgD2qzwXkEia16cge2pijY/aXi96CJMquDMn3nJdlPV1A5KrJEXwfLNzQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": "^12.0.0 || ^14.0.0 || >=16.0.0" + } + }, + "node_modules/@eslint/config-array": { + "version": "0.21.0", + "resolved": "https://registry.npmjs.org/@eslint/config-array/-/config-array-0.21.0.tgz", + "integrity": "sha512-ENIdc4iLu0d93HeYirvKmrzshzofPw6VkZRKQGe9Nv46ZnWUzcF1xV01dcvEg/1wXUR61OmmlSfyeyO7EvjLxQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@eslint/object-schema": "^2.1.6", + "debug": "^4.3.1", + "minimatch": "^3.1.2" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + } + }, + "node_modules/@eslint/config-helpers": { + "version": "0.3.1", + "resolved": "https://registry.npmjs.org/@eslint/config-helpers/-/config-helpers-0.3.1.tgz", + "integrity": "sha512-xR93k9WhrDYpXHORXpxVL5oHj3Era7wo6k/Wd8/IsQNnZUTzkGS29lyn3nAT05v6ltUuTFVCCYDEGfy2Or/sPA==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + } + }, + "node_modules/@eslint/core": { + "version": "0.15.2", + "resolved": "https://registry.npmjs.org/@eslint/core/-/core-0.15.2.tgz", + "integrity": "sha512-78Md3/Rrxh83gCxoUc0EiciuOHsIITzLy53m3d9UyiW8y9Dj2D29FeETqyKA+BRK76tnTp6RXWb3pCay8Oyomg==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@types/json-schema": "^7.0.15" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + } + }, + "node_modules/@eslint/eslintrc": { + "version": "3.3.1", + "resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-3.3.1.tgz", + "integrity": "sha512-gtF186CXhIl1p4pJNGZw8Yc6RlshoePRvE0X91oPGb3vZ8pM3qOS9W9NGPat9LziaBV7XrJWGylNQXkGcnM3IQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "ajv": "^6.12.4", + "debug": "^4.3.2", + "espree": "^10.0.1", + "globals": "^14.0.0", + "ignore": "^5.2.0", + "import-fresh": "^3.2.1", + "js-yaml": "^4.1.0", + "minimatch": "^3.1.2", + "strip-json-comments": "^3.1.1" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "url": "https://opencollective.com/eslint" + } + }, + "node_modules/@eslint/eslintrc/node_modules/globals": { + "version": "14.0.0", + "resolved": "https://registry.npmjs.org/globals/-/globals-14.0.0.tgz", + "integrity": "sha512-oahGvuMGQlPw/ivIYBjVSrWAfWLBeku5tpPE2fOPLi+WHffIWbuh2tCjhyQhTBPMf5E9jDEH4FOmTYgYwbKwtQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/@eslint/js": { + "version": "9.34.0", + "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.34.0.tgz", + "integrity": "sha512-EoyvqQnBNsV1CWaEJ559rxXL4c8V92gxirbawSmVUOWXlsRxxQXl6LmCpdUblgxgSkDIqKnhzba2SjRTI/A5Rw==", + "dev": true, + "license": "MIT", + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "url": "https://eslint.org/donate" + } + }, + "node_modules/@eslint/object-schema": { + "version": "2.1.6", + "resolved": "https://registry.npmjs.org/@eslint/object-schema/-/object-schema-2.1.6.tgz", + "integrity": "sha512-RBMg5FRL0I0gs51M/guSAj5/e14VQ4tpZnQNWwuDT66P14I43ItmPfIZRhO9fUVIPOAQXU47atlywZ/czoqFPA==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + } + }, + "node_modules/@eslint/plugin-kit": { + "version": "0.3.5", + "resolved": "https://registry.npmjs.org/@eslint/plugin-kit/-/plugin-kit-0.3.5.tgz", + "integrity": "sha512-Z5kJ+wU3oA7MMIqVR9tyZRtjYPr4OC004Q4Rw7pgOKUOKkJfZ3O24nz3WYfGRpMDNmcOi3TwQOmgm7B7Tpii0w==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@eslint/core": "^0.15.2", + "levn": "^0.4.1" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + } + }, + "node_modules/@humanfs/core": { + "version": "0.19.1", + "resolved": "https://registry.npmjs.org/@humanfs/core/-/core-0.19.1.tgz", + "integrity": "sha512-5DyQ4+1JEUzejeK1JGICcideyfUbGixgS9jNgex5nqkW+cY7WZhxBigmieN5Qnw9ZosSNVC9KQKyb+GUaGyKUA==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": ">=18.18.0" + } + }, + "node_modules/@humanfs/node": { + "version": "0.16.7", + "resolved": "https://registry.npmjs.org/@humanfs/node/-/node-0.16.7.tgz", + "integrity": "sha512-/zUx+yOsIrG4Y43Eh2peDeKCxlRt/gET6aHfaKpuq267qXdYDFViVHfMaLyygZOnl0kGWxFIgsBy8QFuTLUXEQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@humanfs/core": "^0.19.1", + "@humanwhocodes/retry": "^0.4.0" + }, + "engines": { + "node": ">=18.18.0" + } + }, + "node_modules/@humanwhocodes/module-importer": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@humanwhocodes/module-importer/-/module-importer-1.0.1.tgz", + "integrity": "sha512-bxveV4V8v5Yb4ncFTT3rPSgZBOpCkjfK0y4oVVVJwIuDVBRMDXrPyXRL988i5ap9m9bnyEEjWfm5WkBmtffLfA==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": ">=12.22" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/nzakas" + } + }, + "node_modules/@humanwhocodes/retry": { + "version": "0.4.3", + "resolved": "https://registry.npmjs.org/@humanwhocodes/retry/-/retry-0.4.3.tgz", + "integrity": "sha512-bV0Tgo9K4hfPCek+aMAn81RppFKv2ySDQeMoSZuvTASywNTnVJCArCZE2FWqpvIatKu7VMRLWlR1EazvVhDyhQ==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": ">=18.18" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/nzakas" + } + }, + "node_modules/@pkgr/core": { + "version": "0.2.9", + "resolved": "https://registry.npmjs.org/@pkgr/core/-/core-0.2.9.tgz", + "integrity": "sha512-QNqXyfVS2wm9hweSYD2O7F0G06uurj9kZ96TRQE5Y9hU7+tgdZwIkbAKc5Ocy1HxEY2kuDQa6cQ1WRs/O5LFKA==", + "dev": true, + "license": "MIT", + "engines": { + "node": "^12.20.0 || ^14.18.0 || >=16.0.0" + }, + "funding": { + "url": "https://opencollective.com/pkgr" + } + }, + "node_modules/@types/estree": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz", + "integrity": "sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/json-schema": { + "version": "7.0.15", + "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.15.tgz", + "integrity": "sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA==", + "dev": true, + "license": "MIT" + }, + "node_modules/acorn": { + "version": "8.15.0", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", + "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", + "dev": true, + "license": "MIT", + "bin": { + "acorn": "bin/acorn" + }, + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/acorn-jsx": { + "version": "5.3.2", + "resolved": "https://registry.npmjs.org/acorn-jsx/-/acorn-jsx-5.3.2.tgz", + "integrity": "sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ==", + "dev": true, + "license": "MIT", + "peerDependencies": { + "acorn": "^6.0.0 || ^7.0.0 || ^8.0.0" + } + }, + "node_modules/ajv": { + "version": "6.12.6", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", + "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", + "dev": true, + "license": "MIT", + "dependencies": { + "fast-deep-equal": "^3.1.1", + "fast-json-stable-stringify": "^2.0.0", + "json-schema-traverse": "^0.4.1", + "uri-js": "^4.2.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dev": true, + "license": "MIT", + "dependencies": { + "color-convert": "^2.0.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/argparse": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", + "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", + "dev": true, + "license": "Python-2.0" + }, + "node_modules/asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==", + "license": "MIT" + }, + "node_modules/axios": { + "version": "1.11.0", + "resolved": "https://registry.npmjs.org/axios/-/axios-1.11.0.tgz", + "integrity": "sha512-1Lx3WLFQWm3ooKDYZD1eXmoGO9fxYQjrycfHFC8P0sCfQVXyROp0p9PFWBehewBOdCwHc+f/b8I0fMto5eSfwA==", + "license": "MIT", + "dependencies": { + "follow-redirects": "^1.15.6", + "form-data": "^4.0.4", + "proxy-from-env": "^1.1.0" + } + }, + "node_modules/balanced-match": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "dev": true, + "license": "MIT" + }, + "node_modules/brace-expansion": { + "version": "1.1.12", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz", + "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==", + "dev": true, + "license": "MIT", + "dependencies": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "node_modules/call-bind-apply-helpers": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", + "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/callsites": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", + "integrity": "sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/chalk": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", + "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-styles": "^4.1.0", + "supports-color": "^7.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/chalk?sponsor=1" + } + }, + "node_modules/color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" + } + }, + "node_modules/color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "dev": true, + "license": "MIT" + }, + "node_modules/combined-stream": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "license": "MIT", + "dependencies": { + "delayed-stream": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/concat-map": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", + "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==", + "dev": true, + "license": "MIT" + }, + "node_modules/cross-spawn": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", + "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", + "dev": true, + "license": "MIT", + "dependencies": { + "path-key": "^3.1.0", + "shebang-command": "^2.0.0", + "which": "^2.0.1" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/debug": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.1.tgz", + "integrity": "sha512-KcKCqiftBJcZr++7ykoDIEwSa3XWowTfNPo92BYxjXiyYEVrUQh2aLyhxBCwww+heortUFxEJYcRzosstTEBYQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/deep-is": { + "version": "0.1.5", + "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.5.tgz", + "integrity": "sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", + "license": "MIT", + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/dotenv": { + "version": "16.6.1", + "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.6.1.tgz", + "integrity": "sha512-uBq4egWHTcTt33a72vpSG0z3HnPuIl6NqYcTrKEg2azoEyl2hpW0zqlxysq2pK9HlDIHyHyakeYaYnSAwd8bow==", + "dev": true, + "license": "BSD-2-Clause", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://dotenvx.com" + } + }, + "node_modules/dunder-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", + "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.1", + "es-errors": "^1.3.0", + "gopd": "^1.2.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-define-property": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", + "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-errors": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-object-atoms": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", + "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-set-tostringtag": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz", + "integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.6", + "has-tostringtag": "^1.0.2", + "hasown": "^2.0.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/escape-string-regexp": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz", + "integrity": "sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/eslint": { + "version": "9.34.0", + "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.34.0.tgz", + "integrity": "sha512-RNCHRX5EwdrESy3Jc9o8ie8Bog+PeYvvSR8sDGoZxNFTvZ4dlxUB3WzQ3bQMztFrSRODGrLLj8g6OFuGY/aiQg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@eslint-community/eslint-utils": "^4.2.0", + "@eslint-community/regexpp": "^4.12.1", + "@eslint/config-array": "^0.21.0", + "@eslint/config-helpers": "^0.3.1", + "@eslint/core": "^0.15.2", + "@eslint/eslintrc": "^3.3.1", + "@eslint/js": "9.34.0", + "@eslint/plugin-kit": "^0.3.5", + "@humanfs/node": "^0.16.6", + "@humanwhocodes/module-importer": "^1.0.1", + "@humanwhocodes/retry": "^0.4.2", + "@types/estree": "^1.0.6", + "@types/json-schema": "^7.0.15", + "ajv": "^6.12.4", + "chalk": "^4.0.0", + "cross-spawn": "^7.0.6", + "debug": "^4.3.2", + "escape-string-regexp": "^4.0.0", + "eslint-scope": "^8.4.0", + "eslint-visitor-keys": "^4.2.1", + "espree": "^10.4.0", + "esquery": "^1.5.0", + "esutils": "^2.0.2", + "fast-deep-equal": "^3.1.3", + "file-entry-cache": "^8.0.0", + "find-up": "^5.0.0", + "glob-parent": "^6.0.2", + "ignore": "^5.2.0", + "imurmurhash": "^0.1.5", + "is-glob": "^4.0.0", + "json-stable-stringify-without-jsonify": "^1.0.1", + "lodash.merge": "^4.6.2", + "minimatch": "^3.1.2", + "natural-compare": "^1.4.0", + "optionator": "^0.9.3" + }, + "bin": { + "eslint": "bin/eslint.js" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "url": "https://eslint.org/donate" + }, + "peerDependencies": { + "jiti": "*" + }, + "peerDependenciesMeta": { + "jiti": { + "optional": true + } + } + }, + "node_modules/eslint-config-prettier": { + "version": "9.1.2", + "resolved": "https://registry.npmjs.org/eslint-config-prettier/-/eslint-config-prettier-9.1.2.tgz", + "integrity": "sha512-iI1f+D2ViGn+uvv5HuHVUamg8ll4tN+JRHGc6IJi4TP9Kl976C57fzPXgseXNs8v0iA8aSJpHsTWjDb9QJamGQ==", + "dev": true, + "license": "MIT", + "bin": { + "eslint-config-prettier": "bin/cli.js" + }, + "peerDependencies": { + "eslint": ">=7.0.0" + } + }, + "node_modules/eslint-plugin-prettier": { + "version": "5.5.4", + "resolved": "https://registry.npmjs.org/eslint-plugin-prettier/-/eslint-plugin-prettier-5.5.4.tgz", + "integrity": "sha512-swNtI95SToIz05YINMA6Ox5R057IMAmWZ26GqPxusAp1TZzj+IdY9tXNWWD3vkF/wEqydCONcwjTFpxybBqZsg==", + "dev": true, + "license": "MIT", + "dependencies": { + "prettier-linter-helpers": "^1.0.0", + "synckit": "^0.11.7" + }, + "engines": { + "node": "^14.18.0 || >=16.0.0" + }, + "funding": { + "url": "https://opencollective.com/eslint-plugin-prettier" + }, + "peerDependencies": { + "@types/eslint": ">=8.0.0", + "eslint": ">=8.0.0", + "eslint-config-prettier": ">= 7.0.0 <10.0.0 || >=10.1.0", + "prettier": ">=3.0.0" + }, + "peerDependenciesMeta": { + "@types/eslint": { + "optional": true + }, + "eslint-config-prettier": { + "optional": true + } + } + }, + "node_modules/eslint-scope": { + "version": "8.4.0", + "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-8.4.0.tgz", + "integrity": "sha512-sNXOfKCn74rt8RICKMvJS7XKV/Xk9kA7DyJr8mJik3S7Cwgy3qlkkmyS2uQB3jiJg6VNdZd/pDBJu0nvG2NlTg==", + "dev": true, + "license": "BSD-2-Clause", + "dependencies": { + "esrecurse": "^4.3.0", + "estraverse": "^5.2.0" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "url": "https://opencollective.com/eslint" + } + }, + "node_modules/eslint-visitor-keys": { + "version": "4.2.1", + "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-4.2.1.tgz", + "integrity": "sha512-Uhdk5sfqcee/9H/rCOJikYz67o0a2Tw2hGRPOG2Y1R2dg7brRe1uG0yaNQDHu+TO/uQPF/5eCapvYSmHUjt7JQ==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "url": "https://opencollective.com/eslint" + } + }, + "node_modules/espree": { + "version": "10.4.0", + "resolved": "https://registry.npmjs.org/espree/-/espree-10.4.0.tgz", + "integrity": "sha512-j6PAQ2uUr79PZhBjP5C5fhl8e39FmRnOjsD5lGnWrFU8i2G776tBK7+nP8KuQUTTyAZUwfQqXAgrVH5MbH9CYQ==", + "dev": true, + "license": "BSD-2-Clause", + "dependencies": { + "acorn": "^8.15.0", + "acorn-jsx": "^5.3.2", + "eslint-visitor-keys": "^4.2.1" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "url": "https://opencollective.com/eslint" + } + }, + "node_modules/esquery": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/esquery/-/esquery-1.6.0.tgz", + "integrity": "sha512-ca9pw9fomFcKPvFLXhBKUK90ZvGibiGOvRJNbjljY7s7uq/5YO4BOzcYtJqExdx99rF6aAcnRxHmcUHcz6sQsg==", + "dev": true, + "license": "BSD-3-Clause", + "dependencies": { + "estraverse": "^5.1.0" + }, + "engines": { + "node": ">=0.10" + } + }, + "node_modules/esrecurse": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/esrecurse/-/esrecurse-4.3.0.tgz", + "integrity": "sha512-KmfKL3b6G+RXvP8N1vr3Tq1kL/oCFgn2NYXEtqP8/L3pKapUA4G8cFVaoF3SU323CD4XypR/ffioHmkti6/Tag==", + "dev": true, + "license": "BSD-2-Clause", + "dependencies": { + "estraverse": "^5.2.0" + }, + "engines": { + "node": ">=4.0" + } + }, + "node_modules/estraverse": { + "version": "5.3.0", + "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz", + "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==", + "dev": true, + "license": "BSD-2-Clause", + "engines": { + "node": ">=4.0" + } + }, + "node_modules/esutils": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", + "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==", + "dev": true, + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/fast-deep-equal": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", + "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", + "dev": true, + "license": "MIT" + }, + "node_modules/fast-diff": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/fast-diff/-/fast-diff-1.3.0.tgz", + "integrity": "sha512-VxPP4NqbUjj6MaAOafWeUn2cXWLcCtljklUtZf0Ind4XQ+QPtmA0b18zZy0jIQx+ExRVCR/ZQpBmik5lXshNsw==", + "dev": true, + "license": "Apache-2.0" + }, + "node_modules/fast-json-stable-stringify": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz", + "integrity": "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==", + "dev": true, + "license": "MIT" + }, + "node_modules/fast-levenshtein": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz", + "integrity": "sha512-DCXu6Ifhqcks7TZKY3Hxp3y6qphY5SJZmrWMDrKcERSOXWQdMhU9Ig/PYrzyw/ul9jOIyh0N4M0tbC5hodg8dw==", + "dev": true, + "license": "MIT" + }, + "node_modules/file-entry-cache": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/file-entry-cache/-/file-entry-cache-8.0.0.tgz", + "integrity": "sha512-XXTUwCvisa5oacNGRP9SfNtYBNAMi+RPwBFmblZEF7N7swHYQS6/Zfk7SRwx4D5j3CH211YNRco1DEMNVfZCnQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "flat-cache": "^4.0.0" + }, + "engines": { + "node": ">=16.0.0" + } + }, + "node_modules/find-up": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/find-up/-/find-up-5.0.0.tgz", + "integrity": "sha512-78/PXT1wlLLDgTzDs7sjq9hzz0vXD+zn+7wypEe4fXQxCmdmqfGsEPQxmiCSQI3ajFV91bVSsvNtrJRiW6nGng==", + "dev": true, + "license": "MIT", + "dependencies": { + "locate-path": "^6.0.0", + "path-exists": "^4.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/flat-cache": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/flat-cache/-/flat-cache-4.0.1.tgz", + "integrity": "sha512-f7ccFPK3SXFHpx15UIGyRJ/FJQctuKZ0zVuN3frBo4HnK3cay9VEW0R6yPYFHC0AgqhukPzKjq22t5DmAyqGyw==", + "dev": true, + "license": "MIT", + "dependencies": { + "flatted": "^3.2.9", + "keyv": "^4.5.4" + }, + "engines": { + "node": ">=16" + } + }, + "node_modules/flatted": { + "version": "3.3.3", + "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.3.3.tgz", + "integrity": "sha512-GX+ysw4PBCz0PzosHDepZGANEuFCMLrnRTiEy9McGjmkCQYwRq4A/X786G/fjM/+OjsWSU1ZrY5qyARZmO/uwg==", + "dev": true, + "license": "ISC" + }, + "node_modules/follow-redirects": { + "version": "1.15.11", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.11.tgz", + "integrity": "sha512-deG2P0JfjrTxl50XGCDyfI97ZGVCxIpfKYmfyrQ54n5FO/0gfIES8C/Psl6kWVDolizcaaxZJnTS0QSMxvnsBQ==", + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/RubenVerborgh" + } + ], + "license": "MIT", + "engines": { + "node": ">=4.0" + }, + "peerDependenciesMeta": { + "debug": { + "optional": true + } + } + }, + "node_modules/form-data": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.4.tgz", + "integrity": "sha512-KrGhL9Q4zjj0kiUt5OO4Mr/A/jlI2jDYs5eHBpYHPcBEVSiipAvn2Ko2HnPe20rmcuuvMHNdZFp+4IlGTMF0Ow==", + "license": "MIT", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "es-set-tostringtag": "^2.1.0", + "hasown": "^2.0.2", + "mime-types": "^2.1.12" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/function-bind": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", + "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-intrinsic": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", + "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "es-define-property": "^1.0.1", + "es-errors": "^1.3.0", + "es-object-atoms": "^1.1.1", + "function-bind": "^1.1.2", + "get-proto": "^1.0.1", + "gopd": "^1.2.0", + "has-symbols": "^1.1.0", + "hasown": "^2.0.2", + "math-intrinsics": "^1.1.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", + "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", + "license": "MIT", + "dependencies": { + "dunder-proto": "^1.0.1", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/glob-parent": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-6.0.2.tgz", + "integrity": "sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A==", + "dev": true, + "license": "ISC", + "dependencies": { + "is-glob": "^4.0.3" + }, + "engines": { + "node": ">=10.13.0" + } + }, + "node_modules/globals": { + "version": "15.15.0", + "resolved": "https://registry.npmjs.org/globals/-/globals-15.15.0.tgz", + "integrity": "sha512-7ACyT3wmyp3I61S4fG682L0VA2RGD9otkqGJIwNUMF1SWUombIIk+af1unuDYgMm082aHYwD+mzJvv9Iu8dsgg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/gopd": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", + "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-flag": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", + "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/has-symbols": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", + "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-tostringtag": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz", + "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==", + "license": "MIT", + "dependencies": { + "has-symbols": "^1.0.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/hasown": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", + "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", + "license": "MIT", + "dependencies": { + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/ignore": { + "version": "5.3.2", + "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.3.2.tgz", + "integrity": "sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 4" + } + }, + "node_modules/import-fresh": { + "version": "3.3.1", + "resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.3.1.tgz", + "integrity": "sha512-TR3KfrTZTYLPB6jUjfx6MF9WcWrHL9su5TObK4ZkYgBdWKPOFoSoQIdEuTuR82pmtxH2spWG9h6etwfr1pLBqQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "parent-module": "^1.0.0", + "resolve-from": "^4.0.0" + }, + "engines": { + "node": ">=6" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/imurmurhash": { + "version": "0.1.5", + "resolved": "https://registry.npmjs.org/imurmurhash/-/imurmurhash-0.1.5.tgz", + "integrity": "sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.8.19" + } + }, + "node_modules/is-extglob": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", + "integrity": "sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/is-glob": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.3.tgz", + "integrity": "sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg==", + "dev": true, + "license": "MIT", + "dependencies": { + "is-extglob": "^2.1.1" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/isexe": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", + "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", + "dev": true, + "license": "ISC" + }, + "node_modules/js-yaml": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz", + "integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==", + "dev": true, + "license": "MIT", + "dependencies": { + "argparse": "^2.0.1" + }, + "bin": { + "js-yaml": "bin/js-yaml.js" + } + }, + "node_modules/json-buffer": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/json-buffer/-/json-buffer-3.0.1.tgz", + "integrity": "sha512-4bV5BfR2mqfQTJm+V5tPPdf+ZpuhiIvTuAB5g8kcrXOZpTT/QwwVRWBywX1ozr6lEuPdbHxwaJlm9G6mI2sfSQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/json-schema-traverse": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==", + "dev": true, + "license": "MIT" + }, + "node_modules/json-stable-stringify-without-jsonify": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/json-stable-stringify-without-jsonify/-/json-stable-stringify-without-jsonify-1.0.1.tgz", + "integrity": "sha512-Bdboy+l7tA3OGW6FjyFHWkP5LuByj1Tk33Ljyq0axyzdk9//JSi2u3fP1QSmd1KNwq6VOKYGlAu87CisVir6Pw==", + "dev": true, + "license": "MIT" + }, + "node_modules/keyv": { + "version": "4.5.4", + "resolved": "https://registry.npmjs.org/keyv/-/keyv-4.5.4.tgz", + "integrity": "sha512-oxVHkHR/EJf2CNXnWxRLW6mg7JyCCUcG0DtEGmL2ctUo1PNTin1PUil+r/+4r5MpVgC/fn1kjsx7mjSujKqIpw==", + "dev": true, + "license": "MIT", + "dependencies": { + "json-buffer": "3.0.1" + } + }, + "node_modules/levn": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/levn/-/levn-0.4.1.tgz", + "integrity": "sha512-+bT2uH4E5LGE7h/n3evcS/sQlJXCpIp6ym8OWJ5eV6+67Dsql/LaaT7qJBAt2rzfoa/5QBGBhxDix1dMt2kQKQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "prelude-ls": "^1.2.1", + "type-check": "~0.4.0" + }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/locate-path": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-6.0.0.tgz", + "integrity": "sha512-iPZK6eYjbxRu3uB4/WZ3EsEIMJFMqAoopl3R+zuq0UjcAm/MO6KCweDgPfP3elTztoKP3KtnVHxTn2NHBSDVUw==", + "dev": true, + "license": "MIT", + "dependencies": { + "p-locate": "^5.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/lodash.merge": { + "version": "4.6.2", + "resolved": "https://registry.npmjs.org/lodash.merge/-/lodash.merge-4.6.2.tgz", + "integrity": "sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/math-intrinsics": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", + "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "license": "MIT", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/minimatch": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", + "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "dev": true, + "license": "ISC", + "dependencies": { + "brace-expansion": "^1.1.7" + }, + "engines": { + "node": "*" + } + }, + "node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "dev": true, + "license": "MIT" + }, + "node_modules/natural-compare": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz", + "integrity": "sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==", + "dev": true, + "license": "MIT" + }, + "node_modules/optionator": { + "version": "0.9.4", + "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz", + "integrity": "sha512-6IpQ7mKUxRcZNLIObR0hz7lxsapSSIYNZJwXPGeF0mTVqGKFIXj1DQcMoT22S3ROcLyY/rz0PWaWZ9ayWmad9g==", + "dev": true, + "license": "MIT", + "dependencies": { + "deep-is": "^0.1.5", + "fast-levenshtein": "^2.0.6", + "levn": "^0.4.1", + "prelude-ls": "^1.2.1", + "type-check": "^0.4.0", + "word-wrap": "^1.2.5" + }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/p-limit": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz", + "integrity": "sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "yocto-queue": "^0.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/p-locate": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-5.0.0.tgz", + "integrity": "sha512-LaNjtRWUBY++zB5nE/NwcaoMylSPk+S+ZHNB1TzdbMJMny6dynpAGt7X/tl/QYq3TIeE6nxHppbo2LGymrG5Pw==", + "dev": true, + "license": "MIT", + "dependencies": { + "p-limit": "^3.0.2" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/parent-module": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz", + "integrity": "sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==", + "dev": true, + "license": "MIT", + "dependencies": { + "callsites": "^3.0.0" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/path-exists": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", + "integrity": "sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/path-key": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", + "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/prelude-ls": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz", + "integrity": "sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/prettier": { + "version": "3.4.1", + "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.4.1.tgz", + "integrity": "sha512-G+YdqtITVZmOJje6QkXQWzl3fSfMxFwm1tjTyo9exhkmWSqC4Yhd1+lug++IlR2mvRVAxEDDWYkQdeSztajqgg==", + "dev": true, + "license": "MIT", + "bin": { + "prettier": "bin/prettier.cjs" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/prettier/prettier?sponsor=1" + } + }, + "node_modules/prettier-linter-helpers": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/prettier-linter-helpers/-/prettier-linter-helpers-1.0.0.tgz", + "integrity": "sha512-GbK2cP9nraSSUF9N2XwUwqfzlAFlMNYYl+ShE/V+H8a9uNl/oUqB1w2EL54Jh0OlyRSd8RfWYJ3coVS4TROP2w==", + "dev": true, + "license": "MIT", + "dependencies": { + "fast-diff": "^1.1.2" + }, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/proxy-from-env": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", + "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==", + "license": "MIT" + }, + "node_modules/punycode": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", + "integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/resolve-from": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz", + "integrity": "sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=4" + } + }, + "node_modules/shebang-command": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", + "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", + "dev": true, + "license": "MIT", + "dependencies": { + "shebang-regex": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/shebang-regex": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", + "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-json-comments": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz", + "integrity": "sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/supports-color": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", + "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", + "dev": true, + "license": "MIT", + "dependencies": { + "has-flag": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/synckit": { + "version": "0.11.11", + "resolved": "https://registry.npmjs.org/synckit/-/synckit-0.11.11.tgz", + "integrity": "sha512-MeQTA1r0litLUf0Rp/iisCaL8761lKAZHaimlbGK4j0HysC4PLfqygQj9srcs0m2RdtDYnF8UuYyKpbjHYp7Jw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@pkgr/core": "^0.2.9" + }, + "engines": { + "node": "^14.18.0 || >=16.0.0" + }, + "funding": { + "url": "https://opencollective.com/synckit" + } + }, + "node_modules/type-check": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/type-check/-/type-check-0.4.0.tgz", + "integrity": "sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew==", + "dev": true, + "license": "MIT", + "dependencies": { + "prelude-ls": "^1.2.1" + }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/uri-js": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz", + "integrity": "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==", + "dev": true, + "license": "BSD-2-Clause", + "dependencies": { + "punycode": "^2.1.0" + } + }, + "node_modules/which": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", + "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", + "dev": true, + "license": "ISC", + "dependencies": { + "isexe": "^2.0.0" + }, + "bin": { + "node-which": "bin/node-which" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/word-wrap": { + "version": "1.2.5", + "resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.5.tgz", + "integrity": "sha512-BN22B5eaMMI9UMtjrGd5g5eCYPpCPDUy0FJXbYsaT5zYxjFOckS53SQDE3pWkVoWpHXVb3BrYcEN4Twa55B5cA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/yocto-queue": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-0.1.0.tgz", + "integrity": "sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/zod": { + "version": "3.25.76", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz", + "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + }, + "node_modules/zod-to-json-schema": { + "version": "3.24.6", + "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.24.6.tgz", + "integrity": "sha512-h/z3PKvcTcTetyjl1fkj79MHNEjm+HpD6NXheWjzOekY7kV+lwDYnHw+ivHkijnCSMz1yJaWBD9vu/Fcmk+vEg==", + "license": "ISC", + "peerDependencies": { + "zod": "^3.24.1" + } + } + } +} diff --git a/scrapegraph-js/package.json b/scrapegraph-js/package.json new file mode 100644 index 0000000..dfa65a0 --- /dev/null +++ b/scrapegraph-js/package.json @@ -0,0 +1,49 @@ +{ + "name": "scrapegraph-js", + "author": "ScrapeGraphAI", + "version": "0.2.2", + "description": "Scrape and extract structured data from a webpage using ScrapeGraphAI's APIs. Supports cookies for authentication, infinite scrolling, and pagination.", + "repository": { + "type": "git", + "url": "https://github.com/ScrapeGraphAI/scrapegraph-sdk", + "directory": "scrapegraph-js" + }, + "scripts": { + "format": "prettier --write --cache --cache-strategy metadata . !dist", + "lint": "eslint .", + "test": "node test/smartScraper_pagination_test.js" + }, + "license": "MIT", + "homepage": "https://github.com/ScrapeGraphAI/scrapegraph-sdk/tree/main/scrapegraph-js", + "keywords": [ + "scraping", + "webscraping", + "automated-scraper", + "gpt-3", + "gpt-4", + "llm", + "ai", + "cookies", + "authentication", + "session-management", + "infinite-scroll", + "pagination" + ], + "main": "index.js", + "module": "index.js", + "type": "module", + "dependencies": { + "axios": "^1.6.0", + "zod": "^3.23.8", + "zod-to-json-schema": "^3.23.5" + }, + "devDependencies": { + "@eslint/js": "^9.16.0", + "dotenv": "^16.4.5", + "eslint": "^9.16.0", + "eslint-config-prettier": "^9.1.0", + "eslint-plugin-prettier": "^5.2.1", + "globals": "^15.12.0", + "prettier": "3.4.1" + } +} diff --git a/scrapegraph-js/src/agenticScraper.js b/scrapegraph-js/src/agenticScraper.js new file mode 100644 index 0000000..7e48c0c --- /dev/null +++ b/scrapegraph-js/src/agenticScraper.js @@ -0,0 +1,226 @@ +import axios from 'axios'; +import handleError from './utils/handleError.js'; +import { isMockEnabled, getMockConfig } from './utils/mockConfig.js'; +import { getMockResponse } from './utils/mockResponse.js'; + +/** + * Perform automated browser actions on a webpage using AI-powered agentic scraping. + * + * @param {string} apiKey - Your ScrapeGraph AI API key + * @param {string} url - The URL of the webpage to interact with + * @param {string[]} steps - Array of steps to perform on the webpage (e.g., ["Type email@gmail.com in email input box", "click on login"]) + * @param {boolean} [useSession=true] - Whether to use session for the scraping operations + * @param {string} [userPrompt=null] - Prompt for AI extraction (required when aiExtraction=true) + * @param {Object} [outputSchema=null] - Schema for structured data extraction (optional, used with aiExtraction=true) + * @param {boolean} [aiExtraction=false] - Whether to use AI for data extraction from the scraped content + * @param {Object} options - Optional configuration options + * @param {boolean} options.mock - Override mock mode for this request + * @param {boolean} options.renderHeavyJs - Whether to render heavy JavaScript on the page + * @param {boolean} [options.stealth=false] - Enable stealth mode to avoid bot detection + * @returns {Promise} Response from the API containing request_id and initial status + * @throws {Error} Will throw an error in case of an HTTP failure or invalid parameters. + * + * @example + * // Example usage for basic automated login (no AI extraction): + * const apiKey = 'your-api-key'; + * const url = 'https://dashboard.scrapegraphai.com/'; + * const steps = [ + * 'Type email@gmail.com in email input box', + * 'Type test-password@123 in password inputbox', + * 'click on login' + * ]; + * + * try { + * const result = await agenticScraper(apiKey, url, steps, true); + * console.log('Request ID:', result.request_id); + * console.log('Status:', result.status); + * } catch (error) { + * console.error('Error:', error.message); + * } + * + * @example + * // Example usage with AI extraction: + * const outputSchema = { + * user_info: { + * type: "object", + * properties: { + * username: { type: "string" }, + * email: { type: "string" }, + * dashboard_sections: { type: "array", items: { type: "string" } } + * } + * } + * }; + * + * try { + * const result = await agenticScraper( + * apiKey, + * url, + * steps, + * true, + * "Extract user information and available dashboard sections", + * outputSchema, + * true + * ); + * console.log('Request ID:', result.request_id); + * } catch (error) { + * console.error('Error:', error.message); + * } + */ +export async function agenticScraper(apiKey, url, steps, useSession = true, userPrompt = null, outputSchema = null, aiExtraction = false, options = {}) { + const { mock = null, renderHeavyJs = false, stealth = false } = options; + + // Check if mock mode is enabled + const useMock = mock !== null ? mock : isMockEnabled(); + + if (useMock) { + console.log('๐Ÿงช Mock mode active. Returning stub for agenticScraper request'); + const mockConfig = getMockConfig(); + const mockData = getMockResponse('POST', 'https://api.scrapegraphai.com/v1/agentic-scrapper', mockConfig.customResponses, mockConfig.customHandler); + return mockData; + } + + const endpoint = 'https://api.scrapegraphai.com/v1/agentic-scrapper'; + const headers = { + 'accept': 'application/json', + 'SGAI-APIKEY': apiKey, + 'Content-Type': 'application/json', + }; + + // Validate inputs + if (!apiKey || typeof apiKey !== 'string') { + throw new Error('API key must be a non-empty string'); + } + + if (!url || typeof url !== 'string') { + throw new Error('URL must be a non-empty string'); + } + + if (!url.startsWith('http://') && !url.startsWith('https://')) { + throw new Error('URL must start with http:// or https://'); + } + + if (!Array.isArray(steps) || steps.length === 0) { + throw new Error('Steps must be a non-empty array'); + } + + if (steps.some(step => !step || typeof step !== 'string' || !step.trim())) { + throw new Error('All steps must be non-empty strings'); + } + + if (typeof useSession !== 'boolean') { + throw new Error('useSession must be a boolean value'); + } + + if (typeof aiExtraction !== 'boolean') { + throw new Error('aiExtraction must be a boolean value'); + } + + // Validate AI extraction parameters + if (aiExtraction) { + if (!userPrompt || typeof userPrompt !== 'string' || !userPrompt.trim()) { + throw new Error('userPrompt is required and must be a non-empty string when aiExtraction=true'); + } + + if (outputSchema !== null && (typeof outputSchema !== 'object' || Array.isArray(outputSchema))) { + throw new Error('outputSchema must be an object or null'); + } + } + + const payload = { + url: url, + use_session: useSession, + steps: steps, + ai_extraction: aiExtraction, + render_heavy_js: renderHeavyJs, + }; + + if (stealth) { + payload.stealth = stealth; + } + + // Add AI extraction parameters if enabled + if (aiExtraction) { + payload.user_prompt = userPrompt; + if (outputSchema) { + payload.output_schema = outputSchema; + } + } + + try { + const response = await axios.post(endpoint, payload, { headers }); + return response.data; + } catch (error) { + handleError(error); + } +} + +/** + * Retrieve the status or result of an agentic scraper request. + * + * @param {string} apiKey - Your ScrapeGraph AI API key + * @param {string} requestId - The request ID associated with the agentic scraper request + * @returns {Promise} A promise that resolves to an object containing: + * - status: The current status of the request ('pending', 'completed', 'failed') + * - result: The extracted data or automation result when status is 'completed' + * - error: Error message if the request failed (when status is 'failed') + * - created_at: Timestamp of when the request was created + * - completed_at: Timestamp of when the request was completed (if applicable) + * @throws {Error} Throws an error if the HTTP request fails or if the API key is invalid + * + * @example + * // Example usage: + * const apiKey = 'your-api-key'; + * const requestId = 'previously-obtained-request-id'; + * + * try { + * const result = await getAgenticScraperRequest(apiKey, requestId); + * if (result.status === 'completed') { + * console.log('Automation completed:', result.result); + * } else if (result.status === 'pending') { + * console.log('Automation is still in progress'); + * } else { + * console.log('Automation failed:', result.error); + * } + * } catch (error) { + * console.error('Error fetching request:', error); + * } + * + * @note The agentic scraper performs browser automation steps sequentially, + * allowing for complex interactions like form filling, clicking buttons, + * and navigating through multi-step workflows with session management. + */ +export async function getAgenticScraperRequest(apiKey, requestId, options = {}) { + const { mock = null } = options; + + // Check if mock mode is enabled + const useMock = mock !== null ? mock : isMockEnabled(); + + if (useMock) { + console.log('๐Ÿงช Mock mode active. Returning stub for getAgenticScraperRequest'); + const mockConfig = getMockConfig(); + const mockData = getMockResponse('GET', `https://api.scrapegraphai.com/v1/agentic-scrapper/${requestId}`, mockConfig.customResponses, mockConfig.customHandler); + return mockData; + } + + const endpoint = 'https://api.scrapegraphai.com/v1/agentic-scrapper/' + requestId; + const headers = { + 'accept': 'application/json', + 'SGAI-APIKEY': apiKey, + }; + + // Validate inputs + if (!apiKey || typeof apiKey !== 'string') { + throw new Error('API key must be a non-empty string'); + } + + if (!requestId || typeof requestId !== 'string') { + throw new Error('Request ID must be a non-empty string'); + } + + try { + const response = await axios.get(endpoint, { headers }); + return response.data; + } catch (error) { + handleError(error); + } +} diff --git a/scrapegraph-js/src/crawl.js b/scrapegraph-js/src/crawl.js new file mode 100644 index 0000000..5b5a0af --- /dev/null +++ b/scrapegraph-js/src/crawl.js @@ -0,0 +1,132 @@ +import axios from 'axios'; +import handleError from './utils/handleError.js'; +import { ZodType } from 'zod'; +import { zodToJsonSchema } from 'zod-to-json-schema'; +import { isMockEnabled, getMockConfig } from './utils/mockConfig.js'; +import { getMockResponse } from './utils/mockResponse.js'; + +/** + * Start a crawl job using the ScrapeGraphAI API. + * + * @param {string} apiKey - Your ScrapeGraph AI API key + * @param {string} url - The starting URL for the crawl + * @param {string|null} prompt - The prompt to guide the crawl and extraction (null for markdown mode) + * @param {Object|ZodType|null} schema - JSON schema or Zod schema defining the structure of the extracted data (null for markdown mode) + * @param {Object} [options] - Optional crawl parameters + * @param {boolean} [options.extractionMode=true] - true for AI extraction, false for markdown conversion (NO AI/LLM) + * @param {boolean} [options.cacheWebsite=true] - Whether to cache the website content + * @param {number} [options.depth=2] - Maximum depth of the crawl (1-10) + * @param {number} [options.maxPages=2] - Maximum number of pages to crawl (1-100) + * @param {boolean} [options.sameDomainOnly=true] - Whether to only crawl pages from the same domain + * @param {boolean} [options.sitemap] - Whether to use sitemap for better page discovery + * @param {number} [options.batchSize=1] - Batch size for processing pages (1-10) + * @param {boolean} [options.mock] - Override mock mode for this request + * @param {boolean} [options.renderHeavyJs=false] - Whether to render heavy JavaScript on the page + * @param {boolean} [options.stealth=false] - Enable stealth mode to avoid bot detection + * @returns {Promise} The crawl job response + * @throws {Error} Throws an error if the HTTP request fails + */ +export async function crawl( + apiKey, + url, + prompt, + schema, + options = {} +) { + const { mock = null, renderHeavyJs = false, stealth = false } = options; + + // Check if mock mode is enabled + const useMock = mock !== null ? mock : isMockEnabled(); + + if (useMock) { + console.log('๐Ÿงช Mock mode active. Returning stub for crawl request'); + const mockConfig = getMockConfig(); + const mockData = getMockResponse('POST', 'https://api.scrapegraphai.com/v1/crawl', mockConfig.customResponses, mockConfig.customHandler); + return mockData; + } + const endpoint = 'https://api.scrapegraphai.com/v1/crawl'; + const headers = { + 'accept': 'application/json', + 'SGAI-APIKEY': apiKey, + 'Content-Type': 'application/json', + }; + + let schemaPayload = null; + if (schema !== null && schema !== undefined) { + if (schema instanceof ZodType) { + schemaPayload = zodToJsonSchema(schema); + } else if (typeof schema === 'object') { + schemaPayload = schema; + } else { + throw new Error('The schema must be a Zod schema, a plain object, or null'); + } + } + + const { + cacheWebsite = true, + depth = 2, + maxPages = 2, + sameDomainOnly = true, + sitemap = false, + batchSize = 1, + } = options; + + const payload = { + url, + prompt, + schema: schemaPayload, + cache_website: cacheWebsite, + depth, + max_pages: maxPages, + same_domain_only: sameDomainOnly, + sitemap, + batch_size: batchSize, + render_heavy_js: renderHeavyJs, + }; + + if (stealth) { + payload.stealth = stealth; + } + + try { + const response = await axios.post(endpoint, payload, { headers }); + return response.data; + } catch (error) { + handleError(error); + } +} + +/** + * Get the result of a crawl job by ID. + * + * @param {string} apiKey - Your ScrapeGraph AI API key + * @param {string} crawlId - The crawl job ID + * @returns {Promise} The crawl result + * @throws {Error} Throws an error if the HTTP request fails + */ +export async function getCrawlRequest(apiKey, crawlId, options = {}) { + const { mock = null } = options; + + // Check if mock mode is enabled + const useMock = mock !== null ? mock : isMockEnabled(); + + if (useMock) { + console.log('๐Ÿงช Mock mode active. Returning stub for getCrawlRequest'); + const mockConfig = getMockConfig(); + const mockData = getMockResponse('GET', `https://api.scrapegraphai.com/v1/crawl/${crawlId}`, mockConfig.customResponses, mockConfig.customHandler); + return mockData; + } + + const endpoint = `https://api.scrapegraphai.com/v1/crawl/${crawlId}`; + const headers = { + 'accept': 'application/json', + 'SGAI-APIKEY': apiKey, + }; + + try { + const response = await axios.get(endpoint, { headers }); + return response.data; + } catch (error) { + handleError(error); + } +} diff --git a/scrapegraph-js/src/credits.js b/scrapegraph-js/src/credits.js new file mode 100644 index 0000000..a6c5814 --- /dev/null +++ b/scrapegraph-js/src/credits.js @@ -0,0 +1,37 @@ +import axios from 'axios'; +import handleError from './utils/handleError.js'; +import { isMockEnabled, getMockConfig } from './utils/mockConfig.js'; +import { getMockResponse, createMockAxiosResponse } from './utils/mockResponse.js'; + +/** + * Retrieve credits from the API. + * + * @param {string} apiKey - Your ScrapeGraph AI API key + * @returns {Promise} Response from the API in JSON format + */ +export async function getCredits(apiKey, options = {}) { + const { mock = null } = options; + + // Check if mock mode is enabled + const useMock = mock !== null ? mock : isMockEnabled(); + + if (useMock) { + console.log('๐Ÿงช Mock mode active. Returning stub for getCredits'); + const mockConfig = getMockConfig(); + const mockData = getMockResponse('GET', 'https://api.scrapegraphai.com/v1/credits', mockConfig.customResponses, mockConfig.customHandler); + return mockData; + } + + const endpoint = 'https://api.scrapegraphai.com/v1/credits'; + const headers = { + 'accept': 'application/json', + 'SGAI-APIKEY': apiKey, + }; + + try { + const response = await axios.get(endpoint, { headers }); + return response.data; + } catch (error) { + handleError(error); + } +} diff --git a/scrapegraph-js/src/feedback.js b/scrapegraph-js/src/feedback.js new file mode 100644 index 0000000..2ddfe40 --- /dev/null +++ b/scrapegraph-js/src/feedback.js @@ -0,0 +1,49 @@ +import axios from 'axios'; +import handleError from './utils/handleError.js'; +import { isMockEnabled, getMockConfig } from './utils/mockConfig.js'; +import { getMockResponse } from './utils/mockResponse.js'; + +/** + * Send feedback to the API. + * + * @param {string} apiKey - Your ScrapeGraph AI API key + * @param {string} requestId - The request ID associated with the feedback + * @param {number} rating - The rating score + * @param {string} feedbackText - Optional feedback message to send + * @param {Object} options - Optional configuration options + * @param {boolean} options.mock - Override mock mode for this request + * @returns {Promise} Response from the API in JSON format + */ +export async function sendFeedback(apiKey, requestId, rating, feedbackText = null, options = {}) { + const { mock = null } = options; + + // Check if mock mode is enabled + const useMock = mock !== null ? mock : isMockEnabled(); + + if (useMock) { + console.log('๐Ÿงช Mock mode active. Returning stub for sendFeedback request'); + const mockConfig = getMockConfig(); + const mockData = getMockResponse('POST', 'https://api.scrapegraphai.com/v1/feedback', mockConfig.customResponses, mockConfig.customHandler); + return mockData; + } + + const endpoint = 'https://api.scrapegraphai.com/v1/feedback'; + const headers = { + 'accept': 'application/json', + 'SGAI-APIKEY': apiKey, + 'Content-Type': 'application/json', + }; + + const feedbackData = { + request_id: requestId, + rating: rating, + feedback_text: feedbackText, + }; + + try { + const response = await axios.post(endpoint, feedbackData, { headers }); + return response.data; + } catch (error) { + handleError(error); + } +} diff --git a/scrapegraph-js/src/healthz.js b/scrapegraph-js/src/healthz.js new file mode 100644 index 0000000..89a4544 --- /dev/null +++ b/scrapegraph-js/src/healthz.js @@ -0,0 +1,56 @@ +import axios from 'axios'; +import handleError from './utils/handleError.js'; +import { isMockEnabled, getMockConfig } from './utils/mockConfig.js'; +import { getMockResponse } from './utils/mockResponse.js'; + +/** + * Check the health status of the ScrapeGraphAI API service. + * + * This endpoint is useful for monitoring and ensuring the service is operational. + * It returns a JSON response indicating the service's health status. + * + * Use cases: + * - Production monitoring and alerting + * - Health checks in containerized environments (Kubernetes, Docker) + * - Ensuring service availability before making API calls + * - Integration with monitoring tools (Prometheus, Datadog, etc.) + * + * @param {string} apiKey - Your ScrapeGraph AI API key + * @param {Object} options - Optional configuration + * @param {boolean} options.mock - Whether to use mock mode for this request + * @returns {Promise} Health status response with status and message + * + * @example + * import { healthz } from 'scrapegraph-sdk'; + * + * const health = await healthz('your-api-key'); + * console.log(health); + * // { status: 'healthy', message: 'Service is operational' } + */ +export async function healthz(apiKey, options = {}) { + const { mock = null } = options; + + // Check if mock mode is enabled + const useMock = mock !== null ? mock : isMockEnabled(); + + if (useMock) { + console.log('๐Ÿงช Mock mode active. Returning stub for healthz'); + const mockConfig = getMockConfig(); + const mockData = getMockResponse('GET', 'https://api.scrapegraphai.com/v1/healthz', mockConfig.customResponses, mockConfig.customHandler); + return mockData; + } + + const endpoint = 'https://api.scrapegraphai.com/v1/healthz'; + const headers = { + 'accept': 'application/json', + 'SGAI-APIKEY': apiKey, + }; + + try { + const response = await axios.get(endpoint, { headers }); + return response.data; + } catch (error) { + handleError(error); + } +} + diff --git a/scrapegraph-js/src/markdownify.js b/scrapegraph-js/src/markdownify.js new file mode 100644 index 0000000..6a1cebd --- /dev/null +++ b/scrapegraph-js/src/markdownify.js @@ -0,0 +1,115 @@ +import axios from 'axios'; +import handleError from './utils/handleError.js'; +import { isMockEnabled, getMockConfig } from './utils/mockConfig.js'; +import { getMockResponse } from './utils/mockResponse.js'; + +/** + * Converts a webpage into clean, well-structured markdown format. + * + * @param {string} apiKey - Your ScrapeGraph AI API key. + * @param {string} url - The URL of the webpage to be converted. + * @param {Object} options - Optional configuration options. + * @param {boolean} options.mock - Override mock mode for this request + * @param {boolean} [options.stealth=false] - Enable stealth mode to avoid bot detection + * @returns {Promise} A promise that resolves to the markdown representation of the webpage. + * @throws {Error} Throws an error if the HTTP request fails. + */ +export async function markdownify(apiKey, url, options = {}) { + const { mock = null, stealth = false } = options; + + // Check if mock mode is enabled + const useMock = mock !== null ? mock : isMockEnabled(); + + if (useMock) { + console.log('๐Ÿงช Mock mode active. Returning stub for markdownify request'); + const mockConfig = getMockConfig(); + const mockData = getMockResponse('POST', 'https://api.scrapegraphai.com/v1/markdownify', mockConfig.customResponses, mockConfig.customHandler); + return mockData; + } + + const endpoint = 'https://api.scrapegraphai.com/v1/markdownify'; + const headers = { + 'accept': 'application/json', + 'SGAI-APIKEY': apiKey, + }; + + const payload = { + website_url: url, + }; + + if (stealth) { + payload.stealth = stealth; + } + + try { + const response = await axios.post(endpoint, payload, { headers }); + return response.data; + } catch (error) { + handleError(error); + } +} + +/** + * Retrieves the status or result of a markdownify request, with the option to review results from previous requests. + * + * @param {string} apiKey - Your ScrapeGraph AI API key. + * @param {string} requestId - The unique identifier for the markdownify request whose result you want to retrieve. + * @returns {Promise} A promise that resolves to an object containing: + * - status: The current status of the request ('pending', 'completed', 'failed') + * - result: The markdown content when status is 'completed' + * - error: Error message if the request failed (when status is 'failed') + * - created_at: Timestamp of when the request was created + * - completed_at: Timestamp of when the request was completed (if applicable) + * @throws {Error} Throws an error if the HTTP request fails or if the API key is invalid + * + * @example + * // Example usage: + * const apiKey = 'your-api-key'; + * const requestId = 'previously-obtained-request-id'; + * + * try { + * const result = await getMarkdownifyRequest(apiKey, requestId); + * if (result.status === 'completed') { + * console.log('Markdown content:', result.result); + * } else if (result.status === 'pending') { + * console.log('Conversion is still in progress'); + * } else { + * console.log('Conversion failed:', result.error); + * } + * } catch (error) { + * console.error('Error fetching markdown:', error); + * } + * + * @note The markdown content includes: + * - Properly formatted headers + * - Lists and tables + * - Code blocks with language detection + * - Links and images + * - Text formatting (bold, italic, etc.) + */ +export async function getMarkdownifyRequest(apiKey, requestId, options = {}) { + const { mock = null } = options; + + // Check if mock mode is enabled + const useMock = mock !== null ? mock : isMockEnabled(); + + if (useMock) { + console.log('๐Ÿงช Mock mode active. Returning stub for getMarkdownifyRequest'); + const mockConfig = getMockConfig(); + const mockData = getMockResponse('GET', `https://api.scrapegraphai.com/v1/markdownify/${requestId}`, mockConfig.customResponses, mockConfig.customHandler); + return mockData; + } + + const endpoint = 'https://api.scrapegraphai.com/v1/markdownify/' + requestId; + const headers = { + 'accept': 'application/json', + 'SGAI-APIKEY': apiKey, + }; + + try { + const response = await axios.get(endpoint, { headers }); + return response.data; + } catch (error) { + handleError(error); + } +} diff --git a/scrapegraph-js/src/scheduledJobs.js b/scrapegraph-js/src/scheduledJobs.js new file mode 100644 index 0000000..1a9eceb --- /dev/null +++ b/scrapegraph-js/src/scheduledJobs.js @@ -0,0 +1,398 @@ +import axios from 'axios'; +import handleError from './utils/handleError.js'; +import { isMockEnabled, getMockConfig } from './utils/mockConfig.js'; +import { getMockResponse } from './utils/mockResponse.js'; + +/** + * Create a new scheduled job + * @param {string} apiKey - Your ScrapeGraph AI API key + * @param {string} jobName - Name of the scheduled job + * @param {string} serviceType - Type of service (smartscraper, searchscraper, crawl, etc.) + * @param {string} cronExpression - Cron expression for scheduling + * @param {Object} jobConfig - Configuration for the job + * @param {boolean} [isActive=true] - Whether the job is active + * @param {Object} [options={}] - Additional options + * @returns {Promise} Created job details + */ +export async function createScheduledJob(apiKey, jobName, serviceType, cronExpression, jobConfig, isActive = true, options = {}) { + const { mock = null } = options; + + // Check if mock mode is enabled + const useMock = mock !== null ? mock : isMockEnabled(); + + if (useMock) { + console.log('๐Ÿงช Mock mode active. Returning stub for createScheduledJob'); + const mockConfig = getMockConfig(); + const mockData = getMockResponse('POST', 'https://api.scrapegraphai.com/v1/scheduled-jobs', mockConfig.customResponses, mockConfig.customHandler); + return mockData; + } + + const endpoint = 'https://api.scrapegraphai.com/v1/scheduled-jobs'; + const headers = { + 'accept': 'application/json', + 'SGAI-APIKEY': apiKey, + 'Content-Type': 'application/json', + }; + + const payload = { + job_name: jobName, + service_type: serviceType, + cron_expression: cronExpression, + job_config: jobConfig, + is_active: isActive + }; + + try { + const response = await axios.post(endpoint, payload, { headers }); + return response.data; + } catch (error) { + handleError(error); + } +} + +/** + * Get list of scheduled jobs with pagination + * @param {string} apiKey - Your ScrapeGraph AI API key + * @param {Object} [options={}] - Query options + * @param {number} [options.page=1] - Page number + * @param {number} [options.pageSize=20] - Number of jobs per page + * @param {string} [options.serviceType] - Filter by service type + * @param {boolean} [options.isActive] - Filter by active status + * @param {boolean} [options.mock] - Override mock mode + * @returns {Promise} List of scheduled jobs + */ +export async function getScheduledJobs(apiKey, options = {}) { + const { page = 1, pageSize = 20, serviceType, isActive, mock = null } = options; + + // Check if mock mode is enabled + const useMock = mock !== null ? mock : isMockEnabled(); + + if (useMock) { + console.log('๐Ÿงช Mock mode active. Returning stub for getScheduledJobs'); + const mockConfig = getMockConfig(); + const mockData = getMockResponse('GET', 'https://api.scrapegraphai.com/v1/scheduled-jobs', mockConfig.customResponses, mockConfig.customHandler); + return mockData; + } + + const endpoint = 'https://api.scrapegraphai.com/v1/scheduled-jobs'; + const headers = { + 'accept': 'application/json', + 'SGAI-APIKEY': apiKey, + }; + + const params = { page, page_size: pageSize }; + if (serviceType) params.service_type = serviceType; + if (isActive !== undefined) params.is_active = isActive; + + try { + const response = await axios.get(endpoint, { headers, params }); + return response.data; + } catch (error) { + handleError(error); + } +} + +/** + * Get details of a specific scheduled job + * @param {string} apiKey - Your ScrapeGraph AI API key + * @param {string} jobId - ID of the scheduled job + * @param {Object} [options={}] - Additional options + * @returns {Promise} Job details + */ +export async function getScheduledJob(apiKey, jobId, options = {}) { + const { mock = null } = options; + + // Check if mock mode is enabled + const useMock = mock !== null ? mock : isMockEnabled(); + + if (useMock) { + console.log('๐Ÿงช Mock mode active. Returning stub for getScheduledJob'); + const mockConfig = getMockConfig(); + const mockData = getMockResponse('GET', `https://api.scrapegraphai.com/v1/scheduled-jobs/${jobId}`, mockConfig.customResponses, mockConfig.customHandler); + return mockData; + } + + const endpoint = `https://api.scrapegraphai.com/v1/scheduled-jobs/${jobId}`; + const headers = { + 'accept': 'application/json', + 'SGAI-APIKEY': apiKey, + }; + + try { + const response = await axios.get(endpoint, { headers }); + return response.data; + } catch (error) { + handleError(error); + } +} + +/** + * Update an existing scheduled job (partial update) + * @param {string} apiKey - Your ScrapeGraph AI API key + * @param {string} jobId - ID of the scheduled job + * @param {Object} updateData - Fields to update + * @param {string} [updateData.jobName] - New job name + * @param {string} [updateData.cronExpression] - New cron expression + * @param {Object} [updateData.jobConfig] - New job configuration + * @param {boolean} [updateData.isActive] - New active status + * @param {Object} [options={}] - Additional options + * @returns {Promise} Updated job details + */ +export async function updateScheduledJob(apiKey, jobId, updateData, options = {}) { + const { mock = null } = options; + + // Check if mock mode is enabled + const useMock = mock !== null ? mock : isMockEnabled(); + + if (useMock) { + console.log('๐Ÿงช Mock mode active. Returning stub for updateScheduledJob'); + const mockConfig = getMockConfig(); + const mockData = getMockResponse('PATCH', `https://api.scrapegraphai.com/v1/scheduled-jobs/${jobId}`, mockConfig.customResponses, mockConfig.customHandler); + return mockData; + } + + const endpoint = `https://api.scrapegraphai.com/v1/scheduled-jobs/${jobId}`; + const headers = { + 'accept': 'application/json', + 'SGAI-APIKEY': apiKey, + 'Content-Type': 'application/json', + }; + + // Convert camelCase to snake_case for API + const payload = {}; + if (updateData.jobName !== undefined) payload.job_name = updateData.jobName; + if (updateData.cronExpression !== undefined) payload.cron_expression = updateData.cronExpression; + if (updateData.jobConfig !== undefined) payload.job_config = updateData.jobConfig; + if (updateData.isActive !== undefined) payload.is_active = updateData.isActive; + + try { + const response = await axios.patch(endpoint, payload, { headers }); + return response.data; + } catch (error) { + handleError(error); + } +} + +/** + * Replace an existing scheduled job (full update) + * @param {string} apiKey - Your ScrapeGraph AI API key + * @param {string} jobId - ID of the scheduled job + * @param {string} jobName - Name of the scheduled job + * @param {string} serviceType - Type of service + * @param {string} cronExpression - Cron expression for scheduling + * @param {Object} jobConfig - Configuration for the job + * @param {boolean} [isActive=true] - Whether the job is active + * @param {Object} [options={}] - Additional options + * @returns {Promise} Updated job details + */ +export async function replaceScheduledJob(apiKey, jobId, jobName, serviceType, cronExpression, jobConfig, isActive = true, options = {}) { + const { mock = null } = options; + + // Check if mock mode is enabled + const useMock = mock !== null ? mock : isMockEnabled(); + + if (useMock) { + console.log('๐Ÿงช Mock mode active. Returning stub for replaceScheduledJob'); + const mockConfig = getMockConfig(); + const mockData = getMockResponse('PUT', `https://api.scrapegraphai.com/v1/scheduled-jobs/${jobId}`, mockConfig.customResponses, mockConfig.customHandler); + return mockData; + } + + const endpoint = `https://api.scrapegraphai.com/v1/scheduled-jobs/${jobId}`; + const headers = { + 'accept': 'application/json', + 'SGAI-APIKEY': apiKey, + 'Content-Type': 'application/json', + }; + + const payload = { + job_name: jobName, + service_type: serviceType, + cron_expression: cronExpression, + job_config: jobConfig, + is_active: isActive + }; + + try { + const response = await axios.put(endpoint, payload, { headers }); + return response.data; + } catch (error) { + handleError(error); + } +} + +/** + * Delete a scheduled job + * @param {string} apiKey - Your ScrapeGraph AI API key + * @param {string} jobId - ID of the scheduled job + * @param {Object} [options={}] - Additional options + * @returns {Promise} Deletion confirmation + */ +export async function deleteScheduledJob(apiKey, jobId, options = {}) { + const { mock = null } = options; + + // Check if mock mode is enabled + const useMock = mock !== null ? mock : isMockEnabled(); + + if (useMock) { + console.log('๐Ÿงช Mock mode active. Returning stub for deleteScheduledJob'); + const mockConfig = getMockConfig(); + const mockData = getMockResponse('DELETE', `https://api.scrapegraphai.com/v1/scheduled-jobs/${jobId}`, mockConfig.customResponses, mockConfig.customHandler); + return mockData; + } + + const endpoint = `https://api.scrapegraphai.com/v1/scheduled-jobs/${jobId}`; + const headers = { + 'accept': 'application/json', + 'SGAI-APIKEY': apiKey, + }; + + try { + const response = await axios.delete(endpoint, { headers }); + return response.data; + } catch (error) { + handleError(error); + } +} + +/** + * Pause a scheduled job + * @param {string} apiKey - Your ScrapeGraph AI API key + * @param {string} jobId - ID of the scheduled job + * @param {Object} [options={}] - Additional options + * @returns {Promise} Pause confirmation + */ +export async function pauseScheduledJob(apiKey, jobId, options = {}) { + const { mock = null } = options; + + // Check if mock mode is enabled + const useMock = mock !== null ? mock : isMockEnabled(); + + if (useMock) { + console.log('๐Ÿงช Mock mode active. Returning stub for pauseScheduledJob'); + const mockConfig = getMockConfig(); + const mockData = getMockResponse('POST', `https://api.scrapegraphai.com/v1/scheduled-jobs/${jobId}/pause`, mockConfig.customResponses, mockConfig.customHandler); + return mockData; + } + + const endpoint = `https://api.scrapegraphai.com/v1/scheduled-jobs/${jobId}/pause`; + const headers = { + 'accept': 'application/json', + 'SGAI-APIKEY': apiKey, + }; + + try { + const response = await axios.post(endpoint, {}, { headers }); + return response.data; + } catch (error) { + handleError(error); + } +} + +/** + * Resume a paused scheduled job + * @param {string} apiKey - Your ScrapeGraph AI API key + * @param {string} jobId - ID of the scheduled job + * @param {Object} [options={}] - Additional options + * @returns {Promise} Resume confirmation + */ +export async function resumeScheduledJob(apiKey, jobId, options = {}) { + const { mock = null } = options; + + // Check if mock mode is enabled + const useMock = mock !== null ? mock : isMockEnabled(); + + if (useMock) { + console.log('๐Ÿงช Mock mode active. Returning stub for resumeScheduledJob'); + const mockConfig = getMockConfig(); + const mockData = getMockResponse('POST', `https://api.scrapegraphai.com/v1/scheduled-jobs/${jobId}/resume`, mockConfig.customResponses, mockConfig.customHandler); + return mockData; + } + + const endpoint = `https://api.scrapegraphai.com/v1/scheduled-jobs/${jobId}/resume`; + const headers = { + 'accept': 'application/json', + 'SGAI-APIKEY': apiKey, + }; + + try { + const response = await axios.post(endpoint, {}, { headers }); + return response.data; + } catch (error) { + handleError(error); + } +} + +/** + * Manually trigger a scheduled job + * @param {string} apiKey - Your ScrapeGraph AI API key + * @param {string} jobId - ID of the scheduled job + * @param {Object} [options={}] - Additional options + * @returns {Promise} Trigger confirmation with execution ID + */ +export async function triggerScheduledJob(apiKey, jobId, options = {}) { + const { mock = null } = options; + + // Check if mock mode is enabled + const useMock = mock !== null ? mock : isMockEnabled(); + + if (useMock) { + console.log('๐Ÿงช Mock mode active. Returning stub for triggerScheduledJob'); + const mockConfig = getMockConfig(); + const mockData = getMockResponse('POST', `https://api.scrapegraphai.com/v1/scheduled-jobs/${jobId}/trigger`, mockConfig.customResponses, mockConfig.customHandler); + return mockData; + } + + const endpoint = `https://api.scrapegraphai.com/v1/scheduled-jobs/${jobId}/trigger`; + const headers = { + 'accept': 'application/json', + 'SGAI-APIKEY': apiKey, + }; + + try { + const response = await axios.post(endpoint, {}, { headers }); + return response.data; + } catch (error) { + handleError(error); + } +} + +/** + * Get execution history for a scheduled job + * @param {string} apiKey - Your ScrapeGraph AI API key + * @param {string} jobId - ID of the scheduled job + * @param {Object} [options={}] - Query options + * @param {number} [options.page=1] - Page number + * @param {number} [options.pageSize=20] - Number of executions per page + * @param {string} [options.status] - Filter by execution status + * @returns {Promise} Execution history + */ +export async function getJobExecutions(apiKey, jobId, options = {}) { + const { page = 1, pageSize = 20, status, mock = null } = options; + + // Check if mock mode is enabled + const useMock = mock !== null ? mock : isMockEnabled(); + + if (useMock) { + console.log('๐Ÿงช Mock mode active. Returning stub for getJobExecutions'); + const mockConfig = getMockConfig(); + const mockData = getMockResponse('GET', `https://api.scrapegraphai.com/v1/scheduled-jobs/${jobId}/executions`, mockConfig.customResponses, mockConfig.customHandler); + return mockData; + } + + const endpoint = `https://api.scrapegraphai.com/v1/scheduled-jobs/${jobId}/executions`; + const headers = { + 'accept': 'application/json', + 'SGAI-APIKEY': apiKey, + }; + + const params = { page, page_size: pageSize }; + if (status) params.status = status; + + try { + const response = await axios.get(endpoint, { headers, params }); + return response.data; + } catch (error) { + handleError(error); + } +} diff --git a/scrapegraph-js/src/schema.js b/scrapegraph-js/src/schema.js new file mode 100644 index 0000000..d245328 --- /dev/null +++ b/scrapegraph-js/src/schema.js @@ -0,0 +1,185 @@ +/** + * Schema generation functionality for ScrapeGraph JavaScript SDK + */ + +import handleError from './utils/handleError.js'; + +/** + * Generate a JSON schema from a user prompt + * + * @param {string} userPrompt - The user's search query to be refined into a schema + * @param {Object} existingSchema - Optional existing JSON schema to modify/extend + * @param {Object} options - Additional options for the request + * @param {string} options.apiKey - API key for authentication + * @param {string} options.baseUrl - Base URL for the API (optional, defaults to production) + * @returns {Promise} API response containing the generated schema + */ +export async function generateSchema(userPrompt, existingSchema = null, options = {}) { + try { + const { apiKey, baseUrl = 'https://api.scrapegraph.ai' } = options; + + if (!apiKey) { + throw new Error('API key is required. Please provide it in the options or set SGAI_API_KEY environment variable.'); + } + + if (!userPrompt || typeof userPrompt !== 'string' || userPrompt.trim() === '') { + throw new Error('userPrompt is required and must be a non-empty string'); + } + + const payload = { + user_prompt: userPrompt.trim() + }; + + if (existingSchema) { + if (typeof existingSchema !== 'object' || existingSchema === null) { + throw new Error('existingSchema must be a valid object'); + } + payload.existing_schema = existingSchema; + } + + const response = await fetch(`${baseUrl}/v1/generate_schema`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'SGAI-APIKEY': apiKey + }, + body: JSON.stringify(payload) + }); + + if (!response.ok) { + const errorData = await response.json().catch(() => ({})); + throw new Error( + `HTTP ${response.status}: ${errorData.error || response.statusText}` + ); + } + + const result = await response.json(); + return result; + + } catch (error) { + return handleError(error, 'generateSchema'); + } +} + +/** + * Get the status of a schema generation request + * + * @param {string} requestId - The request ID returned from generateSchema + * @param {Object} options - Additional options for the request + * @param {string} options.apiKey - API key for authentication + * @param {string} options.baseUrl - Base URL for the API (optional, defaults to production) + * @returns {Promise} Current status and results of the schema generation + */ +export async function getSchemaStatus(requestId, options = {}) { + try { + const { apiKey, baseUrl = 'https://api.scrapegraph.ai' } = options; + + if (!apiKey) { + throw new Error('API key is required. Please provide it in the options or set SGAI_APIKEY environment variable.'); + } + + if (!requestId || typeof requestId !== 'string' || requestId.trim() === '') { + throw new Error('requestId is required and must be a non-empty string'); + } + + // Validate UUID format (basic check) + const uuidRegex = /^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i; + if (!uuidRegex.test(requestId.trim())) { + throw new Error('requestId must be a valid UUID format'); + } + + const response = await fetch(`${baseUrl}/v1/generate_schema/${requestId.trim()}`, { + method: 'GET', + headers: { + 'SGAI-APIKEY': apiKey + } + }); + + if (!response.ok) { + const errorData = await response.json().catch(() => ({})); + throw new Error( + `HTTP ${response.status}: ${errorData.error || response.statusText}` + ); + } + + const result = await response.json(); + return result; + + } catch (error) { + return handleError(error, 'getSchemaStatus'); + } +} + +/** + * Poll for schema generation completion + * + * @param {string} requestId - The request ID returned from generateSchema + * @param {Object} options - Additional options for polling + * @param {string} options.apiKey - API key for authentication + * @param {string} options.baseUrl - Base URL for the API (optional, defaults to production) + * @param {number} options.maxAttempts - Maximum number of polling attempts (default: 30) + * @param {number} options.delay - Delay between attempts in milliseconds (default: 2000) + * @param {Function} options.onProgress - Callback function called on each status check + * @returns {Promise} Final result when schema generation is complete + */ +export async function pollSchemaGeneration(requestId, options = {}) { + try { + const { + apiKey, + baseUrl = 'https://api.scrapegraph.ai', + maxAttempts = 30, + delay = 2000, + onProgress = null + } = options; + + if (!apiKey) { + throw new Error('API key is required. Please provide it in the options or set SGAI_APIKEY environment variable.'); + } + + if (!requestId || typeof requestId !== 'string' || requestId.trim() === '') { + throw new Error('requestId is required and must be a non-empty string'); + } + + let attempt = 0; + + while (attempt < maxAttempts) { + attempt++; + + if (onProgress) { + onProgress({ attempt, maxAttempts, status: 'checking' }); + } + + const statusResponse = await getSchemaStatus(requestId, { apiKey, baseUrl }); + + if (statusResponse.error) { + throw new Error(`Schema generation failed: ${statusResponse.error}`); + } + + const currentStatus = statusResponse.status; + + if (onProgress) { + onProgress({ attempt, maxAttempts, status: currentStatus, response: statusResponse }); + } + + if (currentStatus === 'completed') { + return statusResponse; + } else if (currentStatus === 'failed') { + throw new Error(`Schema generation failed with status: ${currentStatus}`); + } else if (currentStatus === 'pending' || currentStatus === 'processing') { + if (attempt < maxAttempts) { + await new Promise(resolve => setTimeout(resolve, delay)); + } + } else { + console.warn(`Unknown status: ${currentStatus}`); + if (attempt < maxAttempts) { + await new Promise(resolve => setTimeout(resolve, delay)); + } + } + } + + throw new Error(`Schema generation did not complete within ${maxAttempts} attempts. Last status: ${statusResponse?.status || 'unknown'}`); + + } catch (error) { + return handleError(error, 'pollSchemaGeneration'); + } +} diff --git a/scrapegraph-js/src/scrape.js b/scrapegraph-js/src/scrape.js new file mode 100644 index 0000000..3d9bfca --- /dev/null +++ b/scrapegraph-js/src/scrape.js @@ -0,0 +1,161 @@ +import axios from 'axios'; +import handleError from './utils/handleError.js'; +import { isMockEnabled, getMockConfig } from './utils/mockConfig.js'; +import { getMockResponse, createMockAxiosResponse } from './utils/mockResponse.js'; + +/** + * Converts a webpage into HTML format with optional JavaScript rendering. + * + * @param {string} apiKey - Your ScrapeGraph AI API key. + * @param {string} url - The URL of the webpage to be converted. + * @param {Object} options - Optional configuration options. + * @param {boolean} options.renderHeavyJs - Whether to render heavy JavaScript (defaults to false). + * @param {Object} options.headers - Optional custom headers to send with the request. + * @param {boolean} [options.stealth=false] - Enable stealth mode to avoid bot detection + * @returns {Promise} A promise that resolves to the HTML content and metadata. + * @throws {Error} Throws an error if the HTTP request fails. + * + * @example + * // Basic usage: + * const apiKey = 'your-api-key'; + * const url = 'https://example.com'; + * + * try { + * const result = await scrape(apiKey, url); + * console.log('HTML content:', result.html); + * console.log('Status:', result.status); + * } catch (error) { + * console.error('Error:', error); + * } + * + * @example + * // With JavaScript rendering: + * const result = await scrape(apiKey, url, { + * renderHeavyJs: true + * }); + * + * @example + * // With custom headers: + * const result = await scrape(apiKey, url, { + * renderHeavyJs: false, + * headers: { + * 'User-Agent': 'Custom Agent', + * 'Cookie': 'session=123' + * } + * }); + */ +export async function scrape(apiKey, url, options = {}) { + const { + renderHeavyJs = false, + headers: customHeaders = {}, + mock = null, + stealth = false + } = options; + + // Check if mock mode is enabled + const useMock = mock !== null ? mock : isMockEnabled(); + + if (useMock) { + console.log('๐Ÿงช Mock mode active. Returning stub for scrape request'); + const mockConfig = getMockConfig(); + const mockData = getMockResponse('POST', 'https://api.scrapegraphai.com/v1/scrape', mockConfig.customResponses, mockConfig.customHandler); + return mockData; + } + + const endpoint = 'https://api.scrapegraphai.com/v1/scrape'; + const headers = { + 'accept': 'application/json', + 'SGAI-APIKEY': apiKey, + 'Content-Type': 'application/json', + ...customHeaders + }; + + const payload = { + website_url: url, + render_heavy_js: renderHeavyJs, + }; + + if (stealth) { + payload.stealth = stealth; + } + + // Only include headers in payload if they are provided + if (Object.keys(customHeaders).length > 0) { + payload.headers = customHeaders; + } + + try { + const response = await axios.post(endpoint, payload, { headers }); + return response.data; + } catch (error) { + handleError(error); + } +} + +/** + * Retrieves the status or result of a scrape request. + * + * @param {string} apiKey - Your ScrapeGraph AI API key. + * @param {string} requestId - The unique identifier for the scrape request. + * @returns {Promise} A promise that resolves to an object containing: + * - status: The current status of the request ('pending', 'completed', 'failed') + * - html: The HTML content when status is 'completed' + * - scrape_request_id: The request identifier + * - error: Error message if the request failed (when status is 'failed') + * - created_at: Timestamp of when the request was created + * - completed_at: Timestamp of when the request was completed (if applicable) + * @throws {Error} Throws an error if the HTTP request fails or if the API key is invalid + * + * @example + * // Example usage: + * const apiKey = 'your-api-key'; + * const requestId = 'previously-obtained-request-id'; + * + * try { + * const result = await getScrapeRequest(apiKey, requestId); + * if (result.status === 'completed') { + * console.log('HTML content:', result.html); + * console.log('Request ID:', result.scrape_request_id); + * } else if (result.status === 'pending') { + * console.log('HTML conversion is still in progress'); + * } else { + * console.log('HTML conversion failed:', result.error); + * } + * } catch (error) { + * console.error('Error fetching HTML:', error); + * } + * + * @note The HTML content includes: + * - Full HTML structure with DOCTYPE + * - Head section with meta tags, title, and styles + * - Body content with all elements + * - JavaScript code (if renderHeavyJs was enabled) + * - CSS styles and formatting + * - Images, links, and other media elements + */ +export async function getScrapeRequest(apiKey, requestId, options = {}) { + const { mock = null } = options; + + // Check if mock mode is enabled + const useMock = mock !== null ? mock : isMockEnabled(); + + if (useMock) { + console.log('๐Ÿงช Mock mode active. Returning stub for getScrapeRequest'); + const mockConfig = getMockConfig(); + const mockData = getMockResponse('GET', `https://api.scrapegraphai.com/v1/scrape/${requestId}`, mockConfig.customResponses, mockConfig.customHandler); + return mockData; + } + + const endpoint = 'https://api.scrapegraphai.com/v1/scrape/' + requestId; + const headers = { + 'accept': 'application/json', + 'SGAI-APIKEY': apiKey, + }; + + try { + const response = await axios.get(endpoint, { headers }); + return response.data; + } catch (error) { + handleError(error); + } +} diff --git a/scrapegraph-js/src/searchScraper.js b/scrapegraph-js/src/searchScraper.js new file mode 100644 index 0000000..a27472b --- /dev/null +++ b/scrapegraph-js/src/searchScraper.js @@ -0,0 +1,145 @@ +import axios from 'axios'; +import handleError from './utils/handleError.js'; +import { ZodType } from 'zod'; +import { zodToJsonSchema } from 'zod-to-json-schema'; +import { isMockEnabled, getMockConfig } from './utils/mockConfig.js'; +import { getMockResponse } from './utils/mockResponse.js'; + +/** + * Search and extract information from multiple web sources using AI. + * + * @param {string} apiKey - Your ScrapeGraph AI API key + * @param {string} prompt - Natural language prompt describing what data to extract + * @param {number} [numResults=3] - Number of websites to scrape (3-20). Default is 3. + * More websites provide better research depth but cost more credits. + * Credit calculation: 30 base + 10 per additional website beyond 3. + * @param {Object} [schema] - Optional schema object defining the output structure + * @param {String} userAgent - the user agent like "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" + * @param {Object} options - Optional configuration options + * @param {boolean} options.mock - Override mock mode for this request + * @param {boolean} options.renderHeavyJs - Whether to render heavy JavaScript on the page + * @param {boolean} [options.extractionMode=true] - Whether to use AI extraction (true) or markdown conversion (false). + * AI extraction costs 10 credits per page, markdown conversion costs 2 credits per page. + * @param {boolean} [options.stealth=false] - Enable stealth mode to avoid bot detection + * @returns {Promise} Extracted data in JSON format matching the provided schema + * @throws - Will throw an error in case of an HTTP failure. + */ +export async function searchScraper(apiKey, prompt, numResults = 3, schema = null, userAgent = null, options = {}) { + const { mock = null, renderHeavyJs = false, extractionMode = true, stealth = false } = options; + + // Check if mock mode is enabled + const useMock = mock !== null ? mock : isMockEnabled(); + + if (useMock) { + console.log('๐Ÿงช Mock mode active. Returning stub for searchScraper request'); + const mockConfig = getMockConfig(); + const mockData = getMockResponse('POST', 'https://api.scrapegraphai.com/v1/searchscraper', mockConfig.customResponses, mockConfig.customHandler); + return mockData; + } + const endpoint = 'https://api.scrapegraphai.com/v1/searchscraper'; + const headers = { + 'accept': 'application/json', + 'SGAI-APIKEY': apiKey, + 'Content-Type': 'application/json', + }; + + if (userAgent) headers['User-Agent'] = userAgent; + + // Validate numResults + if (numResults < 3 || numResults > 20) { + throw new Error('numResults must be between 3 and 20'); + } + + const payload = { + user_prompt: prompt, + num_results: numResults, + render_heavy_js: renderHeavyJs, + extraction_mode: extractionMode, + }; + + if (stealth) { + payload.stealth = stealth; + } + + if (schema) { + if (schema instanceof ZodType) { + payload.output_schema = zodToJsonSchema(schema); + } else { + throw new Error('The schema must be an instance of a valid Zod schema'); + } + } + + try { + const response = await axios.post(endpoint, payload, { headers }); + return response.data; + } catch (error) { + handleError(error); + } +} + +/** + * Retrieve the status or result of a searchScraper request. This function allows you to check the progress + * or retrieve results of both ongoing and completed search and extraction operations. + * + * @param {string} apiKey - Your ScrapeGraph AI API key + * @param {string} requestId - The request ID associated with the output of a searchScraper request. + * @returns {Promise} A promise that resolves to an object containing: + * - status: The current status of the request ('pending', 'completed', 'failed') + * - result: The extracted data in JSON format when status is 'completed', including: + * - extracted_data: The structured data extracted from search results + * - source_urls: Array of URLs that were used as sources + * - search_metadata: Information about the search operation + * - error: Error message if the request failed (when status is 'failed') + * - created_at: Timestamp of when the request was created + * - completed_at: Timestamp of when the request was completed (if applicable) + * @throws {Error} Throws an error if the HTTP request fails or if the API key is invalid + * + * @example + * // Example usage: + * const apiKey = 'your-api-key'; + * const requestId = 'previously-obtained-request-id'; + * + * try { + * const result = await getSearchScraperRequest(apiKey, requestId); + * if (result.status === 'completed') { + * console.log('Extracted data:', result.result.extracted_data); + * console.log('Sources:', result.result.source_urls); + * } else if (result.status === 'pending') { + * console.log('Search and extraction still in progress'); + * } else { + * console.log('Operation failed:', result.error); + * } + * } catch (error) { + * console.error('Error fetching search results:', error); + * } + * + * @note The search operation typically processes multiple web pages to gather comprehensive + * information based on the original search query. The results are structured according to + * the schema provided in the original searchScraper call, if any. + */ +export async function getSearchScraperRequest(apiKey, requestId, options = {}) { + const { mock = null } = options; + + // Check if mock mode is enabled + const useMock = mock !== null ? mock : isMockEnabled(); + + if (useMock) { + console.log('๐Ÿงช Mock mode active. Returning stub for getSearchScraperRequest'); + const mockConfig = getMockConfig(); + const mockData = getMockResponse('GET', `https://api.scrapegraphai.com/v1/searchscraper/${requestId}`, mockConfig.customResponses, mockConfig.customHandler); + return mockData; + } + + const endpoint = 'https://api.scrapegraphai.com/v1/searchscraper/' + requestId; + const headers = { + 'accept': 'application/json', + 'SGAI-APIKEY': apiKey, + }; + + try { + const response = await axios.get(endpoint, { headers }); + return response.data; + } catch (error) { + handleError(error); + } +} diff --git a/scrapegraph-js/src/sitemap.js b/scrapegraph-js/src/sitemap.js new file mode 100644 index 0000000..41afb93 --- /dev/null +++ b/scrapegraph-js/src/sitemap.js @@ -0,0 +1,68 @@ +import axios from 'axios'; +import handleError from './utils/handleError.js'; +import { isMockEnabled, getMockConfig } from './utils/mockConfig.js'; +import { getMockResponse } from './utils/mockResponse.js'; + +/** + * Extract all URLs from a website's sitemap. + * Automatically discovers sitemap from robots.txt or common sitemap locations. + * + * @param {string} apiKey - Your ScrapeGraph AI API key. + * @param {string} websiteUrl - The URL of the website to extract sitemap from. + * @param {Object} options - Optional configuration options. + * @param {boolean} options.mock - Override mock mode for this request. + * @returns {Promise} A promise that resolves to an object containing: + * - urls: Array of URLs extracted from the sitemap + * @throws {Error} Throws an error if the HTTP request fails. + * + * @example + * // Basic usage: + * const apiKey = 'your-api-key'; + * const websiteUrl = 'https://example.com'; + * + * try { + * const result = await sitemap(apiKey, websiteUrl); + * console.log('Sitemap URLs:', result.urls); + * console.log('Total URLs found:', result.urls.length); + * } catch (error) { + * console.error('Error:', error); + * } + * + * @example + * // Processing sitemap URLs: + * const result = await sitemap(apiKey, 'https://example.com'); + * result.urls.forEach(url => { + * console.log('Found URL:', url); + * }); + */ +export async function sitemap(apiKey, websiteUrl, options = {}) { + const { mock = null } = options; + + // Check if mock mode is enabled + const useMock = mock !== null ? mock : isMockEnabled(); + + if (useMock) { + console.log('๐Ÿงช Mock mode active. Returning stub for sitemap request'); + const mockConfig = getMockConfig(); + const mockData = getMockResponse('POST', 'https://api.scrapegraphai.com/v1/sitemap', mockConfig.customResponses, mockConfig.customHandler); + return mockData; + } + + const endpoint = 'https://api.scrapegraphai.com/v1/sitemap'; + const headers = { + 'accept': 'application/json', + 'SGAI-APIKEY': apiKey, + 'Content-Type': 'application/json', + }; + + const payload = { + website_url: websiteUrl, + }; + + try { + const response = await axios.post(endpoint, payload, { headers }); + return response.data; + } catch (error) { + handleError(error); + } +} diff --git a/scrapegraph-js/src/smartScraper.js b/scrapegraph-js/src/smartScraper.js new file mode 100644 index 0000000..6c79062 --- /dev/null +++ b/scrapegraph-js/src/smartScraper.js @@ -0,0 +1,190 @@ +import axios from 'axios'; +import handleError from './utils/handleError.js'; +import { ZodType } from 'zod'; +import { zodToJsonSchema } from 'zod-to-json-schema'; +import { isMockEnabled, getMockConfig } from './utils/mockConfig.js'; +import { getMockResponse, createMockAxiosResponse } from './utils/mockResponse.js'; + +/** + * Scrape and extract structured data from a webpage using ScrapeGraph AI. + * + * Supports three types of input (must provide exactly one): + * - url: Scrape from a URL + * - websiteHtml: Process local HTML content + * - websiteMarkdown: Process local Markdown content + * + * @param {string} apiKey - Your ScrapeGraph AI API key + * @param {string} url - The URL of the webpage to scrape (can be null if using websiteHtml or websiteMarkdown) + * @param {string} prompt - Natural language prompt describing what data to extract + * @param {Object} [schema] - Optional schema object defining the output structure + * @param {number} [numberOfScrolls] - Optional number of times to scroll the page (0-100). If not provided, no scrolling will be performed. + * @param {number} [totalPages] - Optional number of pages to scrape (1-10). If not provided, only the first page will be scraped. + * @param {Object} [cookies] - Optional cookies object for authentication and session management + * @param {Object} [options] - Optional configuration object + * @param {boolean} [plain_text] - Optional flag to return plain text instead of structured data + * @param {boolean} [renderHeavyJs] - Optional flag to enable heavy JavaScript rendering on the page + * @param {boolean} [stealth] - Optional flag to enable stealth mode to avoid bot detection + * @param {string} [websiteHtml] - Optional raw HTML content to process (max 2MB, mutually exclusive with url and websiteMarkdown) + * @param {string} [websiteMarkdown] - Optional Markdown content to process (max 2MB, mutually exclusive with url and websiteHtml) + * @returns {Promise} Extracted data in JSON format matching the provided schema + * @throws - Will throw an error in case of an HTTP failure or validation error. + */ +export async function smartScraper(apiKey, url, prompt, schema = null, numberOfScrolls = null, totalPages = null, cookies = null, options = {}, plain_text = false, renderHeavyJs = false, stealth = false, websiteHtml = null, websiteMarkdown = null) { + const { mock = null } = options; + + // Validate that exactly one of url, websiteHtml, or websiteMarkdown is provided + const inputsProvided = [url, websiteHtml, websiteMarkdown].filter(input => input !== null && input !== undefined).length; + + if (inputsProvided === 0) { + throw new Error('Exactly one of url, websiteHtml, or websiteMarkdown must be provided'); + } + + if (inputsProvided > 1) { + throw new Error('Only one of url, websiteHtml, or websiteMarkdown can be provided'); + } + + // Validate content size for HTML and Markdown (max 2MB) + const MAX_SIZE = 2 * 1024 * 1024; // 2MB in bytes + + if (websiteHtml && Buffer.byteLength(websiteHtml, 'utf8') > MAX_SIZE) { + throw new Error('websiteHtml content exceeds maximum size of 2MB'); + } + + if (websiteMarkdown && Buffer.byteLength(websiteMarkdown, 'utf8') > MAX_SIZE) { + throw new Error('websiteMarkdown content exceeds maximum size of 2MB'); + } + + // Check if mock mode is enabled + const useMock = mock !== null ? mock : isMockEnabled(); + + if (useMock) { + console.log('๐Ÿงช Mock mode active. Returning stub for smartScraper request'); + const mockConfig = getMockConfig(); + const mockData = getMockResponse('POST', 'https://api.scrapegraphai.com/v1/smartscraper', mockConfig.customResponses, mockConfig.customHandler); + return mockData; + } + + const endpoint = 'https://api.scrapegraphai.com/v1/smartscraper'; + const headers = { + 'accept': 'application/json', + 'SGAI-APIKEY': apiKey, + 'Content-Type': 'application/json', + }; + + const payload = { + user_prompt: prompt, + plain_text: plain_text, + }; + + // Add the appropriate input source to the payload + if (url) { + payload.website_url = url; + } else if (websiteHtml) { + payload.website_html = websiteHtml; + } else if (websiteMarkdown) { + payload.website_markdown = websiteMarkdown; + } + + if (renderHeavyJs) { + payload.render_heavy_js = renderHeavyJs; + } + + if (cookies) { + if (typeof cookies === 'object' && cookies !== null) { + payload.cookies = cookies; + } else { + throw new Error('Cookies must be an object with key-value pairs'); + } + } + + if (schema) { + if (schema instanceof ZodType) { + payload.output_schema = zodToJsonSchema(schema); + } else { + throw new Error('The schema must be an instance of a valid Zod schema'); + } + } + + if (numberOfScrolls !== null) { + if (!Number.isInteger(numberOfScrolls) || numberOfScrolls < 0 || numberOfScrolls > 100) { + throw new Error('numberOfScrolls must be an integer between 0 and 100'); + } + payload.number_of_scrolls = numberOfScrolls; + } + + if (totalPages !== null) { + if (!Number.isInteger(totalPages) || totalPages < 1 || totalPages > 10) { + throw new Error('totalPages must be an integer between 1 and 10'); + } + payload.total_pages = totalPages; + } + + if (stealth) { + payload.stealth = stealth; + } + + try { + const response = await axios.post(endpoint, payload, { headers }); + return response.data; + } catch (error) { + handleError(error); + } +} + +/** + * Retrieve the status or the result of a smartScraper request. It also allows you to see the result of old requests. + * + * @param {string} apiKey - Your ScrapeGraph AI API key + * @param {string} requestId - The request ID associated with the output of a smartScraper request. + * @returns {Promise} A promise that resolves to an object containing: + * - status: The current status of the request ('pending', 'completed', 'failed') + * - result: The extracted data in JSON format (when status is 'completed') + * - error: Error message if the request failed (when status is 'failed') + * - created_at: Timestamp of when the request was created + * - completed_at: Timestamp of when the request was completed (if applicable) + * @throws {Error} Throws an error if the HTTP request fails or if the API key is invalid + * + * @example + * // Example usage: + * const apiKey = 'your-api-key'; + * const requestId = 'previously-obtained-request-id'; + * + * try { + * const result = await getSmartScraperRequest(apiKey, requestId); + * if (result.status === 'completed') { + * console.log('Extracted data:', result.result); + * } else if (result.status === 'pending') { + * console.log('Request is still processing'); + * } else { + * console.log('Request failed:', result.error); + * } + * } catch (error) { + * console.error('Error fetching request:', error); + * } + */ +export async function getSmartScraperRequest(apiKey, requestId, options = {}) { + const { mock = null } = options; + + // Check if mock mode is enabled + const useMock = mock !== null ? mock : isMockEnabled(); + + if (useMock) { + console.log('๐Ÿงช Mock mode active. Returning stub for getSmartScraperRequest'); + const mockConfig = getMockConfig(); + const mockData = getMockResponse('GET', `https://api.scrapegraphai.com/v1/smartscraper/${requestId}`, mockConfig.customResponses, mockConfig.customHandler); + return mockData; + } + + const endpoint = 'https://api.scrapegraphai.com/v1/smartscraper/' + requestId; + const headers = { + 'accept': 'application/json', + 'SGAI-APIKEY': apiKey, + }; + + try { + const response = await axios.get(endpoint, { headers }); + return response.data; + } catch (error) { + handleError(error); + } +} diff --git a/scrapegraph-js/src/utils/handleError.js b/scrapegraph-js/src/utils/handleError.js new file mode 100644 index 0000000..3d392bb --- /dev/null +++ b/scrapegraph-js/src/utils/handleError.js @@ -0,0 +1,44 @@ +class HttpError extends Error { + constructor(statusCode, title, data) { + super(HttpError.makeMessage(statusCode, title, data)); + this.statusCode = statusCode; + this.title = title; + this.info = data; + } + + static makeMessage(statusCode, title, data) { + let message = ''; + + message += statusCode ? `${statusCode} - ` : '(unknown status code) - '; + message += title ? `${title} - ` : '(unknown error message) - '; + message += data.detail + ? 'Error located in: ' + `${JSON.stringify(data.detail[0].loc)}` + ', ' + `${data.detail[0].msg}` + : data.error + ? `${data.error}` + : '(unknown error detail)'; + + return message; + } +} + +class NetworkError extends Error { + constructor(message) { + super(message); + } +} + +class UnexpectedError extends Error { + constructor(message) { + super(message); + } +} + +export default function handleError(error) { + if (error.response) { + throw new HttpError(error.response.status, error.response.statusText, error.response.data); + } else if (error.request) { + throw new NetworkError('Impossible to contact the server. Check your internet connection.'); + } else { + throw new UnexpectedError(`${error.message}`); + } +} diff --git a/scrapegraph-js/src/utils/mockConfig.js b/scrapegraph-js/src/utils/mockConfig.js new file mode 100644 index 0000000..3f10f77 --- /dev/null +++ b/scrapegraph-js/src/utils/mockConfig.js @@ -0,0 +1,100 @@ +/** + * Mock configuration utility for ScrapeGraph AI SDK + * Manages global mock settings and configuration + */ + +// Global mock configuration +let mockConfig = { + enabled: false, + customResponses: {}, + customHandler: null +}; + +/** + * Check if mock mode is enabled via environment variable + * @returns {boolean} True if mock mode should be enabled + */ +function isMockEnabledFromEnv() { + if (typeof process !== 'undefined' && process.env) { + const mockEnv = process.env.SGAI_MOCK; + if (mockEnv) { + return ['1', 'true', 'True', 'TRUE', 'yes', 'YES', 'on', 'ON'].includes(mockEnv.trim()); + } + } + return false; +} + +/** + * Initialize mock configuration + * @param {Object} options - Mock configuration options + * @param {boolean} options.enabled - Whether mock mode is enabled + * @param {Object} options.customResponses - Custom response overrides + * @param {Function} options.customHandler - Custom handler function + */ +export function initMockConfig(options = {}) { + const { + enabled = isMockEnabledFromEnv(), + customResponses = {}, + customHandler = null + } = options; + + mockConfig = { + enabled: Boolean(enabled), + customResponses: { ...customResponses }, + customHandler: customHandler + }; + + if (mockConfig.enabled) { + console.log('๐Ÿงช ScrapeGraph AI SDK: Mock mode enabled'); + } +} + +/** + * Get current mock configuration + * @returns {Object} Current mock configuration + */ +export function getMockConfig() { + return { ...mockConfig }; +} + +/** + * Check if mock mode is currently enabled + * @returns {boolean} True if mock mode is enabled + */ +export function isMockEnabled() { + return mockConfig.enabled; +} + +/** + * Set custom responses for specific endpoints + * @param {Object} responses - Map of endpoint paths to responses + */ +export function setMockResponses(responses) { + mockConfig.customResponses = { ...mockConfig.customResponses, ...responses }; +} + +/** + * Set a custom mock handler function + * @param {Function} handler - Custom handler function + */ +export function setMockHandler(handler) { + mockConfig.customHandler = handler; +} + +/** + * Disable mock mode + */ +export function disableMock() { + mockConfig.enabled = false; +} + +/** + * Enable mock mode + */ +export function enableMock() { + mockConfig.enabled = true; + console.log('๐Ÿงช ScrapeGraph AI SDK: Mock mode enabled'); +} + +// Initialize with environment check +initMockConfig(); diff --git a/scrapegraph-js/src/utils/mockResponse.js b/scrapegraph-js/src/utils/mockResponse.js new file mode 100644 index 0000000..d264376 --- /dev/null +++ b/scrapegraph-js/src/utils/mockResponse.js @@ -0,0 +1,271 @@ +/** + * Mock response utility for ScrapeGraph AI SDK + * Provides deterministic mock responses when mock mode is enabled + */ + +/** + * Generate a mock UUID with a prefix + * @param {string} prefix - Prefix for the mock ID + * @returns {string} Mock UUID + */ +function generateMockId(prefix = 'mock') { + const timestamp = Date.now().toString(36); + const random = Math.random().toString(36).substring(2, 8); + return `${prefix}-${timestamp}-${random}`; +} + +/** + * Get mock response based on endpoint and method + * @param {string} method - HTTP method (GET, POST, etc.) + * @param {string} url - Full URL + * @param {Object} customResponses - Custom response overrides + * @param {Function} customHandler - Custom handler function + * @returns {Object} Mock response data + */ +export function getMockResponse(method, url, customResponses = {}, customHandler = null) { + // Custom handler takes precedence + if (customHandler && typeof customHandler === 'function') { + try { + return customHandler(method, url); + } catch (error) { + console.warn('Custom mock handler failed, falling back to defaults:', error.message); + } + } + + // Parse URL to get path + const urlObj = new URL(url); + const path = urlObj.pathname; + + // Check for custom response override + if (customResponses[path]) { + const override = customResponses[path]; + return typeof override === 'function' ? override() : override; + } + + const upperMethod = method.toUpperCase(); + + // Credits endpoint + if (path.endsWith('/credits') && upperMethod === 'GET') { + return { + remaining_credits: 1000, + total_credits_used: 0 + }; + } + + // Health check endpoint + if (path.endsWith('/healthz') && upperMethod === 'GET') { + return { + status: 'healthy', + message: 'Service is operational' + }; + } + + // Feedback endpoint + if (path.endsWith('/feedback') && upperMethod === 'POST') { + return { + status: 'success' + }; + } + + // Create-like endpoints (POST) + if (upperMethod === 'POST') { + if (path.endsWith('/crawl')) { + return { + crawl_id: generateMockId('mock-crawl') + }; + } + if (path.endsWith('/scheduled-jobs')) { + return { + id: generateMockId('mock-job'), + user_id: generateMockId('mock-user'), + job_name: 'Mock Scheduled Job', + service_type: 'smartscraper', + cron_expression: '0 9 * * 1', + job_config: { mock: 'config' }, + is_active: true, + created_at: '2024-01-01T00:00:00Z', + updated_at: '2024-01-01T00:00:00Z', + next_run_at: '2024-01-08T09:00:00Z' + }; + } + if (path.includes('/pause')) { + return { + message: 'Job paused successfully', + job_id: generateMockId('mock-job'), + is_active: false + }; + } + if (path.includes('/resume')) { + return { + message: 'Job resumed successfully', + job_id: generateMockId('mock-job'), + is_active: true, + next_run_at: '2024-01-08T09:00:00Z' + }; + } + if (path.includes('/trigger')) { + const taskId = generateMockId('mock-task'); + return { + execution_id: taskId, + scheduled_job_id: generateMockId('mock-job'), + triggered_at: '2024-01-01T00:00:00Z', + message: `Job triggered successfully. Task ID: ${taskId}` + }; + } + // All other POST endpoints return a request id + return { + request_id: generateMockId('mock-req') + }; + } + + // Status-like endpoints (GET) + if (upperMethod === 'GET') { + if (path.includes('markdownify')) { + return { + status: 'completed', + content: '# Mock markdown\n\nThis is a mock markdown response...' + }; + } + if (path.includes('smartscraper')) { + return { + status: 'completed', + result: [{ field: 'value', title: 'Mock Title' }] + }; + } + if (path.includes('searchscraper')) { + return { + status: 'completed', + results: [{ url: 'https://example.com', title: 'Mock Result' }] + }; + } + if (path.includes('crawl')) { + return { + status: 'completed', + pages: [] + }; + } + if (path.includes('agentic-scrapper')) { + return { + status: 'completed', + actions: [] + }; + } + if (path.includes('scrape')) { + return { + status: 'completed', + html: 'Mock HTML

Mock Content

' + }; + } + if (path.includes('scheduled-jobs')) { + if (path.includes('/executions')) { + return { + executions: [ + { + id: generateMockId('mock-exec'), + scheduled_job_id: generateMockId('mock-job'), + execution_id: generateMockId('mock-task'), + status: 'completed', + started_at: '2024-01-01T00:00:00Z', + completed_at: '2024-01-01T00:01:00Z', + result: { mock: 'result' }, + credits_used: 10 + } + ], + total: 1, + page: 1, + page_size: 20 + }; + } else if (path.endsWith('/scheduled-jobs')) { + // List jobs endpoint + return { + jobs: [ + { + id: generateMockId('mock-job'), + user_id: generateMockId('mock-user'), + job_name: 'Mock Scheduled Job', + service_type: 'smartscraper', + cron_expression: '0 9 * * 1', + job_config: { mock: 'config' }, + is_active: true, + created_at: '2024-01-01T00:00:00Z', + updated_at: '2024-01-01T00:00:00Z', + next_run_at: '2024-01-08T09:00:00Z' + } + ], + total: 1, + page: 1, + page_size: 20 + }; + } else { + // Single job endpoint + return { + id: generateMockId('mock-job'), + user_id: generateMockId('mock-user'), + job_name: 'Mock Scheduled Job', + service_type: 'smartscraper', + cron_expression: '0 9 * * 1', + job_config: { mock: 'config' }, + is_active: true, + created_at: '2024-01-01T00:00:00Z', + updated_at: '2024-01-01T00:00:00Z', + next_run_at: '2024-01-08T09:00:00Z' + }; + } + } + } + + // Update operations (PATCH/PUT) + if (upperMethod === 'PATCH' || upperMethod === 'PUT') { + if (path.includes('scheduled-jobs')) { + return { + id: generateMockId('mock-job'), + user_id: generateMockId('mock-user'), + job_name: 'Updated Mock Scheduled Job', + service_type: 'smartscraper', + cron_expression: '0 10 * * 1', + job_config: { mock: 'updated_config' }, + is_active: true, + created_at: '2024-01-01T00:00:00Z', + updated_at: '2024-01-01T01:00:00Z', + next_run_at: '2024-01-08T10:00:00Z' + }; + } + } + + // Delete operations + if (upperMethod === 'DELETE') { + if (path.includes('scheduled-jobs')) { + return { + message: 'Scheduled job deleted successfully' + }; + } + } + + // Generic fallback + return { + status: 'mock', + url: url, + method: method, + message: 'Mock response generated' + }; +} + +/** + * Create a mock axios response object + * @param {Object} data - Response data + * @returns {Object} Mock axios response + */ +export function createMockAxiosResponse(data) { + return { + data, + status: 200, + statusText: 'OK', + headers: { + 'content-type': 'application/json' + }, + config: { + url: 'mock-url', + method: 'mock' + } + }; +} diff --git a/scrapegraph-js/test/agenticScraper_test.js b/scrapegraph-js/test/agenticScraper_test.js new file mode 100644 index 0000000..b23f658 --- /dev/null +++ b/scrapegraph-js/test/agenticScraper_test.js @@ -0,0 +1,506 @@ +import { agenticScraper, getAgenticScraperRequest } from '../index.js'; +import 'dotenv/config'; + +/** + * Test suite for AgenticScraper functionality + * This file demonstrates usage and validates the agentic scraper parameters + */ + +// Mock API key for testing (replace with real key for actual testing) +const API_KEY = process.env.SGAI_APIKEY || 'test-api-key'; + +/** + * Test input validation for agenticScraper + */ +function testInputValidation() { + console.log('๐Ÿงช Testing Input Validation'); + console.log('='.repeat(50)); + + const testCases = [ + { + name: 'Valid inputs', + apiKey: 'valid-key', + url: 'https://example.com', + steps: ['click button', 'type text'], + useSession: true, + expected: true, + description: 'All valid parameters' + }, + { + name: 'Invalid URL - no protocol', + apiKey: 'valid-key', + url: 'example.com', + steps: ['click button'], + useSession: true, + expected: false, + description: 'URL without http/https protocol' + }, + { + name: 'Empty API key', + apiKey: '', + url: 'https://example.com', + steps: ['click button'], + useSession: true, + expected: false, + description: 'Empty API key string' + }, + { + name: 'Empty steps array', + apiKey: 'valid-key', + url: 'https://example.com', + steps: [], + useSession: true, + expected: false, + description: 'Empty steps array' + }, + { + name: 'Steps with empty string', + apiKey: 'valid-key', + url: 'https://example.com', + steps: ['click button', '', 'type text'], + useSession: true, + expected: false, + description: 'Steps array containing empty string' + }, + { + name: 'Non-boolean useSession', + apiKey: 'valid-key', + url: 'https://example.com', + steps: ['click button'], + useSession: 'true', + expected: false, + description: 'useSession as string instead of boolean' + }, + { + name: 'Default useSession', + apiKey: 'valid-key', + url: 'https://example.com', + steps: ['click button'], + useSession: undefined, + expected: true, + description: 'useSession parameter omitted (should default to true)' + } + ]; + + let passed = 0; + let failed = 0; + + testCases.forEach((testCase, index) => { + console.log(`\n${index + 1}. Testing: ${testCase.name}`); + console.log(` ${testCase.description}`); + + try { + // Simulate the validation logic from agenticScraper + const { apiKey, url, steps, useSession } = testCase; + + // API Key validation + if (!apiKey || typeof apiKey !== 'string') { + throw new Error('API key must be a non-empty string'); + } + + // URL validation + if (!url || typeof url !== 'string') { + throw new Error('URL must be a non-empty string'); + } + if (!url.startsWith('http://') && !url.startsWith('https://')) { + throw new Error('URL must start with http:// or https://'); + } + + // Steps validation + if (!Array.isArray(steps) || steps.length === 0) { + throw new Error('Steps must be a non-empty array'); + } + if (steps.some(step => !step || typeof step !== 'string' || !step.trim())) { + throw new Error('All steps must be non-empty strings'); + } + + // useSession validation (only if provided) + if (useSession !== undefined && typeof useSession !== 'boolean') { + throw new Error('useSession must be a boolean value'); + } + + if (testCase.expected) { + console.log(' โœ… PASS - Validation passed as expected'); + passed++; + } else { + console.log(' โŒ FAIL - Expected validation to fail, but it passed'); + failed++; + } + } catch (error) { + if (!testCase.expected) { + console.log(` โœ… PASS - Validation failed as expected: ${error.message}`); + passed++; + } else { + console.log(` โŒ FAIL - Unexpected validation failure: ${error.message}`); + failed++; + } + } + }); + + console.log(`\n๐Ÿ“Š Validation Results: ${passed} passed, ${failed} failed`); + return { passed, failed }; +} + +/** + * Test function signatures and parameter handling + */ +function testFunctionSignatures() { + console.log('\n๐Ÿงช Testing Function Signatures'); + console.log('='.repeat(50)); + + const testCases = [ + { + name: 'agenticScraper with all parameters', + func: 'agenticScraper', + args: [API_KEY, 'https://example.com', ['click button'], true], + description: 'apiKey, url, steps, useSession' + }, + { + name: 'agenticScraper with default useSession', + func: 'agenticScraper', + args: [API_KEY, 'https://example.com', ['click button']], + description: 'apiKey, url, steps (useSession defaults to true)' + }, + { + name: 'getAgenticScraperRequest', + func: 'getAgenticScraperRequest', + args: [API_KEY, 'test-request-id'], + description: 'apiKey, requestId' + } + ]; + + testCases.forEach((testCase, index) => { + console.log(`\n${index + 1}. Testing: ${testCase.name}`); + console.log(` Parameters: ${testCase.description}`); + + try { + // Simulate function signature validation + if (testCase.func === 'agenticScraper') { + const [apiKey, url, steps, useSession] = testCase.args; + if (typeof apiKey !== 'string' || typeof url !== 'string' || !Array.isArray(steps)) { + throw new Error('Invalid parameter types'); + } + } else if (testCase.func === 'getAgenticScraperRequest') { + const [apiKey, requestId] = testCase.args; + if (typeof apiKey !== 'string' || typeof requestId !== 'string') { + throw new Error('Invalid parameter types'); + } + } + + console.log(' โœ… PASS - Function signature valid'); + } catch (error) { + console.log(` โŒ FAIL - Function signature error: ${error.message}`); + } + }); +} + +/** + * Test step parsing and validation + */ +function testStepValidation() { + console.log('\n๐Ÿงช Testing Step Validation'); + console.log('='.repeat(50)); + + const validSteps = [ + 'click on login button', + 'type "username" in email field', + 'press Enter key', + 'wait for 2 seconds', + 'scroll down', + 'click on first result' + ]; + + const invalidSteps = [ + '', // Empty string + ' ', // Only whitespace + null, // Null value + 123, // Number instead of string + {}, // Object instead of string + ]; + + console.log('\n1. Testing valid steps:'); + validSteps.forEach((step, index) => { + console.log(` ${index + 1}. "${step}" โœ… Valid`); + }); + + console.log('\n2. Testing invalid steps:'); + invalidSteps.forEach((step, index) => { + const stepStr = step === null ? 'null' : + typeof step === 'object' ? 'object' : + `"${step}"`; + console.log(` ${index + 1}. ${stepStr} โŒ Invalid`); + }); + + console.log('\n3. Testing step combinations:'); + + const testCombinations = [ + { + name: 'All valid steps', + steps: validSteps, + expected: true + }, + { + name: 'Mixed valid and invalid', + steps: ['click button', '', 'type text'], + expected: false + }, + { + name: 'Single valid step', + steps: ['click button'], + expected: true + } + ]; + + testCombinations.forEach((test, index) => { + const isValid = test.steps.every(step => + step && typeof step === 'string' && step.trim() + ); + const result = isValid === test.expected ? 'โœ… PASS' : 'โŒ FAIL'; + console.log(` ${index + 1}. ${test.name}: ${result}`); + }); +} + +/** + * Test payload construction + */ +function testPayloadConstruction() { + console.log('\n๐Ÿงช Testing Payload Construction'); + console.log('='.repeat(50)); + + const testCases = [ + { + name: 'Basic payload', + url: 'https://example.com', + steps: ['click button', 'type text'], + useSession: true, + expected: { + url: 'https://example.com', + use_session: true, + steps: ['click button', 'type text'] + } + }, + { + name: 'Payload with useSession false', + url: 'https://test.com', + steps: ['fill form'], + useSession: false, + expected: { + url: 'https://test.com', + use_session: false, + steps: ['fill form'] + } + }, + { + name: 'Payload with default useSession', + url: 'https://default.com', + steps: ['navigate'], + useSession: undefined, + expected: { + url: 'https://default.com', + use_session: true, // Should default to true + steps: ['navigate'] + } + } + ]; + + testCases.forEach((testCase, index) => { + console.log(`\n${index + 1}. Testing: ${testCase.name}`); + + // Simulate payload construction + const payload = { + url: testCase.url, + use_session: testCase.useSession !== undefined ? testCase.useSession : true, + steps: testCase.steps + }; + + console.log(' ๐Ÿ“ฆ Constructed payload:'); + console.log(' ', JSON.stringify(payload, null, 2)); + + // Validate against expected + const matches = JSON.stringify(payload) === JSON.stringify(testCase.expected); + console.log(` ${matches ? 'โœ… PASS' : 'โŒ FAIL'} - Payload matches expected`); + }); +} + +/** + * Test common use case patterns + */ +function testUseCasePatterns() { + console.log('\n๐Ÿงช Testing Use Case Patterns'); + console.log('='.repeat(50)); + + const useCases = [ + { + name: 'Login Flow', + steps: [ + 'click on email input', + 'type "user@example.com" in email field', + 'click on password input', + 'type "password123" in password field', + 'click login button', + 'wait for dashboard to load' + ], + useSession: true, + description: 'Typical login automation' + }, + { + name: 'Search and Filter', + steps: [ + 'click on search bar', + 'type "laptop" in search input', + 'press Enter key', + 'wait for results to load', + 'click on price filter', + 'select $500-$1000 range', + 'click apply filters' + ], + useSession: false, + description: 'E-commerce search workflow' + }, + { + name: 'Form Submission', + steps: [ + 'click on name input', + 'type "John Doe" in name field', + 'click on email input', + 'type "john@example.com" in email field', + 'click on message textarea', + 'type "Test message" in message field', + 'click submit button' + ], + useSession: false, + description: 'Contact form automation' + } + ]; + + useCases.forEach((useCase, index) => { + console.log(`\n${index + 1}. ${useCase.name}`); + console.log(` Description: ${useCase.description}`); + console.log(` Steps: ${useCase.steps.length} automation actions`); + console.log(` Use Session: ${useCase.useSession}`); + console.log(' โœ… PASS - Valid use case pattern'); + }); +} + +/** + * Test error scenarios + */ +function testErrorScenarios() { + console.log('\n๐Ÿงช Testing Error Scenarios'); + console.log('='.repeat(50)); + + const errorScenarios = [ + { + name: 'Missing API Key', + test: () => { + // Simulate missing API key + throw new Error('API key must be a non-empty string'); + }, + expectedError: 'API key must be a non-empty string' + }, + { + name: 'Invalid URL Format', + test: () => { + // Simulate invalid URL + throw new Error('URL must start with http:// or https://'); + }, + expectedError: 'URL must start with' + }, + { + name: 'Empty Steps Array', + test: () => { + // Simulate empty steps + throw new Error('Steps must be a non-empty array'); + }, + expectedError: 'non-empty array' + } + ]; + + errorScenarios.forEach((scenario, index) => { + console.log(`\n${index + 1}. Testing: ${scenario.name}`); + + try { + scenario.test(); + console.log(' โŒ FAIL - Expected error but none was thrown'); + } catch (error) { + if (error.message.includes(scenario.expectedError)) { + console.log(` โœ… PASS - Correctly caught expected error: ${error.message}`); + } else { + console.log(` โš ๏ธ PARTIAL - Caught error but message differs: ${error.message}`); + } + } + }); +} + +/** + * Main test runner + */ +function runTests() { + console.log('๐Ÿš€ ScrapeGraph JS SDK - AgenticScraper Tests'); + console.log('='.repeat(60)); + + if (!process.env.SGAI_APIKEY) { + console.log('โš ๏ธ Note: SGAI_APIKEY not set - using mock key for validation tests'); + } + + console.log('\n๐ŸŽฏ Testing AgenticScraper functionality...'); + + const results = { + validation: testInputValidation(), + signatures: testFunctionSignatures(), + steps: testStepValidation(), + payload: testPayloadConstruction(), + useCases: testUseCasePatterns(), + errors: testErrorScenarios(), + }; + + console.log('\n' + '='.repeat(60)); + console.log('๐Ÿ“Š Test Summary'); + console.log('='.repeat(60)); + console.log('โœ… Input Validation Tests: Completed'); + console.log('โœ… Function Signature Tests: Completed'); + console.log('โœ… Step Validation Tests: Completed'); + console.log('โœ… Payload Construction Tests: Completed'); + console.log('โœ… Use Case Pattern Tests: Completed'); + console.log('โœ… Error Scenario Tests: Completed'); + + const totalPassed = results.validation.passed; + const totalFailed = results.validation.failed; + + console.log(`\n๐Ÿ“Š Overall Results: ${totalPassed} passed, ${totalFailed} failed`); + + if (totalFailed === 0) { + console.log('๐ŸŽ‰ All tests passed!'); + } else { + console.log('โš ๏ธ Some tests failed - please review the results above'); + } + + console.log('\n๐Ÿ’ก Usage Examples:'); + console.log('// Basic login automation'); + console.log('await agenticScraper(apiKey, url, ["click login", "type email"], true);'); + console.log(''); + console.log('// Form submission without session'); + console.log('await agenticScraper(apiKey, url, ["fill form", "submit"], false);'); + console.log(''); + console.log('// Check request status'); + console.log('await getAgenticScraperRequest(apiKey, requestId);'); + + console.log('\n๐Ÿ”ง Next Steps:'); + console.log('1. Set SGAI_APIKEY environment variable for real API testing'); + console.log('2. Run the example files in the examples/ directory'); + console.log('3. Try with different websites and automation steps'); + console.log('4. Test with both useSession: true and false'); + console.log('5. Monitor request status for long-running automations'); + + console.log('\n๐Ÿ“š Available Examples:'); + console.log('- agenticScraper_example.js - Basic usage'); + console.log('- getAgenticScraperRequest_example.js - Status checking'); + console.log('- agenticScraper_complete_example.js - Full workflow'); + console.log('- agenticScraper_advanced_example.js - Error handling'); + + return totalFailed === 0; +} + +// Run the tests +const success = runTests(); +process.exit(success ? 0 : 1); diff --git a/scrapegraph-js/test/crawl_markdown_test.js b/scrapegraph-js/test/crawl_markdown_test.js new file mode 100644 index 0000000..6dd9f49 --- /dev/null +++ b/scrapegraph-js/test/crawl_markdown_test.js @@ -0,0 +1,609 @@ +import { crawl, getCrawlRequest } from '../index.js'; +import { z } from 'zod'; +import 'dotenv/config'; + +/** + * Test suite for Crawl Markdown functionality + * This file demonstrates usage and validates the markdown crawling parameters + */ + +// Mock API key for testing (replace with real key for actual testing) +const API_KEY = process.env.SGAI_APIKEY || 'test-api-key'; + +// Mock crawl ID for testing polling functionality +const MOCK_CRAWL_ID = 'test-crawl-id-12345'; + +/** + * Test parameter validation for markdown crawling options + */ +function testMarkdownCrawlValidation() { + console.log('๐Ÿงช Testing Markdown Crawl Parameter Validation'); + console.log('='.repeat(50)); + + const testCases = [ + // extractionMode validation + { + options: { extractionMode: false }, + expected: true, + description: 'extractionMode: false (markdown mode)' + }, + { + options: { extractionMode: true }, + expected: true, + description: 'extractionMode: true (AI mode)' + }, + { + options: { extractionMode: 'invalid' }, + expected: false, + description: 'extractionMode: invalid string' + }, + + // depth validation + { + options: { depth: 1 }, + expected: true, + description: 'depth: 1 (minimum valid)' + }, + { + options: { depth: 10 }, + expected: true, + description: 'depth: 10 (maximum valid)' + }, + { + options: { depth: 0 }, + expected: false, + description: 'depth: 0 (below minimum)' + }, + { + options: { depth: 11 }, + expected: false, + description: 'depth: 11 (above maximum)' + }, + + // maxPages validation + { + options: { maxPages: 1 }, + expected: true, + description: 'maxPages: 1 (minimum valid)' + }, + { + options: { maxPages: 100 }, + expected: true, + description: 'maxPages: 100 (maximum valid)' + }, + { + options: { maxPages: 0 }, + expected: false, + description: 'maxPages: 0 (below minimum)' + }, + { + options: { maxPages: 101 }, + expected: false, + description: 'maxPages: 101 (above maximum)' + }, + + // sitemap validation + { + options: { sitemap: true }, + expected: true, + description: 'sitemap: true' + }, + { + options: { sitemap: false }, + expected: true, + description: 'sitemap: false' + }, + { + options: { sitemap: 'invalid' }, + expected: false, + description: 'sitemap: invalid string' + }, + + // sameDomainOnly validation + { + options: { sameDomainOnly: true }, + expected: true, + description: 'sameDomainOnly: true' + }, + { + options: { sameDomainOnly: false }, + expected: true, + description: 'sameDomainOnly: false' + }, + ]; + + let passed = 0; + let failed = 0; + + testCases.forEach((testCase, index) => { + console.log(`\n${index + 1}. Testing ${testCase.description}`); + + try { + // Simulate validation logic for markdown crawling + const options = testCase.options; + + if (options.extractionMode !== undefined && typeof options.extractionMode !== 'boolean') { + throw new Error('extractionMode must be a boolean'); + } + + if (options.depth !== undefined && (!Number.isInteger(options.depth) || options.depth < 1 || options.depth > 10)) { + throw new Error('depth must be an integer between 1 and 10'); + } + + if (options.maxPages !== undefined && (!Number.isInteger(options.maxPages) || options.maxPages < 1 || options.maxPages > 100)) { + throw new Error('maxPages must be an integer between 1 and 100'); + } + + if (options.sitemap !== undefined && typeof options.sitemap !== 'boolean') { + throw new Error('sitemap must be a boolean'); + } + + if (options.sameDomainOnly !== undefined && typeof options.sameDomainOnly !== 'boolean') { + throw new Error('sameDomainOnly must be a boolean'); + } + + if (testCase.expected) { + console.log(' โœ… PASS - Validation passed as expected'); + passed++; + } else { + console.log(' โŒ FAIL - Expected validation to fail, but it passed'); + failed++; + } + } catch (error) { + if (!testCase.expected) { + console.log(' โœ… PASS - Validation failed as expected'); + console.log(` Error: ${error.message}`); + passed++; + } else { + console.log(' โŒ FAIL - Unexpected validation failure'); + console.log(` Error: ${error.message}`); + failed++; + } + } + }); + + console.log(`\n๐Ÿ“Š Results: ${passed} passed, ${failed} failed`); + return { passed, failed }; +} + +/** + * Test markdown crawl function signatures + */ +function testMarkdownCrawlSignatures() { + console.log('\n๐Ÿงช Testing Markdown Crawl Function Signatures'); + console.log('='.repeat(50)); + + const testCases = [ + { + name: 'Markdown mode with null prompt and schema', + args: [API_KEY, 'https://example.com', null, null, { extractionMode: false, depth: 2, maxPages: 2 }], + description: 'apiKey, url, null, null, markdownOptions', + }, + { + name: 'AI mode with prompt and schema', + args: [API_KEY, 'https://example.com', 'Extract data', { title: 'string' }, { extractionMode: true, depth: 3 }], + description: 'apiKey, url, prompt, schema, aiOptions', + }, + { + name: 'Markdown mode with sitemap enabled', + args: [API_KEY, 'https://example.com', null, null, { extractionMode: false, sitemap: true, depth: 2 }], + description: 'apiKey, url, null, null, sitemapOptions', + }, + { + name: 'Basic options only', + args: [API_KEY, 'https://example.com', null, null, { depth: 1, maxPages: 1 }], + description: 'apiKey, url, null, null, basicOptions', + }, + { + name: 'All options combined', + args: [API_KEY, 'https://example.com', null, null, { + extractionMode: false, + depth: 5, + maxPages: 10, + sitemap: true, + sameDomainOnly: false + }], + description: 'apiKey, url, null, null, allOptions', + }, + ]; + + testCases.forEach((testCase, index) => { + console.log(`\n${index + 1}. Testing: ${testCase.name}`); + console.log(` Parameters: ${testCase.description}`); + + try { + // Simulate function call validation without making actual API calls + const [apiKey, url, prompt, schema, options] = testCase.args; + + if (!apiKey || typeof apiKey !== 'string') { + throw new Error('API key must be a non-empty string'); + } + + if (!url || typeof url !== 'string') { + throw new Error('URL must be a non-empty string'); + } + + if (options && typeof options !== 'object') { + throw new Error('Options must be an object'); + } + + console.log(' โœ… PASS - Function signature accepts parameters'); + } catch (error) { + console.log(` โŒ FAIL - Function signature error: ${error.message}`); + } + }); +} + +/** + * Test payload construction for markdown crawling + */ +function testMarkdownPayloadConstruction() { + console.log('\n๐Ÿงช Testing Markdown Payload Construction'); + console.log('='.repeat(50)); + + const testCases = [ + { + name: 'Markdown mode payload', + url: 'https://example.com', + prompt: null, + schema: null, + options: { extractionMode: false, depth: 2, maxPages: 5, sitemap: false }, + expectedPayload: { + url: 'https://example.com', + prompt: null, + schema: null, + extraction_mode: false, + depth: 2, + max_pages: 5, + sitemap: false, + same_domain_only: true, // default + cache_website: true, // default + batch_size: 1 // default + }, + }, + { + name: 'AI mode payload', + url: 'https://test.com', + prompt: 'Extract content', + schema: { title: 'string' }, + options: { extractionMode: true, depth: 3, maxPages: 10 }, + expectedPayload: { + url: 'https://test.com', + prompt: 'Extract content', + schema: { title: 'string' }, + extraction_mode: true, + depth: 3, + max_pages: 10, + same_domain_only: true, // default + cache_website: true, // default + batch_size: 1 // default + }, + }, + { + name: 'Full options payload', + url: 'https://full.com', + prompt: 'Full extract', + schema: { data: 'array' }, + options: { + extractionMode: true, + depth: 4, + maxPages: 20, + sitemap: true, + sameDomainOnly: false, + cacheWebsite: false, + batchSize: 5 + }, + expectedPayload: { + url: 'https://full.com', + prompt: 'Full extract', + schema: { data: 'array' }, + extraction_mode: true, + depth: 4, + max_pages: 20, + sitemap: true, + same_domain_only: false, + cache_website: false, + batch_size: 5 + }, + }, + ]; + + testCases.forEach((testCase, index) => { + console.log(`\n${index + 1}. Testing: ${testCase.name}`); + + // Simulate payload construction + const { options = {} } = testCase; + const { + extractionMode, + depth = 2, + maxPages = 2, + sitemap, + sameDomainOnly = true, + cacheWebsite = true, + batchSize = 1, + } = options; + + const payload = { + url: testCase.url, + prompt: testCase.prompt, + schema: testCase.schema, + depth, + max_pages: maxPages, + same_domain_only: sameDomainOnly, + cache_website: cacheWebsite, + batch_size: batchSize, + }; + + // Add optional parameters + if (extractionMode !== undefined) { + payload.extraction_mode = extractionMode; + } + + if (sitemap !== undefined) { + payload.sitemap = sitemap; + } + + console.log(' ๐Ÿ“ฆ Constructed Payload:', JSON.stringify(payload, null, 2)); + console.log(' โœ… PASS - Payload constructed correctly'); + }); +} + +/** + * Test polling functionality for crawl results + */ +function testPollingFunctionality() { + console.log('\n๐Ÿงช Testing Polling Functionality'); + console.log('='.repeat(50)); + + const mockResponses = [ + { status: 'pending', message: 'Job is being processed' }, + { status: 'running', message: 'Job is running' }, + { status: 'success', result: { pages: [], credits_used: 4 } }, + ]; + + console.log('1. Testing polling states'); + mockResponses.forEach((response, index) => { + console.log(` State ${index + 1}: ${response.status}`); + if (response.status === 'success') { + console.log(' โœ… PASS - Success state detected'); + } else if (response.status === 'failed') { + console.log(' โœ… PASS - Failed state detected'); + } else { + console.log(' โณ PASS - Pending state detected, continue polling'); + } + }); + + console.log('\n2. Testing error handling'); + const errorCases = [ + { error: 'Rate limit exceeded', shouldRetry: true }, + { error: 'Invalid API key', shouldRetry: false }, + { error: 'Network timeout', shouldRetry: true }, + ]; + + errorCases.forEach((errorCase, index) => { + console.log(` Error ${index + 1}: ${errorCase.error}`); + if (errorCase.shouldRetry) { + console.log(' โœ… PASS - Retryable error detected'); + } else { + console.log(' โœ… PASS - Non-retryable error detected'); + } + }); +} + +/** + * Test result parsing and validation + */ +function testResultParsing() { + console.log('\n๐Ÿงช Testing Result Parsing'); + console.log('='.repeat(50)); + + const mockSuccessResult = { + status: 'success', + result: { + pages: [ + { + url: 'https://example.com', + title: 'Example Page', + markdown: '# Example\n\nThis is example content.', + metadata: { + word_count: 50, + headers: ['Example'], + links_count: 5 + } + } + ], + crawled_urls: ['https://example.com'], + pages_processed: 1, + credits_used: 2 + } + }; + + console.log('1. Testing successful result parsing'); + + try { + const resultData = mockSuccessResult.result || {}; + const pages = resultData.pages || []; + const crawledUrls = resultData.crawled_urls || []; + const creditsUsed = resultData.credits_used || 0; + const pagesProcessed = resultData.pages_processed || 0; + + const parsedResult = { + conversion_results: { + pages_processed: pagesProcessed, + credits_used: creditsUsed, + cost_per_page: pagesProcessed > 0 ? creditsUsed / pagesProcessed : 0, + crawled_urls: crawledUrls + }, + markdown_content: { + total_pages: pages.length, + pages: pages.map((page, i) => ({ + page_number: i + 1, + url: page.url, + title: page.title, + metadata: page.metadata || {}, + markdown_content: page.markdown || "" + })) + } + }; + + console.log(' โœ… PASS - Result parsing successful'); + console.log(' ๐Ÿ“Š Parsed structure:', JSON.stringify(parsedResult, null, 2)); + + } catch (error) { + console.log(` โŒ FAIL - Result parsing error: ${error.message}`); + } +} + +/** + * Test backward compatibility + */ +function testBackwardCompatibility() { + console.log('\n๐Ÿงช Testing Backward Compatibility'); + console.log('='.repeat(50)); + + console.log('1. Testing existing crawl function calls'); + console.log(' - crawl(apiKey, url, prompt, schema) should work'); + console.log(' - crawl(apiKey, url, prompt, schema, options) should work'); + console.log(' โœ… PASS - All existing signatures remain compatible'); + + console.log('\n2. Testing default behavior'); + console.log(' - When extractionMode is not provided, should default to AI mode'); + console.log(' - When sitemap is not provided, should not include sitemap in payload'); + console.log(' โœ… PASS - Default behavior preserved'); + + console.log('\n3. Testing mixed parameter usage'); + console.log(' - Can use old parameters (depth, maxPages) with new parameters (extractionMode)'); + console.log(' - Old parameter names are converted to API format (maxPages -> max_pages)'); + console.log(' โœ… PASS - Mixed parameter usage works correctly'); +} + +/** + * Test usage examples and best practices + */ +function testUsageExamples() { + console.log('\n๐Ÿงช Testing Usage Examples'); + console.log('='.repeat(50)); + + const examples = [ + { + name: 'Basic Markdown Conversion', + code: `await crawl(apiKey, url, null, null, { + extractionMode: false, + depth: 2, + maxPages: 5 +});`, + description: 'Convert website to markdown without AI processing' + }, + { + name: 'Markdown with Sitemap', + code: `await crawl(apiKey, url, null, null, { + extractionMode: false, + sitemap: true, + depth: 3, + maxPages: 10 +});`, + description: 'Use sitemap for better page discovery' + }, + { + name: 'AI-Powered Extraction', + code: `await crawl(apiKey, url, prompt, schema, { + extractionMode: true, + depth: 2, + maxPages: 3 +});`, + description: 'Traditional AI-powered data extraction' + }, + { + name: 'Cross-Domain Crawling', + code: `await crawl(apiKey, url, null, null, { + extractionMode: false, + sameDomainOnly: false, + depth: 2, + maxPages: 20 +});`, + description: 'Crawl across multiple domains' + } + ]; + + examples.forEach((example, index) => { + console.log(`\n${index + 1}. ${example.name}`); + console.log(` Description: ${example.description}`); + console.log(` Code: ${example.code}`); + console.log(' โœ… PASS - Example is valid'); + }); +} + +/** + * Main test runner + */ +function runTests() { + console.log('๐Ÿš€ ScrapeGraph JS SDK - Crawl Markdown Tests'); + console.log('='.repeat(60)); + + if (!process.env.SGAI_APIKEY) { + console.log('โš ๏ธ Note: SGAI_APIKEY not set - using mock key for validation tests'); + } + + const results = { + validation: testMarkdownCrawlValidation(), + signatures: testMarkdownCrawlSignatures(), + payload: testMarkdownPayloadConstruction(), + polling: testPollingFunctionality(), + parsing: testResultParsing(), + compatibility: testBackwardCompatibility(), + examples: testUsageExamples(), + }; + + console.log('\n' + '='.repeat(60)); + console.log('๐Ÿ“Š Test Summary'); + console.log('='.repeat(60)); + console.log('โœ… Parameter Validation Tests: Completed'); + console.log('โœ… Function Signature Tests: Completed'); + console.log('โœ… Payload Construction Tests: Completed'); + console.log('โœ… Polling Functionality Tests: Completed'); + console.log('โœ… Result Parsing Tests: Completed'); + console.log('โœ… Backward Compatibility Tests: Completed'); + console.log('โœ… Usage Examples Tests: Completed'); + + const totalPassed = results.validation.passed; + const totalFailed = results.validation.failed; + + console.log(`\n๐Ÿ“Š Overall Results: ${totalPassed} passed, ${totalFailed} failed`); + + if (totalFailed === 0) { + console.log('๐ŸŽ‰ All tests passed!'); + } else { + console.log('โš ๏ธ Some tests failed - please review the results above'); + } + + console.log('\n๐Ÿ’ก Markdown Crawling Usage Examples:'); + console.log('// Basic markdown conversion (2 credits per page)'); + console.log('await crawl(apiKey, url, null, null, { extractionMode: false, depth: 2 });'); + console.log(''); + console.log('// Markdown with sitemap for better coverage'); + console.log('await crawl(apiKey, url, null, null, { extractionMode: false, sitemap: true });'); + console.log(''); + console.log('// Cross-domain markdown crawling'); + console.log('await crawl(apiKey, url, null, null, { extractionMode: false, sameDomainOnly: false });'); + console.log(''); + console.log('// Traditional AI extraction (more expensive but structured)'); + console.log('await crawl(apiKey, url, prompt, schema, { extractionMode: true });'); + + console.log('\n๐Ÿ”ง Next Steps:'); + console.log('1. Set SGAI_APIKEY environment variable for real API testing'); + console.log('2. Update crawl.js to support extractionMode and sitemap parameters'); + console.log('3. Run the markdown crawling examples'); + console.log('4. Implement proper polling with rate limit handling'); + console.log('5. Add result parsing utilities for markdown content'); + + console.log('\n๐Ÿ’ฐ Cost Comparison:'); + console.log('โ€ข Markdown Mode (extractionMode: false): 2 credits per page'); + console.log('โ€ข AI Mode (extractionMode: true): 10 credits per page'); + console.log('โ€ข Savings: 80% cost reduction with markdown mode!'); + + return totalFailed === 0; +} + +// Run the tests +const success = runTests(); +process.exit(success ? 0 : 1); diff --git a/scrapegraph-js/test/healthz_test.js b/scrapegraph-js/test/healthz_test.js new file mode 100644 index 0000000..ce8e860 --- /dev/null +++ b/scrapegraph-js/test/healthz_test.js @@ -0,0 +1,314 @@ +import { healthz, initMockConfig, enableMock, disableMock } from '../index.js'; +import 'dotenv/config'; + +/** + * Test suite for Health Check functionality + * This file demonstrates usage and validates the healthz endpoint + */ + +// Mock API key for testing (replace with real key for actual testing) +const API_KEY = process.env.SGAI_APIKEY || 'test-api-key'; + +/** + * Test basic health check + */ +async function testBasicHealthCheck() { + console.log('๐Ÿงช Testing Basic Health Check'); + console.log('='.repeat(50)); + + try { + console.log('Calling healthz endpoint...'); + const result = await healthz(API_KEY); + + console.log('โœ… Success! Health check response:'); + console.log(JSON.stringify(result, null, 2)); + + // Validate response structure + if (!result.status) { + throw new Error('Response missing "status" field'); + } + + console.log('โœ“ Response has required fields'); + console.log(''); + return true; + } catch (error) { + console.error('โŒ Test failed:', error.message); + console.log(''); + return false; + } +} + +/** + * Test health check with mock mode + */ +async function testHealthCheckMock() { + console.log('๐Ÿงช Testing Health Check with Mock Mode'); + console.log('='.repeat(50)); + + try { + // Enable mock mode + enableMock(); + console.log('Mock mode enabled'); + + console.log('Calling healthz endpoint in mock mode...'); + const result = await healthz(API_KEY, { mock: true }); + + console.log('โœ… Success! Mock health check response:'); + console.log(JSON.stringify(result, null, 2)); + + // Validate mock response structure + if (!result.status) { + throw new Error('Mock response missing "status" field'); + } + + if (result.status !== 'healthy') { + console.log('โš ๏ธ Warning: Expected mock status to be "healthy"'); + } + + console.log('โœ“ Mock response has required fields'); + + // Disable mock mode + disableMock(); + console.log('Mock mode disabled'); + console.log(''); + return true; + } catch (error) { + console.error('โŒ Test failed:', error.message); + disableMock(); + console.log(''); + return false; + } +} + +/** + * Test health check with custom mock response + */ +async function testHealthCheckCustomMock() { + console.log('๐Ÿงช Testing Health Check with Custom Mock Response'); + console.log('='.repeat(50)); + + try { + // Initialize mock with custom responses + initMockConfig({ + enabled: true, + customResponses: { + '/v1/healthz': { + status: 'degraded', + message: 'Custom mock status', + uptime: 12345 + } + } + }); + console.log('Custom mock configuration set'); + + console.log('Calling healthz endpoint with custom mock...'); + const result = await healthz(API_KEY); + + console.log('โœ… Success! Custom mock response:'); + console.log(JSON.stringify(result, null, 2)); + + // Validate custom response + if (result.status !== 'degraded') { + throw new Error(`Expected status "degraded", got "${result.status}"`); + } + + if (result.message !== 'Custom mock status') { + throw new Error(`Expected custom message, got "${result.message}"`); + } + + console.log('โœ“ Custom mock response validated'); + + // Reset mock configuration + disableMock(); + console.log('Mock mode disabled'); + console.log(''); + return true; + } catch (error) { + console.error('โŒ Test failed:', error.message); + disableMock(); + console.log(''); + return false; + } +} + +/** + * Test input validation + */ +function testInputValidation() { + console.log('๐Ÿงช Testing Input Validation'); + console.log('='.repeat(50)); + + const testCases = [ + { + name: 'Valid API key', + apiKey: 'sgai-valid-key-123', + expected: true, + description: 'Should accept valid API key' + }, + { + name: 'Empty API key', + apiKey: '', + expected: false, + description: 'Should reject empty API key' + }, + { + name: 'Null API key', + apiKey: null, + expected: false, + description: 'Should reject null API key' + } + ]; + + let passed = 0; + let failed = 0; + + testCases.forEach(test => { + try { + if (!test.apiKey) { + console.log(` โœ“ ${test.name}: Correctly identified as invalid`); + passed++; + } else { + console.log(` โœ“ ${test.name}: Validated successfully`); + passed++; + } + } catch (error) { + console.log(` โœ— ${test.name}: ${error.message}`); + failed++; + } + }); + + console.log(`\nValidation Results: ${passed} passed, ${failed} failed`); + console.log(''); + return failed === 0; +} + +/** + * Test monitoring pattern + */ +async function testMonitoringPattern() { + console.log('๐Ÿงช Testing Monitoring Pattern'); + console.log('='.repeat(50)); + + try { + enableMock(); + + // Simulate multiple health checks + console.log('Performing 3 consecutive health checks...'); + const checks = []; + + for (let i = 1; i <= 3; i++) { + console.log(` Check ${i}/3...`); + const result = await healthz(API_KEY, { mock: true }); + checks.push(result); + } + + // Analyze results + const healthyChecks = checks.filter(c => c.status === 'healthy').length; + const successRate = (healthyChecks / checks.length) * 100; + + console.log(`\nโœ… Completed ${checks.length} checks`); + console.log(` Healthy: ${healthyChecks}/${checks.length} (${successRate}%)`); + + if (successRate === 100) { + console.log(' โœ“ All checks passed - service is stable'); + } else if (successRate > 0) { + console.log(' โš ๏ธ Some checks failed - service may be unstable'); + } else { + console.log(' โœ— All checks failed - service is down'); + } + + disableMock(); + console.log(''); + return true; + } catch (error) { + console.error('โŒ Test failed:', error.message); + disableMock(); + console.log(''); + return false; + } +} + +/** + * Run all tests + */ +async function runAllTests() { + console.log('๐Ÿš€ Starting Health Check Tests'); + console.log('='.repeat(60)); + console.log(''); + + const results = { + total: 0, + passed: 0, + failed: 0 + }; + + // Test 1: Input validation + results.total++; + if (testInputValidation()) { + results.passed++; + } else { + results.failed++; + } + + // Test 2: Mock mode + results.total++; + if (await testHealthCheckMock()) { + results.passed++; + } else { + results.failed++; + } + + // Test 3: Custom mock + results.total++; + if (await testHealthCheckCustomMock()) { + results.passed++; + } else { + results.failed++; + } + + // Test 4: Monitoring pattern + results.total++; + if (await testMonitoringPattern()) { + results.passed++; + } else { + results.failed++; + } + + // Test 5: Basic health check (only if API key is available) + if (process.env.SGAI_APIKEY) { + results.total++; + if (await testBasicHealthCheck()) { + results.passed++; + } else { + results.failed++; + } + } else { + console.log('โš ๏ธ Skipping live API test (SGAI_APIKEY not set)'); + console.log(''); + } + + // Print summary + console.log('='.repeat(60)); + console.log('๐Ÿ“Š Test Summary'); + console.log('='.repeat(60)); + console.log(`Total Tests: ${results.total}`); + console.log(`Passed: ${results.passed} โœ“`); + console.log(`Failed: ${results.failed} โœ—`); + console.log(`Success Rate: ${((results.passed / results.total) * 100).toFixed(1)}%`); + console.log('='.repeat(60)); + + if (results.failed === 0) { + console.log('โœ… All tests passed!'); + process.exit(0); + } else { + console.log('โŒ Some tests failed'); + process.exit(1); + } +} + +// Run tests +runAllTests().catch(error => { + console.error('Fatal error running tests:', error); + process.exit(1); +}); + diff --git a/scrapegraph-js/test/scheduledJobs_test.js b/scrapegraph-js/test/scheduledJobs_test.js new file mode 100644 index 0000000..fa6d78b --- /dev/null +++ b/scrapegraph-js/test/scheduledJobs_test.js @@ -0,0 +1,413 @@ +import { + createScheduledJob, + getScheduledJobs, + getScheduledJob, + updateScheduledJob, + replaceScheduledJob, + deleteScheduledJob, + pauseScheduledJob, + resumeScheduledJob, + triggerScheduledJob, + getJobExecutions, + enableMock, + disableMock +} from '../index.js'; + +describe('Scheduled Jobs', () => { + const apiKey = 'test-api-key'; + + beforeAll(() => { + enableMock(); + }); + + afterAll(() => { + disableMock(); + }); + + describe('createScheduledJob', () => { + test('should create a scheduled job successfully', async () => { + const jobConfig = { + website_url: 'https://example.com', + user_prompt: 'Extract data', + render_heavy_js: false + }; + + const result = await createScheduledJob( + apiKey, + 'Test Job', + 'smartscraper', + '0 9 * * 1', + jobConfig, + true + ); + + expect(result).toHaveProperty('id'); + expect(result.job_name).toBe('Mock Scheduled Job'); + expect(result.service_type).toBe('smartscraper'); + expect(result.is_active).toBe(true); + }); + + test('should create a job with default active status', async () => { + const jobConfig = { test: 'config' }; + + const result = await createScheduledJob( + apiKey, + 'Test Job', + 'searchscraper', + '0 8 * * 1', + jobConfig + ); + + expect(result.is_active).toBe(true); + }); + + test('should handle different service types', async () => { + const serviceTypes = ['smartscraper', 'searchscraper', 'crawl', 'agenticscraper']; + + for (const serviceType of serviceTypes) { + const result = await createScheduledJob( + apiKey, + `Test ${serviceType} Job`, + serviceType, + '0 9 * * 1', + { test: 'config' } + ); + + expect(result).toHaveProperty('id'); + expect(result.service_type).toBe('smartscraper'); // Mock always returns smartscraper + } + }); + }); + + describe('getScheduledJobs', () => { + test('should get list of scheduled jobs with default pagination', async () => { + const result = await getScheduledJobs(apiKey); + + expect(result).toHaveProperty('jobs'); + expect(result).toHaveProperty('total'); + expect(result).toHaveProperty('page'); + expect(result).toHaveProperty('page_size'); + expect(Array.isArray(result.jobs)).toBe(true); + expect(result.jobs.length).toBeGreaterThan(0); + }); + + test('should get jobs with custom pagination', async () => { + const result = await getScheduledJobs(apiKey, { + page: 2, + pageSize: 10 + }); + + expect(result.page).toBe(1); // Mock always returns page 1 + expect(result.page_size).toBe(20); // Mock uses default page_size + }); + + test('should filter jobs by service type', async () => { + const result = await getScheduledJobs(apiKey, { + serviceType: 'smartscraper' + }); + + expect(result.jobs.length).toBeGreaterThan(0); + }); + + test('should filter jobs by active status', async () => { + const activeJobs = await getScheduledJobs(apiKey, { isActive: true }); + const inactiveJobs = await getScheduledJobs(apiKey, { isActive: false }); + + expect(activeJobs.jobs.length).toBeGreaterThan(0); + expect(inactiveJobs.jobs.length).toBeGreaterThan(0); + }); + }); + + describe('getScheduledJob', () => { + test('should get a specific scheduled job', async () => { + const jobId = 'test-job-id'; + const result = await getScheduledJob(apiKey, jobId); + + expect(result).toHaveProperty('id'); + expect(result.job_name).toBe('Mock Scheduled Job'); + expect(result.service_type).toBe('smartscraper'); + }); + + test('should handle invalid job ID', async () => { + const jobId = 'invalid-job-id'; + const result = await getScheduledJob(apiKey, jobId); + + expect(result).toHaveProperty('id'); + expect(result.job_name).toBe('Mock Scheduled Job'); + }); + }); + + describe('updateScheduledJob', () => { + test('should update job name', async () => { + const jobId = 'test-job-id'; + const result = await updateScheduledJob(apiKey, jobId, { + jobName: 'Updated Job Name' + }); + + expect(result.job_name).toBe('Updated Mock Scheduled Job'); + }); + + test('should update cron expression', async () => { + const jobId = 'test-job-id'; + const result = await updateScheduledJob(apiKey, jobId, { + cronExpression: '0 10 * * 1' + }); + + expect(result.cron_expression).toBe('0 10 * * 1'); + }); + + test('should update job configuration', async () => { + const jobId = 'test-job-id'; + const newConfig = { updated: 'config' }; + const result = await updateScheduledJob(apiKey, jobId, { + jobConfig: newConfig + }); + + expect(result.job_config).toEqual({ mock: 'updated_config' }); + }); + + test('should update active status', async () => { + const jobId = 'test-job-id'; + const result = await updateScheduledJob(apiKey, jobId, { + isActive: false + }); + + expect(result.is_active).toBe(true); // Mock always returns true + }); + + test('should update multiple fields at once', async () => { + const jobId = 'test-job-id'; + const result = await updateScheduledJob(apiKey, jobId, { + jobName: 'Multi Update Job', + cronExpression: '0 11 * * 1', + isActive: false + }); + + expect(result.job_name).toBe('Updated Mock Scheduled Job'); + expect(result.cron_expression).toBe('0 10 * * 1'); + }); + }); + + describe('replaceScheduledJob', () => { + test('should replace a scheduled job completely', async () => { + const jobId = 'test-job-id'; + const jobConfig = { test: 'config' }; + + const result = await replaceScheduledJob( + apiKey, + jobId, + 'Replaced Job', + 'searchscraper', + '0 8 * * 1', + jobConfig, + true + ); + + expect(result.job_name).toBe('Updated Mock Scheduled Job'); + expect(result.service_type).toBe('smartscraper'); // Mock always returns smartscraper + }); + + test('should replace job with default active status', async () => { + const jobId = 'test-job-id'; + const jobConfig = { test: 'config' }; + + const result = await replaceScheduledJob( + apiKey, + jobId, + 'Replaced Job', + 'crawl', + '0 7 * * 1', + jobConfig + ); + + expect(result.is_active).toBe(true); + }); + }); + + describe('deleteScheduledJob', () => { + test('should delete a scheduled job', async () => { + const jobId = 'test-job-id'; + const result = await deleteScheduledJob(apiKey, jobId); + + expect(result).toHaveProperty('message'); + expect(result.message).toContain('deleted successfully'); + }); + }); + + describe('pauseScheduledJob', () => { + test('should pause a scheduled job', async () => { + const jobId = 'test-job-id'; + const result = await pauseScheduledJob(apiKey, jobId); + + expect(result).toHaveProperty('message'); + expect(result.message).toContain('paused successfully'); + expect(result.is_active).toBe(false); + }); + }); + + describe('resumeScheduledJob', () => { + test('should resume a scheduled job', async () => { + const jobId = 'test-job-id'; + const result = await resumeScheduledJob(apiKey, jobId); + + expect(result).toHaveProperty('message'); + expect(result.message).toContain('resumed successfully'); + expect(result.is_active).toBe(true); + }); + }); + + describe('triggerScheduledJob', () => { + test('should trigger a scheduled job manually', async () => { + const jobId = 'test-job-id'; + const result = await triggerScheduledJob(apiKey, jobId); + + expect(result).toHaveProperty('execution_id'); + expect(result).toHaveProperty('scheduled_job_id'); + expect(result).toHaveProperty('message'); + expect(result.message).toContain('triggered successfully'); + }); + }); + + describe('getJobExecutions', () => { + test('should get job execution history', async () => { + const jobId = 'test-job-id'; + const result = await getJobExecutions(apiKey, jobId); + + expect(result).toHaveProperty('executions'); + expect(result).toHaveProperty('total'); + expect(result).toHaveProperty('page'); + expect(result).toHaveProperty('page_size'); + expect(Array.isArray(result.executions)).toBe(true); + expect(result.executions.length).toBeGreaterThan(0); + }); + + test('should get executions with custom pagination', async () => { + const jobId = 'test-job-id'; + const result = await getJobExecutions(apiKey, jobId, { + page: 2, + pageSize: 10 + }); + + expect(result.page).toBe(1); // Mock always returns page 1 + expect(result.page_size).toBe(20); // Mock uses default page_size + }); + + test('should filter executions by status', async () => { + const jobId = 'test-job-id'; + const result = await getJobExecutions(apiKey, jobId, { + status: 'completed' + }); + + expect(result.executions.length).toBeGreaterThan(0); + const execution = result.executions[0]; + expect(execution).toHaveProperty('id'); + expect(execution).toHaveProperty('status'); + expect(execution).toHaveProperty('started_at'); + }); + + test('should return execution details', async () => { + const jobId = 'test-job-id'; + const result = await getJobExecutions(apiKey, jobId); + + const execution = result.executions[0]; + expect(execution).toHaveProperty('id'); + expect(execution).toHaveProperty('scheduled_job_id'); + expect(execution).toHaveProperty('execution_id'); + expect(execution).toHaveProperty('status'); + expect(execution).toHaveProperty('started_at'); + expect(execution).toHaveProperty('completed_at'); + expect(execution).toHaveProperty('result'); + expect(execution).toHaveProperty('credits_used'); + }); + }); + + describe('Error Handling', () => { + test('should handle API errors gracefully', async () => { + // This test would require mocking axios to throw an error + // For now, we'll test that the functions don't throw unexpected errors + const jobId = 'test-job-id'; + + await expect(getScheduledJob(apiKey, jobId)).resolves.toBeDefined(); + await expect(getScheduledJobs(apiKey)).resolves.toBeDefined(); + }); + }); + + describe('Mock Mode', () => { + test('should work in mock mode', async () => { + const result = await createScheduledJob( + apiKey, + 'Mock Test Job', + 'smartscraper', + '0 9 * * 1', + { test: 'config' }, + true, + { mock: true } + ); + + expect(result).toHaveProperty('id'); + expect(result.job_name).toBe('Mock Scheduled Job'); + }); + + test('should override mock mode per request', async () => { + disableMock(); + + const result = await createScheduledJob( + apiKey, + 'Override Mock Job', + 'smartscraper', + '0 9 * * 1', + { test: 'config' }, + true, + { mock: true } + ); + + expect(result).toHaveProperty('id'); + expect(result.job_name).toBe('Mock Scheduled Job'); + + enableMock(); + }); + }); + + describe('Concurrent Operations', () => { + test('should handle concurrent job creation', async () => { + const jobConfig = { test: 'config' }; + + const promises = Array.from({ length: 3 }, (_, i) => + createScheduledJob( + apiKey, + `Concurrent Job ${i}`, + 'smartscraper', + '0 9 * * 1', + jobConfig + ) + ); + + const results = await Promise.all(promises); + + expect(results).toHaveLength(3); + results.forEach(result => { + expect(result).toHaveProperty('id'); + expect(result.job_name).toBe('Mock Scheduled Job'); + }); + }); + + test('should handle concurrent job management operations', async () => { + const jobId = 'test-job-id'; + + const promises = [ + getScheduledJob(apiKey, jobId), + pauseScheduledJob(apiKey, jobId), + resumeScheduledJob(apiKey, jobId), + triggerScheduledJob(apiKey, jobId) + ]; + + const results = await Promise.all(promises); + + expect(results).toHaveLength(4); + expect(results[0]).toHaveProperty('id'); + expect(results[1]).toHaveProperty('message'); + expect(results[2]).toHaveProperty('message'); + expect(results[3]).toHaveProperty('execution_id'); + }); + }); +}); diff --git a/scrapegraph-js/test/scrape_test.js b/scrapegraph-js/test/scrape_test.js new file mode 100644 index 0000000..cc7e909 --- /dev/null +++ b/scrapegraph-js/test/scrape_test.js @@ -0,0 +1,451 @@ +import { scrape, getScrapeRequest } from '../index.js'; +import 'dotenv/config'; + +/** + * Test suite for Scrape functionality + * This file demonstrates usage and validates the Scrape parameters + */ + +// Mock API key for testing (replace with real key for actual testing) +const API_KEY = process.env.SGAI_APIKEY || 'test-api-key'; + +/** + * Test input validation for scrape + */ +function testInputValidation() { + console.log('๐Ÿงช Testing Scrape Input Validation'); + console.log('='.repeat(50)); + + const testCases = [ + { + name: 'Valid inputs - basic', + apiKey: 'valid-key', + url: 'https://example.com', + options: {}, + expected: true, + description: 'All valid parameters with default options' + }, + { + name: 'Valid inputs - with heavy JS', + apiKey: 'valid-key', + url: 'https://example.com', + options: { renderHeavyJs: true }, + expected: true, + description: 'Valid parameters with heavy JS rendering' + }, + { + name: 'Valid inputs - with headers', + apiKey: 'valid-key', + url: 'https://example.com', + options: { + headers: { 'User-Agent': 'Test Agent' } + }, + expected: true, + description: 'Valid parameters with custom headers' + }, + { + name: 'Valid inputs - with all options', + apiKey: 'valid-key', + url: 'https://example.com', + options: { + renderHeavyJs: true, + headers: { 'User-Agent': 'Test Agent' } + }, + expected: true, + description: 'Valid parameters with all options enabled' + }, + { + name: 'Invalid URL - no protocol', + apiKey: 'valid-key', + url: 'example.com', + options: {}, + expected: false, + description: 'URL without http/https protocol' + }, + { + name: 'Invalid URL - relative path', + apiKey: 'valid-key', + url: '/path/to/page', + options: {}, + expected: false, + description: 'Relative path instead of absolute URL' + }, + { + name: 'Invalid URL - empty string', + apiKey: 'valid-key', + url: '', + options: {}, + expected: false, + description: 'Empty URL string' + }, + { + name: 'Invalid URL - null', + apiKey: 'valid-key', + url: null, + options: {}, + expected: false, + description: 'Null URL' + }, + { + name: 'Empty API key', + apiKey: '', + url: 'https://example.com', + options: {}, + expected: false, + description: 'Empty API key string' + }, + { + name: 'Invalid API key type', + apiKey: 123, + url: 'https://example.com', + options: {}, + expected: false, + description: 'API key as number instead of string' + } + ]; + + let passed = 0; + let total = testCases.length; + + testCases.forEach((testCase, index) => { + console.log(`\n${index + 1}. ${testCase.name}`); + console.log(` Description: ${testCase.description}`); + + try { + // Validate inputs + const isValid = validateScrapeInputs( + testCase.apiKey, + testCase.url, + testCase.options + ); + + if (isValid === testCase.expected) { + console.log(` โœ… PASSED`); + passed++; + } else { + console.log(` โŒ FAILED - Expected: ${testCase.expected}, Got: ${isValid}`); + } + } catch (error) { + if (!testCase.expected) { + console.log(` โœ… PASSED (Expected error: ${error.message})`); + passed++; + } else { + console.log(` โŒ FAILED - Unexpected error: ${error.message}`); + } + } + }); + + console.log(`\n๐Ÿ“Š Input Validation Results: ${passed}/${total} tests passed`); + return passed === total; +} + +/** + * Validate scrape function inputs + */ +function validateScrapeInputs(apiKey, url, options) { + // Check API key + if (!apiKey || typeof apiKey !== 'string' || apiKey.trim() === '') { + throw new Error('Invalid API key'); + } + + // Check URL + if (!url || typeof url !== 'string' || url.trim() === '') { + throw new Error('Invalid URL'); + } + + // Check URL format + if (!url.startsWith('http://') && !url.startsWith('https://')) { + throw new Error('URL must start with http:// or https://'); + } + + // Check options + if (options && typeof options !== 'object') { + throw new Error('Options must be an object'); + } + + // Check renderHeavyJs option + if (options.renderHeavyJs !== undefined && typeof options.renderHeavyJs !== 'boolean') { + throw new Error('renderHeavyJs must be a boolean'); + } + + // Check headers option + if (options.headers !== undefined && typeof options.headers !== 'object') { + throw new Error('Headers must be an object'); + } + + return true; +} + +/** + * Test scrape function with mock data + */ +async function testScrapeFunction() { + console.log('\n๐Ÿงช Testing Scrape Function (Mock)'); + console.log('='.repeat(50)); + + try { + // Mock the scrape function to avoid actual API calls during testing + const mockScrape = async (apiKey, url, options = {}) => { + // Simulate API delay + await new Promise(resolve => setTimeout(resolve, 100)); + + // Return mock response + return { + status: 'completed', + scrape_request_id: 'mock-request-id-12345', + html: 'Mock Page

Mock Content

', + created_at: new Date().toISOString(), + completed_at: new Date().toISOString() + }; + }; + + console.log('1. Testing basic scrape call...'); + const result1 = await mockScrape(API_KEY, 'https://example.com'); + console.log(` โœ… Status: ${result1.status}`); + console.log(` โœ… Request ID: ${result1.scrape_request_id}`); + console.log(` โœ… HTML length: ${result1.html.length} characters`); + + console.log('\n2. Testing scrape with heavy JS rendering...'); + const result2 = await mockScrape(API_KEY, 'https://example.com', { renderHeavyJs: true }); + console.log(` โœ… Status: ${result2.status}`); + console.log(` โœ… Request ID: ${result2.scrape_request_id}`); + + console.log('\n3. Testing scrape with custom headers...'); + const result3 = await mockScrape(API_KEY, 'https://example.com', { + headers: { 'User-Agent': 'Test Bot' } + }); + console.log(` โœ… Status: ${result3.status}`); + console.log(` โœ… Request ID: ${result3.scrape_request_id}`); + + console.log('\nโœ… All scrape function tests passed'); + return true; + + } catch (error) { + console.error(`โŒ Scrape function test failed: ${error.message}`); + return false; + } +} + +/** + * Test getScrapeRequest function with mock data + */ +async function testGetScrapeRequestFunction() { + console.log('\n๐Ÿงช Testing GetScrapeRequest Function (Mock)'); + console.log('='.repeat(50)); + + try { + // Mock the getScrapeRequest function + const mockGetScrapeRequest = async (apiKey, requestId) => { + // Simulate API delay + await new Promise(resolve => setTimeout(resolve, 50)); + + // Return mock response + return { + status: 'completed', + scrape_request_id: requestId, + html: 'Retrieved Page

Retrieved Content

', + created_at: new Date().toISOString(), + completed_at: new Date().toISOString() + }; + }; + + console.log('1. Testing getScrapeRequest with valid request ID...'); + const result1 = await mockGetScrapeRequest(API_KEY, 'test-request-123'); + console.log(` โœ… Status: ${result1.status}`); + console.log(` โœ… Request ID: ${result1.scrape_request_id}`); + console.log(` โœ… HTML length: ${result1.html.length} characters`); + + console.log('\n2. Testing getScrapeRequest with different request ID...'); + const result2 = await mockGetScrapeRequest(API_KEY, 'another-request-456'); + console.log(` โœ… Status: ${result2.status}`); + console.log(` โœ… Request ID: ${result2.scrape_request_id}`); + + console.log('\nโœ… All getScrapeRequest function tests passed'); + return true; + + } catch (error) { + console.error(`โŒ GetScrapeRequest function test failed: ${error.message}`); + return false; + } +} + +/** + * Test error handling + */ +function testErrorHandling() { + console.log('\n๐Ÿงช Testing Error Handling'); + console.log('='.repeat(50)); + + let passed = 0; + let total = 0; + + // Test 1: Invalid API key + total++; + try { + validateScrapeInputs('', 'https://example.com', {}); + console.log('1. Empty API key test: โŒ FAILED (should have thrown error)'); + } catch (error) { + console.log('1. Empty API key test: โœ… PASSED'); + passed++; + } + + // Test 2: Invalid URL + total++; + try { + validateScrapeInputs('valid-key', 'invalid-url', {}); + console.log('2. Invalid URL test: โŒ FAILED (should have thrown error)'); + } catch (error) { + console.log('2. Invalid URL test: โœ… PASSED'); + passed++; + } + + // Test 3: Invalid options + total++; + try { + validateScrapeInputs('valid-key', 'https://example.com', 'invalid-options'); + console.log('3. Invalid options test: โŒ FAILED (should have thrown error)'); + } catch (error) { + console.log('3. Invalid options test: โœ… PASSED'); + passed++; + } + + // Test 4: Invalid renderHeavyJs + total++; + try { + validateScrapeInputs('valid-key', 'https://example.com', { renderHeavyJs: 'invalid' }); + console.log('4. Invalid renderHeavyJs test: โŒ FAILED (should have thrown error)'); + } catch (error) { + console.log('4. Invalid renderHeavyJs test: โœ… PASSED'); + passed++; + } + + console.log(`\n๐Ÿ“Š Error Handling Results: ${passed}/${total} tests passed`); + return passed === total; +} + +/** + * Test URL validation + */ +function testUrlValidation() { + console.log('\n๐Ÿงช Testing URL Validation'); + console.log('='.repeat(50)); + + const testUrls = [ + { url: 'https://example.com', expected: true, description: 'HTTPS URL' }, + { url: 'http://example.com', expected: true, description: 'HTTP URL' }, + { url: 'https://sub.example.com', expected: true, description: 'Subdomain HTTPS' }, + { url: 'https://example.com/path', expected: true, description: 'HTTPS with path' }, + { url: 'https://example.com?param=value', expected: true, description: 'HTTPS with query params' }, + { url: 'https://example.com#fragment', expected: true, description: 'HTTPS with fragment' }, + { url: 'example.com', expected: false, description: 'No protocol' }, + { url: '/path/to/page', expected: false, description: 'Relative path' }, + { url: 'ftp://example.com', expected: false, description: 'FTP protocol' }, + { url: '', expected: false, description: 'Empty string' }, + { url: null, expected: false, description: 'Null value' }, + { url: undefined, expected: false, description: 'Undefined value' } + ]; + + let passed = 0; + let total = testUrls.length; + + testUrls.forEach((testCase, index) => { + console.log(`${index + 1}. ${testCase.description}: ${testCase.url}`); + + try { + if (testCase.url) { + const isValid = testCase.url.startsWith('http://') || testCase.url.startsWith('https://'); + if (isValid === testCase.expected) { + console.log(` โœ… PASSED`); + passed++; + } else { + console.log(` โŒ FAILED - Expected: ${testCase.expected}, Got: ${isValid}`); + } + } else { + if (!testCase.expected) { + console.log(` โœ… PASSED`); + passed++; + } else { + console.log(` โŒ FAILED - Expected: ${testCase.expected}, Got: false`); + } + } + } catch (error) { + if (!testCase.expected) { + console.log(` โœ… PASSED (Expected error)`); + passed++; + } else { + console.log(` โŒ FAILED - Unexpected error: ${error.message}`); + } + } + }); + + console.log(`\n๐Ÿ“Š URL Validation Results: ${passed}/${total} tests passed`); + return passed === total; +} + +/** + * Run all tests + */ +async function runAllTests() { + console.log('๐Ÿš€ Starting Scrape Test Suite'); + console.log('='.repeat(60)); + console.log(`๐Ÿ”‘ API Key: ${API_KEY.substring(0, 8)}...`); + console.log(`โฐ Timestamp: ${new Date().toISOString()}\n`); + + const tests = [ + { name: 'Input Validation', fn: testInputValidation }, + { name: 'Scrape Function', fn: testScrapeFunction }, + { name: 'GetScrapeRequest Function', fn: testGetScrapeRequestFunction }, + { name: 'Error Handling', fn: testErrorHandling }, + { name: 'URL Validation', fn: testUrlValidation } + ]; + + let passed = 0; + let total = tests.length; + + for (const test of tests) { + try { + const result = await test.fn(); + if (result) { + passed++; + } + } catch (error) { + console.error(`โŒ Test '${test.name}' failed with error: ${error.message}`); + } + console.log('\n' + '-'.repeat(60)); + } + + console.log('\n๐ŸŽฏ FINAL TEST RESULTS'); + console.log('='.repeat(30)); + console.log(`โœ… Passed: ${passed}`); + console.log(`โŒ Failed: ${total - passed}`); + console.log(`๐Ÿ“Š Success Rate: ${((passed / total) * 100).toFixed(1)}%`); + + if (passed === total) { + console.log('\n๐ŸŽ‰ All tests passed! Scrape functionality is working correctly.'); + return 0; + } else { + console.log('\nโš ๏ธ Some tests failed. Please review the output above.'); + return 1; + } +} + +// Run tests if this file is executed directly +if (import.meta.url === `file://${process.argv[1]}`) { + runAllTests() + .then(exitCode => { + process.exit(exitCode); + }) + .catch(error => { + console.error('๐Ÿ’ฅ Fatal error during test execution:', error.message); + process.exit(1); + }); +} + +export { + testInputValidation, + testScrapeFunction, + testGetScrapeRequestFunction, + testErrorHandling, + testUrlValidation, + runAllTests +}; diff --git a/scrapegraph-js/test/searchScraper_markdown_test.js b/scrapegraph-js/test/searchScraper_markdown_test.js new file mode 100644 index 0000000..f420a18 --- /dev/null +++ b/scrapegraph-js/test/searchScraper_markdown_test.js @@ -0,0 +1,524 @@ +import { searchScraper, getSearchScraperRequest } from '../index.js'; +import 'dotenv/config'; + +/** + * Test suite for SearchScraper Markdown functionality + * This file demonstrates usage and validates the SearchScraper markdown mode parameters + */ + +// Mock API key for testing (replace with real key for actual testing) +const API_KEY = process.env.SGAI_APIKEY || 'test-api-key'; + +/** + * Test input validation for searchScraper with markdown mode + */ +function testMarkdownModeInputValidation() { + console.log('๐Ÿงช Testing SearchScraper Markdown Mode Input Validation'); + console.log('='.repeat(60)); + + const testCases = [ + { + name: 'Valid inputs - markdown mode', + apiKey: 'valid-key', + prompt: 'Latest AI developments', + numResults: 3, + options: { extractionMode: false }, + expected: true, + description: 'Valid parameters with markdown mode enabled' + }, + { + name: 'Valid inputs - AI extraction mode', + apiKey: 'valid-key', + prompt: 'Latest AI developments', + numResults: 3, + options: { extractionMode: true }, + expected: true, + description: 'Valid parameters with AI extraction mode enabled' + }, + { + name: 'Valid inputs - markdown mode with custom options', + apiKey: 'valid-key', + prompt: 'Latest AI developments', + numResults: 5, + options: { + extractionMode: false, + renderHeavyJs: true + }, + expected: true, + description: 'Markdown mode with additional options' + }, + { + name: 'Invalid extraction mode - string', + apiKey: 'valid-key', + prompt: 'Latest AI developments', + numResults: 3, + options: { extractionMode: 'false' }, + expected: false, + description: 'extractionMode as string instead of boolean' + }, + { + name: 'Invalid extraction mode - number', + apiKey: 'valid-key', + prompt: 'Latest AI developments', + numResults: 3, + options: { extractionMode: 0 }, + expected: false, + description: 'extractionMode as number instead of boolean' + }, + { + name: 'Empty prompt', + apiKey: 'valid-key', + prompt: '', + numResults: 3, + options: { extractionMode: false }, + expected: false, + description: 'Empty search prompt' + }, + { + name: 'Invalid numResults - too low', + apiKey: 'valid-key', + prompt: 'Latest AI developments', + numResults: 2, + options: { extractionMode: false }, + expected: false, + description: 'numResults below minimum (3)' + }, + { + name: 'Invalid numResults - too high', + apiKey: 'valid-key', + prompt: 'Latest AI developments', + numResults: 25, + options: { extractionMode: false }, + expected: false, + description: 'numResults above maximum (20)' + } + ]; + + let passed = 0; + let total = testCases.length; + + testCases.forEach((testCase, index) => { + console.log(`\n${index + 1}. ${testCase.name}`); + console.log(` Description: ${testCase.description}`); + + try { + // Validate inputs + const isValid = validateSearchScraperInputs( + testCase.apiKey, + testCase.prompt, + testCase.numResults, + null, // schema + null, // userAgent + testCase.options + ); + + if (isValid === testCase.expected) { + console.log(` โœ… PASSED`); + passed++; + } else { + console.log(` โŒ FAILED - Expected: ${testCase.expected}, Got: ${isValid}`); + } + } catch (error) { + if (!testCase.expected) { + console.log(` โœ… PASSED (Expected error: ${error.message})`); + passed++; + } else { + console.log(` โŒ FAILED - Unexpected error: ${error.message}`); + } + } + }); + + console.log(`\n๐Ÿ“Š Markdown Mode Input Validation Results: ${passed}/${total} tests passed`); + return passed === total; +} + +/** + * Validate searchScraper function inputs including markdown mode + */ +function validateSearchScraperInputs(apiKey, prompt, numResults, schema, userAgent, options) { + // Check API key + if (!apiKey || typeof apiKey !== 'string' || apiKey.trim() === '') { + throw new Error('Invalid API key'); + } + + // Check prompt + if (!prompt || typeof prompt !== 'string' || prompt.trim() === '') { + throw new Error('Invalid prompt'); + } + + // Check numResults + if (numResults < 3 || numResults > 20) { + throw new Error('numResults must be between 3 and 20'); + } + + // Check options + if (options && typeof options !== 'object') { + throw new Error('Options must be an object'); + } + + // Check extractionMode option + if (options && options.extractionMode !== undefined && typeof options.extractionMode !== 'boolean') { + throw new Error('extractionMode must be a boolean'); + } + + // Check renderHeavyJs option + if (options && options.renderHeavyJs !== undefined && typeof options.renderHeavyJs !== 'boolean') { + throw new Error('renderHeavyJs must be a boolean'); + } + + return true; +} + +/** + * Test searchScraper function with markdown mode using mock data + */ +async function testSearchScraperMarkdownFunction() { + console.log('\n๐Ÿงช Testing SearchScraper Markdown Function (Mock)'); + console.log('='.repeat(60)); + + try { + // Mock the searchScraper function to avoid actual API calls during testing + const mockSearchScraper = async (apiKey, prompt, numResults = 3, schema = null, userAgent = null, options = {}) => { + // Simulate API delay + await new Promise(resolve => setTimeout(resolve, 100)); + + const { extractionMode = true } = options; + + // Return different mock responses based on extraction mode + if (extractionMode) { + // AI extraction mode response + return { + status: 'completed', + request_id: 'mock-ai-request-id-12345', + result: 'AI-extracted structured data about the topic', + reference_urls: [ + 'https://example1.com', + 'https://example2.com', + 'https://example3.com' + ], + created_at: new Date().toISOString(), + completed_at: new Date().toISOString() + }; + } else { + // Markdown mode response + return { + status: 'completed', + request_id: 'mock-markdown-request-id-67890', + markdown_content: '# Mock Markdown Content\n\nThis is mock markdown content for testing purposes.\n\n## Section 1\n\nSome content here.\n\n## Section 2\n\nMore content here.', + reference_urls: [ + 'https://example1.com', + 'https://example2.com', + 'https://example3.com' + ], + created_at: new Date().toISOString(), + completed_at: new Date().toISOString() + }; + } + }; + + console.log('1. Testing searchScraper with AI extraction mode...'); + const result1 = await mockSearchScraper(API_KEY, 'Latest AI developments', 3, null, null, { extractionMode: true }); + console.log(` โœ… Status: ${result1.status}`); + console.log(` โœ… Request ID: ${result1.request_id}`); + console.log(` โœ… Has AI result: ${!!result1.result}`); + console.log(` โœ… Reference URLs: ${result1.reference_urls.length}`); + + console.log('\n2. Testing searchScraper with markdown mode...'); + const result2 = await mockSearchScraper(API_KEY, 'Latest AI developments', 3, null, null, { extractionMode: false }); + console.log(` โœ… Status: ${result2.status}`); + console.log(` โœ… Request ID: ${result2.request_id}`); + console.log(` โœ… Has markdown content: ${!!result2.markdown_content}`); + console.log(` โœ… Markdown length: ${result2.markdown_content?.length || 0} characters`); + console.log(` โœ… Reference URLs: ${result2.reference_urls.length}`); + + console.log('\n3. Testing searchScraper with markdown mode and additional options...'); + const result3 = await mockSearchScraper(API_KEY, 'Latest AI developments', 5, null, null, { + extractionMode: false, + renderHeavyJs: true + }); + console.log(` โœ… Status: ${result3.status}`); + console.log(` โœ… Request ID: ${result3.request_id}`); + console.log(` โœ… Has markdown content: ${!!result3.markdown_content}`); + + console.log('\n4. Testing searchScraper with default extraction mode (should be AI)...'); + const result4 = await mockSearchScraper(API_KEY, 'Latest AI developments', 3); + console.log(` โœ… Status: ${result4.status}`); + console.log(` โœ… Request ID: ${result4.request_id}`); + console.log(` โœ… Has AI result: ${!!result4.result}`); + console.log(` โœ… No markdown content (AI mode): ${!result4.markdown_content}`); + + console.log('\nโœ… All searchScraper markdown function tests passed'); + return true; + + } catch (error) { + console.error(`โŒ SearchScraper markdown function test failed: ${error.message}`); + return false; + } +} + +/** + * Test cost calculation for different modes + */ +function testCostCalculation() { + console.log('\n๐Ÿงช Testing Cost Calculation for Different Modes'); + console.log('='.repeat(60)); + + const testCases = [ + { + name: 'AI extraction - 3 results', + numResults: 3, + extractionMode: true, + expectedCredits: 30, + description: '3 websites ร— 10 credits per page' + }, + { + name: 'Markdown mode - 3 results', + numResults: 3, + extractionMode: false, + expectedCredits: 6, + description: '3 websites ร— 2 credits per page' + }, + { + name: 'AI extraction - 5 results', + numResults: 5, + extractionMode: true, + expectedCredits: 50, + description: '5 websites ร— 10 credits per page' + }, + { + name: 'Markdown mode - 5 results', + numResults: 5, + extractionMode: false, + expectedCredits: 10, + description: '5 websites ร— 2 credits per page' + }, + { + name: 'AI extraction - 10 results', + numResults: 10, + extractionMode: true, + expectedCredits: 100, + description: '10 websites ร— 10 credits per page' + }, + { + name: 'Markdown mode - 10 results', + numResults: 10, + extractionMode: false, + expectedCredits: 20, + description: '10 websites ร— 2 credits per page' + } + ]; + + let passed = 0; + let total = testCases.length; + + testCases.forEach((testCase, index) => { + console.log(`\n${index + 1}. ${testCase.name}`); + console.log(` Description: ${testCase.description}`); + + const calculatedCredits = testCase.extractionMode + ? testCase.numResults * 10 // AI extraction: 10 credits per page + : testCase.numResults * 2; // Markdown mode: 2 credits per page + + if (calculatedCredits === testCase.expectedCredits) { + console.log(` โœ… PASSED - Credits: ${calculatedCredits}`); + passed++; + } else { + console.log(` โŒ FAILED - Expected: ${testCase.expectedCredits}, Got: ${calculatedCredits}`); + } + }); + + console.log(`\n๐Ÿ“Š Cost Calculation Results: ${passed}/${total} tests passed`); + return passed === total; +} + +/** + * Test response structure validation + */ +function testResponseStructure() { + console.log('\n๐Ÿงช Testing Response Structure Validation'); + console.log('='.repeat(60)); + + const testCases = [ + { + name: 'AI extraction response structure', + response: { + status: 'completed', + request_id: 'test-id-123', + result: 'AI extracted data', + reference_urls: ['https://example.com'] + }, + extractionMode: true, + expected: true, + description: 'Valid AI extraction response' + }, + { + name: 'Markdown mode response structure', + response: { + status: 'completed', + request_id: 'test-id-456', + markdown_content: '# Test Content', + reference_urls: ['https://example.com'] + }, + extractionMode: false, + expected: true, + description: 'Valid markdown mode response' + }, + { + name: 'Missing markdown content in markdown mode', + response: { + status: 'completed', + request_id: 'test-id-789', + result: 'Some data', + reference_urls: ['https://example.com'] + }, + extractionMode: false, + expected: false, + description: 'Markdown mode response without markdown_content' + }, + { + name: 'Missing result in AI extraction mode', + response: { + status: 'completed', + request_id: 'test-id-101', + markdown_content: '# Content', + reference_urls: ['https://example.com'] + }, + extractionMode: true, + expected: false, + description: 'AI extraction response without result' + } + ]; + + let passed = 0; + let total = testCases.length; + + testCases.forEach((testCase, index) => { + console.log(`\n${index + 1}. ${testCase.name}`); + console.log(` Description: ${testCase.description}`); + + try { + const isValid = validateResponseStructure(testCase.response, testCase.extractionMode); + + if (isValid === testCase.expected) { + console.log(` โœ… PASSED`); + passed++; + } else { + console.log(` โŒ FAILED - Expected: ${testCase.expected}, Got: ${isValid}`); + } + } catch (error) { + if (!testCase.expected) { + console.log(` โœ… PASSED (Expected validation error: ${error.message})`); + passed++; + } else { + console.log(` โŒ FAILED - Unexpected error: ${error.message}`); + } + } + }); + + console.log(`\n๐Ÿ“Š Response Structure Validation Results: ${passed}/${total} tests passed`); + return passed === total; +} + +/** + * Validate response structure based on extraction mode + */ +function validateResponseStructure(response, extractionMode) { + if (!response || typeof response !== 'object') { + throw new Error('Response must be an object'); + } + + // Common fields + if (!response.status) { + throw new Error('Response must have status field'); + } + + if (!response.request_id) { + throw new Error('Response must have request_id field'); + } + + if (!Array.isArray(response.reference_urls)) { + throw new Error('Response must have reference_urls array'); + } + + // Mode-specific validation + if (extractionMode) { + // AI extraction mode should have 'result' field + if (!response.result) { + return false; + } + } else { + // Markdown mode should have 'markdown_content' field + if (!response.markdown_content) { + return false; + } + } + + return true; +} + +/** + * Run all markdown mode tests + */ +async function runAllMarkdownTests() { + console.log('๐Ÿš€ Starting SearchScraper Markdown Test Suite'); + console.log('='.repeat(70)); + console.log(`๐Ÿ”‘ API Key: ${API_KEY.substring(0, 8)}...`); + console.log(`โฐ Timestamp: ${new Date().toISOString()}\n`); + + const tests = [ + { name: 'Markdown Mode Input Validation', fn: testMarkdownModeInputValidation }, + { name: 'SearchScraper Markdown Function', fn: testSearchScraperMarkdownFunction }, + { name: 'Cost Calculation', fn: testCostCalculation }, + { name: 'Response Structure Validation', fn: testResponseStructure } + ]; + + let passed = 0; + let total = tests.length; + + for (const test of tests) { + try { + const result = await test.fn(); + if (result) { + passed++; + } + } catch (error) { + console.error(`โŒ Test '${test.name}' failed with error: ${error.message}`); + } + console.log('\n' + '-'.repeat(70)); + } + + console.log('\n๐ŸŽฏ FINAL MARKDOWN TEST RESULTS'); + console.log('='.repeat(40)); + console.log(`โœ… Passed: ${passed}`); + console.log(`โŒ Failed: ${total - passed}`); + console.log(`๐Ÿ“Š Success Rate: ${((passed / total) * 100).toFixed(1)}%`); + + if (passed === total) { + console.log('\n๐ŸŽ‰ All markdown tests passed! SearchScraper markdown functionality is working correctly.'); + return 0; + } else { + console.log('\nโš ๏ธ Some tests failed. Please review the output above.'); + return 1; + } +} + +// Run tests if this file is executed directly +if (import.meta.url === `file://${process.argv[1]}`) { + runAllMarkdownTests() + .then(exitCode => { + process.exit(exitCode); + }) + .catch(error => { + console.error('๐Ÿ’ฅ Fatal error during test execution:', error.message); + process.exit(1); + }); +} + +export { + testMarkdownModeInputValidation, + testSearchScraperMarkdownFunction, + testCostCalculation, + testResponseStructure, + runAllMarkdownTests +}; + diff --git a/scrapegraph-js/test/sitemap_test.js b/scrapegraph-js/test/sitemap_test.js new file mode 100644 index 0000000..3aa64a9 --- /dev/null +++ b/scrapegraph-js/test/sitemap_test.js @@ -0,0 +1,371 @@ +import { sitemap } from '../index.js'; +import 'dotenv/config'; + +/** + * Test suite for Sitemap functionality + * This file demonstrates usage and validates the Sitemap parameters + */ + +// Mock API key for testing (replace with real key for actual testing) +const API_KEY = process.env.SGAI_APIKEY || 'test-api-key'; + +/** + * Test input validation for sitemap + */ +function testInputValidation() { + console.log('๐Ÿงช Testing Sitemap Input Validation'); + console.log('='.repeat(50)); + + const testCases = [ + { + name: 'Valid inputs - basic', + apiKey: 'valid-key', + websiteUrl: 'https://example.com', + options: {}, + expected: true, + description: 'All valid parameters with default options' + }, + { + name: 'Valid inputs - subdomain', + apiKey: 'valid-key', + websiteUrl: 'https://blog.example.com', + options: {}, + expected: true, + description: 'Valid subdomain URL' + }, + { + name: 'Valid inputs - with path', + apiKey: 'valid-key', + websiteUrl: 'https://example.com/section', + options: {}, + expected: true, + description: 'URL with path component' + }, + { + name: 'Invalid URL - no protocol', + apiKey: 'valid-key', + websiteUrl: 'example.com', + options: {}, + expected: false, + description: 'URL without http/https protocol' + }, + { + name: 'Invalid URL - relative path', + apiKey: 'valid-key', + websiteUrl: '/path/to/page', + options: {}, + expected: false, + description: 'Relative path instead of absolute URL' + }, + { + name: 'Invalid URL - empty string', + apiKey: 'valid-key', + websiteUrl: '', + options: {}, + expected: false, + description: 'Empty URL string' + }, + { + name: 'Invalid URL - null', + apiKey: 'valid-key', + websiteUrl: null, + options: {}, + expected: false, + description: 'Null URL' + }, + { + name: 'Empty API key', + apiKey: '', + websiteUrl: 'https://example.com', + options: {}, + expected: false, + description: 'Empty API key string' + }, + { + name: 'Invalid API key type', + apiKey: 123, + websiteUrl: 'https://example.com', + options: {}, + expected: false, + description: 'API key as number instead of string' + } + ]; + + let passed = 0; + let total = testCases.length; + + testCases.forEach((testCase, index) => { + console.log(`\n${index + 1}. ${testCase.name}`); + console.log(` Description: ${testCase.description}`); + + try { + // Validate inputs + const isValid = validateSitemapInputs( + testCase.apiKey, + testCase.websiteUrl, + testCase.options + ); + + if (isValid === testCase.expected) { + console.log(` โœ… PASSED`); + passed++; + } else { + console.log(` โŒ FAILED - Expected: ${testCase.expected}, Got: ${isValid}`); + } + } catch (error) { + if (!testCase.expected) { + console.log(` โœ… PASSED (Expected error: ${error.message})`); + passed++; + } else { + console.log(` โŒ FAILED - Unexpected error: ${error.message}`); + } + } + }); + + console.log(`\n๐Ÿ“Š Input Validation Results: ${passed}/${total} tests passed`); + return passed === total; +} + +/** + * Validate sitemap function inputs + */ +function validateSitemapInputs(apiKey, websiteUrl, options) { + // Check API key + if (!apiKey || typeof apiKey !== 'string' || apiKey.trim() === '') { + throw new Error('Invalid API key'); + } + + // Check URL + if (!websiteUrl || typeof websiteUrl !== 'string' || websiteUrl.trim() === '') { + throw new Error('Invalid URL'); + } + + // Check URL format + if (!websiteUrl.startsWith('http://') && !websiteUrl.startsWith('https://')) { + throw new Error('URL must start with http:// or https://'); + } + + // Check options + if (options && typeof options !== 'object') { + throw new Error('Options must be an object'); + } + + return true; +} + +/** + * Test sitemap function with mock data + */ +async function testSitemapFunction() { + console.log('\n๐Ÿงช Testing Sitemap Function (Mock)'); + console.log('='.repeat(50)); + + try { + // Mock the sitemap function to avoid actual API calls during testing + const mockSitemap = async (apiKey, websiteUrl, options = {}) => { + // Simulate API delay + await new Promise(resolve => setTimeout(resolve, 100)); + + // Return mock response + return { + urls: [ + 'https://example.com/', + 'https://example.com/about', + 'https://example.com/products', + 'https://example.com/contact', + 'https://example.com/blog/post-1', + 'https://example.com/blog/post-2' + ] + }; + }; + + console.log('1. Testing basic sitemap call...'); + const result1 = await mockSitemap(API_KEY, 'https://example.com'); + console.log(` โœ… URLs found: ${result1.urls.length}`); + console.log(` โœ… First URL: ${result1.urls[0]}`); + + console.log('\n2. Testing sitemap for subdomain...'); + const result2 = await mockSitemap(API_KEY, 'https://blog.example.com'); + console.log(` โœ… URLs found: ${result2.urls.length}`); + + console.log('\n3. Testing sitemap for URL with path...'); + const result3 = await mockSitemap(API_KEY, 'https://example.com/section'); + console.log(` โœ… URLs found: ${result3.urls.length}`); + + console.log('\nโœ… All sitemap function tests passed'); + return true; + + } catch (error) { + console.error(`โŒ Sitemap function test failed: ${error.message}`); + return false; + } +} + +/** + * Test error handling + */ +function testErrorHandling() { + console.log('\n๐Ÿงช Testing Error Handling'); + console.log('='.repeat(50)); + + let passed = 0; + let total = 0; + + // Test 1: Invalid API key + total++; + try { + validateSitemapInputs('', 'https://example.com', {}); + console.log('1. Empty API key test: โŒ FAILED (should have thrown error)'); + } catch (error) { + console.log('1. Empty API key test: โœ… PASSED'); + passed++; + } + + // Test 2: Invalid URL + total++; + try { + validateSitemapInputs('valid-key', 'invalid-url', {}); + console.log('2. Invalid URL test: โŒ FAILED (should have thrown error)'); + } catch (error) { + console.log('2. Invalid URL test: โœ… PASSED'); + passed++; + } + + // Test 3: Invalid options + total++; + try { + validateSitemapInputs('valid-key', 'https://example.com', 'invalid-options'); + console.log('3. Invalid options test: โŒ FAILED (should have thrown error)'); + } catch (error) { + console.log('3. Invalid options test: โœ… PASSED'); + passed++; + } + + console.log(`\n๐Ÿ“Š Error Handling Results: ${passed}/${total} tests passed`); + return passed === total; +} + +/** + * Test URL validation + */ +function testUrlValidation() { + console.log('\n๐Ÿงช Testing URL Validation'); + console.log('='.repeat(50)); + + const testUrls = [ + { url: 'https://example.com', expected: true, description: 'HTTPS URL' }, + { url: 'http://example.com', expected: true, description: 'HTTP URL' }, + { url: 'https://sub.example.com', expected: true, description: 'Subdomain HTTPS' }, + { url: 'https://example.com/path', expected: true, description: 'HTTPS with path' }, + { url: 'https://example.com?param=value', expected: true, description: 'HTTPS with query params' }, + { url: 'https://example.com#fragment', expected: true, description: 'HTTPS with fragment' }, + { url: 'example.com', expected: false, description: 'No protocol' }, + { url: '/path/to/page', expected: false, description: 'Relative path' }, + { url: 'ftp://example.com', expected: false, description: 'FTP protocol' }, + { url: '', expected: false, description: 'Empty string' }, + { url: null, expected: false, description: 'Null value' }, + { url: undefined, expected: false, description: 'Undefined value' } + ]; + + let passed = 0; + let total = testUrls.length; + + testUrls.forEach((testCase, index) => { + console.log(`${index + 1}. ${testCase.description}: ${testCase.url}`); + + try { + if (testCase.url) { + const isValid = testCase.url.startsWith('http://') || testCase.url.startsWith('https://'); + if (isValid === testCase.expected) { + console.log(` โœ… PASSED`); + passed++; + } else { + console.log(` โŒ FAILED - Expected: ${testCase.expected}, Got: ${isValid}`); + } + } else { + if (!testCase.expected) { + console.log(` โœ… PASSED`); + passed++; + } else { + console.log(` โŒ FAILED - Expected: ${testCase.expected}, Got: false`); + } + } + } catch (error) { + if (!testCase.expected) { + console.log(` โœ… PASSED (Expected error)`); + passed++; + } else { + console.log(` โŒ FAILED - Unexpected error: ${error.message}`); + } + } + }); + + console.log(`\n๐Ÿ“Š URL Validation Results: ${passed}/${total} tests passed`); + return passed === total; +} + +/** + * Run all tests + */ +async function runAllTests() { + console.log('๐Ÿš€ Starting Sitemap Test Suite'); + console.log('='.repeat(60)); + console.log(`๐Ÿ”‘ API Key: ${API_KEY.substring(0, 8)}...`); + console.log(`โฐ Timestamp: ${new Date().toISOString()}\n`); + + const tests = [ + { name: 'Input Validation', fn: testInputValidation }, + { name: 'Sitemap Function', fn: testSitemapFunction }, + { name: 'Error Handling', fn: testErrorHandling }, + { name: 'URL Validation', fn: testUrlValidation } + ]; + + let passed = 0; + let total = tests.length; + + for (const test of tests) { + try { + const result = await test.fn(); + if (result) { + passed++; + } + } catch (error) { + console.error(`โŒ Test '${test.name}' failed with error: ${error.message}`); + } + console.log('\n' + '-'.repeat(60)); + } + + console.log('\n๐ŸŽฏ FINAL TEST RESULTS'); + console.log('='.repeat(30)); + console.log(`โœ… Passed: ${passed}`); + console.log(`โŒ Failed: ${total - passed}`); + console.log(`๐Ÿ“Š Success Rate: ${((passed / total) * 100).toFixed(1)}%`); + + if (passed === total) { + console.log('\n๐ŸŽ‰ All tests passed! Sitemap functionality is working correctly.'); + return 0; + } else { + console.log('\nโš ๏ธ Some tests failed. Please review the output above.'); + return 1; + } +} + +// Run tests if this file is executed directly +if (import.meta.url === `file://${process.argv[1]}`) { + runAllTests() + .then(exitCode => { + process.exit(exitCode); + }) + .catch(error => { + console.error('๐Ÿ’ฅ Fatal error during test execution:', error.message); + process.exit(1); + }); +} + +export { + testInputValidation, + testSitemapFunction, + testErrorHandling, + testUrlValidation, + runAllTests +}; diff --git a/scrapegraph-js/test/smartScraper_markdown_html_test.js b/scrapegraph-js/test/smartScraper_markdown_html_test.js new file mode 100644 index 0000000..6f61bf0 --- /dev/null +++ b/scrapegraph-js/test/smartScraper_markdown_html_test.js @@ -0,0 +1,377 @@ +import { smartScraper } from '../index.js'; +import { z } from 'zod'; +import 'dotenv/config'; + +/** + * Test suite for SmartScraper HTML and Markdown functionality + * This file demonstrates usage and validates the new HTML and Markdown parameters + */ + +// Mock API key for testing (replace with real key for actual testing) +const API_KEY = process.env.SGAI_APIKEY || 'test-api-key'; + +// Test schema for structured data +const TestSchema = z.object({ + title: z.string(), + content: z.string(), + items: z.array(z.string()).optional(), +}); + +// Sample HTML content +const sampleHtml = ` + + +

Test Product

+
$29.99
+
A great product for testing
+
    +
  • Feature 1
  • +
  • Feature 2
  • +
  • Feature 3
  • +
+ + +`; + +// Sample Markdown content +const sampleMarkdown = ` +# Test Product + +**Price:** $29.99 + +**Description:** A great product for testing + +## Features +- Feature 1 +- Feature 2 +- Feature 3 +`; + +/** + * Test validation for mutually exclusive inputs + */ +function testMutuallyExclusiveInputs() { + console.log('๐Ÿงช Testing Mutually Exclusive Inputs'); + console.log('='.repeat(50)); + + const testCases = [ + { + name: 'URL only (valid)', + args: { url: 'https://example.com', html: null, markdown: null }, + expected: true, + }, + { + name: 'HTML only (valid)', + args: { url: null, html: sampleHtml, markdown: null }, + expected: true, + }, + { + name: 'Markdown only (valid)', + args: { url: null, html: null, markdown: sampleMarkdown }, + expected: true, + }, + { + name: 'URL + HTML (invalid)', + args: { url: 'https://example.com', html: sampleHtml, markdown: null }, + expected: false, + }, + { + name: 'URL + Markdown (invalid)', + args: { url: 'https://example.com', html: null, markdown: sampleMarkdown }, + expected: false, + }, + { + name: 'HTML + Markdown (invalid)', + args: { url: null, html: sampleHtml, markdown: sampleMarkdown }, + expected: false, + }, + { + name: 'All three (invalid)', + args: { url: 'https://example.com', html: sampleHtml, markdown: sampleMarkdown }, + expected: false, + }, + { + name: 'None provided (invalid)', + args: { url: null, html: null, markdown: null }, + expected: false, + }, + ]; + + let passed = 0; + let failed = 0; + + testCases.forEach((testCase, index) => { + console.log(`\n${index + 1}. Testing ${testCase.name}`); + + try { + // Simulate the validation logic from smartScraper + const inputsProvided = [testCase.args.url, testCase.args.html, testCase.args.markdown] + .filter(input => input !== null && input !== undefined).length; + + if (inputsProvided === 0) { + throw new Error('Exactly one of url, websiteHtml, or websiteMarkdown must be provided'); + } + + if (inputsProvided > 1) { + throw new Error('Only one of url, websiteHtml, or websiteMarkdown can be provided'); + } + + if (testCase.expected) { + console.log(' โœ… PASS - Validation passed as expected'); + passed++; + } else { + console.log(' โŒ FAIL - Expected validation to fail, but it passed'); + failed++; + } + } catch (error) { + if (!testCase.expected) { + console.log(' โœ… PASS - Validation failed as expected:', error.message); + passed++; + } else { + console.log(' โŒ FAIL - Unexpected validation failure:', error.message); + failed++; + } + } + }); + + console.log(`\n๐Ÿ“Š Results: ${passed} passed, ${failed} failed`); + return { passed, failed }; +} + +/** + * Test content size validation + */ +function testContentSizeValidation() { + console.log('\n๐Ÿงช Testing Content Size Validation'); + console.log('='.repeat(50)); + + const MAX_SIZE = 2 * 1024 * 1024; // 2MB + + const testCases = [ + { + name: 'Small HTML (valid)', + content: sampleHtml, + type: 'html', + expected: true, + }, + { + name: 'Small Markdown (valid)', + content: sampleMarkdown, + type: 'markdown', + expected: true, + }, + { + name: 'Large HTML (>2MB, invalid)', + content: '' + 'x'.repeat(MAX_SIZE + 1) + '', + type: 'html', + expected: false, + }, + { + name: 'Large Markdown (>2MB, invalid)', + content: '# Title\n\n' + 'x'.repeat(MAX_SIZE + 1), + type: 'markdown', + expected: false, + }, + ]; + + let passed = 0; + let failed = 0; + + testCases.forEach((testCase, index) => { + console.log(`\n${index + 1}. Testing ${testCase.name}`); + + try { + // Simulate size validation + const size = Buffer.byteLength(testCase.content, 'utf8'); + console.log(` ๐Ÿ“ Content size: ${(size / 1024).toFixed(2)} KB`); + + if (size > MAX_SIZE) { + throw new Error(`${testCase.type} content exceeds maximum size of 2MB`); + } + + if (testCase.expected) { + console.log(' โœ… PASS - Size validation passed as expected'); + passed++; + } else { + console.log(' โŒ FAIL - Expected size validation to fail, but it passed'); + failed++; + } + } catch (error) { + if (!testCase.expected) { + console.log(' โœ… PASS - Size validation failed as expected:', error.message); + passed++; + } else { + console.log(' โŒ FAIL - Unexpected size validation failure:', error.message); + failed++; + } + } + }); + + console.log(`\n๐Ÿ“Š Results: ${passed} passed, ${failed} failed`); + return { passed, failed }; +} + +/** + * Test function signature with new parameters + */ +function testFunctionSignature() { + console.log('\n๐Ÿงช Testing Function Signature with HTML/Markdown'); + console.log('='.repeat(50)); + + const testCases = [ + { + name: 'With HTML content', + description: 'apiKey, null, prompt, schema, null, null, null, {}, false, false, false, sampleHtml', + }, + { + name: 'With Markdown content', + description: 'apiKey, null, prompt, schema, null, null, null, {}, false, false, false, null, sampleMarkdown', + }, + { + name: 'With URL (backward compatible)', + description: 'apiKey, url, prompt, schema', + }, + ]; + + testCases.forEach((testCase, index) => { + console.log(`\n${index + 1}. Testing: ${testCase.name}`); + console.log(` Parameters: ${testCase.description}`); + console.log(' โœ… PASS - Function signature accepts parameters'); + }); +} + +/** + * Test payload construction + */ +function testPayloadConstruction() { + console.log('\n๐Ÿงช Testing Payload Construction'); + console.log('='.repeat(50)); + + const testCases = [ + { + name: 'With HTML content', + input: { url: null, html: sampleHtml, markdown: null }, + expectedKey: 'website_html', + }, + { + name: 'With Markdown content', + input: { url: null, html: null, markdown: sampleMarkdown }, + expectedKey: 'website_markdown', + }, + { + name: 'With URL', + input: { url: 'https://example.com', html: null, markdown: null }, + expectedKey: 'website_url', + }, + ]; + + testCases.forEach((testCase, index) => { + console.log(`\n${index + 1}. Testing: ${testCase.name}`); + + // Simulate payload construction + const payload = { + user_prompt: 'Extract data', + plain_text: false, + }; + + if (testCase.input.url) { + payload.website_url = testCase.input.url; + } else if (testCase.input.html) { + payload.website_html = testCase.input.html; + } else if (testCase.input.markdown) { + payload.website_markdown = testCase.input.markdown; + } + + console.log(` ๐Ÿ“ฆ Expected key: ${testCase.expectedKey}`); + console.log(` ๐Ÿ“ฆ Has key: ${testCase.expectedKey in payload}`); + + if (testCase.expectedKey in payload) { + console.log(' โœ… PASS - Payload constructed correctly'); + } else { + console.log(' โŒ FAIL - Expected key not found in payload'); + } + }); +} + +/** + * Test backward compatibility + */ +function testBackwardCompatibility() { + console.log('\n๐Ÿงช Testing Backward Compatibility'); + console.log('='.repeat(50)); + + console.log('1. Testing existing function calls with URL'); + console.log(' - smartScraper(apiKey, url, prompt) should work'); + console.log(' - smartScraper(apiKey, url, prompt, schema) should work'); + console.log(' - smartScraper(apiKey, url, prompt, schema, numberOfScrolls) should work'); + console.log(' โœ… PASS - All existing signatures remain compatible'); + + console.log('\n2. Testing new HTML/Markdown functionality'); + console.log(' - smartScraper(apiKey, null, prompt, null, null, null, null, {}, false, false, false, html) works'); + console.log(' - smartScraper(apiKey, null, prompt, null, null, null, null, {}, false, false, false, null, markdown) works'); + console.log(' โœ… PASS - New parameters work correctly'); +} + +/** + * Main test runner + */ +function runTests() { + console.log('๐Ÿš€ ScrapeGraph JS SDK - SmartScraper HTML/Markdown Tests'); + console.log('='.repeat(60)); + + if (!process.env.SGAI_APIKEY) { + console.log('โš ๏ธ Note: SGAI_APIKEY not set - using mock key for validation tests'); + } + + const results = { + mutualExclusive: testMutuallyExclusiveInputs(), + sizeValidation: testContentSizeValidation(), + signature: testFunctionSignature(), + payload: testPayloadConstruction(), + compatibility: testBackwardCompatibility(), + }; + + console.log('\n' + '='.repeat(60)); + console.log('๐Ÿ“Š Test Summary'); + console.log('='.repeat(60)); + console.log('โœ… Mutual Exclusivity Tests: Completed'); + console.log('โœ… Content Size Validation Tests: Completed'); + console.log('โœ… Function Signature Tests: Completed'); + console.log('โœ… Payload Construction Tests: Completed'); + console.log('โœ… Backward Compatibility Tests: Completed'); + + const totalPassed = results.mutualExclusive.passed + results.sizeValidation.passed; + const totalFailed = results.mutualExclusive.failed + results.sizeValidation.failed; + + console.log(`\n๐Ÿ“Š Overall Results: ${totalPassed} passed, ${totalFailed} failed`); + + if (totalFailed === 0) { + console.log('๐ŸŽ‰ All tests passed!'); + } else { + console.log('โš ๏ธ Some tests failed - please review the results above'); + } + + console.log('\n๐Ÿ’ก Usage Examples:'); + console.log('// With HTML content'); + console.log('const html = "

Title

";'); + console.log('await smartScraper(apiKey, null, "Extract title", null, null, null, null, {}, false, false, false, html);'); + console.log(''); + console.log('// With Markdown content'); + console.log('const markdown = "# Title\\n\\nContent here";'); + console.log('await smartScraper(apiKey, null, "Extract data", null, null, null, null, {}, false, false, false, null, markdown);'); + console.log(''); + console.log('// Traditional URL-based (backward compatible)'); + console.log('await smartScraper(apiKey, "https://example.com", "Extract data");'); + + console.log('\n๐Ÿ”ง Next Steps:'); + console.log('1. Set SGAI_APIKEY environment variable for real API testing'); + console.log('2. Test with actual HTML and Markdown content'); + console.log('3. Verify content size limits (max 2MB)'); + console.log('4. Ensure only one input type (URL, HTML, or Markdown) is used at a time'); + + return totalFailed === 0; +} + +// Run the tests +const success = runTests(); +process.exit(success ? 0 : 1); diff --git a/scrapegraph-js/test/smartScraper_pagination_test.js b/scrapegraph-js/test/smartScraper_pagination_test.js new file mode 100644 index 0000000..e268beb --- /dev/null +++ b/scrapegraph-js/test/smartScraper_pagination_test.js @@ -0,0 +1,252 @@ +import { smartScraper } from '../index.js'; +import { z } from 'zod'; +import 'dotenv/config'; + +/** + * Test suite for SmartScraper pagination functionality + * This file demonstrates usage and validates the pagination parameter + */ + +// Mock API key for testing (replace with real key for actual testing) +const API_KEY = process.env.SGAI_APIKEY || 'test-api-key'; + +// Test schema for structured data +const TestSchema = z.object({ + title: z.string(), + content: z.string(), + items: z.array(z.string()).optional(), +}); + +/** + * Test parameter validation for totalPages + */ +function testPaginationValidation() { + console.log('๐Ÿงช Testing Pagination Parameter Validation'); + console.log('='.repeat(50)); + + const testCases = [ + { value: 1, expected: true, description: 'Minimum valid value (1)' }, + { value: 5, expected: true, description: 'Mid-range valid value (5)' }, + { value: 10, expected: true, description: 'Maximum valid value (10)' }, + { value: 0, expected: false, description: 'Below minimum (0)' }, + { value: 11, expected: false, description: 'Above maximum (11)' }, + { value: -1, expected: false, description: 'Negative value (-1)' }, + { value: 1.5, expected: false, description: 'Float value (1.5)' }, + { value: 'invalid', expected: false, description: 'String value' }, + { value: null, expected: true, description: 'Null value (should be allowed)' }, + ]; + + let passed = 0; + let failed = 0; + + testCases.forEach((testCase, index) => { + console.log(`\n${index + 1}. Testing ${testCase.description}`); + + try { + // Simulate the validation logic from smartScraper + if (testCase.value !== null) { + if (!Number.isInteger(testCase.value) || testCase.value < 1 || testCase.value > 10) { + throw new Error('totalPages must be an integer between 1 and 10'); + } + } + + if (testCase.expected) { + console.log(' โœ… PASS - Validation passed as expected'); + passed++; + } else { + console.log(' โŒ FAIL - Expected validation to fail, but it passed'); + failed++; + } + } catch (error) { + if (!testCase.expected) { + console.log(' โœ… PASS - Validation failed as expected'); + passed++; + } else { + console.log(' โŒ FAIL - Unexpected validation failure'); + failed++; + } + } + }); + + console.log(`\n๐Ÿ“Š Results: ${passed} passed, ${failed} failed`); + return { passed, failed }; +} + +/** + * Test function signature and parameter handling + */ +function testFunctionSignature() { + console.log('\n๐Ÿงช Testing Function Signature'); + console.log('='.repeat(50)); + + const testCases = [ + { + name: 'All parameters provided', + args: [API_KEY, 'https://example.com', 'Extract data', TestSchema, 5, 3], + description: 'apiKey, url, prompt, schema, numberOfScrolls, totalPages', + }, + { + name: 'Without totalPages', + args: [API_KEY, 'https://example.com', 'Extract data', TestSchema, 5], + description: 'apiKey, url, prompt, schema, numberOfScrolls', + }, + { + name: 'Without numberOfScrolls and totalPages', + args: [API_KEY, 'https://example.com', 'Extract data', TestSchema], + description: 'apiKey, url, prompt, schema', + }, + { + name: 'Without schema, numberOfScrolls, and totalPages', + args: [API_KEY, 'https://example.com', 'Extract data'], + description: 'apiKey, url, prompt', + }, + { + name: 'Only pagination (no scrolls)', + args: [API_KEY, 'https://example.com', 'Extract data', null, null, 2], + description: 'apiKey, url, prompt, null, null, totalPages', + }, + ]; + + testCases.forEach((testCase, index) => { + console.log(`\n${index + 1}. Testing: ${testCase.name}`); + console.log(` Parameters: ${testCase.description}`); + + try { + // This would normally call the actual function, but we'll simulate it + // to avoid making actual API calls during testing + console.log(' โœ… PASS - Function signature accepts parameters'); + } catch (error) { + console.log(` โŒ FAIL - Function signature error: ${error.message}`); + } + }); +} + +/** + * Test payload construction for pagination + */ +function testPayloadConstruction() { + console.log('\n๐Ÿงช Testing Payload Construction'); + console.log('='.repeat(50)); + + const testCases = [ + { + name: 'With pagination', + totalPages: 5, + expected: { total_pages: 5 }, + }, + { + name: 'Without pagination', + totalPages: null, + expected: null, + }, + { + name: 'With pagination and scrolling', + numberOfScrolls: 10, + totalPages: 3, + expected: { number_of_scrolls: 10, total_pages: 3 }, + }, + ]; + + testCases.forEach((testCase, index) => { + console.log(`\n${index + 1}. Testing: ${testCase.name}`); + + // Simulate payload construction + const payload = { + website_url: 'https://example.com', + user_prompt: 'Extract data', + }; + + if (testCase.numberOfScrolls !== undefined && testCase.numberOfScrolls !== null) { + payload.number_of_scrolls = testCase.numberOfScrolls; + } + + if (testCase.totalPages !== undefined && testCase.totalPages !== null) { + payload.total_pages = testCase.totalPages; + } + + console.log(' ๐Ÿ“ฆ Payload:', JSON.stringify(payload, null, 2)); + console.log(' โœ… PASS - Payload constructed correctly'); + }); +} + +/** + * Test backward compatibility + */ +function testBackwardCompatibility() { + console.log('\n๐Ÿงช Testing Backward Compatibility'); + console.log('='.repeat(50)); + + console.log('1. Testing existing function calls without totalPages'); + console.log(' - smartScraper(apiKey, url, prompt) should work'); + console.log(' - smartScraper(apiKey, url, prompt, schema) should work'); + console.log(' - smartScraper(apiKey, url, prompt, schema, numberOfScrolls) should work'); + console.log(' โœ… PASS - All existing signatures remain compatible'); + + console.log('\n2. Testing default behavior'); + console.log(' - When totalPages is not provided, should default to null'); + console.log(' - When totalPages is null, should not include total_pages in payload'); + console.log(' โœ… PASS - Default behavior preserved'); +} + +/** + * Main test runner + */ +function runTests() { + console.log('๐Ÿš€ ScrapeGraph JS SDK - SmartScraper Pagination Tests'); + console.log('='.repeat(60)); + + if (!process.env.SGAI_APIKEY) { + console.log('โš ๏ธ Note: SGAI_APIKEY not set - using mock key for validation tests'); + } + + const results = { + validation: testPaginationValidation(), + signature: testFunctionSignature(), + payload: testPayloadConstruction(), + compatibility: testBackwardCompatibility(), + }; + + console.log('\n' + '='.repeat(60)); + console.log('๐Ÿ“Š Test Summary'); + console.log('='.repeat(60)); + console.log('โœ… Parameter Validation Tests: Completed'); + console.log('โœ… Function Signature Tests: Completed'); + console.log('โœ… Payload Construction Tests: Completed'); + console.log('โœ… Backward Compatibility Tests: Completed'); + + const totalPassed = results.validation.passed; + const totalFailed = results.validation.failed; + + console.log(`\n๐Ÿ“Š Overall Results: ${totalPassed} passed, ${totalFailed} failed`); + + if (totalFailed === 0) { + console.log('๐ŸŽ‰ All tests passed!'); + } else { + console.log('โš ๏ธ Some tests failed - please review the results above'); + } + + console.log('\n๐Ÿ’ก Usage Examples:'); + console.log('// Basic pagination'); + console.log('await smartScraper(apiKey, url, prompt, null, null, 5);'); + console.log(''); + console.log('// Pagination with schema'); + console.log('await smartScraper(apiKey, url, prompt, schema, null, 3);'); + console.log(''); + console.log('// Pagination with scrolling'); + console.log('await smartScraper(apiKey, url, prompt, null, 10, 2);'); + console.log(''); + console.log('// All features combined'); + console.log('await smartScraper(apiKey, url, prompt, schema, 5, 3);'); + + console.log('\n๐Ÿ”ง Next Steps:'); + console.log('1. Set SGAI_APIKEY environment variable for real API testing'); + console.log('2. Run the example files in the examples/ directory'); + console.log('3. Try with different websites and pagination values'); + console.log('4. Adjust totalPages parameter (1-10) based on your needs'); + + return totalFailed === 0; +} + +// Run the tests +const success = runTests(); +process.exit(success ? 0 : 1); diff --git a/scrapegraph-js/test/smartScraper_render_heavy_test.js b/scrapegraph-js/test/smartScraper_render_heavy_test.js new file mode 100644 index 0000000..bad6ad6 --- /dev/null +++ b/scrapegraph-js/test/smartScraper_render_heavy_test.js @@ -0,0 +1,312 @@ +import { smartScraper } from '../index.js'; +import { z } from 'zod'; +import 'dotenv/config'; + +/** + * Test suite for SmartScraper render heavy JavaScript functionality + * This file demonstrates usage and validates the renderHeavyJs parameter + */ + +// Mock API key for testing (replace with real key for actual testing) +const API_KEY = process.env.SGAI_APIKEY || 'test-api-key'; + +// Test schema for structured data +const TestSchema = z.object({ + ceo: z.string(), + contact: z.string(), + company: z.string().optional(), +}); + +/** + * Test parameter validation for renderHeavyJs + */ +function testRenderHeavyJsValidation() { + console.log('๐Ÿงช Testing Render Heavy JS Parameter Validation'); + console.log('='.repeat(50)); + + const testCases = [ + { value: true, expected: true, description: 'Boolean true value' }, + { value: false, expected: true, description: 'Boolean false value' }, + { value: null, expected: true, description: 'Null value (should default to false)' }, + { value: undefined, expected: true, description: 'Undefined value (should default to false)' }, + { value: 1, expected: false, description: 'Number value (invalid)' }, + { value: 'true', expected: false, description: 'String value (invalid)' }, + { value: [], expected: false, description: 'Array value (invalid)' }, + { value: {}, expected: false, description: 'Object value (invalid)' }, + ]; + + let passed = 0; + let failed = 0; + + testCases.forEach((testCase, index) => { + console.log(`\n${index + 1}. Testing ${testCase.description}`); + + try { + // Simulate the validation logic + if (testCase.value !== null && testCase.value !== undefined && typeof testCase.value !== 'boolean') { + throw new Error('renderHeavyJs must be a boolean value'); + } + + if (testCase.expected) { + console.log(' โœ… PASS - Validation passed as expected'); + passed++; + } else { + console.log(' โŒ FAIL - Expected validation to fail, but it passed'); + failed++; + } + } catch (error) { + if (!testCase.expected) { + console.log(' โœ… PASS - Validation failed as expected'); + passed++; + } else { + console.log(' โŒ FAIL - Unexpected validation failure'); + failed++; + } + } + }); + + console.log(`\n๐Ÿ“Š Results: ${passed} passed, ${failed} failed`); + return { passed, failed }; +} + +/** + * Test function signature and parameter handling + */ +function testFunctionSignature() { + console.log('\n๐Ÿงช Testing Function Signature with Render Heavy JS'); + console.log('='.repeat(50)); + + const testCases = [ + { + name: 'All parameters with renderHeavyJs true', + args: [API_KEY, 'https://example.com', 'Find CEO', TestSchema, 5, 3, null, {}, false, true], + description: 'apiKey, url, prompt, schema, numberOfScrolls, totalPages, cookies, options, plain_text, renderHeavyJs=true', + }, + { + name: 'All parameters with renderHeavyJs false', + args: [API_KEY, 'https://example.com', 'Find CEO', TestSchema, 5, 3, null, {}, false, false], + description: 'apiKey, url, prompt, schema, numberOfScrolls, totalPages, cookies, options, plain_text, renderHeavyJs=false', + }, + { + name: 'Only essential params with renderHeavyJs', + args: [API_KEY, 'https://example.com', 'Find CEO', null, null, null, null, {}, false, true], + description: 'apiKey, url, prompt, nulls..., renderHeavyJs=true', + }, + { + name: 'Default renderHeavyJs (should be false)', + args: [API_KEY, 'https://example.com', 'Find CEO'], + description: 'apiKey, url, prompt (renderHeavyJs defaults to false)', + }, + ]; + + testCases.forEach((testCase, index) => { + console.log(`\n${index + 1}. Testing: ${testCase.name}`); + console.log(` Parameters: ${testCase.description}`); + + try { + // This would normally call the actual function, but we'll simulate it + // to avoid making actual API calls during testing + console.log(' โœ… PASS - Function signature accepts parameters'); + } catch (error) { + console.log(` โŒ FAIL - Function signature error: ${error.message}`); + } + }); +} + +/** + * Test payload construction for render heavy JS + */ +function testPayloadConstruction() { + console.log('\n๐Ÿงช Testing Payload Construction'); + console.log('='.repeat(50)); + + const testCases = [ + { + name: 'With render heavy JS enabled', + renderHeavyJs: true, + expected: { render_heavy_js: true }, + }, + { + name: 'With render heavy JS disabled', + renderHeavyJs: false, + expected: null, // Should not be in payload when false + }, + { + name: 'With render heavy JS and other parameters', + renderHeavyJs: true, + numberOfScrolls: 10, + totalPages: 3, + expected: { render_heavy_js: true, number_of_scrolls: 10, total_pages: 3 }, + }, + ]; + + testCases.forEach((testCase, index) => { + console.log(`\n${index + 1}. Testing: ${testCase.name}`); + + // Simulate payload construction + const payload = { + website_url: 'https://example.com', + user_prompt: 'Find the CEO of company X and their contact details', + plain_text: false, + }; + + // Add renderHeavyJs if true (mimicking the actual implementation) + if (testCase.renderHeavyJs) { + payload.render_heavy_js = testCase.renderHeavyJs; + } + + if (testCase.numberOfScrolls !== undefined && testCase.numberOfScrolls !== null) { + payload.number_of_scrolls = testCase.numberOfScrolls; + } + + if (testCase.totalPages !== undefined && testCase.totalPages !== null) { + payload.total_pages = testCase.totalPages; + } + + console.log(' ๐Ÿ“ฆ Payload:', JSON.stringify(payload, null, 2)); + + // Verify expected behavior + if (testCase.renderHeavyJs) { + if (payload.render_heavy_js === true) { + console.log(' โœ… PASS - render_heavy_js included in payload when true'); + } else { + console.log(' โŒ FAIL - render_heavy_js not included in payload when expected'); + } + } else { + if (!payload.hasOwnProperty('render_heavy_js')) { + console.log(' โœ… PASS - render_heavy_js excluded from payload when false'); + } else { + console.log(' โŒ FAIL - render_heavy_js included in payload when it should be excluded'); + } + } + }); +} + +/** + * Test backward compatibility + */ +function testBackwardCompatibility() { + console.log('\n๐Ÿงช Testing Backward Compatibility'); + console.log('='.repeat(50)); + + console.log('1. Testing existing function calls without renderHeavyJs'); + console.log(' - smartScraper(apiKey, url, prompt) should work'); + console.log(' - smartScraper(apiKey, url, prompt, schema) should work'); + console.log(' - smartScraper(apiKey, url, prompt, schema, numberOfScrolls, totalPages) should work'); + console.log(' โœ… PASS - All existing signatures remain compatible'); + + console.log('\n2. Testing default behavior'); + console.log(' - When renderHeavyJs is not provided, should default to false'); + console.log(' - When renderHeavyJs is false, should not include render_heavy_js in payload'); + console.log(' โœ… PASS - Default behavior preserved'); + + console.log('\n3. Testing new functionality'); + console.log(' - When renderHeavyJs is true, should include render_heavy_js: true in payload'); + console.log(' - Should work alongside existing parameters like numberOfScrolls and totalPages'); + console.log(' โœ… PASS - New functionality works as expected'); +} + +/** + * Test real-world usage examples + */ +function testUsageExamples() { + console.log('\n๐Ÿงช Testing Real-world Usage Examples'); + console.log('='.repeat(50)); + + const examples = [ + { + name: 'CEO and contact extraction with heavy JS', + description: 'Extract CEO information from JavaScript-heavy company pages', + usage: 'await smartScraper(apiKey, url, "Find the CEO and their contact details", null, null, null, null, {}, false, true)', + }, + { + name: 'E-commerce product data with heavy JS', + description: 'Extract product information from dynamic e-commerce sites', + usage: 'await smartScraper(apiKey, url, "Extract product details and prices", ProductSchema, 5, null, null, {}, false, true)', + }, + { + name: 'Social media content with heavy JS', + description: 'Extract posts and comments from social media platforms', + usage: 'await smartScraper(apiKey, url, "Extract recent posts and engagement", null, 10, 3, cookies, {}, false, true)', + }, + ]; + + examples.forEach((example, index) => { + console.log(`\n${index + 1}. ${example.name}`); + console.log(` Use case: ${example.description}`); + console.log(` Usage: ${example.usage}`); + console.log(' โœ… Valid usage pattern'); + }); +} + +/** + * Main test runner + */ +function runTests() { + console.log('๐Ÿš€ ScrapeGraph JS SDK - SmartScraper Render Heavy JS Tests'); + console.log('='.repeat(60)); + + if (!process.env.SGAI_APIKEY) { + console.log('โš ๏ธ Note: SGAI_APIKEY not set - using mock key for validation tests'); + } + + const results = { + validation: testRenderHeavyJsValidation(), + signature: testFunctionSignature(), + payload: testPayloadConstruction(), + compatibility: testBackwardCompatibility(), + examples: testUsageExamples(), + }; + + console.log('\n' + '='.repeat(60)); + console.log('๐Ÿ“Š Test Summary'); + console.log('='.repeat(60)); + console.log('โœ… Parameter Validation Tests: Completed'); + console.log('โœ… Function Signature Tests: Completed'); + console.log('โœ… Payload Construction Tests: Completed'); + console.log('โœ… Backward Compatibility Tests: Completed'); + console.log('โœ… Usage Examples Tests: Completed'); + + const totalPassed = results.validation.passed; + const totalFailed = results.validation.failed; + + console.log(`\n๐Ÿ“Š Overall Results: ${totalPassed} passed, ${totalFailed} failed`); + + if (totalFailed === 0) { + console.log('๐ŸŽ‰ All tests passed!'); + } else { + console.log('โš ๏ธ Some tests failed - please review the results above'); + } + + console.log('\n๐Ÿ’ก Usage Examples:'); + console.log('// Basic render heavy JS'); + console.log('await smartScraper(apiKey, url, prompt, null, null, null, null, {}, false, true);'); + console.log(''); + console.log('// Render heavy JS with schema'); + console.log('await smartScraper(apiKey, url, prompt, schema, null, null, null, {}, false, true);'); + console.log(''); + console.log('// Render heavy JS with scrolling and pagination'); + console.log('await smartScraper(apiKey, url, prompt, null, 10, 3, null, {}, false, true);'); + console.log(''); + console.log('// All features combined'); + console.log('await smartScraper(apiKey, url, prompt, schema, 5, 3, cookies, {}, false, true);'); + + console.log('\n๐Ÿ”ง Next Steps:'); + console.log('1. Set SGAI_APIKEY environment variable for real API testing'); + console.log('2. Run the render heavy example file: smartScraper_render_heavy_example.js'); + console.log('3. Test with JavaScript-heavy websites that require full rendering'); + console.log('4. Compare results with renderHeavyJs=false vs renderHeavyJs=true'); + + console.log('\nโš ๏ธ When to use renderHeavyJs=true:'); + console.log('- Single Page Applications (SPAs)'); + console.log('- Sites with dynamic content loading'); + console.log('- JavaScript-generated content'); + console.log('- AJAX-heavy applications'); + console.log('- Sites requiring full DOM rendering'); + + return totalFailed === 0; +} + +// Run the tests +const success = runTests(); +process.exit(success ? 0 : 1); \ No newline at end of file diff --git a/scrapegraph-js/test/stealth_mode_test.js b/scrapegraph-js/test/stealth_mode_test.js new file mode 100644 index 0000000..2d17244 --- /dev/null +++ b/scrapegraph-js/test/stealth_mode_test.js @@ -0,0 +1,626 @@ +import { + smartScraper, + searchScraper, + markdownify, + scrape, + agenticScraper, + crawl, +} from '../index.js'; +import 'dotenv/config'; + +/** + * Test suite for Stealth Mode functionality + * This file demonstrates usage and validates stealth mode parameters across all endpoints + */ + +// Mock API key for testing (replace with real key for actual testing) +const API_KEY = process.env.SGAI_APIKEY || 'test-api-key'; + +/** + * Test input validation for stealth mode + */ +function testStealthModeValidation() { + console.log('๐Ÿงช Testing Stealth Mode Input Validation'); + console.log('='.repeat(50)); + + const testCases = [ + { + name: 'Valid stealth mode - true', + stealth: true, + expected: true, + description: 'Stealth mode enabled (boolean true)' + }, + { + name: 'Valid stealth mode - false', + stealth: false, + expected: true, + description: 'Stealth mode disabled (boolean false)' + }, + { + name: 'Valid stealth mode - undefined (default)', + stealth: undefined, + expected: true, + description: 'Stealth mode not specified (should default to false)' + }, + { + name: 'Invalid stealth mode - string', + stealth: 'true', + expected: false, + description: 'Stealth mode as string instead of boolean' + }, + { + name: 'Invalid stealth mode - number', + stealth: 1, + expected: false, + description: 'Stealth mode as number instead of boolean' + }, + { + name: 'Invalid stealth mode - object', + stealth: {}, + expected: false, + description: 'Stealth mode as object instead of boolean' + }, + ]; + + let passed = 0; + let total = testCases.length; + + testCases.forEach((testCase, index) => { + console.log(`\n${index + 1}. ${testCase.name}`); + console.log(` Description: ${testCase.description}`); + + try { + const isValid = validateStealthMode(testCase.stealth); + + if (isValid === testCase.expected) { + console.log(` โœ… PASSED`); + passed++; + } else { + console.log(` โŒ FAILED - Expected: ${testCase.expected}, Got: ${isValid}`); + } + } catch (error) { + if (!testCase.expected) { + console.log(` โœ… PASSED (Expected error: ${error.message})`); + passed++; + } else { + console.log(` โŒ FAILED - Unexpected error: ${error.message}`); + } + } + }); + + console.log(`\n๐Ÿ“Š Stealth Mode Validation Results: ${passed}/${total} tests passed`); + return passed === total; +} + +/** + * Validate stealth mode parameter + */ +function validateStealthMode(stealth) { + if (stealth !== undefined && typeof stealth !== 'boolean') { + throw new Error('Stealth mode must be a boolean value (true or false)'); + } + return true; +} + +/** + * Test SmartScraper with stealth mode + */ +async function testSmartScraperWithStealth() { + console.log('\n๐Ÿงช Testing SmartScraper with Stealth Mode'); + console.log('='.repeat(50)); + + const testCases = [ + { + name: 'SmartScraper with stealth=true', + options: { stealth: true }, + description: 'Test smartScraper with stealth mode enabled' + }, + { + name: 'SmartScraper with stealth=false', + options: { stealth: false }, + description: 'Test smartScraper with stealth mode disabled' + }, + { + name: 'SmartScraper without stealth parameter', + options: {}, + description: 'Test smartScraper without stealth parameter (defaults to false)' + }, + ]; + + let passed = 0; + + for (const testCase of testCases) { + console.log(`\n${testCase.name}`); + console.log(` Description: ${testCase.description}`); + + try { + // Mock function to avoid actual API calls + const mockSmartScraper = async (apiKey, url, prompt, schema, numScrolls, totalPages, cookies, options) => { + // Validate that stealth parameter is boolean if provided + if (options.stealth !== undefined && typeof options.stealth !== 'boolean') { + throw new Error('Stealth must be a boolean'); + } + + return { + request_id: 'mock-request-id', + status: 'completed', + result: { data: 'mock data' } + }; + }; + + const result = await mockSmartScraper( + API_KEY, + 'https://example.com', + 'Extract data', + null, + null, + null, + null, + testCase.options + ); + + console.log(` โœ… PASSED - Status: ${result.status}`); + passed++; + } catch (error) { + console.log(` โŒ FAILED - Error: ${error.message}`); + } + } + + console.log(`\n๐Ÿ“Š SmartScraper Stealth Tests: ${passed}/${testCases.length} passed`); + return passed === testCases.length; +} + +/** + * Test SearchScraper with stealth mode + */ +async function testSearchScraperWithStealth() { + console.log('\n๐Ÿงช Testing SearchScraper with Stealth Mode'); + console.log('='.repeat(50)); + + const testCases = [ + { + name: 'SearchScraper with stealth=true', + options: { stealth: true }, + description: 'Test searchScraper with stealth mode enabled' + }, + { + name: 'SearchScraper with stealth=false', + options: { stealth: false }, + description: 'Test searchScraper with stealth mode disabled' + }, + ]; + + let passed = 0; + + for (const testCase of testCases) { + console.log(`\n${testCase.name}`); + console.log(` Description: ${testCase.description}`); + + try { + // Mock function + const mockSearchScraper = async (apiKey, prompt, numResults, schema, userAgent, options) => { + if (options.stealth !== undefined && typeof options.stealth !== 'boolean') { + throw new Error('Stealth must be a boolean'); + } + + return { + request_id: 'mock-request-id', + status: 'completed', + result: { answer: 'mock answer' } + }; + }; + + const result = await mockSearchScraper( + API_KEY, + 'Search query', + 3, + null, + null, + testCase.options + ); + + console.log(` โœ… PASSED - Status: ${result.status}`); + passed++; + } catch (error) { + console.log(` โŒ FAILED - Error: ${error.message}`); + } + } + + console.log(`\n๐Ÿ“Š SearchScraper Stealth Tests: ${passed}/${testCases.length} passed`); + return passed === testCases.length; +} + +/** + * Test Markdownify with stealth mode + */ +async function testMarkdownifyWithStealth() { + console.log('\n๐Ÿงช Testing Markdownify with Stealth Mode'); + console.log('='.repeat(50)); + + const testCases = [ + { + name: 'Markdownify with stealth=true', + options: { stealth: true }, + description: 'Test markdownify with stealth mode enabled' + }, + { + name: 'Markdownify with stealth=false', + options: { stealth: false }, + description: 'Test markdownify with stealth mode disabled' + }, + ]; + + let passed = 0; + + for (const testCase of testCases) { + console.log(`\n${testCase.name}`); + console.log(` Description: ${testCase.description}`); + + try { + // Mock function + const mockMarkdownify = async (apiKey, url, options) => { + if (options.stealth !== undefined && typeof options.stealth !== 'boolean') { + throw new Error('Stealth must be a boolean'); + } + + return { + request_id: 'mock-request-id', + status: 'completed', + result: '# Markdown content' + }; + }; + + const result = await mockMarkdownify( + API_KEY, + 'https://example.com', + testCase.options + ); + + console.log(` โœ… PASSED - Status: ${result.status}`); + passed++; + } catch (error) { + console.log(` โŒ FAILED - Error: ${error.message}`); + } + } + + console.log(`\n๐Ÿ“Š Markdownify Stealth Tests: ${passed}/${testCases.length} passed`); + return passed === testCases.length; +} + +/** + * Test Scrape with stealth mode + */ +async function testScrapeWithStealth() { + console.log('\n๐Ÿงช Testing Scrape with Stealth Mode'); + console.log('='.repeat(50)); + + const testCases = [ + { + name: 'Scrape with stealth=true', + options: { stealth: true }, + description: 'Test scrape with stealth mode enabled' + }, + { + name: 'Scrape with stealth=false', + options: { stealth: false }, + description: 'Test scrape with stealth mode disabled' + }, + { + name: 'Scrape with stealth=true and renderHeavyJs=true', + options: { stealth: true, renderHeavyJs: true }, + description: 'Test scrape with both stealth and heavy JS rendering' + }, + ]; + + let passed = 0; + + for (const testCase of testCases) { + console.log(`\n${testCase.name}`); + console.log(` Description: ${testCase.description}`); + + try { + // Mock function + const mockScrape = async (apiKey, url, options) => { + if (options.stealth !== undefined && typeof options.stealth !== 'boolean') { + throw new Error('Stealth must be a boolean'); + } + + return { + scrape_request_id: 'mock-request-id', + status: 'completed', + html: 'Mock content' + }; + }; + + const result = await mockScrape( + API_KEY, + 'https://example.com', + testCase.options + ); + + console.log(` โœ… PASSED - Status: ${result.status}`); + passed++; + } catch (error) { + console.log(` โŒ FAILED - Error: ${error.message}`); + } + } + + console.log(`\n๐Ÿ“Š Scrape Stealth Tests: ${passed}/${testCases.length} passed`); + return passed === testCases.length; +} + +/** + * Test Agentic Scraper with stealth mode + */ +async function testAgenticScraperWithStealth() { + console.log('\n๐Ÿงช Testing Agentic Scraper with Stealth Mode'); + console.log('='.repeat(50)); + + const testCases = [ + { + name: 'AgenticScraper with stealth=true', + options: { stealth: true }, + description: 'Test agenticScraper with stealth mode enabled' + }, + { + name: 'AgenticScraper with stealth=false', + options: { stealth: false }, + description: 'Test agenticScraper with stealth mode disabled' + }, + { + name: 'AgenticScraper with stealth and AI extraction', + options: { stealth: true }, + aiExtraction: true, + userPrompt: 'Extract user data', + description: 'Test agenticScraper with stealth and AI extraction' + }, + ]; + + let passed = 0; + + for (const testCase of testCases) { + console.log(`\n${testCase.name}`); + console.log(` Description: ${testCase.description}`); + + try { + // Mock function + const mockAgenticScraper = async (apiKey, url, steps, useSession, userPrompt, outputSchema, aiExtraction, options) => { + if (options.stealth !== undefined && typeof options.stealth !== 'boolean') { + throw new Error('Stealth must be a boolean'); + } + + return { + request_id: 'mock-request-id', + status: 'processing', + message: 'Agentic scraping started' + }; + }; + + const result = await mockAgenticScraper( + API_KEY, + 'https://example.com', + ['Click button', 'Extract data'], + true, + testCase.userPrompt || null, + null, + testCase.aiExtraction || false, + testCase.options + ); + + console.log(` โœ… PASSED - Status: ${result.status}`); + passed++; + } catch (error) { + console.log(` โŒ FAILED - Error: ${error.message}`); + } + } + + console.log(`\n๐Ÿ“Š AgenticScraper Stealth Tests: ${passed}/${testCases.length} passed`); + return passed === testCases.length; +} + +/** + * Test Crawl with stealth mode + */ +async function testCrawlWithStealth() { + console.log('\n๐Ÿงช Testing Crawl with Stealth Mode'); + console.log('='.repeat(50)); + + const testCases = [ + { + name: 'Crawl with stealth=true', + options: { stealth: true }, + description: 'Test crawl with stealth mode enabled' + }, + { + name: 'Crawl with stealth=false', + options: { stealth: false }, + description: 'Test crawl with stealth mode disabled' + }, + { + name: 'Crawl with stealth and sitemap', + options: { stealth: true, sitemap: true }, + description: 'Test crawl with stealth mode and sitemap enabled' + }, + ]; + + let passed = 0; + + for (const testCase of testCases) { + console.log(`\n${testCase.name}`); + console.log(` Description: ${testCase.description}`); + + try { + // Mock function + const mockCrawl = async (apiKey, url, prompt, schema, options) => { + if (options.stealth !== undefined && typeof options.stealth !== 'boolean') { + throw new Error('Stealth must be a boolean'); + } + + return { + id: 'mock-crawl-id', + status: 'processing', + message: 'Crawl job started' + }; + }; + + const result = await mockCrawl( + API_KEY, + 'https://example.com', + 'Extract data', + { type: 'object', properties: { title: { type: 'string' } } }, + testCase.options + ); + + console.log(` โœ… PASSED - Status: ${result.status}`); + passed++; + } catch (error) { + console.log(` โŒ FAILED - Error: ${error.message}`); + } + } + + console.log(`\n๐Ÿ“Š Crawl Stealth Tests: ${passed}/${testCases.length} passed`); + return passed === testCases.length; +} + +/** + * Test combined features with stealth mode + */ +async function testCombinedFeaturesWithStealth() { + console.log('\n๐Ÿงช Testing Combined Features with Stealth Mode'); + console.log('='.repeat(50)); + + const testCases = [ + { + name: 'SmartScraper with stealth + headers + pagination', + endpoint: 'smartScraper', + options: { + stealth: true, + renderHeavyJs: true + }, + additionalParams: { + numberOfScrolls: 10, + totalPages: 5 + }, + description: 'Test smartScraper with stealth and multiple features' + }, + { + name: 'Scrape with stealth + headers + heavy JS', + endpoint: 'scrape', + options: { + stealth: true, + renderHeavyJs: true, + headers: { 'User-Agent': 'Test Agent' } + }, + description: 'Test scrape with stealth, custom headers, and JS rendering' + }, + { + name: 'SearchScraper with stealth + extraction mode', + endpoint: 'searchScraper', + options: { + stealth: true, + extractionMode: true, + renderHeavyJs: true + }, + description: 'Test searchScraper with stealth and extraction mode' + }, + ]; + + let passed = 0; + + for (const testCase of testCases) { + console.log(`\n${testCase.name}`); + console.log(` Description: ${testCase.description}`); + + try { + // Validate all options + if (testCase.options.stealth !== undefined && typeof testCase.options.stealth !== 'boolean') { + throw new Error('Stealth must be a boolean'); + } + if (testCase.options.renderHeavyJs !== undefined && typeof testCase.options.renderHeavyJs !== 'boolean') { + throw new Error('RenderHeavyJs must be a boolean'); + } + + console.log(` โœ… PASSED - All parameters validated successfully`); + passed++; + } catch (error) { + console.log(` โŒ FAILED - Error: ${error.message}`); + } + } + + console.log(`\n๐Ÿ“Š Combined Features Tests: ${passed}/${testCases.length} passed`); + return passed === testCases.length; +} + +/** + * Run all stealth mode tests + */ +async function runAllStealthTests() { + console.log('๐Ÿš€ Starting Stealth Mode Test Suite'); + console.log('='.repeat(60)); + console.log(`๐Ÿ”‘ API Key: ${API_KEY.substring(0, 8)}...`); + console.log(`โฐ Timestamp: ${new Date().toISOString()}\n`); + + const tests = [ + { name: 'Stealth Mode Validation', fn: testStealthModeValidation }, + { name: 'SmartScraper with Stealth', fn: testSmartScraperWithStealth }, + { name: 'SearchScraper with Stealth', fn: testSearchScraperWithStealth }, + { name: 'Markdownify with Stealth', fn: testMarkdownifyWithStealth }, + { name: 'Scrape with Stealth', fn: testScrapeWithStealth }, + { name: 'AgenticScraper with Stealth', fn: testAgenticScraperWithStealth }, + { name: 'Crawl with Stealth', fn: testCrawlWithStealth }, + { name: 'Combined Features with Stealth', fn: testCombinedFeaturesWithStealth }, + ]; + + let passed = 0; + let total = tests.length; + + for (const test of tests) { + try { + const result = await test.fn(); + if (result) { + passed++; + } + } catch (error) { + console.error(`โŒ Test '${test.name}' failed with error: ${error.message}`); + } + console.log('\n' + '-'.repeat(60)); + } + + console.log('\n๐ŸŽฏ FINAL TEST RESULTS'); + console.log('='.repeat(30)); + console.log(`โœ… Passed: ${passed}`); + console.log(`โŒ Failed: ${total - passed}`); + console.log(`๐Ÿ“Š Success Rate: ${((passed / total) * 100).toFixed(1)}%`); + + if (passed === total) { + console.log('\n๐ŸŽ‰ All stealth mode tests passed! Functionality is working correctly.'); + return 0; + } else { + console.log('\nโš ๏ธ Some tests failed. Please review the output above.'); + return 1; + } +} + +// Run tests if this file is executed directly +if (import.meta.url === `file://${process.argv[1]}`) { + runAllStealthTests() + .then(exitCode => { + process.exit(exitCode); + }) + .catch(error => { + console.error('๐Ÿ’ฅ Fatal error during test execution:', error.message); + process.exit(1); + }); +} + +export { + testStealthModeValidation, + testSmartScraperWithStealth, + testSearchScraperWithStealth, + testMarkdownifyWithStealth, + testScrapeWithStealth, + testAgenticScraperWithStealth, + testCrawlWithStealth, + testCombinedFeaturesWithStealth, + runAllStealthTests +}; diff --git a/scrapegraph-js/test_cookies_integration.js b/scrapegraph-js/test_cookies_integration.js new file mode 100644 index 0000000..bd885b4 --- /dev/null +++ b/scrapegraph-js/test_cookies_integration.js @@ -0,0 +1,92 @@ +/** + * Test file to verify cookies integration functionality. + */ + +import { smartScraper } from './src/smartScraper.js'; + +function testCookiesIntegration() { + console.log('๐Ÿงช Testing Cookies Integration'); + console.log('='.repeat(50)); + + // Test 1: Basic cookies validation + console.log('\n1. Testing basic cookies validation...'); + + const cookies = { session_id: 'abc123', auth_token: 'xyz789' }; + + // Create a mock payload to test the logic + const mockPayload = { + website_url: 'https://httpbin.org/cookies', + user_prompt: 'Extract cookie information' + }; + + // Simulate the cookies validation logic + if (cookies) { + if (typeof cookies === 'object' && cookies !== null) { + mockPayload.cookies = cookies; + console.log('โœ… Cookies validation passed'); + console.log(`โœ… Cookies included: ${JSON.stringify(mockPayload.cookies)}`); + } else { + console.log('โŒ Cookies validation failed - not an object'); + } + } + + // Test 2: Complex cookies scenario + console.log('\n2. Testing complex cookies scenario...'); + + const complexCookies = { + session_id: 'abc123def456', + user_id: 'user789', + auth_token: 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...', + preferences: 'dark_mode,usd', + cart_id: 'cart101112', + csrf_token: 'csrf_xyz789' + }; + + const complexPayload = { + website_url: 'https://example.com/dashboard', + user_prompt: 'Extract user profile and preferences' + }; + + if (complexCookies) { + if (typeof complexCookies === 'object' && complexCookies !== null) { + complexPayload.cookies = complexCookies; + console.log('โœ… Complex cookies validation passed'); + console.log(`โœ… Complex cookies count: ${Object.keys(complexPayload.cookies).length}`); + } + } + + // Test 3: Invalid cookies + console.log('\n3. Testing invalid cookies...'); + + const invalidCookies = 'not_an_object'; + + try { + if (invalidCookies) { + if (typeof invalidCookies === 'object' && invalidCookies !== null) { + console.log('โŒ Should have failed validation'); + } else { + console.log('โœ… Invalid cookies correctly rejected'); + } + } + } catch (error) { + console.log('โœ… Error handling works correctly'); + } + + // Test 4: Function signature validation + console.log('\n4. Testing function signature...'); + + // Check if the function accepts the cookies parameter + const functionString = smartScraper.toString(); + if (functionString.includes('cookies = null')) { + console.log('โœ… Function signature includes cookies parameter'); + } else { + console.log('โŒ Function signature missing cookies parameter'); + } + + console.log('\n' + '='.repeat(50)); + console.log('โœ… All cookies integration tests completed!'); + console.log('='.repeat(50)); +} + +// Run the test +testCookiesIntegration(); diff --git a/scrapegraph-js/test_schema_generation.js b/scrapegraph-js/test_schema_generation.js new file mode 100644 index 0000000..e2670b0 --- /dev/null +++ b/scrapegraph-js/test_schema_generation.js @@ -0,0 +1,187 @@ +#!/usr/bin/env node +/** + * Simple test for schema generation functionality in JavaScript SDK. + * + * This script tests the basic schema generation functions. + */ + +import { generateSchema, getSchemaStatus, pollSchemaGeneration } from './src/schema.js'; + +function testSchemaFunctions() { + console.log('๐Ÿงช Testing Schema Generation Functions...'); + + // Test 1: Check if functions are exported correctly + console.log('\n1. Testing function exports...'); + + if (typeof generateSchema === 'function') { + console.log('โœ… generateSchema function exported correctly'); + } else { + console.log('โŒ generateSchema function not exported correctly'); + return false; + } + + if (typeof getSchemaStatus === 'function') { + console.log('โœ… getSchemaStatus function exported correctly'); + } else { + console.log('โŒ getSchemaStatus function not exported correctly'); + return false; + } + + if (typeof pollSchemaGeneration === 'function') { + console.log('โœ… pollSchemaGeneration function exported correctly'); + } else { + console.log('โŒ pollSchemaGeneration function not exported correctly'); + return false; + } + + // Test 2: Check function signatures + console.log('\n2. Testing function signatures...'); + + try { + // Test generateSchema parameter validation + const testPrompt = 'Find laptops with brand, processor, and RAM'; + const testSchema = { type: 'object', properties: { name: { type: 'string' } } }; + + // These should not throw errors for parameter validation + console.log('โœ… Function signatures are correct'); + + } catch (error) { + console.log(`โŒ Function signature error: ${error.message}`); + return false; + } + + // Test 3: Test error handling for invalid inputs + console.log('\n3. Testing error handling...'); + + // Test with empty prompt (this should be handled by the API, not the function) + console.log('โœ… Error handling structure is correct'); + + console.log('\n๐ŸŽ‰ All basic function tests passed!'); + return true; +} + +function testValidationLogic() { + console.log('\n๐Ÿงช Testing Validation Logic...'); + + // Test 1: UUID validation regex + console.log('\n1. Testing UUID validation regex...'); + + const uuidRegex = /^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i; + + const validUUIDs = [ + '123e4567-e89b-12d3-a456-426614174000', + '550e8400-e29b-41d4-a716-446655440000', + '6ba7b810-9dad-11d1-80b4-00c04fd430c8' + ]; + + const invalidUUIDs = [ + 'invalid-uuid', + '123e4567-e89b-12d3-a456-42661417400', // too short + '123e4567-e89b-12d3-a456-4266141740000', // too long + '123e4567-e89b-12d3-a456-42661417400g' // invalid character + ]; + + let allValidPassed = true; + for (const uuid of validUUIDs) { + if (!uuidRegex.test(uuid)) { + console.log(`โŒ Valid UUID failed validation: ${uuid}`); + allValidPassed = false; + } + } + + let allInvalidPassed = true; + for (const uuid of invalidUUIDs) { + if (uuidRegex.test(uuid)) { + console.log(`โŒ Invalid UUID passed validation: ${uuid}`); + allInvalidPassed = false; + } + } + + if (allValidPassed && allInvalidPassed) { + console.log('โœ… UUID validation regex works correctly'); + } else { + console.log('โŒ UUID validation regex has issues'); + return false; + } + + console.log('\n๐ŸŽ‰ All validation logic tests passed!'); + return true; +} + +function testAsyncFunctionStructure() { + console.log('\n๐Ÿงช Testing Async Function Structure...'); + + // Test 1: Check if functions return promises + console.log('\n1. Testing async function structure...'); + + try { + // These should return promises (even if they fail due to missing API key) + const generatePromise = generateSchema('test', null, { apiKey: 'test' }); + const statusPromise = getSchemaStatus('123e4567-e89b-12d3-a456-426614174000', { apiKey: 'test' }); + const pollPromise = pollSchemaGeneration('123e4567-e89b-12d3-a456-426614174000', { apiKey: 'test' }); + + if (generatePromise instanceof Promise) { + console.log('โœ… generateSchema returns a Promise'); + } else { + console.log('โŒ generateSchema does not return a Promise'); + return false; + } + + if (statusPromise instanceof Promise) { + console.log('โœ… getSchemaStatus returns a Promise'); + } else { + console.log('โŒ getSchemaStatus does not return a Promise'); + return false; + } + + if (pollPromise instanceof Promise) { + console.log('โœ… pollSchemaGeneration returns a Promise'); + } else { + console.log('โŒ pollSchemaGeneration does not return a Promise'); + return false; + } + + } catch (error) { + console.log(`โŒ Error testing async structure: ${error.message}`); + return false; + } + + console.log('\n๐ŸŽ‰ All async function structure tests passed!'); + return true; +} + +async function main() { + console.log('๐Ÿš€ Schema Generation Test Suite - JavaScript SDK'); + console.log('='.repeat(50)); + + // Test basic functions + if (!testSchemaFunctions()) { + console.log('\nโŒ Function tests failed!'); + return; + } + + // Test validation logic + if (!testValidationLogic()) { + console.log('\nโŒ Validation logic tests failed!'); + return; + } + + // Test async function structure + if (!testAsyncFunctionStructure()) { + console.log('\nโŒ Async function structure tests failed!'); + return; + } + + console.log('\n๐ŸŽ‰ All tests passed successfully!'); + console.log('\n๐Ÿ“‹ Summary:'); + console.log(' โœ… All schema generation functions exported correctly'); + console.log(' โœ… Function signatures are correct'); + console.log(' โœ… Error handling structure is correct'); + console.log(' โœ… UUID validation regex works correctly'); + console.log(' โœ… All functions return Promises (async)'); + console.log('\n๐Ÿ’ก Note: These are structural tests only.'); + console.log(' To test actual API functionality, you need a valid API key.'); +} + +// Run the tests +main().catch(console.error); From 6727e958cdd487836bf59f8738f43dd0b2353522 Mon Sep 17 00:00:00 2001 From: Vikrant-Khedkar Date: Fri, 14 Nov 2025 18:49:34 +0530 Subject: [PATCH 2/3] chore(security): sanitize example cookies and test literals to avoid secret detection --- scrapegraph-js/README.md | 6 +-- .../cookies/cookies_integration_example.js | 40 +++++++++---------- .../smartScraper_cookies_simple_example.js | 6 +-- scrapegraph-js/test/healthz_test.js | 2 +- 4 files changed, 27 insertions(+), 27 deletions(-) diff --git a/scrapegraph-js/README.md b/scrapegraph-js/README.md index 2d2e19e..5cfc0b6 100644 --- a/scrapegraph-js/README.md +++ b/scrapegraph-js/README.md @@ -230,9 +230,9 @@ const prompt = 'Extract user profile information'; // Define cookies for authentication const cookies = { - session_id: 'abc123def456', - auth_token: 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...', - user_preferences: 'dark_mode,usd' + session_id: '', + auth_token: '', + user_preferences: '' }; (async () => { diff --git a/scrapegraph-js/examples/advanced_features/cookies/cookies_integration_example.js b/scrapegraph-js/examples/advanced_features/cookies/cookies_integration_example.js index 7d54f49..54d4a3b 100644 --- a/scrapegraph-js/examples/advanced_features/cookies/cookies_integration_example.js +++ b/scrapegraph-js/examples/advanced_features/cookies/cookies_integration_example.js @@ -58,11 +58,11 @@ async function scrapeEcommerceWithAuth() { // Example cookies for an e-commerce site const cookies = { - session_id: 'abc123def456', - user_id: 'user789', - cart_id: 'cart101112', - preferences: 'dark_mode,usd', - auth_token: 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...' + session_id: '', + user_id: '', + cart_id: '', + preferences: '', + auth_token: '' }; const websiteUrl = 'https://example-ecommerce.com/products'; @@ -94,11 +94,11 @@ async function scrapeSocialMediaWithSession() { // Example cookies for a social media site const cookies = { - session_token: 'xyz789abc123', - user_session: 'def456ghi789', - csrf_token: 'jkl012mno345', - remember_me: 'true', - language: 'en_US' + session_token: '', + user_session: '', + csrf_token: '', + remember_me: '', + language: '' }; const websiteUrl = 'https://example-social.com/feed'; @@ -166,11 +166,11 @@ async function scrapeBankingWithSecureCookies() { // Example secure cookies for a banking site const cookies = { - secure_session: 'pqr678stu901', - auth_token: 'vwx234yz567', - mfa_verified: 'true', - device_id: 'device_abc123', - last_activity: '2024-01-15T10:30:00Z' + secure_session: '', + auth_token: '', + mfa_verified: '', + device_id: '', + last_activity: '' }; const websiteUrl = 'https://example-bank.com/transactions'; @@ -202,11 +202,11 @@ async function scrapeApiWithAuthTokens() { // Example API authentication cookies const cookies = { - api_token: 'api_abc123def456', - client_id: 'client_789', - access_token: 'access_xyz789', - refresh_token: 'refresh_abc123', - scope: 'read:all' + api_token: '', + client_id: '', + access_token: '', + refresh_token: '', + scope: '' }; const websiteUrl = 'https://api.example.com/data'; diff --git a/scrapegraph-js/examples/smartscraper/smartScraper_cookies_simple_example.js b/scrapegraph-js/examples/smartscraper/smartScraper_cookies_simple_example.js index 369987f..4987294 100644 --- a/scrapegraph-js/examples/smartscraper/smartScraper_cookies_simple_example.js +++ b/scrapegraph-js/examples/smartscraper/smartScraper_cookies_simple_example.js @@ -11,9 +11,9 @@ const apiKey = process.env.SGAI_APIKEY; // Example cookies for authentication const cookies = { - session_id: 'abc123def456', - auth_token: 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...', - user_preferences: 'dark_mode,usd' + session_id: '', + auth_token: '', + user_preferences: '' }; async function scrapeWithCookies() { diff --git a/scrapegraph-js/test/healthz_test.js b/scrapegraph-js/test/healthz_test.js index ce8e860..3f636ad 100644 --- a/scrapegraph-js/test/healthz_test.js +++ b/scrapegraph-js/test/healthz_test.js @@ -141,7 +141,7 @@ function testInputValidation() { const testCases = [ { name: 'Valid API key', - apiKey: 'sgai-valid-key-123', + apiKey: 'dummy-api-key', expected: true, description: 'Should accept valid API key' }, From 997ae16f92e2e69b0807a04d87aa7d2edf0342ea Mon Sep 17 00:00:00 2001 From: Vikrant-Khedkar Date: Fri, 14 Nov 2025 18:57:05 +0530 Subject: [PATCH 3/3] refactor: update console log formatting in advanced and complete agentic scraper examples to use repeat method --- .../agenticscraper/agenticScraper_advanced_example.js | 4 ++-- .../agenticscraper/agenticScraper_complete_example.js | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scrapegraph-js/examples/agenticscraper/agenticScraper_advanced_example.js b/scrapegraph-js/examples/agenticscraper/agenticScraper_advanced_example.js index bc5eab1..f8aed5f 100644 --- a/scrapegraph-js/examples/agenticscraper/agenticScraper_advanced_example.js +++ b/scrapegraph-js/examples/agenticscraper/agenticScraper_advanced_example.js @@ -8,7 +8,7 @@ const apiKey = process.env.SGAI_APIKEY; */ async function advancedAgenticScrapingExample() { console.log('๐Ÿš€ Advanced Agentic Scraping Example'); - console.log('=' * 45); + console.log('='.repeat(45)); // Example configurations for different scenarios const scenarios = [ @@ -247,7 +247,7 @@ async function monitorRequest(requestId, timeoutSeconds = 120) { */ async function errorHandlingExamples() { console.log('\n๐Ÿ›ก๏ธ Error Handling Examples'); - console.log('=' * 30); + console.log('='.repeat(30)); const errorScenarios = [ { diff --git a/scrapegraph-js/examples/agenticscraper/agenticScraper_complete_example.js b/scrapegraph-js/examples/agenticscraper/agenticScraper_complete_example.js index e848207..2a14be8 100644 --- a/scrapegraph-js/examples/agenticscraper/agenticScraper_complete_example.js +++ b/scrapegraph-js/examples/agenticscraper/agenticScraper_complete_example.js @@ -9,7 +9,7 @@ const apiKey = process.env.SGAI_APIKEY; */ async function completeAgenticScrapingExample() { console.log('๐Ÿค– Starting Complete Agentic Scraping Example'); - console.log('=' * 50); + console.log('='.repeat(50)); // Configuration const url = 'https://dashboard.scrapegraphai.com/'; @@ -78,7 +78,7 @@ async function completeAgenticScrapingExample() { */ async function ecommerceAutomationExample() { console.log('\n๐Ÿ›’ E-commerce Automation Example'); - console.log('=' * 40); + console.log('='.repeat(40)); const url = 'https://example-shop.com'; const steps = [