feat: initial polysearch - multi-engine search with proxy distribution, circuit breaker, REST API, and metrics

This commit is contained in:
amancca 2026-06-05 14:39:11 +03:00
commit 5bd3b9d06b
28 changed files with 2780 additions and 0 deletions

11
.env.example Normal file
View file

@ -0,0 +1,11 @@
# Webshare.io API key for automatic proxy fetching
# Get yours at https://www.webshare.io/user/api
WEBSHARE_API_KEY=
# Oxylabs datacenter proxies (dc.oxylabs.io:8000)
# https://developers.oxylabs.io/proxies/datacenter-proxies
OXYLABS_USERNAME=
OXYLABS_PASSWORD=
OXYLABS_COUNTRY=US
# Future: Add other proxy providers here (BrightData, Smartproxy, etc.)

5
.gitignore vendored Normal file
View file

@ -0,0 +1,5 @@
node_modules/
.env
config.json
proxies.txt
*.log

281
API.md Normal file
View file

@ -0,0 +1,281 @@
# PolySearch API
A search API for AI agents. Submit queries, get structured results. Supports single and batch requests.
**Base URL:** `http://<host>:9876`
**Auth:** All endpoints except `/health` require a Bearer token.
```http
Authorization: Bearer <api_key>
```
---
## Quick reference
| Endpoint | Method | Auth | Purpose |
|----------|--------|------|---------|
| `/health` | GET | No | Ping the server |
| `/search` | POST | Yes | One query, one response |
| `/batch` | POST | Yes | Multiple queries, one response |
| `/metrics` | GET | Yes | Proxy pool health and usage |
---
## Authentication
The server operator provides an API key. Include it in every request (except `/health`):
```bash
curl -H "Authorization: Bearer <key>" http://localhost:9876/search ...
```
---
## `GET /health`
Check if the server is running.
```bash
curl http://localhost:9876/health
```
```json
{
"success": true,
"status": "ok",
"uptime": 42.5,
"proxyPool": {
"total": 11,
"alive": 11,
"dead": 0,
"requestsTotal": 47,
"successRate": "91.5%",
"hourlyUsage": 12
}
}
```
---
## `POST /search`
Single search. Returns either image results, web results, or both.
```bash
curl -X POST http://localhost:9876/search \
-H "Authorization: Bearer <key>" \
-H "Content-Type: application/json" \
-d '{"query": "mars rover", "type": "image", "limit": 3}'
```
### Request
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `query` | string | yes | — | What to search for |
| `type` | string | no | `image` | `image`, `web`, or `both` |
| `limit` | number | no | `10` | Max results per type (1–50) |
### Response — Image results
```json
{
"success": true,
"query": "mars rover",
"type": "image",
"execution_time_ms": 3200,
"image": {
"engine": "duckduckgo",
"total": 3,
"results": [
{
"index": 1,
"title": "NASA Perseverance Rover's Stunning Find...",
"image_url": "https://scitechdaily.com/images/...jpg",
"source_url": "https://scitechdaily.com/...",
"domain": "scitechdaily.com",
"width": 2560,
"height": 1818,
"thumbnail": "https://tse4.mm.bing.net/th?id=...",
"engine": "duckduckgo"
}
],
"statistics": {
"avg_width": 3200,
"avg_height": 1989,
"domains": {
"scitechdaily.com": 1,
"static1.simpleflyingimages.com": 2
}
}
}
}
```
Each image result contains:
| Field | Type | Description |
|-------|------|-------------|
| `title` | string | Image description or caption |
| `image_url` | string | Direct URL to the image file |
| `source_url` | string | Page the image was found on |
| `domain` | string | Hosting domain |
| `width` | number | Image width (pixels) |
| `height` | number | Image height (pixels) |
| `thumbnail` | string | Thumbnail URL |
| `engine` | string | Search engine used |
### Response — Web results
When `type` is `web` or `both`:
```json
{
"web": {
"engine": "duckduckgo",
"total": 3,
"results": [
{
"index": 1,
"title": "Quantum computing - Wikipedia",
"url": "https://en.wikipedia.org/wiki/Quantum_computing",
"domain": "en.wikipedia.org",
"snippet": "A quantum computer is a computer that exploits quantum mechanical phenomena...",
"engine": "duckduckgo"
}
],
"statistics": {
"domains": {
"en.wikipedia.org": 1
}
}
}
}
```
### Response — Error
```json
{
"success": false,
"error": {
"code": "ENGINE_FAILED",
"message": "All search engines returned errors",
"type": "Error"
},
"query": "obscure term",
"timestamp": "2026-06-05T10:55:25.088Z"
}
```
---
## `POST /batch`
Run 2–50 queries in a single request. Queries execute concurrently. Results return when all are complete.
```bash
curl -X POST http://localhost:9876/batch \
-H "Authorization: Bearer <key>" \
-H "Content-Type: application/json" \
-d '{
"queries": [
{"query": "vintage radio", "type": "image", "limit": 2},
{"query": "mars rover", "type": "image", "limit": 2},
{"query": "aurora borealis","type": "image", "limit": 2}
]
}'
```
### Request
| Field | Type | Required | Description |
|-------|------|----------|-------------|
| `queries` | array | yes | 2–50 query objects |
Each query object:
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `query` | string | yes | — | Search term |
| `type` | string | no | `image` | `image`, `web`, or `both` |
| `limit` | number | no | `10` | Max results per type |
### Response
```json
{
"success": true,
"batch_size": 3,
"execution_time_ms": 4511,
"ok": 3,
"fail": 0,
"results": [
{
"index": 0,
"query": "vintage radio",
"type": "image",
"success": true,
"execution_time_ms": 4313,
"results": { ... full search response ... },
"errors": []
}
],
"metrics": {
"hourly_usage": 7
}
}
```
Each result mirrors the single `/search` response. Failed queries include an `error` field instead of `results`.
Use `batch_size` to verify all queries were accepted, and `ok` / `fail` for a quick success count.
---
## `GET /metrics`
Proxy pool statistics. Useful for monitoring health and utilization.
```bash
curl -H "Authorization: Bearer <key>" http://localhost:9876/metrics
```
```json
{
"success": true,
"metrics": {
"totalProxies": 11,
"alive": 11,
"dead": 0,
"circuitOpen": 0,
"requestsTotal": 47,
"successTotal": 43,
"failureTotal": 4,
"successRate": "91.5%",
"hourlyUsageCurrent": 12,
"byProvider": {
"webshare": {
"proxyCount": 10,
"alive": 10,
"requests": 47,
"success": 43,
"failure": 4,
"avgLatencyMs": 3604,
"successRate": "91.5%",
"hourlyUsage": { "currentHour": 12 }
}
}
}
}
```
---
## Notes
- **Rate limits** depend on the proxy provider. The system distributes requests evenly and opens circuit breakers when proxies fail repeatedly.
- **Timeouts** vary. Image searches typically take 2–15 seconds depending on query and network conditions.
- **Max batch size** is 50 queries per request.

115
README.md Normal file
View file

@ -0,0 +1,115 @@
# PolySearch
Multi-engine web + image search with smart proxy distribution, circuit breakers, structured AI agent output, and a REST API.
```bash
node src/index.js -q "quantum computing" -t both -l 10 -m agent
```
## Features
- **Web + Image search** — both result types, one tool
- **Smart proxy distribution** — least-used selection per hour, balanced across providers
- **Circuit breaker per proxy** — exponential backoff on failure, auto-recovery
- **Multi-provider proxy system** — add Webshare, Oxylabs, BrightData in one file each
- **Multi-engine architecture** — add Brave, Bing, Google in one file each
- **Per-provider metrics** — requests, success rate, latency, hourly usage grouped by provider
- **Dual output modes**:
- `human` — colorized terminal
- `agent` — structured JSON with statistics
- **REST API** — single search, batch search, auth with API keys. See [API.md](API.md)
## Requirements
Node.js 18+
## Quick start
```bash
# Single image search
node src/index.js -q "vintage radio"
# Web search
node src/index.js -q "quantum computing" -t web
# Both types, AI agent JSON
node src/index.js -q "spacex starship" -t both -l 10 -m agent
# Show proxy metrics after a search
node src/index.js -q "mars rover" -M
```
## CLI
| Flag | Long | Description | Default |
|------|------|-------------|---------|
| `-q` | `--query` | Search query | — |
| `-t` | `--type` | `web`, `image`, or `both` | `image` |
| `-l` | `--limit` | Max results per type | `10` |
| `-m` | `--mode` | `human` or `agent` | `human` |
| `-p` | `--proxy` | Single proxy URL override | — |
| `-c` | `--config` | Path to config file | auto-detect |
| `-M` | `--metrics` | Dump proxy pool metrics | — |
| | `--serve` | Start REST API server | — |
| | `--port` | API server port | `9876` |
| | `--generate-key` | Generate API key | — |
| `-h` | `--help` | Show help | — |
## REST API
For AI agent consumption. See [API.md](API.md) for full documentation.
```bash
node src/index.js --generate-key # create an API key
node src/index.js --serve --port 9876 # start the server
```
**Endpoints:** `GET /health`, `POST /search`, `POST /batch`, `GET /metrics`
---
## Proxy providers
Providers are auto-discovered from environment variables:
| Provider | Env vars | Type |
|----------|----------|------|
| Webshare | `WEBSHARE_API_KEY` | API-fetched, 10 rotating IPs |
| Oxylabs | `OXYLABS_USERNAME`, `OXYLABS_PASSWORD`, `OXYLABS_COUNTRY` | Single datacenter endpoint |
Add a new provider by creating a file in `src/http/providers/` that calls `registerProvider(name, fetcher)`. The fetcher returns an array of proxy URL strings.
## Engine architecture
Engines are registered in `src/engines/setup.js`. Each engine supports `web`, `image`, or both. DuckDuckGo is the default. Add Brave, Bing, or custom engines by implementing the `search(query, opts)` interface.
## Project structure
```
src/
├── index.js # CLI + programmatic API + API server dispatch
├── api.js # REST API server (/search, /batch, /metrics, /health)
├── api-key.js # Key generation + env storage
├── cli.js # Argument parsing
├── config.js # Config loader (json + env + providers)
├── run.js # Search orchestration + engine fallback
├── engines/
│ ├── base.js # Abstract engine interface
│ ├── index.js # Engine registry
│ ├── setup.js # Built-in engine registration
│ └── duckduckgo.js # DuckDuckGo (web + image)
├── http/
│ ├── client.js # Fetch wrapper (proxy, retry, timeout, UA)
│ ├── proxy.js # Proxy pool (least-used, circuit breaker, metrics)
│ └── providers/
│ ├── index.js # Provider registry
│ ├── webshare.js # Webshare.io
│ └── oxylabs.js # Oxylabs datacenter
├── output/
│ ├── human.js # Terminal formatting
│ └── agent.js # JSON formatting
└── utils/
├── logger.js # Pino structured logging
├── retry.js # Exponential backoff + jitter
└── ua.js # User-agent pool
```

200
benchmark.js Normal file
View file

@ -0,0 +1,200 @@
#!/usr/bin/env node
import { loadConfig } from "./src/config.js";
import { HttpClient } from "./src/http/client.js";
import { ProxyPool } from "./src/http/proxy.js";
import { SearchRunner } from "./src/run.js";
import { setUserAgents } from "./src/utils/ua.js";
import { childLogger } from "./src/utils/logger.js";
// Fixed sample of 30 queries issued with type "web" by main().
const WEB_QUERIES = [
"quantum computing", "machine learning", "renaissance art", "solar system",
"ancient rome", "climate change", "python programming", "space exploration",
"world war 2", "ocean depth", "artificial intelligence", "mount everest",
"greek mythology", "industrial revolution", "human genome", "black holes",
"coral reef", "buddhism history", "cold war", "mars colonization",
"electric vehicles", "great barrier reef", "dark matter", "dinosaur fossils",
"ancient egypt pyramids", "big bang theory", "amazon rainforest",
"vitamin deficiency", "stock market crash 1929", "northern lights"
];
// Fixed sample of 30 queries issued with type "image" by main().
const IMAGE_QUERIES = [
"vintage radio", "mars rover", "aurora borealis", "mountain landscape",
"classic cars", "modern architecture", "street photography", "wild animals",
"space nebula", "underwater coral", "sunset beach", "city skyline",
"butterfly macro", "starry night sky", "tropical forest", "medieval castle",
"abstract art", "vintage motorcycles", "taj mahal", "rainforest waterfall",
"northern lights norway", "japanese garden", "safari animals", "galaxy cluster",
"old steam train", "desert dunes", "cherry blossom", "iceberg antarctica",
"neon city night", "autumn forest path"
];
/**
 * Value at quantile `q` of an ascending-sorted, non-empty array.
 * Uses `floor(length * q)` as the index, so for q < 1 the index is always in range.
 */
function percentile(sorted, q) {
  return sorted[Math.floor(sorted.length * q)];
}

/** Arithmetic mean of a non-empty numeric array. */
function mean(values) {
  return values.reduce((sum, v) => sum + v, 0) / values.length;
}

/** Count occurrences of each key, preserving first-seen order. */
function tally(keys) {
  const counts = new Map();
  for (const key of keys) {
    counts.set(key, (counts.get(key) || 0) + 1);
  }
  return counts;
}

/**
 * Print a human-readable benchmark summary to stdout.
 *
 * Sections: overall totals, one section per request type (latency and
 * payload-size percentiles, engines used, proxy usage), and a breakdown of
 * error messages truncated to 60 characters.
 *
 * @param {Array<object>} results - Entries shaped like the return value of
 *   runSingleTest (`type`, `success`, `durationMs`, and optionally
 *   `dataSizeKb`, `engine`, `proxyHost`, `error`, `proxyCount`).
 * @returns {void}
 */
function generateReport(results) {
  const total = results.length;
  const success = results.filter(r => r.success).length;
  const failed = total - success;
  // Guard the 0/0 case: an empty run used to print "NaN%".
  const successRate = total === 0 ? "0.0" : (success / total * 100).toFixed(1);
  const failedRate = total === 0 ? "0.0" : (100 - parseFloat(successRate)).toFixed(1);

  // Group by type; String() keeps a missing type printable as "UNDEFINED".
  const byType = new Map();
  for (const r of results) {
    const key = String(r.type);
    if (!byType.has(key)) byType.set(key, []);
    byType.get(key).push(r);
  }

  console.log("\n" + "=".repeat(80));
  console.log(" BENCHMARK REPORT");
  console.log("=".repeat(80));
  console.log(`\n Total requests: ${total}`);
  console.log(` Successful: ${success} (${successRate}%)`);
  console.log(` Failed: ${failed} (${failedRate}%)`);
  console.log(` Date: ${new Date().toISOString()}`);
  console.log(` Proxy pool: ${results[0]?.proxyCount || "N/A"} proxies`);

  for (const [type, items] of byType) {
    const succeeded = items.filter(r => r.success);
    const failures = items.filter(r => !r.success);
    const times = succeeded.map(r => r.durationMs).sort((a, b) => a - b);
    const dataSizes = items
      .filter(r => r.dataSizeKb != null)
      .map(r => r.dataSizeKb)
      .sort((a, b) => a - b);

    console.log(`\n ── ${type.toUpperCase()} (${items.length} requests, ${succeeded.length} ok / ${failures.length} fail) ──`);

    if (times.length > 0) {
      const avg = mean(times);
      const p50 = percentile(times, 0.5);
      const p95 = percentile(times, 0.95);
      const p99 = percentile(times, 0.99);
      const min = times[0];
      const max = times[times.length - 1];
      console.log(` Response time (ms): avg=${avg.toFixed(0)} p50=${p50} p95=${p95} p99=${p99} min=${min} max=${max}`);
    }

    if (dataSizes.length > 0) {
      const avg = mean(dataSizes);
      const p50 = percentile(dataSizes, 0.5);
      const p95 = percentile(dataSizes, 0.95);
      console.log(` Data size (KB): avg=${avg.toFixed(2)} p50=${p50.toFixed(1)} p95=${p95.toFixed(1)}`);
    }

    const engineCounts = tally(succeeded.map(r => r.engine || "unknown"));
    if (engineCounts.size > 0) {
      console.log(` Engines used: ${[...engineCounts].map(([k, v]) => `${k}=${v}`).join(", ")}`);
    }

    const proxiesUsed = new Set(succeeded.map(r => r.proxyHost).filter(Boolean));
    const proxyFails = failures.map(r => r.proxyHost).filter(Boolean);
    if (proxiesUsed.size > 0) {
      console.log(` Distinct proxies used: ${proxiesUsed.size}`);
      console.log(` Proxy failures: ${proxyFails.length}`);
    }
  }

  // Bucket failures by the first 60 characters of their message.
  const errorTypes = tally(
    results.filter(r => !r.success && r.error).map(r => r.error.substring(0, 60))
  );
  if (errorTypes.size > 0) {
    console.log(`\n ── Error Breakdown ──`);
    for (const [err, count] of [...errorTypes].sort((a, b) => b[1] - a[1])) {
      console.log(` [${count}x] ${err}`);
    }
  }

  console.log("=".repeat(80) + "\n");
}
/**
 * Execute one benchmark query and summarise the outcome as a flat record.
 *
 * Never rejects: any error thrown while running or measuring the query is
 * converted into a `success: false` record carrying the error message.
 *
 * @param {{ run: Function }} runner - Object exposing `run({ query, type, limit })`.
 * @param {string} query - Search text.
 * @param {string} type - Result section to inspect on the response (e.g. "web" or "image").
 * @param {number} index - Caller-assigned sequence number, echoed back.
 * @returns {Promise<object>} Timing, size and result-count summary.
 */
async function runSingleTest(runner, query, type, index) {
  const startedAt = Date.now();
  const identity = { index, query, type };

  try {
    const payload = await runner.run({ query, type, limit: 3 });
    const elapsed = Date.now() - startedAt;
    const section = payload[type];
    const hits = section?.results || [];

    return {
      ...identity,
      success: true,
      durationMs: elapsed,
      dataSizeKb: JSON.stringify(payload).length / 1024,
      resultCount: hits.length,
      engine: section?.engine || "none",
      proxyHost: null,
      errors: payload.errors?.length || 0
    };
  } catch (failure) {
    return {
      ...identity,
      success: false,
      durationMs: Date.now() - startedAt,
      error: failure.message,
      resultCount: 0,
      engine: "none",
      proxyHost: null
    };
  }
}
/**
 * Benchmark entry point.
 *
 * Loads configuration, wires an HttpClient (with the proxy pool when
 * `config.proxy.enabled`), runs every web and image query in concurrent
 * batches of five, prints a per-query status line, then logs a summary and
 * prints the full report.
 *
 * The proxy pool is destroyed in a `finally` block, mirroring the API
 * server's shutdown path, so the pool is released whether or not the run
 * succeeds.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const log = childLogger({ component: "benchmark" });
  log.info({ webCount: WEB_QUERIES.length, imageCount: IMAGE_QUERIES.length }, "starting benchmark");

  const config = await loadConfig();
  if (config.http.user_agents) setUserAgents(config.http.user_agents);

  const proxyPool = new ProxyPool(config.proxies, config.proxy);
  try {
    const httpClient = new HttpClient(config.http);
    if (config.proxy.enabled) {
      httpClient.setProxyPool(proxyPool);
      log.info({ count: config.proxies.length }, "proxy pool attached");
      proxyPool.logState();
    }

    const runner = new SearchRunner({ httpClient, config });
    const allResults = [];
    const queries = [
      ...WEB_QUERIES.map(q => ({ query: q, type: "web" })),
      ...IMAGE_QUERIES.map(q => ({ query: q, type: "image" }))
    ];

    // Queries inside a batch run concurrently; batches run one after another.
    const batchSize = 5;
    for (let i = 0; i < queries.length; i += batchSize) {
      const batch = queries.slice(i, i + batchSize);
      log.info({ batch: Math.floor(i / batchSize) + 1, total: queries.length }, `running batch`);

      // runSingleTest never rejects, so Promise.all cannot fail fast here.
      const batchResults = await Promise.all(
        batch.map((item, j) => {
          const idx = i + j + 1; // 1-based position across the whole run
          return runSingleTest(runner, item.query, item.type, idx);
        })
      );
      allResults.push(...batchResults);

      for (const r of batchResults) {
        const icon = r.success ? "✓" : "✗";
        console.log(`${icon} [${r.type.padEnd(5)}] #${String(r.index).padStart(2)} "${r.query.slice(0, 30).padEnd(30)}" ${r.success ? `${r.durationMs}ms` : `FAIL: ${r.error?.slice(0, 40)}`}`);
      }
    }

    const totalDuration = allResults.reduce((s, r) => s + r.durationMs, 0);
    log.info({
      totalRequests: allResults.length,
      totalDurationMs: totalDuration,
      avgDurationMs: Math.round(totalDuration / allResults.length),
      successRate: `${(allResults.filter(r => r.success).length / allResults.length * 100).toFixed(1)}%`
    }, "benchmark complete");

    generateReport(allResults);
  } finally {
    // Release the pool on every exit path (same call the API server makes on shutdown).
    proxyPool.destroy();
  }
}

// Do not leave the promise floating: report the failure and exit non-zero.
main().catch(err => {
  console.error(err);
  process.exitCode = 1;
});

39
config.example.json Normal file
View file

@ -0,0 +1,39 @@
{
"engines": {
"duckduckgo": {
"priority": 1,
"enabled": true,
"timeout_ms": 10000,
"retries": 2
},
"brave": {
"priority": 2,
"enabled": false,
"timeout_ms": 8000,
"retries": 1,
"api_key": ""
}
},
"proxy": {
"enabled": false,
"rotation": "round-robin",
"health_check_interval_ms": 60000,
"max_failures": 3
},
"http": {
"timeout_ms": 10000,
"retry_max_attempts": 3,
"retry_base_delay_ms": 1000,
"retry_max_delay_ms": 10000,
"user_agents": [
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
]
},
"search": {
"default_type": "image",
"default_limit": 10,
"max_limit": 50
}
}

182
package-lock.json generated Normal file
View file

@ -0,0 +1,182 @@
{
"name": "image-search",
"version": "2.0.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "image-search",
"version": "2.0.0",
"license": "MIT",
"dependencies": {
"dotenv": "^17.4.2",
"pino": "^10.3.1",
"undici": "^7.27.1"
},
"bin": {
"image-search": "src/index.js"
},
"engines": {
"node": ">=18.0.0"
}
},
"node_modules/@pinojs/redact": {
"version": "0.4.0",
"resolved": "https://registry.npmjs.org/@pinojs/redact/-/redact-0.4.0.tgz",
"integrity": "sha512-k2ENnmBugE/rzQfEcdWHcCY+/FM3VLzH9cYEsbdsoqrvzAKRhUZeRNhAZvB8OitQJ1TBed3yqWtdjzS6wJKBwg==",
"license": "MIT"
},
"node_modules/atomic-sleep": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/atomic-sleep/-/atomic-sleep-1.0.0.tgz",
"integrity": "sha512-kNOjDqAh7px0XWNI+4QbzoiR/nTkHAWNud2uvnJquD1/x5a7EQZMJT0AczqK0Qn67oY/TTQ1LbUKajZpp3I9tQ==",
"license": "MIT",
"engines": {
"node": ">=8.0.0"
}
},
"node_modules/dotenv": {
"version": "17.4.2",
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-17.4.2.tgz",
"integrity": "sha512-nI4U3TottKAcAD9LLud4Cb7b2QztQMUEfHbvhTH09bqXTxnSie8WnjPALV/WMCrJZ6UV/qHJ6L03OqO3LcdYZw==",
"license": "BSD-2-Clause",
"engines": {
"node": ">=12"
},
"funding": {
"url": "https://dotenvx.com"
}
},
"node_modules/on-exit-leak-free": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/on-exit-leak-free/-/on-exit-leak-free-2.1.2.tgz",
"integrity": "sha512-0eJJY6hXLGf1udHwfNftBqH+g73EU4B504nZeKpz1sYRKafAghwxEJunB2O7rDZkL4PGfsMVnTXZ2EjibbqcsA==",
"license": "MIT",
"engines": {
"node": ">=14.0.0"
}
},
"node_modules/pino": {
"version": "10.3.1",
"resolved": "https://registry.npmjs.org/pino/-/pino-10.3.1.tgz",
"integrity": "sha512-r34yH/GlQpKZbU1BvFFqOjhISRo1MNx1tWYsYvmj6KIRHSPMT2+yHOEb1SG6NMvRoHRF0a07kCOox/9yakl1vg==",
"license": "MIT",
"dependencies": {
"@pinojs/redact": "^0.4.0",
"atomic-sleep": "^1.0.0",
"on-exit-leak-free": "^2.1.0",
"pino-abstract-transport": "^3.0.0",
"pino-std-serializers": "^7.0.0",
"process-warning": "^5.0.0",
"quick-format-unescaped": "^4.0.3",
"real-require": "^0.2.0",
"safe-stable-stringify": "^2.3.1",
"sonic-boom": "^4.0.1",
"thread-stream": "^4.0.0"
},
"bin": {
"pino": "bin.js"
}
},
"node_modules/pino-abstract-transport": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/pino-abstract-transport/-/pino-abstract-transport-3.0.0.tgz",
"integrity": "sha512-wlfUczU+n7Hy/Ha5j9a/gZNy7We5+cXp8YL+X+PG8S0KXxw7n/JXA3c46Y0zQznIJ83URJiwy7Lh56WLokNuxg==",
"license": "MIT",
"dependencies": {
"split2": "^4.0.0"
}
},
"node_modules/pino-std-serializers": {
"version": "7.1.0",
"resolved": "https://registry.npmjs.org/pino-std-serializers/-/pino-std-serializers-7.1.0.tgz",
"integrity": "sha512-BndPH67/JxGExRgiX1dX0w1FvZck5Wa4aal9198SrRhZjH3GxKQUKIBnYJTdj2HDN3UQAS06HlfcSbQj2OHmaw==",
"license": "MIT"
},
"node_modules/process-warning": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/process-warning/-/process-warning-5.0.0.tgz",
"integrity": "sha512-a39t9ApHNx2L4+HBnQKqxxHNs1r7KF+Intd8Q/g1bUh6q0WIp9voPXJ/x0j+ZL45KF1pJd9+q2jLIRMfvEshkA==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/fastify"
},
{
"type": "opencollective",
"url": "https://opencollective.com/fastify"
}
],
"license": "MIT"
},
"node_modules/quick-format-unescaped": {
"version": "4.0.4",
"resolved": "https://registry.npmjs.org/quick-format-unescaped/-/quick-format-unescaped-4.0.4.tgz",
"integrity": "sha512-tYC1Q1hgyRuHgloV/YXs2w15unPVh8qfu/qCTfhTYamaw7fyhumKa2yGpdSo87vY32rIclj+4fWYQXUMs9EHvg==",
"license": "MIT"
},
"node_modules/real-require": {
"version": "0.2.0",
"resolved": "https://registry.npmjs.org/real-require/-/real-require-0.2.0.tgz",
"integrity": "sha512-57frrGM/OCTLqLOAh0mhVA9VBMHd+9U7Zb2THMGdBUoZVOtGbJzjxsYGDJ3A9AYYCP4hn6y1TVbaOfzWtm5GFg==",
"license": "MIT",
"engines": {
"node": ">= 12.13.0"
}
},
"node_modules/safe-stable-stringify": {
"version": "2.5.0",
"resolved": "https://registry.npmjs.org/safe-stable-stringify/-/safe-stable-stringify-2.5.0.tgz",
"integrity": "sha512-b3rppTKm9T+PsVCBEOUR46GWI7fdOs00VKZ1+9c1EWDaDMvjQc6tUwuFyIprgGgTcWoVHSKrU8H31ZHA2e0RHA==",
"license": "MIT",
"engines": {
"node": ">=10"
}
},
"node_modules/sonic-boom": {
"version": "4.2.1",
"resolved": "https://registry.npmjs.org/sonic-boom/-/sonic-boom-4.2.1.tgz",
"integrity": "sha512-w6AxtubXa2wTXAUsZMMWERrsIRAdrK0Sc+FUytWvYAhBJLyuI4llrMIC1DtlNSdI99EI86KZum2MMq3EAZlF9Q==",
"license": "MIT",
"dependencies": {
"atomic-sleep": "^1.0.0"
}
},
"node_modules/split2": {
"version": "4.2.0",
"resolved": "https://registry.npmjs.org/split2/-/split2-4.2.0.tgz",
"integrity": "sha512-UcjcJOWknrNkF6PLX83qcHM6KHgVKNkV62Y8a5uYDVv9ydGQVwAHMKqHdJje1VTWpljG0WYpCDhrCdAOYH4TWg==",
"license": "ISC",
"engines": {
"node": ">= 10.x"
}
},
"node_modules/thread-stream": {
"version": "4.2.0",
"resolved": "https://registry.npmjs.org/thread-stream/-/thread-stream-4.2.0.tgz",
"integrity": "sha512-e2zZ96wSChazBsbENf/Pcm/4swHt2cEKQ92rhUjkL9GCKiTDJIaTBenjE/m9DXi0QBmTMDkFDdOomUy20A1tDQ==",
"license": "MIT",
"dependencies": {
"real-require": "^1.0.0"
},
"engines": {
"node": ">=20"
}
},
"node_modules/thread-stream/node_modules/real-require": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/real-require/-/real-require-1.0.0.tgz",
"integrity": "sha512-P4nbQYQfePJxRSmY+v/KINxVucm4NF3p3s7pJveMTtom52FR4YGltUQLB8idDXwDDWW+eYrWDFbuzUnjoWHF7g==",
"license": "MIT"
},
"node_modules/undici": {
"version": "7.27.1",
"resolved": "https://registry.npmjs.org/undici/-/undici-7.27.1.tgz",
"integrity": "sha512-UDdpiex+mzigiyrXrGbiUaF4HzTNhKbh2vRNFaTMzcqmLIPrZxaCtwo/1TMSuWoM1Xz3WiTo9KdgI3kRqYzJGg==",
"license": "MIT",
"engines": {
"node": ">=20.18.1"
}
}
}
}

39
package.json Normal file
View file

@ -0,0 +1,39 @@
{
"name": "polysearch",
"version": "2.0.0",
"description": "Multi-engine web + image search with proxy rotation, circuit breaker, and structured AI agent output",
"type": "module",
"main": "src/index.js",
"bin": {
"polysearch": "src/index.js"
},
"scripts": {
"search": "node src/index.js",
"agent": "node src/index.js --mode agent",
"human": "node src/index.js --mode human",
"metrics": "node src/index.js -M",
"help": "node src/index.js --help"
},
"keywords": [
"polysearch",
"web-search",
"image-search",
"multi-engine",
"proxy",
"circuit-breaker",
"ai-agent",
"duckduckgo",
"oxylabs",
"webshare"
],
"author": "",
"license": "MIT",
"engines": {
"node": ">=18.0.0"
},
"dependencies": {
"dotenv": "^17.4.2",
"pino": "^10.3.1",
"undici": "^7.27.1"
}
}

51
src/api-key.js Normal file
View file

@ -0,0 +1,51 @@
import { randomBytes } from "node:crypto";
import { existsSync, readFileSync, writeFileSync } from "node:fs";
import { resolve } from "node:path";
import { fileURLToPath } from "node:url";
import { dirname } from "node:path";
// Directory one level above the folder that contains this module.
const PROJECT_ROOT = resolve(dirname(fileURLToPath(import.meta.url)), "..");
// Absolute path of the .env file that saveKeyToEnv creates or rewrites.
const ENV_PATH = resolve(PROJECT_ROOT, ".env");
/**
 * Create a fresh API key.
 *
 * @returns {string} 64 lowercase hexadecimal characters encoding 32 bytes
 *   obtained from `crypto.randomBytes`.
 */
export function generateKey() {
  const KEY_BYTES = 32;
  const entropy = randomBytes(KEY_BYTES);
  return entropy.toString("hex");
}
/**
 * Persist the API key as `POLYSEARCH_API_KEY=<key>` in the project's .env file.
 *
 * - If the file does not exist it is created with just that entry.
 * - If it exists, every line starting with `POLYSEARCH_API_KEY=` is replaced;
 *   when there is none, the entry is appended.
 *
 * The rewritten file always ends with exactly one newline, and appending to a
 * file that already ended with a newline no longer leaves a blank line before
 * the new entry.
 *
 * @param {string} key - API key to store.
 * @returns {{ path: string, created: boolean }} The .env path, and `created`:
 *   true when the file was created or the entry was appended, false when an
 *   existing entry was replaced.
 */
export function saveKeyToEnv(key) {
  const entry = `POLYSEARCH_API_KEY=${key}`;

  if (!existsSync(ENV_PATH)) {
    writeFileSync(ENV_PATH, `${entry}\n`);
    return { path: ENV_PATH, created: true };
  }

  const lines = readFileSync(ENV_PATH, "utf-8").split("\n");
  // A file ending in "\n" splits into a trailing "" element; drop it so that
  // appending does not insert an empty line and the newline is re-added once below.
  if (lines.at(-1) === "") lines.pop();

  let replaced = false;
  const updated = lines.map(line => {
    if (!line.startsWith("POLYSEARCH_API_KEY=")) return line;
    replaced = true;
    return entry;
  });
  if (!replaced) updated.push(entry);

  writeFileSync(ENV_PATH, `${updated.join("\n")}\n`);
  return { path: ENV_PATH, created: !replaced };
}
/**
 * Read the configured API key from the environment.
 *
 * @returns {string|null} The value of `POLYSEARCH_API_KEY`, or null when the
 *   variable is unset or empty.
 */
export function loadApiKey() {
  const { POLYSEARCH_API_KEY: configured } = process.env;
  return configured ? configured : null;
}
/**
 * Return the configured API key, terminating the process when there is none.
 *
 * When no key is available, a hint on how to generate one is written to
 * stderr and the process exits with status 1.
 *
 * @returns {string} The configured API key.
 */
export function requireApiKey() {
  const apiKey = loadApiKey();
  if (apiKey) return apiKey;

  console.error("No API key found. Generate one with: node src/index.js --generate-key");
  process.exit(1);
  // Only reachable if process.exit has been stubbed out.
  return apiKey;
}

242
src/api.js Normal file
View file

@ -0,0 +1,242 @@
import { createHash, timingSafeEqual } from "node:crypto";
import { createServer } from "node:http";
import { loadConfig } from "./config.js";
import { HttpClient } from "./http/client.js";
import { ProxyPool } from "./http/proxy.js";
import { SearchRunner } from "./run.js";
import { setUserAgents } from "./utils/ua.js";
import { loadApiKey } from "./api-key.js";
import { logger, childLogger } from "./utils/logger.js";
import { formatSearchResponse, formatErrorResponse } from "./output/agent.js";
// Logger used throughout this module, created with the component binding "api-server".
const log = childLogger({ component: "api-server" });
/**
 * Answer the request with HTTP 401 and a JSON error envelope.
 *
 * @param {import("node:http").ServerResponse} res - Response to write to.
 * @param {string} [msg="Unauthorized"] - Message placed in `error.message`.
 * @returns {void}
 */
function unauthorized(res, msg = "Unauthorized") {
  const payload = {
    success: false,
    error: { code: "UNAUTHORIZED", message: msg }
  };
  const headers = { "Content-Type": "application/json" };

  res.writeHead(401, headers);
  res.end(JSON.stringify(payload));
}
/**
 * Answer the request with HTTP 400 and a JSON error envelope.
 *
 * @param {import("node:http").ServerResponse} res - Response to write to.
 * @param {string} msg - Message placed in `error.message`.
 * @returns {void}
 */
function badRequest(res, msg) {
  const payload = {
    success: false,
    error: { code: "BAD_REQUEST", message: msg }
  };
  const headers = { "Content-Type": "application/json" };

  res.writeHead(400, headers);
  res.end(JSON.stringify(payload));
}
/**
 * Answer the request with HTTP 500 and a JSON error envelope.
 *
 * If the response headers were already sent, a new status line cannot be
 * written; in that case the response is simply ended (when still open) so the
 * client is not left waiting on a half-written reply.
 *
 * @param {import("node:http").ServerResponse} res - Response to write to.
 * @param {string} msg - Message placed in `error.message`.
 * @returns {void}
 */
function serverError(res, msg) {
  if (res.headersSent) {
    if (!res.writableEnded) res.end();
    return;
  }
  res.writeHead(500, { "Content-Type": "application/json" });
  res.end(JSON.stringify({ success: false, error: { code: "INTERNAL_ERROR", message: msg } }));
}
/**
 * Read the whole request body and decode it as JSON.
 *
 * Chunks are collected as Buffers and decoded once at the end, so a
 * multi-byte UTF-8 character that straddles two chunks is decoded correctly.
 * Once more than `maxBytes` have arrived, further data is discarded (the
 * stream is still drained so a response can be sent) and the promise rejects.
 *
 * @param {import("node:http").IncomingMessage} req - Request stream to read.
 * @param {number} [maxBytes=1048576] - Largest accepted body size in bytes.
 * @returns {Promise<any>} The parsed JSON value.
 * @throws {Error} "Invalid JSON" when the body does not parse,
 *   "Request body too large" when it exceeds `maxBytes`, or the stream's own error.
 */
function parseBody(req, maxBytes = 1024 * 1024) {
  return new Promise((resolve, reject) => {
    const chunks = [];
    let received = 0;
    let overflow = false;

    req.on("data", chunk => {
      if (overflow) return;
      // Chunks are strings when an encoding was set on the stream.
      const buf = typeof chunk === "string" ? Buffer.from(chunk) : chunk;
      received += buf.length;
      if (received > maxBytes) {
        overflow = true;
        chunks.length = 0; // free what was buffered so far
        return;
      }
      chunks.push(buf);
    });

    req.on("end", () => {
      if (overflow) {
        reject(new Error("Request body too large"));
        return;
      }
      try {
        resolve(JSON.parse(Buffer.concat(chunks).toString("utf8")));
      } catch {
        reject(new Error("Invalid JSON"));
      }
    });

    req.on("error", reject);
  });
}
/**
 * Check the request's Bearer token against the configured API key.
 *
 * When no API key is configured the server runs without authentication and
 * every request is accepted.
 *
 * The comparison is done with `crypto.timingSafeEqual` on SHA-256 digests of
 * both values: hashing gives equal-length buffers (a requirement of
 * `timingSafeEqual`), and comparison time no longer depends on how many
 * leading characters of the token are correct.
 *
 * @param {import("node:http").IncomingMessage} req - Incoming request.
 * @returns {boolean} true when the request may proceed.
 */
function authenticate(req) {
  const apiKey = loadApiKey();
  if (!apiKey) return true;

  const header = req.headers["authorization"] || "";
  const token = header.startsWith("Bearer ") ? header.slice(7) : "";

  const digest = value => createHash("sha256").update(value).digest();
  return timingSafeEqual(digest(token), digest(apiKey));
}
/**
 * Serialise `payload` and send it as a JSON response.
 * Serialisation happens before the status line is written, so a failure to
 * stringify cannot leave a half-sent response behind.
 */
function sendJson(res, status, payload) {
  const body = JSON.stringify(payload);
  res.writeHead(status, { "Content-Type": "application/json" });
  res.end(body);
}

/**
 * Read the request body as a JSON object, answering 400 itself when the body
 * is unreadable, malformed, or not an object.
 * Returns the parsed object, or null once the 400 has been sent.
 */
async function readJsonBody(req, res) {
  let body;
  try {
    body = await parseBody(req);
  } catch (err) {
    badRequest(res, err.message);
    return null;
  }
  if (body === null || typeof body !== "object") {
    badRequest(res, "Request body must be a JSON object");
    return null;
  }
  return body;
}

/**
 * Run one entry of a /batch request and describe its outcome.
 * Never rejects: invalid entries and search errors become `success: false` records.
 */
async function runBatchQuery(runner, q, index) {
  const startedAt = Date.now();
  const query = q?.query;
  const type = q?.type || "image";
  try {
    // Same validation as /search; avoids spending proxy requests on empty queries.
    if (!query) throw new Error("Missing 'query' field");
    const data = await runner.run({ query, type, limit: q.limit || 10 });
    return {
      index,
      query,
      type,
      success: true,
      execution_time_ms: Date.now() - startedAt,
      results: data,
      errors: data.errors || []
    };
  } catch (err) {
    return {
      index,
      query,
      type,
      success: false,
      execution_time_ms: Date.now() - startedAt,
      error: err?.message ?? String(err)
    };
  }
}

/**
 * Start the PolySearch REST API.
 *
 * Routes:
 * - `GET /health`   no auth; liveness plus a proxy-pool summary
 * - `GET /metrics`  auth; full proxy-pool metrics and per-proxy detail
 * - `POST /search`  auth; one query (`query`, optional `type`, `limit`);
 *                   200 when any image/web results came back, otherwise 404
 * - `POST /batch`   auth; 1–50 queries run concurrently, always 200 with a
 *                   per-query outcome
 *
 * Malformed or non-object JSON bodies are answered with 400. Any other
 * unexpected error is logged and answered with 500. SIGINT/SIGTERM destroy
 * the proxy pool, close the server and exit the process.
 *
 * @param {number} [port=9876] - TCP port to listen on.
 * @returns {Promise<import("node:http").Server>} The listening server.
 */
export async function startServer(port = 9876) {
  const config = await loadConfig();
  if (config.http.user_agents) setUserAgents(config.http.user_agents);

  const httpClient = new HttpClient(config.http);
  const proxyPool = new ProxyPool(config.proxies, config.proxy);
  if (config.proxy.enabled) {
    httpClient.setProxyPool(proxyPool);
    log.info({ proxyCount: config.proxies.length }, "proxy pool attached to API server");
  }
  const runner = new SearchRunner({ httpClient, config });

  const server = createServer(async (req, res) => {
    res.setHeader("Access-Control-Allow-Origin", "*");
    res.setHeader("Access-Control-Allow-Methods", "GET, POST, OPTIONS");
    res.setHeader("Access-Control-Allow-Headers", "Content-Type, Authorization");

    // CORS preflight
    if (req.method === "OPTIONS") {
      res.writeHead(204);
      res.end();
      return;
    }

    try {
      const url = new URL(req.url, `http://${req.headers.host || "localhost"}`);
      const path = url.pathname;

      // Health check — no auth required
      if (path === "/health" && req.method === "GET") {
        const m = proxyPool.getMetrics();
        sendJson(res, 200, {
          success: true,
          status: "ok",
          uptime: process.uptime(),
          proxyCount: config.proxies.length,
          proxyPool: {
            total: m.totalProxies,
            alive: m.alive,
            dead: m.dead,
            circuitOpen: m.circuitOpen,
            requestsTotal: m.requestsTotal,
            successRate: m.successRate,
            hourlyUsage: m.hourlyUsageCurrent
          }
        });
        return;
      }

      // Everything below requires auth
      if (!authenticate(req)) return unauthorized(res);

      if (path === "/metrics" && req.method === "GET") {
        sendJson(res, 200, {
          success: true,
          metrics: proxyPool.getMetrics(),
          proxies: proxyPool.getProxyDetail()
        });
        return;
      }

      if (path === "/search" && req.method === "POST") {
        const body = await readJsonBody(req, res);
        if (body === null) return;

        const { query, type = "image", limit = 10 } = body;
        if (!query) return badRequest(res, "Missing 'query' field");

        const start = Date.now();
        const data = await runner.run({ query, type, limit });
        const response = JSON.parse(formatSearchResponse(data));
        response.execution_time_ms = Date.now() - start;

        const hasResults = data.image?.results?.length > 0 || data.web?.results?.length > 0;
        sendJson(res, hasResults ? 200 : 404, response);
        return;
      }

      if (path === "/batch" && req.method === "POST") {
        const body = await readJsonBody(req, res);
        if (body === null) return;

        const { queries = [] } = body;
        if (!Array.isArray(queries) || queries.length === 0) {
          return badRequest(res, "Missing or empty 'queries' array");
        }
        if (queries.length > 50) {
          return badRequest(res, "Maximum 50 queries per batch");
        }

        const logBatch = childLogger({ component: "batch", batchSize: queries.length });
        const batchStart = Date.now();
        logBatch.info("batch started");

        const settled = await Promise.allSettled(
          queries.map((q, i) => runBatchQuery(runner, q, i))
        );
        // runBatchQuery does not reject, but keep a positional fallback so a
        // rejection can never produce an entry without its real index.
        const results = settled.map((r, i) => r.status === "fulfilled" ? r.value : {
          index: i,
          query: queries[i]?.query ?? "unknown",
          success: false,
          error: r.reason?.message || "Promise rejected"
        });

        const totalMs = Date.now() - batchStart;
        const ok = results.filter(r => r.success).length;
        const fail = results.length - ok; // ok + fail always equals batch_size
        logBatch.info({ total: queries.length, ok, fail, totalMs }, "batch completed");

        const poolMetrics = proxyPool.getMetrics();
        sendJson(res, 200, {
          success: true,
          batch_size: queries.length,
          execution_time_ms: totalMs,
          ok,
          fail,
          results,
          metrics: {
            pool: poolMetrics,
            hourly_usage: poolMetrics.hourlyUsageCurrent
          }
        });
        return;
      }

      // 404
      sendJson(res, 404, {
        success: false,
        error: { code: "NOT_FOUND", message: `No endpoint: ${req.method} ${path}` }
      });
    } catch (err) {
      log.error({ error: err.message }, "API error");
      serverError(res, err.message);
    }
  });

  server.listen(port, () => {
    log.info({ port }, "API server started");
    console.log(`\n PolySearch API running on http://localhost:${port}`);
    console.log(` Health: http://localhost:${port}/health`);
    console.log(` Search: POST http://localhost:${port}/search`);
    console.log(` Batch: POST http://localhost:${port}/batch`);
    console.log(` Metrics: GET http://localhost:${port}/metrics`);
    const key = loadApiKey();
    if (key) {
      console.log(` Auth: Authorization: Bearer <key>`);
      console.log(` Key: ${key.substring(0, 8)}...${key.slice(-4)}\n`);
    } else {
      console.log(` Auth: none (no API key configured)\n`);
    }
  });

  const shutdown = () => {
    log.info("shutting down API server");
    proxyPool.destroy();
    server.close(() => process.exit(0));
  };
  process.on("SIGINT", shutdown);
  process.on("SIGTERM", shutdown);

  return server;
}

50
src/cli.js Normal file
View file

@ -0,0 +1,50 @@
import { parseArgs } from "node:util";
/**
 * Parse `process.argv` into a normalized CLI options object.
 *
 * Exits the process with status 1 (after printing to stderr) when `--type`
 * or `--mode` holds an unsupported value.
 *
 * @returns {{
 *   query: string|null, type: string, limit: number, mode: string,
 *   configPath: string|null, proxy: string|null, metrics: boolean,
 *   generateKey: boolean, serve: boolean, port: number, help: boolean,
 *   positionals: string[]
 * }} `limit` is always within 1..50 and `port` within 1..65535.
 */
export function parseCliArgs() {
  const { values, positionals } = parseArgs({
    args: process.argv.slice(2),
    options: {
      query: { type: "string", short: "q" },
      type: { type: "string", short: "t", default: "image" },
      limit: { type: "string", short: "l", default: "10" },
      mode: { type: "string", short: "m", default: "human" },
      config: { type: "string", short: "c" },
      proxy: { type: "string", short: "p" },
      metrics: { type: "boolean", short: "M", default: false },
      "generate-key": { type: "boolean", default: false },
      serve: { type: "boolean", default: false },
      port: { type: "string", default: "9876" },
      help: { type: "boolean", short: "h", default: false }
    },
    strict: false,
    allowPositionals: true
  });

  // With `strict: false` a string option passed without a value is stored as
  // boolean `true`, so coerce before calling string methods on it.
  const type = String(values.type || "image").toLowerCase();
  const mode = String(values.mode || "human").toLowerCase();
  const textOrNull = (value) => (typeof value === "string" && value !== "" ? value : null);

  if (!["web", "image", "both"].includes(type)) {
    console.error(`Invalid type "${values.type}". Must be: web, image, or both`);
    process.exit(1);
  }
  if (!["human", "agent"].includes(mode)) {
    console.error(`Invalid mode "${values.mode}". Must be: human or agent`);
    process.exit(1);
  }

  // Non-numeric, zero and negative limits all fall back to the default of 10;
  // anything above 50 is capped.
  const requestedLimit = Number.parseInt(values.limit, 10);
  const limit = requestedLimit > 0 ? Math.min(requestedLimit, 50) : 10;

  // Unparseable or out-of-range ports fall back to the default port.
  const requestedPort = Number.parseInt(values.port, 10);
  const port = requestedPort > 0 && requestedPort <= 65535 ? requestedPort : 9876;

  return {
    query: textOrNull(values.query),
    type,
    limit,
    mode,
    configPath: textOrNull(values.config),
    proxy: textOrNull(values.proxy),
    metrics: values.metrics,
    generateKey: values["generate-key"],
    serve: values.serve,
    port,
    help: values.help,
    positionals
  };
}

134
src/config.js Normal file
View file

@ -0,0 +1,134 @@
import dotenv from "dotenv";
import { existsSync, readFileSync } from "node:fs";
import { resolve, dirname } from "node:path";
import { fileURLToPath } from "node:url";
import { homedir } from "node:os";
import { childLogger } from "./utils/logger.js";
import { fetchAllProxies, listProviders } from "./http/providers/index.js";
import "./http/providers/webshare.js";
import "./http/providers/oxylabs.js";
// Directory one level above the folder that contains this module.
const PROJECT_ROOT = resolve(dirname(fileURLToPath(import.meta.url)), "..");
// Load environment variables from the `.env` file in PROJECT_ROOT.
dotenv.config({ path: resolve(PROJECT_ROOT, ".env") });
// Baseline configuration. loadConfig() starts from these values and
// deep-merges a JSON config file (when one is found) on top of them.
const DEFAULTS = {
// Per-engine settings keyed by engine name.
engines: {
duckduckgo: { priority: 1, enabled: true, timeout_ms: 10000, retries: 2 }
},
// Proxy URLs; loadConfig() replaces this with the combined provider/file/config list.
proxies: [],
// Proxy pool options; loadConfig() flips `enabled` on when any proxy is available.
proxy: {
enabled: false,
rotation: "round-robin",
health_check_interval_ms: 60000,
max_failures: 3
},
// Options handed to HttpClient.
http: {
timeout_ms: 10000,
retry_max_attempts: 3,
retry_base_delay_ms: 1000,
retry_max_delay_ms: 10000,
// Empty means "no custom list"; loadConfig() turns an empty list into null.
user_agents: []
},
search: {
default_type: "image",
default_limit: 10,
max_limit: 50
}
};
/**
 * Recursively merge `source` over `target` and return a new object.
 * Neither argument is mutated.
 *
 * - Plain-object values are merged key by key.
 * - Arrays and primitives from `source` replace the target value wholesale.
 * - When `source` holds an object but the target value is not a plain object
 *   (a string, an array, missing), the source object is used on its own
 *   instead of being merged into a spread of the incompatible value.
 * - A `__proto__` key in `source` is ignored, so parsed JSON cannot change the
 *   prototype of the result.
 *
 * @param {object} target - Base values.
 * @param {object} source - Overrides.
 * @returns {object} The merged copy.
 */
function mergeDeep(target, source) {
  const isPlainObject = (value) =>
    value !== null && typeof value === "object" && !Array.isArray(value);

  const result = { ...target };
  for (const key of Object.keys(source)) {
    // Assigning to `__proto__` would set the prototype rather than a property.
    if (key === "__proto__") continue;

    const incoming = source[key];
    if (isPlainObject(incoming)) {
      // Only own plain-object values are valid merge bases; inherited members
      // and non-object values are not.
      const base =
        Object.hasOwn(target, key) && isPlainObject(target[key]) ? target[key] : {};
      result[key] = mergeDeep(base, incoming);
    } else {
      result[key] = incoming;
    }
  }
  return result;
}
/**
 * Locate the first existing config file.
 *
 * Lookup order: the POLYSEARCH_CONFIG / IMAGE_SEARCH_CONFIG environment
 * variables, `config.json` and `config.example.json` in the project root,
 * then `.polysearch.json` and `.image-search.json` in the home directory.
 *
 * @returns {string|null} Path of the first candidate that exists, else null.
 */
function findConfig() {
  const fromEnv = process.env.POLYSEARCH_CONFIG || process.env.IMAGE_SEARCH_CONFIG;
  const inProject = ["config.json", "config.example.json"].map((file) =>
    resolve(PROJECT_ROOT, file)
  );
  const inHome = [".polysearch.json", ".image-search.json"].map((file) =>
    resolve(homedir(), file)
  );

  const found = [fromEnv, ...inProject, ...inHome].find(
    (candidate) => candidate && existsSync(candidate)
  );
  return found ?? null;
}
/**
 * Read `proxies.txt` from the project root and convert each entry to a proxy URL.
 *
 * Blank lines and lines starting with `#` are ignored. Two colon-separated
 * layouts are recognised:
 *   - `host:port:user:pass` becomes `http://user:pass@host:port` (credentials URL-encoded)
 *   - `host:port` becomes `http://host:port`
 * Lines with any other number of fields are skipped.
 *
 * @returns {string[]} Proxy URLs in file order; empty when the file is absent.
 */
function loadProxyFile() {
  const proxyFile = resolve(PROJECT_ROOT, "proxies.txt");
  if (!existsSync(proxyFile)) return [];

  return readFileSync(proxyFile, "utf-8")
    .split("\n")
    .map((line) => line.trim())
    .filter((entry) => entry && !entry.startsWith("#"))
    .flatMap((entry) => {
      const fields = entry.split(":");
      switch (fields.length) {
        case 4: {
          const [host, port, user, pass] = fields;
          return [`http://${encodeURIComponent(user)}:${encodeURIComponent(pass)}@${host}:${port}`];
        }
        case 2:
          return [`http://${fields[0]}:${fields[1]}`];
        default:
          return [];
      }
    });
}
/**
 * Build the effective configuration.
 *
 * Starts from a private deep copy of DEFAULTS, merges the JSON config file
 * (explicit `configPath`, otherwise whatever findConfig() locates), then
 * collects proxies from the registered providers, `proxies.txt` and the
 * config itself. When at least one proxy is available, proxying is enabled.
 *
 * A config file that cannot be read or parsed is reported on stderr and
 * otherwise ignored.
 *
 * @param {string} [configPath] - Explicit path to a JSON config file.
 * @returns {Promise<object>} The merged configuration. `http.user_agents` is
 *   either a non-empty array or null.
 */
export async function loadConfig(configPath) {
  const finalPath = configPath || findConfig();

  // Deep copy: the in-place edits below (`proxy.enabled`, `http.user_agents`)
  // must never reach the shared DEFAULTS object, otherwise a second call
  // would start from already-modified defaults.
  let config = structuredClone(DEFAULTS);

  if (finalPath && existsSync(finalPath)) {
    try {
      const raw = readFileSync(finalPath, "utf-8");
      const parsed = JSON.parse(raw);
      config = mergeDeep(config, parsed);
    } catch (err) {
      console.error(`Warning: Failed to load config from ${finalPath}: ${err.message}`);
    }
  }

  const log = childLogger({ component: "config" });
  const [providerProxies, fileProxies] = await Promise.all([
    fetchAllProxies().catch(err => {
      log.warn({ error: err.message }, "all proxy providers failed");
      return [];
    }),
    Promise.resolve(loadProxyFile())
  ]);

  // The Set only collapses identical string entries; provider entries are
  // objects and are kept as they are.
  const allProxies = [...new Set([...providerProxies, ...fileProxies, ...config.proxies])];
  if (allProxies.length > 0) {
    config.proxies = allProxies;
    if (!config.proxy.enabled) {
      config.proxy.enabled = true;
    }
  }

  // Normalise "no custom user agents" (missing, empty, or not a list) to null.
  const userAgents = config.http.user_agents;
  config.http.user_agents =
    Array.isArray(userAgents) && userAgents.length > 0 ? userAgents : null;

  log.info({
    providers: listProviders().map(p => p.name),
    proxyCount: allProxies.length,
    proxyEnabled: config.proxy.enabled
  }, "config loaded");
  return config;
}

25
src/engines/base.js Normal file
View file

@ -0,0 +1,25 @@
/**
 * Abstract base for search engines.
 *
 * Subclasses override the static `name`, `supports` and `priority` fields and
 * implement `search()`. The instance getters read those statics through the
 * instance's own constructor, so they reflect the subclass values.
 */
export class SearchEngine {
  static name = "base";
  static supports = [];
  static priority = 10;

  /**
   * @param {object} httpClient - Stored on the instance as `this.http`.
   */
  constructor(httpClient) {
    this.http = httpClient;
  }

  /** Static `name` of the concrete engine class. */
  get engineName() {
    const { name } = this.constructor;
    return name;
  }

  /** Static `supports` list of the concrete engine class. */
  get supportedTypes() {
    const { supports } = this.constructor;
    return supports;
  }

  /** Static `priority` of the concrete engine class. */
  get priority() {
    const { priority } = this.constructor;
    return priority;
  }

  /**
   * Placeholder; always rejects until a subclass overrides it.
   * @throws {Error} Always.
   */
  async search(query, options = {}) {
    throw new Error("search() must be implemented by subclass");
  }
}

177
src/engines/duckduckgo.js Normal file
View file

@ -0,0 +1,177 @@
import { SearchEngine } from "./base.js";
/**
 * DuckDuckGo engine: web results scraped from the `lite` and `html`
 * front-ends, image results from the `i.js` JSON endpoint.
 */
export class DuckDuckGo extends SearchEngine {
  static name = "duckduckgo";
  static supports = ["web", "image"];
  static priority = 1;

  /**
   * Run a search.
   *
   * @param {string} query - Search terms.
   * @param {object} [options]
   * @param {string} [options.type="image"] - "web" selects web search; any other value runs an image search.
   * @param {number} [options.limit=10] - Maximum number of results.
   * @param {object|null} [options.proxy=null] - Passed through to the HTTP client.
   * @param {AbortSignal|null} [options.signal=null] - Passed through to the HTTP client.
   * @returns {Promise<{results: object[], engine: string, type: string, total: number}>}
   */
  async search(query, options = {}) {
    const { type = "image", limit = 10, proxy = null, signal = null } = options;
    if (type === "web") {
      return this._webSearch(query, limit, proxy, signal);
    }
    return this._imageSearch(query, limit, proxy, signal);
  }

  /**
   * Try each web strategy in order and return the first non-empty result.
   *
   * @throws {Error} "RATE_LIMITED: ..." when every strategy came back empty or
   *   failed and at least one failure message mentions RATE_LIMITED.
   * @returns {Promise<object>} A result set; empty when nothing was found.
   */
  async _webSearch(query, limit, proxy, signal) {
    // Explicit labels: the name of a bound function would read "bound _webViaLite".
    const strategies = [
      { label: "_webViaLite", run: () => this._webViaLite(query, limit, proxy, signal) },
      { label: "_webViaHtml", run: () => this._webViaHtml(query, limit, proxy, signal) }
    ];
    const errors = [];

    for (const { label, run } of strategies) {
      try {
        const result = await run();
        if (result && result.results && result.results.length > 0) {
          this.http.log.debug({ strategy: label, count: result.results.length }, "web search strategy succeeded");
          return result;
        }
      } catch (err) {
        const message = String(err?.message ?? err);
        this.http.log.warn({ strategy: label, error: message }, "web search strategy failed");
        errors.push({ strategy: label, error: message });
      }
    }

    this.http.log.warn({ strategies: errors.length, query }, "all web search strategies exhausted");
    if (errors.some(e => e.error.includes("RATE_LIMITED"))) {
      throw new Error(`RATE_LIMITED: DuckDuckGo rejected all strategies. ${errors.map(e => e.error).join("; ")}`);
    }
    return { results: [], engine: "duckduckgo", type: "web", total: 0 };
  }

  /**
   * Web search through lite.duckduckgo.com. Links, snippets and display URLs
   * are matched independently and paired up by position.
   */
  async _webViaLite(query, limit, proxy, signal) {
    const html = await this.http.fetch(
      `https://lite.duckduckgo.com/lite/?q=${encodeURIComponent(query)}`,
      { responseType: "text", proxy, signal, retries: 1, timeoutMs: 8000 }
    );

    const linkRegex = /<a[^>]*rel="nofollow"[^>]*href="([^"]*)"[^>]*class='result-link'[^>]*>([\s\S]*?)<\/a>/gi;
    const snippetRegex = /<td class='result-snippet'>([\s\S]*?)<\/td>/gi;
    const linkTextRegex = /<span class='link-text'>([\s\S]*?)<\/span>/gi;
    const links = [...html.matchAll(linkRegex)];
    const snippets = [...html.matchAll(snippetRegex)];
    const urls = [...html.matchAll(linkTextRegex)];

    const results = [];
    for (let i = 0; i < links.length && results.length < limit; i++) {
      const url = this._resolveResultUrl(links[i][1]);
      const title = decodeEntities(stripTags(links[i][2]).trim());
      const snippet = snippets[i] ? decodeEntities(stripTags(snippets[i][1]).trim()) : "";
      const displayUrl = urls[i] ? decodeEntities(urls[i][1].trim()) : "";

      let domain = this._hostnameOf(url) ?? "unknown";
      // Fall back to the display URL shown next to the link.
      if (domain === "unknown" && displayUrl) {
        domain = this._hostnameOf(`https://${displayUrl}`) ?? "unknown";
      }
      results.push({ title, url, domain, snippet, engine: "duckduckgo" });
    }
    return { results, engine: "duckduckgo", type: "web", total: results.length };
  }

  /**
   * Web search through html.duckduckgo.com. The page is split on the result
   * container marker and each block is parsed on its own; blocks without a
   * title link are skipped.
   */
  async _webViaHtml(query, limit, proxy, signal) {
    const html = await this.http.fetch(
      `https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`,
      { responseType: "text", proxy, signal, retries: 2, timeoutMs: 10000 }
    );

    const resultBlocks = html.split('<div class="result results_links');
    resultBlocks.shift(); // everything before the first result container

    const results = [];
    for (const block of resultBlocks) {
      if (results.length >= limit) break;
      const titleMatch = block.match(/<a[^>]*class="[^"]*result__a[^"]*"[^>]*href="([^"]*)"[^>]*>([\s\S]*?)<\/a>/i);
      if (!titleMatch) continue;
      const snippetMatch = block.match(/<a[^>]*class="[^"]*result__snippet[^"]*"[^>]*>([\s\S]*?)<\/a>/i);

      const url = this._resolveResultUrl(titleMatch[1]);
      const title = decodeEntities(stripTags(titleMatch[2]).trim());
      const snippet = snippetMatch ? decodeEntities(stripTags(snippetMatch[1]).trim()) : "";
      const domain = this._hostnameOf(url) ?? "unknown";
      results.push({ title, url, domain, snippet, engine: "duckduckgo" });
    }
    return { results, engine: "duckduckgo", type: "web", total: results.length };
  }

  /**
   * Image search: load the search page to obtain the `vqd` token, then query
   * the `i.js` endpoint with it.
   *
   * @throws {Error} When no `vqd` token can be found in the search page.
   */
  async _imageSearch(query, limit, proxy, signal) {
    const initHtml = await this.http.fetch(
      `https://duckduckgo.com/?q=${encodeURIComponent(query)}&iax=images&ia=images`,
      { responseType: "text", proxy, signal }
    );
    const vqdMatch = initHtml.match(/vqd\s*=\s*['"]([^'"]+)['"]/);
    if (!vqdMatch) {
      throw new Error("Could not extract token (vqd) from DuckDuckGo");
    }

    const data = await this.http.fetch(
      `https://duckduckgo.com/i.js?q=${encodeURIComponent(query)}&o=json&vqd=${vqdMatch[1]}&f=,,,`,
      { proxy, signal }
    );

    const rawResults = data.results || [];
    const results = rawResults.slice(0, limit).map((item) => {
      // Prefer the image host; fall back to the host of the page it came from.
      const domain =
        this._hostnameOf(item.image) ??
        (item.url ? this._hostnameOf(item.url) : null) ??
        "unknown";
      return {
        title: item.title || "",
        image_url: item.image || "",
        source_url: item.url || "",
        domain,
        width: item.width || null,
        height: item.height || null,
        thumbnail: item.thumbnail || null,
        engine: "duckduckgo"
      };
    });

    return {
      results,
      engine: "duckduckgo",
      type: "image",
      total: results.length
    };
  }

  /**
   * Turn a raw result `href` into the destination URL: decode HTML entities,
   * unwrap the `uddg=` redirect parameter when present, and give
   * protocol-relative URLs an `https:` scheme.
   */
  _resolveResultUrl(rawHref) {
    const href = decodeEntities(rawHref.trim());
    const redirect = href.match(/uddg=([^&]+)/);
    let url = href;
    if (redirect) {
      try {
        url = decodeURIComponent(redirect[1]);
      } catch {
        // Malformed percent-encoding: keep the undecoded value so one bad
        // link does not discard every other result on the page.
        url = redirect[1];
      }
    }
    if (url.startsWith("//")) url = `https:${url}`;
    return url;
  }

  /** Hostname of `url`, or null when it cannot be parsed as a URL. */
  _hostnameOf(url) {
    try {
      return new URL(url).hostname;
    } catch {
      return null;
    }
  }
}
/**
 * Decode the HTML entities that show up in scraped result markup:
 * `&amp;`, `&lt;`, `&gt;`, `&quot;` and numeric references in decimal
 * (`&#39;`) or hexadecimal (`&#x27;`) form.
 *
 * Decoding happens in a single pass, so text produced by one replacement is
 * never decoded again (`&amp;lt;` yields the literal text `&lt;`). Numeric
 * references beyond the Unicode range are left untouched.
 *
 * @param {string} str - Text containing entities.
 * @returns {string} Decoded text.
 */
function decodeEntities(str) {
  const named = { amp: "&", lt: "<", gt: ">", quot: '"' };
  const maxCodePoint = 0x10ffff;

  return str.replace(
    /&(?:(amp|lt|gt|quot)|#(\d+)|#[xX]([0-9a-fA-F]+));/g,
    (entity, name, decimal, hex) => {
      if (name) return named[name];
      const codePoint =
        decimal !== undefined ? Number.parseInt(decimal, 10) : Number.parseInt(hex, 16);
      // fromCodePoint throws on out-of-range values; keep such entities verbatim.
      return codePoint <= maxCodePoint ? String.fromCodePoint(codePoint) : entity;
    }
  );
}
/**
 * Remove every `<...>` tag from `str` and trim surrounding whitespace.
 *
 * @param {string} str - Markup fragment.
 * @returns {string} Text with tags removed.
 */
function stripTags(str) {
  const withoutTags = str.replaceAll(/<[^>]*>/g, "");
  return withoutTags.trim();
}

24
src/engines/index.js Normal file
View file

@ -0,0 +1,24 @@
// Engine classes in registration order.
const registry = [];

/**
 * Add an engine class to the registry.
 *
 * @param {Function} EngineClass - Engine class to append.
 */
export function registerEngine(EngineClass) {
  registry.splice(registry.length, 0, EngineClass);
}
/**
 * Instantiate every registered engine that supports `type`, ordered by
 * ascending static `priority`.
 *
 * @param {string} type - Search type to match against each class's `supports`.
 * @param {object} httpClient - Passed to each engine constructor.
 * @returns {object[]} Engine instances.
 */
export function getEngines(type, httpClient) {
  const matching = registry.filter(({ supports }) => supports.includes(type));
  matching.sort((left, right) => left.priority - right.priority);
  return matching.map((EngineClass) => new EngineClass(httpClient));
}
/**
 * Describe every registered engine class.
 *
 * @returns {{name: string, supports: string[], priority: number}[]}
 */
export function getEngineNames() {
  return registry.map(({ name, supports, priority }) => ({ name, supports, priority }));
}
/** Empty the engine registry in place. */
export function clearEngines() {
  registry.splice(0, registry.length);
}

17
src/engines/setup.js Normal file
View file

@ -0,0 +1,17 @@
import { DuckDuckGo } from "./duckduckgo.js";
import { registerEngine, getEngines, getEngineNames, clearEngines } from "./index.js";
// Register the DuckDuckGo engine when this module is evaluated.
registerEngine(DuckDuckGo);
/**
 * Build lazy engine factories bound to one HTTP client.
 *
 * @param {object} httpClient - Client handed to every engine instance.
 * @returns {{web: Function, image: Function, all: Function}} `web()` and
 *   `image()` return fresh engine lists for that type; `all()` returns both
 *   as `{ web, image }`.
 */
export function createEngines(httpClient) {
  const enginesFor = (type) => getEngines(type, httpClient);

  const web = () => enginesFor("web");
  const image = () => enginesFor("image");
  const all = () => ({
    web: enginesFor("web"),
    image: enginesFor("image")
  });

  return { web, image, all };
}
export { getEngineNames, clearEngines, registerEngine };

189
src/http/client.js Normal file
View file

@ -0,0 +1,189 @@
import { ProxyAgent } from "undici";
import { getNextUA } from "../utils/ua.js";
import { withRetry, isRetryable } from "../utils/retry.js";
import { childLogger } from "../utils/logger.js";
// One ProxyAgent per proxy URL, created on first use and reused afterwards.
const proxyAgentCache = new Map();

/**
 * Return the cached ProxyAgent for `proxyUrl`, creating it on first use.
 *
 * @param {string|null|undefined} proxyUrl - Proxy URL; falsy means "no proxy".
 * @returns {ProxyAgent|undefined} The agent, or undefined when no URL is given.
 */
function getOrCreateProxyAgent(proxyUrl) {
  if (!proxyUrl) return undefined;

  let agent = proxyAgentCache.get(proxyUrl);
  if (agent === undefined) {
    agent = new ProxyAgent(proxyUrl);
    proxyAgentCache.set(proxyUrl, agent);
  }
  return agent;
}
/**
 * HTTP client with per-attempt timeout, retry (through `withRetry`),
 * user-agent rotation and optional proxy-pool integration.
 */
export class HttpClient {
  /**
   * @param {object} [config]
   * @param {number} [config.timeout_ms=10000] - Default per-attempt timeout.
   * @param {number} [config.retry_max_attempts=3] - Default attempt count.
   * @param {number} [config.retry_base_delay_ms=1000]
   * @param {number} [config.retry_max_delay_ms=10000]
   * @param {string[]|null} [config.user_agents] - Custom user agents; when
   *   missing or empty, `getNextUA()` supplies the header instead.
   */
  constructor(config = {}) {
    this.log = childLogger({ component: "http-client" });
    this.timeoutMs = config.timeout_ms || 10000;
    this.retryAttempts = config.retry_max_attempts || 3;
    this.retryBaseDelay = config.retry_base_delay_ms || 1000;
    this.retryMaxDelay = config.retry_max_delay_ms || 10000;
    // An empty list would make the random pick below produce `undefined`.
    this.userAgents =
      Array.isArray(config.user_agents) && config.user_agents.length > 0
        ? config.user_agents
        : null;
    this.proxyPool = null;
  }

  /**
   * Attach a proxy pool. Requests without an explicit `proxy` option then
   * pick a proxy from the pool, and outcomes are reported back to it.
   */
  setProxyPool(pool) {
    this.proxyPool = pool;
    this.log.info({ proxyCount: pool.proxies?.length }, "proxy pool attached to HTTP client");
  }

  /**
   * Perform an HTTP request.
   *
   * @param {string} url
   * @param {object} [options]
   * @param {string} [options.method="GET"]
   * @param {object} [options.headers] - Merged over the default headers.
   * @param {*} [options.body=null]
   * @param {string} [options.responseType="json"] - "json" returns the parsed
   *   body (or the raw text when it is not valid JSON), "text" returns the
   *   body text, anything else returns the Response object.
   * @param {number} [options.timeoutMs] - Per-attempt timeout.
   * @param {number} [options.retries] - Attempt count passed to `withRetry`.
   * @param {object|null} [options.proxy=null] - Explicit proxy entry (`url`, `masked`).
   * @param {AbortSignal|null} [options.signal=null] - Caller cancellation signal.
   * @param {string|null} [options.engine=null] - Only used as a log field.
   * @returns {Promise<*>}
   * @throws {Error} With `status` set for non-2xx responses and for HTTP 202;
   *   with `status = 0` when the attempt was aborted.
   */
  async fetch(url, options = {}) {
    const {
      method = "GET",
      headers = {},
      body = null,
      responseType = "json",
      timeoutMs = this.timeoutMs,
      retries = this.retryAttempts,
      proxy = null,
      signal = null,
      engine = null
    } = options;

    const requestLog = this.log.child({
      url: url.length > 100 ? `${url.substring(0, 100)}...` : url,
      method,
      engine
    });
    requestLog.debug({ timeoutMs, retries }, "request starting");

    return withRetry(
      async (attempt) => {
        const controller = new AbortController();
        const timeout = setTimeout(() => controller.abort(), timeoutMs);
        const combinedSignal = signal
          ? combineSignals(signal, controller.signal)
          : controller.signal;
        // Set when the raw Response is returned to the caller: in that case
        // the link to the caller's signal is left in place.
        let handedOff = false;

        const ua = this.userAgents
          ? this.userAgents[Math.floor(Math.random() * this.userAgents.length)]
          : getNextUA();
        const fetchHeaders = {
          "User-Agent": ua,
          "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
          "Accept-Language": "en-US,en;q=0.5",
          ...headers
        };

        // An explicit proxy wins; otherwise ask the pool (when attached).
        let proxyUrl = proxy ? proxy.url : null;
        let usedProxy = proxy;
        if (!proxy && this.proxyPool) {
          const picked = this.proxyPool.select();
          if (picked) {
            proxyUrl = picked.url;
            usedProxy = picked;
          }
        }

        requestLog.debug({ attempt: attempt + 1, proxy: usedProxy?.masked || "none" }, "request attempt");
        const requestStart = Date.now();
        try {
          const fetchOpts = {
            method,
            headers: fetchHeaders,
            signal: combinedSignal,
            ...(body ? { body } : {})
          };
          if (proxyUrl) {
            fetchOpts.dispatcher = getOrCreateProxyAgent(proxyUrl);
          }

          const response = await fetch(url, fetchOpts);
          if (!response.ok) {
            const err = new Error(`HTTP ${response.status}: ${response.statusText}`);
            err.status = response.status;
            requestLog.warn({ status: response.status }, "request returned non-ok status");
            throw err;
          }
          // 202 passes the `ok` check but is treated as a blocked/deferred request.
          if (response.status === 202) {
            const bodyHint = (await response.text().catch(() => "")).substring(0, 120);
            const err = new Error(`HTTP 202 (Accepted): server deferred or blocked request. Body: "${bodyHint}"`);
            err.status = 202;
            requestLog.warn({ status: 202, bodyHint }, "request returned HTTP 202 (blocked/deferred)");
            throw err;
          }

          // Read the body once. A clone is only needed when the untouched
          // Response itself is handed back to the caller.
          const wantsBody = responseType === "json" || responseType === "text";
          const payload = wantsBody
            ? await response.text()
            : await response.clone().text();
          const bytes = payload.length; // string length, used as the size metric
          const latencyMs = Date.now() - requestStart;
          if (this.proxyPool) this.proxyPool.markSuccess(usedProxy, latencyMs, bytes);
          requestLog.debug({ status: response.status, bytes, latencyMs }, "request succeeded");

          if (responseType === "json") {
            try {
              return JSON.parse(payload);
            } catch {
              return payload;
            }
          }
          if (responseType === "text") return payload;
          handedOff = true;
          return response;
        } catch (err) {
          const latencyMs = Date.now() - requestStart;
          if (this.proxyPool) this.proxyPool.markFailed(usedProxy, err, latencyMs);

          if (err.name === "AbortError") {
            // NOTE(review): an abort triggered by the caller's signal is
            // reported the same way as a timeout here.
            requestLog.warn({ timeoutMs }, "request timed out");
            const timeoutErr = new Error(`Request timed out after ${timeoutMs}ms`);
            timeoutErr.status = 0;
            throw timeoutErr;
          }

          // The error is rethrown either way; this only picks the log line.
          const finalFailure = !isRetryable(err) || attempt >= retries - 1;
          if (finalFailure) {
            requestLog.error({ error: err.message, status: err.status, latencyMs }, "request failed, not retrying");
          } else {
            requestLog.warn({ error: err.message, attempt: attempt + 1, latencyMs }, "request failed, will retry");
          }
          throw err;
        } finally {
          clearTimeout(timeout);
          // Detach the listeners combineSignals() added to the caller's
          // signal; without this, every attempt leaves one behind on a
          // long-lived signal.
          if (!handedOff) combinedSignal._cleanup?.();
        }
      },
      {
        attempts: retries,
        baseDelayMs: this.retryBaseDelay,
        maxDelayMs: this.retryMaxDelay,
        onRetry: ({ attempt, error, delayMs }) => {
          requestLog.warn({ attempt, error: error.message, delayMs }, "retrying request");
        }
      }
    );
  }
}
/**
 * Create a signal that aborts as soon as any of the given signals aborts.
 *
 * When one input is already aborted, an aborted signal is returned right away
 * and no listeners are attached to any input. Otherwise the returned signal
 * carries a `_cleanup()` function that detaches the listeners from all
 * inputs; it also runs automatically once any input aborts.
 *
 * @param {...AbortSignal} signals - Signals to combine.
 * @returns {AbortSignal} The combined signal.
 */
function combineSignals(...signals) {
  const controller = new AbortController();

  // Check before subscribing so the early return cannot leave listeners
  // behind on the signals that come earlier in the list.
  if (signals.some((signal) => signal.aborted)) {
    controller.abort();
    return controller.signal;
  }

  const detach = () => {
    for (const signal of signals) {
      signal.removeEventListener("abort", onAbort);
    }
  };
  const onAbort = () => {
    controller.abort();
    // `once` only removes the listener on the signal that fired.
    detach();
  };
  for (const signal of signals) {
    signal.addEventListener("abort", onAbort, { once: true });
  }

  const combined = controller.signal;
  combined._cleanup = detach;
  return combined;
}

View file

@ -0,0 +1,39 @@
import { childLogger } from "../../utils/logger.js";
// Logger used by the provider registry functions in this module.
const log = childLogger({ component: "proxy-provider" });
// Registered providers as { name, fetcher, description } records, in registration order.
const registry = [];
/**
 * Register a proxy provider.
 *
 * @param {string} name - Provider name; used to tag the proxies it returns.
 * @param {Function} fetcher - Called by fetchAllProxies() to obtain proxies.
 * @param {string} [description=""] - Human-readable description.
 */
export function registerProvider(name, fetcher, description = "") {
  const entry = { name, fetcher, description };
  registry.push(entry);
  log.info({ name, description }, "proxy provider registered");
}
/**
 * Ask every registered provider for proxies, one provider after another.
 *
 * String results become `{ url, provider }`; object results are copied with a
 * `provider` field that the object's own `provider` value may override. A
 * provider that throws is logged and skipped.
 *
 * @returns {Promise<object[]>} All proxies in provider registration order.
 */
export async function fetchAllProxies() {
  const collected = [];
  const tagWithProvider = (proxy, provider) =>
    typeof proxy === "string" ? { url: proxy, provider } : { provider, ...proxy };

  for (const entry of registry) {
    const provider = entry.name;
    try {
      const fetched = await entry.fetcher();
      if (!fetched || !(fetched.length > 0)) {
        log.debug({ provider }, "provider returned 0 proxies");
        continue;
      }
      log.info({ provider, count: fetched.length }, "proxies fetched");
      for (const proxy of fetched) {
        collected.push(tagWithProvider(proxy, provider));
      }
    } catch (err) {
      log.warn({ provider, error: err.message }, "provider failed");
    }
  }
  return collected;
}
/**
 * List the registered providers without their fetcher functions.
 *
 * @returns {{name: string, description: string}[]}
 */
export function listProviders() {
  return registry.map(({ name, description }) => ({ name, description }));
}
/**
 * @returns {number} How many providers are registered.
 */
export function getProviderCount() {
  const { length } = registry;
  return length;
}

View file

@ -0,0 +1,26 @@
import { registerProvider } from "./index.js";
// Fixed proxy entry host and port used for every generated URL.
const ENTRY_POINT = "dc.oxylabs.io";
const PORT = "8000";

/**
 * Build the Oxylabs proxy URL from environment variables.
 *
 * Reads OXYLABS_USERNAME and OXYLABS_PASSWORD (both required),
 * OXYLABS_COUNTRY (defaults to "US") and OXYLABS_STICKY_SESSION (optional;
 * appended to the login as `-sessid-<value>`).
 *
 * @returns {Promise<string[]>} A single-element list with the proxy URL, or
 *   an empty list when the username or password is missing.
 */
async function fetchOxylabsProxies() {
  const {
    OXYLABS_USERNAME: username,
    OXYLABS_PASSWORD: password,
    OXYLABS_COUNTRY: countryEnv,
    OXYLABS_STICKY_SESSION: sessionEnv
  } = process.env;

  if (!username || !password) return [];

  const country = countryEnv || "US";
  let login = `user-${username}-country-${country}`;
  if (sessionEnv) {
    login += `-sessid-${sessionEnv}`;
  }

  const credentials = `${encodeURIComponent(login)}:${encodeURIComponent(password)}`;
  return [`http://${credentials}@${ENTRY_POINT}:${PORT}`];
}
// Self-register when this module is evaluated. The fetcher yields no proxies
// unless both OXYLABS_USERNAME and OXYLABS_PASSWORD are set.
registerProvider(
"oxylabs",
fetchOxylabsProxies,
"Oxylabs datacenter proxies (dc.oxylabs.io:8000)"
);

View file

@ -0,0 +1,45 @@
import { registerProvider, fetchAllProxies } from "./index.js";
const API_BASE = "https://proxy.webshare.io/api/v2/proxy/list/";
// How long a fetched proxy list is reused before the API is queried again.
const CACHE_TTL_MS = 5 * 60 * 1000;
// Last successfully fetched list and the time it was fetched.
let cached = null;
let lastFetch = 0;

/**
 * Fetch the proxy list from the Webshare API, following pagination until the
 * response no longer carries a `next` link.
 *
 * Only entries flagged `valid` are returned. Usernames and passwords are
 * URL-encoded so that reserved characters (`@`, `:`, `/`) cannot corrupt the
 * resulting proxy URL. A successful result is cached for CACHE_TTL_MS.
 *
 * @returns {Promise<string[]>} Proxy URLs of the form
 *   `http://user:pass@address:port`; empty when WEBSHARE_API_KEY is unset.
 * @throws {Error} When the API answers with a non-2xx status.
 */
async function fetchWebshareProxies() {
  const apiKey = process.env.WEBSHARE_API_KEY;
  if (!apiKey) return [];
  if (cached && Date.now() - lastFetch < CACHE_TTL_MS) return cached;

  const allProxies = [];
  let page = 1;
  let hasMore = true;
  while (hasMore) {
    const url = `${API_BASE}?mode=direct&page=${page}&page_size=100`;
    const response = await fetch(url, {
      headers: { Authorization: `Token ${apiKey}` }
    });
    if (!response.ok) {
      throw new Error(`Webshare API HTTP ${response.status}`);
    }

    const data = await response.json();
    // Tolerate a page without a `results` field.
    for (const entry of data.results ?? []) {
      if (!entry.valid) continue;
      const user = encodeURIComponent(entry.username);
      const pass = encodeURIComponent(entry.password);
      allProxies.push(`http://${user}:${pass}@${entry.proxy_address}:${entry.port}`);
    }
    hasMore = Boolean(data.next);
    page++;
  }

  cached = allProxies;
  lastFetch = Date.now();
  return allProxies;
}
// Self-register when this module is evaluated. The fetcher returns an empty
// list when WEBSHARE_API_KEY is not set.
registerProvider("webshare", fetchWebshareProxies, "proxy.webshare.io API");

304
src/http/proxy.js Normal file
View file

@ -0,0 +1,304 @@
import { isRetryable } from "../utils/retry.js";
import { childLogger } from "../utils/logger.js";
const HOUR_MS = 3600_000;

/**
 * Pool of proxies with least-used selection, failure tracking, a per-proxy
 * circuit breaker with exponential back-off, and usage metrics.
 *
 * Circuit states per proxy: "closed" (usable), "open" (skipped until
 * `circuitOpenUntil`), "half-open" (back-off elapsed; the next success closes
 * the circuit again).
 */
export class ProxyPool {
  /**
   * @param {(string|{url: string, provider?: string})[]} [proxies] - Proxy
   *   URLs or entries; duplicates by URL are dropped.
   * @param {object} [options]
   * @param {boolean} [options.enabled] - `false` disables the pool.
   * @param {number} [options.max_failures=3] - Failures before a proxy is
   *   marked dead (or its circuit trips when rate-limited).
   * @param {number} [options.circuit_base_ms=5000] - First back-off delay.
   * @param {number} [options.circuit_max_ms=300000] - Back-off ceiling.
   */
  constructor(proxies = [], options = {}) {
    this.log = childLogger({ component: "proxy-pool" });
    this.enabled = options.enabled !== false && proxies.length > 0;
    this.maxFailures = options.max_failures || 3;
    this.circuitBaseMs = options.circuit_base_ms || 5000;
    this.circuitMaxMs = options.circuit_max_ms || 300_000;

    const entries = [];
    const seen = new Set();
    for (const p of proxies) {
      const url = typeof p === "string" ? p : p.url;
      if (seen.has(url)) continue;
      seen.add(url);
      const provider = typeof p === "string" ? "manual" : (p.provider || "manual");
      const hasAuth = url.includes("@");
      // Hide the password wherever the proxy shows up in logs or metrics.
      const masked = url.replace(/\/\/([^:]+):([^@]+)@/, "//$1:***@");
      entries.push({
        url,
        masked,
        provider,
        authenticated: hasAuth,
        alive: true,
        failures: 0,
        consecutiveFailures: 0,
        lastUsed: 0,
        lastCheck: 0,
        circuitState: "closed",
        circuitFailures: 0,
        circuitOpenUntil: 0,
        hourlyUsage: {},
        totalRequests: 0,
        totalSuccesses: 0,
        totalFailures: 0,
        totalTimeouts: 0,
        totalRateLimited: 0,
        totalBytes: 0,
        totalLatencyMs: 0,
        lastLatencyMs: 0
      });
    }

    // Authenticated proxies are listed first.
    this.proxies = [
      ...entries.filter(p => p.authenticated),
      ...entries.filter(p => !p.authenticated)
    ];
    this.index = 0;

    if (this.enabled) {
      const byProvider = {};
      for (const p of this.proxies) {
        byProvider[p.provider] = (byProvider[p.provider] || 0) + 1;
      }
      this.log.info({
        count: this.proxies.length,
        authenticated: this.proxies.filter(p => p.authenticated).length,
        providers: byProvider
      }, "proxy pool initialized");
    }

    // Periodic metrics log; unref() keeps the timer from holding the process open.
    this._metricsInterval = setInterval(() => {
      this.log.info(this.getMetrics(), "proxy pool metrics periodic");
    }, 300_000).unref();
  }

  /**
   * Pick the proxy for the next request.
   *
   * Candidates are alive proxies whose circuit is not currently open. Among
   * them the one with the lowest usage in the current hour wins; ties go to
   * fewer consecutive failures, then fewer circuit trips. When no candidate
   * is left, dead proxies are revived and the first proxy is used.
   *
   * @returns {object|null} The chosen proxy entry, or null when the pool is
   *   disabled or empty.
   */
  select() {
    if (!this.enabled || this.proxies.length === 0) return null;
    const now = Date.now();

    const candidates = this.proxies.filter(p => {
      if (!p.alive) return false;
      if (p.circuitState === "open" && now < p.circuitOpenUntil) return false;
      if (p.circuitState === "open" && now >= p.circuitOpenUntil) {
        p.circuitState = "half-open";
        this.log.info({ proxy: p.masked }, "circuit half-open, allowing probe");
      }
      return true;
    });

    if (candidates.length === 0) {
      this.log.warn({ total: this.proxies.length }, "no candidates available, resetting dead proxies");
      for (const p of this.proxies) {
        if (!p.alive) {
          p.alive = true;
          // Reset the counter that marked the proxy dead as well; otherwise
          // the very next failure would mark it dead again.
          p.failures = 0;
          p.consecutiveFailures = 0;
        }
      }
      const fallback = this.proxies[0];
      if (!fallback) return null;
      fallback.circuitState = "closed";
      // Count this request too, so success/failure totals never exceed requests.
      return this._recordUse(fallback, now);
    }

    const hourKey = this._hourKey(now);
    candidates.sort((a, b) => {
      const aUsage = a.hourlyUsage[hourKey] || 0;
      const bUsage = b.hourlyUsage[hourKey] || 0;
      if (aUsage !== bUsage) return aUsage - bUsage;
      if (a.consecutiveFailures !== b.consecutiveFailures) {
        return a.consecutiveFailures - b.consecutiveFailures;
      }
      return a.circuitFailures - b.circuitFailures;
    });
    return this._recordUse(candidates[0], now);
  }

  /**
   * Book one request against `proxy`. Only the current hour's usage counter
   * is kept, because nothing reads older hours; this stops `hourlyUsage`
   * from gaining one key per hour forever.
   */
  _recordUse(proxy, now) {
    const hourKey = this._hourKey(now);
    proxy.lastUsed = now;
    proxy.hourlyUsage = { [hourKey]: (proxy.hourlyUsage[hourKey] || 0) + 1 };
    proxy.totalRequests++;
    return proxy;
  }

  /**
   * Record a successful request. Clears the failure counters and closes a
   * half-open circuit.
   *
   * @param {object|null} proxy - Proxy entry; ignored when falsy.
   * @param {number} [latencyMs=0]
   * @param {number} [bytes=0]
   */
  markSuccess(proxy, latencyMs = 0, bytes = 0) {
    if (!proxy) return;
    proxy.failures = 0;
    proxy.consecutiveFailures = 0;
    proxy.totalSuccesses++;
    proxy.totalLatencyMs += latencyMs;
    proxy.lastLatencyMs = latencyMs;
    proxy.totalBytes += bytes;
    proxy.lastCheck = Date.now();
    if (proxy.circuitState === "half-open") {
      proxy.circuitState = "closed";
      proxy.circuitFailures = 0;
      this.log.info({ proxy: proxy.masked }, "circuit closed after successful probe");
    }
    proxy.alive = true;
  }

  /**
   * Record a failed request.
   *
   * - HTTP 202 counts as rate limiting; the circuit trips once `failures`
   *   reaches `maxFailures`.
   * - Errors that `isRetryable` rejects trip the circuit immediately.
   * - Any other error marks the proxy dead once `failures` reaches `maxFailures`.
   *
   * @param {object|null} proxy - Proxy entry; ignored when falsy.
   * @param {Error|null} [error=null]
   * @param {number} [latencyMs=0]
   */
  markFailed(proxy, error = null, latencyMs = 0) {
    if (!proxy) return;
    proxy.consecutiveFailures++;
    proxy.failures++;
    proxy.totalFailures++;
    proxy.totalLatencyMs += latencyMs;
    proxy.lastLatencyMs = latencyMs;
    proxy.lastCheck = Date.now();

    const message = error?.message || "";
    // Match the status, or the "HTTP 202" wording, rather than any "202"
    // substring: an address or port inside a connection error can contain
    // those digits too.
    const is202 = Boolean(error) && (error.status === 202 || /\bHTTP 202\b/.test(message));
    const isTimeout = Boolean(message) &&
      (message.includes("timeout") || message.includes("timed out") || error.name === "AbortError");
    const isNonRetryable = Boolean(error) && !isRetryable(error);

    if (is202) {
      proxy.totalRateLimited++;
      this.log.warn({ proxy: proxy.masked, failures: proxy.failures }, "proxy rate-limited (202)");
      if (proxy.failures >= this.maxFailures) {
        this._tripCircuit(proxy, "rate-limited");
      }
      return;
    }
    if (isTimeout) {
      proxy.totalTimeouts++;
    }
    if (isNonRetryable) {
      this._tripCircuit(proxy, error.message);
      return;
    }

    const fields = {
      proxy: proxy.masked, failures: proxy.failures,
      error: error?.message
    };
    if (proxy.failures >= this.maxFailures) {
      proxy.alive = false;
      this.log.warn(fields, "proxy marked dead");
    } else {
      this.log.warn(fields, "proxy request failed");
    }
  }

  /**
   * Open the proxy's circuit. The delay doubles with every trip, starting at
   * `circuitBaseMs` and capped at `circuitMaxMs`.
   */
  _tripCircuit(proxy, reason) {
    proxy.circuitFailures++;
    const delay = Math.min(
      this.circuitBaseMs * Math.pow(2, proxy.circuitFailures - 1),
      this.circuitMaxMs
    );
    proxy.circuitState = "open";
    proxy.circuitOpenUntil = Date.now() + delay;
    this.log.warn({
      proxy: proxy.masked,
      circuitFailures: proxy.circuitFailures,
      circuitOpenMs: delay,
      reason
    }, "circuit tripped");
  }

  /**
   * Aggregate pool metrics, overall and per provider.
   *
   * @returns {object} Totals, success rate, current-hour usage and a
   *   `byProvider` breakdown.
   */
  getMetrics() {
    const now = Date.now();
    const hourKey = this._hourKey(now);
    const tallies = new Map();
    let totalReq = 0;
    let totalOk = 0;
    let totalFail = 0;
    let allHourly = 0;
    let alive = 0;
    let circuitOpen = 0;

    for (const p of this.proxies) {
      let tally = tallies.get(p.provider);
      if (!tally) {
        tally = {
          proxyCount: 0, alive: 0, dead: 0, circuitOpen: 0,
          requests: 0, success: 0, failure: 0,
          rateLimited: 0, timeout: 0, latencyMs: 0, currentHour: 0
        };
        tallies.set(p.provider, tally);
      }
      const usedThisHour = p.hourlyUsage[hourKey] || 0;

      tally.proxyCount++;
      if (p.alive) {
        tally.alive++;
        alive++;
      } else {
        tally.dead++;
      }
      if (p.circuitState === "open") {
        tally.circuitOpen++;
        circuitOpen++;
      }
      tally.requests += p.totalRequests;
      tally.success += p.totalSuccesses;
      tally.failure += p.totalFailures;
      tally.rateLimited += p.totalRateLimited;
      tally.timeout += p.totalTimeouts;
      tally.latencyMs += p.totalLatencyMs;
      tally.currentHour += usedThisHour;

      totalReq += p.totalRequests;
      totalOk += p.totalSuccesses;
      totalFail += p.totalFailures;
      allHourly += usedThisHour;
    }

    const rate = (ok, requests) =>
      requests > 0 ? `${(ok / requests * 100).toFixed(1)}%` : "0%";

    const byProvider = {};
    for (const [name, tally] of tallies) {
      byProvider[name] = {
        proxyCount: tally.proxyCount,
        alive: tally.alive,
        dead: tally.dead,
        circuitOpen: tally.circuitOpen,
        requests: tally.requests,
        success: tally.success,
        failure: tally.failure,
        rateLimited: tally.rateLimited,
        timeout: tally.timeout,
        avgLatencyMs: tally.success > 0 ? Math.round(tally.latencyMs / tally.success) : 0,
        hourlyUsage: { currentHour: tally.currentHour },
        successRate: rate(tally.success, tally.requests)
      };
    }

    return {
      timestamp: new Date(now).toISOString(),
      totalProxies: this.proxies.length,
      alive,
      dead: this.proxies.length - alive,
      circuitOpen,
      requestsTotal: totalReq,
      successTotal: totalOk,
      failureTotal: totalFail,
      successRate: rate(totalOk, totalReq),
      hourlyUsageCurrent: allHourly,
      byProvider
    };
  }

  /**
   * Per-proxy snapshot with masked URLs.
   *
   * @returns {object[]} One record per proxy; `failures` is the lifetime
   *   failure count and `consecutiveFailures` the current streak.
   */
  getProxyDetail() {
    const hourKey = this._hourKey(Date.now());
    return this.proxies.map(p => ({
      url: p.masked,
      provider: p.provider,
      alive: p.alive,
      authenticated: p.authenticated,
      circuitState: p.circuitState,
      circuitFailures: p.circuitFailures,
      // The literal used to define `failures` twice; the later value
      // (lifetime total) was the one that took effect and is kept here.
      failures: p.totalFailures,
      consecutiveFailures: p.consecutiveFailures,
      requests: p.totalRequests,
      successes: p.totalSuccesses,
      rateLimited: p.totalRateLimited,
      timeouts: p.totalTimeouts,
      avgLatencyMs: p.totalSuccesses > 0 ? Math.round(p.totalLatencyMs / p.totalSuccesses) : 0,
      lastLatencyMs: p.lastLatencyMs,
      hourlyUsage: p.hourlyUsage[hourKey] || 0
    }));
  }

  /** Index of the hour containing timestamp `ts` (milliseconds). */
  _hourKey(ts) {
    return Math.floor(ts / HOUR_MS);
  }

  /** Log the current metrics once. */
  logState() {
    this.log.info({ metrics: this.getMetrics() }, "proxy pool state");
  }

  /** Stop the periodic metrics timer. */
  destroy() {
    if (this._metricsInterval) clearInterval(this._metricsInterval);
  }
}

160
src/index.js Executable file
View file

@ -0,0 +1,160 @@
#!/usr/bin/env node
import { loadConfig } from "./config.js";
import { HttpClient } from "./http/client.js";
import { ProxyPool } from "./http/proxy.js";
import { SearchRunner } from "./run.js";
import { parseCliArgs } from "./cli.js";
import { setUserAgents } from "./utils/ua.js";
import { getEngineNames } from "./engines/setup.js";
import { logger, childLogger } from "./utils/logger.js";
import { renderCombinedResults } from "./output/human.js";
import { formatSearchResponse, formatErrorResponse, formatHelpResponse } from "./output/agent.js";
import { generateKey, saveKeyToEnv } from "./api-key.js";
import { startServer } from "./api.js";
/**
 * CLI entry point.
 *
 * Handles, in order: `--help`, `--generate-key`, `--serve`, then a one-shot
 * search that prints either agent output or human-readable output. Exits with
 * status 1 when the query is missing or the search throws.
 */
async function main() {
  const log = childLogger({ component: "main" });
  const args = parseCliArgs();

  if (args.help) {
    const engines = getEngineNames();
    console.log(formatHelpResponse(engines, args));
    console.log("\n\x1b[1mAPI Server:\x1b[0m");
    console.log(" --serve Start API server");
    console.log(" --port <number> Port for API server (default: 9876)");
    console.log(" --generate-key Generate a new API key\n");
    process.exit(0);
  }

  if (args.generateKey) {
    const key = generateKey();
    const result = saveKeyToEnv(key);
    console.log(`\n \x1b[32m✓\x1b[0m New API key generated`);
    console.log(` Key: ${key}`);
    console.log(` Stored: ${result.path}`);
    console.log(` \n Use with: curl -H "Authorization: Bearer ${key}" ...\n`);
    process.exit(0);
  }

  if (args.serve) {
    await startServer(args.port);
    return;
  }

  if (!args.query) {
    if (args.metrics) {
      log.warn("--metrics requires a search to run first; run a query and send SIGUSR1 for live metrics");
    }
    if (args.mode === "agent") {
      console.error(formatErrorResponse(
        { code: "MISSING_QUERY", message: "Search query parameter is required" },
        { type: args.type }
      ));
    } else {
      console.error("\n\x1b[31mError: Missing search query\x1b[0m\n");
      console.log("Usage: node src/index.js -q \"search term\" [options]");
      console.log(" -q, --query Search query (required)");
      console.log(" -t, --type Search type: web, image, or both (default: image)");
      console.log(" -l, --limit Max results per type (default: 10)");
      console.log(" -m, --mode Output mode: human or agent (default: human)");
      console.log(" -p, --proxy Proxy URL (e.g., http://user:pass@host:port)");
      console.log(" -h, --help Show help\n");
    }
    process.exit(1);
  }

  log.info({ query: args.query, type: args.type, limit: args.limit, mode: args.mode }, "starting search");
  const config = await loadConfig(args.configPath);
  log.info({ proxyCount: config.proxies.length, proxyEnabled: config.proxy.enabled }, "config loaded");

  if (config.http.user_agents) {
    setUserAgents(config.http.user_agents);
    log.debug({ count: config.http.user_agents.length }, "custom user agents loaded");
  }

  const httpClient = new HttpClient(config.http);

  // `activePool` always refers to the pool the HTTP client really uses, so
  // the metrics printed below describe the traffic that was actually sent.
  let activePool = new ProxyPool(config.proxies, config.proxy);
  if (config.proxy.enabled) {
    httpClient.setProxyPool(activePool);
    activePool.logState();
  }
  if (args.proxy) {
    log.info({ proxy: args.proxy.replace(/\/\/([^:]+):([^@]+)@/, "//$1:***@") }, "using CLI proxy override");
    // The configured pool is replaced; stop its metrics timer.
    activePool.destroy();
    activePool = new ProxyPool([args.proxy], { enabled: true });
    httpClient.setProxyPool(activePool);
  }

  process.on("SIGUSR1", () => {
    console.log("\n--- Proxy Pool Metrics (SIGUSR1) ---");
    console.log(JSON.stringify(activePool.getMetrics(), null, 2));
    console.log("--- Proxy Detail ---");
    console.log(JSON.stringify(activePool.getProxyDetail(), null, 2));
  });

  const runner = new SearchRunner({ httpClient, config });
  try {
    const data = await runner.run({
      query: args.query,
      type: args.type,
      limit: args.limit
    });

    if (args.mode === "agent") {
      console.log(formatSearchResponse(data));
    } else {
      if (data.image || data.web) {
        renderCombinedResults({
          query: data.query,
          image: data.image,
          web: data.web
        });
      }
      if (data.errors && data.errors.length > 0) {
        const failed = data.errors.filter(e => e.code === "ENGINE_FAILED");
        if (failed.length > 0) {
          console.error(`\n \x1b[33mWarning: ${failed.length} engine(s) failed, used fallback\x1b[0m`);
        }
      }
      if (args.metrics) {
        console.log("\n\x1b[1m\x1b[36m── Proxy Pool Metrics ──\x1b[0m");
        console.log(JSON.stringify(activePool.getMetrics(), null, 2));
        console.log("\n\x1b[1m\x1b[36m── Per-Proxy Detail ──\x1b[0m");
        console.log(JSON.stringify(activePool.getProxyDetail(), null, 2));
      }
    }
  } catch (err) {
    log.error({ error: err.message }, "search failed with unhandled error");
    if (args.mode === "agent") {
      console.error(formatErrorResponse(err, { query: args.query, type: args.type }));
    } else {
      console.error(`\n\x1b[31mError: ${err.message}\x1b[0m\n`);
    }
    process.exit(1);
  }
}
/**
 * Programmatic entry point: load the configuration, build an HTTP client
 * (attaching a proxy pool when `config.proxy.enabled` is truthy) and run a
 * single search through a fresh SearchRunner.
 *
 * @param {object} opts
 * @param {string} opts.query - Search query passed to the runner.
 * @param {string} [opts.type] - Search type; falls back to "image" when falsy.
 * @param {number} [opts.limit] - Result limit; falls back to 10 when falsy.
 * @param {string} [opts.configPath] - Forwarded to `loadConfig`.
 * @returns {Promise<object>} Whatever `SearchRunner#run` resolves with.
 */
export async function search(opts) {
  const config = await loadConfig(opts?.configPath);
  const httpClient = new HttpClient(config.http);

  const proxySettings = config.proxy;
  if (proxySettings.enabled) {
    httpClient.setProxyPool(new ProxyPool(config.proxies, proxySettings));
  }

  const runner = new SearchRunner({ httpClient, config });
  const request = {
    query: opts.query,
    type: opts.type || "image",
    limit: opts.limit || 10
  };
  return runner.run(request);
}
// Start the CLI as soon as this module is evaluated.
// NOTE(review): the call is unconditional, so importing this file only to use
// the exported `search` helper also runs the CLI entry point — confirm that
// this is intended.
main();

133
src/output/agent.js Normal file
View file

@ -0,0 +1,133 @@
/**
 * Serialize a search result into the pretty-printed JSON envelope used by
 * agent mode.
 *
 * The envelope always carries `success`, `query`, `type`, `timestamp`,
 * `execution_time_ms` and `engines_used`. The `image` / `web` sections are
 * added only when present on `data`, and `errors` only when non-empty.
 *
 * @param {object} data - Result object with camelCase fields.
 * @returns {string} JSON text indented with two spaces.
 */
export function formatSearchResponse(data) {
  const payload = {
    success: true,
    query: data.query,
    type: data.type,
    timestamp: data.timestamp || new Date().toISOString(),
    execution_time_ms: data.executionTimeMs || 0,
    engines_used: data.enginesUsed || []
  };

  // Image first, then web, so the key order of the JSON output is stable.
  for (const kind of ["image", "web"]) {
    const section = data[kind];
    if (section) {
      payload[kind] = formatResultsSection(section, kind);
    }
  }

  const problems = data.errors;
  if (problems && problems.length > 0) {
    payload.errors = problems;
  }

  return JSON.stringify(payload, null, 2);
}
/**
 * Serialize a failure into the pretty-printed JSON envelope used by agent mode.
 *
 * Anything can be thrown in JavaScript, so the input is not assumed to be an
 * Error instance: `null`/`undefined` produce the generic defaults, and a
 * thrown string is used as the message instead of being dropped.
 *
 * @param {*} error - The thrown value, or an `{ code, message }` descriptor.
 * @param {object} [options]
 * @param {string} [options.query] - Query that was being processed, if any.
 * @param {string} [options.type] - Requested search type, if any.
 * @returns {string} JSON text indented with two spaces.
 */
export function formatErrorResponse(error, options = {}) {
  // Normalise nullish input so property reads below cannot throw.
  const thrown = error ?? {};
  const message = typeof thrown === "string" ? thrown : thrown.message;
  // Report the constructor name of the thrown value; a nullish value has none.
  const typeName = error == null ? undefined : thrown.constructor?.name;

  const response = {
    success: false,
    error: {
      code: thrown.code || "SEARCH_FAILED",
      message: message || "An unknown error occurred",
      type: typeName || "Error"
    },
    query: options?.query || null,
    type: options?.type || null,
    timestamp: new Date().toISOString()
  };
  return JSON.stringify(response, null, 2);
}
/**
 * Build the machine-readable help document as pretty-printed JSON.
 *
 * @param {Array<object>} engines - Engine descriptors; only `name`,
 *   `supports` and `priority` are copied into the output.
 * @param {*} options - Accepted for interface compatibility; not read here.
 * @returns {string} JSON text indented with two spaces.
 */
export function formatHelpResponse(engines, options) {
  // Keeps the per-option key order: flag, required, (default), description.
  const describeOption = (flag, required, description, extra = {}) => ({
    flag,
    required,
    ...extra,
    description
  });

  const optionDocs = {
    query: describeOption("-q, --query <string>", true, "Search query"),
    type: describeOption("-t, --type <type>", false, "Search type: web, image, or both", { default: "image" }),
    limit: describeOption("-l, --limit <number>", false, "Max results per type", { default: 10 }),
    mode: describeOption("-m, --mode <mode>", false, "Output mode: human or agent", { default: "human" }),
    config: describeOption("-c, --config <path>", false, "Path to config file"),
    proxy: describeOption("-p, --proxy <url>", false, "Single proxy URL to use"),
    help: describeOption("-h, --help", false, "Show this help", { default: false })
  };

  const examples = [
    "node src/index.js -q \"vintage radio\"",
    "node src/index.js -q \"spacex launch\" -t both -l 15 --mode agent",
    "node src/index.js -q \"cats\" -t web -l 5 -p http://user:pass@proxy:8080",
    "node src/index.js -q \"1950 computer\" -m agent"
  ];

  const engineSummaries = engines.map((engine) => {
    const { name, supports, priority } = engine;
    return { name, supports, priority };
  });

  const help = {
    name: "polysearch",
    description: "Multi-engine web + image search with proxy rotation, circuit breaker, and AI agent output",
    version: "2.0.0",
    usage: "node src/index.js [OPTIONS]",
    options: optionDocs,
    examples,
    engines: engineSummaries,
    output_modes: {
      human: "Colorized terminal output for humans",
      agent: "Structured JSON for AI agent consumption"
    }
  };

  return JSON.stringify({ success: true, help }, null, 2);
}
/**
 * Normalize one result section (image or web) for the agent JSON output.
 *
 * Every result gets a 1-based `index`; missing fields become `null`, a
 * missing domain becomes "unknown", and a missing per-item engine falls back
 * to the section's engine. Image sections additionally report average
 * dimensions in `statistics`.
 *
 * @param {object} section - `{ engine, total, results }` from the runner.
 * @param {string} type - "image" selects the image shape; anything else the web shape.
 * @returns {object} `{ engine, total, results, statistics }`.
 */
function formatResultsSection(section, type) {
  const imageMode = type === "image";
  const rows = section.results || [];

  const toImageRow = (item, position) => ({
    index: position + 1,
    title: item.title || null,
    image_url: item.image_url || null,
    source_url: item.source_url || null,
    domain: item.domain || "unknown",
    width: item.width || null,
    height: item.height || null,
    thumbnail: item.thumbnail || null,
    engine: item.engine || section.engine || null
  });

  const toWebRow = (item, position) => ({
    index: position + 1,
    title: item.title || null,
    url: item.url || null,
    domain: item.domain || "unknown",
    snippet: item.snippet || null,
    engine: item.engine || section.engine || null
  });

  const formatted = {
    engine: section.engine || "unknown",
    total: section.total || 0
  };
  formatted.results = rows.map(imageMode ? toImageRow : toWebRow);

  // Statistics are computed from the raw section results, not the mapped rows.
  if (imageMode) {
    formatted.statistics = {
      avg_width: avg(section.results, "width"),
      avg_height: avg(section.results, "height"),
      domains: countBy(section.results, "domain")
    };
  } else {
    formatted.statistics = {
      domains: countBy(section.results, "domain")
    };
  }

  return formatted;
}
/**
 * Count how often each value of `item[key]` occurs in `arr`.
 *
 * Counting is done in a Map rather than on a plain object so that values
 * which collide with inherited object members ("toString", "constructor",
 * "__proto__") are tallied like any other label instead of corrupting the
 * result. Missing or falsy values, and null/undefined items, are counted
 * under "unknown".
 *
 * @param {Array<object>|null|undefined} arr - Items to inspect.
 * @param {string} key - Property whose values are counted.
 * @returns {Object<string, number>} Plain object mapping label to count.
 */
function countBy(arr, key) {
  const tally = new Map();
  for (const item of arr || []) {
    // Labels are stringified so 1 and "1" share a bucket, as object keys would.
    const label = String(item?.[key] || "unknown");
    tally.set(label, (tally.get(label) || 0) + 1);
  }
  // fromEntries defines own data properties, so even "__proto__" survives.
  return Object.fromEntries(tally);
}
/**
 * Rounded arithmetic mean of `item[key]` over `arr`.
 *
 * Only finite numbers contribute. Numeric strings such as "800" are parsed
 * first, because adding a string with `+` would concatenate instead of sum.
 * Missing values, NaN, non-numeric strings and null/undefined items are
 * skipped.
 *
 * @param {Array<object>|null|undefined} arr - Items to inspect.
 * @param {string} key - Property holding the numeric value.
 * @returns {number|null} Rounded mean, or null when nothing is usable.
 */
function avg(arr, key) {
  const nums = [];
  for (const item of arr || []) {
    const raw = item?.[key];
    const value = typeof raw === "string" && raw.trim() !== "" ? Number(raw) : raw;
    if (typeof value === "number" && Number.isFinite(value)) {
      nums.push(value);
    }
  }
  if (nums.length === 0) return null;
  const total = nums.reduce((sum, n) => sum + n, 0);
  return Math.round(total / nums.length);
}

72
src/output/human.js Normal file
View file

@ -0,0 +1,72 @@
/**
 * Print image results to the terminal with ANSI styling.
 *
 * A result without a `domain` is shown as "unknown" (the same fallback the
 * agent formatter uses) so the domain colour lookup never receives a
 * non-string value.
 *
 * @param {Array<object>|null|undefined} results - Image results; each is
 *   expected to carry `index`, `image_url` and optionally `title`, `domain`,
 *   `width`, `height`, `source_url`.
 * @param {string} query - Query echoed in the heading.
 * @returns {void}
 */
export function renderImageResults(results, query) {
  if (!results || results.length === 0) {
    console.log(`\n No image results found for: "${query}"\n`);
    return;
  }

  const reset = "\x1b[0m";
  const rule = `\x1b[90m${"─".repeat(80)}\x1b[0m`;

  console.log(`\n\x1b[1m\x1b[36mImage Results for:\x1b[0m "${query}"`);
  console.log(`${rule}\n`);

  for (const img of results) {
    // Dimensions are shown only when both sides are known.
    const dims = (img.width && img.height) ? `${img.width}\u00d7${img.height}` : "Unknown";
    const domain = img.domain || "unknown";

    console.log(`\x1b[1m[${img.index}]\x1b[0m`);
    console.log(` \x1b[1mDescription:\x1b[0m ${img.title || "No description"}`);
    console.log(` \x1b[1mDomain:\x1b[0m ${colorDomain(domain)}${domain}${reset}`);
    console.log(` \x1b[1mDimensions:\x1b[0m ${dims}`);
    console.log(` \x1b[1mImage URL:\x1b[0m \x1b[4m${img.image_url}${reset}`);
    if (img.source_url) {
      console.log(` \x1b[1mSource Page:\x1b[0m \x1b[4m${img.source_url}${reset}`);
    }
    console.log(rule);
  }

  console.log(` Found ${results.length} image result${results.length !== 1 ? "s" : ""}\n`);
}
/**
 * Print web results to the terminal with ANSI styling.
 *
 * Each entry shows its index, title, URL and domain; the snippet line is
 * printed only when a snippet exists and is cut to 200 characters.
 *
 * @param {Array<object>|null|undefined} results - Web results carrying
 *   `index`, `title`, `url`, `domain` and optionally `snippet`.
 * @param {string} query - Query echoed in the heading.
 * @returns {void}
 */
export function renderWebResults(results, query) {
  const count = results ? results.length : 0;
  if (count === 0) {
    console.log(`\n No web results found for: "${query}"\n`);
    return;
  }

  const separator = `\x1b[90m${"─".repeat(80)}\x1b[0m`;

  console.log(`\n\x1b[1m\x1b[36mWeb Results for:\x1b[0m "${query}"`);
  console.log(`${separator}\n`);

  for (const entry of results) {
    const { index, title, url, domain, snippet } = entry;
    console.log(`\x1b[1m[${index}] ${title}\x1b[0m`);
    console.log(` \x1b[1mURL:\x1b[0m \x1b[4m${url}\x1b[0m`);
    console.log(` \x1b[1mDomain:\x1b[0m \x1b[32m${domain}\x1b[0m`);
    if (snippet) {
      console.log(` \x1b[1mSnippet:\x1b[0m ${snippet.slice(0, 200)}`);
    }
    console.log(separator);
  }

  const plural = count !== 1 ? "s" : "";
  console.log(` Found ${count} web result${plural}\n`);
}
/**
 * Render whichever sections (image, then web) are present on `data`.
 *
 * Each section's results are copied with a fresh 1-based `index` before being
 * handed to the matching renderer; the input objects are not modified.
 *
 * @param {object} data - `{ query, image?, web? }`, where each section may
 *   carry a `results` array.
 * @returns {void}
 */
export function renderCombinedResults(data) {
  const renumber = (rows) => rows.map((row, position) => ({ ...row, index: position + 1 }));

  const imageSection = data.image;
  if (imageSection && imageSection.results) {
    renderImageResults(renumber(imageSection.results), data.query);
  }

  const webSection = data.web;
  if (webSection && webSection.results) {
    renderWebResults(renumber(webSection.results), data.query);
  }
}
/**
 * Pick the ANSI colour escape used when printing a result's domain.
 *
 *  - red     (\x1b[31m): shutterstock, alamy, istock, getty
 *  - magenta (\x1b[35m): domains ending in .edu / .gov, or containing wikimedia
 *  - yellow  (\x1b[33m): unsplash, pexels, pixabay
 *  - green   (\x1b[32m): everything else, including a missing or non-string domain
 *
 * @param {*} domain - Domain text; non-string values get the default colour.
 * @returns {string} ANSI escape sequence that starts the colour.
 */
function colorDomain(domain) {
  // Results may lack a domain; treat that as "no match" instead of throwing.
  const lower = typeof domain === "string" ? domain.toLowerCase() : "";
  const containsAny = (needles) => needles.some((needle) => lower.includes(needle));

  if (containsAny(["shutterstock", "alamy", "istock", "getty"])) return "\x1b[31m";
  if (lower.endsWith(".edu") || lower.endsWith(".gov") || lower.includes("wikimedia")) return "\x1b[35m";
  if (containsAny(["unsplash", "pexels", "pixabay"])) return "\x1b[33m";
  return "\x1b[32m";
}

123
src/run.js Normal file
View file

@ -0,0 +1,123 @@
import { createEngines } from "./engines/setup.js";
import { childLogger } from "./utils/logger.js";
/**
 * Runs a query against the registered engines, one search type at a time,
 * taking the first engine that returns any results.
 */
export class SearchRunner {
  /**
   * @param {object} deps
   * @param {object} deps.httpClient - HTTP client handed to `createEngines`.
   * @param {object} [deps.config] - Configuration kept on the instance.
   */
  constructor({ httpClient, config = {} }) {
    this.log = childLogger({ component: "runner" });
    this.http = httpClient;
    this.config = config;
    this.engines = createEngines(httpClient);
  }

  /**
   * Execute a search.
   *
   * `type === "both"` searches "web" and then "image"; any other value is
   * searched as a single type. Each searched type stores its section on the
   * result under the type name.
   *
   * @param {object} request
   * @param {string} request.query
   * @param {string} [request.type="image"]
   * @param {number} [request.limit=10]
   * @param {*} [request.proxy=null] - Forwarded to each engine.
   * @param {*} [request.signal=null] - Forwarded to each engine.
   * @returns {Promise<object>} `{ query, type, timestamp, executionTimeMs,
   *   enginesUsed, errors }` plus one section per searched type.
   */
  async run({ query, type = "image", limit = 10, proxy = null, signal = null }) {
    const startedAt = Date.now();
    const result = {
      query,
      type,
      timestamp: new Date().toISOString(),
      executionTimeMs: 0,
      enginesUsed: [],
      errors: []
    };
    const requestedTypes = type === "both" ? ["web", "image"] : [type];

    this.log.info({ query, type, limit }, "search started");

    for (const kind of requestedTypes) {
      const candidates = kind === "web" ? this.engines.web() : this.engines.image();
      this.log.debug({ type: kind, engineCount: candidates.length }, "searching type");

      if (candidates.length !== 0) {
        const outcome = await this._tryEngines(candidates, query, kind, limit, proxy, signal);
        result[kind] = outcome.section;
        result.enginesUsed.push(...outcome.usedEngines);
        result.errors.push(...outcome.errors);
      } else {
        // Nothing registered for this type: record it and move on.
        this.log.error({ type: kind }, "no registered engines for type");
        result.errors.push({
          type: kind,
          code: "NO_ENGINES",
          message: `No registered engines support "${kind}" search`
        });
      }
    }

    result.executionTimeMs = Date.now() - startedAt;
    this.log.info({
      executionTimeMs: result.executionTimeMs,
      enginesUsed: result.enginesUsed,
      errors: result.errors.length,
      imageCount: result.image?.results?.length || 0,
      webCount: result.web?.results?.length || 0
    }, "search completed");

    return result;
  }

  /**
   * Try engines in order until one yields at least one result.
   *
   * An engine that throws is recorded as an ENGINE_FAILED error and listed as
   * "<name>(failed)"; an engine that returns nothing is skipped silently. If
   * no engine produces results, the section is an empty placeholder with
   * engine "none".
   *
   * @returns {Promise<{section: object, usedEngines: string[], errors: object[]}>}
   */
  async _tryEngines(engines, query, type, limit, proxy, signal) {
    const usedEngines = [];
    const errors = [];
    let section = null;

    for (const engine of engines) {
      this.log.debug({ engine: engine.engineName, type }, "trying engine");

      try {
        const found = await engine.search(query, { type, limit, proxy, signal });
        const hits = found && found.results;

        if (!hits || !(hits.length > 0)) {
          this.log.debug({ engine: engine.engineName, type }, "engine returned 0 results");
          continue;
        }

        this.log.info({
          engine: engine.engineName,
          type,
          resultCount: hits.length
        }, "engine returned results");

        section = {
          engine: found.engine,
          total: hits.length,
          results: hits.map((hit, position) => ({ ...hit, index: position + 1 }))
        };
        usedEngines.push(engine.engineName);
      } catch (err) {
        this.log.warn({
          engine: engine.engineName,
          type,
          error: err.message
        }, "engine failed");
        errors.push({
          type,
          engine: engine.engineName,
          code: "ENGINE_FAILED",
          message: err.message
        });
        usedEngines.push(`${engine.engineName}(failed)`);
      }

      // First engine with results wins; later engines are not consulted.
      if (section) break;
    }

    if (!section) {
      this.log.warn({ type }, "all engines failed for type");
      section = { engine: "none", total: 0, results: [] };
    }

    return { section, usedEngines, errors };
  }
}

23
src/utils/logger.js Normal file
View file

@ -0,0 +1,23 @@
import pino from "pino";
// Minimum level to emit; overridable through the LOG_LEVEL environment variable.
const level = process.env.LOG_LEVEL || "info";

// A transport is configured only when stdout is an interactive terminal.
// NOTE(review): `colorize` looks like a pino-pretty option; confirm that the
// "pino/file" target actually honours it.
const transport = process.stdout.isTTY
  ? { target: "pino/file", options: { colorize: true } }
  : undefined;

// Emit the level as its text label under the `level` key.
const formatters = {
  level: (label) => ({ level: label })
};

/** Shared root logger for the application, with ISO-8601 timestamps. */
export const logger = pino({
  level,
  transport,
  formatters,
  timestamp: pino.stdTimeFunctions.isoTime
});
/**
 * Create a child of the shared root logger.
 *
 * @param {object} bindings - Passed straight to `logger.child`.
 * @returns {object} The child logger returned by `logger.child`.
 */
export function childLogger(bindings) {
  const scoped = logger.child(bindings);
  return scoped;
}

45
src/utils/retry.js Normal file
View file

@ -0,0 +1,45 @@
/**
 * Call `fn` until it succeeds or the attempt budget is used up, sleeping with
 * exponential backoff plus jitter between attempts.
 *
 * `fn` is always invoked at least once: an `attempts` value that is zero,
 * negative or not a number is treated as 1, and a fractional value is rounded
 * up. This guarantees the promise rejects with the real error from `fn`
 * rather than with `undefined`.
 *
 * @param {(attempt: number) => *} fn - Operation to run; receives the
 *   zero-based attempt index. May return a value or a promise.
 * @param {object} [options]
 * @param {number} [options.attempts=3] - Maximum number of calls to `fn`.
 * @param {number} [options.baseDelayMs=1000] - Backoff base; also the jitter range.
 * @param {number} [options.maxDelayMs=10000] - Upper bound for a single delay.
 * @param {?function} [options.onRetry=null] - Called before each sleep with
 *   `{ attempt, error, delayMs }`, where `attempt` is 1-based.
 * @returns {Promise<*>} The value produced by the first successful call.
 * @throws The error of the last failed attempt.
 */
export async function withRetry(fn, {
  attempts = 3,
  baseDelayMs = 1000,
  maxDelayMs = 10000,
  onRetry = null
} = {}) {
  // `|| 1` covers both 0 and NaN; Math.max covers negative values.
  const totalAttempts = Math.max(1, Math.ceil(Number(attempts)) || 1);

  let lastError;
  for (let attempt = 0; attempt < totalAttempts; attempt++) {
    try {
      return await fn(attempt);
    } catch (err) {
      lastError = err;
      // No point sleeping after the final attempt.
      if (attempt === totalAttempts - 1) break;

      // Exponential backoff (base * 2^attempt) plus up to one base of jitter.
      const delay = Math.min(
        baseDelayMs * Math.pow(2, attempt) + Math.random() * baseDelayMs,
        maxDelayMs
      );
      if (onRetry) onRetry({ attempt: attempt + 1, error: err, delayMs: Math.round(delay) });
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }
  throw lastError;
}
/**
 * Decide whether a failed request is worth retrying.
 *
 * Evaluation order:
 *  1. HTTP 5xx and 429 are retryable.
 *  2. A "could not extract token" failure is never retryable. This check has
 *     to run before the "no status" rule, otherwise it can never be reached
 *     for errors that carry no HTTP status.
 *  3. An error without any HTTP status (`status` / `statusCode` missing or 0)
 *     is retryable.
 *  4. Otherwise the error is retryable only if its message contains one of the
 *     timeout / connection / network markers below.
 *
 * @param {*} err - Error object, message string, or anything thrown.
 * @returns {boolean} True when a retry is reasonable.
 */
export function isRetryable(err) {
  if (!err) return false;

  const msg = String(err.message || err).toLowerCase();
  const status = err.status || err.statusCode || 0;

  if (status >= 500 && status < 600) return true;
  if (status === 429) return true;

  // Explicit deny-list entry; must precede the status-less fallback.
  if (msg.includes('could not extract token')) return false;

  if (status === 0) return true;

  const transientMarkers = [
    'timeout', 'timed out',
    'econnrefused', 'econnreset',
    'etimedout', 'enotfound', 'eai_again',
    'network', 'socket', 'fetch failed'
  ];
  return transientMarkers.some((marker) => msg.includes(marker));
}

29
src/utils/ua.js Normal file
View file

@ -0,0 +1,29 @@
// Built-in User-Agent strings, used until a custom list is installed via
// setUserAgents.
const DEFAULT_AGENTS = [
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
"Mozilla/5.0 (X11; Linux x86_64; rv:127.0) Gecko/20100101 Firefox/127.0",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:127.0) Gecko/20100101 Firefox/127.0",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 14.5; rv:127.0) Gecko/20100101 Firefox/127.0",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0",
"Mozilla/5.0 (iPhone; CPU iPhone OS 17_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Mobile/15E148 Safari/604.1"
];
// Active pool; starts as a copy of the defaults so DEFAULT_AGENTS itself is
// never the array being replaced.
let agents = [...DEFAULT_AGENTS];
// Rotation cursor for getNextUA; it only ever increases and is reduced
// modulo the pool size on each read.
let index = 0;
/**
 * Replace the active User-Agent pool.
 *
 * Only non-empty strings are kept, and they are copied into a new array so
 * later changes to the caller's list (for example a shared config object)
 * cannot alter or empty the pool. If `list` is not an array, or holds no
 * usable entry, the current pool is left unchanged.
 *
 * @param {*} list - Candidate User-Agent strings.
 * @returns {void}
 */
export function setUserAgents(list) {
  if (!Array.isArray(list)) return;
  const usable = list.filter((ua) => typeof ua === "string" && ua.trim() !== "");
  if (usable.length > 0) {
    agents = usable;
  }
}
/**
 * Pick a User-Agent string uniformly at random from the active pool.
 *
 * @returns {string|undefined} A pool entry; `undefined` if the pool is empty.
 */
export function getRandomUA() {
  const pick = Math.floor(Math.random() * agents.length);
  return agents[pick];
}
/**
 * Return the next User-Agent string in round-robin order and advance the
 * rotation cursor by one.
 *
 * @returns {string|undefined} The pool entry at the current cursor position.
 */
export function getNextUA() {
  const slot = index % agents.length;
  index += 1;
  return agents[slot];
}