feat: initial polysearch - multi-engine search with proxy distribution, circuit breaker, REST API, and metrics
This commit is contained in:
commit
5bd3b9d06b
28 changed files with 2780 additions and 0 deletions
11
.env.example
Normal file
11
.env.example
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
# Webshare.io API key for automatic proxy fetching
|
||||
# Get yours at https://www.webshare.io/user/api
|
||||
WEBSHARE_API_KEY=
|
||||
|
||||
# Oxylabs datacenter proxies (dc.oxylabs.io:8000)
|
||||
# https://developers.oxylabs.io/proxies/datacenter-proxies
|
||||
OXYLABS_USERNAME=
|
||||
OXYLABS_PASSWORD=
|
||||
OXYLABS_COUNTRY=US
|
||||
|
||||
# Future: Add other proxy providers here (BrightData, Smartproxy, etc.)
|
||||
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
node_modules/
|
||||
.env
|
||||
config.json
|
||||
proxies.txt
|
||||
*.log
|
||||
281
API.md
Normal file
281
API.md
Normal file
|
|
@ -0,0 +1,281 @@
|
|||
# PolySearch API
|
||||
|
||||
A search API for AI agents. Submit queries, get structured results. Supports single and batch requests.
|
||||
|
||||
**Base URL:** `http://<host>:9876`
|
||||
|
||||
**Auth:** All endpoints except `/health` require a Bearer token.
|
||||
|
||||
```http
|
||||
Authorization: Bearer <api_key>
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Quick reference
|
||||
|
||||
| Endpoint | Method | Auth | Purpose |
|
||||
|----------|--------|------|---------|
|
||||
| `/health` | GET | No | Ping the server |
|
||||
| `/search` | POST | Yes | One query, one response |
|
||||
| `/batch` | POST | Yes | Multiple queries, one response |
|
||||
| `/metrics` | GET | Yes | Proxy pool health and usage |
|
||||
|
||||
---
|
||||
|
||||
## Authentication
|
||||
|
||||
The server operator provides an API key. Include it in every request (except `/health`):
|
||||
|
||||
```bash
|
||||
curl -H "Authorization: Bearer <key>" http://localhost:9876/search ...
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## `GET /health`
|
||||
|
||||
Check if the server is running.
|
||||
|
||||
```bash
|
||||
curl http://localhost:9876/health
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"status": "ok",
|
||||
"uptime": 42.5,
|
||||
"proxyPool": {
|
||||
"total": 11,
|
||||
"alive": 11,
|
||||
"dead": 0,
|
||||
"requestsTotal": 47,
|
||||
"successRate": "91.5%",
|
||||
"hourlyUsage": 12
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## `POST /search`
|
||||
|
||||
Single search. Returns either image results, web results, or both.
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:9876/search \
|
||||
-H "Authorization: Bearer <key>" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"query": "mars rover", "type": "image", "limit": 3}'
|
||||
```
|
||||
|
||||
### Request
|
||||
|
||||
| Field | Type | Required | Default | Description |
|
||||
|-------|------|----------|---------|-------------|
|
||||
| `query` | string | yes | — | What to search for |
|
||||
| `type` | string | no | `image` | `image`, `web`, or `both` |
|
||||
| `limit` | number | no | `10` | Max results per type (1–50) |
|
||||
|
||||
### Response — Image results
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"query": "mars rover",
|
||||
"type": "image",
|
||||
"execution_time_ms": 3200,
|
||||
"image": {
|
||||
"engine": "duckduckgo",
|
||||
"total": 3,
|
||||
"results": [
|
||||
{
|
||||
"index": 1,
|
||||
"title": "NASA Perseverance Rover's Stunning Find...",
|
||||
"image_url": "https://scitechdaily.com/images/...jpg",
|
||||
"source_url": "https://scitechdaily.com/...",
|
||||
"domain": "scitechdaily.com",
|
||||
"width": 2560,
|
||||
"height": 1818,
|
||||
"thumbnail": "https://tse4.mm.bing.net/th?id=...",
|
||||
"engine": "duckduckgo"
|
||||
}
|
||||
],
|
||||
"statistics": {
|
||||
"avg_width": 3200,
|
||||
"avg_height": 1989,
|
||||
"domains": {
|
||||
"scitechdaily.com": 1,
|
||||
"static1.simpleflyingimages.com": 2
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Each image result contains:
|
||||
|
||||
| Field | Type | Description |
|
||||
|-------|------|-------------|
|
||||
| `title` | string | Image description or caption |
|
||||
| `image_url` | string | Direct URL to the image file |
|
||||
| `source_url` | string | Page the image was found on |
|
||||
| `domain` | string | Hosting domain |
|
||||
| `width` | number | Image width (pixels) |
|
||||
| `height` | number | Image height (pixels) |
|
||||
| `thumbnail` | string | Thumbnail URL |
|
||||
| `engine` | string | Search engine used |
|
||||
|
||||
### Response — Web results
|
||||
|
||||
When `type` is `web` or `both`:
|
||||
|
||||
```json
|
||||
{
|
||||
"web": {
|
||||
"engine": "duckduckgo",
|
||||
"total": 3,
|
||||
"results": [
|
||||
{
|
||||
"index": 1,
|
||||
"title": "Quantum computing - Wikipedia",
|
||||
"url": "https://en.wikipedia.org/wiki/Quantum_computing",
|
||||
"domain": "en.wikipedia.org",
|
||||
"snippet": "A quantum computer is a computer that exploits quantum mechanical phenomena...",
|
||||
"engine": "duckduckgo"
|
||||
}
|
||||
],
|
||||
"statistics": {
|
||||
"domains": {
|
||||
"en.wikipedia.org": 1
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Response — Error
|
||||
|
||||
```json
|
||||
{
|
||||
"success": false,
|
||||
"error": {
|
||||
"code": "ENGINE_FAILED",
|
||||
"message": "All search engines returned errors",
|
||||
"type": "Error"
|
||||
},
|
||||
"query": "obscure term",
|
||||
"timestamp": "2026-06-05T10:55:25.088Z"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## `POST /batch`
|
||||
|
||||
Run 2–50 queries in a single request. Queries execute concurrently. Results return when all are complete.
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:9876/batch \
|
||||
-H "Authorization: Bearer <key>" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"queries": [
|
||||
{"query": "vintage radio", "type": "image", "limit": 2},
|
||||
{"query": "mars rover", "type": "image", "limit": 2},
|
||||
{"query": "aurora borealis","type": "image", "limit": 2}
|
||||
]
|
||||
}'
|
||||
```
|
||||
|
||||
### Request
|
||||
|
||||
| Field | Type | Required | Description |
|
||||
|-------|------|----------|-------------|
|
||||
| `queries` | array | yes | 2–50 query objects |
|
||||
|
||||
Each query object:
|
||||
|
||||
| Field | Type | Required | Default | Description |
|
||||
|-------|------|----------|---------|-------------|
|
||||
| `query` | string | yes | — | Search term |
|
||||
| `type` | string | no | `image` | `image`, `web`, or `both` |
|
||||
| `limit` | number | no | `10` | Max results per type |
|
||||
|
||||
### Response
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"batch_size": 3,
|
||||
"execution_time_ms": 4511,
|
||||
"ok": 3,
|
||||
"fail": 0,
|
||||
"results": [
|
||||
{
|
||||
"index": 0,
|
||||
"query": "vintage radio",
|
||||
"type": "image",
|
||||
"success": true,
|
||||
"execution_time_ms": 4313,
|
||||
"results": { ... full search response ... },
|
||||
"errors": []
|
||||
}
|
||||
],
|
||||
"metrics": {
|
||||
"hourly_usage": 7
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Each result mirrors the single `/search` response. Failed queries include an `error` field instead of `results`.
|
||||
|
||||
Use `batch_size` to verify all queries were accepted, and `ok` / `fail` for a quick success count.
|
||||
|
||||
---
|
||||
|
||||
## `GET /metrics`
|
||||
|
||||
Proxy pool statistics. Useful for monitoring health and utilization.
|
||||
|
||||
```bash
|
||||
curl -H "Authorization: Bearer <key>" http://localhost:9876/metrics
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"metrics": {
|
||||
"totalProxies": 11,
|
||||
"alive": 11,
|
||||
"dead": 0,
|
||||
"circuitOpen": 0,
|
||||
"requestsTotal": 47,
|
||||
"successTotal": 43,
|
||||
"failureTotal": 4,
|
||||
"successRate": "91.5%",
|
||||
"hourlyUsageCurrent": 12,
|
||||
"byProvider": {
|
||||
"webshare": {
|
||||
"proxyCount": 10,
|
||||
"alive": 10,
|
||||
"requests": 47,
|
||||
"success": 43,
|
||||
"failure": 4,
|
||||
"avgLatencyMs": 3604,
|
||||
"successRate": "91.5%",
|
||||
"hourlyUsage": { "currentHour": 12 }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Notes
|
||||
|
||||
- **Rate limits** depend on the proxy provider. The system distributes requests evenly and opens circuit breakers when proxies fail repeatedly.
|
||||
- **Timeouts** vary. Image searches typically take 2–15 seconds depending on query and network conditions.
|
||||
- **Max batch size** is 50 queries per request.
|
||||
115
README.md
Normal file
115
README.md
Normal file
|
|
@ -0,0 +1,115 @@
|
|||
# PolySearch
|
||||
|
||||
Multi-engine web + image search with smart proxy distribution, circuit breakers, structured AI agent output, and a REST API.
|
||||
|
||||
```bash
|
||||
node src/index.js -q "quantum computing" -t both -l 10 -m agent
|
||||
```
|
||||
|
||||
## Features
|
||||
|
||||
- **Web + Image search** — both result types, one tool
|
||||
- **Smart proxy distribution** — least-used selection per hour, balanced across providers
|
||||
- **Circuit breaker per proxy** — exponential backoff on failure, auto-recovery
|
||||
- **Multi-provider proxy system** — add Webshare, Oxylabs, BrightData in one file each
|
||||
- **Multi-engine architecture** — add Brave, Bing, Google in one file each
|
||||
- **Per-provider metrics** — requests, success rate, latency, hourly usage grouped by provider
|
||||
- **Dual output modes**:
|
||||
- `human` — colorized terminal
|
||||
- `agent` — structured JSON with statistics
|
||||
- **REST API** — single search, batch search, auth with API keys. See [API.md](API.md)
|
||||
|
||||
## Requirements
|
||||
|
||||
Node.js 18+
|
||||
|
||||
## Quick start
|
||||
|
||||
```bash
|
||||
# Single image search
|
||||
node src/index.js -q "vintage radio"
|
||||
|
||||
# Web search
|
||||
node src/index.js -q "quantum computing" -t web
|
||||
|
||||
# Both types, AI agent JSON
|
||||
node src/index.js -q "spacex starship" -t both -l 10 -m agent
|
||||
|
||||
# Show proxy metrics after a search
|
||||
node src/index.js -q "mars rover" -M
|
||||
```
|
||||
|
||||
## CLI
|
||||
|
||||
| Flag | Long | Description | Default |
|
||||
|------|------|-------------|---------|
|
||||
| `-q` | `--query` | Search query | — |
|
||||
| `-t` | `--type` | `web`, `image`, or `both` | `image` |
|
||||
| `-l` | `--limit` | Max results per type | `10` |
|
||||
| `-m` | `--mode` | `human` or `agent` | `human` |
|
||||
| `-p` | `--proxy` | Single proxy URL override | — |
|
||||
| `-c` | `--config` | Path to config file | auto-detect |
|
||||
| `-M` | `--metrics` | Dump proxy pool metrics | — |
|
||||
| | `--serve` | Start REST API server | — |
|
||||
| | `--port` | API server port | `9876` |
|
||||
| | `--generate-key` | Generate API key | — |
|
||||
| `-h` | `--help` | Show help | — |
|
||||
|
||||
## REST API
|
||||
|
||||
For AI agent consumption. See [API.md](API.md) for full documentation.
|
||||
|
||||
```bash
|
||||
node src/index.js --generate-key # create an API key
|
||||
node src/index.js --serve --port 9876 # start the server
|
||||
```
|
||||
|
||||
**Endpoints:** `GET /health`, `POST /search`, `POST /batch`, `GET /metrics`
|
||||
|
||||
---
|
||||
|
||||
## CLI
|
||||
|
||||
Providers are auto-discovered from environment variables:
|
||||
|
||||
| Provider | Env vars | Type |
|
||||
|----------|----------|------|
|
||||
| Webshare | `WEBSHARE_API_KEY` | API-fetched, 10 rotating IPs |
|
||||
| Oxylabs | `OXYLABS_USERNAME`, `OXYLABS_PASSWORD`, `OXYLABS_COUNTRY` | Single datacenter endpoint |
|
||||
|
||||
Add a new provider by creating a file in `src/http/providers/` that calls `registerProvider(name, fetcher)`. The fetcher returns an array of proxy URL strings.
|
||||
|
||||
## Engine architecture
|
||||
|
||||
Engines are registered in `src/engines/setup.js`. Each engine supports `web`, `image`, or both. DuckDuckGo is the default. Add Brave, Bing, or custom engines by implementing the `search(query, opts)` interface.
|
||||
|
||||
## Project structure
|
||||
|
||||
```
|
||||
src/
|
||||
├── index.js # CLI + programmatic API + API server dispatch
|
||||
├── api.js # REST API server (/search, /batch, /metrics, /health)
|
||||
├── api-key.js # Key generation + env storage
|
||||
├── cli.js # Argument parsing
|
||||
├── config.js # Config loader (json + env + providers)
|
||||
├── run.js # Search orchestration + engine fallback
|
||||
├── engines/
|
||||
│ ├── base.js # Abstract engine interface
|
||||
│ ├── index.js # Engine registry
|
||||
│ ├── setup.js # Built-in engine registration
|
||||
│ └── duckduckgo.js # DuckDuckGo (web + image)
|
||||
├── http/
|
||||
│ ├── client.js # Fetch wrapper (proxy, retry, timeout, UA)
|
||||
│ ├── proxy.js # Proxy pool (least-used, circuit breaker, metrics)
|
||||
│ └── providers/
|
||||
│ ├── index.js # Provider registry
|
||||
│ ├── webshare.js # Webshare.io
|
||||
│ └── oxylabs.js # Oxylabs datacenter
|
||||
├── output/
|
||||
│ ├── human.js # Terminal formatting
|
||||
│ └── agent.js # JSON formatting
|
||||
└── utils/
|
||||
├── logger.js # Pino structured logging
|
||||
├── retry.js # Exponential backoff + jitter
|
||||
└── ua.js # User-agent pool
|
||||
```
|
||||
200
benchmark.js
Normal file
200
benchmark.js
Normal file
|
|
@ -0,0 +1,200 @@
|
|||
#!/usr/bin/env node
|
||||
import { loadConfig } from "./src/config.js";
|
||||
import { HttpClient } from "./src/http/client.js";
|
||||
import { ProxyPool } from "./src/http/proxy.js";
|
||||
import { SearchRunner } from "./src/run.js";
|
||||
import { setUserAgents } from "./src/utils/ua.js";
|
||||
import { childLogger } from "./src/utils/logger.js";
|
||||
|
||||
const WEB_QUERIES = [
|
||||
"quantum computing", "machine learning", "renaissance art", "solar system",
|
||||
"ancient rome", "climate change", "python programming", "space exploration",
|
||||
"world war 2", "ocean depth", "artificial intelligence", "mount everest",
|
||||
"greek mythology", "industrial revolution", "human genome", "black holes",
|
||||
"coral reef", "buddhism history", "cold war", "mars colonization",
|
||||
"electric vehicles", "great barrier reef", "dark matter", "dinosaur fossils",
|
||||
"ancient egypt pyramids", "big bang theory", "amazon rainforest",
|
||||
"vitamin deficiency", "stock market crash 1929", "northern lights"
|
||||
];
|
||||
|
||||
const IMAGE_QUERIES = [
|
||||
"vintage radio", "mars rover", "aurora borealis", "mountain landscape",
|
||||
"classic cars", "modern architecture", "street photography", "wild animals",
|
||||
"space nebula", "underwater coral", "sunset beach", "city skyline",
|
||||
"butterfly macro", "starry night sky", "tropical forest", "medieval castle",
|
||||
"abstract art", "vintage motorcycles", "taj mahal", "rainforest waterfall",
|
||||
"northern lights norway", "japanese garden", "safari animals", "galaxy cluster",
|
||||
"old steam train", "desert dunes", "cherry blossom", "iceberg antarctica",
|
||||
"neon city night", "autumn forest path"
|
||||
];
|
||||
|
||||
function generateReport(results) {
|
||||
const total = results.length;
|
||||
const success = results.filter(r => r.success).length;
|
||||
const failed = results.filter(r => !r.success).length;
|
||||
const successRate = (success / total * 100).toFixed(1);
|
||||
|
||||
const byType = {};
|
||||
for (const r of results) {
|
||||
byType[r.type] = byType[r.type] || [];
|
||||
byType[r.type].push(r);
|
||||
}
|
||||
|
||||
console.log("\n" + "=".repeat(80));
|
||||
console.log(" BENCHMARK REPORT");
|
||||
console.log("=".repeat(80));
|
||||
console.log(`\n Total requests: ${total}`);
|
||||
console.log(` Successful: ${success} (${successRate}%)`);
|
||||
console.log(` Failed: ${failed} (${(100 - parseFloat(successRate)).toFixed(1)}%)`);
|
||||
console.log(` Date: ${new Date().toISOString()}`);
|
||||
console.log(` Proxy pool: ${results[0]?.proxyCount || "N/A"} proxies`);
|
||||
|
||||
for (const [type, items] of Object.entries(byType)) {
|
||||
const tSuccess = items.filter(r => r.success).length;
|
||||
const tFailed = items.filter(r => !r.success).length;
|
||||
const times = items.filter(r => r.success).map(r => r.durationMs).sort((a, b) => a - b);
|
||||
const dataSizes = items.filter(r => r.dataSizeKb != null).map(r => r.dataSizeKb).sort((a, b) => a - b);
|
||||
|
||||
console.log(`\n ── ${type.toUpperCase()} (${items.length} requests, ${tSuccess} ok / ${tFailed} fail) ──`);
|
||||
|
||||
if (times.length > 0) {
|
||||
const avg = times.reduce((s, v) => s + v, 0) / times.length;
|
||||
const p50 = times[Math.floor(times.length * 0.5)];
|
||||
const p95 = times[Math.floor(times.length * 0.95)];
|
||||
const p99 = times[Math.floor(times.length * 0.99)];
|
||||
const min = times[0];
|
||||
const max = times[times.length - 1];
|
||||
console.log(` Response time (ms): avg=${avg.toFixed(0)} p50=${p50} p95=${p95} p99=${p99} min=${min} max=${max}`);
|
||||
}
|
||||
|
||||
if (dataSizes.length > 0) {
|
||||
const avg = dataSizes.reduce((s, v) => s + v, 0) / dataSizes.length;
|
||||
const p50 = dataSizes[Math.floor(dataSizes.length * 0.5)];
|
||||
const p95 = dataSizes[Math.floor(dataSizes.length * 0.95)];
|
||||
console.log(` Data size (KB): avg=${avg.toFixed(2)} p50=${p50.toFixed(1)} p95=${p95.toFixed(1)}`);
|
||||
}
|
||||
|
||||
const engineCounts = {};
|
||||
items.filter(r => r.success).forEach(r => {
|
||||
const key = r.engine || "unknown";
|
||||
engineCounts[key] = (engineCounts[key] || 0) + 1;
|
||||
});
|
||||
if (Object.keys(engineCounts).length > 0) {
|
||||
console.log(` Engines used: ${Object.entries(engineCounts).map(([k, v]) => `${k}=${v}`).join(", ")}`);
|
||||
}
|
||||
|
||||
const proxiesUsed = new Set(items.filter(r => r.success).map(r => r.proxyHost).filter(Boolean));
|
||||
const proxyFails = items.filter(r => !r.success).map(r => r.proxyHost).filter(Boolean);
|
||||
if (proxiesUsed.size > 0) {
|
||||
console.log(` Distinct proxies used: ${proxiesUsed.size}`);
|
||||
console.log(` Proxy failures: ${proxyFails.length}`);
|
||||
}
|
||||
}
|
||||
|
||||
const errorTypes = {};
|
||||
for (const r of results) {
|
||||
if (!r.success && r.error) {
|
||||
const key = r.error.substring(0, 60);
|
||||
errorTypes[key] = (errorTypes[key] || 0) + 1;
|
||||
}
|
||||
}
|
||||
if (Object.keys(errorTypes).length > 0) {
|
||||
console.log(`\n ── Error Breakdown ──`);
|
||||
for (const [err, count] of Object.entries(errorTypes).sort((a, b) => b[1] - a[1])) {
|
||||
console.log(` [${count}x] ${err}`);
|
||||
}
|
||||
}
|
||||
|
||||
console.log("=".repeat(80) + "\n");
|
||||
}
|
||||
|
||||
async function runSingleTest(runner, query, type, index) {
|
||||
const start = Date.now();
|
||||
try {
|
||||
const data = await runner.run({ query, type, limit: 3 });
|
||||
const durationMs = Date.now() - start;
|
||||
const results = data[type]?.results || [];
|
||||
return {
|
||||
index,
|
||||
query,
|
||||
type,
|
||||
success: true,
|
||||
durationMs,
|
||||
dataSizeKb: JSON.stringify(data).length / 1024,
|
||||
resultCount: results.length,
|
||||
engine: data[type]?.engine || "none",
|
||||
proxyHost: null,
|
||||
errors: data.errors?.length || 0
|
||||
};
|
||||
} catch (err) {
|
||||
return {
|
||||
index,
|
||||
query,
|
||||
type,
|
||||
success: false,
|
||||
durationMs: Date.now() - start,
|
||||
error: err.message,
|
||||
resultCount: 0,
|
||||
engine: "none",
|
||||
proxyHost: null
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
async function main() {
|
||||
const log = childLogger({ component: "benchmark" });
|
||||
|
||||
log.info({ webCount: WEB_QUERIES.length, imageCount: IMAGE_QUERIES.length }, "starting benchmark");
|
||||
|
||||
const config = await loadConfig();
|
||||
if (config.http.user_agents) setUserAgents(config.http.user_agents);
|
||||
|
||||
const proxyPool = new ProxyPool(config.proxies, config.proxy);
|
||||
const httpClient = new HttpClient(config.http);
|
||||
|
||||
if (config.proxy.enabled) {
|
||||
httpClient.setProxyPool(proxyPool);
|
||||
log.info({ count: config.proxies.length }, "proxy pool attached");
|
||||
proxyPool.logState();
|
||||
}
|
||||
|
||||
const runner = new SearchRunner({ httpClient, config });
|
||||
const allResults = [];
|
||||
|
||||
const queries = [
|
||||
...WEB_QUERIES.map(q => ({ query: q, type: "web" })),
|
||||
...IMAGE_QUERIES.map(q => ({ query: q, type: "image" }))
|
||||
];
|
||||
|
||||
const batchSize = 5;
|
||||
for (let i = 0; i < queries.length; i += batchSize) {
|
||||
const batch = queries.slice(i, i + batchSize);
|
||||
log.info({ batch: Math.floor(i / batchSize) + 1, total: queries.length }, `running batch`);
|
||||
|
||||
const batchResults = await Promise.all(
|
||||
batch.map((item, j) => {
|
||||
const idx = i + j + 1;
|
||||
return runSingleTest(runner, item.query, item.type, idx);
|
||||
})
|
||||
);
|
||||
|
||||
allResults.push(...batchResults);
|
||||
|
||||
for (const r of batchResults) {
|
||||
const icon = r.success ? "✓" : "✗";
|
||||
console.log(`${icon} [${r.type.padEnd(5)}] #${String(r.index).padStart(2)} "${r.query.slice(0, 30).padEnd(30)}" ${r.success ? `${r.durationMs}ms` : `FAIL: ${r.error?.slice(0, 40)}`}`);
|
||||
}
|
||||
}
|
||||
|
||||
const totalDuration = allResults.reduce((s, r) => s + r.durationMs, 0);
|
||||
log.info({
|
||||
totalRequests: allResults.length,
|
||||
totalDurationMs: totalDuration,
|
||||
avgDurationMs: Math.round(totalDuration / allResults.length),
|
||||
successRate: `${(allResults.filter(r => r.success).length / allResults.length * 100).toFixed(1)}%`
|
||||
}, "benchmark complete");
|
||||
|
||||
generateReport(allResults);
|
||||
}
|
||||
|
||||
main();
|
||||
39
config.example.json
Normal file
39
config.example.json
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
{
|
||||
"engines": {
|
||||
"duckduckgo": {
|
||||
"priority": 1,
|
||||
"enabled": true,
|
||||
"timeout_ms": 10000,
|
||||
"retries": 2
|
||||
},
|
||||
"brave": {
|
||||
"priority": 2,
|
||||
"enabled": false,
|
||||
"timeout_ms": 8000,
|
||||
"retries": 1,
|
||||
"api_key": ""
|
||||
}
|
||||
},
|
||||
"proxy": {
|
||||
"enabled": false,
|
||||
"rotation": "round-robin",
|
||||
"health_check_interval_ms": 60000,
|
||||
"max_failures": 3
|
||||
},
|
||||
"http": {
|
||||
"timeout_ms": 10000,
|
||||
"retry_max_attempts": 3,
|
||||
"retry_base_delay_ms": 1000,
|
||||
"retry_max_delay_ms": 10000,
|
||||
"user_agents": [
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
||||
]
|
||||
},
|
||||
"search": {
|
||||
"default_type": "image",
|
||||
"default_limit": 10,
|
||||
"max_limit": 50
|
||||
}
|
||||
}
|
||||
182
package-lock.json
generated
Normal file
182
package-lock.json
generated
Normal file
|
|
@ -0,0 +1,182 @@
|
|||
{
|
||||
"name": "image-search",
|
||||
"version": "2.0.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "image-search",
|
||||
"version": "2.0.0",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"dotenv": "^17.4.2",
|
||||
"pino": "^10.3.1",
|
||||
"undici": "^7.27.1"
|
||||
},
|
||||
"bin": {
|
||||
"image-search": "src/index.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@pinojs/redact": {
|
||||
"version": "0.4.0",
|
||||
"resolved": "https://registry.npmjs.org/@pinojs/redact/-/redact-0.4.0.tgz",
|
||||
"integrity": "sha512-k2ENnmBugE/rzQfEcdWHcCY+/FM3VLzH9cYEsbdsoqrvzAKRhUZeRNhAZvB8OitQJ1TBed3yqWtdjzS6wJKBwg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/atomic-sleep": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/atomic-sleep/-/atomic-sleep-1.0.0.tgz",
|
||||
"integrity": "sha512-kNOjDqAh7px0XWNI+4QbzoiR/nTkHAWNud2uvnJquD1/x5a7EQZMJT0AczqK0Qn67oY/TTQ1LbUKajZpp3I9tQ==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=8.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/dotenv": {
|
||||
"version": "17.4.2",
|
||||
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-17.4.2.tgz",
|
||||
"integrity": "sha512-nI4U3TottKAcAD9LLud4Cb7b2QztQMUEfHbvhTH09bqXTxnSie8WnjPALV/WMCrJZ6UV/qHJ6L03OqO3LcdYZw==",
|
||||
"license": "BSD-2-Clause",
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://dotenvx.com"
|
||||
}
|
||||
},
|
||||
"node_modules/on-exit-leak-free": {
|
||||
"version": "2.1.2",
|
||||
"resolved": "https://registry.npmjs.org/on-exit-leak-free/-/on-exit-leak-free-2.1.2.tgz",
|
||||
"integrity": "sha512-0eJJY6hXLGf1udHwfNftBqH+g73EU4B504nZeKpz1sYRKafAghwxEJunB2O7rDZkL4PGfsMVnTXZ2EjibbqcsA==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=14.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/pino": {
|
||||
"version": "10.3.1",
|
||||
"resolved": "https://registry.npmjs.org/pino/-/pino-10.3.1.tgz",
|
||||
"integrity": "sha512-r34yH/GlQpKZbU1BvFFqOjhISRo1MNx1tWYsYvmj6KIRHSPMT2+yHOEb1SG6NMvRoHRF0a07kCOox/9yakl1vg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@pinojs/redact": "^0.4.0",
|
||||
"atomic-sleep": "^1.0.0",
|
||||
"on-exit-leak-free": "^2.1.0",
|
||||
"pino-abstract-transport": "^3.0.0",
|
||||
"pino-std-serializers": "^7.0.0",
|
||||
"process-warning": "^5.0.0",
|
||||
"quick-format-unescaped": "^4.0.3",
|
||||
"real-require": "^0.2.0",
|
||||
"safe-stable-stringify": "^2.3.1",
|
||||
"sonic-boom": "^4.0.1",
|
||||
"thread-stream": "^4.0.0"
|
||||
},
|
||||
"bin": {
|
||||
"pino": "bin.js"
|
||||
}
|
||||
},
|
||||
"node_modules/pino-abstract-transport": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/pino-abstract-transport/-/pino-abstract-transport-3.0.0.tgz",
|
||||
"integrity": "sha512-wlfUczU+n7Hy/Ha5j9a/gZNy7We5+cXp8YL+X+PG8S0KXxw7n/JXA3c46Y0zQznIJ83URJiwy7Lh56WLokNuxg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"split2": "^4.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/pino-std-serializers": {
|
||||
"version": "7.1.0",
|
||||
"resolved": "https://registry.npmjs.org/pino-std-serializers/-/pino-std-serializers-7.1.0.tgz",
|
||||
"integrity": "sha512-BndPH67/JxGExRgiX1dX0w1FvZck5Wa4aal9198SrRhZjH3GxKQUKIBnYJTdj2HDN3UQAS06HlfcSbQj2OHmaw==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/process-warning": {
|
||||
"version": "5.0.0",
|
||||
"resolved": "https://registry.npmjs.org/process-warning/-/process-warning-5.0.0.tgz",
|
||||
"integrity": "sha512-a39t9ApHNx2L4+HBnQKqxxHNs1r7KF+Intd8Q/g1bUh6q0WIp9voPXJ/x0j+ZL45KF1pJd9+q2jLIRMfvEshkA==",
|
||||
"funding": [
|
||||
{
|
||||
"type": "github",
|
||||
"url": "https://github.com/sponsors/fastify"
|
||||
},
|
||||
{
|
||||
"type": "opencollective",
|
||||
"url": "https://opencollective.com/fastify"
|
||||
}
|
||||
],
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/quick-format-unescaped": {
|
||||
"version": "4.0.4",
|
||||
"resolved": "https://registry.npmjs.org/quick-format-unescaped/-/quick-format-unescaped-4.0.4.tgz",
|
||||
"integrity": "sha512-tYC1Q1hgyRuHgloV/YXs2w15unPVh8qfu/qCTfhTYamaw7fyhumKa2yGpdSo87vY32rIclj+4fWYQXUMs9EHvg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/real-require": {
|
||||
"version": "0.2.0",
|
||||
"resolved": "https://registry.npmjs.org/real-require/-/real-require-0.2.0.tgz",
|
||||
"integrity": "sha512-57frrGM/OCTLqLOAh0mhVA9VBMHd+9U7Zb2THMGdBUoZVOtGbJzjxsYGDJ3A9AYYCP4hn6y1TVbaOfzWtm5GFg==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 12.13.0"
|
||||
}
|
||||
},
|
||||
"node_modules/safe-stable-stringify": {
|
||||
"version": "2.5.0",
|
||||
"resolved": "https://registry.npmjs.org/safe-stable-stringify/-/safe-stable-stringify-2.5.0.tgz",
|
||||
"integrity": "sha512-b3rppTKm9T+PsVCBEOUR46GWI7fdOs00VKZ1+9c1EWDaDMvjQc6tUwuFyIprgGgTcWoVHSKrU8H31ZHA2e0RHA==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
}
|
||||
},
|
||||
"node_modules/sonic-boom": {
|
||||
"version": "4.2.1",
|
||||
"resolved": "https://registry.npmjs.org/sonic-boom/-/sonic-boom-4.2.1.tgz",
|
||||
"integrity": "sha512-w6AxtubXa2wTXAUsZMMWERrsIRAdrK0Sc+FUytWvYAhBJLyuI4llrMIC1DtlNSdI99EI86KZum2MMq3EAZlF9Q==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"atomic-sleep": "^1.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/split2": {
|
||||
"version": "4.2.0",
|
||||
"resolved": "https://registry.npmjs.org/split2/-/split2-4.2.0.tgz",
|
||||
"integrity": "sha512-UcjcJOWknrNkF6PLX83qcHM6KHgVKNkV62Y8a5uYDVv9ydGQVwAHMKqHdJje1VTWpljG0WYpCDhrCdAOYH4TWg==",
|
||||
"license": "ISC",
|
||||
"engines": {
|
||||
"node": ">= 10.x"
|
||||
}
|
||||
},
|
||||
"node_modules/thread-stream": {
|
||||
"version": "4.2.0",
|
||||
"resolved": "https://registry.npmjs.org/thread-stream/-/thread-stream-4.2.0.tgz",
|
||||
"integrity": "sha512-e2zZ96wSChazBsbENf/Pcm/4swHt2cEKQ92rhUjkL9GCKiTDJIaTBenjE/m9DXi0QBmTMDkFDdOomUy20A1tDQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"real-require": "^1.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=20"
|
||||
}
|
||||
},
|
||||
"node_modules/thread-stream/node_modules/real-require": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/real-require/-/real-require-1.0.0.tgz",
|
||||
"integrity": "sha512-P4nbQYQfePJxRSmY+v/KINxVucm4NF3p3s7pJveMTtom52FR4YGltUQLB8idDXwDDWW+eYrWDFbuzUnjoWHF7g==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/undici": {
|
||||
"version": "7.27.1",
|
||||
"resolved": "https://registry.npmjs.org/undici/-/undici-7.27.1.tgz",
|
||||
"integrity": "sha512-UDdpiex+mzigiyrXrGbiUaF4HzTNhKbh2vRNFaTMzcqmLIPrZxaCtwo/1TMSuWoM1Xz3WiTo9KdgI3kRqYzJGg==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=20.18.1"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
39
package.json
Normal file
39
package.json
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
{
|
||||
"name": "polysearch",
|
||||
"version": "2.0.0",
|
||||
"description": "Multi-engine web + image search with proxy rotation, circuit breaker, and structured AI agent output",
|
||||
"type": "module",
|
||||
"main": "src/index.js",
|
||||
"bin": {
|
||||
"polysearch": "src/index.js"
|
||||
},
|
||||
"scripts": {
|
||||
"search": "node src/index.js",
|
||||
"agent": "node src/index.js --mode agent",
|
||||
"human": "node src/index.js --mode human",
|
||||
"metrics": "node src/index.js -M",
|
||||
"help": "node src/index.js --help"
|
||||
},
|
||||
"keywords": [
|
||||
"polysearch",
|
||||
"web-search",
|
||||
"image-search",
|
||||
"multi-engine",
|
||||
"proxy",
|
||||
"circuit-breaker",
|
||||
"ai-agent",
|
||||
"duckduckgo",
|
||||
"oxylabs",
|
||||
"webshare"
|
||||
],
|
||||
"author": "",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=18.0.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"dotenv": "^17.4.2",
|
||||
"pino": "^10.3.1",
|
||||
"undici": "^7.27.1"
|
||||
}
|
||||
}
|
||||
51
src/api-key.js
Normal file
51
src/api-key.js
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
import { randomBytes } from "node:crypto";
|
||||
import { existsSync, readFileSync, writeFileSync } from "node:fs";
|
||||
import { resolve } from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import { dirname } from "node:path";
|
||||
|
||||
const PROJECT_ROOT = resolve(dirname(fileURLToPath(import.meta.url)), "..");
|
||||
const ENV_PATH = resolve(PROJECT_ROOT, ".env");
|
||||
|
||||
export function generateKey() {
|
||||
return randomBytes(32).toString("hex");
|
||||
}
|
||||
|
||||
export function saveKeyToEnv(key) {
|
||||
if (!existsSync(ENV_PATH)) {
|
||||
writeFileSync(ENV_PATH, `POLYSEARCH_API_KEY=${key}\n`);
|
||||
return { path: ENV_PATH, created: true };
|
||||
}
|
||||
|
||||
const raw = readFileSync(ENV_PATH, "utf-8");
|
||||
const lines = raw.split("\n");
|
||||
let replaced = false;
|
||||
|
||||
const updated = lines.map(line => {
|
||||
if (line.startsWith("POLYSEARCH_API_KEY=")) {
|
||||
replaced = true;
|
||||
return `POLYSEARCH_API_KEY=${key}`;
|
||||
}
|
||||
return line;
|
||||
});
|
||||
|
||||
if (!replaced) {
|
||||
updated.push(`POLYSEARCH_API_KEY=${key}`);
|
||||
}
|
||||
|
||||
writeFileSync(ENV_PATH, updated.join("\n"));
|
||||
return { path: ENV_PATH, created: !replaced };
|
||||
}
|
||||
|
||||
export function loadApiKey() {
|
||||
return process.env.POLYSEARCH_API_KEY || null;
|
||||
}
|
||||
|
||||
export function requireApiKey() {
|
||||
const key = loadApiKey();
|
||||
if (!key) {
|
||||
console.error("No API key found. Generate one with: node src/index.js --generate-key");
|
||||
process.exit(1);
|
||||
}
|
||||
return key;
|
||||
}
|
||||
242
src/api.js
Normal file
242
src/api.js
Normal file
|
|
@ -0,0 +1,242 @@
|
|||
import { createServer } from "node:http";
|
||||
import { loadConfig } from "./config.js";
|
||||
import { HttpClient } from "./http/client.js";
|
||||
import { ProxyPool } from "./http/proxy.js";
|
||||
import { SearchRunner } from "./run.js";
|
||||
import { setUserAgents } from "./utils/ua.js";
|
||||
import { loadApiKey } from "./api-key.js";
|
||||
import { logger, childLogger } from "./utils/logger.js";
|
||||
import { formatSearchResponse, formatErrorResponse } from "./output/agent.js";
|
||||
|
||||
const log = childLogger({ component: "api-server" });
|
||||
|
||||
function unauthorized(res, msg = "Unauthorized") {
|
||||
res.writeHead(401, { "Content-Type": "application/json" });
|
||||
res.end(JSON.stringify({ success: false, error: { code: "UNAUTHORIZED", message: msg } }));
|
||||
}
|
||||
|
||||
function badRequest(res, msg) {
|
||||
res.writeHead(400, { "Content-Type": "application/json" });
|
||||
res.end(JSON.stringify({ success: false, error: { code: "BAD_REQUEST", message: msg } }));
|
||||
}
|
||||
|
||||
function serverError(res, msg) {
|
||||
if (res.headersSent) return;
|
||||
res.writeHead(500, { "Content-Type": "application/json" });
|
||||
res.end(JSON.stringify({ success: false, error: { code: "INTERNAL_ERROR", message: msg } }));
|
||||
}
|
||||
|
||||
function parseBody(req) {
|
||||
return new Promise((resolve, reject) => {
|
||||
let data = "";
|
||||
req.on("data", chunk => data += chunk);
|
||||
req.on("end", () => {
|
||||
try { resolve(JSON.parse(data)); }
|
||||
catch { reject(new Error("Invalid JSON")); }
|
||||
});
|
||||
req.on("error", reject);
|
||||
});
|
||||
}
|
||||
|
||||
function authenticate(req) {
|
||||
const apiKey = loadApiKey();
|
||||
if (!apiKey) return true;
|
||||
|
||||
const auth = req.headers["authorization"] || "";
|
||||
const token = auth.startsWith("Bearer ") ? auth.slice(7) : "";
|
||||
return token === apiKey;
|
||||
}
|
||||
|
||||
export async function startServer(port = 9876) {
|
||||
const config = await loadConfig();
|
||||
if (config.http.user_agents) setUserAgents(config.http.user_agents);
|
||||
|
||||
const httpClient = new HttpClient(config.http);
|
||||
const proxyPool = new ProxyPool(config.proxies, config.proxy);
|
||||
|
||||
if (config.proxy.enabled) {
|
||||
httpClient.setProxyPool(proxyPool);
|
||||
log.info({ proxyCount: config.proxies.length }, "proxy pool attached to API server");
|
||||
}
|
||||
|
||||
const runner = new SearchRunner({ httpClient, config });
|
||||
|
||||
const server = createServer(async (req, res) => {
|
||||
res.setHeader("Access-Control-Allow-Origin", "*");
|
||||
res.setHeader("Access-Control-Allow-Methods", "GET, POST, OPTIONS");
|
||||
res.setHeader("Access-Control-Allow-Headers", "Content-Type, Authorization");
|
||||
|
||||
if (req.method === "OPTIONS") {
|
||||
res.writeHead(204);
|
||||
res.end();
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
const url = new URL(req.url, `http://${req.headers.host || "localhost"}`);
|
||||
const path = url.pathname;
|
||||
|
||||
// Health check — no auth required
|
||||
if (path === "/health" && req.method === "GET") {
|
||||
res.writeHead(200, { "Content-Type": "application/json" });
|
||||
const m = proxyPool.getMetrics();
|
||||
res.end(JSON.stringify({
|
||||
success: true,
|
||||
status: "ok",
|
||||
uptime: process.uptime(),
|
||||
proxyCount: config.proxies.length,
|
||||
proxyPool: {
|
||||
total: m.totalProxies,
|
||||
alive: m.alive,
|
||||
dead: m.dead,
|
||||
circuitOpen: m.circuitOpen,
|
||||
requestsTotal: m.requestsTotal,
|
||||
successRate: m.successRate,
|
||||
hourlyUsage: m.hourlyUsageCurrent
|
||||
}
|
||||
}));
|
||||
return;
|
||||
}
|
||||
|
||||
// Metrics — requires auth
|
||||
if (path === "/metrics" && req.method === "GET") {
|
||||
if (!authenticate(req)) return unauthorized(res);
|
||||
res.writeHead(200, { "Content-Type": "application/json" });
|
||||
res.end(JSON.stringify({
|
||||
success: true,
|
||||
metrics: proxyPool.getMetrics(),
|
||||
proxies: proxyPool.getProxyDetail()
|
||||
}));
|
||||
return;
|
||||
}
|
||||
|
||||
// Auth required for search endpoints
|
||||
if (!authenticate(req)) return unauthorized(res);
|
||||
|
||||
if (path === "/search" && req.method === "POST") {
|
||||
const body = await parseBody(req);
|
||||
const { query, type = "image", limit = 10 } = body;
|
||||
if (!query) return badRequest(res, "Missing 'query' field");
|
||||
|
||||
const start = Date.now();
|
||||
const data = await runner.run({ query, type, limit });
|
||||
const response = JSON.parse(formatSearchResponse(data));
|
||||
response.execution_time_ms = Date.now() - start;
|
||||
|
||||
res.writeHead(data.image?.results?.length > 0 || data.web?.results?.length > 0 ? 200 : 404, {
|
||||
"Content-Type": "application/json"
|
||||
});
|
||||
res.end(JSON.stringify(response));
|
||||
return;
|
||||
}
|
||||
|
||||
if (path === "/batch" && req.method === "POST") {
|
||||
const body = await parseBody(req);
|
||||
const { queries = [] } = body;
|
||||
|
||||
if (!Array.isArray(queries) || queries.length === 0) {
|
||||
return badRequest(res, "Missing or empty 'queries' array");
|
||||
}
|
||||
if (queries.length > 50) {
|
||||
return badRequest(res, "Maximum 50 queries per batch");
|
||||
}
|
||||
|
||||
const logBatch = childLogger({ component: "batch", batchSize: queries.length });
|
||||
|
||||
const batchStart = Date.now();
|
||||
logBatch.info("batch started");
|
||||
|
||||
const batchResults = await Promise.allSettled(
|
||||
queries.map(async (q, i) => {
|
||||
const qStart = Date.now();
|
||||
try {
|
||||
const data = await runner.run({
|
||||
query: q.query || "",
|
||||
type: q.type || "image",
|
||||
limit: q.limit || 10
|
||||
});
|
||||
return {
|
||||
index: i,
|
||||
query: q.query,
|
||||
type: q.type || "image",
|
||||
success: true,
|
||||
execution_time_ms: Date.now() - qStart,
|
||||
results: data,
|
||||
errors: data.errors || []
|
||||
};
|
||||
} catch (err) {
|
||||
return {
|
||||
index: i,
|
||||
query: q.query,
|
||||
type: q.type || "image",
|
||||
success: false,
|
||||
execution_time_ms: Date.now() - qStart,
|
||||
error: err.message
|
||||
};
|
||||
}
|
||||
})
|
||||
);
|
||||
|
||||
const totalMs = Date.now() - batchStart;
|
||||
const ok = batchResults.filter(r => r.status === "fulfilled" && r.value.success).length;
|
||||
const fail = batchResults.filter(r => r.status === "fulfilled" && !r.value.success).length;
|
||||
|
||||
logBatch.info({ total: queries.length, ok, fail, totalMs }, "batch completed");
|
||||
|
||||
res.writeHead(200, { "Content-Type": "application/json" });
|
||||
res.end(JSON.stringify({
|
||||
success: true,
|
||||
batch_size: queries.length,
|
||||
execution_time_ms: totalMs,
|
||||
ok,
|
||||
fail,
|
||||
results: batchResults.map(r => r.status === "fulfilled" ? r.value : {
|
||||
index: -1,
|
||||
query: "unknown",
|
||||
success: false,
|
||||
error: r.reason?.message || "Promise rejected"
|
||||
}),
|
||||
metrics: {
|
||||
pool: proxyPool.getMetrics(),
|
||||
hourly_usage: proxyPool.getMetrics().hourlyUsageCurrent
|
||||
}
|
||||
}));
|
||||
return;
|
||||
}
|
||||
|
||||
// 404
|
||||
res.writeHead(404, { "Content-Type": "application/json" });
|
||||
res.end(JSON.stringify({ success: false, error: { code: "NOT_FOUND", message: `No endpoint: ${req.method} ${path}` } }));
|
||||
|
||||
} catch (err) {
|
||||
log.error({ error: err.message }, "API error");
|
||||
serverError(res, err.message);
|
||||
}
|
||||
});
|
||||
|
||||
server.listen(port, () => {
|
||||
log.info({ port }, "API server started");
|
||||
console.log(`\n PolySearch API running on http://localhost:${port}`);
|
||||
console.log(` Health: http://localhost:${port}/health`);
|
||||
console.log(` Search: POST http://localhost:${port}/search`);
|
||||
console.log(` Batch: POST http://localhost:${port}/batch`);
|
||||
console.log(` Metrics: GET http://localhost:${port}/metrics`);
|
||||
const key = loadApiKey();
|
||||
if (key) {
|
||||
console.log(` Auth: Authorization: Bearer <key>`);
|
||||
console.log(` Key: ${key.substring(0, 8)}...${key.slice(-4)}\n`);
|
||||
} else {
|
||||
console.log(` Auth: none (no API key configured)\n`);
|
||||
}
|
||||
});
|
||||
|
||||
const shutdown = () => {
|
||||
log.info("shutting down API server");
|
||||
proxyPool.destroy();
|
||||
server.close(() => process.exit(0));
|
||||
};
|
||||
process.on("SIGINT", shutdown);
|
||||
process.on("SIGTERM", shutdown);
|
||||
|
||||
return server;
|
||||
}
|
||||
50
src/cli.js
Normal file
50
src/cli.js
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
import { parseArgs } from "node:util";
|
||||
|
||||
export function parseCliArgs() {
|
||||
const { values, positionals } = parseArgs({
|
||||
args: process.argv.slice(2),
|
||||
options: {
|
||||
query: { type: "string", short: "q" },
|
||||
type: { type: "string", short: "t", default: "image" },
|
||||
limit: { type: "string", short: "l", default: "10" },
|
||||
mode: { type: "string", short: "m", default: "human" },
|
||||
config: { type: "string", short: "c" },
|
||||
proxy: { type: "string", short: "p" },
|
||||
metrics: { type: "boolean", short: "M", default: false },
|
||||
"generate-key": { type: "boolean", default: false },
|
||||
serve: { type: "boolean", default: false },
|
||||
port: { type: "string", default: "9876" },
|
||||
help: { type: "boolean", short: "h", default: false }
|
||||
},
|
||||
strict: false,
|
||||
allowPositionals: true
|
||||
});
|
||||
|
||||
const type = (values.type || "image").toLowerCase();
|
||||
const mode = (values.mode || "human").toLowerCase();
|
||||
|
||||
if (!["web", "image", "both"].includes(type)) {
|
||||
console.error(`Invalid type "${values.type}". Must be: web, image, or both`);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
if (!["human", "agent"].includes(mode)) {
|
||||
console.error(`Invalid mode "${values.mode}". Must be: human or agent`);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
return {
|
||||
query: values.query || null,
|
||||
type,
|
||||
limit: Math.min(parseInt(values.limit, 10) || 10, 50),
|
||||
mode,
|
||||
configPath: values.config || null,
|
||||
proxy: values.proxy || null,
|
||||
metrics: values.metrics,
|
||||
generateKey: values["generate-key"],
|
||||
serve: values.serve,
|
||||
port: parseInt(values.port, 10) || 9876,
|
||||
help: values.help,
|
||||
positionals
|
||||
};
|
||||
}
|
||||
134
src/config.js
Normal file
134
src/config.js
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
import dotenv from "dotenv";
|
||||
import { existsSync, readFileSync } from "node:fs";
|
||||
import { resolve, dirname } from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import { homedir } from "node:os";
|
||||
import { childLogger } from "./utils/logger.js";
|
||||
import { fetchAllProxies, listProviders } from "./http/providers/index.js";
|
||||
import "./http/providers/webshare.js";
|
||||
import "./http/providers/oxylabs.js";
|
||||
|
||||
const PROJECT_ROOT = resolve(dirname(fileURLToPath(import.meta.url)), "..");
|
||||
|
||||
dotenv.config({ path: resolve(PROJECT_ROOT, ".env") });
|
||||
|
||||
const DEFAULTS = {
|
||||
engines: {
|
||||
duckduckgo: { priority: 1, enabled: true, timeout_ms: 10000, retries: 2 }
|
||||
},
|
||||
proxies: [],
|
||||
proxy: {
|
||||
enabled: false,
|
||||
rotation: "round-robin",
|
||||
health_check_interval_ms: 60000,
|
||||
max_failures: 3
|
||||
},
|
||||
http: {
|
||||
timeout_ms: 10000,
|
||||
retry_max_attempts: 3,
|
||||
retry_base_delay_ms: 1000,
|
||||
retry_max_delay_ms: 10000,
|
||||
user_agents: []
|
||||
},
|
||||
search: {
|
||||
default_type: "image",
|
||||
default_limit: 10,
|
||||
max_limit: 50
|
||||
}
|
||||
};
|
||||
|
||||
function mergeDeep(target, source) {
|
||||
const result = { ...target };
|
||||
for (const key of Object.keys(source)) {
|
||||
if (source[key] && typeof source[key] === "object" && !Array.isArray(source[key])) {
|
||||
result[key] = mergeDeep(target[key] || {}, source[key]);
|
||||
} else {
|
||||
result[key] = source[key];
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
function findConfig() {
|
||||
const paths = [
|
||||
process.env.POLYSEARCH_CONFIG || process.env.IMAGE_SEARCH_CONFIG,
|
||||
resolve(PROJECT_ROOT, "config.json"),
|
||||
resolve(PROJECT_ROOT, "config.example.json"),
|
||||
resolve(homedir(), ".polysearch.json"),
|
||||
resolve(homedir(), ".image-search.json")
|
||||
];
|
||||
|
||||
for (const p of paths) {
|
||||
if (p && existsSync(p)) return p;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
function loadProxyFile() {
|
||||
const proxyFile = resolve(PROJECT_ROOT, "proxies.txt");
|
||||
if (!existsSync(proxyFile)) return [];
|
||||
|
||||
const raw = readFileSync(proxyFile, "utf-8");
|
||||
const proxies = [];
|
||||
|
||||
for (const line of raw.split("\n")) {
|
||||
const trimmed = line.trim();
|
||||
if (!trimmed || trimmed.startsWith("#")) continue;
|
||||
|
||||
const parts = trimmed.split(":");
|
||||
if (parts.length === 4) {
|
||||
const [host, port, user, pass] = parts;
|
||||
proxies.push(`http://${encodeURIComponent(user)}:${encodeURIComponent(pass)}@${host}:${port}`);
|
||||
} else if (parts.length === 2) {
|
||||
proxies.push(`http://${parts[0]}:${parts[1]}`);
|
||||
}
|
||||
}
|
||||
|
||||
return proxies;
|
||||
}
|
||||
|
||||
export async function loadConfig(configPath) {
|
||||
const finalPath = configPath || findConfig();
|
||||
let config = { ...DEFAULTS };
|
||||
|
||||
if (finalPath && existsSync(finalPath)) {
|
||||
try {
|
||||
const raw = readFileSync(finalPath, "utf-8");
|
||||
const parsed = JSON.parse(raw);
|
||||
config = mergeDeep(config, parsed);
|
||||
} catch (err) {
|
||||
console.error(`Warning: Failed to load config from ${finalPath}: ${err.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
const log = childLogger({ component: "config" });
|
||||
|
||||
const [providerProxies, fileProxies] = await Promise.all([
|
||||
fetchAllProxies().catch(err => {
|
||||
log.warn({ error: err.message }, "all proxy providers failed");
|
||||
return [];
|
||||
}),
|
||||
Promise.resolve(loadProxyFile())
|
||||
]);
|
||||
|
||||
const allProxies = [...new Set([...providerProxies, ...fileProxies, ...config.proxies])];
|
||||
|
||||
if (allProxies.length > 0) {
|
||||
config.proxies = allProxies;
|
||||
if (!config.proxy.enabled) {
|
||||
config.proxy.enabled = true;
|
||||
}
|
||||
}
|
||||
|
||||
config.http.user_agents = config.http.user_agents.length > 0
|
||||
? config.http.user_agents
|
||||
: null;
|
||||
|
||||
log.info({
|
||||
providers: listProviders().map(p => p.name),
|
||||
proxyCount: allProxies.length,
|
||||
proxyEnabled: config.proxy.enabled
|
||||
}, "config loaded");
|
||||
|
||||
return config;
|
||||
}
|
||||
25
src/engines/base.js
Normal file
25
src/engines/base.js
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
export class SearchEngine {
|
||||
static name = "base";
|
||||
static supports = [];
|
||||
static priority = 10;
|
||||
|
||||
constructor(httpClient) {
|
||||
this.http = httpClient;
|
||||
}
|
||||
|
||||
async search(query, options = {}) {
|
||||
throw new Error("search() must be implemented by subclass");
|
||||
}
|
||||
|
||||
get supportedTypes() {
|
||||
return this.constructor.supports;
|
||||
}
|
||||
|
||||
get engineName() {
|
||||
return this.constructor.name;
|
||||
}
|
||||
|
||||
get priority() {
|
||||
return this.constructor.priority;
|
||||
}
|
||||
}
|
||||
177
src/engines/duckduckgo.js
Normal file
177
src/engines/duckduckgo.js
Normal file
|
|
@ -0,0 +1,177 @@
|
|||
import { SearchEngine } from "./base.js";
|
||||
|
||||
export class DuckDuckGo extends SearchEngine {
|
||||
static name = "duckduckgo";
|
||||
static supports = ["web", "image"];
|
||||
static priority = 1;
|
||||
|
||||
async search(query, options = {}) {
|
||||
const { type = "image", limit = 10, proxy = null, signal = null } = options;
|
||||
|
||||
if (type === "web") {
|
||||
return this._webSearch(query, limit, proxy, signal);
|
||||
}
|
||||
return this._imageSearch(query, limit, proxy, signal);
|
||||
}
|
||||
|
||||
async _webSearch(query, limit, proxy, signal) {
|
||||
const errors = [];
|
||||
|
||||
for (const strategy of [this._webViaLite.bind(this), this._webViaHtml.bind(this)]) {
|
||||
try {
|
||||
const result = await strategy(query, limit, proxy, signal);
|
||||
if (result && result.results && result.results.length > 0) {
|
||||
this.http.log.debug({ strategy: strategy.name, count: result.results.length }, "web search strategy succeeded");
|
||||
return result;
|
||||
}
|
||||
} catch (err) {
|
||||
const label = strategy.name || "unknown";
|
||||
this.http.log.warn({ strategy: label, error: err.message }, "web search strategy failed");
|
||||
errors.push({ strategy: label, error: err.message });
|
||||
}
|
||||
}
|
||||
|
||||
this.http.log.warn({ strategies: errors.length, query }, "all web search strategies exhausted");
|
||||
|
||||
if (errors.some(e => e.error.includes("RATE_LIMITED"))) {
|
||||
throw new Error(`RATE_LIMITED: DuckDuckGo rejected all strategies. ${errors.map(e => e.error).join("; ")}`);
|
||||
}
|
||||
|
||||
return { results: [], engine: "duckduckgo", type: "web", total: 0 };
|
||||
}
|
||||
|
||||
async _webViaLite(query, limit, proxy, signal) {
|
||||
const html = await this.http.fetch(
|
||||
`https://lite.duckduckgo.com/lite/?q=${encodeURIComponent(query)}`,
|
||||
{ responseType: "text", proxy, signal, retries: 1, timeoutMs: 8000 }
|
||||
);
|
||||
|
||||
const results = [];
|
||||
const linkRegex = /<a[^>]*rel="nofollow"[^>]*href="([^"]*)"[^>]*class='result-link'[^>]*>([\s\S]*?)<\/a>/gi;
|
||||
const snippetRegex = /<td class='result-snippet'>([\s\S]*?)<\/td>/gi;
|
||||
const linkTextRegex = /<span class='link-text'>([\s\S]*?)<\/span>/gi;
|
||||
|
||||
const links = [...html.matchAll(linkRegex)];
|
||||
const snippets = [...html.matchAll(snippetRegex)];
|
||||
const urls = [...html.matchAll(linkTextRegex)];
|
||||
|
||||
for (let i = 0; i < links.length && results.length < limit; i++) {
|
||||
let rawUrl = decodeEntities(links[i][1].trim());
|
||||
const title = decodeEntities(stripTags(links[i][2]).trim());
|
||||
const snippet = snippets[i] ? decodeEntities(stripTags(snippets[i][1]).trim()) : "";
|
||||
const displayUrl = urls[i] ? decodeEntities(urls[i][1].trim()) : "";
|
||||
|
||||
const uddgMatch = rawUrl.match(/uddg=([^&]+)/);
|
||||
let url = uddgMatch ? decodeURIComponent(uddgMatch[1]) : rawUrl;
|
||||
|
||||
if (url.startsWith("//")) url = "https:" + url;
|
||||
|
||||
let domain = "unknown";
|
||||
try { domain = new URL(url).hostname; } catch {}
|
||||
if (domain === "unknown" && displayUrl) {
|
||||
try { domain = new URL("https://" + displayUrl).hostname; } catch {}
|
||||
}
|
||||
|
||||
results.push({ title, url, domain, snippet, engine: "duckduckgo" });
|
||||
}
|
||||
|
||||
return { results, engine: "duckduckgo", type: "web", total: results.length };
|
||||
}
|
||||
|
||||
async _webViaHtml(query, limit, proxy, signal) {
|
||||
const html = await this.http.fetch(
|
||||
`https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`,
|
||||
{ responseType: "text", proxy, signal, retries: 2, timeoutMs: 10000 }
|
||||
);
|
||||
|
||||
const results = [];
|
||||
const resultBlocks = html.split('<div class="result results_links');
|
||||
resultBlocks.shift();
|
||||
|
||||
for (const block of resultBlocks) {
|
||||
if (results.length >= limit) break;
|
||||
|
||||
const titleMatch = block.match(/<a[^>]*class="[^"]*result__a[^"]*"[^>]*href="([^"]*)"[^>]*>([\s\S]*?)<\/a>/i);
|
||||
if (!titleMatch) continue;
|
||||
|
||||
const snippetMatch = block.match(/<a[^>]*class="[^"]*result__snippet[^"]*"[^>]*>([\s\S]*?)<\/a>/i);
|
||||
|
||||
let rawUrl = decodeEntities(titleMatch[1].trim());
|
||||
const title = decodeEntities(stripTags(titleMatch[2]).trim());
|
||||
const snippet = snippetMatch ? decodeEntities(stripTags(snippetMatch[1]).trim()) : "";
|
||||
|
||||
const uddgMatch = rawUrl.match(/uddg=([^&]+)/);
|
||||
let url = uddgMatch ? decodeURIComponent(uddgMatch[1]) : rawUrl;
|
||||
if (url.startsWith("//")) url = "https:" + url;
|
||||
|
||||
let domain = "unknown";
|
||||
try { domain = new URL(url).hostname; } catch {}
|
||||
|
||||
results.push({ title, url, domain, snippet, engine: "duckduckgo" });
|
||||
}
|
||||
|
||||
return { results, engine: "duckduckgo", type: "web", total: results.length };
|
||||
}
|
||||
|
||||
async _imageSearch(query, limit, proxy, signal) {
|
||||
const initHtml = await this.http.fetch(
|
||||
`https://duckduckgo.com/?q=${encodeURIComponent(query)}&iax=images&ia=images`,
|
||||
{ responseType: "text", proxy, signal }
|
||||
);
|
||||
|
||||
const vqdMatch = initHtml.match(/vqd\s*=\s*['"]([^'"]+)['"]/);
|
||||
if (!vqdMatch) {
|
||||
throw new Error("Could not extract token (vqd) from DuckDuckGo");
|
||||
}
|
||||
|
||||
const data = await this.http.fetch(
|
||||
`https://duckduckgo.com/i.js?q=${encodeURIComponent(query)}&o=json&vqd=${vqdMatch[1]}&f=,,,`,
|
||||
{ proxy, signal }
|
||||
);
|
||||
|
||||
const rawResults = data.results || [];
|
||||
const results = rawResults.slice(0, limit).map((item) => {
|
||||
let domain = "unknown";
|
||||
try { domain = new URL(item.image).hostname; }
|
||||
catch {
|
||||
if (item.url) {
|
||||
try { domain = new URL(item.url).hostname; } catch {}
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
title: item.title || "",
|
||||
image_url: item.image || "",
|
||||
source_url: item.url || "",
|
||||
domain,
|
||||
width: item.width || null,
|
||||
height: item.height || null,
|
||||
thumbnail: item.thumbnail || null,
|
||||
engine: "duckduckgo"
|
||||
};
|
||||
});
|
||||
|
||||
return {
|
||||
results,
|
||||
engine: "duckduckgo",
|
||||
type: "image",
|
||||
total: results.length
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
function decodeEntities(str) {
|
||||
return str
|
||||
.replace(/&/g, "&")
|
||||
.replace(/</g, "<")
|
||||
.replace(/>/g, ">")
|
||||
.replace(/"/g, '"')
|
||||
.replace(/'/g, "'")
|
||||
.replace(/'/g, "'")
|
||||
.replace(///g, "/")
|
||||
.replace(/&#(\d+);/g, (_, code) => String.fromCharCode(code));
|
||||
}
|
||||
|
||||
function stripTags(str) {
|
||||
return str.replace(/<[^>]*>/g, "").trim();
|
||||
}
|
||||
24
src/engines/index.js
Normal file
24
src/engines/index.js
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
const registry = [];
|
||||
|
||||
export function registerEngine(EngineClass) {
|
||||
registry.push(EngineClass);
|
||||
}
|
||||
|
||||
export function getEngines(type, httpClient) {
|
||||
return registry
|
||||
.filter(Engine => Engine.supports.includes(type))
|
||||
.sort((a, b) => a.priority - b.priority)
|
||||
.map(Engine => new Engine(httpClient));
|
||||
}
|
||||
|
||||
export function getEngineNames() {
|
||||
return registry.map(Engine => ({
|
||||
name: Engine.name,
|
||||
supports: Engine.supports,
|
||||
priority: Engine.priority
|
||||
}));
|
||||
}
|
||||
|
||||
export function clearEngines() {
|
||||
registry.length = 0;
|
||||
}
|
||||
17
src/engines/setup.js
Normal file
17
src/engines/setup.js
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
import { DuckDuckGo } from "./duckduckgo.js";
|
||||
import { registerEngine, getEngines, getEngineNames, clearEngines } from "./index.js";
|
||||
|
||||
registerEngine(DuckDuckGo);
|
||||
|
||||
export function createEngines(httpClient) {
|
||||
return {
|
||||
web: () => getEngines("web", httpClient),
|
||||
image: () => getEngines("image", httpClient),
|
||||
all: () => ({
|
||||
web: getEngines("web", httpClient),
|
||||
image: getEngines("image", httpClient)
|
||||
})
|
||||
};
|
||||
}
|
||||
|
||||
export { getEngineNames, clearEngines, registerEngine };
|
||||
189
src/http/client.js
Normal file
189
src/http/client.js
Normal file
|
|
@ -0,0 +1,189 @@
|
|||
import { ProxyAgent } from "undici";
|
||||
import { getNextUA } from "../utils/ua.js";
|
||||
import { withRetry, isRetryable } from "../utils/retry.js";
|
||||
import { childLogger } from "../utils/logger.js";
|
||||
|
||||
const proxyAgentCache = new Map();
|
||||
|
||||
function getOrCreateProxyAgent(proxyUrl) {
|
||||
if (!proxyUrl) return undefined;
|
||||
if (!proxyAgentCache.has(proxyUrl)) {
|
||||
proxyAgentCache.set(proxyUrl, new ProxyAgent(proxyUrl));
|
||||
}
|
||||
return proxyAgentCache.get(proxyUrl);
|
||||
}
|
||||
|
||||
export class HttpClient {
|
||||
constructor(config = {}) {
|
||||
this.log = childLogger({ component: "http-client" });
|
||||
this.timeoutMs = config.timeout_ms || 10000;
|
||||
this.retryAttempts = config.retry_max_attempts || 3;
|
||||
this.retryBaseDelay = config.retry_base_delay_ms || 1000;
|
||||
this.retryMaxDelay = config.retry_max_delay_ms || 10000;
|
||||
this.userAgents = config.user_agents || null;
|
||||
this.proxyPool = null;
|
||||
}
|
||||
|
||||
setProxyPool(pool) {
|
||||
this.proxyPool = pool;
|
||||
this.log.info({ proxyCount: pool.proxies?.length }, "proxy pool attached to HTTP client");
|
||||
}
|
||||
|
||||
async fetch(url, options = {}) {
|
||||
const {
|
||||
method = "GET",
|
||||
headers = {},
|
||||
body = null,
|
||||
responseType = "json",
|
||||
timeoutMs = this.timeoutMs,
|
||||
retries = this.retryAttempts,
|
||||
proxy = null,
|
||||
signal = null,
|
||||
engine = null
|
||||
} = options;
|
||||
|
||||
const requestLog = this.log.child({
|
||||
url: url.length > 100 ? url.substring(0, 100) + "..." : url,
|
||||
method,
|
||||
engine
|
||||
});
|
||||
|
||||
requestLog.debug({ timeoutMs, retries }, "request starting");
|
||||
|
||||
return withRetry(
|
||||
async (attempt) => {
|
||||
const controller = new AbortController();
|
||||
const timeout = setTimeout(() => controller.abort(), timeoutMs);
|
||||
|
||||
const combinedSignal = signal
|
||||
? combineSignals(signal, controller.signal)
|
||||
: controller.signal;
|
||||
|
||||
const ua = this.userAgents
|
||||
? this.userAgents[Math.floor(Math.random() * this.userAgents.length)]
|
||||
: getNextUA();
|
||||
|
||||
const fetchHeaders = {
|
||||
"User-Agent": ua,
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
|
||||
"Accept-Language": "en-US,en;q=0.5",
|
||||
...headers
|
||||
};
|
||||
|
||||
let proxyUrl = proxy ? proxy.url : null;
|
||||
let usedProxy = proxy;
|
||||
|
||||
if (!proxy && this.proxyPool) {
|
||||
const p = this.proxyPool.select();
|
||||
if (p) {
|
||||
proxyUrl = p.url;
|
||||
usedProxy = p;
|
||||
}
|
||||
}
|
||||
|
||||
requestLog.debug({ attempt: attempt + 1, proxy: usedProxy?.masked || "none" }, "request attempt");
|
||||
|
||||
const requestStart = Date.now();
|
||||
|
||||
try {
|
||||
const fetchOpts = {
|
||||
method,
|
||||
headers: fetchHeaders,
|
||||
signal: combinedSignal,
|
||||
...(body ? { body } : {})
|
||||
};
|
||||
|
||||
if (proxyUrl) {
|
||||
const agent = getOrCreateProxyAgent(proxyUrl);
|
||||
fetchOpts.dispatcher = agent;
|
||||
}
|
||||
|
||||
const response = await fetch(url, fetchOpts);
|
||||
|
||||
if (!response.ok) {
|
||||
const err = new Error(`HTTP ${response.status}: ${response.statusText}`);
|
||||
err.status = response.status;
|
||||
requestLog.warn({ status: response.status }, "request returned non-ok status");
|
||||
throw err;
|
||||
}
|
||||
|
||||
if (response.status === 202) {
|
||||
const bodyHint = (await response.text().catch(() => "")).substring(0, 120);
|
||||
const err = new Error(`HTTP 202 (Accepted): server deferred or blocked request. Body: "${bodyHint}"`);
|
||||
err.status = 202;
|
||||
requestLog.warn({ status: 202, bodyHint }, "request returned HTTP 202 (blocked/deferred)");
|
||||
throw err;
|
||||
}
|
||||
|
||||
const textOrBuffer = await response.clone().text();
|
||||
const bytes = textOrBuffer.length;
|
||||
const latencyMs = Date.now() - requestStart;
|
||||
|
||||
if (this.proxyPool) this.proxyPool.markSuccess(usedProxy, latencyMs, bytes);
|
||||
requestLog.debug({ status: response.status, bytes, latencyMs }, "request succeeded");
|
||||
|
||||
if (responseType === "json") {
|
||||
const text = await response.text();
|
||||
try {
|
||||
return JSON.parse(text);
|
||||
} catch {
|
||||
return text;
|
||||
}
|
||||
}
|
||||
if (responseType === "text") return await response.text();
|
||||
return response;
|
||||
} catch (err) {
|
||||
const latencyMs = Date.now() - requestStart;
|
||||
if (this.proxyPool) this.proxyPool.markFailed(usedProxy, err, latencyMs);
|
||||
|
||||
if (err.name === "AbortError") {
|
||||
requestLog.warn({ timeoutMs }, "request timed out");
|
||||
const timeoutErr = new Error(`Request timed out after ${timeoutMs}ms`);
|
||||
timeoutErr.status = 0;
|
||||
throw timeoutErr;
|
||||
}
|
||||
|
||||
if (!isRetryable(err) || attempt >= retries - 1) {
|
||||
requestLog.error({ error: err.message, status: err.status, latencyMs }, "request failed, not retrying");
|
||||
throw err;
|
||||
}
|
||||
|
||||
requestLog.warn({ error: err.message, attempt: attempt + 1, latencyMs }, "request failed, will retry");
|
||||
throw err;
|
||||
} finally {
|
||||
clearTimeout(timeout);
|
||||
}
|
||||
},
|
||||
{
|
||||
attempts: retries,
|
||||
baseDelayMs: this.retryBaseDelay,
|
||||
maxDelayMs: this.retryMaxDelay,
|
||||
onRetry: ({ attempt, error, delayMs }) => {
|
||||
requestLog.warn({ attempt, error: error.message, delayMs }, "retrying request");
|
||||
}
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
function combineSignals(...signals) {
|
||||
const controller = new AbortController();
|
||||
|
||||
const onAbort = () => controller.abort();
|
||||
for (const signal of signals) {
|
||||
if (signal.aborted) {
|
||||
controller.abort();
|
||||
return controller.signal;
|
||||
}
|
||||
signal.addEventListener("abort", onAbort, { once: true });
|
||||
}
|
||||
|
||||
const original = controller.signal;
|
||||
original._cleanup = () => {
|
||||
for (const signal of signals) {
|
||||
signal.removeEventListener("abort", onAbort);
|
||||
}
|
||||
};
|
||||
|
||||
return original;
|
||||
}
|
||||
39
src/http/providers/index.js
Normal file
39
src/http/providers/index.js
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
import { childLogger } from "../../utils/logger.js";
|
||||
|
||||
const log = childLogger({ component: "proxy-provider" });
|
||||
const registry = [];
|
||||
|
||||
export function registerProvider(name, fetcher, description = "") {
|
||||
registry.push({ name, fetcher, description });
|
||||
log.info({ name, description }, "proxy provider registered");
|
||||
}
|
||||
|
||||
export async function fetchAllProxies() {
|
||||
const all = [];
|
||||
|
||||
for (const { name, fetcher } of registry) {
|
||||
try {
|
||||
const proxies = await fetcher();
|
||||
if (proxies && proxies.length > 0) {
|
||||
log.info({ provider: name, count: proxies.length }, "proxies fetched");
|
||||
for (const p of proxies) {
|
||||
all.push(typeof p === "string" ? { url: p, provider: name } : { provider: name, ...p });
|
||||
}
|
||||
} else {
|
||||
log.debug({ provider: name }, "provider returned 0 proxies");
|
||||
}
|
||||
} catch (err) {
|
||||
log.warn({ provider: name, error: err.message }, "provider failed");
|
||||
}
|
||||
}
|
||||
|
||||
return all;
|
||||
}
|
||||
|
||||
export function listProviders() {
|
||||
return registry.map(r => ({ name: r.name, description: r.description }));
|
||||
}
|
||||
|
||||
export function getProviderCount() {
|
||||
return registry.length;
|
||||
}
|
||||
26
src/http/providers/oxylabs.js
Normal file
26
src/http/providers/oxylabs.js
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
import { registerProvider } from "./index.js";
|
||||
|
||||
const ENTRY_POINT = "dc.oxylabs.io";
|
||||
const PORT = "8000";
|
||||
|
||||
async function fetchOxylabsProxies() {
|
||||
const username = process.env.OXYLABS_USERNAME;
|
||||
const password = process.env.OXYLABS_PASSWORD;
|
||||
const country = process.env.OXYLABS_COUNTRY || "US";
|
||||
const stickySession = process.env.OXYLABS_STICKY_SESSION || "";
|
||||
|
||||
if (!username || !password) return [];
|
||||
|
||||
const sessParam = stickySession ? `-sessid-${stickySession}` : "";
|
||||
const entry = `${ENTRY_POINT}:${PORT}`;
|
||||
const userPart = `user-${username}-country-${country}${sessParam}`;
|
||||
const proxyUrl = `http://${encodeURIComponent(userPart)}:${encodeURIComponent(password)}@${entry}`;
|
||||
|
||||
return [proxyUrl];
|
||||
}
|
||||
|
||||
registerProvider(
|
||||
"oxylabs",
|
||||
fetchOxylabsProxies,
|
||||
"Oxylabs datacenter proxies (dc.oxylabs.io:8000)"
|
||||
);
|
||||
45
src/http/providers/webshare.js
Normal file
45
src/http/providers/webshare.js
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
import { registerProvider, fetchAllProxies } from "./index.js";
|
||||
|
||||
const API_BASE = "https://proxy.webshare.io/api/v2/proxy/list/";
|
||||
const CACHE_TTL_MS = 5 * 60 * 1000;
|
||||
let cached = null;
|
||||
let lastFetch = 0;
|
||||
|
||||
async function fetchWebshareProxies() {
|
||||
const apiKey = process.env.WEBSHARE_API_KEY;
|
||||
if (!apiKey) return [];
|
||||
|
||||
if (cached && Date.now() - lastFetch < CACHE_TTL_MS) return cached;
|
||||
|
||||
const allProxies = [];
|
||||
let page = 1;
|
||||
let hasMore = true;
|
||||
|
||||
while (hasMore) {
|
||||
const url = `${API_BASE}?mode=direct&page=${page}&page_size=100`;
|
||||
const response = await fetch(url, {
|
||||
headers: { Authorization: `Token ${apiKey}` }
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`Webshare API HTTP ${response.status}`);
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
|
||||
for (const p of data.results) {
|
||||
if (p.valid) {
|
||||
allProxies.push(`http://${p.username}:${p.password}@${p.proxy_address}:${p.port}`);
|
||||
}
|
||||
}
|
||||
|
||||
hasMore = !!data.next;
|
||||
page++;
|
||||
}
|
||||
|
||||
cached = allProxies;
|
||||
lastFetch = Date.now();
|
||||
return allProxies;
|
||||
}
|
||||
|
||||
registerProvider("webshare", fetchWebshareProxies, "proxy.webshare.io API");
|
||||
304
src/http/proxy.js
Normal file
304
src/http/proxy.js
Normal file
|
|
@ -0,0 +1,304 @@
|
|||
import { isRetryable } from "../utils/retry.js";
|
||||
import { childLogger } from "../utils/logger.js";
|
||||
|
||||
const HOUR_MS = 3600_000;
|
||||
|
||||
export class ProxyPool {
|
||||
constructor(proxies = [], options = {}) {
|
||||
this.log = childLogger({ component: "proxy-pool" });
|
||||
this.enabled = options.enabled !== false && proxies.length > 0;
|
||||
this.maxFailures = options.max_failures || 3;
|
||||
this.circuitBaseMs = options.circuit_base_ms || 5000;
|
||||
this.circuitMaxMs = options.circuit_max_ms || 300_000;
|
||||
|
||||
const entries = [];
|
||||
const seen = new Set();
|
||||
|
||||
for (const p of proxies) {
|
||||
const url = typeof p === "string" ? p : p.url;
|
||||
if (seen.has(url)) continue;
|
||||
seen.add(url);
|
||||
|
||||
const provider = typeof p === "string" ? "manual" : (p.provider || "manual");
|
||||
const hasAuth = url.includes("@");
|
||||
const masked = url.replace(/\/\/([^:]+):([^@]+)@/, "//$1:***@");
|
||||
|
||||
entries.push({
|
||||
url,
|
||||
masked,
|
||||
provider,
|
||||
authenticated: hasAuth,
|
||||
alive: true,
|
||||
failures: 0,
|
||||
consecutiveFailures: 0,
|
||||
lastUsed: 0,
|
||||
lastCheck: 0,
|
||||
|
||||
circuitState: "closed",
|
||||
circuitFailures: 0,
|
||||
circuitOpenUntil: 0,
|
||||
|
||||
hourlyUsage: {},
|
||||
totalRequests: 0,
|
||||
totalSuccesses: 0,
|
||||
totalFailures: 0,
|
||||
totalTimeouts: 0,
|
||||
totalRateLimited: 0,
|
||||
totalBytes: 0,
|
||||
totalLatencyMs: 0,
|
||||
lastLatencyMs: 0
|
||||
});
|
||||
}
|
||||
|
||||
this.proxies = [
|
||||
...entries.filter(p => p.authenticated),
|
||||
...entries.filter(p => !p.authenticated)
|
||||
];
|
||||
|
||||
this.index = 0;
|
||||
|
||||
if (this.enabled) {
|
||||
const byProvider = {};
|
||||
for (const p of this.proxies) {
|
||||
byProvider[p.provider] = (byProvider[p.provider] || 0) + 1;
|
||||
}
|
||||
this.log.info({
|
||||
count: this.proxies.length,
|
||||
authenticated: this.proxies.filter(p => p.authenticated).length,
|
||||
providers: byProvider
|
||||
}, "proxy pool initialized");
|
||||
}
|
||||
|
||||
this._metricsInterval = setInterval(() => {
|
||||
this.log.info(this.getMetrics(), "proxy pool metrics periodic");
|
||||
}, 300_000).unref();
|
||||
}
|
||||
|
||||
select() {
|
||||
if (!this.enabled || this.proxies.length === 0) return null;
|
||||
|
||||
const now = Date.now();
|
||||
const candidates = this.proxies.filter(p => {
|
||||
if (!p.alive) return false;
|
||||
if (p.circuitState === "open" && now < p.circuitOpenUntil) return false;
|
||||
if (p.circuitState === "open" && now >= p.circuitOpenUntil) {
|
||||
p.circuitState = "half-open";
|
||||
this.log.info({ proxy: p.masked }, "circuit half-open, allowing probe");
|
||||
}
|
||||
return true;
|
||||
});
|
||||
|
||||
if (candidates.length === 0) {
|
||||
this.log.warn({ total: this.proxies.length }, "no candidates available, resetting dead proxies");
|
||||
for (const p of this.proxies) {
|
||||
if (!p.alive) {
|
||||
p.alive = true;
|
||||
p.consecutiveFailures = 0;
|
||||
}
|
||||
}
|
||||
const p = this.proxies[0];
|
||||
if (p) {
|
||||
p.circuitState = "closed";
|
||||
p.lastUsed = now;
|
||||
return p;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
const hourKey = this._hourKey(now);
|
||||
candidates.sort((a, b) => {
|
||||
const aUsage = a.hourlyUsage[hourKey] || 0;
|
||||
const bUsage = b.hourlyUsage[hourKey] || 0;
|
||||
if (aUsage !== bUsage) return aUsage - bUsage;
|
||||
|
||||
if (a.consecutiveFailures !== b.consecutiveFailures) {
|
||||
return a.consecutiveFailures - b.consecutiveFailures;
|
||||
}
|
||||
|
||||
const aCircuit = a.circuitFailures;
|
||||
const bCircuit = b.circuitFailures;
|
||||
return aCircuit - bCircuit;
|
||||
});
|
||||
|
||||
const proxy = candidates[0];
|
||||
proxy.lastUsed = now;
|
||||
proxy.hourlyUsage[hourKey] = (proxy.hourlyUsage[hourKey] || 0) + 1;
|
||||
proxy.totalRequests++;
|
||||
|
||||
return proxy;
|
||||
}
|
||||
|
||||
markSuccess(proxy, latencyMs = 0, bytes = 0) {
|
||||
if (!proxy) return;
|
||||
proxy.failures = 0;
|
||||
proxy.consecutiveFailures = 0;
|
||||
proxy.totalSuccesses++;
|
||||
proxy.totalLatencyMs += latencyMs;
|
||||
proxy.lastLatencyMs = latencyMs;
|
||||
proxy.totalBytes += bytes;
|
||||
proxy.lastCheck = Date.now();
|
||||
|
||||
if (proxy.circuitState === "half-open") {
|
||||
proxy.circuitState = "closed";
|
||||
proxy.circuitFailures = 0;
|
||||
this.log.info({ proxy: proxy.masked }, "circuit closed after successful probe");
|
||||
}
|
||||
|
||||
proxy.alive = true;
|
||||
}
|
||||
|
||||
markFailed(proxy, error = null, latencyMs = 0) {
|
||||
if (!proxy) return;
|
||||
proxy.consecutiveFailures++;
|
||||
proxy.failures++;
|
||||
proxy.totalFailures++;
|
||||
proxy.totalLatencyMs += latencyMs;
|
||||
proxy.lastLatencyMs = latencyMs;
|
||||
proxy.lastCheck = Date.now();
|
||||
|
||||
const is202 = error && (error.status === 202 || (error.message && error.message.includes("202")));
|
||||
const isTimeout = error && (error.message && (error.message.includes("timeout") || error.message.includes("timed out") || error.name === "AbortError"));
|
||||
const isNonRetryable = error && !isRetryable(error);
|
||||
|
||||
if (is202) {
|
||||
proxy.totalRateLimited++;
|
||||
this.log.warn({ proxy: proxy.masked, failures: proxy.failures }, "proxy rate-limited (202)");
|
||||
if (proxy.failures >= this.maxFailures) {
|
||||
this._tripCircuit(proxy, "rate-limited");
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (isTimeout) {
|
||||
proxy.totalTimeouts++;
|
||||
}
|
||||
|
||||
if (isNonRetryable) {
|
||||
this._tripCircuit(proxy, error.message);
|
||||
return;
|
||||
}
|
||||
|
||||
if (proxy.failures >= this.maxFailures) {
|
||||
proxy.alive = false;
|
||||
this.log.warn({
|
||||
proxy: proxy.masked, failures: proxy.failures,
|
||||
error: error?.message
|
||||
}, "proxy marked dead");
|
||||
} else {
|
||||
this.log.warn({
|
||||
proxy: proxy.masked, failures: proxy.failures,
|
||||
error: error?.message
|
||||
}, "proxy request failed");
|
||||
}
|
||||
}
|
||||
|
||||
_tripCircuit(proxy, reason) {
|
||||
proxy.circuitFailures++;
|
||||
const delay = Math.min(
|
||||
this.circuitBaseMs * Math.pow(2, proxy.circuitFailures - 1),
|
||||
this.circuitMaxMs
|
||||
);
|
||||
proxy.circuitState = "open";
|
||||
proxy.circuitOpenUntil = Date.now() + delay;
|
||||
|
||||
this.log.warn({
|
||||
proxy: proxy.masked,
|
||||
circuitFailures: proxy.circuitFailures,
|
||||
circuitOpenMs: delay,
|
||||
reason
|
||||
}, "circuit tripped");
|
||||
}
|
||||
|
||||
getMetrics() {
|
||||
const byProvider = {};
|
||||
let totalReq = 0, totalOk = 0, totalFail = 0;
|
||||
|
||||
for (const p of this.proxies) {
|
||||
const pr = byProvider[p.provider] || {
|
||||
proxyCount: 0, alive: 0, dead: 0,
|
||||
circuitOpen: 0, requests: 0, success: 0, failure: 0,
|
||||
rateLimited: 0, timeout: 0, avgLatencyMs: 0, totalLatencyMs: 0,
|
||||
hourlyUsage: {}
|
||||
};
|
||||
pr.proxyCount++;
|
||||
if (p.alive) pr.alive++; else pr.dead++;
|
||||
if (p.circuitState === "open") pr.circuitOpen++;
|
||||
pr.requests += p.totalRequests;
|
||||
pr.success += p.totalSuccesses;
|
||||
pr.failure += p.totalFailures;
|
||||
pr.rateLimited += p.totalRateLimited;
|
||||
pr.timeout += p.totalTimeouts;
|
||||
pr.totalLatencyMs += p.totalLatencyMs;
|
||||
|
||||
totalReq += p.totalRequests;
|
||||
totalOk += p.totalSuccesses;
|
||||
totalFail += p.totalFailures;
|
||||
|
||||
const hk = this._hourKey(Date.now());
|
||||
pr.hourlyUsage[hk] = (pr.hourlyUsage[hk] || 0) + (p.hourlyUsage[hk] || 0);
|
||||
|
||||
byProvider[p.provider] = pr;
|
||||
}
|
||||
|
||||
for (const [name, pr] of Object.entries(byProvider)) {
|
||||
pr.avgLatencyMs = pr.success > 0 ? Math.round(pr.totalLatencyMs / pr.success) : 0;
|
||||
delete pr.totalLatencyMs;
|
||||
const hk = this._hourKey(Date.now());
|
||||
const hourly = pr.hourlyUsage[hk] || 0;
|
||||
pr.hourlyUsage = { currentHour: hourly };
|
||||
pr.successRate = pr.requests > 0 ? (pr.success / pr.requests * 100).toFixed(1) + "%" : "0%";
|
||||
}
|
||||
|
||||
const allHourly = this.proxies.reduce((s, p) => {
|
||||
const hk = this._hourKey(Date.now());
|
||||
return s + (p.hourlyUsage[hk] || 0);
|
||||
}, 0);
|
||||
|
||||
return {
|
||||
timestamp: new Date().toISOString(),
|
||||
totalProxies: this.proxies.length,
|
||||
alive: this.proxies.filter(p => p.alive).length,
|
||||
dead: this.proxies.filter(p => !p.alive).length,
|
||||
circuitOpen: this.proxies.filter(p => p.circuitState === "open").length,
|
||||
requestsTotal: totalReq,
|
||||
successTotal: totalOk,
|
||||
failureTotal: totalFail,
|
||||
successRate: totalReq > 0 ? (totalOk / totalReq * 100).toFixed(1) + "%" : "0%",
|
||||
hourlyUsageCurrent: allHourly,
|
||||
byProvider
|
||||
};
|
||||
}
|
||||
|
||||
getProxyDetail() {
|
||||
return this.proxies.map(p => ({
|
||||
url: p.masked,
|
||||
provider: p.provider,
|
||||
alive: p.alive,
|
||||
authenticated: p.authenticated,
|
||||
circuitState: p.circuitState,
|
||||
circuitFailures: p.circuitFailures,
|
||||
failures: p.failures,
|
||||
consecutiveFailures: p.consecutiveFailures,
|
||||
requests: p.totalRequests,
|
||||
successes: p.totalSuccesses,
|
||||
failures: p.totalFailures,
|
||||
rateLimited: p.totalRateLimited,
|
||||
timeouts: p.totalTimeouts,
|
||||
avgLatencyMs: p.totalSuccesses > 0 ? Math.round(p.totalLatencyMs / p.totalSuccesses) : 0,
|
||||
lastLatencyMs: p.lastLatencyMs,
|
||||
hourlyUsage: p.hourlyUsage[this._hourKey(Date.now())] || 0
|
||||
}));
|
||||
}
|
||||
|
||||
_hourKey(ts) {
|
||||
return Math.floor(ts / HOUR_MS);
|
||||
}
|
||||
|
||||
logState() {
|
||||
this.log.info({ metrics: this.getMetrics() }, "proxy pool state");
|
||||
}
|
||||
|
||||
destroy() {
|
||||
if (this._metricsInterval) clearInterval(this._metricsInterval);
|
||||
}
|
||||
}
|
||||
160
src/index.js
Executable file
160
src/index.js
Executable file
|
|
@ -0,0 +1,160 @@
|
|||
#!/usr/bin/env node
|
||||
|
||||
import { loadConfig } from "./config.js";
|
||||
import { HttpClient } from "./http/client.js";
|
||||
import { ProxyPool } from "./http/proxy.js";
|
||||
import { SearchRunner } from "./run.js";
|
||||
import { parseCliArgs } from "./cli.js";
|
||||
import { setUserAgents } from "./utils/ua.js";
|
||||
import { getEngineNames } from "./engines/setup.js";
|
||||
import { logger, childLogger } from "./utils/logger.js";
|
||||
import { renderCombinedResults } from "./output/human.js";
|
||||
import { formatSearchResponse, formatErrorResponse, formatHelpResponse } from "./output/agent.js";
|
||||
import { generateKey, saveKeyToEnv } from "./api-key.js";
|
||||
import { startServer } from "./api.js";
|
||||
|
||||
async function main() {
|
||||
const log = childLogger({ component: "main" });
|
||||
const args = parseCliArgs();
|
||||
|
||||
if (args.help) {
|
||||
const engines = getEngineNames();
|
||||
console.log(formatHelpResponse(engines, args));
|
||||
console.log("\n\x1b[1mAPI Server:\x1b[0m");
|
||||
console.log(" --serve Start API server");
|
||||
console.log(" --port <number> Port for API server (default: 9876)");
|
||||
console.log(" --generate-key Generate a new API key\n");
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
if (args.generateKey) {
|
||||
const key = generateKey();
|
||||
const result = saveKeyToEnv(key);
|
||||
console.log(`\n \x1b[32m✓\x1b[0m New API key generated`);
|
||||
console.log(` Key: ${key}`);
|
||||
console.log(` Stored: ${result.path}`);
|
||||
console.log(` \n Use with: curl -H "Authorization: Bearer ${key}" ...\n`);
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
if (args.serve) {
|
||||
await startServer(args.port);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!args.query) {
|
||||
if (args.metrics) {
|
||||
log.warn("--metrics requires a search to run first; run a query and send SIGUSR1 for live metrics");
|
||||
}
|
||||
if (args.mode === "agent") {
|
||||
console.error(formatErrorResponse(
|
||||
{ code: "MISSING_QUERY", message: "Search query parameter is required" },
|
||||
{ type: args.type }
|
||||
));
|
||||
} else {
|
||||
console.error("\n\x1b[31mError: Missing search query\x1b[0m\n");
|
||||
console.log("Usage: node src/index.js -q \"search term\" [options]");
|
||||
console.log(" -q, --query Search query (required)");
|
||||
console.log(" -t, --type Search type: web, image, or both (default: image)");
|
||||
console.log(" -l, --limit Max results per type (default: 10)");
|
||||
console.log(" -m, --mode Output mode: human or agent (default: human)");
|
||||
console.log(" -p, --proxy Proxy URL (e.g., http://user:pass@host:port)");
|
||||
console.log(" -h, --help Show help\n");
|
||||
}
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
log.info({ query: args.query, type: args.type, limit: args.limit, mode: args.mode }, "starting search");
|
||||
|
||||
const config = await loadConfig(args.configPath);
|
||||
log.info({ proxyCount: config.proxies.length, proxyEnabled: config.proxy.enabled }, "config loaded");
|
||||
|
||||
if (config.http.user_agents) {
|
||||
setUserAgents(config.http.user_agents);
|
||||
log.debug({ count: config.http.user_agents.length }, "custom user agents loaded");
|
||||
}
|
||||
|
||||
const httpClient = new HttpClient(config.http);
|
||||
const proxyPool = new ProxyPool(config.proxies, config.proxy);
|
||||
|
||||
if (config.proxy.enabled) {
|
||||
httpClient.setProxyPool(proxyPool);
|
||||
proxyPool.logState();
|
||||
}
|
||||
|
||||
if (args.proxy) {
|
||||
log.info({ proxy: args.proxy.replace(/\/\/([^:]+):([^@]+)@/, "//$1:***@") }, "using CLI proxy override");
|
||||
const singlePool = new ProxyPool([args.proxy], { enabled: true });
|
||||
httpClient.setProxyPool(singlePool);
|
||||
}
|
||||
|
||||
process.on("SIGUSR1", () => {
|
||||
console.log("\n--- Proxy Pool Metrics (SIGUSR1) ---");
|
||||
console.log(JSON.stringify(proxyPool.getMetrics(), null, 2));
|
||||
console.log("--- Proxy Detail ---");
|
||||
console.log(JSON.stringify(proxyPool.getProxyDetail(), null, 2));
|
||||
});
|
||||
|
||||
const runner = new SearchRunner({ httpClient, config });
|
||||
|
||||
try {
|
||||
const data = await runner.run({
|
||||
query: args.query,
|
||||
type: args.type,
|
||||
limit: args.limit
|
||||
});
|
||||
|
||||
if (args.mode === "agent") {
|
||||
console.log(formatSearchResponse(data));
|
||||
} else {
|
||||
if (data.image || data.web) {
|
||||
renderCombinedResults({
|
||||
query: data.query,
|
||||
image: data.image,
|
||||
web: data.web
|
||||
});
|
||||
}
|
||||
|
||||
if (data.errors && data.errors.length > 0) {
|
||||
const failed = data.errors.filter(e => e.code === "ENGINE_FAILED");
|
||||
if (failed.length > 0) {
|
||||
console.error(`\n \x1b[33mWarning: ${failed.length} engine(s) failed, used fallback\x1b[0m`);
|
||||
}
|
||||
}
|
||||
|
||||
if (args.metrics) {
|
||||
console.log("\n\x1b[1m\x1b[36m── Proxy Pool Metrics ──\x1b[0m");
|
||||
console.log(JSON.stringify(proxyPool.getMetrics(), null, 2));
|
||||
console.log("\n\x1b[1m\x1b[36m── Per-Proxy Detail ──\x1b[0m");
|
||||
console.log(JSON.stringify(proxyPool.getProxyDetail(), null, 2));
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
log.error({ error: err.message }, "search failed with unhandled error");
|
||||
if (args.mode === "agent") {
|
||||
console.error(formatErrorResponse(err, { query: args.query, type: args.type }));
|
||||
} else {
|
||||
console.error(`\n\x1b[31mError: ${err.message}\x1b[0m\n`);
|
||||
}
|
||||
process.exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
export async function search(opts) {
|
||||
const config = await loadConfig(opts?.configPath);
|
||||
const httpClient = new HttpClient(config.http);
|
||||
|
||||
if (config.proxy.enabled) {
|
||||
const proxyPool = new ProxyPool(config.proxies, config.proxy);
|
||||
httpClient.setProxyPool(proxyPool);
|
||||
}
|
||||
|
||||
const runner = new SearchRunner({ httpClient, config });
|
||||
return runner.run({
|
||||
query: opts.query,
|
||||
type: opts.type || "image",
|
||||
limit: opts.limit || 10
|
||||
});
|
||||
}
|
||||
|
||||
main();
|
||||
133
src/output/agent.js
Normal file
133
src/output/agent.js
Normal file
|
|
@ -0,0 +1,133 @@
|
|||
export function formatSearchResponse(data) {
|
||||
const response = {
|
||||
success: true,
|
||||
query: data.query,
|
||||
type: data.type,
|
||||
timestamp: data.timestamp || new Date().toISOString(),
|
||||
execution_time_ms: data.executionTimeMs || 0,
|
||||
engines_used: data.enginesUsed || []
|
||||
};
|
||||
|
||||
if (data.image) {
|
||||
response.image = formatResultsSection(data.image, "image");
|
||||
}
|
||||
|
||||
if (data.web) {
|
||||
response.web = formatResultsSection(data.web, "web");
|
||||
}
|
||||
|
||||
if (data.errors && data.errors.length > 0) {
|
||||
response.errors = data.errors;
|
||||
}
|
||||
|
||||
return JSON.stringify(response, null, 2);
|
||||
}
|
||||
|
||||
export function formatErrorResponse(error, options = {}) {
|
||||
const response = {
|
||||
success: false,
|
||||
error: {
|
||||
code: error.code || "SEARCH_FAILED",
|
||||
message: error.message || "An unknown error occurred",
|
||||
type: error.constructor?.name || "Error"
|
||||
},
|
||||
query: options.query || null,
|
||||
type: options.type || null,
|
||||
timestamp: new Date().toISOString()
|
||||
};
|
||||
|
||||
return JSON.stringify(response, null, 2);
|
||||
}
|
||||
|
||||
export function formatHelpResponse(engines, options) {
|
||||
const response = {
|
||||
success: true,
|
||||
help: {
|
||||
name: "polysearch",
|
||||
description: "Multi-engine web + image search with proxy rotation, circuit breaker, and AI agent output",
|
||||
version: "2.0.0",
|
||||
usage: "node src/index.js [OPTIONS]",
|
||||
options: {
|
||||
query: { flag: "-q, --query <string>", required: true, description: "Search query" },
|
||||
type: { flag: "-t, --type <type>", required: false, default: "image", description: "Search type: web, image, or both" },
|
||||
limit: { flag: "-l, --limit <number>", required: false, default: 10, description: "Max results per type" },
|
||||
mode: { flag: "-m, --mode <mode>", required: false, default: "human", description: "Output mode: human or agent" },
|
||||
config: { flag: "-c, --config <path>", required: false, description: "Path to config file" },
|
||||
proxy: { flag: "-p, --proxy <url>", required: false, description: "Single proxy URL to use" },
|
||||
help: { flag: "-h, --help", required: false, default: false, description: "Show this help" }
|
||||
},
|
||||
examples: [
|
||||
"node src/index.js -q \"vintage radio\"",
|
||||
"node src/index.js -q \"spacex launch\" -t both -l 15 --mode agent",
|
||||
"node src/index.js -q \"cats\" -t web -l 5 -p http://user:pass@proxy:8080",
|
||||
"node src/index.js -q \"1950 computer\" -m agent"
|
||||
],
|
||||
engines: engines.map(e => ({
|
||||
name: e.name,
|
||||
supports: e.supports,
|
||||
priority: e.priority
|
||||
})),
|
||||
output_modes: {
|
||||
human: "Colorized terminal output for humans",
|
||||
agent: "Structured JSON for AI agent consumption"
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
return JSON.stringify(response, null, 2);
|
||||
}
|
||||
|
||||
function formatResultsSection(section, type) {
|
||||
const out = {
|
||||
engine: section.engine || "unknown",
|
||||
total: section.total || 0
|
||||
};
|
||||
|
||||
if (type === "image") {
|
||||
out.results = (section.results || []).map((item, i) => ({
|
||||
index: i + 1,
|
||||
title: item.title || null,
|
||||
image_url: item.image_url || null,
|
||||
source_url: item.source_url || null,
|
||||
domain: item.domain || "unknown",
|
||||
width: item.width || null,
|
||||
height: item.height || null,
|
||||
thumbnail: item.thumbnail || null,
|
||||
engine: item.engine || section.engine || null
|
||||
}));
|
||||
out.statistics = {
|
||||
avg_width: avg(section.results, "width"),
|
||||
avg_height: avg(section.results, "height"),
|
||||
domains: countBy(section.results, "domain")
|
||||
};
|
||||
} else {
|
||||
out.results = (section.results || []).map((item, i) => ({
|
||||
index: i + 1,
|
||||
title: item.title || null,
|
||||
url: item.url || null,
|
||||
domain: item.domain || "unknown",
|
||||
snippet: item.snippet || null,
|
||||
engine: item.engine || section.engine || null
|
||||
}));
|
||||
out.statistics = {
|
||||
domains: countBy(section.results, "domain")
|
||||
};
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
function countBy(arr, key) {
|
||||
const counts = {};
|
||||
for (const item of arr || []) {
|
||||
const val = item[key] || "unknown";
|
||||
counts[val] = (counts[val] || 0) + 1;
|
||||
}
|
||||
return counts;
|
||||
}
|
||||
|
||||
function avg(arr, key) {
|
||||
const nums = (arr || []).map(i => i[key]).filter(n => n != null);
|
||||
if (nums.length === 0) return null;
|
||||
return Math.round(nums.reduce((s, n) => s + n, 0) / nums.length);
|
||||
}
|
||||
72
src/output/human.js
Normal file
72
src/output/human.js
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
export function renderImageResults(results, query) {
|
||||
if (!results || results.length === 0) {
|
||||
console.log(`\n No image results found for: "${query}"\n`);
|
||||
return;
|
||||
}
|
||||
|
||||
console.log(`\n\x1b[1m\x1b[36mImage Results for:\x1b[0m "${query}"`);
|
||||
console.log(`\x1b[90m${"─".repeat(80)}\x1b[0m\n`);
|
||||
|
||||
for (const img of results) {
|
||||
const reset = "\x1b[0m";
|
||||
const dims = (img.width && img.height) ? `${img.width}\u00d7${img.height}` : "Unknown";
|
||||
|
||||
console.log(`\x1b[1m[${img.index}]\x1b[0m`);
|
||||
console.log(` \x1b[1mDescription:\x1b[0m ${img.title || "No description"}`);
|
||||
console.log(` \x1b[1mDomain:\x1b[0m ${colorDomain(img.domain)}${img.domain}${reset}`);
|
||||
console.log(` \x1b[1mDimensions:\x1b[0m ${dims}`);
|
||||
console.log(` \x1b[1mImage URL:\x1b[0m \x1b[4m${img.image_url}${reset}`);
|
||||
if (img.source_url) {
|
||||
console.log(` \x1b[1mSource Page:\x1b[0m \x1b[4m${img.source_url}${reset}`);
|
||||
}
|
||||
console.log(`\x1b[90m${"─".repeat(80)}\x1b[0m`);
|
||||
}
|
||||
|
||||
console.log(` Found ${results.length} image result${results.length !== 1 ? "s" : ""}\n`);
|
||||
}
|
||||
|
||||
export function renderWebResults(results, query) {
|
||||
if (!results || results.length === 0) {
|
||||
console.log(`\n No web results found for: "${query}"\n`);
|
||||
return;
|
||||
}
|
||||
|
||||
console.log(`\n\x1b[1m\x1b[36mWeb Results for:\x1b[0m "${query}"`);
|
||||
console.log(`\x1b[90m${"─".repeat(80)}\x1b[0m\n`);
|
||||
|
||||
for (const r of results) {
|
||||
console.log(`\x1b[1m[${r.index}] ${r.title}\x1b[0m`);
|
||||
console.log(` \x1b[1mURL:\x1b[0m \x1b[4m${r.url}${"\x1b[0m"}`);
|
||||
console.log(` \x1b[1mDomain:\x1b[0m \x1b[32m${r.domain}\x1b[0m`);
|
||||
if (r.snippet) {
|
||||
console.log(` \x1b[1mSnippet:\x1b[0m ${r.snippet.slice(0, 200)}`);
|
||||
}
|
||||
console.log(`\x1b[90m${"─".repeat(80)}\x1b[0m`);
|
||||
}
|
||||
|
||||
console.log(` Found ${results.length} web result${results.length !== 1 ? "s" : ""}\n`);
|
||||
}
|
||||
|
||||
export function renderCombinedResults(data) {
|
||||
if (data.image && data.image.results) {
|
||||
renderImageResults(
|
||||
data.image.results.map((r, i) => ({ ...r, index: i + 1 })),
|
||||
data.query
|
||||
);
|
||||
}
|
||||
if (data.web && data.web.results) {
|
||||
renderWebResults(
|
||||
data.web.results.map((r, i) => ({ ...r, index: i + 1 })),
|
||||
data.query
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
function colorDomain(domain) {
|
||||
const lower = domain.toLowerCase();
|
||||
if (lower.includes("shutterstock") || lower.includes("alamy") ||
|
||||
lower.includes("istock") || lower.includes("getty")) return "\x1b[31m";
|
||||
if (lower.endsWith(".edu") || lower.endsWith(".gov") || lower.includes("wikimedia")) return "\x1b[35m";
|
||||
if (lower.includes("unsplash") || lower.includes("pexels") || lower.includes("pixabay")) return "\x1b[33m";
|
||||
return "\x1b[32m";
|
||||
}
|
||||
123
src/run.js
Normal file
123
src/run.js
Normal file
|
|
@ -0,0 +1,123 @@
|
|||
import { createEngines } from "./engines/setup.js";
|
||||
import { childLogger } from "./utils/logger.js";
|
||||
|
||||
export class SearchRunner {
|
||||
constructor({ httpClient, config = {} }) {
|
||||
this.log = childLogger({ component: "runner" });
|
||||
this.http = httpClient;
|
||||
this.config = config;
|
||||
this.engines = createEngines(httpClient);
|
||||
}
|
||||
|
||||
async run({ query, type = "image", limit = 10, proxy = null, signal = null }) {
|
||||
const startTime = Date.now();
|
||||
const types = type === "both" ? ["web", "image"] : [type];
|
||||
const result = {
|
||||
query,
|
||||
type,
|
||||
timestamp: new Date().toISOString(),
|
||||
executionTimeMs: 0,
|
||||
enginesUsed: [],
|
||||
errors: []
|
||||
};
|
||||
|
||||
this.log.info({ query, type, limit }, "search started");
|
||||
|
||||
for (const t of types) {
|
||||
const engines = t === "web" ? this.engines.web() : this.engines.image();
|
||||
|
||||
this.log.debug({ type: t, engineCount: engines.length }, "searching type");
|
||||
|
||||
if (engines.length === 0) {
|
||||
this.log.error({ type: t }, "no registered engines for type");
|
||||
result.errors.push({
|
||||
type: t,
|
||||
code: "NO_ENGINES",
|
||||
message: `No registered engines support "${t}" search`
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
const { section, usedEngines, errors } = await this._tryEngines(
|
||||
engines, query, t, limit, proxy, signal
|
||||
);
|
||||
|
||||
result[t] = section;
|
||||
result.enginesUsed.push(...usedEngines);
|
||||
result.errors.push(...errors);
|
||||
}
|
||||
|
||||
result.executionTimeMs = Date.now() - startTime;
|
||||
|
||||
this.log.info({
|
||||
executionTimeMs: result.executionTimeMs,
|
||||
enginesUsed: result.enginesUsed,
|
||||
errors: result.errors.length,
|
||||
imageCount: result.image?.results?.length || 0,
|
||||
webCount: result.web?.results?.length || 0
|
||||
}, "search completed");
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
async _tryEngines(engines, query, type, limit, proxy, signal) {
|
||||
let section = null;
|
||||
const usedEngines = [];
|
||||
const errors = [];
|
||||
|
||||
for (const engine of engines) {
|
||||
if (section) break;
|
||||
|
||||
this.log.debug({ engine: engine.engineName, type }, "trying engine");
|
||||
|
||||
try {
|
||||
const engineResult = await engine.search(query, {
|
||||
type,
|
||||
limit,
|
||||
proxy,
|
||||
signal
|
||||
});
|
||||
|
||||
if (engineResult && engineResult.results && engineResult.results.length > 0) {
|
||||
this.log.info({
|
||||
engine: engine.engineName,
|
||||
type,
|
||||
resultCount: engineResult.results.length
|
||||
}, "engine returned results");
|
||||
|
||||
section = {
|
||||
engine: engineResult.engine,
|
||||
total: engineResult.results.length,
|
||||
results: engineResult.results.map((r, i) => ({
|
||||
...r,
|
||||
index: i + 1
|
||||
}))
|
||||
};
|
||||
usedEngines.push(engine.engineName);
|
||||
} else {
|
||||
this.log.debug({ engine: engine.engineName, type }, "engine returned 0 results");
|
||||
}
|
||||
} catch (err) {
|
||||
this.log.warn({
|
||||
engine: engine.engineName,
|
||||
type,
|
||||
error: err.message
|
||||
}, "engine failed");
|
||||
errors.push({
|
||||
type,
|
||||
engine: engine.engineName,
|
||||
code: "ENGINE_FAILED",
|
||||
message: err.message
|
||||
});
|
||||
usedEngines.push(`${engine.engineName}(failed)`);
|
||||
}
|
||||
}
|
||||
|
||||
if (!section) {
|
||||
this.log.warn({ type }, "all engines failed for type");
|
||||
section = { engine: "none", total: 0, results: [] };
|
||||
}
|
||||
|
||||
return { section, usedEngines, errors };
|
||||
}
|
||||
}
|
||||
23
src/utils/logger.js
Normal file
23
src/utils/logger.js
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
import pino from "pino";
|
||||
|
||||
const level = process.env.LOG_LEVEL || "info";
|
||||
|
||||
export const logger = pino({
|
||||
level,
|
||||
transport: process.stdout.isTTY
|
||||
? {
|
||||
target: "pino/file",
|
||||
options: { colorize: true }
|
||||
}
|
||||
: undefined,
|
||||
formatters: {
|
||||
level(label) {
|
||||
return { level: label };
|
||||
}
|
||||
},
|
||||
timestamp: pino.stdTimeFunctions.isoTime
|
||||
});
|
||||
|
||||
export function childLogger(bindings) {
|
||||
return logger.child(bindings);
|
||||
}
|
||||
45
src/utils/retry.js
Normal file
45
src/utils/retry.js
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
export async function withRetry(fn, {
|
||||
attempts = 3,
|
||||
baseDelayMs = 1000,
|
||||
maxDelayMs = 10000,
|
||||
onRetry = null
|
||||
} = {}) {
|
||||
let lastError;
|
||||
|
||||
for (let i = 0; i < attempts; i++) {
|
||||
try {
|
||||
return await fn(i);
|
||||
} catch (err) {
|
||||
lastError = err;
|
||||
if (i === attempts - 1) break;
|
||||
|
||||
const delay = Math.min(
|
||||
baseDelayMs * Math.pow(2, i) + Math.random() * baseDelayMs,
|
||||
maxDelayMs
|
||||
);
|
||||
|
||||
if (onRetry) onRetry({ attempt: i + 1, error: err, delayMs: Math.round(delay) });
|
||||
|
||||
await new Promise(r => setTimeout(r, delay));
|
||||
}
|
||||
}
|
||||
|
||||
throw lastError;
|
||||
}
|
||||
|
||||
export function isRetryable(err) {
|
||||
if (!err) return false;
|
||||
const msg = String(err.message || err).toLowerCase();
|
||||
const status = err.status || err.statusCode || 0;
|
||||
|
||||
if (status >= 500 && status < 600) return true;
|
||||
if (status === 429) return true;
|
||||
if (status === 0) return true;
|
||||
if (msg.includes('timeout') || msg.includes('timed out')) return true;
|
||||
if (msg.includes('econnrefused') || msg.includes('econnreset')) return true;
|
||||
if (msg.includes('etimedout') || msg.includes('enotfound') || msg.includes('eai_again')) return true;
|
||||
if (msg.includes('network') || msg.includes('socket') || msg.includes('fetch failed')) return true;
|
||||
if (msg.includes('could not extract token')) return false;
|
||||
|
||||
return false;
|
||||
}
|
||||
29
src/utils/ua.js
Normal file
29
src/utils/ua.js
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
const DEFAULT_AGENTS = [
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (X11; Linux x86_64; rv:127.0) Gecko/20100101 Firefox/127.0",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:127.0) Gecko/20100101 Firefox/127.0",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 14.5; rv:127.0) Gecko/20100101 Firefox/127.0",
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0",
|
||||
"Mozilla/5.0 (iPhone; CPU iPhone OS 17_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Mobile/15E148 Safari/604.1"
|
||||
];
|
||||
|
||||
let agents = [...DEFAULT_AGENTS];
|
||||
let index = 0;
|
||||
|
||||
export function setUserAgents(list) {
|
||||
if (Array.isArray(list) && list.length > 0) {
|
||||
agents = list;
|
||||
}
|
||||
}
|
||||
|
||||
export function getRandomUA() {
|
||||
return agents[Math.floor(Math.random() * agents.length)];
|
||||
}
|
||||
|
||||
export function getNextUA() {
|
||||
const ua = agents[index % agents.length];
|
||||
index++;
|
||||
return ua;
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue