new enhancements

This commit is contained in:
Balu Reddy 2025-09-20 17:55:22 +01:00
parent cfe7a6a328
commit 94ed09d087
11 changed files with 1582 additions and 205 deletions

4
.env
View file

@ -207,7 +207,9 @@ AZURE_AI_SEARCH_SEARCH_OPTION_QUERY_TYPE=semantic
AZURE_AI_SEARCH_AIRTABLE_INDEX=wpp-knowledge-dev-airtable
AZURE_AI_SEARCH_WEBSITE_INDEX=wpp-knowledge-dev-website
AZURE_AI_SEARCH_CYCLOPEDIA_INDEX=wpp-knowledge-dev-cyclopedia
AZURE_AI_SEARCH_CATALOG_INDEX = wpp-knowledge-dev-catalog # (optional; for product catalog data)
AZURE_AI_SEARCH_TRACTOR_INDEX = wpp-knowledge-dev-tractors # (optional; for tractor data)
AZURE_AI_SEARCH_CASES_INDEX = wpp-knowledge-dev-cases # (optional; for customer case data)
#==================================================#
# Search #
#==================================================#

View file

@ -13,6 +13,10 @@ const TraversaalSearch = require('./structured/TraversaalSearch');
const createOpenAIImageTools = require('./structured/OpenAIImageTools');
const TavilySearchResults = require('./structured/TavilySearchResults');
const StructuredWPPACS = require('./structured/WoodlandAISearch');
const StructuredWPPACSTractor = require('./structured/WoodlandAISearchTractor');
const StructuredWPPACSCases = require('./structured/WoodlandAISearchCases');
const StructuredWPPACSAll = require('./structured/WoodlandAISearchAll');
const StructuredWPPACSGeneral = require('./structured/WoodlandAISearchGeneral');
module.exports = {
...manifest,
@ -28,5 +32,9 @@ module.exports = {
createYouTubeTools,
TavilySearchResults,
createOpenAIImageTools,
StructuredWPPACS
StructuredWPPACS,
StructuredWPPACSTractor,
StructuredWPPACSCases,
StructuredWPPACSAll
,StructuredWPPACSGeneral
};

View file

@ -193,27 +193,164 @@
}
]
},
{
"name": "Woodland Search",
"pluginKey": "woodland-ai-search",
"description": "Prioritized Azure AI Search (Airtable → Cyclopedia → Website) for Woodland.",
"icon": "https://i.imgur.com/E7crPze.png",
"authConfig": [
{
"name": "Woodland Search",
"pluginKey": "woodland-ai-search",
"description": "Azure AI Search (Catalog → Website → Cyclopedia) for Woodland support.",
"icon": "https://i.imgur.com/E7crPze.png",
"authConfig": [
{
"authField": "AZURE_AI_SEARCH_SERVICE_ENDPOINT",
"label": "Azure AI Search Endpoint",
"description": "You need to provide your Endpoint for Azure AI Search."
},
{
"authField": "AZURE_AI_SEARCH_INDEX_NAME",
"label": "Azure AI Search Index Name",
"description": "You need to provide your Index Name for Azure AI Search."
"description": "Your Azure Cognitive Search service endpoint (e.g., https://<service>.search.windows.net)."
},
{
"authField": "AZURE_AI_SEARCH_API_KEY",
"label": "Azure AI Search API Key",
"description": "You need to provide your API Key for Azure AI Search."
"description": "Admin or query API key for your Azure Search service."
},
{
"authField": "AZURE_AI_SEARCH_CATALOG_INDEX",
"label": "Catalog Index Name",
"description": "Index name for the product/parts catalog (e.g., <base>-catalog)."
},
{
"authField": "AZURE_AI_SEARCH_WEBSITE_INDEX",
"label": "Website Index Name",
"description": "Index name for website content (e.g., <base>-website)."
},
{
"authField": "AZURE_AI_SEARCH_CYCLOPEDIA_INDEX",
"label": "Cyclopedia Index Name",
"description": "Index name for cyclopedia/procedure content (e.g., <base>-cyclopedia)."
}
]
}
},
{
"name": "Tractor Search",
"pluginKey": "woodland-ai-search-tractor",
"description": "Azure AI Search for the Tractor index (single-index, semantic results).",
"icon": "https://i.imgur.com/E7crPze.png",
"authConfig": [
{
"authField": "AZURE_AI_SEARCH_SERVICE_ENDPOINT",
"label": "Azure AI Search Endpoint",
"description": "Your Azure Cognitive Search service endpoint (e.g., https://<service>.search.windows.net)."
},
{
"authField": "AZURE_AI_SEARCH_API_KEY",
"label": "Azure AI Search API Key",
"description": "Admin or query API key for your Azure Search service."
},
{
"authField": "AZURE_AI_SEARCH_TRACTOR_INDEX||AZURE_AI_SEARCH_INDEX_NAME",
"label": "Tractor Index Name",
"description": "Index name for the Tractor data (e.g., <base>-tractor). Uses AZURE_AI_SEARCH_INDEX_NAME if not provided."
}
]
},
{
"name": "Cases Search",
"pluginKey": "woodland-ai-search-cases",
"description": "Azure AI Search for the Cases/Knowledge index (single-index, semantic answers).",
"icon": "https://i.imgur.com/E7crPze.png",
"authConfig": [
{
"authField": "AZURE_AI_SEARCH_SERVICE_ENDPOINT",
"label": "Azure AI Search Endpoint",
"description": "Your Azure Cognitive Search service endpoint (e.g., https://<service>.search.windows.net)."
},
{
"authField": "AZURE_AI_SEARCH_API_KEY",
"label": "Azure AI Search API Key",
"description": "Admin or query API key for your Azure Search service."
},
{
"authField": "AZURE_AI_SEARCH_CASES_INDEX||AZURE_AI_SEARCH_INDEX_NAME",
"label": "Cases Index Name",
"description": "Index name for the Cases/Knowledge data (e.g., <base>-cases). Uses AZURE_AI_SEARCH_INDEX_NAME if not provided."
}
]
},
{
"name": "Woodland Search (All)",
"pluginKey": "woodland-ai-search-all",
"description": "Calls Woodland, Tractor, and Cases tools in parallel and merges results.",
"icon": "https://i.imgur.com/E7crPze.png",
"authConfig": [
{
"authField": "AZURE_AI_SEARCH_SERVICE_ENDPOINT",
"label": "Azure AI Search Endpoint",
"description": "Your Azure Cognitive Search service endpoint (e.g., https://<service>.search.windows.net)."
},
{
"authField": "AZURE_AI_SEARCH_API_KEY",
"label": "Azure AI Search API Key",
"description": "Admin or query API key for your Azure Search service."
},
{
"authField": "AZURE_AI_SEARCH_CATALOG_INDEX",
"label": "Catalog Index Name",
"description": "Index name for product/parts catalog (e.g., <base>-catalog)."
},
{
"authField": "AZURE_AI_SEARCH_WEBSITE_INDEX",
"label": "Website Index Name",
"description": "Index name for website content (e.g., <base>-website)."
},
{
"authField": "AZURE_AI_SEARCH_CYCLOPEDIA_INDEX",
"label": "Cyclopedia Index Name",
"description": "Index name for cyclopedia content (e.g., <base>-cyclopedia)."
},
{
"authField": "AZURE_AI_SEARCH_TRACTOR_INDEX||AZURE_AI_SEARCH_INDEX_NAME",
"label": "Tractor Index Name",
"description": "Index name for tractors data (e.g., <base>-tractors)."
},
{
"authField": "AZURE_AI_SEARCH_CASES_INDEX||AZURE_AI_SEARCH_INDEX_NAME",
"label": "Cases Index Name",
"description": "Index name for cases/knowledge data (e.g., <base>-cases)."
}
]
},
{
"name": "Woodland Search (General)",
"pluginKey": "woodland-ai-search-general",
"description": "Grounded general search: tries Woodland first, then falls back to Cases if needed.",
"icon": "https://i.imgur.com/E7crPze.png",
"authConfig": [
{
"authField": "AZURE_AI_SEARCH_SERVICE_ENDPOINT",
"label": "Azure AI Search Endpoint",
"description": "Your Azure Cognitive Search service endpoint (e.g., https://<service>.search.windows.net)."
},
{
"authField": "AZURE_AI_SEARCH_API_KEY",
"label": "Azure AI Search API Key",
"description": "Admin or query API key for your Azure Search service."
},
{
"authField": "AZURE_AI_SEARCH_CATALOG_INDEX",
"label": "Catalog Index Name",
"description": "Index name for product/parts catalog (e.g., <base>-catalog)."
},
{
"authField": "AZURE_AI_SEARCH_WEBSITE_INDEX",
"label": "Website Index Name",
"description": "Index name for website content (e.g., <base>-website)."
},
{
"authField": "AZURE_AI_SEARCH_CYCLOPEDIA_INDEX",
"label": "Cyclopedia Index Name",
"description": "Index name for cyclopedia content (e.g., <base>-cyclopedia)."
},
{
"authField": "AZURE_AI_SEARCH_CASES_INDEX||AZURE_AI_SEARCH_INDEX_NAME",
"label": "Cases Index Name",
"description": "Index name for cases/knowledge data (e.g., <base>-cases)."
}
]
}
]

View file

@ -9,7 +9,7 @@ class WoodlandAISearch extends Tool {
static DEFAULT_TOP = 9;
// Default select is intentionally minimal; per-intent selects override
static DEFAULT_SELECT = 'id,title,content,url';
static GROUPS = ['airtable', 'cyclopedia', 'website'];
static GROUPS = ['catalog', 'cyclopedia', 'website'];
_env(v, fallback) {
return v ?? fallback;
@ -34,9 +34,9 @@ class WoodlandAISearch extends Tool {
this.apiKey = this._env(fields.AZURE_AI_SEARCH_API_KEY, process.env.AZURE_AI_SEARCH_API_KEY);
// Per-index names
this.airtableIndex = this._env(
fields.AZURE_AI_SEARCH_AIRTABLE_INDEX,
process.env.AZURE_AI_SEARCH_AIRTABLE_INDEX,
this.catalogIndex = this._env(
fields.AZURE_AI_SEARCH_CATALOG_INDEX,
process.env.AZURE_AI_SEARCH_CATALOG_INDEX,
);
this.websiteIndex = this._env(
fields.AZURE_AI_SEARCH_WEBSITE_INDEX,
@ -47,9 +47,9 @@ class WoodlandAISearch extends Tool {
process.env.AZURE_AI_SEARCH_CYCLOPEDIA_INDEX,
);
if (!this.serviceEndpoint || !this.apiKey || !this.airtableIndex || !this.websiteIndex || !this.cyclopediaIndex) {
if (!this.serviceEndpoint || !this.apiKey || !this.catalogIndex || !this.websiteIndex || !this.cyclopediaIndex) {
throw new Error(
'Missing one or more Azure AI Search envs: AZURE_AI_SEARCH_SERVICE_ENDPOINT, AZURE_AI_SEARCH_API_KEY, AZURE_AI_SEARCH_AIRTABLE_INDEX, AZURE_AI_SEARCH_WEBSITE_INDEX, AZURE_AI_SEARCH_CYCLOPEDIA_INDEX.',
'Missing one or more Azure AI Search envs: AZURE_AI_SEARCH_SERVICE_ENDPOINT, AZURE_AI_SEARCH_API_KEY, AZURE_AI_SEARCH_CATALOG_INDEX, AZURE_AI_SEARCH_WEBSITE_INDEX, AZURE_AI_SEARCH_CYCLOPEDIA_INDEX.',
);
}
@ -69,21 +69,31 @@ class WoodlandAISearch extends Tool {
this.defaultSources = (this._env(fields.AZURE_AI_SEARCH_DEFAULT_SOURCES, process.env.AZURE_AI_SEARCH_DEFAULT_SOURCES) || WoodlandAISearch.GROUPS.join(','))
.split(',').map(s => s.trim()).filter(Boolean);
this.searchFields = (() => {
// Prefer configured search fields; otherwise default to high-signal human fields
const v = this._env(fields.AZURE_AI_SEARCH_SEARCH_FIELDS, process.env.AZURE_AI_SEARCH_SEARCH_FIELDS);
if (!v) return undefined;
return String(v).split(',').map(s => s.trim()).filter(Boolean);
if (v) return String(v).split(',').map(s => s.trim()).filter(Boolean);
return ['title','content','categories','category_paths','promotion_names','part_numbers','sku'];
})();
// Optional per-index search field overrides
const parseList = (v) => (v ? String(v).split(',').map((s) => s.trim()).filter(Boolean) : undefined);
this.searchFieldOverrides = {
catalog: parseList(this._env(fields.AZURE_AI_SEARCH_CATALOG_SEARCH_FIELDS, process.env.AZURE_AI_SEARCH_CATALOG_SEARCH_FIELDS)),
website: parseList(this._env(fields.AZURE_AI_SEARCH_WEBSITE_SEARCH_FIELDS, process.env.AZURE_AI_SEARCH_WEBSITE_SEARCH_FIELDS)),
cyclopedia: parseList(this._env(fields.AZURE_AI_SEARCH_CYCLOPEDIA_SEARCH_FIELDS, process.env.AZURE_AI_SEARCH_CYCLOPEDIA_SEARCH_FIELDS)),
};
this.semanticConfiguration = this._env(fields.AZURE_AI_SEARCH_SEMANTIC_CONFIGURATION, process.env.AZURE_AI_SEARCH_SEMANTIC_CONFIGURATION || 'sem1');
this.queryLanguage = this._env(fields.AZURE_AI_SEARCH_QUERY_LANGUAGE, process.env.AZURE_AI_SEARCH_QUERY_LANGUAGE || 'en-us');
this.scoringProfile = this._env(fields.AZURE_AI_SEARCH_SCORING_PROFILE, process.env.AZURE_AI_SEARCH_SCORING_PROFILE);
this.hardFilter = String(this._env(fields.AZURE_AI_SEARCH_HARD_FILTER, process.env.AZURE_AI_SEARCH_HARD_FILTER || 'true')).toLowerCase() === 'true';
// Always return all fields unless explicitly disabled
this.returnAllFields = String(this._env(fields.AZURE_AI_SEARCH_RETURN_ALL_FIELDS, process.env.AZURE_AI_SEARCH_RETURN_ALL_FIELDS || 'true')).toLowerCase() === 'true';
// Link enrichment to attach website/cyclopedia URLs to catalog hits for citations
this.enableLinkEnrichment = String(this._env(fields.AZURE_AI_SEARCH_ENABLE_LINK_ENRICHMENT, process.env.AZURE_AI_SEARCH_ENABLE_LINK_ENRICHMENT || 'true')).toLowerCase() === 'true';
// Governance / guardrail flags
this.enforceReviewedOnly = String(this._env(fields.AZURE_AI_SEARCH_ENFORCE_REVIEWED_ONLY, process.env.AZURE_AI_SEARCH_ENFORCE_REVIEWED_ONLY || 'true')).toLowerCase() === 'true';
// Comma-separated domains to allow for Website results (e.g., "www.cyclonerake.com")
this.websiteDomainAllowlist = (this._env(fields.AZURE_AI_SEARCH_WEBSITE_DOMAIN_ALLOWLIST, process.env.AZURE_AI_SEARCH_WEBSITE_DOMAIN_ALLOWLIST) || 'www.cyclonerake.com')
this.websiteDomainAllowlist = (this._env(fields.AZURE_AI_SEARCH_WEBSITE_DOMAIN_ALLOWLIST, process.env.AZURE_AI_SEARCH_WEBSITE_DOMAIN_ALLOWLIST) || '')
.split(',')
.map(s => s.trim())
.filter(Boolean);
@ -91,7 +101,7 @@ class WoodlandAISearch extends Tool {
// Initialize one SearchClient per index
const credential = new AzureKeyCredential(this.apiKey);
this.clients = {
airtable: new SearchClient(this.serviceEndpoint, this.airtableIndex, credential, { apiVersion: this.apiVersion }),
catalog: new SearchClient(this.serviceEndpoint, this.catalogIndex, credential, { apiVersion: this.apiVersion }),
website: new SearchClient(this.serviceEndpoint, this.websiteIndex, credential, { apiVersion: this.apiVersion }),
cyclopedia: new SearchClient(this.serviceEndpoint, this.cyclopediaIndex, credential, { apiVersion: this.apiVersion }),
};
@ -100,7 +110,7 @@ class WoodlandAISearch extends Tool {
endpoint: this.serviceEndpoint,
apiVersion: this.apiVersion,
indexes: {
airtable: this.airtableIndex,
catalog: this.catalogIndex,
website: this.websiteIndex,
cyclopedia: this.cyclopediaIndex,
},
@ -130,13 +140,24 @@ class WoodlandAISearch extends Tool {
return `(${a}) and (${b})`;
}
_escapeLiteral(v) {
// Escape single quotes for OData literal strings
return String(v).replace(/'/g, "''");
}
_withReviewed(filter) {
if (!this.enforceReviewedOnly) return filter;
// All three indexes expose a boolean 'reviewed' field in our data
// Reviewed applies to website/cyclopedia documents; catalog may not carry 'reviewed'
const reviewedClause = `reviewed eq true`;
return this._andFilter(filter, reviewedClause);
}
_applySearchFields(indexKey, defaults) {
const override = this.searchFieldOverrides?.[indexKey];
if (Array.isArray(override) && override.length) return override;
return defaults;
}
_sanitizeSearchOptions(opts) {
const clean = { ...opts };
const asStr = (v) => (typeof v === 'string' ? v.toLowerCase() : undefined);
@ -163,58 +184,77 @@ class WoodlandAISearch extends Tool {
return items;
};
try {
const docs = await run(options);
return { docs, retried: false };
} catch (err) {
logger.warn('[woodland-ai-search] Initial search failed');
const msg = (err && (err.message || String(err))) || '';
let attempt = 0;
let opts = { ...options };
let lastErr;
let droppedSearchFields = false;
while (attempt < 3) {
try {
const docs = await run(opts);
return { docs, retried: attempt > 0 };
} catch (err) {
lastErr = err;
attempt += 1;
logger.warn('[woodland-ai-search] Search failed', { attempt, msg: err?.message || String(err) });
const msg = (err && (err.message || String(err))) || '';
const sanitized = { ...opts };
let changed = false;
const sanitized = { ...options };
let changed = false;
// Keep semantic; only adjust unsupported options
if (/orderby/i.test(msg) && String(sanitized.queryType).toLowerCase() === 'semantic') {
if (sanitized.orderBy) {
delete sanitized.orderBy;
changed = true;
logger.info('[woodland-ai-search] Removing orderBy for semantic query and retrying');
}
}
// If orderBy not supported with semantic, remove and retry
if (/orderby/i.test(msg) && String(sanitized.queryType).toLowerCase() === 'semantic') {
if (sanitized.orderBy) {
delete sanitized.orderBy;
const unknownFieldRegex = /Unknown field '([^']+)'/g;
const toRemove = [];
let m;
while ((m = unknownFieldRegex.exec(msg)) !== null) {
const fld = (m[1] || '').trim();
if (fld) toRemove.push(fld);
}
if (toRemove.length > 0) {
if (Array.isArray(sanitized.select)) {
const before = sanitized.select.length;
sanitized.select = sanitized.select.filter((f) => !toRemove.includes(f));
if (sanitized.select.length === 0) delete sanitized.select;
if (sanitized.select?.length !== before) changed = true;
}
if (Array.isArray(sanitized.searchFields)) {
const before = sanitized.searchFields.length;
sanitized.searchFields = sanitized.searchFields.filter((f) => !toRemove.includes(f));
if (sanitized.searchFields.length === 0) delete sanitized.searchFields;
if (sanitized.searchFields?.length !== before) changed = true;
}
if (!/search field list|select/i.test(msg)) {
if (sanitized.filter) {
delete sanitized.filter;
changed = true;
logger.info('[woodland-ai-search] Dropping filter due to unknown fields and retrying');
}
if (sanitized.orderBy) {
delete sanitized.orderBy;
changed = true;
}
}
if (changed) logger.info('[woodland-ai-search] Retrying without unknown fields');
}
// Final fallback: if still failing and searchFields remain, drop them entirely
if (!changed && !droppedSearchFields && sanitized.searchFields) {
delete sanitized.searchFields;
droppedSearchFields = true;
changed = true;
logger.info('[woodland-ai-search] Removing orderBy for semantic query and retrying');
logger.info('[woodland-ai-search] Dropping searchFields entirely and retrying');
}
}
// Remove unknown fields from select/searchFields
const unknownFieldRegex = /Unknown field '([^']+)'/g;
const toRemove = [];
let m;
while ((m = unknownFieldRegex.exec(msg)) !== null) {
const fld = (m[1] || '').trim();
if (fld) toRemove.push(fld);
if (!changed) break;
opts = sanitized;
}
if (toRemove.length > 0 && /search field list|select/i.test(msg)) {
if (Array.isArray(sanitized.select)) {
const before = sanitized.select.length;
sanitized.select = sanitized.select.filter((f) => !toRemove.includes(f));
if (sanitized.select.length === 0) delete sanitized.select;
if (sanitized.select?.length !== before) changed = true;
}
if (Array.isArray(sanitized.searchFields)) {
const before = sanitized.searchFields.length;
sanitized.searchFields = sanitized.searchFields.filter((f) => !toRemove.includes(f));
if (sanitized.searchFields.length === 0) delete sanitized.searchFields;
if (sanitized.searchFields?.length !== before) changed = true;
}
if (changed) {
logger.info('[woodland-ai-search] Retrying without unknown fields');
}
}
if (!changed) {
throw err;
}
const docs = await run(sanitized);
return { docs, retried: true };
}
throw lastErr;
}
// Backwards-compat placeholder: not used in multi-index mode
@ -228,28 +268,9 @@ class WoodlandAISearch extends Tool {
}
async _tieredSearch(query, baseOptions, client) {
// 1) try as-is
let r = await this._safeSearch(query, baseOptions, client);
if (r.docs?.length) return r.docs;
// 2) if semantic, retry simple then simple(no searchFields)
const toSimple = (opt) => {
const o = { ...opt, queryType: 'simple' };
delete o.semanticSearchOptions;
delete o.semanticConfiguration;
delete o.semanticConfigurationName;
return o;
};
if (String(baseOptions.queryType).toLowerCase() === 'semantic') {
r = await this._safeSearch(query, toSimple(baseOptions), client);
if (r.docs?.length) return r.docs;
const noFields = toSimple(baseOptions);
if (Array.isArray(noFields.searchFields)) delete noFields.searchFields;
r = await this._safeSearch(query, noFields, client);
if (r.docs?.length) return r.docs;
}
return [];
// Single-pass: always semantic (no downgrade)
const r = await this._safeSearch(query, baseOptions, client);
return r.docs ?? [];
}
// Run tiered search against a specific index, with per-index options
@ -260,6 +281,19 @@ class WoodlandAISearch extends Tool {
return [];
}
const docs = await this._tieredSearch(query, options, client);
// Verbose logging: print a compact sample of Azure results for this index
try {
const sample = Array.isArray(docs)
? docs.slice(0, 5).map((d) => ({ id: d?.id, title: d?.title, url: d?.url }))
: [];
logger.info('[woodland-ai-search] Azure results sample', {
index: indexName,
count: Array.isArray(docs) ? docs.length : 0,
sample,
});
} catch (e) {
logger.debug('[woodland-ai-search] Failed to log results sample', { index: indexName, error: e?.message || String(e) });
}
// Annotate provenance
const annotated = (docs || []).map(d => ({ ...d, index: indexName }));
logger.info('[woodland-ai-search] Index query done', { index: indexName, docs: annotated.length });
@ -267,8 +301,8 @@ class WoodlandAISearch extends Tool {
}
// Multi-index search and interleave
async _searchAcrossIndexes(query, baseOptions, indexList, perIndexTop, finalTop, perIndexOptionsMap = {}) {
logger.info('[woodland-ai-search] Running per-index queries', {
async _searchAcrossIndexes(query, baseOptions, indexList, perIndexTop, finalTop, perIndexOptionsMap = {}, intent = 'general') {
logger.info('[woodland-ai-search] Running per-index queries (ordered concat)', {
indexes: indexList, perIndexTop, finalTop
});
@ -296,28 +330,53 @@ class WoodlandAISearch extends Tool {
const results = await Promise.all(tasks);
// NOTE: For parts intent, indexList is ordered ['website','airtable','cyclopedia'] to ensure a View/Buy URL appears early in interleaved results.
// Interleave equally across indexes
// Build results ensuring at least some docs from each index (when available),
// then fill remaining slots by priority: catalog → cyclopedia → website
const out = [];
const seen = new Set();
for (let i = 0; out.length < finalTop; i++) {
let pushedAny = false;
for (const { docs } of results) {
if (i < docs.length) {
const k = this._keyOf(docs[i]);
if (!seen.has(k)) {
seen.add(k);
out.push(docs[i]);
pushedAny = true;
if (out.length >= finalTop) break;
}
}
// Minimum quotas per index to guarantee Cyclopedia presence for SOP
const minQuota = (() => {
const q = { catalog: 1, cyclopedia: 1, website: 1 };
if (intent === 'sop') {
q.cyclopedia = Math.min(2, finalTop); // prioritize at least two SOP docs when possible
}
if (!pushedAny) break;
return q;
})();
// 1) Priming pass: satisfy per-index minimum quotas in priority order
for (const idx of indexList) {
const need = Math.max(0, Math.min(minQuota[idx] || 0, finalTop - out.length));
if (need <= 0) continue;
const bucket = results.find(r => r.index === idx)?.docs || [];
let taken = 0;
for (let i = 0; i < bucket.length && taken < need && out.length < finalTop; i++) {
const d = bucket[i];
const k = this._keyOf(d);
if (seen.has(k)) continue;
seen.add(k);
out.push(d);
taken++;
}
if (out.length >= finalTop) break;
}
logger.debug('[woodland-ai-search] Per-index merged results', {
// 2) Fill remaining slots by priority order
if (out.length < finalTop) {
for (const idx of indexList) {
const bucket = results.find(r => r.index === idx)?.docs || [];
for (let i = 0; i < bucket.length && out.length < finalTop; i++) {
const d = bucket[i];
const k = this._keyOf(d);
if (seen.has(k)) continue;
seen.add(k);
out.push(d);
}
if (out.length >= finalTop) break;
}
}
logger.debug('[woodland-ai-search] Per-index merged results (quota+priority)', {
total: out.length,
breakdown: results.map(r => ({ index: r.index, count: r.docs.length })),
});
@ -325,7 +384,7 @@ class WoodlandAISearch extends Tool {
return out;
}
// Enrich results with cross-index links so the agent can always show Website (View/Buy), Airtable, and Cyclopedia URLs in tables
// Enrich results with cross-index links so the agent can always show Website (View/Buy) and Cyclopedia URLs in tables
_enrichWithLinks(intent, extracted, results) {
try {
if (intent !== 'parts' || !Array.isArray(results) || results.length === 0) return results;
@ -346,8 +405,11 @@ class WoodlandAISearch extends Tool {
const cands = uniq([...skus, ...mentioned].map(norm));
for (const p of cands) {
if (!p) continue;
// Strict: only use the exact URL returned by search (do not synthesize)
const u = typeof d.url === 'string' ? d.url : undefined;
if (!u) continue;
const arr = websiteByPart.get(p) || [];
arr.push(d.url || d.parent_id || d.canonical_product_url);
arr.push(u);
websiteByPart.set(p, uniq(arr));
}
} else if (idx === 'cyclopedia') {
@ -362,37 +424,40 @@ class WoodlandAISearch extends Tool {
for (const pRaw of cands) {
const p = norm(pRaw);
if (!p) continue;
// Strict: only use the exact URL returned by search
const u = typeof d.url === 'string' ? d.url : undefined;
if (!u) continue;
const arr = cyclopediaByPart.get(p) || [];
arr.push(d.url || d.parent_id);
arr.push(u);
cyclopediaByPart.set(p, uniq(arr));
}
}
}
// Enrich Airtable part docs with website & cyclopedia links
// Enrich Catalog part/SKU docs with website & cyclopedia links
const out = results.map((d) => {
if (d.index !== 'airtable') return d;
const pn = norm(d.part_number) || (() => {
const m = /\b\d{2}-[a-z0-9]{2}-[a-z0-9]{3,}\b/i.exec(`${d.part_number || ''} ${d.title || ''} ${d.content || ''}`);
return m ? norm(m[0]) : '';
if (d.index !== 'catalog') return d;
// Prefer first part number; fallback to SKU
const pn = (() => {
const firstPn = Array.isArray(d.part_numbers) && d.part_numbers.length ? d.part_numbers[0] : '';
if (firstPn) return norm(firstPn);
return d.sku ? norm(d.sku) : '';
})();
if (!pn) return d;
const primaryWebsite = d.canonical_product_url || undefined;
// Use URLs exactly as returned by Azure Search (no allowlist or modification)
const primaryWebsite = typeof d.url === 'string' ? d.url : undefined;
const crossWebsite = websiteByPart.get(pn) || [];
const website_urls = uniq([primaryWebsite, ...crossWebsite]);
const website_urls = uniq([primaryWebsite, ...crossWebsite].filter(Boolean));
const crossCyclopedia = cyclopediaByPart.get(pn) || [];
const cyclopedia_urls = uniq(crossCyclopedia);
const cyclopedia_urls = uniq(crossCyclopedia.filter((u) => typeof u === 'string' && u));
// Include any Airtable-attached Doc360 URL as an authoritative Cyclopedia link
return {
...d,
airtable_url: d.airtable_record_url || d.airtable_url,
website_urls,
website_url_primary: website_urls[0] || undefined,
// Include any Airtable-attached Doc360 URL as an authoritative Cyclopedia link
cyclopedia_urls: uniq([d.doc360_url, ...cyclopedia_urls].filter(Boolean)),
cyclopedia_urls,
};
});
@ -407,6 +472,9 @@ class WoodlandAISearch extends Tool {
_optionsByIndexForIntent(intent, extracted = {}) {
const opts = {};
const partNum = extracted.partNumber;
const partType = extracted.partType; // e.g., 'collector bag', 'impeller', 'hose', 'recoil starter', 'boot plate', 'side tube'
const family = extracted.family; // e.g., 'Classic', 'Commander Pro', 'Commercial PRO', 'Standard Complete Platinum'
const wantsPromo = extracted.wantsPromo;
const maybe = (o, sel) => (this.returnAllFields ? o : { ...o, select: sel });
// Helper to constrain Website results to allowlisted domains
@ -415,11 +483,19 @@ class WoodlandAISearch extends Tool {
: undefined;
if (intent === 'parts') {
// Airtable: only parts, reviewed only
opts.airtable = maybe({
filter: this._withReviewed(partNum ? `(type eq 'part') and (part_number eq '${partNum}')` : `type eq 'part'`),
orderBy: ['last_updated desc'],
}, ['title','content','part_number','part_type','categories','canonical_product_url','last_updated','airtable_record_url','doc360_url','id']);
// Catalog: treat as primary part/SKU context for now
const pn = partNum || '';
const pnNoHyphen = pn ? pn.replace(/-/g, '') : '';
const filters = [];
if (pn) filters.push(`part_numbers/any(p: p eq '${this._escapeLiteral(pn)}') or part_numbers_hyphenless/any(p: p eq '${this._escapeLiteral(pnNoHyphen)}') or sku eq '${this._escapeLiteral(pn)}' or normalized_sku eq '${this._escapeLiteral(pnNoHyphen)}'`);
if (partType) filters.push(`part_type/any(p: p eq '${this._escapeLiteral(partType)}')`);
if (family) filters.push(`family eq '${this._escapeLiteral(family)}'`);
const catalogFilter = filters.length ? filters.map(f => `(${f})`).join(' and ') : undefined;
opts.catalog = maybe({
filter: catalogFilter,
orderBy: ['promotion_active desc','price_after_promo asc','price asc','last_updated desc'],
searchFields: this._applySearchFields('catalog', ['title','content','sku','part_numbers'])
}, ['id','title','content','url','sku','part_numbers','part_type','family','categories','category_paths','price','price_after_promo','promotion_names','availability','stock_quantity','installation_pdf_url','troubleshooting_pdf_url','safety_pdf_url','video_url','exploded_view_url']);
// Website: prefer exact SKU/part hits; else fall back to product pages, reviewed only, allowlist
const websiteFilterBase = partNum
@ -429,6 +505,7 @@ class WoodlandAISearch extends Tool {
opts.website = maybe({
filter: websiteFilter,
orderBy: ['last_crawled desc'],
searchFields: this._applySearchFields('website', ['title','content','breadcrumb','page_type','headings','sku','skus','mentioned_parts'])
}, ['title','content','url','site','last_crawled','sku','skus','mentioned_parts','id']);
// Cyclopedia: include even without explicit part number, reviewed only
@ -438,36 +515,45 @@ class WoodlandAISearch extends Tool {
opts.cyclopedia = maybe({
filter: this._withReviewed(cycloBase),
orderBy: ['last_updated desc'],
searchFields: this._applySearchFields('cyclopedia', ['title','content','breadcrumb','page_type','toc_items','mentioned_parts','audience'])
}, ['title','content','url','site','page_type','breadcrumb','audience','last_updated','mentioned_parts','id']);
} else if (intent === 'compatibility') {
// Airtable: reviewed only
opts.airtable = maybe({
filter: this._withReviewed(undefined),
// Catalog: compatibility context (family/models)
const filters = [];
if (family) filters.push(`family eq '${this._escapeLiteral(family)}'`);
if (partType) filters.push(`part_type/any(p: p eq '${this._escapeLiteral(partType)}')`);
opts.catalog = maybe({
filter: filters.length ? filters.map(f => `(${f})`).join(' and ') : undefined,
orderBy: ['last_updated desc'],
}, ['title','content','last_updated','source_table','airtable_record_url','doc360_url','id']);
searchFields: this._applySearchFields('catalog', ['title','content'])
}, ['id','title','content','url','categories','category_paths','family','compatible_models','part_type']);
// Website: reviewed only, allowlist
opts.website = maybe({
filter: this._withReviewed(websiteDomainFilter),
orderBy: ['last_crawled desc'],
searchFields: this._applySearchFields('website', ['title','content','breadcrumb','page_type','headings'])
}, ['title','content','url','site','page_type','breadcrumb','last_crawled','id']);
// Cyclopedia: reviewed only
opts.cyclopedia = maybe({
filter: this._withReviewed(`audience eq 'internal' or page_type eq 'maintenance_guide' or page_type eq 'troubleshooting'`),
orderBy: ['last_updated desc','section_order asc'],
searchFields: this._applySearchFields('cyclopedia', ['title','content','breadcrumb','page_type','toc_items','audience'])
}, ['title','content','url','site','page_type','breadcrumb','audience','last_updated','toc_items','id']);
} else if (intent === 'sop') {
// Cyclopedia SOP/support, reviewed only
opts.cyclopedia = maybe({
filter: this._withReviewed(`audience eq 'internal' or page_type eq 'maintenance_guide'`),
orderBy: ['last_updated desc','section_order asc'],
searchFields: this._applySearchFields('cyclopedia', ['title','content','breadcrumb','page_type','toc_items','audience'])
}, ['title','content','url','site','page_type','breadcrumb','audience','last_updated','toc_items','id']);
// Airtable support, reviewed only
opts.airtable = maybe({
filter: this._withReviewed(`type eq 'support'`),
// Catalog doc pointers (if present)
opts.catalog = maybe({
filter: undefined,
orderBy: ['last_updated desc'],
}, ['title','content','last_updated','type','source_table','airtable_record_url','doc360_url','id']);
searchFields: this._applySearchFields('catalog', ['title','content'])
}, ['id','title','content','url','categories','category_paths','installation_pdf_url','troubleshooting_pdf_url','safety_pdf_url','video_url','exploded_view_url']);
// Website generic, reviewed only, allowlist
opts.website = maybe({
filter: this._withReviewed(websiteDomainFilter),
@ -477,28 +563,34 @@ class WoodlandAISearch extends Tool {
opts.website = maybe({
filter: this._withReviewed(this._andFilter(`page_type eq 'product_marketing'`, websiteDomainFilter)),
orderBy: ['last_crawled desc'],
searchFields: this._applySearchFields('website', ['title','content','breadcrumb','page_type','headings'])
}, ['title','content','url','site','page_type','headings','breadcrumb','last_crawled','id']);
opts.airtable = maybe({
filter: this._withReviewed(undefined),
orderBy: ['last_updated desc'],
}, ['title','content','last_updated','source_table','airtable_record_url','id']);
const promoFilter = wantsPromo ? `promotion_active eq true` : undefined;
opts.catalog = maybe({
filter: promoFilter,
orderBy: ['promotion_active desc','price_after_promo asc','last_updated desc'],
}, ['id','title','content','url','categories','category_paths','family','price_after_promo','promotion_names']);
opts.cyclopedia = maybe({
filter: this._withReviewed(undefined),
orderBy: ['last_updated desc'],
searchFields: this._applySearchFields('cyclopedia', ['title','content','breadcrumb','page_type'])
}, ['title','content','url','site','page_type','breadcrumb','last_updated','id']);
} else {
// general
opts.airtable = maybe({
filter: this._withReviewed(undefined),
opts.catalog = maybe({
filter: undefined,
orderBy: ['last_updated desc'],
}, ['title','content','last_updated','airtable_record_url','id']);
searchFields: this._applySearchFields('catalog', ['title','content'])
}, ['id','title','content','url','categories','category_paths']);
opts.website = maybe({
filter: this._withReviewed(websiteDomainFilter),
orderBy: ['last_crawled desc'],
searchFields: this._applySearchFields('website', ['title','content','breadcrumb','page_type','headings'])
}, ['title','content','url','site','last_crawled','id']);
opts.cyclopedia = maybe({
filter: this._withReviewed(undefined),
orderBy: ['last_updated desc'],
searchFields: this._applySearchFields('cyclopedia', ['title','content','breadcrumb','page_type'])
}, ['title','content','url','site','page_type','last_updated','id']);
}
@ -516,24 +608,41 @@ class WoodlandAISearch extends Tool {
const extracted = {};
if (partMatch) extracted.partNumber = partMatch[0];
// Extract part type tokens
const partTypes = [
'collector bag','impeller','hose','recoil starter','starter','boot plate','side tube','side discharge'
];
for (const t of partTypes) {
if (q.includes(t)) { extracted.partType = t === 'starter' ? 'recoil starter' : t; break; }
}
// Extract family names
if (q.includes('commercial pro')) extracted.family = 'Commercial PRO';
else if (q.includes('commander pro') || q.includes('commander')) extracted.family = 'Commander Pro';
else if (q.includes('standard complete platinum') || q.includes('platinum')) extracted.family = 'Standard Complete Platinum';
else if (q.includes('classic')) extracted.family = 'Classic';
// Promotions intent flag
if (containsAny(['promotion','sale','discount','coupon','financing'])) extracted.wantsPromo = true;
// Parts / purchase signals → WEBSITE first to ensure View/Buy pages show up early; always include cyclopedia
if (partMatch || containsAny(['part','replacement','buy','order','sku','view/buy','add to cart','price','bag','hose','clamp','mda','key'])) {
return { intent: 'parts', indexes: ['website','airtable','cyclopedia'], extracted };
return { intent: 'parts', indexes: ['catalog','cyclopedia','website'], extracted };
}
// Compatibility / fitment / engine-by-year
if (containsAny(['engine','fit','fits','fitment','compatible','compatibility','which engine','used in','hose size','diameter','model history','product history']) || yearRegex.test(q)) {
return { intent: 'compatibility', indexes: ['airtable','website','cyclopedia'], extracted };
return { intent: 'compatibility', indexes: ['catalog','cyclopedia','website'], extracted };
}
// SOP / How-to
if (containsAny(['how to','install','installation','guide','manual','troubleshoot','troubleshooting','winterization','sop'])) {
return { intent: 'sop', indexes: ['cyclopedia','airtable','website'], extracted };
return { intent: 'sop', indexes: ['catalog','cyclopedia','website'], extracted };
}
// Marketing / benefits
if (containsAny(['compare','benefits','why choose','financing','promotion','warranty'])) {
return { intent: 'marketing', indexes: ['website','airtable','cyclopedia'], extracted };
return { intent: 'marketing', indexes: ['catalog','cyclopedia','website'], extracted };
}
return { intent: 'general', indexes: WoodlandAISearch.GROUPS, extracted };
@ -567,9 +676,6 @@ class WoodlandAISearch extends Tool {
speller: 'lexicon'
};
if (this.scoringProfile) baseOptions.scoringProfile = this.scoringProfile;
if (Array.isArray(this.searchFields) && this.searchFields.length) {
baseOptions.searchFields = this.searchFields;
}
// Intent routing
const { intent, indexes, extracted } = this._detectIntent(query);
@ -585,6 +691,7 @@ class WoodlandAISearch extends Tool {
perIndexTop,
finalTop,
perIndexOptions,
intent,
);
// If we came up short, try a broader pass across all indexes with larger top
@ -600,6 +707,7 @@ class WoodlandAISearch extends Tool {
Math.max(3, Math.ceil(sampleTop / broaderIndexes.length)),
finalTop,
broaderPerIndexOptions,
'general',
);
// Merge while keeping uniques and limit to finalTop
const seen = new Set(result.map(d => this._keyOf(d)));
@ -613,11 +721,10 @@ class WoodlandAISearch extends Tool {
}
}
const enriched = this._enrichWithLinks(intent, extracted, result);
const payload = Array.isArray(enriched) ? enriched : result;
// Attach a governance hint (non-breaking) for downstream renderers
const wrapped = { results: payload, governance: { reviewedOnly: this.enforceReviewedOnly, websiteDomains: this.websiteDomainAllowlist } };
return JSON.stringify(wrapped);
// Optionally enrich with cross-index links to ensure real URLs from Azure results
const payload = this.enableLinkEnrichment ? this._enrichWithLinks(intent, extracted, result) : result;
// Match AzureAISearch: return raw array of documents
return JSON.stringify(payload);
} catch (error) {
logger.error('Azure AI Search request failed', { error: error?.message || String(error) });
const msg = (error && (error.message || String(error))) || 'Unknown error';

View file

@ -0,0 +1,198 @@
// woodland-ai-search-all.js (aggregator)
const { z } = require('zod');
const { Tool } = require('@langchain/core/tools');
const { logger } = require('~/config');
const WoodlandAISearch = require('./WoodlandAISearch');
const WoodlandAISearchTractor = require('./WoodlandAISearchTractor');
const WoodlandAISearchCases = require('./WoodlandAISearchCases');
/**
 * Aggregator tool: runs one query against the Woodland, Tractor and Cases
 * search tools in parallel and merges their hits into a single de-duplicated,
 * priority-ordered list that is returned as a JSON string.
 */
class WoodlandAISearchAll extends Tool {
  static DEFAULT_TOP = 18;

  /**
   * @param {object} [fields] Optional overrides. The WOODLAND_ALL_ENABLE_* keys
   *   toggle individual sub-tools; the whole object is also forwarded to every
   *   sub-tool constructor so per-instance settings are not silently dropped.
   */
  constructor(fields = {}) {
    super();
    this.name = 'woodland-ai-search-all';
    this.description =
      "Aggregates results from 'woodland-ai-search', 'woodland-ai-search-tractor', and 'woodland-ai-search-cases' in one call.";
    this.schema = z.object({
      query: z.string().describe('Search word or phrase to Woodland All-Tools'),
      top: z.number().int().positive().optional(),
      perToolTop: z.number().int().positive().optional(),
    });

    // Kept so each sub-tool can be built with the same overrides this tool received.
    this.subToolFields = fields;

    // Allow disabling specific sub-tools via env if desired
    this.enableWoodland = String(fields.WOODLAND_ALL_ENABLE_WOODLAND ?? process.env.WOODLAND_ALL_ENABLE_WOODLAND ?? 'true').toLowerCase() === 'true';
    this.enableTractor = String(fields.WOODLAND_ALL_ENABLE_TRACTOR ?? process.env.WOODLAND_ALL_ENABLE_TRACTOR ?? 'true').toLowerCase() === 'true';
    this.enableCases = String(fields.WOODLAND_ALL_ENABLE_CASES ?? process.env.WOODLAND_ALL_ENABLE_CASES ?? 'true').toLowerCase() === 'true';

    logger.info('[woodland-ai-search-all] Initialized', {
      enableWoodland: this.enableWoodland,
      enableTractor: this.enableTractor,
      enableCases: this.enableCases,
    });
  }

  /**
   * Picks a de-duplication key for a document: URL first, then the id-like
   * fields, and finally the serialized document itself.
   */
  _keyOf(d) {
    return (
      (typeof d?.url === 'string' && d.url) ||
      (typeof d?.website_url_primary === 'string' && d.website_url_primary) ||
      d?.id ||
      d?.record_id ||
      d?.key ||
      JSON.stringify(d)
    );
  }

  /**
   * Stronger key that attempts to avoid duplicates when url/id are missing.
   * Only reached when `_keyOf` yields a non-string (e.g. a numeric id); in that
   * case a signature is built from title/site/page type/SKUs/part numbers/index.
   */
  _strongKeyOf(d) {
    const base = this._keyOf(d);
    if (base && typeof base === 'string') return base;
    try {
      const title = (d?.title || '').toString().trim().toLowerCase();
      const site = (d?.site || '').toString().trim().toLowerCase();
      const pageType = (d?.page_type || '').toString().trim().toLowerCase();
      const sku = (Array.isArray(d?.skus) ? d.skus.join('|') : d?.sku || '').toString().toLowerCase();
      const partNums = (Array.isArray(d?.part_numbers) ? d.part_numbers.join('|') : d?.part_numbers || '').toString().toLowerCase();
      const url = (d?.url || d?.website_url_primary || '').toString().toLowerCase();
      const index = (d?.index || '').toString().toLowerCase();
      if (url) return url;
      const sig = [title, site, pageType, sku, partNums, index].filter(Boolean).join('#');
      return sig || JSON.stringify(d);
    } catch (_) {
      return JSON.stringify(d);
    }
  }

  /**
   * Builds and runs one sub-tool, converting every failure mode into a result
   * object so a single broken sub-tool never aborts the whole aggregation.
   * Covered failures: constructor throws (e.g. missing env), `_call` rejects,
   * the sub-tool returns its `AZURE_SEARCH_FAILED: …` string, or invalid JSON.
   *
   * @param {string} tool Sub-tool name used for tagging and logging.
   * @param {() => object} create Factory that constructs the sub-tool.
   * @param {{query: string, top: number}} args Arguments passed to `_call`.
   * @returns {Promise<{tool: string, ok: boolean, docs?: any, err?: any}>}
   */
  async _runSubTool(tool, create, args) {
    try {
      const raw = await create()._call(args);
      if (typeof raw === 'string' && raw.startsWith('AZURE_SEARCH_FAILED')) {
        // Surface the sub-tool's own message instead of a JSON syntax error.
        throw new Error(raw);
      }
      const docs = typeof raw === 'string' ? JSON.parse(raw) : raw;
      return { tool, ok: true, docs };
    } catch (err) {
      return { tool, ok: false, err };
    }
  }

  /**
   * Tags a document in place with the tool it came from and attaches a
   * normalized `provenance` object to help downstream reasoning.
   */
  _tagProvenance(d, tool) {
    if (!d.source_tool) d.source_tool = tool;
    try {
      const url = (typeof d.url === 'string' && d.url) || (typeof d.website_url_primary === 'string' && d.website_url_primary) || '';
      const host = url ? new URL(url).hostname : undefined;
      d.provenance = {
        source_tool: d.source_tool,
        index: d.index,
        site: d.site,
        page_type: d.page_type,
        host,
        url: url || undefined,
      };
    } catch (_) {
      // `new URL` rejected the value; keep provenance without url/host.
      d.provenance = {
        source_tool: d.source_tool,
        index: d.index,
        site: d.site,
        page_type: d.page_type,
      };
    }
  }

  /**
   * @param {{query: string, top?: number, perToolTop?: number}} data
   * @returns {Promise<string>} JSON array of merged documents, or a string
   *   starting with `AZURE_SEARCH_FAILED:` on an unexpected error.
   */
  async _call(data) {
    const { query, top: topIn, perToolTop: perToolTopIn } = data;
    const finalTop = typeof topIn === 'number' && Number.isFinite(topIn)
      ? Math.max(1, Math.floor(topIn))
      : WoodlandAISearchAll.DEFAULT_TOP;
    // Favor balanced breadth across subtools
    const perToolTop = typeof perToolTopIn === 'number' && Number.isFinite(perToolTopIn)
      ? Math.max(1, Math.floor(perToolTopIn))
      : Math.min(10, Math.max(8, Math.ceil(finalTop / 2)));

    try {
      // Factories (not instances) so construction errors are isolated per tool.
      const subTools = [];
      if (this.enableWoodland) {
        subTools.push(['woodland-ai-search', () => new WoodlandAISearch(this.subToolFields)]);
      }
      if (this.enableTractor) {
        subTools.push(['woodland-ai-search-tractor', () => new WoodlandAISearchTractor(this.subToolFields)]);
      }
      if (this.enableCases) {
        subTools.push(['woodland-ai-search-cases', () => new WoodlandAISearchCases(this.subToolFields)]);
      }

      const settled = await Promise.all(
        subTools.map(([tool, create]) => this._runSubTool(tool, create, { query, top: perToolTop })),
      );

      const buckets = [];
      for (const r of settled) {
        if (!r?.ok) {
          logger.warn('[woodland-ai-search-all] Subtool failed', { tool: r?.tool, error: r?.err?.message || String(r?.err) });
          continue;
        }
        // Drop null/primitive entries so they are neither tagged nor merged.
        const docs = (Array.isArray(r.docs) ? r.docs : []).filter((d) => d !== null && typeof d === 'object');
        for (const d of docs) this._tagProvenance(d, r.tool);
        buckets.push({ tool: r.tool, docs });
      }

      // Merge strategy:
      // 1) Guarantee minimum per-tool coverage, then 2) fill remaining by priority (woodland -> cases -> tractor)
      const priority = ['woodland-ai-search', 'woodland-ai-search-cases', 'woodland-ai-search-tractor'];
      buckets.sort((a, b) => priority.indexOf(a.tool) - priority.indexOf(b.tool));
      const minPerTool = { 'woodland-ai-search': 3, 'woodland-ai-search-cases': 1, 'woodland-ai-search-tractor': 1 };

      const out = [];
      const seen = new Set();
      const addDoc = (doc) => {
        const k = this._strongKeyOf(doc);
        if (seen.has(k)) return false;
        seen.add(k);
        out.push(doc);
        return true;
      };

      // Phase 1: satisfy minimum quotas per tool (if available)
      for (const b of buckets) {
        const quota = minPerTool[b.tool] || 0;
        if (quota <= 0) continue;
        let added = 0;
        for (const d of b.docs) {
          if (out.length >= finalTop) break;
          if (addDoc(d)) {
            added += 1;
            if (added >= quota) break;
          }
        }
        if (out.length >= finalTop) break;
      }

      // Phase 2: fill remaining by priority order (already-added docs are skipped by key)
      for (const b of buckets) {
        for (const d of b.docs) {
          if (out.length >= finalTop) break;
          addDoc(d);
        }
        if (out.length >= finalTop) break;
      }

      logger.info('[woodland-ai-search-all] Aggregated results', {
        totalMerged: out.length,
        sources: buckets.map((b) => ({ tool: b.tool, count: b.docs.length })),
      });
      return JSON.stringify(out);
    } catch (error) {
      logger.error('[woodland-ai-search-all] Failed', { error: error?.message || String(error) });
      return `AZURE_SEARCH_FAILED: ${error?.message || String(error)}`;
    }
  }
}
module.exports = WoodlandAISearchAll;

View file

@ -0,0 +1,334 @@
// woodland-ai-search-cases.js (single-index)
const { z } = require('zod');
const { Tool } = require('@langchain/core/tools');
const { SearchClient, AzureKeyCredential } = require('@azure/search-documents');
const { logger } = require('~/config');
/**
 * LangChain tool that queries the single "Cases" Azure AI Search index and
 * returns the hits as a JSON string. Every hit is returned with an extra
 * `normalized_cases` object derived from the document's own fields and text.
 */
class WoodlandAISearchCases extends Tool {
  static DEFAULT_API_VERSION = '2024-07-01';
  static DEFAULT_TOP = 9;
  static DEFAULT_SELECT = 'id,title,content,url';

  /** Returns `v` unless it is null/undefined, in which case `fallback` is returned. */
  _env(v, fallback) {
    return v ?? fallback;
  }

  /**
   * Builds a small provenance record (url, host, site, page_type) for a document.
   * If the URL cannot be parsed, only site and page_type are returned.
   */
  _provenance(d) {
    try {
      const url = (typeof d?.url === 'string' && d.url) || '';
      const host = url ? new URL(url).hostname : undefined;
      return { url: url || undefined, host, site: d?.site, page_type: d?.page_type };
    } catch (_) {
      return { site: d?.site, page_type: d?.page_type };
    }
  }

  /**
   * Lightweight normalization for Cases/Knowledge docs: finds the first label
   * regex that matches `text` and splits its first capture group into items.
   *
   * Items are separated by `;`, a bullet (U+2022), or a hyphen that has
   * whitespace on both sides. A bare hyphen is NOT a separator, so values such
   * as "30-day" or "proof-of-purchase" stay intact.
   *
   * @param {string} text Free text to scan.
   * @param {RegExp[]} labelRegexes Patterns whose group 1 holds the list text.
   * @returns {string[]|undefined} Trimmed, non-empty items, or undefined.
   */
  _extractList(text, labelRegexes) {
    try {
      const t = (text || '').toString();
      for (const re of labelRegexes) {
        const m = re.exec(t);
        if (m && m[1]) {
          const line = m[1]
            .replace(/\r/g, '')
            .split(/\n|;|\u2022|\s-\s/)
            .map((s) => s.trim())
            .filter(Boolean);
          if (line.length) return line;
        }
      }
    } catch (_) {
      // Best-effort extraction: a malformed pattern or input yields "no list".
    }
    return undefined;
  }

  /**
   * Collects lines that look like steps: numbered ("1.") or starting with a
   * dash/bullet followed by a space. The marker is stripped from each step.
   *
   * @returns {string[]|undefined} Steps in document order, or undefined if none.
   */
  _extractSteps(text) {
    try {
      const t = (text || '').toString();
      // Find numbered steps or lines starting with dash/bullet
      const lines = t.split(/\r?\n/);
      const steps = [];
      for (const line of lines) {
        const trimmed = line.trim();
        if (/^(\d+\.|- |• |\u2022 )/.test(trimmed)) {
          steps.push(trimmed.replace(/^(\d+\.|- |• |\u2022 )\s*/, ''));
        }
      }
      return steps.length ? steps : undefined;
    } catch (_) {
      return undefined;
    }
  }

  /**
   * Returns a copy of `d` with a `normalized_cases` object attached. Original
   * fields are left untouched. Requirements/exceptions are taken from labelled
   * text in the content first and fall back to the document's array fields.
   */
  _normalizeDoc(d) {
    const str = (v) => (v == null ? undefined : String(v));
    const list = (v) => (Array.isArray(v) ? v.filter(Boolean).map(String) : undefined);

    const title = str(d?.title);
    const content = str(d?.content) || str(d?.summary) || str(d?.answer);
    const requirements =
      this._extractList(content, [/requirements?\s*[:\-]\s*([^\n]+)/i, /eligibility\s*[:\-]\s*([^\n]+)/i]) ||
      list(d?.requirements);
    const exceptions =
      this._extractList(content, [/exceptions?\s*[:\-]\s*([^\n]+)/i]) || list(d?.exceptions);
    const scope = str(d?.category) || str(d?.scope) || undefined;
    const steps = this._extractSteps(content);

    const normalized = {
      policy_name: title,
      scope,
      summary: str(d?.summary),
      effective_date: str(d?.effective_date),
      last_updated: str(d?.last_updated),
      requirements,
      exceptions,
      steps,
      tags: list(d?.tags),
      keywords: list(d?.keywords),
      provenance: this._provenance(d),
    };
    return { ...d, normalized_cases: normalized };
  }

  /**
   * Resolves configuration from `fields` (checked first) and `process.env`,
   * then creates the Azure `SearchClient` for the Cases index.
   *
   * @param {object} [fields] Per-instance overrides keyed like the env variables.
   * @throws {Error} When the endpoint, API key or index name cannot be resolved.
   */
  constructor(fields = {}) {
    super();
    this.name = 'woodland-ai-search-cases';
    this.description = "Use the 'woodland-ai-search-cases' tool to answer questions from the Cases Azure AI Search index";
    this.schema = z.object({
      query: z.string().describe('Question or search phrase for Cases index'),
      top: z.number().int().positive().optional(),
    });

    // Shared endpoint + key
    this.serviceEndpoint = this._env(
      fields.AZURE_AI_SEARCH_SERVICE_ENDPOINT,
      process.env.AZURE_AI_SEARCH_SERVICE_ENDPOINT,
    );
    this.apiKey = this._env(fields.AZURE_AI_SEARCH_API_KEY, process.env.AZURE_AI_SEARCH_API_KEY);

    // Cases index name (support multiple env names; fallback to generic index name)
    this.indexName =
      this._env(fields.AZURE_AI_SEARCH_CASES_INDEX, process.env.AZURE_AI_SEARCH_CASES_INDEX) ||
      this._env(fields.AZURE_AI_SEARCH_CASE_INDEX, process.env.AZURE_AI_SEARCH_CASE_INDEX) ||
      this._env(fields.AZURE_AI_SEARCH_CASES_INDEX_NAME, process.env.AZURE_AI_SEARCH_CASES_INDEX_NAME) ||
      this._env(fields.AZURE_AI_SEARCH_INDEX_NAME, process.env.AZURE_AI_SEARCH_INDEX_NAME);

    if (!this.serviceEndpoint || !this.apiKey || !this.indexName) {
      throw new Error(
        'Missing Azure AI Search envs: AZURE_AI_SEARCH_SERVICE_ENDPOINT, AZURE_AI_SEARCH_API_KEY, and Cases index (AZURE_AI_SEARCH_CASES_INDEX or AZURE_AI_SEARCH_INDEX_NAME).',
      );
    }

    // Optional API version
    this.apiVersion = this._env(
      fields.AZURE_AI_SEARCH_API_VERSION,
      process.env.AZURE_AI_SEARCH_API_VERSION || WoodlandAISearchCases.DEFAULT_API_VERSION,
    );

    // Defaults
    this.top = WoodlandAISearchCases.DEFAULT_TOP;
    this.select = WoodlandAISearchCases.DEFAULT_SELECT.split(',').map((s) => s.trim());

    // Semantic/search options
    // NOTE(review): searchFields is resolved and logged here, but `_call` does not
    // add it to the request options — confirm whether that is intentional.
    this.searchFields = (() => {
      // Prefer cases-specific override, else global
      const v =
        this._env(
          fields.AZURE_AI_SEARCH_CASES_SEARCH_FIELDS,
          process.env.AZURE_AI_SEARCH_CASES_SEARCH_FIELDS,
        ) || this._env(fields.AZURE_AI_SEARCH_SEARCH_FIELDS, process.env.AZURE_AI_SEARCH_SEARCH_FIELDS);
      if (v) return String(v).split(',').map((s) => s.trim()).filter(Boolean);
      // Generic defaults suitable for Q&A corpora; avoid page_type
      return ['title', 'content', 'summary', 'tags', 'keywords', 'category', 'question', 'answer'];
    })();
    this.semanticConfiguration = this._env(
      fields.AZURE_AI_SEARCH_SEMANTIC_CONFIGURATION,
      process.env.AZURE_AI_SEARCH_SEMANTIC_CONFIGURATION || 'sem1',
    );
    this.queryLanguage = this._env(
      fields.AZURE_AI_SEARCH_QUERY_LANGUAGE,
      process.env.AZURE_AI_SEARCH_QUERY_LANGUAGE || 'en-us',
    );
    this.scoringProfile = this._env(
      fields.AZURE_AI_SEARCH_SCORING_PROFILE,
      process.env.AZURE_AI_SEARCH_SCORING_PROFILE,
    );
    this.returnAllFields = String(
      this._env(
        fields.AZURE_AI_SEARCH_RETURN_ALL_FIELDS,
        process.env.AZURE_AI_SEARCH_RETURN_ALL_FIELDS || 'true',
      ),
    )
      .toLowerCase()
      .trim() === 'true';

    // Client
    const credential = new AzureKeyCredential(this.apiKey);
    this.client = new SearchClient(this.serviceEndpoint, this.indexName, credential, {
      apiVersion: this.apiVersion,
    });

    logger.info('[woodland-ai-search-cases] Initialized', {
      endpoint: this.serviceEndpoint,
      apiVersion: this.apiVersion,
      index: this.indexName,
      select: this.select,
      searchFields: this.searchFields,
      semanticConfiguration: this.semanticConfiguration,
      queryLanguage: this.queryLanguage,
      scoringProfile: this.scoringProfile,
    });
  }

  /**
   * Returns a shallow copy of `opts` with `answers`, `captions` and `speller`
   * removed unless they hold one of the string values accepted here.
   */
  _sanitizeSearchOptions(opts) {
    const clean = { ...opts };
    const asStr = (v) => (typeof v === 'string' ? v.toLowerCase() : undefined);
    const answers = asStr(clean.answers);
    if (answers !== 'extractive' && answers !== 'none') delete clean.answers;
    const captions = asStr(clean.captions);
    if (captions !== 'extractive' && captions !== 'none') delete clean.captions;
    const speller = asStr(clean.speller);
    if (speller !== 'lexicon' && speller !== 'simple' && speller !== 'none') delete clean.speller;
    return clean;
  }

  /**
   * Runs the search with up to three attempts. After a failure the error
   * message is inspected and the offending options (orderBy, unknown fields,
   * filter, searchFields) are removed before retrying. If nothing can be
   * relaxed, or attempts run out, the last error is re-thrown.
   *
   * @param {string} query Search text.
   * @param {object} options Search options passed to the client.
   * @returns {Promise<{docs: object[], retried: boolean}>}
   * @throws The last error raised by the search client.
   */
  async _safeSearch(query, options) {
    const run = async (opts) => {
      const send = this._sanitizeSearchOptions(opts);
      logger.debug('[woodland-ai-search-cases] Sending request', {
        query,
        options: JSON.stringify(send, null, 2),
      });
      const rs = await this.client.search(query, send);
      const items = [];
      for await (const r of rs.results) items.push(r.document);
      logger.debug('[woodland-ai-search-cases] Received response', {
        count: items.length,
        sample: items.slice(0, 2),
      });
      return items;
    };

    let attempt = 0;
    let opts = { ...options };
    let lastErr;
    let droppedSearchFields = false;

    while (attempt < 3) {
      try {
        const docs = await run(opts);
        return { docs, retried: attempt > 0 };
      } catch (err) {
        lastErr = err;
        attempt += 1;
        const msg = (err && (err.message || String(err))) || '';
        logger.warn('[woodland-ai-search-cases] Search failed', { attempt, msg });

        const sanitized = { ...opts };
        let changed = false;

        // An orderBy complaint on a semantic query: drop orderBy.
        if (/orderby/i.test(msg) && String(sanitized.queryType).toLowerCase() === 'semantic') {
          if (sanitized.orderBy) {
            delete sanitized.orderBy;
            changed = true;
            logger.info('[woodland-ai-search-cases] Removing orderBy for semantic query and retrying');
          }
        }

        // Collect every field the service reported as unknown.
        const unknownFieldRegex = /Unknown field '([^']+)'/gi;
        const toRemove = [];
        let m;
        while ((m = unknownFieldRegex.exec(msg)) !== null) {
          const fld = (m[1] || '').trim();
          if (fld) toRemove.push(fld);
        }

        if (toRemove.length > 0) {
          if (Array.isArray(sanitized.select)) {
            const before = sanitized.select.length;
            sanitized.select = sanitized.select.filter((f) => !toRemove.includes(f));
            if (sanitized.select.length === 0) delete sanitized.select;
            if (sanitized.select?.length !== before) changed = true;
          }
          if (Array.isArray(sanitized.searchFields)) {
            const before = sanitized.searchFields.length;
            sanitized.searchFields = sanitized.searchFields.filter((f) => !toRemove.includes(f));
            if (sanitized.searchFields.length === 0) delete sanitized.searchFields;
            if (sanitized.searchFields?.length !== before) changed = true;
          }
          // The message does not point at select/searchFields, so the unknown
          // field is assumed to come from the filter or orderBy expression.
          if (!/search field list|select/i.test(msg)) {
            if (sanitized.filter) {
              delete sanitized.filter;
              changed = true;
              logger.info('[woodland-ai-search-cases] Dropping filter due to unknown fields and retrying');
            }
            if (sanitized.orderBy) {
              delete sanitized.orderBy;
              changed = true;
            }
          }
        }

        // Last resort (at most once): retry without any searchFields restriction.
        if (!changed && !droppedSearchFields && sanitized.searchFields) {
          delete sanitized.searchFields;
          droppedSearchFields = true;
          changed = true;
          logger.info('[woodland-ai-search-cases] Dropping searchFields entirely and retrying');
        }

        if (!changed) break;
        opts = sanitized;
      }
    }
    throw lastErr;
  }

  /**
   * @param {{query: string, top?: number}} data
   * @returns {Promise<string>} JSON array of normalized documents, or a string
   *   starting with `AZURE_SEARCH_FAILED:` when the search fails.
   */
  async _call(data) {
    const { query, top: topIn } = data;
    const finalTop = typeof topIn === 'number' && Number.isFinite(topIn) ? Math.max(1, Math.floor(topIn)) : this.top;

    try {
      // Quoted phrases or boolean operators require every term to match.
      const inferredMode = (() => {
        const q = (query || '').toString();
        if (/".+"/.test(q) || /\b(AND|OR|NOT)\b/i.test(q)) return 'all';
        return 'any';
      })();

      // No orderBy is set: results are left in semantic ranking order.
      const options = {
        queryType: 'semantic',
        searchMode: inferredMode,
        top: finalTop,
        semanticSearchOptions: {
          configurationName: this.semanticConfiguration,
          queryLanguage: this.queryLanguage,
        },
        answers: 'extractive',
        captions: 'extractive',
        speller: 'lexicon',
      };
      if (!this.returnAllFields) {
        options.select = this.select;
      }
      if (this.scoringProfile) options.scoringProfile = this.scoringProfile;

      const docs = await this._safeSearch(query, options);
      let payload = docs.docs || [];
      if (Array.isArray(payload)) {
        payload = payload.map((d) => (d ? this._normalizeDoc(d) : d));
      }
      logger.info('[woodland-ai-search-cases] Query done', { count: Array.isArray(payload) ? payload.length : 0 });
      return JSON.stringify(payload);
    } catch (error) {
      logger.error('[woodland-ai-search-cases] Azure AI Search request failed', {
        error: error?.message || String(error),
      });
      const msg = (error && (error.message || String(error))) || 'Unknown error';
      return `AZURE_SEARCH_FAILED: ${msg}`;
    }
  }
}
module.exports = WoodlandAISearchCases;

View file

@ -0,0 +1,109 @@
// woodland-ai-search-general.js (grounded two-phase aggregator)
const { z } = require('zod');
const { Tool } = require('@langchain/core/tools');
const { logger } = require('~/config');
const WoodlandAISearch = require('./WoodlandAISearch');
const WoodlandAISearchCases = require('./WoodlandAISearchCases');
/**
 * Grounded two-phase search: queries 'woodland-ai-search' first and only falls
 * back to 'woodland-ai-search-cases' when the primary search returns fewer
 * than `minHits` documents. The result is a JSON string of unique documents.
 */
class WoodlandAISearchGeneral extends Tool {
  static DEFAULT_TOP = 9;
  static DEFAULT_MIN_HITS = 3;

  /**
   * @param {object} [fields] Optional overrides. `fields.minHits` (or the
   *   WOODLAND_GENERAL_MIN_HITS env variable) sets the fallback threshold; the
   *   whole object is also forwarded to the sub-tool constructors.
   */
  constructor(fields = {}) {
    super();
    this.name = 'woodland-ai-search-general';
    this.description = "Grounded general Woodland search: query 'woodland-ai-search' first; if results are weak, fall back to 'woodland-ai-search-cases'.";
    this.schema = z.object({
      query: z.string().describe('Search phrase for Woodland General (Grounded)'),
      top: z.number().int().positive().optional(),
      minHits: z.number().int().positive().optional(),
      perToolTop: z.number().int().positive().optional(),
    });

    // A non-numeric or non-positive setting would make the `>= minHits` check
    // meaningless (NaN never compares true), so fall back to the default.
    const configuredMinHits = Number(
      fields.minHits || process.env.WOODLAND_GENERAL_MIN_HITS || WoodlandAISearchGeneral.DEFAULT_MIN_HITS,
    );
    this.minHits = Number.isFinite(configuredMinHits) && configuredMinHits > 0
      ? configuredMinHits
      : WoodlandAISearchGeneral.DEFAULT_MIN_HITS;

    // Kept so each sub-tool can be built with the same overrides this tool received.
    this.subToolFields = fields;
  }

  /**
   * Picks a de-duplication key for a document: URL first, then the id-like
   * fields, and finally the serialized document itself.
   */
  _keyOf(d) {
    return (
      (typeof d?.url === 'string' && d.url) ||
      (typeof d?.website_url_primary === 'string' && d.website_url_primary) ||
      d?.id ||
      d?.record_id ||
      d?.key ||
      JSON.stringify(d)
    );
  }

  /**
   * Turns a sub-tool response into an array of document objects.
   * Always returns an array: failure strings, invalid JSON and non-array
   * payloads yield `[]`, and null/primitive entries are removed. An object of
   * the form `{ results: [...] }` is unwrapped to its `results` array.
   *
   * @param {string|object[]|object} res Raw sub-tool response.
   * @returns {object[]}
   */
  _parseDocs(res) {
    try {
      let parsed = res;
      if (typeof res === 'string') {
        if (res.startsWith('AZURE_SEARCH_FAILED')) return [];
        parsed = JSON.parse(res);
      }
      if (!Array.isArray(parsed) && Array.isArray(parsed?.results)) parsed = parsed.results;
      if (!Array.isArray(parsed)) return [];
      return parsed.filter((d) => d !== null && typeof d === 'object');
    } catch (_) {
      return [];
    }
  }

  /**
   * Concatenates the given sources in order, skipping documents whose key was
   * already seen, tagging each with its originating tool (unless it already
   * carries `source_tool`), and stopping once `limit` documents are collected.
   *
   * @param {Array<[string, object[]]>} sources Pairs of [tool name, documents].
   * @param {number} limit Maximum number of documents to return (>= 1).
   * @returns {object[]}
   */
  _mergeUnique(sources, limit) {
    const merged = [];
    const seen = new Set();
    for (const [tool, docs] of sources) {
      for (const d of docs) {
        if (merged.length >= limit) return merged;
        const k = this._keyOf(d);
        if (seen.has(k)) continue;
        seen.add(k);
        merged.push({ ...d, source_tool: d?.source_tool || tool });
      }
    }
    return merged;
  }

  /**
   * @param {{query: string, top?: number, minHits?: number, perToolTop?: number}} data
   * @returns {Promise<string>} JSON array of documents, or a string starting
   *   with `AZURE_SEARCH_FAILED:` when a sub-tool throws.
   */
  async _call(data) {
    const { query, top: topIn, minHits: minIn, perToolTop: perToolTopIn } = data;
    const finalTop = typeof topIn === 'number' && Number.isFinite(topIn) ? Math.max(1, Math.floor(topIn)) : WoodlandAISearchGeneral.DEFAULT_TOP;
    const perToolTop = typeof perToolTopIn === 'number' && Number.isFinite(perToolTopIn) ? Math.max(1, Math.floor(perToolTopIn)) : finalTop;
    const minHits = typeof minIn === 'number' && Number.isFinite(minIn) ? Math.max(1, Math.floor(minIn)) : this.minHits;

    try {
      // Phase 1: primary multi-index search
      const primary = new WoodlandAISearch(this.subToolFields);
      const primaryRaw = await primary._call({ query, top: perToolTop });
      const primaryDocs = this._parseDocs(primaryRaw);
      logger.info('[woodland-ai-search-general] Primary woodland hits', { count: primaryDocs.length });

      if (primaryDocs.length >= minHits) {
        // Return best N uniques from woodland only
        return JSON.stringify(this._mergeUnique([['woodland-ai-search', primaryDocs]], finalTop));
      }

      // Phase 2: fallback to cases (exactly once)
      const cases = new WoodlandAISearchCases(this.subToolFields);
      const casesRaw = await cases._call({ query, top: perToolTop });
      const casesDocs = this._parseDocs(casesRaw);
      logger.info('[woodland-ai-search-general] Fallback cases hits', { count: casesDocs.length });

      // Merge woodland -> cases with dedupe
      const merged = this._mergeUnique(
        [
          ['woodland-ai-search', primaryDocs],
          ['woodland-ai-search-cases', casesDocs],
        ],
        finalTop,
      );
      return JSON.stringify(merged);
    } catch (error) {
      logger.error('[woodland-ai-search-general] Failed', { error: error?.message || String(error) });
      return `AZURE_SEARCH_FAILED: ${error?.message || String(error)}`;
    }
  }
}
module.exports = WoodlandAISearchGeneral;

View file

@ -0,0 +1,494 @@
// woodland-ai-search-tractor.js (single-index)
const { z } = require('zod');
const { Tool } = require('@langchain/core/tools');
const { SearchClient, AzureKeyCredential } = require('@azure/search-documents');
const { logger } = require('~/config');
class WoodlandAISearchTractor extends Tool {
static DEFAULT_API_VERSION = '2024-07-01';
static DEFAULT_TOP = 9;
static DEFAULT_SELECT = 'id,title,content,url';
_env(v, fallback) {
return v ?? fallback;
}
constructor(fields = {}) {
super();
this.name = 'woodland-ai-search-tractor';
this.description = "Use the 'woodland-ai-search-tractor' tool to retrieve search results from the Tractor Azure AI Search index";
this.schema = z.object({
query: z.string().describe('Search word or phrase for Tractor Azure AI Search'),
top: z.number().int().positive().optional(),
});
// Shared endpoint + key
this.serviceEndpoint = this._env(
fields.AZURE_AI_SEARCH_SERVICE_ENDPOINT,
process.env.AZURE_AI_SEARCH_SERVICE_ENDPOINT,
);
this.apiKey = this._env(fields.AZURE_AI_SEARCH_API_KEY, process.env.AZURE_AI_SEARCH_API_KEY);
// Single Tractor index name (supports multiple possible env names, falls back to generic index name)
this.indexName =
this._env(fields.AZURE_AI_SEARCH_TRACTOR_INDEX, process.env.AZURE_AI_SEARCH_TRACTOR_INDEX) ||
this._env(fields.AZURE_AI_SEARCH_TRACTOR_INDEX_NAME, process.env.AZURE_AI_SEARCH_TRACTOR_INDEX_NAME) ||
this._env(fields.AZURE_AI_SEARCH_INDEX_NAME, process.env.AZURE_AI_SEARCH_INDEX_NAME);
if (!this.serviceEndpoint || !this.apiKey || !this.indexName) {
throw new Error(
'Missing Azure AI Search envs: AZURE_AI_SEARCH_SERVICE_ENDPOINT, AZURE_AI_SEARCH_API_KEY, and Tractor index (AZURE_AI_SEARCH_TRACTOR_INDEX or AZURE_AI_SEARCH_INDEX_NAME).',
);
}
// Optional API version
this.apiVersion = this._env(
fields.AZURE_AI_SEARCH_API_VERSION,
process.env.AZURE_AI_SEARCH_API_VERSION || WoodlandAISearchTractor.DEFAULT_API_VERSION,
);
// Defaults
this.top = WoodlandAISearchTractor.DEFAULT_TOP;
this.select = WoodlandAISearchTractor.DEFAULT_SELECT.split(',').map((s) => s.trim());
// Search/semantic options
this.searchFields = (() => {
// Prefer tractor-specific override, else global override
const v =
this._env(
fields.AZURE_AI_SEARCH_TRACTOR_SEARCH_FIELDS,
process.env.AZURE_AI_SEARCH_TRACTOR_SEARCH_FIELDS,
) || this._env(fields.AZURE_AI_SEARCH_SEARCH_FIELDS, process.env.AZURE_AI_SEARCH_SEARCH_FIELDS);
if (v) return String(v).split(',').map((s) => s.trim()).filter(Boolean);
// Keep to known searchable fields in the Tractors index
return ['title', 'content', 'mda_instructions', 'hitch_instructions'];
})();
this.semanticConfiguration = this._env(
fields.AZURE_AI_SEARCH_SEMANTIC_CONFIGURATION,
process.env.AZURE_AI_SEARCH_SEMANTIC_CONFIGURATION || 'sem1',
);
this.queryLanguage = this._env(
fields.AZURE_AI_SEARCH_QUERY_LANGUAGE,
process.env.AZURE_AI_SEARCH_QUERY_LANGUAGE || 'en-us',
);
this.scoringProfile = this._env(
fields.AZURE_AI_SEARCH_SCORING_PROFILE,
process.env.AZURE_AI_SEARCH_SCORING_PROFILE,
);
this.returnAllFields = String(
this._env(
fields.AZURE_AI_SEARCH_RETURN_ALL_FIELDS,
process.env.AZURE_AI_SEARCH_RETURN_ALL_FIELDS || 'true',
),
)
.toLowerCase()
.trim() === 'true';
// Initialize SearchClient
const credential = new AzureKeyCredential(this.apiKey);
this.client = new SearchClient(this.serviceEndpoint, this.indexName, credential, {
apiVersion: this.apiVersion,
});
logger.info('[woodland-ai-search-tractor] Initialized', {
endpoint: this.serviceEndpoint,
apiVersion: this.apiVersion,
index: this.indexName,
select: this.select,
searchFields: this.searchFields,
semanticConfiguration: this.semanticConfiguration,
queryLanguage: this.queryLanguage,
scoringProfile: this.scoringProfile,
});
}
_keyOf(d) {
return d?.url || d?.id || d?.record_id || d?.key || JSON.stringify(d);
}
_andFilter(a, b) {
if (!a && !b) return undefined;
if (!a) return b;
if (!b) return a;
return `(${a}) and (${b})`;
}
_escapeLiteral(v) {
return String(v).replace(/'/g, "''");
}
_provenance(d) {
try {
const url = (typeof d?.url === 'string' && d.url) || '';
const host = url ? new URL(url).hostname : undefined;
return { url: url || undefined, host, site: d?.site, page_type: d?.page_type };
} catch (_) {
return { site: d?.site, page_type: d?.page_type };
}
}
/**
* Normalize tractor compatibility-related fields for downstream rendering.
* Does not change original values; attaches a new `normalized_compat` object.
*/
_extractCompatFromText(text, tags) {
try {
const out = new Set();
const addMany = (arr) => arr.forEach((s) => {
const v = String(s).trim();
if (v) out.add(v);
});
const t = (text || '').toString();
// Common patterns: "compatible with X, Y and Z", "fits: X; Y; Z", "models: X, Y"
const patterns = [
/compatible\s+with\s*[:\-]?\s*([^\n\.]+)/gi,
/fits\s*[:\-]?\s*([^\n\.]+)/gi,
/models?\s*[:\-]?\s*([^\n\.]+)/gi,
/supported\s+models?\s*[:\-]?\s*([^\n\.]+)/gi,
];
for (const re of patterns) {
let m;
while ((m = re.exec(t)) !== null) {
const list = (m[1] || '')
.replace(/\band\b/gi, ',')
.split(/[;,]/)
.map((s) => s.trim())
.filter(Boolean);
addMany(list);
}
}
if (Array.isArray(tags)) {
// Heuristic: tags that look like model/series names (contain letters/numbers/dashes)
const tagModels = tags
.map((x) => String(x).trim())
.filter((x) => /[A-Za-z0-9]/.test(x) && x.length <= 40);
addMany(tagModels);
}
return out.size ? Array.from(out) : undefined;
} catch (_) {
return undefined;
}
}
_normalizeDoc(d) {
const bool = (v) => (typeof v === 'boolean' ? v : undefined);
const str = (v) => (v == null ? undefined : String(v));
const list = (v) => (Array.isArray(v) ? v.filter(Boolean).map(String) : undefined);
const tractor = [d?.tractor_make, d?.tractor_model, d?.tractor_deck_size]
.filter((x) => x != null && String(x).trim().length > 0)
.join(' ')
.trim() || undefined;
const normalized = {
tractor, // e.g., "AMF 836 36"
kit_or_assembly: str(d?.title) || str(d?.group_name),
deck_opening_measurements_required: bool(d?.need_deck_open_measurements),
mda_pre_cut: bool(d?.is_boot_pre_cut),
customer_drilling_required: bool(d?.need_to_drill_deck),
exhaust_deflection_needed: bool(d?.need_to_deflect_mower),
compatible_with_large_rakes: bool(d?.can_connect_to_large_rakes),
aftermarket: {
mda: str(d?.ammda_sku) || str(d?.mda_sku),
hitch: str(d?.amhitch_sku) || str(d?.hitch_sku),
hose: str(d?.amhose_sku) || str(d?.hose_sku),
upgrade_hose: str(d?.amupgradehose_sku) || str(d?.upgradehose_sku),
rubber_collar: str(d?.rubbercollar_sku),
},
compatible_with:
list(d?.compatible_models) ||
list(d?.compatible_series) ||
this._extractCompatFromText(d?.content, d?.tags) ||
undefined,
notes: str(d?.content),
picture_thumbnail_url: str(d?.picture_thumbnail_url),
tags: list(d?.tags),
provenance: this._provenance(d),
};
return { ...d, normalized_compat: normalized };
}
_sanitizeSearchOptions(opts) {
const clean = { ...opts };
const asStr = (v) => (typeof v === 'string' ? v.toLowerCase() : undefined);
const answers = asStr(clean.answers);
if (answers !== 'extractive' && answers !== 'none') delete clean.answers;
const captions = asStr(clean.captions);
if (captions !== 'extractive' && captions !== 'none') delete clean.captions;
const speller = asStr(clean.speller);
if (speller !== 'lexicon' && speller !== 'simple' && speller !== 'none') delete clean.speller;
return clean;
}
/**
 * Run a search through `this.client`, retrying with progressively relaxed
 * options when the request fails.
 *
 * At most three attempts are made. After each failure the error message is
 * inspected and the options are adjusted (orderBy removed, unknown fields
 * stripped, filter/searchFields dropped). If no adjustment is possible the
 * loop stops and the last error is rethrown.
 *
 * @param {string} query - Search text passed to `this.client.search`.
 * @param {object} options - Search options; copied, never mutated.
 * @returns {Promise<{docs: Array, retried: boolean}>} Collected documents and
 *   whether at least one retry was needed.
 * @throws The last error raised by the search when all attempts fail.
 */
async _safeSearch(query, options) {
// Single attempt: sanitize the options, send the request, and drain the
// async result iterator into a plain array of documents.
const run = async (opts) => {
const send = this._sanitizeSearchOptions(opts);
logger.debug('[woodland-ai-search-tractor] Sending request', {
query,
options: JSON.stringify(send, null, 2),
});
const rs = await this.client.search(query, send);
const items = [];
for await (const r of rs.results) items.push(r.document);
logger.debug('[woodland-ai-search-tractor] Received response', {
count: items.length,
sample: items.slice(0, 2),
});
return items;
};
let attempt = 0;
let opts = { ...options };
let lastErr;
// Guards the "drop searchFields entirely" fallback so it is applied at most once.
let droppedSearchFields = false;
while (attempt < 3) {
try {
const docs = await run(opts);
return { docs, retried: attempt > 0 };
} catch (err) {
lastErr = err;
attempt += 1;
const msg = (err && (err.message || String(err))) || '';
logger.warn('[woodland-ai-search-tractor] Search failed', { attempt, msg });
// Work on a copy; it only replaces `opts` if something actually changed.
const sanitized = { ...opts };
let changed = false;
// Remove orderBy for semantic queries (Azure restriction)
if (/orderby/i.test(msg) && String(sanitized.queryType).toLowerCase() === 'semantic') {
if (sanitized.orderBy) {
delete sanitized.orderBy;
changed = true;
logger.info('[woodland-ai-search-tractor] Removing orderBy for semantic query and retrying');
}
}
// Strip unknown fields from select/searchFields; drop filter if unknown field appears there
const unknownFieldRegex = /Unknown field '([^']+)'/gi;
const toRemove = [];
let m;
// Collect every field name quoted in "Unknown field '<name>'" fragments of the message.
while ((m = unknownFieldRegex.exec(msg)) !== null) {
const fld = (m[1] || '').trim();
if (fld) toRemove.push(fld);
}
if (toRemove.length > 0) {
if (Array.isArray(sanitized.select)) {
const before = sanitized.select.length;
sanitized.select = sanitized.select.filter((f) => !toRemove.includes(f));
if (sanitized.select.length === 0) delete sanitized.select;
// After a delete, `select` is undefined, so this comparison also flags the change.
if (sanitized.select?.length !== before) changed = true;
}
if (Array.isArray(sanitized.searchFields)) {
const before = sanitized.searchFields.length;
sanitized.searchFields = sanitized.searchFields.filter((f) => !toRemove.includes(f));
if (sanitized.searchFields.length === 0) delete sanitized.searchFields;
if (sanitized.searchFields?.length !== before) changed = true;
}
// When the message mentions neither "search field list" nor "select", the unknown
// field presumably came from the filter/orderBy expression, so both are dropped.
if (!/search field list|select/i.test(msg)) {
if (sanitized.filter) {
delete sanitized.filter;
changed = true;
logger.info('[woodland-ai-search-tractor] Dropping filter due to unknown fields and retrying');
}
if (sanitized.orderBy) {
delete sanitized.orderBy;
changed = true;
}
}
}
// Last resort (once): nothing specific could be fixed, so retry without searchFields.
if (!changed && !droppedSearchFields && sanitized.searchFields) {
delete sanitized.searchFields;
droppedSearchFields = true;
changed = true;
logger.info('[woodland-ai-search-tractor] Dropping searchFields entirely and retrying');
}
// Retrying with identical options would fail the same way; give up.
if (!changed) break;
opts = sanitized;
}
}
throw lastErr;
}
async _tieredSearch(query, baseOptions) {
const r = await this._safeSearch(query, baseOptions);
return r.docs ?? [];
}
// Intent and entity detection (lightweight heuristics)
_detectIntent(query) {
const q = (query || '').toString().toLowerCase();
const containsAny = (arr) => arr.some((w) => q.includes(w));
const yearRegex = /\b(19|20)\d{2}\b/;
const partRegex = /\b\d{2}-[a-z0-9]{2}-[a-z0-9]{3,}\b/i;
const partMatch = q.match(partRegex);
const extracted = {};
if (partMatch) extracted.partNumber = partMatch[0];
const partTypes = ['collector bag', 'impeller', 'hose', 'recoil starter', 'starter', 'boot plate', 'side tube'];
for (const t of partTypes) {
if (q.includes(t)) {
extracted.partType = t === 'starter' ? 'recoil starter' : t;
break;
}
}
if (q.includes('commercial pro')) extracted.family = 'Commercial PRO';
else if (q.includes('commander pro') || q.includes('commander')) extracted.family = 'Commander Pro';
else if (q.includes('standard complete platinum') || q.includes('platinum')) extracted.family = 'Standard Complete Platinum';
else if (q.includes('classic')) extracted.family = 'Classic';
if (containsAny(['promotion', 'sale', 'discount', 'coupon', 'financing'])) extracted.wantsPromo = true;
if (
partMatch ||
containsAny(['part', 'replacement', 'buy', 'order', 'sku', 'view/buy', 'add to cart', 'price', 'bag', 'hose', 'clamp'])
) {
return { intent: 'parts', extracted };
}
if (containsAny(['engine', 'fit', 'fits', 'fitment', 'compatible', 'compatibility', 'which engine', 'used in']) || yearRegex.test(q)) {
return { intent: 'compatibility', extracted };
}
if (containsAny(['how to', 'install', 'installation', 'guide', 'manual', 'troubleshoot', 'troubleshooting', 'winterization', 'sop'])) {
return { intent: 'sop', extracted };
}
if (containsAny(['compare', 'benefits', 'why choose', 'financing', 'promotion', 'warranty'])) {
return { intent: 'marketing', extracted };
}
return { intent: 'general', extracted };
}
// Per-intent options for the single Tractor index.
_optionsForIntent(intent, extracted = {}) {
const maybe = (o, sel) => (this.returnAllFields ? o : { ...o, select: sel });
const pn = extracted.partNumber || '';
const skuFields = [
'mda_sku',
'ammda_sku',
'hitch_sku',
'amhitch_sku',
'rubbercollar_sku',
'hose_sku',
'amhose_sku',
'upgradehose_sku',
'amupgradehose_sku',
];
const baseSelect = [
'id',
'title',
'content',
'tractor_make',
'tractor_model',
'tractor_deck_size',
'group_name',
'is_active',
'mda_sku',
'ammda_sku',
'hitch_sku',
'amhitch_sku',
'rubbercollar_sku',
'hose_sku',
'amhose_sku',
'upgradehose_sku',
'amupgradehose_sku',
'is_boot_pre_cut',
'can_connect_to_large_rakes',
'need_to_drill_deck',
'need_to_deflect_mower',
'need_deck_open_measurements',
'category',
'picture_thumbnail_url',
'tags',
'mda_instructions',
'hitch_instructions',
];
if (intent === 'parts') {
let filter;
if (pn) {
const eqs = skuFields.map((f) => `${f} eq '${this._escapeLiteral(pn)}'`).join(' or ');
filter = eqs || undefined;
}
return maybe({ filter, searchFields: this.searchFields }, baseSelect);
}
if (intent === 'compatibility') {
// Optionally, we could filter by group_name if a family-like term was extracted
let filter;
if (extracted.family) {
filter = `group_name eq '${this._escapeLiteral(extracted.family)}'`;
}
return maybe({ filter, searchFields: this.searchFields }, baseSelect);
}
if (intent === 'sop') {
// Prioritize instructional text
return maybe({ filter: undefined, searchFields: this.searchFields }, baseSelect);
}
if (intent === 'marketing') {
return maybe({ filter: undefined, searchFields: this.searchFields }, baseSelect);
}
// general
return maybe({ filter: undefined, searchFields: this.searchFields }, baseSelect);
}
async _call(data) {
const { query, top: topIn } = data;
const finalTop = typeof topIn === 'number' && Number.isFinite(topIn) ? Math.max(1, Math.floor(topIn)) : this.top;
try {
const inferredMode = (() => {
const q = (query || '').toString();
if (/".+"/.test(q) || /\b(AND|OR|NOT)\b/i.test(q)) return 'all';
return 'any';
})();
const baseOptions = {
queryType: 'semantic',
searchMode: inferredMode,
top: finalTop,
semanticSearchOptions: {
configurationName: this.semanticConfiguration,
queryLanguage: this.queryLanguage,
},
answers: 'extractive',
captions: 'extractive',
speller: 'lexicon',
select: this.returnAllFields ? undefined : this.select,
};
if (this.scoringProfile) baseOptions.scoringProfile = this.scoringProfile;
const { intent, extracted } = this._detectIntent(query);
const intentOptions = this._optionsForIntent(intent, extracted);
const options = { ...baseOptions, ...intentOptions };
// orderBy not supported with semantic ranking
if (String(options.queryType).toLowerCase() === 'semantic' && options.orderBy) {
delete options.orderBy;
}
let docs = await this._tieredSearch(query, options);
// Attach normalized compatibility projection and provenance to each doc
if (Array.isArray(docs)) {
docs = docs.map((d) => (d ? this._normalizeDoc(d) : d));
}
logger.info('[woodland-ai-search-tractor] Query done', { count: Array.isArray(docs) ? docs.length : 0 });
return JSON.stringify(docs || []);
} catch (error) {
logger.error('[woodland-ai-search-tractor] Azure AI Search request failed', {
error: error?.message || String(error),
});
const msg = (error && (error.message || String(error))) || 'Unknown error';
return `AZURE_SEARCH_FAILED: ${msg}`;
}
}
}
module.exports = WoodlandAISearchTractor;

View file

@ -23,6 +23,10 @@ const {
StructuredSD,
StructuredACS,
StructuredWPPACS,
StructuredWPPACSTractor,
StructuredWPPACSCases,
StructuredWPPACSAll,
StructuredWPPACSGeneral,
TraversaalSearch,
StructuredWolfram,
createYouTubeTools,
@ -171,6 +175,10 @@ const loadTools = async ({
'stable-diffusion': StructuredSD,
'azure-ai-search': StructuredACS,
'woodland-ai-search': StructuredWPPACS,
'woodland-ai-search-tractor': StructuredWPPACSTractor,
'woodland-ai-search-cases': StructuredWPPACSCases,
'woodland-ai-search-all': StructuredWPPACSAll,
'woodland-ai-search-general': StructuredWPPACSGeneral,
traversaal_search: TraversaalSearch,
tavily_search_results_json: TavilySearchResults,
};

View file

@ -1,53 +1,29 @@
// Strict citation builder for Woodland results
// - Uses only URL fields present in the payload (never constructs URLs)
// - Applies allow-list filtering for hosts
// Simple citation builder for Woodland results
// Returns URLs exactly as present in the search document payload.
// Optional: if WOODLAND_CITATIONS_URL_ALLOWLIST is set (comma-separated hosts),
// only URLs whose hostname matches the allowlist (or its subdomains) are returned.
const allowList = new Set([
'airtable.com',
// Base domain covers website and subdomains such as support.cyclonerake.com
'cyclonerake.com',
// Kept for back-compat; optional explicit subdomain entry
'support.cyclonerake.com',
]);
const rawAllow = (process.env.WOODLAND_CITATIONS_URL_ALLOWLIST || '').split(',').map(s => s.trim()).filter(Boolean);
const allowSet = new Set(rawAllow);
function isAllowedUrl(u) {
function isAllowed(u) {
if (allowSet.size === 0) return true; // pass-through when no allowlist configured
try {
const url = new URL(u);
const proto = url.protocol.toLowerCase();
if (proto !== 'http:' && proto !== 'https:') return false;
const host = url.hostname.toLowerCase();
for (const d of allowList) {
if (host === d || host.endsWith('.' + d)) return true;
for (const d of allowSet) {
const dd = d.toLowerCase();
if (host === dd || host.endsWith('.' + dd)) return true;
}
return false;
} catch (_) {
return false;
}
}
// Extracts the first allowed URL found in a block of text
function extractAllowedUrl(text) {
if (typeof text !== 'string' || !text) return undefined;
// Basic http/https URL matcher
const urlRegex = /(https?:\/\/[^\s)]+)[)\]\s]?/gi;
let match;
while ((match = urlRegex.exec(text)) !== null) {
const candidate = match[1];
if (isAllowedUrl(candidate)) return candidate;
}
return undefined;
} catch (_) { return false; }
}
function urlFromHit(hit) {
const u = hit?.url;
if (typeof u === 'string' && u && isAllowedUrl(u)) return u;
// Fallback: scan chunk/text/snippet for the first allowed URL
return (
extractAllowedUrl(hit?.chunk) ||
extractAllowedUrl(hit?.text) ||
extractAllowedUrl(hit?.snippet) ||
undefined
);
if (typeof u !== 'string' || !u) return undefined;
if (!isAllowed(u)) return undefined;
return u;
}
function shortSummary(hit) {
@ -102,8 +78,6 @@ function buildCitations({ airtable = [], cyclopedia = [], website = [] }) {
}
module.exports = {
isAllowedUrl,
extractAllowedUrl,
urlFromHit,
shortSummary,
classifySource,

View file

@ -17,7 +17,7 @@ interface:
runCode: false
webSearch: false
fileSearch: false
modelSelect: false
modelSelect: true
sidePanel : true # Enable/disable the side panel (default: false)
# MCP Servers UI configuration
mcpServers:
@ -32,6 +32,11 @@ interface:
# Temporary chat retention period in hours (default: 720, min: 1, max: 8760)
# temporaryChatRetention: 1
# Limit the selector to only show the Agents endpoint
#modelSpecs:
#addedEndpoints:
#- agents
# Example Cloudflare turnstile (optional)
#turnstile:
# siteKey: "your-site-key-here"
@ -149,6 +154,7 @@ endpoints:
disableBuilder: false
# Limit global Agent capabilities; individual Agents can still narrow these down
capabilities: ["actions", "tools"]
minRelevanceScore: 0.7
# fileConfig:
# endpoints:
@ -182,7 +188,7 @@ endpoints:
# Memory configuration for user memories
memory:
disabled: false
disabled: true
validKeys: ["preferences", "work_info", "personal_info", "skills", "interests", "context"]
tokenLimit: 10000
personalize: true