mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-09-22 08:12:00 +02:00
new enhancements
This commit is contained in:
parent
cfe7a6a328
commit
94ed09d087
11 changed files with 1582 additions and 205 deletions
4
.env
4
.env
|
@ -207,7 +207,9 @@ AZURE_AI_SEARCH_SEARCH_OPTION_QUERY_TYPE=semantic
|
|||
AZURE_AI_SEARCH_AIRTABLE_INDEX=wpp-knowledge-dev-airtable
|
||||
AZURE_AI_SEARCH_WEBSITE_INDEX=wpp-knowledge-dev-website
|
||||
AZURE_AI_SEARCH_CYCLOPEDIA_INDEX=wpp-knowledge-dev-cyclopedia
|
||||
|
||||
AZURE_AI_SEARCH_CATALOG_INDEX = wpp-knowledge-dev-catalog # (required by the Woodland Search tool, which throws at startup if it is missing; product catalog data)
|
||||
AZURE_AI_SEARCH_TRACTOR_INDEX = wpp-knowledge-dev-tractors # (optional; for tractor data)
|
||||
AZURE_AI_SEARCH_CASES_INDEX = wpp-knowledge-dev-cases # (optional; for customer case data)
|
||||
#==================================================#
|
||||
# Search #
|
||||
#==================================================#
|
||||
|
|
|
@ -13,6 +13,10 @@ const TraversaalSearch = require('./structured/TraversaalSearch');
|
|||
const createOpenAIImageTools = require('./structured/OpenAIImageTools');
|
||||
const TavilySearchResults = require('./structured/TavilySearchResults');
|
||||
const StructuredWPPACS = require('./structured/WoodlandAISearch');
|
||||
const StructuredWPPACSTractor = require('./structured/WoodlandAISearchTractor');
|
||||
const StructuredWPPACSCases = require('./structured/WoodlandAISearchCases');
|
||||
const StructuredWPPACSAll = require('./structured/WoodlandAISearchAll');
|
||||
const StructuredWPPACSGeneral = require('./structured/WoodlandAISearchGeneral');
|
||||
|
||||
module.exports = {
|
||||
...manifest,
|
||||
|
@ -28,5 +32,9 @@ module.exports = {
|
|||
createYouTubeTools,
|
||||
TavilySearchResults,
|
||||
createOpenAIImageTools,
|
||||
StructuredWPPACS
|
||||
StructuredWPPACS,
|
||||
StructuredWPPACSTractor,
|
||||
StructuredWPPACSCases,
|
||||
StructuredWPPACSAll
|
||||
,StructuredWPPACSGeneral
|
||||
};
|
||||
|
|
|
@ -193,27 +193,164 @@
|
|||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Woodland Search",
|
||||
"pluginKey": "woodland-ai-search",
|
||||
"description": "Prioritized Azure AI Search (Airtable → Cyclopedia → Website) for Woodland.",
|
||||
"icon": "https://i.imgur.com/E7crPze.png",
|
||||
"authConfig": [
|
||||
{
|
||||
"name": "Woodland Search",
|
||||
"pluginKey": "woodland-ai-search",
|
||||
"description": "Azure AI Search (Catalog → Website → Cyclopedia) for Woodland support.",
|
||||
"icon": "https://i.imgur.com/E7crPze.png",
|
||||
"authConfig": [
|
||||
{
|
||||
"authField": "AZURE_AI_SEARCH_SERVICE_ENDPOINT",
|
||||
"label": "Azure AI Search Endpoint",
|
||||
"description": "You need to provide your Endpoint for Azure AI Search."
|
||||
},
|
||||
{
|
||||
"authField": "AZURE_AI_SEARCH_INDEX_NAME",
|
||||
"label": "Azure AI Search Index Name",
|
||||
"description": "You need to provide your Index Name for Azure AI Search."
|
||||
"description": "Your Azure Cognitive Search service endpoint (e.g., https://<service>.search.windows.net)."
|
||||
},
|
||||
{
|
||||
"authField": "AZURE_AI_SEARCH_API_KEY",
|
||||
"label": "Azure AI Search API Key",
|
||||
"description": "You need to provide your API Key for Azure AI Search."
|
||||
"description": "Admin or query API key for your Azure Search service."
|
||||
},
|
||||
{
|
||||
"authField": "AZURE_AI_SEARCH_CATALOG_INDEX",
|
||||
"label": "Catalog Index Name",
|
||||
"description": "Index name for the product/parts catalog (e.g., <base>-catalog)."
|
||||
},
|
||||
{
|
||||
"authField": "AZURE_AI_SEARCH_WEBSITE_INDEX",
|
||||
"label": "Website Index Name",
|
||||
"description": "Index name for website content (e.g., <base>-website)."
|
||||
},
|
||||
{
|
||||
"authField": "AZURE_AI_SEARCH_CYCLOPEDIA_INDEX",
|
||||
"label": "Cyclopedia Index Name",
|
||||
"description": "Index name for cyclopedia/procedure content (e.g., <base>-cyclopedia)."
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Tractor Search",
|
||||
"pluginKey": "woodland-ai-search-tractor",
|
||||
"description": "Azure AI Search for the Tractor index (single-index, semantic results).",
|
||||
"icon": "https://i.imgur.com/E7crPze.png",
|
||||
"authConfig": [
|
||||
{
|
||||
"authField": "AZURE_AI_SEARCH_SERVICE_ENDPOINT",
|
||||
"label": "Azure AI Search Endpoint",
|
||||
"description": "Your Azure Cognitive Search service endpoint (e.g., https://<service>.search.windows.net)."
|
||||
},
|
||||
{
|
||||
"authField": "AZURE_AI_SEARCH_API_KEY",
|
||||
"label": "Azure AI Search API Key",
|
||||
"description": "Admin or query API key for your Azure Search service."
|
||||
},
|
||||
{
|
||||
"authField": "AZURE_AI_SEARCH_TRACTOR_INDEX||AZURE_AI_SEARCH_INDEX_NAME",
|
||||
"label": "Tractor Index Name",
|
||||
"description": "Index name for the Tractor data (e.g., <base>-tractors). Uses AZURE_AI_SEARCH_INDEX_NAME if not provided."
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Cases Search",
|
||||
"pluginKey": "woodland-ai-search-cases",
|
||||
"description": "Azure AI Search for the Cases/Knowledge index (single-index, semantic answers).",
|
||||
"icon": "https://i.imgur.com/E7crPze.png",
|
||||
"authConfig": [
|
||||
{
|
||||
"authField": "AZURE_AI_SEARCH_SERVICE_ENDPOINT",
|
||||
"label": "Azure AI Search Endpoint",
|
||||
"description": "Your Azure Cognitive Search service endpoint (e.g., https://<service>.search.windows.net)."
|
||||
},
|
||||
{
|
||||
"authField": "AZURE_AI_SEARCH_API_KEY",
|
||||
"label": "Azure AI Search API Key",
|
||||
"description": "Admin or query API key for your Azure Search service."
|
||||
},
|
||||
{
|
||||
"authField": "AZURE_AI_SEARCH_CASES_INDEX||AZURE_AI_SEARCH_INDEX_NAME",
|
||||
"label": "Cases Index Name",
|
||||
"description": "Index name for the Cases/Knowledge data (e.g., <base>-cases). Uses AZURE_AI_SEARCH_INDEX_NAME if not provided."
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Woodland Search (All)",
|
||||
"pluginKey": "woodland-ai-search-all",
|
||||
"description": "Calls Woodland, Tractor, and Cases tools in parallel and merges results.",
|
||||
"icon": "https://i.imgur.com/E7crPze.png",
|
||||
"authConfig": [
|
||||
{
|
||||
"authField": "AZURE_AI_SEARCH_SERVICE_ENDPOINT",
|
||||
"label": "Azure AI Search Endpoint",
|
||||
"description": "Your Azure Cognitive Search service endpoint (e.g., https://<service>.search.windows.net)."
|
||||
},
|
||||
{
|
||||
"authField": "AZURE_AI_SEARCH_API_KEY",
|
||||
"label": "Azure AI Search API Key",
|
||||
"description": "Admin or query API key for your Azure Search service."
|
||||
},
|
||||
{
|
||||
"authField": "AZURE_AI_SEARCH_CATALOG_INDEX",
|
||||
"label": "Catalog Index Name",
|
||||
"description": "Index name for product/parts catalog (e.g., <base>-catalog)."
|
||||
},
|
||||
{
|
||||
"authField": "AZURE_AI_SEARCH_WEBSITE_INDEX",
|
||||
"label": "Website Index Name",
|
||||
"description": "Index name for website content (e.g., <base>-website)."
|
||||
},
|
||||
{
|
||||
"authField": "AZURE_AI_SEARCH_CYCLOPEDIA_INDEX",
|
||||
"label": "Cyclopedia Index Name",
|
||||
"description": "Index name for cyclopedia content (e.g., <base>-cyclopedia)."
|
||||
},
|
||||
{
|
||||
"authField": "AZURE_AI_SEARCH_TRACTOR_INDEX||AZURE_AI_SEARCH_INDEX_NAME",
|
||||
"label": "Tractor Index Name",
|
||||
"description": "Index name for tractors data (e.g., <base>-tractors)."
|
||||
},
|
||||
{
|
||||
"authField": "AZURE_AI_SEARCH_CASES_INDEX||AZURE_AI_SEARCH_INDEX_NAME",
|
||||
"label": "Cases Index Name",
|
||||
"description": "Index name for cases/knowledge data (e.g., <base>-cases)."
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Woodland Search (General)",
|
||||
"pluginKey": "woodland-ai-search-general",
|
||||
"description": "Grounded general search: tries Woodland first, then falls back to Cases if needed.",
|
||||
"icon": "https://i.imgur.com/E7crPze.png",
|
||||
"authConfig": [
|
||||
{
|
||||
"authField": "AZURE_AI_SEARCH_SERVICE_ENDPOINT",
|
||||
"label": "Azure AI Search Endpoint",
|
||||
"description": "Your Azure Cognitive Search service endpoint (e.g., https://<service>.search.windows.net)."
|
||||
},
|
||||
{
|
||||
"authField": "AZURE_AI_SEARCH_API_KEY",
|
||||
"label": "Azure AI Search API Key",
|
||||
"description": "Admin or query API key for your Azure Search service."
|
||||
},
|
||||
{
|
||||
"authField": "AZURE_AI_SEARCH_CATALOG_INDEX",
|
||||
"label": "Catalog Index Name",
|
||||
"description": "Index name for product/parts catalog (e.g., <base>-catalog)."
|
||||
},
|
||||
{
|
||||
"authField": "AZURE_AI_SEARCH_WEBSITE_INDEX",
|
||||
"label": "Website Index Name",
|
||||
"description": "Index name for website content (e.g., <base>-website)."
|
||||
},
|
||||
{
|
||||
"authField": "AZURE_AI_SEARCH_CYCLOPEDIA_INDEX",
|
||||
"label": "Cyclopedia Index Name",
|
||||
"description": "Index name for cyclopedia content (e.g., <base>-cyclopedia)."
|
||||
},
|
||||
{
|
||||
"authField": "AZURE_AI_SEARCH_CASES_INDEX||AZURE_AI_SEARCH_INDEX_NAME",
|
||||
"label": "Cases Index Name",
|
||||
"description": "Index name for cases/knowledge data (e.g., <base>-cases)."
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
|
|
|
@ -9,7 +9,7 @@ class WoodlandAISearch extends Tool {
|
|||
static DEFAULT_TOP = 9;
|
||||
// Default select is intentionally minimal; per-intent selects override
|
||||
static DEFAULT_SELECT = 'id,title,content,url';
|
||||
static GROUPS = ['airtable', 'cyclopedia', 'website'];
|
||||
static GROUPS = ['catalog', 'cyclopedia', 'website'];
|
||||
|
||||
_env(v, fallback) {
|
||||
return v ?? fallback;
|
||||
|
@ -34,9 +34,9 @@ class WoodlandAISearch extends Tool {
|
|||
this.apiKey = this._env(fields.AZURE_AI_SEARCH_API_KEY, process.env.AZURE_AI_SEARCH_API_KEY);
|
||||
|
||||
// Per-index names
|
||||
this.airtableIndex = this._env(
|
||||
fields.AZURE_AI_SEARCH_AIRTABLE_INDEX,
|
||||
process.env.AZURE_AI_SEARCH_AIRTABLE_INDEX,
|
||||
this.catalogIndex = this._env(
|
||||
fields.AZURE_AI_SEARCH_CATALOG_INDEX,
|
||||
process.env.AZURE_AI_SEARCH_CATALOG_INDEX,
|
||||
);
|
||||
this.websiteIndex = this._env(
|
||||
fields.AZURE_AI_SEARCH_WEBSITE_INDEX,
|
||||
|
@ -47,9 +47,9 @@ class WoodlandAISearch extends Tool {
|
|||
process.env.AZURE_AI_SEARCH_CYCLOPEDIA_INDEX,
|
||||
);
|
||||
|
||||
if (!this.serviceEndpoint || !this.apiKey || !this.airtableIndex || !this.websiteIndex || !this.cyclopediaIndex) {
|
||||
if (!this.serviceEndpoint || !this.apiKey || !this.catalogIndex || !this.websiteIndex || !this.cyclopediaIndex) {
|
||||
throw new Error(
|
||||
'Missing one or more Azure AI Search envs: AZURE_AI_SEARCH_SERVICE_ENDPOINT, AZURE_AI_SEARCH_API_KEY, AZURE_AI_SEARCH_AIRTABLE_INDEX, AZURE_AI_SEARCH_WEBSITE_INDEX, AZURE_AI_SEARCH_CYCLOPEDIA_INDEX.',
|
||||
'Missing one or more Azure AI Search envs: AZURE_AI_SEARCH_SERVICE_ENDPOINT, AZURE_AI_SEARCH_API_KEY, AZURE_AI_SEARCH_CATALOG_INDEX, AZURE_AI_SEARCH_WEBSITE_INDEX, AZURE_AI_SEARCH_CYCLOPEDIA_INDEX.',
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -69,21 +69,31 @@ class WoodlandAISearch extends Tool {
|
|||
this.defaultSources = (this._env(fields.AZURE_AI_SEARCH_DEFAULT_SOURCES, process.env.AZURE_AI_SEARCH_DEFAULT_SOURCES) || WoodlandAISearch.GROUPS.join(','))
|
||||
.split(',').map(s => s.trim()).filter(Boolean);
|
||||
this.searchFields = (() => {
|
||||
// Prefer configured search fields; otherwise default to high-signal human fields
|
||||
const v = this._env(fields.AZURE_AI_SEARCH_SEARCH_FIELDS, process.env.AZURE_AI_SEARCH_SEARCH_FIELDS);
|
||||
if (!v) return undefined;
|
||||
return String(v).split(',').map(s => s.trim()).filter(Boolean);
|
||||
if (v) return String(v).split(',').map(s => s.trim()).filter(Boolean);
|
||||
return ['title','content','categories','category_paths','promotion_names','part_numbers','sku'];
|
||||
})();
|
||||
// Optional per-index search field overrides
|
||||
const parseList = (v) => (v ? String(v).split(',').map((s) => s.trim()).filter(Boolean) : undefined);
|
||||
this.searchFieldOverrides = {
|
||||
catalog: parseList(this._env(fields.AZURE_AI_SEARCH_CATALOG_SEARCH_FIELDS, process.env.AZURE_AI_SEARCH_CATALOG_SEARCH_FIELDS)),
|
||||
website: parseList(this._env(fields.AZURE_AI_SEARCH_WEBSITE_SEARCH_FIELDS, process.env.AZURE_AI_SEARCH_WEBSITE_SEARCH_FIELDS)),
|
||||
cyclopedia: parseList(this._env(fields.AZURE_AI_SEARCH_CYCLOPEDIA_SEARCH_FIELDS, process.env.AZURE_AI_SEARCH_CYCLOPEDIA_SEARCH_FIELDS)),
|
||||
};
|
||||
this.semanticConfiguration = this._env(fields.AZURE_AI_SEARCH_SEMANTIC_CONFIGURATION, process.env.AZURE_AI_SEARCH_SEMANTIC_CONFIGURATION || 'sem1');
|
||||
this.queryLanguage = this._env(fields.AZURE_AI_SEARCH_QUERY_LANGUAGE, process.env.AZURE_AI_SEARCH_QUERY_LANGUAGE || 'en-us');
|
||||
this.scoringProfile = this._env(fields.AZURE_AI_SEARCH_SCORING_PROFILE, process.env.AZURE_AI_SEARCH_SCORING_PROFILE);
|
||||
this.hardFilter = String(this._env(fields.AZURE_AI_SEARCH_HARD_FILTER, process.env.AZURE_AI_SEARCH_HARD_FILTER || 'true')).toLowerCase() === 'true';
|
||||
// Always return all fields unless explicitly disabled
|
||||
this.returnAllFields = String(this._env(fields.AZURE_AI_SEARCH_RETURN_ALL_FIELDS, process.env.AZURE_AI_SEARCH_RETURN_ALL_FIELDS || 'true')).toLowerCase() === 'true';
|
||||
// Link enrichment to attach website/cyclopedia URLs to catalog hits for citations
|
||||
this.enableLinkEnrichment = String(this._env(fields.AZURE_AI_SEARCH_ENABLE_LINK_ENRICHMENT, process.env.AZURE_AI_SEARCH_ENABLE_LINK_ENRICHMENT || 'true')).toLowerCase() === 'true';
|
||||
|
||||
// Governance / guardrail flags
|
||||
this.enforceReviewedOnly = String(this._env(fields.AZURE_AI_SEARCH_ENFORCE_REVIEWED_ONLY, process.env.AZURE_AI_SEARCH_ENFORCE_REVIEWED_ONLY || 'true')).toLowerCase() === 'true';
|
||||
// Comma-separated domains to allow for Website results (e.g., "www.cyclonerake.com")
|
||||
this.websiteDomainAllowlist = (this._env(fields.AZURE_AI_SEARCH_WEBSITE_DOMAIN_ALLOWLIST, process.env.AZURE_AI_SEARCH_WEBSITE_DOMAIN_ALLOWLIST) || 'www.cyclonerake.com')
|
||||
this.websiteDomainAllowlist = (this._env(fields.AZURE_AI_SEARCH_WEBSITE_DOMAIN_ALLOWLIST, process.env.AZURE_AI_SEARCH_WEBSITE_DOMAIN_ALLOWLIST) || '')
|
||||
.split(',')
|
||||
.map(s => s.trim())
|
||||
.filter(Boolean);
|
||||
|
@ -91,7 +101,7 @@ class WoodlandAISearch extends Tool {
|
|||
// Initialize one SearchClient per index
|
||||
const credential = new AzureKeyCredential(this.apiKey);
|
||||
this.clients = {
|
||||
airtable: new SearchClient(this.serviceEndpoint, this.airtableIndex, credential, { apiVersion: this.apiVersion }),
|
||||
catalog: new SearchClient(this.serviceEndpoint, this.catalogIndex, credential, { apiVersion: this.apiVersion }),
|
||||
website: new SearchClient(this.serviceEndpoint, this.websiteIndex, credential, { apiVersion: this.apiVersion }),
|
||||
cyclopedia: new SearchClient(this.serviceEndpoint, this.cyclopediaIndex, credential, { apiVersion: this.apiVersion }),
|
||||
};
|
||||
|
@ -100,7 +110,7 @@ class WoodlandAISearch extends Tool {
|
|||
endpoint: this.serviceEndpoint,
|
||||
apiVersion: this.apiVersion,
|
||||
indexes: {
|
||||
airtable: this.airtableIndex,
|
||||
catalog: this.catalogIndex,
|
||||
website: this.websiteIndex,
|
||||
cyclopedia: this.cyclopediaIndex,
|
||||
},
|
||||
|
@ -130,13 +140,24 @@ class WoodlandAISearch extends Tool {
|
|||
return `(${a}) and (${b})`;
|
||||
}
|
||||
|
||||
_escapeLiteral(v) {
|
||||
// Escape single quotes for OData literal strings
|
||||
return String(v).replace(/'/g, "''");
|
||||
}
|
||||
|
||||
_withReviewed(filter) {
|
||||
if (!this.enforceReviewedOnly) return filter;
|
||||
// All three indexes expose a boolean 'reviewed' field in our data
|
||||
// Reviewed applies to website/cyclopedia documents; catalog may not carry 'reviewed'
|
||||
const reviewedClause = `reviewed eq true`;
|
||||
return this._andFilter(filter, reviewedClause);
|
||||
}
|
||||
|
||||
_applySearchFields(indexKey, defaults) {
|
||||
const override = this.searchFieldOverrides?.[indexKey];
|
||||
if (Array.isArray(override) && override.length) return override;
|
||||
return defaults;
|
||||
}
|
||||
|
||||
_sanitizeSearchOptions(opts) {
|
||||
const clean = { ...opts };
|
||||
const asStr = (v) => (typeof v === 'string' ? v.toLowerCase() : undefined);
|
||||
|
@ -163,58 +184,77 @@ class WoodlandAISearch extends Tool {
|
|||
return items;
|
||||
};
|
||||
|
||||
try {
|
||||
const docs = await run(options);
|
||||
return { docs, retried: false };
|
||||
} catch (err) {
|
||||
logger.warn('[woodland-ai-search] Initial search failed');
|
||||
const msg = (err && (err.message || String(err))) || '';
|
||||
let attempt = 0;
|
||||
let opts = { ...options };
|
||||
let lastErr;
|
||||
let droppedSearchFields = false;
|
||||
while (attempt < 3) {
|
||||
try {
|
||||
const docs = await run(opts);
|
||||
return { docs, retried: attempt > 0 };
|
||||
} catch (err) {
|
||||
lastErr = err;
|
||||
attempt += 1;
|
||||
logger.warn('[woodland-ai-search] Search failed', { attempt, msg: err?.message || String(err) });
|
||||
const msg = (err && (err.message || String(err))) || '';
|
||||
const sanitized = { ...opts };
|
||||
let changed = false;
|
||||
|
||||
const sanitized = { ...options };
|
||||
let changed = false;
|
||||
// Keep semantic; only adjust unsupported options
|
||||
if (/orderby/i.test(msg) && String(sanitized.queryType).toLowerCase() === 'semantic') {
|
||||
if (sanitized.orderBy) {
|
||||
delete sanitized.orderBy;
|
||||
changed = true;
|
||||
logger.info('[woodland-ai-search] Removing orderBy for semantic query and retrying');
|
||||
}
|
||||
}
|
||||
|
||||
// If orderBy not supported with semantic, remove and retry
|
||||
if (/orderby/i.test(msg) && String(sanitized.queryType).toLowerCase() === 'semantic') {
|
||||
if (sanitized.orderBy) {
|
||||
delete sanitized.orderBy;
|
||||
const unknownFieldRegex = /Unknown field '([^']+)'/g;
|
||||
const toRemove = [];
|
||||
let m;
|
||||
while ((m = unknownFieldRegex.exec(msg)) !== null) {
|
||||
const fld = (m[1] || '').trim();
|
||||
if (fld) toRemove.push(fld);
|
||||
}
|
||||
if (toRemove.length > 0) {
|
||||
if (Array.isArray(sanitized.select)) {
|
||||
const before = sanitized.select.length;
|
||||
sanitized.select = sanitized.select.filter((f) => !toRemove.includes(f));
|
||||
if (sanitized.select.length === 0) delete sanitized.select;
|
||||
if (sanitized.select?.length !== before) changed = true;
|
||||
}
|
||||
if (Array.isArray(sanitized.searchFields)) {
|
||||
const before = sanitized.searchFields.length;
|
||||
sanitized.searchFields = sanitized.searchFields.filter((f) => !toRemove.includes(f));
|
||||
if (sanitized.searchFields.length === 0) delete sanitized.searchFields;
|
||||
if (sanitized.searchFields?.length !== before) changed = true;
|
||||
}
|
||||
if (!/search field list|select/i.test(msg)) {
|
||||
if (sanitized.filter) {
|
||||
delete sanitized.filter;
|
||||
changed = true;
|
||||
logger.info('[woodland-ai-search] Dropping filter due to unknown fields and retrying');
|
||||
}
|
||||
if (sanitized.orderBy) {
|
||||
delete sanitized.orderBy;
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
if (changed) logger.info('[woodland-ai-search] Retrying without unknown fields');
|
||||
}
|
||||
// Final fallback: if still failing and searchFields remain, drop them entirely
|
||||
if (!changed && !droppedSearchFields && sanitized.searchFields) {
|
||||
delete sanitized.searchFields;
|
||||
droppedSearchFields = true;
|
||||
changed = true;
|
||||
logger.info('[woodland-ai-search] Removing orderBy for semantic query and retrying');
|
||||
logger.info('[woodland-ai-search] Dropping searchFields entirely and retrying');
|
||||
}
|
||||
}
|
||||
|
||||
// Remove unknown fields from select/searchFields
|
||||
const unknownFieldRegex = /Unknown field '([^']+)'/g;
|
||||
const toRemove = [];
|
||||
let m;
|
||||
while ((m = unknownFieldRegex.exec(msg)) !== null) {
|
||||
const fld = (m[1] || '').trim();
|
||||
if (fld) toRemove.push(fld);
|
||||
if (!changed) break;
|
||||
opts = sanitized;
|
||||
}
|
||||
if (toRemove.length > 0 && /search field list|select/i.test(msg)) {
|
||||
if (Array.isArray(sanitized.select)) {
|
||||
const before = sanitized.select.length;
|
||||
sanitized.select = sanitized.select.filter((f) => !toRemove.includes(f));
|
||||
if (sanitized.select.length === 0) delete sanitized.select;
|
||||
if (sanitized.select?.length !== before) changed = true;
|
||||
}
|
||||
if (Array.isArray(sanitized.searchFields)) {
|
||||
const before = sanitized.searchFields.length;
|
||||
sanitized.searchFields = sanitized.searchFields.filter((f) => !toRemove.includes(f));
|
||||
if (sanitized.searchFields.length === 0) delete sanitized.searchFields;
|
||||
if (sanitized.searchFields?.length !== before) changed = true;
|
||||
}
|
||||
if (changed) {
|
||||
logger.info('[woodland-ai-search] Retrying without unknown fields');
|
||||
}
|
||||
}
|
||||
|
||||
if (!changed) {
|
||||
throw err;
|
||||
}
|
||||
|
||||
const docs = await run(sanitized);
|
||||
return { docs, retried: true };
|
||||
}
|
||||
throw lastErr;
|
||||
}
|
||||
|
||||
// Backwards-compat placeholder: not used in multi-index mode
|
||||
|
@ -228,28 +268,9 @@ class WoodlandAISearch extends Tool {
|
|||
}
|
||||
|
||||
async _tieredSearch(query, baseOptions, client) {
|
||||
// 1) try as-is
|
||||
let r = await this._safeSearch(query, baseOptions, client);
|
||||
if (r.docs?.length) return r.docs;
|
||||
|
||||
// 2) if semantic, retry simple then simple(no searchFields)
|
||||
const toSimple = (opt) => {
|
||||
const o = { ...opt, queryType: 'simple' };
|
||||
delete o.semanticSearchOptions;
|
||||
delete o.semanticConfiguration;
|
||||
delete o.semanticConfigurationName;
|
||||
return o;
|
||||
};
|
||||
if (String(baseOptions.queryType).toLowerCase() === 'semantic') {
|
||||
r = await this._safeSearch(query, toSimple(baseOptions), client);
|
||||
if (r.docs?.length) return r.docs;
|
||||
|
||||
const noFields = toSimple(baseOptions);
|
||||
if (Array.isArray(noFields.searchFields)) delete noFields.searchFields;
|
||||
r = await this._safeSearch(query, noFields, client);
|
||||
if (r.docs?.length) return r.docs;
|
||||
}
|
||||
return [];
|
||||
// Single-pass: always semantic (no downgrade)
|
||||
const r = await this._safeSearch(query, baseOptions, client);
|
||||
return r.docs ?? [];
|
||||
}
|
||||
|
||||
// Run tiered search against a specific index, with per-index options
|
||||
|
@ -260,6 +281,19 @@ class WoodlandAISearch extends Tool {
|
|||
return [];
|
||||
}
|
||||
const docs = await this._tieredSearch(query, options, client);
|
||||
// Verbose logging: print a compact sample of Azure results for this index
|
||||
try {
|
||||
const sample = Array.isArray(docs)
|
||||
? docs.slice(0, 5).map((d) => ({ id: d?.id, title: d?.title, url: d?.url }))
|
||||
: [];
|
||||
logger.info('[woodland-ai-search] Azure results sample', {
|
||||
index: indexName,
|
||||
count: Array.isArray(docs) ? docs.length : 0,
|
||||
sample,
|
||||
});
|
||||
} catch (e) {
|
||||
logger.debug('[woodland-ai-search] Failed to log results sample', { index: indexName, error: e?.message || String(e) });
|
||||
}
|
||||
// Annotate provenance
|
||||
const annotated = (docs || []).map(d => ({ ...d, index: indexName }));
|
||||
logger.info('[woodland-ai-search] Index query done', { index: indexName, docs: annotated.length });
|
||||
|
@ -267,8 +301,8 @@ class WoodlandAISearch extends Tool {
|
|||
}
|
||||
|
||||
// Multi-index search and interleave
|
||||
async _searchAcrossIndexes(query, baseOptions, indexList, perIndexTop, finalTop, perIndexOptionsMap = {}) {
|
||||
logger.info('[woodland-ai-search] Running per-index queries', {
|
||||
async _searchAcrossIndexes(query, baseOptions, indexList, perIndexTop, finalTop, perIndexOptionsMap = {}, intent = 'general') {
|
||||
logger.info('[woodland-ai-search] Running per-index queries (ordered concat)', {
|
||||
indexes: indexList, perIndexTop, finalTop
|
||||
});
|
||||
|
||||
|
@ -296,28 +330,53 @@ class WoodlandAISearch extends Tool {
|
|||
|
||||
const results = await Promise.all(tasks);
|
||||
|
||||
// NOTE: For parts intent, indexList is ordered ['website','airtable','cyclopedia'] to ensure a View/Buy URL appears early in interleaved results.
|
||||
|
||||
// Interleave equally across indexes
|
||||
// Build results ensuring at least some docs from each index (when available),
|
||||
// then fill remaining slots by priority: catalog → cyclopedia → website
|
||||
const out = [];
|
||||
const seen = new Set();
|
||||
for (let i = 0; out.length < finalTop; i++) {
|
||||
let pushedAny = false;
|
||||
for (const { docs } of results) {
|
||||
if (i < docs.length) {
|
||||
const k = this._keyOf(docs[i]);
|
||||
if (!seen.has(k)) {
|
||||
seen.add(k);
|
||||
out.push(docs[i]);
|
||||
pushedAny = true;
|
||||
if (out.length >= finalTop) break;
|
||||
}
|
||||
}
|
||||
|
||||
// Minimum quotas per index to guarantee Cyclopedia presence for SOP
|
||||
const minQuota = (() => {
|
||||
const q = { catalog: 1, cyclopedia: 1, website: 1 };
|
||||
if (intent === 'sop') {
|
||||
q.cyclopedia = Math.min(2, finalTop); // prioritize at least two SOP docs when possible
|
||||
}
|
||||
if (!pushedAny) break;
|
||||
return q;
|
||||
})();
|
||||
|
||||
// 1) Priming pass: satisfy per-index minimum quotas in priority order
|
||||
for (const idx of indexList) {
|
||||
const need = Math.max(0, Math.min(minQuota[idx] || 0, finalTop - out.length));
|
||||
if (need <= 0) continue;
|
||||
const bucket = results.find(r => r.index === idx)?.docs || [];
|
||||
let taken = 0;
|
||||
for (let i = 0; i < bucket.length && taken < need && out.length < finalTop; i++) {
|
||||
const d = bucket[i];
|
||||
const k = this._keyOf(d);
|
||||
if (seen.has(k)) continue;
|
||||
seen.add(k);
|
||||
out.push(d);
|
||||
taken++;
|
||||
}
|
||||
if (out.length >= finalTop) break;
|
||||
}
|
||||
|
||||
logger.debug('[woodland-ai-search] Per-index merged results', {
|
||||
// 2) Fill remaining slots by priority order
|
||||
if (out.length < finalTop) {
|
||||
for (const idx of indexList) {
|
||||
const bucket = results.find(r => r.index === idx)?.docs || [];
|
||||
for (let i = 0; i < bucket.length && out.length < finalTop; i++) {
|
||||
const d = bucket[i];
|
||||
const k = this._keyOf(d);
|
||||
if (seen.has(k)) continue;
|
||||
seen.add(k);
|
||||
out.push(d);
|
||||
}
|
||||
if (out.length >= finalTop) break;
|
||||
}
|
||||
}
|
||||
|
||||
logger.debug('[woodland-ai-search] Per-index merged results (quota+priority)', {
|
||||
total: out.length,
|
||||
breakdown: results.map(r => ({ index: r.index, count: r.docs.length })),
|
||||
});
|
||||
|
@ -325,7 +384,7 @@ class WoodlandAISearch extends Tool {
|
|||
return out;
|
||||
}
|
||||
|
||||
// Enrich results with cross-index links so the agent can always show Website (View/Buy), Airtable, and Cyclopedia URLs in tables
|
||||
// Enrich results with cross-index links so the agent can always show Website (View/Buy) and Cyclopedia URLs in tables
|
||||
_enrichWithLinks(intent, extracted, results) {
|
||||
try {
|
||||
if (intent !== 'parts' || !Array.isArray(results) || results.length === 0) return results;
|
||||
|
@ -346,8 +405,11 @@ class WoodlandAISearch extends Tool {
|
|||
const cands = uniq([...skus, ...mentioned].map(norm));
|
||||
for (const p of cands) {
|
||||
if (!p) continue;
|
||||
// Strict: only use the exact URL returned by search (do not synthesize)
|
||||
const u = typeof d.url === 'string' ? d.url : undefined;
|
||||
if (!u) continue;
|
||||
const arr = websiteByPart.get(p) || [];
|
||||
arr.push(d.url || d.parent_id || d.canonical_product_url);
|
||||
arr.push(u);
|
||||
websiteByPart.set(p, uniq(arr));
|
||||
}
|
||||
} else if (idx === 'cyclopedia') {
|
||||
|
@ -362,37 +424,40 @@ class WoodlandAISearch extends Tool {
|
|||
for (const pRaw of cands) {
|
||||
const p = norm(pRaw);
|
||||
if (!p) continue;
|
||||
// Strict: only use the exact URL returned by search
|
||||
const u = typeof d.url === 'string' ? d.url : undefined;
|
||||
if (!u) continue;
|
||||
const arr = cyclopediaByPart.get(p) || [];
|
||||
arr.push(d.url || d.parent_id);
|
||||
arr.push(u);
|
||||
cyclopediaByPart.set(p, uniq(arr));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Enrich Airtable part docs with website & cyclopedia links
|
||||
// Enrich Catalog part/SKU docs with website & cyclopedia links
|
||||
const out = results.map((d) => {
|
||||
if (d.index !== 'airtable') return d;
|
||||
const pn = norm(d.part_number) || (() => {
|
||||
const m = /\b\d{2}-[a-z0-9]{2}-[a-z0-9]{3,}\b/i.exec(`${d.part_number || ''} ${d.title || ''} ${d.content || ''}`);
|
||||
return m ? norm(m[0]) : '';
|
||||
if (d.index !== 'catalog') return d;
|
||||
// Prefer first part number; fallback to SKU
|
||||
const pn = (() => {
|
||||
const firstPn = Array.isArray(d.part_numbers) && d.part_numbers.length ? d.part_numbers[0] : '';
|
||||
if (firstPn) return norm(firstPn);
|
||||
return d.sku ? norm(d.sku) : '';
|
||||
})();
|
||||
if (!pn) return d;
|
||||
|
||||
const primaryWebsite = d.canonical_product_url || undefined;
|
||||
// Use URLs exactly as returned by Azure Search (no allowlist or modification)
|
||||
const primaryWebsite = typeof d.url === 'string' ? d.url : undefined;
|
||||
const crossWebsite = websiteByPart.get(pn) || [];
|
||||
const website_urls = uniq([primaryWebsite, ...crossWebsite]);
|
||||
const website_urls = uniq([primaryWebsite, ...crossWebsite].filter(Boolean));
|
||||
|
||||
const crossCyclopedia = cyclopediaByPart.get(pn) || [];
|
||||
const cyclopedia_urls = uniq(crossCyclopedia);
|
||||
const cyclopedia_urls = uniq(crossCyclopedia.filter((u) => typeof u === 'string' && u));
|
||||
|
||||
// Include any Airtable-attached Doc360 URL as an authoritative Cyclopedia link
|
||||
return {
|
||||
...d,
|
||||
airtable_url: d.airtable_record_url || d.airtable_url,
|
||||
website_urls,
|
||||
website_url_primary: website_urls[0] || undefined,
|
||||
// Include any Airtable-attached Doc360 URL as an authoritative Cyclopedia link
|
||||
cyclopedia_urls: uniq([d.doc360_url, ...cyclopedia_urls].filter(Boolean)),
|
||||
cyclopedia_urls,
|
||||
};
|
||||
});
|
||||
|
||||
|
@ -407,6 +472,9 @@ class WoodlandAISearch extends Tool {
|
|||
_optionsByIndexForIntent(intent, extracted = {}) {
|
||||
const opts = {};
|
||||
const partNum = extracted.partNumber;
|
||||
const partType = extracted.partType; // e.g., 'collector bag', 'impeller', 'hose', 'recoil starter', 'boot plate', 'side tube'
|
||||
const family = extracted.family; // e.g., 'Classic', 'Commander Pro', 'Commercial PRO', 'Standard Complete Platinum'
|
||||
const wantsPromo = extracted.wantsPromo;
|
||||
const maybe = (o, sel) => (this.returnAllFields ? o : { ...o, select: sel });
|
||||
|
||||
// Helper to constrain Website results to allowlisted domains
|
||||
|
@ -415,11 +483,19 @@ class WoodlandAISearch extends Tool {
|
|||
: undefined;
|
||||
|
||||
if (intent === 'parts') {
|
||||
// Airtable: only parts, reviewed only
|
||||
opts.airtable = maybe({
|
||||
filter: this._withReviewed(partNum ? `(type eq 'part') and (part_number eq '${partNum}')` : `type eq 'part'`),
|
||||
orderBy: ['last_updated desc'],
|
||||
}, ['title','content','part_number','part_type','categories','canonical_product_url','last_updated','airtable_record_url','doc360_url','id']);
|
||||
// Catalog: treat as primary part/SKU context for now
|
||||
const pn = partNum || '';
|
||||
const pnNoHyphen = pn ? pn.replace(/-/g, '') : '';
|
||||
const filters = [];
|
||||
if (pn) filters.push(`part_numbers/any(p: p eq '${this._escapeLiteral(pn)}') or part_numbers_hyphenless/any(p: p eq '${this._escapeLiteral(pnNoHyphen)}') or sku eq '${this._escapeLiteral(pn)}' or normalized_sku eq '${this._escapeLiteral(pnNoHyphen)}'`);
|
||||
if (partType) filters.push(`part_type/any(p: p eq '${this._escapeLiteral(partType)}')`);
|
||||
if (family) filters.push(`family eq '${this._escapeLiteral(family)}'`);
|
||||
const catalogFilter = filters.length ? filters.map(f => `(${f})`).join(' and ') : undefined;
|
||||
opts.catalog = maybe({
|
||||
filter: catalogFilter,
|
||||
orderBy: ['promotion_active desc','price_after_promo asc','price asc','last_updated desc'],
|
||||
searchFields: this._applySearchFields('catalog', ['title','content','sku','part_numbers'])
|
||||
}, ['id','title','content','url','sku','part_numbers','part_type','family','categories','category_paths','price','price_after_promo','promotion_names','availability','stock_quantity','installation_pdf_url','troubleshooting_pdf_url','safety_pdf_url','video_url','exploded_view_url']);
|
||||
|
||||
// Website: prefer exact SKU/part hits; else fall back to product pages, reviewed only, allowlist
|
||||
const websiteFilterBase = partNum
|
||||
|
@ -429,6 +505,7 @@ class WoodlandAISearch extends Tool {
|
|||
opts.website = maybe({
|
||||
filter: websiteFilter,
|
||||
orderBy: ['last_crawled desc'],
|
||||
searchFields: this._applySearchFields('website', ['title','content','breadcrumb','page_type','headings','sku','skus','mentioned_parts'])
|
||||
}, ['title','content','url','site','last_crawled','sku','skus','mentioned_parts','id']);
|
||||
|
||||
// Cyclopedia: include even without explicit part number, reviewed only
|
||||
|
@ -438,36 +515,45 @@ class WoodlandAISearch extends Tool {
|
|||
opts.cyclopedia = maybe({
|
||||
filter: this._withReviewed(cycloBase),
|
||||
orderBy: ['last_updated desc'],
|
||||
searchFields: this._applySearchFields('cyclopedia', ['title','content','breadcrumb','page_type','toc_items','mentioned_parts','audience'])
|
||||
}, ['title','content','url','site','page_type','breadcrumb','audience','last_updated','mentioned_parts','id']);
|
||||
} else if (intent === 'compatibility') {
|
||||
// Airtable: reviewed only
|
||||
opts.airtable = maybe({
|
||||
filter: this._withReviewed(undefined),
|
||||
// Catalog: compatibility context (family/models)
|
||||
const filters = [];
|
||||
if (family) filters.push(`family eq '${this._escapeLiteral(family)}'`);
|
||||
if (partType) filters.push(`part_type/any(p: p eq '${this._escapeLiteral(partType)}')`);
|
||||
opts.catalog = maybe({
|
||||
filter: filters.length ? filters.map(f => `(${f})`).join(' and ') : undefined,
|
||||
orderBy: ['last_updated desc'],
|
||||
}, ['title','content','last_updated','source_table','airtable_record_url','doc360_url','id']);
|
||||
searchFields: this._applySearchFields('catalog', ['title','content'])
|
||||
}, ['id','title','content','url','categories','category_paths','family','compatible_models','part_type']);
|
||||
|
||||
// Website: reviewed only, allowlist
|
||||
opts.website = maybe({
|
||||
filter: this._withReviewed(websiteDomainFilter),
|
||||
orderBy: ['last_crawled desc'],
|
||||
searchFields: this._applySearchFields('website', ['title','content','breadcrumb','page_type','headings'])
|
||||
}, ['title','content','url','site','page_type','breadcrumb','last_crawled','id']);
|
||||
|
||||
// Cyclopedia: reviewed only
|
||||
opts.cyclopedia = maybe({
|
||||
filter: this._withReviewed(`audience eq 'internal' or page_type eq 'maintenance_guide' or page_type eq 'troubleshooting'`),
|
||||
orderBy: ['last_updated desc','section_order asc'],
|
||||
searchFields: this._applySearchFields('cyclopedia', ['title','content','breadcrumb','page_type','toc_items','audience'])
|
||||
}, ['title','content','url','site','page_type','breadcrumb','audience','last_updated','toc_items','id']);
|
||||
} else if (intent === 'sop') {
|
||||
// Cyclopedia SOP/support, reviewed only
|
||||
opts.cyclopedia = maybe({
|
||||
filter: this._withReviewed(`audience eq 'internal' or page_type eq 'maintenance_guide'`),
|
||||
orderBy: ['last_updated desc','section_order asc'],
|
||||
searchFields: this._applySearchFields('cyclopedia', ['title','content','breadcrumb','page_type','toc_items','audience'])
|
||||
}, ['title','content','url','site','page_type','breadcrumb','audience','last_updated','toc_items','id']);
|
||||
// Airtable support, reviewed only
|
||||
opts.airtable = maybe({
|
||||
filter: this._withReviewed(`type eq 'support'`),
|
||||
// Catalog doc pointers (if present)
|
||||
opts.catalog = maybe({
|
||||
filter: undefined,
|
||||
orderBy: ['last_updated desc'],
|
||||
}, ['title','content','last_updated','type','source_table','airtable_record_url','doc360_url','id']);
|
||||
searchFields: this._applySearchFields('catalog', ['title','content'])
|
||||
}, ['id','title','content','url','categories','category_paths','installation_pdf_url','troubleshooting_pdf_url','safety_pdf_url','video_url','exploded_view_url']);
|
||||
// Website generic, reviewed only, allowlist
|
||||
opts.website = maybe({
|
||||
filter: this._withReviewed(websiteDomainFilter),
|
||||
|
@ -477,28 +563,34 @@ class WoodlandAISearch extends Tool {
|
|||
opts.website = maybe({
|
||||
filter: this._withReviewed(this._andFilter(`page_type eq 'product_marketing'`, websiteDomainFilter)),
|
||||
orderBy: ['last_crawled desc'],
|
||||
searchFields: this._applySearchFields('website', ['title','content','breadcrumb','page_type','headings'])
|
||||
}, ['title','content','url','site','page_type','headings','breadcrumb','last_crawled','id']);
|
||||
opts.airtable = maybe({
|
||||
filter: this._withReviewed(undefined),
|
||||
orderBy: ['last_updated desc'],
|
||||
}, ['title','content','last_updated','source_table','airtable_record_url','id']);
|
||||
const promoFilter = wantsPromo ? `promotion_active eq true` : undefined;
|
||||
opts.catalog = maybe({
|
||||
filter: promoFilter,
|
||||
orderBy: ['promotion_active desc','price_after_promo asc','last_updated desc'],
|
||||
}, ['id','title','content','url','categories','category_paths','family','price_after_promo','promotion_names']);
|
||||
opts.cyclopedia = maybe({
|
||||
filter: this._withReviewed(undefined),
|
||||
orderBy: ['last_updated desc'],
|
||||
searchFields: this._applySearchFields('cyclopedia', ['title','content','breadcrumb','page_type'])
|
||||
}, ['title','content','url','site','page_type','breadcrumb','last_updated','id']);
|
||||
} else {
|
||||
// general
|
||||
opts.airtable = maybe({
|
||||
filter: this._withReviewed(undefined),
|
||||
opts.catalog = maybe({
|
||||
filter: undefined,
|
||||
orderBy: ['last_updated desc'],
|
||||
}, ['title','content','last_updated','airtable_record_url','id']);
|
||||
searchFields: this._applySearchFields('catalog', ['title','content'])
|
||||
}, ['id','title','content','url','categories','category_paths']);
|
||||
opts.website = maybe({
|
||||
filter: this._withReviewed(websiteDomainFilter),
|
||||
orderBy: ['last_crawled desc'],
|
||||
searchFields: this._applySearchFields('website', ['title','content','breadcrumb','page_type','headings'])
|
||||
}, ['title','content','url','site','last_crawled','id']);
|
||||
opts.cyclopedia = maybe({
|
||||
filter: this._withReviewed(undefined),
|
||||
orderBy: ['last_updated desc'],
|
||||
searchFields: this._applySearchFields('cyclopedia', ['title','content','breadcrumb','page_type'])
|
||||
}, ['title','content','url','site','page_type','last_updated','id']);
|
||||
}
|
||||
|
||||
|
@ -516,24 +608,41 @@ class WoodlandAISearch extends Tool {
|
|||
const extracted = {};
|
||||
if (partMatch) extracted.partNumber = partMatch[0];
|
||||
|
||||
// Extract part type tokens
|
||||
const partTypes = [
|
||||
'collector bag','impeller','hose','recoil starter','starter','boot plate','side tube','side discharge'
|
||||
];
|
||||
for (const t of partTypes) {
|
||||
if (q.includes(t)) { extracted.partType = t === 'starter' ? 'recoil starter' : t; break; }
|
||||
}
|
||||
|
||||
// Extract family names
|
||||
if (q.includes('commercial pro')) extracted.family = 'Commercial PRO';
|
||||
else if (q.includes('commander pro') || q.includes('commander')) extracted.family = 'Commander Pro';
|
||||
else if (q.includes('standard complete platinum') || q.includes('platinum')) extracted.family = 'Standard Complete Platinum';
|
||||
else if (q.includes('classic')) extracted.family = 'Classic';
|
||||
|
||||
// Promotions intent flag
|
||||
if (containsAny(['promotion','sale','discount','coupon','financing'])) extracted.wantsPromo = true;
|
||||
|
||||
// Parts / purchase signals → WEBSITE first to ensure View/Buy pages show up early; always include cyclopedia
|
||||
if (partMatch || containsAny(['part','replacement','buy','order','sku','view/buy','add to cart','price','bag','hose','clamp','mda','key'])) {
|
||||
return { intent: 'parts', indexes: ['website','airtable','cyclopedia'], extracted };
|
||||
return { intent: 'parts', indexes: ['catalog','cyclopedia','website'], extracted };
|
||||
}
|
||||
|
||||
// Compatibility / fitment / engine-by-year
|
||||
if (containsAny(['engine','fit','fits','fitment','compatible','compatibility','which engine','used in','hose size','diameter','model history','product history']) || yearRegex.test(q)) {
|
||||
return { intent: 'compatibility', indexes: ['airtable','website','cyclopedia'], extracted };
|
||||
return { intent: 'compatibility', indexes: ['catalog','cyclopedia','website'], extracted };
|
||||
}
|
||||
|
||||
// SOP / How-to
|
||||
if (containsAny(['how to','install','installation','guide','manual','troubleshoot','troubleshooting','winterization','sop'])) {
|
||||
return { intent: 'sop', indexes: ['cyclopedia','airtable','website'], extracted };
|
||||
return { intent: 'sop', indexes: ['catalog','cyclopedia','website'], extracted };
|
||||
}
|
||||
|
||||
// Marketing / benefits
|
||||
if (containsAny(['compare','benefits','why choose','financing','promotion','warranty'])) {
|
||||
return { intent: 'marketing', indexes: ['website','airtable','cyclopedia'], extracted };
|
||||
return { intent: 'marketing', indexes: ['catalog','cyclopedia','website'], extracted };
|
||||
}
|
||||
|
||||
return { intent: 'general', indexes: WoodlandAISearch.GROUPS, extracted };
|
||||
|
@ -567,9 +676,6 @@ class WoodlandAISearch extends Tool {
|
|||
speller: 'lexicon'
|
||||
};
|
||||
if (this.scoringProfile) baseOptions.scoringProfile = this.scoringProfile;
|
||||
if (Array.isArray(this.searchFields) && this.searchFields.length) {
|
||||
baseOptions.searchFields = this.searchFields;
|
||||
}
|
||||
|
||||
// Intent routing
|
||||
const { intent, indexes, extracted } = this._detectIntent(query);
|
||||
|
@ -585,6 +691,7 @@ class WoodlandAISearch extends Tool {
|
|||
perIndexTop,
|
||||
finalTop,
|
||||
perIndexOptions,
|
||||
intent,
|
||||
);
|
||||
|
||||
// If we came up short, try a broader pass across all indexes with larger top
|
||||
|
@ -600,6 +707,7 @@ class WoodlandAISearch extends Tool {
|
|||
Math.max(3, Math.ceil(sampleTop / broaderIndexes.length)),
|
||||
finalTop,
|
||||
broaderPerIndexOptions,
|
||||
'general',
|
||||
);
|
||||
// Merge while keeping uniques and limit to finalTop
|
||||
const seen = new Set(result.map(d => this._keyOf(d)));
|
||||
|
@ -613,11 +721,10 @@ class WoodlandAISearch extends Tool {
|
|||
}
|
||||
}
|
||||
|
||||
const enriched = this._enrichWithLinks(intent, extracted, result);
|
||||
const payload = Array.isArray(enriched) ? enriched : result;
|
||||
// Attach a governance hint (non-breaking) for downstream renderers
|
||||
const wrapped = { results: payload, governance: { reviewedOnly: this.enforceReviewedOnly, websiteDomains: this.websiteDomainAllowlist } };
|
||||
return JSON.stringify(wrapped);
|
||||
// Optionally enrich with cross-index links to ensure real URLs from Azure results
|
||||
const payload = this.enableLinkEnrichment ? this._enrichWithLinks(intent, extracted, result) : result;
|
||||
// Match AzureAISearch: return raw array of documents
|
||||
return JSON.stringify(payload);
|
||||
} catch (error) {
|
||||
logger.error('Azure AI Search request failed', { error: error?.message || String(error) });
|
||||
const msg = (error && (error.message || String(error))) || 'Unknown error';
|
||||
|
|
198
api/app/clients/tools/structured/WoodlandAISearchAll.js
Normal file
198
api/app/clients/tools/structured/WoodlandAISearchAll.js
Normal file
|
@ -0,0 +1,198 @@
|
|||
// woodland-ai-search-all.js (aggregator)
|
||||
const { z } = require('zod');
|
||||
const { Tool } = require('@langchain/core/tools');
|
||||
const { logger } = require('~/config');
|
||||
|
||||
const WoodlandAISearch = require('./WoodlandAISearch');
|
||||
const WoodlandAISearchTractor = require('./WoodlandAISearchTractor');
|
||||
const WoodlandAISearchCases = require('./WoodlandAISearchCases');
|
||||
|
||||
/**
 * Aggregator tool: runs the enabled Woodland sub-tools in parallel for one
 * query, tags every document with where it came from, and merges the results
 * into a single de-duplicated JSON array.
 *
 * A failing sub-tool (missing configuration, failed request, unparsable
 * output) is logged and skipped; the remaining sub-tools still contribute.
 */
class WoodlandAISearchAll extends Tool {
  static DEFAULT_TOP = 18;

  /**
   * @param {object} [fields] - Tool configuration. `WOODLAND_ALL_ENABLE_*`
   *   keys toggle individual sub-tools (falling back to the environment, then
   *   to enabled). The same object is forwarded to each sub-tool constructor
   *   so settings supplied here are not lost.
   */
  constructor(fields = {}) {
    super();
    this.name = 'woodland-ai-search-all';
    this.description =
      "Aggregates results from 'woodland-ai-search', 'woodland-ai-search-tractor', and 'woodland-ai-search-cases' in one call.";

    this.schema = z.object({
      query: z.string().describe('Search word or phrase to Woodland All-Tools'),
      top: z.number().int().positive().optional(),
      perToolTop: z.number().int().positive().optional(),
    });

    // Kept so sub-tools receive the same configuration as the aggregator.
    this.subToolFields = fields ?? {};

    // Allow disabling specific sub-tools via fields/env if desired
    const isEnabled = (key) =>
      String(this.subToolFields[key] ?? process.env[key] ?? 'true')
        .toLowerCase()
        .trim() === 'true';
    this.enableWoodland = isEnabled('WOODLAND_ALL_ENABLE_WOODLAND');
    this.enableTractor = isEnabled('WOODLAND_ALL_ENABLE_TRACTOR');
    this.enableCases = isEnabled('WOODLAND_ALL_ENABLE_CASES');

    logger.info('[woodland-ai-search-all] Initialized', {
      enableWoodland: this.enableWoodland,
      enableTractor: this.enableTractor,
      enableCases: this.enableCases,
    });
  }

  /**
   * Basic identity of a document: URL, then id-like fields, then the
   * serialized document itself.
   * @param {object} d - Search result document.
   * @returns {*} First truthy candidate (may be a non-string id).
   */
  _keyOf(d) {
    return (
      (typeof d?.url === 'string' && d.url) ||
      (typeof d?.website_url_primary === 'string' && d.website_url_primary) ||
      d?.id ||
      d?.record_id ||
      d?.key ||
      JSON.stringify(d)
    );
  }

  /**
   * De-duplication key used when merging sub-tool results.
   *
   * Order of preference:
   *  1. URL (`url` / `website_url_primary`) - shared across tools on purpose,
   *     so the same page returned by two tools is emitted once.
   *  2. String or numeric id (`id` / `record_id` / `key`), prefixed with
   *     `source_tool` when present so equal ids from different tools do not
   *     collapse into one entry.
   *  3. A lower-cased signature of title/site/page_type/sku/part numbers/index.
   *  4. The serialized document.
   *
   * @param {object} d - Search result document.
   * @returns {string} Key suitable for a Set.
   */
  _strongKeyOf(d) {
    const url =
      (typeof d?.url === 'string' && d.url) ||
      (typeof d?.website_url_primary === 'string' && d.website_url_primary) ||
      '';
    if (url) return url;

    const isUsableId = (v) => (typeof v === 'string' && v !== '') || typeof v === 'number';
    const rawId = [d?.id, d?.record_id, d?.key].find(isUsableId);
    if (rawId !== undefined) {
      const scope = typeof d?.source_tool === 'string' ? d.source_tool : '';
      return scope ? `${scope}:${rawId}` : String(rawId);
    }

    const lower = (v) => (v == null ? '' : String(v).trim().toLowerCase());
    const joined = (v) => (Array.isArray(v) ? v.join('|') : v);
    const signature = [
      d?.title,
      d?.site,
      d?.page_type,
      Array.isArray(d?.skus) ? joined(d.skus) : d?.sku,
      joined(d?.part_numbers),
      d?.index,
    ]
      .map(lower)
      .filter(Boolean)
      .join('#');

    // JSON.stringify(undefined) yields undefined; keep the key a string.
    return signature || JSON.stringify(d) || '';
  }

  /**
   * Turn a sub-tool's return value into an array of documents.
   * @param {*} raw - Value returned by a sub-tool's `_call`.
   * @returns {object[]} Parsed documents (empty when the shape is unknown).
   * @throws {Error} When the sub-tool reported `AZURE_SEARCH_FAILED: ...` or
   *   returned a string that is not valid JSON.
   */
  _parseSubToolOutput(raw) {
    const failurePrefix = 'AZURE_SEARCH_FAILED:';
    if (typeof raw === 'string' && raw.startsWith(failurePrefix)) {
      // Surface the sub-tool's own message instead of a JSON syntax error.
      throw new Error(raw.slice(failurePrefix.length).trim() || 'Unknown error');
    }
    const parsed = typeof raw === 'string' ? JSON.parse(raw) : raw;
    if (Array.isArray(parsed)) return parsed;
    // Tolerate an envelope of the form { results: [...] }.
    if (Array.isArray(parsed?.results)) return parsed.results;
    return [];
  }

  /**
   * Build and run one sub-tool; never rejects.
   * @param {string} tool - Sub-tool name used for provenance and logging.
   * @param {Function} create - Factory that constructs the sub-tool; invoked
   *   inside the guard so a throwing constructor only disables this sub-tool.
   * @param {string} query - Search query.
   * @param {number} top - Number of results to request.
   * @returns {Promise<{tool: string, ok: boolean, docs?: object[], err?: Error}>}
   */
  async _runSubTool(tool, create, query, top) {
    try {
      const raw = await create()._call({ query, top });
      return { tool, ok: true, docs: this._parseSubToolOutput(raw) };
    } catch (err) {
      return { tool, ok: false, err };
    }
  }

  /**
   * Attach `source_tool` (if absent) and a normalized `provenance` object.
   * `host` and `url` are only present when the document URL parses.
   * @param {object} doc - Document to annotate in place.
   * @param {string} tool - Name of the sub-tool that returned it.
   */
  _tagProvenance(doc, tool) {
    if (!doc.source_tool) doc.source_tool = tool;
    const provenance = {
      source_tool: doc.source_tool,
      index: doc.index,
      site: doc.site,
      page_type: doc.page_type,
    };
    const url =
      (typeof doc.url === 'string' && doc.url) ||
      (typeof doc.website_url_primary === 'string' && doc.website_url_primary) ||
      '';
    if (url) {
      try {
        provenance.host = new URL(url).hostname;
        provenance.url = url;
      } catch (_) {
        // Malformed/relative URL: keep provenance without host/url.
      }
    }
    doc.provenance = provenance;
  }

  /**
   * @param {{query: string, top?: number, perToolTop?: number}} data
   * @returns {Promise<string>} JSON array of merged documents, or
   *   `AZURE_SEARCH_FAILED: <message>` if aggregation itself fails.
   */
  async _call(data) {
    const { query, top: topIn, perToolTop: perToolTopIn } = data;
    const positiveInt = (v, fallback) =>
      typeof v === 'number' && Number.isFinite(v) ? Math.max(1, Math.floor(v)) : fallback;

    const finalTop = positiveInt(topIn, WoodlandAISearchAll.DEFAULT_TOP);
    // Favor balanced breadth across subtools
    const perToolTop = positiveInt(
      perToolTopIn,
      Math.min(10, Math.max(8, Math.ceil(finalTop / 2))),
    );

    try {
      const subTools = [
        {
          tool: 'woodland-ai-search',
          enabled: this.enableWoodland,
          create: () => new WoodlandAISearch(this.subToolFields),
        },
        {
          tool: 'woodland-ai-search-tractor',
          enabled: this.enableTractor,
          create: () => new WoodlandAISearchTractor(this.subToolFields),
        },
        {
          tool: 'woodland-ai-search-cases',
          enabled: this.enableCases,
          create: () => new WoodlandAISearchCases(this.subToolFields),
        },
      ].filter((s) => s.enabled);

      const settled = await Promise.all(
        subTools.map(({ tool, create }) => this._runSubTool(tool, create, query, perToolTop)),
      );

      const buckets = [];
      for (const r of settled) {
        if (!r.ok) {
          logger.warn('[woodland-ai-search-all] Subtool failed', {
            tool: r.tool,
            error: r.err?.message || String(r.err),
          });
          continue;
        }
        // Only objects can be tagged; drop null/primitive entries.
        const docs = r.docs.filter((d) => d && typeof d === 'object');
        for (const d of docs) this._tagProvenance(d, r.tool);
        buckets.push({ tool: r.tool, docs });
      }

      // Merge strategy:
      // 1) Guarantee minimum per-tool coverage, then 2) fill remaining by priority (woodland -> cases -> tractor)
      const priority = ['woodland-ai-search', 'woodland-ai-search-cases', 'woodland-ai-search-tractor'];
      buckets.sort((a, b) => priority.indexOf(a.tool) - priority.indexOf(b.tool));

      const minPerTool = {
        'woodland-ai-search': 3,
        'woodland-ai-search-cases': 1,
        'woodland-ai-search-tractor': 1,
      };
      const out = [];
      const seen = new Set();
      const isFull = () => out.length >= finalTop;

      const addDoc = (doc) => {
        const k = this._strongKeyOf(doc);
        if (seen.has(k)) return false;
        seen.add(k);
        out.push(doc);
        return true;
      };

      // Phase 1: satisfy minimum quotas per tool (if available)
      for (const b of buckets) {
        if (isFull()) break;
        const quota = minPerTool[b.tool] || 0;
        let added = 0;
        for (const d of b.docs) {
          if (isFull() || added >= quota) break;
          if (addDoc(d)) added += 1;
        }
      }

      // Phase 2: fill remaining by priority order
      for (const b of buckets) {
        for (const d of b.docs) {
          if (isFull()) break;
          addDoc(d);
        }
        if (isFull()) break;
      }

      logger.info('[woodland-ai-search-all] Aggregated results', {
        totalMerged: out.length,
        sources: buckets.map((b) => ({ tool: b.tool, count: b.docs.length })),
      });

      return JSON.stringify(out);
    } catch (error) {
      logger.error('[woodland-ai-search-all] Failed', { error: error?.message || String(error) });
      return `AZURE_SEARCH_FAILED: ${error?.message || String(error)}`;
    }
  }
}
|
||||
|
||||
module.exports = WoodlandAISearchAll;
|
334
api/app/clients/tools/structured/WoodlandAISearchCases.js
Normal file
334
api/app/clients/tools/structured/WoodlandAISearchCases.js
Normal file
|
@ -0,0 +1,334 @@
|
|||
// woodland-ai-search-cases.js (single-index)
|
||||
const { z } = require('zod');
|
||||
const { Tool } = require('@langchain/core/tools');
|
||||
const { SearchClient, AzureKeyCredential } = require('@azure/search-documents');
|
||||
const { logger } = require('~/config');
|
||||
|
||||
/**
 * Single-index Azure AI Search tool for the Cases index.
 *
 * Runs a semantic query, retries with progressively relaxed options when the
 * service rejects a request, and returns the documents as a JSON array with a
 * `normalized_cases` summary attached to each one.
 */
class WoodlandAISearchCases extends Tool {
  static DEFAULT_API_VERSION = '2024-07-01';
  static DEFAULT_TOP = 9;
  static DEFAULT_SELECT = 'id,title,content,url';

  /** Return `v` unless it is null/undefined, in which case return `fallback`. */
  _env(v, fallback) {
    return v ?? fallback;
  }

  /**
   * Describe where a document came from.
   * @param {object} d - Search result document.
   * @returns {object} `{ url, host, site, page_type }`, or only
   *   `{ site, page_type }` when the URL cannot be parsed.
   */
  _provenance(d) {
    try {
      const url = (typeof d?.url === 'string' && d.url) || '';
      const host = url ? new URL(url).hostname : undefined;
      return { url: url || undefined, host, site: d?.site, page_type: d?.page_type };
    } catch (_) {
      // Malformed/relative URL: report what is still known.
      return { site: d?.site, page_type: d?.page_type };
    }
  }

  /**
   * Lightweight normalization for Cases/Knowledge docs: find the first label
   * regex that matches and split its first capture group into list items.
   *
   * Items are separated by newlines, semicolons, bullets, or a dash that has
   * whitespace on both sides. A dash inside a word ("30-day") is kept.
   *
   * @param {string} text - Text to scan.
   * @param {RegExp[]} labelRegexes - Patterns whose group 1 holds the list.
   * @returns {string[]|undefined} Non-empty list, or undefined if none found.
   */
  _extractList(text, labelRegexes) {
    if (!Array.isArray(labelRegexes)) return undefined;
    const haystack = (text || '').toString();
    for (const re of labelRegexes) {
      if (!(re instanceof RegExp)) continue;
      re.lastIndex = 0; // a /g or /y pattern would otherwise resume mid-string
      const captured = re.exec(haystack)?.[1];
      if (!captured) continue;
      const items = captured
        .replace(/\r/g, '')
        .split(/[\n;\u2022]|\s-\s/)
        .map((s) => s.replace(/^-\s+/, '').trim())
        .filter(Boolean);
      if (items.length) return items;
    }
    return undefined;
  }

  /**
   * Collect lines that look like steps: "1." / "1)" numbering, or a dash or
   * bullet followed by whitespace. A leading decimal such as "2.5 HP" is not
   * treated as numbering.
   *
   * @param {string} text - Text to scan.
   * @returns {string[]|undefined} Step texts without their markers, or
   *   undefined if there are none.
   */
  _extractSteps(text) {
    const marker = /^(?:\d+[.)](?!\d)|[-\u2022]\s)\s*/;
    const steps = [];
    for (const line of (text || '').toString().split(/\r?\n/)) {
      const trimmed = line.trim();
      if (!marker.test(trimmed)) continue;
      const step = trimmed.replace(marker, '');
      if (step) steps.push(step);
    }
    return steps.length ? steps : undefined;
  }

  /**
   * Return the document with an added `normalized_cases` object derived from
   * its fields and from labels/steps found in its text.
   * @param {object} d - Search result document.
   * @returns {object} Shallow copy of `d` plus `normalized_cases`.
   */
  _normalizeDoc(d) {
    const str = (v) => (v == null ? undefined : String(v));
    const list = (v) => (Array.isArray(v) ? v.filter(Boolean).map(String) : undefined);

    const title = str(d?.title);
    const content = str(d?.content) || str(d?.summary) || str(d?.answer);
    const requirements =
      this._extractList(content, [
        /requirements?\s*[:\-]\s*([^\n]+)/i,
        /eligibility\s*[:\-]\s*([^\n]+)/i,
      ]) || list(d?.requirements);
    const exceptions =
      this._extractList(content, [/exceptions?\s*[:\-]\s*([^\n]+)/i]) || list(d?.exceptions);
    const scope = str(d?.category) || str(d?.scope) || undefined;
    const steps = this._extractSteps(content);

    const normalized = {
      policy_name: title,
      scope,
      summary: str(d?.summary),
      effective_date: str(d?.effective_date),
      last_updated: str(d?.last_updated),
      requirements,
      exceptions,
      steps,
      tags: list(d?.tags),
      keywords: list(d?.keywords),
      provenance: this._provenance(d),
    };

    return { ...d, normalized_cases: normalized };
  }

  /**
   * @param {object} [fields] - Overrides for the `AZURE_AI_SEARCH_*`
   *   environment variables of the same name.
   * @throws {Error} When the endpoint, API key, or index name is missing.
   */
  constructor(fields = {}) {
    super();
    this.name = 'woodland-ai-search-cases';
    this.description = "Use the 'woodland-ai-search-cases' tool to answer questions from the Cases Azure AI Search index";

    this.schema = z.object({
      query: z.string().describe('Question or search phrase for Cases index'),
      top: z.number().int().positive().optional(),
    });

    // Shared endpoint + key
    this.serviceEndpoint = this._env(
      fields.AZURE_AI_SEARCH_SERVICE_ENDPOINT,
      process.env.AZURE_AI_SEARCH_SERVICE_ENDPOINT,
    );
    this.apiKey = this._env(fields.AZURE_AI_SEARCH_API_KEY, process.env.AZURE_AI_SEARCH_API_KEY);

    // Cases index name (support multiple env names; fallback to generic index name)
    this.indexName =
      this._env(fields.AZURE_AI_SEARCH_CASES_INDEX, process.env.AZURE_AI_SEARCH_CASES_INDEX) ||
      this._env(fields.AZURE_AI_SEARCH_CASE_INDEX, process.env.AZURE_AI_SEARCH_CASE_INDEX) ||
      this._env(fields.AZURE_AI_SEARCH_CASES_INDEX_NAME, process.env.AZURE_AI_SEARCH_CASES_INDEX_NAME) ||
      this._env(fields.AZURE_AI_SEARCH_INDEX_NAME, process.env.AZURE_AI_SEARCH_INDEX_NAME);

    if (!this.serviceEndpoint || !this.apiKey || !this.indexName) {
      throw new Error(
        'Missing Azure AI Search envs: AZURE_AI_SEARCH_SERVICE_ENDPOINT, AZURE_AI_SEARCH_API_KEY, and Cases index (AZURE_AI_SEARCH_CASES_INDEX or AZURE_AI_SEARCH_INDEX_NAME).',
      );
    }

    // Optional API version
    this.apiVersion = this._env(
      fields.AZURE_AI_SEARCH_API_VERSION,
      process.env.AZURE_AI_SEARCH_API_VERSION || WoodlandAISearchCases.DEFAULT_API_VERSION,
    );

    // Defaults
    this.top = WoodlandAISearchCases.DEFAULT_TOP;
    this.select = WoodlandAISearchCases.DEFAULT_SELECT.split(',').map((s) => s.trim());

    // Semantic/search options
    const parseFieldList = (v) =>
      String(v)
        .split(',')
        .map((s) => s.trim())
        .filter(Boolean);
    const casesSearchFields = this._env(
      fields.AZURE_AI_SEARCH_CASES_SEARCH_FIELDS,
      process.env.AZURE_AI_SEARCH_CASES_SEARCH_FIELDS,
    );
    const globalSearchFields = this._env(
      fields.AZURE_AI_SEARCH_SEARCH_FIELDS,
      process.env.AZURE_AI_SEARCH_SEARCH_FIELDS,
    );
    // Prefer cases-specific override, else global, else generic defaults
    // suitable for Q&A corpora (avoid page_type).
    const configuredSearchFields = casesSearchFields || globalSearchFields;
    this.searchFields = configuredSearchFields
      ? parseFieldList(configuredSearchFields)
      : ['title', 'content', 'summary', 'tags', 'keywords', 'category', 'question', 'answer'];
    // Only a cases-specific override is sent with queries. The global list and
    // the defaults are not known to exist in this index, and an unknown field
    // would make each query fail before the retry path strips it.
    this.searchFieldsExplicit =
      Boolean(casesSearchFields) && parseFieldList(casesSearchFields).length > 0;

    this.semanticConfiguration = this._env(
      fields.AZURE_AI_SEARCH_SEMANTIC_CONFIGURATION,
      process.env.AZURE_AI_SEARCH_SEMANTIC_CONFIGURATION || 'sem1',
    );
    this.queryLanguage = this._env(
      fields.AZURE_AI_SEARCH_QUERY_LANGUAGE,
      process.env.AZURE_AI_SEARCH_QUERY_LANGUAGE || 'en-us',
    );
    this.scoringProfile = this._env(
      fields.AZURE_AI_SEARCH_SCORING_PROFILE,
      process.env.AZURE_AI_SEARCH_SCORING_PROFILE,
    );
    this.returnAllFields =
      String(
        this._env(
          fields.AZURE_AI_SEARCH_RETURN_ALL_FIELDS,
          process.env.AZURE_AI_SEARCH_RETURN_ALL_FIELDS || 'true',
        ),
      )
        .toLowerCase()
        .trim() === 'true';

    // Client
    const credential = new AzureKeyCredential(this.apiKey);
    this.client = new SearchClient(this.serviceEndpoint, this.indexName, credential, {
      apiVersion: this.apiVersion,
    });

    logger.info('[woodland-ai-search-cases] Initialized', {
      endpoint: this.serviceEndpoint,
      apiVersion: this.apiVersion,
      index: this.indexName,
      select: this.select,
      searchFields: this.searchFields,
      semanticConfiguration: this.semanticConfiguration,
      queryLanguage: this.queryLanguage,
      scoringProfile: this.scoringProfile,
    });
  }

  /**
   * Copy `opts`, removing `answers`, `captions` and `speller` when their value
   * is not one of the strings this tool sends.
   * @param {object} opts - Search options.
   * @returns {object} Sanitized shallow copy.
   */
  _sanitizeSearchOptions(opts) {
    const clean = { ...opts };
    const allowed = {
      answers: ['extractive', 'none'],
      captions: ['extractive', 'none'],
      speller: ['lexicon', 'simple', 'none'],
    };
    for (const [key, values] of Object.entries(allowed)) {
      const value = typeof clean[key] === 'string' ? clean[key].toLowerCase() : undefined;
      if (!values.includes(value)) delete clean[key];
    }
    return clean;
  }

  /**
   * Run a search, retrying up to three times with relaxed options.
   *
   * After a failure the next attempt: drops `orderBy` when the error mentions
   * it on a semantic query; removes fields reported as "Unknown field" from
   * `select`/`searchFields` (and drops `filter`/`orderBy` when the error is
   * not about those lists); otherwise drops `searchFields` entirely, once.
   * If nothing can be relaxed the last error is rethrown.
   *
   * @param {string} query - Search text.
   * @param {object} options - Initial search options.
   * @returns {Promise<{docs: object[], retried: boolean}>}
   * @throws The last service error when every attempt fails.
   */
  async _safeSearch(query, options) {
    const run = async (opts) => {
      const send = this._sanitizeSearchOptions(opts);
      logger.debug('[woodland-ai-search-cases] Sending request', {
        query,
        options: JSON.stringify(send, null, 2),
      });
      const rs = await this.client.search(query, send);
      const items = [];
      for await (const r of rs.results) items.push(r.document);
      logger.debug('[woodland-ai-search-cases] Received response', {
        count: items.length,
        sample: items.slice(0, 2),
      });
      return items;
    };

    // Remove `names` from opts[key]; delete the key when it becomes empty.
    const stripFields = (opts, key, names) => {
      if (!Array.isArray(opts[key])) return false;
      const before = opts[key].length;
      opts[key] = opts[key].filter((f) => !names.includes(f));
      if (opts[key].length === 0) delete opts[key];
      return opts[key]?.length !== before;
    };

    const maxAttempts = 3;
    let attempt = 0;
    let opts = { ...options };
    let lastErr;
    let droppedSearchFields = false;

    while (attempt < maxAttempts) {
      try {
        const docs = await run(opts);
        return { docs, retried: attempt > 0 };
      } catch (err) {
        lastErr = err;
        attempt += 1;
        const msg = (err && (err.message || String(err))) || '';
        logger.warn('[woodland-ai-search-cases] Search failed', { attempt, msg });

        const sanitized = { ...opts };
        let changed = false;

        if (/orderby/i.test(msg) && String(sanitized.queryType).toLowerCase() === 'semantic') {
          if (sanitized.orderBy) {
            delete sanitized.orderBy;
            changed = true;
            logger.info('[woodland-ai-search-cases] Removing orderBy for semantic query and retrying');
          }
        }

        const toRemove = [];
        for (const match of msg.matchAll(/Unknown field '([^']+)'/gi)) {
          const fld = (match[1] || '').trim();
          if (fld) toRemove.push(fld);
        }

        if (toRemove.length > 0) {
          if (stripFields(sanitized, 'select', toRemove)) changed = true;
          if (stripFields(sanitized, 'searchFields', toRemove)) changed = true;
          // The unknown field is not in select/searchFields, so it must come
          // from the filter or ordering expression.
          if (!/search field list|select/i.test(msg)) {
            if (sanitized.filter) {
              delete sanitized.filter;
              changed = true;
              logger.info('[woodland-ai-search-cases] Dropping filter due to unknown fields and retrying');
            }
            if (sanitized.orderBy) {
              delete sanitized.orderBy;
              changed = true;
            }
          }
        }

        if (!changed && !droppedSearchFields && sanitized.searchFields) {
          delete sanitized.searchFields;
          droppedSearchFields = true;
          changed = true;
          logger.info('[woodland-ai-search-cases] Dropping searchFields entirely and retrying');
        }

        if (!changed) break;
        opts = sanitized;
      }
    }
    throw lastErr;
  }

  /**
   * @param {{query: string, top?: number}} data
   * @returns {Promise<string>} JSON array of normalized documents, or
   *   `AZURE_SEARCH_FAILED: <message>` when the search fails.
   */
  async _call(data) {
    const { query, top: topIn } = data;
    const finalTop =
      typeof topIn === 'number' && Number.isFinite(topIn) ? Math.max(1, Math.floor(topIn)) : this.top;

    try {
      // Require all terms only for a quoted phrase or an explicit upper-case
      // boolean operator; lower-case "and"/"or"/"not" are ordinary words.
      const q = (query || '').toString();
      const inferredMode = /".+"/.test(q) || /\b(AND|OR|NOT)\b/.test(q) ? 'all' : 'any';

      const options = {
        queryType: 'semantic',
        searchMode: inferredMode,
        top: finalTop,
        semanticSearchOptions: {
          configurationName: this.semanticConfiguration,
          queryLanguage: this.queryLanguage,
        },
        answers: 'extractive',
        captions: 'extractive',
        speller: 'lexicon',
      };
      if (!this.returnAllFields) {
        options.select = this.select;
      }
      if (this.scoringProfile) options.scoringProfile = this.scoringProfile;
      if (this.searchFieldsExplicit && Array.isArray(this.searchFields) && this.searchFields.length) {
        options.searchFields = this.searchFields;
      }

      const result = await this._safeSearch(query, options);
      let payload = result.docs || [];
      if (Array.isArray(payload)) {
        payload = payload.map((d) => (d ? this._normalizeDoc(d) : d));
      }
      logger.info('[woodland-ai-search-cases] Query done', {
        count: Array.isArray(payload) ? payload.length : 0,
      });
      return JSON.stringify(payload);
    } catch (error) {
      logger.error('[woodland-ai-search-cases] Azure AI Search request failed', {
        error: error?.message || String(error),
      });
      const msg = (error && (error.message || String(error))) || 'Unknown error';
      return `AZURE_SEARCH_FAILED: ${msg}`;
    }
  }
}
|
||||
|
||||
module.exports = WoodlandAISearchCases;
|
109
api/app/clients/tools/structured/WoodlandAISearchGeneral.js
Normal file
109
api/app/clients/tools/structured/WoodlandAISearchGeneral.js
Normal file
|
@ -0,0 +1,109 @@
|
|||
// woodland-ai-search-general.js (grounded two-phase aggregator)
|
||||
const { z } = require('zod');
|
||||
const { Tool } = require('@langchain/core/tools');
|
||||
const { logger } = require('~/config');
|
||||
|
||||
const WoodlandAISearch = require('./WoodlandAISearch');
|
||||
const WoodlandAISearchCases = require('./WoodlandAISearchCases');
|
||||
|
||||
/**
 * Two-phase aggregator tool: queries 'woodland-ai-search' first and, when that
 * returns fewer than `minHits` documents, additionally queries
 * 'woodland-ai-search-cases' once and merges both result lists without duplicates.
 */
class WoodlandAISearchGeneral extends Tool {
  static DEFAULT_TOP = 9;
  static DEFAULT_MIN_HITS = 3;
  static FAILURE_PREFIX = 'AZURE_SEARCH_FAILED';

  /**
   * @param {object} [fields] - Optional overrides. `fields.minHits` (or the
   *   WOODLAND_GENERAL_MIN_HITS env var) sets the primary-hit threshold. The whole
   *   object is also handed to the sub-tools so overrides passed here reach them.
   */
  constructor(fields = {}) {
    super();
    this.name = 'woodland-ai-search-general';
    this.description = "Grounded general Woodland search: query 'woodland-ai-search' first; if results are weak, fall back to 'woodland-ai-search-cases'.";

    this.schema = z.object({
      query: z.string().describe('Search phrase for Woodland General (Grounded)'),
      top: z.number().int().positive().optional(),
      minHits: z.number().int().positive().optional(),
      perToolTop: z.number().int().positive().optional(),
    });

    const configuredMinHits = Number(
      fields.minHits || process.env.WOODLAND_GENERAL_MIN_HITS || WoodlandAISearchGeneral.DEFAULT_MIN_HITS,
    );
    // A non-numeric setting would become NaN, and `count >= NaN` is always false,
    // which would force the fallback on every call. Use the default instead.
    this.minHits = Number.isFinite(configuredMinHits)
      ? configuredMinHits
      : WoodlandAISearchGeneral.DEFAULT_MIN_HITS;

    // Kept so the sub-tools are constructed with the same overrides as this tool.
    this.subToolFields = fields;
  }

  /**
   * Deduplication key for a document: first non-empty of `url`,
   * `website_url_primary`, `id`, `record_id`, `key`, else the JSON of the document.
   */
  _keyOf(d) {
    return (
      (typeof d?.url === 'string' && d.url) ||
      (typeof d?.website_url_primary === 'string' && d.website_url_primary) ||
      d?.id ||
      d?.record_id ||
      d?.key ||
      JSON.stringify(d)
    );
  }

  /** True when a sub-tool returned its in-band failure string instead of JSON. */
  _isFailure(res) {
    return typeof res === 'string' && res.startsWith(WoodlandAISearchGeneral.FAILURE_PREFIX);
  }

  /**
   * Convert a sub-tool result into an array of documents.
   *
   * @param {unknown} res - JSON string, failure string, or an array.
   * @returns {Array} The parsed documents; `[]` for failure strings, invalid JSON,
   *   or JSON that is not an array (so callers can always iterate the result).
   */
  _parseDocs(res) {
    try {
      if (typeof res === 'string') {
        if (this._isFailure(res)) return [];
        const parsed = JSON.parse(res);
        return Array.isArray(parsed) ? parsed : [];
      }
      return Array.isArray(res) ? res : [];
    } catch (_) {
      return [];
    }
  }

  /**
   * Concatenate document lists in order, dropping documents whose `_keyOf` key was
   * already seen and stopping once `limit` documents are collected.
   *
   * @param {Array<{ docs: Array, tool: string }>} sources - Lists in priority order;
   *   `tool` is written to `source_tool` when the document has none.
   * @param {number} limit - Maximum number of documents to return.
   * @returns {Array<object>} Shallow copies of the kept documents.
   */
  _mergeUnique(sources, limit) {
    const merged = [];
    const seen = new Set();
    for (const { docs, tool } of sources) {
      for (const d of docs) {
        const k = this._keyOf(d);
        if (seen.has(k)) continue;
        seen.add(k);
        merged.push({ ...d, source_tool: d?.source_tool || tool });
        if (merged.length >= limit) return merged;
      }
    }
    return merged;
  }

  /**
   * @param {{ query: string, top?: number, minHits?: number, perToolTop?: number }} data
   *   `top` caps the returned list (default DEFAULT_TOP), `perToolTop` is the `top`
   *   sent to each sub-tool (default: `top`), `minHits` overrides `this.minHits`.
   * @returns {Promise<string>} JSON array of documents, or
   *   `AZURE_SEARCH_FAILED: <message>` when something throws.
   */
  async _call(data) {
    const { query, top: topIn, minHits: minIn, perToolTop: perToolTopIn } = data;
    const positiveInt = (v, fallback) =>
      typeof v === 'number' && Number.isFinite(v) ? Math.max(1, Math.floor(v)) : fallback;
    const finalTop = positiveInt(topIn, WoodlandAISearchGeneral.DEFAULT_TOP);
    const perToolTop = positiveInt(perToolTopIn, finalTop);
    const minHits = positiveInt(minIn, this.minHits);

    try {
      // Phase 1: primary multi-index search
      const primary = new WoodlandAISearch(this.subToolFields);
      const primaryRaw = await primary._call({ query, top: perToolTop });
      if (this._isFailure(primaryRaw)) {
        // The failure is still treated as "no hits", but it is no longer silent.
        logger.warn('[woodland-ai-search-general] Primary woodland search failed', { error: primaryRaw });
      }
      const primaryDocs = this._parseDocs(primaryRaw);

      logger.info('[woodland-ai-search-general] Primary woodland hits', { count: primaryDocs.length });

      if (primaryDocs.length >= minHits) {
        // Return best N uniques from woodland only
        return JSON.stringify(
          this._mergeUnique([{ docs: primaryDocs, tool: 'woodland-ai-search' }], finalTop),
        );
      }

      // Phase 2: fallback to cases (exactly once)
      const cases = new WoodlandAISearchCases(this.subToolFields);
      const casesRaw = await cases._call({ query, top: perToolTop });
      if (this._isFailure(casesRaw)) {
        logger.warn('[woodland-ai-search-general] Fallback cases search failed', { error: casesRaw });
      }
      const casesDocs = this._parseDocs(casesRaw);

      logger.info('[woodland-ai-search-general] Fallback cases hits', { count: casesDocs.length });

      // Merge woodland -> cases with dedupe
      const merged = this._mergeUnique(
        [
          { docs: primaryDocs, tool: 'woodland-ai-search' },
          { docs: casesDocs, tool: 'woodland-ai-search-cases' },
        ],
        finalTop,
      );
      return JSON.stringify(merged);
    } catch (error) {
      logger.error('[woodland-ai-search-general] Failed', { error: error?.message || String(error) });
      return `AZURE_SEARCH_FAILED: ${error?.message || String(error)}`;
    }
  }
}
|
||||
|
||||
module.exports = WoodlandAISearchGeneral;
|
||||
|
494
api/app/clients/tools/structured/WoodlandAISearchTractor.js
Normal file
494
api/app/clients/tools/structured/WoodlandAISearchTractor.js
Normal file
|
@ -0,0 +1,494 @@
|
|||
// woodland-ai-search-tractor.js (single-index)
|
||||
const { z } = require('zod');
|
||||
const { Tool } = require('@langchain/core/tools');
|
||||
const { SearchClient, AzureKeyCredential } = require('@azure/search-documents');
|
||||
const { logger } = require('~/config');
|
||||
|
||||
/**
 * LangChain tool that queries a single "Tractor" Azure AI Search index with
 * semantic ranking, retries with progressively relaxed options when the service
 * rejects a request, and attaches a `normalized_compat` projection to every hit.
 */
class WoodlandAISearchTractor extends Tool {
  static DEFAULT_API_VERSION = '2024-07-01';
  static DEFAULT_TOP = 9;
  static DEFAULT_SELECT = 'id,title,content,url';
  /** Total number of search attempts (first try plus retries) made by `_safeSearch`. */
  static MAX_SEARCH_ATTEMPTS = 3;
  /** Index fields holding SKU / part numbers; used for exact-match filters and select. */
  static SKU_FIELDS = [
    'mda_sku',
    'ammda_sku',
    'hitch_sku',
    'amhitch_sku',
    'rubbercollar_sku',
    'hose_sku',
    'amhose_sku',
    'upgradehose_sku',
    'amupgradehose_sku',
  ];

  /** Return `v` unless it is null/undefined, in which case return `fallback`. */
  _env(v, fallback) {
    return v ?? fallback;
  }

  /**
   * @param {object} [fields] - Optional overrides keyed by the same names as the
   *   AZURE_AI_SEARCH_* environment variables; each one takes precedence over
   *   `process.env`.
   * @throws {Error} When the service endpoint, API key or index name is missing.
   */
  constructor(fields = {}) {
    super();
    this.name = 'woodland-ai-search-tractor';
    this.description = "Use the 'woodland-ai-search-tractor' tool to retrieve search results from the Tractor Azure AI Search index";

    this.schema = z.object({
      query: z.string().describe('Search word or phrase for Tractor Azure AI Search'),
      top: z.number().int().positive().optional(),
    });

    // Shared endpoint + key
    this.serviceEndpoint = this._env(
      fields.AZURE_AI_SEARCH_SERVICE_ENDPOINT,
      process.env.AZURE_AI_SEARCH_SERVICE_ENDPOINT,
    );
    this.apiKey = this._env(fields.AZURE_AI_SEARCH_API_KEY, process.env.AZURE_AI_SEARCH_API_KEY);

    // Single Tractor index name (supports multiple possible env names, falls back to generic index name)
    this.indexName =
      this._env(fields.AZURE_AI_SEARCH_TRACTOR_INDEX, process.env.AZURE_AI_SEARCH_TRACTOR_INDEX) ||
      this._env(fields.AZURE_AI_SEARCH_TRACTOR_INDEX_NAME, process.env.AZURE_AI_SEARCH_TRACTOR_INDEX_NAME) ||
      this._env(fields.AZURE_AI_SEARCH_INDEX_NAME, process.env.AZURE_AI_SEARCH_INDEX_NAME);

    if (!this.serviceEndpoint || !this.apiKey || !this.indexName) {
      throw new Error(
        'Missing Azure AI Search envs: AZURE_AI_SEARCH_SERVICE_ENDPOINT, AZURE_AI_SEARCH_API_KEY, and Tractor index (AZURE_AI_SEARCH_TRACTOR_INDEX or AZURE_AI_SEARCH_INDEX_NAME).',
      );
    }

    // Optional API version
    this.apiVersion = this._env(
      fields.AZURE_AI_SEARCH_API_VERSION,
      process.env.AZURE_AI_SEARCH_API_VERSION || WoodlandAISearchTractor.DEFAULT_API_VERSION,
    );

    // Defaults
    this.top = WoodlandAISearchTractor.DEFAULT_TOP;
    this.select = WoodlandAISearchTractor.DEFAULT_SELECT.split(',').map((s) => s.trim());

    // Search/semantic options
    this.searchFields = (() => {
      // Prefer tractor-specific override, else global override
      const v =
        this._env(
          fields.AZURE_AI_SEARCH_TRACTOR_SEARCH_FIELDS,
          process.env.AZURE_AI_SEARCH_TRACTOR_SEARCH_FIELDS,
        ) || this._env(fields.AZURE_AI_SEARCH_SEARCH_FIELDS, process.env.AZURE_AI_SEARCH_SEARCH_FIELDS);
      if (v) return String(v).split(',').map((s) => s.trim()).filter(Boolean);
      // Keep to known searchable fields in the Tractors index
      return ['title', 'content', 'mda_instructions', 'hitch_instructions'];
    })();
    this.semanticConfiguration = this._env(
      fields.AZURE_AI_SEARCH_SEMANTIC_CONFIGURATION,
      process.env.AZURE_AI_SEARCH_SEMANTIC_CONFIGURATION || 'sem1',
    );
    this.queryLanguage = this._env(
      fields.AZURE_AI_SEARCH_QUERY_LANGUAGE,
      process.env.AZURE_AI_SEARCH_QUERY_LANGUAGE || 'en-us',
    );
    this.scoringProfile = this._env(
      fields.AZURE_AI_SEARCH_SCORING_PROFILE,
      process.env.AZURE_AI_SEARCH_SCORING_PROFILE,
    );
    // Anything other than the (case-insensitive) string "true" disables return-all.
    this.returnAllFields = String(
      this._env(
        fields.AZURE_AI_SEARCH_RETURN_ALL_FIELDS,
        process.env.AZURE_AI_SEARCH_RETURN_ALL_FIELDS || 'true',
      ),
    )
      .toLowerCase()
      .trim() === 'true';

    // Initialize SearchClient
    const credential = new AzureKeyCredential(this.apiKey);
    this.client = new SearchClient(this.serviceEndpoint, this.indexName, credential, {
      apiVersion: this.apiVersion,
    });

    // The API key is deliberately left out of the log payload.
    logger.info('[woodland-ai-search-tractor] Initialized', {
      endpoint: this.serviceEndpoint,
      apiVersion: this.apiVersion,
      index: this.indexName,
      select: this.select,
      searchFields: this.searchFields,
      semanticConfiguration: this.semanticConfiguration,
      queryLanguage: this.queryLanguage,
      scoringProfile: this.scoringProfile,
    });
  }

  /** Identity key for a document: first truthy of url, id, record_id, key, else its JSON. */
  _keyOf(d) {
    return d?.url || d?.id || d?.record_id || d?.key || JSON.stringify(d);
  }

  /** Combine two optional OData filters with `and`; returns undefined when both are empty. */
  _andFilter(a, b) {
    if (!a && !b) return undefined;
    if (!a) return b;
    if (!b) return a;
    return `(${a}) and (${b})`;
  }

  /** Escape a value for use inside a single-quoted OData string literal. */
  _escapeLiteral(v) {
    return String(v).replace(/'/g, "''");
  }

  /**
   * Describe where a document came from.
   *
   * @returns {{ url?: string, host?: string, site?: *, page_type?: * }} `url`/`host`
   *   are omitted entirely when `d.url` cannot be parsed as a URL.
   */
  _provenance(d) {
    try {
      const url = (typeof d?.url === 'string' && d.url) || '';
      const host = url ? new URL(url).hostname : undefined;
      return { url: url || undefined, host, site: d?.site, page_type: d?.page_type };
    } catch (_) {
      return { site: d?.site, page_type: d?.page_type };
    }
  }

  /**
   * Heuristically pull compatible model names out of free text and tags.
   *
   * Looks for phrases such as "compatible with X, Y and Z", "fits: X; Y",
   * "models: X, Y" and splits what follows (up to a newline or period) on
   * commas, semicolons and the word "and". Tags of at most 40 characters that
   * contain a letter or digit are added as well.
   *
   * @param {*} text - Free text to scan (coerced to string).
   * @param {*} tags - Optional array of tags.
   * @returns {string[]|undefined} Unique names in discovery order, or undefined
   *   when nothing was found.
   */
  _extractCompatFromText(text, tags) {
    try {
      const out = new Set();
      const addMany = (arr) => arr.forEach((s) => {
        const v = String(s).trim();
        if (v) out.add(v);
      });
      const t = (text || '').toString();
      // Common patterns: "compatible with X, Y and Z", "fits: X; Y; Z", "models: X, Y"
      const patterns = [
        /compatible\s+with\s*[:\-]?\s*([^\n\.]+)/gi,
        /fits\s*[:\-]?\s*([^\n\.]+)/gi,
        /models?\s*[:\-]?\s*([^\n\.]+)/gi,
        /supported\s+models?\s*[:\-]?\s*([^\n\.]+)/gi,
      ];
      for (const re of patterns) {
        let m;
        while ((m = re.exec(t)) !== null) {
          const list = (m[1] || '')
            .replace(/\band\b/gi, ',')
            .split(/[;,]/)
            .map((s) => s.trim())
            .filter(Boolean);
          addMany(list);
        }
      }
      if (Array.isArray(tags)) {
        // Heuristic: tags that look like model/series names (contain letters/numbers/dashes)
        const tagModels = tags
          .map((x) => String(x).trim())
          .filter((x) => /[A-Za-z0-9]/.test(x) && x.length <= 40);
        addMany(tagModels);
      }
      return out.size ? Array.from(out) : undefined;
    } catch (_) {
      return undefined;
    }
  }

  /**
   * Normalize tractor compatibility-related fields for downstream rendering.
   * Does not change original values; returns a shallow copy of the document
   * with a new `normalized_compat` object attached.
   */
  _normalizeDoc(d) {
    const bool = (v) => (typeof v === 'boolean' ? v : undefined);
    const str = (v) => (v == null ? undefined : String(v));
    const list = (v) => (Array.isArray(v) ? v.filter(Boolean).map(String) : undefined);

    const tractor = [d?.tractor_make, d?.tractor_model, d?.tractor_deck_size]
      .filter((x) => x != null && String(x).trim().length > 0)
      .join(' ')
      .trim() || undefined;

    const normalized = {
      tractor, // e.g., "AMF 836 36"
      kit_or_assembly: str(d?.title) || str(d?.group_name),
      deck_opening_measurements_required: bool(d?.need_deck_open_measurements),
      mda_pre_cut: bool(d?.is_boot_pre_cut),
      customer_drilling_required: bool(d?.need_to_drill_deck),
      exhaust_deflection_needed: bool(d?.need_to_deflect_mower),
      compatible_with_large_rakes: bool(d?.can_connect_to_large_rakes),
      // For each part the "am"-prefixed SKU field wins over the plain one.
      aftermarket: {
        mda: str(d?.ammda_sku) || str(d?.mda_sku),
        hitch: str(d?.amhitch_sku) || str(d?.hitch_sku),
        hose: str(d?.amhose_sku) || str(d?.hose_sku),
        upgrade_hose: str(d?.amupgradehose_sku) || str(d?.upgradehose_sku),
        rubber_collar: str(d?.rubbercollar_sku),
      },
      compatible_with:
        list(d?.compatible_models) ||
        list(d?.compatible_series) ||
        this._extractCompatFromText(d?.content, d?.tags) ||
        undefined,
      notes: str(d?.content),
      picture_thumbnail_url: str(d?.picture_thumbnail_url),
      tags: list(d?.tags),
      provenance: this._provenance(d),
    };

    return { ...d, normalized_compat: normalized };
  }

  /**
   * Return a copy of `opts` without `answers`, `captions` or `speller` values
   * outside the sets this tool sends ('extractive'/'none', and
   * 'lexicon'/'simple'/'none' for the speller).
   */
  _sanitizeSearchOptions(opts) {
    const clean = { ...opts };
    const asStr = (v) => (typeof v === 'string' ? v.toLowerCase() : undefined);
    const answers = asStr(clean.answers);
    if (answers !== 'extractive' && answers !== 'none') delete clean.answers;
    const captions = asStr(clean.captions);
    if (captions !== 'extractive' && captions !== 'none') delete clean.captions;
    const speller = asStr(clean.speller);
    if (speller !== 'lexicon' && speller !== 'simple' && speller !== 'none') delete clean.speller;
    return clean;
  }

  /**
   * Execute a search, retrying with relaxed options when the service rejects it.
   *
   * After a failure the options are adjusted based on the error message:
   * `orderBy` is removed for semantic queries, fields reported as
   * "Unknown field '<name>'" are stripped from `select`/`searchFields` (and the
   * filter/orderBy dropped when the message does not mention the select or
   * search-field list), and as a last resort `searchFields` is dropped once.
   * If nothing could be adjusted, or attempts are exhausted, the last error is thrown.
   *
   * @param {string} query
   * @param {object} options - Options for `SearchClient.search`.
   * @returns {Promise<{ docs: object[], retried: boolean }>}
   * @throws The last error raised by the search client.
   */
  async _safeSearch(query, options) {
    const run = async (opts) => {
      const send = this._sanitizeSearchOptions(opts);
      logger.debug('[woodland-ai-search-tractor] Sending request', {
        query,
        options: JSON.stringify(send, null, 2),
      });
      const rs = await this.client.search(query, send);
      const items = [];
      for await (const r of rs.results) items.push(r.document);
      logger.debug('[woodland-ai-search-tractor] Received response', {
        count: items.length,
        sample: items.slice(0, 2),
      });
      return items;
    };

    const maxAttempts = WoodlandAISearchTractor.MAX_SEARCH_ATTEMPTS;
    let attempt = 0;
    let opts = { ...options };
    let lastErr;
    let droppedSearchFields = false;
    while (attempt < maxAttempts) {
      try {
        const docs = await run(opts);
        return { docs, retried: attempt > 0 };
      } catch (err) {
        lastErr = err;
        attempt += 1;
        const msg = (err && (err.message || String(err))) || '';
        logger.warn('[woodland-ai-search-tractor] Search failed', { attempt, msg });

        // Out of attempts: do not sanitize or log a "retrying" message for a
        // retry that will never be sent.
        if (attempt >= maxAttempts) break;

        const sanitized = { ...opts };
        let changed = false;

        // Remove orderBy for semantic queries (Azure restriction)
        if (/orderby/i.test(msg) && String(sanitized.queryType).toLowerCase() === 'semantic') {
          if (sanitized.orderBy) {
            delete sanitized.orderBy;
            changed = true;
            logger.info('[woodland-ai-search-tractor] Removing orderBy for semantic query and retrying');
          }
        }

        // Strip unknown fields from select/searchFields; drop filter if unknown field appears there
        const unknownFieldRegex = /Unknown field '([^']+)'/gi;
        const toRemove = [];
        let m;
        while ((m = unknownFieldRegex.exec(msg)) !== null) {
          const fld = (m[1] || '').trim();
          if (fld) toRemove.push(fld);
        }

        if (toRemove.length > 0) {
          if (Array.isArray(sanitized.select)) {
            const before = sanitized.select.length;
            sanitized.select = sanitized.select.filter((f) => !toRemove.includes(f));
            if (sanitized.select.length === 0) delete sanitized.select;
            // After a delete the optional chain yields undefined, which also counts as a change.
            if (sanitized.select?.length !== before) changed = true;
          }
          if (Array.isArray(sanitized.searchFields)) {
            const before = sanitized.searchFields.length;
            sanitized.searchFields = sanitized.searchFields.filter((f) => !toRemove.includes(f));
            if (sanitized.searchFields.length === 0) delete sanitized.searchFields;
            if (sanitized.searchFields?.length !== before) changed = true;
          }
          if (!/search field list|select/i.test(msg)) {
            if (sanitized.filter) {
              delete sanitized.filter;
              changed = true;
              logger.info('[woodland-ai-search-tractor] Dropping filter due to unknown fields and retrying');
            }
            if (sanitized.orderBy) {
              delete sanitized.orderBy;
              changed = true;
            }
          }
        }
        if (!changed && !droppedSearchFields && sanitized.searchFields) {
          delete sanitized.searchFields;
          droppedSearchFields = true;
          changed = true;
          logger.info('[woodland-ai-search-tractor] Dropping searchFields entirely and retrying');
        }

        if (!changed) break;
        opts = sanitized;
      }
    }
    throw lastErr;
  }

  /** Run `_safeSearch` and return just the documents (an empty array when absent). */
  async _tieredSearch(query, baseOptions) {
    const r = await this._safeSearch(query, baseOptions);
    return r.docs ?? [];
  }

  /**
   * Intent and entity detection (lightweight heuristics).
   *
   * Keywords are matched as plain substrings of the lower-cased query, so e.g.
   * "fit" also matches inside longer words. Intents are checked in the order
   * parts, compatibility, sop, marketing; the first match wins.
   *
   * @param {*} query
   * @returns {{ intent: 'parts'|'compatibility'|'sop'|'marketing'|'general',
   *   extracted: { partNumber?: string, partType?: string, family?: string, wantsPromo?: boolean } }}
   *   `partNumber` keeps the casing the user typed.
   */
  _detectIntent(query) {
    const raw = (query || '').toString();
    const q = raw.toLowerCase();
    const containsAny = (arr) => arr.some((w) => q.includes(w));
    const yearRegex = /\b(19|20)\d{2}\b/;
    const partRegex = /\b\d{2}-[a-z0-9]{2}-[a-z0-9]{3,}\b/i;
    // Match against the original text: the part number ends up in a
    // case-sensitive OData `eq` filter, so its casing must not be thrown away.
    const partMatch = raw.match(partRegex);
    const extracted = {};
    if (partMatch) extracted.partNumber = partMatch[0];

    const partTypes = ['collector bag', 'impeller', 'hose', 'recoil starter', 'starter', 'boot plate', 'side tube'];
    for (const t of partTypes) {
      if (q.includes(t)) {
        extracted.partType = t === 'starter' ? 'recoil starter' : t;
        break;
      }
    }

    if (q.includes('commercial pro')) extracted.family = 'Commercial PRO';
    else if (q.includes('commander pro') || q.includes('commander')) extracted.family = 'Commander Pro';
    else if (q.includes('standard complete platinum') || q.includes('platinum')) extracted.family = 'Standard Complete Platinum';
    else if (q.includes('classic')) extracted.family = 'Classic';

    if (containsAny(['promotion', 'sale', 'discount', 'coupon', 'financing'])) extracted.wantsPromo = true;

    if (
      partMatch ||
      containsAny(['part', 'replacement', 'buy', 'order', 'sku', 'view/buy', 'add to cart', 'price', 'bag', 'hose', 'clamp'])
    ) {
      return { intent: 'parts', extracted };
    }
    if (containsAny(['engine', 'fit', 'fits', 'fitment', 'compatible', 'compatibility', 'which engine', 'used in']) || yearRegex.test(q)) {
      return { intent: 'compatibility', extracted };
    }
    if (containsAny(['how to', 'install', 'installation', 'guide', 'manual', 'troubleshoot', 'troubleshooting', 'winterization', 'sop'])) {
      return { intent: 'sop', extracted };
    }
    if (containsAny(['compare', 'benefits', 'why choose', 'financing', 'promotion', 'warranty'])) {
      return { intent: 'marketing', extracted };
    }
    return { intent: 'general', extracted };
  }

  /**
   * Per-intent options for the single Tractor index.
   *
   * @param {string} intent - Intent from `_detectIntent`.
   * @param {object} [extracted] - Entities from `_detectIntent`.
   * @returns {{ filter: string|undefined, searchFields: string[], select?: string[] }}
   *   `select` is only present when `this.returnAllFields` is false. For 'parts'
   *   with a part number the filter matches any SKU field against the number as
   *   given and in upper and lower case; for 'compatibility' with a family it
   *   filters on `group_name`.
   */
  _optionsForIntent(intent, extracted = {}) {
    const maybe = (o, sel) => (this.returnAllFields ? o : { ...o, select: sel });
    const pn = extracted.partNumber || '';
    const skuFields = WoodlandAISearchTractor.SKU_FIELDS;

    const baseSelect = [
      'id',
      'title',
      'content',
      'tractor_make',
      'tractor_model',
      'tractor_deck_size',
      'group_name',
      'is_active',
      ...skuFields,
      'is_boot_pre_cut',
      'can_connect_to_large_rakes',
      'need_to_drill_deck',
      'need_to_deflect_mower',
      'need_deck_open_measurements',
      'category',
      'picture_thumbnail_url',
      'tags',
      'mda_instructions',
      'hitch_instructions',
    ];

    let filter;
    if (intent === 'parts' && pn) {
      // OData string equality is case-sensitive and the casing stored in the
      // index is not known here, so accept the typed, upper and lower forms.
      const text = String(pn);
      const variants = [...new Set([text, text.toUpperCase(), text.toLowerCase()])];
      const eqs = skuFields
        .flatMap((f) => variants.map((v) => `${f} eq '${this._escapeLiteral(v)}'`))
        .join(' or ');
      filter = eqs || undefined;
    } else if (intent === 'compatibility' && extracted.family) {
      filter = `group_name eq '${this._escapeLiteral(extracted.family)}'`;
    }

    // 'sop', 'marketing' and 'general' currently share the unfiltered defaults.
    return maybe({ filter, searchFields: this.searchFields }, baseSelect);
  }

  /**
   * Tool entry point: classify the query, search the Tractor index and return
   * the normalized hits.
   *
   * @param {{ query: string, top?: number }} data - `top` is floored and clamped
   *   to at least 1; `this.top` is used when it is not a finite number.
   * @returns {Promise<string>} JSON array of documents (each with
   *   `normalized_compat`), or `AZURE_SEARCH_FAILED: <message>` on error.
   */
  async _call(data) {
    const { query, top: topIn } = data;
    const finalTop = typeof topIn === 'number' && Number.isFinite(topIn) ? Math.max(1, Math.floor(topIn)) : this.top;

    try {
      // A quoted phrase or a boolean operator requires every term to match.
      const q = (query || '').toString();
      const inferredMode = /".+"/.test(q) || /\b(AND|OR|NOT)\b/i.test(q) ? 'all' : 'any';

      const baseOptions = {
        queryType: 'semantic',
        searchMode: inferredMode,
        top: finalTop,
        semanticSearchOptions: {
          configurationName: this.semanticConfiguration,
          queryLanguage: this.queryLanguage,
        },
        answers: 'extractive',
        captions: 'extractive',
        speller: 'lexicon',
        // Replaced by the per-intent select below when return-all is disabled.
        select: this.returnAllFields ? undefined : this.select,
      };
      if (this.scoringProfile) baseOptions.scoringProfile = this.scoringProfile;

      const { intent, extracted } = this._detectIntent(query);
      const intentOptions = this._optionsForIntent(intent, extracted);
      const options = { ...baseOptions, ...intentOptions };

      // orderBy not supported with semantic ranking
      if (String(options.queryType).toLowerCase() === 'semantic' && options.orderBy) {
        delete options.orderBy;
      }

      let docs = await this._tieredSearch(query, options);
      // Attach normalized compatibility projection and provenance to each doc
      if (Array.isArray(docs)) {
        docs = docs.map((d) => (d ? this._normalizeDoc(d) : d));
      }
      logger.info('[woodland-ai-search-tractor] Query done', { count: Array.isArray(docs) ? docs.length : 0 });

      return JSON.stringify(docs || []);
    } catch (error) {
      logger.error('[woodland-ai-search-tractor] Azure AI Search request failed', {
        error: error?.message || String(error),
      });
      const msg = (error && (error.message || String(error))) || 'Unknown error';
      return `AZURE_SEARCH_FAILED: ${msg}`;
    }
  }
}
|
||||
|
||||
module.exports = WoodlandAISearchTractor;
|
|
@ -23,6 +23,10 @@ const {
|
|||
StructuredSD,
|
||||
StructuredACS,
|
||||
StructuredWPPACS,
|
||||
StructuredWPPACSTractor,
|
||||
StructuredWPPACSCases,
|
||||
StructuredWPPACSAll,
|
||||
StructuredWPPACSGeneral,
|
||||
TraversaalSearch,
|
||||
StructuredWolfram,
|
||||
createYouTubeTools,
|
||||
|
@ -171,6 +175,10 @@ const loadTools = async ({
|
|||
'stable-diffusion': StructuredSD,
|
||||
'azure-ai-search': StructuredACS,
|
||||
'woodland-ai-search': StructuredWPPACS,
|
||||
'woodland-ai-search-tractor': StructuredWPPACSTractor,
|
||||
'woodland-ai-search-cases': StructuredWPPACSCases,
|
||||
'woodland-ai-search-all': StructuredWPPACSAll,
|
||||
'woodland-ai-search-general': StructuredWPPACSGeneral,
|
||||
traversaal_search: TraversaalSearch,
|
||||
tavily_search_results_json: TavilySearchResults,
|
||||
};
|
||||
|
|
|
@ -1,53 +1,29 @@
|
|||
// Strict citation builder for Woodland results
|
||||
// - Uses only URL fields present in the payload (never constructs URLs)
|
||||
// - Applies allow-list filtering for hosts
|
||||
// Simple citation builder for Woodland results
|
||||
// Returns URLs exactly as present in the search document payload.
|
||||
// Optional: if WOODLAND_CITATIONS_URL_ALLOWLIST is set (comma-separated hosts),
|
||||
// only URLs whose hostname matches the allowlist (or its subdomains) are returned.
|
||||
|
||||
const allowList = new Set([
|
||||
'airtable.com',
|
||||
// Base domain covers website and subdomains such as support.cyclonerake.com
|
||||
'cyclonerake.com',
|
||||
// Kept for back-compat; optional explicit subdomain entry
|
||||
'support.cyclonerake.com',
|
||||
]);
|
||||
const rawAllow = (process.env.WOODLAND_CITATIONS_URL_ALLOWLIST || '').split(',').map(s => s.trim()).filter(Boolean);
|
||||
const allowSet = new Set(rawAllow);
|
||||
|
||||
function isAllowedUrl(u) {
|
||||
function isAllowed(u) {
|
||||
if (allowSet.size === 0) return true; // pass-through when no allowlist configured
|
||||
try {
|
||||
const url = new URL(u);
|
||||
const proto = url.protocol.toLowerCase();
|
||||
if (proto !== 'http:' && proto !== 'https:') return false;
|
||||
const host = url.hostname.toLowerCase();
|
||||
for (const d of allowList) {
|
||||
if (host === d || host.endsWith('.' + d)) return true;
|
||||
for (const d of allowSet) {
|
||||
const dd = d.toLowerCase();
|
||||
if (host === dd || host.endsWith('.' + dd)) return true;
|
||||
}
|
||||
return false;
|
||||
} catch (_) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Extracts the first allowed URL found in a block of text
|
||||
function extractAllowedUrl(text) {
|
||||
if (typeof text !== 'string' || !text) return undefined;
|
||||
// Basic http/https URL matcher
|
||||
const urlRegex = /(https?:\/\/[^\s)]+)[)\]\s]?/gi;
|
||||
let match;
|
||||
while ((match = urlRegex.exec(text)) !== null) {
|
||||
const candidate = match[1];
|
||||
if (isAllowedUrl(candidate)) return candidate;
|
||||
}
|
||||
return undefined;
|
||||
} catch (_) { return false; }
|
||||
}
|
||||
|
||||
function urlFromHit(hit) {
|
||||
const u = hit?.url;
|
||||
if (typeof u === 'string' && u && isAllowedUrl(u)) return u;
|
||||
// Fallback: scan chunk/text/snippet for the first allowed URL
|
||||
return (
|
||||
extractAllowedUrl(hit?.chunk) ||
|
||||
extractAllowedUrl(hit?.text) ||
|
||||
extractAllowedUrl(hit?.snippet) ||
|
||||
undefined
|
||||
);
|
||||
if (typeof u !== 'string' || !u) return undefined;
|
||||
if (!isAllowed(u)) return undefined;
|
||||
return u;
|
||||
}
|
||||
|
||||
function shortSummary(hit) {
|
||||
|
@ -102,8 +78,6 @@ function buildCitations({ airtable = [], cyclopedia = [], website = [] }) {
|
|||
}
|
||||
|
||||
module.exports = {
|
||||
isAllowedUrl,
|
||||
extractAllowedUrl,
|
||||
urlFromHit,
|
||||
shortSummary,
|
||||
classifySource,
|
||||
|
|
|
@ -17,7 +17,7 @@ interface:
|
|||
runCode: false
|
||||
webSearch: false
|
||||
fileSearch: false
|
||||
modelSelect: false
|
||||
modelSelect: true
|
||||
sidePanel : true # Enable/disable the side panel (default: false)
|
||||
# MCP Servers UI configuration
|
||||
mcpServers:
|
||||
|
@ -32,6 +32,11 @@ interface:
|
|||
# Temporary chat retention period in hours (default: 720, min: 1, max: 8760)
|
||||
# temporaryChatRetention: 1
|
||||
|
||||
# Limit the selector to only show the Agents endpoint
|
||||
#modelSpecs:
|
||||
#addedEndpoints:
|
||||
#- agents
|
||||
|
||||
# Example Cloudflare turnstile (optional)
|
||||
#turnstile:
|
||||
# siteKey: "your-site-key-here"
|
||||
|
@ -149,6 +154,7 @@ endpoints:
|
|||
disableBuilder: false
|
||||
# Limit global Agent capabilities; individual Agents can still narrow these down
|
||||
capabilities: ["actions", "tools"]
|
||||
minRelevanceScore: 0.7
|
||||
|
||||
# fileConfig:
|
||||
# endpoints:
|
||||
|
@ -182,7 +188,7 @@ endpoints:
|
|||
|
||||
# Memory configuration for user memories
|
||||
memory:
|
||||
disabled: false
|
||||
disabled: true
|
||||
validKeys: ["preferences", "work_info", "personal_info", "skills", "interests", "context"]
|
||||
tokenLimit: 10000
|
||||
personalize: true
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue