tracks/vendor/selenium/xpath/xpath.js

2182 lines
58 KiB
JavaScript

// Copyright 2005 Google Inc.
// All Rights Reserved
//
// An XPath parser and evaluator written in JavaScript. The
// implementation is complete except for functions handling
// namespaces.
//
// Reference: [XPATH] XPath Specification
// <http://www.w3.org/TR/1999/REC-xpath-19991116>.
//
//
// The API of the parser has several parts:
//
// 1. The parser function xpathParse() that takes a string and returns
// an expession object.
//
// 2. The expression object that has an evaluate() method to evaluate the
// XPath expression it represents. (It is actually a hierarchy of
// objects that resembles the parse tree, but an application will call
// evaluate() only on the top node of this hierarchy.)
//
// 3. The context object that is passed as an argument to the evaluate()
// method, which represents the DOM context in which the expression is
// evaluated.
//
// 4. The value object that is returned from evaluate() and represents
// values of the different types that are defined by XPath (number,
// string, boolean, and node-set), and allows to convert between them.
//
// These parts are near the top of the file, the functions and data
// that are used internally follow after them.
//
//
// TODO(mesch): add jsdoc comments. Use more coherent naming.
//
//
// Author: Steffen Meschkat <mesch@google.com>
// The entry point for the parser.
//
// @param expr a string that contains an XPath expression.
// @return an expression object that can be evaluated with an
// expression context.
function xpathParse(expr) {
if (xpathdebug) {
Log.write('XPath parse ' + expr);
}
xpathParseInit();
var cached = xpathCacheLookup(expr);
if (cached) {
if (xpathdebug) {
Log.write(' ... cached');
}
return cached;
}
// Optimize for a few common cases: simple attribute node tests
// (@id), simple element node tests (page), variable references
// ($address), numbers (4), multi-step path expressions where each
// step is a plain element node test
// (page/overlay/locations/location).
if (expr.match(/^(\$|@)?\w+$/i)) {
var ret = makeSimpleExpr(expr);
xpathParseCache[expr] = ret;
if (xpathdebug) {
Log.write(' ... simple');
}
return ret;
}
if (expr.match(/^\w+(\/\w+)*$/i)) {
var ret = makeSimpleExpr2(expr);
xpathParseCache[expr] = ret;
if (xpathdebug) {
Log.write(' ... simple 2');
}
return ret;
}
var cachekey = expr; // expr is modified during parse
if (xpathdebug) {
Timer.start('XPath parse', cachekey);
}
var stack = [];
var ahead = null;
var previous = null;
var done = false;
var parse_count = 0;
var lexer_count = 0;
var reduce_count = 0;
while (!done) {
parse_count++;
expr = expr.replace(/^\s*/, '');
previous = ahead;
ahead = null;
var rule = null;
var match = '';
for (var i = 0; i < xpathTokenRules.length; ++i) {
var result = xpathTokenRules[i].re.exec(expr);
lexer_count++;
if (result && result.length > 0 && result[0].length > match.length) {
rule = xpathTokenRules[i];
match = result[0];
break;
}
}
// Special case: allow operator keywords to be element and
// variable names.
// NOTE(mesch): The parser resolves conflicts by looking ahead,
// and this is the only case where we look back to
// disambiguate. So this is indeed something different, and
// looking back is usually done in the lexer (via states in the
// general case, called "start conditions" in flex(1)). Also,the
// conflict resolution in the parser is not as robust as it could
// be, so I'd like to keep as much off the parser as possible (all
// these precedence values should be computed from the grammar
// rules and possibly associativity declarations, as in bison(1),
// and not explicitly set.
if (rule &&
(rule == TOK_DIV ||
rule == TOK_MOD ||
rule == TOK_AND ||
rule == TOK_OR) &&
(!previous ||
previous.tag == TOK_AT ||
previous.tag == TOK_DSLASH ||
previous.tag == TOK_SLASH ||
previous.tag == TOK_AXIS ||
previous.tag == TOK_DOLLAR)) {
rule = TOK_QNAME;
}
if (rule) {
expr = expr.substr(match.length);
if (xpathdebug) {
Log.write('token: ' + match + ' -- ' + rule.label);
}
ahead = {
tag: rule,
match: match,
prec: rule.prec ? rule.prec : 0, // || 0 is removed by the compiler
expr: makeTokenExpr(match)
};
} else {
if (xpathdebug) {
Log.write('DONE');
}
done = true;
}
while (xpathReduce(stack, ahead)) {
reduce_count++;
if (xpathdebug) {
Log.write('stack: ' + stackToString(stack));
}
}
}
if (xpathdebug) {
Log.write(stackToString(stack));
}
if (stack.length != 1) {
throw 'XPath parse error ' + cachekey + ':\n' + stackToString(stack);
}
var result = stack[0].expr;
xpathParseCache[cachekey] = result;
if (xpathdebug) {
Timer.end('XPath parse', cachekey);
}
if (xpathdebug) {
Log.write('XPath parse: ' + parse_count + ' / ' +
lexer_count + ' / ' + reduce_count);
}
return result;
}
var xpathParseCache = {};
function xpathCacheLookup(expr) {
return xpathParseCache[expr];
}
function xpathReduce(stack, ahead) {
var cand = null;
if (stack.length > 0) {
var top = stack[stack.length-1];
var ruleset = xpathRules[top.tag.key];
if (ruleset) {
for (var i = 0; i < ruleset.length; ++i) {
var rule = ruleset[i];
var match = xpathMatchStack(stack, rule[1]);
if (match.length) {
cand = {
tag: rule[0],
rule: rule,
match: match
};
cand.prec = xpathGrammarPrecedence(cand);
break;
}
}
}
}
var ret;
if (cand && (!ahead || cand.prec > ahead.prec ||
(ahead.tag.left && cand.prec >= ahead.prec))) {
for (var i = 0; i < cand.match.matchlength; ++i) {
stack.pop();
}
if (xpathdebug) {
Log.write('reduce ' + cand.tag.label + ' ' + cand.prec +
' ahead ' + (ahead ? ahead.tag.label + ' ' + ahead.prec +
(ahead.tag.left ? ' left' : '')
: ' none '));
}
var matchexpr = mapExpr(cand.match, function(m) { return m.expr; });
cand.expr = cand.rule[3].apply(null, matchexpr);
stack.push(cand);
ret = true;
} else {
if (ahead) {
if (xpathdebug) {
Log.write('shift ' + ahead.tag.label + ' ' + ahead.prec +
(ahead.tag.left ? ' left' : '') +
' over ' + (cand ? cand.tag.label + ' ' +
cand.prec : ' none'));
}
stack.push(ahead);
}
ret = false;
}
return ret;
}
function xpathMatchStack(stack, pattern) {
// NOTE(mesch): The stack matches for variable cardinality are
// greedy but don't do backtracking. This would be an issue only
// with rules of the form A* A, i.e. with an element with variable
// cardinality followed by the same element. Since that doesn't
// occur in the grammar at hand, all matches on the stack are
// unambiguous.
var S = stack.length;
var P = pattern.length;
var p, s;
var match = [];
match.matchlength = 0;
var ds = 0;
for (p = P - 1, s = S - 1; p >= 0 && s >= 0; --p, s -= ds) {
ds = 0;
var qmatch = [];
if (pattern[p] == Q_MM) {
p -= 1;
match.push(qmatch);
while (s - ds >= 0 && stack[s - ds].tag == pattern[p]) {
qmatch.push(stack[s - ds]);
ds += 1;
match.matchlength += 1;
}
} else if (pattern[p] == Q_01) {
p -= 1;
match.push(qmatch);
while (s - ds >= 0 && ds < 2 && stack[s - ds].tag == pattern[p]) {
qmatch.push(stack[s - ds]);
ds += 1;
match.matchlength += 1;
}
} else if (pattern[p] == Q_1M) {
p -= 1;
match.push(qmatch);
if (stack[s].tag == pattern[p]) {
while (s - ds >= 0 && stack[s - ds].tag == pattern[p]) {
qmatch.push(stack[s - ds]);
ds += 1;
match.matchlength += 1;
}
} else {
return [];
}
} else if (stack[s].tag == pattern[p]) {
match.push(stack[s]);
ds += 1;
match.matchlength += 1;
} else {
return [];
}
reverseInplace(qmatch);
qmatch.expr = mapExpr(qmatch, function(m) { return m.expr; });
}
reverseInplace(match);
if (p == -1) {
return match;
} else {
return [];
}
}
function xpathTokenPrecedence(tag) {
return tag.prec || 2;
}
function xpathGrammarPrecedence(frame) {
var ret = 0;
if (frame.rule) { /* normal reduce */
if (frame.rule.length >= 3 && frame.rule[2] >= 0) {
ret = frame.rule[2];
} else {
for (var i = 0; i < frame.rule[1].length; ++i) {
var p = xpathTokenPrecedence(frame.rule[1][i]);
ret = Math.max(ret, p);
}
}
} else if (frame.tag) { /* TOKEN match */
ret = xpathTokenPrecedence(frame.tag);
} else if (frame.length) { /* Q_ match */
for (var j = 0; j < frame.length; ++j) {
var p = xpathGrammarPrecedence(frame[j]);
ret = Math.max(ret, p);
}
}
return ret;
}
function stackToString(stack) {
var ret = '';
for (var i = 0; i < stack.length; ++i) {
if (ret) {
ret += '\n';
}
ret += stack[i].tag.label;
}
return ret;
}
// XPath expression evaluation context. An XPath context consists of a
// DOM node, a list of DOM nodes that contains this node, a number
// that represents the position of the single node in the list, and a
// current set of variable bindings. (See XPath spec.)
//
// The interface of the expression context:
//
// Constructor -- gets the node, its position, the node set it
// belongs to, and a parent context as arguments. The parent context
// is used to implement scoping rules for variables: if a variable
// is not found in the current context, it is looked for in the
// parent context, recursively. Except for node, all arguments have
// default values: default position is 0, default node set is the
// set that contains only the node, and the default parent is null.
//
// Notice that position starts at 0 at the outside interface;
// inside XPath expressions this shows up as position()=1.
//
// clone() -- creates a new context with the current context as
// parent. If passed as argument to clone(), the new context has a
// different node, position, or node set. What is not passed is
// inherited from the cloned context.
//
// setVariable(name, expr) -- binds given XPath expression to the
// name.
//
// getVariable(name) -- what the name says.
//
// setNode(node, position) -- sets the context to the new node and
// its corresponding position. Needed to implement scoping rules for
// variables in XPath. (A variable is visible to all subsequent
// siblings, not only to its children.)
function ExprContext(node, position, nodelist, parent) {
this.node = node;
this.position = position || 0;
this.nodelist = nodelist || [ node ];
this.variables = {};
this.parent = parent || null;
this.root = parent ? parent.root : node.ownerDocument;
}
ExprContext.prototype.clone = function(node, position, nodelist) {
return new
ExprContext(node || this.node,
typeof position != 'undefined' ? position : this.position,
nodelist || this.nodelist, this);
};
ExprContext.prototype.setVariable = function(name, value) {
this.variables[name] = value;
};
ExprContext.prototype.getVariable = function(name) {
if (typeof this.variables[name] != 'undefined') {
return this.variables[name];
} else if (this.parent) {
return this.parent.getVariable(name);
} else {
return null;
}
}
ExprContext.prototype.setNode = function(node, position) {
this.node = node;
this.position = position;
}
// XPath expression values. They are what XPath expressions evaluate
// to. Strangely, the different value types are not specified in the
// XPath syntax, but only in the semantics, so they don't show up as
// nonterminals in the grammar. Yet, some expressions are required to
// evaluate to particular types, and not every type can be coerced
// into every other type. Although the types of XPath values are
// similar to the types present in JavaScript, the type coercion rules
// are a bit peculiar, so we explicitly model XPath types instead of
// mapping them onto JavaScript types. (See XPath spec.)
//
// The four types are:
//
// StringValue
//
// NumberValue
//
// BooleanValue
//
// NodeSetValue
//
// The common interface of the value classes consists of methods that
// implement the XPath type coercion rules:
//
// stringValue() -- returns the value as a JavaScript String,
//
// numberValue() -- returns the value as a JavaScript Number,
//
// booleanValue() -- returns the value as a JavaScript Boolean,
//
// nodeSetValue() -- returns the value as a JavaScript Array of DOM
// Node objects.
//
function StringValue(value) {
this.value = value;
this.type = 'string';
}
StringValue.prototype.stringValue = function() {
return this.value;
}
StringValue.prototype.booleanValue = function() {
return this.value.length > 0;
}
StringValue.prototype.numberValue = function() {
return this.value - 0;
}
StringValue.prototype.nodeSetValue = function() {
throw this + ' ' + Error().stack;
}
function BooleanValue(value) {
this.value = value;
this.type = 'boolean';
}
BooleanValue.prototype.stringValue = function() {
return '' + this.value;
}
BooleanValue.prototype.booleanValue = function() {
return this.value;
}
BooleanValue.prototype.numberValue = function() {
return this.value ? 1 : 0;
}
BooleanValue.prototype.nodeSetValue = function() {
throw this + ' ' + Error().stack;
}
function NumberValue(value) {
this.value = value;
this.type = 'number';
}
NumberValue.prototype.stringValue = function() {
return '' + this.value;
}
NumberValue.prototype.booleanValue = function() {
return !!this.value;
}
NumberValue.prototype.numberValue = function() {
return this.value - 0;
}
NumberValue.prototype.nodeSetValue = function() {
throw this + ' ' + Error().stack;
}
function NodeSetValue(value) {
this.value = value;
this.type = 'node-set';
}
NodeSetValue.prototype.stringValue = function() {
if (this.value.length == 0) {
return '';
} else {
return xmlValue(this.value[0]);
}
}
NodeSetValue.prototype.booleanValue = function() {
return this.value.length > 0;
}
NodeSetValue.prototype.numberValue = function() {
return this.stringValue() - 0;
}
NodeSetValue.prototype.nodeSetValue = function() {
return this.value;
};
// XPath expressions. They are used as nodes in the parse tree and
// possess an evaluate() method to compute an XPath value given an XPath
// context. Expressions are returned from the parser. Teh set of
// expression classes closely mirrors the set of non terminal symbols
// in the grammar. Every non trivial nonterminal symbol has a
// corresponding expression class.
//
// The common expression interface consists of the following methods:
//
// evaluate(context) -- evaluates the expression, returns a value.
//
// toString() -- returns the XPath text representation of the
// expression (defined in xsltdebug.js).
//
// parseTree(indent) -- returns a parse tree representation of the
// expression (defined in xsltdebug.js).
function TokenExpr(m) {
this.value = m;
}
TokenExpr.prototype.evaluate = function() {
return new StringValue(this.value);
};
function LocationExpr() {
this.absolute = false;
this.steps = [];
}
LocationExpr.prototype.appendStep = function(s) {
this.steps.push(s);
}
LocationExpr.prototype.prependStep = function(s) {
var steps0 = this.steps;
this.steps = [ s ];
for (var i = 0; i < steps0.length; ++i) {
this.steps.push(steps0[i]);
}
};
LocationExpr.prototype.evaluate = function(ctx) {
var start;
if (this.absolute) {
start = ctx.root;
} else {
start = ctx.node;
}
var nodes = [];
xPathStep(nodes, this.steps, 0, start, ctx);
return new NodeSetValue(nodes);
};
function xPathStep(nodes, steps, step, input, ctx) {
var s = steps[step];
var ctx2 = ctx.clone(input);
var nodelist = s.evaluate(ctx2).nodeSetValue();
for (var i = 0; i < nodelist.length; ++i) {
if (step == steps.length - 1) {
nodes.push(nodelist[i]);
} else {
xPathStep(nodes, steps, step + 1, nodelist[i], ctx);
}
}
}
function StepExpr(axis, nodetest, predicate) {
this.axis = axis;
this.nodetest = nodetest;
this.predicate = predicate || [];
}
StepExpr.prototype.appendPredicate = function(p) {
this.predicate.push(p);
}
StepExpr.prototype.evaluate = function(ctx) {
var input = ctx.node;
var nodelist = [];
// NOTE(mesch): When this was a switch() statement, it didn't work
// in Safari/2.0. Not sure why though; it resulted in the JavaScript
// console output "undefined" (without any line number or so).
if (this.axis == xpathAxis.ANCESTOR_OR_SELF) {
nodelist.push(input);
for (var n = input.parentNode; n; n = input.parentNode) {
nodelist.push(n);
}
} else if (this.axis == xpathAxis.ANCESTOR) {
for (var n = input.parentNode; n; n = input.parentNode) {
nodelist.push(n);
}
} else if (this.axis == xpathAxis.ATTRIBUTE) {
copyArray(nodelist, input.attributes);
} else if (this.axis == xpathAxis.CHILD) {
copyArray(nodelist, input.childNodes);
} else if (this.axis == xpathAxis.DESCENDANT_OR_SELF) {
nodelist.push(input);
xpathCollectDescendants(nodelist, input);
} else if (this.axis == xpathAxis.DESCENDANT) {
xpathCollectDescendants(nodelist, input);
} else if (this.axis == xpathAxis.FOLLOWING) {
for (var n = input.parentNode; n; n = n.parentNode) {
for (var nn = n.nextSibling; nn; nn = nn.nextSibling) {
nodelist.push(nn);
xpathCollectDescendants(nodelist, nn);
}
}
} else if (this.axis == xpathAxis.FOLLOWING_SIBLING) {
for (var n = input.nextSibling; n; n = input.nextSibling) {
nodelist.push(n);
}
} else if (this.axis == xpathAxis.NAMESPACE) {
alert('not implemented: axis namespace');
} else if (this.axis == xpathAxis.PARENT) {
if (input.parentNode) {
nodelist.push(input.parentNode);
}
} else if (this.axis == xpathAxis.PRECEDING) {
for (var n = input.parentNode; n; n = n.parentNode) {
for (var nn = n.previousSibling; nn; nn = nn.previousSibling) {
nodelist.push(nn);
xpathCollectDescendantsReverse(nodelist, nn);
}
}
} else if (this.axis == xpathAxis.PRECEDING_SIBLING) {
for (var n = input.previousSibling; n; n = input.previousSibling) {
nodelist.push(n);
}
} else if (this.axis == xpathAxis.SELF) {
nodelist.push(input);
} else {
throw 'ERROR -- NO SUCH AXIS: ' + this.axis;
}
// process node test
var nodelist0 = nodelist;
nodelist = [];
for (var i = 0; i < nodelist0.length; ++i) {
var n = nodelist0[i];
if (this.nodetest.evaluate(ctx.clone(n, i, nodelist0)).booleanValue()) {
nodelist.push(n);
}
}
// process predicates
for (var i = 0; i < this.predicate.length; ++i) {
var nodelist0 = nodelist;
nodelist = [];
for (var ii = 0; ii < nodelist0.length; ++ii) {
var n = nodelist0[ii];
if (this.predicate[i].evaluate(ctx.clone(n, ii, nodelist0)).booleanValue()) {
nodelist.push(n);
}
}
}
return new NodeSetValue(nodelist);
};
function NodeTestAny() {
this.value = new BooleanValue(true);
}
NodeTestAny.prototype.evaluate = function(ctx) {
return this.value;
};
function NodeTestElement() {}
NodeTestElement.prototype.evaluate = function(ctx) {
return new BooleanValue(ctx.node.nodeType == DOM_ELEMENT_NODE);
}
function NodeTestText() {}
NodeTestText.prototype.evaluate = function(ctx) {
return new BooleanValue(ctx.node.nodeType == DOM_TEXT_NODE);
}
function NodeTestComment() {}
NodeTestComment.prototype.evaluate = function(ctx) {
return new BooleanValue(ctx.node.nodeType == DOM_COMMENT_NODE);
}
function NodeTestPI(target) {
this.target = target;
}
NodeTestPI.prototype.evaluate = function(ctx) {
return new
BooleanValue(ctx.node.nodeType == DOM_PROCESSING_INSTRUCTION_NODE &&
(!this.target || ctx.node.nodeName == this.target));
}
function NodeTestNC(nsprefix) {
this.regex = new RegExp("^" + nsprefix + ":");
this.nsprefix = nsprefix;
}
NodeTestNC.prototype.evaluate = function(ctx) {
var n = ctx.node;
return new BooleanValue(this.regex.match(n.nodeName));
}
function NodeTestName(name) {
this.name = name;
}
NodeTestName.prototype.evaluate = function(ctx) {
var n = ctx.node;
// NOTE (Patrick Lightbody): this change allows node selection to be case-insensitive
return new BooleanValue(n.nodeName.toUpperCase() == this.name.toUpperCase());
}
function PredicateExpr(expr) {
this.expr = expr;
}
PredicateExpr.prototype.evaluate = function(ctx) {
var v = this.expr.evaluate(ctx);
if (v.type == 'number') {
// NOTE(mesch): Internally, position is represented starting with
// 0, however in XPath position starts with 1. See functions
// position() and last().
return new BooleanValue(ctx.position == v.numberValue() - 1);
} else {
return new BooleanValue(v.booleanValue());
}
};
function FunctionCallExpr(name) {
this.name = name;
this.args = [];
}
FunctionCallExpr.prototype.appendArg = function(arg) {
this.args.push(arg);
};
FunctionCallExpr.prototype.evaluate = function(ctx) {
var fn = '' + this.name.value;
var f = this.xpathfunctions[fn];
if (f) {
return f.call(this, ctx);
} else {
Log.write('XPath NO SUCH FUNCTION ' + fn);
return new BooleanValue(false);
}
};
FunctionCallExpr.prototype.xpathfunctions = {
'last': function(ctx) {
assert(this.args.length == 0);
// NOTE(mesch): XPath position starts at 1.
return new NumberValue(ctx.nodelist.length);
},
'position': function(ctx) {
assert(this.args.length == 0);
// NOTE(mesch): XPath position starts at 1.
return new NumberValue(ctx.position + 1);
},
'count': function(ctx) {
assert(this.args.length == 1);
var v = this.args[0].evaluate(ctx);
return new NumberValue(v.nodeSetValue().length);
},
'id': function(ctx) {
assert(this.args.length == 1);
var e = this.args.evaluate(ctx);
var ret = [];
var ids;
if (e.type == 'node-set') {
ids = [];
for (var i = 0; i < e.length; ++i) {
var v = xmlValue(e[i]).split(/\s+/);
for (var ii = 0; ii < v.length; ++ii) {
ids.push(v[ii]);
}
}
} else {
ids = e.split(/\s+/);
}
var d = ctx.node.ownerDocument;
for (var i = 0; i < ids.length; ++i) {
var n = d.getElementById(ids[i]);
if (n) {
ret.push(n);
}
}
return new NodeSetValue(ret);
},
'local-name': function(ctx) {
alert('not implmented yet: XPath function local-name()');
},
'namespace-uri': function(ctx) {
alert('not implmented yet: XPath function namespace-uri()');
},
'name': function(ctx) {
assert(this.args.length == 1 || this.args.length == 0);
var n;
if (this.args.length == 0) {
n = [ ctx.node ];
} else {
n = this.args[0].evaluate(ctx).nodeSetValue();
}
if (n.length == 0) {
return new StringValue('');
} else {
return new StringValue(n[0].nodeName);
}
},
'string': function(ctx) {
assert(this.args.length == 1 || this.args.length == 0);
if (this.args.length == 0) {
return new StringValue(new NodeSetValue([ ctx.node ]).stringValue());
} else {
return new StringValue(this.args[0].evaluate(ctx).stringValue());
}
},
'concat': function(ctx) {
var ret = '';
for (var i = 0; i < this.args.length; ++i) {
ret += this.args[i].evaluate(ctx).stringValue();
}
return new StringValue(ret);
},
'starts-with': function(ctx) {
assert(this.args.length == 2);
var s0 = this.args[0].evaluate(ctx).stringValue();
var s1 = this.args[1].evaluate(ctx).stringValue();
return new BooleanValue(s0.indexOf(s1) == 0);
},
'contains': function(ctx) {
assert(this.args.length == 2);
var s0 = this.args[0].evaluate(ctx).stringValue();
var s1 = this.args[1].evaluate(ctx).stringValue();
return new BooleanValue(s0.indexOf(s1) != -1);
},
'substring-before': function(ctx) {
assert(this.args.length == 2);
var s0 = this.args[0].evaluate(ctx).stringValue();
var s1 = this.args[1].evaluate(ctx).stringValue();
var i = s0.indexOf(s1);
var ret;
if (i == -1) {
ret = '';
} else {
ret = s0.substr(0,i);
}
return new StringValue(ret);
},
'substring-after': function(ctx) {
assert(this.args.length == 2);
var s0 = this.args[0].evaluate(ctx).stringValue();
var s1 = this.args[1].evaluate(ctx).stringValue();
var i = s0.indexOf(s1);
var ret;
if (i == -1) {
ret = '';
} else {
ret = s0.substr(i + s1.length);
}
return new StringValue(ret);
},
'substring': function(ctx) {
// NOTE: XPath defines the position of the first character in a
// string to be 1, in JavaScript this is 0 ([XPATH] Section 4.2).
assert(this.args.length == 2 || this.args.length == 3);
var s0 = this.args[0].evaluate(ctx).stringValue();
var s1 = this.args[1].evaluate(ctx).numberValue();
var ret;
if (this.args.length == 2) {
var i1 = Math.max(0, Math.round(s1) - 1);
ret = s0.substr(i1);
} else {
var s2 = this.args[2].evaluate(ctx).numberValue();
var i0 = Math.round(s1) - 1;
var i1 = Math.max(0, i0);
var i2 = Math.round(s2) - Math.max(0, -i0);
ret = s0.substr(i1, i2);
}
return new StringValue(ret);
},
'string-length': function(ctx) {
var s;
if (this.args.length > 0) {
s = this.args[0].evaluate(ctx).stringValue();
} else {
s = new NodeSetValue([ ctx.node ]).stringValue();
}
return new NumberValue(s.length);
},
'normalize-space': function(ctx) {
var s;
if (this.args.length > 0) {
s = this.args[0].evaluate(ctx).stringValue();
} else {
s = new NodeSetValue([ ctx.node ]).stringValue();
}
s = s.replace(/^\s*/,'').replace(/\s*$/,'').replace(/\s+/g, ' ');
return new StringValue(s);
},
'translate': function(ctx) {
assert(this.args.length == 3);
var s0 = this.args[0].evaluate(ctx).stringValue();
var s1 = this.args[1].evaluate(ctx).stringValue();
var s2 = this.args[2].evaluate(ctx).stringValue();
for (var i = 0; i < s1.length; ++i) {
s0 = s0.replace(new RegExp(s1.charAt(i), 'g'), s2.charAt(i));
}
return new StringValue(s0);
},
'boolean': function(ctx) {
assert(this.args.length == 1);
return new BooleanValue(this.args[0].evaluate(ctx).booleanValue());
},
'not': function(ctx) {
assert(this.args.length == 1);
var ret = !this.args[0].evaluate(ctx).booleanValue();
return new BooleanValue(ret);
},
'true': function(ctx) {
assert(this.args.length == 0);
return new BooleanValue(true);
},
'false': function(ctx) {
assert(this.args.length == 0);
return new BooleanValue(false);
},
'lang': function(ctx) {
assert(this.args.length == 1);
var lang = this.args[0].evaluate(ctx).stringValue();
var xmllang;
var n = ctx.node;
while (n && n != n.parentNode /* just in case ... */) {
xmllang = n.getAttribute('xml:lang');
if (xmllang) {
break;
}
n = n.parentNode;
}
if (!xmllang) {
return new BooleanValue(false);
} else {
var re = new RegExp('^' + lang + '$', 'i');
return new BooleanValue(xmllang.match(re) ||
xmllang.replace(/_.*$/,'').match(re));
}
},
'number': function(ctx) {
assert(this.args.length == 1 || this.args.length == 0);
if (this.args.length == 1) {
return new NumberValue(this.args[0].evaluate(ctx).numberValue());
} else {
return new NumberValue(new NodeSetValue([ ctx.node ]).numberValue());
}
},
'sum': function(ctx) {
assert(this.args.length == 1);
var n = this.args[0].evaluate(ctx).nodeSetValue();
var sum = 0;
for (var i = 0; i < n.length; ++i) {
sum += xmlValue(n[i]) - 0;
}
return new NumberValue(sum);
},
'floor': function(ctx) {
assert(this.args.length == 1);
var num = this.args[0].evaluate(ctx).numberValue();
return new NumberValue(Math.floor(num));
},
'ceiling': function(ctx) {
assert(this.args.length == 1);
var num = this.args[0].evaluate(ctx).numberValue();
return new NumberValue(Math.ceil(num));
},
'round': function(ctx) {
assert(this.args.length == 1);
var num = this.args[0].evaluate(ctx).numberValue();
return new NumberValue(Math.round(num));
},
// TODO(mesch): The following functions are custom. There is a
// standard that defines how to add functions, which should be
// applied here.
'ext-join': function(ctx) {
assert(this.args.length == 2);
var nodes = this.args[0].evaluate(ctx).nodeSetValue();
var delim = this.args[1].evaluate(ctx).stringValue();
var ret = '';
for (var i = 0; i < nodes.length; ++i) {
if (ret) {
ret += delim;
}
ret += xmlValue(nodes[i]);
}
return new StringValue(ret);
},
// ext-if() evaluates and returns its second argument, if the
// boolean value of its first argument is true, otherwise it
// evaluates and returns its third argument.
'ext-if': function(ctx) {
assert(this.args.length == 3);
if (this.args[0].evaluate(ctx).booleanValue()) {
return this.args[1].evaluate(ctx);
} else {
return this.args[2].evaluate(ctx);
}
},
'ext-sprintf': function(ctx) {
assert(this.args.length >= 1);
var args = [];
for (var i = 0; i < this.args.length; ++i) {
args.push(this.args[i].evaluate(ctx).stringValue());
}
return new StringValue(sprintf.apply(null, args));
},
// ext-cardinal() evaluates its single argument as a number, and
// returns the current node that many times. It can be used in the
// select attribute to iterate over an integer range.
'ext-cardinal': function(ctx) {
assert(this.args.length >= 1);
var c = this.args[0].evaluate(ctx).numberValue();
var ret = [];
for (var i = 0; i < c; ++i) {
ret.push(ctx.node);
}
return new NodeSetValue(ret);
}
};
function UnionExpr(expr1, expr2) {
this.expr1 = expr1;
this.expr2 = expr2;
}
UnionExpr.prototype.evaluate = function(ctx) {
var nodes1 = this.expr1.evaluate(ctx).nodeSetValue();
var nodes2 = this.expr2.evaluate(ctx).nodeSetValue();
var I1 = nodes1.length;
for (var i2 = 0; i2 < nodes2.length; ++i2) {
for (var i1 = 0; i1 < I1; ++i1) {
if (nodes1[i1] == nodes2[i2]) {
// break inner loop and continue outer loop, labels confuse
// the js compiler, so we don't use them here.
i1 = I1;
}
}
nodes1.push(nodes2[i2]);
}
return new NodeSetValue(nodes2);
};
function PathExpr(filter, rel) {
this.filter = filter;
this.rel = rel;
}
PathExpr.prototype.evaluate = function(ctx) {
var nodes = this.filter.evaluate(ctx).nodeSetValue();
var nodes1 = [];
for (var i = 0; i < nodes.length; ++i) {
var nodes0 = this.rel.evaluate(ctx.clone(nodes[i], i, nodes)).nodeSetValue();
for (var ii = 0; ii < nodes0.length; ++ii) {
nodes1.push(nodes0[ii]);
}
}
return new NodeSetValue(nodes1);
};
function FilterExpr(expr, predicate) {
this.expr = expr;
this.predicate = predicate;
}
FilterExpr.prototype.evaluate = function(ctx) {
var nodes = this.expr.evaluate(ctx).nodeSetValue();
for (var i = 0; i < this.predicate.length; ++i) {
var nodes0 = nodes;
nodes = [];
for (var j = 0; j < nodes0.length; ++j) {
var n = nodes0[j];
if (this.predicate[i].evaluate(ctx.clone(n, j, nodes0)).booleanValue()) {
nodes.push(n);
}
}
}
return new NodeSetValue(nodes);
}
function UnaryMinusExpr(expr) {
this.expr = expr;
}
UnaryMinusExpr.prototype.evaluate = function(ctx) {
return new NumberValue(-this.expr.evaluate(ctx).numberValue());
};
function BinaryExpr(expr1, op, expr2) {
this.expr1 = expr1;
this.expr2 = expr2;
this.op = op;
}
BinaryExpr.prototype.evaluate = function(ctx) {
var ret;
switch (this.op.value) {
case 'or':
ret = new BooleanValue(this.expr1.evaluate(ctx).booleanValue() ||
this.expr2.evaluate(ctx).booleanValue());
break;
case 'and':
ret = new BooleanValue(this.expr1.evaluate(ctx).booleanValue() &&
this.expr2.evaluate(ctx).booleanValue());
break;
case '+':
ret = new NumberValue(this.expr1.evaluate(ctx).numberValue() +
this.expr2.evaluate(ctx).numberValue());
break;
case '-':
ret = new NumberValue(this.expr1.evaluate(ctx).numberValue() -
this.expr2.evaluate(ctx).numberValue());
break;
case '*':
ret = new NumberValue(this.expr1.evaluate(ctx).numberValue() *
this.expr2.evaluate(ctx).numberValue());
break;
case 'mod':
ret = new NumberValue(this.expr1.evaluate(ctx).numberValue() %
this.expr2.evaluate(ctx).numberValue());
break;
case 'div':
ret = new NumberValue(this.expr1.evaluate(ctx).numberValue() /
this.expr2.evaluate(ctx).numberValue());
break;
case '=':
ret = this.compare(ctx, function(x1, x2) { return x1 == x2; });
break;
case '!=':
ret = this.compare(ctx, function(x1, x2) { return x1 != x2; });
break;
case '<':
ret = this.compare(ctx, function(x1, x2) { return x1 < x2; });
break;
case '<=':
ret = this.compare(ctx, function(x1, x2) { return x1 <= x2; });
break;
case '>':
ret = this.compare(ctx, function(x1, x2) { return x1 > x2; });
break;
case '>=':
ret = this.compare(ctx, function(x1, x2) { return x1 >= x2; });
break;
default:
alert('BinaryExpr.evaluate: ' + this.op.value);
}
return ret;
};
BinaryExpr.prototype.compare = function(ctx, cmp) {
var v1 = this.expr1.evaluate(ctx);
var v2 = this.expr2.evaluate(ctx);
var ret;
if (v1.type == 'node-set' && v2.type == 'node-set') {
var n1 = v1.nodeSetValue();
var n2 = v2.nodeSetValue();
ret = false;
for (var i1 = 0; i1 < n1.length; ++i1) {
for (var i2 = 0; i2 < n2.length; ++i2) {
if (cmp(xmlValue(n1[i1]), xmlValue(n2[i2]))) {
ret = true;
// Break outer loop. Labels confuse the jscompiler and we
// don't use them.
i2 = n2.length;
i1 = n1.length;
}
}
}
} else if (v1.type == 'node-set' || v2.type == 'node-set') {
if (v1.type == 'number') {
var s = v1.numberValue();
var n = v2.nodeSetValue();
ret = false;
for (var i = 0; i < n.length; ++i) {
var nn = xmlValue(n[i]) - 0;
if (cmp(s, nn)) {
ret = true;
break;
}
}
} else if (v2.type == 'number') {
var n = v1.nodeSetValue();
var s = v2.numberValue();
ret = false;
for (var i = 0; i < n.length; ++i) {
var nn = xmlValue(n[i]) - 0;
if (cmp(nn, s)) {
ret = true;
break;
}
}
} else if (v1.type == 'string') {
var s = v1.stringValue();
var n = v2.nodeSetValue();
ret = false;
for (var i = 0; i < n.length; ++i) {
var nn = xmlValue(n[i]);
if (cmp(s, nn)) {
ret = true;
break;
}
}
} else if (v2.type == 'string') {
var n = v1.nodeSetValue();
var s = v2.stringValue();
ret = false;
for (var i = 0; i < n.length; ++i) {
var nn = xmlValue(n[i]);
if (cmp(nn, s)) {
ret = true;
break;
}
}
} else {
ret = cmp(v1.booleanValue(), v2.booleanValue());
}
} else if (v1.type == 'boolean' || v2.type == 'boolean') {
ret = cmp(v1.booleanValue(), v2.booleanValue());
} else if (v1.type == 'number' || v2.type == 'number') {
ret = cmp(v1.numberValue(), v2.numberValue());
} else {
ret = cmp(v1.stringValue(), v2.stringValue());
}
return new BooleanValue(ret);
}
function LiteralExpr(value) {
this.value = value;
}
LiteralExpr.prototype.evaluate = function(ctx) {
return new StringValue(this.value);
};
function NumberExpr(value) {
this.value = value;
}
NumberExpr.prototype.evaluate = function(ctx) {
return new NumberValue(this.value);
};
function VariableExpr(name) {
this.name = name;
}
VariableExpr.prototype.evaluate = function(ctx) {
return ctx.getVariable(this.name);
}
// Factory functions for semantic values (i.e. Expressions) of the
// productions in the grammar. When a production is matched to reduce
// the current parse state stack, the function is called with the
// semantic values of the matched elements as arguments, and returns
// another semantic value. The semantic value is a node of the parse
// tree, an expression object with an evaluate() method that evaluates the
// expression in an actual context. These factory functions are used
// in the specification of the grammar rules, below.
function makeTokenExpr(m) {
return new TokenExpr(m);
}
function passExpr(e) {
return e;
}
function makeLocationExpr1(slash, rel) {
rel.absolute = true;
return rel;
}
function makeLocationExpr2(dslash, rel) {
rel.absolute = true;
rel.prependStep(makeAbbrevStep(dslash.value));
return rel;
}
function makeLocationExpr3(slash) {
var ret = new LocationExpr();
ret.appendStep(makeAbbrevStep('.'));
ret.absolute = true;
return ret;
}
function makeLocationExpr4(dslash) {
var ret = new LocationExpr();
ret.absolute = true;
ret.appendStep(makeAbbrevStep(dslash.value));
return ret;
}
function makeLocationExpr5(step) {
var ret = new LocationExpr();
ret.appendStep(step);
return ret;
}
function makeLocationExpr6(rel, slash, step) {
rel.appendStep(step);
return rel;
}
function makeLocationExpr7(rel, dslash, step) {
rel.appendStep(makeAbbrevStep(dslash.value));
return rel;
}
function makeStepExpr1(dot) {
return makeAbbrevStep(dot.value);
}
function makeStepExpr2(ddot) {
return makeAbbrevStep(ddot.value);
}
function makeStepExpr3(axisname, axis, nodetest) {
return new StepExpr(axisname.value, nodetest);
}
function makeStepExpr4(at, nodetest) {
return new StepExpr('attribute', nodetest);
}
function makeStepExpr5(nodetest) {
return new StepExpr('child', nodetest);
}
function makeStepExpr6(step, predicate) {
step.appendPredicate(predicate);
return step;
}
function makeAbbrevStep(abbrev) {
switch (abbrev) {
case '//':
return new StepExpr('descendant-or-self', new NodeTestAny);
case '.':
return new StepExpr('self', new NodeTestAny);
case '..':
return new StepExpr('parent', new NodeTestAny);
}
}
function makeNodeTestExpr1(asterisk) {
return new NodeTestElement;
}
function makeNodeTestExpr2(ncname, colon, asterisk) {
return new NodeTestNC(ncname.value);
}
function makeNodeTestExpr3(qname) {
return new NodeTestName(qname.value);
}
function makeNodeTestExpr4(typeo, parenc) {
var type = typeo.value.replace(/\s*\($/, '');
switch(type) {
case 'node':
return new NodeTestAny;
case 'text':
return new NodeTestText;
case 'comment':
return new NodeTestComment;
case 'processing-instruction':
return new NodeTestPI;
}
}
function makeNodeTestExpr5(typeo, target, parenc) {
var type = typeo.replace(/\s*\($/, '');
if (type != 'processing-instruction') {
throw type + ' ' + Error().stack;
}
return new NodeTestPI(target.value);
}
function makePredicateExpr(pareno, expr, parenc) {
return new PredicateExpr(expr);
}
function makePrimaryExpr(pareno, expr, parenc) {
return expr;
}
function makeFunctionCallExpr1(name, pareno, parenc) {
return new FunctionCallExpr(name);
}
function makeFunctionCallExpr2(name, pareno, arg1, args, parenc) {
var ret = new FunctionCallExpr(name);
ret.appendArg(arg1);
for (var i = 0; i < args.length; ++i) {
ret.appendArg(args[i]);
}
return ret;
}
function makeArgumentExpr(comma, expr) {
return expr;
}
function makeUnionExpr(expr1, pipe, expr2) {
return new UnionExpr(expr1, expr2);
}
function makePathExpr1(filter, slash, rel) {
return new PathExpr(filter, rel);
}
function makePathExpr2(filter, dslash, rel) {
rel.prependStep(makeAbbrevStep(dslash.value));
return new PathExpr(filter, rel);
}
function makeFilterExpr(expr, predicates) {
if (predicates.length > 0) {
return new FilterExpr(expr, predicates);
} else {
return expr;
}
}
function makeUnaryMinusExpr(minus, expr) {
return new UnaryMinusExpr(expr);
}
function makeBinaryExpr(expr1, op, expr2) {
return new BinaryExpr(expr1, op, expr2);
}
function makeLiteralExpr(token) {
// remove quotes from the parsed value:
var value = token.value.substring(1, token.value.length - 1);
return new LiteralExpr(value);
}
function makeNumberExpr(token) {
return new NumberExpr(token.value);
}
function makeVariableReference(dollar, name) {
return new VariableExpr(name.value);
}
// Used before parsing for optimization of common simple cases. See
// the begin of xpathParse() for which they are.
function makeSimpleExpr(expr) {
if (expr.charAt(0) == '$') {
return new VariableExpr(expr.substr(1));
} else if (expr.charAt(0) == '@') {
var a = new NodeTestName(expr.substr(1));
var b = new StepExpr('attribute', a);
var c = new LocationExpr();
c.appendStep(b);
return c;
} else if (expr.match(/^[0-9]+$/)) {
return new NumberExpr(expr);
} else {
var a = new NodeTestName(expr);
var b = new StepExpr('child', a);
var c = new LocationExpr();
c.appendStep(b);
return c;
}
}
function makeSimpleExpr2(expr) {
var steps = expr.split('/');
var c = new LocationExpr();
for (var i in steps) {
var a = new NodeTestName(steps[i]);
var b = new StepExpr('child', a);
c.appendStep(b);
}
return c;
}
// The axes of XPath expressions.
var xpathAxis = {
ANCESTOR_OR_SELF: 'ancestor-or-self',
ANCESTOR: 'ancestor',
ATTRIBUTE: 'attribute',
CHILD: 'child',
DESCENDANT_OR_SELF: 'descendant-or-self',
DESCENDANT: 'descendant',
FOLLOWING_SIBLING: 'following-sibling',
FOLLOWING: 'following',
NAMESPACE: 'namespace',
PARENT: 'parent',
PRECEDING_SIBLING: 'preceding-sibling',
PRECEDING: 'preceding',
SELF: 'self'
};
var xpathAxesRe = [
xpathAxis.ANCESTOR_OR_SELF,
xpathAxis.ANCESTOR,
xpathAxis.ATTRIBUTE,
xpathAxis.CHILD,
xpathAxis.DESCENDANT_OR_SELF,
xpathAxis.DESCENDANT,
xpathAxis.FOLLOWING_SIBLING,
xpathAxis.FOLLOWING,
xpathAxis.NAMESPACE,
xpathAxis.PARENT,
xpathAxis.PRECEDING_SIBLING,
xpathAxis.PRECEDING,
xpathAxis.SELF
].join('|');
// The tokens of the language. The label property is just used for
// generating debug output. The prec property is the precedence used
// for shift/reduce resolution. Default precedence is 0 as a lookahead
// token and 2 on the stack. TODO(mesch): this is certainly not
// necessary and too complicated. Simplify this!
// NOTE: tabular formatting is the big exception, but here it should
// be OK.
var TOK_PIPE = { label: "|", prec: 17, re: new RegExp("^\\|") };
var TOK_DSLASH = { label: "//", prec: 19, re: new RegExp("^//") };
var TOK_SLASH = { label: "/", prec: 30, re: new RegExp("^/") };
var TOK_AXIS = { label: "::", prec: 20, re: new RegExp("^::") };
var TOK_COLON = { label: ":", prec: 1000, re: new RegExp("^:") };
var TOK_AXISNAME = { label: "[axis]", re: new RegExp('^(' + xpathAxesRe + ')') };
var TOK_PARENO = { label: "(", prec: 34, re: new RegExp("^\\(") };
var TOK_PARENC = { label: ")", re: new RegExp("^\\)") };
var TOK_DDOT = { label: "..", prec: 34, re: new RegExp("^\\.\\.") };
var TOK_DOT = { label: ".", prec: 34, re: new RegExp("^\\.") };
var TOK_AT = { label: "@", prec: 34, re: new RegExp("^@") };
var TOK_COMMA = { label: ",", re: new RegExp("^,") };
var TOK_OR = { label: "or", prec: 10, re: new RegExp("^or\\b") };
var TOK_AND = { label: "and", prec: 11, re: new RegExp("^and\\b") };
var TOK_EQ = { label: "=", prec: 12, re: new RegExp("^=") };
var TOK_NEQ = { label: "!=", prec: 12, re: new RegExp("^!=") };
var TOK_GE = { label: ">=", prec: 13, re: new RegExp("^>=") };
var TOK_GT = { label: ">", prec: 13, re: new RegExp("^>") };
var TOK_LE = { label: "<=", prec: 13, re: new RegExp("^<=") };
var TOK_LT = { label: "<", prec: 13, re: new RegExp("^<") };
var TOK_PLUS = { label: "+", prec: 14, re: new RegExp("^\\+"), left: true };
var TOK_MINUS = { label: "-", prec: 14, re: new RegExp("^\\-"), left: true };
var TOK_DIV = { label: "div", prec: 15, re: new RegExp("^div\\b"), left: true };
var TOK_MOD = { label: "mod", prec: 15, re: new RegExp("^mod\\b"), left: true };
var TOK_BRACKO = { label: "[", prec: 32, re: new RegExp("^\\[") };
var TOK_BRACKC = { label: "]", re: new RegExp("^\\]") };
var TOK_DOLLAR = { label: "$", re: new RegExp("^\\$") };
var TOK_NCNAME = { label: "[ncname]", re: new RegExp('^[a-z][-\\w]*','i') };
var TOK_ASTERISK = { label: "*", prec: 15, re: new RegExp("^\\*"), left: true };
var TOK_LITERALQ = { label: "[litq]", prec: 20, re: new RegExp("^'[^\\']*'") };
var TOK_LITERALQQ = {
label: "[litqq]",
prec: 20,
re: new RegExp('^"[^\\"]*"')
};
var TOK_NUMBER = {
label: "[number]",
prec: 35,
re: new RegExp('^\\d+(\\.\\d*)?') };
var TOK_QNAME = {
label: "[qname]",
re: new RegExp('^([a-z][-\\w]*:)?[a-z][-\\w]*','i')
};
var TOK_NODEO = {
label: "[nodetest-start]",
re: new RegExp('^(processing-instruction|comment|text|node)\\(')
};
// The table of the tokens of our grammar, used by the lexer: first
// column the tag, second column a regexp to recognize it in the
// input, third column the precedence of the token, fourth column a
// factory function for the semantic value of the token.
//
// NOTE: order of this list is important, because the first match
// counts. Cf. DDOT and DOT, and AXIS and COLON.
var xpathTokenRules = [
TOK_DSLASH,
TOK_SLASH,
TOK_DDOT,
TOK_DOT,
TOK_AXIS,
TOK_COLON,
TOK_AXISNAME,
TOK_NODEO,
TOK_PARENO,
TOK_PARENC,
TOK_BRACKO,
TOK_BRACKC,
TOK_AT,
TOK_COMMA,
TOK_OR,
TOK_AND,
TOK_NEQ,
TOK_EQ,
TOK_GE,
TOK_GT,
TOK_LE,
TOK_LT,
TOK_PLUS,
TOK_MINUS,
TOK_ASTERISK,
TOK_PIPE,
TOK_MOD,
TOK_DIV,
TOK_LITERALQ,
TOK_LITERALQQ,
TOK_NUMBER,
TOK_QNAME,
TOK_NCNAME,
TOK_DOLLAR
];
// All the nonterminals of the grammar. The nonterminal objects are
// identified by object identity; the labels are used in the debug
// output only.
var XPathLocationPath = { label: "LocationPath" };
var XPathRelativeLocationPath = { label: "RelativeLocationPath" };
var XPathAbsoluteLocationPath = { label: "AbsoluteLocationPath" };
var XPathStep = { label: "Step" };
var XPathNodeTest = { label: "NodeTest" };
var XPathPredicate = { label: "Predicate" };
var XPathLiteral = { label: "Literal" };
var XPathExpr = { label: "Expr" };
var XPathPrimaryExpr = { label: "PrimaryExpr" };
var XPathVariableReference = { label: "Variablereference" };
var XPathNumber = { label: "Number" };
var XPathFunctionCall = { label: "FunctionCall" };
var XPathArgumentRemainder = { label: "ArgumentRemainder" };
var XPathPathExpr = { label: "PathExpr" };
var XPathUnionExpr = { label: "UnionExpr" };
var XPathFilterExpr = { label: "FilterExpr" };
var XPathDigits = { label: "Digits" };
var xpathNonTerminals = [
XPathLocationPath,
XPathRelativeLocationPath,
XPathAbsoluteLocationPath,
XPathStep,
XPathNodeTest,
XPathPredicate,
XPathLiteral,
XPathExpr,
XPathPrimaryExpr,
XPathVariableReference,
XPathNumber,
XPathFunctionCall,
XPathArgumentRemainder,
XPathPathExpr,
XPathUnionExpr,
XPathFilterExpr,
XPathDigits
];
// Quantifiers that are used in the productions of the grammar.
var Q_01 = { label: "?" };
var Q_MM = { label: "*" };
var Q_1M = { label: "+" };
// Tag for left associativity (right assoc is implied by undefined).
var ASSOC_LEFT = true;
// The productions of the grammar. Columns of the table:
//
// - target nonterminal,
// - pattern,
// - precedence,
// - semantic value factory
//
// The semantic value factory is a function that receives parse tree
// nodes from the stack frames of the matched symbols as arguments and
// returns an a node of the parse tree. The node is stored in the top
// stack frame along with the target object of the rule. The node in
// the parse tree is an expression object that has an evaluate() method
// and thus evaluates XPath expressions.
//
// The precedence is used to decide between reducing and shifting by
// comparing the precendence of the rule that is candidate for
// reducing with the precedence of the look ahead token. Precedence of
// -1 means that the precedence of the tokens in the pattern is used
// instead. TODO: It shouldn't be necessary to explicitly assign
// precedences to rules.
var xpathGrammarRules =
[
[ XPathLocationPath, [ XPathRelativeLocationPath ], 18,
passExpr ],
[ XPathLocationPath, [ XPathAbsoluteLocationPath ], 18,
passExpr ],
[ XPathAbsoluteLocationPath, [ TOK_SLASH, XPathRelativeLocationPath ], 18,
makeLocationExpr1 ],
[ XPathAbsoluteLocationPath, [ TOK_DSLASH, XPathRelativeLocationPath ], 18,
makeLocationExpr2 ],
[ XPathAbsoluteLocationPath, [ TOK_SLASH ], 0,
makeLocationExpr3 ],
[ XPathAbsoluteLocationPath, [ TOK_DSLASH ], 0,
makeLocationExpr4 ],
[ XPathRelativeLocationPath, [ XPathStep ], 31,
makeLocationExpr5 ],
[ XPathRelativeLocationPath,
[ XPathRelativeLocationPath, TOK_SLASH, XPathStep ], 31,
makeLocationExpr6 ],
[ XPathRelativeLocationPath,
[ XPathRelativeLocationPath, TOK_DSLASH, XPathStep ], 31,
makeLocationExpr7 ],
[ XPathStep, [ TOK_DOT ], 33,
makeStepExpr1 ],
[ XPathStep, [ TOK_DDOT ], 33,
makeStepExpr2 ],
[ XPathStep,
[ TOK_AXISNAME, TOK_AXIS, XPathNodeTest ], 33,
makeStepExpr3 ],
[ XPathStep, [ TOK_AT, XPathNodeTest ], 33,
makeStepExpr4 ],
[ XPathStep, [ XPathNodeTest ], 33,
makeStepExpr5 ],
[ XPathStep, [ XPathStep, XPathPredicate ], 33,
makeStepExpr6 ],
[ XPathNodeTest, [ TOK_ASTERISK ], 33,
makeNodeTestExpr1 ],
[ XPathNodeTest, [ TOK_NCNAME, TOK_COLON, TOK_ASTERISK ], 33,
makeNodeTestExpr2 ],
[ XPathNodeTest, [ TOK_QNAME ], 33,
makeNodeTestExpr3 ],
[ XPathNodeTest, [ TOK_NODEO, TOK_PARENC ], 33,
makeNodeTestExpr4 ],
[ XPathNodeTest, [ TOK_NODEO, XPathLiteral, TOK_PARENC ], 33,
makeNodeTestExpr5 ],
[ XPathPredicate, [ TOK_BRACKO, XPathExpr, TOK_BRACKC ], 33,
makePredicateExpr ],
[ XPathPrimaryExpr, [ XPathVariableReference ], 33,
passExpr ],
[ XPathPrimaryExpr, [ TOK_PARENO, XPathExpr, TOK_PARENC ], 33,
makePrimaryExpr ],
[ XPathPrimaryExpr, [ XPathLiteral ], 30,
passExpr ],
[ XPathPrimaryExpr, [ XPathNumber ], 30,
passExpr ],
[ XPathPrimaryExpr, [ XPathFunctionCall ], 30,
passExpr ],
[ XPathFunctionCall, [ TOK_QNAME, TOK_PARENO, TOK_PARENC ], -1,
makeFunctionCallExpr1 ],
[ XPathFunctionCall,
[ TOK_QNAME, TOK_PARENO, XPathExpr, XPathArgumentRemainder, Q_MM,
TOK_PARENC ], -1,
makeFunctionCallExpr2 ],
[ XPathArgumentRemainder, [ TOK_COMMA, XPathExpr ], -1,
makeArgumentExpr ],
[ XPathUnionExpr, [ XPathPathExpr ], 20,
passExpr ],
[ XPathUnionExpr, [ XPathUnionExpr, TOK_PIPE, XPathPathExpr ], 20,
makeUnionExpr ],
[ XPathPathExpr, [ XPathLocationPath ], 20,
passExpr ],
[ XPathPathExpr, [ XPathFilterExpr ], 19,
passExpr ],
[ XPathPathExpr,
[ XPathFilterExpr, TOK_SLASH, XPathRelativeLocationPath ], 20,
makePathExpr1 ],
[ XPathPathExpr,
[ XPathFilterExpr, TOK_DSLASH, XPathRelativeLocationPath ], 20,
makePathExpr2 ],
[ XPathFilterExpr, [ XPathPrimaryExpr, XPathPredicate, Q_MM ], 20,
makeFilterExpr ],
[ XPathExpr, [ XPathPrimaryExpr ], 16,
passExpr ],
[ XPathExpr, [ XPathUnionExpr ], 16,
passExpr ],
[ XPathExpr, [ TOK_MINUS, XPathExpr ], -1,
makeUnaryMinusExpr ],
[ XPathExpr, [ XPathExpr, TOK_OR, XPathExpr ], -1,
makeBinaryExpr ],
[ XPathExpr, [ XPathExpr, TOK_AND, XPathExpr ], -1,
makeBinaryExpr ],
[ XPathExpr, [ XPathExpr, TOK_EQ, XPathExpr ], -1,
makeBinaryExpr ],
[ XPathExpr, [ XPathExpr, TOK_NEQ, XPathExpr ], -1,
makeBinaryExpr ],
[ XPathExpr, [ XPathExpr, TOK_LT, XPathExpr ], -1,
makeBinaryExpr ],
[ XPathExpr, [ XPathExpr, TOK_LE, XPathExpr ], -1,
makeBinaryExpr ],
[ XPathExpr, [ XPathExpr, TOK_GT, XPathExpr ], -1,
makeBinaryExpr ],
[ XPathExpr, [ XPathExpr, TOK_GE, XPathExpr ], -1,
makeBinaryExpr ],
[ XPathExpr, [ XPathExpr, TOK_PLUS, XPathExpr ], -1,
makeBinaryExpr, ASSOC_LEFT ],
[ XPathExpr, [ XPathExpr, TOK_MINUS, XPathExpr ], -1,
makeBinaryExpr, ASSOC_LEFT ],
[ XPathExpr, [ XPathExpr, TOK_ASTERISK, XPathExpr ], -1,
makeBinaryExpr, ASSOC_LEFT ],
[ XPathExpr, [ XPathExpr, TOK_DIV, XPathExpr ], -1,
makeBinaryExpr, ASSOC_LEFT ],
[ XPathExpr, [ XPathExpr, TOK_MOD, XPathExpr ], -1,
makeBinaryExpr, ASSOC_LEFT ],
[ XPathLiteral, [ TOK_LITERALQ ], -1,
makeLiteralExpr ],
[ XPathLiteral, [ TOK_LITERALQQ ], -1,
makeLiteralExpr ],
[ XPathNumber, [ TOK_NUMBER ], -1,
makeNumberExpr ],
[ XPathVariableReference, [ TOK_DOLLAR, TOK_QNAME ], 200,
makeVariableReference ]
];
// That function computes some optimizations of the above data
// structures and will be called right here. It merely takes the
// counter variables out of the global scope.
var xpathRules = [];
function xpathParseInit() {
if (xpathRules.length) {
return;
}
// Some simple optimizations for the xpath expression parser: sort
// grammar rules descending by length, so that the longest match is
// first found.
xpathGrammarRules.sort(function(a,b) {
var la = a[1].length;
var lb = b[1].length;
if (la < lb) {
return 1;
} else if (la > lb) {
return -1;
} else {
return 0;
}
});
var k = 1;
for (var i = 0; i < xpathNonTerminals.length; ++i) {
xpathNonTerminals[i].key = k++;
}
for (i = 0; i < xpathTokenRules.length; ++i) {
xpathTokenRules[i].key = k++;
}
Log.write('XPath parse INIT: ' + k + ' rules');
// Another slight optimization: sort the rules into bins according
// to the last element (observing quantifiers), so we can restrict
// the match against the stack to the subest of rules that match the
// top of the stack.
//
// TODO(mesch): What we actually want is to compute states as in
// bison, so that we don't have to do any explicit and iterated
// match against the stack.
function push_(array, position, element) {
if (!array[position]) {
array[position] = [];
}
array[position].push(element);
}
for (i = 0; i < xpathGrammarRules.length; ++i) {
var rule = xpathGrammarRules[i];
var pattern = rule[1];
for (var j = pattern.length - 1; j >= 0; --j) {
if (pattern[j] == Q_1M) {
push_(xpathRules, pattern[j-1].key, rule);
break;
} else if (pattern[j] == Q_MM || pattern[j] == Q_01) {
push_(xpathRules, pattern[j-1].key, rule);
--j;
} else {
push_(xpathRules, pattern[j].key, rule);
break;
}
}
}
Log.write('XPath parse INIT: ' + xpathRules.length + ' rule bins');
var sum = 0;
mapExec(xpathRules, function(i) {
if (i) {
sum += i.length;
}
});
Log.write('XPath parse INIT: ' + (sum / xpathRules.length) + ' average bin size');
}
// Local utility functions that are used by the lexer or parser.
function xpathCollectDescendants(nodelist, node) {
for (var n = node.firstChild; n; n = n.nextSibling) {
nodelist.push(n);
arguments.callee(nodelist, n);
}
}
function xpathCollectDescendantsReverse(nodelist, node) {
for (var n = node.lastChild; n; n = n.previousSibling) {
nodelist.push(n);
arguments.callee(nodelist, n);
}
}
// The entry point for the library: match an expression against a DOM
// node. Returns an XPath value.
function xpathDomEval(expr, node) {
var expr1 = xpathParse(expr);
var ret = expr1.evaluate(new ExprContext(node));
return ret;
}
// Utility function to sort a list of nodes. Used by xsltSort() and
// nxslSelect().
function xpathSort(input, sort) {
if (sort.length == 0) {
return;
}
var sortlist = [];
for (var i = 0; i < input.nodelist.length; ++i) {
var node = input.nodelist[i];
var sortitem = { node: node, key: [] };
var context = input.clone(node, 0, [ node ]);
for (var j = 0; j < sort.length; ++j) {
var s = sort[j];
var value = s.expr.evaluate(context);
var evalue;
if (s.type == 'text') {
evalue = value.stringValue();
} else if (s.type == 'number') {
evalue = value.numberValue();
}
sortitem.key.push({ value: evalue, order: s.order });
}
// Make the sort stable by adding a lowest priority sort by
// id. This is very convenient and furthermore required by the
// spec ([XSLT] - Section 10 Sorting).
sortitem.key.push({ value: i, order: 'ascending' });
sortlist.push(sortitem);
}
sortlist.sort(xpathSortByKey);
var nodes = [];
for (var i = 0; i < sortlist.length; ++i) {
nodes.push(sortlist[i].node);
}
input.nodelist = nodes;
input.setNode(nodes[0], 0);
}
// Sorts by all order criteria defined. According to the JavaScript
// spec ([ECMA] Section 11.8.5), the compare operators compare strings
// as strings and numbers as numbers.
//
// NOTE: In browsers which do not follow the spec, this breaks only in
// the case that numbers should be sorted as strings, which is very
// uncommon.
function xpathSortByKey(v1, v2) {
// NOTE: Sort key vectors of different length never occur in
// xsltSort.
for (var i = 0; i < v1.key.length; ++i) {
var o = v1.key[i].order == 'descending' ? -1 : 1;
if (v1.key[i].value > v2.key[i].value) {
return +1 * o;
} else if (v1.key[i].value < v2.key[i].value) {
return -1 * o;
}
}
return 0;
}