lucene-filter.js

Data filter for lucene queries
git clone git://git.finwo.net/lib/lucene-filter.js
Log | Files | Refs | README | LICENSE

commit 53f604e3dc918e35397cfcc7cbc19b830fd570de
parent 0b1a1ac0383036b189097a967fa6acabcdf12582
Author: finwo <finwo@pm.me>
Date:   Tue, 27 Nov 2018 13:40:47 +0100

This should work

Diffstat:
A package-lock.json | 13 +++++++++++++
A package.json | 40 ++++++++++++++++++++++++++++++++++++++++
A src/field.js | 17 +++++++++++++++++
A src/filters/index.js | 8 ++++++++
A src/filters/number/exact.js | 27 +++++++++++++++++++++++++++
A src/filters/number/range.js | 34 ++++++++++++++++++++++++++++++++++
A src/filters/regexp.js | 30 ++++++++++++++++++++++++++++++
A src/filters/string/contains.js | 24 ++++++++++++++++++++++++
A src/filters/string/ends-with.js | 27 +++++++++++++++++++++++++++
A src/filters/string/starts-with.js | 27 +++++++++++++++++++++++++++
A src/implicit.js | 17 +++++++++++++++++
A src/index.js | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/operators/and-not.js | 17 +++++++++++++++++
A src/operators/and.js | 17 +++++++++++++++++
A src/operators/index.js | 6 ++++++
A src/operators/or-not.js | 13 +++++++++++++
A src/operators/or.js | 17 +++++++++++++++++
17 files changed, 394 insertions(+), 0 deletions(-)

diff --git a/package-lock.json b/package-lock.json
@@ -0,0 +1,13 @@
+{
+  "name": "lucene-filter",
+  "version": "0.0.1",
+  "lockfileVersion": 1,
+  "requires": true,
+  "dependencies": {
+    "lucene": {
+      "version": "2.0.2",
+      "resolved": "https://registry.npmjs.org/lucene/-/lucene-2.0.2.tgz",
+      "integrity": "sha512-4Ufhi6TYe5pS4kZizrMrSHfjhseQkWZqAP9O2+OAhdjCtVA9toQ1pTLMS0JYf3bKRnBEPTyoAKRNKjXExUPgrA=="
+    }
+  }
+}
diff --git a/package.json b/package.json
@@ -0,0 +1,40 @@
+{
+  "name": "lucene-filter",
+  "version": "0.0.1",
+  "description": "Data filter for lucene queries",
+  "main": "src/index.js",
+  "scripts": {
+    "test": "NODE_ENV=babel jest --detectOpenHandles"
+  },
+  "repository": {
+    "type": "git",
+    "url": "git+https://gitlab.com/finwo/lucene-filter.git"
+  },
+  "author": "Robin Bron <robin@finwo.nl>",
+  "license": "MIT",
+  "bugs": {
+    "url": "https://gitlab.com/finwo/lucene-filter/issues"
+  },
+  "homepage": "https://gitlab.com/finwo/lucene-filter#readme",
+  "dependencies": {
+    "lucene": "^2.0.2"
+  },
+  "devDependencies": {
+    "jest": "^23.6.0",
+    "babel-jest": "^23.6.0",
+    "babel-plugin-add-module-exports": "^1.0.0",
+    "babel-plugin-transform-class-properties": "^6.24.1",
+    "babel-preset-env": "^1.7.0",
+    "expect": "^23.6.0",
+    "jest-isa": "^0.0.4"
+  },
+  "keywords": [
+    "lucene",
+    "filter",
+    "data",
+    "query",
+    "rating",
+    "score",
+    "scoring"
+  ]
+}
diff --git a/src/field.js b/src/field.js
@@ -0,0 +1,17 @@
+const implicit = require('./implicit');
+
+/**
+ * Run `test` against one field of `data`.
+ *
+ * The special field name '<implicit>' tests every key of `data` instead.
+ *
+ * @param {string}   field - Field name, or '<implicit>'
+ * @param {object}   data  - Record being filtered
+ * @param {function} test  - Predicate called with the field's value
+ * @returns {*} Result of `test`; false when `data` is not an object
+ */
+module.exports = function (field, data, test) {
+  if (!data || 'object' !== typeof data) return false;
+  if ('<implicit>' === field) return implicit(data, test);
+  return test(data[field]);
+};
diff --git a/src/filters/index.js b/src/filters/index.js
@@ -0,0 +1,8 @@
+// Ordered: the compiler uses the first filter whose detect() accepts the query
+const filters = module.exports = [];
+filters.push(require('./regexp'));
+filters.push(require('./number/range'));
+filters.push(require('./number/exact'));
+filters.push(require('./string/starts-with'));
+filters.push(require('./string/ends-with'));
+filters.push(require('./string/contains'));
diff --git a/src/filters/number/exact.js b/src/filters/number/exact.js
@@ -0,0 +1,27 @@
+const field = require('../../field');
+
+/**
+ * Numeric match: the field's value must lie within `similarity` of the term.
+ */
+module.exports = {
+  // Accepts any query whose term is numeric
+  detect : function (query) {
+    if (!query) return false;
+    if ('object' !== typeof query) return false;
+    if (!query.term) return false;
+    return !isNaN(query.term);
+  },
+  // Returns data => query.boost on match, 0 otherwise
+  compile: function (query) {
+    // Bounds depend only on the query, so compute them once
+    const similarity = parseFloat(query.similarity) || 0;
+    const min = parseFloat(query.term) - similarity;
+    const max = parseFloat(query.term) + similarity;
+    return function (data) {
+      return field(query.field, data, function (value) {
+        value = parseFloat(value);
+        return (min <= value) && (value <= max);
+      }) ? query.boost : 0;
+    };
+  },
+};
diff --git a/src/filters/number/range.js b/src/filters/number/range.js
@@ -0,0 +1,34 @@
+const field = require('../../field');
+
+/**
+ * Numeric range match: the field's value must lie between term_min and term_max.
+ */
+module.exports = {
+  // Accepts range queries whose bounds are both numeric
+  detect : function (query) {
+    if (!query) return false;
+    if ('object' !== typeof query) return false;
+    if (!('inclusive' in query)) return false;
+    if (isNaN(query.term_min)) return false;
+    return !isNaN(query.term_max);
+  },
+  // Returns data => query.boost on match, 0 otherwise
+  compile: function (query) {
+    const min = parseFloat(query.term_min);
+    const max = parseFloat(query.term_max);
+    // `inclusive` may be a boolean or one of 'both' / 'left' / 'right' / 'none'
+    // depending on the parser version — TODO confirm against lucene 2.0.2
+    const flag = query.inclusive;
+    const minInclusive = (true === flag) || ('both' === flag) || ('left' === flag);
+    const maxInclusive = (true === flag) || ('both' === flag) || ('right' === flag);
+    return function (data) {
+      return field(query.field, data, function (value) {
+        value = parseFloat(value);
+        if (isNaN(value)) return false;
+        if (minInclusive ? (value < min) : (value <= min)) return false;
+        if (maxInclusive ? (value > max) : (value >= max)) return false;
+        return true;
+      }) ? query.boost : 0;
+    };
+  },
+};
diff --git a/src/filters/regexp.js b/src/filters/regexp.js
@@ -0,0 +1,30 @@
+const field = require('../field');
+
+/**
+ * Regular-expression match against string field values (case-insensitive).
+ */
+module.exports = {
+  // Accepts queries with a truthy `regexpr` property
+  detect : function (query) {
+    if (!query) return false;
+    if ('object' !== typeof query) return false;
+    return !!query.regexpr;
+  },
+  // Returns data => query.boost on match, 0 otherwise
+  compile: function (query) {
+    let regex;
+    try {
+      regex = new RegExp(query.term, 'i');
+    } catch (e) {
+      // Invalid pattern: never match, like an unparsable query
+      return () => 0;
+    }
+    return function (data) {
+      return field(query.field, data, function (value) {
+        // Non-strings cannot match; the 'i' flag already ignores case
+        if ('string' !== typeof value) return false;
+        return regex.test(value);
+      }) ? query.boost : 0;
+    };
+  },
+};
diff --git a/src/filters/string/contains.js b/src/filters/string/contains.js
@@ -0,0 +1,24 @@
+const field = require('../../field');
+
+/**
+ * Case-insensitive substring match on string field values.
+ */
+module.exports = {
+  // Accepts any query with a string field and a string term
+  detect : function (query) {
+    if (!query) return false;
+    if ('object' !== typeof query) return false;
+    if ('string' !== typeof query.field) return false;
+    return 'string' === typeof query.term;
+  },
+  // Returns data => query.boost on match, 0 otherwise
+  compile: function (query) {
+    const term = query.term.toLowerCase();
+    return function (data) {
+      return field(query.field, data, function (value) {
+        if ('string' !== typeof value) return false;
+        return value.toLowerCase().includes(term);
+      }) ? query.boost : 0;
+    };
+  },
+};
diff --git a/src/filters/string/ends-with.js b/src/filters/string/ends-with.js
@@ -0,0 +1,27 @@
+const field = require('../../field');
+
+/**
+ * Case-insensitive suffix match on string field values.
+ */
+module.exports = {
+  // NOTE(review): the wildcard is looked for on the field name, not the
+  // term — confirm this is what the parser produces for such queries
+  detect : function (query) {
+    if (!query) return false;
+    if ('object' !== typeof query) return false;
+    if ('string' !== typeof query.field) return false;
+    return ('*' === query.field.substr(0, 1));
+  },
+  // Returns data => query.boost on match, 0 otherwise
+  compile: function (query) {
+    // Keep the stripped name local so the caller's query is not mutated
+    const name = query.field.substr(1);
+    const term = query.term.toLowerCase();
+    return function (data) {
+      return field(name, data, function (value) {
+        if ('string' !== typeof value) return false;
+        return value.toLowerCase().endsWith(term);
+      }) ? query.boost : 0;
+    };
+  },
+};
diff --git a/src/filters/string/starts-with.js b/src/filters/string/starts-with.js
@@ -0,0 +1,27 @@
+const field = require('../../field');
+
+/**
+ * Case-insensitive prefix match on string field values.
+ */
+module.exports = {
+  // NOTE(review): the wildcard is looked for on the field name, not the
+  // term — confirm this is what the parser produces for such queries
+  detect : function (query) {
+    if (!query) return false;
+    if ('object' !== typeof query) return false;
+    if ('string' !== typeof query.field) return false;
+    return ('*' === query.field.substr(-1));
+  },
+  // Returns data => query.boost on match, 0 otherwise
+  compile: function (query) {
+    // Keep the stripped name local so the caller's query is not mutated
+    const name = query.field.substr(0, query.field.length - 1);
+    const term = query.term.toLowerCase();
+    return function (data) {
+      return field(name, data, function (value) {
+        if ('string' !== typeof value) return false;
+        return value.toLowerCase().startsWith(term);
+      }) ? query.boost : 0;
+    };
+  },
+};
diff --git a/src/implicit.js b/src/implicit.js
@@ -0,0 +1,17 @@
+/**
+ * Test every own enumerable key of `data`, stopping at the first truthy result.
+ *
+ * @param {object}   data - Record being filtered
+ * @param {function} test - Predicate called with each field's value
+ * @returns {*} First truthy result of `test`, or false when nothing matched
+ */
+module.exports = function (data, test) {
+  if (!data || 'object' !== typeof data) return false;
+  // Values are tested directly: going back through ./field would recurse
+  // forever on a key that is literally named '<implicit>'
+  for (const key of Object.keys(data)) {
+    const result = test(data[key]);
+    if (result) return result;
+  }
+  return false;
+};
diff --git a/src/index.js b/src/index.js
@@ -0,0 +1,60 @@
+const parser = require('lucene');
+
+/**
+ * Compile a lucene query into a scoring function.
+ *
+ * @param {string|object} query - Query string, or an already-parsed query node
+ * @returns {function} data => score; filters yield query.boost on a match, else 0
+ */
+const lucene = module.exports = function compiler(query) {
+
+  // Parse string query
+  if (!query) return () => 0;
+  if ('string' === typeof query) {
+    try {
+      query = parser.parse(query);
+    } catch (e) {
+      return () => 0;
+    }
+  }
+
+  // Compile combined
+  if (query.operator) {
+    const combine = lucene.operators[query.operator];
+    // Unknown operator: no match instead of a TypeError
+    if ('function' !== typeof combine) return () => 0;
+    return combine(
+      lucene(query.left),
+      lucene(query.right),
+    );
+  }
+
+  // Wrapped
+  if (query.left) {
+    return lucene(query.left);
+  }
+
+  // Ensure default boost
+  query.boost = query.boost || 1;
+
+  // Return the first detected filter
+  for (const filter of lucene.filters) {
+    if (filter.detect(query)) {
+      return filter.compile(query);
+    }
+  }
+
+  // Return no match
+  return () => 0;
+};
+
+// Add filters & operators
+lucene.filters = require('./filters');
+lucene.operators = require('./operators');
+
+// Browser exports
+if (('function' === typeof define) && define.amd) {
+  define(() => lucene);
+} else if ('object' === typeof window) {
+  window.lucene = lucene;
+}
diff --git a/src/operators/and-not.js b/src/operators/and-not.js
@@ -0,0 +1,17 @@
+/**
+ * 'AND NOT': AND of the left side with the negated score of the right side.
+ *
+ * NOTE(review): when the left side matches and the right side scores 0, the
+ * negated 0 makes AND return 0 (no match) — confirm that is intended.
+ *
+ * @param {object} operators - Operator table; 'AND' is looked up at call time
+ * @returns {function} (l, r) => data => score
+ */
+module.exports = function (operators) {
+  return (l, r) => (data) => {
+    return operators['AND'](l, function (...args) {
+      return -r(...args);
+    })(data);
+  };
+};
+
diff --git a/src/operators/and.js b/src/operators/and.js
@@ -0,0 +1,17 @@
+/**
+ * 'AND': returns whichever side's score has the smaller magnitude, so a 0 on
+ * either side yields 0. On equal magnitudes the lower (negative) score wins.
+ *
+ * @param {function} l - Left scorer, data => score
+ * @param {function} r - Right scorer, data => score
+ * @returns {function} data => combined score
+ */
+module.exports = (l, r) => (data) => {
+  const rl = l(data) || 0;
+  const rr = r(data) || 0;
+  const rla = Math.abs(rl);
+  const rra = Math.abs(rr);
+  if (rla > rra) return rr;
+  if (rla < rra) return rl;
+  return Math.min(rl, rr);
+};
diff --git a/src/operators/index.js b/src/operators/index.js
@@ -0,0 +1,6 @@
+// 'OR NOT' and 'AND NOT' receive the table so they can reuse 'OR' and 'AND'
+const operators = module.exports = {};
+operators['OR'] = require('./or');
+operators['AND'] = require('./and');
+operators['OR NOT'] = require('./or-not')(operators);
+operators['AND NOT'] = require('./and-not')(operators);
diff --git a/src/operators/or-not.js b/src/operators/or-not.js
@@ -0,0 +1,13 @@
+/**
+ * 'OR NOT': OR of the left side with the negated score of the right side.
+ *
+ * @param {object} operators - Operator table; 'OR' is looked up at call time
+ * @returns {function} (l, r) => data => score
+ */
+module.exports = function (operators) {
+  return (l, r) => (data) => {
+    return operators['OR'](l, function (...args) {
+      return -r(...args);
+    })(data);
+  };
+};
diff --git a/src/operators/or.js b/src/operators/or.js
@@ -0,0 +1,17 @@
+/**
+ * 'OR': returns whichever side's score has the larger magnitude. On equal
+ * magnitudes the higher (positive) score wins.
+ *
+ * @param {function} l - Left scorer, data => score
+ * @param {function} r - Right scorer, data => score
+ * @returns {function} data => combined score
+ */
+module.exports = (l, r) => (data) => {
+  const rl = l(data) || 0;
+  const rr = r(data) || 0;
+  const rla = Math.abs(rl);
+  const rra = Math.abs(rr);
+  if (rla > rra) return rl;
+  if (rla < rra) return rr;
+  return Math.max(rl, rr);
+};