Fix query parser exact match for nested queries

2025-01-22 16:36:42 +00:00 · 2020-11-28 15:57:21 +01:00 · 2020-11-28 15:57:21 +01:00 · a9049e373a
commit a9049e373a
parent 9f48e86d5b
4 changed files with 105 additions and 43 deletions
--- a/alerta/database/backends/mongodb/queryparser.py
+++ b/alerta/database/backends/mongodb/queryparser.py
@ -66,18 +66,19 @@ class SearchTerm:
                return '{{"attributes.{}": {{"$exists": true}}}}'.format(self.tokens.singleterm)
            else:
                if self.tokens.field[0] == '__default_field__':
-                    return '{{"{}": {{"{}": "{}"}}}}'.format('__default_field__', '__default_operator__', self.tokens.singleterm)
+                    return '{{"{}": {{"{}": "{}", "$options": "i"}}}}'.format('__default_field__', '__default_operator__', self.tokens.singleterm)
                else:
-                    return '{{"{}": {{"$regex": "{}"}}}}'.format(tokens_fieldname, self.tokens.singleterm)
+                    return '{{"{}": {{"$regex": "{}", "$options": "i"}}}}'.format(tokens_fieldname, self.tokens.singleterm)
        if 'phrase' in self.tokens:
-            if self.tokens.field[0] == '__default_field__':
-                return '{{"{}": {{"{}": "{}"}}}}'.format('__default_field__', '__default_operator__', self.tokens.phrase)
+            tokens_field0 = self.tokens.field[0].replace('_.', 'attributes.')
+            if tokens_field0 == '__default_field__':
+                return '{{"{}": {{"{}": "{}", "$options": "i"}}}}'.format('__default_field__', '__default_operator__', self.tokens.phrase)
            else:
-                return '{{"{}": {{"$regex": "{}"}}}}'.format(self.tokens.field[0], self.tokens.phrase)
+                return '{{"{}": {{"$regex": "\\\\b{}\\\\b", "$options": "i"}}}}'.format(tokens_field0, self.tokens.phrase)
        if 'wildcard' in self.tokens:
-            return '{{"{}": {{"$regex": "\\\\b{}\\\\b"}}}}'.format(self.tokens.field[0], self.tokens.wildcard)
+            return '{{"{}": {{"$regex": "\\\\b{}\\\\b", "$options": "i"}}}}'.format(self.tokens.field[0], self.tokens.wildcard)
        if 'regex' in self.tokens:
-            return '{{"{}": {{"$regex": "{}"}}}}'.format(self.tokens.field[0], self.tokens.regex)
+            return '{{"{}": {{"$regex": "{}", "$options": "i"}}}}'.format(self.tokens.field[0], self.tokens.regex)

        def range_term(field, operator, range):
            if field in ['duplicateCount', 'timeout']:
--- a/alerta/database/backends/postgres/queryparser.py
+++ b/alerta/database/backends/postgres/queryparser.py
@ -69,6 +69,9 @@ class SearchTerm:
                return '"{}" ~* \'\\y{}\\y\''.format('__default_field__', self.tokens.phrase)
            elif self.tokens.field[0] in ['correlate', 'service', 'tags']:
                return '\'{}\'=ANY("{}")'.format(self.tokens.term, self.tokens.field[0])
+            elif self.tokens.attr:
+                tokens_attr = self.tokens.attr.replace('_', 'attributes')
+                return '"{}"::jsonb ->>\'{}\' ~* \'\\y{}\\y\''.format(tokens_attr, self.tokens.fieldname, self.tokens.phrase)
            else:
                return '"{}" ~* \'\\y{}\\y\''.format(self.tokens.field[0], self.tokens.phrase)
        if 'wildcard' in self.tokens:
--- a/tests/test_queryparser.py
+++ b/tests/test_queryparser.py
@ -26,7 +26,17 @@ class PostgresQueryTestCase(unittest.TestCase):
        r = self.parser.parse(string)
        self.assertEqual(r, '"text" ILIKE \'%%quick%%\'')

-        # default field (ie. "text") contains phrase
+        # default field (ie. "text") contains either words
+        string = r'''quick OR brown'''
+        r = self.parser.parse(string)
+        self.assertEqual(r, '("text" ILIKE \'%%quick%%\' OR "text" ILIKE \'%%brown%%\')')
+
+        # default field (ie. "text") contains either words (default operator)
+        string = r'''quick brown'''
+        r = self.parser.parse(string)
+        self.assertEqual(r, '("text" ILIKE \'%%quick%%\' OR "text" ILIKE \'%%brown%%\')')
+
+        # default field (ie. "text") contains exact phrase
        string = r'''"quick brown"'''
        r = self.parser.parse(string)
        self.assertEqual(r, '"text" ~* \'\\yquick brown\\y\'')
@ -48,7 +58,7 @@ class PostgresQueryTestCase(unittest.TestCase):
        r = self.parser.parse(string)
        self.assertEqual(r, '("title" ILIKE \'%%quick%%\' OR "title" ILIKE \'%%brown%%\')')

-        # field exact match
+        # field contains exact phrase
        string = r'''author:"John Smith"'''
        r = self.parser.parse(string)
        self.assertEqual(r, '"author" ~* \'\\yJohn Smith\\y\'')
@ -73,6 +83,11 @@ class PostgresQueryTestCase(unittest.TestCase):
        r = self.parser.parse(string)
        self.assertEqual(r, '"attributes"::jsonb ->>\'vendor\' ILIKE \'%%cisco%%\'')

+        # attribute contains either words
+        string = r'''attributes.vendor:(cisco OR juniper)'''
+        r = self.parser.parse(string)
+        self.assertEqual(r, '("attributes"::jsonb ->>\'vendor\' ILIKE \'%%cisco%%\' OR "attributes"::jsonb ->>\'vendor\' ILIKE \'%%juniper%%\')')
+
        # attribute contains either words (default operator)
        string = r'''attributes.vendor:(cisco juniper)'''
        r = self.parser.parse(string)
@ -83,6 +98,16 @@ class PostgresQueryTestCase(unittest.TestCase):
        r = self.parser.parse(string)
        self.assertEqual(r, '("attributes"::jsonb ->>\'vendor\' ILIKE \'%%cisco%%\' OR "attributes"::jsonb ->>\'vendor\' ILIKE \'%%juniper%%\')')

+        # attribute contains exact phrase
+        string = r'''foo.vendor:"quick brown"'''
+        r = self.parser.parse(string)
+        self.assertEqual(r, '"foo"::jsonb ->>\'vendor\' ~* \'\\yquick brown\\y\'')
+
+        # attribute contains exact phrase ("_" shortcut)
+        string = r'''_.vendor:"quick brown"'''
+        r = self.parser.parse(string)
+        self.assertEqual(r, '"attributes"::jsonb ->>\'vendor\' ~* \'\\yquick brown\\y\'')
+
    def test_wildcards(self):

        # ? = single character, * = one or more characters
@ -233,39 +258,49 @@ class MongoQueryTestCase(unittest.TestCase):
        # default field (ie. "text") contains word
        string = r'''quick'''
        r = self.parser.parse(string)
-        self.assertEqual(r, '{"text": {"$regex": "quick"}}')
+        self.assertEqual(r, '{"text": {"$regex": "quick", "$options": "i"}}')

-        # default field (ie. "text") contains phrase
+        # default field (ie. "text") contains either words
+        string = r'''quick OR brown'''
+        r = self.parser.parse(string)
+        self.assertEqual(r, '{"$or": [{"text": {"$regex": "quick", "$options": "i"}}, {"text": {"$regex": "brown", "$options": "i"}}]}')
+
+        # default field (ie. "text") contains either words (default operator)
+        string = r'''quick brown'''
+        r = self.parser.parse(string)
+        self.assertEqual(r, '{"$or": [{"text": {"$regex": "quick", "$options": "i"}}, {"text": {"$regex": "brown", "$options": "i"}}]}')
+
+        # default field (ie. "text") contains exact phrase
        string = r'''"quick brown"'''
        r = self.parser.parse(string)
-        self.assertEqual(r, '{"text": {"$regex": "quick brown"}}')
+        self.assertEqual(r, '{"text": {"$regex": "quick brown", "$options": "i"}}')

    def test_field_names(self):

        # field contains word
        string = r'''status:active'''
        r = self.parser.parse(string)
-        self.assertEqual(r, '{"status": {"$regex": "active"}}')
+        self.assertEqual(r, '{"status": {"$regex": "active", "$options": "i"}}')

        # field contains either words
        string = r'''title:(quick OR brown)'''
        r = self.parser.parse(string)
-        self.assertEqual(r, '{"$or": [{"title": {"$regex": "quick"}}, {"title": {"$regex": "brown"}}]}')
+        self.assertEqual(r, '{"$or": [{"title": {"$regex": "quick", "$options": "i"}}, {"title": {"$regex": "brown", "$options": "i"}}]}')

        # field contains either words (default operator)
        string = r'''title:(quick brown)'''
        r = self.parser.parse(string)
-        self.assertEqual(r, '{"$or": [{"title": {"$regex": "quick"}}, {"title": {"$regex": "brown"}}]}')
+        self.assertEqual(r, '{"$or": [{"title": {"$regex": "quick", "$options": "i"}}, {"title": {"$regex": "brown", "$options": "i"}}]}')

-        # field exact match
+        # field contains exact phrase
        string = r'''author:"John Smith"'''
        r = self.parser.parse(string)
-        self.assertEqual(r, '{"author": {"$regex": "John Smith"}}')
+        self.assertEqual(r, r'{"author": {"$regex": "\\bJohn Smith\\b", "$options": "i"}}')

-        # # # any attribute contains word or phrase
-        # # string = r'''attributes.\*:(quick brown)'''
-        # # r = self.parser.parse(string)
-        # # self.assertEqual(r, '??')
+        # # any attribute contains word or phrase
+        # string = r'''attributes.\*:(quick brown)'''
+        # r = self.parser.parse(string)
+        # self.assertEqual(r, '??')

        # attribute field has non-null value
        string = r'''_exists_:title'''
@ -275,22 +310,37 @@ class MongoQueryTestCase(unittest.TestCase):
        # attribute contains word
        string = r'''foo.vendor:cisco'''
        r = self.parser.parse(string)
-        self.assertEqual(r, '{"foo.vendor": {"$regex": "cisco"}}')
+        self.assertEqual(r, '{"foo.vendor": {"$regex": "cisco", "$options": "i"}}')

        # attribute contains word ("_" shortcut)
        string = r'''_.vendor:cisco'''
        r = self.parser.parse(string)
-        self.assertEqual(r, '{"attributes.vendor": {"$regex": "cisco"}}')
+        self.assertEqual(r, '{"attributes.vendor": {"$regex": "cisco", "$options": "i"}}')
+
+        # attribute contains either words
+        string = r'''attributes.vendor:(cisco OR juniper)'''
+        r = self.parser.parse(string)
+        self.assertEqual(r, '{"$or": [{"attributes.vendor": {"$regex": "cisco", "$options": "i"}}, {"attributes.vendor": {"$regex": "juniper", "$options": "i"}}]}')

        # attribute contains either words (default operator)
        string = r'''attributes.vendor:(cisco juniper)'''
        r = self.parser.parse(string)
-        self.assertEqual(r, '{"$or": [{"attributes.vendor": {"$regex": "cisco"}}, {"attributes.vendor": {"$regex": "juniper"}}]}')
+        self.assertEqual(r, '{"$or": [{"attributes.vendor": {"$regex": "cisco", "$options": "i"}}, {"attributes.vendor": {"$regex": "juniper", "$options": "i"}}]}')

        # attribute contains either words ("_" shortcut, default operator)
        string = r'''_.vendor:(cisco juniper)'''
        r = self.parser.parse(string)
-        self.assertEqual(r, '{"$or": [{"attributes.vendor": {"$regex": "cisco"}}, {"attributes.vendor": {"$regex": "juniper"}}]}')
+        self.assertEqual(r, '{"$or": [{"attributes.vendor": {"$regex": "cisco", "$options": "i"}}, {"attributes.vendor": {"$regex": "juniper", "$options": "i"}}]}')
+
+        # attribute contains exact phrase
+        string = r'''foo.vendor:"quick brown"'''
+        r = self.parser.parse(string)
+        self.assertEqual(r, r'{"foo.vendor": {"$regex": "\\bquick brown\\b", "$options": "i"}}')
+
+        # attribute contains exact phrase ("_" shortcut)
+        string = r'''_.vendor:"quick brown"'''
+        r = self.parser.parse(string)
+        self.assertEqual(r, r'{"attributes.vendor": {"$regex": "\\bquick brown\\b", "$options": "i"}}')

    def test_wildcards(self):

@ -298,13 +348,13 @@ class MongoQueryTestCase(unittest.TestCase):
        string = r'''text:qu?ck bro*'''
        r = self.parser.parse(string)
        self.assertEqual(
-            r, '{"$or": [{"text": {"$regex": "\\\\bqu.?ck\\\\b"}}, {"text": {"$regex": "\\\\bbro.*\\\\b"}}]}')
+            r, '{"$or": [{"text": {"$regex": "\\\\bqu.?ck\\\\b", "$options": "i"}}, {"text": {"$regex": "\\\\bbro.*\\\\b", "$options": "i"}}]}')

    def test_regular_expressions(self):

        string = r'''name:/joh?n(ath[oa]n)/'''
        r = self.parser.parse(string)
-        self.assertEqual(r, '{"name": {"$regex": "joh?n(ath[oa]n)"}}')
+        self.assertEqual(r, '{"name": {"$regex": "joh?n(ath[oa]n)", "$options": "i"}}')

    def test_fuzziness(self):
        pass
@ -365,24 +415,24 @@ class MongoQueryTestCase(unittest.TestCase):
        # OR (||)
        string = r'''"jakarta apache" jakarta'''
        r = self.parser.parse(string)
-        self.assertEqual(r, '{"$or": [{"text": {"$regex": "jakarta apache"}}, {"text": {"$regex": "jakarta"}}]}')
+        self.assertEqual(r, '{"$or": [{"text": {"$regex": "jakarta apache", "$options": "i"}}, {"text": {"$regex": "jakarta", "$options": "i"}}]}')

        string = r'''"jakarta apache" OR jakarta'''
        r = self.parser.parse(string)
-        self.assertEqual(r, '{"$or": [{"text": {"$regex": "jakarta apache"}}, {"text": {"$regex": "jakarta"}}]}')
+        self.assertEqual(r, '{"$or": [{"text": {"$regex": "jakarta apache", "$options": "i"}}, {"text": {"$regex": "jakarta", "$options": "i"}}]}')

        string = r'''"jakarta apache" || jakarta'''
        r = self.parser.parse(string)
-        self.assertEqual(r, '{"$or": [{"text": {"$regex": "jakarta apache"}}, {"text": {"$regex": "jakarta"}}]}')
+        self.assertEqual(r, '{"$or": [{"text": {"$regex": "jakarta apache", "$options": "i"}}, {"text": {"$regex": "jakarta", "$options": "i"}}]}')

        # AND (&&)
        string = r'''"jakarta apache" AND "Apache Lucene"'''
        r = self.parser.parse(string)
-        self.assertEqual(r, '{"$and": [{"text": {"$regex": "jakarta apache"}}, {"text": {"$regex": "Apache Lucene"}}]}')
+        self.assertEqual(r, '{"$and": [{"text": {"$regex": "jakarta apache", "$options": "i"}}, {"text": {"$regex": "Apache Lucene", "$options": "i"}}]}')

        string = r'''"jakarta apache" && "Apache Lucene"'''
        r = self.parser.parse(string)
-        self.assertEqual(r, '{"$and": [{"text": {"$regex": "jakarta apache"}}, {"text": {"$regex": "Apache Lucene"}}]}')
+        self.assertEqual(r, '{"$and": [{"text": {"$regex": "jakarta apache", "$options": "i"}}, {"text": {"$regex": "Apache Lucene", "$options": "i"}}]}')

        # + (required)
        pass
@ -390,19 +440,19 @@ class MongoQueryTestCase(unittest.TestCase):
        # NOT (!)
        string = r'''"jakarta apache" NOT "Apache Lucene"'''
        r = self.parser.parse(string)
-        self.assertEqual(r, '{"$and": [{"text": {"$regex": "jakarta apache"}}, {"text": {"$not": {"$regex": "Apache Lucene"}}}]}')
+        self.assertEqual(r, '{"$and": [{"text": {"$regex": "jakarta apache", "$options": "i"}}, {"text": {"$not": {"$regex": "Apache Lucene", "$options": "i"}}}]}')

        string = r'''"jakarta apache" !"Apache Lucene"'''
        r = self.parser.parse(string)
-        self.assertEqual(r, '{"$and": [{"text": {"$regex": "jakarta apache"}}, {"text": {"$not": {"$regex": "Apache Lucene"}}}]}')
+        self.assertEqual(r, '{"$and": [{"text": {"$regex": "jakarta apache", "$options": "i"}}, {"text": {"$not": {"$regex": "Apache Lucene", "$options": "i"}}}]}')

        string = r'''NOT "jakarta apache"'''
        r = self.parser.parse(string)
-        self.assertEqual(r, '{"text": {"$not": {"$regex": "jakarta apache"}}}')
+        self.assertEqual(r, '{"text": {"$not": {"$regex": "jakarta apache", "$options": "i"}}}')

        string = r'''group:"jakarta apache" NOT group:"Apache Lucene"'''
        r = self.parser.parse(string)
-        self.assertEqual(r, '{"$and": [{"group": {"$regex": "jakarta apache"}}, {"group": {"$not": {"$regex": "Apache Lucene"}}}]}')
+        self.assertEqual(r, r'{"$and": [{"group": {"$regex": "\\bjakarta apache\\b", "$options": "i"}}, {"group": {"$not": {"$regex": "\\bApache Lucene\\b", "$options": "i"}}}]}')

        # - (prohibit)
        pass
@ -413,10 +463,10 @@ class MongoQueryTestCase(unittest.TestCase):
        string = r'''(quick OR brown) AND fox'''
        r = self.parser.parse(string)
        self.assertEqual(
-            r, '{"$and": [{"$or": [{"text": {"$regex": "quick"}}, {"text": {"$regex": "brown"}}]}, {"text": {"$regex": "fox"}}]}')
+            r, '{"$and": [{"$or": [{"text": {"$regex": "quick", "$options": "i"}}, {"text": {"$regex": "brown", "$options": "i"}}]}, {"text": {"$regex": "fox", "$options": "i"}}]}')

        # grouped terms applied to individual fields
        string = r'''status:(active OR pending) title:(full text search)'''
        r = self.parser.parse(string)
        self.assertEqual(
-            r, '{"$or": [{"$or": [{"status": {"$regex": "active"}}, {"status": {"$regex": "pending"}}]}, {"$or": [{"title": {"$regex": "full"}}, {"title": {"$regex": "text"}}]}]}')
+            r, '{"$or": [{"$or": [{"status": {"$regex": "active", "$options": "i"}}, {"status": {"$regex": "pending", "$options": "i"}}]}, {"$or": [{"title": {"$regex": "full", "$options": "i"}}, {"title": {"$regex": "text", "$options": "i"}}]}]}')
--- a/tests/test_search.py
+++ b/tests/test_search.py
@ -3,7 +3,7 @@ import unittest

 from werkzeug.datastructures import MultiDict

-from alerta.app import create_app, qb
+from alerta.app import create_app, db, qb

 # service, tags (=, !=, =~, !=~)
 # attributes (=, !=, =~, !=~)
@ -70,7 +70,7 @@ class QueryParserTestCase(unittest.TestCase):
                'value': 'johno',
                'text': 'panic: this is a foo alert',
                'tags': ['aaa', 'bbb', 'ccc'],
-                'attributes': {'region': 'EMEA'},
+                'attributes': {'region': 'EMEA', 'partition': '7.0'},
                'origin': 'alpha',
                'timeout': 100,
                'rawData': ''
@ -87,7 +87,7 @@ class QueryParserTestCase(unittest.TestCase):
                'value': 'jonathon',
                'text': 'Kernel Panic: this is a bar test alert',
                'tags': ['bbb', 'ccc', 'ddd'],
-                'attributes': {'region': 'LATAM'},
+                'attributes': {'region': 'LATAM', 'partition': '72'},
                'origin': 'bravo',
                'timeout': 200,
                'rawData': ''
@ -104,7 +104,7 @@ class QueryParserTestCase(unittest.TestCase):
                'value': 'jonathan',
                'text': 'kernel panic: this is a foo bar text alert',
                'tags': ['ccc', 'ddd', 'eee'],
-                'attributes': {'region': 'APAC'},
+                'attributes': {'region': 'APAC', 'partition': '727'},
                'origin': 'charlie',
                'timeout': 300,
                'rawData': ''
@ -121,7 +121,7 @@ class QueryParserTestCase(unittest.TestCase):
                'value': 'john',
                'text': 'kernel panick: this is a fu bar baz quux tests alert (i have a boat)',
                'tags': ['ddd', 'eee', 'aaa'],
-                'attributes': {'region': 'EMEA'},
+                'attributes': {'region': 'EMEA', 'partition': '27'},
                'origin': 'delta',
                'timeout': 400,
                'rawData': ''
@ -149,6 +149,9 @@ class QueryParserTestCase(unittest.TestCase):
            response = self.client.post('/alert', json=alert, content_type='application/json')
            self.assertEqual(response.status_code, 201)

+    def tearDown(self):  # unittest hook: invoked after each test method in this case
+        db.destroy()  # NOTE(review): presumably resets the backend so tests do not share stored alerts — confirm against db.destroy()
+
    def _search(self, q):
        response = self.client.get('/alerts?q={}'.format(q))
        self.assertEqual(response.status_code, 200)
@ -180,6 +183,11 @@ class QueryParserTestCase(unittest.TestCase):
        self.assertEqual(self._search(q='attributes.region:EMEA'), 2)
        self.assertEqual(self._search(q='_.region:EMEA'), 2)
        self.assertEqual(self._search(q='_.region:(EMEA LATAM)'), 3)
+        self.assertEqual(self._search(q='_.region:(EMEA OR LATAM)'), 3)
+        self.assertEqual(self._search(q='attributes.partition:7'), 4)
+        self.assertEqual(self._search(q='_.partition:7'), 4)
+        self.assertEqual(self._search(q='attributes.partition:"7"'), 1)
+        self.assertEqual(self._search(q='_.partition:"7"'), 1)

    def test_wildcards(self):
        self.assertEqual(self._search(q='f*'), 4)