// SPDX-License-Identifier: GPL-3.0-or-later

/** @file parser.c
 *  @brief API to parse and search logs
 */

#if !defined(_XOPEN_SOURCE) && !defined(__DARWIN__) && !defined(__APPLE__) && !defined(__FreeBSD__)
/* _XOPEN_SOURCE 700 is required by strptime (POSIX 2004) and strndup (POSIX 2008).
 * Will need to find a cleaner way of doing this, as currently defining
 * _XOPEN_SOURCE 700 can cause issues on CentOS 7, macOS and FreeBSD too. */
#define _XOPEN_SOURCE 700
/* _BSD_SOURCE (glibc <= 2.19) and _DEFAULT_SOURCE (glibc >= 2.20) are required
 * to silence "warning: implicit declaration of function ‘strsep’;" that is
 * included through libnetdata/inlined.h. */
#define _BSD_SOURCE
#define _DEFAULT_SOURCE
#include <time.h>
#endif

#include "parser.h"
#include "helper.h"
#include <stdio.h>
#include <sys/resource.h>
#include <math.h>
#include <string.h>

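/* Note (added for clarity): these regexs are compiled once, on the first call
 * to read_web_log_parser_config(), and are then reused when verifying parsed
 * fields such as the vhost and the client address. */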
static regex_t vhost_regex, req_client_regex, cipher_suite_regex;
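/* Format strings tried, in order, when auto-detecting a CSV web log format,
 * ordered from most to least specific so that the richest matching candidate
 * wins. For instance, a line such as:
 *     127.0.0.1 - - [30/Mar/2023:21:27:45 +0100] "GET /index.html HTTP/1.1" 200 512
 * matches the csvCommon entry. (Descriptive comment added for clarity; the
 * example line is illustrative only.) */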
const char* const csv_auto_format_guess_matrix[] = {
    "$host:$server_port $remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent - - $request_length $request_time $upstream_response_time", // csvVhostCustom4
    "$host:$server_port $remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent - - $request_length $request_time", // csvVhostCustom3
    "$host:$server_port $remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent - -", // csvVhostCombined
    "$host:$server_port $remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent $request_length $request_time $upstream_response_time", // csvVhostCustom2
    "$host:$server_port $remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent $request_length $request_time", // csvVhostCustom1
    "$host:$server_port $remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent", // csvVhostCommon
    "$remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent - - $request_length $request_time $upstream_response_time", // csvCustom4
    "$remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent - - $request_length $request_time", // csvCustom3
    "$remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent - -", // csvCombined
    "$remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent $request_length $request_time $upstream_response_time", // csvCustom2
    "$remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent $request_length $request_time", // csvCustom1
    "$remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent", // csvCommon
    NULL};

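/**
 * @brief Count the number of fields in a delimiter-separated log line.
 * @details Sections enclosed in double quotes may contain the delimiter
 *          without starting a new field, and an escaped double quote ("")
 *          inside a quoted section is skipped. Consecutive delimiters are
 *          treated as a single separator, so for example
 *          count_fields("a,b,,c", ',') returns 3.
 *          (Documentation comment added for clarity.)
 * @param[in] line Log line to examine, terminated by '\n', '\r' or '\0'.
 * @param[in] delimiter Field separator.
 * @return Number of fields found, or -1 if the line ends inside an open quote.
 */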
UNIT_STATIC int count_fields(const char *line, const char delimiter){
    const char *ptr;
    int cnt, fQuote;

    for (cnt = 1, fQuote = 0, ptr = line; *ptr != '\n' && *ptr != '\r' && *ptr != '\0'; ptr++ ){
        if (fQuote) {
            if (*ptr == '\"') {
                if ( ptr[1] == '\"' ) {
                    ptr++;
                    continue;
                }
                fQuote = 0;
            }
            continue;
        }

        if(*ptr == '\"'){
            fQuote = 1;
            continue;
        }
        if(*ptr == delimiter){
            cnt++;
            while(*(ptr+1) == delimiter) ptr++;
            continue;
        }
    }

    if (fQuote) {
        return -1;
    }

    return cnt;
}

/**
 * @brief Parse a delimited string into an array of strings.
 * @details Given a string containing no linebreaks, or containing line breaks
 *          which are escaped by "double quotes", extract a NULL-terminated
 *          array of strings, one for every delimiter-separated value in the row.
 * @param[in] line The input string to be parsed.
 * @param[in] delimiter The delimiter to be used to split the string.
 * @param[in] num_fields The expected number of fields in \p line. If a negative
 *            number is provided, the fields will be counted first.
 * @return A NULL-terminated array of strings with the delimited values in \p line,
 *         or NULL in any other case.
 * @todo This function has not been benchmarked or optimised.
 */
static inline char **parse_csv( const char *line, const char delimiter, int num_fields) {
    char **buf, **bptr, *tmp, *tptr;
    const char *ptr;
    int fQuote, fEnd;

    if(num_fields < 0){
        num_fields = count_fields(line, delimiter);

        if ( num_fields == -1 ) {
            return NULL;
        }
    }

    buf = mallocz( sizeof(char*) * (num_fields+1) );
    tmp = mallocz( strlen(line) + 1 );
    bptr = buf;

    for ( ptr = line, fQuote = 0, *tmp = '\0', tptr = tmp, fEnd = 0; ; ptr++ ) {
        if ( fQuote ) {
            if ( !*ptr ) {
                break;
            }

            if ( *ptr == '\"' ) {
                if ( ptr[1] == '\"' ) {
                    *tptr++ = '\"';
                    ptr++;
                    continue;
                }
                fQuote = 0;
            }
            else {
                *tptr++ = *ptr;
            }

            continue;
        }

        if(*ptr == '\"'){
            fQuote = 1;
            continue;
        }
        else if(*ptr == '\0'){
            fEnd = 1;
            *tptr = '\0';
            *bptr = strdupz( tmp );

            if ( !*bptr ) {
                for ( bptr--; bptr >= buf; bptr-- ) {
                    freez( *bptr );
                }
                freez( buf );
                freez( tmp );

                return NULL;
            }

            bptr++;
            tptr = tmp;
            break;
        }
        else if(*ptr == delimiter){
            *tptr = '\0';
            *bptr = strdupz( tmp );

            if ( !*bptr ) {
                for ( bptr--; bptr >= buf; bptr-- ) {
                    freez( *bptr );
                }
                freez( buf );
                freez( tmp );

                return NULL;
            }

            bptr++;
            tptr = tmp;

            continue;
        }
        else{
            *tptr++ = *ptr;
            continue;
        }

        if ( fEnd ) {
            break;
        }
    }

    *bptr = NULL;
    freez( tmp );
    return buf;
}
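
/* Illustrative sketch (not part of the original sources): how parse_csv() splits
 * a record that contains a quoted delimiter. The input values are made up.
 *
 *     char **fields = parse_csv("a,\"b,c\",d", ',', -1); // -1: count fields first
 *     // fields is { "a", "b,c", "d", NULL }
 *     for(char **f = fields; *f; f++)
 *         freez(*f);
 *     freez(fields);
 */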

/**
 * @brief Search a buffer for a keyword (or regular expression).
 * @details Search the source buffer for a keyword (or regular expression) and
 *          copy matches to the destination buffer.
 * @param[in] src The source buffer to be searched.
 * @param[in] src_sz Size of \p src.
 * @param[in, out] dest The destination buffer where the results will be
 *                 written out to. If NULL, the results will just be discarded.
 * @param[out] dest_sz Size of \p dest.
 * @param[in] keyword The keyword or pattern to be searched for in the \p src buffer.
 * @param[in] regex The precompiled regular expression to be searched for in the
 *            \p src buffer. If NULL, \p keyword will be used instead.
 * @param[in] ignore_case Perform a case-insensitive search if 1.
 * @return Number of matches, or -1 in case of error.
 */
int search_keyword( char *src, size_t src_sz __maybe_unused,
                    char *dest, size_t *dest_sz,
                    const char *keyword, regex_t *regex,
                    const int ignore_case){

    m_assert(src[src_sz - 1] == '\0', "src[src_sz - 1] should be '\0' but it's not");
    m_assert((dest && dest_sz) || (!dest && !dest_sz), "either both dest and dest_sz exist, or none does");

    if(unlikely(dest && !dest_sz))
        return -1;

    regex_t regex_compiled;

    if(regex)
        regex_compiled = *regex;
    else{
        char regexString[MAX_REGEX_SIZE];
        const int regex_flags = ignore_case ? REG_EXTENDED | REG_NEWLINE | REG_ICASE : REG_EXTENDED | REG_NEWLINE;
        snprintf(regexString, MAX_REGEX_SIZE, ".*(%s).*", keyword);
        int rc;
        if (unlikely((rc = regcomp(&regex_compiled, regexString, regex_flags)))){
            size_t regcomp_err_str_size = regerror(rc, &regex_compiled, 0, 0);
            char *regcomp_err_str = mallocz(regcomp_err_str_size);
            regerror(rc, &regex_compiled, regcomp_err_str, regcomp_err_str_size);
            fatal("Could not compile regular expression:%.*s, error: %s", (int) MAX_REGEX_SIZE, regexString, regcomp_err_str);
            /* fatal() does not return, so regcomp_err_str must not be freed beforehand */
        }
    }

    regmatch_t groupArray[1];
    int matches = 0;
    char *cursor = src;

    if(dest_sz)
        *dest_sz = 0;

    for ( ; ; matches++){
        if (regexec(&regex_compiled, cursor, 1, groupArray, REG_NOTBOL | REG_NOTEOL))
            break; // No more matches
        if (groupArray[0].rm_so == -1)
            break; // No more groups

        size_t match_len = (size_t) (groupArray[0].rm_eo - groupArray[0].rm_so);

        // debug_log( "Match %d [%2d-%2d]:%.*s\n", matches, groupArray[0].rm_so,
        //            groupArray[0].rm_eo, (int) match_len, cursor + groupArray[0].rm_so);

        if(dest && dest_sz){
            memcpy( &dest[*dest_sz], cursor + groupArray[0].rm_so, match_len);
            *dest_sz += match_len + 1;
            dest[*dest_sz - 1] = '\n';
        }

        cursor += groupArray[0].rm_eo;
    }

    if(!regex)
        regfree(&regex_compiled);

    return matches;
}
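
/* Illustrative sketch (not part of the original sources): a case-insensitive
 * keyword search over a small buffer. The log content is made up, and dest is
 * sized like src on the assumption that every source byte may be copied out.
 *
 *     char src[] = "error: disk full\ninfo: all good\nERROR: retry\n";
 *     char dest[sizeof(src)];
 *     size_t dest_sz;
 *     int matches = search_keyword(src, sizeof(src), dest, &dest_sz,
 *                                  "error", NULL, 1); // no precompiled regex
 *     // matches == 2; dest holds the two matching lines, '\n'-separated
 */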

/**
 * @brief Extract web log parser configuration from a string.
 * @param[in] log_format String that describes the log format.
 * @param[in] delimiter Delimiter to be used when parsing a CSV log format.
 * @return Pointer to a struct that contains the extracted log format
 *         configuration, or NULL if no fields are found in \p log_format.
 */
Web_log_parser_config_t *read_web_log_parser_config(const char *log_format, const char delimiter){
    int num_fields = count_fields(log_format, delimiter);
    if(num_fields <= 0) return NULL;

    /* If this is the first execution of this function, initialise the regexs */
    static int regexs_initialised = 0;

    // TODO: Tests needed for following regexs.
    if(!regexs_initialised){
        assert(regcomp(&vhost_regex, "^[a-zA-Z0-9:.-]+$", REG_NOSUB | REG_EXTENDED) == 0);
        assert(regcomp(&req_client_regex, "^([0-9a-f:.]+|localhost)$", REG_NOSUB | REG_EXTENDED) == 0);
        assert(regcomp(&cipher_suite_regex, "^[A-Z0-9_-]+$", REG_NOSUB | REG_EXTENDED) == 0);
        regexs_initialised = 1;
    }

    Web_log_parser_config_t *wblp_config = callocz(1, sizeof(Web_log_parser_config_t));
    wblp_config->num_fields = num_fields;
    wblp_config->delimiter = delimiter;

    char **parsed_format = parse_csv(log_format, delimiter, num_fields); // parsed_format is NULL-terminated
    wblp_config->fields = callocz(num_fields, sizeof(web_log_line_field_t));
    unsigned int fields_off = 0;

    for(int i = 0; i < num_fields; i++ ){

        if(strcmp(parsed_format[i], "$host:$server_port") == 0 ||
           strcmp(parsed_format[i], "%v:%p") == 0) {
            wblp_config->fields[fields_off++] = VHOST_WITH_PORT;
            continue;
        }

        if(strcmp(parsed_format[i], "$host") == 0 ||
           strcmp(parsed_format[i], "$http_host") == 0 ||
           strcmp(parsed_format[i], "%v") == 0) {
            wblp_config->fields[fields_off++] = VHOST;
            continue;
        }

        if(strcmp(parsed_format[i], "$server_port") == 0 ||
           strcmp(parsed_format[i], "%p") == 0) {
            wblp_config->fields[fields_off++] = PORT;
            continue;
        }

        if(strcmp(parsed_format[i], "$scheme") == 0) {
            wblp_config->fields[fields_off++] = REQ_SCHEME;
            continue;
        }

        if(strcmp(parsed_format[i], "$remote_addr") == 0 ||
           strcmp(parsed_format[i], "%a") == 0 ||
           strcmp(parsed_format[i], "%h") == 0) {
            wblp_config->fields[fields_off++] = REQ_CLIENT;
            continue;
        }

        if(strcmp(parsed_format[i], "$request") == 0 ||
           strcmp(parsed_format[i], "%r") == 0) {
            wblp_config->fields[fields_off++] = REQ;
            continue;
        }

        if(strcmp(parsed_format[i], "$request_method") == 0 ||
           strcmp(parsed_format[i], "%m") == 0) {
            wblp_config->fields[fields_off++] = REQ_METHOD;
            continue;
        }

        if(strcmp(parsed_format[i], "$request_uri") == 0 ||
           strcmp(parsed_format[i], "%U") == 0) {
            wblp_config->fields[fields_off++] = REQ_URL;
            continue;
        }

        if(strcmp(parsed_format[i], "$server_protocol") == 0 ||
           strcmp(parsed_format[i], "%H") == 0) {
            wblp_config->fields[fields_off++] = REQ_PROTO;
            continue;
        }

        if(strcmp(parsed_format[i], "$request_length") == 0 ||
           strcmp(parsed_format[i], "%I") == 0) {
            wblp_config->fields[fields_off++] = REQ_SIZE;
            continue;
        }

        if(strcmp(parsed_format[i], "$request_time") == 0 ||
           strcmp(parsed_format[i], "%D") == 0) {
            wblp_config->fields[fields_off++] = REQ_PROC_TIME;
            continue;
        }

        if(strcmp(parsed_format[i], "$status") == 0 ||
           strcmp(parsed_format[i], "%>s") == 0 ||
           strcmp(parsed_format[i], "%s") == 0) {
            wblp_config->fields[fields_off++] = RESP_CODE;
            continue;
        }

        if(strcmp(parsed_format[i], "$bytes_sent") == 0 ||
           strcmp(parsed_format[i], "$body_bytes_sent") == 0 ||
           strcmp(parsed_format[i], "%b") == 0 ||
           strcmp(parsed_format[i], "%O") == 0 ||
           strcmp(parsed_format[i], "%B") == 0) {
            wblp_config->fields[fields_off++] = RESP_SIZE;
            continue;
        }

        if(strcmp(parsed_format[i], "$upstream_response_time") == 0) {
            wblp_config->fields[fields_off++] = UPS_RESP_TIME;
            continue;
        }

        if(strcmp(parsed_format[i], "$ssl_protocol") == 0) {
            wblp_config->fields[fields_off++] = SSL_PROTO;
            continue;
        }

        if(strcmp(parsed_format[i], "$ssl_cipher") == 0) {
            wblp_config->fields[fields_off++] = SSL_CIPHER_SUITE;
            continue;
        }

        if(strcmp(parsed_format[i], "$time_local") == 0 || strcmp(parsed_format[i], "[$time_local]") == 0 ||
           strcmp(parsed_format[i], "%t") == 0 || strcmp(parsed_format[i], "[%t]") == 0) {
            wblp_config->fields = reallocz(wblp_config->fields, (num_fields + 1) * sizeof(web_log_line_field_t));
            wblp_config->fields[fields_off++] = TIME;
            wblp_config->fields[fields_off++] = TIME; // TIME takes 2 fields
            wblp_config->num_fields++; // TIME takes 2 fields
            continue;
        }

        wblp_config->fields[fields_off++] = CUSTOM;
    }

    for(int i = 0; parsed_format[i] != NULL; i++)
        freez(parsed_format[i]);
    freez(parsed_format);

    return wblp_config;
}
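
/* Illustrative sketch (not part of the original sources): wiring the parser
 * config to parse_web_log_line() below for one NGINX combined-style record.
 * The sample line is made up and error handling is omitted.
 *
 *     Web_log_parser_config_t *conf = read_web_log_parser_config(
 *         "$remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent", ' ');
 *     if(conf){
 *         char record[] = "127.0.0.1 - - [30/Mar/2023:21:27:45 +0100] \"GET / HTTP/1.1\" 200 512";
 *         Log_line_parsed_t parsed = {0};
 *         parse_web_log_line(conf, record, strlen(record), &parsed);
 *         freez(conf->fields);
 *         freez(conf);
 *     }
 */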

/**
 * @brief Parse a web log line to extract individual fields.
 * @param[in] wblp_config Configuration that specifies how to parse the line.
 * @param[in] line Web log record to be parsed. '\n', '\r' or '\0' terminated.
 * @param[in] line_len Length of \p line.
 * @param[out] log_line_parsed Struct that stores the results of parsing.
 */
|
||
void parse_web_log_line(const Web_log_parser_config_t *wblp_config,
|
||
char *line, size_t line_len,
|
||
Log_line_parsed_t *log_line_parsed){
|
||
|
||
/* Read parsing configuration */
|
||
web_log_line_field_t *fields_format = wblp_config->fields;
|
||
const int num_fields_config = wblp_config->num_fields;
|
||
const char delimiter = wblp_config->delimiter;
|
||
const int verify = wblp_config->verify_parsed_logs;
|
||
|
||
/* Consume new lines and spaces at end of line */
|
||
for(; line[line_len-1] == '\n' || line[line_len-1] == '\r' || line[line_len-1] == ' '; line_len--);
|
||
|
||
char *field = line;
|
||
char *offset = line;
|
||
size_t field_size = 0;
|
||
|
||
for(int i = 0; i < num_fields_config; i++ ){
|
||
|
||
/* Consume double quotes and extra delimiters at beginning of field */
|
||
while(*field == '"' || *field == delimiter) field++, offset++;
|
||
|
||
/* Find offset boundaries of next field in line */
|
||
while(((size_t)(offset - line) < line_len) && *offset != delimiter) offset++;
|
||
|
||
if(unlikely(*(offset - 1) == '"')) offset--;
|
||
|
||
field_size = (size_t) (offset - field);
|
||
|
||
#if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
|
||
debug_log( "Field[%d]:%.*s", i, (int)field_size, field);
|
||
#endif
|
||
|
||
if(fields_format[i] == CUSTOM){
|
||
#if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
|
||
debug_log( "Item %d (type: CUSTOM or UNKNOWN):%.*s", i, (int)field_size, field);
|
||
#endif
|
||
goto next_item;
|
||
}
|
||
|
||
|
||
char *port = field;
|
||
size_t port_size = 0;
|
||
size_t vhost_size = 0;
|
||
|
||
if(fields_format[i] == VHOST_WITH_PORT){
|
||
#if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
|
||
debug_log( "Item %d (type: VHOST_WITH_PORT):%.*s", i, (int)field_size, field);
|
||
#endif
|
||
|
||
if(unlikely(field[0] == '-' && field_size == 1)){
|
||
log_line_parsed->vhost[0] = '\0';
|
||
log_line_parsed->port = WEB_LOG_INVALID_PORT;
|
||
log_line_parsed->parsing_errors++;
|
||
goto next_item;
|
||
}
|
||
|
||
while(*port != ':' && vhost_size < field_size) { port++; vhost_size++; };
|
||
if(likely(vhost_size < field_size)){
|
||
/* ':' detected in string */
|
||
port++;
|
||
port_size = field_size - vhost_size - 1;
|
||
field_size = vhost_size; // now field represents vhost and port is separate
|
||
}
|
||
else {
|
||
/* no ':' detected in string - invalid */
|
||
log_line_parsed->vhost[0] = '\0';
|
||
log_line_parsed->port = WEB_LOG_INVALID_PORT;
|
||
log_line_parsed->parsing_errors++;
|
||
goto next_item;
|
||
}
|
||
}
|
||
|
||
if(fields_format[i] == VHOST_WITH_PORT || fields_format[i] == VHOST){
|
||
#if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
|
||
debug_log( "Item %d (type: VHOST):%.*s", i, (int)field_size, field);
|
||
#endif
|
||
|
||
if(unlikely(field[0] == '-' && field_size == 1)){
|
||
log_line_parsed->vhost[0] = '\0';
|
||
log_line_parsed->parsing_errors++;
|
||
goto next_item;
|
||
}
|
||
|
||
// TODO: Add below case in code!!!
|
||
// nginx $host and $http_host return ipv6 in [], apache doesn't
|
||
// TODO: TEST! This case hasn't been tested!
|
||
// char *pch = strchr(parsed[i], ']');
|
||
// if(pch){
|
||
// *pch = '\0';
|
||
// memmove(parsed[i], parsed[i]+1, strlen(parsed[i]));
|
||
// }
|
||
|
||
snprintfz(log_line_parsed->vhost, VHOST_MAX_LEN, "%.*s", (int) field_size, field);
|
||
|
||
if(verify){
|
||
// if(field_size >= VHOST_MAX_LEN){
|
||
// #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
|
||
// collector_error("VHOST is invalid");
|
||
// #endif
|
||
// log_line_parsed->vhost[0] = '\0';
|
||
// log_line_parsed->parsing_errors++;
|
||
// goto next_item; // TODO: Not entirely right, as it will also skip PORT parsing in case of VHOST_WITH_PORT
|
||
// }
|
||
|
||
if(unlikely(regexec(&vhost_regex, log_line_parsed->vhost, 0, NULL, 0) == REG_NOMATCH)){
|
||
#if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
|
||
collector_error("VHOST is invalid");
|
||
#endif
|
||
// log_line_parsed->vhost[0] = 'invalid';
|
||
snprintf(log_line_parsed->vhost, sizeof(WEB_LOG_INVALID_HOST_STR), WEB_LOG_INVALID_HOST_STR);
|
||
log_line_parsed->parsing_errors++;
|
||
}
|
||
}
|
||
|
||
#if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
|
||
debug_log( "Extracted VHOST:%s", log_line_parsed->vhost);
|
||
#endif
|
||
|
||
if(fields_format[i] == VHOST) goto next_item;
|
||
}
|
||
|
||
if(fields_format[i] == VHOST_WITH_PORT || fields_format[i] == PORT){
|
||
|
||
if(fields_format[i] != VHOST_WITH_PORT){
|
||
port = field;
|
||
port_size = field_size;
|
||
}
|
||
|
||
#if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
|
||
debug_log( "Item %d (type: PORT):%.*s", i, (int) port_size, port);
|
||
#endif
|
||
|
||
if(unlikely(port[0] == '-' && port_size == 1)){
|
||
log_line_parsed->port = WEB_LOG_INVALID_PORT;
|
||
log_line_parsed->parsing_errors++;
|
||
goto next_item;
|
||
}
|
||
|
||
char port_d[PORT_MAX_LEN];
|
||
snprintfz( port_d, PORT_MAX_LEN, "%.*s", (int) port_size, port);
|
||
|
||
if(likely(str2int(&log_line_parsed->port, port_d, 10) == STR2XX_SUCCESS)){
|
||
if(verify){
|
||
if(unlikely(log_line_parsed->port < 80 || log_line_parsed->port > 49151)){
|
||
#if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
|
||
collector_error("PORT is invalid (<80 or >49151)");
|
||
#endif
|
||
log_line_parsed->port = WEB_LOG_INVALID_PORT;
|
||
log_line_parsed->parsing_errors++;
|
||
}
|
||
}
|
||
}
|
||
else{
|
||
#if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
|
||
collector_error("Error while extracting PORT from string");
|
||
#endif
|
||
log_line_parsed->port = WEB_LOG_INVALID_PORT;
|
||
log_line_parsed->parsing_errors++;
|
||
}
|
||
#if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
|
||
debug_log( "Extracted PORT:%d", log_line_parsed->port);
|
||
#endif
|
||
|
||
goto next_item;
|
||
}
|
||
|
||
if(fields_format[i] == REQ_SCHEME){
|
||
#if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
|
||
debug_log( "Item %d (type: REQ_SCHEME):%.*s", i, (int)field_size, field);
|
||
#endif
|
||
|
||
if(unlikely(field[0] == '-' && field_size == 1)){
|
||
log_line_parsed->req_scheme[0] = '\0';
|
||
log_line_parsed->parsing_errors++;
|
||
goto next_item;
|
||
}
|
||
|
||
snprintfz(log_line_parsed->req_scheme, REQ_SCHEME_MAX_LEN, "%.*s", (int) field_size, field);
|
||
|
||
if(verify){
|
||
if(unlikely( strcmp(log_line_parsed->req_scheme, "http") &&
|
||
strcmp(log_line_parsed->req_scheme, "https"))){
|
||
#if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
|
||
collector_error("REQ_SCHEME is invalid (must be either 'http' or 'https')");
|
||
#endif
|
||
log_line_parsed->req_scheme[0] = '\0';
|
||
log_line_parsed->parsing_errors++;
|
||
}
|
||
}
|
||
#if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
|
||
debug_log( "Extracted REQ_SCHEME:%s", log_line_parsed->req_scheme);
|
||
#endif
|
||
goto next_item;
|
||
}
|
||
|
||
        if(fields_format[i] == REQ_CLIENT){

            #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
            debug_log( "Item %d (type: REQ_CLIENT):%.*s", i, (int)field_size, field);
            #endif

            if(unlikely(field[0] == '-' && field_size == 1)){
                log_line_parsed->req_client[0] = '\0';
                log_line_parsed->parsing_errors++;
                goto next_item;
            }

            snprintfz(log_line_parsed->req_client, REQ_CLIENT_MAX_LEN, "%.*s", (int)field_size, field);

            if(verify){
                int regex_rc = regexec(&req_client_regex, log_line_parsed->req_client, 0, NULL, 0);
                if (likely(regex_rc == 0)) {/* do nothing */}
                else if (unlikely(regex_rc == REG_NOMATCH)) {
                    #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
                    collector_error("REQ_CLIENT is invalid");
                    #endif
                    snprintf(log_line_parsed->req_client, REQ_CLIENT_MAX_LEN, "%s", WEB_LOG_INVALID_CLIENT_IP_STR);
                    log_line_parsed->parsing_errors++;
                }
                else {
                    size_t err_msg_size = regerror(regex_rc, &req_client_regex, NULL, 0);
                    char *err_msg = mallocz(err_msg_size);
                    regerror(regex_rc, &req_client_regex, err_msg, err_msg_size);
                    collector_error("req_client_regex error:%s", err_msg);
                    freez(err_msg);
                    m_assert(0, "req_client_regex has failed");
                }
            }

            #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
            debug_log( "Extracted REQ_CLIENT:%s", log_line_parsed->req_client);
            #endif

            goto next_item;
        }

        if(fields_format[i] == REQ || fields_format[i] == REQ_METHOD){

            /* If fields_format[i] == REQ, then field has already been filled in
             * with the request by the previous code */

            #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
            debug_log( "Item %d (type: REQ or REQ_METHOD):%.*s", i, (int)field_size, field);
            #endif

            snprintfz( log_line_parsed->req_method, REQ_METHOD_MAX_LEN, "%.*s", (int)field_size, field);

            if(verify){
                if( unlikely(
                    /* GET and POST are the most common requests, so check them first */
                    strcmp(log_line_parsed->req_method, "GET") &&
                    strcmp(log_line_parsed->req_method, "POST") &&

                    strcmp(log_line_parsed->req_method, "ACL") &&
                    strcmp(log_line_parsed->req_method, "BASELINE-CONTROL") &&
                    strcmp(log_line_parsed->req_method, "BIND") &&
                    strcmp(log_line_parsed->req_method, "CHECKIN") &&
                    strcmp(log_line_parsed->req_method, "CHECKOUT") &&
                    strcmp(log_line_parsed->req_method, "CONNECT") &&
                    strcmp(log_line_parsed->req_method, "COPY") &&
                    strcmp(log_line_parsed->req_method, "DELETE") &&
                    strcmp(log_line_parsed->req_method, "HEAD") &&
                    strcmp(log_line_parsed->req_method, "LABEL") &&
                    strcmp(log_line_parsed->req_method, "LINK") &&
                    strcmp(log_line_parsed->req_method, "LOCK") &&
                    strcmp(log_line_parsed->req_method, "MERGE") &&
                    strcmp(log_line_parsed->req_method, "MKACTIVITY") &&
                    strcmp(log_line_parsed->req_method, "MKCALENDAR") &&
                    strcmp(log_line_parsed->req_method, "MKCOL") &&
                    strcmp(log_line_parsed->req_method, "MKREDIRECTREF") &&
                    strcmp(log_line_parsed->req_method, "MKWORKSPACE") &&
                    strcmp(log_line_parsed->req_method, "MOVE") &&
                    strcmp(log_line_parsed->req_method, "OPTIONS") &&
                    strcmp(log_line_parsed->req_method, "ORDERPATCH") &&
                    strcmp(log_line_parsed->req_method, "PATCH") &&
                    strcmp(log_line_parsed->req_method, "PRI") &&
                    strcmp(log_line_parsed->req_method, "PROPFIND") &&
                    strcmp(log_line_parsed->req_method, "PROPPATCH") &&
                    strcmp(log_line_parsed->req_method, "PUT") &&
                    strcmp(log_line_parsed->req_method, "REBIND") &&
                    strcmp(log_line_parsed->req_method, "REPORT") &&
                    strcmp(log_line_parsed->req_method, "SEARCH") &&
                    strcmp(log_line_parsed->req_method, "TRACE") &&
                    strcmp(log_line_parsed->req_method, "UNBIND") &&
                    strcmp(log_line_parsed->req_method, "UNCHECKOUT") &&
                    strcmp(log_line_parsed->req_method, "UNLINK") &&
                    strcmp(log_line_parsed->req_method, "UNLOCK") &&
                    strcmp(log_line_parsed->req_method, "UPDATE") &&
                    strcmp(log_line_parsed->req_method, "UPDATEREDIRECTREF") &&
                    strcmp(log_line_parsed->req_method, "-"))) {

                    #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
                    collector_error("REQ_METHOD is invalid");
                    #endif
                    log_line_parsed->req_method[0] = '\0';
                    log_line_parsed->parsing_errors++;
                }
            }

            #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
            debug_log( "Extracted REQ_METHOD:%s", log_line_parsed->req_method);
            #endif

            if(fields_format[i] == REQ && field[0] != '-') {
                while(*(offset + 1) == delimiter) offset++; // Consume extra whitespace characters
                field = ++offset;
                while(*offset != delimiter && ((size_t)(offset - line) < line_len)) offset++;
                field_size = (size_t) (offset - field);
            }
            else goto next_item;
        }
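
        /* Illustrative walk-through (sample values are assumptions): for a quoted
         * request field such as "GET /index.html HTTP/1.1", the REQ_METHOD pass
         * above consumes "GET" and then advances 'offset' past the delimiter, so
         * the REQ_URL pass below sees field = "/index.html" and the REQ_PROTO pass
         * after that sees "HTTP/1.1". This is why a single REQ entry in
         * fields_format[] is parsed in three consecutive steps. */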

        if(fields_format[i] == REQ || fields_format[i] == REQ_URL){

            #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
            debug_log( "Item %d (type: REQ or REQ_URL):%.*s", i, (int)field_size, field);
            #endif

            snprintfz( log_line_parsed->req_URL, REQ_URL_MAX_LEN, "%.*s", (int)field_size, field);

            // if(unlikely(field[0] == '-' && field_size == 1)){
            //     log_line_parsed->req_method[0] = '\0';
            //     log_line_parsed->parsing_errors++;
            // }

            //if(verify){} ??

            #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
            debug_log( "Extracted REQ_URL:%s", log_line_parsed->req_URL);
            #endif

            if(fields_format[i] == REQ) {
                while(*(offset + 1) == delimiter) offset++; // Consume extra whitespace characters
                field = ++offset;
                while(*offset != delimiter && ((size_t)(offset - line) < line_len)) offset++;
                field_size = (size_t) (offset - field);
            }
            else goto next_item;
        }

        if(fields_format[i] == REQ || fields_format[i] == REQ_PROTO){

            #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
            debug_log( "Item %d (type: REQ or REQ_PROTO):%.*s", i, (int)field_size, field);
            #endif

            if(unlikely(field[0] == '-' && field_size == 1)){
                log_line_parsed->req_proto[0] = '\0';
                log_line_parsed->parsing_errors++;
                goto next_item;
            }

            if(unlikely( field_size > REQ_PROTO_PREF_SIZE + REQ_PROTO_MAX_LEN - 1)){
                field_size = REQ_PROTO_PREF_SIZE + REQ_PROTO_MAX_LEN - 1;
            }

            size_t req_proto_num_size = field_size - REQ_PROTO_PREF_SIZE;

            if(verify){
                if(unlikely(field_size < 6 ||
                            req_proto_num_size == 0 ||
                            strncmp(field, "HTTP/", REQ_PROTO_PREF_SIZE) ||
                            ( strncmp(&field[REQ_PROTO_PREF_SIZE], "1", req_proto_num_size) &&
                              strncmp(&field[REQ_PROTO_PREF_SIZE], "1.0", req_proto_num_size) &&
                              strncmp(&field[REQ_PROTO_PREF_SIZE], "1.1", req_proto_num_size) &&
                              strncmp(&field[REQ_PROTO_PREF_SIZE], "2", req_proto_num_size) &&
                              strncmp(&field[REQ_PROTO_PREF_SIZE], "2.0", req_proto_num_size)))) {
                    #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
                    collector_error("REQ_PROTO is invalid");
                    #endif
                    log_line_parsed->req_proto[0] = '\0';
                    log_line_parsed->parsing_errors++;
                }
                else snprintfz( log_line_parsed->req_proto, req_proto_num_size + 1,
                                "%.*s", (int)req_proto_num_size, &field[REQ_PROTO_PREF_SIZE]);
            }
            else snprintfz( log_line_parsed->req_proto, req_proto_num_size + 1,
                            "%.*s", (int)req_proto_num_size, &field[REQ_PROTO_PREF_SIZE]);

            #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
            debug_log( "Extracted REQ_PROTO:%s", log_line_parsed->req_proto);
            #endif

            goto next_item;
        }

        if(fields_format[i] == REQ_SIZE){
            /* TODO: Differentiate between '-' or 0 and an invalid request size.
             * right now, all these will set req_size == 0 */
            #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
            debug_log( "Item %d (type: REQ_SIZE):%.*s", i, (int)field_size, field);
            #endif

            char req_size_d[REQ_SIZE_MAX_LEN];
            snprintfz( req_size_d, REQ_SIZE_MAX_LEN, "%.*s", (int) field_size, field);

            if(field[0] == '-' && field_size == 1) {
                log_line_parsed->req_size = 0; // Request size can be '-'
            }
            else if(likely(str2int(&log_line_parsed->req_size, req_size_d, 10) == STR2XX_SUCCESS)){
                if(verify){
                    if(unlikely(log_line_parsed->req_size < 0)){
                        #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
                        collector_error("REQ_SIZE is invalid (<0)");
                        #endif
                        log_line_parsed->req_size = 0;
                        log_line_parsed->parsing_errors++;
                    }
                }
            }
            else{
                collector_error("Error while extracting REQ_SIZE from string");
                log_line_parsed->req_size = 0;
                log_line_parsed->parsing_errors++;
            }

            #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
            debug_log( "Extracted REQ_SIZE:%d", log_line_parsed->req_size);
            #endif

            goto next_item;
        }

        if(fields_format[i] == REQ_PROC_TIME){

            #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
            debug_log( "Item %d (type: REQ_PROC_TIME):%.*s", i, (int)field_size, field);
            #endif

            if(unlikely(field[0] == '-' && field_size == 1)){
                log_line_parsed->req_proc_time = 0; // processing time unavailable
                log_line_parsed->parsing_errors++;
                goto next_item;
            }

            float f = 0;

            char req_proc_time_d[REQ_PROC_TIME_MAX_LEN];
            snprintfz( req_proc_time_d, REQ_PROC_TIME_MAX_LEN, "%.*s", (int) field_size, field);

            if(memchr(field, '.', field_size)){ // nginx time is in seconds with a milliseconds resolution.
                if(likely(str2float(&f, req_proc_time_d) == STR2XX_SUCCESS)){
                    log_line_parsed->req_proc_time = (int) (f * 1.0E6);
                }
                else {
                    #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
                    collector_error("Error while extracting REQ_PROC_TIME from string");
                    #endif
                    log_line_parsed->req_proc_time = 0;
                    log_line_parsed->parsing_errors++;
                }
            }
            else{ // apache time is in microseconds
                if(unlikely(str2int(&log_line_parsed->req_proc_time, req_proc_time_d, 10) != STR2XX_SUCCESS)) {
                    #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
                    collector_error("Error while extracting REQ_PROC_TIME from string");
                    #endif
                    log_line_parsed->req_proc_time = 0;
                    log_line_parsed->parsing_errors++;
                }
            }

            if(verify){
                if(unlikely(log_line_parsed->req_proc_time < 0)){
                    #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
                    collector_error("REQ_PROC_TIME is invalid (<0)");
                    #endif
                    log_line_parsed->req_proc_time = 0;
                    log_line_parsed->parsing_errors++;
                }
            }

            #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
            debug_log( "Extracted REQ_PROC_TIME:%d", log_line_parsed->req_proc_time);
            #endif

            goto next_item;
        }

        if(fields_format[i] == RESP_CODE){

            #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
            debug_log( "Item %d (type: RESP_CODE):%.*s", i, (int)field_size, field);
            #endif

            if(unlikely(field[0] == '-' && field_size == 1)){
                log_line_parsed->resp_code = 0;
                log_line_parsed->parsing_errors++;
                goto next_item;
            }

            char resp_code_d[REQ_RESP_CODE_MAX_LEN];
            snprintfz( resp_code_d, REQ_RESP_CODE_MAX_LEN, "%.*s", (int)field_size, field);

            if(likely(str2int(&log_line_parsed->resp_code, resp_code_d, 10) == STR2XX_SUCCESS)){
                if(verify){
                    /* rfc7231
                     * Informational responses (100–199),
                     * Successful responses (200–299),
                     * Redirects (300–399),
                     * Client errors (400–499),
                     * Server errors (500–599). */
                    if(unlikely(log_line_parsed->resp_code < 100 || log_line_parsed->resp_code > 599)){
                        #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
                        collector_error("RESP_CODE is invalid (<100 or >599)");
                        #endif
                        log_line_parsed->resp_code = 0;
                        log_line_parsed->parsing_errors++;
                    }
                }
            }
            else{
                #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
                collector_error("Error while extracting RESP_CODE from string");
                #endif
                log_line_parsed->resp_code = 0;
                log_line_parsed->parsing_errors++;
            }

            #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
            debug_log( "Extracted RESP_CODE:%d", log_line_parsed->resp_code);
            #endif

            goto next_item;
        }

        if(fields_format[i] == RESP_SIZE){
            /* TODO: Differentiate between '-' or 0 and an invalid response size.
             * right now, all these will set resp_size == 0 */
            #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
            debug_log( "Item %d (type: RESP_SIZE):%.*s", i, (int)field_size, field);
            #endif

            char resp_size_d[REQ_RESP_SIZE_MAX_LEN];
            snprintfz( resp_size_d, REQ_RESP_SIZE_MAX_LEN, "%.*s", (int)field_size, field);

            if(field[0] == '-' && field_size == 1) {
                log_line_parsed->resp_size = 0; // Response size can be '-'
            }
            else if(likely(str2int(&log_line_parsed->resp_size, resp_size_d, 10) == STR2XX_SUCCESS)){
                if(verify){
                    if(unlikely(log_line_parsed->resp_size < 0)){
                        #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
                        collector_error("RESP_SIZE is invalid (<0)");
                        #endif
                        log_line_parsed->resp_size = 0;
                        log_line_parsed->parsing_errors++;
                    }
                }
            }
            else {
                #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
                collector_error("Error while extracting RESP_SIZE from string");
                #endif
                log_line_parsed->resp_size = 0;
                log_line_parsed->parsing_errors++;
            }

            #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
            debug_log( "Extracted RESP_SIZE:%d", log_line_parsed->resp_size);
            #endif

            goto next_item;
        }

        if(fields_format[i] == UPS_RESP_TIME){

            #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
            debug_log( "Item %d (type: UPS_RESP_TIME):%.*s", i, (int)field_size, field);
            #endif

            if(field[0] == '-' && field_size == 1) {
                log_line_parsed->ups_resp_time = 0;
                log_line_parsed->parsing_errors++;
                goto next_item;
            }

            /* Times of several responses are separated by commas and colons.
             * Following the Go parser implementation, only the first one is kept
             * and the rest are discarded. Also, there must be no space in between
             * them. Needs testing... */
            char *pch = memchr(field, ',', field_size);
            if(pch) field_size = pch - field;

            float f = 0;

            char ups_resp_time_d[UPS_RESP_TIME_MAX_LEN];
            snprintfz( ups_resp_time_d, UPS_RESP_TIME_MAX_LEN, "%.*s", (int)field_size, field);

            if(memchr(field, '.', field_size)){ // nginx time is in seconds with a milliseconds resolution.
                if(likely(str2float(&f, ups_resp_time_d) == STR2XX_SUCCESS)){
                    log_line_parsed->ups_resp_time = (int) (f * 1.0E6);
                }
                else {
                    #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
                    collector_error("Error while extracting UPS_RESP_TIME from string");
                    #endif
                    log_line_parsed->ups_resp_time = 0;
                    log_line_parsed->parsing_errors++;
                }
            }
            else{ // unlike the REQ_PROC_TIME case, apache has no equivalent here
                #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
                collector_error("Error while extracting UPS_RESP_TIME from string");
                #endif
                log_line_parsed->ups_resp_time = 0;
                log_line_parsed->parsing_errors++;
            }

            if(verify){
                if(unlikely(log_line_parsed->ups_resp_time < 0)){
                    #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
                    collector_error("UPS_RESP_TIME is invalid (<0)");
                    #endif
                    log_line_parsed->ups_resp_time = 0;
                    log_line_parsed->parsing_errors++;
                }
            }

            #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
            debug_log( "Extracted UPS_RESP_TIME:%d", log_line_parsed->ups_resp_time);
            #endif

            goto next_item;
        }
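
        /* Worked example (illustrative): an upstream response time list such as
         * "0.103, 0.205" is truncated at the first comma, so only "0.103" is
         * parsed; the seconds-to-microseconds conversion above then stores
         * ups_resp_time == 103000. */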

        if(fields_format[i] == SSL_PROTO){

            #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
            debug_log( "Item %d (type: SSL_PROTO):%.*s", i, (int)field_size, field);
            #endif

            if(field[0] == '-' && field_size == 1) {
                log_line_parsed->ssl_proto[0] = '\0';
                log_line_parsed->parsing_errors++;
                goto next_item;
            }

            #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
            debug_log( "SSL_PROTO field size:%zu", field_size);
            #endif

            snprintfz( log_line_parsed->ssl_proto, SSL_PROTO_MAX_LEN, "%.*s", (int)field_size, field);

            #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
            debug_log( "log_line_parsed->ssl_proto:%s", log_line_parsed->ssl_proto);
            #endif

            if(verify){
                if(unlikely(strcmp(log_line_parsed->ssl_proto, "TLSv1") &&
                            strcmp(log_line_parsed->ssl_proto, "TLSv1.1") &&
                            strcmp(log_line_parsed->ssl_proto, "TLSv1.2") &&
                            strcmp(log_line_parsed->ssl_proto, "TLSv1.3") &&
                            strcmp(log_line_parsed->ssl_proto, "SSLv2") &&
                            strcmp(log_line_parsed->ssl_proto, "SSLv3"))) {
                    #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
                    collector_error("SSL_PROTO is invalid");
                    #endif
                    log_line_parsed->ssl_proto[0] = '\0';
                    log_line_parsed->parsing_errors++;
                }
            }

            #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
            debug_log( "Extracted SSL_PROTO:%s", log_line_parsed->ssl_proto);
            #endif

            goto next_item;
        }

        if(fields_format[i] == SSL_CIPHER_SUITE){

            #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
            debug_log( "Item %d (type: SSL_CIPHER_SUITE):%.*s", i, (int)field_size, field);
            #endif

            if(field[0] == '-' && field_size == 1) {
                log_line_parsed->ssl_cipher[0] = '\0';
                log_line_parsed->parsing_errors++;
                goto next_item; // consistent with the other fields; don't copy the '-' below
            }

            snprintfz( log_line_parsed->ssl_cipher, SSL_CIPHER_SUITE_MAX_LEN, "%.*s", (int)field_size, field);

            #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
            debug_log( "before: SSL_CIPHER_SUITE:%s", log_line_parsed->ssl_cipher);
            #endif

            if(verify){
                int regex_rc = regexec(&cipher_suite_regex, log_line_parsed->ssl_cipher, 0, NULL, 0);
                if (likely(regex_rc == 0)){/* do nothing */}
                else if (unlikely(regex_rc == REG_NOMATCH)) {
                    #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
                    collector_error("SSL_CIPHER_SUITE is invalid");
                    #endif
                    log_line_parsed->ssl_cipher[0] = '\0';
                    log_line_parsed->parsing_errors++;
                }
                else {
                    size_t err_msg_size = regerror(regex_rc, &cipher_suite_regex, NULL, 0);
                    char *err_msg = mallocz(err_msg_size);
                    regerror(regex_rc, &cipher_suite_regex, err_msg, err_msg_size);
                    collector_error("cipher_suite_regex error:%s", err_msg);
                    freez(err_msg);
                    m_assert(0, "cipher_suite_regex has failed");
                }
            }

            #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
            debug_log( "Extracted SSL_CIPHER_SUITE:%s", log_line_parsed->ssl_cipher);
            #endif

            goto next_item;
        }

        if(fields_format[i] == TIME){

            if(wblp_config->skip_timestamp_parsing){
                while(*offset != ']') offset++;
                i++;
                offset++;
                goto next_item;
            }

            #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
            debug_log( "Item %d (type: TIME - 1st of 2 fields):%.*s", i, (int)field_size, field);
            #endif

            // TODO: What if TIME is invalid?
            // if(field[0] == '-' && field_size == 1) {
            //     log_line_parsed->timestamp = 0;
            //     log_line_parsed->parsing_errors++;
            //     ++i;
            //     goto next_item;
            // }

            char *datetime = field;

            if(memchr(datetime, '[', field_size)) {
                datetime++;
                field_size--;
            }

            struct tm ltm = {0};
            char *tz_str = strptime(datetime, "%d/%b/%Y:%H:%M:%S", &ltm);
            if(unlikely(tz_str == NULL)){
                collector_error("TIME datetime parsing failed");
                log_line_parsed->timestamp = 0;
                log_line_parsed->parsing_errors++;
                goto next_item;
            }

            #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
            debug_log( "strptime() result: year:%d mon:%d day:%d hour:%d min:%d sec:%d",
                       ltm.tm_year, ltm.tm_mon, ltm.tm_mday,
                       ltm.tm_hour, ltm.tm_min, ltm.tm_sec);
            #endif

            /* Deal with the 2nd part of the datetime, i.e. the timezone */

            m_assert(*tz_str == ' ', "Invalid TIME timezone");
            ++tz_str;
            m_assert(*tz_str == '+' || *tz_str == '-', "Invalid TIME timezone");
            char tz_sign = *tz_str;

            char *tz_str_end = ++tz_str;
            while(*tz_str_end != ']') tz_str_end++;

            m_assert(tz_str_end - tz_str == 4, "Invalid TIME timezone string length");

            #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
            debug_log( "TIME 2nd part: %.*s", (int)(tz_str_end - tz_str), tz_str);
            #endif

            long int tz = strtol(tz_str, NULL, 10); // stops at the closing ']'
            long int tz_h = tz / 100;
            long int tz_m = tz % 100;
            int64_t tz_adj = (int64_t) tz_h * 3600 + (int64_t) tz_m * 60;
            if(tz_sign == '+') tz_adj *= -1; // if timezone is positive, we need to subtract it to get GMT
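
            /* Worked example (illustrative): for "[05/Oct/2023:14:30:00 +0200]",
             * strtol() yields tz = 200, so tz_h = 2, tz_m = 0 and tz_adj = 7200s;
             * the '+' sign flips it to -7200, and timegm(&ltm) - 7200 produces
             * the epoch of 12:30:00 UTC, as expected. */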

            #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
            debug_log( "Timezone: int:%ld, hrs:%ld, mins:%ld", tz, tz_h, tz_m);
            #endif

            if(-1 == (log_line_parsed->timestamp = timegm(&ltm) + tz_adj)){
                collector_error("TIME datetime parsing failed");
                log_line_parsed->timestamp = 0;
                log_line_parsed->parsing_errors++;
            }

            #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
            char tb[80];
            strftime(tb, sizeof(tb), "%c", &ltm);
            debug_log( "Extracted TIME:%ld", log_line_parsed->timestamp);
            debug_log( "Extracted TIME string:%s", tb);
            #endif

            offset = tz_str_end + 1; // WARNING: this modifies offset, but it is required in the TIME case.
            ++i; // TIME takes up 2 fields_format[] spaces, so skip the next one

            goto next_item;
        }

next_item:
        /* If offset is located beyond the end of the line, terminate parsing */
        if(unlikely((size_t) (offset - line) >= line_len)) break;

        field = ++offset;
    }
}
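
/* Minimal usage sketch (illustrative only, hence not compiled in): parsing a
 * single line with an already-initialized parser configuration. The sample log
 * line, its format and the expected results are assumptions for demonstration. */
#if 0
static void example_parse_one_line(Web_log_parser_config_t *wblp_config){
    char line[] = "example.com:80 203.0.113.7 [05/Oct/2023:14:30:00 +0200] \"GET /index.html HTTP/1.1\" 200 512";
    Log_line_parsed_t line_parsed = (Log_line_parsed_t) {0};
    parse_web_log_line(wblp_config, line, strlen(line), &line_parsed);
    // For a matching wblp_config one would expect: vhost "example.com", port 80,
    // req_method "GET", resp_code 200, resp_size 512 and parsing_errors 0.
}
#endif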

/**
 * @brief Extract web log metrics from a group of web log fields.
 * @param[in] parser_config Configuration specifying how and what web log
 *            metrics to extract.
 * @param[in] line_parsed Web log fields extracted from a web log line.
 * @param[out] metrics Web log metrics extracted from the \p line_parsed
 *             web log fields, using the \p parser_config configuration.
 */
void extract_web_log_metrics(Log_parser_config_t *parser_config,
                             Log_line_parsed_t *line_parsed,
                             Web_log_metrics_t *metrics){

    /* Extract number of parsed lines */
    /* NOTE: Commented out as it is done in flb_collect_logs_cb() now. */
    // metrics->num_lines++;

    /* Extract vhost */
    // TODO: Reduce number of reallocs
    if((parser_config->chart_config & CHART_VHOST) && *line_parsed->vhost){
        int i;
        for(i = 0; i < metrics->vhost_arr.size; i++){
            if(!strcmp(metrics->vhost_arr.vhosts[i].name, line_parsed->vhost)){
                metrics->vhost_arr.vhosts[i].count++;
                break;
            }
        }
        if(metrics->vhost_arr.size == i){ // Vhost not found in array - need to append
            metrics->vhost_arr.size++;
            if(metrics->vhost_arr.size >= metrics->vhost_arr.size_max){
                metrics->vhost_arr.size_max = metrics->vhost_arr.size * VHOST_BUFFS_SCALE_FACTOR + 1;
                metrics->vhost_arr.vhosts = reallocz( metrics->vhost_arr.vhosts,
                                                      metrics->vhost_arr.size_max * sizeof(struct log_parser_metrics_vhost));
            }
            snprintf(metrics->vhost_arr.vhosts[metrics->vhost_arr.size - 1].name, VHOST_MAX_LEN, "%s", line_parsed->vhost);
            metrics->vhost_arr.vhosts[metrics->vhost_arr.size - 1].count = 1;
        }
    }

    /* Extract port */
    // TODO: Reduce number of reallocs
    if((parser_config->chart_config & CHART_PORT) && line_parsed->port){
        int i;
        for(i = 0; i < metrics->port_arr.size; i++){
            if(metrics->port_arr.ports[i].port == line_parsed->port){
                metrics->port_arr.ports[i].count++;
                break;
            }
        }
        if(metrics->port_arr.size == i){ // Port not found in array - need to append
            metrics->port_arr.size++;
            if(metrics->port_arr.size >= metrics->port_arr.size_max){
                metrics->port_arr.size_max = metrics->port_arr.size * PORT_BUFFS_SCALE_FACTOR + 1;
                metrics->port_arr.ports = reallocz( metrics->port_arr.ports,
                                                    metrics->port_arr.size_max * sizeof(struct log_parser_metrics_port));
            }
            if(line_parsed->port == WEB_LOG_INVALID_PORT)
                snprintfz(metrics->port_arr.ports[metrics->port_arr.size - 1].name, PORT_MAX_LEN, "%s", WEB_LOG_INVALID_PORT_STR);
            else
                snprintfz(metrics->port_arr.ports[metrics->port_arr.size - 1].name, PORT_MAX_LEN, "%d", line_parsed->port);
            metrics->port_arr.ports[metrics->port_arr.size - 1].port = line_parsed->port;
            metrics->port_arr.ports[metrics->port_arr.size - 1].count = 1;
        }
    }

    /* Extract client metrics */
    if(( parser_config->chart_config & ( CHART_IP_VERSION | CHART_REQ_CLIENT_CURRENT | CHART_REQ_CLIENT_ALL_TIME)) && *line_parsed->req_client) {

        /* Invalid IP version */
        if(unlikely(!strcmp(line_parsed->req_client, WEB_LOG_INVALID_CLIENT_IP_STR))){
            if(parser_config->chart_config & CHART_IP_VERSION) metrics->ip_ver.invalid++;
        }

        else if(strchr(line_parsed->req_client, ':')){
            /* IPv6 version */
            if(parser_config->chart_config & CHART_IP_VERSION) metrics->ip_ver.v6++;

            /* Unique Client IPv6 Address current poll */
            if(parser_config->chart_config & CHART_REQ_CLIENT_CURRENT){
                int i;
                for(i = 0; i < metrics->req_clients_current_arr.ipv6_size; i++){
                    if(!strcmp(metrics->req_clients_current_arr.ipv6_req_clients[i], line_parsed->req_client)) break;
                }
                if(metrics->req_clients_current_arr.ipv6_size == i){ // Req client not found in array - need to append
                    metrics->req_clients_current_arr.ipv6_size++;
                    metrics->req_clients_current_arr.ipv6_req_clients = reallocz(metrics->req_clients_current_arr.ipv6_req_clients,
                        metrics->req_clients_current_arr.ipv6_size * sizeof(*metrics->req_clients_current_arr.ipv6_req_clients));
                    snprintf(metrics->req_clients_current_arr.ipv6_req_clients[metrics->req_clients_current_arr.ipv6_size - 1],
                        REQ_CLIENT_MAX_LEN, "%s", line_parsed->req_client);
                }
            }

            /* Unique Client IPv6 Address all-time */
            if(parser_config->chart_config & CHART_REQ_CLIENT_ALL_TIME){
                int i;
                for(i = 0; i < metrics->req_clients_alltime_arr.ipv6_size; i++){
                    if(!strcmp(metrics->req_clients_alltime_arr.ipv6_req_clients[i], line_parsed->req_client)) break;
                }
                if(metrics->req_clients_alltime_arr.ipv6_size == i){ // Req client not found in array - need to append
                    metrics->req_clients_alltime_arr.ipv6_size++;
                    metrics->req_clients_alltime_arr.ipv6_req_clients = reallocz(metrics->req_clients_alltime_arr.ipv6_req_clients,
                        metrics->req_clients_alltime_arr.ipv6_size * sizeof(*metrics->req_clients_alltime_arr.ipv6_req_clients));
                    snprintf(metrics->req_clients_alltime_arr.ipv6_req_clients[metrics->req_clients_alltime_arr.ipv6_size - 1],
                        REQ_CLIENT_MAX_LEN, "%s", line_parsed->req_client);
                }
            }
        }

        else{
            /* IPv4 version */
            if(parser_config->chart_config & CHART_IP_VERSION) metrics->ip_ver.v4++;

            /* Unique Client IPv4 Address current poll */
            if(parser_config->chart_config & CHART_REQ_CLIENT_CURRENT){
                int i;
                for(i = 0; i < metrics->req_clients_current_arr.ipv4_size; i++){
                    if(!strcmp(metrics->req_clients_current_arr.ipv4_req_clients[i], line_parsed->req_client)) break;
                }
                if(metrics->req_clients_current_arr.ipv4_size == i){ // Req client not found in array - need to append
                    metrics->req_clients_current_arr.ipv4_size++;
                    metrics->req_clients_current_arr.ipv4_req_clients = reallocz(metrics->req_clients_current_arr.ipv4_req_clients,
                        metrics->req_clients_current_arr.ipv4_size * sizeof(*metrics->req_clients_current_arr.ipv4_req_clients));
                    snprintf(metrics->req_clients_current_arr.ipv4_req_clients[metrics->req_clients_current_arr.ipv4_size - 1],
                        REQ_CLIENT_MAX_LEN, "%s", line_parsed->req_client);
                }
            }

            /* Unique Client IPv4 Address all-time */
            if(parser_config->chart_config & CHART_REQ_CLIENT_ALL_TIME){
                int i;
                for(i = 0; i < metrics->req_clients_alltime_arr.ipv4_size; i++){
                    if(!strcmp(metrics->req_clients_alltime_arr.ipv4_req_clients[i], line_parsed->req_client)) break;
                }
                if(metrics->req_clients_alltime_arr.ipv4_size == i){ // Req client not found in array - need to append
                    metrics->req_clients_alltime_arr.ipv4_size++;
                    metrics->req_clients_alltime_arr.ipv4_req_clients = reallocz(metrics->req_clients_alltime_arr.ipv4_req_clients,
                        metrics->req_clients_alltime_arr.ipv4_size * sizeof(*metrics->req_clients_alltime_arr.ipv4_req_clients));
                    snprintf(metrics->req_clients_alltime_arr.ipv4_req_clients[metrics->req_clients_alltime_arr.ipv4_size - 1],
                        REQ_CLIENT_MAX_LEN, "%s", line_parsed->req_client);
                }
            }
        }
    }

    /* Extract request method */
    if(parser_config->chart_config & CHART_REQ_METHODS){
        for(int i = 0; i < REQ_METHOD_ARR_SIZE; i++){
            if(!strcmp(line_parsed->req_method, req_method_str[i])){
                metrics->req_method[i]++;
                break;
            }
        }
    }

    /* Extract request protocol */
    if(parser_config->chart_config & CHART_REQ_PROTO){
        if(!strcmp(line_parsed->req_proto, "1") || !strcmp(line_parsed->req_proto, "1.0")) metrics->req_proto.http_1++;
        else if(!strcmp(line_parsed->req_proto, "1.1")) metrics->req_proto.http_1_1++;
        else if(!strcmp(line_parsed->req_proto, "2") || !strcmp(line_parsed->req_proto, "2.0")) metrics->req_proto.http_2++;
        else metrics->req_proto.other++;
    }

    /* Extract bytes received and sent */
    if(parser_config->chart_config & CHART_BANDWIDTH){
        metrics->bandwidth.req_size += line_parsed->req_size;
        metrics->bandwidth.resp_size += line_parsed->resp_size;
    }

    /* Extract request processing time */
    if((parser_config->chart_config & CHART_REQ_PROC_TIME) && line_parsed->req_proc_time){
        if(line_parsed->req_proc_time < metrics->req_proc_time.min || metrics->req_proc_time.min == 0){
            metrics->req_proc_time.min = line_parsed->req_proc_time;
        }
        if(line_parsed->req_proc_time > metrics->req_proc_time.max || metrics->req_proc_time.max == 0){
            metrics->req_proc_time.max = line_parsed->req_proc_time;
        }
        metrics->req_proc_time.sum += line_parsed->req_proc_time;
        metrics->req_proc_time.count++;
    }

    /* Extract response code family, response code & response code type */
    if(parser_config->chart_config & (CHART_RESP_CODE_FAMILY | CHART_RESP_CODE | CHART_RESP_CODE_TYPE)){
        switch(line_parsed->resp_code / 100){
            /* Note: 304 and 401 should be treated as resp_success */
            case 1:
                metrics->resp_code_family.resp_1xx++;
                metrics->resp_code[line_parsed->resp_code - 100]++;
                metrics->resp_code_type.resp_success++;
                break;
            case 2:
                metrics->resp_code_family.resp_2xx++;
                metrics->resp_code[line_parsed->resp_code - 100]++;
                metrics->resp_code_type.resp_success++;
                break;
            case 3:
                metrics->resp_code_family.resp_3xx++;
                metrics->resp_code[line_parsed->resp_code - 100]++;
                if(line_parsed->resp_code == 304) metrics->resp_code_type.resp_success++;
                else metrics->resp_code_type.resp_redirect++;
                break;
            case 4:
                metrics->resp_code_family.resp_4xx++;
                metrics->resp_code[line_parsed->resp_code - 100]++;
                if(line_parsed->resp_code == 401) metrics->resp_code_type.resp_success++;
                else metrics->resp_code_type.resp_bad++;
                break;
            case 5:
                metrics->resp_code_family.resp_5xx++;
                metrics->resp_code[line_parsed->resp_code - 100]++;
                metrics->resp_code_type.resp_error++;
                break;
            default:
                metrics->resp_code_family.other++;
                metrics->resp_code[RESP_CODE_ARR_SIZE - 1]++;
                metrics->resp_code_type.other++;
                break;
        }
    }

    /* Extract SSL protocol */
    if(parser_config->chart_config & CHART_SSL_PROTO){
        if(!strcmp(line_parsed->ssl_proto, "TLSv1")) metrics->ssl_proto.tlsv1++;
        else if(!strcmp(line_parsed->ssl_proto, "TLSv1.1")) metrics->ssl_proto.tlsv1_1++;
        else if(!strcmp(line_parsed->ssl_proto, "TLSv1.2")) metrics->ssl_proto.tlsv1_2++;
        else if(!strcmp(line_parsed->ssl_proto, "TLSv1.3")) metrics->ssl_proto.tlsv1_3++;
        else if(!strcmp(line_parsed->ssl_proto, "SSLv2")) metrics->ssl_proto.sslv2++;
        else if(!strcmp(line_parsed->ssl_proto, "SSLv3")) metrics->ssl_proto.sslv3++;
        else metrics->ssl_proto.other++;
    }

    /* Extract SSL cipher suite */
    // TODO: Reduce number of reallocs
    if((parser_config->chart_config & CHART_SSL_CIPHER) && *line_parsed->ssl_cipher){
        int i;
        for(i = 0; i < metrics->ssl_cipher_arr.size; i++){
            if(!strcmp(metrics->ssl_cipher_arr.ssl_ciphers[i].name, line_parsed->ssl_cipher)){
                metrics->ssl_cipher_arr.ssl_ciphers[i].count++;
                break;
            }
        }
        if(metrics->ssl_cipher_arr.size == i){ // SSL cipher suite not found in array - need to append
            metrics->ssl_cipher_arr.size++;
            metrics->ssl_cipher_arr.ssl_ciphers = reallocz(metrics->ssl_cipher_arr.ssl_ciphers,
                metrics->ssl_cipher_arr.size * sizeof(struct log_parser_metrics_ssl_cipher));
            snprintf( metrics->ssl_cipher_arr.ssl_ciphers[metrics->ssl_cipher_arr.size - 1].name,
                      SSL_CIPHER_SUITE_MAX_LEN, "%s", line_parsed->ssl_cipher);
            metrics->ssl_cipher_arr.ssl_ciphers[metrics->ssl_cipher_arr.size - 1].count = 1;
        }
    }

    metrics->timestamp = line_parsed->timestamp;
}
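
/* Minimal usage sketch (illustrative only, hence not compiled in): feeding one
 * parsed line into a zero-initialized metrics accumulator. The chart flag shown
 * is an assumption; any combination of CHART_* bits may be set in chart_config. */
#if 0
static void example_extract_metrics(Log_parser_config_t *parser_config,
                                    Log_line_parsed_t *line_parsed){
    Web_log_metrics_t metrics = {0};
    extract_web_log_metrics(parser_config, line_parsed, &metrics);
    // e.g. with CHART_RESP_CODE_FAMILY set and line_parsed->resp_code == 200,
    // metrics.resp_code_family.resp_2xx is now 1.
}
#endif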

/**
 * @brief Try to automatically detect the configuration for a web log parser.
 * @details It tries to automatically detect the configuration to be used for
 * a web log parser, by parsing a single web log line record and trying to pick
 * a matching configuration from a static list of predefined ones.
 * @param[in] line Null-terminated web log line to use in guessing the configuration.
 * @param[in] delimiter Delimiter used to break down \p line into separate fields.
 * @returns Pointer to the web log parser configuration if automatic detection
 * was successful, otherwise NULL.
 */
Web_log_parser_config_t *auto_detect_web_log_parser_config(char *line, const char delimiter){
    for(int i = 0; csv_auto_format_guess_matrix[i] != NULL; i++){
        Web_log_parser_config_t *wblp_config = read_web_log_parser_config(csv_auto_format_guess_matrix[i], delimiter);
        if(count_fields(line, delimiter) == wblp_config->num_fields){
            wblp_config->verify_parsed_logs = 1; // Verification must be turned on to be able to pick up parsing_errors
            Log_line_parsed_t line_parsed = (Log_line_parsed_t) {0};
            parse_web_log_line(wblp_config, line, strlen(line), &line_parsed);
            if(line_parsed.parsing_errors == 0){
                return wblp_config;
            }
        }

        freez(wblp_config->fields);
        freez(wblp_config);
    }
    return NULL;
}
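
/* Minimal usage sketch (illustrative only, hence not compiled in): auto-detecting
 * a parser configuration from the first line of a log file. On success the caller
 * receives the allocated configuration (and is expected to free wblp_config->fields
 * and wblp_config when done, mirroring the failure path above). */
#if 0
static Web_log_parser_config_t *example_auto_detect(char *first_line){
    Web_log_parser_config_t *wblp_config = auto_detect_web_log_parser_config(first_line, ' ');
    if(!wblp_config)
        collector_error("could not auto-detect web log format");
    return wblp_config;
}
#endif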