Add a libFuzzer harness for the re2c-based pluginsd line splitter and
regenerate the scanner.

- fuzzer.sh runs re2c, builds the scanner and the harness with
  -fsanitize=fuzzer,undefined,address, writes four seed inputs and starts the
  fuzzer. It changes to its own directory first because every input is named
  by relative path; the corpus directory can be overridden with CORPUS_DIR.
- lines_splitter_pluginsd_fuzzer.cc feeds a NUL-terminated copy of each input
  to quoted_strings_splitter_pluginsd_re2c() and asserts on the returned word
  count and word pointers. NDEBUG is undefined before <cassert> so the checks
  cannot be compiled out.
- re2c_line_splitter.re.c: the quoted-word patterns exclude \x00 from the
  quoted body, YYCURSOR becomes a writable pointer, and the unquoted-word
  action only overwrites and steps past the following character when it is
  not the terminating NUL.
- re2c_line_splitter.c is the scanner regenerated by re2c 3.0.

Note: indentation and blank context lines inside the hunks were restored from
a whitespace-collapsed copy of this patch. If a hunk does not match, relax the
whitespace checks (for example `git apply --ignore-whitespace`). The blob ids
on the `index` lines of the two new files are those of the originally
committed versions.

diff --git a/src/libnetdata/line_splitter/fuzzer.sh b/src/libnetdata/line_splitter/fuzzer.sh
new file mode 100755
index 0000000000..1fad49ba70
--- /dev/null
+++ b/src/libnetdata/line_splitter/fuzzer.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+#
+# Build and run the libFuzzer harness for the pluginsd line splitter.
+# Invokes re2c, clang and clang++; all three must be on PATH.
+
+set -exu -o pipefail
+
+# Every input below is named by relative path, so run from this script's directory.
+cd "$(dirname "${BASH_SOURCE[0]}")"
+
+# An array keeps each flag a separate word without relying on unquoted expansion.
+CFLAGS=(-Wall -Wextra -O2 -g -fsanitize=fuzzer,undefined,address)
+
+# Corpus directory; override with CORPUS_DIR=... to keep runs apart.
+CORPUS_DIR="${CORPUS_DIR:-/tmp/corpus}"
+
+re2c -b re2c_line_splitter.re.c -o re2c_line_splitter.c
+clang "${CFLAGS[@]}" -c re2c_line_splitter.c -o re2c_line_splitter.o
+clang++ "${CFLAGS[@]}" -c lines_splitter_pluginsd_fuzzer.cc -o lines_splitter_pluginsd_fuzzer.o
+clang++ "${CFLAGS[@]}" re2c_line_splitter.o lines_splitter_pluginsd_fuzzer.o -o pluginsd_line_splitter_fuzzer
+
+mkdir -p "$CORPUS_DIR"
+echo 'simple word test' > "$CORPUS_DIR/simple.txt"
+echo '"quoted string" test' > "$CORPUS_DIR/quoted.txt"
+echo "'single quoted' test" > "$CORPUS_DIR/single_quoted.txt"
+echo 'word1=word2 word3="quoted value"' > "$CORPUS_DIR/mixed.txt"
+./pluginsd_line_splitter_fuzzer -workers=12 -jobs=16 "$CORPUS_DIR"
diff --git a/src/libnetdata/line_splitter/lines_splitter_pluginsd_fuzzer.cc b/src/libnetdata/line_splitter/lines_splitter_pluginsd_fuzzer.cc
new file mode 100644
index 0000000000..229b28a9b9
--- /dev/null
+++ b/src/libnetdata/line_splitter/lines_splitter_pluginsd_fuzzer.cc
@@ -0,0 +1,55 @@
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <vector>
+
+// Keep the invariant checks below active even when the build defines NDEBUG.
+#undef NDEBUG
+#include <cassert>
+
+// Splitter under test, defined in re2c_line_splitter.c (C linkage).
+extern "C" size_t quoted_strings_splitter_pluginsd_re2c(char *start, char **words, size_t max_words);
+
+// libFuzzer entry point: splits a NUL-terminated copy of the input and checks
+// the invariants of the returned word array. Always returns 0.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size)
+{
+    if (Size == 0)
+        return 0;
+
+    const size_t MAX_WORDS = 5;
+
+    // The splitter writes terminators into its input, so work on a private,
+    // NUL-terminated copy rather than on the const input bytes.
+    std::vector<char> buffer(Size + 1);
+    std::memcpy(buffer.data(), Data, Size);
+    buffer[Size] = '\0';
+    const char *const buffer_end = buffer.data() + buffer.size();
+
+    // Pre-filled with nullptr so untouched slots can be detected afterwards.
+    std::vector<char *> words(MAX_WORDS, nullptr);
+
+    const size_t word_count = quoted_strings_splitter_pluginsd_re2c(buffer.data(), words.data(), MAX_WORDS);
+
+    assert(word_count <= MAX_WORDS && "Returned more words than max_words");
+
+    for (size_t i = 0; i < word_count; i++) {
+        if (words[i]) {
+            assert(
+                words[i] >= buffer.data() && words[i] < buffer_end &&
+                "Word pointer outside buffer bounds");
+
+            // The word must end before the end of the buffer it points into.
+            assert(
+                std::strlen(words[i]) < static_cast<size_t>(buffer_end - words[i]) &&
+                "Word not properly null-terminated");
+        }
+    }
+
+    // Slots at and after word_count must still be null.
+    for (size_t i = word_count; i < MAX_WORDS; i++) {
+        assert(words[i] == nullptr && "Non-null pointer beyond word_count");
+    }
+
+    return 0;
+}
diff --git a/src/libnetdata/line_splitter/re2c_line_splitter.c b/src/libnetdata/line_splitter/re2c_line_splitter.c
index ae6f66624c..2452550ffa 100644
--- a/src/libnetdata/line_splitter/re2c_line_splitter.c
+++ b/src/libnetdata/line_splitter/re2c_line_splitter.c
@@ -1,8 +1,8 @@
-/* Generated by re2c 3.0 on Tue Nov 26 12:06:58 2024 */
-#line 1 "src/libnetdata/line_splitter/re2c_line_splitter.re.c"
-#line 4 "src/libnetdata/line_splitter/re2c_line_splitter.c"
-#define YYMAXFILL 1
-#line 1 "src/libnetdata/line_splitter/re2c_line_splitter.re.c"
+/* Generated by re2c 3.0 on Tue Dec 3 14:14:31 2024 */
+#line 1 "re2c_line_splitter.re.c"
+#line 4 "re2c_line_splitter.c"
+#define YYMAXFILL 2
+#line 1 "re2c_line_splitter.re.c"
 #include <stddef.h>
 
 
@@ -10,15 +10,16 @@
 size_t quoted_strings_splitter_pluginsd_re2c(char *start, char **words, size_t max_words)
 {
     size_t count = 0;
+    char *YYMARKER = NULL;
 
-    const char *YYCURSOR = start;
+    char *YYCURSOR = start;
 
     for (;;) {
-#line 18 "src/libnetdata/line_splitter/re2c_line_splitter.c"
+#line 19 "re2c_line_splitter.c"
 {
 	char yych;
 	static const unsigned char yybm[] = {
-		192, 208, 208, 208, 208, 208, 208, 208,
+		  0, 208, 208, 208, 208, 208, 208, 208,
 		208, 224, 224, 224, 224, 224, 208, 208,
 		208, 208, 208, 208, 208, 208, 208, 208,
 		208, 208, 208, 208, 208, 208, 208, 208,
@@ -58,56 +59,75 @@ size_t quoted_strings_splitter_pluginsd_re2c(char *start, char **words, size_t m
 	if (yych <= 0x00) goto yy1;
 	if (yych <= ' ') goto yy3;
 	if (yych <= '"') goto yy4;
-	if (yych <= '\'') goto yy5;
+	if (yych <= '\'') goto yy6;
 	goto yy3;
 yy1:
 	++YYCURSOR;
-#line 23 "src/libnetdata/line_splitter/re2c_line_splitter.re.c"
+#line 26 "re2c_line_splitter.re.c"
 	{
+            if (count < max_words)
+                words[count] = NULL;
+
             return count;
         }
-#line 70 "src/libnetdata/line_splitter/re2c_line_splitter.c"
+#line 74 "re2c_line_splitter.c"
 yy2:
 	yych = *++YYCURSOR;
 	if (yybm[0+yych] & 16) {
 		goto yy2;
 	}
-#line 35 "src/libnetdata/line_splitter/re2c_line_splitter.re.c"
+#line 41 "re2c_line_splitter.re.c"
 	{
             if (count == max_words)
                 return count;
 
-            start[YYCURSOR - start] = '\0';
+            if (*YYCURSOR != '\0')
+                *YYCURSOR++ = '\0';
+
             words[count++] = start;
-            start = (char *) ++YYCURSOR;
+            start = YYCURSOR;
             continue;
         }
-#line 86 "src/libnetdata/line_splitter/re2c_line_splitter.c"
+#line 92 "re2c_line_splitter.c"
 yy3:
 	yych = *++YYCURSOR;
 	if (yybm[0+yych] & 32) {
 		goto yy3;
 	}
-#line 44 "src/libnetdata/line_splitter/re2c_line_splitter.re.c"
+#line 52 "re2c_line_splitter.re.c"
 	{
             start = (char *) YYCURSOR;
             continue;
         }
-#line 97 "src/libnetdata/line_splitter/re2c_line_splitter.c"
+#line 103 "re2c_line_splitter.c"
 yy4:
-	yych = *++YYCURSOR;
-	if (yybm[0+yych] & 64) {
-		goto yy4;
-	}
-	goto yy6;
+	yych = *(YYMARKER = ++YYCURSOR);
+	if (yych >= 0x01) goto yy8;
 yy5:
-	yych = *++YYCURSOR;
-	if (yybm[0+yych] & 128) {
-		goto yy5;
-	}
+#line 21 "re2c_line_splitter.re.c"
+	{
+            if (count < max_words)
+                words[count] = NULL;
+            return count;
+        }
+#line 114 "re2c_line_splitter.c"
 yy6:
+	yych = *(YYMARKER = ++YYCURSOR);
+	if (yych <= 0x00) goto yy5;
+	goto yy12;
+yy7:
+	yych = *++YYCURSOR;
+yy8:
+	if (yybm[0+yych] & 64) {
+		goto yy7;
+	}
+	if (yych >= 0x01) goto yy10;
+yy9:
+	YYCURSOR = YYMARKER;
+	goto yy5;
+yy10:
 	++YYCURSOR;
-#line 26 "src/libnetdata/line_splitter/re2c_line_splitter.re.c"
+#line 32 "re2c_line_splitter.re.c"
 	{
             if (count == max_words)
                 return count;
@@ -117,9 +137,17 @@ yy6:
             words[count++] = start;
             continue;
         }
-#line 121 "src/libnetdata/line_splitter/re2c_line_splitter.c"
+#line 141 "re2c_line_splitter.c"
+yy11:
+	yych = *++YYCURSOR;
+yy12:
+	if (yybm[0+yych] & 128) {
+		goto yy11;
+	}
+	if (yych <= 0x00) goto yy9;
+	goto yy10;
 }
-#line 48 "src/libnetdata/line_splitter/re2c_line_splitter.re.c"
+#line 56 "re2c_line_splitter.re.c"
 
     }
 }
diff --git a/src/libnetdata/line_splitter/re2c_line_splitter.re.c b/src/libnetdata/line_splitter/re2c_line_splitter.re.c
index d5159ea4a2..62571cb887 100644
--- a/src/libnetdata/line_splitter/re2c_line_splitter.re.c
+++ b/src/libnetdata/line_splitter/re2c_line_splitter.re.c
@@ -5,15 +5,16 @@
 size_t quoted_strings_splitter_pluginsd_re2c(char *start, char **words, size_t max_words)
 {
     size_t count = 0;
+    char *YYMARKER = NULL;
 
-    const char *YYCURSOR = start;
+    char *YYCURSOR = start;
 
     for (;;) {
         /*!re2c
         re2c:define:YYCTYPE = char;
         re2c:yyfill:enable = 0;
-        single_quotes_word = ["] [^"]* ["];
-        double_quotes_word = ['] [^']* ['];
+        single_quotes_word = ["] [^"\x00]* ["];
+        double_quotes_word = ['] [^'\x00]* ['];
         unquoted_word = [^= "'\t\n\v\f\r\x00]+;
         whitespace = [= \t\n\v\f\r]+;
 
@@ -41,9 +42,11 @@ size_t quoted_strings_splitter_pluginsd_re2c(char *start, char **words, size_t m
             if (count == max_words)
                 return count;
 
-            start[YYCURSOR - start] = '\0';
+            if (*YYCURSOR != '\0')
+                *YYCURSOR++ = '\0';
+
             words[count++] = start;
-            start = (char *) ++YYCURSOR;
+            start = YYCURSOR;
             continue;
         }
         whitespace {