diff --git a/.packit.yaml b/.packit.yaml index 4049a2e..3ddaa8c 100644 --- a/.packit.yaml +++ b/.packit.yaml @@ -27,11 +27,9 @@ jobs: targets: - epel-all-aarch64 - epel-all-ppc64le - - epel-all-s390x - epel-all-x86_64 - fedora-all-aarch64 - fedora-all-ppc64le - - fedora-all-s390x - fedora-all-x86_64 - <<: *copr diff --git a/src/csgrep.cc b/src/csgrep.cc index e2063f0..14c8216 100644 --- a/src/csgrep.cc +++ b/src/csgrep.cc @@ -619,6 +619,7 @@ int main(int argc, char *argv[]) ("file-glob", "expand glob patterns in the names of input files") ("ignore-case,i", "ignore case when matching regular expressions") ("ignore-parser-warnings", "if enabled, parser warnings about the input files do not affect exit code") + ("record-input-locations", "if enabled, events in json will contain input_line and input_file fields referencing the original location of input file/stream") ("invert-match,v", "select defects that do not match the selected criteria") ("invert-regex,n", "invert regular expressions in all predicates") ("filter-file,f", po::value(), "read custom filtering rules from a file in JSON format"); @@ -639,7 +640,7 @@ int main(int argc, char *argv[]) p.add("input-file", -1); po::store(po::parse_command_line(argc, argv, desc), vm); - po::notify(vm); + po::notify(vm); po::options_description opts; opts.add(desc).add(hidden); @@ -735,6 +736,9 @@ int main(int argc, char *argv[]) if (vm.count("ignore-parser-warnings")) eng->setIgnoreParserWarnings(true); + if (vm.count("record-input-locations")) + eng->setRecordInputLocations(true); + bool hasError = false; // if no input file is given, read from stdin diff --git a/src/lib/defect.hh b/src/lib/defect.hh index 554b7ee..f39e5e9 100644 --- a/src/lib/defect.hh +++ b/src/lib/defect.hh @@ -49,6 +49,8 @@ struct DefEvent { std::string fileName; int line = 0; int column = 0; + int inputLine = 0; + std::string inputFile; std::string event; std::string msg; diff --git a/src/lib/instream.cc b/src/lib/instream.cc index ab9225f..7f542f4 100644 --- a/src/lib/instream.cc +++ b/src/lib/instream.cc @@ -19,9 +19,10 @@ #include "instream.hh" -InStream::InStream(const std::string &fileName, const bool silent): +InStream::InStream(const std::string &fileName, const bool silent, const bool recordInputLocations): fileName_(fileName), silent_(silent), + recordInputLocations_(recordInputLocations), str_((fileName_ == "-") ? std::cin : fileStr_) diff --git a/src/lib/instream.hh b/src/lib/instream.hh index fcbb480..038aa84 100644 --- a/src/lib/instream.hh +++ b/src/lib/instream.hh @@ -38,20 +38,23 @@ struct InFileException { class InStream { public: - InStream(const std::string &fileName, bool silent = false); + InStream(const std::string &fileName, bool silent = false, + bool recordInputLocations = false); InStream(std::istringstream &str, bool silent = false); ~InStream() = default; - const std::string& fileName() const { return fileName_; } - std::istream& str() const { return str_; } - bool silent() const { return silent_; } - bool anyError() const { return anyError_; } + const std::string& fileName() const { return fileName_; } + std::istream& str() const { return str_; } + bool silent() const { return silent_; } + bool recordInputLocations() const { return recordInputLocations_; } + bool anyError() const { return anyError_; } void handleError(const std::string &msg = "", unsigned long line = 0UL); private: const std::string fileName_; const bool silent_; + const bool recordInputLocations_ = false; bool anyError_ = false; std::ifstream fileStr_; std::istream &str_; diff --git a/src/lib/parser-cov.cc b/src/lib/parser-cov.cc index 5d4e4dc..89f49ae 100644 --- a/src/lib/parser-cov.cc +++ b/src/lib/parser-cov.cc @@ -114,8 +114,10 @@ std::ostream& operator<<(std::ostream &str, EToken code) class ErrFileLexer { public: - ErrFileLexer(std::istream &input): - lineReader_(input), + ErrFileLexer(InStream &input): + lineReader_(input.str()), + fileName_(input.fileName()), + recordInputLocations_(input.recordInputLocations()), hasError_(false) { } @@ -140,6 +142,8 @@ class ErrFileLexer { private: LineReader lineReader_; + std::string fileName_; + bool recordInputLocations_; bool hasError_; Defect def_; DefEvent evt_; @@ -181,6 +185,10 @@ EToken ErrFileLexer::readNext() evt_ = DefEvent(); evt_.event = sm[/* # */ 1]; evt_.msg = sm[/* msg */ 2]; + if (recordInputLocations_) { + evt_.inputLine = lineReader_.lineNo(); + evt_.inputFile = fileName_; + } return T_COMMENT; } @@ -202,6 +210,11 @@ EToken ErrFileLexer::readNext() evt_.event = sm[/* event */ 4]; evt_.msg = sm[/* msg */ 5]; + if (recordInputLocations_) { + evt_.inputLine = lineReader_.lineNo(); + evt_.inputFile = fileName_; + } + return T_EVENT; } @@ -523,7 +536,7 @@ struct CovParser::Private { ImpliedAttrDigger digger; Private(InStream &input_): - lexer(input_.str()), + lexer(input_), fileName(input_.fileName()), silent(input_.silent()), hasError(false), diff --git a/src/lib/parser-gcc.cc b/src/lib/parser-gcc.cc index 2f94d46..e1bbeb7 100644 --- a/src/lib/parser-gcc.cc +++ b/src/lib/parser-gcc.cc @@ -68,9 +68,11 @@ class AbstractTokenFilter: public ITokenizer { class Tokenizer: public ITokenizer { public: - Tokenizer(std::istream &input): - input_(input), - lineNo_(0) + Tokenizer(InStream &input): + input_(input.str()), + lineNo_(0), + fileName_(input.fileName()), + recordInputLocations_(input.recordInputLocations()) { } @@ -83,6 +85,8 @@ class Tokenizer: public ITokenizer { private: std::istream &input_; int lineNo_; + std::string fileName_; + bool recordInputLocations_; const RE reSideBar_ = RE("^ *((([0-9]+)? \\| )|(\\+\\+\\+ \\|\\+)).*$"); @@ -131,6 +135,11 @@ EToken Tokenizer::readNext(DefEvent *pEvt) *pEvt = DefEvent(); pEvt->msg = line; + if (recordInputLocations_) { + pEvt->inputLine = lineNo_; + pEvt->inputFile = fileName_; + } + // check for line markers produced by gcc-9.2.1 (a.k.a. sidebar) if (boost::regex_match(pEvt->msg, reSideBar_)) // xxx.c:2:1: note: include '' or provide a declaration... @@ -387,7 +396,7 @@ EToken MultilineConcatenator::readNext(DefEvent *pEvt) class BasicGccParser { public: BasicGccParser(InStream &input): - rawTokenizer_(input.str()), + rawTokenizer_(input), noiseFilter_(&rawTokenizer_), markerConverter_(&noiseFilter_), tokenizer_(&markerConverter_), @@ -535,6 +544,7 @@ bool BasicGccParser::getNext(Defect *pDef) DefEvent evt; const EToken tok = tokenizer_.readNext(&evt); + switch (tok) { case T_NULL: if (!hasKeyEvent_ && !defCurrent_.events.empty()) @@ -828,7 +838,7 @@ bool GccParser::getNext(Defect *pDef) while (d->core.getNext(&d->lastDef) && d->tryMerge(pDef)) ; - // initialize verbosityLevel + // initialize verbosityLevel // FIXME: similar code to KeyEventDigger::initVerbosity() TEvtList &evtList = pDef->events; const unsigned evtCount = evtList.size(); diff --git a/src/lib/parser-json-simple.cc b/src/lib/parser-json-simple.cc index 9b97367..ce85da9 100644 --- a/src/lib/parser-json-simple.cc +++ b/src/lib/parser-json-simple.cc @@ -73,6 +73,8 @@ SimpleTreeDecoder::Private::Private(InStream &input): "event", "file_name", "h_size", + "input_file", + "input_line", "line", "message", "v_size", @@ -148,6 +150,8 @@ bool SimpleTreeDecoder::readNode(Defect *def) evt.column = valueOf(evtNode, "column"); evt.hSize = valueOf(evtNode, "h_size"); evt.vSize = valueOf(evtNode, "v_size"); + evt.inputFile = valueOf(evtNode, "input_file"); + evt.inputLine = valueOf(evtNode, "input_line"); evt.event = valueOf(evtNode, "event"); evt.msg = valueOf(evtNode, "message"); evt.verbosityLevel = valueOf(evtNode, "verbosity_level", -1); @@ -189,4 +193,3 @@ bool SimpleTreeDecoder::readNode(Defect *def) return true; } - diff --git a/src/lib/writer-json-simple.cc b/src/lib/writer-json-simple.cc index daaf857..dc6e8eb 100644 --- a/src/lib/writer-json-simple.cc +++ b/src/lib/writer-json-simple.cc @@ -40,7 +40,7 @@ static array simpleEncodeEvents(const TEvtList &events) for (const DefEvent &evt : events) { object evtNode; - // describe the location + // describe the location from the source code evtNode["file_name"] = evt.fileName; evtNode["line"] = evt.line; if (0 < evt.column) @@ -50,6 +50,12 @@ static array simpleEncodeEvents(const TEvtList &events) if (0 < evt.vSize) evtNode["v_size"] = evt.vSize; + // describe the location from the compilation error log/input + if (!evt.inputFile.empty()) + evtNode["input_file"] = evt.inputFile; + if (0 < evt.inputLine) + evtNode["input_line"] = evt.inputLine; + // describe the event evtNode["event"] = evt.event; evtNode["message"] = sanitizeUTF8(evt.msg); diff --git a/src/lib/writer.cc b/src/lib/writer.cc index 8b34384..1ea3030 100644 --- a/src/lib/writer.cc +++ b/src/lib/writer.cc @@ -56,7 +56,7 @@ bool AbstractWriter::handleFile(InStream &input) bool AbstractWriter::handleFile(const std::string &fileName, bool silent) { try { - InStream str(fileName, silent); + InStream str(fileName, silent, recordInputLocations_); return this->handleFile(str); } catch (const InFileException &e) { diff --git a/src/lib/writer.hh b/src/lib/writer.hh index a702c72..42e655b 100644 --- a/src/lib/writer.hh +++ b/src/lib/writer.hh @@ -62,11 +62,15 @@ class AbstractWriter { ignoreParserWarnings_ = val; } + void setRecordInputLocations(const bool val) { + recordInputLocations_ = val; + } private: EFileFormat inputFormat_ = FF_INVALID; const TScanProps emptyProps_{}; bool ignoreParserWarnings_ = false; + bool recordInputLocations_ = false; }; using TWriterPtr = std::unique_ptr; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e183973..a37b74c 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -28,6 +28,10 @@ set(diffcmd "diff -up") set(jsfilter "sed -e 's|\"version\": \"[^\"]*\"|\"version\": \"\"|g'") set(jsfilter "${jsfilter} -e 's|${CMAKE_SOURCE_DIR}/tests/csfilter-kfp/|\$PROJECT_ROOT/tests/csfilter-kfp/|'") +# strip the absolute paths for "input_file" strings in the --record-input-locations tests +# (namely csgrep/0134 test) for portability +set(jsfilter "${jsfilter} -e 's|${CMAKE_SOURCE_DIR}/tests/csgrep/0134|0134|g'") + macro(add_test_wrap test_name cmd) add_test("${test_name}" bash -c "${cmd}") set_tests_properties(${test_name} PROPERTIES ENVIRONMENT diff --git a/tests/csgrep/0134-json-record-locations-args.txt b/tests/csgrep/0134-json-record-locations-args.txt new file mode 100644 index 0000000..8979da7 --- /dev/null +++ b/tests/csgrep/0134-json-record-locations-args.txt @@ -0,0 +1 @@ +--mode=json --record-input-locations diff --git a/tests/csgrep/0134-json-record-locations-stdin.txt b/tests/csgrep/0134-json-record-locations-stdin.txt new file mode 100644 index 0000000..d022393 --- /dev/null +++ b/tests/csgrep/0134-json-record-locations-stdin.txt @@ -0,0 +1,7 @@ +Error: SHELLCHECK_WARNING: +/etc/rc.d/rc.sysinit:492:13: warning: Quote this to prevent word splitting. [SC2046] +# 490| if [ -n "$SELINUX_STATE" -a "$READONLY" != "yes" ]; then +# 491| if [ -f /.autorelabel ] || strstr "$cmdline" autorelabel ; then +# 492|-> restorecon $(awk '!/^#/ && $4 !~ /noauto/ && $2 ~ /^\// { print $2 }' /etc/fstab) >/dev/null 2>&1 +# 493| fi +# 494| fi diff --git a/tests/csgrep/0134-json-record-locations-stdout.txt b/tests/csgrep/0134-json-record-locations-stdout.txt new file mode 100644 index 0000000..0af4be3 --- /dev/null +++ b/tests/csgrep/0134-json-record-locations-stdout.txt @@ -0,0 +1,68 @@ +{ + "defects": [ + { + "checker": "SHELLCHECK_WARNING", + "language": "shell", + "tool": "shellcheck", + "hash_v1": "b6311c1fdc52c47d4279cd6650af36e6f8299960", + "key_event_idx": 0, + "events": [ + { + "file_name": "/etc/rc.d/rc.sysinit", + "line": 492, + "column": 13, + "input_file": "0134-json-record-locations-stdin.txt", + "input_line": 2, + "event": "warning", + "message": "Quote this to prevent word splitting. [SC2046]", + "verbosity_level": 0 + }, + { + "file_name": "", + "line": 0, + "input_file": "0134-json-record-locations-stdin.txt", + "input_line": 3, + "event": "#", + "message": " 490| if [ -n \"$SELINUX_STATE\" -a \"$READONLY\" != \"yes\" ]; then", + "verbosity_level": 1 + }, + { + "file_name": "", + "line": 0, + "input_file": "0134-json-record-locations-stdin.txt", + "input_line": 4, + "event": "#", + "message": " 491| if [ -f /.autorelabel ] || strstr \"$cmdline\" autorelabel ; then", + "verbosity_level": 1 + }, + { + "file_name": "", + "line": 0, + "input_file": "0134-json-record-locations-stdin.txt", + "input_line": 5, + "event": "#", + "message": " 492|-> \trestorecon $(awk '!/^#/ && $4 !~ /noauto/ && $2 ~ /^\\// { print $2 }' /etc/fstab) >/dev/null 2>&1", + "verbosity_level": 1 + }, + { + "file_name": "", + "line": 0, + "input_file": "0134-json-record-locations-stdin.txt", + "input_line": 6, + "event": "#", + "message": " 493| fi", + "verbosity_level": 1 + }, + { + "file_name": "", + "line": 0, + "input_file": "0134-json-record-locations-stdin.txt", + "input_line": 7, + "event": "#", + "message": " 494| fi", + "verbosity_level": 1 + } + ] + } + ] +} diff --git a/tests/csgrep/0135-json-record-locations-reverse-args.txt b/tests/csgrep/0135-json-record-locations-reverse-args.txt new file mode 100644 index 0000000..e69de29 diff --git a/tests/csgrep/0135-json-record-locations-reverse-stdin.txt b/tests/csgrep/0135-json-record-locations-reverse-stdin.txt new file mode 100644 index 0000000..c16cba2 --- /dev/null +++ b/tests/csgrep/0135-json-record-locations-reverse-stdin.txt @@ -0,0 +1,66 @@ +{ + "defects": [ + { + "checker": "SHELLCHECK_WARNING", + "language": "shell", + "key_event_idx": 0, + "events": [ + { + "file_name": "/etc/rc.d/rc.sysinit", + "line": 492, + "column": 13, + "input_file": "0134-json-record-locations-stdin.txt", + "input_line": 2, + "event": "warning", + "message": "Quote this to prevent word splitting. [SC2046]", + "verbosity_level": "0" + }, + { + "file_name": "", + "line": 0, + "input_file": "0134-json-record-locations-stdin.txt", + "input_line": 3, + "event": "#", + "message": " 490| if [ -n \"$SELINUX_STATE\" -a \"$READONLY\" != \"yes\" ]; then", + "verbosity_level": "1" + }, + { + "file_name": "", + "line": 0, + "input_file": "0134-json-record-locations-stdin.txt", + "input_line": 4, + "event": "#", + "message": " 491| if [ -f /.autorelabel ] || strstr \"$cmdline\" autorelabel ; then", + "verbosity_level": "1" + }, + { + "file_name": "", + "line": 0, + "input_file": "0134-json-record-locations-stdin.txt", + "input_line": 5, + "event": "#", + "message": " 492|-> \trestorecon $(awk '!/^#/ && $4 !~ /noauto/ && $2 ~ /^\\// { print $2 }' /etc/fstab) >/dev/null 2>&1", + "verbosity_level": "1" + }, + { + "file_name": "", + "line": 0, + "input_file": "0134-json-record-locations-stdin.txt", + "input_line": 6, + "event": "#", + "message": " 493| fi", + "verbosity_level": "1" + }, + { + "file_name": "", + "line": 0, + "input_file": "0134-json-record-locations-stdin.txt", + "input_line": 7, + "event": "#", + "message": " 494| fi", + "verbosity_level": "1" + } + ] + } + ] +} diff --git a/tests/csgrep/0135-json-record-locations-reverse-stdout.txt b/tests/csgrep/0135-json-record-locations-reverse-stdout.txt new file mode 100644 index 0000000..d022393 --- /dev/null +++ b/tests/csgrep/0135-json-record-locations-reverse-stdout.txt @@ -0,0 +1,7 @@ +Error: SHELLCHECK_WARNING: +/etc/rc.d/rc.sysinit:492:13: warning: Quote this to prevent word splitting. [SC2046] +# 490| if [ -n "$SELINUX_STATE" -a "$READONLY" != "yes" ]; then +# 491| if [ -f /.autorelabel ] || strstr "$cmdline" autorelabel ; then +# 492|-> restorecon $(awk '!/^#/ && $4 !~ /noauto/ && $2 ~ /^\// { print $2 }' /etc/fstab) >/dev/null 2>&1 +# 493| fi +# 494| fi diff --git a/tests/csgrep/0136-json-keep-recorded-locations-args.txt b/tests/csgrep/0136-json-keep-recorded-locations-args.txt new file mode 100644 index 0000000..7df3c95 --- /dev/null +++ b/tests/csgrep/0136-json-keep-recorded-locations-args.txt @@ -0,0 +1 @@ +--mode=json diff --git a/tests/csgrep/0136-json-keep-recorded-locations-stdin.txt b/tests/csgrep/0136-json-keep-recorded-locations-stdin.txt new file mode 100644 index 0000000..7d6cb9d --- /dev/null +++ b/tests/csgrep/0136-json-keep-recorded-locations-stdin.txt @@ -0,0 +1,66 @@ +{ + "defects": [ + { + "checker": "SHELLCHECK_WARNING", + "language": "shell", + "key_event_idx": 0, + "events": [ + { + "file_name": "/etc/rc.d/rc.sysinit", + "line": 492, + "column": 13, + "input_file": "0136-json-keep-recorded-locations-stdin.txt", + "input_line": 2, + "event": "warning", + "message": "Quote this to prevent word splitting. [SC2046]", + "verbosity_level": 0 + }, + { + "file_name": "", + "line": 0, + "input_file": "0136-json-keep-recorded-locations-stdin.txt", + "input_line": 3, + "event": "#", + "message": " 490| if [ -n \"$SELINUX_STATE\" -a \"$READONLY\" != \"yes\" ]; then", + "verbosity_level": 1 + }, + { + "file_name": "", + "line": 0, + "input_file": "0136-json-keep-recorded-locations-stdin.txt", + "input_line": 4, + "event": "#", + "message": " 491| if [ -f /.autorelabel ] || strstr \"$cmdline\" autorelabel ; then", + "verbosity_level": 1 + }, + { + "file_name": "", + "line": 0, + "input_file": "0136-json-keep-recorded-locations-stdin.txt", + "input_line": 5, + "event": "#", + "message": " 492|-> \trestorecon $(awk '!/^#/ && $4 !~ /noauto/ && $2 ~ /^\\// { print $2 }' /etc/fstab) >/dev/null 2>&1", + "verbosity_level": 1 + }, + { + "file_name": "", + "line": 0, + "input_file": "0136-json-keep-recorded-locations-stdin.txt", + "input_line": 6, + "event": "#", + "message": " 493| fi", + "verbosity_level": 1 + }, + { + "file_name": "", + "line": 0, + "input_file": "0136-json-keep-recorded-locations-stdin.txt", + "input_line": 7, + "event": "#", + "message": " 494| fi", + "verbosity_level": 1 + } + ] + } + ] +} diff --git a/tests/csgrep/0136-json-keep-recorded-locations-stdout.txt b/tests/csgrep/0136-json-keep-recorded-locations-stdout.txt new file mode 100644 index 0000000..90d3c38 --- /dev/null +++ b/tests/csgrep/0136-json-keep-recorded-locations-stdout.txt @@ -0,0 +1,67 @@ +{ + "defects": [ + { + "checker": "SHELLCHECK_WARNING", + "language": "shell", + "hash_v1": "b6311c1fdc52c47d4279cd6650af36e6f8299960", + "key_event_idx": 0, + "events": [ + { + "file_name": "/etc/rc.d/rc.sysinit", + "line": 492, + "column": 13, + "input_file": "0136-json-keep-recorded-locations-stdin.txt", + "input_line": 2, + "event": "warning", + "message": "Quote this to prevent word splitting. [SC2046]", + "verbosity_level": 0 + }, + { + "file_name": "", + "line": 0, + "input_file": "0136-json-keep-recorded-locations-stdin.txt", + "input_line": 3, + "event": "#", + "message": " 490| if [ -n \"$SELINUX_STATE\" -a \"$READONLY\" != \"yes\" ]; then", + "verbosity_level": 1 + }, + { + "file_name": "", + "line": 0, + "input_file": "0136-json-keep-recorded-locations-stdin.txt", + "input_line": 4, + "event": "#", + "message": " 491| if [ -f /.autorelabel ] || strstr \"$cmdline\" autorelabel ; then", + "verbosity_level": 1 + }, + { + "file_name": "", + "line": 0, + "input_file": "0136-json-keep-recorded-locations-stdin.txt", + "input_line": 5, + "event": "#", + "message": " 492|-> \trestorecon $(awk '!/^#/ && $4 !~ /noauto/ && $2 ~ /^\\// { print $2 }' /etc/fstab) >/dev/null 2>&1", + "verbosity_level": 1 + }, + { + "file_name": "", + "line": 0, + "input_file": "0136-json-keep-recorded-locations-stdin.txt", + "input_line": 6, + "event": "#", + "message": " 493| fi", + "verbosity_level": 1 + }, + { + "file_name": "", + "line": 0, + "input_file": "0136-json-keep-recorded-locations-stdin.txt", + "input_line": 7, + "event": "#", + "message": " 494| fi", + "verbosity_level": 1 + } + ] + } + ] +} diff --git a/tests/csgrep/CMakeLists.txt b/tests/csgrep/CMakeLists.txt index 34479ba..fad53c7 100644 --- a/tests/csgrep/CMakeLists.txt +++ b/tests/csgrep/CMakeLists.txt @@ -177,3 +177,6 @@ test_csgrep("0130-file-glob" ) test_csgrep("0131-unicontrol-perl-man-page" ) test_csgrep("0132-cov-parser-nested-evt" ) test_csgrep("0133-sarif-gcc-pwd" ) +test_csgrep("0134-json-record-locations" ) +test_csgrep("0135-json-record-locations-reverse" ) +test_csgrep("0136-json-keep-recorded-locations" )