From 98533319cd91d26f86336228a1524faaa238a18b Mon Sep 17 00:00:00 2001 From: "Jonathan I. Maletic" Date: Wed, 17 Jun 2026 13:50:15 -0400 Subject: [PATCH 1/4] Resolve issue 41 (and 40) concerning typedef structs --- nameCollectorHandler.hpp | 23 +++++++++++++++++------ testsuite/test_c_typedef.sh | 12 ++++++------ testsuite/test_cpp_typedef.sh | 15 ++++++++------- 3 files changed, 31 insertions(+), 19 deletions(-) diff --git a/nameCollectorHandler.hpp b/nameCollectorHandler.hpp index ae2cb94..50a690b 100644 --- a/nameCollectorHandler.hpp +++ b/nameCollectorHandler.hpp @@ -266,8 +266,7 @@ class nameCollectorHandler : public srcSAXHandler { scopeStack.push_back(classScope); } - //Need to collect some type info for struct and anonymous struct - // struct foo { } x; // x has type foo + //Need to collect some type info for struct and anonymous struct // struct { } x; // x has type struct if (isStruct(std::string(localname))) { typeInfo insertType; @@ -282,7 +281,7 @@ class nameCollectorHandler : public srcSAXHandler { typeStack[typeStack.size()-1].gatherContent = false; } } - + if (isStereotypableCategory(localname)) { // Check for stereotype information from stereocode for (int i = 0; i < numAttributes; ++i) { @@ -418,14 +417,26 @@ class nameCollectorHandler : public srcSAXHandler { } //Get type from type stack of and + //Deals with anonymous struct etc. std::string type = ""; if (isTypedCategory(category) && (typeStack.size() >= 1) && !isUntypedLanguage()) { if ((category == "field") && (typeStack[typeStack.size()-1].type.find("enum") != std::string::npos)) { std::string type = ""; //Deal with enum fields without a type } else { - type = typeStack.size() >= 1 ? typeStack[typeStack.size()-1].type : ""; - replaceSubStringInPlace(type, ",", ","); - replaceSubStringInPlace(type, "\n", ""); + //Deal with typedefs with structs etc. 
+ if (typeStack.size() >= 1 && typeStack[typeStack.size()-1].associatedTag == "typedef") { + type = typeStack[typeStack.size()-1].type; + size_t blank = type.find(' '); + if (blank != std::string::npos) { + if (type.substr(0, blank).find("struct")!= std::string::npos) type = "struct"; + if (type.substr(0, blank).find("enum")!= std::string::npos) type = "enum"; + if (type.substr(0, blank).find("class")!= std::string::npos) type = "class"; + if (type.substr(0, blank).find("union")!= std::string::npos) type = "union"; + } + } + else + type = typeStack.size() >= 1 ? typeStack[typeStack.size()-1].type : ""; + if (typeStack.size() >= 1 && type == typeStack[typeStack.size()-1].associatedTag + " ") replaceSubStringInPlace(type, " ", ""); if (typeStack.size() >= 1 && isStruct(typeStack[typeStack.size()-1].associatedTag)) { diff --git a/testsuite/test_c_typedef.sh b/testsuite/test_c_typedef.sh index 47a1947..b4a8d6e 100755 --- a/testsuite/test_c_typedef.sh +++ b/testsuite/test_c_typedef.sh @@ -45,21 +45,21 @@ EOF input=$(srcml test_typedef.c --position) output=$(echo "$input" | ./nameCollector ) -expected="Integer is typedef in C file: test_typedef.c:3:13 +expected="Integer is a int typedef in C file: test_typedef.c:3:13 x is a int field in C file: test_typedef.c:6:9 y is a int field in C file: test_typedef.c:7:9 -Point is a typedef of struct in C file: test_typedef.c:8:3 +Point is a struct typedef in C file: test_typedef.c:8:3 PointAgain is a struct in C file: test_typedef.c:11:16 a is a int field in C file: test_typedef.c:12:9 b is a int field in C file: test_typedef.c:13:9 -namedStructPoint is a typedef of struct in C file: test_typedef.c:14:3 +namedStructPoint is a struct typedef in C file: test_typedef.c:14:3 Color is a enum in C file: test_typedef.c:16:14 RED is a field in C file: test_typedef.c:17:5 GREEN is a field in C file: test_typedef.c:18:5 BLUE is a field in C file: test_typedef.c:19:5 -ColorEnum is a typedef of enum in C file: test_typedef.c:20:3 +ColorEnum is 
a enum typedef in C file: test_typedef.c:20:3 functionPointer is a int function in C file: test_typedef.c:22:15 -characterArrayPtr is a typedef in C file: test_typedef.c:24:15 +characterArrayPtr is a char* typedef in C file: test_typedef.c:24:15 add is a int function in C file: test_typedef.c:26:5 x is a int parameter in C file: test_typedef.c:26:13 y is a int parameter in C file: test_typedef.c:26:20 @@ -80,4 +80,4 @@ fi echo "Test test_c_typedef passed!" # Repeat tests -exit 0 \ No newline at end of file +exit 0 diff --git a/testsuite/test_cpp_typedef.sh b/testsuite/test_cpp_typedef.sh index cc42c13..bdeb25f 100755 --- a/testsuite/test_cpp_typedef.sh +++ b/testsuite/test_cpp_typedef.sh @@ -5,7 +5,8 @@ cat < test_typedef.cpp #include #include -typedef int Integer; + +typedef int Integer; typedef std::vector int_vector; typedef char* char_array[5]; //array of char ptr typedef int (*functionPtr)(int, int); // with fxn pointer @@ -27,17 +28,17 @@ EOF input=$(srcml test_typedef.cpp --position) output=$(echo "$input" | ./nameCollector ) -expected="Integer is a typedef in C++ file: test_typedef.cpp:4:13 -int_vector is a typedef in C++ file: test_typedef.cpp:5:21 -char_array is a typedef in C++ file: test_typedef.cpp:6:15 +expected="Integer is a int typedef in C++ file: test_typedef.cpp:4:13 +int_vector is a std::vector typedef in C++ file: test_typedef.cpp:5:26 +char_array is a char* typedef in C++ file: test_typedef.cpp:6:15 functionPtr is a int function in C++ file: test_typedef.cpp:7:15 Point is a struct in C++ file: test_typedef.cpp:10:16 x is a int field in C++ file: test_typedef.cpp:11:9 y is a int field in C++ file: test_typedef.cpp:11:12 -pt is a typedef of struct in C++ file: test_typedef.cpp:12:3 +pt is a struct typedef in C++ file: test_typedef.cpp:12:3 v is a int field in C++ file: test_typedef.cpp:15:9 w is a int field in C++ file: test_typedef.cpp:15:12 -anon_struct_typedef is a typedef of struct in C++ file: test_typedef.cpp:16:3 +anon_struct_typedef is a 
struct typedef in C++ file: test_typedef.cpp:16:3 main is a int function in C++ file: test_typedef.cpp:18:5" if [[ "$output" != "$expected" ]]; then @@ -49,4 +50,4 @@ fi echo "Test test_cpp_typedef passed!" # Repeat tests -exit 0 \ No newline at end of file +exit 0 From 98e649c2d82d7d4c9dd5e3cfab7b931cd26f2de4 Mon Sep 17 00:00:00 2001 From: "Jonathan I. Maletic" Date: Wed, 17 Jun 2026 14:10:11 -0400 Subject: [PATCH 2/4] Edits to comments copyright added John and Ali --- README.md | 15 ++++++++++++--- nameCollectorHandler.hpp | 4 ++-- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 3160d3f..62bdde3 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,10 @@ + # nameCollector A tool for collecting all user-defined identifier names from a source code file. @@ -5,7 +12,7 @@ Works for C, C++, C#, Java, and Python Input: A srcML file of source code with --position option. srcML file can be a single unit (one source code file) or an archive (multiple source code files). -Output: A list of identifier names, their type (for declartions and functions), their syntactic category, the file name, and position (line:column) the identifier occurs (declared), the programming langauge, and for methods and classes their stereotype, from [stereocode](https://github.com/srcML/stereocode), if it is in the srcML. Output is plain text (default) or csv with column headings (as below). +Output: A list of identifier names, their type (for declarations and functions), their syntactic category, the file name, and position (line:column) the identifier occurs (declared), the programming language, and for methods and classes their stereotype, from [stereocode](https://github.com/srcML/stereocode), if in the srcML. Output is plain text (default) or csv with column headings (as below). Example: @@ -44,7 +51,7 @@ Example: ## Python notes: -In Python, globals, locals, and fields are collected at their first appearance. 
If a name is assigned to twice within a scope, only the first use of that name will be collected. +In Python, globals, locals, and fields are collected at their first appearance. If a name is assigned more than once within a scope, only the first use of the name is collected. Additionally, type information is NOT collected for any Python variables or functions. @@ -83,15 +90,17 @@ Output is plain text by default. Use -f csv or --csv for comma separated output ## Developer Notes: -The initial version of the application was developed by Decker from the srcSAX examples in June 2023. This was extended to collect the different types of names by Maletic. Maletic added the CLI11 interface and made the first public release (July 2023). Testa added testing framework and testsuite in summer 2025. Behler added support for Python alongside the 1.1.0 release of srcML (August 2025). +The initial version of the application was developed by Decker from the srcSAX examples in June 2023. This was extended to collect the different types of names by Maletic. Maletic added the CLI11 interface and made the first public release (July 2023). Testa added testing framework and testsuite in summer 2025. Behler added support for Python alongside the 1.1.0 release of srcML (August 2025). Sipanhioglu fixed a memory bug in June 2026. Ramadan set up the Docker image in June 2026. nameCollector is a good simple example of how to use srcSAX to build fast and scalable tools for collecting analysis information. Developers of nameCollector: +- Ali Al-Ramadan - Joshua Behler - Michael Collard - Michael Decker - Jonathan Maletic +- John Sipanhioglu - Sophia Testa diff --git a/nameCollectorHandler.hpp b/nameCollectorHandler.hpp index 50a690b..d8236e9 100644 --- a/nameCollectorHandler.hpp +++ b/nameCollectorHandler.hpp @@ -2,12 +2,12 @@ /** * @file nameCollectorHandler.cpp * - * @copyright Copyright (C) 2013-2023 srcML, LLC. + * @copyright Copyright (C) 2013-2026 srcML, LLC. 
(www.srcML.org) * * This file is part of the nameCollector application. */ -/** Modified by MaleticJuly 2023. +/** * * Collects all user defined names in a given C, C++, C#, Java file * From fadd4a539383ba477d70ad45e357c69247d4bbcc Mon Sep 17 00:00:00 2001 From: "Jonathan I. Maletic" Date: Wed, 17 Jun 2026 14:54:26 -0400 Subject: [PATCH 3/4] Addressed part of issue 48 --- nameCollectorHandler.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/nameCollectorHandler.hpp b/nameCollectorHandler.hpp index d8236e9..73b09de 100644 --- a/nameCollectorHandler.hpp +++ b/nameCollectorHandler.hpp @@ -182,7 +182,8 @@ class nameCollectorHandler : public srcSAXHandler { // this is adding all elements, so you might only want to push certain elements - std::string back = elementStack.back(); + std::string back = ""; + if (elementStack.size() > 0) back = elementStack.back(); if (back == "name" && std::string(localname) == "name") // Top-level Names elementStack.push_back("name_2"); @@ -398,7 +399,7 @@ class nameCollectorHandler : public srcSAXHandler { //Deal with complex function names //If it is a function name, collect the complex name ex. String::length, String::operator+= //If it is a decl collect simple name only - if (((category == "destructor") || (category == "constructor") || (category == "function") || (category == "decl")) && (elementStack.back() != "name")) { + if (((category == "destructor") || (category == "constructor") || (category == "function") || (category == "decl")) && ((elementStack.size() != 0) && (elementStack.back() != "name"))) { if (elementStack.size() != 0) elementStack.pop_back(); return; } From 7b8f2b7262fb84b1077844f14a4bb6524d95c384 Mon Sep 17 00:00:00 2001 From: "Jonathan I. 
Maletic" Date: Wed, 17 Jun 2026 16:35:24 -0400 Subject: [PATCH 4/4] Refactored stack.size()!=0 to stack.empty() --- nameCollectorHandler.hpp | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/nameCollectorHandler.hpp b/nameCollectorHandler.hpp index 73b09de..a942bc8 100644 --- a/nameCollectorHandler.hpp +++ b/nameCollectorHandler.hpp @@ -183,7 +183,7 @@ class nameCollectorHandler : public srcSAXHandler { std::string back = ""; - if (elementStack.size() > 0) back = elementStack.back(); + if (!elementStack.empty()) back = elementStack.back(); if (back == "name" && std::string(localname) == "name") // Top-level Names elementStack.push_back("name_2"); @@ -277,7 +277,7 @@ class nameCollectorHandler : public srcSAXHandler { } //Stop gathering contents of structs when a block is encountered - if ((std::string(localname) == "block") && (typeStack.size() != 0)) { + if ((std::string(localname) == "block") && (!typeStack.empty())) { if (isStruct(typeStack[typeStack.size()-1].associatedTag)) { typeStack[typeStack.size()-1].gatherContent = false; } @@ -319,10 +319,10 @@ class nameCollectorHandler : public srcSAXHandler { * Overide for desired behaviour. 
*/ virtual void endUnit(const char* localname, const char* prefix, const char* URI) { - if (elementStack.size() != 0) elementStack.clear(); - if (scopeStack.size() != 0) scopeStack.clear(); - if (typeStack.size() != 0) typeStack.clear(); - if (stereotypeStack.size() != 0) stereotypeStack.clear(); + if (!elementStack.empty()) elementStack.clear(); + if (!scopeStack.empty()) scopeStack.clear(); + if (!typeStack.empty()) typeStack.clear(); + if (!stereotypeStack.empty()) stereotypeStack.clear(); } /** @@ -341,12 +341,12 @@ class nameCollectorHandler : public srcSAXHandler { bool isComplexName = false; if ((std::string(localname) == "name") && (content != "") && inIndexCount == 0) { size_t nameDepth = 0; - if (elementStack.size() != 0 && elementStack.back() == "name") { + if (!elementStack.empty() && elementStack.back() == "name") { category = elementStack.size() >= 2 ? elementStack[elementStack.size()-2] : ""; //Normal name nameDepth = 1; complexNameCount = 0; } - else if (elementStack.size() != 0) { + else if (!elementStack.empty()) { nameDepth = std::stoi(elementStack.back().substr(5)); category = elementStack.size() >= (nameDepth + 1) ? elementStack[elementStack.size()-(nameDepth+1)] : ""; isComplexName = true; @@ -399,8 +399,8 @@ class nameCollectorHandler : public srcSAXHandler { //Deal with complex function names //If it is a function name, collect the complex name ex. 
String::length, String::operator+= //If it is a decl collect simple name only - if (((category == "destructor") || (category == "constructor") || (category == "function") || (category == "decl")) && ((elementStack.size() != 0) && (elementStack.back() != "name"))) { - if (elementStack.size() != 0) elementStack.pop_back(); + if (((category == "destructor") || (category == "constructor") || (category == "function") || (category == "decl")) && ((!elementStack.empty()) && (elementStack.back() != "name"))) { + if (!elementStack.empty()) elementStack.pop_back(); return; } @@ -448,8 +448,9 @@ class nameCollectorHandler : public srcSAXHandler { } } - std::string stereotype = (isStereotypableCategory(category) && stereotypeStack.size() != 0 ? stereotypeStack[stereotypeStack.size() - 1] : ""); - if (stereotypeStack.size() != 0) stereotypeStack.pop_back(); + std::string stereotype = (isStereotypableCategory(category) && !stereotypeStack.empty() ? + stereotypeStack[stereotypeStack.size() - 1] : ""); + if (!stereotypeStack.empty()) stereotypeStack.pop_back(); //Remove any prefix String:: from context - for functions if (content.find("::") != std::string::npos) @@ -642,7 +643,7 @@ class nameCollectorHandler : public srcSAXHandler { if (typeStack[typeStack.size()-1].associatedTag == localname) typeStack.pop_back(); - if (elementStack.size() != 0) elementStack.pop_back(); + if (!elementStack.empty()) elementStack.pop_back(); if (std::string(localname) == "operator" && isNoDeclLanguage()) { // If at an = operator in expr_stmt, output and then clear the expressions name list @@ -684,14 +685,14 @@ class nameCollectorHandler : public srcSAXHandler { elementStack.push_back("init"); // Deal with namespace foo = x::y; } if (std::string(localname) == "namespace" && category != "" && !isNoDeclLanguage()) { - if (elementStack.size() != 0) elementStack.pop_back(); // Deal with namespace foo = x::y; + if (!elementStack.empty()) elementStack.pop_back(); // Deal with namespace foo = x::y; } // 
If in a no decl language, need to keep track of scope if (isNoDeclLanguage() && (std::string(localname) == "function" || std::string(localname) == "lambda" || std::string(localname) == "class")) { - if (scopeStack.size() != 0) scopeStack.pop_back(); + if (!scopeStack.empty()) scopeStack.pop_back(); } if (isNoDeclLanguage() && (std::string(localname) == "expr_stmt" ||