# Patch summary (three files). Text ahead of the first "diff --git" header is
# commentary only.
#
#   .github/workflows/main.yml
#     Adds `extra-flags: -DARM64_BUILD=ON` to the ubuntu-24.04-arm /
#     linuxarm64 matrix entry.
#
#   CMakeLists.txt
#     * When CMAKE_CXX_COMPILER_ID matches "GNU", adds -Wuninitialized,
#       -Werror=uninitialized and -fno-common through add_compile_options().
#     * Removes the block that set TARGET to ARMV8 whenever
#       CMAKE_SYSTEM_PROCESSOR was "ARM64" and CMAKE_SYSTEM_NAME was "Linux".
#     * Adds an if(ARM64_BUILD) block inside if(NOT APPLE), ahead of
#       FetchContent_Declare(BLAS ...), that force-writes these cache entries:
#       CMAKE_CROSSCOMPILING=TRUE, TARGET=ARMV8, DYNAMIC_ARCH=OFF,
#       DYNAMIC_OLDER=OFF, CMAKE_C_FLAGS_OPENBLAS=-march=armv8-a,
#       COMMON_OPT=-march=armv8-a, ARCH=aarch64.
#
#   src/mrcal-uncertainty.cpp
#     * The residual-summing loop index changes from size_t to int (hunk
#       header names _observed_pixel_uncertainty_from_inputs).
#     * Adds `Jt_p[0] = 0;` just before the cholmod_sparse Jt initializer.
#     * Value-initializes `mrcal_lensmodel_t lensmodel{}` before its .type
#       field is assigned.
#
# NOTE(review): as stored here the diff occupies three physical lines; the
#   per-line +/-/context structure is flattened, so it has to be regenerated
#   from the originating commit before `git apply` or `patch` can consume it.
# NOTE(review): TARGET=ARMV8 used to be selected from CMAKE_SYSTEM_PROCESSOR /
#   CMAKE_SYSTEM_NAME; after this patch it is only set when ARM64_BUILD is
#   true. No option(ARM64_BUILD ...) declaration is visible in these hunks, and
#   only the CI matrix entry passes -DARM64_BUILD=ON -- confirm other ARM64
#   Linux builds are expected to pass it by hand.
# NOTE(review): CMAKE_C_FLAGS_OPENBLAS matches CMake's per-configuration
#   CMAKE_<LANG>_FLAGS_<CONFIG> naming pattern, and nothing visible in this
#   patch reads it -- confirm that -march=armv8-a really reaches the OpenBLAS
#   compile lines.
# NOTE(review): CMAKE_CROSSCOMPILING is forced in the cache, which is not
#   scoped to the OpenBLAS subproject -- confirm that nothing else in the build
#   keys off it, and that the value still holds inside OpenBLAS after its own
#   project() call.
# NOTE(review): the GNU-only add_compile_options() call sits at directory scope
#   above include(FetchContent), so it presumably also applies to fetched
#   dependencies -- confirm -Werror=uninitialized is acceptable there.
# NOTE(review): the added CMake comments assert OpenBLAS internals (getarch
#   behaviour, which TARGET values enable SVE kernels); none of that can be
#   checked from this patch -- verify against the pinned OpenBLAS revision.
# NOTE(review): in the unchanged C++ context lines, sum and sum_sq accumulate
#   over [measurement_index_board, measurement_index_board +
#   num_measurements_board) while mean and variance divide by x.size() --
#   confirm that divisor is intended.
# NOTE(review): `Jt_p[0] = 0;` assumes Jt_p has at least one element; its
#   declaration is outside the visible hunk -- confirm.
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b8d4506..5d7c8b3 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -21,6 +21,7 @@ jobs: arch-name: linuxx86-64 - os: ubuntu-24.04-arm arch-name: linuxarm64 + extra-flags: -DARM64_BUILD=ON - os: windows-latest arch-name: windowsx86-64 extra-flags: -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl diff --git a/CMakeLists.txt b/CMakeLists.txt index 08ef020..54ae1ff 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,6 +14,11 @@ if(WITH_ASAN) add_compile_options(-fsanitize=address -g -Wall -fsanitize=undefined) endif() +# GCC-specific flags to catch uninitialized variables and undefined behavior +if(CMAKE_CXX_COMPILER_ID MATCHES "GNU") + add_compile_options(-Wuninitialized -Werror=uninitialized -fno-common) +endif() + # Include FetchContent module for downloading dependencies include(FetchContent) @@ -75,20 +80,53 @@ set(USE_LOCKING ON) set(USE_THREAD OFF) set(NOFORTRAN ON) -# ARMv8 is what our coprocessors run, but not the CI machines. Ensure we don't accidentally include ARMv9 instructions -if( - CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64" - AND CMAKE_SYSTEM_NAME STREQUAL "Linux" -) - set(TARGET ARMV8) -endif() # We need PIC for this to work as a shared library set(CMAKE_POSITION_INDEPENDENT_CODE ON) # Silence OpenBLAS/LAPACK warnings set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -w") + if(NOT APPLE) set(BLA_VENDOR OpenBLAS) + + if(ARM64_BUILD) + # ---------------------------------------------------------------- + # Variables for subdirectory builds must be set in the parent cache + # BEFORE FetchContent_MakeAvailable. + # + # We also force CMAKE_CROSSCOMPILING so OpenBLAS skips its getarch + # host-CPU detection (which would detect the build machine's CPU, + # potentially a Neoverse/SVE host, and silently override TARGET). + # Without this, TARGET=ARMV8 is ignored when not cross-compiling. 
+ # ---------------------------------------------------------------- + + # Force OpenBLAS to treat this as a cross-compile so it respects TARGET + # and does not run getarch to auto-detect the build host's CPU. + set(CMAKE_CROSSCOMPILING TRUE CACHE BOOL "" FORCE) + + # TARGET=ARMV8 selects the generic AArch64 kernel (NEON/ASIMD only). + # getarch2.c confirms ARMV8 sets: HAVE_NEON, HAVE_VFP, HAVE_VFPV3, + # HAVE_VFPV4 — and critically does NOT set HAVE_SVE. + # Do NOT use: ARMV8SVE, NEOVERSEN1, NEOVERSEN2, NEOVERSEV1, CORTEXA510, + # CORTEXX2, ARMV9 — all of those enable SVE kernels. + set(TARGET "ARMV8" CACHE STRING "" FORCE) + + # Disable dynamic multi-arch dispatch. If left ON, OpenBLAS compiles + # kernels for every supported core including SVE targets and picks one + # at runtime — your RK3588 could still land on an SVE kernel. + set(DYNAMIC_ARCH OFF CACHE BOOL "" FORCE) + set(DYNAMIC_OLDER OFF CACHE BOOL "" FORCE) + + # Lock the compiler to ARMv8-A baseline. This prevents the compiler + # from emitting SVE intrinsics even if OpenBLAS assembly kernels + # happen to include SVE .S files guarded by preprocessor. + set(CMAKE_C_FLAGS_OPENBLAS "-march=armv8-a" CACHE STRING "" FORCE) + set(COMMON_OPT "-march=armv8-a" CACHE STRING "" FORCE) + + # Explicitly set ARCH so OpenBLAS does not re-detect it. 
+ set(ARCH "aarch64" CACHE STRING "" FORCE) + endif() + FetchContent_Declare( BLAS GIT_REPOSITORY https://github.com/OpenMathLib/OpenBLAS.git diff --git a/src/mrcal-uncertainty.cpp b/src/mrcal-uncertainty.cpp index 386611f..1180651 100644 --- a/src/mrcal-uncertainty.cpp +++ b/src/mrcal-uncertainty.cpp @@ -270,12 +270,13 @@ double _observed_pixel_uncertainty_from_inputs(std::vector &x, int measurement_index_board) { // Compute variance from residuals double sum = 0.0, sum_sq = 0.0; - for (size_t i = measurement_index_board; + for (int i = measurement_index_board; i < measurement_index_board + num_measurements_board; i++) { double val = x[i]; sum += val; sum_sq += val * val; } + double mean = sum / x.size(); double variance = (sum_sq / x.size()) - (mean * mean); @@ -312,6 +313,8 @@ CalibrationUncertaintyContext create_calibration_uncertainty_context( std::vector Jt_i(N_j_nonzero); std::vector Jt_x(N_j_nonzero); + Jt_p[0] = 0; + cholmod_sparse Jt = {.nrow = static_cast(Nstate), .ncol = static_cast(Nmeasurements), .nzmax = static_cast(N_j_nonzero), @@ -475,7 +478,7 @@ std::vector compute_uncertainty( cv::Size imagerSize, cv::Size calobjectSize, double calobjectSpacing, cv::Size sampleResolution) { - mrcal_lensmodel_t lensmodel; + mrcal_lensmodel_t lensmodel{}; lensmodel.type = MRCAL_LENSMODEL_OPENCV8; // Create calibration uncertainty context once