Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
93 commits
Select commit Hold shift + click to select a range
2760da7
feat(sim): add active_box parameter and m_active_box skeleton (no beh…
sbryngelson Jun 28, 2026
f257117
fix(sim): export ab_ambient from m_active_box per documented interface
sbryngelson Jun 28, 2026
2078c55
feat(sim): detect ambient state and initialize active box from IC sup…
sbryngelson Jun 28, 2026
5891b94
feat(sim): grow active box by light-cone with single device-update ch…
sbryngelson Jun 28, 2026
19c06a2
feat(sim): restrict RK update and convert window to the active box
sbryngelson Jun 28, 2026
0beb6b1
feat(sim): restrict reconstruction and Riemann windows to the active box
sbryngelson Jun 28, 2026
c79c45b
feat(sim): add debug envelope tripwire for active-box under-growth
sbryngelson Jun 29, 2026
1896baa
fix(sim): active-box tripwire checks inner margin (outer layer is fro…
sbryngelson Jun 29, 2026
a72f6b5
test(sim): golden regression for active_box on a 3D shock case
sbryngelson Jun 29, 2026
1f37c90
test(sim): strengthen active_box golden so box stays a strict subset …
sbryngelson Jun 29, 2026
e1f7ad5
fix(sim): make active-box convert bounds device-resident (GPU present…
sbryngelson Jun 29, 2026
2bb5fdc
fix(sim): active-box final-review fixes (init ab_active, tighten gate…
sbryngelson Jun 29, 2026
bbf6b2a
feat(sim): add load_weight_wrt param and m_load_weight skeleton (no b…
sbryngelson Jun 29, 2026
e474a22
feat(sim): load_weight base + active-box contributor with field output
sbryngelson Jun 29, 2026
796f884
feat(sim): per-rank load-imbalance metric
sbryngelson Jun 29, 2026
fa1629b
feat(sim): bubble (EE/EL) load-weight contributors
sbryngelson Jun 29, 2026
e54c161
fix(sim): gate bubble load-weight contributions by the active box
sbryngelson Jun 29, 2026
7c2fb22
feat(sim): IB load-weight contributor
sbryngelson Jun 29, 2026
a12b5f5
feat(sim): phase-change Newton-iteration load-weight contributor
sbryngelson Jun 29, 2026
14b837c
fix(sim): load-weight metric on parallel_io path + EE reads q_cons (f…
sbryngelson Jun 29, 2026
0161fac
feat(sim): add sfc_partition params and m_sfc_partition skeleton (no …
sbryngelson Jun 29, 2026
5b30010
fix(sim): export m_sfc_partition state + guard partition_tile_size>=1
sbryngelson Jun 29, 2026
71b503d
feat(sim): aggregate per-cell load weight into global tile weights
sbryngelson Jun 29, 2026
0d2ac75
feat(sim): Morton space-filling-curve tile ordering
sbryngelson Jun 29, 2026
0ba87e2
feat(sim): chains-on-chains balanced contiguous SFC partition
sbryngelson Jun 29, 2026
f7617e8
feat(sim): report SFC-partition predicted imbalance + call wiring
sbryngelson Jun 29, 2026
2795e26
fix(sim): self-contained SFC report (current + predicted + gain)
sbryngelson Jun 29, 2026
6df9c1f
spike(sim): prove weighted re-decompose + re-read mechanism
sbryngelson Jun 30, 2026
5570a71
feat(sim): weighted-split function for load-balanced decomposition
sbryngelson Jun 30, 2026
8c01acb
feat(sim): global axis marginals from per-cell load weight
sbryngelson Jun 30, 2026
b992103
feat(sim): wire load_balance weighted decomposition (param, checks, o…
sbryngelson Jun 30, 2026
c00c8b2
feat(sim): minimal probe read for functional weighted decomposition
sbryngelson Jun 30, 2026
69227fe
fix(sim): load_balance min-cells floor only applies to axes split acr…
sbryngelson Jun 30, 2026
ffadfe0
fix(sim): conforming sizes_glb/loc assignment in probe; drop supersed…
sbryngelson Jun 30, 2026
c43c02a
fix(sim): scope num_procs_x/y/z to simulation (post_process build), a…
sbryngelson Jun 30, 2026
95398eb
feat(sim): rank_time_wrt param + m_rank_timing per-rank RHS-time diag…
sbryngelson Jun 30, 2026
adacca1
feat(sim): wire per-rank RHS-time diagnostic into stepper and data ou…
sbryngelson Jun 30, 2026
b637627
fix(sim): drop EE bubble load-weight term (calibration: K_bub*void ov…
sbryngelson Jun 30, 2026
f8cfe78
feat(sim): broaden rank-timing to include phase-change relaxation (t_…
sbryngelson Jun 30, 2026
cc7882d
fix(sim): rank-timing measures compute only (exclude halo exchange) +…
sbryngelson Jun 30, 2026
21c60ff
feat(sim): hybrid_weno + hybrid_weno_eps params and checks
sbryngelson Jun 30, 2026
99fd63e
feat(sim): WENO discontinuity sensor (density+pressure Jameson, stenc…
sbryngelson Jun 30, 2026
f94573c
feat(sim): hybrid WENO branch (linear-optimal in smooth cells) + sens…
sbryngelson Jun 30, 2026
0d1337e
fix(sim): allocate hybrid-WENO sensor scratch once (module-level weno…
sbryngelson Jun 30, 2026
33bd3aa
fix(sim): hybrid-WENO covers buffered domain (halo flagging) + PROHIB…
sbryngelson Jul 1, 2026
7efb010
feat(sim): hybrid_riemann + hybrid_smooth_flux params, defaults, and …
sbryngelson Jul 1, 2026
1ec23ae
feat(sim): generalize discontinuity sensor to velocity+volume-fractio…
sbryngelson Jul 1, 2026
08502db
feat(sim): hybrid_riemann cheap central/Rusanov flux in the 5-equatio…
sbryngelson Jul 1, 2026
9e484ac
feat(sim): hybrid_riemann cheap flux in the 6-equation HLLC block
sbryngelson Jul 1, 2026
5082b53
fix(sim): prohibit hybrid_riemann/hybrid_weno with non-WENO recon, we…
sbryngelson Jul 1, 2026
74b5877
feat(common): add m_box (t_box + partition arithmetic); relocate f_we…
sbryngelson Jul 1, 2026
f727133
refactor(common): s_mpi_decompose computes equal split via m_box (t_b…
sbryngelson Jul 1, 2026
ac2edef
test(sim): load_balance manual validation; document diagnostic gate
sbryngelson Jul 1, 2026
de24440
fix(common): explicit allocate for equal-split offsets (Intel noreall…
sbryngelson Jul 1, 2026
352f564
feat(sim): amr param + static-patch spec + SP1 gate (default off)
sbryngelson Jul 2, 2026
69f5c36
feat(sim): m_amr two-level static hierarchy (inert refined level-1); …
sbryngelson Jul 2, 2026
3bc4d4b
feat(sim): AMR conservative restriction + conservative-linear prolong…
sbryngelson Jul 2, 2026
66b209d
feat(sim): AMR fine-advance infrastructure (stage storage, globals sw…
sbryngelson Jul 2, 2026
ff1d75a
feat(sim): AMR no-subcycle two-level advance (per-stage coupling, end…
sbryngelson Jul 2, 2026
434eec1
feat(sim): AMR refluxing - per-stage flux registers close conservatio…
sbryngelson Jul 2, 2026
8269e79
refactor(sim): AMR region-based indexing + fixed max-size preallocati…
sbryngelson Jul 2, 2026
622b37b
feat(sim): AMR dynamic regrid - gradient tagging, padded bounding box…
sbryngelson Jul 2, 2026
e015a34
feat(sim): AMR subcycling groundwork - accumulating flux registers, s…
sbryngelson Jul 2, 2026
5f65b62
feat(sim): AMR subcycling - coarse at case dt, two dt/2 fine substeps…
sbryngelson Jul 2, 2026
8e1f8c3
fix(sim): prohibit amr + acoustic_source (dt-dependent RHS source, un…
sbryngelson Jul 2, 2026
247b2f3
feat(sim): AMR multi-rank owner model - containment, global-local ind…
sbryngelson Jul 2, 2026
725f50b
feat(sim): AMR multi-rank regrid - allreduced tag box, owner-window c…
sbryngelson Jul 2, 2026
2c1dd4e
refactor(sim): AMR copy-based coordinate swap (GPU-safe semantics, be…
sbryngelson Jul 2, 2026
2048cdb
feat(sim): AMR on GPU builds - device-resident fine fields, transfer-…
sbryngelson Jul 3, 2026
cc9719f
chore: AMR upstream hygiene - docs, python validation, message cleanu…
sbryngelson Jul 3, 2026
b203dc8
feat(sim): AMR GPU kernel-ported coupling (M2) - device-resident regi…
sbryngelson Jul 3, 2026
41530c4
feat(sim): AMR mirror-decomposed fine level (T1) - spanning patches, …
sbryngelson Jul 3, 2026
316c28b
feat(sim): AMR rank-local regrid under mirror decomposition (T2) - sp…
sbryngelson Jul 3, 2026
03b5951
feat(sim): AMR-weighted decomposition (SP7c) - fine-work-aware load_b…
sbryngelson Jul 3, 2026
a1a7e3a
Merge remote-tracking branch 'upstream/master' into up/mega
sbryngelson Jul 3, 2026
2b1e8a9
test(sim): AMR 3D validation - free-stream/blast/spanning/subcycle ga…
sbryngelson Jul 3, 2026
8dddd82
feat(sim): multi-fluid AMR (SP9a) - per-fluid conservative reflux, su…
sbryngelson Jul 3, 2026
9e936de
merge: multi-fluid AMR (SP9a) with 3D validation
sbryngelson Jul 3, 2026
a57dcb6
merge: 3D validation + multi-fluid AMR (SP9a) into the upstream PR br…
sbryngelson Jul 3, 2026
9064ecd
feat(sim): AMR restart - fine-level save/restore with regridded-box p…
sbryngelson Jul 3, 2026
b8a14ae
merge: AMR restart (SP10) into the PR branch
sbryngelson Jul 3, 2026
3003068
feat(sim): viscous AMR (SP11) - viscous flux registers, c/f-matched t…
sbryngelson Jul 3, 2026
6129592
fix(sim): AMR fine ghost coordinates for viscous stencil (SP11 np=2 e…
sbryngelson Jul 3, 2026
3c6e77d
merge: viscous AMR (SP11) + fine-ghost-coordinate fix into the PR branch
sbryngelson Jul 3, 2026
931470d
refactor(sim): AMR patch slots (amr_fine -> amr_slots, fixed pool arr…
sbryngelson Jul 3, 2026
bc32128
merge: AMR patch-slot infrastructure (SP12a T1) into PR branch
sbryngelson Jul 3, 2026
8bb32ed
feat(sim): AMR multi-patch - Berger-Rigoutsos clustering, min-separat…
sbryngelson Jul 4, 2026
11d46e3
refactor: rename AMR "patch" -> "block" (params, symbols, docs) to di…
sbryngelson Jul 4, 2026
fc65bad
merge: AMR multi-patch (SP12a) + patch->block rename into PR branch
sbryngelson Jul 4, 2026
7bf7bee
feat(sim): Euler-Euler bubbles AMR (SP13) - realizability-preserving …
sbryngelson Jul 4, 2026
00c9378
merge: Euler-Euler bubbles AMR (SP13) into PR branch
sbryngelson Jul 4, 2026
7bc7cde
feat(sim): phase-change (relax) AMR (SP15) - per-block pressure relax…
sbryngelson Jul 4, 2026
17a8e51
merge: phase-change (relax) AMR (SP15) into PR branch
sbryngelson Jul 4, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .typos.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ TKE = "TKE"
HSA = "HSA"
infp = "infp"
Sur = "Sur"
thi = "thi" # AMR clustering local: tagged-box hi index (tlo/thi)
alo = "alo" # AMR clustering local: accepted-box lo array (alo/ahi)
thr = "thr" # AMR clustering local: min-separation merge threshold
chioces = "chioces" # typo for "choices" - tests constraint key validation
reqires = "reqires" # typo for "requires" - tests dependency key validation
choises = "choises" # appears in comment explaining validation purpose
Expand Down
118 changes: 118 additions & 0 deletions docs/documentation/case.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,13 +109,15 @@ is equivalent to `"riemann_solver": 2`. Defined names appear in each parameter's
| ---: | :----: | :--- |
| `run_time_info` | Logical | Output run-time information |
| `rdma_mpi` | Logical | (GPUs) Enable RDMA for MPI communication. |
| `active_box` | Logical | Enable causal-envelope active-box restriction of the RHS compute window. |
| `case_dir` | String | Case directory path |
| `old_grid` | Logical | Use grid from previous simulation |
| `old_ic` | Logical | Use initial conditions from previous simulation |
| `t_step_old` | Integer | Time step to restart from |
| `n_start_old` | Integer | Starting index from previous simulation |

- `run_time_info` generates a text file that includes run-time information including the CFL number(s) at each time-step.
- `active_box` enables the causal-envelope active-box optimization, restricting the RHS compute window to the region where the solution deviates from a uniform ambient state. Requires WENO reconstruction (`recon_type = 1`) and SSP-RK3 time stepping (`time_stepper = 3`). Incompatible with immersed boundaries, acoustic sources, body forces, Lagrangian bubbles, phase change, and the IGR solver.
- `rdma_mpi` optimizes data transfers between GPUs using Remote Direct Memory Access (RDMA).
The underlying MPI implementation and communication infrastructure must support this
feature, detecting GPU pointers and performing RDMA accordingly.
Expand Down Expand Up @@ -670,6 +672,24 @@ To restart the simulation from $k$-th time step, see @ref running "Restarting Ca
| `file_per_process` | Logical | Whether or not to write one IO file per process |
| `cons_vars_wrt` | Logical | Write conservative variables |
| `prim_vars_wrt` | Logical | Write primitive variables |
| `load_weight_wrt` | Logical | Write per-cell load-weight diagnostic field |
| `sfc_partition_wrt` | Logical | Report SFC-weighted load-balance partition |
| `rank_time_wrt` | Logical | Report per-rank RHS compute-time imbalance (max/mean) |
| `load_balance` | Logical | (Experimental/diagnostic) Weighted static Cartesian decomposition at init (requires `parallel_io = T`, >1 rank). Measured gain is small on CPU (~5%) and can be slower on GPU due to the occupancy floor; equal decomposition is near-optimal for uniform-cost workloads. |
| `amr` | Logical | (Experimental) Enable block-structured AMR: a 2:1 refined level-1 block with gradient-based dynamic regrid, optional dt/2 subcycling, and conservative coupling with refluxing. Requires WENO reconstruction, SSP-RK3, model_eqns=2; num_fluids > 1 requires mpp_lim; supports physical viscosity. |
| `amr_block_beg(i)` | Integer | Refined-block start cell index in direction $i$ (level-0 index space) |
| `amr_block_end(i)` | Integer | Refined-block end cell index in direction $i$ (level-0 index space) |
| `amr_regrid_int` | Integer | Steps between AMR regrid events (0 = static block) |
| `amr_tag_eps` | Real | Relative density-gradient threshold for AMR refinement tagging (default 0.1) |
| `amr_buf` | Integer | Coarse-cell padding around tagged cells when regridding (default 3) |
| `amr_subcycle` | Logical | Advance the coarse level at the case dt and the fine level at dt/2 (two substeps; Berger-Colella refluxing). Requires `amr`; incompatible with `cfl_dt`. |
| `amr_max_blocks` | Integer | Number of fixed refined-block slots preallocated (each max-block sized; ~N x device memory); must be >= 1 (default 4) |
| `amr_cluster_eff` | Real | Berger-Rigoutsos min tag efficiency a clustered block box reaches before splitting stops; must satisfy 0 < eff <= 1 (default 0.7) |
| `hybrid_weno` | Logical | Use linear-optimal reconstruction in smooth cells, full WENO only at flagged discontinuities (requires WENO reconstruction) |
| `hybrid_weno_eps` | Real | Smoothness threshold for hybrid WENO shock flagging; must be > 0 (default 1e-2) |
| `hybrid_riemann` | Logical | Use a cheap central/Rusanov flux in smooth cells, full HLLC only at flagged discontinuities (requires HLLC, 5eq/6eq) |
| `hybrid_smooth_flux` | Integer | Smooth-region flux for hybrid Riemann: 1 = central, 2 = Rusanov (default 2) |
| `partition_tile_size` | Integer | Tile side for the SFC partitioner (default 8) |
| `alpha_rho_wrt(i)` | Logical | Add the partial density of the fluid $i$ to the database |
| `rho_wrt` | Logical | Add the mixture density to the database |
| `mom_wrt(i)` | Logical | Add the $i$-direction momentum to the database |
Expand Down Expand Up @@ -754,6 +774,104 @@ This is useful for large domains where only a portion of the domain is of intere
It is not supported when `precision = 1` and `format = 1`.
It also cannot be enabled with `flux_wrt`, `heat_ratio_wrt`, `pres_inf_wrt`, `c_wrt`, `omega_wrt`, `ib`, `schlieren_wrt`, `qm_wrt`, or `liutex_wrt`.

### 7.1. Adaptive Mesh Refinement (AMR) {#sec-amr}

MFC supports experimental block-structured AMR via a single 2:1 refined level (level 1),
made up of one block or, under dynamic regrid, up to `amr_max_blocks` separated blocks,
that coexists with the base-level solve.
The fine block is initialized from the base grid by piecewise-linear interpolation and
remains continuously coupled to the base solve through conservative ghost-cell exchange
and flux refluxing at the coarse–fine interface.

**Restrictions.**
AMR requires WENO reconstruction (`recon_type = 1`, any order), SSP-RK3 time-stepping
(`time_stepper = 3`), and the 5-equation model (`model_eqns = 2`).
Multiple fluids (`num_fluids > 1`) are supported and additionally require `mpp_lim`,
whose volume-fraction clamp+renormalize maintains coarse/fine alpha consistency; the
per-fluid masses are refluxed exactly, and volume fractions are prolonged with a
sum-preserving closure (fine-level volume fractions sum to one by construction).
Physical viscosity (`viscous = T`) is supported: the viscous stress/work travels through
the momentum- and energy-equation source fluxes, which are captured into the same
coarse–fine flux registers as the advective fluxes, so the interface is refluxed against
the matched *total* (advective + viscous) flux and energy — including viscous work — is
conserved. Fine-ghost velocity gradients at the coarse–fine boundary are taken from the
conservative-linear prolongation of the coarse state (no special gradient reconstruction);
that interface inconsistency is bounded and conservation is enforced by the flux-register
matching. The density-gradient regrid tagger does not sense shear or boundary layers well,
so viscous features may need a static or generously buffered block (error-estimator taggers
are future work).
Monodisperse (`nb = 1`) polytropic Euler-Euler bubbles (`bubbles_euler = T` with
`polytropic = T`) are supported: the bubble moments are flux-based conserved variables
refluxed through the same registers, and prolongation floors the radius moment so the
reconstructed radius and number density stay positive (realizability). QBMM, non-polytropic,
and polydisperse bubbles are not yet supported (their internal pressure / vapor-mass
sub-fields and quadrature weights are not advanced on the fine level).
Phase change (`relax`) is supported: the cell-local, mass/energy-conserving relaxation
runs on the fine solution before restriction (matching the coarse once-per-step timing).
AMR is incompatible with surface tension, Lagrangian bubbles, QBMM, non-polytropic bubbles,
polydisperse bubbles, immersed boundaries, IGR, cylindrical coordinates, MHD, chemistry,
`hybrid_weno`, `hybrid_riemann`, and `acoustic_source`.
Multi-rank runs are supported: the fine level mirrors the base decomposition (each rank
holds the fine cells covering the block's intersection with its own subdomain), so the
block may span rank boundaries and move freely across them under dynamic regrid.
The block may cover at most about half of any rank's subdomain per dimension (the fine
advance reuses the rank-local solver scratch).

**Static vs. dynamic block.**
Setting `amr_regrid_int = 0` fixes the block at the initial `amr_block_beg`/`amr_block_end`
position for the entire run (useful for convergence studies or GPU correctness testing).
Setting `amr_regrid_int > 0` triggers a dynamic regrid every `amr_regrid_int` coarse steps:
cells whose normalized density gradient exceeds `amr_tag_eps` are tagged, then clustered
by a Berger–Rigoutsos recursive bisection into a list of separated block boxes (each grown
by `amr_buf` coarse cells of buffer padding). Boxes whose padded extents would come within a
ghost-cell buffer width of each other are merged, so separated features get their own refined
box while nearby ones stay a single box (guaranteeing no fine–fine adjacency). Splitting stops once a
box's tag efficiency (tagged/total cells) reaches `amr_cluster_eff`; the number of blocks
is capped at `amr_max_blocks`.
A positive `amr_tag_eps` and `amr_buf >= 1` are required whenever regridding is active.

**Subcycling.**
`amr_subcycle = T` enables Berger–Colella dt/2 subcycling: the coarse level advances
one full step at the case `dt`, while the fine level takes two half-steps at `dt/2` with
time-interpolated ghost values at the intermediate stage.
Accumulated fine-level fluxes are applied back to the coarse level (reflux correction)
after each coarse step.
`amr_subcycle` is incompatible with `cfl_dt` (variable time step) and requires `amr = T`.

**Block slots.**
`amr_max_blocks` (default 4) sets the number of fixed refined-block slots preallocated
for the run. Each slot is sized to the maximum block extent, so `N` slots require roughly
`N` times the device memory of a single block. The slots exist for the compute win of
refining separated features independently; memory efficiency (compact per-block pools) is
future work. Dynamic regrid clusters the tagged cells into up to `amr_max_blocks` separated
boxes (`amr_cluster_eff` sets the minimum tag efficiency each box reaches before splitting stops).

**Restart.**
Each save step writes a fine-level AMR restart file alongside the level-0 restart data
(whose format is unchanged): the current — possibly regridded — block box and the fine
solution, per rank (an `amr_fine.dat` in each rank's step directory, or a single shared
`amr_*.dat` next to the level-0 MPI-IO restart file when `parallel_io` is on).
Restarting (`t_step_start > 0`) restores the saved box and fine state seamlessly; it
requires the same rank count (and decomposition) as the run that wrote the file, and
aborts with a clear message otherwise.
If the AMR file is absent (e.g., data from an older run), the run proceeds with a
warning and re-initializes the fine level by prolongation from the coarse restart data,
losing the accumulated fine-level accuracy.
Note that level-0 output already contains the restricted (coarse-resolution) fine
solution over the block, so existing visualization works unchanged; fine-resolution
visualization output is future work.

| Parameter | Type | Description |
| ---: | :----: | :--- |
| `amr` | Logical | Enable AMR (see prose above for requirements and restrictions) |
| `amr_block_beg(i)` | Integer | Initial refined-block start cell index in direction $i$ (level-0 index space) |
| `amr_block_end(i)` | Integer | Initial refined-block end cell index in direction $i$ (level-0 index space); must satisfy $2\,(e_i - b_i + 1) - 1 \le N_i$ |
| `amr_regrid_int` | Integer | Coarse steps between regrid events (0 = static block) |
| `amr_tag_eps` | Real | Normalized density-gradient threshold for refinement tagging; must be > 0 when `amr_regrid_int > 0` (default 0.1) |
| `amr_buf` | Integer | Coarse-cell padding around tagged cells; must be >= 1 when `amr_regrid_int > 0` (default 3) |
| `amr_subcycle` | Logical | Advance fine level at dt/2 (two substeps per coarse step) with Berger–Colella refluxing |
| `amr_max_blocks` | Integer | Number of fixed refined-block slots preallocated (each max-block sized; ~N x device memory); must be >= 1 (default 4) |
| `amr_cluster_eff` | Real | Berger-Rigoutsos min tag efficiency a clustered block box reaches before splitting stops; must satisfy 0 < eff <= 1 (default 0.7) |

### 8. Acoustic Source {#sec-acoustic-source}

| Parameter | Type | Description |
Expand Down
10 changes: 9 additions & 1 deletion docs/module_categories.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"category": "Solver Core",
"modules": [
"m_rhs",
"m_active_box",
"m_time_steppers",
"m_weno",
"m_riemann_solvers",
Expand Down Expand Up @@ -58,7 +59,11 @@
"m_start_up",
"m_data_output",
"m_data_input",
"m_delay_file_access"
"m_delay_file_access",
"m_load_weight",
"m_load_balance",
"m_sfc_partition",
"m_rank_timing"
]
},
{
Expand All @@ -69,6 +74,9 @@
"m_global_parameters_common",
"m_mpi_common",
"m_mpi_proxy",
"m_box",
"m_amr",
"m_amr_registers",
"m_constants",
"m_precision_select",
"m_helper",
Expand Down
4 changes: 4 additions & 0 deletions src/common/m_boundary_common.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ contains
type(integer_field), dimension(1:num_dims,1:2), intent(in) :: bc_type
type(scalar_field), optional, intent(inout) :: q_T_sf

#ifdef MFC_SIMULATION
if (amr_in_fine_advance) return ! AMR fine block: ghosts pre-filled from the coarse level
#endif

call s_populate_bc_direction(1, -1, bc_x, bc_type(1, 1), q_prim_vf, pb_in, mv_in, q_T_sf)
call s_populate_bc_direction(1, 1, bc_x, bc_type(1, 2), q_prim_vf, pb_in, mv_in, q_T_sf)

Expand Down
85 changes: 85 additions & 0 deletions src/common/m_box.fpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
!>
!!@file
!!@brief Contains module m_box

#:include 'macros.fpp'

!> @brief Owned domain-decomposition Box abstraction and partition arithmetic (v1: one box per rank).
module m_box

use m_derived_types, only: t_box
use m_global_parameters, only: wp

implicit none

private
public :: t_box, f_equal_splits, f_weighted_splits, f_box_from_splits

contains

!> Cumulative offsets of an equal (block) split of g cells over n_parts parts. Entry r is the number of cells that precede
!! part r, off(r) = r*(g/n_parts) + min(r, mod(g, n_parts)): every part gets g/n_parts cells and, for non-negative g, the
!! first mod(g, n_parts) parts get one extra cell, so off(0) = 0 and off(n_parts) = g. Integer arithmetic only. n_parts is
!! used as a divisor and is not checked here.
pure function f_equal_splits(g, n_parts) result(off)

    integer, intent(in) :: g, n_parts
    integer, dimension(0:n_parts) :: off
    integer :: base_len, n_extra, part

    base_len = g/n_parts       ! cells every part receives
    n_extra = mod(g, n_parts)  ! leftover cells, handed one each to the lowest-numbered parts

    ! off is explicit-shape (0:n_parts), so the constructor's n_parts + 1 values fill it in order.
    off = [(part*base_len + min(part, n_extra), part=0, n_parts)]

end function f_equal_splits

!> Cumulative offsets splitting marginal w into n_parts contiguous chunks of near-equal weight, each >= l_min cells. off(0)=0,
!! off(n_parts)=size(w); chunk r (1-based) covers cells off(r-1) .. off(r)-1 of w. Feasibility (size(w) >= n_parts*l_min) is
!! the caller's responsibility (pure; no abort): when it holds, every chunk is guaranteed at least l_min cells.
!!
!! Two passes: (1) a prefix-sum scan places cut r just after the first cell at which the running weight reaches r/n_parts of
!! the total; (2) the cuts are swept left to right and clamped so that each chunk, including the last, keeps >= l_min cells.
!! If all weights are zero, pass 1 puts every cut after the first cell and pass 2 then spaces them l_min apart (l_min >= 1).
pure function f_weighted_splits(w, n_parts, l_min) result(off)

    real(wp), dimension(0:), intent(in) :: w
    integer, intent(in) :: n_parts, l_min
    integer, dimension(0:n_parts) :: off
    real(wp) :: csum, total
    integer :: g, i, r

    g = size(w)
    off(0) = 0
    off(n_parts) = g
    if (n_parts == 1) return

    ! Pass 1: weight-balanced cuts. One cell may satisfy several thresholds at once (a heavy cell), hence the inner loop.
    total = sum(w)
    r = 1
    csum = 0._wp
    do i = 0, g - 1
        csum = csum + w(i)
        do while (r < n_parts .and. csum >= real(r, wp)*total/real(n_parts, wp))
            off(r) = i + 1
            r = r + 1
        end do
    end do

    ! Cuts whose threshold was never reached during the scan are parked at the end; pass 2 pulls them back in.
    do while (r < n_parts)
        off(r) = g
        r = r + 1
    end do

    ! Pass 2: enforce the minimum chunk length. The lower bound is taken relative to the (already final) previous cut, so a
    ! chunk of 1 .. l_min-1 cells is widened too, not only an empty one. The upper bound reserves l_min cells for each of the
    ! n_parts - r chunks still to come. Under the feasibility condition the upper clamp can never undo the lower one,
    ! because off(r - 1) was itself clamped to leave room for chunk r.
    do r = 1, n_parts - 1
        off(r) = max(off(r), off(r - 1) + l_min)
        off(r) = min(off(r), g - (n_parts - r)*l_min)
    end do

end function f_weighted_splits

!> Build the box of the rank sitting at 0-based Cartesian position coords from the per-axis cumulative offsets. Along each
!! axis d the box starts at off_d(coords(d)) and ends one cell before the next offset, off_d(coords(d) + 1) - 1. A collapsed
!! axis (off_d = [0, 1]) therefore yields lo = hi = 0.
pure function f_box_from_splits(off_x, off_y, off_z, coords) result(box)

    integer, dimension(0:), intent(in) :: off_x, off_y, off_z
    integer, intent(in) :: coords(3)
    type(t_box) :: box

    ! Low corner: the offset at this rank's own coordinate on each axis.
    box%lo = [off_x(coords(1)), off_y(coords(2)), off_z(coords(3))]

    ! High corner: the next rank's starting offset on each axis, minus one cell.
    box%hi = [off_x(coords(1) + 1), off_y(coords(2) + 1), off_z(coords(3) + 1)] - 1

end function f_box_from_splits

end module m_box
7 changes: 7 additions & 0 deletions src/common/m_derived_types.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -537,4 +537,11 @@ module m_derived_types
real(wp), dimension(1:num_fluids_max) :: perturb_dens_scale
real(wp), dimension(1:num_fluids_max,3) :: perturb_dens_offset
end type simplex_noise_params

!> Rectangle in global cell-index space, stored as its low and high corner indices. In v1, one t_box = one rank's
!! subdomain. Flat leaf: no allocatable/pointer components, host-only, never namelist/broadcast.
type t_box
    integer, dimension(3) :: lo  !< global low cell index per axis (x,y,z)
    integer, dimension(3) :: hi  !< global high cell index per axis (x,y,z)
end type t_box
end module m_derived_types
12 changes: 7 additions & 5 deletions src/common/m_global_parameters_common.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ module m_global_parameters_common
$:GPU_DECLARE(create='[hyperelasticity, elasticity, low_Mach]')
$:GPU_DECLARE(create='[cont_damage, hyper_cleaning]')
$:GPU_DECLARE(create='[relax, relax_model, palpha_eps, ptgalpha_eps]')
$:GPU_DECLARE(create='[load_weight_wrt]')
$:GPU_DECLARE(create='[down_sample]')
$:GPU_DECLARE(create='[fd_order]')
$:GPU_DECLARE(create='[rhoref, pref]')
Expand Down Expand Up @@ -348,6 +349,11 @@ contains

allocate (proc_coords(1:num_dims))

#ifdef MFC_MPI
! start_idx is always needed (e.g. for sfc_partition_wrt); parallel I/O setup below is optional.
allocate (start_idx(1:num_dims))
#endif

if (parallel_io .neqv. .true.) return

#ifdef MFC_MPI
Expand All @@ -359,8 +365,6 @@ contains

! Option for UNIX file system (Hooke/Thomson) WRITE(mpiiofs, '(A)') '/ufs_' mpiiofs = TRIM(mpiiofs) mpi_info_int =
! MPI_INFO_NULL

allocate (start_idx(1:num_dims))
#endif

end subroutine s_initialize_parallel_io_common
Expand All @@ -373,9 +377,7 @@ contains
deallocate (proc_coords)

#ifdef MFC_MPI
if (parallel_io) then
deallocate (start_idx)
end if
deallocate (start_idx)
#endif

end subroutine s_finalize_global_parameters_common
Expand Down
Loading