
interpret: move the native call preparation logic into Miri #143324

Merged · 1 commit · Jul 3, 2025
61 changes: 27 additions & 34 deletions compiler/rustc_const_eval/src/interpret/memory.rs
@@ -655,7 +655,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
     /// The caller is responsible for calling the access hooks!
     ///
     /// You almost certainly want to use `get_ptr_alloc`/`get_ptr_alloc_mut` instead.
-    fn get_alloc_raw(
+    pub fn get_alloc_raw(
         &self,
         id: AllocId,
     ) -> InterpResult<'tcx, &Allocation<M::Provenance, M::AllocExtra, M::Bytes>> {
@@ -757,7 +757,9 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
     ///
     /// Also returns a ptr to `self.extra` so that the caller can use it in parallel with the
     /// allocation.
-    fn get_alloc_raw_mut(
+    ///
+    /// You almost certainly want to use `get_ptr_alloc`/`get_ptr_alloc_mut` instead.
+    pub fn get_alloc_raw_mut(
         &mut self,
         id: AllocId,
     ) -> InterpResult<'tcx, (&mut Allocation<M::Provenance, M::AllocExtra, M::Bytes>, &mut M)> {
@@ -976,47 +978,36 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
         interp_ok(())
     }

-    /// Handle the effect an FFI call might have on the state of allocations.
-    /// This overapproximates the modifications which external code might make to memory:
-    /// We set all reachable allocations as initialized, mark all reachable provenances as exposed
-    /// and overwrite them with `Provenance::WILDCARD`.
-    ///
-    /// The allocations in `ids` are assumed to be already exposed.
-    pub fn prepare_for_native_call(&mut self, ids: Vec<AllocId>) -> InterpResult<'tcx> {
+    /// Visit all allocations reachable from the given start set, by recursively traversing the
+    /// provenance information of those allocations.
+    pub fn visit_reachable_allocs(
+        &mut self,
+        start: Vec<AllocId>,
+        mut visit: impl FnMut(&mut Self, AllocId, &AllocInfo) -> InterpResult<'tcx>,
+    ) -> InterpResult<'tcx> {
         let mut done = FxHashSet::default();
-        let mut todo = ids;
+        let mut todo = start;
         while let Some(id) = todo.pop() {
             if !done.insert(id) {
                 // We already saw this allocation before, don't process it again.
                 continue;
             }
             let info = self.get_alloc_info(id);

-            // If there is no data behind this pointer, skip this.
-            if !matches!(info.kind, AllocKind::LiveData) {
-                continue;
-            }
-
-            // Expose all provenances in this allocation, and add them to `todo`.
-            let alloc = self.get_alloc_raw(id)?;
-            for prov in alloc.provenance().provenances() {
-                M::expose_provenance(self, prov)?;
-                if let Some(id) = prov.get_alloc_id() {
-                    todo.push(id);
+            // Recurse, if there is data here.
+            // Do this *before* invoking the callback, as the callback might mutate the
+            // allocation and e.g. replace all provenance by wildcards!
+            if matches!(info.kind, AllocKind::LiveData) {
+                let alloc = self.get_alloc_raw(id)?;
+                for prov in alloc.provenance().provenances() {
+                    if let Some(id) = prov.get_alloc_id() {
+                        todo.push(id);
+                    }
                 }
             }
-            // Also expose the provenance of the interpreter-level allocation, so it can
-            // be read by FFI. The `black_box` is defensive programming as LLVM likes
-            // to (incorrectly) optimize away ptr2int casts whose result is unused.
-            std::hint::black_box(alloc.get_bytes_unchecked_raw().expose_provenance());
-
-            // Prepare for possible write from native code if mutable.
-            if info.mutbl.is_mut() {
-                self.get_alloc_raw_mut(id)?
-                    .0
-                    .prepare_for_native_write()
-                    .map_err(|e| e.to_interp_error(id))?;
-            }
-
+            // Call the callback.
+            visit(self, id, &info)?;
         }
         interp_ok(())
     }
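The new `visit_reachable_allocs` is a standard worklist traversal: pop an ID, skip it if already seen, enqueue its successors, then visit it. A minimal standalone sketch of the same pattern, with plain `u64` IDs and an edge list standing in for allocations and their provenance (all names here are illustrative, not rustc APIs):

```rust
use std::collections::HashSet;

/// Visit every node reachable from `start`, calling `visit` exactly once per node.
fn visit_reachable(
    edges: &[Vec<u64>], // edges[id] = IDs that node `id` points to
    start: Vec<u64>,
    mut visit: impl FnMut(u64),
) {
    let mut done = HashSet::new();
    let mut todo = start;
    while let Some(id) = todo.pop() {
        if !done.insert(id) {
            continue; // already visited
        }
        // Enqueue successors *before* invoking the callback, mirroring the
        // ordering constraint documented in the diff above.
        todo.extend(edges[id as usize].iter().copied());
        visit(id);
    }
}

fn main() {
    // 0 -> 1 -> 2 -> 0: the cycle is handled by the `done` set.
    let edges = vec![vec![1], vec![2], vec![0]];
    visit_reachable(&edges, vec![0], |id| println!("visiting {id}"));
}
```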
@@ -1073,7 +1064,9 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
         todo.extend(static_roots(self));
         while let Some(id) = todo.pop() {
             if reachable.insert(id) {
-                // This is a new allocation, add the allocation it points to `todo`.
+                // This is a new allocation, add the allocations it points to `todo`.
+                // We only need to care about `alloc_map` memory here, as entirely unchanged
+                // global memory cannot point to memory relevant for the leak check.
                 if let Some((_, alloc)) = self.memory.alloc_map.get(id) {
                     todo.extend(
                         alloc.provenance().provenances().filter_map(|prov| prov.get_alloc_id()),

Review comment by the author on the `while let Some(id) = todo.pop()` line: "I would have loved to also port this recursive traversal over to visit_reachable_allocs... but visit_reachable_allocs calls get_alloc_raw which can fail, and this here is in the context of a function that cannot fail, so it didn't seem worth it unfortunately."
9 changes: 1 addition & 8 deletions compiler/rustc_middle/src/mir/interpret/allocation.rs
@@ -799,7 +799,7 @@ impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes>
     /// Initialize all previously uninitialized bytes in the entire allocation, and set
     /// provenance of everything to `Wildcard`. Before calling this, make sure all
     /// provenance in this allocation is exposed!
-    pub fn prepare_for_native_write(&mut self) -> AllocResult {
+    pub fn prepare_for_native_access(&mut self) {
         let full_range = AllocRange { start: Size::ZERO, size: Size::from_bytes(self.len()) };
         // Overwrite uninitialized bytes with 0, to ensure we don't leak whatever their value happens to be.
         for chunk in self.init_mask.range_as_init_chunks(full_range) {
@@ -814,13 +814,6 @@ impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes>

         // Set provenance of all bytes to wildcard.
         self.provenance.write_wildcards(self.len());
-
-        // Also expose the provenance of the interpreter-level allocation, so it can
-        // be written by FFI. The `black_box` is defensive programming as LLVM likes
-        // to (incorrectly) optimize away ptr2int casts whose result is unused.
-        std::hint::black_box(self.get_bytes_unchecked_raw_mut().expose_provenance());
-
-        Ok(())
     }

     /// Remove all provenance in the given memory range.
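What `prepare_for_native_access` does to the data bytes can be shown with a toy model, assuming a per-byte boolean init mask rather than the compressed range-based `InitMask` rustc actually uses:

```rust
/// Toy allocation: a byte buffer plus a per-byte "initialized?" mask.
/// (rustc's real `InitMask` stores compressed ranges; this is just the idea.)
struct ToyAlloc {
    bytes: Vec<u8>,
    init: Vec<bool>,
}

impl ToyAlloc {
    /// Overwrite uninitialized bytes with 0 so their arbitrary contents don't
    /// leak to native code, then mark the whole allocation as initialized.
    fn prepare_for_native_access(&mut self) {
        for (byte, init) in self.bytes.iter_mut().zip(self.init.iter_mut()) {
            if !*init {
                *byte = 0;
                *init = true;
            }
        }
        // The real version also sets all provenance to `Wildcard`; this toy
        // model tracks no provenance.
    }
}

fn main() {
    let mut a = ToyAlloc { bytes: vec![0xAA, 0xBB, 0xCC], init: vec![true, false, true] };
    a.prepare_for_native_access();
    assert_eq!(a.bytes, [0xAA, 0x00, 0xCC]); // only the uninit byte was zeroed
    assert!(a.init.iter().all(|&b| b));
}
```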
@@ -120,7 +120,7 @@ impl<Prov: Provenance> ProvenanceMap<Prov> {
         }
     }

-    /// Check if here is ptr-sized provenance at the given index.
+    /// Check if there is ptr-sized provenance at the given index.
     /// Does not mean anything for bytewise provenance! But can be useful as an optimization.
     pub fn get_ptr(&self, offset: Size) -> Option<Prov> {
         self.ptrs.get(&offset).copied()
15 changes: 4 additions & 11 deletions src/tools/miri/src/alloc_addresses/mod.rs
@@ -466,17 +466,10 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         Some((alloc_id, Size::from_bytes(rel_offset)))
     }

-    /// Prepare all exposed memory for a native call.
-    /// This overapproximates the modifications which external code might make to memory:
-    /// We set all reachable allocations as initialized, mark all reachable provenances as exposed
-    /// and overwrite them with `Provenance::WILDCARD`.
-    fn prepare_exposed_for_native_call(&mut self) -> InterpResult<'tcx> {
-        let this = self.eval_context_mut();
-        // We need to make a deep copy of this list, but it's fine; it also serves as scratch space
-        // for the search within `prepare_for_native_call`.
-        let exposed: Vec<AllocId> =
-            this.machine.alloc_addresses.get_mut().exposed.iter().copied().collect();
-        this.prepare_for_native_call(exposed)
+    /// Return a list of all exposed allocations.
+    fn exposed_allocs(&self) -> Vec<AllocId> {
+        let this = self.eval_context_ref();
+        this.machine.alloc_addresses.borrow().exposed.iter().copied().collect()
     }
 }

41 changes: 34 additions & 7 deletions src/tools/miri/src/shims/native_lib/mod.rs
@@ -198,7 +198,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         let mut libffi_args = Vec::<CArg>::with_capacity(args.len());
         for arg in args.iter() {
             if !matches!(arg.layout.backend_repr, BackendRepr::Scalar(_)) {
-                throw_unsup_format!("only scalar argument types are support for native calls")
+                throw_unsup_format!("only scalar argument types are supported for native calls")
             }
             let imm = this.read_immediate(arg)?;
             libffi_args.push(imm_to_carg(&imm, this)?);
@@ -224,16 +224,42 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 this.expose_provenance(prov)?;
             }
         }
-
-        // Prepare all exposed memory.
-        this.prepare_exposed_for_native_call()?;
-
-        // Convert them to `libffi::high::Arg` type.
+        // Convert arguments to `libffi::high::Arg` type.
         let libffi_args = libffi_args
             .iter()
             .map(|arg| arg.arg_downcast())
             .collect::<Vec<libffi::high::Arg<'_>>>();

+        // Prepare all exposed memory (both previously exposed, and just newly exposed since a
+        // pointer was passed as argument).
+        this.visit_reachable_allocs(this.exposed_allocs(), |this, alloc_id, info| {
+            // If there is no data behind this pointer, skip this.
+            if !matches!(info.kind, AllocKind::LiveData) {
+                return interp_ok(());
+            }
+            // It's okay to get raw access, what we do does not correspond to any actual
+            // AM operation, it just approximates the state to account for the native call.
+            let alloc = this.get_alloc_raw(alloc_id)?;
+            // Also expose the provenance of the interpreter-level allocation, so it can
+            // be read by FFI. The `black_box` is defensive programming as LLVM likes
+            // to (incorrectly) optimize away ptr2int casts whose result is unused.
+            std::hint::black_box(alloc.get_bytes_unchecked_raw().expose_provenance());
+            // Expose all provenances in this allocation, since the native code can do $whatever.
+            for prov in alloc.provenance().provenances() {
+                this.expose_provenance(prov)?;
+            }
+
+            // Prepare for possible write from native code if mutable.
+            if info.mutbl.is_mut() {
+                let alloc = &mut this.get_alloc_raw_mut(alloc_id)?.0;
+                alloc.prepare_for_native_access();
+                // Also expose *mutable* provenance for the interpreter-level allocation.
+                std::hint::black_box(alloc.get_bytes_unchecked_raw_mut().expose_provenance());
+            }
+
+            interp_ok(())
+        })?;
+
         // Call the function and store output, depending on return type in the function signature.
         let (ret, maybe_memevents) =
             this.call_native_with_args(link_name, dest, code_ptr, libffi_args)?;
@@ -321,7 +347,8 @@ fn imm_to_carg<'tcx>(v: &ImmTy<'tcx>, cx: &impl HasDataLayout) -> InterpResult<'
             CArg::USize(v.to_scalar().to_target_usize(cx)?.try_into().unwrap()),
         ty::RawPtr(..) => {
             let s = v.to_scalar().to_pointer(cx)?.addr();
-            // This relies on the `expose_provenance` in `prepare_for_native_call`.
+            // This relies on the `expose_provenance` in the `visit_reachable_allocs` callback
+            // above.
             CArg::RawPtr(std::ptr::with_exposed_provenance_mut(s.bytes_usize()))
         }
         _ => throw_unsup_format!("unsupported argument type for native call: {}", v.layout.ty),
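The `with_exposed_provenance_mut` in this hunk only works because the matching `expose_provenance` ran in the callback earlier. A minimal sketch of that expose/reconstruct contract using the standard strict-provenance APIs from `std` (a simplified setting, not Miri's interpreter state):

```rust
fn main() {
    let mut x: u32 = 42;

    // Expose the pointer's provenance and keep only the raw address,
    // as happens when a pointer is handed to native code.
    let addr: usize = (&mut x as *mut u32).expose_provenance();

    // Later, reconstruct a usable pointer from the bare address. This is
    // only sound because the provenance was exposed above; Miri's provenance
    // tracking would flag the access otherwise.
    let p: *mut u32 = std::ptr::with_exposed_provenance_mut(addr);
    unsafe { *p += 1 };
    assert_eq!(x, 43);
}
```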