From 854f619d6a023b15a8b5461a7923134f347b3bb6 Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Mon, 15 Dec 2025 14:36:18 +0100 Subject: [PATCH 1/4] Correctly encode doc attribute metadata --- .../rustc_hir/src/attrs/data_structures.rs | 58 ++++++++++++++++++- compiler/rustc_metadata/src/rmeta/encoder.rs | 10 +--- 2 files changed, 60 insertions(+), 8 deletions(-) diff --git a/compiler/rustc_hir/src/attrs/data_structures.rs b/compiler/rustc_hir/src/attrs/data_structures.rs index 34d78afca9b2c..5ebe121dd67ca 100644 --- a/compiler/rustc_hir/src/attrs/data_structures.rs +++ b/compiler/rustc_hir/src/attrs/data_structures.rs @@ -530,7 +530,7 @@ pub struct CfgHideShow { pub values: ThinVec, } -#[derive(Clone, Debug, Default, HashStable_Generic, Encodable, Decodable, PrintAttribute)] +#[derive(Clone, Debug, Default, HashStable_Generic, Decodable, PrintAttribute)] pub struct DocAttribute { pub aliases: FxIndexMap, pub hidden: Option, @@ -566,6 +566,62 @@ pub struct DocAttribute { pub no_crate_inject: Option, } +impl rustc_serialize::Encodable for DocAttribute { + fn encode(&self, encoder: &mut E) { + let DocAttribute { + aliases, + hidden, + inline, + cfg, + auto_cfg, + auto_cfg_change, + fake_variadic, + keyword, + attribute, + masked, + notable_trait, + search_unbox, + html_favicon_url, + html_logo_url, + html_playground_url, + html_root_url, + html_no_source, + issue_tracker_base_url, + rust_logo, + test_attrs, + no_crate_inject, + } = self; + rustc_serialize::Encodable::::encode(aliases, encoder); + rustc_serialize::Encodable::::encode(hidden, encoder); + + // FIXME: The `doc(inline)` attribute is never encoded, but is it actually the right thing + // to do? I suspect the condition was broken, should maybe instead not encode anything if we + // have `doc(no_inline)`. + let inline: ThinVec<_> = + inline.iter().filter(|(i, _)| *i != DocInline::Inline).cloned().collect(); + rustc_serialize::Encodable::::encode(&inline, encoder); + + rustc_serialize::Encodable::::encode(cfg, encoder); + rustc_serialize::Encodable::::encode(auto_cfg, encoder); + rustc_serialize::Encodable::::encode(auto_cfg_change, encoder); + rustc_serialize::Encodable::::encode(fake_variadic, encoder); + rustc_serialize::Encodable::::encode(keyword, encoder); + rustc_serialize::Encodable::::encode(attribute, encoder); + rustc_serialize::Encodable::::encode(masked, encoder); + rustc_serialize::Encodable::::encode(notable_trait, encoder); + rustc_serialize::Encodable::::encode(search_unbox, encoder); + rustc_serialize::Encodable::::encode(html_favicon_url, encoder); + rustc_serialize::Encodable::::encode(html_logo_url, encoder); + rustc_serialize::Encodable::::encode(html_playground_url, encoder); + rustc_serialize::Encodable::::encode(html_root_url, encoder); + rustc_serialize::Encodable::::encode(html_no_source, encoder); + rustc_serialize::Encodable::::encode(issue_tracker_base_url, encoder); + rustc_serialize::Encodable::::encode(rust_logo, encoder); + rustc_serialize::Encodable::::encode(test_attrs, encoder); + rustc_serialize::Encodable::::encode(no_crate_inject, encoder); + } +} + /// Represents parsed *built-in* inert attributes. /// /// ## Overview diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs index b15ed34fc3569..6abde26e7e6d1 100644 --- a/compiler/rustc_metadata/src/rmeta/encoder.rs +++ b/compiler/rustc_metadata/src/rmeta/encoder.rs @@ -879,13 +879,9 @@ fn analyze_attr(attr: &hir::Attribute, state: &mut AnalyzeAttrState<'_>) -> bool should_encode = true; } } else if let hir::Attribute::Parsed(AttributeKind::Doc(d)) = attr { - // If this is a `doc` attribute that doesn't have anything except maybe `inline` (as in - // `#[doc(inline)]`), then we can remove it. It won't be inlinable in downstream crates. - if d.inline.is_empty() { - should_encode = true; - if d.hidden.is_some() { - state.is_doc_hidden = true; - } + should_encode = true; + if d.hidden.is_some() { + state.is_doc_hidden = true; } } else if let &[sym::diagnostic, seg] = &*attr.path() { should_encode = rustc_feature::is_stable_diagnostic_attribute(seg, state.features); From 7f6f52276159ed9d175f98c950adfa2306d46b44 Mon Sep 17 00:00:00 2001 From: Zalathar Date: Mon, 15 Dec 2025 18:26:44 +1100 Subject: [PATCH 2/4] Rename `TestCase` to `TestableCase` --- .../src/builder/matches/buckets.rs | 35 +++++++++---------- .../src/builder/matches/match_pair.rs | 26 ++++++++------ .../src/builder/matches/mod.rs | 25 +++++++------ .../src/builder/matches/test.rs | 26 ++++++++------ .../src/builder/matches/util.rs | 6 ++-- 5 files changed, 64 insertions(+), 54 deletions(-) diff --git a/compiler/rustc_mir_build/src/builder/matches/buckets.rs b/compiler/rustc_mir_build/src/builder/matches/buckets.rs index f53e73d406c3c..6c5f0ed28a82d 100644 --- a/compiler/rustc_mir_build/src/builder/matches/buckets.rs +++ b/compiler/rustc_mir_build/src/builder/matches/buckets.rs @@ -7,7 +7,7 @@ use tracing::debug; use crate::builder::Builder; use crate::builder::matches::test::is_switch_ty; -use crate::builder::matches::{Candidate, Test, TestBranch, TestCase, TestKind}; +use crate::builder::matches::{Candidate, Test, TestBranch, TestKind, TestableCase}; /// Output of [`Builder::partition_candidates_into_buckets`]. pub(crate) struct PartitionedCandidates<'tcx, 'b, 'c> { @@ -140,12 +140,12 @@ impl<'a, 'tcx> Builder<'a, 'tcx> { // branch, so it can be removed. If false, the match pair is _compatible_ // with its test branch, but still needs a more specific test. let fully_matched; - let ret = match (&test.kind, &match_pair.test_case) { + let ret = match (&test.kind, &match_pair.testable_case) { // If we are performing a variant switch, then this // informs variant patterns, but nothing else. ( &TestKind::Switch { adt_def: tested_adt_def }, - &TestCase::Variant { adt_def, variant_index }, + &TestableCase::Variant { adt_def, variant_index }, ) => { assert_eq!(adt_def, tested_adt_def); fully_matched = true; @@ -159,7 +159,7 @@ impl<'a, 'tcx> Builder<'a, 'tcx> { // things out here, in some cases. // // FIXME(Zalathar): Is the `is_switch_ty` test unnecessary? - (TestKind::SwitchInt, &TestCase::Constant { value }) + (TestKind::SwitchInt, &TestableCase::Constant { value }) if is_switch_ty(match_pair.pattern_ty) => { // An important invariant of candidate bucketing is that a candidate @@ -167,16 +167,15 @@ impl<'a, 'tcx> Builder<'a, 'tcx> { // a new value might invalidate that property for range patterns that // have already been partitioned into the failure arm, so we must take care // not to add such values here. - let is_covering_range = |test_case: &TestCase<'tcx>| { - test_case.as_range().is_some_and(|range| { + let is_covering_range = |testable_case: &TestableCase<'tcx>| { + testable_case.as_range().is_some_and(|range| { matches!(range.contains(value, self.tcx), None | Some(true)) }) }; let is_conflicting_candidate = |candidate: &&mut Candidate<'tcx>| { - candidate - .match_pairs - .iter() - .any(|mp| mp.place == Some(test_place) && is_covering_range(&mp.test_case)) + candidate.match_pairs.iter().any(|mp| { + mp.place == Some(test_place) && is_covering_range(&mp.testable_case) + }) }; if prior_candidates .get(&TestBranch::Failure) @@ -189,7 +188,7 @@ impl<'a, 'tcx> Builder<'a, 'tcx> { Some(TestBranch::Constant(value)) } } - (TestKind::SwitchInt, TestCase::Range(range)) => { + (TestKind::SwitchInt, TestableCase::Range(range)) => { // When performing a `SwitchInt` test, a range pattern can be // sorted into the failure arm if it doesn't contain _any_ of // the values being tested. (This restricts what values can be @@ -207,7 +206,7 @@ impl<'a, 'tcx> Builder<'a, 'tcx> { }) } - (TestKind::If, TestCase::Constant { value }) => { + (TestKind::If, TestableCase::Constant { value }) => { fully_matched = true; let value = value.try_to_bool().unwrap_or_else(|| { span_bug!(test.span, "expected boolean value but got {value:?}") @@ -217,7 +216,7 @@ impl<'a, 'tcx> Builder<'a, 'tcx> { ( &TestKind::Len { len: test_len, op: BinOp::Eq }, - &TestCase::Slice { len, variable_length }, + &TestableCase::Slice { len, variable_length }, ) => { match (test_len.cmp(&(len as u64)), variable_length) { (Ordering::Equal, false) => { @@ -249,7 +248,7 @@ impl<'a, 'tcx> Builder<'a, 'tcx> { } ( &TestKind::Len { len: test_len, op: BinOp::Ge }, - &TestCase::Slice { len, variable_length }, + &TestableCase::Slice { len, variable_length }, ) => { // the test is `$actual_len >= test_len` match (test_len.cmp(&(len as u64)), variable_length) { @@ -281,7 +280,7 @@ impl<'a, 'tcx> Builder<'a, 'tcx> { } } - (TestKind::Range(test), TestCase::Range(pat)) => { + (TestKind::Range(test), TestableCase::Range(pat)) => { if test == pat { fully_matched = true; Some(TestBranch::Success) @@ -292,7 +291,7 @@ impl<'a, 'tcx> Builder<'a, 'tcx> { if !test.overlaps(pat, self.tcx)? { Some(TestBranch::Failure) } else { None } } } - (TestKind::Range(range), &TestCase::Constant { value }) => { + (TestKind::Range(range), &TestableCase::Constant { value }) => { fully_matched = false; if !range.contains(value, self.tcx)? { // `value` is not contained in the testing range, @@ -303,7 +302,7 @@ impl<'a, 'tcx> Builder<'a, 'tcx> { } } - (TestKind::Eq { value: test_val, .. }, TestCase::Constant { value: case_val }) => { + (TestKind::Eq { value: test_val, .. }, TestableCase::Constant { value: case_val }) => { if test_val == case_val { fully_matched = true; Some(TestBranch::Success) @@ -313,7 +312,7 @@ impl<'a, 'tcx> Builder<'a, 'tcx> { } } - (TestKind::Deref { temp: test_temp, .. }, TestCase::Deref { temp, .. }) + (TestKind::Deref { temp: test_temp, .. }, TestableCase::Deref { temp, .. }) if test_temp == temp => { fully_matched = true; diff --git a/compiler/rustc_mir_build/src/builder/matches/match_pair.rs b/compiler/rustc_mir_build/src/builder/matches/match_pair.rs index a0d54354a9c63..dc1545c9278a9 100644 --- a/compiler/rustc_mir_build/src/builder/matches/match_pair.rs +++ b/compiler/rustc_mir_build/src/builder/matches/match_pair.rs @@ -8,7 +8,7 @@ use rustc_middle::ty::{self, Pinnedness, Ty, TypeVisitableExt}; use crate::builder::Builder; use crate::builder::expr::as_place::{PlaceBase, PlaceBuilder}; -use crate::builder::matches::{FlatPat, MatchPairTree, PatternExtraData, TestCase}; +use crate::builder::matches::{FlatPat, MatchPairTree, PatternExtraData, TestableCase}; impl<'a, 'tcx> Builder<'a, 'tcx> { /// Builds and pushes [`MatchPairTree`] subtrees, one for each pattern in @@ -132,7 +132,7 @@ impl<'tcx> MatchPairTree<'tcx> { let place = place_builder.try_to_place(cx); let mut subpairs = Vec::new(); - let test_case = match pattern.kind { + let testable_case = match pattern.kind { PatKind::Missing | PatKind::Wild | PatKind::Error(_) => None, PatKind::Or { ref pats } => { @@ -146,18 +146,18 @@ impl<'tcx> MatchPairTree<'tcx> { // FIXME(@dianne): this needs updating/removing if we always merge or-patterns extra_data.bindings.push(super::SubpatternBindings::FromOrPattern); } - Some(TestCase::Or { pats }) + Some(TestableCase::Or { pats }) } PatKind::Range(ref range) => { if range.is_full_range(cx.tcx) == Some(true) { None } else { - Some(TestCase::Range(Arc::clone(range))) + Some(TestableCase::Range(Arc::clone(range))) } } - PatKind::Constant { value } => Some(TestCase::Constant { value }), + PatKind::Constant { value } => Some(TestableCase::Constant { value }), PatKind::AscribeUserType { ascription: Ascription { ref annotation, variance }, @@ -256,7 +256,7 @@ impl<'tcx> MatchPairTree<'tcx> { if prefix.is_empty() && slice.is_some() && suffix.is_empty() { None } else { - Some(TestCase::Slice { + Some(TestableCase::Slice { len: prefix.len() + suffix.len(), variable_length: slice.is_some(), }) @@ -275,7 +275,11 @@ impl<'tcx> MatchPairTree<'tcx> { cx.def_id.into(), ) }) && !adt_def.variant_list_has_applicable_non_exhaustive(); - if irrefutable { None } else { Some(TestCase::Variant { adt_def, variant_index }) } + if irrefutable { + None + } else { + Some(TestableCase::Variant { adt_def, variant_index }) + } } PatKind::Leaf { ref subpatterns } => { @@ -331,17 +335,17 @@ impl<'tcx> MatchPairTree<'tcx> { &mut subpairs, extra_data, ); - Some(TestCase::Deref { temp, mutability }) + Some(TestableCase::Deref { temp, mutability }) } - PatKind::Never => Some(TestCase::Never), + PatKind::Never => Some(TestableCase::Never), }; - if let Some(test_case) = test_case { + if let Some(testable_case) = testable_case { // This pattern is refutable, so push a new match-pair node. match_pairs.push(MatchPairTree { place, - test_case, + testable_case, subpairs, pattern_ty: pattern.ty, pattern_span: pattern.span, diff --git a/compiler/rustc_mir_build/src/builder/matches/mod.rs b/compiler/rustc_mir_build/src/builder/matches/mod.rs index 5074ce3367986..ecdc3842a9b86 100644 --- a/compiler/rustc_mir_build/src/builder/matches/mod.rs +++ b/compiler/rustc_mir_build/src/builder/matches/mod.rs @@ -1078,7 +1078,7 @@ struct Candidate<'tcx> { /// (see [`Builder::test_remaining_match_pairs_after_or`]). /// /// Invariants: - /// - All or-patterns ([`TestCase::Or`]) have been sorted to the end. + /// - All or-patterns ([`TestableCase::Or`]) have been sorted to the end. match_pairs: Vec>, /// ...and if this is non-empty, one of these subcandidates also has to match... @@ -1164,12 +1164,15 @@ impl<'tcx> Candidate<'tcx> { /// Restores the invariant that or-patterns must be sorted to the end. fn sort_match_pairs(&mut self) { - self.match_pairs.sort_by_key(|pair| matches!(pair.test_case, TestCase::Or { .. })); + self.match_pairs.sort_by_key(|pair| matches!(pair.testable_case, TestableCase::Or { .. })); } /// Returns whether the first match pair of this candidate is an or-pattern. fn starts_with_or_pattern(&self) -> bool { - matches!(&*self.match_pairs, [MatchPairTree { test_case: TestCase::Or { .. }, .. }, ..]) + matches!( + &*self.match_pairs, + [MatchPairTree { testable_case: TestableCase::Or { .. }, .. }, ..] + ) } /// Visit the leaf candidates (those with no subcandidates) contained in @@ -1261,7 +1264,7 @@ struct Ascription<'tcx> { /// Instead they participate in or-pattern expansion, where they are transformed into /// subcandidates. See [`Builder::expand_and_match_or_candidates`]. #[derive(Debug, Clone)] -enum TestCase<'tcx> { +enum TestableCase<'tcx> { Variant { adt_def: ty::AdtDef<'tcx>, variant_index: VariantIdx }, Constant { value: ty::Value<'tcx> }, Range(Arc>), @@ -1271,7 +1274,7 @@ enum TestCase<'tcx> { Or { pats: Box<[FlatPat<'tcx>]> }, } -impl<'tcx> TestCase<'tcx> { +impl<'tcx> TestableCase<'tcx> { fn as_range(&self) -> Option<&PatRange<'tcx>> { if let Self::Range(v) = self { Some(v.as_ref()) } else { None } } @@ -1289,12 +1292,12 @@ pub(crate) struct MatchPairTree<'tcx> { /// --- /// This can be `None` if it referred to a non-captured place in a closure. /// - /// Invariant: Can only be `None` when `test_case` is `Or`. + /// Invariant: Can only be `None` when `testable_case` is `Or`. /// Therefore this must be `Some(_)` after or-pattern expansion. place: Option>, /// ... must pass this test... - test_case: TestCase<'tcx>, + testable_case: TestableCase<'tcx>, /// ... and these subpairs must match. /// @@ -1317,7 +1320,7 @@ enum TestKind<'tcx> { /// Test what enum variant a value is. /// /// The subset of expected variants is not stored here; instead they are - /// extracted from the [`TestCase`]s of the candidates participating in the + /// extracted from the [`TestableCase`]s of the candidates participating in the /// test. Switch { /// The enum type being tested. @@ -1327,7 +1330,7 @@ enum TestKind<'tcx> { /// Test what value an integer or `char` has. /// /// The test's target values are not stored here; instead they are extracted - /// from the [`TestCase`]s of the candidates participating in the test. + /// from the [`TestableCase`]s of the candidates participating in the test. SwitchInt, /// Test whether a `bool` is `true` or `false`. @@ -1948,7 +1951,7 @@ impl<'a, 'tcx> Builder<'a, 'tcx> { candidate: &mut Candidate<'tcx>, match_pair: MatchPairTree<'tcx>, ) { - let TestCase::Or { pats } = match_pair.test_case else { bug!() }; + let TestableCase::Or { pats } = match_pair.testable_case else { bug!() }; debug!("expanding or-pattern: candidate={:#?}\npats={:#?}", candidate, pats); candidate.or_span = Some(match_pair.pattern_span); candidate.subcandidates = pats @@ -2116,7 +2119,7 @@ impl<'a, 'tcx> Builder<'a, 'tcx> { debug_assert!( remaining_match_pairs .iter() - .all(|match_pair| matches!(match_pair.test_case, TestCase::Or { .. })) + .all(|match_pair| matches!(match_pair.testable_case, TestableCase::Or { .. })) ); // Visit each leaf candidate within this subtree, add a copy of the remaining diff --git a/compiler/rustc_mir_build/src/builder/matches/test.rs b/compiler/rustc_mir_build/src/builder/matches/test.rs index 46a76a7b9a30b..55e21ff18aae4 100644 --- a/compiler/rustc_mir_build/src/builder/matches/test.rs +++ b/compiler/rustc_mir_build/src/builder/matches/test.rs @@ -19,7 +19,7 @@ use rustc_span::{DUMMY_SP, Span, Symbol, sym}; use tracing::{debug, instrument}; use crate::builder::Builder; -use crate::builder::matches::{MatchPairTree, Test, TestBranch, TestCase, TestKind}; +use crate::builder::matches::{MatchPairTree, Test, TestBranch, TestKind, TestableCase}; impl<'a, 'tcx> Builder<'a, 'tcx> { /// Identifies what test is needed to decide if `match_pair` is applicable. @@ -29,30 +29,34 @@ impl<'a, 'tcx> Builder<'a, 'tcx> { &mut self, match_pair: &MatchPairTree<'tcx>, ) -> Test<'tcx> { - let kind = match match_pair.test_case { - TestCase::Variant { adt_def, variant_index: _ } => TestKind::Switch { adt_def }, + let kind = match match_pair.testable_case { + TestableCase::Variant { adt_def, variant_index: _ } => TestKind::Switch { adt_def }, - TestCase::Constant { .. } if match_pair.pattern_ty.is_bool() => TestKind::If, - TestCase::Constant { .. } if is_switch_ty(match_pair.pattern_ty) => TestKind::SwitchInt, - TestCase::Constant { value } => TestKind::Eq { value, cast_ty: match_pair.pattern_ty }, + TestableCase::Constant { .. } if match_pair.pattern_ty.is_bool() => TestKind::If, + TestableCase::Constant { .. } if is_switch_ty(match_pair.pattern_ty) => { + TestKind::SwitchInt + } + TestableCase::Constant { value } => { + TestKind::Eq { value, cast_ty: match_pair.pattern_ty } + } - TestCase::Range(ref range) => { + TestableCase::Range(ref range) => { assert_eq!(range.ty, match_pair.pattern_ty); TestKind::Range(Arc::clone(range)) } - TestCase::Slice { len, variable_length } => { + TestableCase::Slice { len, variable_length } => { let op = if variable_length { BinOp::Ge } else { BinOp::Eq }; TestKind::Len { len: len as u64, op } } - TestCase::Deref { temp, mutability } => TestKind::Deref { temp, mutability }, + TestableCase::Deref { temp, mutability } => TestKind::Deref { temp, mutability }, - TestCase::Never => TestKind::Never, + TestableCase::Never => TestKind::Never, // Or-patterns are not tested directly; instead they are expanded into subcandidates, // which are then distinguished by testing whatever non-or patterns they contain. - TestCase::Or { .. } => bug!("or-patterns should have already been handled"), + TestableCase::Or { .. } => bug!("or-patterns should have already been handled"), }; Test { span: match_pair.pattern_span, kind } diff --git a/compiler/rustc_mir_build/src/builder/matches/util.rs b/compiler/rustc_mir_build/src/builder/matches/util.rs index 2c8ad95b6afdb..f1df90f93fd63 100644 --- a/compiler/rustc_mir_build/src/builder/matches/util.rs +++ b/compiler/rustc_mir_build/src/builder/matches/util.rs @@ -6,7 +6,7 @@ use tracing::debug; use crate::builder::Builder; use crate::builder::expr::as_place::PlaceBase; -use crate::builder::matches::{Binding, Candidate, FlatPat, MatchPairTree, TestCase}; +use crate::builder::matches::{Binding, Candidate, FlatPat, MatchPairTree, TestableCase}; impl<'a, 'tcx> Builder<'a, 'tcx> { /// Creates a false edge to `imaginary_target` and a real edge to @@ -159,11 +159,11 @@ impl<'a, 'b, 'tcx> FakeBorrowCollector<'a, 'b, 'tcx> { } fn visit_match_pair(&mut self, match_pair: &MatchPairTree<'tcx>) { - if let TestCase::Or { pats, .. } = &match_pair.test_case { + if let TestableCase::Or { pats, .. } = &match_pair.testable_case { for flat_pat in pats.iter() { self.visit_flat_pat(flat_pat) } - } else if matches!(match_pair.test_case, TestCase::Deref { .. }) { + } else if matches!(match_pair.testable_case, TestableCase::Deref { .. }) { // The subpairs of a deref pattern are all places relative to the deref temporary, so we // don't fake borrow them. Problem is, if we only shallowly fake-borrowed // `match_pair.place`, this would allow: From 4b07875505bdd5c4b635fbf821401fcd4573dffd Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Wed, 17 Dec 2025 14:33:52 +0000 Subject: [PATCH 3/4] Revert #148937 (Remove initialized-bytes tracking from `BorrowedBuf` and `BorrowedCursor`) This caused several performance regressions because of existing code which uses `Read::read` and therefore requires full buffer initialization. This is particularly a problem when the same buffer is re-used for multiple read calls since this means it needs to be fully re-initialized each time. There is still some benefit to landing the API changes, but we will have to add private APIs so that the existing infrastructure can track and avoid redundant initialization. --- library/core/src/io/borrowed_buf.rs | 162 +++++++++++++++--- library/coretests/tests/io/borrowed_buf.rs | 90 ++++++++-- library/std/src/fs/tests.rs | 2 + library/std/src/io/buffered/bufreader.rs | 9 + .../std/src/io/buffered/bufreader/buffer.rs | 27 ++- library/std/src/io/buffered/tests.rs | 24 +++ library/std/src/io/copy.rs | 13 ++ library/std/src/io/mod.rs | 53 +++++- library/std/src/io/tests.rs | 9 + library/std/src/io/util.rs | 2 +- library/std/src/io/util/tests.rs | 8 + library/std/src/net/tcp/tests.rs | 2 + library/std/src/process/tests.rs | 2 + library/std/src/sys/fd/hermit.rs | 2 +- library/std/src/sys/fd/unix.rs | 4 +- library/std/src/sys/fs/solid.rs | 2 +- .../src/sys/net/connection/socket/hermit.rs | 2 +- .../src/sys/net/connection/socket/solid.rs | 2 +- .../std/src/sys/net/connection/socket/unix.rs | 2 +- .../src/sys/net/connection/socket/windows.rs | 2 +- .../std/src/sys/pal/sgx/abi/usercalls/mod.rs | 2 +- library/std/src/sys/pal/windows/handle.rs | 4 +- .../std/src/sys/process/windows/child_pipe.rs | 2 +- library/std/src/sys/stdio/zkvm.rs | 2 +- 24 files changed, 366 insertions(+), 63 deletions(-) diff --git a/library/core/src/io/borrowed_buf.rs b/library/core/src/io/borrowed_buf.rs index b765b96fd00a5..088dea7812945 100644 --- a/library/core/src/io/borrowed_buf.rs +++ b/library/core/src/io/borrowed_buf.rs @@ -2,26 +2,27 @@ use crate::fmt::{self, Debug, Formatter}; use crate::mem::{self, MaybeUninit}; +use crate::{cmp, ptr}; -/// A borrowed buffer of initially uninitialized bytes, which is incrementally filled. +/// A borrowed byte buffer which is incrementally filled and initialized. /// -/// This type makes it safer to work with `MaybeUninit` buffers, such as to read into a buffer -/// without having to initialize it first. It tracks the region of bytes that have been filled and -/// the region that remains uninitialized. +/// This type is a sort of "double cursor". It tracks three regions in the buffer: a region at the beginning of the +/// buffer that has been logically filled with data, a region that has been initialized at some point but not yet +/// logically filled, and a region at the end that is fully uninitialized. The filled region is guaranteed to be a +/// subset of the initialized region. /// -/// The contents of the buffer can be visualized as: +/// In summary, the contents of the buffer can be visualized as: /// ```not_rust -/// [ capacity ] -/// [ len: filled and initialized | capacity - len: uninitialized ] +/// [ capacity ] +/// [ filled | unfilled ] +/// [ initialized | uninitialized ] /// ``` /// -/// Note that `BorrowedBuf` does not distinguish between uninitialized data and data that was -/// previously initialized but no longer contains valid data. -/// -/// A `BorrowedBuf` is created around some existing data (or capacity for data) via a unique -/// reference (`&mut`). The `BorrowedBuf` can be configured (e.g., using `clear` or `set_len`), but -/// cannot be directly written. To write into the buffer, use `unfilled` to create a -/// `BorrowedCursor`. The cursor has write-only access to the unfilled portion of the buffer. +/// A `BorrowedBuf` is created around some existing data (or capacity for data) via a unique reference +/// (`&mut`). The `BorrowedBuf` can be configured (e.g., using `clear` or `set_init`), but cannot be +/// directly written. To write into the buffer, use `unfilled` to create a `BorrowedCursor`. The cursor +/// has write-only access to the unfilled portion of the buffer (you can think of it as a +/// write-only iterator). /// /// The lifetime `'data` is a bound on the lifetime of the underlying data. pub struct BorrowedBuf<'data> { @@ -29,11 +30,14 @@ pub struct BorrowedBuf<'data> { buf: &'data mut [MaybeUninit], /// The length of `self.buf` which is known to be filled. filled: usize, + /// The length of `self.buf` which is known to be initialized. + init: usize, } impl Debug for BorrowedBuf<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { f.debug_struct("BorrowedBuf") + .field("init", &self.init) .field("filled", &self.filled) .field("capacity", &self.capacity()) .finish() @@ -44,22 +48,24 @@ impl Debug for BorrowedBuf<'_> { impl<'data> From<&'data mut [u8]> for BorrowedBuf<'data> { #[inline] fn from(slice: &'data mut [u8]) -> BorrowedBuf<'data> { + let len = slice.len(); + BorrowedBuf { - // SAFETY: Always in bounds. We treat the buffer as uninitialized, even though it's - // already initialized. + // SAFETY: initialized data never becoming uninitialized is an invariant of BorrowedBuf buf: unsafe { (slice as *mut [u8]).as_uninit_slice_mut().unwrap() }, filled: 0, + init: len, } } } /// Creates a new `BorrowedBuf` from an uninitialized buffer. /// -/// Use `set_filled` if part of the buffer is known to be already filled. +/// Use `set_init` if part of the buffer is known to be already initialized. impl<'data> From<&'data mut [MaybeUninit]> for BorrowedBuf<'data> { #[inline] fn from(buf: &'data mut [MaybeUninit]) -> BorrowedBuf<'data> { - BorrowedBuf { buf, filled: 0 } + BorrowedBuf { buf, filled: 0, init: 0 } } } @@ -68,11 +74,14 @@ impl<'data> From<&'data mut [MaybeUninit]> for BorrowedBuf<'data> { /// Use `BorrowedCursor::with_unfilled_buf` instead for a safer alternative. impl<'data> From> for BorrowedBuf<'data> { #[inline] - fn from(buf: BorrowedCursor<'data>) -> BorrowedBuf<'data> { + fn from(mut buf: BorrowedCursor<'data>) -> BorrowedBuf<'data> { + let init = buf.init_mut().len(); BorrowedBuf { - // SAFETY: Always in bounds. We treat the buffer as uninitialized. + // SAFETY: no initialized byte is ever uninitialized as per + // `BorrowedBuf`'s invariant buf: unsafe { buf.buf.buf.get_unchecked_mut(buf.buf.filled..) }, filled: 0, + init, } } } @@ -90,6 +99,12 @@ impl<'data> BorrowedBuf<'data> { self.filled } + /// Returns the length of the initialized part of the buffer. + #[inline] + pub fn init_len(&self) -> usize { + self.init + } + /// Returns a shared reference to the filled portion of the buffer. #[inline] pub fn filled(&self) -> &[u8] { @@ -144,16 +159,33 @@ impl<'data> BorrowedBuf<'data> { /// Clears the buffer, resetting the filled region to empty. /// - /// The contents of the buffer are not modified. + /// The number of initialized bytes is not changed, and the contents of the buffer are not modified. #[inline] pub fn clear(&mut self) -> &mut Self { self.filled = 0; self } + + /// Asserts that the first `n` bytes of the buffer are initialized. + /// + /// `BorrowedBuf` assumes that bytes are never de-initialized, so this method does nothing when called with fewer + /// bytes than are already known to be initialized. + /// + /// # Safety + /// + /// The caller must ensure that the first `n` unfilled bytes of the buffer have already been initialized. + #[inline] + pub unsafe fn set_init(&mut self, n: usize) -> &mut Self { + self.init = cmp::max(self.init, n); + self + } } /// A writeable view of the unfilled portion of a [`BorrowedBuf`]. /// +/// The unfilled portion consists of an initialized and an uninitialized part; see [`BorrowedBuf`] +/// for details. +/// /// Data can be written directly to the cursor by using [`append`](BorrowedCursor::append) or /// indirectly by getting a slice of part or all of the cursor and writing into the slice. In the /// indirect case, the caller must call [`advance`](BorrowedCursor::advance) after writing to inform @@ -206,17 +238,48 @@ impl<'a> BorrowedCursor<'a> { self.buf.filled } + /// Returns a mutable reference to the initialized portion of the cursor. + #[inline] + pub fn init_mut(&mut self) -> &mut [u8] { + // SAFETY: We only slice the initialized part of the buffer, which is always valid + unsafe { + let buf = self.buf.buf.get_unchecked_mut(self.buf.filled..self.buf.init); + buf.assume_init_mut() + } + } + /// Returns a mutable reference to the whole cursor. /// /// # Safety /// - /// The caller must not uninitialize any previously initialized bytes. + /// The caller must not uninitialize any bytes in the initialized portion of the cursor. #[inline] pub unsafe fn as_mut(&mut self) -> &mut [MaybeUninit] { // SAFETY: always in bounds unsafe { self.buf.buf.get_unchecked_mut(self.buf.filled..) } } + /// Advances the cursor by asserting that `n` bytes have been filled. + /// + /// After advancing, the `n` bytes are no longer accessible via the cursor and can only be + /// accessed via the underlying buffer. I.e., the buffer's filled portion grows by `n` elements + /// and its unfilled portion (and the capacity of this cursor) shrinks by `n` elements. + /// + /// If less than `n` bytes initialized (by the cursor's point of view), `set_init` should be + /// called first. + /// + /// # Panics + /// + /// Panics if there are less than `n` bytes initialized. + #[inline] + pub fn advance(&mut self, n: usize) -> &mut Self { + // The subtraction cannot underflow by invariant of this type. + assert!(n <= self.buf.init - self.buf.filled); + + self.buf.filled += n; + self + } + /// Advances the cursor by asserting that `n` bytes have been filled. /// /// After advancing, the `n` bytes are no longer accessible via the cursor and can only be @@ -225,11 +288,42 @@ impl<'a> BorrowedCursor<'a> { /// /// # Safety /// - /// The caller must ensure that the first `n` bytes of the cursor have been initialized. `n` - /// must not exceed the remaining capacity of this cursor. + /// The caller must ensure that the first `n` bytes of the cursor have been properly + /// initialised. #[inline] - pub unsafe fn advance(&mut self, n: usize) -> &mut Self { + pub unsafe fn advance_unchecked(&mut self, n: usize) -> &mut Self { self.buf.filled += n; + self.buf.init = cmp::max(self.buf.init, self.buf.filled); + self + } + + /// Initializes all bytes in the cursor. + #[inline] + pub fn ensure_init(&mut self) -> &mut Self { + // SAFETY: always in bounds and we never uninitialize these bytes. + let uninit = unsafe { self.buf.buf.get_unchecked_mut(self.buf.init..) }; + + // SAFETY: 0 is a valid value for MaybeUninit and the length matches the allocation + // since it is comes from a slice reference. + unsafe { + ptr::write_bytes(uninit.as_mut_ptr(), 0, uninit.len()); + } + self.buf.init = self.buf.capacity(); + + self + } + + /// Asserts that the first `n` unfilled bytes of the cursor are initialized. + /// + /// `BorrowedBuf` assumes that bytes are never de-initialized, so this method does nothing when + /// called with fewer bytes than are already known to be initialized. + /// + /// # Safety + /// + /// The caller must ensure that the first `n` bytes of the buffer have already been initialized. + #[inline] + pub unsafe fn set_init(&mut self, n: usize) -> &mut Self { + self.buf.init = cmp::max(self.buf.init, self.buf.filled + n); self } @@ -247,6 +341,10 @@ impl<'a> BorrowedCursor<'a> { self.as_mut()[..buf.len()].write_copy_of_slice(buf); } + // SAFETY: We just added the entire contents of buf to the filled section. + unsafe { + self.set_init(buf.len()); + } self.buf.filled += buf.len(); } @@ -269,9 +367,17 @@ impl<'a> BorrowedCursor<'a> { // there, one could mark some bytes as initialized even though there aren't. assert!(core::ptr::addr_eq(prev_ptr, buf.buf)); - // SAFETY: These bytes were filled in the `BorrowedBuf`, so they're filled in the cursor - // too, because the buffer wasn't replaced. - self.buf.filled += buf.filled; + let filled = buf.filled; + let init = buf.init; + + // Update `init` and `filled` fields with what was written to the buffer. + // `self.buf.filled` was the starting length of the `BorrowedBuf`. + // + // SAFETY: These amounts of bytes were initialized/filled in the `BorrowedBuf`, + // and therefore they are initialized/filled in the cursor too, because the + // buffer wasn't replaced. + self.buf.init = self.buf.filled + init; + self.buf.filled += filled; res } diff --git a/library/coretests/tests/io/borrowed_buf.rs b/library/coretests/tests/io/borrowed_buf.rs index 730ba04465a11..aaa98d26ff8b9 100644 --- a/library/coretests/tests/io/borrowed_buf.rs +++ b/library/coretests/tests/io/borrowed_buf.rs @@ -8,6 +8,7 @@ fn new() { let mut rbuf: BorrowedBuf<'_> = buf.into(); assert_eq!(rbuf.filled().len(), 0); + assert_eq!(rbuf.init_len(), 16); assert_eq!(rbuf.capacity(), 16); assert_eq!(rbuf.unfilled().capacity(), 16); } @@ -19,16 +20,27 @@ fn uninit() { let mut rbuf: BorrowedBuf<'_> = buf.into(); assert_eq!(rbuf.filled().len(), 0); + assert_eq!(rbuf.init_len(), 0); assert_eq!(rbuf.capacity(), 16); assert_eq!(rbuf.unfilled().capacity(), 16); } +#[test] +fn initialize_unfilled() { + let buf: &mut [_] = &mut [MaybeUninit::uninit(); 16]; + let mut rbuf: BorrowedBuf<'_> = buf.into(); + + rbuf.unfilled().ensure_init(); + + assert_eq!(rbuf.init_len(), 16); +} + #[test] fn advance_filled() { let buf: &mut [_] = &mut [0; 16]; let mut rbuf: BorrowedBuf<'_> = buf.into(); - unsafe { rbuf.unfilled().advance(1) }; + rbuf.unfilled().advance(1); assert_eq!(rbuf.filled().len(), 1); assert_eq!(rbuf.unfilled().capacity(), 15); @@ -39,7 +51,7 @@ fn clear() { let buf: &mut [_] = &mut [255; 16]; let mut rbuf: BorrowedBuf<'_> = buf.into(); - unsafe { rbuf.unfilled().advance(16) }; + rbuf.unfilled().advance(16); assert_eq!(rbuf.filled().len(), 16); assert_eq!(rbuf.unfilled().capacity(), 0); @@ -49,9 +61,33 @@ fn clear() { assert_eq!(rbuf.filled().len(), 0); assert_eq!(rbuf.unfilled().capacity(), 16); - unsafe { rbuf.unfilled().advance(16) }; + assert_eq!(rbuf.unfilled().init_mut(), [255; 16]); +} + +#[test] +fn set_init() { + let buf: &mut [_] = &mut [MaybeUninit::zeroed(); 16]; + let mut rbuf: BorrowedBuf<'_> = buf.into(); + + unsafe { + rbuf.set_init(8); + } + + assert_eq!(rbuf.init_len(), 8); + + rbuf.unfilled().advance(4); - assert_eq!(rbuf.filled(), [255; 16]); + unsafe { + rbuf.set_init(2); + } + + assert_eq!(rbuf.init_len(), 8); + + unsafe { + rbuf.set_init(8); + } + + assert_eq!(rbuf.init_len(), 8); } #[test] @@ -61,6 +97,7 @@ fn append() { rbuf.unfilled().append(&[0; 8]); + assert_eq!(rbuf.init_len(), 8); assert_eq!(rbuf.filled().len(), 8); assert_eq!(rbuf.filled(), [0; 8]); @@ -68,6 +105,7 @@ fn append() { rbuf.unfilled().append(&[1; 16]); + assert_eq!(rbuf.init_len(), 16); assert_eq!(rbuf.filled().len(), 16); assert_eq!(rbuf.filled(), [1; 16]); } @@ -87,12 +125,43 @@ fn reborrow_written() { assert_eq!(cursor.written(), 32); assert_eq!(buf.unfilled().written(), 32); + assert_eq!(buf.init_len(), 32); assert_eq!(buf.filled().len(), 32); let filled = buf.filled(); assert_eq!(&filled[..16], [1; 16]); assert_eq!(&filled[16..], [2; 16]); } +#[test] +fn cursor_set_init() { + let buf: &mut [_] = &mut [MaybeUninit::zeroed(); 16]; + let mut rbuf: BorrowedBuf<'_> = buf.into(); + + unsafe { + rbuf.unfilled().set_init(8); + } + + assert_eq!(rbuf.init_len(), 8); + assert_eq!(rbuf.unfilled().init_mut().len(), 8); + assert_eq!(unsafe { rbuf.unfilled().as_mut().len() }, 16); + + rbuf.unfilled().advance(4); + + unsafe { + rbuf.unfilled().set_init(2); + } + + assert_eq!(rbuf.init_len(), 8); + + unsafe { + rbuf.unfilled().set_init(8); + } + + assert_eq!(rbuf.init_len(), 12); + assert_eq!(rbuf.unfilled().init_mut().len(), 8); + assert_eq!(unsafe { rbuf.unfilled().as_mut().len() }, 12); +} + #[test] fn cursor_with_unfilled_buf() { let buf: &mut [_] = &mut [MaybeUninit::uninit(); 16]; @@ -100,30 +169,31 @@ fn cursor_with_unfilled_buf() { let mut cursor = rbuf.unfilled(); cursor.with_unfilled_buf(|buf| { - assert_eq!(buf.capacity(), 16); buf.unfilled().append(&[1, 2, 3]); assert_eq!(buf.filled(), &[1, 2, 3]); }); + assert_eq!(cursor.init_mut().len(), 0); assert_eq!(cursor.written(), 3); cursor.with_unfilled_buf(|buf| { assert_eq!(buf.capacity(), 13); + assert_eq!(buf.init_len(), 0); - unsafe { - buf.unfilled().as_mut().write_filled(0); - buf.unfilled().advance(4) - }; + buf.unfilled().ensure_init(); + buf.unfilled().advance(4); }); + assert_eq!(cursor.init_mut().len(), 9); assert_eq!(cursor.written(), 7); cursor.with_unfilled_buf(|buf| { assert_eq!(buf.capacity(), 9); + assert_eq!(buf.init_len(), 9); }); + assert_eq!(cursor.init_mut().len(), 9); assert_eq!(cursor.written(), 7); - assert_eq!(rbuf.len(), 7); assert_eq!(rbuf.filled(), &[1, 2, 3, 0, 0, 0, 0]); } diff --git a/library/std/src/fs/tests.rs b/library/std/src/fs/tests.rs index bcaafcfee787a..0a5d1153d860c 100644 --- a/library/std/src/fs/tests.rs +++ b/library/std/src/fs/tests.rs @@ -709,6 +709,8 @@ fn file_test_read_buf() { let mut file = check!(File::open(filename)); check!(file.read_buf(buf.unfilled())); assert_eq!(buf.filled(), &[1, 2, 3, 4]); + // File::read_buf should omit buffer initialization. + assert_eq!(buf.init_len(), 4); check!(fs::remove_file(filename)); } diff --git a/library/std/src/io/buffered/bufreader.rs b/library/std/src/io/buffered/bufreader.rs index 69c260b5410af..40441dc057d0d 100644 --- a/library/std/src/io/buffered/bufreader.rs +++ b/library/std/src/io/buffered/bufreader.rs @@ -284,6 +284,15 @@ impl BufReader { } } +// This is only used by a test which asserts that the initialization-tracking is correct. +#[cfg(test)] +impl BufReader { + #[allow(missing_docs)] + pub fn initialized(&self) -> usize { + self.buf.initialized() + } +} + impl BufReader { /// Seeks relative to the current position. If the new position lies within the buffer, /// the buffer will not be flushed, allowing for more efficient seeks. diff --git a/library/std/src/io/buffered/bufreader/buffer.rs b/library/std/src/io/buffered/bufreader/buffer.rs index 2694726b3f44f..9b600cd55758b 100644 --- a/library/std/src/io/buffered/bufreader/buffer.rs +++ b/library/std/src/io/buffered/bufreader/buffer.rs @@ -21,19 +21,25 @@ pub struct Buffer { // Each call to `fill_buf` sets `filled` to indicate how many bytes at the start of `buf` are // initialized with bytes from a read. filled: usize, + // This is the max number of bytes returned across all `fill_buf` calls. We track this so that we + // can accurately tell `read_buf` how many bytes of buf are initialized, to bypass as much of its + // defensive initialization as possible. Note that while this often the same as `filled`, it + // doesn't need to be. Calls to `fill_buf` are not required to actually fill the buffer, and + // omitting this is a huge perf regression for `Read` impls that do not. + initialized: usize, } impl Buffer { #[inline] pub fn with_capacity(capacity: usize) -> Self { let buf = Box::new_uninit_slice(capacity); - Self { buf, pos: 0, filled: 0 } + Self { buf, pos: 0, filled: 0, initialized: 0 } } #[inline] pub fn try_with_capacity(capacity: usize) -> io::Result { match Box::try_new_uninit_slice(capacity) { - Ok(buf) => Ok(Self { buf, pos: 0, filled: 0 }), + Ok(buf) => Ok(Self { buf, pos: 0, filled: 0, initialized: 0 }), Err(_) => { Err(io::const_error!(ErrorKind::OutOfMemory, "failed to allocate read buffer")) } @@ -62,6 +68,12 @@ impl Buffer { self.pos } + // This is only used by a test which asserts that the initialization-tracking is correct. + #[cfg(test)] + pub fn initialized(&self) -> usize { + self.initialized + } + #[inline] pub fn discard_buffer(&mut self) { self.pos = 0; @@ -98,8 +110,13 @@ impl Buffer { /// Read more bytes into the buffer without discarding any of its contents pub fn read_more(&mut self, mut reader: impl Read) -> io::Result { let mut buf = BorrowedBuf::from(&mut self.buf[self.filled..]); + let old_init = self.initialized - self.filled; + unsafe { + buf.set_init(old_init); + } reader.read_buf(buf.unfilled())?; self.filled += buf.len(); + self.initialized += buf.init_len() - old_init; Ok(buf.len()) } @@ -120,10 +137,16 @@ impl Buffer { debug_assert!(self.pos == self.filled); let mut buf = BorrowedBuf::from(&mut *self.buf); + // SAFETY: `self.filled` bytes will always have been initialized. + unsafe { + buf.set_init(self.initialized); + } + let result = reader.read_buf(buf.unfilled()); self.pos = 0; self.filled = buf.len(); + self.initialized = buf.init_len(); result?; } diff --git a/library/std/src/io/buffered/tests.rs b/library/std/src/io/buffered/tests.rs index fc77b02a8e828..6ad4158b92904 100644 --- a/library/std/src/io/buffered/tests.rs +++ b/library/std/src/io/buffered/tests.rs @@ -1052,6 +1052,30 @@ fn single_formatted_write() { assert_eq!(writer.get_ref().events, [RecordedEvent::Write("hello, world!\n".to_string())]); } +#[test] +fn bufreader_full_initialize() { + struct OneByteReader; + impl Read for OneByteReader { + fn read(&mut self, buf: &mut [u8]) -> crate::io::Result { + if buf.len() > 0 { + buf[0] = 0; + Ok(1) + } else { + Ok(0) + } + } + } + let mut reader = BufReader::new(OneByteReader); + // Nothing is initialized yet. + assert_eq!(reader.initialized(), 0); + + let buf = reader.fill_buf().unwrap(); + // We read one byte... + assert_eq!(buf.len(), 1); + // But we initialized the whole buffer! + assert_eq!(reader.initialized(), reader.capacity()); +} + /// This is a regression test for https://github.com/rust-lang/rust/issues/127584. #[test] fn bufwriter_aliasing() { diff --git a/library/std/src/io/copy.rs b/library/std/src/io/copy.rs index 8b5e7c4df4e08..2b558efb8885e 100644 --- a/library/std/src/io/copy.rs +++ b/library/std/src/io/copy.rs @@ -214,19 +214,28 @@ impl BufferedWriterSpec for BufWriter { } let mut len = 0; + let mut init = 0; loop { let buf = self.buffer_mut(); let mut read_buf: BorrowedBuf<'_> = buf.spare_capacity_mut().into(); + unsafe { + // SAFETY: init is either 0 or the init_len from the previous iteration. + read_buf.set_init(init); + } + if read_buf.capacity() >= DEFAULT_BUF_SIZE { let mut cursor = read_buf.unfilled(); match reader.read_buf(cursor.reborrow()) { Ok(()) => { let bytes_read = cursor.written(); + if bytes_read == 0 { return Ok(len); } + + init = read_buf.init_len() - bytes_read; len += bytes_read as u64; // SAFETY: BorrowedBuf guarantees all of its filled bytes are init @@ -239,6 +248,10 @@ impl BufferedWriterSpec for BufWriter { Err(e) => return Err(e), } } else { + // All the bytes that were already in the buffer are initialized, + // treat them as such when the buffer is flushed. + init += buf.len(); + self.flush_buf()?; } } diff --git a/library/std/src/io/mod.rs b/library/std/src/io/mod.rs index 4c064c435e5bc..b7756befa11e9 100644 --- a/library/std/src/io/mod.rs +++ b/library/std/src/io/mod.rs @@ -419,6 +419,8 @@ pub(crate) fn default_read_to_end( .and_then(|s| s.checked_add(1024)?.checked_next_multiple_of(DEFAULT_BUF_SIZE)) .unwrap_or(DEFAULT_BUF_SIZE); + let mut initialized = 0; // Extra initialized bytes from previous loop iteration + const PROBE_SIZE: usize = 32; fn small_probe_read(r: &mut R, buf: &mut Vec) -> Result { @@ -447,6 +449,8 @@ pub(crate) fn default_read_to_end( } } + let mut consecutive_short_reads = 0; + loop { if buf.len() == buf.capacity() && buf.capacity() == start_cap { // The buffer might be an exact fit. Let's read into a probe buffer @@ -470,6 +474,11 @@ pub(crate) fn default_read_to_end( spare = &mut spare[..buf_len]; let mut read_buf: BorrowedBuf<'_> = spare.into(); + // SAFETY: These bytes were initialized but not filled in the previous loop + unsafe { + read_buf.set_init(initialized); + } + let mut cursor = read_buf.unfilled(); let result = loop { match r.read_buf(cursor.reborrow()) { @@ -480,7 +489,9 @@ pub(crate) fn default_read_to_end( } }; + let unfilled_but_initialized = cursor.init_mut().len(); let bytes_read = cursor.written(); + let was_fully_initialized = read_buf.init_len() == buf_len; // SAFETY: BorrowedBuf's invariants mean this much memory is initialized. unsafe { @@ -495,8 +506,27 @@ pub(crate) fn default_read_to_end( return Ok(buf.len() - start_len); } + if bytes_read < buf_len { + consecutive_short_reads += 1; + } else { + consecutive_short_reads = 0; + } + + // store how much was initialized but not filled + initialized = unfilled_but_initialized; + // Use heuristics to determine the max read size if no initial size hint was provided if size_hint.is_none() { + // The reader is returning short reads but it doesn't call ensure_init(). + // In that case we no longer need to restrict read sizes to avoid + // initialization costs. + // When reading from disk we usually don't get any short reads except at EOF. + // So we wait for at least 2 short reads before uncapping the read buffer; + // this helps with the Windows issue. + if !was_fully_initialized && consecutive_short_reads > 1 { + max_read_size = usize::MAX; + } + // we have passed a larger buffer than previously and the // reader still hasn't returned a short read if buf_len >= max_read_size && bytes_read == buf_len { @@ -557,13 +587,8 @@ pub(crate) fn default_read_buf(read: F, mut cursor: BorrowedCursor<'_>) -> Re where F: FnOnce(&mut [u8]) -> Result, { - // SAFETY: We do not uninitialize any part of the buffer. - let n = read(unsafe { cursor.as_mut().write_filled(0) })?; - assert!(n <= cursor.capacity()); - // SAFETY: We've initialized the entire buffer, and `read` can't make it uninitialized. - unsafe { - cursor.advance(n); - } + let n = read(cursor.ensure_init().init_mut())?; + cursor.advance(n); Ok(()) } @@ -3073,21 +3098,31 @@ impl Read for Take { // The condition above guarantees that `self.limit` fits in `usize`. let limit = self.limit as usize; + let extra_init = cmp::min(limit, buf.init_mut().len()); + // SAFETY: no uninit data is written to ibuf let ibuf = unsafe { &mut buf.as_mut()[..limit] }; let mut sliced_buf: BorrowedBuf<'_> = ibuf.into(); + // SAFETY: extra_init bytes of ibuf are known to be initialized + unsafe { + sliced_buf.set_init(extra_init); + } + let mut cursor = sliced_buf.unfilled(); let result = self.inner.read_buf(cursor.reborrow()); + let new_init = cursor.init_mut().len(); let filled = sliced_buf.len(); // cursor / sliced_buf / ibuf must drop here - // SAFETY: filled bytes have been filled and therefore initialized unsafe { - buf.advance(filled); + // SAFETY: filled bytes have been filled and therefore initialized + buf.advance_unchecked(filled); + // SAFETY: new_init bytes of buf's unfilled buffer have been initialized + buf.set_init(new_init); } self.limit -= filled as u64; diff --git a/library/std/src/io/tests.rs b/library/std/src/io/tests.rs index e14e6432eafaf..b22988d4a8a9d 100644 --- a/library/std/src/io/tests.rs +++ b/library/std/src/io/tests.rs @@ -209,6 +209,15 @@ fn read_buf_exact() { assert_eq!(c.read_buf_exact(buf.unfilled()).unwrap_err().kind(), io::ErrorKind::UnexpectedEof); } +#[test] +#[should_panic] +fn borrowed_cursor_advance_overflow() { + let mut buf = [0; 512]; + let mut buf = BorrowedBuf::from(&mut buf[..]); + buf.unfilled().advance(1); + buf.unfilled().advance(usize::MAX); +} + #[test] fn take_eof() { struct R; diff --git a/library/std/src/io/util.rs b/library/std/src/io/util.rs index a09c8bc069306..0410df3ef1a3e 100644 --- a/library/std/src/io/util.rs +++ b/library/std/src/io/util.rs @@ -283,7 +283,7 @@ impl Read for Repeat { // SAFETY: No uninit bytes are being written. unsafe { buf.as_mut() }.write_filled(self.byte); // SAFETY: the entire unfilled portion of buf has been initialized. - unsafe { buf.advance(buf.capacity()) }; + unsafe { buf.advance_unchecked(buf.capacity()) }; Ok(()) } diff --git a/library/std/src/io/util/tests.rs b/library/std/src/io/util/tests.rs index 92dbc3919bea2..d0f106d7af416 100644 --- a/library/std/src/io/util/tests.rs +++ b/library/std/src/io/util/tests.rs @@ -75,36 +75,43 @@ fn empty_reads() { let mut buf: BorrowedBuf<'_> = buf.into(); e.read_buf(buf.unfilled()).unwrap(); assert_eq!(buf.len(), 0); + assert_eq!(buf.init_len(), 0); let buf: &mut [_] = &mut [MaybeUninit::uninit()]; let mut buf: BorrowedBuf<'_> = buf.into(); e.read_buf(buf.unfilled()).unwrap(); assert_eq!(buf.len(), 0); + assert_eq!(buf.init_len(), 0); let buf: &mut [_] = &mut [MaybeUninit::uninit(); 1024]; let mut buf: BorrowedBuf<'_> = buf.into(); e.read_buf(buf.unfilled()).unwrap(); assert_eq!(buf.len(), 0); + assert_eq!(buf.init_len(), 0); let buf: &mut [_] = &mut [MaybeUninit::uninit(); 1024]; let mut buf: BorrowedBuf<'_> = buf.into(); Read::by_ref(&mut e).read_buf(buf.unfilled()).unwrap(); assert_eq!(buf.len(), 0); + assert_eq!(buf.init_len(), 0); let buf: &mut [MaybeUninit<_>] = &mut []; let mut buf: BorrowedBuf<'_> = buf.into(); e.read_buf_exact(buf.unfilled()).unwrap(); assert_eq!(buf.len(), 0); + assert_eq!(buf.init_len(), 0); let buf: &mut [_] = &mut [MaybeUninit::uninit()]; let mut buf: BorrowedBuf<'_> = buf.into(); assert_eq!(e.read_buf_exact(buf.unfilled()).unwrap_err().kind(), ErrorKind::UnexpectedEof); assert_eq!(buf.len(), 0); + assert_eq!(buf.init_len(), 0); let buf: &mut [_] = &mut [MaybeUninit::uninit(); 1024]; let mut buf: BorrowedBuf<'_> = buf.into(); assert_eq!(e.read_buf_exact(buf.unfilled()).unwrap_err().kind(), ErrorKind::UnexpectedEof); assert_eq!(buf.len(), 0); + assert_eq!(buf.init_len(), 0); let buf: &mut [_] = &mut [MaybeUninit::uninit(); 1024]; let mut buf: BorrowedBuf<'_> = buf.into(); @@ -113,6 +120,7 @@ fn empty_reads() { ErrorKind::UnexpectedEof, ); assert_eq!(buf.len(), 0); + assert_eq!(buf.init_len(), 0); let mut buf = Vec::new(); assert_eq!(e.read_to_end(&mut buf).unwrap(), 0); diff --git a/library/std/src/net/tcp/tests.rs b/library/std/src/net/tcp/tests.rs index 4787f8a1040b9..7c7ef7b2f7018 100644 --- a/library/std/src/net/tcp/tests.rs +++ b/library/std/src/net/tcp/tests.rs @@ -315,6 +315,8 @@ fn read_buf() { let mut buf = BorrowedBuf::from(buf.as_mut_slice()); t!(s.read_buf(buf.unfilled())); assert_eq!(buf.filled(), &[1, 2, 3, 4]); + // TcpStream::read_buf should omit buffer initialization. + assert_eq!(buf.init_len(), 4); t.join().ok().expect("thread panicked"); }) diff --git a/library/std/src/process/tests.rs b/library/std/src/process/tests.rs index c8a83edffe427..12c5130defe5a 100644 --- a/library/std/src/process/tests.rs +++ b/library/std/src/process/tests.rs @@ -188,8 +188,10 @@ fn child_stdout_read_buf() { // ChildStdout::read_buf should omit buffer initialization. if cfg!(target_os = "windows") { assert_eq!(buf.filled(), b"abc\r\n"); + assert_eq!(buf.init_len(), 5); } else { assert_eq!(buf.filled(), b"abc\n"); + assert_eq!(buf.init_len(), 4); }; } diff --git a/library/std/src/sys/fd/hermit.rs b/library/std/src/sys/fd/hermit.rs index 28fafdaf57d8a..2666da16420c4 100644 --- a/library/std/src/sys/fd/hermit.rs +++ b/library/std/src/sys/fd/hermit.rs @@ -33,7 +33,7 @@ impl FileDesc { ) })?; // SAFETY: Exactly `result` bytes have been filled. - unsafe { buf.advance(result as usize) }; + unsafe { buf.advance_unchecked(result as usize) }; Ok(()) } diff --git a/library/std/src/sys/fd/unix.rs b/library/std/src/sys/fd/unix.rs index c5e8646dada1e..bb6c0ac9e18e6 100644 --- a/library/std/src/sys/fd/unix.rs +++ b/library/std/src/sys/fd/unix.rs @@ -185,7 +185,7 @@ impl FileDesc { // SAFETY: `ret` bytes were written to the initialized portion of the buffer unsafe { - cursor.advance(ret as usize); + cursor.advance_unchecked(ret as usize); } Ok(()) } @@ -203,7 +203,7 @@ impl FileDesc { // SAFETY: `ret` bytes were written to the initialized portion of the buffer unsafe { - cursor.advance(ret as usize); + cursor.advance_unchecked(ret as usize); } Ok(()) } diff --git a/library/std/src/sys/fs/solid.rs b/library/std/src/sys/fs/solid.rs index ec1db262855ad..f6d5d3b784d3b 100644 --- a/library/std/src/sys/fs/solid.rs +++ b/library/std/src/sys/fs/solid.rs @@ -401,7 +401,7 @@ impl File { // Safety: `num_bytes_read` bytes were written to the unfilled // portion of the buffer - cursor.advance(num_bytes_read); + cursor.advance_unchecked(num_bytes_read); Ok(()) } diff --git a/library/std/src/sys/net/connection/socket/hermit.rs b/library/std/src/sys/net/connection/socket/hermit.rs index 8350d2b5fe4a0..c32f8dcc8de86 100644 --- a/library/std/src/sys/net/connection/socket/hermit.rs +++ b/library/std/src/sys/net/connection/socket/hermit.rs @@ -143,7 +143,7 @@ impl Socket { ) })?; unsafe { - buf.advance(ret as usize); + buf.advance_unchecked(ret as usize); } Ok(()) } diff --git a/library/std/src/sys/net/connection/socket/solid.rs b/library/std/src/sys/net/connection/socket/solid.rs index ac06cdc00c8f0..673d75046d3f2 100644 --- a/library/std/src/sys/net/connection/socket/solid.rs +++ b/library/std/src/sys/net/connection/socket/solid.rs @@ -190,7 +190,7 @@ impl Socket { netc::recv(self.as_raw_fd(), buf.as_mut().as_mut_ptr().cast(), buf.capacity(), flags) })?; unsafe { - buf.advance(ret as usize); + buf.advance_unchecked(ret as usize); } Ok(()) } diff --git a/library/std/src/sys/net/connection/socket/unix.rs b/library/std/src/sys/net/connection/socket/unix.rs index 323d6214347e7..6d06a8d86bf16 100644 --- a/library/std/src/sys/net/connection/socket/unix.rs +++ b/library/std/src/sys/net/connection/socket/unix.rs @@ -294,7 +294,7 @@ impl Socket { ) })?; unsafe { - buf.advance(ret as usize); + buf.advance_unchecked(ret as usize); } Ok(()) } diff --git a/library/std/src/sys/net/connection/socket/windows.rs b/library/std/src/sys/net/connection/socket/windows.rs index 4da51d78ea69b..7355f0ce6bf5e 100644 --- a/library/std/src/sys/net/connection/socket/windows.rs +++ b/library/std/src/sys/net/connection/socket/windows.rs @@ -243,7 +243,7 @@ impl Socket { } } _ => { - unsafe { buf.advance(result as usize) }; + unsafe { buf.advance_unchecked(result as usize) }; Ok(()) } } diff --git a/library/std/src/sys/pal/sgx/abi/usercalls/mod.rs b/library/std/src/sys/pal/sgx/abi/usercalls/mod.rs index f1e4a5a42577a..5041770faf661 100644 --- a/library/std/src/sys/pal/sgx/abi/usercalls/mod.rs +++ b/library/std/src/sys/pal/sgx/abi/usercalls/mod.rs @@ -46,7 +46,7 @@ pub fn read_buf(fd: Fd, mut buf: BorrowedCursor<'_>) -> IoResult<()> { let mut userbuf = alloc::User::<[u8]>::uninitialized(buf.capacity()); let len = raw::read(fd, userbuf.as_mut_ptr().cast(), userbuf.len()).from_sgx_result()?; userbuf[..len].copy_to_enclave(&mut buf.as_mut()[..len]); - buf.advance(len); + buf.advance_unchecked(len); Ok(()) } } diff --git a/library/std/src/sys/pal/windows/handle.rs b/library/std/src/sys/pal/windows/handle.rs index ffa8507831acf..90e243e1aa038 100644 --- a/library/std/src/sys/pal/windows/handle.rs +++ b/library/std/src/sys/pal/windows/handle.rs @@ -117,7 +117,7 @@ impl Handle { Ok(read) => { // Safety: `read` bytes were written to the initialized portion of the buffer unsafe { - cursor.advance(read); + cursor.advance_unchecked(read); } Ok(()) } @@ -140,7 +140,7 @@ impl Handle { // SAFETY: `read` bytes were written to the initialized portion of the buffer unsafe { - cursor.advance(read); + cursor.advance_unchecked(read); } Ok(()) } diff --git a/library/std/src/sys/process/windows/child_pipe.rs b/library/std/src/sys/process/windows/child_pipe.rs index b848435ac275f..da7a86ca072e3 100644 --- a/library/std/src/sys/process/windows/child_pipe.rs +++ b/library/std/src/sys/process/windows/child_pipe.rs @@ -260,7 +260,7 @@ impl ChildPipe { Err(e) => Err(e), Ok(n) => { unsafe { - buf.advance(n); + buf.advance_unchecked(n); } Ok(()) } diff --git a/library/std/src/sys/stdio/zkvm.rs b/library/std/src/sys/stdio/zkvm.rs index 84496ac937363..f31c6c26e87cd 100644 --- a/library/std/src/sys/stdio/zkvm.rs +++ b/library/std/src/sys/stdio/zkvm.rs @@ -19,7 +19,7 @@ impl io::Read for Stdin { fn read_buf(&mut self, mut buf: BorrowedCursor<'_>) -> io::Result<()> { unsafe { let n = abi::sys_read(fileno::STDIN, buf.as_mut().as_mut_ptr().cast(), buf.capacity()); - buf.advance(n); + buf.advance_unchecked(n); } Ok(()) } From c7dd19b8d6fc3a06dfec78ad76452babcadef3b5 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Wed, 17 Dec 2025 12:25:45 +0100 Subject: [PATCH 4/4] add `miri_spin_loop` to make `hint::spin_loop` work consistently --- library/core/src/hint.rs | 9 +++++++++ src/tools/miri/src/shims/foreign_items.rs | 7 +++++++ src/tools/miri/tests/utils/miri_extern.rs | 3 +++ 3 files changed, 19 insertions(+) diff --git a/library/core/src/hint.rs b/library/core/src/hint.rs index e2ac746d31497..4c050b49bf7eb 100644 --- a/library/core/src/hint.rs +++ b/library/core/src/hint.rs @@ -269,6 +269,15 @@ pub const unsafe fn assert_unchecked(cond: bool) { #[stable(feature = "renamed_spin_loop", since = "1.49.0")] pub fn spin_loop() { crate::cfg_select! { + miri => { + unsafe extern "Rust" { + safe fn miri_spin_loop(); + } + + // Miri does support some of the intrinsics that are called below, but to guarantee + // consistent behavior across targets, this custom function is used. + miri_spin_loop(); + } target_arch = "x86" => { // SAFETY: the `cfg` attr ensures that we only execute this on x86 targets. crate::arch::x86::_mm_pause() diff --git a/src/tools/miri/src/shims/foreign_items.rs b/src/tools/miri/src/shims/foreign_items.rs index 8a959d06bf9ca..48f4ca53cdca0 100644 --- a/src/tools/miri/src/shims/foreign_items.rs +++ b/src/tools/miri/src/shims/foreign_items.rs @@ -435,6 +435,13 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { // Return value: 0 on success, otherwise the size it would have needed. this.write_int(if success { 0 } else { needed_size }, dest)?; } + // Hint that a loop is spinning indefinitely. + "miri_spin_loop" => { + let [] = this.check_shim_sig_lenient(abi, CanonAbi::Rust, link_name, args)?; + + // Try to run another thread to maximize the chance of finding actual bugs. + this.yield_active_thread(); + } // Obtains the size of a Miri backtrace. See the README for details. "miri_backtrace_size" => { this.handle_miri_backtrace_size(abi, link_name, args, dest)?; diff --git a/src/tools/miri/tests/utils/miri_extern.rs b/src/tools/miri/tests/utils/miri_extern.rs index 09f9ca032d439..bd01866dc34c8 100644 --- a/src/tools/miri/tests/utils/miri_extern.rs +++ b/src/tools/miri/tests/utils/miri_extern.rs @@ -155,4 +155,7 @@ extern "Rust" { /// Blocks the current execution if the argument is false pub fn miri_genmc_assume(condition: bool); + + /// Indicate to Miri that this thread is busy-waiting in a spin loop. + pub fn miri_spin_loop(); }