From 5be02d6f051d78a3140bf14b4f20537515f85a10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= Date: Tue, 21 Jan 2025 17:00:28 +0100 Subject: [PATCH 01/21] Add `@bors rollup=never` to rustc-push PR body --- src/doc/rustc-dev-guide/josh-sync/src/sync.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/doc/rustc-dev-guide/josh-sync/src/sync.rs b/src/doc/rustc-dev-guide/josh-sync/src/sync.rs index eff80b1091d3b..7ff5e018d2edc 100644 --- a/src/doc/rustc-dev-guide/josh-sync/src/sync.rs +++ b/src/doc/rustc-dev-guide/josh-sync/src/sync.rs @@ -180,7 +180,7 @@ impl GitSync { ); println!( // Open PR with `subtree update` title to silence the `no-merges` triagebot check - " https://github.com/{UPSTREAM_REPO}/compare/{github_user}:{branch}?quick_pull=1&title=Rustc+dev+guide+subtree+update&body=r?+@ghost" + " https://github.com/{UPSTREAM_REPO}/compare/{github_user}:{branch}?quick_pull=1&title=Rustc+dev+guide+subtree+update&body=@bors+rollup=never%0Ar?+@ghost" ); drop(josh); From 6b48b67dfcc1f75bbf19ec690d498129e19360ab Mon Sep 17 00:00:00 2001 From: edwloef Date: Tue, 21 Jan 2025 22:28:04 +0100 Subject: [PATCH 02/21] optimize slice::ptr_rotate for compile-time-constant small rotates --- library/core/src/slice/rotate.rs | 327 ++++++++++++++++--------------- 1 file changed, 166 insertions(+), 161 deletions(-) diff --git a/library/core/src/slice/rotate.rs b/library/core/src/slice/rotate.rs index d8e0acb565c8c..20833dc31aa2b 100644 --- a/library/core/src/slice/rotate.rs +++ b/library/core/src/slice/rotate.rs @@ -11,11 +11,18 @@ use crate::{cmp, ptr}; /// /// # Algorithm /// -/// Algorithm 1 is used for small values of `left + right` or for large `T`. The elements are moved -/// into their final positions one at a time starting at `mid - left` and advancing by `right` steps -/// modulo `left + right`, such that only one temporary is needed. Eventually, we arrive back at -/// `mid - left`. However, if `gcd(left + right, right)` is not 1, the above steps skipped over -/// elements. For example: +/// Algorithm 1 is used if `min(left, right)` is small enough to fit onto a stack buffer. The +/// `min(left, right)` elements are copied onto the buffer, `memmove` is applied to the others, and +/// the ones on the buffer are moved back into the hole on the opposite side of where they +/// originated. +/// +/// Algorithms that can be vectorized outperform the above once `left + right` becomes large enough. +/// +/// Algorithm 2 is otherwise used for small values of `left + right` or for large `T`. The elements +/// are moved into their final positions one at a time starting at `mid - left` and advancing by +/// `right` steps modulo `left + right`, such that only one temporary is needed. Eventually, we +/// arrive back at `mid - left`. However, if `gcd(left + right, right)` is not 1, the above steps +/// skipped over elements. For example: /// ```text /// left = 10, right = 6 /// the `^` indicates an element in its final place @@ -39,13 +46,7 @@ use crate::{cmp, ptr}; /// `gcd(left + right, right)` value). The end result is that all elements are finalized once and /// only once. /// -/// Algorithm 2 is used if `left + right` is large but `min(left, right)` is small enough to -/// fit onto a stack buffer. The `min(left, right)` elements are copied onto the buffer, `memmove` -/// is applied to the others, and the ones on the buffer are moved back into the hole on the -/// opposite side of where they originated. -/// -/// Algorithms that can be vectorized outperform the above once `left + right` becomes large enough. -/// Algorithm 1 can be vectorized by chunking and performing many rounds at once, but there are too +/// Algorithm 2 can be vectorized by chunking and performing many rounds at once, but there are too /// few rounds on average until `left + right` is enormous, and the worst case of a single /// round is always there. Instead, algorithm 3 utilizes repeated swapping of /// `min(left, right)` elements until a smaller rotate problem is left. @@ -65,172 +66,176 @@ pub(super) unsafe fn ptr_rotate(mut left: usize, mut mid: *mut T, mut right: if T::IS_ZST { return; } - loop { - // N.B. the below algorithms can fail if these cases are not checked - if (right == 0) || (left == 0) { - return; + // N.B. the below algorithms can fail if these cases are not checked + if (right == 0) || (left == 0) { + return; + } + // `T` is not a zero-sized type, so it's okay to divide by its size. + if !cfg!(feature = "optimize_for_size") + && cmp::min(left, right) <= mem::size_of::() / mem::size_of::() + { + // Algorithm 1 + // The `[T; 0]` here is to ensure this is appropriately aligned for T + let mut rawarray = MaybeUninit::<(BufType, [T; 0])>::uninit(); + let buf = rawarray.as_mut_ptr() as *mut T; + // SAFETY: `mid-left <= mid-left+right < mid+right` + let dim = unsafe { mid.sub(left).add(right) }; + if left <= right { + // SAFETY: + // + // 1) The `if` condition about the sizes ensures `[mid-left; left]` will fit in + // `buf` without overflow and `buf` was created just above and so cannot be + // overlapped with any value of `[mid-left; left]` + // 2) [mid-left, mid+right) are all valid for reading and writing and we don't care + // about overlaps here. + // 3) The `if` condition about `left <= right` ensures writing `left` elements to + // `dim = mid-left+right` is valid because: + // - `buf` is valid and `left` elements were written in it in 1) + // - `dim+left = mid-left+right+left = mid+right` and we write `[dim, dim+left)` + unsafe { + // 1) + ptr::copy_nonoverlapping(mid.sub(left), buf, left); + // 2) + ptr::copy(mid, mid.sub(left), right); + // 3) + ptr::copy_nonoverlapping(buf, dim, left); + } + } else { + // SAFETY: same reasoning as above but with `left` and `right` reversed + unsafe { + ptr::copy_nonoverlapping(mid, buf, right); + ptr::copy(mid.sub(left), dim, left); + ptr::copy_nonoverlapping(buf, mid.sub(left), right); + } } - if !cfg!(feature = "optimize_for_size") - && ((left + right < 24) || (mem::size_of::() > mem::size_of::<[usize; 4]>())) - { - // Algorithm 1 - // Microbenchmarks indicate that the average performance for random shifts is better all - // the way until about `left + right == 32`, but the worst case performance breaks even - // around 16. 24 was chosen as middle ground. If the size of `T` is larger than 4 - // `usize`s, this algorithm also outperforms other algorithms. - // SAFETY: callers must ensure `mid - left` is valid for reading and writing. - let x = unsafe { mid.sub(left) }; - // beginning of first round - // SAFETY: see previous comment. - let mut tmp: T = unsafe { x.read() }; - let mut i = right; - // `gcd` can be found before hand by calculating `gcd(left + right, right)`, - // but it is faster to do one loop which calculates the gcd as a side effect, then - // doing the rest of the chunk - let mut gcd = right; - // benchmarks reveal that it is faster to swap temporaries all the way through instead - // of reading one temporary once, copying backwards, and then writing that temporary at - // the very end. This is possibly due to the fact that swapping or replacing temporaries - // uses only one memory address in the loop instead of needing to manage two. + } else if !cfg!(feature = "optimize_for_size") + && ((left + right < 24) || (mem::size_of::() > mem::size_of::<[usize; 4]>())) + { + // Algorithm 2 + // Microbenchmarks indicate that the average performance for random shifts is better all + // the way until about `left + right == 32`, but the worst case performance breaks even + // around 16. 24 was chosen as middle ground. If the size of `T` is larger than 4 + // `usize`s, this algorithm also outperforms other algorithms. + // SAFETY: callers must ensure `mid - left` is valid for reading and writing. + let x = unsafe { mid.sub(left) }; + // beginning of first round + // SAFETY: see previous comment. + let mut tmp: T = unsafe { x.read() }; + let mut i = right; + // `gcd` can be found before hand by calculating `gcd(left + right, right)`, + // but it is faster to do one loop which calculates the gcd as a side effect, then + // doing the rest of the chunk + let mut gcd = right; + // benchmarks reveal that it is faster to swap temporaries all the way through instead + // of reading one temporary once, copying backwards, and then writing that temporary at + // the very end. This is possibly due to the fact that swapping or replacing temporaries + // uses only one memory address in the loop instead of needing to manage two. + loop { + // [long-safety-expl] + // SAFETY: callers must ensure `[left, left+mid+right)` are all valid for reading and + // writing. + // + // - `i` start with `right` so `mid-left <= x+i = x+right = mid-left+right < mid+right` + // - `i <= left+right-1` is always true + // - if `i < left`, `right` is added so `i < left+right` and on the next + // iteration `left` is removed from `i` so it doesn't go further + // - if `i >= left`, `left` is removed immediately and so it doesn't go further. + // - overflows cannot happen for `i` since the function's safety contract ask for + // `mid+right-1 = x+left+right` to be valid for writing + // - underflows cannot happen because `i` must be bigger or equal to `left` for + // a subtraction of `left` to happen. + // + // So `x+i` is valid for reading and writing if the caller respected the contract + tmp = unsafe { x.add(i).replace(tmp) }; + // instead of incrementing `i` and then checking if it is outside the bounds, we + // check if `i` will go outside the bounds on the next increment. This prevents + // any wrapping of pointers or `usize`. + if i >= left { + i -= left; + if i == 0 { + // end of first round + // SAFETY: tmp has been read from a valid source and x is valid for writing + // according to the caller. + unsafe { x.write(tmp) }; + break; + } + // this conditional must be here if `left + right >= 15` + if i < gcd { + gcd = i; + } + } else { + i += right; + } + } + // finish the chunk with more rounds + for start in 1..gcd { + // SAFETY: `gcd` is at most equal to `right` so all values in `1..gcd` are valid for + // reading and writing as per the function's safety contract, see [long-safety-expl] + // above + tmp = unsafe { x.add(start).read() }; + // [safety-expl-addition] + // + // Here `start < gcd` so `start < right` so `i < right+right`: `right` being the + // greatest common divisor of `(left+right, right)` means that `left = right` so + // `i < left+right` so `x+i = mid-left+i` is always valid for reading and writing + // according to the function's safety contract. + i = start + right; loop { - // [long-safety-expl] - // SAFETY: callers must ensure `[left, left+mid+right)` are all valid for reading and - // writing. - // - // - `i` start with `right` so `mid-left <= x+i = x+right = mid-left+right < mid+right` - // - `i <= left+right-1` is always true - // - if `i < left`, `right` is added so `i < left+right` and on the next - // iteration `left` is removed from `i` so it doesn't go further - // - if `i >= left`, `left` is removed immediately and so it doesn't go further. - // - overflows cannot happen for `i` since the function's safety contract ask for - // `mid+right-1 = x+left+right` to be valid for writing - // - underflows cannot happen because `i` must be bigger or equal to `left` for - // a subtraction of `left` to happen. - // - // So `x+i` is valid for reading and writing if the caller respected the contract + // SAFETY: see [long-safety-expl] and [safety-expl-addition] tmp = unsafe { x.add(i).replace(tmp) }; - // instead of incrementing `i` and then checking if it is outside the bounds, we - // check if `i` will go outside the bounds on the next increment. This prevents - // any wrapping of pointers or `usize`. if i >= left { i -= left; - if i == 0 { - // end of first round - // SAFETY: tmp has been read from a valid source and x is valid for writing - // according to the caller. - unsafe { x.write(tmp) }; + if i == start { + // SAFETY: see [long-safety-expl] and [safety-expl-addition] + unsafe { x.add(start).write(tmp) }; break; } - // this conditional must be here if `left + right >= 15` - if i < gcd { - gcd = i; - } } else { i += right; } } - // finish the chunk with more rounds - for start in 1..gcd { - // SAFETY: `gcd` is at most equal to `right` so all values in `1..gcd` are valid for - // reading and writing as per the function's safety contract, see [long-safety-expl] - // above - tmp = unsafe { x.add(start).read() }; - // [safety-expl-addition] - // - // Here `start < gcd` so `start < right` so `i < right+right`: `right` being the - // greatest common divisor of `(left+right, right)` means that `left = right` so - // `i < left+right` so `x+i = mid-left+i` is always valid for reading and writing - // according to the function's safety contract. - i = start + right; + } + } else { + loop { + if left >= right { + // Algorithm 3 + // There is an alternate way of swapping that involves finding where the last swap + // of this algorithm would be, and swapping using that last chunk instead of swapping + // adjacent chunks like this algorithm is doing, but this way is still faster. loop { - // SAFETY: see [long-safety-expl] and [safety-expl-addition] - tmp = unsafe { x.add(i).replace(tmp) }; - if i >= left { - i -= left; - if i == start { - // SAFETY: see [long-safety-expl] and [safety-expl-addition] - unsafe { x.add(start).write(tmp) }; - break; - } - } else { - i += right; + // SAFETY: + // `left >= right` so `[mid-right, mid+right)` is valid for reading and writing + // Subtracting `right` from `mid` each turn is counterbalanced by the addition and + // check after it. + unsafe { + ptr::swap_nonoverlapping(mid.sub(right), mid, right); + mid = mid.sub(right); + } + left -= right; + if left < right { + break; } - } - } - return; - // `T` is not a zero-sized type, so it's okay to divide by its size. - } else if !cfg!(feature = "optimize_for_size") - && cmp::min(left, right) <= mem::size_of::() / mem::size_of::() - { - // Algorithm 2 - // The `[T; 0]` here is to ensure this is appropriately aligned for T - let mut rawarray = MaybeUninit::<(BufType, [T; 0])>::uninit(); - let buf = rawarray.as_mut_ptr() as *mut T; - // SAFETY: `mid-left <= mid-left+right < mid+right` - let dim = unsafe { mid.sub(left).add(right) }; - if left <= right { - // SAFETY: - // - // 1) The `else if` condition about the sizes ensures `[mid-left; left]` will fit in - // `buf` without overflow and `buf` was created just above and so cannot be - // overlapped with any value of `[mid-left; left]` - // 2) [mid-left, mid+right) are all valid for reading and writing and we don't care - // about overlaps here. - // 3) The `if` condition about `left <= right` ensures writing `left` elements to - // `dim = mid-left+right` is valid because: - // - `buf` is valid and `left` elements were written in it in 1) - // - `dim+left = mid-left+right+left = mid+right` and we write `[dim, dim+left)` - unsafe { - // 1) - ptr::copy_nonoverlapping(mid.sub(left), buf, left); - // 2) - ptr::copy(mid, mid.sub(left), right); - // 3) - ptr::copy_nonoverlapping(buf, dim, left); } } else { - // SAFETY: same reasoning as above but with `left` and `right` reversed - unsafe { - ptr::copy_nonoverlapping(mid, buf, right); - ptr::copy(mid.sub(left), dim, left); - ptr::copy_nonoverlapping(buf, mid.sub(left), right); - } - } - return; - } else if left >= right { - // Algorithm 3 - // There is an alternate way of swapping that involves finding where the last swap - // of this algorithm would be, and swapping using that last chunk instead of swapping - // adjacent chunks like this algorithm is doing, but this way is still faster. - loop { - // SAFETY: - // `left >= right` so `[mid-right, mid+right)` is valid for reading and writing - // Subtracting `right` from `mid` each turn is counterbalanced by the addition and - // check after it. - unsafe { - ptr::swap_nonoverlapping(mid.sub(right), mid, right); - mid = mid.sub(right); - } - left -= right; - if left < right { - break; + // Algorithm 3, `left < right` + loop { + // SAFETY: `[mid-left, mid+left)` is valid for reading and writing because + // `left < right` so `mid+left < mid+right`. + // Adding `left` to `mid` each turn is counterbalanced by the subtraction and check + // after it. + unsafe { + ptr::swap_nonoverlapping(mid.sub(left), mid, left); + mid = mid.add(left); + } + right -= left; + if right < left { + break; + } } } - } else { - // Algorithm 3, `left < right` - loop { - // SAFETY: `[mid-left, mid+left)` is valid for reading and writing because - // `left < right` so `mid+left < mid+right`. - // Adding `left` to `mid` each turn is counterbalanced by the subtraction and check - // after it. - unsafe { - ptr::swap_nonoverlapping(mid.sub(left), mid, left); - mid = mid.add(left); - } - right -= left; - if right < left { - break; - } + + if (right == 0) || (left == 0) { + return; } } } From 353d916a0a0246b1bce331737e21806d77c7b857 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= Date: Fri, 24 Jan 2025 13:59:05 +0100 Subject: [PATCH 03/21] Revert "Add `@bors rollup=never` to rustc-push PR body" --- src/doc/rustc-dev-guide/josh-sync/src/sync.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/doc/rustc-dev-guide/josh-sync/src/sync.rs b/src/doc/rustc-dev-guide/josh-sync/src/sync.rs index 7ff5e018d2edc..eff80b1091d3b 100644 --- a/src/doc/rustc-dev-guide/josh-sync/src/sync.rs +++ b/src/doc/rustc-dev-guide/josh-sync/src/sync.rs @@ -180,7 +180,7 @@ impl GitSync { ); println!( // Open PR with `subtree update` title to silence the `no-merges` triagebot check - " https://github.com/{UPSTREAM_REPO}/compare/{github_user}:{branch}?quick_pull=1&title=Rustc+dev+guide+subtree+update&body=@bors+rollup=never%0Ar?+@ghost" + " https://github.com/{UPSTREAM_REPO}/compare/{github_user}:{branch}?quick_pull=1&title=Rustc+dev+guide+subtree+update&body=r?+@ghost" ); drop(josh); From 17a14b627eedef258b4e8144615fc2460c13648e Mon Sep 17 00:00:00 2001 From: Ada Alakbarova <58857108+ada4a@users.noreply.github.com> Date: Fri, 24 Jan 2025 14:12:17 +0100 Subject: [PATCH 04/21] fix(solve/significant-changes): typo --- src/doc/rustc-dev-guide/src/solve/significant-changes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/doc/rustc-dev-guide/src/solve/significant-changes.md b/src/doc/rustc-dev-guide/src/solve/significant-changes.md index c5bb8a01b12b5..c82b5d468961a 100644 --- a/src/doc/rustc-dev-guide/src/solve/significant-changes.md +++ b/src/doc/rustc-dev-guide/src/solve/significant-changes.md @@ -42,7 +42,7 @@ old implementation structurally relates the aliases instead. This enables the new solver to stall equality until it is able to normalize the related aliases. The behavior of the old solver is incomplete and relies on eager normalization -which replaces ambiguous aliases with inference variables. As this is not +which replaces ambiguous aliases with inference variables. As this is not possible for aliases containing bound variables, the old implementation does not handle aliases inside of binders correctly, e.g. [#102048]. See the chapter on [normalization] for more details. From 1392e074b7941af2f0b4bb400e1d0c75943cfe47 Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Sat, 25 Jan 2025 18:21:58 +0100 Subject: [PATCH 05/21] CI: use key-restore for cache GH action It seems one can't overwrite a cache entry: ``` Failed to save: Unable to reserve cache with key linkcheck--0.8.1, another job may be creating this cache. More details: Cache already exists. Scope: refs/heads/master, Key: linkcheck--0.8.1, Version: 33f8fd511bed9c91c40778bc5c27cb58425caa894ab50f9c5705d83cb78660e0 Warning: Cache save failed. ``` A proper solution is to use `restore-keys`: https://github.com/actions/cache/blob/main/tips-and-workarounds.md#update-a-cache --- src/doc/rustc-dev-guide/.github/workflows/ci.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/doc/rustc-dev-guide/.github/workflows/ci.yml b/src/doc/rustc-dev-guide/.github/workflows/ci.yml index 006bcce44b3d5..3f810e2fbcc99 100644 --- a/src/doc/rustc-dev-guide/.github/workflows/ci.yml +++ b/src/doc/rustc-dev-guide/.github/workflows/ci.yml @@ -41,7 +41,9 @@ jobs: uses: actions/cache/restore@v4 with: path: book/linkcheck/cache.json - key: linkcheck--${{ env.MDBOOK_LINKCHECK2_VERSION }} + key: linkcheck--${{ env.MDBOOK_LINKCHECK2_VERSION }}--${{ github.run_id }} + restore-keys: | + linkcheck--${{ env.MDBOOK_LINKCHECK2_VERSION }}-- - name: Install latest nightly Rust toolchain if: steps.mdbook-cache.outputs.cache-hit != 'true' @@ -66,7 +68,7 @@ jobs: uses: actions/cache/save@v4 with: path: book/linkcheck/cache.json - key: linkcheck--${{ env.MDBOOK_LINKCHECK2_VERSION }} + key: linkcheck--${{ env.MDBOOK_LINKCHECK2_VERSION }}--${{ github.run_id }} - name: Deploy to gh-pages if: github.event_name == 'push' From 40e051f1bfc41f3230e1a264425cd064ea66f500 Mon Sep 17 00:00:00 2001 From: Eric Huss Date: Sat, 25 Jan 2025 13:47:50 -0800 Subject: [PATCH 06/21] Update boring lines to sync with rustdoc --- .../src/early_late_parameters.md | 62 +++++++++---------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/src/doc/rustc-dev-guide/src/early_late_parameters.md b/src/doc/rustc-dev-guide/src/early_late_parameters.md index 6d13655294d3e..28026b0af276b 100644 --- a/src/doc/rustc-dev-guide/src/early_late_parameters.md +++ b/src/doc/rustc-dev-guide/src/early_late_parameters.md @@ -126,8 +126,8 @@ In this example we call `foo`'s function item type twice, each time with a borro If the lifetime parameter on `foo` was late bound this would be able to compile as each caller could provide a different lifetime argument for its borrow. See the following example which demonstrates this using the `bar` function defined above: ```rust -#fn foo<'a: 'a>(b: &'a String) -> &'a String { b } -#fn bar<'a>(b: &'a String) -> &'a String { b } +# fn foo<'a: 'a>(b: &'a String) -> &'a String { b } +# fn bar<'a>(b: &'a String) -> &'a String { b } // Early bound parameters are instantiated here, however as `'a` is // late bound it is not provided here. @@ -220,24 +220,24 @@ Then, for the first case, we can call each function with a single lifetime argum ```rust #![deny(late_bound_lifetime_arguments)] -#fn free_function<'a: 'a, 'b>(_: &'a (), _: &'b ()) {} +# fn free_function<'a: 'a, 'b>(_: &'a (), _: &'b ()) {} # -#struct Foo; +# struct Foo; # -#trait Trait: Sized { -# fn trait_method<'a: 'a, 'b>(self, _: &'a (), _: &'b ()); -# fn trait_function<'a: 'a, 'b>(_: &'a (), _: &'b ()); -#} +# trait Trait: Sized { +# fn trait_method<'a: 'a, 'b>(self, _: &'a (), _: &'b ()); +# fn trait_function<'a: 'a, 'b>(_: &'a (), _: &'b ()); +# } # -#impl Trait for Foo { -# fn trait_method<'a: 'a, 'b>(self, _: &'a (), _: &'b ()) {} -# fn trait_function<'a: 'a, 'b>(_: &'a (), _: &'b ()) {} -#} +# impl Trait for Foo { +# fn trait_method<'a: 'a, 'b>(self, _: &'a (), _: &'b ()) {} +# fn trait_function<'a: 'a, 'b>(_: &'a (), _: &'b ()) {} +# } # -#impl Foo { -# fn inherent_method<'a: 'a, 'b>(self, _: &'a (), _: &'b ()) {} -# fn inherent_function<'a: 'a, 'b>(_: &'a (), _: &'b ()) {} -#} +# impl Foo { +# fn inherent_method<'a: 'a, 'b>(self, _: &'a (), _: &'b ()) {} +# fn inherent_function<'a: 'a, 'b>(_: &'a (), _: &'b ()) {} +# } # // Specifying as many arguments as there are early // bound parameters is always a future compat warning @@ -251,24 +251,24 @@ free_function::<'static>(&(), &()); For the second case we call each function with more lifetime arguments than there are lifetime parameters (be it early or late bound) and note that method calls result in a FCW as opposed to the free/associated functions which result in a hard error: ```rust -#fn free_function<'a: 'a, 'b>(_: &'a (), _: &'b ()) {} +# fn free_function<'a: 'a, 'b>(_: &'a (), _: &'b ()) {} # -#struct Foo; +# struct Foo; # -#trait Trait: Sized { -# fn trait_method<'a: 'a, 'b>(self, _: &'a (), _: &'b ()); -# fn trait_function<'a: 'a, 'b>(_: &'a (), _: &'b ()); -#} +# trait Trait: Sized { +# fn trait_method<'a: 'a, 'b>(self, _: &'a (), _: &'b ()); +# fn trait_function<'a: 'a, 'b>(_: &'a (), _: &'b ()); +# } # -#impl Trait for Foo { -# fn trait_method<'a: 'a, 'b>(self, _: &'a (), _: &'b ()) {} -# fn trait_function<'a: 'a, 'b>(_: &'a (), _: &'b ()) {} -#} +# impl Trait for Foo { +# fn trait_method<'a: 'a, 'b>(self, _: &'a (), _: &'b ()) {} +# fn trait_function<'a: 'a, 'b>(_: &'a (), _: &'b ()) {} +# } # -#impl Foo { -# fn inherent_method<'a: 'a, 'b>(self, _: &'a (), _: &'b ()) {} -# fn inherent_function<'a: 'a, 'b>(_: &'a (), _: &'b ()) {} -#} +# impl Foo { +# fn inherent_method<'a: 'a, 'b>(self, _: &'a (), _: &'b ()) {} +# fn inherent_function<'a: 'a, 'b>(_: &'a (), _: &'b ()) {} +# } # // Specifying more arguments than there are early // bound parameters is a future compat warning when @@ -421,4 +421,4 @@ impl<'a> Fn<()> for FooFnItem<'a> { type Output = &'a String; /* fn call(...) -> ... { ... } */ } -``` \ No newline at end of file +``` From 20818c47a59cb5fe617c5653a953d71c30def1c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Le=C3=B3n=20Orell=20Valerian=20Liehr?= Date: Sun, 26 Jan 2025 02:42:09 +0100 Subject: [PATCH 07/21] Remove accidental leading empty line in code block --- src/doc/rustc-dev-guide/src/early_late_parameters.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/doc/rustc-dev-guide/src/early_late_parameters.md b/src/doc/rustc-dev-guide/src/early_late_parameters.md index 28026b0af276b..3b2a5e8a155b6 100644 --- a/src/doc/rustc-dev-guide/src/early_late_parameters.md +++ b/src/doc/rustc-dev-guide/src/early_late_parameters.md @@ -128,7 +128,7 @@ If the lifetime parameter on `foo` was late bound this would be able to compile ```rust # fn foo<'a: 'a>(b: &'a String) -> &'a String { b } # fn bar<'a>(b: &'a String) -> &'a String { b } - +# // Early bound parameters are instantiated here, however as `'a` is // late bound it is not provided here. let b = bar; From 5210a8d1e9789ad961b8464958fee3e804552a7e Mon Sep 17 00:00:00 2001 From: Eric Huss Date: Sun, 26 Jan 2025 12:08:48 -0800 Subject: [PATCH 08/21] Correct information on dylib compression Compression of dylibs was removed in https://github.com/rust-lang/rust/pull/113695 (and decompression removed in https://github.com/rust-lang/rust/pull/132402). --- src/doc/rustc-dev-guide/src/backend/libs-and-metadata.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/doc/rustc-dev-guide/src/backend/libs-and-metadata.md b/src/doc/rustc-dev-guide/src/backend/libs-and-metadata.md index b0823b9a5eed3..556b3fdf8f84f 100644 --- a/src/doc/rustc-dev-guide/src/backend/libs-and-metadata.md +++ b/src/doc/rustc-dev-guide/src/backend/libs-and-metadata.md @@ -42,7 +42,7 @@ format is specific to `rustc`, and may change over time. This file contains: ### dylib A `dylib` is a platform-specific shared library. It includes the `rustc` -[metadata] in a special link section called `.rustc` in a compressed format. +[metadata] in a special link section called `.rustc`. ### rmeta From 2412289fc9ae0e8903d1c3cca8ef0aaf84eae307 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= Date: Mon, 27 Jan 2025 23:34:36 +0100 Subject: [PATCH 09/21] Fix rustc-pull CI's bash commands --- src/doc/rustc-dev-guide/.github/workflows/rustc-pull.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/doc/rustc-dev-guide/.github/workflows/rustc-pull.yml b/src/doc/rustc-dev-guide/.github/workflows/rustc-pull.yml index 87a3ee2e78f1d..615927d55e597 100644 --- a/src/doc/rustc-dev-guide/.github/workflows/rustc-pull.yml +++ b/src/doc/rustc-dev-guide/.github/workflows/rustc-pull.yml @@ -50,10 +50,10 @@ jobs: RESULT=`gh pr list --author github-actions[bot] --state open -q 'map(select(.title=="Rustc pull update")) | length' --json title` if [[ "$RESULT" -eq 0 ]]; then echo "Creating new pull request" - PR_URL=gh pr create -B master --title 'Rustc pull update' --body 'Latest update from rustc.' + PR_URL=`gh pr create -B master --title 'Rustc pull update' --body 'Latest update from rustc.'` echo "pr_url=$PR_URL" >> $GITHUB_OUTPUT else - PR_URL=gh pr list --author github-actions[bot] --state open -q 'map(select(.title=="Rustc pull update")) | .[0].url' --json url,title + PR_URL=`gh pr list --author github-actions[bot] --state open -q 'map(select(.title=="Rustc pull update")) | .[0].url' --json url,title` echo "pr_url=$PR_URL" >> $GITHUB_OUTPUT fi env: From d63796f69a977f1ab914004c70169a80ef0d8520 Mon Sep 17 00:00:00 2001 From: Joren-vanGoethem <55790052+Joren-vanGoethem@users.noreply.github.com> Date: Tue, 28 Jan 2025 07:43:16 +0100 Subject: [PATCH 10/21] Update about-this-guide.md --- src/doc/rustc-dev-guide/src/about-this-guide.md | 1 - 1 file changed, 1 deletion(-) diff --git a/src/doc/rustc-dev-guide/src/about-this-guide.md b/src/doc/rustc-dev-guide/src/about-this-guide.md index 793bfa9e66e59..781a5c51bf7a8 100644 --- a/src/doc/rustc-dev-guide/src/about-this-guide.md +++ b/src/doc/rustc-dev-guide/src/about-this-guide.md @@ -72,7 +72,6 @@ You might also find the following sites useful: - The [Rust reference][rr], even though it doesn't specifically talk about Rust's internals, is a great resource nonetheless - Although out of date, [Tom Lee's great blog article][tlgba] is very helpful -- [rustaceans.org][ro] is helpful, but mostly dedicated to IRC - The [Rust Compiler Testing Docs][rctd] - For [@bors], [this cheat sheet][cheatsheet] is helpful - Google is always helpful when programming. From 62102ee041d0681e507e16c6d17ff5798b3a9d9f Mon Sep 17 00:00:00 2001 From: Boxy Date: Tue, 28 Jan 2025 11:57:04 +0000 Subject: [PATCH 11/21] Preparing for merge from rustc --- src/doc/rustc-dev-guide/rust-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/doc/rustc-dev-guide/rust-version b/src/doc/rustc-dev-guide/rust-version index 9693bfd63e84d..183d26b293842 100644 --- a/src/doc/rustc-dev-guide/rust-version +++ b/src/doc/rustc-dev-guide/rust-version @@ -1 +1 @@ -ecda83b30f0f68cf5692855dddc0bc38ee8863fc +66d6064f9eb888018775e08f84747ee6f39ba28e From 5861aa21d9acfae6e83acb79081b81c7a7c4994f Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Tue, 28 Jan 2025 11:04:26 -0700 Subject: [PATCH 12/21] Add some extra pointers for rustdoc frontend devs --- src/doc/rustc-dev-guide/src/rustdoc.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/doc/rustc-dev-guide/src/rustdoc.md b/src/doc/rustc-dev-guide/src/rustdoc.md index 3867d2489886d..2a0e212f98e31 100644 --- a/src/doc/rustc-dev-guide/src/rustdoc.md +++ b/src/doc/rustc-dev-guide/src/rustdoc.md @@ -58,10 +58,13 @@ does is call the `main()` that's in this crate's `lib.rs`, though.) * If you want to copy those docs to a webserver, copy all of `build/host/doc`, since that's where the CSS, JS, fonts, and landing page are. + * For frontend debugging, disable the `rust.docs-minification` option in [`config.toml`]. * Use `./x test tests/rustdoc*` to run the tests using a stage1 rustdoc. * See [Rustdoc internals] for more information about tests. +[`config.toml`]: ./building/how-to-build-and-run.md + ## Code structure * All paths in this section are relative to `src/librustdoc` in the rust-lang/rust repository. @@ -77,6 +80,7 @@ does is call the `main()` that's in this crate's `lib.rs`, though.) * The tests on the structure of rustdoc HTML output are located in `tests/rustdoc`, where they're handled by the test runner of bootstrap and the supplementary script `src/etc/htmldocck.py`. +* Frontend CSS and JavaScript are stored in `html/static/`. ## Tests @@ -91,6 +95,11 @@ does is call the `main()` that's in this crate's `lib.rs`, though.) browser-UI-test](https://github.com/GuillaumeGomez/browser-UI-test/) that uses puppeteer to run tests in a headless browser and check rendering and interactivity. +* Additionally, JavaScript type annotations are written using [TypeScript-flavored JSDoc] + comments and an external d.ts file. The code itself is plain, valid JavaScript; we only + use tsc as a linter. + +[TypeScript-flavored JSDoc]: https://www.typescriptlang.org/docs/handbook/jsdoc-supported-types.html ## Constraints From 6763561161dff2419ffe9795ec8bc40671c16e9e Mon Sep 17 00:00:00 2001 From: Bart Jacobs Date: Tue, 28 Jan 2025 21:42:51 +0100 Subject: [PATCH 13/21] btree/node.rs: remove incorrect comment from pop_internal_level docs --- library/alloc/src/collections/btree/node.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/library/alloc/src/collections/btree/node.rs b/library/alloc/src/collections/btree/node.rs index 6815ac1c19305..2525f5856cd21 100644 --- a/library/alloc/src/collections/btree/node.rs +++ b/library/alloc/src/collections/btree/node.rs @@ -600,9 +600,6 @@ impl NodeRef { /// no cleanup is done on any of the keys, values and other children. /// This decreases the height by 1 and is the opposite of `push_internal_level`. /// - /// Requires exclusive access to the `NodeRef` object but not to the root node; - /// it will not invalidate other handles or references to the root node. - /// /// Panics if there is no internal level, i.e., if the root node is a leaf. pub(super) fn pop_internal_level(&mut self, alloc: A) { assert!(self.height > 0); From 941ab8cd187f987242d2872e0b6dabbcca569fd1 Mon Sep 17 00:00:00 2001 From: yegeunyang Date: Tue, 28 Jan 2025 20:42:26 -0600 Subject: [PATCH 14/21] Add "Writing tests" section --- src/doc/rustc-dev-guide/src/getting-started.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/doc/rustc-dev-guide/src/getting-started.md b/src/doc/rustc-dev-guide/src/getting-started.md index 03d2811e8b108..d7797d2e4bc08 100644 --- a/src/doc/rustc-dev-guide/src/getting-started.md +++ b/src/doc/rustc-dev-guide/src/getting-started.md @@ -137,6 +137,10 @@ pull request, continuing the work on the feature. [abandoned-prs]: https://github.com/rust-lang/rust/pulls?q=is%3Apr+label%3AS-inactive+is%3Aclosed +### Writing tests + +Issues that have been resolved but do not have a regression test are marked with the `E-needs-test` label. Writing unit tests is a low-risk, lower-priority task that offers new contributors a great opportunity to familiarize themselves with the codebase. + ### Contributing to std (standard library) See [std-dev-guide](https://std-dev-guide.rust-lang.org/). From 0127e6479fa86a371e53412c4c46237877c81682 Mon Sep 17 00:00:00 2001 From: yegeunyang Date: Tue, 28 Jan 2025 21:05:12 -0600 Subject: [PATCH 15/21] Add link to declare_lint! macro --- src/doc/rustc-dev-guide/src/diagnostics.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/doc/rustc-dev-guide/src/diagnostics.md b/src/doc/rustc-dev-guide/src/diagnostics.md index 709e9d4f88935..8f389640d271f 100644 --- a/src/doc/rustc-dev-guide/src/diagnostics.md +++ b/src/doc/rustc-dev-guide/src/diagnostics.md @@ -602,7 +602,7 @@ as the linter walks the AST. You can then choose to emit lints in a very similar way to compile errors. You also declare the metadata of a particular lint via the `declare_lint!` -macro. This includes the name, the default level, a short description, and some +macro. [This macro](https://doc.rust-lang.org/nightly/nightly-rustc/rustc_lint_defs/macro.declare_lint.html) includes the name, the default level, a short description, and some more details. Note that the lint and the lint pass must be registered with the compiler. From 4260e3a5b89de914567ee0689e87ffd7da400538 Mon Sep 17 00:00:00 2001 From: yegeunyang Date: Tue, 28 Jan 2025 22:56:52 -0600 Subject: [PATCH 16/21] Touch up a sentence --- src/doc/rustc-dev-guide/src/getting-started.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/doc/rustc-dev-guide/src/getting-started.md b/src/doc/rustc-dev-guide/src/getting-started.md index d7797d2e4bc08..4cb1d0b31ebd2 100644 --- a/src/doc/rustc-dev-guide/src/getting-started.md +++ b/src/doc/rustc-dev-guide/src/getting-started.md @@ -139,7 +139,7 @@ pull request, continuing the work on the feature. ### Writing tests -Issues that have been resolved but do not have a regression test are marked with the `E-needs-test` label. Writing unit tests is a low-risk, lower-priority task that offers new contributors a great opportunity to familiarize themselves with the codebase. +Issues that have been resolved but do not have a regression test are marked with the `E-needs-test` label. Writing unit tests is a low-risk, lower-priority task that offers new contributors a great opportunity to familiarize themselves with the testing infrastructure and contribution workflow. ### Contributing to std (standard library) From 810e4c1bc62a4801a4c29fe6c975630acbd78370 Mon Sep 17 00:00:00 2001 From: Bart Jacobs Date: Wed, 29 Jan 2025 08:35:29 +0100 Subject: [PATCH 17/21] btree/node.rs: pop_internal_level: does not invalidate other handles --- library/alloc/src/collections/btree/node.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/library/alloc/src/collections/btree/node.rs b/library/alloc/src/collections/btree/node.rs index 2525f5856cd21..37f784a322cad 100644 --- a/library/alloc/src/collections/btree/node.rs +++ b/library/alloc/src/collections/btree/node.rs @@ -600,6 +600,9 @@ impl NodeRef { /// no cleanup is done on any of the keys, values and other children. /// This decreases the height by 1 and is the opposite of `push_internal_level`. /// + /// Does not invalidate any handles or references pointing into the subtree + /// rooted at the first child of `self`. + /// /// Panics if there is no internal level, i.e., if the root node is a leaf. pub(super) fn pop_internal_level(&mut self, alloc: A) { assert!(self.height > 0); From 2f276b36e203c05b3c38b427d6e38219ae6a2cf1 Mon Sep 17 00:00:00 2001 From: Santiago Pastorino Date: Wed, 29 Jan 2025 13:27:41 -0300 Subject: [PATCH 18/21] spastorino back from vacations --- triagebot.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/triagebot.toml b/triagebot.toml index 4a09fe116a572..6beaae2b9b742 100644 --- a/triagebot.toml +++ b/triagebot.toml @@ -1023,7 +1023,6 @@ contributing_url = "https://rustc-dev-guide.rust-lang.org/getting-started.html" users_on_vacation = [ "jyn514", "nnethercote", - "spastorino", "workingjubilee", "kobzol" ] From 311c3b71f0972e144cac91679a08906875c8af3f Mon Sep 17 00:00:00 2001 From: edwloef Date: Mon, 27 Jan 2025 16:35:15 +0100 Subject: [PATCH 19/21] split slice::ptr_rotate into three separate algorithms, to hopefully help inlining --- library/core/src/slice/rotate.rs | 369 ++++++++++++++++--------------- 1 file changed, 195 insertions(+), 174 deletions(-) diff --git a/library/core/src/slice/rotate.rs b/library/core/src/slice/rotate.rs index 20833dc31aa2b..3e88978b78105 100644 --- a/library/core/src/slice/rotate.rs +++ b/library/core/src/slice/rotate.rs @@ -1,6 +1,8 @@ use crate::mem::{self, MaybeUninit, SizedTypeProperties}; use crate::{cmp, ptr}; +type BufType = [usize; 32]; + /// Rotates the range `[mid-left, mid+right)` such that the element at `mid` becomes the first /// element. Equivalently, rotates the range `left` elements to the left or `right` elements to the /// right. @@ -8,17 +10,76 @@ use crate::{cmp, ptr}; /// # Safety /// /// The specified range must be valid for reading and writing. -/// -/// # Algorithm -/// +pub(super) unsafe fn ptr_rotate(left: usize, mid: *mut T, right: usize) { + if T::IS_ZST { + return; + } + // abort early if the rotate is a no-op + if (left == 0) || (right == 0) { + return; + } + // `T` is not a zero-sized type, so it's okay to divide by its size. + if !cfg!(feature = "optimize_for_size") + && cmp::min(left, right) <= mem::size_of::() / mem::size_of::() + { + // SAFETY: guaranteed by the caller + unsafe { ptr_rotate_memmove(left, mid, right) }; + } else if !cfg!(feature = "optimize_for_size") + && ((left + right < 24) || (mem::size_of::() > mem::size_of::<[usize; 4]>())) + { + // SAFETY: guaranteed by the caller + unsafe { ptr_rotate_gcd(left, mid, right) } + } else { + // SAFETY: guaranteed by the caller + unsafe { ptr_rotate_swap(left, mid, right) } + } +} + /// Algorithm 1 is used if `min(left, right)` is small enough to fit onto a stack buffer. The /// `min(left, right)` elements are copied onto the buffer, `memmove` is applied to the others, and /// the ones on the buffer are moved back into the hole on the opposite side of where they /// originated. /// -/// Algorithms that can be vectorized outperform the above once `left + right` becomes large enough. +/// # Safety /// -/// Algorithm 2 is otherwise used for small values of `left + right` or for large `T`. The elements +/// The specified range must be valid for reading and writing. +unsafe fn ptr_rotate_memmove(left: usize, mid: *mut T, right: usize) { + // The `[T; 0]` here is to ensure this is appropriately aligned for T + let mut rawarray = MaybeUninit::<(BufType, [T; 0])>::uninit(); + let buf = rawarray.as_mut_ptr() as *mut T; + // SAFETY: `mid-left <= mid-left+right < mid+right` + let dim = unsafe { mid.sub(left).add(right) }; + if left <= right { + // SAFETY: + // + // 1) The `if` condition about the sizes ensures `[mid-left; left]` will fit in + // `buf` without overflow and `buf` was created just above and so cannot be + // overlapped with any value of `[mid-left; left]` + // 2) [mid-left, mid+right) are all valid for reading and writing and we don't care + // about overlaps here. + // 3) The `if` condition about `left <= right` ensures writing `left` elements to + // `dim = mid-left+right` is valid because: + // - `buf` is valid and `left` elements were written in it in 1) + // - `dim+left = mid-left+right+left = mid+right` and we write `[dim, dim+left)` + unsafe { + // 1) + ptr::copy_nonoverlapping(mid.sub(left), buf, left); + // 2) + ptr::copy(mid, mid.sub(left), right); + // 3) + ptr::copy_nonoverlapping(buf, dim, left); + } + } else { + // SAFETY: same reasoning as above but with `left` and `right` reversed + unsafe { + ptr::copy_nonoverlapping(mid, buf, right); + ptr::copy(mid.sub(left), dim, left); + ptr::copy_nonoverlapping(buf, mid.sub(left), right); + } + } +} + +/// Algorithm 2 is used for small values of `left + right` or for large `T`. The elements /// are moved into their final positions one at a time starting at `mid - left` and advancing by /// `right` steps modulo `left + right`, such that only one temporary is needed. Eventually, we /// arrive back at `mid - left`. However, if `gcd(left + right, right)` is not 1, the above steps @@ -48,195 +109,155 @@ use crate::{cmp, ptr}; /// /// Algorithm 2 can be vectorized by chunking and performing many rounds at once, but there are too /// few rounds on average until `left + right` is enormous, and the worst case of a single -/// round is always there. Instead, algorithm 3 utilizes repeated swapping of -/// `min(left, right)` elements until a smaller rotate problem is left. +/// round is always there. /// -/// ```text -/// left = 11, right = 4 -/// [4 5 6 7 8 9 10 11 12 13 14 . 0 1 2 3] -/// ^ ^ ^ ^ ^ ^ ^ ^ swapping the right most elements with elements to the left -/// [4 5 6 7 8 9 10 . 0 1 2 3] 11 12 13 14 -/// ^ ^ ^ ^ ^ ^ ^ ^ swapping these -/// [4 5 6 . 0 1 2 3] 7 8 9 10 11 12 13 14 -/// we cannot swap any more, but a smaller rotation problem is left to solve -/// ``` -/// when `left < right` the swapping happens from the left instead. -pub(super) unsafe fn ptr_rotate(mut left: usize, mut mid: *mut T, mut right: usize) { - type BufType = [usize; 32]; - if T::IS_ZST { - return; - } - // N.B. the below algorithms can fail if these cases are not checked - if (right == 0) || (left == 0) { - return; - } - // `T` is not a zero-sized type, so it's okay to divide by its size. - if !cfg!(feature = "optimize_for_size") - && cmp::min(left, right) <= mem::size_of::() / mem::size_of::() - { - // Algorithm 1 - // The `[T; 0]` here is to ensure this is appropriately aligned for T - let mut rawarray = MaybeUninit::<(BufType, [T; 0])>::uninit(); - let buf = rawarray.as_mut_ptr() as *mut T; - // SAFETY: `mid-left <= mid-left+right < mid+right` - let dim = unsafe { mid.sub(left).add(right) }; - if left <= right { - // SAFETY: - // - // 1) The `if` condition about the sizes ensures `[mid-left; left]` will fit in - // `buf` without overflow and `buf` was created just above and so cannot be - // overlapped with any value of `[mid-left; left]` - // 2) [mid-left, mid+right) are all valid for reading and writing and we don't care - // about overlaps here. - // 3) The `if` condition about `left <= right` ensures writing `left` elements to - // `dim = mid-left+right` is valid because: - // - `buf` is valid and `left` elements were written in it in 1) - // - `dim+left = mid-left+right+left = mid+right` and we write `[dim, dim+left)` - unsafe { - // 1) - ptr::copy_nonoverlapping(mid.sub(left), buf, left); - // 2) - ptr::copy(mid, mid.sub(left), right); - // 3) - ptr::copy_nonoverlapping(buf, dim, left); +/// # Safety +/// +/// The specified range must be valid for reading and writing. +unsafe fn ptr_rotate_gcd(left: usize, mid: *mut T, right: usize) { + // Algorithm 2 + // Microbenchmarks indicate that the average performance for random shifts is better all + // the way until about `left + right == 32`, but the worst case performance breaks even + // around 16. 24 was chosen as middle ground. If the size of `T` is larger than 4 + // `usize`s, this algorithm also outperforms other algorithms. + // SAFETY: callers must ensure `mid - left` is valid for reading and writing. + let x = unsafe { mid.sub(left) }; + // beginning of first round + // SAFETY: see previous comment. + let mut tmp: T = unsafe { x.read() }; + let mut i = right; + // `gcd` can be found before hand by calculating `gcd(left + right, right)`, + // but it is faster to do one loop which calculates the gcd as a side effect, then + // doing the rest of the chunk + let mut gcd = right; + // benchmarks reveal that it is faster to swap temporaries all the way through instead + // of reading one temporary once, copying backwards, and then writing that temporary at + // the very end. This is possibly due to the fact that swapping or replacing temporaries + // uses only one memory address in the loop instead of needing to manage two. + loop { + // [long-safety-expl] + // SAFETY: callers must ensure `[left, left+mid+right)` are all valid for reading and + // writing. + // + // - `i` start with `right` so `mid-left <= x+i = x+right = mid-left+right < mid+right` + // - `i <= left+right-1` is always true + // - if `i < left`, `right` is added so `i < left+right` and on the next + // iteration `left` is removed from `i` so it doesn't go further + // - if `i >= left`, `left` is removed immediately and so it doesn't go further. + // - overflows cannot happen for `i` since the function's safety contract ask for + // `mid+right-1 = x+left+right` to be valid for writing + // - underflows cannot happen because `i` must be bigger or equal to `left` for + // a subtraction of `left` to happen. + // + // So `x+i` is valid for reading and writing if the caller respected the contract + tmp = unsafe { x.add(i).replace(tmp) }; + // instead of incrementing `i` and then checking if it is outside the bounds, we + // check if `i` will go outside the bounds on the next increment. This prevents + // any wrapping of pointers or `usize`. + if i >= left { + i -= left; + if i == 0 { + // end of first round + // SAFETY: tmp has been read from a valid source and x is valid for writing + // according to the caller. + unsafe { x.write(tmp) }; + break; } - } else { - // SAFETY: same reasoning as above but with `left` and `right` reversed - unsafe { - ptr::copy_nonoverlapping(mid, buf, right); - ptr::copy(mid.sub(left), dim, left); - ptr::copy_nonoverlapping(buf, mid.sub(left), right); + // this conditional must be here if `left + right >= 15` + if i < gcd { + gcd = i; } + } else { + i += right; } - } else if !cfg!(feature = "optimize_for_size") - && ((left + right < 24) || (mem::size_of::() > mem::size_of::<[usize; 4]>())) - { - // Algorithm 2 - // Microbenchmarks indicate that the average performance for random shifts is better all - // the way until about `left + right == 32`, but the worst case performance breaks even - // around 16. 24 was chosen as middle ground. If the size of `T` is larger than 4 - // `usize`s, this algorithm also outperforms other algorithms. - // SAFETY: callers must ensure `mid - left` is valid for reading and writing. - let x = unsafe { mid.sub(left) }; - // beginning of first round - // SAFETY: see previous comment. - let mut tmp: T = unsafe { x.read() }; - let mut i = right; - // `gcd` can be found before hand by calculating `gcd(left + right, right)`, - // but it is faster to do one loop which calculates the gcd as a side effect, then - // doing the rest of the chunk - let mut gcd = right; - // benchmarks reveal that it is faster to swap temporaries all the way through instead - // of reading one temporary once, copying backwards, and then writing that temporary at - // the very end. This is possibly due to the fact that swapping or replacing temporaries - // uses only one memory address in the loop instead of needing to manage two. + } + // finish the chunk with more rounds + for start in 1..gcd { + // SAFETY: `gcd` is at most equal to `right` so all values in `1..gcd` are valid for + // reading and writing as per the function's safety contract, see [long-safety-expl] + // above + tmp = unsafe { x.add(start).read() }; + // [safety-expl-addition] + // + // Here `start < gcd` so `start < right` so `i < right+right`: `right` being the + // greatest common divisor of `(left+right, right)` means that `left = right` so + // `i < left+right` so `x+i = mid-left+i` is always valid for reading and writing + // according to the function's safety contract. + i = start + right; loop { - // [long-safety-expl] - // SAFETY: callers must ensure `[left, left+mid+right)` are all valid for reading and - // writing. - // - // - `i` start with `right` so `mid-left <= x+i = x+right = mid-left+right < mid+right` - // - `i <= left+right-1` is always true - // - if `i < left`, `right` is added so `i < left+right` and on the next - // iteration `left` is removed from `i` so it doesn't go further - // - if `i >= left`, `left` is removed immediately and so it doesn't go further. - // - overflows cannot happen for `i` since the function's safety contract ask for - // `mid+right-1 = x+left+right` to be valid for writing - // - underflows cannot happen because `i` must be bigger or equal to `left` for - // a subtraction of `left` to happen. - // - // So `x+i` is valid for reading and writing if the caller respected the contract + // SAFETY: see [long-safety-expl] and [safety-expl-addition] tmp = unsafe { x.add(i).replace(tmp) }; - // instead of incrementing `i` and then checking if it is outside the bounds, we - // check if `i` will go outside the bounds on the next increment. This prevents - // any wrapping of pointers or `usize`. if i >= left { i -= left; - if i == 0 { - // end of first round - // SAFETY: tmp has been read from a valid source and x is valid for writing - // according to the caller. - unsafe { x.write(tmp) }; + if i == start { + // SAFETY: see [long-safety-expl] and [safety-expl-addition] + unsafe { x.add(start).write(tmp) }; break; } - // this conditional must be here if `left + right >= 15` - if i < gcd { - gcd = i; - } } else { i += right; } } - // finish the chunk with more rounds - for start in 1..gcd { - // SAFETY: `gcd` is at most equal to `right` so all values in `1..gcd` are valid for - // reading and writing as per the function's safety contract, see [long-safety-expl] - // above - tmp = unsafe { x.add(start).read() }; - // [safety-expl-addition] - // - // Here `start < gcd` so `start < right` so `i < right+right`: `right` being the - // greatest common divisor of `(left+right, right)` means that `left = right` so - // `i < left+right` so `x+i = mid-left+i` is always valid for reading and writing - // according to the function's safety contract. - i = start + right; + } +} + +/// Algorithm 3 utilizes repeated swapping of `min(left, right)` elements. +/// +/// /// +/// ```text +/// left = 11, right = 4 +/// [4 5 6 7 8 9 10 11 12 13 14 . 0 1 2 3] +/// ^ ^ ^ ^ ^ ^ ^ ^ swapping the right most elements with elements to the left +/// [4 5 6 7 8 9 10 . 0 1 2 3] 11 12 13 14 +/// ^ ^ ^ ^ ^ ^ ^ ^ swapping these +/// [4 5 6 . 0 1 2 3] 7 8 9 10 11 12 13 14 +/// we cannot swap any more, but a smaller rotation problem is left to solve +/// ``` +/// when `left < right` the swapping happens from the left instead. +/// +/// # Safety +/// +/// The specified range must be valid for reading and writing. +unsafe fn ptr_rotate_swap(mut left: usize, mut mid: *mut T, mut right: usize) { + loop { + if left >= right { + // Algorithm 3 + // There is an alternate way of swapping that involves finding where the last swap + // of this algorithm would be, and swapping using that last chunk instead of swapping + // adjacent chunks like this algorithm is doing, but this way is still faster. loop { - // SAFETY: see [long-safety-expl] and [safety-expl-addition] - tmp = unsafe { x.add(i).replace(tmp) }; - if i >= left { - i -= left; - if i == start { - // SAFETY: see [long-safety-expl] and [safety-expl-addition] - unsafe { x.add(start).write(tmp) }; - break; - } - } else { - i += right; + // SAFETY: + // `left >= right` so `[mid-right, mid+right)` is valid for reading and writing + // Subtracting `right` from `mid` each turn is counterbalanced by the addition and + // check after it. + unsafe { + ptr::swap_nonoverlapping(mid.sub(right), mid, right); + mid = mid.sub(right); + } + left -= right; + if left < right { + break; } } - } - } else { - loop { - if left >= right { - // Algorithm 3 - // There is an alternate way of swapping that involves finding where the last swap - // of this algorithm would be, and swapping using that last chunk instead of swapping - // adjacent chunks like this algorithm is doing, but this way is still faster. - loop { - // SAFETY: - // `left >= right` so `[mid-right, mid+right)` is valid for reading and writing - // Subtracting `right` from `mid` each turn is counterbalanced by the addition and - // check after it. - unsafe { - ptr::swap_nonoverlapping(mid.sub(right), mid, right); - mid = mid.sub(right); - } - left -= right; - if left < right { - break; - } + } else { + // Algorithm 3, `left < right` + loop { + // SAFETY: `[mid-left, mid+left)` is valid for reading and writing because + // `left < right` so `mid+left < mid+right`. + // Adding `left` to `mid` each turn is counterbalanced by the subtraction and check + // after it. + unsafe { + ptr::swap_nonoverlapping(mid.sub(left), mid, left); + mid = mid.add(left); } - } else { - // Algorithm 3, `left < right` - loop { - // SAFETY: `[mid-left, mid+left)` is valid for reading and writing because - // `left < right` so `mid+left < mid+right`. - // Adding `left` to `mid` each turn is counterbalanced by the subtraction and check - // after it. - unsafe { - ptr::swap_nonoverlapping(mid.sub(left), mid, left); - mid = mid.add(left); - } - right -= left; - if right < left { - break; - } + right -= left; + if right < left { + break; } } - - if (right == 0) || (left == 0) { - return; - } + } + if (right == 0) || (left == 0) { + return; } } } From fb3d1d0c4bcd1b744c8ef23ba977bad9fcd43849 Mon Sep 17 00:00:00 2001 From: edwloef Date: Tue, 28 Jan 2025 12:26:32 +0100 Subject: [PATCH 20/21] add inline attribute and codegen test --- library/core/src/slice/rotate.rs | 4 +++ .../codegen/lib-optimizations/slice_rotate.rs | 30 +++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 tests/codegen/lib-optimizations/slice_rotate.rs diff --git a/library/core/src/slice/rotate.rs b/library/core/src/slice/rotate.rs index 3e88978b78105..5d5ee4c7b6240 100644 --- a/library/core/src/slice/rotate.rs +++ b/library/core/src/slice/rotate.rs @@ -10,6 +10,7 @@ type BufType = [usize; 32]; /// # Safety /// /// The specified range must be valid for reading and writing. +#[inline] pub(super) unsafe fn ptr_rotate(left: usize, mid: *mut T, right: usize) { if T::IS_ZST { return; @@ -43,6 +44,7 @@ pub(super) unsafe fn ptr_rotate(left: usize, mid: *mut T, right: usize) { /// # Safety /// /// The specified range must be valid for reading and writing. +#[inline] unsafe fn ptr_rotate_memmove(left: usize, mid: *mut T, right: usize) { // The `[T; 0]` here is to ensure this is appropriately aligned for T let mut rawarray = MaybeUninit::<(BufType, [T; 0])>::uninit(); @@ -114,6 +116,7 @@ unsafe fn ptr_rotate_memmove(left: usize, mid: *mut T, right: usize) { /// # Safety /// /// The specified range must be valid for reading and writing. +#[inline] unsafe fn ptr_rotate_gcd(left: usize, mid: *mut T, right: usize) { // Algorithm 2 // Microbenchmarks indicate that the average performance for random shifts is better all @@ -218,6 +221,7 @@ unsafe fn ptr_rotate_gcd(left: usize, mid: *mut T, right: usize) { /// # Safety /// /// The specified range must be valid for reading and writing. +#[inline] unsafe fn ptr_rotate_swap(mut left: usize, mut mid: *mut T, mut right: usize) { loop { if left >= right { diff --git a/tests/codegen/lib-optimizations/slice_rotate.rs b/tests/codegen/lib-optimizations/slice_rotate.rs new file mode 100644 index 0000000000000..d0a7b328d1845 --- /dev/null +++ b/tests/codegen/lib-optimizations/slice_rotate.rs @@ -0,0 +1,30 @@ +//@ compile-flags: -O + +#![crate_type = "lib"] + +// Ensure that the simple case of rotating by a constant 1 optimizes to the obvious thing + +// CHECK-LABEL: @rotate_left_by_one +#[no_mangle] +pub fn rotate_left_by_one(slice: &mut [i32]) { + // CHECK-NOT: phi + // CHECK-NOT: call + // CHECK-NOT: load + // CHECK-NOT: store + // CHECK-NOT: getelementptr + // CHECK: %[[END:.+]] = getelementptr + // CHECK-NEXT: %[[DIM:.+]] = getelementptr + // CHECK-NEXT: %[[LAST:.+]] = load + // CHECK-NEXT: %[[FIRST:.+]] = shl + // CHECK-NEXT: call void @llvm.memmove + // CHECK-NEXT: store i32 %[[LAST]], ptr %[[DIM:.+]] + // CHECK-NOT: phi + // CHECK-NOT: call + // CHECK-NOT: load + // CHECK-NOT: store + // CHECK-NOT: getelementptr + // CHECK: ret void + if !slice.is_empty() { + slice.rotate_left(1); + } +} From 52519e145ef3496781fa049ea0edbe44a1bc206c Mon Sep 17 00:00:00 2001 From: Marijn Schouten Date: Wed, 29 Jan 2025 19:47:59 +0100 Subject: [PATCH 21/21] Cleanup docs for Allocator --- library/core/src/alloc/mod.rs | 58 ++++++++++++++++------------------- 1 file changed, 27 insertions(+), 31 deletions(-) diff --git a/library/core/src/alloc/mod.rs b/library/core/src/alloc/mod.rs index aa841db045ce7..dcab6136ae8a1 100644 --- a/library/core/src/alloc/mod.rs +++ b/library/core/src/alloc/mod.rs @@ -49,26 +49,26 @@ impl fmt::Display for AllocError { /// An implementation of `Allocator` can allocate, grow, shrink, and deallocate arbitrary blocks of /// data described via [`Layout`][]. /// -/// `Allocator` is designed to be implemented on ZSTs, references, or smart pointers because having -/// an allocator like `MyAlloc([u8; N])` cannot be moved, without updating the pointers to the +/// `Allocator` is designed to be implemented on ZSTs, references, or smart pointers. +/// An allocator for `MyAlloc([u8; N])` cannot be moved, without updating the pointers to the /// allocated memory. /// -/// Unlike [`GlobalAlloc`][], zero-sized allocations are allowed in `Allocator`. If an underlying -/// allocator does not support this (like jemalloc) or return a null pointer (such as -/// `libc::malloc`), this must be caught by the implementation. +/// In contrast to [`GlobalAlloc`][], `Allocator` allows zero-sized allocations. If an underlying +/// allocator does not support this (like jemalloc) or responds by returning a null pointer +/// (such as `libc::malloc`), this must be caught by the implementation. /// /// ### Currently allocated memory /// -/// Some of the methods require that a memory block be *currently allocated* via an allocator. This -/// means that: +/// Some of the methods require that a memory block is *currently allocated* by an allocator. +/// This means that: +/// * the starting address for that memory block was previously +/// returned by [`allocate`], [`grow`], or [`shrink`], and +/// * the memory block has not subsequently been deallocated. /// -/// * the starting address for that memory block was previously returned by [`allocate`], [`grow`], or -/// [`shrink`], and -/// -/// * the memory block has not been subsequently deallocated, where blocks are either deallocated -/// directly by being passed to [`deallocate`] or were changed by being passed to [`grow`] or -/// [`shrink`] that returns `Ok`. If `grow` or `shrink` have returned `Err`, the passed pointer -/// remains valid. +/// A memory block is deallocated by a call to [`deallocate`], +/// or by a call to [`grow`] or [`shrink`] that returns `Ok`. +/// A call to `grow` or `shrink` that returns `Err`, +/// does not deallocate the memory block passed to it. /// /// [`allocate`]: Allocator::allocate /// [`grow`]: Allocator::grow @@ -77,32 +77,28 @@ impl fmt::Display for AllocError { /// /// ### Memory fitting /// -/// Some of the methods require that a layout *fit* a memory block. What it means for a layout to -/// "fit" a memory block means (or equivalently, for a memory block to "fit" a layout) is that the +/// Some of the methods require that a `layout` *fit* a memory block or vice versa. This means that the /// following conditions must hold: -/// -/// * The block must be allocated with the same alignment as [`layout.align()`], and -/// -/// * The provided [`layout.size()`] must fall in the range `min ..= max`, where: -/// - `min` is the size of the layout most recently used to allocate the block, and -/// - `max` is the latest actual size returned from [`allocate`], [`grow`], or [`shrink`]. +/// * the memory block must be *currently allocated* with alignment of [`layout.align()`], and +/// * [`layout.size()`] must fall in the range `min ..= max`, where: +/// - `min` is the size of the layout used to allocate the block, and +/// - `max` is the actual size returned from [`allocate`], [`grow`], or [`shrink`]. /// /// [`layout.align()`]: Layout::align /// [`layout.size()`]: Layout::size /// /// # Safety /// -/// * Memory blocks returned from an allocator that are [*currently allocated*] must point to -/// valid memory and retain their validity while they are [*currently allocated*] and the shorter -/// of: -/// - the borrow-checker lifetime of the allocator type itself. -/// - as long as at least one of the instance and all of its clones has not been dropped. +/// Memory blocks that are [*currently allocated*] by an allocator, +/// must point to valid memory, and retain their validity while until either: +/// - the memory block is deallocated, or +/// - the allocator is dropped. /// -/// * copying, cloning, or moving the allocator must not invalidate memory blocks returned from this -/// allocator. A copied or cloned allocator must behave like the same allocator, and +/// Copying, cloning, or moving the allocator must not invalidate memory blocks returned from it +/// A copied or cloned allocator must behave like the original allocator. /// -/// * any pointer to a memory block which is [*currently allocated*] may be passed to any other -/// method of the allocator. +/// A memory block which is [*currently allocated*] may be passed to +/// any method of the allocator that accepts such an argument. /// /// [*currently allocated*]: #currently-allocated-memory #[unstable(feature = "allocator_api", issue = "32838")]