-
-
Save wolfspider/28e940647eed1385d38bb972f54e8b2e to your computer and use it in GitHub Desktop.
#include <stdlib.h> | |
#include <stdint.h> | |
#include <stdio.h> | |
// Define the list structure | |
typedef struct Prims_list__uint32_t { | |
enum { Prims_Cons, Prims_Nil, Prims_Null } tag; | |
union { | |
struct { | |
struct Prims_list__uint32_t *tl; | |
uint32_t hd[1]; | |
}; | |
struct { | |
struct Prims_list__uint32_t *ntl; | |
uint32_t nd[]; | |
}; | |
}; | |
} Prims_list__uint32_t; | |
// Function to create a new Cons node | |
Prims_list__uint32_t *cons(uint32_t hd, Prims_list__uint32_t *tl) { | |
Prims_list__uint32_t *node = (Prims_list__uint32_t *)malloc(sizeof(Prims_list__uint32_t)); | |
node->tag = Prims_Cons; | |
node->hd[1] = hd; | |
node->tl = tl; | |
return node; | |
} | |
Prims_list__uint32_t *ncons(Prims_list__uint32_t *tl) { | |
Prims_list__uint32_t *node = (Prims_list__uint32_t *)malloc(sizeof(Prims_list__uint32_t)); | |
node->tag = Prims_Null; | |
node->ntl = tl; | |
return node; | |
} | |
// Function to create a new Nil node | |
Prims_list__uint32_t *nil() { | |
Prims_list__uint32_t *node = (Prims_list__uint32_t *)malloc(sizeof(Prims_list__uint32_t)); | |
node->tag = Prims_Nil; | |
node->tl = NULL; | |
return node; | |
} | |
// Deep copy function | |
Prims_list__uint32_t *deep_copy_list(Prims_list__uint32_t *list) { | |
if (list == NULL) { | |
return NULL; | |
} | |
if (list->tag == Prims_Nil) { | |
return nil(); | |
} | |
if (list->tag == Prims_Null) { | |
Prims_list__uint32_t *new_list = ncons(NULL); | |
new_list->tl = deep_copy_list(list->ntl); | |
return new_list; | |
} | |
// Create a new node and copy the head value | |
Prims_list__uint32_t *new_list = cons(list->hd[1], NULL); | |
// Recursively copy the rest of the list | |
new_list->tl = deep_copy_list(list->tl); | |
return new_list; | |
} | |
// Free list function for cleanup | |
void free_list(Prims_list__uint32_t *list) { | |
if(list->tag == Prims_Null) { | |
while (list != NULL) { | |
Prims_list__uint32_t *next = list->ntl; | |
free(list); | |
list = next; | |
} | |
} | |
else { | |
while (list != NULL) { | |
Prims_list__uint32_t *next = list->tl; | |
free(list); | |
list = next; | |
} | |
} | |
} | |
// Print list function | |
void print_list(Prims_list__uint32_t *list) { | |
printf("["); | |
while (list != NULL) { | |
if (list->tag == Prims_Cons) { | |
printf("%u", list->hd[1]); | |
list = list->tl; | |
if (list != NULL) { | |
printf(" -> "); | |
} | |
} else if (list->tag == Prims_Nil) { | |
printf("Nil"); | |
list = list->tl; | |
if (list != NULL) { | |
printf(" -> "); | |
} | |
} else if (list->tag == Prims_Null) { | |
printf("Null"); | |
list = list->ntl; | |
if (list != NULL) { | |
printf(" -> "); | |
} | |
} | |
} | |
printf("]\n"); | |
} | |
// Test the deep copy function | |
int main() { | |
// Create an example list: 0 -> 1 -> Null -> 1 -> 0 -> Nil | |
Prims_list__uint32_t *original_list = cons(0, cons(1, ncons(cons(1, cons(0, nil()))))); | |
// Print the original list | |
printf("Original list: "); | |
print_list(original_list); | |
// Deep copy the list | |
Prims_list__uint32_t *copied_list = deep_copy_list(original_list); | |
// Print the copied list | |
printf("Copied list: "); | |
print_list(copied_list); | |
// Free the original list | |
free_list(original_list); | |
// Free the copied list | |
free_list(copied_list); | |
return 0; | |
} |
After printf is removed, no you cannot beat the compiler but you can come pretty damn close. The C version was lowered to LLVM IR at O3 and raised back into C++. O3 becomes "pinned" at this point with only very basic headers used. Formal verification with Low* was used to model the C memory semantics. The resurrected Julia C LLVM backend was used to transpile the LLVM IR back into C which was then raised to C++.
Low* generated example is as follows:
/*
This file was generated by KaRaMeL <https://github.com/FStarLang/karamel>
KaRaMeL invocation: ../_build/default/src/Karamel.exe -warn-error @4 -verbose -skip-makefiles -tmpdir .output/GcTypes.out -no-prefix GcTypes -o .output/GcTypes.exe .output/FStar_Pervasives_Native.krml .output/FStar_Pervasives.krml .output/FStar_Float.krml .output/FStar_Mul.krml .output/FStar_Squash.krml .output/FStar_Classical.krml .output/FStar_Preorder.krml .output/FStar_Sealed.krml .output/FStar_Range.krml .output/FStar_Calc.krml .output/FStar_StrongExcludedMiddle.krml .output/FStar_Classical_Sugar.krml .output/FStar_List_Tot_Base.krml .output/FStar_List_Tot_Properties.krml .output/FStar_List_Tot.krml .output/FStar_Seq_Base.krml .output/FStar_Seq_Properties.krml .output/FStar_Seq.krml .output/FStar_Math_Lib.krml .output/FStar_Math_Lemmas.krml .output/FStar_BitVector.krml .output/FStar_UInt.krml .output/FStar_UInt32.krml .output/FStar_Char.krml .output/FStar_Pprint.krml .output/FStar_Issue.krml .output/FStar_TypeChecker_Core.krml .output/FStar_Errors_Msg.krml .output/FStar_Tactics_Common.krml .output/FStar_Reflection_Types.krml .output/FStar_Tactics_Types.krml .output/FStar_Tactics_Result.krml .output/FStar_Monotonic_Pure.krml .output/FStar_Tactics_Effect.krml .output/FStar_Tactics_Unseal.krml .output/FStar_Ghost.krml .output/FStar_Sealed_Inhabited.krml .output/FStar_Syntax_Syntax.krml .output/FStar_Reflection_V2_Data.krml .output/FStar_VConfig.krml .output/FStar_Order.krml .output/FStar_Reflection_V2_Builtins.krml .output/FStar_Reflection_Const.krml .output/FStar_Tactics_V2_Builtins.krml .output/FStar_FunctionalExtensionality.krml .output/FStar_Set.krml .output/FStar_ErasedLogic.krml .output/FStar_PropositionalExtensionality.krml .output/FStar_PredicateExtensionality.krml .output/FStar_TSet.krml .output/FStar_Monotonic_Heap.krml .output/FStar_Heap.krml .output/FStar_Map.krml .output/FStar_Monotonic_Witnessed.krml .output/FStar_Monotonic_HyperHeap.krml .output/FStar_Monotonic_HyperStack.krml .output/FStar_HyperStack.krml .output/FStar_HyperStack_ST.krml .output/FStar_Universe.krml .output/FStar_Tactics_SMT.krml .output/FStar_GSet.krml .output/FStar_ModifiesGen.krml .output/FStar_Reflection_TermEq.krml .output/FStar_Reflection_TermEq_Simple.krml .output/FStar_Tactics_Util.krml .output/FStar_Reflection_V2_Derived.krml .output/FStar_Reflection_V2_Derived_Lemmas.krml .output/FStar_Reflection_V2_Compare.krml .output/FStar_Reflection_V2.krml .output/FStar_Tactics_NamedView.krml .output/FStar_Reflection_V1_Data.krml .output/FStar_Reflection_V1_Builtins.krml .output/FStar_Tactics_V1_Builtins.krml .output/FStar_Tactics_Builtins.krml .output/FStar_Tactics_V2_SyntaxCoercions.krml .output/FStar_Tactics_Visit.krml .output/FStar_Tactics_V2_SyntaxHelpers.krml .output/FStar_Reflection_V2_Formula.krml .output/FStar_Tactics_V2_Derived.krml .output/FStar_Tactics_Typeclasses.krml .output/FStar_Tactics_MApply.krml .output/FStar_Tactics_Print.krml .output/FStar_IndefiniteDescription.krml .output/FStar_Tactics_V2_Logic.krml .output/FStar_Tactics_V2.krml .output/FStar_BigOps.krml .output/LowStar_Monotonic_Buffer.krml .output/LowStar_Buffer.krml .output/FStar_Int.krml .output/FStar_Int32.krml .output/FStar_UInt8.krml .output/C.krml .output/FStar_HyperStack_All.krml .output/Shift.krml .output/Rust2.krml .output/CheckedInt.krml .output/FStar_HyperStack_IO.krml .output/LowStar_Modifies.krml .output/FStar_Exn.krml .output/FStar_ST.krml .output/FStar_All.krml .output/FStar_List.krml .output/FStar_String.krml .output/FStar_UInt64.krml .output/FStar_UInt16.krml .output/FStar_Bytes.krml .output/TestKrmlBytes.krml .output/Rust3.krml .output/FStar_Int64.krml .output/FStar_Int128.krml .output/FStar_Int16.krml .output/FStar_Int8.krml .output/FStar_Int_Cast.krml .output/FStar_BV.krml .output/FStar_Reflection_V2_Arith.krml .output/FStar_Tactics_BV.krml .output/FStar_UInt128.krml .output/FStar_Integers.krml .output/C_String.krml .output/Wasm10.krml .output/FStar_Buffer.krml .output/TestLib.krml .output/Hoisting.krml .output/PolyComp.krml .output/Underspec.krml .output/FStar_Int_Cast_Full.krml .output/LowStar_ImmutableBuffer.krml .output/LowStar_Literal.krml .output/StringLit.krml .output/LowStar_Comment.krml .output/ExternalEq.krml .output/UnusedPoly.krml .output/Spec_Loops.krml .output/LowStar_BufferOps.krml .output/C_Loops.krml .output/WasmSupport.krml .output/GlobalInit2.krml .output/GlobalInit.krml .output/MonomorphizationSeparate1.krml .output/LowStar_Ignore.krml .output/Wasm2.krml .output/Abbrev.krml .output/Ghost2.krml .output/RingBuffer.krml .output/Substitute.krml .output/Unused_A.krml .output/Unused_B.krml .output/Abstract.krml .output/C_Failure.krml .output/InitializerLists.krml .output/Ctypes1.krml .output/Ctypes2.krml .output/BadMatch.krml .output/MemCpyModel.krml .output/FStar_SizeT.krml .output/LowStar_Failure.krml .output/LowStar_Lib_LinkedList.krml .output/LowStar_Lib_LinkedList2.krml .output/LowStar_Lib_AssocList.krml .output/Inline.krml .output/LinkedList4.krml .output/Ctypes3.krml .output/InlineTest.krml .output/StaticHeaderAPI.krml .output/StaticHeaderLib.krml .output/StaticHeader.krml .output/UnusedParameter.krml .output/ParamAbbrev.krml .output/Rust4.krml .output/AbstractStructParameter.krml .output/AbstractStructAbstract.krml .output/AbstractStruct2.krml .output/IfThenElse.krml .output/PrivateInclude1.krml .output/PrivateInclude2.krml .output/Server.krml .output/DepPair.krml .output/VariableMerge.krml .output/Wasm9.krml .output/ML16Externals.krml .output/TwoUnitsAreOne.krml .output/ForwardDecl.krml .output/C89.krml .output/NameCollisionHelper.krml .output/FunctionalEncoding.krml .output/MemCpy.krml .output/RecordTypingLimitation.krml .output/LowStar_Printf.krml .output/LowStar_ConstBuffer.krml .output/ConstBuffer.krml .output/Wasm8.krml .output/ProperErasure.krml .output/StructWithUnitIsUnit.krml .output/Failure.krml .output/MonomorphizationCrash.krml .output/LowStar_UninitializedBuffer.krml .output/UBuffer.krml .output/Ignore.krml .output/Ctypes4.krml .output/FStar_Endianness.krml .output/LowStar_Endianness.krml .output/DataTypesSimple.krml .output/FStar_Modifies.krml .output/Rust1.krml .output/Null.krml .output/MallocFree.krml .output/GcTypes.krml .output/C_Nullity.krml .output/Structs.krml .output/Tuples.krml .output/DataTypesMut.krml .output/Deref.krml .output/AbstractStruct.krml .output/Unsound.krml .output/EqB.krml .output/MonomorphizationSeparate2.krml .output/RecursivePoly.krml .output/Polymorphic.krml .output/LinkedList2.krml .output/BlitNull.krml .output/ML16.krml .output/IfDef.krml .output/Wasm5.krml .output/Ghost1.krml .output/TailCalls2.krml .output/Scope.krml .output/Strlen.krml .output/ColoredRegion.krml .output/EtaStruct.krml .output/NoExtract.krml .output/Flat.krml .output/Literal.krml .output/ExitCode.krml .output/TopLevelArray.krml .output/Fill.krml .output/NoShadow.krml .output/TailCalls.krml .output/Uu.krml .output/Failwith.krml .output/DataTypesEq.krml .output/EmptyStruct.krml .output/TestAlloca.krml .output/Vla.krml .output/Layered.krml .output/LinkedList1.krml .output/HigherOrder4.krml .output/NameCollision.krml .output/CustomEq.krml .output/Renaming.krml .output/Wasm3.krml .output/HigherOrder6.krml .output/Wireguard.krml .output/WasmTrap.krml .output/NoConstructor.krml .output/Parameterized.krml .output/PatternAny.krml .output/OpEq.krml .output/InlineNoExtract.krml .output/PushPop.krml .output/Wasm6.krml .output/OptimizedOption.krml .output/Structs2.krml .output/Loops.krml .output/Recursive.krml .output/Rust5.krml .output/WildCard.krml .output/HigherOrder5.krml .output/Wasm4.krml .output/Mini.krml .output/Rust6.krml .output/HigherOrder2.krml .output/Macro.krml .output/ExternErased.krml .output/DataTypes.krml .output/TotalLoops.krml .output/Intro.krml .output/Printf.krml .output/Endianness.krml .output/Wasm7.krml .output/HigherOrder.krml .output/Verbatim.krml .output/Library.krml .output/Attributes.krml .output/HigherOrder3.krml .output/Private.krml .output/MutualStruct.krml .output/IfDefKrml.krml .output/Const.krml .output/FunPtr.krml .output/Shifting.krml .output/SimpleWasm.krml .output/Wasm1.krml .output/MulDiv.krml .output/LinkedList3.krml .output/MatchNested.krml .output/MutRec.krml .output/Comment.krml -bundle GcTypes=WindowsHack,*
F* version: <unknown>
KaRaMeL version: 65aab550
*/
#include "GcTypes.h"
Prims_list__uint32_t *test(void)
{
Prims_list__uint32_t *buf = KRML_HOST_MALLOC(sizeof (Prims_list__uint32_t));
buf[0U] = ((Prims_list__uint32_t){ .tag = Prims_Nil });
Prims_list__uint32_t *buf0 = KRML_HOST_MALLOC(sizeof (Prims_list__uint32_t));
buf0[0U] = ((Prims_list__uint32_t){ .tag = Prims_Cons, .hd = 1U, .tl = buf });
Prims_list__uint32_t *buf1 = KRML_HOST_MALLOC(sizeof (Prims_list__uint32_t));
buf1[0U] = ((Prims_list__uint32_t){ .tag = Prims_Cons, .hd = 0U, .tl = buf0 });
return buf1;
}
int32_t main(void)
{
Prims_list__uint32_t *scrut = test();
if
(scrut->tag == Prims_Cons && scrut->tl->tag == Prims_Cons && scrut->tl->tl->tag == Prims_Nil)
{
uint32_t y = scrut->tl->hd;
uint32_t x = scrut->hd;
return (int32_t)(x + y - 1U);
}
else
return (int32_t)1U;
}
AI was then used to create a more workable example without Low* primitives and only internally defined ones. This "penguin in a blender" test seems to show that some pure code opts are possible to apply. AI seems to think the memory safe version is the following (and did a pretty good initial attempt IMO):
enum List {
Nil,
Cons(u32, Box<List>),
}
fn test() -> List {
List::Cons(0, Box::new(List::Cons(1, Box::new(List::Nil))))
}
fn main() {
match test() {
List::Cons(x, box List::Cons(y, box List::Nil)) => {
let result = x + y - 1;
println!("Result: {}", result);
},
_ => println!("Unexpected pattern"),
}
}
And I think one last thing here: How can we be sure that this transpiled C++ version is anywhere close to the performance of the unmodified C version? We can do the same test as in the benchmark against the original C Code and the Benchmark code with a specific number of iterations like so:
int main() {
for(int I = 0; I < 50000000; ++I)
{
llist();
}
return 0;
}
The results are as follows:
➜ low_list git:(master) ✗ clang++ -O3 GCTypes-bench.cc -o GCTypesBench
➜ low_list git:(master) ✗ clang -O3 GCTypes-bench.c -o GCTypesBenchC
➜ low_list git:(master) ✗ time ./GCTypesBench
./GCTypesBench 7.32s user 0.00s system 99% cpu 7.322 total
➜ low_list git:(master) ✗ time ./GCTypesBenchC
./GCTypesBenchC 7.28s user 0.00s system 99% cpu 7.278 total
I would say that is fairly close as well. But how about unoptimized C?
➜ low_list git:(master) ✗ clang -O0 GCTypes-bench.c -o GCTypesBenchC
➜ low_list git:(master) ✗ time ./GCTypesBenchC
./GCTypesBenchC 9.89s user 0.00s system 99% cpu 9.894 total
Pretty solid results so far. Looks like the pseudo-ASM C++ is doing it's job. Unoptimized must surely show something is wrong though right? That code is just really not approachable. These are the results:
➜ low_list git:(master) ✗ clang++ -O0 GCTypes-bench.cc -o GCTypesBench
➜ low_list git:(master) ✗ time ./GCTypesBench
./GCTypesBench 10.79s user 0.00s system 99% cpu 10.792 total
It really could be worse apparently. The only real outlier here is "std" which seems to go from much less efficient to efficient starting at O1.
What exactly is the Rust code doing? Giving it the same treatment and lowering to C you find many more runtime checks and of course Rust does a lot of DCE along the way as well. Here is a section of Rust code transpiled to C from the sample provided before:
static void _ZN10playground4main17he86dcf1ea0a15fc1E(void) {
__PREFIXALIGN__(8) struct l_array_8_uint8_t llvm_cbe_self_2e_dbg_2e_spill_2e_i __POSTFIXALIGN__(8); /* Address-exposed local */
__PREFIXALIGN__(8) struct l_array_8_uint8_t llvm_cbe_f_2e_dbg_2e_spill_2e_i __POSTFIXALIGN__(8); /* Address-exposed local */
__PREFIXALIGN__(8) struct l_array_8_uint8_t llvm_cbe_x_2e_dbg_2e_spill_2e_i __POSTFIXALIGN__(8); /* Address-exposed local */
__PREFIXALIGN__(8) struct l_array_16_uint8_t llvm_cbe__3_2e_i __POSTFIXALIGN__(8); /* Address-exposed local */
__PREFIXALIGN__(8) struct l_array_16_uint8_t _56 __POSTFIXALIGN__(8); /* Address-exposed local */
__PREFIXALIGN__(4) struct l_array_4_uint8_t llvm_cbe_y_2e_dbg_2e_spill __POSTFIXALIGN__(4); /* Address-exposed local */
__PREFIXALIGN__(4) struct l_array_4_uint8_t llvm_cbe_x_2e_dbg_2e_spill __POSTFIXALIGN__(4); /* Address-exposed local */
__PREFIXALIGN__(8) struct l_array_48_uint8_t llvm_cbe__19 __POSTFIXALIGN__(8); /* Address-exposed local */
__PREFIXALIGN__(8) struct l_array_16_uint8_t llvm_cbe__16 __POSTFIXALIGN__(8); /* Address-exposed local */
__PREFIXALIGN__(8) struct l_array_16_uint8_t llvm_cbe__15 __POSTFIXALIGN__(8); /* Address-exposed local */
__PREFIXALIGN__(8) struct l_array_48_uint8_t llvm_cbe__12 __POSTFIXALIGN__(8); /* Address-exposed local */
__PREFIXALIGN__(4) struct l_array_4_uint8_t llvm_cbe_result __POSTFIXALIGN__(4); /* Address-exposed local */
__PREFIXALIGN__(8) struct l_array_16_uint8_t llvm_cbe__1 __POSTFIXALIGN__(8); /* Address-exposed local */
uint32_t _57;
void* llvm_cbe__21;
uint32_t _58;
void* llvm_cbe__22;
void* llvm_cbe__23;
uint32_t _59;
uint32_t llvm_cbe_x;
void* llvm_cbe__24;
uint64_t llvm_cbe__30;
uint32_t llvm_cbe_y;
struct l_unnamed_2 _60;
uint32_t llvm_cbe__9_2e_0;
uint32_t llvm_cbe__10_2e_0;
void* _61;
void* _62;
#line 13 "/playground/src/main.rs"
llvm_cbe__1 = _ZN10playground4test17h706ef2a73c95369dE();
_57 = *((uint32_t*)&llvm_cbe__1);
if (((((uint64_t)(uint32_t)_57)) == UINT64_C(1))) {
goto llvm_cbe_bb3;
} else {
goto llvm_cbe_bb2;
}
llvm_cbe_bb3:
llvm_cbe__21 = *(void**)(((&((uint8_t*)(&llvm_cbe__1))[((int64_t)8)])));
_58 = *(uint32_t*)llvm_cbe__21;
if (((((uint64_t)(uint32_t)_58)) == UINT64_C(1))) {
goto llvm_cbe_bb4;
} else {
goto llvm_cbe_bb2;
}
llvm_cbe_bb2:
#line 18 "/playground/src/main.rs"
llvm_cbe__19 = _ZN4core3fmt9Arguments9new_const17hc6a2e34e6c9027c8E((&alloc_f044d3424154ec6b3e934af0d1eab91c));
goto llvm_cbe_bb10;
llvm_cbe_bb4:
#line 13 "/playground/src/main.rs"
llvm_cbe__22 = *(void**)(((&((uint8_t*)(&llvm_cbe__1))[((int64_t)8)])));
llvm_cbe__23 = *(void**)(((&((uint8_t*)llvm_cbe__22)[((int64_t)8)])));
_59 = *(uint32_t*)llvm_cbe__23;
if (((((uint64_t)(uint32_t)_59)) == UINT64_C(0))) {
goto llvm_cbe_bb5;
} else {
goto llvm_cbe_bb2;
}
llvm_cbe_bb5:
#line 14 "/playground/src/main.rs"
llvm_cbe_x = *(uint32_t*)(((&((uint8_t*)(&llvm_cbe__1))[((int64_t)4)])));
*((uint32_t*)&llvm_cbe_x_2e_dbg_2e_spill) = llvm_cbe_x;
llvm_cbe__24 = *(void**)(((&((uint8_t*)(&llvm_cbe__1))[((int64_t)8)])));
llvm_cbe__30 = ((uint64_t)(uintptr_t)llvm_cbe__24);
if (((llvm_cbe__30 & 7) == UINT64_C(0))) {
goto llvm_cbe_bb15;
} else {
goto llvm_cbe_panic;
}
llvm_cbe_bb15:
llvm_cbe_y = *(uint32_t*)(((&((uint8_t*)llvm_cbe__24)[((int64_t)4)])));
*((uint32_t*)&llvm_cbe_y_2e_dbg_2e_spill) = llvm_cbe_y;
#line 15 "/playground/src/main.rs"
_60 = llvm_OC_uadd_OC_with_OC_overflow_OC_i32(llvm_cbe_x, llvm_cbe_y);
llvm_cbe__9_2e_0 = (_60.field0);
if (((_60.field1))) {
goto llvm_cbe_panic1;
} else {
goto llvm_cbe_bb6;
}
llvm_cbe_panic:
#line 14 "/playground/src/main.rs"
_ZN4core9panicking36panic_misaligned_pointer_dereference17hc1c33e077e1bc82cE(8, llvm_cbe__30, (&alloc_4f86935d518ff736ec845fdded627245));
__builtin_unreachable();
llvm_cbe_bb6:
#line 15 "/playground/src/main.rs"
llvm_cbe__10_2e_0 = llvm_sub_u32(llvm_cbe__9_2e_0, 1);
if ((((uint32_t)llvm_cbe__9_2e_0) < ((uint32_t)1u))) {
goto llvm_cbe_panic2;
} else {
goto llvm_cbe_bb7;
}
llvm_cbe_panic1:
_ZN4core9panicking11panic_const24panic_const_add_overflow17h08d3a954a3902361E((&alloc_6689ebc1ea65111c28ba615705391f7a));
goto llvm_cbe_unreachable;
llvm_cbe_unreachable:
__builtin_unreachable();
llvm_cbe_bb7:
*((uint32_t*)&llvm_cbe_result) = llvm_cbe__10_2e_0;
*((void**)&llvm_cbe_x_2e_dbg_2e_spill_2e_i) = (&llvm_cbe_result);
#line 114 "/rustc/fd8d6fbe505ecf913f5e2ca590c69a7da2789879/library/core/src/fmt/rt.rs"
*((void**)&llvm_cbe_f_2e_dbg_2e_spill_2e_i) = ((void*)&_ZN4core3fmt3num3imp52__EC_LT_EC_impl_EC_u20_EC_core_OC__OC_fmt_OC__OC_Display_EC_u20_EC_for_EC_u20_EC_u32_EC_GT_EC_3fmt17h65de1fcd5382b9caE);
#line 1807 "/rustc/fd8d6fbe505ecf913f5e2ca590c69a7da2789879/library/core/src/ptr/non_null.rs"
*((void**)&llvm_cbe_self_2e_dbg_2e_spill_2e_i) = (&llvm_cbe_result);
#line 103 "/rustc/fd8d6fbe505ecf913f5e2ca590c69a7da2789879/library/core/src/fmt/rt.rs"
*((void**)&llvm_cbe__3_2e_i) = (&llvm_cbe_result);
*(void**)(((&((uint8_t*)(&llvm_cbe__3_2e_i))[((int64_t)8)]))) = ((void*)&_ZN4core3fmt3num3imp52__EC_LT_EC_impl_EC_u20_EC_core_OC__OC_fmt_OC__OC_Display_EC_u20_EC_for_EC_u20_EC_u32_EC_GT_EC_3fmt17h65de1fcd5382b9caE);
#line 100 "/rustc/fd8d6fbe505ecf913f5e2ca590c69a7da2789879/library/core/src/fmt/rt.rs"
_61 = memcpy((&llvm_cbe__16), (&llvm_cbe__3_2e_i), 16);
goto llvm_cbe_bb8;
llvm_cbe_panic2:
#line 15 "/playground/src/main.rs"
_ZN4core9panicking11panic_const24panic_const_sub_overflow17h293a45d95f19d811E((&alloc_6689ebc1ea65111c28ba615705391f7a));
goto llvm_cbe_unreachable;
llvm_cbe_bb8:
#line 16 "/playground/src/main.rs"
_62 = memcpy((((&(&llvm_cbe__15)->array[((int64_t)0)]))), (&llvm_cbe__16), 16);
llvm_cbe__12 = _ZN4core3fmt9Arguments6new_v117hf04d341004d6cb1bE((&alloc_9bce62b4958e9ae9fca1ec2ed4203a0b), (&llvm_cbe__15));
goto llvm_cbe_bb9;
llvm_cbe_bb9:
_ZN3std2io5stdio6_print17ha75bd8669125f6d8E((&llvm_cbe__12));
goto llvm_cbe_bb17;
llvm_cbe_bb17:
goto llvm_cbe_bb11;
llvm_cbe_bb11:
#line 20 "/playground/src/main.rs"
_ZN4core3ptr37drop_in_place_EC_LT_EC_playground_OC__OC_List_EC_GT_EC_17ha1813a0cd7217cffE((&llvm_cbe__1));
return;
llvm_cbe_bb10:
#line 18 "/playground/src/main.rs"
_ZN3std2io5stdio6_print17ha75bd8669125f6d8E((&llvm_cbe__19));
goto llvm_cbe_bb18;
llvm_cbe_bb18:
goto llvm_cbe_bb11;
}
Rust embeds a lot of safety checks within the binary which is presumably what you would want if the program is modified outside of the author's control possibly. Formal verification can do those checks as well and it doesn't have to be embedded within the application itself. They both represent two different approaches and some of the newest advancements incorporate both by formally verifying Rust within the context of a provable methodology. All of these insights were gathered via the following Low* code:
module GcTypes
open FStar.HyperStack.ST
module U32 = FStar.UInt32
let test (): Stack (list U32.t) (fun _ -> true) (fun _ _ _ -> true) =
0ul :: 1ul :: []
let main (): Stack FStar.Int32.t (fun _ -> true) (fun _ _ _ -> true) =
match test () with
| x :: y :: [] ->
FStar.Int.Cast.uint32_to_int32 FStar.UInt32.(x +%^ y -%^ 1ul)
| _ ->
FStar.Int.Cast.uint32_to_int32 1ul
After switching to a more modern machine let's put this all together for the final results. We should know what to expect here:
───────┬────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
│ File: gcTypesCpp.cpp
───────┼────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
1 │ #include <optional>
2 │ #include <list>
3 │ #include <array>
4 │ #include <iostream>
5 │
6 │ using namespace std;
7 │
8 │ int main()
9 │ {
10 │
11 │ for(int I; I < 50000000; ++I)
12 │ {
13 │
14 │ list<optional<array<int, 1>>> sourceList {
15 │ array<int, 1>{0},
16 │ array<int, 1>{1},
17 │ {},
18 │ array<int, 1>{1},
19 │ array<int, 1>{0}
20 │ };
21 │
22 │
23 │ list<optional<array<int, 1>>> copiedList(sourceList);
24 │ };
25 │
26 │ return 0;
27 │ }
28 │
───────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
❯ time ./gcTypesCpp
./gcTypesCpp 25.21s user 0.02s system 99% cpu 25.230 total
❯ time ./gcTypesCpp01
./gcTypesCpp01 0.00s user 0.00s system 85% cpu 0.004 total
C++ using std does this operation in 25 seconds but O1 optimizes it out completely and only increments the counter.
(edit: I've decided to follow up and measure this on the same machine by forcing C++ not to optimize. At that point it is doing something different but interesting to look at the performance at any rate.)
───────┬──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
│ File: gcTypesCpp-NonOpt.cpp
───────┼──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
1 │ #include <optional>
2 │ #include <list>
3 │ #include <array>
4 │ #include <iostream>
5 │
6 │ using namespace std;
7 │
8 │ volatile int sink = 0; // Prevent optimization by introducing a volatile sink
9 │
10 │ int main() {
11 │ for (int I = 0; I < 50000000; ++I) {
12 │ list<optional<array<int, 1>>> sourceList{
13 │ array<int, 1>{0},
14 │ array<int, 1>{1},
15 │ {},
16 │ array<int, 1>{1},
17 │ array<int, 1>{0}
18 │ };
19 │
20 │ list<optional<array<int, 1>>> copiedList(sourceList);
21 │
22 │ // Introduce a side effect using a volatile variable
23 │ sink += copiedList.size();
24 │ };
25 │
26 │ return 0;
27 │ }
───────┴──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
❯ clang++ -std=c++17 gcTypesCpp-NonOpt.cpp -O3 -o gcTypesCpp-NonOpt
❯ time ./gcTypesCpp-NonOpt
./gcTypesCpp-NonOpt 5.91s user 0.00s system 99% cpu 5.912 total
───────┬────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
│ File: src/main.rs
───────┼────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
1 │ #![feature(box_patterns)]
2 │
3 │ #[allow(dead_code)]
4 │ #[derive(Clone)] // Deriving Clone for the enum
5 │ enum List {
6 │ Nil,
7 │ Cons(u32, Box<List>),
8 │ }
9 │
10 │ fn test() -> List {
11 │ List::Cons(0, Box::new(List::Cons(1, Box::new(List::Nil))))
12 │ }
13 │
14 │ fn main() {
15 │ for _i in 1..50000000 {
16 │ let _list = test();
17 │
18 │ let _copieidlist = test().clone();
19 │ }
20 │ }
───────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
❯ time cargo run src/main.rs
Compiling rust-list-test v0.1.0 (/home/jessebennett/rust-list-test)
Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.07s
Running `target/debug/rust-list-test src/main.rs`
cargo run src/main.rs 10.12s user 0.07s system 100% cpu 10.152 total
───────┬────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
│ File: src/main.rs
───────┼────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
1 │ #![feature(box_patterns)]
2 │
3 │ //#[allow(dead_code)]
4 │ #[derive(Clone)] // Deriving Clone for the enum
5 │ enum List {
6 │ Nil,
7 │ Cons(u32, Box<List>),
8 │ }
9 │
10 │ fn test() -> List {
11 │ List::Cons(0, Box::new(List::Cons(1, Box::new(List::Nil))))
12 │ }
13 │
14 │ fn main() {
15 │ for _i in 1..50000000 {
16 │ let _list = test();
17 │
18 │ let _copieidlist = test().clone();
19 │ }
20 │ }
───────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
❯ time cargo run --release src/main.rs
Compiling rust-list-test v0.1.0 (/home/jessebennett/rust-list-test)
warning: fields `0` and `1` are never read
--> src/main.rs:7:10
|
7 | Cons(u32, Box<List>),
| ---- ^^^ ^^^^^^^^^
| |
| fields in this variant
|
= note: `List` has a derived impl for the trait `Clone`, but this is intentionally ignored during dead code analysis
= note: `#[warn(dead_code)]` on by default
help: consider changing the fields to be of unit type to suppress this warning while preserving the field numbering, or remove the fields
|
7 | Cons((), ()),
| ~~ ~~
warning: `rust-list-test` (bin "rust-list-test") generated 1 warning
Finished `release` profile [optimized] target(s) in 0.08s
Running `target/release/rust-list-test src/main.rs`
cargo run --release src/main.rs 3.12s user 0.05s system 100% cpu 3.151 total
Testing Rust opts shows that the execution time is cut down by ~70% but yet still clocks in at 3 seconds and still does not optimize the loop out entirely. It is initially faster than the unoptimized C++ by ~250%.
───────┬────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
│ File: gcTypes.c
───────┼────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
1 │ #include <stdlib.h>
2 │ #include <stdint.h>
3 │ #include <stdio.h>
4 │
5 │ // Define the list structure
6 │ typedef struct Prims_list__uint32_t {
7 │ enum { Prims_Cons, Prims_Nil, Prims_Null } tag;
8 │ union {
9 │ struct {
10 │ struct Prims_list__uint32_t *tl;
11 │ uint32_t hd[1];
12 │ };
13 │ struct {
14 │ struct Prims_list__uint32_t *ntl;
15 │ uint32_t nd[];
16 │ };
17 │ };
18 │ } Prims_list__uint32_t;
19 │
20 │ // Function to create a new Cons node
21 │ Prims_list__uint32_t *cons(uint32_t hd, Prims_list__uint32_t *tl) {
22 │ Prims_list__uint32_t *node = (Prims_list__uint32_t *)malloc(sizeof(Prims_list__uint32_t));
23 │ node->tag = Prims_Cons;
24 │ node->hd[1] = hd;
25 │ node->tl = tl;
26 │ return node;
27 │ }
28 │
29 │ Prims_list__uint32_t *ncons(Prims_list__uint32_t *tl) {
30 │ Prims_list__uint32_t *node = (Prims_list__uint32_t *)malloc(sizeof(Prims_list__uint32_t));
31 │ node->tag = Prims_Null;
32 │ node->ntl = tl;
33 │ return node;
34 │ }
35 │
36 │ // Function to create a new Nil node
37 │ Prims_list__uint32_t *nil() {
38 │ Prims_list__uint32_t *node = (Prims_list__uint32_t *)malloc(sizeof(Prims_list__uint32_t));
39 │ node->tag = Prims_Nil;
40 │ node->tl = NULL;
41 │ return node;
42 │ }
43 │
44 │ // Deep copy function
45 │ Prims_list__uint32_t *deep_copy_list(Prims_list__uint32_t *list) {
46 │ if (list == NULL) {
47 │ return NULL;
48 │ }
49 │
50 │ if (list->tag == Prims_Nil) {
51 │ return nil();
52 │ }
53 │
54 │ if (list->tag == Prims_Null) {
55 │
56 │ Prims_list__uint32_t *new_list = ncons(NULL);
57 │
58 │ new_list->tl = deep_copy_list(list->ntl);
59 │
60 │ return new_list;
61 │ }
62 │
63 │ // Create a new node and copy the head value
64 │ Prims_list__uint32_t *new_list = cons(list->hd[1], NULL);
65 │
66 │ // Recursively copy the rest of the list
67 │ new_list->tl = deep_copy_list(list->tl);
68 │
69 │ return new_list;
70 │ }
71 │
72 │ // Free list function for cleanup
73 │ void free_list(Prims_list__uint32_t *list) {
74 │
75 │ if(list->tag == Prims_Null) {
76 │ while (list != NULL) {
77 │ Prims_list__uint32_t *next = list->ntl;
78 │ free(list);
79 │ list = next;
80 │ }
81 │ }
82 │ else {
83 │ while (list != NULL) {
84 │ Prims_list__uint32_t *next = list->tl;
85 │ free(list);
86 │ list = next;
87 │ }
88 │ }
89 │ }
90 │
91 │ // Print list function
92 │ void print_list(Prims_list__uint32_t *list) {
93 │ printf("[");
94 │ while (list != NULL) {
95 │ if (list->tag == Prims_Cons) {
96 │ printf("%u", list->hd[1]);
97 │ list = list->tl;
98 │ if (list != NULL) {
99 │ printf(" -> ");
100 │ }
101 │ } else if (list->tag == Prims_Nil) {
102 │ printf("Nil");
103 │ list = list->tl;
104 │ if (list != NULL) {
105 │ printf(" -> ");
106 │ }
107 │ } else if (list->tag == Prims_Null) {
108 │ printf("Null");
109 │ list = list->ntl;
110 │ if (list != NULL) {
111 │ printf(" -> ");
112 │ }
113 │ }
114 │ }
115 │ printf("]\n");
116 │ }
117 │
118 │ // Test the deep copy function
119 │ int main() {
120 │
121 │ for(int I = 0; I < 50000000; ++I)
122 │ {
123 │ // Create an example list: 0 -> 1 -> Null -> 1 -> 0 -> Nil
124 │ Prims_list__uint32_t *original_list = cons(0, cons(1, ncons(cons(1, cons(0, nil())))));
125 │
126 │ Prims_list__uint32_t *copied_list = deep_copy_list(original_list);
127 │
128 │ free_list(original_list);
129 │
130 │ free_list(copied_list);
131 │ }
132 │
133 │ return 0;
134 │ }
───────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
❯ time ./gcTypes
./gcTypes 6.68s user 0.00s system 99% cpu 6.686 total
❯ time ./gcTypes01
./gcTypes01 5.19s user 0.00s system 99% cpu 5.191 total
And finally we have C with light assistance from Low*. At 6.68s this is the fastest bar-none. I have even left in the code for the full implementation just as these other languages are pulling in the entire libraries to accomplish the same thing. Optimization shaves 1 second off and then it becomes the slowest but no effort has been made to actually do anything about that. This is just one test and many languages are multi-faceted when it comes to performance but these consistent results are showing the value of formal verification. I will leave you with wise words from one of the luminaries of my time who went by the name "Biggie Smalls":
You can be as good as the best of them. But as bad as the worst, so don't test me (Get money) You better move over (Get money)
Notorious B.I.G, Young M.A.F.I.A., Gettin' Money (Get Money Remix)
Optimized and converted to CPP at O3