| Loop Id: 28 | Module: exec | Source: miniqmc.cpp:412-475 [...] | Coverage: 0.05% |
|---|
| Loop Id: 28 | Module: exec | Source: miniqmc.cpp:412-475 [...] | Coverage: 0.05% |
|---|
(35) 0x415000 LDRSW X8, [SP, #312] |
(35) 0x415004 CMP X24, X8 |
(35) 0x415008 ADD X24, X24, #1 |
(35) 0x41500c B.GE 414e8c |
(35) 0x415010 LDR X8, [SP, #64] |
(35) 0x415014 LDR X8, [X8] |
(35) 0x415018 LDR X27, [X8, X24,LSL #3] |
(35) 0x41501c LDR X8, [X27, #5064] |
(35) 0x415020 STR X8, [SP, #152] |
(35) 0x415024 LDR X8, [SP, #136] |
(35) 0x415028 LDRSW X8, [X8] |
(35) 0x41502c STP X24, X8, [SP, #72] |
(35) 0x415030 CBZ W8, 415080 |
(35) 0x415034 TBNZ W8, #31, 41572c |
(35) 0x415038 ADD X8, X8, X8,LSL #1 |
(35) 0x41503c UBFM X24, X8, #61, #60 |
(35) 0x415040 ORR X0, XZR, X24 |
(35) 0x415044 BL 410110 |
(35) 0x415048 STR X0, [SP, #112] |
(35) 0x41504c ORR X9, XZR, #0xaa |
(35) 0x415050 SUB X8, X24, #24 |
(35) 0x415054 LDR X0, [SP, #112] |
(35) 0x415058 MOVZ X10, #24 |
(35) 0x41505c ORR W1, WZR, WZR |
(35) 0x415060 MOVK X9, #43691 |
(35) 0x415064 UMULH X8, X8, X9 |
(35) 0x415068 MOVZ W9, #24 |
(35) 0x41506c UBFM X8, X8, #4, #63 |
(35) 0x415070 MADD X2, X8, X9, X10 |
(35) 0x415074 BL 4101f0 |
(35) 0x415078 B 415084 |
(35) 0x415080 STR XZR, [SP, #112] |
(35) 0x415084 LDR X8, [SP, #56] |
(35) 0x415088 LDRSW X24, [X8] |
(35) 0x41508c ADRP X8, |
(35) 0x415090 ADD X8, X8, #3200 |
(35) 0x415094 STP X8, X24, [SP, #256] |
(35) 0x415098 ADD X8, SP, #256 |
(35) 0x41509c STP XZR, XZR, [X8, #16] |
(35) 0x4150a0 CBZ W24, 4150e8 |
(35) 0x4150a4 TBNZ W24, #31, 41572c |
(35) 0x4150a8 ADD X8, X24, X24,LSL #1 |
(35) 0x4150ac UBFM X28, X8, #61, #60 |
(35) 0x4150b0 ORR X0, XZR, X28 |
(35) 0x4150b4 BL 410110 |
(35) 0x4150b8 ORR X9, XZR, #0xaa |
(35) 0x4150bc SUB X8, X28, #24 |
(35) 0x4150c0 MOVZ X10, #24 |
(35) 0x4150c4 STP X24, X0, [SP, #272] |
(35) 0x4150c8 STR X24, [SP, #264] |
(35) 0x4150cc ORR W1, WZR, WZR |
(35) 0x4150d0 MOVK X9, #43691 |
(35) 0x4150d4 UMULH X8, X8, X9 |
(35) 0x4150d8 MOVZ W9, #24 |
(35) 0x4150dc UBFM X8, X8, #4, #63 |
(35) 0x4150e0 MADD X2, X8, X9, X10 |
(35) 0x4150e4 BL 4101f0 |
(35) 0x4150e8 LDR X1, [SP, #80] |
(35) 0x4150ec ADRP X8, |
(35) 0x4150f0 ADD X8, X8, #3128 |
(35) 0x4150f4 STR WZR, [SP, #292] |
(35) 0x4150f8 STP XZR, XZR, [SP, #232] |
(35) 0x4150fc STP XZR, X8, [SP, #248] |
(35) 0x415100 CBZ W1, 41512c |
(35) 0x415104 ADD X0, SP, #232 |
(35) 0x415108 BL 417560 |
(35) 0x41510c LDR X8, [SP, #80] |
(35) 0x415110 STR X0, [SP, #232] |
(35) 0x415114 ORR W1, WZR, WZR |
(35) 0x415118 UBFM X2, X8, #61, #60 |
(35) 0x41511c ADD X24, X0, X8,LSL #3 |
(35) 0x415120 BL 4101f0 |
(35) 0x415124 ORR X8, XZR, X24 |
(35) 0x415128 B 415138 |
(35) 0x41512c ORR X24, XZR, XZR |
(35) 0x415130 ORR X8, XZR, XZR |
(35) 0x415134 STR XZR, [SP, #232] |
(35) 0x415138 STP X8, X24, [SP, #240] |
(35) 0x41513c LDR X8, [SP, #160] |
(35) 0x415140 LDR X8, [X8] |
(35) 0x415144 LDR X0, [X8, #16] |
(35) 0x415148 BL 474220 |
(35) 0x41514c LDR X8, [SP, #104] |
(35) 0x415150 LDR W8, [X8] |
(35) 0x415154 CMP W8, #1 |
(35) 0x415158 B.LT 41562c |
(27) 0x41515c RDVL X8, #1 |
(27) 0x415160 ORR W9, WZR, WZR |
(27) 0x415164 ADD X24, X27, X8 |
(27) 0x415168 LDR X8, [SP, #136] |
(27) 0x41516c STR W9, [SP, #124] |
(27) 0x415170 LDR W16, [X8] |
(27) 0x415174 CMP W16, #1 |
(27) 0x415178 B.LT 4154a0 |
(27) 0x41517c LDR D0, [X27, #5024] |
(27) 0x415180 LDR D10, [X27, #5016] |
(27) 0x415184 LDR X8, [SP, #88] |
(27) 0x415188 ORR X17, XZR, XZR |
(27) 0x41518c CMP X8, #1 |
(27) 0x415190 LDR X18, [SP, #232] |
(27) 0x415194 LDR X28, [X27, #5008] |
(27) 0x415198 CSINC X0, X8, XZR, #8 |
(27) 0x41519c FSUB D11, D0, S10 |
(29) 0x4151a0 MOVI D0, #0 |
(29) 0x4151a4 FMOV D1, #1.0000000 |
(29) 0x4151a8 ORR X8, XZR, X0 |
(29) 0x4151ac B 415240 |
(30) 0x4151c0 LDR X9, [X27, #5000] |
(30) 0x4151c4 LDR X10, [X27, #16] |
(30) 0x4151c8 ORR X28, XZR, XZR |
(30) 0x4151cc AND X9, X9, #0x0 |
(30) 0x4151d0 AND X11, X10, #0x0 |
(30) 0x4151d4 SBFM X10, X10, #0, #0 |
(30) 0x4151d8 ORR X9, X11, X9 |
(30) 0x4151dc LDR X11, [X27, #3184] |
(30) 0x4151e0 AND X10, X10, X25 |
(30) 0x4151e4 EOR X9, X11, X9,LSR #1 |
(30) 0x4151e8 EOR X9, X9, X10 |
(30) 0x4151ec STR X9, [X27, #5000] |
(30) 0x4151f0 ORR X9, XZR, X28 |
(30) 0x4151f4 ADD X28, X28, #1 |
(30) 0x4151f8 ADD X10, X27, #16 |
(30) 0x4151fc SUBS X8, X8, #1 |
(30) 0x415200 STR X28, [X27, #5008] |
(30) 0x415204 LDR X9, [X10, X9,LSL #3] |
(30) 0x415208 UBFM X10, X9, #11, #42 |
(30) 0x41520c EOR X9, X10, X9 |
(30) 0x415210 MOVZ W10, #22144 |
(30) 0x415214 MOVK W10, #40236 |
(30) 0x415218 AND X10, X10, X9,LSL #7 |
(30) 0x41521c EOR X9, X10, X9 |
(30) 0x415220 MOVZ W10, #61382 |
(30) 0x415224 AND X10, X10, X9,LSL #15 |
(30) 0x415228 EOR X9, X10, X9 |
(30) 0x41522c EOR X9, X9, X9,LSR #18 |
(30) 0x415230 UCVTF D2, X9 |
(30) 0x415234 FMADD D0, D1, D2, D0 |
(30) 0x415238 FMUL D1, D1, D8 |
(30) 0x41523c B.EQ 415440 |
(30) 0x415240 CMP X28, #624 |
(30) 0x415244 B.CC 4151f0 |
(30) 0x415248 PTRUE P2.D, ALL |
(30) 0x41524c ADD X10, X27, #16 |
(30) 0x415250 ORR X9, XZR, XZR |
(30) 0x415254 LD1RD {Z2.D}, P2/Z, [X10] |
(30) 0x415258 SUB X10, X29, #32 |
(30) 0x41525c CNTW X12, ALL |
(30) 0x415260 LDR P3, [X10, #511, MUL VL] |
(31) 0x415264 ADD X10, X27, X9,LSL #3 |
(31) 0x415268 ADD X11, X24, X9,LSL #3 |
(31) 0x41526c ADD X9, X9, X12 |
(31) 0x415270 LD1D {Z3.D}, P2/Z, [X10, X26,LSL #3] |
(31) 0x415274 LD1D {Z7.D}, P2/Z, [X11, X23,LSL #3] |
(31) 0x415278 SPLICE Z2.D, P3, Z2.D, Z3.D |
(31) 0x41527c ORR Z4.D, Z3.D, Z3.D |
(31) 0x415280 ORR Z5.D, Z3.D, Z3.D |
(31) 0x415284 AND Z3.D, Z3.D, #0x1 |
(31) 0x415288 AND Z4.D, Z4.D, #0x7ffffffe |
(31) 0x41528c CMPEQ P0.D, P2/Z, Z3.D, #0 |
(31) 0x415290 DUP Z3.D, X25 |
(31) 0x415294 AND Z2.D, Z2.D, #0x80000000 |
(31) 0x415298 ORR Z4.D, Z4.D, Z2.D |
(31) 0x41529c LD1D {Z2.D}, P2/Z, [X11, X26,LSL #3] |
(31) 0x4152a0 LSR Z4.D, Z4.D, #63 |
(31) 0x4152a4 SPLICE Z5.D, P3, Z5.D, Z2.D |
(31) 0x4152a8 ORR Z6.D, Z2.D, Z2.D |
(31) 0x4152ac AND Z6.D, Z6.D, #0x7ffffffe |
(31) 0x4152b0 AND Z5.D, Z5.D, #0x80000000 |
(31) 0x4152b4 ORR Z5.D, Z6.D, Z5.D |
(31) 0x4152b8 LD1D {Z6.D}, P2/Z, [X10, X23,LSL #3] |
(31) 0x4152bc LSR Z5.D, Z5.D, #63 |
(31) 0x4152c0 EOR Z5.D, Z5.D, Z7.D |
(31) 0x4152c4 EOR Z4.D, Z4.D, Z6.D |
(31) 0x4152c8 ORR Z6.D, Z2.D, Z2.D |
(31) 0x4152cc AND Z6.D, Z6.D, #0x1 |
(31) 0x4152d0 CMPEQ P1.D, P2/Z, Z6.D, #0 |
(31) 0x4152d4 EOR Z6.D, Z4.D, Z3.D |
(31) 0x4152d8 EOR Z3.D, Z5.D, Z3.D |
(31) 0x4152dc CMP X22, X9 |
(31) 0x4152e0 SEL Z4.D, P0, Z4.D, Z6.D |
(31) 0x4152e4 SEL Z3.D, P1, Z5.D, Z3.D |
(31) 0x4152e8 ST1D {Z4.D}, P2, [X10, X21,LSL #3] |
(31) 0x4152ec ST1D {Z3.D}, P2, [X11, X21,LSL #3] |
(31) 0x4152f0 B.NE 415264 |
(30) 0x4152f4 SUB X9, X29, #32 |
(30) 0x4152f8 LDP X10, X11, [SP, #176] |
(30) 0x4152fc LDR X14, [SP, #192] |
(30) 0x415300 RDVL X15, #1 |
(30) 0x415304 ADD X10, X27, X10 |
(30) 0x415308 LDR P0, [X9, #510, MUL VL] |
(30) 0x41530c LASTB X9, P0, Z2.D |
(30) 0x415310 HINT #0 |
(30) 0x415314 HINT #0 |
(30) 0x415318 HINT #0 |
(30) 0x41531c HINT #0 |
(32) 0x415320 AND X12, X9, #0x0 |
(32) 0x415324 LDR X9, [X10, #24] |
(32) 0x415328 SUBS X11, X11, #1 |
(32) 0x41532c AND X13, X9, #0x0 |
(32) 0x415330 ORR X12, X13, X12 |
(32) 0x415334 LDR X13, [X10, #3192] |
(32) 0x415338 EOR X12, X13, X12,LSR #1 |
(32) 0x41533c SBFM X13, X9, #0, #0 |
(32) 0x415340 AND X13, X13, X25 |
(32) 0x415344 EOR X12, X12, X13 |
(32) 0x415348 STR X12, [X10, #16] |
(32) 0x41534c ADD X10, X10, #8 |
(32) 0x415350 B.NE 415320 |
(30) 0x415354 PTRUE P1.D, ALL |
(30) 0x415358 ADD X9, X27, #1832 |
(30) 0x41535c SUB X12, X29, #32 |
(30) 0x415360 ADD X10, X27, #1840 |
(30) 0x415364 LDR P2, [X12, #511, MUL VL] |
(30) 0x415368 CNTD X11, ALL |
(30) 0x41536c LD1RD {Z2.D}, P1/Z, [X9] |
(30) 0x415370 ORR X9, XZR, X14 |
(30) 0x415374 HINT #0 |
(30) 0x415378 HINT #0 |
(30) 0x41537c HINT #0 |
(33) 0x415380 ORR Z3.D, Z2.D, Z2.D |
(33) 0x415384 LDR Z2, [X10, MUL VL] |
(33) 0x415388 SPLICE Z3.D, P2, Z3.D, Z2.D |
(33) 0x41538c ORR Z4.D, Z2.D, Z2.D |
(33) 0x415390 AND Z4.D, Z4.D, #0x7ffffffe |
(33) 0x415394 AND Z3.D, Z3.D, #0x80000000 |
(33) 0x415398 ORR Z3.D, Z4.D, Z3.D |
(33) 0x41539c ORR Z4.D, Z2.D, Z2.D |
(33) 0x4153a0 AND Z4.D, Z4.D, #0x1 |
(33) 0x4153a4 LSR Z3.D, Z3.D, #63 |
(33) 0x4153a8 CMPEQ P0.D, P1/Z, Z4.D, #0 |
(33) 0x4153ac LD1D {Z4.D}, P1/Z, [X10, X19,LSL #3] |
(33) 0x4153b0 SUBS X9, X9, X11 |
(33) 0x4153b4 EOR Z3.D, Z3.D, Z4.D |
(33) 0x4153b8 DUP Z4.D, X25 |
(33) 0x4153bc EOR Z4.D, Z3.D, Z4.D |
(33) 0x4153c0 SEL Z3.D, P0, Z3.D, Z4.D |
(33) 0x4153c4 ST1D {Z3.D}, P1, [X10, X20,LSL #3] |
(33) 0x4153c8 ADD X10, X10, X15 |
(33) 0x4153cc B.NE 415380 |
(30) 0x4153d0 CMP X14, #396 |
(30) 0x4153d4 B.EQ 4151c0 |
(30) 0x4153d8 SUB X9, X29, #32 |
(30) 0x4153dc LDR X10, [SP, #168] |
(30) 0x4153e0 SUB X11, X14, #396 |
(30) 0x4153e4 LDR P0, [X9, #509, MUL VL] |
(30) 0x4153e8 ADD X10, X27, X10 |
(30) 0x4153ec LASTB X9, P0, Z2.D |
(30) 0x4153f0 HINT #0 |
(30) 0x4153f4 HINT #0 |
(30) 0x4153f8 HINT #0 |
(30) 0x4153fc HINT #0 |
(34) 0x415400 AND X12, X9, #0x0 |
(34) 0x415404 LDR X9, [X10, #1840] |
(34) 0x415408 ADDS X11, X11, #1 |
(34) 0x41540c AND X13, X9, #0x0 |
(34) 0x415410 ORR X12, X13, X12 |
(34) 0x415414 LDR X13, [X10, #16] |
(34) 0x415418 EOR X12, X13, X12,LSR #1 |
(34) 0x41541c SBFM X13, X9, #0, #0 |
(34) 0x415420 AND X13, X13, X25 |
(34) 0x415424 EOR X12, X12, X13 |
(34) 0x415428 STR X12, [X10, #1832] |
(34) 0x41542c ADD X10, X10, #8 |
(34) 0x415430 B.CC 415400 |
(30) 0x415434 B 4151c0 |
(29) 0x415440 FDIV D0, D0, D1 |
(29) 0x415444 FCMP D0, D9 |
(29) 0x415448 B.GE 415464 |
(29) 0x41544c FMADD D0, D0, D11, D10 |
(29) 0x415450 STR D0, [X18, X17,LSL #3] |
(29) 0x415454 ADD X17, X17, #1 |
(29) 0x415458 CMP X17, X16 |
(29) 0x41545c B.NE 4151a0 |
(27) 0x415460 B 4154a0 |
(29) 0x415464 FMOV D0, #1.0000000 |
(29) 0x415468 MOVI D1, #0 |
(29) 0x41546c STR X16, [SP, #144] |
(29) 0x415470 STP X18, X17, [SP, #40] |
(29) 0x415474 STR X0, [SP, #32] |
(29) 0x415478 BL 410640 |
(29) 0x41547c LDP X0, X18, [SP, #32] |
(29) 0x415480 LDR X17, [SP, #48] |
(29) 0x415484 LDR X16, [SP, #144] |
(29) 0x415488 FMADD D0, D0, D11, D10 |
(29) 0x41548c STR D0, [X18, X17,LSL #3] |
(29) 0x415490 ADD X17, X17, #1 |
(29) 0x415494 CMP X17, X16 |
(29) 0x415498 B.NE 4151a0 |
(27) 0x41549c HINT #0 |
(27) 0x4154a0 LDR X8, [SP, #96] |
(27) 0x4154a4 LDR W2, [X8] |
(27) 0x4154a8 LDR X1, [SP, #112] |
(27) 0x4154ac ORR X0, XZR, X27 |
(27) 0x4154b0 BL 417b00 |
(27) 0x4154b4 LDR X8, [SP, #136] |
(27) 0x4154b8 LDR W8, [X8] |
(27) 0x4154bc CMP W8, #1 |
(27) 0x4154c0 B.LT 415608 |
0x4154c4 LDR X8, [SP, #112] |
0x4154c8 ORR X28, XZR, XZR |
0x4154cc STR X8, [SP, #144] |
0x4154d0 B 4154fc |
0x4154e0 LDP X8, X9, [SP, #136] |
0x4154e4 ADD X28, X28, #1 |
0x4154e8 LDRSW X8, [X8] |
0x4154ec ADD X9, X9, #24 |
0x4154f0 STR X9, [SP, #144] |
0x4154f4 CMP X28, X8 |
0x4154f8 B.GE 415608 |
0x4154fc LDR X8, [SP, #160] |
0x415500 LDR X8, [X8] |
0x415504 LDR X0, [X8, #40] |
0x415508 BL 474220 |
0x41550c LDR X1, [SP, #152] |
0x415510 MOVZ W8, #5072 |
0x415514 ORR W2, WZR, W28 |
0x415518 ADD X0, X27, X8 |
0x41551c BL 4196d0 |
0x415520 LDR X8, [SP, #160] |
0x415524 LDR X8, [X8] |
0x415528 LDR X0, [X8, #40] |
0x41552c BL 474340 |
0x415530 LDP X2, X0, [SP, #144] |
0x415534 MOVZ W3, #1 |
0x415538 ORR W1, WZR, W28 |
0x41553c BL 453060 |
0x415540 LDR X8, [SP, #160] |
0x415544 LDR X8, [X8] |
0x415548 LDR X0, [X8, #48] |
0x41554c BL 474220 |
0x415550 STP XZR, XZR, [SP, #208] |
0x415554 STR XZR, [SP, #224] |
0x415558 LDR X1, [SP, #152] |
0x41555c MOVZ W8, #5072 |
0x415560 ADD X3, SP, #208 |
0x415564 ORR W2, WZR, W28 |
0x415568 ADD X0, X27, X8 |
0x41556c BL 4197f0 |
0x415570 LDR X8, [SP, #160] |
0x415574 LDR X8, [X8] |
0x415578 LDR X0, [X8, #48] |
0x41557c BL 474340 |
0x415580 LDR X8, [SP, #232] |
0x415584 LDR D0, [X8, X28,LSL #3] |
0x415588 LDR X8, [SP, #128] |
0x41558c LDR D1, [X8] |
0x415590 FCMP D0, D1 |
0x415594 B.GE 4155e8 |
0x415598 LDR X8, [SP, #160] |
0x41559c LDR X8, [X8] |
0x4155a0 LDR X0, [X8, #56] |
0x4155a4 BL 474220 |
0x4155a8 LDR X1, [SP, #152] |
0x4155ac MOVZ W8, #5072 |
0x4155b0 ORR W2, WZR, W28 |
0x4155b4 ADD X0, X27, X8 |
0x4155b8 BL 4199d0 |
0x4155bc LDR X8, [SP, #160] |
0x4155c0 LDR X8, [X8] |
0x4155c4 LDR X0, [X8, #56] |
0x4155c8 BL 474340 |
0x4155cc LDR X0, [SP, #152] |
0x4155d0 ORR W1, WZR, W28 |
0x4155d4 BL 453860 |
0x4155d8 LDR W8, [SP, #300] |
0x4155dc ADD W8, W8, #1 |
0x4155e0 STR W8, [SP, #300] |
0x4155e4 B 4154e0 |
0x4155e8 LDR X0, [SP, #152] |
0x4155ec ORR X1, XZR, X28 |
0x4155f0 BL 453b90 |
0x4155f4 MOVZ W8, #5072 |
0x4155f8 ORR W1, WZR, W28 |
0x4155fc ADD X0, X27, X8 |
0x415600 BL 419b10 |
0x415604 B 4154e0 |
(27) 0x415608 MOVZ W8, #5072 |
(27) 0x41560c ADD X0, X27, X8 |
(27) 0x415610 BL 419aa0 |
(27) 0x415614 LDR W9, [SP, #124] |
(27) 0x415618 LDR X8, [SP, #104] |
(27) 0x41561c ADD W9, W9, #1 |
(27) 0x415620 LDR W8, [X8] |
(27) 0x415624 CMP W9, W8 |
(27) 0x415628 B.LT 415168 |
(35) 0x41562c LDR X0, [SP, #152] |
(35) 0x415630 ORR W1, WZR, WZR |
(35) 0x415634 BL 453ba0 |
(35) 0x415638 LDR X1, [SP, #152] |
(35) 0x41563c MOVZ W8, #5072 |
(35) 0x415640 ADD X0, X27, X8 |
(35) 0x415644 BL 419b20 |
(35) 0x415648 LDR X8, [SP, #160] |
(35) 0x41564c LDR X8, [X8] |
(35) 0x415650 LDR X0, [X8, #16] |
(35) 0x415654 BL 474340 |
(35) 0x415658 MOVZ W8, #5184 |
(35) 0x41565c ADD X1, SP, #256 |
(35) 0x415660 ADD X0, X27, X8 |
(35) 0x415664 BL 416480 |
(35) 0x415668 LDR X8, [SP, #160] |
(35) 0x41566c LDR X8, [X8] |
(35) 0x415670 LDR X24, [X8, #24] |
(35) 0x415674 ORR X0, XZR, X24 |
(35) 0x415678 BL 474220 |
(35) 0x41567c LDR X1, [SP, #152] |
(35) 0x415680 MOVZ W8, #5184 |
(35) 0x415684 ADD X0, X27, X8 |
(35) 0x415688 MOVZ W8, #5072 |
(35) 0x41568c ADD X2, X27, X8 |
(35) 0x415690 BL 416760 |
(35) 0x415694 ORR X0, XZR, X24 |
(35) 0x415698 BL 474340 |
(35) 0x41569c LDR X0, [SP, #232] |
(35) 0x4156a0 LDR X24, [SP, #72] |
(35) 0x4156a4 CBZ X0, 4156b8 |
(35) 0x4156a8 LDR X8, [SP, #248] |
(35) 0x4156ac CMP X8, X0 |
(35) 0x4156b0 B.EQ 415730 |
(35) 0x4156b4 BL 410810 |
(35) 0x4156b8 ADRP X8, |
(35) 0x4156bc ADD X8, X8, #3200 |
(35) 0x4156c0 STR X8, [SP, #256] |
(35) 0x4156c4 LDR X8, [SP, #272] |
(35) 0x4156c8 CBZ X8, 4156dc |
(35) 0x4156cc LDR X0, [SP, #280] |
(35) 0x4156d0 ADD X8, X8, X8,LSL #1 |
(35) 0x4156d4 UBFM X1, X8, #61, #60 |
(35) 0x4156d8 BL 4100e0 |
(35) 0x4156dc LDR X8, [SP, #80] |
(35) 0x4156e0 CBZ W8, 415000 |
(35) 0x4156e4 LDR X0, [SP, #112] |
(35) 0x4156e8 ADD X8, X8, X8,LSL #1 |
(35) 0x4156ec UBFM X1, X8, #61, #60 |
(35) 0x4156f0 BL 4100e0 |
(35) 0x4156f4 B 415000 |
/home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/build/miniqmc/src/Particle/ParticleAttrib.h: 34 - 34 |
-------------------------------------------------------------------------------- |
34: explicit inline ParticleAttrib(size_t n = 0) : __my_base(n), InUnit(0) {} |
/home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/build/miniqmc/src/Platforms/CPU/SIMD/Mallocator.hpp: 76 - 78 |
-------------------------------------------------------------------------------- |
76: if (n == 0) |
77: throw std::runtime_error("Mallocator::deallocate does not accept size 0 allocations."); |
78: free(p); |
/home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVector.h: 62 - 62 |
-------------------------------------------------------------------------------- |
62: X[d] = T(0); |
/usr/lib/gcc/aarch64-amazon-linux/14/../../../../include/c++/14/bits/alloc_traits.h: 335 - 335 |
-------------------------------------------------------------------------------- |
335: { return __a.allocate(__n); } |
/usr/lib/gcc/aarch64-amazon-linux/14/../../../../include/c++/14/bits/new_allocator.h: 134 - 172 |
-------------------------------------------------------------------------------- |
134: if (__builtin_expect(__n > this->_M_max_size(), false)) |
[...] |
151: return static_cast<_Tp*>(_GLIBCXX_OPERATOR_NEW(__n * sizeof(_Tp))); |
[...] |
172: _GLIBCXX_OPERATOR_DELETE(_GLIBCXX_SIZED_DEALLOC(__p, __n)); |
/usr/lib/gcc/aarch64-amazon-linux/14/../../../../include/c++/14/bits/random.h: 2011 - 2011 |
-------------------------------------------------------------------------------- |
2011: return (__aurng() * (__p.b() - __p.a())) + __p.a(); |
/usr/lib/gcc/aarch64-amazon-linux/14/../../../../include/c++/14/bits/stl_construct.h: 119 - 119 |
-------------------------------------------------------------------------------- |
119: ::new((void*)__p) _Tp(std::forward<_Args>(__args)...); |
/home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 47 - 301 |
-------------------------------------------------------------------------------- |
47: { |
48: if (n) |
[...] |
144: virtual ~Vector() { free(); } |
[...] |
210: if (nAllocated) |
211: { |
212: mAllocator.deallocate(X, nAllocated); |
[...] |
289: T* X = nullptr; |
[...] |
300: X = mAllocator.allocate(n); |
301: nLocal = n; |
/usr/lib/gcc/aarch64-amazon-linux/14/../../../../include/c++/14/bits/refwrap.h: 351 - 351 |
-------------------------------------------------------------------------------- |
351: { return *_M_data; } |
/home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/build/miniqmc/src/Utilities/NewTimer.h: 242 - 249 |
-------------------------------------------------------------------------------- |
242: ScopeGuard(TIMER& t) : timer(t) { timer.start(); } |
[...] |
249: ~ScopeGuard() { timer.stop(); } |
/usr/lib/gcc/aarch64-amazon-linux/14/../../../../include/c++/14/bits/stl_vector.h: 99 - 1131 |
-------------------------------------------------------------------------------- |
99: : _M_start(), _M_finish(), _M_end_of_storage() |
[...] |
368: _M_deallocate(_M_impl._M_start, |
369: _M_impl._M_end_of_storage - _M_impl._M_start); |
[...] |
380: return __n != 0 ? _Tr::allocate(_M_impl, __n) : pointer(); |
[...] |
388: if (__p) |
[...] |
398: this->_M_impl._M_start = this->_M_allocate(__n); |
399: this->_M_impl._M_finish = this->_M_impl._M_start; |
400: this->_M_impl._M_end_of_storage = this->_M_impl._M_start + __n; |
[...] |
1131: return *(this->_M_impl._M_start + __n); |
/home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/build/miniqmc/src/Utilities/StdRandom.h: 102 - 106 |
-------------------------------------------------------------------------------- |
102: for (int i = 0; i < n; ++i) |
103: d[i] = uniform(myRNG); |
104: } |
105: |
106: inline void generate_normal(T* restrict d, int n) { BoxMuller2::generate(*this, d, n); } |
/usr/lib/gcc/aarch64-amazon-linux/14/../../../../include/c++/14/bits/random.tcc: 404 - 3374 |
-------------------------------------------------------------------------------- |
404: for (size_t __k = 0; __k < (__n - __m); ++__k) |
405: { |
406: _UIntType __y = ((_M_x[__k] & __upper_mask) |
407: | (_M_x[__k + 1] & __lower_mask)); |
408: _M_x[__k] = (_M_x[__k + __m] ^ (__y >> 1) |
409: ^ ((__y & 0x01) ? __a : 0)); |
410: } |
411: |
412: for (size_t __k = (__n - __m); __k < (__n - 1); ++__k) |
413: { |
414: _UIntType __y = ((_M_x[__k] & __upper_mask) |
415: | (_M_x[__k + 1] & __lower_mask)); |
416: _M_x[__k] = (_M_x[__k + (__m - __n)] ^ (__y >> 1) |
417: ^ ((__y & 0x01) ? __a : 0)); |
418: } |
419: |
420: _UIntType __y = ((_M_x[__n - 1] & __upper_mask) |
421: | (_M_x[0] & __lower_mask)); |
422: _M_x[__n - 1] = (_M_x[__m - 1] ^ (__y >> 1) |
423: ^ ((__y & 0x01) ? __a : 0)); |
[...] |
458: if (_M_p >= state_size) |
459: _M_gen_rand(); |
460: |
461: // Calculate o(x(i)). |
462: result_type __z = _M_x[_M_p++]; |
463: __z ^= (__z >> __u) & __d; |
464: __z ^= (__z << __s) & __b; |
465: __z ^= (__z << __t) & __c; |
466: __z ^= (__z >> __l); |
[...] |
3365: for (size_t __k = __m; __k != 0; --__k) |
3366: { |
3367: __sum += _RealType(__urng() - __urng.min()) * __tmp; |
3368: __tmp *= __r; |
3369: } |
3370: __ret = __sum / __tmp; |
3371: if (__builtin_expect(__ret >= _RealType(1), 0)) |
3372: { |
3373: #if _GLIBCXX_USE_C99_MATH_FUNCS |
3374: __ret = std::nextafter(_RealType(1), _RealType(0)); |
/home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/build/miniqmc/src/Drivers/miniqmc.cpp: 412 - 475 |
-------------------------------------------------------------------------------- |
412: for (int iw = 0; iw < nmovers; iw++) |
413: { |
414: auto& els = *mover_list[iw]->els_ptr; |
415: auto& random_th = mover_list[iw]->rng; |
416: auto& wavefunction = mover_list[iw]->wavefunction; |
417: auto& ecp = mover_list[iw]->nlpp; |
418: |
419: ParticlePos delta(nels); |
420: ParticlePos rOnSphere(nknots); |
421: |
422: aligned_vector<RealType> ur(nels); |
423: |
424: Timers[Timer_Diffusion].get().start(); |
425: for (int l = 0; l < nsubsteps; ++l) // drift-and-diffusion |
426: { |
427: random_th.generate_uniform(ur.data(), nels); |
428: random_th.generate_normal(&delta[0][0], nels3); |
429: for (int iel = 0; iel < nels; ++iel) |
430: { |
431: // Compute gradient at the current position |
432: Timers[Timer_evalGrad].get().start(); |
433: PosType grad_now = wavefunction.evalGrad(els, iel); |
434: Timers[Timer_evalGrad].get().stop(); |
435: |
436: // Construct trial move |
437: els.makeMove(iel, delta[iel]); |
438: |
439: // Compute gradient at the trial position |
440: Timers[Timer_ratioGrad].get().start(); |
441: PosType grad_new; |
442: wavefunction.ratioGrad(els, iel, grad_new); |
443: Timers[Timer_ratioGrad].get().stop(); |
444: |
445: // Accept/reject the trial move |
446: if (ur[iel] < accept) // MC |
447: { |
448: // Update position, and update temporary storage |
449: Timers[Timer_Update].get().start(); |
450: wavefunction.acceptMove(els, iel); |
451: Timers[Timer_Update].get().stop(); |
452: els.acceptMove(iel); |
453: my_accepted++; |
454: } |
455: else |
456: { |
457: els.rejectMove(iel); |
458: wavefunction.restore(iel); |
459: } |
460: } // iel |
461: wavefunction.completeUpdates(); |
462: } // substeps |
463: |
464: els.donePbyP(); |
465: |
466: // evaluate Kinetic Energy |
467: wavefunction.evaluateGL(els); |
468: |
469: Timers[Timer_Diffusion].get().stop(); |
470: |
471: // Compute NLPP energy using integral over spherical points |
472: { |
473: ecp.randomize(rOnSphere); // pick random sphere |
474: ScopedTimer local(Timers[Timer_ECP]); |
475: ecp.evaluate(els, wavefunction); |
/usr/lib/gcc/aarch64-amazon-linux/14/../../../../include/c++/14/bits/stl_algobase.h: 939 - 940 |
-------------------------------------------------------------------------------- |
939: for (; __first != __last; ++__first) |
940: *__first = __value; |
/usr/lib/gcc/aarch64-amazon-linux/14/../../../../include/c++/14/bits/unique_ptr.h: 193 - 193 |
-------------------------------------------------------------------------------- |
193: pointer _M_ptr() const noexcept { return std::get<0>(_M_t); } |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►100.00+ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_fork_call | libomp.so | |
| ○ | __kmpc_fork_call | libomp.so | |
| ○ | main | miniqmc.cpp:409 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | new_allocator.h:172 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►66.67+ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_fork_call | libomp.so | |
| ○ | __kmpc_fork_call | libomp.so | |
| ○ | main | miniqmc.cpp:409 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | new_allocator.h:172 | exec |
| ►33.33+ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►60.00+ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►40.00+ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_fork_call | libomp.so | |
| ○ | __kmpc_fork_call | libomp.so | |
| ○ | main | miniqmc.cpp:409 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | new_allocator.h:172 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►90.00+ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►10.00+ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_fork_call | libomp.so | |
| ○ | __kmpc_fork_call | libomp.so | |
| ○ | main | miniqmc.cpp:409 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | new_allocator.h:172 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►69.23+ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►30.77+ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_fork_call | libomp.so | |
| ○ | __kmpc_fork_call | libomp.so | |
| ○ | main | miniqmc.cpp:409 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | new_allocator.h:172 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►95.45+ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►4.55+ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_fork_call | libomp.so | |
| ○ | __kmpc_fork_call | libomp.so | |
| ○ | main | miniqmc.cpp:409 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | new_allocator.h:172 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►97.01+ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►2.99+ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_fork_call | libomp.so | |
| ○ | __kmpc_fork_call | libomp.so | |
| ○ | main | miniqmc.cpp:409 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | new_allocator.h:172 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►100.00+ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►97.39+ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►2.61+ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_fork_call | libomp.so | |
| ○ | __kmpc_fork_call | libomp.so | |
| ○ | main | miniqmc.cpp:409 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | new_allocator.h:172 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►96.33+ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 | |
| ►3.67+ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_fork_call | libomp.so | |
| ○ | __kmpc_fork_call | libomp.so | |
| ○ | main | miniqmc.cpp:409 | exec |
| ○ | __libc_start_call_main | libc.so.6 | |
| ○ | __libc_start_main | libc.so.6 | |
| ○ | _start | new_allocator.h:172 | exec |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►100.00+ | __kmp_invoke_microtask | libomp.so | |
| ○ | __kmp_invoke_task_func | libomp.so | |
| ○ | __kmp_launch_thread | libomp.so | |
| ○ | __kmp_launch_worker(void*) | libomp.so | |
| ○ | start_thread | libc.so.6 | |
| ○ | thread_start | libc.so.6 |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| min | med | avg | max |
|---|---|---|---|
| Percentile Index | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
|---|---|---|---|---|---|---|---|---|---|---|
| Value |
| Path / |
| Metric | Value |
|---|---|
| CQA speedup if no scalar integer | 1.34 |
| CQA speedup if FP arith vectorized | 1.00 |
| CQA speedup if fully vectorized | 6.19 |
| CQA speedup if no inter-iteration dependency | NA |
| CQA speedup if next bottleneck killed | 1.01 |
| Bottlenecks | P10, |
| Function | main.omp_outlined.62 |
| Source | TinyVector.h:62-62,refwrap.h:351-351,stl_vector.h:1131-1131,miniqmc.cpp:416-416,miniqmc.cpp:429-429,miniqmc.cpp:432-434,miniqmc.cpp:437-437,miniqmc.cpp:440-443,miniqmc.cpp:446-446,miniqmc.cpp:449-454,miniqmc.cpp:457-458 |
| Source loop unroll info | NA |
| Source loop unroll confidence level | NA |
| Unroll/vectorization loop type | NA |
| Unroll factor | NA |
| CQA cycles | 12.50 |
| CQA cycles if no scalar integer | 9.33 |
| CQA cycles if FP arith vectorized | 12.50 |
| CQA cycles if fully vectorized | 2.02 |
| Front-end cycles | 9.75 |
| P0 cycles | 9.00 |
| P1 cycles | 9.00 |
| P2 cycles | 8.75 |
| P3 cycles | 8.75 |
| P4 cycles | 8.75 |
| P5 cycles | 8.75 |
| P6 cycles | 1.00 |
| P7 cycles | 0.00 |
| P8 cycles | 0.00 |
| P9 cycles | 0.00 |
| P10 cycles | 12.50 |
| P11 cycles | 12.17 |
| P12 cycles | 12.33 |
| P13 cycles | 2.50 |
| P14 cycles | 2.50 |
| DIV/SQRT cycles | 0.00 |
| Inter-iter dependencies cycles | NA |
| FE+BE cycles (UFS) | NA |
| Stall cycles (UFS) | NA |
| Nb insns | 78.00 |
| Nb uops | 78.00 |
| Nb loads | NA |
| Nb stores | 5.00 |
| Nb stack references | 22.00 |
| FLOP/cycle | 0.00 |
| Nb FLOP add-sub | 0.00 |
| Nb FLOP mul | 0.00 |
| Nb FLOP fma | 0.00 |
| Nb FLOP div | 0.00 |
| Nb FLOP rcp | 0.00 |
| Nb FLOP sqrt | 0.00 |
| Nb FLOP rsqrt | 0.00 |
| Bytes/cycle | 0.72 |
| Bytes prefetched | 0.00 |
| Bytes loaded | 6.00 |
| Bytes stored | 3.00 |
| Stride 0 | NA |
| Stride 1 | NA |
| Stride n | NA |
| Stride unknown | NA |
| Stride indirect | NA |
| Vectorization ratio all | 0.00 |
| Vectorization ratio load | 0.00 |
| Vectorization ratio store | 0.00 |
| Vectorization ratio mul | NA |
| Vectorization ratio add_sub | 0.00 |
| Vectorization ratio fma | NA |
| Vectorization ratio div_sqrt | NA |
| Vectorization ratio other | 0.00 |
| Vector-efficiency ratio all | 21.59 |
| Vector-efficiency ratio load | 25.00 |
| Vector-efficiency ratio store | 27.50 |
| Vector-efficiency ratio mul | NA |
| Vector-efficiency ratio add_sub | 25.00 |
| Vector-efficiency ratio fma | NA |
| Vector-efficiency ratio div_sqrt | NA |
| Vector-efficiency ratio other | 16.25 |
| Metric | Value |
|---|---|
| CQA speedup if no scalar integer | 1.34 |
| CQA speedup if FP arith vectorized | 1.00 |
| CQA speedup if fully vectorized | 6.19 |
| CQA speedup if no inter-iteration dependency | NA |
| CQA speedup if next bottleneck killed | 1.01 |
| Bottlenecks | P10, |
| Function | main.omp_outlined.62 |
| Source | TinyVector.h:62-62,refwrap.h:351-351,stl_vector.h:1131-1131,miniqmc.cpp:416-416,miniqmc.cpp:429-429,miniqmc.cpp:432-434,miniqmc.cpp:437-437,miniqmc.cpp:440-443,miniqmc.cpp:446-446,miniqmc.cpp:449-454,miniqmc.cpp:457-458 |
| Source loop unroll info | NA |
| Source loop unroll confidence level | NA |
| Unroll/vectorization loop type | NA |
| Unroll factor | NA |
| CQA cycles | 12.50 |
| CQA cycles if no scalar integer | 9.33 |
| CQA cycles if FP arith vectorized | 12.50 |
| CQA cycles if fully vectorized | 2.02 |
| Front-end cycles | 9.75 |
| P0 cycles | 9.00 |
| P1 cycles | 9.00 |
| P2 cycles | 8.75 |
| P3 cycles | 8.75 |
| P4 cycles | 8.75 |
| P5 cycles | 8.75 |
| P6 cycles | 1.00 |
| P7 cycles | 0.00 |
| P8 cycles | 0.00 |
| P9 cycles | 0.00 |
| P10 cycles | 12.50 |
| P11 cycles | 12.17 |
| P12 cycles | 12.33 |
| P13 cycles | 2.50 |
| P14 cycles | 2.50 |
| DIV/SQRT cycles | 0.00 |
| Inter-iter dependencies cycles | NA |
| FE+BE cycles (UFS) | NA |
| Stall cycles (UFS) | NA |
| Nb insns | 78.00 |
| Nb uops | 78.00 |
| Nb loads | NA |
| Nb stores | 5.00 |
| Nb stack references | 22.00 |
| FLOP/cycle | 0.00 |
| Nb FLOP add-sub | 0.00 |
| Nb FLOP mul | 0.00 |
| Nb FLOP fma | 0.00 |
| Nb FLOP div | 0.00 |
| Nb FLOP rcp | 0.00 |
| Nb FLOP sqrt | 0.00 |
| Nb FLOP rsqrt | 0.00 |
| Bytes/cycle | 0.72 |
| Bytes prefetched | 0.00 |
| Bytes loaded | 6.00 |
| Bytes stored | 3.00 |
| Stride 0 | NA |
| Stride 1 | NA |
| Stride n | NA |
| Stride unknown | NA |
| Stride indirect | NA |
| Vectorization ratio all | 0.00 |
| Vectorization ratio load | 0.00 |
| Vectorization ratio store | 0.00 |
| Vectorization ratio mul | NA |
| Vectorization ratio add_sub | 0.00 |
| Vectorization ratio fma | NA |
| Vectorization ratio div_sqrt | NA |
| Vectorization ratio other | 0.00 |
| Vector-efficiency ratio all | 21.59 |
| Vector-efficiency ratio load | 25.00 |
| Vector-efficiency ratio store | 27.50 |
| Vector-efficiency ratio mul | NA |
| Vector-efficiency ratio add_sub | 25.00 |
| Vector-efficiency ratio fma | NA |
| Vector-efficiency ratio div_sqrt | NA |
| Vector-efficiency ratio other | 16.25 |
| Path / |
| Function | main.omp_outlined.62 |
| Source file and lines | miniqmc.cpp:412-475 |
| Module | exec |
| nb instructions | 78 |
| nb uops | 78 |
| loop length | 312 |
| used w registers | 6 |
| used x registers | 9 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 2 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 22 |
| micro-operation queue | 9.75 cycles |
| front end | 9.75 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 9.00 | 9.00 | 8.75 | 8.75 | 8.75 | 8.75 | 1.00 | 0.00 | 0.00 | 0.00 | 12.50 | 12.17 | 12.33 | 2.50 | 2.50 |
| cycles | 9.00 | 9.00 | 8.75 | 8.75 | 8.75 | 8.75 | 1.00 | 0.00 | 0.00 | 0.00 | 12.50 | 12.17 | 12.33 | 2.50 | 2.50 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 9.75 |
| Dispatch | 12.50 |
| Overall L1 | 12.50 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 21% |
| load | 25% |
| store | 27% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 25% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 15% |
| all | 25% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 25% |
| all | 21% |
| load | 25% |
| store | 27% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 25% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 16% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| LDR X8, [SP, #112] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ORR X28, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STR X8, [SP, #144] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| B 4154fc <main.omp_outlined.62+0x71c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X8, X9, [SP, #136] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| ADD X28, X28, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDRSW X8, [X8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD X9, X9, #24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STR X9, [SP, #144] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| CMP X28, X8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (25.0%) |
| B.GE 415608 <main.omp_outlined.62+0x828> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X8, [SP, #160] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X8, [X8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X0, [X8, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| BL 474220 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X1, [SP, #152] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| MOVZ W8, #5072 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ORR W2, WZR, W28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD X0, X27, X8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| BL 4196d0 <_ZN11qmcplusplus12WaveFunction8evalGradERNS_11ParticleSetEi> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X8, [SP, #160] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X8, [X8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X0, [X8, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| BL 474340 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X2, X0, [SP, #144] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| MOVZ W3, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ORR W1, WZR, W28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 453060 <_ZN11qmcplusplus11ParticleSet8makeMoveEiRKNS_10TinyVectorIdLj3EEEb> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X8, [SP, #160] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X8, [X8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X0, [X8, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| BL 474220 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STP XZR, XZR, [SP, #208] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STR XZR, [SP, #224] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X1, [SP, #152] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| MOVZ W8, #5072 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X3, SP, #208 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ORR W2, WZR, W28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD X0, X27, X8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| BL 4197f0 <_ZN11qmcplusplus12WaveFunction9ratioGradERNS_11ParticleSetEiRNS_10TinyVectorIdLj3EEE> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X8, [SP, #160] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X8, [X8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X0, [X8, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| BL 474340 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X8, [SP, #232] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR D0, [X8, X28,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| LDR X8, [SP, #128] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR D1, [X8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| FCMP D0, D1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (25.0%) |
| B.GE 4155e8 <main.omp_outlined.62+0x808> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X8, [SP, #160] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X8, [X8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X0, [X8, #56] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| BL 474220 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X1, [SP, #152] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| MOVZ W8, #5072 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ORR W2, WZR, W28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD X0, X27, X8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| BL 4199d0 <_ZN11qmcplusplus12WaveFunction10acceptMoveERNS_11ParticleSetEi> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X8, [SP, #160] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X8, [X8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X0, [X8, #56] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| BL 474340 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X0, [SP, #152] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ORR W1, WZR, W28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 453860 <_ZN11qmcplusplus11ParticleSet10acceptMoveEi> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W8, [SP, #300] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD W8, W8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STR W8, [SP, #300] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| B 4154e0 <main.omp_outlined.62+0x700> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X0, [SP, #152] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ORR X1, XZR, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| BL 453b90 <_ZN11qmcplusplus11ParticleSet10rejectMoveEi> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MOVZ W8, #5072 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ORR W1, WZR, W28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD X0, X27, X8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| BL 419b10 <_ZN11qmcplusplus12WaveFunction7restoreEi> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| B 4154e0 <main.omp_outlined.62+0x700> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| Function | main.omp_outlined.62 |
| Source file and lines | miniqmc.cpp:412-475 |
| Module | exec |
| nb instructions | 78 |
| nb uops | 78 |
| loop length | 312 |
| used w registers | 6 |
| used x registers | 9 |
| used b registers | 0 |
| used h registers | 0 |
| used s registers | 1 |
| used d registers | 2 |
| used q registers | 0 |
| used v registers | 0 |
| used z registers | 0 |
| nb stack references | 22 |
| micro-operation queue | 9.75 cycles |
| front end | 9.75 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 9.00 | 9.00 | 8.75 | 8.75 | 8.75 | 8.75 | 1.00 | 0.00 | 0.00 | 0.00 | 12.50 | 12.17 | 12.33 | 2.50 | 2.50 |
| cycles | 9.00 | 9.00 | 8.75 | 8.75 | 8.75 | 8.75 | 1.00 | 0.00 | 0.00 | 0.00 | 12.50 | 12.17 | 12.33 | 2.50 | 2.50 |
| Cycles executing div or sqrt instructions | NA |
| Front-end | 9.75 |
| Dispatch | 12.50 |
| Overall L1 | 12.50 |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 0% |
| load | 0% |
| store | 0% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 0% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 21% |
| load | 25% |
| store | 27% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 25% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 15% |
| all | 25% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 25% |
| all | 21% |
| load | 25% |
| store | 27% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 25% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 16% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput | Vectorization |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| LDR X8, [SP, #112] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ORR X28, XZR, XZR | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STR X8, [SP, #144] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| B 4154fc <main.omp_outlined.62+0x71c> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X8, X9, [SP, #136] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| ADD X28, X28, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| LDRSW X8, [X8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD X9, X9, #24 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| STR X9, [SP, #144] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| CMP X28, X8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (25.0%) |
| B.GE 415608 <main.omp_outlined.62+0x828> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X8, [SP, #160] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X8, [X8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X0, [X8, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| BL 474220 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X1, [SP, #152] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| MOVZ W8, #5072 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ORR W2, WZR, W28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD X0, X27, X8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| BL 4196d0 <_ZN11qmcplusplus12WaveFunction8evalGradERNS_11ParticleSetEi> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X8, [SP, #160] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X8, [X8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X0, [X8, #40] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| BL 474340 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDP X2, X0, [SP, #144] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 1 | N/A |
| MOVZ W3, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ORR W1, WZR, W28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 453060 <_ZN11qmcplusplus11ParticleSet8makeMoveEiRKNS_10TinyVectorIdLj3EEEb> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X8, [SP, #160] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X8, [X8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X0, [X8, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| BL 474220 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| STP XZR, XZR, [SP, #208] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (50.0%) |
| STR XZR, [SP, #224] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (25.0%) |
| LDR X1, [SP, #152] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| MOVZ W8, #5072 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ADD X3, SP, #208 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| ORR W2, WZR, W28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD X0, X27, X8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| BL 4197f0 <_ZN11qmcplusplus12WaveFunction9ratioGradERNS_11ParticleSetEiRNS_10TinyVectorIdLj3EEE> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X8, [SP, #160] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X8, [X8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X0, [X8, #48] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| BL 474340 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X8, [SP, #232] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR D0, [X8, X28,LSL #3] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| LDR X8, [SP, #128] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR D1, [X8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 6 | 0.33 | scal (25.0%) |
| FCMP D0, D1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | scal (25.0%) |
| B.GE 4155e8 <main.omp_outlined.62+0x808> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X8, [SP, #160] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X8, [X8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X0, [X8, #56] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| BL 474220 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X1, [SP, #152] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | scal (25.0%) |
| MOVZ W8, #5072 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ORR W2, WZR, W28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD X0, X27, X8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| BL 4199d0 <_ZN11qmcplusplus12WaveFunction10acceptMoveERNS_11ParticleSetEi> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X8, [SP, #160] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X8, [X8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| LDR X0, [X8, #56] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| BL 474340 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X0, [SP, #152] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ORR W1, WZR, W28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| BL 453860 <_ZN11qmcplusplus11ParticleSet10acceptMoveEi> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR W8, [SP, #300] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ADD W8, W8, #1 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| STR W8, [SP, #300] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 | scal (12.5%) |
| B 4154e0 <main.omp_outlined.62+0x700> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| LDR X0, [SP, #152] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 | N/A |
| ORR X1, XZR, X28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (25.0%) |
| BL 453b90 <_ZN11qmcplusplus11ParticleSet10rejectMoveEi> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| MOVZ W8, #5072 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| ORR W1, WZR, W28 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
| ADD X0, X27, X8 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
| BL 419b10 <_ZN11qmcplusplus12WaveFunction7restoreEi> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| B 4154e0 <main.omp_outlined.62+0x700> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
| Run 1x1 | Number processes: 1Number nodes: NARun Command: <executable> -g "4 2 2" -bMPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/run/oneview_runs/multicore/armclang/oneview_run_1782144418OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_NUM_THREADS: 1OMP_PLACES: threads |
|---|---|
| Run 1x2 | Number processes: 1Run Command: <executable> -g "4 2 2" -bMPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/run/oneview_runs/multicore/armclang/oneview_run_1782144418OMP_NUM_THREADS: 2OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x4 | Number processes: 1Run Command: <executable> -g "4 2 2" -bMPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/run/oneview_runs/multicore/armclang/oneview_run_1782144418OMP_NUM_THREADS: 4OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x8 | Number processes: 1Run Command: <executable> -g "4 2 2" -bMPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/run/oneview_runs/multicore/armclang/oneview_run_1782144418OMP_NUM_THREADS: 8OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x16 | Number processes: 1Run Command: <executable> -g "4 2 2" -bMPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/run/oneview_runs/multicore/armclang/oneview_run_1782144418OMP_NUM_THREADS: 16OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x24 | Number processes: 1Run Command: <executable> -g "4 2 2" -bMPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/run/oneview_runs/multicore/armclang/oneview_run_1782144418OMP_NUM_THREADS: 24OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x32 | Number processes: 1Run Command: <executable> -g "4 2 2" -bMPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/run/oneview_runs/multicore/armclang/oneview_run_1782144418OMP_NUM_THREADS: 32OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x40 | Number processes: 1Run Command: <executable> -g "4 2 2" -bMPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/run/oneview_runs/multicore/armclang/oneview_run_1782144418OMP_NUM_THREADS: 40OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x48 | Number processes: 1Run Command: <executable> -g "4 2 2" -bMPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings Dataset: Run Directory: /home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/run/oneview_runs/multicore/armclang/oneview_run_1782144418OMP_NUM_THREADS: 48OMP_PROC_BIND: spreadOMP_DISPLAY_AFFINITY: TRUEOMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'OMP_DISPLAY_ENV: TRUEOMP_PLACES: threads |
| Run 1x56 | Number processes: 1<br>Run Command: <executable> -g "4 2 2" -b<br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset:<br>Run Directory: /home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/run/oneview_runs/multicore/armclang/oneview_run_1782144418<br>OMP_NUM_THREADS: 56<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_PLACES: threads |
| Run 1x64 | Number processes: 1<br>Run Command: <executable> -g "4 2 2" -b<br>MPI Command: mpirun -n <number_processes> --bind-to core --map-by package:PE=64 --rank-by fill --report-bindings<br>Dataset:<br>Run Directory: /home/eoseret/qaas/qaas_runs/178-212-9071/intel/miniqmc/run/oneview_runs/multicore/armclang/oneview_run_1782144418<br>OMP_NUM_THREADS: 64<br>OMP_PROC_BIND: spread<br>OMP_DISPLAY_AFFINITY: TRUE<br>OMP_AFFINITY_FORMAT: 'OMP: pid %P tid %i thread %n bound to OS proc set {%A}'<br>OMP_DISPLAY_ENV: TRUE<br>OMP_PLACES: threads |
| (1x1) Efficiency | (1x1) Potential Speed-Up (%) | (1x2) Efficiency | (1x2) Potential Speed-Up (%) | (1x4) Efficiency | (1x4) Potential Speed-Up (%) | (1x8) Efficiency | (1x8) Potential Speed-Up (%) | (1x16) Efficiency | (1x16) Potential Speed-Up (%) | (1x24) Efficiency | (1x24) Potential Speed-Up (%) | (1x32) Efficiency | (1x32) Potential Speed-Up (%) | (1x40) Efficiency | (1x40) Potential Speed-Up (%) | (1x48) Efficiency | (1x48) Potential Speed-Up (%) | (1x56) Efficiency | (1x56) Potential Speed-Up (%) | (1x64) Efficiency | (1x64) Potential Speed-Up (%) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 0 | 1.33 | 0 | 3.19 | 0 | 3.19 | 0 | 2.45 | 0 | 2.18 | 0 | 1.9 | 0 | 2.15 | 0 | 1.66 | 0 | 2.04 | 0 | 2.16 | 0 |
| Run | Number of threads | Efficiency (ideal is 1) | Speedup | Ideal Speedup | Time (s) | Coverage (%) |
|---|---|---|---|---|---|---|
| 1x1 | 1 | 1 | 1 | 1 | 0.019999999552965 | 0.045527089387178 |
| 1x2 | 2 | 1.33 | 1.33 | 2 | 0.01999999769032 | 0.0338792540133 |
| 1x4 | 3 | 3.19 | 3.19 | 4 | 0.0099999997764826 | 0.013823647983372 |
| 1x8 | 6 | 3.19 | 3.19 | 8 | 0.0099999997764826 | 0.012852004729211 |
| 1x16 | 7 | 2.45 | 2.45 | 16 | 0.040000002831221 | 0.012726762332022 |
| 1x24 | 15 | 2.18 | 2.18 | 24 | 0.025000000372529 | 0.011676671914756 |
| 1x32 | 19 | 1.9 | 1.9 | 32 | 0.044999998062849 | 0.011371680535376 |
| 1x40 | 19 | 2.15 | 2.15 | 40 | 0.035000000149012 | 0.0086893476545811 |
| 1x48 | 26 | 1.66 | 1.66 | 48 | 0.035000000149012 | 0.0098497364670038 |
| 1x56 | 28 | 2.04 | 2.04 | 56 | 0.03999999910593 | 0.0071424506604671 |
| 1x64 | 22 | 2.16 | 2.16 | 64 | 0.03999999910593 | 0.0061119729653001 |
