I just thought I would post here because a search for esp32 fp performance leads here.
I updated the test code in the previous post to fix a few things. My code and results are at the end.
The main point is that, in the best case, integer adds and multiplies all take one cycle while 32 bit float adds and multiplies take two.
For multiply-accumulate I count the multiply and add as 2 operations and there is no integer multiply-accumulate instruction.
It takes 3 cycles to do an integer multiply and then add for some reason. I'd say the single precision fp performance is very good for a microcontroller. It's probably as fast to use floats as fixed point integer operations in many situations since the fixed point code will need extra shifts.
Definitely don't use doubles unless you absolutely have to.
Also note that the esp32 CPU cores do have hardware fp sqrt and divide instructions, but they are not currently generated by the compiler and are only included in more recent versions of the C maths library; performance isn't great.
Here's the assembly code, just to prove that the loops are executing the intended instructions.
Code: Select all
400d09c8 <_Z13test_additionIiEvv>:
400d09c8: 004136 entry a1, 32
400d09cb: fd9641 l32r a4, 400d0024 <_stext+0x4>
400d09ce: 14b8 l32i.n a11, a4, 4
400d09d0: 04a8 l32i.n a10, a4, 0
400d09d2: fd9581 l32r a8, 400d0028 <_stext+0x8>
400d09d5: 0008e0 callx8 a8
400d09d8: 34b8 l32i.n a11, a4, 12
400d09da: 0a6d mov.n a6, a10
400d09dc: 24a8 l32i.n a10, a4, 8
400d09de: fd9281 l32r a8, 400d0028 <_stext+0x8>
400d09e1: 0008e0 callx8 a8
400d09e4: 54b8 l32i.n a11, a4, 20
400d09e6: 0a5d mov.n a5, a10
400d09e8: 44a8 l32i.n a10, a4, 16
400d09ea: fd8f81 l32r a8, 400d0028 <_stext+0x8>
400d09ed: 0008e0 callx8 a8
400d09f0: 0a3d mov.n a3, a10
400d09f2: 74b8 l32i.n a11, a4, 28
400d09f4: 64a8 l32i.n a10, a4, 24
400d09f6: fd8c81 l32r a8, 400d0028 <_stext+0x8>
400d09f9: 0008e0 callx8 a8
400d09fc: fd8981 l32r a8, 400d0020 <_stext>
400d09ff: 0a2d mov.n a2, a10
400d0a01: 1f8876 loop a8, 400d0a24 <_Z13test_additionIiEvv+0x5c>
400d0a04: 662a add.n a6, a6, a2
400d0a06: 553a add.n a5, a5, a3
400d0a08: 353a add.n a3, a5, a3
400d0a0a: 262a add.n a2, a6, a2
400d0a0c: 662a add.n a6, a6, a2
400d0a0e: 553a add.n a5, a5, a3
400d0a10: 335a add.n a3, a3, a5
400d0a12: 226a add.n a2, a2, a6
400d0a14: 662a add.n a6, a6, a2
400d0a16: 553a add.n a5, a5, a3
400d0a18: 335a add.n a3, a3, a5
400d0a1a: 226a add.n a2, a2, a6
400d0a1c: 662a add.n a6, a6, a2
400d0a1e: 553a add.n a5, a5, a3
400d0a20: 335a add.n a3, a3, a5
400d0a22: 226a add.n a2, a2, a6
400d0a24: 06ad mov.n a10, a6
400d0a26: fd8181 l32r a8, 400d002c <_stext+0xc>
400d0a29: 0008e0 callx8 a8
400d0a2c: 04a9 s32i.n a10, a4, 0
400d0a2e: 14b9 s32i.n a11, a4, 4
400d0a30: 05ad mov.n a10, a5
400d0a32: fd7e81 l32r a8, 400d002c <_stext+0xc>
400d0a35: 0008e0 callx8 a8
400d0a38: 24a9 s32i.n a10, a4, 8
400d0a3a: 34b9 s32i.n a11, a4, 12
400d0a3c: 03ad mov.n a10, a3
400d0a3e: fd7b81 l32r a8, 400d002c <_stext+0xc>
400d0a41: 0008e0 callx8 a8
400d0a44: 44a9 s32i.n a10, a4, 16
400d0a46: 54b9 s32i.n a11, a4, 20
400d0a48: 02ad mov.n a10, a2
400d0a4a: fd7881 l32r a8, 400d002c <_stext+0xc>
400d0a4d: 0008e0 callx8 a8
400d0a50: 64a9 s32i.n a10, a4, 24
400d0a52: 74b9 s32i.n a11, a4, 28
400d0a54: f01d retw.n
...
400d0a58 <_Z19test_multiplicationIiEvv>:
400d0a58: 004136 entry a1, 32
400d0a5b: fd7241 l32r a4, 400d0024 <_stext+0x4>
400d0a5e: 14b8 l32i.n a11, a4, 4
400d0a60: 04a8 l32i.n a10, a4, 0
400d0a62: fd7181 l32r a8, 400d0028 <_stext+0x8>
400d0a65: 0008e0 callx8 a8
400d0a68: 34b8 l32i.n a11, a4, 12
400d0a6a: 0a6d mov.n a6, a10
400d0a6c: 24a8 l32i.n a10, a4, 8
400d0a6e: fd6e81 l32r a8, 400d0028 <_stext+0x8>
400d0a71: 0008e0 callx8 a8
400d0a74: 54b8 l32i.n a11, a4, 20
400d0a76: 0a5d mov.n a5, a10
400d0a78: 44a8 l32i.n a10, a4, 16
400d0a7a: fd6b81 l32r a8, 400d0028 <_stext+0x8>
400d0a7d: 0008e0 callx8 a8
400d0a80: 0a3d mov.n a3, a10
400d0a82: 74b8 l32i.n a11, a4, 28
400d0a84: 64a8 l32i.n a10, a4, 24
400d0a86: fd6881 l32r a8, 400d0028 <_stext+0x8>
400d0a89: 0008e0 callx8 a8
400d0a8c: fd6581 l32r a8, 400d0020 <_stext>
400d0a8f: 0a2d mov.n a2, a10
400d0a91: 2f8876 loop a8, 400d0ac4 <_Z19test_multiplicationIiEvv+0x6c>
400d0a94: 826620 mull a6, a6, a2
400d0a97: 825530 mull a5, a5, a3
400d0a9a: 822620 mull a2, a6, a2
400d0a9d: 823530 mull a3, a5, a3
400d0aa0: 826620 mull a6, a6, a2
400d0aa3: 825530 mull a5, a5, a3
400d0aa6: 822260 mull a2, a2, a6
400d0aa9: 823350 mull a3, a3, a5
400d0aac: 826620 mull a6, a6, a2
400d0aaf: 825530 mull a5, a5, a3
400d0ab2: 822260 mull a2, a2, a6
400d0ab5: 823350 mull a3, a3, a5
400d0ab8: 826620 mull a6, a6, a2
400d0abb: 825530 mull a5, a5, a3
400d0abe: 822260 mull a2, a2, a6
400d0ac1: 823350 mull a3, a3, a5
400d0ac4: 06ad mov.n a10, a6
400d0ac6: fd5981 l32r a8, 400d002c <_stext+0xc>
400d0ac9: 0008e0 callx8 a8
400d0acc: 04a9 s32i.n a10, a4, 0
400d0ace: 14b9 s32i.n a11, a4, 4
400d0ad0: 05ad mov.n a10, a5
400d0ad2: fd5681 l32r a8, 400d002c <_stext+0xc>
400d0ad5: 0008e0 callx8 a8
400d0ad8: 24a9 s32i.n a10, a4, 8
400d0ada: 34b9 s32i.n a11, a4, 12
400d0adc: 03ad mov.n a10, a3
400d0ade: fd5381 l32r a8, 400d002c <_stext+0xc>
400d0ae1: 0008e0 callx8 a8
400d0ae4: 44a9 s32i.n a10, a4, 16
400d0ae6: 54b9 s32i.n a11, a4, 20
400d0ae8: 02ad mov.n a10, a2
400d0aea: fd5081 l32r a8, 400d002c <_stext+0xc>
400d0aed: 0008e0 callx8 a8
400d0af0: 64a9 s32i.n a10, a4, 24
400d0af2: 74b9 s32i.n a11, a4, 28
400d0af4: f01d retw.n
...
400d0af8 <_Z24test_multiply_accumulateIiEvv>:
400d0af8: 004136 entry a1, 32
400d0afb: fd4a21 l32r a2, 400d0024 <_stext+0x4>
400d0afe: 12b8 l32i.n a11, a2, 4
400d0b00: 02a8 l32i.n a10, a2, 0
400d0b02: fd4981 l32r a8, 400d0028 <_stext+0x8>
400d0b05: 0008e0 callx8 a8
400d0b08: 32b8 l32i.n a11, a2, 12
400d0b0a: 0a6d mov.n a6, a10
400d0b0c: 22a8 l32i.n a10, a2, 8
400d0b0e: fd4681 l32r a8, 400d0028 <_stext+0x8>
400d0b11: 0008e0 callx8 a8
400d0b14: 52b8 l32i.n a11, a2, 20
400d0b16: 0a5d mov.n a5, a10
400d0b18: 42a8 l32i.n a10, a2, 16
400d0b1a: fd4381 l32r a8, 400d0028 <_stext+0x8>
400d0b1d: 0008e0 callx8 a8
400d0b20: 0a4d mov.n a4, a10
400d0b22: 72b8 l32i.n a11, a2, 28
400d0b24: 62a8 l32i.n a10, a2, 24
400d0b26: fd4081 l32r a8, 400d0028 <_stext+0x8>
400d0b29: 0008e0 callx8 a8
400d0b2c: fd4181 l32r a8, 400d0030 <_stext+0x10>
400d0b2f: 0a3d mov.n a3, a10
400d0b31: 4f8876 loop a8, 400d0b84 <_Z24test_multiply_accumulateIiEvv+0x8c>
400d0b34: 82c330 mull a12, a3, a3
400d0b37: cc6a add.n a12, a12, a6
400d0b39: 82bc40 mull a11, a12, a4
400d0b3c: bb5a add.n a11, a11, a5
400d0b3e: 82abb0 mull a10, a11, a11
400d0b41: aa4a add.n a10, a10, a4
400d0b43: 829ca0 mull a9, a12, a10
400d0b46: 993a add.n a9, a9, a3
400d0b48: 826990 mull a6, a9, a9
400d0b4b: c6ca add.n a12, a6, a12
400d0b4d: 825ac0 mull a5, a10, a12
400d0b50: b5ba add.n a11, a5, a11
400d0b52: 824bb0 mull a4, a11, a11
400d0b55: a4aa add.n a10, a4, a10
400d0b57: 823ca0 mull a3, a12, a10
400d0b5a: 939a add.n a9, a3, a9
400d0b5c: 826990 mull a6, a9, a9
400d0b5f: 66ca add.n a6, a6, a12
400d0b61: 825a60 mull a5, a10, a6
400d0b64: 55ba add.n a5, a5, a11
400d0b66: 824550 mull a4, a5, a5
400d0b69: 44aa add.n a4, a4, a10
400d0b6b: 823640 mull a3, a6, a4
400d0b6e: 339a add.n a3, a3, a9
400d0b70: 829330 mull a9, a3, a3
400d0b73: 696a add.n a6, a9, a6
400d0b75: 829460 mull a9, a4, a6
400d0b78: 595a add.n a5, a9, a5
400d0b7a: 829550 mull a9, a5, a5
400d0b7d: 494a add.n a4, a9, a4
400d0b7f: 829640 mull a9, a6, a4
400d0b82: 393a add.n a3, a9, a3
400d0b84: 06ad mov.n a10, a6
400d0b86: fd2981 l32r a8, 400d002c <_stext+0xc>
400d0b89: 0008e0 callx8 a8
400d0b8c: 02a9 s32i.n a10, a2, 0
400d0b8e: 12b9 s32i.n a11, a2, 4
400d0b90: 05ad mov.n a10, a5
400d0b92: fd2681 l32r a8, 400d002c <_stext+0xc>
400d0b95: 0008e0 callx8 a8
400d0b98: 22a9 s32i.n a10, a2, 8
400d0b9a: 32b9 s32i.n a11, a2, 12
400d0b9c: 04ad mov.n a10, a4
400d0b9e: fd2381 l32r a8, 400d002c <_stext+0xc>
400d0ba1: 0008e0 callx8 a8
400d0ba4: 42a9 s32i.n a10, a2, 16
400d0ba6: 52b9 s32i.n a11, a2, 20
400d0ba8: 03ad mov.n a10, a3
400d0baa: fd2081 l32r a8, 400d002c <_stext+0xc>
400d0bad: 0008e0 callx8 a8
400d0bb0: 62a9 s32i.n a10, a2, 24
400d0bb2: 72b9 s32i.n a11, a2, 28
400d0bb4: f01d retw.n
...
400d0bb8 <_Z13test_additionIfEvv>:
400d0bb8: 006136 entry a1, 48
400d0bbb: fd1a21 l32r a2, 400d0024 <_stext+0x4>
400d0bbe: 12b8 l32i.n a11, a2, 4
400d0bc0: 02a8 l32i.n a10, a2, 0
400d0bc2: fd1c81 l32r a8, 400d0034 <_stext+0x14>
400d0bc5: 0008e0 callx8 a8
400d0bc8: fa3a50 wfr f3, a10
400d0bcb: 32b8 l32i.n a11, a2, 12
400d0bcd: 22a8 l32i.n a10, a2, 8
400d0bcf: 004133 ssi f3, a1, 0
400d0bd2: fd1881 l32r a8, 400d0034 <_stext+0x14>
400d0bd5: 0008e0 callx8 a8
400d0bd8: fa2a50 wfr f2, a10
400d0bdb: 52b8 l32i.n a11, a2, 20
400d0bdd: 42a8 l32i.n a10, a2, 16
400d0bdf: 014123 ssi f2, a1, 4
400d0be2: fd1481 l32r a8, 400d0034 <_stext+0x14>
400d0be5: 0008e0 callx8 a8
400d0be8: fa1a50 wfr f1, a10
400d0beb: 72b8 l32i.n a11, a2, 28
400d0bed: 62a8 l32i.n a10, a2, 24
400d0bef: 024113 ssi f1, a1, 8
400d0bf2: fd1081 l32r a8, 400d0034 <_stext+0x14>
400d0bf5: 0008e0 callx8 a8
400d0bf8: fd0a81 l32r a8, 400d0020 <_stext>
400d0bfb: 000133 lsi f3, a1, 0
400d0bfe: 010123 lsi f2, a1, 4
400d0c01: 020113 lsi f1, a1, 8
400d0c04: fa0a50 wfr f0, a10
400d0c07: f03d nop.n
400d0c09: 2f8876 loop a8, 400d0c3c <_Z13test_additionIfEvv+0x84>
400d0c0c: 0a3300 add.s f3, f3, f0
400d0c0f: 0a2210 add.s f2, f2, f1
400d0c12: 0a0300 add.s f0, f3, f0
400d0c15: 0a1210 add.s f1, f2, f1
400d0c18: 0a3300 add.s f3, f3, f0
400d0c1b: 0a2210 add.s f2, f2, f1
400d0c1e: 0a0030 add.s f0, f0, f3
400d0c21: 0a1120 add.s f1, f1, f2
400d0c24: 0a3300 add.s f3, f3, f0
400d0c27: 0a2210 add.s f2, f2, f1
400d0c2a: 0a0030 add.s f0, f0, f3
400d0c2d: 0a1120 add.s f1, f1, f2
400d0c30: 0a3300 add.s f3, f3, f0
400d0c33: 0a2210 add.s f2, f2, f1
400d0c36: 0a0030 add.s f0, f0, f3
400d0c39: 0a1120 add.s f1, f1, f2
400d0c3c: faa340 rfr a10, f3
400d0c3f: 004103 ssi f0, a1, 0
400d0c42: 024113 ssi f1, a1, 8
400d0c45: 014123 ssi f2, a1, 4
400d0c48: fcfc81 l32r a8, 400d0038 <_stext+0x18>
400d0c4b: 0008e0 callx8 a8
400d0c4e: 010123 lsi f2, a1, 4
400d0c51: 0062a2 s32i a10, a2, 0
400d0c54: 0162b2 s32i a11, a2, 4
400d0c57: faa240 rfr a10, f2
400d0c5a: fcf781 l32r a8, 400d0038 <_stext+0x18>
400d0c5d: 0008e0 callx8 a8
400d0c60: 020113 lsi f1, a1, 8
400d0c63: 22a9 s32i.n a10, a2, 8
400d0c65: 32b9 s32i.n a11, a2, 12
400d0c67: faa140 rfr a10, f1
400d0c6a: fcf381 l32r a8, 400d0038 <_stext+0x18>
400d0c6d: 0008e0 callx8 a8
400d0c70: 000103 lsi f0, a1, 0
400d0c73: 42a9 s32i.n a10, a2, 16
400d0c75: 52b9 s32i.n a11, a2, 20
400d0c77: faa040 rfr a10, f0
400d0c7a: fcef81 l32r a8, 400d0038 <_stext+0x18>
400d0c7d: 0008e0 callx8 a8
400d0c80: 62a9 s32i.n a10, a2, 24
400d0c82: 72b9 s32i.n a11, a2, 28
400d0c84: f01d retw.n
...
400d0c88 <_Z19test_multiplicationIfEvv>:
400d0c88: 006136 entry a1, 48
400d0c8b: fce621 l32r a2, 400d0024 <_stext+0x4>
400d0c8e: 12b8 l32i.n a11, a2, 4
400d0c90: 02a8 l32i.n a10, a2, 0
400d0c92: fce881 l32r a8, 400d0034 <_stext+0x14>
400d0c95: 0008e0 callx8 a8
400d0c98: fa3a50 wfr f3, a10
400d0c9b: 32b8 l32i.n a11, a2, 12
400d0c9d: 22a8 l32i.n a10, a2, 8
400d0c9f: 004133 ssi f3, a1, 0
400d0ca2: fce481 l32r a8, 400d0034 <_stext+0x14>
400d0ca5: 0008e0 callx8 a8
400d0ca8: fa2a50 wfr f2, a10
400d0cab: 52b8 l32i.n a11, a2, 20
400d0cad: 42a8 l32i.n a10, a2, 16
400d0caf: 014123 ssi f2, a1, 4
400d0cb2: fce081 l32r a8, 400d0034 <_stext+0x14>
400d0cb5: 0008e0 callx8 a8
400d0cb8: fa1a50 wfr f1, a10
400d0cbb: 72b8 l32i.n a11, a2, 28
400d0cbd: 62a8 l32i.n a10, a2, 24
400d0cbf: 024113 ssi f1, a1, 8
400d0cc2: fcdc81 l32r a8, 400d0034 <_stext+0x14>
400d0cc5: 0008e0 callx8 a8
400d0cc8: fcd681 l32r a8, 400d0020 <_stext>
400d0ccb: 000133 lsi f3, a1, 0
400d0cce: 010123 lsi f2, a1, 4
400d0cd1: 020113 lsi f1, a1, 8
400d0cd4: fa0a50 wfr f0, a10
400d0cd7: f03d nop.n
400d0cd9: 2f8876 loop a8, 400d0d0c <_Z19test_multiplicationIfEvv+0x84>
400d0cdc: 2a3300 mul.s f3, f3, f0
400d0cdf: 2a2210 mul.s f2, f2, f1
400d0ce2: 2a0300 mul.s f0, f3, f0
400d0ce5: 2a1210 mul.s f1, f2, f1
400d0ce8: 2a3300 mul.s f3, f3, f0
400d0ceb: 2a2210 mul.s f2, f2, f1
400d0cee: 2a0030 mul.s f0, f0, f3
400d0cf1: 2a1120 mul.s f1, f1, f2
400d0cf4: 2a3300 mul.s f3, f3, f0
400d0cf7: 2a2210 mul.s f2, f2, f1
400d0cfa: 2a0030 mul.s f0, f0, f3
400d0cfd: 2a1120 mul.s f1, f1, f2
400d0d00: 2a3300 mul.s f3, f3, f0
400d0d03: 2a2210 mul.s f2, f2, f1
400d0d06: 2a0030 mul.s f0, f0, f3
400d0d09: 2a1120 mul.s f1, f1, f2
400d0d0c: faa340 rfr a10, f3
400d0d0f: 004103 ssi f0, a1, 0
400d0d12: 024113 ssi f1, a1, 8
400d0d15: 014123 ssi f2, a1, 4
400d0d18: fcc881 l32r a8, 400d0038 <_stext+0x18>
400d0d1b: 0008e0 callx8 a8
400d0d1e: 010123 lsi f2, a1, 4
400d0d21: 0062a2 s32i a10, a2, 0
400d0d24: 0162b2 s32i a11, a2, 4
400d0d27: faa240 rfr a10, f2
400d0d2a: fcc381 l32r a8, 400d0038 <_stext+0x18>
400d0d2d: 0008e0 callx8 a8
400d0d30: 020113 lsi f1, a1, 8
400d0d33: 22a9 s32i.n a10, a2, 8
400d0d35: 32b9 s32i.n a11, a2, 12
400d0d37: faa140 rfr a10, f1
400d0d3a: fcbf81 l32r a8, 400d0038 <_stext+0x18>
400d0d3d: 0008e0 callx8 a8
400d0d40: 000103 lsi f0, a1, 0
400d0d43: 42a9 s32i.n a10, a2, 16
400d0d45: 52b9 s32i.n a11, a2, 20
400d0d47: faa040 rfr a10, f0
400d0d4a: fcbb81 l32r a8, 400d0038 <_stext+0x18>
400d0d4d: 0008e0 callx8 a8
400d0d50: 62a9 s32i.n a10, a2, 24
400d0d52: 72b9 s32i.n a11, a2, 28
400d0d54: f01d retw.n
...
400d0d58 <_Z24test_multiply_accumulateIfEvv>:
400d0d58: 006136 entry a1, 48
400d0d5b: fcb221 l32r a2, 400d0024 <_stext+0x4>
400d0d5e: 12b8 l32i.n a11, a2, 4
400d0d60: 02a8 l32i.n a10, a2, 0
400d0d62: fcb481 l32r a8, 400d0034 <_stext+0x14>
400d0d65: 0008e0 callx8 a8
400d0d68: fa3a50 wfr f3, a10
400d0d6b: 32b8 l32i.n a11, a2, 12
400d0d6d: 22a8 l32i.n a10, a2, 8
400d0d6f: 004133 ssi f3, a1, 0
400d0d72: fcb081 l32r a8, 400d0034 <_stext+0x14>
400d0d75: 0008e0 callx8 a8
400d0d78: fa2a50 wfr f2, a10
400d0d7b: 52b8 l32i.n a11, a2, 20
400d0d7d: 42a8 l32i.n a10, a2, 16
400d0d7f: 014123 ssi f2, a1, 4
400d0d82: fcac81 l32r a8, 400d0034 <_stext+0x14>
400d0d85: 0008e0 callx8 a8
400d0d88: fa1a50 wfr f1, a10
400d0d8b: 72b8 l32i.n a11, a2, 28
400d0d8d: 62a8 l32i.n a10, a2, 24
400d0d8f: 024113 ssi f1, a1, 8
400d0d92: fca881 l32r a8, 400d0034 <_stext+0x14>
400d0d95: 0008e0 callx8 a8
400d0d98: fca681 l32r a8, 400d0030 <_stext+0x10>
400d0d9b: 000133 lsi f3, a1, 0
400d0d9e: 010123 lsi f2, a1, 4
400d0da1: 020113 lsi f1, a1, 8
400d0da4: fa0a50 wfr f0, a10
400d0da7: f03d nop.n
400d0da9: 2f8876 loop a8, 400d0ddc <_Z24test_multiply_accumulateIfEvv+0x84>
400d0dac: 4a3000 madd.s f3, f0, f0
400d0daf: 4a2310 madd.s f2, f3, f1
400d0db2: 4a1220 madd.s f1, f2, f2
400d0db5: 4a0310 madd.s f0, f3, f1
400d0db8: 4a3000 madd.s f3, f0, f0
400d0dbb: 4a2130 madd.s f2, f1, f3
400d0dbe: 4a1220 madd.s f1, f2, f2
400d0dc1: 4a0310 madd.s f0, f3, f1
400d0dc4: 4a3000 madd.s f3, f0, f0
400d0dc7: 4a2130 madd.s f2, f1, f3
400d0dca: 4a1220 madd.s f1, f2, f2
400d0dcd: 4a0310 madd.s f0, f3, f1
400d0dd0: 4a3000 madd.s f3, f0, f0
400d0dd3: 4a2130 madd.s f2, f1, f3
400d0dd6: 4a1220 madd.s f1, f2, f2
400d0dd9: 4a0310 madd.s f0, f3, f1
400d0ddc: faa340 rfr a10, f3
400d0ddf: 004103 ssi f0, a1, 0
400d0de2: 024113 ssi f1, a1, 8
400d0de5: 014123 ssi f2, a1, 4
400d0de8: fc9481 l32r a8, 400d0038 <_stext+0x18>
400d0deb: 0008e0 callx8 a8
400d0dee: 010123 lsi f2, a1, 4
400d0df1: 0062a2 s32i a10, a2, 0
400d0df4: 0162b2 s32i a11, a2, 4
400d0df7: faa240 rfr a10, f2
400d0dfa: fc8f81 l32r a8, 400d0038 <_stext+0x18>
400d0dfd: 0008e0 callx8 a8
400d0e00: 020113 lsi f1, a1, 8
400d0e03: 22a9 s32i.n a10, a2, 8
400d0e05: 32b9 s32i.n a11, a2, 12
400d0e07: faa140 rfr a10, f1
400d0e0a: fc8b81 l32r a8, 400d0038 <_stext+0x18>
400d0e0d: 0008e0 callx8 a8
400d0e10: 000103 lsi f0, a1, 0
400d0e13: 42a9 s32i.n a10, a2, 16
400d0e15: 52b9 s32i.n a11, a2, 20
400d0e17: faa040 rfr a10, f0
400d0e1a: fc8781 l32r a8, 400d0038 <_stext+0x18>
400d0e1d: 0008e0 callx8 a8
400d0e20: 62a9 s32i.n a10, a2, 24
400d0e22: 72b9 s32i.n a11, a2, 28
400d0e24: f01d retw.n
...