summaryrefslogtreecommitdiffstats
path: root/tinyusb/lib/SEGGER_RTT/RTT/SEGGER_RTT_ASM_ARMv7M.S
blob: 78cde4d7585bd885b730d35c5d4ab9a8327f47da (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
/*********************************************************************
*                   (c) SEGGER Microcontroller GmbH                  *
*                        The Embedded Experts                        *
*                           www.segger.com                           *
**********************************************************************

-------------------------- END-OF-HEADER -----------------------------

File    : SEGGER_RTT_ASM_ARMv7M.S
Purpose : Assembler implementation of RTT functions for ARMv7M

Additional information:
  This module is written to be assembler-independent and works with
  GCC and clang (Embedded Studio) and IAR.
*/

#define SEGGER_RTT_ASM      // Used to control processed input from header file
#include "SEGGER_RTT.h"

/*********************************************************************
*
*       Defines, fixed
*
**********************************************************************
*/
#define _CCIAR   0
#define _CCCLANG 1

#if (defined __SES_ARM) || (defined __GNUC__) || (defined __clang__)
  #define _CC_TYPE             _CCCLANG
  #define _PUB_SYM             .global
  #define _EXT_SYM             .extern
  #define _END                 .end
  #define _WEAK                .weak
  #define _THUMB_FUNC          .thumb_func
  #define _THUMB_CODE          .code 16
  #define _WORD                .word
  #define _SECTION(Sect, Type, AlignExp) .section Sect ##, "ax"
  #define _ALIGN(Exp)          .align Exp 
  #define _PLACE_LITS          .ltorg
  #define _DATA_SECT_START 
  #define _C_STARTUP           _start
  #define _STACK_END           __stack_end__
  #define _RAMFUNC
  //
  // .text     => Link to flash
  // .fast     => Link to RAM
  // OtherSect => Usually link to RAM
  // Alignment is 2^x
  //
#elif defined (__IASMARM__)
  #define _CC_TYPE             _CCIAR
  #define _PUB_SYM             PUBLIC
  #define _EXT_SYM             EXTERN
  #define _END                 END
  #define _WEAK                _WEAK
  #define _THUMB_FUNC
  #define _THUMB_CODE          THUMB
  #define _WORD                DCD
  #define _SECTION(Sect, Type, AlignExp) SECTION Sect ## : ## Type ## :REORDER:NOROOT ## (AlignExp)
  #define _ALIGN(Exp)          alignrom Exp 
  #define _PLACE_LITS
  #define _DATA_SECT_START     DATA
  #define _C_STARTUP           __iar_program_start
  #define _STACK_END           sfe(CSTACK)
  #define _RAMFUNC             SECTION_TYPE SHT_PROGBITS, SHF_WRITE | SHF_EXECINSTR
  //
  // .text     => Link to flash
  // .textrw   => Link to RAM
  // OtherSect => Usually link to RAM
  // NOROOT    => Allows linker to throw away the function, if not referenced
  // Alignment is 2^x
  //
#endif

#if (_CC_TYPE == _CCIAR)
        NAME SEGGER_RTT_ASM_ARMv7M
#else
        .syntax unified
#endif

#if defined (RTT_USE_ASM) && (RTT_USE_ASM == 1)
        #define SHT_PROGBITS 0x1

/*********************************************************************
*
*       Public / external symbols
*
**********************************************************************
*/

        _EXT_SYM __aeabi_memcpy
        _EXT_SYM __aeabi_memcpy4
        _EXT_SYM _SEGGER_RTT

        _PUB_SYM SEGGER_RTT_ASM_WriteSkipNoLock

/*********************************************************************
*
*       SEGGER_RTT_WriteSkipNoLock
*
*  Function description
*    Stores a specified number of characters in SEGGER RTT
*    control block which is then read by the host.
*    SEGGER_RTT_WriteSkipNoLock does not lock the application and
*    skips all data, if the data does not fit into the buffer.
*
*  Parameters
*    BufferIndex  Index of "Up"-buffer to be used (e.g. 0 for "Terminal").
*    pBuffer      Pointer to character array. Does not need to point to a \0 terminated string.
*    NumBytes     Number of bytes to be stored in the SEGGER RTT control block.
*                 MUST be > 0!!!
*                 This is done for performance reasons, so no initial check has do be done.
*
*  Return value
*    1: Data has been copied
*    0: No space, data has not been copied
*
*  Notes
*    (1) If there is not enough space in the "Up"-buffer, all data is dropped.
*    (2) For performance reasons this function does not call Init()
*        and may only be called after RTT has been initialized.
*        Either by calling SEGGER_RTT_Init() or calling another RTT API function first.
*/
        _SECTION(.text, CODE, 2)
        _ALIGN(2)
        _THUMB_FUNC
SEGGER_RTT_ASM_WriteSkipNoLock:   // unsigned SEGGER_RTT_WriteSkipNoLock(unsigned BufferIndex, const void* pData, unsigned NumBytes) {
        //
        // Cases:
        //   1) RdOff <= WrOff => Space until wrap-around is sufficient
        //   2) RdOff <= WrOff => Space after wrap-around needed (copy in 2 chunks)
        //   3) RdOff <  WrOff => No space in buf
        //   4) RdOff >  WrOff => Space is sufficient
        //   5) RdOff >  WrOff => No space in buf
        //
        // 1) is the most common case for large buffers and assuming that J-Link reads the data fast enough
        //
        // Register usage:
        //   R0 Temporary needed as RdOff, <Tmp> register later on
        //   R1 pData
        //   R2 <NumBytes>
        //   R3 <Tmp> register. Hold free for subroutine calls
        //   R4 <Rem>
        //   R5 pRing->pBuffer
        //   R6 pRing (Points to active struct SEGGER_RTT_BUFFER_DOWN)
        //   R7 WrOff
        //
        PUSH     {R4-R7}
        ADD      R3,R0,R0, LSL #+1
        LDR.W    R0,=_SEGGER_RTT                 // pRing = &_SEGGER_RTT.aUp[BufferIndex];
        ADD      R0,R0,R3, LSL #+3
        ADD      R6,R0,#+24
        LDR      R0,[R6, #+16]                   // RdOff = pRing->RdOff;
        LDR      R7,[R6, #+12]                   // WrOff = pRing->WrOff;
        LDR      R5,[R6, #+4]                    // pRing->pBuffer
        CMP      R7,R0
        BCC.N    _CheckCase4                     // if (RdOff <= WrOff) {                           => Case 1), 2) or 3)
        //
        // Handling for case 1, later on identical to case 4
        //
        LDR      R3,[R6, #+8]                    //  Avail = pRing->SizeOfBuffer - WrOff - 1u;      => Space until wrap-around (assume 1 byte not usable for case that RdOff == 0)
        SUBS     R4,R3,R7                        // <Rem> (Used in case we jump into case 2 afterwards)
        SUBS     R3,R4,#+1                       // <Avail>
        CMP      R3,R2
        BCC.N    _CheckCase2                     // if (Avail >= NumBytes) {  => Case 1)?
_Case4:
        ADDS     R5,R7,R5                        // pBuffer += WrOff
        ADDS     R0,R2,R7                        // v = WrOff + NumBytes
        //
        // 2x unrolling for the copy loop that is used most of the time
        // This is a special optimization for small SystemView packets and makes them even faster
        //
        _ALIGN(2)
_LoopCopyStraight:                               // memcpy(pRing->pBuffer + WrOff, pData, NumBytes);
        LDRB     R3,[R1], #+1
        STRB     R3,[R5], #+1                    // *pDest++ = *pSrc++
        SUBS     R2,R2,#+1
        BEQ      _CSDone
        LDRB     R3,[R1], #+1
        STRB     R3,[R5], #+1                    // *pDest++ = *pSrc++
        SUBS     R2,R2,#+1
        BNE      _LoopCopyStraight
_CSDone:
        STR      R0,[R6, #+12]                   // pRing->WrOff = WrOff + NumBytes;
        MOVS     R0,#+1
        POP      {R4-R7}
        BX       LR                              // Return 1
_CheckCase2:
        ADDS     R0,R0,R3                        // Avail += RdOff; => Space incl. wrap-around
        CMP      R0,R2
        BCC.N    _Case3                          // if (Avail >= NumBytes) {           => Case 2? => If not, we have case 3) (does not fit)
        //
        // Handling for case 2
        //
        ADDS     R0,R7,R5                        // v = pRing->pBuffer + WrOff => Do not change pRing->pBuffer here because 2nd chunk needs org. value
        SUBS     R2,R2,R4                        // NumBytes -= Rem;  (Rem = pRing->SizeOfBuffer - WrOff; => Space until end of buffer)
_LoopCopyBeforeWrapAround:                       // memcpy(pRing->pBuffer + WrOff, pData, Rem); => Copy 1st chunk
        LDRB     R3,[R1], #+1
        STRB     R3,[R0], #+1                    // *pDest++ = *pSrc++
        SUBS     R4,R4,#+1
        BNE      _LoopCopyBeforeWrapAround
        //
        // Special case: First check that assumed RdOff == 0 calculated that last element before wrap-around could not be used
        // But 2nd check (considering space until wrap-around and until RdOff) revealed that RdOff is not 0, so we can use the last element
        // In this case, we may use a copy straight until buffer end anyway without needing to copy 2 chunks
        // Therefore, check if 2nd memcpy is necessary at all
        //
        ADDS     R4,R2,#+0                       // Save <NumBytes> (needed as counter in loop but must be written to <WrOff> after the loop). Also use this inst to update the flags to skip 2nd loop if possible
        BEQ.N    _No2ChunkNeeded                 // if (NumBytes) {
_LoopCopyAfterWrapAround:                        // memcpy(pRing->pBuffer, pData + Rem, NumBytes);
        LDRB     R3,[R1], #+1                    // pData already points to the next src byte due to copy loop increment before this loop
        STRB     R3,[R5], #+1                    // *pDest++ = *pSrc++
        SUBS     R2,R2,#+1
        BNE      _LoopCopyAfterWrapAround
_No2ChunkNeeded:
        STR      R4,[R6, #+12]                   // pRing->WrOff = NumBytes; => Must be written after copying data because J-Link may read control block asynchronously while writing into buffer
        MOVS     R0,#+1
        POP      {R4-R7}
        BX       LR                              // Return 1
_CheckCase4:
        SUBS     R0,R0,R7
        SUBS     R0,R0,#+1                       // Avail = RdOff - WrOff - 1u;
        CMP      R0,R2
        BCS.N    _Case4                          // if (Avail >= NumBytes) {      => Case 4) == 1) ? => If not, we have case 5) == 3) (does not fit)
_Case3:
        MOVS     R0,#+0
        POP      {R4-R7}
        BX       LR                              // Return 0
        _PLACE_LITS

#endif  // defined (RTT_USE_ASM) && (RTT_USE_ASM == 1)
        _END

/*************************** End of file ****************************/