Browse Source

Implement DELAY_NS with CYCCNT on Cortex-M7 (#12283)

pull/1/head
Nils Hasenbanck 6 years ago
committed by Scott Lahteine
parent
commit
cafabf2055
  1. 6
      Marlin/src/HAL/HAL_STM32/HAL.cpp
  2. 2
      Marlin/src/HAL/HAL_STM32F7/HAL.h
  3. 2
      Marlin/src/HAL/HAL_TEENSY31_32/HAL.cpp
  4. 105
      Marlin/src/HAL/shared/Delay.h

6
Marlin/src/HAL/HAL_STM32/HAL.cpp

@ -30,6 +30,7 @@
#include "HAL.h" #include "HAL.h"
#include "../../inc/MarlinConfig.h" #include "../../inc/MarlinConfig.h"
#include "../shared/Delay.h"
#if ENABLED(EEPROM_EMULATED_WITH_SRAM) #if ENABLED(EEPROM_EMULATED_WITH_SRAM)
#if STM32F7xx #if STM32F7xx
@ -80,6 +81,11 @@ uint16_t HAL_adc_result;
// HAL initialization task // HAL initialization task
void HAL_init(void) { void HAL_init(void) {
// Needed for DELAY_NS() / DELAY_US() on CORTEX-M7
#if (defined(__arm__) || defined(__thumb__)) && __CORTEX_M == 7
enableCycleCounter();
#endif
FastIO_init(); FastIO_init();
#if ENABLED(SDSUPPORT) #if ENABLED(SDSUPPORT)

2
Marlin/src/HAL/HAL_STM32F7/HAL.h

@ -153,8 +153,6 @@ extern uint16_t HAL_adc_result;
// Public functions // Public functions
// -------------------------------------------------------------------------- // --------------------------------------------------------------------------
// Memory related // Memory related
#define __bss_end __bss_end__ #define __bss_end __bss_end__

2
Marlin/src/HAL/HAL_TEENSY31_32/HAL.cpp

@ -26,7 +26,7 @@
#ifdef __MK20DX256__ #ifdef __MK20DX256__
#include "HAL.h" #include "HAL.h"
#include "../Delay.h" #include "../shared/Delay.h"
#include <Wire.h> #include <Wire.h>

105
Marlin/src/HAL/shared/Delay.h

@ -19,6 +19,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
* *
*/ */
#pragma once
/** /**
* Busy wait delay cycles routines: * Busy wait delay cycles routines:
@ -28,57 +29,81 @@
* DELAY_US(count): Delay execution in microseconds * DELAY_US(count): Delay execution in microseconds
*/ */
#ifndef MARLIN_DELAY_H
#define MARLIN_DELAY_H
#include "../../core/macros.h" #include "../../core/macros.h"
#include "../../core/millis_t.h"
#if defined(__arm__) || defined(__thumb__) #if defined(__arm__) || defined(__thumb__)
// https://blueprints.launchpad.net/gcc-arm-embedded/+spec/delay-cycles #if __CORTEX_M == 7
#define nop() __asm__ __volatile__("nop;\n\t":::) // Cortex-M7 can use the cycle counter of the DWT unit
// http://www.anthonyvh.com/2017/05/18/cortex_m-cycle_counter/
FORCE_INLINE static void __delay_4cycles(uint32_t cy) { // +1 cycle FORCE_INLINE static void enableCycleCounter() {
#if ARCH_PIPELINE_RELOAD_CYCLES < 2 CoreDebug->DEMCR |= CoreDebug_DEMCR_TRCENA_Msk;
#define EXTRA_NOP_CYCLES A("nop")
#else
#define EXTRA_NOP_CYCLES ""
#endif
__asm__ __volatile__( // Unlock DWT.
A(".syntax unified") // is to prevent CM0,CM1 non-unified syntax DWT->LAR = 0xC5ACCE55;
L("1")
A("subs %[cnt],#1")
EXTRA_NOP_CYCLES
A("bne 1b")
: [cnt]"+r"(cy) // output: +r means input+output
: // input:
: "cc" // clobbers:
);
}
// Delay in cycles DWT->CYCCNT = 0;
FORCE_INLINE static void DELAY_CYCLES(uint32_t x) { DWT->CTRL |= DWT_CTRL_CYCCNTENA_Msk;
}
if (__builtin_constant_p(x)) { FORCE_INLINE volatile uint32_t getCycleCount() { return DWT->CYCCNT; }
#define MAXNOPS 4
if (x <= (MAXNOPS)) { FORCE_INLINE static void DELAY_CYCLES(const uint32_t x) {
switch (x) { case 4: nop(); case 3: nop(); case 2: nop(); case 1: nop(); } const uint32_t endCycles = getCycleCount() + x;
} while (PENDING(getCycleCount(), endCycles)) { }
else { // because of +1 cycle inside delay_4cycles }
const uint32_t rem = (x - 1) % (MAXNOPS);
switch (rem) { case 3: nop(); case 2: nop(); case 1: nop(); } #else
if ((x = (x - 1) / (MAXNOPS)))
__delay_4cycles(x); // if need more then 4 nop loop is more optimal // https://blueprints.launchpad.net/gcc-arm-embedded/+spec/delay-cycles
#define nop() __asm__ __volatile__("nop;\n\t":::)
// Busy-wait loop: decrements `cy` once per pass and repeats until it reaches zero.
// Going by the name, each pass is presumably meant to cost 4 CPU cycles, with one
// extra cycle of overhead for the whole call ("+1 cycle") -- TODO confirm against
// the target core's timing.
//
// cy: number of loop passes. A value of 0 is not handled here: the decrement
//     happens before the test, so the loop would keep running until the 32-bit
//     counter wraps back around to zero.
//
// A() and L() are helper macros defined outside this view; presumably A() emits
// one assembly instruction line and L() emits a local label -- verify in macros.h.
FORCE_INLINE static void __delay_4cycles(uint32_t cy) { // +1 cycle
// Pad the loop body with one extra nop when ARCH_PIPELINE_RELOAD_CYCLES is
// below 2; otherwise splice an empty string into the assembly template.
#if ARCH_PIPELINE_RELOAD_CYCLES < 2
#define EXTRA_NOP_CYCLES A("nop")
#else
#define EXTRA_NOP_CYCLES ""
#endif
__asm__ __volatile__(
A(".syntax unified") // select unified syntax; prevents CM0,CM1 non-unified syntax
// Loop head: local label "1"
L("1")
// Decrement the counter and update the condition flags
A("subs %[cnt],#1")
EXTRA_NOP_CYCLES
// Branch back to label 1 while the counter is non-zero
A("bne 1b")
: [cnt]"+r"(cy) // output: +r means input+output (register is read and written)
: // input: none
: "cc" // clobbers: condition flags, modified by subs
);
}
// Delay in cycles
// Busy-wait for roughly `x` CPU cycles using nop() and __delay_4cycles().
//
// - When the compiler can prove x is a constant (__builtin_constant_p):
//     x <= 4 : emits x nop() instructions through switch fall-through
//              (x == 0 matches no case and emits nothing).
//     x >  4 : emits (x - 1) % 4 nop() instructions, then runs
//              __delay_4cycles((x - 1) / 4); the "- 1" compensates for the
//              extra cycle noted on __delay_4cycles.
// - Otherwise: runs __delay_4cycles(x / 4); any x below 4 yields no delay
//   at all because the shifted count is zero and the call is skipped.
FORCE_INLINE static void DELAY_CYCLES(uint32_t x) {
if (__builtin_constant_p(x)) {
// Largest delay that is emitted as straight-line nops
#define MAXNOPS 4
if (x <= (MAXNOPS)) {
// Intentional fall-through: entering at case N executes N nops
switch (x) { case 4: nop(); case 3: nop(); case 2: nop(); case 1: nop(); }
}
else { // because of +1 cycle inside delay_4cycles
// Remainder that the 4-cycle loop cannot cover is burned with nops
const uint32_t rem = (x - 1) % (MAXNOPS);
switch (rem) { case 3: nop(); case 2: nop(); case 1: nop(); }
// Only call the loop with a non-zero pass count
if ((x = (x - 1) / (MAXNOPS)))
__delay_4cycles(x); // if more than 4 nops are needed, the loop is more optimal
}
#undef MAXNOPS
} }
#undef MAXNOPS else if ((x >>= 2))
__delay_4cycles(x);
} }
else if ((x >>= 2)) #undef nop
__delay_4cycles(x);
} #endif
#undef nop
#elif defined(__AVR__) #elif defined(__AVR__)
@ -144,5 +169,3 @@
// Delay in microseconds // Delay in microseconds
#define DELAY_US(x) DELAY_CYCLES( (x) * (F_CPU / 1000000UL) ) #define DELAY_US(x) DELAY_CYCLES( (x) * (F_CPU / 1000000UL) )
#endif // MARLIN_DELAY_H

Loading…
Cancel
Save