Topics

回复:[Zephyr-devel] Is this a bug? how to guarantee the "atomic semantics access of readyqueue" in "do_swap" function during context switch in SMP mode?


"曹子龙
 

Hi andy:

     so, this is turely a issue that i have illustrated in last emai? is that true?
so the mainline code still has this issue, right?

 thank you.


曹子龙

珠海全志科技股份有限公司      BU1-PSW

地址:广东省珠海市高新区唐家湾镇科技2路9号

TEL:13824125580

Email:caozilong@...

网址: http://www.allwinnertech.com

 


------------------------------------------------------------------
发件人:Andy Ross <andrew.j.ross@...>
发送时间:2020年1月21日(星期二) 22:45
收件人:曹子龙 <caozilong@...>; devel <devel@...>
主 题:Re: [Zephyr-devel] Is this a bug? how to guarantee the "atomic semantics access of readyqueue" in "do_swap" function during context switch in SMP mode?

There is a fix for exactly this race this in https://github.com/zephyrproject-rtos/zephyr/pull/21903

On 1/21/2020 1:49 AM, "曹子龙 wrote:
HI folks:
     if from the call pass of z_swap, which with parameter,  
      do_swap(key.key, lock, 1);
    because is_spinlock is 1, so the lock would be released before invoke the actual switch function "arch_switch", 
which means there are nothing procteced the readyqueue of the scheduler, during the moment, if other cpus access the readyqueue, there would be a chance that
"_current" task be selected by other cpus before the register  context do the actual backup. so did this a bug?
 40 static ALWAYS_INLINE unsigned int do_swap(unsigned int key,
 41                                         ¦ struct k_spinlock *lock,
 42                                         ¦ int is_spinlock)
 43 {                   
 44         ARG_UNUSED(lock);
 45         struct k_thread *new_thread, *old_thread;
 46                     
 47 #ifdef CONFIG_EXECUTION_BENCHMARKING
 48         extern void read_timer_start_of_swap(void);
 49         read_timer_start_of_swap();
 50 #endif              
 51                     
 52         old_thread = _current;
 53                     
 54         z_check_stack_sentinel();
 55                     
 56         if (is_spinlock) {
 57                 k_spin_release(lock);
 58         }           
 59                     
 60         new_thread = z_get_next_ready_thread();
 61                     
 62         if (new_thread != old_thread) {
 63                 sys_trace_thread_switched_out();
 64 #ifdef CONFIG_TIMESLICING
 65                 z_reset_time_slice();
 66 #endif              
 67                     
 68                 old_thread->swap_retval = -EAGAIN;
 69                     
 70 #ifdef CONFIG_SMP   
 71                 _current_cpu->swap_ok = 0;
 72                     
 73                 new_thread->base.cpu = arch_curr_cpu()->id;
 74                     
 75                 if (!is_spinlock) {
 76                         z_smp_release_global_lock(new_thread);
 77                 }   
 78 #endif              
 79                 _current = new_thread;
 80                 arch_switch(new_thread->switch_handle,
 81                         ¦   ¦&old_thread->switch_handle); 84         }          
 85         
 83                 sys_trace_thread_switched_in();
 84         }          
 85         
 86         if (is_spinlock) {
 87                 arch_irq_unlock(key);
 88         } else {   
 89                 irq_unlock(key);
 90         }
 91                    
 92         return _current->swap_retval;
 93 }
 94       

曹子龙

珠海全志科技股份有限公司      BU1-PSW

地址:广东省珠海市高新区唐家湾镇科技2路9号

TEL:13824125580

Email:caozilong@...

网址: http://www.allwinnertech.com

 




Andy Ross
 

Actually, that PR merged a few hours ago, so code in master does not have this issue anymore.  But yes: it was real and visible in CI testing as a spurious failure in qemu_x86_64.

Andy

On 1/21/2020 10:31 PM, "曹子龙 wrote:
Hi andy:

     so, this is turely a issue that i have illustrated in last emai? is that true?
so the mainline code still has this issue, right?

 thank you.


曹子龙

珠海全志科技股份有限公司      BU1-PSW

地址:广东省珠海市高新区唐家湾镇科技2路9号

TEL:13824125580

Email:caozilong@...

网址: http://www.allwinnertech.com

 


------------------------------------------------------------------
发件人:Andy Ross <andrew.j.ross@...>
发送时间:2020年1月21日(星期二) 22:45
收件人:曹子龙 <caozilong@...>; devel <devel@...>
主 题:Re: [Zephyr-devel] Is this a bug? how to guarantee the "atomic semantics access of readyqueue" in "do_swap" function during context switch in SMP mode?

There is a fix for exactly this race this in https://github.com/zephyrproject-rtos/zephyr/pull/21903

On 1/21/2020 1:49 AM, "曹子龙 wrote:
HI folks:
     if from the call pass of z_swap, which with parameter,  
      do_swap(key.key, lock, 1);
    because is_spinlock is 1, so the lock would be released before invoke the actual switch function "arch_switch", 
which means there are nothing procteced the readyqueue of the scheduler, during the moment, if other cpus access the readyqueue, there would be a chance that
"_current" task be selected by other cpus before the register  context do the actual backup. so did this a bug?
 40 static ALWAYS_INLINE unsigned int do_swap(unsigned int key,
 41                                         ¦ struct k_spinlock *lock,
 42                                         ¦ int is_spinlock)
 43 {                   
 44         ARG_UNUSED(lock);
 45         struct k_thread *new_thread, *old_thread;
 46                     
 47 #ifdef CONFIG_EXECUTION_BENCHMARKING
 48         extern void read_timer_start_of_swap(void);
 49         read_timer_start_of_swap();
 50 #endif              
 51                     
 52         old_thread = _current;
 53                     
 54         z_check_stack_sentinel();
 55                     
 56         if (is_spinlock) {
 57                 k_spin_release(lock);
 58         }           
 59                     
 60         new_thread = z_get_next_ready_thread();
 61                     
 62         if (new_thread != old_thread) {
 63                 sys_trace_thread_switched_out();
 64 #ifdef CONFIG_TIMESLICING
 65                 z_reset_time_slice();
 66 #endif              
 67                     
 68                 old_thread->swap_retval = -EAGAIN;
 69                     
 70 #ifdef CONFIG_SMP   
 71                 _current_cpu->swap_ok = 0;
 72                     
 73                 new_thread->base.cpu = arch_curr_cpu()->id;
 74                     
 75                 if (!is_spinlock) {
 76                         z_smp_release_global_lock(new_thread);
 77                 }   
 78 #endif              
 79                 _current = new_thread;
 80                 arch_switch(new_thread->switch_handle,
 81                         ¦   ¦&old_thread->switch_handle); 84         }          
 85         
 83                 sys_trace_thread_switched_in();
 84         }          
 85         
 86         if (is_spinlock) {
 87                 arch_irq_unlock(key);
 88         } else {   
 89                 irq_unlock(key);
 90         }
 91                    
 92         return _current->swap_retval;
 93 }
 94       

曹子龙

珠海全志科技股份有限公司      BU1-PSW

地址:广东省珠海市高新区唐家湾镇科技2路9号

TEL:13824125580

Email:caozilong@...

网址: http://www.allwinnertech.com

 




Jennifer M Williams
 

Hi

The PR mentioned below (https://github.com/zephyrproject-rtos/zephyr/pull/21903) was merged yesterday – please review the comments and code changes for more details on the fix. Can you please describe a bit more how your potential issue is not addressed by this PR?

 

Thank you,

Jen

 

From: devel@... <devel@...> On Behalf Of "???
Sent: Tuesday, January 21, 2020 10:31 PM
To: Ross, Andrew J <andrew.j.ross@...>; devel <devel@...>
Subject: 回复:[Zephyr-devel] Is this a bug? how to guarantee the "atomic semantics access of readyqueue" in "do_swap" function during context switch in SMP mode?

 

Hi andy:

 

     so, this is turely a issue that i have illustrated in last emai? is that true?

so the mainline code still has this issue, right?

 

 thank you.

 

 

曹子

珠海全志科技股份有限公司      BU1-PSW

地址:广东省珠海市高新区唐家湾镇科技2路9号

TEL:13824125580

Email:caozilong@...

网址: http://www.allwinnertech.com

 

 

------------------------------------------------------------------

发件人:Andy Ross <andrew.j.ross@...>

发送时间:2020121(星期二) 22:45

收件人:曹子 <caozilong@...>; devel <devel@...>

主 题:Re: [Zephyr-devel] Is this a bug? how to guarantee the "atomic semantics access of readyqueue" in "do_swap" function during context switch in SMP mode?

 

There is a fix for exactly this race this in https://github.com/zephyrproject-rtos/zephyr/pull/21903

On 1/21/2020 1:49 AM, "曹子 wrote:

HI folks:

     if from the call pass of z_swap, which with parameter,  

      do_swap(key.key, lock, 1);

    because is_spinlock is 1, so the lock would be released before invoke the actual switch function "arch_switch", 

which means there are nothing procteced the readyqueue of the scheduler, during the moment, if other cpus access the readyqueue, there would be a chance that

"_current" task be selected by other cpus before the register  context do the actual backup. so did this a bug?

 40 static ALWAYS_INLINE unsigned int do_swap(unsigned int key,

 41                                         ¦ struct k_spinlock *lock,

 42                                         ¦ int is_spinlock)

 43 {                   

 44         ARG_UNUSED(lock);

 45         struct k_thread *new_thread, *old_thread;

 46                     

 47 #ifdef CONFIG_EXECUTION_BENCHMARKING

 48         extern void read_timer_start_of_swap(void);

 49         read_timer_start_of_swap();

 50 #endif              

 51                     

 52         old_thread = _current;

 53                     

 54         z_check_stack_sentinel();

 55                     

 56         if (is_spinlock) {

 57                 k_spin_release(lock);

 58         }           

 59                     

 60         new_thread = z_get_next_ready_thread();

 61                     

 62         if (new_thread != old_thread) {

 63                 sys_trace_thread_switched_out();

 64 #ifdef CONFIG_TIMESLICING

 65                 z_reset_time_slice();

 66 #endif              

 67                     

 68                 old_thread->swap_retval = -EAGAIN;

 69                     

 70 #ifdef CONFIG_SMP   

 71                 _current_cpu->swap_ok = 0;

 72                     

 73                 new_thread->base.cpu = arch_curr_cpu()->id;

 74                     

 75                 if (!is_spinlock) {

 76                         z_smp_release_global_lock(new_thread);

 77                 }   

 78 #endif              

 79                 _current = new_thread;

 80                 arch_switch(new_thread->switch_handle,

 81                         ¦   ¦&old_thread->switch_handle); 84         }          

 85         

 83                 sys_trace_thread_switched_in();

 84         }          

 85         

 86         if (is_spinlock) {

 87                 arch_irq_unlock(key);

 88         } else {   

 89                 irq_unlock(key);

 90         }

 91                    

 92         return _current->swap_retval;

 93 }

 94       

曹子

珠海全志科技股份有限公司      BU1-PSW

地址:广东省珠海市高新区唐家湾镇科技2路9号

TEL:13824125580

Email:caozilong@...

网址: http://www.allwinnertech.com