Eigenstate: myrddin-dev mailing list

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH 2/4] Fix futex timeouts and handle futex error codes.


Overall, seems sane, biggest issue is a missed wake that I let slip through earlier.
Comments inline.

On Sat, 11 Aug 2018 19:12:47 -0700, iriri <iri@xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx> wrote:

> 
> diff --git a/lib/sys/sys+linux-x64.myr b/lib/sys/sys+linux-x64.myr
> index 1cebb123..83fdea53 100644
> --- a/lib/sys/sys+linux-x64.myr
> +++ b/lib/sys/sys+linux-x64.myr
> @@ -562,6 +562,8 @@ pkg sys =
>  	
>  	const Futexpriv	: futexop = 128
>  	const Futexclockrt	: futexop = 256
> +
> +	const Futexbitsetmatchany : int32 = -1
  	
Same note as previous patch.

>  	/* poll events : posix */
>  	const Pollin	: pollevt = 0x001	/* There is data to read.  */
> @@ -1273,7 +1275,7 @@ pkg sys =
>  	const dup	: (fd : fd -> fd)
>  	const dup2	: (src : fd, dst : fd -> fd)
>  	const futex	: (uaddr : int32#, op : futexop, val : int32, \
> diff --git a/lib/thread/condvar+freebsd.myr b/lib/thread/condvar+freebsd.myr
> index 002757ae..4e2b0c11 100644
> --- a/lib/thread/condvar+freebsd.myr
> +++ b/lib/thread/condvar+freebsd.myr
> @@ -1,7 +1,6 @@
>  use std
>  
>  use "atomic"
> -use "common"
>  use "mutex"
>  use "futex"
>  
> @@ -22,19 +21,11 @@ const mkcond = {mtx
>  }
>  
>  const condwait = {cond
> -	var seq
> -	var mtx
> +	var mtx = cond._mtx
> +	var seq = xget(&cond._seq)
>  
> -	mtx = cond._mtx
> -	seq = cond._seq
>  	mtxunlock(mtx)
> -
> -	/*
> -	FIXME?: `ftxwait` can be interrupted but `condwait` should always be
> -	done in a loop anyway.
> -	*/
> -	ftxwait(&cond._seq, seq, Zptr)
> -
> +	ftxwait(&cond._seq, seq, 0)
>  	mtxlock(mtx)

Looking at this code, I realized it may have lost something
in the translation from Linux cond vars. Isn't it possible
that if we have multiple waiters on the condition variable,
we could miss waking up a waiter, because we erronously decide
a lock is uncontended?

Consider this interleaving of operations:

	t1		t2 		t3
  (st=U) |               |               |
    lock +               |               |
condwait +               |               |
  (st=L) |               |               |
  unlock +               |               |
  (st=U) |               |               |
         |          lock +               |
         |        (st=L) |               |
         |      condwait +               |
         |        (st=L) |               |
         |        unlock +               |
         |        (st=U) |               |
         |               |               |
         |               |        signal +
         |               |       ftxwake |
    lock +               |               |
  (st=L) |               |               |
 dostuff |               |               |
  unlock +               |               |
  (st=U) |               |               |

It seems to me that T2 will never get woken up unless
we use the special mtxcontended() function in our futex
based mutex implementation, which sets it to contended
while locking the futexe. That forces the unlock to issue
a wakeup, and wakes up the thread.

This could probably use some more commenting. Possibly
with the contents of the above message.


> diff --git a/lib/thread/condvar+osx.myr b/lib/thread/condvar+osx.myr
> index d74c321f..d18f4ccb 100644
> --- a/lib/thread/condvar+osx.myr
> +++ b/lib/thread/condvar+osx.myr
> @@ -1,7 +1,6 @@
>  use std
>  
>  use "atomic"
> -use "common"
>  use "mutex"
>  use "futex"
>  
> @@ -22,19 +21,11 @@ const mkcond = {mtx
>  }
>  
>  const condwait = {cond
> -	var seq
> -	var mtx
> +	var mtx = cond._mtx
> +	var seq = xget(&cond._seq)
>  
> -	mtx = cond._mtx
> -	seq = cond._seq
>  	mtxunlock(mtx)
> -
> -	/*
> -	FIXME?: `ftxwait` can be interrupted but `condwait` should always be
> -	done in a loop anyway.
> -	*/
> -	ftxwait(&cond._seq, seq, Zptr)
> -
> +	ftxwait(&cond._seq, seq, 0)
>  	mtxlock(mtx)
>  }

Same bug, seems like we can have a missed wakeup. Also, it really looks like
all of the condvar implementations could be unified, at least for the waiting.
Is it the waking that keeps them separate right now?

>  
> diff --git a/lib/thread/futex+freebsd.myr b/lib/thread/futex+freebsd.myr
> index ea5f28d8..a9ad01ed 100644
> --- a/lib/thread/futex+freebsd.myr
> +++ b/lib/thread/futex+freebsd.myr
> @@ -1,3 +1,4 @@
> +use std
>  use sys
>  
>  use "atomic"
> @@ -7,34 +8,51 @@ pkg thread =
>  	type ftxtag = uint32
>  	impl atomic ftxtag
>  
> -	const ftxwait : (uaddr : ftxtag#, val : ftxtag, timeout : sys.timespec# -> int)
> -	const ftxwake : (uaddr : ftxtag# -> int)
> -	const ftxwakeall : (uaddr : ftxtag# -> int)
> +	const ftxwait : (uaddr : ftxtag#, val : ftxtag, tmout : uint32 -> sys.errno)
> +	const ftxwake : (uaddr : ftxtag# -> void)
> +	const ftxwakeall : (uaddr : ftxtag# -> void)
>  ;;
>  
> -const ftxwait = {uaddr, val, timeout
> -	if timeout == Zptr
> -		-> sys.umtx_op((uaddr : void#), sys.Umtxwaituintpriv, (val : uint64), Zptr, Zptr)
> +const ftxwait = {uaddr, val, tmout
> +	var ut : sys._umtx_time, utp, utsize, rc
> +
> +	if tmout == 0

Preference: use -1 for infinite timeout.

> +		utp = Zptr
> +		utsize = Zptr
> +	else
> +		var t = (tmout : int64)
> +		std.assert(sys.clock_gettime(`sys.Clockmonotonic, &ut._timeout) == 0,
> +			"error: clock_gettime returned -1\n")
> +		ut._timeout.nsec += (t % 1_000_000) * 1000
> +		ut._timeout.sec += (ut._timeout.nsec / 1_000_000_000) + (t / 1_000_000)
> +		ut._timeout.nsec %= 1_000_000_000
> +		ut._flags = (sys.Umtxabstime : uint32)
> +		ut._clockid = 1 /* CLOCK_MONOTONIC. Not exported from sys. */

Can always export from sys.

> +		utp = (&ut : void#)
> +		utsize = (sizeof(sys._umtx_time) : void#)
> +	;;
> +
> +	while (rc = (sys.umtx_op((uaddr : void#),
> +			sys.Umtxwaituintpriv,
> +			(val : uint64),
> +			utsize,
> +			utp) : sys.errno)) == sys.Eintr
>  	;;
>  
> -	var ut : sys._umtx_time = [
> -		._timeout = timeout#,
> -		._flags = (sys.Umtxabstime : uint32),
> -		._clockid = 1, /* CLOCK_MONOTONIC. Not exported from sys. */
> -	]
> -	-> sys.umtx_op((uaddr : void#),
> -		sys.Umtxwaituintpriv,
> -		(val : uint64),
> -		(sizeof(sys._umtx_time) : void#),
> -		(&ut : void#))
> +	match rc
> +	| 0: -> 0
> +	| sys.Eagain: -> sys.Eagain
> +	| sys.Etimedout: -> sys.Etimedout
> +	| err: std.fatal("error: futex returned {}\n", err)

std.abort(), so we can get a nice core dump.

> +	;;
>  }
>  
>  const ftxwake = {uaddr
> -	-> sys.umtx_op((uaddr : void#), sys.Umtxwakepriv, 1, Zptr, Zptr)
> +	sys.umtx_op((uaddr : void#), sys.Umtxwakepriv, 1, Zptr, Zptr)
>  }
>  
>  const ftxwakeall = {uaddr
> -	-> sys.umtx_op((uaddr : void#), sys.Umtxwakepriv, 0x7fffffff, Zptr, Zptr)
> +	sys.umtx_op((uaddr : void#), sys.Umtxwakepriv, 0x7fffffff, Zptr, Zptr)
>  }
>  
>  impl atomic ftxtag =
> diff --git a/lib/thread/futex+linux.myr b/lib/thread/futex+linux.myr
> index c5dbf062..d20aca8b 100644
> --- a/lib/thread/futex+linux.myr
> +++ b/lib/thread/futex+linux.myr
> @@ -1,3 +1,4 @@
> +use std
>  use sys
>  
>  use "atomic"
> @@ -7,26 +8,58 @@ pkg thread =
>  	type ftxtag = uint32
>  	impl atomic ftxtag
>  
> -	const ftxwait : (uaddr : ftxtag#, val : ftxtag, timeout : sys.timespec# -> int)
> -	const ftxwake : (uaddr : ftxtag# -> int)
> -	const ftxwakeall : (uaddr : ftxtag# -> int)
> +	const ftxwait : (uaddr : ftxtag#, val : ftxtag, tmout : uint32 -> sys.errno)
> +	const ftxwake : (uaddr : ftxtag# -> void)
> +	const ftxwakeall : (uaddr : ftxtag# -> void)
>  ;;
>  
> -const ftxwait = {uaddr, val, timeout
> -	-> (sys.futex((uaddr : int32#),
> -		sys.Futexwait | sys.Futexpriv,
> -		(val : int32),
> -		timeout,
> -		Zptr,
> -		0) : int)
> +const ftxwait = {uaddr, val, tmout
> +	var rc
> +

> +	if tmout == 0
> +		while (rc = (sys.futex((uaddr : int32#),
> +				sys.Futexwait | sys.Futexpriv,
> +				(val : int32),
> +				Zptr,
> +				Zptr,
> +				0) : sys.errno)) == sys.Eintr
> +		;;
> +	else
> +		var t = (tmout : int64)
> +		var ts
> +		std.assert(sys.clock_gettime(`sys.Clockmonotonic, &ts) == 0,
> +			"error: clock_gettime returned -1\n")
> +		ts.nsec += (t % 1_000_000) * 1000
> +		ts.sec += (ts.nsec / 1_000_000_000) + (t / 1_000_000)
> +		ts.nsec %= 1_000_000_000
> +
> +		/*
> +		Futexwaitbitset + Futexbitsetmatchany causes the timeout to be
> +		treated as absolute rather than relative.
> +		*/
> +		while (rc = (sys.futex((uaddr : int32#),
> +				sys.Futexwaitbitset | sys.Futexpriv,
> +				(val : int32),
> +				&ts,
> +				Zptr,
> +				sys.Futexbitsetmatchany) : sys.errno)) == sys.Eintr
> +		;;
> +	;;

This could be extracted into a utility function shared across all the
implementations, now that the time struct is regularized. Something
like:

	var tmptr = totimeout(&tm)
	while (rc = sys.futex(..., tmptr, ...) == sys.Eintr)
		break
	;;

>  impl atomic ftxtag =
> diff --git a/lib/thread/futex+openbsd:6.2.myr b/lib/thread/futex+openbsd:6.2.myr
> index e3c9c413..cda80347 100644
> --- a/lib/thread/futex+openbsd:6.2.myr
> +++ b/lib/thread/futex+openbsd:6.2.myr

Same comments as the other systems.

>  impl atomic ftxtag =
> diff --git a/lib/thread/futex+osx.myr b/lib/thread/futex+osx.myr
> index 17478703..c07e6b3e 100644
> --- a/lib/thread/futex+osx.myr
> +++ b/lib/thread/futex+osx.myr
>  }
>  
Same comments as the other systems.

>  impl atomic ftxtag =

-- 
    Ori Bernstein

Follow-Ups:
Re: [PATCH 2/4] Fix futex timeouts and handle futex error codes.Ori Bernstein <ori@xxxxxxxxxxxxxx>
[PATCH 2/4] Fix futex timeouts and handle futex error codes.iriri <iri@xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx>
References:
[PATCH 2/4] Fix futex timeouts and handle futex error codes.iriri <iri@xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx>