Eigenstate: myrddin-dev mailing list

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] Fix use-after-free in futex-based semaphore implementations.


The current implementation of `sem_post` first increments the value of
the semaphore and then checks the waiter count to see if it must call
`ftxwake`.  However, it is possible for `sem_wait` to return at any time
after the value has been incremented, meaning that the memory this check
looks at might already be deallocated.

Happily, it turns out the OS X futex operations actually take a 32 bit
value which greatly simplifies things.
---
 lib/sys/sys+osx-x64.myr  |  6 +++---
 lib/thread/futex+osx.myr | 29 +++++++++++++++--------------
 lib/thread/sem+futex.myr | 19 +++++++++++++++++--
 3 files changed, 35 insertions(+), 19 deletions(-)

diff --git a/lib/sys/sys+osx-x64.myr b/lib/sys/sys+osx-x64.myr
index cee1a74c..9bd83462 100644
--- a/lib/sys/sys+osx-x64.myr
+++ b/lib/sys/sys+osx-x64.myr
@@ -835,8 +835,8 @@ pkg sys =
 		-> int)
 
 	/* ulock */
-	const ulock_wait	: (op : ulockop, uaddr : uint64#, val : uint64, timeout : uint32 -> int)
-	const ulock_wake	: (op : ulockop, uaddr : uint64#, wakeval : uint64 -> int)
+	const ulock_wait	: (op : ulockop, uaddr : void#, val : uint64, timeout : uint32 -> int)
+	const ulock_wake	: (op : ulockop, uaddr : void#, wakeval : uint64 -> int)
 
 	extern const cstring : (str : byte[:] -> byte#)
 	/* filled by start code */
@@ -1110,7 +1110,7 @@ const ulock_wait = {op, uaddr, val, timeout
 }
 
 const ulock_wake = {op, uaddr, wakeval
-	-> (syscall(Sysulock_wake, a(op), uaddr, a(wakeval)) : int)
+	-> (syscall(Sysulock_wake, a(op), uaddr, wakeval) : int)
 }
 
 const waitstatus = {st
diff --git a/lib/thread/futex+osx.myr b/lib/thread/futex+osx.myr
index ea93fda7..d67320f8 100644
--- a/lib/thread/futex+osx.myr
+++ b/lib/thread/futex+osx.myr
@@ -4,8 +4,13 @@ use sys
 use "atomic"
 use "common"
 
+/*
+The `ulock_` functions are undocumented but the relevant source can be found at
+https://github.com/apple/darwin-xnu/blob/0a798f6738bc1db01281fc08ae024145e84df927/bsd/kern/sys_ulock.c
+*/
 pkg thread =
-	type ftxtag = uint64
+	/* `UL_COMPARE_AND_WAIT` only uses 32 bits of the value (line 407) */
+	type ftxtag = uint32
 	impl atomic ftxtag
 
 	const ftxwait : (uaddr : ftxtag#, val : ftxtag, tmout : std.time -> sys.errno)
@@ -13,16 +18,12 @@ pkg thread =
 	const ftxwakeall : (uaddr : ftxtag# -> void)
 ;;
 
-/*
- * The ulock_ functions are undocumented but the relevant source can be found at
- * https://github.com/apple/darwin-xnu/blob/0a798f6738bc1db01281fc08ae024145e84df927/bsd/kern/sys_ulock.c
- */
 const ftxwait = {uaddr, val, tmout
 	var rc
 
 	if tmout < 0
 		while (rc = (sys.ulock_wait(sys.Ulockcompareandwait,
-				(uaddr : uint64#),
+				(uaddr : void#),
 				(val : uint64),
 				0) : sys.errno)) == sys.Eintr
 		;;
@@ -34,7 +35,7 @@ const ftxwait = {uaddr, val, tmout
 		t = (tmout : uint32)
 
 		while (rc = (sys.ulock_wait(sys.Ulockcompareandwait,
-				(uaddr : uint64#),
+				(uaddr : void#),
 				(val : uint64),
 				t) : sys.errno)) == sys.Eintr
 			var now
@@ -69,17 +70,17 @@ const ftxwait = {uaddr, val, tmout
 }
 
 const ftxwake = {uaddr
-	sys.ulock_wake(sys.Ulockcompareandwait, (uaddr : uint64#), 0)
+	sys.ulock_wake(sys.Ulockcompareandwait, (uaddr : void#), 0)
 }
 
 const ftxwakeall = {uaddr
-	sys.ulock_wake(sys.Ulockcompareandwait | sys.Ulockulfwakeall, (uaddr : uint64#), 0)
+	sys.ulock_wake(sys.Ulockcompareandwait | sys.Ulockulfwakeall, (uaddr : void#), 0)
 }
 
 impl atomic ftxtag =
-	xget = {p; -> (xget64((p : uint64#)) : ftxtag)}
-	xset = {p, v; xset64((p : uint64#), (v : uint64))}
-	xadd = {p, v; -> (xadd64((p : uint64#), (v : uint64)) : ftxtag)}
-	xcas = {p, old, new; -> (xcas64((p : uint64#), (old : uint64), (new : uint64)) : ftxtag)}
-	xchg = {p, v; -> (xchg64((p : uint64#), (v : uint64)) : ftxtag)}
+	xget = {p; -> (xget32((p : uint32#)) : ftxtag)}
+	xset = {p, v; xset32((p : uint32#), (v : uint32))}
+	xadd = {p, v; -> (xadd32((p : uint32#), (v : uint32)) : ftxtag)}
+	xcas = {p, old, new; -> (xcas32((p : uint32#), (old : uint32), (new : uint32)) : ftxtag)}
+	xchg = {p, v; -> (xchg32((p : uint32#), (v : uint32)) : ftxtag)}
 ;;
diff --git a/lib/thread/sem+futex.myr b/lib/thread/sem+futex.myr
index 45bca0d9..68b5054f 100644
--- a/lib/thread/sem+futex.myr
+++ b/lib/thread/sem+futex.myr
@@ -4,6 +4,10 @@ use "atomic"
 use "futex"
 
 pkg thread =
+	/*
+	We want to be able to read both members in the same atomic operation
+	and this implementation assumes `ftxtag` is 32 bits.
+	*/
 	type sem = struct
 		_val      : ftxtag
 		_nwaiters : uint32
@@ -48,9 +52,20 @@ const semtrywait = {s
 }
 
 const sempost = {s
-	std.assert((xadd(&s._val, 1) : uint32) != ~0x0, "error: semaphore overflowed\n")
+	/*
+	It's possible for the semaphore to be deallocated at any time after
+	`_val` is incremented so we must not dereference `s` a second time. We
+	work around this by also reading the value of `_nwaiters` during the
+	atomic fetch and add.
+	*/
+	var state = xadd((s : uint64#), 1)
+	std.assert((state : uint32) != ~0x0, "error: semaphore overflowed\n")
 
-	if xget(&s._nwaiters) > 0
+	if (state >> 32) > 0
+		/*
+		However, it is both legal and expected to pass a potentially
+		invalid address to `ftxwake`.
+		*/
 		ftxwake(&s._val)
 	;;
 }
-- 
2.21.0


Follow-Ups:
Re: [PATCH] Fix use-after-free in futex-based semaphore implementations.Ori Bernstein <ori@xxxxxxxxxxxxxx>