[PATCH 5/9] Pass small-aggregate arguments by the AMD64 abi.
[Thread Prev] | [Thread Next]
- Subject: [PATCH 5/9] Pass small-aggregate arguments by the AMD64 abi.
- From: "S. Gilles" <sgilles@xxxxxxx>
- Reply-to: myrddin-dev@xxxxxxxxxxxxxx
- Date: Mon, 8 Jun 2020 00:21:12 -0400
- To: "myrddin-dev" <myrddin-dev@xxxxxxxxxxxxxx>
- Cc: "S. Gilles" <sgilles@xxxxxxx>
Generally “aggregate types no larger than two eightbytes (16 bytes), such
that each eightbyte is unambiguously integer or floating-point, are passed
in registers”.
Since things like structs are assumed to be on the stack (for
addressing), this leads to inefficiencies in function calls: the code
var foo : bar = [ .a = 123, .b = 4.567 ]
baz(foo)
will store foo on the stack, then load it into registers to call baz,
and the prologue of baz will immediately copy foo from those registers back into its own stack frame.
Ah, well.
---
6/isel.c | 257 ++++++++++++++++++++++++++++++++++++++++++++++++-------
6/locs.c | 2 +
2 files changed, 229 insertions(+), 30 deletions(-)
diff --git a/6/isel.c b/6/isel.c
index 3dc139b9..3a0129c6 100644
--- a/6/isel.c
+++ b/6/isel.c
@@ -83,6 +83,28 @@ tymode(Type *t)
return ModeNone;
}
+static Mode
+forcefltmode(Mode m)
+{
+ assert(m != ModeNone);
+ switch (m) {
+ case ModeQ: return ModeD;
+ case ModeD: return ModeD;
+ default: return ModeF;
+ }
+}
+
+static Mode
+forceintmode(Mode m)
+{
+ assert(m != ModeNone);
+ switch (m) {
+ case ModeD: return ModeQ;
+ case ModeF: return ModeL;
+ default: return m;
+ }
+}
+
static Mode
mode(Node *n)
{
@@ -501,33 +523,87 @@ call(Isel *s, Node *n)
}
static void
-placearg(Isel *s, Node *argn, Loc *argloc, Loc *rsp, int vararg, size_t *nfloats, size_t *nints, size_t *argoff)
+placearg(Isel *s, Node *argn, Loc *argloc, PassIn p, Loc *rsp, int vararg, size_t *nfloats, size_t *nints, size_t *argoff)
{
+ /*
+ placearg may be called when argn is stored at argloc, but it may also
+ be called when argloc is a small piece of argn, as in the case when
+ small structs are being passed. In those circumstances, p is PassInSSE
+ or PassInInt, and argn is irrelevant. Therefore, argn should not be
+ relied on when p is PassInSSE or PassInInt.
+ */
Loc *src, *dst;
size_t a;
- if (stacknode(argn)) {
- src = locreg(ModeQ);
- g(s, Ilea, argloc, src, NULL);
- a = tyalign(exprtype(argn));
- blit(s, rsp, src, *argoff, 0, size(argn), a);
- *argoff += size(argn);
- } else if (!vararg && isfloatmode(argloc->mode) && *nfloats < Nfloatregargs) {
- dst = coreg(floatargregs[*nfloats], argloc->mode);
+ if (p == PassInNoPref) {
+ if (stacknode(argn)) {
+ p = PassInMemory;
+ } else if (!vararg && isfloatmode(argloc->mode) && *nfloats < Nfloatregargs) {
+ p = PassInSSE;
+ } else if (!vararg && isintmode(argloc->mode) && *nints < Nintregargs) {
+ p = PassInInt;
+ } else {
+ p = PassInMemory;
+ }
+ }
+
+ switch (p) {
+ case PassInMemory:
+ if (stacknode(argn)) {
+ src = locreg(ModeQ);
+ g(s, Ilea, argloc, src, NULL);
+ a = tyalign(exprtype(argn));
+ blit(s, rsp, src, *argoff, 0, size(argn), a);
+ *argoff += size(argn);
+ } else {
+ dst = locmem(*argoff, rsp, NULL, argloc->mode);
+ argloc = inri(s, argloc);
+ stor(s, argloc, dst);
+ *argoff += size(argn);
+ }
+ break;
+ case PassInSSE:
+ dst = coreg(floatargregs[*nfloats], forcefltmode(argloc->mode));
argloc = inri(s, argloc);
- g(s, Imovs, argloc, dst, NULL);
+ if (isfloatmode(argloc->mode)) {
+ g(s, Imovs, argloc, dst, NULL);
+ } else {
+ g(s, Imov, argloc, dst, NULL);
+ }
(*nfloats)++;
- } else if (!vararg && isintmode(argloc->mode) && *nints < Nintregargs) {
- dst = coreg(intargregs[*nints], argloc->mode);
+ break;
+ case PassInInt:
+ dst = coreg(intargregs[*nints], forceintmode(argloc->mode));
argloc = inri(s, argloc);
g(s, Imov, argloc, dst, NULL);
(*nints)++;
- } else {
- dst = locmem(*argoff, rsp, NULL, argloc->mode);
- argloc = inri(s, argloc);
- stor(s, argloc, dst);
- *argoff += size(argn);
+ break;
+ case PassInNoPref: /* impossible */
+ die("cannot determine how to pass arg");
+ break;
+ }
+}
+
+static int
+sufficientregs(PassIn *pc, size_t sz, size_t nfloats, size_t nints)
+{
+ size_t needed_ints = 0;
+ size_t needed_floats = 0;
+ size_t i;
+
+ if (sz > 16) {
+ return 0;
+ }
+
+ for (i = 1; i <= sz; i += 8) {
+ if (pc[i / 8] == PassInInt) {
+ needed_ints++;
+ } else if (pc[i / 8] == PassInSSE) {
+ needed_floats++;
+ }
}
+
+ return (needed_floats + nfloats <= Nfloatregargs) && (needed_ints + nints <= Nintregargs);
}
static Loc *
@@ -535,6 +611,7 @@ gencall(Isel *s, Node *n)
{
Loc *arg; /* values we reduced */
size_t argsz, argoff, nargs, vasplit;
+ size_t sz;
size_t nfloats, nints;
Loc *retloc, *rsp, *ret; /* hard-coded registers */
Loc *stkbump; /* calculated stack offset */
@@ -572,7 +649,12 @@ gencall(Isel *s, Node *n)
* one at a time, we evaluate the args in reverse order.
* Not good.
*
- * Skip the first operand, since it's the function itself */
+ * Skip the first operand, since it's the function itself
+ *
+ * Strictly speaking, we might waste a little space here,
+ * since some of these args might actually get passed in
+ * registers.
+ */
for (i = 0; i < nargs; i++) {
argsz = align(argsz, min(size(args[i]), Ptrsz));
argsz += size(args[i]);
@@ -590,13 +672,37 @@ gencall(Isel *s, Node *n)
vararg = 0;
for (i = 0; i < nargs; i++) {
arg = selexpr(s, args[i]);
- argoff = alignto(argoff, exprtype(args[i]));
+ t = exprtype(args[i]);
+ argoff = alignto(argoff, t);
+ sz = size(args[i]);
if (i >= vasplit)
vararg = 1;
else
argoff = align(argoff, 8);
-
- placearg(s, args[i], arg, rsp, vararg, &nfloats, &nints, &argoff);
+ if (!vararg && isaggregate(t) && sz <= 16) {
+ PassIn pc[2] = { PassInNoPref, PassInNoPref };
+ classify(t, pc);
+ if (pc[0] == PassInMemory || pc[1] == PassInMemory || !sufficientregs(pc, sz, nfloats, nints)) {
+ placearg(s, args[i], arg, PassInMemory, rsp, vararg, &nfloats, &nints, &argoff);
+ } else {
+ assert(stacknode(args[i]));
+ assert(pc[0] != PassInNoPref);
+ placearg(s, args[i], arg, pc[0], rsp, vararg, &nfloats, &nints, &argoff);
+ if (size(args[i]) > 8) {
+ assert(pc[1] != PassInNoPref);
+ Loc *forcedreg = locreg(ModeQ);
+ if (arg->type == Loclbl || (arg->type == Locmeml && !arg->mem.base)) {
+ forcedreg = loclitl(arg->lbl);
+ } else {
+ g(s, Ilea, arg, forcedreg, NULL);
+ }
+ Loc *argpluseight = locmem(8, forcedreg, NULL, ModeQ);
+ placearg(s, args[i], argpluseight, pc[1], rsp, vararg, &nfloats, &nints, &argoff);
+ }
+ }
+ } else {
+ placearg(s, args[i], arg, PassInNoPref, rsp, vararg, &nfloats, &nints, &argoff);
+ }
}
call(s, n);
if (argsz)
@@ -976,26 +1082,55 @@ savedregs[] = {
Rnone
};
+static Mode
+eightbytemode(PassIn p)
+{
+ if (p == PassInSSE) {
+ return ModeD;
+ }
+
+ return ModeQ;
+}
+
+
+static void
+movearg(Isel *s, Loc *dst, PassIn p, Mode m, size_t *nfloats, size_t *nints, size_t *argoff)
+{
+ Loc *a;
+ assert(m != ModeNone);
+
+ switch(p) {
+ case PassInInt:
+ a = coreg(intargregs[*nints], forceintmode(m));
+ g(s, Imov, a, dst, NULL);
+ (*nints)++;
+ break;
+ case PassInSSE:
+ a = coreg(floatargregs[*nfloats], forcefltmode(m));
+ g(s, Imovs, a, dst, NULL);
+ (*nfloats)++;
+ break;
+ default: /* no need to move if on stack */
+ break;
+ }
+}
+
static void
retrievearg(Isel *s, Node *argn, int vararg, size_t *nfloats, size_t *nints, size_t *argoff)
{
- Loc *a, *l;
+ Loc *l;
if (stacknode(argn)) {
htput(s->stkoff, argn, itop(-(*argoff + 2*Ptrsz)));
*argoff += size(argn);
} else if (!vararg && isfloatmode(mode(argn)) && *nfloats < Nfloatregargs) {
- a = coreg(floatargregs[*nfloats], mode(argn));
l = loc(s, argn);
- g(s, Imovs, a, l, NULL);
+ movearg(s, l, PassInSSE, forcefltmode(mode(argn)), nfloats, nints, argoff);
htput(s->reglocs, argn, l);
- (*nfloats)++;
} else if (!vararg && isintmode(mode(argn)) && *nints < Nintregargs) {
- a = coreg(intargregs[*nints], mode(argn));
l = loc(s, argn);
- g(s, Imov, a, l, NULL);
+ movearg(s, l, PassInInt, forceintmode(mode(argn)), nfloats, nints, argoff);
htput(s->reglocs, argn, l);
- (*nints)++;
} else if (tybase(decltype(argn))->type != Tyvoid) {
/* varargs go on the stack */
htput(s->stkoff, argn, itop(-(*argoff + 2*Ptrsz)));
@@ -1008,8 +1143,10 @@ addarglocs(Isel *s, Func *fn)
{
size_t i, nints, nfloats, nargs;
size_t argoff;
+ size_t sz;
int vararg;
Node *arg;
+ Type *t;
argoff = 0;
nfloats = 0;
@@ -1018,13 +1155,41 @@ addarglocs(Isel *s, Func *fn)
nargs = countargs(fn->type);
for (i = 0; i < fn->nargs; i++) {
arg = fn->args[i];
- argoff = alignto(argoff, decltype(arg));
+ t = decltype(arg);
+ argoff = alignto(argoff, t);
+ sz = size(arg);
if (i >= nargs)
vararg = 1;
else
argoff = align(argoff, 8);
- retrievearg(s, arg, vararg, &nfloats, &nints, &argoff);
+ if (!vararg && isaggregate(t) && sz <= 16) {
+ PassIn pc[2] = { PassInNoPref, PassInNoPref };
+ classify(t, pc);
+ if (pc[0] == PassInMemory || pc[1] == PassInMemory || !sufficientregs(pc, sz, nfloats, nints)) {
+ retrievearg(s, arg, vararg, &nfloats, &nints, &argoff);
+ } else {
+ /*
+ * Needs to be carefully put back on the stack, not simply located among
+ * registers. loc will succeed because the space was reserved in
+ * handlesmallstructargs.
+ */
+ Loc *l = loc(s, arg);
+ movearg(s, l, pc[0], eightbytemode(pc[0]), &nfloats, &nints, &argoff);
+ if (size(arg) > 8) {
+ Loc *forcedreg = locreg(ModeQ);
+ if (l->type == Loclbl || (l->type == Locmeml && !l->mem.base)) {
+ forcedreg = loclitl(l->lbl);
+ } else {
+ g(s, Ilea, l, forcedreg, NULL);
+ }
+ Loc *lpluseight = locmem(8, forcedreg, NULL, ModeQ);
+ movearg(s, lpluseight, pc[1], eightbytemode(pc[1]), &nfloats, &nints, &argoff);
+ }
+ }
+ } else {
+ retrievearg(s, arg, vararg, &nfloats, &nints, &argoff);
+ }
}
}
@@ -1107,6 +1272,37 @@ mkasmbb(Bb *bb)
return as;
}
+static void
+handlesmallstructargs(Isel *is, Func *fn)
+{
+ /*
+ * Perform a last-minute adjustment to fn->stksz to handle small structs
+ * that will be passed in registers. We do this inside selfunc so that
+ * generics will be specialized.
+ */
+ size_t vasplit = countargs(fn->type);
+ size_t i = 0, sz = 0;
+ Type *t;
+ Node *arg;
+
+ for (i = 0; i < fn->nargs; i++) {
+ arg = fn->args[i];
+ t = decltype(arg);
+ sz = size(arg);
+
+ if (i < vasplit && isaggregate(t) && sz <= 16) {
+ PassIn pc[2] = { PassInNoPref, PassInNoPref };
+ classify(t, pc);
+ if (pc[0] != PassInMemory && pc[1] != PassInMemory) {
+ sz = align(sz, 8);
+ fn->stksz += sz;
+ fn->stksz = align(fn->stksz, min(sz, Ptrsz));
+ htput(fn->stkoff, fn->args[i], itop(fn->stksz));
+ }
+ }
+ }
+}
+
void
selfunc(Isel *is, Func *fn, Htab *globls, Htab *strtab)
{
@@ -1132,6 +1328,7 @@ selfunc(Isel *is, Func *fn, Htab *globls, Htab *strtab)
g(is, Iloc, locstrlbl(buf), NULL);
}
+ handlesmallstructargs(is, fn);
prologue(is, fn, fn->stksz);
lastline = -1;
for (j = 0; j < fn->cfg->nbb - 1; j++) {
diff --git a/6/locs.c b/6/locs.c
index aa26f01e..b314cd9a 100644
--- a/6/locs.c
+++ b/6/locs.c
@@ -176,6 +176,8 @@ loclit(long val, Mode m)
Loc *
coreg(Reg r, Mode m)
{
+ assert(m != ModeNone);
+
Reg crtab[][Nmode + 1] = {
[Ral] = {Rnone, Ral, Rax, Reax, Rrax},
[Rcl] = {Rnone, Rcl, Rcx, Recx, Rrcx},
--
2.26.2
| [PATCH 0/9] Handle small-aggregates via AMD64 abi | "S. Gilles" <sgilles@xxxxxxx> |
- Prev by Date: [PATCH 4/9] Add classification algorithm for small-struct passing.
- Next by Date: [PATCH 7/9] Bump abi version.
- Previous by thread: [PATCH 4/9] Add classification algorithm for small-struct passing.
- Next by thread: [PATCH 7/9] Bump abi version.
- Index(es):