[PATCH 5/9] Pass small-aggregate arguments by the AMD64 ABI.
[Thread Prev] | [Thread Next]
- Subject: [PATCH 5/9] Pass small-aggregate arguments by the AMD64 ABI.
- From: "S. Gilles" <sgilles@xxxxxxx>
- Reply-to: myrddin-dev@xxxxxxxxxxxxxx
- Date: Sat, 20 Jun 2020 22:40:05 -0400
- To: "myrddin-dev" <myrddin-dev@xxxxxxxxxxxxxx>
- Cc: "S. Gilles" <sgilles@xxxxxxx>
Generally “aggregate types smaller than two eightbytes, such that each eightbyte is unambiguously integer or floating-point, are passed in registers”. Since things like structs are assumed to be on the stack (for addressing), this leads to inefficiencies in function calls: the code var foo : bar = [ .a = 123, .b = 4.567 ] baz(foo) will store foo on the stack, then load it into registers to call baz, and the prologue of baz will immediately push foo back onto the stack. Ah, well. --- 6/asm.h | 18 +-- 6/isel.c | 318 ++++++++++++++++++++++++++++++++++++++++++++++----- 6/locs.c | 2 + 6/typeinfo.c | 65 +++++++---- 4 files changed, 344 insertions(+), 59 deletions(-) diff --git a/6/asm.h b/6/asm.h index 447ee7ac..fc0b4c64 100644 --- a/6/asm.h +++ b/6/asm.h @@ -53,11 +53,16 @@ typedef enum { } PassIn; typedef enum { - RetVoid, - RetReg, - RetSmallAggregate, - RetBig, -} RetType; + ArgVoid, + ArgReg, /* Either int or flt, depending on Loc* type */ + ArgSmallAggr_Int, + ArgSmallAggr_Flt, + ArgSmallAggr_Int_Int, + ArgSmallAggr_Flt_Int, + ArgSmallAggr_Int_Flt, + ArgSmallAggr_Flt_Flt, + ArgBig, +} ArgType; typedef enum { Classbad, @@ -331,9 +336,8 @@ size_t size(Node *n); ssize_t tyoffset(Type *ty, Node *memb); ssize_t offset(Node *aggr, Node *memb); size_t countargs(Type *t); -void classify(Type *t, PassIn *p); +ArgType classify(Type *t); int isaggregate(Type *t); -RetType howreturned(Type *t); int stacknode(Node *n); int floatnode(Node *n); void breakhere(); diff --git a/6/isel.c b/6/isel.c index 3dc139b9..33c3c381 100644 --- a/6/isel.c +++ b/6/isel.c @@ -83,6 +83,28 @@ tymode(Type *t) return ModeNone; } +static Mode +forcefltmode(Mode m) +{ + assert(m != ModeNone); + switch (m) { + case ModeQ: return ModeD; + case ModeD: return ModeD; + default: return ModeF; + } +} + +static Mode +forceintmode(Mode m) +{ + assert(m != ModeNone); + switch (m) { + case ModeD: return ModeQ; + case ModeF: return ModeL; + default: return m; + } +} + static Mode mode(Node *n) { @@ -501,33 
+523,110 @@ call(Isel *s, Node *n) } static void -placearg(Isel *s, Node *argn, Loc *argloc, Loc *rsp, int vararg, size_t *nfloats, size_t *nints, size_t *argoff) +placearg(Isel *s, Node *argn, Loc *argloc, PassIn p, Loc *rsp, int vararg, size_t *nfloats, size_t *nints, size_t *argoff) { + /* + placearg may be called when argn is stored at argloc, but it may also + be called when argloc is a small piece of argn, as in the case when + small structs are being passed. In those circumstances, p is PassInSSE + or PassInInt, and argn is irrelevant. Therefore, argn should not be + relied on when p is PassInSSE or PassInInt. + */ Loc *src, *dst; size_t a; - if (stacknode(argn)) { - src = locreg(ModeQ); - g(s, Ilea, argloc, src, NULL); - a = tyalign(exprtype(argn)); - blit(s, rsp, src, *argoff, 0, size(argn), a); - *argoff += size(argn); - } else if (!vararg && isfloatmode(argloc->mode) && *nfloats < Nfloatregargs) { - dst = coreg(floatargregs[*nfloats], argloc->mode); + if (p == PassInNoPref) { + if (stacknode(argn)) { + p = PassInMemory; + } else if (!vararg && isfloatmode(argloc->mode) && *nfloats < Nfloatregargs) { + p = PassInSSE; + } else if (!vararg && isintmode(argloc->mode) && *nints < Nintregargs) { + p = PassInInt; + } else { + p = PassInMemory; + } + } + + switch (p) { + case PassInMemory: + if (stacknode(argn)) { + src = locreg(ModeQ); + g(s, Ilea, argloc, src, NULL); + a = tyalign(exprtype(argn)); + blit(s, rsp, src, *argoff, 0, size(argn), a); + *argoff += size(argn); + } else { + dst = locmem(*argoff, rsp, NULL, argloc->mode); + argloc = inri(s, argloc); + stor(s, argloc, dst); + *argoff += size(argn); + } + break; + case PassInSSE: + dst = coreg(floatargregs[*nfloats], forcefltmode(argloc->mode)); argloc = inri(s, argloc); - g(s, Imovs, argloc, dst, NULL); + if (isfloatmode(argloc->mode)) { + g(s, Imovs, argloc, dst, NULL); + } else { + g(s, Imov, argloc, dst, NULL); + } (*nfloats)++; - } else if (!vararg && isintmode(argloc->mode) && *nints < Nintregargs) 
{ - dst = coreg(intargregs[*nints], argloc->mode); + break; + case PassInInt: + dst = coreg(intargregs[*nints], forceintmode(argloc->mode)); argloc = inri(s, argloc); g(s, Imov, argloc, dst, NULL); (*nints)++; + break; + case PassInNoPref: /* impossible */ + die("cannot determine how to pass arg"); + break; + } +} + +static int +sufficientregs(ArgType a, size_t nfloats, size_t nints) +{ + size_t needed_ints = 0; + size_t needed_flts = 0; + + switch(a) { + case ArgSmallAggr_Int: + case ArgSmallAggr_Flt_Int: + case ArgSmallAggr_Int_Flt: + needed_ints = 1; + break; + case ArgSmallAggr_Int_Int: + needed_ints = 2; + break; + default: break; + } + + switch(a) { + case ArgSmallAggr_Flt: + case ArgSmallAggr_Flt_Int: + case ArgSmallAggr_Int_Flt: + needed_flts = 1; + break; + case ArgSmallAggr_Flt_Flt: + needed_flts = 2; + break; + default: break; + } + + return (needed_flts + nfloats <= Nfloatregargs) && (needed_ints + nints <= Nintregargs); +} + +static Loc * +plus8(Isel *s, Loc *base) +{ + Loc *forcedreg = locreg(ModeQ); + if (base->type == Loclbl || (base->type == Locmeml && !base->mem.base)) { + forcedreg = loclitl(base->lbl); } else { - dst = locmem(*argoff, rsp, NULL, argloc->mode); - argloc = inri(s, argloc); - stor(s, argloc, dst); - *argoff += size(argn); + g(s, Ilea, base, forcedreg, NULL); } + return locmem(8, forcedreg, NULL, ModeQ); } static Loc * @@ -572,7 +671,12 @@ gencall(Isel *s, Node *n) * one at a time, we evaluate the args in reverse order. * Not good. * - * Skip the first operand, since it's the function itself */ + * Skip the first operand, since it's the function itself + * + * Strictly speaking, we might waste a little space here, + * since some of these args might actually get passed in + * registers. 
+ */ for (i = 0; i < nargs; i++) { argsz = align(argsz, min(size(args[i]), Ptrsz)); argsz += size(args[i]); @@ -590,13 +694,54 @@ gencall(Isel *s, Node *n) vararg = 0; for (i = 0; i < nargs; i++) { arg = selexpr(s, args[i]); - argoff = alignto(argoff, exprtype(args[i])); + t = exprtype(args[i]); + argoff = alignto(argoff, t); + ArgType a = ArgBig; if (i >= vasplit) vararg = 1; else argoff = align(argoff, 8); - placearg(s, args[i], arg, rsp, vararg, &nfloats, &nints, &argoff); + if (!vararg) { + a = classify(t); + } + + if (!sufficientregs(a, nfloats, nints)) { + a = ArgBig; + } + + switch(a) { + case ArgVoid: + break; + case ArgReg: + case ArgBig: + /* placearg can figure this out */ + placearg(s, args[i], arg, PassInNoPref, rsp, vararg, &nfloats, &nints, &argoff); + break; + case ArgSmallAggr_Int: + placearg(s, args[i], arg, PassInInt, rsp, vararg, &nfloats, &nints, &argoff); + break; + case ArgSmallAggr_Flt: + placearg(s, args[i], arg, PassInSSE, rsp, vararg, &nfloats, &nints, &argoff); + break; + case ArgSmallAggr_Int_Int: + placearg(s, args[i], arg , PassInInt, rsp, vararg, &nfloats, &nints, &argoff); + placearg(s, args[i], plus8(s, arg), PassInInt, rsp, vararg, &nfloats, &nints, &argoff); + break; + case ArgSmallAggr_Int_Flt: + placearg(s, args[i], arg , PassInInt, rsp, vararg, &nfloats, &nints, &argoff); + placearg(s, args[i], plus8(s, arg), PassInSSE, rsp, vararg, &nfloats, &nints, &argoff); + break; + case ArgSmallAggr_Flt_Int: + placearg(s, args[i], arg , PassInSSE, rsp, vararg, &nfloats, &nints, &argoff); + placearg(s, args[i], plus8(s, arg), PassInInt, rsp, vararg, &nfloats, &nints, &argoff); + break; + case ArgSmallAggr_Flt_Flt: + placearg(s, args[i], arg , PassInSSE, rsp, vararg, &nfloats, &nints, &argoff); + placearg(s, args[i], plus8(s, arg), PassInSSE, rsp, vararg, &nfloats, &nints, &argoff); + break; + } + } call(s, n); if (argsz) @@ -976,26 +1121,44 @@ savedregs[] = { Rnone }; +static void +movearg(Isel *s, Loc *dst, PassIn p, Mode m, size_t 
*nfloats, size_t *nints, size_t *argoff) +{ + Loc *a; + assert(m != ModeNone); + + switch(p) { + case PassInInt: + a = coreg(intargregs[*nints], forceintmode(m)); + g(s, Imov, a, dst, NULL); + (*nints)++; + break; + case PassInSSE: + a = coreg(floatargregs[*nfloats], forcefltmode(m)); + g(s, Imovs, a, dst, NULL); + (*nfloats)++; + break; + default: /* no need to move if on stack */ + break; + } +} + static void retrievearg(Isel *s, Node *argn, int vararg, size_t *nfloats, size_t *nints, size_t *argoff) { - Loc *a, *l; + Loc *l; if (stacknode(argn)) { htput(s->stkoff, argn, itop(-(*argoff + 2*Ptrsz))); *argoff += size(argn); } else if (!vararg && isfloatmode(mode(argn)) && *nfloats < Nfloatregargs) { - a = coreg(floatargregs[*nfloats], mode(argn)); l = loc(s, argn); - g(s, Imovs, a, l, NULL); + movearg(s, l, PassInSSE, forcefltmode(mode(argn)), nfloats, nints, argoff); htput(s->reglocs, argn, l); - (*nfloats)++; } else if (!vararg && isintmode(mode(argn)) && *nints < Nintregargs) { - a = coreg(intargregs[*nints], mode(argn)); l = loc(s, argn); - g(s, Imov, a, l, NULL); + movearg(s, l, PassInInt, forceintmode(mode(argn)), nfloats, nints, argoff); htput(s->reglocs, argn, l); - (*nints)++; } else if (tybase(decltype(argn))->type != Tyvoid) { /* varargs go on the stack */ htput(s->stkoff, argn, itop(-(*argoff + 2*Ptrsz))); @@ -1010,6 +1173,7 @@ addarglocs(Isel *s, Func *fn) size_t argoff; int vararg; Node *arg; + Type *t; argoff = 0; nfloats = 0; @@ -1018,13 +1182,60 @@ addarglocs(Isel *s, Func *fn) nargs = countargs(fn->type); for (i = 0; i < fn->nargs; i++) { arg = fn->args[i]; - argoff = alignto(argoff, decltype(arg)); + t = decltype(arg); + argoff = alignto(argoff, t); + ArgType a = ArgBig; + Loc *l = NULL; if (i >= nargs) vararg = 1; else argoff = align(argoff, 8); - retrievearg(s, arg, vararg, &nfloats, &nints, &argoff); + if (!vararg) { + a = classify(t); + } + + if (!sufficientregs(a, nfloats, nints)) { + a = ArgBig; + } + + switch(a) { + case ArgVoid: + break; 
+ case ArgReg: + case ArgBig: + /* retrievearg can figure this out */ + retrievearg(s, arg, vararg, &nfloats, &nints, &argoff); + break; + case ArgSmallAggr_Int: + l = loc(s, arg); + movearg(s, l, PassInInt, ModeQ, &nfloats, &nints, &argoff); + break; + case ArgSmallAggr_Flt: + l = loc(s, arg); + movearg(s, l, PassInSSE, ModeD, &nfloats, &nints, &argoff); + break; + case ArgSmallAggr_Int_Int: + l = loc(s, arg); + movearg(s, l , PassInInt, ModeQ, &nfloats, &nints, &argoff); + movearg(s, plus8(s, l), PassInInt, ModeQ, &nfloats, &nints, &argoff); + break; + case ArgSmallAggr_Int_Flt: + l = loc(s, arg); + movearg(s, l , PassInInt, ModeQ, &nfloats, &nints, &argoff); + movearg(s, plus8(s, l), PassInSSE, ModeD, &nfloats, &nints, &argoff); + break; + case ArgSmallAggr_Flt_Int: + l = loc(s, arg); + movearg(s, l , PassInSSE, ModeD, &nfloats, &nints, &argoff); + movearg(s, plus8(s, l), PassInInt, ModeQ, &nfloats, &nints, &argoff); + break; + case ArgSmallAggr_Flt_Flt: + l = loc(s, arg); + movearg(s, l , PassInSSE, ModeD, &nfloats, &nints, &argoff); + movearg(s, plus8(s, l), PassInSSE, ModeD, &nfloats, &nints, &argoff); + break; + } } } @@ -1107,6 +1318,56 @@ mkasmbb(Bb *bb) return as; } +static void +handlesmallstructargs(Isel *is, Func *fn) +{ + /* + * Perform a last-minute adjustment to fn->stksz to handle small structs + * that will be passed in registers. We do this inside selfunc so that + * generics will be specialized. 
+ */ + size_t vasplit = countargs(fn->type); + size_t i = 0; + Type *t; + Node *arg; + + for (i = 0; i < fn->nargs; i++) { + arg = fn->args[i]; + t = decltype(arg); + int vararg = 0; + ArgType a = ArgBig; + + if (i >= vasplit) + vararg = 1; + + if (!vararg) { + a = classify(t); + } + + switch(a) { + case ArgVoid: + case ArgReg: + case ArgBig: + /* No need for any extra space for this arg */ + break; + case ArgSmallAggr_Int: + case ArgSmallAggr_Flt: + fn->stksz += 8; + fn->stksz = align(fn->stksz, min(8, Ptrsz)); + htput(fn->stkoff, fn->args[i], itop(fn->stksz)); + break; + case ArgSmallAggr_Int_Int: + case ArgSmallAggr_Int_Flt: + case ArgSmallAggr_Flt_Int: + case ArgSmallAggr_Flt_Flt: + fn->stksz += 16; + fn->stksz = align(fn->stksz, min(16, Ptrsz)); + htput(fn->stkoff, fn->args[i], itop(fn->stksz)); + break; + } + } +} + void selfunc(Isel *is, Func *fn, Htab *globls, Htab *strtab) { @@ -1132,6 +1393,7 @@ selfunc(Isel *is, Func *fn, Htab *globls, Htab *strtab) g(is, Iloc, locstrlbl(buf), NULL); } + handlesmallstructargs(is, fn); prologue(is, fn, fn->stksz); lastline = -1; for (j = 0; j < fn->cfg->nbb - 1; j++) { diff --git a/6/locs.c b/6/locs.c index aa26f01e..b314cd9a 100644 --- a/6/locs.c +++ b/6/locs.c @@ -176,6 +176,8 @@ loclit(long val, Mode m) Loc * coreg(Reg r, Mode m) { + assert(m != ModeNone); + Reg crtab[][Nmode + 1] = { [Ral] = {Rnone, Ral, Rax, Reax, Rrax}, [Rcl] = {Rnone, Rcl, Rcx, Recx, Rrcx}, diff --git a/6/typeinfo.c b/6/typeinfo.c index aff1c338..fdced668 100644 --- a/6/typeinfo.c +++ b/6/typeinfo.c @@ -531,16 +531,6 @@ classify_recursive(Type *t, PassIn *p, size_t *total_offset) *total_offset = align(cur_offset + sz, tyalign(t)); } -void -classify(Type *t, PassIn *p) -{ - size_t total_offset = 0; - /* p must be of length exactly 2 */ - p[0] = PassInNoPref; - p[1] = PassInNoPref; - classify_recursive(t, p, &total_offset); -} - int isaggregate(Type *t) { @@ -549,30 +539,57 @@ isaggregate(Type *t) (t->type == Tyunion && !isenum(t))); } -RetType 
howreturned(Type *t) +ArgType +classify(Type *t) { - /* - * This is only for determining how values are returned from functions. - * Determining how arguments are passed requires register counting using - * the whole prototype. - */ size_t sz = tysize(t); - PassIn pc[2] = { PassInNoPref, PassInNoPref }; + size_t total_offset = 0; + + /* p must be of length exactly 2 */ + PassIn pi[2] = { PassInNoPref, PassInNoPref }; if (tybase(t)->type == Tyvoid) { - return RetVoid; + return ArgVoid; } else if (isstacktype(t)) { if (isaggregate(t) && sz <= 16) { - classify(t, pc); - if (pc[0] == PassInMemory || pc[1] == PassInMemory) { - return RetBig; + classify_recursive(t, pi, &total_offset); + if (pi[0] == PassInMemory || pi[1] == PassInMemory) { + return ArgBig; } - return RetSmallAggregate; + switch(pi[0]) { + case PassInInt: + if (sz <= 8) { + return ArgSmallAggr_Int; + } + switch(pi[1]) { + case PassInInt: return ArgSmallAggr_Int_Int; + case PassInSSE: return ArgSmallAggr_Int_Flt; + default: + die("Impossible return from classify_recursive"); + break; + } + break; + case PassInSSE: + if (sz <= 8) { + return ArgSmallAggr_Flt; + } + switch(pi[1]) { + case PassInInt: return ArgSmallAggr_Flt_Int; + case PassInSSE: return ArgSmallAggr_Flt_Flt; + default: + die("Impossible return from classify_recursive"); + break; + } + break; + default: + die("Impossible return from classify_recursive"); + break; + } } - return RetBig; + return ArgBig; } - return RetReg; + return ArgReg; } -- 2.27.0
[PATCH 0/9] v2: Handle small aggregates via the AMD64 ABI | "S. Gilles" <sgilles@xxxxxxx> |
- Prev by Date: [PATCH 4/9] Add classification algorithm for small-struct passing.
- Next by Date: [PATCH 6/9] Return small-aggregate values by the AMD64 ABI.
- Previous by thread: [PATCH 4/9] Add classification algorithm for small-struct passing.
- Next by thread: [PATCH 6/9] Return small-aggregate values by the AMD64 ABI.
- Index(es):