Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/backend/cdb/cdbpath.c
Original file line number Diff line number Diff line change
Expand Up @@ -1469,7 +1469,13 @@ cdbpath_motion_for_join(PlannerInfo *root,
outer.ok_to_replicate = false;
break;
case JOIN_RIGHT:
case JOIN_RIGHT_SEMI:
case JOIN_RIGHT_ANTI:
/*
* GPDB: A right-semi join emits inner (build-side) rows, so just
* like JOIN_RIGHT/JOIN_RIGHT_ANTI the inner side must not be
* replicated, or matched inner rows could be emitted more than once.
*/
inner.ok_to_replicate = false;
break;
case JOIN_FULL:
Expand Down Expand Up @@ -3220,6 +3226,7 @@ cdbpath_motion_for_parallel_join(PlannerInfo *root,
case JOIN_UNIQUE_OUTER:
case JOIN_UNIQUE_INNER:
case JOIN_RIGHT:
case JOIN_RIGHT_SEMI:
case JOIN_RIGHT_ANTI:
case JOIN_FULL:
outer.ok_to_replicate = false;
Expand Down
3 changes: 3 additions & 0 deletions src/backend/commands/explain.c
Original file line number Diff line number Diff line change
Expand Up @@ -2183,6 +2183,9 @@ ExplainNode(PlanState *planstate, List *ancestors,
case JOIN_LASJ_NOTIN:
jointype = "Left Anti Semi (Not-In)";
break;
case JOIN_RIGHT_SEMI:
jointype = "Right Semi";
break;
case JOIN_RIGHT_ANTI:
jointype = "Right Anti";
break;
Expand Down
22 changes: 16 additions & 6 deletions src/backend/executor/nodeHashjoin.c
Original file line number Diff line number Diff line change
Expand Up @@ -688,6 +688,14 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
}
}

/*
* In a right-semijoin, we only need the first match for each
* inner tuple.
*/
if (node->js.jointype == JOIN_RIGHT_SEMI &&
HeapTupleHeaderHasMatch(HJTUPLE_MINTUPLE(node->hj_CurTuple)))
continue;

/*
* We've got a match, but still need to test non-hashed quals.
* ExecScanHashBucket already set up all the state needed to
Expand All @@ -704,10 +712,10 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
{
node->hj_MatchedOuter = true;


/*
* This is really only needed if HJ_FILL_INNER(node), but
* we'll avoid the branch and just set it always.
* This is really only needed if HJ_FILL_INNER(node) or if
* we are in a right-semijoin, but we'll avoid the branch
* and just set it always.
*/
if (!HeapTupleHeaderHasMatch(HJTUPLE_MINTUPLE(node->hj_CurTuple)))
HeapTupleHeaderSetMatch(HJTUPLE_MINTUPLE(node->hj_CurTuple));
Expand Down Expand Up @@ -1024,6 +1032,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
{
case JOIN_INNER:
case JOIN_SEMI:
case JOIN_RIGHT_SEMI:
break;
case JOIN_LEFT:
case JOIN_ANTI:
Expand Down Expand Up @@ -1792,10 +1801,11 @@ ExecReScanHashJoin(HashJoinState *node)
/*
* Okay to reuse the hash table; needn't rescan inner, either.
*
* However, if it's a right/right-anti/full join, we'd better
* reset the inner-tuple match flags contained in the table.
* However, if it's a right/right-anti/right-semi/full join, we'd
* better reset the inner-tuple match flags contained in the
* table.
*/
if (HJ_FILL_INNER(node))
if (HJ_FILL_INNER(node) || node->js.jointype == JOIN_RIGHT_SEMI)
ExecHashTableResetMatchFlags(node->hj_HashTable);

/*
Expand Down
50 changes: 46 additions & 4 deletions src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1559,6 +1559,24 @@ CTranslatorDXLToPlStmt::TranslateDXLHashJoin(
// translate join children
CDXLNode *left_tree_dxlnode = (*hj_dxlnode)[EdxlhjIndexHashLeft];
CDXLNode *right_tree_dxlnode = (*hj_dxlnode)[EdxlhjIndexHashRight];

// For JOIN_RIGHT_SEMI / JOIN_RIGHT_ANTI the GPDB executor builds the hash
// table on the inner (right) child and emits inner-side rows, but ORCA
// places the semantically-preserved LHS as the DXL left child. Swap which
// DXL child becomes the executor's outer (probe) vs inner (build/Hash) so
// that the LHS ends up as the inner -- mirroring the PostgreSQL planner,
// which generates JOIN_RIGHT_SEMI with inner = LHS. OUTER_VAR/INNER_VAR
// assignment follows the child-context order (context[0] -> OUTER_VAR,
// context[1] -> INNER_VAR), so the target list, quals and hash clauses are
// remapped automatically; the outer/inner hash-key extraction below is
// also swapped to match.
BOOL fSwapBuildSide = (join->jointype == JOIN_RIGHT_SEMI ||
join->jointype == JOIN_RIGHT_ANTI);
CDXLNode *outer_tree_dxlnode =
fSwapBuildSide ? right_tree_dxlnode : left_tree_dxlnode;
CDXLNode *inner_tree_dxlnode =
fSwapBuildSide ? left_tree_dxlnode : right_tree_dxlnode;

CDXLNode *project_list_dxlnode = (*hj_dxlnode)[EdxlhjIndexProjList];
CDXLNode *filter_dxlnode = (*hj_dxlnode)[EdxlhjIndexFilter];
CDXLNode *join_filter_dxlnode = (*hj_dxlnode)[EdxlhjIndexJoinFilter];
Expand All @@ -1570,7 +1588,7 @@ CTranslatorDXLToPlStmt::TranslateDXLHashJoin(
m_mp, false, output_context->GetColIdToParamIdMap());

Plan *left_plan =
TranslateDXLOperatorToPlan(left_tree_dxlnode, &left_dxl_translate_ctxt,
TranslateDXLOperatorToPlan(outer_tree_dxlnode, &left_dxl_translate_ctxt,
ctxt_translation_prev_siblings);

// the right side of the join is the one where the hash phase is done
Expand All @@ -1580,7 +1598,7 @@ CTranslatorDXLToPlStmt::TranslateDXLHashJoin(
translation_context_arr_with_siblings->AppendArray(
ctxt_translation_prev_siblings);
Plan *right_plan =
(Plan *) TranslateDXLHash(right_tree_dxlnode, &right_dxl_translate_ctxt,
(Plan *) TranslateDXLHash(inner_tree_dxlnode, &right_dxl_translate_ctxt,
translation_context_arr_with_siblings);

CDXLTranslationContextArray *child_contexts =
Expand Down Expand Up @@ -1740,8 +1758,26 @@ CTranslatorDXLToPlStmt::TranslateDXLHashJoin(
hashoperators = gpdb::LAppendOid(hashoperators, hclause->opno);
hashcollations = gpdb::LAppendOid(hashcollations, hclause->inputcollid);

outer_hashkeys = gpdb::LAppend(outer_hashkeys, linitial(hclause->args));
inner_hashkeys = gpdb::LAppend(inner_hashkeys, lsecond(hclause->args));
// Hash clauses are built as (DXL-left-key OP DXL-right-key). Normally
// DXL-left is the outer child, so linitial is the outer key. When the
// build side is swapped (JOIN_RIGHT_SEMI / JOIN_RIGHT_ANTI), the DXL
// left child became the executor's inner and the right child the
// outer, so the outer/inner hash keys must be taken from the opposite
// operands to stay consistent with the swapped lefttree/righttree.
if (fSwapBuildSide)
{
outer_hashkeys =
gpdb::LAppend(outer_hashkeys, lsecond(hclause->args));
inner_hashkeys =
gpdb::LAppend(inner_hashkeys, linitial(hclause->args));
}
else
{
outer_hashkeys =
gpdb::LAppend(outer_hashkeys, linitial(hclause->args));
inner_hashkeys =
gpdb::LAppend(inner_hashkeys, lsecond(hclause->args));
}
}

hashjoin->hashoperators = hashoperators;
Expand Down Expand Up @@ -6778,6 +6814,12 @@ CTranslatorDXLToPlStmt::GetGPDBJoinTypeFromDXLJoinType(EdxlJoinType join_type)
case EdxljtLeftAntiSemijoinNotIn:
jt = JOIN_LASJ_NOTIN;
break;
case EdxljtRightSemijoin:
jt = JOIN_RIGHT_SEMI;
break;
case EdxljtRightAntiSemijoin:
jt = JOIN_RIGHT_ANTI;
break;
default:
GPOS_ASSERT(!"Unrecognized join type");
}
Expand Down
8 changes: 8 additions & 0 deletions src/backend/gpopt/translate/CTranslatorUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -702,6 +702,14 @@ CTranslatorUtils::ConvertToDXLJoinType(JoinType jt)
join_type = EdxljtLeftAntiSemijoinNotIn;
break;

case JOIN_RIGHT_SEMI:
join_type = EdxljtRightSemijoin;
break;

case JOIN_RIGHT_ANTI:
join_type = EdxljtRightAntiSemijoin;
break;

default:
GPOS_ASSERT(!"Unrecognized join type");
}
Expand Down
15 changes: 15 additions & 0 deletions src/backend/gporca/libgpdbcost/include/gpdbcost/CCostModelGPDB.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,21 @@ class CCostModelGPDB : public ICostModel
const CCostModelGPDB *pcmgpdb,
const SCostingInfo *pci);

// cost of right semi hash join: the hash table is costed on child 0
// (outer/LHS) and the probe on child 1 (inner/RHS), i.e. the build/probe
// roles are swapped relative to the author's description of CostHashJoin.
//   mp      - memory pool handed to the child-cost computation
//   exprhdl - handle of the join being costed; its operator id is asserted
//             and its scalar child 2 (join condition) is inspected
//   pcmgpdb - cost model supplying the hash-join cost parameters
//   pci     - costing info (per-child rows/widths, output rows/width, rebinds)
// returns the children's cost plus the local join cost
static CCost CostRightSemiHashJoin(CMemoryPool *mp,
CExpressionHandle &exprhdl,
const CCostModelGPDB *pcmgpdb,
const SCostingInfo *pci);

// cost of right anti semi hash join (build = outer/LHS, unmatched build rows
// are the join output). The definition delegates to CostRightSemiHashJoin
// with the same arguments, so both operators are costed by one formula.
// Parameters and return value have the same meaning as for
// CostRightSemiHashJoin.
static CCost CostRightAntiSemiHashJoin(CMemoryPool *mp,
CExpressionHandle &exprhdl,
const CCostModelGPDB *pcmgpdb,
const SCostingInfo *pci);

// cost of merge join
static CCost CostMergeJoin(CMemoryPool *mp, CExpressionHandle &exprhdl,
const CCostModelGPDB *pcmgpdb,
Expand Down
165 changes: 165 additions & 0 deletions src/backend/gporca/libgpdbcost/src/CCostModelGPDB.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1239,6 +1239,161 @@ CCostModelGPDB::CostMergeJoin(CMemoryPool *mp, CExpressionHandle &exprhdl,
}


//---------------------------------------------------------------------------
//	@function:
//		CCostModelGPDB::CostRightSemiHashJoin
//
//	@doc:
//		Cost of a right semi hash join. The hash table is costed on child 0
//		(outer/left) and the probe on child 1 (inner/right); this is intended
//		to be the CostHashJoin formula with the build/probe inputs swapped.
//
//		The build side is considered to fit in memory when
//		build rows * build width <= EcpHJSpillingMemThreshold; otherwise the
//		spilling cost units are used and the hash table init factor is added.
//
//		No separate finalize-phase term is added and no engine-dependent
//		gating is applied here: the result is always children cost plus the
//		local cost computed below.
//
//		CostRightAntiSemiHashJoin delegates to this function, so the operator
//		assertion accepts both the right semi and the right anti semi hash
//		join operators.
//
//---------------------------------------------------------------------------
CCost
CCostModelGPDB::CostRightSemiHashJoin(CMemoryPool *mp,
									  CExpressionHandle &exprhdl,
									  const CCostModelGPDB *pcmgpdb,
									  const SCostingInfo *pci)
{
	GPOS_ASSERT(nullptr != pcmgpdb);
	GPOS_ASSERT(nullptr != pci);
	// the anti-semi variant shares this formula, so both operators are valid
	GPOS_ASSERT(COperator::EopPhysicalRightSemiHashJoin ==
					exprhdl.Pop()->Eopid() ||
				COperator::EopPhysicalRightAntiSemiHashJoin ==
					exprhdl.Pop()->Eopid());

	// build = outer (child 0), probe = inner (child 1)
	const DOUBLE dRowsBuild = pci->PdRows()[0];
	const DOUBLE dWidthBuild = pci->GetWidth()[0];
	const DOUBLE dRowsProbe = pci->PdRows()[1];
	const DOUBLE dWidthProbe = pci->GetWidth()[1];

	// look the parameter set up once; every lookup below goes through it
	auto *pcp = pcmgpdb->GetCostModelParams();

	const CDouble dHJHashTableInitCostFactor =
		pcp->PcpLookup(CCostModelParamsGPDB::EcpHJHashTableInitCostFactor)
			->Get();
	const CDouble dHJHashTableColumnCostUnit =
		pcp->PcpLookup(CCostModelParamsGPDB::EcpHJHashTableColumnCostUnit)
			->Get();
	const CDouble dHJHashTableWidthCostUnit =
		pcp->PcpLookup(CCostModelParamsGPDB::EcpHJHashTableWidthCostUnit)
			->Get();
	const CDouble dJoinFeedingTupColumnCostUnit =
		pcp->PcpLookup(CCostModelParamsGPDB::EcpJoinFeedingTupColumnCostUnit)
			->Get();
	const CDouble dJoinFeedingTupWidthCostUnit =
		pcp->PcpLookup(CCostModelParamsGPDB::EcpJoinFeedingTupWidthCostUnit)
			->Get();
	const CDouble dHJHashingTupWidthCostUnit =
		pcp->PcpLookup(CCostModelParamsGPDB::EcpHJHashingTupWidthCostUnit)
			->Get();
	const CDouble dJoinOutputTupCostUnit =
		pcp->PcpLookup(CCostModelParamsGPDB::EcpJoinOutputTupCostUnit)->Get();
	const CDouble dHJSpillingMemThreshold =
		pcp->PcpLookup(CCostModelParamsGPDB::EcpHJSpillingMemThreshold)->Get();
	const CDouble dHJFeedingTupColumnSpillingCostUnit =
		pcp->PcpLookup(
			   CCostModelParamsGPDB::EcpHJFeedingTupColumnSpillingCostUnit)
			->Get();
	const CDouble dHJFeedingTupWidthSpillingCostUnit =
		pcp->PcpLookup(
			   CCostModelParamsGPDB::EcpHJFeedingTupWidthSpillingCostUnit)
			->Get();
	const CDouble dHJHashingTupWidthSpillingCostUnit =
		pcp->PcpLookup(
			   CCostModelParamsGPDB::EcpHJHashingTupWidthSpillingCostUnit)
			->Get();

	// number of columns referenced by the join condition (scalar child 2)
	CExpression *pexprJoinCond = exprhdl.PexprScalarRepChild(2);
	CColRefSet *pcrsUsed = pexprJoinCond->DeriveUsedColumns();
	const ULONG ulColsUsed = pcrsUsed->Size();

	CCost costLocal(0);

	if (dRowsBuild * dWidthBuild <= dHJSpillingMemThreshold)
	{
		// build side fits in memory
		costLocal = CCost(
			pci->NumRebinds() *
			(
				// cost of building the hash table
				dRowsBuild * (ulColsUsed * dHJHashTableColumnCostUnit +
							  dWidthBuild * dHJHashTableWidthCostUnit) +
				// cost of feeding probe tuples
				ulColsUsed * dRowsProbe * dJoinFeedingTupColumnCostUnit +
				dWidthProbe * dRowsProbe * dJoinFeedingTupWidthCostUnit +
				// cost of hashing build tuples
				dWidthBuild * dRowsBuild * dHJHashingTupWidthCostUnit +
				// cost of output tuples
				pci->Rows() * pci->Width() * dJoinOutputTupCostUnit));
	}
	else
	{
		// build side spills: same shape as above with the spilling cost
		// units plus the hash table init factor. No additional asymmetric
		// I/O penalty is added on top of this base formula, so right-semi
		// and left-semi alternatives are compared on the same terms.
		costLocal = CCost(
			pci->NumRebinds() *
			(
				// cost of building the hash table
				dHJHashTableInitCostFactor +
				dRowsBuild * (ulColsUsed * dHJHashTableColumnCostUnit +
							  dWidthBuild * dHJHashTableWidthCostUnit) +
				// cost of feeding probe tuples
				ulColsUsed * dRowsProbe * dHJFeedingTupColumnSpillingCostUnit +
				dWidthProbe * dRowsProbe * dHJFeedingTupWidthSpillingCostUnit +
				// cost of hashing build tuples
				dWidthBuild * dRowsBuild * dHJHashingTupWidthSpillingCostUnit +
				// cost of output tuples
				pci->Rows() * pci->Width() * dJoinOutputTupCostUnit));
	}

	CCost costChild = CostChildren(mp, exprhdl, pci, pcp);
	return costChild + costLocal;
}


//---------------------------------------------------------------------------
//	@function:
//		CCostModelGPDB::CostRightAntiSemiHashJoin
//
//	@doc:
//		Cost of a right anti semi hash join (build = outer/left, the join
//		output is the set of build rows that found no match).
//
//		No anti-specific term is computed here: the cost is exactly what
//		CostRightSemiHashJoin returns for the same handle and costing info.
//		The author's rationale is that the extra pass over the build side is
//		proportional to the build row count and is dominated by the build
//		hashing term already present in that formula.
//
//		This relies on CostRightSemiHashJoin tolerating a right anti semi
//		operator in its own operator-id assertion.
//
//---------------------------------------------------------------------------
CCost
CCostModelGPDB::CostRightAntiSemiHashJoin(CMemoryPool *mp,
										  CExpressionHandle &exprhdl,
										  const CCostModelGPDB *pcmgpdb,
										  const SCostingInfo *pci)
{
	GPOS_ASSERT(pcmgpdb != nullptr);
	GPOS_ASSERT(pci != nullptr);
	GPOS_ASSERT(exprhdl.Pop()->Eopid() ==
				COperator::EopPhysicalRightAntiSemiHashJoin);

	// anti and semi share the same build/probe shape, hence the same formula
	const CCost costSharedFormula =
		CostRightSemiHashJoin(mp, exprhdl, pcmgpdb, pci);
	return costSharedFormula;
}

//---------------------------------------------------------------------------
// @function:
// CCostModelGPDB::CostIndexNLJoin
Expand Down Expand Up @@ -2720,6 +2875,16 @@ CCostModelGPDB::Cost(
return CostHashJoin(m_mp, exprhdl, this, pci);
}

case COperator::EopPhysicalRightSemiHashJoin:
{
return CostRightSemiHashJoin(m_mp, exprhdl, this, pci);
}

case COperator::EopPhysicalRightAntiSemiHashJoin:
{
return CostRightAntiSemiHashJoin(m_mp, exprhdl, this, pci);
}

case COperator::EopPhysicalInnerIndexNLJoin:
case COperator::EopPhysicalLeftOuterIndexNLJoin:
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,8 @@ class COperator : public CRefCount, public DbgPrintMixin<COperator>
EopPhysicalLeftSemiHashJoin,
EopPhysicalLeftAntiSemiHashJoin,
EopPhysicalLeftAntiSemiHashJoinNotIn,
EopPhysicalRightSemiHashJoin,
EopPhysicalRightAntiSemiHashJoin,
EopPhysicalRightOuterHashJoin,
EopPhysicalFullHashJoin,

Expand Down
Loading