/******************************************************************************** * * The cost-based optimizer for ODMG 2.0 OQL. * It reorders operations using a greedy bottom-up heuristic. * The agorithm (called GOO) is described in: L. Fegaras, "A New Heuristic for * Optimizing Large Queries", DEXA'98 (http://lambda.uta.edu/order.ps.gz) * * Copyright (c) 1999-2003 by Leonidas Fegaras, the University of Texas at * Arlington. All rights reserved. * Programmer: Leonidas Fegaras * Date: 4/9/97 * *********************************************************************************/ #include #include "odl.h" #include "typecheck.h" /* set this to true to trace the operator ordering algorithm (long output) */ const bool TRACE_ORDERING = false; /* max # of tables in a query */ const int range_vars_maxnum = 100; Expr project_variable ( Expr e ); /* return the variables in e that are not bound by some lambda binding or iterator */ list* free_variables ( Expr e, list* except ); /* return the variables of a pattern */ list* pattern_variables ( Expr pattern ); static Expr all_range_variables[range_vars_maxnum]; static list* nodes[range_vars_maxnum]; static Expr predicate[range_vars_maxnum][range_vars_maxnum]; static double S[range_vars_maxnum][range_vars_maxnum]; static Expr tree[range_vars_maxnum]; static list* depends_on[range_vars_maxnum]; static double size[range_vars_maxnum]; static Expr nesting_vars[range_vars_maxnum]; /* used for printing tracing info during GOO */ void prs ( int n ) { for(int i = 0; iarguments()->consp()) cout << "[" << i << "," << j << "] " << S[i][j] << "\t" << predicate[i][j] << endl; }; bool member ( int i, list* s ) { for(; s->consp(); s=s->tl) if (i==s->hd) return true; return false; }; bool intersect ( list* x, list* y ) { for(; x->consp(); x=x->tl) if (member(x->hd,y)) return true; return false; }; bool subset ( list* x, list* y ) { for(; x->consp(); x=x->tl) if (!member(x->hd,y)) return false; return true; }; Expr combine_predicates ( Expr x, Expr y ) { #case x | and(...r) => #case y | and(...s) => return #; | _ => return #; #end; | _ => #case y | and(...s) => return #; | _ => return #; #end; #end; }; list* unary_ops = Nil->cons(#)->cons(#)->cons(#); Expr kept_variables ( Expr e ) { #case e | get(_,_,`v,_) => for(short i = 0; ieq(v)) return nesting_vars[i]; | nest(_,_,_,_,`vars,...r) => return vars; | reduce(...r) => return #; | unnest(_,_,_,_,_,`keep) => return keep; | _ => return #; | `f(_,`x,`y,...r) => { Expr kx = kept_variables(x); Expr ky = kept_variables(y); if (kx->eq(ky)) return #; else if (pattern_variables(kx)->length() > pattern_variables(ky)->length()) return kx; else return ky; }; #end; }; pair* best_evaluation_algorithm ( int i, int j, Expr pred ) { list* preds = pred->arguments(); Expr x = tree[i]; Expr y = tree[j]; #case x | nest(`m,sort(_,`o),`v,`hd,`vars,and(...r),`apred) => return Pair( size[i]*size[j]*S[i][j], # ); | nest(`m,_,`v,`hd,`vars,and(...r),`apred) => return Pair( size[i]*size[j]*S[i][j], # ); | reduce(`m,sort(_,`o),`v,`hd,and(...r)) => return Pair( size[i]*size[j]*S[i][j], # ); | reduce(`m,_,`v,`hd,and(...r)) => return Pair( size[i]*size[j]*S[i][j], # ); | unnest(`m,input,`v,`path,and(...r),`outer) => return Pair( size[i]*size[j]*S[i][j], # ); | `f(`m,...r) => { Expr kx = kept_variables(x); Expr ky = kept_variables(y); if (kx->eq(ky)) return Pair( size[i]*size[j]*S[i][j], # ); else if (pattern_variables(kx)->length() > pattern_variables(ky)->length()) return Pair( size[i]*size[j]*S[i][j], # ); else return Pair( size[i]*size[j]*S[i][j], # ); }; #end; }; pair* make_plan ( int i, int j ) { Expr pred = predicate[i][j]; if (!pred->info.Function.Name->eq(#)) pred = #; return best_evaluation_algorithm(i,j,pred); }; Expr greedy ( int N ) { for(int n = N; n>1; n--) { if (TRACE_ORDERING) prs(n); double minc = 1e100; pair* c; Expr plan; bool found = false; int i, j; for(int ii = 0; iinullp() && (c = make_plan(ii,jj), c->first < minc) ) { found = true; minc = c->first; plan = c->second; i = ii; j = jj; }; }; if (!found) odmg_error("cyclic dependencies in query graph",plan); if (TRACE_ORDERING) cout << "*** Merge " << i << " with " << j << " into " << i << " and move " << n-1 << " to " << j << endl; size[i] = size[i]*size[j]*S[i][j]; tree[i] = plan; nodes[i] = nodes[i]->append(nodes[j]); depends_on[i] = depends_on[j]; for(int k = 0; k return 1; | sort(`x,...r) => return number_of_variables(x); | join(_,`x,`y,...r) => return number_of_variables(x)+number_of_variables(y); | `op(_,`x,...r) => return number_of_variables(x)+1; | _ => return 0; #end; }; int find_variable ( Expr v, int k ) { for(int i=0; ieq(w) => return i; #end; return -1; odmg_error("No such variable",v); }; void build_graph_dependencies ( int k, Expr e ) { for(list* r = free_variables(e,Nil); r->consp(); r=r->tl) { int i = find_variable(r->hd,k); if (i>=0 && !member(i,depends_on[k])) depends_on[k] = depends_on[k]->cons(i); }; }; /* Retrieve statistics from the meta-database */ int table_cardinality ( Expr table ) { if (!table->variablep()) return 10; Ref d = get_data_entry(table->name()); if (d.valid()) return d->cardinality; else return 10; }; int table_size ( Expr table ) { if (!table->variablep()) return 10; Ref d = get_data_entry(table->name()); if (d.valid()) return d->size; else return 10; }; /* selectivity estimation; needs lots of work */ float selectivity ( Expr pred ) { float sel = 1.0; for(list* r = pred->arguments(); r->consp(); r=r->tl) #case r->hd | eq(project(`x,`a,_),`v) => sel = sel/10.0; | eq(`v,project(`x,`a,_)) => sel = sel/10.0; | _ => sel = sel/5.0; #end; return sel; }; void set_predicate_info ( Expr var1, Expr var2, Expr pred, int k ) { int i = find_variable(var1,k); int j = find_variable(var2,k); #case predicate[i][j] | and() => { predicate[i][j] = predicate[j][i] = pred; S[i][j] = S[j][i] = selectivity(pred); }; | `p => { predicate[i][j] = predicate[j][i] = #; S[i][j] = S[j][i] = S[i][j]*selectivity(pred); }; #end; }; void build_graph_edges ( Expr pred, int k ) { #case pred | and(...r) => for(; r->consp(); r=r->tl) build_graph_edges(r->hd,k); | not(eq(`x,`y)) => build_graph_edges(#,k); | not(neq(`x,`y)) => build_graph_edges(#,k); | not(`x) => build_graph_edges(x,k); | or(...r) => { list* vars = free_variables(pred,Nil); if (vars->length() == 2) set_predicate_info(vars->hd,vars->tl->hd,pred,k); else odmg_error("Sorry, I can't handle this predicate yet",pred); }; | `op(`x,`y) : !project_variable(x)->eq(#) && !project_variable(y)->eq(#) => set_predicate_info(project_variable(x),project_variable(y),pred,k); | _ => odmg_error("Can't handle this predicate in the join",pred); #end; }; list* range_variables ( Expr e, list* except ) { #case e | `f(_,_,`v,...r) : f->eq(#) || f->eq(#) || f->eq(#) => if (member(v,except)) return Nil; else return Cons(v,Nil); | `f(_,`x,`y,...r) : f->eq(#) || f->eq(#) || f->eq(#) || f->eq(#) => return range_variables(x,except)->append(range_variables(y,except)); | `f(_,`x,`v,...r) : f->eq(#) || f->eq(#) || f->eq(#) || f->eq(#) || f->eq(#) || f->eq(#) || f->eq(#) => return Cons(v,range_variables(x,except)); | `f(`x,...r) : f->eq(#) || f->eq(#) => return range_variables(x,except); | _ => return Nil; #end; }; list* range_vars ( Expr e ) { list* res = range_variables(e,Nil); return res; }; typedef pair*>* itype; itype build_graph_nodes ( Expr e, itype k, Expr nesting ) { #case e | get(_,`table,`v,`pred) => { int m = k->first; tree[m] = e; all_range_variables[m] = v; size[m] = table_cardinality(table)*selectivity(pred); nesting_vars[m] = nesting; depends_on[m] = k->second; if (!nesting->eq(#) && m > 0) build_graph_dependencies(m,nesting); return Pair(m+1,k->second); }; | join(_,`x,`y,`pred,`keep) => { itype m = build_graph_nodes(x,k,#); itype n = build_graph_nodes(y,m,keep); build_graph_edges(pred,n->first); return n; }; | nest(`M,`x,`v,`hd,`vars,`bpred,`apred) => { itype n = build_graph_nodes(x,k,vars); int m = n->first; tree[m] = #; all_range_variables[m] = v; size[m] = size[m-1]/10*selectivity(bpred)*selectivity(apred); depends_on[m] = n->second; depends_on[m+1] = new list; build_graph_dependencies(m,hd); list* s = range_variables(x,Nil); build_graph_dependencies(m,#); build_graph_dependencies(m+1,bpred); build_graph_dependencies(m+1,apred); return Pair(m+1,n->second->cons(m)); }; | reduce(`M,`x,`v,`hd,`pred) => { itype n = build_graph_nodes(x,k,#); int m = n->first; tree[m] = #; all_range_variables[m] = v; size[m] = 1.0; list* s = range_variables(x,Nil); depends_on[m] = n->second; build_graph_dependencies(m,#); build_graph_dependencies(m,hd); build_graph_dependencies(m,pred); return Pair(m+1,n->second); }; | unnest(`M,`x,`v,`path,`pred,`keep) => { itype n = build_graph_nodes(x,k,keep); int m = n->first; tree[m] = #; all_range_variables[m] = v; size[m] = 10*size[m-1]*selectivity(pred); depends_on[m] = n->second; depends_on[m+1] = new list; build_graph_dependencies(m,path); build_graph_dependencies(m,pred); return Pair(m+1,n->second); }; | sort(`x,...r) => return build_graph_nodes(x,k,nesting); | _ => return k; #end; }; /* it finds the best operation tree of an algebraic form; * it assumes that all bulk algebraic forms has an outermost reduce operation */ Expr best_tree ( Expr e ) { #case e | reduce(...r) => { int N = number_of_variables(e); for(int i = 0; i)->cons(i); for(int j = 0; j; }; }; if (build_graph_nodes(e,Pair(0,new list),#)->first == 0) return e; Expr res = greedy(N); if (TRACE_ORDERING) { res->pretty_print(0,cout); cout << "\n**********************\n"; }; return res; }; | `f(...r) => { list* s = Nil; for(; r->consp(); r=r->tl) s = s->cons(best_tree(r->hd)); s = s->reverse(); return #<`f(...s)>; }; | _ => return e; #end; };