/******************************************************************************** * * The rule-based optimizer for ODMG 2.0 OQL. * It generates physical plans from algebraic forms using a rule-based rewriting engine * * Copyright (c) 1999-2003 by Leonidas Fegaras, the University of Texas at * Arlington. All rights reserved. * Programmer: Leonidas Fegaras * Date: 4/9/97 * *********************************************************************************/ #include #include "odl.h" #include "typecheck.h" Expr project_variable ( Expr e ); float selectivity ( Expr pred ); int table_cardinality ( Expr table ); int table_size ( Expr table ); list* range_vars ( Expr e ); list* pattern_variables ( Expr pattern ); Expr best_tree ( Expr e ); list* free_variables ( Expr e, list* except ); double ilog ( int i ) { return (i>0) ? log(i) : 0; }; /* all operator names */ list* algebraic_operators = Nil->cons(#)->cons(#)->cons(#)->cons(#)->cons(#)->cons(#)->cons(#)->cons(#)->cons(#); /* all_plans holds the names of all physical plans */ list* all_plans = Nil->cons(#)->cons(#)->cons(#)->cons(#)->cons(#) ->cons(#)->cons(#)->cons(#)->cons(#) ->cons(#)->cons(#)->cons(#)->cons(#); /* true if x is a plan */ bool is_plan ( Expr x ) { #case x | `f(...r) => return member(f,all_plans); | _ => return false; #end; }; list* set_union ( list* x, list* y ) { list* res = x; for(list* r = y; r->consp(); r=r->tl) if (!(member(r->hd,res))) res = res->cons(r->hd); return res; }; /* returns the attributes of the range variables in tables * that participate in the predicate pred */ list* get_order ( Expr pred, list* tables ) { #case pred | OID(`x) => if (member(x,tables)) return Nil->cons(pred); else return get_order(x,tables); | project(`x,`a,_) => if (member(project_variable(x),tables)) return Nil->cons(pred); else return Nil; | `f(...r) => { list* res = Nil; for(; r->consp(); r=r->tl) res = set_union(res,get_order(r->hd,tables)); return res; }; | _ => if (pred->variablep() && member(pred,tables)) return Nil->cons(pred); else return Nil; #end; }; /* true if there is an equality predicate that binds an attribute of a table * in left_tables with an attribute of a table in right_tables. * This will imply that the join between these tables will be an equijoin */ bool equijoinp ( Expr pred, list* left_tables, list* right_tables ) { #case pred | and(...r,eq(`a,`b),...s) : ((member(project_variable(a),left_tables) && member(project_variable(b),right_tables)) || (member(project_variable(a),right_tables) && member(project_variable(b),left_tables))) => return true; | _ => return false; #end; }; /* finds an equality predicate from pred that associates attributes of the tables * in left_tables with attributes of tables in right_tables (if there is one). * Then it returns the attributes of the tables in the left_table that are involved * in this predicate. These attributes determine the order in which the left input * of a merge join must be sorted */ Expr required_order ( Expr pred, list* left_tables, list* right_tables ) { #case pred | and(...r,eq(`a,`b),...s) : member(project_variable(a),left_tables) && member(project_variable(b),right_tables) => return #; | and(...r,eq(`a,`b),...s) : member(project_variable(a),right_tables) && member(project_variable(b),left_tables) => return #; | _ => return #; #end; }; Expr is_left_key ( Expr x, Expr y, Expr xorder, Expr yorder ) { #case xorder | order(...r,OID(_),...s) => return #; #end; #case yorder | order(...r,OID(_),...s) => return #; #end; /* need also to test if either xorder or yorder is a key for x or y */ return #; }; /* true if attribute is a key of the class table */ bool is_key ( Expr table, Expr attribute ) { if (table->variablep() && classp(table->name())) return member(attribute,class_keys(get_declaration_binding(table->name()))); else return false; }; bool is_range_var ( Expr v, Expr x ) { #case x | `f[`h(_,_,`n,...r)] : v->eq(n) && (h->eq(#) || h->eq(#)) => return true; | _ => return false; #end; }; /* true if the expected order is at least as strong as the expected order. * expected_order = computed_order plus something */ bool subsumes ( Expr computed_order, Expr expected_order ) { #case computed_order | `f(...co) => #case expected_order | `f(...eo) => { list* s = eo; for(list* r = co; r->consp(); r=r->tl, s=s->tl) if (s->nullp()) return false; else if (!r->hd->eq(s->hd) && !r->hd->eq(#hd))>) && !s->hd->eq(#hd))>)) return false; return true; }; | _ => return false; #end; | _ => return false; #end; }; Expr index_order ( Expr class_name, Expr range_variable, list* index_attributes ) { list* ps = Nil; Schema sch = new binding(range_variable->name(),class_name); for(list* r = index_attributes->reverse(); r->consp(); r=r->tl) { Expr e = #hd))>; Expr tp = type_of(e,sch); ps = ps->cons(#hd),`tp)>); }; return #; }; Expr find_index ( Expr table, Expr range_variable, Expr order ) { Expr cname = extent_type(table->name()); #case get_declaration_binding(new string("_class_indexes"),cname->name()); | index(...s) => for(; s->consp(); s=s->tl) #case get_declaration_binding(s->hd->name()) | index(`index_name,`class_name,...attrs) : subsumes(order,index_order(cname,range_variable,attrs)) => return #; #end; #end; return #; }; /* the cost of accessing an index to satisfy a required order io and a predicate pred */ float index_access_cost ( Expr table, Expr name, Expr pred, list* io ) { float size = table_size(table)+10; if (io->length() > 1) { size = 3.0; for(list* s = io; s->consp(); s=s->tl) #case pred | and(...m,eq(project(`x,`a,`c),`v),...n) : x->eq(name) && s->hd->eq(#) && !member(name,free_variables(v,Nil)) => size = size*0.7; | and(...m,eq(`v,project(`x,`a,`c)),...n) : x->eq(name) && s->hd->eq(#) && !member(name,free_variables(v,Nil)) => size = size*0.7; | _ => return table_size(table)+10; #end; return size; }; for(list* r = pred->arguments(); r->consp(); r=r->tl) #case r->hd | eq(project(`x,`a,`c),`v) : x->eq(name) && member(#,io) => size = 3.0; | eq(`v,project(`x,`a,`c)) : x->eq(name) && member(#,io) => size = 3.0; | `f(project(`x,`a,`c),`v) : x->eq(name) && member(#,io) && (f->eq(#) || f->eq(#) || f->eq(#) || f->eq(#)) => size = size/2.0; | `f(`v,project(`x,`a,`c)) : x->eq(name) && member(#,io) && (f->eq(#) || f->eq(#) || f->eq(#) || f->eq(#)) => size = size/2.0; | _ => size = size*1.5; #end; return size; }; /* returns all indexes attached to table ranged by the range variable name */ list* all_table_indexes ( Expr table, Expr name ) { list* res = Nil; Expr cname = extent_type(table->name()); #case get_declaration_binding(new string("_class_indexes"),cname->name()) | index(...s) => for(; s->consp(); s=s->tl) #case get_declaration_binding(s->hd->name()) | index(`index_name,`class_name,...attrs) => res = res->cons(#); #end; #end; return res; }; pair* find_index_preds ( Expr pred, list* attrbs ) { Expr bottom = #; Expr top = #; if (attrbs->length() > 1) { for(list* s = attrbs; s->consp(); s=s->tl) #case pred | and(...m,eq(project(`w,`a,`tp),`value),...n) : s->hd->eq(#) && !member(s->hd->arguments()->hd,free_variables(value,Nil)) => top = (top->eq(#)) ? value : #; | and(...m,eq(`value,project(`w,`a,`tp)),...n) : s->hd->eq(#) && !member(s->hd->arguments()->hd,free_variables(value,Nil)) => top = (top->eq(#)) ? value : #; | _ => return Pair(bottom,top); #end; return Pair(top,top); }; for(list* r = pred->arguments(); r->consp(); r=r->tl) #case r->hd | eq(project(`w,`a,`tp),`value) : member(#,attrbs) && !member(w,free_variables(value,Nil)) => top = bottom = value; | eq(`value,project(`w,`a,`tp)) : member(#,attrbs) && !member(w,free_variables(value,Nil)) => top = bottom = value; | `f(project(`w,`a,`tp),`value) : member(#,attrbs) && (f->eq(#) || f->eq(#) || f->eq(#) || f->eq(#)) && !member(w,free_variables(value,Nil)) => if (f->eq(#) || f->eq(#)) bottom = value; else top = value; | `f(`value,project(`w,`a,`tp)) : member(#,attrbs) && (f->eq(#) || f->eq(#) || f->eq(#) || f->eq(#)) && !member(w,free_variables(value,Nil)) => if (f->eq(#) || f->eq(#)) top = value; else bottom = value; #end; return Pair(bottom,top); }; Expr make_index_scan ( Expr index_name, list* attrbs, Expr m, Expr table, Expr v, Expr pred ) { pair* bt = find_index_preds(pred,attrbs); Expr bottom = bt->first; Expr top = bt->second; return #; }; Expr applicable_index ( Expr index, Expr table, Expr v, Expr pred ) { Expr cname = extent_type(table->name()); #case get_declaration_binding(index->name(),cname->name()) | index(`index_name,`class_name,`attr) => #case pred | and(...r,eq(project(`w,`a,`tp),`path),...s) : w->eq(v) && a->eq(attr) => return #; | and(...r,eq(`path,project(`w,`a,`tp)),...s) : w->eq(v) && a->eq(attr) => return #; #end; | _ => cout << "******error\n"; #end; return #; }; list* nest_groups ( Expr e ) { #case e | `f(_,_,`v,...r) : f->eq(#) || f->eq(#) || f->eq(#) => return Cons(v,Nil); | `f(_,`x,`y,...r) : f->eq(#) || f->eq(#) || f->eq(#) || f->eq(#) => return nest_groups(x)->append(nest_groups(y)); | `f(_,`x,`v,...r) : f->eq(#) || f->eq(#) || f->eq(#) => return nest_groups(x)->append(Cons(v,Nil)); | `f(_,`x,`v,_,`gv,...r) : f->eq(#) || f->eq(#) => return pattern_variables(gv)->append(Cons(#,Nil)); | `f(`x,...r) : f->eq(#) || f->eq(#) => return Cons(#,Nil); | _ => return Nil; #end; }; Expr nest_order ( Expr gvars, Expr e ) { list* pvars = pattern_variables(gvars); list* r = pvars; for(list* s = nest_groups(e); r->consp() && s->consp(); s=s->tl) if (r->hd->eq(s->hd)) r = r->tl; if (r->consp()) return #; else return #; }; /*defined below */ Expr best_evaluation_order ( Expr e ); %{ /* set this to true if you want to get a tracing of the rule system; lots of output */ #define trace_rules false /* set this to true if you want to print the results of every optimization module */ #define trace_stages false %trace { 0 } /* inherited attributes */ %inherited order, /* the expected order (for the physical stage) */ groups /* expected groups to be formed before nest (group-by) */ /* synthesized atttributes: */ %synthesized { int size; /* the collection cardinality */ double cost; /* the plan cost */ Expr order; /* the computed (real) order */ Expr groups; /* groups formed after nest (group-by) */ } = { 0, 1e100, #, # } %logical none = none; %physical /* if no expected order, use TABLE_SCAN */ { order=`ord; } get(`M,`table,`name,`pred) : subsumes(ord,#) = TABLE_SCAN(`M,`table,`name,`pred) : { size = int(table_cardinality(table)*selectivity(pred)); cost = table_size(table); order = #; }; /* an INDEX_SCAN that delivers the expected order */ { order=order(`o,...os); } get(`M,`table,`name,`pred) : table->variablep() = #case find_index(table,name,#) | index(`idx,order(...io)) => `(make_index_scan(idx,io,M,table,name,pred)) : { size = int(table_cardinality(table)*selectivity(pred)); cost = index_access_cost(table,name,pred,io); order = #; }; #end; /* if the first sort attribute is a table key, ignore the rest of the attributes and use index scan */ { order=order(project(`t,`a,...s),`b,...os); } get(`M,`table,`name,`pred) : table->variablep() && is_key(extent_type(table->name()),a) = #case find_index(table,name,#) | index(`idx,order(...io)) => `(make_index_scan(idx,io,M,table,name,pred)) : { size = int(table_cardinality(table)*selectivity(pred)); cost = index_access_cost(table,name,pred,io); order = #; }; #end; /* if expected order is empty, then use any index; so here we return every possible index */ { order=order(); } get(`M,`table,`name,`pred) : table->variablep() = #forall r in all_table_indexes(table,name) do #case r | index(`idx,order(...io)) => `(make_index_scan(idx,io,M,table,name,pred)) : { size = int(table_cardinality(table)*selectivity(pred)); cost = index_access_cost(table,name,pred,io); order = #; }; #end; #end; /* if a non-empty order is expected for plan x, then SORT x and make */ /* the expected order for x empty so that this rule is not applied twice */ { order=order(`o,...os); } (`x<-{ order=order(); }) : is_plan(x) && !subsumes(#,^x.order) && free_variables(#,range_vars(x))->nullp() = let Expr rwp = PHYSICAL_REWRITE(#,inherit(#,ATRBS.groups),LEVEL+1)->hd->expression in SORT(`x,`rwp) : { size = ^x.size; cost = ^x.cost+(^x.size*ilog(^x.size)); order = #; }; { order=order(`o,...os); } (`x<-{ order=order(); }) : is_plan(x) && subsumes(#,^x.order) = `x : { size = ^x.size; cost = ^x.cost; order = ^x.order; }; { order=order(); } sort(`x<-{ order=`xord; },`xord) = `x : { size = ^x.size; cost = ^x.cost; order = ^x.order; }; { order=order(`o,...os); } sort(`x<-{ order=`xord; },`xord) = `x : { size = ^x.size; cost = ^x.cost; order = ^x.order; }; /* convert a join into a nested loop; the expected order propagates to the left input only */ { order=`ord; } join( `M, `x<-{ order=`ord; }, `y<-{ order=order(); }, `pred, `keep ) : free_variables(ord,range_vars(x))->nullp() = NESTED_LOOP(`M,`x,`y,`pred,`keep) : { size = int(^x.size*^y.size*selectivity(pred)); cost = ^x.cost+^y.cost+(^x.size*^y.size); order = ^x.order; }; { order=order(); } join( `M, `x<-{ order=order(); }, `y<-{ order=order(); }, `pred, `keep ) = BLOCK_NESTED_LOOP(`M,`x,`y,`pred,`keep) : { size = int(^x.size*^y.size*selectivity(pred)); cost = ^x.cost+^y.cost+(^x.size*^y.size)/1000; order = #; }; /* convert a join into an indexed nested loop; the expected order propagates to the left input only */ { order=`ord; } join( `M, `x<-{ order=`ord; }, `y<-{ order=order(); }, `pred, `keep ) = #case y | INDEX_SCAN(`m,`Y,`v,`py,`index,`low,`high) : !applicable_index(index,Y,v,pred)->eq(#) => INDEXED_LOOP(`M,`x,`y,`pred,`keep,`index,`(applicable_index(index,Y,v,pred))) : { size = int(^x.size*^y.size*selectivity(pred)); cost = ^x.cost+4.0*^x.size; order = ^x.order; }; #end; /* convert a pointer join into pointer access */ { order=`ord; } join( `M, `x<-{ order=`ord; }, get( _, `table, `var, and(...q) ), and(...r,eq(`path,OID(`v)),...s), `keep ) : v->eq(var) = MAP(`M,`x,`var,and(...q,...r,...s),`keep,`path) : { size = int(^x.size*selectivity(#)); cost = ^x.cost+4.0*^x.size; order = ^x.order; }; /* convert a join into a merge join; both inputs are required to be sorted */ /* and at least one sort key to be a key (checked by is_left_key) */ { order=`ord; } join( `M, `x<-{ order=`(required_order(pred,range_vars(x),range_vars(y))); }, `y<-{ order=`(required_order(pred,range_vars(y),range_vars(x))); }, `pred, `keep ) : keep->eq(#) /* currently I can't do an outer merge join */ && equijoinp(pred,range_vars(x),range_vars(y)) && subsumes(ord,^x.order) = let Expr xorder = required_order(pred,range_vars(x),range_vars(y)), Expr yorder = required_order(pred,range_vars(y),range_vars(x)) in #case is_left_key(x,y,xorder,yorder) | true => MERGE_JOIN(`M,`x,`y,`pred,true,`xorder,`yorder) : { size = int(^x.size*^y.size*selectivity(pred)); cost = ^x.cost+^y.cost+(^x.size+^y.size); order = ^x.order; }; | false => MERGE_JOIN(`M,`x,`y,`pred,false,`xorder,`yorder) : { size = int(^x.size*^y.size*selectivity(pred)); cost = ^x.cost+^y.cost+(^x.size+^y.size); order = ^x.order; }; #end; { order=`ord; } nest( `M, `e<-{ order=`(nest_order(gv,e)); }, `v, `hd, `gv, `bpred, `apred ) : subsumes(ord,nest_order(gv,e)) = NEST(`M,`e,`v,`hd,`gv,`bpred,`apred) : { size = ^e.size; cost = ^e.cost+^e.size; order = nest_order(gv,e); }; { order=order(); } reduce(`M,`e,`v,`head,`pred) = REDUCE(`M,`e,`v,`head,`pred) : { size = (collectionp(M)) ? ^e.size : 1; cost = ^e.cost+^e.size; order = ^e.order; }; { order=`ord; } unnest( `M, `e<-{ order=`ord; }, `v, `path, `pred, `keep ) : subsumes(ord,^e.order) && !member(v,free_variables(ord,Nil)) = UNNEST( `M, `e, `v, `path, `pred, `keep ) : { size = ^e.size*10; cost = ^e.cost+^e.size*10; order = #arguments()),`v)>; }; { order=order(...os,`o); } unnest( `M, `e<-{ order=order(...os); }, `v, `path, `pred, `keep ) : o->eq(v) && subsumes(#,^e.order) = UNNEST( `M, `e, `v, `path, `pred, `keep ) : { size = ^e.size*10; cost = ^e.cost+^e.size*10; order = #; }; { order=order(); } merge(`M,`x,`y) = MERGE(`M,`x,`y) : { size = ^x.size+^y.size; cost = ^x.cost+^y.cost+^x.size+^y.size; order = #; }; { order=`ord; } unit(`M,`x) = UNIT(`M,`x) : { size = 1; cost = 0; order = ord; }; { order=`ord; } zero(`M,`tp) = ZERO(`M,`tp) : { size = 0; cost = 0; order = ord; }; %} /*--------------------------------------------------------------------------------*/ bool better_plan ( plan* x, plan* y ) { return !x->expression->eq(#) && x->attributes.cost < y->attributes.cost; }; /* returns the 10 best plans from a list of plans */ list* ten_best_plans ( list* plans ) { list* res = new list; short i = 0; for(list* r = plans->sort(better_plan); i<10 && r->consp(); r=r->tl, i++) res = res->cons(r->hd); return res->reverse(); }; /* optimize an OQL query */ Expr best_evaluation_order ( Expr e ) { optimizer_init(); /* initial inherited attributes */ inherited init; init.order = #; init.groups = #; plan* best_plan = new plan(#,default_synthesized); best_plans = ten_best_plans; Expr be = best_tree(e); list* plans = PHYSICAL_REWRITE(be,init,0); for(list* r = plans; r->consp(); r = r->tl) if (r->hd->attributes.cost <= best_plan->attributes.cost) best_plan = r->hd; if (best_plan->expression->eq(#)) return plans->hd->expression; else return best_plan->expression; };