/******************************************************************************** * * The optimizer for ODMG 2.0 OQL. * The calculus and optimizer are described in the paper: * L. Fegaras and D. Maier, "Optimizing Object Queries Using an Effective Calculus" * (available at: http://lambda.uta.edu/tods00.ps.gz) * * Copyright (c) 1999-2003 by Leonidas Fegaras, the University of Texas at * Arlington. All rights reserved. * Programmer: Leonidas Fegaras * Date: 4/9/97 * *********************************************************************************/ #include #include "odl.h" #include "typecheck.h" /* set it to false if you don't want query unnesting */ bool query_unnesting = true; /* set it to false if you don't want pointer joins (ie. no materialization of path expressions) */ bool pointer_joins = false; /* set it to false if you don't want convertion of unnests to pointer joins) */ bool unnests_to_joins = false; /* set them to true for extensive tracing of normalization and unnesting */ const bool TRACE_NORMALIZATION = false; const bool TRACE_UNNEST = false; /* all global assignments */ static list* global_variables = Nil; /* does e generate a collection? -- defined in code_generator.gen */ bool bulk_operation ( Expr e ); /* Is attribute a key in table? -- defined in rule_based_opt.gen */ bool is_key ( Expr table, Expr attribute ); /* used in binding variables to values during normalization */ typedef binding* Env; Env empty_environment = new binding; double min ( double x, double y ) { if (x* x, list* y ) { for(list* r = x; r->consp(); r=r->tl) if (!member(r->hd,y)) return false; return true; }; /* check whether x and y are equal under variable renaming */ bool alpha_equivalent ( Expr x, Expr y, binding* env ) { if (x->eq(y)) return true; #case # | pair(lambda(`vx,`bx),lambda(`vy,`by)) => return alpha_equivalent(bx,by,env->extend(vx->name(),vy)); | pair(compr(`m,`hd1,...r),compr(`n,`hd2,...s)) => { for(; r->consp() && s->consp(); r=r->tl, s=s->tl) #case r->hd | iterate(`v,`e) => #case s->hd | iterate(`w,`u) => if (alpha_equivalent(e,u,env)) env = env->extend(v->name(),w); else return false; | _ => return false; #end; | _ => if (!alpha_equivalent(r->hd,s->hd,env)) return false; #end; // return (alpha_equivalent(m,n,env) && alpha_equivalent(hd1,hd2,env)); return alpha_equivalent(hd1,hd2,env); }; | pair(get(_,`x,`v,...r),get(_,`y,`w,...s)) => alpha_equivalent(x,y,env) && alpha_equivalent(#,#,env->extend(v->name(),w)); | pair(unnest(_,`x,`v,...r),unnest(_,`y,`w,...s)) => alpha_equivalent(x,y,env) && alpha_equivalent(#,#,env->extend(v->name(),w)); | pair(nest(_,`x,`v,...r),nest(_,`y,`w,...s)) => alpha_equivalent(x,y,env) && alpha_equivalent(#,#,env->extend(v->name(),w)); | pair(`f(...r),`g(...s)) => { if (!alpha_equivalent(f,g,env)) return false; for(; r->consp() && s->consp(); r=r->tl, s=s->tl) if (!alpha_equivalent(r->hd,s->hd,env)) return false; return r->nullp() && s->nullp(); }; | _ : x->variablep() => if (env->in(x->name())) return alpha_equivalent(env->find(x->name()),y,env); else return x->eq(y); | _ => if (x->functionp() || y->functionp()) return false; else return x->eq(y); #end; }; /* is the collection monoid x at least as strong as the monoid y? */ bool no_weaker_monoid ( Expr x, Expr y) { #case x | list => return true; | bag => return !y->eq(#); | set => return !y->eq(#) && !y->eq(#) && !y->eq(#); | _ => return true; #end; }; /* ... defined later */ Expr normalize ( Expr e, Env env ); /* collect and insert the predicates of a comprehension at its righttmost end */ Expr collect_predicates ( Expr e ) { #case e | compr(`m,`hd,...r) => { list* nr = Nil; list* pr = Nil; m = collect_predicates(m); for(list* s = r; s->consp(); s=s->tl) { Expr u = collect_predicates(s->hd); #case u | iterate(_,_) => nr = nr->cons(u); | bind(_,_) => nr = nr->cons(u); | and(...l) => pr = pr->append(l); | _ => pr = pr->cons(u); #end; }; nr = nr->reverse(); #case m | some => #case collect_predicates(hd) | and(...s) => return #; | false => return #; | `p => return #; #end; | all => #case normalize(#,empty_environment) | and(...s) => return #; | false => return #; | `p => return #; #end; | _ => return #; #end; }; | `f(...r) => { list* nr = Nil; for(list* s = r; s->consp(); s=s->tl) nr = nr->cons(collect_predicates(s->hd)); nr = nr->reverse(); return #<`f(...nr)>; }; | _ => return e; #end; }; /* return the variables of a pattern */ list* pattern_variables ( Expr pattern ) { #case pattern | none => return Nil; | pair(`a,`b) => return pattern_variables(a)->append(pattern_variables(b)); | `v => return Cons(v,Nil); #end; }; /* return the variables in e that are not bound by some lambda binding or iterator */ list* free_variables ( Expr e, list* except ) { #case e | project(`x,`a,...r) => return free_variables(x,except); | method(`x,`a,...r) => return free_variables(x,except)->append(free_variables(#,except)); | bind(`a,`x) => return free_variables(x,except); | case(`x,...r) => return free_variables(x,except); | lambda(`v,`e) => return free_variables(e,except->append(pattern_variables(v))); | assign(...r,`e) => { list* res = Nil; for(; r->consp(); r=r->tl) #case r->hd | bind(`v,`u,...s) => { res = res->append(free_variables(u,except)); except = Cons(v,except); }; #end; return res->append(free_variables(e,except)); }; | compr(`m,`hd,...r) => { list* l = except; list* s = Nil; for(; r->consp(); r=r->tl) #case r->hd | iterate(`v,`u) => { l = l->cons(v); s = s->append(free_variables(u,l)); }; | bind(`v,`u) => { l = l->cons(v); s = s->append(free_variables(u,l)); }; | `u => s = s->append(free_variables(u,l)); #end; return s->append(free_variables(hd,l)); }; | `f(_,`x,`v,...r) : f->eq(#) || f->eq(#) || f->eq(#) => return free_variables(#,Cons(v,except)); | `f(_,`x,`v,...r) : f->eq(#) || f->eq(#) || f->eq(#) || f->eq(#) || f->eq(#) || f->eq(#) || f->eq(#) => return free_variables(#,Cons(v,except)); | `f(...r) => { list* s = Nil; for(; r->consp(); r=r->tl) s = s->append(free_variables(r->hd,except)); return s; }; | `v => if (v->variablep() && (v->name()->content()[0] == '_') && !member(v,except) && !member(v,global_variables) && !extentp(v) && find_scoped_variable(v->name())->eq(#)) return Cons(v,Nil); #end; return Nil; }; /* returns true if pred refers to a variable in pattern */ bool refers_to ( Expr pred, Expr pattern ) { list* fv = free_variables(pred,Nil); for(list* r = pattern_variables(pattern); r->consp(); r=r->tl) if (member(r->hd,fv)) return true; return false; }; /* bind pattern variables to values; eg. pattern_binding of pair(pair(a,b),c) * to pair(pair(1,2),3) will bind a to 1, b to 2, and c to 3 */ binding* pattern_bindings ( Expr pattern, Expr value ) { #case pattern | pair(`x,`y) => #case value | pair(`a,`b) => return pattern_bindings(x,a) ->extend(pattern_bindings(y,b)); | _ => return pattern_bindings(x,#) ->extend(pattern_bindings(y,#)); #end; | _ => return (new binding)->extend(pattern->name(),value); #end; }; /* given a pattern, return a new pattern of the same shape but with fresh variables */ Expr new_patterns ( Expr pattern ) { #case pattern | pair(`x,`y) => return #; | _ => return new_variable(); #end; }; /* ... defined below ... */ list* rename_vars_list ( list* terms, binding* binds ); /* alpha coersion (renaming variables to avoid name conflicts) */ Expr rename_vars ( Expr term, binding* binds ) { #case term | project(`e,`v,...r) => return #; | bind(`v,`e) => return #; | case(`e,...r) => return #; | lambda(`v,`e) => { Expr nv = new_patterns(v); return #extend(binds))))>; }; | compr(`m,`hd,...r) => { list* q = Nil; binding* nbinds = binds; for(list* s = r; s->consp(); s=s->tl) #case s->hd | iterate(`v,`e) => { Expr nv = new_patterns(v); q = q->cons(#); nbinds = pattern_bindings(v,nv)->extend(nbinds); }; | bind(`v,`e) => { Expr nv = new_patterns(v); q = q->cons(#); nbinds = pattern_bindings(v,nv)->extend(nbinds); }; | _ => q = q->cons(rename_vars(s->hd,nbinds)); #end; Expr nm = rename_vars(m,nbinds); Expr nhd = rename_vars(hd,nbinds); q = q->reverse(); return #; }; | `f(...r) => return function(rename_vars(f,binds),rename_vars_list(r,binds)); | _ => if (term->variablep()) return binds->find(term->name(),term); else return term; #end; }; list* rename_vars_list ( list* terms, binding* binds ) { list* res = Nil; for(list* r = terms; r->consp(); r=r->tl) res = res->cons(rename_vars(r->hd,binds)); return res->reverse(); }; /* merge environments */ Env extend ( Env e1, Env e2 ) { return e2->extend(e1); }; /******************************************************************************** * Program Normalization ********************************************************************************/ list*>* normalize_qualifiers ( Expr m, list* initqs, Expr head, list* qualifiers, Env env ) { list* nquals = initqs; Env nenv = env; for(list* r = qualifiers; r->consp(); r=r->tl) #case r->hd | iterate(`v,zero(`n)) => return new list*>; | iterate(`v,unit(`n,`e)) => nenv = extend(pattern_bindings(v,normalize(e,nenv)),nenv); | iterate(`v,merge(`n,`x,`y)) => { if (!no_weaker_monoid(n,m)) warning("A weaker monoid in a comprehension generator may result into indeterminism",r->hd); return normalize_qualifiers(m,nquals,head,r->tl->cons(#),nenv) ->append(normalize_qualifiers(m,nquals,head,r->tl->cons(#),nenv)); }; | iterate(`v,compr(`n,`hd,...s)) => r = Cons(r->hd,s->append(r->tl->cons(#))); | bind(`v,`e) => nenv = extend(pattern_bindings(v,normalize(e,nenv)),nenv); | true => ; | false => return new list*>; | compr(some,`pred,...s) => r = Cons(r->hd,s->append(r->tl->cons(pred))); | `e => nquals = nquals->cons(normalize(e,nenv)); #end; return Cons(nquals->reverse()->cons(normalize(head,nenv))->cons(normalize(m,nenv)), new list*>); }; Expr normalize_comprehension ( Expr e, Env env, bool &changed ) { #case e | compr(`m,`hd,...r) => { Expr res; list*>* new_qualifiers = normalize_qualifiers(m,Nil,hd,r,env); if (new_qualifiers->nullp()) return #; m = new_qualifiers->hd->hd; if (new_qualifiers->hd->tl->tl->nullp()) res = #hd->tl->hd))>; else res = #hd))>; for(list*>* s = new_qualifiers->tl; s->consp(); s=s->tl) if (s->hd->tl->tl->nullp()) res = #hd)))>; else res = #hd)))>; changed = !e->eq(res); return res; }; #end; return e; }; /* collect the range variables of the qualifiers in a comprehension */ list* range_variables ( list* qualifiers ) { list* res = Nil; for(list* r = qualifiers; r->consp(); r=r->tl) #case r->hd | iterate(`v,`e) => res = res->cons(v); #end; return res; }; /* Is e the name of a class with an extent? */ bool has_extent ( Expr e ) { return e->variablep() && !get_class_extent(e)->eq(#); }; /* materialization of path expression into pointer joins between extents */ Expr path_materialization ( Expr e, Env env, bool &changed ) { if (!pointer_joins) return e; #case e | compr(`m,`h,...r,`f[project(project(`x,`A,`Aclass),`B,`tp)],...s) : member(x,range_variables(r)) /* path expression in a qualifier */ && has_extent(Aclass) => { Expr extent = get_class_extent(Aclass); Expr nv = new_variable(); list* ns = subst_list(#,nv, Cons(#<`f[project(`nv,`B,`tp)]>,s)); Expr nh = subst_expr(#,nv,h); changed = true; return #; }; | compr(`m,`f[project(project(`x,`A,`Aclass),`B,`tp)],...r) : member(x,range_variables(r)) /* path expression in the comprehension head */ && has_extent(Aclass) => { Expr extent = get_class_extent(Aclass); Expr nv = new_variable(); Expr nh = subst_expr(#,nv, #<`f[project(`nv,`B,`tp)]>); changed = true; return #; }; #end; return e; }; bool inverse_relationship_is_variable ( Expr A, Expr Aclass ) { #case get_declaration_binding(A->name(),Aclass->name()) | relationship(`x,_,inverse(_,_)) => return x->variablep(); | _ => return false; #end; }; /* can an unnest be a join? (see below) */ Expr unnest_can_be_join ( Expr A, Expr Aclass ) { if (!has_extent(Aclass)) return #; for(list< Ref >* r = get_declarations(A->name()); r->consp(); r=r->tl) #case get_declaration_binding(r->hd) | relationship(`v,`rname,inverse(`inv_name,`inv_var)) : inv_name->eq(Aclass) && inverse_relationship_is_variable(inv_var,inv_name) => return inv_var; #end; return #; }; Expr class_of_inverse_relationship ( Expr A, Expr Aclass ) { #case get_declaration_binding(A->name(),Aclass->name()) | relationship(`x,_,inverse(_,_)) => return x; | _ => return #; #end; }; /* Some unnests can be translated into pointer joins: this is * possible when there is an one-to-many relationship for the unnesting * path and the class that corresponds to the 'many' part has an extent */ Expr unnest_to_join ( Expr e, Env env, bool &changed ) { if (!unnests_to_joins) return e; #case e | compr(`m,`h,...r,iterate(`v,project(`x,`A,`f(`Aclass))),...s) : !unnest_can_be_join(A,Aclass)->eq(#) => { Expr rname = unnest_can_be_join(A,Aclass); Expr extent = get_class_extent(Aclass); Expr Bclass = class_of_inverse_relationship(rname,Aclass); changed = true; return #; }; #end; return e; }; bool pos_predicate ( Expr e, Expr nv ) { #case e | and(...r) => for(; r->consp(); r=r->tl) if (pos_predicate(r->hd,nv)) return true; | `v : v->eq(nv) => return true; #end; return false; }; bool is_key_of ( Expr e, Expr key ) { return e->variablep() && extentp(e) && is_key(extent_type(e->name()),key); }; list* place_bind_in_qualifiers ( Expr v, Expr e, list* gl ) { list*>* fv = new list*>; list* res = Nil; for(list* r = gl; r->consp(); r=r->tl) #case r->hd | iterate(`v,`u) => fv = fv->cons(free_variables(u,Nil)); | bind(`v,`u) => fv = fv->cons(free_variables(u,Nil)); | `u => fv = fv->cons(free_variables(u,Nil)); #end; fv = fv->reverse(); bool found = false; for(list* r = gl; r->consp(); r=r->tl, fv=fv->tl) if (!found && member(v,fv->hd)) { found = true; res = Cons(r->hd,Cons(#,res)); } else res = Cons(r->hd,res); if (!found) return Cons(#,res->reverse()); else return res->reverse(); }; /* Some programmers still use OQL like SQL! This function converts some joins into pointer * traversals using class relationships. This is a kind of inverse of unnest_to_join */ Expr relational_joins_to_relationships ( Expr e, Env env, bool &changed ) { static Expr nv = new_variable(); #case e | compr(`m,`h,...r,iterate(`v1,`e1),...s,`f[eq(project(`e2,`a1,`t1),project(`v2,`a2,`t2))],...q) : a1->eq(a2) && v1->eq(v2) && pos_predicate(#<`f[`nv]>,nv) && is_key_of(e1,a1) => { changed = true; list* n = place_bind_in_qualifiers(v1,e2,s); return #; }; | compr(`m,`h,...r,iterate(`v1,`e1),...s,`f[eq(project(`v2,`a2,`t2),project(`e2,`a1,`t1))],...q) : a1->eq(a2) && v1->eq(v2) && pos_predicate(#<`f[`nv]>,nv) && is_key_of(e1,a1) => { changed = true; list* n = place_bind_in_qualifiers(v1,e2,s); return #; }; | compr(`m,`h,...r,iterate(`v1,`e1),...s,`f[eq(`e2,`v2)],...q) : v1->eq(v2) && e1->variablep() && pos_predicate(#<`f[`nv]>,nv) && extentp(e1) => { changed = true; list* n = place_bind_in_qualifiers(v1,e2,s); return #; }; | compr(`m,`h,...r,iterate(`v1,`e1),...s,`f[eq(`v2,`e2)],...q) : v1->eq(v2) && e1->variablep() && pos_predicate(#<`f[`nv]>,nv) && extentp(e1) => { changed = true; list* n = place_bind_in_qualifiers(v1,e2,s); return #; }; #end; return e; }; bool without_assign ( Expr e ) { #case e | `f[assign(...r)] => return false; | _ => return true; #end; }; /* Handling non-correlated nesting: pull out from a comprehension terms that are constant */ Expr eliminate_constant_subterms ( Expr e, Env env, bool &changed ) { static list* vf = Nil->cons(#)->cons(#); #case e | compr(...r,`f[`g(...s)],...t) : member(g,vf) && free_variables(#<`g(...s)>,Nil)->nullp() && without_assign(#<`f[x]>) => { changed = true; Expr nv = new_variable(); global_variables = global_variables->cons(nv); return #; }; #end; return e; }; Expr basic_reduction ( Expr e, Env env, bool &changed ) { bool b = changed; changed = true; #case e | unit(`m,`x) : !collectionp(m) => return x; | zero(sum) => return #<0>; | zero(max) => return #<0>; | zero(all) => return #; | zero(some) => return #; | merge(sum,`x,`y) => return #; | merge(max,`x,`y) => return #; | merge(all,`x,`y) => return #; | merge(some,`x,`y) => return #; | merge(_,zero(...r),`x) => return x; | merge(_,`x,zero(...r)) => return x; | plus(`x,`y) : x->integerp() && y->integerp() => return integer(x->info.Integer+y->info.Integer); | not(true) => return #; | not(false) => return #; | not(not(`x)) => return x; | not(compr(all,false,...r)) => return #; | not(compr(some,true,...r)) => return #; | not(and(...ps)) => { list* s = Nil; for(list* r = ps->reverse(); r->consp(); r=r->tl) s = s->cons(#hd))>); return #; }; | not(or(...ps)) => { list* s = Nil; for(list* r = ps->reverse(); r->consp(); r=r->tl) s = s->cons(#hd))>); return #; }; | not(`f(`x,`y)) => #case f | eq => return #; | neq => return #; | lt => return #; | gt => return #; | leq => return #; | geq => return #; | _ => { changed = b; return e; }; #end; | and() => return #; | or() => return #; | and(...r,false,...s) => return #; | and(...r,true,...s) => return #; | or(...r,true,...s) => return #; | or(...r,false,...s) => return #; | if(true,`x,`y) => return x; | if(false,`x,`y) => return y; | compr(`m,`hd,`pred) : (!pred->functionp() || !pred->info.Function.Name->eq(#)) => return #; | apply(`f,`e) => #case normalize(f,env) | lambda(`v,`b) => return normalize(b,extend(pattern_bindings(v,e),env)); #end; | project(struct(...r),`a,...n) => { for(list* s = r; s->consp(); s=s->tl) #case s->hd | bind(`b,`u) : a->eq(b) => return u; #end; return #; }; | project(union_construction(`f,`tag,_,`u,_,_),`a,...n) => if (tag->eq(a)) return u; else odmg_error("wrong union construction/projection",e); | project(union_construction(`f,_,`tag,_,`u,_,_),`a,...n) => if (tag->eq(a)) return u; else odmg_error("inconsistent union construction/projection",e); | cases(union_construction(`f,`tag,_,_,_,_),...cs) => { bool found = false; for(list* r=cs; !found && r->consp(); r=r->tl) #case r->hd | case(`e,...tags) : member(tag,tags) || member(#,tags) => return e; #end; if (!found) odmg_error("Union case does not handle this attribute",e); }; | cases(union_construction(`f,_,`tag,_,_,_,_),...cs) => { bool found = false; for(list* r=cs; !found && r->consp(); r=r->tl) #case r->hd | case(`e,...tags) : member(tag,tags) || member(#,tags) => return e; #end; if (!found) odmg_error("Union case does not handle this attribute",e); }; | fst(pair(`a,`b)) => return a; | snd(pair(`a,`b)) => return b; | compr(set,`hd,iterate(`v,`x)) : hd->eq(v) => return x; | `v : v->variablep() => if (env->in(e->name())) return env->find(e->name()); #end; changed = b; return e; }; /* normalize e using an environment; this is the rewrite engine of the optimizer */ Expr normalize1 ( Expr e, Env env ) { try{ if (TRACE_NORMALIZATION) { cout << "** normalize1: "; e->pretty_print(0,cout); cout << endl; }; Expr res; #case e | `f(...r) => { list* nr = Nil; for(list* s = r; s->consp(); s=s->tl) nr = nr->cons(normalize1(s->hd,env)); nr = nr->reverse(); res = #<`f(...nr)>; }; | _ => res = e; #end; bool changed = false; res = basic_reduction(res,env,changed); res = normalize_comprehension(res,env,changed); res = relational_joins_to_relationships(res,env,changed); if (TRACE_NORMALIZATION) { cout << "--> "; res->pretty_print(0,cout); cout << endl; }; if (changed) return normalize1(res,env); else return res; } DEBUG(e); }; Expr normalize2 ( Expr e, Env env ) { try{ if (TRACE_NORMALIZATION) { cout << "** normalize2: "; e->pretty_print(0,cout); cout << endl; }; Expr res; #case e | `f(...r) => { list* nr = Nil; for(list* s = r; s->consp(); s=s->tl) nr = nr->cons(normalize2(s->hd,env)); nr = nr->reverse(); res = #<`f(...nr)>; }; | _ => res = e; #end; bool changed = false; res = path_materialization(res,env,changed); res = unnest_to_join(res,env,changed); res = eliminate_constant_subterms(res,env,changed); if (TRACE_NORMALIZATION) { cout << "--> "; res->pretty_print(0,cout); cout << endl; }; if (changed) return normalize2(res,env); else return res; } DEBUG(e); }; Expr normalize ( Expr e, Env env ) { Expr res = normalize2(normalize1(e,env),env); return res; }; /******************************************************************************** * Translation of comprehensions into algebraic operators ********************************************************************************/ typedef struct { Expr fst; Expr snd; Expr thrd; Expr frth; } qtuple; /* splits the predicate pred into four parts: it returns the quadruple * [and(...left),and(...right),and(...both),and(...rest)], where left holds all * predicates that refer to the variables in the left_pattern * exclusively; right holds all predicates that refer to the variables in * the right_pattern exclusively; both holds all predicates that * refer to the variables in both left and right patterns, and rest holds the rest */ qtuple split_predicate ( Expr pred, Expr left_pattern, Expr right_pattern ) { list* lefts = Nil; list* rights = Nil; list* both = Nil; list* rests = Nil; #case pred | and(...r) => for(; r->consp(); r=r->tl) #case r->hd | `f[compr(...s)] => rests = rests->cons(r->hd); | and(...s) => { qtuple t = split_predicate(r->hd,left_pattern,right_pattern); lefts = lefts->append(t.fst->arguments()); rights = rights->append(t.snd->arguments()); both = both->append(t.thrd->arguments()); rests = rests->append(t.frth->arguments()); }; | _ => { list* vars = free_variables(r->hd,Nil); if (subset(vars,pattern_variables(left_pattern))) lefts = lefts->cons(r->hd); else if (subset(vars,pattern_variables(right_pattern))) rights = rights->cons(r->hd); else if (subset(vars,pattern_variables(left_pattern) ->append(pattern_variables(right_pattern)))) both = both->cons(r->hd); else rests = rests->cons(r->hd); }; #end; qtuple x = { #, #, #, # }; return x; #end; }; Expr bulk ( Expr m ) { if (m->eq(#) || m->eq(#) || m->eq(#) || m->eq(#)) return #; if (m->eq(#) || m->eq(#)) return #; #case m | ordered(...r) => return #; | _ => return m; #end; }; Expr promote_pred ( Expr e, Expr pat, Expr pred ) { #case e | get(`M,`table,`v,and(...s)) => return #arguments())))>; | join(`M,`x,`y,and(...s),`keep) => #case pat | pair(`px,`py) => { qtuple tp = split_predicate(pred,px,py); Expr nx = promote_pred(x,px,tp.fst); Expr ny = promote_pred(y,py,tp.snd); list* r = tp.thrd->arguments()->append(tp.frth->arguments()); return #; }; #end; | nest(`M,`x,`v,`hd,`vars,and(...s),`apred) => #case pat | pair(`pv,`px) => { qtuple tp = split_predicate(pred,pv,px); Expr nx = promote_pred(x,px,tp.snd); list* r = tp.thrd->arguments()->append(tp.frth->arguments()); return #; }; #end; | reduce(`M,`x,`v,`hd,and(...s)) => #case pat | pair(`pv,`px) => { qtuple tp = split_predicate(pred,pv,px); Expr nx = promote_pred(x,px,tp.snd); list* r = tp.thrd->arguments()->append(tp.frth->arguments()); return #; }; #end; | unnest(`M,`x,`v,`path,and(...s),`keep) => #case pat | pair(`pv,`px) => { qtuple tp = split_predicate(pred,pv,px); Expr nx = promote_pred(x,px,tp.snd); list* r = tp.thrd->arguments()->append(tp.frth->arguments()); return #; }; #end; | _ => return #; /* ??? */ #end; }; list* project_vars ( Expr e ) { #case e | project(`x,_,_) => return project_vars(x); | OID(`x) => return project_vars(x); | `f(...r) => { list* s = Nil; for(list* n = r->reverse(); n->consp(); n=n->tl) s = s->append(project_vars(n->hd)); return s; }; | _ => if (e->variablep()) return Cons(e,Nil); else return Nil; #end; }; binding* group_bindings ( Expr e1, Expr e2, binding* env ) { list* r = project_vars(e1); list* s = project_vars(e2); for(; r->consp() && s->consp(); r=r->tl, s=s->tl) if (!r->hd->eq(s->hd) && !env->in(r->hd->name()) && !env->in(s->hd->name())) env = env->extend(r->hd->name(),s->hd); return env; }; /* The memo table is used for eliminating duplicate nested queries in a query */ list*>* memo_table = new list*>; void memoize ( Expr v, Expr e ) { memo_table = (memo_table)->cons(new pair(v,e)); }; Expr find_in_memo ( Expr e ) { for(list*>* r = memo_table; r->consp(); r=r->tl) if (alpha_equivalent(r->hd->second,e,new binding)) return r->hd->first; return #; }; /* alpha-renaming using an environment */ Expr env_subst ( Expr e, binding* env ) { #case e | `f(...r) => { list* s = Nil; for(; r->consp(); r=r->tl) s = s->cons(env_subst(r->hd,env)); s = s->reverse(); return #<`f(...s)>; }; | `v : v->variablep() => if (env->in(v->name())) return env->find(v->name()); else return e; | _ => return e; #end; }; binding* zip_range_vars ( list* r, list* s ) { binding* env = new binding; for(; r->consp() && s->consp(); r=r->tl, s=s->tl) #case #hd),`(s->hd))> | pair(iterate(`x,`a),iterate(`y,`b)) => if (alpha_equivalent(a,b,env)) env = env->extend(x->name(),y); else return new binding; | pair(iterate(`x,`a),_) => return new binding; | pair(_,iterate(`x,`a)) => return new binding; #end; for(; r->consp(); r=r->tl) #case r->hd | iterate(_,_) => return new binding; #end; for(; s->consp(); s=s->tl) #case s->hd | iterate(_,_) => return new binding; #end; return env; }; list* candidate_for_group_by ( Expr pred, binding* env ) { #case pred | and(...r) => { list* s = Nil; for(; r->consp(); r=r->tl) #case r->hd | eq(`e1,`e2) : !e1->variablep() && !e2->variablep() => if (alpha_equivalent(e1,e2,group_bindings(e1,e2,env))) s = Cons(e2,s); else if (alpha_equivalent(e2,e1,group_bindings(e2,e1,env))) s = Cons(e1,s); #end; return s; }; | _ => return Nil; #end; }; list* rest_of_groupby ( list* r, list* groups, list* preds, binding* env ) { list* res = Nil; for(; r->consp(); r=r->tl) #case r->hd | eq(`x,`y) : member(x,groups) || member(y,groups) => ; | `e : member(env_subst(e,env),preds) => ; | `e => res = res->cons(env_subst(e,env)); #end; return res->reverse(); }; Expr last ( list* r ) { if (r->tl->nullp()) return r->hd; else return last(r->tl); }; Expr groupby_vars ( list* r ) { if (r->tl->nullp()) return r->hd; else return #tl)),`(r->hd))>; }; /* Translating and unnesting nested comprehensions */ Expr translate ( Expr e, Expr u, Expr w, Expr v, Expr E ) { #case e | compr(...r) : TRACE_UNNEST => { cout << "** unnest: "; e->pretty_print(11,cout); cout << "\nu / w / v: " << u << " / " << w << " / " << v; cout << "\n E: "; E->pretty_print(11,cout); cout << endl; }; #end; Expr res = #; static Expr nv = new_variable(); #case e /* Shortcut to unnesting: the following two rules recognize calculus forms generated by OQL group-bys */ | compr(`m,`hd1,...rr,`f[compr(`n,`hd2,...ss)]) : query_unnesting && zip_range_vars(ss,rr)->length() > 0 && candidate_for_group_by(last(ss),zip_range_vars(ss,rr))->consp() && free_variables(#<`f[`nv]>,Cons(nv,range_variables(rr)))->nullp() => { binding* env = zip_range_vars(ss,rr); Expr pred2 = last(ss); list* gs = candidate_for_group_by(pred2,env); Expr vars = groupby_vars(gs->append(pattern_variables(u))); Expr nhd = env_subst(hd2,env); list* rest = rest_of_groupby(last(ss)->arguments(),gs,#<`f[X]>->arguments(),env); Expr ne = #; res = translate( #, u, w, v, E ); }; | compr(`m,`f[compr(`n,`hd2,...ss)],...rr) : query_unnesting && zip_range_vars(ss,rr)->length() > 0 && candidate_for_group_by(last(ss),zip_range_vars(ss,rr))->consp() && free_variables(#<`f[`nv]>,Cons(nv,range_variables(rr)))->nullp() => { binding* env = zip_range_vars(ss,rr); Expr pred2 = last(ss); list* gs = candidate_for_group_by(pred2,env); Expr vars = groupby_vars(gs->append(pattern_variables(u))); Expr nhd = env_subst(hd2,env); list* rest = rest_of_groupby(last(ss)->arguments(),gs,last(rr)->arguments(),env); Expr ne = #; res = translate( #, u, w, v, E ); }; | compr(`m,`hd1,`pred) => #case pred | `p[compr(`n,`hd2,...rr,and(...ss))] /* nested comprehension in a predicate */ => if (query_unnesting) { Expr pp = #; Expr nv = find_in_memo(pp); if (!nv->eq(#)) res = translate( #, u, w, v, E ); else { nv = new_variable(); memoize(nv,pp); res = translate( #, u, #, v, translate( #, w, w, nv, E ) ); }; } else { Expr np = translate(#,#,#,new_variable(),#); res = translate( #, u, w, v, E ); }; | _ => #case hd1 | `f[compr(`n,`hd2,...r)] /* nested comprehension in the head */ => if (query_unnesting) { Expr pp = #; Expr nv = find_in_memo(pp); if (!nv->eq(#)) res = translate( #, u, w, v, E ); else { nv = new_variable(); memoize(nv,pp); res = translate( #, u, #, v, translate( #, w, w, nv, E ) ); }; } else { Expr np = translate(#,#,#,new_variable(),#); res = translate( #, u, w, v, E ); }; | _ => #case m | `f[compr(`n,`hd2,...r)] /* nested comprehension in the accumulator (for order-by) */ => if (query_unnesting) { Expr pp = #; Expr nv = find_in_memo(pp); if (!nv->eq(#)) res = translate( #, u, w, v, E ); else { nv = new_variable(); memoize(nv,pp); res = translate( #, u, #, v, translate( #, w, w, nv, E ) ); }; } else { Expr np = translate(#,#,#,new_variable(),#); res = translate( #, u, w, v, E ); }; | _ => { #case m | ordered(...r) => { m = #; E = #; }; #end; if (u->eq(#)) res = #; else res = #; }; #end; #end; #end; | compr(`m,`hd1,iterate(`a,`f[compr(`n,`hd2,...r)]),...s,`p) /* nested comprehension in the generator domain */ => if (query_unnesting) { Expr pp = #; Expr nv = find_in_memo(pp); if (!nv->eq(#)) res = translate( #, u, w, v, E ); else { nv = new_variable(); memoize(nv,pp); res = translate( #, u, #, v, translate( #, w, w, nv, E ) ); }; } else { Expr nd = translate(#,#,#,new_variable(),#); res = translate( #, u, w, v, E ); }; | compr(`m,`hd,iterate(`a,`X),...r,`p) => if (u->eq(#)) { qtuple tp = split_predicate(p,w,a); Expr newE = (tp.fst->arguments()->nullp()) ? E : promote_pred(E,w,tp.fst); Expr np = #arguments()),...(tp.thrd->arguments()))>; if (w->eq(#)) if (true || X->variablep()) res = translate( #, u, a, v, #arguments()),...(tp.snd->arguments()),...(tp.thrd->arguments())))> ); else res = translate( #, u, a, v, # ); else if (X->variablep() && !member(X,pattern_variables(w))) { Expr x = #; res = translate( #, u, #, v, # ); } else res = translate( #, u, #, v, # ); } else { qtuple tp = split_predicate(p,w,a); Expr np = #arguments()),...(tp.thrd->arguments()))>; Expr newE = (tp.fst->arguments()->nullp()) ? E : promote_pred(E,w,tp.fst); if (X->variablep()) res = translate( #, u, #, v, # ); else res = translate( #, u, #, v, # ); }; | merge(`m,`x,`y) => { Expr nx = translate(x,u,w,v,E); Expr ny = translate(y,u,w,v,E); res = #; }; | unit(`m,`x) => { Expr nx = translate(x,u,w,v,E); res = #; }; | zero(`m) => res = #; | bind(`n,`x) => { Expr nx = translate(x,u,w,v,E); res = #; }; | `c(`n,...r) : c->eq(#) || c->eq(#) || c->eq(#) => { list* s = Nil; for(; r->consp(); r=r->tl) s = s->cons(translate(r->hd,u,w,v,E)); s = s->reverse(); res = #<`c(`n,...s)>; }; | _ : bulk_operation(e) => { Expr v = new_variable(); Expr w = new_variable(); res = #; } | `f(...r) => { list* s = Nil; for(; r->consp(); r=r->tl) s = s->cons(translate(r->hd,u,w,v,E)); s = s->reverse(); res = #<`f(...s)>; }; | _ => if (e->variablep() && extentp(e)) { Expr v = new_variable(); Expr w = new_variable(); res = #; } else if (e->variablep()) #case find_scoped_variable(e->name()) | none => res = e; | `f(`tp) : collectionp(f) => { Expr v = new_variable(); Expr w = new_variable(); res = #; }; | _ => res = e; #end else res = e; #end; #case e | compr(...r) : TRACE_UNNEST => { cout << "--> "; res->pretty_print(0,cout); cout << endl; }; #end; return res; }; /******************************************************************************** * Shortcut to group-by unnesting (remove groupby's generated by the unnesting algorithm) ********************************************************************************/ /* if e is a path, return the beginning of the path */ Expr project_variable ( Expr e ) { #case e | project(`x,_,_) => return project_variable(x); | OID(`x) => return project_variable(x); | _ => if (e->variablep()) return e; else return #; #end; }; binding* zip_vars ( list* x, list* y ) { binding* env = new binding; list* r = x; list* s = y; for(; r->consp() && s->consp(); r=r->tl, s=s->tl) env = env->extend(new binding(r->hd->name(),s->hd)); if (r->consp() || s->consp()) return new binding; return env; }; bool alpha_nest_equiv ( Expr x, Expr y, binding* env ); bool alpha_nest_equiv_list ( list* xl, Expr y, binding* env ) { for(list* r = xl; r->consp(); r=r->tl) if (alpha_nest_equiv(r->hd,y,env)) return true; return false; }; bool alpha_nest_equiv ( Expr x, Expr y, binding* env ) { if (x->eq(y)) return true; #case # | pair(lambda(`vx,`bx),lambda(`vy,`by)) => return alpha_nest_equiv(bx,by,env->extend(vx->name(),vy)); | pair(get(_,`x,`v,...r),get(_,`y,`w,...s)) => alpha_nest_equiv(x,y,env) && alpha_nest_equiv(#,#,env->extend(v->name(),w)); | pair(unnest(_,`x,`v,...r),unnest(_,`y,`w,...s)) => alpha_nest_equiv(x,y,env) && alpha_nest_equiv(#,#,env->extend(v->name(),w)); | pair(nest(_,`x,`v,...r),nest(_,`y,`w,...s)) => alpha_nest_equiv(x,y,env) && alpha_nest_equiv(#,#,env->extend(v->name(),w)); | pair(`f(...r),`g(...s)) => { if (!alpha_nest_equiv(f,g,env)) return false; for(; s->consp(); s=s->tl) if (!alpha_nest_equiv_list(r,s->hd,env)) return false; return true; }; | _ : x->variablep() => if (env->in(x->name())) return alpha_nest_equiv(env->find(x->name()),y,env); else return x->eq(y); | _ => if (x->functionp() || y->functionp()) return false; else return x->eq(y); #end; }; Expr pass_gvar ( Expr e, Expr var ) { #case e | nest(`m,`x,`v,`hd,`gv,...r) => if (v->eq(var)) return e; else if (member(var,pattern_variables(gv))) return e; else return #; | unnest(`m,`x,`v,`path,`pred,`keep) => if (v->eq(var)) return e; else if (member(var,pattern_variables(keep))) return e; else return #; | join(`m,`x,`y,`pred,`keep) => if (member(var,pattern_variables(keep))) return e; else return #; | _ => return e; #end; }; list* range_vars ( Expr e ); Expr convert_groupby ( Expr e ) { #case e | get(`m,`u,`v,`f[group_by(`n,`hd,`vars,...rest)]) => { Expr nv = find_in_memo(#); if (!nv->eq(#)) { u = pass_gvar(u,nv); return convert_groupby(#); }; nv = new_variable(); memoize(nv,#); Expr res = convert_groupby(#); return res; }; | nest(`m,`u,`v,`f[group_by(`n,`hd,`vars,...rest)],`vars1,`bpred,`apred) => { u = convert_groupby(u); Expr nv = find_in_memo(#); if (!nv->eq(#)) { u = pass_gvar(u,nv); return convert_groupby(#); }; nv = new_variable(); memoize(nv,#); return convert_groupby(#); }; | nest(`m,`u,`v,`hd1,`vars1,`bpred,`f[group_by(`n,`hd,`vars,...rest)]) => { u = convert_groupby(u); Expr nv = find_in_memo(#); if (!nv->eq(#)) { u = pass_gvar(u,nv); return convert_groupby(#); }; nv = new_variable(); memoize(nv,#); return convert_groupby(#); }; | reduce(`m,`u,`v,`f[group_by(`n,`hd,`vars,...rest)],`pred) => { u = convert_groupby(u); Expr nv = find_in_memo(#); if (!nv->eq(#)) { u = pass_gvar(u,nv); return convert_groupby(#); }; nv = new_variable(); memoize(nv,#); return convert_groupby(#); }; | reduce(`m,`u,`v,`hd1,`f[group_by(`n,`hd,`vars,...rest)]) => { u = convert_groupby(u); Expr nv = find_in_memo(#); if (!nv->eq(#)) { u = pass_gvar(u,nv); return convert_groupby(#); }; nv = new_variable(); memoize(nv,#); return convert_groupby(#); }; | nest(`m,join(`n,`x,`y,`pred,`v),`nv,`hd,`gv,`pb,`pa) : query_unnesting && alpha_nest_equiv(x,y,new binding) && subset(pattern_variables(gv),range_vars(x)) && candidate_for_group_by(pred,zip_vars(range_vars(y),range_vars(x)))->consp() => { binding* env = zip_vars(range_vars(y),range_vars(x)); list* gs = candidate_for_group_by(pred,env); Expr vars = #; Expr nhd = env_subst(hd,env); list* rest = rest_of_groupby(pred->arguments(),gs,pb->arguments(),env); Expr ne = #; return convert_groupby(ne); }; | `f(...r) => { list* s = Nil; for(; r->consp(); r=r->tl) s = s->cons(convert_groupby(r->hd)); s = s->reverse(); return #<`f(...s)>; }; | _ => return e; #end; }; /******************************************************************************** * Putting everything together ********************************************************************************/ /* optimize an OQL query */ Expr optimize_query (Expr query) { memo_table = new list*>; global_variables = Nil; Expr e = rename_vars(query,new binding); Expr u = collect_predicates(normalize(e,empty_environment)); /* type_of(u); checking the consistency of the calculus form */ Expr t = translate(rename_vars(u,new binding),#,#,new_variable(),#); t = convert_groupby(t); type_of(t); /* checking the consistency of the algebraic form */ return t; };