/[cvs]/nfo/perl/libs/SQL/Parser.pm
ViewVC logotype

Contents of /nfo/perl/libs/SQL/Parser.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (show annotations)
Mon Dec 2 05:53:54 2002 UTC (21 years, 5 months ago) by joko
Branch: MAIN
CVS Tags: HEAD
Changes since 1.3: +1 -1 lines
+ re-applied modification: make dots (.) possible in table-names

1 ######################################################################
2 package SQL::Parser;
3 ######################################################################
4 #
5 # This module is copyright (c), 2001,2002 by Jeff Zucker.
6 # All rights resered.
7 #
8 # It may be freely distributed under the same terms as Perl itself.
9 # See below for help and copyright information (search for SYNOPSIS).
10 #
11 ######################################################################
12
13 use strict;
14 use vars qw($VERSION);
15 use constant FUNCTION_NAMES => join '|', qw(
16 TRIM SUBSTRING UPPER LOWER TO_CHAR
17 );
18
19 $VERSION = '1.005';
20
21 BEGIN { if( $ENV{SQL_USER_DEFS} ) { require SQL::UserDefs; } }
22
23
24 #############################
25 # PUBLIC METHODS
26 #############################
27
28 sub new {
29 my $class = shift;
30 my $dialect = shift || 'ANSI';
31 $dialect = 'ANSI' if uc $dialect eq 'ANSI';
32 $dialect = 'AnyData' if uc $dialect eq 'ANYDATA' or uc $dialect eq 'CSV';
33 # $dialect = 'CSV' if uc $dialect eq 'CSV';
34 if ($dialect eq 'SQL::Eval') {
35 $dialect = 'AnyData';
36 }
37 my $flags = shift || {};
38 $flags->{"dialect"} = $dialect;
39 $flags->{"PrintError"} = 1 unless defined $flags->{"PrintError"};
40 my $self = bless_me($class,$flags);
41 $self->dialect( $self->{"dialect"} );
42 $self->set_feature_flags($self->{"select"},$self->{"create"});
43 return bless $self,$class;
44 }
45
46 sub parse {
47 my $self = shift;
48 my $sql = shift;
49 #printf "<%s>", $self->{dialect_set};
50 $self->dialect( $self->{"dialect"} ) unless $self->{"dialect_set"};
51 $sql =~ s/^\s+//;
52 $sql =~ s/\s+$//;
53 $self->{"struct"} = {};
54 $self->{"tmp"} = {};
55 $self->{"original_string"} = $sql;
56
57 ################################################################
58 #
59 # COMMENTS
60
61 # C-STYLE
62 #
63 my $comment_re = $self->{"comment_re"} || '(\/\*.*?\*\/)';
64 $self->{"comment_re"} = $comment_re;
65 my $starts_with_comment;
66 if ($sql =~ /^\s*$comment_re(.*)$/s) {
67 $self->{"comment"} = $1;
68 $sql = $2;
69 $starts_with_comment=1;
70 }
71 # SQL STYLE
72 #
73 if ($sql =~ /^\s*--(.*)(\n|$)/) {
74 $self->{"comment"} = $1;
75 return 1;
76 }
77 ################################################################
78
79 $sql = $self->clean_sql($sql);
80 my($com) = $sql =~ /^\s*(\S+)\s+/s ;
81 if (!$com) {
82 return 1 if $starts_with_comment;
83 return $self->do_err("Incomplete statement!");
84 }
85 $com = uc $com;
86 if ($self->{"opts"}->{"valid_commands"}->{$com}) {
87 my $rv = $self->$com($sql);
88 delete $self->{"struct"}->{"literals"};
89 # return $self->do_err("No table names found!")
90 # unless $self->{"struct"}->{"table_names"};
91 return $self->do_err("No command found!")
92 unless $self->{"struct"}->{"command"};
93 if ( $self->{"struct"}->{"join"}
94 and scalar keys %{$self->{"struct"}->{"join"}}==0
95 ) {
96 delete $self->{"struct"}->{"join"};
97 }
98 $self->replace_quoted_ids();
99 $self->{struct}->{org_table_names} = $self->{struct}->{table_names};
100 my @uTables = map {uc $_ } @{$self->{struct}->{table_names}};
101 $self->{struct}->{table_names} = \@uTables;
102 $self->{struct}->{org_col_names} = $self->{struct}->{column_names};
103 my @uCols = map {uc $_ } @{$self->{struct}->{column_names}};
104 $self->{struct}->{column_names} = \@uCols;
105 if ($self->{original_string} =~ /Y\.\*/) {
106 #use mylibs; zwarn $self; exit;
107 }
108 return $rv;
109 }
110 else {
111 $self->{struct}={};
112 if ($ENV{SQL_USER_DEFS}) {
113 return SQL::UserDefs::user_parse($self,$sql);
114 }
115 return $self->do_err("Command '$com' not recognized or not supported!");
116 }
117 }
118
119 sub replace_quoted_ids {
120 my $self = shift;
121 my $id = shift;
122 return $id unless $self->{struct}->{quoted_ids};
123 if ($id) {
124 if ($id =~ /^\?QI(\d+)\?$/) {
125 return '"'.$self->{struct}->{quoted_ids}->[$1].'"';
126 }
127 else {
128 return $id;
129 }
130 }
131 my @tables = @{$self->{struct}->{table_names}};
132 for my $t(@tables) {
133 if ($t =~ /^\?QI(.+)\?$/ ) {
134 $t = '"'.$self->{struct}->{quoted_ids}->[$1].'"';
135 # $t = $self->{struct}->{quoted_ids}->[$1];
136 }
137 }
138 $self->{struct}->{table_names} = \@tables;
139 delete $self->{struct}->{quoted_ids};
140 }
141
142 sub structure { shift->{"struct"} }
143 sub command { my $x = shift->{"struct"}->{command} || '' }
144
145 sub feature {
146 my($self,$opt_class,$opt_name,$opt_value) = @_;
147 if (defined $opt_value) {
148 if ( $opt_class eq 'select' ) {
149 $self->set_feature_flags( {"join"=>$opt_value} );
150 }
151 elsif ( $opt_class eq 'create' ) {
152 $self->set_feature_flags( undef, {$opt_name=>$opt_value} );
153 }
154 else {
155 $self->{$opt_class}->{$opt_name} = $opt_value;
156 }
157 }
158 else {
159 return $self->{"opts"}->{$opt_class}->{$opt_name};
160 }
161 }
162
163 sub errstr { shift->{"struct"}->{"errstr"} }
164
165 sub list {
166 my $self = shift;
167 my $com = uc shift;
168 return () if $com !~ /COMMANDS|RESERVED|TYPES|OPS|OPTIONS|DIALECTS/i;
169 $com = 'valid_commands' if $com eq 'COMMANDS';
170 $com = 'valid_comparison_operators' if $com eq 'OPS';
171 $com = 'valid_data_types' if $com eq 'TYPES';
172 $com = 'valid_options' if $com eq 'OPTIONS';
173 $com = 'reserved_words' if $com eq 'RESERVED';
174 $self->dialect( $self->{"dialect"} ) unless $self->{"dialect_set"};
175
176 return sort keys %{ $self->{"opts"}->{$com} } unless $com eq 'DIALECTS';
177 my $dDir = "SQL/Dialects";
178 my @dialects;
179 for my $dir(@INC) {
180 local *D;
181
182 if ( opendir(D,"$dir/$dDir") ) {
183 @dialects = grep /.*\.pm$/, readdir(D);
184 last;
185 }
186 }
187 @dialects = map { s/\.pm$//; $_} @dialects;
188 return @dialects;
189 }
190
191 sub dialect {
192 my($self,$dialect) = @_;
193 return $self->{"dialect"} unless $dialect;
194 return $self->{"dialect"} if $self->{dialect_set};
195 $self->{"opts"} = {};
196 my $mod = "SQL/Dialects/$dialect.pm";
197 undef $@;
198 eval {
199 require "$mod";
200 };
201 return $self->do_err($@) if $@;
202 $mod =~ s/\.pm//;
203 $mod =~ s"/"::"g;
204 my @data = split /\n/, $mod->get_config;
205 my $feature;
206 for (@data) {
207 chomp;
208 s/^\s+//;
209 s/\s+$//;
210 next unless $_;
211 if (/^\[(.*)\]$/i) {
212 $feature = lc $1;
213 $feature =~ s/\s+/_/g;
214 next;
215 }
216 my $newopt = uc $_;
217 $newopt =~ s/\s+/ /g;
218 $self->{"opts"}->{$feature}->{$newopt} = 1;
219 }
220 $self->{"dialect"} = $dialect;
221 $self->{"dialect_set"}++;
222 }
223
224 ##################################################################
225 # SQL COMMANDS
226 ##################################################################
227
228 ####################################################
229 # DROP TABLE <table_name>
230 ####################################################
231 sub DROP {
232 my $self = shift;
233 my $stmt = shift;
234 my $table_name;
235 $self->{"struct"}->{"command"} = 'DROP';
236 if ($stmt =~ /^\s*DROP\s+TABLE\s+IF\s+EXISTS\s+(.*)$/si ) {
237 $stmt = "DROP TABLE $1";
238 $self->{"struct"}->{ignore_missing_table}=1;
239 }
240 if ($stmt =~ /^\s*DROP\s+(\S+)\s+(.+)$/si ) {
241 my $com2 = $1 || '';
242 $table_name = $2;
243 if ($com2 !~ /^TABLE$/i) {
244 return $self->do_err(
245 "The command 'DROP $com2' is not recognized or not supported!"
246 );
247 }
248 $table_name =~ s/^\s+//;
249 $table_name =~ s/\s+$//;
250 if ( $table_name =~ /(\S+) (RESTRICT|CASCADE)/i) {
251 $table_name = $1;
252 $self->{"struct"}->{"drop_behavior"} = uc $2;
253 }
254 }
255 else {
256 return $self->do_err( "Incomplete DROP statement!" );
257
258 }
259 return undef unless $self->TABLE_NAME($table_name);
260 $table_name = $self->replace_quoted_ids($table_name);
261 $self->{"tmp"}->{"is_table_name"} = {$table_name => 1};
262 $self->{"struct"}->{"table_names"} = [$table_name];
263 return 1;
264 }
265
266 ####################################################
267 # DELETE FROM <table_name> WHERE <search_condition>
268 ####################################################
269 sub DELETE {
270 my($self,$str) = @_;
271 $self->{"struct"}->{"command"} = 'DELETE';
272 my($table_name,$where_clause) = $str =~
273 /^DELETE FROM (\S+)(.*)$/i;
274 return $self->do_err(
275 'Incomplete DELETE statement!'
276 ) if !$table_name;
277 return undef unless $self->TABLE_NAME($table_name);
278 $self->{"tmp"}->{"is_table_name"} = {$table_name => 1};
279 $self->{"struct"}->{"table_names"} = [$table_name];
280 $self->{"struct"}->{"column_names"} = ['*'];
281 $where_clause =~ s/^\s+//;
282 $where_clause =~ s/\s+$//;
283 if ($where_clause) {
284 $where_clause =~ s/^WHERE\s*(.*)$/$1/i;
285 return undef unless $self->SEARCH_CONDITION($where_clause);
286 }
287 return 1;
288 }
289
290 ##############################################################
291 # SELECT
292 ##############################################################
293 # SELECT [<set_quantifier>] <select_list>
294 # | <set_function_specification>
295 # FROM <from_clause>
296 # [WHERE <search_condition>]
297 # [ORDER BY <order_by_clause>]
298 # [LIMIT <limit_clause>]
299 ##############################################################
300
301 sub SELECT {
302 my($self,$str) = @_;
303 $self->{"struct"}->{"command"} = 'SELECT';
304 my($from_clause,$where_clause,$order_clause,$limit_clause);
305 $str =~ s/^SELECT (.+)$/$1/i;
306 if ( $str =~ s/^(.+) LIMIT (.+)$/$1/i ) { $limit_clause = $2; }
307 if ( $str =~ s/^(.+) ORDER BY (.+)$/$1/i ) { $order_clause = $2; }
308 if ( $str =~ s/^(.+?) WHERE (.+)$/$1/i ) { $where_clause = $2; }
309 if ( $str =~ s/^(.+?) FROM (.+)$/$1/i ) { $from_clause = $2; }
310 else {
311 return $self->do_err("Couldn't find FROM clause in SELECT!");
312 }
313 return undef unless $self->FROM_CLAUSE($from_clause);
314 return undef unless $self->SELECT_CLAUSE($str);
315 if ($where_clause) {
316 return undef unless $self->SEARCH_CONDITION($where_clause);
317 }
318 if ($order_clause) {
319 return undef unless $self->SORT_SPEC_LIST($order_clause);
320 }
321 if ($limit_clause) {
322 return undef unless $self->LIMIT_CLAUSE($limit_clause);
323 }
324 if ( ( $self->{"struct"}->{"join"}->{"clause"}
325 and $self->{"struct"}->{"join"}->{"clause"} eq 'ON'
326 )
327 or ( $self->{"struct"}->{"multiple_tables"}
328 ###new
329 and !(scalar keys %{$self->{"struct"}->{"join"}})
330 # and !$self->{"struct"}->{"join"}
331 ###
332 ) ) {
333 return undef unless $self->IMPLICIT_JOIN();
334 }
335 return 1;
336 }
337
338 sub IMPLICIT_JOIN {
339 my $self = shift;
340 delete $self->{"struct"}->{"multiple_tables"};
341 if ( !$self->{"struct"}->{"join"}->{"clause"}
342 or $self->{"struct"}->{"join"}->{"clause"} ne 'ON'
343 ) {
344 $self->{"struct"}->{"join"}->{"type"} = 'INNER';
345 $self->{"struct"}->{"join"}->{"clause"} = 'IMPLICIT';
346 }
347 if (defined $self->{"struct"}->{"keycols"} ) {
348 my @keys;
349 my @keys2 = @keys = @{ $self->{"struct"}->{"keycols"} };
350 $self->{"struct"}->{"join"}->{"table_order"} = $self->order_joins(\@keys2);
351 @{$self->{"struct"}->{"join"}->{"keycols"}} = @keys;
352 delete $self->{"struct"}->{"keycols"};
353 }
354 else {
355 return $self->do_err("No equijoin condition in WHERE or ON clause");
356 }
357 return 1;
358 }
359
360 sub EXPLICIT_JOIN {
361 my $self = shift;
362 my $remainder = shift;
363 return undef unless $remainder;
364 my($tableA,$tableB,$keycols,$jtype,$natural);
365 if ($remainder =~ /^(.+?) (NATURAL|INNER|LEFT|RIGHT|FULL|UNION|JOIN)(.+)$/s){
366 $tableA = $1;
367 $remainder = $2.$3;
368 }
369 else {
370 ($tableA,$remainder) = $remainder =~ /^(\S+) (.*)/;
371 }
372 if ( $remainder =~ /^NATURAL (.+)/) {
373 $self->{"struct"}->{"join"}->{"clause"} = 'NATURAL';
374 $natural++;
375 $remainder = $1;
376 }
377 if ( $remainder =~
378 /^(INNER|LEFT|RIGHT|FULL|UNION) JOIN (.+)/
379 ) {
380 $jtype = $self->{"struct"}->{"join"}->{"clause"} = $1;
381 $remainder = $2;
382 $jtype = "$jtype OUTER" if $jtype !~ /INNER|UNION/;
383 }
384 if ( $remainder =~
385 /^(LEFT|RIGHT|FULL) OUTER JOIN (.+)/
386 ) {
387 $jtype = $self->{"struct"}->{"join"}->{"clause"} = $1 . " OUTER";
388 $remainder = $2;
389 }
390 if ( $remainder =~ /^JOIN (.+)/) {
391 $jtype = 'INNER';
392 $self->{"struct"}->{"join"}->{"clause"} = 'DEFAULT INNER';
393 $remainder = $1;
394 }
395 if ( $self->{"struct"}->{"join"} ) {
396 if ( $remainder && $remainder =~ /^(.+?) USING \(([^\)]+)\)(.*)/) {
397 $self->{"struct"}->{"join"}->{"clause"} = 'USING';
398 $tableB = $1;
399 my $keycolstr = $2;
400 $remainder = $3;
401 @$keycols = split /,/,$keycolstr;
402 }
403 if ( $remainder && $remainder =~ /^(.+?) ON (.+)/) {
404 $self->{"struct"}->{"join"}->{"clause"} = 'ON';
405 $tableB = $1;
406 my $keycolstr = $2;
407 $remainder = $3;
408 if ($keycolstr =~ / OR /i ) {
409 return $self->do_err(qq~Can't use OR in an ON clause!~,1);
410 }
411 @$keycols = split / AND /i,$keycolstr;
412 $self->{"tmp"}->{"is_table_name"}->{"$tableA"} = 1;
413 $self->{"tmp"}->{"is_table_name"}->{"$tableB"} = 1;
414 for (@$keycols) {
415 my($arg1,$arg2) = split / = /;
416 return undef unless $arg1 = $self->ROW_VALUE($arg1);
417 return undef unless $arg2 = $self->ROW_VALUE($arg2);
418 if ( $arg1->{"type"}eq 'column' and $arg2->{"type"}eq 'column'){
419 push @{ $self->{"struct"}->{"keycols"} }, $arg1->{"value"};
420 push @{ $self->{"struct"}->{"keycols"} }, $arg2->{"value"};
421 delete $self->{"struct"}->{"where_clause"};
422 }
423 }
424 }
425 elsif ($remainder =~ /^(.+?)$/i) {
426 $tableB = $1;
427 $remainder = $2;
428 }
429 $remainder =~ s/^\s+// if $remainder;
430 }
431 if ($jtype) {
432 $jtype = "NATURAL $jtype" if $natural;
433 if ($natural and $keycols) {
434 return $self->do_err(
435 qq~Can't use NATURAL with a USING or ON clause!~
436 );
437 }
438 return undef unless $self->TABLE_NAME_LIST("$tableA,$tableB");
439 $self->{"struct"}->{"join"}->{"type"} = $jtype;
440 $self->{"struct"}->{"join"}->{"keycols"} = $keycols if $keycols;
441 return 1;
442 }
443 return $self->do_err("Couldn't parse explicit JOIN!");
444 }
445
446 sub SELECT_CLAUSE {
447 my($self,$str) = @_;
448 return undef unless $str;
449 if ($str =~ s/^(DISTINCT|ALL) (.+)$/$2/i) {
450 $self->{"struct"}->{"set_quantifier"} = uc $1;
451 }
452 if ($str =~ /[()]/) {
453 return undef unless $self->SET_FUNCTION_SPEC($str);
454 }
455 else {
456 return undef unless $self->SELECT_LIST($str);
457 }
458 }
459
460 sub FROM_CLAUSE {
461 my($self,$str) = @_;
462 return undef unless $str;
463 if ($str =~ / JOIN /i ) {
464 return undef unless $self->EXPLICIT_JOIN($str);
465 }
466 else {
467 return undef unless $self->TABLE_NAME_LIST($str);
468 }
469 }
470
471 sub INSERT {
472 my($self,$str) = @_;
473 my $col_str;
474 my($table_name,$val_str) = $str =~
475 /^INSERT\s+INTO\s+(.+?)\s+VALUES\s+\((.+?)\)$/i;
476 if ($table_name and $table_name =~ /[()]/ ) {
477 ($table_name,$col_str,$val_str) = $str =~
478 /^INSERT\s+INTO\s+(.+?)\s+\((.+?)\)\s+VALUES\s+\((.+?)\)$/i;
479 }
480 return $self->do_err('No table name specified!') unless $table_name;
481 return $self->do_err('Missing values list!') unless defined $val_str;
482 return undef unless $self->TABLE_NAME($table_name);
483 $self->{"struct"}->{"command"} = 'INSERT';
484 $self->{"struct"}->{"table_names"} = [$table_name];
485 if ($col_str) {
486 return undef unless $self->COLUMN_NAME_LIST($col_str);
487 }
488 else {
489 $self->{"struct"}->{"column_names"} = ['*'];
490 }
491 return undef unless $self->LITERAL_LIST($val_str);
492 return 1;
493 }
494
495 ###################################################################
496 # UPDATE ::=
497 #
498 # UPDATE <table> SET <set_clause_list> [ WHERE <search_condition>]
499 #
500 ###################################################################
501 sub UPDATE {
502 my($self,$str) = @_;
503 $self->{"struct"}->{"command"} = 'UPDATE';
504 my($table_name,$remainder) = $str =~
505 /^UPDATE (.+?) SET (.+)$/i;
506 return $self->do_err(
507 'Incomplete UPDATE clause'
508 ) if !$table_name or !$remainder;
509 return undef unless $self->TABLE_NAME($table_name);
510 $self->{"tmp"}->{"is_table_name"} = {$table_name => 1};
511 $self->{"struct"}->{"table_names"} = [$table_name];
512 my($set_clause,$where_clause) = $remainder =~
513 /(.*?) WHERE (.*)$/i;
514 $set_clause = $remainder if !$set_clause;
515 return undef unless $self->SET_CLAUSE_LIST($set_clause);
516 if ($where_clause) {
517 return undef unless $self->SEARCH_CONDITION($where_clause);
518 }
519 my @vals = @{$self->{"struct"}->{"values"}};
520 my $num_val_placeholders=0;
521 for my $v(@vals) {
522 $num_val_placeholders++ if $v->{"type"} eq 'placeholder';
523 }
524 $self->{"struct"}->{"num_val_placeholders"}=$num_val_placeholders;
525 return 1;
526 }
527
528 #########
529 # CREATE
530 #########
531
532 sub CREATE {
533 my $self = shift;
534 my $stmt = shift;
535 $self->{"struct"}->{"command"} = 'CREATE';
536 my($table_name,$table_element_def,%is_col_name);
537 if ($stmt =~ /^CREATE (LOCAL|GLOBAL) TEMPORARY TABLE(.*)$/si ) {
538 $self->{"struct"}->{"table_type"} = "$1 TEMPORARY";
539 $stmt = "CREATE TABLE$2";
540 }
541 if ($stmt =~ /^(.*) ON COMMIT (DELETE|PRESERVE) ROWS\s*$/si ) {
542 $stmt = $1;
543 $self->{"struct"}->{"commit_behaviour"} = $2;
544 return $self->do_err(
545 "Can't specify commit behaviour for permanent tables."
546 )
547 if !defined $self->{"struct"}->{"table_type"}
548 or $self->{"struct"}->{"table_type"} !~ /TEMPORARY/;
549 }
550 if ($stmt =~ /^CREATE TABLE (\S+) \((.*)\)$/si ) {
551 $table_name = $1;
552 $table_element_def = $2;
553 }
554 else {
555 return $self->do_err( "Can't find column definitions!" );
556 }
557 return undef unless $self->TABLE_NAME($table_name);
558 $table_element_def =~ s/\s+\(/(/g;
559 my $primary_defined;
560 for my $col(split ',',$table_element_def) {
561 my($name,$type,$constraints)=($col =~/\s*(\S+)\s+(\S+)\s*(.*)/);
562 if (!$type) {
563 return $self->do_err( "Column definition is missing a data type!" );
564 }
565 return undef if !($self->IDENTIFIER($name));
566 # if ($name =~ /^\?QI(.+)\?$/ ) {
567 $name = $self->replace_quoted_ids($name);
568 # }
569 $constraints =~ s/^\s+//;
570 $constraints =~ s/\s+$//;
571 if ($constraints) {
572 $constraints =~ s/PRIMARY KEY/PRIMARY_KEY/i;
573 $constraints =~ s/NOT NULL/NOT_NULL/i;
574 my @c = split /\s+/, $constraints;
575 my %has_c;
576 for my $constr(@c) {
577 if ( $constr =~ /^\s*(UNIQUE|NOT_NULL|PRIMARY_KEY)\s*$/i ) {
578 my $cur_c = uc $1;
579 if ($has_c{$cur_c}++) {
580 return $self->do_err(
581 qq~Duplicate column constraint: '$constr'!~
582 );
583 }
584 if ($cur_c eq 'PRIMARY_KEY' and $primary_defined++ ) {
585 return $self->do_err(
586 qq~Can't have two PRIMARY KEYs in a table!~
587 );
588 }
589 $constr =~ s/_/ /g;
590 push @{$self->{"struct"}->{"column_defs"}->{"$name"}->{"constraints"} }, $constr;
591
592 }
593 else {
594 return $self->do_err("Unknown column constraint: '$constr'!");
595 }
596 }
597 }
598 $type = uc $type;
599 my $length;
600 if ( $type =~ /(.+)\((.+)\)/ ) {
601 $type = $1;
602 $length = $2;
603 }
604 if (!$self->{"opts"}->{"valid_data_types"}->{"$type"}) {
605 return $self->do_err("'$type' is not a recognized data type!");
606 }
607 $self->{"struct"}->{"column_defs"}->{"$name"}->{"data_type"} = $type;
608 $self->{"struct"}->{"column_defs"}->{"$name"}->{"data_length"} = $length;
609 push @{$self->{"struct"}->{"column_names"}},$name;
610 my $tmpname = $name;
611 $tmpname = uc $tmpname unless $tmpname =~ /^"/;
612 return $self->do_err("Duplicate column names!")
613 if $is_col_name{$tmpname}++;
614
615 }
616 $self->{"struct"}->{"table_names"} = [$table_name];
617 return 1;
618 }
619
620
621 ###############
622 # SQL SUBRULES
623 ###############
624
625 sub SET_CLAUSE_LIST {
626 my $self = shift;
627 my $set_string = shift;
628 my @sets = split /,/,$set_string;
629 my(@cols,@vals);
630 for(@sets) {
631 my($col,$val) = split / = /,$_;
632 return $self->do_err('Incomplete SET clause!') if !defined $col or !defined $val;
633 push @cols, $col;
634 push @vals, $val;
635 }
636 return undef unless $self->COLUMN_NAME_LIST(join ',',@cols);
637 return undef unless $self->LITERAL_LIST(join ',',@vals);
638 return 1;
639 }
640
641 sub SET_QUANTIFIER {
642 my($self,$str) = @_;
643 if ($str =~ /^(DISTINCT|ALL)\s+(.*)$/si) {
644 $self->{"struct"}->{"set_quantifier"} = uc $1;
645 $str = $2;
646 }
647 return $str;
648 }
649
650 sub SELECT_LIST {
651 my $self = shift;
652 my $col_str = shift;
653 if ( $col_str =~ /^\s*\*\s*$/ ) {
654 $self->{"struct"}->{"column_names"} = ['*'];
655 return 1;
656 }
657 my @col_list = split ',',$col_str;
658 if (!(scalar @col_list)) {
659 return $self->do_err('Missing column name list!');
660 }
661 my(@newcols,$newcol);
662 for my $col(@col_list) {
663 # $col = trim($col);
664 $col =~ s/^\s+//;
665 $col =~ s/\s+$//;
666 if ($col =~ /^(\S+)\.\*$/) {
667 my $table = $1;
668 my %is_table_alias = %{$self->{"tmp"}->{"is_table_alias"}};
669 $table = $is_table_alias{$table} if $is_table_alias{$table};
670 $table = $is_table_alias{"\L$table"} if $is_table_alias{"\L$table"};
671 # $table = uc $table unless $table =~ /^"/;
672 #use mylibs; zwarn \%is_table_alias;
673 #print "\n<<$table>>\n";
674 return undef unless $self->TABLE_NAME($table);
675 $table = $self->replace_quoted_ids($table);
676 push @newcols, "$table.*";
677 }
678 else {
679 return undef unless $newcol = $self->COLUMN_NAME($col);
680 push @newcols, $newcol;
681 }
682 }
683 $self->{"struct"}->{"column_names"} = \@newcols;
684 return 1;
685 }
686
687 sub SET_FUNCTION_SPEC {
688 my($self,$col_str) = @_;
689 my @funcs = split /,/, $col_str;
690 my %iscol;
691 for my $func(@funcs) {
692 if ($func =~ /^(COUNT|AVG|SUM|MAX|MIN) \((.*)\)\s*$/i ) {
693 my $set_function_name = uc $1;
694 my $set_function_arg = $2;
695 my $distinct;
696 if ( $set_function_arg =~ s/(DISTINCT|ALL) (.+)$/$2/i ) {
697 $distinct = uc $1;
698 $self->{"struct"}->{"set_quantifier"} = $distinct;
699 }
700 my $count_star = 1 if $set_function_name eq 'COUNT'
701 and $set_function_arg eq '*';
702 my $ok = $self->COLUMN_NAME($set_function_arg)
703 if !$count_star;
704 return undef if !$count_star and !$ok;
705 if ($set_function_arg !~ /^"/) {
706 $set_function_arg = uc $set_function_arg;
707 }
708 push @{ $self->{"struct"}->{'set_function'}}, {
709 name => $set_function_name,
710 arg => $set_function_arg,
711 distinct => $distinct,
712 };
713 push( @{ $self->{"struct"}->{"column_names"} }, $set_function_arg)
714 if !$iscol{$set_function_arg}++
715 and ($set_function_arg ne '*');
716 }
717 else {
718 return $self->do_err("Bad set function before FROM clause.");
719 }
720 }
721 my $cname = $self->{"struct"}->{"column_names"};
722 if ( !$cname or not scalar @$cname ) {
723 $self->{"struct"}->{"column_names"} = ['*'];
724 }
725 return 1;
726 }
727
728 sub LIMIT_CLAUSE {
729 my($self,$limit_clause) = @_;
730 # $limit_clause = trim($limit_clause);
731 $limit_clause =~ s/^\s+//;
732 $limit_clause =~ s/\s+$//;
733
734 return 1 if !$limit_clause;
735 my($offset,$limit,$junk) = split /,/, $limit_clause;
736 return $self->do_err('Bad limit clause!')
737 if (defined $limit and $limit =~ /[^\d]/)
738 or ( defined $offset and $offset =~ /[^\d]/ )
739 or defined $junk;
740 if (defined $offset and !defined $limit) {
741 $limit = $offset;
742 undef $offset;
743 }
744 $self->{"struct"}->{"limit_clause"} = {
745 limit => $limit,
746 offset => $offset,
747 };
748 return 1;
749 }
750
751 sub is_number {
752 my $x=shift;
753 return 0 if !defined $x;
754 return 1 if $x =~ /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/;
755 return 0;
756 }
757
758 sub SORT_SPEC_LIST {
759 my($self,$order_clause) = @_;
760 return 1 if !$order_clause;
761 my %is_table_name = %{$self->{"tmp"}->{"is_table_name"}};
762 my %is_table_alias = %{$self->{"tmp"}->{"is_table_alias"}};
763 my @ocols;
764 my @order_columns = split ',',$order_clause;
765 for my $col(@order_columns) {
766 my $newcol;
767 my $newarg;
768 if ($col =~ /\s*(\S+)\s+(ASC|DESC)/si ) {
769 $newcol = $1;
770 $newarg = uc $2;
771 }
772 elsif ($col =~ /^\s*(\S+)\s*$/si ) {
773 $newcol = $1;
774 }
775 else {
776 return $self->do_err(
777 'Junk after column name in ORDER BY clause!'
778 );
779 }
780 return undef if !($newcol = $self->COLUMN_NAME($newcol));
781 if ($newcol =~ /^(.+)\..+$/s ) {
782 my $table = $1;
783 if ($table =~ /^'/) {
784 if (!$is_table_name{"$table"} and !$is_table_alias{"$table"} ) {
785 return $self->do_err( "Table '$table' in ORDER BY clause "
786 . "not in FROM clause."
787 );
788 }}
789 elsif (!$is_table_name{"\L$table"} and !$is_table_alias{"\L$table"} ) {
790 return $self->do_err( "Table '$table' in ORDER BY clause "
791 . "not in FROM clause."
792 );
793 }
794 }
795 push @ocols, {$newcol => $newarg};
796 }
797 $self->{"struct"}->{"sort_spec_list"} = \@ocols;
798 return 1;
799 }
800
801 sub SEARCH_CONDITION {
802 my $self = shift;
803 my $str = shift;
804 $str =~ s/^\s*WHERE (.+)/$1/;
805 $str =~ s/^\s+//;
806 $str =~ s/\s+$//;
807 return $self->do_err("Couldn't find WHERE clause!") unless $str;
808 $str = get_btwn( $str );
809 $str = get_in( $str );
810 my $open_parens = $str =~ tr/\(//;
811 my $close_parens = $str =~ tr/\)//;
812 if ($open_parens != $close_parens) {
813 return $self->do_err("Mismatched parentheses in WHERE clause!");
814 }
815 $str = nongroup_numeric( nongroup_string( $str ) );
816 my $pred = $open_parens
817 ? $self->parens_search($str,[])
818 : $self->non_parens_search($str,[]);
819 return $self->do_err("Couldn't find predicate!") unless $pred;
820 $self->{"struct"}->{"where_clause"} = $pred;
821 return 1;
822 }
823
824 ############################################################
825 # UTILITY FUNCTIONS CALLED TO PARSE PARENS IN WHERE CLAUSE
826 ############################################################
827
828 # get BETWEEN clause
829 #
830 sub get_btwn {
831 my $str = shift;
832 if ($str =~ /^(.+?) BETWEEN (.+)$/i ) {
833 my($col,$in,$out,$contents);
834 my $front = $1;
835 my $back = $2;
836 my $not = 1 if $front =~ s/^(.+) NOT$/$1/i;
837 if ($front =~ s/^(.+? )(AND|OR|\() (.+)$/$1$2/i) {
838 $col = $3;
839 }
840 else {
841 $col = $front;
842 $front = '';
843 }
844 $front .= " NOT" if $not;
845 my($val1,$val2);
846 if ($back =~ s/^(.+?) AND (.+)$/$2/) {
847 $val1 = $1;
848 }
849 if ($back =~ s/^(.+?) (AND|OR)(.+)$/$2$3/i) {
850 $val2 = $1;
851 }
852 else {
853 $val2 = $back;
854 $back = '';
855 }
856 $str = "$front ($col > $val1 AND $col < $val2) $back";
857 return get_btwn($str);
858 }
859 return $str;
860 }
861
862 # get IN clause
863 #
864 # a IN (b,c) -> (a=b OR a=c)
865 # a NOT IN (b,c) -> (a<>b AND a<>c)
866 #
867 sub get_in {
868 my $str = shift;
869 my $in_inside_parens;
870 if ($str =~ /^(.+?) IN (\(.+)$/i ) {
871 my($col,$in,$out,$contents);
872 my $front = $1;
873 my $back = $2;
874 my $not;
875 $not++ if $front =~ s/^(.+) NOT$/$1/i;
876 if ($front =~ s/^(.+? )(AND|OR|\() (.+)$/$1$2/i) {
877 $col = $3;
878 }
879 else {
880 $col = $front;
881 $not++ if $col =~ s/^NOT (.+)/$1/i;
882 $front = '';
883 }
884 if ( $col =~ s/^\(// ) {
885 $in_inside_parens++;
886 }
887 #print "~$not~\n";
888 # $front .= " NOT" if $not;
889 # $not++ if $front =~ s/^(.+) NOT$/$1/i;
890 my @chars = split '', $back;
891 for (0..$#chars) {
892 my $char = shift @chars;
893 $contents .= $char;
894 $in++ if $char eq '(';
895 if ( $char eq ')' ) {
896 $out++;
897 last if $in == $out;
898 }
899 }
900 $back = join '', @chars;
901 $back =~ s/\)$// if $in_inside_parens;
902 # print "\n[$front][$col][$contents][$back]\n";
903 #die "\n[$contents]\n";
904 $contents =~ s/^\(//;
905 $contents =~ s/\)$//;
906 my @vals = split /,/, $contents;
907 my $op = '=';
908 my $combiner = 'OR';
909 if ($not) {
910 $op = '<>';
911 $combiner = 'AND';
912 }
913 @vals = map { "$col $op $_" } @vals;
914 my $valStr = join " $combiner ", @vals;
915 $str = "$front ($valStr) $back";
916 $str =~ s/\s+/ /g;
917 return get_in($str);
918 }
919 $str =~ s/^\s+//;
920 $str =~ s/\s+$//;
921 $str =~ s/\(\s+/(/;
922 $str =~ s/\s+\)/)/;
923 #print "$str:\n";
924 return $str;
925 }
926
927 # groups clauses by nested parens
928 #
929 sub parens_search {
930 my $self = shift;
931 my $str = shift;
932 my $predicates = shift;
933 my $index = scalar @$predicates;
934
935 # to handle WHERE (a=b) AND (c=d)
936 if ($str =~ /\(([^()]+?)\)/ ) {
937 my $pred = quotemeta $1;
938 if ($pred !~ / (AND|OR) / ) {
939 $str =~ s/\(($pred)\)/$1/;
940 }
941 }
942 #
943
944 if ($str =~ s/\(([^()]+)\)/^$index^/ ) {
945 push @$predicates, $1;
946 }
947 if ($str =~ /\(/ ) {
948 return $self->parens_search($str,$predicates);
949 }
950 else {
951 return $self->non_parens_search($str,$predicates);
952 }
953 }
954
955 # creates predicates from clauses that either have no parens
956 # or ANDs or have been previously grouped by parens and ANDs
957 #
958 sub non_parens_search {
959 my $self = shift;
960 my $str = shift;
961 my $predicates = shift;
962 my $neg = 0;
963 my $nots = {};
964 if ( $str =~ s/^NOT (\^.+)$/$1/i ) {
965 $neg = 1;
966 $nots = {pred=>1};
967 }
968 my( $pred1, $pred2, $op );
969 my $and_preds =[];
970 ($str,$and_preds) = group_ands($str);
971 $str =~ s/^\s*\^0\^\s*$/$predicates->[0]/;
972 return if $str =~ /^\s*~0~\s*$/;
973 if ( ($pred1, $op, $pred2) = $str =~ /^(.+) (AND|OR) (.+)$/i ) {
974 $pred1 =~ s/\~(\d+)\~$/$and_preds->[$1]/;
975 $pred2 =~ s/\~(\d+)\~$/$and_preds->[$1]/;
976 $pred1 = $self->non_parens_search($pred1,$predicates);
977 $pred2 = $self->non_parens_search($pred2,$predicates);
978 return {
979 neg => $neg,
980 nots => $nots,
981 arg1 => $pred1,
982 op => uc $op,
983 arg2 => $pred2,
984 };
985 }
986 else {
987 my $xstr = $str;
988 $xstr =~ s/\?(\d+)\?/$self->{"struct"}->{"literals"}->[$1]/g;
989 my($k,$v) = $xstr =~ /^(\S+?)\s+\S+\s*(.+)\s*$/;
990 #print "$k,$v\n" if defined $k;
991 push @{ $self->{struct}->{where_cols}->{$k}}, $v if defined $k;
992 return $self->PREDICATE($str);
993 }
994 }
995
996 # groups AND clauses that aren't already grouped by parens
997 #
998 sub group_ands{
999 my $str = shift;
1000 my $and_preds = shift || [];
1001 return($str,$and_preds) unless $str =~ / AND / and $str =~ / OR /;
1002 if ($str =~ /^(.*?) AND (.*)$/i ) {
1003 my $index = scalar @$and_preds;
1004 my($front, $back)=($1,$2);
1005 if ($front =~ /^.* OR (.*)$/i ) {
1006 $front = $1;
1007 }
1008 if ($back =~ /^(.*?) (OR|AND) (.*)$/i ) {
1009 $back = $1;
1010 }
1011 my $newpred = "$front AND $back";
1012 push @$and_preds, $newpred;
1013 $str =~ s/\Q$newpred/~$index~/i;
1014 return group_ands($str,$and_preds);
1015 }
1016 else {
1017 return $str,$and_preds;
1018 }
1019 }
1020
1021 # replaces string function parens with square brackets
1022 # e.g TRIM (foo) -> TRIM[foo]
1023 #
1024
1025 sub nongroup_string {
1026 my $f= FUNCTION_NAMES;
1027 my $str = shift;
1028 # $str =~ s/(TRIM|SUBSTRING|UPPER|LOWER) \(([^()]+)\)/$1\[$2\]/gi;
1029 $str =~ s/($f) \(([^()]+)\)/$1\[$2\]/gi;
1030 # if ( $str =~ /(TRIM|SUBSTRING|UPPER|LOWER) \(/i ) {
1031 if ( $str =~ /($f) \(/i ) {
1032 return nongroup_string($str);
1033 }
1034 else {
1035 return $str;
1036 }
1037 }
1038
1039 # replaces math parens with square brackets
1040 # e.g (4-(6+7)*9) -> MATH[4-MATH[6+7]*9]
1041 #
1042 sub nongroup_numeric {
1043 my $str = shift;
1044 my $has_op;
1045 if ( $str =~ /\(([0-9 \*\/\+\-_a-zA-Z\[\]\?]+)\)/ ) {
1046 my $match = $1;
1047 if ($match !~ /(LIKE |IS|BETWEEN|IN)/ ) {
1048 my $re = quotemeta($match);
1049 $str =~ s/\($re\)/MATH\[$match\]/;
1050 }
1051 else {
1052 $has_op++;
1053 }
1054 }
1055 if ( !$has_op and $str =~ /\(([0-9 \*\/\+\-_a-zA-Z\[\]\?]+)\)/ ) {
1056 return nongroup_numeric($str);
1057 }
1058 else {
1059 return $str;
1060 }
1061 }
1062 ############################################################
1063
1064
1065 #########################################################
1066 # LITERAL_LIST ::= <literal> [,<literal>]
1067 #########################################################
1068 sub LITERAL_LIST {
1069 my $self = shift;
1070 my $str = shift;
1071 my @tokens = split /,/, $str;
1072 my @values;
1073 for my $tok(@tokens) {
1074 my $val = $self->ROW_VALUE($tok);
1075 return $self->do_err(
1076 qq('$tok' is not a valid value or is not quoted!)
1077 ) unless $val;
1078 push @values, $val;
1079 }
1080 $self->{"struct"}->{"values"} = \@values;
1081 return 1;
1082 }
1083
1084
1085 ###################################################################
1086 # LITERAL ::= <quoted_string> | <question mark> | <number> | NULL
1087 ###################################################################
1088 sub LITERAL {
1089 my $self = shift;
1090 my $str = shift;
1091 return 'null' if $str =~ /^NULL$/i; # NULL
1092 # return 'empty_string' if $str =~ /^~E~$/i; # NULL
1093 if ($str eq '?') {
1094 $self->{struct}->{num_placeholders}++;
1095 return 'placeholder';
1096 }
1097 # return 'placeholder' if $str eq '?'; # placeholder question mark
1098 return 'string' if $str =~ /^'.*'$/s; # quoted string
1099 return 'number' if $str =~ # number
1100 /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/;
1101 return undef;
1102 }
1103 ###################################################################
1104 # PREDICATE
1105 ###################################################################
1106 sub PREDICATE {
1107 my $self = shift;
1108 my $str = shift;
1109 my @allops = keys %{ $self->{"opts"}->{"valid_comparison_operators"} };
1110 my @notops;
1111 for (@allops) { push (@notops, $_) if /NOT/i };
1112 my $ops = join '|', @notops;
1113 my $opexp = "^\\s*(.+)\\s+($ops)\\s+(.*)\\s*\$";
1114 my($arg1,$op,$arg2) = $str =~ /$opexp/i;
1115 if (!defined $op) {
1116 my @compops;
1117 for (@allops) { push (@compops, $_) if /<=|>=|<>/ };
1118 $ops = join '|', @compops;
1119 $opexp = "^\\s*(.+)\\s+($ops)\\s+(.*)\\s*\$";
1120 ($arg1,$op,$arg2) = $str =~ /$opexp/i;
1121 }
1122 if (!defined $op) {
1123 $ops = join '|', @allops;
1124 $opexp = "^\\s*(.+)\\s+($ops)\\s+(.*)\\s*\$";
1125 ($arg1,$op,$arg2) = $str =~ /$opexp/i;
1126 }
1127 $op = uc $op;
1128 if (!defined $arg1 || !defined $op || !defined $arg2) {
1129 return $self->do_err("Bad predicate: '$str'!");
1130 }
1131 my $negated = 0; # boolean value showing if predicate is negated
1132 my %not; # hash showing elements modified by NOT
1133 #
1134 # e.g. "NOT bar = foo" -> %not = (arg1=>1)
1135 # "bar NOT LIKE foo" -> %not = (op=>1)
1136 # "NOT bar NOT LIKE foo" -> %not = (arg1=>1,op=>1);
1137 # "NOT bar IS NOT NULL" -> %not = (arg1=>1,op=>1);
1138 # "bar = foo" -> %not = undef;
1139 #
1140 if ( $arg1 =~ s/^NOT (.+)$/$1/i ) {
1141 $not{arg1}++;
1142 }
1143 if ( $op =~ s/^(.+) NOT$/$1/i
1144 || $op =~ s/^NOT (.+)$/$1/i ) {
1145 $not{op}++;
1146 }
1147 $negated = 1 if %not and scalar keys %not == 1;
1148 return undef unless $arg1 = $self->ROW_VALUE($arg1);
1149 return undef unless $arg2 = $self->ROW_VALUE($arg2);
1150 if ( $arg1->{"type"}eq 'column'
1151 and $arg2->{"type"}eq 'column'
1152 and $op eq '='
1153 ) {
1154 push @{ $self->{"struct"}->{"keycols"} }, $arg1->{"value"};
1155 push @{ $self->{"struct"}->{"keycols"} }, $arg2->{"value"};
1156 }
1157 return {
1158 neg => $negated,
1159 nots => \%not,
1160 arg1 => $arg1,
1161 op => $op,
1162 arg2 => $arg2,
1163 };
1164 }
1165
1166 sub undo_string_funcs {
1167 my $str = shift;
1168 my $f= FUNCTION_NAMES;
1169 # $str =~ s/(TRIM|UPPER|LOWER|SUBSTRING)\[([^\]\[]+?)\]/$1 ($2)/;
1170 # if ($str =~ /(TRIM|UPPER|LOWER|SUBSTRING)\[/) {
1171 $str =~ s/($f)\[([^\]\[]+?)\]/$1 ($2)/;
1172 if ($str =~ /($f)\[/) {
1173 return undo_string_funcs($str);
1174 }
1175 return $str;
1176 }
1177
1178 sub undo_math_funcs {
1179 my $str = shift;
1180 $str =~ s/MATH\[([^\]\[]+?)\]/($1)/;
1181 if ($str =~ /MATH\[/) {
1182 return undo_math_funcs($str);
1183 }
1184 return $str;
1185 }
1186
1187
1188 ###################################################################
1189 # ROW_VALUE ::= <literal> | <column_name>
1190 ###################################################################
1191 sub ROW_VALUE {
1192 my $self = shift;
1193 my $str = shift;
1194 $str = undo_string_funcs($str);
1195 $str = undo_math_funcs($str);
1196 my $type;
1197
1198 # MATH
1199 #
1200 if ($str =~ /[\*\+\-\/]/ ) {
1201 my @vals;
1202 my $i=-1;
1203 $str =~ s/([^\s\*\+\-\/\)\(]+)/push @vals,$1;$i++;"?$i?"/ge;
1204 my @newvalues;
1205 for (@vals) {
1206 my $val = $self->ROW_VALUE($_);
1207 if ($val && $val->{"type"} !~ /number|column|placeholder/) {
1208 return $self->do_err(qq[
1209 String '$val' not allowed in Numeric expression!
1210 ]);
1211 }
1212 push @newvalues,$val;
1213 }
1214 return {
1215 type => 'function',
1216 name => 'numeric_exp',
1217 str => $str,
1218 vals => \@newvalues,
1219 }
1220 }
1221
1222 # SUBSTRING (value FROM start [FOR length])
1223 #
1224 if ($str =~ /^SUBSTRING \((.+?) FROM (.+)\)\s*$/i ) {
1225 my $name = 'SUBSTRING';
1226 my $start = $2;
1227 my $value = $self->ROW_VALUE($1);
1228 my $length;
1229 if ($start =~ /^(.+?) FOR (.+)$/i) {
1230 $start = $1;
1231 $length = $2;
1232 $length = $self->ROW_VALUE($length);
1233 }
1234 $start = $self->ROW_VALUE($start);
1235 $str =~ s/\?(\d+)\?/$self->{"struct"}->{"literals"}->[$1]/g;
1236 return $self->do_err(
1237 "Can't use a string as a SUBSTRING position: '$str'!")
1238 if $start->{"type"} eq 'string'
1239 or ($start->{"length"} and $start->{"length"}->{"type"} eq 'string');
1240 return undef unless $value;
1241 return $self->do_err(
1242 "Can't use a number in SUBSTRING: '$str'!")
1243 if $value->{"type"} eq 'number';
1244 return {
1245 "type" => 'function',
1246 "name" => $name,
1247 "value" => $value,
1248 "start" => $start,
1249 "length" => $length,
1250 };
1251 }
1252
1253 # TO_CHAR (value)
1254 #
1255 if ($str =~ /^TO_CHAR \((.+)\)\s*$/i ) {
1256 my $name = 'TO_CHAR';
1257 my $value = $self->ROW_VALUE($1);
1258 return undef unless $value;
1259 return {
1260 type => 'function',
1261 name => $name,
1262 value => $value,
1263 };
1264 }
1265
1266 # UPPER (value) and LOWER (value)
1267 #
1268 if ($str =~ /^(UPPER|LOWER) \((.+)\)\s*$/i ) {
1269 my $name = uc $1;
1270 my $value = $self->ROW_VALUE($2);
1271 return undef unless $value;
1272 $str =~ s/\?(\d+)\?/$self->{"struct"}->{"literals"}->[$1]/g;
1273 return $self->do_err(
1274 "Can't use a number in UPPER/LOWER: '$str'!")
1275 if $value->{"type"} eq 'number';
1276 return {
1277 type => 'function',
1278 name => $name,
1279 value => $value,
1280 };
1281 }
1282
1283 # TRIM ( [ [TRAILING|LEADING|BOTH] ['char'] FROM ] value )
1284 #
1285 if ($str =~ /^(TRIM) \((.+)\)\s*$/i ) {
1286 my $name = uc $1;
1287 my $value = $2;
1288 my($trim_spec,$trim_char);
1289 if ($value =~ /^(.+) FROM ([^\(\)]+)$/i ) {
1290 my $front = $1;
1291 $value = $2;
1292 if ($front =~ /^\s*(TRAILING|LEADING|BOTH)(.*)$/i ) {
1293 $trim_spec = uc $1;
1294 # $trim_char = trim($2);
1295 $trim_char = $2;
1296 $trim_char =~ s/^\s+//;
1297 $trim_char =~ s/\s+$//;
1298 undef $trim_char if length($trim_char)==0;
1299 }
1300 else {
1301 # $trim_char = trim($front);
1302 $trim_char = $front;
1303 $trim_char =~ s/^\s+//;
1304 $trim_char =~ s/\s+$//;
1305 }
1306 }
1307 $trim_char =~ s/\?(\d+)\?/$self->{"struct"}->{"literals"}->[$1]/g if $trim_char;
1308 $value = $self->ROW_VALUE($value);
1309 return undef unless $value;
1310 $str =~ s/\?(\d+)\?/$self->{"struct"}->{"literals"}->[$1]/g;
1311 return $self->do_err(
1312 "Can't use a number in TRIM: '$str'!")
1313 if $value->{"type"} eq 'number';
1314 return {
1315 type => 'function',
1316 name => $name,
1317 value => $value,
1318 trim_spec => $trim_spec,
1319 trim_char => $trim_char,
1320 };
1321 }
1322
1323 # STRING CONCATENATION
1324 #
1325 if ($str =~ /\|\|/ ) {
1326 my @vals = split / \|\| /,$str;
1327 my @newvals;
1328 for my $val(@vals) {
1329 my $newval = $self->ROW_VALUE($val);
1330 return undef unless $newval;
1331 return $self->do_err(
1332 "Can't use a number in string concatenation: '$str'!")
1333 if $newval->{"type"} eq 'number';
1334 push @newvals,$newval;
1335 }
1336 return {
1337 type => 'function',
1338 name => 'str_concat',
1339 value => \@newvals,
1340 };
1341 }
1342
1343 # NULL, PLACEHOLDER, NUMBER
1344 #
1345 if ( $type = $self->LITERAL($str) ) {
1346 undef $str if $type eq 'null';
1347 # if ($type eq 'empty_string') {
1348 # $str = '';
1349 # $type = 'string';
1350 # }
1351 $str = '' if $str and $str eq q('');
1352 return { type => $type, value => $str };
1353 }
1354
1355 # QUOTED STRING LITERAL
1356 #
1357 if ($str =~ /\?(\d+)\?/) {
1358 return { type =>'string',
1359 value => $self->{"struct"}->{"literals"}->[$1] };
1360 }
1361 # COLUMN NAME
1362 #
1363 return undef unless $str = $self->COLUMN_NAME($str);
1364 if ( $str =~ /^(.*)\./ && !$self->{"tmp"}->{"is_table_name"}->{"\L$1"}
1365 and !$self->{"tmp"}->{"is_table_alias"}->{"\L$1"} ) {
1366 return $self->do_err(
1367 "Table '$1' in WHERE clause not in FROM clause!"
1368 );
1369 }
1370 # push @{ $self->{"struct"}->{"where_cols"}},$str
1371 # unless $self->{"tmp"}->{"where_cols"}->{"$str"};
1372 $self->{"tmp"}->{"where_cols"}->{"$str"}++;
1373 return { type => 'column', value => $str };
1374 }
1375
1376 ###############################################
1377 # COLUMN NAME ::= [<table_name>.] <identifier>
1378 ###############################################
1379
1380 sub COLUMN_NAME {
1381 my $self = shift;
1382 my $str = shift;
1383 my($table_name,$col_name);
1384 if ( $str =~ /^\s*(\S+)\.(\S+)$/s ) {
1385 if (!$self->{"opts"}->{"valid_options"}->{"SELECT_MULTIPLE_TABLES"}) {
1386 return $self->do_err('Dialect does not support multiple tables!');
1387 }
1388 $table_name = $1;
1389 $col_name = $2;
1390 # my $alias = $self->{struct}->{table_alias} || [];
1391 # $table_name = shift @$alias if $alias;
1392 #print "$table_name : $alias";
1393 return undef unless $self->TABLE_NAME($table_name);
1394 $table_name = $self->replace_quoted_ids($table_name);
1395 my $ref;
1396 if ($table_name =~ /^"/) { #"
1397 if (!$self->{"tmp"}->{"is_table_name"}->{"$table_name"}
1398 and !$self->{"tmp"}->{"is_table_alias"}->{"$table_name"}
1399 ) {
1400 $self->do_err(
1401 "Table '$table_name' referenced but not found in FROM list!"
1402 );
1403 return undef;
1404 } }
1405 elsif (!$self->{"tmp"}->{"is_table_name"}->{"\L$table_name"}
1406 and !$self->{"tmp"}->{"is_table_alias"}->{"\L$table_name"}
1407 ) {
1408 $self->do_err(
1409 "Table '$table_name' referenced but not found in FROM list!"
1410 );
1411 return undef;
1412 }
1413 }
1414 else {
1415 $col_name = $str;
1416 }
1417 # $col_name = trim($col_name);
1418 $col_name =~ s/^\s+//;
1419 $col_name =~ s/\s+$//;
1420 return undef unless $col_name eq '*' or $self->IDENTIFIER($col_name);
1421 #
1422 # MAKE COL NAMES ALL UPPER CASE
1423 my $orgcol = $col_name;
1424 if ($col_name =~ /^\?QI(\d+)\?$/) {
1425 $col_name = $self->replace_quoted_ids($col_name);
1426 }
1427 else {
1428 # $col_name = lc $col_name;
1429 $col_name = uc $col_name;
1430 }
1431 $self->{struct}->{ORG_NAME}->{$col_name} = $orgcol;
1432 #
1433 #
1434 if ($table_name) {
1435 my $alias = $self->{tmp}->{is_table_alias}->{"\L$table_name"};
1436 #use mylibs; print "$table_name"; zwarn $self->{tmp};
1437 $table_name = $alias if defined $alias;
1438 $table_name = uc $table_name;
1439 $col_name = "$table_name.$col_name";
1440 #print "<<$col_name>>";
1441 }
1442 return $col_name;
1443 }
1444
1445 #########################################################
1446 # COLUMN NAME_LIST ::= <column_name> [,<column_name>...]
1447 #########################################################
1448 sub COLUMN_NAME_LIST {
1449 my $self = shift;
1450 my $col_str = shift;
1451 my @col_list = split ',',$col_str;
1452 if (!(scalar @col_list)) {
1453 return $self->do_err('Missing column name list!');
1454 }
1455 my @newcols;
1456 my $newcol;
1457 for my $col(@col_list) {
1458 $col =~ s/^\s+//;
1459 $col =~ s/\s+$//;
1460 # return undef if !($newcol = $self->COLUMN_NAME(trim($col)));
1461 return undef if !($newcol = $self->COLUMN_NAME($col));
1462 push @newcols, $newcol;
1463 }
1464 $self->{"struct"}->{"column_names"} = \@newcols;
1465 return 1;
1466 }
1467
1468
1469 #####################################################
1470 # TABLE_NAME_LIST := <table_name> [,<table_name>...]
1471 #####################################################
1472 sub TABLE_NAME_LIST {
1473 my $self = shift;
1474 my $table_name_str = shift;
1475 my %aliases = ();
1476 my @tables;
1477 my @table_names = split ',', $table_name_str;
1478 if ( scalar @table_names > 1
1479 and !$self->{"opts"}->{"valid_options"}->{'SELECT_MULTIPLE_TABLES'}
1480 ) {
1481 return $self->do_err('Dialect does not support multiple tables!');
1482 }
1483 my %is_table_alias;
1484 for my $table_str(@table_names) {
1485 my($table,$alias);
1486 my(@tstr) = split / /,$table_str;
1487 if (@tstr == 1) { $table = $tstr[0]; }
1488 elsif (@tstr == 2) { $table = $tstr[0]; $alias = $tstr[1]; }
1489 # elsif (@tstr == 2) { $table = $tstr[1]; $alias = $tstr[0]; }
1490 elsif (@tstr == 3) {
1491 return $self->do_err("Can't find alias in FROM clause!")
1492 unless uc($tstr[1]) eq 'AS';
1493 $table = $tstr[0]; $alias = $tstr[2];
1494 # $table = $tstr[2]; $alias = $tstr[0];
1495 }
1496 else {
1497 return $self->do_err("Can't find table names in FROM clause!")
1498 }
1499 return undef unless $self->TABLE_NAME($table);
1500 $table = $self->replace_quoted_ids($table);
1501 push @tables, $table;
1502 if ($alias) {
1503 #die $alias, $table;
1504 return undef unless $self->TABLE_NAME($alias);
1505 $alias = $self->replace_quoted_ids($alias);
1506 if ($alias =~ /^"/) {
1507 push @{$aliases{$table}},"$alias";
1508 $is_table_alias{"$alias"}=$table;
1509 }
1510 else {
1511 push @{$aliases{$table}},"\L$alias";
1512 $is_table_alias{"\L$alias"}=$table;
1513 }
1514 # $aliases{$alias} = $table;
1515 }
1516 }
1517
1518 # my %is_table_name = map { $_ => 1 } @tables,keys %aliases;
1519 my %is_table_name = map { lc $_ => 1 } @tables;
1520 #%is_table_alias = map { lc $_ => 1 } @aliases;
1521 $self->{"tmp"}->{"is_table_alias"} = \%is_table_alias;
1522 $self->{"tmp"}->{"is_table_name"} = \%is_table_name;
1523 $self->{"struct"}->{"table_names"} = \@tables;
1524 $self->{"struct"}->{"table_alias"} = \%aliases;
1525 $self->{"struct"}->{"multiple_tables"} = 1 if @tables > 1;
1526 return 1;
1527 }
1528
1529 #############################
1530 # TABLE_NAME := <identifier>
1531 #############################
1532 sub TABLE_NAME {
1533 my $self = shift;
1534 my $table_name = shift;
1535 if ($table_name =~ /\s*(\S+)\s+\S+/s) {
1536 return $self->do_err("Junk after table name '$1'!");
1537 }
1538 $table_name =~ s/\s+//s;
1539 if (!$table_name) {
1540 return $self->do_err('No table name specified!');
1541 }
1542 return $self->IDENTIFIER($table_name);
1543 # return undef if !($self->IDENTIFIER($table_name));
1544 # return 1;
1545 }
1546
1547
1548 ###################################################################
1549 # IDENTIFIER ::= <alphabetic_char> { <alphanumeric_char> | _ }...
1550 #
1551 # and must not be a reserved word or over 128 chars in length
1552 ###################################################################
1553 sub IDENTIFIER {
1554 my $self = shift;
1555 my $id = shift;
1556 if ($id =~ /^\?QI(.+)\?$/ ) {
1557 return 1;
1558 }
1559 return 1 if $id =~ /^".+?"$/s; # QUOTED IDENTIFIER
1560 my $err = "Bad table or column name '$id' "; # BAD CHARS
1561 if ($id !~ /\w|\./) {
1562 $err .= "has chars not alphanumeric or underscore!";
1563 return $self->do_err( $err );
1564 }
1565 if ($id =~ /^_/ or $id =~ /^\d/) { # BAD START
1566 $err .= "starts with non-alphabetic character!";
1567 return $self->do_err( $err );
1568 }
1569 if ( length $id > 128 ) { # BAD LENGTH
1570 $err .= "contains more than 128 characters!";
1571 return $self->do_err( $err );
1572 }
1573 $id = uc $id;
1574 #print "<$id>";
1575 #use mylibs; zwarn $self->{opts}->{reserved_words};
1576 #exit;
1577 if ( $self->{"opts"}->{"reserved_words"}->{$id} ) { # BAD RESERVED WORDS
1578 $err .= "is a SQL reserved word!";
1579 return $self->do_err( $err );
1580 }
1581 return 1;
1582 }
1583
1584 ########################################
1585 # PRIVATE METHODS AND UTILITY FUNCTIONS
1586 ########################################
1587 sub order_joins {
1588 my $self = shift;
1589 my $links = shift;
1590 for my $link(@$links) {
1591 if ($link !~ /\./) {
1592 return [];
1593 }
1594 }
1595 @$links = map { s/^(.+)\..*$/$1/; $1; } @$links;
1596 my @all_tables;
1597 my %relations;
1598 my %is_table;
1599 while (@$links) {
1600 my $t1 = shift @$links;
1601 my $t2 = shift @$links;
1602 return undef unless defined $t1 and defined $t2;
1603 push @all_tables, $t1 unless $is_table{$t1}++;
1604 push @all_tables, $t2 unless $is_table{$t2}++;
1605 $relations{$t1}{$t2}++;
1606 $relations{$t2}{$t1}++;
1607 }
1608 my @tables = @all_tables;
1609 my @order = shift @tables;
1610 my %is_ordered = ( $order[0] => 1 );
1611 my %visited;
1612 while(@tables) {
1613 my $t = shift @tables;
1614 my @rels = keys %{$relations{$t}};
1615 for my $t2(@rels) {
1616 next unless $is_ordered{$t2};
1617 push @order, $t;
1618 $is_ordered{$t}++;
1619 last;
1620 }
1621 if (!$is_ordered{$t}) {
1622 push @tables, $t if $visited{$t}++ < @all_tables;
1623 }
1624 }
1625 return $self->do_err(
1626 "Unconnected tables in equijoin statement!"
1627 ) if @order < @all_tables;
1628 return \@order;
1629 }
1630
1631 sub bless_me {
1632 my $class = shift;
1633 my $self = shift || {};
1634 return bless $self, $class;
1635 }
1636
1637 # PROVIDE BACKWARD COMPATIBILIT FOR JOCHEN'S FEATURE ATTRIBUTES TO NEW
1638 #
1639 #
1640 sub set_feature_flags {
1641 my($self,$select,$create) = @_;
1642 if (defined $select) {
1643 delete $self->{"select"};
1644 $self->{"opts"}->{"valid_options"}->{"SELECT_MULTIPLE_TABLES"} =
1645 $self->{"opts"}->{"select"}->{"join"} = $select->{"join"};
1646 }
1647 if (defined $create) {
1648 delete $self->{"create"};
1649 for my $key(keys %$create) {
1650 my $type = $key;
1651 $type =~ s/type_(.*)/\U$1/;
1652 $self->{"opts"}->{"valid_data_types"}->{"$type"} =
1653 $self->{"opts"}->{"create"}->{"$key"} = $create->{"$key"};
1654 }
1655 }
1656 }
1657
1658 sub clean_sql {
1659 my $self = shift;
1660 my $sql = shift;
1661 my $fields;
1662 my $i=-1;
1663 my $e = '\\';
1664 $e = quotemeta($e);
1665 ###new
1666 # CAN'T HANDLE BLOBS!!!
1667 # $sql = quotemeta($sql);
1668 # print "[$sql]\n";
1669 # if ($sql =~ s/^(.*,\s*)''(\s*[,\)].*)$/${1}NULL$2/g ) {
1670 # }
1671 # $sql =~ s/^([^\\']+?)''(.*)$/${1} NULL $2/g;
1672
1673 # $sql =~ s/([^\\]+?)''/$1 ~E~ /g;
1674
1675 # print "$sql\n";
1676 ###newend
1677
1678 # $sql =~ s~'(([^'$e]|$e.)+)'~push(@$fields,$1);$i++;"?$i?"~ge;
1679 $sql =~ s~'(([^'$e]|$e.|'')+)'~push(@$fields,$1);$i++;"?$i?"~ge;
1680
1681 # $sql =~ s/([^\\]+?)''/$1 ~E~ /g;
1682 #print "<$sql>";
1683 @$fields = map { s/''/\\'/g; $_ } @$fields;
1684
1685 ###new
1686 # if ( $sql =~ /'/) {
1687 if ( $sql =~ tr/[^\\]'// % 2 == 1 ) {
1688 ###endnew
1689 $sql =~ s/^.*\?(.+)$/$1/;
1690 die "Mismatched single quote before: '$sql\n";
1691 }
1692 if ($sql =~ /\?\?(\d)\?/) {
1693 $sql = $fields->[$1];
1694 die "Mismatched single quote: '$sql\n";
1695 }
1696 @$fields = map { s/$e'/'/g; s/^'(.*)'$/$1/; $_} @$fields;
1697 $self->{"struct"}->{"literals"} = $fields;
1698
1699 my $qids;
1700 $i=-1;
1701 $e = q/""/;
1702 # $sql =~ s~"(([^"$e]|$e.)+)"~push(@$qids,$1);$i++;"?QI$i?"~ge;
1703 $sql =~ s~"(([^"]|"")+)"~push(@$qids,$1);$i++;"?QI$i?"~ge;
1704 #@$qids = map { s/$e'/'/g; s/^'(.*)'$/$1/; $_} @$qids;
1705 $self->{"struct"}->{"quoted_ids"} = $qids if $qids;
1706
1707 # $sql =~ s~'(([^'\\]|\\.)+)'~push(@$fields,$1);$i++;"?$i?"~ge;
1708 # @$fields = map { s/\\'/'/g; s/^'(.*)'$/$1/; $_} @$fields;
1709 #print "$sql [@$fields]\n";# if $sql =~ /SELECT/;
1710
1711 ## before line 1511
1712 my $comment_re = $self->{"comment_re"};
1713 # if ( $sql =~ s/($comment_re)//gs) {
1714 # $self->{"comment"} = $1;
1715 # }
1716 if ( $sql =~ /(.*)$comment_re$/s) {
1717 $sql = $1;
1718 $self->{"comment"} = $2;
1719 }
1720 if ($sql =~ /^(.*)--(.*)(\n|$)/) {
1721 $sql = $1;
1722 $self->{"comment"} = $2;
1723 }
1724
1725 $sql =~ s/\n/ /g;
1726 $sql =~ s/\s+/ /g;
1727 $sql =~ s/(\S)\(/$1 (/g; # ensure whitespace before (
1728 $sql =~ s/\)(\S)/) $1/g; # ensure whitespace after )
1729 $sql =~ s/\(\s*/(/g; # trim whitespace after (
1730 $sql =~ s/\s*\)/)/g; # trim whitespace before )
1731 #
1732 # $sql =~ s/\s*\(/(/g; # trim whitespace before (
1733 # $sql =~ s/\)\s*/)/g; # trim whitespace after )
1734 for my $op( qw( = <> < > <= >= \|\|) ) {
1735 $sql =~ s/(\S)$op/$1 $op/g;
1736 $sql =~ s/$op(\S)/$op $1/g;
1737 }
1738 $sql =~ s/< >/<>/g;
1739 $sql =~ s/< =/<=/g;
1740 $sql =~ s/> =/>=/g;
1741 $sql =~ s/\s*,/,/g;
1742 $sql =~ s/,\s*/,/g;
1743 $sql =~ s/^\s+//;
1744 $sql =~ s/\s+$//;
1745 return $sql;
1746 }
1747
1748 sub trim {
1749 my $str = shift or return '';
1750 $str =~ s/^\s+//;
1751 $str =~ s/\s+$//;
1752 return $str;
1753 }
1754
1755 sub do_err {
1756 my $self = shift;
1757 my $err = shift;
1758 my $errtype = shift;
1759 my @c = caller 4;
1760 $err = "$err\n\n";
1761 # $err = $errtype ? "DIALECT ERROR: $err in $c[3]"
1762 # : "SQL ERROR: $err in $c[3]";
1763 $err = $errtype ? "DIALECT ERROR: $err"
1764 : "SQL ERROR: $err";
1765 $self->{"struct"}->{"errstr"} = $err;
1766 #$self->{"errstr"} = $err;
1767 warn $err if $self->{"PrintError"};
1768 die $err if $self->{"RaiseError"};
1769 return undef;
1770 }
1771
1772 1;
1773
1774 __END__
1775
1776 =pod
1777
1778 =head1 NAME
1779
1780 SQL::Parser -- validate, parse, or build SQL strings
1781
1782 =head1 SYNOPSIS
1783
1784 use SQL::Parser; # CREATE A PARSER OBJECT
1785 my $parser = SQL::Parser->new( $dialect, \%attrs );
1786
1787 my $success = $parser->parse( $sql_string ); # PARSE A SQL STRING &
1788 if ($success) { # DISPLAY RESULTING DATA
1789 use Data::Dumper; # STRUCTURE
1790 print Dumper $parser->structure;
1791 }
1792
1793 $parser->feature( $class, $name, $value ); # SET OR FIND STATUS OF
1794 my $has_feature = $parser->feature( $class, $name ); # A PARSER FEATURE
1795
1796 $parser->dialect( $dialect_name ); # SET OR FIND STATUS OF
1797 my $current_dialect = $parser->dialect; # A PARSER DIALECT
1798
1799 print $parser->errstr; # DISPLAY CURRENT ERROR
1800 # STRING
1801
1802
1803 =head1 DESCRIPTION
1804
1805 SQL::Parser is a parser, builder, and sytax validator for a
1806 small but useful subset of SQL (Structured Query Language). It
1807 accepts SQL strings and returns either a detailed error message
1808 if the syntax is invalid or a data structure containing the
1809 results of the parse if the syntax is valid. It will soon also
1810 work in reverse to build a SQL string from a supplied data
1811 structure.
1812
1813 The module can be used in batch mode to validate a series of
1814 statements, or as middle-ware for DBI drivers or other related
1815 projects. When combined with SQL::Statement version 0.2 or
1816 greater, the module can be used to actually perform the SQL
1817 commands on a variety of file formats using DBD::AnyData, or
1818 DBD::CSV, or DBD::Excel.
1819
1820 The module makes use of a variety of configuration files
1821 located in the SQL/Dialects directory, each of which is
1822 essentially a simple text file listing things like supported
1823 data types, reserved words, and other features specific to a
1824 given dialect of SQL. These features can also be turned on or
1825 off during program execution.
1826
1827 =head1 SUPPORTED SQL SYNTAX
1828
1829 This module is meant primarly as a base class for DBD drivers
1830 and as such concentrates on a small but useful subset of SQL 92.
1831 It does *not* in any way pretend to be a complete SQL 92 parser.
1832 The module will continue to add new supported syntax, currently,
1833 this is what is supported:
1834
1835 =head2 CREATE TABLE
1836
1837 CREATE [ {LOCAL|GLOBAL} TEMPORARY ] TABLE $table
1838 (
1839 $col_1 $col_type1 $col_constraints1,
1840 ...,
1841 $col_N $col_typeN $col_constraintsN,
1842 )
1843 [ ON COMMIT {DELETE|PRESERVE} ROWS ]
1844
1845 * col_type must be a valid data type as defined in the
1846 "valid_data_types" section of the dialect file for the
1847 current dialect
1848
1849 * col_constriaints may be "PRIMARY KEY" or one or both of
1850 "UNIQUE" and/or "NOT NULL"
1851
1852 * IMPORTANT NOTE: temporary tables, data types and column
1853 constraints are checked for syntax violations but are
1854 currently otherwise *IGNORED* -- they are recognized by
1855 the parser, but not by the execution engine
1856
1857 * The following valid ANSI SQL92 options are not currently
1858 supported: table constraints, named constraints, check
1859 constriants, reference constraints, constraint
1860 attributes, collations, default clauses, domain names as
1861 data types
1862
1863 =head2 DROP TABLE
1864
1865 DROP TABLE $table [ RESTRICT | CASCADE ]
1866
1867 * IMPORTANT NOTE: drop behavior (cascade or restrict) is
1868 checked for valid syntax but is otherwise *IGNORED* -- it
1869 is recognized by the parser, but not by the execution
1870 engine
1871
1872 =head2 INSERT INTO
1873
1874 INSERT INTO $table [ ( $col1, ..., $colN ) ] VALUES ( $val1, ... $valN )
1875
1876 * default values are not currently supported
1877 * inserting from a subquery is not currently supported
1878
1879 =head2 DELETE FROM
1880
1881 DELETE FROM $table [ WHERE search_condition ]
1882
1883 * see "search_condition" below
1884
1885 =head2 UPDATE
1886
1887 UPDATE $table SET $col1 = $val1, ... $colN = $valN [ WHERE search_condition ]
1888
1889 * default values are not currently supported
1890 * see "search_condition" below
1891
1892 =head2 SELECT
1893
1894 SELECT select_clause
1895 FROM from_clause
1896 [ WHERE search_condition ]
1897 [ ORDER BY $ocol1 [ASC|DESC], ... $ocolN [ASC|DESC] ]
1898 [ LIMIT [start,] length ]
1899
1900 * select clause ::=
1901 [DISTINCT|ALL] *
1902 | [DISTINCT|ALL] col1 [,col2, ... colN]
1903 | set_function1 [,set_function2, ... set_functionN]
1904
1905 * set function ::=
1906 COUNT ( [DISTINCT|ALL] * )
1907 | COUNT | MIN | MAX | AVG | SUM ( [DISTINCT|ALL] col_name )
1908
1909 * from clause ::=
1910 table1 [, table2, ... tableN]
1911 | table1 NATURAL [join_type] JOIN table2
1912 | table1 [join_type] table2 USING (col1,col2, ... colN)
1913 | table1 [join_type] JOIN table2 ON table1.colA = table2.colB
1914
1915 * join type ::=
1916 INNER
1917 | [OUTER] LEFT | RIGHT | FULL
1918
1919 * if join_type is not specified, INNER is the default
1920 * if DISTINCT or ALL is not specified, ALL is the default
1921 * if start position is omitted from LIMIT clause, position 0 is
1922 the default
1923 * ON clauses may only contain equal comparisons and AND combiners
1924 * self-joins are not currently supported
1925 * if implicit joins are used, the WHERE clause must contain
1926 and equijoin condition for each table
1927
1928
1929 =head2 SEARCH CONDITION
1930
1931 [NOT] $val1 $op1 $val1 [ ... AND|OR $valN $opN $valN ]
1932
1933
1934 =head2 OPERATORS
1935
1936 $op = | <> | < | > | <= | >=
1937 | IS NULL | IS NOT NULL | LIKE | CLIKE | BETWEEN | IN
1938
1939 The "CLIKE" operator works exactly the same as the "LIKE"
1940 operator, but is case insensitive. For example:
1941
1942 WHERE foo LIKE 'bar%' # succeeds if foo is "barbaz"
1943 # fails if foo is "BARBAZ" or "Barbaz"
1944
1945 WHERE foo CLIKE 'bar%' # succeeds for "barbaz", "Barbaz", and "BARBAZ"
1946
1947
1948 =head2 STRING FUNCTIONS & MATH EXPRESSIONS
1949
1950 String functions and math expressions are supported in WHERE
1951 clauses, in the VALUES part of an INSERT and UPDATE
1952 statements. They are not currently supported in the SELECT
1953 statement. For example:
1954
1955 SELECT * FROM foo WHERE UPPER(bar) = 'baz' # SUPPORTED
1956
1957 SELECT UPPER(foo) FROM bar # NOT SUPPORTED
1958
1959 =over
1960
1961 =item TRIM ( [ [LEADING|TRAILING|BOTH] ['trim_char'] FROM ] string )
1962
1963 Removes all occurrences of <trim_char> from the front, back, or
1964 both sides of a string.
1965
1966 BOTH is the default if neither LEADING nor TRAILING is specified.
1967
1968 Space is the default if no trim_char is specified.
1969
1970 Examples:
1971
1972 TRIM( string )
1973 trims leading and trailing spaces from string
1974
1975 TRIM( LEADING FROM str )
1976 trims leading spaces from string
1977
1978 TRIM( 'x' FROM str )
1979 trims leading and trailing x's from string
1980
1981 =item SUBSTRING( string FROM start_pos [FOR length] )
1982
1983 Returns the substring starting at start_pos and extending for
1984 "length" character or until the end of the string, if no
1985 "length" is supplied. Examples:
1986
1987 SUBSTRING( 'foobar' FROM 4 ) # returns "bar"
1988
1989 SUBSTRING( 'foobar' FROM 4 FOR 2) # returns "ba"
1990
1991
1992 =item UPPER(string) and LOWER(string)
1993
1994 These return the upper-case and lower-case variants of the string:
1995
1996 UPPER('foo') # returns "FOO"
1997 LOWER('FOO') # returns "foo"
1998
1999 =back
2000
2001 =head2 Identifiers (table & column names)
2002
2003 Regular identifiers (table and column names *without* quotes around them) are case INSENSITIVE so column foo, fOo, FOO all refer to the same column.
2004
2005 Delimited identifiers (table and column names *with* quotes around them) are case SENSITIVE so column "foo", "fOo", "FOO" each refer to different columns.
2006
2007 A delimited identifier is *never* equal to a regular identifer (so "foo" and foo are two different columns). But don't do that :-).
2008
2009 Remember thought that, in DBD::CSV if table names are used directly as file names, the case sensitivity depends on the OS e.g. on Windows files named foo, FOO, and fOo are the same as each other while on Unix they are different.
2010
2011
2012 =head1 METHODS
2013
2014 =head2 new()
2015
2016 The new() method creates a SQL::Parser object which can then be
2017 used to parse, validate, or build SQL strings. It takes one
2018 required parameter -- the name of the SQL dialect that will
2019 define the rules for the parser. A second optional parameter is
2020 a reference to a hash which can contain additional attributes of
2021 the parser.
2022
2023 use SQL::Parser;
2024 my $parser = SQL::Parser->new( $dialect_name, \%attrs );
2025
2026 The dialect_name parameter is a string containing any valid
2027 dialect such as 'ANSI', 'AnyData', or 'CSV'. See the section on
2028 the dialect() method below for details.
2029
2030 The attribute parameter is a reference to a hash that can
2031 contain error settings for the PrintError and RaiseError
2032 attributes. See the section below on the parse() method for
2033 details.
2034
2035 An example:
2036
2037 use SQL::Parser;
2038 my $parser = SQL::Parser->new('AnyData', {RaiseError=>1} );
2039
2040 This creates a new parser that uses the grammar rules
2041 contained in the .../SQL/Dialects/AnyData.pm file and which
2042 sets the RaiseError attribute to true.
2043
2044 For those needing backwards compatibility with SQL::Statement
2045 version 0.1x and lower, the attribute hash may also contain
2046 feature settings. See the section "FURTHER DETAILS - Backwards
2047 Compatibility" below for details.
2048
2049
2050 =head2 parse()
2051
2052 Once a SQL::Parser object has been created with the new()
2053 method, the parse() method can be used to parse any number of
2054 SQL strings. It takes a single required parameter -- a string
2055 containing a SQL command. The SQL string may optionally be
2056 terminated by a semicolon. The parse() method returns a true
2057 value if the parse is successful and a false value if the parse
2058 finds SQL syntax errors.
2059
2060 Examples:
2061
2062 1) my $success = $parser->parse('SELECT * FROM foo');
2063
2064 2) my $sql = 'SELECT * FROM foo';
2065 my $success = $parser->parse( $sql );
2066
2067 3) my $success = $parser->parse(qq!
2068 SELECT id,phrase
2069 FROM foo
2070 WHERE id < 7
2071 AND phrase <> 'bar'
2072 ORDER BY phrase;
2073 !);
2074
2075 4) my $success = $parser->parse('SELECT * FRoOM foo ');
2076
2077 In examples #1,#2, and #3, the value of $success will be true
2078 because the strings passed to the parse() method are valid SQL
2079 strings.
2080
2081 In example #4, however, the value of $success will be false
2082 because the string contains a SQL syntax error ('FRoOM' instead
2083 of 'FROM').
2084
2085 In addition to checking the return value of parse() with a
2086 variable like $success, you may use the PrintError and
2087 RaiseError attributes as you would in a DBI script:
2088
2089 * If PrintError is true, then SQL syntax errors will be sent as
2090 warnings to STDERR (i.e. to the screen or to a file if STDERR
2091 has been redirected). This is set to true by default which
2092 means that unless you specifically turn it off, all errors
2093 will be reported.
2094
2095 * If RaiseError is true, then SQL syntax errors will cause the
2096 script to die, (i.e. the script will terminate unless wrapped
2097 in an eval). This is set to false by default which means
2098 that unless you specifically turn it on, scripts will
2099 continue to operate even if there are SQL syntax errors.
2100
2101 Basically, you should leave PrintError on or else you will not
2102 be warned when an error occurs. If you are simply validating a
2103 series of strings, you will want to leave RaiseError off so that
2104 the script can check all strings regardless of whether some of
2105 them contain SQL errors. However, if you are going to try to
2106 execute the SQL or need to depend that it is correct, you should
2107 set RaiseError on so that the program will only continue to
2108 operate if all SQL strings use correct syntax.
2109
2110 IMPORTANT NOTE #1: The parse() method only checks syntax, it
2111 does NOT verify if the objects listed actually exist. For
2112 example, given the string "SELECT model FROM cars", the parse()
2113 method will report that the string contains valid SQL but that
2114 will not tell you whether there actually is a table called
2115 "cars" or whether that table contains a column called 'model'.
2116 Those kinds of verifications can be performed by the
2117 SQL::Statement module, not by SQL::Parser by itself.
2118
2119 IMPORTANT NOTE #2: The parse() method uses rules as defined by
2120 the selected dialect configuration file and the feature()
2121 method. This means that a statement that is valid in one
2122 dialect may not be valid in another. For example the 'CSV' and
2123 'AnyData' dialects define 'BLOB' as a valid data type but the
2124 'ANSI' dialect does not. Therefore the statement 'CREATE TABLE
2125 foo (picture BLOB)' would be valid in the first two dialects but
2126 would produce a syntax error in the 'ANSI' dialect.
2127
2128 =head2 structure()
2129
2130 After a SQL::Parser object has been created and the parse()
2131 method used to parse a SQL string, the structure() method
2132 returns the data structure of that string. This data structure
2133 may be passed on to other modules (e.g. SQL::Statement) or it
2134 may be printed out using, for example, the Data::Dumper module.
2135
2136 The data structure contains all of the information in the SQL
2137 string as parsed into its various components. To take a simple
2138 example:
2139
2140 $parser->parse('SELECT make,model FROM cars');
2141 use Data::Dumper;
2142 print Dumper $parser->structure;
2143
2144 Would produce:
2145
2146 $VAR1 = {
2147 'column_names' => [
2148 'make',
2149 'model'
2150 ],
2151 'command' => 'SELECT',
2152 'table_names' => [
2153 'cars'
2154 ]
2155 };
2156
2157 Please see the section "FURTHER DETAILS -- Parse structures"
2158 below for further examples.
2159
2160 =head2 build()
2161
2162 This method is in progress and should be available soon.
2163
2164 =head2 dialect()
2165
2166 $parser->dialect( $dialect_name ); # load a dialect configuration file
2167 my $dialect = $parser->dialect; # get the name of the current dialect
2168
2169 For example:
2170
2171 $parser->dialect('AnyData'); # loads the AnyData config file
2172 print $parser->dialect; # prints 'AnyData'
2173
2174 The $dialect_name parameter may be the name of any dialect
2175 configuration file on your system. Use the
2176 $parser->list('dialects') method to see a list of available
2177 dialects. At a minimum it will include "ANSI", "CSV", and
2178 "AnyData". For backwards compatiblity 'Ansi' is accepted as a
2179 synonym for 'ANSI', otherwise the names are case sensitive.
2180
2181 Loading a new dialect configuration file erases all current
2182 parser features and resets them to those defined in the
2183 configuration file.
2184
2185 See the section above on "Dialects" for details of these
2186 configuration files.
2187
2188 =head2 feature()
2189
2190 Features define the rules to be used by a specific parser
2191 instance. They are divided into the following classes:
2192
2193 * valid_commands
2194 * valid_options
2195 * valid_comparison_operators
2196 * valid_data_types
2197 * reserved_words
2198
2199 Within each class a feature name is either enabled or
2200 disabled. For example, under "valid_data_types" the name "BLOB"
2201 may be either disabled or enabled. If it is not eneabled
2202 (either by being specifically disabled, or simply by not being
2203 specified at all) then any SQL string using "BLOB" as a data
2204 type will throw a syntax error "Invalid data type: 'BLOB'".
2205
2206 The feature() method allows you to enable, disable, or check the
2207 status of any feature.
2208
2209 $parser->feature( $class, $name, 1 ); # enable a feature
2210
2211 $parser->feature( $class, $name, 0 ); # disable a feature
2212
2213 my $feature = $parser->feature( $class, $name ); # show status of a feature
2214
2215 For example:
2216
2217 $parser->feature('reserved_words','FOO',1); # make 'FOO' a reserved word
2218
2219 $parser->feature('valid_data_types','BLOB',0); # disallow 'BLOB' as a
2220 # data type
2221
2222 # determine if the LIKE
2223 # operator is supported
2224 my $LIKE = $parser->feature('valid_operators','LIKE');
2225
2226 See the section below on "Backwards Compatibility" for use of
2227 the feature() method with SQL::Statement 0.1x style parameters.
2228
2229 =head2 list()
2230
2231 =head2 errstr()
2232
2233 =head1 FURTHER DETAILS
2234
2235 =head2 Dialect Configuration Files
2236
2237 These will change completely when Tim finalizes the DBI get_info method.
2238
2239 =head2 Parse Structures
2240
2241 Here are some further examples of the data structures returned
2242 by the structure() method after a call to parse(). Only
2243 specific details are shown for each SQL instance, not the entire
2244 struture.
2245
2246 'SELECT make,model, FROM cars'
2247
2248 command => 'SELECT',
2249 table_names => [ 'cars' ],
2250 column_names => [ 'make', 'model' ],
2251
2252 'CREATE TABLE cars ( id INTEGER, model VARCHAR(40) )'
2253
2254 column_defs => {
2255 id => { data_type => INTEGER },
2256 model => { data_type => VARCHAR(40) },
2257 },
2258
2259 'SELECT DISTINCT make FROM cars'
2260
2261 set_quantifier => 'DISTINCT',
2262
2263 'SELECT MAX (model) FROM cars'
2264
2265 set_function => {
2266 name => 'MAX',
2267 arg => 'models',
2268 },
2269
2270 'SELECT * FROM cars LIMIT 5,10'
2271
2272 limit_clause => {
2273 offset => 5,
2274 limit => 10,
2275 },
2276
2277 'SELECT * FROM vars ORDER BY make, model DESC'
2278
2279 sort_spec_list => [
2280 { make => 'ASC' },
2281 { model => 'DESC' },
2282 ],
2283
2284 "INSERT INTO cars VALUES ( 7, 'Chevy', 'Impala' )"
2285
2286 values => [ 7, 'Chevy', 'Impala' ],
2287
2288
2289 =head2 Backwards Compatibility
2290
2291 This module can be used in conjunction with SQL::Statement,
2292 version 0.2 and higher. Earlier versions of SQL::Statement
2293 included a SQL::Parser as a submodule that used slightly
2294 different syntax than the current version. The current version
2295 supports all of this earlier syntax although new users are
2296 encouraged to use the new syntax listed above. If the syntax
2297 listed below is used, the module should be able to be subclassed
2298 exactly as it was with the older SQL::Statement versions and
2299 will therefore not require any modules or scripts that used it
2300 to make changes.
2301
2302 In the old style, features of the parser were accessed with this
2303 syntax:
2304
2305 feature('create','type_blob',1); # allow BLOB as a data type
2306 feature('create','type_blob',0); # disallow BLOB as a data type
2307 feature('select','join',1); # allow multi-table statements
2308
2309 The same settings could be acheieved in calls to new:
2310
2311 my $parser = SQL::Parser->new(
2312 'Ansi',
2313 {
2314 create => {type_blob=>1},
2315 select => {join=>1},
2316 },
2317 );
2318
2319 Both of these styles of setting features are supported in the
2320 current SQL::Parser.
2321
2322 =head1 ACKNOWLEDGEMENTS
2323
2324 *Many* thanks to Ilya Sterin who wrote most of code for the
2325 build() method and who assisted on the parentheses parsing code
2326 and who proved a great deal of support, advice, and testing
2327 throughout the development of the module.
2328
2329 =head1 AUTHOR & COPYRIGHT
2330
2331 This module is copyright (c) 2001 by Jeff Zucker.
2332 All rights reserved.
2333
2334 The module may be freely distributed under the same terms as
2335 Perl itself using either the "GPL License" or the "Artistic
2336 License" as specified in the Perl README file.
2337
2338 Jeff can be reached at: jeff@vpservices.com.
2339
2340 =cut

MailToCvsAdmin">MailToCvsAdmin
ViewVC Help
Powered by ViewVC 1.1.26 RSS 2.0 feed