/[cvs]/nfo/perl/libs/SQL/Parser.pm
ViewVC logotype

Contents of /nfo/perl/libs/SQL/Parser.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (show annotations)
Wed Nov 13 21:24:24 2002 UTC (21 years, 6 months ago) by joko
Branch: MAIN
Changes since 1.1: +1 -1 lines
+ from DBD::CSV - todo:
   [...] Currently it is not possible to use files with names like names.csv. [...]
   The fix should remove this restriction. I hope it doesn't break other things.

1 ######################################################################
2 package SQL::Parser;
3 ######################################################################
4 #
5 # This module is copyright (c), 2001 by Jeff Zucker.
6 # All rights resered.
7 # It may be freely distributed under the same terms as Perl itself.
8 # See below for help and copyright information (search for SYNOPSIS).
9 #
10 ######################################################################
11
12 use strict;
13 use vars qw($VERSION);
14
15 $VERSION = '1.004';
16
17
18 #############################
19 # PUBLIC METHODS
20 #############################
21
22 sub new {
23 my $class = shift;
24 my $dialect = shift || 'ANSI';
25 $dialect = 'ANSI' if uc $dialect eq 'ANSI';
26 $dialect = 'AnyData' if uc $dialect eq 'ANYDATA' or uc $dialect eq 'CSV';
27 # $dialect = 'CSV' if uc $dialect eq 'CSV';
28 if ($dialect eq 'SQL::Eval') {
29 $dialect = 'AnyData';
30 }
31 my $flags = shift || {};
32 $flags->{"dialect"} = $dialect;
33 $flags->{"PrintError"} = 1 unless defined $flags->{"PrintError"};
34 my $self = bless_me($class,$flags);
35 $self->dialect( $self->{"dialect"} );
36 $self->set_feature_flags($self->{"select"},$self->{"create"});
37 return bless $self,$class;
38 }
39
40 sub parse {
41 my $self = shift;
42 my $sql = shift;
43 #printf "<%s>", $self->{dialect_set};
44 $self->dialect( $self->{"dialect"} ) unless $self->{"dialect_set"};
45 $sql =~ s/^\s+//;
46 $sql =~ s/\s+$//;
47 $self->{"struct"} = {};
48 $self->{"tmp"} = {};
49 $self->{"original_string"} = $sql;
50
51 ################################################################
52 #
53 # COMMENTS
54
55 # C-STYLE
56 #
57 my $comment_re = $self->{"comment_re"} || '(\/\*.*?\*\/)';
58 $self->{"comment_re"} = $comment_re;
59 my $starts_with_comment;
60 if ($sql =~ /^\s*$comment_re(.*)$/s) {
61 $self->{"comment"} = $1;
62 $sql = $2;
63 $starts_with_comment=1;
64 }
65 # SQL STYLE
66 #
67 if ($sql =~ /^\s*--(.*)(\n|$)/) {
68 $self->{"comment"} = $1;
69 return 1;
70 }
71 ################################################################
72
73 $sql = $self->clean_sql($sql);
74 my($com) = $sql =~ /^\s*(\S+)\s+/s ;
75 if (!$com) {
76 return 1 if $starts_with_comment;
77 return $self->do_err("Incomplete statement!");
78 }
79 $com = uc $com;
80 if ($self->{"opts"}->{"valid_commands"}->{$com}) {
81 my $rv = $self->$com($sql);
82 delete $self->{"struct"}->{"literals"};
83 # return $self->do_err("No table names found!")
84 # unless $self->{"struct"}->{"table_names"};
85 return $self->do_err("No command found!")
86 unless $self->{"struct"}->{"command"};
87 if ( $self->{"struct"}->{"join"}
88 and scalar keys %{$self->{"struct"}->{"join"}}==0
89 ) {
90 delete $self->{"struct"}->{"join"};
91 }
92 $self->replace_quoted_ids();
93 return $rv;
94 }
95 else {
96 return $self->do_err("Command '$com' not recognized or not supported!");
97 }
98 }
99
100 sub replace_quoted_ids {
101 my $self = shift;
102 my $id = shift;
103 return $id unless $self->{struct}->{quoted_ids};
104 if ($id) {
105 if ($id =~ /^\?QI(\d+)\?$/) {
106 return '"'.$self->{struct}->{quoted_ids}->[$1].'"';
107 }
108 else {
109 return $id;
110 }
111 }
112 my @tables = @{$self->{struct}->{table_names}};
113 for my $t(@tables) {
114 if ($t =~ /^\?QI(.+)\?$/ ) {
115 $t = '"'.$self->{struct}->{quoted_ids}->[$1].'"';
116 # $t = $self->{struct}->{quoted_ids}->[$1];
117 }
118 }
119 $self->{struct}->{table_names} = \@tables;
120 delete $self->{struct}->{quoted_ids};
121 }
122
123 sub structure { shift->{"struct"} }
124 sub command { my $x = shift->{"struct"}->{command} || '' }
125
126 sub feature {
127 my($self,$opt_class,$opt_name,$opt_value) = @_;
128 if (defined $opt_value) {
129 if ( $opt_class eq 'select' ) {
130 $self->set_feature_flags( {"join"=>$opt_value} );
131 }
132 elsif ( $opt_class eq 'create' ) {
133 $self->set_feature_flags( undef, {$opt_name=>$opt_value} );
134 }
135 else {
136 $self->{$opt_class}->{$opt_name} = $opt_value;
137 }
138 }
139 else {
140 return $self->{"opts"}->{$opt_class}->{$opt_name};
141 }
142 }
143
144 sub errstr { shift->{"struct"}->{"errstr"} }
145
146 sub list {
147 my $self = shift;
148 my $com = uc shift;
149 return () if $com !~ /COMMANDS|RESERVED|TYPES|OPS|OPTIONS|DIALECTS/i;
150 $com = 'valid_commands' if $com eq 'COMMANDS';
151 $com = 'valid_comparison_operators' if $com eq 'OPS';
152 $com = 'valid_data_types' if $com eq 'TYPES';
153 $com = 'valid_options' if $com eq 'OPTIONS';
154 $com = 'reserved_words' if $com eq 'RESERVED';
155 $self->dialect( $self->{"dialect"} ) unless $self->{"dialect_set"};
156
157 return sort keys %{ $self->{"opts"}->{$com} } unless $com eq 'DIALECTS';
158 my $dDir = "SQL/Dialects";
159 my @dialects;
160 for my $dir(@INC) {
161 local *D;
162
163 if ( opendir(D,"$dir/$dDir") ) {
164 @dialects = grep /.*\.pm$/, readdir(D);
165 last;
166 }
167 }
168 @dialects = map { s/\.pm$//; $_} @dialects;
169 return @dialects;
170 }
171
172 sub dialect {
173 my($self,$dialect) = @_;
174 return $self->{"dialect"} unless $dialect;
175 return $self->{"dialect"} if $self->{dialect_set};
176 $self->{"opts"} = {};
177 my $mod = "SQL/Dialects/$dialect.pm";
178 undef $@;
179 eval {
180 require "$mod";
181 };
182 return $self->do_err($@) if $@;
183 $mod =~ s/\.pm//;
184 $mod =~ s"/"::"g;
185 my @data = split /\n/, $mod->get_config;
186 my $feature;
187 for (@data) {
188 chomp;
189 s/^\s+//;
190 s/\s+$//;
191 next unless $_;
192 if (/^\[(.*)\]$/i) {
193 $feature = lc $1;
194 $feature =~ s/\s+/_/g;
195 next;
196 }
197 my $newopt = uc $_;
198 $newopt =~ s/\s+/ /g;
199 $self->{"opts"}->{$feature}->{$newopt} = 1;
200 }
201 $self->{"dialect"} = $dialect;
202 $self->{"dialect_set"}++;
203 }
204
205 ##################################################################
206 # SQL COMMANDS
207 ##################################################################
208
209 ####################################################
210 # DROP TABLE <table_name>
211 ####################################################
212 sub DROP {
213 my $self = shift;
214 my $stmt = shift;
215 my $table_name;
216 $self->{"struct"}->{"command"} = 'DROP';
217 if ($stmt =~ /^\s*DROP\s+(\S+)\s+(.+)$/si ) {
218 my $com2 = $1 || '';
219 $table_name = $2;
220 if ($com2 !~ /^TABLE$/i) {
221 return $self->do_err(
222 "The command 'DROP $com2' is not recognized or not supported!"
223 );
224 }
225 $table_name =~ s/^\s+//;
226 $table_name =~ s/\s+$//;
227 if ( $table_name =~ /(\S+) (RESTRICT|CASCADE)/i) {
228 $table_name = $1;
229 $self->{"struct"}->{"drop_behavior"} = uc $2;
230 }
231 }
232 else {
233 return $self->do_err( "Incomplete DROP statement!" );
234
235 }
236 return undef unless $self->TABLE_NAME($table_name);
237 $table_name = $self->replace_quoted_ids($table_name);
238 $self->{"tmp"}->{"is_table_name"} = {$table_name => 1};
239 $self->{"struct"}->{"table_names"} = [$table_name];
240 return 1;
241 }
242
243 ####################################################
244 # DELETE FROM <table_name> WHERE <search_condition>
245 ####################################################
246 sub DELETE {
247 my($self,$str) = @_;
248 $self->{"struct"}->{"command"} = 'DELETE';
249 my($table_name,$where_clause) = $str =~
250 /^DELETE FROM (\S+)(.*)$/i;
251 return $self->do_err(
252 'Incomplete DELETE statement!'
253 ) if !$table_name;
254 return undef unless $self->TABLE_NAME($table_name);
255 $self->{"tmp"}->{"is_table_name"} = {$table_name => 1};
256 $self->{"struct"}->{"table_names"} = [$table_name];
257 $self->{"struct"}->{"column_names"} = ['*'];
258 $where_clause =~ s/^\s+//;
259 $where_clause =~ s/\s+$//;
260 if ($where_clause) {
261 $where_clause =~ s/^WHERE\s*(.*)$/$1/i;
262 return undef unless $self->SEARCH_CONDITION($where_clause);
263 }
264 return 1;
265 }
266
267 ##############################################################
268 # SELECT
269 ##############################################################
270 # SELECT [<set_quantifier>] <select_list>
271 # | <set_function_specification>
272 # FROM <from_clause>
273 # [WHERE <search_condition>]
274 # [ORDER BY <order_by_clause>]
275 # [LIMIT <limit_clause>]
276 ##############################################################
277
278 sub SELECT {
279 my($self,$str) = @_;
280 $self->{"struct"}->{"command"} = 'SELECT';
281 my($from_clause,$where_clause,$order_clause,$limit_clause);
282 $str =~ s/^SELECT (.+)$/$1/i;
283 if ( $str =~ s/^(.+) LIMIT (.+)$/$1/i ) { $limit_clause = $2; }
284 if ( $str =~ s/^(.+) ORDER BY (.+)$/$1/i ) { $order_clause = $2; }
285 if ( $str =~ s/^(.+?) WHERE (.+)$/$1/i ) { $where_clause = $2; }
286 if ( $str =~ s/^(.+?) FROM (.+)$/$1/i ) { $from_clause = $2; }
287 else {
288 return $self->do_err("Couldn't find FROM clause in SELECT!");
289 }
290 return undef unless $self->FROM_CLAUSE($from_clause);
291 return undef unless $self->SELECT_CLAUSE($str);
292 if ($where_clause) {
293 return undef unless $self->SEARCH_CONDITION($where_clause);
294 }
295 if ($order_clause) {
296 return undef unless $self->SORT_SPEC_LIST($order_clause);
297 }
298 if ($limit_clause) {
299 return undef unless $self->LIMIT_CLAUSE($limit_clause);
300 }
301 if ( ( $self->{"struct"}->{"join"}->{"clause"}
302 and $self->{"struct"}->{"join"}->{"clause"} eq 'ON'
303 )
304 or ( $self->{"struct"}->{"multiple_tables"}
305 ###new
306 and !(scalar keys %{$self->{"struct"}->{"join"}})
307 # and !$self->{"struct"}->{"join"}
308 ###
309 ) ) {
310 return undef unless $self->IMPLICIT_JOIN();
311 }
312 return 1;
313 }
314
315 sub IMPLICIT_JOIN {
316 my $self = shift;
317 delete $self->{"struct"}->{"multiple_tables"};
318 if ( !$self->{"struct"}->{"join"}->{"clause"}
319 or $self->{"struct"}->{"join"}->{"clause"} ne 'ON'
320 ) {
321 $self->{"struct"}->{"join"}->{"type"} = 'INNER';
322 $self->{"struct"}->{"join"}->{"clause"} = 'IMPLICIT';
323 }
324 if (defined $self->{"struct"}->{"keycols"} ) {
325 my @keys;
326 my @keys2 = @keys = @{ $self->{"struct"}->{"keycols"} };
327 $self->{"struct"}->{"join"}->{"table_order"} = $self->order_joins(\@keys2);
328 @{$self->{"struct"}->{"join"}->{"keycols"}} = @keys;
329 delete $self->{"struct"}->{"keycols"};
330 }
331 else {
332 return $self->do_err("No equijoin condition in WHERE or ON clause");
333 }
334 return 1;
335 }
336
337 sub EXPLICIT_JOIN {
338 my $self = shift;
339 my $remainder = shift;
340 return undef unless $remainder;
341 my($tableA,$tableB,$keycols,$jtype,$natural);
342 if ($remainder =~ /^(.+?) (NATURAL|INNER|LEFT|RIGHT|FULL|UNION|JOIN)(.+)$/s){
343 $tableA = $1;
344 $remainder = $2.$3;
345 }
346 else {
347 ($tableA,$remainder) = $remainder =~ /^(\S+) (.*)/;
348 }
349 if ( $remainder =~ /^NATURAL (.+)/) {
350 $self->{"struct"}->{"join"}->{"clause"} = 'NATURAL';
351 $natural++;
352 $remainder = $1;
353 }
354 if ( $remainder =~
355 /^(INNER|LEFT|RIGHT|FULL|UNION) JOIN (.+)/
356 ) {
357 $jtype = $self->{"struct"}->{"join"}->{"clause"} = $1;
358 $remainder = $2;
359 $jtype = "$jtype OUTER" if $jtype !~ /INNER|UNION/;
360 }
361 if ( $remainder =~
362 /^(LEFT|RIGHT|FULL) OUTER JOIN (.+)/
363 ) {
364 $jtype = $self->{"struct"}->{"join"}->{"clause"} = $1 . " OUTER";
365 $remainder = $2;
366 }
367 if ( $remainder =~ /^JOIN (.+)/) {
368 $jtype = 'INNER';
369 $self->{"struct"}->{"join"}->{"clause"} = 'DEFAULT INNER';
370 $remainder = $1;
371 }
372 if ( $self->{"struct"}->{"join"} ) {
373 if ( $remainder && $remainder =~ /^(.+?) USING \(([^\)]+)\)(.*)/) {
374 $self->{"struct"}->{"join"}->{"clause"} = 'USING';
375 $tableB = $1;
376 my $keycolstr = $2;
377 $remainder = $3;
378 @$keycols = split /,/,$keycolstr;
379 }
380 if ( $remainder && $remainder =~ /^(.+?) ON (.+)/) {
381 $self->{"struct"}->{"join"}->{"clause"} = 'ON';
382 $tableB = $1;
383 my $keycolstr = $2;
384 $remainder = $3;
385 if ($keycolstr =~ / OR /i ) {
386 return $self->do_err(qq~Can't use OR in an ON clause!~,1);
387 }
388 @$keycols = split / AND /i,$keycolstr;
389 $self->{"tmp"}->{"is_table_name"}->{"$tableA"} = 1;
390 $self->{"tmp"}->{"is_table_name"}->{"$tableB"} = 1;
391 for (@$keycols) {
392 my($arg1,$arg2) = split / = /;
393 return undef unless $arg1 = $self->ROW_VALUE($arg1);
394 return undef unless $arg2 = $self->ROW_VALUE($arg2);
395 if ( $arg1->{"type"}eq 'column' and $arg2->{"type"}eq 'column'){
396 push @{ $self->{"struct"}->{"keycols"} }, $arg1->{"value"};
397 push @{ $self->{"struct"}->{"keycols"} }, $arg2->{"value"};
398 delete $self->{"struct"}->{"where_clause"};
399 }
400 }
401 }
402 elsif ($remainder =~ /^(.+?)$/i) {
403 $tableB = $1;
404 $remainder = $2;
405 }
406 $remainder =~ s/^\s+// if $remainder;
407 }
408 if ($jtype) {
409 $jtype = "NATURAL $jtype" if $natural;
410 if ($natural and $keycols) {
411 return $self->do_err(
412 qq~Can't use NATURAL with a USING or ON clause!~
413 );
414 }
415 return undef unless $self->TABLE_NAME_LIST("$tableA,$tableB");
416 $self->{"struct"}->{"join"}->{"type"} = $jtype;
417 $self->{"struct"}->{"join"}->{"keycols"} = $keycols if $keycols;
418 return 1;
419 }
420 return $self->do_err("Couldn't parse explicit JOIN!");
421 }
422
423 sub SELECT_CLAUSE {
424 my($self,$str) = @_;
425 return undef unless $str;
426 if ($str =~ s/^(DISTINCT|ALL) (.+)$/$2/i) {
427 $self->{"struct"}->{"set_quantifier"} = uc $1;
428 }
429 if ($str =~ /[()]/) {
430 return undef unless $self->SET_FUNCTION_SPEC($str);
431 }
432 else {
433 return undef unless $self->SELECT_LIST($str);
434 }
435 }
436
437 sub FROM_CLAUSE {
438 my($self,$str) = @_;
439 return undef unless $str;
440 if ($str =~ / JOIN /i ) {
441 return undef unless $self->EXPLICIT_JOIN($str);
442 }
443 else {
444 return undef unless $self->TABLE_NAME_LIST($str);
445 }
446 }
447
448 sub INSERT {
449 my($self,$str) = @_;
450 my $col_str;
451 my($table_name,$val_str) = $str =~
452 /^INSERT\s+INTO\s+(.+?)\s+VALUES\s+\((.+?)\)$/i;
453 if ($table_name =~ /[()]/ ) {
454 ($table_name,$col_str,$val_str) = $str =~
455 /^INSERT\s+INTO\s+(.+?)\s+\((.+?)\)\s+VALUES\s+\((.+?)\)$/i;
456 }
457 return $self->do_err('No table name specified!') unless $table_name;
458 return $self->do_err('Missing values list!') unless $val_str;
459 return undef unless $self->TABLE_NAME($table_name);
460 $self->{"struct"}->{"command"} = 'INSERT';
461 $self->{"struct"}->{"table_names"} = [$table_name];
462 if ($col_str) {
463 return undef unless $self->COLUMN_NAME_LIST($col_str);
464 }
465 else {
466 $self->{"struct"}->{"column_names"} = ['*'];
467 }
468 return undef unless $self->LITERAL_LIST($val_str);
469 return 1;
470 }
471
472 ###################################################################
473 # UPDATE ::=
474 #
475 # UPDATE <table> SET <set_clause_list> [ WHERE <search_condition>]
476 #
477 ###################################################################
478 sub UPDATE {
479 my($self,$str) = @_;
480 $self->{"struct"}->{"command"} = 'UPDATE';
481 my($table_name,$remainder) = $str =~
482 /^UPDATE (.+?) SET (.+)$/i;
483 return $self->do_err(
484 'Incomplete UPDATE clause'
485 ) if !$table_name or !$remainder;
486 return undef unless $self->TABLE_NAME($table_name);
487 $self->{"tmp"}->{"is_table_name"} = {$table_name => 1};
488 $self->{"struct"}->{"table_names"} = [$table_name];
489 my($set_clause,$where_clause) = $remainder =~
490 /(.*?) WHERE (.*)$/i;
491 $set_clause = $remainder if !$set_clause;
492 return undef unless $self->SET_CLAUSE_LIST($set_clause);
493 if ($where_clause) {
494 return undef unless $self->SEARCH_CONDITION($where_clause);
495 }
496 my @vals = @{$self->{"struct"}->{"values"}};
497 my $num_val_placeholders=0;
498 for my $v(@vals) {
499 $num_val_placeholders++ if $v->{"type"} eq 'placeholder';
500 }
501 $self->{"struct"}->{"num_val_placeholders"}=$num_val_placeholders;
502 return 1;
503 }
504
505 #########
506 # CREATE
507 #########
508
509 sub CREATE {
510 my $self = shift;
511 my $stmt = shift;
512 $self->{"struct"}->{"command"} = 'CREATE';
513 my($table_name,$table_element_def);
514 if ($stmt =~ /^CREATE (LOCAL|GLOBAL) TEMPORARY TABLE(.*)$/si ) {
515 $self->{"struct"}->{"table_type"} = "$1 TEMPORARY";
516 $stmt = "CREATE TABLE$2";
517 }
518 if ($stmt =~ /^(.*) ON COMMIT (DELETE|PRESERVE) ROWS\s*$/si ) {
519 $stmt = $1;
520 $self->{"struct"}->{"commit_behaviour"} = $2;
521 return $self->do_err(
522 "Can't specify commit behaviour for permanent tables."
523 )
524 if !defined $self->{"struct"}->{"table_type"}
525 or $self->{"struct"}->{"table_type"} !~ /TEMPORARY/;
526 }
527 if ($stmt =~ /^CREATE TABLE (\S+) \((.*)\)$/si ) {
528 $table_name = $1;
529 $table_element_def = $2;
530 }
531 else {
532 return $self->do_err( "Can't find column definitions!" );
533 }
534 $table_element_def =~ s/\s+\(/(/g;
535 my $primary_defined;
536 for my $col(split ',',$table_element_def) {
537 my($name,$type,$constraints)=($col =~/\s*(\S+)\s+(\S+)\s*(.*)/);
538 if (!$type) {
539 return $self->do_err( "Column definition is missing a data type!" );
540 }
541 return undef if !($self->IDENTIFIER($name));
542 # if ($name =~ /^\?QI(.+)\?$/ ) {
543 $name = $self->replace_quoted_ids($name);
544 # }
545 $constraints =~ s/^\s+//;
546 $constraints =~ s/\s+$//;
547 if ($constraints) {
548 $constraints =~ s/PRIMARY KEY/PRIMARY_KEY/i;
549 $constraints =~ s/NOT NULL/NOT_NULL/i;
550 my @c = split /\s+/, $constraints;
551 my %has_c;
552 for my $constr(@c) {
553 if ( $constr =~ /^\s*(UNIQUE|NOT_NULL|PRIMARY_KEY)\s*$/i ) {
554 my $cur_c = uc $1;
555 if ($has_c{$cur_c}) {
556 return $self->do_err(
557 qq~Duplicate constraint: '$constr'!~
558 );
559 }
560 if ($cur_c eq 'PRIMARY_KEY' and $primary_defined++ ) {
561 return $self->do_err(
562 qq~There can be only one PRIMARY KEY in a table!~
563 );
564 }
565 $constr =~ s/_/ /g;
566 push @{$self->{"struct"}->{"column_defs"}->{"$name"}->{"constraints"} }, $constr;
567
568 }
569 else {
570 return $self->do_err("Bad column constraint: '$constr'!");
571 }
572 }
573 }
574 $type = uc $type;
575 my $length;
576 if ( $type =~ /(.+)\((.+)\)/ ) {
577 $type = $1;
578 $length = $2;
579 }
580 if (!$self->{"opts"}->{"valid_data_types"}->{"$type"}) {
581 return $self->do_err("'$type' is not a recognized data type!");
582 }
583 $self->{"struct"}->{"column_defs"}->{"$name"}->{"data_type"} = $type;
584 $self->{"struct"}->{"column_defs"}->{"$name"}->{"data_length"} = $length;
585 push @{$self->{"struct"}->{"column_names"}},$name;
586
587 }
588 $self->{"struct"}->{"table_names"} = [$table_name];
589 return 1;
590 }
591
592
593 ###############
594 # SQL SUBRULES
595 ###############
596
597 sub SET_CLAUSE_LIST {
598 my $self = shift;
599 my $set_string = shift;
600 my @sets = split /,/,$set_string;
601 my(@cols,@vals);
602 for(@sets) {
603 my($col,$val) = split / = /,$_;
604 return $self->do_err('Incomplete SET clause!') if !$col or !$val;
605 push @cols, $col;
606 push @vals, $val;
607 }
608 return undef unless $self->COLUMN_NAME_LIST(join ',',@cols);
609 return undef unless $self->LITERAL_LIST(join ',',@vals);
610 return 1;
611 }
612
613 sub SET_QUANTIFIER {
614 my($self,$str) = @_;
615 if ($str =~ /^(DISTINCT|ALL)\s+(.*)$/si) {
616 $self->{"struct"}->{"set_quantifier"} = uc $1;
617 $str = $2;
618 }
619 return $str;
620 }
621
622 sub SELECT_LIST {
623 my $self = shift;
624 my $col_str = shift;
625 if ( $col_str =~ /^\s*\*\s*$/ ) {
626 $self->{"struct"}->{"column_names"} = ['*'];
627 return 1;
628 }
629 my @col_list = split ',',$col_str;
630 if (!(scalar @col_list)) {
631 return $self->do_err('Missing column name list!');
632 }
633 my(@newcols,$newcol);
634 for my $col(@col_list) {
635 # $col = trim($col);
636 $col =~ s/^\s+//;
637 $col =~ s/\s+$//;
638 if ($col =~ /^(\S+)\.\*$/) {
639 my $table = $1;
640 return undef unless $self->TABLE_NAME($table);
641 $table = $self->replace_quoted_ids($table);
642 push @newcols, "$table.*";
643 }
644 else {
645 return undef unless $newcol = $self->COLUMN_NAME($col);
646 push @newcols, $newcol;
647 }
648 }
649 $self->{"struct"}->{"column_names"} = \@newcols;
650 return 1;
651 }
652
653 sub SET_FUNCTION_SPEC {
654 my($self,$col_str) = @_;
655 my @funcs = split /,/, $col_str;
656 my %iscol;
657 for my $func(@funcs) {
658 if ($func =~ /^(COUNT|AVG|SUM|MAX|MIN) \((.*)\)\s*$/i ) {
659 my $set_function_name = uc $1;
660 my $set_function_arg = $2;
661 my $distinct;
662 if ( $set_function_arg =~ s/(DISTINCT|ALL) (.+)$/$2/i ) {
663 $distinct = uc $1;
664 }
665 my $count_star = 1 if $set_function_name eq 'COUNT'
666 and $set_function_arg eq '*';
667 my $ok = $self->COLUMN_NAME($set_function_arg)
668 if !$count_star;
669 return undef if !$count_star and !$ok;
670 if ($set_function_arg !~ /^"/) {
671 $set_function_arg = uc $set_function_arg;
672 }
673 push @{ $self->{"struct"}->{'set_function'}}, {
674 name => $set_function_name,
675 arg => $set_function_arg,
676 distinct => $distinct,
677 };
678 push( @{ $self->{"struct"}->{"column_names"} }, $set_function_arg)
679 if !$iscol{$set_function_arg}++
680 and ($set_function_arg ne '*');
681 }
682 else {
683 return $self->do_err("Bad set function before FROM clause.");
684 }
685 }
686 my $cname = $self->{"struct"}->{"column_names"};
687 if ( !$cname or not scalar @$cname ) {
688 $self->{"struct"}->{"column_names"} = ['*'];
689 }
690 return 1;
691 }
692
693 sub LIMIT_CLAUSE {
694 my($self,$limit_clause) = @_;
695 # $limit_clause = trim($limit_clause);
696 $limit_clause =~ s/^\s+//;
697 $limit_clause =~ s/\s+$//;
698
699 return 1 if !$limit_clause;
700 my($offset,$limit,$junk) = split /,/, $limit_clause;
701 return $self->do_err('Bad limit clause!')
702 if (defined $limit and $limit =~ /[^\d]/)
703 or ( defined $offset and $offset =~ /[^\d]/ )
704 or defined $junk;
705 if (defined $offset and !defined $limit) {
706 $limit = $offset;
707 undef $offset;
708 }
709 $self->{"struct"}->{"limit_clause"} = {
710 limit => $limit,
711 offset => $offset,
712 };
713 return 1;
714 }
715
716 sub is_number {
717 my $x=shift;
718 return 0 if !defined $x;
719 return 1 if $x =~ /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/;
720 return 0;
721 }
722
723 sub SORT_SPEC_LIST {
724 my($self,$order_clause) = @_;
725 return 1 if !$order_clause;
726 my %is_table_name = %{$self->{"tmp"}->{"is_table_name"}};
727 my %is_table_alias = %{$self->{"tmp"}->{"is_table_alias"}};
728 my @ocols;
729 my @order_columns = split ',',$order_clause;
730 for my $col(@order_columns) {
731 my $newcol;
732 my $newarg;
733 if ($col =~ /\s*(\S+)\s+(ASC|DESC)/si ) {
734 $newcol = $1;
735 $newarg = uc $2;
736 }
737 elsif ($col =~ /^\s*(\S+)\s*$/si ) {
738 $newcol = $1;
739 }
740 else {
741 return $self->do_err(
742 'Junk after column name in ORDER BY clause!'
743 );
744 }
745 return undef if !($newcol = $self->COLUMN_NAME($newcol));
746 if ($newcol =~ /^(.+)\..+$/s ) {
747 my $table = $1;
748 if ($table =~ /^'/) {
749 if (!$is_table_name{"$table"} and !$is_table_alias{"$table"} ) {
750 return $self->do_err( "Table '$table' in ORDER BY clause "
751 . "not in FROM clause."
752 );
753 }}
754 elsif (!$is_table_name{"\L$table"} and !$is_table_alias{"\L$table"} ) {
755 return $self->do_err( "Table '$table' in ORDER BY clause "
756 . "not in FROM clause."
757 );
758 }
759 }
760 push @ocols, {$newcol => $newarg};
761 }
762 $self->{"struct"}->{"sort_spec_list"} = \@ocols;
763 return 1;
764 }
765
766 sub SEARCH_CONDITION {
767 my $self = shift;
768 my $str = shift;
769 $str =~ s/^\s*WHERE (.+)/$1/;
770 $str =~ s/^\s+//;
771 $str =~ s/\s+$//;
772 return $self->do_err("Couldn't find WHERE clause!") unless $str;
773 $str = get_btwn( $str );
774 $str = get_in( $str );
775 my $open_parens = $str =~ tr/\(//;
776 my $close_parens = $str =~ tr/\)//;
777 if ($open_parens != $close_parens) {
778 return $self->do_err("Mismatched parentheses in WHERE clause!");
779 }
780 $str = nongroup_numeric( nongroup_string( $str ) );
781 my $pred = $open_parens
782 ? $self->parens_search($str,[])
783 : $self->non_parens_search($str,[]);
784 return $self->do_err("Couldn't find predicate!") unless $pred;
785 $self->{"struct"}->{"where_clause"} = $pred;
786 return 1;
787 }
788
789 ############################################################
790 # UTILITY FUNCTIONS CALLED TO PARSE PARENS IN WHERE CLAUSE
791 ############################################################
792
793 # get BETWEEN clause
794 #
795 sub get_btwn {
796 my $str = shift;
797 if ($str =~ /^(.+?) BETWEEN (.+)$/i ) {
798 my($col,$in,$out,$contents);
799 my $front = $1;
800 my $back = $2;
801 my $not = 1 if $front =~ s/^(.+) NOT$/$1/i;
802 if ($front =~ s/^(.+? )(AND|OR|\() (.+)$/$1$2/i) {
803 $col = $3;
804 }
805 else {
806 $col = $front;
807 $front = '';
808 }
809 $front .= " NOT" if $not;
810 my($val1,$val2);
811 if ($back =~ s/^(.+?) AND (.+)$/$2/) {
812 $val1 = $1;
813 }
814 if ($back =~ s/^(.+?) (AND|OR)(.+)$/$2$3/i) {
815 $val2 = $1;
816 }
817 else {
818 $val2 = $back;
819 $back = '';
820 }
821 $str = "$front ($col > $val1 AND $col < $val2) $back";
822 return get_btwn($str);
823 }
824 return $str;
825 }
826
827 # get IN clause
828 #
829 # a IN (b,c) -> (a=b OR a=c)
830 # a NOT IN (b,c) -> (a<>b AND a<>c)
831 #
832 sub get_in {
833 my $str = shift;
834 my $in_inside_parens;
835 if ($str =~ /^(.+?) IN (\(.+)$/i ) {
836 my($col,$in,$out,$contents);
837 my $front = $1;
838 my $back = $2;
839 my $not;
840 $not++ if $front =~ s/^(.+) NOT$/$1/i;
841 if ($front =~ s/^(.+? )(AND|OR|\() (.+)$/$1$2/i) {
842 $col = $3;
843 }
844 else {
845 $col = $front;
846 $not++ if $col =~ s/^NOT (.+)/$1/i;
847 $front = '';
848 }
849 if ( $col =~ s/^\(// ) {
850 $in_inside_parens++;
851 }
852 #print "~$not~\n";
853 # $front .= " NOT" if $not;
854 # $not++ if $front =~ s/^(.+) NOT$/$1/i;
855 my @chars = split '', $back;
856 for (0..$#chars) {
857 my $char = shift @chars;
858 $contents .= $char;
859 $in++ if $char eq '(';
860 if ( $char eq ')' ) {
861 $out++;
862 last if $in == $out;
863 }
864 }
865 $back = join '', @chars;
866 $back =~ s/\)$// if $in_inside_parens;
867 # print "\n[$front][$col][$contents][$back]\n";
868 #die "\n[$contents]\n";
869 $contents =~ s/^\(//;
870 $contents =~ s/\)$//;
871 my @vals = split /,/, $contents;
872 my $op = '=';
873 my $combiner = 'OR';
874 if ($not) {
875 $op = '<>';
876 $combiner = 'AND';
877 }
878 @vals = map { "$col $op $_" } @vals;
879 my $valStr = join " $combiner ", @vals;
880 $str = "$front ($valStr) $back";
881 $str =~ s/\s+/ /g;
882 return get_in($str);
883 }
884 $str =~ s/^\s+//;
885 $str =~ s/\s+$//;
886 $str =~ s/\(\s+/(/;
887 $str =~ s/\s+\)/)/;
888 #print "$str:\n";
889 return $str;
890 }
891
892 # groups clauses by nested parens
893 #
894 sub parens_search {
895 my $self = shift;
896 my $str = shift;
897 my $predicates = shift;
898 my $index = scalar @$predicates;
899
900 # to handle WHERE (a=b) AND (c=d)
901 if ($str =~ /\(([^()]+?)\)/ ) {
902 my $pred = quotemeta $1;
903 if ($pred !~ / (AND|OR) / ) {
904 $str =~ s/\(($pred)\)/$1/;
905 }
906 }
907 #
908
909 if ($str =~ s/\(([^()]+)\)/^$index^/ ) {
910 push @$predicates, $1;
911 }
912 if ($str =~ /\(/ ) {
913 return $self->parens_search($str,$predicates);
914 }
915 else {
916 return $self->non_parens_search($str,$predicates);
917 }
918 }
919
920 # creates predicates from clauses that either have no parens
921 # or ANDs or have been previously grouped by parens and ANDs
922 #
923 sub non_parens_search {
924 my $self = shift;
925 my $str = shift;
926 my $predicates = shift;
927 my $neg = 0;
928 my $nots = {};
929 if ( $str =~ s/^NOT (\^.+)$/$1/i ) {
930 $neg = 1;
931 $nots = {pred=>1};
932 }
933 my( $pred1, $pred2, $op );
934 my $and_preds =[];
935 ($str,$and_preds) = group_ands($str);
936 $str =~ s/^\s*\^0\^\s*$/$predicates->[0]/;
937 return if $str =~ /^\s*~0~\s*$/;
938 if ( ($pred1, $op, $pred2) = $str =~ /^(.+) (AND|OR) (.+)$/i ) {
939 $pred1 =~ s/\~(\d+)\~$/$and_preds->[$1]/;
940 $pred2 =~ s/\~(\d+)\~$/$and_preds->[$1]/;
941 $pred1 = $self->non_parens_search($pred1,$predicates);
942 $pred2 = $self->non_parens_search($pred2,$predicates);
943 return {
944 neg => $neg,
945 nots => $nots,
946 arg1 => $pred1,
947 op => uc $op,
948 arg2 => $pred2,
949 };
950 }
951 else {
952 my $xstr = $str;
953 $xstr =~ s/\?(\d+)\?/$self->{"struct"}->{"literals"}->[$1]/g;
954 my($k,$v) = $xstr =~ /^(\S+?)\s+\S+\s*(.+)\s*$/;
955 #print "$k,$v\n" if defined $k;
956 push @{ $self->{struct}->{where_cols}->{$k}}, $v if defined $k;
957 return $self->PREDICATE($str);
958 }
959 }
960
961 # groups AND clauses that aren't already grouped by parens
962 #
963 sub group_ands{
964 my $str = shift;
965 my $and_preds = shift || [];
966 return($str,$and_preds) unless $str =~ / AND / and $str =~ / OR /;
967 if ($str =~ /^(.*?) AND (.*)$/i ) {
968 my $index = scalar @$and_preds;
969 my($front, $back)=($1,$2);
970 if ($front =~ /^.* OR (.*)$/i ) {
971 $front = $1;
972 }
973 if ($back =~ /^(.*?) (OR|AND) (.*)$/i ) {
974 $back = $1;
975 }
976 my $newpred = "$front AND $back";
977 push @$and_preds, $newpred;
978 $str =~ s/\Q$newpred/~$index~/i;
979 return group_ands($str,$and_preds);
980 }
981 else {
982 return $str,$and_preds;
983 }
984 }
985
986 # replaces string function parens with square brackets
987 # e.g TRIM (foo) -> TRIM[foo]
988 #
989 sub nongroup_string {
990 my $str = shift;
991 $str =~ s/(TRIM|SUBSTRING|UPPER|LOWER) \(([^()]+)\)/$1\[$2\]/gi;
992 if ( $str =~ /(TRIM|SUBSTRING|UPPER|LOWER) \(/i ) {
993 return nongroup_string($str);
994 }
995 else {
996 return $str;
997 }
998 }
999
1000 # replaces math parens with square brackets
1001 # e.g (4-(6+7)*9) -> MATH[4-MATH[6+7]*9]
1002 #
1003 sub nongroup_numeric {
1004 my $str = shift;
1005 my $has_op;
1006 if ( $str =~ /\(([0-9 \*\/\+\-_a-zA-Z\[\]\?]+)\)/ ) {
1007 my $match = $1;
1008 if ($match !~ /(LIKE |IS|BETWEEN|IN)/ ) {
1009 my $re = quotemeta($match);
1010 $str =~ s/\($re\)/MATH\[$match\]/;
1011 }
1012 else {
1013 $has_op++;
1014 }
1015 }
1016 if ( !$has_op and $str =~ /\(([0-9 \*\/\+\-_a-zA-Z\[\]\?]+)\)/ ) {
1017 return nongroup_numeric($str);
1018 }
1019 else {
1020 return $str;
1021 }
1022 }
1023 ############################################################
1024
1025
1026 #########################################################
1027 # LITERAL_LIST ::= <literal> [,<literal>]
1028 #########################################################
1029 sub LITERAL_LIST {
1030 my $self = shift;
1031 my $str = shift;
1032 my @tokens = split /,/, $str;
1033 my @values;
1034 for my $tok(@tokens) {
1035 my $val = $self->ROW_VALUE($tok);
1036 return $self->do_err(
1037 qq('$tok' is not a valid value or is not quoted!)
1038 ) unless $val;
1039 push @values, $val;
1040 }
1041 $self->{"struct"}->{"values"} = \@values;
1042 return 1;
1043 }
1044
1045
1046 ###################################################################
1047 # LITERAL ::= <quoted_string> | <question mark> | <number> | NULL
1048 ###################################################################
1049 sub LITERAL {
1050 my $self = shift;
1051 my $str = shift;
1052 return 'null' if $str =~ /^NULL$/i; # NULL
1053 # return 'empty_string' if $str =~ /^~E~$/i; # NULL
1054 if ($str eq '?') {
1055 $self->{struct}->{num_placeholders}++;
1056 return 'placeholder';
1057 }
1058 # return 'placeholder' if $str eq '?'; # placeholder question mark
1059 return 'string' if $str =~ /^'.*'$/s; # quoted string
1060 return 'number' if $str =~ # number
1061 /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/;
1062 return undef;
1063 }
1064 ###################################################################
1065 # PREDICATE
1066 ###################################################################
1067 sub PREDICATE {
1068 my $self = shift;
1069 my $str = shift;
1070 my @allops = keys %{ $self->{"opts"}->{"valid_comparison_operators"} };
1071 my @notops;
1072 for (@allops) { push (@notops, $_) if /NOT/i };
1073 my $ops = join '|', @notops;
1074 my $opexp = "^\\s*(.+)\\s+($ops)\\s+(.*)\\s*\$";
1075 my($arg1,$op,$arg2) = $str =~ /$opexp/i;
1076 if (!defined $op) {
1077 my @compops;
1078 for (@allops) { push (@compops, $_) if /<=|>=|<>/ };
1079 $ops = join '|', @compops;
1080 $opexp = "^\\s*(.+)\\s+($ops)\\s+(.*)\\s*\$";
1081 ($arg1,$op,$arg2) = $str =~ /$opexp/i;
1082 }
1083 if (!defined $op) {
1084 $ops = join '|', @allops;
1085 $opexp = "^\\s*(.+)\\s+($ops)\\s+(.*)\\s*\$";
1086 ($arg1,$op,$arg2) = $str =~ /$opexp/i;
1087 }
1088 $op = uc $op;
1089 if (!defined $arg1 || !defined $op || !defined $arg2) {
1090 return $self->do_err("Bad predicate: '$str'!");
1091 }
1092 my $negated = 0; # boolean value showing if predicate is negated
1093 my %not; # hash showing elements modified by NOT
1094 #
1095 # e.g. "NOT bar = foo" -> %not = (arg1=>1)
1096 # "bar NOT LIKE foo" -> %not = (op=>1)
1097 # "NOT bar NOT LIKE foo" -> %not = (arg1=>1,op=>1);
1098 # "NOT bar IS NOT NULL" -> %not = (arg1=>1,op=>1);
1099 # "bar = foo" -> %not = undef;
1100 #
1101 if ( $arg1 =~ s/^NOT (.+)$/$1/i ) {
1102 $not{arg1}++;
1103 }
1104 if ( $op =~ s/^(.+) NOT$/$1/i
1105 || $op =~ s/^NOT (.+)$/$1/i ) {
1106 $not{op}++;
1107 }
1108 $negated = 1 if %not and scalar keys %not == 1;
1109 return undef unless $arg1 = $self->ROW_VALUE($arg1);
1110 return undef unless $arg2 = $self->ROW_VALUE($arg2);
1111 if ( $arg1->{"type"}eq 'column'
1112 and $arg2->{"type"}eq 'column'
1113 and $op eq '='
1114 ) {
1115 push @{ $self->{"struct"}->{"keycols"} }, $arg1->{"value"};
1116 push @{ $self->{"struct"}->{"keycols"} }, $arg2->{"value"};
1117 }
1118 return {
1119 neg => $negated,
1120 nots => \%not,
1121 arg1 => $arg1,
1122 op => $op,
1123 arg2 => $arg2,
1124 };
1125 }
1126
1127 sub undo_string_funcs {
1128 my $str = shift;
1129 $str =~ s/(TRIM|UPPER|LOWER|SUBSTRING)\[([^\]\[]+?)\]/$1 ($2)/;
1130 if ($str =~ /(TRIM|UPPER|LOWER|SUBSTRING)\[/) {
1131 return undo_string_funcs($str);
1132 }
1133 return $str;
1134 }
1135
1136 sub undo_math_funcs {
1137 my $str = shift;
1138 $str =~ s/MATH\[([^\]\[]+?)\]/($1)/;
1139 if ($str =~ /MATH\[/) {
1140 return undo_math_funcs($str);
1141 }
1142 return $str;
1143 }
1144
1145
1146 ###################################################################
1147 # ROW_VALUE ::= <literal> | <column_name>
1148 ###################################################################
1149 sub ROW_VALUE {
1150 my $self = shift;
1151 my $str = shift;
1152 $str = undo_string_funcs($str);
1153 $str = undo_math_funcs($str);
1154 my $type;
1155
1156 # MATH
1157 #
1158 if ($str =~ /[\*\+\-\/]/ ) {
1159 my @vals;
1160 my $i=-1;
1161 $str =~ s/([^\s\*\+\-\/\)\(]+)/push @vals,$1;$i++;"?$i?"/ge;
1162 my @newvalues;
1163 for (@vals) {
1164 my $val = $self->ROW_VALUE($_);
1165 if ($val && $val->{"type"} !~ /number|column|placeholder/) {
1166 return $self->do_err(qq[
1167 String '$val' not allowed in Numeric expression!
1168 ]);
1169 }
1170 push @newvalues,$val;
1171 }
1172 return {
1173 type => 'function',
1174 name => 'numeric_exp',
1175 str => $str,
1176 vals => \@newvalues,
1177 }
1178 }
1179
1180 # SUBSTRING (value FROM start [FOR length])
1181 #
1182 if ($str =~ /^SUBSTRING \((.+?) FROM (.+)\)\s*$/i ) {
1183 my $name = 'SUBSTRING';
1184 my $start = $2;
1185 my $value = $self->ROW_VALUE($1);
1186 my $length;
1187 if ($start =~ /^(.+?) FOR (.+)$/i) {
1188 $start = $1;
1189 $length = $2;
1190 $length = $self->ROW_VALUE($length);
1191 }
1192 $start = $self->ROW_VALUE($start);
1193 $str =~ s/\?(\d+)\?/$self->{"struct"}->{"literals"}->[$1]/g;
1194 return $self->do_err(
1195 "Can't use a string as a SUBSTRING position: '$str'!")
1196 if $start->{"type"} eq 'string'
1197 or ($start->{"length"} and $start->{"length"}->{"type"} eq 'string');
1198 return undef unless $value;
1199 return $self->do_err(
1200 "Can't use a number in SUBSTRING: '$str'!")
1201 if $value->{"type"} eq 'number';
1202 return {
1203 "type" => 'function',
1204 "name" => $name,
1205 "value" => $value,
1206 "start" => $start,
1207 "length" => $length,
1208 };
1209 }
1210
1211
1212 # UPPER (value) and LOWER (value)
1213 #
1214 if ($str =~ /^(UPPER|LOWER) \((.+)\)\s*$/i ) {
1215 my $name = uc $1;
1216 my $value = $self->ROW_VALUE($2);
1217 return undef unless $value;
1218 $str =~ s/\?(\d+)\?/$self->{"struct"}->{"literals"}->[$1]/g;
1219 return $self->do_err(
1220 "Can't use a number in UPPER/LOWER: '$str'!")
1221 if $value->{"type"} eq 'number';
1222 return {
1223 type => 'function',
1224 name => $name,
1225 value => $value,
1226 };
1227 }
1228
1229 # TRIM ( [ [TRAILING|LEADING|BOTH] ['char'] FROM ] value )
1230 #
1231 if ($str =~ /^(TRIM) \((.+)\)\s*$/i ) {
1232 my $name = uc $1;
1233 my $value = $2;
1234 my($trim_spec,$trim_char);
1235 if ($value =~ /^(.+) FROM ([^\(\)]+)$/i ) {
1236 my $front = $1;
1237 $value = $2;
1238 if ($front =~ /^\s*(TRAILING|LEADING|BOTH)(.*)$/i ) {
1239 $trim_spec = uc $1;
1240 # $trim_char = trim($2);
1241 $trim_char = $2;
1242 $trim_char =~ s/^\s+//;
1243 $trim_char =~ s/\s+$//;
1244 undef $trim_char if length($trim_char)==0;
1245 }
1246 else {
1247 # $trim_char = trim($front);
1248 $trim_char = $front;
1249 $trim_char =~ s/^\s+//;
1250 $trim_char =~ s/\s+$//;
1251 }
1252 }
1253 $trim_char =~ s/\?(\d+)\?/$self->{"struct"}->{"literals"}->[$1]/g if $trim_char;
1254 $value = $self->ROW_VALUE($value);
1255 return undef unless $value;
1256 $str =~ s/\?(\d+)\?/$self->{"struct"}->{"literals"}->[$1]/g;
1257 return $self->do_err(
1258 "Can't use a number in TRIM: '$str'!")
1259 if $value->{"type"} eq 'number';
1260 return {
1261 type => 'function',
1262 name => $name,
1263 value => $value,
1264 trim_spec => $trim_spec,
1265 trim_char => $trim_char,
1266 };
1267 }
1268
1269 # STRING CONCATENATION
1270 #
1271 if ($str =~ /\|\|/ ) {
1272 my @vals = split / \|\| /,$str;
1273 my @newvals;
1274 for my $val(@vals) {
1275 my $newval = $self->ROW_VALUE($val);
1276 return undef unless $newval;
1277 return $self->do_err(
1278 "Can't use a number in string concatenation: '$str'!")
1279 if $newval->{"type"} eq 'number';
1280 push @newvals,$newval;
1281 }
1282 return {
1283 type => 'function',
1284 name => 'str_concat',
1285 value => \@newvals,
1286 };
1287 }
1288
1289 # NULL, PLACEHOLDER, NUMBER
1290 #
1291 if ( $type = $self->LITERAL($str) ) {
1292 undef $str if $type eq 'null';
1293 # if ($type eq 'empty_string') {
1294 # $str = '';
1295 # $type = 'string';
1296 # }
1297 $str = '' if $str and $str eq q('');
1298 return { type => $type, value => $str };
1299 }
1300
1301 # QUOTED STRING LITERAL
1302 #
1303 if ($str =~ /\?(\d+)\?/) {
1304 return { type =>'string',
1305 value => $self->{"struct"}->{"literals"}->[$1] };
1306 }
1307 # COLUMN NAME
1308 #
1309 return undef unless $str = $self->COLUMN_NAME($str);
1310 if ( $str =~ /^(.*)\./ && !$self->{"tmp"}->{"is_table_name"}->{"\L$1"}
1311 and !$self->{"tmp"}->{"is_table_alias"}->{"\L$1"} ) {
1312 return $self->do_err(
1313 "Table '$1' in WHERE clause not in FROM clause!"
1314 );
1315 }
1316 # push @{ $self->{"struct"}->{"where_cols"}},$str
1317 # unless $self->{"tmp"}->{"where_cols"}->{"$str"};
1318 $self->{"tmp"}->{"where_cols"}->{"$str"}++;
1319 return { type => 'column', value => $str };
1320 }
1321
1322 ###############################################
1323 # COLUMN NAME ::= [<table_name>.] <identifier>
1324 ###############################################
1325
1326 sub COLUMN_NAME {
1327 my $self = shift;
1328 my $str = shift;
1329 my($table_name,$col_name);
1330 if ( $str =~ /^\s*(\S+)\.(\S+)$/s ) {
1331 if (!$self->{"opts"}->{"valid_options"}->{"SELECT_MULTIPLE_TABLES"}) {
1332 return $self->do_err('Dialect does not support multiple tables!');
1333 }
1334 $table_name = $1;
1335 $col_name = $2;
1336 # my $alias = $self->{struct}->{table_alias} || [];
1337 # $table_name = shift @$alias if $alias;
1338 #print "$table_name : $alias";
1339 return undef unless $self->TABLE_NAME($table_name);
1340 $table_name = $self->replace_quoted_ids($table_name);
1341 my $ref;
1342 if ($table_name =~ /^"/) { #"
1343 if (!$self->{"tmp"}->{"is_table_name"}->{"$table_name"}
1344 and !$self->{"tmp"}->{"is_table_alias"}->{"$table_name"}
1345 ) {
1346 $self->do_err(
1347 "Table '$table_name' referenced but not found in FROM list!"
1348 );
1349 return undef;
1350 } }
1351 elsif (!$self->{"tmp"}->{"is_table_name"}->{"\L$table_name"}
1352 and !$self->{"tmp"}->{"is_table_alias"}->{"\L$table_name"}
1353 ) {
1354 $self->do_err(
1355 "Table '$table_name' referenced but not found in FROM list!"
1356 );
1357 return undef;
1358 }
1359 }
1360 else {
1361 $col_name = $str;
1362 }
1363 # $col_name = trim($col_name);
1364 $col_name =~ s/^\s+//;
1365 $col_name =~ s/\s+$//;
1366 return undef unless $col_name eq '*' or $self->IDENTIFIER($col_name);
1367 #
1368 # MAKE COL NAMES ALL LOWER CASE
1369 my $orgcol = $col_name;
1370 if ($col_name =~ /^\?QI(\d+)\?$/) {
1371 $col_name = $self->replace_quoted_ids($col_name);
1372 }
1373 else {
1374 # $col_name = lc $col_name;
1375 $col_name = uc $col_name;
1376 }
1377 $self->{struct}->{ORG_NAME}->{$col_name} = $orgcol;
1378 #
1379 #
1380 if ($table_name) {
1381 my $alias = $self->{tmp}->{is_table_alias}->{"\L$table_name"};
1382 #use mylibs; print "$table_name"; zwarn $self->{tmp}; exit;
1383 $table_name = $alias if defined $alias;
1384 $table_name = uc $table_name;
1385 $col_name = "$table_name.$col_name";
1386 }
1387 return $col_name;
1388 }
1389
1390 #########################################################
1391 # COLUMN NAME_LIST ::= <column_name> [,<column_name>...]
1392 #########################################################
1393 sub COLUMN_NAME_LIST {
1394 my $self = shift;
1395 my $col_str = shift;
1396 my @col_list = split ',',$col_str;
1397 if (!(scalar @col_list)) {
1398 return $self->do_err('Missing column name list!');
1399 }
1400 my @newcols;
1401 my $newcol;
1402 for my $col(@col_list) {
1403 $col =~ s/^\s+//;
1404 $col =~ s/\s+$//;
1405 # return undef if !($newcol = $self->COLUMN_NAME(trim($col)));
1406 return undef if !($newcol = $self->COLUMN_NAME($col));
1407 push @newcols, $newcol;
1408 }
1409 $self->{"struct"}->{"column_names"} = \@newcols;
1410 return 1;
1411 }
1412
1413
1414 #####################################################
1415 # TABLE_NAME_LIST := <table_name> [,<table_name>...]
1416 #####################################################
1417 sub TABLE_NAME_LIST {
1418 my $self = shift;
1419 my $table_name_str = shift;
1420 my %aliases = ();
1421 my @tables;
1422 my @table_names = split ',', $table_name_str;
1423 if ( scalar @table_names > 1
1424 and !$self->{"opts"}->{"valid_options"}->{'SELECT_MULTIPLE_TABLES'}
1425 ) {
1426 return $self->do_err('Dialect does not support multiple tables!');
1427 }
1428 my %is_table_alias;
1429 for my $table_str(@table_names) {
1430 my($table,$alias);
1431 my(@tstr) = split / /,$table_str;
1432 if (@tstr == 1) { $table = $tstr[0]; }
1433 elsif (@tstr == 2) { $table = $tstr[0]; $alias = $tstr[1]; }
1434 # elsif (@tstr == 2) { $table = $tstr[1]; $alias = $tstr[0]; }
1435 elsif (@tstr == 3) {
1436 return $self->do_err("Can't find alias in FROM clause!")
1437 unless uc($tstr[1]) eq 'AS';
1438 $table = $tstr[0]; $alias = $tstr[2];
1439 # $table = $tstr[2]; $alias = $tstr[0];
1440 }
1441 else {
1442 return $self->do_err("Can't find table names in FROM clause!")
1443 }
1444 return undef unless $self->TABLE_NAME($table);
1445 $table = $self->replace_quoted_ids($table);
1446 push @tables, $table;
1447 if ($alias) {
1448 #die $alias, $table;
1449 return undef unless $self->TABLE_NAME($alias);
1450 $alias = $self->replace_quoted_ids($alias);
1451 if ($alias =~ /^"/) {
1452 push @{$aliases{$table}},"$alias";
1453 $is_table_alias{"$alias"}=$table;
1454 }
1455 else {
1456 push @{$aliases{$table}},"\L$alias";
1457 $is_table_alias{"\L$alias"}=$table;
1458 }
1459 # $aliases{$alias} = $table;
1460 }
1461 }
1462
1463 # my %is_table_name = map { $_ => 1 } @tables,keys %aliases;
1464 my %is_table_name = map { lc $_ => 1 } @tables;
1465 #%is_table_alias = map { lc $_ => 1 } @aliases;
1466 $self->{"tmp"}->{"is_table_alias"} = \%is_table_alias;
1467 $self->{"tmp"}->{"is_table_name"} = \%is_table_name;
1468 $self->{"struct"}->{"table_names"} = \@tables;
1469 $self->{"struct"}->{"table_alias"} = \%aliases;
1470 $self->{"struct"}->{"multiple_tables"} = 1 if @tables > 1;
1471 return 1;
1472 }
1473
1474 #############################
1475 # TABLE_NAME := <identifier>
1476 #############################
1477 sub TABLE_NAME {
1478 my $self = shift;
1479 my $table_name = shift;
1480 if ($table_name =~ /\s*(\S+)\s+\S+/s) {
1481 return $self->do_err("Junk after table name '$1'!");
1482 }
1483 $table_name =~ s/\s+//s;
1484 if (!$table_name) {
1485 return $self->do_err('No table name specified!');
1486 }
1487 return undef if !($self->IDENTIFIER($table_name));
1488 return 1;
1489 }
1490
1491
1492 ###################################################################
1493 # IDENTIFIER ::= <alphabetic_char> { <alphanumeric_char> | _ }...
1494 #
1495 # and must not be a reserved word or over 128 chars in length
1496 ###################################################################
1497 sub IDENTIFIER {
1498 my $self = shift;
1499 my $id = shift;
1500 if ($id =~ /^\?QI(.+)\?$/ ) {
1501 return 1;
1502 }
1503 return 1 if $id =~ /^".+?"$/s; # QUOTED IDENTIFIER
1504 my $err = "Bad table or column name '$id' "; # BAD CHARS
1505 if ($id !~ /\w|\./) {
1506 $err .= "has chars not alphanumeric or underscore!";
1507 return $self->do_err( $err );
1508 }
1509 if ($id =~ /^_/ or $id =~ /^\d/) { # BAD START
1510 $err .= "starts with non-alphabetic character!";
1511 return $self->do_err( $err );
1512 }
1513 if ( length $id > 128 ) { # BAD LENGTH
1514 $err .= "contains more than 128 characters!";
1515 return $self->do_err( $err );
1516 }
1517 if ( $self->{"opts"}->{"reserved_words"}->{"$id"} ) { # BAD RESERVED WORDS
1518 $err .= "is a SQL reserved word!";
1519 return $self->do_err( $err );
1520 }
1521 return 1;
1522 }
1523
1524 ########################################
1525 # PRIVATE METHODS AND UTILITY FUNCTIONS
1526 ########################################
1527 sub order_joins {
1528 my $self = shift;
1529 my $links = shift;
1530 for my $link(@$links) {
1531 if ($link !~ /\./) {
1532 return [];
1533 }
1534 }
1535 @$links = map { s/^(.+)\..*$/$1/; $1; } @$links;
1536 my @all_tables;
1537 my %relations;
1538 my %is_table;
1539 while (@$links) {
1540 my $t1 = shift @$links;
1541 my $t2 = shift @$links;
1542 return undef unless defined $t1 and defined $t2;
1543 push @all_tables, $t1 unless $is_table{$t1}++;
1544 push @all_tables, $t2 unless $is_table{$t2}++;
1545 $relations{$t1}{$t2}++;
1546 $relations{$t2}{$t1}++;
1547 }
1548 my @tables = @all_tables;
1549 my @order = shift @tables;
1550 my %is_ordered = ( $order[0] => 1 );
1551 my %visited;
1552 while(@tables) {
1553 my $t = shift @tables;
1554 my @rels = keys %{$relations{$t}};
1555 for my $t2(@rels) {
1556 next unless $is_ordered{$t2};
1557 push @order, $t;
1558 $is_ordered{$t}++;
1559 last;
1560 }
1561 if (!$is_ordered{$t}) {
1562 push @tables, $t if $visited{$t}++ < @all_tables;
1563 }
1564 }
1565 return $self->do_err(
1566 "Unconnected tables in equijoin statement!"
1567 ) if @order < @all_tables;
1568 return \@order;
1569 }
1570
1571 sub bless_me {
1572 my $class = shift;
1573 my $self = shift || {};
1574 return bless $self, $class;
1575 }
1576
1577 # PROVIDE BACKWARD COMPATIBILIT FOR JOCHEN'S FEATURE ATTRIBUTES TO NEW
1578 #
1579 #
1580 sub set_feature_flags {
1581 my($self,$select,$create) = @_;
1582 if (defined $select) {
1583 delete $self->{"select"};
1584 $self->{"opts"}->{"valid_options"}->{"SELECT_MULTIPLE_TABLES"} =
1585 $self->{"opts"}->{"select"}->{"join"} = $select->{"join"};
1586 }
1587 if (defined $create) {
1588 delete $self->{"create"};
1589 for my $key(keys %$create) {
1590 my $type = $key;
1591 $type =~ s/type_(.*)/\U$1/;
1592 $self->{"opts"}->{"valid_data_types"}->{"$type"} =
1593 $self->{"opts"}->{"create"}->{"$key"} = $create->{"$key"};
1594 }
1595 }
1596 }
1597
1598 sub clean_sql {
1599 my $self = shift;
1600 my $sql = shift;
1601 my $fields;
1602 my $i=-1;
1603 my $e = '\\';
1604 $e = quotemeta($e);
1605 ###new
1606 # CAN'T HANDLE BLOBS!!!
1607 # $sql = quotemeta($sql);
1608 # print "[$sql]\n";
1609 # if ($sql =~ s/^(.*,\s*)''(\s*[,\)].*)$/${1}NULL$2/g ) {
1610 # }
1611 # $sql =~ s/^([^\\']+?)''(.*)$/${1} NULL $2/g;
1612
1613 # $sql =~ s/([^\\]+?)''/$1 ~E~ /g;
1614
1615 # print "$sql\n";
1616 ###newend
1617
1618 # $sql =~ s~'(([^'$e]|$e.)+)'~push(@$fields,$1);$i++;"?$i?"~ge;
1619 $sql =~ s~'(([^'$e]|$e.|'')+)'~push(@$fields,$1);$i++;"?$i?"~ge;
1620
1621 # $sql =~ s/([^\\]+?)''/$1 ~E~ /g;
1622 #print "<$sql>";
1623 @$fields = map { s/''/\\'/g; $_ } @$fields;
1624
1625 ###new
1626 # if ( $sql =~ /'/) {
1627 if ( $sql =~ tr/[^\\]'// % 2 == 1 ) {
1628 ###endnew
1629 $sql =~ s/^.*\?(.+)$/$1/;
1630 die "Mismatched single quote before: '$sql\n";
1631 }
1632 if ($sql =~ /\?\?(\d)\?/) {
1633 $sql = $fields->[$1];
1634 die "Mismatched single quote: '$sql\n";
1635 }
1636 @$fields = map { s/$e'/'/g; s/^'(.*)'$/$1/; $_} @$fields;
1637 $self->{"struct"}->{"literals"} = $fields;
1638
1639 my $qids;
1640 $i=-1;
1641 $e = q/""/;
1642 # $sql =~ s~"(([^"$e]|$e.)+)"~push(@$qids,$1);$i++;"?QI$i?"~ge;
1643 $sql =~ s~"(([^"]|"")+)"~push(@$qids,$1);$i++;"?QI$i?"~ge;
1644 #@$qids = map { s/$e'/'/g; s/^'(.*)'$/$1/; $_} @$qids;
1645 $self->{"struct"}->{"quoted_ids"} = $qids if $qids;
1646
1647 # $sql =~ s~'(([^'\\]|\\.)+)'~push(@$fields,$1);$i++;"?$i?"~ge;
1648 # @$fields = map { s/\\'/'/g; s/^'(.*)'$/$1/; $_} @$fields;
1649 #print "$sql [@$fields]\n";# if $sql =~ /SELECT/;
1650
1651 ## before line 1511
1652 my $comment_re = $self->{"comment_re"};
1653 # if ( $sql =~ s/($comment_re)//gs) {
1654 # $self->{"comment"} = $1;
1655 # }
1656 if ( $sql =~ /(.*)$comment_re$/s) {
1657 $sql = $1;
1658 $self->{"comment"} = $2;
1659 }
1660 if ($sql =~ /^(.*)--(.*)(\n|$)/) {
1661 $sql = $1;
1662 $self->{"comment"} = $2;
1663 }
1664
1665 $sql =~ s/\n/ /g;
1666 $sql =~ s/\s+/ /g;
1667 $sql =~ s/(\S)\(/$1 (/g; # ensure whitespace before (
1668 $sql =~ s/\)(\S)/) $1/g; # ensure whitespace after )
1669 $sql =~ s/\(\s*/(/g; # trim whitespace after (
1670 $sql =~ s/\s*\)/)/g; # trim whitespace before )
1671 #
1672 # $sql =~ s/\s*\(/(/g; # trim whitespace before (
1673 # $sql =~ s/\)\s*/)/g; # trim whitespace after )
1674 for my $op( qw( = <> < > <= >= \|\|) ) {
1675 $sql =~ s/(\S)$op/$1 $op/g;
1676 $sql =~ s/$op(\S)/$op $1/g;
1677 }
1678 $sql =~ s/< >/<>/g;
1679 $sql =~ s/< =/<=/g;
1680 $sql =~ s/> =/>=/g;
1681 $sql =~ s/\s*,/,/g;
1682 $sql =~ s/,\s*/,/g;
1683 $sql =~ s/^\s+//;
1684 $sql =~ s/\s+$//;
1685 return $sql;
1686 }
1687
1688 sub trim {
1689 my $str = shift or return '';
1690 $str =~ s/^\s+//;
1691 $str =~ s/\s+$//;
1692 return $str;
1693 }
1694
1695 sub do_err {
1696 my $self = shift;
1697 my $err = shift;
1698 my $errtype = shift;
1699 my @c = caller 4;
1700 $err = "$err\n\n";
1701 # $err = $errtype ? "DIALECT ERROR: $err in $c[3]"
1702 # : "SQL ERROR: $err in $c[3]";
1703 $err = $errtype ? "DIALECT ERROR: $err"
1704 : "SQL ERROR: $err";
1705 $self->{"struct"}->{"errstr"} = $err;
1706 #$self->{"errstr"} = $err;
1707 warn $err if $self->{"PrintError"};
1708 die if $self->{"RaiseError"};
1709 return undef;
1710 }
1711
1712 1;
1713
1714 __END__
1715
1716
1717 =head1 NAME
1718
1719 SQL::Parser -- validate, parse, or build SQL strings
1720
1721 =head1 SYNOPSIS
1722
1723 use SQL::Parser; # CREATE A PARSER OBJECT
1724 my $parser = SQL::Parser->new( $dialect, \%attrs );
1725
1726 my $success = $parser->parse( $sql_string ); # PARSE A SQL STRING &
1727 if ($success) { # DISPLAY RESULTING DATA
1728 use Data::Dumper; # STRUCTURE
1729 print Dumper $parser->structure;
1730 }
1731
1732 $parser->feature( $class, $name, $value ); # SET OR FIND STATUS OF
1733 my $has_feature = $parser->feature( $class, $name ); # A PARSER FEATURE
1734
1735 $parser->dialect( $dialect_name ); # SET OR FIND STATUS OF
1736 my $current_dialect = $parser->dialect; # A PARSER DIALECT
1737
1738 print $parser->errstr; # DISPLAY CURRENT ERROR
1739 # STRING
1740
1741
1742 =head1 DESCRIPTION
1743
1744 SQL::Parser is a parser, builder, and sytax validator for a
1745 small but useful subset of SQL (Structured Query Language). It
1746 accepts SQL strings and returns either a detailed error message
1747 if the syntax is invalid or a data structure containing the
1748 results of the parse if the syntax is valid. It will soon also
1749 work in reverse to build a SQL string from a supplied data
1750 structure.
1751
1752 The module can be used in batch mode to validate a series of
1753 statements, or as middle-ware for DBI drivers or other related
1754 projects. When combined with SQL::Statement version 0.2 or
1755 greater, the module can be used to actually perform the SQL
1756 commands on a variety of file formats using DBD::AnyData, or
1757 DBD::CSV, or DBD::Excel.
1758
1759 The module makes use of a variety of configuration files
1760 located in the SQL/Dialects directory, each of which is
1761 essentially a simple text file listing things like supported
1762 data types, reserved words, and other features specific to a
1763 given dialect of SQL. These features can also be turned on or
1764 off during program execution.
1765
1766 =head1 SUPPORTED SQL SYNTAX
1767
1768 This module is meant primarly as a base class for DBD drivers
1769 and as such concentrates on a small but useful subset of SQL 92.
1770 It does *not* in any way pretend to be a complete SQL 92 parser.
1771 The module will continue to add new supported syntax, currently,
1772 this is what is supported:
1773
1774 =head2 CREATE TABLE
1775
1776 CREATE [ {LOCAL|GLOBAL} TEMPORARY ] TABLE $table
1777 (
1778 $col_1 $col_type1 $col_constraints1,
1779 ...,
1780 $col_N $col_typeN $col_constraintsN,
1781 )
1782 [ ON COMMIT {DELETE|PRESERVE} ROWS ]
1783
1784 * col_type must be a valid data type as defined in the
1785 "valid_data_types" section of the dialect file for the
1786 current dialect
1787
1788 * col_constriaints may be "PRIMARY KEY" or one or both of
1789 "UNIQUE" and/or "NOT NULL"
1790
1791 * IMPORTANT NOTE: temporary tables, data types and column
1792 constraints are checked for syntax violations but are
1793 currently otherwise *IGNORED* -- they are recognized by
1794 the parser, but not by the execution engine
1795
1796 * The following valid ANSI SQL92 options are not currently
1797 supported: table constraints, named constraints, check
1798 constriants, reference constraints, constraint
1799 attributes, collations, default clauses, domain names as
1800 data types
1801
1802 =head2 DROP TABLE
1803
1804 DROP TABLE $table [ RESTRICT | CASCADE ]
1805
1806 * IMPORTANT NOTE: drop behavior (cascade or restrict) is
1807 checked for valid syntax but is otherwise *IGNORED* -- it
1808 is recognized by the parser, but not by the execution
1809 engine
1810
1811 =head2 INSERT INTO
1812
1813 INSERT INTO $table [ ( $col1, ..., $colN ) ] VALUES ( $val1, ... $valN )
1814
1815 * default values are not currently supported
1816 * inserting from a subquery is not currently supported
1817
1818 =head2 DELETE FROM
1819
1820 DELETE FROM $table [ WHERE search_condition ]
1821
1822 * see "search_condition" below
1823
1824 =head2 UPDATE
1825
1826 UPDATE $table SET $col1 = $val1, ... $colN = $valN [ WHERE search_condition ]
1827
1828 * default values are not currently supported
1829 * see "search_condition" below
1830
1831 =head2 SELECT
1832
1833 SELECT select_clause
1834 FROM from_clause
1835 [ WHERE search_condition ]
1836 [ ORDER BY $ocol1 [ASC|DESC], ... $ocolN [ASC|DESC] ]
1837 [ LIMIT [start,] length ]
1838
1839 * select clause ::=
1840 [DISTINCT|ALL] *
1841 | [DISTINCT|ALL] col1 [,col2, ... colN]
1842 | set_function1 [,set_function2, ... set_functionN]
1843
1844 * set function ::=
1845 COUNT ( [DISTINCT|ALL] * )
1846 | COUNT | MIN | MAX | AVG | SUM ( [DISTINCT|ALL] col_name )
1847
1848 * from clause ::=
1849 table1 [, table2, ... tableN]
1850 | table1 NATURAL [join_type] JOIN table2
1851 | table1 [join_type] table2 USING (col1,col2, ... colN)
1852 | table1 [join_type] JOIN table2 ON (table1.colA = table2.colB)
1853
1854 * join type ::=
1855 INNER
1856 | [OUTER] LEFT | RIGHT | FULL
1857
1858 * if join_type is not specified, INNER is the default
1859 * if DISTINCT or ALL is not specified, ALL is the default
1860 * if start position is omitted from LIMIT clause, position 0 is
1861 the default
1862 * ON clauses may only contain equal comparisons and AND combiners
1863 * self-joins are not currently supported
1864 * if implicit joins are used, the WHERE clause must contain
1865 and equijoin condition for each table
1866
1867
1868 =head2 SEARCH CONDITION
1869
1870 [NOT] $val1 $op1 $val1 [ ... AND|OR $valN $opN $valN ]
1871
1872
1873 =head2 OPERATORS
1874
1875 $op = | <> | < | > | <= | >=
1876 | IS NULL | IS NOT NULL | LIKE | CLIKE | BETWEEN | IN
1877
1878 The "CLIKE" operator works exactly the same as the "LIKE"
1879 operator, but is case insensitive. For example:
1880
1881 WHERE foo LIKE 'bar%' # succeeds if foo is "barbaz"
1882 # fails if foo is "BARBAZ" or "Barbaz"
1883
1884 WHERE foo CLIKE 'bar%' # succeeds for "barbaz", "Barbaz", and "BARBAZ"
1885
1886
1887 =head2 STRING FUNCTIONS & MATH EXPRESSIONS
1888
1889 String functions and math expressions are supported in WHERE
1890 clauses, in the VALUES part of an INSERT and UPDATE
1891 statements. They are not currently supported in the SELECT
1892 statement. For example:
1893
1894 SELECT * FROM foo WHERE UPPER(bar) = 'baz' # SUPPORTED
1895
1896 SELECT UPPER(foo) FROM bar # NOT SUPPORTED
1897
1898 =over
1899
1900 =item TRIM ( [ [LEADING|TRAILING|BOTH] ['trim_char'] FROM ] string )
1901
1902 Removes all occurrences of <trim_char> from the front, back, or
1903 both sides of a string.
1904
1905 BOTH is the default if neither LEADING nor TRAILING is specified.
1906
1907 Space is the default if no trim_char is specified.
1908
1909 Examples:
1910
1911 TRIM( string )
1912 trims leading and trailing spaces from string
1913
1914 TRIM( LEADING FROM str )
1915 trims leading spaces from string
1916
1917 TRIM( 'x' FROM str )
1918 trims leading and trailing x's from string
1919
1920 =item SUBSTRING( string FROM start_pos [FOR length] )
1921
1922 Returns the substring starting at start_pos and extending for
1923 "length" character or until the end of the string, if no
1924 "length" is supplied. Examples:
1925
1926 SUBSTRING( 'foobar' FROM 4 ) # returns "bar"
1927
1928 SUBSTRING( 'foobar' FROM 4 FOR 2) # returns "ba"
1929
1930
1931 =item UPPER(string) and LOWER(string)
1932
1933 These return the upper-case and lower-case variants of the string:
1934
1935 UPPER('foo') # returns "FOO"
1936 LOWER('FOO') # returns "foo"
1937
1938 =back
1939
1940 =head2 Identifiers (table & column names)
1941
1942 Regular identifiers (table and column names *without* quotes around them) are case INSENSITIVE so column foo, fOo, FOO all refer to the same column.
1943
1944 Delimited identifiers (table and column names *with* quotes around them) are case SENSITIVE so column "foo", "fOo", "FOO" each refer to different columns.
1945
1946 A delimited identifier is *never* equal to a regular identifer (so "foo" and foo are two different columns). But don't do that :-).
1947
1948 Remember thought that, in DBD::CSV if table names are used directly as file names, the case sensitivity depends on the OS e.g. on Windows files named foo, FOO, and fOo are the same as each other while on Unix they are different.
1949
1950
1951 =head1 METHODS
1952
1953 =head2 new()
1954
1955 The new() method creates a SQL::Parser object which can then be
1956 used to parse, validate, or build SQL strings. It takes one
1957 required parameter -- the name of the SQL dialect that will
1958 define the rules for the parser. A second optional parameter is
1959 a reference to a hash which can contain additional attributes of
1960 the parser.
1961
1962 use SQL::Parser;
1963 my $parser = SQL::Parser->new( $dialect_name, \%attrs );
1964
1965 The dialect_name parameter is a string containing any valid
1966 dialect such as 'ANSI', 'AnyData', or 'CSV'. See the section on
1967 the dialect() method below for details.
1968
1969 The attribute parameter is a reference to a hash that can
1970 contain error settings for the PrintError and RaiseError
1971 attributes. See the section below on the parse() method for
1972 details.
1973
1974 An example:
1975
1976 use SQL::Parser;
1977 my $parser = SQL::Parser->new('AnyData', {RaiseError=>1} );
1978
1979 This creates a new parser that uses the grammar rules
1980 contained in the .../SQL/Dialects/AnyData.pm file and which
1981 sets the RaiseError attribute to true.
1982
1983 For those needing backwards compatibility with SQL::Statement
1984 version 0.1x and lower, the attribute hash may also contain
1985 feature settings. See the section "FURTHER DETAILS - Backwards
1986 Compatibility" below for details.
1987
1988
1989 =head2 parse()
1990
1991 Once a SQL::Parser object has been created with the new()
1992 method, the parse() method can be used to parse any number of
1993 SQL strings. It takes a single required parameter -- a string
1994 containing a SQL command. The SQL string may optionally be
1995 terminated by a semicolon. The parse() method returns a true
1996 value if the parse is successful and a false value if the parse
1997 finds SQL syntax errors.
1998
1999 Examples:
2000
2001 1) my $success = $parser->parse('SELECT * FROM foo');
2002
2003 2) my $sql = 'SELECT * FROM foo';
2004 my $success = $parser->parse( $sql );
2005
2006 3) my $success = $parser->parse(qq!
2007 SELECT id,phrase
2008 FROM foo
2009 WHERE id < 7
2010 AND phrase <> 'bar'
2011 ORDER BY phrase;
2012 !);
2013
2014 4) my $success = $parser->parse('SELECT * FRoOM foo ');
2015
2016 In examples #1,#2, and #3, the value of $success will be true
2017 because the strings passed to the parse() method are valid SQL
2018 strings.
2019
2020 In example #4, however, the value of $success will be false
2021 because the string contains a SQL syntax error ('FRoOM' instead
2022 of 'FROM').
2023
2024 In addition to checking the return value of parse() with a
2025 variable like $success, you may use the PrintError and
2026 RaiseError attributes as you would in a DBI script:
2027
2028 * If PrintError is true, then SQL syntax errors will be sent as
2029 warnings to STDERR (i.e. to the screen or to a file if STDERR
2030 has been redirected). This is set to true by default which
2031 means that unless you specifically turn it off, all errors
2032 will be reported.
2033
2034 * If RaiseError is true, then SQL syntax errors will cause the
2035 script to die, (i.e. the script will terminate unless wrapped
2036 in an eval). This is set to false by default which means
2037 that unless you specifically turn it on, scripts will
2038 continue to operate even if there are SQL syntax errors.
2039
2040 Basically, you should leave PrintError on or else you will not
2041 be warned when an error occurs. If you are simply validating a
2042 series of strings, you will want to leave RaiseError off so that
2043 the script can check all strings regardless of whether some of
2044 them contain SQL errors. However, if you are going to try to
2045 execute the SQL or need to depend that it is correct, you should
2046 set RaiseError on so that the program will only continue to
2047 operate if all SQL strings use correct syntax.
2048
2049 IMPORTANT NOTE #1: The parse() method only checks syntax, it
2050 does NOT verify if the objects listed actually exist. For
2051 example, given the string "SELECT model FROM cars", the parse()
2052 method will report that the string contains valid SQL but that
2053 will not tell you whether there actually is a table called
2054 "cars" or whether that table contains a column called 'model'.
2055 Those kinds of verifications can be performed by the
2056 SQL::Statement module, not by SQL::Parser by itself.
2057
2058 IMPORTANT NOTE #2: The parse() method uses rules as defined by
2059 the selected dialect configuration file and the feature()
2060 method. This means that a statement that is valid in one
2061 dialect may not be valid in another. For example the 'CSV' and
2062 'AnyData' dialects define 'BLOB' as a valid data type but the
2063 'ANSI' dialect does not. Therefore the statement 'CREATE TABLE
2064 foo (picture BLOB)' would be valid in the first two dialects but
2065 would produce a syntax error in the 'ANSI' dialect.
2066
2067 =head2 structure()
2068
2069 After a SQL::Parser object has been created and the parse()
2070 method used to parse a SQL string, the structure() method
2071 returns the data structure of that string. This data structure
2072 may be passed on to other modules (e.g. SQL::Statement) or it
2073 may be printed out using, for example, the Data::Dumper module.
2074
2075 The data structure contains all of the information in the SQL
2076 string as parsed into its various components. To take a simple
2077 example:
2078
2079 $parser->parse('SELECT make,model FROM cars');
2080 use Data::Dumper;
2081 print Dumper $parser->structure;
2082
2083 Would produce:
2084
2085 $VAR1 = {
2086 'column_names' => [
2087 'make',
2088 'model'
2089 ],
2090 'command' => 'SELECT',
2091 'table_names' => [
2092 'cars'
2093 ]
2094 };
2095
2096 Please see the section "FURTHER DETAILS -- Parse structures"
2097 below for further examples.
2098
2099 =head2 build()
2100
2101 This method is in progress and should be available soon.
2102
2103 =head2 dialect()
2104
2105 $parser->dialect( $dialect_name ); # load a dialect configuration file
2106 my $dialect = $parser->dialect; # get the name of the current dialect
2107
2108 For example:
2109
2110 $parser->dialect('AnyData'); # loads the AnyData config file
2111 print $parser->dialect; # prints 'AnyData'
2112
2113 The $dialect_name parameter may be the name of any dialect
2114 configuration file on your system. Use the
2115 $parser->list('dialects') method to see a list of available
2116 dialects. At a minimum it will include "ANSI", "CSV", and
2117 "AnyData". For backwards compatiblity 'Ansi' is accepted as a
2118 synonym for 'ANSI', otherwise the names are case sensitive.
2119
2120 Loading a new dialect configuration file erases all current
2121 parser features and resets them to those defined in the
2122 configuration file.
2123
2124 See the section above on "Dialects" for details of these
2125 configuration files.
2126
2127 =head2 feature()
2128
2129 Features define the rules to be used by a specific parser
2130 instance. They are divided into the following classes:
2131
2132 * valid_commands
2133 * valid_options
2134 * valid_comparison_operators
2135 * valid_data_types
2136 * reserved_words
2137
2138 Within each class a feature name is either enabled or
2139 disabled. For example, under "valid_data_types" the name "BLOB"
2140 may be either disabled or enabled. If it is not eneabled
2141 (either by being specifically disabled, or simply by not being
2142 specified at all) then any SQL string using "BLOB" as a data
2143 type will throw a syntax error "Invalid data type: 'BLOB'".
2144
2145 The feature() method allows you to enable, disable, or check the
2146 status of any feature.
2147
2148 $parser->feature( $class, $name, 1 ); # enable a feature
2149
2150 $parser->feature( $class, $name, 0 ); # disable a feature
2151
2152 my $feature = $parser->feature( $class, $name ); # show status of a feature
2153
2154 For example:
2155
2156 $parser->feature('reserved_words','FOO',1); # make 'FOO' a reserved word
2157
2158 $parser->feature('valid_data_types','BLOB',0); # disallow 'BLOB' as a
2159 # data type
2160
2161 # determine if the LIKE
2162 # operator is supported
2163 my $LIKE = $parser->feature('valid_operators','LIKE');
2164
2165 See the section below on "Backwards Compatibility" for use of
2166 the feature() method with SQL::Statement 0.1x style parameters.
2167
2168 =head2 list()
2169
2170 =head2 errstr()
2171
2172 =head1 FURTHER DETAILS
2173
2174 =head2 Dialect Configuration Files
2175
2176 These will change completely when Tim finalizes the DBI get_info method.
2177
2178 =head2 Parse Structures
2179
2180 Here are some further examples of the data structures returned
2181 by the structure() method after a call to parse(). Only
2182 specific details are shown for each SQL instance, not the entire
2183 struture.
2184
2185 'SELECT make,model, FROM cars'
2186
2187 command => 'SELECT',
2188 table_names => [ 'cars' ],
2189 column_names => [ 'make', 'model' ],
2190
2191 'CREATE TABLE cars ( id INTEGER, model VARCHAR(40) )'
2192
2193 column_defs => {
2194 id => { data_type => INTEGER },
2195 model => { data_type => VARCHAR(40) },
2196 },
2197
2198 'SELECT DISTINCT make FROM cars'
2199
2200 set_quantifier => 'DISTINCT',
2201
2202 'SELECT MAX (model) FROM cars'
2203
2204 set_function => {
2205 name => 'MAX',
2206 arg => 'models',
2207 },
2208
2209 'SELECT * FROM cars LIMIT 5,10'
2210
2211 limit_clause => {
2212 offset => 5,
2213 limit => 10,
2214 },
2215
2216 'SELECT * FROM vars ORDER BY make, model DESC'
2217
2218 sort_spec_list => [
2219 { make => 'ASC' },
2220 { model => 'DESC' },
2221 ],
2222
2223 "INSERT INTO cars VALUES ( 7, 'Chevy', 'Impala' )"
2224
2225 values => [ 7, 'Chevy', 'Impala' ],
2226
2227
2228 =head2 Backwards Compatibility
2229
2230 This module can be used in conjunction with SQL::Statement,
2231 version 0.2 and higher. Earlier versions of SQL::Statement
2232 included a SQL::Parser as a submodule that used slightly
2233 different syntax than the current version. The current version
2234 supports all of this earlier syntax although new users are
2235 encouraged to use the new syntax listed above. If the syntax
2236 listed below is used, the module should be able to be subclassed
2237 exactly as it was with the older SQL::Statement versions and
2238 will therefore not require any modules or scripts that used it
2239 to make changes.
2240
2241 In the old style, features of the parser were accessed with this
2242 syntax:
2243
2244 feature('create','type_blob',1); # allow BLOB as a data type
2245 feature('create','type_blob',0); # disallow BLOB as a data type
2246 feature('select','join',1); # allow multi-table statements
2247
2248 The same settings could be acheieved in calls to new:
2249
2250 my $parser = SQL::Parser->new(
2251 'Ansi',
2252 {
2253 create => {type_blob=>1},
2254 select => {join=>1},
2255 },
2256 );
2257
2258 Both of these styles of setting features are supported in the
2259 current SQL::Parser.
2260
2261 =head1 ACKNOWLEDGEMENTS
2262
2263 *Many* thanks to Ilya Sterin who wrote most of code for the
2264 build() method and who assisted on the parentheses parsing code
2265 and who proved a great deal of support, advice, and testing
2266 throughout the development of the module.
2267
2268 =head1 AUTHOR & COPYRIGHT
2269
2270 This module is copyright (c) 2001 by Jeff Zucker.
2271 All rights reserved.
2272
2273 The module may be freely distributed under the same terms as
2274 Perl itself using either the "GPL License" or the "Artistic
2275 License" as specified in the Perl README file.
2276
2277 Jeff can be reached at: jeff@vpservices.com.
2278
2279 =cut

MailToCvsAdmin">MailToCvsAdmin
ViewVC Help
Powered by ViewVC 1.1.26 RSS 2.0 feed