/[cvs]/nfo/perl/libs/SQL/Parser.pm
ViewVC logotype

Annotation of /nfo/perl/libs/SQL/Parser.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (hide annotations)
Wed Nov 13 21:24:24 2002 UTC (21 years, 6 months ago) by joko
Branch: MAIN
Changes since 1.1: +1 -1 lines
+ from DBD::CSV - todo:
   [...] Currently it is not possible to use files with names like names.csv. [...]
   The fix should remove this restriction. I hope it doesn't break other things.

1 joko 1.1 ######################################################################
2     package SQL::Parser;
3     ######################################################################
4     #
5     # This module is copyright (c), 2001 by Jeff Zucker.
6     # All rights resered.
7     # It may be freely distributed under the same terms as Perl itself.
8     # See below for help and copyright information (search for SYNOPSIS).
9     #
10     ######################################################################
11    
12     use strict;
13     use vars qw($VERSION);
14    
15     $VERSION = '1.004';
16    
17    
18     #############################
19     # PUBLIC METHODS
20     #############################
21    
22     sub new {
23     my $class = shift;
24     my $dialect = shift || 'ANSI';
25     $dialect = 'ANSI' if uc $dialect eq 'ANSI';
26     $dialect = 'AnyData' if uc $dialect eq 'ANYDATA' or uc $dialect eq 'CSV';
27     # $dialect = 'CSV' if uc $dialect eq 'CSV';
28     if ($dialect eq 'SQL::Eval') {
29     $dialect = 'AnyData';
30     }
31     my $flags = shift || {};
32     $flags->{"dialect"} = $dialect;
33     $flags->{"PrintError"} = 1 unless defined $flags->{"PrintError"};
34     my $self = bless_me($class,$flags);
35     $self->dialect( $self->{"dialect"} );
36     $self->set_feature_flags($self->{"select"},$self->{"create"});
37     return bless $self,$class;
38     }
39    
40     sub parse {
41     my $self = shift;
42     my $sql = shift;
43     #printf "<%s>", $self->{dialect_set};
44     $self->dialect( $self->{"dialect"} ) unless $self->{"dialect_set"};
45     $sql =~ s/^\s+//;
46     $sql =~ s/\s+$//;
47     $self->{"struct"} = {};
48     $self->{"tmp"} = {};
49     $self->{"original_string"} = $sql;
50    
51     ################################################################
52     #
53     # COMMENTS
54    
55     # C-STYLE
56     #
57     my $comment_re = $self->{"comment_re"} || '(\/\*.*?\*\/)';
58     $self->{"comment_re"} = $comment_re;
59     my $starts_with_comment;
60     if ($sql =~ /^\s*$comment_re(.*)$/s) {
61     $self->{"comment"} = $1;
62     $sql = $2;
63     $starts_with_comment=1;
64     }
65     # SQL STYLE
66     #
67     if ($sql =~ /^\s*--(.*)(\n|$)/) {
68     $self->{"comment"} = $1;
69     return 1;
70     }
71     ################################################################
72    
73     $sql = $self->clean_sql($sql);
74     my($com) = $sql =~ /^\s*(\S+)\s+/s ;
75     if (!$com) {
76     return 1 if $starts_with_comment;
77     return $self->do_err("Incomplete statement!");
78     }
79     $com = uc $com;
80     if ($self->{"opts"}->{"valid_commands"}->{$com}) {
81     my $rv = $self->$com($sql);
82     delete $self->{"struct"}->{"literals"};
83     # return $self->do_err("No table names found!")
84     # unless $self->{"struct"}->{"table_names"};
85     return $self->do_err("No command found!")
86     unless $self->{"struct"}->{"command"};
87     if ( $self->{"struct"}->{"join"}
88     and scalar keys %{$self->{"struct"}->{"join"}}==0
89     ) {
90     delete $self->{"struct"}->{"join"};
91     }
92     $self->replace_quoted_ids();
93     return $rv;
94     }
95     else {
96     return $self->do_err("Command '$com' not recognized or not supported!");
97     }
98     }
99    
100     sub replace_quoted_ids {
101     my $self = shift;
102     my $id = shift;
103     return $id unless $self->{struct}->{quoted_ids};
104     if ($id) {
105     if ($id =~ /^\?QI(\d+)\?$/) {
106     return '"'.$self->{struct}->{quoted_ids}->[$1].'"';
107     }
108     else {
109     return $id;
110     }
111     }
112     my @tables = @{$self->{struct}->{table_names}};
113     for my $t(@tables) {
114     if ($t =~ /^\?QI(.+)\?$/ ) {
115     $t = '"'.$self->{struct}->{quoted_ids}->[$1].'"';
116     # $t = $self->{struct}->{quoted_ids}->[$1];
117     }
118     }
119     $self->{struct}->{table_names} = \@tables;
120     delete $self->{struct}->{quoted_ids};
121     }
122    
123     sub structure { shift->{"struct"} }
124     sub command { my $x = shift->{"struct"}->{command} || '' }
125    
126     sub feature {
127     my($self,$opt_class,$opt_name,$opt_value) = @_;
128     if (defined $opt_value) {
129     if ( $opt_class eq 'select' ) {
130     $self->set_feature_flags( {"join"=>$opt_value} );
131     }
132     elsif ( $opt_class eq 'create' ) {
133     $self->set_feature_flags( undef, {$opt_name=>$opt_value} );
134     }
135     else {
136     $self->{$opt_class}->{$opt_name} = $opt_value;
137     }
138     }
139     else {
140     return $self->{"opts"}->{$opt_class}->{$opt_name};
141     }
142     }
143    
144     sub errstr { shift->{"struct"}->{"errstr"} }
145    
146     sub list {
147     my $self = shift;
148     my $com = uc shift;
149     return () if $com !~ /COMMANDS|RESERVED|TYPES|OPS|OPTIONS|DIALECTS/i;
150     $com = 'valid_commands' if $com eq 'COMMANDS';
151     $com = 'valid_comparison_operators' if $com eq 'OPS';
152     $com = 'valid_data_types' if $com eq 'TYPES';
153     $com = 'valid_options' if $com eq 'OPTIONS';
154     $com = 'reserved_words' if $com eq 'RESERVED';
155     $self->dialect( $self->{"dialect"} ) unless $self->{"dialect_set"};
156    
157     return sort keys %{ $self->{"opts"}->{$com} } unless $com eq 'DIALECTS';
158     my $dDir = "SQL/Dialects";
159     my @dialects;
160     for my $dir(@INC) {
161     local *D;
162    
163     if ( opendir(D,"$dir/$dDir") ) {
164     @dialects = grep /.*\.pm$/, readdir(D);
165     last;
166     }
167     }
168     @dialects = map { s/\.pm$//; $_} @dialects;
169     return @dialects;
170     }
171    
172     sub dialect {
173     my($self,$dialect) = @_;
174     return $self->{"dialect"} unless $dialect;
175     return $self->{"dialect"} if $self->{dialect_set};
176     $self->{"opts"} = {};
177     my $mod = "SQL/Dialects/$dialect.pm";
178     undef $@;
179     eval {
180     require "$mod";
181     };
182     return $self->do_err($@) if $@;
183     $mod =~ s/\.pm//;
184     $mod =~ s"/"::"g;
185     my @data = split /\n/, $mod->get_config;
186     my $feature;
187     for (@data) {
188     chomp;
189     s/^\s+//;
190     s/\s+$//;
191     next unless $_;
192     if (/^\[(.*)\]$/i) {
193     $feature = lc $1;
194     $feature =~ s/\s+/_/g;
195     next;
196     }
197     my $newopt = uc $_;
198     $newopt =~ s/\s+/ /g;
199     $self->{"opts"}->{$feature}->{$newopt} = 1;
200     }
201     $self->{"dialect"} = $dialect;
202     $self->{"dialect_set"}++;
203     }
204    
205     ##################################################################
206     # SQL COMMANDS
207     ##################################################################
208    
209     ####################################################
210     # DROP TABLE <table_name>
211     ####################################################
212     sub DROP {
213     my $self = shift;
214     my $stmt = shift;
215     my $table_name;
216     $self->{"struct"}->{"command"} = 'DROP';
217     if ($stmt =~ /^\s*DROP\s+(\S+)\s+(.+)$/si ) {
218     my $com2 = $1 || '';
219     $table_name = $2;
220     if ($com2 !~ /^TABLE$/i) {
221     return $self->do_err(
222     "The command 'DROP $com2' is not recognized or not supported!"
223     );
224     }
225     $table_name =~ s/^\s+//;
226     $table_name =~ s/\s+$//;
227     if ( $table_name =~ /(\S+) (RESTRICT|CASCADE)/i) {
228     $table_name = $1;
229     $self->{"struct"}->{"drop_behavior"} = uc $2;
230     }
231     }
232     else {
233     return $self->do_err( "Incomplete DROP statement!" );
234    
235     }
236     return undef unless $self->TABLE_NAME($table_name);
237     $table_name = $self->replace_quoted_ids($table_name);
238     $self->{"tmp"}->{"is_table_name"} = {$table_name => 1};
239     $self->{"struct"}->{"table_names"} = [$table_name];
240     return 1;
241     }
242    
243     ####################################################
244     # DELETE FROM <table_name> WHERE <search_condition>
245     ####################################################
246     sub DELETE {
247     my($self,$str) = @_;
248     $self->{"struct"}->{"command"} = 'DELETE';
249     my($table_name,$where_clause) = $str =~
250     /^DELETE FROM (\S+)(.*)$/i;
251     return $self->do_err(
252     'Incomplete DELETE statement!'
253     ) if !$table_name;
254     return undef unless $self->TABLE_NAME($table_name);
255     $self->{"tmp"}->{"is_table_name"} = {$table_name => 1};
256     $self->{"struct"}->{"table_names"} = [$table_name];
257     $self->{"struct"}->{"column_names"} = ['*'];
258     $where_clause =~ s/^\s+//;
259     $where_clause =~ s/\s+$//;
260     if ($where_clause) {
261     $where_clause =~ s/^WHERE\s*(.*)$/$1/i;
262     return undef unless $self->SEARCH_CONDITION($where_clause);
263     }
264     return 1;
265     }
266    
267     ##############################################################
268     # SELECT
269     ##############################################################
270     # SELECT [<set_quantifier>] <select_list>
271     # | <set_function_specification>
272     # FROM <from_clause>
273     # [WHERE <search_condition>]
274     # [ORDER BY <order_by_clause>]
275     # [LIMIT <limit_clause>]
276     ##############################################################
277    
278     sub SELECT {
279     my($self,$str) = @_;
280     $self->{"struct"}->{"command"} = 'SELECT';
281     my($from_clause,$where_clause,$order_clause,$limit_clause);
282     $str =~ s/^SELECT (.+)$/$1/i;
283     if ( $str =~ s/^(.+) LIMIT (.+)$/$1/i ) { $limit_clause = $2; }
284     if ( $str =~ s/^(.+) ORDER BY (.+)$/$1/i ) { $order_clause = $2; }
285     if ( $str =~ s/^(.+?) WHERE (.+)$/$1/i ) { $where_clause = $2; }
286     if ( $str =~ s/^(.+?) FROM (.+)$/$1/i ) { $from_clause = $2; }
287     else {
288     return $self->do_err("Couldn't find FROM clause in SELECT!");
289     }
290     return undef unless $self->FROM_CLAUSE($from_clause);
291     return undef unless $self->SELECT_CLAUSE($str);
292     if ($where_clause) {
293     return undef unless $self->SEARCH_CONDITION($where_clause);
294     }
295     if ($order_clause) {
296     return undef unless $self->SORT_SPEC_LIST($order_clause);
297     }
298     if ($limit_clause) {
299     return undef unless $self->LIMIT_CLAUSE($limit_clause);
300     }
301     if ( ( $self->{"struct"}->{"join"}->{"clause"}
302     and $self->{"struct"}->{"join"}->{"clause"} eq 'ON'
303     )
304     or ( $self->{"struct"}->{"multiple_tables"}
305     ###new
306     and !(scalar keys %{$self->{"struct"}->{"join"}})
307     # and !$self->{"struct"}->{"join"}
308     ###
309     ) ) {
310     return undef unless $self->IMPLICIT_JOIN();
311     }
312     return 1;
313     }
314    
315     sub IMPLICIT_JOIN {
316     my $self = shift;
317     delete $self->{"struct"}->{"multiple_tables"};
318     if ( !$self->{"struct"}->{"join"}->{"clause"}
319     or $self->{"struct"}->{"join"}->{"clause"} ne 'ON'
320     ) {
321     $self->{"struct"}->{"join"}->{"type"} = 'INNER';
322     $self->{"struct"}->{"join"}->{"clause"} = 'IMPLICIT';
323     }
324     if (defined $self->{"struct"}->{"keycols"} ) {
325     my @keys;
326     my @keys2 = @keys = @{ $self->{"struct"}->{"keycols"} };
327     $self->{"struct"}->{"join"}->{"table_order"} = $self->order_joins(\@keys2);
328     @{$self->{"struct"}->{"join"}->{"keycols"}} = @keys;
329     delete $self->{"struct"}->{"keycols"};
330     }
331     else {
332     return $self->do_err("No equijoin condition in WHERE or ON clause");
333     }
334     return 1;
335     }
336    
337     sub EXPLICIT_JOIN {
338     my $self = shift;
339     my $remainder = shift;
340     return undef unless $remainder;
341     my($tableA,$tableB,$keycols,$jtype,$natural);
342     if ($remainder =~ /^(.+?) (NATURAL|INNER|LEFT|RIGHT|FULL|UNION|JOIN)(.+)$/s){
343     $tableA = $1;
344     $remainder = $2.$3;
345     }
346     else {
347     ($tableA,$remainder) = $remainder =~ /^(\S+) (.*)/;
348     }
349     if ( $remainder =~ /^NATURAL (.+)/) {
350     $self->{"struct"}->{"join"}->{"clause"} = 'NATURAL';
351     $natural++;
352     $remainder = $1;
353     }
354     if ( $remainder =~
355     /^(INNER|LEFT|RIGHT|FULL|UNION) JOIN (.+)/
356     ) {
357     $jtype = $self->{"struct"}->{"join"}->{"clause"} = $1;
358     $remainder = $2;
359     $jtype = "$jtype OUTER" if $jtype !~ /INNER|UNION/;
360     }
361     if ( $remainder =~
362     /^(LEFT|RIGHT|FULL) OUTER JOIN (.+)/
363     ) {
364     $jtype = $self->{"struct"}->{"join"}->{"clause"} = $1 . " OUTER";
365     $remainder = $2;
366     }
367     if ( $remainder =~ /^JOIN (.+)/) {
368     $jtype = 'INNER';
369     $self->{"struct"}->{"join"}->{"clause"} = 'DEFAULT INNER';
370     $remainder = $1;
371     }
372     if ( $self->{"struct"}->{"join"} ) {
373     if ( $remainder && $remainder =~ /^(.+?) USING \(([^\)]+)\)(.*)/) {
374     $self->{"struct"}->{"join"}->{"clause"} = 'USING';
375     $tableB = $1;
376     my $keycolstr = $2;
377     $remainder = $3;
378     @$keycols = split /,/,$keycolstr;
379     }
380     if ( $remainder && $remainder =~ /^(.+?) ON (.+)/) {
381     $self->{"struct"}->{"join"}->{"clause"} = 'ON';
382     $tableB = $1;
383     my $keycolstr = $2;
384     $remainder = $3;
385     if ($keycolstr =~ / OR /i ) {
386     return $self->do_err(qq~Can't use OR in an ON clause!~,1);
387     }
388     @$keycols = split / AND /i,$keycolstr;
389     $self->{"tmp"}->{"is_table_name"}->{"$tableA"} = 1;
390     $self->{"tmp"}->{"is_table_name"}->{"$tableB"} = 1;
391     for (@$keycols) {
392     my($arg1,$arg2) = split / = /;
393     return undef unless $arg1 = $self->ROW_VALUE($arg1);
394     return undef unless $arg2 = $self->ROW_VALUE($arg2);
395     if ( $arg1->{"type"}eq 'column' and $arg2->{"type"}eq 'column'){
396     push @{ $self->{"struct"}->{"keycols"} }, $arg1->{"value"};
397     push @{ $self->{"struct"}->{"keycols"} }, $arg2->{"value"};
398     delete $self->{"struct"}->{"where_clause"};
399     }
400     }
401     }
402     elsif ($remainder =~ /^(.+?)$/i) {
403     $tableB = $1;
404     $remainder = $2;
405     }
406     $remainder =~ s/^\s+// if $remainder;
407     }
408     if ($jtype) {
409     $jtype = "NATURAL $jtype" if $natural;
410     if ($natural and $keycols) {
411     return $self->do_err(
412     qq~Can't use NATURAL with a USING or ON clause!~
413     );
414     }
415     return undef unless $self->TABLE_NAME_LIST("$tableA,$tableB");
416     $self->{"struct"}->{"join"}->{"type"} = $jtype;
417     $self->{"struct"}->{"join"}->{"keycols"} = $keycols if $keycols;
418     return 1;
419     }
420     return $self->do_err("Couldn't parse explicit JOIN!");
421     }
422    
423     sub SELECT_CLAUSE {
424     my($self,$str) = @_;
425     return undef unless $str;
426     if ($str =~ s/^(DISTINCT|ALL) (.+)$/$2/i) {
427     $self->{"struct"}->{"set_quantifier"} = uc $1;
428     }
429     if ($str =~ /[()]/) {
430     return undef unless $self->SET_FUNCTION_SPEC($str);
431     }
432     else {
433     return undef unless $self->SELECT_LIST($str);
434     }
435     }
436    
437     sub FROM_CLAUSE {
438     my($self,$str) = @_;
439     return undef unless $str;
440     if ($str =~ / JOIN /i ) {
441     return undef unless $self->EXPLICIT_JOIN($str);
442     }
443     else {
444     return undef unless $self->TABLE_NAME_LIST($str);
445     }
446     }
447    
448     sub INSERT {
449     my($self,$str) = @_;
450     my $col_str;
451     my($table_name,$val_str) = $str =~
452     /^INSERT\s+INTO\s+(.+?)\s+VALUES\s+\((.+?)\)$/i;
453     if ($table_name =~ /[()]/ ) {
454     ($table_name,$col_str,$val_str) = $str =~
455     /^INSERT\s+INTO\s+(.+?)\s+\((.+?)\)\s+VALUES\s+\((.+?)\)$/i;
456     }
457     return $self->do_err('No table name specified!') unless $table_name;
458     return $self->do_err('Missing values list!') unless $val_str;
459     return undef unless $self->TABLE_NAME($table_name);
460     $self->{"struct"}->{"command"} = 'INSERT';
461     $self->{"struct"}->{"table_names"} = [$table_name];
462     if ($col_str) {
463     return undef unless $self->COLUMN_NAME_LIST($col_str);
464     }
465     else {
466     $self->{"struct"}->{"column_names"} = ['*'];
467     }
468     return undef unless $self->LITERAL_LIST($val_str);
469     return 1;
470     }
471    
472     ###################################################################
473     # UPDATE ::=
474     #
475     # UPDATE <table> SET <set_clause_list> [ WHERE <search_condition>]
476     #
477     ###################################################################
478     sub UPDATE {
479     my($self,$str) = @_;
480     $self->{"struct"}->{"command"} = 'UPDATE';
481     my($table_name,$remainder) = $str =~
482     /^UPDATE (.+?) SET (.+)$/i;
483     return $self->do_err(
484     'Incomplete UPDATE clause'
485     ) if !$table_name or !$remainder;
486     return undef unless $self->TABLE_NAME($table_name);
487     $self->{"tmp"}->{"is_table_name"} = {$table_name => 1};
488     $self->{"struct"}->{"table_names"} = [$table_name];
489     my($set_clause,$where_clause) = $remainder =~
490     /(.*?) WHERE (.*)$/i;
491     $set_clause = $remainder if !$set_clause;
492     return undef unless $self->SET_CLAUSE_LIST($set_clause);
493     if ($where_clause) {
494     return undef unless $self->SEARCH_CONDITION($where_clause);
495     }
496     my @vals = @{$self->{"struct"}->{"values"}};
497     my $num_val_placeholders=0;
498     for my $v(@vals) {
499     $num_val_placeholders++ if $v->{"type"} eq 'placeholder';
500     }
501     $self->{"struct"}->{"num_val_placeholders"}=$num_val_placeholders;
502     return 1;
503     }
504    
505     #########
506     # CREATE
507     #########
508    
509     sub CREATE {
510     my $self = shift;
511     my $stmt = shift;
512     $self->{"struct"}->{"command"} = 'CREATE';
513     my($table_name,$table_element_def);
514     if ($stmt =~ /^CREATE (LOCAL|GLOBAL) TEMPORARY TABLE(.*)$/si ) {
515     $self->{"struct"}->{"table_type"} = "$1 TEMPORARY";
516     $stmt = "CREATE TABLE$2";
517     }
518     if ($stmt =~ /^(.*) ON COMMIT (DELETE|PRESERVE) ROWS\s*$/si ) {
519     $stmt = $1;
520     $self->{"struct"}->{"commit_behaviour"} = $2;
521     return $self->do_err(
522     "Can't specify commit behaviour for permanent tables."
523     )
524     if !defined $self->{"struct"}->{"table_type"}
525     or $self->{"struct"}->{"table_type"} !~ /TEMPORARY/;
526     }
527     if ($stmt =~ /^CREATE TABLE (\S+) \((.*)\)$/si ) {
528     $table_name = $1;
529     $table_element_def = $2;
530     }
531     else {
532     return $self->do_err( "Can't find column definitions!" );
533     }
534     $table_element_def =~ s/\s+\(/(/g;
535     my $primary_defined;
536     for my $col(split ',',$table_element_def) {
537     my($name,$type,$constraints)=($col =~/\s*(\S+)\s+(\S+)\s*(.*)/);
538     if (!$type) {
539     return $self->do_err( "Column definition is missing a data type!" );
540     }
541     return undef if !($self->IDENTIFIER($name));
542     # if ($name =~ /^\?QI(.+)\?$/ ) {
543     $name = $self->replace_quoted_ids($name);
544     # }
545     $constraints =~ s/^\s+//;
546     $constraints =~ s/\s+$//;
547     if ($constraints) {
548     $constraints =~ s/PRIMARY KEY/PRIMARY_KEY/i;
549     $constraints =~ s/NOT NULL/NOT_NULL/i;
550     my @c = split /\s+/, $constraints;
551     my %has_c;
552     for my $constr(@c) {
553     if ( $constr =~ /^\s*(UNIQUE|NOT_NULL|PRIMARY_KEY)\s*$/i ) {
554     my $cur_c = uc $1;
555     if ($has_c{$cur_c}) {
556     return $self->do_err(
557     qq~Duplicate constraint: '$constr'!~
558     );
559     }
560     if ($cur_c eq 'PRIMARY_KEY' and $primary_defined++ ) {
561     return $self->do_err(
562     qq~There can be only one PRIMARY KEY in a table!~
563     );
564     }
565     $constr =~ s/_/ /g;
566     push @{$self->{"struct"}->{"column_defs"}->{"$name"}->{"constraints"} }, $constr;
567    
568     }
569     else {
570     return $self->do_err("Bad column constraint: '$constr'!");
571     }
572     }
573     }
574     $type = uc $type;
575     my $length;
576     if ( $type =~ /(.+)\((.+)\)/ ) {
577     $type = $1;
578     $length = $2;
579     }
580     if (!$self->{"opts"}->{"valid_data_types"}->{"$type"}) {
581     return $self->do_err("'$type' is not a recognized data type!");
582     }
583     $self->{"struct"}->{"column_defs"}->{"$name"}->{"data_type"} = $type;
584     $self->{"struct"}->{"column_defs"}->{"$name"}->{"data_length"} = $length;
585     push @{$self->{"struct"}->{"column_names"}},$name;
586    
587     }
588     $self->{"struct"}->{"table_names"} = [$table_name];
589     return 1;
590     }
591    
592    
593     ###############
594     # SQL SUBRULES
595     ###############
596    
597     sub SET_CLAUSE_LIST {
598     my $self = shift;
599     my $set_string = shift;
600     my @sets = split /,/,$set_string;
601     my(@cols,@vals);
602     for(@sets) {
603     my($col,$val) = split / = /,$_;
604     return $self->do_err('Incomplete SET clause!') if !$col or !$val;
605     push @cols, $col;
606     push @vals, $val;
607     }
608     return undef unless $self->COLUMN_NAME_LIST(join ',',@cols);
609     return undef unless $self->LITERAL_LIST(join ',',@vals);
610     return 1;
611     }
612    
613     sub SET_QUANTIFIER {
614     my($self,$str) = @_;
615     if ($str =~ /^(DISTINCT|ALL)\s+(.*)$/si) {
616     $self->{"struct"}->{"set_quantifier"} = uc $1;
617     $str = $2;
618     }
619     return $str;
620     }
621    
622     sub SELECT_LIST {
623     my $self = shift;
624     my $col_str = shift;
625     if ( $col_str =~ /^\s*\*\s*$/ ) {
626     $self->{"struct"}->{"column_names"} = ['*'];
627     return 1;
628     }
629     my @col_list = split ',',$col_str;
630     if (!(scalar @col_list)) {
631     return $self->do_err('Missing column name list!');
632     }
633     my(@newcols,$newcol);
634     for my $col(@col_list) {
635     # $col = trim($col);
636     $col =~ s/^\s+//;
637     $col =~ s/\s+$//;
638     if ($col =~ /^(\S+)\.\*$/) {
639     my $table = $1;
640     return undef unless $self->TABLE_NAME($table);
641     $table = $self->replace_quoted_ids($table);
642     push @newcols, "$table.*";
643     }
644     else {
645     return undef unless $newcol = $self->COLUMN_NAME($col);
646     push @newcols, $newcol;
647     }
648     }
649     $self->{"struct"}->{"column_names"} = \@newcols;
650     return 1;
651     }
652    
653     sub SET_FUNCTION_SPEC {
654     my($self,$col_str) = @_;
655     my @funcs = split /,/, $col_str;
656     my %iscol;
657     for my $func(@funcs) {
658     if ($func =~ /^(COUNT|AVG|SUM|MAX|MIN) \((.*)\)\s*$/i ) {
659     my $set_function_name = uc $1;
660     my $set_function_arg = $2;
661     my $distinct;
662     if ( $set_function_arg =~ s/(DISTINCT|ALL) (.+)$/$2/i ) {
663     $distinct = uc $1;
664     }
665     my $count_star = 1 if $set_function_name eq 'COUNT'
666     and $set_function_arg eq '*';
667     my $ok = $self->COLUMN_NAME($set_function_arg)
668     if !$count_star;
669     return undef if !$count_star and !$ok;
670     if ($set_function_arg !~ /^"/) {
671     $set_function_arg = uc $set_function_arg;
672     }
673     push @{ $self->{"struct"}->{'set_function'}}, {
674     name => $set_function_name,
675     arg => $set_function_arg,
676     distinct => $distinct,
677     };
678     push( @{ $self->{"struct"}->{"column_names"} }, $set_function_arg)
679     if !$iscol{$set_function_arg}++
680     and ($set_function_arg ne '*');
681     }
682     else {
683     return $self->do_err("Bad set function before FROM clause.");
684     }
685     }
686     my $cname = $self->{"struct"}->{"column_names"};
687     if ( !$cname or not scalar @$cname ) {
688     $self->{"struct"}->{"column_names"} = ['*'];
689     }
690     return 1;
691     }
692    
693     sub LIMIT_CLAUSE {
694     my($self,$limit_clause) = @_;
695     # $limit_clause = trim($limit_clause);
696     $limit_clause =~ s/^\s+//;
697     $limit_clause =~ s/\s+$//;
698    
699     return 1 if !$limit_clause;
700     my($offset,$limit,$junk) = split /,/, $limit_clause;
701     return $self->do_err('Bad limit clause!')
702     if (defined $limit and $limit =~ /[^\d]/)
703     or ( defined $offset and $offset =~ /[^\d]/ )
704     or defined $junk;
705     if (defined $offset and !defined $limit) {
706     $limit = $offset;
707     undef $offset;
708     }
709     $self->{"struct"}->{"limit_clause"} = {
710     limit => $limit,
711     offset => $offset,
712     };
713     return 1;
714     }
715    
716     sub is_number {
717     my $x=shift;
718     return 0 if !defined $x;
719     return 1 if $x =~ /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/;
720     return 0;
721     }
722    
723     sub SORT_SPEC_LIST {
724     my($self,$order_clause) = @_;
725     return 1 if !$order_clause;
726     my %is_table_name = %{$self->{"tmp"}->{"is_table_name"}};
727     my %is_table_alias = %{$self->{"tmp"}->{"is_table_alias"}};
728     my @ocols;
729     my @order_columns = split ',',$order_clause;
730     for my $col(@order_columns) {
731     my $newcol;
732     my $newarg;
733     if ($col =~ /\s*(\S+)\s+(ASC|DESC)/si ) {
734     $newcol = $1;
735     $newarg = uc $2;
736     }
737     elsif ($col =~ /^\s*(\S+)\s*$/si ) {
738     $newcol = $1;
739     }
740     else {
741     return $self->do_err(
742     'Junk after column name in ORDER BY clause!'
743     );
744     }
745     return undef if !($newcol = $self->COLUMN_NAME($newcol));
746     if ($newcol =~ /^(.+)\..+$/s ) {
747     my $table = $1;
748     if ($table =~ /^'/) {
749     if (!$is_table_name{"$table"} and !$is_table_alias{"$table"} ) {
750     return $self->do_err( "Table '$table' in ORDER BY clause "
751     . "not in FROM clause."
752     );
753     }}
754     elsif (!$is_table_name{"\L$table"} and !$is_table_alias{"\L$table"} ) {
755     return $self->do_err( "Table '$table' in ORDER BY clause "
756     . "not in FROM clause."
757     );
758     }
759     }
760     push @ocols, {$newcol => $newarg};
761     }
762     $self->{"struct"}->{"sort_spec_list"} = \@ocols;
763     return 1;
764     }
765    
766     sub SEARCH_CONDITION {
767     my $self = shift;
768     my $str = shift;
769     $str =~ s/^\s*WHERE (.+)/$1/;
770     $str =~ s/^\s+//;
771     $str =~ s/\s+$//;
772     return $self->do_err("Couldn't find WHERE clause!") unless $str;
773     $str = get_btwn( $str );
774     $str = get_in( $str );
775     my $open_parens = $str =~ tr/\(//;
776     my $close_parens = $str =~ tr/\)//;
777     if ($open_parens != $close_parens) {
778     return $self->do_err("Mismatched parentheses in WHERE clause!");
779     }
780     $str = nongroup_numeric( nongroup_string( $str ) );
781     my $pred = $open_parens
782     ? $self->parens_search($str,[])
783     : $self->non_parens_search($str,[]);
784     return $self->do_err("Couldn't find predicate!") unless $pred;
785     $self->{"struct"}->{"where_clause"} = $pred;
786     return 1;
787     }
788    
789     ############################################################
790     # UTILITY FUNCTIONS CALLED TO PARSE PARENS IN WHERE CLAUSE
791     ############################################################
792    
793     # get BETWEEN clause
794     #
795     sub get_btwn {
796     my $str = shift;
797     if ($str =~ /^(.+?) BETWEEN (.+)$/i ) {
798     my($col,$in,$out,$contents);
799     my $front = $1;
800     my $back = $2;
801     my $not = 1 if $front =~ s/^(.+) NOT$/$1/i;
802     if ($front =~ s/^(.+? )(AND|OR|\() (.+)$/$1$2/i) {
803     $col = $3;
804     }
805     else {
806     $col = $front;
807     $front = '';
808     }
809     $front .= " NOT" if $not;
810     my($val1,$val2);
811     if ($back =~ s/^(.+?) AND (.+)$/$2/) {
812     $val1 = $1;
813     }
814     if ($back =~ s/^(.+?) (AND|OR)(.+)$/$2$3/i) {
815     $val2 = $1;
816     }
817     else {
818     $val2 = $back;
819     $back = '';
820     }
821     $str = "$front ($col > $val1 AND $col < $val2) $back";
822     return get_btwn($str);
823     }
824     return $str;
825     }
826    
827     # get IN clause
828     #
829     # a IN (b,c) -> (a=b OR a=c)
830     # a NOT IN (b,c) -> (a<>b AND a<>c)
831     #
832     sub get_in {
833     my $str = shift;
834     my $in_inside_parens;
835     if ($str =~ /^(.+?) IN (\(.+)$/i ) {
836     my($col,$in,$out,$contents);
837     my $front = $1;
838     my $back = $2;
839     my $not;
840     $not++ if $front =~ s/^(.+) NOT$/$1/i;
841     if ($front =~ s/^(.+? )(AND|OR|\() (.+)$/$1$2/i) {
842     $col = $3;
843     }
844     else {
845     $col = $front;
846     $not++ if $col =~ s/^NOT (.+)/$1/i;
847     $front = '';
848     }
849     if ( $col =~ s/^\(// ) {
850     $in_inside_parens++;
851     }
852     #print "~$not~\n";
853     # $front .= " NOT" if $not;
854     # $not++ if $front =~ s/^(.+) NOT$/$1/i;
855     my @chars = split '', $back;
856     for (0..$#chars) {
857     my $char = shift @chars;
858     $contents .= $char;
859     $in++ if $char eq '(';
860     if ( $char eq ')' ) {
861     $out++;
862     last if $in == $out;
863     }
864     }
865     $back = join '', @chars;
866     $back =~ s/\)$// if $in_inside_parens;
867     # print "\n[$front][$col][$contents][$back]\n";
868     #die "\n[$contents]\n";
869     $contents =~ s/^\(//;
870     $contents =~ s/\)$//;
871     my @vals = split /,/, $contents;
872     my $op = '=';
873     my $combiner = 'OR';
874     if ($not) {
875     $op = '<>';
876     $combiner = 'AND';
877     }
878     @vals = map { "$col $op $_" } @vals;
879     my $valStr = join " $combiner ", @vals;
880     $str = "$front ($valStr) $back";
881     $str =~ s/\s+/ /g;
882     return get_in($str);
883     }
884     $str =~ s/^\s+//;
885     $str =~ s/\s+$//;
886     $str =~ s/\(\s+/(/;
887     $str =~ s/\s+\)/)/;
888     #print "$str:\n";
889     return $str;
890     }
891    
892     # groups clauses by nested parens
893     #
894     sub parens_search {
895     my $self = shift;
896     my $str = shift;
897     my $predicates = shift;
898     my $index = scalar @$predicates;
899    
900     # to handle WHERE (a=b) AND (c=d)
901     if ($str =~ /\(([^()]+?)\)/ ) {
902     my $pred = quotemeta $1;
903     if ($pred !~ / (AND|OR) / ) {
904     $str =~ s/\(($pred)\)/$1/;
905     }
906     }
907     #
908    
909     if ($str =~ s/\(([^()]+)\)/^$index^/ ) {
910     push @$predicates, $1;
911     }
912     if ($str =~ /\(/ ) {
913     return $self->parens_search($str,$predicates);
914     }
915     else {
916     return $self->non_parens_search($str,$predicates);
917     }
918     }
919    
920     # creates predicates from clauses that either have no parens
921     # or ANDs or have been previously grouped by parens and ANDs
922     #
923     sub non_parens_search {
924     my $self = shift;
925     my $str = shift;
926     my $predicates = shift;
927     my $neg = 0;
928     my $nots = {};
929     if ( $str =~ s/^NOT (\^.+)$/$1/i ) {
930     $neg = 1;
931     $nots = {pred=>1};
932     }
933     my( $pred1, $pred2, $op );
934     my $and_preds =[];
935     ($str,$and_preds) = group_ands($str);
936     $str =~ s/^\s*\^0\^\s*$/$predicates->[0]/;
937     return if $str =~ /^\s*~0~\s*$/;
938     if ( ($pred1, $op, $pred2) = $str =~ /^(.+) (AND|OR) (.+)$/i ) {
939     $pred1 =~ s/\~(\d+)\~$/$and_preds->[$1]/;
940     $pred2 =~ s/\~(\d+)\~$/$and_preds->[$1]/;
941     $pred1 = $self->non_parens_search($pred1,$predicates);
942     $pred2 = $self->non_parens_search($pred2,$predicates);
943     return {
944     neg => $neg,
945     nots => $nots,
946     arg1 => $pred1,
947     op => uc $op,
948     arg2 => $pred2,
949     };
950     }
951     else {
952     my $xstr = $str;
953     $xstr =~ s/\?(\d+)\?/$self->{"struct"}->{"literals"}->[$1]/g;
954     my($k,$v) = $xstr =~ /^(\S+?)\s+\S+\s*(.+)\s*$/;
955     #print "$k,$v\n" if defined $k;
956     push @{ $self->{struct}->{where_cols}->{$k}}, $v if defined $k;
957     return $self->PREDICATE($str);
958     }
959     }
960    
961     # groups AND clauses that aren't already grouped by parens
962     #
963     sub group_ands{
964     my $str = shift;
965     my $and_preds = shift || [];
966     return($str,$and_preds) unless $str =~ / AND / and $str =~ / OR /;
967     if ($str =~ /^(.*?) AND (.*)$/i ) {
968     my $index = scalar @$and_preds;
969     my($front, $back)=($1,$2);
970     if ($front =~ /^.* OR (.*)$/i ) {
971     $front = $1;
972     }
973     if ($back =~ /^(.*?) (OR|AND) (.*)$/i ) {
974     $back = $1;
975     }
976     my $newpred = "$front AND $back";
977     push @$and_preds, $newpred;
978     $str =~ s/\Q$newpred/~$index~/i;
979     return group_ands($str,$and_preds);
980     }
981     else {
982     return $str,$and_preds;
983     }
984     }
985    
986     # replaces string function parens with square brackets
987     # e.g TRIM (foo) -> TRIM[foo]
988     #
989     sub nongroup_string {
990     my $str = shift;
991     $str =~ s/(TRIM|SUBSTRING|UPPER|LOWER) \(([^()]+)\)/$1\[$2\]/gi;
992     if ( $str =~ /(TRIM|SUBSTRING|UPPER|LOWER) \(/i ) {
993     return nongroup_string($str);
994     }
995     else {
996     return $str;
997     }
998     }
999    
1000     # replaces math parens with square brackets
1001     # e.g (4-(6+7)*9) -> MATH[4-MATH[6+7]*9]
1002     #
1003     sub nongroup_numeric {
1004     my $str = shift;
1005     my $has_op;
1006     if ( $str =~ /\(([0-9 \*\/\+\-_a-zA-Z\[\]\?]+)\)/ ) {
1007     my $match = $1;
1008     if ($match !~ /(LIKE |IS|BETWEEN|IN)/ ) {
1009     my $re = quotemeta($match);
1010     $str =~ s/\($re\)/MATH\[$match\]/;
1011     }
1012     else {
1013     $has_op++;
1014     }
1015     }
1016     if ( !$has_op and $str =~ /\(([0-9 \*\/\+\-_a-zA-Z\[\]\?]+)\)/ ) {
1017     return nongroup_numeric($str);
1018     }
1019     else {
1020     return $str;
1021     }
1022     }
1023     ############################################################
1024    
1025    
1026     #########################################################
1027     # LITERAL_LIST ::= <literal> [,<literal>]
1028     #########################################################
1029     sub LITERAL_LIST {
1030     my $self = shift;
1031     my $str = shift;
1032     my @tokens = split /,/, $str;
1033     my @values;
1034     for my $tok(@tokens) {
1035     my $val = $self->ROW_VALUE($tok);
1036     return $self->do_err(
1037     qq('$tok' is not a valid value or is not quoted!)
1038     ) unless $val;
1039     push @values, $val;
1040     }
1041     $self->{"struct"}->{"values"} = \@values;
1042     return 1;
1043     }
1044    
1045    
1046     ###################################################################
1047     # LITERAL ::= <quoted_string> | <question mark> | <number> | NULL
1048     ###################################################################
1049     sub LITERAL {
1050     my $self = shift;
1051     my $str = shift;
1052     return 'null' if $str =~ /^NULL$/i; # NULL
1053     # return 'empty_string' if $str =~ /^~E~$/i; # NULL
1054     if ($str eq '?') {
1055     $self->{struct}->{num_placeholders}++;
1056     return 'placeholder';
1057     }
1058     # return 'placeholder' if $str eq '?'; # placeholder question mark
1059     return 'string' if $str =~ /^'.*'$/s; # quoted string
1060     return 'number' if $str =~ # number
1061     /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/;
1062     return undef;
1063     }
1064     ###################################################################
1065     # PREDICATE
1066     ###################################################################
1067     sub PREDICATE {
1068     my $self = shift;
1069     my $str = shift;
1070     my @allops = keys %{ $self->{"opts"}->{"valid_comparison_operators"} };
1071     my @notops;
1072     for (@allops) { push (@notops, $_) if /NOT/i };
1073     my $ops = join '|', @notops;
1074     my $opexp = "^\\s*(.+)\\s+($ops)\\s+(.*)\\s*\$";
1075     my($arg1,$op,$arg2) = $str =~ /$opexp/i;
1076     if (!defined $op) {
1077     my @compops;
1078     for (@allops) { push (@compops, $_) if /<=|>=|<>/ };
1079     $ops = join '|', @compops;
1080     $opexp = "^\\s*(.+)\\s+($ops)\\s+(.*)\\s*\$";
1081     ($arg1,$op,$arg2) = $str =~ /$opexp/i;
1082     }
1083     if (!defined $op) {
1084     $ops = join '|', @allops;
1085     $opexp = "^\\s*(.+)\\s+($ops)\\s+(.*)\\s*\$";
1086     ($arg1,$op,$arg2) = $str =~ /$opexp/i;
1087     }
1088     $op = uc $op;
1089     if (!defined $arg1 || !defined $op || !defined $arg2) {
1090     return $self->do_err("Bad predicate: '$str'!");
1091     }
1092     my $negated = 0; # boolean value showing if predicate is negated
1093     my %not; # hash showing elements modified by NOT
1094     #
1095     # e.g. "NOT bar = foo" -> %not = (arg1=>1)
1096     # "bar NOT LIKE foo" -> %not = (op=>1)
1097     # "NOT bar NOT LIKE foo" -> %not = (arg1=>1,op=>1);
1098     # "NOT bar IS NOT NULL" -> %not = (arg1=>1,op=>1);
1099     # "bar = foo" -> %not = undef;
1100     #
1101     if ( $arg1 =~ s/^NOT (.+)$/$1/i ) {
1102     $not{arg1}++;
1103     }
1104     if ( $op =~ s/^(.+) NOT$/$1/i
1105     || $op =~ s/^NOT (.+)$/$1/i ) {
1106     $not{op}++;
1107     }
1108     $negated = 1 if %not and scalar keys %not == 1;
1109     return undef unless $arg1 = $self->ROW_VALUE($arg1);
1110     return undef unless $arg2 = $self->ROW_VALUE($arg2);
1111     if ( $arg1->{"type"}eq 'column'
1112     and $arg2->{"type"}eq 'column'
1113     and $op eq '='
1114     ) {
1115     push @{ $self->{"struct"}->{"keycols"} }, $arg1->{"value"};
1116     push @{ $self->{"struct"}->{"keycols"} }, $arg2->{"value"};
1117     }
1118     return {
1119     neg => $negated,
1120     nots => \%not,
1121     arg1 => $arg1,
1122     op => $op,
1123     arg2 => $arg2,
1124     };
1125     }
1126    
1127     sub undo_string_funcs {
1128     my $str = shift;
1129     $str =~ s/(TRIM|UPPER|LOWER|SUBSTRING)\[([^\]\[]+?)\]/$1 ($2)/;
1130     if ($str =~ /(TRIM|UPPER|LOWER|SUBSTRING)\[/) {
1131     return undo_string_funcs($str);
1132     }
1133     return $str;
1134     }
1135    
1136     sub undo_math_funcs {
1137     my $str = shift;
1138     $str =~ s/MATH\[([^\]\[]+?)\]/($1)/;
1139     if ($str =~ /MATH\[/) {
1140     return undo_math_funcs($str);
1141     }
1142     return $str;
1143     }
1144    
1145    
1146     ###################################################################
1147     # ROW_VALUE ::= <literal> | <column_name>
1148     ###################################################################
1149     sub ROW_VALUE {
1150     my $self = shift;
1151     my $str = shift;
1152     $str = undo_string_funcs($str);
1153     $str = undo_math_funcs($str);
1154     my $type;
1155    
1156     # MATH
1157     #
1158     if ($str =~ /[\*\+\-\/]/ ) {
1159     my @vals;
1160     my $i=-1;
1161     $str =~ s/([^\s\*\+\-\/\)\(]+)/push @vals,$1;$i++;"?$i?"/ge;
1162     my @newvalues;
1163     for (@vals) {
1164     my $val = $self->ROW_VALUE($_);
1165     if ($val && $val->{"type"} !~ /number|column|placeholder/) {
1166     return $self->do_err(qq[
1167     String '$val' not allowed in Numeric expression!
1168     ]);
1169     }
1170     push @newvalues,$val;
1171     }
1172     return {
1173     type => 'function',
1174     name => 'numeric_exp',
1175     str => $str,
1176     vals => \@newvalues,
1177     }
1178     }
1179    
1180     # SUBSTRING (value FROM start [FOR length])
1181     #
1182     if ($str =~ /^SUBSTRING \((.+?) FROM (.+)\)\s*$/i ) {
1183     my $name = 'SUBSTRING';
1184     my $start = $2;
1185     my $value = $self->ROW_VALUE($1);
1186     my $length;
1187     if ($start =~ /^(.+?) FOR (.+)$/i) {
1188     $start = $1;
1189     $length = $2;
1190     $length = $self->ROW_VALUE($length);
1191     }
1192     $start = $self->ROW_VALUE($start);
1193     $str =~ s/\?(\d+)\?/$self->{"struct"}->{"literals"}->[$1]/g;
1194     return $self->do_err(
1195     "Can't use a string as a SUBSTRING position: '$str'!")
1196     if $start->{"type"} eq 'string'
1197     or ($start->{"length"} and $start->{"length"}->{"type"} eq 'string');
1198     return undef unless $value;
1199     return $self->do_err(
1200     "Can't use a number in SUBSTRING: '$str'!")
1201     if $value->{"type"} eq 'number';
1202     return {
1203     "type" => 'function',
1204     "name" => $name,
1205     "value" => $value,
1206     "start" => $start,
1207     "length" => $length,
1208     };
1209     }
1210    
1211    
1212     # UPPER (value) and LOWER (value)
1213     #
1214     if ($str =~ /^(UPPER|LOWER) \((.+)\)\s*$/i ) {
1215     my $name = uc $1;
1216     my $value = $self->ROW_VALUE($2);
1217     return undef unless $value;
1218     $str =~ s/\?(\d+)\?/$self->{"struct"}->{"literals"}->[$1]/g;
1219     return $self->do_err(
1220     "Can't use a number in UPPER/LOWER: '$str'!")
1221     if $value->{"type"} eq 'number';
1222     return {
1223     type => 'function',
1224     name => $name,
1225     value => $value,
1226     };
1227     }
1228    
1229     # TRIM ( [ [TRAILING|LEADING|BOTH] ['char'] FROM ] value )
1230     #
1231     if ($str =~ /^(TRIM) \((.+)\)\s*$/i ) {
1232     my $name = uc $1;
1233     my $value = $2;
1234     my($trim_spec,$trim_char);
1235     if ($value =~ /^(.+) FROM ([^\(\)]+)$/i ) {
1236     my $front = $1;
1237     $value = $2;
1238     if ($front =~ /^\s*(TRAILING|LEADING|BOTH)(.*)$/i ) {
1239     $trim_spec = uc $1;
1240     # $trim_char = trim($2);
1241     $trim_char = $2;
1242     $trim_char =~ s/^\s+//;
1243     $trim_char =~ s/\s+$//;
1244     undef $trim_char if length($trim_char)==0;
1245     }
1246     else {
1247     # $trim_char = trim($front);
1248     $trim_char = $front;
1249     $trim_char =~ s/^\s+//;
1250     $trim_char =~ s/\s+$//;
1251     }
1252     }
1253     $trim_char =~ s/\?(\d+)\?/$self->{"struct"}->{"literals"}->[$1]/g if $trim_char;
1254     $value = $self->ROW_VALUE($value);
1255     return undef unless $value;
1256     $str =~ s/\?(\d+)\?/$self->{"struct"}->{"literals"}->[$1]/g;
1257     return $self->do_err(
1258     "Can't use a number in TRIM: '$str'!")
1259     if $value->{"type"} eq 'number';
1260     return {
1261     type => 'function',
1262     name => $name,
1263     value => $value,
1264     trim_spec => $trim_spec,
1265     trim_char => $trim_char,
1266     };
1267     }
1268    
1269     # STRING CONCATENATION
1270     #
1271     if ($str =~ /\|\|/ ) {
1272     my @vals = split / \|\| /,$str;
1273     my @newvals;
1274     for my $val(@vals) {
1275     my $newval = $self->ROW_VALUE($val);
1276     return undef unless $newval;
1277     return $self->do_err(
1278     "Can't use a number in string concatenation: '$str'!")
1279     if $newval->{"type"} eq 'number';
1280     push @newvals,$newval;
1281     }
1282     return {
1283     type => 'function',
1284     name => 'str_concat',
1285     value => \@newvals,
1286     };
1287     }
1288    
1289     # NULL, PLACEHOLDER, NUMBER
1290     #
1291     if ( $type = $self->LITERAL($str) ) {
1292     undef $str if $type eq 'null';
1293     # if ($type eq 'empty_string') {
1294     # $str = '';
1295     # $type = 'string';
1296     # }
1297     $str = '' if $str and $str eq q('');
1298     return { type => $type, value => $str };
1299     }
1300    
1301     # QUOTED STRING LITERAL
1302     #
1303     if ($str =~ /\?(\d+)\?/) {
1304     return { type =>'string',
1305     value => $self->{"struct"}->{"literals"}->[$1] };
1306     }
1307     # COLUMN NAME
1308     #
1309     return undef unless $str = $self->COLUMN_NAME($str);
1310     if ( $str =~ /^(.*)\./ && !$self->{"tmp"}->{"is_table_name"}->{"\L$1"}
1311     and !$self->{"tmp"}->{"is_table_alias"}->{"\L$1"} ) {
1312     return $self->do_err(
1313     "Table '$1' in WHERE clause not in FROM clause!"
1314     );
1315     }
1316     # push @{ $self->{"struct"}->{"where_cols"}},$str
1317     # unless $self->{"tmp"}->{"where_cols"}->{"$str"};
1318     $self->{"tmp"}->{"where_cols"}->{"$str"}++;
1319     return { type => 'column', value => $str };
1320     }
1321    
1322     ###############################################
1323     # COLUMN NAME ::= [<table_name>.] <identifier>
1324     ###############################################
1325    
1326     sub COLUMN_NAME {
1327     my $self = shift;
1328     my $str = shift;
1329     my($table_name,$col_name);
1330     if ( $str =~ /^\s*(\S+)\.(\S+)$/s ) {
1331     if (!$self->{"opts"}->{"valid_options"}->{"SELECT_MULTIPLE_TABLES"}) {
1332     return $self->do_err('Dialect does not support multiple tables!');
1333     }
1334     $table_name = $1;
1335     $col_name = $2;
1336     # my $alias = $self->{struct}->{table_alias} || [];
1337     # $table_name = shift @$alias if $alias;
1338     #print "$table_name : $alias";
1339     return undef unless $self->TABLE_NAME($table_name);
1340     $table_name = $self->replace_quoted_ids($table_name);
1341     my $ref;
1342     if ($table_name =~ /^"/) { #"
1343     if (!$self->{"tmp"}->{"is_table_name"}->{"$table_name"}
1344     and !$self->{"tmp"}->{"is_table_alias"}->{"$table_name"}
1345     ) {
1346     $self->do_err(
1347     "Table '$table_name' referenced but not found in FROM list!"
1348     );
1349     return undef;
1350     } }
1351     elsif (!$self->{"tmp"}->{"is_table_name"}->{"\L$table_name"}
1352     and !$self->{"tmp"}->{"is_table_alias"}->{"\L$table_name"}
1353     ) {
1354     $self->do_err(
1355     "Table '$table_name' referenced but not found in FROM list!"
1356     );
1357     return undef;
1358     }
1359     }
1360     else {
1361     $col_name = $str;
1362     }
1363     # $col_name = trim($col_name);
1364     $col_name =~ s/^\s+//;
1365     $col_name =~ s/\s+$//;
1366     return undef unless $col_name eq '*' or $self->IDENTIFIER($col_name);
1367     #
1368     # MAKE COL NAMES ALL LOWER CASE
1369     my $orgcol = $col_name;
1370     if ($col_name =~ /^\?QI(\d+)\?$/) {
1371     $col_name = $self->replace_quoted_ids($col_name);
1372     }
1373     else {
1374     # $col_name = lc $col_name;
1375     $col_name = uc $col_name;
1376     }
1377     $self->{struct}->{ORG_NAME}->{$col_name} = $orgcol;
1378     #
1379     #
1380     if ($table_name) {
1381     my $alias = $self->{tmp}->{is_table_alias}->{"\L$table_name"};
1382     #use mylibs; print "$table_name"; zwarn $self->{tmp}; exit;
1383     $table_name = $alias if defined $alias;
1384     $table_name = uc $table_name;
1385     $col_name = "$table_name.$col_name";
1386     }
1387     return $col_name;
1388     }
1389    
1390     #########################################################
1391     # COLUMN NAME_LIST ::= <column_name> [,<column_name>...]
1392     #########################################################
1393     sub COLUMN_NAME_LIST {
1394     my $self = shift;
1395     my $col_str = shift;
1396     my @col_list = split ',',$col_str;
1397     if (!(scalar @col_list)) {
1398     return $self->do_err('Missing column name list!');
1399     }
1400     my @newcols;
1401     my $newcol;
1402     for my $col(@col_list) {
1403     $col =~ s/^\s+//;
1404     $col =~ s/\s+$//;
1405     # return undef if !($newcol = $self->COLUMN_NAME(trim($col)));
1406     return undef if !($newcol = $self->COLUMN_NAME($col));
1407     push @newcols, $newcol;
1408     }
1409     $self->{"struct"}->{"column_names"} = \@newcols;
1410     return 1;
1411     }
1412    
1413    
1414     #####################################################
1415     # TABLE_NAME_LIST := <table_name> [,<table_name>...]
1416     #####################################################
1417     sub TABLE_NAME_LIST {
1418     my $self = shift;
1419     my $table_name_str = shift;
1420     my %aliases = ();
1421     my @tables;
1422     my @table_names = split ',', $table_name_str;
1423     if ( scalar @table_names > 1
1424     and !$self->{"opts"}->{"valid_options"}->{'SELECT_MULTIPLE_TABLES'}
1425     ) {
1426     return $self->do_err('Dialect does not support multiple tables!');
1427     }
1428     my %is_table_alias;
1429     for my $table_str(@table_names) {
1430     my($table,$alias);
1431     my(@tstr) = split / /,$table_str;
1432     if (@tstr == 1) { $table = $tstr[0]; }
1433     elsif (@tstr == 2) { $table = $tstr[0]; $alias = $tstr[1]; }
1434     # elsif (@tstr == 2) { $table = $tstr[1]; $alias = $tstr[0]; }
1435     elsif (@tstr == 3) {
1436     return $self->do_err("Can't find alias in FROM clause!")
1437     unless uc($tstr[1]) eq 'AS';
1438     $table = $tstr[0]; $alias = $tstr[2];
1439     # $table = $tstr[2]; $alias = $tstr[0];
1440     }
1441     else {
1442     return $self->do_err("Can't find table names in FROM clause!")
1443     }
1444     return undef unless $self->TABLE_NAME($table);
1445     $table = $self->replace_quoted_ids($table);
1446     push @tables, $table;
1447     if ($alias) {
1448     #die $alias, $table;
1449     return undef unless $self->TABLE_NAME($alias);
1450     $alias = $self->replace_quoted_ids($alias);
1451     if ($alias =~ /^"/) {
1452     push @{$aliases{$table}},"$alias";
1453     $is_table_alias{"$alias"}=$table;
1454     }
1455     else {
1456     push @{$aliases{$table}},"\L$alias";
1457     $is_table_alias{"\L$alias"}=$table;
1458     }
1459     # $aliases{$alias} = $table;
1460     }
1461     }
1462    
1463     # my %is_table_name = map { $_ => 1 } @tables,keys %aliases;
1464     my %is_table_name = map { lc $_ => 1 } @tables;
1465     #%is_table_alias = map { lc $_ => 1 } @aliases;
1466     $self->{"tmp"}->{"is_table_alias"} = \%is_table_alias;
1467     $self->{"tmp"}->{"is_table_name"} = \%is_table_name;
1468     $self->{"struct"}->{"table_names"} = \@tables;
1469     $self->{"struct"}->{"table_alias"} = \%aliases;
1470     $self->{"struct"}->{"multiple_tables"} = 1 if @tables > 1;
1471     return 1;
1472     }
1473    
1474     #############################
1475     # TABLE_NAME := <identifier>
1476     #############################
1477     sub TABLE_NAME {
1478     my $self = shift;
1479     my $table_name = shift;
1480     if ($table_name =~ /\s*(\S+)\s+\S+/s) {
1481     return $self->do_err("Junk after table name '$1'!");
1482     }
1483     $table_name =~ s/\s+//s;
1484     if (!$table_name) {
1485     return $self->do_err('No table name specified!');
1486     }
1487     return undef if !($self->IDENTIFIER($table_name));
1488     return 1;
1489     }
1490    
1491    
1492     ###################################################################
1493     # IDENTIFIER ::= <alphabetic_char> { <alphanumeric_char> | _ }...
1494     #
1495     # and must not be a reserved word or over 128 chars in length
1496     ###################################################################
1497     sub IDENTIFIER {
1498     my $self = shift;
1499     my $id = shift;
1500     if ($id =~ /^\?QI(.+)\?$/ ) {
1501     return 1;
1502     }
1503     return 1 if $id =~ /^".+?"$/s; # QUOTED IDENTIFIER
1504     my $err = "Bad table or column name '$id' "; # BAD CHARS
1505 joko 1.2 if ($id !~ /\w|\./) {
1506 joko 1.1 $err .= "has chars not alphanumeric or underscore!";
1507     return $self->do_err( $err );
1508     }
1509     if ($id =~ /^_/ or $id =~ /^\d/) { # BAD START
1510     $err .= "starts with non-alphabetic character!";
1511     return $self->do_err( $err );
1512     }
1513     if ( length $id > 128 ) { # BAD LENGTH
1514     $err .= "contains more than 128 characters!";
1515     return $self->do_err( $err );
1516     }
1517     if ( $self->{"opts"}->{"reserved_words"}->{"$id"} ) { # BAD RESERVED WORDS
1518     $err .= "is a SQL reserved word!";
1519     return $self->do_err( $err );
1520     }
1521     return 1;
1522     }
1523    
1524     ########################################
1525     # PRIVATE METHODS AND UTILITY FUNCTIONS
1526     ########################################
1527     sub order_joins {
1528     my $self = shift;
1529     my $links = shift;
1530     for my $link(@$links) {
1531     if ($link !~ /\./) {
1532     return [];
1533     }
1534     }
1535     @$links = map { s/^(.+)\..*$/$1/; $1; } @$links;
1536     my @all_tables;
1537     my %relations;
1538     my %is_table;
1539     while (@$links) {
1540     my $t1 = shift @$links;
1541     my $t2 = shift @$links;
1542     return undef unless defined $t1 and defined $t2;
1543     push @all_tables, $t1 unless $is_table{$t1}++;
1544     push @all_tables, $t2 unless $is_table{$t2}++;
1545     $relations{$t1}{$t2}++;
1546     $relations{$t2}{$t1}++;
1547     }
1548     my @tables = @all_tables;
1549     my @order = shift @tables;
1550     my %is_ordered = ( $order[0] => 1 );
1551     my %visited;
1552     while(@tables) {
1553     my $t = shift @tables;
1554     my @rels = keys %{$relations{$t}};
1555     for my $t2(@rels) {
1556     next unless $is_ordered{$t2};
1557     push @order, $t;
1558     $is_ordered{$t}++;
1559     last;
1560     }
1561     if (!$is_ordered{$t}) {
1562     push @tables, $t if $visited{$t}++ < @all_tables;
1563     }
1564     }
1565     return $self->do_err(
1566     "Unconnected tables in equijoin statement!"
1567     ) if @order < @all_tables;
1568     return \@order;
1569     }
1570    
1571     sub bless_me {
1572     my $class = shift;
1573     my $self = shift || {};
1574     return bless $self, $class;
1575     }
1576    
1577     # PROVIDE BACKWARD COMPATIBILIT FOR JOCHEN'S FEATURE ATTRIBUTES TO NEW
1578     #
1579     #
1580     sub set_feature_flags {
1581     my($self,$select,$create) = @_;
1582     if (defined $select) {
1583     delete $self->{"select"};
1584     $self->{"opts"}->{"valid_options"}->{"SELECT_MULTIPLE_TABLES"} =
1585     $self->{"opts"}->{"select"}->{"join"} = $select->{"join"};
1586     }
1587     if (defined $create) {
1588     delete $self->{"create"};
1589     for my $key(keys %$create) {
1590     my $type = $key;
1591     $type =~ s/type_(.*)/\U$1/;
1592     $self->{"opts"}->{"valid_data_types"}->{"$type"} =
1593     $self->{"opts"}->{"create"}->{"$key"} = $create->{"$key"};
1594     }
1595     }
1596     }
1597    
1598     sub clean_sql {
1599     my $self = shift;
1600     my $sql = shift;
1601     my $fields;
1602     my $i=-1;
1603     my $e = '\\';
1604     $e = quotemeta($e);
1605     ###new
1606     # CAN'T HANDLE BLOBS!!!
1607     # $sql = quotemeta($sql);
1608     # print "[$sql]\n";
1609     # if ($sql =~ s/^(.*,\s*)''(\s*[,\)].*)$/${1}NULL$2/g ) {
1610     # }
1611     # $sql =~ s/^([^\\']+?)''(.*)$/${1} NULL $2/g;
1612    
1613     # $sql =~ s/([^\\]+?)''/$1 ~E~ /g;
1614    
1615     # print "$sql\n";
1616     ###newend
1617    
1618     # $sql =~ s~'(([^'$e]|$e.)+)'~push(@$fields,$1);$i++;"?$i?"~ge;
1619     $sql =~ s~'(([^'$e]|$e.|'')+)'~push(@$fields,$1);$i++;"?$i?"~ge;
1620    
1621     # $sql =~ s/([^\\]+?)''/$1 ~E~ /g;
1622     #print "<$sql>";
1623     @$fields = map { s/''/\\'/g; $_ } @$fields;
1624    
1625     ###new
1626     # if ( $sql =~ /'/) {
1627     if ( $sql =~ tr/[^\\]'// % 2 == 1 ) {
1628     ###endnew
1629     $sql =~ s/^.*\?(.+)$/$1/;
1630     die "Mismatched single quote before: '$sql\n";
1631     }
1632     if ($sql =~ /\?\?(\d)\?/) {
1633     $sql = $fields->[$1];
1634     die "Mismatched single quote: '$sql\n";
1635     }
1636     @$fields = map { s/$e'/'/g; s/^'(.*)'$/$1/; $_} @$fields;
1637     $self->{"struct"}->{"literals"} = $fields;
1638    
1639     my $qids;
1640     $i=-1;
1641     $e = q/""/;
1642     # $sql =~ s~"(([^"$e]|$e.)+)"~push(@$qids,$1);$i++;"?QI$i?"~ge;
1643     $sql =~ s~"(([^"]|"")+)"~push(@$qids,$1);$i++;"?QI$i?"~ge;
1644     #@$qids = map { s/$e'/'/g; s/^'(.*)'$/$1/; $_} @$qids;
1645     $self->{"struct"}->{"quoted_ids"} = $qids if $qids;
1646    
1647     # $sql =~ s~'(([^'\\]|\\.)+)'~push(@$fields,$1);$i++;"?$i?"~ge;
1648     # @$fields = map { s/\\'/'/g; s/^'(.*)'$/$1/; $_} @$fields;
1649     #print "$sql [@$fields]\n";# if $sql =~ /SELECT/;
1650    
1651     ## before line 1511
1652     my $comment_re = $self->{"comment_re"};
1653     # if ( $sql =~ s/($comment_re)//gs) {
1654     # $self->{"comment"} = $1;
1655     # }
1656     if ( $sql =~ /(.*)$comment_re$/s) {
1657     $sql = $1;
1658     $self->{"comment"} = $2;
1659     }
1660     if ($sql =~ /^(.*)--(.*)(\n|$)/) {
1661     $sql = $1;
1662     $self->{"comment"} = $2;
1663     }
1664    
1665     $sql =~ s/\n/ /g;
1666     $sql =~ s/\s+/ /g;
1667     $sql =~ s/(\S)\(/$1 (/g; # ensure whitespace before (
1668     $sql =~ s/\)(\S)/) $1/g; # ensure whitespace after )
1669     $sql =~ s/\(\s*/(/g; # trim whitespace after (
1670     $sql =~ s/\s*\)/)/g; # trim whitespace before )
1671     #
1672     # $sql =~ s/\s*\(/(/g; # trim whitespace before (
1673     # $sql =~ s/\)\s*/)/g; # trim whitespace after )
1674     for my $op( qw( = <> < > <= >= \|\|) ) {
1675     $sql =~ s/(\S)$op/$1 $op/g;
1676     $sql =~ s/$op(\S)/$op $1/g;
1677     }
1678     $sql =~ s/< >/<>/g;
1679     $sql =~ s/< =/<=/g;
1680     $sql =~ s/> =/>=/g;
1681     $sql =~ s/\s*,/,/g;
1682     $sql =~ s/,\s*/,/g;
1683     $sql =~ s/^\s+//;
1684     $sql =~ s/\s+$//;
1685     return $sql;
1686     }
1687    
1688     sub trim {
1689     my $str = shift or return '';
1690     $str =~ s/^\s+//;
1691     $str =~ s/\s+$//;
1692     return $str;
1693     }
1694    
1695     sub do_err {
1696     my $self = shift;
1697     my $err = shift;
1698     my $errtype = shift;
1699     my @c = caller 4;
1700     $err = "$err\n\n";
1701     # $err = $errtype ? "DIALECT ERROR: $err in $c[3]"
1702     # : "SQL ERROR: $err in $c[3]";
1703     $err = $errtype ? "DIALECT ERROR: $err"
1704     : "SQL ERROR: $err";
1705     $self->{"struct"}->{"errstr"} = $err;
1706     #$self->{"errstr"} = $err;
1707     warn $err if $self->{"PrintError"};
1708     die if $self->{"RaiseError"};
1709     return undef;
1710     }
1711    
1712     1;
1713    
1714     __END__
1715    
1716    
1717     =head1 NAME
1718    
1719     SQL::Parser -- validate, parse, or build SQL strings
1720    
1721     =head1 SYNOPSIS
1722    
1723     use SQL::Parser; # CREATE A PARSER OBJECT
1724     my $parser = SQL::Parser->new( $dialect, \%attrs );
1725    
1726     my $success = $parser->parse( $sql_string ); # PARSE A SQL STRING &
1727     if ($success) { # DISPLAY RESULTING DATA
1728     use Data::Dumper; # STRUCTURE
1729     print Dumper $parser->structure;
1730     }
1731    
1732     $parser->feature( $class, $name, $value ); # SET OR FIND STATUS OF
1733     my $has_feature = $parser->feature( $class, $name ); # A PARSER FEATURE
1734    
1735     $parser->dialect( $dialect_name ); # SET OR FIND STATUS OF
1736     my $current_dialect = $parser->dialect; # A PARSER DIALECT
1737    
1738     print $parser->errstr; # DISPLAY CURRENT ERROR
1739     # STRING
1740    
1741    
1742     =head1 DESCRIPTION
1743    
1744     SQL::Parser is a parser, builder, and sytax validator for a
1745     small but useful subset of SQL (Structured Query Language). It
1746     accepts SQL strings and returns either a detailed error message
1747     if the syntax is invalid or a data structure containing the
1748     results of the parse if the syntax is valid. It will soon also
1749     work in reverse to build a SQL string from a supplied data
1750     structure.
1751    
1752     The module can be used in batch mode to validate a series of
1753     statements, or as middle-ware for DBI drivers or other related
1754     projects. When combined with SQL::Statement version 0.2 or
1755     greater, the module can be used to actually perform the SQL
1756     commands on a variety of file formats using DBD::AnyData, or
1757     DBD::CSV, or DBD::Excel.
1758    
1759     The module makes use of a variety of configuration files
1760     located in the SQL/Dialects directory, each of which is
1761     essentially a simple text file listing things like supported
1762     data types, reserved words, and other features specific to a
1763     given dialect of SQL. These features can also be turned on or
1764     off during program execution.
1765    
1766     =head1 SUPPORTED SQL SYNTAX
1767    
1768     This module is meant primarly as a base class for DBD drivers
1769     and as such concentrates on a small but useful subset of SQL 92.
1770     It does *not* in any way pretend to be a complete SQL 92 parser.
1771     The module will continue to add new supported syntax, currently,
1772     this is what is supported:
1773    
1774     =head2 CREATE TABLE
1775    
1776     CREATE [ {LOCAL|GLOBAL} TEMPORARY ] TABLE $table
1777     (
1778     $col_1 $col_type1 $col_constraints1,
1779     ...,
1780     $col_N $col_typeN $col_constraintsN,
1781     )
1782     [ ON COMMIT {DELETE|PRESERVE} ROWS ]
1783    
1784     * col_type must be a valid data type as defined in the
1785     "valid_data_types" section of the dialect file for the
1786     current dialect
1787    
1788     * col_constriaints may be "PRIMARY KEY" or one or both of
1789     "UNIQUE" and/or "NOT NULL"
1790    
1791     * IMPORTANT NOTE: temporary tables, data types and column
1792     constraints are checked for syntax violations but are
1793     currently otherwise *IGNORED* -- they are recognized by
1794     the parser, but not by the execution engine
1795    
1796     * The following valid ANSI SQL92 options are not currently
1797     supported: table constraints, named constraints, check
1798     constriants, reference constraints, constraint
1799     attributes, collations, default clauses, domain names as
1800     data types
1801    
1802     =head2 DROP TABLE
1803    
1804     DROP TABLE $table [ RESTRICT | CASCADE ]
1805    
1806     * IMPORTANT NOTE: drop behavior (cascade or restrict) is
1807     checked for valid syntax but is otherwise *IGNORED* -- it
1808     is recognized by the parser, but not by the execution
1809     engine
1810    
1811     =head2 INSERT INTO
1812    
1813     INSERT INTO $table [ ( $col1, ..., $colN ) ] VALUES ( $val1, ... $valN )
1814    
1815     * default values are not currently supported
1816     * inserting from a subquery is not currently supported
1817    
1818     =head2 DELETE FROM
1819    
1820     DELETE FROM $table [ WHERE search_condition ]
1821    
1822     * see "search_condition" below
1823    
1824     =head2 UPDATE
1825    
1826     UPDATE $table SET $col1 = $val1, ... $colN = $valN [ WHERE search_condition ]
1827    
1828     * default values are not currently supported
1829     * see "search_condition" below
1830    
1831     =head2 SELECT
1832    
1833     SELECT select_clause
1834     FROM from_clause
1835     [ WHERE search_condition ]
1836     [ ORDER BY $ocol1 [ASC|DESC], ... $ocolN [ASC|DESC] ]
1837     [ LIMIT [start,] length ]
1838    
1839     * select clause ::=
1840     [DISTINCT|ALL] *
1841     | [DISTINCT|ALL] col1 [,col2, ... colN]
1842     | set_function1 [,set_function2, ... set_functionN]
1843    
1844     * set function ::=
1845     COUNT ( [DISTINCT|ALL] * )
1846     | COUNT | MIN | MAX | AVG | SUM ( [DISTINCT|ALL] col_name )
1847    
1848     * from clause ::=
1849     table1 [, table2, ... tableN]
1850     | table1 NATURAL [join_type] JOIN table2
1851     | table1 [join_type] table2 USING (col1,col2, ... colN)
1852     | table1 [join_type] JOIN table2 ON (table1.colA = table2.colB)
1853    
1854     * join type ::=
1855     INNER
1856     | [OUTER] LEFT | RIGHT | FULL
1857    
1858     * if join_type is not specified, INNER is the default
1859     * if DISTINCT or ALL is not specified, ALL is the default
1860     * if start position is omitted from LIMIT clause, position 0 is
1861     the default
1862     * ON clauses may only contain equal comparisons and AND combiners
1863     * self-joins are not currently supported
1864     * if implicit joins are used, the WHERE clause must contain
1865     and equijoin condition for each table
1866    
1867    
1868     =head2 SEARCH CONDITION
1869    
1870     [NOT] $val1 $op1 $val1 [ ... AND|OR $valN $opN $valN ]
1871    
1872    
1873     =head2 OPERATORS
1874    
1875     $op = | <> | < | > | <= | >=
1876     | IS NULL | IS NOT NULL | LIKE | CLIKE | BETWEEN | IN
1877    
1878     The "CLIKE" operator works exactly the same as the "LIKE"
1879     operator, but is case insensitive. For example:
1880    
1881     WHERE foo LIKE 'bar%' # succeeds if foo is "barbaz"
1882     # fails if foo is "BARBAZ" or "Barbaz"
1883    
1884     WHERE foo CLIKE 'bar%' # succeeds for "barbaz", "Barbaz", and "BARBAZ"
1885    
1886    
1887     =head2 STRING FUNCTIONS & MATH EXPRESSIONS
1888    
1889     String functions and math expressions are supported in WHERE
1890     clauses, in the VALUES part of an INSERT and UPDATE
1891     statements. They are not currently supported in the SELECT
1892     statement. For example:
1893    
1894     SELECT * FROM foo WHERE UPPER(bar) = 'baz' # SUPPORTED
1895    
1896     SELECT UPPER(foo) FROM bar # NOT SUPPORTED
1897    
1898     =over
1899    
1900     =item TRIM ( [ [LEADING|TRAILING|BOTH] ['trim_char'] FROM ] string )
1901    
1902     Removes all occurrences of <trim_char> from the front, back, or
1903     both sides of a string.
1904    
1905     BOTH is the default if neither LEADING nor TRAILING is specified.
1906    
1907     Space is the default if no trim_char is specified.
1908    
1909     Examples:
1910    
1911     TRIM( string )
1912     trims leading and trailing spaces from string
1913    
1914     TRIM( LEADING FROM str )
1915     trims leading spaces from string
1916    
1917     TRIM( 'x' FROM str )
1918     trims leading and trailing x's from string
1919    
1920     =item SUBSTRING( string FROM start_pos [FOR length] )
1921    
1922     Returns the substring starting at start_pos and extending for
1923     "length" character or until the end of the string, if no
1924     "length" is supplied. Examples:
1925    
1926     SUBSTRING( 'foobar' FROM 4 ) # returns "bar"
1927    
1928     SUBSTRING( 'foobar' FROM 4 FOR 2) # returns "ba"
1929    
1930    
1931     =item UPPER(string) and LOWER(string)
1932    
1933     These return the upper-case and lower-case variants of the string:
1934    
1935     UPPER('foo') # returns "FOO"
1936     LOWER('FOO') # returns "foo"
1937    
1938     =back
1939    
1940     =head2 Identifiers (table & column names)
1941    
1942     Regular identifiers (table and column names *without* quotes around them) are case INSENSITIVE so column foo, fOo, FOO all refer to the same column.
1943    
1944     Delimited identifiers (table and column names *with* quotes around them) are case SENSITIVE so column "foo", "fOo", "FOO" each refer to different columns.
1945    
1946     A delimited identifier is *never* equal to a regular identifer (so "foo" and foo are two different columns). But don't do that :-).
1947    
1948     Remember thought that, in DBD::CSV if table names are used directly as file names, the case sensitivity depends on the OS e.g. on Windows files named foo, FOO, and fOo are the same as each other while on Unix they are different.
1949    
1950    
1951     =head1 METHODS
1952    
1953     =head2 new()
1954    
1955     The new() method creates a SQL::Parser object which can then be
1956     used to parse, validate, or build SQL strings. It takes one
1957     required parameter -- the name of the SQL dialect that will
1958     define the rules for the parser. A second optional parameter is
1959     a reference to a hash which can contain additional attributes of
1960     the parser.
1961    
1962     use SQL::Parser;
1963     my $parser = SQL::Parser->new( $dialect_name, \%attrs );
1964    
1965     The dialect_name parameter is a string containing any valid
1966     dialect such as 'ANSI', 'AnyData', or 'CSV'. See the section on
1967     the dialect() method below for details.
1968    
1969     The attribute parameter is a reference to a hash that can
1970     contain error settings for the PrintError and RaiseError
1971     attributes. See the section below on the parse() method for
1972     details.
1973    
1974     An example:
1975    
1976     use SQL::Parser;
1977     my $parser = SQL::Parser->new('AnyData', {RaiseError=>1} );
1978    
1979     This creates a new parser that uses the grammar rules
1980     contained in the .../SQL/Dialects/AnyData.pm file and which
1981     sets the RaiseError attribute to true.
1982    
1983     For those needing backwards compatibility with SQL::Statement
1984     version 0.1x and lower, the attribute hash may also contain
1985     feature settings. See the section "FURTHER DETAILS - Backwards
1986     Compatibility" below for details.
1987    
1988    
1989     =head2 parse()
1990    
1991     Once a SQL::Parser object has been created with the new()
1992     method, the parse() method can be used to parse any number of
1993     SQL strings. It takes a single required parameter -- a string
1994     containing a SQL command. The SQL string may optionally be
1995     terminated by a semicolon. The parse() method returns a true
1996     value if the parse is successful and a false value if the parse
1997     finds SQL syntax errors.
1998    
1999     Examples:
2000    
2001     1) my $success = $parser->parse('SELECT * FROM foo');
2002    
2003     2) my $sql = 'SELECT * FROM foo';
2004     my $success = $parser->parse( $sql );
2005    
2006     3) my $success = $parser->parse(qq!
2007     SELECT id,phrase
2008     FROM foo
2009     WHERE id < 7
2010     AND phrase <> 'bar'
2011     ORDER BY phrase;
2012     !);
2013    
2014     4) my $success = $parser->parse('SELECT * FRoOM foo ');
2015    
2016     In examples #1,#2, and #3, the value of $success will be true
2017     because the strings passed to the parse() method are valid SQL
2018     strings.
2019    
2020     In example #4, however, the value of $success will be false
2021     because the string contains a SQL syntax error ('FRoOM' instead
2022     of 'FROM').
2023    
2024     In addition to checking the return value of parse() with a
2025     variable like $success, you may use the PrintError and
2026     RaiseError attributes as you would in a DBI script:
2027    
2028     * If PrintError is true, then SQL syntax errors will be sent as
2029     warnings to STDERR (i.e. to the screen or to a file if STDERR
2030     has been redirected). This is set to true by default which
2031     means that unless you specifically turn it off, all errors
2032     will be reported.
2033    
2034     * If RaiseError is true, then SQL syntax errors will cause the
2035     script to die, (i.e. the script will terminate unless wrapped
2036     in an eval). This is set to false by default which means
2037     that unless you specifically turn it on, scripts will
2038     continue to operate even if there are SQL syntax errors.
2039    
2040     Basically, you should leave PrintError on or else you will not
2041     be warned when an error occurs. If you are simply validating a
2042     series of strings, you will want to leave RaiseError off so that
2043     the script can check all strings regardless of whether some of
2044     them contain SQL errors. However, if you are going to try to
2045     execute the SQL or need to depend that it is correct, you should
2046     set RaiseError on so that the program will only continue to
2047     operate if all SQL strings use correct syntax.
2048    
2049     IMPORTANT NOTE #1: The parse() method only checks syntax, it
2050     does NOT verify if the objects listed actually exist. For
2051     example, given the string "SELECT model FROM cars", the parse()
2052     method will report that the string contains valid SQL but that
2053     will not tell you whether there actually is a table called
2054     "cars" or whether that table contains a column called 'model'.
2055     Those kinds of verifications can be performed by the
2056     SQL::Statement module, not by SQL::Parser by itself.
2057    
2058     IMPORTANT NOTE #2: The parse() method uses rules as defined by
2059     the selected dialect configuration file and the feature()
2060     method. This means that a statement that is valid in one
2061     dialect may not be valid in another. For example the 'CSV' and
2062     'AnyData' dialects define 'BLOB' as a valid data type but the
2063     'ANSI' dialect does not. Therefore the statement 'CREATE TABLE
2064     foo (picture BLOB)' would be valid in the first two dialects but
2065     would produce a syntax error in the 'ANSI' dialect.
2066    
2067     =head2 structure()
2068    
2069     After a SQL::Parser object has been created and the parse()
2070     method used to parse a SQL string, the structure() method
2071     returns the data structure of that string. This data structure
2072     may be passed on to other modules (e.g. SQL::Statement) or it
2073     may be printed out using, for example, the Data::Dumper module.
2074    
2075     The data structure contains all of the information in the SQL
2076     string as parsed into its various components. To take a simple
2077     example:
2078    
2079     $parser->parse('SELECT make,model FROM cars');
2080     use Data::Dumper;
2081     print Dumper $parser->structure;
2082    
2083     Would produce:
2084    
2085     $VAR1 = {
2086     'column_names' => [
2087     'make',
2088     'model'
2089     ],
2090     'command' => 'SELECT',
2091     'table_names' => [
2092     'cars'
2093     ]
2094     };
2095    
2096     Please see the section "FURTHER DETAILS -- Parse structures"
2097     below for further examples.
2098    
2099     =head2 build()
2100    
2101     This method is in progress and should be available soon.
2102    
2103     =head2 dialect()
2104    
2105     $parser->dialect( $dialect_name ); # load a dialect configuration file
2106     my $dialect = $parser->dialect; # get the name of the current dialect
2107    
2108     For example:
2109    
2110     $parser->dialect('AnyData'); # loads the AnyData config file
2111     print $parser->dialect; # prints 'AnyData'
2112    
2113     The $dialect_name parameter may be the name of any dialect
2114     configuration file on your system. Use the
2115     $parser->list('dialects') method to see a list of available
2116     dialects. At a minimum it will include "ANSI", "CSV", and
2117     "AnyData". For backwards compatiblity 'Ansi' is accepted as a
2118     synonym for 'ANSI', otherwise the names are case sensitive.
2119    
2120     Loading a new dialect configuration file erases all current
2121     parser features and resets them to those defined in the
2122     configuration file.
2123    
2124     See the section above on "Dialects" for details of these
2125     configuration files.
2126    
2127     =head2 feature()
2128    
2129     Features define the rules to be used by a specific parser
2130     instance. They are divided into the following classes:
2131    
2132     * valid_commands
2133     * valid_options
2134     * valid_comparison_operators
2135     * valid_data_types
2136     * reserved_words
2137    
2138     Within each class a feature name is either enabled or
2139     disabled. For example, under "valid_data_types" the name "BLOB"
2140     may be either disabled or enabled. If it is not eneabled
2141     (either by being specifically disabled, or simply by not being
2142     specified at all) then any SQL string using "BLOB" as a data
2143     type will throw a syntax error "Invalid data type: 'BLOB'".
2144    
2145     The feature() method allows you to enable, disable, or check the
2146     status of any feature.
2147    
2148     $parser->feature( $class, $name, 1 ); # enable a feature
2149    
2150     $parser->feature( $class, $name, 0 ); # disable a feature
2151    
2152     my $feature = $parser->feature( $class, $name ); # show status of a feature
2153    
2154     For example:
2155    
2156     $parser->feature('reserved_words','FOO',1); # make 'FOO' a reserved word
2157    
2158     $parser->feature('valid_data_types','BLOB',0); # disallow 'BLOB' as a
2159     # data type
2160    
2161     # determine if the LIKE
2162     # operator is supported
2163     my $LIKE = $parser->feature('valid_operators','LIKE');
2164    
2165     See the section below on "Backwards Compatibility" for use of
2166     the feature() method with SQL::Statement 0.1x style parameters.
2167    
2168     =head2 list()
2169    
2170     =head2 errstr()
2171    
2172     =head1 FURTHER DETAILS
2173    
2174     =head2 Dialect Configuration Files
2175    
2176     These will change completely when Tim finalizes the DBI get_info method.
2177    
2178     =head2 Parse Structures
2179    
2180     Here are some further examples of the data structures returned
2181     by the structure() method after a call to parse(). Only
2182     specific details are shown for each SQL instance, not the entire
2183     struture.
2184    
2185     'SELECT make,model, FROM cars'
2186    
2187     command => 'SELECT',
2188     table_names => [ 'cars' ],
2189     column_names => [ 'make', 'model' ],
2190    
2191     'CREATE TABLE cars ( id INTEGER, model VARCHAR(40) )'
2192    
2193     column_defs => {
2194     id => { data_type => INTEGER },
2195     model => { data_type => VARCHAR(40) },
2196     },
2197    
2198     'SELECT DISTINCT make FROM cars'
2199    
2200     set_quantifier => 'DISTINCT',
2201    
2202     'SELECT MAX (model) FROM cars'
2203    
2204     set_function => {
2205     name => 'MAX',
2206     arg => 'models',
2207     },
2208    
2209     'SELECT * FROM cars LIMIT 5,10'
2210    
2211     limit_clause => {
2212     offset => 5,
2213     limit => 10,
2214     },
2215    
2216     'SELECT * FROM vars ORDER BY make, model DESC'
2217    
2218     sort_spec_list => [
2219     { make => 'ASC' },
2220     { model => 'DESC' },
2221     ],
2222    
2223     "INSERT INTO cars VALUES ( 7, 'Chevy', 'Impala' )"
2224    
2225     values => [ 7, 'Chevy', 'Impala' ],
2226    
2227    
2228     =head2 Backwards Compatibility
2229    
2230     This module can be used in conjunction with SQL::Statement,
2231     version 0.2 and higher. Earlier versions of SQL::Statement
2232     included a SQL::Parser as a submodule that used slightly
2233     different syntax than the current version. The current version
2234     supports all of this earlier syntax although new users are
2235     encouraged to use the new syntax listed above. If the syntax
2236     listed below is used, the module should be able to be subclassed
2237     exactly as it was with the older SQL::Statement versions and
2238     will therefore not require any modules or scripts that used it
2239     to make changes.
2240    
2241     In the old style, features of the parser were accessed with this
2242     syntax:
2243    
2244     feature('create','type_blob',1); # allow BLOB as a data type
2245     feature('create','type_blob',0); # disallow BLOB as a data type
2246     feature('select','join',1); # allow multi-table statements
2247    
2248     The same settings could be acheieved in calls to new:
2249    
2250     my $parser = SQL::Parser->new(
2251     'Ansi',
2252     {
2253     create => {type_blob=>1},
2254     select => {join=>1},
2255     },
2256     );
2257    
2258     Both of these styles of setting features are supported in the
2259     current SQL::Parser.
2260    
2261     =head1 ACKNOWLEDGEMENTS
2262    
2263     *Many* thanks to Ilya Sterin who wrote most of code for the
2264     build() method and who assisted on the parentheses parsing code
2265     and who proved a great deal of support, advice, and testing
2266     throughout the development of the module.
2267    
2268     =head1 AUTHOR & COPYRIGHT
2269    
2270     This module is copyright (c) 2001 by Jeff Zucker.
2271     All rights reserved.
2272    
2273     The module may be freely distributed under the same terms as
2274     Perl itself using either the "GPL License" or the "Artistic
2275     License" as specified in the Perl README file.
2276    
2277     Jeff can be reached at: jeff@vpservices.com.
2278    
2279     =cut

MailToCvsAdmin">MailToCvsAdmin
ViewVC Help
Powered by ViewVC 1.1.26 RSS 2.0 feed