0
|
1 |
#======================================================================
|
|
2 |
# A N T S E X P R S . P L
|
|
3 |
# (c) 2005 Andreas Thurnherr
|
|
4 |
# doc: Sat Dec 31 18:35:33 2005
|
30
|
5 |
# dlm: Thu Mar 9 10:12:48 2017
|
|
6 |
# uE-Info: 46 74 NIL 0 0 72 0 2 4 NIL ofnI
|
0
|
7 |
#======================================================================
|
|
8 |
|
|
9 |
# HISTORY:
|
|
10 |
# Dec 31, 2005: - extracted from [list]
|
|
11 |
# Jan 2, 2006: - re-written to use anonymous funs instead of eval()
|
|
12 |
# Jan 3, 2006: - added $DEBUG
|
|
13 |
# Jan 4, 2006: - removed NaN_handling_out
|
|
14 |
# Jan 9, 2006: - made $bufvar param to antsCompileExpr optional
|
|
15 |
# Jan 13, 2006: - separated AddrExpr from EditExpr
|
|
16 |
# - implemented abbreviated addr exprs
|
|
17 |
# Jan 14, 2006: - added old -G syntax to -S
|
|
18 |
# Jan 17, 2006: - BUG: $1, $2, did not work in abbrevs
|
|
19 |
# Jan 31, 2006: - added de-octalization code for abbrevs
|
|
20 |
# Apr 11, 2006: - added ,-separated list (again?)
|
|
21 |
# May 18, 2006: - fiddled
|
|
22 |
# Jun 20, 2006: - simplified regexprs; fields can now begin with _
|
|
23 |
# Jul 1, 2006: - Version 3.3 [HISTORY]
|
|
24 |
# Jul 24, 2006: - BUG: $$ did not work as advertised
|
|
25 |
# Dec 11, 2006: - BUG: 1e-3 was not recognized as a valid number in
|
|
26 |
# abbreviations
|
|
27 |
# Dec 1, 2007: - improved to allow -S%PARAM:... (mainly for %RECNO)
|
|
28 |
# Jan 20, 2007: - pointless debugging (BUGs in [fnr] [list])
|
|
29 |
# Mar 26, 2008: - BUG: . were not allowed in field names
|
|
30 |
# Mar 27, 2008: - added &antsCompileConstExpr()
|
|
31 |
# Mar 28, 2008: - made compile funs bomb on undefined %PARAMs
|
|
32 |
# Aug 27, 2008: - generate error on list(1)-specific address expressions
|
|
33 |
# Oct 12, 2008: - BUG: -S%RECNO%%6==1 did not work because %-escape magic
|
|
34 |
# word continued RECNO word to form undefined PARAM
|
|
35 |
# name. Solution: begin/end escape magic words for %
|
|
36 |
# and $ with a space (nonword character)
|
|
37 |
# Oct 5, 2009: - improved documentation
|
|
38 |
# - added $antsEditExprUsesFields flag
|
|
39 |
# Dec 10, 2009: - BUG: debug output had been wrong for ConstExprs
|
|
40 |
# - modified semantics to allow for : in param names
|
|
41 |
# May 21, 2011: - added support for $antsFnrNegativeOk
|
|
42 |
# May 22, 2011: - made it work
|
|
43 |
# Feb 20, 2012: - BUG: quoting had not been implemented
|
|
44 |
# Mar 10, 2012: - added ${field..field} syntax to edit exprs
|
20
|
45 |
# May 15, 2015: - BUG: -S did not work with :: %PARAMs
|
30
|
46 |
# Mar 9, 2017: - removed perl 5.22 warning about re (non-quoted braces)
|
0
|
47 |
|
|
48 |
$DEBUG = 0;
|
|
49 |
|
|
50 |
#----------------------------------------------------------------------
|
|
51 |
# Address Expressions
|
|
52 |
# - return value indicates whether current record matches
|
|
53 |
# - any valid PERL expression can be an addr expr
|
|
54 |
# - $id are assumed to be fields (use $$id for perl vars)
|
|
55 |
# - %id are assumed to be PARAMs (use %% to get %)
|
|
56 |
# - ABBREVIATIONS:
|
|
57 |
# - id1 relop id2 becomes numberp(id1) && numberp(id2) && $id1 relop $id2
|
|
58 |
# - id1 relop id2 relop id3 is analogous
|
|
59 |
# - id? can only be restricted field name ([\w\.] chars and, possibly, leading %)
|
|
60 |
# - non-perl relops ~=, <> become !=
|
|
61 |
#----------------------------------------------------------------------
|
|
62 |
|
|
63 |
sub antsCompileAddrExpr($) # subst fields/%PARAMs
|
|
64 |
{
|
|
65 |
my($expr,$bufVar) = @_;
|
|
66 |
$bufVar = '$ants_[0]' unless (length($bufVar) > 0);
|
|
67 |
|
|
68 |
#---------------------
|
|
69 |
# handle abbreviations
|
|
70 |
#---------------------
|
|
71 |
print(STDERR "IN AddrExpr = $expr\n") if ($DEBUG);
|
|
72 |
|
|
73 |
goto QUOTED_ADDR_EXPR
|
|
74 |
if ((substr($expr,0,1) eq "'" || substr($expr,0,1) eq '"') &&
|
|
75 |
(substr($expr,0,1) eq substr($expr,-1)));
|
|
76 |
|
|
77 |
# NB: update following code if -S extensions in [list] change
|
|
78 |
croak("$0: unsupported list(1)-specific address expression <$expr>\n")
|
|
79 |
if ($expr =~ /^\$?([\w\.]+)\s*~(([nN][aA][nN])|(([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?))/ ||
|
|
80 |
$expr =~ /^\$?([\w\.]+)\s*<$/ ||
|
|
81 |
$expr =~ /^<\$?([\w\.]+)$/ ||
|
|
82 |
$expr =~ /^\$?([\w\.]+)\s*>$/ ||
|
|
83 |
$expr =~ /^>\$?([\w\.]+)$/);
|
|
84 |
|
20
|
85 |
$expr =~ s/::/QquOte/g; # new-style :: %PARAMs
|
|
86 |
if ($expr =~ /^(%?[\w\.]+):/ || $expr =~ /^(\$\d+):/) { # old -G syntax
|
0
|
87 |
my($fname) = $1; my($range) = $';
|
20
|
88 |
$fname =~ s/QquOte/::/g;
|
0
|
89 |
if ($range =~ /(.*)\.\.(.*)/) {
|
|
90 |
my($min) = ($1 eq '*') ? -1e99 : $1;
|
|
91 |
my($max) = ($2 eq '*') ? 1e99 : $2;
|
|
92 |
croak("$0: illegal addr-expr $expr\n")
|
|
93 |
unless ((numberp($min) || $min =~ /^%/) &&
|
|
94 |
(numberp($max) || $max =~ /^%/));
|
|
95 |
$expr = "$min<=$fname<=$max";
|
|
96 |
} else {
|
|
97 |
if ($range eq '*') {
|
|
98 |
$expr = "numberp(\$$fname)";
|
|
99 |
} else {
|
|
100 |
my(@vl) = split(/,/,$range);
|
|
101 |
$vl[0] = str2num($vl[0]);
|
|
102 |
if (numberp($vl[0]) || $vl[0] =~ /^%/) {
|
|
103 |
$expr = "\$$fname==$vl[0]";
|
|
104 |
} else {
|
|
105 |
$expr = "\$$fname=~/$vl[0]/";
|
|
106 |
}
|
|
107 |
for (my($vi)=1; $vi<=$#vl; $vi++) {
|
|
108 |
$vl[$vi] = str2num($vl[$vi]);
|
|
109 |
if (numberp($vl[$vi]) || $vl[$vi] =~ /^%/) {
|
|
110 |
$expr .= "||\$$fname==$vl[$vi]";
|
|
111 |
} else {
|
|
112 |
$expr .= "||\$$fname=~/$vl[$vi]/";
|
|
113 |
}
|
|
114 |
}
|
|
115 |
}
|
|
116 |
}
|
|
117 |
print(STDERR "-G AddrExpr = $expr\n") if ($DEBUG);
|
|
118 |
}
|
20
|
119 |
$expr =~ s/QquOte/::/g;
|
|
120 |
|
0
|
121 |
my($relop) = '<|<=|>|>=|!=|~=|<>|=='; # relational ops
|
|
122 |
my($comparee) = '-?%?\$?[\w\.\+\-]+'; # nums, fields, PARAMs
|
|
123 |
my($numvar) = '^[\w\.]+$'; # fields
|
|
124 |
|
|
125 |
if ($expr =~ /^($comparee)\s*($relop)\s*($comparee)$/) {
|
|
126 |
my($c1) = $1; my($c2) = $3; my($ro) = $2;
|
|
127 |
$c1 =~ s/^0*(\d)/\1/; $c2 =~ s/^0*(\d)/\1/; # de-octalize
|
|
128 |
$ro = '!=' if ($ro eq '<>' || $ro eq '~=');
|
|
129 |
$expr = '';
|
|
130 |
if (!numberp($c1) && $c1 =~ /$numvar/) {
|
|
131 |
$c1 = "\$$c1";
|
|
132 |
$expr .= "numberp($c1) && ";
|
|
133 |
}
|
|
134 |
if (!numberp($c2) && $c2 =~ /$numvar/) {
|
|
135 |
$c2 = "\$$c2";
|
|
136 |
$expr .= "numberp($c2) && ";
|
|
137 |
}
|
|
138 |
$expr .= "($c1 $ro $c2)";
|
|
139 |
}
|
|
140 |
|
|
141 |
elsif ($expr =~ /^($comparee)\s*($relop)\s*($comparee)\s*($relop)\s*($comparee)$/) {
|
|
142 |
my($c1) = $1; my($c2) = $3; my($c3) = $5; my($ro1) = $2; my($ro2) = $4;
|
|
143 |
$c1 =~ s/^0*(\d)/\1/; $c2 =~ s/^0*(\d)/\1/; $c3 =~ s/^0*(\d)/\1/;
|
|
144 |
$ro1 = '!=' if ($ro1 eq '<>' || $ro1 eq '~=');
|
|
145 |
$ro2 = '!=' if ($ro2 eq '<>' || $ro2 eq '~=');
|
|
146 |
$expr = '';
|
|
147 |
if (!numberp($c1) && $c1 =~ /$numvar/) {
|
|
148 |
$c1 = "\$$c1";
|
|
149 |
$expr .= "numberp($c1) && ";
|
|
150 |
}
|
|
151 |
if (!numberp($c2) && $c2 =~ /$numvar/) {
|
|
152 |
$c2 = "\$$c2";
|
|
153 |
$expr .= "numberp($c2) && ";
|
|
154 |
}
|
|
155 |
if (!numberp($c3) && $c3 =~ /$numvar/) {
|
|
156 |
$c3 = "\$$c3";
|
|
157 |
$expr .= "numberp($c3) && ";
|
|
158 |
}
|
|
159 |
$expr .= "($c1 $ro1 $c2) && ($c2 $ro2 $c3)";
|
|
160 |
}
|
|
161 |
|
|
162 |
#-----------------------------------
|
|
163 |
# substitute ANTS fields and %PARAMs
|
|
164 |
#-----------------------------------
|
|
165 |
print(STDERR "MID AddrExpr = $expr\n") if ($DEBUG);
|
|
166 |
$expr =~ s{\$%}{%}g; # allow for $%param
|
|
167 |
$expr =~ s{\$\$}{ AnTsDoLlAr }g; # escape
|
30
|
168 |
while ($expr =~ /\$\{([^}]*)\}/) { # ${field}
|
0
|
169 |
my($fnr) = cardinalp($1) ? $1-1 : fnr($1);
|
|
170 |
croak("$0: unknown field $1\n") unless ($fnr >= 0);
|
|
171 |
$expr =~ s(\${$1})(AnTsDtArEf\[$fnr\]);
|
|
172 |
}
|
|
173 |
while ($expr =~ /\$([\w\.]+)/) { # $field
|
|
174 |
my($fnr) = cardinalp($1) ? $1-1 : fnr($1);
|
|
175 |
croak("$0: unknown field $1\n") unless ($fnr >= 0);
|
|
176 |
$expr =~ s{\$$1}{AnTsDtArEf\[$fnr\]};
|
|
177 |
}
|
|
178 |
while ($expr =~ /\$\+([\w\.]+)/) { # $+field
|
|
179 |
my($fnr) = cardinalp($1) ? $1-1 : fnr($1);
|
|
180 |
croak("$0: unknown field $1\n") unless ($fnr >= 0);
|
|
181 |
$expr =~ s{\$\+$1}{(AnTsDtArEf\[$fnr\]-AnTsDtArEf0\[$fnr\])};
|
|
182 |
}
|
|
183 |
$expr =~ s{%%}{ AnTsPeRcEnT }g; # escape
|
20
|
184 |
while ($expr =~ /%([\w\.:]+)/) { # %PARAMs
|
0
|
185 |
my($p) = $1;
|
|
186 |
croak("$0: Undefined PARAM %$p\n")
|
|
187 |
unless defined($P{$p});
|
|
188 |
$expr =~ s{%$p}{\$P\{'$p'\}};
|
|
189 |
}
|
|
190 |
$expr =~ s{AnTsDtArEf}{$bufVar}g;
|
|
191 |
$expr =~ s{ AnTsPeRcEnT} {%}g;
|
|
192 |
$expr =~ s{ AnTsDoLlAr }{\$}g;
|
|
193 |
|
|
194 |
#--------------------
|
|
195 |
# compile and return
|
|
196 |
#--------------------
|
|
197 |
QUOTED_ADDR_EXPR:
|
|
198 |
print(STDERR "OUT AddrExpr = $expr\n") if ($DEBUG);
|
|
199 |
my($subR) = eval("sub { return $expr };");
|
|
200 |
print(STDERR "sub { return $expr };\n") if ($DEBUG);
|
|
201 |
croak("sub { return $expr }; => $@\n") if ($@);
|
|
202 |
return $subR;
|
|
203 |
}
|
|
204 |
|
|
205 |
#----------------------------------------------------------------------
|
|
206 |
# Edit Expressions
|
|
207 |
# - execute calculation based on and/or modify current record
|
|
208 |
# - any valid PERL expression can be an edit expr
|
|
209 |
# - $id are assumed to be fields (use $$id for perl vars)
|
|
210 |
# - %id are assumed to be PARAMs (use %% to get %)
|
|
211 |
# - ${field} are fields
|
|
212 |
# - ${field..field} are field ranges
|
|
213 |
#----------------------------------------------------------------------
|
|
214 |
|
|
215 |
$antsEditExprUsesFields; # flag
|
|
216 |
|
|
217 |
sub antsCompileEditExpr($) # subst fields/%PARAMs
|
|
218 |
{
|
|
219 |
my($expr,$bufVar) = @_;
|
|
220 |
$bufVar = '$ants_[0]' unless defined($bufVar);
|
|
221 |
$antsEditExprUsesFields = 0;
|
|
222 |
|
|
223 |
print(STDERR "IN EditExpr = $expr\n") if ($DEBUG);
|
|
224 |
goto QUOTED_EDIT_EXPR
|
|
225 |
if ((substr($expr,0,1) eq "'" || substr($expr,0,1) eq '"') &&
|
|
226 |
(substr($expr,0,1) eq substr($expr,-1)));
|
|
227 |
|
|
228 |
$expr =~ s{\$%}{%}g; # allow for $%param
|
|
229 |
$expr =~ s{\$\$}{AnTsDoLlAr}g; # escape
|
30
|
230 |
while ($expr =~ /\$\{([^}]*)\.\.([^}]*)\}/) { # ${field..field}
|
0
|
231 |
$antsEditExprUsesFields |= 1;
|
|
232 |
my($ffnr) = cardinalp($1) ? $1-1 : fnr($1);
|
|
233 |
croak("$0: unknown field $1\n") unless ($ffnr >= 0);
|
|
234 |
my($lfnr) = cardinalp($2) ? $2-1 : fnr($2);
|
|
235 |
croak("$0: unknown field $2\n") unless ($lfnr >= 0);
|
|
236 |
croak("$0: empty field range $1..$2\n")
|
|
237 |
unless ($lfnr >= $ffnr);
|
|
238 |
my($expanded) = '';
|
|
239 |
for (my($f)=$ffnr; $f<=$lfnr; $f++) {
|
|
240 |
$expanded .= "AnTsDtArEf[$f]";
|
|
241 |
$expanded .= "," unless ($f == $lfnr);
|
|
242 |
}
|
|
243 |
$expr =~ s(\${$1\.\.$2})($expanded);
|
|
244 |
}
|
30
|
245 |
while ($expr =~ /\$\{([^}]*)\}/) { # ${field}
|
0
|
246 |
$antsEditExprUsesFields |= 1;
|
|
247 |
my($fnr) = cardinalp($1) ? $1-1 : fnr($1);
|
|
248 |
croak("$0: unknown field $1\n") unless ($fnr >= 0);
|
|
249 |
$expr =~ s(\${$1})(AnTsDtArEf\[$fnr\]);
|
|
250 |
}
|
|
251 |
while ($expr =~ /\$(-?[\w\.]+)/) { # $field
|
|
252 |
$antsEditExprUsesFields |= 1;
|
|
253 |
my($fnr) = cardinalp($1) ? $1-1 : fnr($1);
|
|
254 |
if ($fnr < 0) { # should only happen on $antsFnrNegativeOk
|
|
255 |
$expr =~ s{\$$1}{AnTsDtArEf\[AnTsDtAlEn$fnr\]};
|
|
256 |
} else {
|
|
257 |
croak("$0: unknown field $1\n") unless ($fnr >= 0);
|
|
258 |
$expr =~ s{\$$1}{AnTsDtArEf\[$fnr\]};
|
|
259 |
}
|
|
260 |
}
|
|
261 |
while ($expr =~ /\$\+([\w\.]+)/) { # $+field
|
|
262 |
$antsEditExprUsesFields |= 1;
|
|
263 |
my($fnr) = cardinalp($1) ? $1-1 : fnr($1);
|
|
264 |
croak("$0: unknown field $1\n") unless ($fnr >= 0);
|
|
265 |
$expr =~ s{\$\+$1}{(AnTsDtArEf\[$fnr\]-AnTsDtArEf0\[$fnr\])};
|
|
266 |
}
|
|
267 |
$expr =~ s{%%}{AnTsPeRcEnT}g; # escape
|
|
268 |
while ($expr =~ /%([\w\.:]+)/) { # %PARAMs
|
|
269 |
my($p) = $1;
|
|
270 |
croak("$0: Undefined PARAM %$p\n")
|
|
271 |
unless defined($P{$p});
|
|
272 |
$expr =~ s{%$p}{\$P\{"$p"\}};
|
|
273 |
}
|
|
274 |
if ($bufVar =~ m{\]$}) {
|
|
275 |
my($adl) = '@{' . $bufVar . '}';
|
|
276 |
$expr =~ s{AnTsDtAlEn}{$adl}g;
|
|
277 |
} else {
|
|
278 |
my($adl) = '@' . substr($bufVar,1);
|
|
279 |
$expr =~ s{AnTsDtAlEn}{$adl}g;
|
|
280 |
}
|
|
281 |
$expr =~ s{AnTsDtArEf}{$bufVar}g;
|
|
282 |
$expr =~ s{AnTsDtArEf}{$bufVar}g;
|
|
283 |
$expr =~ s{AnTsPeRcEnT}{%}g;
|
|
284 |
$expr =~ s{AnTsDoLlAr}{\$}g;
|
|
285 |
|
|
286 |
QUOTED_EDIT_EXPR:
|
|
287 |
$expr = "return $expr";
|
|
288 |
|
|
289 |
print(STDERR "OUT EditExpr = $expr\n") if ($DEBUG);
|
|
290 |
my($subR) = eval("sub { $expr };");
|
|
291 |
croak("sub { $expr }; => $@\n") if ($@);
|
|
292 |
return $subR;
|
|
293 |
}
|
|
294 |
|
|
295 |
#----------------------------------------------------------------------
|
|
296 |
# Constant Expressions
|
|
297 |
# - carry out calculation based on const and %PARAMs only
|
|
298 |
# - same as edit expressions without field substitutions (%PARAMs ok, though)
|
|
299 |
# - $ must still be escaped ($$), although this is unlikely to be used ever
|
|
300 |
#----------------------------------------------------------------------
|
|
301 |
|
|
302 |
sub antsCompileConstExpr($) # subst fields/%PARAMs
|
|
303 |
{
|
|
304 |
my($expr) = @_;
|
|
305 |
|
|
306 |
print(STDERR "IN ConstExpr = $expr\n") if ($DEBUG);
|
|
307 |
|
|
308 |
unless ((substr($expr,0,1) eq "'" || substr($expr,0,1) eq '"') &&
|
|
309 |
(substr($expr,0,1) eq substr($expr,-1))) { # quoted string
|
|
310 |
$expr =~ s{\$%}{%}g; # allow for $%param
|
|
311 |
$expr =~ s{\$\$}{AnTsDoLlAr}g; # escape
|
|
312 |
$expr =~ s{%%}{AnTsPeRcEnT}g; # escape
|
|
313 |
while ($expr =~ /%([\w\.:]+)/) { # %PARAMs
|
|
314 |
my($p) = $1;
|
|
315 |
croak("$0: Undefined PARAM %$p\n")
|
|
316 |
unless defined($P{$p});
|
|
317 |
$expr =~ s{%$p}{\$P\{"$p"\}};
|
|
318 |
}
|
|
319 |
$expr =~ s{AnTsPeRcEnT}{%}g;
|
|
320 |
$expr =~ s{AnTsDoLlAr}{\$}g;
|
|
321 |
}
|
|
322 |
|
|
323 |
$expr = "return $expr";
|
|
324 |
|
|
325 |
print(STDERR "OUT ConstExpr = $expr\n") if ($DEBUG);
|
|
326 |
my($subR) = eval("sub { $expr };");
|
|
327 |
croak("sub { $expr }; => $@\n") if ($@);
|
|
328 |
return $subR;
|
|
329 |
}
|
|
330 |
|
|
331 |
1;
|