#!/usr/bin/perl ## pythonizer Version 0.55 (Sept 2, 2020) ## Fuzzy prettyprint STDERR for Perl scripts ## Copyright Nikolai Bezroukov, 2019-2020. ## Licensed under Perl Artistic license ## ## As most Perl statement are simple over 80% of them usually allow sucessful translation. That's why we use the term "fuzzy" translation. ## The result will contain some statements that need to be converted by hand or corrected. In some cases that requres change of logic. ## Best works for Perl 4 subset of Perl 5 which typically is used in sysadmin scripts. ## Perl scripts that extensivly use references or OO requre more extensive manual effort ## ## --- INVOCATION: ## ## pythonizer [options] [file_to_process] ## ##--- OPTIONS: ## -p -- version of Python for generation, Default 3, if set to 2 generation is into Python 2.7 ## -v -- verbosity 0 -minimal (only serious messages) 3 max verbosity (warning, errors and serious); default -v 3 ## -h -- this help ## -t -- size of tab ingenerated Python code (emulated with spaces). Default is 4 ## -d level of debugging default is 0 -- production mode ## 0 -- Production mode ## 1 -- Testing mode. Program is autosaved in Archive (primitive versioning mechanism) ## 2 -- Stop at the beginning of statement analysys (the statement can be selected via breakpoint option -b ) ## 3 -- More debugging output. ## 4 -- Stop at lexical scanner with $DB::single = 1; ## 5 -- output stages of Python line generation ##--- PARAMETERS: ## ## 1st -- name of file (only one argument accepted) #--- Development History # # Ver Date Who Modification # ===== ========== ======== ============================================================== # 0.010 2019/10/09 BEZROUN Initial implementation # 0.020 2019/10/10 BEZROUN Revised structure of global arrays, Now we have four parallel arrays: TokenStr, ValClass ValPerl, ValPy # 0.030 2019/10/11 BEZROUN Recursion is used to expressions, but in certain cases when I need a look-ahead, bracket counting is used instead # 0.040 2019/10/12 BEZROUN Better listing for debugging implemented # 0.050 2019/11/06 BEZROUN Forgot almost everything after a month; revised code just to refreash memory. Tokenizer slightly improved # 0.051 2019/11/07 BEZROUN Assignment within logical expression is not allowed in Python 2.7. It is now translated correctly # 0.060 2019/11/08 BEZROUN post assignment conditions like "next if( substr($line,0,1) eq '') " are processed correctly # 0.070 2019/11/11 BEZROUN x=(v>0) ? y :z is now translated into ugly Python ternary operator which exists since Python 2.5 # 0.071 2019/11/11 BEZROUN program now correctly translated 80% codelines of pre_pythonizer.pl # 0.080 2019/12/27 BEZROUN Array ValCom is introduced for the preparation of version 0.2 of pre-processor pre_pythonizer.pl # 0.090 2020/02/03 BEZROUN #\ means continuation of the statement. # 0.091 2020/02/03 BEZROUN Moved sub preprocess_line to Pythonizer # 0.100 2020/03/16 BEZROUN Reworked scanner # 0.200 2020/08/05 BEZROUN Abandoned hope to make it perfect. # 0.210 2020/08/07 BEZROUN Moved gen_output to Perlscan, removed ValCom from the exported list. # 0.220 2020/08/07 BEZROUN Diamond operator is processed as a special type of identifier. # 0.230 2020/08/09 BEZROUN gen_chunk moves to Perlscan module. Pythoncode array made local # 0.230 2020/08/09 BEZROUN more functions and statements implemented # 0.240 2020/08/10 BEZROUN postfix conditional like return if(rc>0) re-implemented differently via scanner buffer # 0.250 2020/08/10 BEZROUN split function is reimplemented and optimized in case there is plain vanilla string and not-regex. # 0.251 2020/08/12 BEZROUN Perl_default_var is renames into default_var # 0.260 2020/08/14 BEZROUN System variables in double quoted literals are now complied correctly. Perlscan.pm improved. # 0.261 2020/08/14 BEZROUN for loop translation corrected # 0.270 2020/08/15 BEZROUN getopts is now implemented in Softpano.pm to allow the repetition of option letter to set the value of options ( -ddd) # 0.300 2020/08/17 BEZROUN Python 3.8 now is default for generaion. Option -p introduced. -p 2 changes target version of Python to 2.7 # 0.310 2020/08/18 BEZROUN f-strings are implemented for Python 3 mode instead of decompiling string into chunks # 0.320 2020/08/20 BEZROUN open statement and (condition) && ... statement translation corrected # 0.400 2020/08/22 BEZROUN __DATA__ and POD statements are now processed. File filename.data is created for data file. # 0.410 2020/08/24 BEZROUN pre_pythonizer now refactors Perl script pushing subroutines to the top and creating main sub. # 0.420 2020/08/25 BEZROUN print recognized in constructs like if($debug){ print 'something';}. # 0.430 2020/08/25 BEZROUN Variables from other namespaces recognized. # 0.440 2020/08/26 BEZROUN FailedTrans flag is replaced with TrStatus flag. Failure now is determined by the negative value of the TrStatus flag. # 0.450 2020/08/26 BEZROUN Option - r (refactor) added # 0.500 2020/08/31 BEZROUN Regular expression processing competly reworked based on changed in Perlscan # 0.510 2020/08/31 BEZROUN Special subroutine for putting regex in quote created in Perlscan.pm # 0.520 2020/08/31 BEZROUN Statement $line=~/abc/ this is not assignment statement; In no metacharaters it should be treated as string search. # 0.530 2020/08/31 BEZROUN Handling of __DATA and __END__ improved. Now they are not discarded but instead the separate file with extention.data is created. # 0.540 2020/09/01 BEZROUN Translation of function substr improved by recognizing several special cases. # 0.550 2020/09/01 BEZROUN Matching of groups corrected. # 0.560 2020/09/02 BEZROUN Translation of for and while improved. # 0.570 2020/09/03 BEZROUN Translation of ++ and -- implemented # 0.580 2020/09/03 BEZROUN Translation of function sprintf implemented # 0.600 2020/09/08 BEZROUN List on internal functions created. Translation of backquotes and open improved. # 0.700 2020/09/17 BEZROUN Basic global varibles detection added. Global statement generated for each local subroutine # 0.800 2020/10/02 BEZROUN More correct translation of array assignments. Globals initialiazed after main sub. Installer added # 0.810 2020/10/05 BEZROUN Pre-pythonizer by default does not create main subroutine # 0.820 2020/10/06 BEZROUN Function parsing rewritten to accomodate some "bracketless" cases which now became a norm in Perl # 0.830 2020/10/08 BEZROUN Implementation of postfix conditional is completly rewritten and now uses token buffering # 0.840 2020/10/09 BEZROUN state varibles now are prefixed with the name of sub to avoid conflict with globals # 0.850 2020/10/12 BEZROUN print translation improved; many fixes in lex analyser # 0.860 2020/10/14 BEZROUN Python 2.7 mode eliminated to simplify the code. Option -p removed. "since" test passed # 0.870 2020/10/21 BEZROUN Treatment of brackets systematised. The code of subroutine expression revised. #!start =============================================================================================================================== use v5.10.1; use warnings; use strict 'subs'; use feature 'state'; # # Modules used ( from the current directory to make debugging more convenient; will change later) # use Softpano qw(autocommit abend banner logme summary out); use Perlscan ('gen_statement', 'tokenize', 'gen_chunk', 'append', 'replace', 'insert', 'destroy', 'autoincrement_fix', '@ValClass', '@ValPerl', '@ValPy', '@ValCom', '@ValType', '$TokenStr'); use Pythonizer qw(correct_nest getline prolog output_line @LocalSub %GlobalVar); $VERSION='0.85'; $SCRIPT_NAME='pythonizer'; # # options # $breakpoint=9999; # line from which to debug code. See Pythonizer user guide $debug=0; # 0 -- production mode # 1 -- testing mode # 2 -- first pass debugging # 3 -- provides tracing during the second pass (useful for users for trableshooing infinite loops) # 4 -- stop at Perlscan.pm # 5 -- stop at particular error message. $HOME=$ENV{'HOME'}; # the directory used for autobackup (only if debug>0) if( $^O eq 'cygwin' ){ # $^O is built-in Perl Variable that contains OS name $HOME="/cygdrive/f/_Scripts"; # CygWin development mode -- the directory used for backups } # # Local dictionaries # %PyOpen=('<'=>'r', '>'=>'w', '>>'=>'a', '+<'=>'+'); $LOG_DIR='/tmp/'.ucfirst($SCRIPT_NAME); banner($LOG_DIR,$SCRIPT_NAME,"Fuzzy translator of Python to Perl. Version $VERSION",30); # Opens SYSLOG and print STDERRs banner; parameter 4 is log retention period prolog(); # sets all options, including breakpoint if( $debug > 0 ){ autocommit("$HOME/Archive",$ENV{'PERL5LIB'},qw(Softpano.pm Perlscan.pm Pythonizer.pm)); } # # Skip initial block of comments # $TrStatus=0; chomp($line=<>); # we need to discard the first line with /usr/bin/perl as interpreter output_line('','#!/usr/bin/python3 -u'); # put a proper line if ($line =~ /^\s*#!/){ $line=getline(); # skip previous interpreter definition and get the first meaningful line + initial block of comments, if present }else{ getline($line); # put the first line in the readline buffer $line=getline(); # rescan it to have full proper processing } foreach $l ('import sys,os,re','import fileinput,subprocess,inspect'){ output_line('',$l); # to block reproducing the first source line } #while($l=){ # chomp $l; # output_line($l,'',''); # to block reproducing the first source line; added Sept 3, 2020 just for the future #} #close DATA; # #Main loop # @Perlscan::BufferValClass=@Perlscan::BufferValCom=@Perlscan::BufferValPerl=@Perlscan::BufferValPy=(); # cleaning after the first pass my ($start,$token_buffer_active); $CurSub='main'; # default value of the current subroutine $token_buffer_active=0; $we_are_in_sub_body=0; while( defined($line) || scalar(@Perlscan::BufferValClass)>0 ){ $TrStatus=0; if( scalar(@ValClass)==0 || ! defined($ValClass[0]) ){ $line=getline(); # skip lines with no tokens like ';' next; } # # You need to claw back tokens from buffer for postfix conditionals. This is a pretty brittle and complex code -- Oct 8,2020 NNB # if ( scalar(@Perlscan::BufferValClass)==0 ) { if( $debug>1 ){ say STDERR "\n\n === Line $. Perl source:".(defined($line)?$line:$ValPerl[0])."===\n"; if( $.>=$breakpoint ){ logme('S', "Breakpoint was triggered at line $. in pythonizer.pl"); # $breakpoint=999999; $DB::single = 1; } } tokenize($line); # I just like to see tokenize call first in debugger :-) }else{ if($token_buffer_active==0){ @ValClass=@ValPerl=('{'); $token_buffer_active=1; }elsif($token_buffer_active==1){ @ValClass=@Perlscan::BufferValClass; $TokenStr=join('',@ValClass); @ValCom=@Perlscan::BufferValCom; @ValPerl=@Perlscan::BufferValPerl; @ValPy=@Perlscan::BufferValPy; $token_buffer_active=2; }else{ @ValClass=@ValPerl=('}'); @Perlscan::BufferValClass=@Perlscan::BufferValCom=@Perlscan::BufferValPerl=@Perlscan::BufferValPy=(); $token_buffer_active=0; } } # # Dealing with problem of state varaible mapping into Python via renaming # rename_state_var(0,$#ValPy); if( index($TokenStr,'s^')>-1){ # Selected cases of postfix and prefix operator can be translated; other not autoincrement_fix() # exported function located in Perlscan } # # Statements # $RecursionLevel=0; if( $ValClass[0] eq '}' ){ # we treat curvy bracket as a separate dummy statement correct_nest(-1); # next line will be de-indented if( $we_are_in_sub_body && $Pythonizer::NextNest ==0 ){ correct_nest(0,0); initialize_globals_for_state_vars(); %new_state_var_name=(); # hash for own and state variables %new_state_var_init=(); $CurSub='main'; } }elsif( $ValClass[0] eq '{' ){ correct_nest(1); # next line will be indented }elsif( $ValClass[0] eq '(' ){ $close_br_pos=matching_br(0); if( $close_br_pos && $ValClass[$close_br_pos+1] eq '=' ){ $TrStatus=assignment(0); }else{ $TrStatus=-255; } }elsif( $ValPy[0] eq 'NoTrans!' ){ output_line('','#SKIPPED: '.$line); $line=getline(); next; }elsif( $ValPerl[0] eq 'sub' ){ $we_are_in_sub_body=1; %new_state_var_name=(); # hash for own and state variables %new_state_var_init=(); # Perl has two types of sub statements -- prototype and actual $CurSub=$ValPy[1]; correct_nest(0,0); gen_chunk('def',$CurSub,'(perl_arg_array):'); # def name ([list of arguments]) $LocalSub{$CurSub}=1; if (exists($GlobalVar{$CurSub}) ){ gen_statement(); correct_nest(1,1); output_line($GlobalVar{$CurSub}); correct_nest(0,0); } }elsif( $ValPerl[0] eq 'BEGIN' ){ correct_nest(0,0); gen_chunk($ValPy[0],'perl_begin:'); # def }elsif( $ValPerl[0] eq 'close' ){ for( my $i=1; $i<@ValPy; $i++ ){ if( $ValClass[$i] eq 'i' ){ gen_chunk($ValPy[$i].'.f.close;'); } } }elsif( $ValPerl[0] =~ /say|print/ ){ $TrStatus=print3(0); }elsif( $ValPerl[0] =~ /warn/ ){ $TrStatus=print3(0,'STDERR'); # in Python3 this is a function }elsif( $ValClass[0] eq 's' ){ if( ($TokenStr=~tr/=//) > 1 && $ValClass[-1] eq 'd' && ($ValPerl[-2] eq '+=' || $ValPerl[-2] eq '-=') ){ # multiple assignment with the last increment like $K=$i+=1 need to be expanded due to bug in Python parser $ValPy[-1]=$ValPy[-3].substr($ValPerl[-2],0,1).$ValPy[-1]; $ValPy[-2]=$ValPerl[-2]='='; } $TrStatus=assignment(0); }elsif( $ValClass[0] eq 't' ){ if( scalar(@ValClass)==2 ){ #uninitalise single var declaration like my $line if( $ValPerl[0] eq 'my' ){ output_line("$ValPy[1]=None"); }elsif( $ValPerl[0] eq 'own' ){ gen_chunk( $ValPy[0], $ValPy[1] ); }elsif( $ValPerl[0] eq 'state' ){ $new_name=$CurSub.'_'.$ValPy[1]; $new_state_var_name{$ValPy[1]}=$new_name; gen_chunk($ValPy[0],$new_name); } finish(); next; }elsif( scalar(@ValClass)==4 && $ValClass[2] eq '=' ){ if( $ValPerl[0] eq 'my' ){ output_line("$ValPy[1]=$ValPy[-1]"); }elsif( $ValPerl[0] eq 'own' ){ gen_chunk( $ValPy[0], $ValPy[1].'=',$ValPy[-1] ); }elsif( $ValPerl[0] eq 'state' ){ $new_name=$CurSub.'_'.$ValPy[1]; $new_state_var_name{$ValPy[1]}=$new_name; gen_chunk($ValPy[0],$new_name); $new_state_var_init{$ValPy[1]}=$ValPy[-1]; } finish(); next; }elsif($ValClass[1] eq '('){ #this is a more complex case my $last=matching_br(1); if($#ValClass>$last && $ValClass[$last+1] eq '='){ if($ValPerl[0] eq 'state' ){ rename_state_var(2,$last-1); } $TrStatus=assignment(1); }else{ for($i=2; $i<$last;$i++){ if ($ValPy[$i] eq ','){ gen_chunk('='); }elsif($ValPerl[0] eq 'state'){ $new_name=$CurSub.'_'.$ValPy[$i]; $new_state_var_name{$ValPy[$i]}=$new_name; gen_chunk($new_name); }else{ gen_chunk($ValPy[$i]); } } gen_chunk('=None'); } }elsif( $ValClass[2] eq '=' ){ $TrStatus=assignment(1); }else{ $TrStatus=-255; } }elsif( $ValClass[0] eq 'h' ){ # hash to has need method copy # if( $ValClass[1] eq '=' ){ if( $ValPerl[2] eq '(' ){ # Special case hash initialization needs to be converted to dictionary initialization gen_chunk($ValPy[0].'={'); for( my $i=3; $i<$#ValPy; $i++ ){ gen_chunk( $ValPy[$i] ); } gen_chunk('}'); finish(); next; }elsif( scalar(@ValClass)==2 && ($ValPerl[2] eq 'h' || $ValPerl[2] eq 'q') ){ gen_chunk("$ValPy[0]=$ValPy[2].copy"); # copy structure not reference finish(); next; } } $TrStatus=assignment(0); }elsif($ValClass[0] eq 'a'){ if( $ValClass[1] eq '=' ){ if( $#ValClass==2 && $ValClass[2] eq 'a'){ # Special case array to array copy gen_chunk("$ValPy[0]=$ValPy[2].copy"); finish(); next; }elsif( $ValPerl[2] eq '(' ){ # array initialization gen_chunk($ValPy[0],'=['); $end_pos=matching_br(2); for (my $i=3; $i<$end_pos; $i++){ gen_chunk($ValPy[$i]); } gen_chunk(']'); finish(); next; }elsif( $ValPerl[2] =~ /<\w*>/ ){ # Special case of array initialization via slurping gen_chunk("$ValPy[0]=$ValPy[2].copy"); finish(); next; }elsif( $ValClass[2] eq 'a' && $ValPerl[3] eq '=' ){ my $last_eq=rindex($TokenStr,'='); if( $ValPerl[$last_eq+1] eq '(' ){ # list assignment @x=(1,2,3); $ValPy[$last_eq+1]='['; if( $ValPerl[-1] eq ')' ){ $ValPy[-1]=']'; }else{ $TrStatus=-255; finish(); next; } } for( $i=0; $i<$last_eq; $i+=2 ){ # cascade assignent processing @x=@y=@z if ($ValClass[$i+1] ne '='){ logme('S',"Token $ValPerl[$i+1] was found insted of '=' in what is expected to be array assignment"); $TrStatus=-255; last; } if( $last_eq+1==@ValClass ){ gen_statement("$ValPy[$i]=$ValPy[-1].copy"); # last array is the source }elsif( $ValPerl[$last_eq+1] eq '(' ){ #left side is the list $a=@b=(1,2,3) gen_chunk("$ValPy[$i]="); expression($last_eq+1,$#ValClass); # processing (1,2,3) -- you need brackets here. Recursion level should be 0 gen_statement(); } } finish(); next; } } $TrStatus=assignment(0); }elsif( $ValClass[0] eq 'c' ){ #normal control statement: if/while/for, etc -- next line is always nested. # in foreach loop "(" is absent ) do in perl you can's distibush between postfix for and foreach loop without parens if( defined($ValType[0]) && $ValType[0] eq 'P' && $ValClass[1] ne '(' && $ValPy[0] ne 'for' ){ insert(1,'(','(','('); append(')',')',')'); } if ($TokenStr=~/^c\(\!f\(?/ && $ValPerl[3] eq 'open' ){ $TrStatus=open_fun(3,'c'); }else{ $TrStatus=control(0); # control now itself destroy the last ) Oct 14, 2020 --NNB } }elsif( $ValClass[0] eq 'C' ){ #next last continue if( $ValPerl[0] eq 'elsif' ){ gen_chunk('elif '); $end_pos=matching_br(1); $TrStatus=expression(2,$end_pos-1,0); gen_chunk(':'); gen_statement(); }elsif( $ValPerl[0] eq 'else' ){ gen_chunk('else:'); gen_statement(); } }elsif( $ValClass[0] eq 'f' ){ #this is a left hand function like is substr($line,0,1)='' or open or chomp; if( $ValPerl[0] eq 'substr' ){ $TrStatus=left_hand_substr(0); }elsif( $ValPerl[0] eq 'chomp' ){ if( $#ValPerl==0) { gen_chunk(q[default_var=default_var.rstrip("\n")]); # chomp with no argumnets }else{ function(0,$#ValClass); } }elsif( $ValPerl[0] eq 'chop' ){ if( $ValPerl[1] eq '(' ){ if( $ValClass[2] eq 's' ){ gen_chunk($ValPy[2].'='.$ValPy[2].'[0:-1]'); } else{ $TrStatus=-1; } }else{ gen_chunk('default_var=default_var[0:-1]'); } }elsif( $ValPerl[0] eq 'open' ){ $rc=open_fun(0,'s'); }else{ $TrStatus=function(0); } }elsif( $ValClass[0] eq 'x' ){ # this is backquotes gen_chunk(qq{default_var=subprocess.run($ValPy[0],capture_output=True,text=True,shell=True)}); gen_statement(); gen_chunk(qq[subprocess_rc=default_var.returncode]); }elsif( $ValClass[0] eq 'd' ){ if( length($TokenStr)==1 ){ logme('W','line starts with digit'); }else{ $TrStatus=-1; } }elsif( $ValClass[0] eq '(' ){ # (/abc/) && a=b; (a0 && $ValClass[1] eq '(' ){ $right_br=matching_br(1); if( $ValClass[2] eq ')' ){ # function with zero arguments if( $ValPy[0] eq 'main' && $Pythonizer::CurNest==0 ){ my $globals=substr($GlobalVar{$CurSub},length('global')); $globals=~tr/,/=/; gen_statement($globals.'=None'); } gen_chunk($ValPy[0].'([])'); }elsif( $ValClass[2] eq 'f' && ( $ValClass[3] ne '(' || ($ValClass[3] eq '(' && matching_br(3) == $right_br-1)) ){ # bracketless call of built-in function as a single argument: get_config(split / /,$line) # or bracketed call to built-in function call that return list. We do not need sqare brackets gen_chunk($ValPy[0],'('); function(2,$right_br-1); # we assume that evethying in brackets is the function call gen_chunk(')'); }else{ # In all other cases we will put sqare bracket, even if they are redundant: they can be manually deleted. gen_chunk($ValPy[0]); gen_chunk('(['); $TrStatus=expression(2,$#ValClass-1,-1); # this will scan till ')' and should eliminate ')' due to -1 as 3-d arg gen_chunk('])'); } }else{ $RecursionLevel=-1; $TrStatus=expression(0,$#ValClass); # this will scan till ')' } }else{ $TrStatus=-1; } finish(); } # while initialize_globals_for_state_vars(); # # Epilog -- close output file and if you are in debugging mode display the content on the screen # if (scalar(@NoTrans)>0) { say STDERR "\nATTENTION!\nThe following lines were probably translated incorrectly:\n"; say STDERR join("\n",@NoTrans); } $rc=summary(); # print diagnostic messages summary exit $rc; sub finish # imitation of continue statement. { if( defined($TrStatus) && $TrStatus < 0 ){ push(@NoTrans,"[$.]: $line"); } gen_statement(); $line=getline(); # get new line correct_nest(); } # finish sub rename_state_var { ($from,$to)=@_; for( $i=$from; $i<=$to; $i++ ){ if( defined($ValClass[$i]) && $ValClass[$i]=~/[sah]/ && exists($new_state_var_name{$ValPy[$i]}) ){ $ValPy[$i]=$CurSub.'_'.$ValPy[$i]; } } } sub initialize_globals_for_state_vars { my @renamed_state_var=values(%new_state_var_name); return unless( defined($renamed_state_var[0]) ); # nothing to do # First generate varibles for which we have inialization for($i=1;$i<@renamed_state_var;$i++){ if( exists($new_state_var_init{$renamed_state_var[$i]}) ){ gen_statement($renamed_state_var[$i].'='.$new_state_var_init{$renamed_state_var[$i]}); } } # Now initialize the rest to None my $first=0; for( $i=1; $i<@renamed_state_var; $i++){ unless( exists($new_state_var_init{$renamed_state_var[$i]}) ){ if( $first==0 ){ gen_chunk($renamed_state_var[$i]); $first++; }else{ gen_chunk('=',$renamed_state_var[$i]); } } } ($first) && gen_chunk('=None'); gen_statement; } # # Print statement for Python 3 # sub print3 { my $start=$_[0]; my ($k,$handle); # end="") instead of trailing comma in Python 2 gen_chunk($ValPy[$start],'('); if( $ValClass[$start+1] eq 'i' ){ $handle=$ValPy[$start+1]; $k=$start+2; }else{ $handle=''; $k=$start+1; } if( $#ValClass>$k ){ $TrStatus=expression($k,$#ValClass,0); return -1 if ($TrStatus<0); }else{ if(length($handle)>0){ gen_chunk("file=$handle)"); }else{ gen_chunk(')'); } return; } if ($ValPerl[$start] eq 'print' && $ValClass[-1] eq '"' ){ if( $Perlscan::PythonCode[-1]=~qr[\\n["']$] ){ substr($Perlscan::PythonCode[-1],-3,2)=''; # Perl print was actually say }else{ gen_chunk(',end=""'); } } if( $handle){ #printing to file handle gen_chunk(',file=',$handle); # Python 3.x: print('hello world', file=file_object) } #say gen_chunk(')'); return scalar(@ValClass); } # print3 sub assignment # # Analyse and generate code for Perl assignment statement # { my $start=$_[0]; # start of analysys of assignment statement if( $start<0 || $TrStatus<0 ){ return -255; } my $limit=(scalar(@_)>1) ? $_[1] : $#ValClass; # Nov 11, 2019 accept not only the index of the first token, but also index of the last. my ($k,$split,$post_processing,$comma_pos,$colon_pos,$from,$to); # # Assignment with post condition need to be transformed into regular control structure in Python # $k=$start; # # C-style ++ and -- # if( $ValClass[$#ValClass] eq '^' ){ if ($#ValClass-$start==1){ gen_chunk($ValPy[$k],$ValPy[$k+1]); return $#ValClass+1 }else{ replace($#ValClass,'=','=',substr($ValPy[-1],0,2)); append('d','1','1'); $limit+=1; } } # # We assume this is a regular assignment with "=". Let's analyse the left side. # if( ($split=index($TokenStr,'=',$k))>-1 ){ if( $split-$k==1 ){ # single token on the left side -- regular assignment; gen_chunk($ValPy[$k]); # simple scalar assignment -- varible of left side }elsif( $ValPerl[$k] eq '(' ){ # brackets on the right side -- we assume that this is the list on the left side gen_chunk('['); $k++; gen_chunk($ValPy[$k]); # first in the cascading assignement $k++; while($k<$split ){ # this was we skip delimiters if( substr($TokenStr,$k,1)=~/^[sha]/ ){ gen_chunk(','.$ValPy[$k]); } $k++; } gen_chunk(']'); $k++; }else{ # possibly array with complex subscripts or complex hash key expression $k=expression($k,$split-1,0); # on the left side it can be array index or something more complex return -255 if ($k<0); } gen_chunk($ValPy[$split]); # generate appropriate operation hidden under generic token '=' ( +=, -=, etc) if( $limit - $split == 1 ){ # only one token after '=' if ($ValClass[$limit] eq 'x' ) { gen_chunk(qq{subprocess.run($ValPy[$split+1],capture_output=True,text=True,shell=True)}); gen_statement(); gen_statement(qq{subprocess_rc=$ValPy[$split-1].returncode}); gen_chunk($ValPy[$k]); gen_chunk($ValPy[$split]); gen_chunk($ValPy[$k].'.stdout'); }else{ gen_chunk($ValPy[$limit]); # that includes diamond operator <> and Aug 10,2020 #$is_numeric{$ValPerl[$k]}='d'; # capture the type of variable. } return($#ValClass); }else{ # we have some kind of expression on the right side if( (substr($TokenStr,$split,2) eq '=(')>-1 && (index($TokenStr,')?',$split))>-1 ){ # this is C-style conditional assigment x=(v>0):y:z; # Step one analyse the expression in blackets $to=matching_br($split+1); ($to<0) && return -255; # Fist we need to generate then part of ternary if expression $colon_pos=index($TokenStr,':',$to+2); if( $colon_pos>-1 ){ $k=expression($to+2,$colon_pos-1,0); return -255 if( $k<0 ); }else{ $k=expression($to+2,$#ValClass,0); return -255 if( $k<0 ); } gen_chunk(' if '); $k=$split+1; if( $to==$k+2){ $k++; gen_chunk($ValPy[$k]); # expression consist of one token $k+=3; # the next symbol after ')?' }else{ $k=expression($k+1,$to-1,0); # generate conditon without brackets return -255 if ($k<0); } if ($colon_pos>-1){ gen_chunk(' else '); $k=expression($colon_pos+1,$#ValPerl,0); # up to the very end return -255 if( $k<0 ); } gen_statement(); # output if line return $#ValClass; }else{ $k=expression($split+1,$limit,0); # process expression without brackets -- last param is 0 return -255 if( $k<0 ); } } }elsif( ($split=index($TokenStr,'~',$k))>-1) { $k=regex_and_translate($start,$k,$split,0); return $k+1; }else{ return -255; } return($#ValClass); } # assignment sub regex_and_translate # # process very tricky regex and tranlate function # { my($start,$k,$split)=@_; if( $ValClass[$split+1] eq 'f' && $ValPerl[$split+1] eq 'tr'){ # tr is a special case -- this is not regular expression if( $split-$k==1 ){ gen_chunk($ValPy[$split-1],'=',$ValPy[$split-1],'.translate(',$ValPy[$split-1],$ValPy[$split+1],')'); # a=a.trasnlate(a) }else{ $k=expression($start,$split-1,0); # can be array index or something more problemtic ;-) return -255 if( $k<0 ); gen_chunk('='); $k=expression($start,$split-1,0); # replicate the left part of the assignment gen_chunk('.translate('); $k=expression($start,$split-1,0); # replicate the left part of the assignment gen_chunk($ValPy[$split+1],')'); } # next token $k=$split+1; }elsif( ($split=index($TokenStr,'~',$k))>-1 ){ #regular expression $string =~ /cat/ or $string =~m/cat/ # re.search(r'cat', string): ... if($ValClass[$split+1] eq 'q') { # match only; There is no variable to assign results if( substr($ValPy[$split+1],0,1) eq '.' ){ $k=expression($start,$split-1,0); # generate left side of the regular expression return -255 if( $k<0 ); gen_chunk($ValPy[$split+1],')'); # add dot part generated by scanner }else{ gen_chunk($ValPy[$split+1]); $k=expression($start,$split-1,0); # generate left side of the regular expression, it can be array index or something even more problemtic ;-) return -255 if( $k<0 ); if (index($ValPy[$split+1],':=')>-1){ gen_chunk('))'); # close function bracket and expression }else{ gen_chunk(')'); # close function bracket and expression } } $k=$split+1; }elsif( $ValClass[$split+1] eq 'f' && $ValPerl[$split+1] eq 're' ){ # this is case of substirution if( $split-$k==1 ){ gen_chunk($ValPy[$split-1]); # a gen_chunk('='); # a= if( substr($ValPy[$split+1],0,1) eq '.' ){ gen_chunk($ValPy[$split-1].$ValPy[$split+1]); # a=a.find(string) }else{ gen_chunk("$ValPy[$split+1]$ValPy[$split-1])"); # a=re.sub(rexex,replacement,variable) } }else{ $k=expression($start,$split-1,0); # can be array index or something more problemtic ;-) return -255 if( $k<0 ); gen_chunk('='); if( substr($ValPy[$split+1],0,1) eq '.' ){ $k=expression($start,$split-1,0); # replicate the left part of the assignment return -255 if( $k<0 ); gen_chunk($ValPy[$split+1]); }else{ $k=expression($start,$split-1,0); # replicate the left part of the assignment return -255 if( $k<0 ); gen_chunk(')'); } } # next token $k=$split+1; }else{ return -255; } } return $k+1; } sub matching_br # Find matching bracket, arase closeing braket, if found. # Arg1 - starting position for scan # Arg2 - (optional) -- balance from whichto start (allows to skip opening brace) { my $scan_start=$_[0]; my $balance=(scalar(@_)>1) ? $_[1] : 0; # case where opening bracket is missing for some reason or was skipped. for( my $k=$scan_start; $k) }else{ gen_chunk($ValPy[$begin]); #while if ($ValClass[$start] eq '('){ $TrStatus=expression($start,$limit,1); # gen expression }else{ $TrStatus=expression($start,$limit,0); # gen expression } } gen_chunk(':'); return($#ValClass); }elsif( $ValPerl[$begin] eq 'for' && $ValPerl[$begin+1] eq '(' && $ValClass[$begin+2] !~ /[ahf]/ ){ # regular for loop but can be foreach loop too if( $ValPerl[-1] eq '++'){ $increment=''; }elsif( $ValPerl[-1] eq '--'){ $increment='-1'; }else{ logme('S', "In the current version more complex increment than ++ or -- requires manual translation"); $TrStatus=-1; return -255; } $start=$begin; gen_chunk($ValPy[$start]); if ($ValClass[$start+2] eq ';'){ gen_chunk($ValPy[$start+3],' in range(',$ValPy[$start+3]); $end_pos=$start+2; }else{ gen_chunk($ValPy[$start+2]); # index var gen_chunk('in range('); $start=index($TokenStr,'=',$start); # find initialization. BTW it can be expression if( $start == -1 ){$TrStatus=-1; return -255;} $start++; # find end of initialization $end_pos=next_same_level_token(';',$start,$limit); # end of expression if( $end_pos-$start==1 ){ gen_chunk($ValPy[$start]); }else{ $TrStatus=expression($start,$end_pos-1,0); # gen expression if( $TrStatus < -1 ){return -255;} } } gen_chunk(','); # # Analize loop exit condition # $start=index($TokenStr,'>',$end_pos); # fron last ; if( $start == -1 ){$TrStatus=-1; return -255; } $start++; # find end of loopexit condition $end_pos=next_same_level_token(';',$start,$limit); if( $end_pos == -1 ){$TrStatus=-1; return -255; } if( $end_pos-$start==1 ){ if($ValClass[$start] eq 'a'){ gen_chunk($ValPy[$start]); # array as limit of the range }else{ gen_chunk($ValPy[$start]); # all other cases of single limit of the range } }else{ $TrStatus=expression($start,$end_pos-1); # gen expression for the limit of the range return -255 if ($TrStatus<0); } # we already got increamnt at the begining if( $increment) { gen_chunk(",$increment):"); }else{ gen_chunk('):'); } return($#ValClass); }elsif( $ValPerl[$begin] eq 'for' || $ValPerl[$begin] eq 'foreach' ){ gen_chunk($ValPy[$begin]); if ($ValClass[$start] eq 's'){ gen_chunk($ValPy[$start].' in '); }else{ gen_chunk('default_var in '); } $start=index($TokenStr,'(',$start); if( substr($TokenStr,$start) eq '(a)') { # loop over an array gen_chunk($ValPy[$start+1]); }elsif( substr($TokenStr,$start)=~/^\(f\(?h\)/ ){ # foreach loop over a hash $start++; # skip '(' if( $ValPerl[$start] eq 'keys' || $ValPerl[$start] eq 'values' ){ $hashpos=index($TokenStr,'h',$start); gen_chunk("$ValPy[$hashpos]q$ValPy[$start]()"); # translate keys function into postfix notation }else{ $TrStatus=-1; return -255; } }elsif( substr($TokenStr,$start)=~/^\((['"qds](,['"qds])*)\)/ ){ # loop over explisit list $tempvar='['; for(my $i=$start+1;$i<$#ValClass;$i++){ $tempvar.=$ValPy[$i]; } gen_chunk($tempvar,']'); }else{ $TrStatus=-1; return -255; } gen_chunk(':'); return $#ValClass; }else{ $TrStatus=-1; return -255; } } # control sub next_same_level_token # get the next token on the same nesting level. { my $t=$_[0]; my $scan_start=$_[1]; my $scan_end=$_[2]; my $balance=0; for( my $k=$scan_start; $k<$scan_end; $k++ ){ $s=substr($TokenStr,$k,1); if( $s eq '(' ){ $balance++; }elsif( $s eq ')' ){ $balance--; } if( $s eq $t && $balance==0 ){ return $k; } } # for return -1; # not found } # next_comma sub function # Built-in functions processing { my $begin=$_[0]; my ($limit,$start ); # $limit is the position of closing bracket, if any # start is the position of the first symbol after the opening bracket $start=$begin+1; # default start for parentheiss less function; if( scalar(@_)>1 ){ $end_pos=$limit=$_[1]; } my $bracketed=0; # nessesary for the proper call of expression. if( $begin==$#ValClass || scalar(@_)>1 && $begin==$limit ){ $bracketed=-1; # serves as zero arg flag; }elsif( $begin<$#ValClass && $ValClass[$begin+1] eq '(' ){ $bracketed=1; $limit=matching_br($begin+1); $start=$begin+2; # function call with normal pathethis if( $ValClass[$limit] eq ')' ){ $end_pos=$limit-1; } }elsif( ($k=index($TokenStr,'0'))>=1 ){ $end_pos=$limit=$k-1; # if we have && that function should end before it }else{ $end_pos=$limit=$#ValClass; } my ($k, $split, $split2, $delta, $dict, $incr, $arg1, $arg2, $perl_name, $py_name); if( $begin<0 || $TrStatus<0 ){ $TrStatus=-255; return -255; } $perl_name=$ValPerl[$begin]; $py_name=$ValPy[$begin]; # # At this point # $start is the first token after prath (if presnet) # $end_pos is the sybol befor closing paren, if present # $limit -- the last symbol that belongs to this function, Scanning will start from $limit+1 if( $perl_name eq 'substr' ){ # substr($line,$from, To ) # 0 1 2 3 4 5 if( substr($TokenStr,$start,$end_pos-$start+1) =~ /^s,([-]?d),([-]?d)/ ){ # the simplest case when start and length of the substring are constants: substr($test,-1,1); $arg1=$ValPy[index($TokenStr,'d',$start)]; $arg2=$ValPy[$end_pos]; if( length($1) == 2 && substr($1,0,1) eq '-' ){ $arg1=-$arg1; } if( length($2) == 2 && substr($2,0,1) eq '-' ){ $arg2=-$arg2; } if( length($arg2)==1){ # positive gen_chunk("$ValPy[$start]\[$arg1\]"); # single symbol }elsif( $arg1<0 && -$arg1>=$arg2 ){ gen_chunk("$ValPy[$start]\[$arg1:\]"); # last symbol }elsif( $arg2>0 ){ $arg2+=$arg1; gen_chunk("$ValPy[$start]\[$arg1:$arg2\]"); # positive length added to the first index }elsif($arg2<0) { gen_chunk("$ValPy[$start]\[$arg1:$arg2\]"); # negative value is not length but the position from the last }else{ $TrStatus=-1; return -255; } return $limit+1; # $limit is the position of closing bracket, if any } # more complex case with varibles or expression in iether second or the third arguments #step1 -- generate varible from which substing is extracted (can be an expression) if( substr($TokenStr,$start+1,1) eq ',' ){ # Simplest case -- scalar varaible or constant is used gen_chunk($ValPy[$start]); # a simple first arg -- the name of the variable or a string $split=$start+1; }else{ $split=next_same_level_token(',',$start,$end_pos); if( $split==-1 ){ $TrStatus=-1; return -255 } $k=expression($start,$split-1,0); return -255 if ($k<0); } # # Processing of thesecond arg -- starting position. Can beconstant or expression # gen_chunk('['); # opening bracket if( substr($TokenStr,$split,3) =~ /,(\-?)d[),]/){ $arg1=( length($1)==1) ? -$ValPy[$split+1] : $ValPy[$split+1]; $arg1=$ValPy[$split+1]; } #determine if there are two or three argument $split2=next_same_level_token(',',$split+1,$end_pos); if( $split2>-1 ){ # substr($line,$start,$lenth) -- the third argumant is present $k=expression($split+1,$split2-1,0); # generate the secong arg return -255 if ($k<0); if( $end_pos-$split2==1 && $ValClass[$end_pos] eq 'd'){ #positive length $arg2=$ValPy[$end_pos]; if ( $ValPy[$split2+1] == 1 ){ gen_chunk(']'); # substr($line,$a,1) -- the third arg is 1 return $limit+1; } gen_chunk(':'); if( defined($arg1) ){ # rescn first arg and add the second if ($arg1==0){ gen_chunk($arg2); }else{ $arg2=$arg1+$arg2; gen_chunk($arg2); } }else{ $k=expression($split2+1,$end_pos,0); gen_chunk("+$arg2"); } gen_chunk("]"); }elsif( $end_pos-$split2==2 && substr($TokenStr,$split2,3) eq ',-d' ){ # third arg is negative constant. Can be used directly. gen_chunk(':'); $arg2=-$ValPy[$end_pos]; gen_chunk("$arg2]"); }else{ # general case of the third argument -- need to rescan the first and add the second gen_chunk(':'); if (defined($arg1)){ gen_chunk("$arg1+"); $k=expression($split2+1,$end_pos,0); gen_chunk(']'); }else{ $k=expression($split+1,$split2-1,0); # generate secong argument again return -255 if ($k<0); gen_chunk("+"); $k=expression($split2+1,$end_pos,0); return -255 if ($k<0); gen_chunk(']'); } } }else{ # substr($line,$start) $k=expression($split+1,$end_pos,0); return -255 if ($k<0); gen_chunk(':]'); } }elsif( $perl_name eq 'index' || $perl_name eq 'rindex' ){ # index(text,'search',from) # 0 1 2 3 4 # string.find(text, substr, start) if( $ValClass[$start+1] eq ',' ){ # Simplest case -- scalar varaible is used gen_chunk("$ValPy[$start]$py_name("); # line.find -- .find is now in scannet table Nov 15, 2019 --NNB $split=$start+2; }else{ $split=next_same_level_token(',',$start,$end_pos); # next comma on the same nesting level $k=expression($start+2,$split-1,0); return -255 if ($k<0); gen_chunk("$ValPy[$start]("); # .find and opening bracket } $split2=next_same_level_token(',',$split+1,$end_pos); # processing string to find if( $split2>-1 ){ # index($line,$string,$start) if( $split+2==$split2 ){ gen_chunk($ValPy[$split+1]); }else{ $k=expression($split+1,$split2-1,0); return -255 if ($k<0); } gen_chunk(','); $k=expression($split2+1,$end_pos,0); return -255 if ($k<0); gen_chunk(')'); }else{ # index($line,'xxx') -> line.find('xxx') $k=expression($split,$end_pos,0); return -255 if ($k<0); gen_chunk(')'); } }elsif( $perl_name eq 'join' ){ # $args=join(' ',@ARGS) => args=ARGS.join(' '); $split=next_same_level_token(',',$start,$end_pos); if($end_pos-$split==1 ){ # the second argument is not expression or function gen_chunk($ValPy[$end_pos],$py_name); # gen array.join(''); }else{ $TrStatus=expression($split+1,$end_pos); return $TrStatus if ($TrStatus<0); gen_chunk($py_name); # gen .join } if( $split-$start==1 ){ gen_chunk($ValPy[$start].')'); # gen delimiter }else{ $TrStatus=expression($start,$split-1); return $TrStatus if ($TrStatus<0); gen_chunk(')'); # close function invocation } }elsif( $perl_name eq 'open' ){ $TrStatus=open_fun($begin,'f'); # open like function, for example in if return -255 if( $TrStatus < 0 ); }elsif( $perl_name eq 'exists' ){ # applicable to the elemnt of hash only $k=$start; if( $ValClass[$k] eq 's') { $dict=$ValPy[$k]; $k+=2; if( $k+1<=$#ValPerl && $k+1==$end_pos ) { #single token between {} if( $ValClass[$k] eq 's' || $ValClass[$k] eq '"' || $ValClass[$k] eq "'"){ gen_chunk("$ValPy[$k] in $dict"); return $k+3; # you need to skip two closing brackets: }) } return -255; }else{ $k=expression($k-1,$limit,1); #preserve brackets return -255 if ($k<0); } }else{ return -255 } }elsif(substr($perl_name,0,1) eq '-') { #file predicate, always one agument gen_chunk($py_name.'('.$ValPy[$start].')'); }elsif( $perl_name eq 'split' ){ $k=$start; # pos of the first arg if ($bracketed==1 && $ValPerl[$k] eq ')'){ # special case of splitting default varible on white space gen_chunk("default_var.split(' ')"); return $end_pos+1; } $arg1=$ValPy[$k]; # first argument is present $arg1type=$ValClass[$k]; if($ValPerl[$k+1] ne ','){ # special case of splitting degaqult varible on white space if( $arg1 eq ' ' || $arg1type eq '"' ){ gen_chunk("default_var.split($arg1)"); }else{ gen_chunk($py_name,"($arg1,default_var"); # this is a regex function } return $end_pos+1; } $k+=2; # $k now points to the start of the second argument if( $k==$end_pos ){ $arg2=$ValPy[$k]; $k+=1; if( $arg1 eq ' ' || $arg1type eq '"' ){ gen_chunk("$arg2.lstrip($arg1"); }else{ gen_chunk($py_name,"($arg1,$arg2"); # this is a regex function } }elsif( ($split=next_same_level_token(',',$k+1,$end_pos))>-1 ){ #we have third argument to split gen_chunk($py_name,"($arg1,"); $k=expression($k,$split-1); gen_chunk(','); if( $end_pos-$split==1 ){ gen_chunk($ValPy[$k]); }else{ $k=expression($split+1,$end_pos,0); return -255 if($k<0); } } gen_chunk(')'); }elsif($perl_name eq 'print' ) { $TrStatus=print3($begin); # in Python3 this is a function if ($TrStatus<0) { return -255; } }elsif($perl_name eq 'defined' ) { # open used without parantethisi. always has one argument $k=$start; gen_chunk("$ValPy[$k] != none"); $k=($k+1<=$#ValPerl && $ValPerl[$k+1] eq ')') ? $k+2 : $k+1; }elsif($perl_name eq 'unshift' ){ # unshift ARRAY,LIST # arrayOrList.insert(0 , element) $k=$start; gen_chunk($ValPy[$k],$py_name,$ValPy[$k+2],')'); $k=($k+3<=$#ValPerl && $ValPerl[$k+3] eq ')') ? $k+4 : $k+3; }elsif($perl_name eq 'shift' ){ # assent only a single arg -- array; if no argument then it uses @_ array if( $bracketed==-1 ){ if( $CurSub eq 'main' || $Pythonizer::CurNest==0 ){ $arg1='sys.argv'; }else{ $arg1='perl_arg_array'; } gen_chunk("$arg1$py_name"); }elsif( $end_pos==$start ){ gen_chunk($ValPy[$start],$py_name); }else{ return -255; } }elsif( $perl_name eq 'push' ){ $k=$start; if( $ValClass[$k+2] =~ /[a"]/ ){ }elsif( $ValClass[$k+2] eq 'a' ){ gen_chunk($ValPy[$k],$py_name,$ValPy[$k+2],')'); # push(@x,@y) }elsif($ValClass[$k+2] eq '(' ){ gen_chunk($ValPy[$k],$py_name.'['); $k=expression($k+3,$end_pos,0); gen_chunk('])'); }else{ $k=expression($k+3,$end_pos,0); } }elsif($perl_name eq 'delete' ){ # open used without parantethisi. always has one argument $k=($ValPerl[$start+1] eq '(') ? $start+2 : $start+1; gen_chunk($ValPy[$k],$py_name,$ValPy[$k+2],')'); # delete($hash{$key}) => hash.pop($key) }elsif($perl_name eq 'sprintf' ){ #$ValPy[$start] -- format string gen_chunk($ValPy[$start],' % ( '); # format string $k=expression($start+2,$end_pos,0); # skip initial',' and scan all variables return -255 if $k<0; gen_chunk(')'); # }elsif($perl_name eq 'undef' ){ # undef in Perl accepts list of arguments if( $bracketed==-1){ gen_chunk('None'); }else{ gen_chunk("$ValPy[$start]"); for(my $i=$start+1; $i<$end_pos; $i+=2 ){ last if ($ValClass[$i] ne ','); # check if the list endeded gen_chunk("=$ValPy[$i+1]"); } gen_chunk("=None"); # final assignment } }elsif($perl_name eq 'chomp' ){ # undef in Perl accepts list of arguments if ($bracketed==-1){ gen_chunk($ValPy[1]."=default_var$py_name"); }elsif ($start==$end_pos){ gen_chunk($ValPy[$start].'='.$ValPy[$start].$py_name); }else{ for(my $i=$start+1; $i<$end_pos; $i+=2 ){ last if ($ValClass[$i] ne ','); # check if the list endeded gen_chunk($ValPy[$i],'=',$ValPy[$i],$ValPy[0],';'); } } }elsif($perl_name eq 'chop'){ gen_chunk("$ValPy[$start]=$ValPy[$start]".'[0,-1]'); }elsif( substr($py_name,0,1) eq '.' ){ #Generic Perl built-in function which is a method in Python $ValPy[$limit]='' if $bracketed==1; # in this case we do not need to process closing bracket. $TrStatus=expression($start,$end_pos,0); return -255 if ($TrStatus<0); gen_chunk($py_name); # add method }else{ gen_chunk($py_name); if ( $bracketed==-1 ){ # zero arguments -- special case gen_chunk('()'); }elsif( $end_pos==$start ){ # single argument gen_chunk('('.$ValPy[$start].')') }elsif( $bracketed==1 ){ $TrStatus=expression($start,$end_pos,1); return -255 if ($TrStatus<0); }else{ gen_chunk('('); $TrStatus=expression($start,$end_pos,0); return -255 if ($TrStatus<0); gen_chunk(')'); } } return $limit+1; # limit_1 always represnt the fist not scanned symbol for this function. } #function sub open_fun # # Process Perl open statement # assents two parameters # start -- Starting postion # mode -- 's' or 'f' statement or invocation in expression { my ($start,$mode)=@_; #my $limit=(scalar(@_)>1) ? $_[1] : balance_br($start+1); my($k,$myline, $target,$open_mode,$handle); # open (SYSFORM,'>',$output_file ) || abend(__LINE__,"Cannot open file $output_file for"); $k=($ValPerl[$start+1] eq '(') ? $start+2: $start+1; $handle=$ValPy[$k]; unless( $ValClass[$k] =~ /[is]/ ){ logme('W',"In open statement handle $ValPerl[$k] is not identifier or scalar variable. Translation might be incorrect"); } $k+=2 if( $ValPerl[$k+1] eq ','); if( $ValClass[$k] eq '"' && $ValClass[$k+1] eq ',' ){ # this is the second argument like in open(FILE,'>',$path); $open_mode=$ValPerl[$k]; # constant without quotes $k+=2; if( $ValClass[$k] eq '"' || $ValClass[$k] eq 's' ){ $target=$ValPy[$k]; } }elsif( $ValClass[$k] eq '"' ){ # ValPerl does not preserve quotes if( $ValPy[$k]=~/^(f?['"])([<->])+/ ){ $open_mode=$2; substr($ValPy[$k],length($1),1)=''; $target=$ValPy[$k]; }else{ # implicit filemode $open_mode='>'; $target=$ValPy[$k]; } }elsif( $ValClass[$k] eq 's' ){ # implicit filemode $open_mode='>'; $target=$ValPy[$k]; } if ($target eq '-' ) { $target='sys.argv[1]'; } if (exists($PyOpen{$open_mode}) ){ $open_mode=$PyOpen{$open_mode} }else{ $open_mode='?'; logme('E',"The mode '$open_mode' in open statement need to be manually translated to Python "); } if( $mode eq 'f' ){ logme('E',"In case of error open function in Python raises the FileNotFoundError exception. The code should be revised"); gen_chunk("($handle:=open($target,'$open_mode'))"); return $k+1 if ($ValClass[$k+1] eq ')'); return $k+1; }elsif( $mode eq 's' ){ $k+=2; output_line('try:'); correct_nest(1,1); output_line("$handle=open($target,'$open_mode')"); correct_nest(-1,-1); output_line('except OSError:'); correct_nest(1,1); output_line('sys.exit()'); correct_nest(-1,-1); }elsif( $mode eq 'c' ){ output_line('try:'); correct_nest(1,1); output_line("$handle=open($target,'$open_mode')"); # Open statement generation from collected info -- $handle, $target and $open_mode correct_nest(-1,-1); gen_chunk('except OSError:'); } #if ValPerl return $#ValClass; } # open_fun sub expression # # Anything in round brackets, including the list # Arg1 == (obligatory) starting point # Arg2 -- limit -- the last token to scan. # Arg3 -- mode of operation # -1 - Put '' in ValPy for external brackets # 0 - do not intefere with external blackets # 1 -preserve round brackets # Arg 4 -- if given set recursion level to 0 { my $begin=$_[0]; unless(defined($begin) ){ logme('S',"Internal error is expression call -- starting position is not defined while processlike $.: $line" ); return -255; } my ($bracketed,$cur_pos,$limit,$mode,$split,$start,$prev_k,$end_pos); if( $begin<0 || $TrStatus<0 ){ $TrStatus=-1; return -255; }elsif( $begin>$#ValClass ){ $cur_pos=$#ValClass; }else{ $cur_pos=$begin; } $start=$cur_pos; # starting point possibly from && or || -- we need that for regular expressions. $mode=(scalar(@_)>2) ? $_[2] : 0; # $mode allows brackets to be supressed but only on recursion level 0 # $mode 1 injects ( and ) if they are not present $limit=$#ValClass; $bracketed=0; if( scalar(@_)>1 ){ $limit=$_[1]; }elsif( $ValClass[$begin] eq '(' ){ $limit=matching_br($begin); }else{ $limit=$_[1]; } $bracketed=1 if $ValClass[$begin] eq '('; $RecursionLevel++; # we are starting from 0 # # we need tocoorect end_pos in case there is a closing bracket to $limit-1 # $end_pos=$limit; if( $mode==1 && $bracketed==0 ){ gen_chunk('('); # generate opening bracket, as requested }elsif( $mode==-1 && $ValClass[$begin] eq '(' && $ValClass[$limit] eq ')'){ # eliminate closing bracket $ValPy[$limit]=''; $end_pos=$limit-1; } $prev_k=-1; # starting position of infinite loop preventor. while($cur_pos<=$limit ){ if( $cur_pos < 0 || $TrStatus<0 ){ $TrStatus=-1; return -255; } unless( defined($ValClass[$cur_pos]) ){ say "Undefined ValClass at pos $cur_pos. Attempting to switch to debug mode "; $DB::single = 1; } # if( $ValClass[$cur_pos] eq '(' ){ # generate bracket if mode=1 or recursion level is above zero gen_chunk($ValPy[$cur_pos]); $cur_pos=expression($cur_pos+1,$end_pos,0); # preserve brackets ($cur_pos<0) && return -255; }elsif( $ValClass[$cur_pos] eq '<' ){ gen_chunk('readline()'); $cur_pos++; }elsif( $ValClass[$cur_pos] eq ')' ){ $RecursionLevel--; gen_chunk($ValPy[$cur_pos]); return $cur_pos+1; }elsif( $ValClass[$cur_pos] eq '0' ){ gen_chunk($ValPy[$cur_pos]); $start=$cur_pos=$cur_pos+1; # change starting point -- we need that for regular expressions. }elsif( $ValClass[$cur_pos] eq 'x' ){ # execution of Unix uility via shell gen_chunk(qq{subprocess.run($ValPy[$cur_pos],capture_output=True,text=True,shell=True).stdout}); $cur_pos++; }elsif( $ValClass[$cur_pos]=~ /[sf]/ ){ # match in Puthon is library re.match # As the argument to =~ can be complex. currently we can transtalte only two simple case: a scalar and an element of array/hash $pos=next_same_level_token('0',$cur_pos,$limit); # limit of search for '~' below $end_pos=( $pos>-1 )? $pos : (($bracketed ==1) ? $limit-1 : $limit); # limit scan to next && or || if( $end_pos-$cur_pos>=2 && $ValClass[$cur_pos] eq 's' && $ValClass[$cur_pos+1] eq '=' && $ValClass[$cur_pos+2] eq 'f' ){ # can be method like while( $line=shift ) gen_chunk($ValPy[$cur_pos],$ValPy[$cur_pos+1]); $cur_pos=function($cur_pos+2,$end_pos); }elsif( $end_pos-$cur_pos>1 && ($split=index(substr($TokenStr,$cur_pos,$end_pos-$cur_pos+1),'~'))>-1 ){ # REGEX processing $line=~/abc/ $cur_pos=regex_and_translate($start,$cur_pos,$cur_pos+$split); # split is index from $cur_pos not abs index }elsif( $ValClass[$cur_pos] eq 'f'){ $cur_pos=function($cur_pos,$end_pos); ($cur_pos<0) && return -255; }else{ gen_chunk($ValPy[$cur_pos]); $cur_pos++; } }elsif( $cur_pos<=$#ValClass && $ValClass[$cur_pos] eq 'i' && $ValClass[$cur_pos+1] eq '(' ){ $end_pos=matching_br($cur_pos+1); # find balanced bracket for the current bracket gen_chunk($ValPy[$cur_pos]); if( $LocalSub{$ValPy[$cur_pos]} ){ # Perl user defined function -- need to pass an array gen_chunk('(['); expression($cur_pos+2,$end_pos-1,0); # call without brackets gen_chunk('])'); $cur_pos=$end_pos+1; }elsif( $ValClass[$cur_pos+2] eq 'f' ){ #built-in function gen_chunk('('); function($cur_pos+2,$end_pos-1); gen_chunk(')'); }else{ #function of unknown origin logme("W","Function $ValPy[$cur_pos] is neither internal not built-in function. Please check the correspondence of arguments"); expression($cur_pos+1,$end_pos,1); # preseve brackets } $cur_pos=$end_pos+1; }else{ gen_chunk($ValPy[$cur_pos]); $cur_pos++; } if( $cur_pos eq $prev_k ){ logme("S","Internal error -- no progress in scanning expression from position $cur_pos"); $TrStatus=-1; return -255; } $prev_k=$cur_pos } if( $mode==1 && $ValClass[$begin] ne '(' ){ #we generated opening bracket, so let's geneerate closing gen_chunk(')'); } $RecursionLevel--; return $cur_pos+1; } #expression sub left_hand_substr # # Perl # substr(s1, fron, len)=s2 # can be translated into Python: # text = text[:start] + replacement + text[(start+length):] # or # s1 = s2.join(s1[0:from],s1[from+1:]) { my $equal_pos=index($TokenStr,'='); my $comman_no=0; state $temp_var=0; my $var=''; my ($replacement,$k); if( $equal_pos == -1 ){ return 255; } if( index(q(s'"qd),$ValClass[$equal_pos+1])>=1 && $#ValClass==$equal_pos+1 ){ # $#ValClass==$equal_pos+1 means that we deal with =$str variant # substr($str,$from,$len)=$str2; -- no expression on the right part $replacement=$ValPy[$equal_pos+1]; # we can translate such subst in a single line }else{ # we need a temp Variable to storethe replacement string $replacement="replacement$."; gen_chunk("$replacement.="); $k=expression($equal_pos+1); # parse the tail of te line first starting from '=' return -255 if ($k<0); gen_statement() # out the generated line } if( $ValClass[1] eq '(' && $ValClass[2] eq 's' ){ # the first argument should be scalar $var=$ValPy[2]; gen_chunk("$var=$var".'[:'); }else{ return -255; } for( $k=4; $k<@ValClass; $k++ ){ if( $ValClass[$k] eq ')' ){ last; }elsif( $ValClass[$k] eq ',' ){ $comma_no++; if( $comma_no==1 ){ gen_chunk("] + $replacement + $var".'[('.$ValPy[$k-1]); }elsif( $comma_no==2 ){ gen_chunk("+$ValPy[$k-1]:]"); } }elsif( $ValClass[$k] eq 'f' ){ $k=function($k); return -255 if ($k<0); }elsif( $ValClass[$k] eq '(' ){ $k=expression($k+1); return -255 if ($k<0); }else{ gen_chunk($ValPy[$k]); $k++; } } #for return $k; } #left_hand_substr __DATA__ def PostIncr(name, local={}): #Equivalent to name++ if name in local: local[name]+=1 return local[name]-1 globals()[name]+=1 return globals()[name]-1 def PostDecr(name, local={}): #Equivalent to name-- if name in local: local[name]-=1 return local[name]-1 globals()[name]-=1 return globals()[name]-1