gas-preprocessor.pl 43 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215
  1. #!/usr/bin/env perl
  2. # by David Conrad
  3. # This code is licensed under GPLv2 or later; go to gnu.org to read it
  4. # (not that it much matters for an asm preprocessor)
  5. # usage: set your assembler to be something like "perl gas-preprocessor.pl gcc"
  6. use strict;
  7. # Apple's gas is ancient and doesn't support modern preprocessing features like
  8. # .rept and has ugly macro syntax, among other things. Thus, this script
  9. # implements the subset of the gas preprocessor used by x264 and ffmpeg
  10. # that isn't supported by Apple's gas.
  # Every accepted spelling of a target architecture, mapped to the canonical
  # name used throughout the rest of the script.
  my %canonical_arch = (
      aarch64 => "aarch64",
      arm64   => "aarch64",
      arm     => "arm",
      powerpc => "powerpc",
      ppc     => "powerpc",
  );

  # Comment leader of the input assembly, keyed by canonical architecture.
  my %comments = (
      aarch64 => '//',
      arm     => '@',
      powerpc => '#',
  );

  # Assembler command line and the derived C preprocessor command line.
  my (@gcc_cmd, @preprocess_c_cmd);

  # Output comment leader and target architecture; both are set later.
  my ($comm, $arch);

  # Settings that can be changed through command-line options.
  my $as_type     = "apple-gas";
  my $fix_unreq   = ($^O eq "darwin");
  my $force_thumb = 0;
  my $verbose     = 0;

  # Alternation of the ARM condition-code suffixes, for use inside regexes.
  my $arm_cond_codes = join("|", qw(eq ne cs cc mi pl vs vc hi ls ge lt gt le al hs lo));

  # Help text printed by usage().
  my $usage_str = "
$0\n
Gas-preprocessor.pl converts assembler files using modern GNU as syntax for
Apple's ancient gas version or clang's incompatible integrated assembler. The
conversion is regularly tested for Libav, x264 and vlc. Other projects might
use different features which are not correctly handled.
Options for this program needs to be separated with ' -- ' from the assembler
command. Following options are currently supported:
-help - this usage text
-arch - target architecture
-as-type - one value out of {{,apple-}{gas,clang},armasm}
-fix-unreq
-no-fix-unreq
-force-thumb - assemble as thumb regardless of the input source
(note, this is incomplete and only works for sources
it explicitly was tested with)
-verbose - print executed commands
";
  # Write the help text held in $usage_str to the currently selected output
  # handle. Takes no arguments.
  sub usage() {
      print($usage_str);
  }
  # Consume gas-preprocessor's own options from the front of @ARGV. Parsing
  # stops as soon as the assembler command (@gcc_cmd) has been collected: either
  # everything following a literal "--", or the first non-option word together
  # with all remaining arguments.
  while (@ARGV) {
      my $opt = shift @ARGV;

      if ($opt eq "--") {
          @gcc_cmd = @ARGV;
      } elsif ($opt eq "-help") {
          usage();
          exit 0;
      } elsif ($opt eq "-verbose") {
          $verbose = 1;
      } elsif ($opt eq "-force-thumb") {
          $force_thumb = 1;
      } elsif ($opt eq "-arch") {
          $arch = shift @ARGV;
          die "unknown arch: '$arch'\n" unless exists $canonical_arch{$arch};
      } elsif ($opt eq "-as-type") {
          $as_type = shift @ARGV;
          die "unknown as type: '$as_type'\n"
              unless $as_type =~ /^((apple-)?(gas|clang)|armasm)$/;
      } elsif ($opt =~ /^-(no-)?fix-unreq$/) {
          # $1 is "no-" for -no-fix-unreq and undefined for -fix-unreq.
          $fix_unreq = !defined $1;
      } elsif ($opt =~ /^-/) {
          die "option '$opt' is not known. See '$0 -help' for usage information\n";
      } else {
          push @gcc_cmd, $opt, @ARGV;
      }

      last if @gcc_cmd;
  }
  # Derive the preprocessing command from the assembler command, based on the
  # kind of input file named on the command line.
  if (grep { /\.c$/ } @gcc_cmd) {
      # C file (inline asm?) - compile
      @preprocess_c_cmd = @gcc_cmd;
      push @preprocess_c_cmd, "-S";
  } elsif (grep { /\.[sS]$/ } @gcc_cmd) {
      # asm file, just do C preprocessor
      @preprocess_c_cmd = @gcc_cmd;
      push @preprocess_c_cmd, "-E";
  } elsif (grep { /-(v|h|-version|dumpversion)/ } @gcc_cmd) {
      # pass -v/--version along, used during probing. Matching '-v' might have
      # unintended results but it doesn't matter much if gas-preprocessor or
      # the compiler fails.
      if ($verbose) {
          print STDERR join(" ", @gcc_cmd) . "\n";
      }
      exec(@gcc_cmd);
  } else {
      die "Unrecognized input filetype";
  }
  # armasm cannot preprocess, so a generic "cpp" is used in its place and the
  # command line is adapted to it.
  if ($as_type eq "armasm") {
      $preprocess_c_cmd[0] = "cpp";

      # Normally a preprocessor for windows would predefine _WIN32,
      # but we're using any generic system-agnostic preprocessor "cpp"
      # with -undef (to avoid getting predefined variables from the host
      # system in cross compilation cases), so manually define it here.
      push @preprocess_c_cmd, "-undef", "-D_WIN32";

      @preprocess_c_cmd = grep { !/^-nologo$/ } @preprocess_c_cmd;

      # Remove -ignore XX parameter pairs from preprocess_c_cmd. After a
      # removal the same position is examined again, because the following
      # elements have shifted down.
      my $pos = 1;
      while ($pos < $#preprocess_c_cmd) {
          my $is_ignore_pair = ($preprocess_c_cmd[$pos] eq "-ignore"
                                and $pos + 1 < $#preprocess_c_cmd);
          if ($is_ignore_pair) {
              splice(@preprocess_c_cmd, $pos, 2);
          } else {
              $pos++;
          }
      }

      # Dependency generation only: run the preprocessor and stop here.
      if (grep { /^-MM$/ } @preprocess_c_cmd) {
          if ($verbose) {
              print STDERR join(" ", @preprocess_c_cmd) . "\n";
          }
          system(@preprocess_c_cmd) == 0 or die "Error running preprocessor";
          exit 0;
      }
  }
  # if compiling, avoid creating an output file named '-.o': when -c is given
  # without any -o option, name the object file after the first source file.
  {
      my $compile_only = grep { /^-c$/ } @gcc_cmd;
      my $has_output   = grep { /^-o/ } @gcc_cmd;

      if ($compile_only && !$has_output) {
          my ($source) = grep { /\.[csS]$/ } @gcc_cmd;
          if (defined $source) {
              (my $outputfile = $source) =~ s/\.[csS]$/.o/;
              push @gcc_cmd, "-o", $outputfile;
          }
      }
  }
  # replace only the '-o' argument with '-', avoids rewriting the make dependency
  # target specified with -MT to '-'. The element following each "-o" is
  # overwritten and then skipped.
  my $index = 1;
  for (; $index < $#preprocess_c_cmd; $index++) {
      next unless $preprocess_c_cmd[$index] eq "-o";
      $preprocess_c_cmd[++$index] = "-";
  }
  # Name of the intermediate .asm file handed to armasm; stays undefined for
  # every other assembler type.
  my $tempfile;
  if ($as_type eq "armasm") {
      # Drop compiler options the generic preprocessor should not receive.
      @preprocess_c_cmd = grep {
          !/^-(?:c|EHsc)$/ && !/^-(?:m|G|W|Z|fp|O|oldit)/
      } @preprocess_c_cmd;

      @gcc_cmd = grep { !/^-EHsc$/ && !/^-(?:G|W|Z|fp|O)/ } @gcc_cmd;

      # The temporary file is named after the first object file argument.
      my ($first_object) = grep { /\.(o|obj)$/ } @gcc_cmd;
      $tempfile = $first_object . ".asm";

      # Remove most parameters from gcc_cmd, which actually is the armasm command,
      # which doesn't support any of the common compiler/preprocessor options.
      @gcc_cmd = grep { !/^-c$/ && !/^-[DUmMI]/ } @gcc_cmd;

      # Assemble the converted temporary file instead of the original source.
      @gcc_cmd = map { /\.S$/ ? $tempfile : $_ } @gcc_cmd;
  } else {
      # The converted source is fed to the assembler on stdin.
      @gcc_cmd = map { /\.[csS]$/ ? qw(-x assembler -) : $_ } @gcc_cmd;
  }
  # detect architecture from gcc binary name
  if (!$arch) {
      if ($gcc_cmd[0] =~ /(arm64|aarch64|arm|powerpc|ppc)/) {
          $arch = $1;
      } else {
          # look for -arch flag
          foreach my $i (1 .. $#gcc_cmd-1) {
              if ($gcc_cmd[$i] eq "-arch" and
                  $gcc_cmd[$i+1] =~ /(arm64|aarch64|arm|powerpc|ppc)/) {
                  $arch = $1;
              }
          }
      }
  }

  # assume we're not cross-compiling if no -arch or the binary doesn't have the arch name
  if (!$arch) {
      $arch = qx/arch/;
      # qx// yields undef when the command cannot be run, and otherwise keeps
      # the trailing newline of the command's output; without stripping it the
      # %canonical_arch lookup below could never succeed.
      $arch = '' if !defined $arch;
      $arch =~ s/\s+//g;
  }

  die "Unknown target architecture '$arch'" if not exists $canonical_arch{$arch};

  $arch = $canonical_arch{$arch};
  $comm = $comments{$arch};
  # Comment leader of the input; $comm is what the output uses.
  my $inputcomm = $comm;
  $comm = ";" if $as_type =~ /armasm/;
  # PowerPC special purpose register numbers, keyed by register name.
  my %ppc_spr = (ctr    => 9,
                 vrsave => 256);

  # Read the preprocessed source from a pipe.
  print STDERR join(" ", @preprocess_c_cmd)."\n" if $verbose;
  open(INPUT, "-|", @preprocess_c_cmd) or die "Error running preprocessor";

  # Choose where the converted assembly goes: stdout when GASPP_DEBUG is set,
  # a temporary file for armasm, otherwise a pipe into the assembler. Every
  # open is checked so that a failure is reported here rather than surfacing
  # later as output silently going nowhere.
  if ($ENV{GASPP_DEBUG}) {
      open(ASMFILE, ">&STDOUT") or die "Error duplicating STDOUT: $!";
  } else {
      if ($as_type ne "armasm") {
          print STDERR join(" ", @gcc_cmd)."\n" if $verbose;
          open(ASMFILE, "|-", @gcc_cmd) or die "Error running assembler";
      } else {
          open(ASMFILE, ">", $tempfile)
              or die "Error opening output file '$tempfile': $!";
      }
  }
  # Macro definition and expansion state.
  my ($current_macro, $macro_level, $rept_level) = ('', 0, 0);
  my (%macro_lines, %macro_args, %macro_args_default);
  my $macro_count = 0;    # substituted for \@ in expanded macro bodies
  my $altmacro    = 0;    # set by .altmacro, cleared by .noaltmacro

  # State of the .rept/.irp/.irpc block currently being collected.
  my $in_irp = 0;
  my $num_repts;
  my (@rept_lines, @irp_args);
  my $irp_param;

  my @ifstack;            # results of the open .if blocks, innermost last
  my %symbols;            # values assigned with .set/.equ
  my @sections;           # section directives seen so far, used for .previous

  my %literal_labels;     # for ldr <reg>, =<expr>
  my $literal_num  = 0;
  my $literal_expr = ($arch eq "aarch64") ? ".quad" : ".word";

  my $thumb = 0;
  my (%thumb_labels, %call_targets, %import_symbols);

  my (%neon_alias_reg, %neon_alias_type);

  # Bookkeeping for turning numeric local labels into unique names.
  my $temp_label_next = 0;
  my (%last_temp_labels, %next_temp_labels);

  my %labels_seen;
  my %aarch64_req_alias;

  parse_line(".thumb\n") if $force_thumb;
  # pass 1: parse .macro
  # note that the handling of arguments is probably overly permissive vs. gas
  # but it should be the same for valid cases
  while (defined($_ = <INPUT>)) {
      # remove lines starting with '#', preprocessing is done, '#' at start of
      # the line indicates a comment for all supported archs (aarch64, arm, ppc
      # and x86). Also strips line number comments but since they are off anyway
      # it is no loss.
      $_ =~ s/^\s*#.*$//;

      # remove all comments (to avoid interfering with evaluating directives)
      $_ =~ s/(?<!\\)$inputcomm.*//x;

      # Strip out windows linefeeds
      $_ =~ s/\r$//;

      # A ';' separates several statements on one line; each one is parsed on
      # its own, terminated by exactly one newline.
      for my $statement (split(";", $_)) {
          chomp $statement;
          parse_line($statement . "\n");
      }
  }
  # Evaluate an assembler expression and return its value.
  #
  # Every identifier-like token ([A-Za-z._][A-Za-z0-9._]*) that names a symbol
  # with a defined value in %symbols is replaced by that value in parentheses;
  # all other tokens are left untouched. The resulting text is then evaluated
  # as Perl code, so the result is whatever Perl computes for it (undef if the
  # text does not evaluate).
  #
  # The substitution is done in one pass over whole tokens. Substituting with
  # a separate unanchored s/$sym/.../ per symbol could rewrite part of a longer
  # identifier (symbol "a" inside "ab") and treated '.' in a symbol name as a
  # regex wildcard.
  #
  # NOTE(review): this runs string eval on text taken from the assembly
  # source; only feed it trusted input.
  sub eval_expr {
      my $expr = $_[0];
      $expr =~ s{([A-Za-z._][A-Za-z0-9._]*)}
                {defined $symbols{$1} ? "($symbols{$1})" : $1}ge;
      return eval $expr;
  }
  # Recognise an .if directive in $line, evaluate it and push the result onto
  # @ifstack.
  #
  # handle .if directives; apple's assembler doesn't support important non-basic ones
  # evaluating them is also needed to handle recursive macros
  #
  # Supported forms: .if, .ifb, .ifc (each optionally negated with an "n"
  # directly after ".if"), .ifeq and .iflt (for these two the result is
  # assigned outright, so a leading "n" has no effect). Any other variant is
  # fatal.
  #
  # Returns 1 when the line was an .if directive, 0 otherwise.
  sub handle_if {
      my ($line) = @_;

      return 0 unless $line =~ /\.if(n?)([a-z]*)\s+(.*)/;

      # Start from the negation flag; the tests below flip it with xor.
      my $result = $1 eq "n";
      my ($type, $expr) = ($2, $3);

      if ($type eq "") {
          $result ^= eval_expr($expr) != 0;
      } elsif ($type eq "b") {
          # Blank test: true when nothing but whitespace follows.
          $expr =~ s/\s//g;
          $result ^= $expr eq "";
      } elsif ($type eq "c") {
          # String comparison of the two comma-separated operands.
          $expr =~ /(.*)\s*,\s*(.*)/
              or die "argument to .ifc not recognized";
          $result ^= $1 eq $2;
      } elsif ($type eq "eq") {
          $result = eval_expr($expr) == 0;
      } elsif ($type eq "lt") {
          $result = eval_expr($expr) < 0;
      } else {
          chomp($line);
          die "unhandled .if varient. \"$line\"";
      }

      push(@ifstack, $result);
      return 1;
  }
  # Apply conditional-assembly bookkeeping to $line while at least one .if
  # block is open.
  #
  # Returns 1 when the line has been consumed, either because it is a
  # conditional directive itself or because it lies inside a false block, and
  # 0 when the caller should go on processing it.
  #
  # An @ifstack entry is true for an active branch, 0/false for a branch not
  # taken yet, and negative once a branch of that block has already been taken
  # and an .elseif was seen afterwards.
  sub parse_if_line {
      my ($line) = @_;

      # Nothing to do outside of any .if block.
      return 0 unless @ifstack;

      # Don't evaluate any new if statements if we're within
      # a repetition or macro - they will be evaluated once
      # the repetition is unrolled or the macro is expanded.
      if (!@rept_lines && $macro_level == 0) {
          if ($line =~ /\.endif/) {
              pop(@ifstack);
              return 1;
          }
          if ($line =~ /\.elseif\s+(.*)/) {
              my $state = $ifstack[-1];
              if ($state == 0) {
                  $ifstack[-1] = !!eval_expr($1);
              } elsif ($state > 0) {
                  $ifstack[-1] = -$state;
              }
              return 1;
          }
          if ($line =~ /\.else/) {
              $ifstack[-1] = !$ifstack[-1];
              return 1;
          }
          return 1 if handle_if($line);
      }

      # discard lines in false .if blocks
      for my $state (@ifstack) {
          return 1 if $state <= 0;
      }

      return 0;
  }
  # Process one source line (terminated by a newline).
  #
  # Lines inside a false .if block are dropped. Otherwise the line is either
  # recorded as part of a macro definition, recorded as part of a pending
  # .rept/.irp block, or passed on to expand_macros(). The macro and
  # repetition nesting levels are tracked here.
  #
  # Dies on an unmatched .endm and on inconsistent macro nesting.
  sub parse_line {
      my $line = $_[0];

      return if (parse_if_line($line));

      # Macro nesting is only tracked outside of a pending repetition; inside
      # one, the lines are stored verbatim and parsed when it is unrolled.
      if (scalar(@rept_lines) == 0) {
          if ($line =~ /\.macro/) {
              $macro_level++;
              if ($macro_level > 1 && !$current_macro) {
                  die "nested macros but we don't have master macro";
              }
          } elsif ($line =~ /\.endm/) {
              $macro_level--;
              if ($macro_level < 0) {
                  die "unmatched .endm";
              } elsif ($macro_level == 0) {
                  # End of the outermost macro definition.
                  $current_macro = '';
                  return;
              }
          }
      }

      if ($macro_level == 0) {
          if ($line =~ /\.(rept|irp)/) {
              $rept_level++;
          } elsif ($line =~ /\.endr/) {
              # The dot is escaped so that only the directive itself, not any
              # character followed by "endr", closes a repetition.
              $rept_level--;
          }
      }

      if ($macro_level > 1) {
          # Nested macro definition: keep it as text of the enclosing macro.
          push(@{$macro_lines{$current_macro}}, $line);
      } elsif (scalar(@rept_lines) and $rept_level >= 1) {
          push(@rept_lines, $line);
      } elsif ($macro_level == 0) {
          expand_macros($line);
      } else {
          if ($line =~ /\.macro\s+([\d\w\.]+)\s*,?\s*(.*)/) {
              $current_macro = $1;

              # commas in the argument list are optional, so only use whitespace as the separator
              my $arglist = $2;
              $arglist =~ s/,/ /g;

              my @args = split(/\s+/, $arglist);
              foreach my $i (0 .. $#args) {
                  # "name=default"; the stored argument name keeps a :vararg
                  # suffix, the key of the defaults table does not.
                  my @argpair = split(/=/, $args[$i]);
                  $macro_args{$current_macro}[$i] = $argpair[0];
                  $argpair[0] =~ s/:vararg$//;
                  $macro_args_default{$current_macro}{$argpair[0]} = $argpair[1];
              }
              # ensure %macro_lines has the macro name added as a key
              $macro_lines{$current_macro} = [];
          } elsif ($current_macro) {
              push(@{$macro_lines{$current_macro}}, $line);
          } else {
              die "macro level without a macro name";
          }
      }
  }
  # Record a ".set name, expr" or ".equ name, expr" directive: the expression
  # is evaluated with eval_expr() and the value stored in %symbols.
  #
  # Returns 1 when $line was such a directive, 0 otherwise.
  sub handle_set {
      my ($line) = @_;

      my ($name, $value) = $line =~ /\.(?:set|equ)\s+(\S*)\s*,\s*(.*)/
          or return 0;

      $symbols{$name} = eval_expr($value);
      return 1;
  }
  # Handle one line that is outside of any macro definition.
  #
  # The line is examined for, in this order: .if directives, .purgem,
  # .altmacro/.noaltmacro, .set/.equ, the start of a .rept/.irp/.irpc block,
  # an .endr (which unrolls the collected block through parse_line()), and
  # finally an invocation of a known macro (expanded line by line through
  # parse_line()). Anything else goes to handle_serialized_line().
  sub expand_macros {
      my $line = $_[0];

      # handle .if directives; apple's assembler doesn't support important non-basic ones
      # evaluating them is also needed to handle recursive macros
      if (handle_if($line)) {
          return;
      }

      # Match against $line, not $_: while a repetition or a macro is being
      # expanded $_ holds a loop counter or an unexpanded macro line, and for
      # ';'-separated statements it holds the whole input line.
      if ($line =~ /\.purgem\s+([\d\w\.]+)/) {
          delete $macro_lines{$1};
          delete $macro_args{$1};
          delete $macro_args_default{$1};
          return;
      }

      if ($line =~ /\.altmacro/) {
          $altmacro = 1;
          return;
      }

      if ($line =~ /\.noaltmacro/) {
          $altmacro = 0;
          return;
      }

      # In altmacro mode %expr is replaced by the value of the expression.
      $line =~ s/\%([^,]*)/eval_expr($1)/eg if $altmacro;

      # Strip out the .set lines from the armasm output
      return if (handle_set($line) and $as_type eq "armasm");

      if ($line =~ /\.rept\s+(.*)/) {
          $num_repts = $1;
          @rept_lines = ("\n");

          # handle the possibility of repeating another directive on the same line
          # .endr on the same line is not valid, I don't know if a non-directive is
          if ($num_repts =~ s/(\.\w+.*)//) {
              push(@rept_lines, "$1\n");
          }
          $num_repts = eval_expr($num_repts);
      } elsif ($line =~ /\.irp\s+([\d\w\.]+)\s*(.*)/) {
          $in_irp = 1;
          $num_repts = 1;
          @rept_lines = ("\n");
          $irp_param = $1;

          # only use whitespace as the separator
          my $irp_arglist = $2;
          $irp_arglist =~ s/,/ /g;
          $irp_arglist =~ s/^\s+//;
          @irp_args = split(/\s+/, $irp_arglist);
      } elsif ($line =~ /\.irpc\s+([\d\w\.]+)\s*(.*)/) {
          $in_irp = 1;
          $num_repts = 1;
          @rept_lines = ("\n");
          $irp_param = $1;

          # .irpc iterates over the individual characters of its argument.
          my $irp_arglist = $2;
          $irp_arglist =~ s/,/ /g;
          $irp_arglist =~ s/^\s+//;
          @irp_args = split(//, $irp_arglist);
      } elsif ($line =~ /\.endr/) {
          # Take a private copy of the collected block and reset the shared
          # state first, because the unrolled lines may themselves start a
          # new repetition.
          my @prev_rept_lines = @rept_lines;
          my $prev_in_irp = $in_irp;
          my @prev_irp_args = @irp_args;
          my $prev_irp_param = $irp_param;
          my $prev_num_repts = $num_repts;
          @rept_lines = ();
          $in_irp = 0;
          @irp_args = '';

          if ($prev_in_irp != 0) {
              foreach my $i (@prev_irp_args) {
                  foreach my $origline (@prev_rept_lines) {
                      my $line = $origline;
                      $line =~ s/\\$prev_irp_param/$i/g;
                      $line =~ s/\\\(\)//g;     # remove \()
                      parse_line($line);
                  }
              }
          } else {
              for (1 .. $prev_num_repts) {
                  foreach my $origline (@prev_rept_lines) {
                      my $line = $origline;
                      parse_line($line);
                  }
              }
          }
      } elsif ($line =~ /(\S+:|)\s*([\w\d\.]+)\s*(.*)/ && exists $macro_lines{$2}) {
          # Macro invocation, optionally preceded by a label which is emitted
          # on its own.
          handle_serialized_line($1);
          my $macro = $2;

          # commas are optional here too, but are syntactically important because
          # parameters can be blank
          my @arglist = split(/,/, $3);
          my @args;
          my @args_seperator;

          my $comma_sep_required = 0;
          foreach (@arglist) {
              # allow arithmetic/shift operators in macro arguments
              $_ =~ s/\s*(\+|-|\*|\/|<<|>>|<|>)\s*/$1/g;

              my @whitespace_split = split(/\s+/, $_);
              if (!@whitespace_split) {
                  push(@args, '');
                  push(@args_seperator, '');
              } else {
                  foreach (@whitespace_split) {
                      if (length($_)) {
                          push(@args, $_);
                          # Remember whether this argument followed a comma
                          # or just whitespace.
                          my $sep = $comma_sep_required ? "," : " ";
                          push(@args_seperator, $sep);
                          $comma_sep_required = 0;
                      }
                  }
              }

              $comma_sep_required = 1;
          }

          # Start from the declared default values.
          my %replacements;
          if ($macro_args_default{$macro}){
              %replacements = %{$macro_args_default{$macro}};
          }

          # construct hashtable of text to replace
          foreach my $i (0 .. $#args) {
              my $argname = $macro_args{$macro}[$i];

              if ($args[$i] =~ m/=/) {
                  # arg=val references the argument name
                  # XXX: I'm not sure what the expected behaviour if a lot of
                  # these are mixed with unnamed args
                  my @named_arg = split(/=/, $args[$i]);
                  $replacements{$named_arg[0]} = $named_arg[1];
              } elsif ($i > $#{$macro_args{$macro}}) {
                  # more args given than the macro has named args
                  # XXX: is vararg allowed on arguments before the last?
                  $argname = $macro_args{$macro}[-1];
                  if ($argname =~ s/:vararg$//) {
                      $replacements{$argname} .= "$args_seperator[$i] $args[$i]";
                  } else {
                      die "Too many arguments to macro $macro";
                  }
              } else {
                  $argname =~ s/:vararg$//;
                  $replacements{$argname} = $args[$i];
              }
          }

          # Number of this expansion, substituted for \@.
          my $count = $macro_count++;

          # apply replacements as regex
          foreach (@{$macro_lines{$macro}}) {
              my $macro_line = $_;
              # do replacements by longest first, this avoids wrong replacement
              # when argument names are subsets of each other
              foreach (reverse sort {length $a <=> length $b} keys %replacements) {
                  $macro_line =~ s/\\$_/$replacements{$_}/g;
              }
              if ($altmacro) {
                  # In altmacro mode arguments are also referenced without
                  # the leading backslash.
                  foreach (reverse sort {length $a <=> length $b} keys %replacements) {
                      $macro_line =~ s/\b$_\b/$replacements{$_}/g;
                  }
              }
              $macro_line =~ s/\\\@/$count/g;
              $macro_line =~ s/\\\(\)//g;     # remove \()
              parse_line($macro_line);
          }
      } else {
          handle_serialized_line($line);
      }
  }
  # Tell whether $name looks like a 32-bit ARM register: "lr", "ip", or one of
  # the letters r, a or v followed by digits only.
  #
  # Returns 1 if so, 0 otherwise.
  sub is_arm_register {
      my ($name) = @_;

      return 1 if $name eq "lr" || $name eq "ip";
      return ($name =~ /^[rav]\d+$/) ? 1 : 0;
  }
  # Tell whether $name looks like an AArch64 general purpose register: the
  # letter x or w followed by digits only.
  #
  # Returns 1 if so, 0 otherwise.
  sub is_aarch64_register {
      my ($name) = @_;

      return ($name =~ /^[xw]\d+$/) ? 1 : 0;
  }
  # Replace a reference to a numeric local label ("1b", "2f", ...) in $line by
  # a unique label name and return the modified line.
  #
  #   $line - the source line containing the reference
  #   $num  - the number of the local label
  #   $dir  - "b" for a backward reference; anything else is treated as a
  #           forward reference
  #
  # A backward reference resolves to the name recorded in %last_temp_labels
  # for that number. A forward reference allocates a fresh "temp_label_N"
  # name and queues it in %next_temp_labels for that number.
  sub handle_local_label {
      my ($line, $num, $dir) = @_;

      my $replacement;
      if ($dir eq "b") {
          $replacement = $last_temp_labels{$num};
      } else {
          $replacement = "temp_label_$temp_label_next";
          $temp_label_next++;
          push(@{$next_temp_labels{$num}}, $replacement);
      }

      my $target = "$num$dir";
      $line =~ s/\b$target\b/$replacement/g;
      return $line;
  }
  582. sub handle_serialized_line {
  583. my $line = $_[0];
  584. # handle .previous (only with regard to .section not .subsection)
  585. if ($line =~ /\.(section|text|const_data)/) {
  586. push(@sections, $line);
  587. } elsif ($line =~ /\.previous/) {
  588. if (!$sections[-2]) {
  589. die ".previous without a previous section";
  590. }
  591. $line = $sections[-2];
  592. push(@sections, $line);
  593. }
  594. $thumb = 1 if $line =~ /\.code\s+16|\.thumb/;
  595. $thumb = 0 if $line =~ /\.code\s+32|\.arm/;
  596. # handle ldr <reg>, =<expr>
  597. if ($line =~ /(.*)\s*ldr([\w\s\d]+)\s*,\s*=(.*)/ and $as_type ne "armasm") {
  598. my $label = $literal_labels{$3};
  599. if (!$label) {
  600. $label = "Literal_$literal_num";
  601. $literal_num++;
  602. $literal_labels{$3} = $label;
  603. }
  604. $line = "$1 ldr$2, $label\n";
  605. } elsif ($line =~ /\.ltorg/ and $as_type ne "armasm") {
  606. $line .= ".align 2\n";
  607. foreach my $literal (keys %literal_labels) {
  608. $line .= "$literal_labels{$literal}:\n $literal_expr $literal\n";
  609. }
  610. %literal_labels = ();
  611. }
  612. # handle GNU as pc-relative relocations for adrp/add
  613. if ($line =~ /(.*)\s*adrp([\w\s\d]+)\s*,\s*#?:pg_hi21:([^\s]+)/ and $as_type =~ /^apple-/) {
  614. $line = "$1 adrp$2, ${3}\@PAGE\n";
  615. } elsif ($line =~ /(.*)\s*add([\w\s\d]+)\s*,([\w\s\d]+)\s*,\s*#?:lo12:([^\s]+)/ and $as_type =~ /^apple-/) {
  616. $line = "$1 add$2, $3, ${4}\@PAGEOFF\n";
  617. }
  618. # thumb add with large immediate needs explicit add.w
  619. if ($thumb and $line =~ /add\s+.*#([^@]+)/) {
  620. $line =~ s/add/add.w/ if eval_expr($1) > 255;
  621. }
  622. # mach-o local symbol names start with L (no dot)
  623. $line =~ s/(?<!\w)\.(L\w+)/$1/g;
  624. # recycle the '.func' directive for '.thumb_func'
  625. if ($thumb and $as_type =~ /^apple-/) {
  626. $line =~ s/\.func/.thumb_func/x;
  627. }
  628. if ($thumb and $line =~ /^\s*(\w+)\s*:/) {
  629. $thumb_labels{$1}++;
  630. }
  631. if ($as_type =~ /^apple-/ and
  632. $line =~ /^\s*((\w+\s*:\s*)?bl?x?(..)?(?:\.w)?|\.global)\s+(\w+)/) {
  633. my $cond = $3;
  634. my $label = $4;
  635. # Don't interpret e.g. bic as b<cc> with ic as conditional code
  636. if ($cond =~ /^(|$arm_cond_codes)$/) {
  637. if (exists $thumb_labels{$label}) {
  638. print ASMFILE ".thumb_func $label\n";
  639. } else {
  640. $call_targets{$label}++;
  641. }
  642. }
  643. }
  644. # @l -> lo16() @ha -> ha16()
  645. $line =~ s/,\s+([^,]+)\@l\b/, lo16($1)/g;
  646. $line =~ s/,\s+([^,]+)\@ha\b/, ha16($1)/g;
  647. # move to/from SPR
  648. if ($line =~ /(\s+)(m[ft])([a-z]+)\s+(\w+)/ and exists $ppc_spr{$3}) {
  649. if ($2 eq 'mt') {
  650. $line = "$1${2}spr $ppc_spr{$3}, $4\n";
  651. } else {
  652. $line = "$1${2}spr $4, $ppc_spr{$3}\n";
  653. }
  654. }
  655. if ($line =~ /\.unreq\s+(.*)/) {
  656. if (defined $neon_alias_reg{$1}) {
  657. delete $neon_alias_reg{$1};
  658. delete $neon_alias_type{$1};
  659. return;
  660. } elsif (defined $aarch64_req_alias{$1}) {
  661. delete $aarch64_req_alias{$1};
  662. return;
  663. }
  664. }
  665. # old gas versions store upper and lower case names on .req,
  666. # but they remove only one on .unreq
  667. if ($fix_unreq) {
  668. if ($line =~ /\.unreq\s+(.*)/) {
  669. $line = ".unreq " . lc($1) . "\n";
  670. $line .= ".unreq " . uc($1) . "\n";
  671. }
  672. }
  673. if ($line =~ /(\w+)\s+\.(dn|qn)\s+(\w+)(?:\.(\w+))?(\[\d+\])?/) {
  674. $neon_alias_reg{$1} = "$3$5";
  675. $neon_alias_type{$1} = $4;
  676. return;
  677. }
  678. if (scalar keys %neon_alias_reg > 0 && $line =~ /^\s+v\w+/) {
  679. # This line seems to possibly have a neon instruction
  680. foreach (keys %neon_alias_reg) {
  681. my $alias = $_;
  682. # Require the register alias to match as an invididual word, not as a substring
  683. # of a larger word-token.
  684. if ($line =~ /\b$alias\b/) {
  685. $line =~ s/\b$alias\b/$neon_alias_reg{$alias}/g;
  686. # Add the type suffix. If multiple aliases match on the same line,
  687. # only do this replacement the first time (a vfoo.bar string won't match v\w+).
  688. $line =~ s/^(\s+)(v\w+)(\s+)/$1$2.$neon_alias_type{$alias}$3/;
  689. }
  690. }
  691. }
  692. if ($arch eq "aarch64" or $as_type eq "armasm") {
  693. # clang's integrated aarch64 assembler in Xcode 5 does not support .req/.unreq
  694. if ($line =~ /\b(\w+)\s+\.req\s+(\w+)\b/) {
  695. $aarch64_req_alias{$1} = $2;
  696. return;
  697. }
  698. foreach (keys %aarch64_req_alias) {
  699. my $alias = $_;
  700. # recursively resolve aliases
  701. my $resolved = $aarch64_req_alias{$alias};
  702. while (defined $aarch64_req_alias{$resolved}) {
  703. $resolved = $aarch64_req_alias{$resolved};
  704. }
  705. $line =~ s/\b$alias\b/$resolved/g;
  706. }
  707. }
  708. if ($arch eq "aarch64") {
  709. # fix missing aarch64 instructions in Xcode 5.1 (beta3)
  710. # mov with vector arguments is not supported, use alias orr instead
  711. if ($line =~ /^(\d+:)?\s*mov\s+(v\d[\.{}\[\]\w]+),\s*(v\d[\.{}\[\]\w]+)\b\s*$/) {
  712. $line = "$1 orr $2, $3, $3\n";
  713. }
  714. # movi 16, 32 bit shifted variant, shift is optional
  715. if ($line =~ /^(\d+:)?\s*movi\s+(v[0-3]?\d\.(?:2|4|8)[hsHS])\s*,\s*(#\w+)\b\s*$/) {
  716. $line = "$1 movi $2, $3, lsl #0\n";
  717. }
  718. # Xcode 5 misses the alias uxtl. Replace it with the more general ushll.
  719. # Clang 3.4 misses the alias sxtl too. Replace it with the more general sshll.
  720. # armasm64 also misses these instructions.
  721. if ($line =~ /^(\d+:)?\s*(s|u)xtl(2)?\s+(v[0-3]?\d\.[248][hsdHSD])\s*,\s*(v[0-3]?\d\.(?:2|4|8|16)[bhsBHS])\b\s*$/) {
  722. $line = "$1 $2shll$3 $4, $5, #0\n";
  723. }
  724. # clang 3.4 and armasm64 do not automatically use shifted immediates in add/sub
  725. if (($as_type eq "clang" or $as_type eq "armasm") and
  726. $line =~ /^(\d+:)?(\s*(?:add|sub)s?) ([^#l]+)#([\d\+\-\*\/ <>]+)\s*$/) {
  727. my $imm = eval $4;
  728. if ($imm > 4095 and not ($imm & 4095)) {
  729. $line = "$1 $2 $3#" . ($imm >> 12) . ", lsl #12\n";
  730. }
  731. }
  732. if ($ENV{GASPP_FIX_XCODE5}) {
  733. if ($line =~ /^\s*bsl\b/) {
  734. $line =~ s/\b(bsl)(\s+v[0-3]?\d\.(\w+))\b/$1.$3$2/;
  735. $line =~ s/\b(v[0-3]?\d)\.$3\b/$1/g;
  736. }
  737. if ($line =~ /^\s*saddl2?\b/) {
  738. $line =~ s/\b(saddl2?)(\s+v[0-3]?\d\.(\w+))\b/$1.$3$2/;
  739. $line =~ s/\b(v[0-3]?\d)\.\w+\b/$1/g;
  740. }
  741. if ($line =~ /^\s*dup\b.*\]$/) {
  742. $line =~ s/\bdup(\s+v[0-3]?\d)\.(\w+)\b/dup.$2$1/g;
  743. $line =~ s/\b(v[0-3]?\d)\.[bhsdBHSD](\[\d\])$/$1$2/g;
  744. }
  745. }
  746. }
  747. if ($as_type eq "armasm") {
  748. # Also replace variables set by .set
  749. foreach (keys %symbols) {
  750. my $sym = $_;
  751. $line =~ s/\b$sym\b/$symbols{$sym}/g;
  752. }
  753. # Handle function declarations and keep track of the declared labels
  754. if ($line =~ s/^\s*\.func\s+(\w+)/$1 PROC/) {
  755. $labels_seen{$1} = 1;
  756. }
  757. if ($line =~ s/^\s*(\d+)://) {
  758. # Convert local labels into unique labels. armasm (at least in
  759. # RVCT) has something similar, but still different enough.
  760. # By converting to unique labels we avoid any possible
  761. # incompatibilities.
  762. my $num = $1;
  763. foreach (@{$next_temp_labels{$num}}) {
  764. $line = "$_\n" . $line;
  765. }
  766. @next_temp_labels{$num} = ();
  767. my $name = "temp_label_$temp_label_next";
  768. $temp_label_next++;
  769. # The matching regexp above removes the label from the start of
  770. # the line (which might contain an instruction as well), readd
  771. # it on a separate line above it.
  772. $line = "$name:\n" . $line;
  773. $last_temp_labels{$num} = $name;
  774. }
  775. if ($line =~ s/^\s*(\w+):/$1/) {
  776. # Skip labels that have already been declared with a PROC,
  777. # labels must not be declared multiple times.
  778. return if (defined $labels_seen{$1});
  779. $labels_seen{$1} = 1;
  780. } elsif ($line !~ /(\w+) PROC/) {
  781. # If not a label, make sure the line starts with whitespace,
  782. # otherwise ms armasm interprets it incorrectly.
  783. $line =~ s/^[\.\w]/\t$&/;
  784. }
  # Check instructions that take a label operand: resolve local label
  # references (e.g. "1b"/"2f") through handle_local_label() and count every
  # other target in %call_targets.
  if ($line =~ /(?:^|\n)\s*(\w+\s*:\s*)?(bl?x?\.?([^\s]{2})?(\.w)?)\s+(\w+)/) {
      my ($instr, $cond, $width, $target) = ($2, $3, $4, $5);

      # Don't interpret e.g. bic as b<cc> with ic as conditional code
      if ($cond !~ /^(|$arm_cond_codes)$/) {
          # Not actually a branch
      } elsif ($target =~ /^(\d+)([bf])$/) {
          # The target is a local label
          $line = handle_local_label($line, $1, $2);
          # On arm, append ".w" to the mnemonic unless a width suffix was
          # already given. The mnemonic is quoted since it may contain a dot.
          $line =~ s/\b\Q$instr\E\b/$&.w/
              if (!defined $width or $width eq "") and $arch eq "arm";
      } elsif (($arch eq "arm" and !is_arm_register($target)) or
               ($arch eq "aarch64" and !is_aarch64_register($target))) {
          $call_targets{$target}++;
      }
  } elsif ($line =~ /(?:^|\n)\s*(\w+\s*:\s*)?(cbn?z|adr|tbn?z)\s+(\w+)\s*,(\s*#\d+\s*,)?\s*(\w+)/) {
      # cbz/cbnz/adr/tbz/tbnz: register, optional "#bit," operand, target.
      # tbnz takes the same operands as tbz and is handled identically.
      my ($instr, $reg, $target) = ($2, $3, $5);

      if ($target =~ /^(\d+)([bf])$/) {
          # The target is a local label
          $line = handle_local_label($line, $1, $2);
      } else {
          $call_targets{$target}++;
      }

      # Convert tbz/tbnz with a wX register into an xX register,
      # due to armasm64 bugs/limitations.
      if ($instr =~ /^tbn?z$/ and $reg =~ /w\d+/) {
          (my $xreg = $reg) =~ s/w/x/;
          $line =~ s/\b$reg\b/$xreg/;
      }
  } elsif ($line =~ /^\s*\.h?word.*\b\d+[bf]\b/) {
      # .word/.hword data referring to local labels: resolve each reference.
      while ($line =~ /\b(\d+)([bf])\b/g) {
          $line = handle_local_label($line, $1, $2);
      }
  }
  825. # ALIGN in armasm syntax is the actual number of bytes
  826. if ($line =~ /\.(?:p2)?align\s+(\d+)/) {
  827. my $align = 1 << $1;
  828. $line =~ s/\.(?:p2)?align\s+(\d+)/ALIGN $align/;
  829. }
  830. # Convert gas style [r0, :128] into armasm [r0@128] alignment specification
  831. $line =~ s/\[([^\[,]+),?\s*:(\d+)\]/[$1\@$2]/g;
  # armasm treats logical values {TRUE} and {FALSE} separately from
  # numeric values - logical operators and values can't be intermixed
  # with numerical values. Evaluate !<number> and (a <> b) into numbers,
  # let the assembler evaluate the rest of the expressions. This currently
  # only works for cases when ! and <> are used with actual constant numbers,
  # we don't evaluate subexpressions here.

  # Evaluate !<number>. The same pattern both finds and replaces the
  # expression, so "! 0" (with spaces or tabs after the "!") is folded too
  # and the computed value always lands on the occurrence it was computed
  # from. Repeating until nothing matches also folds stacked negations
  # such as "!!1".
  1 while $line =~ s/![ \t]*(\d+)/($1 != 0) ? 0 : 1/e;

  # Evaluate (a < b) and (a > b) into 1 or 0, repeating until no
  # parenthesized constant comparison is left.
  1 while $line =~ s{\(\s*(\d+)\s*([<>])\s*(\d+)\s*\)}
                    {(($2 eq "<") ? ($1 < $3) : ($1 > $3)) ? 1 : 0}e;
  853. if ($arch eq "arm") {
  854. # Change a movw... #:lower16: into a mov32 pseudoinstruction
  855. $line =~ s/^(\s*)movw(\s+\w+\s*,\s*)\#:lower16:(.*)$/$1mov32$2$3/;
  856. # and remove the following, matching movt completely
  857. $line =~ s/^\s*movt\s+\w+\s*,\s*\#:upper16:.*$//;
  858. if ($line =~ /^\s*mov32\s+\w+,\s*([a-zA-Z]\w*)/) {
  859. $import_symbols{$1}++;
  860. }
  861. # Misc bugs/deficiencies:
  862. # armasm seems unable to parse e.g. "vmov s0, s1" without a type
  863. # qualifier, thus add .f32.
  864. $line =~ s/^(\s+(?:vmov|vadd))(\s+s\d+\s*,\s*s\d+)/$1.f32$2/;
  865. } elsif ($arch eq "aarch64") {
  866. # Convert ext into ext8; armasm64 seems to require it named as ext8.
  867. $line =~ s/^(\s+)ext(\s+)/$1ext8$2/;
  868. # Pick up targets from ldr x0, =sym+offset
  869. if ($line =~ /^\s*ldr\s+(\w+)\s*,\s*=([a-zA-Z]\w*)(.*)$/) {
  870. my $reg = $1;
  871. my $sym = $2;
  872. my $offset = eval_expr($3);
  873. if ($offset < 0 and $ENV{GASPP_ARMASM64_SKIP_NEG_OFFSET}) {
  874. # armasm64 in VS < 15.6 is buggy with ldr x0, =sym+offset where the
  875. # offset is a negative value; it does write a negative
  876. # offset into the literal pool as it should, but the
  877. # negative offset only covers the lower 32 bit of the 64
  878. # bit literal/relocation.
  879. # Thus remove the offset and apply it manually with a sub
  880. # afterwards.
  881. $offset = -$offset;
  882. $line = "\tldr $reg, =$sym\n\tsub $reg, $reg, #$offset\n";
  883. }
  884. $import_symbols{$sym}++;
  885. }
  # armasm64 (currently) doesn't support offsets on adrp targets,
  # even though the COFF format relocations (and the linker)
  # supports it. Therefore strip the offset expression from the adrp
  # target, strip ":lo12:" and the offset from the matching add, and
  # apply the offset with an extra explicit instruction afterwards.
  if ($line =~ s/(adrp\s+\w+\s*,\s*(\w+))([\d\+\-\*\/\(\) <>]+)?/$1/) {
      $import_symbols{$2}++;
  }
  if ($line =~ s/(add\s+(\w+)\s*,\s*\w+\s*,\s*):lo12:(\w+)([\d\+\-\*\/\(\) <>]+)?/$1$3/) {
      my $reg = $2;
      my $sym = $3;
      my $offset = eval_expr($4);
      if ($offset > 0) {
          $line .= "\tadd $reg, $reg, #$offset\n";
      } elsif ($offset < 0) {
          # A negative offset was removed from the add above as well, so it
          # has to be re-applied, as a sub of its magnitude.
          my $magnitude = -$offset;
          $line .= "\tsub $reg, $reg, #$magnitude\n";
      }
      $import_symbols{$sym}++;
  }
  901. # Convert e.g. "add x0, x0, w0, uxtw" into "add x0, x0, w0, uxtw #0",
  902. # or "ldr x0, [x0, w0, uxtw]" into "ldr x0, [x0, w0, uxtw #0]".
  903. $line =~ s/(uxt[whb]|sxt[whb])(\s*\]?\s*)$/\1 #0\2/i;
  904. # Convert "mov x0, v0.d[0]" into "umov x0, v0.d[0]"
  905. $line =~ s/\bmov\s+[xw]\d+\s*,\s*v\d+\.[ds]/u$&/i;
  906. # Convert "ccmp w0, #0, #0, ne" into "ccmpne w0, #0, #0",
  907. # and "csel w0, w0, w0, ne" into "cselne w0, w0, w0".
  908. $line =~ s/(ccmp|csel)\s+([xw]\w+)\s*,\s*([xw#]\w+)\s*,\s*([xw#]\w+)\s*,\s*($arm_cond_codes)/\1\5 \2, \3, \4/;
  909. # Convert "cinc w0, w0, ne" into "cincne w0, w0".
  910. $line =~ s/(cinc)\s+([xw]\w+)\s*,\s*([xw]\w+)\s*,\s*($arm_cond_codes)/\1\4 \2, \3/;
  911. # Convert "cset w0, lo" into "csetlo w0"
  912. $line =~ s/(cset)\s+([xw]\w+)\s*,\s*($arm_cond_codes)/\1\3 \2/;
  913. if ($ENV{GASPP_ARMASM64_SKIP_PRFUM}) {
  914. # Strip out prfum; armasm64 (VS < 15.5) fails to assemble any
  915. # variant/combination of prfum tested so far, but since it is
  916. # a prefetch instruction it can be skipped without changing
  917. # results.
  918. $line =~ s/prfum.*\]//;
  919. }
  920. # Convert "ldrb w0, [x0, #-1]" into "ldurb w0, [x0, #-1]".
  921. # Don't do this for forms with writeback though.
  922. if ($line =~ /(ld|st)(r[bh]?)\s+(\w+)\s*,\s*\[\s*(\w+)\s*,\s*#([^\]]+)\s*\][^!]/) {
  923. my $instr = $1;
  924. my $suffix = $2;
  925. my $target = $3;
  926. my $base = $4;
  927. my $offset = eval_expr($5);
  928. if ($offset < 0) {
  929. $line =~ s/$instr$suffix/${instr}u$suffix/;
  930. }
  931. }
  932. if ($ENV{GASPP_ARMASM64_INVERT_SCALE}) {
  933. # Instructions like fcvtzs and scvtf store the scale value
  934. # inverted in the opcode (stored as 64 - scale), but armasm64
  935. # in VS < 15.5 stores it as-is. Thus convert from
  936. # "fcvtzs w0, s0, #8" into "fcvtzs w0, s0, #56".
  937. if ($line =~ /(?:fcvtzs|scvtf)\s+(\w+)\s*,\s*(\w+)\s*,\s*#(\d+)/) {
  938. my $scale = $3;
  939. my $inverted_scale = 64 - $3;
  940. $line =~ s/#$scale/#$inverted_scale/;
  941. }
  942. }
  # Convert "ld1 {v0.4h-v3.4h}" into "ld1 {v0.4h,v1.4h,v2.4h,v3.4h}".
  # The layout may have one or two digits (e.g. "4h" or "16b").
  if ($line =~ /(?:ld|st)\d\s+(\{\s*v(\d+)\.(\d\d?[bhsdBHSD])\s*-\s*v(\d+)\.(\d\d?[bhsdBHSD])\s*\})/) {
      my ($regspec, $reg1, $layout1, $reg2, $layout2) = ($1, $2, $3, $4, $5);
      # Only expand when both ends of the range use the same layout.
      if ($layout1 eq $layout2) {
          my $new_regspec =
              "{" . join(",", map { "v$_.$layout1" } $reg1 .. $reg2) . "}";
          # The matched text contains regex metacharacters ("{", ".", "}"),
          # so quote it before using it as a pattern.
          $line =~ s/\Q$regspec\E/$new_regspec/;
      }
  }
  960. }
  961. # armasm is unable to parse &0x - add spacing
  962. $line =~ s/&0x/& 0x/g;
  963. }
  964. if ($force_thumb) {
  965. # Convert register post indexing to a separate add instruction.
  966. # This converts e.g. "ldr r0, [r1], r2" into "ldr r0, [r1]",
  967. # "add r1, r1, r2".
  968. $line =~ s/((?:ldr|str)[bh]?)\s+(\w+),\s*\[(\w+)\],\s*(\w+)/$1 $2, [$3]\n\tadd $3, $3, $4/g;
  969. # Convert "mov pc, lr" into "bx lr", since the former only works
  970. # for switching from arm to thumb (and only in armv7), but not
  971. # from thumb to arm.
  972. $line =~ s/mov\s*pc\s*,\s*lr/bx lr/g;
  973. # Convert stmdb/ldmia/stmfd/ldmfd/ldm with only one register into a plain str/ldr with post-increment/decrement.
  974. # Wide thumb2 encoding requires at least two registers in register list while all other encodings support one register too.
  975. $line =~ s/stm(?:db|fd)\s+sp!\s*,\s*\{([^,-]+)\}/str $1, [sp, #-4]!/g;
  976. $line =~ s/ldm(?:ia|fd)?\s+sp!\s*,\s*\{([^,-]+)\}/ldr $1, [sp], #4/g;
  977. # Convert muls into mul+cmp
  978. $line =~ s/muls\s+(\w+),\s*(\w+)\,\s*(\w+)/mul $1, $2, $3\n\tcmp $1, #0/g;
  979. # Convert "and r0, sp, #xx" into "mov r0, sp", "and r0, r0, #xx"
  980. $line =~ s/and\s+(\w+),\s*(sp|r13)\,\s*#(\w+)/mov $1, $2\n\tand $1, $1, #$3/g;
  981. # Convert "ldr r0, [r0, r1, lsl #6]" where the shift is >3 (which
  982. # can't be handled in thumb) into "add r0, r0, r1, lsl #6",
  983. # "ldr r0, [r0]", for the special case where the same address is
  984. # used as base and target for the ldr.
  985. if ($line =~ /(ldr[bh]?)\s+(\w+),\s*\[\2,\s*(\w+),\s*lsl\s*#(\w+)\]/ and $4 > 3) {
  986. $line =~ s/(ldr[bh]?)\s+(\w+),\s*\[\2,\s*(\w+),\s*lsl\s*#(\w+)\]/add $2, $2, $3, lsl #$4\n\t$1 $2, [$2]/;
  987. }
  988. $line =~ s/\.arm/.thumb/x;
  989. }
  990. # comment out unsupported directives
  991. $line =~ s/\.type/$comm$&/x if $as_type =~ /^(apple-|armasm)/;
  992. $line =~ s/\.func/$comm$&/x if $as_type =~ /^(apple-|clang)/;
  993. $line =~ s/\.endfunc/$comm$&/x if $as_type =~ /^(apple-|clang)/;
  994. $line =~ s/\.endfunc/ENDP/x if $as_type =~ /armasm/;
  995. $line =~ s/\.ltorg/$comm$&/x if $as_type =~ /^(apple-|clang)/;
  996. $line =~ s/\.ltorg/LTORG/x if $as_type eq "armasm";
  997. $line =~ s/\.size/$comm$&/x if $as_type =~ /^(apple-|armasm)/;
  998. $line =~ s/\.fpu/$comm$&/x if $as_type =~ /^(apple-|armasm)/;
  999. $line =~ s/\.arch/$comm$&/x if $as_type =~ /^(apple-|clang|armasm)/;
  1000. $line =~ s/\.object_arch/$comm$&/x if $as_type =~ /^(apple-|armasm)/;
  1001. $line =~ s/.section\s+.note.GNU-stack.*/$comm$&/x if $as_type =~ /^(apple-|armasm)/;
  1002. $line =~ s/\.syntax/$comm$&/x if $as_type =~ /armasm/;
  1003. $line =~ s/\.hword/.short/x;
  1004. if ($as_type =~ /^apple-/) {
  1005. # the syntax for these is a little different
  1006. $line =~ s/\.global/.globl/x;
  1007. # also catch .section .rodata since the equivalent to .const_data is .section __DATA,__const
  1008. $line =~ s/(.*)\.rodata/.const_data/x;
  1009. $line =~ s/\.int/.long/x;
  1010. $line =~ s/\.float/.single/x;
  1011. }
  1012. if ($as_type eq "apple-gas") {
  1013. $line =~ s/vmrs\s+APSR_nzcv/fmrx r15/x;
  1014. }
  1015. if ($as_type eq "armasm") {
  1016. $line =~ s/\.global/EXPORT/x;
  1017. $line =~ s/\.extern/IMPORT/x;
  1018. $line =~ s/\.int/dcd/x;
  1019. $line =~ s/\.long/dcd/x;
  1020. $line =~ s/\.float/dcfs/x;
  1021. $line =~ s/\.word/dcd/x;
  1022. $line =~ s/\.short/dcw/x;
  1023. $line =~ s/\.byte/dcb/x;
  1024. $line =~ s/\.quad/dcq/x;
  1025. $line =~ s/\.ascii/dcb/x;
  1026. $line =~ s/\.asciz(.*)$/dcb\1,0/x;
  1027. $line =~ s/\.thumb/THUMB/x;
  1028. $line =~ s/\.arm/ARM/x;
  1029. # The alignment in AREA is the power of two, just as .align in gas
  1030. $line =~ s/\.text/AREA |.text|, CODE, READONLY, ALIGN=4, CODEALIGN/;
  1031. $line =~ s/(\s*)(.*)\.ro?data/$1AREA |.rdata|, DATA, READONLY, ALIGN=5/;
  1032. $line =~ s/\.data/AREA |.data|, DATA, ALIGN=5/;
  1033. }
  1034. if ($as_type eq "armasm" and $arch eq "arm") {
  1035. $line =~ s/fmxr/vmsr/;
  1036. $line =~ s/fmrx/vmrs/;
  1037. $line =~ s/fadds/vadd.f32/;
  1038. # Armasm in VS 2019 16.3 errors out on "it" instructions. But
  1039. # armasm implicitly adds the necessary it instructions anyway, so we
  1040. # can just filter them out.
  1041. $line =~ s/^\s*it[te]*\s+/$comm$&/;
  1042. }
  1043. if ($as_type eq "armasm" and $arch eq "aarch64") {
  1044. # Convert "b.eq" into "beq"
  1045. $line =~ s/\bb\.($arm_cond_codes)\b/b\1/;
  1046. }
  1047. # catch unknown section names that aren't mach-o style (with a comma)
  1048. if ($as_type =~ /apple-/ and $line =~ /.section ([^,]*)$/) {
  1049. die ".section $1 unsupported; figure out the mach-o section name and add it";
  1050. }
  1051. print ASMFILE $line;
  1052. }
  # Emit the trailer of the generated file. Hash keys are sorted so that the
  # output does not depend on perl's hash iteration order.
  if ($as_type ne "armasm") {
      print ASMFILE ".text\n";
      print ASMFILE ".align 2\n";
      # One labelled literal per entry in %literal_labels.
      foreach my $literal (sort keys %literal_labels) {
          print ASMFILE "$literal_labels{$literal}:\n $literal_expr $literal\n";
      }
      # Mark every call target that is also recorded in %thumb_labels.
      foreach my $target (sort keys %call_targets) {
          print ASMFILE ".thumb_func $target\n" if exists $thumb_labels{$target};
      }
  } else {
      # Import every referenced symbol that was not declared as a label in
      # this file. A symbol may be present in both hashes; emit it only once.
      my @referenced = (keys %call_targets, keys %import_symbols);
      my %emitted;
      foreach my $sym (sort @referenced) {
          next if exists $labels_seen{$sym};
          next if $emitted{$sym}++;
          print ASMFILE "\tIMPORT $sym\n";
      }
      print ASMFILE "\tEND\n";
  }

  close(INPUT) or exit 1;
  close(ASMFILE) or exit 1;

  # For armasm, run the command held in @gcc_cmd (list form, so no shell is
  # involved) unless GASPP_DEBUG is set in the environment.
  if ($as_type eq "armasm" and ! defined $ENV{GASPP_DEBUG}) {
      print STDERR join(" ", @gcc_cmd)."\n" if $verbose;
      system(@gcc_cmd) == 0 or die "Error running assembler";
  }

  # Remove the file named by $tempfile, if one was set, when perl exits.
  END {
      unlink($tempfile) if defined $tempfile;
  }
  1075. #exit 1