Friday, October 25, 2019

g2nasm - GAS ASM to NASM

Why didn't anyone else think to do this over the last 20+ years? I have no idea, but here it is. It's a work in progress: far from done, but a decent start.
#!/bin/bash 
  ######################################################################
 ######################################################################
## Author: Adam Michael Danischewski 
## GitHub: https://github.com/AdamDanischewski/g2nasm
## Created Date: 2019-10-24
## Name: g2nasm.bsh 
## Version: v0.01
## Last Modified: 2019-10-24
## Issues: If you find any issues, email me at <my first name> (dot) 
##         <my last name> (at) gmail (dot) com. 
##
## Requirements: nasm, xxd, gnu awk, gnu sed
## 
## This script takes gas asm generated by gcc and converts it to nasm. 
## Eg. $> gcc -S -o code.gasm code.c 
## 
## You can then convert the code.gasm to nasm with this script: 
## Eg. $> g2nasm.bsh code.gasm > code.nasm 
##
## Then you can compile and run your nasm on Linux: 
## Eg. $> nasm -f elf64 -o code.o code.nasm 
##     $> gcc -no-pie -o run_code code.o 
##     $> ./run_code 
##
## As of this initial release it works for only a subset of gas asm - over 
## time this will hopefully work on all of gas asm (except probably macros). 
## 
## For now this should work on simple programs. 
## Caveat: *** much is still missing/unmapped *** 
## Get the latest on GitHub: https://github.com/AdamDanischewski/g2nasm 
## Pull requests invited. 
## 
## Tested on: 
## #include <stdio.h>
##
## /* function to show bytes in memory, from location start to start+n*/
## void show_mem_rep(char *start, int n)
## {
##     int i;
##     for (i = 0; i < n; i++)
##       printf(" %.2x", start[i]);
##     printf("\n");
## }
##   
## /*Main function to call above function for 0x01234567*/
## int main()
## {
##   int i = 0x01234567;
##   show_mem_rep((char *)&i, sizeof(i));
##   getchar();
##   return 0;
## }
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
##
## Released under Creative Commons License: CC BY-SA 4.0
## https://creativecommons.org/licenses/by-sa/4.0/
 ######################################################################
  ######################################################################
## Reference BASH_ARGC at top level so that BASH_ARGV gets populated for
## options_handler.
_=${BASH_ARGC}

## Input file and the text that the mapping functions rewrite in place.
FILE="${1}"
WORKING_COPY=""

## Annotated by the author as used by options_handler (); the visible code
## does not reference it.
declare -a OPTIONS=("")

## Header lines: "global ..." at index 0, "extern ..." entries after it.
declare -a GLOBALS
GLOBAL_OFFSET_TABLE_FLAG=0
readonly GLOBAL_OFFSET_TABLE="extern _GLOBAL_OFFSET_TABLE_"

## Read-only data blocks and the alignment attribute for SECTION .rodata.
declare -a RO_BLOCKS
RO_ALIGN=""

## Print the command-line synopsis and the option list to stdout.
usage() {
  local -r script_name="${0##*/}"
  cat <<EOF
Usage: ${script_name} [-h] <file> 

 <file>: Gas asm file. 
         gcc -S -o code.gasm code.c ## file=code.gasm

 OPTIONS:
         -h|--help           Usage 
EOF
}

## Read the gas asm in ${FILE}, strip gas-only syntax and store the result in
## WORKING_COPY; collect the ".globl" symbols as "global ..." in GLOBALS[0].
## Globals: FILE (read), WORKING_COPY (written), GLOBALS[0] (written)
function main_clean() {
 ## Pass 1 (one sed program, applied per line):
 ##  - on lines without a .string directive: drop the space after commas,
 ##    drop the % and $ prefixes, and turn the je mnemonic into jz (whole
 ##    word only, so identifiers that contain "je" are left alone);
 ##  - delete assembler directives. The pattern is anchored to the start of
 ##    the line and the dot is literal, so lines that merely contain one of
 ##    the words (e.g. "call resize", a .string with " file") are kept;
 ##  - swap the two operands of sub/lea/add/cmp/xor and drop the l/q suffix;
 ##  - drop the l/q suffix of pop/push/sub/lea;
 ##  - delete .LFB<n>: / .LFE<n>: style labels of any number width.
 WORKING_COPY=$(sed -E \
  -e '/\.string/!{s/, /,/g;s/%//g;s/\$//g;s/\bje\b/jz/}' \
  -e '/^[ \t]*\.(file|text|type|cfi_|size|ident|section|intel_syntax)/d' \
  -e 's/(^[ \t]*)(sub|lea|add|cmp|xor)([ql])([ \t]*)([^,]*)(,)([ \t]*)(.*$)/\1\2\4\8, \5/' \
  -e 's/\b(pop|push|sub|lea)([lq])\b/\1/' \
  -e '/^\.LF[A-Z][0-9]+:/d' \
  -- "${FILE}")
 ## .globl <sym> --> global <sym> (kept in GLOBALS[0], removed from the text)
 GLOBALS[0]="$(sed -En 's/(^[ \t]*\.globl)([ \t]*)([^ \t]*)(.*$)/global \3/p' <<< "${WORKING_COPY}")"
 WORKING_COPY=$(sed '/^[ \t]*\.globl/d' <<< "${WORKING_COPY}")
 ## Lines with a q-suffixed mnemonic: -8(rbp)--> qword [rbp-8H]
 WORKING_COPY=$(sed -E '/\b[a-z]{3}q\b/s/(-[0-9]+)(\()([a-z]{3})(\))/qword [\3\1H]/' <<< "${WORKING_COPY}")
 ## Lines with an l-suffixed mnemonic: -12(rbp)--> dword [rbp-12H]
 WORKING_COPY=$(sed -E '/\b[a-z]{3}l\b/s/(-[0-9]+)(\()([a-z]{3})(\))/dword [\3\1H]/' <<< "${WORKING_COPY}")
 ## Every offset left except lea: -12(rbp)--> dword [rbp-12H]
 ## (the number is still decimal here; offset_to_hex converts it later)
 WORKING_COPY=$(sed -E '/\blea\b/!s/(-[0-9]+)(\()([a-z]{3})(\))/dword [\3\1H]/' <<< "${WORKING_COPY}")
 ## lea mapping: -12(rbp)-->[rbp-12H]
 WORKING_COPY=$(sed -E '/\blea\b/s/(-[0-9]+)(\()([a-z]{3})(\))/[\3\1H]/' <<< "${WORKING_COPY}")
}

## Record every call target found in ${FILE} as "extern <target>" in GLOBALS
## (starting at index 1; index 0 is filled by main_clean), raise
## GLOBAL_OFFSET_TABLE_FLAG when at least one call goes through the PLT, and
## strip the @PLT suffix from WORKING_COPY.
## Globals: FILE (read), GLOBALS (written), GLOBAL_OFFSET_TABLE_FLAG
##          (written), WORKING_COPY (read/written)
function load_externals() {
 local target=""
 local -i i=0
 ## One awk pass prints the operand of every line whose first word starts
 ## with "call".
 while IFS= read -r target; do
  ((i+=1))
  ## The suffix must be tested before it is removed; testing afterwards
  ## would raise the flag for every call, PLT or not.
  [[ "${target}" == *@PLT ]] && GLOBAL_OFFSET_TABLE_FLAG=1
  GLOBALS[i]="extern ${target%@PLT}"
 done < <(awk '$1 ~ /^call/ { print $2 }' "${FILE}")
 WORKING_COPY="$(sed 's/@PLT//g' <<< "${WORKING_COPY}")"
}

## Print the integer in ${1} as uppercase hex, zero-padded to at least two
## digits, followed by a newline.
num2hex() {
  local -r decimal="${1}"
  printf "%02X\n" "${decimal}"
}

## Print the GLOBALS entries, one per line and right-aligned to 11 columns,
## with a blank line after the first entry. When GLOBAL_OFFSET_TABLE_FLAG is
## non-zero the GLOBAL_OFFSET_TABLE line follows. Ends with a blank line.
print_globals() {
  local -i idx=0
  local -i total=${#GLOBALS[@]}
  while ((idx < total)); do
    printf "%11s\n" "${GLOBALS[idx]}"
    if ((idx == 0)); then
      echo
    fi
    ((idx++))
  done
  if ((GLOBAL_OFFSET_TABLE_FLAG)); then
    printf "${GLOBAL_OFFSET_TABLE}\n"
  fi
  echo
}

## Placeholder: declares its working variables but performs no mapping yet.
## Planned mapping, per the note below: -8(rbp)-->[qd]word [rbp-8H]
add_map() {
  local -i line=0
  local val="" hexval=""
  ## -8(rbp)-->[qd]word [rbp-8H]
}

## Convert the decimal offsets written as "<n>H" by the earlier mapping steps
## into real hex, e.g. dword [rbp-12H] --> dword [rbp-0CH].
## On each line, the first "<n>H" that follows dword/qword/lea with no other
## digit in between is rewritten as uppercase hex, zero-padded to two digits.
## Digits that appear earlier on the line (r8..r15, immediates) are ignored;
## they used to make the extraction come back empty, after which the
## follow-up sed failed and WORKING_COPY was truncated.
## Globals: WORKING_COPY (read/written)
function offset_to_hex() {
 WORKING_COPY="$(awk '
  {
   if (match($0, /([dq]word|lea)[^0-9]*[0-9]+H/)) {
    past_h = RSTART + RLENGTH              # index just after the "H"
    hit = substr($0, RSTART, RLENGTH)
    match(hit, /[0-9]+H$/)                 # the digits plus the "H"
    num_at = past_h - RLENGTH
    dec = substr($0, num_at, RLENGTH - 1)
    $0 = substr($0, 1, num_at - 1) sprintf("%02X", dec + 0) "H" substr($0, past_h)
   }
   print
  }' <<< "${WORKING_COPY}")"
}

## Rewrite the mov family in WORKING_COPY:
##  1. swap the two operands of movl/movq/movsbl/movzbl/movslq lines
##     (first operand = text before the first comma);
##  2. rename the mnemonic: mov[dlq] --> mov, movzbl --> movzx,
##     movsbl --> movsx, movslq --> movsxd.
## Globals: WORKING_COPY (read/written)
mov_map() {
  local -r swap_operands='s/(^[ \t]*)(mov)(sbl|zbl|slq|l|q)([ \t]*)([^,]*)(,)([ \t]*)(.*$)/\1\2\3\4\8, \5/'
  local -r rename_mnemonics='/mov[dlq]/s//mov/;s/movzbl/movzx/g;s/movsbl/movsx/g;s/movslq/movsxd/g'
  ## Both programs work on one line at a time, so running them back to back
  ## in a single sed gives the same text as two separate passes.
  WORKING_COPY=$(sed -E -e "${swap_operands}" -e "${rename_mnemonics}" <<< "${WORKING_COPY}")
}

## Replace every literal "(rax)" in WORKING_COPY with "byte [rax]".
## Globals: WORKING_COPY (read/written)
rax_map() {
  local -r rax_rule='s/(rax)/byte [rax]/g'
  WORKING_COPY="$(printf '%s\n' "${WORKING_COPY}" | sed "${rax_rule}")"
}

## Map rip-relative .LC references to read-only block labels:
##   .LC0(rip)-->[rel ?_001]
## The label number is the .LC number plus one, zero-padded to three digits,
## which matches the ?_NNN labels that print_ro_blocks emits starting at 1.
## Only the first .LC<n>(<reg>) on a line is rewritten, and only that token
## is touched; the earlier version replaced every three-digit run on the
## line, which corrupted immediates and labels of 1000 and above.
## Globals: WORKING_COPY (read/written)
function leaq_map() {
 WORKING_COPY="$(awk '
  {
   if (match($0, /\.LC[0-9]+\([a-z][a-z][a-z]\)/)) {
    head = substr($0, 1, RSTART - 1)
    tail = substr($0, RSTART + RLENGTH)
    label = substr($0, RSTART + 3, RLENGTH - 3)   # "<n>(<reg>)"
    sub(/\(.*/, "", label)                        # keep "<n>" only
    $0 = head sprintf("[rel ?_%03d]", label + 1) tail
   }
   print
  }' <<< "${WORKING_COPY}")"
}

## Convert a gas string literal into NASM "db" lines of hex bytes.
## String Arg 1 - String to convert 
## Requires: xxd, GNU sed (\U case conversion).
## Pipeline stages, in order:
##  1. echo -en expands backslash escapes in the argument and appends a
##     \x0 byte; sed then removes every double quote.
##  2. xxd -ps -c1 emits one byte per line as two hex digits; tr joins the
##     lines with spaces.
##  3. The byte list is cut into chunks of 24 characters (8 bytes) per line
##     and leading blanks are stripped.
##  4. The last line is padded with 00 bytes and cut back to 24 characters,
##     double spaces are collapsed and everything is uppercased.
##  5. Each line gets a "db " prefix and every byte an "H" suffix followed
##     by a comma.
##  6. Trailing separators are cleaned up and each line is indented by eight
##     spaces, e.g.:  db 20H, 25H, 2EH, 32H, 78H, 00H, 00H, 00H
## NOTE(review): stage 6 also rewrites a line-final 0AH as 00H; the reason is
## not visible here - confirm that a newline byte in that position should
## really be replaced.
function convert_string_to_hex() {
 xxd -ps -c1 <(echo -en "${1}\x0"|sed 's/"//g')|tr '\n' ' '|sed -r 's/.{24}/\U&\n/g;s/^[ \t]*//'|sed -r '${s/^.*$/& 00 00 00 00 00 00 00 00 00/;s/^(.{24})(.*$)/\1/;};s/  / /g;s/^.*$/\U&/'| 
 sed -r 's/^./db &/;s/[0-9A-F]{2}[ \t]*$/&H/;s/[0-9A-F]{2}/&H,/g;'| sed -r 's/,[ \t]*$//;s/^.*$/        &/;s/0AH[ \t]*$/00H/;s/[,H \t]*$/H/'
}

## Load Read Only Blocks
## Move every ".LC<n>:" block out of WORKING_COPY into RO_BLOCKS.
## For each ".LC<n>:" label, in order of appearance:
##  - the indented lines that follow it form the block body;
##  - the label and its body are removed from WORKING_COPY;
##  - a ".align <n>" line sets RO_ALIGN to "align=<n>" and is blanked;
##  - a .string line is replaced by the db lines from convert_string_to_hex.
## Blocks are appended to RO_BLOCKS starting at index (current size + 1).
## Globals: WORKING_COPY (read/written), RO_BLOCKS (written),
##          RO_ALIGN (written)
function get_ro_blocks() { 
 local block_start_addr=""
 local block_end_addr=""
 local next_label_addr=""
 local block="" 
 local string="" 
 local ro_align="" 
 local -i ro_block_index=${#RO_BLOCKS[@]}
 ## get all read only block start addr's (line numbers, computed up front)
 while IFS= read -r block_start_addr; do 
  ## Note: start at index 1 since that's what nasm starts its ro block labels at 
  ((ro_block_index+=1))
  ## save all read only blocks
  block="$(sed -En "${block_start_addr},/^[^ \t]/{/^[^ \t]/b;p}" <<< "${WORKING_COPY}")"
  ## The block ends on the line before the next label/directive. When none
  ## follows (block at the end of the file) it runs to the last line; without
  ## this fallback the end address became -1 and sed wiped WORKING_COPY.
  next_label_addr="$(sed -En "$((block_start_addr+1)),\${/^[\.a-z]/{=;q}}" <<< "${WORKING_COPY}")"
  if [[ -n "${next_label_addr}" ]]; then
   block_end_addr=$((next_label_addr-1))
  else
   block_end_addr='$'
  fi
  ## Overwrite rather than delete, so the line numbers collected up front
  ## stay valid for the remaining blocks.
  WORKING_COPY="$(sed -E "${block_start_addr},${block_end_addr}s/^.*$/DELETE_THIS/" <<< "${WORKING_COPY}")"
  ro_align=$(sed -rn 's/^([ \t]*.align )([0-9]+)(.*$)/align=\2/p' <<< "${block}") 
  [[ -n "${ro_align}" ]] && RO_ALIGN="${ro_align}" && block="$(sed -r "s/.align[ \t]*[0-9]+//"<<<"${block}")" 
  string=$(sed -rn 's/^([ \t]*\.string)([ \t]*)("[^"]*")(.*$)/\3/p' <<< "${block}") 
  if [[ -n "${string}" ]]; then 
   ## Newlines are carried through sed as \001 and restored afterwards.
   string="$(convert_string_to_hex "${string}" | tr '\n' '\001')"
   block="$(sed -r "s/^[ \t]*\.string.*\$/${string}/;/^[ \t]*.(text|type|globl)/d" <<< "${block}"|tr '\001' '\n')" 
  fi  
  RO_BLOCKS[${ro_block_index}]="${block}"
 done < <(sed -En '/^\.LC[0-9]+:.*$/=' <<< "${WORKING_COPY}") 
 WORKING_COPY="$(sed '/^DELETE_THIS/d' <<< "${WORKING_COPY}")"
}

## Print the SECTION .rodata header (with RO_ALIGN appended) followed by
## each entry of RO_BLOCKS under its ?_NNN: label, counting from 1.
print_ro_blocks() {
  local -i block_no=1
  local -i total=${#RO_BLOCKS[@]}
  printf "SECTION .rodata %s\n\n" "${RO_ALIGN}"
  while ((block_no <= total)); do
    printf "?_%03.f:\n" "${block_no}"
    printf "%s\n" "${RO_BLOCKS[block_no]}"
    ((block_no++))
  done
}

## Map fs-segment offsets in WORKING_COPY (first occurrence per line):
##   fs:40-->qword [fs:abs 40H]
## The number is copied as-is here.
## Globals: WORKING_COPY (read/written)
fs_map() {
  local -r fs_rule='/\bfs:[0-9]+\b/s/\bfs:([0-9]+)\b/qword [fs:abs \1H]/'
  WORKING_COPY="$(sed -E -e "${fs_rule}" <<< "${WORKING_COPY}")"
}

## Run the conversion steps in their fixed order. offset_to_hex is called
## last, as the original ordering note requires.
init() {
  local step=""
  local -ra pipeline=(
    main_clean
    load_externals
    get_ro_blocks
    mov_map
    leaq_map
    rax_map
    fs_map
    offset_to_hex ## Call this last.
  )
  for step in "${pipeline[@]}"; do
    "${step}"
  done
}

## Print the .text section header followed by a blank line.
print_section_text() {
  printf 'SECTION .text\n\n'
}

## Print the .data section header followed by a blank line.
print_section_data() {
  printf 'SECTION .data\n\n'
}

## Print the .bss section header followed by a blank line.
print_section_bss() {
  printf 'SECTION .bss\n\n'
}

## Convert the input and write the NASM listing to stdout, in this order:
## globals/externs, .text header, converted code, .data header, .bss header,
## read-only blocks.
main() {
  init

  print_globals
  print_section_text
  printf '%s\n\n' "${WORKING_COPY}"

  print_section_data
  print_section_bss
  print_ro_blocks
}

## Parse the command line: show the usage text or set FILE.
## Arguments: optional - the words to parse. When called without arguments
##            the script's own arguments are taken from BASH_ARGV, which
##            holds them in reverse order.
## Globals:   FILE (written), BASH_ARGV (read)
## Exits:     0 after printing the usage text (no arguments, -h, --help or
##            any other two-character "-x" word, since "-?" is a glob);
##            1 when a named file does not exist. That message goes to
##            stderr so it cannot end up in the redirected NASM output.
function options_handler() { 
 local -a args=()
 local -i i=0
 local CURVAL=""
 if (($# > 0)); then
  args=("$@")
 else
  ## Walk BASH_ARGV backwards to get the arguments in command-line order.
  for ((i=${#BASH_ARGV[@]}-1; i>=0; i--)); do
   args+=("${BASH_ARGV[i]}")
  done
 fi
 if ((${#args[@]} == 0)); then
  usage
  exit 0
 fi
 for CURVAL in "${args[@]}"; do
  case "${CURVAL}" in
      -h|-?|--help)
      usage 
      exit 0 
      ;; 
      *) 
      FILE="${CURVAL}"
      if [[ ! -e "${FILE}" ]]; then
       ## The file name is passed as data, never as part of the format.
       printf '*** ERROR - File ("%s") not found. \n' "${FILE}" >&2
       exit 1
      fi
      ;;
  esac
 done 
}

## Script entry point: parse the command line (sets FILE, or prints the usage
## text / an error and exits), run the conversion, then exit with status 0.
options_handler
main 
exit 0
Get the latest on GitHub.

No comments:

Post a Comment