#!/bin/bash
######################################################################
######################################################################
## Author: Adam Michael Danischewski
## GitHub: https://github.com/AdamDanischewski/g2nasm
## Created Date: 2019-10-24
## Name: g2nasm.bsh
## Version: v0.01
## Last Modified: 2019-10-24
## Issues: If you find any issues, email me at <my first name> (dot)
## <my last name> (at) gmail (dot) com.
##
## Requirements: nasm, xxd, gnu awk, gnu sed
##
## This script takes gas asm generated by gcc and converts it to nasm.
## Eg. $> gcc -S -o code.gasm code.c
##
## You can then convert the code.gasm to nasm with this script:
## Eg. $> g2nasm.bsh code.gasm > code.nasm
##
## Then you can compile and run your nasm on Linux:
## Eg. $> nasm -f elf64 -o code.o code.nasm
## $> gcc -no-pie -o run_code code.o
## $> ./run_code
##
## As of this initial release it works for only a subset of gas asm - over
## time this will hopefully work on all of gas asm (except probably macros).
##
## For now this should work on simple programs.
## Caveat: *** much is still missing/unmapped ***
## Get the latest on GitHub: https://github.com/AdamDanischewski/g2nasm
## Pull requests invited.
##
## Tested on:
## #include <stdio.h>
##
## /* function to show bytes in memory, from location start to start+n*/
## void show_mem_rep(char *start, int n)
## {
## int i;
## for (i = 0; i < n; i++)
## printf(" %.2x", start[i]);
## printf("\n");
## }
##
## /*Main function to call above function for 0x01234567*/
## int main()
## {
## int i = 0x01234567;
## show_mem_rep((char *)&i, sizeof(i));
## getchar();
## return 0;
## }
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
##
## Released under Creative Commons License: CC BY-SA 4.0
## https://creativecommons.org/licenses/by-sa/4.0/
######################################################################
######################################################################
## Touch BASH_ARGC at top level first: this makes bash populate BASH_ARGV,
## which options_handler walks to read the command-line arguments.
_=${BASH_ARGC}

## FILE          - path of the gas asm input (first positional argument).
## WORKING_COPY  - text buffer that the *_map passes rewrite in place.
## RO_ALIGN      - "align=N" text set by get_ro_blocks, printed by print_ro_blocks.
declare FILE="${1}" WORKING_COPY="" RO_ALIGN=""

## Holds a single empty element; no function visible in this file reads it.
declare -a OPTIONS=("")

## GLOBALS   - "global ..." / "extern ..." lines printed by print_globals.
## RO_BLOCKS - read-only data blocks collected by get_ro_blocks.
declare -a GLOBALS RO_BLOCKS

## Set to 1 by load_externals; makes print_globals emit GLOBAL_OFFSET_TABLE.
declare GLOBAL_OFFSET_TABLE_FLAG=0
declare -r GLOBAL_OFFSET_TABLE="extern _GLOBAL_OFFSET_TABLE_"
function usage() {
  ## Print the command synopsis and the option summary to stdout.
  local script_name="${0##*/}"
  printf '%s\n' \
    "Usage: ${script_name} [-h] <file>" \
    '<file>: Gas asm file.' \
    'gcc -S -o code.gasm code.c ## file=code.gasm' \
    'OPTIONS:' \
    '-h|--help Usage'
}
## First pass over the input: strips AT&T decoration and assembler
## directives, swaps operands of a few two-operand instructions, records the
## .globl symbols and rewrites negative register offsets as NASM memory operands.
## Globals: FILE (read), WORKING_COPY (written), GLOBALS[0] (written)
function main_clean() {
  ## Pass 1, per line:
  ##  - outside .string directives: drop "%" and "$", tighten ", " to ",",
  ##    and turn the je mnemonic into jz (whole word only, so identifiers
  ##    such as "project" are left alone);
  ##  - delete assembler directives; the pattern is anchored to the start of
  ##    the line and the dot is literal, so instructions whose operand merely
  ##    contains "size", "file", "type", ... (e.g. "call resize") survive;
  ##  - sub/lea/add/cmp/xor with a q/l suffix: drop the suffix and swap the
  ##    two operands (AT&T "src,dst" --> "dst, src");
  ##  - drop the q/l suffix from any remaining pop/push/sub/lea;
  ##  - delete the .LFBn:/.LFEn: labels for any function number n.
  WORKING_COPY=$(sed -E \
    -e '/\.string/!{s/, /,/g;s/%//g;s/\$//g;s/\bje\b/jz/}' \
    -e '/^[ \t]*\.(file|text|type|cfi_|size|ident|section|intel_syntax)/d' \
    -e 's/(^[ \t]*)(sub|lea|add|cmp|xor)([ql])([ \t]*)([^,]*)(,)([ \t]*)(.*$)/\1\2\4\8, \5/' \
    -e 's/\b(pop|push|sub|lea)([lq])\b/\1/' \
    -e '/^\.LF[BE][0-9]+:/d' \
    -- "${FILE}")

  ## Remember every ".globl sym" as "global sym", then drop those lines.
  GLOBALS[0]="$(sed -En 's/(^[ \t]*\.globl)([ \t]*)([^ \t]*)(.*$)/global \3/p' <<< "${WORKING_COPY}")"
  WORKING_COPY=$(sed '/^[ \t]*\.globl/d' <<< "${WORKING_COPY}")

  ## Pass 2, negative offsets "-N(reg)" --> "[reg-NH]" (N is still decimal
  ## here; offset_to_hex converts it later):
  ##  - line has a 4-letter word ending in q  --> qword [reg-NH]
  ##  - line has a 4-letter word ending in l  --> dword [reg-NH]
  ##  - anything left that is not a lea       --> dword [reg-NH]
  ##  - lea                                   --> [reg-NH]
  WORKING_COPY=$(sed -E \
    -e '/\b[a-z]{3}q\b/s/(-[0-9]+)(\()([a-z]{3})(\))/qword [\3\1H]/' \
    -e '/\b[a-z]{3}l\b/s/(-[0-9]+)(\()([a-z]{3})(\))/dword [\3\1H]/' \
    -e '/\blea\b/!s/(-[0-9]+)(\()([a-z]{3})(\))/dword [\3\1H]/' \
    -e '/\blea\b/s/(-[0-9]+)(\()([a-z]{3})(\))/[\3\1H]/' \
    <<< "${WORKING_COPY}")
}
## Collects the target of every "call" instruction in FILE as an
## "extern <symbol>" entry in GLOBALS (starting at index 1; index 0 is written
## by main_clean) and strips the "@PLT" suffix from WORKING_COPY.
## Each symbol is listed once. Indirect calls ("call *...") and calls without
## an operand have no symbol to declare and are skipped.
## GLOBAL_OFFSET_TABLE_FLAG is raised only when at least one call really
## carries the @PLT suffix.
## Globals: FILE (read), GLOBALS (written), GLOBAL_OFFSET_TABLE_FLAG (written),
##          WORKING_COPY (read/written)
function load_externals() {
  local line=""
  local target=""
  local seen=" " ## space-delimited list of symbols already recorded
  local -i idx=0
  while IFS= read -r line; do
    ## Second whitespace-separated word of the line is the call operand.
    read -r _ target _ <<< "${line}"
    [[ -z "${target}" || "${target}" == \** ]] && continue
    ## Test for @PLT before stripping it, otherwise the check is always true.
    if [[ "${target}" == *@PLT ]]; then
      GLOBAL_OFFSET_TABLE_FLAG=1
      target="${target%@PLT}"
    fi
    [[ "${seen}" == *" ${target} "* ]] && continue
    seen+="${target} "
    idx+=1
    GLOBALS[idx]="extern ${target}"
  done < <(grep -E -- '^[[:space:]]*call' "${FILE}")
  WORKING_COPY="$(sed 's/@PLT//g' <<< "${WORKING_COPY}")"
}
## Arg 1 - number to print
## Prints the number in upper-case hex, zero-padded to at least two digits.
function num2hex() {
  local number="${1}"
  printf "%02X\n" "${number}"
}
## Prints the GLOBALS entries (right-aligned to 11 columns) with a blank line
## after entry 0, then the GOT extern line when GLOBAL_OFFSET_TABLE_FLAG is
## non-zero, then a closing blank line.
## Globals: GLOBALS, GLOBAL_OFFSET_TABLE_FLAG, GLOBAL_OFFSET_TABLE (all read)
function print_globals() {
  local -i idx=0
  local -i total=${#GLOBALS[@]}
  while ((idx < total)); do
    printf "%11s\n" "${GLOBALS[${idx}]}"
    if ((idx == 0)); then
      echo
    fi
    idx+=1
  done
  if ((GLOBAL_OFFSET_TABLE_FLAG)); then
    printf "${GLOBAL_OFFSET_TABLE}\n"
  fi
  echo
}
## Stub: declares its locals and performs no rewriting.
## The mapping noted for it is: -8(rbp)-->[qd]word [rbp-8H]
function add_map() {
  local -i line=0
  local val="" hexval=""
}
## Converts the decimal displacement of "<N>H" memory operands to hex, e.g.
##   dword [rbp-12H] --> dword [rbp-0CH]
## A line is rewritten when dword/qword/lea is followed (with no digit in
## between) by a number ending in "H"; that number is the one converted, so
## digits appearing earlier on the line (e.g. in register names such as r8)
## cannot be mistaken for the offset. The whole buffer is handled by a single
## awk pass; the result is zero-padded to at least two hex digits.
## Globals: WORKING_COPY (read/written)
function offset_to_hex() {
  WORKING_COPY=$(awk '
    {
      if (match($0, /([dq]word|lea)[^0-9]*[0-9]+H/)) {
        # head ends right after the "H" that closes the offset.
        head = substr($0, 1, RSTART + RLENGTH - 1)
        tail = substr($0, RSTART + RLENGTH)
        sub(/H$/, "", head)
        # The offset is the digit run at the very end of head.
        match(head, /[0-9]+$/)
        decimal = substr(head, RSTART) + 0
        $0 = substr(head, 1, RSTART - 1) sprintf("%02X", decimal) "H" tail
      }
      print
    }' <<< "${WORKING_COPY}")
}
## Rewrites mov-family instructions in WORKING_COPY:
##  1. movl/movq/movsbl/movzbl/movslq at the start of a line get their two
##     operands swapped ("a,b" --> "b, a");
##  2. the first movd/movl/movq on a line becomes plain mov;
##  3. movzbl --> movzx, movsbl --> movsx, movslq --> movsxd.
## Globals: WORKING_COPY (read/written)
function mov_map() {
  WORKING_COPY=$(
    printf '%s\n' "${WORKING_COPY}" | sed -E \
      -e 's/(^[ \t]*)(mov)(sbl|zbl|slq|l|q)([ \t]*)([^,]*)(,)([ \t]*)(.*$)/\1\2\3\4\8, \5/' \
      -e 's/mov[dlq]/mov/' \
      -e 's/movzbl/movzx/g' \
      -e 's/movsbl/movsx/g' \
      -e 's/movslq/movsxd/g'
  )
}
## Replaces every literal "(rax)" in WORKING_COPY with "byte [rax]".
## Globals: WORKING_COPY (read/written)
function rax_map() {
  local -r rax_rewrite='s/(rax)/byte [rax]/g'
  WORKING_COPY=$(printf '%s\n' "${WORKING_COPY}" | sed -e "${rax_rewrite}")
}
## Rewrites the first ".LC<n>(<reg>)" reference on each line of WORKING_COPY
## as "[rel ?_<n+1>]", with the label number zero-padded to three digits:
##   .LC0(rip)-->[rel ?_001]
## The +1 matches the ?_NNN labels written by print_ro_blocks, which start
## at 001. Only the label is touched; other numbers on the same line are
## left alone, and lines without a .LC reference pass through unchanged
## (so running the pass again does not bump existing labels).
## Uses only POSIX awk features (match/RSTART/RLENGTH).
## Globals: WORKING_COPY (read/written)
function leaq_map() {
  WORKING_COPY="$(awk '
    {
      if (match($0, /\.LC[0-9]+\([a-z][a-z][a-z]\)/)) {
        before = substr($0, 1, RSTART - 1)
        label  = substr($0, RSTART, RLENGTH)
        after  = substr($0, RSTART + RLENGTH)
        # Reduce ".LC12(rip)" to "12".
        sub(/^\.LC/, "", label)
        sub(/\(.*$/, "", label)
        $0 = before sprintf("[rel ?_%03d]", label + 1) after
      }
      print
    }' <<< "${WORKING_COPY}")"
}
## String Arg 1 - String to convert
## Prints the bytes of the string as NASM-style "db" rows of hex values with
## an "H" suffix, one row per 24 characters of the spaced hex dump.
## Pipeline stages, in order:
##  - echo -en: expands backslash escapes in the argument and appends a NUL
##    byte; the following sed removes every double quote.
##  - xxd -ps -c1: dumps one byte (two hex digits) per line; tr joins the
##    lines with spaces.
##  - sed: breaks the dump after every 24 characters, upper-cases it and
##    strips leading blanks.
##  - sed: on the last row, appends nine "00" groups and cuts the row back to
##    24 characters (i.e. pads the final row); upper-cases every row.
##    NOTE(review): "s/ / /g" is a no-op as written -- possibly whitespace
##    was collapsed in this copy; confirm against the upstream script.
##  - sed: prefixes each row with "db", and marks the hex pairs with "H" and
##    a comma separator.
##  - sed: drops the trailing comma, prefixes the row with a space, turns a
##    trailing 0AH into 00H and normalises the row ending to a single "H".
## Requires xxd and GNU sed (\U, -r).
function convert_string_to_hex() {
xxd -ps -c1 <(echo -en "${1}\x0"|sed 's/"//g')|tr '\n' ' '|sed -r 's/.{24}/\U&\n/g;s/^[ \t]*//'|sed -r '${s/^.*$/& 00 00 00 00 00 00 00 00 00/;s/^(.{24})(.*$)/\1/;};s/ / /g;s/^.*$/\U&/'|
sed -r 's/^./db &/;s/[0-9A-F]{2}[ \t]*$/&H/;s/[0-9A-F]{2}/&H,/g;'| sed -r 's/,[ \t]*$//;s/^.*$/ &/;s/0AH[ \t]*$/00H/;s/[,H \t]*$/H/'
}
## Load Read Only Blocks
## Moves every ".LC<n>:" block out of WORKING_COPY into RO_BLOCKS:
##  - the block body is the run of indented lines following the label;
##  - the label and everything up to (not including) the next line starting
##    with "." or a lower-case letter is removed from WORKING_COPY; when no
##    such line follows, the block runs to the end of the buffer;
##  - a ".align N" inside the block is removed and recorded as RO_ALIGN="align=N";
##  - a .string directive is replaced by the output of convert_string_to_hex.
## Globals: WORKING_COPY (read/written), RO_BLOCKS (written), RO_ALIGN (written)
function get_ro_blocks() {
  local block_start_addr=""
  local block_end_addr=""
  local next_label_addr=""
  local block=""
  local string=""
  local ro_align=""
  local -i ro_block_index=${#RO_BLOCKS[@]}
  ## Line numbers are collected once, up front; they stay valid because the
  ## lines removed below are overwritten with a marker and only deleted after
  ## the loop.
  while IFS= read -r block_start_addr; do
    ## Blocks are stored from index 1 up, matching the ?_001 label numbering
    ## used by print_ro_blocks.
    ((ro_block_index += 1))
    ## Indented lines between this label and the next non-indented line.
    block="$(sed -En "${block_start_addr},/^[^ \t]/{/^[^ \t]/b;p}" <<< "${WORKING_COPY}")"
    ## First later line that begins a new label/symbol, if any.
    next_label_addr="$(sed -En "$((block_start_addr + 1)),\${/^[\.a-z]/{=;q}}" <<< "${WORKING_COPY}")"
    if [[ -n "${next_label_addr}" ]]; then
      block_end_addr="$((next_label_addr - 1))"
    else
      ## Nothing follows the block: mark through the last line rather than
      ## computing an invalid address (which would make sed fail and leave
      ## WORKING_COPY empty).
      block_end_addr='$'
    fi
    WORKING_COPY="$(sed -E "${block_start_addr},${block_end_addr}s/^.*$/DELETE_THIS/" <<< "${WORKING_COPY}")"
    ro_align=$(sed -rn 's/^([ \t]*.align )([0-9]+)(.*$)/align=\2/p' <<< "${block}")
    if [[ -n "${ro_align}" ]]; then
      RO_ALIGN="${ro_align}"
      block="$(sed -r "s/.align[ \t]*[0-9]+//" <<< "${block}")"
    fi
    string=$(sed -rn 's/^([ \t]*\.string)([ \t]*)("[^"]*")(.*$)/\3/p' <<< "${block}")
    if [[ -n "${string}" ]]; then
      ## Newlines are carried through sed as \001 so the multi-row hex dump
      ## can be used as a single-line replacement, then restored.
      string="$(convert_string_to_hex "${string}" | tr '\n' '\001')"
      block="$(sed -r "s/^[ \t]*\.string.*\$/${string}/;/^[ \t]*.(text|type|globl)/d" <<< "${block}" | tr '\001' '\n')"
    fi
    RO_BLOCKS[${ro_block_index}]="${block}"
  done < <(sed -En '/^\.LC[0-9]+:.*$/=' <<< "${WORKING_COPY}")
  WORKING_COPY="$(sed '/^DELETE_THIS/d' <<< "${WORKING_COPY}")"
}
## Prints the .rodata section header (with RO_ALIGN appended) and then each
## stored block under a "?_NNN:" label, counting from 001.
## Globals: RO_ALIGN, RO_BLOCKS (read)
function print_ro_blocks() {
  local -i label=1
  local -i count=${#RO_BLOCKS[@]}
  printf "SECTION .rodata %s\n\n" "${RO_ALIGN}"
  while ((label <= count)); do
    printf "?_%03.f:\n" ${label}
    printf "%s\n" "${RO_BLOCKS[$label]}"
    label+=1
  done
}
## Rewrites the first fs-segment reference on each line of WORKING_COPY:
##   fs:40-->qword [fs:abs 40H]
## (the number is left as written here; offset_to_hex converts it later).
## Globals: WORKING_COPY (read/written)
function fs_map() {
  local -r fs_rewrite='s/\bfs:([0-9]+)\b/qword [fs:abs \1H]/'
  WORKING_COPY="$(printf '%s\n' "${WORKING_COPY}" | sed -E -e "${fs_rewrite}")"
}
## Runs every conversion pass, in this fixed order.
## offset_to_hex must stay last: it converts the decimal "<N>H" offsets that
## the earlier passes produce.
function init() {
  local _init_step=""
  for _init_step in \
    main_clean \
    load_externals \
    get_ro_blocks \
    mov_map \
    leaq_map \
    rax_map \
    fs_map \
    offset_to_hex; do
    "${_init_step}"
  done
}
## Prints the .text section header followed by a blank line.
function print_section_text() {
  printf '%s\n\n' "SECTION .text"
}
## Prints the .data section header followed by a blank line.
function print_section_data() {
  printf '%s\n\n' "SECTION .data"
}
## Prints the .bss section header followed by a blank line.
function print_section_bss() {
  printf '%s\n\n' "SECTION .bss"
}
## Converts the input and writes the NASM listing to stdout: global/extern
## declarations, the .text section with the converted code, the (empty)
## .data and .bss section headers, and finally the read-only blocks.
function main() {
  local _emit=""
  init
  print_globals
  print_section_text
  printf "%s\n\n" "${WORKING_COPY}"
  for _emit in print_section_data print_section_bss print_ro_blocks; do
    "${_emit}"
  done
}
## Parses the script's command line from BASH_ARGV (which holds the arguments
## in reverse order, hence the backwards walk).
##  - no arguments, -h, --help or any other two-character "-x" option
##    (the "-?" pattern is a glob): print usage and exit 0;
##  - anything else is taken as the input file and stored in FILE; when it
##    does not exist an error is written to stderr and the script exits 1.
## The error goes to stderr because stdout carries the generated NASM and is
## normally redirected into a file; the file name is passed to printf as an
## argument so a "%" or "\" in it cannot be misread as a format directive.
## Globals: BASH_ARGV (read), FILE (written)
function options_handler() {
  local -i i=0
  local CURVAL=""
  if ((${#BASH_ARGV[@]} == 0)); then
    usage
    exit 0
  fi
  for ((i = ${#BASH_ARGV[@]} - 1; i >= 0; i--)); do
    CURVAL=${BASH_ARGV[${i}]}
    case "${CURVAL}" in
      -h | -? | --help)
        usage
        exit 0
        ;;
      *)
        FILE="${CURVAL}"
        if [[ ! -e "${FILE}" ]]; then
          printf '*** ERROR - File ("%s") not found. \n' "${FILE}" >&2
          exit 1
        fi
        ;;
    esac
  done
}
## Entry point: parse the command line, run the conversion, then exit
## explicitly. The explicit exit also stops bash before it reaches the
## non-script text that follows in this file.
options_handler
main
exit 0
Friday, October 25, 2019
g2nasm - GAS ASM to NASM
Why didn't anyone else think to do this over the last 20+ years? I have no idea, but here it is. It's a Work In Progress, it's far from done but it's a decent start.
Get the latest on GitHub.
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment