Google CTF Quals 2018 - Back To Basics

C64 BASIC Reversing

General problem description

You won't find any assembly in this challenge, only C64 BASIC. Once you get the password, the flag is CTF{password}. P.S. The challenge has been tested on the VICE emulator.

We got an old .prg file, which is a C64 program file.

Parsing the file

First we tried using parsers that exist in the wild, that would parse the file for us, but that proved to be not effective, as there were not many and they didn't work very well, so we wrote our own parser. Fortunately the file format is not very difficult. Here is the parser in python:

import binascii
decode_char = [
    u'\uFFFD',u'\uFFFD',u'\uFFFD',u'\uFFFD',u'\uFFFD',u'\uF100',u'\uFFFD',u'\uFFFD',u'\uF118',u'\uF119',u'\uFFFD',u'\uFFFD',u'\uFFFD',u'\u000D',u'\u000E',u'\uFFFD',
    u'\uFFFD',u'\uF11C',u'\uF11A',u'\uF120',u'\u007F',u'\uFFFD',u'\uFFFD',u'\uFFFD',u'\uFFFD',u'\uFFFD',u'\uFFFD',u'\uFFFD',u'\uF101',u'\uF11D',u'\uF102',u'\uF103',
    u'\u0020',u'\u0021',u'\u0022',u'\u0023',u'\u0024',u'\u0025',u'\u0026',u'\u0027',u'\u0028',u'\u0029',u'\u002A',u'\u002B',u'\u002C',u'\u002D',u'\u002E',u'\u002F',
    u'\u0030',u'\u0031',u'\u0032',u'\u0033',u'\u0034',u'\u0035',u'\u0036',u'\u0037',u'\u0038',u'\u0039',u'\u003A',u'\u003B',u'\u003C',u'\u003D',u'\u003E',u'\u003F',
    u'\u0040',u'\u0061',u'\u0062',u'\u0063',u'\u0064',u'\u0065',u'\u0066',u'\u0067',u'\u0068',u'\u0069',u'\u006A',u'\u006B',u'\u006C',u'\u006D',u'\u006E',u'\u006F',
    u'\u0070',u'\u0071',u'\u0072',u'\u0073',u'\u0074',u'\u0075',u'\u0076',u'\u0077',u'\u0078',u'\u0079',u'\u007A',u'\u005B',u'\u00A3',u'\u005D',u'\u2191',u'\u2190',
    u'\u2501',u'\u0041',u'\u0042',u'\u0043',u'\u0044',u'\u0045',u'\u0046',u'\u0047',u'\u0048',u'\u0049',u'\u004A',u'\u004B',u'\u004C',u'\u004D',u'\u004E',u'\u004F',
    u'\u0050',u'\u0051',u'\u0052',u'\u0053',u'\u0054',u'\u0055',u'\u0056',u'\u0057',u'\u0058',u'\u0059',u'\u005A',u'\u253C',u'\uF12E',u'\u2502',u'\u2592',u'\uF139',
    u'\uFFFD',u'\uF104',u'\uFFFD',u'\uFFFD',u'\uFFFD',u'\uF110',u'\uF112',u'\uF114',u'\uF116',u'\uF111',u'\uF113',u'\uF115',u'\uF117',u'\u000A',u'\u000F',u'\uFFFD',
    u'\uF105',u'\uF11E',u'\uF11B',u'\u000C',u'\uF121',u'\uF106',u'\uF107',u'\uF108',u'\uF109',u'\uF10A',u'\uF10B',u'\uF10C',u'\uF10D',u'\uF11D',u'\uF10E',u'\uF10F',
    u'\u00A0',u'\u258C',u'\u2584',u'\u2594',u'\u2581',u'\u258F',u'\u2592',u'\u2595',u'\uF12F',u'\uF13A',u'\uF130',u'\u251C',u'\uF134',u'\u2514',u'\u2510',u'\u2582',
    u'\u250C',u'\u2534',u'\u252C',u'\u2524',u'\u258E',u'\u258D',u'\uF131',u'\uF132',u'\uF133',u'\u2583',u'\u2713',u'\uF135',u'\uF136',u'\u2518',u'\uF137',u'\uF138',
    u'\u2501',u'\u0041',u'\u0042',u'\u0043',u'\u0044',u'\u0045',u'\u0046',u'\u0047',u'\u0048',u'\u0049',u'\u004A',u'\u004B',u'\u004C',u'\u004D',u'\u004E',u'\u004F',
    u'\u0050',u'\u0051',u'\u0052',u'\u0053',u'\u0054',u'\u0055',u'\u0056',u'\u0057',u'\u0058',u'\u0059',u'\u005A',u'\u253C',u'\uF12E',u'\u2502',u'\u2592',u'\uF139',
    u'\u00A0',u'\u258C',u'\u2584',u'\u2594',u'\u2581',u'\u258F',u'\u2592',u'\u2595',u'\uF12F',u'\uF13A',u'\uF130',u'\u251C',u'\uF134',u'\u2514',u'\u2510',u'\u2582',
    u'\u250C',u'\u2534',u'\u252C',u'\u2524',u'\u258E',u'\u258D',u'\uF131',u'\uF132',u'\uF133',u'\u2583',u'\u2713',u'\uF135',u'\uF136',u'\u2518',u'\uF137',u'\u2592'
    ]

token = ["END", "FOR", "NEXT", "DATA", "INPUT#", "INPUT", "DIM", "READ", 
        "LET", "GOTO", "RUN", "IF", "RESTORE", "GOSUB", "RETURN", "REM",
        "STOP", "ON", "WAIT", "LOAD", "SAVE", "VERIFY", "DEF", "POKE",
        "PRINT#", "PRINT", "CONT", "LIST", "CLR", "CMD", "SYS", "OPEN",
        "CLOSE", "GET", "NEW", "TAB(", "TO", "FN", "SPC(", "THEN",
        "NOT", "STEP", "+", "-", "*", "/", "power", "AND",
        "OR", ">", "=", "<", "SGN", "INT", "ABS", "USR",
        "FRE", "POS", "SQR", "RND", "LOG", "EXP", "COS", "SIN",
        "TAN", "ATN", "PEEK", "LEN", "STR$", "VAL", "ASC", "CHR$",
        "LEFT$", "RIGHT$", "MID$", "GO"]


ptr = 0
def parsecommand(f):
    global ptr
    s = ""
    while True:
        c = f.read(1)
        ptr += 1
        if len(c) == 0:
            return s
        byte = ord(c)
        if byte >= 0x80 and byte <= 0xCB:
            s = s + token[byte - 0x80]
            continue
        elif byte == 0x00:
            return s
        s = s + decode_char[byte]

with open("crackme.prg", "rb") as f:
    ptr = int(binascii.hexlify(f.read(2)[::-1]), 16)
    print("Mapping Offset %x" % ptr)
    while True:
        nextoffset = f.read(2)[::-1]
        if len(nextoffset) < 2:
            break
        print ("%x" % ptr) + " -> " + binascii.hexlify(nextoffset)
        ptr += 2
        label = f.read(2)
        if len(label) < 2:
            break
        lab = int(binascii.hexlify(label[::-1]), 16)
        ptr += 2
        print str(lab) + " " + parsecommand(f)

Patching the binary

We looked at the code and saw that this command:

2001 POKE 03397, 00069 : POKE 03398, 00013

Considering that the binary is mapped at memory position 0x801, this command actually modifies the program memory. There is also a second such command:

2001 POKE 03397, 00069 : POKE 03398, 00013

These two do not do anything except manipulate the next-pointer of a comment.

The next few pairs of commands, that we can recognize as important are of the form:

2004 ES = 03741 : EE = 04981 : EK = 148
2005 FOR I = ES TO EE : K = ( PEEK(I) + EK ) AND 255 : POKE I, K : NEXT I

They loop over program memory and change it. We Wrote a script, that patches these memory locations, so we can parse the decoded memory. The code for this was:

def patchbinary(f1, f2, fr, to, l):
    with open(f1, "rb") as f:
        data = f.read()
    offset = data[:2]
    ptr = int(binascii.hexlify(offset[::-1]), 16)
    fr -= ptr
    to -= ptr
    data = data[2:]
    pre = data[:fr]
    topatch = data[fr:to]
    post = data[to:]
    res = ""
    for c in topatch:
        res = res + l(c)
    with open(f2, "wb") as file2:
        file2.write(offset + pre + res + post)

#2001 POKE 03397, 00069 : POKE 03398, 00013
patchbinary("crackme.prg", "crackme2.prg", 3397, 3398, lambda x: chr(199))
patchbinary("crackme2.prg", "crackme2.prg", 3398, 3399, lambda x: chr(13))
#2001 POKE 03397, 00069 : POKE 03398, 00013
patchbinary("crackme2.prg", "crackme2.prg", 3397, 3398, lambda x: chr(69))
patchbinary("crackme2.prg", "crackme2.prg", 3398, 3399, lambda x: chr(13))
#2004 ES = 03741 : EE = 04981 : EK = 148
#2005 FOR I = ES TO EE : K = ( PEEK(I) + EK ) AND 255 : POKE I, K : NEXT I
patchbinary("crackme2.prg", "crackme2.prg", 3741, 4982, lambda x: chr((ord(x) + 148) & 0xff))
#2004 ES = 05363 : EE = 06632 : EK = 152
#2005 FOR I = ES TO EE : K = ( PEEK(I) + EK ) AND 255 : POKE I, K : NEXT I
patchbinary("crackme2.prg", "crackme2.prg", 5363, 6633, lambda x: chr((ord(x) + 152) & 0xff))
#2004 ES = 07014 : EE = 08219 : EK = 165
#2005 FOR I = ES TO EE : K = ( PEEK(I) + EK ) AND 255 : POKE I, K : NEXT I
patchbinary("crackme2.prg", "crackme2.prg", 7014, 8220, lambda x: chr((ord(x) + 165) & 0xff))
#2004 ES = 08601 : EE = 09868 : EK = 184
#2005 FOR I = ES TO EE : K = ( PEEK(I) + EK ) AND 255 : POKE I, K : NEXT I
patchbinary("crackme2.prg", "crackme2.prg", 8601, 9869, lambda x: chr((ord(x) + 184) & 0xff))
#2004 ES = 10250 : EE = 11483 : EK = 199
#2005 FOR I = ES TO EE : K = ( PEEK(I) + EK ) AND 255 : POKE I, K : NEXT I
patchbinary("crackme2.prg", "crackme2.prg", 10250, 11484, lambda x: chr((ord(x) + 199) & 0xff))
#2004 ES = 11865 : EE = 13107 : EK = 240
#2005 FOR I = ES TO EE : K = ( PEEK(I) + EK ) AND 255 : POKE I, K : NEXT I
patchbinary("crackme2.prg", "crackme2.prg", 11865, 13108, lambda x: chr((ord(x) + 240) & 0xff))
#2004 ES = 13489 : EE = 14760 : EK = 249
#2005 FOR I = ES TO EE : K = ( PEEK(I) + EK ) AND 255 : POKE I, K : NEXT I
patchbinary("crackme2.prg", "crackme2.prg", 13489, 14761, lambda x: chr((ord(x) + 249) & 0xff))
#2004 ES = 15142 : EE = 16351 : EK = 132
#2005 FOR I = ES TO EE : K = ( PEEK(I) + EK ) AND 255 : POKE I, K : NEXT I
patchbinary("crackme2.prg", "crackme2.prg", 15142, 16352, lambda x: chr((ord(x) + 132) & 0xff))
#2004 ES = 16733 : EE = 17975 : EK = 186
#2005 FOR I = ES TO EE : K = ( PEEK(I) + EK ) AND 255 : POKE I, K : NEXT I
patchbinary("crackme2.prg", "crackme2.prg", 16733, 17976, lambda x: chr((ord(x) + 186) & 0xff))
#2004 ES = 18360 : EE = 19633 : EK = 214
#2005 FOR I = ES TO EE : K = ( PEEK(I) + EK ) AND 255 : POKE I, K : NEXT I
patchbinary("crackme2.prg", "crackme2.prg", 18360, 19634, lambda x: chr((ord(x) + 214) & 0xff))
#2004 ES = 20020 : EE = 21265 : EK = 245
#2005 FOR I = ES TO EE : K = ( PEEK(I) + EK ) AND 255 : POKE I, K : NEXT I
patchbinary("crackme2.prg", "crackme2.prg", 20020, 21266, lambda x: chr((ord(x) + 245) & 0xff))
#2004 ES = 21652 : EE = 22923 : EK = 203
#2005 FOR I = ES TO EE : K = ( PEEK(I) + EK ) AND 255 : POKE I, K : NEXT I
patchbinary("crackme2.prg", "crackme2.prg", 21652, 22924, lambda x: chr((ord(x) + 203) & 0xff))
#2004 ES = 23310 : EE = 24584 : EK = 223
#2005 FOR I = ES TO EE : K = ( PEEK(I) + EK ) AND 255 : POKE I, K : NEXT I
patchbinary("crackme2.prg", "crackme2.prg", 23310, 24585, lambda x: chr((ord(x) + 223) & 0xff))
#2004 ES = 24971 : EE = 26178 : EK = 237
#2005 FOR I = ES TO EE : K = ( PEEK(I) + EK ) AND 255 : POKE I, K : NEXT I
patchbinary("crackme2.prg", "crackme2.prg", 24971, 26179, lambda x: chr((ord(x) + 237) & 0xff))
#2004 ES = 26565 : EE = 27837 : EK = 192
#2005 FOR I = ES TO EE : K = ( PEEK(I) + EK ) AND 255 : POKE I, K : NEXT I
patchbinary("crackme2.prg", "crackme2.prg", 26565, 27838, lambda x: chr((ord(x) + 192) & 0xff))
#2004 ES = 28224 : EE = 29471 : EK = 157
#2005 FOR I = ES TO EE : K = ( PEEK(I) + EK ) AND 255 : POKE I, K : NEXT I
patchbinary("crackme2.prg", "crackme2.prg", 28224, 29472, lambda x: chr((ord(x) + 157) & 0xff))
#2004 ES = 29858 : EE = 31101 : EK = 158
#2005 FOR I = ES TO EE : K = ( PEEK(I) + EK ) AND 255 : POKE I, K : NEXT I
patchbinary("crackme2.prg", "crackme2.prg", 29858, 31102, lambda x: chr((ord(x) + 158) & 0xff))
#2004 ES = 31488 : EE = 32761 : EK = 235
#2005 FOR I = ES TO EE : K = ( PEEK(I) + EK ) AND 255 : POKE I, K : NEXT I
patchbinary("crackme2.prg", "crackme2.prg", 31488, 32762, lambda x: chr((ord(x) + 235) & 0xff))
#2004 ES = 33148 : EE = 34367 : EK = 143
#2005 FOR I = ES TO EE : K = ( PEEK(I) + EK ) AND 255 : POKE I, K : NEXT I
patchbinary("crackme2.prg", "crackme2.prg", 33148, 34368, lambda x: chr((ord(x) + 143) & 0xff))

All decoded sections have the form of:

2010 v = 0.6666666666612316235641 - 0.00000000023283064365386962890625 : g = 0
2020 ba = ASC( MID$(p$, 1, 1) )
2021 bb = ASC( MID$(p$, 2, 1) )
2025 p0 = 0:p1 = 0:p2 = 0:p3 = 0:p4 = 0:p5 = 0:p6 = 0:p7 = 0:p8 = 0:p9 = 0:pa = 0:pb = 0:pc = 0
2030 IF ba AND 1 THEN p0 = 0.062500000001818989403545856475830078125
2031 IF ba AND 2 THEN p1 = 0.0156250000004547473508864641189575195312
2032 IF ba AND 4 THEN p2 = 0.0039062500001136868377216160297393798828
2033 IF ba AND 8 THEN p3 = 0.0009765625000284217094304040074348449707
2034 IF ba AND 16 THEN p4 = 0.0002441406250071054273576010018587112427
2035 IF ba AND 32 THEN p5 = 0.0000610351562517763568394002504646778107
2036 IF ba AND 64 THEN p6 = 0.0000152587890629440892098500626161694527
2037 IF ba AND 128 THEN p7 = 0.0000038146972657360223024625156540423632
2040 IF bb AND 1 THEN p8 = 0.0000009536743164340055756156289135105908
2031 IF bb AND 2 THEN p9 = 0.0000002384185791085013939039072283776477
2032 IF bb AND 4 THEN pa = 0.0000000596046447771253484759768070944119
2033 IF bb AND 8 THEN pb = 0.000000014901161194281337118994201773603
2034 IF bb AND 16 THEN pc = 0.0000000037252902985703342797485504434007
2050 k = v + p0 + p1 + p2 + p3 + p4 + p5 + p6 + p7 + p8 + p9 + pa + pb + pc
2060 g = 0.671565706376017
2100 t0 = k = g : a = 86 : b = 10
2200 IF t0 = -1 THEN a = 83 : b = 5
2210 POKE 1024 + chkoff + 1, 90

ASC( MID$(p$, 1, 1) ) selects the first char and converts it into a number. From what we can gather the code adds values to v depending if certain bits are set in the converted chars of the input.

Then there is a compare with value g. THe problem here is, that the values can never add up to g no mather the combination. At first this was confusing, but computers when working with floating point may not be accurate. So what we tried to do was to be as accurate as possible and just go with that.

When working with the values we did not actually use floating point numbers, but python integers. They do not have a size limit we need to worry about, so we converted the floating point numbers to integers like this: 0.6666666666612316235641 -> int("6666666666612316235641".ljust(50, '0')). So we append 0s on the right side of the number until they have a length of 50. This means all our numbers are strings of length 50, which we can now transform to integers. This means we do not loose precision.

Now we need to choose the best values for p0 - pc so that the sum + v best matches g. We used this algorithm to solve the problem:

def solve_eq(koeff, v):
    sol = []
    for k in koeff:
        if abs(v - k) > v:
            sol.append(0)
        else:
            v -= k
            sol.append(1)
    print v
    return sol

When going through all decoded sections we can see that the only values, that diverge from the first block are v and g. We extracted all values and solved all bits of the solution:

arr = []
p = [int("062500000001818989403545856475830078125".ljust(50, '0')),
    int("0156250000004547473508864641189575195312".ljust(50, '0')),
    int("0039062500001136868377216160297393798828".ljust(50, '0')),
    int("0009765625000284217094304040074348449707".ljust(50, '0')),
    int("0002441406250071054273576010018587112427".ljust(50, '0')),
    int("0000610351562517763568394002504646778107".ljust(50, '0')),
    int("0000152587890629440892098500626161694527".ljust(50, '0')),
    int("0000038146972657360223024625156540423632".ljust(50, '0')),
    int("0000009536743164340055756156289135105908".ljust(50, '0')),
    int("0000002384185791085013939039072283776477".ljust(50, '0')),
    int("0000000596046447771253484759768070944119".ljust(50, '0')),
    int("000000014901161194281337118994201773603".ljust(50, '0')),
    int("0000000037252902985703342797485504434007".ljust(50, '0'))]

v = int("6666666666612316235641".ljust(50, '0')) - int("00000000023283064365386962890625".ljust(50, '0'))
k = int("671565706376017".ljust(50, '0'))
arr.extend(solve_eq(p, k - v))

k = int("682612358126820".ljust(50, '0'))
arr.extend(solve_eq(p, k - v))

v = int("6666666666612316235641".ljust(50, '0'))
k = int("682552023325146".ljust(50, '0'))
arr.extend(solve_eq(p, k - v))

v = int("6666666666612316235641".ljust(50, '0')) - int("00000000023283064365386962890625".ljust(50, '0'))
k = int("667647300753773".ljust(50, '0'))
arr.extend(solve_eq(p, k - v))

v = int("6666666666612316235641".ljust(50, '0'))
k = int("682310803327740".ljust(50, '0'))
arr.extend(solve_eq(p, k - v))

...

def frombits(bits):
    chars = []
    for b in range(len(bits) / 8):
        byte = bits[b*8:(b+1)*8]
        chars.append(chr(int(''.join([str(bit) for bit in byte]), 2)))
    return ''.join(chars)
s = ""
for i in range(0, len(arr), 8):
    s = s + frombits(arr[i:i + 8][::-1])
print 

Executing this program gives us LINKED-LISTS-AND-40-BIT-FLOATS which makes the flag CTF{LINKED-LISTS-AND-40-BIT-FLOATS}


Navigation