Statistical analysis of text encrypted with a monoalphabetic substitution cipher
# affine.py: affine cipher helpers, imported by the main script below
# as `from affine import decode_affine, affine_break`.
l = ord('Z') - ord('A') + 1  # alphabet size (26)

def inverso(x, m):
    # Multiplicative inverse of x modulo m (0 if it does not exist)
    for i in range(m):
        r = (x * i) % m
        if r == 1:
            return i
    return 0

def encode_affine(t, a, b):
    res = ""
    for i in t:
        d = chr(ord('A') + (((a * (ord(i) - ord('A'))) + b) % l))
        res += d
    return res

def decode_affine(t, a, b):
    a = inverso(a, l)
    res = ""
    for i in t:
        d = chr(ord('A') + ((a * ((ord(i) - ord('A')) - b)) % l))
        res += d
    return res

def affine_break(m1, m2, c1, c2):
    # Recover (a, b) from two plaintext/ciphertext letter pairs:
    # (a * m1) + b = c1
    # (a * m2) + b = c2
    # b = c2 - (a * m2)
    # (a * m1) + c2 - (a * m2) = c1
    # a * (m1 - m2) = c1 - c2
    # ---
    # a = (c1 - c2) * inv(m1 - m2)
    # b = c2 - (a * m2)
    m1 = ord(m1) - ord('A')
    m2 = ord(m2) - ord('A')
    c1 = ord(c1) - ord('A')
    c2 = ord(c2) - ord('A')
    a = ((c1 - c2) * inverso((m1 - m2), l)) % l
    b = (c2 - (a * m2)) % l
    return dict(a=a, b=b)
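A quick usage sketch (illustrative, assuming the functions above live in affine.py, which is how the main script imports them). The key (a=7, b=3) and the letter pairs are the same ones recovered later in the analysis:

from affine import encode_affine, decode_affine, affine_break

c = encode_affine("LARECOGIDA", 7, 3)
print(c)                                 # CDSFRXTHYD
print(decode_affine(c, 7, 3))            # LARECOGIDA
# Two known (plaintext, ciphertext) letter pairs are enough to recover (a, b)
print(affine_break('L', 'A', 'C', 'D'))  # {'a': 7, 'b': 3}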
#!/usr/bin/env python3
# cesar.py: Caesar rotation helper, imported by the main script below
# as `from cesar import cesar`. Run directly, it brute-forces the sample
# ciphertext in t.
t = '''
VAQG  JYHN  QGTR  YNOQ  AEXM  TRZR  QQAU  QAQQ  XQZQ  YUSA  QEFM  QZXM  OUGP  MP
'''

i = ord('A')
f = ord('Z')
r = (f + 1) - i  # alphabet size (26)

l = list("".join(t.split()))

def print_four(n, s):
    # Print s in groups of four characters, prefixed with the rotation number
    aux = s
    res = ""
    while len(aux):
        res += aux[:4]
        res += " "
        aux = aux[4:]
    print("[%02d] %s" % (n, res))

def cesar(n, l):
    res = []
    for y in l:
        c = chr(((ord(y) - i + n) % r) + i)
        res.append(c)
    return "".join(res)

if __name__ == '__main__':
    print()
    for x in range(r)[:int(r / 2) + 1]:
        pos = cesar(-x, l)
        neg = cesar(x, l)
        print_four(x, pos)
        print_four(r - x, neg)
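A small check of the rotation helper (illustrative): shifting by -x and by r - x give the same result modulo 26, which is why the loop above only scans half the rotations while printing two labelled rows per iteration:

from cesar import cesar

print(cesar(3, "HOLA"))                          # KROD
print(cesar(-3, "KROD"), cesar(26 - 3, "KROD"))  # HOLA HOLA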
#!/usr/bin/env python
# entro.py: Shannon entropy of a text, imported by the main script below
# as `from entro import entropy`.
import sys
from math import log

def entropy(b):
    # Entropy in bits per symbol of the sequence b
    l = len(b)
    count = {}
    for i in b:
        if i in count:
            count[i] += 1
        else:
            count[i] = 1
    result = 0
    for v in count:
        f = float(count[v]) / l
        result -= (f * log(f, 2))
    return result

if __name__ == '__main__':
    with open(sys.argv[1]) as f:
        b = f.read()
        result = entropy(b)
        print(result)
        # Percentage (relative to the 8 bits per byte of random data)
        # print(result/8)
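For reference, a minimal sketch of how to read the entropy figure (assuming the file above is saved as entro.py, matching the main script's import):

from entro import entropy
from math import log

print(entropy("AAAA"))  # 0.0  : a single symbol carries no information
print(entropy("ABAB"))  # 1.0  : two equally likely symbols, 1 bit per symbol
print(log(26, 2))       # ~4.70: the maximum for 26 equally likely letters
# The ciphertext analysed below scores ~3.91, well under 4.70 and nowhere
# near the ~8 bits/byte of modern ciphers, so its letter frequencies are
# skewed enough for a statistical attack on the substitution.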
[ STATISTICAL ANALYSIS OF TEXT ]
[ ENCRYPTED WITH A MONOALPHABETIC SUBSTITUTION CIPHER ]
Javier Junquera Sánchez <[email protected]>
Phases:
1. Entropy analysis
2. Character frequency analysis
3. Analysis of identical adjacent characters (RR, LL)
4. Analysis of repeated bigrams
5. Generation of candidate translations based on language statistics
6. Use of the statistics against the affine cipher
7. Affine, Caesar and Atbash brute force
----------
[*] Message:
CDSFRXTHYDJDZHUDYFYDGXZYFRHNYDYDQXZRXQFCESXEXZHGXYFUHTHCDQRHDPCDHJECFJFQGDRHQYFJFYHYDZYFZFTNSHYDYGHFQFNQFMFRGXDRNJNCDGHUXLNFQDYHFYHZRNGFGHFQFFQNQHJEDRGXFQFCSFRXSGFYFCXZYFSFRAXZYFCHQYHUHYNXZFCCDJDDLNFZFYHZRNGDZHRDYDJFYHYDYFRXQGSXCFZRXSSFRGDFMHRDWPNQDUHXCDRHQQFRFZDSHDPESXEXSRHXQDCYFCXZYFSFRAXZLNFQNFZGSDZXRHFYDYRXQZHYFSDMNQYDJFQGDCFZPLNFESFRHZDQYFNQDYFMFQZDRFSSDYD
[*] Entropy: 3.911532
[*] Statistics:
{'F': 54, 'D': 41, 'Y': 34, 'H': 31, 'X': 25, 'R': 25, 'Q': 25, 'Z': 23, 'S': 18, 'N': 17, 'C': 16, 'G': 15, 'J': 9, 'E': 7, 'U': 5, 'P': 4, 'M': 4, 'L': 4, 'T': 3, 'A': 2, 'W': 1}
[*] Doubled letters (RR, LL):
{'F': [140], 'C': [190], 'D': [194], 'S': [233, 358], 'Q': [256]}
[*] Repeated bigrams:
{'CD': [0, 55, 62, 115, 191, 252], 'DS': [1, 262], 'SF': [2, 156, 170, 234, 286, 337], 'FR': [3, 24, 106, 157, 171, 221, 235, 258, 287, 338], 'RX': [4, 35, 158, 222, 231, 310], 'XT': [5], 'TH': [6, 53], 'HY': [7, 82, 93, 184, 217, 314], 'YD': [8, 18, 28, 30, 83, 94, 212, 218, 307, 322, 361], 'DJ': [9, 192, 213, 323], 'JD': [10, 193], 'DZ': [11, 84, 207, 301], 'ZH': [12, 45, 208, 313], 'HU': [13, 118, 182], 'UD': [14], 'DY': [15, 29, 95, 125, 211, 219, 308, 348, 360], 'YF': [16, 23, 49, 77, 86, 163, 168, 176, 220, 279, 284, 315, 344, 349], 'FY': [17, 80, 128, 162, 200, 215, 306], 'DG': [19, 116], 'GX': [20, 47, 108, 150], 'XZ': [21, 33, 44, 166, 174, 187, 282, 290], 'ZY': [22, 85, 167, 175, 283], 'RH': [25, 58, 74, 254, 273, 304, 339], 'HN': [26], 'NY': [27], 'DQ': [31, 56, 342], 'QX': [32], 'ZR': [34, 131, 203, 230], 'XQ': [36, 223, 275, 311], 'QF': [37, 100, 103, 139, 153, 257], 'FC': [38, 154, 164, 177, 189, 280], 'CE': [39], 'ES': [40, 267, 336], 'SX': [41, 226, 268], 'XE': [42, 269], 'EX': [43, 270], 'HG': [46], 'XY': [48], 'FU': [50], 'UH': [51, 183, 249], 'HT': [52], 'HC': [54], 'QR': [57], 'HD': [59, 264], 'DP': [60, 265], 'PC': [61], 'DH': [63], 'HJ': [64, 145], 'JE': [65, 146], 'EC': [66], 'CF': [67, 228, 329], 'FJ': [68, 78], 'JF': [69, 79, 214, 324], 'FQ': [70, 99, 123, 138, 141, 152, 294, 325, 352], 'QG': [71, 224, 326], 'GD': [72, 206, 237, 327], 'DR': [73, 110, 148, 253, 355], 'HQ': [75, 179, 255], 'QY': [76, 180, 321, 343], 'YH': [81, 126, 129, 181, 201, 216], 'FZ': [87, 198, 229, 260, 297, 330], 'ZF': [88, 188, 199], 'FT': [89], 'TN': [90], 'NS': [91], 'SH': [92, 263], 'YG': [96], 'GH': [97, 117, 136], 'HF': [98, 127, 137, 305], 'FN': [101, 345], 'NQ': [102, 143, 246, 320, 346], 'FM': [104, 239, 350], 'MF': [105, 351], 'RG': [107, 149, 236], 'XD': [109], 'RN': [111, 132, 204], 'NJ': [112], 'JN': [113], 'NC': [114], 'UX': [119], 'XL': [120], 'LN': [121, 196, 292, 333], 'NF': [122, 197, 293, 296, 334], 'QD': [124, 247, 276, 347], 'HZ': [130, 202, 340], 'NG': [133, 205], 'GF': [134, 161], 'FG': [135], 'FF': [140], 'QN': [142, 295], 'QH': [144], 'ED': [147], 'XF': [151], 'CS': [155], 'XS': [159, 232, 271], 'SG': [160], 'CX': [165, 281], 'FS': [169, 285, 316, 357], 'RA': [172, 288], 'AX': [173, 289], 'CH': [178], 'YN': [185], 'NX': [186], 'CC': [190], 'DD': [194], 'DL': [195], 'HR': [209, 241], 'RD': [210, 242], 'GS': [225, 299], 'XC': [227, 251], 'SS': [233, 358], 'DF': [238], 'MH': [240], 'DW': [243], 'WP': [244], 'PN': [245], 'DU': [248], 'HX': [250, 274], 'QQ': [256], 'RF': [259, 356], 'ZD': [261, 341, 354], 'PE': [266], 'SR': [272], 'DC': [277, 328], 'CY': [278], 'ZL': [291], 'ZG': [298], 'SD': [300, 317, 359], 'ZX': [302], 'XR': [303], 'YR': [309], 'QZ': [312, 353], 'DM': [318], 'MN': [319], 'ZP': [331], 'PL': [332], 'FE': [335]}
[*] Matrix:
C D S F R X T H Y D J D Z H U D Y F Y D
G X Z Y F R H N Y D Y D Q X Z R X Q F C
E S X E X Z H G X Y F U H T H C D Q R H
D P C D H J E C F J F Q G D R H Q Y F J
F Y H Y D Z Y F Z F T N S H Y D Y G H F
Q F N Q F M F R G X D R N J N C D G H U
X L N F Q D Y H F Y H Z R N G F G H F Q
F F Q N Q H J E D R G X F Q F C S F R X
S G F Y F C X Z Y F S F R A X Z Y F C H
Q Y H U H Y N X Z F C C D J D D L N F Z
F Y H Z R N G D Z H R D Y D J F Y H Y D
Y F R X Q G S X C F Z R X S S F R G D F
M H R D W P N Q D U H X C D R H Q Q F R
F Z D S H D P E S X E X S R H X Q D C Y
F C X Z Y F S F R A X Z L N F Q N F Z G
S D Z X R H F Y D Y R X Q Z H Y F S D M
N Q Y D J F Q G D C F Z P L N F E S F R
H Z D Q Y F N Q D Y F M F Q Z D R F S S
D Y D
[*] Translations
{'F': ['L', 'E', 'R'], 'D': ['L', 'R', 'A'], 'Y': ['O'], 'H': ['S'], 'X': ['N'], 'R': ['R'], 'Q': ['L', 'R'], 'Z': ['D'], 'S': ['L', 'R', 'I'], 'N': ['U'], 'C': ['T', 'L', 'R'], 'G': ['C'], 'J': ['M'], 'E': ['P'], 'U': ['Q'], 'P': ['Y'], 'M': ['B'], 'L': ['H'], 'T': ['V'], 'A': ['G'], 'W': ['J']}
[*] Possible keys: 162
-[CODEBOOK BY CRYPTANALYSIS]
[18] [{'F': 'E', 'D': 'A', 'Y': 'O', 'H': 'S', 'X': 'N', 'R': 'R', 'Q': 'R', 'Z': 'D', 'S': 'R', 'N': 'U', 'C': 'L', 'G': 'C', 'J': 'M', 'E': 'P', 'U': 'Q', 'P': 'Y', 'M': 'B', 'L': 'H', 'T': 'V', 'A': 'G', 'W': 'J'}] LARERNVSOAMADSQAOEOACNDOERSUOAOARNDRNRELPRNPNDSCNOEQSVSLARRSAYLASMPLEMERCARSROEMEOSOADOEDEVURSOAOCSEREUREBERCNARUMULACSQNHUERAOSEOSDRUCECSEREERURSMPARCNERELRERNRCEOELNDOERERGNDOELSROSQSOUNDELLAMAAHUEDEOSDRUCADSRAOAMEOSOAOERNRCRNLEDRNRRERCAEBSRAJYURAQSNLARSRREREDARSAYPRNPNRRSNRALOELNDOERERGNDHUERUEDCRADNRSEOAORNRDSOERABUROAMERCALEDYHUEPRERSDAROEURAOEBERDARERRAOA
[18] [{'F': 'E', 'D': 'A', 'Y': 'O', 'H': 'S', 'X': 'N', 'R': 'R', 'Q': 'L', 'Z': 'D', 'S': 'R', 'N': 'U', 'C': 'L', 'G': 'C', 'J': 'M', 'E': 'P', 'U': 'Q', 'P': 'Y', 'M': 'B', 'L': 'H', 'T': 'V', 'A': 'G', 'W': 'J'}] LARERNVSOAMADSQAOEOACNDOERSUOAOALNDRNLELPRNPNDSCNOEQSVSLALRSAYLASMPLEMELCARSLOEMEOSOADOEDEVURSOAOCSELEULEBERCNARUMULACSQNHUELAOSEOSDRUCECSELEELULSMPARCNELELRERNRCEOELNDOERERGNDOELSLOSQSOUNDELLAMAAHUEDEOSDRUCADSRAOAMEOSOAOERNLCRNLEDRNRRERCAEBSRAJYULAQSNLARSLLEREDARSAYPRNPNRRSNLALOELNDOERERGNDHUELUEDCRADNRSEOAORNLDSOERABULOAMELCALEDYHUEPRERSDALOEULAOEBELDARERRAOA
[18] [{'F': 'E', 'D': 'A', 'Y': 'O', 'H': 'S', 'X': 'N', 'R': 'R', 'Q': 'R', 'Z': 'D', 'S': 'L', 'N': 'U', 'C': 'L', 'G': 'C', 'J': 'M', 'E': 'P', 'U': 'Q', 'P': 'Y', 'M': 'B', 'L': 'H', 'T': 'V', 'A': 'G', 'W': 'J'}] LALERNVSOAMADSQAOEOACNDOERSUOAOARNDRNRELPLNPNDSCNOEQSVSLARRSAYLASMPLEMERCARSROEMEOSOADOEDEVULSOAOCSEREUREBERCNARUMULACSQNHUERAOSEOSDRUCECSEREERURSMPARCNERELLERNLCEOELNDOELERGNDOELSROSQSOUNDELLAMAAHUEDEOSDRUCADSRAOAMEOSOAOERNRCLNLEDRNLLERCAEBSRAJYURAQSNLARSRREREDALSAYPLNPNLRSNRALOELNDOELERGNDHUERUEDCLADNRSEOAORNRDSOELABUROAMERCALEDYHUEPLERSDAROEURAOEBERDARELLAOA
-[AFFINE BY CRYPTANALYSIS]
[{'a': 7, 'b': 3}] LARECOGIDAMASIVADEDATOSDECIUDADANOSCONELPROPOSITODEVIGILANCIAYLAIMPLEMENTACINDEMEDIDASDESEGURIDADTIENEUNEFECTOACUMULATIVOQUENADIEDISCUTETIENEENUNIMPACTOENELRECORTEDELOSDERECHOSDELINDIVIDUOSELLAMAAQUESEDISCUTASICADAMEDIDADECONTROLESCORRECTAEFICAZYUNAVIOLACINNECESARIAYPROPORCIONALDELOSDERECHOSQUENUESTRASOCIEDADCONSIDERAFUNDAMENTALESYQUEPRECISANDEUNADEFENSACERRADA
-[AFFINE BRUTEFORCE]
[{'a': 7, 'b': 3}] LARECOGIDAMASIVADEDATOSDECIUDADANOSCONELPROPOSITODEVIGILANCIAYLAIMPLEMENTACINDEMEDIDASDESEGURIDADTIENEUNEFECTOACUMULATIVOQUENADIEDISCUTETIENEENUNIMPACTOENELRECORTEDELOSDERECHOSDELINDIVIDUOSELLAMAAQUESEDISCUTASICADAMEDIDADECONTROLESCORRECTAEFICAZYUNAVIOLACINNECESARIAYPROPORCIONALDELOSDERECHOSQUENUESTRASOCIEDADCONSIDERAFUNDAMENTALESYQUEPRECISANDEUNADEFENSACERRADA
[{'a': 5, 'b': 13}] DYBOGCWEXYUYSERYXOXYJCSXOGEAXYXYLCSGCLODTBCTCSEJCXOREWEDYLGEYQDYEUTDOUOLJYGELXOUOXEXYSXOSOWABEXYXJEOLOALOFOGJCYGAUADYJERCKAOLYXEOXESGAJOJEOLOOLALEUTYGJCOLODBOGCBJOXODCSXOBOGNCSXODELXEREXACSODDYUYYKAOSOXESGAJYSEGYXYUOXEXYXOGCLJBCDOSGCBBOGJYOFEGYHQALYRECDYGELLOGOSYBEYQTBCTCBGECLYDXODCSXOBOGNCSKAOLAOSJBYSCGEOXYXGCLSEXOBYFALXYUOLJYDOSQKAOTBOGESYLXOALYXOFOLSYGOBBYXY
[{'a': 9, 'b': 3}] XATGQIWMLASAOMZALGLAJIOLGQMELALANIOQINGXDTIDIOMJILGZMWMXANQMAKXAMSDXGSGNJAQMNLGSGLMLAOLGOGWETMLALJMGNGENGBGQJIAQESEXAJMZIYEGNALMGLMOQEJGJMGNGGNENMSDAQJIGNGXTGQITJGLGXIOLGTGQRIOLGXMNLMZMLEIOGXXASAAYEGOGLMOQEJAOMQALASGLMLALGQINJTIXGOQITTGQJAGBMQAFKENAZMIXAQMNNGQGOATMAKDTIDITQMINAXLGXIOLGTGQRIOYEGNEGOJTAOIQMGLALQINOMLGTABENLASGNJAXGOKYEGDTGQMOANLGENALGBGNOAQGTTALA
-[CESAR BRUTEFORCE]
[{'rot': 13}] PQFSEKGULQWQMUHQLSLQTKMLSEUALQLQDKMEKDSPRFKRKMUTKLSHUGUPQDEUQCPQUWRPSWSDTQEUDLSWSLULQMLSMSGAFULQLTUSDSADSZSETKQEAWAPQTUHKYASDQLUSLUMEATSTUSDSSDADUWRQETKSDSPFSEKFTSLSPKMLSFSENKMLSPUDLUHULAKMSPPQWQQYASMSLUMEATQMUEQLQWSLULQLSEKDTFKPSMEKFFSETQSZUEQJCADQHUKPQEUDDSESMQFUQCRFKRKFEUKDQPLSPKMLSFSENKMYASDASMTFQMKEUSLQLEKDMULSFQZADLQWSDTQPSMCYASRFSEUMQDLSADQLSZSDMQESFFQLQ
[{'rot': 11}] NODQCIESJOUOKSFOJQJORIKJQCSYJOJOBIKCIBQNPDIPIKSRIJQFSESNOBCSOANOSUPNQUQBROCSBJQUQJSJOKJQKQEYDSJOJRSQBQYBQXQCRIOCYUYNORSFIWYQBOJSQJSKCYRQRSQBQQBYBSUPOCRIQBQNDQCIDRQJQNIKJQDQCLIKJQNSBJSFSJYIKQNNOUOOWYQKQJSKCYROKSCOJOUQJSJOJQCIBRDINQKCIDDQCROQXSCOHAYBOFSINOCSBBQCQKODSOAPDIPIDCSIBONJQNIKJQDQCLIKWYQBYQKRDOKICSQJOJCIBKSJQDOXYBJOUQBRONQKAWYQPDQCSKOBJQYBOJQXQBKOCQDDOJO
[{'rot': 7}] JKZMYEAOFKQKGOBKFMFKNEGFMYOUFKFKXEGYEXMJLZELEGONEFMBOAOJKXYOKWJKOQLJMQMXNKYOXFMQMFOFKGFMGMAUZOFKFNOMXMUXMTMYNEKYUQUJKNOBESUMXKFOMFOGYUNMNOMXMMXUXOQLKYNEMXMJZMYEZNMFMJEGFMZMYHEGFMJOXFOBOFUEGMJJKQKKSUMGMFOGYUNKGOYKFKQMFOFKFMYEXNZEJMGYEZZMYNKMTOYKDWUXKBOEJKYOXXMYMGKZOKWLZELEZYOEXKJFMJEGFMZMYHEGSUMXUMGNZKGEYOMFKFYEXGOFMZKTUXFKQMXNKJMGWSUMLZMYOGKXFMUXKFMTMXGKYMZZKFK
-[ATBASH]
XWHUICGSBWQWASFWBUBWTCABUISMBWBWJCAICJUXVHCVCASTCBUFSGSXWJISWKXWSQVXUQUJTWISJBUQUBSBWABUAUGMHSBWBTSUJUMJUNUITCWIMQMXWTSFCOMUJWBSUBSAIMTUTSUJUUJMJSQVWITCUJUXHUICHTUBUXCABUHUIZCABUXSJBSFSBMCAUXXWQWWOMUAUBSAIMTWASIWBWQUBSBWBUICJTHCXUAICHHUITWUNSIWDKMJWFSCXWISJJUIUAWHSWKVHCVCHISCJWXBUXCABUHUIZCAOMUJMUATHWACISUBWBICJASBUHWNMJBWQUJTWXUAKOMUVHUISAWJBUMJWBUNUJAWIUHHWBW
#!/usr/bin/env python3
'''
[ STATISTICAL ANALYSIS OF TEXT ]
[ ENCRYPTED WITH A MONOALPHABETIC SUBSTITUTION CIPHER ]
Javier Junquera Sánchez <[email protected]>
Phases:
1. Entropy analysis
2. Character frequency analysis
3. Analysis of identical adjacent characters (RR, LL)
4. Analysis of repeated bigrams
5. Generation of candidate translations based on language statistics
   (the rank-matching idea is sketched right after this docstring)
6. Use of the statistics against the affine cipher
7. Affine, Caesar and Atbash brute force
'''
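# Sketch of phase 5 (illustrative only, not used below): pair the i-th most
# frequent ciphertext letter with the i-th most frequent letter of the target
# language. traduccion_por_rango is a hypothetical helper, not part of the
# original script, which builds its `translations` table the same way.
def traduccion_por_rango(frec_cifrado, frec_idioma):
    orden_cifrado = sorted(frec_cifrado, key=frec_cifrado.get, reverse=True)
    orden_idioma = sorted(frec_idioma, key=frec_idioma.get, reverse=True)
    return dict(zip(orden_cifrado, orden_idioma))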
import re
from affine import decode_affine, affine_break
# BASED ON DON QUIXOTE
estadisticas_castellano = {'A': 0.1203914356874436, 'B': 0.015130070791165134, 'C': 0.03719184102245509, 'D': 0.05437026585244542, 'E': 0.1386395633977034, 'F': 0.004934755557097172, 'G': 0.01092558720618012, 'H': 0.012658047516610746, 'I': 0.048772133465719646, 'J': 0.006582358140488471, 'K': 8.238012916956494e-05, 'L': 0.05551801306561237, 'M': 0.02788784162203377, 'N': 0.06781866708951004, 'O': 0.09582667220822723, 'P': 0.022245112473652295, 'Q': 0.02013110209127842, 'R': 0.06325617061234451, 'S': 0.07834783863652835, 'T': 0.03919621769909202, 'U': 0.048757887277968516, 'V': 0.011134324826707514, 'W': 0.00016847665514377192, 'X': 0.00025209558324821755, 'Y': 0.015759380650084517, 'Z': 0.004021760742090114}
estadisticas_castellano_esp = {' ': 1, 'A': 0.1203914356874436, 'B': 0.015130070791165134, 'C': 0.03719184102245509, 'D': 0.05437026585244542, 'E': 0.1386395633977034, 'F': 0.004934755557097172, 'G': 0.01092558720618012, 'H': 0.012658047516610746, 'I': 0.048772133465719646, 'J': 0.006582358140488471, 'K': 8.238012916956494e-05, 'L': 0.05551801306561237, 'M': 0.02788784162203377, 'N': 0.06781866708951004, 'O': 0.09582667220822723, 'P': 0.022245112473652295, 'Q': 0.02013110209127842, 'R': 0.06325617061234451, 'S': 0.07834783863652835, 'T': 0.03919621769909202, 'U': 0.048757887277968516, 'V': 0.011134324826707514, 'W': 0.00016847665514377192, 'X': 0.00025209558324821755, 'Y': 0.015759380650084517, 'Z': 0.004021760742090114}
# estadisticas_castellano = estadisticas_castellano_esp
# BASED ON THE LORD OF THE RINGS
estadisticas_ingles = {'A': 0.0831295411584481, 'B': 0.01738984496652256, 'C': 0.017155136950865266, 'D': 0.052443360917834525, 'E': 0.12285071813081565, 'F': 0.024581248091420035, 'G': 0.02449291711778557, 'H': 0.06476679361089449, 'I': 0.06393143497452282, 'J': 0.0006511254627912083, 'K': 0.009077900347519288, 'L': 0.04530117076396197, 'M': 0.022928197013403595, 'N': 0.06837322107728455, 'O': 0.0779066568745473, 'P': 0.013705181494913397, 'Q': 0.00058677003914324, 'R': 0.05933696247447866, 'S': 0.05987073392944122, 'T': 0.08989443186779629, 'U': 0.02559074493295679, 'V': 0.008983260018625218, 'W': 0.026715072040218356, 'X': 0.000682672239089232, 'Y': 0.019290222770715505, 'Z': 0.00036468073400515346}
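# How tables like the two above can be derived (a rough sketch; this helper
# and 'corpus.txt' are illustrative placeholders, not part of the original
# gist, and the function is never called here):
def frecuencias_corpus(path):
    with open(path) as corpus:
        letras = re.sub(r'[^A-Z]', '', corpus.read().upper())
    return {c: letras.count(c) / len(letras) for c in sorted(set(letras))}
# e.g. estadisticas_castellano = frecuencias_corpus('corpus.txt')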
with open('t.txt') as f:
    t = f.read().replace(' ', '').replace('\n', '')
KEYWORDS = ['CRIP', 'CUANDO', 'EN', 'HA', 'HABIA', 'ERA', 'CONTRA', 'DESDE', 'SOLO', 'PERO', 'LE', 'SI', 'ESTA', 'AHORA', 'ALLI', 'SE', 'SEGUN', 'ANTE', 'SER', 'EL', 'POR', 'PARA', 'TAMBIEN', 'TODO', 'SUS', 'PORQUE', 'AQUI', 'YA', 'HACIA', 'A', 'CON', 'HAN', 'DEL', 'Y', 'AL', 'COMO', 'HASTA', 'QUE', 'O', 'UN', 'BAJO', 'LO', 'MAS', 'SU', 'LOS', 'SIN', 'NO', 'PUEDE', 'DOS', 'ENTRE', 'SOBRE', 'CIFR', 'MI', 'FUE', 'MUY', 'TRAS', 'LAS', 'LA', 'ES', 'SON', 'VEZ', 'ME', 'CABE', 'YO', 'HAY', 'ESTE', 'UNA', 'AÑOS', 'DE']
KEYWORDS_EN = list(set(['THE', 'IS', 'AND', 'WHO', 'HE', 'IT', 'WHERE', 'CRYPT', 'A']))
class Descifrado():
    # A decryption candidate: decoded text plus the key that produced it
    def __init__(self, text, key):
        self.text = text
        self.key = key

    def __str__(self):
        return "[%s] %s" % (self.key, self.text)

MAX = 5

def analiza_repetidos(t, i=2):
    # Positions of every repeated i-gram in t
    t = re.sub(r'[^A-Z]', '', t.upper())
    res = {}
    for j in range(len(t)):
        ocur = t[j:j + i]
        if len(ocur) < i:
            continue
        last = t[j:].find(ocur)
        while last >= 0:
            if ocur in res:
                res[ocur].append(j + last)
            else:
                res[ocur] = [j + last]
            n = t[j + last + 1:].find(ocur)
            if n >= 0:
                last += n + 1
            else:
                last = -1
    for x in res:
        val = list(set(res[x]))
        val.sort()
        res[x] = val
    return res

def calcula_distancias(v):
    # Distances between consecutive positions
    dist = []
    for x in range(len(v))[1:]:
        dist.append(v[x] - v[x - 1])
    return dist

def divisores(x):
    # Divisors of x between 2 and x/2
    res = []
    for i in range(int(x / 2))[2:]:
        if x % i == 0:
            res.append(i)
    return res

def analisis_estadistico(txt):
    # Relative frequency of each letter A-Z in txt
    txt = re.sub(r'[^A-Z]', '', txt.upper())
    d = {chr(c + ord('A')): 0 for c in range(ord('Z') - ord('A') + 1)}
    for x in txt:
        if x in d:
            d[x] += 1
        else:
            d[x] = 1
    return {x: float(d[x]) / len(txt) for x in d}

def gen_keys(cs, n, key):
    # All keys built by picking one candidate character per position
    if len(key) == len(cs):
        return [key]
    keys = []
    for c in cs[n]:
        keys += gen_keys(cs, n + 1, key + c)
    return keys

from math import gcd

def comun_divisor(values):
    # Running gcd of the values, stopping once it degenerates
    res = values[0]
    for v in values[1:]:
        aux = gcd(res, v)
        if aux <= 2:
            return res
        res = aux
    return res

def repetidos(t):
    # Positions where the same character appears twice in a row
    res = {}
    for x in range(len(t) - 1):
        if t[x] == t[x + 1]:
            c = t[x:x + 1]
            if c in res:
                res[c].append(x)
            else:
                res[c] = [x]
    return res

def representa_matrix(t, n=10):
    # Lay t out as a matrix with n characters per row
    res = ""
    for i in range(len(t)):
        if i % n == 0 and i > 0:
            res += "\n"
        res += "%c " % t[i]
    return res

def translate(t, translations, n=1):
    # Replace each n-gram of t using the translations table
    res = ""
    while len(t):
        c = t[:n]
        t = t[n:]
        res += translations.get(c, " ")
    return res
def gen_translations(translations, translation):
    # Expand the candidate table into full codebooks, fixing one ciphertext
    # character at a time and appending each finished decryption to trans_res
    translation_aux = {k: translation[k] for k in translation}
    if len(translations) == 0:
        r = translate(t, translation, n=n_gram)
        trans_res.append(Descifrado(r, translation))
        return
    ts = translations.keys()
    for i in ts:
        ts_values = translations[i]
        for j in ts_values:
            translation_aux[i] = j
            next = {k: translations[k] for k in translations if k != i}
            gen_translations(next, translation_aux)
        break
english = False

def most_able(candidates, KEYWORDS=[]):
    # Rank candidates by how many keywords appear in their text
    res = {}
    for c in candidates:
        kws = 0
        for keyword in KEYWORDS:
            if keyword in c.text:
                kws += 1
        if english:
            for keyword in KEYWORDS_EN:
                if keyword in c.text:
                    kws += 1
        res[c] = kws
    return {x[0]: x[1] for x in sorted(res.items(), key=lambda x: (x[1], x[0].text))[::-1]}
from entro import entropy

# Entropy of the text
e = entropy(t)

# Statistical analysis of the characters in the text
multigram = False
if multigram:
    print("POLYALPHABETIC ANALYSIS")
    n_gram = int(input("Ngram > "))
else:
    print("- MONOALPHABETIC ANALYSIS:")
    n_gram = 1
ar = analiza_repetidos(t, i=n_gram)
d = {x: len(ar[x]) for x in ar}
estadisticas = {x[0]: x[1] for x in sorted(d.items(), key=lambda x: (x[1], x[0]))[::-1]}
conv_estadisticas_es = {x[0]: x[1] for x in sorted(estadisticas_castellano.items(), key=lambda x: (x[1], x[0]))[::-1]}
conv_estadisticas_en = {x[0]: x[1] for x in sorted(estadisticas_ingles.items(), key=lambda x: (x[1], x[0]))[::-1]}
r_std = [v for i, v in enumerate(estadisticas)]
r_mix_es = [v for i, v in enumerate(conv_estadisticas_es)]
r_mix_en = [v for i, v in enumerate(conv_estadisticas_en)]

# Table of candidate translations
translations = {r_std[i]: [r_mix_es[i]] for i in range(min(len(r_std), len(r_mix_es)))}
if english:
    for i in range(min(len(r_std), len(r_mix_en))):
        c = r_mix_en[i]
        tr = r_std[i]
        if tr in translations:
            translations[tr].append(c)
        else:
            translations[tr] = [c]
# Identical adjacent characters may be LL or RR
rep = repetidos(t)
add_rep = True and (n_gram == 1)
if add_rep:
    for d in rep:
        translations[d].append('L')
        translations[d].append('R')

# Repeated pairs of different characters may be CH
dup = analiza_repetidos(t, i=2)
add_dup = False and (n_gram == 1)
if add_dup:
    for r in dup:
        translations[r[0]].append('C')
        translations[r[1]].append('H')

# Matrix representation
mat = representa_matrix(t, n=20)

def unifica(translations):
    # Deduplicate the candidate lists
    for x in translations:
        translations[x] = list(set(translations[x]))
    return translations

translations = unifica(translations)
print()
print("[*] Message:")
print(t)
print()
print("[*] Entropy: %f" % e)
print()
print("[*] Statistics:")
print(estadisticas)
print()
print("[*] Doubled letters (RR, LL):")
print(rep)
print()
print("[*] Repeated bigrams:")
print(dup)
print()
print("[*] Matrix:")
print(mat)
print()
print("[*] Translations")
print(translations)
claves = 1
for i in translations:
    v = translations[i]
    claves *= len(v) if len(v) > 0 else 1
print()
print("[*] Possible keys: %d" % claves)

# Translation results
trans_res = []
gen_translations(translations, {})
'''
AFFINE ANALYSIS
C = ord('C') - ord('A')
D = ord('D') - ord('A')
L = ord('L') - ord('A')
A = ord('A') - ord('A')
# (a * L) + b = C
# (a * A) + b = D
# b = D - (a * A)
# (a * L) + D - (a * A) = C
# a * (L - A) = C - D
# a = (C - D) * (L - A)**(-1)
a = ( inverso(L - A, l) * (C - D) ) % l
# a = 7
b = ( D - (a * A) )
# b = 3
'''
stat = most_able(trans_res, KEYWORDS=KEYWORDS)
affine_values = dict(a=[], b=[])
print()
print('-[CODEBOOK BY CRYPTANALYSIS]')
n_best = 3
for k in stat:
    if n_best:
        m = k.text
        print("[%d] %s" % (stat[k], k))
        # Use the first two letters of the candidate plaintext and of the
        # ciphertext as the known pairs for the affine attack
        broken = affine_break(m[0], m[1], t[0], t[1])
        affine_values['a'].append(broken['a'])
        affine_values['b'].append(broken['b'])
    else:
        break
    n_best -= 1

affine_values['a'] = list(set(affine_values['a']))
affine_values['b'] = list(set(affine_values['b']))
affines = []
for a in affine_values['a']:
    for b in affine_values['b']:
        aux = decode_affine(t, a, b)
        affines.append(Descifrado(aux, dict(a=a, b=b)))
print()
print('-[AFFINE BY CRYPTANALYSIS]')
n_best = 3
for x in most_able(affines, KEYWORDS=KEYWORDS):
    if n_best:
        print("%s" % x)
    else:
        break
    n_best -= 1
others = True
if others:
    '''
    MANUAL AFFINE ANALYSIS
    C = ord('C') - ord('A')
    D = ord('D') - ord('A')
    L = ord('L') - ord('A')
    A = ord('A') - ord('A')
    # (a * L) + b = C
    # (a * A) + b = D
    # b = D - (a * A)
    # (a * L) + D - (a * A) = C
    # a * (L - A) = C - D
    # a = (C - D) * (L - A)**(-1)
    a = ( inverso(L - A, l) * (C - D) ) % l
    # a = 7
    b = ( D - (a * A) )
    # b = 3
    '''
    print()
    print("-[AFFINE BRUTEFORCE]")
    affines = []
    for i in range(26):
        for j in range(26):
            aux = decode_affine(t, i, j)
            affines.append(Descifrado(aux, dict(a=i, b=j)))
    n_best = 3
    for x in most_able(affines, KEYWORDS=KEYWORDS):
        if n_best:
            print("%s" % x)
        else:
            break
        n_best -= 1
# CESAR ANALYSIS
from cesar import cesar

print()
print("-[CESAR BRUTEFORCE]")
cesars = []
for i in range(25):
    cesars.append(Descifrado(cesar(i, t), dict(rot=i)))
stat = most_able(cesars, KEYWORDS=KEYWORDS)
n_best = 3
for k in stat:
    if n_best:
        print("%s" % (k))
    else:
        break
    n_best -= 1
print()
print("-[ATBASH]")
letras = [chr(x + ord('A')) for x in range(ord('Z') - ord('A') + 1)]
atbash_code = letras[::-1]

def atbash(t):
    res = ""
    for x in t:
        res += atbash_code[letras.index(x)]
    return res

print("%s" % atbash(t))