Last active
December 17, 2018 20:39
-
-
Save junquera/ebc6d30895ca1334696cd6ea5e801ddf to your computer and use it in GitHub Desktop.
Análisis estadístico de texto cifrado por sustitución monoalfabética
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Number of letters between 'A' and 'Z' inclusive; used as the modulus by the
# affine helpers below.
l = ord('Z') - ord('A') + 1
def inverso(x, m):
    """Return the multiplicative inverse of ``x`` modulo ``m``.

    The search is a brute-force scan of ``0 .. m - 1`` for the first ``i``
    such that ``(x * i) % m == 1``.

    Returns:
        The inverse, or ``0`` when no value in the range satisfies the
        congruence.
    """
    matches = (k for k in range(m) if (x * k) % m == 1)
    return next(matches, 0)
def encode_affine(t, a, b):
    """Encrypt ``t`` with the affine map ``x -> (a * x + b) mod l``.

    Each character is converted to its offset from ``'A'``, transformed, and
    converted back to a character starting at ``'A'``.

    Args:
        t: iterable of single characters to encrypt.
        a: multiplicative part of the key.
        b: additive part of the key.

    Returns:
        The encrypted text as a string.
    """
    base = ord('A')
    return "".join(chr(base + (a * (ord(ch) - base) + b) % l) for ch in t)
def decode_affine(t, a, b):
    """Decrypt ``t`` with the affine key ``(a, b)``.

    Applies ``y -> a_inv * (y - b) mod l`` to the offset of every character
    from ``'A'``, where ``a_inv = inverso(a, l)``.

    When ``a`` has no inverse, ``inverso`` yields 0 and every character
    therefore decodes to ``'A'``.

    Returns:
        The decrypted text as a string.
    """
    base = ord('A')
    a_inv = inverso(a, l)
    return "".join(
        chr(base + (a_inv * ((ord(ch) - base) - b)) % l) for ch in t
    )
def affine_break(m1, m2, c1, c2):
    """Recover an affine key from two plaintext/ciphertext character pairs.

    Derivation (all arithmetic modulo ``l``):

        a * m1 + b = c1
        a * m2 + b = c2
        =>  a * (m1 - m2) = c1 - c2
        =>  a = (c1 - c2) * inv(m1 - m2)
            b = c2 - a * m2

    Args:
        m1, m2: two plaintext characters.
        c1, c2: the ciphertext characters they map to, respectively.

    Returns:
        ``{'a': a, 'b': b}``.
    """
    base = ord('A')
    p1, p2, q1, q2 = (ord(ch) - base for ch in (m1, m2, c1, c2))
    a = ((q1 - q2) * inverso(p1 - p2, l)) % l
    b = (q2 - a * p2) % l
    return {'a': a, 'b': b}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# Sample ciphertext, written as space-separated groups of letters.
t = '''
VAQG JYHN QGTR YNOQ AEXM TRZR QQAU QAQQ XQZQ YUSA QEFM QZXM OUGP MP
'''
# Code points of the first and last letter, and the number of letters between
# them (inclusive).
i = ord('A')
f = ord('Z')
r = (f + 1) - i
# The ciphertext as a list of characters with all whitespace removed.
l = list("".join(t.split()))
def print_four(n, s):
    """Print ``s`` in groups of four characters, prefixed by ``n``.

    The line has the form ``[NN] AAAA BBBB ... `` where ``NN`` is ``n``
    zero-padded to two digits and every group (including the last one) is
    followed by a single space.
    """
    groups = [s[start:start + 4] for start in range(0, len(s), 4)]
    body = "".join(group + " " for group in groups)
    print("[%02d] %s" % (n, body))
def cesar(n, l):
    """Shift every character of ``l`` by ``n`` positions within the alphabet.

    Uses the module-level ``i`` (code point of the first letter) and ``r``
    (alphabet size); the shift wraps around modulo ``r``.

    Args:
        n: shift amount; may be negative.
        l: iterable of single characters.

    Returns:
        The shifted text as a string.
    """
    return "".join(chr((ord(ch) - i + n) % r + i) for ch in l)
if __name__ == '__main__':
    # Walk the shifts outwards from 0: each step prints the text shifted
    # backwards by ``shift`` (labelled ``shift``) and forwards by ``shift``
    # (labelled ``r - shift``).
    for shift in range(r // 2 + 1):
        backwards = cesar(-shift, l)
        forwards = cesar(shift, l)
        print_four(shift, backwards)
        print_four(r - shift, forwards)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import sys | |
from math import log | |
def entropy(b):
    """Return the entropy of ``b`` in bits per symbol.

    Counts how often each distinct element of ``b`` occurs and accumulates
    ``-f * log2(f)`` over the relative frequencies ``f``.

    Args:
        b: a sized iterable of hashable symbols (e.g. ``str`` or ``bytes``).

    Returns:
        The entropy; ``0`` for empty input.
    """
    total = len(b)
    occurrences = {}
    for symbol in b:
        occurrences[symbol] = occurrences.get(symbol, 0) + 1

    result = 0
    # Accumulate in first-seen order so the float rounding is reproducible.
    for hits in occurrences.values():
        share = float(hits) / total
        result -= share * log(share, 2)
    return result
if __name__ == '__main__':
    # Read the file named by the first command-line argument and print the
    # entropy of its contents.
    with open(sys.argv[1]) as f:
        b = f.read()
    result = entropy(b)
    print(result)
    # Percentage
    # print(result/8)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[ ANÁLISIS ESTADÍSTICO DE TEXTO ] | |
[ CIFRADO POR SUSTITUCIÓN MONOALFABÉTICA ] | |
Javier Junquera Sánchez <[email protected]> | |
Fases: | |
1. Análisis de entropía | |
2. Análisis de aparición de caracteres | |
3. Análisis de caracteres repetidos juntos (RR, LL) | |
4. Análisis de dupletas repetidas | |
5. Generación de posibles traducciones basado en estadísticas de idioma | |
6. Uso de estadísticas para cifrado afín | |
7. Fuerza bruta afín, césar y atbash | |
---------- | |
[*] Mensaje: | |
CDSFRXTHYDJDZHUDYFYDGXZYFRHNYDYDQXZRXQFCESXEXZHGXYFUHTHCDQRHDPCDHJECFJFQGDRHQYFJFYHYDZYFZFTNSHYDYGHFQFNQFMFRGXDRNJNCDGHUXLNFQDYHFYHZRNGFGHFQFFQNQHJEDRGXFQFCSFRXSGFYFCXZYFSFRAXZYFCHQYHUHYNXZFCCDJDDLNFZFYHZRNGDZHRDYDJFYHYDYFRXQGSXCFZRXSSFRGDFMHRDWPNQDUHXCDRHQQFRFZDSHDPESXEXSRHXQDCYFCXZYFSFRAXZLNFQNFZGSDZXRHFYDYRXQZHYFSDMNQYDJFQGDCFZPLNFESFRHZDQYFNQDYFMFQZDRFSSDYD | |
[*] Entropía: 3.911532 | |
[*] Estadísticas: | |
{'F': 54, 'D': 41, 'Y': 34, 'H': 31, 'X': 25, 'R': 25, 'Q': 25, 'Z': 23, 'S': 18, 'N': 17, 'C': 16, 'G': 15, 'J': 9, 'E': 7, 'U': 5, 'P': 4, 'M': 4, 'L': 4, 'T': 3, 'A': 2, 'W': 1} | |
[*] Repetidos (RR, LL): | |
{'F': [140], 'C': [190], 'D': [194], 'S': [233, 358], 'Q': [256]} | |
[*] Parejas repetidas: | |
{'CD': [0, 55, 62, 115, 191, 252], 'DS': [1, 262], 'SF': [2, 156, 170, 234, 286, 337], 'FR': [3, 24, 106, 157, 171, 221, 235, 258, 287, 338], 'RX': [4, 35, 158, 222, 231, 310], 'XT': [5], 'TH': [6, 53], 'HY': [7, 82, 93, 184, 217, 314], 'YD': [8, 18, 28, 30, 83, 94, 212, 218, 307, 322, 361], 'DJ': [9, 192, 213, 323], 'JD': [10, 193], 'DZ': [11, 84, 207, 301], 'ZH': [12, 45, 208, 313], 'HU': [13, 118, 182], 'UD': [14], 'DY': [15, 29, 95, 125, 211, 219, 308, 348, 360], 'YF': [16, 23, 49, 77, 86, 163, 168, 176, 220, 279, 284, 315, 344, 349], 'FY': [17, 80, 128, 162, 200, 215, 306], 'DG': [19, 116], 'GX': [20, 47, 108, 150], 'XZ': [21, 33, 44, 166, 174, 187, 282, 290], 'ZY': [22, 85, 167, 175, 283], 'RH': [25, 58, 74, 254, 273, 304, 339], 'HN': [26], 'NY': [27], 'DQ': [31, 56, 342], 'QX': [32], 'ZR': [34, 131, 203, 230], 'XQ': [36, 223, 275, 311], 'QF': [37, 100, 103, 139, 153, 257], 'FC': [38, 154, 164, 177, 189, 280], 'CE': [39], 'ES': [40, 267, 336], 'SX': [41, 226, 268], 'XE': [42, 269], 'EX': [43, 270], 'HG': [46], 'XY': [48], 'FU': [50], 'UH': [51, 183, 249], 'HT': [52], 'HC': [54], 'QR': [57], 'HD': [59, 264], 'DP': [60, 265], 'PC': [61], 'DH': [63], 'HJ': [64, 145], 'JE': [65, 146], 'EC': [66], 'CF': [67, 228, 329], 'FJ': [68, 78], 'JF': [69, 79, 214, 324], 'FQ': [70, 99, 123, 138, 141, 152, 294, 325, 352], 'QG': [71, 224, 326], 'GD': [72, 206, 237, 327], 'DR': [73, 110, 148, 253, 355], 'HQ': [75, 179, 255], 'QY': [76, 180, 321, 343], 'YH': [81, 126, 129, 181, 201, 216], 'FZ': [87, 198, 229, 260, 297, 330], 'ZF': [88, 188, 199], 'FT': [89], 'TN': [90], 'NS': [91], 'SH': [92, 263], 'YG': [96], 'GH': [97, 117, 136], 'HF': [98, 127, 137, 305], 'FN': [101, 345], 'NQ': [102, 143, 246, 320, 346], 'FM': [104, 239, 350], 'MF': [105, 351], 'RG': [107, 149, 236], 'XD': [109], 'RN': [111, 132, 204], 'NJ': [112], 'JN': [113], 'NC': [114], 'UX': [119], 'XL': [120], 'LN': [121, 196, 292, 333], 'NF': [122, 197, 293, 296, 334], 'QD': [124, 247, 276, 347], 'HZ': [130, 202, 
340], 'NG': [133, 205], 'GF': [134, 161], 'FG': [135], 'FF': [140], 'QN': [142, 295], 'QH': [144], 'ED': [147], 'XF': [151], 'CS': [155], 'XS': [159, 232, 271], 'SG': [160], 'CX': [165, 281], 'FS': [169, 285, 316, 357], 'RA': [172, 288], 'AX': [173, 289], 'CH': [178], 'YN': [185], 'NX': [186], 'CC': [190], 'DD': [194], 'DL': [195], 'HR': [209, 241], 'RD': [210, 242], 'GS': [225, 299], 'XC': [227, 251], 'SS': [233, 358], 'DF': [238], 'MH': [240], 'DW': [243], 'WP': [244], 'PN': [245], 'DU': [248], 'HX': [250, 274], 'QQ': [256], 'RF': [259, 356], 'ZD': [261, 341, 354], 'PE': [266], 'SR': [272], 'DC': [277, 328], 'CY': [278], 'ZL': [291], 'ZG': [298], 'SD': [300, 317, 359], 'ZX': [302], 'XR': [303], 'YR': [309], 'QZ': [312, 353], 'DM': [318], 'MN': [319], 'ZP': [331], 'PL': [332], 'FE': [335]} | |
[*] Matriz: | |
C D S F R X T H Y D J D Z H U D Y F Y D | |
G X Z Y F R H N Y D Y D Q X Z R X Q F C | |
E S X E X Z H G X Y F U H T H C D Q R H | |
D P C D H J E C F J F Q G D R H Q Y F J | |
F Y H Y D Z Y F Z F T N S H Y D Y G H F | |
Q F N Q F M F R G X D R N J N C D G H U | |
X L N F Q D Y H F Y H Z R N G F G H F Q | |
F F Q N Q H J E D R G X F Q F C S F R X | |
S G F Y F C X Z Y F S F R A X Z Y F C H | |
Q Y H U H Y N X Z F C C D J D D L N F Z | |
F Y H Z R N G D Z H R D Y D J F Y H Y D | |
Y F R X Q G S X C F Z R X S S F R G D F | |
M H R D W P N Q D U H X C D R H Q Q F R | |
F Z D S H D P E S X E X S R H X Q D C Y | |
F C X Z Y F S F R A X Z L N F Q N F Z G | |
S D Z X R H F Y D Y R X Q Z H Y F S D M | |
N Q Y D J F Q G D C F Z P L N F E S F R | |
H Z D Q Y F N Q D Y F M F Q Z D R F S S | |
D Y D | |
[*] Traducciones | |
{'F': ['L', 'E', 'R'], 'D': ['L', 'R', 'A'], 'Y': ['O'], 'H': ['S'], 'X': ['N'], 'R': ['R'], 'Q': ['L', 'R'], 'Z': ['D'], 'S': ['L', 'R', 'I'], 'N': ['U'], 'C': ['T', 'L', 'R'], 'G': ['C'], 'J': ['M'], 'E': ['P'], 'U': ['Q'], 'P': ['Y'], 'M': ['B'], 'L': ['H'], 'T': ['V'], 'A': ['G'], 'W': ['J']} | |
[*] Claves posibles: 162 | |
-[CODEBOOK BY CRYPTOANALYSIS] | |
[18] [{'F': 'E', 'D': 'A', 'Y': 'O', 'H': 'S', 'X': 'N', 'R': 'R', 'Q': 'R', 'Z': 'D', 'S': 'R', 'N': 'U', 'C': 'L', 'G': 'C', 'J': 'M', 'E': 'P', 'U': 'Q', 'P': 'Y', 'M': 'B', 'L': 'H', 'T': 'V', 'A': 'G', 'W': 'J'}] LARERNVSOAMADSQAOEOACNDOERSUOAOARNDRNRELPRNPNDSCNOEQSVSLARRSAYLASMPLEMERCARSROEMEOSOADOEDEVURSOAOCSEREUREBERCNARUMULACSQNHUERAOSEOSDRUCECSEREERURSMPARCNERELRERNRCEOELNDOERERGNDOELSROSQSOUNDELLAMAAHUEDEOSDRUCADSRAOAMEOSOAOERNRCRNLEDRNRRERCAEBSRAJYURAQSNLARSRREREDARSAYPRNPNRRSNRALOELNDOERERGNDHUERUEDCRADNRSEOAORNRDSOERABUROAMERCALEDYHUEPRERSDAROEURAOEBERDARERRAOA | |
[18] [{'F': 'E', 'D': 'A', 'Y': 'O', 'H': 'S', 'X': 'N', 'R': 'R', 'Q': 'L', 'Z': 'D', 'S': 'R', 'N': 'U', 'C': 'L', 'G': 'C', 'J': 'M', 'E': 'P', 'U': 'Q', 'P': 'Y', 'M': 'B', 'L': 'H', 'T': 'V', 'A': 'G', 'W': 'J'}] LARERNVSOAMADSQAOEOACNDOERSUOAOALNDRNLELPRNPNDSCNOEQSVSLALRSAYLASMPLEMELCARSLOEMEOSOADOEDEVURSOAOCSELEULEBERCNARUMULACSQNHUELAOSEOSDRUCECSELEELULSMPARCNELELRERNRCEOELNDOERERGNDOELSLOSQSOUNDELLAMAAHUEDEOSDRUCADSRAOAMEOSOAOERNLCRNLEDRNRRERCAEBSRAJYULAQSNLARSLLEREDARSAYPRNPNRRSNLALOELNDOERERGNDHUELUEDCRADNRSEOAORNLDSOERABULOAMELCALEDYHUEPRERSDALOEULAOEBELDARERRAOA | |
[18] [{'F': 'E', 'D': 'A', 'Y': 'O', 'H': 'S', 'X': 'N', 'R': 'R', 'Q': 'R', 'Z': 'D', 'S': 'L', 'N': 'U', 'C': 'L', 'G': 'C', 'J': 'M', 'E': 'P', 'U': 'Q', 'P': 'Y', 'M': 'B', 'L': 'H', 'T': 'V', 'A': 'G', 'W': 'J'}] LALERNVSOAMADSQAOEOACNDOERSUOAOARNDRNRELPLNPNDSCNOEQSVSLARRSAYLASMPLEMERCARSROEMEOSOADOEDEVULSOAOCSEREUREBERCNARUMULACSQNHUERAOSEOSDRUCECSEREERURSMPARCNERELLERNLCEOELNDOELERGNDOELSROSQSOUNDELLAMAAHUEDEOSDRUCADSRAOAMEOSOAOERNRCLNLEDRNLLERCAEBSRAJYURAQSNLARSRREREDALSAYPLNPNLRSNRALOELNDOELERGNDHUERUEDCLADNRSEOAORNRDSOELABUROAMERCALEDYHUEPLERSDAROEURAOEBERDARELLAOA | |
-[AFFINE BY CRYPTOANALYSIS] | |
[{'a': 7, 'b': 3}] LARECOGIDAMASIVADEDATOSDECIUDADANOSCONELPROPOSITODEVIGILANCIAYLAIMPLEMENTACINDEMEDIDASDESEGURIDADTIENEUNEFECTOACUMULATIVOQUENADIEDISCUTETIENEENUNIMPACTOENELRECORTEDELOSDERECHOSDELINDIVIDUOSELLAMAAQUESEDISCUTASICADAMEDIDADECONTROLESCORRECTAEFICAZYUNAVIOLACINNECESARIAYPROPORCIONALDELOSDERECHOSQUENUESTRASOCIEDADCONSIDERAFUNDAMENTALESYQUEPRECISANDEUNADEFENSACERRADA | |
-[AFFINE BRUTEFORCE] | |
[{'a': 7, 'b': 3}] LARECOGIDAMASIVADEDATOSDECIUDADANOSCONELPROPOSITODEVIGILANCIAYLAIMPLEMENTACINDEMEDIDASDESEGURIDADTIENEUNEFECTOACUMULATIVOQUENADIEDISCUTETIENEENUNIMPACTOENELRECORTEDELOSDERECHOSDELINDIVIDUOSELLAMAAQUESEDISCUTASICADAMEDIDADECONTROLESCORRECTAEFICAZYUNAVIOLACINNECESARIAYPROPORCIONALDELOSDERECHOSQUENUESTRASOCIEDADCONSIDERAFUNDAMENTALESYQUEPRECISANDEUNADEFENSACERRADA | |
[{'a': 5, 'b': 13}] DYBOGCWEXYUYSERYXOXYJCSXOGEAXYXYLCSGCLODTBCTCSEJCXOREWEDYLGEYQDYEUTDOUOLJYGELXOUOXEXYSXOSOWABEXYXJEOLOALOFOGJCYGAUADYJERCKAOLYXEOXESGAJOJEOLOOLALEUTYGJCOLODBOGCBJOXODCSXOBOGNCSXODELXEREXACSODDYUYYKAOSOXESGAJYSEGYXYUOXEXYXOGCLJBCDOSGCBBOGJYOFEGYHQALYRECDYGELLOGOSYBEYQTBCTCBGECLYDXODCSXOBOGNCSKAOLAOSJBYSCGEOXYXGCLSEXOBYFALXYUOLJYDOSQKAOTBOGESYLXOALYXOFOLSYGOBBYXY | |
[{'a': 9, 'b': 3}] XATGQIWMLASAOMZALGLAJIOLGQMELALANIOQINGXDTIDIOMJILGZMWMXANQMAKXAMSDXGSGNJAQMNLGSGLMLAOLGOGWETMLALJMGNGENGBGQJIAQESEXAJMZIYEGNALMGLMOQEJGJMGNGGNENMSDAQJIGNGXTGQITJGLGXIOLGTGQRIOLGXMNLMZMLEIOGXXASAAYEGOGLMOQEJAOMQALASGLMLALGQINJTIXGOQITTGQJAGBMQAFKENAZMIXAQMNNGQGOATMAKDTIDITQMINAXLGXIOLGTGQRIOYEGNEGOJTAOIQMGLALQINOMLGTABENLASGNJAXGOKYEGDTGQMOANLGENALGBGNOAQGTTALA | |
-[CESAR BRUTEFORCE] | |
[{'rot': 13}] PQFSEKGULQWQMUHQLSLQTKMLSEUALQLQDKMEKDSPRFKRKMUTKLSHUGUPQDEUQCPQUWRPSWSDTQEUDLSWSLULQMLSMSGAFULQLTUSDSADSZSETKQEAWAPQTUHKYASDQLUSLUMEATSTUSDSSDADUWRQETKSDSPFSEKFTSLSPKMLSFSENKMLSPUDLUHULAKMSPPQWQQYASMSLUMEATQMUEQLQWSLULQLSEKDTFKPSMEKFFSETQSZUEQJCADQHUKPQEUDDSESMQFUQCRFKRKFEUKDQPLSPKMLSFSENKMYASDASMTFQMKEUSLQLEKDMULSFQZADLQWSDTQPSMCYASRFSEUMQDLSADQLSZSDMQESFFQLQ | |
[{'rot': 11}] NODQCIESJOUOKSFOJQJORIKJQCSYJOJOBIKCIBQNPDIPIKSRIJQFSESNOBCSOANOSUPNQUQBROCSBJQUQJSJOKJQKQEYDSJOJRSQBQYBQXQCRIOCYUYNORSFIWYQBOJSQJSKCYRQRSQBQQBYBSUPOCRIQBQNDQCIDRQJQNIKJQDQCLIKJQNSBJSFSJYIKQNNOUOOWYQKQJSKCYROKSCOJOUQJSJOJQCIBRDINQKCIDDQCROQXSCOHAYBOFSINOCSBBQCQKODSOAPDIPIDCSIBONJQNIKJQDQCLIKWYQBYQKRDOKICSQJOJCIBKSJQDOXYBJOUQBRONQKAWYQPDQCSKOBJQYBOJQXQBKOCQDDOJO | |
[{'rot': 7}] JKZMYEAOFKQKGOBKFMFKNEGFMYOUFKFKXEGYEXMJLZELEGONEFMBOAOJKXYOKWJKOQLJMQMXNKYOXFMQMFOFKGFMGMAUZOFKFNOMXMUXMTMYNEKYUQUJKNOBESUMXKFOMFOGYUNMNOMXMMXUXOQLKYNEMXMJZMYEZNMFMJEGFMZMYHEGFMJOXFOBOFUEGMJJKQKKSUMGMFOGYUNKGOYKFKQMFOFKFMYEXNZEJMGYEZZMYNKMTOYKDWUXKBOEJKYOXXMYMGKZOKWLZELEZYOEXKJFMJEGFMZMYHEGSUMXUMGNZKGEYOMFKFYEXGOFMZKTUXFKQMXNKJMGWSUMLZMYOGKXFMUXKFMTMXGKYMZZKFK | |
-[ATBASH] | |
XWHUICGSBWQWASFWBUBWTCABUISMBWBWJCAICJUXVHCVCASTCBUFSGSXWJISWKXWSQVXUQUJTWISJBUQUBSBWABUAUGMHSBWBTSUJUMJUNUITCWIMQMXWTSFCOMUJWBSUBSAIMTUTSUJUUJMJSQVWITCUJUXHUICHTUBUXCABUHUIZCABUXSJBSFSBMCAUXXWQWWOMUAUBSAIMTWASIWBWQUBSBWBUICJTHCXUAICHHUITWUNSIWDKMJWFSCXWISJJUIUAWHSWKVHCVCHISCJWXBUXCABUHUIZCAOMUJMUATHWACISUBWBICJASBUHWNMJBWQUJTWXUAKOMUVHUISAWJBUMJWBUNUJAWIUHHWBW |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/env python3 | |
''' | |
[ ANÁLISIS ESTADÍSTICO DE TEXTO ] | |
[ CIFRADO POR SUSTITUCIÓN MONOALFABÉTICA ] | |
Javier Junquera Sánchez <[email protected]> | |
Fases: | |
1. Análisis de entropía | |
2. Análisis de aparición de caracteres | |
3. Análisis de caracteres repetidos juntos (RR, LL) | |
4. Análisis de dupletas repetidas | |
5. Generación de posibles traducciones basado en estadísticas de idioma | |
6. Uso de estadísticas para cifrado afín | |
7. Fuerza bruta afín, césar y atbash | |
''' | |
import re | |
from affine import decode_affine, affine_break | |
# Spanish letter statistics (per the original note: based on Don Quixote).
estadisticas_castellano = {'A': 0.1203914356874436, 'B': 0.015130070791165134, 'C': 0.03719184102245509, 'D': 0.05437026585244542, 'E': 0.1386395633977034, 'F': 0.004934755557097172, 'G': 0.01092558720618012, 'H': 0.012658047516610746, 'I': 0.048772133465719646, 'J': 0.006582358140488471, 'K': 8.238012916956494e-05, 'L': 0.05551801306561237, 'M': 0.02788784162203377, 'N': 0.06781866708951004, 'O': 0.09582667220822723, 'P': 0.022245112473652295, 'Q': 0.02013110209127842, 'R': 0.06325617061234451, 'S': 0.07834783863652835, 'T': 0.03919621769909202, 'U': 0.048757887277968516, 'V': 0.011134324826707514, 'W': 0.00016847665514377192, 'X': 0.00025209558324821755, 'Y': 0.015759380650084517, 'Z': 0.004021760742090114}
# Same table with an extra entry for the space character.
estadisticas_castellano_esp = {' ': 1, 'A': 0.1203914356874436, 'B': 0.015130070791165134, 'C': 0.03719184102245509, 'D': 0.05437026585244542, 'E': 0.1386395633977034, 'F': 0.004934755557097172, 'G': 0.01092558720618012, 'H': 0.012658047516610746, 'I': 0.048772133465719646, 'J': 0.006582358140488471, 'K': 8.238012916956494e-05, 'L': 0.05551801306561237, 'M': 0.02788784162203377, 'N': 0.06781866708951004, 'O': 0.09582667220822723, 'P': 0.022245112473652295, 'Q': 0.02013110209127842, 'R': 0.06325617061234451, 'S': 0.07834783863652835, 'T': 0.03919621769909202, 'U': 0.048757887277968516, 'V': 0.011134324826707514, 'W': 0.00016847665514377192, 'X': 0.00025209558324821755, 'Y': 0.015759380650084517, 'Z': 0.004021760742090114}
# Disabled switch to the table that includes the space character:
# estadisticas_castellano = estadisticas_castellano_esp
# English letter statistics (per the original note: based on The Lord of the Rings).
estadisticas_ingles = {'A': 0.0831295411584481, 'B': 0.01738984496652256, 'C': 0.017155136950865266, 'D': 0.052443360917834525, 'E': 0.12285071813081565, 'F': 0.024581248091420035, 'G': 0.02449291711778557, 'H': 0.06476679361089449, 'I': 0.06393143497452282, 'J': 0.0006511254627912083, 'K': 0.009077900347519288, 'L': 0.04530117076396197, 'M': 0.022928197013403595, 'N': 0.06837322107728455, 'O': 0.0779066568745473, 'P': 0.013705181494913397, 'Q': 0.00058677003914324, 'R': 0.05933696247447866, 'S': 0.05987073392944122, 'T': 0.08989443186779629, 'U': 0.02559074493295679, 'V': 0.008983260018625218, 'W': 0.026715072040218356, 'X': 0.000682672239089232, 'Y': 0.019290222770715505, 'Z': 0.00036468073400515346}
# Load the ciphertext from t.txt, dropping spaces and newlines.
with open('t.txt') as f:
    t = f.read().replace(' ', '').replace('\n', '')
# Spanish words and fragments; most_able() scores a candidate text by how many
# of these occur in it as substrings.
KEYWORDS = ['CRIP', 'CUANDO', 'EN', 'HA', 'HABIA', 'ERA', 'CONTRA', 'DESDE', 'SOLO', 'PERO', 'LE', 'SI', 'ESTA', 'AHORA', 'ALLI', 'SE', 'SEGUN', 'ANTE', 'SER', 'EL', 'POR', 'PARA', 'TAMBIEN', 'TODO', 'SUS', 'PORQUE', 'AQUI', 'YA', 'HACIA', 'A', 'CON', 'HAN', 'DEL', 'Y', 'AL', 'COMO', 'HASTA', 'QUE', 'O', 'UN', 'BAJO', 'LO', 'MAS', 'SU', 'LOS', 'SIN', 'NO', 'PUEDE', 'DOS', 'ENTRE', 'SOBRE', 'CIFR', 'MI', 'FUE', 'MUY', 'TRAS', 'LAS', 'LA', 'ES', 'SON', 'VEZ', 'ME', 'CABE', 'YO', 'HAY', 'ESTE', 'UNA', 'AÑOS', 'DE']
# English keywords; most_able() only consults them when `english` is true.
KEYWORDS_EN = list(set(['THE', 'IS', 'AND', 'WHO', 'HE', 'IT', 'WHERE', 'CRYPT', 'A']))
class Descifrado():
    """A candidate decryption: the recovered text and the key that produced it."""

    def __init__(self, text, key):
        self.key = key
        self.text = text

    def __str__(self):
        # Rendered as "[<key>] <text>".
        return "[{!s}] {!s}".format(self.key, self.text)
# NOTE(review): not referenced anywhere in the visible code — confirm before removing.
MAX = 5
def analiza_repetidos(t, i=2):
    """Index every ``i``-gram of ``t`` by the positions where it occurs.

    ``t`` is upper-cased and stripped of everything outside ``A-Z`` first, so
    the positions refer to the cleaned text.  Overlapping occurrences are
    included.

    Args:
        t: text to analyse.
        i: width of the n-grams; must be at least 1.

    Returns:
        A dict mapping each n-gram to the ascending list of its start
        positions.  Keys appear in order of first occurrence.

    Raises:
        ValueError: if ``i`` is smaller than 1 (such widths have no meaningful
            n-grams and previously made the scan loop forever).
    """
    if i < 1:
        raise ValueError("i must be >= 1")
    t = re.sub(r'[^A-Z]', '', t.upper())
    res = {}
    # One left-to-right pass visits every start position exactly once, so the
    # position lists come out sorted and free of duplicates.
    for j in range(len(t) - i + 1):
        res.setdefault(t[j:j + i], []).append(j)
    return res
def calcula_distancias(v):
    """Return the differences between consecutive elements of ``v``.

    The result has ``len(v) - 1`` entries (none for an empty or
    single-element sequence); entry ``k`` is ``v[k + 1] - v[k]``.
    """
    return [later - earlier for earlier, later in zip(v, v[1:])]
def divisores(x):
    """Return the divisors of ``x`` in the range ``2 .. x // 2``, ascending.

    ``1`` and ``x`` itself are never reported.  The upper bound ``x // 2`` is
    inclusive: it is the largest proper divisor an even number can have, and
    it was previously skipped (e.g. 5 for 10, or 2 for 4).

    Returns:
        A list of divisors; empty when ``x`` has none in that range.
    """
    upper = int(x // 2)
    return [i for i in range(2, upper + 1) if x % i == 0]
def analisis_estadistico(txt):
    """Return the relative frequency of each letter ``A``-``Z`` in ``txt``.

    ``txt`` is upper-cased and everything outside ``A-Z`` is discarded before
    counting.

    Returns:
        A dict with one entry per letter from ``'A'`` to ``'Z'`` (in that
        order), each holding the letter's share of the cleaned text as a
        float.  When the cleaned text is empty every share is ``0.0`` instead
        of raising ``ZeroDivisionError``.
    """
    txt = re.sub(r'[^A-Z]', '', txt.upper())
    d = {chr(code): 0 for code in range(ord('A'), ord('Z') + 1)}
    # After the filter above every character is guaranteed to be a key of d.
    for x in txt:
        d[x] += 1
    total = len(txt)
    if total == 0:
        return {letter: 0.0 for letter in d}
    return {letter: d[letter] / total for letter in d}
def gen_keys(cs, n, key):
    """Enumerate every key obtainable by picking one option per position.

    Args:
        cs: sequence whose ``n``-th element holds the options for position
            ``n``.
        n: position to fill next.
        key: the key built so far.

    Returns:
        A list of complete keys.  A key counts as complete once its length
        equals ``len(cs)``.
    """
    if len(cs) == len(key):
        return [key]
    return [
        complete
        for option in cs[n]
        for complete in gen_keys(cs, n + 1, key + option)
    ]
from math import gcd | |
def comun_divisor(values):
    """Fold ``gcd`` over ``values`` until it would drop to 2 or less.

    Starts from ``values[0]`` and combines it with each following value.  As
    soon as a combined gcd is ``<= 2`` the scan stops and the last value above
    that threshold is returned.

    Raises:
        IndexError: if ``values`` is empty.
    """
    current = values[0]
    for value in values[1:]:
        shared = gcd(current, value)
        if shared <= 2:
            break
        current = shared
    return current
def repetidos(t):
    """Locate characters that are immediately followed by themselves.

    Returns:
        A dict mapping each doubled character to the list of positions where
        a pair starts.  A run of three equal characters yields two positions.
    """
    found = {}
    for pos in range(len(t) - 1):
        if t[pos] != t[pos + 1]:
            continue
        found.setdefault(t[pos:pos + 1], []).append(pos)
    return found
def representa_matrix(t, n=10):
    """Lay ``t`` out as rows of ``n`` space-separated characters.

    Every character is followed by one space; a newline is inserted before
    each character whose index is a positive multiple of ``n``.

    Returns:
        The formatted text (no trailing newline).
    """
    cells = []
    for idx, ch in enumerate(t):
        if idx % n == 0 and idx > 0:
            cells.append("\n")
        cells.append("%c " % ch)
    return "".join(cells)
def translate(t, translations, n=1):
    """Translate ``t`` chunk by chunk through the ``translations`` table.

    ``t`` is consumed left to right in chunks of ``n`` characters (the last
    chunk may be shorter).  Chunks missing from the table become a single
    space.

    Returns:
        The concatenated translations.
    """
    pieces = []
    pos = 0
    while pos < len(t):
        pieces.append(translations.get(t[pos:pos + n], " "))
        pos += n
    return "".join(pieces)
def gen_translations(translations, translation):
    """Expand every combination of candidate translations into a decryption.

    Recursively fixes one symbol of ``translations`` at a time.  Once no
    symbols are left, the module-level text ``t`` is translated with the
    accumulated table (chunk size ``n_gram``) and the result is appended to
    the module-level list ``trans_res`` as a ``Descifrado``.

    Args:
        translations: dict mapping each still-undecided symbol to its list of
            candidate replacements.
        translation: dict with the replacements chosen so far.  It is not
            modified.

    Returns:
        None; results are collected in ``trans_res``.
    """
    if len(translations) == 0:
        r = translate(t, translation, n=n_gram)
        # Store a private copy: the caller reuses and keeps mutating the dict
        # it passed in, so keeping a reference would make every sibling result
        # display the last candidate that was tried.
        trans_res.append(Descifrado(r, dict(translation)))
        return

    translation_aux = dict(translation)
    # Only the first pending symbol is decided at this level; the rest are
    # handled by the recursive calls.
    symbol = next(iter(translations))
    remaining = {k: translations[k] for k in translations if k != symbol}
    for candidate in translations[symbol]:
        translation_aux[symbol] = candidate
        gen_translations(remaining, translation_aux)
# When true, English statistics and keywords are taken into account as well.
english = False


def most_able(candidates, KEYWORDS=[]):
    """Rank candidate decryptions by how many keywords they contain.

    A candidate scores one point for every entry of ``KEYWORDS`` found as a
    substring of its ``text``; when the module-level ``english`` flag is set,
    entries of ``KEYWORDS_EN`` count too.

    Args:
        candidates: iterable of objects exposing a ``text`` attribute.
        KEYWORDS: keywords to look for.

    Returns:
        A dict mapping each candidate to its score, ordered from best to
        worst; ties are ordered by descending ``text``.
    """
    scores = {}
    for cand in candidates:
        hits = sum(1 for kw in KEYWORDS if kw in cand.text)
        if english:
            hits += sum(1 for kw in KEYWORDS_EN if kw in cand.text)
        scores[cand] = hits
    ranked = sorted(scores.items(), key=lambda pair: (pair[1], pair[0].text))
    return dict(reversed(ranked))
from entro import entropy | |
# Entropy of the text
e = entropy(t)

# Statistical analysis of the characters of the text
multigram = False
if multigram:
    print("ANÁLISIS POLIALFABÉTICO")
    n_gram = int(input("Ngram > "))
else:
    print("- ANÁLISIS MONOALFABÉTICO:")
    n_gram = 1

# Occurrence count of every n-gram, most frequent first.
ar = analiza_repetidos(t, i=n_gram)
d = {x: len(ar[x]) for x in ar}
estadisticas = {x[0]: x[1] for x in sorted(d.items(), key=lambda x: (x[1], x[0]))[::-1]}
conv_estadisticas_es = {x[0]: x[1] for x in sorted(estadisticas_castellano.items(), key=lambda x: (x[1], x[0]))[::-1]}
conv_estadisticas_en = {x[0]: x[1] for x in sorted(estadisticas_ingles.items(), key=lambda x: (x[1], x[0]))[::-1]}

# Symbols ordered by rank: ciphertext, Spanish reference, English reference.
r_std = list(estadisticas)
r_mix_es = list(conv_estadisticas_es)
r_mix_en = list(conv_estadisticas_en)

# Table of possible translations: the k-th most frequent ciphertext symbol is
# paired with the k-th most frequent reference letter.
translations = {r_std[i]: [r_mix_es[i]] for i in range(min(len(r_std), len(r_mix_es)))}
if english:
    for i in range(min(len(r_std), len(r_mix_en))):
        c = r_mix_en[i]
        tr = r_std[i]
        # Key the lookup on the ciphertext symbol (tr), not on the candidate
        # letter (c), so the Spanish candidate is extended rather than
        # overwritten.
        translations.setdefault(tr, []).append(c)

# Identical adjacent characters may stand for LL or RR
rep = repetidos(t)
add_rep = True and (n_gram == 1)
if add_rep:
    for doubled in rep:
        # repetidos() scans the raw text, so it can report characters that
        # analiza_repetidos() filtered out; those have no table entry.
        if doubled in translations:
            translations[doubled].append('L')
            translations[doubled].append('R')

# Different characters repeated together may stand for CH
dup = analiza_repetidos(t, i=2)
add_dup = False and (n_gram == 1)
if add_dup:
    for r in dup:
        translations[r[0]].append('C')
        translations[r[1]].append('H')

# Matrix representation
mat = representa_matrix(t, n=20)
def unifica(translations):
    """Remove duplicate candidates from every entry of ``translations``.

    The first occurrence of each candidate is kept and the original order is
    preserved, so the result is the same on every run (deduplicating through
    ``set`` made the order depend on string hash randomisation).

    Args:
        translations: dict mapping a symbol to its list of candidates.  It is
            updated in place.

    Returns:
        The same ``translations`` dict.
    """
    for x in translations:
        translations[x] = list(dict.fromkeys(translations[x]))
    return translations
translations = unifica(translations)

# Report the intermediate results, one titled section per analysis step.
print()
print("[*] Mensaje:")
print(t)
print()
print("[*] Entropía: %f" % e)
for title, value in (
    ("[*] Estadísticas:", estadisticas),
    ("[*] Repetidos (RR, LL):", rep),
    ("[*] Parejas repetidas:", dup),
    ("[*] Matriz:", mat),
    ("[*] Traducciones", translations),
):
    print()
    print(title)
    print(value)

# Number of candidate keys: product of the option counts, where a symbol
# without options counts as one.
claves = 1
for options in translations.values():
    claves *= len(options) or 1
print()
print("[*] Claves posibles: %d" % claves)

# Translation results
trans_res = []
gen_translations(translations, {})
''' | |
AFFINE ANALYSIS | |
C = ord('C') - ord('A') | |
D = ord('D') - ord('A') | |
L = ord('L') - ord('A') | |
A = ord('A') - ord('A') | |
# (a * L) + b = C | |
# (a * A) + b = D | |
# b = D - (a * A) | |
# (a * L) + D - (a * A) = C | |
# a * (L - A) = C - D | |
# a = (C - D) * (L - A)**(-1) | |
a = ( inverso(L - A, l) * (C - D) ) % l | |
# a = 7 | |
b = ( D - (a * A) ) | |
# b = 3 | |
''' | |
# Rank the substitution candidates and use the best ones to guess affine keys.
stat = most_able(trans_res, KEYWORDS=KEYWORDS)
affine_values = dict(a=[], b=[])
print()
print('-[CODEBOOK BY CRYPTOANALYSIS]')
n_best = 3
for k in list(stat)[:n_best]:
    m = k.text
    print("[%d] %s" % (stat[k], k))
    # Treat the first two characters of the candidate as known plaintext for
    # the first two ciphertext characters and solve for (a, b).
    broken = affine_break(m[0], m[1], t[0], t[1])
    affine_values['a'].append(broken['a'])
    affine_values['b'].append(broken['b'])
affine_values['a'] = list(set(affine_values['a']))
affine_values['b'] = list(set(affine_values['b']))

# Try every combination of the recovered a and b values.
affines = []
for a in affine_values['a']:
    for b in affine_values['b']:
        aux = decode_affine(t, a, b)
        # Label each result with the key actually used to produce it (not
        # with the last key returned by affine_break above).
        affines.append(Descifrado(aux, dict(a=a, b=b)))
print()
print('-[AFFINE BY CRYPTOANALYSIS]')
n_best = 3
for x in list(most_able(affines, KEYWORDS=KEYWORDS))[:n_best]:
    print("%s" % x)
others = True
if others:
    # AFFINE ANALYSIS MANUAL
    #   C = ord('C') - ord('A')
    #   D = ord('D') - ord('A')
    #   L = ord('L') - ord('A')
    #   A = ord('A') - ord('A')
    #   (a * L) + b = C
    #   (a * A) + b = D
    #   b = D - (a * A)
    #   (a * L) + D - (a * A) = C
    #   a * (L - A) = C - D
    #   a = (C - D) * (L - A)**(-1)
    #   a = ( inverso(L - A, l) * (C - D) ) % l    -> a = 7
    #   b = ( D - (a * A) )                        -> b = 3
    print()
    print("-[AFFINE BRUTEFORCE]")
    # Decode with every (a, b) pair in 0..25 x 0..25 and keep the best three.
    affines = []
    for i in range(26):
        for j in range(26):
            candidate = Descifrado(decode_affine(t, i, j), dict(a=i, b=j))
            affines.append(candidate)
    n_best = 3
    for x in most_able(affines, KEYWORDS=KEYWORDS):
        if not n_best:
            break
        print("%s" % x)
        n_best -= 1
# CESAR ANALYSIS
from cesar import cesar
print()
print("-[CESAR BRUTEFORCE]")
# Try every rotation of the 26-letter alphabet (0..25); stopping at 24 would
# leave rotation 25 untested.
cesars = []
for i in range(26):
    cesars.append(Descifrado(cesar(i, t), dict(rot=i)))
stat = most_able(cesars, KEYWORDS=KEYWORDS)
n_best = 3
for k in list(stat)[:n_best]:
    print("%s" % (k))
print()
print("-[ATBASH]")
# The alphabet 'A'..'Z' and the same alphabet reversed.
letras = [chr(code) for code in range(ord('A'), ord('Z') + 1)]
atbash_code = list(reversed(letras))


def atbash(t):
    """Apply the Atbash substitution to ``t``.

    Each character is replaced by the one at the same position in the
    reversed alphabet ('A' <-> 'Z', 'B' <-> 'Y', ...).

    Raises:
        ValueError: if ``t`` contains a character that is not in ``letras``.
    """
    return "".join(atbash_code[letras.index(ch)] for ch in t)
# Print the ciphertext decoded with Atbash.
print("%s" % atbash(t))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment