Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Select an option

  • Save leptoquark1/d87f15b853bb1d2e8cb202eb624c6020 to your computer and use it in GitHub Desktop.

Select an option

Save leptoquark1/d87f15b853bb1d2e8cb202eb624c6020 to your computer and use it in GitHub Desktop.
Remove all duplicate entries from a Bitwarden JSON export file
import json
import os
from collections import defaultdict
import sys
def validate_json_structure(data):
    """Check that ``data`` has the minimal shape of a Bitwarden JSON export.

    The root must be a dict that contains an ``'items'`` key whose value is
    a list. Nothing about the individual items is inspected here.

    Args:
        data: The object produced by parsing the export file.

    Raises:
        ValueError: If the root is not a dict, the ``'items'`` key is
            missing, or ``'items'`` is not a list.
    """
    # Classify the first structural problem (if any), then raise once.
    if not isinstance(data, dict):
        problem = "Invalid JSON format: Root must be an object"
    elif 'items' not in data:
        problem = "Invalid JSON format: Missing 'items' array"
    elif not isinstance(data['items'], list):
        problem = "Invalid JSON format: 'items' must be an array"
    else:
        problem = None

    if problem is not None:
        raise ValueError(problem)
def get_uris_from_item(item):
    """Collect the distinct URIs stored in an item's ``login.uris`` list.

    Each entry of ``login.uris`` is expected to be a dict with a ``'uri'``
    string. A single ``'uri'`` value may hold several comma-separated URIs;
    they are split and stripped of surrounding whitespace.

    Args:
        item: One entry of the export's ``items`` list. Anything that is not
            a dict (including ``None``) yields an empty result.

    Returns:
        set: The non-empty URI strings found. Empty when the item has no
        usable ``login`` / ``uris`` data.
    """
    if not isinstance(item, dict):
        return set()

    login = item.get('login')
    if not isinstance(login, dict):
        return set()

    uris = login.get('uris')
    if not isinstance(uris, list):
        return set()

    uri_set = set()
    for uri_obj in uris:
        if not isinstance(uri_obj, dict):
            continue
        raw_uri = uri_obj.get('uri')
        # Skip null / non-text values instead of crashing on .split().
        if not isinstance(raw_uri, str):
            continue
        # Split multiple URIs if they are comma-separated
        for fragment in raw_uri.split(','):
            fragment = fragment.strip()
            # A trailing or doubled comma produces an empty fragment. Keeping
            # it would make unrelated items appear to share the "" URI.
            if fragment:
                uri_set.add(fragment)
    return uri_set
def add_field_to_item(item: dict, f_name: str, f_val: str, f_type: int):
    """Append a custom field to ``item['fields']``, creating the list if needed.

    The call is a no-op when ``item`` is empty or not a dict, or when
    ``f_val`` is empty, so callers can pass possibly-missing values without
    checking them first.

    Args:
        item: The export item to modify in place.
        f_name: Value stored under the new field's ``"name"`` key.
        f_val: Value stored under the new field's ``"value"`` key.
        f_type: Value stored under the new field's ``"type"`` key.
            Presumably Bitwarden's custom-field type code (callers in this
            file pass 0 for usernames and 1 for passwords) - confirm against
            the export format.
    """
    if not item or not f_val or not isinstance(item, dict):
        return

    fields = item.get("fields")
    # Exports may carry "fields": null (or omit the key); start a fresh list.
    if not isinstance(fields, list):
        fields = item["fields"] = []

    fields.append({"name": f_name, "value": f_val, "type": f_type})
# Characters stripped from both ends of an item's *name* (despite the
# identifier) before two names are compared in items_are_duplicate().
username_trim_chars = "#-!+~?)(/&%$\"'\n\t\r "
# Name of the custom field that receives a duplicate's differing username.
field_alt_username = "Alternative Username"
# Name of the custom field that receives a duplicate's differing password.
field_alt_password = "Alternative Password"
def items_are_duplicate(item1: dict, item2: dict) -> bool:
    """Decide whether ``item2`` is a duplicate of ``item1``.

    Two items are duplicates when they share at least one URI and either

    * their sanitized, case-insensitive names are identical (and long
      enough to be meaningful), or
    * their non-empty usernames are identical ignoring case.

    Side effect: when the items are duplicates but their credentials differ,
    the differing username and/or password of ``item2`` is appended to
    ``item1``'s custom fields so that nothing is lost when ``item2`` is
    dropped.

    Args:
        item1: The entry that would be kept; may be mutated (see above).
        item2: The entry that would be removed; never mutated.

    Returns:
        bool: ``True`` if ``item2`` duplicates ``item1``, else ``False``.
    """
    # Both items must be usable, non-empty objects to be comparable.
    if not (item1 and item2 and isinstance(item1, dict) and isinstance(item2, dict)):
        return False

    # Some shared URI is required to continue
    if not get_uris_from_item(item1) & get_uris_from_item(item2):
        return False

    # A JSON null arrives as None, which .get(key, default) does NOT replace
    # with the default. Fall back explicitly so .lower()/.strip() are safe.
    login1: dict = item1.get("login") or {}
    login2: dict = item2.get("login") or {}

    # Usernames are compared case-insensitively, but the original spelling of
    # item2's username is kept for the backup field.
    raw_user2: str = login2.get("username") or ""
    user1: str = (login1.get("username") or "").lower()
    user2: str = raw_user2.lower()

    password1: str = login1.get("password") or ""
    password2: str = login2.get("password") or ""

    # Names are compared after trimming decoration characters and lowering.
    name1 = (item1.get("name") or "").strip(username_trim_chars).lower()
    name2 = (item2.get("name") or "").strip(username_trim_chars).lower()

    # Identical names only count when they are not trivially short
    # (combined length above 5, i.e. at least 3 characters each).
    if name1 == name2 and len(name1) + len(name2) > 5:
        if user1 != user2:
            # backup item2's username in item1's fields list
            add_field_to_item(item1, field_alt_username, raw_user2, 0)
        if password1 != password2:
            # backup item2's password in item1's fields list
            add_field_to_item(item1, field_alt_password, password2, 1)
        return True

    # Different names: fall back to an identical, non-empty username.
    if user1 and user1 == user2:
        if password1 != password2:
            # same account, different password: keep item2's as a backup
            add_field_to_item(item1, field_alt_password, password2, 1)
        return True

    return False
def get_user_confirmation():
    """Show the risk warning and ask the user to confirm with Yes or No.

    The warning is printed once; the question is then repeated until the
    answer (case-insensitive, surrounding whitespace ignored) is exactly
    ``yes`` or ``no``. End-of-input on stdin is treated as a refusal so a
    non-interactive run never proceeds by accident.

    Returns:
        bool: ``True`` if the user answered yes, ``False`` otherwise.
    """
    # The description of step 1 mirrors what the matching logic actually
    # does: passwords are not required to match for two entries to be merged.
    warning_text = """
⚠️ WARNING - USE AT YOUR OWN RISK ⚠️
This script will process your Bitwarden export file and:
1. Identify duplicates: entries that share a URI and have the same name or the same username
   (passwords do NOT have to match; a differing username/password of a removed
   entry is copied into a custom field of the entry that is kept)
2. Create two new files:
- *_edited.json: Contains unique entries (first occurrence kept)
- *_deleted.json: Contains all duplicate entries that were removed
To import the cleaned data into Bitwarden:
1. First, delete all items in your Bitwarden vault
2. Then import the '*_edited.json' file into Bitwarden
"""
    prompt = "Are you sure you want to continue? (Yes/No): "
    answers = {"yes": True, "no": False}

    print(warning_text, end="")
    while True:
        try:
            response = input(prompt).strip().lower()
        except EOFError:
            # No more input available: the safe choice is not to continue.
            return False
        if response in answers:
            return answers[response]
        print("\nPlease enter 'Yes' or 'No'")
def process_json_file(file_path):
    """De-duplicate a Bitwarden JSON export and write the two result files.

    Reads ``file_path``, validates its structure, and compares every item
    with every later item. The first occurrence of each group is kept; later
    duplicates are moved to a separate list. Two files are written next to
    the input: ``<base>_edited.json`` (kept items) and
    ``<base>_deleted.json`` (removed duplicates), both carrying the export's
    ``encrypted``, ``folders``, ``collections`` and ``organizations`` values.

    Args:
        file_path: Path of the export file to process.

    Returns:
        tuple: ``(number_of_duplicates, number_of_unique_items)``.

    Raises:
        ValueError: If the file cannot be read or decoded, is not valid
            JSON, or does not have the expected structure.
    """
    # Only the read/parse step is guarded, so each failure keeps its own
    # accurate message and the original exception stays attached as the cause.
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON file: {str(e)}") from e
    except (OSError, UnicodeDecodeError) as e:
        raise ValueError(f"Error reading file: {str(e)}") from e

    # Raises ValueError("Invalid JSON format: ...") on a structural problem.
    validate_json_structure(data)

    # Copy all metadata
    output_data = {
        'encrypted': data.get('encrypted', False),
        'folders': data.get('folders', []),
        'collections': data.get('collections', []),
        'organizations': data.get('organizations', [])
    }

    items = data['items']
    duplicate_items = []
    unique_items = []
    removed_indices = set()  # positions already classified as duplicates

    # Pairwise comparison: the first occurrence wins, later matches are removed.
    for i, item1 in enumerate(items):
        if i in removed_indices:
            continue
        # Compare with all subsequent items that are still candidates
        for j in range(i + 1, len(items)):
            if j in removed_indices:
                continue
            item2 = items[j]
            # May also copy item2's differing credentials into item1's fields.
            if items_are_duplicate(item1, item2):
                duplicate_items.append(item2)
                removed_indices.add(j)
        unique_items.append(item1)

    # Get base filename without extension
    base_name = os.path.splitext(file_path)[0]

    # Shallow copies are enough: only the 'items' key differs between outputs.
    deleted_data = output_data.copy()
    deleted_data['items'] = duplicate_items
    edited_data = output_data.copy()
    edited_data['items'] = unique_items

    # Write duplicates to _deleted file
    deleted_file = f"{base_name}_deleted.json"
    with open(deleted_file, 'w', encoding='utf-8') as f:
        json.dump(deleted_data, f, indent=2, ensure_ascii=False)

    # Write cleaned data to _edited file
    edited_file = f"{base_name}_edited.json"
    with open(edited_file, 'w', encoding='utf-8') as f:
        json.dump(edited_data, f, indent=2, ensure_ascii=False)

    return len(duplicate_items), len(unique_items)
def main():
    """Run the interactive de-duplication workflow.

    Asks for the export file path, shows the risk warning, processes the
    file and prints a summary.

    Exits with status 1 if the path is not an existing file or processing
    fails, and with status 0 if the user declines to continue.
    """
    # Surrounding whitespace (e.g. from a pasted path) is never intended.
    file_path = input("Please enter the path to your Bitwarden JSON export file: ").strip()

    # isfile() also rejects directories, which exists() would let through.
    if not os.path.isfile(file_path):
        print(f"\nError: File '{file_path}' does not exist or is not a file!")
        sys.exit(1)

    # Get user confirmation
    if not get_user_confirmation():
        print("\nOperation cancelled by user.")
        sys.exit(0)

    try:
        duplicates, uniques = process_json_file(file_path)
        print("\n✅ Processing completed successfully!")
        print("📊 Statistics:")
        print(f" - {uniques} unique entries saved to '*_edited.json'")
        print(f" - {duplicates} duplicate entries saved to '*_deleted.json'")
        print("\n📝 Next steps:")
        print("1. Backup your Bitwarden vault")
        print("2. Delete all items in your vault")
        print("3. Import the '*_edited.json' file into Bitwarden")
    except Exception as e:
        # Top-level boundary: report any failure and signal it via exit code.
        print(f"\n❌ An error occurred: {str(e)}")
        sys.exit(1)


if __name__ == "__main__":
    main()
@leptoquark1

Copy link
Copy Markdown
Author

Unit tests

class TestItemsAreDuplicate(unittest.TestCase):
    """Behavioral tests for ``items_are_duplicate``.

    Each case builds two minimal export items and checks both the verdict
    and, where relevant, the backup fields written onto the first item.
    """

    def test_only_name_equal(self):
        """Equal names alone are not enough; a shared URI is required."""
        self.assertFalse(items_are_duplicate(
            { 'name': "Bitwarden" },
            { 'name': "Bitwarden" }),
            "Same name but no uris"
        )
        self.assertFalse(items_are_duplicate(
            { 'name': " Bitwarden+!" },
            { 'name': "\nBitwarden" }),
            "Same name but unsanitized and no uris"
        )
        self.assertFalse(items_are_duplicate(
            { 'name': "Bitwarden", 'login': {'uris': [{'uri': 'b'}]} },
            { 'name': "\nBitwarden", 'login': {'uris': [{'uri': 'a'}]} }),
            "Same name but unsanitized and different uris"
        )
        self.assertFalse(items_are_duplicate(
            { 'name': "Bitwarden", 'login': {'username': "a", 'password': "a", 'uris': [{'uri': 'b'}]} },
            { 'name': "\nBitwarden", 'login': {'username': "b", 'password': "b", 'uris': [{'uri': 'a'}]} }),
            "Same name but unsanitized and different credentials and uris"
        )
        self.assertTrue(items_are_duplicate(
            {'name': "Bitwarden", 'login': {'uris': [{'uri': 'a'}]}},
            {'name': "\nBitwarden", 'login': {'uris': [{'uri': 'a'}]}}),
            "Same name but unsanitized and same uris"
        )

    def test_names_differ(self):
        """Different names without a shared URI are never duplicates."""
        self.assertFalse(items_are_duplicate(
            {'name': "1Bitwarden"},
            {'name': "Bitwarden"}),
            "Different name but no uris"
        )
        self.assertFalse(items_are_duplicate(
            {'name': " Bitwarden1+!"},
            {'name': "\nBitwarden"}),
            "Different name but unsanitized and no uris"
        )
        self.assertFalse(items_are_duplicate(
            {'name': "Bitwarden1", 'login': {'uris': [{'uri': 'b'}]}},
            {'name': "\nBitwarden", 'login': {'uris': [{'uri': 'a'}]}}),
            "Different name but unsanitized and different uris"
        )
        self.assertFalse(items_are_duplicate(
            {'name': "Bitwarden", 'login': {'username': "a", 'password': "a", 'uris': [{'uri': 'b'}]}},
            {'name': "\n1Bitwarden", 'login': {'username': "b", 'password': "b", 'uris': [{'uri': 'a'}]}}),
            "Different name but unsanitized and different credentials and uris"
        )


    def test_different_name_but_same_uri(self):
        """A shared URI alone (no matching name or username) is not enough."""
        self.assertFalse(items_are_duplicate(
            {'name': "Bitwarden", 'login': {'uris': [{'uri': 'a'}]}},
            {'name': "Bitwarden2", 'login': {'uris': [{'uri': 'a'}]}}),
            "Different name same uris"
        )
        self.assertFalse(items_are_duplicate(
            {'name': "Bitwarden", 'login': {'uris': [{'uri': 'a'}]}},
            {'name': "Bitwarden2", 'login': {'uris': [{'uri': 'b'}, {'uri': 'a'}]}}),
            "Different name and one same uri"
        )

    def test_same_uri_and_some_credentials(self):
        """With different names, only an identical username makes a duplicate."""
        i1 = {'name': "Bitwarden", 'login': {'username': "a", 'password': "a", 'uris': [{'uri': 'a'}]}}
        i2 = {'name': "\n1Bitwarden", 'login': {'username': "a", 'password': "b", 'uris': [{'uri': 'a'}]}}
        self.assertTrue(items_are_duplicate(i1, i2), "Share username but not password and name")
        self.assertListEqual(
            i1.get("fields"),
            [{'name': "Alternative Password", 'value': "b", 'type': 1}],
            "Backup of item2 password in fields"
        )

        i1 = {'name': "Bitwarden", 'login': {'username': "b", 'password': "a", 'uris': [{'uri': 'a'}]}}
        i2 = {'name': "\n1Bitwarden", 'login': {'username': "a", 'password': "a", 'uris': [{'uri': 'a'}]}}
        self.assertFalse(items_are_duplicate(i1, i2), "Share password but not username and name")
        self.assertIsNone(i1.get("fields"))

        i1 = {'name': "Bitwarden", 'login': {'username': "a", 'password': "a", 'uris': [{'uri': 'a'}]}}
        i2 = {'name': "Bitwarden1", 'login': {'username': "a", 'password': "a", 'uris': [{'uri': 'a'}]}}
        self.assertTrue(items_are_duplicate(i1, i2), "Share username and password but not name")
        self.assertIsNone(i1.get("fields"))

    def test_name_same_uri_and_some_credentials(self):
        """With identical names, differing credentials are backed up on item1."""
        i1 = {'name': "Bitwarden", 'login': {'username': "a", 'password': "a", 'uris': [{'uri': 'a'}]}}
        i2 = {'name': "Bitwarden", 'login': {'username': "a", 'password': "b", 'uris': [{'uri': 'a'}]}}
        self.assertTrue(items_are_duplicate(i1, i2), "Share username but not password")
        self.assertListEqual(
            i1.get("fields"),
            [{'name': field_alt_password, 'value': "b", 'type': 1}],
            "Backup of item2 password in fields"
        )

        i1 = {'name': "Bitwarden", 'login': {'username': "b", 'password': "a", 'uris': [{'uri': 'a'}]}}
        i2 = {'name': "Bitwarden", 'login': {'username': "a", 'password': "a", 'uris': [{'uri': 'a'}]}}
        self.assertTrue(items_are_duplicate(i1, i2), "Share password but not username")
        self.assertListEqual(
            i1.get("fields"),
            [{'name': field_alt_username, 'value': "a", 'type': 0}],
            "Backup of item2 username in fields"
        )

        i1 = {'name': "Bitwarden", 'login': {'username': "b", 'password': "a", 'uris': [{'uri': 'a'}]}}
        i2 = {'name': "Bitwarden", 'login': {'username': "a", 'password': "b", 'uris': [{'uri': 'a'}]}}
        self.assertTrue(items_are_duplicate(i1, i2), "Share neither password nor username")
        self.assertIsNotNone(i1.get("fields"))
        self.assertEqual(len(i1.get("fields")), 2)
        # The username backup is written before the password backup.
        self.assertDictEqual(i1.get("fields")[0], {'name': field_alt_username, 'value': "a", 'type': 0})
        self.assertDictEqual(i1.get("fields")[1], {'name': field_alt_password, 'value': "b", 'type': 1})


    def test_same_uri_and_same_credentials(self):
        """Identical credentials on a shared URI need no backup fields."""
        i1 = {'name': "Bitwarden2", 'login': {'username': "a", 'password': "a", 'uris': [{'uri': 'a'}]}}
        i2 = {'name': "Bitwarden", 'login': {'username': "a", 'password': "a", 'uris': [{'uri': 'a'}]}}
        self.assertTrue(items_are_duplicate(i1, i2), "Share password and username")
        self.assertIsNone(i1.get("fields"))

    def test_different_uri_and_some_credentials_(self):
        """Without a shared URI nothing matches and item1 is left untouched."""
        i1 = {'name': "Bitwarden2", 'login': {'username': "a", 'password': "a", 'uris': [{'uri': 'b'}]}}
        i2 = {'name': "Bitwarden", 'login': {'username': "a", 'password': "a", 'uris': [{'uri': 'a'}]}}
        self.assertFalse(items_are_duplicate(i1, i2), "Share nothing more than credentials")
        self.assertIsNone(i1.get("fields"))

        i1 = {'name': "Bitwarden2", 'login': {'username': "a", 'password': "a", 'uris': [{'uri': 'b'}]}}
        i2 = {'name': "Bitwarden", 'login': {'username': "a", 'password': "b", 'uris': [{'uri': 'a'}]}}
        self.assertFalse(items_are_duplicate(i1, i2), "Share nothing more than username")
        self.assertIsNone(i1.get("fields"))

        i1 = {'name': "Bitwarden2", 'login': {'username': "b", 'password': "a", 'uris': [{'uri': 'b'}]}}
        i2 = {'name': "Bitwarden", 'login': {'username': "a", 'password': "b", 'uris': [{'uri': 'a'}]}}
        self.assertFalse(items_are_duplicate(i1, i2), "Share nothing")
        self.assertIsNone(i1.get("fields"))

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment