Skip to content

Instantly share code, notes, and snippets.

@gummipunkt
Last active March 28, 2026 19:42
Show Gist options
  • Select an option

  • Save gummipunkt/7453137c18131ab21196972907543507 to your computer and use it in GitHub Desktop.

Select an option

Save gummipunkt/7453137c18131ab21196972907543507 to your computer and use it in GitHub Desktop.
Remove all Bitwarden duplicates from json file
import json
import os
from collections import defaultdict
import sys
def validate_json_structure(data):
    """Check that *data* has the top-level shape of a Bitwarden JSON export.

    The root must be a dict that carries an ``items`` key whose value is a
    list. Nothing is returned on success.

    Raises:
        ValueError: if any of the three structural checks fails.
    """
    problem = None
    if not isinstance(data, dict):
        problem = "Root must be an object"
    elif 'items' not in data:
        problem = "Missing 'items' array"
    elif not isinstance(data['items'], list):
        problem = "'items' must be an array"

    if problem is not None:
        raise ValueError(f"Invalid JSON format: {problem}")
def get_uris_from_item(item):
    """Collect every non-empty URI attached to an item's login.

    Args:
        item: A vault item (normally a dict). Anything that is not a dict,
            or that lacks a dict ``login`` with a list ``uris``, yields an
            empty set instead of raising.

    Returns:
        A set of stripped URI strings. A single ``uri`` value holding several
        comma-separated URIs contributes one entry per URI.
    """
    if not isinstance(item, dict):
        return set()
    login = item.get('login')
    if not isinstance(login, dict):
        return set()
    uris = login.get('uris')
    if not isinstance(uris, list):
        return set()

    uri_set = set()
    for uri_obj in uris:
        if not isinstance(uri_obj, dict):
            continue
        raw = uri_obj.get('uri')
        # Exports can carry null (or otherwise non-string) uri values.
        if not isinstance(raw, str):
            continue
        # Split multiple URIs if they are comma-separated
        for part in raw.split(','):
            part = part.strip()
            # Skip blanks left by ",," or a trailing comma: an empty string
            # in the set would make unrelated items appear to share a URI.
            if part:
                uri_set.add(part)
    return uri_set
def add_field_to_item(item: dict, f_name: str, f_val: str, f_type: int) -> None:
    """Append a custom field to *item*'s ``fields`` list, in place.

    Args:
        item: The vault item to modify. Non-dict values are ignored.
        f_name: Name of the new field.
        f_val: Value of the new field; nothing is added when it is empty.
        f_type: Numeric field type stored under the ``type`` key.

    A missing or non-list ``fields`` entry (for example ``null`` in the
    export) is replaced by a fresh list before appending.
    """
    if not isinstance(item, dict) or not f_val:
        return
    fields = item.get("fields")
    if not isinstance(fields, list):
        fields = item["fields"] = []
    fields.append({"name": f_name, "value": f_val, "type": f_type})
# Characters stripped from both ends of an item name before two names are
# compared in items_are_duplicate.
username_trim_chars = "#-!+~?)(/&%$\"'\n\t\r "
# Names of the custom fields that items_are_duplicate adds to the kept item
# to back up the other item's differing username / password.
field_alt_username = "Alternative Username"
field_alt_password = "Alternative Password"
def _text_or_empty(value) -> str:
    """Return *value* when it is a string, otherwise an empty string."""
    return value if isinstance(value, str) else ""


def items_are_duplicate(item1: dict, item2: dict) -> bool:
    """Check whether two items are duplicates by comparing name and login credentials.

    Two items are duplicates when they share at least one URI and either

    * their sanitized names are identical (and long enough to be meaningful), or
    * both have the same non-empty username (compared case-insensitively).

    Side effect: when the items are duplicates but their credentials differ,
    the username and/or password of *item2* is appended to *item1*'s custom
    fields so that no credential is lost when *item2* is dropped.

    Args:
        item1: The item that would be kept; may be mutated as described above.
        item2: The candidate duplicate; never modified.

    Returns:
        True if *item2* duplicates *item1*, False otherwise.
    """
    # Invalid items are not equal (both must be present)
    if not (item1 and item2):
        return False

    # Some shared URI is required to continue
    if not get_uris_from_item(item1) & get_uris_from_item(item2):
        return False

    # extract credentials; the URI check above guarantees both logins are dicts
    login1: dict = item1.get("login") or {}
    login2: dict = item2.get("login") or {}

    # Exports store missing values as null, so normalise None to "" before
    # calling any string method.
    raw_user2: str = _text_or_empty(login2.get("username"))
    # usernames are compared lower-cased as they are mostly case-insensitive
    user1: str = _text_or_empty(login1.get("username")).lower()
    user2: str = raw_user2.lower()

    password1: str = _text_or_empty(login1.get("password"))
    password2: str = _text_or_empty(login2.get("password"))

    # extract and sanitize names
    name1 = _text_or_empty(item1.get("name")).strip(username_trim_chars).lower()
    name2 = _text_or_empty(item2.get("name")).strip(username_trim_chars).lower()

    # Identical names only count when they are not trivially short
    if len(name1) + len(name2) > 5 and name1 == name2:
        if user1 != user2:
            # back up item2's username with its original casing
            add_field_to_item(item1, field_alt_username, raw_user2, 0)
        if password1 != password2:
            add_field_to_item(item1, field_alt_password, password2, 1)
        return True

    # Same non-empty username on a shared URI
    if user1 and user1 == user2:
        if password1 != password2:
            # keep the other password, as both entries belong to the same user
            add_field_to_item(item1, field_alt_password, password2, 1)
        return True

    return False
def get_user_confirmation():
    """Show the risk warning and ask until the user answers yes or no.

    Returns:
        True when the user typed "yes", False when the user typed "no"
        (surrounding whitespace and letter case are ignored).
    """
    prompt = """
⚠️ WARNING - USE AT YOUR OWN RISK ⚠️
This script will process your Bitwarden export file and:
1. Identify duplicates based on matching URIs, username, and password
2. Create two new files:
- *_edited.json: Contains unique entries (first occurrence kept)
- *_deleted.json: Contains all duplicate entries that were removed
To import the cleaned data into Bitwarden:
1. First, delete all items in your Bitwarden vault
2. Then import the '*_edited.json' file into Bitwarden
Are you sure you want to continue? (Yes/No): """
    decisions = {'yes': True, 'no': False}
    while True:
        answer = input(prompt).strip().lower()
        if answer in decisions:
            return decisions[answer]
        # Anything else: ask again
        print("\nPlease enter 'Yes' or 'No'")
def process_json_file(file_path):
    """Split a Bitwarden JSON export into unique and duplicate items.

    The first occurrence of every item is kept; each later item that
    ``items_are_duplicate`` reports as a duplicate of a kept item is moved to
    the duplicates list. Two files are written next to *file_path*:
    ``<base>_edited.json`` (kept items) and ``<base>_deleted.json``
    (removed duplicates).

    Args:
        file_path: Path to the Bitwarden JSON export.

    Returns:
        A ``(duplicate_count, unique_count)`` tuple.

    Raises:
        ValueError: if the file cannot be read, is not valid JSON, or does
            not have the expected export structure.
    """
    # Read the file; "utf-8-sig" also accepts a file saved with a BOM.
    try:
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON file: {e}") from e
    except (OSError, UnicodeDecodeError) as e:
        raise ValueError(f"Error reading file: {e}") from e

    # Validated outside the try block so a structural problem keeps its own
    # message instead of being reported as a read error.
    validate_json_structure(data)

    # Copy all metadata
    output_data = {
        'encrypted': data.get('encrypted', False),
        'folders': data.get('folders', []),
        'collections': data.get('collections', []),
        'organizations': data.get('organizations', []),
    }

    items = data['items']
    duplicate_items = []
    unique_items = []
    removed_indices = set()

    # Find duplicates
    for i, item1 in enumerate(items):
        if i in removed_indices:
            continue
        # The first occurrence is always kept
        unique_items.append(item1)
        # Compare with all subsequent items that are still in play
        for j in range(i + 1, len(items)):
            if j in removed_indices:
                continue
            item2 = items[j]
            if items_are_duplicate(item1, item2):
                duplicate_items.append(item2)
                removed_indices.add(j)

    # Get base filename without extension
    base_name = os.path.splitext(file_path)[0]

    outputs = (
        (f"{base_name}_deleted.json", duplicate_items),
        (f"{base_name}_edited.json", unique_items),
    )
    for out_path, out_items in outputs:
        with open(out_path, 'w', encoding='utf-8') as f:
            json.dump({**output_data, 'items': out_items}, f, indent=2, ensure_ascii=False)

    return len(duplicate_items), len(unique_items)
def main():
    """Interactive entry point: ask for the export file, confirm, and process it.

    Exits with status 1 when the file does not exist or processing fails, and
    with status 0 when the user declines the confirmation prompt.
    """
    # Get file path from user input. Surrounding whitespace and quotes are
    # dropped because pasted or drag-and-dropped paths often carry them.
    file_path = input("Please enter the path to your Bitwarden JSON export file: ")
    file_path = file_path.strip().strip('"\'')

    if not os.path.exists(file_path):
        print(f"\nError: File '{file_path}' does not exist!")
        sys.exit(1)

    # Get user confirmation
    if not get_user_confirmation():
        print("\nOperation cancelled by user.")
        sys.exit(0)

    try:
        duplicates, uniques = process_json_file(file_path)
    except Exception as e:
        # Top-level boundary: report any failure to the user and exit non-zero.
        print(f"\n❌ An error occurred: {str(e)}")
        sys.exit(1)
    else:
        print("\n✅ Processing completed successfully!")
        print("📊 Statistics:")
        print(f" - {uniques} unique entries saved to '*_edited.json'")
        print(f" - {duplicates} duplicate entries saved to '*_deleted.json'")
        print("\n📝 Next steps:")
        print("1. Backup your Bitwarden vault")
        print("2. Delete all items in your vault")
        print("3. Import the '*_edited.json' file into Bitwarden")


if __name__ == "__main__":
    main()
@gummipunkt
Copy link
Copy Markdown
Author

Output:

  1. File with all removed items
  2. Cleaned file without duplicates

Just import the _edited.json file into your Bitwarden vault via the web interface.

@leptoquark1
Copy link
Copy Markdown

@gummipunkt In my opinion the URI check in items_are_duplicate may result in false positives as an item can have more than one login credential for the same URI.

See my suggested change

What I changed:

  1. They must at least share some URI to pass check !!
    1.1. It will pass, if the sanitized names are identical (if necessary it will backup password and/or username of the second item to fields list of first item).
    1.2. It will pass, if both usernames and passwords are identical
    1.3. If they share only the usernames, it will backup the password of second item to fields list of first item
    1.4. If the sanitized names are identical .

To keep information loss minimal, I also added a merge function that merges some item2 attributes into item1 for duplicate items.

@gummipunkt
Copy link
Copy Markdown
Author

@gummipunkt In my opinion the URI check in items_are_duplicate may result in false positives as an item can have more than one login credential for the same URI.

See my suggested change

What I changed:

1. They must at least share some URI to pass check !!
   1.1. It will pass, if the sanitized names are identical (if necessary it will backup password and/or username of the second item to fields list of first item).
   1.2. It will pass, if both usernames and passwords are identical
   1.3. If they share only the usernames, it will backup the password of second item to fields list of first item
   1.4. If the sanitized names are identical .

To keep information loss minimal, I also added a merge function that merges some item2 attributes into item1 for duplicate items.

thank you

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment