Last active
March 28, 2026 19:42
-
-
Save gummipunkt/7453137c18131ab21196972907543507 to your computer and use it in GitHub Desktop.
Remove all Bitwarden duplicates from json file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import json | |
| import os | |
| from collections import defaultdict | |
| import sys | |
def validate_json_structure(data):
    """Check that *data* has the top-level shape of a Bitwarden JSON export.

    The root must be a dict that contains an 'items' key holding a list.

    Raises:
        ValueError: if any of those three conditions is not met.
    """
    if not isinstance(data, dict):
        problem = "Invalid JSON format: Root must be an object"
    elif 'items' not in data:
        problem = "Invalid JSON format: Missing 'items' array"
    elif not isinstance(data['items'], list):
        problem = "Invalid JSON format: 'items' must be an array"
    else:
        # Structure is fine, nothing to report.
        return
    raise ValueError(problem)
def get_uris_from_item(item):
    """Collect every URI stored on a vault item.

    Reads ``item['login']['uris']``, expected to be a list of dicts with a
    ``'uri'`` key. One ``'uri'`` string may hold several comma-separated
    URIs; each one is returned separately with surrounding whitespace
    removed.

    Args:
        item: One entry of the export's ``items`` list.

    Returns:
        A set of non-empty URI strings. The set is empty when the item, its
        login, or its URI list is missing or has an unexpected type.
    """
    if not isinstance(item, dict):
        return set()

    login = item.get('login')
    if not isinstance(login, dict):
        return set()

    uris = login.get('uris')
    if not isinstance(uris, list):
        return set()

    uri_set = set()
    for uri_obj in uris:
        if not isinstance(uri_obj, dict):
            continue
        raw_uri = uri_obj.get('uri')
        # Only strings can be split; anything else (None, numbers) is ignored.
        if not isinstance(raw_uri, str):
            continue
        for fragment in raw_uri.split(','):
            fragment = fragment.strip()
            # Skip blank fragments such as the tail of "a.com,": an empty
            # string present in two sets would count as a shared URI.
            if fragment:
                uri_set.add(fragment)
    return uri_set
def add_field_to_item(item: dict, f_name: str, f_val: str, f_type: int):
    """Append a custom field to *item*'s ``fields`` list, in place.

    Nothing happens when *item* or *f_val* is empty. If ``item['fields']`` is
    missing or is not a list, it is replaced by a new list first. A field
    that is identical (same name, value and type) to one already in the list
    is not appended a second time.

    Args:
        item: The vault item to modify.
        f_name: Value for the field's ``name`` key.
        f_val: Value for the field's ``value`` key.
        f_type: Value for the field's ``type`` key.
    """
    if not (item and f_val):
        return

    fields = item.get("fields")
    if not isinstance(fields, list):
        item["fields"] = fields = []

    new_field = {"name": f_name, "value": f_val, "type": f_type}
    # The same value can be backed up more than once when several removed
    # duplicates carry it; keep a single copy.
    if new_field not in fields:
        fields.append(new_field)
# Characters stripped from both ends of an item's name before two names are
# compared in items_are_duplicate.
username_trim_chars = "#-!+~?)(/&%$\"'\n\t\r "
# Names of the custom fields that items_are_duplicate adds to the kept item
# to preserve a differing username / password of the removed duplicate.
field_alt_username = "Alternative Username"
field_alt_password = "Alternative Password"
def items_are_duplicate(item1: dict, item2: dict) -> bool:
    """Decide whether *item2* is a duplicate of *item1*.

    Rules, applied in order:

    1. Both items must be non-empty and share at least one URI; otherwise
       they are not duplicates.
    2. If their names (trimmed with ``username_trim_chars`` and lower-cased)
       are equal and their combined length exceeds 5, they are duplicates.
    3. Otherwise, if both usernames are non-empty and equal ignoring case,
       they are duplicates.
    4. Otherwise they are not duplicates.

    Side effect: when a duplicate is detected, a username (rule 2 only) or
    password of *item2* that differs from *item1*'s is appended to *item1*'s
    ``fields`` list via ``add_field_to_item`` so it is not lost.

    Args:
        item1: The item that would be kept; may be modified in place.
        item2: The item that would be removed.

    Returns:
        True if *item2* duplicates *item1*, False otherwise.
    """
    # Both items have to be present for a comparison to make sense.
    if not (item1 and item2):
        return False

    # Some shared URI is required to continue.
    if not get_uris_from_item(item1) & get_uris_from_item(item2):
        return False

    # A key can be present with a null value, in which case .get() returns
    # None rather than its default; "or" supplies the fallback for both cases.
    login1 = item1.get("login") or {}
    login2 = item2.get("login") or {}

    # Usernames are compared case-insensitively, but the original spelling of
    # item2's username is what gets backed up.
    raw_user2 = login2.get("username") or ""
    user1 = (login1.get("username") or "").lower()
    user2 = raw_user2.lower()

    password1 = login1.get("password") or ""
    password2 = login2.get("password") or ""

    name1 = (item1.get("name") or "").strip(username_trim_chars).lower()
    name2 = (item2.get("name") or "").strip(username_trim_chars).lower()

    # Identical names only count when they are long enough to be meaningful.
    if len(name1) + len(name2) > 5 and name1 == name2:
        if user1 != user2:
            add_field_to_item(item1, field_alt_username, raw_user2, 0)
        if password1 != password2:
            add_field_to_item(item1, field_alt_password, password2, 1)
        return True

    # Same non-empty username: treat as duplicate and keep the other password.
    if user1 and user1 == user2:
        if password1 != password2:
            add_field_to_item(item1, field_alt_password, password2, 1)
        return True

    return False
def get_user_confirmation():
    """Show the risk warning and ask the user whether to continue.

    The full warning is shown once; after an unrecognised answer only the
    question itself is repeated. Answers are compared case-insensitively
    after stripping surrounding whitespace.

    Returns:
        True if the user answers 'yes', False if the user answers 'no' or
        if standard input is closed before an answer is given.
    """
    warning_text = """
⚠️ WARNING - USE AT YOUR OWN RISK ⚠️
This script will process your Bitwarden export file and:
1. Identify duplicates based on matching URIs, username, and password
2. Create two new files:
- *_edited.json: Contains unique entries (first occurrence kept)
- *_deleted.json: Contains all duplicate entries that were removed
To import the cleaned data into Bitwarden:
1. First, delete all items in your Bitwarden vault
2. Then import the '*_edited.json' file into Bitwarden
Are you sure you want to continue? (Yes/No): """
    prompt = warning_text
    while True:
        try:
            response = input(prompt).strip().lower()
        except EOFError:
            # No more input can arrive, so consent was never given.
            print()
            return False
        if response == 'yes':
            return True
        if response == 'no':
            return False
        print("\nPlease enter 'Yes' or 'No'")
        # Do not repeat the whole warning, just the question.
        prompt = "Are you sure you want to continue? (Yes/No): "
def process_json_file(file_path):
    """Remove duplicate items from a Bitwarden JSON export.

    Writes two files next to the input, named after it without its
    extension: ``<base>_deleted.json`` with the removed duplicates and
    ``<base>_edited.json`` with the remaining items. Both files carry the
    export's 'encrypted', 'folders', 'collections' and 'organizations'
    values (with defaults when absent).

    Args:
        file_path: Path of the export file to read.

    Returns:
        A tuple ``(number_of_duplicates, number_of_unique_items)``.

    Raises:
        ValueError: if the file cannot be read, is not valid JSON, or does
            not have the expected structure.
    """
    data = _load_export(file_path)

    metadata = {
        'encrypted': data.get('encrypted', False),
        'folders': data.get('folders', []),
        'collections': data.get('collections', []),
        'organizations': data.get('organizations', []),
    }

    unique_items, duplicate_items = _split_duplicates(data['items'])

    base_name = os.path.splitext(file_path)[0]
    _write_export(f"{base_name}_deleted.json", metadata, duplicate_items)
    _write_export(f"{base_name}_edited.json", metadata, unique_items)

    return len(duplicate_items), len(unique_items)


def _load_export(file_path):
    """Read, parse and validate the export; return its top-level dict."""
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON file: {e}") from e
    except (OSError, UnicodeDecodeError) as e:
        raise ValueError(f"Error reading file: {e}") from e
    # Validated outside the try block so a structural problem keeps its own
    # message instead of being reported as a read error.
    validate_json_structure(data)
    return data


def _split_duplicates(items):
    """Partition *items* into (unique, duplicates), keeping first occurrences."""
    unique_items = []
    duplicate_items = []
    removed = set()
    for i, item1 in enumerate(items):
        if i in removed:
            continue
        unique_items.append(item1)
        # Compare with every later item that has not been removed yet.
        for j in range(i + 1, len(items)):
            if j in removed:
                continue
            if items_are_duplicate(item1, items[j]):
                duplicate_items.append(items[j])
                removed.add(j)
    return unique_items, duplicate_items


def _write_export(path, metadata, items):
    """Write *metadata* plus the given *items* list to *path* as JSON."""
    payload = dict(metadata)
    payload['items'] = items
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(payload, f, indent=2, ensure_ascii=False)
def main():
    """Interactive entry point: ask for a file, confirm, process, report.

    Exits with status 1 when the path does not exist, is not a file, or
    processing fails, and with status 0 when the user declines to continue.
    """
    # Paths pasted or dragged into a terminal often arrive with surrounding
    # whitespace or quotes; remove them before checking the path.
    raw_path = input("Please enter the path to your Bitwarden JSON export file: ")
    file_path = raw_path.strip().strip("\"'")

    if not os.path.exists(file_path):
        print(f"\nError: File '{file_path}' does not exist!")
        sys.exit(1)
    if not os.path.isfile(file_path):
        print(f"\nError: '{file_path}' is not a file!")
        sys.exit(1)

    if not get_user_confirmation():
        print("\nOperation cancelled by user.")
        sys.exit(0)

    try:
        duplicates, uniques = process_json_file(file_path)
        print("\n✅ Processing completed successfully!")
        print("📊 Statistics:")
        print(f" - {uniques} unique entries saved to '*_edited.json'")
        print(f" - {duplicates} duplicate entries saved to '*_deleted.json'")
        print("\n📝 Next steps:")
        print("1. Backup your Bitwarden vault")
        print("2. Delete all items in your vault")
        print("3. Import the '*_edited.json' file into Bitwarden")
    except Exception as e:
        # Top-level boundary: report any failure and exit non-zero.
        print(f"\n❌ An error occurred: {str(e)}")
        sys.exit(1)


if __name__ == "__main__":
    main()
Author
@gummipunkt In my opinion the URI check in items_are_duplicate may result in false positives as an item can have more than one login credential for the same URI.
What I changed:
- They must at least share some URI to pass check !!
1.1. It will pass, if the sanitized names are identical (if necessary it will backup password and/or username of the second item to fields list of first item).
1.2. It will pass, if both usernames and passwords are identical
1.3. If they share only the usernames, it will backup the password of second item to fields list of first item
1.4. If the sanitized names are identical .
To keep information loss minimal, I also added a merge function that will merge some item2 attributes into item1 for duplicate items.
Author
@gummipunkt In my opinion the URI check in
items_are_duplicate may result in false positives, as an item can have more than one login credential for the same URI. What I changed:
1. They must at least share some URI to pass the check! 1.1. It will pass if the sanitized names are identical (if necessary it will back up the password and/or username of the second item to the fields list of the first item). 1.2. It will pass if both usernames and passwords are identical. 1.3. If they share only the usernames, it will back up the password of the second item to the fields list of the first item. 1.4. If the sanitized names are identical. To keep information loss minimal, I also added a merge function that will merge some item2 attributes into item1 for duplicate items.
thank you
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Output:
Just import the _edited.json file into your Bitwarden vault via the web interface.