Skip to content

Instantly share code, notes, and snippets.

@kbrock
Last active October 22, 2020 00:44
Show Gist options
  • Select an option

  • Save kbrock/3ee8e721f0e50218621652ac55b81652 to your computer and use it in GitHub Desktop.

Select an option

Save kbrock/3ee8e721f0e50218621652ac55b81652 to your computer and use it in GitHub Desktop.
Converts from using a foreign relationship table to using a materialized path in the same table (using ancestry)
relationships = Relationship.where(relationship: 'genealogy', resource_type: 'VmOrTemplate').select(:id, :resource_id, :ancestry).to_a
rel_by_rel_id = relationships.index_by(:id) # relationship => resource_id
rel_by_vm_id = relationships.index_by(:resource_id) # resource_id => ancestry
Vm.where(:id => Relationship.select(:resource_id)).each_in_batches do |vm|
ancestry = rel_by_vm_id[vm.id].ancestry.split("/").map { |rel_id| rel_by_rel_id[rel_id].resource_id.to_s }.join("/")
vm.update_attribute(:ancestry => ancestry)
end
--- step 1 ancestry for the relationships (split apart)
def ancestry_resource_ids(relationship, model_class_name, id_range)
Relationship.where(
:relationship => relationship,
:resource_type => model_class_name,
:resource_id => id_range
).each_with_object([]) do |a_rels, ret|
a_rels.ancestry.split('/').map(&:to_i)
.each_with_index do |rel_id|
ret << [a_rels.resource_id, rel_id]
end
end
end
def ancestry_resource_ids(relationship, model_class_name, id_range)
%q{
SELECT a_rels.resource_id AS src_id, relationships.id AS rel_id
FROM relationships a_rels
LEFT JOIN LATERAL UNNEST(STRING_TO_ARRAY(a_rels.ancestry, '/')::BIGINT[])
WITH ORDINALITY AS relationships(id, indx) ON TRUE
WHERE a_rels.relationship = '#{relationship}'
AND a_rels.resource_type = '#{model_class_name}'
AND a_rels.resource_id BETWEEN #{id_range.first} AND #{id_range.last}
ORDER BY a_rels.resource_id, relationships.indx
}
end
---- step 2 new ancestry with src_id from ancestry with rel_id
def ancestry_of_src_ids_for_src(ancestry_resources, relationship)
ancestry_resources.group_by {|src_id, rel_id| src_id }
.map do |src_id, recs|
{
src_id: src_id,
new_ancestry: recs.map {|rec| Model.find(id: rec.rel_id).resource_id }
.map(&:to_s).join('/')
}
}
end
SELECT ancestry_resources.src_id AS src_id,
ARRAY_TO_STRING(ARRAY_AGG(res_rels.resource_id)::VARCHAR[], '/') AS new_ancestry
FROM ( #{ancestry_resources}
) AS ancestry_resources
JOIN relationships res_rels
ON res_rels.id = ancestry_resources.rel_id
GROUP BY ancestry_resources.src_id
def update_src(model, new_ancestors)
UPDATE vms
SET ancestry = new_ancestry
FROM ( #{new_ancestors}
) AS new_ancestors
WHERE new_ancestors.src_id = vms.id
SQL
end
ancestry_resources = ancestry_resource_ids()
new_ancestors = ancestry_of_src_ids_for_src(ancestry_resources, )
update_src(VmOrTemplate, new_ancestors)
----
UPDATE vms
SET ancestry = new_ancestry
FROM (
-- join to resources to convert rel.id to vm.id (it is in resource_id column)
-- ARRAY_AGG takes multiple rows and converts to an array
SELECT ancestor_resources_for_vms.vm_id AS vm_id,
ARRAY_TO_STRING(ARRAY_AGG(res_rels.resource_id)::VARCHAR[], '/') AS new_ancestry
FROM (
-- get vm and associated ancestor rel_id
-- unnest converts an array to multiple rows, 1 ancestor per row
-- with ordinality, and order by relationships.indx make sure ancestors stay in the same order
SELECT vms.id AS vm_id, relationships.id AS rel_id
FROM vms
JOIN relationships a_rels ON a_rels.resource_id = vms.id
AND a_rels.relationship = 'genealogy'
AND a_rels.resource_type = 'VmOrTemplate'
LEFT JOIN LATERAL UNNEST(STRING_TO_ARRAY(a_rels.ancestry, '/')::BIGINT[])
WITH ORDINALITY AS relationships(id, indx) ON TRUE
WHERE vms.id BETWEEN #{id_range.first} AND #{id_range.last}
ORDER BY vms.id, relationships.indx
) AS ancestor_resources_for_vms
JOIN relationships res_rels ON res_rels.id = ancestor_resources_for_vms.rel_id AND res_rels.relationship = 'genealogy'
GROUP BY ancestor_resources_for_vms.vm_id
) AS new_ancestors_for_vms
WHERE new_ancestors_for_vms.vm_id = vms.id
@d-m-u
Copy link
Copy Markdown

d-m-u commented Jul 3, 2020

edit: yes, regions. it's missing the in_my_region stuff

@kbrock
Copy link
Copy Markdown
Author

kbrock commented Jul 10, 2020

where we currentlyu stand:

UPDATE vms
SET ancestry = new_ancestry
FROM (
  -- join to resources to convert rel.id to vm.id (it is in resource_id column)
  -- ARRAY_AGG takes multiple rows and converts to an array
  SELECT ancestor_resources_for_vms.vm_id AS vm_id,
  ARRAY_TO_STRING(ARRAY_AGG(res_rels.resource_id)::VARCHAR[], '/') AS new_ancestry
  FROM (
    -- get vm and associated ancestor rel_id
    -- unnest converts an array to multiple rows, 1 ancestor per row
    -- with ordinality, and order by relationships.indx make sure ancestors stay in that order
    SELECT vms.id AS vm_id, relationships.id AS rel_id
    FROM vms
    JOIN relationships a_rels ON a_rels.resource_id = vms.id
    AND a_rels.relationship = 'genealogy'
    AND a_rels.resource_type = 'VmOrTemplate'
    LEFT JOIN LATERAL UNNEST(STRING_TO_ARRAY(a_rels.ancestry, '/')::BIGINT[])
    -- TODO: WHERE vms.id between X, Y
    WITH ORDINALITY AS relationships(id, indx) ON TRUE
    ORDER BY vms.id, relationships.indx
  ) AS ancestor_resources_for_vms
  JOIN relationships res_rels ON res_rels.id = ancestor_resources_for_vms.rel_id AND res_rels.relationship = 'genealogy'
  GROUP BY ancestor_resources_for_vms.vm_id
) AS new_ancestors_for_vms
WHERE new_ancestors_for_vms.vm_id = vms.id

@d-m-u
Copy link
Copy Markdown

d-m-u commented Jul 12, 2020

      UPDATE vms
      SET ancestry = new_ancestry
      FROM (
        SELECT ancestor_resources_for_vms.vm_id AS vm_id,
        ARRAY_TO_STRING(ARRAY_AGG(res_rels.resource_id)::VARCHAR[], '/') AS new_ancestry
        FROM (
          SELECT vms.id AS vm_id, relationships.id AS rel_id
          FROM vms
          JOIN relationships a_rels ON a_rels.resource_id = vms.id
          AND a_rels.relationship = 'genealogy'
          AND a_rels.resource_type = 'VmOrTemplate'
          LEFT JOIN LATERAL UNNEST(STRING_TO_ARRAY(a_rels.ancestry, '/')::BIGINT[])
          WITH ORDINALITY AS relationships(id, indx) ON TRUE
          WHERE vms.id BETWEEN #{id_range.first} AND #{id_range.last}
          ORDER BY vms.id, relationships.indx
        ) AS ancestor_resources_for_vms
        JOIN relationships res_rels ON res_rels.id = ancestor_resources_for_vms.rel_id AND res_rels.relationship = 'genealogy'
        GROUP BY ancestor_resources_for_vms.vm_id
      ) AS new_ancestors_for_vms
      WHERE new_ancestors_for_vms.vm_id = vms.id

it's closer; i haven't checked the ordering yet

@d-m-u
Copy link
Copy Markdown

d-m-u commented Jul 13, 2020

def ancestry_resource_ids(relationship, model_class_name, id_range)
    Relationship.where(
      :relationship => relationship,
      :resource_type => model_class_name,
      :resource_id => id_range
    ).each_with_object([]) do |a_rels, ret|
      next unless a_rels.ancestry

      a_rels.ancestry.split('/').map(&:to_i)
      .each_with_index do |rel_id|
        ret << [a_rels.resource_id, rel_id]
      end
    end
  end

we need that guard clause i think

@kbrock
Copy link
Copy Markdown
Author

kbrock commented Oct 21, 2020

assumption of my concern is that all the vms in the genealogy chain are local and not hidden by the server.
I think they can be archived and the oids for archived vms still come back.

(and this would be pruned out by joining to vms) - but then our data would be different from the source (e.g. vmware) and we don't want that either.)

If I knew what to do I would be fixing the code and not writing a comment

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment