CrowdDotDev · borfast · Aug 12, 2025 · Aug 12, 2025 · Aug 12, 2025 · Aug 12, 2025
@@ -0,0 +1,2 @@
+ALTER TABLE "segmentRepositories" -- inverse of the migration that adds updated_at
+DROP COLUMN IF EXISTS updated_at; -- IF EXISTS: running the rollback again must not fail
@@ -0,0 +1,3 @@
+ALTER PUBLICATION sequin_pub ADD TABLE "segmentRepositories"; -- include the table in the sequin_pub publication
+ALTER TABLE "segmentRepositories" REPLICA IDENTITY FULL; -- log the complete old row on UPDATE/DELETE
+GRANT SELECT ON TABLE "segmentRepositories" TO sequin; -- read access for the sequin role
@@ -0,0 +1,2 @@
+ALTER TABLE "segmentRepositories" -- adds the last-modified timestamp column
+ADD COLUMN IF NOT EXISTS updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP; -- existing rows are backfilled with the default; IF NOT EXISTS makes a re-run a no-op
diff --git a/services/cronjobs/archived_repositories/src/database.ts b/services/cronjobs/archived_repositories/src/database.ts
@@ -62,7 +62,7 @@ export async function updateRepositoryStatus(
   try {
     await client.query(
       `UPDATE "segmentRepositories" 
-       SET archived = $1, excluded = $2, last_archived_check = NOW(), updatedAt = NOW()
+       SET archived = $1, excluded = $2, last_archived_check = NOW(), updated_at = NOW()
        WHERE repository = $3`,
       [isArchived, isExcluded, repository]
     );

@@ -9,6 +9,8 @@ DESCRIPTION >
     - `logoUrl`, `organizationId`, `website`, `github`, `linkedin`, `twitter` contain project branding and social links.
     - `widgets` array defines which insights widgets are enabled for this project.
     - `repositories` array contains the list of repository URLs associated with the project.
+    - `archivedRepositories` array contains the project's archived repositories; it is used to show archived repos in the frontend.
+    - `excludedRepositories` array contains the project's excluded repositories; excluded repos are not used in health score and security metrics calculations.
     - `enabled` and `isLF` are flags for project status and Linux Foundation association (UInt8 boolean).
     - `keywords` array contains searchable keywords and tags for the project.
     - `collectionsSlugs` array contains slugs of collections this project belongs to.
@@ -38,6 +40,8 @@ SCHEMA >
     `twitter` String,
     `widgets` Array(String),
     `repositories` Array(String),
+    `archivedRepositories` Array(String),
+    `excludedRepositories` Array(String),
     `enabled` UInt8,
     `isLF` UInt8,
     `keywords` Array(String),

@@ -0,0 +1,28 @@
+DESCRIPTION >
+    - `segmentRepositories` contains the repository data associated with segments.
+    - Replicated from Postgres - it is meant to become the single source of truth about repositories in the future.
+    - Schema:
+    - `repository` is the URL for the repository.
+    - `segmentId` links to the segment the repository belongs to.
+    - `insightsProjectId` links to the insightsProject the repository belongs to.
+    - `archived` indicates whether the repository is archived (true/false).
+    - `excluded` indicates whether the repository is excluded from analytics and metrics (true/false).
+    - `last_archived_check` is the timestamp of the last check for whether the repository is archived.
+    - `createdAt` is the row creation timestamp; it also drives the yearly partitioning.
+    - `updatedAt` is the last-update timestamp (Postgres column `updated_at`); it is the ReplacingMergeTree version, so the most recently updated state of a repository wins.
+
+SCHEMA >
+    `repository` String `json:$.record.repository`,
+    `segmentId` UUID `json:$.record.segmentId`,
+    `insightsProjectId` UUID `json:$.record.insightsProjectId`,
+    `archived` Bool `json:$.record.archived`,
+    `excluded` Bool `json:$.record.excluded`,
+    `last_archived_check` Nullable(DateTime64(3)) `json:$.record.last_archived_check`,
+    `createdAt` DateTime64(3) `json:$.record.createdAt`,
+    `updatedAt` DateTime64(3) `json:$.record.updated_at`
+
+
+ENGINE ReplacingMergeTree
+ENGINE_PARTITION_KEY toYear(createdAt)
+ENGINE_SORTING_KEY repository
+ENGINE_VER updatedAt
@@ -14,6 +14,8 @@ SQL >
         insights_projects_populated_ds.description,
         insights_projects_populated_ds.logoUrl as logo,
         insights_projects_populated_ds.repositories,
+        insights_projects_populated_ds.archivedRepositories,
+        insights_projects_populated_ds.excludedRepositories,
         insights_projects_populated_ds.isLF,
         insights_projects_populated_ds.widgets,
         insights_projects_populated_ds.keywords,

@@ -107,6 +107,21 @@ NODE insights_projects_populated_copy_health_score_deduplicated
 SQL >
     SELECT id, overallScore as healthScore FROM health_score_copy_ds
 
+NODE archived_excluded_repositories
+DESCRIPTION >
+    Archived and excluded repository URLs, one row per insightsProject.
+    Grouped by project only, so the LEFT JOIN on insightsProjectId in the results node cannot duplicate a project whose repositories span several segments.
+
+SQL >
+    SELECT
+        insightsProjectId,
+        groupArrayIf(repository, archived = true) AS "archivedRepositories",
+        groupArrayIf(repository, excluded = true) AS "excludedRepositories"
+    FROM segmentRepositories FINAL
+    WHERE
+        archived = true OR excluded = true
+    GROUP BY insightsProjectId
+
 NODE insights_projects_populated_copy_results
 DESCRIPTION >
     Join everything together
@@ -140,7 +155,9 @@ SQL >
         insights_projects_populated_copy_aggregates.softwareValue as softwareValue,
         insights_projects_populated_copy_aggregates.contributorCount as contributorCount,
         insights_projects_populated_copy_aggregates.organizationCount as organizationCount,
-        insights_projects_populated_copy_health_score_deduplicated.healthScore as healthScore
+        insights_projects_populated_copy_health_score_deduplicated.healthScore as healthScore,
+        archived_excluded_repositories.archivedRepositories as archivedRepositories,
+        archived_excluded_repositories.excludedRepositories as excludedRepositories
     FROM insightsProjects FINAL
     LEFT JOIN
         insights_projects_populated_copy_collections_slugs
@@ -160,6 +177,9 @@ SQL >
     LEFT JOIN
         insights_projects_populated_copy_health_score_deduplicated
         ON insights_projects_populated_copy_health_score_deduplicated.id = insightsProjects.id
+    LEFT JOIN
+        archived_excluded_repositories
+        ON archived_excluded_repositories.insightsProjectId = insightsProjects.id
     WHERE isNull (insightsProjects.deletedAt)
 
 TYPE COPY

@@ -45,7 +45,10 @@ SQL >
         activityRepositories_filtered.repo as slug,
         null as logo,
         activityRepositories_filtered.projectSlug as "projectSlug",
-        null as name
+        null as name,
+        sr.archived as archived,
+        sr.excluded as excluded
     from activityRepositories_filtered
+    join segmentRepositories as sr on sr.insightsProjectId = activityRepositories_filtered.projectId
     order by activityRepositories_filtered.repo asc
     limit {{ Integer(limit, 10, description="Limit number of records for each type", required=False) }}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		ALTER TABLE "segmentRepositories" -- inverse of the migration that adds updated_at
		DROP COLUMN IF EXISTS updated_at; -- IF EXISTS: running the rollback again must not fail
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		ALTER TABLE "segmentRepositories" -- adds the last-modified timestamp column
		ADD COLUMN IF NOT EXISTS updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP; -- existing rows are backfilled with the default; IF NOT EXISTS makes a re-run a no-op