Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions backend/src/database/migrations/U1751459866__gitIntegration.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
-- Undo script for the gitIntegration migration: removes the objects it
-- created, dependents first.

-- Drop trigger (it depends on the function dropped next)
DROP TRIGGER IF EXISTS cleanup_orphaned_repositories_trigger ON git."repositoryIntegrations";

-- Drop function
DROP FUNCTION IF EXISTS git.cleanup_orphaned_repositories();

-- Drop indexes
-- An index lives in the schema of its table, so each name is qualified with
-- git. Unqualified names are resolved through search_path and, unless git is
-- on it, IF EXISTS would silently skip every one of them.
DROP INDEX IF EXISTS git."ix_git_repositoryIntegrations_integrationId";
DROP INDEX IF EXISTS git."ix_git_repositoryIntegrations_repositoryId";
DROP INDEX IF EXISTS git."ix_git_repositories_lastProcessedAt";
DROP INDEX IF EXISTS git."ix_git_repositories_state_priority";
DROP INDEX IF EXISTS git."ix_git_repositories_priority";
DROP INDEX IF EXISTS git."ix_git_repositories_state";

-- Drop tables (association table first: it references git.repositories)
DROP TABLE IF EXISTS git."repositoryIntegrations";
DROP TABLE IF EXISTS git.repositories;

-- Drop schema
DROP SCHEMA IF EXISTS git CASCADE;
80 changes: 80 additions & 0 deletions backend/src/database/migrations/V1751459866__gitIntegration.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
-- Namespace holding every git integration object
CREATE SCHEMA IF NOT EXISTS git;

-- Repositories known to the git integration; url is unique across rows
CREATE TABLE git.repositories (
    -- Row identity and lifecycle timestamps
    id                UUID          NOT NULL DEFAULT uuid_generate_v4() PRIMARY KEY,
    "createdAt"       TIMESTAMPTZ   NOT NULL DEFAULT NOW(),
    "updatedAt"       TIMESTAMPTZ   NOT NULL DEFAULT NOW(),
    "deletedAt"       TIMESTAMPTZ,  -- nullable; presumably a soft-delete marker

    -- Repository identification
    url               VARCHAR(1024) NOT NULL,

    -- Processing state and priority
    -- priority: 0=urgent, 1=high, 2=normal (RepositoryPriority in
    -- crowdgit/enums.py additionally defines 3=low)
    state             VARCHAR(50)   NOT NULL DEFAULT 'pending',
    priority          INTEGER       NOT NULL DEFAULT 0,

    -- Processing metadata (nullable)
    "lastProcessedAt" TIMESTAMPTZ,

    -- Constraints
    UNIQUE (url)
);

-- Link table between repositories and integrations (many-to-many)
CREATE TABLE git."repositoryIntegrations" (
    id              UUID        NOT NULL DEFAULT uuid_generate_v4() PRIMARY KEY,
    "createdAt"     TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    "updatedAt"     TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    -- Deleting either parent row removes its association rows as well
    "repositoryId"  UUID NOT NULL
        REFERENCES git.repositories (id) ON DELETE CASCADE,
    "integrationId" UUID NOT NULL
        REFERENCES public."integrations" (id) ON DELETE CASCADE,

    -- A repository can be linked to a given integration only once
    UNIQUE ("repositoryId", "integrationId")
);

-- Function to clean up orphaned repositories
--
-- Row-level AFTER DELETE trigger function for git."repositoryIntegrations".
-- It removes the repository that the deleted association pointed at, and only
-- when no other association still references it. Scoping the delete to
-- OLD."repositoryId" (instead of sweeping the whole table):
--   * leaves alone repositories that simply have no association yet, e.g.
--     rows that were just inserted and are about to be linked;
--   * avoids scanning every repository once per deleted association row.
-- NOT EXISTS is used rather than NOT IN so the anti-join stays index-friendly
-- and has no NULL pitfalls.
CREATE OR REPLACE FUNCTION git.cleanup_orphaned_repositories()
RETURNS TRIGGER AS $$
BEGIN
    DELETE FROM git.repositories AS r
    WHERE r.id = OLD."repositoryId"
      AND NOT EXISTS (
          SELECT 1
          FROM git."repositoryIntegrations" AS ri
          WHERE ri."repositoryId" = r.id
      );

    -- The return value of a row-level AFTER trigger is ignored
    RETURN NULL;
END;
$$ LANGUAGE plpgsql;

-- Trigger to clean up orphaned repositories after association deletion.
-- Must stay FOR EACH ROW: the function reads OLD."repositoryId".
CREATE TRIGGER cleanup_orphaned_repositories_trigger
    AFTER DELETE ON git."repositoryIntegrations"
    FOR EACH ROW
    EXECUTE FUNCTION git.cleanup_orphaned_repositories();



-- Secondary indexes

-- git.repositories: filter by processing state, priority, and last processing time
-- NOTE(review): (state, priority) can also serve state-only lookups; confirm
-- the single-column state index is still wanted.
CREATE INDEX "ix_git_repositories_state"
    ON git.repositories (state);
CREATE INDEX "ix_git_repositories_priority"
    ON git.repositories (priority);
CREATE INDEX "ix_git_repositories_state_priority"
    ON git.repositories (state, priority);
CREATE INDEX "ix_git_repositories_lastProcessedAt"
    ON git.repositories ("lastProcessedAt");

-- git."repositoryIntegrations": lookups from either side of the association
-- NOTE(review): the UNIQUE ("repositoryId", "integrationId") constraint is
-- already backed by an index led by "repositoryId"; confirm the separate
-- "repositoryId" index is needed.
CREATE INDEX "ix_git_repositoryIntegrations_repositoryId"
    ON git."repositoryIntegrations" ("repositoryId");
CREATE INDEX "ix_git_repositoryIntegrations_integrationId"
    ON git."repositoryIntegrations" ("integrationId");



-- Catalog documentation for the git schema objects
COMMENT ON SCHEMA git IS 'Schema for git integration system that manages repository processing and integration associations';
COMMENT ON TABLE git.repositories IS 'Stores git repository metadata and processing state for the git integration system';
COMMENT ON TABLE git."repositoryIntegrations" IS 'Many-to-many relationship between repositories and integrations';

-- Keep the priority legend in sync with RepositoryPriority in
-- crowdgit/enums.py, which defines URGENT=0, HIGH=1, NORMAL=2 and LOW=3.
COMMENT ON COLUMN git.repositories.priority IS 'Processing priority: 0=urgent, 1=high, 2=normal, 3=low';
COMMENT ON COLUMN git.repositories.state IS 'Current processing state of the repository';
48 changes: 26 additions & 22 deletions scripts/services/docker/Dockerfile.git_integration
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
FROM python:3.13.5-slim-bullseye AS builder
# Base image for both stages
FROM python:3.13.5-slim-bullseye AS base

# Builder stage: install build dependencies, uv, and dependencies
FROM base AS builder

# Install build dependencies
RUN apt-get update && apt-get install -y \
build-essential \
--no-install-recommends \
&& rm -rf /var/lib/apt/lists/*

# Install uv from official image
# Copy uv binary from official image
COPY --from=ghcr.io/astral-sh/uv:0.7.17 /uv /usr/local/bin/uv

WORKDIR /usr/crowd/app
Expand All @@ -18,20 +22,25 @@ ENV UV_LINK_MODE=copy \
UV_PROJECT_ENVIRONMENT=/usr/crowd/app/.venv \
UV_VENV_PATH=/usr/crowd/app/.venv

# Copy only necessary files for dependency resolution (for better caching)
COPY ./services/apps/git_integration/pyproject.toml ./services/apps/git_integration/uv.lock ./services/apps/git_integration/README.md ./
COPY ./LICENSE ./
# Copy only lock, pyproject.toml and License for dependency install caching
COPY ./services/apps/git_integration/pyproject.toml ./services/apps/git_integration/uv.lock ./LICENSE ./

# Install dependencies excluding the project itself for better caching
RUN --mount=type=cache,target=/root/.cache/uv \
uv sync --frozen --no-install-project --no-dev

# Install dependencies
RUN uv sync --frozen --no-dev
# Copy full source code
COPY ./services/apps/git_integration ./LICENSE ./

# Copy source code
COPY ./services/apps/git_integration ./
# Sync full project including the project itself
RUN --mount=type=cache,target=/root/.cache/uv \
uv sync --frozen --no-dev


FROM python:3.13.5-slim-bullseye AS runner
# Runner: minimal image with runtime deps and virtualenv only
FROM base AS runner

# Install runtime dependencies
# Install runtime dependencies only
RUN apt-get update && apt-get install -y \
ca-certificates \
git \
Expand All @@ -40,27 +49,22 @@ RUN apt-get update && apt-get install -y \
&& apt-get clean \
&& apt-get autoremove -y

# Set Python environment variables
ENV PYTHONUNBUFFERED=1
ENV PYTHONDONTWRITEBYTECODE=1
ENV PIP_NO_CACHE_DIR=off
ENV PYTHONUNBUFFERED=1 \
PYTHONDONTWRITEBYTECODE=1 \
PIP_NO_CACHE_DIR=off

WORKDIR /usr/crowd/app

# Copy the virtual environment from builder stage
# Copy virtual environment and app source from builder
COPY --from=builder /usr/crowd/app/.venv /usr/crowd/app/.venv
COPY --from=builder /usr/crowd/app /usr/crowd/app

# Copy the git_integration service from builder stage
COPY --from=builder /usr/crowd/app/ ./

# Activate virtual environment by adding it to PATH
# Add virtual environment bin to PATH
ENV PATH="/usr/crowd/app/.venv/bin:$PATH"

# Make runner script executable
RUN chmod +x ./src/runner.sh

# Expose the default port
EXPOSE 8085

# Set the default command to run the server
CMD ["./src/runner.sh"]
1 change: 1 addition & 0 deletions services/apps/git_integration/LICENSE
1 change: 1 addition & 0 deletions services/apps/git_integration/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ dependencies = [
"prettytable>=3.11.0",
"python-slugify>=8.0.4",
"asyncpg",
"loguru>=0.7.3",
]

[project.optional-dependencies]
Expand Down
1 change: 0 additions & 1 deletion services/apps/git_integration/src/crowdgit-cron

This file was deleted.

1 change: 0 additions & 1 deletion services/apps/git_integration/src/crowdgit-server

This file was deleted.

65 changes: 65 additions & 0 deletions services/apps/git_integration/src/crowdgit/database/connection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from typing import Dict, Any, Optional
from contextlib import asynccontextmanager

import asyncpg
from asyncpg import Pool, Connection
from loguru import logger

from crowdgit.settings import (
CROWD_DB_WRITE_HOST,
CROWD_DB_PORT,
CROWD_DB_USERNAME,
CROWD_DB_PASSWORD,
CROWD_DB_DATABASE,
)

# Global connection pool
_pool: Optional[Pool] = None


def get_db_config() -> Dict[str, Any]:
    """Assemble the keyword arguments passed to ``asyncpg.create_pool``.

    Connection details come from ``crowdgit.settings``; pool sizing, the
    command timeout and the application name are fixed in this function.
    """
    # Where to connect and as whom
    connection_settings = dict(
        database=CROWD_DB_DATABASE,
        user=CROWD_DB_USERNAME,
        password=CROWD_DB_PASSWORD,
        host=CROWD_DB_WRITE_HOST,
        port=CROWD_DB_PORT,
    )
    # How the pool behaves
    pool_settings = dict(
        min_size=5,
        max_size=20,
        command_timeout=120,
        server_settings={"application_name": "git_integration"},
    )
    return {**connection_settings, **pool_settings}


async def get_pool() -> Pool:
    """Return the module-wide connection pool, building it on first use."""
    global _pool

    # Fast path: a pool has already been created
    if _pool is not None:
        return _pool

    _pool = await asyncpg.create_pool(**get_db_config())
    logger.info("Created database connection pool")
    return _pool


@asynccontextmanager
async def get_db_connection() -> Connection:
    """Async context manager yielding a connection acquired from the pool.

    The connection is handed back to the pool when the ``async with`` block
    exits. Any exception raised inside the block is logged with its
    traceback and then re-raised unchanged.
    """
    db_pool = await get_pool()

    async with db_pool.acquire() as conn:
        try:
            yield conn
        except Exception as err:
            logger.exception("Database error occurred: {}", err)
            raise


async def close_pool():
    """Close the module-wide pool, if one exists, and forget it.

    Resetting the global to ``None`` lets a later ``get_pool()`` call build
    a fresh pool.
    """
    global _pool

    # Nothing to do when no pool was ever created (or it is already closed)
    if not _pool:
        return

    await _pool.close()
    _pool = None
    logger.info("Closed database connection pool")
24 changes: 24 additions & 0 deletions services/apps/git_integration/src/crowdgit/database/crud.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from typing import Dict, Any, Optional
from .registry import fetchval, fetchrow


async def insert_repository(url: str, priority: int = 0) -> str:
    """Create a repository row in the 'pending' state.

    Args:
        url: Repository URL to store.
        priority: Processing priority to store (defaults to 0).

    Returns:
        The id returned by the INSERT, converted to ``str``.
    """
    insert_sql = """
    INSERT INTO git.repositories (url, priority, state)
    VALUES ($1, $2, 'pending')
    RETURNING id
    """
    bind_values = (url, priority)
    new_id = await fetchval(insert_sql, bind_values)
    return str(new_id)


async def get_repository_by_url(url: str) -> Optional[Dict[str, Any]]:
    """Look up a repository by its URL, ignoring rows with "deletedAt" set.

    Returns:
        A dict with the selected columns, or ``None`` when no row matches.
    """
    select_sql = """
    SELECT id, url, state, priority, "lastProcessedAt", "createdAt", "updatedAt"
    FROM git.repositories
    WHERE url = $1 AND "deletedAt" IS NULL
    """
    row = await fetchrow(select_sql, (url,))
    if not row:
        return None
    return dict(row)
49 changes: 49 additions & 0 deletions services/apps/git_integration/src/crowdgit/database/registry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from typing import List, Dict, Any, Optional
from .connection import get_db_connection
from loguru import logger
from crowdgit.errors import InternalError

async def query(sql: str, params: Optional[tuple] = None) -> List[Dict[str, Any]]:
    """Run a statement on a pooled connection and return all rows as dicts.

    Args:
        sql: SQL text; values are referenced with positional placeholders
            ($1, $2, ...).
        params: Values for the placeholders, or ``None``/empty when the
            statement takes none.

    Returns:
        One ``dict`` per returned row.

    Raises:
        InternalError: On any failure. The original exception is attached
            as ``__cause__``.
    """
    try:
        async with get_db_connection() as conn:
            # An empty/None params simply expands to no extra arguments
            results = await conn.fetch(sql, *(params or ()))
            return [dict(row) for row in results]
    except Exception as error:
        logger.error("Database query failed - SQL: {}, Params: {}, Error: {}", sql, params, error)
        # Chain explicitly so the root cause stays visible to callers
        raise InternalError("Database query failed") from error


async def execute(sql: str, params: Optional[tuple] = None) -> str:
    """Run a write statement on a pooled connection.

    Args:
        sql: SQL text; values are referenced with positional placeholders
            ($1, $2, ...).
        params: Values for the placeholders, or ``None``/empty when the
            statement takes none.

    Returns:
        Whatever ``conn.execute`` returns (annotated as ``str``).

    Raises:
        InternalError: On any failure. The original exception is attached
            as ``__cause__``.
    """
    try:
        async with get_db_connection() as conn:
            # An empty/None params simply expands to no extra arguments
            return await conn.execute(sql, *(params or ()))
    except Exception as error:
        logger.error("Database write operation failed - SQL: {}, Params: {}, Error: {}", sql, params, error)
        # Chain explicitly so the root cause stays visible to callers
        raise InternalError("Database execute operation failed") from error



async def fetchval(sql: str, params: Optional[tuple] = None) -> Any:
    """Run a statement on a pooled connection and return a single value.

    Args:
        sql: SQL text; values are referenced with positional placeholders
            ($1, $2, ...).
        params: Values for the placeholders, or ``None``/empty when the
            statement takes none.

    Returns:
        Whatever ``conn.fetchval`` returns for the statement.

    Raises:
        InternalError: On any failure. The original exception is attached
            as ``__cause__``.
    """
    try:
        async with get_db_connection() as conn:
            # An empty/None params simply expands to no extra arguments
            return await conn.fetchval(sql, *(params or ()))
    except Exception as error:
        logger.error("Database fetchval failed - SQL: {}, Params: {}, Error: {}", sql, params, error)
        # Chain explicitly so the root cause stays visible to callers
        raise InternalError("Database fetchval failed") from error


async def fetchrow(sql: str, params: Optional[tuple] = None) -> Optional[Dict[str, Any]]:
    """Run a statement on a pooled connection and return a single row.

    Args:
        sql: SQL text; values are referenced with positional placeholders
            ($1, $2, ...).
        params: Values for the placeholders, or ``None``/empty when the
            statement takes none.

    Returns:
        The row converted to a ``dict``, or ``None`` when ``conn.fetchrow``
        returns a falsy result.

    Raises:
        InternalError: On any failure. The original exception is attached
            as ``__cause__``.
    """
    try:
        async with get_db_connection() as conn:
            # An empty/None params simply expands to no extra arguments
            result = await conn.fetchrow(sql, *(params or ()))
            return dict(result) if result else None
    except Exception as error:
        logger.error("Database fetchrow failed - SQL: {}, Params: {}, Error: {}", sql, params, error)
        # Chain explicitly so the root cause stays visible to callers
        raise InternalError("Database fetchrow failed") from error

26 changes: 26 additions & 0 deletions services/apps/git_integration/src/crowdgit/enums.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from enum import Enum


class ErrorCode(str, Enum):
    """Machine-readable codes carried by crowdgit errors.

    Members are also ``str`` instances, so they compare equal to their
    string values.
    """

    # Used when no more specific code has been assigned
    UNKNOWN = "unknown"
    # Internal failure
    INTERNAL = "server-error"


class RepositoryState(str, Enum):
    """States a repository can be in while it is being processed.

    Members are also ``str`` instances, so they compare equal to their
    string values.
    """

    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"


class RepositoryPriority(int, Enum):
    """Repository processing priorities; a lower value is more urgent.

    Declared as an ``int``-mixin ``Enum`` (mirroring the ``str``-mixin enums
    in this module) rather than a bare ``int`` subclass, so members are real
    enum members: they can be iterated, looked up by value, and an unknown
    value is rejected with ``ValueError``. Members still compare equal to,
    and can be used as, plain integers.
    """

    URGENT = 0
    HIGH = 1
    NORMAL = 2
    LOW = 3
13 changes: 11 additions & 2 deletions services/apps/git_integration/src/crowdgit/errors.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
# -*- coding: utf-8 -*-
from dataclasses import dataclass
from crowdgit.enums import ErrorCode


@dataclass
class CrowdGitError(Exception):
    """Base class for crowdgit errors.

    Attributes:
        error_message: Human-readable description of the failure.
        error_code: Machine-readable code; ``ErrorCode.UNKNOWN`` by default.
    """

    error_message: str = "An unknown error occurred"
    error_code: ErrorCode | None = ErrorCode.UNKNOWN

    def __post_init__(self) -> None:
        # The dataclass-generated __init__ never calls Exception.__init__, so
        # without this str(exc) is empty whenever the defaults or keyword
        # arguments are used. Seed the exception args with the message.
        super().__init__(self.error_message)


@dataclass
class InternalError(CrowdGitError):
    """CrowdGitError whose defaults describe an internal failure."""

    # Defaults override those of the base class
    error_message: str = "Internal error"
    error_code: ErrorCode = ErrorCode.INTERNAL


class GitRunError(CrowdGitError):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Models package
Loading
Loading