Python project structure — pyproject.toml, src layout, __init__.py, .gitignore, dependency groups, type hints, py.typed, test structure, entry points, ruff/mypy configuration
91
87%
Does it follow best practices?
Impact
99%
1.03x
Average score across 5 eval scenarios
Passed
No known issues
{
"context": "Tests whether the agent properly restructures a monolithic Flask app into a well-organized project with pyproject.toml, dependency groups, Flask app factory, Blueprints, separated concerns, .gitignore, type hints, and test infrastructure.",
"type": "weighted_checklist",
"checklist": [
{
"name": "pyproject.toml with build-system",
"description": "pyproject.toml exists with [build-system] and [project] sections, not setup.py",
"max_score": 7
},
{
"name": "requires-python",
"description": "pyproject.toml has requires-python >= 3.11",
"max_score": 4
},
{
"name": "Dependency groups separated",
"description": "[project.optional-dependencies] has dev or test groups with pytest, ruff, mypy — not in main dependencies",
"max_score": 7
},
{
"name": "Ruff configured",
"description": "[tool.ruff] section exists in pyproject.toml",
"max_score": 5
},
{
"name": "Mypy strict",
"description": "[tool.mypy] section with strict = true in pyproject.toml",
"max_score": 5
},
{
"name": ".gitignore with venv",
"description": ".gitignore includes .venv/ or venv/",
"max_score": 5
},
{
"name": ".gitignore with pycache and .env",
"description": ".gitignore includes __pycache__/ and .env",
"max_score": 4
},
{
"name": "Flask app factory",
"description": "main.py has a create_app() function that creates and returns a Flask instance, registering blueprints and error handlers",
"max_score": 8
},
{
"name": "Config from environment (BaseSettings)",
"description": "config.py uses pydantic-settings BaseSettings with database_url, secret_key, debug loaded from environment",
"max_score": 7
},
{
"name": "__init__.py everywhere",
"description": "All package directories (app package, routes/, tests/) have __init__.py",
"max_score": 7
},
{
"name": "Routes use Blueprints",
"description": "Routes are in separate files using Flask Blueprint — products, orders, users each in their own file",
"max_score": 7
},
{
"name": "Database logic in db.py",
"description": "Database connection and queries are in a separate db.py, not in route files",
"max_score": 5
},
{
"name": "Custom errors with handler",
"description": "errors.py has custom exception classes and error handlers registered in the app factory",
"max_score": 5
},
{
"name": "Type hints on functions",
"description": "Function definitions include type hints for parameters and return types",
"max_score": 7
},
{
"name": "tests/ with conftest.py and __init__.py",
"description": "tests/ directory has __init__.py, conftest.py with app/client fixtures, and at least one test file",
"max_score": 7
},
{
"name": "No setup.py",
"description": "No setup.py or setup.cfg file exists",
"max_score": 3
},
{
"name": "No hardcoded config",
"description": "No hardcoded DATABASE_URL, SECRET_KEY, or similar values in source code — all loaded from Settings",
"max_score": 4
}
]
}
evals
scenario-1
scenario-2
scenario-3
scenario-4
scenario-5
skills
python-project-structure
verifiers