Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .config/mise.toml
Original file line number Diff line number Diff line change
@@ -1,2 +1,9 @@
[tools]
python = "3.13.3"
node = "latest"
lefthook = "latest"
yamllint = "latest"
actionlint = "latest"
shellcheck = "latest"
markdownlint-cli2 = "latest"
jq = "latest"
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
__pycache__/
*.csv
**/.csv
*.json
**/*.json
*.xlsx
**/.xlsx
*.xlsx#
**/.xlsx#
*.parquet
**/.parquet
60 changes: 60 additions & 0 deletions .lefthook.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# For an explanation, refer to the following link:
# https://lefthook.dev/configuration/
pre-commit:
parallel: true
jobs:
- name: shell script validation
run: shellcheck --shell=bash -x {staged_files}
glob:
- "*.sh"
- "*.zsh"
- "*.bash"

- name: markdown linting
run: markdownlint-cli2 --fix --config .markdownlint.json {staged_files}
glob:
- "*.md"
stage_fixed: true

- name: yaml linting
run: yamllint -c .yamllint-config.yaml .
glob: "*.y*ml"

- name: Github Action linting
run: actionlint
glob:
- ".github/workflows/*.y*ml"

- name: Ruff Formatting
run: uv run ruff format -q .
glob:
- "*.py"
stage_fixed: true

- name: Ruff Syntax checking
run: uv run ruff check --fix -q
glob:
- "*.py"
stage_fixed: true

- name: MyPy type validation
run: uv run mypy .
glob:
- "*.py"

- name: json validation
run: tools/check-json.sh {staged_files}
glob:
- "*.json"

- name: check for merge conflicts
run: tools/check-merge-conflicts.sh {staged_files}

- name: check for bad file details
run: tools/check-file-details.sh {staged_files}
stage_fixed: true

output:
- success
- failure
- summary
44 changes: 0 additions & 44 deletions .pre-commit-config.yaml

This file was deleted.

8 changes: 5 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ Examples of the full setup for some OSes are below:
eval "$(mise activate bash)"
pip install --upgrade pip wheel uv
uv sync
uv run pre-commit install
lefthook install pre-commit -f
```

Another command for installing mise in your session can also work (in bash):
Expand All @@ -98,7 +98,7 @@ Another command for installing mise in your session can also work (in bash):
eval "$(mise activate zsh)"
pip install --upgrade pip wheel uv
uv sync
uv run pre-commit install
lefthook install pre-commit -f
```

## Todo / Known Issues
Expand All @@ -115,7 +115,9 @@ seems wrong.

## Contributing & Code Standards

We have a [.pre-commit-config.yaml](.pre-commit-config.yaml) file which enforces some linting / formatting rules.
We have a [.lefthook.yml](.lefthook.yml) file which enforces some linting / formatting rules.

We also rely on [ruff](https://docs.astral.sh/ruff/) and [mypy](https://www.mypy-lang.org/) to enforce Python coding standards.

Pull requests and reviews are welcome on the main repo. To check type safety, use [mypy](https://github.com/python/mypy):

Expand Down
12 changes: 7 additions & 5 deletions file_utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import copy
import json
import os
from schemas import enrichment_print_schema
from utils import (
convert_to_dataframe,
logger,
output_folder,
)
import xlsxwriter # type: ignore [import-untyped]

Expand All @@ -16,8 +18,9 @@ def export_to_file(
if not facilities_data or not facilities_data.get("facilities", []):
logger.warning("No data to export!")
return ""

full_name = f"{filename}.{file_type}"
# make sure the folder we're dropping files into exists
os.makedirs(output_folder, exist_ok=True)
full_name = f"{output_folder}/{filename}.{file_type}"
if file_type in ["csv", "xlsx", "parquet"]:
writer = convert_to_dataframe(facilities_data["facilities"])
match file_type:
Expand All @@ -36,10 +39,9 @@ def export_to_file(
json.dump(facilities_data, f_out, indent=2, sort_keys=True, default=str)

logger.info(
"%s file '%s.%s' created successfully with %s facilities.",
file_type,
filename,
"%s file '%s' created successfully with %s facilities.",
file_type,
full_name,
len(facilities_data["facilities"]),
)
return filename
Expand Down
2 changes: 1 addition & 1 deletion ice_scrapers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@
"HLG": "Harlingen Field Office",
"HOU": "Houston Field Office",
"LOS": "Los Angeles Field Office",
"MIA": "Miami Field Office",
"MIA": "Miramar Sub Office",
"NEW": "Newark Field Office",
"NOL": "New Orleans Field Office",
"NYC": "New York City Field Office",
Expand Down
7 changes: 5 additions & 2 deletions ice_scrapers/field_offices.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def scrape_field_offices() -> dict:
return office_data


def _scrape_page(page_url: str) -> list:
def _scrape_page(page_url: str) -> list[dict]:
"""Scrape a single page of facilities using BeautifulSoup"""
logger.debug(" Fetching: %s", page_url)
try:
Expand Down Expand Up @@ -120,7 +120,10 @@ def _extract_single_office(element: BeautifulSoup, page_url: str) -> dict:
field_office = element.select_one(".views-field-title")
if field_office:
office["field_office"] = field_office.text.strip()
office["id"] = field_office_to_aor[office["field_office"]]
try:
office["id"] = field_office_to_aor[office["field_office"]]
except Exception:
logger.warning("Could not attach %s as a field office! Maybe update AORs?", office["field_office"])
address = element.select_one(".address-line1")
if address:
office["address"]["street"] = address.text.strip()
Expand Down
3 changes: 1 addition & 2 deletions ice_scrapers/spreadsheet_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
facility_schema,
field_office_schema,
)
from typing import Tuple
from utils import (
logger,
session,
Expand Down Expand Up @@ -59,7 +58,7 @@
]


def _download_sheet(keep_sheet: bool = True, force_download: bool = True) -> Tuple[polars.DataFrame, str]:
def _download_sheet(keep_sheet: bool = True, force_download: bool = True) -> tuple[polars.DataFrame, str]:
"""Download the detention stats sheet from ice.gov"""
resp = session.get(base_xlsx_url, timeout=120)
resp.raise_for_status()
Expand Down
9 changes: 4 additions & 5 deletions ice_scrapers/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from bs4 import BeautifulSoup
import re
from typing import Tuple
from utils import (
logger,
session,
Expand Down Expand Up @@ -35,7 +34,7 @@ def special_facilities(facility: dict) -> dict:
return facility


def repair_name(name: str, locality: str) -> Tuple[str, bool]:
def repair_name(name: str, locality: str) -> tuple[str, bool]:
"""Even facility names are occasionally bad"""
matches = [
{"match": "ALEXANDRIA STAGING FACILI", "replace": "Alexandria Staging Facility", "locality": "ALEXANDRIA"},
Expand Down Expand Up @@ -96,7 +95,7 @@ def repair_name(name: str, locality: str) -> Tuple[str, bool]:
return name, cleaned


def repair_street(street: str, locality: str = "") -> Tuple[str, bool]:
def repair_street(street: str, locality: str = "") -> tuple[str, bool]:
"""Generally, we'll let the spreadsheet win arguments just to be consistent"""
street_filters = [
# address mismatch between site and spreadsheet
Expand Down Expand Up @@ -217,7 +216,7 @@ def repair_street(street: str, locality: str = "") -> Tuple[str, bool]:
return street, cleaned


def repair_zip(zip_code: int, locality: str) -> Tuple[str, bool]:
def repair_zip(zip_code: int, locality: str) -> tuple[str, bool]:
"""
Excel does a cool thing where it strips leading 0s
Also, many zip codes are mysteriously discordant
Expand Down Expand Up @@ -248,7 +247,7 @@ def repair_zip(zip_code: int, locality: str) -> Tuple[str, bool]:
return zcode, cleaned


def repair_locality(locality: str, administrative_area: str) -> Tuple[str, bool]:
def repair_locality(locality: str, administrative_area: str) -> tuple[str, bool]:
"""
There is no consistency with any address.
How the post office ever successfully delivered a letter is beyond me
Expand Down
5 changes: 2 additions & 3 deletions ice_scrapers/vera_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import os
import polars
from schemas import facility_schema
from typing import Tuple
from utils import (
logger,
session,
Expand All @@ -17,7 +16,7 @@
filename = f"{SCRIPT_DIR}{os.sep}vera_facilities.csv"


def _vera_name_fixes(name: str, city: str) -> Tuple[str, bool]:
def _vera_name_fixes(name: str, city: str) -> tuple[str, bool]:
"""Match Vera names with ice.gov names"""
matches = [
{"match": "Adams County", "replace": "Adams County Courthouse", "city": "Ritzville"},
Expand Down Expand Up @@ -199,7 +198,7 @@ def _vera_name_fixes(name: str, city: str) -> Tuple[str, bool]:
return name, fixed


def _vera_city_fixes(city: str, state: str) -> Tuple[str, bool]:
def _vera_city_fixes(city: str, state: str) -> tuple[str, bool]:
"""There are a few cases where getting a state match requires some munging"""
matches = [
{"match": "Saipan", "replace": "Susupe, Saipan", "city": "MP"},
Expand Down
Loading