https://github.com/dylanhogg/minimal-python-project-template
from datetime import datetime
# Print the current local time as a compact, sortable stamp, e.g. 20250207-153045
print(format(datetime.now(), "%Y%m%d-%H%M%S"))
# Drop into an interactive debugger at this point (pick one of the two)
import ipdb; ipdb.set_trace() # https://github.com/gotcha/ipdb
import pudb; pudb.set_trace() # https://github.com/inducer/pudb
pytest -s my_test.py # -s disables pytest's output capturing so the pdb prompt and output are visible
pytest my_test.py --pdb --pdbcls=IPython.terminal.debugger:TerminalPdb # Alternative
Also see pudb
https://docs.astral.sh/uv/getting-started/installation/
https://realpython.com/uv-vs-pip/
$ curl -LsSf https://astral.sh/uv/install.sh | sh
uv venv
uv pip install -r requirements-dev.txt
uv run example.py
https://docs.astral.sh/uv/guides/scripts/#running-a-script-with-dependencies
https://docs.astral.sh/uv/guides/tools/#commands-with-plugins
https://til.simonwillison.net/llms/bert-ner
https://simonwillison.net/2025/Feb/7/pip-install-llm-smollm2
# Run an inline named-entity-recognition script; numpy, torch and transformers are requested via --with
uv run --with numpy --with torch --with transformers python -c '
from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline
import json
model = AutoModelForTokenClassification.from_pretrained("dslim/distilbert-NER")
tokenizer = AutoTokenizer.from_pretrained("dslim/distilbert-NER")
nlp = pipeline("ner", model=model, tokenizer=tokenizer)
text = "This is an example sentence about Dylan Hogg who lives in Australia and is an ML Engineer."
print(json.dumps(nlp(text), indent=2, default=repr))'
# uvx can likewise run a tool's command, adding plugin packages with --with:
uvx --with mkdocs-material mkdocs --help
https://rich.readthedocs.io/en/stable/tables.html
from rich.console import Console
from rich.table import Table
# Build a four-column table; header cells use the "bold blue" style
table = Table(show_header=True, header_style="bold blue")
table.add_column("Date", style="dim", width=12)
table.add_column("Title")
table.add_column("Production Budget", justify="right")
table.add_column("Box Office", justify="right")

# Cell text may contain console markup such as [red]...[/red] or [bold]...[/bold]
table.add_row(
    "Dec 20, 2019", "Star Wars: The Rise of Skywalker", "$275,000,000", "$375,126,118"
)
table.add_row(
    "May 25, 2018",
    "[red]Solo[/red]: A Star Wars Story",
    "$275,000,000",
    "$393,151,347",
)
table.add_row(
    "Dec 15, 2017",
    "Star Wars Ep. VIII: The Last Jedi",
    "$262,000,000",
    "[bold]$1,332,539,889[/bold]",
)

# Render the finished table to the terminal
console = Console()
console.print(table)
https://en.wikipedia.org/w/api.php?action=query&list=search&format=json&srsearch=python
https://en.wikipedia.org/w/api.php?action=help&modules=main
todo
from transformers import AutoTokenizer
# Background colors, as "R;G;B" strings, cycled through so adjacent tokens differ
colors = [
    '102;194;165', '252;141;98', '141;160;203',
    '231;138;195', '166;216;84', '255;217;47'
]


def show_tokens(sentence: str, tokenizer_name: str) -> None:
    """Print the tokens of ``sentence``, each on a differently colored background.

    Args:
        sentence: Text to tokenize.
        tokenizer_name: Identifier handed to ``AutoTokenizer.from_pretrained``,
            e.g. ``"bert-base-cased"``.

    The function first prints ``len(tokenizer)`` as the vocabulary length,
    then every token id decoded back to text, separated by spaces.
    """
    # Load the tokenizer and turn the sentence into token ids
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
    token_ids = tokenizer(sentence).input_ids

    print(f"Vocab length: {len(tokenizer)}")

    for idx, token_id in enumerate(token_ids):
        background = colors[idx % len(colors)]
        # ANSI SGR sequence: black text on a 24-bit RGB background, then reset
        print(
            f'\x1b[0;30;48;2;{background}m' +
            tokenizer.decode(token_id) +
            '\x1b[0m',
            end=' '
        )
    # The loop suppresses newlines, so finish the line to keep later output
    # (or the shell prompt) from being appended to the last token
    print()
# NOTE(review): `text` does not appear to be assigned in this snippet (the only visible
# assignment is inside the quoted `python -c` script earlier) — define it before running
show_tokens(text, "bert-base-cased")
todo
todo
pandas-pipe-method.ipynb
pandas-pipe-parallel-with-caching.ipynb