# /// script # requires-python = ">=3.10" # dependencies = [ # "duckdb==1.1.1", # "marimo", # "pandas==2.2.3", # "vega-datasets==0.9.0", # ] # /// import marimo __generated_with = "0.9.1" app = marimo.App(width="medium") @app.cell def __(): import marimo as mo return (mo,) @app.cell(hide_code=True) def __(mo): mo.md( """ # Querying dataframes This notebook shows how to use SQL to query Python dataframes. First, we create a dataframe called `df`. """ ) return @app.cell def __(): from vega_datasets import data df = data.iris() df.head() return data, df @app.cell(hide_code=True) def __(mo): mo.md( f""" Next, we **create a SQL cell**. Create a SQL cell in one of two ways: 1. Click the {mo.icon("lucide:database")} `SQL` button at the **bottom of your notebook** 2. **Right-click** the {mo.icon("lucide:circle-plus")} button to the **left of a cell**, and choose `SQL`. In the SQL cell, you can query dataframes in your notebook as if they were tables — just reference them by name. """ ) return @app.cell def __(df, mo): result = mo.sql( f""" SELECT species, mean(petalLength) as meanPetalLength FROM df GROUP BY species ORDER BY meanPetalLength """, output=False ) return (result,) @app.cell(hide_code=True) def __(mo): mo.md( r""" The query output is returned to Python as a dataframe (Polars if you have it installed, Pandas otherwise). Choose the dataframe name via the **output variable** input in the bottom-left of the cell. If the name starts with an underscore, it won't be made available to other cells. In this case, we've named the output `result`. """ ) return @app.cell def __(result): result return if __name__ == "__main__": app.run()