{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "YVZXVTAnpTbW" }, "source": [ "# Qrlew as a query Translator\n", "[![View On GitHub](https://img.shields.io/badge/View_in_Github-grey?logo=github)](https://github.com/Qrlew/docs/blob/main/tutorials/mssql_translator.ipynb)\n", "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Qrlew/pyqrlew/blob/main/examples/mssql_translator.ipynb)\n", "\n", "In this notebook, we apply `qrlew` to translate SQL queries from one dialect e.g. PostgresSql to another such as MsSQL." ] }, { "cell_type": "markdown", "metadata": { "id": "7z7PGALUpTbX" }, "source": [ "## Install and run the Microsoft SQL Server\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "id": "GeXxtrsNEyTr" }, "outputs": [], "source": [ "%%capture\n", "# Install the mssql-server 2017 (Not working on colab)\n", "!curl https://packages.microsoft.com/keys/microsoft.asc | sudo tee /etc/apt/trusted.gpg.d/microsoft.asc\n", "!curl https://packages.microsoft.com/config/ubuntu/$(lsb_release -rs)/prod.list | sudo tee /etc/apt/sources.list.d/mssql-release.list\n", "!sudo apt-get update\n", "!sudo ACCEPT_EULA=Y apt-get install -y msodbcsql18\n", "!sudo ACCEPT_EULA=Y apt-get install -y mssql-tools18\n", "!sudo apt-get install -y unixodbc-dev\n", "\n", "# installing mssql-server\n", "!curl -fsSL https://packages.microsoft.com/keys/microsoft.asc | sudo gpg --dearmor -o /usr/share/keyrings/microsoft-prod.gpg\n", "!curl https://packages.microsoft.com/keys/microsoft.asc | sudo tee /etc/apt/trusted.gpg.d/microsoft.asc\n", "!curl -fsSL https://packages.microsoft.com/config/ubuntu/22.04/mssql-server-2022.list | sudo tee /etc/apt/sources.list.d/mssql-server-2022.list\n", "!sudo apt-get update\n", "!sudo apt-get install -y mssql-server\n", "\n", "# Configure and run the server manually. This is a workaround to install the MSSQL server in a colab\n", "%env MSSQL_SA_PASSWORD=\"MyStrongPass$\"\n", "!sudo /opt/mssql/bin/mssql-conf -n setup accept-eula\n", "!nohup /opt/mssql/bin/sqlservr &" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "iJwE8DCOnnkx", "outputId": "10ed8f7e-2ad6-4b8b-94f2-7d177e7e0366" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " \n", "-----------\n", " 1\n", "\n", "(1 rows affected)\n" ] } ], "source": [ "# give time to the server to start and check that it started correctly\n", "import time\n", "time.sleep(10)\n", "!/opt/mssql-tools18/bin/sqlcmd -S localhost -U SA -P $MSSQL_SA_PASSWORD -C -Q \"SELECT 1\"" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "id": "OjZXeKMNMIdP" }, "outputs": [], "source": [ "%%capture\n", "!pip install -U pyqrlew matplotlib graphviz sqlalchemy pyodbc" ] }, { "cell_type": "markdown", "metadata": { "id": "oznAAH-fpTbY" }, "source": [ "For the purpose of this demonstration, we focus on a single table.\n", "We create the `census_schema` and `census_table` and we fill it with values." ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "rnIPJKPXpTbZ", "outputId": "aafc5c0b-9da8-4ec6-ed42-c4729087bddc" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "(70 rows affected)\n" ] } ], "source": [ "sql_script = \"\"\"\n", "CREATE SCHEMA census_schema;\n", "GO\n", "CREATE TABLE census_schema.census_table (\n", " id INT IDENTITY(1,1) PRIMARY KEY,\n", " first_name VARCHAR(50),\n", " last_name VARCHAR(50),\n", " age INT,\n", " gender VARCHAR(10),\n", " occupation VARCHAR(50),\n", " city VARCHAR(50),\n", " state VARCHAR(50)\n", ");\n", "GO\n", "-- Inserting sample data\n", "INSERT INTO census_schema.census_table (first_name, last_name, age, gender, occupation, city, state)\n", "VALUES\n", " ('John', 'Doe', 30, 'Male', 'Engineer', 'New York', 'NY'),\n", " ('Jane', 'Doe', 28, 'Female', 'Engineer', 'Los Angeles', 'CA'),\n", " ('Bob', 'Smith', 45, 'Male', 'Analyst', 'Chicago', 'IL'),\n", " ('Alice', 'Johnson', 35, 'Female', 'Data Scientist', 'San Francisco', 'CA'),\n", " ('Tom', 'Brown', 50, 'Male', 'Lawyer', 'Miami', 'FL'),\n", " ('Emily', 'Davis', 32, 'Female', 'Software Developer', 'Seattle', 'WA'),\n", " ('Charlie', 'Miller', 40, 'Male', 'Accountant', 'Boston', 'MA'),\n", " ('Eva', 'Williams', 29, 'Female', 'Marketing Manager', 'Austin', 'TX'),\n", " ('Mike', 'Jones', 38, 'Male', 'Analyst', 'Denver', 'CO'),\n", " ('Sophia', 'Taylor', 42, 'Female', 'Analyst', 'Atlanta', 'GA'),\n", " ('John', 'Doe', 30, 'Male', 'Engineer', 'New York', 'NY'),\n", " ('Jane', 'Doe', 28, 'Female', 'Engineer', 'Los Angeles', 'CA'),\n", " ('Bob', 'Smith', 45, 'Male', 'Analyst', 'Chicago', 'IL'),\n", " ('Alice', 'Johnson', 35, 'Female', 'Data Scientist', 'San Francisco', 'CA'),\n", " ('Tom', 'Brown', 50, 'Male', 'Lawyer', 'Miami', 'FL'),\n", " ('Emily', 'Davis', 32, 'Female', 'Software Developer', 'Seattle', 'WA'),\n", " ('Charlie', 'Miller', 40, 'Male', 'Accountant', 'Boston', 'MA'),\n", " ('Eva', 'Williams', 29, 'Female', 'Marketing Manager', 'Austin', 'TX'),\n", " ('Mike', 'Jones', 38, 'Male', 'Analyst', 'Denver', 'CO'),\n", " ('Sophia', 'Taylor', 42, 'Female', 'Analyst', 'Atlanta', 'GA'),\n", " ('Alex', 'Johnson', 33, 'Male', 'Analyst', 'Portland', 'OR'),\n", " ('Megan', 'White', 27, 'Female', 'Data Scientist', 'San Diego', 'CA'),\n", " ('Daniel', 'Clark', 36, 'Male', 'Analyst', 'Houston', 'TX'),\n", " ('Olivia', 'Martinez', 31, 'Female', 'Data Scientist', 'Phoenix', 'AZ'),\n", " ('William', 'Lee', 48, 'Male', 'Professor', 'Philadelphia', 'PA'),\n", " ('Emma', 'Garcia', 29, 'Female', 'Data Scientist', 'Minneapolis', 'MN'),\n", " ('Ryan', 'Harris', 41, 'Male', 'Data Scientist', 'Dallas', 'TX'),\n", " ('Ava', 'Taylor', 26, 'Female', 'Product Manager', 'San Jose', 'CA'),\n", " ('Christopher', 'Anderson', 37, 'Male', 'Engineer', 'Detroit', 'MI'),\n", " ('Sophie', 'Wilson', 34, 'Female', 'HR Specialist', 'Raleigh', 'NC'),\n", " ('David', 'Miller', 29, 'Male', 'Engineer', 'New York', 'NY'),\n", " ('Sophia', 'Taylor', 34, 'Female', 'Engineer', 'Los Angeles', 'CA'),\n", " ('Bob', 'Smith', 42, 'Male', 'Analyst', 'Chicago', 'IL'),\n", " ('Alice', 'Johnson', 39, 'Female', 'Data Scientist', 'San Francisco', 'CA'),\n", " ('Tom', 'Brown', 37, 'Male', 'Lawyer', 'Miami', 'FL'),\n", " ('Emily', 'Davis', 28, 'Female', 'Software Developer', 'Seattle', 'WA'),\n", " ('Charlie', 'Miller', 45, 'Male', 'Accountant', 'Boston', 'MA'),\n", " ('Eva', 'Williams', 31, 'Female', 'Marketing Manager', 'Austin', 'TX'),\n", " ('Mike', 'Jones', 36, 'Male', 'Analyst', 'Denver', 'CO'),\n", " ('Sophia', 'Taylor', 49, 'Female', 'Analyst', 'Atlanta', 'GA'),\n", " ('Alex', 'Johnson', 37, 'Male', 'Analyst', 'Portland', 'OR'),\n", " ('Megan', 'White', 27, 'Female', 'Data Scientist', 'San Diego', 'CA'),\n", " ('Daniel', 'Clark', 41, 'Male', 'Analyst', 'Houston', 'TX'),\n", " ('Olivia', 'Martinez', 32, 'Female', 'Data Scientist', 'Phoenix', 'AZ'),\n", " ('William', 'Lee', 47, 'Male', 'Professor', 'Philadelphia', 'PA'),\n", " ('Emma', 'Garcia', 30, 'Female', 'Data Scientist', 'Minneapolis', 'MN'),\n", " ('Ryan', 'Harris', 40, 'Male', 'Entrepreneur', 'Dallas', 'TX'),\n", " ('Ava', 'Taylor', 26, 'Female', 'Product Manager', 'San Jose', 'CA'),\n", " ('Christopher', 'Anderson', 35, 'Male', 'Engineer', 'Detroit', 'MI'),\n", " ('Sophie', 'Wilson', 33, 'Female', 'HR Specialist', 'Raleigh', 'NC'),\n", " ('Grace', 'Brown', 55, 'Female', 'Analyst', 'New York', 'NY'),\n", " ('Brian', 'Taylor', 50, 'Male', 'Professor', 'Los Angeles', 'CA'),\n", " ('Lily', 'Harrison', 42, 'Female', 'Engineer', 'Chicago', 'IL'),\n", " ('Kevin', 'Garcia', 55, 'Male', 'Analyst', 'San Francisco', 'CA'),\n", " ('Sophie', 'Smithson', 48, 'Female', 'Lawyer', 'Miami', 'FL'),\n", " ('John', 'Clarkston', 38, 'Male', 'Software Developer', 'Seattle', 'WA'),\n", " ('Emma', 'Millerson', 40, 'Female', 'Accountant', 'Boston', 'MA'),\n", " ('David', 'Williamson', 45, 'Male', 'Marketing Manager', 'Austin', 'TX'),\n", " ('Ava', 'Johnsonson', 38, 'Female', 'Analyst', 'Denver', 'CO'),\n", " ('Daniel', 'Anderson', 42, 'Male', 'Data Scientist', 'Atlanta', 'GA'),\n", " ('Michael', 'Martinson', 33, 'Male', 'Engineer', 'Portland', 'OR'),\n", " ('Sophia', 'Whiterson', 27, 'Female', 'Data Scientist', 'San Diego', 'CA'),\n", " ('Robert', 'Davidson', 41, 'Male', 'Analyst', 'Houston', 'TX'),\n", " ('Grace', 'Wilson', 45, 'Female', 'Data Scientist', 'Phoenix', 'AZ'),\n", " ('Andrew', 'Leeson', 48, 'Male', 'Professor', 'Philadelphia', 'PA'),\n", " ('Emily', 'Garcia', 39, 'Female', 'Data Scientist', 'Minneapolis', 'MN'),\n", " ('Christopher', 'Sanders', 41, 'Male', 'Entrepreneur', 'Dallas', 'TX'),\n", " ('Olivia', 'Martinez', 36, 'Female', 'Product Manager', 'San Jose', 'CA'),\n", " ('Sophie', 'Brownson', 43, 'Female', 'Engineer', 'Detroit', 'MI'),\n", " ('Matthew', 'Taylors', 46, 'Male', 'HR Specialist', 'Raleigh', 'NC');\n", "GO\n", "\n", "\"\"\"\n", "with open('sql_script.sql', 'w') as f:\n", " f.write(sql_script)\n", "\n", "!/opt/mssql-tools18/bin/sqlcmd -S localhost -U SA -P $MSSQL_SA_PASSWORD -C -i sql_script.sql" ] }, { "cell_type": "markdown", "metadata": { "id": "p7wDj3bCpTbZ" }, "source": [ "# Use qrlew on the database" ] }, { "cell_type": "markdown", "metadata": { "id": "Nm9m9CadpTbZ" }, "source": [ "At this stage, we have a mssql dataset that can be connected with `qrlew`." ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "id": "GGSyaQ8h49LU" }, "outputs": [], "source": [ "from urllib.parse import quote_plus\n", "from sqlalchemy import create_engine\n", "import os" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "kasCN3_i4mZv", "outputId": "7c1f47d5-4bb6-47a1-b6a4-ed2ab572729c" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "mssql+pyodbc:///?odbc_connect=Driver%3D%7BODBC+Driver+18+for+SQL+Server%7D%3BServer%3Dtcp%3Alocalhost%3BDatabase%3Dmaster%3BUid%3Dsa%3BPwd%3D%22MyStrongPass%24%22%3BEncrypt%3Dyes%3BTrustServerCertificate%3Dyes%3BConnection+Timeout%3D30%3B\n" ] } ], "source": [ "# creating the sqlalchemy engine\n", "MSSQL_DRIVER = \"{ODBC Driver 18 for SQL Server}\"\n", "MSSQL_SERVER = \"localhost\"\n", "MSSQL_DATABASE = \"master\"\n", "MSSQL_USERNAME = \"sa\"\n", "MSSQL_PASSWORD = os.environ.get('MSSQL_SA_PASSWORD')\n", "ADVANCED_PARAMETERS = \"Encrypt=yes;TrustServerCertificate=yes;Connection Timeout=30;\"\n", "MSSQL_URI = \"mssql+pyodbc:///?odbc_connect={}\".format(\n", " quote_plus(\n", " (\n", " f\"Driver={MSSQL_DRIVER};Server=tcp:{MSSQL_SERVER};\"\n", " f\"Database={MSSQL_DATABASE};Uid={MSSQL_USERNAME};\"\n", " f\"Pwd={MSSQL_PASSWORD};{ADVANCED_PARAMETERS}\"\n", " )\n", " )\n", ")\n", "print(MSSQL_URI)\n", "engine = create_engine(MSSQL_URI)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "bNyTQREeNhJv", "outputId": "caed9f16-bed2-41f9-9637-d67bd87cc873" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[(1,)]\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ ":3: RemovedIn20Warning: Deprecated API features detected! These feature(s) are not compatible with SQLAlchemy 2.0. To prevent incompatible upgrades prior to updating applications, ensure requirements files are pinned to \"sqlalchemy<2.0\". Set environment variable SQLALCHEMY_WARN_20=1 to show all deprecation warnings. Set environment variable SQLALCHEMY_SILENCE_UBER_WARNING=1 to silence this message. (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9)\n", " res = conn.execute(\"SELECT 1\")\n" ] } ], "source": [ "# another sanity check with sqlalchemy\n", "with engine.connect() as conn:\n", " res = conn.execute(\"SELECT 1\")\n", " print(res.fetchall())" ] }, { "cell_type": "markdown", "metadata": { "id": "rLDseL_FpTba" }, "source": [ "The `dataset_from_database` function, located in `pyqrlew.io.dataset`, enables the creation of a Dataset object from a SQLAlchemy engine.\n", "It takes the following parameters:\n", "- `name`: the dataset's name,\n", "- `engine`: the SQLAlchemy engine establishing a connection to the database,\n", "- `schema_name`: the optional name of the schema,\n", "- `ranges`: if set to True, we use the the minimum and maximum values of numeric and datetime columns for setting their bounds.\n", " **This may violates differential privacy** and should be use if these values are public.\n", " Otherwise, use `False`. In that case, the bounds have to be provided in the SQL query,\n", "- `possible_values_threshold`: an optional integer that serves as a threshold for identifying a column as categorical. If a column surpasses this threshold, its categories are published. Similar to the ranges parameter, exercising this option may compromise differential privacy." ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "id": "8mv1h-zHpTba" }, "outputs": [], "source": [ "from pyqrlew.io.dataset import dataset_from_database\n", "from pyqrlew import Dialect\n", "\n", "ds = dataset_from_database(\n", " name=\"census_table\",\n", " engine=engine,\n", " schema_name=\"census_schema\",\n", " ranges=True,\n", ")" ] }, { "cell_type": "markdown", "metadata": { "id": "EbybUFIHpTba" }, "source": [ "`qrlew` connects to the database via the SQLAlchemy engine and fetch the tables." ] }, { "cell_type": "markdown", "metadata": { "id": "wKo_hLzHpTba" }, "source": [ "The table `\"census_schema\".\"census_table\"` is represented as a `Table` variant of a `Relation`.\n", "This specific representation encapsulates all columns, their respective types, and optionally includes information about bounds and possible values.\n", "\n", "In the provided example, we observe the bounds specification for the integer column `age` and the declaration of possible values for the string column `gender`." ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 201 }, "id": "j2aeA1iwpTbb", "outputId": "3c656b4a-0dab-48e6-ba4b-b245ca827c40" }, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "graph_8ozp\n", "\n", "\n", "\n", "graph_8ozp\n", "\n", "CENSUS_SCHEMA_CENSUS_TABLE size ∈ int{70}\n", "id = id ∈ int[1 70]\n", "first_name = first_name ∈ str\n", "last_name = last_name ∈ str\n", "age = age ∈ int[26 55]\n", "gender = gender ∈ str\n", "occupation = occupation ∈ str\n", "city = city ∈ str\n", "state = state ∈ str\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import graphviz\n", "display(graphviz.Source(ds.census_schema.census_table.relation().dot()))" ] }, { "cell_type": "markdown", "metadata": { "id": "HjtfgHGXpTbb" }, "source": [ "Any SQL query can be transformed into a `Relation`, a `qrlew` object which allows easly for query manipulation such as query translation to another dialect." ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 454 }, "id": "XFSjLgYYpTbb", "outputId": "4bd26cb3-c3eb-49da-dca5-8647a6d3fd99" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[34mWITH\u001b[0m\n", " map_u3eo (id, first_name, last_name, age, gender, occupation, city, state) \u001b[35mAS\u001b[0m (\u001b[35mSELECT\u001b[0m \"id\" \u001b[35mAS\u001b[0m id, \"first_name\" \u001b[35mAS\u001b[0m first_name, \"last_name\" \u001b[35mAS\u001b[0m last_name, \"age\" \u001b[35mAS\u001b[0m age, \"gender\" \u001b[35mAS\u001b[0m gender, \"occupation\" \u001b[35mAS\u001b[0m occupation, \"city\" \u001b[35mAS\u001b[0m city, \"state\" \u001b[35mAS\u001b[0m state FROM \"census_schema\".\"census_table\"),\n", " map_1_fm (id, first_name, last_name, age, gender, occupation, city, state) \u001b[35mAS\u001b[0m (\u001b[35mSELECT\u001b[0m TOP (10) \"id\" \u001b[35mAS\u001b[0m id, \"first_name\" \u001b[35mAS\u001b[0m first_name, \"last_name\" \u001b[35mAS\u001b[0m last_name, \"age\" \u001b[35mAS\u001b[0m age, \"gender\" \u001b[35mAS\u001b[0m gender, \"occupation\" \u001b[35mAS\u001b[0m occupation, \"city\" \u001b[35mAS\u001b[0m city, \"state\" \u001b[35mAS\u001b[0m state FROM \"map_u3eo\")\n", "\u001b[35mSELECT\u001b[0m TOP (10) * FROM \"map_1_fm\"\n" ] }, { "data": { "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idfirst_namelast_nameagegenderoccupationcitystate
01JohnDoe30MaleEngineerNew YorkNY
12JaneDoe28FemaleEngineerLos AngelesCA
23BobSmith45MaleAnalystChicagoIL
34AliceJohnson35FemaleData ScientistSan FranciscoCA
45TomBrown50MaleLawyerMiamiFL
56EmilyDavis32FemaleSoftware DeveloperSeattleWA
67CharlieMiller40MaleAccountantBostonMA
78EvaWilliams29FemaleMarketing ManagerAustinTX
89MikeJones38MaleAnalystDenverCO
910SophiaTaylor42FemaleAnalystAtlantaGA
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ], "text/plain": [ " id first_name last_name age gender occupation city \\\n", "0 1 John Doe 30 Male Engineer New York \n", "1 2 Jane Doe 28 Female Engineer Los Angeles \n", "2 3 Bob Smith 45 Male Analyst Chicago \n", "3 4 Alice Johnson 35 Female Data Scientist San Francisco \n", "4 5 Tom Brown 50 Male Lawyer Miami \n", "5 6 Emily Davis 32 Female Software Developer Seattle \n", "6 7 Charlie Miller 40 Male Accountant Boston \n", "7 8 Eva Williams 29 Female Marketing Manager Austin \n", "8 9 Mike Jones 38 Male Analyst Denver \n", "9 10 Sophia Taylor 42 Female Analyst Atlanta \n", "\n", " state \n", "0 NY \n", "1 CA \n", "2 IL \n", "3 CA \n", "4 FL \n", "5 WA \n", "6 MA \n", "7 TX \n", "8 CO \n", "9 GA " ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "from pyqrlew.utils import print_query\n", "# write a PostgreSql query: LIMIT is not allowed in MsSql but it becomes TOP\n", "psql_query = \"SELECT * FROM census_schema.census_table LIMIT 10\"\n", "relation = ds.relation(psql_query)\n", "# we need to pass the MsSql dialect to to_query relation method\n", "mssql_query = relation.to_query(Dialect.MsSql)\n", "print_query(mssql_query)\n", "# execute\n", "results = pd.read_sql(mssql_query, engine)\n", "results" ] }, { "cell_type": "markdown", "metadata": { "id": "Zkh9DmjSIC7B" }, "source": [ "There are serveral differences between PostgreSql and MsSql dialects: `LIMIT` becomes `TOP` in the `SELECT` quantifier, `LN` which is natural log in PostgreSql becomes `LOG` in MsSql, `MD5(col)` PostgreSql function is tranlated as `CONVERT(VARCHAR(MAX), HASHBYTES('MD5', col, 2))` to name a few." ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 470 }, "id": "8_w3ISbcEm6g", "outputId": "eeed3d75-25f5-4069-867d-2b069968fb45" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[34mWITH\u001b[0m\n", " map_3l7g (age, nat_log_age, hashed_name) \u001b[35mAS\u001b[0m (\u001b[35mSELECT\u001b[0m \"age\" \u001b[35mAS\u001b[0m age, LOG(\"age\") \u001b[35mAS\u001b[0m nat_log_age, CONVERT(VARCHAR(MAX),\n", " H\u001b[35mAS\u001b[0mHBYTES('MD5', CONCAT(\"first_name\", \"last_name\")),\n", " 2) \u001b[35mAS\u001b[0m hashed_name FROM \"census_schema\".\"census_table\"),\n", " map_3_1r (age, nat_log_age, hashed_name) \u001b[35mAS\u001b[0m (\u001b[35mSELECT\u001b[0m TOP (10) \"age\" \u001b[35mAS\u001b[0m age, \"nat_log_age\" \u001b[35mAS\u001b[0m nat_log_age, \"hashed_name\" \u001b[35mAS\u001b[0m hashed_name FROM \"map_3l7g\")\n", "\u001b[35mSELECT\u001b[0m TOP (10) * FROM \"map_3_1r\"\n" ] }, { "data": { "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
agenat_log_agehashed_name
0303.4011979FD9F63E0D6487537569075DA85A0C7F
1283.3322051E218214D6994742C9D543F8FBB5EE10
2453.806662103891BACA2751A856B094DB796E3FEE
3353.55534862360D09B6561E319B76DA8AE91DD526
4503.9120235306CF440C6634E13DBF38DE4DFDA04E
5323.465736D7E90AD028AFE50F5A0AEB8A7A6818A1
6403.68887983D1145CCB93A34EBBDBD410C3F9A5CB
7293.3672965436CDEC603390DE2E6C097FA7EBD065
8383.637586FC2BA1CF41ADAA8605FA0362C2E60AF1
9423.7376700E7479482648F5E1013A27599B3F9648
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ], "text/plain": [ " age nat_log_age hashed_name\n", "0 30 3.401197 9FD9F63E0D6487537569075DA85A0C7F\n", "1 28 3.332205 1E218214D6994742C9D543F8FBB5EE10\n", "2 45 3.806662 103891BACA2751A856B094DB796E3FEE\n", "3 35 3.555348 62360D09B6561E319B76DA8AE91DD526\n", "4 50 3.912023 5306CF440C6634E13DBF38DE4DFDA04E\n", "5 32 3.465736 D7E90AD028AFE50F5A0AEB8A7A6818A1\n", "6 40 3.688879 83D1145CCB93A34EBBDBD410C3F9A5CB\n", "7 29 3.367296 5436CDEC603390DE2E6C097FA7EBD065\n", "8 38 3.637586 FC2BA1CF41ADAA8605FA0362C2E60AF1\n", "9 42 3.737670 0E7479482648F5E1013A27599B3F9648" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "psql_query = \"SELECT age, LN(age) AS nat_log_age, MD5(CONCAT(first_name, last_name)) AS hashed_name FROM census_schema.census_table LIMIT 10\"\n", "relation = ds.relation(psql_query)\n", "\n", "mssql_query = relation.to_query(Dialect.MsSql)\n", "print_query(mssql_query)\n", "\n", "results = pd.read_sql(mssql_query, engine)\n", "results" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "id": "voEgsnbdLljf" }, "outputs": [], "source": [] } ], "metadata": { "colab": { "provenance": [] }, "kernelspec": { "display_name": "myenv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.7" } }, "nbformat": 4, "nbformat_minor": 0 }