"""Gradio app that converts plain-language data quality rules into Great Expectations.

The app sends the user's prompt to a remote inference endpoint and shows the
generated expectation text. The API key is read from the ``API_KEY``
environment variable.
"""

import os
import re

import gradio as gr
import requests

# Bearer token for the inference endpoint; None when the variable is unset.
API_KEY = os.getenv('API_KEY')

# Endpoint that query_vllm posts to.
API_URL = "https://api.runpod.ai/v2/3xt35aqk9wnzjz/runsync"
# Other endpoints that were left commented out next to the URL above:
#   "https://m5ypgg3uebl8r4hj.us-east-1.aws.endpoints.huggingface.cloud/v1/chat/completions"
#   "http://172.17.0.2:8000/v1/chat/completions"

# Upper bound for one API call. Generous because the UI description warns that
# the first request may be slow while the serverless worker wakes up.
REQUEST_TIMEOUT_SECONDS = 120

# Boilerplate text that is removed from the model output when present.
MODEL_PREAMBLE = 'Cutting Knowledge Date: December 2023'

# Example prompts shown in the UI. Each row holds the value for the first
# input component (the prompt textbox).
sample_prompts = [
    ["Ensure the `property_type` column contains only distinct values matching `['Residential', 'Commercial', 'Industrial']`."],
    ["For the `billing_amount` column: Ensure the sum is less than or equal to 5000 and the average is between 50 and 200."],
    ["For the `policy_number` field: Ensure all entries are unique and formatted as 'INS-XXXXXX'. Validate that this field does not contain any null values."],
    ["For field 'metadata': Ensure the field contains valid JSON objects and that parsing does not raise exceptions."],
    ["Ensure that the `score` column contains integer values only and that they are greater than or equal to zero."],
    ["For the `employee_id` field: Ensure that all values are unique and fall within the specified set of valid employee IDs."],
    ["Check that the total count of emails in the `emails` table equals the count of unique addresses in the `recipients` column."],
    ["For the `sender_id` and `receiver_id` columns: Ensure that each ID in `sender_id` exists in the `users` table and is unique across records."],
    ["For fields `likes` and `shares`: Ensure the sum of `likes` and `shares` does not exceed `engagement_score`. Ensure all fields are of type 'integer' and not null."],
    ["Ensure the `employee_id` column has distinct values and all fall within the range of 1 to 5000."],
    ["Validate that the `metadata` column contains valid JSON objects that adhere to the specified schema."],
    ["For the `email` field: Ensure it follows a valid email format and contains unique values. Confirm the field is required."],
]

# Example expectation strings. The backslash in the regex sample is escaped so
# the literal is valid in a non-raw string; the runtime text still contains \d.
sample_ge = [
    "expect_column_values_to_be_in_set(column='property_type', value_set=['Residential', 'Commercial', 'Industrial'])",
    "expect_column_sum_to_be_between(column='billing_amount', min_value=0, max_value=5000),expect_column_mean_to_be_between(column='billing_amount', min_value=50, max_value=200)",
    "expect_column_values_to_be_unique(column='policy_number'),expect_column_values_to_match_regex(column='policy_number', regex=r'^INS-\\d{8}$'),expect_column_values_to_not_be_null(column='policy_number')",
    "expect_column_values_to_be_json_parseable(column='metadata'),expect_column_values_to_not_be_null(column='metadata') ",
    "expect_column_values_to_be_of_type(column='score', type_='int'),expect_column_values_to_be_between(column='score', min_value=0, max_value=None)",
]

# Text passed to the interface as its ``article``.
article_content = """

Key Features

Custom Validation Rules: Input any data quality validation rule, and the app will generate corresponding Great Expectations expectations.
Comprehensive Validation Support: From validating column values, sums, and averages, to ensuring unique values and matching regex patterns, this app supports a wide range of data quality checks.
User-Friendly Interface: Simply enter your validation rule prompt, and our app will convert it into an easy-to-implement Great Expectations rule.
Live Conversion: Adjust the parameters like temperature and min_p to fine-tune the conversion process, with real-time results.

"""

# Sample prompts and sample expectations grouped under one name. Nothing else
# in this file reads it.
Examples = {"Prompts": sample_prompts, "Great Expectations": sample_ge}

# Markdown description. The list items and the blockquote each start on their
# own line so that Markdown renders them as a list and a quote.
DESCRIPTION = """
### Welcome!
🎉 Quickly turn your **data validation rules** into **Great Expectations** suites—ready to plug into your pipelines.

- ✅ Consistent, reliable checks
- 🔌 Easy pipeline integration
- 🧪 Clear, reproducible validations

> **Heads-up:** first run might take a moment while the server wakes up.
> We’re running in serverless mode to keep things light & cost-friendly. 🚀😊🙏
"""

# Custom CSS for the page, assembled from adjacent literals for readability.
CUSTOM_CSS = (
    " /* Subtle card-like wrapper for the output */"
    " .svelte-1ipelgc, .wrap .output-class {"
    " border-radius: 16px;"
    " box-shadow: 0 6px 24px rgba(0,0,0,0.06);"
    " }"
    " /* Make the title pop a bit */"
    " h1, h5 { letter-spacing: 0.2px; }"
    " /* Nicer spacing for the description list */"
    " .prose ul { margin-top: 0.5rem; }"
    " .prose blockquote {"
    " border-left: 4px solid rgba(0,0,0,0.08);"
    " padding-left: 0.8rem;"
    " color: rgba(0,0,0,0.7);"
    " }"
    " "
)


def _format_api_response(response_data: object) -> str:
    """Turn the decoded JSON payload into the text shown to the user.

    Args:
        response_data: The decoded JSON body of the API response.

    Returns:
        ``"Expectation:\\n<text>"`` when the payload contains generated text at
        ``output[0]["choices"][0]["tokens"][0]``, otherwise a string starting
        with ``"Error: "``.
    """
    if not isinstance(response_data, dict):
        return "Error: unexpected response format"

    output = response_data.get("output")
    if not output:
        # No generated text: surface whatever explanation the payload carries.
        message = (
            response_data.get("message")
            or response_data.get("error")
            or "Unknown error"
        )
        return f"Error: {message}"

    try:
        raw_output = output[0]["choices"][0]["tokens"][0]
    except (KeyError, IndexError, TypeError) as exc:
        return f"Error: unexpected response format ({exc!r})"

    # Remove the preamble first, then strip, so no stray whitespace remains
    # where the preamble used to be.
    cleaned_output = str(raw_output).replace(MODEL_PREAMBLE, '').strip()
    return f"Expectation:\n{cleaned_output}"


def query_vllm(
    prompt: str,
    use_cache: bool = True,
    temperature: float = 0.1,
    min_p: float = 0.1,
) -> str:
    """Send a data quality rule to the inference API and return its answer.

    Args:
        prompt: Plain-language description of the validation rule(s).
        use_cache: Value of the "Use Cache" checkbox. Accepted so the function
            matches the interface inputs; it is not sent to the API.
        temperature: Sampling temperature forwarded to the API.
        min_p: ``min_p`` sampling value forwarded to the API.

    Returns:
        ``"Expectation:\\n<text>"`` on success, or a string starting with
        ``"Error: "`` describing what went wrong. The function never raises for
        network, decoding, or payload-shape problems.
    """
    if not prompt or not prompt.strip():
        return "Error: Please enter a data quality rule."
    if not API_KEY:
        # Without a key the request would be sent with "Bearer None".
        return "Error: API_KEY environment variable is not set."

    headers = {
        "Content-Type": "application/json",
        "Accept": "application/json",
        'Authorization': f'Bearer {API_KEY}',
    }
    payload = {
        "input": {
            "prompt": prompt,
            "temperature": temperature,
            "min_p": min_p,
        }
    }

    try:
        response = requests.post(
            API_URL,
            headers=headers,
            json=payload,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
    except requests.RequestException as exc:
        return f"Error: {exc}"

    try:
        response_data = response.json()
    except ValueError:
        # The body was not JSON (for example an HTML error page).
        return f"Error: non-JSON response from API (HTTP {response.status_code})"

    return _format_api_response(response_data)


# Create a Gradio interface
interface = gr.Interface(
    fn=query_vllm,
    inputs=[
        gr.Textbox(
            label="Prompt",
            placeholder="Describe your data quality rule(s). Example: 'Column `age` must be non-null and between 0 and 120.'",
            lines=6,
        ),
        gr.Checkbox(label="Use Cache", value=True),
        gr.Slider(label="Temperature", minimum=0.1, maximum=0.5, step=0.01, value=0.1),
        gr.Slider(label="Min P (μP sampling)", minimum=0.0, maximum=0.4, step=0.1, value=0.1),
    ],
    outputs="text",
    title="🚀 Data Quality → Great Expectations Converter",
    description=DESCRIPTION,
    article=article_content,
    live=False,
    examples=sample_prompts,
    cache_examples=True,
    examples_per_page=6,
    theme="default",
    api_name="generate",
    analytics_enabled=True,
    css=CUSTOM_CSS,
)

# Launch the Gradio app
if __name__ == "__main__":
    interface.launch(share=True)