• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

jorvik-io / jorvik / 17895240058

21 Sep 2025 03:02PM UTC coverage: 91.564% (-2.6%) from 94.155%
17895240058

Pull #53

github

web-flow
Merge 6bce7887b into ef14347f4
Pull Request #53: Extend Transactions example

35 of 83 new or added lines in 5 files covered. (42.17%)

2 existing lines in 2 files now uncovered.

1487 of 1624 relevant lines covered (91.56%)

0.92 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

30.36
/examples/sample_data_generator.py
1
# This Python script generates two sample datasets (customers and transactions) that are used to demonstrate the examples.
2
import pandas as pd
1✔
3
import numpy as np
1✔
4
from datetime import datetime
1✔
5
import sqlite3
1✔
6
from pathlib import Path
1✔
7
import tempfile
1✔
8
import shutil
1✔
9

10
# Tunable settings: edit these constants to control data volume and location.
N_CUSTOMERS = 100  # default row count for generate_customers / customer-id range for transactions
N_TRANSACTIONS = 100_000  # default row count for generate_transactions
CUSTOMERS_PATH = "/tmp/sources/customers.csv"  # presumably the local customers CSV target; not used in this view
TRANSACTIONS_PATH = "/tmp/sources/transactions_db.sqlite"  # presumably the local SQLite target; not used in this view
DBFS_PATH = "/dbfs/tmp/sources"  # For Databricks; default directory of the save_*_to_dbfs helpers
1✔
16

17
def random_dates(start: datetime, end: datetime, n: int) -> list[datetime]:
    """Generate ``n`` random datetimes drawn uniformly between ``start`` and ``end``.

    Both bounds are converted to POSIX timestamps, timestamps are sampled
    uniformly between them, and each sample is converted back with
    ``datetime.fromtimestamp`` (so the results are naive local-time datetimes).

    Args:
        start: One bound of the sampling window.
        end: The other bound of the sampling window.
        n: Number of datetimes to generate; values <= 0 yield an empty list.

    Returns:
        A list with ``n`` datetimes.
    """
    start_u = start.timestamp()
    end_u = end.timestamp()
    # One vectorised draw instead of one numpy call per element: the per-call
    # overhead dominates when n is large (e.g. 100_000 transactions).
    # max(n, 0) keeps the "negative n -> empty list" behaviour of range(n).
    stamps = np.random.uniform(start_u, end_u, size=max(n, 0))
    return [datetime.fromtimestamp(stamp) for stamp in stamps.tolist()]
×
22

23
def generate_customers(num_rows: int = N_CUSTOMERS) -> pd.DataFrame:
    """Build a DataFrame of synthetic customer records.

    Each row gets a sequential id (rendered as ``c_<n>``), a random full name,
    an e-mail derived from that name, an age drawn from 15..79, a random city
    and a registration date between 2022-01-01 and 2022-06-01.

    Args:
        num_rows: Number of customers to generate.

    Returns:
        DataFrame with columns ``customer_id``, ``name``, ``email``, ``age``,
        ``city`` and ``registration_date``.
    """
    given_pool = [
        "John", "Jane", "Michael", "Emily", "David", "Sarah", "Chris", "Jessica",
        "Daniel", "Laura", "James", "Olivia", "Matthew", "Emma", "Joshua", "Sophia",
    ]
    family_pool = [
        "Smith", "Johnson", "Brown", "Williams", "Jones", "Garcia", "Miller",
        "Davis", "Rodriguez", "Martinez", "Hernandez", "Lopez", "Gonzalez", "Wilson",
    ]
    city_pool = [
        "New York", "Los Angeles", "Chicago", "Houston", "Phoenix",
        "Philadelphia", "San Antonio", "San Diego", "Dallas", "San Jose",
    ]

    window_open = datetime(2022, 1, 1, 0, 0, 0)
    window_close = datetime(2022, 6, 1, 0, 0, 0)

    def _one_customer(number: int) -> tuple:
        # Random draws happen in a fixed order (first name, last name, age,
        # city, registration date) so seeded runs stay reproducible.
        given = np.random.choice(given_pool)
        family = np.random.choice(family_pool)
        mail = f"{given.lower()}.{family.lower()}@mail.com"
        years = np.random.randint(15, 80)
        home = np.random.choice(city_pool)
        registered = random_dates(window_open, window_close, 1)[0].date()
        return (number, f"{given} {family}", mail, years, home, registered)

    rows = [_one_customer(number) for number in range(1, num_rows + 1)]
    frame = pd.DataFrame(
        rows,
        columns=["customer_id", "name", "email", "age", "city", "registration_date"],
    )
    # Turn the numeric ids into string keys of the form "c_<n>".
    frame["customer_id"] = "c_" + frame["customer_id"].astype(str)
    return frame
×
52

53
def generate_transactions(num_rows: int = N_TRANSACTIONS, n_customers: int = N_CUSTOMERS) -> pd.DataFrame:
    """Build a DataFrame of synthetic transaction records.

    Args:
        num_rows: Number of transactions to generate.
        n_customers: Customer ids are drawn from 1..n_customers (inclusive).

    Returns:
        DataFrame with columns ``transaction_id`` (``t_<n>``), ``customer_id``
        (``c_<n>``), ``product_id`` (``p_<n>``, n in 1..20), ``quantity``
        (1..100), ``price`` (10.0 to 100.0, rounded to 2 decimals) and
        ``timestamp`` (between 2022-01-01 and 2025-06-01).
    """
    window_open = datetime(2022, 1, 1, 0, 0, 0)
    window_close = datetime(2025, 6, 1, 0, 0, 0)

    # The columns are drawn in this exact order so the random stream is
    # consumed the same way on every seeded run.
    ids = np.arange(1, num_rows + 1)
    buyers = np.random.randint(1, n_customers + 1, size=num_rows)
    products = np.random.randint(1, 21, size=num_rows)
    quantities = np.random.randint(1, 101, size=num_rows)
    prices = np.round(np.random.uniform(10.0, 100.0, size=num_rows), 2)
    stamps = random_dates(window_open, window_close, num_rows)

    frame = pd.DataFrame({
        "transaction_id": ids,
        "customer_id": buyers,
        "product_id": products,
        "quantity": quantities,
        "price": prices,
        "timestamp": stamps,
    })

    # Convert the numeric keys into prefixed string identifiers.
    for column, prefix in (("transaction_id", "t_"), ("customer_id", "c_"), ("product_id", "p_")):
        frame[column] = prefix + frame[column].astype(str)
    return frame
×
72

73
def save_csv_to_dbfs(df: pd.DataFrame, filename: str, dbfs_path: str = DBFS_PATH) -> None:
    """Write ``df`` as ``<dbfs_path>/<filename>.csv`` (without the index).

    Args:
        df: Data to write.
        filename: Target file name without the ``.csv`` extension.
        dbfs_path: Destination directory; created (with parents) if missing.
    """
    target_dir = Path(dbfs_path)
    target_dir.mkdir(parents=True, exist_ok=True)
    # Derive the file location from the same Path that was just created, so
    # the directory we made and the file we write can never diverge.
    df.to_csv(target_dir / f"{filename}.csv", index=False)
×
78

79
def save_sqlite_to_dbfs(df: pd.DataFrame, filename: str, dbfs_path: str = DBFS_PATH,
                        table_name: str = 'transactions') -> None:
    """Write ``df`` into a SQLite database at ``<dbfs_path>/<filename>.sqlite``.

    The database is first built in a local temporary file and then copied to
    the destination, because the sqlite connector cannot write directly to
    DBFS.

    Args:
        df: Data to store.
        filename: Target file name without the ``.sqlite`` extension.
        dbfs_path: Destination directory; created (with parents) if missing.
        table_name: Table that receives the rows; an existing table of that
            name in the fresh database is replaced.
    """
    target_dir = Path(dbfs_path)
    target_dir.mkdir(parents=True, exist_ok=True)

    # Reserve a unique local file name; the handle is closed right away so
    # sqlite can open the file itself.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".sqlite") as tmp_file:
        temp_path = Path(tmp_file.name)

    try:
        # sqlite connector cannot write directly to DBFS. Write to a temp file and copy.
        conn = sqlite3.connect(temp_path)
        try:
            # `with conn` only commits/rolls back the transaction; it does
            # not close the connection, hence the explicit close below.
            with conn:
                df.to_sql(table_name, conn, if_exists='replace', index=False)
        finally:
            conn.close()
        shutil.copy(temp_path, target_dir / f"{filename}.sqlite")
    finally:
        # delete=False means nobody else removes the temp file for us.
        temp_path.unlink(missing_ok=True)
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc