1. Repeated Payments (Stripe)
Problem: Identify any payments made at the same merchant with the same credit card and same amount within 10 minutes of each other, then return the total count of such repeats.
Sample dataset
CREATE TABLE transactions (
transaction_id INT,
merchant_id INT,
credit_card_id INT,
amount DECIMAL(10,2),
transaction_ts DATETIME
);
INSERT INTO transactions VALUES
(1, 101, 1, 100.00, '2022-09-25 12:00:00'),
(2, 101, 1, 100.00, '2022-09-25 12:08:00'),
(3, 101, 1, 100.00, '2022-09-25 12:28:00'),
(4, 102, 2, 300.00, '2022-09-25 12:00:00'),
(5, 102, 2, 400.00, '2022-09-25 14:00:00');
Approach
- Self-join
transactions
to itself onmerchant_id
,credit_card_id
, andamount
- In the join condition, ensure the second transaction’s timestamp is within 10 minutes after the first
SELECT COUNT(*) AS repeated_payment_count
FROM transactions t1
JOIN transactions t2
ON t1.merchant_id = t2.merchant_id
AND t1.credit_card_id = t2.credit_card_id
AND t1.amount = t2.amount
AND t2.transaction_ts BETWEEN t1.transaction_ts
AND t1.transaction_ts + INTERVAL 10 MINUTE;
2. Median Google Search Frequency (Google)
Problem: Given a summary table of yearly searches per user (with searches
and num_users
), report the median number of searches
Sample Dataset
CREATE TABLE search_summary (
searches INT,
num_users INT
);
INSERT INTO search_summary VALUES
(1, 2),
(2, 2),
(3, 3),
(4, 1);
Approach
- Use the ANSI SQL
PERCENTILE_CONT(0.5)
window function to compute the median across the distribution. - If unsupported, simulate by ordering all rows, computing row numbers and total count, then averaging the middle one or two values.
Solution
SELECT
PERCENTILE_CONT(0.5)
WITHIN GROUP (ORDER BY searches) AS median_searches
FROM search_summary;
3. Monthly Merchant Balance (Visa)
Problem: For each day, compute the cumulative balance of a merchant account, resetting to zero at the start of each month.
Sample dataset
CREATE TABLE merchant_tx (
transaction_id INT,
type VARCHAR(10), -- 'deposit' or 'withdrawal'
amount DECIMAL(10,2),
transaction_date DATETIME
);
INSERT INTO merchant_tx VALUES
(19153, 'deposit', 65.90, '2022-07-10 10:00:00'),
(53151, 'deposit', 178.55, '2022-07-08 10:00:00'),
(29776, 'withdrawal', 25.90, '2022-07-08 10:00:00'),
(16461, 'withdrawal', 45.99, '2022-07-08 10:00:00'),
(77134, 'deposit', 32.60, '2022-07-10 10:00:00');
Approach
- Map
deposit
to +amount
,withdrawal
to –amount
. - Partition by year-month and order by
transaction_date
. - Use a running sum window function that restarts at each month.
Solution
SELECT
DATE(transaction_date) AS day,
SUM(CASE WHEN type = 'deposit' THEN amount
ELSE -amount END)
OVER (PARTITION BY YEAR(transaction_date), MONTH(transaction_date)
ORDER BY transaction_date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
AS cumulative_balance
FROM merchant_tx
ORDER BY day;

4. Server Utilization Time (Amazon AWS)
Problem: Servers log a start
and stop
event. Calculate total fleet uptime in full days.
Sample Table
CREATE TABLE server_logs (
server_id INT,
status_time DATETIME,
session_status VARCHAR(5) -- 'start' or 'stop'
);
INSERT INTO server_logs VALUES
(1, '2022-08-02 10:00:00', 'start'),
(1, '2022-08-04 10:00:00', 'stop'),
(2, '2022-08-17 10:00:00', 'start'),
(2, '2022-08-24 10:00:00', 'stop');
Approach
- Self-join or use LEAD() to pair each
start
with the nextstop
. - Compute the difference in days for each session.
- Sum across all sessions.
Solution (using LEAD)
WITH sessions AS (
SELECT
server_id,
status_time AS start_time,
LEAD(status_time) OVER
(PARTITION BY server_id ORDER BY status_time) AS stop_time,
session_status
FROM server_logs
)
SELECT
SUM(DATEDIFF(day, start_time, stop_time)) AS total_uptime_days
FROM sessions
WHERE session_status = 'start';
5. Uniquely Staffed Consultants (Accenture)
Problem: For each client, report the total consultants staffed and those exclusively staffed to only one client. DataLemur
Sample Tables
CREATE TABLE staffing (
employee_id INT,
engagement_id INT
);
INSERT INTO staffing VALUES
(1001, 1), (1001, 2), (1002, 1),
(1003, 3), (1004, 4);
CREATE TABLE engagements (
engagement_id INT,
client_name VARCHAR(100)
);
INSERT INTO engagements VALUES
(1, 'Dept of Defense'),
(2, 'Dept of Education'),
(3, 'Google'),
(4, 'Google');
Approach
- Join
staffing
→engagements
. - Group by
client_name
to count all consultants. - For exclusive count, filter those with COUNT(employee_id) = 1 in a subquery or use
CASE WHEN COUNT(DISTINCT engagement_id)=1
.
Solution
SELECT
e.client_name,
COUNT(s.employee_id) AS total_staffed,
SUM(CASE WHEN c.cnt = 1 THEN 1 ELSE 0 END) AS exclusive_staffed
FROM (
SELECT employee_id, COUNT(DISTINCT engagement_id) AS cnt
FROM staffing
GROUP BY employee_id
) c
JOIN staffing s ON s.employee_id = c.employee_id
JOIN engagements e ON e.engagement_id = s.engagement_id
GROUP BY e.client_name
ORDER BY e.client_name;
6. Event Friends Recommendation (Facebook)
Problem: Recommend friend pairs who are not already friends but have attended ≥2 of the same private events.
Sample Tables
CREATE TABLE friendships (
user_a_id INT,
user_b_id INT,
status ENUM('friends','not_friends')
);
INSERT INTO friendships VALUES
(111,333,'not_friends'),
(222,333,'not_friends'),
(333,222,'not_friends'),
(222,111,'friends'),
(111,222,'friends'),
(333,111,'not_friends');
CREATE TABLE attendance (
user_id INT,
event_id INT,
event_type ENUM('public','private'),
attendance_status ENUM('going','maybe','not_going'),
event_date DATE
);
INSERT INTO attendance VALUES
(111,567,'public','going','2022-07-12'),
(222,789,'private','going','2022-07-15'),
(333,789,'private','maybe','2022-07-15'),
(111,234,'private','not_going','2022-07-18'),
(222,234,'private','going','2022-07-18'),
(333,234,'private','going','2022-07-18');
Approach
- Filter
attendance
forevent_type='private'
andattendance_status='going'
. - Self-join on
attendance
to pair users byevent_id
. - Group pairs
(a,b)
and find those with COUNT ≥ 2. - Exclude existing
status='friends'
.
Solution
WITH common_events AS (
SELECT
LEAST(a.user_id, b.user_id) AS user_a,
GREATEST(a.user_id, b.user_id) AS user_b,
COUNT(DISTINCT a.event_id) AS cnt_events
FROM attendance a
JOIN attendance b
ON a.event_id = b.event_id
AND a.user_id < b.user_id
WHERE a.event_type = 'private'
AND a.attendance_status = 'going'
AND b.attendance_status = 'going'
GROUP BY 1,2
)
SELECT user_a AS user_a_id, user_b AS user_b_id
FROM common_events ce
LEFT JOIN friendships f
ON ce.user_a = f.user_a_id
AND ce.user_b = f.user_b_id
WHERE cnt_events >= 2
AND (f.status IS NULL OR f.status = 'not_friends')
ORDER BY user_a, user_b;
7. 3-Topping Pizzas (McKinsey)
Problem: From a toppings list, generate all 3-topping combinations, output their total cost, and sort by cost desc, then toppings asc.
Sample Table
CREATE TABLE toppings (
topping_name VARCHAR(255),
ingredient_cost DECIMAL(5,2)
);
INSERT INTO toppings VALUES
('Pepperoni', 0.50),
('Sausage', 0.70),
('Chicken', 0.55),
('Extra Cheese', 0.40);
Approach
- Self-join the table three times, ensuring
t1 < t2 < t3
lexicographically. - Compute
total_cost = t1.cost + t2.cost + t3.cost
. - Concatenate toppings in alphabetical order.
- Order by
total_cost DESC
, then toppingsASC
.
Solution
SELECT
CONCAT(t1.topping_name, ',', t2.topping_name, ',', t3.topping_name) AS pizza,
ROUND(t1.ingredient_cost + t2.ingredient_cost + t3.ingredient_cost, 2) AS total_cost
FROM toppings t1
JOIN toppings t2 ON t1.topping_name < t2.topping_name
JOIN toppings t3 ON t2.topping_name < t3.topping_name
ORDER BY total_cost DESC, pizza ASC;
8. Follow-Up AirPod Percentage (Apple)
Problem: What percentage of buyers purchased AirPods immediately after buying an iPhone? (Round to integer percent.)
Sample Table
CREATE TABLE purchases (
transaction_id INT,
customer_id INT,
product_name VARCHAR(50),
transaction_timestamp DATETIME
);
INSERT INTO purchases VALUES
(1, 101, 'iPhone', '2022-08-08 00:00:00'),
(2, 101, 'AirPods','2022-08-08 00:00:00'),
(5, 301, 'iPhone', '2022-09-05 00:00:00'),
(6, 301, 'iPad', '2022-09-06 00:00:00'),
(7, 301, 'AirPods','2022-09-07 00:00:00');
Approach
- Partition by
customer_id
, order bytransaction_timestamp
. - Use
LEAD(product_name)
to inspect the next purchase. - Count customers where
current='iPhone'
andnext='AirPods'
. - Divide by distinct iPhone buyers.
Solution
WITH seq AS (
SELECT
customer_id,
product_name,
LEAD(product_name) OVER
(PARTITION BY customer_id ORDER BY transaction_timestamp) AS next_prod
FROM purchases
)
SELECT
ROUND(
100.0 * SUM(CASE WHEN product_name='iPhone' AND next_prod='AirPods' THEN 1 ELSE 0 END)
/ COUNT(DISTINCT CASE WHEN product_name='iPhone' THEN customer_id END)
) AS follow_up_pct;
9. Marketing Touch Streak (Snowflake)
Problem: Identify contacts with ≥3 consecutive weeks of any marketing touch and at least one ‘trial_request’ touch. Return their emails.
Sample Tables
CREATE TABLE events (
event_id INT,
contact_id INT,
event_type STRING, -- e.g., 'webinar','trial_request',...
event_date DATE
);
INSERT INTO events VALUES
(1,1,'webinar','2022-04-17'),
(2,1,'trial_request','2022-04-23'),
(3,1,'whitepaper_download','2022-04-30'),
(4,2,'handson_lab','2022-04-19'),
(5,2,'trial_request','2022-04-23'),
(6,2,'conference_registration','2022-04-24'),
(7,3,'whitepaper_download','2022-04-30'),
(8,4,'trial_request','2022-04-30'),
(9,4,'webinar','2022-05-14');
CREATE TABLE contacts (
contact_id INT,
email VARCHAR(100)
);
INSERT INTO contacts VALUES
(1,'andy.markus@att.net'),
(2,'rajan.bhatt@capitalone.com'),
(3,'lissa_rogers@jetblue.com'),
(4,'kevinliu@square.com');
Approach
- Convert
event_date
to week numbers (e.g.,DATE_TRUNC('week', event_date)
). - Deduplicate touches per week.
- Use the Gaps & Islands trick: assign
ROW_NUMBER()
over weeks, subtract from week index to form islands Medium. - Filter islands of length ≥3 and
HAVING
seen at least onetrial_request
.
Solution
WITH weeks AS (
SELECT DISTINCT
contact_id,
DATE_TRUNC('week', event_date) AS wk,
MAX(CASE WHEN event_type='trial_request' THEN 1 ELSE 0 END)
OVER (PARTITION BY contact_id, DATE_TRUNC('week', event_date)) AS has_trial
FROM events
),
numed AS (
SELECT
contact_id,
wk,
ROW_NUMBER() OVER (PARTITION BY contact_id ORDER BY wk) AS rn
FROM weeks
),
islands AS (
SELECT
contact_id,
wk,
has_trial,
DATEADD(day, -rn, wk) AS grp
FROM numed
)
SELECT DISTINCT c.email
FROM (
SELECT
contact_id,
grp,
COUNT(*) AS streak_len,
MAX(has_trial) AS any_trial
FROM islands
GROUP BY contact_id, grp
HAVING COUNT(*) >= 3 AND MAX(has_trial) = 1
) s
JOIN contacts c ON c.contact_id = s.contact_id;
10. Bad Delivery Rate (DoorDash)
Problem: For users who signed up in June 2022, calculate the percentage of their orders in the first 14 days that had a bad experience—wrong/missing items, never received, or delivered >30 mins late. DataLemur
Sample Tables
CREATE TABLE orders (
order_id INT,
customer_id INT,
status VARCHAR(30), -- 'completed successfully', 'completed incorrectly', 'never_received'
order_ts DATETIME
);
INSERT INTO orders VALUES
(727424,8472,'completed successfully','2022-06-05 09:12:00'),
(242513,2341,'completed incorrectly','2022-06-05 14:40:00'),
(141367,1314,'completed incorrectly','2022-06-07 15:03:00'),
(582193,5421,'never_received','2022-07-07 15:22:00'),
(253613,1314,'completed successfully','2022-06-12 13:43:00');
CREATE TABLE delivery (
trip_id INT,
dasher_id INT,
est_deliv_ts DATETIME,
act_deliv_ts DATETIME
);
INSERT INTO delivery VALUES
(100463,101,'2022-06-05 09:42:00','2022-06-05 09:38:00'),
(100482,102,'2022-06-05 15:10:00','2022-06-05 15:46:00'),
(100362,101,'2022-06-07 15:33:00','2022-06-07 16:45:00'),
(100657,102,'2022-07-07 15:52:00', NULL),
(100213,103,'2022-06-12 14:13:00','2022-06-12 14:10:00');
CREATE TABLE customers (
customer_id INT,
signup_ts DATETIME
);
INSERT INTO customers VALUES
(8472,'2022-05-30 00:00:00'),
(2341,'2022-06-01 00:00:00'),
(1314,'2022-06-03 00:00:00'),
(1435,'2022-06-05 00:00:00'),
(5421,'2022-06-07 00:00:00');
Approach
- Filter
customers
forsignup_ts
in June 2022. - Join to
orders
, restrict to first 14 days per customer. - Left-join
delivery
to detect late (>est +30 mins), incorrect, or never-received. - Aggregate to compute
100 * bad_orders / total_orders
.
Solution
WITH first14 AS (
SELECT
o.customer_id,
o.order_id,
o.status,
d.est_deliv_ts,
d.act_deliv_ts
FROM customers c
JOIN orders o
ON c.customer_id = o.customer_id
AND o.order_ts BETWEEN c.signup_ts
AND DATEADD(day,14,c.signup_ts)
LEFT JOIN delivery d
ON o.trip_id = d.trip_id
),
flagged AS (
SELECT
customer_id,
CASE
WHEN status IN ('completed incorrectly','never_received') THEN 1
WHEN act_deliv_ts > DATEADD(minute,30,est_deliv_ts) THEN 1
ELSE 0
END AS is_bad
FROM first14
)
SELECT
ROUND(100.0 * SUM(is_bad) / COUNT(*), 2) AS bad_experience_pct
FROM flagged;