+
ksqlDB Internals
Module 1:
ksqlDB's Architecture
It's Not Exactly Simple
What If It Looked More Like Postgres?
How You Use It
Module 2:
How Stateless Operations Work
Streams
-- Declare the `readings` stream over the `readings` Kafka topic.
-- `sensor` is the key column; values are serialized as JSON, and the
-- topic is declared with three partitions.
CREATE STREAM readings (
    sensor   VARCHAR KEY,
    reading  DOUBLE,
    location VARCHAR
)
WITH (
    kafka_topic  = 'readings',
    value_format = 'json',
    partitions   = 3
);
Rows
-- Sample rows for the `readings` stream; sensor-1 reports twice.
INSERT INTO readings (sensor, reading, location) VALUES ('sensor-1', 45, 'wheel');
INSERT INTO readings (sensor, reading, location) VALUES ('sensor-2', 41, 'motor');
INSERT INTO readings (sensor, reading, location) VALUES ('sensor-1', 42, 'wheel');
INSERT INTO readings (sensor, reading, location) VALUES ('sensor-3', 42, 'muffler');
...
Transforming a Stream
-- pq1: derive `clean` from `readings`, upper-casing each row's location.
CREATE STREAM clean AS
    SELECT
        sensor,
        reading,
        UCASE(location) AS location
    FROM readings
    EMIT CHANGES;
Filtering Rows Out of a Stream
-- pq1: `clean` mirrors `readings` with the location upper-cased.
CREATE STREAM clean AS
    SELECT sensor, reading, UCASE(location) AS location
    FROM readings
    EMIT CHANGES;
-- pq2: keep only the rows of `clean` whose reading is above 41.
CREATE STREAM high_readings AS
    SELECT
        sensor,
        reading,
        location
    FROM clean
    WHERE reading > 41
    EMIT CHANGES;
Combining Many Operations Into One
-- pq1: transform and filter in a single query: upper-case the location
-- and keep only readings above 41.
CREATE STREAM high_pri AS
    SELECT
        sensor,
        reading,
        UCASE(location) AS location
    FROM readings
    WHERE reading > 41
    EMIT CHANGES;
Processing With Multiple Consumers
-- pq1: `high_pri` holds readings above 41 with the location upper-cased.
-- It is the source that the pq2 and pq3 queries below read from.
CREATE STREAM high_pri AS
    SELECT sensor, reading, UCASE(location) AS location
    FROM readings
    WHERE reading > 41
    EMIT CHANGES;
-- pq2: re-key every column of `high_pri` by `location`.
CREATE STREAM by_location AS
    SELECT *
    FROM high_pri
    PARTITION BY location
    EMIT CHANGES;
-- pq3: re-key every column of `high_pri` by `reading`.
CREATE STREAM by_reading AS
    SELECT *
    FROM high_pri
    PARTITION BY reading
    EMIT CHANGES;
Module 3:
How Stateful Operations Work
Materializing a View From a Stream
-- pq1: materialize the average reading per sensor as a table.
CREATE TABLE avg_readings AS
    SELECT
        sensor,
        AVG(reading) AS avg
    FROM readings
    GROUP BY sensor
    EMIT CHANGES;
Automatic Repartitioning
-- pq1
[[ internal ]]
-- pq2: materialize the average reading per area as a table.
-- NOTE(review): `area` is not among the columns declared for `readings`
-- in the CREATE STREAM shown in this deck (sensor, reading, location);
-- confirm the schema this slide assumes.
CREATE TABLE part_avg AS
    SELECT
        area,
        AVG(reading) AS avg
    FROM readings
    GROUP BY area
    EMIT CHANGES;
Replaying From Changelogs
-- pq1: materialize the average reading per area as a table.
-- NOTE(review): `area` is not among the columns declared for `readings`
-- in the CREATE STREAM shown in this deck (sensor, reading, location);
-- confirm the schema this slide assumes.
CREATE TABLE part_avg AS
    SELECT
        area,
        AVG(reading) AS avg
    FROM readings
    GROUP BY area
    EMIT CHANGES;
Replaying From a Compacted Topic
-- pq1: materialize the average reading per area as a table.
-- NOTE(review): `area` is not among the columns declared for `readings`
-- in the CREATE STREAM shown in this deck (sensor, reading, location);
-- confirm the schema this slide assumes.
CREATE TABLE part_avg AS
    SELECT
        area,
        AVG(reading) AS avg
    FROM readings
    GROUP BY area
    EMIT CHANGES;
Module 4:
How Streaming Joins Work
Stream-Table Joins: Inner
-- Enrich each reading with the brand of its sensor. INNER JOIN: a reading
-- is emitted only when `brands` has a row for the same sensor.
-- The join key (`sensor`) is included in the projection because ksqlDB
-- rejects a persistent join query whose projection omits the key column.
-- NOTE(review): `r.area` is not among the columns declared for `readings`
-- in the CREATE STREAM shown in this deck; confirm the schema.
CREATE STREAM enriched_readings AS
    SELECT
        r.sensor AS sensor,
        r.reading,
        r.area,
        b.brand_name
    FROM readings r
    INNER JOIN brands b
        ON b.sensor = r.sensor
    EMIT CHANGES;
Stream-Table Joins: Left Outer
-- Enrich each reading with the brand of its sensor. LEFT OUTER JOIN: every
-- reading is emitted; `brand_name` is NULL when `brands` has no row for
-- the sensor.
-- The join key (`sensor`) is included in the projection because ksqlDB
-- rejects a persistent join query whose projection omits the key column.
-- NOTE(review): `r.area` is not among the columns declared for `readings`
-- in the CREATE STREAM shown in this deck; confirm the schema.
CREATE STREAM enriched_readings AS
    SELECT
        r.sensor AS sensor,
        r.reading,
        r.area,
        b.brand_name
    FROM readings r
    LEFT OUTER JOIN brands b
        ON b.sensor = r.sensor
    EMIT CHANGES;
Preloading Table Data
-- Same left outer stream-table join as on the previous slide: every
-- reading is emitted, with `brand_name` NULL when `brands` has no row for
-- the sensor at the time the reading is processed.
-- The join key (`sensor`) is included in the projection because ksqlDB
-- rejects a persistent join query whose projection omits the key column.
-- NOTE(review): `r.area` is not among the columns declared for `readings`
-- in the CREATE STREAM shown in this deck; confirm the schema.
CREATE STREAM enriched_readings AS
    SELECT
        r.sensor AS sensor,
        r.reading,
        r.area,
        b.brand_name
    FROM readings r
    LEFT OUTER JOIN brands b
        ON b.sensor = r.sensor
    EMIT CHANGES;
Table-Table Joins: Inner
-- Join the `owners` and `brands` tables on `sensor`. INNER JOIN: a row is
-- produced only for sensors present in both tables.
-- The join key (`sensor`) is included in the projection because ksqlDB
-- rejects a persistent join query whose projection omits the key column.
-- NOTE(review): presumably `sensor` is the primary key of both tables;
-- their definitions are not shown here, so confirm.
CREATE TABLE enriched_sensors AS
    SELECT
        o.sensor AS sensor,
        o.owner,
        b.brand_name
    FROM owners o
    INNER JOIN brands b
        ON b.sensor = o.sensor
    EMIT CHANGES;
Module 5:
Scaling and Fault Tolerance
Scaling: 1x
Scaling: 2x
Scaling: 8x
Scaling with State
Module 6:
High Availability
High Availability