// EMBEDDED 1:1 (preferred for small, tightly coupled data)
// NOTE(review): ObjectId("u1") etc. are illustrative placeholders — real
// ObjectIds are 24-character hex strings.
// users collection
{
"_id": ObjectId("u1"),
"username": "mayur",
"email": "mayur@example.com",
"profile": { // embedded 1:1
"fullName": "Mayur Badodiya",
"bio": "Full Stack Developer",
"location": "Mumbai",
"website": "https://mayur.dev",
"joinedAt": ISODate("2023-01-15")
}
}
// Single query gets user + profile ✅
// REFERENCED 1:1 (for sensitive or independently accessed data)
// users collection
{ "_id": ObjectId("u1"), "username": "mayur", "profileId": ObjectId("p1") }
// user_payroll collection (sensitive, separate access control)
{
"_id": ObjectId("p1"),
"userId": ObjectId("u1"),
"salary": 150000,
"bankAccount": "XXXX1234", // sensitive!
"pf": 18000,
"tds": 15000
}
// Query: separate query needed (or $lookup in aggregation)
// Bidirectional reference (link both ways)
// NOTE(review): these two updates are NOT atomic together — if the second one
// fails you get a one-sided link. In production wrap both in a transaction
// (the author-books example further down shows the pattern).
db.employees.updateOne({ _id: empId }, { $set: { payrollId: payrollId } });
db.payroll.updateOne({ _id: payrollId }, { $set: { employeeId: empId } });
Why it matters: 1:1 modeling tests your ability to make the embed-vs-reference decision, which has significant performance implications. It shows schema design maturity.
Real applications: User + profile (embed), employee + payroll (reference for security), product + extended specs (embed if small, reference if large).
Common mistakes: Assuming 1:1 always means embed — if the related document is 100KB (large binary, detailed specs), embedding it into every parent document bloats all reads unnecessarily.
// APPROACH 1: Embed array (one-to-FEW)
// Best when: N < 100, always accessed with parent, no independent queries
{
"_id": ObjectId("o1"),
"customerId": ObjectId("c1"),
"items": [ // embedded array (N is small: 1-100 items)
{ "productId": ObjectId("p1"), "name": "Laptop", "qty": 1, "price": 75000 },
{ "productId": ObjectId("p2"), "name": "Mouse", "qty": 2, "price": 800 }
],
"total": 76600
}
// APPROACH 2: Reference parent (one-to-MANY)
// Best when: N is large or unbounded, children queried independently
// blog_posts collection
{ "_id": ObjectId("bp1"), "title": "Intro to MongoDB", "commentCount": 5000 }
// comments collection (each references the post)
{ "_id": ObjectId("c1"), "postId": ObjectId("bp1"), "user": "Alice", "text": "Great!" }
{ "_id": ObjectId("c2"), "postId": ObjectId("bp1"), "user": "Bob", "text": "Helpful!" }
// Index: db.comments.createIndex({ postId: 1, createdAt: -1 })
// Query: paginated comments for a post
// (the compound index above serves both the equality filter and the sort)
db.comments.find({ postId: postId }).sort({ createdAt: -1 }).limit(20);
// APPROACH 3: Hybrid (summary in parent, full in children)
// a.k.a. the "subset pattern": small embedded preview + full data referenced
{
"_id": ObjectId("bp1"),
"title": "Intro to MongoDB",
"commentCount": 5000,
"latestComments": [ // last 3 for preview
// NOTE(review): this preview array must be maintained on every new comment
// (typically $push with $each/$slice) or it silently goes stale.
{ "user": "Carol", "text": "Excellent!", "date": ISODate("...") }
]
}
// Full comments via separate query to comments collection
Why it matters: 1:N is the most common relationship type and the core of all MongoDB schema design interviews. Choosing the wrong approach leads to 16 MB document explosions or N+1 query problems.
Real applications: Orders with items (embed, bounded), posts with comments (reference, unbounded), users with orders (reference, large N).
Common mistakes: Embedding all children without considering growth — a "one-to-few" design becomes "one-to-many" as the application scales, hitting document size limits in production.
// M:N APPROACH 1: Embed arrays (for small, bounded sets)
// Students and courses (each student in < 20 courses)
db.students.insertOne({
_id: ObjectId("s1"),
name: "Mayur",
enrolledCourses: [ObjectId("c1"), ObjectId("c2"), ObjectId("c3")]
});
// Find student's courses (array of ObjectIds → $lookup)
// ($lookup matches each array element against courses._id — the default _id index)
db.students.aggregate([
{ $match: { _id: studentId } },
{ $lookup: { from: "courses", localField: "enrolledCourses", foreignField: "_id", as: "courses" } }
]);
// M:N APPROACH 2: Junction collection (for large M:N with metadata)
// Users and products (many users like many products)
db.likes.insertOne({
_id: ObjectId("..."),
userId: ObjectId("u1"),
productId: ObjectId("p1"),
likedAt: ISODate("2026-04-01"),
rating: 4 // relationship attribute!
});
// Indexes for both directions
db.likes.createIndex({ userId: 1, productId: 1 }, { unique: true }); // also enforces one like per (user, product)
db.likes.createIndex({ productId: 1, likedAt: -1 });
// Query: products liked by a user
// NOTE(review): neither index above covers this filter+sort — { userId, productId }
// matches the filter but not the likedAt sort, forcing an in-memory sort.
// Add { userId: 1, likedAt: -1 } if this query is hot.
db.likes.find({ userId: userId }).sort({ likedAt: -1 });
// Query: users who liked a product (with user info via $lookup)
db.likes.aggregate([
{ $match: { productId: productId } },
{ $lookup: { from: "users", localField: "userId", foreignField: "_id", as: "user" } },
{ $unwind: "$user" }, // user is a 1-element array after $lookup; flatten it
{ $project: { "user.name": 1, likedAt: 1 } }
]);
Why it matters: M:N modeling is a key interview challenge that reveals your ability to balance query efficiency vs write complexity vs data integrity.
Real applications: Users with roles, students with courses, products with categories, posts with tags, actors in movies.
Common mistakes: Using embedded arrays for genuinely large M:N relationships — a user with 10,000 liked products can't have them all embedded. Use a junction collection with proper indexes.
A two-way (bidirectional) reference stores the link on both documents, so either direction can be resolved with a direct query instead of $lookup. This is useful when: (1) both parent-to-child and child-to-parent queries are frequent, (2) the relationship array is small and bounded, (3) you want to avoid join-style aggregations for common queries. The trade-off: write complexity doubles — adding or removing a relationship requires updating two documents, which should be done atomically in a transaction to prevent inconsistency.
// TWO-WAY REFERENCE: Author-Books relationship
// books collection (stores authorId reference)
{
"_id": ObjectId("b1"),
"title": "Effective Java",
"authorId": ObjectId("a1"), // reference to author
"year": 2018
}
// authors collection (stores array of book _ids)
{
"_id": ObjectId("a1"),
"name": "Joshua Bloch",
"bookIds": [ObjectId("b1"), ObjectId("b2")], // reference to books
"bookCount": 2
}
// Query: books by an author (uses embedded array — no join!)
const author = db.authors.findOne({ name: "Joshua Bloch" });
db.books.find({ _id: { $in: author.bookIds } }); // fast!
// Query: author of a book (uses embedded reference — no join!)
const book = db.books.findOne({ title: "Effective Java" });
db.authors.findOne({ _id: book.authorId }); // fast!
// Adding a new book (must update BOTH atomically)
// NOTE(review): the queries above are mongosh-style (synchronous); the block
// below is Node.js-driver-style (client, await, sessions) — mixed here for
// illustration only. Every operation in the callback passes { session } so
// both writes commit or abort together.
const session = client.startSession();
await session.withTransaction(async () => {
const book = await db.books.insertOne({ title: "New Book", authorId: authorId }, { session });
await db.authors.updateOne(
{ _id: authorId },
{ $push: { bookIds: book.insertedId }, $inc: { bookCount: 1 } }, // keep denormalized count in the same txn
{ session }
);
});
Why it matters: Shows understanding of bidirectional efficiency vs write complexity trade-offs and the need for atomic operations when maintaining consistency across documents.
Real applications: Friend relationships (user A friends B → B's friendList gains A), author-book catalog, employee-department with frequent queries in both directions.
Common mistakes: Not using transactions for two-way reference updates — if the second update fails, you have a one-sided reference that causes data inconsistency.
// ARRAY OF ANCESTORS pattern (best for reads)
// Category tree:
// Electronics → Computers → Laptops → Gaming Laptops
// categories collection
{ _id: "electronics", name: "Electronics", ancestors: [], level: 0 }
{ _id: "computers", name: "Computers", ancestors: ["electronics"], level: 1 }
{ _id: "laptops", name: "Laptops", ancestors: ["electronics", "computers"], level: 2 }
{ _id: "gaming-laptops", name: "Gaming Laptops", ancestors: ["electronics", "computers", "laptops"], level: 3 }
// Query: all descendant categories of "computers"
db.categories.find({ ancestors: "computers" });
// Returns: laptops, gaming-laptops, and any deeper descendants
// (NOT "computers" itself — a node never appears in its own ancestors array)
// Single query! No recursive lookups!
// Query: full breadcrumb path for "gaming-laptops"
const cat = db.categories.findOne({ _id: "gaming-laptops" });
db.categories.find({ _id: { $in: cat.ancestors } }).sort({ level: 1 });
// Returns ancestor chain for breadcrumb display
// Query: immediate children of "computers"
// (pseudo-code: $size takes a concrete number — here parent's level + 1 = 2)
db.categories.find({ ancestors: { $all: ["computers"], $size: /* level + 1 */ } });
// More precise: find docs where computers is the LAST ancestor
// NOTE(review): "ancestors.2 does not exist" hard-codes "computers" sitting at
// level 1; compute the array index from the parent's level for a general form.
db.categories.find({ "ancestors.2": { $exists: false }, ancestors: "computers" });
// Index on ancestors array (multikey)
db.categories.createIndex({ ancestors: 1 });
db.categories.createIndex({ level: 1, ancestors: 1 });
Why it matters: Tree queries are a complex but realistic interview topic, especially for e-commerce, CMS, and org chart systems. Shows knowledge beyond basic CRUD.
Real applications: E-commerce category trees (finding all products in "Electronics" and all its subcategories), org chart queries (all reports under a manager), comment threading.
Common mistakes: Using recursive application-level fetching for tree traversal — each level requires a new query, resulting in N queries for an N-level tree (N+1 problem).
// PRICE SNAPSHOT — denormalize at time of purchase
// (product price may change in the future; order must preserve historical price)
db.orders.insertOne({
"_id": ObjectId("..."),
"customerId": userId,
"items": [{
"productId": ObjectId("p1"),
"name": "MacBook Pro", // denormalized snapshot
"price": 200000, // price AT TIME OF PURCHASE (not live price)
"qty": 1,
"category": "Laptops" // denormalized for reporting
}],
"total": 200000,
"createdAt": new Date()
});
// Even if product is deleted or price changes, order is accurate
// CATEGORY PATH — denormalize for product display
{
"_id": ObjectId("prod1"),
"name": "Gaming Laptop",
"price": 150000,
"categoryId": ObjectId("cat1"),
"categoryPath": "Electronics / Computers / Laptops", // denormalized (must be rewritten if a category is renamed)
"categoryIds": [ObjectId("root"), ObjectId("cat2"), ObjectId("cat1")]
}
// AUTHOR INFO in posts — denormalized for feed display
// (trade-off: a name/avatar change requires a fan-out update across the
// author's posts — acceptable because these fields change rarely)
{
"_id": ObjectId("post1"),
"title": "MongoDB Tips",
"authorId": ObjectId("u1"),
"author": { // denormalized subset
"name": "Mayur",
"avatar": "https://cdn.example.com/avatars/mayur.jpg"
}
}
Why it matters: Denormalization is a key tool in MongoDB schema optimization. Shows ability to make informed trade-offs between consistency and performance.
Real applications: Price snapshots in orders (legal requirement), author info in posts (avoid join on every feed request), product category paths (avoid tree traversal for breadcrumbs).
Common mistakes: Denormalizing highly volatile data (user's current address) — every address change requires updating hundreds of order documents. Only denormalize stable or snapshot data.
// EMBEDDED TAG ARRAYS (most common approach)
// posts collection
{
"_id": ObjectId("p1"),
"title": "MongoDB Indexing Guide",
"body": "...",
"tags": ["mongodb", "database", "performance", "indexing"],
"tagCount": 4
}
// Create multikey index on tags
db.posts.createIndex({ tags: 1 });
// Query: find all posts with "mongodb" tag
db.posts.find({ tags: "mongodb" }); // uses multikey index!
// Query: posts with ALL of these tags
db.posts.find({ tags: { $all: ["mongodb", "indexing"] } });
// Query: posts with ANY of these tags
db.posts.find({ tags: { $in: ["mongodb", "nosql"] } });
// Tag statistics (count posts per tag)
db.posts.aggregate([
{ $unwind: "$tags" }, // one document per (post, tag) pair
{ $group: { _id: "$tags", count: { $sum: 1 } } },
{ $sort: { count: -1 } },
{ $limit: 20 } // top 20 tags
]);
// Separate tags collection (for metadata: description, image, related tags)
{
"_id": "mongodb",
"description": "MongoDB is a NoSQL document database...",
"relatedTags": ["nosql", "database", "atlas"],
"postCount": 1245 // computed field updated on write
}
// Enforce tag existence (optional)
// NOTE(review): two problems with the call below — (1) it repeats the
// { tags: 1 } key pattern created above with different options, which MongoDB
// rejects (IndexOptionsConflict) unless the first index is dropped or a
// distinct name is given; (2) partialFilterExpression allows only a restricted
// operator set — $not and $size are likely NOT supported; verify against the
// MongoDB partial-index docs. Also note a partial index only controls which
// documents are indexed; it does not enforce anything on insert — use
// $jsonSchema collection validation to require a non-empty tags array.
db.posts.createIndex({ tags: 1 }, {
partialFilterExpression: { tags: { $type: "array", $not: { $size: 0 } } }
});
Why it matters: Tag systems are in virtually every application. Shows ability to choose the right relationship model for actual query patterns rather than applying generic M:N handling.
Real applications: Blog post tags, product categories, e-commerce product attributes, GitHub repo topics, recipe ingredient lists.
Common mistakes: Creating a full junction collection for tags when embedded arrays work perfectly — over-engineering a simple use case with unnecessary complexity.
$lookup is MongoDB's join mechanism, but relying on it heavily is considered an anti-pattern in MongoDB because: (1) MongoDB is designed for locality of data — related data should be stored together, (2) even when the foreign collection's joined field is indexed, $lookup still performs one index lookup per input document rather than a single set-based join, (3) across shards, $lookup requires scatter-gather operations (it cannot be routed to a single shard), (4) it adds latency proportional to the number of input documents. The MongoDB recommendation is to embed data that is always accessed together rather than constantly joining it.
// ❌ LOOKUP ANTI-PATTERN: Joining on every API request
// App fetches user profile + address + preferences + subscription + payment methods
// This runs 4 $lookup operations per user profile load
db.users.aggregate([
{ $match: { _id: userId } },
{ $lookup: { from: "addresses", localField: "_id", foreignField: "userId", as: "address" } },
{ $lookup: { from: "preferences", localField: "_id", foreignField: "userId", as: "prefs" } },
{ $lookup: { from: "subscriptions", localField: "_id", foreignField: "userId", as: "sub" } },
{ $lookup: { from: "paymentMethods", localField: "_id", foreignField: "userId", as: "payment" } }
]);
// 4 joins per request × 10K req/second = massive overhead!
// ✅ BETTER: Embed data that is always accessed together
{
"_id": ObjectId("u1"),
"name": "Mayur",
"email": "mayur@example.com",
"address": { // embedded (always shown with profile)
"city": "Mumbai",
"pincode": "400001"
},
"preferences": { // embedded (always loaded with user)
"theme": "dark",
"language": "en"
}
// subscription and payment: separate documents (different access pattern, sensitive)
}
// Single document read — no join needed!
// When $lookup IS appropriate:
// - Rarely accessed combined views (admin reports)
// - Aggregation queries (not hot user-facing paths)
// - Data that changes independently at different rates
Why it matters: Knowing when NOT to use $lookup demonstrates mature MongoDB design philosophy. Over-reliance on joins indicates a developer is still thinking relationally.
Real applications: Every high-traffic API endpoint that powers user-facing features should avoid repeated $lookup — embed the data that belongs together. Reserve $lookup for analytics pipelines.
Common mistakes: Normalizing MongoDB schemas like SQL tables "for data integrity" and then using $lookup on every request — this gives you the worst of both worlds: no SQL ACID and no MongoDB performance.
// NATIVE TIME SERIES COLLECTIONS (MongoDB 5.0+)
db.createCollection("sensorReadings", {
timeseries: {
timeField: "timestamp", // required: the time field
metaField: "sensorId", // field for grouping (the entity ID)
granularity: "minutes" // "seconds" | "minutes" | "hours"
},
expireAfterSeconds: 7776000 // TTL: 90 days
});
// Insert measurements (same as regular insert)
db.sensorReadings.insertMany([
{ timestamp: new Date(), sensorId: "temp001", temperature: 23.5, humidity: 65 },
{ timestamp: new Date(), sensorId: "temp001", temperature: 23.7, humidity: 64 }
]);
// MongoDB automatically buckets by time + sensorId for optimal storage
// Efficient time-range query (filter on metaField + timeField to hit buckets)
db.sensorReadings.find({
sensorId: "temp001",
timestamp: { $gte: ISODate("2026-04-05T09:00:00"), $lte: ISODate("2026-04-05T17:00:00") }
}).sort({ timestamp: 1 });
// Aggregation on time series
// ($dateTrunc also requires MongoDB 5.0+, same version as time series collections)
db.sensorReadings.aggregate([
{ $match: { sensorId: "temp001", timestamp: { $gte: yesterday } } },
{ $group: {
_id: { $dateTrunc: { date: "$timestamp", unit: "hour" } }, // hourly buckets
avgTemp: { $avg: "$temperature" },
maxTemp: { $max: "$temperature" }
}},
{ $sort: { "_id": 1 } }
]);
// Sensors metadata (separate entity collection)
// (_id matches the readings' sensorId metaField value, e.g. "temp001")
{
"_id": "temp001",
"location": "Server Room A",
"type": "temperature",
"unit": "celsius",
"installDate": ISODate("2025-01-01")
}
Why it matters: Time-series data is one of the fastest-growing data types in modern applications. Native Time Series Collections (MongoDB 5.0+) are a key feature to know — discussing them shows awareness of how MongoDB compares with dedicated engines like InfluxDB and TimescaleDB.
Real applications: IoT sensor networks, stock/crypto price feeds, DevOps monitoring (CPU, memory, latency over time), user activity tracking.
Common mistakes: Using regular collections for high-throughput time-series data instead of time series collections — native collections provide 10x+ better compression and query performance.
// PATTERN 1: Type discriminator reference
// comments collection — comments on either posts or videos
{
"_id": ObjectId("c1"),
"userId": ObjectId("u1"),
"text": "Great content!",
"targetType": "post", // discriminator
"targetId": ObjectId("p1") // reference (which collection = targetType)
}
{
"_id": ObjectId("c2"),
"userId": ObjectId("u1"),
"text": "Amazing video!",
"targetType": "video",
"targetId": ObjectId("v1")
}
// Application: route query based on targetType
// (the application, not the database, decides which collection to dereference)
const comment = await db.comments.findOne({ _id: commentId });
const target = comment.targetType === "post"
? await db.posts.findOne({ _id: comment.targetId })
: await db.videos.findOne({ _id: comment.targetId });
// PATTERN 2: Union reference (explicit optional fields)
// (trade-off: every new target type means a new nullable field + its index)
{
"_id": ObjectId("c3"),
"userId": ObjectId("u1"),
"text": "Interesting!",
"postId": ObjectId("p1"), // null if on a video
"videoId": null // null if on a post
}
// PATTERN 3: Unified parent collection approach
// All commentable content in one collection
{ _id: ObjectId("..."), type: "post", title: "...", content: "..." }
{ _id: ObjectId("..."), type: "video", title: "...", url: "..." }
// comments.targetId always points to this single collection
Why it matters: Polymorphic relationships appear in real-world systems and test your ability to design flexible, maintainable schemas for complex domain models.
Real applications: Notification systems (notify about likes on posts, videos, or comments), like/reaction systems on multiple content types, tagging across different entities.
Common mistakes: Not indexing { targetType: 1, targetId: 1 } — fetching all comments for an entity without this compound index causes full collection scans.
// M:N with attributes: Students enrolled in Courses
// students collection
{ _id: ObjectId("s1"), name: "Mayur", email: "mayur@example.com" }
// courses collection
{ _id: ObjectId("c1"), name: "MongoDB Mastery", instructor: "Prof. John", credits: 4 }
// enrollments collection (junction with attributes)
// — attributes belong to the RELATIONSHIP, not to either side
{
"_id": ObjectId("e1"),
"studentId": ObjectId("s1"), // M side reference
"courseId": ObjectId("c1"), // N side reference
// Relationship attributes:
"enrolledAt": ISODate("2026-01-15"),
"grade": null, // updated after completion
"status": "active", // "active" | "completed" | "dropped"
"paymentId": ObjectId("pay1"), // payment made for this enrollment
"completedAt": null
}
// Indexes for both directions + filtering
db.enrollments.createIndex({ studentId: 1, status: 1 }); // student's courses
db.enrollments.createIndex({ courseId: 1, enrolledAt: -1 }); // course's students
db.enrollments.createIndex({ studentId: 1, courseId: 1 }, { unique: true }); // no duplicate
// Query: all active courses for a student (with course details)
// ({ studentId, status } index serves the $match; the $lookup hits the
// courses default _id index)
db.enrollments.aggregate([
{ $match: { studentId: studentId, status: "active" } },
{ $lookup: { from: "courses", localField: "courseId", foreignField: "_id", as: "course" } },
{ $unwind: "$course" } // flatten the 1-element lookup array
]);
Why it matters: Identifies whether you know when a simple embedded array is insufficient and a full junction collection is required. Relationship attributes are a clear indicator.
Real applications: E-learning enrollments (enrollment date, grade, progress), project memberships (role: lead/member, joined date), order items (quantity, price at purchase, discount applied).
Common mistakes: Trying to push relationship attributes into the array elements on both sides — this creates data duplication and consistency issues when relationship attributes change.
Always index the relationship fields: (1) the reference field on the "many" side (e.g. userId in orders), (2) both query directions for junction collections (e.g. indexes led by userId and by productId in likes), (3) the field used as $lookup's foreignField on the foreign collection. Without these indexes, what looks like an efficient reference schema degrades into full collection scans on every join operation.
// Relationship indexes — non-negotiable for production!
// One-to-many: orders belong to users
db.orders.createIndex({ userId: 1, createdAt: -1 }); // query user's orders by date
db.orders.createIndex({ userId: 1, status: 1 }); // query active orders for user
// Many-to-many junction: likes (userId + productId)
db.likes.createIndex({ userId: 1, productId: 1 }, { unique: true }); // dedup + user query
// FIX: likes documents store their timestamp as `likedAt` (not `createdAt`);
// indexing the non-existent createdAt field would never support the
// "product's likers, newest first" sort.
db.likes.createIndex({ productId: 1, likedAt: -1 }); // product's likers, newest first
// Comment polymorphism — one compound index serves "comments for this target, newest first"
db.comments.createIndex({ targetType: 1, targetId: 1, createdAt: -1 });
// Social: follows (one single-field index per query direction)
db.follows.createIndex({ followerId: 1 }); // who am I following?
db.follows.createIndex({ followeeId: 1 }); // who follows me?
// Verify indexes are used with explain
db.orders.find({ userId: userId }).explain("executionStats");
// Check: "winningPlan.stage": "IXSCAN" (not "COLLSCAN"!)
// Index on $lookup foreignField is critical
// $lookup from orders joining on userId — must have userId indexed!
db.users.aggregate([{
$lookup: {
from: "orders",
localField: "_id", // user _id
foreignField: "userId", // orders.userId MUST BE INDEXED
as: "orders"
}
}]);
Why it matters: Many developers design good relationship schemas but forget to index the relationship fields — making all the carefully designed references perform poorly in production.
Real applications: A social media app without an index on followerId in the follows collection performs a full scan of 100M rows every time a user's follow list is loaded.
Common mistakes: Creating the data model correctly but not adding the corresponding indexes — this makes every $lookup and relationship query a full collection scan.