const mongoose = require('mongoose');
// Define Schema
const userSchema = new mongoose.Schema({
  name: { type: String, required: true, trim: true },
  email: { type: String, required: true, unique: true, lowercase: true },
  age: { type: Number, min: 0, max: 120 },
  role: { type: String, enum: ['user', 'admin'], default: 'user' },
  createdAt: { type: Date, default: Date.now },
});

// Create Model
const User = mongoose.model('User', userSchema);

/**
 * Connects to the local MongoDB instance once and creates a sample user.
 *
 * The awaits live inside an async function because this file loads mongoose
 * with `require()` (CommonJS), where top-level `await` is a syntax error.
 *
 * @returns {Promise<object>} The newly created user document.
 */
async function main() {
  // Connect
  await mongoose.connect('mongodb://localhost:27017/myapp', {
    serverSelectionTimeoutMS: 5000,
  });

  // Use Model
  const user = await User.create({ name: 'Mayur', email: 'mayur@test.com' });
  return user;
}

// Surface connection/validation failures instead of leaving an unhandled rejection.
main().catch((err) => {
  console.error('Mongoose quick-start failed:', err);
  process.exitCode = 1;
});
Why it matters: Mongoose is used in the vast majority of Node.js + MongoDB applications. Backend interviews at companies using the MERN stack will almost certainly ask Mongoose questions.
Real applications: Express.js APIs use Mongoose models to enforce data structure, validate input, and automatically populate related documents in a clean, predictable way.
Common mistakes: Opening a new Mongoose connection on every request instead of reusing the connection pool — always connect once on startup and reuse the connection throughout the application lifecycle.
Mongoose supports the following schema types: String, Number, Date, Buffer, Boolean, Mixed (any type), ObjectId, Array, Decimal128, and Map. Each type supports built-in validators: String has minlength, maxlength, match (regex), and enum; Number has min and max; all types support required and validate (custom function).
// Validation building blocks used by the product schema below.
const PRODUCT_CATEGORIES = ['electronics','clothing','food'];
const EMAIL_PATTERN = /^[\w.-]+@[\w.-]+\.\w+$/;
const TEN_DIGIT_PATTERN = /^\d{10}$/;

// Custom validator: passes only when the value is exactly ten digits.
function isTenDigitPhone(value) {
  return TEN_DIGIT_PATTERN.test(value);
}

// Product schema: each path declares its type plus built-in or custom validators.
const productSchema = new mongoose.Schema({
  name: {
    type: String,
    required: [true, 'Name is required'],
    minlength: 3,
    maxlength: 100,
    trim: true,
  },
  price: {
    type: Number,
    required: true,
    min: [0, 'Price must be positive'],
  },
  category: {
    type: String,
    enum: PRODUCT_CATEGORIES,
    required: true,
  },
  email: {
    type: String,
    match: [EMAIL_PATTERN, 'Invalid email'],
  },
  sku: {
    type: String,
    unique: true,
    sparse: true,
  },
  phoneNumber: {
    type: String,
    validate: {
      validator: isTenDigitPhone,
      message: 'Phone must be 10 digits',
    },
  },
});
Why it matters: Schema-level validation is the first line of defense against bad data in production MongoDB applications — it enforces invariants before data reaches the database.
Real applications: User registration schemas validate email format, password minimum length, and role enum values, preventing invalid data from being saved without manual application-level checks.
Common mistakes: Relying solely on frontend validation and skipping Mongoose validation — always validate server-side since clients can bypass frontend checks.
Middleware (also called hooks) are functions that run before (pre) or after (post) model operations. Document middleware applies to: save, validate, remove, deleteOne. Query middleware applies to: find, findOne, updateOne, deleteOne, etc. In document middleware, `this` is the document; in query middleware, `this` is the query. Async hooks can use async/await.
const userSchema = new mongoose.Schema({ /* ...schema fields... */ });

// Pre-save: remember whether this save is an insert.
// Mongoose flips `isNew` to false before post('save') hooks run, so the flag
// has to be captured here; `$locals` is the document's scratch space for
// passing data between middleware.
userSchema.pre('save', function () {
  this.$locals.wasNew = this.isNew;
});

// Pre-save: hash password before saving.
// Async hooks signal completion through the returned promise, so no `next`.
userSchema.pre('save', async function () {
  if (!this.isModified('password')) return;
  this.password = await bcrypt.hash(this.password, 12);
});

// Pre-find: exclude deleted documents from all queries.
// Query middleware: `this` is the query, so the extra filter is merged into it.
userSchema.pre(/^find/, function () {
  this.find({ deleted: { $ne: true } });
});

// Post-save: send welcome email after creating a new user.
// Reads the flag recorded in pre-save because `isNew` is already false here.
userSchema.post('save', async function (doc) {
  if (doc.$locals.wasNew) {
    await sendWelcomeEmail(doc.email);
  }
});
Why it matters: Middleware is Mongoose's most powerful feature — it enables cross-cutting concerns like password hashing, soft deletes, audit logging, and cache invalidation without polluting business logic.
Real applications: Auth systems use pre('save') to hash passwords. Soft-delete systems use pre(/^find/) to automatically exclude deleted documents from all queries without modifying every query.
Common mistakes: Using arrow functions in middleware — arrow functions don't have their own this, so this.isModified() won't work. Always use regular function expressions in Mongoose hooks.
Virtuals are computed properties that are not persisted to MongoDB; you define them with schema.virtual(). They are useful for derived display values (fullName from first/last), formatted data, and computed aggregates. By default, virtuals are not included in toJSON() or toObject() output — enable them per call with { virtuals: true } or set it globally in the schema options.
// Person schema; the second argument makes virtuals appear in toJSON()/toObject().
const personSchema = new mongoose.Schema(
  {
    firstName: { type: String, required: true },
    lastName: { type: String, required: true },
    birthDate: { type: Date },
  },
  { toJSON: { virtuals: true }, toObject: { virtuals: true } }
);

// Computed virtual: fullName
personSchema.virtual('fullName')
  .get(function () {
    return `${this.firstName} ${this.lastName}`;
  })
  .set(function (name) {
    // Trim and split on any whitespace run so stray or doubled spaces do not
    // end up inside firstName/lastName.
    const [first, ...rest] = name.trim().split(/\s+/);
    this.firstName = first;
    this.lastName = rest.join(' ');
  });

// Age virtual: whole years since birthDate, or null when birthDate is unset.
personSchema.virtual('age').get(function () {
  const born = this.birthDate;
  // birthDate is optional; without this guard the arithmetic yields NaN.
  if (!(born instanceof Date) || Number.isNaN(born.getTime())) return null;

  // Compare calendar fields (in UTC) instead of dividing milliseconds by an
  // average year length, which can be off by one around the birthday.
  const today = new Date();
  const years = today.getUTCFullYear() - born.getUTCFullYear();
  const birthdayReached =
    today.getUTCMonth() > born.getUTCMonth() ||
    (today.getUTCMonth() === born.getUTCMonth() &&
      today.getUTCDate() >= born.getUTCDate());
  return birthdayReached ? years : years - 1;
});

// The virtuals are defined on personSchema, so query the model compiled from it.
const Person = mongoose.model('Person', personSchema);
const person = await Person.findOne({ firstName: 'Mayur' });
console.log(person.fullName); // "Mayur Badodiya"
Why it matters: Virtuals keep derived data computation in one place (the model) rather than scattered across API controllers, ensuring consistent derived values across the application.
Real applications: User models with firstName/lastName use a fullName virtual for display. Product models compute discountedPrice virtual from price and discountPercent without storing derived values.
Common mistakes: Trying to query by a virtual field — virtuals don't exist in MongoDB, so User.find({ fullName: "Mayur" }) returns nothing. Query by actual stored fields instead.
populate() replaces an ObjectId reference field with the actual referenced document by running a second query. It's Mongoose's abstraction over MongoDB's $lookup, working at the application level. You can populate nested references with dot notation, select specific fields, and populate multiple paths simultaneously. populate() is convenient but less efficient than $lookup for complex queries since it may run multiple additional queries.
// Schema with reference
const postSchema = new mongoose.Schema({
  title: String,
  authorId: { type: mongoose.Schema.Types.ObjectId, ref: 'User' },
  tags: [{ type: mongoose.Schema.Types.ObjectId, ref: 'Tag' }],
});

// Each example uses its own variable name so the three alternatives can sit
// in one scope without redeclaring a `const`.

// Populate single ref
const postWithAuthor = await Post.findOne({ _id: postId })
  .populate('authorId', 'name email -_id'); // select name, email, exclude _id

// Populate multiple paths
const postWithAuthorAndTags = await Post.findOne({ _id: postId })
  .populate('authorId', 'name')
  .populate('tags', 'label color');

// Populate nested refs (author's company)
const postWithAuthorCompany = await Post.findOne({ _id: postId })
  .populate({
    path: 'authorId',
    populate: { path: 'companyId', select: 'name' },
  });
Why it matters: populate() is used in nearly every Mongoose-based API — understanding its behavior (multiple queries!) vs $lookup (single pipeline) helps you make the right performance choice.
Real applications: Blog APIs populate author info and tag names when returning post lists. For complex join logic or high-volume queries, use aggregation with $lookup instead of populate.
Common mistakes: Using populate() in a loop (N+1 query problem) — each populate runs a separate query. Use populate on the query itself or use $lookup for batch population of many documents.
.lean() returns plain JavaScript objects instead of full Mongoose documents. Regular Mongoose query results are Mongoose Document instances with methods, getters, and change tracking overhead. Lean queries return plain POJOs — up to 5-10x faster and use significantly less memory. Use lean() for read-only operations where you don't need Mongoose document methods, virtuals (unless you enable them), or save/update capabilities.
// Normal query: returns Mongoose Document instances (heavy)
const userDocs = await User.find({ active: true });
// userDocs[0] instanceof mongoose.Document === true
// Carries .save() and other document methods, $__ tracking fields, virtuals...

// .lean() query: returns plain objects (fast, lightweight)
const plainUsers = await User.find({ active: true }).lean();
// plainUsers[0] instanceof mongoose.Document === false
// Plain object: { _id, name, email, ... }

// Use lean() when:
// - READ-ONLY data display
// - API responses where you don't need .save()
// - Performance-critical list endpoints

// DON'T use lean() when:
// - You need to use .save() after modifications
// - You need document methods or virtuals
// - You're going to update the document

// lean() with virtuals: the `virtuals` option is NOT built into Mongoose.
// It is only honored when the mongoose-lean-virtuals plugin has been
// registered on the schema; without it, virtuals are absent from the result.
const usersWithVirtuals = await User.find().lean({ virtuals: true });
Why it matters: .lean() is a simple but significant optimization. Not using it for read-only API endpoints is leaving performance improvements on the table.
Real applications: List API endpoints (GET /products?page=1) use .lean() for all read operations — no need for document methods when just serializing to JSON response.
Common mistakes: Using .lean() then trying to call document methods like .save() — lean documents are plain objects with no Mongoose methods, causing "save is not a function" errors.
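Mongoose supports MongoDB multi-document transactions through the session option. Start a session with mongoose.startSession(), then pass that session to every model operation that belongs to the transaction. The session's withTransaction() method handles commit, abort, and retry. Mongoose also provides a convenience method, Model.startSession().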
/**
 * Atomically moves `amount` from one account to another and records the
 * transfer in the audit log, all inside a single transaction.
 *
 * @param {*} fromId - _id of the account to debit.
 * @param {*} toId - _id of the account to credit.
 * @param {number} amount - Positive amount to transfer.
 * @returns {Promise<void>}
 * @throws {RangeError} If `amount` is not a positive finite number.
 * @throws {Error} 'Insufficient balance' if the source account is missing or
 *   cannot cover the amount; 'Destination account not found' if `toId`
 *   matches no account.
 */
const transfer = async (fromId, toId, amount) => {
  // A zero/negative/NaN amount would slip past the balance check and could
  // move money in the wrong direction.
  if (!Number.isFinite(amount) || amount <= 0) {
    throw new RangeError('Transfer amount must be a positive number');
  }

  const session = await mongoose.startSession();
  try {
    await session.withTransaction(async () => {
      // Debit only if the balance covers the amount: the check and the
      // decrement happen in one conditional update, not a read then a write.
      const debited = await Account.findOneAndUpdate(
        { _id: fromId, balance: { $gte: amount } },
        { $inc: { balance: -amount } },
        { session }
      );
      if (!debited) {
        throw new Error('Insufficient balance');
      }

      // A null result means no account matched; throwing aborts the
      // transaction so the debit above is rolled back instead of lost.
      const credited = await Account.findByIdAndUpdate(
        toId,
        { $inc: { balance: amount } },
        { session }
      );
      if (!credited) {
        throw new Error('Destination account not found');
      }

      // Record transaction in audit log.
      // create() only accepts options when the documents are passed as an array.
      await Transaction.create(
        [{ fromId, toId, amount, timestamp: new Date() }],
        { session }
      );
    });
  } finally {
    await session.endSession();
  }
};
Why it matters: Mongoose transaction patterns appear in every senior Node.js/MongoDB interview — the key is knowing how to pass sessions to different Mongoose operations.
Real applications: Payment services use Mongoose transactions to atomically create a payment record, update account balance, and log the activity — all failing or all succeeding together.
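Common mistakes: Calling Model.create(doc, { session }) with a single document inside a transaction — without the array form, the second argument is not treated as options, so the write does not join the session. Pass an array instead: Model.create([doc], { session }). The array form is required for session support in Mongoose create.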
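Plugins are reusable functions that add fields, middleware, methods, or statics to a schema. Apply a plugin to one schema with schema.plugin(), or to every schema with mongoose.plugin().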
// Create a reusable timestamps plugin
/**
 * Mongoose plugin that adds `createdAt` / `updatedAt` fields to a schema and
 * keeps `updatedAt` current on saves and on query-based updates.
 *
 * @param {object} schema - Schema to extend; must provide `add()` and `pre()`.
 * @param {object} [options] - Accepted to match the plugin call signature;
 *   currently unused.
 * @returns {void}
 */
function timestampsPlugin(schema, options) {
  schema.add({
    createdAt: { type: Date, default: Date.now },
    updatedAt: { type: Date },
  });

  // Document middleware: `this` is the document being saved.
  // Only existing documents get updatedAt; new ones rely on createdAt.
  schema.pre('save', function () {
    if (!this.isNew) this.updatedAt = new Date();
  });

  // Query middleware: `this` is the query, so set() adds to the pending update.
  // updateMany is included so bulk updates do not leave updatedAt stale.
  schema.pre(['updateOne', 'updateMany', 'findOneAndUpdate'], function () {
    this.set({ updatedAt: new Date() });
  });
}
// Apply to specific schemas (one plugin() call per schema that needs it)
// NOTE(review): Mongoose's plugin docs recommend registering plugins before the
// model is compiled with mongoose.model() — confirm the ordering where these run.
userSchema.plugin(timestampsPlugin);
productSchema.plugin(timestampsPlugin);
// Apply globally to ALL schemas
// NOTE(review): presumably an alternative to the per-schema calls above rather
// than something to combine with them — confirm which one the app intends.
mongoose.plugin(timestampsPlugin);
// Using popular paginate plugin: registering it on the schema is what makes
// the paginate() call on the model below available.
const mongoosePaginate = require('mongoose-paginate-v2');
userSchema.plugin(mongoosePaginate);
// Requests the first page with 10 items per page; `filter` is expected to be
// defined by the surrounding code.
const result = await User.paginate(filter, { page: 1, limit: 10 });
Why it matters: Plugins are Mongoose's DRY mechanism — cross-cutting schema concerns (soft-delete, audit timestamps, pagination) should be plugins not duplicated in every schema.
Real applications: Large Express applications use a softDelete plugin on all schemas — adds a deletedAt field and overrides /^find/ queries to exclude soft-deleted records automatically.
Common mistakes: Duplicating timestamp and soft-delete logic in every schema — this creates maintenance problems. Extract recurring patterns into plugins and apply them consistently.
Instance methods (schema.methods) add functions to document instances; statics (schema.statics) are model-level functions.
// Instance methods (on document instances): `this` is the document.
userSchema.methods.getFullName = function () {
  return `${this.firstName} ${this.lastName}`;
};
const userById = await User.findById(id);
console.log(userById.getFullName());

// Statics (on model): `this` is the model, so this.findOne() queries it.
userSchema.statics.findByEmail = function (email) {
  return this.findOne({ email: email.toLowerCase() });
};
const userByEmail = await User.findByEmail('test@example.com');

// .select() — field projection in Mongoose.
// Queries are lazy: without `await` (or .exec()/.then()) nothing is sent to
// the database, so each example awaits its query and keeps the result.
const activeUsers = await User.find({ active: true })
  .select('name email -_id') // include name,email; exclude _id
  .lean();

// $expr — compare two fields in same document
// find docs where age > minAge field (both in same doc)
const usersAboveMinAge = await User.find({ $expr: { $gt: ["$age", "$minAge"] } });
Why it matters: Instance methods and statics keep query logic in the model layer — centralizing access patterns makes code more maintainable and testable.
Real applications: User models have a statics.findByEmail() method used across auth middleware, login controllers, and profile lookup — one implementation, consistent behavior.
Common mistakes: Writing the same query logic in multiple controllers instead of using schema statics. Spreading query construction across controllers creates maintenance problems and bugs when query conditions change.
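Offset-based pagination uses skip() and limit() and slows down as the page number grows, because the skipped documents still have to be scanned. Cursor-based pagination uses an indexed, ordered field (such as _id or createdAt) as the position marker for the next page — its cost stays roughly constant regardless of page depth. The mongoose-paginate-v2 plugin adds a paginate() method that returns pagination metadata automatically.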
// Offset-based (simple, slow for large pages)
const page = 3;
const limit = 10;
// The page query and the total count are independent, so run them in parallel.
const [offsetUsers, total] = await Promise.all([
  User
    .find(filter)
    .sort({ createdAt: -1 })
    .skip((page - 1) * limit)
    .limit(limit)
    .select('name email')
    .lean(),
  User.countDocuments(filter),
]);

// Cursor-based (fast regardless of page depth)
const lastId = req.query.cursor; // ObjectId of last seen doc
// The cursor comes from the client: reject malformed values up front rather
// than letting the ObjectId constructor throw an opaque error.
if (lastId !== undefined && !mongoose.isValidObjectId(lastId)) {
  throw new RangeError('Invalid pagination cursor');
}
const cursorUsers = await User
  .find({
    ...filter,
    ...(lastId ? { _id: { $lt: new mongoose.Types.ObjectId(lastId) } } : {}),
  })
  .sort({ _id: -1 })
  .limit(limit)
  .select('name email')
  .lean();
// A short page means there is nothing left to fetch, so hand back no cursor.
const nextCursor = cursorUsers.length === limit
  ? cursorUsers[cursorUsers.length - 1]._id
  : null;
Why it matters: Pagination is in almost every list API. Interviewers specifically ask about cursor vs offset pagination and the performance cliff of large skip values.
Real applications: Social feed scrolling uses cursor-based pagination — users scroll infinitely and the server uses last seen _id as cursor, serving consistent fast pages regardless of feed depth.
Common mistakes: Using skip(10000) for deep pagination — MongoDB must scan and discard 10,000 documents for every request. Cursor-based pagination avoids this by jumping directly to the next position.
Mongoose queries are lazy: they are not executed until you call .exec(), .then(), or await them. This allows fluent method chaining to build queries incrementally: .where(), .equals(), .gt(), .lt(), .in(), .or(), .and(), .sort(), .select(), .limit(), .skip(), .populate(). Conditional query building enables dynamic filters based on request parameters.
// Fluent Mongoose query builder, assembled step by step.
// Each builder call mutates and returns the same query object, so the
// statements below accumulate onto `query`; nothing runs until it is awaited.
const query = User.find();
query.where('age').gte(18).lte(65);
query.where('active').equals(true);
query.sort({ name: 1 });
query.select('name email age');
query.limit(20);
query.lean();
// Dynamic filter building (very common in API development)
/**
 * Builds a lean, sorted, size-limited user query from optional filters.
 * The returned query is not executed until it is awaited.
 *
 * @param {object} [filters] - Typically `req.query`, so values are untrusted.
 * @param {number|string} [filters.minAge] - Inclusive lower bound on `age`.
 * @param {number|string} [filters.maxAge] - Inclusive upper bound on `age`.
 * @param {string} [filters.role] - Exact `role` match; non-strings are ignored.
 * @param {string} [filters.city] - Exact `address.city` match; non-strings are ignored.
 * @param {object|string} [filters.sort] - Sort spec; defaults to newest first.
 * @param {number|string} [filters.limit] - Page size; defaults to 20, capped at 100.
 * @returns {object} The chainable query.
 */
function buildUserQuery(filters = {}) {
  const DEFAULT_LIMIT = 20;
  const MAX_LIMIT = 100;

  // Explicit presence check: a plain truthiness test would drop a valid 0.
  const isProvided = (value) =>
    value !== undefined && value !== null && value !== '';
  // Query-string parsers can turn `?role[$ne]=x` into an object; accepting
  // only strings keeps operator objects out of equality filters.
  const isNonEmptyString = (value) => typeof value === 'string' && value !== '';

  let query = User.find();
  if (isProvided(filters.minAge)) query = query.where('age').gte(filters.minAge);
  if (isProvided(filters.maxAge)) query = query.where('age').lte(filters.maxAge);
  if (isNonEmptyString(filters.role)) query = query.where('role').equals(filters.role);
  if (isNonEmptyString(filters.city)) query = query.where('address.city').equals(filters.city);

  // Parse the page size and bound it so a client cannot request an unbounded
  // or nonsensical result set.
  const requestedLimit = Number.parseInt(filters.limit, 10);
  const limit = Number.isInteger(requestedLimit) && requestedLimit > 0
    ? Math.min(requestedLimit, MAX_LIMIT)
    : DEFAULT_LIMIT;

  return query
    .sort(filters.sort || { createdAt: -1 })
    .limit(limit)
    .lean();
}
// Awaiting the query returned by buildUserQuery() is what executes it.
const users = await buildUserQuery(req.query);
Why it matters: Building dynamic queries with Mongoose's builder pattern is a common real-world task — filtering, sorting, and paginating search results based on query parameters.
Real applications: Search and filter APIs build queries dynamically based on incoming filter parameters using Mongoose's chainable query builder to construct complex MongoDB queries.
Common mistakes: Building query objects with raw MongoDB syntax ($gte, $lt) and then calling find() — Mongoose's query builder operators (.gte(), .lt()) are cleaner and equally performant.
// Model.aggregate() — runs a raw pipeline against the model's collection.
// Stages are named individually so the pipeline reads top to bottom.
const activeSinceStage = {
  $match: { active: true, createdAt: { $gte: startDate } },
};
const perRoleStage = {
  $group: {
    _id: "$role",
    count: { $sum: 1 },
    avgAge: { $avg: "$age" },
  },
};
const largestGroupFirstStage = { $sort: { count: -1 } };

const stats = await User.aggregate([
  activeSinceStage,
  perRoleStage,
  largestGroupFirstStage,
]);
// Results are plain objects, NOT Mongoose documents.
// find/save middleware is NOT triggered and schema types are NOT cast.

// Calling aggregate() on the Model (vs db.collection.aggregate) scopes the
// pipeline to that model's collection automatically.

// For reusability: the value returned by aggregate() can be stored first and
// awaited later — it only executes once awaited.
const UserStatsAggregation = User.aggregate([
  // ... stages
]);
await UserStatsAggregation; // lazy execution
Why it matters: Knowing when to use aggregate() vs find() shows Mongoose expertise — complex analytics belong in pipelines, not application loops over find() results.
Real applications: Admin dashboards use Model.aggregate() for sales summaries, user growth charts, and funnel analytics — computations that span many documents and require grouping.
Common mistakes: Using Model.aggregate() and expecting Mongoose middleware to run — pre/post hooks for find/save don't trigger for aggregations. Add explicit created/updated logic in the pipeline if needed.