db.data.drop()
db.data.insertOne({"name": "val1"})
db.data.insertOne({"_id": "id100", "name": "val1"})
db.data.insertMany([
    { "name": "val1" },
    { "name": "val2" }
])
db.data.insertMany([
    { "name": "val1" },
    { "name": "val2" }
], { "ordered": false })
_id is of type ObjectId() by default
12 bytes (shown as a 24-char hex string): 4-byte timestamp + 3-byte machine id + 2-byte process id + 3-byte counter
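// e.g. the creation time embedded in the first 4 bytes can be read back in the shell:
var id = ObjectId()
id.getTimestamp() // ISODate with the creation time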
db.data.find()
db.data.find().pretty()
db.data.find({"rated": "PG-13"})
db.data.find({"rated": "PG-13"}).count()
db.data.find({"rated": "PG-13", "year": 2009})
db.data.find({"tomato.meter": 100})
db.data.find({"writers": ["Ethan", "Joel"]}) // MATCH ENTIRE ARRAY
db.data.find({"actors": "Brad"}) // MATCH ANY ELEMENT OF ARRAY
db.data.find({"actors.0": "Brad"}) // MATCH FIRST ELEMENT OF ARRAY
// cursor
var c = db.data.find()
var doc = function() { return c.hasNext() ? c.next() : null; }
doc()
c.objsLeftInBatch()
// projection
db.data.find({"rated": "PG-13"}, { title: 1, _id: 0 })
// query operators
// comparison: $eq, $ne, $lt, $lte, $gt, $gte, $in, $nin
db.data.find({"rated": { $ne: "UNRATED" }})
db.data.find({"runtime": { $gt: 90 }})
db.data.find({"runtime": { $gte: 90, $lte: 120 }})
db.data.find({"tomato.meter": {$gt: 95}, "runtime": { $gte: 90, $lte: 120 }})
db.data.find({"rated": { $in: ["G", "PG"] }})
// element: $exists, $type
db.data.find({"tomato.meter": { $exists: true }}).count()
db.data.find({"tomato.meter": { $exists: false }}).count()
db.data.find({"_id": { $type: "string" }})
// logical operators: $or, $and, $not, $nor
db.data.find({ $or: [ {"tomato.meter": { $gte: 95 }}, {"metacritic": { $gt: 88 }} ] })
db.data.find({ $and: [ {"metacritic": { $ne: null }}, {"metacritic": { $exists: true }} ] })
// regex
db.data.find({ "awards.text": { $regex: /^Won\s.*/ } })
db.data.find({ "awards.text": { $regex: '^Won\\s.*' } })
db.data.find({ "awards.text": { $regex: '^Won\\s.*', $options: "i" } })
// array operators: $all, $size, $elemMatch
{ "genre": { $all: ["Comedy", "Crime", "Drama"] } }
{ "country": { $size: 1 } }
{ "boxoffice": { $elemMatch: { "country": "UK", "revenue": { $gt: 15 } } } }
db.movieDetails.find({'awards.oscars.award': 'bestPicture'})
db.movieDetails.updateMany({ year: {$gte: 2010, $lte: 2013},
                             "imdb.votes": {$lt: 10000},
                             $and: [{"tomato.consensus": {$exists: true}},
                                    {"tomato.consensus": null}] },
                           { $unset: { "tomato.consensus": "" } });
// update
db.data.updateOne({"title": "The Martian"}, {$set: {"poster": "http://d.c"}})
db.data.updateOne({"title": "The Martian"}, {$set: {"awards": {"wins": 0, "nominations": 2}}})
// update operators
// $set, $unset, $rename, $min, $max, $inc, $mul, ...
db.data.updateOne({"title": "Martian"}, {$inc: {"tomato.reviews": 4, "tomato.userReviews": 3}})
// array update operators: $addToSet, $pop, $pull, $pullAll, $push
db.data.updateOne(
    {"title": "Martian"},
    { $push: { "reviews":
        { "rating": 1, "author": "Someone", "text": ".."}
    } }
)
db.data.updateOne(
    {"title": "Martian"},
    { $push: { "reviews":
        { $each: [
            { "rating": 1, "author": "Someone", "text": ".."},
            { "rating": 2, "author": "Someone2", "text": ".."}
        ] }
    } }
)
// update many
db.data.updateMany({"rated": null}, {$set: {"rated": "UNRATED"} })
db.data.updateMany({"rated": null}, {$unset: {"rated": ""} })
// upsert
db.data.update({...}, {...}, {upsert: true})
// replaceOne
db.data.replaceOne()
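// replaceOne swaps the whole document (except _id) for the given replacement; a minimal sketch (field values are made up):
db.data.replaceOne({"title": "Martian"}, {"title": "Martian", "year": 2015, "rated": "PG-13"})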
cursor modifiers are applied server-side in a fixed order: SORT -> SKIP -> LIMIT
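// i.e. no matter how the cursor methods are chained, both of these return the same documents:
db.data.find().sort({"year": -1}).skip(10).limit(5)
db.data.find().limit(5).skip(10).sort({"year": -1})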
HOMEWORK
1:
db.grades.find({'score': {$gte: 65}}, {'_id': 0, 'student_id': 1}).sort({'score': 1}).limit(1)
2:
```python
import pymongo

client = pymongo.MongoClient()
grades = client.students.grades

last_student = None
for doc in grades.find({'type': 'homework'}).sort([('student_id', pymongo.ASCENDING), ('score', pymongo.ASCENDING)]):
    if last_student != doc['student_id']:
        # first (lowest-scored) homework doc for this student: remove it
        last_student = doc['student_id']
        grades.delete_one({'_id': doc['_id']})
```
3:
ch4:
// hint: force use of a particular index
db.students.find({student_id: {$gt: 500000}, class_id: 54}).sort({student_id: 1}).hint({class_id: 1}).explain("executionStats")
// profiling
db.getProfilingStatus()
db.getProfilingLevel()
db.setProfilingLevel(0) // do not log queries
db.setProfilingLevel(1, 4) // log queries slower than 4 ms (slowms)
db.setProfilingLevel(2) // log all queries
db.system.profile.find()
db.system.profile.find().sort({ts: 1})
db.system.profile.find({ns: /school.students/})
db.system.profile.find({millis: {$lt: 1}}, {ts: 1, millis: 1}).sort({ts: -1})
mongotop
mongotop 5 // refresh every 5 seconds
mongostat
storage engines
MMAPv1:
  collection-level locking
  in-place updates
  power-of-two record allocation
WiredTiger:
  document-level concurrency (optimistic, no locking needed)
  compression (of document data and indexes)
  append-only (no in-place updates)
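// the active storage engine can be checked at runtime:
db.serverStatus().storageEngine // e.g. { "name" : "wiredTiger", ... }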
killall mongod
mkdir WT
mongod --dbpath WT --storageEngine wiredTiger
mongod --dbpath WT --storageEngine wiredTiger --wiredTigerIndexPrefixCompression true
db.foo.stats()
indexes: B-tree
db.students.createIndex({student_id: 1})
db.students.createIndex({student_id: 1, class_id: -1})
db.students.createIndex({location: '2d'})
db.students.createIndex({location: '2dsphere'})
// full text search
db.students.createIndex({location: 'text'})
db.data.find({'$text': {'$search': 'dog cat'}}) // matches documents containing 'dog' OR 'cat'
db.students.getIndexes()
db.students.dropIndex({student_id: 1})
// multikey indexes - created automatically when the indexed field is an array
db.students.createIndex({student_id: 1}, {'unique': true})
db.students.createIndex({student_id: 1}, {'unique': true, 'sparse': true}) // sparse - documents missing the field are left out of the index
db.students.createIndex({student_id: 1}, {'background': true})
foreground index creation:
  fast, but blocks all reads/writes on the whole DB (all collections)
background index creation:
  slower, but does not block
explain works with find, update, remove, aggregate (won't work with insert)
db.students.explain().find({student_id: 5}) // COLLSCAN, docsExamined
db.students.explain(true).find({student_id: 5}) // COLLSCAN, docsExamined
var exp = db.students.explain()
exp.help()
exp.find({'name': 'noo'}).sort({'name': 1})
explain verbosity:
  queryPlanner // default
  executionStats
  allPlansExecution
var exp = db.students.explain('executionStats')
var exp = db.students.explain('allPlansExecution')
covered query: the query is satisfied entirely by an index (no documents need to be inspected)
NO  db.ijk_data.find({'i': 10, 'k': 20})                                     // returns whole documents
NO  db.ijk_data.find({'i': 10, 'k': 20}, {'_id': 0})                         // fields outside the index may still be returned
YES db.ijk_data.find({'i': 10, 'k': 20}, {'_id': 0, 'i': 1, 'j': 1, 'k': 1})
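// sketch: with a compound index on i, j, k the YES query above is covered - explain shows totalDocsExamined: 0
db.ijk_data.createIndex({'i': 1, 'j': 1, 'k': 1})
db.ijk_data.explain('executionStats').find({'i': 10, 'k': 20}, {'_id': 0, 'i': 1, 'j': 1, 'k': 1})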
db.examples.stats()
db.examples.totalIndexSize()
db.students.find({'scores': { $elemMatch: {'type': 'exam', 'score': {$gt: 98}} } })
db.stores.find({ loc: { $near: { $geometry: { type: "Point", coordinates: [-130, 39] }, $maxDistance: 1000000 } } })
for (i = 0; i < 10; i++) { db.ex.insert({'a': i*i}) }
for (i = 0; i < 10; i++) { x = []; for (j = 0; j < 10; j++) { x.push({"key": i, "val": j}); }; db.ex.insert({'arr': x}); }
aggregation pipeline:
  $project (reshape doc)
  $match
  $group
  $sort (100 MB limit for in-memory sort, or disk-based sort)
  $skip (after sort)
  $limit (after sort)
  $unwind (normalize/explode array data)
  $out (redirect result to a collection)
  $redact
  $geoNear
$group:
  $sum $avg $min $max
  $push $addToSet (for building arrays)
  $first $last (only useful with $sort)
explain: db.zips.aggregate([...], {explain: true})
allowDiskUse (100 MB limit for in-memory aggregation): db.zips.aggregate([...], {allowDiskUse: true})
cursor (initial batch size): db.zips.aggregate([...], {cursor: {batchSize: 10}})
db.products.aggregate([{$group: {_id: "$manufacturer", num_products: {$sum: 1}}}])
db.products.aggregate([{$group: {_id: {"cat": "$category"}, num_products: {$sum: 1}}}])
db.products.aggregate([{$group: {_id: {"maker": "$manufacturer", "cat": "$category"}, num_products: {$sum: 1}}}])
db.products.aggregate([{$group: {_id: {"maker": "$manufacturer"}, sum_price: {$sum: "$price"}}}])
db.zips.aggregate([{$group: { _id: "$state", "population": {$sum: "$pop"}}}])
db.zips.aggregate([{$group: { _id: "$_id", "average_pop": {$avg: "$pop"}}}])
db.zips.aggregate([{"$group": {"_id": "$state", "average_pop": {"$avg": "$pop"}}}])
db.zips.aggregate([{$group: {_id: "$city", "postal_codes": {$addToSet: "$_id"}}}])
db.zips.aggregate([{$group: {_id: "$state", "pop": {$max: "$pop"}}}])
db.grades.aggregate([
    {'$group': {_id: {class_id: "$class_id", student_id: "$student_id"}, 'average': {"$avg": "$score"}}},
    {'$group': {_id: "$_id.class_id", 'average': {"$avg": "$average"}}}])
db.products.aggregate([
    {$project: {
        _id: 0,
        'maker': {$toLower: "$manufacturer"},
        'details': {'category': "$category",
                    'price': {"$multiply": ["$price", 10]}
        },
        'item': '$name'
    }}
])
db.zips.aggregate([{$project: {_id: 0, "city": {$toLower: "$city"}, "pop": 1, "state": 1, "zip": "$_id"}}])
db.zips.aggregate([
    {$match:
        {
            state: "NY"
        }
    },
    {$group:
        {
            _id: "$city",
            population: {$sum: "$pop"},
            zip_codes: {$addToSet: "$_id"}
        }
    },
    {$project:
        {
            _id: 0,
            city: "$_id",
            population: 1,
            zip_codes: 1
        }
    }
])
db.zips.aggregate([{$match: {pop: {$gt: 100000}}}])
db.zips.aggregate([
    {$match: {"state": "NY"}},
    {$group: {_id: "$city", "pop": {$sum: "$pop"}}},
    {$project: {_id: 0, "city": "$_id", pop: 1}},
    {$sort: {pop: -1}} ])
db.zips.aggregate([
    {$match: {pop: {$gt: 100000}}},
    {$skip: 4},
    {$limit: 10}
])
use agg
db.zips.aggregate([
    /* get the population of every city in every state */
    {$group:
        {
            _id: {state: "$state", city: "$city"},
            population: {$sum: "$pop"}
        }
    },
    /* sort by state, population */
    {$sort:
        {"_id.state": 1, "population": -1}
    },
    /* group by state, get the first item in each group */
    {$group:
        {
            _id: "$_id.state",
            city: {$first: "$_id.city"},
            population: {$first: "$population"}
        }
    }
])
db.posts.aggregate([
    /* unwind by tags */
    {"$unwind": "$tags"},
    /* now group by tags, counting each tag */
    {"$group":
        {"_id": "$tags",
         "count": {$sum: 1}
        }
    },
    /* sort by popularity */
    {"$sort": {"count": -1}},
    /* show me the top 10 */
    {"$limit": 10},
    /* change the name of _id to be tag */
    {"$project":
        {_id: 0,
         'tag': '$_id',
         'count': 1
        }
    }
])
use agg
db.inventory.drop();
db.inventory.insert({'name': "Polo Shirt", 'sizes': ["Small", "Medium", "Large"], 'colors': ['navy', 'white', 'orange', 'red']})
db.inventory.insert({'name': "T-Shirt", 'sizes': ["Small", "Medium", "Large", "X-Large"], 'colors': ['navy', "black", 'orange', 'red']})
db.inventory.insert({'name': "Chino Pants", 'sizes': ["32x32", "31x30", "36x32"], 'colors': ['navy', 'white', 'orange', 'violet']})
db.inventory.aggregate([
    {$unwind: "$sizes"},
    {$unwind: "$colors"},
    {$group:
        {
            '_id': {'size': '$sizes', 'color': '$colors'},
            'count': {'$sum': 1}
        }
    }
])
db.products.aggregate([
    {$group: {_id: "$manufacturer", num_products: {$sum: 1}}}, // output _id must be unique
    {$out: 'collection_name'} // will drop the target collection if it exists !!!
])
// hw5.1: find the author with the most comments
db.posts.aggregate([
    {$project: {"_id": 0, "authors": "$comments.author"}},
    {$unwind: "$authors"},
    {$group: {"_id": "$authors", 'sum': {$sum: 1}}},
    {$sort: {"sum": -1}}
])
// hw5.2: find the avg population of cities in CA and NY with a population over 25000
db.zips.aggregate([
    {$match: {"state": {$in: ["CA", "NY"]} }},
    {$group: { _id: { "state": "$state", "city": "$city"}, "pop": {$sum: "$pop"} }},
    {$match: {"pop": {$gt: 25000}}},
    {$group: {"_id": null, "avg_pop": {$avg: "$pop"}}}
])
// hw5.3: class with the highest avg student score (excluding quizzes)
db.grades.aggregate([
    {$project: { _id: 0, "class_id": 1, "student_id": 1, "scores": 1}},
    {$unwind: "$scores"},
    {$match: {"scores.type": {$ne: "quiz"}}},
    {$group: {"_id": {"class_id": "$class_id", "student_id": "$student_id"}, "avg_score": {$avg: "$scores.score"}}},
    {$group: {"_id": "$_id.class_id", "avg_scores": {$avg: "$avg_score"}}},
    {$sort: {"avg_scores": -1}}
])
// hw5.4: find the number of people who live in a city whose name starts with a digit
db.zips.aggregate([
    {$match: {"city": {$regex: /^[0-9]/}}},
    {$group: {_id: null, "pop_sum": {$sum: "$pop"}}}
])
write concern:
  w: 1 (how many nodes must acknowledge the write)
  j: false (do not wait for the journal to be flushed to disk)
  wtimeout: how long to wait for the acknowledgement
  (all of them may be set per connection, per collection, or in the replica set config)
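// e.g. a per-operation write concern in the shell (the values here are illustrative):
db.data.insertOne({"name": "val1"}, {writeConcern: {w: "majority", j: true, wtimeout: 5000}})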
replica set:
  1 node is primary (writes and reads), the other nodes are secondaries (reads only)
  replication is asynchronous
  if the primary goes down, one of the secondary nodes becomes primary (election)
  minimal number of nodes in a replica set: 3
types of nodes in a replica set (see the rs.add() sketch below):
  - regular (has data, can become primary)
  - arbiter (only for voting purposes)
  - delayed (disaster recovery node, set X hours behind the other nodes, cannot become primary)
  - hidden (e.g. for analytics, cannot become primary)
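// a sketch of how the node types map to member config options (hosts and _ids are placeholders):
rs.add({ _id: 3, host: "localhost:27020", arbiterOnly: true }) // arbiter
rs.add({ _id: 4, host: "localhost:27021", priority: 0, hidden: true }) // hidden
rs.add({ _id: 5, host: "localhost:27022", priority: 0, hidden: true, slaveDelay: 3600 }) // delayed by 1 hour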
strong consistency:
  read and write only from the primary node
eventual consistency:
  write to the primary, but read from secondaries
  you will eventually read what you wrote, but possibly with a small delay
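// to actually read from a secondary the client must opt in, e.g. in the shell:
rs.slaveOk() // per-connection: allow reads on a secondary
db.getMongo().setReadPref('secondaryPreferred')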
// create replica set
#!/usr/bin/env bash
mkdir -p /data/rs1 /data/rs2 /data/rs3
mongod --replSet m101 --logpath "1.log" --dbpath /data/rs1 --port 27017 --oplogSize 64 --smallfiles --fork
mongod --replSet m101 --logpath "2.log" --dbpath /data/rs2 --port 27018 --oplogSize 64 --smallfiles --fork
mongod --replSet m101 --logpath "3.log" --dbpath /data/rs3 --port 27019 --oplogSize 64 --smallfiles --fork
// init replica set
config = { _id: "m101", members: [
    { _id: 0, host: "localhost:27017" },
    { _id: 1, host: "localhost:27018" },
    { _id: 2, host: "localhost:27019" } ]
};
rs.initiate(config);
rs.status()
rs.config() // show rs config
rs.slaveOk() // allow reads from a secondary node
rs.stepDown() // ask the primary to step down, triggering an election
rs.help()
each rs node has an oplog (secondaries constantly replay the primary's oplog)
use local
show collections
db.oplog.rs.find()
after a failover, a returning node reconnects to the primary and catches up from the oplog; a node that is brand new (or too far behind) copies the entire dataset (initial sync)
proper handling of failover:
insert: catch AutoReconnect errors: if AutoReconnect occurs, sleep X seconds and retry the operation
import time
import pymongo

client = pymongo.MongoClient()
things = client.test.things  # db/collection names as in the course example

for i in range(100):
    for retry in range(3):
        try:
            things.insert_one({'_id': i})
            print("Inserted Document: " + str(i))
            time.sleep(.1)
            break
        except pymongo.errors.AutoReconnect as e:
            print("Exception", type(e), e)
            print("Retrying..")
            time.sleep(5)
        except pymongo.errors.DuplicateKeyError as e:
            # the first attempt reached the server before the failover
            print("duplicate..but it worked")
            break
read: retry
update:
  - idempotent ($set): retry
  - non-idempotent ($inc, $push): depends on the application (see the sketch below)
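// sketch of why a $set retry is safe but an $inc retry is not (hypothetical doc):
db.data.updateOne({"_id": 1}, {$set: {"views": 5}}) // replayed after AutoReconnect: still 5
db.data.updateOne({"_id": 1}, {$inc: {"views": 1}}) // replayed: incremented twice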
read preference:
  - Primary
  - PrimaryPreferred
  - Secondary (only secondaries)
  - SecondaryPreferred
  - Nearest (closest ping time)
sharding
mongos - router in front of the shards
shard key:
- every doc must include the shard key
- the shard key is immutable
- the shard key must be indexed (a non-multikey index that starts with the shard key)
- an update must specify the shard key (or set multi: true)
- queries that do not include the shard key are sent to all shards (scatter-gather)
- a unique index is not allowed (unless it starts with the shard key)
types of sharding (see the sh.shardCollection() sketch below):
- range-based (s1: 1..100, s2: 101..200, ...)
- hash-based (hashing over the shard key)
sh.status()
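// a minimal sketch of enabling each type (db/collection names are placeholders):
sh.enableSharding("school")
sh.shardCollection("school.students", { student_id: 1 }) // range-based
sh.shardCollection("school.logs", { host: "hashed" }) // hash-based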
# final exam
## Q1
```javascript
db.messages.find({'headers.From': '[email protected]', 'headers.To': '[email protected]'}, {'headers.From': 1, 'headers.To': 1})
```
## Q2
```javascript
// wrong
//db.messages.aggregate( {$project: {'_id': 0, 'From': '$headers.From', 'To': '$headers.To'}}, {$unwind: '$To'}, {'$group': {'_id': {'From': '$From', 'To': '$To'}, 'sum': {$sum: 1} }}, {$sort: {"sum": -1}} )
// correct
db.messages.aggregate( {$project: {'From': '$headers.From', 'To': '$headers.To'}}, {$unwind: '$To'}, {$group: {_id: {'Msg': '$_id', 'From': '$From'}, 'To': {$addToSet: '$To'}}}, {$unwind: '$To'}, {'$group': {'_id': {'From': '$_id.From', 'To': '$To'}, 'sum': {$sum: 1} }}, {$sort: {"sum": -1}} )
```
## Q3
```javascript
// get the 'To' count
db.messages.aggregate( {$match: {'headers.Message-ID': '<8147308.1075851042335.JavaMail.evans@thyme>'}}, {$project: {'From': '$headers.From', 'To': '$headers.To'}}, {$unwind: '$To'}, {$group: {'_id': null, 'sum': {$sum: 1}}} )
// update
db.messages.update({'headers.Message-ID': '<8147308.1075851042335.JavaMail.evans@thyme>'}, {$push: {'headers.To': '[email protected]'}})
```
## Q4
```python
try:
    self.posts.update_one({'permalink': permalink}, {'$inc': {"comments.{ord}.num_likes".format(ord=comment_ordinal): 1}})
except Exception as err:
    print('Error INC likes. Error: {err}'.format(err=err))
```