Skip to content. | Skip to navigation

Personal tools

Navigation

You are here: Home / Tips / NoSQL

NoSQL

MongoDB https://www.itread01.com/content/1541437023.html HBase https://www.infoq.cn/article/JAl9mSCpyS8vX80TYc4D https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_parquet.html https://stackoverflow.com/questions/41498672/how-to-convert-an-500gb-sql-table-into-apache-parquet Apache Arrow / Parquet 是 file-based columnar store,可以只讀取整個 dataset 中你需要的幾個 columns https://medium.com/@danielmiller5791/using-airflow-and-spark-to-crunch-us-immigration-data-aa7c95b37031

和關聯資料庫裡的 exists 不同, 下列範例是傳回不包含欄位 id 的所有記錄

> db.customers.find({id:{$exists:false}})

Large Scale Migration

why-sql-beating-nosql-what-this-means-for-future-of-data-time-series-database

from pymongo import MongoClient
# Create a client with pymongo's default connection settings, then select the
# database and collection by name (dict-style lookup is equivalent to
# attribute access in pymongo).
client = MongoClient()
db = client['nobel_prize']
coll = db['winners']

from pymongo import MongoClient
def get_mongo_database(db_name, host='localhost', port=27017, username=None, password=None):
  """Return a handle to the MongoDB database ``db_name``.

  Args:
    db_name: Name of the database to open. When credentials are given it is
      also placed in the path of the connection URI.
    host: Server host name or address.
    port: Server port.
    username: Optional user name. Authentication is only attempted when both
      ``username`` and ``password`` are truthy.
    password: Optional password.

  Returns:
    The object obtained by indexing the ``MongoClient`` with ``db_name``.
  """
  if username and password:
    # Function-scope import so the block does not depend on file-level imports.
    from urllib.parse import quote_plus

    # Credentials must be percent-escaped; otherwise characters such as
    # '@', ':' or '/' in a password corrupt the URI.
    mongo_uri = 'mongodb://%s:%s@%s/%s' % (
        quote_plus(username), quote_plus(password), host, db_name)
    # Also pass ``port``: pymongo uses it as the default port for URI hosts
    # that do not name one, so a non-default port is no longer ignored here.
    conn = MongoClient(mongo_uri, port)
  else:
    conn = MongoClient(host, port)
  return conn[db_name]

# Open the winners collection through get_mongo_database (the original call
# misspelled the function name, which raises NameError).
# DB_NOBEL_PRIZE and COLL_WINNERS are expected to be defined elsewhere.
db = get_mongo_database(DB_NOBEL_PRIZE)
coll = db[COLL_WINNERS]

# Sample documents to store in the winners collection.
nobel_winners = [
  {'category': 'Physics',
   'name': 'Albert Einstein',
   'nationality': 'Swiss',
   'sex': 'male',
   'year': 1921},
]

# Collection.insert() was deprecated in PyMongo 3 and removed in PyMongo 4;
# insert_many() is the supported call for a list of documents.
coll.insert_many(nobel_winners)

# Query 1: every document whose 'category' is 'Physics'.
res = coll.find({'category':'Physics'})
# Materialise the query result as a list.
list(res)
# Query 2: documents with 'year' greater than 1930 OR 'sex' equal to 'female'.
res = coll.find({'$or':[{'year': {'$gt': 1930}}, {'sex':'female'}]})
list(res)


# By default the '_id' field is removed from every returned document.
def mongo_coll_to_dicts(dbname='test', collname='test', query=None, del_id=True, **kw):
  """Return the documents of a MongoDB collection as a list.

  Args:
    dbname: Name of the database, forwarded to ``get_mongo_database``.
    collname: Name of the collection inside that database.
    query: Filter passed to ``find``; ``None`` (the default) means an empty
      filter ``{}``.
    del_id: When true, drop the ``'_id'`` key from each document.
    **kw: Extra keyword arguments forwarded to ``get_mongo_database``.

  Returns:
    A list with the result of ``find(query)`` on the collection.
  """
  db = get_mongo_database(dbname, **kw)
  # Build the empty filter per call instead of sharing a mutable default.
  docs = list(db[collname].find({} if query is None else query))
  if del_id:
    for doc in docs:
      # Use a default so a document without '_id' does not raise KeyError.
      doc.pop('_id', None)
  return docs


# Root URL that REST_country_request prefixes to every endpoint path.
REST_EU_ROOT_URL = "http://restcountries.eu/rest/v1"

def REST_country_request(field='all', name=None, params=None):
  """Send a GET request to the REST countries service and return the response.

  Args:
    field: ``'all'`` requests ``<root>/all``; any other value requests
      ``<root>/<field>/<name>``.
    name: Value to look up for ``field``; required unless ``field`` is
      ``'all'``.
    params: Optional dict of query-string parameters.

  Returns:
    The ``requests`` response object, returned only when its status is 200.

  Raises:
    ValueError: If ``field`` is not ``'all'`` and ``name`` is ``None``.
    Exception: If the response status code is not 200.
  """
  headers = {'User-Agent': 'Mozilla/5.0'}
  if not params:
    params = {}
  if field == 'all':
    url = REST_EU_ROOT_URL + '/all'
  else:
    if name is None:
      # Without this check the URL would end in the literal text 'None'.
      raise ValueError('name is required when field is not "all"')
    # No trailing space: the original format string appended one to the URL.
    url = '%s/%s/%s' % (REST_EU_ROOT_URL, field, name)
  print('Requesting URL: ' + url)
  # Both branches share the same headers, params and status check.
  response = requests.get(url, params=params, headers=headers)

  if response.status_code != 200:
    raise Exception('Request failed with status code ' + str(response.status_code))
  return response


# Request the 'currency' endpoint with the value 'usd'.
response = REST_country_request('currency', 'usd')
# Decode the response body as JSON.
response.json()


# Fetch the '/all' endpoint and store the decoded JSON in the 'country_data'
# collection of the 'nobel_prize' database.
db_nobel = get_mongo_database('nobel_prize')
col = db_nobel['country_data']
response = REST_country_request()
# Collection.insert() was deprecated in PyMongo 3 and removed in PyMongo 4;
# insert_many() is the supported bulk call. It needs a list of documents,
# which the '/all' response is presumed to be - confirm against the API.
col.insert_many(response.json())

# Find documents whose 'currencies' field matches 'USD' via the $in operator.
res = col.find({'currencies':{'$in':['USD']}})
# Materialise the query result as a list.
list(res)