python操作elasticsearch-chengxuyonghu-ChinaUnix博客

from elasticsearch import Elasticsearch
from elasticsearch.helpers import scan
from elasticsearch_dsl import Q, Search

# `scan` is provided by elasticsearch.helpers, not elasticsearch_dsl;
# importing it from elasticsearch_dsl fails with ImportError.
# Client created with the library defaults.
es = Elasticsearch()

1、仿照kibana中的写法
# Raw query body, written the same way it would be typed into Kibana.
q = {"query": {"match": {"name": "easy"}}}
# The client method is lowercase `search`; `es.Search` raises AttributeError.
res = es.search(body=q)
# The result matches what the same query returns in Kibana (JSON).

2、通过elasticsearch_dsl的Search方法
# Same match query, built through the elasticsearch_dsl Search object
# bound to the existing client.
s = Search(using=es).query("match", name="easy")
res = s.execute()
# MultiMatch(query='python django', fields=['title', 'body'])
# Match(title={"query": "web framework", "type": "phrase"})

3、通过Q方式写查询语句(避免了字典的多重嵌套)
# Q builds the query object directly, avoiding nested dict literals.
q = Q("match", name="easy")
s = Search(using=es).query(q)
# ignore_cache=True discards the cached response; per the original author's
# note, without it a re-executed Search returns the earlier result even
# after its query has been changed.
res = s.execute(ignore_cache=True)
可通过s.to_dict()验证语句
可通过s.query = Q("match",name='haha') 重建查询语句
#通过dsl的查询得到的结果有自己的调用方法，可用属性方式访问字段，例如 res.hits[0].name；res.hits[0].id 只在 _source 含 id 字段时可用，文档的 _id 需通过 res.hits[0].meta.id 获取

4、通过scan的方式
# Iterate over every matching document through the scan helper.
q = {"query": {"match": {"name": "easy"}}}
res = scan(es, query=q, size=100, preserve_order=True)
# NOTE(review): the original author observed that `size` did not limit the
# number of results (everything was returned) and that leaving out
# preserve_order=True raised an error on their machine. `size` looks like a
# per-request batch size rather than a cap, and the error is presumably a
# client/server version mismatch — confirm against the helpers.scan docs for
# the installed version.
doc = list(res)  # drain the iterator to collect all returned hits

import traceback
from pymongo import MongoClient
from elasticsearch import Elasticsearch

# Open the MongoDB connection, then select the 'blog' database.
_mongo_client = MongoClient('mongodb://127.0.0.1:27017')
_db = _mongo_client['blog']

# Open the Elasticsearch connection with the client's default settings.
_es = Elasticsearch()

# Mappings used when the index is first created: one entry per document type,
# each listing its fields and their Elasticsearch field types.
# NOTE(review): this declares two mapping types ('user' and 'blogpost') in a
# single index; newer Elasticsearch versions restrict mapping types — confirm
# against the target server version.
_index_mappings = {
    "mappings": {
        "user": {
            "properties": {
                "title": {"type": "text"},
                "name": {"type": "text"},
                "age": {"type": "integer"},
            },
        },
        "blogpost": {
            "properties": {
                "title": {"type": "text"},
                "body": {"type": "text"},
                "user_id": {"type": "keyword"},
                "created": {"type": "date"},
            },
        },
    },
}

# Create 'blog_index' with the configured mappings only if it does not exist.
# A truthiness test is used instead of `is not True`: the identity check
# treats any truthy non-bool response as "missing" and would try to create
# an index that already exists.
if not _es.indices.exists(index='blog_index'):
    _es.indices.create(index='blog_index', body=_index_mappings)

# Load all user documents from MongoDB. Elasticsearch auto-generates its own
# _id for these documents, so MongoDB's _id is dropped via the projection.
# The database handle is `_db`; the original referenced an undefined `db`.
user_cursor = _db.user.find({}, projection={'_id': False})
user_docs = list(user_cursor)

# Count of documents indexed successfully.
processed = 0
# Add each document fetched from MongoDB to Elasticsearch.
for _doc in user_docs:
    try:
        # refresh=True makes the new document searchable immediately; with the
        # default (false) the searches that follow may return no results.
        _es.index(index='blog_index', doc_type='user', refresh=True, body=_doc)
        processed += 1
        print('Processed: ' + str(processed), flush=True)
    except Exception:
        # Best effort: report the failure and continue with the next document.
        # Catching Exception rather than using a bare except lets
        # KeyboardInterrupt and SystemExit still stop the script.
        traceback.print_exc()

# Run a match_all query against the 'user' documents and print the response.
print('Search all...', flush=True)
_query_all = {
    'query': {
        'match_all': {},
    },
}
_searched = _es.search(index='blog_index', doc_type='user', body=_query_all)
print(_searched, flush=True)

# Print the source document of every returned hit.
for hit in _searched['hits']['hits']:
    print(hit['_source'], flush=True)

# Look up the users whose name matches 'jerry' and print the raw response.
print('Search name contains jerry.', flush=True)
_query_name_contains = {'query': {'match': {'name': 'jerry'}}}
_searched = _es.search(
    index='blog_index',
    doc_type='user',
    body=_query_name_contains,
)
print(_searched, flush=True)

#coding=utf-8
from datetime import datetime
from elasticsearch import Elasticsearch

# Connect to Elasticsearch; the default port is 9200.
es = Elasticsearch()

# 创建索引，索引的名字是my-index,如果已经存在了，就返回个400，
# 这个索引可以现在创建，也可以在后面插入数据的时候再临时创建
# es.indices.create(index='my-index', ignore=400)
#es.indices.create(index='my-index', )

# 也可以，在插入数据的时候再创建索引"my_index",单个插入12条数据
# es.index(index="my-index", doc_type="my-type", id=1, body={"name": "xiaoming", "age": 18})
# es.index(index="my-index", doc_type="my-type", id=2, body={"name": "daming", "age": 18})
# es.index(index="my-index", doc_type="my-type", id=3, body={"name": "xiaoxue", "age": 19})
# es.index(index="my-index", doc_type="my-type", id=4, body={"name": "daxue", "age": 19})
# es.index(index="my-index", doc_type="my-type", id=5, body={"name": "xiaojun", "age": 20})
# es.index(index="my-index", doc_type="my-type", id=6, body={"name": "dajun", "age": 20})
# es.index(index="my-index", doc_type="my-type", id=7, body={"name": "xiaohua", "age": 21})
# es.index(index="my-index", doc_type="my-type", id=8, body={"name": "dahua", "age": 21})
# es.index(index="my-index", doc_type="my-type", id=9, body={"name": "xiaozhang", "age": 22})
# es.index(index="my-index", doc_type="my-type", id=10, body={"name": "dazhang", "age": 22})
# es.index(index="my-index", doc_type="my-type", id=11, body={"name": "xiaomei", "age": 23})
# es.index(index="my-index", doc_type="my-type", id=12, body={"name": "damei", "age": 23})

#根据id删除数据
# es.delete(index="my-index", doc_type="my-type", id=1)

# 获取age=18的所有值match
# res = es.search(index="my-index", body={'query': {'match': {'age':18}}})

# Fetch every document in the index with match_all.
res = es.search(index="my-index", body={'query': {'match_all': {}}})
# print() calls replace the Python 2 print statements, which are syntax
# errors in Python 3; the output is unchanged.
print(res)
print('*' * 100)

# Two ways to read data: get and search.
# get: fetch one document by id
# res = es.get(index="my-index", doc_type="my-type", id=1)
# print('res_001 = ', res)
print('*' * 100)

# search: run a query
# res = es.search(index="test-index", body={"query": {"match_all": {}}})
print('*' * 100)

# CURL的操作

# ES查询

# curl是利用URL语法在命令行方式下工作的开源文件传输工具，使用curl可以简单实现常见的get/post请求。简单的认为是可以在命令行下面访问url的一个工具。
# curl
# -X 指定http的请求方法有HEAD GET POST PUT DELETE
# -d 指定要传输的数据
# -H 指定http请求头信息

# elasticsearch rest api遵循的格式为：
# curl -X<VERB> '<PROTOCOL>://<HOST>:<PORT>/<PATH>?<QUERY_STRING>' -d '<BODY>'

# 检查es版本信息
# curl IP:9200

# 查看集群是否健康
# curl 'IP:9200/_cat/health?v'

# 查看节点列表
# curl 'IP:9200/_cat/nodes?v'

# 列出所有索引及存储大小
# curl 'IP:9200/_cat/indices?v'

# 创建索引
# 创建索引名为XX,默认会有5个分片，1个副本
# curl -XPUT 'IP:9200/XX?pretty'
# 创建后查看，列出所有索引及存储大小
# curl 'IP:9200/_cat/indices?v'

# 示例 es.index(index="my-index", doc_type="my-type", id=1, body={"name": "xiaoming", "age": 18})数据格式如此所示，由以上文中python代码插入的数据
# 01、显示es的版本等信息
# curl -XGET 'IP:9200'

# 02、curl命令查询某个索引库index下的所有数据（在url后面加上一个pretty则会对返回结果进行格式化）
# curl -XGET 'IP:9200/索引/_search?pretty'
# curl -XGET 'IP:9200/my-index/_search?pretty'

# 03、curl命令查询某个type下的所有数据（在url后面加上一个pretty则会对返回结果进行格式化）
# curl -XGET 'IP:9200/索引/类型/_search?pretty'
# curl -XGET 'IP:9200/my-index/my-type/_search?pretty'

#
# 04、根据id查询具体的一条记录：
# curl -XGET 'IP:9200/my-index/my-type/1?pretty'
# 输出
# {
# "_index" : "my-index",
# "_type" : "my-type",
# "_id" : "1",
# "_version" : 2,
# "found" : true,
# "_source" : {
# "age" : 18,
# "name" : "xiaoming"
# }
# }

# 05、查询一条索引文档中的具体的字段：
# curl -XGET 'IP:9200/my-index/my-type/1?_source=name'
# 输出
# {"_index":"my-index","_type":"my-type","_id":"1","_version":2,"found":true,"_source":{"name":"xiaoming"}}

# 06、如果要查询多个字段，使用","进行隔开
# curl -XGET 'IP:9200/my-index/my-type/1?_source=name,age'
# 输出
# {"_index":"my-index","_type":"my-type","_id":"1","_version":2,"found":true,"_source":{"name":"xiaoming","age":18}}

# 07、获取source所有数据
# curl -XGET 'IP:9200/my-index/my-type/1?_source'
# 输出
# {"_index":"my-index","_type":"my-type","_id":"1","_version":2,"found":true,"_source":{"name":"xiaoming","age":18}}

# 08、根据条件进行查询name是xiaoming的
# curl -XGET 'IP:9200/my-index/my-type/_search?q=name:xiaoming'
# 输出
# {"took":14,"timed_out":false,"_shards":{"total":5,"successful":5,"skipped":0,"failed":0},"hits":{"total":1,"max_score":0.2876821,"hits":[{"_index":"my-index","_type":"my-type","_id":"1","_score":0.2876821,"_source":{"age":18,"name":"xiaoming"}}]}}

# 09、根据条件进行查询name是xiaoming和name是xiaoxue的，xiaoming和xiaoxue之间用","隔开
# curl -XGET 'IP:9200/my-index/my-type/_search?q=name:xiaoming,xiaoxue'
# 输出
# {"took":134,"timed_out":false,"_shards":{"total":5,"successful":5,"skipped":0,"failed":0},"hits":{"total":2,"max_score":0.9808292,"hits":[{"_index":"my-index","_type":"my-type","_id":"2","_score":0.9808292,"_source":{"age":19,"name":"xiaoxue"}},{"_index":"my-index","_type":"my-type","_id":"1","_score":0.2876821,"_source":{"age":18,"name":"xiaoming"}}]}}

# ES更新
########################################### 原报406错误：请求缺少Content-Type头 ###########################################
# ES使用PUT或者POST对文档进行更新，如果指定ID的文档已经存在，则执行更新操作
# 注意：执行更新操作的时候，ES首先将旧的文档标记为删除状态，然后添加新的文档，旧的文档不会立即消失，但是你也无法访问，ES会继续添加更多数据的时候在后台清理已经标记为删除状态的文档。
# 局部更新
# 可以添加新字段或者更新已经存在字段(必须使用POST)
# ES 6.0起会严格校验带请求体的请求的Content-Type，缺少该请求头时返回406，需加上 -H 'Content-Type: application/json'
# curl -XPOST -H 'Content-Type: application/json' 'IP:9200/my-index/my-type/1/_update' -d '{"doc":{"name": "xuexue", "age": "11"}}'
########################################### 加上Content-Type请求头后406错误即可解决 ###########################################

# 10、使用文件的方式
# curl -XPOST/PUT http://master:9200/index/type/_bulk --data-binary @path
# 比如
# curl -XPOST 'http://master:9200/index/type/_bulk' --data-binary @/home/uplooking/Documents/accounts.json
---------------------