（又是好多天没怎么更新，忙成狗啊）

* card.py from collections import Counter import numpy as np import itertools
def cmp_to_key(mycmp):
    """Convert a cmp= function into a key= function.

    Args:
        mycmp: two-argument comparison callable returning a negative number,
            zero or a positive number.

    Returns:
        A key class suitable for ``sorted(..., key=...)`` / ``list.sort``.
    """
    # The standard library ships exactly this adapter; the import is local so
    # the function does not rely on the file's import header.
    from functools import cmp_to_key as _cmp_to_key
    return _cmp_to_key(mycmp)


def get_action_space():
    """Enumerate every play the game recognises, as lists of card names.

    Index 0 is the empty list (used by ``Game.step`` as "pass"). The remaining
    entries are appended category by category in a fixed order, so the index
    of an action is stable between calls.

    Returns:
        list of lists of card-name strings.
    """
    # The second joker is spelled backslash-dollar exactly as in the rest of
    # the listing (possibly a Markdown-escaping artefact of '$'); it is kept
    # so this function agrees with ``Card.cards``.
    small_joker, big_joker = '*', '\\$'
    jokers = (small_joker, big_joker)
    ranks = [card for card in Card.cards if card not in jokers]

    low = Card.to_value('3')
    high = Card.to_value('2')

    def runs(min_len, span_cap=None):
        """Yield consecutive-rank runs (as card lists) of at least min_len."""
        # NOTE(review): end_v stops one short of Card.to_value('2') and the
        # run is range(start_v, end_v), so with the rank order in Card.cards
        # no generated run reaches 'A'. Confirm whether that is intended;
        # left unchanged because action indices are consumed elsewhere.
        for start_v in range(low, high):
            stop = high if span_cap is None else min(start_v + span_cap, high)
            for end_v in range(start_v + min_len, stop):
                yield Card.to_cards(list(range(start_v, end_v)))

    actions = [[]]
    # max_cards = 20

    # single
    for card in Card.cards:
        actions.append([card])

    # pair
    for card in ranks:
        actions.append([card] * 2)

    # triple
    for card in ranks:
        actions.append([card] * 3)

    # 3 + 1 (a joker may be the kicker)
    for main in ranks:
        for extra in Card.cards:
            if extra != main:
                actions.append([main] * 3 + [extra])

    # 3 + 2
    for main in ranks:
        for extra in ranks:
            if extra != main:
                actions.append([main] * 3 + [extra] * 2)

    # single sequence
    for seq in runs(5):
        actions.append(seq)

    # double sequence
    for seq in runs(3, 20 // 2):
        actions.append(seq * 2)

    # triple sequence
    for seq in runs(2, 20 // 3):
        actions.append(seq * 3)

    # 3 + 1 sequence
    for main in runs(2, 20 // 4):
        remains = [card for card in Card.cards if card not in main]
        for extra in itertools.combinations(remains, len(main)):
            # both jokers together are not allowed as kickers
            if not (small_joker in extra and big_joker in extra):
                actions.append(main * 3 + list(extra))

    # 3 + 2 sequence
    for main in runs(2, 20 // 5):
        remains = [card for card in ranks if card not in main]
        for extra in itertools.combinations(remains, len(main)):
            actions.append(main * 3 + list(extra) * 2)

    # bomb
    for card in ranks:
        actions.append([card] * 4)

    # bigbang
    actions.append([small_joker, big_joker])

    # 4 + 1 + 1
    for main in ranks:
        remains = [card for card in Card.cards if card != main]
        for extra in itertools.combinations(remains, 2):
            if not (small_joker in extra and big_joker in extra):
                actions.append([main] * 4 + list(extra))

    return actions
class Card:
    """Static lookup tables and converters for card names."""

    # Rank order, lowest first. The last two entries are the jokers; the
    # second one is spelled backslash-dollar exactly as in the rest of the
    # listing (possibly a Markdown-escaping artefact of '$').
    cards = ['3', '4', '5', '6', '7', '8', '9', '10', 'J', 'Q', 'K', 'A',
             '2', '*', '\\$']
    # Each ordinary rank owns four consecutive one-hot slots; the jokers own
    # one slot each at 52 and 53.
    cards_to_onehot_idx = dict((x, i * 4) for (i, x) in enumerate(cards))
    cards_to_onehot_idx['*'] = 52
    cards_to_onehot_idx['\\$'] = 53
    cards_to_value = dict(zip(cards, range(len(cards))))
    value_to_cards = dict((v, c) for (c, v) in cards_to_value.items())

    def __init__(self):
        pass

    @staticmethod
    def to_onehot(cards):
        """Encode a list of card names as a length-54 0/1 vector.

        For an ordinary rank held ``n`` times the first ``n`` (at most 4) of
        its four slots are set; each joker sets its single slot.
        """
        onehot = np.zeros(54)
        for name, held in Counter(cards).items():
            base = Card.cards_to_onehot_idx[name]
            if name in ('*', '\\$'):
                onehot[base] = 1
            else:
                onehot[base:base + min(held, 4)] = 1
        return onehot

    @staticmethod
    def to_value(card):
        """Return the rank value of a card name, or the sum for a list."""
        if isinstance(card, (list, tuple)):
            return sum(Card.cards_to_value[c] for c in card)
        return Card.cards_to_value[card]

    @staticmethod
    def to_cards(values):
        """Return the card name for a rank value, or a list for a list."""
        if isinstance(values, (list, tuple)):
            return [Card.value_to_cards[v] for v in values]
        return Card.value_to_cards[values]


class CardGroup:
    """A typed group of cards (single, pair, sequence, bomb, ...)."""

    # Types whose presence makes ``folks`` recurse on the remaining cards.
    _SPECIAL_TYPES = frozenset([
        'triple_seq', 'triple+single', 'triple+double', 'quadric+singles',
        'quadric+doubles', 'triple_seq+singles', 'triple_seq+doubles',
        'single_seq', 'double_seq',
    ])

    # Sort order used by ``analyze``. The first ten entries are taken from
    # the listing.
    # NOTE(review): the listing is cut off after 'triple'; the last four
    # entries (and their relative order) are an assumption -- they are the
    # remaining types ``analyze`` produces, with bombs placed last. Confirm
    # against the original file.
    _IMPORTANCE = ['single', 'double', 'double_seq', 'single_seq',
                   'triple+single', 'triple+double', 'triple_seq+singles',
                   'triple_seq+doubles', 'triple_seq', 'triple',
                   'quadric+singles', 'quadric+doubles', 'bomb', 'bigbang']

    def __init__(self, cards, t, val):
        self.type = t
        self.cards = cards
        self.value = val

    def __len__(self):
        return len(self.cards)

    def bigger_than(self, g):
        """Return True when this group beats group ``g``."""
        if g.type == 'bigbang':
            return False
        if g.type == 'bomb':
            # only a higher bomb or the joker pair beats a bomb
            return ((self.type == 'bomb' and self.value > g.value)
                    or self.type == 'bigbang')
        if self.type in ('bomb', 'bigbang'):
            return True
        return (self.type == g.type and len(self) == len(g)
                and self.value > g.value)

    @staticmethod
    def isvalid(cards):
        """Return True when ``cards`` can be played as exactly one group."""
        return CardGroup.folks(cards) == 1

    @staticmethod
    def to_cardgroup(cards):
        """Return the analysed group that uses every card in ``cards``.

        Raises:
            Exception: when no candidate covers all the cards.
        """
        for candidate in CardGroup.analyze(cards):
            if len(candidate.cards) == len(cards):
                return candidate
        print(cards)
        raise Exception("Invalid Cards!")

    @staticmethod
    def folks(cards):
        """Estimate how many plays are needed to get rid of ``cards``.

        When at least one "special" combination exists, the result is the
        minimum over those combinations of 1 + folks(remaining cards);
        otherwise it is the number of candidates ``analyze`` found.
        """
        candidates = CardGroup.analyze(cards)
        best = 10000
        found_special = False
        for group in candidates:
            if group.type not in CardGroup._SPECIAL_TYPES:
                continue
            found_special = True
            remain = list(cards)
            for card in group.cards:
                remain.remove(card)
            # evaluate the recursion once instead of twice
            plays = CardGroup.folks(remain) + 1
            if plays < best:
                best = plays
        if not found_special:
            best = len(candidates)
        return best

    @staticmethod
    def _sequences(ranks, copies, min_len, kind):
        """Find runs of consecutive ranks in ``ranks`` (sorted ascending).

        Each run of at least ``min_len`` ranks becomes a ``kind`` group with
        every rank repeated ``copies`` times; its value is the last rank's.
        Ranks from '2' upwards end the scan (the first rank is not checked).
        """
        found = []
        if not ranks:
            return found

        def emit(run):
            if len(run) >= min_len:
                body = [rank for rank in run for _ in range(copies)]
                found.append(CardGroup(body, kind, Card.to_value(run[-1])))

        limit = Card.to_value('2')
        run = [ranks[0]]
        for rank in ranks[1:]:
            if Card.to_value(rank) >= limit:
                break
            if Card.to_value(rank) == Card.to_value(run[-1]) + 1:
                run.append(rank)
            else:
                emit(run)
                run = [rank]
        emit(run)
        return found

    @staticmethod
    def analyze(cards):
        """List every group that can be formed from ``cards``.

        Returns:
            list of CardGroup sorted by type (see ``_IMPORTANCE``) and then
            by value, lowest first.
        """
        cards = list(cards)
        candidates = []
        counts = Counter(cards)
        if '*' in cards and '\\$' in cards:
            candidates.append(CardGroup(['*', '\\$'], 'bigbang', 10000))
            cards.remove('*')
            cards.remove('\\$')

        # quadric
        quadrics = []
        for c in counts:
            if counts[c] == 4:
                quadrics.append(c)
                candidates.append(CardGroup([c] * 4, 'bomb', Card.to_value(c)))
                # Must be eager: a lazy filter() with a lambda closing over
                # the loop variable filters on the wrong card and can only be
                # consumed once on Python 3.
                cards = [a for a in cards if a != c]

        counts = Counter(cards)
        # kept in hand order for the combination steps further down
        singles = [c for c in counts if counts[c] == 1]
        doubles = [c for c in counts if counts[c] == 2]
        triples = sorted((c for c in counts if counts[c] == 3),
                         key=lambda k: Card.cards_to_value[k])

        def by_rank(names):
            return sorted(names, key=lambda k: Card.cards_to_value[k])

        # continuous sequence
        candidates += CardGroup._sequences(by_rank(singles), 1, 5, 'single_seq')
        candidates += CardGroup._sequences(by_rank(doubles), 2, 3, 'double_seq')
        candidates += CardGroup._sequences(triples, 3, 2, 'triple_seq')

        for t in triples:
            candidates.append(CardGroup([t] * 3, 'triple', Card.to_value(t)))

        # single
        for s in singles:
            candidates.append(CardGroup([s], 'single', Card.to_value(s)))

        # double
        for d in doubles:
            candidates.append(CardGroup([d] * 2, 'double', Card.to_value(d)))

        # 3 + 1, 3 + 2
        for c in triples:
            triple = [c] * 3
            for s in singles:
                if s not in triple:
                    candidates.append(CardGroup(
                        triple + [s], 'triple+single',
                        Card.to_value(c) * 1000 + Card.to_value(s)))
            for d in doubles:
                if d not in triple:
                    candidates.append(CardGroup(
                        triple + [d] * 2, 'triple+double',
                        Card.to_value(c) * 1000 + Card.to_value(d)))

        # 4 + 2
        for c in quadrics:
            for extra in itertools.combinations(singles, 2):
                candidates.append(CardGroup(
                    [c] * 4 + list(extra), 'quadric+singles',
                    Card.to_value(c) * 1000 + Card.to_value(list(extra))))
            for extra in itertools.combinations(doubles, 2):
                candidates.append(CardGroup(
                    [c] * 4 + list(extra) * 2, 'quadric+doubles',
                    Card.to_value(c) * 1000 + Card.to_value(list(extra))))

        # 3 * n + n, 3 * n + 2 * n
        triple_seq = [c.cards for c in candidates if c.type == 'triple_seq']
        for cand in triple_seq:
            cnt = len(cand) // 3
            for extra in itertools.combinations(singles, cnt):
                candidates.append(CardGroup(
                    cand + list(extra), 'triple_seq+singles',
                    Card.to_value(cand[-1]) * 1000
                    + Card.to_value(list(extra))))
            for extra in itertools.combinations(doubles, cnt):
                candidates.append(CardGroup(
                    cand + list(extra) * 2, 'triple_seq+doubles',
                    Card.to_value(cand[-1]) * 1000
                    + Card.to_value(list(extra))))

        # Order by type first and value second; a tuple key gives the same
        # ordering as the comparison function in the listing.
        rank_of = dict((name, i)
                       for (i, name) in enumerate(CardGroup._IMPORTANCE))
        candidates.sort(key=lambda g: (rank_of[g.type], g.value))
        return candidates


if __name__ == '__main__':
    # Self-check: every non-empty action must analyse as a single play.
    actions = get_action_space()
    for i in range(1, len(actions)):
        print(i)
        plays = CardGroup.folks(actions[i])
        print(plays)
        assert plays == 1

# player.py
from __future__ import print_function

from collections import Counter

from card import CardGroup, Card


def counter_subset(list1, list2):
    """Return True when list1 is a sub-multiset of list2."""
    c1, c2 = Counter(list1), Counter(list2)
    for (k, n) in c1.items():
        if n > c2[k]:
            return False
    return True


class Player:
    """One seat at the table: either a human or the rule-based bot."""

    def __init__(self, name):
        self.cards = []
        self.candidates = []
        # set whenever the hand changes so candidates are recomputed lazily
        self.need_analyze = True
        self.name = name
        self.is_lord = False
        self.trainable = False
        self.is_human = False

    def draw(self, group):
        """Add one card name or a list of card names to the hand."""
        self.need_analyze = True
        if isinstance(group, list):
            self.cards += group
        else:
            self.cards.append(group)

    def discard(self, group):
        """Remove one card name or a list of card names from the hand."""
        self.need_analyze = True
        if isinstance(group, list):
            for c in group:
                self.cards.remove(c)
        else:
            self.cards.remove(group)

    def respond(self, last_player, cards, before_player, next_player):
        """Choose this player's move.

        Args:
            last_player: the Player who made the last real play, or None.
            cards: the CardGroup that player put down, or None.
            before_player: the Player seated before this one.
            next_player: the Player seated after this one.

        Returns:
            ``(player, group, passed)``. On a pass the incoming
            ``last_player``/``cards`` are handed back with ``passed=True``;
            otherwise ``(self, played_group, False)``.
        """
        if self.is_human:
            return self._respond_human(last_player, cards)

        if self.need_analyze:
            self.candidates = CardGroup.analyze(self.cards)
            self.need_analyze = False

        if last_player is None or self is last_player:
            return self._lead(next_player)
        if not last_player.is_lord:
            return self._follow_farmer(last_player, cards, before_player)
        return self._follow_lord(last_player, cards)

    def _play(self, group):
        """Discard ``group`` from the hand and report it as played."""
        self.discard(group.cards)
        return self, group, False

    def _respond_human(self, last_player, cards):
        """Prompt on stdin until a legal move is entered."""
        leading = last_player is None or last_player is self
        while True:
            print("你的牌: ", end='')
            print(self.cards)
            intend = input("输入您的操作（0表示跳过）: ").strip().split(',')
            if intend[0] == '0':
                if leading:
                    # Passing while leading would hand back cards=None on the
                    # first move and break the caller; ask again instead.
                    print("无效操作，请重试")
                    continue
                return last_player, cards, True
            if not counter_subset(intend, self.cards) or \
                    not CardGroup.isvalid(intend):
                print("无效操作，请重试")
                continue
            group = CardGroup.to_cardgroup(intend)
            if not leading and not group.bigger_than(cards):
                print('你必须出更大的牌')
                continue
            self.discard(intend)
            return self, group, False

    def _lead(self, next_player):
        """Pick a group to open a round with."""
        if CardGroup.folks(self.cards) == 2:
            return self._play(self.candidates[-1])
        if not next_player.is_lord and len(next_player.cards) == 1:
            # NOTE(review): part of this branch is missing from the listing
            # (the text jumps from "if group.type ==" to
            # "candidates[0].cards)"); the 'single' test and the two lines
            # after it are reconstructed by analogy with the next branch.
            for group in self.candidates:
                if group.type == 'single':
                    return self._play(group)
            return self._play(self.candidates[0])
        if next_player.is_lord and len(next_player.cards) == 1:
            for group in self.candidates:
                if group.type != 'single':
                    return self._play(group)
            return self._play(self.candidates[-1])
        for group in self.candidates:
            if group.type != 'single' or \
                    Card.to_value(group.cards[0]) < Card.to_value('A'):
                return self._play(group)
        # Every candidate is a single 'A' or higher: the listing fell off the
        # end here and returned None; play the lowest group instead.
        return self._play(self.candidates[0])

    def _follow_farmer(self, last_player, cards, before_player):
        """Answer a play that was made by a non-landlord."""
        if CardGroup.folks(self.cards) <= 2:
            for c in self.candidates:
                if c.bigger_than(cards):
                    return self._play(c)
            return last_player, cards, True
        if before_player.is_lord and last_player is not before_player:
            return last_player, cards, True
        for c in self.candidates:
            if c.bigger_than(cards) \
                    and cards.type not in ['bomb', 'bigbang'] \
                    and Card.to_value(c.cards[0]) < Card.to_value('A'):
                return self._play(c)
        return last_player, cards, True

    def _follow_lord(self, last_player, cards):
        """Answer a play that was made by the landlord."""
        for c in self.candidates:
            if c.bigger_than(cards) and c.type not in ['bomb', 'bigbang']:
                return self._play(c)
        # use bomb
        for c in self.candidates:
            if c.bigger_than(cards):
                return self._play(c)
        return last_player, cards, True

* game.py from __future__ import print_function from card import Card,
CardGroupimport card from player import Player import numpy as np import random
from collections import Counter


def counter_subset(list1, list2):
    """Return True when list1 is a sub-multiset of list2."""
    c1, c2 = Counter(list1), Counter(list2)
    for (k, n) in c1.items():
        if n > c2[k]:
            return False
    return True


class Game:
    """Three-player game loop plus a step/state interface for an agent."""

    def __init__(self):
        self.deck = None
        self.players = []
        self.last_player = None
        self.last_cards = None
        self.lord_idx = -1
        self.history = []
        self.extra_cards = []
        self.action_space = card.get_action_space()
        self.next_turn = 0
        self.reset()

    def reset(self):
        """Build and shuffle a fresh deck and seat three new players."""
        self.deck = [c for c in Card.cards if c not in ['*', '\\$']] * 4
        self.deck = self.deck + ['*', '\\$']
        self.players = []
        self.last_player = None
        self.last_cards = None
        self.lord_idx = -1
        self.history = []
        self.extra_cards = []
        random.shuffle(self.deck)
        for i in range(3):
            # NOTE(review): the loop body is missing from the listing; the
            # rest of the class indexes self.players[0..2], so a Player is
            # created per seat. The name argument is an assumption.
            self.players.append(Player(str(i)))

    def get_mask(self, i):
        """Return a boolean mask over ``action_space`` for player ``i``.

        NOTE(review): only fragments of this method survive in the listing
        (the subset test over the action space, the ``bigger_than`` check
        against ``last_cards`` when another player played last, and an
        ``elif self.`` branch). The method name, the treatment of index 0
        (pass) and the return value are assumptions -- confirm against the
        original file.
        """
        mask = np.zeros(len(self.action_space), dtype=bool)
        for j in range(mask.size):
            if counter_subset(self.action_space[j], self.players[i].cards):
                mask[j] = True
        if self.last_player is not None:
            if self.last_player is not self.players[i]:
                for j in range(1, mask.size):
                    if mask[j] and not CardGroup.to_cardgroup(
                            self.action_space[j]).bigger_than(self.last_cards):
                        mask[j] = False
            elif self.last_player is self.players[i]:
                mask[0] = False
        else:
            mask[0] = False
        return mask

    def _ask(self, idx):
        """Let player ``idx`` respond, record the result and log it."""
        self.last_player, self.last_cards, passed = self.players[idx].respond(
            self.last_player, self.last_cards,
            self.players[(idx - 1) % 3], self.players[(idx + 1) % 3])
        self.log(idx, self.last_cards.cards, passed)
        return passed

    def prepare(self, lord_idx):
        """Deal the cards and play up to the first trainable player."""
        self.lord_idx = lord_idx
        # three cards for the lord
        self.extra_cards.extend(self.deck[:3])
        del self.deck[:3]
        print("底牌: ", end='')
        print(self.extra_cards)
        # draw cards in turn
        for i in range(len(self.deck)):
            self.players[i % 3].draw(self.deck[i])
        self.deck = []
        # suppose the third player is the lord
        self.players[lord_idx].draw(self.extra_cards)
        self.players[lord_idx].is_lord = True
        for p in self.players:
            p.cards = sorted(p.cards, key=lambda k: Card.cards_to_value[k])
        self.next_turn = (lord_idx + 3) % 3
        for i in range(lord_idx, lord_idx + 3):
            idx = i % 3
            if self.players[idx].trainable:
                self.next_turn = idx
                break
            self._ask(idx)

    def run(self):
        """Play rounds until a hand is empty; return the winner's index."""
        winner = None
        over = False
        while not over:
            for offset in range(3):
                i = (self.next_turn + offset) % 3
                passed = self._ask(i)
                # only cards actually put down belong in the history, as in
                # step(); a pass hands back the previous group unchanged
                if not passed:
                    self.history += self.last_cards.cards
                if not self.players[i].cards:
                    winner = i
                    over = True
                    break
        print("赢家是玩家 %s" % winner)
        print('....................................')
        return winner

    def check_winner(self):
        """Return the index of a player with an empty hand, else None."""
        for i in range(3):
            if not self.players[i].cards:
                return i
        return None

    def log(self, i, cards, passed):
        """Print what player ``i`` just did."""
        if passed:
            print("玩家 %d 跳过" % i)
        else:
            print("玩家 %d 出牌:" % i, end='')
            print(cards)

    def step(self, i, a, single_step=False):
        """Apply action index ``a`` for player ``i``.

        Unless ``single_step`` is set, the following non-trainable players
        then respond in turn.

        Returns:
            ``(reward, done)``: ``(2, True)`` or ``(1, True)`` when player
            ``i`` empties its hand as landlord or otherwise, ``(-1, True)``
            from the empty-hand check below, else ``(0, False)``.
        """
        if a != 0:
            self.players[i].discard(self.action_space[a])
            self.last_player = self.players[i]
            assert self.players[i] is self.last_player
            self.last_cards = CardGroup.to_cardgroup(self.action_space[a])
            self.history += self.last_cards.cards
            self.log(i, self.last_cards.cards, False)
            if not self.players[i].cards:
                return 2 if self.players[i].is_lord else 1, True
        else:
            self.log(i, [], True)

        if not single_step:
            ai = 0
            for k in range(i + 1, i + 3):
                ai = k % 3
                if self.players[ai].trainable:
                    break
                # NOTE(review): in the listing this check comes before the
                # player responds, so it inspects the hand of a player who
                # has not yet moved this turn; order kept as found.
                if not self.players[ai].cards:
                    # TODO: add coordination rewards
                    return -1, True
                passed = self._ask(ai)
                if not passed:
                    self.history += self.last_cards.cards
            self.next_turn = ai % 3
        else:
            self.next_turn = (self.next_turn + 1) % 3
        return 0, False

    def get_state(self, i):
        """Concatenate one-hot history, bottom cards and player i's hand."""
        return np.hstack((Card.to_onehot(self.history),
                          Card.to_onehot(self.extra_cards),
                          Card.to_onehot(self.players[i].cards)))


if __name__ == '__main__':
    game = Game()
    cnt = 0
    total = 100
    for i in range(total):
        game.reset()
        game.players[2].is_human = True
        game.prepare(2)
        winner = game.run()
        # the printed figure is the landlord's win rate, so compare with the
        # landlord's seat rather than seat 0
        if winner == game.lord_idx:
            cnt += 1
    print("地主获胜率: %f" % (cnt / float(total)))