User:Yatzmon/sandbox

=Overview= Zero-shot learning (ZSL) is an object categorization problem, found mostly in computer vision. Whereas most machine learning based object categorization algorithms require training on very large datasets with hundreds or thousands of image samples, zero-shot learning aims to classify object categories without any training samples at all. To recognize zero-shot categories, a classifier is given semantic information about each zero-shot (unseen) class, like a textual description or a set of visual attributes. For example, a classifier could recognize images of a new category "jackalope" given only the description "a rabbit with horns".

This problem is usually solved by learning a similarity score between an image and a description $$Similarity(description, image)$$, where the scoring function is trained on an auxiliary set of labelled image samples of seen categories. Then, given a test image from a zero-shot (unseen) class, and a set of predefined descriptions for unseen classes, a classifier selects the description that best matches the image, and predicts the class that is coupled to that description. Class descriptions are provided by (possibly several) experts, and one aims to learn a scoring function that can find the class whose description is most compatible with an image.

In a broader perspective, ZSL is a task of "compositional reasoning" CompositionSystems, Lake2014, Lake2017, AndreasNAACL, where new concepts are constructed by recombining primitive elements biederman1987, Lake2017. This ability resembles human learning, in that humans can easily recombine simple skills to solve new tasks Lake2014.

@article{CompositionSystems,
  title   = {Composition Systems},
  author  = {Geman, Stuart and Potter, Daniel F. and Chi, Zhiyi},
  journal = {Quarterly of Applied Mathematics},
  volume  = {60},
  number  = {4},
  pages   = {707--736},
  year    = {2002},
}

@phdthesis{Lake2014,
  author = {Lake, Brenden M.},
  title  = {Towards More Human-Like Concept Learning in Machines: Compositionality, Causality, and Learning-to-Learn},
  school = {Massachusetts Institute of Technology},
  year   = {2014},
}

@article{Lake2017,
  author  = {Lake, Brenden M. and Ullman, Tomer D. and Tenenbaum, Joshua B. and Gershman, Samuel J.},
  title   = {Building Machines That Learn and Think Like People},
  journal = {Behavioral and Brain Sciences},
  volume  = {40},
  year    = {2017},
}

@inproceedings{AndreasNAACL,
  author    = {Andreas, Jacob and Rohrbach, Marcus and Darrell, Trevor and Klein, Dan},
  title     = {Learning to Compose Neural Networks for Question Answering},
  booktitle = {NAACL},
  year      = {2016},
}

==Generalized Zero-Shot Learning==
Generalized ZSL (GZSL) extends ZSL to a realistic scenario where the test data contains both seen and unseen classes. There are two main kinds of GZSL methods. First, approaches that directly use the semantic class descriptions of both seen and unseen classes COSMO, LAGO, RelationNet, DEM, DCN, socher2013zero, chao for training and inference. Second, approaches that synthesize feature vectors of unseen classes using generative models like VAE or GAN, and then use them in training xian_2018, CCGAN, CVAE2, CVAE1, ZhuGAN, CADAVAE.

@inproceedings{COSMO,
  title     = {Adaptive Confidence Smoothing for Generalized Zero-Shot Learning},
  author    = {Atzmon, Yuval and Chechik, Gal},
  booktitle = {CVPR},
  month     = jun,
  year      = {2019},
}

@inproceedings{LAGO,
  title     = {Probabilistic {AND-OR} Attribute Grouping for Zero-Shot Learning},
  author    = {Atzmon, Yuval and Chechik, Gal},
  booktitle = {UAI},
  year      = {2018},
}

@inproceedings{RelationNet,
  title     = {Learning to Compare: Relation Network for Few-Shot Learning},
  author    = {Sung, Flood and Yang, Yongxin and Zhang, Li and Xiang, Tao and Torr, Philip H. S. and Hospedales, Timothy M.},
  booktitle = {CVPR},
  year      = {2018},
}

@inproceedings{DEM,
  author    = {Zhang, L. and Xiang, T. and Gong, S.},
  title     = {Learning a Deep Embedding Model for Zero-Shot Learning},
  booktitle = {CVPR},
  year      = {2017},
}

@inproceedings{DCN,
  author    = {Liu, S. and Long, M. and Wang, J. and Jordan, M.},
  title     = {Generalized Zero-Shot Learning with Deep Calibration Network},
  booktitle = {NIPS},
  year      = {2018},
}

@inproceedings{socher2013zero,
  title     = {Zero-Shot Learning Through Cross-Modal Transfer},
  author    = {Socher, Richard and Ganjoo, Milind and Manning, Christopher D. and Ng, Andrew Y.},
  booktitle = {NIPS},
  year      = {2013},
}

@inproceedings{chao,
  author    = {Chao, Wei-Lun and Changpinyo, Soravit and Gong, Boqing and Sha, Fei},
  title     = {An Empirical Study and Analysis of Generalized Zero-Shot Learning for Object Recognition in the Wild},
  booktitle = {ECCV},
  year      = {2016},
}

@inproceedings{xian_2018,
  author    = {Xian, Y. and Lorenz, T. and Schiele, B. and Akata, Z.},
  title     = {Feature Generating Networks for Zero-Shot Learning},
  booktitle = {CVPR},
  year      = {2018},
}

@inproceedings{CCGAN,
  author    = {Felix, R. and Kumar, V. and Reid, I. and Carneiro, G.},
  title     = {Multi-modal Cycle-consistent Generalized Zero-Shot Learning},
  booktitle = {ECCV},
  year      = {2018},
}

@inproceedings{ZhuGAN,
  author    = {Zhu, Y. and Elhoseiny, M. and Liu, B. and Peng, X. and Elgammal, A.},
  title     = {A Generative Adversarial Approach for Zero-Shot Learning from Noisy Texts},
  booktitle = {CVPR},
  year      = {2018},
}

@inproceedings{CVAE1,
  author    = {Mishra, A. and Reddy, M. and Mittal, A. and Murthy, H. A.},
  title     = {A Generative Model For Zero Shot Learning Using Conditional Variational Autoencoders},
  booktitle = {WACV},
  year      = {2018},
}

@inproceedings{CADAVAE,
  title     = {Generalized Zero-Shot Learning via Aligned Variational Autoencoders},
  author    = {Sch{\"o}nfeld, Edgar and Ebrahimi, Sayna and Sinha, Samarth and Darrell, Trevor and Akata, Zeynep},
  booktitle = {CVPR},
  year      = {2019},
}

=A mathematical formulation of the problem setup= More formally, in zero-shot learning, a training set $$\mathcal{D}$$ has $$N$$ labeled samples: $$\mathcal{D} = \{ (x_i, y_i), i=1 \dots N \}$$, where each $$x_i$$ is a feature vector and $$y_i\in\mathcal{S}$$ is a label from a seen class $$\mathcal{S} = \{1,2, \dots |\mathcal{S} |\}$$.

At test time, a new set of samples $$\mathcal{D}'=\{x_i, i=N+1 \dots N+M\}$$ is given from a set of unseen classes $$\mathcal{U}  = \{|\mathcal{S}|+1, \dots |\mathcal{S}| + |\mathcal{U}|\}$$. The goal is to predict the correct class of each sample. As a supervision signal, each class $$y \in \mathcal{S} \cup \mathcal{U}$$ is accompanied with a class-description vector $$a_y$$ in the form of semantic attributes DAP or natural language embedding Reed, ZhuGAN, socher2013zero. The crux of ZSL is to learn a similarity score for samples and class-descriptions $$F(a_y, x)$$, and predict the class $$y$$ that maximizes that score.

In probabilistic approaches to ZSL DAP, WangBN, socher2013zero,LAGO,DCN the similarity score assigns a probability for each class $$p(Y=y|x)= F(a_y, x)$$, with $$Y$$ viewed as a random variable for the label $$y$$ of a sample $$x$$.

==Generalized Zero-Shot Learning==
While in ZSL test samples are drawn from the set of unseen classes $$Y\in\mathcal{U}$$, in GZSL samples are drawn from either the seen or unseen sets: $$Y\in\mathcal{S} \cup \mathcal{U}$$.

@inproceedings{DAP,
  author    = {Lampert, C. H. and Nickisch, H. and Harmeling, S.},
  title     = {Learning to Detect Unseen Object Classes by Between-Class Attribute Transfer},
  booktitle = {CVPR},
  year      = {2009},
}

@inproceedings{WangBN,
  author    = {Wang, X. and Ji, Q.},
  title     = {A Unified Probabilistic Approach Modeling Relationships between Attributes and Objects},
  booktitle = {ICCV},
  year      = {2013},
}

=References=