A searchable list of some of my publications is below. You can also access my publications from the following sites.
My ORCID is
Publications:
Tianhao Zhang, Weilong Yang, Honglak Lee, Hung-Yu Tseng, Irfan Essa, Lu Jiang
Image manipulation by text instruction Patent
2023.
Abstract | Links | BibTeX | Tags: content creation, generative AI, google, media generation, patents
% Patent number is stored in "number"; "howpublished" is retained for styles that do not print "number", with the hash sign escaped for LaTeX.
@patent{2023-Zhang-IMTI,
  title        = {Image manipulation by text instruction},
  author       = {Zhang, Tianhao and Yang, Weilong and Lee, Honglak and Tseng, Hung-Yu and Essa, Irfan and Jiang, Lu},
  number       = {US11562518},
  year         = {2023},
  date         = {2023-01-01},
  url          = {https://patents.google.com/patent/US11562518},
  urldate      = {2023-01-01},
  abstract     = {A method for generating an output image from an input image and an input text instruction that specifies a location and a modification of an edit applied to the input image using a neural network is described. The neural network includes an image encoder, an image decoder, and an instruction attention network. The method includes receiving the input image and the input text instruction; extracting, from the input image, an input image feature that represents features of the input image using the image encoder; generating a spatial feature and a modification feature from the input text instruction using the instruction attention network; generating an edited image feature from the input image feature, the spatial feature and the modification feature; and generating the output image from the edited image feature using the image decoder.},
  howpublished = {US Patent \# US11562518},
  keywords     = {content creation, generative AI, google, media generation, patents},
  pubstate     = {published},
  tppubtype    = {patent}
}
Caroline Pantofaru, Vinay Bettadapura, Krishna Bharat, Irfan Essa
Systems and methods for directing content generation using a first-person point-of-view device. Patent
2020.
Abstract | Links | BibTeX | Tags: computer vision, google, patents
% Patent number is stored in "number" (it was previously in "publisher", wrapped in parentheses); "howpublished" is retained for styles that do not print "number", with the hash sign escaped for LaTeX.
% The title carries no trailing period: the bibliography style supplies final punctuation.
@patent{2020-Pantofaru-SMDCGUFPD,
  title        = {Systems and methods for directing content generation using a first-person point-of-view device},
  author       = {Pantofaru, Caroline and Bettadapura, Vinay and Bharat, Krishna and Essa, Irfan},
  number       = {10721439},
  year         = {2020},
  date         = {2020-07-21},
  url          = {https://patents.google.com/patent/US10721439},
  urldate      = {2020-07-01},
  abstract     = {A method for personalizing a content item using captured footage is disclosed. The method includes receiving a first video feed from a first camera, wherein the first camera is designated as a source camera for capturing an event during a first time duration. The method also includes receiving data from a second camera, and determining, based on the received data from the second camera, that an action was performed using the second camera, the action being indicative of a region of interest (ROI) of the user of the second camera occurring within a second time duration. The method further includes designating the second camera as the source camera for capturing the event during the second time duration.},
  howpublished = {US Patent \# 10721439},
  keywords     = {computer vision, google, patents},
  pubstate     = {published},
  tppubtype    = {patent}
}
Steven Hickson, Anelia Angelova, Irfan Essa, Rahul Sukthankar
Category learning neural networks Patent
2020.
Abstract | Links | BibTeX | Tags: google, machine learning, patents
% Patent number is stored in "number" (it was previously in "publisher", wrapped in parentheses); "howpublished" is retained for styles that do not print "number", with the hash sign escaped for LaTeX.
@patent{2020-Hickson-CLNN,
  title        = {Category learning neural networks},
  author       = {Hickson, Steven and Angelova, Anelia and Essa, Irfan and Sukthankar, Rahul},
  number       = {10635979},
  year         = {2020},
  date         = {2020-04-28},
  url          = {https://patents.google.com/patent/US10635979},
  urldate      = {2020-04-28},
  abstract     = {Methods, systems, and apparatus, including computer programs encoded on a computer storage medium, for determining a clustering of images into a plurality of semantic categories. In one aspect, a method comprises: training a categorization neural network, comprising, at each of a plurality of iterations: processing an image depicting an object using the categorization neural network to generate (i) a current prediction for whether the image depicts an object or a background region, and (ii) a current embedding of the image; determining a plurality of current cluster centers based on the current values of the categorization neural network parameters, wherein each cluster center represents a respective semantic category; and determining a gradient of an objective function that includes a classification loss and a clustering loss, wherein the clustering loss depends on a similarity between the current embedding of the image and the current cluster centers.},
  howpublished = {US Patent \#10635979},
  keywords     = {google, machine learning, patents},
  pubstate     = {published},
  tppubtype    = {patent}
}
Thad Eugene Starner, Irfan Essa, Hayes Solos Raffle, Daniel Aminzade
Object occlusion to initiate a visual search Patent
2019, (US Patent 10,437,882).
Abstract | Links | BibTeX | Tags: computer vision, google, patents
% Patent number is stored in "number" (it was previously in "publisher", wrapped in parentheses); "howpublished" is retained for styles that do not print "number", with the hash sign escaped for LaTeX.
% NOTE(review): the abstract formerly attached to this entry described hierarchical graph-based video segmentation and vector representations, which does not match this title; it is omitted until the correct abstract is supplied.
@patent{2019-Starner-OOIVS,
  title        = {Object occlusion to initiate a visual search},
  author       = {Starner, Thad Eugene and Essa, Irfan and Raffle, Hayes Solos and Aminzade, Daniel},
  number       = {10437882},
  year         = {2019},
  date         = {2019-10-01},
  url          = {https://patents.google.com/patent/US10437882},
  urldate      = {2019-10-01},
  howpublished = {US Patent \# 10437882},
  note         = {US Patent 10,437,882},
  keywords     = {computer vision, google, patents},
  pubstate     = {published},
  tppubtype    = {patent}
}
Irfan Essa, Vivek Kwatra, Matthias Grundmann
Vector representation for video segmentation Patent
2018, (US Patent Application 14/587,420).
Links | BibTeX | Tags: computer vision, google, patents
% Publication number is stored in "number"; the application number stays in "note" (it was duplicated in "publisher", wrapped in parentheses). "howpublished" is retained for styles that do not print "number", with the hash sign escaped for LaTeX.
% NOTE(review): the abstract text was found filed under a different entry in this list; its subject (vector representation of segmented video frames) matches this title. Confirm against the patent record.
@patent{2018-Essa-VRVS,
  title        = {Vector representation for video segmentation},
  author       = {Essa, Irfan and Kwatra, Vivek and Grundmann, Matthias},
  number       = {US20180350131A1},
  year         = {2018},
  date         = {2018-12-06},
  url          = {https://patents.google.com/patent/US20180350131},
  urldate      = {2018-12-01},
  abstract     = {Methods, systems, and apparatus, including computer programs encoded on computer storage media, for video segmentation. One of the methods includes receiving a digital video; performing hierarchical graph-based video segmentation on at least one frame of the digital video to generate a boundary representation for the at least one frame; generating a vector representation from the boundary representation for the at least one frame of the digital video, wherein generating the vector representation includes generating a polygon composed of at least three vectors, wherein each vector comprises two vertices connected by a line segment, from a boundary in the boundary representation; linking the vector representation to the at least one frame of the digital video; and storing the vector representation with the at least one frame of the digital video.},
  howpublished = {US Patent \# US20180350131A1},
  note         = {US Patent Application 14/587,420},
  keywords     = {computer vision, google, patents},
  pubstate     = {published},
  tppubtype    = {patent}
}
Caroline Pantofaru, Vinay Bettadapura, Krishna Bharat, Irfan Essa
Systems and methods for directing content generation using a first-person point-of-view device Patent
2018, (US Patent 10,110,850).
Abstract | Links | BibTeX | Tags: computer vision, google, patents
% Patent number is stored in "number" (it was previously in "publisher", wrapped in parentheses); "howpublished" is retained for styles that do not print "number", with the hash sign escaped for LaTeX.
@patent{2018-Pantofaru-SMDCGUFPD,
  title        = {Systems and methods for directing content generation using a first-person point-of-view device},
  author       = {Pantofaru, Caroline and Bettadapura, Vinay and Bharat, Krishna and Essa, Irfan},
  number       = {US10110850B1},
  year         = {2018},
  date         = {2018-10-23},
  url          = {https://patents.google.com/patent/US10110850},
  urldate      = {2018-10-01},
  abstract     = {A method for localizing the attention of a user of a first-person point-of-view (FPPOV) device is disclosed. The method includes receiving data from an FPPOV device, the data being indicative of a first region-of-interest (ROI) of an event for a first time duration and a second ROI of the event for a second time duration. The method further includes determining that a first camera from a plurality of cameras best captures the first ROI during the first time duration, and determining that a second camera from the plurality of cameras best captures the second ROI during the second time duration.},
  howpublished = {US Patent \# US10110850B1},
  note         = {US Patent 10,110,850},
  keywords     = {computer vision, google, patents},
  pubstate     = {published},
  tppubtype    = {patent}
}
Matthias Grundmann, Vivek Kwatra, Irfan Essa
2018, (US Patent 9,888,180).
Links | BibTeX | Tags: computer vision, google, patents
% Patent number is stored in "number" (it was previously in "publisher", wrapped in parentheses); "howpublished" is retained for styles that do not print "number", with the hash sign escaped for LaTeX.
@patent{2018-Grundmann-CCMERSDCSDVS,
  title        = {Cascaded camera motion estimation, rolling shutter detection, and camera shake detection for video stabilization},
  author       = {Grundmann, Matthias and Kwatra, Vivek and Essa, Irfan},
  number       = {US9888180},
  year         = {2018},
  date         = {2018-02-06},
  url          = {https://patents.google.com/patent/US9888180},
  urldate      = {2018-02-01},
  howpublished = {US Patent \# US9888180},
  note         = {US Patent 9,888,180},
  keywords     = {computer vision, google, patents},
  pubstate     = {published},
  tppubtype    = {patent}
}
Irfan Essa, Matthias Grundmann, Jessica Hodgins, Kihwan Kim, Iain Matthews, Ariel Shamir
System and method for utilizing motion fields to predict evolution in dynamic scenes Patent
2017.
Abstract | Links | BibTeX | Tags: computer vision, patents, sports visualization
% Patent number is stored in "number"; "howpublished" is retained for styles that do not print "number", with the hash sign escaped for LaTeX.
@patent{2017-Essa-SAMFUMFTPEIDS,
  title        = {System and method for utilizing motion fields to predict evolution in dynamic scenes},
  author       = {Essa, Irfan and Grundmann, Matthias and Hodgins, Jessica and Kim, Kihwan and Matthews, Iain and Shamir, Ariel},
  number       = {US9600760},
  year         = {2017},
  date         = {2017-03-21},
  url          = {https://patents.google.com/patent/US9600760},
  abstract     = {Described herein are methods, systems, apparatuses and products for utilizing motion fields to predict evolution in dynamic scenes. One aspect provides for accessing active object position data including positioning information of a plurality of individual active objects; extracting a plurality of individual active object motions from the active object position data; constructing a motion field using the plurality of individual active object motions; and using the motion field to predict one or more points of convergence at one or more spatial locations that active objects are proceeding towards at a future point in time. Other embodiments are disclosed.},
  howpublished = {US Patent \#US9600760},
  keywords     = {computer vision, patents, sports visualization},
  pubstate     = {published},
  tppubtype    = {patent}
}
Other Publication Sites
A few more sites that aggregate research publications: Academic.edu, Bibsonomy, CiteULike, Mendeley.
Copyright/About
[Please see the Copyright Statement that may apply to the content listed here.]
This list of publications is produced by using the teachPress plugin for WordPress.